/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the main directory of this archive for more details.
 */

#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)

#define emul_to_vcpu(ctxt) \
        container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/* EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
#ifdef CONFIG_X86_64
static u64 __read_mostly efer_reserved_bits =
        ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);

struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

static bool ignore_msrs = false;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);

bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);

/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

#define KVM_NR_SHARED_MSRS 16

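/*
 * "Shared" MSRs are MSRs whose host/guest values are switched lazily:
 * the guest value is loaded when KVM needs it, and the host value is
 * only restored from a user-return notifier, i.e. just before the CPU
 * returns to userspace, instead of on every vmexit.  The global table
 * records which MSRs participate; the per-cpu structure tracks the
 * host/current value pair for each slot.
 */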
struct kvm_shared_msrs_global {
        int nr;
        u32 msrs[KVM_NR_SHARED_MSRS];
};

struct kvm_shared_msrs {
        struct user_return_notifier urn;
        bool registered;
        struct kvm_shared_msr_values {
                u64 host;
                u64 curr;
        } values[KVM_NR_SHARED_MSRS];
};

static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;

struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "pf_fixed", VCPU_STAT(pf_fixed) },
        { "pf_guest", VCPU_STAT(pf_guest) },
        { "tlb_flush", VCPU_STAT(tlb_flush) },
        { "invlpg", VCPU_STAT(invlpg) },
        { "exits", VCPU_STAT(exits) },
        { "io_exits", VCPU_STAT(io_exits) },
        { "mmio_exits", VCPU_STAT(mmio_exits) },
        { "signal_exits", VCPU_STAT(signal_exits) },
        { "irq_window", VCPU_STAT(irq_window_exits) },
        { "nmi_window", VCPU_STAT(nmi_window_exits) },
        { "halt_exits", VCPU_STAT(halt_exits) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "hypercalls", VCPU_STAT(hypercalls) },
        { "request_irq", VCPU_STAT(request_irq_exits) },
        { "irq_exits", VCPU_STAT(irq_exits) },
        { "host_state_reload", VCPU_STAT(host_state_reload) },
        { "efer_reload", VCPU_STAT(efer_reload) },
        { "fpu_reload", VCPU_STAT(fpu_reload) },
        { "insn_emulation", VCPU_STAT(insn_emulation) },
        { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
        { "irq_injections", VCPU_STAT(irq_injections) },
        { "nmi_injections", VCPU_STAT(nmi_injections) },
        { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
        { "mmu_pte_write", VM_STAT(mmu_pte_write) },
        { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
        { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
        { "mmu_flooded", VM_STAT(mmu_flooded) },
        { "mmu_recycled", VM_STAT(mmu_recycled) },
        { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
        { "mmu_unsync", VM_STAT(mmu_unsync) },
        { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
        { "largepages", VM_STAT(lpages) },
        { NULL }
};

u64 __read_mostly host_xcr0;

static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
        int i;
        for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
                vcpu->arch.apf.gfns[i] = ~0;
}

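/*
 * User-return notifier callback: restore the host value of every shared
 * MSR whose current value still differs from it, then drop the
 * notifier, so host userspace never runs with guest MSR values.
 */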
static void kvm_on_user_return(struct user_return_notifier *urn)
{
        unsigned slot;
        struct kvm_shared_msrs *locals
                = container_of(urn, struct kvm_shared_msrs, urn);
        struct kvm_shared_msr_values *values;

        for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
                values = &locals->values[slot];
                if (values->host != values->curr) {
                        wrmsrl(shared_msrs_global.msrs[slot], values->host);
                        values->curr = values->host;
                }
        }
        locals->registered = false;
        user_return_notifier_unregister(urn);
}

static void shared_msr_update(unsigned slot, u32 msr)
{
        u64 value;
        unsigned int cpu = smp_processor_id();
        struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

        /* only read, and nobody should modify it at this time,
         * it doesn't matter if we corrupt the value a little */
        if (slot >= shared_msrs_global.nr) {
                printk(KERN_ERR "kvm: invalid MSR slot!");
                return;
        }
        rdmsrl_safe(msr, &value);
        smsr->values[slot].host = value;
        smsr->values[slot].curr = value;
}

void kvm_define_shared_msr(unsigned slot, u32 msr)
{
        if (slot >= shared_msrs_global.nr)
                shared_msrs_global.nr = slot + 1;
        shared_msrs_global.msrs[slot] = msr;
        /* make sure the update above is seen before the MSR is used */
        smp_wmb();
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);

static void kvm_shared_msr_cpu_online(void)
{
        unsigned i;

        for (i = 0; i < shared_msrs_global.nr; ++i)
                shared_msr_update(i, shared_msrs_global.msrs[i]);
}

void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
        unsigned int cpu = smp_processor_id();
        struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

        if (((value ^ smsr->values[slot].curr) & mask) == 0)
                return;
        smsr->values[slot].curr = value;
        wrmsrl(shared_msrs_global.msrs[slot], value);
        if (!smsr->registered) {
                smsr->urn.on_user_return = kvm_on_user_return;
                user_return_notifier_register(&smsr->urn);
                smsr->registered = true;
        }
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

static void drop_user_return_notifiers(void *ignore)
{
        unsigned int cpu = smp_processor_id();
        struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

        if (smsr->registered)
                kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
        return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
        /* TODO: reserve bits check */
        kvm_lapic_set_base(vcpu, data);
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

#define EXCPT_BENIGN            0
#define EXCPT_CONTRIBUTORY      1
#define EXCPT_PF                2

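/*
 * Classify an exception vector for the double-fault rules of the SDM:
 * page faults and the "contributory" exceptions (#DE, #TS, #NP, #SS,
 * #GP) can combine into a double fault; everything else is benign.
 */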
static int exception_class(int vector)
{
        switch (vector) {
        case PF_VECTOR:
                return EXCPT_PF;
        case DE_VECTOR:
        case TS_VECTOR:
        case NP_VECTOR:
        case SS_VECTOR:
        case GP_VECTOR:
                return EXCPT_CONTRIBUTORY;
        default:
                break;
        }
        return EXCPT_BENIGN;
}

static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
                unsigned nr, bool has_error, u32 error_code,
                bool reinject)
{
        u32 prev_nr;
        int class1, class2;

        kvm_make_request(KVM_REQ_EVENT, vcpu);

        if (!vcpu->arch.exception.pending) {
        queue:
                vcpu->arch.exception.pending = true;
                vcpu->arch.exception.has_error_code = has_error;
                vcpu->arch.exception.nr = nr;
                vcpu->arch.exception.error_code = error_code;
                vcpu->arch.exception.reinject = reinject;
                return;
        }

        /* a second exception is already pending; see how the two combine */
        prev_nr = vcpu->arch.exception.nr;
        if (prev_nr == DF_VECTOR) {
                /* triple fault -> shutdown */
                kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                return;
        }
        class1 = exception_class(prev_nr);
        class2 = exception_class(nr);
        if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
                || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
                /* generate double fault per SDM Table 5-5 */
                vcpu->arch.exception.pending = true;
                vcpu->arch.exception.has_error_code = true;
                vcpu->arch.exception.nr = DF_VECTOR;
                vcpu->arch.exception.error_code = 0;
        } else
                /* replace previous exception with a new one in a hope
                   that instruction re-execution will regenerate the lost
                   exception */
                goto queue;
}

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
        kvm_multiple_exception(vcpu, nr, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
        kvm_multiple_exception(vcpu, nr, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
        if (err)
                kvm_inject_gp(vcpu, 0);
        else
                kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
        ++vcpu->stat.pf_guest;
        vcpu->arch.cr2 = fault->address;
        kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);

void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
        if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
                vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
        else
                vcpu->arch.mmu.inject_page_fault(vcpu, fault);
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
        atomic_inc(&vcpu->arch.nmi_queued);
        kvm_make_request(KVM_REQ_NMI, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
        kvm_multiple_exception(vcpu, nr, true, error_code, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
        kvm_multiple_exception(vcpu, nr, true, error_code, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);

/*
 * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
 * a #GP and return false.
 */
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
        if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
                return true;
        kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
        return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

/*
 * This function will be used to read from the physical memory of the currently
 * running guest. The difference to kvm_read_guest_page is that this function
 * can read from guest physical or from the guest's guest physical memory.
 */
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                            gfn_t ngfn, void *data, int offset, int len,
                            u32 access)
{
        gfn_t real_gfn;
        gpa_t ngpa;

        ngpa     = gfn_to_gpa(ngfn);
        real_gfn = mmu->translate_gpa(vcpu, ngpa, access);
        if (real_gfn == UNMAPPED_GVA)
                return -EFAULT;

        real_gfn = gpa_to_gfn(real_gfn);

        return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
                               void *data, int offset, int len, u32 access)
{
        return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
                                       data, offset, len, access);
}

/*
 * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
        unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
        int i;
        int ret;
        u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

        ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
                                      offset * sizeof(u64), sizeof(pdpte),
                                      PFERR_USER_MASK|PFERR_WRITE_MASK);
        if (ret < 0) {
                ret = 0;
                goto out;
        }
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
                if (is_present_gpte(pdpte[i]) &&
                    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
                        ret = 0;
                        goto out;
                }
        }
        ret = 1;

        memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
        __set_bit(VCPU_EXREG_PDPTR,
                  (unsigned long *)&vcpu->arch.regs_avail);
        __set_bit(VCPU_EXREG_PDPTR,
                  (unsigned long *)&vcpu->arch.regs_dirty);
out:

        return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);

static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
        u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
        bool changed = true;
        int offset;
        gfn_t gfn;
        int r;

        if (is_long_mode(vcpu) || !is_pae(vcpu))
                return false;

        if (!test_bit(VCPU_EXREG_PDPTR,
                      (unsigned long *)&vcpu->arch.regs_avail))
                return true;

        gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
        offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
        r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
                                       PFERR_USER_MASK | PFERR_WRITE_MASK);
        if (r < 0)
                goto out;
        changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
out:

        return changed;
}

int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
        unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
                                    X86_CR0_CD | X86_CR0_NW;

        cr0 |= X86_CR0_ET;

#ifdef CONFIG_X86_64
        if (cr0 & 0xffffffff00000000UL)
                return 1;
#endif

        cr0 &= ~CR0_RESERVED_BITS;

        if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
                return 1;

        if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
                return 1;

        if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
                if ((vcpu->arch.efer & EFER_LME)) {
                        int cs_db, cs_l;

                        if (!is_pae(vcpu))
                                return 1;
                        kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
                        if (cs_l)
                                return 1;
                } else
#endif
                if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
                                                 kvm_read_cr3(vcpu)))
                        return 1;
        }

        if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
                return 1;

        kvm_x86_ops->set_cr0(vcpu, cr0);

        if ((cr0 ^ old_cr0) & X86_CR0_PG) {
                kvm_clear_async_pf_completion_queue(vcpu);
                kvm_async_pf_hash_reset(vcpu);
        }

        if ((cr0 ^ old_cr0) & update_bits)
                kvm_mmu_reset_context(vcpu);
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
        (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
        u64 xcr0;

        /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
        if (index != XCR_XFEATURE_ENABLED_MASK)
                return 1;
        xcr0 = xcr;
        if (kvm_x86_ops->get_cpl(vcpu) != 0)
                return 1;
        if (!(xcr0 & XSTATE_FP))
                return 1;
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                return 1;
        if (xcr0 & ~host_xcr0)
                return 1;
        vcpu->arch.xcr0 = xcr0;
        vcpu->guest_xcr0_loaded = 0;
        return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
        if (__kvm_set_xcr(vcpu, index, xcr)) {
                kvm_inject_gp(vcpu, 0);
                return 1;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
                                   X86_CR4_PAE | X86_CR4_SMEP;
        if (cr4 & CR4_RESERVED_BITS)
                return 1;

        if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
                return 1;

        if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
                return 1;

        if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS))
                return 1;

        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
        } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
                   && ((cr4 ^ old_cr4) & pdptr_bits)
                   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
                                   kvm_read_cr3(vcpu)))
                return 1;

        if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
                if (!guest_cpuid_has_pcid(vcpu))
                        return 1;

                /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
                if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
                        return 1;
        }

        if (kvm_x86_ops->set_cr4(vcpu, cr4))
                return 1;

        if (((cr4 ^ old_cr4) & pdptr_bits) ||
            (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
                kvm_mmu_reset_context(vcpu);

        if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
                kvm_update_cpuid(vcpu);

        return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
        if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
                kvm_mmu_sync_roots(vcpu);
                kvm_mmu_flush_tlb(vcpu);
                return 0;
        }

        if (is_long_mode(vcpu)) {
                if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
                        if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
                                return 1;
                } else
                        if (cr3 & CR3_L_MODE_RESERVED_BITS)
                                return 1;
        } else {
                if (is_pae(vcpu)) {
                        if (cr3 & CR3_PAE_RESERVED_BITS)
                                return 1;
                        if (is_paging(vcpu) &&
                            !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
                                return 1;
                }
                /*
                 * We don't check reserved bits in nonpae mode, because
                 * this isn't enforced, and VMX depends on this.
                 */
        }

        /*
         * Does the new cr3 value map to physical memory? (Note, we
         * catch an invalid cr3 even in real-mode, because it would
         * cause trouble later on when we turn on paging anyway.)
         *
         * A real CPU would silently accept an invalid cr3 and would
         * attempt to use it - with largely undefined (and often hard
         * to debug) behavior on the guest side.
         */
        if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
                return 1;
        vcpu->arch.cr3 = cr3;
        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
        vcpu->arch.mmu.new_cr3(vcpu);
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
        if (cr8 & CR8_RESERVED_BITS)
                return 1;
        if (irqchip_in_kernel(vcpu->kvm))
                kvm_lapic_set_tpr(vcpu, cr8);
        else
                vcpu->arch.cr8 = cr8;
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
        if (irqchip_in_kernel(vcpu->kvm))
                return kvm_lapic_get_cr8(vcpu);
        else
                return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
        unsigned long dr7;

        if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
                dr7 = vcpu->arch.guest_debug_dr7;
        else
                dr7 = vcpu->arch.dr7;
        kvm_x86_ops->set_dr7(vcpu, dr7);
        vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
}

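/*
 * DR4 and DR5 alias DR6 and DR7 when CR4.DE is clear; when CR4.DE is
 * set, accessing them must raise #UD.  The helpers below encode that as
 * return codes: 1 -> inject #UD, -1 -> inject #GP, 0 -> success.
 */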
static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
        switch (dr) {
        case 0 ... 3:
                vcpu->arch.db[dr] = val;
                if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
                        vcpu->arch.eff_db[dr] = val;
                break;
        case 4:
                if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
                        return 1; /* #UD */
                /* fall through */
        case 6:
                if (val & 0xffffffff00000000ULL)
                        return -1; /* #GP */
                vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
                break;
        case 5:
                if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
                        return 1; /* #UD */
                /* fall through */
        default: /* 7 */
                if (val & 0xffffffff00000000ULL)
                        return -1; /* #GP */
                vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
                kvm_update_dr7(vcpu);
                break;
        }

        return 0;
}

int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
        int res;

        res = __kvm_set_dr(vcpu, dr, val);
        if (res > 0)
                kvm_queue_exception(vcpu, UD_VECTOR);
        else if (res < 0)
                kvm_inject_gp(vcpu, 0);

        return res;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);

static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
        switch (dr) {
        case 0 ... 3:
                *val = vcpu->arch.db[dr];
                break;
        case 4:
                if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
                        return 1;
                /* fall through */
        case 6:
                *val = vcpu->arch.dr6;
                break;
        case 5:
                if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
                        return 1;
                /* fall through */
        default: /* 7 */
                *val = vcpu->arch.dr7;
                break;
        }

        return 0;
}

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
        if (_kvm_get_dr(vcpu, dr, val)) {
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 1;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

bool kvm_rdpmc(struct kvm_vcpu *vcpu)
{
        u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        u64 data;
        int err;

        err = kvm_pmu_read_pmc(vcpu, ecx, &data);
        if (err)
                return err;
        kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
        kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
        return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu. This capabilities test skips MSRs that are
 * kvm-specific. Those are put in the beginning of the list.
 */
#define KVM_SAVE_MSRS_BEGIN     10
static u32 msrs_to_save[] = {
        MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
        MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
        HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
        HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
        MSR_KVM_PV_EOI_EN,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_STAR,
#ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
        MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};

static unsigned num_msrs_to_save;

static const u32 emulated_msrs[] = {
        MSR_IA32_TSC_ADJUST,
        MSR_IA32_TSCDEADLINE,
        MSR_IA32_MISC_ENABLE,
        MSR_IA32_MCG_STATUS,
        MSR_IA32_MCG_CTL,
};

static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
        u64 old_efer = vcpu->arch.efer;

        if (efer & efer_reserved_bits)
                return 1;

        if (is_paging(vcpu)
            && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
                return 1;

        if (efer & EFER_FFXSR) {
                struct kvm_cpuid_entry2 *feat;

                feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
                if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
                        return 1;
        }

        if (efer & EFER_SVME) {
                struct kvm_cpuid_entry2 *feat;

                feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
                if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
                        return 1;
        }

        efer &= ~EFER_LMA;
        efer |= vcpu->arch.efer & EFER_LMA;

        kvm_x86_ops->set_efer(vcpu, efer);

        vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;

        /* Update reserved bits */
        if ((efer ^ old_efer) & EFER_NX)
                kvm_mmu_reset_context(vcpu);

        return 0;
}

void kvm_enable_efer_bits(u64 mask)
{
        efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
        return kvm_x86_ops->set_msr(vcpu, msr);
}

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
        struct msr_data msr;

        msr.data = *data;
        msr.index = index;
        msr.host_initiated = true;
        return kvm_set_msr(vcpu, &msr);
}

#ifdef CONFIG_X86_64
struct pvclock_gtod_data {
        seqcount_t      seq;

        struct { /* extract of a clocksource struct */
                int vclock_mode;
                cycle_t cycle_last;
                cycle_t mask;
                u32     mult;
                u32     shift;
        } clock;

        /* open coded 'struct timespec' */
        u64             monotonic_time_snsec;
        time_t          monotonic_time_sec;
};

static struct pvclock_gtod_data pvclock_gtod_data;

static void update_pvclock_gtod(struct timekeeper *tk)
{
        struct pvclock_gtod_data *vdata = &pvclock_gtod_data;

        write_seqcount_begin(&vdata->seq);

        /* copy pvclock gtod data */
        vdata->clock.vclock_mode        = tk->clock->archdata.vclock_mode;
        vdata->clock.cycle_last         = tk->clock->cycle_last;
        vdata->clock.mask               = tk->clock->mask;
        vdata->clock.mult               = tk->mult;
        vdata->clock.shift              = tk->shift;

        vdata->monotonic_time_sec       = tk->xtime_sec
                                        + tk->wall_to_monotonic.tv_sec;
        vdata->monotonic_time_snsec     = tk->xtime_nsec
                                        + (tk->wall_to_monotonic.tv_nsec
                                                << tk->shift);
        while (vdata->monotonic_time_snsec >=
                                        (((u64)NSEC_PER_SEC) << tk->shift)) {
                vdata->monotonic_time_snsec -=
                                        ((u64)NSEC_PER_SEC) << tk->shift;
                vdata->monotonic_time_sec++;
        }

        write_seqcount_end(&vdata->seq);
}
#endif

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
        int version;
        int r;
        struct pvclock_wall_clock wc;
        struct timespec boot;

        if (!wall_clock)
                return;

        r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
        if (r)
                return;

        if (version & 1)
                ++version;  /* first time write, random junk */

        ++version;

        kvm_write_guest(kvm, wall_clock, &version, sizeof(version));

        /*
         * The guest calculates current wall clock time by adding
         * system time (updated by kvm_guest_time_update below) to the
         * wall clock specified here.  guest system time equals host
         * system time for us, thus we must fill in host boot time here.
         */
        getboottime(&boot);

        if (kvm->arch.kvmclock_offset) {
                struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
                boot = timespec_sub(boot, ts);
        }
        wc.sec = boot.tv_sec;
        wc.nsec = boot.tv_nsec;
        wc.version = version;

        kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

        version++;
        kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}

static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
        uint32_t quotient, remainder;

        /* Don't try to replace with do_div(), this one calculates
         * "(dividend << 32) / divisor" */
        __asm__ ( "divl %4"
                  : "=a" (quotient), "=d" (remainder)
                  : "0" (0), "1" (dividend), "r" (divisor) );
        return quotient;
}

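/*
 * kvm_get_time_scale() below computes a (pshift, pmultiplier) pair such
 * that, in the pvclock_scale_delta() sense,
 *
 *      scaled ~= ((base << pshift) * pmultiplier) >> 32
 *
 * e.g. scaling a 1,000,000 kHz nanosecond clock up to a 2,500,000 kHz
 * TSC yields pshift = 2 and pmultiplier ~= 0.625 * 2^32, since
 * (base * 4) * 0.625 = base * 2.5.
 */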
static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
                               s8 *pshift, u32 *pmultiplier)
{
        uint64_t scaled64;
        int32_t  shift = 0;
        uint64_t tps64;
        uint32_t tps32;

        tps64 = base_khz * 1000LL;
        scaled64 = scaled_khz * 1000LL;
        while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
                tps64 >>= 1;
                shift--;
        }

        tps32 = (uint32_t)tps64;
        while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
                if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
                        scaled64 >>= 1;
                else
                        tps32 <<= 1;
                shift++;
        }

        *pshift = shift;
        *pmultiplier = div_frac(scaled64, tps32);

        pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
                 __func__, base_khz, scaled_khz, shift, *pmultiplier);
}

static inline u64 get_kernel_ns(void)
{
        struct timespec ts;

        WARN_ON(preemptible());
        ktime_get_ts(&ts);
        monotonic_to_bootbased(&ts);
        return timespec_to_ns(&ts);
}

#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;

static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
        return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
                                   vcpu->arch.virtual_tsc_shift);
}

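/*
 * Adjust a frequency given in kHz by a signed parts-per-million delta,
 * rounding down: e.g. 1,000,000 kHz with ppm = 250 gives
 * 1000000 * (1000000 + 250) / 1000000 = 1,000,250 kHz.
 */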
static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
        u64 v = (u64)khz * (1000000 + ppm);
        do_div(v, 1000000);
        return v;
}

static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
        u32 thresh_lo, thresh_hi;
        int use_scaling = 0;

        /* Compute a scale to convert nanoseconds in TSC cycles */
        kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
                           &vcpu->arch.virtual_tsc_shift,
                           &vcpu->arch.virtual_tsc_mult);
        vcpu->arch.virtual_tsc_khz = this_tsc_khz;

        /*
         * Compute the variation in TSC rate which is acceptable
         * within the range of tolerance and decide if the
         * rate being applied is within those bounds of the hardware
         * rate.  If so, no scaling or compensation need be done.
         */
        thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
        thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
        if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
                pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
                use_scaling = 1;
        }
        kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}

static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
        u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
                                      vcpu->arch.virtual_tsc_mult,
                                      vcpu->arch.virtual_tsc_shift);
        tsc += vcpu->arch.this_tsc_write;
        return tsc;
}

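/*
 * Decide whether the per-VM "master clock" can be used: it requires the
 * host clocksource to be the TSC and all vcpus' TSC writes to have
 * matched (nr_vcpus_matched_tsc + 1 == online_vcpus).  Request a
 * KVM_REQ_MASTERCLOCK_UPDATE whenever that state changes in either
 * direction.
 */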
void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
        bool vcpus_matched;
        bool do_request = false;
        struct kvm_arch *ka = &vcpu->kvm->arch;
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

        vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
                         atomic_read(&vcpu->kvm->online_vcpus));

        if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
                if (!ka->use_master_clock)
                        do_request = true;

        if (!vcpus_matched && ka->use_master_clock)
                do_request = true;

        if (do_request)
                kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

        trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
                            atomic_read(&vcpu->kvm->online_vcpus),
                            ka->use_master_clock, gtod->clock.vclock_mode);
#endif
}

static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
        u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
        vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}

void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
        struct kvm *kvm = vcpu->kvm;
        u64 offset, ns, elapsed;
        unsigned long flags;
        s64 usdiff;
        bool matched;
        u64 data = msr->data;

        raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
        offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
        ns = get_kernel_ns();
        elapsed = ns - kvm->arch.last_tsc_nsec;

        /* n.b - signed multiplication and division required */
        usdiff = data - kvm->arch.last_tsc_write;
#ifdef CONFIG_X86_64
        usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
#else
        /* do_div() only does unsigned */
        asm("idivl %2; xor %%edx, %%edx"
            : "=A"(usdiff)
            : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
#endif
        do_div(elapsed, 1000);
        usdiff -= elapsed;
        if (usdiff < 0)
                usdiff = -usdiff;

        /*
         * Special case: a TSC write whose virtual cycle time differs from
         * real elapsed time by less than one second is interpreted as an
         * attempt to synchronize this CPU with the others.
         *
         * For a reliable TSC, we can match TSC offsets exactly; for an
         * unstable TSC, we add the elapsed time to the written value
         * instead.  Matching offsets from the start is preferable to
         * letting the compensation code try to catch up later.
         */
        if (usdiff < USEC_PER_SEC &&
            vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
                if (!check_tsc_unstable()) {
                        offset = kvm->arch.cur_tsc_offset;
                        pr_debug("kvm: matched tsc offset for %llu\n", data);
                } else {
                        u64 delta = nsec_to_cycles(vcpu, elapsed);
                        data += delta;
                        offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
                        pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
                }
                matched = true;
        } else {
                /*
                 * We split periods of matched TSC writes into generations.
                 * For each generation, we track the original measured
                 * nanosecond time, offset, and write, so if TSCs are in
                 * sync, we can match exact offset, and if not, we can match
                 * exact software computation in compute_guest_tsc()
                 *
                 * These values are tracked in kvm->arch.cur_xxx variables.
                 */
                kvm->arch.cur_tsc_generation++;
                kvm->arch.cur_tsc_nsec = ns;
                kvm->arch.cur_tsc_write = data;
                kvm->arch.cur_tsc_offset = offset;
                matched = false;
                pr_debug("kvm: new tsc generation %u, clock %llu\n",
                         kvm->arch.cur_tsc_generation, data);
        }

        /*
         * We also track the most recent recorded KHZ, write and time to
         * allow the matching interval to be extended at each write.
         */
        kvm->arch.last_tsc_nsec = ns;
        kvm->arch.last_tsc_write = data;
        kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;

        /* Reset of TSC must disable overshoot protection below */
        vcpu->arch.hv_clock.tsc_timestamp = 0;
        vcpu->arch.last_guest_tsc = data;

        /* Keep track of which generation this VCPU has synchronized to */
        vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
        vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
        vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;

        if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
                update_ia32_tsc_adjust_msr(vcpu, offset);
        kvm_x86_ops->write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

        spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
        if (matched)
                kvm->arch.nr_vcpus_matched_tsc++;
        else
                kvm->arch.nr_vcpus_matched_tsc = 0;

        kvm_track_tsc_matching(vcpu);
        spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}

EXPORT_SYMBOL_GPL(kvm_write_tsc);

#ifdef CONFIG_X86_64

static cycle_t read_tsc(void)
{
        cycle_t ret;
        u64 last;

        /*
         * Empirically, a fence (of type that depends on the CPU)
         * before rdtsc is enough to ensure that rdtsc is ordered
         * with respect to loads.  The various CPU manuals are unclear
         * as to whether rdtsc can be reordered with later loads,
         * but no one has ever seen it happen.
         */
        rdtsc_barrier();
        ret = (cycle_t)vget_cycles();

        last = pvclock_gtod_data.clock.cycle_last;

        if (likely(ret >= last))
                return ret;

        /*
         * GCC likes to generate cmov here, but this branch is extremely
         * predictable (it's just a function of time and the likely is
         * very likely) and there's a data dependence, so force GCC
         * to generate a branch instead.  I don't barrier() because
         * we don't actually need a barrier, and if this function
         * ever gets inlined it will generate worse code.
         */
        asm volatile ("");
        return last;
}

static inline u64 vgettsc(cycle_t *cycle_now)
{
        long v;
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

        *cycle_now = read_tsc();

        v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
        return v * gtod->clock.mult;
}

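/*
 * Open-coded vDSO-style clock_gettime(CLOCK_MONOTONIC): loop on the
 * seqcount until a consistent snapshot of the gtod copy is read, then
 * extend the base time by the cycles accumulated since cycle_last.
 * Returns the vclock mode so callers can verify it was VCLOCK_TSC.
 */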
static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
{
        unsigned long seq;
        u64 ns;
        int mode;
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

        ts->tv_nsec = 0;
        do {
                seq = read_seqcount_begin(&gtod->seq);
                mode = gtod->clock.vclock_mode;
                ts->tv_sec = gtod->monotonic_time_sec;
                ns = gtod->monotonic_time_snsec;
                ns += vgettsc(cycle_now);
                ns >>= gtod->clock.shift;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
        timespec_add_ns(ts, ns);

        return mode;
}

/* returns true if host is using tsc clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
{
        struct timespec ts;

        /* checked again under seqlock below */
        if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
                return false;

        if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
                return false;

        monotonic_to_bootbased(&ts);
        *kernel_ns = timespec_to_ns(&ts);

        return true;
}
#endif

/*
 *
 * Assuming a stable TSC across physical CPUs, and a stable TSC
 * across virtual CPUs, the following condition is possible.
 * Each numbered line represents an event visible to both
 * CPUs at the next numbered event.
 *
 * "timespecX" represents host monotonic time. "tscX" represents
 * RDTSC value.
 *
 *              VCPU0 on CPU0           |       VCPU1 on CPU1
 *
 * 1.  read timespec0,tsc0
 * 2.                                   | timespec1 = timespec0 + N
 *                                      | tsc1 = tsc0 + M
 * 3. transition to guest              | transition to guest
 * 4. ret0 = timespec0 + (rdtsc - tsc0) |
 * 5.                                   | ret1 = timespec1 + (rdtsc - tsc1)
 *                                      |      = timespec0 + N + (rdtsc - (tsc0 + M))
 *
 * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
 *
 *      - ret0 < ret1
 *      - timespec0 + (rdtsc - tsc0) < timespec1 + (rdtsc - tsc1)
 *      - 0 < N - M => M < N
 *
 * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
 * always the case (the difference between two distinct xtime instances
 * might be smaller than the difference between corresponding TSC reads,
 * when updating guest vcpus with a lower TSC frequency).
 *
 * Solution: disable the kvm master clock if a vcpu is updated with a
 * lower TSC frequency.
 */
static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
        struct kvm_arch *ka = &kvm->arch;
        int vclock_mode;
        bool host_tsc_clocksource, vcpus_matched;

        vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
                         atomic_read(&kvm->online_vcpus));

        /*
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
        host_tsc_clocksource = kvm_get_time_and_clockread(
                                        &ka->master_kernel_ns,
                                        &ka->master_cycle_now);

        ka->use_master_clock = host_tsc_clocksource & vcpus_matched;

        if (ka->use_master_clock)
                atomic_set(&kvm_guest_has_master_clock, 1);

        vclock_mode = pvclock_gtod_data.clock.vclock_mode;
        trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
                                      vcpus_matched);
#endif
}

static int kvm_guest_time_update(struct kvm_vcpu *v)
{
        unsigned long flags, this_tsc_khz;
        struct kvm_vcpu_arch *vcpu = &v->arch;
        struct kvm_arch *ka = &v->kvm->arch;
        void *shared_kaddr;
        s64 kernel_ns, max_kernel_ns;
        u64 tsc_timestamp, host_tsc;
        struct pvclock_vcpu_time_info *guest_hv_clock;
        u8 pvclock_flags;
        bool use_master_clock;

        kernel_ns = 0;
        host_tsc = 0;

        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
        this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
        if (unlikely(this_tsc_khz == 0)) {
                local_irq_restore(flags);
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
                return 1;
        }

        /*
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
        spin_lock(&ka->pvclock_gtod_sync_lock);
        use_master_clock = ka->use_master_clock;
        if (use_master_clock) {
                host_tsc = ka->master_cycle_now;
                kernel_ns = ka->master_kernel_ns;
        }
        spin_unlock(&ka->pvclock_gtod_sync_lock);
        if (!use_master_clock) {
                host_tsc = native_read_tsc();
                kernel_ns = get_kernel_ns();
        }

        tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);

        /*
         * We may have to catch up the TSC to match elapsed wall clock
         * time for two reasons, even if kvmclock is used.
         *   1) CPU could have been running below the maximum TSC rate
         *   2) Broken TSC compensation resets the base at each VCPU
         *      entry to avoid unknown leaps of TSC even when running
         *      on the same CPU.  (see mark_tsc_unstable() in tsc.c)
         *      In that case the computed guest TSC can fall behind the
         *      value the guest would have seen at the virtual TSC rate;
         *      adjust the offset forward to catch it up.
         */
        if (vcpu->tsc_catchup) {
                u64 tsc = compute_guest_tsc(v, kernel_ns);
                if (tsc > tsc_timestamp) {
                        adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
                        tsc_timestamp = tsc;
                }
        }

        local_irq_restore(flags);

        if (!vcpu->time_page)
                return 0;

        /*
         * Time as measured by the TSC may go backwards when resetting the base
         * tsc_timestamp.  The reason for this is that the TSC resolution is
         * higher than the resolution of the other clock scales.  Thus, many
         * possible measurements of the TSC correspond to one measurement of any
         * other clock, and so a spread of values is possible.  This is not a
         * problem for the computation of the nanosecond clock; with TSC rates
         * around 1GHz, there can only be a few cycles which correspond to one
         * nanosecond value, and any path through this code will inevitably
         * take longer than that.  However, with the kernel_ns value itself,
         * the precision may be much lower, down to HZ granularity.  If the
         * first sampling of TSC against kernel_ns ends in the low part of the
         * range, and the second in the high end of the range, we can get:
         *
         * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
         *
         * As the sampling errors potentially range in the thousands of cycles,
         * it is possible such a time value has already been observed by the
         * guest.  To protect against this, we must compute the system time as
         * observed by the guest and ensure the new system time is greater.
         */
        max_kernel_ns = 0;
        if (vcpu->hv_clock.tsc_timestamp) {
                max_kernel_ns = vcpu->last_guest_tsc -
                                vcpu->hv_clock.tsc_timestamp;
                max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
                                    vcpu->hv_clock.tsc_to_system_mul,
                                    vcpu->hv_clock.tsc_shift);
                max_kernel_ns += vcpu->last_kernel_ns;
        }

        if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
                kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
                                   &vcpu->hv_clock.tsc_shift,
                                   &vcpu->hv_clock.tsc_to_system_mul);
                vcpu->hw_tsc_khz = this_tsc_khz;
        }

        /* with a master <monotonic time, tsc value> tuple,
         * pvclock clock reads always increase at the (scaled) rate
         * of guest TSC - no need to deal with sampling errors.
         */
        if (!use_master_clock) {
                if (max_kernel_ns > kernel_ns)
                        kernel_ns = max_kernel_ns;
        }

        vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
        vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
        vcpu->last_kernel_ns = kernel_ns;
        vcpu->last_guest_tsc = tsc_timestamp;

        /*
         * The interface expects us to write an even number signaling that the
         * update is finished. Since the guest won't see the intermediate
         * states, we just write the version field in both the first and the
         * last write.
         */
        vcpu->hv_clock.version += 2;

        shared_kaddr = kmap_atomic(vcpu->time_page);

        guest_hv_clock = shared_kaddr + vcpu->time_offset;

        /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
        pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);

        if (vcpu->pvclock_set_guest_stopped_request) {
                pvclock_flags |= PVCLOCK_GUEST_STOPPED;
                vcpu->pvclock_set_guest_stopped_request = false;
        }

        /* If the host uses TSC clocksource, then it is stable */
        if (use_master_clock)
                pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;

        vcpu->hv_clock.flags = pvclock_flags;

        memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
               sizeof(vcpu->hv_clock));

        kunmap_atomic(shared_kaddr);

        mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
        return 0;
}

static bool msr_mtrr_valid(unsigned msr)
{
        switch (msr) {
        case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
        case MSR_MTRRfix64K_00000:
        case MSR_MTRRfix16K_80000:
        case MSR_MTRRfix16K_A0000:
        case MSR_MTRRfix4K_C0000:
        case MSR_MTRRfix4K_C8000:
        case MSR_MTRRfix4K_D0000:
        case MSR_MTRRfix4K_D8000:
        case MSR_MTRRfix4K_E0000:
        case MSR_MTRRfix4K_E8000:
        case MSR_MTRRfix4K_F0000:
        case MSR_MTRRfix4K_F8000:
        case MSR_MTRRdefType:
        case MSR_IA32_CR_PAT:
                return true;
        case 0x2f8:
                return true;
        }
        return false;
}

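/*
 * The bitmask tests below encode which memory-type values are defined:
 * PAT allows types {0,1,4,5,6,7} (mask 0xf3), MTRRs allow {0,1,4,5,6}
 * (mask 0x73); "1 << t" selects the candidate type's bit.
 */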
static bool valid_pat_type(unsigned t)
{
        return t < 8 && (1 << t) & 0xf3;        /* 0, 1, 4, 5, 6, 7 */
}

static bool valid_mtrr_type(unsigned t)
{
        return t < 8 && (1 << t) & 0x73;        /* 0, 1, 4, 5, 6 */
}

static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
        int i;

        if (!msr_mtrr_valid(msr))
                return false;

        if (msr == MSR_IA32_CR_PAT) {
                for (i = 0; i < 8; i++)
                        if (!valid_pat_type((data >> (i * 8)) & 0xff))
                                return false;
                return true;
        } else if (msr == MSR_MTRRdefType) {
                if (data & ~0xcff)
                        return false;
                return valid_mtrr_type(data & 0xff);
        } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
                for (i = 0; i < 8 ; i++)
                        if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
                                return false;
                return true;
        }

        /* variable MTRRs */
        return valid_mtrr_type(data & 0xff);
}

static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
        u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

        if (!mtrr_valid(vcpu, msr, data))
                return 1;

        if (msr == MSR_MTRRdefType) {
                vcpu->arch.mtrr_state.def_type = data;
                vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
        } else if (msr == MSR_MTRRfix64K_00000)
                p[0] = data;
        else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
                p[1 + msr - MSR_MTRRfix16K_80000] = data;
        else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
                p[3 + msr - MSR_MTRRfix4K_C0000] = data;
        else if (msr == MSR_IA32_CR_PAT)
                vcpu->arch.pat = data;
        else {  /* Variable MTRRs */
                int idx, is_mtrr_mask;
                u64 *pt;

                idx = (msr - 0x200) / 2;
                is_mtrr_mask = msr - 0x200 - 2 * idx;
                if (!is_mtrr_mask)
                        pt =
                          (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
                else
                        pt =
                          (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
                *pt = data;
        }

        kvm_mmu_reset_context(vcpu);
        return 0;
}

static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
        u64 mcg_cap = vcpu->arch.mcg_cap;
        unsigned bank_num = mcg_cap & 0xff;

        switch (msr) {
        case MSR_IA32_MCG_STATUS:
                vcpu->arch.mcg_status = data;
                break;
        case MSR_IA32_MCG_CTL:
                if (!(mcg_cap & MCG_CTL_P))
                        return 1;
                if (data != 0 && data != ~(u64)0)
                        return -1;
                vcpu->arch.mcg_ctl = data;
                break;
        default:
                if (msr >= MSR_IA32_MC0_CTL &&
                    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
                        u32 offset = msr - MSR_IA32_MC0_CTL;
                        /* only 0 or all 1s can be written to IA32_MCi_CTL;
                         * some Linux kernels though clear bit 10 in bank 4 to
                         * workaround a BIOS/GART TLB issue on AMD K8s, so
                         * allow this to avoid an uncaught #GP in the guest
                         */
                        if ((offset & 0x3) == 0 &&
                            data != 0 && (data | (1 << 10)) != ~(u64)0)
                                return -1;
                        vcpu->arch.mce_banks[offset] = data;
                        break;
                }
                return 1;
        }
        return 0;
}

static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
        struct kvm *kvm = vcpu->kvm;
        int lm = is_long_mode(vcpu);
        u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
                : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
        u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
                : kvm->arch.xen_hvm_config.blob_size_32;
        u32 page_num = data & ~PAGE_MASK;
        u64 page_addr = data & PAGE_MASK;
        u8 *page;
        int r;

        r = -E2BIG;
        if (page_num >= blob_size)
                goto out;
        r = -ENOMEM;
        page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
        if (IS_ERR(page)) {
                r = PTR_ERR(page);
                goto out;
        }
        if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
                goto out_free;
        r = 0;
out_free:
        kfree(page);
out:
        return r;
}
1721
1722static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1723{
1724 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1725}
1726
1727static bool kvm_hv_msr_partition_wide(u32 msr)
1728{
1729 bool r = false;
1730 switch (msr) {
1731 case HV_X64_MSR_GUEST_OS_ID:
1732 case HV_X64_MSR_HYPERCALL:
1733 r = true;
1734 break;
1735 }
1736
1737 return r;
1738}
1739
1740static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1741{
1742 struct kvm *kvm = vcpu->kvm;
1743
1744 switch (msr) {
1745 case HV_X64_MSR_GUEST_OS_ID:
1746 kvm->arch.hv_guest_os_id = data;
1747
1748 if (!kvm->arch.hv_guest_os_id)
1749 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1750 break;
1751 case HV_X64_MSR_HYPERCALL: {
1752 u64 gfn;
1753 unsigned long addr;
1754 u8 instructions[4];
1755
1756
1757 if (!kvm->arch.hv_guest_os_id)
1758 break;
1759 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1760 kvm->arch.hv_hypercall = data;
1761 break;
1762 }
1763 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1764 addr = gfn_to_hva(kvm, gfn);
1765 if (kvm_is_error_hva(addr))
1766 return 1;
1767 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1768 ((unsigned char *)instructions)[3] = 0xc3;
1769 if (__copy_to_user((void __user *)addr, instructions, 4))
1770 return 1;
1771 kvm->arch.hv_hypercall = data;
1772 break;
1773 }
1774 default:
1775 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1776 "data 0x%llx\n", msr, data);
1777 return 1;
1778 }
1779 return 0;
1780}
1781
1782static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1783{
1784 switch (msr) {
1785 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1786 unsigned long addr;
1787
1788 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1789 vcpu->arch.hv_vapic = data;
1790 break;
1791 }
1792 addr = gfn_to_hva(vcpu->kvm, data >>
1793 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1794 if (kvm_is_error_hva(addr))
1795 return 1;
1796 if (__clear_user((void __user *)addr, PAGE_SIZE))
1797 return 1;
1798 vcpu->arch.hv_vapic = data;
1799 break;
1800 }
1801 case HV_X64_MSR_EOI:
1802 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1803 case HV_X64_MSR_ICR:
1804 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1805 case HV_X64_MSR_TPR:
1806 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1807 default:
1808 vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1809 "data 0x%llx\n", msr, data);
1810 return 1;
1811 }
1812
1813 return 0;
1814}

static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
        gpa_t gpa = data & ~0x3f;

        /* Bits 2:5 are reserved, should be zero */
        if (data & 0x3c)
                return 1;

        vcpu->arch.apf.msr_val = data;

        if (!(data & KVM_ASYNC_PF_ENABLED)) {
                kvm_clear_async_pf_completion_queue(vcpu);
                kvm_async_pf_hash_reset(vcpu);
                return 0;
        }

        if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
                return 1;

        vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
        kvm_async_pf_wakeup_all(vcpu);
        return 0;
}

static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
        if (vcpu->arch.time_page) {
                kvm_release_page_dirty(vcpu->arch.time_page);
                vcpu->arch.time_page = NULL;
        }
}

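/*
 * Steal time is time the vcpu spent runnable but not running, taken
 * from the scheduler's run_delay accounting.  accumulate_steal_time()
 * snapshots the delta since the last sample; record_steal_time()
 * publishes it into the guest's kvm_steal_time area, bumping the
 * version by two so it stays even, as the steal-time ABI expects.
 */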
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
{
        u64 delta;

        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;

        delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
        vcpu->arch.st.last_steal = current->sched_info.run_delay;
        vcpu->arch.st.accum_steal = delta;
}

static void record_steal_time(struct kvm_vcpu *vcpu)
{
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;

        if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                return;

        vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
        vcpu->arch.st.steal.version += 2;
        vcpu->arch.st.accum_steal = 0;

        kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
}

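/*
 * Handle a guest WRMSR that the vendor module (VMX/SVM) did not handle
 * itself.  Returns 0 if the write was accepted; a non-zero return makes
 * the caller inject #GP.  Unknown MSRs are rejected unless the
 * ignore_msrs module parameter is set, in which case they are logged
 * and dropped.
 */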
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
        bool pr = false;
        u32 msr = msr_info->index;
        u64 data = msr_info->data;

        switch (msr) {
        case MSR_EFER:
                return set_efer(vcpu, data);
        case MSR_K7_HWCR:
                data &= ~(u64)0x40;     /* ignore flush filter disable */
                data &= ~(u64)0x100;    /* ignore ignne emulation enable */
                data &= ~(u64)0x8;      /* ignore TLB cache disable */
                if (data != 0) {
                        vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
                                    data);
                        return 1;
                }
                break;
        case MSR_FAM10H_MMIO_CONF_BASE:
                if (data != 0) {
                        vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
                                    "0x%llx\n", data);
                        return 1;
                }
                break;
        case MSR_AMD64_NB_CFG:
                break;
        case MSR_IA32_DEBUGCTLMSR:
                if (!data) {
                        /* We support the non-activated case already */
                        break;
                } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
                        /* Values other than LBR and BTF are vendor-specific,
                           thus reserved and should throw a #GP */
                        return 1;
                }
                vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
                            __func__, data);
                break;
        case MSR_IA32_UCODE_REV:
        case MSR_IA32_UCODE_WRITE:
        case MSR_VM_HSAVE_PA:
        case MSR_AMD64_PATCH_LOADER:
                break;
        case 0x200 ... 0x2ff:
                return set_msr_mtrr(vcpu, msr, data);
        case MSR_IA32_APICBASE:
                kvm_set_apic_base(vcpu, data);
                break;
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
                return kvm_x2apic_msr_write(vcpu, msr, data);
        case MSR_IA32_TSCDEADLINE:
                kvm_set_lapic_tscdeadline_msr(vcpu, data);
                break;
        case MSR_IA32_TSC_ADJUST:
                if (guest_cpuid_has_tsc_adjust(vcpu)) {
                        if (!msr_info->host_initiated) {
                                u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
                                kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
                        }
                        vcpu->arch.ia32_tsc_adjust_msr = data;
                }
                break;
        case MSR_IA32_MISC_ENABLE:
                vcpu->arch.ia32_misc_enable_msr = data;
                break;
        case MSR_KVM_WALL_CLOCK_NEW:
        case MSR_KVM_WALL_CLOCK:
                vcpu->kvm->arch.wall_clock = data;
                kvm_write_wall_clock(vcpu->kvm, data);
                break;
        case MSR_KVM_SYSTEM_TIME_NEW:
        case MSR_KVM_SYSTEM_TIME: {
                kvmclock_reset(vcpu);

                vcpu->arch.time = data;
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

                /* we verify if the enable bit is set... */
                if (!(data & 1))
                        break;

                /* ...but clean it before doing the actual write */
                vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);

                vcpu->arch.time_page =
                                gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);

                if (is_error_page(vcpu->arch.time_page))
                        vcpu->arch.time_page = NULL;

                break;
        }
        case MSR_KVM_ASYNC_PF_EN:
                if (kvm_pv_enable_async_pf(vcpu, data))
                        return 1;
                break;
        case MSR_KVM_STEAL_TIME:

                if (unlikely(!sched_info_on()))
                        return 1;

                if (data & KVM_STEAL_RESERVED_MASK)
                        return 1;

                if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
                                              data & KVM_STEAL_VALID_BITS))
                        return 1;

                vcpu->arch.st.msr_val = data;

                if (!(data & KVM_MSR_ENABLED))
                        break;

                vcpu->arch.st.last_steal = current->sched_info.run_delay;

                preempt_disable();
                accumulate_steal_time(vcpu);
                preempt_enable();

                kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);

                break;
        case MSR_KVM_PV_EOI_EN:
                if (kvm_lapic_enable_pv_eoi(vcpu, data))
                        return 1;
                break;

        case MSR_IA32_MCG_CTL:
        case MSR_IA32_MCG_STATUS:
        case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
                return set_msr_mce(vcpu, msr, data);

        /*
         * The AMD K7 performance-counter MSRs below are not virtualized;
         * writes of zero are silently accepted, anything else is logged
         * once and otherwise ignored.
         */
        case MSR_K7_EVNTSEL0:
        case MSR_K7_EVNTSEL1:
        case MSR_K7_EVNTSEL2:
        case MSR_K7_EVNTSEL3:
                if (data != 0)
                        vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
                                    "0x%x data 0x%llx\n", msr, data);
                break;
        /* at least RHEL 4 unconditionally writes to the perfctr
         * registers, so we ignore writes to make it happy.
         */
        case MSR_K7_PERFCTR0:
        case MSR_K7_PERFCTR1:
        case MSR_K7_PERFCTR2:
        case MSR_K7_PERFCTR3:
                vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
                            "0x%x data 0x%llx\n", msr, data);
                break;
        case MSR_P6_PERFCTR0:
        case MSR_P6_PERFCTR1:
                pr = true;
                /* fall through */
        case MSR_P6_EVNTSEL0:
        case MSR_P6_EVNTSEL1:
                if (kvm_pmu_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr, data);

                if (pr || data != 0)
                        vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
                                    "0x%x data 0x%llx\n", msr, data);
                break;
        case MSR_K7_CLK_CTL:
                /*
                 * Ignore all writes to this no longer documented MSR.
                 * Writes are only relevant for old K7 processors,
                 * all pre-dating SVM, but a recommended workaround from
                 * AMD for these chips. It is possible to specify the
                 * affected processor models on the command line, hence
                 * the need to ignore the workaround.
                 */
                break;
        case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
                if (kvm_hv_msr_partition_wide(msr)) {
                        int r;
                        mutex_lock(&vcpu->kvm->lock);
                        r = set_msr_hyperv_pw(vcpu, msr, data);
                        mutex_unlock(&vcpu->kvm->lock);
                        return r;
                } else
                        return set_msr_hyperv(vcpu, msr, data);
                break;
        case MSR_IA32_BBL_CR_CTL3:
                /* Drop writes to this legacy MSR -- see rdmsr
                 * counterpart for further detail.
                 */
                vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
                break;
        case MSR_AMD64_OSVW_ID_LENGTH:
                if (!guest_cpuid_has_osvw(vcpu))
                        return 1;
                vcpu->arch.osvw.length = data;
                break;
        case MSR_AMD64_OSVW_STATUS:
                if (!guest_cpuid_has_osvw(vcpu))
                        return 1;
                vcpu->arch.osvw.status = data;
                break;
        default:
                if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
                        return xen_hvm_config(vcpu, data);
                if (kvm_pmu_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr, data);
                if (!ignore_msrs) {
                        vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
                                    msr, data);
                        return 1;
                } else {
                        vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
                                    msr, data);
                        break;
                }
        }
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);

/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
        return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}

static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
        u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

        if (!msr_mtrr_valid(msr))
                return 1;

        if (msr == MSR_MTRRdefType)
                *pdata = vcpu->arch.mtrr_state.def_type +
                         (vcpu->arch.mtrr_state.enabled << 10);
        else if (msr == MSR_MTRRfix64K_00000)
                *pdata = p[0];
        else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
                *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
        else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
                *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
        else if (msr == MSR_IA32_CR_PAT)
                *pdata = vcpu->arch.pat;
        else {  /* Variable MTRRs */
                int idx, is_mtrr_mask;
                u64 *pt;

                idx = (msr - 0x200) / 2;
                is_mtrr_mask = msr - 0x200 - 2 * idx;
                if (!is_mtrr_mask)
                        pt =
                          (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
                else
                        pt =
                          (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
                *pdata = *pt;
        }

        return 0;
}

static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
        u64 data;
        u64 mcg_cap = vcpu->arch.mcg_cap;
        unsigned bank_num = mcg_cap & 0xff;

        switch (msr) {
        case MSR_IA32_P5_MC_ADDR:
        case MSR_IA32_P5_MC_TYPE:
                data = 0;
                break;
        case MSR_IA32_MCG_CAP:
                data = vcpu->arch.mcg_cap;
                break;
        case MSR_IA32_MCG_CTL:
                if (!(mcg_cap & MCG_CTL_P))
                        return 1;
                data = vcpu->arch.mcg_ctl;
                break;
        case MSR_IA32_MCG_STATUS:
                data = vcpu->arch.mcg_status;
                break;
        default:
                if (msr >= MSR_IA32_MC0_CTL &&
                    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
                        u32 offset = msr - MSR_IA32_MC0_CTL;
                        data = vcpu->arch.mce_banks[offset];
                        break;
                }
                return 1;
        }
        *pdata = data;
        return 0;
}

static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
        u64 data = 0;
        struct kvm *kvm = vcpu->kvm;

        switch (msr) {
        case HV_X64_MSR_GUEST_OS_ID:
                data = kvm->arch.hv_guest_os_id;
                break;
        case HV_X64_MSR_HYPERCALL:
                data = kvm->arch.hv_hypercall;
                break;
        default:
                vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
                return 1;
        }

        *pdata = data;
        return 0;
}

static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
        u64 data = 0;

        switch (msr) {
        case HV_X64_MSR_VP_INDEX: {
                int r;
                struct kvm_vcpu *v;
                kvm_for_each_vcpu(r, v, vcpu->kvm)
                        if (v == vcpu)
                                data = r;
                break;
        }
        case HV_X64_MSR_EOI:
                return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
        case HV_X64_MSR_ICR:
                return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
        case HV_X64_MSR_TPR:
                return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
        case HV_X64_MSR_APIC_ASSIST_PAGE:
                data = vcpu->arch.hv_vapic;
                break;
        default:
                vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
                return 1;
        }
        *pdata = data;
        return 0;
}

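/*
 * RDMSR counterpart of kvm_set_msr_common: fill *pdata for MSRs the
 * vendor module did not handle.  Many legacy MSRs simply read as zero;
 * unknown MSRs return 1 (#GP) unless the ignore_msrs parameter is set.
 */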
2236int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2237{
2238 u64 data;
2239
2240 switch (msr) {
2241 case MSR_IA32_PLATFORM_ID:
2242 case MSR_IA32_EBL_CR_POWERON:
2243 case MSR_IA32_DEBUGCTLMSR:
2244 case MSR_IA32_LASTBRANCHFROMIP:
2245 case MSR_IA32_LASTBRANCHTOIP:
2246 case MSR_IA32_LASTINTFROMIP:
2247 case MSR_IA32_LASTINTTOIP:
2248 case MSR_K8_SYSCFG:
2249 case MSR_K7_HWCR:
2250 case MSR_VM_HSAVE_PA:
2251 case MSR_K7_EVNTSEL0:
2252 case MSR_K7_PERFCTR0:
2253 case MSR_K8_INT_PENDING_MSG:
2254 case MSR_AMD64_NB_CFG:
2255 case MSR_FAM10H_MMIO_CONF_BASE:
2256 data = 0;
2257 break;
2258 case MSR_P6_PERFCTR0:
2259 case MSR_P6_PERFCTR1:
2260 case MSR_P6_EVNTSEL0:
2261 case MSR_P6_EVNTSEL1:
2262 if (kvm_pmu_msr(vcpu, msr))
2263 return kvm_pmu_get_msr(vcpu, msr, pdata);
2264 data = 0;
2265 break;
2266 case MSR_IA32_UCODE_REV:
2267 data = 0x100000000ULL;
2268 break;
2269 case MSR_MTRRcap:
2270 data = 0x500 | KVM_NR_VAR_MTRR;
2271 break;
2272 case 0x200 ... 0x2ff:
2273 return get_msr_mtrr(vcpu, msr, pdata);
2274 case 0xcd:
2275 data = 3;
2276 break;
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288 case MSR_EBC_FREQUENCY_ID:
2289 data = 1 << 24;
2290 break;
2291 case MSR_IA32_APICBASE:
2292 data = kvm_get_apic_base(vcpu);
2293 break;
2294 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2295 return kvm_x2apic_msr_read(vcpu, msr, pdata);
2296 break;
2297 case MSR_IA32_TSCDEADLINE:
2298 data = kvm_get_lapic_tscdeadline_msr(vcpu);
2299 break;
2300 case MSR_IA32_TSC_ADJUST:
2301 data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2302 break;
2303 case MSR_IA32_MISC_ENABLE:
2304 data = vcpu->arch.ia32_misc_enable_msr;
2305 break;
2306 case MSR_IA32_PERF_STATUS:
2307
2308 data = 1000ULL;
2309
2310 data |= (((uint64_t)4ULL) << 40);
2311 break;
2312 case MSR_EFER:
2313 data = vcpu->arch.efer;
2314 break;
2315 case MSR_KVM_WALL_CLOCK:
2316 case MSR_KVM_WALL_CLOCK_NEW:
2317 data = vcpu->kvm->arch.wall_clock;
2318 break;
2319 case MSR_KVM_SYSTEM_TIME:
2320 case MSR_KVM_SYSTEM_TIME_NEW:
2321 data = vcpu->arch.time;
2322 break;
2323 case MSR_KVM_ASYNC_PF_EN:
2324 data = vcpu->arch.apf.msr_val;
2325 break;
2326 case MSR_KVM_STEAL_TIME:
2327 data = vcpu->arch.st.msr_val;
2328 break;
2329 case MSR_KVM_PV_EOI_EN:
2330 data = vcpu->arch.pv_eoi.msr_val;
2331 break;
2332 case MSR_IA32_P5_MC_ADDR:
2333 case MSR_IA32_P5_MC_TYPE:
2334 case MSR_IA32_MCG_CAP:
2335 case MSR_IA32_MCG_CTL:
2336 case MSR_IA32_MCG_STATUS:
2337 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
2338 return get_msr_mce(vcpu, msr, pdata);
2339 case MSR_K7_CLK_CTL:
		/*
		 * Provide expected ramp-up count for K7. All others
		 * are set to zero, indicating minimum divisors for
		 * every field.
		 *
		 * This prevents guest kernels on AMD host with CPU
		 * type 6, model 8 and higher from exploding due to
		 * the rdmsr failing.
		 */
2349 data = 0x20000000;
2350 break;
2351 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2352 if (kvm_hv_msr_partition_wide(msr)) {
2353 int r;
2354 mutex_lock(&vcpu->kvm->lock);
2355 r = get_msr_hyperv_pw(vcpu, msr, pdata);
2356 mutex_unlock(&vcpu->kvm->lock);
2357 return r;
2358 } else
2359 return get_msr_hyperv(vcpu, msr, pdata);
2360 break;
2361 case MSR_IA32_BBL_CR_CTL3:
		/*
		 * This legacy MSR exists but isn't fully documented in
		 * current silicon.  It is however accessed by winxp in
		 * very narrow scenarios where it sets bit #19.  Returning
		 * a sane canned value keeps such guests from blowing up
		 * on a failed rdmsr.
		 *
		 * L2 cache control register 3: 64GB range, 256KB size,
		 * enabled, latency 0x1, configured
		 */
2372 data = 0xbe702111;
2373 break;
2374 case MSR_AMD64_OSVW_ID_LENGTH:
2375 if (!guest_cpuid_has_osvw(vcpu))
2376 return 1;
2377 data = vcpu->arch.osvw.length;
2378 break;
2379 case MSR_AMD64_OSVW_STATUS:
2380 if (!guest_cpuid_has_osvw(vcpu))
2381 return 1;
2382 data = vcpu->arch.osvw.status;
2383 break;
2384 default:
2385 if (kvm_pmu_msr(vcpu, msr))
2386 return kvm_pmu_get_msr(vcpu, msr, pdata);
2387 if (!ignore_msrs) {
2388 vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
2389 return 1;
2390 } else {
2391 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
2392 data = 0;
2393 }
2394 break;
2395 }
2396 *pdata = data;
2397 return 0;
2398}
2399EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2400
/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
2406static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2407 struct kvm_msr_entry *entries,
2408 int (*do_msr)(struct kvm_vcpu *vcpu,
2409 unsigned index, u64 *data))
2410{
2411 int i, idx;
2412
2413 idx = srcu_read_lock(&vcpu->kvm->srcu);
2414 for (i = 0; i < msrs->nmsrs; ++i)
2415 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2416 break;
2417 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2418
2419 return i;
2420}
2421
/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
2427static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2428 int (*do_msr)(struct kvm_vcpu *vcpu,
2429 unsigned index, u64 *data),
2430 int writeback)
2431{
2432 struct kvm_msrs msrs;
2433 struct kvm_msr_entry *entries;
2434 int r, n;
2435 unsigned size;
2436
2437 r = -EFAULT;
2438 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2439 goto out;
2440
2441 r = -E2BIG;
2442 if (msrs.nmsrs >= MAX_IO_MSRS)
2443 goto out;
2444
2445 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2446 entries = memdup_user(user_msrs->entries, size);
2447 if (IS_ERR(entries)) {
2448 r = PTR_ERR(entries);
2449 goto out;
2450 }
2451
2452 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2453 if (r < 0)
2454 goto out_free;
2455
2456 r = -EFAULT;
2457 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2458 goto out_free;
2459
2460 r = n;
2461
2462out_free:
2463 kfree(entries);
2464out:
2465 return r;
2466}
2467
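/*
 * Reports which optional KVM capabilities this kernel supports; backs the
 * KVM_CHECK_EXTENSION ioctl.  A zero return means "not supported"; non-zero
 * is either a boolean or a capability-specific value (e.g. the number of
 * MCE banks for KVM_CAP_MCE).
 */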
2468int kvm_dev_ioctl_check_extension(long ext)
2469{
2470 int r;
2471
2472 switch (ext) {
2473 case KVM_CAP_IRQCHIP:
2474 case KVM_CAP_HLT:
2475 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2476 case KVM_CAP_SET_TSS_ADDR:
2477 case KVM_CAP_EXT_CPUID:
2478 case KVM_CAP_CLOCKSOURCE:
2479 case KVM_CAP_PIT:
2480 case KVM_CAP_NOP_IO_DELAY:
2481 case KVM_CAP_MP_STATE:
2482 case KVM_CAP_SYNC_MMU:
2483 case KVM_CAP_USER_NMI:
2484 case KVM_CAP_REINJECT_CONTROL:
2485 case KVM_CAP_IRQ_INJECT_STATUS:
2486 case KVM_CAP_ASSIGN_DEV_IRQ:
2487 case KVM_CAP_IRQFD:
2488 case KVM_CAP_IOEVENTFD:
2489 case KVM_CAP_PIT2:
2490 case KVM_CAP_PIT_STATE2:
2491 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2492 case KVM_CAP_XEN_HVM:
2493 case KVM_CAP_ADJUST_CLOCK:
2494 case KVM_CAP_VCPU_EVENTS:
2495 case KVM_CAP_HYPERV:
2496 case KVM_CAP_HYPERV_VAPIC:
2497 case KVM_CAP_HYPERV_SPIN:
2498 case KVM_CAP_PCI_SEGMENT:
2499 case KVM_CAP_DEBUGREGS:
2500 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2501 case KVM_CAP_XSAVE:
2502 case KVM_CAP_ASYNC_PF:
2503 case KVM_CAP_GET_TSC_KHZ:
2504 case KVM_CAP_PCI_2_3:
2505 case KVM_CAP_KVMCLOCK_CTRL:
2506 case KVM_CAP_READONLY_MEM:
2507 case KVM_CAP_IRQFD_RESAMPLE:
2508 r = 1;
2509 break;
2510 case KVM_CAP_COALESCED_MMIO:
2511 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2512 break;
2513 case KVM_CAP_VAPIC:
2514 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2515 break;
2516 case KVM_CAP_NR_VCPUS:
2517 r = KVM_SOFT_MAX_VCPUS;
2518 break;
2519 case KVM_CAP_MAX_VCPUS:
2520 r = KVM_MAX_VCPUS;
2521 break;
2522 case KVM_CAP_NR_MEMSLOTS:
2523 r = KVM_MEMORY_SLOTS;
2524 break;
2525 case KVM_CAP_PV_MMU:
2526 r = 0;
2527 break;
2528 case KVM_CAP_IOMMU:
2529 r = iommu_present(&pci_bus_type);
2530 break;
2531 case KVM_CAP_MCE:
2532 r = KVM_MAX_MCE_BANKS;
2533 break;
2534 case KVM_CAP_XCRS:
2535 r = cpu_has_xsave;
2536 break;
2537 case KVM_CAP_TSC_CONTROL:
2538 r = kvm_has_tsc_control;
2539 break;
2540 case KVM_CAP_TSC_DEADLINE_TIMER:
2541 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2542 break;
2543 default:
2544 r = 0;
2545 break;
2546 }
	return r;
}
2550
2551long kvm_arch_dev_ioctl(struct file *filp,
2552 unsigned int ioctl, unsigned long arg)
2553{
2554 void __user *argp = (void __user *)arg;
2555 long r;
2556
2557 switch (ioctl) {
2558 case KVM_GET_MSR_INDEX_LIST: {
2559 struct kvm_msr_list __user *user_msr_list = argp;
2560 struct kvm_msr_list msr_list;
2561 unsigned n;
2562
2563 r = -EFAULT;
2564 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2565 goto out;
2566 n = msr_list.nmsrs;
2567 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2568 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2569 goto out;
2570 r = -E2BIG;
2571 if (n < msr_list.nmsrs)
2572 goto out;
2573 r = -EFAULT;
2574 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2575 num_msrs_to_save * sizeof(u32)))
2576 goto out;
2577 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2578 &emulated_msrs,
2579 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2580 goto out;
2581 r = 0;
2582 break;
2583 }
2584 case KVM_GET_SUPPORTED_CPUID: {
2585 struct kvm_cpuid2 __user *cpuid_arg = argp;
2586 struct kvm_cpuid2 cpuid;
2587
2588 r = -EFAULT;
2589 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2590 goto out;
2591 r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
2592 cpuid_arg->entries);
2593 if (r)
2594 goto out;
2595
2596 r = -EFAULT;
2597 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2598 goto out;
2599 r = 0;
2600 break;
2601 }
2602 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2603 u64 mce_cap;
2604
2605 mce_cap = KVM_MCE_CAP_SUPPORTED;
2606 r = -EFAULT;
2607 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2608 goto out;
2609 r = 0;
2610 break;
2611 }
2612 default:
2613 r = -EINVAL;
2614 }
2615out:
2616 return r;
2617}
2618
2619static void wbinvd_ipi(void *garbage)
2620{
2621 wbinvd();
2622}
2623
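/*
 * Guest WBINVD only needs to be emulated when the guest has an assigned
 * device whose DMA is not snooped by the IOMMU (no cache coherency).
 */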
2624static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2625{
2626 return vcpu->kvm->arch.iommu_domain &&
2627 !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
2628}
2629
2630void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2631{
	/* Address WBINVD may be executed by guest */
2633 if (need_emulate_wbinvd(vcpu)) {
2634 if (kvm_x86_ops->has_wbinvd_exit())
2635 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2636 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2637 smp_call_function_single(vcpu->cpu,
2638 wbinvd_ipi, NULL, 1);
2639 }
2640
2641 kvm_x86_ops->vcpu_load(vcpu, cpu);
2642
	/* Apply any externally detected TSC adjustments (due to suspend) */
2644 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2645 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2646 vcpu->arch.tsc_offset_adjustment = 0;
2647 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
2648 }
2649
2650 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2651 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2652 native_read_tsc() - vcpu->arch.last_host_tsc;
2653 if (tsc_delta < 0)
2654 mark_tsc_unstable("KVM discovered backwards TSC");
2655 if (check_tsc_unstable()) {
2656 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2657 vcpu->arch.last_guest_tsc);
2658 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2659 vcpu->arch.tsc_catchup = 1;
2660 }
		/*
		 * On a host with synchronized TSC, there is no need
		 * to update kvmclock on vcpu->cpu migration
		 */
2665 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2666 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2667 if (vcpu->cpu != cpu)
2668 kvm_migrate_timers(vcpu);
2669 vcpu->cpu = cpu;
2670 }
2671
2672 accumulate_steal_time(vcpu);
2673 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2674}
2675
2676void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2677{
2678 kvm_x86_ops->vcpu_put(vcpu);
2679 kvm_put_guest_fpu(vcpu);
2680 vcpu->arch.last_host_tsc = native_read_tsc();
2681}
2682
2683static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2684 struct kvm_lapic_state *s)
2685{
2686 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2687
2688 return 0;
2689}
2690
2691static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2692 struct kvm_lapic_state *s)
2693{
2694 kvm_apic_post_state_restore(vcpu, s);
2695 update_cr8_intercept(vcpu);
2696
2697 return 0;
2698}
2699
2700static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2701 struct kvm_interrupt *irq)
2702{
2703 if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
2704 return -EINVAL;
2705 if (irqchip_in_kernel(vcpu->kvm))
2706 return -ENXIO;
2707
2708 kvm_queue_interrupt(vcpu, irq->irq, false);
2709 kvm_make_request(KVM_REQ_EVENT, vcpu);
2710
2711 return 0;
2712}
2713
2714static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2715{
2716 kvm_inject_nmi(vcpu);
2717
2718 return 0;
2719}
2720
2721static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2722 struct kvm_tpr_access_ctl *tac)
2723{
2724 if (tac->flags)
2725 return -EINVAL;
2726 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2727 return 0;
2728}
2729
2730static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2731 u64 mcg_cap)
2732{
2733 int r;
2734 unsigned bank_num = mcg_cap & 0xff, bank;
2735
2736 r = -EINVAL;
2737 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2738 goto out;
2739 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2740 goto out;
2741 r = 0;
2742 vcpu->arch.mcg_cap = mcg_cap;
2743
2744 if (mcg_cap & MCG_CTL_P)
2745 vcpu->arch.mcg_ctl = ~(u64)0;
2746
2747 for (bank = 0; bank < bank_num; bank++)
2748 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2749out:
2750 return r;
2751}
2752
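/*
 * Injects a machine-check event into the guest: uncorrected errors either
 * raise #MC or, when machine checks are masked (MCG_STATUS_MCIP set or
 * CR4.MCE clear), force a triple fault; corrected errors are only logged
 * in the bank registers.
 */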
2753static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2754 struct kvm_x86_mce *mce)
2755{
2756 u64 mcg_cap = vcpu->arch.mcg_cap;
2757 unsigned bank_num = mcg_cap & 0xff;
2758 u64 *banks = vcpu->arch.mce_banks;
2759
2760 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2761 return -EINVAL;
	/*
	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
	 * reporting is disabled
	 */
2766 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2767 vcpu->arch.mcg_ctl != ~(u64)0)
2768 return 0;
2769 banks += 4 * mce->bank;
	/*
	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
	 * reporting is disabled for the bank
	 */
2774 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2775 return 0;
2776 if (mce->status & MCI_STATUS_UC) {
2777 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2778 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2779 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2780 return 0;
2781 }
2782 if (banks[1] & MCI_STATUS_VAL)
2783 mce->status |= MCI_STATUS_OVER;
2784 banks[2] = mce->addr;
2785 banks[3] = mce->misc;
2786 vcpu->arch.mcg_status = mce->mcg_status;
2787 banks[1] = mce->status;
2788 kvm_queue_exception(vcpu, MC_VECTOR);
2789 } else if (!(banks[1] & MCI_STATUS_VAL)
2790 || !(banks[1] & MCI_STATUS_UC)) {
2791 if (banks[1] & MCI_STATUS_VAL)
2792 mce->status |= MCI_STATUS_OVER;
2793 banks[2] = mce->addr;
2794 banks[3] = mce->misc;
2795 banks[1] = mce->status;
2796 } else
2797 banks[1] |= MCI_STATUS_OVER;
2798 return 0;
2799}
2800
2801static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2802 struct kvm_vcpu_events *events)
2803{
2804 process_nmi(vcpu);
2805 events->exception.injected =
2806 vcpu->arch.exception.pending &&
2807 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2808 events->exception.nr = vcpu->arch.exception.nr;
2809 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2810 events->exception.pad = 0;
2811 events->exception.error_code = vcpu->arch.exception.error_code;
2812
2813 events->interrupt.injected =
2814 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2815 events->interrupt.nr = vcpu->arch.interrupt.nr;
2816 events->interrupt.soft = 0;
2817 events->interrupt.shadow =
2818 kvm_x86_ops->get_interrupt_shadow(vcpu,
2819 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2820
2821 events->nmi.injected = vcpu->arch.nmi_injected;
2822 events->nmi.pending = vcpu->arch.nmi_pending != 0;
2823 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2824 events->nmi.pad = 0;
2825
2826 events->sipi_vector = vcpu->arch.sipi_vector;
2827
2828 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2829 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2830 | KVM_VCPUEVENT_VALID_SHADOW);
2831 memset(&events->reserved, 0, sizeof(events->reserved));
2832}
2833
2834static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2835 struct kvm_vcpu_events *events)
2836{
2837 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2838 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2839 | KVM_VCPUEVENT_VALID_SHADOW))
2840 return -EINVAL;
2841
2842 process_nmi(vcpu);
2843 vcpu->arch.exception.pending = events->exception.injected;
2844 vcpu->arch.exception.nr = events->exception.nr;
2845 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2846 vcpu->arch.exception.error_code = events->exception.error_code;
2847
2848 vcpu->arch.interrupt.pending = events->interrupt.injected;
2849 vcpu->arch.interrupt.nr = events->interrupt.nr;
2850 vcpu->arch.interrupt.soft = events->interrupt.soft;
2851 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
2852 kvm_x86_ops->set_interrupt_shadow(vcpu,
2853 events->interrupt.shadow);
2854
2855 vcpu->arch.nmi_injected = events->nmi.injected;
2856 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
2857 vcpu->arch.nmi_pending = events->nmi.pending;
2858 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
2859
2860 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
2861 vcpu->arch.sipi_vector = events->sipi_vector;
2862
2863 kvm_make_request(KVM_REQ_EVENT, vcpu);
2864
2865 return 0;
2866}
2867
2868static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
2869 struct kvm_debugregs *dbgregs)
2870{
2871 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
2872 dbgregs->dr6 = vcpu->arch.dr6;
2873 dbgregs->dr7 = vcpu->arch.dr7;
2874 dbgregs->flags = 0;
2875 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
2876}
2877
2878static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
2879 struct kvm_debugregs *dbgregs)
2880{
2881 if (dbgregs->flags)
2882 return -EINVAL;
2883
2884 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
2885 vcpu->arch.dr6 = dbgregs->dr6;
2886 vcpu->arch.dr7 = dbgregs->dr7;
2887
2888 return 0;
2889}
2890
2891static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
2892 struct kvm_xsave *guest_xsave)
2893{
2894 if (cpu_has_xsave)
2895 memcpy(guest_xsave->region,
2896 &vcpu->arch.guest_fpu.state->xsave,
2897 xstate_size);
2898 else {
2899 memcpy(guest_xsave->region,
2900 &vcpu->arch.guest_fpu.state->fxsave,
2901 sizeof(struct i387_fxsave_struct));
2902 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
2903 XSTATE_FPSSE;
2904 }
2905}
2906
2907static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
2908 struct kvm_xsave *guest_xsave)
2909{
2910 u64 xstate_bv =
2911 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
2912
2913 if (cpu_has_xsave)
2914 memcpy(&vcpu->arch.guest_fpu.state->xsave,
2915 guest_xsave->region, xstate_size);
2916 else {
2917 if (xstate_bv & ~XSTATE_FPSSE)
2918 return -EINVAL;
2919 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
2920 guest_xsave->region, sizeof(struct i387_fxsave_struct));
2921 }
2922 return 0;
2923}
2924
2925static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
2926 struct kvm_xcrs *guest_xcrs)
2927{
2928 if (!cpu_has_xsave) {
2929 guest_xcrs->nr_xcrs = 0;
2930 return;
2931 }
2932
2933 guest_xcrs->nr_xcrs = 1;
2934 guest_xcrs->flags = 0;
2935 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
2936 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
2937}
2938
2939static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
2940 struct kvm_xcrs *guest_xcrs)
2941{
2942 int i, r = 0;
2943
2944 if (!cpu_has_xsave)
2945 return -EINVAL;
2946
2947 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
2948 return -EINVAL;
2949
	for (i = 0; i < guest_xcrs->nr_xcrs; i++)
		/* Only support XCR0 currently */
		if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
			r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
					  guest_xcrs->xcrs[i].value);
			break;
		}
2957 if (r)
2958 r = -EINVAL;
2959 return r;
2960}
2961
/*
 * kvm_set_guest_paused() indicates to the guest kernel that it has been
 * stopped by the hypervisor.  This function will be called from the host
 * only.
 * EINVAL is returned when the vcpu has not registered a kvmclock area
 * (vcpu->arch.time_page is NULL).
 */
2968static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
2969{
2970 if (!vcpu->arch.time_page)
2971 return -EINVAL;
2972 vcpu->arch.pvclock_set_guest_stopped_request = true;
2973 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2974 return 0;
2975}
2976
2977long kvm_arch_vcpu_ioctl(struct file *filp,
2978 unsigned int ioctl, unsigned long arg)
2979{
2980 struct kvm_vcpu *vcpu = filp->private_data;
2981 void __user *argp = (void __user *)arg;
2982 int r;
2983 union {
2984 struct kvm_lapic_state *lapic;
2985 struct kvm_xsave *xsave;
2986 struct kvm_xcrs *xcrs;
2987 void *buffer;
2988 } u;
2989
2990 u.buffer = NULL;
2991 switch (ioctl) {
2992 case KVM_GET_LAPIC: {
2993 r = -EINVAL;
2994 if (!vcpu->arch.apic)
2995 goto out;
2996 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
2997
2998 r = -ENOMEM;
2999 if (!u.lapic)
3000 goto out;
3001 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3002 if (r)
3003 goto out;
3004 r = -EFAULT;
3005 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3006 goto out;
3007 r = 0;
3008 break;
3009 }
3010 case KVM_SET_LAPIC: {
3011 r = -EINVAL;
3012 if (!vcpu->arch.apic)
3013 goto out;
3014 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3015 if (IS_ERR(u.lapic))
3016 return PTR_ERR(u.lapic);
3017
3018 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3019 break;
3020 }
3021 case KVM_INTERRUPT: {
3022 struct kvm_interrupt irq;
3023
3024 r = -EFAULT;
3025 if (copy_from_user(&irq, argp, sizeof irq))
3026 goto out;
3027 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3028 break;
3029 }
3030 case KVM_NMI: {
3031 r = kvm_vcpu_ioctl_nmi(vcpu);
3032 break;
3033 }
3034 case KVM_SET_CPUID: {
3035 struct kvm_cpuid __user *cpuid_arg = argp;
3036 struct kvm_cpuid cpuid;
3037
3038 r = -EFAULT;
3039 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3040 goto out;
3041 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3042 break;
3043 }
3044 case KVM_SET_CPUID2: {
3045 struct kvm_cpuid2 __user *cpuid_arg = argp;
3046 struct kvm_cpuid2 cpuid;
3047
3048 r = -EFAULT;
3049 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3050 goto out;
3051 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3052 cpuid_arg->entries);
3053 break;
3054 }
3055 case KVM_GET_CPUID2: {
3056 struct kvm_cpuid2 __user *cpuid_arg = argp;
3057 struct kvm_cpuid2 cpuid;
3058
3059 r = -EFAULT;
3060 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3061 goto out;
3062 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3063 cpuid_arg->entries);
3064 if (r)
3065 goto out;
3066 r = -EFAULT;
3067 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3068 goto out;
3069 r = 0;
3070 break;
3071 }
3072 case KVM_GET_MSRS:
3073 r = msr_io(vcpu, argp, kvm_get_msr, 1);
3074 break;
3075 case KVM_SET_MSRS:
3076 r = msr_io(vcpu, argp, do_set_msr, 0);
3077 break;
3078 case KVM_TPR_ACCESS_REPORTING: {
3079 struct kvm_tpr_access_ctl tac;
3080
3081 r = -EFAULT;
3082 if (copy_from_user(&tac, argp, sizeof tac))
3083 goto out;
3084 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3085 if (r)
3086 goto out;
3087 r = -EFAULT;
3088 if (copy_to_user(argp, &tac, sizeof tac))
3089 goto out;
3090 r = 0;
3091 break;
	}
3093 case KVM_SET_VAPIC_ADDR: {
3094 struct kvm_vapic_addr va;
3095
3096 r = -EINVAL;
3097 if (!irqchip_in_kernel(vcpu->kvm))
3098 goto out;
3099 r = -EFAULT;
3100 if (copy_from_user(&va, argp, sizeof va))
3101 goto out;
3102 r = 0;
3103 kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3104 break;
3105 }
3106 case KVM_X86_SETUP_MCE: {
3107 u64 mcg_cap;
3108
3109 r = -EFAULT;
3110 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3111 goto out;
3112 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3113 break;
3114 }
3115 case KVM_X86_SET_MCE: {
3116 struct kvm_x86_mce mce;
3117
3118 r = -EFAULT;
3119 if (copy_from_user(&mce, argp, sizeof mce))
3120 goto out;
3121 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3122 break;
3123 }
3124 case KVM_GET_VCPU_EVENTS: {
3125 struct kvm_vcpu_events events;
3126
3127 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3128
3129 r = -EFAULT;
3130 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3131 break;
3132 r = 0;
3133 break;
3134 }
3135 case KVM_SET_VCPU_EVENTS: {
3136 struct kvm_vcpu_events events;
3137
3138 r = -EFAULT;
3139 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3140 break;
3141
3142 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3143 break;
3144 }
3145 case KVM_GET_DEBUGREGS: {
3146 struct kvm_debugregs dbgregs;
3147
3148 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3149
3150 r = -EFAULT;
3151 if (copy_to_user(argp, &dbgregs,
3152 sizeof(struct kvm_debugregs)))
3153 break;
3154 r = 0;
3155 break;
3156 }
3157 case KVM_SET_DEBUGREGS: {
3158 struct kvm_debugregs dbgregs;
3159
3160 r = -EFAULT;
3161 if (copy_from_user(&dbgregs, argp,
3162 sizeof(struct kvm_debugregs)))
3163 break;
3164
3165 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3166 break;
3167 }
3168 case KVM_GET_XSAVE: {
3169 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3170 r = -ENOMEM;
3171 if (!u.xsave)
3172 break;
3173
3174 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3175
3176 r = -EFAULT;
3177 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3178 break;
3179 r = 0;
3180 break;
3181 }
3182 case KVM_SET_XSAVE: {
3183 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3184 if (IS_ERR(u.xsave))
3185 return PTR_ERR(u.xsave);
3186
3187 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3188 break;
3189 }
3190 case KVM_GET_XCRS: {
3191 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3192 r = -ENOMEM;
3193 if (!u.xcrs)
3194 break;
3195
3196 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3197
3198 r = -EFAULT;
3199 if (copy_to_user(argp, u.xcrs,
3200 sizeof(struct kvm_xcrs)))
3201 break;
3202 r = 0;
3203 break;
3204 }
3205 case KVM_SET_XCRS: {
3206 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3207 if (IS_ERR(u.xcrs))
3208 return PTR_ERR(u.xcrs);
3209
3210 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3211 break;
3212 }
3213 case KVM_SET_TSC_KHZ: {
3214 u32 user_tsc_khz;
3215
3216 r = -EINVAL;
3217 user_tsc_khz = (u32)arg;
3218
3219 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3220 goto out;
3221
3222 if (user_tsc_khz == 0)
3223 user_tsc_khz = tsc_khz;
3224
3225 kvm_set_tsc_khz(vcpu, user_tsc_khz);
3226
3227 r = 0;
3228 goto out;
3229 }
3230 case KVM_GET_TSC_KHZ: {
3231 r = vcpu->arch.virtual_tsc_khz;
3232 goto out;
3233 }
3234 case KVM_KVMCLOCK_CTRL: {
3235 r = kvm_set_guest_paused(vcpu);
3236 goto out;
3237 }
3238 default:
3239 r = -EINVAL;
3240 }
3241out:
3242 kfree(u.buffer);
3243 return r;
3244}
3245
3246int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3247{
3248 return VM_FAULT_SIGBUS;
3249}
3250
3251static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3252{
3253 int ret;
3254
3255 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3256 return -EINVAL;
3257 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3258 return ret;
3259}
3260
3261static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3262 u64 ident_addr)
3263{
3264 kvm->arch.ept_identity_map_addr = ident_addr;
3265 return 0;
3266}
3267
3268static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3269 u32 kvm_nr_mmu_pages)
3270{
3271 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3272 return -EINVAL;
3273
3274 mutex_lock(&kvm->slots_lock);
3275 spin_lock(&kvm->mmu_lock);
3276
3277 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3278 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3279
3280 spin_unlock(&kvm->mmu_lock);
3281 mutex_unlock(&kvm->slots_lock);
3282 return 0;
3283}
3284
3285static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3286{
3287 return kvm->arch.n_max_mmu_pages;
3288}
3289
3290static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3291{
3292 int r;
3293
3294 r = 0;
3295 switch (chip->chip_id) {
3296 case KVM_IRQCHIP_PIC_MASTER:
3297 memcpy(&chip->chip.pic,
3298 &pic_irqchip(kvm)->pics[0],
3299 sizeof(struct kvm_pic_state));
3300 break;
3301 case KVM_IRQCHIP_PIC_SLAVE:
3302 memcpy(&chip->chip.pic,
3303 &pic_irqchip(kvm)->pics[1],
3304 sizeof(struct kvm_pic_state));
3305 break;
3306 case KVM_IRQCHIP_IOAPIC:
3307 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3308 break;
3309 default:
3310 r = -EINVAL;
3311 break;
3312 }
3313 return r;
3314}
3315
3316static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3317{
3318 int r;
3319
3320 r = 0;
3321 switch (chip->chip_id) {
3322 case KVM_IRQCHIP_PIC_MASTER:
3323 spin_lock(&pic_irqchip(kvm)->lock);
3324 memcpy(&pic_irqchip(kvm)->pics[0],
3325 &chip->chip.pic,
3326 sizeof(struct kvm_pic_state));
3327 spin_unlock(&pic_irqchip(kvm)->lock);
3328 break;
3329 case KVM_IRQCHIP_PIC_SLAVE:
3330 spin_lock(&pic_irqchip(kvm)->lock);
3331 memcpy(&pic_irqchip(kvm)->pics[1],
3332 &chip->chip.pic,
3333 sizeof(struct kvm_pic_state));
3334 spin_unlock(&pic_irqchip(kvm)->lock);
3335 break;
3336 case KVM_IRQCHIP_IOAPIC:
3337 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3338 break;
3339 default:
3340 r = -EINVAL;
3341 break;
3342 }
3343 kvm_pic_update_irq(pic_irqchip(kvm));
3344 return r;
3345}
3346
3347static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3348{
3349 int r = 0;
3350
3351 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3352 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3353 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3354 return r;
3355}
3356
3357static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3358{
3359 int r = 0;
3360
3361 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3362 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3363 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3364 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3365 return r;
3366}
3367
3368static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3369{
3370 int r = 0;
3371
3372 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3373 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3374 sizeof(ps->channels));
3375 ps->flags = kvm->arch.vpit->pit_state.flags;
3376 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3377 memset(&ps->reserved, 0, sizeof(ps->reserved));
3378 return r;
3379}
3380
3381static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3382{
3383 int r = 0, start = 0;
3384 u32 prev_legacy, cur_legacy;
3385 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3386 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3387 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3388 if (!prev_legacy && cur_legacy)
3389 start = 1;
3390 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3391 sizeof(kvm->arch.vpit->pit_state.channels));
3392 kvm->arch.vpit->pit_state.flags = ps->flags;
3393 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3394 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3395 return r;
3396}
3397
3398static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3399 struct kvm_reinject_control *control)
3400{
3401 if (!kvm->arch.vpit)
3402 return -ENXIO;
3403 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3404 kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
3405 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3406 return 0;
3407}
3408
/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * We need to keep it in mind that VCPU threads can write to the bitmap
 * concurrently.  So, to avoid losing data, we keep the following order for
 * each bit:
 *
 *   1. Take a snapshot of the bit and clear it if needed.
 *   2. Write protect the corresponding page.
 *   3. Flush TLB's if needed.
 *   4. Copy the snapshot to the userspace.
 *
 * Between 2 and 3, the guest may write to the page using the remaining TLB
 * entry.  This is not a problem because the page will be reported dirty at
 * step 4 using the snapshot taken before and step 3 ensures that successive
 * writes will be logged for the next call.
 */
3428int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3429{
3430 int r;
3431 struct kvm_memory_slot *memslot;
3432 unsigned long n, i;
3433 unsigned long *dirty_bitmap;
3434 unsigned long *dirty_bitmap_buffer;
3435 bool is_dirty = false;
3436
3437 mutex_lock(&kvm->slots_lock);
3438
3439 r = -EINVAL;
3440 if (log->slot >= KVM_MEMORY_SLOTS)
3441 goto out;
3442
3443 memslot = id_to_memslot(kvm->memslots, log->slot);
3444
3445 dirty_bitmap = memslot->dirty_bitmap;
3446 r = -ENOENT;
3447 if (!dirty_bitmap)
3448 goto out;
3449
3450 n = kvm_dirty_bitmap_bytes(memslot);
3451
3452 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3453 memset(dirty_bitmap_buffer, 0, n);
3454
3455 spin_lock(&kvm->mmu_lock);
3456
3457 for (i = 0; i < n / sizeof(long); i++) {
3458 unsigned long mask;
3459 gfn_t offset;
3460
3461 if (!dirty_bitmap[i])
3462 continue;
3463
3464 is_dirty = true;
3465
3466 mask = xchg(&dirty_bitmap[i], 0);
3467 dirty_bitmap_buffer[i] = mask;
3468
3469 offset = i * BITS_PER_LONG;
3470 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3471 }
3472 if (is_dirty)
3473 kvm_flush_remote_tlbs(kvm);
3474
3475 spin_unlock(&kvm->mmu_lock);
3476
3477 r = -EFAULT;
3478 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3479 goto out;
3480
3481 r = 0;
3482out:
3483 mutex_unlock(&kvm->slots_lock);
3484 return r;
3485}
3486
3487int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
3488{
3489 if (!irqchip_in_kernel(kvm))
3490 return -ENXIO;
3491
3492 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3493 irq_event->irq, irq_event->level);
3494 return 0;
3495}
3496
3497long kvm_arch_vm_ioctl(struct file *filp,
3498 unsigned int ioctl, unsigned long arg)
3499{
3500 struct kvm *kvm = filp->private_data;
3501 void __user *argp = (void __user *)arg;
3502 int r = -ENOTTY;
	/*
	 * This union makes it completely explicit to gcc-3.x
	 * that these two variables' stack usage should be
	 * combined, not added together.
	 */
3508 union {
3509 struct kvm_pit_state ps;
3510 struct kvm_pit_state2 ps2;
3511 struct kvm_pit_config pit_config;
3512 } u;
3513
3514 switch (ioctl) {
3515 case KVM_SET_TSS_ADDR:
3516 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3517 break;
3518 case KVM_SET_IDENTITY_MAP_ADDR: {
3519 u64 ident_addr;
3520
3521 r = -EFAULT;
3522 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3523 goto out;
3524 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3525 break;
3526 }
3527 case KVM_SET_NR_MMU_PAGES:
3528 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3529 break;
3530 case KVM_GET_NR_MMU_PAGES:
3531 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3532 break;
3533 case KVM_CREATE_IRQCHIP: {
3534 struct kvm_pic *vpic;
3535
3536 mutex_lock(&kvm->lock);
3537 r = -EEXIST;
3538 if (kvm->arch.vpic)
3539 goto create_irqchip_unlock;
3540 r = -EINVAL;
3541 if (atomic_read(&kvm->online_vcpus))
3542 goto create_irqchip_unlock;
3543 r = -ENOMEM;
3544 vpic = kvm_create_pic(kvm);
3545 if (vpic) {
3546 r = kvm_ioapic_init(kvm);
3547 if (r) {
3548 mutex_lock(&kvm->slots_lock);
3549 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3550 &vpic->dev_master);
3551 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3552 &vpic->dev_slave);
3553 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3554 &vpic->dev_eclr);
3555 mutex_unlock(&kvm->slots_lock);
3556 kfree(vpic);
3557 goto create_irqchip_unlock;
3558 }
3559 } else
3560 goto create_irqchip_unlock;
3561 smp_wmb();
3562 kvm->arch.vpic = vpic;
3563 smp_wmb();
3564 r = kvm_setup_default_irq_routing(kvm);
3565 if (r) {
3566 mutex_lock(&kvm->slots_lock);
3567 mutex_lock(&kvm->irq_lock);
3568 kvm_ioapic_destroy(kvm);
3569 kvm_destroy_pic(kvm);
3570 mutex_unlock(&kvm->irq_lock);
3571 mutex_unlock(&kvm->slots_lock);
3572 }
3573 create_irqchip_unlock:
3574 mutex_unlock(&kvm->lock);
3575 break;
3576 }
3577 case KVM_CREATE_PIT:
3578 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3579 goto create_pit;
3580 case KVM_CREATE_PIT2:
3581 r = -EFAULT;
3582 if (copy_from_user(&u.pit_config, argp,
3583 sizeof(struct kvm_pit_config)))
3584 goto out;
3585 create_pit:
3586 mutex_lock(&kvm->slots_lock);
3587 r = -EEXIST;
3588 if (kvm->arch.vpit)
3589 goto create_pit_unlock;
3590 r = -ENOMEM;
3591 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3592 if (kvm->arch.vpit)
3593 r = 0;
3594 create_pit_unlock:
3595 mutex_unlock(&kvm->slots_lock);
3596 break;
3597 case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3599 struct kvm_irqchip *chip;
3600
3601 chip = memdup_user(argp, sizeof(*chip));
3602 if (IS_ERR(chip)) {
3603 r = PTR_ERR(chip);
3604 goto out;
3605 }
3606
3607 r = -ENXIO;
3608 if (!irqchip_in_kernel(kvm))
3609 goto get_irqchip_out;
3610 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3611 if (r)
3612 goto get_irqchip_out;
3613 r = -EFAULT;
3614 if (copy_to_user(argp, chip, sizeof *chip))
3615 goto get_irqchip_out;
3616 r = 0;
3617 get_irqchip_out:
3618 kfree(chip);
3619 break;
3620 }
3621 case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3623 struct kvm_irqchip *chip;
3624
3625 chip = memdup_user(argp, sizeof(*chip));
3626 if (IS_ERR(chip)) {
3627 r = PTR_ERR(chip);
3628 goto out;
3629 }
3630
3631 r = -ENXIO;
3632 if (!irqchip_in_kernel(kvm))
3633 goto set_irqchip_out;
3634 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3635 if (r)
3636 goto set_irqchip_out;
3637 r = 0;
3638 set_irqchip_out:
3639 kfree(chip);
3640 break;
3641 }
3642 case KVM_GET_PIT: {
3643 r = -EFAULT;
3644 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3645 goto out;
3646 r = -ENXIO;
3647 if (!kvm->arch.vpit)
3648 goto out;
3649 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3650 if (r)
3651 goto out;
3652 r = -EFAULT;
3653 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3654 goto out;
3655 r = 0;
3656 break;
3657 }
3658 case KVM_SET_PIT: {
3659 r = -EFAULT;
3660 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3661 goto out;
3662 r = -ENXIO;
3663 if (!kvm->arch.vpit)
3664 goto out;
3665 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3666 break;
3667 }
3668 case KVM_GET_PIT2: {
3669 r = -ENXIO;
3670 if (!kvm->arch.vpit)
3671 goto out;
3672 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3673 if (r)
3674 goto out;
3675 r = -EFAULT;
3676 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3677 goto out;
3678 r = 0;
3679 break;
3680 }
3681 case KVM_SET_PIT2: {
3682 r = -EFAULT;
3683 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3684 goto out;
3685 r = -ENXIO;
3686 if (!kvm->arch.vpit)
3687 goto out;
3688 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3689 break;
3690 }
3691 case KVM_REINJECT_CONTROL: {
3692 struct kvm_reinject_control control;
3693 r = -EFAULT;
3694 if (copy_from_user(&control, argp, sizeof(control)))
3695 goto out;
3696 r = kvm_vm_ioctl_reinject(kvm, &control);
3697 break;
3698 }
3699 case KVM_XEN_HVM_CONFIG: {
3700 r = -EFAULT;
3701 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3702 sizeof(struct kvm_xen_hvm_config)))
3703 goto out;
3704 r = -EINVAL;
3705 if (kvm->arch.xen_hvm_config.flags)
3706 goto out;
3707 r = 0;
3708 break;
3709 }
3710 case KVM_SET_CLOCK: {
3711 struct kvm_clock_data user_ns;
3712 u64 now_ns;
3713 s64 delta;
3714
3715 r = -EFAULT;
3716 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3717 goto out;
3718
3719 r = -EINVAL;
3720 if (user_ns.flags)
3721 goto out;
3722
3723 r = 0;
3724 local_irq_disable();
3725 now_ns = get_kernel_ns();
3726 delta = user_ns.clock - now_ns;
3727 local_irq_enable();
3728 kvm->arch.kvmclock_offset = delta;
3729 break;
3730 }
3731 case KVM_GET_CLOCK: {
3732 struct kvm_clock_data user_ns;
3733 u64 now_ns;
3734
3735 local_irq_disable();
3736 now_ns = get_kernel_ns();
3737 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3738 local_irq_enable();
3739 user_ns.flags = 0;
3740 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3741
3742 r = -EFAULT;
3743 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
3744 goto out;
3745 r = 0;
3746 break;
3747 }
3748
3749 default:
3750 ;
3751 }
3752out:
3753 return r;
3754}
3755
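/*
 * Trims msrs_to_save down to the MSRs that actually exist on this host,
 * probing each one with rdmsr_safe() and compacting the list in place.
 */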
3756static void kvm_init_msr_list(void)
3757{
3758 u32 dummy[2];
3759 unsigned i, j;
3760
	/* skip the first msrs in the list. KVM-specific */
3762 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3763 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3764 continue;
3765 if (j < i)
3766 msrs_to_save[j] = msrs_to_save[i];
3767 j++;
3768 }
3769 num_msrs_to_save = j;
3770}
3771
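/*
 * Dispatches an MMIO write, up to 8 bytes at a time: the in-kernel local
 * APIC gets first refusal, then the MMIO bus.  Returns the number of bytes
 * that were handled in the kernel.
 */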
3772static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
3773 const void *v)
3774{
3775 int handled = 0;
3776 int n;
3777
3778 do {
3779 n = min(len, 8);
3780 if (!(vcpu->arch.apic &&
3781 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
3782 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3783 break;
3784 handled += n;
3785 addr += n;
3786 len -= n;
3787 v += n;
3788 } while (len);
3789
3790 return handled;
3791}
3792
3793static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
3794{
3795 int handled = 0;
3796 int n;
3797
3798 do {
3799 n = min(len, 8);
3800 if (!(vcpu->arch.apic &&
3801 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
3802 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
3803 break;
3804 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
3805 handled += n;
3806 addr += n;
3807 len -= n;
3808 v += n;
3809 } while (len);
3810
3811 return handled;
3812}
3813
3814static void kvm_set_segment(struct kvm_vcpu *vcpu,
3815 struct kvm_segment *var, int seg)
3816{
3817 kvm_x86_ops->set_segment(vcpu, var, seg);
3818}
3819
3820void kvm_get_segment(struct kvm_vcpu *vcpu,
3821 struct kvm_segment *var, int seg)
3822{
3823 kvm_x86_ops->get_segment(vcpu, var, seg);
3824}
3825
3826gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access)
3827{
3828 gpa_t t_gpa;
3829 struct x86_exception exception;
3830
3831 BUG_ON(!mmu_is_nested(vcpu));
3832
	/* NPT walks are always user-mode walks */
3834 access |= PFERR_USER_MASK;
3835 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception);
3836
3837 return t_gpa;
3838}
3839
3840gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
3841 struct x86_exception *exception)
3842{
3843 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3844 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3845}
3846
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
3848 struct x86_exception *exception)
3849{
3850 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3851 access |= PFERR_FETCH_MASK;
3852 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3853}
3854
3855gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
3856 struct x86_exception *exception)
3857{
3858 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3859 access |= PFERR_WRITE_MASK;
3860 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3861}
3862
/* uses this to access any guest's mapped memory without checking CPL */
3864gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
3865 struct x86_exception *exception)
3866{
3867 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
3868}
3869
3870static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3871 struct kvm_vcpu *vcpu, u32 access,
3872 struct x86_exception *exception)
3873{
3874 void *data = val;
3875 int r = X86EMUL_CONTINUE;
3876
3877 while (bytes) {
3878 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
3879 exception);
3880 unsigned offset = addr & (PAGE_SIZE-1);
3881 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
3882 int ret;
3883
3884 if (gpa == UNMAPPED_GVA)
3885 return X86EMUL_PROPAGATE_FAULT;
3886 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
3887 if (ret < 0) {
3888 r = X86EMUL_IO_NEEDED;
3889 goto out;
3890 }
3891
3892 bytes -= toread;
3893 data += toread;
3894 addr += toread;
3895 }
3896out:
3897 return r;
3898}
3899
/* used for instruction fetching */
3901static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
3902 gva_t addr, void *val, unsigned int bytes,
3903 struct x86_exception *exception)
3904{
3905 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3906 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3907
3908 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3909 access | PFERR_FETCH_MASK,
3910 exception);
3911}
3912
3913int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
3914 gva_t addr, void *val, unsigned int bytes,
3915 struct x86_exception *exception)
3916{
3917 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3918 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3919
3920 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3921 exception);
3922}
3923EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
3924
3925static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3926 gva_t addr, void *val, unsigned int bytes,
3927 struct x86_exception *exception)
3928{
3929 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3930 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
3931}
3932
3933int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
3934 gva_t addr, void *val,
3935 unsigned int bytes,
3936 struct x86_exception *exception)
3937{
3938 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
3939 void *data = val;
3940 int r = X86EMUL_CONTINUE;
3941
3942 while (bytes) {
3943 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
3944 PFERR_WRITE_MASK,
3945 exception);
3946 unsigned offset = addr & (PAGE_SIZE-1);
3947 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
3948 int ret;
3949
3950 if (gpa == UNMAPPED_GVA)
3951 return X86EMUL_PROPAGATE_FAULT;
3952 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
3953 if (ret < 0) {
3954 r = X86EMUL_IO_NEEDED;
3955 goto out;
3956 }
3957
3958 bytes -= towrite;
3959 data += towrite;
3960 addr += towrite;
3961 }
3962out:
3963 return r;
3964}
3965EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
3966
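/*
 * Translates a guest virtual address for an emulated memory access.
 * Returns -1 on a translation fault, 1 if the resulting gpa needs MMIO
 * handling (APIC page or a cached MMIO match), and 0 for ordinary RAM.
 */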
3967static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
3968 gpa_t *gpa, struct x86_exception *exception,
3969 bool write)
3970{
3971 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
3972 | (write ? PFERR_WRITE_MASK : 0);
3973
3974 if (vcpu_match_mmio_gva(vcpu, gva)
3975 && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
3976 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
3977 (gva & (PAGE_SIZE - 1));
3978 trace_vcpu_match_mmio(gva, *gpa, write, false);
3979 return 1;
3980 }
3981
3982 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
3983
3984 if (*gpa == UNMAPPED_GVA)
3985 return -1;
3986
	/* For APIC access vmexit */
3988 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
3989 return 1;
3990
3991 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
3992 trace_vcpu_match_mmio(gva, *gpa, write, true);
3993 return 1;
3994 }
3995
3996 return 0;
3997}
3998
3999int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4000 const void *val, int bytes)
4001{
4002 int ret;
4003
4004 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
4005 if (ret < 0)
4006 return 0;
4007 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
4008 return 1;
4009}
4010
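/*
 * The read and write paths of the emulator share emulator_read_write();
 * this ops structure supplies the direction-specific callbacks so the
 * common code can stay direction-agnostic.
 */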
4011struct read_write_emulator_ops {
4012 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
4013 int bytes);
4014 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
4015 void *val, int bytes);
4016 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4017 int bytes, void *val);
4018 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4019 void *val, int bytes);
4020 bool write;
4021};
4022
4023static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4024{
4025 if (vcpu->mmio_read_completed) {
4026 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4027 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
4028 vcpu->mmio_read_completed = 0;
4029 return 1;
4030 }
4031
4032 return 0;
4033}
4034
4035static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4036 void *val, int bytes)
4037{
4038 return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
4039}
4040
4041static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4042 void *val, int bytes)
4043{
4044 return emulator_write_phys(vcpu, gpa, val, bytes);
4045}
4046
4047static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4048{
4049 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4050 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4051}
4052
4053static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4054 void *val, int bytes)
4055{
4056 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4057 return X86EMUL_IO_NEEDED;
4058}
4059
4060static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4061 void *val, int bytes)
4062{
4063 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
4064
4065 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
4066 return X86EMUL_CONTINUE;
4067}
4068
4069static const struct read_write_emulator_ops read_emultor = {
4070 .read_write_prepare = read_prepare,
4071 .read_write_emulate = read_emulate,
4072 .read_write_mmio = vcpu_mmio_read,
4073 .read_write_exit_mmio = read_exit_mmio,
4074};
4075
4076static const struct read_write_emulator_ops write_emultor = {
4077 .read_write_emulate = write_emulate,
4078 .read_write_mmio = write_mmio,
4079 .read_write_exit_mmio = write_exit_mmio,
4080 .write = true,
4081};
4082
4083static int emulator_read_write_onepage(unsigned long addr, void *val,
4084 unsigned int bytes,
4085 struct x86_exception *exception,
4086 struct kvm_vcpu *vcpu,
4087 const struct read_write_emulator_ops *ops)
4088{
4089 gpa_t gpa;
4090 int handled, ret;
4091 bool write = ops->write;
4092 struct kvm_mmio_fragment *frag;
4093
4094 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
4095
4096 if (ret < 0)
4097 return X86EMUL_PROPAGATE_FAULT;
4098
	/* For APIC access vmexit */
4100 if (ret)
4101 goto mmio;
4102
4103 if (ops->read_write_emulate(vcpu, gpa, val, bytes))
4104 return X86EMUL_CONTINUE;
4105
4106mmio:
	/*
	 * Is this MMIO handled locally?
	 */
4110 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
4111 if (handled == bytes)
4112 return X86EMUL_CONTINUE;
4113
4114 gpa += handled;
4115 bytes -= handled;
4116 val += handled;
4117
4118 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
4119 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
4120 frag->gpa = gpa;
4121 frag->data = val;
4122 frag->len = bytes;
4123 return X86EMUL_CONTINUE;
4124}
4125
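/*
 * Common entry point for emulated reads and writes.  Accesses that cross
 * a page boundary are split into per-page chunks; anything that hits MMIO
 * is recorded as fragments and completed in userspace via KVM_EXIT_MMIO.
 */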
4126int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
4127 void *val, unsigned int bytes,
4128 struct x86_exception *exception,
4129 const struct read_write_emulator_ops *ops)
4130{
4131 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4132 gpa_t gpa;
4133 int rc;
4134
4135 if (ops->read_write_prepare &&
4136 ops->read_write_prepare(vcpu, val, bytes))
4137 return X86EMUL_CONTINUE;
4138
4139 vcpu->mmio_nr_fragments = 0;
4140
	/* Crossing a page boundary? */
4142 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4143 int now;
4144
4145 now = -addr & ~PAGE_MASK;
4146 rc = emulator_read_write_onepage(addr, val, now, exception,
4147 vcpu, ops);
4148
4149 if (rc != X86EMUL_CONTINUE)
4150 return rc;
4151 addr += now;
4152 val += now;
4153 bytes -= now;
4154 }
4155
4156 rc = emulator_read_write_onepage(addr, val, bytes, exception,
4157 vcpu, ops);
4158 if (rc != X86EMUL_CONTINUE)
4159 return rc;
4160
4161 if (!vcpu->mmio_nr_fragments)
4162 return rc;
4163
4164 gpa = vcpu->mmio_fragments[0].gpa;
4165
4166 vcpu->mmio_needed = 1;
4167 vcpu->mmio_cur_fragment = 0;
4168
4169 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
4170 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
4171 vcpu->run->exit_reason = KVM_EXIT_MMIO;
4172 vcpu->run->mmio.phys_addr = gpa;
4173
4174 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
4175}
4176
4177static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4178 unsigned long addr,
4179 void *val,
4180 unsigned int bytes,
4181 struct x86_exception *exception)
4182{
4183 return emulator_read_write(ctxt, addr, val, bytes,
4184 exception, &read_emultor);
4185}
4186
4187int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4188 unsigned long addr,
4189 const void *val,
4190 unsigned int bytes,
4191 struct x86_exception *exception)
4192{
4193 return emulator_read_write(ctxt, addr, (void *)val, bytes,
4194 exception, &write_emultor);
4195}
4196
4197#define CMPXCHG_TYPE(t, ptr, old, new) \
4198 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4199
4200#ifdef CONFIG_X86_64
4201# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4202#else
4203# define CMPXCHG64(ptr, old, new) \
4204 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4205#endif
4206
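/*
 * Emulates cmpxchg against guest memory by mapping the backing page and
 * performing a real cmpxchg on it.  When that is not possible (unaligned,
 * MMIO, or page-crossing), it falls back to emulating the exchange as a
 * plain write, which is not atomic.
 */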
4207static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4208 unsigned long addr,
4209 const void *old,
4210 const void *new,
4211 unsigned int bytes,
4212 struct x86_exception *exception)
4213{
4214 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4215 gpa_t gpa;
4216 struct page *page;
4217 char *kaddr;
4218 bool exchanged;
4219
	/* guests cmpxchg8b have to be emulated atomically */
4221 if (bytes > 8 || (bytes & (bytes - 1)))
4222 goto emul_write;
4223
4224 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4225
4226 if (gpa == UNMAPPED_GVA ||
4227 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4228 goto emul_write;
4229
4230 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4231 goto emul_write;
4232
4233 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4234 if (is_error_page(page))
4235 goto emul_write;
4236
4237 kaddr = kmap_atomic(page);
4238 kaddr += offset_in_page(gpa);
4239 switch (bytes) {
4240 case 1:
4241 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4242 break;
4243 case 2:
4244 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4245 break;
4246 case 4:
4247 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4248 break;
4249 case 8:
4250 exchanged = CMPXCHG64(kaddr, old, new);
4251 break;
4252 default:
4253 BUG();
4254 }
4255 kunmap_atomic(kaddr);
4256 kvm_release_page_dirty(page);
4257
4258 if (!exchanged)
4259 return X86EMUL_CMPXCHG_FAILED;
4260
4261 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
4262
4263 return X86EMUL_CONTINUE;
4264
4265emul_write:
4266 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4267
4268 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4269}
4270
4271static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4272{
	/* TODO: String I/O for in kernel device */
4274 int r;
4275
4276 if (vcpu->arch.pio.in)
4277 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
4278 vcpu->arch.pio.size, pd);
4279 else
4280 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
4281 vcpu->arch.pio.port, vcpu->arch.pio.size,
4282 pd);
4283 return r;
4284}
4285
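/*
 * Starts an emulated port I/O operation.  Returns 1 if an in-kernel device
 * claimed the access; otherwise fills in vcpu->run for a KVM_EXIT_IO exit
 * and returns 0 so userspace can complete it.
 */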
4286static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4287 unsigned short port, void *val,
4288 unsigned int count, bool in)
4289{
4290 trace_kvm_pio(!in, port, size, count);
4291
4292 vcpu->arch.pio.port = port;
4293 vcpu->arch.pio.in = in;
4294 vcpu->arch.pio.count = count;
4295 vcpu->arch.pio.size = size;
4296
4297 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4298 vcpu->arch.pio.count = 0;
4299 return 1;
4300 }
4301
4302 vcpu->run->exit_reason = KVM_EXIT_IO;
4303 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4304 vcpu->run->io.size = size;
4305 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4306 vcpu->run->io.count = count;
4307 vcpu->run->io.port = port;
4308
4309 return 0;
4310}
4311
4312static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4313 int size, unsigned short port, void *val,
4314 unsigned int count)
4315{
4316 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4317 int ret;
4318
4319 if (vcpu->arch.pio.count)
4320 goto data_avail;
4321
4322 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4323 if (ret) {
4324data_avail:
4325 memcpy(val, vcpu->arch.pio_data, size * count);
4326 vcpu->arch.pio.count = 0;
4327 return 1;
4328 }
4329
4330 return 0;
4331}
4332
4333static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4334 int size, unsigned short port,
4335 const void *val, unsigned int count)
4336{
4337 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4338
4339 memcpy(vcpu->arch.pio_data, val, size * count);
4340 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4341}
4342
4343static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4344{
4345 return kvm_x86_ops->get_segment_base(vcpu, seg);
4346}
4347
4348static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4349{
4350 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4351}
4352
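/*
 * Emulates guest WBINVD.  When WBINVD causes a VM exit, caches are flushed
 * on every physical CPU the vcpu has recently run on (tracked in
 * wbinvd_dirty_mask); otherwise a local wbinvd suffices.
 */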
4353int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4354{
4355 if (!need_emulate_wbinvd(vcpu))
4356 return X86EMUL_CONTINUE;
4357
4358 if (kvm_x86_ops->has_wbinvd_exit()) {
4359 int cpu = get_cpu();
4360
4361 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4362 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4363 wbinvd_ipi, NULL, 1);
4364 put_cpu();
4365 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4366 } else
4367 wbinvd();
4368 return X86EMUL_CONTINUE;
4369}
4370EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4371
4372static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4373{
4374 kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
4375}
4376
4377int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4378{
4379 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4380}
4381
4382int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
4383{
4385 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4386}
4387
4388static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4389{
4390 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4391}
4392
4393static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4394{
4395 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4396 unsigned long value;
4397
4398 switch (cr) {
4399 case 0:
4400 value = kvm_read_cr0(vcpu);
4401 break;
4402 case 2:
4403 value = vcpu->arch.cr2;
4404 break;
4405 case 3:
4406 value = kvm_read_cr3(vcpu);
4407 break;
4408 case 4:
4409 value = kvm_read_cr4(vcpu);
4410 break;
4411 case 8:
4412 value = kvm_get_cr8(vcpu);
4413 break;
4414 default:
4415 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4416 return 0;
4417 }
4418
4419 return value;
4420}
4421
4422static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4423{
4424 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4425 int res = 0;
4426
4427 switch (cr) {
4428 case 0:
4429 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4430 break;
4431 case 2:
4432 vcpu->arch.cr2 = val;
4433 break;
4434 case 3:
4435 res = kvm_set_cr3(vcpu, val);
4436 break;
4437 case 4:
4438 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4439 break;
4440 case 8:
4441 res = kvm_set_cr8(vcpu, val);
4442 break;
4443 default:
4444 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4445 res = -1;
4446 }
4447
4448 return res;
4449}
4450
4451static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
4452{
4453 kvm_set_rflags(emul_to_vcpu(ctxt), val);
4454}
4455
4456static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4457{
4458 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4459}
4460
4461static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4462{
4463 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4464}
4465
4466static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4467{
4468 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4469}
4470
4471static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4472{
4473 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4474}
4475
4476static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4477{
4478 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4479}
4480
4481static unsigned long emulator_get_cached_segment_base(
4482 struct x86_emulate_ctxt *ctxt, int seg)
4483{
4484 return get_segment_base(emul_to_vcpu(ctxt), seg);
4485}
4486
4487static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4488 struct desc_struct *desc, u32 *base3,
4489 int seg)
4490{
4491 struct kvm_segment var;
4492
4493 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4494 *selector = var.selector;
4495
4496 if (var.unusable)
4497 return false;
4498
4499 if (var.g)
4500 var.limit >>= 12;
4501 set_desc_limit(desc, var.limit);
4502 set_desc_base(desc, (unsigned long)var.base);
4503#ifdef CONFIG_X86_64
4504 if (base3)
4505 *base3 = var.base >> 32;
4506#endif
4507 desc->type = var.type;
4508 desc->s = var.s;
4509 desc->dpl = var.dpl;
4510 desc->p = var.present;
4511 desc->avl = var.avl;
4512 desc->l = var.l;
4513 desc->d = var.db;
4514 desc->g = var.g;
4515
4516 return true;
4517}
4518
4519static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4520 struct desc_struct *desc, u32 base3,
4521 int seg)
4522{
4523 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4524 struct kvm_segment var;
4525
4526 var.selector = selector;
4527 var.base = get_desc_base(desc);
4528#ifdef CONFIG_X86_64
4529 var.base |= ((u64)base3) << 32;
4530#endif
4531 var.limit = get_desc_limit(desc);
4532 if (desc->g)
4533 var.limit = (var.limit << 12) | 0xfff;
4534 var.type = desc->type;
4535 var.present = desc->p;
4536 var.dpl = desc->dpl;
4537 var.db = desc->d;
4538 var.s = desc->s;
4539 var.l = desc->l;
4540 var.g = desc->g;
4541 var.avl = desc->avl;
4543 var.unusable = !var.present;
4544 var.padding = 0;
4545
4546 kvm_set_segment(vcpu, &var, seg);
4548}
4549
4550static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4551 u32 msr_index, u64 *pdata)
4552{
4553 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4554}
4555
4556static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4557 u32 msr_index, u64 data)
4558{
4559 struct msr_data msr;
4560
4561 msr.data = data;
4562 msr.index = msr_index;
4563 msr.host_initiated = false;
4564 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4565}
4566
4567static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4568 u32 pmc, u64 *pdata)
4569{
4570 return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
4571}
4572
4573static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4574{
4575 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4576}
4577
4578static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
4579{
4580 preempt_disable();
4581 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
	/*
	 * CR0.TS may reference the host fpu state, not the guest fpu state,
	 * which indicates that CR0.TS is not guest state.
	 */
4586 clts();
4587}
4588
4589static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
4590{
4591 preempt_enable();
4592}
4593
4594static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4595 struct x86_instruction_info *info,
4596 enum x86_intercept_stage stage)
4597{
4598 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4599}
4600
4601static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4602 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4603{
4604 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4605}
4606
4607static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
4608{
4609 return kvm_register_read(emul_to_vcpu(ctxt), reg);
4610}
4611
4612static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
4613{
4614 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
4615}
4616
4617static const struct x86_emulate_ops emulate_ops = {
4618 .read_gpr = emulator_read_gpr,
4619 .write_gpr = emulator_write_gpr,
4620 .read_std = kvm_read_guest_virt_system,
4621 .write_std = kvm_write_guest_virt_system,
4622 .fetch = kvm_fetch_guest_virt,
4623 .read_emulated = emulator_read_emulated,
4624 .write_emulated = emulator_write_emulated,
4625 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4626 .invlpg = emulator_invlpg,
4627 .pio_in_emulated = emulator_pio_in_emulated,
4628 .pio_out_emulated = emulator_pio_out_emulated,
4629 .get_segment = emulator_get_segment,
4630 .set_segment = emulator_set_segment,
4631 .get_cached_segment_base = emulator_get_cached_segment_base,
4632 .get_gdt = emulator_get_gdt,
4633 .get_idt = emulator_get_idt,
4634 .set_gdt = emulator_set_gdt,
4635 .set_idt = emulator_set_idt,
4636 .get_cr = emulator_get_cr,
4637 .set_cr = emulator_set_cr,
4638 .set_rflags = emulator_set_rflags,
4639 .cpl = emulator_get_cpl,
4640 .get_dr = emulator_get_dr,
4641 .set_dr = emulator_set_dr,
4642 .set_msr = emulator_set_msr,
4643 .get_msr = emulator_get_msr,
4644 .read_pmc = emulator_read_pmc,
4645 .halt = emulator_halt,
4646 .wbinvd = emulator_wbinvd,
4647 .fix_hypercall = emulator_fix_hypercall,
4648 .get_fpu = emulator_get_fpu,
4649 .put_fpu = emulator_put_fpu,
4650 .intercept = emulator_intercept,
4651 .get_cpuid = emulator_get_cpuid,
4652};
4653
4654static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4655{
4656 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
4657
	/*
	 * An sti; sti; sequence only disables interrupts for the first
	 * instruction.  So, if the last instruction, be it emulated or not,
	 * left the system with the STI shadow active, it means that the last
	 * instruction was an sti.  We should not set the interrupt shadow
	 * here at all, to keep true to the real architecture.
	 */
4664 if (!(int_shadow & mask))
4665 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4666}
4667
4668static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4669{
4670 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4671 if (ctxt->exception.vector == PF_VECTOR)
4672 kvm_propagate_fault(vcpu, &ctxt->exception);
4673 else if (ctxt->exception.error_code_valid)
4674 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4675 ctxt->exception.error_code);
4676 else
4677 kvm_queue_exception(vcpu, ctxt->exception.vector);
4678}
4679
4680static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4681{
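	/*
	 * Zero everything from 'twobyte' up to (but not including) '_regs';
	 * this relies on struct x86_emulate_ctxt laying out the
	 * per-instruction decode cache fields contiguously.
	 */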
4682 memset(&ctxt->twobyte, 0,
4683 (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
4684
4685 ctxt->fetch.start = 0;
4686 ctxt->fetch.end = 0;
4687 ctxt->io_read.pos = 0;
4688 ctxt->io_read.end = 0;
4689 ctxt->mem_read.pos = 0;
4690 ctxt->mem_read.end = 0;
4691}
4692
4693static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4694{
4695 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4696 int cs_db, cs_l;
4697
4698 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4699
4700 ctxt->eflags = kvm_get_rflags(vcpu);
4701 ctxt->eip = kvm_rip_read(vcpu);
4702 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4703 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4704 cs_l ? X86EMUL_MODE_PROT64 :
4705 cs_db ? X86EMUL_MODE_PROT32 :
4706 X86EMUL_MODE_PROT16;
4707 ctxt->guest_mode = is_guest_mode(vcpu);
4708
4709 init_decode_cache(ctxt);
4710 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4711}
4712
4713int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
4714{
4715 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4716 int ret;
4717
4718 init_emulate_ctxt(vcpu);
4719
4720 ctxt->op_bytes = 2;
4721 ctxt->ad_bytes = 2;
4722 ctxt->_eip = ctxt->eip + inc_eip;
4723 ret = emulate_int_real(ctxt, irq);
4724
4725 if (ret != X86EMUL_CONTINUE)
4726 return EMULATE_FAIL;
4727
4728 ctxt->eip = ctxt->_eip;
4729 kvm_rip_write(vcpu, ctxt->eip);
4730 kvm_set_rflags(vcpu, ctxt->eflags);
4731
4732 if (irq == NMI_VECTOR)
4733 vcpu->arch.nmi_pending = 0;
4734 else
4735 vcpu->arch.interrupt.pending = false;
4736
4737 return EMULATE_DONE;
4738}
4739EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
4740
4741static int handle_emulation_failure(struct kvm_vcpu *vcpu)
4742{
4743 int r = EMULATE_DONE;
4744
4745 ++vcpu->stat.insn_emulation_fail;
4746 trace_kvm_emulate_insn_failed(vcpu);
4747 if (!is_guest_mode(vcpu)) {
4748 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4749 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
4750 vcpu->run->internal.ndata = 0;
4751 r = EMULATE_FAIL;
4752 }
4753 kvm_queue_exception(vcpu, UD_VECTOR);
4754
4755 return r;
4756}
4757
4758static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
4759{
4760 gpa_t gpa;
4761 pfn_t pfn;
4762
4763 if (tdp_enabled)
4764 return false;
4765
	/*
	 * If the emulation was due to an access to a shadowed page table,
	 * unprotect the page and re-enter the guest so the CPU can execute
	 * the instruction natively.
	 */
4771 if (kvm_mmu_unprotect_page_virt(vcpu, gva))
4772 return true;
4773
4774 gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL);
4775
4776 if (gpa == UNMAPPED_GVA)
4777 return true;
4778
	/*
	 * Do not retry an unhandleable instruction that faults on read-only
	 * host memory, otherwise we would loop forever:
	 * retry instruction -> write #PF -> emulation failure -> retry
	 * instruction -> ...
	 */
4785 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
4786 if (!is_error_noslot_pfn(pfn)) {
4787 kvm_release_pfn_clean(pfn);
4788 return true;
4789 }
4790
4791 return false;
4792}
4793
4794static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
4795 unsigned long cr2, int emulation_type)
4796{
4797 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4798 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
4799
4800 last_retry_eip = vcpu->arch.last_retry_eip;
4801 last_retry_addr = vcpu->arch.last_retry_addr;
4802
	/*
	 * If the emulation was caused by #PF and the faulting instruction is
	 * not one that writes page tables, the VM exit was caused by shadow
	 * page protection: we can zap the shadow page and retry the
	 * instruction directly.
	 *
	 * Note: if the guest uses a non-page-table-modifying instruction on
	 * the PDE that maps the instruction itself, we would unmap that PDE
	 * and be unable to make progress, so such accesses are reported as
	 * "no retry" and fall back to full emulation.  Retries are also
	 * bounded: a second fault at the same rip and address gives up.
	 */
4816 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
4817
4818 if (!(emulation_type & EMULTYPE_RETRY))
4819 return false;
4820
4821 if (x86_page_table_writing_insn(ctxt))
4822 return false;
4823
4824 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
4825 return false;
4826
4827 vcpu->arch.last_retry_eip = ctxt->eip;
4828 vcpu->arch.last_retry_addr = cr2;
4829
4830 if (!vcpu->arch.mmu.direct_map)
4831 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
4832
4833 kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4834
4835 return true;
4836}
4837
4838static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
4839static int complete_emulated_pio(struct kvm_vcpu *vcpu);
4840
4841int x86_emulate_instruction(struct kvm_vcpu *vcpu,
4842 unsigned long cr2,
4843 int emulation_type,
4844 void *insn,
4845 int insn_len)
4846{
4847 int r;
4848 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4849 bool writeback = true;
4850
4851 kvm_clear_exception_queue(vcpu);
4852
4853 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
4854 init_emulate_ctxt(vcpu);
4855 ctxt->interruptibility = 0;
4856 ctxt->have_exception = false;
4857 ctxt->perm_ok = false;
4858
4859 ctxt->only_vendor_specific_insn
4860 = emulation_type & EMULTYPE_TRAP_UD;
4861
4862 r = x86_decode_insn(ctxt, insn, insn_len);
4863
4864 trace_kvm_emulate_insn_start(vcpu);
4865 ++vcpu->stat.insn_emulation;
4866 if (r != EMULATION_OK) {
4867 if (emulation_type & EMULTYPE_TRAP_UD)
4868 return EMULATE_FAIL;
4869 if (reexecute_instruction(vcpu, cr2))
4870 return EMULATE_DONE;
4871 if (emulation_type & EMULTYPE_SKIP)
4872 return EMULATE_FAIL;
4873 return handle_emulation_failure(vcpu);
4874 }
4875 }
4876
4877 if (emulation_type & EMULTYPE_SKIP) {
4878 kvm_rip_write(vcpu, ctxt->_eip);
4879 return EMULATE_DONE;
4880 }
4881
4882 if (retry_instruction(ctxt, cr2, emulation_type))
4883 return EMULATE_DONE;
4884
	/*
	 * This is needed for the vmware backdoor interface to work, since it
	 * changes register values during an IO operation.
	 */
4887 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
4888 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4889 emulator_invalidate_register_cache(ctxt);
4890 }
4891
4892restart:
4893 r = x86_emulate_insn(ctxt);
4894
4895 if (r == EMULATION_INTERCEPTED)
4896 return EMULATE_DONE;
4897
4898 if (r == EMULATION_FAILED) {
4899 if (reexecute_instruction(vcpu, cr2))
4900 return EMULATE_DONE;
4901
4902 return handle_emulation_failure(vcpu);
4903 }
4904
4905 if (ctxt->have_exception) {
4906 inject_emulated_exception(vcpu);
4907 r = EMULATE_DONE;
4908 } else if (vcpu->arch.pio.count) {
4909 if (!vcpu->arch.pio.in)
4910 vcpu->arch.pio.count = 0;
4911 else {
4912 writeback = false;
4913 vcpu->arch.complete_userspace_io = complete_emulated_pio;
4914 }
4915 r = EMULATE_DO_MMIO;
4916 } else if (vcpu->mmio_needed) {
4917 if (!vcpu->mmio_is_write)
4918 writeback = false;
4919 r = EMULATE_DO_MMIO;
4920 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
4921 } else if (r == EMULATION_RESTART)
4922 goto restart;
4923 else
4924 r = EMULATE_DONE;
4925
4926 if (writeback) {
4927 toggle_interruptibility(vcpu, ctxt->interruptibility);
4928 kvm_set_rflags(vcpu, ctxt->eflags);
4929 kvm_make_request(KVM_REQ_EVENT, vcpu);
4930 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
4931 kvm_rip_write(vcpu, ctxt->eip);
4932 } else
4933 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
4934
4935 return r;
4936}
4937EXPORT_SYMBOL_GPL(x86_emulate_instruction);
4938
4939int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
4940{
4941 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
4942 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
4943 size, port, &val, 1);
4944
4945 vcpu->arch.pio.count = 0;
4946 return ret;
4947}
4948EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
4949
4950static void tsc_bad(void *info)
4951{
4952 __this_cpu_write(cpu_tsc_khz, 0);
4953}
4954
4955static void tsc_khz_changed(void *data)
4956{
4957 struct cpufreq_freqs *freq = data;
4958 unsigned long khz = 0;
4959
4960 if (data)
4961 khz = freq->new;
4962 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4963 khz = cpufreq_quick_get(raw_smp_processor_id());
4964 if (!khz)
4965 khz = tsc_khz;
4966 __this_cpu_write(cpu_tsc_khz, khz);
4967}
4968
4969static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
4970 void *data)
4971{
4972 struct cpufreq_freqs *freq = data;
4973 struct kvm *kvm;
4974 struct kvm_vcpu *vcpu;
4975 int i, send_ipi = 0;
4976
	/*
	 * We allow guests to temporarily run on slowing clocks,
	 * provided we notify them after, or to run on accelerating
	 * clocks, provided we notify them before.  Thus time never
	 * goes backwards.
	 *
	 * However, we have a problem.  We can't atomically update
	 * the frequency of a given CPU from this function; it is
	 * merely a notifier, which can be a few actions removed from
	 * the CPU that did the frequency change.
	 *
	 * So we only react to the transitions that are safe: a frequency
	 * increase is handled at CPUFREQ_PRECHANGE (the guest learns of the
	 * faster clock before it takes effect), and a decrease at
	 * CPUFREQ_POSTCHANGE (the guest keeps its old, faster estimate until
	 * the slowdown has already happened); the opposite combinations are
	 * ignored below.  Every vcpu resident on the changed CPU then gets a
	 * KVM_REQ_CLOCK_UPDATE so kvmclock is refreshed with the new
	 * frequency before the next guest entry.
	 */
5016 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
5017 return 0;
5018 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
5019 return 0;
5020
5021 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5022
5023 raw_spin_lock(&kvm_lock);
5024 list_for_each_entry(kvm, &vm_list, vm_list) {
5025 kvm_for_each_vcpu(i, vcpu, kvm) {
5026 if (vcpu->cpu != freq->cpu)
5027 continue;
5028 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5029 if (vcpu->cpu != smp_processor_id())
5030 send_ipi = 1;
5031 }
5032 }
5033 raw_spin_unlock(&kvm_lock);
5034
5035 if (freq->old < freq->new && send_ipi) {
		/*
		 * We upscale the frequency.  The guest must never see
		 * kvmclock values computed with the old (slower) frequency
		 * while actually running at the new one, or time would
		 * appear to go backwards.
		 *
		 * In case we update the frequency for another cpu (which
		 * might be in guest context) send an interrupt to kick the
		 * cpu out of guest context.  Next time guest context is
		 * entered kvmclock will be updated, so the guest will not
		 * see stale values.
		 */
5048 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5049 }
5050 return 0;
5051}
5052
5053static struct notifier_block kvmclock_cpufreq_notifier_block = {
5054 .notifier_call = kvmclock_cpufreq_notifier
5055};
5056
5057static int kvmclock_cpu_notifier(struct notifier_block *nfb,
5058 unsigned long action, void *hcpu)
5059{
5060 unsigned int cpu = (unsigned long)hcpu;
5061
5062 switch (action) {
5063 case CPU_ONLINE:
5064 case CPU_DOWN_FAILED:
5065 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5066 break;
5067 case CPU_DOWN_PREPARE:
5068 smp_call_function_single(cpu, tsc_bad, NULL, 1);
5069 break;
5070 }
5071 return NOTIFY_OK;
5072}
5073
5074static struct notifier_block kvmclock_cpu_notifier_block = {
5075 .notifier_call = kvmclock_cpu_notifier,
5076 .priority = -INT_MAX
5077};
5078
5079static void kvm_timer_init(void)
5080{
5081 int cpu;
5082
5083 max_tsc_khz = tsc_khz;
5084 register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5085 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5086#ifdef CONFIG_CPU_FREQ
5087 struct cpufreq_policy policy;
5088 memset(&policy, 0, sizeof(policy));
5089 cpu = get_cpu();
5090 cpufreq_get_policy(&policy, cpu);
5091 if (policy.cpuinfo.max_freq)
5092 max_tsc_khz = policy.cpuinfo.max_freq;
5093 put_cpu();
5094#endif
5095 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5096 CPUFREQ_TRANSITION_NOTIFIER);
5097 }
	pr_debug("kvm: max_tsc_khz = %lu\n", max_tsc_khz);
5099 for_each_online_cpu(cpu)
5100 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5101}
5102
5103static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5104
5105int kvm_is_in_guest(void)
5106{
5107 return __this_cpu_read(current_vcpu) != NULL;
5108}
5109
5110static int kvm_is_user_mode(void)
5111{
5112 int user_mode = 3;
5113
5114 if (__this_cpu_read(current_vcpu))
5115 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
5116
5117 return user_mode != 0;
5118}
5119
5120static unsigned long kvm_get_guest_ip(void)
5121{
5122 unsigned long ip = 0;
5123
5124 if (__this_cpu_read(current_vcpu))
5125 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
5126
5127 return ip;
5128}
5129
5130static struct perf_guest_info_callbacks kvm_guest_cbs = {
5131 .is_in_guest = kvm_is_in_guest,
5132 .is_user_mode = kvm_is_user_mode,
5133 .get_guest_ip = kvm_get_guest_ip,
5134};
5135
5136void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
5137{
5138 __this_cpu_write(current_vcpu, vcpu);
5139}
5140EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
5141
5142void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
5143{
5144 __this_cpu_write(current_vcpu, NULL);
5145}
5146EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
5147
5148static void kvm_set_mmio_spte_mask(void)
5149{
5150 u64 mask;
5151 int maxphyaddr = boot_cpu_data.x86_phys_bits;
5152
	/*
	 * Set the reserved bits and the present bit of a paging-structure
	 * entry to generate a page fault with PFEC.RSVD = 1.
	 */
5157 mask = ((1ull << (62 - maxphyaddr + 1)) - 1) << maxphyaddr;
5158 mask |= 1ull;
5159
5160#ifdef CONFIG_X86_64
	/*
	 * If reserved bits cannot be used to trap mmio accesses
	 * (MAXPHYADDR == 52), clear the present bit instead to disable the
	 * mmio page-fault optimization.
	 */
5165 if (maxphyaddr == 52)
5166 mask &= ~1ull;
5167#endif
5168
5169 kvm_mmu_set_mmio_spte_mask(mask);
5170}
5171
5172#ifdef CONFIG_X86_64
5173static void pvclock_gtod_update_fn(struct work_struct *work)
5174{
5175 struct kvm *kvm;
5176
5177 struct kvm_vcpu *vcpu;
5178 int i;
5179
5180 raw_spin_lock(&kvm_lock);
5181 list_for_each_entry(kvm, &vm_list, vm_list)
5182 kvm_for_each_vcpu(i, vcpu, kvm)
5183 set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
5184 atomic_set(&kvm_guest_has_master_clock, 0);
5185 raw_spin_unlock(&kvm_lock);
5186}
5187
5188static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
5189
/*
 * Notification about pvclock gtod data update.
 */
5193static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
5194 void *priv)
5195{
5196 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
5197 struct timekeeper *tk = priv;
5198
5199 update_pvclock_gtod(tk);
5200
	/*
	 * Disable the master clock if the host clocksource is not TSC based,
	 * or is not trusted.
	 */
5204 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
5205 atomic_read(&kvm_guest_has_master_clock) != 0)
5206 queue_work(system_long_wq, &pvclock_gtod_work);
5207
5208 return 0;
5209}
5210
5211static struct notifier_block pvclock_gtod_notifier = {
5212 .notifier_call = pvclock_gtod_notify,
5213};
5214#endif
5215
5216int kvm_arch_init(void *opaque)
5217{
5218 int r;
5219 struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
5220
5221 if (kvm_x86_ops) {
5222 printk(KERN_ERR "kvm: already loaded the other module\n");
5223 r = -EEXIST;
5224 goto out;
5225 }
5226
5227 if (!ops->cpu_has_kvm_support()) {
5228 printk(KERN_ERR "kvm: no hardware support\n");
5229 r = -EOPNOTSUPP;
5230 goto out;
5231 }
5232 if (ops->disabled_by_bios()) {
5233 printk(KERN_ERR "kvm: disabled by bios\n");
5234 r = -EOPNOTSUPP;
5235 goto out;
5236 }
5237
5238 r = -ENOMEM;
5239 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
5240 if (!shared_msrs) {
5241 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
5242 goto out;
5243 }
5244
5245 r = kvm_mmu_module_init();
5246 if (r)
5247 goto out_free_percpu;
5248
5249 kvm_set_mmio_spte_mask();
5250 kvm_init_msr_list();
5251
5252 kvm_x86_ops = ops;
5253 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5254 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5255
5256 kvm_timer_init();
5257
5258 perf_register_guest_info_callbacks(&kvm_guest_cbs);
5259
5260 if (cpu_has_xsave)
5261 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5262
5263 kvm_lapic_init();
5264#ifdef CONFIG_X86_64
5265 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
5266#endif
5267
5268 return 0;
5269
5270out_free_percpu:
5271 free_percpu(shared_msrs);
5272out:
5273 return r;
5274}
5275
5276void kvm_arch_exit(void)
5277{
5278 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
5279
5280 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5281 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
5282 CPUFREQ_TRANSITION_NOTIFIER);
5283 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5284#ifdef CONFIG_X86_64
5285 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
5286#endif
5287 kvm_x86_ops = NULL;
5288 kvm_mmu_module_exit();
5289 free_percpu(shared_msrs);
5290}
5291
5292int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5293{
5294 ++vcpu->stat.halt_exits;
5295 if (irqchip_in_kernel(vcpu->kvm)) {
5296 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
5297 return 1;
5298 } else {
5299 vcpu->run->exit_reason = KVM_EXIT_HLT;
5300 return 0;
5301 }
5302}
5303EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5304
5305int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5306{
5307 u64 param, ingpa, outgpa, ret;
5308 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5309 bool fast, longmode;
5310 int cs_db, cs_l;
5311
	/*
	 * A hypercall raises #UD from non-zero CPL and from real mode,
	 * per the Hyper-V spec.
	 */
5316 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
5317 kvm_queue_exception(vcpu, UD_VECTOR);
5318 return 0;
5319 }
5320
5321 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5322 longmode = is_long_mode(vcpu) && cs_l == 1;
5323
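	/*
	 * Per the Hyper-V TLFS, 32-bit callers pass the hypercall input
	 * value in EDX:EAX, the input parameters GPA in EBX:ECX and the
	 * output GPA in EDI:ESI; 64-bit callers use RCX, RDX and R8.
	 */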
5324 if (!longmode) {
5325 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
5326 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
5327 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
5328 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
5329 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
5330 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
5331 }
5332#ifdef CONFIG_X86_64
5333 else {
5334 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
5335 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
5336 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
5337 }
5338#endif
5339
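	/*
	 * Hypercall input value layout: call code in bits 15:0, fast-call
	 * flag in bit 16, rep count in bits 43:32 and rep start index in
	 * bits 59:48.
	 */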
5340 code = param & 0xffff;
5341 fast = (param >> 16) & 0x1;
5342 rep_cnt = (param >> 32) & 0xfff;
5343 rep_idx = (param >> 48) & 0xfff;
5344
5345 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5346
5347 switch (code) {
5348 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5349 kvm_vcpu_on_spin(vcpu);
5350 break;
5351 default:
5352 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5353 break;
5354 }
5355
5356 ret = res | (((u64)rep_done & 0xfff) << 32);
5357 if (longmode) {
5358 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5359 } else {
5360 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5361 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5362 }
5363
5364 return 1;
5365}
5366
5367int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5368{
5369 unsigned long nr, a0, a1, a2, a3, ret;
5370 int r = 1;
5371
5372 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5373 return kvm_hv_hypercall(vcpu);
5374
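	/*
	 * KVM hypercall ABI: the hypercall number is passed in RAX and up
	 * to four arguments in RBX, RCX, RDX and RSI; the return value is
	 * placed back in RAX.
	 */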
5375 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5376 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5377 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5378 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5379 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5380
5381 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5382
5383 if (!is_long_mode(vcpu)) {
5384 nr &= 0xFFFFFFFF;
5385 a0 &= 0xFFFFFFFF;
5386 a1 &= 0xFFFFFFFF;
5387 a2 &= 0xFFFFFFFF;
5388 a3 &= 0xFFFFFFFF;
5389 }
5390
5391 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5392 ret = -KVM_EPERM;
5393 goto out;
5394 }
5395
5396 switch (nr) {
5397 case KVM_HC_VAPIC_POLL_IRQ:
5398 ret = 0;
5399 break;
5400 default:
5401 ret = -KVM_ENOSYS;
5402 break;
5403 }
5404out:
5405 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5406 ++vcpu->stat.hypercalls;
5407 return r;
5408}
5409EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5410
5411static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5412{
5413 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5414 char instruction[3];
5415 unsigned long rip = kvm_rip_read(vcpu);
5416
	/*
	 * Blow out the MMU so that no other VCPU keeps an active mapping of
	 * the old instruction bytes; the patched hypercall then appears
	 * atomically across all VCPUs.
	 */
5422 kvm_mmu_zap_all(vcpu->kvm);
5423
5424 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5425
5426 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
5427}
5428

/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
5435static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5436{
5437 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5438 vcpu->run->request_interrupt_window &&
5439 kvm_arch_interrupt_allowed(vcpu));
5440}
5441
5442static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5443{
5444 struct kvm_run *kvm_run = vcpu->run;
5445
5446 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5447 kvm_run->cr8 = kvm_get_cr8(vcpu);
5448 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5449 if (irqchip_in_kernel(vcpu->kvm))
5450 kvm_run->ready_for_interrupt_injection = 1;
5451 else
5452 kvm_run->ready_for_interrupt_injection =
5453 kvm_arch_interrupt_allowed(vcpu) &&
5454 !kvm_cpu_has_interrupt(vcpu) &&
5455 !kvm_event_needs_reinjection(vcpu);
5456}
5457
5458static int vapic_enter(struct kvm_vcpu *vcpu)
5459{
5460 struct kvm_lapic *apic = vcpu->arch.apic;
5461 struct page *page;
5462
5463 if (!apic || !apic->vapic_addr)
5464 return 0;
5465
5466 page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5467 if (is_error_page(page))
5468 return -EFAULT;
5469
5470 vcpu->arch.apic->vapic_page = page;
5471 return 0;
5472}
5473
5474static void vapic_exit(struct kvm_vcpu *vcpu)
5475{
5476 struct kvm_lapic *apic = vcpu->arch.apic;
5477 int idx;
5478
5479 if (!apic || !apic->vapic_addr)
5480 return;
5481
5482 idx = srcu_read_lock(&vcpu->kvm->srcu);
5483 kvm_release_page_dirty(apic->vapic_page);
5484 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
5485 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5486}
5487
5488static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5489{
5490 int max_irr, tpr;
5491
5492 if (!kvm_x86_ops->update_cr8_intercept)
5493 return;
5494
5495 if (!vcpu->arch.apic)
5496 return;
5497
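	/*
	 * With a vapic page in use the TPR is synced through that page, so
	 * report no pending interrupt (-1) and let the CR8 intercept be
	 * dropped.
	 */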
5498 if (!vcpu->arch.apic->vapic_addr)
5499 max_irr = kvm_lapic_find_highest_irr(vcpu);
5500 else
5501 max_irr = -1;
5502
5503 if (max_irr != -1)
5504 max_irr >>= 4;
5505
5506 tpr = kvm_lapic_get_cr8(vcpu);
5507
5508 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5509}
5510
5511static void inject_pending_event(struct kvm_vcpu *vcpu)
5512{
	/* try to reinject previous events if any */
5514 if (vcpu->arch.exception.pending) {
5515 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5516 vcpu->arch.exception.has_error_code,
5517 vcpu->arch.exception.error_code);
5518 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5519 vcpu->arch.exception.has_error_code,
5520 vcpu->arch.exception.error_code,
5521 vcpu->arch.exception.reinject);
5522 return;
5523 }
5524
5525 if (vcpu->arch.nmi_injected) {
5526 kvm_x86_ops->set_nmi(vcpu);
5527 return;
5528 }
5529
5530 if (vcpu->arch.interrupt.pending) {
5531 kvm_x86_ops->set_irq(vcpu);
5532 return;
5533 }
5534
	/* try to inject new event if pending */
5536 if (vcpu->arch.nmi_pending) {
5537 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5538 --vcpu->arch.nmi_pending;
5539 vcpu->arch.nmi_injected = true;
5540 kvm_x86_ops->set_nmi(vcpu);
5541 }
5542 } else if (kvm_cpu_has_interrupt(vcpu)) {
5543 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
5544 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
5545 false);
5546 kvm_x86_ops->set_irq(vcpu);
5547 }
5548 }
5549}
5550
5551static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
5552{
5553 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
5554 !vcpu->guest_xcr0_loaded) {
		/* kvm_set_xcr() also depends on this */
5556 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
5557 vcpu->guest_xcr0_loaded = 1;
5558 }
5559}
5560
5561static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
5562{
5563 if (vcpu->guest_xcr0_loaded) {
5564 if (vcpu->arch.xcr0 != host_xcr0)
5565 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
5566 vcpu->guest_xcr0_loaded = 0;
5567 }
5568}
5569
5570static void process_nmi(struct kvm_vcpu *vcpu)
5571{
5572 unsigned limit = 2;
5573
	/*
	 * x86 is limited to one NMI running, and one NMI pending after it.
	 * If an NMI is already in progress, limit further NMIs to just one.
	 * Otherwise, allow two (and we'll inject the first one immediately).
	 */
5579 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
5580 limit = 1;
5581
5582 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
5583 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
5584 kvm_make_request(KVM_REQ_EVENT, vcpu);
5585}
5586
5587static void kvm_gen_update_masterclock(struct kvm *kvm)
5588{
5589#ifdef CONFIG_X86_64
5590 int i;
5591 struct kvm_vcpu *vcpu;
5592 struct kvm_arch *ka = &kvm->arch;
5593
5594 spin_lock(&ka->pvclock_gtod_sync_lock);
5595 kvm_make_mclock_inprogress_request(kvm);
	/* no guest entries from this point */
5597 pvclock_update_vm_gtod_copy(kvm);
5598
5599 kvm_for_each_vcpu(i, vcpu, kvm)
5600 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
5601
	/* guest entries allowed */
5603 kvm_for_each_vcpu(i, vcpu, kvm)
5604 clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
5605
5606 spin_unlock(&ka->pvclock_gtod_sync_lock);
5607#endif
5608}
5609
5610static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5611{
5612 int r;
5613 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
5614 vcpu->run->request_interrupt_window;
	bool req_immediate_exit = false;
5616
5617 if (vcpu->requests) {
5618 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
5619 kvm_mmu_unload(vcpu);
5620 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
5621 __kvm_migrate_timers(vcpu);
5622 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
5623 kvm_gen_update_masterclock(vcpu->kvm);
5624 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
5625 r = kvm_guest_time_update(vcpu);
5626 if (unlikely(r))
5627 goto out;
5628 }
5629 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
5630 kvm_mmu_sync_roots(vcpu);
5631 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
5632 kvm_x86_ops->tlb_flush(vcpu);
5633 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
5634 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
5635 r = 0;
5636 goto out;
5637 }
5638 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
5639 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
5640 r = 0;
5641 goto out;
5642 }
5643 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
5644 vcpu->fpu_active = 0;
5645 kvm_x86_ops->fpu_deactivate(vcpu);
5646 }
5647 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* Page is swapped out. Do synthetic halt */
5649 vcpu->arch.apf.halted = true;
5650 r = 1;
5651 goto out;
5652 }
5653 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
5654 record_steal_time(vcpu);
5655 if (kvm_check_request(KVM_REQ_NMI, vcpu))
5656 process_nmi(vcpu);
5657 req_immediate_exit =
5658 kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
5659 if (kvm_check_request(KVM_REQ_PMU, vcpu))
5660 kvm_handle_pmu_event(vcpu);
5661 if (kvm_check_request(KVM_REQ_PMI, vcpu))
5662 kvm_deliver_pmi(vcpu);
5663 }
5664
5665 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
5666 inject_pending_event(vcpu);
5667
		/* enable NMI/IRQ window open exits if needed */
5669 if (vcpu->arch.nmi_pending)
5670 kvm_x86_ops->enable_nmi_window(vcpu);
5671 else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
5672 kvm_x86_ops->enable_irq_window(vcpu);
5673
5674 if (kvm_lapic_enabled(vcpu)) {
5675 update_cr8_intercept(vcpu);
5676 kvm_lapic_sync_to_vapic(vcpu);
5677 }
5678 }
5679
5680 r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		goto cancel_injection;
5684
5685 preempt_disable();
5686
5687 kvm_x86_ops->prepare_guest_switch(vcpu);
5688 if (vcpu->fpu_active)
5689 kvm_load_guest_fpu(vcpu);
5690 kvm_load_guest_xcr0(vcpu);
5691
5692 vcpu->mode = IN_GUEST_MODE;
5693
	/*
	 * We should set ->mode before checking ->requests; see the comment
	 * in make_all_cpus_request().
	 */
5697 smp_mb();
5698
5699 local_irq_disable();
5700
5701 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
5702 || need_resched() || signal_pending(current)) {
5703 vcpu->mode = OUTSIDE_GUEST_MODE;
5704 smp_wmb();
5705 local_irq_enable();
5706 preempt_enable();
5707 r = 1;
5708 goto cancel_injection;
5709 }
5710
5711 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5712
5713 if (req_immediate_exit)
5714 smp_send_reschedule(vcpu->cpu);
5715
5716 kvm_guest_enter();
5717
5718 if (unlikely(vcpu->arch.switch_db_regs)) {
5719 set_debugreg(0, 7);
5720 set_debugreg(vcpu->arch.eff_db[0], 0);
5721 set_debugreg(vcpu->arch.eff_db[1], 1);
5722 set_debugreg(vcpu->arch.eff_db[2], 2);
5723 set_debugreg(vcpu->arch.eff_db[3], 3);
5724 }
5725
5726 trace_kvm_entry(vcpu->vcpu_id);
5727 kvm_x86_ops->run(vcpu);
5728
	/*
	 * If the guest has used debug registers, at least dr7 will have been
	 * clobbered on the way back to the host; if any hardware breakpoints
	 * are active on the host side, restore them before returning.
	 */
5736 if (hw_breakpoint_active())
5737 hw_breakpoint_restore();
5738
5739 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
5740 native_read_tsc());
5741
5742 vcpu->mode = OUTSIDE_GUEST_MODE;
5743 smp_wmb();
5744 local_irq_enable();
5745
5746 ++vcpu->stat.exits;
5747
	/*
	 * We must have an instruction between local_irq_enable() and
	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
	 * the interrupt shadow.  The stat.exits increment will do nicely.
	 * But we need to prevent reordering, hence this barrier():
	 */
5754 barrier();
5755
5756 kvm_guest_exit();
5757
5758 preempt_enable();
5759
5760 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5761
	/*
	 * Profile KVM exit RIPs:
	 */
5765 if (unlikely(prof_on == KVM_PROFILING)) {
5766 unsigned long rip = kvm_rip_read(vcpu);
5767 profile_hit(KVM_PROFILING, (void *)rip);
5768 }
5769
5770 if (unlikely(vcpu->arch.tsc_always_catchup))
5771 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5772
5773 if (vcpu->arch.apic_attention)
5774 kvm_lapic_sync_from_vapic(vcpu);
5775
5776 r = kvm_x86_ops->handle_exit(vcpu);
5777 return r;
5778
5779cancel_injection:
5780 kvm_x86_ops->cancel_injection(vcpu);
5781 if (unlikely(vcpu->arch.apic_attention))
5782 kvm_lapic_sync_from_vapic(vcpu);
5783out:
5784 return r;
5785}
5786
5788static int __vcpu_run(struct kvm_vcpu *vcpu)
5789{
5790 int r;
5791 struct kvm *kvm = vcpu->kvm;
5792
5793 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
5794 pr_debug("vcpu %d received sipi with vector # %x\n",
5795 vcpu->vcpu_id, vcpu->arch.sipi_vector);
5796 kvm_lapic_reset(vcpu);
5797 r = kvm_vcpu_reset(vcpu);
5798 if (r)
5799 return r;
5800 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
5801 }
5802
5803 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5804 r = vapic_enter(vcpu);
5805 if (r) {
5806 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5807 return r;
5808 }
5809
5810 r = 1;
5811 while (r > 0) {
5812 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
5813 !vcpu->arch.apf.halted)
5814 r = vcpu_enter_guest(vcpu);
5815 else {
5816 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5817 kvm_vcpu_block(vcpu);
5818 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
			if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
				switch (vcpu->arch.mp_state) {
				case KVM_MP_STATE_HALTED:
					vcpu->arch.mp_state =
						KVM_MP_STATE_RUNNABLE;
					/* fall through */
5825 case KVM_MP_STATE_RUNNABLE:
5826 vcpu->arch.apf.halted = false;
5827 break;
5828 case KVM_MP_STATE_SIPI_RECEIVED:
5829 default:
5830 r = -EINTR;
5831 break;
5832 }
5833 }
5834 }
5835
5836 if (r <= 0)
5837 break;
5838
5839 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
5840 if (kvm_cpu_has_pending_timer(vcpu))
5841 kvm_inject_pending_timer_irqs(vcpu);
5842
5843 if (dm_request_for_irq_injection(vcpu)) {
5844 r = -EINTR;
5845 vcpu->run->exit_reason = KVM_EXIT_INTR;
5846 ++vcpu->stat.request_irq_exits;
5847 }
5848
5849 kvm_check_async_pf_completion(vcpu);
5850
5851 if (signal_pending(current)) {
5852 r = -EINTR;
5853 vcpu->run->exit_reason = KVM_EXIT_INTR;
5854 ++vcpu->stat.signal_exits;
5855 }
5856 if (need_resched()) {
5857 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5858 kvm_resched(vcpu);
5859 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
5860 }
5861 }
5862
5863 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
5864
5865 vapic_exit(vcpu);
5866
5867 return r;
5868}
5869
5870static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
5871{
5872 int r;
5873 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
5874 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
5875 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
5876 if (r != EMULATE_DONE)
5877 return 0;
5878 return 1;
5879}
5880
5881static int complete_emulated_pio(struct kvm_vcpu *vcpu)
5882{
5883 BUG_ON(!vcpu->arch.pio.count);
5884
5885 return complete_emulated_io(vcpu);
5886}
5887
/*
 * Implements the following, as a state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit
 *       copy data
 *   execute insn
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       copy data
 *       exit
 */
5906static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
5907{
5908 struct kvm_run *run = vcpu->run;
5909 struct kvm_mmio_fragment *frag;
5910 unsigned len;
5911
5912 BUG_ON(!vcpu->mmio_needed);
5913
	/* Complete previous fragment */
5915 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
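	/* The kvm_run mmio buffer carries at most 8 bytes per exit. */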
5916 len = min(8u, frag->len);
5917 if (!vcpu->mmio_is_write)
5918 memcpy(frag->data, run->mmio.data, len);
5919
5920 if (frag->len <= 8) {
		/* Switch to the next fragment. */
5922 frag++;
5923 vcpu->mmio_cur_fragment++;
5924 } else {
		/* Go forward to the next mmio piece. */
5926 frag->data += len;
5927 frag->gpa += len;
5928 frag->len -= len;
5929 }
5930
5931 if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
5932 vcpu->mmio_needed = 0;
5933 if (vcpu->mmio_is_write)
5934 return 1;
5935 vcpu->mmio_read_completed = 1;
5936 return complete_emulated_io(vcpu);
5937 }
5938
5939 run->exit_reason = KVM_EXIT_MMIO;
5940 run->mmio.phys_addr = frag->gpa;
5941 if (vcpu->mmio_is_write)
5942 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
5943 run->mmio.len = min(8u, frag->len);
5944 run->mmio.is_write = vcpu->mmio_is_write;
5945 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5946 return 0;
5947}
5948
5949
5950int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
5951{
5952 int r;
5953 sigset_t sigsaved;
5954
5955 if (!tsk_used_math(current) && init_fpu(current))
5956 return -ENOMEM;
5957
5958 if (vcpu->sigset_active)
5959 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
5960
5961 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
5962 kvm_vcpu_block(vcpu);
5963 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
5964 r = -EAGAIN;
5965 goto out;
5966 }
5967
	/* re-sync apic's tpr */
5969 if (!irqchip_in_kernel(vcpu->kvm)) {
5970 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
5971 r = -EINVAL;
5972 goto out;
5973 }
5974 }
5975
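	/*
	 * If a prior exit to userspace left an MMIO or PIO transaction in
	 * flight, complete it before entering the guest again; the callback
	 * consumes the data userspace deposited in the kvm_run area.
	 */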
5976 if (unlikely(vcpu->arch.complete_userspace_io)) {
5977 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
5978 vcpu->arch.complete_userspace_io = NULL;
5979 r = cui(vcpu);
5980 if (r <= 0)
5981 goto out;
5982 } else
5983 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
5984
5985 r = __vcpu_run(vcpu);
5986
5987out:
5988 post_kvm_run_save(vcpu);
5989 if (vcpu->sigset_active)
5990 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
5991
5992 return r;
5993}
5994
5995int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
5996{
5997 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * We are here if userspace calls get_regs() in the middle of
		 * instruction emulation.  Register state needs to be copied
		 * back from the emulation context to the vcpu.  Userspace
		 * shouldn't usually do that, but some badly designed PV
		 * devices (the vmware backdoor interface) need it to work.
		 */
6005 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
6006 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6007 }
6008 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
6009 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
6010 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
6011 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
6012 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
6013 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
6014 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
6015 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
6016#ifdef CONFIG_X86_64
6017 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
6018 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
6019 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
6020 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
6021 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
6022 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
6023 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
6024 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
6025#endif
6026
6027 regs->rip = kvm_rip_read(vcpu);
6028 regs->rflags = kvm_get_rflags(vcpu);
6029
6030 return 0;
6031}
6032
6033int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6034{
6035 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
6036 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6037
6038 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
6039 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
6040 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
6041 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
6042 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
6043 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
6044 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
6045 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
6046#ifdef CONFIG_X86_64
6047 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
6048 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
6049 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
6050 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
6051 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
6052 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
6053 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
6054 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
6055#endif
6056
6057 kvm_rip_write(vcpu, regs->rip);
6058 kvm_set_rflags(vcpu, regs->rflags);
6059
6060 vcpu->arch.exception.pending = false;
6061
6062 kvm_make_request(KVM_REQ_EVENT, vcpu);
6063
6064 return 0;
6065}
6066
6067void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
6068{
6069 struct kvm_segment cs;
6070
6071 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6072 *db = cs.db;
6073 *l = cs.l;
6074}
6075EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
6076
6077int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
6078 struct kvm_sregs *sregs)
6079{
6080 struct desc_ptr dt;
6081
6082 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6083 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6084 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6085 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6086 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6087 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6088
6089 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6090 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6091
6092 kvm_x86_ops->get_idt(vcpu, &dt);
6093 sregs->idt.limit = dt.size;
6094 sregs->idt.base = dt.address;
6095 kvm_x86_ops->get_gdt(vcpu, &dt);
6096 sregs->gdt.limit = dt.size;
6097 sregs->gdt.base = dt.address;
6098
6099 sregs->cr0 = kvm_read_cr0(vcpu);
6100 sregs->cr2 = vcpu->arch.cr2;
6101 sregs->cr3 = kvm_read_cr3(vcpu);
6102 sregs->cr4 = kvm_read_cr4(vcpu);
6103 sregs->cr8 = kvm_get_cr8(vcpu);
6104 sregs->efer = vcpu->arch.efer;
6105 sregs->apic_base = kvm_get_apic_base(vcpu);
6106
6107 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
6108
6109 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
6110 set_bit(vcpu->arch.interrupt.nr,
6111 (unsigned long *)sregs->interrupt_bitmap);
6112
6113 return 0;
6114}
6115
6116int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
6117 struct kvm_mp_state *mp_state)
6118{
6119 mp_state->mp_state = vcpu->arch.mp_state;
6120 return 0;
6121}
6122
6123int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
6124 struct kvm_mp_state *mp_state)
6125{
6126 vcpu->arch.mp_state = mp_state->mp_state;
6127 kvm_make_request(KVM_REQ_EVENT, vcpu);
6128 return 0;
6129}
6130
6131int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
6132 int reason, bool has_error_code, u32 error_code)
6133{
6134 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6135 int ret;
6136
6137 init_emulate_ctxt(vcpu);
6138
6139 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
6140 has_error_code, error_code);
6141
6142 if (ret)
6143 return EMULATE_FAIL;
6144
6145 kvm_rip_write(vcpu, ctxt->eip);
6146 kvm_set_rflags(vcpu, ctxt->eflags);
6147 kvm_make_request(KVM_REQ_EVENT, vcpu);
6148 return EMULATE_DONE;
6149}
6150EXPORT_SYMBOL_GPL(kvm_task_switch);
6151
6152int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6153 struct kvm_sregs *sregs)
6154{
6155 int mmu_reset_needed = 0;
6156 int pending_vec, max_bits, idx;
6157 struct desc_ptr dt;
6158
6159 if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
6160 return -EINVAL;
6161
6162 dt.size = sregs->idt.limit;
6163 dt.address = sregs->idt.base;
6164 kvm_x86_ops->set_idt(vcpu, &dt);
6165 dt.size = sregs->gdt.limit;
6166 dt.address = sregs->gdt.base;
6167 kvm_x86_ops->set_gdt(vcpu, &dt);
6168
6169 vcpu->arch.cr2 = sregs->cr2;
6170 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
6171 vcpu->arch.cr3 = sregs->cr3;
6172 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
6173
6174 kvm_set_cr8(vcpu, sregs->cr8);
6175
6176 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
6177 kvm_x86_ops->set_efer(vcpu, sregs->efer);
6178 kvm_set_apic_base(vcpu, sregs->apic_base);
6179
6180 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
6181 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
6182 vcpu->arch.cr0 = sregs->cr0;
6183
6184 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
6185 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
6186 if (sregs->cr4 & X86_CR4_OSXSAVE)
6187 kvm_update_cpuid(vcpu);
6188
6189 idx = srcu_read_lock(&vcpu->kvm->srcu);
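	/*
	 * In PAE paging (CR4.PAE set, EFER.LMA clear) the PDPTEs are loaded
	 * from memory when CR3 is written; mirror that behaviour here.
	 */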
6190 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
6191 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6192 mmu_reset_needed = 1;
6193 }
6194 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6195
6196 if (mmu_reset_needed)
6197 kvm_mmu_reset_context(vcpu);
6198
6199 max_bits = KVM_NR_INTERRUPTS;
6200 pending_vec = find_first_bit(
6201 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
6202 if (pending_vec < max_bits) {
6203 kvm_queue_interrupt(vcpu, pending_vec, false);
6204 pr_debug("Set back pending irq %d\n", pending_vec);
6205 }
6206
6207 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6208 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6209 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6210 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6211 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6212 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6213
6214 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6215 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6216
6217 update_cr8_intercept(vcpu);
6218
	/* Older userspace won't unhalt the vcpu on reset. */
6220 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
6221 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
6222 !is_protmode(vcpu))
6223 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6224
6225 kvm_make_request(KVM_REQ_EVENT, vcpu);
6226
6227 return 0;
6228}
6229
6230int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
6231 struct kvm_guest_debug *dbg)
6232{
6233 unsigned long rflags;
6234 int i, r;
6235
6236 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
6237 r = -EBUSY;
6238 if (vcpu->arch.exception.pending)
6239 goto out;
6240 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
6241 kvm_queue_exception(vcpu, DB_VECTOR);
6242 else
6243 kvm_queue_exception(vcpu, BP_VECTOR);
6244 }
6245
	/*
	 * Read rflags as long as potentially injected trace flags are still
	 * filtered out.
	 */
6250 rflags = kvm_get_rflags(vcpu);
6251
6252 vcpu->guest_debug = dbg->control;
6253 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
6254 vcpu->guest_debug = 0;
6255
6256 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
6257 for (i = 0; i < KVM_NR_DB_REGS; ++i)
6258 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
6259 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
6260 } else {
6261 for (i = 0; i < KVM_NR_DB_REGS; i++)
6262 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6263 }
6264 kvm_update_dr7(vcpu);
6265
6266 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6267 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
6268 get_segment_base(vcpu, VCPU_SREG_CS);
6269
	/*
	 * Trigger an rflags update that will inject or remove the trace
	 * flags.
	 */
6274 kvm_set_rflags(vcpu, rflags);
6275
6276 kvm_x86_ops->update_db_bp_intercept(vcpu);
6277
6278 r = 0;
6279
6280out:
6281
6282 return r;
6283}
6284
/*
 * Translate a guest virtual address to a guest physical address.
 */
6288int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
6289 struct kvm_translation *tr)
6290{
6291 unsigned long vaddr = tr->linear_address;
6292 gpa_t gpa;
6293 int idx;
6294
6295 idx = srcu_read_lock(&vcpu->kvm->srcu);
6296 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
6297 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6298 tr->physical_address = gpa;
6299 tr->valid = gpa != UNMAPPED_GVA;
6300 tr->writeable = 1;
6301 tr->usermode = 0;
6302
6303 return 0;
6304}
6305
6306int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6307{
6308 struct i387_fxsave_struct *fxsave =
6309 &vcpu->arch.guest_fpu.state->fxsave;
6310
6311 memcpy(fpu->fpr, fxsave->st_space, 128);
6312 fpu->fcw = fxsave->cwd;
6313 fpu->fsw = fxsave->swd;
6314 fpu->ftwx = fxsave->twd;
6315 fpu->last_opcode = fxsave->fop;
6316 fpu->last_ip = fxsave->rip;
6317 fpu->last_dp = fxsave->rdp;
6318 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
6319
6320 return 0;
6321}
6322
6323int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6324{
6325 struct i387_fxsave_struct *fxsave =
6326 &vcpu->arch.guest_fpu.state->fxsave;
6327
6328 memcpy(fxsave->st_space, fpu->fpr, 128);
6329 fxsave->cwd = fpu->fcw;
6330 fxsave->swd = fpu->fsw;
6331 fxsave->twd = fpu->ftwx;
6332 fxsave->fop = fpu->last_opcode;
6333 fxsave->rip = fpu->last_ip;
6334 fxsave->rdp = fpu->last_dp;
6335 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
6336
6337 return 0;
6338}
6339
6340int fx_init(struct kvm_vcpu *vcpu)
6341{
6342 int err;
6343
6344 err = fpu_alloc(&vcpu->arch.guest_fpu);
6345 if (err)
6346 return err;
6347
6348 fpu_finit(&vcpu->arch.guest_fpu);
6349

	/*
	 * Ensure guest xcr0 is valid for loading
	 */
6353 vcpu->arch.xcr0 = XSTATE_FP;
6354
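	/* CR0.ET is hardwired to 1 on all modern (486+) processors. */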
6355 vcpu->arch.cr0 |= X86_CR0_ET;
6356
6357 return 0;
6358}
6359EXPORT_SYMBOL_GPL(fx_init);
6360
6361static void fx_free(struct kvm_vcpu *vcpu)
6362{
6363 fpu_free(&vcpu->arch.guest_fpu);
6364}
6365
6366void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6367{
6368 if (vcpu->guest_fpu_loaded)
6369 return;
6370
	/*
	 * Restore all possible states in the guest,
	 * and assume host would use all available bits.
	 * Guest xcr0 would be loaded later.
	 */
6376 kvm_put_guest_xcr0(vcpu);
6377 vcpu->guest_fpu_loaded = 1;
6378 __kernel_fpu_begin();
6379 fpu_restore_checking(&vcpu->arch.guest_fpu);
6380 trace_kvm_fpu(1);
6381}
6382
6383void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6384{
6385 kvm_put_guest_xcr0(vcpu);
6386
6387 if (!vcpu->guest_fpu_loaded)
6388 return;
6389
6390 vcpu->guest_fpu_loaded = 0;
6391 fpu_save_init(&vcpu->arch.guest_fpu);
6392 __kernel_fpu_end();
6393 ++vcpu->stat.fpu_reload;
6394 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6395 trace_kvm_fpu(0);
6396}
6397
6398void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
6399{
6400 kvmclock_reset(vcpu);
6401
6402 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6403 fx_free(vcpu);
6404 kvm_x86_ops->vcpu_free(vcpu);
6405}
6406
6407struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6408 unsigned int id)
6409{
6410 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6411 printk_once(KERN_WARNING
6412 "kvm: SMP vm created on host with unstable TSC; "
6413 "guest TSC will not be reliable\n");
6414 return kvm_x86_ops->vcpu_create(kvm, id);
6415}
6416
6417int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6418{
6419 int r;
6420
6421 vcpu->arch.mtrr_state.have_fixed = 1;
6422 r = vcpu_load(vcpu);
6423 if (r)
6424 return r;
6425 r = kvm_vcpu_reset(vcpu);
6426 if (r == 0)
6427 r = kvm_mmu_setup(vcpu);
6428 vcpu_put(vcpu);
6429
6430 return r;
6431}
6432
6433int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6434{
6435 int r;
6436 struct msr_data msr;
6437
6438 r = vcpu_load(vcpu);
6439 if (r)
6440 return r;
6441 msr.data = 0x0;
6442 msr.index = MSR_IA32_TSC;
6443 msr.host_initiated = true;
6444 kvm_write_tsc(vcpu, &msr);
6445 vcpu_put(vcpu);
6446
6447 return r;
6448}
6449
6450void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6451{
6452 int r;
6453 vcpu->arch.apf.msr_val = 0;
6454
6455 r = vcpu_load(vcpu);
6456 BUG_ON(r);
6457 kvm_mmu_unload(vcpu);
6458 vcpu_put(vcpu);
6459
6460 fx_free(vcpu);
6461 kvm_x86_ops->vcpu_free(vcpu);
6462}
6463
6464static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6465{
6466 atomic_set(&vcpu->arch.nmi_queued, 0);
6467 vcpu->arch.nmi_pending = 0;
6468 vcpu->arch.nmi_injected = false;
6469
6470 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6471 vcpu->arch.dr6 = DR6_FIXED_1;
6472 vcpu->arch.dr7 = DR7_FIXED_1;
6473 kvm_update_dr7(vcpu);
6474
6475 kvm_make_request(KVM_REQ_EVENT, vcpu);
6476 vcpu->arch.apf.msr_val = 0;
6477 vcpu->arch.st.msr_val = 0;
6478
6479 kvmclock_reset(vcpu);
6480
6481 kvm_clear_async_pf_completion_queue(vcpu);
6482 kvm_async_pf_hash_reset(vcpu);
6483 vcpu->arch.apf.halted = false;
6484
6485 kvm_pmu_reset(vcpu);
6486
6487 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
6488 vcpu->arch.regs_avail = ~0;
6489 vcpu->arch.regs_dirty = ~0;
6490
6491 return kvm_x86_ops->vcpu_reset(vcpu);
6492}
6493
6494int kvm_arch_hardware_enable(void *garbage)
6495{
6496 struct kvm *kvm;
6497 struct kvm_vcpu *vcpu;
6498 int i;
6499 int ret;
6500 u64 local_tsc;
6501 u64 max_tsc = 0;
6502 bool stable, backwards_tsc = false;
6503
6504 kvm_shared_msr_cpu_online();
6505 ret = kvm_x86_ops->hardware_enable(garbage);
6506 if (ret != 0)
6507 return ret;
6508
6509 local_tsc = native_read_tsc();
6510 stable = !check_tsc_unstable();
6511 list_for_each_entry(kvm, &vm_list, vm_list) {
6512 kvm_for_each_vcpu(i, vcpu, kvm) {
6513 if (!stable && vcpu->cpu == smp_processor_id())
6514 set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
6515 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
6516 backwards_tsc = true;
6517 if (vcpu->arch.last_host_tsc > max_tsc)
6518 max_tsc = vcpu->arch.last_host_tsc;
6519 }
6520 }
6521 }
6522
	/*
	 * Sometimes, even reliable TSCs go backwards.  This happens on
	 * platforms that reset TSC during suspend or hibernate actions, but
	 * maintain synchronization.  We must compensate.  Fortunately, we can
	 * detect that condition here, which happens early in CPU bringup,
	 * before any KVM threads can be running.  Unfortunately, we can't
	 * bring the TSCs fully up to date with real time, as we aren't yet
	 * far enough into CPU bringup that we know how much real time has
	 * actually elapsed; our helper function, get_kernel_ns() will be
	 * using boot variables that haven't been updated yet.
	 *
	 * So we simply find the maximum observed TSC above, then record the
	 * adjustment to TSC in each VCPU.  When the VCPU later gets loaded,
	 * the adjustment will be applied.  Note that we accumulate
	 * adjustments, in case multiple suspend cycles happen before some
	 * VCPU gets a chance to run again.
	 *
	 * N.B. - this code only runs for platforms with a reliable TSC,
	 * which is the only way backwards_tsc can be set above.  The same
	 * adjustment is applied to every vcpu, and it is done only once per
	 * suspend/resume cycle, since last_host_tsc is reset on all vcpus
	 * below.  Hosts with unreliable TSCs are instead compensated by the
	 * catch-up logic when each vcpu is next loaded.
	 */
6561 if (backwards_tsc) {
6562 u64 delta_cyc = max_tsc - local_tsc;
6563 list_for_each_entry(kvm, &vm_list, vm_list) {
6564 kvm_for_each_vcpu(i, vcpu, kvm) {
6565 vcpu->arch.tsc_offset_adjustment += delta_cyc;
6566 vcpu->arch.last_host_tsc = local_tsc;
6567 set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
6568 &vcpu->requests);
6569 }

			/*
			 * We have to disable TSC offset matching.. if you
			 * were booting a VM while issuing an S4 host suspend,
			 * you may have some problem.  Solving this issue is
			 * left as an exercise to the reader.
			 */
6577 kvm->arch.last_tsc_nsec = 0;
6578 kvm->arch.last_tsc_write = 0;
6579 }
6580
6581 }
6582 return 0;
6583}
6584
6585void kvm_arch_hardware_disable(void *garbage)
6586{
6587 kvm_x86_ops->hardware_disable(garbage);
6588 drop_user_return_notifiers(garbage);
6589}
6590
6591int kvm_arch_hardware_setup(void)
6592{
6593 return kvm_x86_ops->hardware_setup();
6594}
6595
6596void kvm_arch_hardware_unsetup(void)
6597{
6598 kvm_x86_ops->hardware_unsetup();
6599}
6600
6601void kvm_arch_check_processor_compat(void *rtn)
6602{
6603 kvm_x86_ops->check_processor_compatibility(rtn);
6604}
6605
6606bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
6607{
6608 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
6609}
6610
6611struct static_key kvm_no_apic_vcpu __read_mostly;
6612
6613int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
6614{
6615 struct page *page;
6616 struct kvm *kvm;
6617 int r;
6618
6619 BUG_ON(vcpu->kvm == NULL);
6620 kvm = vcpu->kvm;
6621
6622 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
6623 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
6624 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6625 else
6626 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
6627
6628 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
6629 if (!page) {
6630 r = -ENOMEM;
6631 goto fail;
6632 }
6633 vcpu->arch.pio_data = page_address(page);
6634
6635 kvm_set_tsc_khz(vcpu, max_tsc_khz);
6636
6637 r = kvm_mmu_create(vcpu);
6638 if (r < 0)
6639 goto fail_free_pio_data;
6640
6641 if (irqchip_in_kernel(kvm)) {
6642 r = kvm_create_lapic(vcpu);
6643 if (r < 0)
6644 goto fail_mmu_destroy;
6645 } else
6646 static_key_slow_inc(&kvm_no_apic_vcpu);
6647
6648 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
6649 GFP_KERNEL);
6650 if (!vcpu->arch.mce_banks) {
6651 r = -ENOMEM;
6652 goto fail_free_lapic;
6653 }
6654 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
6655
	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
		r = -ENOMEM;
		goto fail_free_mce_banks;
	}
6658
6659 r = fx_init(vcpu);
6660 if (r)
6661 goto fail_free_wbinvd_dirty_mask;
6662
6663 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
6664 kvm_async_pf_hash_reset(vcpu);
6665 kvm_pmu_init(vcpu);
6666
6667 return 0;
6668fail_free_wbinvd_dirty_mask:
6669 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6670fail_free_mce_banks:
6671 kfree(vcpu->arch.mce_banks);
6672fail_free_lapic:
6673 kvm_free_lapic(vcpu);
6674fail_mmu_destroy:
6675 kvm_mmu_destroy(vcpu);
6676fail_free_pio_data:
6677 free_page((unsigned long)vcpu->arch.pio_data);
6678fail:
6679 return r;
6680}
6681
6682void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
6683{
6684 int idx;
6685
6686 kvm_pmu_destroy(vcpu);
6687 kfree(vcpu->arch.mce_banks);
6688 kvm_free_lapic(vcpu);
6689 idx = srcu_read_lock(&vcpu->kvm->srcu);
6690 kvm_mmu_destroy(vcpu);
6691 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6692 free_page((unsigned long)vcpu->arch.pio_data);
6693 if (!irqchip_in_kernel(vcpu->kvm))
6694 static_key_slow_dec(&kvm_no_apic_vcpu);
6695}
6696
6697int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
6698{
6699 if (type)
6700 return -EINVAL;
6701
6702 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
6703 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
6704
	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
6706 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
6708 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
6709 &kvm->arch.irq_sources_bitmap);
6710
6711 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
6712 mutex_init(&kvm->arch.apic_map_lock);
6713 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
6714
6715 pvclock_update_vm_gtod_copy(kvm);
6716
6717 return 0;
6718}
6719
6720static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
6721{
6722 int r;
6723 r = vcpu_load(vcpu);
6724 BUG_ON(r);
6725 kvm_mmu_unload(vcpu);
6726 vcpu_put(vcpu);
6727}
6728
6729static void kvm_free_vcpus(struct kvm *kvm)
6730{
6731 unsigned int i;
6732 struct kvm_vcpu *vcpu;
6733
	/*
	 * Unpin any mmu pages first.
	 */
6737 kvm_for_each_vcpu(i, vcpu, kvm) {
6738 kvm_clear_async_pf_completion_queue(vcpu);
6739 kvm_unload_vcpu_mmu(vcpu);
6740 }
6741 kvm_for_each_vcpu(i, vcpu, kvm)
6742 kvm_arch_vcpu_free(vcpu);
6743
6744 mutex_lock(&kvm->lock);
6745 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
6746 kvm->vcpus[i] = NULL;
6747
6748 atomic_set(&kvm->online_vcpus, 0);
6749 mutex_unlock(&kvm->lock);
6750}
6751
6752void kvm_arch_sync_events(struct kvm *kvm)
6753{
6754 kvm_free_all_assigned_devices(kvm);
6755 kvm_free_pit(kvm);
6756}
6757
6758void kvm_arch_destroy_vm(struct kvm *kvm)
6759{
6760 kvm_iommu_unmap_guest(kvm);
6761 kfree(kvm->arch.vpic);
6762 kfree(kvm->arch.vioapic);
6763 kvm_free_vcpus(kvm);
6764 if (kvm->arch.apic_access_page)
6765 put_page(kvm->arch.apic_access_page);
6766 if (kvm->arch.ept_identity_pagetable)
6767 put_page(kvm->arch.ept_identity_pagetable);
6768 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
6769}
6770
6771void kvm_arch_free_memslot(struct kvm_memory_slot *free,
6772 struct kvm_memory_slot *dont)
6773{
6774 int i;
6775
6776 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6777 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
6778 kvm_kvfree(free->arch.rmap[i]);
6779 free->arch.rmap[i] = NULL;
6780 }
6781 if (i == 0)
6782 continue;
6783
6784 if (!dont || free->arch.lpage_info[i - 1] !=
6785 dont->arch.lpage_info[i - 1]) {
6786 kvm_kvfree(free->arch.lpage_info[i - 1]);
6787 free->arch.lpage_info[i - 1] = NULL;
6788 }
6789 }
6790}
6791
6792int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
6793{
6794 int i;
6795
6796 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6797 unsigned long ugfn;
6798 int lpages;
6799 int level = i + 1;
6800
6801 lpages = gfn_to_index(slot->base_gfn + npages - 1,
6802 slot->base_gfn, level) + 1;
6803
6804 slot->arch.rmap[i] =
6805 kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
6806 if (!slot->arch.rmap[i])
6807 goto out_free;
6808 if (i == 0)
6809 continue;
6810
6811 slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
6812 sizeof(*slot->arch.lpage_info[i - 1]));
6813 if (!slot->arch.lpage_info[i - 1])
6814 goto out_free;
6815
6816 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
6817 slot->arch.lpage_info[i - 1][0].write_count = 1;
6818 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
6819 slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
		ugfn = slot->userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if explicitly asked to, disable large page
		 * support for this slot.
		 */
		if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
		    !kvm_largepages_enabled()) {
6828 unsigned long j;
6829
6830 for (j = 0; j < lpages; ++j)
6831 slot->arch.lpage_info[i - 1][j].write_count = 1;
6832 }
6833 }
6834
6835 return 0;
6836
6837out_free:
6838 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
6839 kvm_kvfree(slot->arch.rmap[i]);
6840 slot->arch.rmap[i] = NULL;
6841 if (i == 0)
6842 continue;
6843
6844 kvm_kvfree(slot->arch.lpage_info[i - 1]);
6845 slot->arch.lpage_info[i - 1] = NULL;
6846 }
6847 return -ENOMEM;
6848}
6849
6850int kvm_arch_prepare_memory_region(struct kvm *kvm,
6851 struct kvm_memory_slot *memslot,
6852 struct kvm_memory_slot old,
6853 struct kvm_userspace_memory_region *mem,
6854 int user_alloc)
6855{
6856 int npages = memslot->npages;
	int map_flags = MAP_PRIVATE | MAP_ANONYMOUS;

	/* Prevent internal slot pages from being moved by fork()/COW. */
	if (memslot->id >= KVM_MEMORY_SLOTS)
		map_flags = MAP_SHARED | MAP_ANONYMOUS;

	/*
	 * To keep backward compatibility with old userspace,
	 * x86 still needs to handle the !user_alloc case here.
	 */
	if (!user_alloc) {
6867 if (npages && !old.npages) {
6868 unsigned long userspace_addr;
6869
6870 userspace_addr = vm_mmap(NULL, 0,
6871 npages * PAGE_SIZE,
6872 PROT_READ | PROT_WRITE,
6873 map_flags,
6874 0);
6875
6876 if (IS_ERR((void *)userspace_addr))
6877 return PTR_ERR((void *)userspace_addr);
6878
6879 memslot->userspace_addr = userspace_addr;
6880 }
	}

	return 0;
6885}
6886
6887void kvm_arch_commit_memory_region(struct kvm *kvm,
6888 struct kvm_userspace_memory_region *mem,
6889 struct kvm_memory_slot old,
6890 int user_alloc)
{
	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
6894
6895 if (!user_alloc && !old.user_alloc && old.npages && !npages) {
6896 int ret;
6897
6898 ret = vm_munmap(old.userspace_addr,
6899 old.npages * PAGE_SIZE);
6900 if (ret < 0)
6901 printk(KERN_WARNING
6902 "kvm_vm_ioctl_set_memory_region: "
6903 "failed to munmap memory\n");
6904 }
6905
6906 if (!kvm->arch.n_requested_mmu_pages)
6907 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
6908
6909 spin_lock(&kvm->mmu_lock);
6910 if (nr_mmu_pages)
6911 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
6912 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
	spin_unlock(&kvm->mmu_lock);

	/*
	 * If a memory slot was created or moved, all mmio sptes need to be
	 * cleared, so zap the shadow pages and reload the remote mmus.
	 */
	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
6919 kvm_mmu_zap_all(kvm);
6920 kvm_reload_remote_mmus(kvm);
6921 }
6922}
6923
6924void kvm_arch_flush_shadow_all(struct kvm *kvm)
6925{
6926 kvm_mmu_zap_all(kvm);
6927 kvm_reload_remote_mmus(kvm);
6928}
6929
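/*
 * x86 keeps no per-slot shadow page tracking, so flushing a single
 * memslot falls back to zapping all shadow pages.
 */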
6930void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
6931 struct kvm_memory_slot *slot)
6932{
6933 kvm_arch_flush_shadow_all(kvm);
6934}
6935
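/*
 * A vcpu is runnable if it is RUNNABLE and not halted waiting for an
 * async page fault, has a completed async page fault, received a SIPI,
 * has an NMI queued, or has a deliverable external interrupt pending.
 */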
6936int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
6937{
6938 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6939 !vcpu->arch.apf.halted)
6940 || !list_empty_careful(&vcpu->async_pf.done)
6941 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
6942 || atomic_read(&vcpu->arch.nmi_queued) ||
6943 (kvm_arch_interrupt_allowed(vcpu) &&
6944 kvm_cpu_has_interrupt(vcpu));
6945}
6946
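/* Only send an IPI to a vcpu that is currently executing guest code. */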
6947int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
6948{
6949 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
6950}
6951
6952int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
6953{
6954 return kvm_x86_ops->interrupt_allowed(vcpu);
6955}
6956
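/*
 * Compare the vcpu's current linear rip (CS base + RIP) with a previously
 * recorded one, e.g. to keep the single-step TF flag set only while the
 * same instruction is being restarted.
 */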
6957bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
6958{
6959 unsigned long current_rip = kvm_rip_read(vcpu) +
6960 get_segment_base(vcpu, VCPU_SREG_CS);
6961
6962 return current_rip == linear_rip;
6963}
6964EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
6965
6966unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
6967{
6968 unsigned long rflags;
6969
6970 rflags = kvm_x86_ops->get_rflags(vcpu);
6971 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6972 rflags &= ~X86_EFLAGS_TF;
6973 return rflags;
6974}
6975EXPORT_SYMBOL_GPL(kvm_get_rflags);
6976
6977void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
6978{
6979 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
6980 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
6981 rflags |= X86_EFLAGS_TF;
6982 kvm_x86_ops->set_rflags(vcpu, rflags);
6983 kvm_make_request(KVM_REQ_EVENT, vcpu);
6984}
6985EXPORT_SYMBOL_GPL(kvm_set_rflags);
6986
6987void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
6988{
6989 int r;
6990
6991 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
6992 is_error_page(work->page))
6993 return;
6994
6995 r = kvm_mmu_reload(vcpu);
6996 if (unlikely(r))
6997 return;
6998
6999 if (!vcpu->arch.mmu.direct_map &&
7000 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
7001 return;
7002
7003 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
7004}
7005
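/*
 * Outstanding async page fault gfns live in a small per-vcpu
 * open-addressed hash table: ~0 marks an empty slot and collisions are
 * resolved by linear probing.
 */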
7006static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
7007{
7008 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
7009}
7010
7011static inline u32 kvm_async_pf_next_probe(u32 key)
7012{
7013 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
7014}
7015
7016static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7017{
7018 u32 key = kvm_async_pf_hash_fn(gfn);
7019
7020 while (vcpu->arch.apf.gfns[key] != ~0)
7021 key = kvm_async_pf_next_probe(key);
7022
7023 vcpu->arch.apf.gfns[key] = gfn;
7024}
7025
7026static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
7027{
7028 int i;
7029 u32 key = kvm_async_pf_hash_fn(gfn);
7030
7031 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
7032 (vcpu->arch.apf.gfns[key] != gfn &&
7033 vcpu->arch.apf.gfns[key] != ~0); i++)
7034 key = kvm_async_pf_next_probe(key);
7035
7036 return key;
7037}
7038
7039bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7040{
7041 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
7042}
7043
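/*
 * Deletion from an open-addressed table cannot simply clear the slot, or
 * later lookups would stop probing too early; instead, walk the probe
 * chain and shift entries back so every remaining gfn stays reachable
 * from its hash position.
 */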
7044static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7045{
7046 u32 i, j, k;
7047
7048 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
7049 while (true) {
7050 vcpu->arch.apf.gfns[i] = ~0;
7051 do {
7052 j = kvm_async_pf_next_probe(j);
7053 if (vcpu->arch.apf.gfns[j] == ~0)
7054 return;
			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * k lies cyclically in ]i,j]
			 * |    i.k.j |
			 * |....j i.k.| or  |.k..j i...|
			 */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
7062 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
7063 i = j;
7064 }
7065}
7066
7067static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{
	/* Write the reason into the guest's registered async-pf data area. */
	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
7071 sizeof(val));
7072}
7073
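/*
 * The guest touched a page that is not resident. Either report it through
 * the paravirt #PF channel, with the token as the fault address, so the
 * guest can switch to another task, or halt the vcpu until the page
 * arrives.
 */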
7074void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
7075 struct kvm_async_pf *work)
7076{
7077 struct x86_exception fault;
7078
7079 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
7080 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
7081
7082 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
7083 (vcpu->arch.apf.send_user_only &&
7084 kvm_x86_ops->get_cpl(vcpu) == 0))
7085 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
7086 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
7087 fault.vector = PF_VECTOR;
7088 fault.error_code_valid = true;
7089 fault.error_code = 0;
7090 fault.nested_page_fault = false;
7091 fault.address = work->arch.token;
7092 kvm_inject_page_fault(vcpu, &fault);
7093 }
7094}
7095
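/*
 * The page became resident: report "page ready" with the matching token
 * through the same #PF channel and make the vcpu runnable again.
 */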
7096void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
7097 struct kvm_async_pf *work)
7098{
7099 struct x86_exception fault;
7100
7101 trace_kvm_async_pf_ready(work->arch.token, work->gva);
7102 if (is_error_page(work->page))
7103 work->arch.token = ~0;
7104 else
7105 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
7106
7107 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
7108 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
7109 fault.vector = PF_VECTOR;
7110 fault.error_code_valid = true;
7111 fault.error_code = 0;
7112 fault.nested_page_fault = false;
7113 fault.address = work->arch.token;
7114 kvm_inject_page_fault(vcpu, &fault);
7115 }
7116 vcpu->arch.apf.halted = false;
7117 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7118}
7119
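/*
 * "Page ready" may be injected only if no other event awaits reinjection
 * and interrupts are deliverable; if the guest never enabled async PF
 * reporting, completion injects nothing and is always allowed.
 */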
7120bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
7121{
7122 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
7123 return true;
7124 else
7125 return !kvm_event_needs_reinjection(vcpu) &&
7126 kvm_x86_ops->interrupt_allowed(vcpu);
7127}
7128
7129EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
7130EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
7131EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
7132EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
7133EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
7134EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
7135EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
7136EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
7137EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
7138EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
7139EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7140EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7141