/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)

#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/* EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
#ifdef CONFIG_X86_64
static
u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);

struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

static bool ignore_msrs = false;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);

unsigned int min_timer_period_us = 500;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);

bool kvm_has_tsc_control;
EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);

/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);

static bool backwards_tsc_observed = false;

#define KVM_NR_SHARED_MSRS 16

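/*
 * Per-CPU "shared" (user-return) MSRs: MSRs such as MSR_STAR that hold
 * the same value for every guest on a host CPU can be left at their
 * guest values while the CPU stays in the kernel, and are rolled back to
 * the host values from a user-return notifier only when returning to
 * userspace.  Sketch of the intended call sequence (illustrative only):
 *
 *	kvm_define_shared_msr(slot, msr);	    at module init
 *	kvm_set_shared_msr(slot, guest_val, mask);  on guest entry
 */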
struct kvm_shared_msrs_global {
	int nr;
	u32 msrs[KVM_NR_SHARED_MSRS];
};

struct kvm_shared_msrs {
	struct user_return_notifier urn;
	bool registered;
	struct kvm_shared_msr_values {
		u64 host;
		u64 curr;
	} values[KVM_NR_SHARED_MSRS];
};

static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
static struct kvm_shared_msrs __percpu *shared_msrs;

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "pf_fixed", VCPU_STAT(pf_fixed) },
	{ "pf_guest", VCPU_STAT(pf_guest) },
	{ "tlb_flush", VCPU_STAT(tlb_flush) },
	{ "invlpg", VCPU_STAT(invlpg) },
	{ "exits", VCPU_STAT(exits) },
	{ "io_exits", VCPU_STAT(io_exits) },
	{ "mmio_exits", VCPU_STAT(mmio_exits) },
	{ "signal_exits", VCPU_STAT(signal_exits) },
	{ "irq_window", VCPU_STAT(irq_window_exits) },
	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
	{ "halt_exits", VCPU_STAT(halt_exits) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "hypercalls", VCPU_STAT(hypercalls) },
	{ "request_irq", VCPU_STAT(request_irq_exits) },
	{ "irq_exits", VCPU_STAT(irq_exits) },
	{ "host_state_reload", VCPU_STAT(host_state_reload) },
	{ "efer_reload", VCPU_STAT(efer_reload) },
	{ "fpu_reload", VCPU_STAT(fpu_reload) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
	{ "irq_injections", VCPU_STAT(irq_injections) },
	{ "nmi_injections", VCPU_STAT(nmi_injections) },
	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
	{ "mmu_flooded", VM_STAT(mmu_flooded) },
	{ "mmu_recycled", VM_STAT(mmu_recycled) },
	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ "largepages", VM_STAT(lpages) },
	{ NULL }
};

u64 __read_mostly host_xcr0;

static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
	int i;
	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
		vcpu->arch.apf.gfns[i] = ~0;
}

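/*
 * Runs on return to userspace; rolls every shared MSR that still holds a
 * guest value back to the saved host value and unregisters the notifier.
 */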
static void kvm_on_user_return(struct user_return_notifier *urn)
{
	unsigned slot;
	struct kvm_shared_msrs *locals
		= container_of(urn, struct kvm_shared_msrs, urn);
	struct kvm_shared_msr_values *values;

	for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
		values = &locals->values[slot];
		if (values->host != values->curr) {
			wrmsrl(shared_msrs_global.msrs[slot], values->host);
			values->curr = values->host;
		}
	}
	locals->registered = false;
	user_return_notifier_unregister(urn);
}

static void shared_msr_update(unsigned slot, u32 msr)
{
	u64 value;
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	/* only read; nobody should modify it at this time,
	 * so no lock is needed */
	if (slot >= shared_msrs_global.nr) {
		printk(KERN_ERR "kvm: invalid MSR slot!");
		return;
	}
	rdmsrl_safe(msr, &value);
	smsr->values[slot].host = value;
	smsr->values[slot].curr = value;
}

void kvm_define_shared_msr(unsigned slot, u32 msr)
{
	BUG_ON(slot >= KVM_NR_SHARED_MSRS);
	if (slot >= shared_msrs_global.nr)
		shared_msrs_global.nr = slot + 1;
	shared_msrs_global.msrs[slot] = msr;
	/* make sure the update to shared_msrs_global is visible */
	smp_wmb();
}
EXPORT_SYMBOL_GPL(kvm_define_shared_msr);

static void kvm_shared_msr_cpu_online(void)
{
	unsigned i;

	for (i = 0; i < shared_msrs_global.nr; ++i)
		shared_msr_update(i, shared_msrs_global.msrs[i]);
}

int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
	int err;

	if (((value ^ smsr->values[slot].curr) & mask) == 0)
		return 0;
	smsr->values[slot].curr = value;
	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
	if (err)
		return 1;

	if (!smsr->registered) {
		smsr->urn.on_user_return = kvm_on_user_return;
		user_return_notifier_register(&smsr->urn);
		smsr->registered = true;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

static void drop_user_return_notifiers(void)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	if (smsr->registered)
		kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	u64 old_state = vcpu->arch.apic_base &
		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
	u64 new_state = msr_info->data &
		(MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
		0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);

	if (!msr_info->host_initiated &&
	    ((msr_info->data & reserved_bits) != 0 ||
	     new_state == X2APIC_ENABLE ||
	     (new_state == MSR_IA32_APICBASE_ENABLE &&
	      old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
	     (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
	      old_state == 0)))
		return 1;

	kvm_lapic_set_base(vcpu, msr_info->data);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

asmlinkage __visible void kvm_spurious_fault(void)
{
	/* Fault while not rebooting.  We want the trace. */
	BUG();
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);

#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
#define EXCPT_PF 2

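/*
 * These classes follow the Intel SDM's benign/contributory exception
 * classification: a contributory exception raised while delivering
 * another contributory one, or any non-benign exception raised while
 * delivering a page fault, escalates to #DF.
 */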
static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR:
	case TS_VECTOR:
	case NP_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	default:
		break;
	}
	return EXCPT_BENIGN;
}

#define EXCPT_FAULT 0
#define EXCPT_TRAP 1
#define EXCPT_ABORT 2
#define EXCPT_INTERRUPT 3

static int exception_type(int vector)
{
	unsigned int mask;

	if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
		return EXCPT_INTERRUPT;

	mask = 1 << vector;

	/* #DB, #BP and #OF are traps; watchpoints are handled elsewhere */
	if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
		return EXCPT_TRAP;

	if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
		return EXCPT_ABORT;

	/* Reserved exceptions will result in fault */
	return EXCPT_FAULT;
}

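/*
 * Queue an exception for injection, merging it with any exception that is
 * already pending: a second contributory exception (or any non-benign
 * exception on top of a pending page fault) becomes #DF, and a new fault
 * while #DF is pending requests a triple-fault shutdown.
 */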
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
		unsigned nr, bool has_error, u32 error_code,
		bool reinject)
{
	u32 prev_nr;
	int class1, class2;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	if (!vcpu->arch.exception.pending) {
	queue:
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = has_error;
		vcpu->arch.exception.nr = nr;
		vcpu->arch.exception.error_code = error_code;
		vcpu->arch.exception.reinject = reinject;
		return;
	}

	prev_nr = vcpu->arch.exception.nr;
	if (prev_nr == DF_VECTOR) {
		/* triple fault -> shutdown */
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
		return;
	}
	class1 = exception_class(prev_nr);
	class2 = exception_class(nr);
	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/* generate double fault per SDM Table 5-5 */
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.has_error_code = true;
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
	} else
		/* replace previous exception with a new one in a hope
		   that instruction re-execution will regenerate lost
		   exception */
		goto queue;
}

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
	if (err)
		kvm_inject_gp(vcpu, 0);
	else
		kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	++vcpu->stat.pf_guest;
	vcpu->arch.cr2 = fault->address;
	kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);

static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
	else
		vcpu->arch.mmu.inject_page_fault(vcpu, fault);

	return fault->nested_page_fault;
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
	atomic_inc(&vcpu->arch.nmi_queued);
	kvm_make_request(KVM_REQ_NMI, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);

/*
 * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
 * a #GP and return false.
 */
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
		return true;
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

/*
 * This function will be used to read from the physical memory of the
 * currently running guest.  The difference to kvm_read_guest_page is that
 * this function can read from guest physical or from the guest's guest
 * physical memory.
 */
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gfn_t ngfn, void *data, int offset, int len,
			    u32 access)
{
	struct x86_exception exception;
	gfn_t real_gfn;
	gpa_t ngpa;

	ngpa = gfn_to_gpa(ngfn);
	real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
	if (real_gfn == UNMAPPED_GVA)
		return -EFAULT;

	real_gfn = gpa_to_gfn(real_gfn);

	return kvm_read_guest_page(vcpu->kvm, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
			       void *data, int offset, int len, u32 access)
{
	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
				       data, offset, len, access);
}

/*
 * Load the PAE pdptrs.  Return 1 if they are all valid, 0 otherwise.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

	ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
				      offset * sizeof(u64), sizeof(pdpte),
				      PFERR_USER_MASK|PFERR_WRITE_MASK);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if (is_present_gpte(pdpte[i]) &&
		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(VCPU_EXREG_PDPTR,
		  (unsigned long *)&vcpu->arch.regs_dirty);
out:

	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);

static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
	bool changed = true;
	int offset;
	gfn_t gfn;
	int r;

	if (is_long_mode(vcpu) || !is_pae(vcpu))
		return false;

	if (!test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)&vcpu->arch.regs_avail))
		return true;

	gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
	offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
				       PFERR_USER_MASK | PFERR_WRITE_MASK);
	if (r < 0)
		goto out;
	changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
out:

	return changed;
}

int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	unsigned long old_cr0 = kvm_read_cr0(vcpu);
	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP |
				    X86_CR0_CD | X86_CR0_NW;

	cr0 |= X86_CR0_ET;

#ifdef CONFIG_X86_64
	if (cr0 & 0xffffffff00000000UL)
		return 1;
#endif

	cr0 &= ~CR0_RESERVED_BITS;

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
		return 1;

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
		return 1;

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->arch.efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu))
				return 1;
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l)
				return 1;
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
						 kvm_read_cr3(vcpu)))
			return 1;
	}

	if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
		return 1;

	kvm_x86_ops->set_cr0(vcpu, cr0);

	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
	}

	if ((cr0 ^ old_cr0) & update_bits)
		kvm_mmu_reset_context(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
			!vcpu->guest_xcr0_loaded) {
		/* kvm_set_xcr() also depends on this */
		xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
		vcpu->guest_xcr0_loaded = 1;
	}
}

static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
{
	if (vcpu->guest_xcr0_loaded) {
		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
		vcpu->guest_xcr0_loaded = 0;
	}
}

int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	u64 xcr0 = xcr;
	u64 old_xcr0 = vcpu->arch.xcr0;
	u64 valid_bits;

	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		return 1;
	if (!(xcr0 & XSTATE_FP))
		return 1;
	if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
		return 1;

	/*
	 * Do not allow the guest to set bits that we do not support
	 * saving.  However, xcr0 bit 0 is always set, even if the
	 * emulated CPU does not support XSAVE (see fx_init).
	 */
	valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
	if (xcr0 & ~valid_bits)
		return 1;

	if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
		return 1;

	kvm_put_guest_xcr0(vcpu);
	vcpu->arch.xcr0 = xcr0;

	if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
		kvm_update_cpuid(vcpu);
	return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
	    __kvm_set_xcr(vcpu, index, xcr)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);

int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
				   X86_CR4_PAE | X86_CR4_SMEP;
	if (cr4 & CR4_RESERVED_BITS)
		return 1;

	if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
		return 1;

	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
		return 1;

	if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
		return 1;

	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
		return 1;

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE))
			return 1;
	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
		   && ((cr4 ^ old_cr4) & pdptr_bits)
		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
				   kvm_read_cr3(vcpu)))
		return 1;

	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
		if (!guest_cpuid_has_pcid(vcpu))
			return 1;

		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
			return 1;
	}

	if (kvm_x86_ops->set_cr4(vcpu, cr4))
		return 1;

	if (((cr4 ^ old_cr4) & pdptr_bits) ||
	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
		kvm_mmu_reset_context(vcpu);

	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);

	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
		kvm_update_cpuid(vcpu);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
		kvm_mmu_sync_roots(vcpu);
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
		return 0;
	}

	if (is_long_mode(vcpu)) {
		if (cr3 & CR3_L_MODE_RESERVED_BITS)
			return 1;
	} else if (is_pae(vcpu) && is_paging(vcpu) &&
		   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
		return 1;

	vcpu->arch.cr3 = cr3;
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
	kvm_mmu_new_cr3(vcpu);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS)
		return 1;
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->arch.cr8 = cr8;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
		kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
}

static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
	unsigned long dr7;

	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
		dr7 = vcpu->arch.guest_debug_dr7;
	else
		dr7 = vcpu->arch.dr7;
	kvm_x86_ops->set_dr7(vcpu, dr7);
	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
	if (dr7 & DR7_BP_EN_MASK)
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}

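/*
 * DR6[16] (RTM) reads as 1 on processors that do not support RTM, so
 * treat it as a fixed-1 bit unless the guest's CPUID advertises RTM.
 */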
static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
	u64 fixed = DR6_FIXED_1;

	if (!guest_cpuid_has_rtm(vcpu))
		fixed |= DR6_RTM;
	return fixed;
}

static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	switch (dr) {
	case 0 ... 3:
		vcpu->arch.db[dr] = val;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
			vcpu->arch.eff_db[dr] = val;
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	case 6:
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
		kvm_update_dr6(vcpu);
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1; /* #UD */
		/* fall through */
	default: /* 7 */
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
		kvm_update_dr7(vcpu);
		break;
	}

	return 0;
}

int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	int res;

	res = __kvm_set_dr(vcpu, dr, val);
	if (res > 0)
		kvm_queue_exception(vcpu, UD_VECTOR);
	else if (res < 0)
		kvm_inject_gp(vcpu, 0);

	return res;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);

static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	switch (dr) {
	case 0 ... 3:
		*val = vcpu->arch.db[dr];
		break;
	case 4:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	case 6:
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
			*val = vcpu->arch.dr6;
		else
			*val = kvm_x86_ops->get_dr6(vcpu);
		break;
	case 5:
		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
			return 1;
		/* fall through */
	default: /* 7 */
		*val = vcpu->arch.dr7;
		break;
	}

	return 0;
}

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	if (_kvm_get_dr(vcpu, dr, val)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

int kvm_rdpmc(struct kvm_vcpu *vcpu)
{
	u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
	u64 data;
	int err;

	err = kvm_pmu_read_pmc(vcpu, ecx, &data);
	if (err)
		return err;
	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
	kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
	return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu. This capabilities test skips MSRs that are
 * kvm-specific. Those are put in the beginning of the list.
 */

#define KVM_SAVE_MSRS_BEGIN 12
static u32 msrs_to_save[] = {
	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
	MSR_KVM_PV_EOI_EN,
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
};

static unsigned num_msrs_to_save;

static const u32 emulated_msrs[] = {
	MSR_IA32_TSC_ADJUST,
	MSR_IA32_TSCDEADLINE,
	MSR_IA32_MISC_ENABLE,
	MSR_IA32_MCG_STATUS,
	MSR_IA32_MCG_CTL,
};

bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & efer_reserved_bits)
		return false;

	if (efer & EFER_FFXSR) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
			return false;
	}

	if (efer & EFER_SVME) {
		struct kvm_cpuid_entry2 *feat;

		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
			return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);

static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	u64 old_efer = vcpu->arch.efer;

	if (!kvm_valid_efer(vcpu, efer))
		return 1;

	if (is_paging(vcpu)
	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
		return 1;

	efer &= ~EFER_LMA;
	efer |= vcpu->arch.efer & EFER_LMA;

	kvm_x86_ops->set_efer(vcpu, efer);

	/* Update reserved bits */
	if ((efer ^ old_efer) & EFER_NX)
		kvm_mmu_reset_context(vcpu);

	return 0;
}

void kvm_enable_efer_bits(u64 mask)
{
	efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	switch (msr->index) {
	case MSR_FS_BASE:
	case MSR_GS_BASE:
	case MSR_KERNEL_GS_BASE:
	case MSR_CSTAR:
	case MSR_LSTAR:
		if (is_noncanonical_address(msr->data))
			return 1;
		break;
	case MSR_IA32_SYSENTER_EIP:
	case MSR_IA32_SYSENTER_ESP:
		/*
		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
		 * non-canonical address is written on Intel but not on
		 * AMD (which ignores the top 32-bits, because it does
		 * not implement 64-bit SYSENTER).
		 *
		 * 64-bit code should hence be able to write a non-canonical
		 * value on AMD.  Making the address canonical ensures that
		 * vmentry does not fail on Intel after writing a non-canonical
		 * value, and that something deterministic happens if the guest
		 * invoked 64-bit SYSENTER.
		 */
		msr->data = get_canonical(msr->data);
	}
	return kvm_x86_ops->set_msr(vcpu, msr);
}
EXPORT_SYMBOL_GPL(kvm_set_msr);

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	struct msr_data msr;

	msr.data = *data;
	msr.index = index;
	msr.host_initiated = true;
	return kvm_set_msr(vcpu, &msr);
}

#ifdef CONFIG_X86_64
struct pvclock_gtod_data {
	seqcount_t seq;

	struct { /* extract of a clocksource struct */
		int vclock_mode;
		cycle_t cycle_last;
		cycle_t mask;
		u32 mult;
		u32 shift;
	} clock;

	u64 boot_ns;
	u64 nsec_base;
};

static struct pvclock_gtod_data pvclock_gtod_data;

static void update_pvclock_gtod(struct timekeeper *tk)
{
	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
	u64 boot_ns;

	boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));

	write_seqcount_begin(&vdata->seq);

	/* copy pvclock gtod data */
	vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode;
	vdata->clock.cycle_last = tk->tkr.cycle_last;
	vdata->clock.mask = tk->tkr.mask;
	vdata->clock.mult = tk->tkr.mult;
	vdata->clock.shift = tk->tkr.shift;

	vdata->boot_ns = boot_ns;
	vdata->nsec_base = tk->tkr.xtime_nsec;

	write_seqcount_end(&vdata->seq);
}
#endif

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
	int version;
	int r;
	struct pvclock_wall_clock wc;
	struct timespec boot;

	if (!wall_clock)
		return;

	r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
	if (r)
		return;

	if (version & 1)
		++version;  /* first time write, random junk */

	++version;

	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));

	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here.  guest system time equals host
	 * system time for us, thus we must fill in host boot time here.
	 */
	getboottime(&boot);

	if (kvm->arch.kvmclock_offset) {
		struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
		boot = timespec_sub(boot, ts);
	}
	wc.sec = boot.tv_sec;
	wc.nsec = boot.tv_nsec;
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

	version++;
	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}

static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	uint32_t quotient, remainder;

	/* Don't try to replace with do_div(), this one calculates
	 * "(dividend << 32) / divisor" */
	__asm__ ( "divl %4"
		  : "=a" (quotient), "=d" (remainder)
		  : "0" (0), "1" (dividend), "r" (divisor) );
	return quotient;
}

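/*
 * Find (pshift, pmultiplier) such that a tick count at base_khz can be
 * converted to scaled_khz with pvclock_scale_delta(): the delta is first
 * shifted by pshift (left if positive, right if negative) and then
 * multiplied by pmultiplier/2^32.  Illustrative example: scaling a
 * 2500000 kHz TSC to a 1000000 kHz clock yields pshift = -1 and
 * pmultiplier = 0.8 * 2^32, since (2500000 / 2) * 0.8 = 1000000.
 */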
static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz,
			       s8 *pshift, u32 *pmultiplier)
{
	uint64_t scaled64;
	int32_t shift = 0;
	uint64_t tps64;
	uint32_t tps32;

	tps64 = base_khz * 1000LL;
	scaled64 = scaled_khz * 1000LL;
	while (tps64 > scaled64 * 2 || tps64 & 0xffffffff00000000ULL) {
		tps64 >>= 1;
		shift--;
	}

	tps32 = (uint32_t)tps64;
	while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
		if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
			scaled64 >>= 1;
		else
			tps32 <<= 1;
		shift++;
	}

	*pshift = shift;
	*pmultiplier = div_frac(scaled64, tps32);

	pr_debug("%s: base_khz %u => %u, shift %d, mul %u\n",
		 __func__, base_khz, scaled_khz, shift, *pmultiplier);
}

static inline u64 get_kernel_ns(void)
{
	return ktime_get_boot_ns();
}

#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;

static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
				   vcpu->arch.virtual_tsc_shift);
}

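/*
 * Apply a frequency tolerance expressed in parts per million, e.g.
 * adjust_tsc_khz(100000, 250) = 100000 * 1.000250 = 100025 kHz.
 */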
static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
	u64 v = (u64)khz * (1000000 + ppm);
	do_div(v, 1000000);
	return v;
}

static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
	u32 thresh_lo, thresh_hi;
	int use_scaling = 0;

	/* tsc_khz can be zero if TSC calibration fails */
	if (this_tsc_khz == 0)
		return;

	/* Compute a scale to convert nanoseconds in TSC cycles */
	kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
			   &vcpu->arch.virtual_tsc_shift,
			   &vcpu->arch.virtual_tsc_mult);
	vcpu->arch.virtual_tsc_khz = this_tsc_khz;

	/*
	 * Compute the acceptable variation in TSC rate and decide whether
	 * the requested rate is within that tolerance of the hardware
	 * rate.  If so, no scaling or compensation need be done.
	 */
	thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
	thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
	if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
		pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
		use_scaling = 1;
	}
	kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}

static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
				      vcpu->arch.virtual_tsc_mult,
				      vcpu->arch.virtual_tsc_shift);
	tsc += vcpu->arch.this_tsc_write;
	return tsc;
}

void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	bool vcpus_matched;
	bool do_request = false;
	struct kvm_arch *ka = &vcpu->kvm->arch;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&vcpu->kvm->online_vcpus));

	/*
	 * Request a masterclock update if the masterclock needs to be
	 * enabled (host uses the TSC clocksource and all vcpu TSCs have
	 * matched) or disabled (TSCs no longer matched).
	 */
	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
		if (!ka->use_master_clock)
			do_request = 1;

	if (!vcpus_matched && ka->use_master_clock)
		do_request = 1;

	if (do_request)
		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
			    atomic_read(&vcpu->kvm->online_vcpus),
			    ka->use_master_clock, gtod->clock.vclock_mode);
#endif
}

static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
	u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}

void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	struct kvm *kvm = vcpu->kvm;
	u64 offset, ns, elapsed;
	unsigned long flags;
	s64 usdiff;
	bool matched;
	bool already_matched;
	u64 data = msr->data;

	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
	offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
	ns = get_kernel_ns();
	elapsed = ns - kvm->arch.last_tsc_nsec;

	if (vcpu->arch.virtual_tsc_khz) {
		int faulted = 0;

		/* n.b - signed multiplication and division required */
		usdiff = data - kvm->arch.last_tsc_write;
#ifdef CONFIG_X86_64
		usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
#else
		/* do_div() only does unsigned */
		asm("1: idivl %[divisor]\n"
		    "2: xor %%edx, %%edx\n"
		    "   movl $0, %[faulted]\n"
		    "3:\n"
		    ".section .fixup,\"ax\"\n"
		    "4: movl $1, %[faulted]\n"
		    "   jmp  3b\n"
		    ".previous\n"

		    _ASM_EXTABLE(1b, 4b)

		    : "=A"(usdiff), [faulted] "=r" (faulted)
		    : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));

#endif
		do_div(elapsed, 1000);
		usdiff -= elapsed;
		if (usdiff < 0)
			usdiff = -usdiff;

		/* idivl overflow => difference is larger than USEC_PER_SEC */
		if (faulted)
			usdiff = USEC_PER_SEC;
	} else
		usdiff = USEC_PER_SEC; /* disable TSC match window below */

	/*
	 * Special case: TSC write with a small delta (1 second) of virtual
	 * cycle time against real time is interpreted as an attempt to
	 * synchronize the CPU.
	 *
	 * For a reliable TSC, we can match TSC offsets, and for an unstable
	 * TSC, we add elapsed time in this computation.  We could let the
	 * compensation code attempt to catch up if we fall behind, but
	 * it's better to try to match offsets from the beginning.
	 */
	if (usdiff < USEC_PER_SEC &&
	    vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
		if (!check_tsc_unstable()) {
			offset = kvm->arch.cur_tsc_offset;
			pr_debug("kvm: matched tsc offset for %llu\n", data);
		} else {
			u64 delta = nsec_to_cycles(vcpu, elapsed);
			data += delta;
			offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
		}
		matched = true;
		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
	} else {
		/*
		 * We split periods of matched TSC writes into generations.
		 * For each generation, we track the original measured
		 * nanosecond time, offset, and write, so if TSCs are in
		 * sync, we can match exact offset, and if not, we can match
		 * exact software computation in compute_guest_tsc()
		 *
		 * These values are tracked in kvm->arch.cur_xxx variables.
		 */
		kvm->arch.cur_tsc_generation++;
		kvm->arch.cur_tsc_nsec = ns;
		kvm->arch.cur_tsc_write = data;
		kvm->arch.cur_tsc_offset = offset;
		matched = false;
		pr_debug("kvm: new tsc generation %llu, clock %llu\n",
			 kvm->arch.cur_tsc_generation, data);
	}

	/*
	 * We also track the most recent recorded KHZ, write and time to
	 * allow the matching interval to be extended at each write.
	 */
	kvm->arch.last_tsc_nsec = ns;
	kvm->arch.last_tsc_write = data;
	kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;

	vcpu->arch.last_guest_tsc = data;

	/* Keep track of which generation this VCPU has synchronized to */
	vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;

	if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
		update_ia32_tsc_adjust_msr(vcpu, offset);
	kvm_x86_ops->write_tsc_offset(vcpu, offset);
	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
	if (!matched) {
		kvm->arch.nr_vcpus_matched_tsc = 0;
	} else if (!already_matched) {
		kvm->arch.nr_vcpus_matched_tsc++;
	}

	kvm_track_tsc_matching(vcpu);
	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);

#ifdef CONFIG_X86_64

static cycle_t read_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)vget_cycles();

	last = pvclock_gtod_data.clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

static inline u64 vgettsc(cycle_t *cycle_now)
{
	long v;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	*cycle_now = read_tsc();

	v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
	return v * gtod->clock.mult;
}

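/*
 * Lockless read of the boot-based monotonic clock: retry the computation
 * whenever update_pvclock_gtod() bumps the seqcount mid-read.  Returns the
 * host vclock mode so the caller can tell whether the result came from a
 * TSC-backed clocksource.
 */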
static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ns = gtod->nsec_base;
		ns += vgettsc(cycle_now);
		ns >>= gtod->clock.shift;
		ns += gtod->boot_ns;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	*t = ns;

	return mode;
}

/* returns true if host is using tsc clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
{
	/* checked again under seqlock below */
	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
		return false;

	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
}
#endif

/*
 * Even with a stable TSC across physical and virtual CPUs, two vcpus that
 * compute guest time as "timespec + (rdtsc - tsc_snapshot)" from snapshots
 * taken at slightly different moments can see guest time go backwards:
 * monotonicity requires that the difference between the two host time
 * snapshots be at least as large as the difference between the two TSC
 * snapshots, which the host timekeeper does not guarantee when the pvclock
 * areas of distinct vcpus are updated independently.
 *
 * To avoid that problem, do not allow visibility of distinct
 * system_timestamp/tsc_timestamp values simultaneously: use a master
 * copy of host monotonic time values, and update that master copy
 * in lockstep.
 *
 * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
 */
static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
	struct kvm_arch *ka = &kvm->arch;
	int vclock_mode;
	bool host_tsc_clocksource, vcpus_matched;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&kvm->online_vcpus));

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	host_tsc_clocksource = kvm_get_time_and_clockread(
					&ka->master_kernel_ns,
					&ka->master_cycle_now);

	ka->use_master_clock = host_tsc_clocksource && vcpus_matched
				&& !backwards_tsc_observed;

	if (ka->use_master_clock)
		atomic_set(&kvm_guest_has_master_clock, 1);

	vclock_mode = pvclock_gtod_data.clock.vclock_mode;
	trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
					vcpus_matched);
#endif
}

static void kvm_gen_update_masterclock(struct kvm *kvm)
{
#ifdef CONFIG_X86_64
	int i;
	struct kvm_vcpu *vcpu;
	struct kvm_arch *ka = &kvm->arch;

	spin_lock(&ka->pvclock_gtod_sync_lock);
	kvm_make_mclock_inprogress_request(kvm);
	/* no guest entries from this point */
	pvclock_update_vm_gtod_copy(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

	/* guest entries allowed */
	kvm_for_each_vcpu(i, vcpu, kvm)
		clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);

	spin_unlock(&ka->pvclock_gtod_sync_lock);
#endif
}

static int kvm_guest_time_update(struct kvm_vcpu *v)
{
	unsigned long flags, this_tsc_khz;
	struct kvm_vcpu_arch *vcpu = &v->arch;
	struct kvm_arch *ka = &v->kvm->arch;
	s64 kernel_ns;
	u64 tsc_timestamp, host_tsc;
	struct pvclock_vcpu_time_info guest_hv_clock;
	u8 pvclock_flags;
	bool use_master_clock;

	kernel_ns = 0;
	host_tsc = 0;

	/*
	 * If the host uses TSC clock, then passthrough TSC as stable
	 * to the guest.
	 */
	spin_lock(&ka->pvclock_gtod_sync_lock);
	use_master_clock = ka->use_master_clock;
	if (use_master_clock) {
		host_tsc = ka->master_cycle_now;
		kernel_ns = ka->master_kernel_ns;
	}
	spin_unlock(&ka->pvclock_gtod_sync_lock);

	/* Keep irq disabled to prevent changes to the clock */
	local_irq_save(flags);
	this_tsc_khz = __this_cpu_read(cpu_tsc_khz);
	if (unlikely(this_tsc_khz == 0)) {
		local_irq_restore(flags);
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
		return 1;
	}
	if (!use_master_clock) {
		host_tsc = native_read_tsc();
		kernel_ns = get_kernel_ns();
	}

	tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);

	/*
	 * We may have to catch up the TSC to match elapsed wallclock time,
	 * if it lags behind the value it should have based on elapsed time
	 * (TSC catchup mode): move it forward by adjusting the TSC offset.
	 */
	if (vcpu->tsc_catchup) {
		u64 tsc = compute_guest_tsc(v, kernel_ns);
		if (tsc > tsc_timestamp) {
			adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
			tsc_timestamp = tsc;
		}
	}

	local_irq_restore(flags);

	if (!vcpu->pv_time_enabled)
		return 0;

	if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
		kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
				   &vcpu->hv_clock.tsc_shift,
				   &vcpu->hv_clock.tsc_to_system_mul);
		vcpu->hw_tsc_khz = this_tsc_khz;
	}

	/* With all the info we got, fill in the values */
	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
	vcpu->last_guest_tsc = tsc_timestamp;

	/*
	 * The interface expects us to write an even number signaling that the
	 * update is finished. Since the guest won't see the intermediate
	 * state, we just increase by 2 at the end.
	 */
	vcpu->hv_clock.version += 2;

	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
		&guest_hv_clock, sizeof(guest_hv_clock))))
		return 0;

	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);

	if (vcpu->pvclock_set_guest_stopped_request) {
		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
		vcpu->pvclock_set_guest_stopped_request = false;
	}

	/* If the host uses TSC clocksource, then it is stable */
	if (use_master_clock)
		pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;

	vcpu->hv_clock.flags = pvclock_flags;

	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
				&vcpu->hv_clock,
				sizeof(vcpu->hv_clock));
	return 0;
}

/*
 * kvmclock updates which are isolated to a given vcpu, such as
 * vcpu->cpu migration, should not allow system_timestamp from
 * the rest of the vcpus to remain static. Otherwise ntp frequency
 * correction applies to one vcpu's system_timestamp but not
 * the others.
 *
 * So in those cases, request a kvmclock update for all vcpus.
 *
 * Worst case for a remote vcpu to update its kvmclock is then
 * bounded by maximum nohz sleep latency.
 */
#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)

static void kvmclock_update_fn(struct work_struct *work)
{
	int i;
	struct delayed_work *dwork = to_delayed_work(work);
	struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
					   kvmclock_update_work);
	struct kvm *kvm = container_of(ka, struct kvm, arch);
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		kvm_vcpu_kick(vcpu);
	}
}

static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
{
	struct kvm *kvm = v->kvm;

	kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
	schedule_delayed_work(&kvm->arch.kvmclock_update_work,
					KVMCLOCK_UPDATE_DELAY);
}

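/*
 * Periodically (every 300s) force a full kvmclock refresh on all vcpus so
 * that NTP frequency corrections on the host keep propagating to guests
 * even when nothing else triggers an update.
 */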
#define KVMCLOCK_SYNC_PERIOD (300 * HZ)

static void kvmclock_sync_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
					   kvmclock_sync_work);
	struct kvm *kvm = container_of(ka, struct kvm, arch);

	schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
	schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
					KVMCLOCK_SYNC_PERIOD);
}

static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	case 0x2f8:
		return true;
	}
	return false;
}

static bool valid_pat_type(unsigned t)
{
	return t < 8 && (1 << t) & 0xf3;	/* 0, 1, 4, 5, 6, 7 */
}

static bool valid_mtrr_type(unsigned t)
{
	return t < 8 && (1 << t) & 0x73;	/* 0, 1, 4, 5, 6 */
}

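/*
 * Variable-range MTRRs occupy MSR pairs starting at 0x200: even MSRs
 * (0x200 + 2n) hold PHYSBASEn and odd MSRs (0x201 + 2n) hold PHYSMASKn.
 * The (msr & 1) test and (msr - 0x200) / 2 arithmetic below recover the
 * base/mask role and the range index from the MSR number.
 */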
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;
	u64 mask;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_IA32_CR_PAT) {
		for (i = 0; i < 8; i++)
			if (!valid_pat_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	} else if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8 ; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));

	mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
	if ((msr & 1) == 0) {
		/* MTRR base */
		if (!valid_mtrr_type(data & 0xff))
			return false;
		mask |= 0xf00;
	} else
		/* MTRR mask */
		mask |= 0x7ff;
	if (data & mask) {
		kvm_inject_gp(vcpu, 0);
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_valid);

static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;

	if (!kvm_mtrr_valid(vcpu, msr, data))
		return 1;

	if (msr == MSR_MTRRdefType) {
		vcpu->arch.mtrr_state.def_type = data;
		vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
	} else if (msr == MSR_MTRRfix64K_00000)
		p[0] = data;
	else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
		p[1 + msr - MSR_MTRRfix16K_80000] = data;
	else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
		p[3 + msr - MSR_MTRRfix4K_C0000] = data;
	else if (msr == MSR_IA32_CR_PAT)
		vcpu->arch.pat = data;
	else {	/* Variable MTRRs */
		int idx, is_mtrr_mask;
		u64 *pt;

		idx = (msr - 0x200) / 2;
		is_mtrr_mask = msr - 0x200 - 2 * idx;
		if (!is_mtrr_mask)
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
		else
			pt =
			  (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
		*pt = data;
	}

	kvm_mmu_reset_context(vcpu);
	return 0;
}

static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	u64 mcg_cap = vcpu->arch.mcg_cap;
	unsigned bank_num = mcg_cap & 0xff;

	switch (msr) {
	case MSR_IA32_MCG_STATUS:
		vcpu->arch.mcg_status = data;
		break;
	case MSR_IA32_MCG_CTL:
		if (!(mcg_cap & MCG_CTL_P))
			return 1;
		if (data != 0 && data != ~(u64)0)
			return -1;
		vcpu->arch.mcg_ctl = data;
		break;
	default:
		if (msr >= MSR_IA32_MC0_CTL &&
		    msr < MSR_IA32_MCx_CTL(bank_num)) {
			u32 offset = msr - MSR_IA32_MC0_CTL;
			/* only 0 or all 1s can be written to IA32_MCi_CTL;
			 * some Linux kernels though clear bit 10 in bank 4 to
			 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
			 * this to avoid an uncaught #GP in the guest
			 */
			if ((offset & 0x3) == 0 &&
			    data != 0 && (data | (1 << 10)) != ~(u64)0)
				return -1;
			vcpu->arch.mce_banks[offset] = data;
			break;
		}
		return 1;
	}
	return 0;
}

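/*
 * The Xen HVM config MSR points KVM at a userspace-supplied blob of
 * hypercall pages: the low bits of the written value select a page index
 * into the blob (32- or 64-bit variant, depending on guest mode), and the
 * page-aligned bits give the guest physical address the page is copied to.
 */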
static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	int lm = is_long_mode(vcpu);
	u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
		: (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
		: kvm->arch.xen_hvm_config.blob_size_32;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	u8 *page;
	int r;

	r = -E2BIG;
	if (page_num >= blob_size)
		goto out;
	r = -ENOMEM;
	page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
	if (IS_ERR(page)) {
		r = PTR_ERR(page);
		goto out;
	}
	if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE))
		goto out_free;
	r = 0;
out_free:
	kfree(page);
out:
	return r;
}

static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
	return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
}

static bool kvm_hv_msr_partition_wide(u32 msr)
{
	bool r = false;
	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
		r = true;
		break;
	}

	return r;
}

static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm *kvm = vcpu->kvm;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		kvm->arch.hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!kvm->arch.hv_guest_os_id)
			kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u64 gfn;
		unsigned long addr;
		u8 instructions[4];

		/* if guest os id is not set hypercall should remain disabled */
		if (!kvm->arch.hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			kvm->arch.hv_hypercall = data;
			break;
		}
		gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		kvm_x86_ops->patch_hypercall(vcpu, instructions);
		((unsigned char *)instructions)[3] = 0xc3; /* ret */
		if (__copy_to_user((void __user *)addr, instructions, 4))
			return 1;
		kvm->arch.hv_hypercall = data;
		mark_page_dirty(kvm, gfn);
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC: {
		u64 gfn;
		HV_REFERENCE_TSC_PAGE tsc_ref;
		memset(&tsc_ref, 0, sizeof(tsc_ref));
		kvm->arch.hv_tsc_page = data;
		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
			break;
		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
		if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
			&tsc_ref, sizeof(tsc_ref)))
			return 1;
		mark_page_dirty(kvm, gfn);
		break;
	}
	default:
		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			    "data 0x%llx\n", msr, data);
		return 1;
	}
	return 0;
}

static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
	case HV_X64_MSR_APIC_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
			vcpu->arch.hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = gfn_to_hva(vcpu->kvm, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		if (__clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		vcpu->arch.hv_vapic = data;
		mark_page_dirty(vcpu->kvm, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	default:
		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
			    "data 0x%llx\n", msr, data);
		return 1;
	}

	return 0;
}

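/*
 * MSR_KVM_ASYNC_PF_EN layout: bits 63:6 hold the guest physical address
 * of the 32-bit async-PF "reason" word, bit 0 enables the feature, bit 1
 * requests delivery even in kernel mode, and bits 5:2 are reserved.
 */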
static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
{
	gpa_t gpa = data & ~0x3f;

	/* Bits 2:5 are reserved, should be zero */
	if (data & 0x3c)
		return 1;

	vcpu->arch.apf.msr_val = data;

	if (!(data & KVM_ASYNC_PF_ENABLED)) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
		return 0;
	}

	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
					sizeof(u32)))
		return 1;

	vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
	kvm_async_pf_wakeup_all(vcpu);
	return 0;
}

static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pv_time_enabled = false;
}

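/*
 * Steal time is the time a vcpu spent runnable but not running, taken
 * from the scheduler's run_delay.  accumulate_steal_time() snapshots the
 * delta and record_steal_time() publishes it to the guest's
 * kvm_steal_time area, bumping its version counter so the guest can
 * detect a concurrent update.
 */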
2055static void accumulate_steal_time(struct kvm_vcpu *vcpu)
2056{
2057 u64 delta;
2058
2059 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2060 return;
2061
2062 delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
2063 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2064 vcpu->arch.st.accum_steal = delta;
2065}
2066
2067static void record_steal_time(struct kvm_vcpu *vcpu)
2068{
2069 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2070 return;
2071
2072 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2073 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2074 return;
2075
2076 vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal;
2077 vcpu->arch.st.steal.version += 2;
2078 vcpu->arch.st.accum_steal = 0;
2079
2080 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2081 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2082}
2083
2084int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2085{
2086 bool pr = false;
2087 u32 msr = msr_info->index;
2088 u64 data = msr_info->data;
2089
2090 switch (msr) {
2091 case MSR_AMD64_NB_CFG:
2092 case MSR_IA32_UCODE_REV:
2093 case MSR_IA32_UCODE_WRITE:
2094 case MSR_VM_HSAVE_PA:
2095 case MSR_AMD64_PATCH_LOADER:
2096 case MSR_AMD64_BU_CFG2:
2097 break;
2098
2099 case MSR_EFER:
2100 return set_efer(vcpu, data);
2101 case MSR_K7_HWCR:
2102 data &= ~(u64)0x40;
2103 data &= ~(u64)0x100;
2104 data &= ~(u64)0x8;
2105 data &= ~(u64)0x40000;
2106 if (data != 0) {
2107 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2108 data);
2109 return 1;
2110 }
2111 break;
2112 case MSR_FAM10H_MMIO_CONF_BASE:
2113 if (data != 0) {
2114 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2115 "0x%llx\n", data);
2116 return 1;
2117 }
2118 break;
2119 case MSR_IA32_DEBUGCTLMSR:
2120 if (!data) {
2121
2122 break;
2123 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2124
2125
2126 return 1;
2127 }
2128 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2129 __func__, data);
2130 break;
2131 case 0x200 ... 0x2ff:
2132 return set_msr_mtrr(vcpu, msr, data);
2133 case MSR_IA32_APICBASE:
2134 return kvm_set_apic_base(vcpu, msr_info);
2135 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2136 return kvm_x2apic_msr_write(vcpu, msr, data);
2137 case MSR_IA32_TSCDEADLINE:
2138 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2139 break;
2140 case MSR_IA32_TSC_ADJUST:
2141 if (guest_cpuid_has_tsc_adjust(vcpu)) {
2142 if (!msr_info->host_initiated) {
2143 u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2144 kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
2145 }
2146 vcpu->arch.ia32_tsc_adjust_msr = data;
2147 }
2148 break;
2149 case MSR_IA32_MISC_ENABLE:
2150 vcpu->arch.ia32_misc_enable_msr = data;
2151 break;
2152 case MSR_KVM_WALL_CLOCK_NEW:
2153 case MSR_KVM_WALL_CLOCK:
2154 vcpu->kvm->arch.wall_clock = data;
2155 kvm_write_wall_clock(vcpu->kvm, data);
2156 break;
2157 case MSR_KVM_SYSTEM_TIME_NEW:
2158 case MSR_KVM_SYSTEM_TIME: {
2159 u64 gpa_offset;
2160 kvmclock_reset(vcpu);
2161
2162 vcpu->arch.time = data;
2163 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2164
2165
2166 if (!(data & 1))
2167 break;
2168
2169 gpa_offset = data & ~(PAGE_MASK | 1);
2170
2171 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2172 &vcpu->arch.pv_time, data & ~1ULL,
2173 sizeof(struct pvclock_vcpu_time_info)))
2174 vcpu->arch.pv_time_enabled = false;
2175 else
2176 vcpu->arch.pv_time_enabled = true;
2177
2178 break;
2179 }
2180 case MSR_KVM_ASYNC_PF_EN:
2181 if (kvm_pv_enable_async_pf(vcpu, data))
2182 return 1;
2183 break;
2184 case MSR_KVM_STEAL_TIME:
2185
2186 if (unlikely(!sched_info_on()))
2187 return 1;
2188
2189 if (data & KVM_STEAL_RESERVED_MASK)
2190 return 1;
2191
2192 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
2193 data & KVM_STEAL_VALID_BITS,
2194 sizeof(struct kvm_steal_time)))
2195 return 1;
2196
2197 vcpu->arch.st.msr_val = data;
2198
2199 if (!(data & KVM_MSR_ENABLED))
2200 break;
2201
2202 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2203
2204 preempt_disable();
2205 accumulate_steal_time(vcpu);
2206 preempt_enable();
2207
2208 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2209
2210 break;
2211 case MSR_KVM_PV_EOI_EN:
2212 if (kvm_lapic_enable_pv_eoi(vcpu, data))
2213 return 1;
2214 break;
2215
2216 case MSR_IA32_MCG_CTL:
2217 case MSR_IA32_MCG_STATUS:
2218 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2219 return set_msr_mce(vcpu, msr, data);
2220
2221
2222
2223
2224
2225
2226
2227
2228 case MSR_K7_EVNTSEL0:
2229 case MSR_K7_EVNTSEL1:
2230 case MSR_K7_EVNTSEL2:
2231 case MSR_K7_EVNTSEL3:
2232 if (data != 0)
2233 vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
2234 "0x%x data 0x%llx\n", msr, data);
2235 break;
2236
2237
2238
2239 case MSR_K7_PERFCTR0:
2240 case MSR_K7_PERFCTR1:
2241 case MSR_K7_PERFCTR2:
2242 case MSR_K7_PERFCTR3:
2243 vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
2244 "0x%x data 0x%llx\n", msr, data);
2245 break;
2246 case MSR_P6_PERFCTR0:
2247 case MSR_P6_PERFCTR1:
2248 pr = true;
2249 case MSR_P6_EVNTSEL0:
2250 case MSR_P6_EVNTSEL1:
2251 if (kvm_pmu_msr(vcpu, msr))
2252 return kvm_pmu_set_msr(vcpu, msr_info);
2253
2254 if (pr || data != 0)
2255 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2256 "0x%x data 0x%llx\n", msr, data);
2257 break;
2258 case MSR_K7_CLK_CTL:
2259
2260
2261
2262
2263
2264
2265
2266
2267 break;
2268 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2269 if (kvm_hv_msr_partition_wide(msr)) {
2270 int r;
2271 mutex_lock(&vcpu->kvm->lock);
2272 r = set_msr_hyperv_pw(vcpu, msr, data);
2273 mutex_unlock(&vcpu->kvm->lock);
2274 return r;
2275 } else
2276 return set_msr_hyperv(vcpu, msr, data);
2277 break;
2278 case MSR_IA32_BBL_CR_CTL3:
2279
2280
2281
2282 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
2283 break;
2284 case MSR_AMD64_OSVW_ID_LENGTH:
2285 if (!guest_cpuid_has_osvw(vcpu))
2286 return 1;
2287 vcpu->arch.osvw.length = data;
2288 break;
2289 case MSR_AMD64_OSVW_STATUS:
2290 if (!guest_cpuid_has_osvw(vcpu))
2291 return 1;
2292 vcpu->arch.osvw.status = data;
2293 break;
2294 default:
2295 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2296 return xen_hvm_config(vcpu, data);
2297 if (kvm_pmu_msr(vcpu, msr))
2298 return kvm_pmu_set_msr(vcpu, msr_info);
2299 if (!ignore_msrs) {
2300 vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
2301 msr, data);
2302 return 1;
2303 } else {
2304 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
2305 msr, data);
2306 break;
2307 }
2308 }
2309 return 0;
2310}
2311EXPORT_SYMBOL_GPL(kvm_set_msr_common);
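
/*
 * A sketch of how a guest WRMSR typically reaches the function above,
 * assuming the usual VMX/SVM call chain (the vendor set_msr handlers
 * live outside this file and fall back here for any MSR they do not
 * special-case):
 *
 *	WRMSR vmexit
 *	  -> kvm_set_msr(vcpu, msr_info)
 *	    -> kvm_x86_ops->set_msr()	(vmx_set_msr()/svm_set_msr())
 *	      -> kvm_set_msr_common()	(their default: label)
 */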
2312
/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
2319int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2320{
2321 return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
2322}
2323
2324static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2325{
2326 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
2327
2328 if (!msr_mtrr_valid(msr))
2329 return 1;
2330
2331 if (msr == MSR_MTRRdefType)
2332 *pdata = vcpu->arch.mtrr_state.def_type +
2333 (vcpu->arch.mtrr_state.enabled << 10);
2334 else if (msr == MSR_MTRRfix64K_00000)
2335 *pdata = p[0];
2336 else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
2337 *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
2338 else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
2339 *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
2340 else if (msr == MSR_IA32_CR_PAT)
2341 *pdata = vcpu->arch.pat;
2342 else {
2343 int idx, is_mtrr_mask;
2344 u64 *pt;
2345
2346 idx = (msr - 0x200) / 2;
2347 is_mtrr_mask = msr - 0x200 - 2 * idx;
2348 if (!is_mtrr_mask)
2349 pt =
2350 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
2351 else
2352 pt =
2353 (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
2354 *pdata = *pt;
2355 }
2356
2357 return 0;
2358}
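
/*
 * Worked example for the variable-range decoding above, to make the
 * 0x200-based arithmetic concrete: reading MSR 0x203 (the PHYSMASK
 * register of variable range 1) gives
 *
 *	idx          = (0x203 - 0x200) / 2     = 1
 *	is_mtrr_mask = 0x203 - 0x200 - 2 * idx = 1
 *
 * so *pdata is read from var_ranges[1].mask_lo, while MSR 0x202
 * (is_mtrr_mask == 0) would select var_ranges[1].base_lo instead.
 */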
2359
2360static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2361{
2362 u64 data;
2363 u64 mcg_cap = vcpu->arch.mcg_cap;
2364 unsigned bank_num = mcg_cap & 0xff;
2365
2366 switch (msr) {
2367 case MSR_IA32_P5_MC_ADDR:
2368 case MSR_IA32_P5_MC_TYPE:
2369 data = 0;
2370 break;
2371 case MSR_IA32_MCG_CAP:
2372 data = vcpu->arch.mcg_cap;
2373 break;
2374 case MSR_IA32_MCG_CTL:
2375 if (!(mcg_cap & MCG_CTL_P))
2376 return 1;
2377 data = vcpu->arch.mcg_ctl;
2378 break;
2379 case MSR_IA32_MCG_STATUS:
2380 data = vcpu->arch.mcg_status;
2381 break;
2382 default:
2383 if (msr >= MSR_IA32_MC0_CTL &&
2384 msr < MSR_IA32_MCx_CTL(bank_num)) {
2385 u32 offset = msr - MSR_IA32_MC0_CTL;
2386 data = vcpu->arch.mce_banks[offset];
2387 break;
2388 }
2389 return 1;
2390 }
2391 *pdata = data;
2392 return 0;
2393}
2394
2395static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2396{
2397 u64 data = 0;
2398 struct kvm *kvm = vcpu->kvm;
2399
2400 switch (msr) {
2401 case HV_X64_MSR_GUEST_OS_ID:
2402 data = kvm->arch.hv_guest_os_id;
2403 break;
2404 case HV_X64_MSR_HYPERCALL:
2405 data = kvm->arch.hv_hypercall;
2406 break;
2407 case HV_X64_MSR_TIME_REF_COUNT: {
2408 data =
2409 div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
2410 break;
2411 }
2412 case HV_X64_MSR_REFERENCE_TSC:
2413 data = kvm->arch.hv_tsc_page;
2414 break;
2415 default:
2416 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2417 return 1;
2418 }
2419
2420 *pdata = data;
2421 return 0;
2422}
2423
2424static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2425{
2426 u64 data = 0;
2427
2428 switch (msr) {
2429 case HV_X64_MSR_VP_INDEX: {
2430 int r;
2431 struct kvm_vcpu *v;
2432 kvm_for_each_vcpu(r, v, vcpu->kvm) {
2433 if (v == vcpu) {
2434 data = r;
2435 break;
2436 }
2437 }
2438 break;
2439 }
2440 case HV_X64_MSR_EOI:
2441 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
2442 case HV_X64_MSR_ICR:
2443 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
2444 case HV_X64_MSR_TPR:
2445 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
2446 case HV_X64_MSR_APIC_ASSIST_PAGE:
2447 data = vcpu->arch.hv_vapic;
2448 break;
2449 default:
2450 vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
2451 return 1;
2452 }
2453 *pdata = data;
2454 return 0;
2455}
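
/*
 * Note on the Hyper-V helpers above: "partition wide" MSRs live in
 * struct kvm and are serialized with kvm->lock by the callers in
 * kvm_set_msr_common()/kvm_get_msr_common(), while the per-vcpu MSRs
 * (VP_INDEX, EOI, ICR, TPR and the APIC assist page) only touch
 * per-vcpu state and therefore need no VM-wide lock.
 */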
2456
2457int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2458{
2459 u64 data;
2460
2461 switch (msr) {
2462 case MSR_IA32_PLATFORM_ID:
2463 case MSR_IA32_EBL_CR_POWERON:
2464 case MSR_IA32_DEBUGCTLMSR:
2465 case MSR_IA32_LASTBRANCHFROMIP:
2466 case MSR_IA32_LASTBRANCHTOIP:
2467 case MSR_IA32_LASTINTFROMIP:
2468 case MSR_IA32_LASTINTTOIP:
2469 case MSR_K8_SYSCFG:
2470 case MSR_K7_HWCR:
2471 case MSR_VM_HSAVE_PA:
2472 case MSR_K7_EVNTSEL0:
2473 case MSR_K7_EVNTSEL1:
2474 case MSR_K7_EVNTSEL2:
2475 case MSR_K7_EVNTSEL3:
2476 case MSR_K7_PERFCTR0:
2477 case MSR_K7_PERFCTR1:
2478 case MSR_K7_PERFCTR2:
2479 case MSR_K7_PERFCTR3:
2480 case MSR_K8_INT_PENDING_MSG:
2481 case MSR_AMD64_NB_CFG:
2482 case MSR_FAM10H_MMIO_CONF_BASE:
2483 case MSR_AMD64_BU_CFG2:
2484 data = 0;
2485 break;
2486 case MSR_P6_PERFCTR0:
2487 case MSR_P6_PERFCTR1:
2488 case MSR_P6_EVNTSEL0:
2489 case MSR_P6_EVNTSEL1:
2490 if (kvm_pmu_msr(vcpu, msr))
2491 return kvm_pmu_get_msr(vcpu, msr, pdata);
2492 data = 0;
2493 break;
2494 case MSR_IA32_UCODE_REV:
2495 data = 0x100000000ULL;
2496 break;
2497 case MSR_MTRRcap:
2498 data = 0x500 | KVM_NR_VAR_MTRR;
2499 break;
2500 case 0x200 ... 0x2ff:
2501 return get_msr_mtrr(vcpu, msr, pdata);
	case 0xcd: /* fsb frequency */
2503 data = 3;
2504 break;
		/*
		 * MSR_EBC_FREQUENCY_ID
		 * Conservative value valid for even the basic CPU models.
		 * Models 0,1: 000 in bits 23:21 indicating a bus speed of
		 *	100MHz, model 2 000 in bits 23:21 indicating 133MHz,
		 * models 3,4,5 100 in bits 23:21 indicating 200MHz,
		 *	and 266MHz for model 3, or 4. Set Core Clock
		 *	Frequency to System Bus Frequency Ratio to 1 (bits
		 *	31:24) even though these are only valid for CPU
		 *	models > 2, however guests may end up dividing or
		 *	multiplying by zero otherwise.
		 */
2516 case MSR_EBC_FREQUENCY_ID:
2517 data = 1 << 24;
2518 break;
2519 case MSR_IA32_APICBASE:
2520 data = kvm_get_apic_base(vcpu);
2521 break;
2522 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_read(vcpu, msr, pdata);
2525 case MSR_IA32_TSCDEADLINE:
2526 data = kvm_get_lapic_tscdeadline_msr(vcpu);
2527 break;
2528 case MSR_IA32_TSC_ADJUST:
2529 data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2530 break;
2531 case MSR_IA32_MISC_ENABLE:
2532 data = vcpu->arch.ia32_misc_enable_msr;
2533 break;
2534 case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
		data = 1000ULL;
		/* CPU multiplier */
		data |= (((uint64_t)4ULL) << 40);
2539 break;
2540 case MSR_EFER:
2541 data = vcpu->arch.efer;
2542 break;
2543 case MSR_KVM_WALL_CLOCK:
2544 case MSR_KVM_WALL_CLOCK_NEW:
2545 data = vcpu->kvm->arch.wall_clock;
2546 break;
2547 case MSR_KVM_SYSTEM_TIME:
2548 case MSR_KVM_SYSTEM_TIME_NEW:
2549 data = vcpu->arch.time;
2550 break;
2551 case MSR_KVM_ASYNC_PF_EN:
2552 data = vcpu->arch.apf.msr_val;
2553 break;
2554 case MSR_KVM_STEAL_TIME:
2555 data = vcpu->arch.st.msr_val;
2556 break;
2557 case MSR_KVM_PV_EOI_EN:
2558 data = vcpu->arch.pv_eoi.msr_val;
2559 break;
2560 case MSR_IA32_P5_MC_ADDR:
2561 case MSR_IA32_P5_MC_TYPE:
2562 case MSR_IA32_MCG_CAP:
2563 case MSR_IA32_MCG_CTL:
2564 case MSR_IA32_MCG_STATUS:
2565 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2566 return get_msr_mce(vcpu, msr, pdata);
2567 case MSR_K7_CLK_CTL:
		/*
		 * Provide expected ramp-up count for K7. All other
		 * are set to zero, indicating minimum divisors for
		 * every field.
		 *
		 * This prevents guest kernels on AMD host with CPU
		 * type 6, model 8 and higher from exploding due to
		 * the rdmsr failing.
		 */
2577 data = 0x20000000;
2578 break;
2579 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2580 if (kvm_hv_msr_partition_wide(msr)) {
2581 int r;
2582 mutex_lock(&vcpu->kvm->lock);
2583 r = get_msr_hyperv_pw(vcpu, msr, pdata);
2584 mutex_unlock(&vcpu->kvm->lock);
2585 return r;
2586 } else
2587 return get_msr_hyperv(vcpu, msr, pdata);
2589 case MSR_IA32_BBL_CR_CTL3:
		/* This legacy MSR exists but isn't fully documented in current
		 * silicon.  It is however accessed by winxp in very narrow
		 * scenarios where it sets bit #19, itself documented as
		 * a "reserved" bit.  Best effort attempt to source coherent
		 * read data here should the balance of the register be
		 * interpreted by the guest:
		 *
		 * L2 cache control register 3: 64GB range, 256KB size,
		 * enabled, latency 0x1, configured
		 */
2600 data = 0xbe702111;
2601 break;
2602 case MSR_AMD64_OSVW_ID_LENGTH:
2603 if (!guest_cpuid_has_osvw(vcpu))
2604 return 1;
2605 data = vcpu->arch.osvw.length;
2606 break;
2607 case MSR_AMD64_OSVW_STATUS:
2608 if (!guest_cpuid_has_osvw(vcpu))
2609 return 1;
2610 data = vcpu->arch.osvw.status;
2611 break;
2612 default:
2613 if (kvm_pmu_msr(vcpu, msr))
2614 return kvm_pmu_get_msr(vcpu, msr, pdata);
2615 if (!ignore_msrs) {
2616 vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
2617 return 1;
2618 } else {
2619 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
2620 data = 0;
2621 }
2622 break;
2623 }
2624 *pdata = data;
2625 return 0;
2626}
2627EXPORT_SYMBOL_GPL(kvm_get_msr_common);
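
/*
 * For both the set and get paths above, accesses that reach the default
 * case unhandled are governed by the ignore_msrs module parameter.
 * With it clear (the default) the access fails and the caller injects a
 * #GP into the guest; with it set the access is only logged, writes are
 * dropped and reads return 0.  It can be toggled at runtime, e.g.:
 *
 *	echo 1 > /sys/module/kvm/parameters/ignore_msrs
 */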
2628
/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
2634static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2635 struct kvm_msr_entry *entries,
2636 int (*do_msr)(struct kvm_vcpu *vcpu,
2637 unsigned index, u64 *data))
2638{
2639 int i, idx;
2640
2641 idx = srcu_read_lock(&vcpu->kvm->srcu);
2642 for (i = 0; i < msrs->nmsrs; ++i)
2643 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2644 break;
2645 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2646
2647 return i;
2648}
2649
/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
2655static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2656 int (*do_msr)(struct kvm_vcpu *vcpu,
2657 unsigned index, u64 *data),
2658 int writeback)
2659{
2660 struct kvm_msrs msrs;
2661 struct kvm_msr_entry *entries;
2662 int r, n;
2663 unsigned size;
2664
2665 r = -EFAULT;
2666 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2667 goto out;
2668
2669 r = -E2BIG;
2670 if (msrs.nmsrs >= MAX_IO_MSRS)
2671 goto out;
2672
2673 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2674 entries = memdup_user(user_msrs->entries, size);
2675 if (IS_ERR(entries)) {
2676 r = PTR_ERR(entries);
2677 goto out;
2678 }
2679
2680 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2681 if (r < 0)
2682 goto out_free;
2683
2684 r = -EFAULT;
2685 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2686 goto out_free;
2687
2688 r = n;
2689
2690out_free:
2691 kfree(entries);
2692out:
2693 return r;
2694}
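
/*
 * Hypothetical userspace sketch of the ioctls served by msr_io(); the
 * vcpu fd and the index are illustrative (0xc0000080 is IA32_EFER):
 *
 *	struct {
 *		struct kvm_msrs hdr;
 *		struct kvm_msr_entry entries[1];
 *	} m = { .hdr.nmsrs = 1, .entries[0].index = 0xc0000080 };
 *
 *	if (ioctl(vcpu_fd, KVM_GET_MSRS, &m) == 1)
 *		use(m.entries[0].data);
 *
 * The return value is the number of MSRs processed, as computed by
 * __msr_io() above.
 */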
2695
2696int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2697{
2698 int r;
2699
2700 switch (ext) {
2701 case KVM_CAP_IRQCHIP:
2702 case KVM_CAP_HLT:
2703 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2704 case KVM_CAP_SET_TSS_ADDR:
2705 case KVM_CAP_EXT_CPUID:
2706 case KVM_CAP_EXT_EMUL_CPUID:
2707 case KVM_CAP_CLOCKSOURCE:
2708 case KVM_CAP_PIT:
2709 case KVM_CAP_NOP_IO_DELAY:
2710 case KVM_CAP_MP_STATE:
2711 case KVM_CAP_SYNC_MMU:
2712 case KVM_CAP_USER_NMI:
2713 case KVM_CAP_REINJECT_CONTROL:
2714 case KVM_CAP_IRQ_INJECT_STATUS:
2715 case KVM_CAP_IRQFD:
2716 case KVM_CAP_IOEVENTFD:
2717 case KVM_CAP_IOEVENTFD_NO_LENGTH:
2718 case KVM_CAP_PIT2:
2719 case KVM_CAP_PIT_STATE2:
2720 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2721 case KVM_CAP_XEN_HVM:
2722 case KVM_CAP_ADJUST_CLOCK:
2723 case KVM_CAP_VCPU_EVENTS:
2724 case KVM_CAP_HYPERV:
2725 case KVM_CAP_HYPERV_VAPIC:
2726 case KVM_CAP_HYPERV_SPIN:
2727 case KVM_CAP_PCI_SEGMENT:
2728 case KVM_CAP_DEBUGREGS:
2729 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2730 case KVM_CAP_XSAVE:
2731 case KVM_CAP_ASYNC_PF:
2732 case KVM_CAP_GET_TSC_KHZ:
2733 case KVM_CAP_KVMCLOCK_CTRL:
2734 case KVM_CAP_READONLY_MEM:
2735 case KVM_CAP_HYPERV_TIME:
2736 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
2737#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2738 case KVM_CAP_ASSIGN_DEV_IRQ:
2739 case KVM_CAP_PCI_2_3:
2740#endif
2741 r = 1;
2742 break;
2743 case KVM_CAP_COALESCED_MMIO:
2744 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2745 break;
2746 case KVM_CAP_VAPIC:
2747 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2748 break;
2749 case KVM_CAP_NR_VCPUS:
2750 r = KVM_SOFT_MAX_VCPUS;
2751 break;
2752 case KVM_CAP_MAX_VCPUS:
2753 r = KVM_MAX_VCPUS;
2754 break;
2755 case KVM_CAP_NR_MEMSLOTS:
2756 r = KVM_USER_MEM_SLOTS;
2757 break;
2758 case KVM_CAP_PV_MMU:
2759 r = 0;
2760 break;
2761#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2762 case KVM_CAP_IOMMU:
2763 r = iommu_present(&pci_bus_type);
2764 break;
2765#endif
2766 case KVM_CAP_MCE:
2767 r = KVM_MAX_MCE_BANKS;
2768 break;
2769 case KVM_CAP_XCRS:
2770 r = cpu_has_xsave;
2771 break;
2772 case KVM_CAP_TSC_CONTROL:
2773 r = kvm_has_tsc_control;
2774 break;
2775 case KVM_CAP_TSC_DEADLINE_TIMER:
2776 r = boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER);
2777 break;
2778 default:
2779 r = 0;
2780 break;
2781 }
2782 return r;
2784}
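
/*
 * Userspace discovers all of the above through KVM_CHECK_EXTENSION,
 * e.g. (hypothetical snippet; kvm_fd comes from open("/dev/kvm")):
 *
 *	int have_pit2 = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT2);
 *
 * A return of 0 means "not supported"; positive values mean supported,
 * or carry a count/limit as in the KVM_CAP_NR_VCPUS case above.
 */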
2785
2786long kvm_arch_dev_ioctl(struct file *filp,
2787 unsigned int ioctl, unsigned long arg)
2788{
2789 void __user *argp = (void __user *)arg;
2790 long r;
2791
2792 switch (ioctl) {
2793 case KVM_GET_MSR_INDEX_LIST: {
2794 struct kvm_msr_list __user *user_msr_list = argp;
2795 struct kvm_msr_list msr_list;
2796 unsigned n;
2797
2798 r = -EFAULT;
2799 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2800 goto out;
2801 n = msr_list.nmsrs;
2802 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
2803 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2804 goto out;
2805 r = -E2BIG;
2806 if (n < msr_list.nmsrs)
2807 goto out;
2808 r = -EFAULT;
2809 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2810 num_msrs_to_save * sizeof(u32)))
2811 goto out;
2812 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2813 &emulated_msrs,
2814 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
2815 goto out;
2816 r = 0;
2817 break;
2818 }
2819 case KVM_GET_SUPPORTED_CPUID:
2820 case KVM_GET_EMULATED_CPUID: {
2821 struct kvm_cpuid2 __user *cpuid_arg = argp;
2822 struct kvm_cpuid2 cpuid;
2823
2824 r = -EFAULT;
2825 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2826 goto out;
2827
2828 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
2829 ioctl);
2830 if (r)
2831 goto out;
2832
2833 r = -EFAULT;
2834 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2835 goto out;
2836 r = 0;
2837 break;
2838 }
2839 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2840 u64 mce_cap;
2841
2842 mce_cap = KVM_MCE_CAP_SUPPORTED;
2843 r = -EFAULT;
2844 if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
2845 goto out;
2846 r = 0;
2847 break;
2848 }
2849 default:
2850 r = -EINVAL;
2851 }
2852out:
2853 return r;
2854}
2855
2856static void wbinvd_ipi(void *garbage)
2857{
2858 wbinvd();
2859}
2860
2861static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2862{
2863 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
2864}
2865
2866void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2867{
	/* Address WBINVD may be executed by guest */
2869 if (need_emulate_wbinvd(vcpu)) {
2870 if (kvm_x86_ops->has_wbinvd_exit())
2871 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2872 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2873 smp_call_function_single(vcpu->cpu,
2874 wbinvd_ipi, NULL, 1);
2875 }
2876
2877 kvm_x86_ops->vcpu_load(vcpu, cpu);
2878
	/* Apply any externally detected TSC adjustments (due to suspend) */
2880 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2881 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2882 vcpu->arch.tsc_offset_adjustment = 0;
2883 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2884 }
2885
2886 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2887 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2888 native_read_tsc() - vcpu->arch.last_host_tsc;
2889 if (tsc_delta < 0)
2890 mark_tsc_unstable("KVM discovered backwards TSC");
2891 if (check_tsc_unstable()) {
2892 u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
2893 vcpu->arch.last_guest_tsc);
2894 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2895 vcpu->arch.tsc_catchup = 1;
2896 }
		/*
		 * On a host with synchronized TSC, there is no need to update
		 * kvmclock on vcpu->cpu migration
		 */
2901 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2902 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2903 if (vcpu->cpu != cpu)
2904 kvm_migrate_timers(vcpu);
2905 vcpu->cpu = cpu;
2906 }
2907
2908 accumulate_steal_time(vcpu);
2909 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2910}
2911
2912void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2913{
2914 kvm_x86_ops->vcpu_put(vcpu);
2915 kvm_put_guest_fpu(vcpu);
2916 vcpu->arch.last_host_tsc = native_read_tsc();
2917}
2918
2919static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2920 struct kvm_lapic_state *s)
2921{
2922 kvm_x86_ops->sync_pir_to_irr(vcpu);
2923 memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
2924
2925 return 0;
2926}
2927
2928static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2929 struct kvm_lapic_state *s)
2930{
2931 kvm_apic_post_state_restore(vcpu, s);
2932 update_cr8_intercept(vcpu);
2933
2934 return 0;
2935}
2936
2937static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2938 struct kvm_interrupt *irq)
2939{
2940 if (irq->irq >= KVM_NR_INTERRUPTS)
2941 return -EINVAL;
2942 if (irqchip_in_kernel(vcpu->kvm))
2943 return -ENXIO;
2944
2945 kvm_queue_interrupt(vcpu, irq->irq, false);
2946 kvm_make_request(KVM_REQ_EVENT, vcpu);
2947
2948 return 0;
2949}
2950
2951static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2952{
2953 kvm_inject_nmi(vcpu);
2954
2955 return 0;
2956}
2957
2958static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2959 struct kvm_tpr_access_ctl *tac)
2960{
2961 if (tac->flags)
2962 return -EINVAL;
2963 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2964 return 0;
2965}
2966
2967static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2968 u64 mcg_cap)
2969{
2970 int r;
2971 unsigned bank_num = mcg_cap & 0xff, bank;
2972
2973 r = -EINVAL;
2974 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2975 goto out;
2976 if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
2977 goto out;
2978 r = 0;
2979 vcpu->arch.mcg_cap = mcg_cap;
2980
2981 if (mcg_cap & MCG_CTL_P)
2982 vcpu->arch.mcg_ctl = ~(u64)0;
2983
2984 for (bank = 0; bank < bank_num; bank++)
2985 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2986out:
2987 return r;
2988}
2989
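/*
 * Layout assumed by the MCE code above and below: vcpu->arch.mce_banks
 * stores four u64s per bank, mirroring the architectural MSR order
 *
 *	banks[4 * n + 0]	MCi_CTL
 *	banks[4 * n + 1]	MCi_STATUS
 *	banks[4 * n + 2]	MCi_ADDR
 *	banks[4 * n + 3]	MCi_MISC
 *
 * which is why kvm_vcpu_ioctl_x86_set_mce() below advances the pointer
 * by 4 * mce->bank before filling in a simulated machine check.
 */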
2990static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2991 struct kvm_x86_mce *mce)
2992{
2993 u64 mcg_cap = vcpu->arch.mcg_cap;
2994 unsigned bank_num = mcg_cap & 0xff;
2995 u64 *banks = vcpu->arch.mce_banks;
2996
2997 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2998 return -EINVAL;
2999
	/*
	 * if IA32_MCG_CTL is not all 1s, the uncorrected error
	 * reporting is disabled
	 */
3003 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
3004 vcpu->arch.mcg_ctl != ~(u64)0)
3005 return 0;
3006 banks += 4 * mce->bank;
3007
	/*
	 * if IA32_MCi_CTL is not all 1s, the uncorrected error
	 * reporting is disabled for the bank
	 */
3011 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
3012 return 0;
3013 if (mce->status & MCI_STATUS_UC) {
3014 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
3015 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
3016 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3017 return 0;
3018 }
3019 if (banks[1] & MCI_STATUS_VAL)
3020 mce->status |= MCI_STATUS_OVER;
3021 banks[2] = mce->addr;
3022 banks[3] = mce->misc;
3023 vcpu->arch.mcg_status = mce->mcg_status;
3024 banks[1] = mce->status;
3025 kvm_queue_exception(vcpu, MC_VECTOR);
3026 } else if (!(banks[1] & MCI_STATUS_VAL)
3027 || !(banks[1] & MCI_STATUS_UC)) {
3028 if (banks[1] & MCI_STATUS_VAL)
3029 mce->status |= MCI_STATUS_OVER;
3030 banks[2] = mce->addr;
3031 banks[3] = mce->misc;
3032 banks[1] = mce->status;
3033 } else
3034 banks[1] |= MCI_STATUS_OVER;
3035 return 0;
3036}
3037
3038static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
3039 struct kvm_vcpu_events *events)
3040{
3041 process_nmi(vcpu);
3042 events->exception.injected =
3043 vcpu->arch.exception.pending &&
3044 !kvm_exception_is_soft(vcpu->arch.exception.nr);
3045 events->exception.nr = vcpu->arch.exception.nr;
3046 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
3047 events->exception.pad = 0;
3048 events->exception.error_code = vcpu->arch.exception.error_code;
3049
3050 events->interrupt.injected =
3051 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
3052 events->interrupt.nr = vcpu->arch.interrupt.nr;
3053 events->interrupt.soft = 0;
3054 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
3055
3056 events->nmi.injected = vcpu->arch.nmi_injected;
3057 events->nmi.pending = vcpu->arch.nmi_pending != 0;
3058 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
3059 events->nmi.pad = 0;
3060
3061 events->sipi_vector = 0;
3062
3063 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
3064 | KVM_VCPUEVENT_VALID_SHADOW);
3065 memset(&events->reserved, 0, sizeof(events->reserved));
3066}
3067
3068static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3069 struct kvm_vcpu_events *events)
3070{
3071 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
3072 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
3073 | KVM_VCPUEVENT_VALID_SHADOW))
3074 return -EINVAL;
3075
3076 process_nmi(vcpu);
3077 vcpu->arch.exception.pending = events->exception.injected;
3078 vcpu->arch.exception.nr = events->exception.nr;
3079 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
3080 vcpu->arch.exception.error_code = events->exception.error_code;
3081
3082 vcpu->arch.interrupt.pending = events->interrupt.injected;
3083 vcpu->arch.interrupt.nr = events->interrupt.nr;
3084 vcpu->arch.interrupt.soft = events->interrupt.soft;
3085 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3086 kvm_x86_ops->set_interrupt_shadow(vcpu,
3087 events->interrupt.shadow);
3088
3089 vcpu->arch.nmi_injected = events->nmi.injected;
3090 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3091 vcpu->arch.nmi_pending = events->nmi.pending;
3092 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3093
3094 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3095 kvm_vcpu_has_lapic(vcpu))
3096 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3097
3098 kvm_make_request(KVM_REQ_EVENT, vcpu);
3099
3100 return 0;
3101}
3102
3103static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3104 struct kvm_debugregs *dbgregs)
3105{
3106 unsigned long val;
3107
3108 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3109 _kvm_get_dr(vcpu, 6, &val);
3110 dbgregs->dr6 = val;
3111 dbgregs->dr7 = vcpu->arch.dr7;
3112 dbgregs->flags = 0;
3113 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3114}
3115
3116static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3117 struct kvm_debugregs *dbgregs)
3118{
3119 if (dbgregs->flags)
3120 return -EINVAL;
3121
3122 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3123 vcpu->arch.dr6 = dbgregs->dr6;
3124 kvm_update_dr6(vcpu);
3125 vcpu->arch.dr7 = dbgregs->dr7;
3126 kvm_update_dr7(vcpu);
3127
3128 return 0;
3129}
3130
3131static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3132 struct kvm_xsave *guest_xsave)
3133{
3134 if (cpu_has_xsave) {
3135 memcpy(guest_xsave->region,
3136 &vcpu->arch.guest_fpu.state->xsave,
3137 vcpu->arch.guest_xstate_size);
3138 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
3139 vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
3140 } else {
3141 memcpy(guest_xsave->region,
3142 &vcpu->arch.guest_fpu.state->fxsave,
3143 sizeof(struct i387_fxsave_struct));
3144 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3145 XSTATE_FPSSE;
3146 }
3147}
3148
3149static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3150 struct kvm_xsave *guest_xsave)
3151{
3152 u64 xstate_bv =
3153 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3154
3155 if (cpu_has_xsave) {
		/*
		 * Here we allow setting states that are not present in
		 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
		 * with old userspace.
		 */
3161 if (xstate_bv & ~kvm_supported_xcr0())
3162 return -EINVAL;
3163 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3164 guest_xsave->region, vcpu->arch.guest_xstate_size);
3165 } else {
3166 if (xstate_bv & ~XSTATE_FPSSE)
3167 return -EINVAL;
3168 memcpy(&vcpu->arch.guest_fpu.state->fxsave,
3169 guest_xsave->region, sizeof(struct i387_fxsave_struct));
3170 }
3171 return 0;
3172}
3173
3174static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3175 struct kvm_xcrs *guest_xcrs)
3176{
3177 if (!cpu_has_xsave) {
3178 guest_xcrs->nr_xcrs = 0;
3179 return;
3180 }
3181
3182 guest_xcrs->nr_xcrs = 1;
3183 guest_xcrs->flags = 0;
3184 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3185 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3186}
3187
3188static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3189 struct kvm_xcrs *guest_xcrs)
3190{
3191 int i, r = 0;
3192
3193 if (!cpu_has_xsave)
3194 return -EINVAL;
3195
3196 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3197 return -EINVAL;
3198
3199 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
		/* Only support XCR0 currently */
3201 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3202 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3203 guest_xcrs->xcrs[i].value);
3204 break;
3205 }
3206 if (r)
3207 r = -EINVAL;
3208 return r;
3209}
3210
/*
 * kvm_set_guest_paused() indicates to the guest kernel that it has been
 * stopped by the hypervisor.  This function will be called from the host
 * only.  It fails with -EINVAL if the vcpu has no registered pvclock
 * area through which the PVCLOCK_GUEST_STOPPED flag could be delivered.
 */
3217static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3218{
3219 if (!vcpu->arch.pv_time_enabled)
3220 return -EINVAL;
3221 vcpu->arch.pvclock_set_guest_stopped_request = true;
3222 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3223 return 0;
3224}
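
/*
 * Management software is expected to issue the corresponding ioctl
 * after resuming a guest it had stopped, roughly (hypothetical
 * snippet):
 *
 *	ioctl(vcpu_fd, KVM_KVMCLOCK_CTRL, 0);
 *
 * The PVCLOCK_GUEST_STOPPED flag set on the next clock update lets the
 * guest's watchdogs tell "the host paused us" apart from a genuine
 * soft lockup.
 */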
3225
3226long kvm_arch_vcpu_ioctl(struct file *filp,
3227 unsigned int ioctl, unsigned long arg)
3228{
3229 struct kvm_vcpu *vcpu = filp->private_data;
3230 void __user *argp = (void __user *)arg;
3231 int r;
3232 union {
3233 struct kvm_lapic_state *lapic;
3234 struct kvm_xsave *xsave;
3235 struct kvm_xcrs *xcrs;
3236 void *buffer;
3237 } u;
3238
3239 u.buffer = NULL;
3240 switch (ioctl) {
3241 case KVM_GET_LAPIC: {
3242 r = -EINVAL;
3243 if (!vcpu->arch.apic)
3244 goto out;
3245 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3246
3247 r = -ENOMEM;
3248 if (!u.lapic)
3249 goto out;
3250 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3251 if (r)
3252 goto out;
3253 r = -EFAULT;
3254 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3255 goto out;
3256 r = 0;
3257 break;
3258 }
3259 case KVM_SET_LAPIC: {
3260 r = -EINVAL;
3261 if (!vcpu->arch.apic)
3262 goto out;
3263 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3264 if (IS_ERR(u.lapic))
3265 return PTR_ERR(u.lapic);
3266
3267 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3268 break;
3269 }
3270 case KVM_INTERRUPT: {
3271 struct kvm_interrupt irq;
3272
3273 r = -EFAULT;
3274 if (copy_from_user(&irq, argp, sizeof irq))
3275 goto out;
3276 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3277 break;
3278 }
3279 case KVM_NMI: {
3280 r = kvm_vcpu_ioctl_nmi(vcpu);
3281 break;
3282 }
3283 case KVM_SET_CPUID: {
3284 struct kvm_cpuid __user *cpuid_arg = argp;
3285 struct kvm_cpuid cpuid;
3286
3287 r = -EFAULT;
3288 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3289 goto out;
3290 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3291 break;
3292 }
3293 case KVM_SET_CPUID2: {
3294 struct kvm_cpuid2 __user *cpuid_arg = argp;
3295 struct kvm_cpuid2 cpuid;
3296
3297 r = -EFAULT;
3298 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3299 goto out;
3300 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3301 cpuid_arg->entries);
3302 break;
3303 }
3304 case KVM_GET_CPUID2: {
3305 struct kvm_cpuid2 __user *cpuid_arg = argp;
3306 struct kvm_cpuid2 cpuid;
3307
3308 r = -EFAULT;
3309 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3310 goto out;
3311 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3312 cpuid_arg->entries);
3313 if (r)
3314 goto out;
3315 r = -EFAULT;
3316 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3317 goto out;
3318 r = 0;
3319 break;
3320 }
3321 case KVM_GET_MSRS:
3322 r = msr_io(vcpu, argp, kvm_get_msr, 1);
3323 break;
3324 case KVM_SET_MSRS:
3325 r = msr_io(vcpu, argp, do_set_msr, 0);
3326 break;
3327 case KVM_TPR_ACCESS_REPORTING: {
3328 struct kvm_tpr_access_ctl tac;
3329
3330 r = -EFAULT;
3331 if (copy_from_user(&tac, argp, sizeof tac))
3332 goto out;
3333 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3334 if (r)
3335 goto out;
3336 r = -EFAULT;
3337 if (copy_to_user(argp, &tac, sizeof tac))
3338 goto out;
3339 r = 0;
3340 break;
	}
3342 case KVM_SET_VAPIC_ADDR: {
3343 struct kvm_vapic_addr va;
3344
3345 r = -EINVAL;
3346 if (!irqchip_in_kernel(vcpu->kvm))
3347 goto out;
3348 r = -EFAULT;
3349 if (copy_from_user(&va, argp, sizeof va))
3350 goto out;
3351 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3352 break;
3353 }
3354 case KVM_X86_SETUP_MCE: {
3355 u64 mcg_cap;
3356
3357 r = -EFAULT;
3358 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3359 goto out;
3360 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3361 break;
3362 }
3363 case KVM_X86_SET_MCE: {
3364 struct kvm_x86_mce mce;
3365
3366 r = -EFAULT;
3367 if (copy_from_user(&mce, argp, sizeof mce))
3368 goto out;
3369 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3370 break;
3371 }
3372 case KVM_GET_VCPU_EVENTS: {
3373 struct kvm_vcpu_events events;
3374
3375 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3376
3377 r = -EFAULT;
3378 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3379 break;
3380 r = 0;
3381 break;
3382 }
3383 case KVM_SET_VCPU_EVENTS: {
3384 struct kvm_vcpu_events events;
3385
3386 r = -EFAULT;
3387 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3388 break;
3389
3390 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3391 break;
3392 }
3393 case KVM_GET_DEBUGREGS: {
3394 struct kvm_debugregs dbgregs;
3395
3396 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3397
3398 r = -EFAULT;
3399 if (copy_to_user(argp, &dbgregs,
3400 sizeof(struct kvm_debugregs)))
3401 break;
3402 r = 0;
3403 break;
3404 }
3405 case KVM_SET_DEBUGREGS: {
3406 struct kvm_debugregs dbgregs;
3407
3408 r = -EFAULT;
3409 if (copy_from_user(&dbgregs, argp,
3410 sizeof(struct kvm_debugregs)))
3411 break;
3412
3413 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3414 break;
3415 }
3416 case KVM_GET_XSAVE: {
3417 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3418 r = -ENOMEM;
3419 if (!u.xsave)
3420 break;
3421
3422 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3423
3424 r = -EFAULT;
3425 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3426 break;
3427 r = 0;
3428 break;
3429 }
3430 case KVM_SET_XSAVE: {
3431 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3432 if (IS_ERR(u.xsave))
3433 return PTR_ERR(u.xsave);
3434
3435 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3436 break;
3437 }
3438 case KVM_GET_XCRS: {
3439 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3440 r = -ENOMEM;
3441 if (!u.xcrs)
3442 break;
3443
3444 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3445
3446 r = -EFAULT;
3447 if (copy_to_user(argp, u.xcrs,
3448 sizeof(struct kvm_xcrs)))
3449 break;
3450 r = 0;
3451 break;
3452 }
3453 case KVM_SET_XCRS: {
3454 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3455 if (IS_ERR(u.xcrs))
3456 return PTR_ERR(u.xcrs);
3457
3458 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3459 break;
3460 }
3461 case KVM_SET_TSC_KHZ: {
3462 u32 user_tsc_khz;
3463
3464 r = -EINVAL;
3465 user_tsc_khz = (u32)arg;
3466
3467 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3468 goto out;
3469
3470 if (user_tsc_khz == 0)
3471 user_tsc_khz = tsc_khz;
3472
3473 kvm_set_tsc_khz(vcpu, user_tsc_khz);
3474
3475 r = 0;
3476 goto out;
3477 }
3478 case KVM_GET_TSC_KHZ: {
3479 r = vcpu->arch.virtual_tsc_khz;
3480 goto out;
3481 }
3482 case KVM_KVMCLOCK_CTRL: {
3483 r = kvm_set_guest_paused(vcpu);
3484 goto out;
3485 }
3486 default:
3487 r = -EINVAL;
3488 }
3489out:
3490 kfree(u.buffer);
3491 return r;
3492}
3493
3494int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3495{
3496 return VM_FAULT_SIGBUS;
3497}
3498
3499static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3500{
3501 int ret;
3502
	/* the three pages reserved for the TSS must lie below 4GB */
	if (addr > (unsigned int)(-3 * PAGE_SIZE))
3504 return -EINVAL;
3505 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3506 return ret;
3507}
3508
3509static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3510 u64 ident_addr)
3511{
3512 kvm->arch.ept_identity_map_addr = ident_addr;
3513 return 0;
3514}
3515
3516static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3517 u32 kvm_nr_mmu_pages)
3518{
3519 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3520 return -EINVAL;
3521
3522 mutex_lock(&kvm->slots_lock);
3523
3524 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3525 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3526
3527 mutex_unlock(&kvm->slots_lock);
3528 return 0;
3529}
3530
3531static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3532{
3533 return kvm->arch.n_max_mmu_pages;
3534}
3535
3536static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3537{
3538 int r;
3539
3540 r = 0;
3541 switch (chip->chip_id) {
3542 case KVM_IRQCHIP_PIC_MASTER:
3543 memcpy(&chip->chip.pic,
3544 &pic_irqchip(kvm)->pics[0],
3545 sizeof(struct kvm_pic_state));
3546 break;
3547 case KVM_IRQCHIP_PIC_SLAVE:
3548 memcpy(&chip->chip.pic,
3549 &pic_irqchip(kvm)->pics[1],
3550 sizeof(struct kvm_pic_state));
3551 break;
3552 case KVM_IRQCHIP_IOAPIC:
3553 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3554 break;
3555 default:
3556 r = -EINVAL;
3557 break;
3558 }
3559 return r;
3560}
3561
3562static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3563{
3564 int r;
3565
3566 r = 0;
3567 switch (chip->chip_id) {
3568 case KVM_IRQCHIP_PIC_MASTER:
3569 spin_lock(&pic_irqchip(kvm)->lock);
3570 memcpy(&pic_irqchip(kvm)->pics[0],
3571 &chip->chip.pic,
3572 sizeof(struct kvm_pic_state));
3573 spin_unlock(&pic_irqchip(kvm)->lock);
3574 break;
3575 case KVM_IRQCHIP_PIC_SLAVE:
3576 spin_lock(&pic_irqchip(kvm)->lock);
3577 memcpy(&pic_irqchip(kvm)->pics[1],
3578 &chip->chip.pic,
3579 sizeof(struct kvm_pic_state));
3580 spin_unlock(&pic_irqchip(kvm)->lock);
3581 break;
3582 case KVM_IRQCHIP_IOAPIC:
3583 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3584 break;
3585 default:
3586 r = -EINVAL;
3587 break;
3588 }
3589 kvm_pic_update_irq(pic_irqchip(kvm));
3590 return r;
3591}
3592
3593static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3594{
3595 int r = 0;
3596
3597 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3598 memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
3599 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3600 return r;
3601}
3602
3603static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3604{
3605 int r = 0;
3606
3607 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3608 memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
3609 kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
3610 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3611 return r;
3612}
3613
3614static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3615{
3616 int r = 0;
3617
3618 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3619 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3620 sizeof(ps->channels));
3621 ps->flags = kvm->arch.vpit->pit_state.flags;
3622 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3623 memset(&ps->reserved, 0, sizeof(ps->reserved));
3624 return r;
3625}
3626
3627static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3628{
3629 int r = 0, start = 0;
3630 u32 prev_legacy, cur_legacy;
3631 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3632 prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3633 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3634 if (!prev_legacy && cur_legacy)
3635 start = 1;
3636 memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
3637 sizeof(kvm->arch.vpit->pit_state.channels));
3638 kvm->arch.vpit->pit_state.flags = ps->flags;
3639 kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
3640 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3641 return r;
3642}
3643
3644static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3645 struct kvm_reinject_control *control)
3646{
3647 if (!kvm->arch.vpit)
3648 return -ENXIO;
3649 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3650 kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
3651 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3652 return 0;
3653}
3654
/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * We need to keep it in mind that VCPU threads can write to the bitmap
 * concurrently.  So, to avoid losing data, we keep the following order for
 * each bit:
 *
 *   1. Take a snapshot of the bit and clear it if needed.
 *   2. Write protect the corresponding page.
 *   3. Flush TLB's if needed.
 *   4. Copy the snapshot to the userspace.
 *
 * Between 2 and 3, the guest may write to the page using the remaining TLB
 * entry.  This is not a problem because the page will be reported dirty at
 * step 4 using the snapshot taken before and step 3 ensures that existing
 * TLB entries are nothing but the snapshot.
 */
3674int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3675{
3676 int r;
3677 struct kvm_memory_slot *memslot;
3678 unsigned long n, i;
3679 unsigned long *dirty_bitmap;
3680 unsigned long *dirty_bitmap_buffer;
3681 bool is_dirty = false;
3682
3683 mutex_lock(&kvm->slots_lock);
3684
3685 r = -EINVAL;
3686 if (log->slot >= KVM_USER_MEM_SLOTS)
3687 goto out;
3688
3689 memslot = id_to_memslot(kvm->memslots, log->slot);
3690
3691 dirty_bitmap = memslot->dirty_bitmap;
3692 r = -ENOENT;
3693 if (!dirty_bitmap)
3694 goto out;
3695
3696 n = kvm_dirty_bitmap_bytes(memslot);
3697
3698 dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
3699 memset(dirty_bitmap_buffer, 0, n);
3700
3701 spin_lock(&kvm->mmu_lock);
3702
3703 for (i = 0; i < n / sizeof(long); i++) {
3704 unsigned long mask;
3705 gfn_t offset;
3706
3707 if (!dirty_bitmap[i])
3708 continue;
3709
3710 is_dirty = true;
3711
3712 mask = xchg(&dirty_bitmap[i], 0);
3713 dirty_bitmap_buffer[i] = mask;
3714
3715 offset = i * BITS_PER_LONG;
3716 kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
3717 }
3718
3719 spin_unlock(&kvm->mmu_lock);
3720
	/* See the comments in kvm_mmu_slot_remove_write_access(). */
3722 lockdep_assert_held(&kvm->slots_lock);
3723
	/*
	 * All the TLBs can be flushed out of mmu lock, see the comments in
	 * kvm_mmu_slot_remove_write_access().
	 */
3728 if (is_dirty)
3729 kvm_flush_remote_tlbs(kvm);
3730
3731 r = -EFAULT;
3732 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
3733 goto out;
3734
3735 r = 0;
3736out:
3737 mutex_unlock(&kvm->slots_lock);
3738 return r;
3739}
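
/*
 * A minimal userspace consumer of the ioctl above might look like this
 * (hypothetical snippet; buf must provide at least n bytes, one bit per
 * page of the slot, and the slot id matches KVM_SET_USER_MEMORY_REGION):
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = buf };
 *
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) == 0)
 *		resend_pages_marked_in(buf);
 */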
3740
3741int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
3742 bool line_status)
3743{
3744 if (!irqchip_in_kernel(kvm))
3745 return -ENXIO;
3746
3747 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3748 irq_event->irq, irq_event->level,
3749 line_status);
3750 return 0;
3751}
3752
3753long kvm_arch_vm_ioctl(struct file *filp,
3754 unsigned int ioctl, unsigned long arg)
3755{
3756 struct kvm *kvm = filp->private_data;
3757 void __user *argp = (void __user *)arg;
3758 int r = -ENOTTY;
	/*
	 * This union makes it completely explicit to gcc-3.x
	 * that these two variables' stack usage should be
	 * combined, not added together.
	 */
3764 union {
3765 struct kvm_pit_state ps;
3766 struct kvm_pit_state2 ps2;
3767 struct kvm_pit_config pit_config;
3768 } u;
3769
3770 switch (ioctl) {
3771 case KVM_SET_TSS_ADDR:
3772 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3773 break;
3774 case KVM_SET_IDENTITY_MAP_ADDR: {
3775 u64 ident_addr;
3776
3777 r = -EFAULT;
3778 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3779 goto out;
3780 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3781 break;
3782 }
3783 case KVM_SET_NR_MMU_PAGES:
3784 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3785 break;
3786 case KVM_GET_NR_MMU_PAGES:
3787 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3788 break;
3789 case KVM_CREATE_IRQCHIP: {
3790 struct kvm_pic *vpic;
3791
3792 mutex_lock(&kvm->lock);
3793 r = -EEXIST;
3794 if (kvm->arch.vpic)
3795 goto create_irqchip_unlock;
3796 r = -EINVAL;
3797 if (atomic_read(&kvm->online_vcpus))
3798 goto create_irqchip_unlock;
3799 r = -ENOMEM;
3800 vpic = kvm_create_pic(kvm);
3801 if (vpic) {
3802 r = kvm_ioapic_init(kvm);
3803 if (r) {
3804 mutex_lock(&kvm->slots_lock);
3805 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3806 &vpic->dev_master);
3807 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3808 &vpic->dev_slave);
3809 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
3810 &vpic->dev_eclr);
3811 mutex_unlock(&kvm->slots_lock);
3812 kfree(vpic);
3813 goto create_irqchip_unlock;
3814 }
3815 } else
3816 goto create_irqchip_unlock;
		/* publish the irqchip only after it is fully initialised */
		smp_wmb();
		kvm->arch.vpic = vpic;
		smp_wmb();
3820 r = kvm_setup_default_irq_routing(kvm);
3821 if (r) {
3822 mutex_lock(&kvm->slots_lock);
3823 mutex_lock(&kvm->irq_lock);
3824 kvm_ioapic_destroy(kvm);
3825 kvm_destroy_pic(kvm);
3826 mutex_unlock(&kvm->irq_lock);
3827 mutex_unlock(&kvm->slots_lock);
3828 }
3829 create_irqchip_unlock:
3830 mutex_unlock(&kvm->lock);
3831 break;
3832 }
3833 case KVM_CREATE_PIT:
3834 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3835 goto create_pit;
3836 case KVM_CREATE_PIT2:
3837 r = -EFAULT;
3838 if (copy_from_user(&u.pit_config, argp,
3839 sizeof(struct kvm_pit_config)))
3840 goto out;
3841 create_pit:
3842 mutex_lock(&kvm->slots_lock);
3843 r = -EEXIST;
3844 if (kvm->arch.vpit)
3845 goto create_pit_unlock;
3846 r = -ENOMEM;
3847 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3848 if (kvm->arch.vpit)
3849 r = 0;
3850 create_pit_unlock:
3851 mutex_unlock(&kvm->slots_lock);
3852 break;
3853 case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3855 struct kvm_irqchip *chip;
3856
3857 chip = memdup_user(argp, sizeof(*chip));
3858 if (IS_ERR(chip)) {
3859 r = PTR_ERR(chip);
3860 goto out;
3861 }
3862
3863 r = -ENXIO;
3864 if (!irqchip_in_kernel(kvm))
3865 goto get_irqchip_out;
3866 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3867 if (r)
3868 goto get_irqchip_out;
3869 r = -EFAULT;
3870 if (copy_to_user(argp, chip, sizeof *chip))
3871 goto get_irqchip_out;
3872 r = 0;
3873 get_irqchip_out:
3874 kfree(chip);
3875 break;
3876 }
3877 case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
3879 struct kvm_irqchip *chip;
3880
3881 chip = memdup_user(argp, sizeof(*chip));
3882 if (IS_ERR(chip)) {
3883 r = PTR_ERR(chip);
3884 goto out;
3885 }
3886
3887 r = -ENXIO;
3888 if (!irqchip_in_kernel(kvm))
3889 goto set_irqchip_out;
3890 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3891 if (r)
3892 goto set_irqchip_out;
3893 r = 0;
3894 set_irqchip_out:
3895 kfree(chip);
3896 break;
3897 }
3898 case KVM_GET_PIT: {
3899 r = -EFAULT;
3900 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3901 goto out;
3902 r = -ENXIO;
3903 if (!kvm->arch.vpit)
3904 goto out;
3905 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3906 if (r)
3907 goto out;
3908 r = -EFAULT;
3909 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3910 goto out;
3911 r = 0;
3912 break;
3913 }
3914 case KVM_SET_PIT: {
3915 r = -EFAULT;
3916 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3917 goto out;
3918 r = -ENXIO;
3919 if (!kvm->arch.vpit)
3920 goto out;
3921 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3922 break;
3923 }
3924 case KVM_GET_PIT2: {
3925 r = -ENXIO;
3926 if (!kvm->arch.vpit)
3927 goto out;
3928 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3929 if (r)
3930 goto out;
3931 r = -EFAULT;
3932 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3933 goto out;
3934 r = 0;
3935 break;
3936 }
3937 case KVM_SET_PIT2: {
3938 r = -EFAULT;
3939 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
3940 goto out;
3941 r = -ENXIO;
3942 if (!kvm->arch.vpit)
3943 goto out;
3944 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
3945 break;
3946 }
3947 case KVM_REINJECT_CONTROL: {
3948 struct kvm_reinject_control control;
3949 r = -EFAULT;
3950 if (copy_from_user(&control, argp, sizeof(control)))
3951 goto out;
3952 r = kvm_vm_ioctl_reinject(kvm, &control);
3953 break;
3954 }
3955 case KVM_XEN_HVM_CONFIG: {
3956 r = -EFAULT;
3957 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
3958 sizeof(struct kvm_xen_hvm_config)))
3959 goto out;
3960 r = -EINVAL;
3961 if (kvm->arch.xen_hvm_config.flags)
3962 goto out;
3963 r = 0;
3964 break;
3965 }
3966 case KVM_SET_CLOCK: {
3967 struct kvm_clock_data user_ns;
3968 u64 now_ns;
3969 s64 delta;
3970
3971 r = -EFAULT;
3972 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
3973 goto out;
3974
3975 r = -EINVAL;
3976 if (user_ns.flags)
3977 goto out;
3978
3979 r = 0;
3980 local_irq_disable();
3981 now_ns = get_kernel_ns();
3982 delta = user_ns.clock - now_ns;
3983 local_irq_enable();
3984 kvm->arch.kvmclock_offset = delta;
3985 kvm_gen_update_masterclock(kvm);
3986 break;
3987 }
3988 case KVM_GET_CLOCK: {
3989 struct kvm_clock_data user_ns;
3990 u64 now_ns;
3991
3992 local_irq_disable();
3993 now_ns = get_kernel_ns();
3994 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
3995 local_irq_enable();
3996 user_ns.flags = 0;
3997 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
3998
3999 r = -EFAULT;
4000 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
4001 goto out;
4002 r = 0;
4003 break;
4004 }
4005
	default:
		;	/* unhandled ioctls leave r == -ENOTTY */
4008 }
4009out:
4010 return r;
4011}
4012
4013static void kvm_init_msr_list(void)
4014{
4015 u32 dummy[2];
4016 unsigned i, j;
4017
	/* skip the first msrs in the list. KVM-specific */
4019 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
4020 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
4021 continue;

		/*
		 * Even MSRs that are valid in the host may not be exposed to
		 * the guests in some cases; filter out anything the vendor
		 * module cannot handle, so that KVM_GET_MSR_INDEX_LIST only
		 * reports MSRs that KVM_GET_MSRS/KVM_SET_MSRS will accept.
		 */
4030 switch (msrs_to_save[i]) {
4031 case MSR_IA32_BNDCFGS:
4032 if (!kvm_x86_ops->mpx_supported())
4033 continue;
4034 break;
4035 default:
4036 break;
4037 }
4038
4039 if (j < i)
4040 msrs_to_save[j] = msrs_to_save[i];
4041 j++;
4042 }
4043 num_msrs_to_save = j;
4044}
4045
4046static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
4047 const void *v)
4048{
4049 int handled = 0;
4050 int n;
4051
4052 do {
4053 n = min(len, 8);
		/* try the in-kernel local APIC first, then the MMIO bus */
		if (!(vcpu->arch.apic &&
4055 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
4056 && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
4057 break;
4058 handled += n;
4059 addr += n;
4060 len -= n;
4061 v += n;
4062 } while (len);
4063
4064 return handled;
4065}
4066
4067static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4068{
4069 int handled = 0;
4070 int n;
4071
4072 do {
4073 n = min(len, 8);
4074 if (!(vcpu->arch.apic &&
4075 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
4076 && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
4077 break;
4078 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
4079 handled += n;
4080 addr += n;
4081 len -= n;
4082 v += n;
4083 } while (len);
4084
4085 return handled;
4086}
4087
4088static void kvm_set_segment(struct kvm_vcpu *vcpu,
4089 struct kvm_segment *var, int seg)
4090{
4091 kvm_x86_ops->set_segment(vcpu, var, seg);
4092}
4093
4094void kvm_get_segment(struct kvm_vcpu *vcpu,
4095 struct kvm_segment *var, int seg)
4096{
4097 kvm_x86_ops->get_segment(vcpu, var, seg);
4098}
4099
4100gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
4101 struct x86_exception *exception)
4102{
4103 gpa_t t_gpa;
4104
4105 BUG_ON(!mmu_is_nested(vcpu));
4106
	/* NPT walks are always user-mode */
4108 access |= PFERR_USER_MASK;
4109 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
4110
4111 return t_gpa;
4112}
4113
4114gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
4115 struct x86_exception *exception)
4116{
4117 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4118 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4119}
4120
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
4122 struct x86_exception *exception)
4123{
4124 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4125 access |= PFERR_FETCH_MASK;
4126 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4127}
4128
4129gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
4130 struct x86_exception *exception)
4131{
4132 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4133 access |= PFERR_WRITE_MASK;
4134 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4135}
4136
/* uses this to access any guest's mapped memory without checking CPL */
4138gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
4139 struct x86_exception *exception)
4140{
4141 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
4142}
4143
4144static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4145 struct kvm_vcpu *vcpu, u32 access,
4146 struct x86_exception *exception)
4147{
4148 void *data = val;
4149 int r = X86EMUL_CONTINUE;
4150
4151 while (bytes) {
4152 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
4153 exception);
4154 unsigned offset = addr & (PAGE_SIZE-1);
4155 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
4156 int ret;
4157
4158 if (gpa == UNMAPPED_GVA)
4159 return X86EMUL_PROPAGATE_FAULT;
4160 ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
4161 offset, toread);
4162 if (ret < 0) {
4163 r = X86EMUL_IO_NEEDED;
4164 goto out;
4165 }
4166
4167 bytes -= toread;
4168 data += toread;
4169 addr += toread;
4170 }
4171out:
4172 return r;
4173}
4174
/* used for instruction fetching */
4176static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
4177 gva_t addr, void *val, unsigned int bytes,
4178 struct x86_exception *exception)
4179{
4180 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4181 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4182 unsigned offset;
4183 int ret;
4184
	/* Inline kvm_read_guest_virt_helper for speed.  */
4186 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
4187 exception);
4188 if (unlikely(gpa == UNMAPPED_GVA))
4189 return X86EMUL_PROPAGATE_FAULT;
4190
4191 offset = addr & (PAGE_SIZE-1);
4192 if (WARN_ON(offset + bytes > PAGE_SIZE))
4193 bytes = (unsigned)PAGE_SIZE - offset;
4194 ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
4195 offset, bytes);
4196 if (unlikely(ret < 0))
4197 return X86EMUL_IO_NEEDED;
4198
4199 return X86EMUL_CONTINUE;
4200}
4201
4202int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
4203 gva_t addr, void *val, unsigned int bytes,
4204 struct x86_exception *exception)
4205{
4206 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4207 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4208
4209 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4210 exception);
4211}
4212EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4213
4214static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4215 gva_t addr, void *val, unsigned int bytes,
4216 struct x86_exception *exception)
4217{
4218 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4219 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
4220}
4221
4222int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4223 gva_t addr, void *val,
4224 unsigned int bytes,
4225 struct x86_exception *exception)
4226{
4227 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4228 void *data = val;
4229 int r = X86EMUL_CONTINUE;
4230
4231 while (bytes) {
4232 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4233 PFERR_WRITE_MASK,
4234 exception);
4235 unsigned offset = addr & (PAGE_SIZE-1);
4236 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4237 int ret;
4238
4239 if (gpa == UNMAPPED_GVA)
4240 return X86EMUL_PROPAGATE_FAULT;
4241 ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
4242 if (ret < 0) {
4243 r = X86EMUL_IO_NEEDED;
4244 goto out;
4245 }
4246
4247 bytes -= towrite;
4248 data += towrite;
4249 addr += towrite;
4250 }
4251out:
4252 return r;
4253}
4254EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4255
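/*
 * Translate a guest virtual address for an emulated access and classify
 * the target.  Returns -1 if the translation faults (*exception is
 * filled in), 1 if the access must be handled as MMIO, and 0 for
 * ordinary guest RAM.
 */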
4256static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4257 gpa_t *gpa, struct x86_exception *exception,
4258 bool write)
4259{
4260 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
4261 | (write ? PFERR_WRITE_MASK : 0);
4262
4263 if (vcpu_match_mmio_gva(vcpu, gva)
4264 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
4265 vcpu->arch.access, access)) {
4266 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
4267 (gva & (PAGE_SIZE - 1));
4268 trace_vcpu_match_mmio(gva, *gpa, write, false);
4269 return 1;
4270 }
4271
4272 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4273
4274 if (*gpa == UNMAPPED_GVA)
4275 return -1;
4276
	/* For APIC access vmexit */
4278 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4279 return 1;
4280
4281 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
4282 trace_vcpu_match_mmio(gva, *gpa, write, true);
4283 return 1;
4284 }
4285
4286 return 0;
4287}
4288
4289int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4290 const void *val, int bytes)
4291{
4292 int ret;
4293
4294 ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
4295 if (ret < 0)
4296 return 0;
4297 kvm_mmu_pte_write(vcpu, gpa, val, bytes);
4298 return 1;
4299}
4300
4301struct read_write_emulator_ops {
4302 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
4303 int bytes);
4304 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
4305 void *val, int bytes);
4306 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4307 int bytes, void *val);
4308 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4309 void *val, int bytes);
4310 bool write;
4311};
4312
4313static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4314{
4315 if (vcpu->mmio_read_completed) {
4316 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4317 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
4318 vcpu->mmio_read_completed = 0;
4319 return 1;
4320 }
4321
4322 return 0;
4323}
4324
4325static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4326 void *val, int bytes)
4327{
4328 return !kvm_read_guest(vcpu->kvm, gpa, val, bytes);
4329}
4330
4331static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4332 void *val, int bytes)
4333{
4334 return emulator_write_phys(vcpu, gpa, val, bytes);
4335}
4336
4337static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4338{
4339 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4340 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4341}
4342
4343static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4344 void *val, int bytes)
4345{
4346 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4347 return X86EMUL_IO_NEEDED;
4348}
4349
4350static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4351 void *val, int bytes)
4352{
4353 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
4354
4355 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
4356 return X86EMUL_CONTINUE;
4357}
4358
4359static const struct read_write_emulator_ops read_emultor = {
4360 .read_write_prepare = read_prepare,
4361 .read_write_emulate = read_emulate,
4362 .read_write_mmio = vcpu_mmio_read,
4363 .read_write_exit_mmio = read_exit_mmio,
4364};
4365
4366static const struct read_write_emulator_ops write_emultor = {
4367 .read_write_emulate = write_emulate,
4368 .read_write_mmio = write_mmio,
4369 .read_write_exit_mmio = write_exit_mmio,
4370 .write = true,
4371};
4372
4373static int emulator_read_write_onepage(unsigned long addr, void *val,
4374 unsigned int bytes,
4375 struct x86_exception *exception,
4376 struct kvm_vcpu *vcpu,
4377 const struct read_write_emulator_ops *ops)
4378{
4379 gpa_t gpa;
4380 int handled, ret;
4381 bool write = ops->write;
4382 struct kvm_mmio_fragment *frag;
4383
4384 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
4385
4386 if (ret < 0)
4387 return X86EMUL_PROPAGATE_FAULT;
4388
	/* ret == 1 means the access falls in an MMIO region */
	if (ret)
4391 goto mmio;
4392
4393 if (ops->read_write_emulate(vcpu, gpa, val, bytes))
4394 return X86EMUL_CONTINUE;
4395
4396mmio:
	/*
	 * Is this MMIO handled locally?
	 */
4400 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
4401 if (handled == bytes)
4402 return X86EMUL_CONTINUE;
4403
4404 gpa += handled;
4405 bytes -= handled;
4406 val += handled;
4407
4408 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
4409 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
4410 frag->gpa = gpa;
4411 frag->data = val;
4412 frag->len = bytes;
4413 return X86EMUL_CONTINUE;
4414}
4415
4416int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
4417 void *val, unsigned int bytes,
4418 struct x86_exception *exception,
4419 const struct read_write_emulator_ops *ops)
4420{
4421 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4422 gpa_t gpa;
4423 int rc;
4424
4425 if (ops->read_write_prepare &&
4426 ops->read_write_prepare(vcpu, val, bytes))
4427 return X86EMUL_CONTINUE;
4428
4429 vcpu->mmio_nr_fragments = 0;
4430
	/* Crossing a page boundary? */
4432 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4433 int now;
4434
4435 now = -addr & ~PAGE_MASK;
4436 rc = emulator_read_write_onepage(addr, val, now, exception,
4437 vcpu, ops);
4438
4439 if (rc != X86EMUL_CONTINUE)
4440 return rc;
4441 addr += now;
4442 val += now;
4443 bytes -= now;
4444 }
4445
4446 rc = emulator_read_write_onepage(addr, val, bytes, exception,
4447 vcpu, ops);
4448 if (rc != X86EMUL_CONTINUE)
4449 return rc;
4450
4451 if (!vcpu->mmio_nr_fragments)
4452 return rc;
4453
4454 gpa = vcpu->mmio_fragments[0].gpa;
4455
4456 vcpu->mmio_needed = 1;
4457 vcpu->mmio_cur_fragment = 0;
4458
4459 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
4460 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
4461 vcpu->run->exit_reason = KVM_EXIT_MMIO;
4462 vcpu->run->mmio.phys_addr = gpa;
4463
4464 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
4465}
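
/*
 * Completion model for emulator_read_write() above: whatever the kernel
 * cannot finish synchronously is recorded in vcpu->mmio_fragments[] and
 * the first fragment is exported through vcpu->run->mmio for a
 * KVM_EXIT_MMIO round trip.  When userspace re-enters with KVM_RUN, the
 * remaining fragments are replayed one at a time until
 * mmio_cur_fragment catches up with mmio_nr_fragments; the replay is
 * driven from the KVM_RUN path, not from here.
 */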
4466
4467static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4468 unsigned long addr,
4469 void *val,
4470 unsigned int bytes,
4471 struct x86_exception *exception)
4472{
4473 return emulator_read_write(ctxt, addr, val, bytes,
4474 exception, &read_emultor);
4475}
4476
4477int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4478 unsigned long addr,
4479 const void *val,
4480 unsigned int bytes,
4481 struct x86_exception *exception)
4482{
4483 return emulator_read_write(ctxt, addr, (void *)val, bytes,
4484 exception, &write_emultor);
4485}
4486
4487#define CMPXCHG_TYPE(t, ptr, old, new) \
4488 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4489
4490#ifdef CONFIG_X86_64
4491# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4492#else
4493# define CMPXCHG64(ptr, old, new) \
4494 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4495#endif
4496
4497static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4498 unsigned long addr,
4499 const void *old,
4500 const void *new,
4501 unsigned int bytes,
4502 struct x86_exception *exception)
4503{
4504 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4505 gpa_t gpa;
4506 struct page *page;
4507 char *kaddr;
4508 bool exchanged;
4509
	/* guests cmpxchg8b have to be emulated atomically */
4511 if (bytes > 8 || (bytes & (bytes - 1)))
4512 goto emul_write;
4513
4514 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4515
4516 if (gpa == UNMAPPED_GVA ||
4517 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4518 goto emul_write;
4519
4520 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4521 goto emul_write;
4522
4523 page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
4524 if (is_error_page(page))
4525 goto emul_write;
4526
4527 kaddr = kmap_atomic(page);
4528 kaddr += offset_in_page(gpa);
4529 switch (bytes) {
4530 case 1:
4531 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4532 break;
4533 case 2:
4534 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4535 break;
4536 case 4:
4537 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4538 break;
4539 case 8:
4540 exchanged = CMPXCHG64(kaddr, old, new);
4541 break;
4542 default:
4543 BUG();
4544 }
4545 kunmap_atomic(kaddr);
4546 kvm_release_page_dirty(page);
4547
4548 if (!exchanged)
4549 return X86EMUL_CMPXCHG_FAILED;
4550
4551 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
4552 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
4553
4554 return X86EMUL_CONTINUE;
4555
4556emul_write:
4557 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4558
4559 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4560}
4561
4562static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4563{
 /* TODO: String I/O for in kernel device */
4565 int r;
4566
4567 if (vcpu->arch.pio.in)
4568 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
4569 vcpu->arch.pio.size, pd);
4570 else
4571 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
4572 vcpu->arch.pio.port, vcpu->arch.pio.size,
4573 pd);
4574 return r;
4575}
4576
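/*
 * Record the PIO access in vcpu->arch.pio and try to complete it against
 * in-kernel devices; returns 1 on success, or 0 after filling vcpu->run
 * with a KVM_EXIT_IO so that userspace finishes the access.
 */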
4577static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4578 unsigned short port, void *val,
4579 unsigned int count, bool in)
4580{
4581 vcpu->arch.pio.port = port;
4582 vcpu->arch.pio.in = in;
4583 vcpu->arch.pio.count = count;
4584 vcpu->arch.pio.size = size;
4585
4586 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4587 vcpu->arch.pio.count = 0;
4588 return 1;
4589 }
4590
4591 vcpu->run->exit_reason = KVM_EXIT_IO;
4592 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4593 vcpu->run->io.size = size;
4594 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4595 vcpu->run->io.count = count;
4596 vcpu->run->io.port = port;
4597
4598 return 0;
4599}
4600
4601static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4602 int size, unsigned short port, void *val,
4603 unsigned int count)
4604{
4605 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4606 int ret;
4607
4608 if (vcpu->arch.pio.count)
4609 goto data_avail;
4610
4611 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4612 if (ret) {
4613data_avail:
4614 memcpy(val, vcpu->arch.pio_data, size * count);
4615 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
4616 vcpu->arch.pio.count = 0;
4617 return 1;
4618 }
4619
4620 return 0;
4621}
4622
4623static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4624 int size, unsigned short port,
4625 const void *val, unsigned int count)
4626{
4627 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4628
4629 memcpy(vcpu->arch.pio_data, val, size * count);
4630 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
4631 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4632}
4633
4634static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4635{
4636 return kvm_x86_ops->get_segment_base(vcpu, seg);
4637}
4638
4639static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4640{
4641 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4642}
4643
4644int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4645{
4646 if (!need_emulate_wbinvd(vcpu))
4647 return X86EMUL_CONTINUE;
4648
4649 if (kvm_x86_ops->has_wbinvd_exit()) {
4650 int cpu = get_cpu();
4651
4652 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4653 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4654 wbinvd_ipi, NULL, 1);
4655 put_cpu();
4656 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4657 } else
4658 wbinvd();
4659 return X86EMUL_CONTINUE;
4660}
4661EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4662
4663static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4664{
4665 kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
4666}
4667
4668int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
4669{
4670 return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4671}
4672
4673int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
4674{
4676 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4677}
4678
4679static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4680{
4681 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4682}
4683
4684static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4685{
4686 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4687 unsigned long value;
4688
4689 switch (cr) {
4690 case 0:
4691 value = kvm_read_cr0(vcpu);
4692 break;
4693 case 2:
4694 value = vcpu->arch.cr2;
4695 break;
4696 case 3:
4697 value = kvm_read_cr3(vcpu);
4698 break;
4699 case 4:
4700 value = kvm_read_cr4(vcpu);
4701 break;
4702 case 8:
4703 value = kvm_get_cr8(vcpu);
4704 break;
4705 default:
4706 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4707 return 0;
4708 }
4709
4710 return value;
4711}
4712
4713static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4714{
4715 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4716 int res = 0;
4717
4718 switch (cr) {
4719 case 0:
4720 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4721 break;
4722 case 2:
4723 vcpu->arch.cr2 = val;
4724 break;
4725 case 3:
4726 res = kvm_set_cr3(vcpu, val);
4727 break;
4728 case 4:
4729 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4730 break;
4731 case 8:
4732 res = kvm_set_cr8(vcpu, val);
4733 break;
4734 default:
4735 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4736 res = -1;
4737 }
4738
4739 return res;
4740}
4741
4742static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4743{
4744 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4745}
4746
4747static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4748{
4749 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4750}
4751
4752static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4753{
4754 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4755}
4756
4757static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4758{
4759 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4760}
4761
4762static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4763{
4764 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4765}
4766
4767static unsigned long emulator_get_cached_segment_base(
4768 struct x86_emulate_ctxt *ctxt, int seg)
4769{
4770 return get_segment_base(emul_to_vcpu(ctxt), seg);
4771}
4772
4773static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4774 struct desc_struct *desc, u32 *base3,
4775 int seg)
4776{
4777 struct kvm_segment var;
4778
4779 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4780 *selector = var.selector;
4781
4782 if (var.unusable) {
4783 memset(desc, 0, sizeof(*desc));
4784 return false;
4785 }
4786
4787 if (var.g)
4788 var.limit >>= 12;
4789 set_desc_limit(desc, var.limit);
4790 set_desc_base(desc, (unsigned long)var.base);
4791#ifdef CONFIG_X86_64
4792 if (base3)
4793 *base3 = var.base >> 32;
4794#endif
4795 desc->type = var.type;
4796 desc->s = var.s;
4797 desc->dpl = var.dpl;
4798 desc->p = var.present;
4799 desc->avl = var.avl;
4800 desc->l = var.l;
4801 desc->d = var.db;
4802 desc->g = var.g;
4803
4804 return true;
4805}
4806
4807static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4808 struct desc_struct *desc, u32 base3,
4809 int seg)
4810{
4811 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4812 struct kvm_segment var;
4813
4814 var.selector = selector;
4815 var.base = get_desc_base(desc);
4816#ifdef CONFIG_X86_64
4817 var.base |= ((u64)base3) << 32;
4818#endif
4819 var.limit = get_desc_limit(desc);
4820 if (desc->g)
4821 var.limit = (var.limit << 12) | 0xfff;
4822 var.type = desc->type;
4823 var.dpl = desc->dpl;
4824 var.db = desc->d;
4825 var.s = desc->s;
4826 var.l = desc->l;
4827 var.g = desc->g;
4828 var.avl = desc->avl;
4829 var.present = desc->p;
4830 var.unusable = !var.present;
4831 var.padding = 0;
4832
4833 kvm_set_segment(vcpu, &var, seg);
4835}
4836
4837static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4838 u32 msr_index, u64 *pdata)
4839{
4840 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
4841}
4842
4843static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4844 u32 msr_index, u64 data)
4845{
4846 struct msr_data msr;
4847
4848 msr.data = data;
4849 msr.index = msr_index;
4850 msr.host_initiated = false;
4851 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4852}
4853
4854static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
4855 u32 pmc)
4856{
4857 return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
4858}
4859
4860static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4861 u32 pmc, u64 *pdata)
4862{
4863 return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
4864}
4865
4866static void emulator_halt(struct x86_emulate_ctxt *ctxt)
4867{
4868 emul_to_vcpu(ctxt)->arch.halt_request = 1;
4869}
4870
4871static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
4872{
4873 preempt_disable();
4874 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
 /*
  * CR0.TS may reference the host fpu state, not the guest fpu state,
  * so it may be clear at this point.
  */
4879 clts();
4880}
4881
4882static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
4883{
4884 preempt_enable();
4885}
4886
4887static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
4888 struct x86_instruction_info *info,
4889 enum x86_intercept_stage stage)
4890{
4891 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
4892}
4893
4894static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
4895 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
4896{
4897 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
4898}
4899
4900static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
4901{
4902 return kvm_register_read(emul_to_vcpu(ctxt), reg);
4903}
4904
4905static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
4906{
4907 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
4908}
4909
4910static const struct x86_emulate_ops emulate_ops = {
4911 .read_gpr = emulator_read_gpr,
4912 .write_gpr = emulator_write_gpr,
4913 .read_std = kvm_read_guest_virt_system,
4914 .write_std = kvm_write_guest_virt_system,
4915 .fetch = kvm_fetch_guest_virt,
4916 .read_emulated = emulator_read_emulated,
4917 .write_emulated = emulator_write_emulated,
4918 .cmpxchg_emulated = emulator_cmpxchg_emulated,
4919 .invlpg = emulator_invlpg,
4920 .pio_in_emulated = emulator_pio_in_emulated,
4921 .pio_out_emulated = emulator_pio_out_emulated,
4922 .get_segment = emulator_get_segment,
4923 .set_segment = emulator_set_segment,
4924 .get_cached_segment_base = emulator_get_cached_segment_base,
4925 .get_gdt = emulator_get_gdt,
4926 .get_idt = emulator_get_idt,
4927 .set_gdt = emulator_set_gdt,
4928 .set_idt = emulator_set_idt,
4929 .get_cr = emulator_get_cr,
4930 .set_cr = emulator_set_cr,
4931 .cpl = emulator_get_cpl,
4932 .get_dr = emulator_get_dr,
4933 .set_dr = emulator_set_dr,
4934 .set_msr = emulator_set_msr,
4935 .get_msr = emulator_get_msr,
4936 .check_pmc = emulator_check_pmc,
4937 .read_pmc = emulator_read_pmc,
4938 .halt = emulator_halt,
4939 .wbinvd = emulator_wbinvd,
4940 .fix_hypercall = emulator_fix_hypercall,
4941 .get_fpu = emulator_get_fpu,
4942 .put_fpu = emulator_put_fpu,
4943 .intercept = emulator_intercept,
4944 .get_cpuid = emulator_get_cpuid,
4945};
4946
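/*
 * Propagate the emulator's view of the STI/MOV SS interrupt shadow back
 * into the vcpu, requesting an event re-evaluation when the shadow is
 * dropped.
 */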
4947static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4948{
4949 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
4950
 /*
  * an sti; sti; sequence only disable interrupts for the first
  * instruction. So, if the last instruction, be it emulated or
  * not, left the system with the INT_STI flag enabled, it
  * means that the last instruction is an sti. We should not
  * re-enable interrupt for sti; sti; sequences.
  */
4957 if (int_shadow & mask)
4958 mask = 0;
4959 if (unlikely(int_shadow || mask)) {
4960 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4961 if (!mask)
4962 kvm_make_request(KVM_REQ_EVENT, vcpu);
4963 }
4964}
4965
4966static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
4967{
4968 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4969 if (ctxt->exception.vector == PF_VECTOR)
4970 return kvm_propagate_fault(vcpu, &ctxt->exception);
4971
4972 if (ctxt->exception.error_code_valid)
4973 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
4974 ctxt->exception.error_code);
4975 else
4976 kvm_queue_exception(vcpu, ctxt->exception.vector);
4977 return false;
4978}
4979
4980static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4981{
4982 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4983 int cs_db, cs_l;
4984
4985 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
4986
4987 ctxt->eflags = kvm_get_rflags(vcpu);
4988 ctxt->eip = kvm_rip_read(vcpu);
4989 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
4990 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
4991 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
4992 cs_db ? X86EMUL_MODE_PROT32 :
4993 X86EMUL_MODE_PROT16;
4994 ctxt->guest_mode = is_guest_mode(vcpu);
4995
4996 init_decode_cache(ctxt);
4997 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
4998}
4999
5000int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
5001{
5002 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5003 int ret;
5004
5005 init_emulate_ctxt(vcpu);
5006
5007 ctxt->op_bytes = 2;
5008 ctxt->ad_bytes = 2;
5009 ctxt->_eip = ctxt->eip + inc_eip;
5010 ret = emulate_int_real(ctxt, irq);
5011
5012 if (ret != X86EMUL_CONTINUE)
5013 return EMULATE_FAIL;
5014
5015 ctxt->eip = ctxt->_eip;
5016 kvm_rip_write(vcpu, ctxt->eip);
5017 kvm_set_rflags(vcpu, ctxt->eflags);
5018
5019 if (irq == NMI_VECTOR)
5020 vcpu->arch.nmi_pending = 0;
5021 else
5022 vcpu->arch.interrupt.pending = false;
5023
5024 return EMULATE_DONE;
5025}
5026EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
5027
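/*
 * Emulation failed: inject #UD into the guest, and for CPL0 failures
 * outside guest mode report an internal error to userspace as well.
 */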
5028static int handle_emulation_failure(struct kvm_vcpu *vcpu)
5029{
5030 int r = EMULATE_DONE;
5031
5032 ++vcpu->stat.insn_emulation_fail;
5033 trace_kvm_emulate_insn_failed(vcpu);
5034 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
5035 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5036 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
5037 vcpu->run->internal.ndata = 0;
5038 r = EMULATE_FAIL;
5039 }
5040 kvm_queue_exception(vcpu, UD_VECTOR);
5041
5042 return r;
5043}
5044
5045static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
5046 bool write_fault_to_shadow_pgtable,
5047 int emulation_type)
5048{
5049 gpa_t gpa = cr2;
5050 pfn_t pfn;
5051
5052 if (emulation_type & EMULTYPE_NO_REEXECUTE)
5053 return false;
5054
5055 if (!vcpu->arch.mmu.direct_map) {
 /*
  * Write permission should be allowed since only
  * write access need to be emulated.
  */
5060 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5061
 /*
  * If the mapping is invalid in guest, let cpu retry
  * it to generate fault.
  */
5066 if (gpa == UNMAPPED_GVA)
5067 return true;
5068 }
5069
 /*
  * Do not retry the unhandleable instruction if it faults on the
  * readonly host memory, otherwise it will goto emulation shortly.
  */
5076 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
5077
 /*
  * If the instruction failed on the error pfn, it can not be
  * fixed, report the error to userspace.
  */
5082 if (is_error_noslot_pfn(pfn))
5083 return false;
5084
5085 kvm_release_pfn_clean(pfn);
5086
 /* The instructions are well-emulated on direct mmu. */
5088 if (vcpu->arch.mmu.direct_map) {
5089 unsigned int indirect_shadow_pages;
5090
5091 spin_lock(&vcpu->kvm->mmu_lock);
5092 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
5093 spin_unlock(&vcpu->kvm->mmu_lock);
5094
5095 if (indirect_shadow_pages)
5096 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5097
5098 return true;
5099 }
5100
 /*
  * if emulation was due to access to shadowed page table
  * and it failed try to unshadow page and re-enter the
  * guest to let CPU execute the instruction.
  */
5106 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5107
 /*
  * If the access faults on its page table, it can not
  * be fixed by unprotecting shadow page and it should
  * be reported to userspace.
  */
5113 return !write_fault_to_shadow_pgtable;
5114}
5115
5116static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
5117 unsigned long cr2, int emulation_type)
5118{
5119 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5120 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
5121
5122 last_retry_eip = vcpu->arch.last_retry_eip;
5123 last_retry_addr = vcpu->arch.last_retry_addr;
5124
 /*
  * If the emulation is caused by #PF and it is non-page_table
  * writing instruction, it means the VM-EXIT is caused by shadow
  * page protected, we can zap the shadow page and retry this
  * instruction directly.
  *
  * Note: if the guest uses a non-page-table modifying instruction
  * on the PDE that points to the instruction, then we will unmap
  * the instruction and go to an infinite loop. So, we cache the
  * last retried eip and the last fault address, if we meet the eip
  * and the address again, we can break out of the potential infinite
  * loop.
  */
5138 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
5139
5140 if (!(emulation_type & EMULTYPE_RETRY))
5141 return false;
5142
5143 if (x86_page_table_writing_insn(ctxt))
5144 return false;
5145
5146 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
5147 return false;
5148
5149 vcpu->arch.last_retry_eip = ctxt->eip;
5150 vcpu->arch.last_retry_addr = cr2;
5151
5152 if (!vcpu->arch.mmu.direct_map)
5153 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5154
5155 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5156
5157 return true;
5158}
5159
5160static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
5161static int complete_emulated_pio(struct kvm_vcpu *vcpu);
5162
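/*
 * Match addr against the four debug address registers, honouring the
 * per-breakpoint enable and type/len fields of dr7, and return the DR6
 * hit bits for every breakpoint that matches.
 */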
5163static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
5164 unsigned long *db)
5165{
5166 u32 dr6 = 0;
5167 int i;
5168 u32 enable, rwlen;
5169
5170 enable = dr7;
5171 rwlen = dr7 >> 16;
5172 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
5173 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
5174 dr6 |= (1 << i);
5175 return dr6;
5176}
5177
5178static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
5179{
5180 struct kvm_run *kvm_run = vcpu->run;
5181
 /*
  * rflags is the old, "raw" value of the flags.  The new value has
  * not been saved yet.
  *
  * This is correct even for TF set by the guest, because "the
  * processor will not generate this exception after the instruction
  * that sets the TF flag".
  */
5190 if (unlikely(rflags & X86_EFLAGS_TF)) {
5191 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5192 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
5193 DR6_RTM;
5194 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
5195 kvm_run->debug.arch.exception = DB_VECTOR;
5196 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5197 *r = EMULATE_USER_EXIT;
5198 } else {
5199 vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
 /*
  * "Certain debug exceptions may clear bit 0-3.  The
  * remaining contents of the DR6 register are never
  * cleared by the processor".
  */
5205 vcpu->arch.dr6 &= ~15;
5206 vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
5207 kvm_queue_exception(vcpu, DB_VECTOR);
5208 }
5209 }
5210}
5211
5212static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5213{
5214 struct kvm_run *kvm_run = vcpu->run;
5215 unsigned long eip = vcpu->arch.emulate_ctxt.eip;
5216 u32 dr6 = 0;
5217
5218 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
5219 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
5220 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5221 vcpu->arch.guest_debug_dr7,
5222 vcpu->arch.eff_db);
5223
5224 if (dr6 != 0) {
5225 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
5226 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
5227 get_segment_base(vcpu, VCPU_SREG_CS);
5228
5229 kvm_run->debug.arch.exception = DB_VECTOR;
5230 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5231 *r = EMULATE_USER_EXIT;
5232 return true;
5233 }
5234 }
5235
5236 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
5237 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
5238 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5239 vcpu->arch.dr7,
5240 vcpu->arch.db);
5241
5242 if (dr6 != 0) {
5243 vcpu->arch.dr6 &= ~15;
5244 vcpu->arch.dr6 |= dr6 | DR6_RTM;
5245 kvm_queue_exception(vcpu, DB_VECTOR);
5246 *r = EMULATE_DONE;
5247 return true;
5248 }
5249 }
5250
5251 return false;
5252}
5253
5254int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5255 unsigned long cr2,
5256 int emulation_type,
5257 void *insn,
5258 int insn_len)
5259{
5260 int r;
5261 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5262 bool writeback = true;
5263 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
5264
 /*
  * Clear write_fault_to_shadow_pgtable here to ensure it is
  * never reused.
  */
5269 vcpu->arch.write_fault_to_shadow_pgtable = false;
5270 kvm_clear_exception_queue(vcpu);
5271
5272 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
5273 init_emulate_ctxt(vcpu);
5274
 /*
  * We will reenter on the same instruction since
  * we do not set complete_userspace_io.  This does not
  * handle watchpoints yet, those would be handled in
  * the emulate_ops.
  */
5281 if (kvm_vcpu_check_breakpoint(vcpu, &r))
5282 return r;
5283
5284 ctxt->interruptibility = 0;
5285 ctxt->have_exception = false;
5286 ctxt->exception.vector = -1;
5287 ctxt->perm_ok = false;
5288
5289 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
5290
5291 r = x86_decode_insn(ctxt, insn, insn_len);
5292
5293 trace_kvm_emulate_insn_start(vcpu);
5294 ++vcpu->stat.insn_emulation;
5295 if (r != EMULATION_OK) {
5296 if (emulation_type & EMULTYPE_TRAP_UD)
5297 return EMULATE_FAIL;
5298 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5299 emulation_type))
5300 return EMULATE_DONE;
5301 if (emulation_type & EMULTYPE_SKIP)
5302 return EMULATE_FAIL;
5303 return handle_emulation_failure(vcpu);
5304 }
5305 }
5306
5307 if (emulation_type & EMULTYPE_SKIP) {
5308 kvm_rip_write(vcpu, ctxt->_eip);
5309 if (ctxt->eflags & X86_EFLAGS_RF)
5310 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
5311 return EMULATE_DONE;
5312 }
5313
5314 if (retry_instruction(ctxt, cr2, emulation_type))
5315 return EMULATE_DONE;
5316
 /* this is needed for vmware backdoor interface to work since it
    changes registers values during IO operation */
5319 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
5320 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5321 emulator_invalidate_register_cache(ctxt);
5322 }
5323
5324restart:
5325 r = x86_emulate_insn(ctxt);
5326
5327 if (r == EMULATION_INTERCEPTED)
5328 return EMULATE_DONE;
5329
5330 if (r == EMULATION_FAILED) {
5331 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5332 emulation_type))
5333 return EMULATE_DONE;
5334
5335 return handle_emulation_failure(vcpu);
5336 }
5337
5338 if (ctxt->have_exception) {
5339 r = EMULATE_DONE;
5340 if (inject_emulated_exception(vcpu))
5341 return r;
5342 } else if (vcpu->arch.pio.count) {
5343 if (!vcpu->arch.pio.in) {
 /* FIXME: return into emulator if single-stepping. */
5345 vcpu->arch.pio.count = 0;
5346 } else {
5347 writeback = false;
5348 vcpu->arch.complete_userspace_io = complete_emulated_pio;
5349 }
5350 r = EMULATE_USER_EXIT;
5351 } else if (vcpu->mmio_needed) {
5352 if (!vcpu->mmio_is_write)
5353 writeback = false;
5354 r = EMULATE_USER_EXIT;
5355 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5356 } else if (r == EMULATION_RESTART)
5357 goto restart;
5358 else
5359 r = EMULATE_DONE;
5360
5361 if (writeback) {
5362 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5363 toggle_interruptibility(vcpu, ctxt->interruptibility);
5364 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5365 kvm_rip_write(vcpu, ctxt->eip);
5366 if (r == EMULATE_DONE)
5367 kvm_vcpu_check_singlestep(vcpu, rflags, &r);
5368 __kvm_set_rflags(vcpu, ctxt->eflags);
5369
 /*
  * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
  * do nothing, and it will be requested again as soon as
  * the shadow expires.  But we still need to check here,
  * because POPF has no interrupt shadow.
  */
5376 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
5377 kvm_make_request(KVM_REQ_EVENT, vcpu);
5378 } else
5379 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5380
5381 return r;
5382}
5383EXPORT_SYMBOL_GPL(x86_emulate_instruction);
5384
5385int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
5386{
5387 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
5388 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
5389 size, port, &val, 1);
5390
5391 vcpu->arch.pio.count = 0;
5392 return ret;
5393}
5394EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
5395
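/* Invalidate this CPU's cached TSC frequency (called on CPU_DOWN_PREPARE). */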
5396static void tsc_bad(void *info)
5397{
5398 __this_cpu_write(cpu_tsc_khz, 0);
5399}
5400
5401static void tsc_khz_changed(void *data)
5402{
5403 struct cpufreq_freqs *freq = data;
5404 unsigned long khz = 0;
5405
5406 if (data)
5407 khz = freq->new;
5408 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5409 khz = cpufreq_quick_get(raw_smp_processor_id());
5410 if (!khz)
5411 khz = tsc_khz;
5412 __this_cpu_write(cpu_tsc_khz, khz);
5413}
5414
5415static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
5416 void *data)
5417{
5418 struct cpufreq_freqs *freq = data;
5419 struct kvm *kvm;
5420 struct kvm_vcpu *vcpu;
5421 int i, send_ipi = 0;
5422
 /*
  * We allow guests to temporarily run on slowing clocks,
  * provided we notify them after, or to run on accelerating
  * clocks, provided we notify them before.  Thus time never
  * goes backwards.
  *
  * However, we have a problem.  We can't atomically update
  * the frequency of a given CPU from this function; it is
  * merely a notifier, which can be called from any CPU.
  * Changing the TSC frequency at arbitrary points in time
  * requires a recomputation of local variables related to
  * the TSC for each VCPU.  We must flag these local variables
  * to be updated and be sure the update takes place with the
  * new frequency before any guests proceed.
  *
  * Unfortunately, the combination of hotplug CPU and frequency
  * change creates an intractable locking scenario; the order
  * of when these callouts happen is undefined with respect to
  * CPU hotplug, and they can race with each other.  As such,
  * merging the callouts to update the frequency and hotplug into
  * a single notifier is not possible.  Instead, the frequency
  * update below only flags the vcpus on the affected CPU for a
  * clock update, and the separate hotplug notifier re-reads the
  * frequency when a CPU comes back online.
  */
5462 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
5463 return 0;
5464 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
5465 return 0;
5466
5467 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5468
5469 spin_lock(&kvm_lock);
5470 list_for_each_entry(kvm, &vm_list, vm_list) {
5471 kvm_for_each_vcpu(i, vcpu, kvm) {
5472 if (vcpu->cpu != freq->cpu)
5473 continue;
5474 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5475 if (vcpu->cpu != smp_processor_id())
5476 send_ipi = 1;
5477 }
5478 }
5479 spin_unlock(&kvm_lock);
5480
5481 if (freq->old < freq->new && send_ipi) {
 /*
  * In case we update the frequency for another cpu (which might
  * be in guest context) send an interrupt to kick the cpu out of
  * guest context.  Next time guest context is entered kvmclock
  * will be updated, so the guest will not see stale values.
  */
5494 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5495 }
5496 return 0;
5497}
5498
5499static struct notifier_block kvmclock_cpufreq_notifier_block = {
5500 .notifier_call = kvmclock_cpufreq_notifier
5501};
5502
5503static int kvmclock_cpu_notifier(struct notifier_block *nfb,
5504 unsigned long action, void *hcpu)
5505{
5506 unsigned int cpu = (unsigned long)hcpu;
5507
5508 switch (action) {
5509 case CPU_ONLINE:
5510 case CPU_DOWN_FAILED:
5511 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5512 break;
5513 case CPU_DOWN_PREPARE:
5514 smp_call_function_single(cpu, tsc_bad, NULL, 1);
5515 break;
5516 }
5517 return NOTIFY_OK;
5518}
5519
5520static struct notifier_block kvmclock_cpu_notifier_block = {
5521 .notifier_call = kvmclock_cpu_notifier,
5522 .priority = -INT_MAX
5523};
5524
5525static void kvm_timer_init(void)
5526{
5527 int cpu;
5528
5529 max_tsc_khz = tsc_khz;
5530
5531 cpu_notifier_register_begin();
5532 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5533#ifdef CONFIG_CPU_FREQ
5534 struct cpufreq_policy policy;
5535 memset(&policy, 0, sizeof(policy));
5536 cpu = get_cpu();
5537 cpufreq_get_policy(&policy, cpu);
5538 if (policy.cpuinfo.max_freq)
5539 max_tsc_khz = policy.cpuinfo.max_freq;
5540 put_cpu();
5541#endif
5542 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5543 CPUFREQ_TRANSITION_NOTIFIER);
5544 }
5545 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
5546 for_each_online_cpu(cpu)
5547 smp_call_function_single(cpu, tsc_khz_changed, NULL, 1);
5548
5549 __register_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5550 cpu_notifier_register_done();
5552}
5553
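/*
 * Track the vcpu currently running on each CPU so that the perf callbacks
 * below can attribute NMI-time samples to guest context.
 */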
5554static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5555
5556int kvm_is_in_guest(void)
5557{
5558 return __this_cpu_read(current_vcpu) != NULL;
5559}
5560
5561static int kvm_is_user_mode(void)
5562{
5563 int user_mode = 3;
5564
5565 if (__this_cpu_read(current_vcpu))
5566 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
5567
5568 return user_mode != 0;
5569}
5570
5571static unsigned long kvm_get_guest_ip(void)
5572{
5573 unsigned long ip = 0;
5574
5575 if (__this_cpu_read(current_vcpu))
5576 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
5577
5578 return ip;
5579}
5580
5581static struct perf_guest_info_callbacks kvm_guest_cbs = {
5582 .is_in_guest = kvm_is_in_guest,
5583 .is_user_mode = kvm_is_user_mode,
5584 .get_guest_ip = kvm_get_guest_ip,
5585};
5586
5587void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
5588{
5589 __this_cpu_write(current_vcpu, vcpu);
5590}
5591EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
5592
5593void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
5594{
5595 __this_cpu_write(current_vcpu, NULL);
5596}
5597EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
5598
5599static void kvm_set_mmio_spte_mask(void)
5600{
5601 u64 mask;
5602 int maxphyaddr = boot_cpu_data.x86_phys_bits;
5603
 /*
  * Set the reserved bits and the present bit of a paging-structure
  * entry to generate page fault with PFER.RSV = 1.
  */
 /* Mask the reserved physical address bits. */
5609 mask = rsvd_bits(maxphyaddr, 51);
5610
 /* Bit 62 is always reserved for 32bit host. */
5612 mask |= 0x3ull << 62;
5613
 /* Set the present bit. */
5615 mask |= 1ull;
5616
5617#ifdef CONFIG_X86_64
 /*
  * If reserved bit is not supported, clear the present bit to disable
  * mmio page fault.
  */
5622 if (maxphyaddr == 52)
5623 mask &= ~1ull;
5624#endif
5625
5626 kvm_mmu_set_mmio_spte_mask(mask);
5627}
5628
5629#ifdef CONFIG_X86_64
5630static void pvclock_gtod_update_fn(struct work_struct *work)
5631{
5632 struct kvm *kvm;
5634 struct kvm_vcpu *vcpu;
5635 int i;
5636
5637 spin_lock(&kvm_lock);
5638 list_for_each_entry(kvm, &vm_list, vm_list)
5639 kvm_for_each_vcpu(i, vcpu, kvm)
5640 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
5641 atomic_set(&kvm_guest_has_master_clock, 0);
5642 spin_unlock(&kvm_lock);
5643}
5644
5645static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
5646
/*
 * Notification about pvclock gtod data update.
 */
5650static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
5651 void *priv)
5652{
5653 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
5654 struct timekeeper *tk = priv;
5655
5656 update_pvclock_gtod(tk);
5657
 /*
  * Disable master clock if host does not trust, or does not
  * use, TSC clocksource.
  */
5661 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
5662 atomic_read(&kvm_guest_has_master_clock) != 0)
5663 queue_work(system_long_wq, &pvclock_gtod_work);
5664
5665 return 0;
5666}
5667
5668static struct notifier_block pvclock_gtod_notifier = {
5669 .notifier_call = pvclock_gtod_notify,
5670};
5671#endif
5672
5673int kvm_arch_init(void *opaque)
5674{
5675 int r;
5676 struct kvm_x86_ops *ops = opaque;
5677
5678 if (kvm_x86_ops) {
5679 printk(KERN_ERR "kvm: already loaded the other module\n");
5680 r = -EEXIST;
5681 goto out;
5682 }
5683
5684 if (!ops->cpu_has_kvm_support()) {
5685 printk(KERN_ERR "kvm: no hardware support\n");
5686 r = -EOPNOTSUPP;
5687 goto out;
5688 }
5689 if (ops->disabled_by_bios()) {
5690 printk(KERN_ERR "kvm: disabled by bios\n");
5691 r = -EOPNOTSUPP;
5692 goto out;
5693 }
5694
5695 r = -ENOMEM;
5696 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
5697 if (!shared_msrs) {
5698 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
5699 goto out;
5700 }
5701
5702 r = kvm_mmu_module_init();
5703 if (r)
5704 goto out_free_percpu;
5705
5706 kvm_set_mmio_spte_mask();
5707
5708 kvm_x86_ops = ops;
5709 kvm_init_msr_list();
5710
5711 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5712 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5713
5714 kvm_timer_init();
5715
5716 perf_register_guest_info_callbacks(&kvm_guest_cbs);
5717
5718 if (cpu_has_xsave)
5719 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5720
5721 kvm_lapic_init();
5722#ifdef CONFIG_X86_64
5723 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
5724#endif
5725
5726 return 0;
5727
5728out_free_percpu:
5729 free_percpu(shared_msrs);
5730out:
5731 return r;
5732}
5733
5734void kvm_arch_exit(void)
5735{
5736 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
5737
5738 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5739 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
5740 CPUFREQ_TRANSITION_NOTIFIER);
5741 unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
5742#ifdef CONFIG_X86_64
5743 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
5744#endif
5745 kvm_x86_ops = NULL;
5746 kvm_mmu_module_exit();
5747 free_percpu(shared_msrs);
5748}
5749
5750int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5751{
5752 ++vcpu->stat.halt_exits;
5753 if (irqchip_in_kernel(vcpu->kvm)) {
5754 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
5755 return 1;
5756 } else {
5757 vcpu->run->exit_reason = KVM_EXIT_HLT;
5758 return 0;
5759 }
5760}
5761EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5762
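/*
 * Handle a Hyper-V hypercall: decode the parameters from the registers
 * (split across register pairs outside long mode), dispatch on the call
 * code and write the status back to the guest.
 */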
5763int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5764{
5765 u64 param, ingpa, outgpa, ret;
5766 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5767 bool fast, longmode;
5768
 /*
  * hypercall generates UD from non zero cpl and real mode
  * per HYPER-V spec
  */
5773 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
5774 kvm_queue_exception(vcpu, UD_VECTOR);
5775 return 0;
5776 }
5777
5778 longmode = is_64_bit_mode(vcpu);
5779
5780 if (!longmode) {
5781 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
5782 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
5783 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
5784 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
5785 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
5786 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
5787 }
5788#ifdef CONFIG_X86_64
5789 else {
5790 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
5791 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
5792 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
5793 }
5794#endif
5795
5796 code = param & 0xffff;
5797 fast = (param >> 16) & 0x1;
5798 rep_cnt = (param >> 32) & 0xfff;
5799 rep_idx = (param >> 48) & 0xfff;
5800
5801 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
5802
5803 switch (code) {
5804 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
5805 kvm_vcpu_on_spin(vcpu);
5806 break;
5807 default:
5808 res = HV_STATUS_INVALID_HYPERCALL_CODE;
5809 break;
5810 }
5811
5812 ret = res | (((u64)rep_done & 0xfff) << 32);
5813 if (longmode) {
5814 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5815 } else {
5816 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
5817 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
5818 }
5819
5820 return 1;
5821}
5822
/*
 * kvm_pv_kick_cpu_op:  Kick a vcpu.
 *
 * @apicid - apicid of vcpu to be kicked.
 */
5828static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
5829{
5830 struct kvm_lapic_irq lapic_irq;
5831
5832 lapic_irq.shorthand = 0;
5833 lapic_irq.dest_mode = 0;
5834 lapic_irq.dest_id = apicid;
5835
5836 lapic_irq.delivery_mode = APIC_DM_REMRD;
5837 kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
5838}
5839
5840int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5841{
5842 unsigned long nr, a0, a1, a2, a3, ret;
5843 int op_64_bit, r = 1;
5844
5845 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5846 return kvm_hv_hypercall(vcpu);
5847
5848 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5849 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5850 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5851 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5852 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5853
5854 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5855
5856 op_64_bit = is_64_bit_mode(vcpu);
5857 if (!op_64_bit) {
5858 nr &= 0xFFFFFFFF;
5859 a0 &= 0xFFFFFFFF;
5860 a1 &= 0xFFFFFFFF;
5861 a2 &= 0xFFFFFFFF;
5862 a3 &= 0xFFFFFFFF;
5863 }
5864
5865 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5866 ret = -KVM_EPERM;
5867 goto out;
5868 }
5869
5870 switch (nr) {
5871 case KVM_HC_VAPIC_POLL_IRQ:
5872 ret = 0;
5873 break;
5874 case KVM_HC_KICK_CPU:
5875 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
5876 ret = 0;
5877 break;
5878 default:
5879 ret = -KVM_ENOSYS;
5880 break;
5881 }
5882out:
5883 if (!op_64_bit)
5884 ret = (u32)ret;
5885 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5886 ++vcpu->stat.hypercalls;
5887 return r;
5888}
5889EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
5890
5891static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
5892{
5893 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5894 char instruction[3];
5895 unsigned long rip = kvm_rip_read(vcpu);
5896
5897 kvm_x86_ops->patch_hypercall(vcpu, instruction);
5898
5899 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
5900}
5901
/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
5908static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
5909{
5910 return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
5911 vcpu->run->request_interrupt_window &&
5912 kvm_arch_interrupt_allowed(vcpu));
5913}
5914
5915static void post_kvm_run_save(struct kvm_vcpu *vcpu)
5916{
5917 struct kvm_run *kvm_run = vcpu->run;
5918
5919 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
5920 kvm_run->cr8 = kvm_get_cr8(vcpu);
5921 kvm_run->apic_base = kvm_get_apic_base(vcpu);
5922 if (irqchip_in_kernel(vcpu->kvm))
5923 kvm_run->ready_for_interrupt_injection = 1;
5924 else
5925 kvm_run->ready_for_interrupt_injection =
5926 kvm_arch_interrupt_allowed(vcpu) &&
5927 !kvm_cpu_has_interrupt(vcpu) &&
5928 !kvm_event_needs_reinjection(vcpu);
5929}
5930
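/*
 * Tell the vendor module the highest pending interrupt priority, so it can
 * adjust the threshold at which guest CR8/TPR writes must be intercepted.
 */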
5931static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5932{
5933 int max_irr, tpr;
5934
5935 if (!kvm_x86_ops->update_cr8_intercept)
5936 return;
5937
5938 if (!vcpu->arch.apic)
5939 return;
5940
5941 if (!vcpu->arch.apic->vapic_addr)
5942 max_irr = kvm_lapic_find_highest_irr(vcpu);
5943 else
5944 max_irr = -1;
5945
5946 if (max_irr != -1)
5947 max_irr >>= 4;
5948
5949 tpr = kvm_lapic_get_cr8(vcpu);
5950
5951 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5952}
5953
5954static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5955{
5956 int r;
5957
 /* try to reinject previous events if any */
5959 if (vcpu->arch.exception.pending) {
5960 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5961 vcpu->arch.exception.has_error_code,
5962 vcpu->arch.exception.error_code);
5963
5964 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
5965 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
5966 X86_EFLAGS_RF);
5967
5968 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5969 vcpu->arch.exception.has_error_code,
5970 vcpu->arch.exception.error_code,
5971 vcpu->arch.exception.reinject);
5972 return 0;
5973 }
5974
5975 if (vcpu->arch.nmi_injected) {
5976 kvm_x86_ops->set_nmi(vcpu);
5977 return 0;
5978 }
5979
5980 if (vcpu->arch.interrupt.pending) {
5981 kvm_x86_ops->set_irq(vcpu);
5982 return 0;
5983 }
5984
5985 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
5986 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
5987 if (r != 0)
5988 return r;
5989 }
5990
 /* try to inject new event if pending */
5992 if (vcpu->arch.nmi_pending) {
5993 if (kvm_x86_ops->nmi_allowed(vcpu)) {
5994 --vcpu->arch.nmi_pending;
5995 vcpu->arch.nmi_injected = true;
5996 kvm_x86_ops->set_nmi(vcpu);
5997 }
5998 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
 /*
  * Because interrupts can be injected asynchronously, we are
  * calling check_nested_events again here to avoid a race
  * condition, so that a pending nested event is noticed before
  * the interrupt is actually injected.
  */
6006 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
6007 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
6008 if (r != 0)
6009 return r;
6010 }
6011 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
6012 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
6013 false);
6014 kvm_x86_ops->set_irq(vcpu);
6015 }
6016 }
6017 return 0;
6018}
6019
6020static void process_nmi(struct kvm_vcpu *vcpu)
6021{
6022 unsigned limit = 2;
6023
 /*
  * x86 is limited to one NMI running, and one NMI pending after it.
  * If an NMI is already in progress, limit further NMIs to just one.
  * Otherwise, allow two (and we'll inject the first one immediately).
  */
6029 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
6030 limit = 1;
6031
6032 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
6033 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
6034 kvm_make_request(KVM_REQ_EVENT, vcpu);
6035}
6036
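/*
 * Recompute the EOI-exit bitmap and trigger-mode register from the
 * ioapic routing so that virtual interrupt delivery stays in sync.
 */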
6037static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
6038{
6039 u64 eoi_exit_bitmap[4];
6040 u32 tmr[8];
6041
6042 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
6043 return;
6044
6045 memset(eoi_exit_bitmap, 0, 32);
6046 memset(tmr, 0, 32);
6047
6048 kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
6049 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
6050 kvm_apic_update_tmr(vcpu, tmr);
6051}
6052
6053static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
6054{
6055 ++vcpu->stat.tlb_flush;
6056 kvm_x86_ops->tlb_flush(vcpu);
6057}
6058
6059void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
6060{
6061 struct page *page = NULL;
6062
6063 if (!irqchip_in_kernel(vcpu->kvm))
6064 return;
6065
6066 if (!kvm_x86_ops->set_apic_access_page_addr)
6067 return;
6068
6069 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
6070 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
6071
 /*
  * Do not pin apic access page in memory, the MMU notifier
  * will call us again if it is migrated or swapped out.
  */
6076 put_page(page);
6077}
6078EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
6079
6080void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
6081 unsigned long address)
6082{
 /*
  * The physical address of apic access page is stored in the VMCS.
  * Update it when it becomes invalid.
  */
6087 if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
6088 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
6089}
6090
/*
 * Returns 1 to let __vcpu_run() continue the guest execution loop without
 * exiting to the userspace.  Otherwise, the value will be returned to the
 * userspace.
 */
6096static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6097{
6098 int r;
6099 bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
6100 vcpu->run->request_interrupt_window;
6101 bool req_immediate_exit = false;
6102
6103 if (vcpu->requests) {
6104 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
6105 kvm_mmu_unload(vcpu);
6106 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
6107 __kvm_migrate_timers(vcpu);
6108 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
6109 kvm_gen_update_masterclock(vcpu->kvm);
6110 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
6111 kvm_gen_kvmclock_update(vcpu);
6112 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
6113 r = kvm_guest_time_update(vcpu);
6114 if (unlikely(r))
6115 goto out;
6116 }
6117 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
6118 kvm_mmu_sync_roots(vcpu);
6119 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
6120 kvm_vcpu_flush_tlb(vcpu);
6121 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
6122 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
6123 r = 0;
6124 goto out;
6125 }
6126 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
6127 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
6128 r = 0;
6129 goto out;
6130 }
6131 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
6132 vcpu->fpu_active = 0;
6133 kvm_x86_ops->fpu_deactivate(vcpu);
6134 }
6135 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
 /* Page is swapped out. Do synthetic halt */
6137 vcpu->arch.apf.halted = true;
6138 r = 1;
6139 goto out;
6140 }
6141 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
6142 record_steal_time(vcpu);
6143 if (kvm_check_request(KVM_REQ_NMI, vcpu))
6144 process_nmi(vcpu);
6145 if (kvm_check_request(KVM_REQ_PMU, vcpu))
6146 kvm_handle_pmu_event(vcpu);
6147 if (kvm_check_request(KVM_REQ_PMI, vcpu))
6148 kvm_deliver_pmi(vcpu);
6149 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
6150 vcpu_scan_ioapic(vcpu);
6151 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
6152 kvm_vcpu_reload_apic_access_page(vcpu);
6153 }
6154
6155 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
6156 kvm_apic_accept_events(vcpu);
6157 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
6158 r = 1;
6159 goto out;
6160 }
6161
6162 if (inject_pending_event(vcpu, req_int_win) != 0)
6163 req_immediate_exit = true;
 /* enable NMI/IRQ window open exits if needed */
6165 else if (vcpu->arch.nmi_pending)
6166 kvm_x86_ops->enable_nmi_window(vcpu);
6167 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
6168 kvm_x86_ops->enable_irq_window(vcpu);
6169
6170 if (kvm_lapic_enabled(vcpu)) {
 /*
  * Update architecture specific hints for APIC
  * virtual interrupt delivery.
  */
6175 if (kvm_x86_ops->hwapic_irr_update)
6176 kvm_x86_ops->hwapic_irr_update(vcpu,
6177 kvm_lapic_find_highest_irr(vcpu));
6178 update_cr8_intercept(vcpu);
6179 kvm_lapic_sync_to_vapic(vcpu);
6180 }
6181 }
6182
6183 r = kvm_mmu_reload(vcpu);
6184 if (unlikely(r)) {
6185 goto cancel_injection;
6186 }
6187
6188 preempt_disable();
6189
6190 kvm_x86_ops->prepare_guest_switch(vcpu);
6191 if (vcpu->fpu_active)
6192 kvm_load_guest_fpu(vcpu);
6193 kvm_load_guest_xcr0(vcpu);
6194
6195 vcpu->mode = IN_GUEST_MODE;
6196
6197 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6198
 /*
  * We should set ->mode before checking ->requests,
  * see the comment in make_all_cpus_request.
  */
6202 smp_mb__after_srcu_read_unlock();
6203
6204 local_irq_disable();
6205
6206 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
6207 || need_resched() || signal_pending(current)) {
6208 vcpu->mode = OUTSIDE_GUEST_MODE;
6209 smp_wmb();
6210 local_irq_enable();
6211 preempt_enable();
6212 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6213 r = 1;
6214 goto cancel_injection;
6215 }
6216
6217 if (req_immediate_exit)
6218 smp_send_reschedule(vcpu->cpu);
6219
6220 kvm_guest_enter();
6221
6222 if (unlikely(vcpu->arch.switch_db_regs)) {
6223 set_debugreg(0, 7);
6224 set_debugreg(vcpu->arch.eff_db[0], 0);
6225 set_debugreg(vcpu->arch.eff_db[1], 1);
6226 set_debugreg(vcpu->arch.eff_db[2], 2);
6227 set_debugreg(vcpu->arch.eff_db[3], 3);
6228 set_debugreg(vcpu->arch.dr6, 6);
6229 }
6230
6231 trace_kvm_entry(vcpu->vcpu_id);
6232 kvm_x86_ops->run(vcpu);
6233
 /*
  * Do this here before restoring debug registers on the host.  And
  * since we do this before handling the vmexit, a DR access vmexit
  * can (a) read the correct value of the debug registers, (b) set
  * KVM_DEBUGREG_WONT_EXIT again.
  */
6240 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
6241 int i;
6242
6243 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
6244 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
6245 for (i = 0; i < KVM_NR_DB_REGS; i++)
6246 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6247 }
6248
 /*
  * If the guest has used debug registers, at least dr7
  * will be disabled while returning to the host.
  * If we don't have active breakpoints in the host, we don't
  * care about the messed up debug address registers. But if
  * we have some of them active, restore the old state.
  */
6256 if (hw_breakpoint_active())
6257 hw_breakpoint_restore();
6258
6259 vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
6260 native_read_tsc());
6261
6262 vcpu->mode = OUTSIDE_GUEST_MODE;
6263 smp_wmb();
6264
 /* Interrupt is enabled by handle_external_intr() */
6266 kvm_x86_ops->handle_external_intr(vcpu);
6267
6268 ++vcpu->stat.exits;
6269
 /*
  * We must have an instruction between local_irq_enable() and
  * kvm_guest_exit(), so the timer interrupt isn't delayed by
  * the interrupt shadow.  The stat.exits increment will do nicely.
  * But we need to prevent reordering, hence this barrier():
  */
6276 barrier();
6277
6278 kvm_guest_exit();
6279
6280 preempt_enable();
6281
6282 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6283
 /*
  * Profile KVM exit RIPs:
  */
6287 if (unlikely(prof_on == KVM_PROFILING)) {
6288 unsigned long rip = kvm_rip_read(vcpu);
6289 profile_hit(KVM_PROFILING, (void *)rip);
6290 }
6291
6292 if (unlikely(vcpu->arch.tsc_always_catchup))
6293 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6294
6295 if (vcpu->arch.apic_attention)
6296 kvm_lapic_sync_from_vapic(vcpu);
6297
6298 r = kvm_x86_ops->handle_exit(vcpu);
6299 return r;
6300
6301cancel_injection:
6302 kvm_x86_ops->cancel_injection(vcpu);
6303 if (unlikely(vcpu->arch.apic_attention))
6304 kvm_lapic_sync_from_vapic(vcpu);
6305out:
6306 return r;
6307}
6308
6309
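/*
 * Main vcpu loop: run the guest while it is RUNNABLE, block it otherwise,
 * and drop back to userspace on signals, interrupt-window requests or any
 * r <= 0 from vcpu_enter_guest().
 */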
6310static int __vcpu_run(struct kvm_vcpu *vcpu)
6311{
6312 int r;
6313 struct kvm *kvm = vcpu->kvm;
6314
6315 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6316
6317 r = 1;
6318 while (r > 0) {
6319 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6320 !vcpu->arch.apf.halted)
6321 r = vcpu_enter_guest(vcpu);
6322 else {
6323 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6324 kvm_vcpu_block(vcpu);
6325 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6326 if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
6327 kvm_apic_accept_events(vcpu);
 switch (vcpu->arch.mp_state) {
 case KVM_MP_STATE_HALTED:
 vcpu->arch.pv.pv_unhalted = false;
 vcpu->arch.mp_state =
 KVM_MP_STATE_RUNNABLE;
 /* fall through */
6333 case KVM_MP_STATE_RUNNABLE:
6334 vcpu->arch.apf.halted = false;
6335 break;
6336 case KVM_MP_STATE_INIT_RECEIVED:
6337 break;
6338 default:
6339 r = -EINTR;
6340 break;
6341 }
6342 }
6343 }
6344
6345 if (r <= 0)
6346 break;
6347
6348 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
6349 if (kvm_cpu_has_pending_timer(vcpu))
6350 kvm_inject_pending_timer_irqs(vcpu);
6351
6352 if (dm_request_for_irq_injection(vcpu)) {
6353 r = -EINTR;
6354 vcpu->run->exit_reason = KVM_EXIT_INTR;
6355 ++vcpu->stat.request_irq_exits;
6356 }
6357
6358 kvm_check_async_pf_completion(vcpu);
6359
6360 if (signal_pending(current)) {
6361 r = -EINTR;
6362 vcpu->run->exit_reason = KVM_EXIT_INTR;
6363 ++vcpu->stat.signal_exits;
6364 }
6365 if (need_resched()) {
6366 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6367 cond_resched();
6368 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6369 }
6370 }
6371
6372 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6373
6374 return r;
6375}
6376
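/*
 * Re-enter the emulator (without re-decoding) once userspace has completed
 * an I/O access on our behalf.
 */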
6377static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
6378{
6379 int r;
6380 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6381 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
6382 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6383 if (r != EMULATE_DONE)
6384 return 0;
6385 return 1;
6386}
6387
6388static int complete_emulated_pio(struct kvm_vcpu *vcpu)
6389{
6390 BUG_ON(!vcpu->arch.pio.count);
6391
6392 return complete_emulated_io(vcpu);
6393}
6394
/*
 * Implements the following, as a state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit
 *       copy data
 *   execute insn
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       copy data
 *       exit
 */
6413static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
6414{
6415 struct kvm_run *run = vcpu->run;
6416 struct kvm_mmio_fragment *frag;
6417 unsigned len;
6418
6419 BUG_ON(!vcpu->mmio_needed);
6420
 /* Complete previous fragment */
6422 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
6423 len = min(8u, frag->len);
6424 if (!vcpu->mmio_is_write)
6425 memcpy(frag->data, run->mmio.data, len);
6426
6427 if (frag->len <= 8) {
 /* Switch to the next fragment. */
6429 frag++;
6430 vcpu->mmio_cur_fragment++;
6431 } else {
 /* Go forward to the next mmio piece. */
6433 frag->data += len;
6434 frag->gpa += len;
6435 frag->len -= len;
6436 }
6437
6438 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
6439 vcpu->mmio_needed = 0;
6440
 /* FIXME: return into emulator if single-stepping. */
6442 if (vcpu->mmio_is_write)
6443 return 1;
6444 vcpu->mmio_read_completed = 1;
6445 return complete_emulated_io(vcpu);
6446 }
6447
6448 run->exit_reason = KVM_EXIT_MMIO;
6449 run->mmio.phys_addr = frag->gpa;
6450 if (vcpu->mmio_is_write)
6451 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
6452 run->mmio.len = min(8u, frag->len);
6453 run->mmio.is_write = vcpu->mmio_is_write;
6454 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6455 return 0;
6456}
6457
6458
6459int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
6460{
6461 int r;
6462 sigset_t sigsaved;
6463
6464 if (!tsk_used_math(current) && init_fpu(current))
6465 return -ENOMEM;
6466
6467 if (vcpu->sigset_active)
6468 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
6469
6470 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
6471 kvm_vcpu_block(vcpu);
6472 kvm_apic_accept_events(vcpu);
6473 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
6474 r = -EAGAIN;
6475 goto out;
6476 }
6477
 /* re-sync apic's tpr */
6479 if (!irqchip_in_kernel(vcpu->kvm)) {
6480 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
6481 r = -EINVAL;
6482 goto out;
6483 }
6484 }
6485
6486 if (unlikely(vcpu->arch.complete_userspace_io)) {
6487 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
6488 vcpu->arch.complete_userspace_io = NULL;
6489 r = cui(vcpu);
6490 if (r <= 0)
6491 goto out;
6492 } else
6493 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
6494
6495 r = __vcpu_run(vcpu);
6496
6497out:
6498 post_kvm_run_save(vcpu);
6499 if (vcpu->sigset_active)
6500 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
6501
6502 return r;
6503}
6504
6505int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6506{
6507 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
 /*
  * We are here if userspace calls get_regs() in the middle of
  * instruction emulation. Registers state needs to be copied
  * back from emulation context to vcpu. Userspace shouldn't do
  * that usually, but some bad designed PV devices (vmware
  * backdoor interface) need this to work
  */
6515 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
6516 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6517 }
6518 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
6519 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
6520 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
6521 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
6522 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
6523 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
6524 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
6525 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
6526#ifdef CONFIG_X86_64
6527 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
6528 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
6529 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
6530 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
6531 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
6532 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
6533 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
6534 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
6535#endif
6536
6537 regs->rip = kvm_rip_read(vcpu);
6538 regs->rflags = kvm_get_rflags(vcpu);
6539
6540 return 0;
6541}
6542
6543int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6544{
6545 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
6546 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6547
6548 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
6549 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
6550 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
6551 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
6552 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
6553 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
6554 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
6555 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
6556#ifdef CONFIG_X86_64
6557 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
6558 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
6559 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
6560 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
6561 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
6562 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
6563 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
6564 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
6565#endif
6566
6567 kvm_rip_write(vcpu, regs->rip);
6568 kvm_set_rflags(vcpu, regs->rflags);
6569
6570 vcpu->arch.exception.pending = false;
6571
6572 kvm_make_request(KVM_REQ_EVENT, vcpu);
6573
6574 return 0;
6575}
6576
6577void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
6578{
6579 struct kvm_segment cs;
6580
6581 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
6582 *db = cs.db;
6583 *l = cs.l;
6584}
6585EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
6586
6587int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
6588 struct kvm_sregs *sregs)
6589{
6590 struct desc_ptr dt;
6591
6592 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6593 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6594 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6595 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6596 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6597 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6598
6599 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6600 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6601
6602 kvm_x86_ops->get_idt(vcpu, &dt);
6603 sregs->idt.limit = dt.size;
6604 sregs->idt.base = dt.address;
6605 kvm_x86_ops->get_gdt(vcpu, &dt);
6606 sregs->gdt.limit = dt.size;
6607 sregs->gdt.base = dt.address;
6608
6609 sregs->cr0 = kvm_read_cr0(vcpu);
6610 sregs->cr2 = vcpu->arch.cr2;
6611 sregs->cr3 = kvm_read_cr3(vcpu);
6612 sregs->cr4 = kvm_read_cr4(vcpu);
6613 sregs->cr8 = kvm_get_cr8(vcpu);
6614 sregs->efer = vcpu->arch.efer;
6615 sregs->apic_base = kvm_get_apic_base(vcpu);
6616
6617 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
6618
6619 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
6620 set_bit(vcpu->arch.interrupt.nr,
6621 (unsigned long *)sregs->interrupt_bitmap);
6622
6623 return 0;
6624}
6625
6626int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
6627 struct kvm_mp_state *mp_state)
6628{
6629 kvm_apic_accept_events(vcpu);
6630 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
6631 vcpu->arch.pv.pv_unhalted)
6632 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
6633 else
6634 mp_state->mp_state = vcpu->arch.mp_state;
6635
6636 return 0;
6637}
6638
6639int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
6640 struct kvm_mp_state *mp_state)
6641{
6642 if (!kvm_vcpu_has_lapic(vcpu) &&
6643 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
6644 return -EINVAL;
6645
6646 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
6647 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
6648 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
6649 } else
6650 vcpu->arch.mp_state = mp_state->mp_state;
6651 kvm_make_request(KVM_REQ_EVENT, vcpu);
6652 return 0;
6653}
6654
6655int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
6656 int reason, bool has_error_code, u32 error_code)
6657{
6658 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6659 int ret;
6660
6661 init_emulate_ctxt(vcpu);
6662
6663 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
6664 has_error_code, error_code);
6665
6666 if (ret)
6667 return EMULATE_FAIL;
6668
6669 kvm_rip_write(vcpu, ctxt->eip);
6670 kvm_set_rflags(vcpu, ctxt->eflags);
6671 kvm_make_request(KVM_REQ_EVENT, vcpu);
6672 return EMULATE_DONE;
6673}
6674EXPORT_SYMBOL_GPL(kvm_task_switch);
6675
6676int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
6677 struct kvm_sregs *sregs)
6678{
6679 struct msr_data apic_base_msr;
6680 int mmu_reset_needed = 0;
6681 int pending_vec, max_bits, idx;
6682 struct desc_ptr dt;
6683
6684 if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
6685 return -EINVAL;
6686
6687 dt.size = sregs->idt.limit;
6688 dt.address = sregs->idt.base;
6689 kvm_x86_ops->set_idt(vcpu, &dt);
6690 dt.size = sregs->gdt.limit;
6691 dt.address = sregs->gdt.base;
6692 kvm_x86_ops->set_gdt(vcpu, &dt);
6693
6694 vcpu->arch.cr2 = sregs->cr2;
6695 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
6696 vcpu->arch.cr3 = sregs->cr3;
6697 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
6698
6699 kvm_set_cr8(vcpu, sregs->cr8);
6700
6701 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
6702 kvm_x86_ops->set_efer(vcpu, sregs->efer);
6703 apic_base_msr.data = sregs->apic_base;
6704 apic_base_msr.host_initiated = true;
6705 kvm_set_apic_base(vcpu, &apic_base_msr);
6706
6707 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
6708 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
6709 vcpu->arch.cr0 = sregs->cr0;
6710
6711 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
6712 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
6713 if (sregs->cr4 & X86_CR4_OSXSAVE)
6714 kvm_update_cpuid(vcpu);
6715
6716 idx = srcu_read_lock(&vcpu->kvm->srcu);
6717 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
6718 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
6719 mmu_reset_needed = 1;
6720 }
6721 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6722
6723 if (mmu_reset_needed)
6724 kvm_mmu_reset_context(vcpu);
6725
6726 max_bits = KVM_NR_INTERRUPTS;
6727 pending_vec = find_first_bit(
6728 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
6729 if (pending_vec < max_bits) {
6730 kvm_queue_interrupt(vcpu, pending_vec, false);
6731 pr_debug("Set back pending irq %d\n", pending_vec);
6732 }
6733
6734 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
6735 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
6736 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
6737 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
6738 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
6739 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
6740
6741 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
6742 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
6743
6744 update_cr8_intercept(vcpu);
6745
6746 /* Older userspace won't unhalt the vcpu on reset. */
6747 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
6748 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
6749 !is_protmode(vcpu))
6750 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
6751
6752 kvm_make_request(KVM_REQ_EVENT, vcpu);
6753
6754 return 0;
6755}
6756
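/*
 * Configure guest debugging from userspace: optionally inject a #DB
 * or #BP, install the hardware breakpoints, and arm single-stepping
 * by recording the linear RIP at which TF was set.
 */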
6757int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
6758 struct kvm_guest_debug *dbg)
6759{
6760 unsigned long rflags;
6761 int i, r;
6762
6763 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
6764 r = -EBUSY;
6765 if (vcpu->arch.exception.pending)
6766 goto out;
6767 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
6768 kvm_queue_exception(vcpu, DB_VECTOR);
6769 else
6770 kvm_queue_exception(vcpu, BP_VECTOR);
6771 }
6772
6773 /*
6774  * Read rflags as long as potentially injected trace flags are still
6775  * filtered out.
6776  */
6777 rflags = kvm_get_rflags(vcpu);
6778
6779 vcpu->guest_debug = dbg->control;
6780 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
6781 vcpu->guest_debug = 0;
6782
6783 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
6784 for (i = 0; i < KVM_NR_DB_REGS; ++i)
6785 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
6786 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
6787 } else {
6788 for (i = 0; i < KVM_NR_DB_REGS; i++)
6789 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6790 }
6791 kvm_update_dr7(vcpu);
6792
6793 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6794 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
6795 get_segment_base(vcpu, VCPU_SREG_CS);
6796
6797 /*
6798  * Trigger an rflags update that will inject or remove the trace
6799  * flags.
6800  */
6801 kvm_set_rflags(vcpu, rflags);
6802
6803 kvm_x86_ops->update_db_bp_intercept(vcpu);
6804
6805 r = 0;
6806
6807out:
6808
6809 return r;
6810}
6811
6812/*
6813 * Translate a guest virtual address to a guest physical address.
6814 */
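/*
 * This backs the KVM_TRANSLATE vcpu ioctl.  A minimal userspace
 * sketch (illustrative only, not part of this file):
 *
 *	struct kvm_translation tr = { .linear_address = gva };
 *	if (ioctl(vcpu_fd, KVM_TRANSLATE, &tr) == 0 && tr.valid)
 *		use(tr.physical_address);
 */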
6815int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
6816 struct kvm_translation *tr)
6817{
6818 unsigned long vaddr = tr->linear_address;
6819 gpa_t gpa;
6820 int idx;
6821
6822 idx = srcu_read_lock(&vcpu->kvm->srcu);
6823 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
6824 srcu_read_unlock(&vcpu->kvm->srcu, idx);
6825 tr->physical_address = gpa;
6826 tr->valid = gpa != UNMAPPED_GVA;
6827 tr->writeable = 1;
6828 tr->usermode = 0;
6829
6830 return 0;
6831}
6832
6833int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6834{
6835 struct i387_fxsave_struct *fxsave =
6836 &vcpu->arch.guest_fpu.state->fxsave;
6837
6838 memcpy(fpu->fpr, fxsave->st_space, 128);
6839 fpu->fcw = fxsave->cwd;
6840 fpu->fsw = fxsave->swd;
6841 fpu->ftwx = fxsave->twd;
6842 fpu->last_opcode = fxsave->fop;
6843 fpu->last_ip = fxsave->rip;
6844 fpu->last_dp = fxsave->rdp;
6845 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
6846
6847 return 0;
6848}
6849
6850int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6851{
6852 struct i387_fxsave_struct *fxsave =
6853 &vcpu->arch.guest_fpu.state->fxsave;
6854
6855 memcpy(fxsave->st_space, fpu->fpr, 128);
6856 fxsave->cwd = fpu->fcw;
6857 fxsave->swd = fpu->fsw;
6858 fxsave->twd = fpu->ftwx;
6859 fxsave->fop = fpu->last_opcode;
6860 fxsave->rip = fpu->last_ip;
6861 fxsave->rdp = fpu->last_dp;
6862 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
6863
6864 return 0;
6865}
6866
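/*
 * Allocate and initialize the guest FPU state.  Guest XCR0 starts
 * out with only x87 state enabled, and CR0.ET is forced on, as it
 * is architecturally fixed to 1 on modern CPUs.
 */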
6867int fx_init(struct kvm_vcpu *vcpu)
6868{
6869 int err;
6870
6871 err = fpu_alloc(&vcpu->arch.guest_fpu);
6872 if (err)
6873 return err;
6874
6875 fpu_finit(&vcpu->arch.guest_fpu);
6876
6877 /*
6878  * Ensure guest xcr0 is valid for loading
6879  */
6880 vcpu->arch.xcr0 = XSTATE_FP;
6881
6882 vcpu->arch.cr0 |= X86_CR0_ET;
6883
6884 return 0;
6885}
6886EXPORT_SYMBOL_GPL(fx_init);
6887
6888static void fx_free(struct kvm_vcpu *vcpu)
6889{
6890 fpu_free(&vcpu->arch.guest_fpu);
6891}
6892
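/*
 * Give the guest ownership of the FPU.  This is a no-op if the
 * guest state is already loaded; the matching kvm_put_guest_fpu()
 * saves it back and requests FPU deactivation.
 */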
6893void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
6894{
6895 if (vcpu->guest_fpu_loaded)
6896 return;
6897
6898 /*
6899  * Restore all possible states in the guest,
6900  * and assume host would use all available bits.
6901  * Guest xcr0 would be loaded later.
6902  */
6903 kvm_put_guest_xcr0(vcpu);
6904 vcpu->guest_fpu_loaded = 1;
6905 __kernel_fpu_begin();
6906 fpu_restore_checking(&vcpu->arch.guest_fpu);
6907 trace_kvm_fpu(1);
6908}
6909
6910void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
6911{
6912 kvm_put_guest_xcr0(vcpu);
6913
6914 if (!vcpu->guest_fpu_loaded)
6915 return;
6916
6917 vcpu->guest_fpu_loaded = 0;
6918 fpu_save_init(&vcpu->arch.guest_fpu);
6919 __kernel_fpu_end();
6920 ++vcpu->stat.fpu_reload;
6921 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
6922 trace_kvm_fpu(0);
6923}
6924
6925void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
6926{
6927 kvmclock_reset(vcpu);
6928
6929 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
6930 fx_free(vcpu);
6931 kvm_x86_ops->vcpu_free(vcpu);
6932}
6933
6934struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
6935 unsigned int id)
6936{
6937 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
6938 printk_once(KERN_WARNING
6939 "kvm: SMP vm created on host with unstable TSC; "
6940 "guest TSC will not be reliable\n");
6941 return kvm_x86_ops->vcpu_create(kvm, id);
6942}
6943
6944int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
6945{
6946 int r;
6947
6948 vcpu->arch.mtrr_state.have_fixed = 1;
6949 r = vcpu_load(vcpu);
6950 if (r)
6951 return r;
6952 kvm_vcpu_reset(vcpu);
6953 kvm_mmu_setup(vcpu);
6954 vcpu_put(vcpu);
6955
6956 return r;
6957}
6958
6959int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6960{
6961 int r;
6962 struct msr_data msr;
6963 struct kvm *kvm = vcpu->kvm;
6964
6965 r = vcpu_load(vcpu);
6966 if (r)
6967 return r;
6968 msr.data = 0x0;
6969 msr.index = MSR_IA32_TSC;
6970 msr.host_initiated = true;
6971 kvm_write_tsc(vcpu, &msr);
6972 vcpu_put(vcpu);
6973
6974 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
6975 KVMCLOCK_SYNC_PERIOD);
6976
6977 return r;
6978}
6979
6980void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
6981{
6982 int r;
6983 vcpu->arch.apf.msr_val = 0;
6984
6985 r = vcpu_load(vcpu);
6986 BUG_ON(r);
6987 kvm_mmu_unload(vcpu);
6988 vcpu_put(vcpu);
6989
6990 fx_free(vcpu);
6991 kvm_x86_ops->vcpu_free(vcpu);
6992}
6993
6994void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6995{
6996 atomic_set(&vcpu->arch.nmi_queued, 0);
6997 vcpu->arch.nmi_pending = 0;
6998 vcpu->arch.nmi_injected = false;
6999 kvm_clear_interrupt_queue(vcpu);
7000 kvm_clear_exception_queue(vcpu);
7001
7002 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
7003 vcpu->arch.dr6 = DR6_INIT;
7004 kvm_update_dr6(vcpu);
7005 vcpu->arch.dr7 = DR7_FIXED_1;
7006 kvm_update_dr7(vcpu);
7007
7008 kvm_make_request(KVM_REQ_EVENT, vcpu);
7009 vcpu->arch.apf.msr_val = 0;
7010 vcpu->arch.st.msr_val = 0;
7011
7012 kvmclock_reset(vcpu);
7013
7014 kvm_clear_async_pf_completion_queue(vcpu);
7015 kvm_async_pf_hash_reset(vcpu);
7016 vcpu->arch.apf.halted = false;
7017
7018 kvm_pmu_reset(vcpu);
7019
7020 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
7021 vcpu->arch.regs_avail = ~0;
7022 vcpu->arch.regs_dirty = ~0;
7023
7024 kvm_x86_ops->vcpu_reset(vcpu);
7025}
7026
7027void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
7028{
7029 struct kvm_segment cs;
7030
7031 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
7032 cs.selector = vector << 8;
7033 cs.base = vector << 12;
7034 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
7035 kvm_rip_write(vcpu, 0);
7036}
7037
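/*
 * Called on each CPU when KVM (re)enables hardware virtualization:
 * re-register the shared user-return MSRs, invoke the vendor enable
 * hook, and compensate for TSCs that went backwards across a
 * suspend (see the long comment below).
 */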
7038int kvm_arch_hardware_enable(void)
7039{
7040 struct kvm *kvm;
7041 struct kvm_vcpu *vcpu;
7042 int i;
7043 int ret;
7044 u64 local_tsc;
7045 u64 max_tsc = 0;
7046 bool stable, backwards_tsc = false;
7047
7048 kvm_shared_msr_cpu_online();
7049 ret = kvm_x86_ops->hardware_enable();
7050 if (ret != 0)
7051 return ret;
7052
7053 local_tsc = native_read_tsc();
7054 stable = !check_tsc_unstable();
7055 list_for_each_entry(kvm, &vm_list, vm_list) {
7056 kvm_for_each_vcpu(i, vcpu, kvm) {
7057 if (!stable && vcpu->cpu == smp_processor_id())
7058 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7059 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
7060 backwards_tsc = true;
7061 if (vcpu->arch.last_host_tsc > max_tsc)
7062 max_tsc = vcpu->arch.last_host_tsc;
7063 }
7064 }
7065 }
7066
7067 /*
7068  * Sometimes, even reliable TSCs go backwards.  This happens on
7069  * platforms that reset TSC during suspend or hibernate actions, but
7070  * maintain synchronization.  We must compensate.  Fortunately, we can
7071  * detect that condition here, which happens early in CPU bringup,
7072  * before any KVM threads can be running.  Unfortunately, we can't
7073  * bring the TSCs fully up to date with real time, as we aren't yet far
7074  * enough into CPU bringup that we know how much real time has actually
7075  * elapsed; our helper function, get_kernel_ns() will be using boot
7076  * variables that haven't been updated yet.
7077  *
7078  * So we simply find the maximum observed TSC above, then record the
7079  * adjustment to TSC in each VCPU.  When the VCPU later gets loaded,
7080  * the adjustment will be applied.  Note that we accumulate
7081  * adjustments, in case multiple suspend cycles happen before some VCPU
7082  * gets a chance to run again.  In the event that no KVM threads get a
7083  * chance to run, we will miss the entire elapsed period, as we'll have
7084  * reset last_host_tsc, so VCPUs will not have the TSC adjusted and
7085  * lose cycles on the other hand; it should be corrected when the
7086  * guest runs at the next clock update.
7087  *
7088  * N.B. - the code below runs only on platforms with reliable TSC,
7089  * as that is the only way backwards_tsc is set above.  Also note
7090  * that it runs for ALL vcpus, which is not a bug; all VCPUs should
7091  * have the same delta_cyc adjustment applied if backwards_tsc
7092  * is detected.
7093  */
7105 if (backwards_tsc) {
7106 u64 delta_cyc = max_tsc - local_tsc;
7107 backwards_tsc_observed = true;
7108 list_for_each_entry(kvm, &vm_list, vm_list) {
7109 kvm_for_each_vcpu(i, vcpu, kvm) {
7110 vcpu->arch.tsc_offset_adjustment += delta_cyc;
7111 vcpu->arch.last_host_tsc = local_tsc;
7112 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7113 }
7114
7115 /*
7116  * We have to disable TSC offset matching: if you were booting a
7117  * VM while issuing an S4 host suspend, the matching logic could
7118  * treat the post-resume TSC as part of the same write stream.
7119  * Solving this properly is left as an exercise to the reader.
7120  */
7121 kvm->arch.last_tsc_nsec = 0;
7122 kvm->arch.last_tsc_write = 0;
7123 }
7124
7125 }
7126 return 0;
7127}
7128
7129void kvm_arch_hardware_disable(void)
7130{
7131 kvm_x86_ops->hardware_disable();
7132 drop_user_return_notifiers();
7133}
7134
7135int kvm_arch_hardware_setup(void)
7136{
7137 return kvm_x86_ops->hardware_setup();
7138}
7139
7140void kvm_arch_hardware_unsetup(void)
7141{
7142 kvm_x86_ops->hardware_unsetup();
7143}
7144
7145void kvm_arch_check_processor_compat(void *rtn)
7146{
7147 kvm_x86_ops->check_processor_compatibility(rtn);
7148}
7149
7150bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
7151{
7152 return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
7153}
7154
7155struct static_key kvm_no_apic_vcpu __read_mostly;
7156
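/*
 * First-stage vcpu construction: allocate everything a vcpu needs
 * (pio scratch page, MMU, the in-kernel LAPIC when applicable, MCE
 * banks, wbinvd mask, FPU).  The error path unwinds in exactly the
 * reverse order through the fail_* labels.
 */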
7157int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
7158{
7159 struct page *page;
7160 struct kvm *kvm;
7161 int r;
7162
7163 BUG_ON(vcpu->kvm == NULL);
7164 kvm = vcpu->kvm;
7165
7166 vcpu->arch.pv.pv_unhalted = false;
7167 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
7168 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
7169 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7170 else
7171 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
7172
7173 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
7174 if (!page) {
7175 r = -ENOMEM;
7176 goto fail;
7177 }
7178 vcpu->arch.pio_data = page_address(page);
7179
7180 kvm_set_tsc_khz(vcpu, max_tsc_khz);
7181
7182 r = kvm_mmu_create(vcpu);
7183 if (r < 0)
7184 goto fail_free_pio_data;
7185
7186 if (irqchip_in_kernel(kvm)) {
7187 r = kvm_create_lapic(vcpu);
7188 if (r < 0)
7189 goto fail_mmu_destroy;
7190 } else
7191 static_key_slow_inc(&kvm_no_apic_vcpu);
7192
7193 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
7194 GFP_KERNEL);
7195 if (!vcpu->arch.mce_banks) {
7196 r = -ENOMEM;
7197 goto fail_free_lapic;
7198 }
7199 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
7200
7201 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
7202 r = -ENOMEM;
7203 goto fail_free_mce_banks;
7204 }
7205
7206 r = fx_init(vcpu);
7207 if (r)
7208 goto fail_free_wbinvd_dirty_mask;
7209
7210 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
7211 vcpu->arch.pv_time_enabled = false;
7212
7213 vcpu->arch.guest_supported_xcr0 = 0;
7214 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
7215
7216 kvm_async_pf_hash_reset(vcpu);
7217 kvm_pmu_init(vcpu);
7218
7219 return 0;
7220fail_free_wbinvd_dirty_mask:
7221 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
7222fail_free_mce_banks:
7223 kfree(vcpu->arch.mce_banks);
7224fail_free_lapic:
7225 kvm_free_lapic(vcpu);
7226fail_mmu_destroy:
7227 kvm_mmu_destroy(vcpu);
7228fail_free_pio_data:
7229 free_page((unsigned long)vcpu->arch.pio_data);
7230fail:
7231 return r;
7232}
7233
7234void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
7235{
7236 int idx;
7237
7238 kvm_pmu_destroy(vcpu);
7239 kfree(vcpu->arch.mce_banks);
7240 kvm_free_lapic(vcpu);
7241 idx = srcu_read_lock(&vcpu->kvm->srcu);
7242 kvm_mmu_destroy(vcpu);
7243 srcu_read_unlock(&vcpu->kvm->srcu, idx);
7244 free_page((unsigned long)vcpu->arch.pio_data);
7245 if (!irqchip_in_kernel(vcpu->kvm))
7246 static_key_slow_dec(&kvm_no_apic_vcpu);
7247}
7248
7249void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
7250{
7251 kvm_x86_ops->sched_in(vcpu, cpu);
7252}
7253
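/*
 * Per-VM state initialization; 'type' is reserved and must be zero.
 */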
7254int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7255{
7256 if (type)
7257 return -EINVAL;
7258
7259 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
7260 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
7261 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
7262 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
7263
7264 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
7265 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
7266 /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
7267 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
7268 &kvm->arch.irq_sources_bitmap);
7269
7270 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
7271 mutex_init(&kvm->arch.apic_map_lock);
7272 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
7273
7274 pvclock_update_vm_gtod_copy(kvm);
7275
7276 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
7277 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
7278
7279 return 0;
7280}
7281
7282static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
7283{
7284 int r;
7285 r = vcpu_load(vcpu);
7286 BUG_ON(r);
7287 kvm_mmu_unload(vcpu);
7288 vcpu_put(vcpu);
7289}
7290
7291static void kvm_free_vcpus(struct kvm *kvm)
7292{
7293 unsigned int i;
7294 struct kvm_vcpu *vcpu;
7295
7296 /*
7297  * Unpin any mmu pages first.
7298  */
7299 kvm_for_each_vcpu(i, vcpu, kvm) {
7300 kvm_clear_async_pf_completion_queue(vcpu);
7301 kvm_unload_vcpu_mmu(vcpu);
7302 }
7303 kvm_for_each_vcpu(i, vcpu, kvm)
7304 kvm_arch_vcpu_free(vcpu);
7305
7306 mutex_lock(&kvm->lock);
7307 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
7308 kvm->vcpus[i] = NULL;
7309
7310 atomic_set(&kvm->online_vcpus, 0);
7311 mutex_unlock(&kvm->lock);
7312}
7313
7314void kvm_arch_sync_events(struct kvm *kvm)
7315{
7316 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
7317 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
7318 kvm_free_all_assigned_devices(kvm);
7319 kvm_free_pit(kvm);
7320}
7321
7322void kvm_arch_destroy_vm(struct kvm *kvm)
7323{
7324 if (current->mm == kvm->mm) {
7325 /*
7326  * Free memory regions allocated on behalf of userspace,
7327  * unless the memory map has changed due to process exit
7328  * or fd copying.
7329  */
7330 struct kvm_userspace_memory_region mem;
7331 memset(&mem, 0, sizeof(mem));
7332 mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
7333 kvm_set_memory_region(kvm, &mem);
7334
7335 mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
7336 kvm_set_memory_region(kvm, &mem);
7337
7338 mem.slot = TSS_PRIVATE_MEMSLOT;
7339 kvm_set_memory_region(kvm, &mem);
7340 }
7341 kvm_iommu_unmap_guest(kvm);
7342 kfree(kvm->arch.vpic);
7343 kfree(kvm->arch.vioapic);
7344 kvm_free_vcpus(kvm);
7345 kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7346}
7347
7348void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
7349 struct kvm_memory_slot *dont)
7350{
7351 int i;
7352
7353 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7354 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
7355 kvm_kvfree(free->arch.rmap[i]);
7356 free->arch.rmap[i] = NULL;
7357 }
7358 if (i == 0)
7359 continue;
7360
7361 if (!dont || free->arch.lpage_info[i - 1] !=
7362 dont->arch.lpage_info[i - 1]) {
7363 kvm_kvfree(free->arch.lpage_info[i - 1]);
7364 free->arch.lpage_info[i - 1] = NULL;
7365 }
7366 }
7367}
7368
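/*
 * Allocate the per-slot MMU metadata: one rmap array per page size
 * and, for each large-page level, a write_count array that disables
 * large mappings at unaligned slot edges.  For example, a 1 GiB slot
 * needs 262144 rmap entries at the 4 KiB level but only 512
 * lpage_info entries at the 2 MiB level (262144 / 512).
 */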
7369int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7370 unsigned long npages)
7371{
7372 int i;
7373
7374 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7375 unsigned long ugfn;
7376 int lpages;
7377 int level = i + 1;
7378
7379 lpages = gfn_to_index(slot->base_gfn + npages - 1,
7380 slot->base_gfn, level) + 1;
7381
7382 slot->arch.rmap[i] =
7383 kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
7384 if (!slot->arch.rmap[i])
7385 goto out_free;
7386 if (i == 0)
7387 continue;
7388
7389 slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
7390 sizeof(*slot->arch.lpage_info[i - 1]));
7391 if (!slot->arch.lpage_info[i - 1])
7392 goto out_free;
7393
7394 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
7395 slot->arch.lpage_info[i - 1][0].write_count = 1;
7396 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
7397 slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
7398 ugfn = slot->userspace_addr >> PAGE_SHIFT;
7399 /*
7400  * If the gfn and userspace address are not aligned wrt each
7401  * other, or if explicitly asked to, disable large page
7402  * support for this slot.
7403  */
7404 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
7405 !kvm_largepages_enabled()) {
7406 unsigned long j;
7407
7408 for (j = 0; j < lpages; ++j)
7409 slot->arch.lpage_info[i - 1][j].write_count = 1;
7410 }
7411 }
7412
7413 return 0;
7414
7415out_free:
7416 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7417 kvm_kvfree(slot->arch.rmap[i]);
7418 slot->arch.rmap[i] = NULL;
7419 if (i == 0)
7420 continue;
7421
7422 kvm_kvfree(slot->arch.lpage_info[i - 1]);
7423 slot->arch.lpage_info[i - 1] = NULL;
7424 }
7425 return -ENOMEM;
7426}
7427
7428void kvm_arch_memslots_updated(struct kvm *kvm)
7429{
7430 /*
7431  * memslots->generation has been incremented.
7432  * mmio generation may have reached its maximum value.
7433  */
7434 kvm_mmu_invalidate_mmio_sptes(kvm);
7435}
7436
7437int kvm_arch_prepare_memory_region(struct kvm *kvm,
7438 struct kvm_memory_slot *memslot,
7439 struct kvm_userspace_memory_region *mem,
7440 enum kvm_mr_change change)
7441{
7442 /*
7443  * Only private memory slots need to be mapped here since
7444  * KVM_SET_MEMORY_REGION ioctl is no longer supported.
7445  */
7446 if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
7447 unsigned long userspace_addr;
7448
7449 /*
7450  * MAP_SHARED to prevent internal slot pages from being moved
7451  * by fork()/COW.
7452  */
7453 userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
7454 PROT_READ | PROT_WRITE,
7455 MAP_SHARED | MAP_ANONYMOUS, 0);
7456
7457 if (IS_ERR((void *)userspace_addr))
7458 return PTR_ERR((void *)userspace_addr);
7459
7460 memslot->userspace_addr = userspace_addr;
7461 }
7462
7463 return 0;
7464}
7465
7466void kvm_arch_commit_memory_region(struct kvm *kvm,
7467 struct kvm_userspace_memory_region *mem,
7468 const struct kvm_memory_slot *old,
7469 enum kvm_mr_change change)
7470{
7471
7472 int nr_mmu_pages = 0;
7473
7474 if ((mem->slot >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_DELETE)) {
7475 int ret;
7476
7477 ret = vm_munmap(old->userspace_addr,
7478 old->npages * PAGE_SIZE);
7479 if (ret < 0)
7480 printk(KERN_WARNING
7481 "kvm_vm_ioctl_set_memory_region: "
7482 "failed to munmap memory\n");
7483 }
7484
7485 if (!kvm->arch.n_requested_mmu_pages)
7486 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
7487
7488 if (nr_mmu_pages)
7489 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
7490
7491 /*
7492  * Write protect all pages for dirty logging.
7493  *
7494  * All the sptes including the large sptes which point to this
7495  * slot are set to readonly.  We cannot create any new large
7496  * spte on this slot until the end of the logging.
7497  * See the comments in fast_page_fault().
7498  */
7499 if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
7500 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
7501}
7502
7503void kvm_arch_flush_shadow_all(struct kvm *kvm)
7504{
7505 kvm_mmu_invalidate_zap_all_pages(kvm);
7506}
7507
7508void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
7509 struct kvm_memory_slot *slot)
7510{
7511 kvm_mmu_invalidate_zap_all_pages(kvm);
7512}
7513
7514int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
7515{
7516 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
7517 kvm_x86_ops->check_nested_events(vcpu, false);
7518
7519 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7520 !vcpu->arch.apf.halted) ||
7521 !list_empty_careful(&vcpu->async_pf.done) ||
7522 kvm_apic_has_events(vcpu) ||
7523 vcpu->arch.pv.pv_unhalted ||
7524 atomic_read(&vcpu->arch.nmi_queued) ||
7525 (kvm_arch_interrupt_allowed(vcpu) &&
7526 kvm_cpu_has_interrupt(vcpu));
7527}
7528
7529int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
7530{
7531 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
7532}
7533
7534int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
7535{
7536 return kvm_x86_ops->interrupt_allowed(vcpu);
7537}
7538
7539bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
7540{
7541 unsigned long current_rip = kvm_rip_read(vcpu) +
7542 get_segment_base(vcpu, VCPU_SREG_CS);
7543
7544 return current_rip == linear_rip;
7545}
7546EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
7547
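/*
 * RFLAGS accessors that hide the TF games played for userspace
 * single-stepping: the guest never observes a TF it did not set
 * itself, and TF is forced back on while a single-step is armed
 * at the recorded linear RIP.
 */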
7548unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
7549{
7550 unsigned long rflags;
7551
7552 rflags = kvm_x86_ops->get_rflags(vcpu);
7553 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7554 rflags &= ~X86_EFLAGS_TF;
7555 return rflags;
7556}
7557EXPORT_SYMBOL_GPL(kvm_get_rflags);
7558
7559static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7560{
7561 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
7562 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
7563 rflags |= X86_EFLAGS_TF;
7564 kvm_x86_ops->set_rflags(vcpu, rflags);
7565}
7566
7567void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7568{
7569 __kvm_set_rflags(vcpu, rflags);
7570 kvm_make_request(KVM_REQ_EVENT, vcpu);
7571}
7572EXPORT_SYMBOL_GPL(kvm_set_rflags);
7573
7574void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
7575{
7576 int r;
7577
7578 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
7579 work->wakeup_all)
7580 return;
7581
7582 r = kvm_mmu_reload(vcpu);
7583 if (unlikely(r))
7584 return;
7585
7586 if (!vcpu->arch.mmu.direct_map &&
7587 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
7588 return;
7589
7590 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
7591}
7592
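/*
 * A small open-addressed hash table tracks the gfns with an async
 * page fault in flight: ASYNC_PF_PER_VCPU slots rounded up to a
 * power of two, ~0 marking an empty slot, linear probing on
 * collision, and a deleter that re-packs the probe chain so that
 * later lookups never hit a spurious hole.
 */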
7593static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
7594{
7595 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
7596}
7597
7598static inline u32 kvm_async_pf_next_probe(u32 key)
7599{
7600 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
7601}
7602
7603static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7604{
7605 u32 key = kvm_async_pf_hash_fn(gfn);
7606
7607 while (vcpu->arch.apf.gfns[key] != ~0)
7608 key = kvm_async_pf_next_probe(key);
7609
7610 vcpu->arch.apf.gfns[key] = gfn;
7611}
7612
7613static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
7614{
7615 int i;
7616 u32 key = kvm_async_pf_hash_fn(gfn);
7617
7618 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
7619 (vcpu->arch.apf.gfns[key] != gfn &&
7620 vcpu->arch.apf.gfns[key] != ~0); i++)
7621 key = kvm_async_pf_next_probe(key);
7622
7623 return key;
7624}
7625
7626bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7627{
7628 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
7629}
7630
7631static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
7632{
7633 u32 i, j, k;
7634
7635 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
7636 while (true) {
7637 vcpu->arch.apf.gfns[i] = ~0;
7638 do {
7639 j = kvm_async_pf_next_probe(j);
7640 if (vcpu->arch.apf.gfns[j] == ~0)
7641 return;
7642 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
7643 /*
7644  * k lies cyclically in ]i,j]
7645  * |    i.k.j |
7646  * |....j i.k.| or  |.k..j i...|
7647  */
7648 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
7649 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
7650 i = j;
7651 }
7652}
7653
7654static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
7655{
7656
7657 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
7658 sizeof(val));
7659}
7660
7661void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
7662 struct kvm_async_pf *work)
7663{
7664 struct x86_exception fault;
7665
7666 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
7667 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
7668
7669 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
7670 (vcpu->arch.apf.send_user_only &&
7671 kvm_x86_ops->get_cpl(vcpu) == 0))
7672 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
7673 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
7674 fault.vector = PF_VECTOR;
7675 fault.error_code_valid = true;
7676 fault.error_code = 0;
7677 fault.nested_page_fault = false;
7678 fault.address = work->arch.token;
7679 kvm_inject_page_fault(vcpu, &fault);
7680 }
7681}
7682
7683void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
7684 struct kvm_async_pf *work)
7685{
7686 struct x86_exception fault;
7687
7688 trace_kvm_async_pf_ready(work->arch.token, work->gva);
7689 if (work->wakeup_all)
7690 work->arch.token = ~0;
7691 else
7692 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
7693
7694 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
7695 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
7696 fault.vector = PF_VECTOR;
7697 fault.error_code_valid = true;
7698 fault.error_code = 0;
7699 fault.nested_page_fault = false;
7700 fault.address = work->arch.token;
7701 kvm_inject_page_fault(vcpu, &fault);
7702 }
7703 vcpu->arch.apf.halted = false;
7704 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7705}
7706
7707bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
7708{
7709 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
7710 return true;
7711 else
7712 return !kvm_event_needs_reinjection(vcpu) &&
7713 kvm_x86_ops->interrupt_allowed(vcpu);
7714}
7715
7716void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
7717{
7718 atomic_inc(&kvm->arch.noncoherent_dma_count);
7719}
7720EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
7721
7722void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
7723{
7724 atomic_dec(&kvm->arch.noncoherent_dma_count);
7725}
7726EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
7727
7728bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
7729{
7730 return atomic_read(&kvm->arch.noncoherent_dma_count);
7731}
7732EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
7733
7734EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
7735EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
7736EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
7737EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
7738EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
7739EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
7740EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
7741EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
7742EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
7743EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
7744EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7745EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7746EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
7747EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
7748