/*
 * Kernel-based Virtual Machine driver for Linux: x86-specific code.
 */

22#include <linux/kvm_host.h>
23#include "irq.h"
24#include "mmu.h"
25#include "i8254.h"
26#include "tss.h"
27#include "kvm_cache_regs.h"
28#include "x86.h"
29#include "cpuid.h"
30#include "assigned-dev.h"
31#include "pmu.h"
32#include "hyperv.h"
33
34#include <linux/clocksource.h>
35#include <linux/interrupt.h>
36#include <linux/kvm.h>
37#include <linux/fs.h>
38#include <linux/vmalloc.h>
39#include <linux/export.h>
40#include <linux/moduleparam.h>
41#include <linux/mman.h>
42#include <linux/highmem.h>
43#include <linux/iommu.h>
44#include <linux/intel-iommu.h>
45#include <linux/cpufreq.h>
46#include <linux/user-return-notifier.h>
47#include <linux/srcu.h>
48#include <linux/slab.h>
49#include <linux/perf_event.h>
50#include <linux/uaccess.h>
51#include <linux/hash.h>
52#include <linux/pci.h>
53#include <linux/timekeeper_internal.h>
54#include <linux/pvclock_gtod.h>
55#include <linux/kvm_irqfd.h>
56#include <linux/irqbypass.h>
57#include <trace/events/kvm.h>
58
59#include <asm/debugreg.h>
60#include <asm/msr.h>
61#include <asm/desc.h>
62#include <asm/mce.h>
63#include <linux/kernel_stat.h>
64#include <asm/fpu/internal.h>
65#include <asm/pvclock.h>
66#include <asm/div64.h>
67#include <asm/irq_remapping.h>
68
69#define CREATE_TRACE_POINTS
70#include "trace.h"
71
72#define MAX_IO_MSRS 256
73#define KVM_MAX_MCE_BANKS 32
74u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
75EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
76
77#define emul_to_vcpu(ctxt) \
78 container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
79
80
81
82
83
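/*
 * EFER bits the guest is allowed to set: SCE is always permitted, and
 * LME/LMA are additionally permitted on 64-bit hosts.  Everything else
 * starts out reserved; kvm_enable_efer_bits() below clears further bits
 * from this mask when the corresponding feature is available.
 */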
84#ifdef CONFIG_X86_64
85static
86u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
87#else
88static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
89#endif
90
91#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
92#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
93
94#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
95 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
96
97static void update_cr8_intercept(struct kvm_vcpu *vcpu);
98static void process_nmi(struct kvm_vcpu *vcpu);
99static void enter_smm(struct kvm_vcpu *vcpu);
100static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
101
102struct kvm_x86_ops *kvm_x86_ops __read_mostly;
103EXPORT_SYMBOL_GPL(kvm_x86_ops);
104
105static bool __read_mostly ignore_msrs = 0;
106module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
107
108unsigned int min_timer_period_us = 500;
109module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
110
111static bool __read_mostly kvmclock_periodic_sync = true;
112module_param(kvmclock_periodic_sync, bool, S_IRUGO);
113
114bool __read_mostly kvm_has_tsc_control;
115EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
116u32 __read_mostly kvm_max_guest_tsc_khz;
117EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
118u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
119EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
120u64 __read_mostly kvm_max_tsc_scaling_ratio;
121EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
122u64 __read_mostly kvm_default_tsc_scaling_ratio;
123EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
124
125
126static u32 __read_mostly tsc_tolerance_ppm = 250;
127module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
128
129
130unsigned int __read_mostly lapic_timer_advance_ns = 0;
131module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
132
133static bool __read_mostly vector_hashing = true;
134module_param(vector_hashing, bool, S_IRUGO);
135
136static bool __read_mostly backwards_tsc_observed = false;
137
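/*
 * "Shared" (user-return) MSRs: MSRs whose host values only matter once the
 * vCPU thread is back in userspace.  kvm_define_shared_msr() registers an
 * MSR slot globally, shared_msr_update() snapshots the host value per CPU,
 * kvm_set_shared_msr() installs the guest value and arms a user-return
 * notifier, and kvm_on_user_return() restores the host values when the
 * thread returns to userspace.
 */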
138#define KVM_NR_SHARED_MSRS 16
139
140struct kvm_shared_msrs_global {
141 int nr;
142 u32 msrs[KVM_NR_SHARED_MSRS];
143};
144
145struct kvm_shared_msrs {
146 struct user_return_notifier urn;
147 bool registered;
148 struct kvm_shared_msr_values {
149 u64 host;
150 u64 curr;
151 } values[KVM_NR_SHARED_MSRS];
152};
153
154static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
155static struct kvm_shared_msrs __percpu *shared_msrs;
156
157struct kvm_stats_debugfs_item debugfs_entries[] = {
158 { "pf_fixed", VCPU_STAT(pf_fixed) },
159 { "pf_guest", VCPU_STAT(pf_guest) },
160 { "tlb_flush", VCPU_STAT(tlb_flush) },
161 { "invlpg", VCPU_STAT(invlpg) },
162 { "exits", VCPU_STAT(exits) },
163 { "io_exits", VCPU_STAT(io_exits) },
164 { "mmio_exits", VCPU_STAT(mmio_exits) },
165 { "signal_exits", VCPU_STAT(signal_exits) },
166 { "irq_window", VCPU_STAT(irq_window_exits) },
167 { "nmi_window", VCPU_STAT(nmi_window_exits) },
168 { "halt_exits", VCPU_STAT(halt_exits) },
169 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
170 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
171 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
172 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
173 { "hypercalls", VCPU_STAT(hypercalls) },
174 { "request_irq", VCPU_STAT(request_irq_exits) },
175 { "irq_exits", VCPU_STAT(irq_exits) },
176 { "host_state_reload", VCPU_STAT(host_state_reload) },
177 { "efer_reload", VCPU_STAT(efer_reload) },
178 { "fpu_reload", VCPU_STAT(fpu_reload) },
179 { "insn_emulation", VCPU_STAT(insn_emulation) },
180 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
181 { "irq_injections", VCPU_STAT(irq_injections) },
182 { "nmi_injections", VCPU_STAT(nmi_injections) },
183 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
184 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
185 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
186 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
187 { "mmu_flooded", VM_STAT(mmu_flooded) },
188 { "mmu_recycled", VM_STAT(mmu_recycled) },
189 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
190 { "mmu_unsync", VM_STAT(mmu_unsync) },
191 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
192 { "largepages", VM_STAT(lpages) },
193 { NULL }
194};
195
196u64 __read_mostly host_xcr0;
197
198static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
199
200static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
201{
202 int i;
203 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
204 vcpu->arch.apf.gfns[i] = ~0;
205}
206
207static void kvm_on_user_return(struct user_return_notifier *urn)
208{
209 unsigned slot;
210 struct kvm_shared_msrs *locals
211 = container_of(urn, struct kvm_shared_msrs, urn);
212 struct kvm_shared_msr_values *values;
213
214 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
215 values = &locals->values[slot];
216 if (values->host != values->curr) {
217 wrmsrl(shared_msrs_global.msrs[slot], values->host);
218 values->curr = values->host;
219 }
220 }
221 locals->registered = false;
222 user_return_notifier_unregister(urn);
223}
224
225static void shared_msr_update(unsigned slot, u32 msr)
226{
227 u64 value;
228 unsigned int cpu = smp_processor_id();
229 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
230
231
232
233 if (slot >= shared_msrs_global.nr) {
234 printk(KERN_ERR "kvm: invalid MSR slot!");
235 return;
236 }
237 rdmsrl_safe(msr, &value);
238 smsr->values[slot].host = value;
239 smsr->values[slot].curr = value;
240}
241
242void kvm_define_shared_msr(unsigned slot, u32 msr)
243{
244 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
245 shared_msrs_global.msrs[slot] = msr;
246 if (slot >= shared_msrs_global.nr)
247 shared_msrs_global.nr = slot + 1;
248}
249EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
250
251static void kvm_shared_msr_cpu_online(void)
252{
253 unsigned i;
254
255 for (i = 0; i < shared_msrs_global.nr; ++i)
256 shared_msr_update(i, shared_msrs_global.msrs[i]);
257}
258
259int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
260{
261 unsigned int cpu = smp_processor_id();
262 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
263 int err;
264
265 if (((value ^ smsr->values[slot].curr) & mask) == 0)
266 return 0;
267 smsr->values[slot].curr = value;
268 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
269 if (err)
270 return 1;
271
272 if (!smsr->registered) {
273 smsr->urn.on_user_return = kvm_on_user_return;
274 user_return_notifier_register(&smsr->urn);
275 smsr->registered = true;
276 }
277 return 0;
278}
279EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
280
281static void drop_user_return_notifiers(void)
282{
283 unsigned int cpu = smp_processor_id();
284 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
285
286 if (smsr->registered)
287 kvm_on_user_return(&smsr->urn);
288}
289
290u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
291{
292 return vcpu->arch.apic_base;
293}
294EXPORT_SYMBOL_GPL(kvm_get_apic_base);
295
296int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
297{
298 u64 old_state = vcpu->arch.apic_base &
299 (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
300 u64 new_state = msr_info->data &
301 (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
302 u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
303 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
304
305 if (!msr_info->host_initiated &&
306 ((msr_info->data & reserved_bits) != 0 ||
307 new_state == X2APIC_ENABLE ||
308 (new_state == MSR_IA32_APICBASE_ENABLE &&
309 old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
310 (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
311 old_state == 0)))
312 return 1;
313
314 kvm_lapic_set_base(vcpu, msr_info->data);
315 return 0;
316}
317EXPORT_SYMBOL_GPL(kvm_set_apic_base);
318
319asmlinkage __visible void kvm_spurious_fault(void)
320{
321
322 BUG();
323}
324EXPORT_SYMBOL_GPL(kvm_spurious_fault);
325
326#define EXCPT_BENIGN 0
327#define EXCPT_CONTRIBUTORY 1
328#define EXCPT_PF 2
329
330static int exception_class(int vector)
331{
332 switch (vector) {
333 case PF_VECTOR:
334 return EXCPT_PF;
335 case DE_VECTOR:
336 case TS_VECTOR:
337 case NP_VECTOR:
338 case SS_VECTOR:
339 case GP_VECTOR:
340 return EXCPT_CONTRIBUTORY;
341 default:
342 break;
343 }
344 return EXCPT_BENIGN;
345}
346
347#define EXCPT_FAULT 0
348#define EXCPT_TRAP 1
349#define EXCPT_ABORT 2
350#define EXCPT_INTERRUPT 3
351
352static int exception_type(int vector)
353{
354 unsigned int mask;
355
356 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
357 return EXCPT_INTERRUPT;
358
359 mask = 1 << vector;
360
361
362 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
363 return EXCPT_TRAP;
364
365 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
366 return EXCPT_ABORT;
367
368
369 return EXCPT_FAULT;
370}
371
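/*
 * Queue an exception, merging it with any exception that is already pending
 * according to the x86 rules: a second contributory exception (or any
 * non-benign exception following a page fault) is converted into #DF, a new
 * exception while #DF is pending escalates to a triple fault, and in all
 * other cases the new exception simply replaces the pending one.  The error
 * code is dropped when the vCPU is not in protected mode.
 */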
372static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
373 unsigned nr, bool has_error, u32 error_code,
374 bool reinject)
375{
376 u32 prev_nr;
377 int class1, class2;
378
379 kvm_make_request(KVM_REQ_EVENT, vcpu);
380
381 if (!vcpu->arch.exception.pending) {
382 queue:
383 if (has_error && !is_protmode(vcpu))
384 has_error = false;
385 vcpu->arch.exception.pending = true;
386 vcpu->arch.exception.has_error_code = has_error;
387 vcpu->arch.exception.nr = nr;
388 vcpu->arch.exception.error_code = error_code;
389 vcpu->arch.exception.reinject = reinject;
390 return;
391 }
392
393
394 prev_nr = vcpu->arch.exception.nr;
395 if (prev_nr == DF_VECTOR) {
396
397 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
398 return;
399 }
400 class1 = exception_class(prev_nr);
401 class2 = exception_class(nr);
402 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
403 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
404
405 vcpu->arch.exception.pending = true;
406 vcpu->arch.exception.has_error_code = true;
407 vcpu->arch.exception.nr = DF_VECTOR;
408 vcpu->arch.exception.error_code = 0;
409 } else
410
411
412
413 goto queue;
414}
415
416void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
417{
418 kvm_multiple_exception(vcpu, nr, false, 0, false);
419}
420EXPORT_SYMBOL_GPL(kvm_queue_exception);
421
422void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
423{
424 kvm_multiple_exception(vcpu, nr, false, 0, true);
425}
426EXPORT_SYMBOL_GPL(kvm_requeue_exception);
427
428void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
429{
430 if (err)
431 kvm_inject_gp(vcpu, 0);
432 else
433 kvm_x86_ops->skip_emulated_instruction(vcpu);
434}
435EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
436
437void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
438{
439 ++vcpu->stat.pf_guest;
440 vcpu->arch.cr2 = fault->address;
441 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
442}
443EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
444
445static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
446{
447 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
448 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
449 else
450 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
451
452 return fault->nested_page_fault;
453}
454
455void kvm_inject_nmi(struct kvm_vcpu *vcpu)
456{
457 atomic_inc(&vcpu->arch.nmi_queued);
458 kvm_make_request(KVM_REQ_NMI, vcpu);
459}
460EXPORT_SYMBOL_GPL(kvm_inject_nmi);
461
462void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
463{
464 kvm_multiple_exception(vcpu, nr, true, error_code, false);
465}
466EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
467
468void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
469{
470 kvm_multiple_exception(vcpu, nr, true, error_code, true);
471}
472EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
473
474
475
476
477
478bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
479{
480 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
481 return true;
482 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
483 return false;
484}
485EXPORT_SYMBOL_GPL(kvm_require_cpl);
486
487bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
488{
489 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
490 return true;
491
492 kvm_queue_exception(vcpu, UD_VECTOR);
493 return false;
494}
495EXPORT_SYMBOL_GPL(kvm_require_dr);
496
497
498
499
500
501
502int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
503 gfn_t ngfn, void *data, int offset, int len,
504 u32 access)
505{
506 struct x86_exception exception;
507 gfn_t real_gfn;
508 gpa_t ngpa;
509
510 ngpa = gfn_to_gpa(ngfn);
511 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
512 if (real_gfn == UNMAPPED_GVA)
513 return -EFAULT;
514
515 real_gfn = gpa_to_gfn(real_gfn);
516
517 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
518}
519EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
520
521static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
522 void *data, int offset, int len, u32 access)
523{
524 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
525 data, offset, len, access);
526}
527
528
529
530
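/*
 * Load the four PAE page-directory pointers referenced by CR3 through the
 * given MMU context.  Present entries are checked against the reserved-bit
 * mask; on success the PDPTEs are cached in mmu->pdptrs and VCPU_EXREG_PDPTR
 * is marked available and dirty.  Returns 1 on success, 0 on failure.
 */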
531int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
532{
533 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
534 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
535 int i;
536 int ret;
537 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
538
539 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
540 offset * sizeof(u64), sizeof(pdpte),
541 PFERR_USER_MASK|PFERR_WRITE_MASK);
542 if (ret < 0) {
543 ret = 0;
544 goto out;
545 }
546 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
547 if ((pdpte[i] & PT_PRESENT_MASK) &&
548 (pdpte[i] &
549 vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
550 ret = 0;
551 goto out;
552 }
553 }
554 ret = 1;
555
556 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
557 __set_bit(VCPU_EXREG_PDPTR,
558 (unsigned long *)&vcpu->arch.regs_avail);
559 __set_bit(VCPU_EXREG_PDPTR,
560 (unsigned long *)&vcpu->arch.regs_dirty);
561out:
562
563 return ret;
564}
565EXPORT_SYMBOL_GPL(load_pdptrs);
566
567static bool pdptrs_changed(struct kvm_vcpu *vcpu)
568{
569 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
570 bool changed = true;
571 int offset;
572 gfn_t gfn;
573 int r;
574
575 if (is_long_mode(vcpu) || !is_pae(vcpu))
576 return false;
577
578 if (!test_bit(VCPU_EXREG_PDPTR,
579 (unsigned long *)&vcpu->arch.regs_avail))
580 return true;
581
582 gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT;
583 offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1);
584 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
585 PFERR_USER_MASK | PFERR_WRITE_MASK);
586 if (r < 0)
587 goto out;
588 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
589out:
590
591 return changed;
592}
593
594int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
595{
596 unsigned long old_cr0 = kvm_read_cr0(vcpu);
597 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
598
599 cr0 |= X86_CR0_ET;
600
601#ifdef CONFIG_X86_64
602 if (cr0 & 0xffffffff00000000UL)
603 return 1;
604#endif
605
606 cr0 &= ~CR0_RESERVED_BITS;
607
608 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
609 return 1;
610
611 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
612 return 1;
613
614 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
615#ifdef CONFIG_X86_64
616 if ((vcpu->arch.efer & EFER_LME)) {
617 int cs_db, cs_l;
618
619 if (!is_pae(vcpu))
620 return 1;
621 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
622 if (cs_l)
623 return 1;
624 } else
625#endif
626 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
627 kvm_read_cr3(vcpu)))
628 return 1;
629 }
630
631 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
632 return 1;
633
634 kvm_x86_ops->set_cr0(vcpu, cr0);
635
636 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
637 kvm_clear_async_pf_completion_queue(vcpu);
638 kvm_async_pf_hash_reset(vcpu);
639 }
640
641 if ((cr0 ^ old_cr0) & update_bits)
642 kvm_mmu_reset_context(vcpu);
643
644 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
645 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
646 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
647 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
648
649 return 0;
650}
651EXPORT_SYMBOL_GPL(kvm_set_cr0);
652
653void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
654{
655 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
656}
657EXPORT_SYMBOL_GPL(kvm_lmsw);
658
659static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
660{
661 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
662 !vcpu->guest_xcr0_loaded) {
663
664 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
665 vcpu->guest_xcr0_loaded = 1;
666 }
667}
668
669static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
670{
671 if (vcpu->guest_xcr0_loaded) {
672 if (vcpu->arch.xcr0 != host_xcr0)
673 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
674 vcpu->guest_xcr0_loaded = 0;
675 }
676}
677
678static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
679{
680 u64 xcr0 = xcr;
681 u64 old_xcr0 = vcpu->arch.xcr0;
682 u64 valid_bits;
683
684
685 if (index != XCR_XFEATURE_ENABLED_MASK)
686 return 1;
687 if (!(xcr0 & XFEATURE_MASK_FP))
688 return 1;
689 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
690 return 1;
691
692
693
694
695
696
697 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
698 if (xcr0 & ~valid_bits)
699 return 1;
700
701 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
702 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
703 return 1;
704
705 if (xcr0 & XFEATURE_MASK_AVX512) {
706 if (!(xcr0 & XFEATURE_MASK_YMM))
707 return 1;
708 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
709 return 1;
710 }
711 vcpu->arch.xcr0 = xcr0;
712
713 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
714 kvm_update_cpuid(vcpu);
715 return 0;
716}
717
718int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
719{
720 if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
721 __kvm_set_xcr(vcpu, index, xcr)) {
722 kvm_inject_gp(vcpu, 0);
723 return 1;
724 }
725 return 0;
726}
727EXPORT_SYMBOL_GPL(kvm_set_xcr);
728
729int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
730{
731 unsigned long old_cr4 = kvm_read_cr4(vcpu);
732 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
733 X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
734
735 if (cr4 & CR4_RESERVED_BITS)
736 return 1;
737
738 if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE))
739 return 1;
740
741 if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
742 return 1;
743
744 if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
745 return 1;
746
747 if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
748 return 1;
749
750 if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
751 return 1;
752
753 if (is_long_mode(vcpu)) {
754 if (!(cr4 & X86_CR4_PAE))
755 return 1;
756 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
757 && ((cr4 ^ old_cr4) & pdptr_bits)
758 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
759 kvm_read_cr3(vcpu)))
760 return 1;
761
762 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
763 if (!guest_cpuid_has_pcid(vcpu))
764 return 1;
765
766
767 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
768 return 1;
769 }
770
771 if (kvm_x86_ops->set_cr4(vcpu, cr4))
772 return 1;
773
774 if (((cr4 ^ old_cr4) & pdptr_bits) ||
775 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
776 kvm_mmu_reset_context(vcpu);
777
778 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
779 kvm_update_cpuid(vcpu);
780
781 return 0;
782}
783EXPORT_SYMBOL_GPL(kvm_set_cr4);
784
785int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
786{
787#ifdef CONFIG_X86_64
788 cr3 &= ~CR3_PCID_INVD;
789#endif
790
791 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
792 kvm_mmu_sync_roots(vcpu);
793 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
794 return 0;
795 }
796
797 if (is_long_mode(vcpu)) {
798 if (cr3 & CR3_L_MODE_RESERVED_BITS)
799 return 1;
800 } else if (is_pae(vcpu) && is_paging(vcpu) &&
801 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
802 return 1;
803
804 vcpu->arch.cr3 = cr3;
805 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
806 kvm_mmu_new_cr3(vcpu);
807 return 0;
808}
809EXPORT_SYMBOL_GPL(kvm_set_cr3);
810
811int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
812{
813 if (cr8 & CR8_RESERVED_BITS)
814 return 1;
815 if (lapic_in_kernel(vcpu))
816 kvm_lapic_set_tpr(vcpu, cr8);
817 else
818 vcpu->arch.cr8 = cr8;
819 return 0;
820}
821EXPORT_SYMBOL_GPL(kvm_set_cr8);
822
823unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
824{
825 if (lapic_in_kernel(vcpu))
826 return kvm_lapic_get_cr8(vcpu);
827 else
828 return vcpu->arch.cr8;
829}
830EXPORT_SYMBOL_GPL(kvm_get_cr8);
831
832static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
833{
834 int i;
835
836 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
837 for (i = 0; i < KVM_NR_DB_REGS; i++)
838 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
839 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
840 }
841}
842
843static void kvm_update_dr6(struct kvm_vcpu *vcpu)
844{
845 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
846 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
847}
848
849static void kvm_update_dr7(struct kvm_vcpu *vcpu)
850{
851 unsigned long dr7;
852
853 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
854 dr7 = vcpu->arch.guest_debug_dr7;
855 else
856 dr7 = vcpu->arch.dr7;
857 kvm_x86_ops->set_dr7(vcpu, dr7);
858 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
859 if (dr7 & DR7_BP_EN_MASK)
860 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
861}
862
863static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
864{
865 u64 fixed = DR6_FIXED_1;
866
867 if (!guest_cpuid_has_rtm(vcpu))
868 fixed |= DR6_RTM;
869 return fixed;
870}
871
872static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
873{
874 switch (dr) {
875 case 0 ... 3:
876 vcpu->arch.db[dr] = val;
877 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
878 vcpu->arch.eff_db[dr] = val;
879 break;
880 case 4:
881
882 case 6:
883 if (val & 0xffffffff00000000ULL)
884 return -1;
885 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
886 kvm_update_dr6(vcpu);
887 break;
888 case 5:
889
890 default:
891 if (val & 0xffffffff00000000ULL)
892 return -1;
893 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
894 kvm_update_dr7(vcpu);
895 break;
896 }
897
898 return 0;
899}
900
901int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
902{
903 if (__kvm_set_dr(vcpu, dr, val)) {
904 kvm_inject_gp(vcpu, 0);
905 return 1;
906 }
907 return 0;
908}
909EXPORT_SYMBOL_GPL(kvm_set_dr);
910
911int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
912{
913 switch (dr) {
914 case 0 ... 3:
915 *val = vcpu->arch.db[dr];
916 break;
917 case 4:
918
919 case 6:
920 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
921 *val = vcpu->arch.dr6;
922 else
923 *val = kvm_x86_ops->get_dr6(vcpu);
924 break;
925 case 5:
926
927 default:
928 *val = vcpu->arch.dr7;
929 break;
930 }
931 return 0;
932}
933EXPORT_SYMBOL_GPL(kvm_get_dr);
934
935bool kvm_rdpmc(struct kvm_vcpu *vcpu)
936{
937 u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
938 u64 data;
939 int err;
940
941 err = kvm_pmu_rdpmc(vcpu, ecx, &data);
942 if (err)
943 return err;
944 kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
945 kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
946 return err;
947}
948EXPORT_SYMBOL_GPL(kvm_rdpmc);
949
950
951
952
953
954
955
956
957
958
959
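/*
 * msrs_to_save lists MSRs that are backed by host hardware and exposed to
 * userspace for save/restore; emulated_msrs lists MSRs that KVM emulates
 * itself (kvmclock, Hyper-V, async page faults, ...).  num_msrs_to_save and
 * num_emulated_msrs hold the number of entries actually advertised once the
 * lists have been checked against the host at initialization.
 */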
960static u32 msrs_to_save[] = {
961 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
962 MSR_STAR,
963#ifdef CONFIG_X86_64
964 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
965#endif
966 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
967 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
968};
969
970static unsigned num_msrs_to_save;
971
972static u32 emulated_msrs[] = {
973 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
974 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
975 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
976 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
977 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
978 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
979 HV_X64_MSR_RESET,
980 HV_X64_MSR_VP_INDEX,
981 HV_X64_MSR_VP_RUNTIME,
982 HV_X64_MSR_SCONTROL,
983 HV_X64_MSR_STIMER0_CONFIG,
984 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
985 MSR_KVM_PV_EOI_EN,
986
987 MSR_IA32_TSC_ADJUST,
988 MSR_IA32_TSCDEADLINE,
989 MSR_IA32_MISC_ENABLE,
990 MSR_IA32_MCG_STATUS,
991 MSR_IA32_MCG_CTL,
992 MSR_IA32_MCG_EXT_CTL,
993 MSR_IA32_SMBASE,
994};
995
996static unsigned num_emulated_msrs;
997
998bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
999{
1000 if (efer & efer_reserved_bits)
1001 return false;
1002
1003 if (efer & EFER_FFXSR) {
1004 struct kvm_cpuid_entry2 *feat;
1005
1006 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
1007 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT)))
1008 return false;
1009 }
1010
1011 if (efer & EFER_SVME) {
1012 struct kvm_cpuid_entry2 *feat;
1013
1014 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
1015 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM)))
1016 return false;
1017 }
1018
1019 return true;
1020}
1021EXPORT_SYMBOL_GPL(kvm_valid_efer);
1022
1023static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
1024{
1025 u64 old_efer = vcpu->arch.efer;
1026
1027 if (!kvm_valid_efer(vcpu, efer))
1028 return 1;
1029
1030 if (is_paging(vcpu)
1031 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1032 return 1;
1033
1034 efer &= ~EFER_LMA;
1035 efer |= vcpu->arch.efer & EFER_LMA;
1036
1037 kvm_x86_ops->set_efer(vcpu, efer);
1038
1039
1040 if ((efer ^ old_efer) & EFER_NX)
1041 kvm_mmu_reset_context(vcpu);
1042
1043 return 0;
1044}
1045
1046void kvm_enable_efer_bits(u64 mask)
1047{
1048 efer_reserved_bits &= ~mask;
1049}
1050EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1051
1052
1053
1054
1055
1056
1057int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
1058{
1059 switch (msr->index) {
1060 case MSR_FS_BASE:
1061 case MSR_GS_BASE:
1062 case MSR_KERNEL_GS_BASE:
1063 case MSR_CSTAR:
1064 case MSR_LSTAR:
1065 if (is_noncanonical_address(msr->data))
1066 return 1;
1067 break;
1068 case MSR_IA32_SYSENTER_EIP:
1069 case MSR_IA32_SYSENTER_ESP:
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082 msr->data = get_canonical(msr->data);
1083 }
1084 return kvm_x86_ops->set_msr(vcpu, msr);
1085}
1086EXPORT_SYMBOL_GPL(kvm_set_msr);
1087
1088
1089
1090
1091static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1092{
1093 struct msr_data msr;
1094 int r;
1095
1096 msr.index = index;
1097 msr.host_initiated = true;
1098 r = kvm_get_msr(vcpu, &msr);
1099 if (r)
1100 return r;
1101
1102 *data = msr.data;
1103 return 0;
1104}
1105
1106static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1107{
1108 struct msr_data msr;
1109
1110 msr.data = *data;
1111 msr.index = index;
1112 msr.host_initiated = true;
1113 return kvm_set_msr(vcpu, &msr);
1114}
1115
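/*
 * pvclock_gtod_data is KVM's private snapshot of the host timekeeper: the
 * clocksource mode, last cycle value, mult/shift pair and accumulated
 * boot-based nanoseconds.  update_pvclock_gtod() refreshes it under a
 * seqcount so that readers always see a consistent copy; the masterclock
 * code below samples this snapshot instead of calling into the timekeeping
 * core directly.
 */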
1116#ifdef CONFIG_X86_64
1117struct pvclock_gtod_data {
1118 seqcount_t seq;
1119
1120 struct {
1121 int vclock_mode;
1122 cycle_t cycle_last;
1123 cycle_t mask;
1124 u32 mult;
1125 u32 shift;
1126 } clock;
1127
1128 u64 boot_ns;
1129 u64 nsec_base;
1130};
1131
1132static struct pvclock_gtod_data pvclock_gtod_data;
1133
1134static void update_pvclock_gtod(struct timekeeper *tk)
1135{
1136 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1137 u64 boot_ns;
1138
1139 boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
1140
1141 write_seqcount_begin(&vdata->seq);
1142
1143
1144 vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
1145 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1146 vdata->clock.mask = tk->tkr_mono.mask;
1147 vdata->clock.mult = tk->tkr_mono.mult;
1148 vdata->clock.shift = tk->tkr_mono.shift;
1149
1150 vdata->boot_ns = boot_ns;
1151 vdata->nsec_base = tk->tkr_mono.xtime_nsec;
1152
1153 write_seqcount_end(&vdata->seq);
1154}
1155#endif
1156
1157void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
1158{
1159
1160
1161
1162
1163
1164 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1165}
1166
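/*
 * Publish the wall-clock time at the guest physical address selected via
 * MSR_KVM_WALL_CLOCK{,_NEW}.  The version field is bumped to an odd value
 * before the seconds/nanoseconds (boot time adjusted by the VM's
 * kvmclock_offset) are written, and to an even value afterwards, so the
 * guest can detect and retry a torn read.
 */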
1167static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
1168{
1169 int version;
1170 int r;
1171 struct pvclock_wall_clock wc;
1172 struct timespec64 boot;
1173
1174 if (!wall_clock)
1175 return;
1176
1177 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
1178 if (r)
1179 return;
1180
1181 if (version & 1)
1182 ++version;
1183
1184 ++version;
1185
1186 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
1187 return;
1188
1189
1190
1191
1192
1193
1194
1195 getboottime64(&boot);
1196
1197 if (kvm->arch.kvmclock_offset) {
1198 struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
1199 boot = timespec64_sub(boot, ts);
1200 }
1201 wc.sec = (u32)boot.tv_sec;
1202 wc.nsec = boot.tv_nsec;
1203 wc.version = version;
1204
1205 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
1206
1207 version++;
1208 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1209}
1210
1211static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1212{
1213 do_shl32_div32(dividend, divisor);
1214 return dividend;
1215}
1216
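/*
 * Compute a (shift, multiplier) pair for pvclock_scale_delta() such that
 *
 *	value_scaled ~= ((value_base << shift) * multiplier) >> 32
 *
 * converts a count taken at base_hz into the equivalent count at scaled_hz
 * (a negative shift means a right shift).  This is used both to turn TSC
 * cycles into nanoseconds (scaled_hz = NSEC_PER_SEC) and nanoseconds into
 * guest TSC cycles (base_hz = NSEC_PER_SEC).
 */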
1217static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
1218 s8 *pshift, u32 *pmultiplier)
1219{
1220 uint64_t scaled64;
1221 int32_t shift = 0;
1222 uint64_t tps64;
1223 uint32_t tps32;
1224
1225 tps64 = base_hz;
1226 scaled64 = scaled_hz;
1227 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
1228 tps64 >>= 1;
1229 shift--;
1230 }
1231
1232 tps32 = (uint32_t)tps64;
1233 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
1234 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
1235 scaled64 >>= 1;
1236 else
1237 tps32 <<= 1;
1238 shift++;
1239 }
1240
1241 *pshift = shift;
1242 *pmultiplier = div_frac(scaled64, tps32);
1243
1244 pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
1245 __func__, base_hz, scaled_hz, shift, *pmultiplier);
1246}
1247
1248#ifdef CONFIG_X86_64
1249static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1250#endif
1251
1252static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
1253static unsigned long max_tsc_khz;
1254
1255static u32 adjust_tsc_khz(u32 khz, s32 ppm)
1256{
1257 u64 v = (u64)khz * (1000000 + ppm);
1258 do_div(v, 1000000);
1259 return v;
1260}
1261
1262static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1263{
1264 u64 ratio;
1265
1266
1267 if (!scale) {
1268 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1269 return 0;
1270 }
1271
1272
1273 if (!kvm_has_tsc_control) {
1274 if (user_tsc_khz > tsc_khz) {
1275 vcpu->arch.tsc_catchup = 1;
1276 vcpu->arch.tsc_always_catchup = 1;
1277 return 0;
1278 } else {
1279 WARN(1, "user requested TSC rate below hardware speed\n");
1280 return -1;
1281 }
1282 }
1283
1284
1285 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
1286 user_tsc_khz, tsc_khz);
1287
1288 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
1289 WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
1290 user_tsc_khz);
1291 return -1;
1292 }
1293
1294 vcpu->arch.tsc_scaling_ratio = ratio;
1295 return 0;
1296}
1297
1298static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
1299{
1300 u32 thresh_lo, thresh_hi;
1301 int use_scaling = 0;
1302
1303
1304 if (user_tsc_khz == 0) {
1305
1306 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1307 return -1;
1308 }
1309
1310
1311 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
1312 &vcpu->arch.virtual_tsc_shift,
1313 &vcpu->arch.virtual_tsc_mult);
1314 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
1315
1316
1317
1318
1319
1320
1321
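	/*
	 * Tolerate small differences between the requested rate and the host
	 * TSC frequency: only when the request falls outside tsc_tolerance_ppm
	 * of tsc_khz is hardware scaling (or software catchup) enabled.
	 */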
1322 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1323 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1324 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
1325 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
1326 use_scaling = 1;
1327 }
1328 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
1329}
1330
1331static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1332{
1333 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1334 vcpu->arch.virtual_tsc_mult,
1335 vcpu->arch.virtual_tsc_shift);
1336 tsc += vcpu->arch.this_tsc_write;
1337 return tsc;
1338}
1339
1340static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1341{
1342#ifdef CONFIG_X86_64
1343 bool vcpus_matched;
1344 struct kvm_arch *ka = &vcpu->kvm->arch;
1345 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1346
1347 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1348 atomic_read(&vcpu->kvm->online_vcpus));
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358 if (ka->use_master_clock ||
1359 (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
1360 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1361
1362 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
1363 atomic_read(&vcpu->kvm->online_vcpus),
1364 ka->use_master_clock, gtod->clock.vclock_mode);
1365#endif
1366}
1367
1368static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1369{
1370 u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
1371 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1372}
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
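/*
 * TSC scaling: the ratio is a binary fixed-point number with
 * kvm_tsc_scaling_ratio_frac_bits fractional bits, so
 *
 *	scaled_tsc = (tsc * ratio) >> kvm_tsc_scaling_ratio_frac_bits
 *
 * e.g. with N fractional bits, ratio = 3 << (N - 1) encodes 1.5 and makes
 * the guest TSC run at 1.5 times the host rate.  kvm_scale_tsc() skips the
 * multiplication when the ratio is the default (1.0).
 */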
1384static inline u64 __scale_tsc(u64 ratio, u64 tsc)
1385{
1386 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
1387}
1388
1389u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
1390{
1391 u64 _tsc = tsc;
1392 u64 ratio = vcpu->arch.tsc_scaling_ratio;
1393
1394 if (ratio != kvm_default_tsc_scaling_ratio)
1395 _tsc = __scale_tsc(ratio, tsc);
1396
1397 return _tsc;
1398}
1399EXPORT_SYMBOL_GPL(kvm_scale_tsc);
1400
1401static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1402{
1403 u64 tsc;
1404
1405 tsc = kvm_scale_tsc(vcpu, rdtsc());
1406
1407 return target_tsc - tsc;
1408}
1409
1410u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
1411{
1412 return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc));
1413}
1414EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
1415
1416void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1417{
1418 struct kvm *kvm = vcpu->kvm;
1419 u64 offset, ns, elapsed;
1420 unsigned long flags;
1421 s64 usdiff;
1422 bool matched;
1423 bool already_matched;
1424 u64 data = msr->data;
1425
1426 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1427 offset = kvm_compute_tsc_offset(vcpu, data);
1428 ns = get_kernel_ns();
1429 elapsed = ns - kvm->arch.last_tsc_nsec;
1430
1431 if (vcpu->arch.virtual_tsc_khz) {
1432 int faulted = 0;
1433
1434
1435 usdiff = data - kvm->arch.last_tsc_write;
1436#ifdef CONFIG_X86_64
1437 usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
1438#else
1439
1440 asm("1: idivl %[divisor]\n"
1441 "2: xor %%edx, %%edx\n"
1442 " movl $0, %[faulted]\n"
1443 "3:\n"
1444 ".section .fixup,\"ax\"\n"
1445 "4: movl $1, %[faulted]\n"
1446 " jmp 3b\n"
1447 ".previous\n"
1448
1449 _ASM_EXTABLE(1b, 4b)
1450
1451 : "=A"(usdiff), [faulted] "=r" (faulted)
1452 : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
1453
1454#endif
1455 do_div(elapsed, 1000);
1456 usdiff -= elapsed;
1457 if (usdiff < 0)
1458 usdiff = -usdiff;
1459
1460
1461 if (faulted)
1462 usdiff = USEC_PER_SEC;
1463 } else
1464 usdiff = USEC_PER_SEC;
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
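	/*
	 * Writes that land within one second of guest time of the previous
	 * write on this VM, at the same virtual TSC frequency, are treated as
	 * an attempt to synchronize the TSCs of several vCPUs: with a stable
	 * host TSC the existing offset is reused, otherwise the value is
	 * advanced by the elapsed time.  Any other write starts a new TSC
	 * generation for the VM.
	 */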
1476 if (usdiff < USEC_PER_SEC &&
1477 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1478 if (!check_tsc_unstable()) {
1479 offset = kvm->arch.cur_tsc_offset;
1480 pr_debug("kvm: matched tsc offset for %llu\n", data);
1481 } else {
1482 u64 delta = nsec_to_cycles(vcpu, elapsed);
1483 data += delta;
1484 offset = kvm_compute_tsc_offset(vcpu, data);
1485 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1486 }
1487 matched = true;
1488 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
1489 } else {
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499 kvm->arch.cur_tsc_generation++;
1500 kvm->arch.cur_tsc_nsec = ns;
1501 kvm->arch.cur_tsc_write = data;
1502 kvm->arch.cur_tsc_offset = offset;
1503 matched = false;
1504 pr_debug("kvm: new tsc generation %llu, clock %llu\n",
1505 kvm->arch.cur_tsc_generation, data);
1506 }
1507
1508
1509
1510
1511
1512 kvm->arch.last_tsc_nsec = ns;
1513 kvm->arch.last_tsc_write = data;
1514 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1515
1516 vcpu->arch.last_guest_tsc = data;
1517
1518
1519 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
1520 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1521 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1522
1523 if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
1524 update_ia32_tsc_adjust_msr(vcpu, offset);
1525 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1526 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1527
1528 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
1529 if (!matched) {
1530 kvm->arch.nr_vcpus_matched_tsc = 0;
1531 } else if (!already_matched) {
1532 kvm->arch.nr_vcpus_matched_tsc++;
1533 }
1534
1535 kvm_track_tsc_matching(vcpu);
1536 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
1537}
1538
1539EXPORT_SYMBOL_GPL(kvm_write_tsc);
1540
1541static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
1542 s64 adjustment)
1543{
1544 kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
1545}
1546
1547static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
1548{
1549 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
1550 WARN_ON(adjustment < 0);
1551 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
1552 kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
1553}
1554
1555#ifdef CONFIG_X86_64
1556
1557static cycle_t read_tsc(void)
1558{
1559 cycle_t ret = (cycle_t)rdtsc_ordered();
1560 u64 last = pvclock_gtod_data.clock.cycle_last;
1561
1562 if (likely(ret >= last))
1563 return ret;
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573 asm volatile ("");
1574 return last;
1575}
1576
1577static inline u64 vgettsc(cycle_t *cycle_now)
1578{
1579 long v;
1580 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1581
1582 *cycle_now = read_tsc();
1583
1584 v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
1585 return v * gtod->clock.mult;
1586}
1587
static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		mode = gtod->clock.vclock_mode;
		ns = gtod->nsec_base;
		ns += vgettsc(cycle_now);
		ns >>= gtod->clock.shift;
		ns += gtod->boot_ns;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	*t = ns;

	return mode;
}
1607
1608
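/*
 * Returns true only when the host clocksource is TSC-based; in that case
 * *kernel_ns and *cycle_now are filled with a consistent pair of boot-based
 * nanoseconds and the TSC value they correspond to.
 */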
1609static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
1610{
1611
1612 if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
1613 return false;
1614
1615 return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
1616}
1617#endif
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
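/*
 * Decide whether this VM can use a "master clock": a single
 * (master_kernel_ns, master_cycle_now) snapshot shared by all vCPUs.  This
 * requires the host clocksource to be the TSC and all vCPU TSCs to be
 * matched, and is additionally vetoed when a backwards TSC has been
 * observed or the boot vCPU runs an old kvmclock ABI.  With a master clock,
 * every vCPU derives its kvmclock from the same snapshot, keeping the
 * clocks of all vCPUs consistent with one another.
 */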
1660static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
1661{
1662#ifdef CONFIG_X86_64
1663 struct kvm_arch *ka = &kvm->arch;
1664 int vclock_mode;
1665 bool host_tsc_clocksource, vcpus_matched;
1666
1667 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1668 atomic_read(&kvm->online_vcpus));
1669
1670
1671
1672
1673
1674 host_tsc_clocksource = kvm_get_time_and_clockread(
1675 &ka->master_kernel_ns,
1676 &ka->master_cycle_now);
1677
1678 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
1679 && !backwards_tsc_observed
1680 && !ka->boot_vcpu_runs_old_kvmclock;
1681
1682 if (ka->use_master_clock)
1683 atomic_set(&kvm_guest_has_master_clock, 1);
1684
1685 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
1686 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
1687 vcpus_matched);
1688#endif
1689}
1690
1691void kvm_make_mclock_inprogress_request(struct kvm *kvm)
1692{
1693 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
1694}
1695
1696static void kvm_gen_update_masterclock(struct kvm *kvm)
1697{
1698#ifdef CONFIG_X86_64
1699 int i;
1700 struct kvm_vcpu *vcpu;
1701 struct kvm_arch *ka = &kvm->arch;
1702
1703 spin_lock(&ka->pvclock_gtod_sync_lock);
1704 kvm_make_mclock_inprogress_request(kvm);
1705
1706 pvclock_update_vm_gtod_copy(kvm);
1707
1708 kvm_for_each_vcpu(i, vcpu, kvm)
1709 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1710
1711
1712 kvm_for_each_vcpu(i, vcpu, kvm)
1713 clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
1714
1715 spin_unlock(&ka->pvclock_gtod_sync_lock);
1716#endif
1717}
1718
1719static int kvm_guest_time_update(struct kvm_vcpu *v)
1720{
1721 unsigned long flags, tgt_tsc_khz;
1722 struct kvm_vcpu_arch *vcpu = &v->arch;
1723 struct kvm_arch *ka = &v->kvm->arch;
1724 s64 kernel_ns;
1725 u64 tsc_timestamp, host_tsc;
1726 struct pvclock_vcpu_time_info guest_hv_clock;
1727 u8 pvclock_flags;
1728 bool use_master_clock;
1729
1730 kernel_ns = 0;
1731 host_tsc = 0;
1732
1733
1734
1735
1736
1737 spin_lock(&ka->pvclock_gtod_sync_lock);
1738 use_master_clock = ka->use_master_clock;
1739 if (use_master_clock) {
1740 host_tsc = ka->master_cycle_now;
1741 kernel_ns = ka->master_kernel_ns;
1742 }
1743 spin_unlock(&ka->pvclock_gtod_sync_lock);
1744
1745
1746 local_irq_save(flags);
1747 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
1748 if (unlikely(tgt_tsc_khz == 0)) {
1749 local_irq_restore(flags);
1750 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1751 return 1;
1752 }
1753 if (!use_master_clock) {
1754 host_tsc = rdtsc();
1755 kernel_ns = get_kernel_ns();
1756 }
1757
1758 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770 if (vcpu->tsc_catchup) {
1771 u64 tsc = compute_guest_tsc(v, kernel_ns);
1772 if (tsc > tsc_timestamp) {
1773 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
1774 tsc_timestamp = tsc;
1775 }
1776 }
1777
1778 local_irq_restore(flags);
1779
1780 if (!vcpu->pv_time_enabled)
1781 return 0;
1782
1783 if (kvm_has_tsc_control)
1784 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
1785
1786 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
1787 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
1788 &vcpu->hv_clock.tsc_shift,
1789 &vcpu->hv_clock.tsc_to_system_mul);
1790 vcpu->hw_tsc_khz = tgt_tsc_khz;
1791 }
1792
1793
1794 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1795 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1796 vcpu->last_guest_tsc = tsc_timestamp;
1797
1798 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
1799 &guest_hv_clock, sizeof(guest_hv_clock))))
1800 return 0;
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
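	/*
	 * The guest-visible pvclock structure is protected by its version
	 * field, used like a seqcount: the version is made odd before the
	 * body is rewritten and even again afterwards, with write barriers
	 * in between, so a guest reader that races with the update retries.
	 */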
1816 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
1817
1818 vcpu->hv_clock.version = guest_hv_clock.version + 1;
1819 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1820 &vcpu->hv_clock,
1821 sizeof(vcpu->hv_clock.version));
1822
1823 smp_wmb();
1824
1825
1826 pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
1827
1828 if (vcpu->pvclock_set_guest_stopped_request) {
1829 pvclock_flags |= PVCLOCK_GUEST_STOPPED;
1830 vcpu->pvclock_set_guest_stopped_request = false;
1831 }
1832
1833
1834 if (use_master_clock)
1835 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
1836
1837 vcpu->hv_clock.flags = pvclock_flags;
1838
1839 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
1840
1841 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1842 &vcpu->hv_clock,
1843 sizeof(vcpu->hv_clock));
1844
1845 smp_wmb();
1846
1847 vcpu->hv_clock.version++;
1848 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1849 &vcpu->hv_clock,
1850 sizeof(vcpu->hv_clock.version));
1851 return 0;
1852}
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
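/*
 * When one vCPU's kvmclock is refreshed, the other vCPUs' copies should not
 * be left behind indefinitely: kvm_gen_kvmclock_update() updates the
 * triggering vCPU immediately and schedules kvmclock_update_fn() to refresh
 * every vCPU of the VM a short while (100 ms) later.
 */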
1868#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
1869
1870static void kvmclock_update_fn(struct work_struct *work)
1871{
1872 int i;
1873 struct delayed_work *dwork = to_delayed_work(work);
1874 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1875 kvmclock_update_work);
1876 struct kvm *kvm = container_of(ka, struct kvm, arch);
1877 struct kvm_vcpu *vcpu;
1878
1879 kvm_for_each_vcpu(i, vcpu, kvm) {
1880 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1881 kvm_vcpu_kick(vcpu);
1882 }
1883}
1884
1885static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1886{
1887 struct kvm *kvm = v->kvm;
1888
1889 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1890 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
1891 KVMCLOCK_UPDATE_DELAY);
1892}
1893
1894#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
1895
1896static void kvmclock_sync_fn(struct work_struct *work)
1897{
1898 struct delayed_work *dwork = to_delayed_work(work);
1899 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1900 kvmclock_sync_work);
1901 struct kvm *kvm = container_of(ka, struct kvm, arch);
1902
1903 if (!kvmclock_periodic_sync)
1904 return;
1905
1906 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
1907 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
1908 KVMCLOCK_SYNC_PERIOD);
1909}
1910
1911static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1912{
1913 u64 mcg_cap = vcpu->arch.mcg_cap;
1914 unsigned bank_num = mcg_cap & 0xff;
1915
1916 switch (msr) {
1917 case MSR_IA32_MCG_STATUS:
1918 vcpu->arch.mcg_status = data;
1919 break;
1920 case MSR_IA32_MCG_CTL:
1921 if (!(mcg_cap & MCG_CTL_P))
1922 return 1;
1923 if (data != 0 && data != ~(u64)0)
1924 return -1;
1925 vcpu->arch.mcg_ctl = data;
1926 break;
1927 default:
1928 if (msr >= MSR_IA32_MC0_CTL &&
1929 msr < MSR_IA32_MCx_CTL(bank_num)) {
1930 u32 offset = msr - MSR_IA32_MC0_CTL;
1931
1932
1933
1934
1935
1936 if ((offset & 0x3) == 0 &&
1937 data != 0 && (data | (1 << 10)) != ~(u64)0)
1938 return -1;
1939 vcpu->arch.mce_banks[offset] = data;
1940 break;
1941 }
1942 return 1;
1943 }
1944 return 0;
1945}
1946
1947static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
1948{
1949 struct kvm *kvm = vcpu->kvm;
1950 int lm = is_long_mode(vcpu);
1951 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
1952 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
1953 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
1954 : kvm->arch.xen_hvm_config.blob_size_32;
1955 u32 page_num = data & ~PAGE_MASK;
1956 u64 page_addr = data & PAGE_MASK;
1957 u8 *page;
1958 int r;
1959
1960 r = -E2BIG;
1961 if (page_num >= blob_size)
1962 goto out;
1963 r = -ENOMEM;
1964 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
1965 if (IS_ERR(page)) {
1966 r = PTR_ERR(page);
1967 goto out;
1968 }
1969 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
1970 goto out_free;
1971 r = 0;
1972out_free:
1973 kfree(page);
1974out:
1975 return r;
1976}
1977
1978static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
1979{
1980 gpa_t gpa = data & ~0x3f;
1981
1982
1983 if (data & 0x3c)
1984 return 1;
1985
1986 vcpu->arch.apf.msr_val = data;
1987
1988 if (!(data & KVM_ASYNC_PF_ENABLED)) {
1989 kvm_clear_async_pf_completion_queue(vcpu);
1990 kvm_async_pf_hash_reset(vcpu);
1991 return 0;
1992 }
1993
1994 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
1995 sizeof(u32)))
1996 return 1;
1997
1998 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
1999 kvm_async_pf_wakeup_all(vcpu);
2000 return 0;
2001}
2002
2003static void kvmclock_reset(struct kvm_vcpu *vcpu)
2004{
2005 vcpu->arch.pv_time_enabled = false;
2006}
2007
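/*
 * Accumulate the time this task spent runnable but not running (the
 * scheduler's run_delay) into the guest's steal-time structure, publishing
 * the update with the same odd/even version protocol as kvmclock so the
 * guest never sees a half-written record.
 */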
2008static void record_steal_time(struct kvm_vcpu *vcpu)
2009{
2010 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2011 return;
2012
2013 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2014 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2015 return;
2016
2017 if (vcpu->arch.st.steal.version & 1)
2018 vcpu->arch.st.steal.version += 1;
2019
2020 vcpu->arch.st.steal.version += 1;
2021
2022 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2023 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2024
2025 smp_wmb();
2026
2027 vcpu->arch.st.steal.steal += current->sched_info.run_delay -
2028 vcpu->arch.st.last_steal;
2029 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2030
2031 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2032 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2033
2034 smp_wmb();
2035
2036 vcpu->arch.st.steal.version += 1;
2037
2038 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2039 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2040}
2041
2042int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2043{
2044 bool pr = false;
2045 u32 msr = msr_info->index;
2046 u64 data = msr_info->data;
2047
2048 switch (msr) {
2049 case MSR_AMD64_NB_CFG:
2050 case MSR_IA32_UCODE_REV:
2051 case MSR_IA32_UCODE_WRITE:
2052 case MSR_VM_HSAVE_PA:
2053 case MSR_AMD64_PATCH_LOADER:
2054 case MSR_AMD64_BU_CFG2:
2055 break;
2056
2057 case MSR_EFER:
2058 return set_efer(vcpu, data);
2059 case MSR_K7_HWCR:
2060 data &= ~(u64)0x40;
2061 data &= ~(u64)0x100;
2062 data &= ~(u64)0x8;
2063 data &= ~(u64)0x40000;
2064 if (data != 0) {
2065 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2066 data);
2067 return 1;
2068 }
2069 break;
2070 case MSR_FAM10H_MMIO_CONF_BASE:
2071 if (data != 0) {
2072 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2073 "0x%llx\n", data);
2074 return 1;
2075 }
2076 break;
2077 case MSR_IA32_DEBUGCTLMSR:
2078 if (!data) {
2079
2080 break;
2081 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2082
2083
2084 return 1;
2085 }
2086 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2087 __func__, data);
2088 break;
2089 case 0x200 ... 0x2ff:
2090 return kvm_mtrr_set_msr(vcpu, msr, data);
2091 case MSR_IA32_APICBASE:
2092 return kvm_set_apic_base(vcpu, msr_info);
2093 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2094 return kvm_x2apic_msr_write(vcpu, msr, data);
2095 case MSR_IA32_TSCDEADLINE:
2096 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2097 break;
2098 case MSR_IA32_TSC_ADJUST:
2099 if (guest_cpuid_has_tsc_adjust(vcpu)) {
2100 if (!msr_info->host_initiated) {
2101 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2102 adjust_tsc_offset_guest(vcpu, adj);
2103 }
2104 vcpu->arch.ia32_tsc_adjust_msr = data;
2105 }
2106 break;
2107 case MSR_IA32_MISC_ENABLE:
2108 vcpu->arch.ia32_misc_enable_msr = data;
2109 break;
2110 case MSR_IA32_SMBASE:
2111 if (!msr_info->host_initiated)
2112 return 1;
2113 vcpu->arch.smbase = data;
2114 break;
2115 case MSR_KVM_WALL_CLOCK_NEW:
2116 case MSR_KVM_WALL_CLOCK:
2117 vcpu->kvm->arch.wall_clock = data;
2118 kvm_write_wall_clock(vcpu->kvm, data);
2119 break;
2120 case MSR_KVM_SYSTEM_TIME_NEW:
2121 case MSR_KVM_SYSTEM_TIME: {
2122 u64 gpa_offset;
2123 struct kvm_arch *ka = &vcpu->kvm->arch;
2124
2125 kvmclock_reset(vcpu);
2126
2127 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
2128 bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
2129
2130 if (ka->boot_vcpu_runs_old_kvmclock != tmp)
2131 set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
2132 &vcpu->requests);
2133
2134 ka->boot_vcpu_runs_old_kvmclock = tmp;
2135 }
2136
2137 vcpu->arch.time = data;
2138 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2139
2140
2141 if (!(data & 1))
2142 break;
2143
2144 gpa_offset = data & ~(PAGE_MASK | 1);
2145
2146 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2147 &vcpu->arch.pv_time, data & ~1ULL,
2148 sizeof(struct pvclock_vcpu_time_info)))
2149 vcpu->arch.pv_time_enabled = false;
2150 else
2151 vcpu->arch.pv_time_enabled = true;
2152
2153 break;
2154 }
2155 case MSR_KVM_ASYNC_PF_EN:
2156 if (kvm_pv_enable_async_pf(vcpu, data))
2157 return 1;
2158 break;
2159 case MSR_KVM_STEAL_TIME:
2160
2161 if (unlikely(!sched_info_on()))
2162 return 1;
2163
2164 if (data & KVM_STEAL_RESERVED_MASK)
2165 return 1;
2166
2167 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
2168 data & KVM_STEAL_VALID_BITS,
2169 sizeof(struct kvm_steal_time)))
2170 return 1;
2171
2172 vcpu->arch.st.msr_val = data;
2173
2174 if (!(data & KVM_MSR_ENABLED))
2175 break;
2176
2177 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2178
2179 break;
2180 case MSR_KVM_PV_EOI_EN:
2181 if (kvm_lapic_enable_pv_eoi(vcpu, data))
2182 return 1;
2183 break;
2184
2185 case MSR_IA32_MCG_CTL:
2186 case MSR_IA32_MCG_STATUS:
2187 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2188 return set_msr_mce(vcpu, msr, data);
2189
2190 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2191 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2192 pr = true;
2193 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2194 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2195 if (kvm_pmu_is_valid_msr(vcpu, msr))
2196 return kvm_pmu_set_msr(vcpu, msr_info);
2197
2198 if (pr || data != 0)
2199 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2200 "0x%x data 0x%llx\n", msr, data);
2201 break;
2202 case MSR_K7_CLK_CTL:
2203
2204
2205
2206
2207
2208
2209
2210
2211 break;
2212 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2213 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2214 case HV_X64_MSR_CRASH_CTL:
2215 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2216 return kvm_hv_set_msr_common(vcpu, msr, data,
2217 msr_info->host_initiated);
2218 case MSR_IA32_BBL_CR_CTL3:
2219
2220
2221
2222 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
2223 break;
2224 case MSR_AMD64_OSVW_ID_LENGTH:
2225 if (!guest_cpuid_has_osvw(vcpu))
2226 return 1;
2227 vcpu->arch.osvw.length = data;
2228 break;
2229 case MSR_AMD64_OSVW_STATUS:
2230 if (!guest_cpuid_has_osvw(vcpu))
2231 return 1;
2232 vcpu->arch.osvw.status = data;
2233 break;
2234 default:
2235 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2236 return xen_hvm_config(vcpu, data);
2237 if (kvm_pmu_is_valid_msr(vcpu, msr))
2238 return kvm_pmu_set_msr(vcpu, msr_info);
2239 if (!ignore_msrs) {
2240 vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
2241 msr, data);
2242 return 1;
2243 } else {
2244 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
2245 msr, data);
2246 break;
2247 }
2248 }
2249 return 0;
2250}
2251EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2252
2253
2254
2255
2256
2257
2258
2259int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2260{
2261 return kvm_x86_ops->get_msr(vcpu, msr);
2262}
2263EXPORT_SYMBOL_GPL(kvm_get_msr);
2264
2265static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2266{
2267 u64 data;
2268 u64 mcg_cap = vcpu->arch.mcg_cap;
2269 unsigned bank_num = mcg_cap & 0xff;
2270
2271 switch (msr) {
2272 case MSR_IA32_P5_MC_ADDR:
2273 case MSR_IA32_P5_MC_TYPE:
2274 data = 0;
2275 break;
2276 case MSR_IA32_MCG_CAP:
2277 data = vcpu->arch.mcg_cap;
2278 break;
2279 case MSR_IA32_MCG_CTL:
2280 if (!(mcg_cap & MCG_CTL_P))
2281 return 1;
2282 data = vcpu->arch.mcg_ctl;
2283 break;
2284 case MSR_IA32_MCG_STATUS:
2285 data = vcpu->arch.mcg_status;
2286 break;
2287 default:
2288 if (msr >= MSR_IA32_MC0_CTL &&
2289 msr < MSR_IA32_MCx_CTL(bank_num)) {
2290 u32 offset = msr - MSR_IA32_MC0_CTL;
2291 data = vcpu->arch.mce_banks[offset];
2292 break;
2293 }
2294 return 1;
2295 }
2296 *pdata = data;
2297 return 0;
2298}
2299
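/*
 * Common (vendor-independent) RDMSR emulation: covers the synthetic and
 * paravirtual MSRs as well as the PMU, MTRR, MCE and Hyper-V ranges.
 * Unknown MSRs either fail or read as zero, depending on ignore_msrs.
 */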
2300int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2301{
2302 switch (msr_info->index) {
2303 case MSR_IA32_PLATFORM_ID:
2304 case MSR_IA32_EBL_CR_POWERON:
2305 case MSR_IA32_DEBUGCTLMSR:
2306 case MSR_IA32_LASTBRANCHFROMIP:
2307 case MSR_IA32_LASTBRANCHTOIP:
2308 case MSR_IA32_LASTINTFROMIP:
2309 case MSR_IA32_LASTINTTOIP:
2310 case MSR_K8_SYSCFG:
2311 case MSR_K8_TSEG_ADDR:
2312 case MSR_K8_TSEG_MASK:
2313 case MSR_K7_HWCR:
2314 case MSR_VM_HSAVE_PA:
2315 case MSR_K8_INT_PENDING_MSG:
2316 case MSR_AMD64_NB_CFG:
2317 case MSR_FAM10H_MMIO_CONF_BASE:
2318 case MSR_AMD64_BU_CFG2:
2319 case MSR_IA32_PERF_CTL:
2320 msr_info->data = 0;
2321 break;
2322 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2323 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2324 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2325 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2326 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2327 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2328 msr_info->data = 0;
2329 break;
2330 case MSR_IA32_UCODE_REV:
2331 msr_info->data = 0x100000000ULL;
2332 break;
2333 case MSR_MTRRcap:
2334 case 0x200 ... 0x2ff:
2335 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
	case 0xcd: /* FSB frequency */
2337 msr_info->data = 3;
2338 break;
		/*
		 * MSR_EBC_FREQUENCY_ID: report a conservative bus-frequency
		 * encoding (1 << 24) so that guests reading this MSR do not
		 * end up dividing or multiplying by zero.
		 */
2350 case MSR_EBC_FREQUENCY_ID:
2351 msr_info->data = 1 << 24;
2352 break;
2353 case MSR_IA32_APICBASE:
2354 msr_info->data = kvm_get_apic_base(vcpu);
2355 break;
2356 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
		return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
2359 case MSR_IA32_TSCDEADLINE:
2360 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
2361 break;
2362 case MSR_IA32_TSC_ADJUST:
2363 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2364 break;
2365 case MSR_IA32_MISC_ENABLE:
2366 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
2367 break;
2368 case MSR_IA32_SMBASE:
2369 if (!msr_info->host_initiated)
2370 return 1;
2371 msr_info->data = vcpu->arch.smbase;
2372 break;
2373 case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
		msr_info->data = 1000ULL;
		/* CPU multiplier */
		msr_info->data |= (((uint64_t)4ULL) << 40);
2378 break;
2379 case MSR_EFER:
2380 msr_info->data = vcpu->arch.efer;
2381 break;
2382 case MSR_KVM_WALL_CLOCK:
2383 case MSR_KVM_WALL_CLOCK_NEW:
2384 msr_info->data = vcpu->kvm->arch.wall_clock;
2385 break;
2386 case MSR_KVM_SYSTEM_TIME:
2387 case MSR_KVM_SYSTEM_TIME_NEW:
2388 msr_info->data = vcpu->arch.time;
2389 break;
2390 case MSR_KVM_ASYNC_PF_EN:
2391 msr_info->data = vcpu->arch.apf.msr_val;
2392 break;
2393 case MSR_KVM_STEAL_TIME:
2394 msr_info->data = vcpu->arch.st.msr_val;
2395 break;
2396 case MSR_KVM_PV_EOI_EN:
2397 msr_info->data = vcpu->arch.pv_eoi.msr_val;
2398 break;
2399 case MSR_IA32_P5_MC_ADDR:
2400 case MSR_IA32_P5_MC_TYPE:
2401 case MSR_IA32_MCG_CAP:
2402 case MSR_IA32_MCG_CTL:
2403 case MSR_IA32_MCG_STATUS:
2404 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2405 return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
2406 case MSR_K7_CLK_CTL:
		/*
		 * Provide an expected ramp-up count for K7 and leave every
		 * other field zero (minimum divisors).  This keeps guest
		 * kernels that read this MSR on AMD hosts from misbehaving
		 * when the real rdmsr would fail.
		 */
2416 msr_info->data = 0x20000000;
2417 break;
2418 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2419 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2420 case HV_X64_MSR_CRASH_CTL:
2421 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2422 return kvm_hv_get_msr_common(vcpu,
2423 msr_info->index, &msr_info->data);
2425 case MSR_IA32_BBL_CR_CTL3:
		/*
		 * This legacy MSR exists but is not fully documented for
		 * current silicon.  Some guests still read it, so return a
		 * best-effort constant instead of failing the access.
		 */
2436 msr_info->data = 0xbe702111;
2437 break;
2438 case MSR_AMD64_OSVW_ID_LENGTH:
2439 if (!guest_cpuid_has_osvw(vcpu))
2440 return 1;
2441 msr_info->data = vcpu->arch.osvw.length;
2442 break;
2443 case MSR_AMD64_OSVW_STATUS:
2444 if (!guest_cpuid_has_osvw(vcpu))
2445 return 1;
2446 msr_info->data = vcpu->arch.osvw.status;
2447 break;
2448 default:
2449 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2450 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2451 if (!ignore_msrs) {
2452 vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
2453 return 1;
2454 } else {
2455 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
2456 msr_info->data = 0;
2457 }
2458 break;
2459 }
2460 return 0;
2461}
2462EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2463
/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
2469static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2470 struct kvm_msr_entry *entries,
2471 int (*do_msr)(struct kvm_vcpu *vcpu,
2472 unsigned index, u64 *data))
2473{
2474 int i, idx;
2475
2476 idx = srcu_read_lock(&vcpu->kvm->srcu);
2477 for (i = 0; i < msrs->nmsrs; ++i)
2478 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2479 break;
2480 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2481
2482 return i;
2483}
2484
/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
2490static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2491 int (*do_msr)(struct kvm_vcpu *vcpu,
2492 unsigned index, u64 *data),
2493 int writeback)
2494{
2495 struct kvm_msrs msrs;
2496 struct kvm_msr_entry *entries;
2497 int r, n;
2498 unsigned size;
2499
2500 r = -EFAULT;
2501 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2502 goto out;
2503
2504 r = -E2BIG;
2505 if (msrs.nmsrs >= MAX_IO_MSRS)
2506 goto out;
2507
2508 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2509 entries = memdup_user(user_msrs->entries, size);
2510 if (IS_ERR(entries)) {
2511 r = PTR_ERR(entries);
2512 goto out;
2513 }
2514
2515 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2516 if (r < 0)
2517 goto out_free;
2518
2519 r = -EFAULT;
2520 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2521 goto out_free;
2522
2523 r = n;
2524
2525out_free:
2526 kfree(entries);
2527out:
2528 return r;
2529}
2530
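/*
 * Report whether an optional capability is available.  The return value
 * is 0/1 for boolean capabilities and a count or limit for the others
 * (e.g. KVM_CAP_NR_VCPUS).  Illustrative userspace usage (not part of
 * this file):
 *
 *	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X2APIC_API) > 0)
 *		... the capability can then be enabled via KVM_ENABLE_CAP ...
 */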
2531int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2532{
2533 int r;
2534
2535 switch (ext) {
2536 case KVM_CAP_IRQCHIP:
2537 case KVM_CAP_HLT:
2538 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2539 case KVM_CAP_SET_TSS_ADDR:
2540 case KVM_CAP_EXT_CPUID:
2541 case KVM_CAP_EXT_EMUL_CPUID:
2542 case KVM_CAP_CLOCKSOURCE:
2543 case KVM_CAP_PIT:
2544 case KVM_CAP_NOP_IO_DELAY:
2545 case KVM_CAP_MP_STATE:
2546 case KVM_CAP_SYNC_MMU:
2547 case KVM_CAP_USER_NMI:
2548 case KVM_CAP_REINJECT_CONTROL:
2549 case KVM_CAP_IRQ_INJECT_STATUS:
2550 case KVM_CAP_IOEVENTFD:
2551 case KVM_CAP_IOEVENTFD_NO_LENGTH:
2552 case KVM_CAP_PIT2:
2553 case KVM_CAP_PIT_STATE2:
2554 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2555 case KVM_CAP_XEN_HVM:
2556 case KVM_CAP_ADJUST_CLOCK:
2557 case KVM_CAP_VCPU_EVENTS:
2558 case KVM_CAP_HYPERV:
2559 case KVM_CAP_HYPERV_VAPIC:
2560 case KVM_CAP_HYPERV_SPIN:
2561 case KVM_CAP_HYPERV_SYNIC:
2562 case KVM_CAP_PCI_SEGMENT:
2563 case KVM_CAP_DEBUGREGS:
2564 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2565 case KVM_CAP_XSAVE:
2566 case KVM_CAP_ASYNC_PF:
2567 case KVM_CAP_GET_TSC_KHZ:
2568 case KVM_CAP_KVMCLOCK_CTRL:
2569 case KVM_CAP_READONLY_MEM:
2570 case KVM_CAP_HYPERV_TIME:
2571 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
2572 case KVM_CAP_TSC_DEADLINE_TIMER:
2573 case KVM_CAP_ENABLE_CAP_VM:
2574 case KVM_CAP_DISABLE_QUIRKS:
2575 case KVM_CAP_SET_BOOT_CPU_ID:
2576 case KVM_CAP_SPLIT_IRQCHIP:
2577#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2578 case KVM_CAP_ASSIGN_DEV_IRQ:
2579 case KVM_CAP_PCI_2_3:
2580#endif
2581 r = 1;
2582 break;
2583 case KVM_CAP_X86_SMM:
		/*
		 * SMBASE is usually relocated above 1M on modern chipsets,
		 * and SMM handlers might indeed rely on 4G segment limits,
		 * so do not report SMM to be available if real mode is
		 * emulated via vm86 mode.  Still, do not go to great lengths
		 * to avoid userspace's usage of the feature, because it is a
		 * fringe case that is not enabled except via specific
		 * settings of the module parameters.
		 */
2592 r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
2593 break;
2594 case KVM_CAP_COALESCED_MMIO:
2595 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
2596 break;
2597 case KVM_CAP_VAPIC:
2598 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2599 break;
2600 case KVM_CAP_NR_VCPUS:
2601 r = KVM_SOFT_MAX_VCPUS;
2602 break;
2603 case KVM_CAP_MAX_VCPUS:
2604 r = KVM_MAX_VCPUS;
2605 break;
2606 case KVM_CAP_NR_MEMSLOTS:
2607 r = KVM_USER_MEM_SLOTS;
2608 break;
2609 case KVM_CAP_PV_MMU:
2610 r = 0;
2611 break;
2612#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2613 case KVM_CAP_IOMMU:
2614 r = iommu_present(&pci_bus_type);
2615 break;
2616#endif
2617 case KVM_CAP_MCE:
2618 r = KVM_MAX_MCE_BANKS;
2619 break;
2620 case KVM_CAP_XCRS:
2621 r = boot_cpu_has(X86_FEATURE_XSAVE);
2622 break;
2623 case KVM_CAP_TSC_CONTROL:
2624 r = kvm_has_tsc_control;
2625 break;
2626 case KVM_CAP_X2APIC_API:
2627 r = KVM_X2APIC_API_VALID_FLAGS;
2628 break;
2629 default:
2630 r = 0;
2631 break;
2632 }
2633 return r;
2635}
2636
2637long kvm_arch_dev_ioctl(struct file *filp,
2638 unsigned int ioctl, unsigned long arg)
2639{
2640 void __user *argp = (void __user *)arg;
2641 long r;
2642
2643 switch (ioctl) {
2644 case KVM_GET_MSR_INDEX_LIST: {
2645 struct kvm_msr_list __user *user_msr_list = argp;
2646 struct kvm_msr_list msr_list;
2647 unsigned n;
2648
2649 r = -EFAULT;
2650 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2651 goto out;
2652 n = msr_list.nmsrs;
2653 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
2654 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2655 goto out;
2656 r = -E2BIG;
2657 if (n < msr_list.nmsrs)
2658 goto out;
2659 r = -EFAULT;
2660 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2661 num_msrs_to_save * sizeof(u32)))
2662 goto out;
2663 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2664 &emulated_msrs,
2665 num_emulated_msrs * sizeof(u32)))
2666 goto out;
2667 r = 0;
2668 break;
2669 }
2670 case KVM_GET_SUPPORTED_CPUID:
2671 case KVM_GET_EMULATED_CPUID: {
2672 struct kvm_cpuid2 __user *cpuid_arg = argp;
2673 struct kvm_cpuid2 cpuid;
2674
2675 r = -EFAULT;
2676 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2677 goto out;
2678
2679 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
2680 ioctl);
2681 if (r)
2682 goto out;
2683
2684 r = -EFAULT;
2685 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2686 goto out;
2687 r = 0;
2688 break;
2689 }
2690 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2691 r = -EFAULT;
2692 if (copy_to_user(argp, &kvm_mce_cap_supported,
2693 sizeof(kvm_mce_cap_supported)))
2694 goto out;
2695 r = 0;
2696 break;
2697 }
2698 default:
2699 r = -EINVAL;
2700 }
2701out:
2702 return r;
2703}
2704
2705static void wbinvd_ipi(void *garbage)
2706{
2707 wbinvd();
2708}
2709
2710static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2711{
2712 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
2713}
2714
2715static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
2716{
2717 set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
2718}
2719
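/*
 * Called when a vCPU is scheduled in on a physical CPU: handle WBINVD
 * emulation for non-coherent DMA, re-apply pending TSC adjustments and,
 * if the vCPU moved to a different CPU, migrate its timers and request
 * the necessary clock updates.
 */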
2720void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2721{
2722
2723 if (need_emulate_wbinvd(vcpu)) {
2724 if (kvm_x86_ops->has_wbinvd_exit())
2725 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2726 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2727 smp_call_function_single(vcpu->cpu,
2728 wbinvd_ipi, NULL, 1);
2729 }
2730
2731 kvm_x86_ops->vcpu_load(vcpu, cpu);
2732
2733
2734 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2735 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2736 vcpu->arch.tsc_offset_adjustment = 0;
2737 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2738 }
2739
2740 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2741 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2742 rdtsc() - vcpu->arch.last_host_tsc;
2743 if (tsc_delta < 0)
2744 mark_tsc_unstable("KVM discovered backwards TSC");
2745
2746 if (check_tsc_unstable()) {
2747 u64 offset = kvm_compute_tsc_offset(vcpu,
2748 vcpu->arch.last_guest_tsc);
2749 kvm_x86_ops->write_tsc_offset(vcpu, offset);
2750 vcpu->arch.tsc_catchup = 1;
2751 }
2752 if (kvm_lapic_hv_timer_in_use(vcpu) &&
2753 kvm_x86_ops->set_hv_timer(vcpu,
2754 kvm_get_lapic_tscdeadline_msr(vcpu)))
2755 kvm_lapic_switch_to_sw_timer(vcpu);
		/*
		 * On a host with synchronized TSC, there is no need to
		 * update kvmclock on vcpu->cpu migration.
		 */
2760 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2761 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2762 if (vcpu->cpu != cpu)
2763 kvm_migrate_timers(vcpu);
2764 vcpu->cpu = cpu;
2765 }
2766
2767 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2768}
2769
2770void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2771{
2772 kvm_x86_ops->vcpu_put(vcpu);
2773 kvm_put_guest_fpu(vcpu);
2774 vcpu->arch.last_host_tsc = rdtsc();
2775}
2776
2777static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2778 struct kvm_lapic_state *s)
2779{
2780 if (vcpu->arch.apicv_active)
2781 kvm_x86_ops->sync_pir_to_irr(vcpu);
2782
2783 return kvm_apic_get_state(vcpu, s);
2784}
2785
2786static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2787 struct kvm_lapic_state *s)
2788{
2789 int r;
2790
2791 r = kvm_apic_set_state(vcpu, s);
2792 if (r)
2793 return r;
2794 update_cr8_intercept(vcpu);
2795
2796 return 0;
2797}
2798
2799static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
2800{
2801 return (!lapic_in_kernel(vcpu) ||
2802 kvm_apic_accept_pic_intr(vcpu));
2803}
2804
/*
 * Check whether userspace can inject an external interrupt right now:
 * interrupts must be allowed, nothing may already be queued for
 * (re)injection, and the configured irqchip must accept it.
 */
2811static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
2812{
2813 return kvm_arch_interrupt_allowed(vcpu) &&
2814 !kvm_cpu_has_interrupt(vcpu) &&
2815 !kvm_event_needs_reinjection(vcpu) &&
2816 kvm_cpu_accept_dm_intr(vcpu);
2817}
2818
2819static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2820 struct kvm_interrupt *irq)
2821{
2822 if (irq->irq >= KVM_NR_INTERRUPTS)
2823 return -EINVAL;
2824
2825 if (!irqchip_in_kernel(vcpu->kvm)) {
2826 kvm_queue_interrupt(vcpu, irq->irq, false);
2827 kvm_make_request(KVM_REQ_EVENT, vcpu);
2828 return 0;
2829 }
2830
	/*
	 * With an in-kernel LAPIC, KVM_INTERRUPT is only used to inject
	 * ExtINTs, so fail if the PIC is also in the kernel.
	 */
2835 if (pic_in_kernel(vcpu->kvm))
2836 return -ENXIO;
2837
2838 if (vcpu->arch.pending_external_vector != -1)
2839 return -EEXIST;
2840
2841 vcpu->arch.pending_external_vector = irq->irq;
2842 kvm_make_request(KVM_REQ_EVENT, vcpu);
2843 return 0;
2844}
2845
2846static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2847{
2848 kvm_inject_nmi(vcpu);
2849
2850 return 0;
2851}
2852
2853static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
2854{
2855 kvm_make_request(KVM_REQ_SMI, vcpu);
2856
2857 return 0;
2858}
2859
2860static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
2861 struct kvm_tpr_access_ctl *tac)
2862{
2863 if (tac->flags)
2864 return -EINVAL;
2865 vcpu->arch.tpr_access_reporting = !!tac->enabled;
2866 return 0;
2867}
2868
2869static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
2870 u64 mcg_cap)
2871{
2872 int r;
2873 unsigned bank_num = mcg_cap & 0xff, bank;
2874
2875 r = -EINVAL;
2876 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
2877 goto out;
2878 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
2879 goto out;
2880 r = 0;
2881 vcpu->arch.mcg_cap = mcg_cap;
2882
2883 if (mcg_cap & MCG_CTL_P)
2884 vcpu->arch.mcg_ctl = ~(u64)0;
2885
2886 for (bank = 0; bank < bank_num; bank++)
2887 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
2888
2889 if (kvm_x86_ops->setup_mce)
2890 kvm_x86_ops->setup_mce(vcpu);
2891out:
2892 return r;
2893}
2894
2895static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
2896 struct kvm_x86_mce *mce)
2897{
2898 u64 mcg_cap = vcpu->arch.mcg_cap;
2899 unsigned bank_num = mcg_cap & 0xff;
2900 u64 *banks = vcpu->arch.mce_banks;
2901
2902 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
2903 return -EINVAL;
	/*
	 * If IA32_MCG_CTL is not all 1s, uncorrected error
	 * reporting is disabled.
	 */
2908 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
2909 vcpu->arch.mcg_ctl != ~(u64)0)
2910 return 0;
2911 banks += 4 * mce->bank;
	/*
	 * If IA32_MCi_CTL is not all 1s, uncorrected error
	 * reporting is disabled for the bank.
	 */
2916 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
2917 return 0;
2918 if (mce->status & MCI_STATUS_UC) {
2919 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
2920 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
2921 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
2922 return 0;
2923 }
2924 if (banks[1] & MCI_STATUS_VAL)
2925 mce->status |= MCI_STATUS_OVER;
2926 banks[2] = mce->addr;
2927 banks[3] = mce->misc;
2928 vcpu->arch.mcg_status = mce->mcg_status;
2929 banks[1] = mce->status;
2930 kvm_queue_exception(vcpu, MC_VECTOR);
2931 } else if (!(banks[1] & MCI_STATUS_VAL)
2932 || !(banks[1] & MCI_STATUS_UC)) {
2933 if (banks[1] & MCI_STATUS_VAL)
2934 mce->status |= MCI_STATUS_OVER;
2935 banks[2] = mce->addr;
2936 banks[3] = mce->misc;
2937 banks[1] = mce->status;
2938 } else
2939 banks[1] |= MCI_STATUS_OVER;
2940 return 0;
2941}
2942
2943static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2944 struct kvm_vcpu_events *events)
2945{
2946 process_nmi(vcpu);
2947 events->exception.injected =
2948 vcpu->arch.exception.pending &&
2949 !kvm_exception_is_soft(vcpu->arch.exception.nr);
2950 events->exception.nr = vcpu->arch.exception.nr;
2951 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
2952 events->exception.pad = 0;
2953 events->exception.error_code = vcpu->arch.exception.error_code;
2954
2955 events->interrupt.injected =
2956 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2957 events->interrupt.nr = vcpu->arch.interrupt.nr;
2958 events->interrupt.soft = 0;
2959 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
2960
2961 events->nmi.injected = vcpu->arch.nmi_injected;
2962 events->nmi.pending = vcpu->arch.nmi_pending != 0;
2963 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
2964 events->nmi.pad = 0;
2965
2966 events->sipi_vector = 0;
2967
2968 events->smi.smm = is_smm(vcpu);
2969 events->smi.pending = vcpu->arch.smi_pending;
2970 events->smi.smm_inside_nmi =
2971 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
2972 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
2973
2974 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
2975 | KVM_VCPUEVENT_VALID_SHADOW
2976 | KVM_VCPUEVENT_VALID_SMM);
2977 memset(&events->reserved, 0, sizeof(events->reserved));
2978}
2979
2980static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
2981 struct kvm_vcpu_events *events)
2982{
2983 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
2984 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
2985 | KVM_VCPUEVENT_VALID_SHADOW
2986 | KVM_VCPUEVENT_VALID_SMM))
2987 return -EINVAL;
2988
2989 if (events->exception.injected &&
2990 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
2991 return -EINVAL;
2992
2993 process_nmi(vcpu);
2994 vcpu->arch.exception.pending = events->exception.injected;
2995 vcpu->arch.exception.nr = events->exception.nr;
2996 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
2997 vcpu->arch.exception.error_code = events->exception.error_code;
2998
2999 vcpu->arch.interrupt.pending = events->interrupt.injected;
3000 vcpu->arch.interrupt.nr = events->interrupt.nr;
3001 vcpu->arch.interrupt.soft = events->interrupt.soft;
3002 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3003 kvm_x86_ops->set_interrupt_shadow(vcpu,
3004 events->interrupt.shadow);
3005
3006 vcpu->arch.nmi_injected = events->nmi.injected;
3007 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3008 vcpu->arch.nmi_pending = events->nmi.pending;
3009 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3010
3011 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3012 lapic_in_kernel(vcpu))
3013 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3014
3015 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
3016 if (events->smi.smm)
3017 vcpu->arch.hflags |= HF_SMM_MASK;
3018 else
3019 vcpu->arch.hflags &= ~HF_SMM_MASK;
3020 vcpu->arch.smi_pending = events->smi.pending;
3021 if (events->smi.smm_inside_nmi)
3022 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
3023 else
3024 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
3025 if (lapic_in_kernel(vcpu)) {
3026 if (events->smi.latched_init)
3027 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3028 else
3029 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3030 }
3031 }
3032
3033 kvm_make_request(KVM_REQ_EVENT, vcpu);
3034
3035 return 0;
3036}
3037
3038static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3039 struct kvm_debugregs *dbgregs)
3040{
3041 unsigned long val;
3042
3043 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3044 kvm_get_dr(vcpu, 6, &val);
3045 dbgregs->dr6 = val;
3046 dbgregs->dr7 = vcpu->arch.dr7;
3047 dbgregs->flags = 0;
3048 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3049}
3050
3051static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3052 struct kvm_debugregs *dbgregs)
3053{
3054 if (dbgregs->flags)
3055 return -EINVAL;
3056
3057 if (dbgregs->dr6 & ~0xffffffffull)
3058 return -EINVAL;
3059 if (dbgregs->dr7 & ~0xffffffffull)
3060 return -EINVAL;
3061
3062 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3063 kvm_update_dr0123(vcpu);
3064 vcpu->arch.dr6 = dbgregs->dr6;
3065 kvm_update_dr6(vcpu);
3066 vcpu->arch.dr7 = dbgregs->dr7;
3067 kvm_update_dr7(vcpu);
3068
3069 return 0;
3070}
3071
3072#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
3073
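/*
 * Copy the guest FPU state into a userspace XSAVE buffer, converting
 * from the host's (possibly compacted) format to the standard
 * non-compacted layout expected by the KVM_GET_XSAVE ABI.
 */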
3074static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3075{
3076 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3077 u64 xstate_bv = xsave->header.xfeatures;
3078 u64 valid;
3079
	/*
	 * Copy the legacy XSAVE area, to avoid complications with
	 * CPUID leaves 0 and 1 in the loop below.
	 */
	memcpy(dest, xsave, XSAVE_HDR_OFFSET);

	/* Set XSTATE_BV. */
	*(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;

	/*
	 * Copy each region from the possibly compacted offset to the
	 * non-compacted offset.
	 */
3093 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3094 while (valid) {
3095 u64 feature = valid & -valid;
3096 int index = fls64(feature) - 1;
3097 void *src = get_xsave_addr(xsave, feature);
3098
3099 if (src) {
3100 u32 size, offset, ecx, edx;
3101 cpuid_count(XSTATE_CPUID, index,
3102 &size, &offset, &ecx, &edx);
3103 memcpy(dest + offset, src, size);
3104 }
3105
3106 valid -= feature;
3107 }
3108}
3109
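/*
 * Counterpart of fill_xsave(): copy a standard-layout XSAVE buffer from
 * userspace into the guest FPU area, compacting it when the host uses
 * XSAVES.
 */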
3110static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3111{
3112 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3113 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3114 u64 valid;
3115
	/*
	 * Copy the legacy XSAVE area, to avoid complications with
	 * CPUID leaves 0 and 1 in the loop below.
	 */
	memcpy(xsave, src, XSAVE_HDR_OFFSET);

	/* Set XSTATE_BV and possibly XCOMP_BV. */
	xsave->header.xfeatures = xstate_bv;
	if (boot_cpu_has(X86_FEATURE_XSAVES))
		xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;

	/*
	 * Copy each region from the non-compacted offset to the
	 * possibly compacted offset.
	 */
3131 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3132 while (valid) {
3133 u64 feature = valid & -valid;
3134 int index = fls64(feature) - 1;
3135 void *dest = get_xsave_addr(xsave, feature);
3136
3137 if (dest) {
3138 u32 size, offset, ecx, edx;
3139 cpuid_count(XSTATE_CPUID, index,
3140 &size, &offset, &ecx, &edx);
3141 memcpy(dest, src + offset, size);
3142 }
3143
3144 valid -= feature;
3145 }
3146}
3147
3148static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3149 struct kvm_xsave *guest_xsave)
3150{
3151 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
3152 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
3153 fill_xsave((u8 *) guest_xsave->region, vcpu);
3154 } else {
3155 memcpy(guest_xsave->region,
3156 &vcpu->arch.guest_fpu.state.fxsave,
3157 sizeof(struct fxregs_state));
3158 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3159 XFEATURE_MASK_FPSSE;
3160 }
3161}
3162
3163static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3164 struct kvm_xsave *guest_xsave)
3165{
3166 u64 xstate_bv =
3167 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3168
3169 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
		/*
		 * Reject any xfeature bits that KVM does not expose to
		 * the guest; everything else is loaded as-is.
		 */
3175 if (xstate_bv & ~kvm_supported_xcr0())
3176 return -EINVAL;
3177 load_xsave(vcpu, (u8 *)guest_xsave->region);
3178 } else {
3179 if (xstate_bv & ~XFEATURE_MASK_FPSSE)
3180 return -EINVAL;
3181 memcpy(&vcpu->arch.guest_fpu.state.fxsave,
3182 guest_xsave->region, sizeof(struct fxregs_state));
3183 }
3184 return 0;
3185}
3186
3187static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3188 struct kvm_xcrs *guest_xcrs)
3189{
3190 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
3191 guest_xcrs->nr_xcrs = 0;
3192 return;
3193 }
3194
3195 guest_xcrs->nr_xcrs = 1;
3196 guest_xcrs->flags = 0;
3197 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3198 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3199}
3200
3201static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3202 struct kvm_xcrs *guest_xcrs)
3203{
3204 int i, r = 0;
3205
3206 if (!boot_cpu_has(X86_FEATURE_XSAVE))
3207 return -EINVAL;
3208
3209 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3210 return -EINVAL;
3211
3212 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
		/* Only support XCR0 currently. */
3214 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3215 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3216 guest_xcrs->xcrs[i].value);
3217 break;
3218 }
3219 if (r)
3220 r = -EINVAL;
3221 return r;
3222}
3223
/*
 * kvm_set_guest_paused() indicates to the guest kernel that it has been
 * stopped by the hypervisor; this is reported through the pvclock
 * PVCLOCK_GUEST_STOPPED flag on the next clock update.  Fails with
 * -EINVAL if pvclock is not enabled for this vCPU.
 */
3230static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3231{
3232 if (!vcpu->arch.pv_time_enabled)
3233 return -EINVAL;
3234 vcpu->arch.pvclock_set_guest_stopped_request = true;
3235 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3236 return 0;
3237}
3238
3239static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3240 struct kvm_enable_cap *cap)
3241{
3242 if (cap->flags)
3243 return -EINVAL;
3244
3245 switch (cap->cap) {
3246 case KVM_CAP_HYPERV_SYNIC:
3247 return kvm_hv_activate_synic(vcpu);
3248 default:
3249 return -EINVAL;
3250 }
3251}
3252
3253long kvm_arch_vcpu_ioctl(struct file *filp,
3254 unsigned int ioctl, unsigned long arg)
3255{
3256 struct kvm_vcpu *vcpu = filp->private_data;
3257 void __user *argp = (void __user *)arg;
3258 int r;
3259 union {
3260 struct kvm_lapic_state *lapic;
3261 struct kvm_xsave *xsave;
3262 struct kvm_xcrs *xcrs;
3263 void *buffer;
3264 } u;
3265
3266 u.buffer = NULL;
3267 switch (ioctl) {
3268 case KVM_GET_LAPIC: {
3269 r = -EINVAL;
3270 if (!lapic_in_kernel(vcpu))
3271 goto out;
3272 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3273
3274 r = -ENOMEM;
3275 if (!u.lapic)
3276 goto out;
3277 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3278 if (r)
3279 goto out;
3280 r = -EFAULT;
3281 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3282 goto out;
3283 r = 0;
3284 break;
3285 }
3286 case KVM_SET_LAPIC: {
3287 r = -EINVAL;
3288 if (!lapic_in_kernel(vcpu))
3289 goto out;
3290 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3291 if (IS_ERR(u.lapic))
3292 return PTR_ERR(u.lapic);
3293
3294 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3295 break;
3296 }
3297 case KVM_INTERRUPT: {
3298 struct kvm_interrupt irq;
3299
3300 r = -EFAULT;
3301 if (copy_from_user(&irq, argp, sizeof irq))
3302 goto out;
3303 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3304 break;
3305 }
3306 case KVM_NMI: {
3307 r = kvm_vcpu_ioctl_nmi(vcpu);
3308 break;
3309 }
3310 case KVM_SMI: {
3311 r = kvm_vcpu_ioctl_smi(vcpu);
3312 break;
3313 }
3314 case KVM_SET_CPUID: {
3315 struct kvm_cpuid __user *cpuid_arg = argp;
3316 struct kvm_cpuid cpuid;
3317
3318 r = -EFAULT;
3319 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3320 goto out;
3321 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3322 break;
3323 }
3324 case KVM_SET_CPUID2: {
3325 struct kvm_cpuid2 __user *cpuid_arg = argp;
3326 struct kvm_cpuid2 cpuid;
3327
3328 r = -EFAULT;
3329 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3330 goto out;
3331 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3332 cpuid_arg->entries);
3333 break;
3334 }
3335 case KVM_GET_CPUID2: {
3336 struct kvm_cpuid2 __user *cpuid_arg = argp;
3337 struct kvm_cpuid2 cpuid;
3338
3339 r = -EFAULT;
3340 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3341 goto out;
3342 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3343 cpuid_arg->entries);
3344 if (r)
3345 goto out;
3346 r = -EFAULT;
3347 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3348 goto out;
3349 r = 0;
3350 break;
3351 }
3352 case KVM_GET_MSRS:
3353 r = msr_io(vcpu, argp, do_get_msr, 1);
3354 break;
3355 case KVM_SET_MSRS:
3356 r = msr_io(vcpu, argp, do_set_msr, 0);
3357 break;
3358 case KVM_TPR_ACCESS_REPORTING: {
3359 struct kvm_tpr_access_ctl tac;
3360
3361 r = -EFAULT;
3362 if (copy_from_user(&tac, argp, sizeof tac))
3363 goto out;
3364 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3365 if (r)
3366 goto out;
3367 r = -EFAULT;
3368 if (copy_to_user(argp, &tac, sizeof tac))
3369 goto out;
3370 r = 0;
3371 break;
	}
3373 case KVM_SET_VAPIC_ADDR: {
3374 struct kvm_vapic_addr va;
3375
3376 r = -EINVAL;
3377 if (!lapic_in_kernel(vcpu))
3378 goto out;
3379 r = -EFAULT;
3380 if (copy_from_user(&va, argp, sizeof va))
3381 goto out;
3382 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3383 break;
3384 }
3385 case KVM_X86_SETUP_MCE: {
3386 u64 mcg_cap;
3387
3388 r = -EFAULT;
3389 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3390 goto out;
3391 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3392 break;
3393 }
3394 case KVM_X86_SET_MCE: {
3395 struct kvm_x86_mce mce;
3396
3397 r = -EFAULT;
3398 if (copy_from_user(&mce, argp, sizeof mce))
3399 goto out;
3400 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3401 break;
3402 }
3403 case KVM_GET_VCPU_EVENTS: {
3404 struct kvm_vcpu_events events;
3405
3406 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3407
3408 r = -EFAULT;
3409 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3410 break;
3411 r = 0;
3412 break;
3413 }
3414 case KVM_SET_VCPU_EVENTS: {
3415 struct kvm_vcpu_events events;
3416
3417 r = -EFAULT;
3418 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3419 break;
3420
3421 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3422 break;
3423 }
3424 case KVM_GET_DEBUGREGS: {
3425 struct kvm_debugregs dbgregs;
3426
3427 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3428
3429 r = -EFAULT;
3430 if (copy_to_user(argp, &dbgregs,
3431 sizeof(struct kvm_debugregs)))
3432 break;
3433 r = 0;
3434 break;
3435 }
3436 case KVM_SET_DEBUGREGS: {
3437 struct kvm_debugregs dbgregs;
3438
3439 r = -EFAULT;
3440 if (copy_from_user(&dbgregs, argp,
3441 sizeof(struct kvm_debugregs)))
3442 break;
3443
3444 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3445 break;
3446 }
3447 case KVM_GET_XSAVE: {
3448 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3449 r = -ENOMEM;
3450 if (!u.xsave)
3451 break;
3452
3453 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3454
3455 r = -EFAULT;
3456 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3457 break;
3458 r = 0;
3459 break;
3460 }
3461 case KVM_SET_XSAVE: {
3462 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3463 if (IS_ERR(u.xsave))
3464 return PTR_ERR(u.xsave);
3465
3466 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3467 break;
3468 }
3469 case KVM_GET_XCRS: {
3470 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3471 r = -ENOMEM;
3472 if (!u.xcrs)
3473 break;
3474
3475 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3476
3477 r = -EFAULT;
3478 if (copy_to_user(argp, u.xcrs,
3479 sizeof(struct kvm_xcrs)))
3480 break;
3481 r = 0;
3482 break;
3483 }
3484 case KVM_SET_XCRS: {
3485 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3486 if (IS_ERR(u.xcrs))
3487 return PTR_ERR(u.xcrs);
3488
3489 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3490 break;
3491 }
3492 case KVM_SET_TSC_KHZ: {
3493 u32 user_tsc_khz;
3494
3495 r = -EINVAL;
3496 user_tsc_khz = (u32)arg;
3497
		/*
		 * Only enforce the upper bound when TSC scaling is supported;
		 * otherwise kvm_max_guest_tsc_khz is 0 and every requested
		 * frequency would be rejected.
		 */
		if (kvm_has_tsc_control &&
		    user_tsc_khz >= kvm_max_guest_tsc_khz)
3499 goto out;
3500
3501 if (user_tsc_khz == 0)
3502 user_tsc_khz = tsc_khz;
3503
3504 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
3505 r = 0;
3506
3507 goto out;
3508 }
3509 case KVM_GET_TSC_KHZ: {
3510 r = vcpu->arch.virtual_tsc_khz;
3511 goto out;
3512 }
3513 case KVM_KVMCLOCK_CTRL: {
3514 r = kvm_set_guest_paused(vcpu);
3515 goto out;
3516 }
3517 case KVM_ENABLE_CAP: {
3518 struct kvm_enable_cap cap;
3519
3520 r = -EFAULT;
3521 if (copy_from_user(&cap, argp, sizeof(cap)))
3522 goto out;
3523 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3524 break;
3525 }
3526 default:
3527 r = -EINVAL;
3528 }
3529out:
3530 kfree(u.buffer);
3531 return r;
3532}
3533
3534int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3535{
3536 return VM_FAULT_SIGBUS;
3537}
3538
3539static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3540{
3541 int ret;
3542
3543 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3544 return -EINVAL;
3545 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3546 return ret;
3547}
3548
3549static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3550 u64 ident_addr)
3551{
3552 kvm->arch.ept_identity_map_addr = ident_addr;
3553 return 0;
3554}
3555
3556static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3557 u32 kvm_nr_mmu_pages)
3558{
3559 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3560 return -EINVAL;
3561
3562 mutex_lock(&kvm->slots_lock);
3563
3564 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3565 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3566
3567 mutex_unlock(&kvm->slots_lock);
3568 return 0;
3569}
3570
3571static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3572{
3573 return kvm->arch.n_max_mmu_pages;
3574}
3575
3576static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3577{
3578 int r;
3579
3580 r = 0;
3581 switch (chip->chip_id) {
3582 case KVM_IRQCHIP_PIC_MASTER:
3583 memcpy(&chip->chip.pic,
3584 &pic_irqchip(kvm)->pics[0],
3585 sizeof(struct kvm_pic_state));
3586 break;
3587 case KVM_IRQCHIP_PIC_SLAVE:
3588 memcpy(&chip->chip.pic,
3589 &pic_irqchip(kvm)->pics[1],
3590 sizeof(struct kvm_pic_state));
3591 break;
3592 case KVM_IRQCHIP_IOAPIC:
3593 r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
3594 break;
3595 default:
3596 r = -EINVAL;
3597 break;
3598 }
3599 return r;
3600}
3601
3602static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3603{
3604 int r;
3605
3606 r = 0;
3607 switch (chip->chip_id) {
3608 case KVM_IRQCHIP_PIC_MASTER:
3609 spin_lock(&pic_irqchip(kvm)->lock);
3610 memcpy(&pic_irqchip(kvm)->pics[0],
3611 &chip->chip.pic,
3612 sizeof(struct kvm_pic_state));
3613 spin_unlock(&pic_irqchip(kvm)->lock);
3614 break;
3615 case KVM_IRQCHIP_PIC_SLAVE:
3616 spin_lock(&pic_irqchip(kvm)->lock);
3617 memcpy(&pic_irqchip(kvm)->pics[1],
3618 &chip->chip.pic,
3619 sizeof(struct kvm_pic_state));
3620 spin_unlock(&pic_irqchip(kvm)->lock);
3621 break;
3622 case KVM_IRQCHIP_IOAPIC:
3623 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
3624 break;
3625 default:
3626 r = -EINVAL;
3627 break;
3628 }
3629 kvm_pic_update_irq(pic_irqchip(kvm));
3630 return r;
3631}
3632
3633static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3634{
3635 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
3636
3637 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
3638
3639 mutex_lock(&kps->lock);
3640 memcpy(ps, &kps->channels, sizeof(*ps));
3641 mutex_unlock(&kps->lock);
3642 return 0;
3643}
3644
3645static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3646{
3647 int i;
3648 struct kvm_pit *pit = kvm->arch.vpit;
3649
3650 mutex_lock(&pit->pit_state.lock);
3651 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
3652 for (i = 0; i < 3; i++)
3653 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
3654 mutex_unlock(&pit->pit_state.lock);
3655 return 0;
3656}
3657
3658static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3659{
3660 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3661 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3662 sizeof(ps->channels));
3663 ps->flags = kvm->arch.vpit->pit_state.flags;
3664 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3665 memset(&ps->reserved, 0, sizeof(ps->reserved));
3666 return 0;
3667}
3668
3669static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3670{
3671 int start = 0;
3672 int i;
3673 u32 prev_legacy, cur_legacy;
3674 struct kvm_pit *pit = kvm->arch.vpit;
3675
3676 mutex_lock(&pit->pit_state.lock);
3677 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3678 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3679 if (!prev_legacy && cur_legacy)
3680 start = 1;
3681 memcpy(&pit->pit_state.channels, &ps->channels,
3682 sizeof(pit->pit_state.channels));
3683 pit->pit_state.flags = ps->flags;
3684 for (i = 0; i < 3; i++)
3685 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
3686 start && i == 0);
3687 mutex_unlock(&pit->pit_state.lock);
3688 return 0;
3689}
3690
3691static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3692 struct kvm_reinject_control *control)
3693{
3694 struct kvm_pit *pit = kvm->arch.vpit;
3695
3696 if (!pit)
3697 return -ENXIO;
3698
	/*
	 * pit->pit_state.lock is overloaded here to keep userspace from
	 * seeing an inconsistent state when several KVM_REINJECT_CONTROL
	 * ioctls race with each other.
	 */
3703 mutex_lock(&pit->pit_state.lock);
3704 kvm_pit_set_reinject(pit, control->pit_reinject);
3705 mutex_unlock(&pit->pit_state.lock);
3706
3707 return 0;
3708}
3709
/**
 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
 * @kvm: kvm instance
 * @log: slot id and address to which we copy the log
 *
 * kvm_get_dirty_log_protect() takes a snapshot of the dirty bitmap,
 * clears it, write-protects the corresponding pages and copies the
 * snapshot to userspace.  If anything was dirty we then flush the TLBs
 * so that subsequent guest writes fault again and are marked dirty for
 * the next log read.
 */
3729int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3730{
3731 bool is_dirty = false;
3732 int r;
3733
3734 mutex_lock(&kvm->slots_lock);
	/*
	 * Flush potentially hardware-cached dirty pages to dirty_bitmap.
	 */
3739 if (kvm_x86_ops->flush_log_dirty)
3740 kvm_x86_ops->flush_log_dirty(kvm);
3741
3742 r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
	/*
	 * The TLB flush can be done outside of the MMU lock; holding
	 * slots_lock (asserted below) is sufficient here.
	 */
3748 lockdep_assert_held(&kvm->slots_lock);
3749 if (is_dirty)
3750 kvm_flush_remote_tlbs(kvm);
3751
3752 mutex_unlock(&kvm->slots_lock);
3753 return r;
3754}
3755
3756int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
3757 bool line_status)
3758{
3759 if (!irqchip_in_kernel(kvm))
3760 return -ENXIO;
3761
3762 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3763 irq_event->irq, irq_event->level,
3764 line_status);
3765 return 0;
3766}
3767
3768static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
3769 struct kvm_enable_cap *cap)
3770{
3771 int r;
3772
3773 if (cap->flags)
3774 return -EINVAL;
3775
3776 switch (cap->cap) {
3777 case KVM_CAP_DISABLE_QUIRKS:
3778 kvm->arch.disabled_quirks = cap->args[0];
3779 r = 0;
3780 break;
3781 case KVM_CAP_SPLIT_IRQCHIP: {
3782 mutex_lock(&kvm->lock);
3783 r = -EINVAL;
3784 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
3785 goto split_irqchip_unlock;
3786 r = -EEXIST;
3787 if (irqchip_in_kernel(kvm))
3788 goto split_irqchip_unlock;
3789 if (kvm->created_vcpus)
3790 goto split_irqchip_unlock;
3791 r = kvm_setup_empty_irq_routing(kvm);
3792 if (r)
3793 goto split_irqchip_unlock;
3794
3795 smp_wmb();
3796 kvm->arch.irqchip_split = true;
3797 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
3798 r = 0;
3799split_irqchip_unlock:
3800 mutex_unlock(&kvm->lock);
3801 break;
3802 }
3803 case KVM_CAP_X2APIC_API:
3804 r = -EINVAL;
3805 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
3806 break;
3807
3808 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
3809 kvm->arch.x2apic_format = true;
3810 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
3811 kvm->arch.x2apic_broadcast_quirk_disabled = true;
3812
3813 r = 0;
3814 break;
3815 default:
3816 r = -EINVAL;
3817 break;
3818 }
3819 return r;
3820}
3821
3822long kvm_arch_vm_ioctl(struct file *filp,
3823 unsigned int ioctl, unsigned long arg)
3824{
3825 struct kvm *kvm = filp->private_data;
3826 void __user *argp = (void __user *)arg;
3827 int r = -ENOTTY;
3828
	/*
	 * This union makes it completely explicit to gcc-3.x
	 * that these two variables' stack usage should be
	 * combined, not added together.
	 */
3833 union {
3834 struct kvm_pit_state ps;
3835 struct kvm_pit_state2 ps2;
3836 struct kvm_pit_config pit_config;
3837 } u;
3838
3839 switch (ioctl) {
3840 case KVM_SET_TSS_ADDR:
3841 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
3842 break;
3843 case KVM_SET_IDENTITY_MAP_ADDR: {
3844 u64 ident_addr;
3845
3846 r = -EFAULT;
3847 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
3848 goto out;
3849 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
3850 break;
3851 }
3852 case KVM_SET_NR_MMU_PAGES:
3853 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
3854 break;
3855 case KVM_GET_NR_MMU_PAGES:
3856 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
3857 break;
3858 case KVM_CREATE_IRQCHIP: {
3859 struct kvm_pic *vpic;
3860
3861 mutex_lock(&kvm->lock);
3862 r = -EEXIST;
3863 if (kvm->arch.vpic)
3864 goto create_irqchip_unlock;
3865 r = -EINVAL;
3866 if (kvm->created_vcpus)
3867 goto create_irqchip_unlock;
3868 r = -ENOMEM;
3869 vpic = kvm_create_pic(kvm);
3870 if (vpic) {
3871 r = kvm_ioapic_init(kvm);
3872 if (r) {
3873 mutex_lock(&kvm->slots_lock);
3874 kvm_destroy_pic(vpic);
3875 mutex_unlock(&kvm->slots_lock);
3876 goto create_irqchip_unlock;
3877 }
3878 } else
3879 goto create_irqchip_unlock;
3880 r = kvm_setup_default_irq_routing(kvm);
3881 if (r) {
3882 mutex_lock(&kvm->slots_lock);
3883 mutex_lock(&kvm->irq_lock);
3884 kvm_ioapic_destroy(kvm);
3885 kvm_destroy_pic(vpic);
3886 mutex_unlock(&kvm->irq_lock);
3887 mutex_unlock(&kvm->slots_lock);
3888 goto create_irqchip_unlock;
3889 }
3890
3891 smp_wmb();
3892 kvm->arch.vpic = vpic;
3893 create_irqchip_unlock:
3894 mutex_unlock(&kvm->lock);
3895 break;
3896 }
3897 case KVM_CREATE_PIT:
3898 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
3899 goto create_pit;
3900 case KVM_CREATE_PIT2:
3901 r = -EFAULT;
3902 if (copy_from_user(&u.pit_config, argp,
3903 sizeof(struct kvm_pit_config)))
3904 goto out;
3905 create_pit:
3906 mutex_lock(&kvm->lock);
3907 r = -EEXIST;
3908 if (kvm->arch.vpit)
3909 goto create_pit_unlock;
3910 r = -ENOMEM;
3911 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
3912 if (kvm->arch.vpit)
3913 r = 0;
3914 create_pit_unlock:
3915 mutex_unlock(&kvm->lock);
3916 break;
3917 case KVM_GET_IRQCHIP: {
3918
3919 struct kvm_irqchip *chip;
3920
3921 chip = memdup_user(argp, sizeof(*chip));
3922 if (IS_ERR(chip)) {
3923 r = PTR_ERR(chip);
3924 goto out;
3925 }
3926
3927 r = -ENXIO;
3928 if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
3929 goto get_irqchip_out;
3930 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
3931 if (r)
3932 goto get_irqchip_out;
3933 r = -EFAULT;
3934 if (copy_to_user(argp, chip, sizeof *chip))
3935 goto get_irqchip_out;
3936 r = 0;
3937 get_irqchip_out:
3938 kfree(chip);
3939 break;
3940 }
3941 case KVM_SET_IRQCHIP: {
3942
3943 struct kvm_irqchip *chip;
3944
3945 chip = memdup_user(argp, sizeof(*chip));
3946 if (IS_ERR(chip)) {
3947 r = PTR_ERR(chip);
3948 goto out;
3949 }
3950
3951 r = -ENXIO;
3952 if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
3953 goto set_irqchip_out;
3954 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
3955 if (r)
3956 goto set_irqchip_out;
3957 r = 0;
3958 set_irqchip_out:
3959 kfree(chip);
3960 break;
3961 }
3962 case KVM_GET_PIT: {
3963 r = -EFAULT;
3964 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
3965 goto out;
3966 r = -ENXIO;
3967 if (!kvm->arch.vpit)
3968 goto out;
3969 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
3970 if (r)
3971 goto out;
3972 r = -EFAULT;
3973 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
3974 goto out;
3975 r = 0;
3976 break;
3977 }
3978 case KVM_SET_PIT: {
3979 r = -EFAULT;
3980 if (copy_from_user(&u.ps, argp, sizeof u.ps))
3981 goto out;
3982 r = -ENXIO;
3983 if (!kvm->arch.vpit)
3984 goto out;
3985 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
3986 break;
3987 }
3988 case KVM_GET_PIT2: {
3989 r = -ENXIO;
3990 if (!kvm->arch.vpit)
3991 goto out;
3992 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
3993 if (r)
3994 goto out;
3995 r = -EFAULT;
3996 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
3997 goto out;
3998 r = 0;
3999 break;
4000 }
4001 case KVM_SET_PIT2: {
4002 r = -EFAULT;
4003 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
4004 goto out;
4005 r = -ENXIO;
4006 if (!kvm->arch.vpit)
4007 goto out;
4008 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
4009 break;
4010 }
4011 case KVM_REINJECT_CONTROL: {
4012 struct kvm_reinject_control control;
4013 r = -EFAULT;
4014 if (copy_from_user(&control, argp, sizeof(control)))
4015 goto out;
4016 r = kvm_vm_ioctl_reinject(kvm, &control);
4017 break;
4018 }
4019 case KVM_SET_BOOT_CPU_ID:
4020 r = 0;
4021 mutex_lock(&kvm->lock);
4022 if (kvm->created_vcpus)
4023 r = -EBUSY;
4024 else
4025 kvm->arch.bsp_vcpu_id = arg;
4026 mutex_unlock(&kvm->lock);
4027 break;
4028 case KVM_XEN_HVM_CONFIG: {
4029 r = -EFAULT;
4030 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
4031 sizeof(struct kvm_xen_hvm_config)))
4032 goto out;
4033 r = -EINVAL;
4034 if (kvm->arch.xen_hvm_config.flags)
4035 goto out;
4036 r = 0;
4037 break;
4038 }
4039 case KVM_SET_CLOCK: {
4040 struct kvm_clock_data user_ns;
4041 u64 now_ns;
4042 s64 delta;
4043
4044 r = -EFAULT;
4045 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
4046 goto out;
4047
4048 r = -EINVAL;
4049 if (user_ns.flags)
4050 goto out;
4051
4052 r = 0;
4053 local_irq_disable();
4054 now_ns = get_kernel_ns();
4055 delta = user_ns.clock - now_ns;
4056 local_irq_enable();
4057 kvm->arch.kvmclock_offset = delta;
4058 kvm_gen_update_masterclock(kvm);
4059 break;
4060 }
4061 case KVM_GET_CLOCK: {
4062 struct kvm_clock_data user_ns;
4063 u64 now_ns;
4064
4065 local_irq_disable();
4066 now_ns = get_kernel_ns();
4067 user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
4068 local_irq_enable();
4069 user_ns.flags = 0;
4070 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
4071
4072 r = -EFAULT;
4073 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
4074 goto out;
4075 r = 0;
4076 break;
4077 }
4078 case KVM_ENABLE_CAP: {
4079 struct kvm_enable_cap cap;
4080
4081 r = -EFAULT;
4082 if (copy_from_user(&cap, argp, sizeof(cap)))
4083 goto out;
4084 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
4085 break;
4086 }
4087 default:
4088 r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
4089 }
4090out:
4091 return r;
4092}
4093
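/*
 * Filter msrs_to_save and emulated_msrs down to what the host hardware
 * and the vendor module actually support, so that the lists reported
 * via KVM_GET_MSR_INDEX_LIST stay accurate.
 */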
4094static void kvm_init_msr_list(void)
4095{
4096 u32 dummy[2];
4097 unsigned i, j;
4098
4099 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
4100 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
4101 continue;
		/*
		 * Even MSRs that are valid in the host may not be exposed
		 * to the guests in some cases.
		 */
4107 switch (msrs_to_save[i]) {
4108 case MSR_IA32_BNDCFGS:
4109 if (!kvm_x86_ops->mpx_supported())
4110 continue;
4111 break;
4112 case MSR_TSC_AUX:
4113 if (!kvm_x86_ops->rdtscp_supported())
4114 continue;
4115 break;
4116 default:
4117 break;
4118 }
4119
4120 if (j < i)
4121 msrs_to_save[j] = msrs_to_save[i];
4122 j++;
4123 }
4124 num_msrs_to_save = j;
4125
4126 for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
4127 switch (emulated_msrs[i]) {
4128 case MSR_IA32_SMBASE:
4129 if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
4130 continue;
4131 break;
4132 default:
4133 break;
4134 }
4135
4136 if (j < i)
4137 emulated_msrs[j] = emulated_msrs[i];
4138 j++;
4139 }
4140 num_emulated_msrs = j;
4141}
4142
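/*
 * Write to a guest-physical MMIO range, trying the in-kernel local APIC
 * first and then the KVM_MMIO_BUS devices.  Returns the number of bytes
 * handled in the kernel; anything left over must be completed in
 * userspace.
 */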
4143static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
4144 const void *v)
4145{
4146 int handled = 0;
4147 int n;
4148
4149 do {
4150 n = min(len, 8);
4151 if (!(lapic_in_kernel(vcpu) &&
4152 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
4153 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
4154 break;
4155 handled += n;
4156 addr += n;
4157 len -= n;
4158 v += n;
4159 } while (len);
4160
4161 return handled;
4162}
4163
4164static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4165{
4166 int handled = 0;
4167 int n;
4168
4169 do {
4170 n = min(len, 8);
4171 if (!(lapic_in_kernel(vcpu) &&
4172 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
4173 addr, n, v))
4174 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4175 break;
4176 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
4177 handled += n;
4178 addr += n;
4179 len -= n;
4180 v += n;
4181 } while (len);
4182
4183 return handled;
4184}
4185
4186static void kvm_set_segment(struct kvm_vcpu *vcpu,
4187 struct kvm_segment *var, int seg)
4188{
4189 kvm_x86_ops->set_segment(vcpu, var, seg);
4190}
4191
4192void kvm_get_segment(struct kvm_vcpu *vcpu,
4193 struct kvm_segment *var, int seg)
4194{
4195 kvm_x86_ops->get_segment(vcpu, var, seg);
4196}
4197
4198gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
4199 struct x86_exception *exception)
4200{
4201 gpa_t t_gpa;
4202
4203 BUG_ON(!mmu_is_nested(vcpu));
4204
	/* Nested translations are always performed as user-mode accesses. */
4206 access |= PFERR_USER_MASK;
4207 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
4208
4209 return t_gpa;
4210}
4211
4212gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
4213 struct x86_exception *exception)
4214{
4215 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4216 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4217}
4218
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
4220 struct x86_exception *exception)
4221{
4222 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4223 access |= PFERR_FETCH_MASK;
4224 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4225}
4226
4227gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
4228 struct x86_exception *exception)
4229{
4230 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4231 access |= PFERR_WRITE_MASK;
4232 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4233}
4234
/* Used to access any guest-mapped memory without checking CPL. */
4236gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
4237 struct x86_exception *exception)
4238{
4239 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
4240}
4241
4242static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4243 struct kvm_vcpu *vcpu, u32 access,
4244 struct x86_exception *exception)
4245{
4246 void *data = val;
4247 int r = X86EMUL_CONTINUE;
4248
4249 while (bytes) {
4250 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
4251 exception);
4252 unsigned offset = addr & (PAGE_SIZE-1);
4253 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
4254 int ret;
4255
4256 if (gpa == UNMAPPED_GVA)
4257 return X86EMUL_PROPAGATE_FAULT;
4258 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
4259 offset, toread);
4260 if (ret < 0) {
4261 r = X86EMUL_IO_NEEDED;
4262 goto out;
4263 }
4264
4265 bytes -= toread;
4266 data += toread;
4267 addr += toread;
4268 }
4269out:
4270 return r;
4271}
4272
/* Used for instruction fetching. */
4274static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
4275 gva_t addr, void *val, unsigned int bytes,
4276 struct x86_exception *exception)
4277{
4278 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4279 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4280 unsigned offset;
4281 int ret;
4282
4283
4284 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
4285 exception);
4286 if (unlikely(gpa == UNMAPPED_GVA))
4287 return X86EMUL_PROPAGATE_FAULT;
4288
4289 offset = addr & (PAGE_SIZE-1);
4290 if (WARN_ON(offset + bytes > PAGE_SIZE))
4291 bytes = (unsigned)PAGE_SIZE - offset;
4292 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
4293 offset, bytes);
4294 if (unlikely(ret < 0))
4295 return X86EMUL_IO_NEEDED;
4296
4297 return X86EMUL_CONTINUE;
4298}
4299
4300int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
4301 gva_t addr, void *val, unsigned int bytes,
4302 struct x86_exception *exception)
4303{
4304 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4305 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4306
4307 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4308 exception);
4309}
4310EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4311
4312static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4313 gva_t addr, void *val, unsigned int bytes,
4314 struct x86_exception *exception)
4315{
4316 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4317 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
4318}
4319
4320static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
4321 unsigned long addr, void *val, unsigned int bytes)
4322{
4323 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4324 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
4325
4326 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
4327}
4328
4329int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4330 gva_t addr, void *val,
4331 unsigned int bytes,
4332 struct x86_exception *exception)
4333{
4334 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4335 void *data = val;
4336 int r = X86EMUL_CONTINUE;
4337
4338 while (bytes) {
4339 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4340 PFERR_WRITE_MASK,
4341 exception);
4342 unsigned offset = addr & (PAGE_SIZE-1);
4343 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4344 int ret;
4345
4346 if (gpa == UNMAPPED_GVA)
4347 return X86EMUL_PROPAGATE_FAULT;
4348 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
4349 if (ret < 0) {
4350 r = X86EMUL_IO_NEEDED;
4351 goto out;
4352 }
4353
4354 bytes -= towrite;
4355 data += towrite;
4356 addr += towrite;
4357 }
4358out:
4359 return r;
4360}
4361EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4362
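/*
 * Translate a guest virtual address for an emulated access and classify
 * it: returns 0 for ordinary memory, 1 for MMIO and -1 on translation
 * fault.
 */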
4363static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4364 gpa_t *gpa, struct x86_exception *exception,
4365 bool write)
4366{
4367 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
4368 | (write ? PFERR_WRITE_MASK : 0);
4369
	/*
	 * Fast path: reuse the cached MMIO translation if the GVA matches
	 * and the cached access rights permit this access.
	 */
4375 if (vcpu_match_mmio_gva(vcpu, gva)
4376 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
4377 vcpu->arch.access, 0, access)) {
4378 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
4379 (gva & (PAGE_SIZE - 1));
4380 trace_vcpu_match_mmio(gva, *gpa, write, false);
4381 return 1;
4382 }
4383
4384 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4385
4386 if (*gpa == UNMAPPED_GVA)
4387 return -1;
4388
	/* Accesses to the APIC base page are always handled as MMIO. */
4390 if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4391 return 1;
4392
4393 if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
4394 trace_vcpu_match_mmio(gva, *gpa, write, true);
4395 return 1;
4396 }
4397
4398 return 0;
4399}
4400
4401int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4402 const void *val, int bytes)
4403{
4404 int ret;
4405
4406 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
4407 if (ret < 0)
4408 return 0;
4409 kvm_page_track_write(vcpu, gpa, val, bytes);
4410 return 1;
4411}
4412
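/*
 * Hooks that let emulator_read_write() share one code path for reads
 * and writes: read_write_prepare/read_write_emulate handle ordinary
 * guest memory, while read_write_mmio and read_write_exit_mmio cover
 * in-kernel MMIO devices and exits to userspace respectively.
 */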
4413struct read_write_emulator_ops {
4414 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
4415 int bytes);
4416 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
4417 void *val, int bytes);
4418 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4419 int bytes, void *val);
4420 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4421 void *val, int bytes);
4422 bool write;
4423};
4424
4425static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4426{
4427 if (vcpu->mmio_read_completed) {
4428 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4429 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
4430 vcpu->mmio_read_completed = 0;
4431 return 1;
4432 }
4433
4434 return 0;
4435}
4436
4437static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4438 void *val, int bytes)
4439{
4440 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
4441}
4442
4443static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4444 void *val, int bytes)
4445{
4446 return emulator_write_phys(vcpu, gpa, val, bytes);
4447}
4448
4449static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4450{
4451 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4452 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4453}
4454
4455static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4456 void *val, int bytes)
4457{
4458 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4459 return X86EMUL_IO_NEEDED;
4460}
4461
4462static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4463 void *val, int bytes)
4464{
4465 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
4466
4467 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
4468 return X86EMUL_CONTINUE;
4469}
4470
4471static const struct read_write_emulator_ops read_emultor = {
4472 .read_write_prepare = read_prepare,
4473 .read_write_emulate = read_emulate,
4474 .read_write_mmio = vcpu_mmio_read,
4475 .read_write_exit_mmio = read_exit_mmio,
4476};
4477
4478static const struct read_write_emulator_ops write_emultor = {
4479 .read_write_emulate = write_emulate,
4480 .read_write_mmio = write_mmio,
4481 .read_write_exit_mmio = write_exit_mmio,
4482 .write = true,
4483};
4484
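/*
 * Emulate a read or write that is fully contained within one page: try
 * the regular guest-memory path first, fall back to in-kernel MMIO, and
 * record any unhandled fragment so it can be completed in userspace.
 */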
4485static int emulator_read_write_onepage(unsigned long addr, void *val,
4486 unsigned int bytes,
4487 struct x86_exception *exception,
4488 struct kvm_vcpu *vcpu,
4489 const struct read_write_emulator_ops *ops)
4490{
4491 gpa_t gpa;
4492 int handled, ret;
4493 bool write = ops->write;
4494 struct kvm_mmio_fragment *frag;
4495
4496 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
4497
4498 if (ret < 0)
4499 return X86EMUL_PROPAGATE_FAULT;
	/* ret == 1: the access is MMIO, take the MMIO path below. */
4502 if (ret)
4503 goto mmio;
4504
4505 if (ops->read_write_emulate(vcpu, gpa, val, bytes))
4506 return X86EMUL_CONTINUE;
4507
4508mmio:
	/*
	 * Is this MMIO handled locally by an in-kernel device?
	 */
4512 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
4513 if (handled == bytes)
4514 return X86EMUL_CONTINUE;
4515
4516 gpa += handled;
4517 bytes -= handled;
4518 val += handled;
4519
4520 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
4521 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
4522 frag->gpa = gpa;
4523 frag->data = val;
4524 frag->len = bytes;
4525 return X86EMUL_CONTINUE;
4526}
4527
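/*
 * Common path for emulated reads and writes: split an access that crosses
 * a page boundary, feed each piece to emulator_read_write_onepage(), and,
 * if any mmio fragments were queued, prepare vcpu->run for a KVM_EXIT_MMIO
 * round trip to userspace.
 */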
4528static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
4529 unsigned long addr,
4530 void *val, unsigned int bytes,
4531 struct x86_exception *exception,
4532 const struct read_write_emulator_ops *ops)
4533{
4534 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4535 gpa_t gpa;
4536 int rc;
4537
4538 if (ops->read_write_prepare &&
4539 ops->read_write_prepare(vcpu, val, bytes))
4540 return X86EMUL_CONTINUE;
4541
4542 vcpu->mmio_nr_fragments = 0;
4543
	/* Crossing a page boundary?  Handle the first page separately. */
4545 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4546 int now;
4547
4548 now = -addr & ~PAGE_MASK;
4549 rc = emulator_read_write_onepage(addr, val, now, exception,
4550 vcpu, ops);
4551
4552 if (rc != X86EMUL_CONTINUE)
4553 return rc;
4554 addr += now;
4555 if (ctxt->mode != X86EMUL_MODE_PROT64)
4556 addr = (u32)addr;
4557 val += now;
4558 bytes -= now;
4559 }
4560
4561 rc = emulator_read_write_onepage(addr, val, bytes, exception,
4562 vcpu, ops);
4563 if (rc != X86EMUL_CONTINUE)
4564 return rc;
4565
4566 if (!vcpu->mmio_nr_fragments)
4567 return rc;
4568
4569 gpa = vcpu->mmio_fragments[0].gpa;
4570
4571 vcpu->mmio_needed = 1;
4572 vcpu->mmio_cur_fragment = 0;
4573
4574 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
4575 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
4576 vcpu->run->exit_reason = KVM_EXIT_MMIO;
4577 vcpu->run->mmio.phys_addr = gpa;
4578
4579 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
4580}
4581
4582static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4583 unsigned long addr,
4584 void *val,
4585 unsigned int bytes,
4586 struct x86_exception *exception)
4587{
4588 return emulator_read_write(ctxt, addr, val, bytes,
4589 exception, &read_emultor);
4590}
4591
4592static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4593 unsigned long addr,
4594 const void *val,
4595 unsigned int bytes,
4596 struct x86_exception *exception)
4597{
4598 return emulator_read_write(ctxt, addr, (void *)val, bytes,
4599 exception, &write_emultor);
4600}
4601
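/*
 * CMPXCHG_TYPE() compares *old against *ptr and stores *new on a match,
 * evaluating to true when the exchange happened; 32-bit hosts need
 * cmpxchg64() for the 8-byte case.  For example,
 * CMPXCHG_TYPE(u32, kaddr, old, new) performs a locked 4-byte
 * compare-and-swap on the mapped guest page.
 */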
4602#define CMPXCHG_TYPE(t, ptr, old, new) \
4603 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4604
4605#ifdef CONFIG_X86_64
4606# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4607#else
4608# define CMPXCHG64(ptr, old, new) \
4609 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4610#endif
4611
4612static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4613 unsigned long addr,
4614 const void *old,
4615 const void *new,
4616 unsigned int bytes,
4617 struct x86_exception *exception)
4618{
4619 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4620 gpa_t gpa;
4621 struct page *page;
4622 char *kaddr;
4623 bool exchanged;
4624
	/* Only power-of-two sizes up to 8 bytes can be exchanged atomically. */
4626 if (bytes > 8 || (bytes & (bytes - 1)))
4627 goto emul_write;
4628
4629 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4630
4631 if (gpa == UNMAPPED_GVA ||
4632 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4633 goto emul_write;
4634
4635 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4636 goto emul_write;
4637
4638 page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
4639 if (is_error_page(page))
4640 goto emul_write;
4641
4642 kaddr = kmap_atomic(page);
4643 kaddr += offset_in_page(gpa);
4644 switch (bytes) {
4645 case 1:
4646 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4647 break;
4648 case 2:
4649 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4650 break;
4651 case 4:
4652 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4653 break;
4654 case 8:
4655 exchanged = CMPXCHG64(kaddr, old, new);
4656 break;
4657 default:
4658 BUG();
4659 }
4660 kunmap_atomic(kaddr);
4661 kvm_release_page_dirty(page);
4662
4663 if (!exchanged)
4664 return X86EMUL_CMPXCHG_FAILED;
4665
4666 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
4667 kvm_page_track_write(vcpu, gpa, new, bytes);
4668
4669 return X86EMUL_CONTINUE;
4670
4671emul_write:
4672 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4673
4674 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4675}
4676
4677static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4678{
	/* Route the access to an in-kernel device on KVM_PIO_BUS, if any. */
4680 int r;
4681
4682 if (vcpu->arch.pio.in)
4683 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
4684 vcpu->arch.pio.size, pd);
4685 else
4686 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
4687 vcpu->arch.pio.port, vcpu->arch.pio.size,
4688 pd);
4689 return r;
4690}
4691
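/*
 * Record the PIO parameters in vcpu->arch.pio and try to complete the
 * access on the in-kernel KVM_PIO_BUS.  Returns 1 if it was handled in the
 * kernel; otherwise vcpu->run is prepared for a KVM_EXIT_IO exit and 0 is
 * returned.
 */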
4692static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4693 unsigned short port, void *val,
4694 unsigned int count, bool in)
4695{
4696 vcpu->arch.pio.port = port;
4697 vcpu->arch.pio.in = in;
4698 vcpu->arch.pio.count = count;
4699 vcpu->arch.pio.size = size;
4700
4701 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4702 vcpu->arch.pio.count = 0;
4703 return 1;
4704 }
4705
4706 vcpu->run->exit_reason = KVM_EXIT_IO;
4707 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4708 vcpu->run->io.size = size;
4709 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4710 vcpu->run->io.count = count;
4711 vcpu->run->io.port = port;
4712
4713 return 0;
4714}
4715
4716static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4717 int size, unsigned short port, void *val,
4718 unsigned int count)
4719{
4720 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4721 int ret;
4722
4723 if (vcpu->arch.pio.count)
4724 goto data_avail;
4725
4726 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4727 if (ret) {
4728data_avail:
4729 memcpy(val, vcpu->arch.pio_data, size * count);
4730 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
4731 vcpu->arch.pio.count = 0;
4732 return 1;
4733 }
4734
4735 return 0;
4736}
4737
4738static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4739 int size, unsigned short port,
4740 const void *val, unsigned int count)
4741{
4742 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4743
4744 memcpy(vcpu->arch.pio_data, val, size * count);
4745 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
4746 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4747}
4748
4749static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4750{
4751 return kvm_x86_ops->get_segment_base(vcpu, seg);
4752}
4753
4754static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4755{
4756 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4757}
4758
4759int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
4760{
4761 if (!need_emulate_wbinvd(vcpu))
4762 return X86EMUL_CONTINUE;
4763
4764 if (kvm_x86_ops->has_wbinvd_exit()) {
4765 int cpu = get_cpu();
4766
4767 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4768 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4769 wbinvd_ipi, NULL, 1);
4770 put_cpu();
4771 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4772 } else
4773 wbinvd();
4774 return X86EMUL_CONTINUE;
4775}
4776
4777int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4778{
4779 kvm_x86_ops->skip_emulated_instruction(vcpu);
4780 return kvm_emulate_wbinvd_noskip(vcpu);
4781}
4782EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4783
4784
4785
4786static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4787{
4788 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
4789}
4790
4791static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
4792 unsigned long *dest)
4793{
4794 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
4795}
4796
4797static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
4798 unsigned long value)
4799{
4800
4801 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
4802}
4803
4804static u64 mk_cr_64(u64 curr_cr, u32 new_val)
4805{
4806 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
4807}
4808
4809static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
4810{
4811 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4812 unsigned long value;
4813
4814 switch (cr) {
4815 case 0:
4816 value = kvm_read_cr0(vcpu);
4817 break;
4818 case 2:
4819 value = vcpu->arch.cr2;
4820 break;
4821 case 3:
4822 value = kvm_read_cr3(vcpu);
4823 break;
4824 case 4:
4825 value = kvm_read_cr4(vcpu);
4826 break;
4827 case 8:
4828 value = kvm_get_cr8(vcpu);
4829 break;
4830 default:
4831 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4832 return 0;
4833 }
4834
4835 return value;
4836}
4837
4838static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
4839{
4840 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4841 int res = 0;
4842
4843 switch (cr) {
4844 case 0:
4845 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
4846 break;
4847 case 2:
4848 vcpu->arch.cr2 = val;
4849 break;
4850 case 3:
4851 res = kvm_set_cr3(vcpu, val);
4852 break;
4853 case 4:
4854 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
4855 break;
4856 case 8:
4857 res = kvm_set_cr8(vcpu, val);
4858 break;
4859 default:
4860 kvm_err("%s: unexpected cr %u\n", __func__, cr);
4861 res = -1;
4862 }
4863
4864 return res;
4865}
4866
4867static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
4868{
4869 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
4870}
4871
4872static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4873{
4874 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
4875}
4876
4877static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4878{
4879 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
4880}
4881
4882static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4883{
4884 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
4885}
4886
4887static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
4888{
4889 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
4890}
4891
4892static unsigned long emulator_get_cached_segment_base(
4893 struct x86_emulate_ctxt *ctxt, int seg)
4894{
4895 return get_segment_base(emul_to_vcpu(ctxt), seg);
4896}
4897
4898static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
4899 struct desc_struct *desc, u32 *base3,
4900 int seg)
4901{
4902 struct kvm_segment var;
4903
4904 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
4905 *selector = var.selector;
4906
4907 if (var.unusable) {
4908 memset(desc, 0, sizeof(*desc));
4909 return false;
4910 }
4911
4912 if (var.g)
4913 var.limit >>= 12;
4914 set_desc_limit(desc, var.limit);
4915 set_desc_base(desc, (unsigned long)var.base);
4916#ifdef CONFIG_X86_64
4917 if (base3)
4918 *base3 = var.base >> 32;
4919#endif
4920 desc->type = var.type;
4921 desc->s = var.s;
4922 desc->dpl = var.dpl;
4923 desc->p = var.present;
4924 desc->avl = var.avl;
4925 desc->l = var.l;
4926 desc->d = var.db;
4927 desc->g = var.g;
4928
4929 return true;
4930}
4931
4932static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4933 struct desc_struct *desc, u32 base3,
4934 int seg)
4935{
4936 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4937 struct kvm_segment var;
4938
4939 var.selector = selector;
4940 var.base = get_desc_base(desc);
4941#ifdef CONFIG_X86_64
4942 var.base |= ((u64)base3) << 32;
4943#endif
4944 var.limit = get_desc_limit(desc);
4945 if (desc->g)
4946 var.limit = (var.limit << 12) | 0xfff;
4947 var.type = desc->type;
4948 var.dpl = desc->dpl;
4949 var.db = desc->d;
4950 var.s = desc->s;
4951 var.l = desc->l;
4952 var.g = desc->g;
4953 var.avl = desc->avl;
4954 var.present = desc->p;
4955 var.unusable = !var.present;
4956 var.padding = 0;
4957
4958 kvm_set_segment(vcpu, &var, seg);
4959 return;
4960}
4961
4962static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
4963 u32 msr_index, u64 *pdata)
4964{
4965 struct msr_data msr;
4966 int r;
4967
4968 msr.index = msr_index;
4969 msr.host_initiated = false;
4970 r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
4971 if (r)
4972 return r;
4973
4974 *pdata = msr.data;
4975 return 0;
4976}
4977
4978static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4979 u32 msr_index, u64 data)
4980{
4981 struct msr_data msr;
4982
4983 msr.data = data;
4984 msr.index = msr_index;
4985 msr.host_initiated = false;
4986 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4987}
4988
4989static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
4990{
4991 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4992
4993 return vcpu->arch.smbase;
4994}
4995
4996static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
4997{
4998 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4999
5000 vcpu->arch.smbase = smbase;
5001}
5002
5003static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
5004 u32 pmc)
5005{
5006 return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
5007}
5008
5009static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
5010 u32 pmc, u64 *pdata)
5011{
5012 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
5013}
5014
5015static void emulator_halt(struct x86_emulate_ctxt *ctxt)
5016{
5017 emul_to_vcpu(ctxt)->arch.halt_request = 1;
5018}
5019
5020static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
5021{
5022 preempt_disable();
5023 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
	/*
	 * CR0.TS in the host may still be set even though the guest FPU
	 * state was just loaded; clear it so the emulator's FPU accesses do
	 * not fault with #NM.
	 */
5028 clts();
5029}
5030
5031static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
5032{
5033 preempt_enable();
5034}
5035
5036static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
5037 struct x86_instruction_info *info,
5038 enum x86_intercept_stage stage)
5039{
5040 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
5041}
5042
5043static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
5044 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
5045{
5046 kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
5047}
5048
5049static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
5050{
5051 return kvm_register_read(emul_to_vcpu(ctxt), reg);
5052}
5053
5054static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
5055{
5056 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
5057}
5058
5059static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
5060{
5061 kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
5062}
5063
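/*
 * The instruction emulator never touches struct kvm_vcpu directly; this
 * table is how it reaches back into KVM for guest memory, port I/O,
 * registers, control/debug registers and descriptor tables.
 */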
5064static const struct x86_emulate_ops emulate_ops = {
5065 .read_gpr = emulator_read_gpr,
5066 .write_gpr = emulator_write_gpr,
5067 .read_std = kvm_read_guest_virt_system,
5068 .write_std = kvm_write_guest_virt_system,
5069 .read_phys = kvm_read_guest_phys_system,
5070 .fetch = kvm_fetch_guest_virt,
5071 .read_emulated = emulator_read_emulated,
5072 .write_emulated = emulator_write_emulated,
5073 .cmpxchg_emulated = emulator_cmpxchg_emulated,
5074 .invlpg = emulator_invlpg,
5075 .pio_in_emulated = emulator_pio_in_emulated,
5076 .pio_out_emulated = emulator_pio_out_emulated,
5077 .get_segment = emulator_get_segment,
5078 .set_segment = emulator_set_segment,
5079 .get_cached_segment_base = emulator_get_cached_segment_base,
5080 .get_gdt = emulator_get_gdt,
5081 .get_idt = emulator_get_idt,
5082 .set_gdt = emulator_set_gdt,
5083 .set_idt = emulator_set_idt,
5084 .get_cr = emulator_get_cr,
5085 .set_cr = emulator_set_cr,
5086 .cpl = emulator_get_cpl,
5087 .get_dr = emulator_get_dr,
5088 .set_dr = emulator_set_dr,
5089 .get_smbase = emulator_get_smbase,
5090 .set_smbase = emulator_set_smbase,
5091 .set_msr = emulator_set_msr,
5092 .get_msr = emulator_get_msr,
5093 .check_pmc = emulator_check_pmc,
5094 .read_pmc = emulator_read_pmc,
5095 .halt = emulator_halt,
5096 .wbinvd = emulator_wbinvd,
5097 .fix_hypercall = emulator_fix_hypercall,
5098 .get_fpu = emulator_get_fpu,
5099 .put_fpu = emulator_put_fpu,
5100 .intercept = emulator_intercept,
5101 .get_cpuid = emulator_get_cpuid,
5102 .set_nmi_mask = emulator_set_nmi_mask,
5103};
5104
5105static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
5106{
5107 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
5108
	/*
	 * An "sti; sti" sequence only blocks interrupts for the first
	 * instruction, so if the last emulated instruction left the
	 * interrupt shadow set it must itself have been an STI (or MOV SS).
	 * In that case do not re-arm the shadow; just drop the new mask.
	 */
5115 if (int_shadow & mask)
5116 mask = 0;
5117 if (unlikely(int_shadow || mask)) {
5118 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
5119 if (!mask)
5120 kvm_make_request(KVM_REQ_EVENT, vcpu);
5121 }
5122}
5123
5124static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
5125{
5126 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5127 if (ctxt->exception.vector == PF_VECTOR)
5128 return kvm_propagate_fault(vcpu, &ctxt->exception);
5129
5130 if (ctxt->exception.error_code_valid)
5131 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
5132 ctxt->exception.error_code);
5133 else
5134 kvm_queue_exception(vcpu, ctxt->exception.vector);
5135 return false;
5136}
5137
5138static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
5139{
5140 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5141 int cs_db, cs_l;
5142
5143 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5144
5145 ctxt->eflags = kvm_get_rflags(vcpu);
5146 ctxt->eip = kvm_rip_read(vcpu);
5147 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
5148 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
5149 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
5150 cs_db ? X86EMUL_MODE_PROT32 :
5151 X86EMUL_MODE_PROT16;
5152 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
5153 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
5154 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
5155 ctxt->emul_flags = vcpu->arch.hflags;
5156
5157 init_decode_cache(ctxt);
5158 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5159}
5160
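/*
 * Inject an interrupt or exception while the vcpu is in real mode by
 * emulating the IVT-based delivery sequence with 16-bit operand and
 * address sizes (used when the CPU cannot deliver the event itself in
 * that mode).
 */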
5161int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
5162{
5163 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5164 int ret;
5165
5166 init_emulate_ctxt(vcpu);
5167
5168 ctxt->op_bytes = 2;
5169 ctxt->ad_bytes = 2;
5170 ctxt->_eip = ctxt->eip + inc_eip;
5171 ret = emulate_int_real(ctxt, irq);
5172
5173 if (ret != X86EMUL_CONTINUE)
5174 return EMULATE_FAIL;
5175
5176 ctxt->eip = ctxt->_eip;
5177 kvm_rip_write(vcpu, ctxt->eip);
5178 kvm_set_rflags(vcpu, ctxt->eflags);
5179
5180 if (irq == NMI_VECTOR)
5181 vcpu->arch.nmi_pending = 0;
5182 else
5183 vcpu->arch.interrupt.pending = false;
5184
5185 return EMULATE_DONE;
5186}
5187EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
5188
5189static int handle_emulation_failure(struct kvm_vcpu *vcpu)
5190{
5191 int r = EMULATE_DONE;
5192
5193 ++vcpu->stat.insn_emulation_fail;
5194 trace_kvm_emulate_insn_failed(vcpu);
5195 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
5196 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5197 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
5198 vcpu->run->internal.ndata = 0;
5199 r = EMULATE_FAIL;
5200 }
5201 kvm_queue_exception(vcpu, UD_VECTOR);
5202
5203 return r;
5204}
5205
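/*
 * Decide whether a failed emulation may simply be retried by re-entering
 * the guest.  Returning true means "unprotect the shadowed page, if any,
 * and re-execute natively"; returning false means the failure has to be
 * handled by the caller.
 */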
5206static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
5207 bool write_fault_to_shadow_pgtable,
5208 int emulation_type)
5209{
5210 gpa_t gpa = cr2;
5211 kvm_pfn_t pfn;
5212
5213 if (emulation_type & EMULTYPE_NO_REEXECUTE)
5214 return false;
5215
5216 if (!vcpu->arch.mmu.direct_map) {
		/*
		 * With shadow paging, translate the faulting GVA to a GPA.
		 * Only write access needs to be emulated here, so ask for a
		 * write translation.
		 */
5221 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);

		/*
		 * If the GVA is not mapped in the guest page tables, let the
		 * guest re-execute the instruction and take the fault itself.
		 */
5227 if (gpa == UNMAPPED_GVA)
5228 return true;
5229 }
5230
	/*
	 * Do not retry an instruction that faults on read-only host memory;
	 * that would loop forever (retry -> write #PF -> emulation fails ->
	 * retry -> ...).
	 */
5237 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));

	/*
	 * If the fault happened on an error pfn it cannot be fixed by
	 * retrying; report the emulation failure instead.
	 */
5243 if (is_error_noslot_pfn(pfn))
5244 return false;
5245
5246 kvm_release_pfn_clean(pfn);
5247
	/* Instructions faulting on a direct (TDP) MMU are safe to retry. */
5249 if (vcpu->arch.mmu.direct_map) {
5250 unsigned int indirect_shadow_pages;
5251
5252 spin_lock(&vcpu->kvm->mmu_lock);
5253 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
5254 spin_unlock(&vcpu->kvm->mmu_lock);
5255
5256 if (indirect_shadow_pages)
5257 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5258
5259 return true;
5260 }
5261
	/*
	 * Emulation was attempted because the access hit a shadowed guest
	 * page table; unprotect (unshadow) the page and re-enter the guest
	 * so the CPU can execute the instruction itself.
	 */
5267 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5268
	/*
	 * If the write fault was on the page tables used to translate the
	 * access itself, unprotecting cannot fix it; report the failure.
	 */
5274 return !write_fault_to_shadow_pgtable;
5275}
5276
5277static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
5278 unsigned long cr2, int emulation_type)
5279{
5280 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5281 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
5282
5283 last_retry_eip = vcpu->arch.last_retry_eip;
5284 last_retry_addr = vcpu->arch.last_retry_addr;
5285
	/*
	 * If emulation was triggered by a write fault on a shadowed guest
	 * page table and the instruction is not itself a page-table-writing
	 * one, the shadow page can be zapped and the instruction retried
	 * natively.
	 *
	 * To avoid an infinite loop when the faulting instruction lives in
	 * the very page being unprotected, remember the last retried eip and
	 * fault address and refuse to retry the same combination twice in a
	 * row.
	 */
5299 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
5300
5301 if (!(emulation_type & EMULTYPE_RETRY))
5302 return false;
5303
5304 if (x86_page_table_writing_insn(ctxt))
5305 return false;
5306
5307 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
5308 return false;
5309
5310 vcpu->arch.last_retry_eip = ctxt->eip;
5311 vcpu->arch.last_retry_addr = cr2;
5312
5313 if (!vcpu->arch.mmu.direct_map)
5314 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5315
5316 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5317
5318 return true;
5319}
5320
5321static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
5322static int complete_emulated_pio(struct kvm_vcpu *vcpu);
5323
5324static void kvm_smm_changed(struct kvm_vcpu *vcpu)
5325{
5326 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
		/* Trace that the vcpu is leaving SMM. */
5328 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);

		/* Process a latched INIT or SMI, if any. */
5331 kvm_make_request(KVM_REQ_EVENT, vcpu);
5332 }
5333
5334 kvm_mmu_reset_context(vcpu);
5335}
5336
5337static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
5338{
5339 unsigned changed = vcpu->arch.hflags ^ emul_flags;
5340
5341 vcpu->arch.hflags = emul_flags;
5342
5343 if (changed & HF_SMM_MASK)
5344 kvm_smm_changed(vcpu);
5345}
5346
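/*
 * Match the four hardware breakpoints described by @dr7/@db against a
 * linear address and access type; returns the DR6.B0-B3 bits that should
 * be reported for the hits.
 */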
5347static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
5348 unsigned long *db)
5349{
5350 u32 dr6 = 0;
5351 int i;
5352 u32 enable, rwlen;
5353
5354 enable = dr7;
5355 rwlen = dr7 >> 16;
5356 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
5357 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
5358 dr6 |= (1 << i);
5359 return dr6;
5360}
5361
5362static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
5363{
5364 struct kvm_run *kvm_run = vcpu->run;
5365
	/*
	 * "rflags" is the raw value the flags had before the instruction was
	 * emulated.  TF set there means a single-step #DB (or a
	 * KVM_EXIT_DEBUG exit) is due after this instruction; TF set by the
	 * instruction itself only takes effect after the *next* one, which
	 * is why the old value is the one checked.
	 */
5374 if (unlikely(rflags & X86_EFLAGS_TF)) {
5375 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5376 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
5377 DR6_RTM;
5378 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
5379 kvm_run->debug.arch.exception = DB_VECTOR;
5380 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5381 *r = EMULATE_USER_EXIT;
5382 } else {
5383 vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;

			/*
			 * The processor only ever clears bits 0-3 of DR6;
			 * preserve the rest and report a single-step (BS)
			 * debug exception to the guest.
			 */
5389 vcpu->arch.dr6 &= ~15;
5390 vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
5391 kvm_queue_exception(vcpu, DB_VECTOR);
5392 }
5393 }
5394}
5395
5396static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5397{
5398 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
5399 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
5400 struct kvm_run *kvm_run = vcpu->run;
5401 unsigned long eip = kvm_get_linear_rip(vcpu);
5402 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5403 vcpu->arch.guest_debug_dr7,
5404 vcpu->arch.eff_db);
5405
5406 if (dr6 != 0) {
5407 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
5408 kvm_run->debug.arch.pc = eip;
5409 kvm_run->debug.arch.exception = DB_VECTOR;
5410 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5411 *r = EMULATE_USER_EXIT;
5412 return true;
5413 }
5414 }
5415
5416 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
5417 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
5418 unsigned long eip = kvm_get_linear_rip(vcpu);
5419 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5420 vcpu->arch.dr7,
5421 vcpu->arch.db);
5422
5423 if (dr6 != 0) {
5424 vcpu->arch.dr6 &= ~15;
5425 vcpu->arch.dr6 |= dr6 | DR6_RTM;
5426 kvm_queue_exception(vcpu, DB_VECTOR);
5427 *r = EMULATE_DONE;
5428 return true;
5429 }
5430 }
5431
5432 return false;
5433}
5434
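/*
 * Main emulation entry point.  Returns EMULATE_DONE when the guest can
 * simply continue, EMULATE_USER_EXIT when vcpu->run has been set up and
 * userspace must complete the operation, or EMULATE_FAIL when the
 * instruction could not be emulated at all.
 */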
5435int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5436 unsigned long cr2,
5437 int emulation_type,
5438 void *insn,
5439 int insn_len)
5440{
5441 int r;
5442 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5443 bool writeback = true;
5444 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
5445
	/*
	 * Clear write_fault_to_shadow_pgtable here so the stale value is
	 * never reused by a later emulation.
	 */
5450 vcpu->arch.write_fault_to_shadow_pgtable = false;
5451 kvm_clear_exception_queue(vcpu);
5452
5453 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
5454 init_emulate_ctxt(vcpu);
5455
		/*
		 * A breakpoint hit here returns to userspace (or queues a
		 * #DB) without advancing RIP, so we will re-enter on the
		 * same instruction; complete_userspace_io is deliberately
		 * not set.  Data watchpoints are not handled here.
		 */
5462 if (kvm_vcpu_check_breakpoint(vcpu, &r))
5463 return r;
5464
5465 ctxt->interruptibility = 0;
5466 ctxt->have_exception = false;
5467 ctxt->exception.vector = -1;
5468 ctxt->perm_ok = false;
5469
5470 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
5471
5472 r = x86_decode_insn(ctxt, insn, insn_len);
5473
5474 trace_kvm_emulate_insn_start(vcpu);
5475 ++vcpu->stat.insn_emulation;
5476 if (r != EMULATION_OK) {
5477 if (emulation_type & EMULTYPE_TRAP_UD)
5478 return EMULATE_FAIL;
5479 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5480 emulation_type))
5481 return EMULATE_DONE;
5482 if (emulation_type & EMULTYPE_SKIP)
5483 return EMULATE_FAIL;
5484 return handle_emulation_failure(vcpu);
5485 }
5486 }
5487
5488 if (emulation_type & EMULTYPE_SKIP) {
5489 kvm_rip_write(vcpu, ctxt->_eip);
5490 if (ctxt->eflags & X86_EFLAGS_RF)
5491 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
5492 return EMULATE_DONE;
5493 }
5494
5495 if (retry_instruction(ctxt, cr2, emulation_type))
5496 return EMULATE_DONE;
5497
	/*
	 * Register values may have been changed behind the emulator's back
	 * during the I/O operation (e.g. by userspace or the VMware
	 * backdoor interface); drop the cached copies so they are re-read
	 * from the vcpu.
	 */
5500 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
5501 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5502 emulator_invalidate_register_cache(ctxt);
5503 }
5504
5505restart:
5506 r = x86_emulate_insn(ctxt);
5507
5508 if (r == EMULATION_INTERCEPTED)
5509 return EMULATE_DONE;
5510
5511 if (r == EMULATION_FAILED) {
5512 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5513 emulation_type))
5514 return EMULATE_DONE;
5515
5516 return handle_emulation_failure(vcpu);
5517 }
5518
5519 if (ctxt->have_exception) {
5520 r = EMULATE_DONE;
5521 if (inject_emulated_exception(vcpu))
5522 return r;
5523 } else if (vcpu->arch.pio.count) {
5524 if (!vcpu->arch.pio.in) {
5525
5526 vcpu->arch.pio.count = 0;
5527 } else {
5528 writeback = false;
5529 vcpu->arch.complete_userspace_io = complete_emulated_pio;
5530 }
5531 r = EMULATE_USER_EXIT;
5532 } else if (vcpu->mmio_needed) {
5533 if (!vcpu->mmio_is_write)
5534 writeback = false;
5535 r = EMULATE_USER_EXIT;
5536 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5537 } else if (r == EMULATION_RESTART)
5538 goto restart;
5539 else
5540 r = EMULATE_DONE;
5541
5542 if (writeback) {
5543 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5544 toggle_interruptibility(vcpu, ctxt->interruptibility);
5545 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5546 if (vcpu->arch.hflags != ctxt->emul_flags)
5547 kvm_set_hflags(vcpu, ctxt->emul_flags);
5548 kvm_rip_write(vcpu, ctxt->eip);
5549 if (r == EMULATE_DONE)
5550 kvm_vcpu_check_singlestep(vcpu, rflags, &r);
5551 if (!ctxt->have_exception ||
5552 exception_type(ctxt->exception.vector) == EXCPT_TRAP)
5553 __kvm_set_rflags(vcpu, ctxt->eflags);
5554
		/*
		 * If the emulated instruction enabled interrupts (IF went
		 * from 0 to 1), pending interrupts must be re-evaluated.
		 * For STI the request is harmless because the interrupt
		 * shadow delays delivery anyway, but instructions such as
		 * POPF have no shadow and need the explicit KVM_REQ_EVENT.
		 */
5561 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
5562 kvm_make_request(KVM_REQ_EVENT, vcpu);
5563 } else
5564 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5565
5566 return r;
5567}
5568EXPORT_SYMBOL_GPL(x86_emulate_instruction);
5569
5570int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
5571{
5572 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
5573 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
5574 size, port, &val, 1);
5575
5576 vcpu->arch.pio.count = 0;
5577 return ret;
5578}
5579EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
5580
5581static int kvmclock_cpu_down_prep(unsigned int cpu)
5582{
5583 __this_cpu_write(cpu_tsc_khz, 0);
5584 return 0;
5585}
5586
5587static void tsc_khz_changed(void *data)
5588{
5589 struct cpufreq_freqs *freq = data;
5590 unsigned long khz = 0;
5591
5592 if (data)
5593 khz = freq->new;
5594 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5595 khz = cpufreq_quick_get(raw_smp_processor_id());
5596 if (!khz)
5597 khz = tsc_khz;
5598 __this_cpu_write(cpu_tsc_khz, khz);
5599}
5600
5601static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
5602 void *data)
5603{
5604 struct cpufreq_freqs *freq = data;
5605 struct kvm *kvm;
5606 struct kvm_vcpu *vcpu;
5607 int i, send_ipi = 0;
5608
	/*
	 * The guest TSC and kvmclock are scaled from the host TSC.  On a
	 * host without a constant TSC, a cpufreq transition changes the TSC
	 * rate, so every vcpu that last ran on the affected CPU must get a
	 * KVM_REQ_CLOCK_UPDATE and recompute its clock parameters before it
	 * runs again.
	 *
	 * Guests may briefly run on a clock that is slower than advertised,
	 * never on a faster one, so that reported time cannot run ahead of
	 * real time (which would force it backwards at the next update):
	 * act on PRECHANGE only when the frequency is about to rise, and on
	 * POSTCHANGE only after it has dropped.  The per-cpu cpu_tsc_khz
	 * value itself is only written from an IPI on the CPU it describes
	 * (tsc_khz_changed), which serialises updates on that CPU.
	 */
5648 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
5649 return 0;
5650 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
5651 return 0;
5652
5653 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5654
5655 spin_lock(&kvm_lock);
5656 list_for_each_entry(kvm, &vm_list, vm_list) {
5657 kvm_for_each_vcpu(i, vcpu, kvm) {
5658 if (vcpu->cpu != freq->cpu)
5659 continue;
5660 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5661 if (vcpu->cpu != smp_processor_id())
5662 send_ipi = 1;
5663 }
5664 }
5665 spin_unlock(&kvm_lock);
5666
5667 if (freq->old < freq->new && send_ipi) {
		/*
		 * A vcpu on the updated CPU may already be in guest mode.
		 * Calling tsc_khz_changed() there again sends an IPI that
		 * both refreshes cpu_tsc_khz on that CPU and kicks the vcpu
		 * out of the guest, so the pending clock update is applied
		 * before more guest time elapses at the new, higher
		 * frequency.
		 */
5680 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5681 }
5682 return 0;
5683}
5684
5685static struct notifier_block kvmclock_cpufreq_notifier_block = {
5686 .notifier_call = kvmclock_cpufreq_notifier
5687};
5688
5689static int kvmclock_cpu_online(unsigned int cpu)
5690{
5691 tsc_khz_changed(NULL);
5692 return 0;
5693}
5694
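/*
 * Track the TSC frequency of every host CPU: register a cpufreq notifier
 * on hosts without a constant TSC, and a hotplug callback so that newly
 * onlined CPUs get their per-cpu cpu_tsc_khz initialised.
 */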
5695static void kvm_timer_init(void)
5696{
5697 int cpu;
5698
5699 max_tsc_khz = tsc_khz;
5700
5701 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5702#ifdef CONFIG_CPU_FREQ
5703 struct cpufreq_policy policy;
5704 memset(&policy, 0, sizeof(policy));
5705 cpu = get_cpu();
5706 cpufreq_get_policy(&policy, cpu);
5707 if (policy.cpuinfo.max_freq)
5708 max_tsc_khz = policy.cpuinfo.max_freq;
5709 put_cpu();
5710#endif
5711 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5712 CPUFREQ_TRANSITION_NOTIFIER);
5713 }
5714 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
5715
5716 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "AP_X86_KVM_CLK_ONLINE",
5717 kvmclock_cpu_online, kvmclock_cpu_down_prep);
5718}
5719
5720static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5721
5722int kvm_is_in_guest(void)
5723{
5724 return __this_cpu_read(current_vcpu) != NULL;
5725}
5726
5727static int kvm_is_user_mode(void)
5728{
5729 int user_mode = 3;
5730
5731 if (__this_cpu_read(current_vcpu))
5732 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
5733
5734 return user_mode != 0;
5735}
5736
5737static unsigned long kvm_get_guest_ip(void)
5738{
5739 unsigned long ip = 0;
5740
5741 if (__this_cpu_read(current_vcpu))
5742 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
5743
5744 return ip;
5745}
5746
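/*
 * Callbacks that let perf attribute NMI samples taken while a guest was
 * running to guest context (and classify them as guest kernel or guest
 * user mode via the current CPL).
 */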
5747static struct perf_guest_info_callbacks kvm_guest_cbs = {
5748 .is_in_guest = kvm_is_in_guest,
5749 .is_user_mode = kvm_is_user_mode,
5750 .get_guest_ip = kvm_get_guest_ip,
5751};
5752
5753void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
5754{
5755 __this_cpu_write(current_vcpu, vcpu);
5756}
5757EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
5758
5759void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
5760{
5761 __this_cpu_write(current_vcpu, NULL);
5762}
5763EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
5764
5765static void kvm_set_mmio_spte_mask(void)
5766{
5767 u64 mask;
5768 int maxphyaddr = boot_cpu_data.x86_phys_bits;
5769
	/*
	 * Build the mask used to mark MMIO shadow PTEs: reserved physical
	 * address bits plus the present bit, so that a guest access faults
	 * with PFERR.RSVD set and the MMU can recognise it as MMIO.
	 */
5775 mask = rsvd_bits(maxphyaddr, 51);
5776
	/* Also use bits 62/63, which are not otherwise used in these SPTEs. */
5778 mask |= 0x3ull << 62;
5779
	/* Set the present bit. */
5781 mask |= 1ull;
5782
5783#ifdef CONFIG_X86_64
	/*
	 * With a 52-bit physical address width there are no reserved
	 * address bits left to abuse, so clear the present bit and disable
	 * MMIO caching in SPTEs altogether.
	 */
5788 if (maxphyaddr == 52)
5789 mask &= ~1ull;
5790#endif
5791
5792 kvm_mmu_set_mmio_spte_mask(mask);
5793}
5794
5795#ifdef CONFIG_X86_64
5796static void pvclock_gtod_update_fn(struct work_struct *work)
5797{
5798 struct kvm *kvm;
5799
5800 struct kvm_vcpu *vcpu;
5801 int i;
5802
5803 spin_lock(&kvm_lock);
5804 list_for_each_entry(kvm, &vm_list, vm_list)
5805 kvm_for_each_vcpu(i, vcpu, kvm)
5806 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
5807 atomic_set(&kvm_guest_has_master_clock, 0);
5808 spin_unlock(&kvm_lock);
5809}
5810
5811static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
5812
/*
 * Notification about a pvclock gtod (timekeeper) data update.
 */
5816static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
5817 void *priv)
5818{
5819 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
5820 struct timekeeper *tk = priv;
5821
5822 update_pvclock_gtod(tk);
5823
	/*
	 * Disable the master clock if the host stops using a TSC-based
	 * clocksource.
	 */
5827 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
5828 atomic_read(&kvm_guest_has_master_clock) != 0)
5829 queue_work(system_long_wq, &pvclock_gtod_work);
5830
5831 return 0;
5832}
5833
5834static struct notifier_block pvclock_gtod_notifier = {
5835 .notifier_call = pvclock_gtod_notify,
5836};
5837#endif
5838
5839int kvm_arch_init(void *opaque)
5840{
5841 int r;
5842 struct kvm_x86_ops *ops = opaque;
5843
5844 if (kvm_x86_ops) {
5845 printk(KERN_ERR "kvm: already loaded the other module\n");
5846 r = -EEXIST;
5847 goto out;
5848 }
5849
5850 if (!ops->cpu_has_kvm_support()) {
5851 printk(KERN_ERR "kvm: no hardware support\n");
5852 r = -EOPNOTSUPP;
5853 goto out;
5854 }
5855 if (ops->disabled_by_bios()) {
5856 printk(KERN_ERR "kvm: disabled by bios\n");
5857 r = -EOPNOTSUPP;
5858 goto out;
5859 }
5860
5861 r = -ENOMEM;
5862 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
5863 if (!shared_msrs) {
5864 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
5865 goto out;
5866 }
5867
5868 r = kvm_mmu_module_init();
5869 if (r)
5870 goto out_free_percpu;
5871
5872 kvm_set_mmio_spte_mask();
5873
5874 kvm_x86_ops = ops;
5875
5876 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5877 PT_DIRTY_MASK, PT64_NX_MASK, 0,
5878 PT_PRESENT_MASK);
5879 kvm_timer_init();
5880
5881 perf_register_guest_info_callbacks(&kvm_guest_cbs);
5882
5883 if (boot_cpu_has(X86_FEATURE_XSAVE))
5884 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
5885
5886 kvm_lapic_init();
5887#ifdef CONFIG_X86_64
5888 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
5889#endif
5890
5891 return 0;
5892
5893out_free_percpu:
5894 free_percpu(shared_msrs);
5895out:
5896 return r;
5897}
5898
5899void kvm_arch_exit(void)
5900{
5901 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
5902
5903 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5904 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
5905 CPUFREQ_TRANSITION_NOTIFIER);
5906 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
5907#ifdef CONFIG_X86_64
5908 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
5909#endif
5910 kvm_x86_ops = NULL;
5911 kvm_mmu_module_exit();
5912 free_percpu(shared_msrs);
5913}
5914
5915int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
5916{
5917 ++vcpu->stat.halt_exits;
5918 if (lapic_in_kernel(vcpu)) {
5919 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
5920 return 1;
5921 } else {
5922 vcpu->run->exit_reason = KVM_EXIT_HLT;
5923 return 0;
5924 }
5925}
5926EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
5927
5928int kvm_emulate_halt(struct kvm_vcpu *vcpu)
5929{
5930 kvm_x86_ops->skip_emulated_instruction(vcpu);
5931 return kvm_vcpu_halt(vcpu);
5932}
5933EXPORT_SYMBOL_GPL(kvm_emulate_halt);
5934
5935
/*
 * kvm_pv_kick_cpu_op: wake up / kick the vcpu whose APIC ID is @apicid.
 */
5940static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
5941{
5942 struct kvm_lapic_irq lapic_irq;
5943
5944 lapic_irq.shorthand = 0;
5945 lapic_irq.dest_mode = 0;
5946 lapic_irq.dest_id = apicid;
5947 lapic_irq.msi_redir_hint = false;
5948
5949 lapic_irq.delivery_mode = APIC_DM_REMRD;
5950 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
5951}
5952
5953void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
5954{
5955 vcpu->arch.apicv_active = false;
5956 kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
5957}
5958
5959int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5960{
5961 unsigned long nr, a0, a1, a2, a3, ret;
5962 int op_64_bit, r = 1;
5963
5964 kvm_x86_ops->skip_emulated_instruction(vcpu);
5965
5966 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5967 return kvm_hv_hypercall(vcpu);
5968
5969 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
5970 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
5971 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
5972 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
5973 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
5974
5975 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5976
5977 op_64_bit = is_64_bit_mode(vcpu);
5978 if (!op_64_bit) {
5979 nr &= 0xFFFFFFFF;
5980 a0 &= 0xFFFFFFFF;
5981 a1 &= 0xFFFFFFFF;
5982 a2 &= 0xFFFFFFFF;
5983 a3 &= 0xFFFFFFFF;
5984 }
5985
5986 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
5987 ret = -KVM_EPERM;
5988 goto out;
5989 }
5990
5991 switch (nr) {
5992 case KVM_HC_VAPIC_POLL_IRQ:
5993 ret = 0;
5994 break;
5995 case KVM_HC_KICK_CPU:
5996 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
5997 ret = 0;
5998 break;
5999 default:
6000 ret = -KVM_ENOSYS;
6001 break;
6002 }
6003out:
6004 if (!op_64_bit)
6005 ret = (u32)ret;
6006 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
6007 ++vcpu->stat.hypercalls;
6008 return r;
6009}
6010EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
6011
6012static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
6013{
6014 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6015 char instruction[3];
6016 unsigned long rip = kvm_rip_read(vcpu);
6017
6018 kvm_x86_ops->patch_hypercall(vcpu, instruction);
6019
6020 return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
6021}
6022
6023static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
6024{
6025 return vcpu->run->request_interrupt_window &&
6026 likely(!pic_in_kernel(vcpu->kvm));
6027}
6028
6029static void post_kvm_run_save(struct kvm_vcpu *vcpu)
6030{
6031 struct kvm_run *kvm_run = vcpu->run;
6032
6033 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
6034 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
6035 kvm_run->cr8 = kvm_get_cr8(vcpu);
6036 kvm_run->apic_base = kvm_get_apic_base(vcpu);
6037 kvm_run->ready_for_interrupt_injection =
6038 pic_in_kernel(vcpu->kvm) ||
6039 kvm_vcpu_ready_for_interrupt_injection(vcpu);
6040}
6041
6042static void update_cr8_intercept(struct kvm_vcpu *vcpu)
6043{
6044 int max_irr, tpr;
6045
6046 if (!kvm_x86_ops->update_cr8_intercept)
6047 return;
6048
6049 if (!lapic_in_kernel(vcpu))
6050 return;
6051
6052 if (vcpu->arch.apicv_active)
6053 return;
6054
6055 if (!vcpu->arch.apic->vapic_addr)
6056 max_irr = kvm_lapic_find_highest_irr(vcpu);
6057 else
6058 max_irr = -1;
6059
6060 if (max_irr != -1)
6061 max_irr >>= 4;
6062
6063 tpr = kvm_lapic_get_cr8(vcpu);
6064
6065 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
6066}
6067
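/*
 * Inject at most one pending event (exception, NMI, interrupt or SMI)
 * before entering the guest; re-injection of an event that was interrupted
 * by a previous vmexit takes priority over new ones.
 */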
6068static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
6069{
6070 int r;
6071
	/* First, try to reinject previous events if any are pending. */
6073 if (vcpu->arch.exception.pending) {
6074 trace_kvm_inj_exception(vcpu->arch.exception.nr,
6075 vcpu->arch.exception.has_error_code,
6076 vcpu->arch.exception.error_code);
6077
6078 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
6079 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
6080 X86_EFLAGS_RF);
6081
6082 if (vcpu->arch.exception.nr == DB_VECTOR &&
6083 (vcpu->arch.dr7 & DR7_GD)) {
6084 vcpu->arch.dr7 &= ~DR7_GD;
6085 kvm_update_dr7(vcpu);
6086 }
6087
6088 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
6089 vcpu->arch.exception.has_error_code,
6090 vcpu->arch.exception.error_code,
6091 vcpu->arch.exception.reinject);
6092 return 0;
6093 }
6094
6095 if (vcpu->arch.nmi_injected) {
6096 kvm_x86_ops->set_nmi(vcpu);
6097 return 0;
6098 }
6099
6100 if (vcpu->arch.interrupt.pending) {
6101 kvm_x86_ops->set_irq(vcpu);
6102 return 0;
6103 }
6104
6105 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
6106 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
6107 if (r != 0)
6108 return r;
6109 }
6110
	/* Now try to inject a new event, if one is pending. */
6112 if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
6113 vcpu->arch.smi_pending = false;
6114 enter_smm(vcpu);
6115 } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
6116 --vcpu->arch.nmi_pending;
6117 vcpu->arch.nmi_injected = true;
6118 kvm_x86_ops->set_nmi(vcpu);
6119 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
		/*
		 * Interrupts can be posted asynchronously, so nested events
		 * are re-checked here, right before injection, to avoid
		 * racing with a nested vmexit decision made earlier in this
		 * function.
		 */
6127 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
6128 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
6129 if (r != 0)
6130 return r;
6131 }
6132 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
6133 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
6134 false);
6135 kvm_x86_ops->set_irq(vcpu);
6136 }
6137 }
6138
6139 return 0;
6140}
6141
6142static void process_nmi(struct kvm_vcpu *vcpu)
6143{
6144 unsigned limit = 2;
6145
	/*
	 * x86 allows one NMI to be handled and at most one more to be
	 * pending behind it.  If an NMI is already masked or being injected,
	 * collapse the queue to a single pending NMI; otherwise allow two
	 * and inject the first one immediately.
	 */
6151 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
6152 limit = 1;
6153
6154 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
6155 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
6156 kvm_make_request(KVM_REQ_EVENT, vcpu);
6157}
6158
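/*
 * put_smstate() stores one field of the SMM state-save area.  The 512-byte
 * buffer built below maps SMBASE+0xfe00 .. SMBASE+0xffff, and the offsets
 * used are the architectural ones relative to SMBASE+0x8000, hence the
 * -0x7e00 adjustment: e.g. put_smstate(u32, buf, 0x7ffc, ...) writes to
 * buf[0x1fc].
 */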
6159#define put_smstate(type, buf, offset, val) \
6160 *(type *)((buf) + (offset) - 0x7e00) = val
6161
6162static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
6163{
6164 u32 flags = 0;
6165 flags |= seg->g << 23;
6166 flags |= seg->db << 22;
6167 flags |= seg->l << 21;
6168 flags |= seg->avl << 20;
6169 flags |= seg->present << 15;
6170 flags |= seg->dpl << 13;
6171 flags |= seg->s << 12;
6172 flags |= seg->type << 8;
6173 return flags;
6174}
6175
6176static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
6177{
6178 struct kvm_segment seg;
6179 int offset;
6180
6181 kvm_get_segment(vcpu, &seg, n);
6182 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
6183
6184 if (n < 3)
6185 offset = 0x7f84 + n * 12;
6186 else
6187 offset = 0x7f2c + (n - 3) * 12;
6188
6189 put_smstate(u32, buf, offset + 8, seg.base);
6190 put_smstate(u32, buf, offset + 4, seg.limit);
6191 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
6192}
6193
6194#ifdef CONFIG_X86_64
6195static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
6196{
6197 struct kvm_segment seg;
6198 int offset;
6199 u16 flags;
6200
6201 kvm_get_segment(vcpu, &seg, n);
6202 offset = 0x7e00 + n * 16;
6203
6204 flags = enter_smm_get_segment_flags(&seg) >> 8;
6205 put_smstate(u16, buf, offset, seg.selector);
6206 put_smstate(u16, buf, offset + 2, flags);
6207 put_smstate(u32, buf, offset + 4, seg.limit);
6208 put_smstate(u64, buf, offset + 8, seg.base);
6209}
6210#endif
6211
6212static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
6213{
6214 struct desc_ptr dt;
6215 struct kvm_segment seg;
6216 unsigned long val;
6217 int i;
6218
6219 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
6220 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
6221 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
6222 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
6223
6224 for (i = 0; i < 8; i++)
6225 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
6226
6227 kvm_get_dr(vcpu, 6, &val);
6228 put_smstate(u32, buf, 0x7fcc, (u32)val);
6229 kvm_get_dr(vcpu, 7, &val);
6230 put_smstate(u32, buf, 0x7fc8, (u32)val);
6231
6232 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
6233 put_smstate(u32, buf, 0x7fc4, seg.selector);
6234 put_smstate(u32, buf, 0x7f64, seg.base);
6235 put_smstate(u32, buf, 0x7f60, seg.limit);
6236 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
6237
6238 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
6239 put_smstate(u32, buf, 0x7fc0, seg.selector);
6240 put_smstate(u32, buf, 0x7f80, seg.base);
6241 put_smstate(u32, buf, 0x7f7c, seg.limit);
6242 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
6243
6244 kvm_x86_ops->get_gdt(vcpu, &dt);
6245 put_smstate(u32, buf, 0x7f74, dt.address);
6246 put_smstate(u32, buf, 0x7f70, dt.size);
6247
6248 kvm_x86_ops->get_idt(vcpu, &dt);
6249 put_smstate(u32, buf, 0x7f58, dt.address);
6250 put_smstate(u32, buf, 0x7f54, dt.size);
6251
6252 for (i = 0; i < 6; i++)
6253 enter_smm_save_seg_32(vcpu, buf, i);
6254
6255 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
6256
	/* revision id */
6258 put_smstate(u32, buf, 0x7efc, 0x00020000);
6259 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
6260}
6261
6262static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
6263{
6264#ifdef CONFIG_X86_64
6265 struct desc_ptr dt;
6266 struct kvm_segment seg;
6267 unsigned long val;
6268 int i;
6269
6270 for (i = 0; i < 16; i++)
6271 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
6272
6273 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
6274 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
6275
6276 kvm_get_dr(vcpu, 6, &val);
6277 put_smstate(u64, buf, 0x7f68, val);
6278 kvm_get_dr(vcpu, 7, &val);
6279 put_smstate(u64, buf, 0x7f60, val);
6280
6281 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
6282 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
6283 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
6284
6285 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
6286
	/* revision id (64-bit state-save format) */
6288 put_smstate(u32, buf, 0x7efc, 0x00020064);
6289
6290 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
6291
6292 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
6293 put_smstate(u16, buf, 0x7e90, seg.selector);
6294 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
6295 put_smstate(u32, buf, 0x7e94, seg.limit);
6296 put_smstate(u64, buf, 0x7e98, seg.base);
6297
6298 kvm_x86_ops->get_idt(vcpu, &dt);
6299 put_smstate(u32, buf, 0x7e84, dt.size);
6300 put_smstate(u64, buf, 0x7e88, dt.address);
6301
6302 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
6303 put_smstate(u16, buf, 0x7e70, seg.selector);
6304 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
6305 put_smstate(u32, buf, 0x7e74, seg.limit);
6306 put_smstate(u64, buf, 0x7e78, seg.base);
6307
6308 kvm_x86_ops->get_gdt(vcpu, &dt);
6309 put_smstate(u32, buf, 0x7e64, dt.size);
6310 put_smstate(u64, buf, 0x7e68, dt.address);
6311
6312 for (i = 0; i < 6; i++)
6313 enter_smm_save_seg_64(vcpu, buf, i);
6314#else
6315 WARN_ON_ONCE(1);
6316#endif
6317}
6318
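/*
 * Emulate entry to System Management Mode: save the current register state
 * to SMRAM, mask NMIs, and load the architectural SMM reset state (CS base
 * at SMBASE, RIP = 0x8000, flat 4 GiB data segments, paging and protection
 * disabled).
 */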
6319static void enter_smm(struct kvm_vcpu *vcpu)
6320{
6321 struct kvm_segment cs, ds;
6322 struct desc_ptr dt;
6323 char buf[512];
6324 u32 cr0;
6325
6326 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
6327 vcpu->arch.hflags |= HF_SMM_MASK;
6328 memset(buf, 0, 512);
6329 if (guest_cpuid_has_longmode(vcpu))
6330 enter_smm_save_state_64(vcpu, buf);
6331 else
6332 enter_smm_save_state_32(vcpu, buf);
6333
6334 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
6335
6336 if (kvm_x86_ops->get_nmi_mask(vcpu))
6337 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
6338 else
6339 kvm_x86_ops->set_nmi_mask(vcpu, true);
6340
6341 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
6342 kvm_rip_write(vcpu, 0x8000);
6343
6344 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
6345 kvm_x86_ops->set_cr0(vcpu, cr0);
6346 vcpu->arch.cr0 = cr0;
6347
6348 kvm_x86_ops->set_cr4(vcpu, 0);
6349
	/* The IDT is cleared on entry to SMM. */
6351 dt.address = dt.size = 0;
6352 kvm_x86_ops->set_idt(vcpu, &dt);
6353
6354 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
6355
6356 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
6357 cs.base = vcpu->arch.smbase;
6358
6359 ds.selector = 0;
6360 ds.base = 0;
6361
6362 cs.limit = ds.limit = 0xffffffff;
6363 cs.type = ds.type = 0x3;
6364 cs.dpl = ds.dpl = 0;
6365 cs.db = ds.db = 0;
6366 cs.s = ds.s = 1;
6367 cs.l = ds.l = 0;
6368 cs.g = ds.g = 1;
6369 cs.avl = ds.avl = 0;
6370 cs.present = ds.present = 1;
6371 cs.unusable = ds.unusable = 0;
6372 cs.padding = ds.padding = 0;
6373
6374 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
6375 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
6376 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
6377 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
6378 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
6379 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
6380
6381 if (guest_cpuid_has_longmode(vcpu))
6382 kvm_x86_ops->set_efer(vcpu, 0);
6383
6384 kvm_update_cpuid(vcpu);
6385 kvm_mmu_reset_context(vcpu);
6386}
6387
6388static void process_smi(struct kvm_vcpu *vcpu)
6389{
6390 vcpu->arch.smi_pending = true;
6391 kvm_make_request(KVM_REQ_EVENT, vcpu);
6392}
6393
6394void kvm_make_scan_ioapic_request(struct kvm *kvm)
6395{
6396 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
6397}
6398
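/*
 * Recompute which vectors must cause an EOI-induced exit (IOAPIC-routed
 * vectors plus Hyper-V SynIC vectors) and hand the bitmap to the hardware
 * APIC virtualization code.
 */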
6399static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
6400{
6401 u64 eoi_exit_bitmap[4];
6402
6403 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
6404 return;
6405
6406 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
6407
6408 if (irqchip_split(vcpu->kvm))
6409 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
6410 else {
6411 if (vcpu->arch.apicv_active)
6412 kvm_x86_ops->sync_pir_to_irr(vcpu);
6413 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
6414 }
6415 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
6416 vcpu_to_synic(vcpu)->vec_bitmap, 256);
6417 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
6418}
6419
6420static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
6421{
6422 ++vcpu->stat.tlb_flush;
6423 kvm_x86_ops->tlb_flush(vcpu);
6424}
6425
6426void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
6427{
6428 struct page *page = NULL;
6429
6430 if (!lapic_in_kernel(vcpu))
6431 return;
6432
6433 if (!kvm_x86_ops->set_apic_access_page_addr)
6434 return;
6435
6436 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
6437 if (is_error_page(page))
6438 return;
6439 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
6440
	/*
	 * Do not pin the APIC access page in memory; the MMU notifier will
	 * call us again if it is migrated or swapped out.
	 */
6445 put_page(page);
6446}
6447EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
6448
6449void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
6450 unsigned long address)
6451{
	/*
	 * The physical address of the APIC access page is stored in the
	 * VMCS, so it has to be updated when the page becomes invalid.
	 */
6456 if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT))
6457 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
6458}
6459
6460
/*
 * Returns 1 to let vcpu_run() continue the guest's execution loop without
 * exiting to userspace.  Otherwise the value is returned to userspace.
 */
6465static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6466{
6467 int r;
6468 bool req_int_win =
6469 dm_request_for_irq_injection(vcpu) &&
6470 kvm_cpu_accept_dm_intr(vcpu);
6471
6472 bool req_immediate_exit = false;
6473
6474 if (vcpu->requests) {
6475 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
6476 kvm_mmu_unload(vcpu);
6477 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
6478 __kvm_migrate_timers(vcpu);
6479 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
6480 kvm_gen_update_masterclock(vcpu->kvm);
6481 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
6482 kvm_gen_kvmclock_update(vcpu);
6483 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
6484 r = kvm_guest_time_update(vcpu);
6485 if (unlikely(r))
6486 goto out;
6487 }
6488 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
6489 kvm_mmu_sync_roots(vcpu);
6490 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
6491 kvm_vcpu_flush_tlb(vcpu);
6492 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
6493 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
6494 r = 0;
6495 goto out;
6496 }
6497 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
6498 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
6499 r = 0;
6500 goto out;
6501 }
6502 if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
6503 vcpu->fpu_active = 0;
6504 kvm_x86_ops->fpu_deactivate(vcpu);
6505 }
6506 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
			/* The page is not present on the host; do a synthetic halt. */
6508 vcpu->arch.apf.halted = true;
6509 r = 1;
6510 goto out;
6511 }
6512 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
6513 record_steal_time(vcpu);
6514 if (kvm_check_request(KVM_REQ_SMI, vcpu))
6515 process_smi(vcpu);
6516 if (kvm_check_request(KVM_REQ_NMI, vcpu))
6517 process_nmi(vcpu);
6518 if (kvm_check_request(KVM_REQ_PMU, vcpu))
6519 kvm_pmu_handle_event(vcpu);
6520 if (kvm_check_request(KVM_REQ_PMI, vcpu))
6521 kvm_pmu_deliver_pmi(vcpu);
6522 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
6523 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
6524 if (test_bit(vcpu->arch.pending_ioapic_eoi,
6525 vcpu->arch.ioapic_handled_vectors)) {
6526 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
6527 vcpu->run->eoi.vector =
6528 vcpu->arch.pending_ioapic_eoi;
6529 r = 0;
6530 goto out;
6531 }
6532 }
6533 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
6534 vcpu_scan_ioapic(vcpu);
6535 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
6536 kvm_vcpu_reload_apic_access_page(vcpu);
6537 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
6538 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
6539 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
6540 r = 0;
6541 goto out;
6542 }
6543 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
6544 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
6545 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
6546 r = 0;
6547 goto out;
6548 }
6549 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
6550 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
6551 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
6552 r = 0;
6553 goto out;
6554 }
6555
		/*
		 * KVM_REQ_HV_STIMER has to be processed after
		 * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
		 * depend on the guest clock being up to date.
		 */
6561 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
6562 kvm_hv_process_stimers(vcpu);
6563 }
6564
	/*
	 * Posted interrupts can change the IRR without setting
	 * KVM_REQ_EVENT, so the hardware's view of the highest pending
	 * vector has to be refreshed unconditionally before every entry.
	 */
6569 if (kvm_lapic_enabled(vcpu)) {
		/*
		 * Update the architecture-specific hints for APIC virtual
		 * interrupt delivery.
		 */
6574 if (vcpu->arch.apicv_active)
6575 kvm_x86_ops->hwapic_irr_update(vcpu,
6576 kvm_lapic_find_highest_irr(vcpu));
6577 }
6578
6579 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
6580 kvm_apic_accept_events(vcpu);
6581 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
6582 r = 1;
6583 goto out;
6584 }
6585
6586 if (inject_pending_event(vcpu, req_int_win) != 0)
6587 req_immediate_exit = true;
6588 else {
			/*
			 * Enable the NMI/IRQ window exits if needed.
			 *
			 * A pending SMI outside of SMM cannot wait for a
			 * window, so request an immediate exit and inject it
			 * on the next entry; a nested SMI needs nothing here
			 * because RSM will cause a vmexit anyway.
			 */
6598 if (vcpu->arch.smi_pending && !is_smm(vcpu))
6599 req_immediate_exit = true;
6600 if (vcpu->arch.nmi_pending)
6601 kvm_x86_ops->enable_nmi_window(vcpu);
6602 if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
6603 kvm_x86_ops->enable_irq_window(vcpu);
6604 }
6605
6606 if (kvm_lapic_enabled(vcpu)) {
6607 update_cr8_intercept(vcpu);
6608 kvm_lapic_sync_to_vapic(vcpu);
6609 }
6610 }
6611
6612 r = kvm_mmu_reload(vcpu);
6613 if (unlikely(r)) {
6614 goto cancel_injection;
6615 }
6616
6617 preempt_disable();
6618
6619 kvm_x86_ops->prepare_guest_switch(vcpu);
6620 if (vcpu->fpu_active)
6621 kvm_load_guest_fpu(vcpu);
6622 vcpu->mode = IN_GUEST_MODE;
6623
6624 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6625
	/*
	 * vcpu->mode must be set before ->requests is checked below; the
	 * smp_mb__after_srcu_read_unlock() pairs with the barrier in
	 * kvm_make_all_cpus_request().  It also orders the write to mode
	 * against page-table reads done while the vcpu is running (see
	 * kvm_flush_remote_tlbs()).
	 */
6633 smp_mb__after_srcu_read_unlock();
6634
6635 local_irq_disable();
6636
6637 if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
6638 || need_resched() || signal_pending(current)) {
6639 vcpu->mode = OUTSIDE_GUEST_MODE;
6640 smp_wmb();
6641 local_irq_enable();
6642 preempt_enable();
6643 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6644 r = 1;
6645 goto cancel_injection;
6646 }
6647
6648 kvm_load_guest_xcr0(vcpu);
6649
6650 if (req_immediate_exit) {
6651 kvm_make_request(KVM_REQ_EVENT, vcpu);
6652 smp_send_reschedule(vcpu->cpu);
6653 }
6654
6655 trace_kvm_entry(vcpu->vcpu_id);
6656 wait_lapic_expire(vcpu);
6657 guest_enter_irqoff();
6658
6659 if (unlikely(vcpu->arch.switch_db_regs)) {
6660 set_debugreg(0, 7);
6661 set_debugreg(vcpu->arch.eff_db[0], 0);
6662 set_debugreg(vcpu->arch.eff_db[1], 1);
6663 set_debugreg(vcpu->arch.eff_db[2], 2);
6664 set_debugreg(vcpu->arch.eff_db[3], 3);
6665 set_debugreg(vcpu->arch.dr6, 6);
6666 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
6667 }
6668
6669 kvm_x86_ops->run(vcpu);
6670
	/*
	 * Sync the debug registers back before they are restored on the
	 * host and before the vmexit is handled, so that a DR-access exit
	 * sees the current values and can set KVM_DEBUGREG_WONT_EXIT again.
	 */
6677 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
6678 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
6679 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
6680 kvm_update_dr0123(vcpu);
6681 kvm_update_dr6(vcpu);
6682 kvm_update_dr7(vcpu);
6683 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
6684 }
6685

	/*
	 * The guest run may have clobbered the hardware debug registers; if
	 * the host has active hardware breakpoints, restore them before
	 * going any further.
	 */
6693 if (hw_breakpoint_active())
6694 hw_breakpoint_restore();
6695
6696 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
6697
6698 vcpu->mode = OUTSIDE_GUEST_MODE;
6699 smp_wmb();
6700
6701 kvm_put_guest_xcr0(vcpu);
6702
	/* Interrupts are re-enabled by handle_external_intr(). */
6704 kvm_x86_ops->handle_external_intr(vcpu);
6705
6706 ++vcpu->stat.exits;
6707
6708 guest_exit_irqoff();
6709
6710 local_irq_enable();
6711 preempt_enable();
6712
6713 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6714
	/*
	 * Profile KVM exit RIPs:
	 */
6718 if (unlikely(prof_on == KVM_PROFILING)) {
6719 unsigned long rip = kvm_rip_read(vcpu);
6720 profile_hit(KVM_PROFILING, (void *)rip);
6721 }
6722
6723 if (unlikely(vcpu->arch.tsc_always_catchup))
6724 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6725
6726 if (vcpu->arch.apic_attention)
6727 kvm_lapic_sync_from_vapic(vcpu);
6728
6729 r = kvm_x86_ops->handle_exit(vcpu);
6730 return r;
6731
6732cancel_injection:
6733 kvm_x86_ops->cancel_injection(vcpu);
6734 if (unlikely(vcpu->arch.apic_attention))
6735 kvm_lapic_sync_from_vapic(vcpu);
6736out:
6737 return r;
6738}
6739
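/*
 * Put the vCPU to sleep until it is runnable again.  Returns 1 to keep the
 * run loop going, or -EINTR if the vCPU woke up in an unexpected mp_state.
 */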
6740static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
6741{
6742 if (!kvm_arch_vcpu_runnable(vcpu) &&
6743 (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
6744 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6745 kvm_vcpu_block(vcpu);
6746 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6747
6748 if (kvm_x86_ops->post_block)
6749 kvm_x86_ops->post_block(vcpu);
6750
6751 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
6752 return 1;
6753 }
6754
6755 kvm_apic_accept_events(vcpu);
6756 switch(vcpu->arch.mp_state) {
6757 case KVM_MP_STATE_HALTED:
6758 vcpu->arch.pv.pv_unhalted = false;
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
		/* fall through */
6761 case KVM_MP_STATE_RUNNABLE:
6762 vcpu->arch.apf.halted = false;
6763 break;
6764 case KVM_MP_STATE_INIT_RECEIVED:
6765 break;
6766 default:
6767 return -EINTR;
6769 }
6770 return 1;
6771}
6772
6773static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
6774{
6775 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
6776 !vcpu->arch.apf.halted);
6777}
6778
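/*
 * The main run loop: enter the guest while the vCPU is runnable, block
 * otherwise, and drop back to userspace on pending signals, on userspace
 * interrupt-window requests, or whenever a helper returns r <= 0.
 */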
6779static int vcpu_run(struct kvm_vcpu *vcpu)
6780{
6781 int r;
6782 struct kvm *kvm = vcpu->kvm;
6783
6784 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6785
6786 for (;;) {
6787 if (kvm_vcpu_running(vcpu)) {
6788 r = vcpu_enter_guest(vcpu);
6789 } else {
6790 r = vcpu_block(kvm, vcpu);
6791 }
6792
6793 if (r <= 0)
6794 break;
6795
6796 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
6797 if (kvm_cpu_has_pending_timer(vcpu))
6798 kvm_inject_pending_timer_irqs(vcpu);
6799
6800 if (dm_request_for_irq_injection(vcpu) &&
6801 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
6802 r = 0;
6803 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
6804 ++vcpu->stat.request_irq_exits;
6805 break;
6806 }
6807
6808 kvm_check_async_pf_completion(vcpu);
6809
6810 if (signal_pending(current)) {
6811 r = -EINTR;
6812 vcpu->run->exit_reason = KVM_EXIT_INTR;
6813 ++vcpu->stat.signal_exits;
6814 break;
6815 }
6816 if (need_resched()) {
6817 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6818 cond_resched();
6819 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
6820 }
6821 }
6822
6823 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
6824
6825 return r;
6826}
6827
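/*
 * Re-enter the emulator, without decoding a new instruction, once userspace
 * has completed an I/O or MMIO access.  Returns 1 on success, 0 if emulation
 * did not complete.
 */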
6828static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
6829{
6830 int r;
6831 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6832 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
6833 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6834 if (r != EMULATE_DONE)
6835 return 0;
6836 return 1;
6837}
6838
6839static int complete_emulated_pio(struct kvm_vcpu *vcpu)
6840{
6841 BUG_ON(!vcpu->arch.pio.count);
6842
6843 return complete_emulated_io(vcpu);
6844}
6845
/*
 * complete_emulated_mmio() implements the following state machine:
 *
 * read:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       exit to userspace
 *       copy data
 *   execute insn
 *
 * write:
 *   for each fragment
 *     for each mmio piece in the fragment
 *       write gpa, len
 *       copy data
 *       exit to userspace
 */
6864static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
6865{
6866 struct kvm_run *run = vcpu->run;
6867 struct kvm_mmio_fragment *frag;
6868 unsigned len;
6869
6870 BUG_ON(!vcpu->mmio_needed);
6871
	/* Complete the mmio piece that userspace just serviced. */
6873 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
6874 len = min(8u, frag->len);
6875 if (!vcpu->mmio_is_write)
6876 memcpy(frag->data, run->mmio.data, len);
6877
6878 if (frag->len <= 8) {
		/* Switch to the next fragment. */
6880 frag++;
6881 vcpu->mmio_cur_fragment++;
6882 } else {
		/* Advance within the current (longer than 8 bytes) fragment. */
6884 frag->data += len;
6885 frag->gpa += len;
6886 frag->len -= len;
6887 }
6888
6889 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
6890 vcpu->mmio_needed = 0;

		/* A completed write needs no further emulation. */
6893 if (vcpu->mmio_is_write)
6894 return 1;
6895 vcpu->mmio_read_completed = 1;
6896 return complete_emulated_io(vcpu);
6897 }
6898
6899 run->exit_reason = KVM_EXIT_MMIO;
6900 run->mmio.phys_addr = frag->gpa;
6901 if (vcpu->mmio_is_write)
6902 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
6903 run->mmio.len = min(8u, frag->len);
6904 run->mmio.is_write = vcpu->mmio_is_write;
6905 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6906 return 0;
6907}
6908
6909
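/*
 * KVM_RUN ioctl: finish any pending userspace I/O completion callback, then
 * run the vCPU with the caller's signal mask installed.
 */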
6910int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
6911{
	struct fpu *fpu = &current->thread.fpu;
6913 int r;
6914 sigset_t sigsaved;
6915
6916 fpu__activate_curr(fpu);
6917
6918 if (vcpu->sigset_active)
6919 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
6920
6921 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
6922 kvm_vcpu_block(vcpu);
6923 kvm_apic_accept_events(vcpu);
6924 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
6925 r = -EAGAIN;
6926 goto out;
6927 }
6928
	/* When the local APIC is in userspace, re-sync the TPR from kvm_run->cr8. */
6930 if (!lapic_in_kernel(vcpu)) {
6931 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
6932 r = -EINVAL;
6933 goto out;
6934 }
6935 }
6936
6937 if (unlikely(vcpu->arch.complete_userspace_io)) {
6938 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
6939 vcpu->arch.complete_userspace_io = NULL;
6940 r = cui(vcpu);
6941 if (r <= 0)
6942 goto out;
6943 } else
6944 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
6945
6946 r = vcpu_run(vcpu);
6947
6948out:
6949 post_kvm_run_save(vcpu);
6950 if (vcpu->sigset_active)
6951 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
6952
6953 return r;
6954}
6955
6956int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6957{
6958 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
		/*
		 * Userspace called get_regs() in the middle of instruction
		 * emulation; flush the register state from the emulation
		 * context back to the vcpu before reading it.
		 */
6966 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
6967 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6968 }
6969 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
6970 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
6971 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
6972 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
6973 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
6974 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
6975 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
6976 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
6977#ifdef CONFIG_X86_64
6978 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
6979 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
6980 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
6981 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
6982 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
6983 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
6984 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
6985 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
6986#endif
6987
6988 regs->rip = kvm_rip_read(vcpu);
6989 regs->rflags = kvm_get_rflags(vcpu);
6990
6991 return 0;
6992}
6993
6994int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
6995{
6996 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
6997 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6998
6999 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
7000 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
7001 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
7002 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
7003 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
7004 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
7005 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
7006 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
7007#ifdef CONFIG_X86_64
7008 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
7009 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
7010 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
7011 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
7012 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
7013 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
7014 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
7015 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
7016#endif
7017
7018 kvm_rip_write(vcpu, regs->rip);
7019 kvm_set_rflags(vcpu, regs->rflags);
7020
7021 vcpu->arch.exception.pending = false;
7022
7023 kvm_make_request(KVM_REQ_EVENT, vcpu);
7024
7025 return 0;
7026}
7027
7028void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
7029{
7030 struct kvm_segment cs;
7031
7032 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
7033 *db = cs.db;
7034 *l = cs.l;
7035}
7036EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
7037
7038int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
7039 struct kvm_sregs *sregs)
7040{
7041 struct desc_ptr dt;
7042
7043 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
7044 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
7045 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
7046 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
7047 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
7048 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
7049
7050 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
7051 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
7052
7053 kvm_x86_ops->get_idt(vcpu, &dt);
7054 sregs->idt.limit = dt.size;
7055 sregs->idt.base = dt.address;
7056 kvm_x86_ops->get_gdt(vcpu, &dt);
7057 sregs->gdt.limit = dt.size;
7058 sregs->gdt.base = dt.address;
7059
7060 sregs->cr0 = kvm_read_cr0(vcpu);
7061 sregs->cr2 = vcpu->arch.cr2;
7062 sregs->cr3 = kvm_read_cr3(vcpu);
7063 sregs->cr4 = kvm_read_cr4(vcpu);
7064 sregs->cr8 = kvm_get_cr8(vcpu);
7065 sregs->efer = vcpu->arch.efer;
7066 sregs->apic_base = kvm_get_apic_base(vcpu);
7067
7068 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
7069
7070 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
7071 set_bit(vcpu->arch.interrupt.nr,
7072 (unsigned long *)sregs->interrupt_bitmap);
7073
7074 return 0;
7075}
7076
7077int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
7078 struct kvm_mp_state *mp_state)
7079{
7080 kvm_apic_accept_events(vcpu);
7081 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
7082 vcpu->arch.pv.pv_unhalted)
7083 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
7084 else
7085 mp_state->mp_state = vcpu->arch.mp_state;
7086
7087 return 0;
7088}
7089
7090int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
7091 struct kvm_mp_state *mp_state)
7092{
7093 if (!lapic_in_kernel(vcpu) &&
7094 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
7095 return -EINVAL;
7096
7097 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
7098 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
7099 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
7100 } else
7101 vcpu->arch.mp_state = mp_state->mp_state;
7102 kvm_make_request(KVM_REQ_EVENT, vcpu);
7103 return 0;
7104}
7105
7106int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
7107 int reason, bool has_error_code, u32 error_code)
7108{
7109 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
7110 int ret;
7111
7112 init_emulate_ctxt(vcpu);
7113
7114 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
7115 has_error_code, error_code);
7116
7117 if (ret)
7118 return EMULATE_FAIL;
7119
7120 kvm_rip_write(vcpu, ctxt->eip);
7121 kvm_set_rflags(vcpu, ctxt->eflags);
7122 kvm_make_request(KVM_REQ_EVENT, vcpu);
7123 return EMULATE_DONE;
7124}
7125EXPORT_SYMBOL_GPL(kvm_task_switch);
7126
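/*
 * Load the full special-register state supplied by userspace: segments,
 * descriptor tables, control registers, EFER and the APIC base.  The MMU
 * context is reset if any paging-related state changed.
 */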
7127int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7128 struct kvm_sregs *sregs)
7129{
7130 struct msr_data apic_base_msr;
7131 int mmu_reset_needed = 0;
7132 int pending_vec, max_bits, idx;
7133 struct desc_ptr dt;
7134
7135 if (!guest_cpuid_has_xsave(vcpu) && (sregs->cr4 & X86_CR4_OSXSAVE))
7136 return -EINVAL;
7137
7138 dt.size = sregs->idt.limit;
7139 dt.address = sregs->idt.base;
7140 kvm_x86_ops->set_idt(vcpu, &dt);
7141 dt.size = sregs->gdt.limit;
7142 dt.address = sregs->gdt.base;
7143 kvm_x86_ops->set_gdt(vcpu, &dt);
7144
7145 vcpu->arch.cr2 = sregs->cr2;
7146 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
7147 vcpu->arch.cr3 = sregs->cr3;
7148 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
7149
7150 kvm_set_cr8(vcpu, sregs->cr8);
7151
7152 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
7153 kvm_x86_ops->set_efer(vcpu, sregs->efer);
7154 apic_base_msr.data = sregs->apic_base;
7155 apic_base_msr.host_initiated = true;
7156 kvm_set_apic_base(vcpu, &apic_base_msr);
7157
7158 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
7159 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
7160 vcpu->arch.cr0 = sregs->cr0;
7161
7162 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
7163 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
7164 if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
7165 kvm_update_cpuid(vcpu);
7166
7167 idx = srcu_read_lock(&vcpu->kvm->srcu);
7168 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
7169 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
7170 mmu_reset_needed = 1;
7171 }
7172 srcu_read_unlock(&vcpu->kvm->srcu, idx);
7173
7174 if (mmu_reset_needed)
7175 kvm_mmu_reset_context(vcpu);
7176
7177 max_bits = KVM_NR_INTERRUPTS;
7178 pending_vec = find_first_bit(
7179 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
7180 if (pending_vec < max_bits) {
7181 kvm_queue_interrupt(vcpu, pending_vec, false);
7182 pr_debug("Set back pending irq %d\n", pending_vec);
7183 }
7184
7185 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
7186 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
7187 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
7188 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
7189 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
7190 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
7191
7192 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
7193 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
7194
7195 update_cr8_intercept(vcpu);
7196
	/* Older userspace won't unhalt the vcpu on reset. */
7198 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
7199 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
7200 !is_protmode(vcpu))
7201 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7202
7203 kvm_make_request(KVM_REQ_EVENT, vcpu);
7204
7205 return 0;
7206}
7207
7208int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
7209 struct kvm_guest_debug *dbg)
7210{
7211 unsigned long rflags;
7212 int i, r;
7213
7214 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
7215 r = -EBUSY;
7216 if (vcpu->arch.exception.pending)
7217 goto out;
7218 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
7219 kvm_queue_exception(vcpu, DB_VECTOR);
7220 else
7221 kvm_queue_exception(vcpu, BP_VECTOR);
7222 }
7223
	/*
	 * Read rflags while any previously injected single-step trace flag
	 * is still filtered out.
	 */
7228 rflags = kvm_get_rflags(vcpu);
7229
7230 vcpu->guest_debug = dbg->control;
7231 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
7232 vcpu->guest_debug = 0;
7233
7234 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
7235 for (i = 0; i < KVM_NR_DB_REGS; ++i)
7236 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
7237 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
7238 } else {
7239 for (i = 0; i < KVM_NR_DB_REGS; i++)
7240 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
7241 }
7242 kvm_update_dr7(vcpu);
7243
7244 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7245 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
7246 get_segment_base(vcpu, VCPU_SREG_CS);
7247
	/*
	 * Trigger an rflags update that will inject or remove the
	 * single-step trace flag as appropriate.
	 */
7252 kvm_set_rflags(vcpu, rflags);
7253
7254 kvm_x86_ops->update_bp_intercept(vcpu);
7255
7256 r = 0;
7257
7258out:
7259
7260 return r;
7261}
7262
/*
 * Translate a guest virtual address to a guest physical address.
 */
7266int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
7267 struct kvm_translation *tr)
7268{
7269 unsigned long vaddr = tr->linear_address;
7270 gpa_t gpa;
7271 int idx;
7272
7273 idx = srcu_read_lock(&vcpu->kvm->srcu);
7274 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
7275 srcu_read_unlock(&vcpu->kvm->srcu, idx);
7276 tr->physical_address = gpa;
7277 tr->valid = gpa != UNMAPPED_GVA;
7278 tr->writeable = 1;
7279 tr->usermode = 0;
7280
7281 return 0;
7282}
7283
7284int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
7285{
7286 struct fxregs_state *fxsave =
7287 &vcpu->arch.guest_fpu.state.fxsave;
7288
7289 memcpy(fpu->fpr, fxsave->st_space, 128);
7290 fpu->fcw = fxsave->cwd;
7291 fpu->fsw = fxsave->swd;
7292 fpu->ftwx = fxsave->twd;
7293 fpu->last_opcode = fxsave->fop;
7294 fpu->last_ip = fxsave->rip;
7295 fpu->last_dp = fxsave->rdp;
7296 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
7297
7298 return 0;
7299}
7300
7301int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
7302{
7303 struct fxregs_state *fxsave =
7304 &vcpu->arch.guest_fpu.state.fxsave;
7305
7306 memcpy(fxsave->st_space, fpu->fpr, 128);
7307 fxsave->cwd = fpu->fcw;
7308 fxsave->swd = fpu->fsw;
7309 fxsave->twd = fpu->ftwx;
7310 fxsave->fop = fpu->last_opcode;
7311 fxsave->rip = fpu->last_ip;
7312 fxsave->rdp = fpu->last_dp;
7313 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
7314
7315 return 0;
7316}
7317
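/*
 * Initialize the guest FPU state to its architectural reset values.
 */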
7318static void fx_init(struct kvm_vcpu *vcpu)
7319{
7320 fpstate_init(&vcpu->arch.guest_fpu.state);
7321 if (boot_cpu_has(X86_FEATURE_XSAVES))
7322 vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
7323 host_xcr0 | XSTATE_COMPACTION_ENABLED;
7324
	/*
	 * The guest must start with a legal xcr0 value: x87 state only.
	 */
7328 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
7329
7330 vcpu->arch.cr0 |= X86_CR0_ET;
7331}
7332
7333void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
7334{
7335 if (vcpu->guest_fpu_loaded)
7336 return;
7337
	/*
	 * Load the full guest FPU state and assume the guest may touch any
	 * available state component; the guest's xcr0 is loaded later.
	 */
7343 vcpu->guest_fpu_loaded = 1;
7344 __kernel_fpu_begin();
7345 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
7346 trace_kvm_fpu(1);
7347}
7348
7349void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
7350{
7351 if (!vcpu->guest_fpu_loaded) {
7352 vcpu->fpu_counter = 0;
7353 return;
7354 }
7355
7356 vcpu->guest_fpu_loaded = 0;
7357 copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
7358 __kernel_fpu_end();
7359 ++vcpu->stat.fpu_reload;
7360
	/*
	 * With lazy FPU switching, request deactivation of the guest FPU
	 * unless the vCPU has been reloading it frequently, in which case
	 * it is cheaper to leave it loaded.
	 */
7366 if (!use_eager_fpu()) {
7367 if (++vcpu->fpu_counter < 5)
7368 kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
7369 }
7370 trace_kvm_fpu(0);
7371}
7372
7373void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
7374{
7375 kvmclock_reset(vcpu);
7376
7377 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
7378 kvm_x86_ops->vcpu_free(vcpu);
7379}
7380
7381struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
7382 unsigned int id)
7383{
7384 struct kvm_vcpu *vcpu;
7385
7386 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
7387 printk_once(KERN_WARNING
7388 "kvm: SMP vm created on host with unstable TSC; "
7389 "guest TSC will not be reliable\n");
7390
7391 vcpu = kvm_x86_ops->vcpu_create(kvm, id);
7392
7393 return vcpu;
7394}
7395
7396int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
7397{
7398 int r;
7399
7400 kvm_vcpu_mtrr_init(vcpu);
7401 r = vcpu_load(vcpu);
7402 if (r)
7403 return r;
7404 kvm_vcpu_reset(vcpu, false);
7405 kvm_mmu_setup(vcpu);
7406 vcpu_put(vcpu);
7407 return r;
7408}
7409
7410void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
7411{
7412 struct msr_data msr;
7413 struct kvm *kvm = vcpu->kvm;
7414
7415 if (vcpu_load(vcpu))
7416 return;
7417 msr.data = 0x0;
7418 msr.index = MSR_IA32_TSC;
7419 msr.host_initiated = true;
7420 kvm_write_tsc(vcpu, &msr);
7421 vcpu_put(vcpu);
7422
7423 if (!kvmclock_periodic_sync)
7424 return;
7425
7426 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
7427 KVMCLOCK_SYNC_PERIOD);
7428}
7429
7430void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
7431{
7432 int r;
7433 vcpu->arch.apf.msr_val = 0;
7434
7435 r = vcpu_load(vcpu);
7436 BUG_ON(r);
7437 kvm_mmu_unload(vcpu);
7438 vcpu_put(vcpu);
7439
7440 kvm_x86_ops->vcpu_free(vcpu);
7441}
7442
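/*
 * Reset vCPU state for INIT or power-on; init_event distinguishes the two.
 * The PMU and SMBASE are only reset on power-on.
 */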
7443void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
7444{
7445 vcpu->arch.hflags = 0;
7446
7447 vcpu->arch.smi_pending = 0;
7448 atomic_set(&vcpu->arch.nmi_queued, 0);
7449 vcpu->arch.nmi_pending = 0;
7450 vcpu->arch.nmi_injected = false;
7451 kvm_clear_interrupt_queue(vcpu);
7452 kvm_clear_exception_queue(vcpu);
7453
7454 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
7455 kvm_update_dr0123(vcpu);
7456 vcpu->arch.dr6 = DR6_INIT;
7457 kvm_update_dr6(vcpu);
7458 vcpu->arch.dr7 = DR7_FIXED_1;
7459 kvm_update_dr7(vcpu);
7460
7461 vcpu->arch.cr2 = 0;
7462
7463 kvm_make_request(KVM_REQ_EVENT, vcpu);
7464 vcpu->arch.apf.msr_val = 0;
7465 vcpu->arch.st.msr_val = 0;
7466
7467 kvmclock_reset(vcpu);
7468
7469 kvm_clear_async_pf_completion_queue(vcpu);
7470 kvm_async_pf_hash_reset(vcpu);
7471 vcpu->arch.apf.halted = false;
7472
7473 if (!init_event) {
7474 kvm_pmu_reset(vcpu);
7475 vcpu->arch.smbase = 0x30000;
7476 }
7477
7478 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
7479 vcpu->arch.regs_avail = ~0;
7480 vcpu->arch.regs_dirty = ~0;
7481
7482 kvm_x86_ops->vcpu_reset(vcpu, init_event);
7483}
7484
7485void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
7486{
7487 struct kvm_segment cs;
7488
7489 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
7490 cs.selector = vector << 8;
7491 cs.base = vector << 12;
7492 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
7493 kvm_rip_write(vcpu, 0);
7494}
7495
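/*
 * Re-enable hardware virtualization on this CPU (module load, CPU hotplug,
 * resume from suspend) and compensate the guests if the host TSC appears to
 * have gone backwards across the disabled window.
 */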
7496int kvm_arch_hardware_enable(void)
7497{
7498 struct kvm *kvm;
7499 struct kvm_vcpu *vcpu;
7500 int i;
7501 int ret;
7502 u64 local_tsc;
7503 u64 max_tsc = 0;
7504 bool stable, backwards_tsc = false;
7505
7506 kvm_shared_msr_cpu_online();
7507 ret = kvm_x86_ops->hardware_enable();
7508 if (ret != 0)
7509 return ret;
7510
7511 local_tsc = rdtsc();
7512 stable = !check_tsc_unstable();
7513 list_for_each_entry(kvm, &vm_list, vm_list) {
7514 kvm_for_each_vcpu(i, vcpu, kvm) {
7515 if (!stable && vcpu->cpu == smp_processor_id())
7516 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7517 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
7518 backwards_tsc = true;
7519 if (vcpu->arch.last_host_tsc > max_tsc)
7520 max_tsc = vcpu->arch.last_host_tsc;
7521 }
7522 }
7523 }
7524
	/*
	 * Even "stable" TSCs can appear to go backwards here: some platforms
	 * reset the TSC across suspend/hibernate while otherwise keeping it
	 * synchronized, and hardware enable runs again after resume.  If that
	 * happened, add the observed delta to every vCPU's TSC offset so the
	 * guests never see time move backwards, and request a master clock
	 * update to re-establish the notion of a synchronized host TSC.
	 */
7563 if (backwards_tsc) {
7564 u64 delta_cyc = max_tsc - local_tsc;
7565 backwards_tsc_observed = true;
7566 list_for_each_entry(kvm, &vm_list, vm_list) {
7567 kvm_for_each_vcpu(i, vcpu, kvm) {
7568 vcpu->arch.tsc_offset_adjustment += delta_cyc;
7569 vcpu->arch.last_host_tsc = local_tsc;
7570 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7571 }
7572
			/*
			 * Also reset the per-VM TSC write tracking so the
			 * adjusted offsets are not matched against stale
			 * pre-suspend TSC writes.
			 */
7579 kvm->arch.last_tsc_nsec = 0;
7580 kvm->arch.last_tsc_write = 0;
7581 }
7582
7583 }
7584 return 0;
7585}
7586
7587void kvm_arch_hardware_disable(void)
7588{
7589 kvm_x86_ops->hardware_disable();
7590 drop_user_return_notifiers();
7591}
7592
7593int kvm_arch_hardware_setup(void)
7594{
7595 int r;
7596
7597 r = kvm_x86_ops->hardware_setup();
7598 if (r != 0)
7599 return r;
7600
7601 if (kvm_has_tsc_control) {
		/*
		 * Make sure the user can only configure tsc_khz values that
		 * fit into a signed 32-bit integer; a minimum is not needed
		 * because it is always 1.
		 */
7608 u64 max = min(0x7fffffffULL,
7609 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
7610 kvm_max_guest_tsc_khz = max;
7611
7612 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
7613 }
7614
7615 kvm_init_msr_list();
7616 return 0;
7617}
7618
7619void kvm_arch_hardware_unsetup(void)
7620{
7621 kvm_x86_ops->hardware_unsetup();
7622}
7623
7624void kvm_arch_check_processor_compat(void *rtn)
7625{
7626 kvm_x86_ops->check_processor_compatibility(rtn);
7627}
7628
7629bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
7630{
7631 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
7632}
7633EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
7634
7635bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
7636{
7637 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
7638}
7639
7640struct static_key kvm_no_apic_vcpu __read_mostly;
7641EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
7642
7643int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
7644{
7645 struct page *page;
7646 struct kvm *kvm;
7647 int r;
7648
7649 BUG_ON(vcpu->kvm == NULL);
7650 kvm = vcpu->kvm;
7651
7652 vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
7653 vcpu->arch.pv.pv_unhalted = false;
7654 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
7655 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
7656 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7657 else
7658 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
7659
7660 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
7661 if (!page) {
7662 r = -ENOMEM;
7663 goto fail;
7664 }
7665 vcpu->arch.pio_data = page_address(page);
7666
7667 kvm_set_tsc_khz(vcpu, max_tsc_khz);
7668
7669 r = kvm_mmu_create(vcpu);
7670 if (r < 0)
7671 goto fail_free_pio_data;
7672
7673 if (irqchip_in_kernel(kvm)) {
7674 r = kvm_create_lapic(vcpu);
7675 if (r < 0)
7676 goto fail_mmu_destroy;
7677 } else
7678 static_key_slow_inc(&kvm_no_apic_vcpu);
7679
7680 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
7681 GFP_KERNEL);
7682 if (!vcpu->arch.mce_banks) {
7683 r = -ENOMEM;
7684 goto fail_free_lapic;
7685 }
7686 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
7687
7688 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
7689 r = -ENOMEM;
7690 goto fail_free_mce_banks;
7691 }
7692
7693 fx_init(vcpu);
7694
7695 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
7696 vcpu->arch.pv_time_enabled = false;
7697
7698 vcpu->arch.guest_supported_xcr0 = 0;
7699 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
7700
7701 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
7702
7703 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
7704
7705 kvm_async_pf_hash_reset(vcpu);
7706 kvm_pmu_init(vcpu);
7707
7708 vcpu->arch.pending_external_vector = -1;
7709
7710 kvm_hv_vcpu_init(vcpu);
7711
7712 return 0;
7713
7714fail_free_mce_banks:
7715 kfree(vcpu->arch.mce_banks);
7716fail_free_lapic:
7717 kvm_free_lapic(vcpu);
7718fail_mmu_destroy:
7719 kvm_mmu_destroy(vcpu);
7720fail_free_pio_data:
7721 free_page((unsigned long)vcpu->arch.pio_data);
7722fail:
7723 return r;
7724}
7725
7726void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
7727{
7728 int idx;
7729
7730 kvm_hv_vcpu_uninit(vcpu);
7731 kvm_pmu_destroy(vcpu);
7732 kfree(vcpu->arch.mce_banks);
7733 kvm_free_lapic(vcpu);
7734 idx = srcu_read_lock(&vcpu->kvm->srcu);
7735 kvm_mmu_destroy(vcpu);
7736 srcu_read_unlock(&vcpu->kvm->srcu, idx);
7737 free_page((unsigned long)vcpu->arch.pio_data);
7738 if (!lapic_in_kernel(vcpu))
7739 static_key_slow_dec(&kvm_no_apic_vcpu);
7740}
7741
7742void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
7743{
7744 kvm_x86_ops->sched_in(vcpu, cpu);
7745}
7746
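/*
 * Per-VM initialization: MMU page lists, interrupt-source bitmap, TSC and
 * kvmclock bookkeeping, page tracking, and the optional vendor vm_init hook.
 */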
7747int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7748{
7749 if (type)
7750 return -EINVAL;
7751
7752 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
7753 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
7754 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
7755 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
7756 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
7757
7758
7759 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
7760
7761 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
7762 &kvm->arch.irq_sources_bitmap);
7763
7764 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
7765 mutex_init(&kvm->arch.apic_map_lock);
7766 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
7767
7768 pvclock_update_vm_gtod_copy(kvm);
7769
7770 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
7771 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
7772
7773 kvm_page_track_init(kvm);
7774 kvm_mmu_init_vm(kvm);
7775
7776 if (kvm_x86_ops->vm_init)
7777 return kvm_x86_ops->vm_init(kvm);
7778
7779 return 0;
7780}
7781
7782static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
7783{
7784 int r;
7785 r = vcpu_load(vcpu);
7786 BUG_ON(r);
7787 kvm_mmu_unload(vcpu);
7788 vcpu_put(vcpu);
7789}
7790
7791static void kvm_free_vcpus(struct kvm *kvm)
7792{
7793 unsigned int i;
7794 struct kvm_vcpu *vcpu;
7795
	/*
	 * Unpin the MMU pages of every vCPU before freeing the vCPUs
	 * themselves.
	 */
7799 kvm_for_each_vcpu(i, vcpu, kvm) {
7800 kvm_clear_async_pf_completion_queue(vcpu);
7801 kvm_unload_vcpu_mmu(vcpu);
7802 }
7803 kvm_for_each_vcpu(i, vcpu, kvm)
7804 kvm_arch_vcpu_free(vcpu);
7805
7806 mutex_lock(&kvm->lock);
7807 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
7808 kvm->vcpus[i] = NULL;
7809
7810 atomic_set(&kvm->online_vcpus, 0);
7811 mutex_unlock(&kvm->lock);
7812}
7813
7814void kvm_arch_sync_events(struct kvm *kvm)
7815{
7816 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
7817 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
7818 kvm_free_all_assigned_devices(kvm);
7819 kvm_free_pit(kvm);
7820}
7821
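/*
 * Create or delete one of KVM's internal memslots (e.g. the APIC access
 * page, the identity page table or the TSS).  A non-zero size creates the
 * slot, backed by an anonymous mapping; size == 0 deletes it.
 */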
7822int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
7823{
7824 int i, r;
7825 unsigned long hva;
7826 struct kvm_memslots *slots = kvm_memslots(kvm);
7827 struct kvm_memory_slot *slot, old;
7828
	/* Called with kvm->slots_lock held. */
7830 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
7831 return -EINVAL;
7832
7833 slot = id_to_memslot(slots, id);
7834 if (size) {
7835 if (slot->npages)
7836 return -EEXIST;
7837
		/*
		 * MAP_SHARED to prevent internal slot pages from being moved
		 * by fork()/COW.
		 */
7842 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
7843 MAP_SHARED | MAP_ANONYMOUS, 0);
7844 if (IS_ERR((void *)hva))
7845 return PTR_ERR((void *)hva);
7846 } else {
7847 if (!slot->npages)
7848 return 0;
7849
7850 hva = 0;
7851 }
7852
7853 old = *slot;
7854 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
7855 struct kvm_userspace_memory_region m;
7856
7857 m.slot = id | (i << 16);
7858 m.flags = 0;
7859 m.guest_phys_addr = gpa;
7860 m.userspace_addr = hva;
7861 m.memory_size = size;
7862 r = __kvm_set_memory_region(kvm, &m);
7863 if (r < 0)
7864 return r;
7865 }
7866
7867 if (!size) {
7868 r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
7869 WARN_ON(r < 0);
7870 }
7871
7872 return 0;
7873}
7874EXPORT_SYMBOL_GPL(__x86_set_memory_region);
7875
7876int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
7877{
7878 int r;
7879
7880 mutex_lock(&kvm->slots_lock);
7881 r = __x86_set_memory_region(kvm, id, gpa, size);
7882 mutex_unlock(&kvm->slots_lock);
7883
7884 return r;
7885}
7886EXPORT_SYMBOL_GPL(x86_set_memory_region);
7887
7888void kvm_arch_destroy_vm(struct kvm *kvm)
7889{
7890 if (current->mm == kvm->mm) {
		/*
		 * Free the internal memslots created on behalf of this
		 * process, unless the mm has already changed hands (process
		 * exit or fd passing).
		 */
7896 x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
7897 x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
7898 x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
7899 }
7900 if (kvm_x86_ops->vm_destroy)
7901 kvm_x86_ops->vm_destroy(kvm);
7902 kvm_iommu_unmap_guest(kvm);
7903 kfree(kvm->arch.vpic);
7904 kfree(kvm->arch.vioapic);
7905 kvm_free_vcpus(kvm);
7906 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
7907 kvm_mmu_uninit_vm(kvm);
7908}
7909
7910void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
7911 struct kvm_memory_slot *dont)
7912{
7913 int i;
7914
7915 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7916 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
7917 kvfree(free->arch.rmap[i]);
7918 free->arch.rmap[i] = NULL;
7919 }
7920 if (i == 0)
7921 continue;
7922
7923 if (!dont || free->arch.lpage_info[i - 1] !=
7924 dont->arch.lpage_info[i - 1]) {
7925 kvfree(free->arch.lpage_info[i - 1]);
7926 free->arch.lpage_info[i - 1] = NULL;
7927 }
7928 }
7929
7930 kvm_page_track_free_memslot(free, dont);
7931}
7932
7933int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
7934 unsigned long npages)
7935{
7936 int i;
7937
7938 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7939 struct kvm_lpage_info *linfo;
7940 unsigned long ugfn;
7941 int lpages;
7942 int level = i + 1;
7943
7944 lpages = gfn_to_index(slot->base_gfn + npages - 1,
7945 slot->base_gfn, level) + 1;
7946
7947 slot->arch.rmap[i] =
7948 kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
7949 if (!slot->arch.rmap[i])
7950 goto out_free;
7951 if (i == 0)
7952 continue;
7953
7954 linfo = kvm_kvzalloc(lpages * sizeof(*linfo));
7955 if (!linfo)
7956 goto out_free;
7957
7958 slot->arch.lpage_info[i - 1] = linfo;
7959
7960 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
7961 linfo[0].disallow_lpage = 1;
7962 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
7963 linfo[lpages - 1].disallow_lpage = 1;
7964 ugfn = slot->userspace_addr >> PAGE_SHIFT;
7965
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if large pages are explicitly disabled, disallow
		 * large-page mappings for this slot.
		 */
7970 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
7971 !kvm_largepages_enabled()) {
7972 unsigned long j;
7973
7974 for (j = 0; j < lpages; ++j)
7975 linfo[j].disallow_lpage = 1;
7976 }
7977 }
7978
7979 if (kvm_page_track_create_memslot(slot, npages))
7980 goto out_free;
7981
7982 return 0;
7983
7984out_free:
7985 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
7986 kvfree(slot->arch.rmap[i]);
7987 slot->arch.rmap[i] = NULL;
7988 if (i == 0)
7989 continue;
7990
7991 kvfree(slot->arch.lpage_info[i - 1]);
7992 slot->arch.lpage_info[i - 1] = NULL;
7993 }
7994 return -ENOMEM;
7995}
7996
7997void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
7998{
	/*
	 * memslots->generation has been incremented, so the MMIO generation
	 * may have wrapped around; zap any stale MMIO sptes.
	 */
8003 kvm_mmu_invalidate_mmio_sptes(kvm, slots);
8004}
8005
8006int kvm_arch_prepare_memory_region(struct kvm *kvm,
8007 struct kvm_memory_slot *memslot,
8008 const struct kvm_userspace_memory_region *mem,
8009 enum kvm_mr_change change)
8010{
8011 return 0;
8012}
8013
8014static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
8015 struct kvm_memory_slot *new)
8016{
	/* Write protect read-only slots and skip dirty-logging setup. */
8018 if (new->flags & KVM_MEM_READONLY) {
8019 kvm_mmu_slot_remove_write_access(kvm, new);
8020 return;
8021 }
8022
	/*
	 * Dirty logging can work in two ways: by write-protecting the slot,
	 * so that the first write to each page faults and marks it dirty,
	 * or, on hardware with a page-modification log (PML), by merely
	 * clearing the spte dirty bit and letting writes be logged without
	 * faulting.  The vendor module picks the mechanism through the
	 * slot_enable_log_dirty/slot_disable_log_dirty hooks; without those
	 * hooks we fall back to write protection.
	 */
8053 if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
8054 if (kvm_x86_ops->slot_enable_log_dirty)
8055 kvm_x86_ops->slot_enable_log_dirty(kvm, new);
8056 else
8057 kvm_mmu_slot_remove_write_access(kvm, new);
8058 } else {
8059 if (kvm_x86_ops->slot_disable_log_dirty)
8060 kvm_x86_ops->slot_disable_log_dirty(kvm, new);
8061 }
8062}
8063
8064void kvm_arch_commit_memory_region(struct kvm *kvm,
8065 const struct kvm_userspace_memory_region *mem,
8066 const struct kvm_memory_slot *old,
8067 const struct kvm_memory_slot *new,
8068 enum kvm_mr_change change)
8069{
8070 int nr_mmu_pages = 0;
8071
8072 if (!kvm->arch.n_requested_mmu_pages)
8073 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
8074
8075 if (nr_mmu_pages)
8076 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
8077
	/*
	 * Dirty logging tracks sptes at 4k granularity, so large sptes were
	 * split while it was enabled.  If logging has now been turned off
	 * (for example after a failed live migration), collapse those small
	 * sptes by zapping them; subsequent faults recreate the large
	 * mappings.
	 */
8090 if ((change != KVM_MR_DELETE) &&
8091 (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
8092 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
8093 kvm_mmu_zap_collapsible_sptes(kvm, new);
8094
	/*
	 * Apply write protection and/or dirty-logging state to the new
	 * slot.  Deleted slots have already had their shadow pages zapped,
	 * so only surviving slots need this.
	 */
8105 if (change != KVM_MR_DELETE)
8106 kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
8107}
8108
8109void kvm_arch_flush_shadow_all(struct kvm *kvm)
8110{
8111 kvm_mmu_invalidate_zap_all_pages(kvm);
8112}
8113
8114void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
8115 struct kvm_memory_slot *slot)
8116{
8117 kvm_mmu_invalidate_zap_all_pages(kvm);
8118}
8119
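/*
 * True if the vCPU has a pending event that should wake it from halt:
 * completed async page faults, APIC events, pv unhalt, NMIs, SMIs,
 * injectable interrupts, or Hyper-V synthetic timers.
 */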
8120static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
8121{
8122 if (!list_empty_careful(&vcpu->async_pf.done))
8123 return true;
8124
8125 if (kvm_apic_has_events(vcpu))
8126 return true;
8127
8128 if (vcpu->arch.pv.pv_unhalted)
8129 return true;
8130
8131 if (atomic_read(&vcpu->arch.nmi_queued))
8132 return true;
8133
8134 if (test_bit(KVM_REQ_SMI, &vcpu->requests))
8135 return true;
8136
8137 if (kvm_arch_interrupt_allowed(vcpu) &&
8138 kvm_cpu_has_interrupt(vcpu))
8139 return true;
8140
8141 if (kvm_hv_has_stimer_pending(vcpu))
8142 return true;
8143
8144 return false;
8145}
8146
8147int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
8148{
8149 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
8150 kvm_x86_ops->check_nested_events(vcpu, false);
8151
8152 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
8153}
8154
8155int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
8156{
8157 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
8158}
8159
8160int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
8161{
8162 return kvm_x86_ops->interrupt_allowed(vcpu);
8163}
8164
8165unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
8166{
8167 if (is_64_bit_mode(vcpu))
8168 return kvm_rip_read(vcpu);
8169 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
8170 kvm_rip_read(vcpu));
8171}
8172EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
8173
8174bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
8175{
8176 return kvm_get_linear_rip(vcpu) == linear_rip;
8177}
8178EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
8179
8180unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
8181{
8182 unsigned long rflags;
8183
8184 rflags = kvm_x86_ops->get_rflags(vcpu);
8185 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
8186 rflags &= ~X86_EFLAGS_TF;
8187 return rflags;
8188}
8189EXPORT_SYMBOL_GPL(kvm_get_rflags);
8190
8191static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
8192{
8193 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
8194 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
8195 rflags |= X86_EFLAGS_TF;
8196 kvm_x86_ops->set_rflags(vcpu, rflags);
8197}
8198
8199void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
8200{
8201 __kvm_set_rflags(vcpu, rflags);
8202 kvm_make_request(KVM_REQ_EVENT, vcpu);
8203}
8204EXPORT_SYMBOL_GPL(kvm_set_rflags);
8205
8206void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
8207{
8208 int r;
8209
8210 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
8211 work->wakeup_all)
8212 return;
8213
8214 r = kvm_mmu_reload(vcpu);
8215 if (unlikely(r))
8216 return;
8217
8218 if (!vcpu->arch.mmu.direct_map &&
8219 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
8220 return;
8221
8222 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
8223}
8224
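/*
 * The async page-fault gfn table is a small open-addressed hash table with
 * linear probing; ~0 marks an empty slot.
 */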
8225static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
8226{
8227 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
8228}
8229
8230static inline u32 kvm_async_pf_next_probe(u32 key)
8231{
8232 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
8233}
8234
8235static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
8236{
8237 u32 key = kvm_async_pf_hash_fn(gfn);
8238
8239 while (vcpu->arch.apf.gfns[key] != ~0)
8240 key = kvm_async_pf_next_probe(key);
8241
8242 vcpu->arch.apf.gfns[key] = gfn;
8243}
8244
8245static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
8246{
8247 int i;
8248 u32 key = kvm_async_pf_hash_fn(gfn);
8249
8250 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
8251 (vcpu->arch.apf.gfns[key] != gfn &&
8252 vcpu->arch.apf.gfns[key] != ~0); i++)
8253 key = kvm_async_pf_next_probe(key);
8254
8255 return key;
8256}
8257
8258bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
8259{
8260 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
8261}
8262
8263static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
8264{
8265 u32 i, j, k;
8266
8267 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
8268 while (true) {
8269 vcpu->arch.apf.gfns[i] = ~0;
8270 do {
8271 j = kvm_async_pf_next_probe(j);
8272 if (vcpu->arch.apf.gfns[j] == ~0)
8273 return;
8274 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * Keep scanning while gfns[j]'s home slot k lies
			 * cyclically in ]i;j]; such an entry must stay where
			 * it is and cannot be moved back to slot i.
			 */
8280 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
8281 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
8282 i = j;
8283 }
8284}
8285
8286static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
8287{
8288
8289 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
8290 sizeof(val));
8291}
8292
8293void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
8294 struct kvm_async_pf *work)
8295{
8296 struct x86_exception fault;
8297
8298 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
8299 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
8300
8301 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
8302 (vcpu->arch.apf.send_user_only &&
8303 kvm_x86_ops->get_cpl(vcpu) == 0))
8304 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
8305 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
8306 fault.vector = PF_VECTOR;
8307 fault.error_code_valid = true;
8308 fault.error_code = 0;
8309 fault.nested_page_fault = false;
8310 fault.address = work->arch.token;
8311 kvm_inject_page_fault(vcpu, &fault);
8312 }
8313}
8314
8315void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
8316 struct kvm_async_pf *work)
8317{
8318 struct x86_exception fault;
8319
8320 trace_kvm_async_pf_ready(work->arch.token, work->gva);
8321 if (work->wakeup_all)
8322 work->arch.token = ~0;
8323 else
8324 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
8325
8326 if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
8327 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
8328 fault.vector = PF_VECTOR;
8329 fault.error_code_valid = true;
8330 fault.error_code = 0;
8331 fault.nested_page_fault = false;
8332 fault.address = work->arch.token;
8333 kvm_inject_page_fault(vcpu, &fault);
8334 }
8335 vcpu->arch.apf.halted = false;
8336 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8337}
8338
8339bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
8340{
8341 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
8342 return true;
8343 else
8344 return !kvm_event_needs_reinjection(vcpu) &&
8345 kvm_x86_ops->interrupt_allowed(vcpu);
8346}
8347
8348void kvm_arch_start_assignment(struct kvm *kvm)
8349{
8350 atomic_inc(&kvm->arch.assigned_device_count);
8351}
8352EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
8353
8354void kvm_arch_end_assignment(struct kvm *kvm)
8355{
8356 atomic_dec(&kvm->arch.assigned_device_count);
8357}
8358EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
8359
8360bool kvm_arch_has_assigned_device(struct kvm *kvm)
8361{
8362 return atomic_read(&kvm->arch.assigned_device_count);
8363}
8364EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
8365
8366void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
8367{
8368 atomic_inc(&kvm->arch.noncoherent_dma_count);
8369}
8370EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
8371
8372void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
8373{
8374 atomic_dec(&kvm->arch.noncoherent_dma_count);
8375}
8376EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
8377
8378bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
8379{
8380 return atomic_read(&kvm->arch.noncoherent_dma_count);
8381}
8382EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
8383
8384bool kvm_arch_has_irq_bypass(void)
8385{
8386 return kvm_x86_ops->update_pi_irte != NULL;
8387}
8388
8389int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
8390 struct irq_bypass_producer *prod)
8391{
8392 struct kvm_kernel_irqfd *irqfd =
8393 container_of(cons, struct kvm_kernel_irqfd, consumer);
8394
8395 irqfd->producer = prod;
8396
8397 return kvm_x86_ops->update_pi_irte(irqfd->kvm,
8398 prod->irq, irqfd->gsi, 1);
8399}
8400
8401void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
8402 struct irq_bypass_producer *prod)
8403{
8404 int ret;
8405 struct kvm_kernel_irqfd *irqfd =
8406 container_of(cons, struct kvm_kernel_irqfd, consumer);
8407
8408 WARN_ON(irqfd->producer != prod);
8409 irqfd->producer = NULL;
8410
	/*
	 * When the producer goes away, switch the interrupt back to
	 * remapped (non-posted) mode so the existing paths keep working
	 * while the IRQ is masked/disabled or the consumer no longer
	 * wants it.
	 */
8417 ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
8418 if (ret)
8419 printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
8420 " fails: %d\n", irqfd->consumer.token, ret);
8421}
8422
8423int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
8424 uint32_t guest_irq, bool set)
8425{
8426 if (!kvm_x86_ops->update_pi_irte)
8427 return -EINVAL;
8428
8429 return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
8430}
8431
8432bool kvm_vector_hashing_enabled(void)
8433{
8434 return vector_hashing;
8435}
8436EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
8437
8438EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
8439EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
8440EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
8441EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
8442EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
8443EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
8444EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
8445EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
8446EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
8447EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
8448EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
8449EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
8450EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
8451EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
8452EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
8453EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
8454EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
8455EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
8456EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
8457