22#include <linux/kvm_host.h>
23#include "irq.h"
24#include "mmu.h"
25#include "i8254.h"
26#include "tss.h"
27#include "kvm_cache_regs.h"
28#include "x86.h"
29#include "cpuid.h"
30#include "pmu.h"
31#include "hyperv.h"
32
33#include <linux/clocksource.h>
34#include <linux/interrupt.h>
35#include <linux/kvm.h>
36#include <linux/fs.h>
37#include <linux/vmalloc.h>
38#include <linux/export.h>
39#include <linux/moduleparam.h>
40#include <linux/mman.h>
41#include <linux/highmem.h>
42#include <linux/iommu.h>
43#include <linux/intel-iommu.h>
44#include <linux/cpufreq.h>
45#include <linux/user-return-notifier.h>
46#include <linux/srcu.h>
47#include <linux/slab.h>
48#include <linux/perf_event.h>
49#include <linux/uaccess.h>
50#include <linux/hash.h>
51#include <linux/pci.h>
52#include <linux/timekeeper_internal.h>
53#include <linux/pvclock_gtod.h>
54#include <linux/kvm_irqfd.h>
55#include <linux/irqbypass.h>
56#include <linux/sched/stat.h>
57#include <linux/mem_encrypt.h>
58
59#include <trace/events/kvm.h>
60
61#include <asm/debugreg.h>
62#include <asm/msr.h>
63#include <asm/desc.h>
64#include <asm/mce.h>
65#include <linux/kernel_stat.h>
66#include <asm/fpu/internal.h>
67#include <asm/pvclock.h>
68#include <asm/div64.h>
69#include <asm/irq_remapping.h>
70
71#define CREATE_TRACE_POINTS
72#include "trace.h"
73
74#define MAX_IO_MSRS 256
75#define KVM_MAX_MCE_BANKS 32
76u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
77EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
78
79#define emul_to_vcpu(ctxt) \
80 container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
81
82
83
84
85
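/*
 * EFER bits the guest may set: everything outside SCE (plus LME/LMA on
 * 64-bit hosts) starts out reserved, and vendor modules opt additional
 * bits in at runtime via kvm_enable_efer_bits().
 */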
86#ifdef CONFIG_X86_64
87static
88u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
89#else
90static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
91#endif
92
93#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
94#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
95
96#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
97 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
98
99static void update_cr8_intercept(struct kvm_vcpu *vcpu);
100static void process_nmi(struct kvm_vcpu *vcpu);
101static void enter_smm(struct kvm_vcpu *vcpu);
102static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
103
104struct kvm_x86_ops *kvm_x86_ops __read_mostly;
105EXPORT_SYMBOL_GPL(kvm_x86_ops);
106
107static bool __read_mostly ignore_msrs = 0;
108module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
109
110unsigned int min_timer_period_us = 500;
111module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
112
113static bool __read_mostly kvmclock_periodic_sync = true;
114module_param(kvmclock_periodic_sync, bool, S_IRUGO);
115
116bool __read_mostly kvm_has_tsc_control;
117EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
118u32 __read_mostly kvm_max_guest_tsc_khz;
119EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
120u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
121EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
122u64 __read_mostly kvm_max_tsc_scaling_ratio;
123EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
124u64 __read_mostly kvm_default_tsc_scaling_ratio;
125EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
126
127
128static u32 __read_mostly tsc_tolerance_ppm = 250;
129module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
130
131
132unsigned int __read_mostly lapic_timer_advance_ns = 0;
133module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
134
135static bool __read_mostly vector_hashing = true;
136module_param(vector_hashing, bool, S_IRUGO);
137
138#define KVM_NR_SHARED_MSRS 16
139
140struct kvm_shared_msrs_global {
141 int nr;
142 u32 msrs[KVM_NR_SHARED_MSRS];
143};
144
145struct kvm_shared_msrs {
146 struct user_return_notifier urn;
147 bool registered;
148 struct kvm_shared_msr_values {
149 u64 host;
150 u64 curr;
151 } values[KVM_NR_SHARED_MSRS];
152};
153
154static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
155static struct kvm_shared_msrs __percpu *shared_msrs;
156
157struct kvm_stats_debugfs_item debugfs_entries[] = {
158 { "pf_fixed", VCPU_STAT(pf_fixed) },
159 { "pf_guest", VCPU_STAT(pf_guest) },
160 { "tlb_flush", VCPU_STAT(tlb_flush) },
161 { "invlpg", VCPU_STAT(invlpg) },
162 { "exits", VCPU_STAT(exits) },
163 { "io_exits", VCPU_STAT(io_exits) },
164 { "mmio_exits", VCPU_STAT(mmio_exits) },
165 { "signal_exits", VCPU_STAT(signal_exits) },
166 { "irq_window", VCPU_STAT(irq_window_exits) },
167 { "nmi_window", VCPU_STAT(nmi_window_exits) },
168 { "halt_exits", VCPU_STAT(halt_exits) },
169 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
170 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
171 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
172 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
173 { "hypercalls", VCPU_STAT(hypercalls) },
174 { "request_irq", VCPU_STAT(request_irq_exits) },
175 { "irq_exits", VCPU_STAT(irq_exits) },
176 { "host_state_reload", VCPU_STAT(host_state_reload) },
177 { "efer_reload", VCPU_STAT(efer_reload) },
178 { "fpu_reload", VCPU_STAT(fpu_reload) },
179 { "insn_emulation", VCPU_STAT(insn_emulation) },
180 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
181 { "irq_injections", VCPU_STAT(irq_injections) },
182 { "nmi_injections", VCPU_STAT(nmi_injections) },
183 { "req_event", VCPU_STAT(req_event) },
184 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
185 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
186 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
187 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
188 { "mmu_flooded", VM_STAT(mmu_flooded) },
189 { "mmu_recycled", VM_STAT(mmu_recycled) },
190 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
191 { "mmu_unsync", VM_STAT(mmu_unsync) },
192 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
193 { "largepages", VM_STAT(lpages) },
194 { "max_mmu_page_hash_collisions",
195 VM_STAT(max_mmu_page_hash_collisions) },
196 { NULL }
197};
198
199u64 __read_mostly host_xcr0;
200
201static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
202
203static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
204{
205 int i;
206 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
207 vcpu->arch.apf.gfns[i] = ~0;
208}
209
210static void kvm_on_user_return(struct user_return_notifier *urn)
211{
212 unsigned slot;
213 struct kvm_shared_msrs *locals
214 = container_of(urn, struct kvm_shared_msrs, urn);
215 struct kvm_shared_msr_values *values;
216 unsigned long flags;
217
218
219
220
221
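 /*
  * Disable interrupts while unregistering: this function is also reached
  * from kvm_arch_hardware_disable() (via drop_user_return_notifiers()),
  * which can run in interrupt context while a CPU is going down.
  */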
222 local_irq_save(flags);
223 if (locals->registered) {
224 locals->registered = false;
225 user_return_notifier_unregister(urn);
226 }
227 local_irq_restore(flags);
228 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
229 values = &locals->values[slot];
230 if (values->host != values->curr) {
231 wrmsrl(shared_msrs_global.msrs[slot], values->host);
232 values->curr = values->host;
233 }
234 }
235}
236
237static void shared_msr_update(unsigned slot, u32 msr)
238{
239 u64 value;
240 unsigned int cpu = smp_processor_id();
241 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
242
243
244
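 /*
  * shared_msrs_global is only written while the vendor module is being
  * set up (kvm_define_shared_msr()), so it can be read here without
  * locking.
  */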
245 if (slot >= shared_msrs_global.nr) {
246 printk(KERN_ERR "kvm: invalid MSR slot!");
247 return;
248 }
249 rdmsrl_safe(msr, &value);
250 smsr->values[slot].host = value;
251 smsr->values[slot].curr = value;
252}
253
254void kvm_define_shared_msr(unsigned slot, u32 msr)
255{
256 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
257 shared_msrs_global.msrs[slot] = msr;
258 if (slot >= shared_msrs_global.nr)
259 shared_msrs_global.nr = slot + 1;
260}
261EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
262
263static void kvm_shared_msr_cpu_online(void)
264{
265 unsigned i;
266
267 for (i = 0; i < shared_msrs_global.nr; ++i)
268 shared_msr_update(i, shared_msrs_global.msrs[i]);
269}
270
271int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
272{
273 unsigned int cpu = smp_processor_id();
274 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
275 int err;
276
277 if (((value ^ smsr->values[slot].curr) & mask) == 0)
278 return 0;
279 smsr->values[slot].curr = value;
280 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
281 if (err)
282 return 1;
283
284 if (!smsr->registered) {
285 smsr->urn.on_user_return = kvm_on_user_return;
286 user_return_notifier_register(&smsr->urn);
287 smsr->registered = true;
288 }
289 return 0;
290}
291EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
292
293static void drop_user_return_notifiers(void)
294{
295 unsigned int cpu = smp_processor_id();
296 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
297
298 if (smsr->registered)
299 kvm_on_user_return(&smsr->urn);
300}
301
302u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
303{
304 return vcpu->arch.apic_base;
305}
306EXPORT_SYMBOL_GPL(kvm_get_apic_base);
307
308int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
309{
310 u64 old_state = vcpu->arch.apic_base &
311 (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
312 u64 new_state = msr_info->data &
313 (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
314 u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
315 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
316
317 if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
318 return 1;
319 if (!msr_info->host_initiated &&
320 ((new_state == MSR_IA32_APICBASE_ENABLE &&
321 old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
322 (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
323 old_state == 0)))
324 return 1;
325
326 kvm_lapic_set_base(vcpu, msr_info->data);
327 return 0;
328}
329EXPORT_SYMBOL_GPL(kvm_set_apic_base);
330
331asmlinkage __visible void kvm_spurious_fault(void)
332{
333
334 BUG();
335}
336EXPORT_SYMBOL_GPL(kvm_spurious_fault);
337
338#define EXCPT_BENIGN 0
339#define EXCPT_CONTRIBUTORY 1
340#define EXCPT_PF 2
341
342static int exception_class(int vector)
343{
344 switch (vector) {
345 case PF_VECTOR:
346 return EXCPT_PF;
347 case DE_VECTOR:
348 case TS_VECTOR:
349 case NP_VECTOR:
350 case SS_VECTOR:
351 case GP_VECTOR:
352 return EXCPT_CONTRIBUTORY;
353 default:
354 break;
355 }
356 return EXCPT_BENIGN;
357}
358
359#define EXCPT_FAULT 0
360#define EXCPT_TRAP 1
361#define EXCPT_ABORT 2
362#define EXCPT_INTERRUPT 3
363
364static int exception_type(int vector)
365{
366 unsigned int mask;
367
368 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
369 return EXCPT_INTERRUPT;
370
371 mask = 1 << vector;
372
373
374 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
375 return EXCPT_TRAP;
376
377 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
378 return EXCPT_ABORT;
379
380
381 return EXCPT_FAULT;
382}
383
384static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
385 unsigned nr, bool has_error, u32 error_code,
386 bool reinject)
387{
388 u32 prev_nr;
389 int class1, class2;
390
391 kvm_make_request(KVM_REQ_EVENT, vcpu);
392
393 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
394 queue:
395 if (has_error && !is_protmode(vcpu))
396 has_error = false;
397 if (reinject) {
398
399
400
401
402
403
404
405
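 /*
  * A reinjected exception was already mid-delivery when the VM exit
  * occurred, so it is restored as "injected" rather than "pending" and
  * is handed back to the hardware as-is on the next entry.
  */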
406 WARN_ON_ONCE(vcpu->arch.exception.pending);
407 vcpu->arch.exception.injected = true;
408 } else {
409 vcpu->arch.exception.pending = true;
410 vcpu->arch.exception.injected = false;
411 }
412 vcpu->arch.exception.has_error_code = has_error;
413 vcpu->arch.exception.nr = nr;
414 vcpu->arch.exception.error_code = error_code;
415 return;
416 }
417
418
419 prev_nr = vcpu->arch.exception.nr;
420 if (prev_nr == DF_VECTOR) {
421
422 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
423 return;
424 }
425 class1 = exception_class(prev_nr);
426 class2 = exception_class(nr);
427 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
428 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
429
430
431
432
433
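 /*
  * Two contributory exceptions, or any non-benign exception raised while
  * delivering a page fault: per the SDM this combination escalates to a
  * double fault with a zero error code.
  */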
434 vcpu->arch.exception.pending = true;
435 vcpu->arch.exception.injected = false;
436 vcpu->arch.exception.has_error_code = true;
437 vcpu->arch.exception.nr = DF_VECTOR;
438 vcpu->arch.exception.error_code = 0;
439 } else
440
441
442
443 goto queue;
444}
445
446void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
447{
448 kvm_multiple_exception(vcpu, nr, false, 0, false);
449}
450EXPORT_SYMBOL_GPL(kvm_queue_exception);
451
452void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
453{
454 kvm_multiple_exception(vcpu, nr, false, 0, true);
455}
456EXPORT_SYMBOL_GPL(kvm_requeue_exception);
457
458int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
459{
460 if (err)
461 kvm_inject_gp(vcpu, 0);
462 else
463 return kvm_skip_emulated_instruction(vcpu);
464
465 return 1;
466}
467EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
468
469void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
470{
471 ++vcpu->stat.pf_guest;
472 vcpu->arch.exception.nested_apf =
473 is_guest_mode(vcpu) && fault->async_page_fault;
474 if (vcpu->arch.exception.nested_apf)
475 vcpu->arch.apf.nested_apf_token = fault->address;
476 else
477 vcpu->arch.cr2 = fault->address;
478 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
479}
480EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
481
482static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
483{
484 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
485 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
486 else
487 vcpu->arch.mmu.inject_page_fault(vcpu, fault);
488
489 return fault->nested_page_fault;
490}
491
492void kvm_inject_nmi(struct kvm_vcpu *vcpu)
493{
494 atomic_inc(&vcpu->arch.nmi_queued);
495 kvm_make_request(KVM_REQ_NMI, vcpu);
496}
497EXPORT_SYMBOL_GPL(kvm_inject_nmi);
498
499void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
500{
501 kvm_multiple_exception(vcpu, nr, true, error_code, false);
502}
503EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
504
505void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
506{
507 kvm_multiple_exception(vcpu, nr, true, error_code, true);
508}
509EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
510
511
512
513
514
515bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
516{
517 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
518 return true;
519 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
520 return false;
521}
522EXPORT_SYMBOL_GPL(kvm_require_cpl);
523
524bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
525{
526 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
527 return true;
528
529 kvm_queue_exception(vcpu, UD_VECTOR);
530 return false;
531}
532EXPORT_SYMBOL_GPL(kvm_require_dr);
533
534
535
536
537
538
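/*
 * Read guest memory at a gfn that is relative to @mmu's address space:
 * the gfn is first translated through mmu->translate_gpa() (e.g. a nested
 * guest's gpa to an L1 gpa) and the result is read with the normal
 * guest-page accessor.
 */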
539int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
540 gfn_t ngfn, void *data, int offset, int len,
541 u32 access)
542{
543 struct x86_exception exception;
544 gfn_t real_gfn;
545 gpa_t ngpa;
546
547 ngpa = gfn_to_gpa(ngfn);
548 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
549 if (real_gfn == UNMAPPED_GVA)
550 return -EFAULT;
551
552 real_gfn = gpa_to_gfn(real_gfn);
553
554 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
555}
556EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
557
558static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
559 void *data, int offset, int len, u32 access)
560{
561 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
562 data, offset, len, access);
563}
564
565
566
567
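/*
 * Load the four PAE page-directory pointers referenced by @cr3.
 * Returns 1 if all present entries are valid (no reserved bits set),
 * 0 on a read failure or a malformed entry.
 */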
568int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
569{
570 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
571 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
572 int i;
573 int ret;
574 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
575
576 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
577 offset * sizeof(u64), sizeof(pdpte),
578 PFERR_USER_MASK|PFERR_WRITE_MASK);
579 if (ret < 0) {
580 ret = 0;
581 goto out;
582 }
583 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
584 if ((pdpte[i] & PT_PRESENT_MASK) &&
585 (pdpte[i] &
586 vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
587 ret = 0;
588 goto out;
589 }
590 }
591 ret = 1;
592
593 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
594 __set_bit(VCPU_EXREG_PDPTR,
595 (unsigned long *)&vcpu->arch.regs_avail);
596 __set_bit(VCPU_EXREG_PDPTR,
597 (unsigned long *)&vcpu->arch.regs_dirty);
598out:
599
600 return ret;
601}
602EXPORT_SYMBOL_GPL(load_pdptrs);
603
604bool pdptrs_changed(struct kvm_vcpu *vcpu)
605{
606 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
607 bool changed = true;
608 int offset;
609 gfn_t gfn;
610 int r;
611
612 if (is_long_mode(vcpu) || !is_pae(vcpu))
613 return false;
614
615 if (!test_bit(VCPU_EXREG_PDPTR,
616 (unsigned long *)&vcpu->arch.regs_avail))
617 return true;
618
619 gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
620 offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
621 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
622 PFERR_USER_MASK | PFERR_WRITE_MASK);
623 if (r < 0)
624 goto out;
625 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
626out:
627
628 return changed;
629}
630EXPORT_SYMBOL_GPL(pdptrs_changed);
631
632int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
633{
634 unsigned long old_cr0 = kvm_read_cr0(vcpu);
635 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
636
637 cr0 |= X86_CR0_ET;
638
639#ifdef CONFIG_X86_64
640 if (cr0 & 0xffffffff00000000UL)
641 return 1;
642#endif
643
644 cr0 &= ~CR0_RESERVED_BITS;
645
646 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
647 return 1;
648
649 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
650 return 1;
651
652 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
653#ifdef CONFIG_X86_64
654 if ((vcpu->arch.efer & EFER_LME)) {
655 int cs_db, cs_l;
656
657 if (!is_pae(vcpu))
658 return 1;
659 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
660 if (cs_l)
661 return 1;
662 } else
663#endif
664 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
665 kvm_read_cr3(vcpu)))
666 return 1;
667 }
668
669 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
670 return 1;
671
672 kvm_x86_ops->set_cr0(vcpu, cr0);
673
674 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
675 kvm_clear_async_pf_completion_queue(vcpu);
676 kvm_async_pf_hash_reset(vcpu);
677 }
678
679 if ((cr0 ^ old_cr0) & update_bits)
680 kvm_mmu_reset_context(vcpu);
681
682 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
683 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
684 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
685 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
686
687 return 0;
688}
689EXPORT_SYMBOL_GPL(kvm_set_cr0);
690
691void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
692{
693 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
694}
695EXPORT_SYMBOL_GPL(kvm_lmsw);
696
697static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
698{
699 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
700 !vcpu->guest_xcr0_loaded) {
701
702 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
703 vcpu->guest_xcr0_loaded = 1;
704 }
705}
706
707static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
708{
709 if (vcpu->guest_xcr0_loaded) {
710 if (vcpu->arch.xcr0 != host_xcr0)
711 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
712 vcpu->guest_xcr0_loaded = 0;
713 }
714}
715
716static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
717{
718 u64 xcr0 = xcr;
719 u64 old_xcr0 = vcpu->arch.xcr0;
720 u64 valid_bits;
721
722
723 if (index != XCR_XFEATURE_ENABLED_MASK)
724 return 1;
725 if (!(xcr0 & XFEATURE_MASK_FP))
726 return 1;
727 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
728 return 1;
729
730
731
732
733
734
735 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
736 if (xcr0 & ~valid_bits)
737 return 1;
738
739 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
740 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
741 return 1;
742
743 if (xcr0 & XFEATURE_MASK_AVX512) {
744 if (!(xcr0 & XFEATURE_MASK_YMM))
745 return 1;
746 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
747 return 1;
748 }
749 vcpu->arch.xcr0 = xcr0;
750
751 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
752 kvm_update_cpuid(vcpu);
753 return 0;
754}
755
756int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
757{
758 if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
759 __kvm_set_xcr(vcpu, index, xcr)) {
760 kvm_inject_gp(vcpu, 0);
761 return 1;
762 }
763 return 0;
764}
765EXPORT_SYMBOL_GPL(kvm_set_xcr);
766
767int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
768{
769 unsigned long old_cr4 = kvm_read_cr4(vcpu);
770 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
771 X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
772
773 if (cr4 & CR4_RESERVED_BITS)
774 return 1;
775
776 if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
777 return 1;
778
779 if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
780 return 1;
781
782 if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
783 return 1;
784
785 if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
786 return 1;
787
788 if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
789 return 1;
790
791 if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
792 return 1;
793
794 if (is_long_mode(vcpu)) {
795 if (!(cr4 & X86_CR4_PAE))
796 return 1;
797 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
798 && ((cr4 ^ old_cr4) & pdptr_bits)
799 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
800 kvm_read_cr3(vcpu)))
801 return 1;
802
803 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
804 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
805 return 1;
806
807
808 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
809 return 1;
810 }
811
812 if (kvm_x86_ops->set_cr4(vcpu, cr4))
813 return 1;
814
815 if (((cr4 ^ old_cr4) & pdptr_bits) ||
816 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
817 kvm_mmu_reset_context(vcpu);
818
819 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
820 kvm_update_cpuid(vcpu);
821
822 return 0;
823}
824EXPORT_SYMBOL_GPL(kvm_set_cr4);
825
826int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
827{
828#ifdef CONFIG_X86_64
829 cr3 &= ~CR3_PCID_INVD;
830#endif
831
832 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
833 kvm_mmu_sync_roots(vcpu);
834 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
835 return 0;
836 }
837
838 if (is_long_mode(vcpu) &&
839 (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
840 return 1;
841 else if (is_pae(vcpu) && is_paging(vcpu) &&
842 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
843 return 1;
844
845 vcpu->arch.cr3 = cr3;
846 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
847 kvm_mmu_new_cr3(vcpu);
848 return 0;
849}
850EXPORT_SYMBOL_GPL(kvm_set_cr3);
851
852int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
853{
854 if (cr8 & CR8_RESERVED_BITS)
855 return 1;
856 if (lapic_in_kernel(vcpu))
857 kvm_lapic_set_tpr(vcpu, cr8);
858 else
859 vcpu->arch.cr8 = cr8;
860 return 0;
861}
862EXPORT_SYMBOL_GPL(kvm_set_cr8);
863
864unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
865{
866 if (lapic_in_kernel(vcpu))
867 return kvm_lapic_get_cr8(vcpu);
868 else
869 return vcpu->arch.cr8;
870}
871EXPORT_SYMBOL_GPL(kvm_get_cr8);
872
873static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
874{
875 int i;
876
877 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
878 for (i = 0; i < KVM_NR_DB_REGS; i++)
879 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
880 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
881 }
882}
883
884static void kvm_update_dr6(struct kvm_vcpu *vcpu)
885{
886 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
887 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
888}
889
890static void kvm_update_dr7(struct kvm_vcpu *vcpu)
891{
892 unsigned long dr7;
893
894 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
895 dr7 = vcpu->arch.guest_debug_dr7;
896 else
897 dr7 = vcpu->arch.dr7;
898 kvm_x86_ops->set_dr7(vcpu, dr7);
899 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
900 if (dr7 & DR7_BP_EN_MASK)
901 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
902}
903
904static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
905{
906 u64 fixed = DR6_FIXED_1;
907
908 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
909 fixed |= DR6_RTM;
910 return fixed;
911}
912
913static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
914{
915 switch (dr) {
916 case 0 ... 3:
917 vcpu->arch.db[dr] = val;
918 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
919 vcpu->arch.eff_db[dr] = val;
920 break;
921 case 4:
922
923 case 6:
924 if (val & 0xffffffff00000000ULL)
925 return -1;
926 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
927 kvm_update_dr6(vcpu);
928 break;
929 case 5:
930
931 default:
932 if (val & 0xffffffff00000000ULL)
933 return -1;
934 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
935 kvm_update_dr7(vcpu);
936 break;
937 }
938
939 return 0;
940}
941
942int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
943{
944 if (__kvm_set_dr(vcpu, dr, val)) {
945 kvm_inject_gp(vcpu, 0);
946 return 1;
947 }
948 return 0;
949}
950EXPORT_SYMBOL_GPL(kvm_set_dr);
951
952int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
953{
954 switch (dr) {
955 case 0 ... 3:
956 *val = vcpu->arch.db[dr];
957 break;
958 case 4:
959
960 case 6:
961 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
962 *val = vcpu->arch.dr6;
963 else
964 *val = kvm_x86_ops->get_dr6(vcpu);
965 break;
966 case 5:
967
968 default:
969 *val = vcpu->arch.dr7;
970 break;
971 }
972 return 0;
973}
974EXPORT_SYMBOL_GPL(kvm_get_dr);
975
976bool kvm_rdpmc(struct kvm_vcpu *vcpu)
977{
978 u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
979 u64 data;
980 int err;
981
982 err = kvm_pmu_rdpmc(vcpu, ecx, &data);
983 if (err)
984 return err;
985 kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
986 kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
987 return err;
988}
989EXPORT_SYMBOL_GPL(kvm_rdpmc);
990
991
992
993
994
995
996
997
998
999
1000
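/*
 * msrs_to_save lists MSRs that exist on real hardware and whose values
 * userspace is expected to migrate; the list is trimmed at module load
 * to what the host CPU actually supports.  emulated_msrs below lists
 * MSRs that KVM itself provides regardless of host support.  Both are
 * reported through KVM_GET_MSR_INDEX_LIST.
 */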
1001static u32 msrs_to_save[] = {
1002 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1003 MSR_STAR,
1004#ifdef CONFIG_X86_64
1005 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1006#endif
1007 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1008 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1009};
1010
1011static unsigned num_msrs_to_save;
1012
1013static u32 emulated_msrs[] = {
1014 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1015 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1016 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1017 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1018 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1019 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1020 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1021 HV_X64_MSR_RESET,
1022 HV_X64_MSR_VP_INDEX,
1023 HV_X64_MSR_VP_RUNTIME,
1024 HV_X64_MSR_SCONTROL,
1025 HV_X64_MSR_STIMER0_CONFIG,
1026 HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1027 MSR_KVM_PV_EOI_EN,
1028
1029 MSR_IA32_TSC_ADJUST,
1030 MSR_IA32_TSCDEADLINE,
1031 MSR_IA32_MISC_ENABLE,
1032 MSR_IA32_MCG_STATUS,
1033 MSR_IA32_MCG_CTL,
1034 MSR_IA32_MCG_EXT_CTL,
1035 MSR_IA32_SMBASE,
1036 MSR_PLATFORM_INFO,
1037 MSR_MISC_FEATURES_ENABLES,
1038};
1039
1040static unsigned num_emulated_msrs;
1041
1042bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1043{
1044 if (efer & efer_reserved_bits)
1045 return false;
1046
1047 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1048 return false;
1049
1050 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1051 return false;
1052
1053 return true;
1054}
1055EXPORT_SYMBOL_GPL(kvm_valid_efer);
1056
1057static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
1058{
1059 u64 old_efer = vcpu->arch.efer;
1060
1061 if (!kvm_valid_efer(vcpu, efer))
1062 return 1;
1063
1064 if (is_paging(vcpu)
1065 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1066 return 1;
1067
1068 efer &= ~EFER_LMA;
1069 efer |= vcpu->arch.efer & EFER_LMA;
1070
1071 kvm_x86_ops->set_efer(vcpu, efer);
1072
1073
1074 if ((efer ^ old_efer) & EFER_NX)
1075 kvm_mmu_reset_context(vcpu);
1076
1077 return 0;
1078}
1079
1080void kvm_enable_efer_bits(u64 mask)
1081{
1082 efer_reserved_bits &= ~mask;
1083}
1084EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1085
1086
1087
1088
1089
1090
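/*
 * Write the MSR specified in @msr into the appropriate "register".
 * Returns 0 on success, non-zero otherwise.  Assumes vcpu_load() has
 * already been called.
 */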
1091int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
1092{
1093 switch (msr->index) {
1094 case MSR_FS_BASE:
1095 case MSR_GS_BASE:
1096 case MSR_KERNEL_GS_BASE:
1097 case MSR_CSTAR:
1098 case MSR_LSTAR:
1099 if (is_noncanonical_address(msr->data, vcpu))
1100 return 1;
1101 break;
1102 case MSR_IA32_SYSENTER_EIP:
1103 case MSR_IA32_SYSENTER_ESP:
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
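 /*
  * Unlike the MSRs above, a non-canonical SYSENTER_EIP/ESP value is not
  * rejected with #GP (matching bare-metal behaviour on Intel parts);
  * it is forced canonical instead so that it can never reach the
  * VMCS/VMCB and break a later VM entry.
  */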
1116 msr->data = get_canonical(msr->data, vcpu_virt_addr_bits(vcpu));
1117 }
1118 return kvm_x86_ops->set_msr(vcpu, msr);
1119}
1120EXPORT_SYMBOL_GPL(kvm_set_msr);
1121
1122
1123
1124
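/*
 * Adapt kvm_get_msr()/kvm_set_msr() to the index/data calling convention
 * used by msr_io(); these wrappers always mark the access host-initiated.
 */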
1125static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1126{
1127 struct msr_data msr;
1128 int r;
1129
1130 msr.index = index;
1131 msr.host_initiated = true;
1132 r = kvm_get_msr(vcpu, &msr);
1133 if (r)
1134 return r;
1135
1136 *data = msr.data;
1137 return 0;
1138}
1139
1140static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1141{
1142 struct msr_data msr;
1143
1144 msr.data = *data;
1145 msr.index = index;
1146 msr.host_initiated = true;
1147 return kvm_set_msr(vcpu, &msr);
1148}
1149
1150#ifdef CONFIG_X86_64
1151struct pvclock_gtod_data {
1152 seqcount_t seq;
1153
1154 struct {
1155 int vclock_mode;
1156 u64 cycle_last;
1157 u64 mask;
1158 u32 mult;
1159 u32 shift;
1160 } clock;
1161
1162 u64 boot_ns;
1163 u64 nsec_base;
1164 u64 wall_time_sec;
1165};
1166
1167static struct pvclock_gtod_data pvclock_gtod_data;
1168
1169static void update_pvclock_gtod(struct timekeeper *tk)
1170{
1171 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1172 u64 boot_ns;
1173
1174 boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
1175
1176 write_seqcount_begin(&vdata->seq);
1177
1178
1179 vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
1180 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1181 vdata->clock.mask = tk->tkr_mono.mask;
1182 vdata->clock.mult = tk->tkr_mono.mult;
1183 vdata->clock.shift = tk->tkr_mono.shift;
1184
1185 vdata->boot_ns = boot_ns;
1186 vdata->nsec_base = tk->tkr_mono.xtime_nsec;
1187
1188 vdata->wall_time_sec = tk->xtime_sec;
1189
1190 write_seqcount_end(&vdata->seq);
1191}
1192#endif
1193
1194void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
1195{
1196
1197
1198
1199
1200
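 /*
  * No kick is needed: KVM_REQ_PENDING_TIMER is checked in
  * vcpu_enter_guest(), and this is only called on the physical CPU that
  * is currently running the vcpu.
  */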
1201 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1202}
1203
1204static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
1205{
1206 int version;
1207 int r;
1208 struct pvclock_wall_clock wc;
1209 struct timespec64 boot;
1210
1211 if (!wall_clock)
1212 return;
1213
1214 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
1215 if (r)
1216 return;
1217
1218 if (version & 1)
1219 ++version;
1220
1221 ++version;
1222
1223 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
1224 return;
1225
1226
1227
1228
1229
1230
1231
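 /*
  * The guest computes wall-clock time as this base value plus its
  * kvmclock reading (time since VM start), so what is written here is
  * the boot time, shifted by any kvmclock offset userspace has set.
  */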
1232 getboottime64(&boot);
1233
1234 if (kvm->arch.kvmclock_offset) {
1235 struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
1236 boot = timespec64_sub(boot, ts);
1237 }
1238 wc.sec = (u32)boot.tv_sec;
1239 wc.nsec = boot.tv_nsec;
1240 wc.version = version;
1241
1242 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
1243
1244 version++;
1245 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1246}
1247
1248static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1249{
1250 do_shl32_div32(dividend, divisor);
1251 return dividend;
1252}
1253
1254static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
1255 s8 *pshift, u32 *pmultiplier)
1256{
1257 uint64_t scaled64;
1258 int32_t shift = 0;
1259 uint64_t tps64;
1260 uint32_t tps32;
1261
1262 tps64 = base_hz;
1263 scaled64 = scaled_hz;
1264 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
1265 tps64 >>= 1;
1266 shift--;
1267 }
1268
1269 tps32 = (uint32_t)tps64;
1270 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
1271 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
1272 scaled64 >>= 1;
1273 else
1274 tps32 <<= 1;
1275 shift++;
1276 }
1277
1278 *pshift = shift;
1279 *pmultiplier = div_frac(scaled64, tps32);
1280
1281 pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
1282 __func__, base_hz, scaled_hz, shift, *pmultiplier);
1283}
1284
1285#ifdef CONFIG_X86_64
1286static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1287#endif
1288
1289static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
1290static unsigned long max_tsc_khz;
1291
1292static u32 adjust_tsc_khz(u32 khz, s32 ppm)
1293{
1294 u64 v = (u64)khz * (1000000 + ppm);
1295 do_div(v, 1000000);
1296 return v;
1297}
1298
1299static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1300{
1301 u64 ratio;
1302
1303
1304 if (!scale) {
1305 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1306 return 0;
1307 }
1308
1309
1310 if (!kvm_has_tsc_control) {
1311 if (user_tsc_khz > tsc_khz) {
1312 vcpu->arch.tsc_catchup = 1;
1313 vcpu->arch.tsc_always_catchup = 1;
1314 return 0;
1315 } else {
1316 WARN(1, "user requested TSC rate below hardware speed\n");
1317 return -1;
1318 }
1319 }
1320
1321
1322 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
1323 user_tsc_khz, tsc_khz);
1324
1325 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
1326 WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
1327 user_tsc_khz);
1328 return -1;
1329 }
1330
1331 vcpu->arch.tsc_scaling_ratio = ratio;
1332 return 0;
1333}
1334
1335static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
1336{
1337 u32 thresh_lo, thresh_hi;
1338 int use_scaling = 0;
1339
1340
1341 if (user_tsc_khz == 0) {
1342
1343 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1344 return -1;
1345 }
1346
1347
1348 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
1349 &vcpu->arch.virtual_tsc_shift,
1350 &vcpu->arch.virtual_tsc_mult);
1351 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
1352
1353
1354
1355
1356
1357
1358
1359 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1360 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1361 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
1362 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
1363 use_scaling = 1;
1364 }
1365 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
1366}
1367
1368static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1369{
1370 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1371 vcpu->arch.virtual_tsc_mult,
1372 vcpu->arch.virtual_tsc_shift);
1373 tsc += vcpu->arch.this_tsc_write;
1374 return tsc;
1375}
1376
1377static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1378{
1379#ifdef CONFIG_X86_64
1380 bool vcpus_matched;
1381 struct kvm_arch *ka = &vcpu->kvm->arch;
1382 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1383
1384 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1385 atomic_read(&vcpu->kvm->online_vcpus));
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395 if (ka->use_master_clock ||
1396 (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
1397 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1398
1399 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
1400 atomic_read(&vcpu->kvm->online_vcpus),
1401 ka->use_master_clock, gtod->clock.vclock_mode);
1402#endif
1403}
1404
1405static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1406{
1407 u64 curr_offset = vcpu->arch.tsc_offset;
1408 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1409}
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
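/*
 * Scale a TSC value by a fixed-point ratio: the low
 * kvm_tsc_scaling_ratio_frac_bits bits of @ratio are the fractional part
 * and the remaining high bits the integral part; mul_u64_u64_shr() keeps
 * the full 128-bit intermediate product so no precision is lost.
 *
 * Illustrative example only: with 48 fractional bits, a guest running at
 * 1.5x the host TSC frequency would use ratio = 3ULL << 47, and
 * __scale_tsc(ratio, tsc) == tsc + tsc / 2.
 */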
1421static inline u64 __scale_tsc(u64 ratio, u64 tsc)
1422{
1423 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
1424}
1425
1426u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
1427{
1428 u64 _tsc = tsc;
1429 u64 ratio = vcpu->arch.tsc_scaling_ratio;
1430
1431 if (ratio != kvm_default_tsc_scaling_ratio)
1432 _tsc = __scale_tsc(ratio, tsc);
1433
1434 return _tsc;
1435}
1436EXPORT_SYMBOL_GPL(kvm_scale_tsc);
1437
1438static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1439{
1440 u64 tsc;
1441
1442 tsc = kvm_scale_tsc(vcpu, rdtsc());
1443
1444 return target_tsc - tsc;
1445}
1446
1447u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
1448{
1449 return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
1450}
1451EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
1452
1453static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1454{
1455 kvm_x86_ops->write_tsc_offset(vcpu, offset);
1456 vcpu->arch.tsc_offset = offset;
1457}
1458
1459void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1460{
1461 struct kvm *kvm = vcpu->kvm;
1462 u64 offset, ns, elapsed;
1463 unsigned long flags;
1464 bool matched;
1465 bool already_matched;
1466 u64 data = msr->data;
1467 bool synchronizing = false;
1468
1469 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1470 offset = kvm_compute_tsc_offset(vcpu, data);
1471 ns = ktime_get_boot_ns();
1472 elapsed = ns - kvm->arch.last_tsc_nsec;
1473
1474 if (vcpu->arch.virtual_tsc_khz) {
1475 if (data == 0 && msr->host_initiated) {
1476
1477
1478
1479
1480
1481 synchronizing = true;
1482 } else {
1483 u64 tsc_exp = kvm->arch.last_tsc_write +
1484 nsec_to_cycles(vcpu, elapsed);
1485 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
1486
1487
1488
1489
1490
1491 synchronizing = data < tsc_exp + tsc_hz &&
1492 data + tsc_hz > tsc_exp;
1493 }
1494 }
1495
1496
1497
1498
1499
1500
1501
1502 if (synchronizing &&
1503 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1504 if (!check_tsc_unstable()) {
1505 offset = kvm->arch.cur_tsc_offset;
1506 pr_debug("kvm: matched tsc offset for %llu\n", data);
1507 } else {
1508 u64 delta = nsec_to_cycles(vcpu, elapsed);
1509 data += delta;
1510 offset = kvm_compute_tsc_offset(vcpu, data);
1511 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1512 }
1513 matched = true;
1514 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
1515 } else {
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525 kvm->arch.cur_tsc_generation++;
1526 kvm->arch.cur_tsc_nsec = ns;
1527 kvm->arch.cur_tsc_write = data;
1528 kvm->arch.cur_tsc_offset = offset;
1529 matched = false;
1530 pr_debug("kvm: new tsc generation %llu, clock %llu\n",
1531 kvm->arch.cur_tsc_generation, data);
1532 }
1533
1534
1535
1536
1537
1538 kvm->arch.last_tsc_nsec = ns;
1539 kvm->arch.last_tsc_write = data;
1540 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
1541
1542 vcpu->arch.last_guest_tsc = data;
1543
1544
1545 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
1546 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
1547 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
1548
1549 if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
1550 update_ia32_tsc_adjust_msr(vcpu, offset);
1551
1552 kvm_vcpu_write_tsc_offset(vcpu, offset);
1553 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1554
1555 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
1556 if (!matched) {
1557 kvm->arch.nr_vcpus_matched_tsc = 0;
1558 } else if (!already_matched) {
1559 kvm->arch.nr_vcpus_matched_tsc++;
1560 }
1561
1562 kvm_track_tsc_matching(vcpu);
1563 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
1564}
1565
1566EXPORT_SYMBOL_GPL(kvm_write_tsc);
1567
1568static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
1569 s64 adjustment)
1570{
1571 kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
1572}
1573
1574static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
1575{
1576 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
1577 WARN_ON(adjustment < 0);
1578 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
1579 adjust_tsc_offset_guest(vcpu, adjustment);
1580}
1581
1582#ifdef CONFIG_X86_64
1583
1584static u64 read_tsc(void)
1585{
1586 u64 ret = (u64)rdtsc_ordered();
1587 u64 last = pvclock_gtod_data.clock.cycle_last;
1588
1589 if (likely(ret >= last))
1590 return ret;
1591
1592
1593
1594
1595
1596
1597
1598
1599
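 /*
  * The TSC value is behind the last recorded clocksource cycle (which
  * can happen when TSC reads are compared across CPUs), so clamp to
  * cycle_last and keep time monotonic.  The empty asm below merely
  * discourages the compiler from turning this very predictable branch
  * into a cmov.
  */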
1600 asm volatile ("");
1601 return last;
1602}
1603
1604static inline u64 vgettsc(u64 *cycle_now)
1605{
1606 long v;
1607 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1608
1609 *cycle_now = read_tsc();
1610
1611 v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
1612 return v * gtod->clock.mult;
1613}
1614
1615static int do_monotonic_boot(s64 *t, u64 *cycle_now)
1616{
1617 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1618 unsigned long seq;
1619 int mode;
1620 u64 ns;
1621
1622 do {
 seq = read_seqcount_begin(&gtod->seq);
1624 mode = gtod->clock.vclock_mode;
1625 ns = gtod->nsec_base;
1626 ns += vgettsc(cycle_now);
1627 ns >>= gtod->clock.shift;
1628 ns += gtod->boot_ns;
 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
1630 *t = ns;
1631
1632 return mode;
1633}
1634
1635static int do_realtime(struct timespec *ts, u64 *cycle_now)
1636{
1637 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1638 unsigned long seq;
1639 int mode;
1640 u64 ns;
1641
1642 do {
 seq = read_seqcount_begin(&gtod->seq);
1644 mode = gtod->clock.vclock_mode;
1645 ts->tv_sec = gtod->wall_time_sec;
1646 ns = gtod->nsec_base;
1647 ns += vgettsc(cycle_now);
1648 ns >>= gtod->clock.shift;
 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
1650
1651 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
1652 ts->tv_nsec = ns;
1653
1654 return mode;
1655}
1656
1657
1658static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
1659{
1660
1661 if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
1662 return false;
1663
1664 return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
1665}
1666
1667
1668static bool kvm_get_walltime_and_clockread(struct timespec *ts,
1669 u64 *cycle_now)
1670{
1671
1672 if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
1673 return false;
1674
1675 return do_realtime(ts, cycle_now) == VCLOCK_TSC;
1676}
1677#endif
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
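/*
 * Compute the VM-wide "master clock": a single (master_kernel_ns,
 * master_cycle_now) snapshot of host boot time and host TSC.  When the
 * host clocksource is the TSC and every vcpu's TSC is synchronized, all
 * vcpus derive kvmclock from this one snapshot, which keeps the guest
 * clock monotonic and consistent across vcpus.  If those conditions do
 * not hold (unsynchronized TSCs, a non-TSC host clocksource, a backwards
 * TSC observation, or an old kvmclock-style guest), use_master_clock is
 * left clear and each vcpu is updated from the host clock individually.
 */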
1720static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
1721{
1722#ifdef CONFIG_X86_64
1723 struct kvm_arch *ka = &kvm->arch;
1724 int vclock_mode;
1725 bool host_tsc_clocksource, vcpus_matched;
1726
1727 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1728 atomic_read(&kvm->online_vcpus));
1729
1730
1731
1732
1733
1734 host_tsc_clocksource = kvm_get_time_and_clockread(
1735 &ka->master_kernel_ns,
1736 &ka->master_cycle_now);
1737
1738 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
1739 && !ka->backwards_tsc_observed
1740 && !ka->boot_vcpu_runs_old_kvmclock;
1741
1742 if (ka->use_master_clock)
1743 atomic_set(&kvm_guest_has_master_clock, 1);
1744
1745 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
1746 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
1747 vcpus_matched);
1748#endif
1749}
1750
1751void kvm_make_mclock_inprogress_request(struct kvm *kvm)
1752{
1753 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
1754}
1755
1756static void kvm_gen_update_masterclock(struct kvm *kvm)
1757{
1758#ifdef CONFIG_X86_64
1759 int i;
1760 struct kvm_vcpu *vcpu;
1761 struct kvm_arch *ka = &kvm->arch;
1762
1763 spin_lock(&ka->pvclock_gtod_sync_lock);
1764 kvm_make_mclock_inprogress_request(kvm);
1765
1766 pvclock_update_vm_gtod_copy(kvm);
1767
1768 kvm_for_each_vcpu(i, vcpu, kvm)
1769 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1770
1771
1772 kvm_for_each_vcpu(i, vcpu, kvm)
1773 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
1774
1775 spin_unlock(&ka->pvclock_gtod_sync_lock);
1776#endif
1777}
1778
1779u64 get_kvmclock_ns(struct kvm *kvm)
1780{
1781 struct kvm_arch *ka = &kvm->arch;
1782 struct pvclock_vcpu_time_info hv_clock;
1783 u64 ret;
1784
1785 spin_lock(&ka->pvclock_gtod_sync_lock);
1786 if (!ka->use_master_clock) {
1787 spin_unlock(&ka->pvclock_gtod_sync_lock);
1788 return ktime_get_boot_ns() + ka->kvmclock_offset;
1789 }
1790
1791 hv_clock.tsc_timestamp = ka->master_cycle_now;
1792 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
1793 spin_unlock(&ka->pvclock_gtod_sync_lock);
1794
1795
1796 get_cpu();
1797
1798 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
1799 &hv_clock.tsc_shift,
1800 &hv_clock.tsc_to_system_mul);
1801 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
1802
1803 put_cpu();
1804
1805 return ret;
1806}
1807
1808static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
1809{
1810 struct kvm_vcpu_arch *vcpu = &v->arch;
1811 struct pvclock_vcpu_time_info guest_hv_clock;
1812
1813 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
1814 &guest_hv_clock, sizeof(guest_hv_clock))))
1815 return;
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
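 /*
  * The pvclock page uses a seqcount-like protocol on 'version': bump it
  * to an odd value before rewriting the payload and to an even value
  * afterwards, with write barriers in between, so the guest can detect
  * a torn update.  A guest-side reader pairs with this roughly as
  * follows (sketch only, not KVM code; pvti is the guest's mapping of
  * this structure):
  *
  *	do {
  *		version = pvti->version;
  *		rmb();
  *		<read tsc_timestamp, system_time, mul, shift, flags>
  *		rmb();
  *	} while ((version & 1) || version != pvti->version);
  */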
1831 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
1832
1833 vcpu->hv_clock.version = guest_hv_clock.version + 1;
1834 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1835 &vcpu->hv_clock,
1836 sizeof(vcpu->hv_clock.version));
1837
1838 smp_wmb();
1839
1840
1841 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
1842
1843 if (vcpu->pvclock_set_guest_stopped_request) {
1844 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
1845 vcpu->pvclock_set_guest_stopped_request = false;
1846 }
1847
1848 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
1849
1850 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1851 &vcpu->hv_clock,
1852 sizeof(vcpu->hv_clock));
1853
1854 smp_wmb();
1855
1856 vcpu->hv_clock.version++;
1857 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
1858 &vcpu->hv_clock,
1859 sizeof(vcpu->hv_clock.version));
1860}
1861
1862static int kvm_guest_time_update(struct kvm_vcpu *v)
1863{
1864 unsigned long flags, tgt_tsc_khz;
1865 struct kvm_vcpu_arch *vcpu = &v->arch;
1866 struct kvm_arch *ka = &v->kvm->arch;
1867 s64 kernel_ns;
1868 u64 tsc_timestamp, host_tsc;
1869 u8 pvclock_flags;
1870 bool use_master_clock;
1871
1872 kernel_ns = 0;
1873 host_tsc = 0;
1874
1875
1876
1877
1878
1879 spin_lock(&ka->pvclock_gtod_sync_lock);
1880 use_master_clock = ka->use_master_clock;
1881 if (use_master_clock) {
1882 host_tsc = ka->master_cycle_now;
1883 kernel_ns = ka->master_kernel_ns;
1884 }
1885 spin_unlock(&ka->pvclock_gtod_sync_lock);
1886
1887
1888 local_irq_save(flags);
1889 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
1890 if (unlikely(tgt_tsc_khz == 0)) {
1891 local_irq_restore(flags);
1892 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1893 return 1;
1894 }
1895 if (!use_master_clock) {
1896 host_tsc = rdtsc();
1897 kernel_ns = ktime_get_boot_ns();
1898 }
1899
1900 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
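 /*
  * In catch-up mode the guest TSC is defined by elapsed wall-clock time
  * rather than by the host TSC, so if the value derived from kernel_ns
  * is ahead of what the hardware would deliver, bump the TSC offset
  * forward to close the gap.  The guest TSC is never moved backwards
  * here.
  */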
1912 if (vcpu->tsc_catchup) {
1913 u64 tsc = compute_guest_tsc(v, kernel_ns);
1914 if (tsc > tsc_timestamp) {
1915 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
1916 tsc_timestamp = tsc;
1917 }
1918 }
1919
1920 local_irq_restore(flags);
1921
1922
1923
1924 if (kvm_has_tsc_control)
1925 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
1926
1927 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
1928 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
1929 &vcpu->hv_clock.tsc_shift,
1930 &vcpu->hv_clock.tsc_to_system_mul);
1931 vcpu->hw_tsc_khz = tgt_tsc_khz;
1932 }
1933
1934 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1935 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1936 vcpu->last_guest_tsc = tsc_timestamp;
1937
1938
1939 pvclock_flags = 0;
1940 if (use_master_clock)
1941 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
1942
1943 vcpu->hv_clock.flags = pvclock_flags;
1944
1945 if (vcpu->pv_time_enabled)
1946 kvm_setup_pvclock_page(v);
1947 if (v == kvm_get_vcpu(v->kvm, 0))
1948 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
1949 return 0;
1950}
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
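/*
 * kvmclock updates that are isolated to a single vcpu (for example a
 * vcpu migrating to another physical CPU) would leave the other vcpus'
 * system_time stale, so such events schedule a clock update for all
 * vcpus after a short delay.  The delay batches the extra work while
 * still bounding how stale a remote vcpu's kvmclock can become.
 */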
1966#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
1967
1968static void kvmclock_update_fn(struct work_struct *work)
1969{
1970 int i;
1971 struct delayed_work *dwork = to_delayed_work(work);
1972 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1973 kvmclock_update_work);
1974 struct kvm *kvm = container_of(ka, struct kvm, arch);
1975 struct kvm_vcpu *vcpu;
1976
1977 kvm_for_each_vcpu(i, vcpu, kvm) {
1978 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
1979 kvm_vcpu_kick(vcpu);
1980 }
1981}
1982
1983static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1984{
1985 struct kvm *kvm = v->kvm;
1986
1987 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
1988 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
1989 KVMCLOCK_UPDATE_DELAY);
1990}
1991
1992#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
1993
1994static void kvmclock_sync_fn(struct work_struct *work)
1995{
1996 struct delayed_work *dwork = to_delayed_work(work);
1997 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1998 kvmclock_sync_work);
1999 struct kvm *kvm = container_of(ka, struct kvm, arch);
2000
2001 if (!kvmclock_periodic_sync)
2002 return;
2003
2004 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
2005 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
2006 KVMCLOCK_SYNC_PERIOD);
2007}
2008
2009static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2010{
2011 u64 mcg_cap = vcpu->arch.mcg_cap;
2012 unsigned bank_num = mcg_cap & 0xff;
2013
2014 switch (msr) {
2015 case MSR_IA32_MCG_STATUS:
2016 vcpu->arch.mcg_status = data;
2017 break;
2018 case MSR_IA32_MCG_CTL:
2019 if (!(mcg_cap & MCG_CTL_P))
2020 return 1;
2021 if (data != 0 && data != ~(u64)0)
2022 return -1;
2023 vcpu->arch.mcg_ctl = data;
2024 break;
2025 default:
2026 if (msr >= MSR_IA32_MC0_CTL &&
2027 msr < MSR_IA32_MCx_CTL(bank_num)) {
2028 u32 offset = msr - MSR_IA32_MC0_CTL;
2029
2030
2031
2032
2033
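 /*
  * Architecturally only all-zeros or all-ones may be written to
  * IA32_MCi_CTL.  Bit 10 is tolerated as well because some guest
  * kernels clear it in bank 4 to work around an old AMD K8 GART
  * erratum, and faulting on that write would be worse.
  */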
2034 if ((offset & 0x3) == 0 &&
2035 data != 0 && (data | (1 << 10)) != ~(u64)0)
2036 return -1;
2037 vcpu->arch.mce_banks[offset] = data;
2038 break;
2039 }
2040 return 1;
2041 }
2042 return 0;
2043}
2044
2045static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
2046{
2047 struct kvm *kvm = vcpu->kvm;
2048 int lm = is_long_mode(vcpu);
2049 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
2050 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
2051 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
2052 : kvm->arch.xen_hvm_config.blob_size_32;
2053 u32 page_num = data & ~PAGE_MASK;
2054 u64 page_addr = data & PAGE_MASK;
2055 u8 *page;
2056 int r;
2057
2058 r = -E2BIG;
2059 if (page_num >= blob_size)
2060 goto out;
2061 r = -ENOMEM;
2062 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
2063 if (IS_ERR(page)) {
2064 r = PTR_ERR(page);
2065 goto out;
2066 }
2067 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
2068 goto out_free;
2069 r = 0;
2070out_free:
2071 kfree(page);
2072out:
2073 return r;
2074}
2075
2076static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2077{
2078 gpa_t gpa = data & ~0x3f;
2079
2080
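 /* Bits 3:5 of the MSR value are reserved and must be zero. */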
2081 if (data & 0x38)
2082 return 1;
2083
2084 vcpu->arch.apf.msr_val = data;
2085
2086 if (!(data & KVM_ASYNC_PF_ENABLED)) {
2087 kvm_clear_async_pf_completion_queue(vcpu);
2088 kvm_async_pf_hash_reset(vcpu);
2089 return 0;
2090 }
2091
2092 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
2093 sizeof(u32)))
2094 return 1;
2095
2096 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
2097 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
2098 kvm_async_pf_wakeup_all(vcpu);
2099 return 0;
2100}
2101
2102static void kvmclock_reset(struct kvm_vcpu *vcpu)
2103{
2104 vcpu->arch.pv_time_enabled = false;
2105}
2106
2107static void record_steal_time(struct kvm_vcpu *vcpu)
2108{
2109 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2110 return;
2111
2112 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2113 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2114 return;
2115
2116 vcpu->arch.st.steal.preempted = 0;
2117
2118 if (vcpu->arch.st.steal.version & 1)
2119 vcpu->arch.st.steal.version += 1;
2120
2121 vcpu->arch.st.steal.version += 1;
2122
2123 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2124 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2125
2126 smp_wmb();
2127
2128 vcpu->arch.st.steal.steal += current->sched_info.run_delay -
2129 vcpu->arch.st.last_steal;
2130 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2131
2132 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2133 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2134
2135 smp_wmb();
2136
2137 vcpu->arch.st.steal.version += 1;
2138
2139 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2140 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2141}
2142
2143int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2144{
2145 bool pr = false;
2146 u32 msr = msr_info->index;
2147 u64 data = msr_info->data;
2148
2149 switch (msr) {
2150 case MSR_AMD64_NB_CFG:
2151 case MSR_IA32_UCODE_REV:
2152 case MSR_IA32_UCODE_WRITE:
2153 case MSR_VM_HSAVE_PA:
2154 case MSR_AMD64_PATCH_LOADER:
2155 case MSR_AMD64_BU_CFG2:
2156 case MSR_AMD64_DC_CFG:
2157 break;
2158
2159 case MSR_EFER:
2160 return set_efer(vcpu, data);
2161 case MSR_K7_HWCR:
2162 data &= ~(u64)0x40;
2163 data &= ~(u64)0x100;
2164 data &= ~(u64)0x8;
2165 data &= ~(u64)0x40000;
2166 if (data != 0) {
2167 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2168 data);
2169 return 1;
2170 }
2171 break;
2172 case MSR_FAM10H_MMIO_CONF_BASE:
2173 if (data != 0) {
2174 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2175 "0x%llx\n", data);
2176 return 1;
2177 }
2178 break;
2179 case MSR_IA32_DEBUGCTLMSR:
2180 if (!data) {
2181
2182 break;
2183 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2184
2185
2186 return 1;
2187 }
2188 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2189 __func__, data);
2190 break;
2191 case 0x200 ... 0x2ff:
2192 return kvm_mtrr_set_msr(vcpu, msr, data);
2193 case MSR_IA32_APICBASE:
2194 return kvm_set_apic_base(vcpu, msr_info);
2195 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2196 return kvm_x2apic_msr_write(vcpu, msr, data);
2197 case MSR_IA32_TSCDEADLINE:
2198 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2199 break;
2200 case MSR_IA32_TSC_ADJUST:
2201 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
2202 if (!msr_info->host_initiated) {
2203 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2204 adjust_tsc_offset_guest(vcpu, adj);
2205 }
2206 vcpu->arch.ia32_tsc_adjust_msr = data;
2207 }
2208 break;
2209 case MSR_IA32_MISC_ENABLE:
2210 vcpu->arch.ia32_misc_enable_msr = data;
2211 break;
2212 case MSR_IA32_SMBASE:
2213 if (!msr_info->host_initiated)
2214 return 1;
2215 vcpu->arch.smbase = data;
2216 break;
2217 case MSR_KVM_WALL_CLOCK_NEW:
2218 case MSR_KVM_WALL_CLOCK:
2219 vcpu->kvm->arch.wall_clock = data;
2220 kvm_write_wall_clock(vcpu->kvm, data);
2221 break;
2222 case MSR_KVM_SYSTEM_TIME_NEW:
2223 case MSR_KVM_SYSTEM_TIME: {
2224 struct kvm_arch *ka = &vcpu->kvm->arch;
2225
2226 kvmclock_reset(vcpu);
2227
2228 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
2229 bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
2230
2231 if (ka->boot_vcpu_runs_old_kvmclock != tmp)
2232 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2233
2234 ka->boot_vcpu_runs_old_kvmclock = tmp;
2235 }
2236
2237 vcpu->arch.time = data;
2238 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2239
2240
2241 if (!(data & 1))
2242 break;
2243
2244 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2245 &vcpu->arch.pv_time, data & ~1ULL,
2246 sizeof(struct pvclock_vcpu_time_info)))
2247 vcpu->arch.pv_time_enabled = false;
2248 else
2249 vcpu->arch.pv_time_enabled = true;
2250
2251 break;
2252 }
2253 case MSR_KVM_ASYNC_PF_EN:
2254 if (kvm_pv_enable_async_pf(vcpu, data))
2255 return 1;
2256 break;
2257 case MSR_KVM_STEAL_TIME:
2258
2259 if (unlikely(!sched_info_on()))
2260 return 1;
2261
2262 if (data & KVM_STEAL_RESERVED_MASK)
2263 return 1;
2264
2265 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
2266 data & KVM_STEAL_VALID_BITS,
2267 sizeof(struct kvm_steal_time)))
2268 return 1;
2269
2270 vcpu->arch.st.msr_val = data;
2271
2272 if (!(data & KVM_MSR_ENABLED))
2273 break;
2274
2275 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2276
2277 break;
2278 case MSR_KVM_PV_EOI_EN:
2279 if (kvm_lapic_enable_pv_eoi(vcpu, data))
2280 return 1;
2281 break;
2282
2283 case MSR_IA32_MCG_CTL:
2284 case MSR_IA32_MCG_STATUS:
2285 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2286 return set_msr_mce(vcpu, msr, data);
2287
2288 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2289 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2290 pr = true;
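		/* fall through: the PERFCTR and EVNTSEL ranges share the PMU path below */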
2291 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2292 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2293 if (kvm_pmu_is_valid_msr(vcpu, msr))
2294 return kvm_pmu_set_msr(vcpu, msr_info);
2295
2296 if (pr || data != 0)
2297 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2298 "0x%x data 0x%llx\n", msr, data);
2299 break;
2300 case MSR_K7_CLK_CTL:
2301 /*
2302  * Ignore all writes to this no longer documented MSR.
2303  * Writes are only relevant for old K7 processors,
2304  * all pre-dating SVM, but a recommended workaround from
2305  * AMD for these chips. It is possible to specify the
2306  * affected processor models on the command line, hence
2307  * the need to ignore the workaround.
2308  */
2309 break;
2310 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2311 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2312 case HV_X64_MSR_CRASH_CTL:
2313 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2314 return kvm_hv_set_msr_common(vcpu, msr, data,
2315 msr_info->host_initiated);
2316 case MSR_IA32_BBL_CR_CTL3:
2317 /* Drop writes to this legacy MSR -- see the rdmsr
2318  * counterpart below for further detail.
2319  */
2320 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
2321 break;
2322 case MSR_AMD64_OSVW_ID_LENGTH:
2323 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2324 return 1;
2325 vcpu->arch.osvw.length = data;
2326 break;
2327 case MSR_AMD64_OSVW_STATUS:
2328 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2329 return 1;
2330 vcpu->arch.osvw.status = data;
2331 break;
2332 case MSR_PLATFORM_INFO:
2333 if (!msr_info->host_initiated ||
2334 data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
2335 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
2336 cpuid_fault_enabled(vcpu)))
2337 return 1;
2338 vcpu->arch.msr_platform_info = data;
2339 break;
2340 case MSR_MISC_FEATURES_ENABLES:
2341 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
2342 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
2343 !supports_cpuid_fault(vcpu)))
2344 return 1;
2345 vcpu->arch.msr_misc_features_enables = data;
2346 break;
2347 default:
2348 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2349 return xen_hvm_config(vcpu, data);
2350 if (kvm_pmu_is_valid_msr(vcpu, msr))
2351 return kvm_pmu_set_msr(vcpu, msr_info);
2352 if (!ignore_msrs) {
2353 vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
2354 msr, data);
2355 return 1;
2356 } else {
2357 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
2358 msr, data);
2359 break;
2360 }
2361 }
2362 return 0;
2363}
2364EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2365
2366
2367/*
2368 * Reads an msr value (of 'msr->index') into 'msr->data'.
2369 * Returns 0 on success, non-0 otherwise.
2370 * Assumes vcpu_load() was already called.
2371 */
2372int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2373{
2374 return kvm_x86_ops->get_msr(vcpu, msr);
2375}
2376EXPORT_SYMBOL_GPL(kvm_get_msr);
2377
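/*
 * Read one of the machine-check MSRs (the MCG_* registers or the
 * per-bank MCi_CTL/STATUS/ADDR/MISC registers) from the vcpu's saved
 * MCE state.  Returns 1 for MSRs that are not present given MCG_CAP.
 */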
2378static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2379{
2380 u64 data;
2381 u64 mcg_cap = vcpu->arch.mcg_cap;
2382 unsigned bank_num = mcg_cap & 0xff;
2383
2384 switch (msr) {
2385 case MSR_IA32_P5_MC_ADDR:
2386 case MSR_IA32_P5_MC_TYPE:
2387 data = 0;
2388 break;
2389 case MSR_IA32_MCG_CAP:
2390 data = vcpu->arch.mcg_cap;
2391 break;
2392 case MSR_IA32_MCG_CTL:
2393 if (!(mcg_cap & MCG_CTL_P))
2394 return 1;
2395 data = vcpu->arch.mcg_ctl;
2396 break;
2397 case MSR_IA32_MCG_STATUS:
2398 data = vcpu->arch.mcg_status;
2399 break;
2400 default:
2401 if (msr >= MSR_IA32_MC0_CTL &&
2402 msr < MSR_IA32_MCx_CTL(bank_num)) {
2403 u32 offset = msr - MSR_IA32_MC0_CTL;
2404 data = vcpu->arch.mce_banks[offset];
2405 break;
2406 }
2407 return 1;
2408 }
2409 *pdata = data;
2410 return 0;
2411}
2412
2413int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2414{
2415 switch (msr_info->index) {
2416 case MSR_IA32_PLATFORM_ID:
2417 case MSR_IA32_EBL_CR_POWERON:
2418 case MSR_IA32_DEBUGCTLMSR:
2419 case MSR_IA32_LASTBRANCHFROMIP:
2420 case MSR_IA32_LASTBRANCHTOIP:
2421 case MSR_IA32_LASTINTFROMIP:
2422 case MSR_IA32_LASTINTTOIP:
2423 case MSR_K8_SYSCFG:
2424 case MSR_K8_TSEG_ADDR:
2425 case MSR_K8_TSEG_MASK:
2426 case MSR_K7_HWCR:
2427 case MSR_VM_HSAVE_PA:
2428 case MSR_K8_INT_PENDING_MSG:
2429 case MSR_AMD64_NB_CFG:
2430 case MSR_FAM10H_MMIO_CONF_BASE:
2431 case MSR_AMD64_BU_CFG2:
2432 case MSR_IA32_PERF_CTL:
2433 case MSR_AMD64_DC_CFG:
2434 msr_info->data = 0;
2435 break;
2436 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2437 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2438 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2439 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2440 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2441 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2442 msr_info->data = 0;
2443 break;
2444 case MSR_IA32_UCODE_REV:
2445 msr_info->data = 0x100000000ULL;
2446 break;
2447 case MSR_MTRRcap:
2448 case 0x200 ... 0x2ff:
2449 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
2450 case 0xcd: /* fsb frequency */
2451 msr_info->data = 3;
2452 break;
2453
2454
2455
2456
2457
2458 /*
2459  * MSR_EBC_FREQUENCY_ID is not otherwise emulated; return a
2460  * conservative, fixed frequency-ID value that is valid even for
2461  * the basic CPU models, so guests that probe this MSR get a
2462  * plausible answer.
2463  */
2464 case MSR_EBC_FREQUENCY_ID:
2465 msr_info->data = 1 << 24;
2466 break;
2467 case MSR_IA32_APICBASE:
2468 msr_info->data = kvm_get_apic_base(vcpu);
2469 break;
2470 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2471 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
2472 break;
2473 case MSR_IA32_TSCDEADLINE:
2474 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
2475 break;
2476 case MSR_IA32_TSC_ADJUST:
2477 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
2478 break;
2479 case MSR_IA32_MISC_ENABLE:
2480 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
2481 break;
2482 case MSR_IA32_SMBASE:
2483 if (!msr_info->host_initiated)
2484 return 1;
2485 msr_info->data = vcpu->arch.smbase;
2486 break;
2487 case MSR_IA32_PERF_STATUS:
2488 /* TSC increment by tick */
2489 msr_info->data = 1000ULL;
2490 /* CPU multiplier */
2491 msr_info->data |= (((uint64_t)4ULL) << 40);
2492 break;
2493 case MSR_EFER:
2494 msr_info->data = vcpu->arch.efer;
2495 break;
2496 case MSR_KVM_WALL_CLOCK:
2497 case MSR_KVM_WALL_CLOCK_NEW:
2498 msr_info->data = vcpu->kvm->arch.wall_clock;
2499 break;
2500 case MSR_KVM_SYSTEM_TIME:
2501 case MSR_KVM_SYSTEM_TIME_NEW:
2502 msr_info->data = vcpu->arch.time;
2503 break;
2504 case MSR_KVM_ASYNC_PF_EN:
2505 msr_info->data = vcpu->arch.apf.msr_val;
2506 break;
2507 case MSR_KVM_STEAL_TIME:
2508 msr_info->data = vcpu->arch.st.msr_val;
2509 break;
2510 case MSR_KVM_PV_EOI_EN:
2511 msr_info->data = vcpu->arch.pv_eoi.msr_val;
2512 break;
2513 case MSR_IA32_P5_MC_ADDR:
2514 case MSR_IA32_P5_MC_TYPE:
2515 case MSR_IA32_MCG_CAP:
2516 case MSR_IA32_MCG_CTL:
2517 case MSR_IA32_MCG_STATUS:
2518 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2519 return get_msr_mce(vcpu, msr_info->index, &msr_info->data);
2520 case MSR_K7_CLK_CTL:
2521 /*
2522  * The K7 CLK_CTL MSR is not otherwise emulated; everything
2523  * apart from the documented ramp-up field is effectively
2524  * undefined on real hardware, so returning a fixed value is
2525  * as good as anything.
2526  *
2527  * 0x20000000 is a plausible value for guests that read this
2528  * legacy MSR.
2529  */
2530 msr_info->data = 0x20000000;
2531 break;
2532 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2533 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2534 case HV_X64_MSR_CRASH_CTL:
2535 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2536 return kvm_hv_get_msr_common(vcpu,
2537 msr_info->index, &msr_info->data);
2538 break;
2539 case MSR_IA32_BBL_CR_CTL3:
2540 /*
2541  * This legacy MSR exists but isn't fully documented in
2542  * current silicon.  It is valid on a subset of older Intel
2543  * CPUs and some guests still read it.
2544  *
2545  * Rather than emulating the register, return a fixed value
2546  * that looks like what real hardware reports, which is
2547  * enough to keep such guests happy.  Writes are dropped in
2548  * kvm_set_msr_common() for the same reason.
2549  */
2550 msr_info->data = 0xbe702111;
2551 break;
2552 case MSR_AMD64_OSVW_ID_LENGTH:
2553 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2554 return 1;
2555 msr_info->data = vcpu->arch.osvw.length;
2556 break;
2557 case MSR_AMD64_OSVW_STATUS:
2558 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2559 return 1;
2560 msr_info->data = vcpu->arch.osvw.status;
2561 break;
2562 case MSR_PLATFORM_INFO:
2563 msr_info->data = vcpu->arch.msr_platform_info;
2564 break;
2565 case MSR_MISC_FEATURES_ENABLES:
2566 msr_info->data = vcpu->arch.msr_misc_features_enables;
2567 break;
2568 default:
2569 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2570 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2571 if (!ignore_msrs) {
2572 vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
2573 msr_info->index);
2574 return 1;
2575 } else {
2576 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
2577 msr_info->data = 0;
2578 }
2579 break;
2580 }
2581 return 0;
2582}
2583EXPORT_SYMBOL_GPL(kvm_get_msr_common);
2584
2585/*
2586 * Read or write a bunch of msrs. All parameters are kernel addresses.
2587 *
2588 * @return number of msrs set successfully.
2589 */
2590static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
2591 struct kvm_msr_entry *entries,
2592 int (*do_msr)(struct kvm_vcpu *vcpu,
2593 unsigned index, u64 *data))
2594{
2595 int i, idx;
2596
2597 idx = srcu_read_lock(&vcpu->kvm->srcu);
2598 for (i = 0; i < msrs->nmsrs; ++i)
2599 if (do_msr(vcpu, entries[i].index, &entries[i].data))
2600 break;
2601 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2602
2603 return i;
2604}
2605
2606/*
2607 * Read or write a bunch of msrs. Parameters are user addresses.
2608 *
2609 * @return number of msrs set successfully.
2610 */
2611static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
2612 int (*do_msr)(struct kvm_vcpu *vcpu,
2613 unsigned index, u64 *data),
2614 int writeback)
2615{
2616 struct kvm_msrs msrs;
2617 struct kvm_msr_entry *entries;
2618 int r, n;
2619 unsigned size;
2620
2621 r = -EFAULT;
2622 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
2623 goto out;
2624
2625 r = -E2BIG;
2626 if (msrs.nmsrs >= MAX_IO_MSRS)
2627 goto out;
2628
2629 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
2630 entries = memdup_user(user_msrs->entries, size);
2631 if (IS_ERR(entries)) {
2632 r = PTR_ERR(entries);
2633 goto out;
2634 }
2635
2636 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
2637 if (r < 0)
2638 goto out_free;
2639
2640 r = -EFAULT;
2641 if (writeback && copy_to_user(user_msrs->entries, entries, size))
2642 goto out_free;
2643
2644 r = n;
2645
2646out_free:
2647 kfree(entries);
2648out:
2649 return r;
2650}
2651
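/*
 * Report which optional KVM capabilities this x86 implementation
 * supports.  Userspace queries these with the KVM_CHECK_EXTENSION
 * ioctl, for example (illustrative only, error handling omitted):
 *
 *	int has_smm = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_SMM);
 *
 * A return value of 0 means "unsupported"; positive values are
 * capability-specific (often just 1, but e.g. KVM_CAP_NR_VCPUS returns
 * the recommended vCPU count).
 */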
2652int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
2653{
2654 int r;
2655
2656 switch (ext) {
2657 case KVM_CAP_IRQCHIP:
2658 case KVM_CAP_HLT:
2659 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
2660 case KVM_CAP_SET_TSS_ADDR:
2661 case KVM_CAP_EXT_CPUID:
2662 case KVM_CAP_EXT_EMUL_CPUID:
2663 case KVM_CAP_CLOCKSOURCE:
2664 case KVM_CAP_PIT:
2665 case KVM_CAP_NOP_IO_DELAY:
2666 case KVM_CAP_MP_STATE:
2667 case KVM_CAP_SYNC_MMU:
2668 case KVM_CAP_USER_NMI:
2669 case KVM_CAP_REINJECT_CONTROL:
2670 case KVM_CAP_IRQ_INJECT_STATUS:
2671 case KVM_CAP_IOEVENTFD:
2672 case KVM_CAP_IOEVENTFD_NO_LENGTH:
2673 case KVM_CAP_PIT2:
2674 case KVM_CAP_PIT_STATE2:
2675 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
2676 case KVM_CAP_XEN_HVM:
2677 case KVM_CAP_VCPU_EVENTS:
2678 case KVM_CAP_HYPERV:
2679 case KVM_CAP_HYPERV_VAPIC:
2680 case KVM_CAP_HYPERV_SPIN:
2681 case KVM_CAP_HYPERV_SYNIC:
2682 case KVM_CAP_HYPERV_SYNIC2:
2683 case KVM_CAP_HYPERV_VP_INDEX:
2684 case KVM_CAP_PCI_SEGMENT:
2685 case KVM_CAP_DEBUGREGS:
2686 case KVM_CAP_X86_ROBUST_SINGLESTEP:
2687 case KVM_CAP_XSAVE:
2688 case KVM_CAP_ASYNC_PF:
2689 case KVM_CAP_GET_TSC_KHZ:
2690 case KVM_CAP_KVMCLOCK_CTRL:
2691 case KVM_CAP_READONLY_MEM:
2692 case KVM_CAP_HYPERV_TIME:
2693 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
2694 case KVM_CAP_TSC_DEADLINE_TIMER:
2695 case KVM_CAP_ENABLE_CAP_VM:
2696 case KVM_CAP_DISABLE_QUIRKS:
2697 case KVM_CAP_SET_BOOT_CPU_ID:
2698 case KVM_CAP_SPLIT_IRQCHIP:
2699 case KVM_CAP_IMMEDIATE_EXIT:
2700 r = 1;
2701 break;
2702 case KVM_CAP_ADJUST_CLOCK:
2703 r = KVM_CLOCK_TSC_STABLE;
2704 break;
2705 case KVM_CAP_X86_GUEST_MWAIT:
2706 r = kvm_mwait_in_guest();
2707 break;
2708 case KVM_CAP_X86_SMM:
2709 /* SMBASE is usually relocated above 1M on modern chipsets,
2710  * and SMM handlers might indeed rely on 4G segment limits,
2711  * so do not report SMM to be available if real mode is
2712  * emulated via vm86 mode.  Still, do not go to great lengths
2713  * to avoid userspace's usage of the feature, because it is a
2714  * fringe case that is not enabled except via specific settings
2715  * of the module parameters.
2716  */
2717 r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
2718 break;
2719 case KVM_CAP_VAPIC:
2720 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
2721 break;
2722 case KVM_CAP_NR_VCPUS:
2723 r = KVM_SOFT_MAX_VCPUS;
2724 break;
2725 case KVM_CAP_MAX_VCPUS:
2726 r = KVM_MAX_VCPUS;
2727 break;
2728 case KVM_CAP_NR_MEMSLOTS:
2729 r = KVM_USER_MEM_SLOTS;
2730 break;
2731 case KVM_CAP_PV_MMU:
2732 r = 0;
2733 break;
2734 case KVM_CAP_MCE:
2735 r = KVM_MAX_MCE_BANKS;
2736 break;
2737 case KVM_CAP_XCRS:
2738 r = boot_cpu_has(X86_FEATURE_XSAVE);
2739 break;
2740 case KVM_CAP_TSC_CONTROL:
2741 r = kvm_has_tsc_control;
2742 break;
2743 case KVM_CAP_X2APIC_API:
2744 r = KVM_X2APIC_API_VALID_FLAGS;
2745 break;
2746 default:
2747 r = 0;
2748 break;
2749 }
2750 return r;
2751
2752}
2753
2754long kvm_arch_dev_ioctl(struct file *filp,
2755 unsigned int ioctl, unsigned long arg)
2756{
2757 void __user *argp = (void __user *)arg;
2758 long r;
2759
2760 switch (ioctl) {
2761 case KVM_GET_MSR_INDEX_LIST: {
2762 struct kvm_msr_list __user *user_msr_list = argp;
2763 struct kvm_msr_list msr_list;
2764 unsigned n;
2765
2766 r = -EFAULT;
2767 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
2768 goto out;
2769 n = msr_list.nmsrs;
2770 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
2771 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
2772 goto out;
2773 r = -E2BIG;
2774 if (n < msr_list.nmsrs)
2775 goto out;
2776 r = -EFAULT;
2777 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
2778 num_msrs_to_save * sizeof(u32)))
2779 goto out;
2780 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
2781 &emulated_msrs,
2782 num_emulated_msrs * sizeof(u32)))
2783 goto out;
2784 r = 0;
2785 break;
2786 }
2787 case KVM_GET_SUPPORTED_CPUID:
2788 case KVM_GET_EMULATED_CPUID: {
2789 struct kvm_cpuid2 __user *cpuid_arg = argp;
2790 struct kvm_cpuid2 cpuid;
2791
2792 r = -EFAULT;
2793 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
2794 goto out;
2795
2796 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
2797 ioctl);
2798 if (r)
2799 goto out;
2800
2801 r = -EFAULT;
2802 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
2803 goto out;
2804 r = 0;
2805 break;
2806 }
2807 case KVM_X86_GET_MCE_CAP_SUPPORTED: {
2808 r = -EFAULT;
2809 if (copy_to_user(argp, &kvm_mce_cap_supported,
2810 sizeof(kvm_mce_cap_supported)))
2811 goto out;
2812 r = 0;
2813 break;
2814 }
2815 default:
2816 r = -EINVAL;
2817 }
2818out:
2819 return r;
2820}
2821
2822static void wbinvd_ipi(void *garbage)
2823{
2824 wbinvd();
2825}
2826
2827static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
2828{
2829 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
2830}
2831
2832void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2833{
2834 /* Address WBINVD may be executed by guest */
2835 if (need_emulate_wbinvd(vcpu)) {
2836 if (kvm_x86_ops->has_wbinvd_exit())
2837 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
2838 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
2839 smp_call_function_single(vcpu->cpu,
2840 wbinvd_ipi, NULL, 1);
2841 }
2842
2843 kvm_x86_ops->vcpu_load(vcpu, cpu);
2844
2845 /* Apply any externally detected TSC adjustments (due to suspend) */
2846 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
2847 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
2848 vcpu->arch.tsc_offset_adjustment = 0;
2849 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2850 }
2851
2852 if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
2853 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
2854 rdtsc() - vcpu->arch.last_host_tsc;
2855 if (tsc_delta < 0)
2856 mark_tsc_unstable("KVM discovered backwards TSC");
2857
2858 if (check_tsc_unstable()) {
2859 u64 offset = kvm_compute_tsc_offset(vcpu,
2860 vcpu->arch.last_guest_tsc);
2861 kvm_vcpu_write_tsc_offset(vcpu, offset);
2862 vcpu->arch.tsc_catchup = 1;
2863 }
2864
2865 if (kvm_lapic_hv_timer_in_use(vcpu))
2866 kvm_lapic_restart_hv_timer(vcpu);
2867
2868 /*
2869  * On a host with synchronized TSC, there is no need to update
2870  * kvmclock on vcpu->cpu migration
2871  */
2872 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
2873 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2874 if (vcpu->cpu != cpu)
2875 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
2876 vcpu->cpu = cpu;
2877 }
2878
2879 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2880}
2881
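/*
 * Mark this vCPU as preempted in its steal-time area so the guest
 * scheduler can see it; only done if the guest enabled the
 * MSR_KVM_STEAL_TIME machinery.
 */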
2882static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
2883{
2884 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2885 return;
2886
2887 vcpu->arch.st.steal.preempted = 1;
2888
2889 kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
2890 &vcpu->arch.st.steal.preempted,
2891 offsetof(struct kvm_steal_time, preempted),
2892 sizeof(vcpu->arch.st.steal.preempted));
2893}
2894
2895void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2896{
2897 int idx;
2898
2899 if (vcpu->preempted)
2900 vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
2901
2902 /*
2903  * Disable page faults because we're in atomic context here.
2904  * kvm_write_guest_offset_cached() would call might_fault()
2905  * that relies on pagefault_disable() to tell if there's a
2906  * bug. NOTE: the write to guest memory may not go through if
2907  * during postcopy live migration or if there's heavy guest
2908  * paging.
2909  */
2910 pagefault_disable();
2911 /*
2912  * kvm_memslots() will be called by
2913  * kvm_write_guest_offset_cached() so take the srcu lock.
2914  */
2915 idx = srcu_read_lock(&vcpu->kvm->srcu);
2916 kvm_steal_time_set_preempted(vcpu);
2917 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2918 pagefault_enable();
2919 kvm_x86_ops->vcpu_put(vcpu);
2920 kvm_put_guest_fpu(vcpu);
2921 vcpu->arch.last_host_tsc = rdtsc();
2922}
2923
2924static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
2925 struct kvm_lapic_state *s)
2926{
2927 if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
2928 kvm_x86_ops->sync_pir_to_irr(vcpu);
2929
2930 return kvm_apic_get_state(vcpu, s);
2931}
2932
2933static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
2934 struct kvm_lapic_state *s)
2935{
2936 int r;
2937
2938 r = kvm_apic_set_state(vcpu, s);
2939 if (r)
2940 return r;
2941 update_cr8_intercept(vcpu);
2942
2943 return 0;
2944}
2945
2946static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
2947{
2948 return (!lapic_in_kernel(vcpu) ||
2949 kvm_apic_accept_pic_intr(vcpu));
2950}
2951
2952/*
2953 * if userspace requested an interrupt window, check that the
2954 * interrupt window is open.
2955 *
2956 * No need to exit to userspace if we already have an interrupt queued.
2957 */
2958static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
2959{
2960 return kvm_arch_interrupt_allowed(vcpu) &&
2961 !kvm_cpu_has_interrupt(vcpu) &&
2962 !kvm_event_needs_reinjection(vcpu) &&
2963 kvm_cpu_accept_dm_intr(vcpu);
2964}
2965
2966static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
2967 struct kvm_interrupt *irq)
2968{
2969 if (irq->irq >= KVM_NR_INTERRUPTS)
2970 return -EINVAL;
2971
2972 if (!irqchip_in_kernel(vcpu->kvm)) {
2973 kvm_queue_interrupt(vcpu, irq->irq, false);
2974 kvm_make_request(KVM_REQ_EVENT, vcpu);
2975 return 0;
2976 }
2977
2978 /*
2979  * With in-kernel LAPIC, we only use this to inject EXTINT, so
2980  * fail for in-kernel 8259.
2981  */
2982 if (pic_in_kernel(vcpu->kvm))
2983 return -ENXIO;
2984
2985 if (vcpu->arch.pending_external_vector != -1)
2986 return -EEXIST;
2987
2988 vcpu->arch.pending_external_vector = irq->irq;
2989 kvm_make_request(KVM_REQ_EVENT, vcpu);
2990 return 0;
2991}
2992
2993static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
2994{
2995 kvm_inject_nmi(vcpu);
2996
2997 return 0;
2998}
2999
3000static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
3001{
3002 kvm_make_request(KVM_REQ_SMI, vcpu);
3003
3004 return 0;
3005}
3006
3007static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
3008 struct kvm_tpr_access_ctl *tac)
3009{
3010 if (tac->flags)
3011 return -EINVAL;
3012 vcpu->arch.tpr_access_reporting = !!tac->enabled;
3013 return 0;
3014}
3015
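/*
 * KVM_X86_SETUP_MCE: validate the requested MCG_CAP (the bank count and
 * capability bits must be within what KVM supports) and initialize the
 * MCE control registers to their power-on value of all 1s.
 */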
3016static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
3017 u64 mcg_cap)
3018{
3019 int r;
3020 unsigned bank_num = mcg_cap & 0xff, bank;
3021
3022 r = -EINVAL;
3023 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
3024 goto out;
3025 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
3026 goto out;
3027 r = 0;
3028 vcpu->arch.mcg_cap = mcg_cap;
3029
3030 if (mcg_cap & MCG_CTL_P)
3031 vcpu->arch.mcg_ctl = ~(u64)0;
3032
3033 for (bank = 0; bank < bank_num; bank++)
3034 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
3035
3036 if (kvm_x86_ops->setup_mce)
3037 kvm_x86_ops->setup_mce(vcpu);
3038out:
3039 return r;
3040}
3041
3042static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
3043 struct kvm_x86_mce *mce)
3044{
3045 u64 mcg_cap = vcpu->arch.mcg_cap;
3046 unsigned bank_num = mcg_cap & 0xff;
3047 u64 *banks = vcpu->arch.mce_banks;
3048
3049 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
3050 return -EINVAL;
3051
3052 /* if IA32_MCG_CTL is not all 1s, the uncorrected error
3053  * reporting is disabled
3054  */
3055 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
3056 vcpu->arch.mcg_ctl != ~(u64)0)
3057 return 0;
3058 banks += 4 * mce->bank;
3059
3060 /* if IA32_MCi_CTL is not all 1s, the uncorrected error
3061  * reporting is disabled for the bank
3062  */
3063 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
3064 return 0;
3065 if (mce->status & MCI_STATUS_UC) {
3066 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
3067 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
3068 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3069 return 0;
3070 }
3071 if (banks[1] & MCI_STATUS_VAL)
3072 mce->status |= MCI_STATUS_OVER;
3073 banks[2] = mce->addr;
3074 banks[3] = mce->misc;
3075 vcpu->arch.mcg_status = mce->mcg_status;
3076 banks[1] = mce->status;
3077 kvm_queue_exception(vcpu, MC_VECTOR);
3078 } else if (!(banks[1] & MCI_STATUS_VAL)
3079 || !(banks[1] & MCI_STATUS_UC)) {
3080 if (banks[1] & MCI_STATUS_VAL)
3081 mce->status |= MCI_STATUS_OVER;
3082 banks[2] = mce->addr;
3083 banks[3] = mce->misc;
3084 banks[1] = mce->status;
3085 } else
3086 banks[1] |= MCI_STATUS_OVER;
3087 return 0;
3088}
3089
3090static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
3091 struct kvm_vcpu_events *events)
3092{
3093 process_nmi(vcpu);
3094
3095 /*
3096  * FIXME: pass injected and pending separately.  This is only
3097  * needed for nested virtualization; combine them for now.
3098  */
3099 events->exception.injected =
3100 (vcpu->arch.exception.pending ||
3101 vcpu->arch.exception.injected) &&
3102 !kvm_exception_is_soft(vcpu->arch.exception.nr);
3103 events->exception.nr = vcpu->arch.exception.nr;
3104 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
3105 events->exception.pad = 0;
3106 events->exception.error_code = vcpu->arch.exception.error_code;
3107
3108 events->interrupt.injected =
3109 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
3110 events->interrupt.nr = vcpu->arch.interrupt.nr;
3111 events->interrupt.soft = 0;
3112 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
3113
3114 events->nmi.injected = vcpu->arch.nmi_injected;
3115 events->nmi.pending = vcpu->arch.nmi_pending != 0;
3116 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
3117 events->nmi.pad = 0;
3118
3119 events->sipi_vector = 0;
3120
3121 events->smi.smm = is_smm(vcpu);
3122 events->smi.pending = vcpu->arch.smi_pending;
3123 events->smi.smm_inside_nmi =
3124 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
3125 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
3126
3127 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
3128 | KVM_VCPUEVENT_VALID_SHADOW
3129 | KVM_VCPUEVENT_VALID_SMM);
3130 memset(&events->reserved, 0, sizeof(events->reserved));
3131}
3132
3133static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags);
3134
3135static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3136 struct kvm_vcpu_events *events)
3137{
3138 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
3139 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
3140 | KVM_VCPUEVENT_VALID_SHADOW
3141 | KVM_VCPUEVENT_VALID_SMM))
3142 return -EINVAL;
3143
3144 if (events->exception.injected &&
3145 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
3146 is_guest_mode(vcpu)))
3147 return -EINVAL;
3148
3149 /* INITs are latched while in SMM */
3150 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
3151 (events->smi.smm || events->smi.pending) &&
3152 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3153 return -EINVAL;
3154
3155 process_nmi(vcpu);
3156 vcpu->arch.exception.injected = false;
3157 vcpu->arch.exception.pending = events->exception.injected;
3158 vcpu->arch.exception.nr = events->exception.nr;
3159 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
3160 vcpu->arch.exception.error_code = events->exception.error_code;
3161
3162 vcpu->arch.interrupt.pending = events->interrupt.injected;
3163 vcpu->arch.interrupt.nr = events->interrupt.nr;
3164 vcpu->arch.interrupt.soft = events->interrupt.soft;
3165 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3166 kvm_x86_ops->set_interrupt_shadow(vcpu,
3167 events->interrupt.shadow);
3168
3169 vcpu->arch.nmi_injected = events->nmi.injected;
3170 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3171 vcpu->arch.nmi_pending = events->nmi.pending;
3172 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3173
3174 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3175 lapic_in_kernel(vcpu))
3176 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3177
3178 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
3179 u32 hflags = vcpu->arch.hflags;
3180 if (events->smi.smm)
3181 hflags |= HF_SMM_MASK;
3182 else
3183 hflags &= ~HF_SMM_MASK;
3184 kvm_set_hflags(vcpu, hflags);
3185
3186 vcpu->arch.smi_pending = events->smi.pending;
3187
3188 if (events->smi.smm) {
3189 if (events->smi.smm_inside_nmi)
3190 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
3191 else
3192 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
3193 if (lapic_in_kernel(vcpu)) {
3194 if (events->smi.latched_init)
3195 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3196 else
3197 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3198 }
3199 }
3200 }
3201
3202 kvm_make_request(KVM_REQ_EVENT, vcpu);
3203
3204 return 0;
3205}
3206
3207static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3208 struct kvm_debugregs *dbgregs)
3209{
3210 unsigned long val;
3211
3212 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3213 kvm_get_dr(vcpu, 6, &val);
3214 dbgregs->dr6 = val;
3215 dbgregs->dr7 = vcpu->arch.dr7;
3216 dbgregs->flags = 0;
3217 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3218}
3219
3220static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3221 struct kvm_debugregs *dbgregs)
3222{
3223 if (dbgregs->flags)
3224 return -EINVAL;
3225
3226 if (dbgregs->dr6 & ~0xffffffffull)
3227 return -EINVAL;
3228 if (dbgregs->dr7 & ~0xffffffffull)
3229 return -EINVAL;
3230
3231 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3232 kvm_update_dr0123(vcpu);
3233 vcpu->arch.dr6 = dbgregs->dr6;
3234 kvm_update_dr6(vcpu);
3235 vcpu->arch.dr7 = dbgregs->dr7;
3236 kvm_update_dr7(vcpu);
3237
3238 return 0;
3239}
3240
3241#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
3242
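/*
 * Convert the kernel's (possibly compacted) xsave image for this vcpu
 * into the non-compacted layout that the KVM_GET_XSAVE ABI expects.
 */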
3243static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3244{
3245 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3246 u64 xstate_bv = xsave->header.xfeatures;
3247 u64 valid;
3248
3249 /*
3250  * Copy legacy XSAVE area, to avoid complications with CPUID
3251  * leaves 0 and 1 in the loop below.
3252  */
3253 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
3254
3255 /* Set XSTATE_BV */
3256 xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
3257 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
3258
3259 /*
3260  * Copy each region from the possibly compacted offset to the
3261  * non-compacted offset.
3262  */
3263 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3264 while (valid) {
3265 u64 feature = valid & -valid;
3266 int index = fls64(feature) - 1;
3267 void *src = get_xsave_addr(xsave, feature);
3268
3269 if (src) {
3270 u32 size, offset, ecx, edx;
3271 cpuid_count(XSTATE_CPUID, index,
3272 &size, &offset, &ecx, &edx);
3273 if (feature == XFEATURE_MASK_PKRU)
3274 memcpy(dest + offset, &vcpu->arch.pkru,
3275 sizeof(vcpu->arch.pkru));
3276 else
3277 memcpy(dest + offset, src, size);
3278
3279 }
3280
3281 valid -= feature;
3282 }
3283}
3284
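/*
 * Inverse of fill_xsave(): copy a non-compacted xsave image supplied by
 * userspace into the kernel's (possibly compacted) guest_fpu state.
 */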
3285static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3286{
3287 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3288 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3289 u64 valid;
3290
3291 /*
3292  * Copy legacy XSAVE area, to avoid complications with CPUID
3293  * leaves 0 and 1 in the loop below.
3294  */
3295 memcpy(xsave, src, XSAVE_HDR_OFFSET);
3296
3297 /* Set XSTATE_BV and possibly XCOMP_BV. */
3298 xsave->header.xfeatures = xstate_bv;
3299 if (boot_cpu_has(X86_FEATURE_XSAVES))
3300 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
3301
3302 /*
3303  * Copy each region from the non-compacted offset to the
3304  * possibly compacted offset.
3305  */
3306 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3307 while (valid) {
3308 u64 feature = valid & -valid;
3309 int index = fls64(feature) - 1;
3310 void *dest = get_xsave_addr(xsave, feature);
3311
3312 if (dest) {
3313 u32 size, offset, ecx, edx;
3314 cpuid_count(XSTATE_CPUID, index,
3315 &size, &offset, &ecx, &edx);
3316 if (feature == XFEATURE_MASK_PKRU)
3317 memcpy(&vcpu->arch.pkru, src + offset,
3318 sizeof(vcpu->arch.pkru));
3319 else
3320 memcpy(dest, src + offset, size);
3321 }
3322
3323 valid -= feature;
3324 }
3325}
3326
3327static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3328 struct kvm_xsave *guest_xsave)
3329{
3330 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
3331 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
3332 fill_xsave((u8 *) guest_xsave->region, vcpu);
3333 } else {
3334 memcpy(guest_xsave->region,
3335 &vcpu->arch.guest_fpu.state.fxsave,
3336 sizeof(struct fxregs_state));
3337 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3338 XFEATURE_MASK_FPSSE;
3339 }
3340}
3341
3342#define XSAVE_MXCSR_OFFSET 24
3343
3344static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3345 struct kvm_xsave *guest_xsave)
3346{
3347 u64 xstate_bv =
3348 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
3349 u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
3350
3351 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
3352 /*
3353  * Reject states that the host cannot restore: xstate_bv must
3354  * be a subset of the xcr0 bits KVM supports, and mxcsr must
3355  * not have reserved bits set.
3356  */
3357 if (xstate_bv & ~kvm_supported_xcr0() ||
3358 mxcsr & ~mxcsr_feature_mask)
3359 return -EINVAL;
3360 load_xsave(vcpu, (u8 *)guest_xsave->region);
3361 } else {
3362 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
3363 mxcsr & ~mxcsr_feature_mask)
3364 return -EINVAL;
3365 memcpy(&vcpu->arch.guest_fpu.state.fxsave,
3366 guest_xsave->region, sizeof(struct fxregs_state));
3367 }
3368 return 0;
3369}
3370
3371static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
3372 struct kvm_xcrs *guest_xcrs)
3373{
3374 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
3375 guest_xcrs->nr_xcrs = 0;
3376 return;
3377 }
3378
3379 guest_xcrs->nr_xcrs = 1;
3380 guest_xcrs->flags = 0;
3381 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
3382 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
3383}
3384
3385static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
3386 struct kvm_xcrs *guest_xcrs)
3387{
3388 int i, r = 0;
3389
3390 if (!boot_cpu_has(X86_FEATURE_XSAVE))
3391 return -EINVAL;
3392
3393 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
3394 return -EINVAL;
3395
3396 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
3397 /* Only support XCR0 currently */
3398 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
3399 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
3400 guest_xcrs->xcrs[i].value);
3401 break;
3402 }
3403 if (r)
3404 r = -EINVAL;
3405 return r;
3406}
3407
3408/*
3409 * kvm_set_guest_paused() indicates to the guest kernel that it has been
3410 * stopped by the hypervisor; the flag is delivered to the guest through
3411 * its pvclock area on the next clock update.  Returns -EINVAL if the
3412 * vCPU has not enabled pvclock, since the flag cannot be delivered then.
3413 */
3414static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
3415{
3416 if (!vcpu->arch.pv_time_enabled)
3417 return -EINVAL;
3418 vcpu->arch.pvclock_set_guest_stopped_request = true;
3419 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3420 return 0;
3421}
3422
3423static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3424 struct kvm_enable_cap *cap)
3425{
3426 if (cap->flags)
3427 return -EINVAL;
3428
3429 switch (cap->cap) {
3430 case KVM_CAP_HYPERV_SYNIC2:
3431 if (cap->args[0])
3432 return -EINVAL;
3433 case KVM_CAP_HYPERV_SYNIC:
3434 if (!irqchip_in_kernel(vcpu->kvm))
3435 return -EINVAL;
3436 return kvm_hv_activate_synic(vcpu, cap->cap ==
3437 KVM_CAP_HYPERV_SYNIC2);
3438 default:
3439 return -EINVAL;
3440 }
3441}
3442
3443long kvm_arch_vcpu_ioctl(struct file *filp,
3444 unsigned int ioctl, unsigned long arg)
3445{
3446 struct kvm_vcpu *vcpu = filp->private_data;
3447 void __user *argp = (void __user *)arg;
3448 int r;
3449 union {
3450 struct kvm_lapic_state *lapic;
3451 struct kvm_xsave *xsave;
3452 struct kvm_xcrs *xcrs;
3453 void *buffer;
3454 } u;
3455
3456 u.buffer = NULL;
3457 switch (ioctl) {
3458 case KVM_GET_LAPIC: {
3459 r = -EINVAL;
3460 if (!lapic_in_kernel(vcpu))
3461 goto out;
3462 u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
3463
3464 r = -ENOMEM;
3465 if (!u.lapic)
3466 goto out;
3467 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
3468 if (r)
3469 goto out;
3470 r = -EFAULT;
3471 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
3472 goto out;
3473 r = 0;
3474 break;
3475 }
3476 case KVM_SET_LAPIC: {
3477 r = -EINVAL;
3478 if (!lapic_in_kernel(vcpu))
3479 goto out;
3480 u.lapic = memdup_user(argp, sizeof(*u.lapic));
3481 if (IS_ERR(u.lapic))
3482 return PTR_ERR(u.lapic);
3483
3484 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
3485 break;
3486 }
3487 case KVM_INTERRUPT: {
3488 struct kvm_interrupt irq;
3489
3490 r = -EFAULT;
3491 if (copy_from_user(&irq, argp, sizeof irq))
3492 goto out;
3493 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
3494 break;
3495 }
3496 case KVM_NMI: {
3497 r = kvm_vcpu_ioctl_nmi(vcpu);
3498 break;
3499 }
3500 case KVM_SMI: {
3501 r = kvm_vcpu_ioctl_smi(vcpu);
3502 break;
3503 }
3504 case KVM_SET_CPUID: {
3505 struct kvm_cpuid __user *cpuid_arg = argp;
3506 struct kvm_cpuid cpuid;
3507
3508 r = -EFAULT;
3509 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3510 goto out;
3511 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
3512 break;
3513 }
3514 case KVM_SET_CPUID2: {
3515 struct kvm_cpuid2 __user *cpuid_arg = argp;
3516 struct kvm_cpuid2 cpuid;
3517
3518 r = -EFAULT;
3519 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3520 goto out;
3521 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
3522 cpuid_arg->entries);
3523 break;
3524 }
3525 case KVM_GET_CPUID2: {
3526 struct kvm_cpuid2 __user *cpuid_arg = argp;
3527 struct kvm_cpuid2 cpuid;
3528
3529 r = -EFAULT;
3530 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
3531 goto out;
3532 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
3533 cpuid_arg->entries);
3534 if (r)
3535 goto out;
3536 r = -EFAULT;
3537 if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
3538 goto out;
3539 r = 0;
3540 break;
3541 }
3542 case KVM_GET_MSRS:
3543 r = msr_io(vcpu, argp, do_get_msr, 1);
3544 break;
3545 case KVM_SET_MSRS:
3546 r = msr_io(vcpu, argp, do_set_msr, 0);
3547 break;
3548 case KVM_TPR_ACCESS_REPORTING: {
3549 struct kvm_tpr_access_ctl tac;
3550
3551 r = -EFAULT;
3552 if (copy_from_user(&tac, argp, sizeof tac))
3553 goto out;
3554 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
3555 if (r)
3556 goto out;
3557 r = -EFAULT;
3558 if (copy_to_user(argp, &tac, sizeof tac))
3559 goto out;
3560 r = 0;
3561 break;
3562 }
3563 case KVM_SET_VAPIC_ADDR: {
3564 struct kvm_vapic_addr va;
3565 int idx;
3566
3567 r = -EINVAL;
3568 if (!lapic_in_kernel(vcpu))
3569 goto out;
3570 r = -EFAULT;
3571 if (copy_from_user(&va, argp, sizeof va))
3572 goto out;
3573 idx = srcu_read_lock(&vcpu->kvm->srcu);
3574 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
3575 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3576 break;
3577 }
3578 case KVM_X86_SETUP_MCE: {
3579 u64 mcg_cap;
3580
3581 r = -EFAULT;
3582 if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
3583 goto out;
3584 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
3585 break;
3586 }
3587 case KVM_X86_SET_MCE: {
3588 struct kvm_x86_mce mce;
3589
3590 r = -EFAULT;
3591 if (copy_from_user(&mce, argp, sizeof mce))
3592 goto out;
3593 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
3594 break;
3595 }
3596 case KVM_GET_VCPU_EVENTS: {
3597 struct kvm_vcpu_events events;
3598
3599 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
3600
3601 r = -EFAULT;
3602 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
3603 break;
3604 r = 0;
3605 break;
3606 }
3607 case KVM_SET_VCPU_EVENTS: {
3608 struct kvm_vcpu_events events;
3609
3610 r = -EFAULT;
3611 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
3612 break;
3613
3614 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
3615 break;
3616 }
3617 case KVM_GET_DEBUGREGS: {
3618 struct kvm_debugregs dbgregs;
3619
3620 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
3621
3622 r = -EFAULT;
3623 if (copy_to_user(argp, &dbgregs,
3624 sizeof(struct kvm_debugregs)))
3625 break;
3626 r = 0;
3627 break;
3628 }
3629 case KVM_SET_DEBUGREGS: {
3630 struct kvm_debugregs dbgregs;
3631
3632 r = -EFAULT;
3633 if (copy_from_user(&dbgregs, argp,
3634 sizeof(struct kvm_debugregs)))
3635 break;
3636
3637 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
3638 break;
3639 }
3640 case KVM_GET_XSAVE: {
3641 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
3642 r = -ENOMEM;
3643 if (!u.xsave)
3644 break;
3645
3646 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
3647
3648 r = -EFAULT;
3649 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
3650 break;
3651 r = 0;
3652 break;
3653 }
3654 case KVM_SET_XSAVE: {
3655 u.xsave = memdup_user(argp, sizeof(*u.xsave));
3656 if (IS_ERR(u.xsave))
3657 return PTR_ERR(u.xsave);
3658
3659 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
3660 break;
3661 }
3662 case KVM_GET_XCRS: {
3663 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
3664 r = -ENOMEM;
3665 if (!u.xcrs)
3666 break;
3667
3668 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
3669
3670 r = -EFAULT;
3671 if (copy_to_user(argp, u.xcrs,
3672 sizeof(struct kvm_xcrs)))
3673 break;
3674 r = 0;
3675 break;
3676 }
3677 case KVM_SET_XCRS: {
3678 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
3679 if (IS_ERR(u.xcrs))
3680 return PTR_ERR(u.xcrs);
3681
3682 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
3683 break;
3684 }
3685 case KVM_SET_TSC_KHZ: {
3686 u32 user_tsc_khz;
3687
3688 r = -EINVAL;
3689 user_tsc_khz = (u32)arg;
3690
3691 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
3692 goto out;
3693
3694 if (user_tsc_khz == 0)
3695 user_tsc_khz = tsc_khz;
3696
3697 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
3698 r = 0;
3699
3700 goto out;
3701 }
3702 case KVM_GET_TSC_KHZ: {
3703 r = vcpu->arch.virtual_tsc_khz;
3704 goto out;
3705 }
3706 case KVM_KVMCLOCK_CTRL: {
3707 r = kvm_set_guest_paused(vcpu);
3708 goto out;
3709 }
3710 case KVM_ENABLE_CAP: {
3711 struct kvm_enable_cap cap;
3712
3713 r = -EFAULT;
3714 if (copy_from_user(&cap, argp, sizeof(cap)))
3715 goto out;
3716 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3717 break;
3718 }
3719 default:
3720 r = -EINVAL;
3721 }
3722out:
3723 kfree(u.buffer);
3724 return r;
3725}
3726
3727int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3728{
3729 return VM_FAULT_SIGBUS;
3730}
3731
3732static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
3733{
3734 int ret;
3735
3736 if (addr > (unsigned int)(-3 * PAGE_SIZE))
3737 return -EINVAL;
3738 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
3739 return ret;
3740}
3741
3742static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
3743 u64 ident_addr)
3744{
3745 kvm->arch.ept_identity_map_addr = ident_addr;
3746 return 0;
3747}
3748
3749static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
3750 u32 kvm_nr_mmu_pages)
3751{
3752 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
3753 return -EINVAL;
3754
3755 mutex_lock(&kvm->slots_lock);
3756
3757 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
3758 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
3759
3760 mutex_unlock(&kvm->slots_lock);
3761 return 0;
3762}
3763
3764static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
3765{
3766 return kvm->arch.n_max_mmu_pages;
3767}
3768
3769static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3770{
3771 struct kvm_pic *pic = kvm->arch.vpic;
3772 int r;
3773
3774 r = 0;
3775 switch (chip->chip_id) {
3776 case KVM_IRQCHIP_PIC_MASTER:
3777 memcpy(&chip->chip.pic, &pic->pics[0],
3778 sizeof(struct kvm_pic_state));
3779 break;
3780 case KVM_IRQCHIP_PIC_SLAVE:
3781 memcpy(&chip->chip.pic, &pic->pics[1],
3782 sizeof(struct kvm_pic_state));
3783 break;
3784 case KVM_IRQCHIP_IOAPIC:
3785 kvm_get_ioapic(kvm, &chip->chip.ioapic);
3786 break;
3787 default:
3788 r = -EINVAL;
3789 break;
3790 }
3791 return r;
3792}
3793
3794static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
3795{
3796 struct kvm_pic *pic = kvm->arch.vpic;
3797 int r;
3798
3799 r = 0;
3800 switch (chip->chip_id) {
3801 case KVM_IRQCHIP_PIC_MASTER:
3802 spin_lock(&pic->lock);
3803 memcpy(&pic->pics[0], &chip->chip.pic,
3804 sizeof(struct kvm_pic_state));
3805 spin_unlock(&pic->lock);
3806 break;
3807 case KVM_IRQCHIP_PIC_SLAVE:
3808 spin_lock(&pic->lock);
3809 memcpy(&pic->pics[1], &chip->chip.pic,
3810 sizeof(struct kvm_pic_state));
3811 spin_unlock(&pic->lock);
3812 break;
3813 case KVM_IRQCHIP_IOAPIC:
3814 kvm_set_ioapic(kvm, &chip->chip.ioapic);
3815 break;
3816 default:
3817 r = -EINVAL;
3818 break;
3819 }
3820 kvm_pic_update_irq(pic);
3821 return r;
3822}
3823
3824static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3825{
3826 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
3827
3828 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
3829
3830 mutex_lock(&kps->lock);
3831 memcpy(ps, &kps->channels, sizeof(*ps));
3832 mutex_unlock(&kps->lock);
3833 return 0;
3834}
3835
3836static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
3837{
3838 int i;
3839 struct kvm_pit *pit = kvm->arch.vpit;
3840
3841 mutex_lock(&pit->pit_state.lock);
3842 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
3843 for (i = 0; i < 3; i++)
3844 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
3845 mutex_unlock(&pit->pit_state.lock);
3846 return 0;
3847}
3848
3849static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3850{
3851 mutex_lock(&kvm->arch.vpit->pit_state.lock);
3852 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
3853 sizeof(ps->channels));
3854 ps->flags = kvm->arch.vpit->pit_state.flags;
3855 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
3856 memset(&ps->reserved, 0, sizeof(ps->reserved));
3857 return 0;
3858}
3859
3860static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
3861{
3862 int start = 0;
3863 int i;
3864 u32 prev_legacy, cur_legacy;
3865 struct kvm_pit *pit = kvm->arch.vpit;
3866
3867 mutex_lock(&pit->pit_state.lock);
3868 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
3869 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
3870 if (!prev_legacy && cur_legacy)
3871 start = 1;
3872 memcpy(&pit->pit_state.channels, &ps->channels,
3873 sizeof(pit->pit_state.channels));
3874 pit->pit_state.flags = ps->flags;
3875 for (i = 0; i < 3; i++)
3876 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
3877 start && i == 0);
3878 mutex_unlock(&pit->pit_state.lock);
3879 return 0;
3880}
3881
3882static int kvm_vm_ioctl_reinject(struct kvm *kvm,
3883 struct kvm_reinject_control *control)
3884{
3885 struct kvm_pit *pit = kvm->arch.vpit;
3886
3887 if (!pit)
3888 return -ENXIO;
3889
3890 /* pit->pit_state.lock was overloaded to prevent userspace from getting
3891  * an inconsistent state after running multiple KVM_REINJECT_CONTROL
3892  * ioctls in parallel.  Use a separate lock if that ioctl isn't rare.
3893  */
3894 mutex_lock(&pit->pit_state.lock);
3895 kvm_pit_set_reinject(pit, control->pit_reinject);
3896 mutex_unlock(&pit->pit_state.lock);
3897
3898 return 0;
3899}
3900
3901
3902
3903
3904/**
3905 * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
3906 * @kvm: kvm instance
3907 * @log: slot id and address to which we copy the log
3908 *
3909 * Steps 1-4 below provide a general overview of dirty page logging. See
3910 * kvm_get_dirty_log_protect() for additional details.
3911 *
3912 * We call kvm_get_dirty_log_protect() to handle steps 1-3, and flush the
3913 * TLB (step 4) afterwards if any pages turned out to be dirty:
3914 *
3915 *   1. Take a snapshot of the bit and clear it if needed.
3916 *   2. Write protect the corresponding page.
3917 *   3. Copy the snapshot to the userspace.
3918 *   4. Flush TLB's if needed.
3919 */
3920int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
3921{
3922 bool is_dirty = false;
3923 int r;
3924
3925 mutex_lock(&kvm->slots_lock);
3926
3927 /*
3928  * Flush potentially hardware-cached dirty pages to dirty_bitmap.
3929  */
3930 if (kvm_x86_ops->flush_log_dirty)
3931 kvm_x86_ops->flush_log_dirty(kvm);
3932
3933 r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
3934
3935 /*
3936  * All the TLBs can be flushed out of mmu lock, see the comments in
3937  * kvm_mmu_slot_remove_write_access().
3938  */
3939 lockdep_assert_held(&kvm->slots_lock);
3940 if (is_dirty)
3941 kvm_flush_remote_tlbs(kvm);
3942
3943 mutex_unlock(&kvm->slots_lock);
3944 return r;
3945}
3946
3947int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
3948 bool line_status)
3949{
3950 if (!irqchip_in_kernel(kvm))
3951 return -ENXIO;
3952
3953 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
3954 irq_event->irq, irq_event->level,
3955 line_status);
3956 return 0;
3957}
3958
3959static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
3960 struct kvm_enable_cap *cap)
3961{
3962 int r;
3963
3964 if (cap->flags)
3965 return -EINVAL;
3966
3967 switch (cap->cap) {
3968 case KVM_CAP_DISABLE_QUIRKS:
3969 kvm->arch.disabled_quirks = cap->args[0];
3970 r = 0;
3971 break;
3972 case KVM_CAP_SPLIT_IRQCHIP: {
3973 mutex_lock(&kvm->lock);
3974 r = -EINVAL;
3975 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
3976 goto split_irqchip_unlock;
3977 r = -EEXIST;
3978 if (irqchip_in_kernel(kvm))
3979 goto split_irqchip_unlock;
3980 if (kvm->created_vcpus)
3981 goto split_irqchip_unlock;
3982 r = kvm_setup_empty_irq_routing(kvm);
3983 if (r)
3984 goto split_irqchip_unlock;
3985 /* Pairs with irqchip_in_kernel. */
3986 smp_wmb();
3987 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
3988 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
3989 r = 0;
3990split_irqchip_unlock:
3991 mutex_unlock(&kvm->lock);
3992 break;
3993 }
3994 case KVM_CAP_X2APIC_API:
3995 r = -EINVAL;
3996 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
3997 break;
3998
3999 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
4000 kvm->arch.x2apic_format = true;
4001 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
4002 kvm->arch.x2apic_broadcast_quirk_disabled = true;
4003
4004 r = 0;
4005 break;
4006 default:
4007 r = -EINVAL;
4008 break;
4009 }
4010 return r;
4011}
4012
4013long kvm_arch_vm_ioctl(struct file *filp,
4014 unsigned int ioctl, unsigned long arg)
4015{
4016 struct kvm *kvm = filp->private_data;
4017 void __user *argp = (void __user *)arg;
4018 int r = -ENOTTY;
4019
4020 /*
4021  * This union makes it completely explicit to gcc-3.x that these
4022  * variables' stack usage should be combined, not added together.
4023  */
4024 union {
4025 struct kvm_pit_state ps;
4026 struct kvm_pit_state2 ps2;
4027 struct kvm_pit_config pit_config;
4028 } u;
4029
4030 switch (ioctl) {
4031 case KVM_SET_TSS_ADDR:
4032 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
4033 break;
4034 case KVM_SET_IDENTITY_MAP_ADDR: {
4035 u64 ident_addr;
4036
4037 r = -EFAULT;
4038 if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
4039 goto out;
4040 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
4041 break;
4042 }
4043 case KVM_SET_NR_MMU_PAGES:
4044 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
4045 break;
4046 case KVM_GET_NR_MMU_PAGES:
4047 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
4048 break;
4049 case KVM_CREATE_IRQCHIP: {
4050 mutex_lock(&kvm->lock);
4051
4052 r = -EEXIST;
4053 if (irqchip_in_kernel(kvm))
4054 goto create_irqchip_unlock;
4055
4056 r = -EINVAL;
4057 if (kvm->created_vcpus)
4058 goto create_irqchip_unlock;
4059
4060 r = kvm_pic_init(kvm);
4061 if (r)
4062 goto create_irqchip_unlock;
4063
4064 r = kvm_ioapic_init(kvm);
4065 if (r) {
4066 kvm_pic_destroy(kvm);
4067 goto create_irqchip_unlock;
4068 }
4069
4070 r = kvm_setup_default_irq_routing(kvm);
4071 if (r) {
4072 kvm_ioapic_destroy(kvm);
4073 kvm_pic_destroy(kvm);
4074 goto create_irqchip_unlock;
4075 }
4076 /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
4077 smp_wmb();
4078 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
4079 create_irqchip_unlock:
4080 mutex_unlock(&kvm->lock);
4081 break;
4082 }
4083 case KVM_CREATE_PIT:
4084 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
4085 goto create_pit;
4086 case KVM_CREATE_PIT2:
4087 r = -EFAULT;
4088 if (copy_from_user(&u.pit_config, argp,
4089 sizeof(struct kvm_pit_config)))
4090 goto out;
4091 create_pit:
4092 mutex_lock(&kvm->lock);
4093 r = -EEXIST;
4094 if (kvm->arch.vpit)
4095 goto create_pit_unlock;
4096 r = -ENOMEM;
4097 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
4098 if (kvm->arch.vpit)
4099 r = 0;
4100 create_pit_unlock:
4101 mutex_unlock(&kvm->lock);
4102 break;
4103 case KVM_GET_IRQCHIP: {
4104 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
4105 struct kvm_irqchip *chip;
4106
4107 chip = memdup_user(argp, sizeof(*chip));
4108 if (IS_ERR(chip)) {
4109 r = PTR_ERR(chip);
4110 goto out;
4111 }
4112
4113 r = -ENXIO;
4114 if (!irqchip_kernel(kvm))
4115 goto get_irqchip_out;
4116 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
4117 if (r)
4118 goto get_irqchip_out;
4119 r = -EFAULT;
4120 if (copy_to_user(argp, chip, sizeof *chip))
4121 goto get_irqchip_out;
4122 r = 0;
4123 get_irqchip_out:
4124 kfree(chip);
4125 break;
4126 }
4127 case KVM_SET_IRQCHIP: {
4128 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
4129 struct kvm_irqchip *chip;
4130
4131 chip = memdup_user(argp, sizeof(*chip));
4132 if (IS_ERR(chip)) {
4133 r = PTR_ERR(chip);
4134 goto out;
4135 }
4136
4137 r = -ENXIO;
4138 if (!irqchip_kernel(kvm))
4139 goto set_irqchip_out;
4140 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
4141 if (r)
4142 goto set_irqchip_out;
4143 r = 0;
4144 set_irqchip_out:
4145 kfree(chip);
4146 break;
4147 }
4148 case KVM_GET_PIT: {
4149 r = -EFAULT;
4150 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
4151 goto out;
4152 r = -ENXIO;
4153 if (!kvm->arch.vpit)
4154 goto out;
4155 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
4156 if (r)
4157 goto out;
4158 r = -EFAULT;
4159 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
4160 goto out;
4161 r = 0;
4162 break;
4163 }
4164 case KVM_SET_PIT: {
4165 r = -EFAULT;
4166 if (copy_from_user(&u.ps, argp, sizeof u.ps))
4167 goto out;
4168 r = -ENXIO;
4169 if (!kvm->arch.vpit)
4170 goto out;
4171 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
4172 break;
4173 }
4174 case KVM_GET_PIT2: {
4175 r = -ENXIO;
4176 if (!kvm->arch.vpit)
4177 goto out;
4178 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
4179 if (r)
4180 goto out;
4181 r = -EFAULT;
4182 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
4183 goto out;
4184 r = 0;
4185 break;
4186 }
4187 case KVM_SET_PIT2: {
4188 r = -EFAULT;
4189 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
4190 goto out;
4191 r = -ENXIO;
4192 if (!kvm->arch.vpit)
4193 goto out;
4194 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
4195 break;
4196 }
4197 case KVM_REINJECT_CONTROL: {
4198 struct kvm_reinject_control control;
4199 r = -EFAULT;
4200 if (copy_from_user(&control, argp, sizeof(control)))
4201 goto out;
4202 r = kvm_vm_ioctl_reinject(kvm, &control);
4203 break;
4204 }
4205 case KVM_SET_BOOT_CPU_ID:
4206 r = 0;
4207 mutex_lock(&kvm->lock);
4208 if (kvm->created_vcpus)
4209 r = -EBUSY;
4210 else
4211 kvm->arch.bsp_vcpu_id = arg;
4212 mutex_unlock(&kvm->lock);
4213 break;
4214 case KVM_XEN_HVM_CONFIG: {
4215 r = -EFAULT;
4216 if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
4217 sizeof(struct kvm_xen_hvm_config)))
4218 goto out;
4219 r = -EINVAL;
4220 if (kvm->arch.xen_hvm_config.flags)
4221 goto out;
4222 r = 0;
4223 break;
4224 }
4225 case KVM_SET_CLOCK: {
4226 struct kvm_clock_data user_ns;
4227 u64 now_ns;
4228
4229 r = -EFAULT;
4230 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
4231 goto out;
4232
4233 r = -EINVAL;
4234 if (user_ns.flags)
4235 goto out;
4236
4237 r = 0;
4238 /*
4239  * TODO: userspace has to take care of races with VCPU_RUN, so
4240  * kvm_gen_update_masterclock() can be cut down to locked
4241  * kvm_get_time_and_clockread.
4242  */
4243 kvm_gen_update_masterclock(kvm);
4244 now_ns = get_kvmclock_ns(kvm);
4245 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
4246 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
4247 break;
4248 }
4249 case KVM_GET_CLOCK: {
4250 struct kvm_clock_data user_ns;
4251 u64 now_ns;
4252
4253 now_ns = get_kvmclock_ns(kvm);
4254 user_ns.clock = now_ns;
4255 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
4256 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
4257
4258 r = -EFAULT;
4259 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
4260 goto out;
4261 r = 0;
4262 break;
4263 }
4264 case KVM_ENABLE_CAP: {
4265 struct kvm_enable_cap cap;
4266
4267 r = -EFAULT;
4268 if (copy_from_user(&cap, argp, sizeof(cap)))
4269 goto out;
4270 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
4271 break;
4272 }
4273 default:
4274 r = -ENOTTY;
4275 }
4276out:
4277 return r;
4278}
4279
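/*
 * Trim msrs_to_save and emulated_msrs down to the entries that the host
 * CPU and the vendor module actually support, so that
 * KVM_GET_MSR_INDEX_LIST only reports usable MSRs.
 */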
4280static void kvm_init_msr_list(void)
4281{
4282 u32 dummy[2];
4283 unsigned i, j;
4284
4285 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
4286 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
4287 continue;
4288
4289 /*
4290  * Even MSRs that are valid in the host may not be exposed
4291  * to the guests in some cases.
4292  */
4293 switch (msrs_to_save[i]) {
4294 case MSR_IA32_BNDCFGS:
4295 if (!kvm_x86_ops->mpx_supported())
4296 continue;
4297 break;
4298 case MSR_TSC_AUX:
4299 if (!kvm_x86_ops->rdtscp_supported())
4300 continue;
4301 break;
4302 default:
4303 break;
4304 }
4305
4306 if (j < i)
4307 msrs_to_save[j] = msrs_to_save[i];
4308 j++;
4309 }
4310 num_msrs_to_save = j;
4311
4312 for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
4313 switch (emulated_msrs[i]) {
4314 case MSR_IA32_SMBASE:
4315 if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
4316 continue;
4317 break;
4318 default:
4319 break;
4320 }
4321
4322 if (j < i)
4323 emulated_msrs[j] = emulated_msrs[i];
4324 j++;
4325 }
4326 num_emulated_msrs = j;
4327}
4328
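/*
 * Dispatch an MMIO write in chunks of up to 8 bytes: try the in-kernel
 * local APIC first, then the KVM_MMIO_BUS devices.  Returns the number
 * of bytes handled in the kernel; the rest must go to userspace.
 */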
4329static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
4330 const void *v)
4331{
4332 int handled = 0;
4333 int n;
4334
4335 do {
4336 n = min(len, 8);
4337 if (!(lapic_in_kernel(vcpu) &&
4338 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
4339 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
4340 break;
4341 handled += n;
4342 addr += n;
4343 len -= n;
4344 v += n;
4345 } while (len);
4346
4347 return handled;
4348}
4349
4350static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
4351{
4352 int handled = 0;
4353 int n;
4354
4355 do {
4356 n = min(len, 8);
4357 if (!(lapic_in_kernel(vcpu) &&
4358 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
4359 addr, n, v))
4360 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
4361 break;
4362 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
4363 handled += n;
4364 addr += n;
4365 len -= n;
4366 v += n;
4367 } while (len);
4368
4369 return handled;
4370}
4371
4372static void kvm_set_segment(struct kvm_vcpu *vcpu,
4373 struct kvm_segment *var, int seg)
4374{
4375 kvm_x86_ops->set_segment(vcpu, var, seg);
4376}
4377
4378void kvm_get_segment(struct kvm_vcpu *vcpu,
4379 struct kvm_segment *var, int seg)
4380{
4381 kvm_x86_ops->get_segment(vcpu, var, seg);
4382}
4383
4384gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
4385 struct x86_exception *exception)
4386{
4387 gpa_t t_gpa;
4388
4389 BUG_ON(!mmu_is_nested(vcpu));
4390
4391 /* NPT walks are always user-mode */
4392 access |= PFERR_USER_MASK;
4393 t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception);
4394
4395 return t_gpa;
4396}
4397
4398gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
4399 struct x86_exception *exception)
4400{
4401 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4402 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4403}
4404
4405gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
4406 struct x86_exception *exception)
4407{
4408 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4409 access |= PFERR_FETCH_MASK;
4410 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4411}
4412
4413gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
4414 struct x86_exception *exception)
4415{
4416 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4417 access |= PFERR_WRITE_MASK;
4418 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4419}
4420
4421/* uses this to access any guest's mapped memory without checking CPL */
4422gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
4423 struct x86_exception *exception)
4424{
4425 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
4426}
4427
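/*
 * Copy 'bytes' of guest virtual memory into 'val', walking the guest
 * page tables one page at a time with the given access rights.
 * Returns X86EMUL_PROPAGATE_FAULT on translation failure and
 * X86EMUL_IO_NEEDED if the backing memory could not be read.
 */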
4428static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4429 struct kvm_vcpu *vcpu, u32 access,
4430 struct x86_exception *exception)
4431{
4432 void *data = val;
4433 int r = X86EMUL_CONTINUE;
4434
4435 while (bytes) {
4436 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
4437 exception);
4438 unsigned offset = addr & (PAGE_SIZE-1);
4439 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
4440 int ret;
4441
4442 if (gpa == UNMAPPED_GVA)
4443 return X86EMUL_PROPAGATE_FAULT;
4444 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
4445 offset, toread);
4446 if (ret < 0) {
4447 r = X86EMUL_IO_NEEDED;
4448 goto out;
4449 }
4450
4451 bytes -= toread;
4452 data += toread;
4453 addr += toread;
4454 }
4455out:
4456 return r;
4457}
4458
4459/* used for instruction fetching */
4460static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
4461 gva_t addr, void *val, unsigned int bytes,
4462 struct x86_exception *exception)
4463{
4464 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4465 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4466 unsigned offset;
4467 int ret;
4468
4469 /* Inline kvm_read_guest_virt_helper for speed. */
4470 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
4471 exception);
4472 if (unlikely(gpa == UNMAPPED_GVA))
4473 return X86EMUL_PROPAGATE_FAULT;
4474
4475 offset = addr & (PAGE_SIZE-1);
4476 if (WARN_ON(offset + bytes > PAGE_SIZE))
4477 bytes = (unsigned)PAGE_SIZE - offset;
4478 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
4479 offset, bytes);
4480 if (unlikely(ret < 0))
4481 return X86EMUL_IO_NEEDED;
4482
4483 return X86EMUL_CONTINUE;
4484}
4485
4486int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
4487 gva_t addr, void *val, unsigned int bytes,
4488 struct x86_exception *exception)
4489{
4490 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4491 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4492
4493 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
4494 exception);
4495}
4496EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
4497
4498static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4499 gva_t addr, void *val, unsigned int bytes,
4500 struct x86_exception *exception)
4501{
4502 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4503 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
4504}
4505
4506static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
4507 unsigned long addr, void *val, unsigned int bytes)
4508{
4509 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4510 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
4511
4512 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
4513}
4514
4515int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
4516 gva_t addr, void *val,
4517 unsigned int bytes,
4518 struct x86_exception *exception)
4519{
4520 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4521 void *data = val;
4522 int r = X86EMUL_CONTINUE;
4523
4524 while (bytes) {
4525 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
4526 PFERR_WRITE_MASK,
4527 exception);
4528 unsigned offset = addr & (PAGE_SIZE-1);
4529 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
4530 int ret;
4531
4532 if (gpa == UNMAPPED_GVA)
4533 return X86EMUL_PROPAGATE_FAULT;
4534 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
4535 if (ret < 0) {
4536 r = X86EMUL_IO_NEEDED;
4537 goto out;
4538 }
4539
4540 bytes -= towrite;
4541 data += towrite;
4542 addr += towrite;
4543 }
4544out:
4545 return r;
4546}
4547EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
4548
4549static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4550 gpa_t gpa, bool write)
4551{
	/* Accesses to the default APIC page are always treated as MMIO. */
4553 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4554 return 1;
4555
4556 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
4557 trace_vcpu_match_mmio(gva, gpa, write, true);
4558 return 1;
4559 }
4560
4561 return 0;
4562}
4563
4564static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
4565 gpa_t *gpa, struct x86_exception *exception,
4566 bool write)
4567{
4568 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
4569 | (write ? PFERR_WRITE_MASK : 0);
4570
	/*
	 * Fast path: reuse the cached MMIO gva->gpa translation if the gva
	 * matches and the cached access bits still permit this access
	 * (protection keys are not relevant to the cached MMIO walk, hence
	 * pkey 0).
	 */
4576 if (vcpu_match_mmio_gva(vcpu, gva)
4577 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
4578 vcpu->arch.access, 0, access)) {
4579 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
4580 (gva & (PAGE_SIZE - 1));
4581 trace_vcpu_match_mmio(gva, *gpa, write, false);
4582 return 1;
4583 }
4584
4585 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
4586
4587 if (*gpa == UNMAPPED_GVA)
4588 return -1;
4589
4590 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
4591}
4592
4593int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
4594 const void *val, int bytes)
4595{
4596 int ret;
4597
4598 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
4599 if (ret < 0)
4600 return 0;
4601 kvm_page_track_write(vcpu, gpa, val, bytes);
4602 return 1;
4603}
4604
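/*
 * Ops vtable that lets emulator_read_write{,_onepage}() share one code path
 * for emulated reads and writes: "emulate" handles RAM-backed accesses,
 * "mmio" dispatches to in-kernel devices, and "exit_mmio" prepares the
 * KVM_EXIT_MMIO run structure for completion in userspace.
 */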
4605struct read_write_emulator_ops {
4606 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
4607 int bytes);
4608 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
4609 void *val, int bytes);
4610 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4611 int bytes, void *val);
4612 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
4613 void *val, int bytes);
4614 bool write;
4615};
4616
4617static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
4618{
4619 if (vcpu->mmio_read_completed) {
4620 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
4621 vcpu->mmio_fragments[0].gpa, *(u64 *)val);
4622 vcpu->mmio_read_completed = 0;
4623 return 1;
4624 }
4625
4626 return 0;
4627}
4628
4629static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4630 void *val, int bytes)
4631{
4632 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
4633}
4634
4635static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
4636 void *val, int bytes)
4637{
4638 return emulator_write_phys(vcpu, gpa, val, bytes);
4639}
4640
4641static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
4642{
4643 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
4644 return vcpu_mmio_write(vcpu, gpa, bytes, val);
4645}
4646
4647static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4648 void *val, int bytes)
4649{
4650 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
4651 return X86EMUL_IO_NEEDED;
4652}
4653
4654static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
4655 void *val, int bytes)
4656{
4657 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
4658
4659 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
4660 return X86EMUL_CONTINUE;
4661}
4662
4663static const struct read_write_emulator_ops read_emultor = {
4664 .read_write_prepare = read_prepare,
4665 .read_write_emulate = read_emulate,
4666 .read_write_mmio = vcpu_mmio_read,
4667 .read_write_exit_mmio = read_exit_mmio,
4668};
4669
4670static const struct read_write_emulator_ops write_emultor = {
4671 .read_write_emulate = write_emulate,
4672 .read_write_mmio = write_mmio,
4673 .read_write_exit_mmio = write_exit_mmio,
4674 .write = true,
4675};
4676
4677static int emulator_read_write_onepage(unsigned long addr, void *val,
4678 unsigned int bytes,
4679 struct x86_exception *exception,
4680 struct kvm_vcpu *vcpu,
4681 const struct read_write_emulator_ops *ops)
4682{
4683 gpa_t gpa;
4684 int handled, ret;
4685 bool write = ops->write;
4686 struct kvm_mmio_fragment *frag;
4687 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
4688
	/*
	 * If the exit was due to a nested page fault we may already have a
	 * valid GPA cached in gpa_val; use it to avoid the GVA->GPA walk.
	 * This cannot be used for string operations, which only cache the
	 * GPA of the initial access.
	 */
4696 if (vcpu->arch.gpa_available &&
4697 emulator_can_use_gpa(ctxt) &&
4698 (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
4699 gpa = vcpu->arch.gpa_val;
4700 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
4701 } else {
4702 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
4703 if (ret < 0)
4704 return X86EMUL_PROPAGATE_FAULT;
4705 }
4706
4707 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
4708 return X86EMUL_CONTINUE;
4709
	/*
	 * The access is MMIO (or could not be handled as RAM); see whether
	 * an in-kernel device can handle it before punting to userspace.
	 */
4713 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
4714 if (handled == bytes)
4715 return X86EMUL_CONTINUE;
4716
4717 gpa += handled;
4718 bytes -= handled;
4719 val += handled;
4720
4721 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
4722 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
4723 frag->gpa = gpa;
4724 frag->data = val;
4725 frag->len = bytes;
4726 return X86EMUL_CONTINUE;
4727}
4728
4729static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
4730 unsigned long addr,
4731 void *val, unsigned int bytes,
4732 struct x86_exception *exception,
4733 const struct read_write_emulator_ops *ops)
4734{
4735 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4736 gpa_t gpa;
4737 int rc;
4738
4739 if (ops->read_write_prepare &&
4740 ops->read_write_prepare(vcpu, val, bytes))
4741 return X86EMUL_CONTINUE;
4742
4743 vcpu->mmio_nr_fragments = 0;
4744
	/* Does the access cross a page boundary? */
4746 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
4747 int now;
4748
4749 now = -addr & ~PAGE_MASK;
4750 rc = emulator_read_write_onepage(addr, val, now, exception,
4751 vcpu, ops);
4752
4753 if (rc != X86EMUL_CONTINUE)
4754 return rc;
4755 addr += now;
4756 if (ctxt->mode != X86EMUL_MODE_PROT64)
4757 addr = (u32)addr;
4758 val += now;
4759 bytes -= now;
4760 }
4761
4762 rc = emulator_read_write_onepage(addr, val, bytes, exception,
4763 vcpu, ops);
4764 if (rc != X86EMUL_CONTINUE)
4765 return rc;
4766
4767 if (!vcpu->mmio_nr_fragments)
4768 return rc;
4769
4770 gpa = vcpu->mmio_fragments[0].gpa;
4771
4772 vcpu->mmio_needed = 1;
4773 vcpu->mmio_cur_fragment = 0;
4774
4775 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
4776 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
4777 vcpu->run->exit_reason = KVM_EXIT_MMIO;
4778 vcpu->run->mmio.phys_addr = gpa;
4779
4780 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
4781}
4782
4783static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
4784 unsigned long addr,
4785 void *val,
4786 unsigned int bytes,
4787 struct x86_exception *exception)
4788{
4789 return emulator_read_write(ctxt, addr, val, bytes,
4790 exception, &read_emultor);
4791}
4792
4793static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
4794 unsigned long addr,
4795 const void *val,
4796 unsigned int bytes,
4797 struct x86_exception *exception)
4798{
4799 return emulator_read_write(ctxt, addr, (void *)val, bytes,
4800 exception, &write_emultor);
4801}
4802
4803#define CMPXCHG_TYPE(t, ptr, old, new) \
4804 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
4805
4806#ifdef CONFIG_X86_64
4807# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
4808#else
4809# define CMPXCHG64(ptr, old, new) \
4810 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
4811#endif
4812
4813static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4814 unsigned long addr,
4815 const void *old,
4816 const void *new,
4817 unsigned int bytes,
4818 struct x86_exception *exception)
4819{
4820 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4821 gpa_t gpa;
4822 struct page *page;
4823 char *kaddr;
4824 bool exchanged;
4825
	/* Only power-of-two sizes up to 8 bytes are emulated atomically. */
4827 if (bytes > 8 || (bytes & (bytes - 1)))
4828 goto emul_write;
4829
4830 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
4831
4832 if (gpa == UNMAPPED_GVA ||
4833 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
4834 goto emul_write;
4835
4836 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
4837 goto emul_write;
4838
4839 page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
4840 if (is_error_page(page))
4841 goto emul_write;
4842
4843 kaddr = kmap_atomic(page);
4844 kaddr += offset_in_page(gpa);
4845 switch (bytes) {
4846 case 1:
4847 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
4848 break;
4849 case 2:
4850 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
4851 break;
4852 case 4:
4853 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
4854 break;
4855 case 8:
4856 exchanged = CMPXCHG64(kaddr, old, new);
4857 break;
4858 default:
4859 BUG();
4860 }
4861 kunmap_atomic(kaddr);
4862 kvm_release_page_dirty(page);
4863
4864 if (!exchanged)
4865 return X86EMUL_CMPXCHG_FAILED;
4866
4867 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
4868 kvm_page_track_write(vcpu, gpa, new, bytes);
4869
4870 return X86EMUL_CONTINUE;
4871
4872emul_write:
4873 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
4874
4875 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
4876}
4877
4878static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
4879{
4880 int r = 0, i;
4881
4882 for (i = 0; i < vcpu->arch.pio.count; i++) {
4883 if (vcpu->arch.pio.in)
4884 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
4885 vcpu->arch.pio.size, pd);
4886 else
4887 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
4888 vcpu->arch.pio.port, vcpu->arch.pio.size,
4889 pd);
4890 if (r)
4891 break;
4892 pd += vcpu->arch.pio.size;
4893 }
4894 return r;
4895}
4896
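/*
 * Common PIO helper: record the access in vcpu->arch.pio and try the
 * in-kernel I/O bus first.  Returns 1 if the access was fully handled in
 * the kernel, 0 if a KVM_EXIT_IO exit to userspace has been set up.
 */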
4897static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
4898 unsigned short port, void *val,
4899 unsigned int count, bool in)
4900{
4901 vcpu->arch.pio.port = port;
4902 vcpu->arch.pio.in = in;
4903 vcpu->arch.pio.count = count;
4904 vcpu->arch.pio.size = size;
4905
4906 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
4907 vcpu->arch.pio.count = 0;
4908 return 1;
4909 }
4910
4911 vcpu->run->exit_reason = KVM_EXIT_IO;
4912 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
4913 vcpu->run->io.size = size;
4914 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
4915 vcpu->run->io.count = count;
4916 vcpu->run->io.port = port;
4917
4918 return 0;
4919}
4920
4921static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
4922 int size, unsigned short port, void *val,
4923 unsigned int count)
4924{
4925 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4926 int ret;
4927
4928 if (vcpu->arch.pio.count)
4929 goto data_avail;
4930
4931 memset(vcpu->arch.pio_data, 0, size * count);
4932
4933 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
4934 if (ret) {
4935data_avail:
4936 memcpy(val, vcpu->arch.pio_data, size * count);
4937 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
4938 vcpu->arch.pio.count = 0;
4939 return 1;
4940 }
4941
4942 return 0;
4943}
4944
4945static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
4946 int size, unsigned short port,
4947 const void *val, unsigned int count)
4948{
4949 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4950
4951 memcpy(vcpu->arch.pio_data, val, size * count);
4952 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
4953 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
4954}
4955
4956static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
4957{
4958 return kvm_x86_ops->get_segment_base(vcpu, seg);
4959}
4960
4961static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
4962{
4963 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
4964}
4965
4966static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
4967{
4968 if (!need_emulate_wbinvd(vcpu))
4969 return X86EMUL_CONTINUE;
4970
4971 if (kvm_x86_ops->has_wbinvd_exit()) {
4972 int cpu = get_cpu();
4973
4974 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
4975 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
4976 wbinvd_ipi, NULL, 1);
4977 put_cpu();
4978 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
4979 } else
4980 wbinvd();
4981 return X86EMUL_CONTINUE;
4982}
4983
4984int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
4985{
4986 kvm_emulate_wbinvd_noskip(vcpu);
4987 return kvm_skip_emulated_instruction(vcpu);
4988}
4989EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
4990
4993static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
4994{
4995 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
4996}
4997
4998static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
4999 unsigned long *dest)
5000{
5001 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
5002}
5003
5004static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
5005 unsigned long value)
5006{
5007
5008 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
5009}
5010
5011static u64 mk_cr_64(u64 curr_cr, u32 new_val)
5012{
5013 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
5014}
5015
5016static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
5017{
5018 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5019 unsigned long value;
5020
5021 switch (cr) {
5022 case 0:
5023 value = kvm_read_cr0(vcpu);
5024 break;
5025 case 2:
5026 value = vcpu->arch.cr2;
5027 break;
5028 case 3:
5029 value = kvm_read_cr3(vcpu);
5030 break;
5031 case 4:
5032 value = kvm_read_cr4(vcpu);
5033 break;
5034 case 8:
5035 value = kvm_get_cr8(vcpu);
5036 break;
5037 default:
5038 kvm_err("%s: unexpected cr %u\n", __func__, cr);
5039 return 0;
5040 }
5041
5042 return value;
5043}
5044
5045static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
5046{
5047 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5048 int res = 0;
5049
5050 switch (cr) {
5051 case 0:
5052 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
5053 break;
5054 case 2:
5055 vcpu->arch.cr2 = val;
5056 break;
5057 case 3:
5058 res = kvm_set_cr3(vcpu, val);
5059 break;
5060 case 4:
5061 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
5062 break;
5063 case 8:
5064 res = kvm_set_cr8(vcpu, val);
5065 break;
5066 default:
5067 kvm_err("%s: unexpected cr %u\n", __func__, cr);
5068 res = -1;
5069 }
5070
5071 return res;
5072}
5073
5074static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
5075{
5076 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
5077}
5078
5079static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5080{
5081 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
5082}
5083
5084static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5085{
5086 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
5087}
5088
5089static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5090{
5091 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
5092}
5093
5094static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
5095{
5096 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
5097}
5098
5099static unsigned long emulator_get_cached_segment_base(
5100 struct x86_emulate_ctxt *ctxt, int seg)
5101{
5102 return get_segment_base(emul_to_vcpu(ctxt), seg);
5103}
5104
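/*
 * Translate a kvm_segment into the emulator's selector/desc_struct
 * representation.  The limit is scaled back to granularity units and, on
 * 64-bit, the upper half of the base is returned via @base3.
 */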
5105static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
5106 struct desc_struct *desc, u32 *base3,
5107 int seg)
5108{
5109 struct kvm_segment var;
5110
5111 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
5112 *selector = var.selector;
5113
5114 if (var.unusable) {
5115 memset(desc, 0, sizeof(*desc));
5116 if (base3)
5117 *base3 = 0;
5118 return false;
5119 }
5120
5121 if (var.g)
5122 var.limit >>= 12;
5123 set_desc_limit(desc, var.limit);
5124 set_desc_base(desc, (unsigned long)var.base);
5125#ifdef CONFIG_X86_64
5126 if (base3)
5127 *base3 = var.base >> 32;
5128#endif
5129 desc->type = var.type;
5130 desc->s = var.s;
5131 desc->dpl = var.dpl;
5132 desc->p = var.present;
5133 desc->avl = var.avl;
5134 desc->l = var.l;
5135 desc->d = var.db;
5136 desc->g = var.g;
5137
5138 return true;
5139}
5140
5141static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
5142 struct desc_struct *desc, u32 base3,
5143 int seg)
5144{
5145 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5146 struct kvm_segment var;
5147
5148 var.selector = selector;
5149 var.base = get_desc_base(desc);
5150#ifdef CONFIG_X86_64
5151 var.base |= ((u64)base3) << 32;
5152#endif
5153 var.limit = get_desc_limit(desc);
5154 if (desc->g)
5155 var.limit = (var.limit << 12) | 0xfff;
5156 var.type = desc->type;
5157 var.dpl = desc->dpl;
5158 var.db = desc->d;
5159 var.s = desc->s;
5160 var.l = desc->l;
5161 var.g = desc->g;
5162 var.avl = desc->avl;
5163 var.present = desc->p;
5164 var.unusable = !var.present;
5165 var.padding = 0;
5166
5167 kvm_set_segment(vcpu, &var, seg);
5168 return;
5169}
5170
5171static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
5172 u32 msr_index, u64 *pdata)
5173{
5174 struct msr_data msr;
5175 int r;
5176
5177 msr.index = msr_index;
5178 msr.host_initiated = false;
5179 r = kvm_get_msr(emul_to_vcpu(ctxt), &msr);
5180 if (r)
5181 return r;
5182
5183 *pdata = msr.data;
5184 return 0;
5185}
5186
5187static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
5188 u32 msr_index, u64 data)
5189{
5190 struct msr_data msr;
5191
5192 msr.data = data;
5193 msr.index = msr_index;
5194 msr.host_initiated = false;
5195 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
5196}
5197
5198static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
5199{
5200 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5201
5202 return vcpu->arch.smbase;
5203}
5204
5205static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
5206{
5207 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5208
5209 vcpu->arch.smbase = smbase;
5210}
5211
5212static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
5213 u32 pmc)
5214{
5215 return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
5216}
5217
5218static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
5219 u32 pmc, u64 *pdata)
5220{
5221 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
5222}
5223
5224static void emulator_halt(struct x86_emulate_ctxt *ctxt)
5225{
5226 emul_to_vcpu(ctxt)->arch.halt_request = 1;
5227}
5228
5229static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
5230{
5231 preempt_disable();
5232 kvm_load_guest_fpu(emul_to_vcpu(ctxt));
5233}
5234
5235static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
5236{
5237 preempt_enable();
5238}
5239
5240static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
5241 struct x86_instruction_info *info,
5242 enum x86_intercept_stage stage)
5243{
5244 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
5245}
5246
5247static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
5248 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
5249{
5250 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
5251}
5252
5253static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
5254{
5255 return kvm_register_read(emul_to_vcpu(ctxt), reg);
5256}
5257
5258static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
5259{
5260 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
5261}
5262
5263static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
5264{
5265 kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
5266}
5267
5268static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
5269{
5270 return emul_to_vcpu(ctxt)->arch.hflags;
5271}
5272
5273static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
5274{
5275 kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
5276}

static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);

5278static const struct x86_emulate_ops emulate_ops = {
5279 .read_gpr = emulator_read_gpr,
5280 .write_gpr = emulator_write_gpr,
5281 .read_std = kvm_read_guest_virt_system,
5282 .write_std = kvm_write_guest_virt_system,
5283 .read_phys = kvm_read_guest_phys_system,
5284 .fetch = kvm_fetch_guest_virt,
5285 .read_emulated = emulator_read_emulated,
5286 .write_emulated = emulator_write_emulated,
5287 .cmpxchg_emulated = emulator_cmpxchg_emulated,
5288 .invlpg = emulator_invlpg,
5289 .pio_in_emulated = emulator_pio_in_emulated,
5290 .pio_out_emulated = emulator_pio_out_emulated,
5291 .get_segment = emulator_get_segment,
5292 .set_segment = emulator_set_segment,
5293 .get_cached_segment_base = emulator_get_cached_segment_base,
5294 .get_gdt = emulator_get_gdt,
5295 .get_idt = emulator_get_idt,
5296 .set_gdt = emulator_set_gdt,
5297 .set_idt = emulator_set_idt,
5298 .get_cr = emulator_get_cr,
5299 .set_cr = emulator_set_cr,
5300 .cpl = emulator_get_cpl,
5301 .get_dr = emulator_get_dr,
5302 .set_dr = emulator_set_dr,
5303 .get_smbase = emulator_get_smbase,
5304 .set_smbase = emulator_set_smbase,
5305 .set_msr = emulator_set_msr,
5306 .get_msr = emulator_get_msr,
5307 .check_pmc = emulator_check_pmc,
5308 .read_pmc = emulator_read_pmc,
5309 .halt = emulator_halt,
5310 .wbinvd = emulator_wbinvd,
5311 .fix_hypercall = emulator_fix_hypercall,
5312 .get_fpu = emulator_get_fpu,
5313 .put_fpu = emulator_put_fpu,
5314 .intercept = emulator_intercept,
5315 .get_cpuid = emulator_get_cpuid,
5316 .set_nmi_mask = emulator_set_nmi_mask,
5317 .get_hflags = emulator_get_hflags,
5318 .set_hflags = emulator_set_hflags,
5319};
5320
5321static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
5322{
5323 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
5324
	/*
	 * An sti; sti sequence only blocks interrupts for the first
	 * instruction, so if the previous instruction already left an
	 * interrupt shadow behind, do not set a new one.  When the shadow
	 * is dropped, pending events must be re-evaluated.
	 */
5331 if (int_shadow & mask)
5332 mask = 0;
5333 if (unlikely(int_shadow || mask)) {
5334 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
5335 if (!mask)
5336 kvm_make_request(KVM_REQ_EVENT, vcpu);
5337 }
5338}
5339
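/*
 * Inject the exception recorded by the emulator into the guest.  Page
 * faults are routed through kvm_propagate_fault() so that they hit the
 * correct (nested or direct) MMU context.
 */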
5340static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
5341{
5342 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5343 if (ctxt->exception.vector == PF_VECTOR)
5344 return kvm_propagate_fault(vcpu, &ctxt->exception);
5345
5346 if (ctxt->exception.error_code_valid)
5347 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
5348 ctxt->exception.error_code);
5349 else
5350 kvm_queue_exception(vcpu, ctxt->exception.vector);
5351 return false;
5352}
5353
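/*
 * Seed the emulation context from the current vcpu state: rflags, rip and
 * the CPU mode derived from CR0.PE, EFLAGS.VM, CS.L/CS.D and long mode.
 */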
5354static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
5355{
5356 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5357 int cs_db, cs_l;
5358
5359 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
5360
5361 ctxt->eflags = kvm_get_rflags(vcpu);
5362 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
5363
5364 ctxt->eip = kvm_rip_read(vcpu);
5365 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
5366 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
5367 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
5368 cs_db ? X86EMUL_MODE_PROT32 :
5369 X86EMUL_MODE_PROT16;
5370 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
5371 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
5372 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
5373
5374 init_decode_cache(ctxt);
5375 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5376}
5377
5378int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
5379{
5380 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5381 int ret;
5382
5383 init_emulate_ctxt(vcpu);
5384
5385 ctxt->op_bytes = 2;
5386 ctxt->ad_bytes = 2;
5387 ctxt->_eip = ctxt->eip + inc_eip;
5388 ret = emulate_int_real(ctxt, irq);
5389
5390 if (ret != X86EMUL_CONTINUE)
5391 return EMULATE_FAIL;
5392
5393 ctxt->eip = ctxt->_eip;
5394 kvm_rip_write(vcpu, ctxt->eip);
5395 kvm_set_rflags(vcpu, ctxt->eflags);
5396
5397 if (irq == NMI_VECTOR)
5398 vcpu->arch.nmi_pending = 0;
5399 else
5400 vcpu->arch.interrupt.pending = false;
5401
5402 return EMULATE_DONE;
5403}
5404EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
5405
5406static int handle_emulation_failure(struct kvm_vcpu *vcpu)
5407{
5408 int r = EMULATE_DONE;
5409
5410 ++vcpu->stat.insn_emulation_fail;
5411 trace_kvm_emulate_insn_failed(vcpu);
5412 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
5413 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5414 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
5415 vcpu->run->internal.ndata = 0;
5416 r = EMULATE_FAIL;
5417 }
5418 kvm_queue_exception(vcpu, UD_VECTOR);
5419
5420 return r;
5421}
5422
5423static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
5424 bool write_fault_to_shadow_pgtable,
5425 int emulation_type)
5426{
5427 gpa_t gpa = cr2;
5428 kvm_pfn_t pfn;
5429
5430 if (emulation_type & EMULTYPE_NO_REEXECUTE)
5431 return false;
5432
5433 if (!vcpu->arch.mmu.direct_map) {
		/*
		 * Only write accesses need to be re-executed here, so a
		 * write translation of cr2 is sufficient.
		 */
5438 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5439
		/*
		 * If the mapping is invalid in the guest, let the CPU
		 * re-execute the instruction and deliver the fault itself.
		 */
5444 if (gpa == UNMAPPED_GVA)
5445 return true;
5446 }
5447
	/*
	 * Do not retry an unhandleable instruction that faults on read-only
	 * host memory, otherwise we end up in an infinite loop of
	 * retry -> write #PF -> emulation failure -> retry.
	 */
5454 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
5455
	/*
	 * If the instruction faulted on an error pfn it cannot be fixed up
	 * here; report the failure to userspace instead.
	 */
5460 if (is_error_noslot_pfn(pfn))
5461 return false;
5462
5463 kvm_release_pfn_clean(pfn);
5464
	/* Instructions are well emulated on the direct-map (TDP) MMU. */
5466 if (vcpu->arch.mmu.direct_map) {
5467 unsigned int indirect_shadow_pages;
5468
5469 spin_lock(&vcpu->kvm->mmu_lock);
5470 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
5471 spin_unlock(&vcpu->kvm->mmu_lock);
5472
5473 if (indirect_shadow_pages)
5474 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5475
5476 return true;
5477 }
5478
	/*
	 * The emulation was triggered by a write to a shadowed page table
	 * and failed; try to unshadow the page and re-enter the guest so
	 * the CPU can execute the instruction itself.
	 */
5484 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5485
	/*
	 * If the access faulted on its own page tables it cannot be fixed
	 * by unprotecting the shadow page; report it to userspace.
	 */
5491 return !write_fault_to_shadow_pgtable;
5492}
5493
5494static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
5495 unsigned long cr2, int emulation_type)
5496{
5497 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5498 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
5499
5500 last_retry_eip = vcpu->arch.last_retry_eip;
5501 last_retry_addr = vcpu->arch.last_retry_addr;
5502
	/*
	 * If emulation was triggered by a #PF on a write-protected shadow
	 * page and the instruction is not itself a page-table-writing one,
	 * the shadow page can simply be zapped and the instruction retried.
	 *
	 * However, if the guest uses such an instruction on the PDE that
	 * maps the instruction itself, unprotecting it would loop forever.
	 * Cache the last retried eip and fault address so that seeing the
	 * same pair again breaks out of the potential infinite loop.
	 */
5516 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
5517
5518 if (!(emulation_type & EMULTYPE_RETRY))
5519 return false;
5520
5521 if (x86_page_table_writing_insn(ctxt))
5522 return false;
5523
5524 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
5525 return false;
5526
5527 vcpu->arch.last_retry_eip = ctxt->eip;
5528 vcpu->arch.last_retry_addr = cr2;
5529
5530 if (!vcpu->arch.mmu.direct_map)
5531 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
5532
5533 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
5534
5535 return true;
5536}
5537
5538static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
5539static int complete_emulated_pio(struct kvm_vcpu *vcpu);
5540
5541static void kvm_smm_changed(struct kvm_vcpu *vcpu)
5542{
5543 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
		/* This is a good place to trace that we are exiting SMM. */
5545 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
5546
		/* Process a latched INIT or SMI, if any. */
5548 kvm_make_request(KVM_REQ_EVENT, vcpu);
5549 }
5550
5551 kvm_mmu_reset_context(vcpu);
5552}
5553
5554static void kvm_set_hflags(struct kvm_vcpu *vcpu, unsigned emul_flags)
5555{
5556 unsigned changed = vcpu->arch.hflags ^ emul_flags;
5557
5558 vcpu->arch.hflags = emul_flags;
5559
5560 if (changed & HF_SMM_MASK)
5561 kvm_smm_changed(vcpu);
5562}
5563
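/*
 * Match @addr/@type against the four hardware breakpoints programmed in
 * @dr7 (enable bits in the low byte, R/W+LEN nibbles in the high word)
 * and return the corresponding DR6.B0-B3 bits.
 */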
5564static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
5565 unsigned long *db)
5566{
5567 u32 dr6 = 0;
5568 int i;
5569 u32 enable, rwlen;
5570
5571 enable = dr7;
5572 rwlen = dr7 >> 16;
5573 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
5574 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
5575 dr6 |= (1 << i);
5576 return dr6;
5577}
5578
5579static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
5580{
5581 struct kvm_run *kvm_run = vcpu->run;
5582
5583 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5584 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
5585 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
5586 kvm_run->debug.arch.exception = DB_VECTOR;
5587 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5588 *r = EMULATE_USER_EXIT;
5589 } else {
		/*
		 * "Certain debug exceptions may clear bits 0-3.  The
		 * remaining contents of the DR6 register are never cleared
		 * by the processor", so only replace the low bits before
		 * injecting the #DB.
		 */
5595 vcpu->arch.dr6 &= ~15;
5596 vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
5597 kvm_queue_exception(vcpu, DB_VECTOR);
5598 }
5599}
5600
5601int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
5602{
5603 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5604 int r = EMULATE_DONE;
5605
5606 kvm_x86_ops->skip_emulated_instruction(vcpu);
5607
	/*
	 * rflags is the old, "raw" value of the flags; the new value has
	 * not been saved yet.
	 *
	 * This is correct even if the guest just set TF, because "the
	 * processor will not generate this exception after the instruction
	 * that sets the TF flag".
	 */
5616 if (unlikely(rflags & X86_EFLAGS_TF))
5617 kvm_vcpu_do_singlestep(vcpu, &r);
5618 return r == EMULATE_DONE;
5619}
5620EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
5621
5622static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5623{
5624 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
5625 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
5626 struct kvm_run *kvm_run = vcpu->run;
5627 unsigned long eip = kvm_get_linear_rip(vcpu);
5628 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5629 vcpu->arch.guest_debug_dr7,
5630 vcpu->arch.eff_db);
5631
5632 if (dr6 != 0) {
5633 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
5634 kvm_run->debug.arch.pc = eip;
5635 kvm_run->debug.arch.exception = DB_VECTOR;
5636 kvm_run->exit_reason = KVM_EXIT_DEBUG;
5637 *r = EMULATE_USER_EXIT;
5638 return true;
5639 }
5640 }
5641
5642 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
5643 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
5644 unsigned long eip = kvm_get_linear_rip(vcpu);
5645 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5646 vcpu->arch.dr7,
5647 vcpu->arch.db);
5648
5649 if (dr6 != 0) {
5650 vcpu->arch.dr6 &= ~15;
5651 vcpu->arch.dr6 |= dr6 | DR6_RTM;
5652 kvm_queue_exception(vcpu, DB_VECTOR);
5653 *r = EMULATE_DONE;
5654 return true;
5655 }
5656 }
5657
5658 return false;
5659}
5660
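/*
 * Main entry point for instruction emulation.  Returns EMULATE_DONE when
 * emulation completed (or the guest should simply retry the instruction),
 * EMULATE_USER_EXIT when userspace must complete MMIO/PIO, and
 * EMULATE_FAIL when the instruction cannot be handled.
 */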
5661int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5662 unsigned long cr2,
5663 int emulation_type,
5664 void *insn,
5665 int insn_len)
5666{
5667 int r;
5668 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5669 bool writeback = true;
5670 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
5671
	/*
	 * Clear write_fault_to_shadow_pgtable here to ensure it is never
	 * reused on a later emulation.
	 */
5676 vcpu->arch.write_fault_to_shadow_pgtable = false;
5677 kvm_clear_exception_queue(vcpu);
5678
5679 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
5680 init_emulate_ctxt(vcpu);
5681
		/*
		 * We will re-enter on the same instruction since we do not
		 * set complete_userspace_io.  This does not handle
		 * watchpoints yet; those would be handled in the
		 * emulate_ops.
		 */
5688 if (kvm_vcpu_check_breakpoint(vcpu, &r))
5689 return r;
5690
5691 ctxt->interruptibility = 0;
5692 ctxt->have_exception = false;
5693 ctxt->exception.vector = -1;
5694 ctxt->perm_ok = false;
5695
5696 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
5697
5698 r = x86_decode_insn(ctxt, insn, insn_len);
5699
5700 trace_kvm_emulate_insn_start(vcpu);
5701 ++vcpu->stat.insn_emulation;
5702 if (r != EMULATION_OK) {
5703 if (emulation_type & EMULTYPE_TRAP_UD)
5704 return EMULATE_FAIL;
5705 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5706 emulation_type))
5707 return EMULATE_DONE;
5708 if (emulation_type & EMULTYPE_SKIP)
5709 return EMULATE_FAIL;
5710 return handle_emulation_failure(vcpu);
5711 }
5712 }
5713
5714 if (emulation_type & EMULTYPE_SKIP) {
5715 kvm_rip_write(vcpu, ctxt->_eip);
5716 if (ctxt->eflags & X86_EFLAGS_RF)
5717 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
5718 return EMULATE_DONE;
5719 }
5720
5721 if (retry_instruction(ctxt, cr2, emulation_type))
5722 return EMULATE_DONE;
5723
	/*
	 * Needed for the VMware backdoor interface, which changes register
	 * values during the I/O operation.
	 */
5726 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
5727 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
5728 emulator_invalidate_register_cache(ctxt);
5729 }
5730
5731restart:
	/* Save the faulting GPA (cr2) in the address field. */
5733 ctxt->exception.address = cr2;
5734
5735 r = x86_emulate_insn(ctxt);
5736
5737 if (r == EMULATION_INTERCEPTED)
5738 return EMULATE_DONE;
5739
5740 if (r == EMULATION_FAILED) {
5741 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
5742 emulation_type))
5743 return EMULATE_DONE;
5744
5745 return handle_emulation_failure(vcpu);
5746 }
5747
5748 if (ctxt->have_exception) {
5749 r = EMULATE_DONE;
5750 if (inject_emulated_exception(vcpu))
5751 return r;
5752 } else if (vcpu->arch.pio.count) {
5753 if (!vcpu->arch.pio.in) {
			/* FIXME: return into emulator if single-stepping. */
5755 vcpu->arch.pio.count = 0;
5756 } else {
5757 writeback = false;
5758 vcpu->arch.complete_userspace_io = complete_emulated_pio;
5759 }
5760 r = EMULATE_USER_EXIT;
5761 } else if (vcpu->mmio_needed) {
5762 if (!vcpu->mmio_is_write)
5763 writeback = false;
5764 r = EMULATE_USER_EXIT;
5765 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
5766 } else if (r == EMULATION_RESTART)
5767 goto restart;
5768 else
5769 r = EMULATE_DONE;
5770
5771 if (writeback) {
5772 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5773 toggle_interruptibility(vcpu, ctxt->interruptibility);
5774 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5775 kvm_rip_write(vcpu, ctxt->eip);
5776 if (r == EMULATE_DONE &&
5777 (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
5778 kvm_vcpu_do_singlestep(vcpu, &r);
5779 if (!ctxt->have_exception ||
5780 exception_type(ctxt->exception.vector) == EXCPT_TRAP)
5781 __kvm_set_rflags(vcpu, ctxt->eflags);
5782
		/*
		 * For STI, interrupts are shadowed, so KVM_REQ_EVENT will
		 * do nothing and will be requested again as soon as the
		 * shadow expires.  The check is still needed here because
		 * POPF has no interrupt shadow.
		 */
5789 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
5790 kvm_make_request(KVM_REQ_EVENT, vcpu);
5791 } else
5792 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5793
5794 return r;
5795}
5796EXPORT_SYMBOL_GPL(x86_emulate_instruction);
5797
5798int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
5799{
5800 unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
5801 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
5802 size, port, &val, 1);
5803
5804 vcpu->arch.pio.count = 0;
5805 return ret;
5806}
5807EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
5808
5809static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
5810{
5811 unsigned long val;
5812
	/* We should only ever be called with arch.pio.count equal to 1. */
5814 BUG_ON(vcpu->arch.pio.count != 1);
5815
	/* For sizes below 4 bytes merge with the old RAX value, else zero-extend. */
5817 val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
5818 : 0;
5819
	/*
	 * Since vcpu->arch.pio.count == 1, emulator_pio_in_emulated() will
	 * perform the copy from pio_data and the tracing for us.
	 */
5824 emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
5825 vcpu->arch.pio.port, &val, 1);
5826 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
5827
5828 return 1;
5829}
5830
5831int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
5832{
5833 unsigned long val;
5834 int ret;
5835
	/* For sizes below 4 bytes merge with the old RAX value, else zero-extend. */
5837 val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
5838
5839 ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
5840 &val, 1);
5841 if (ret) {
5842 kvm_register_write(vcpu, VCPU_REGS_RAX, val);
5843 return ret;
5844 }
5845
5846 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
5847
5848 return 0;
5849}
5850EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
5851
5852static int kvmclock_cpu_down_prep(unsigned int cpu)
5853{
5854 __this_cpu_write(cpu_tsc_khz, 0);
5855 return 0;
5856}
5857
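/*
 * Refresh this CPU's cached TSC frequency, either from a cpufreq
 * transition notification (@data) or from cpufreq/tsc_khz directly.
 */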
5858static void tsc_khz_changed(void *data)
5859{
5860 struct cpufreq_freqs *freq = data;
5861 unsigned long khz = 0;
5862
5863 if (data)
5864 khz = freq->new;
5865 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
5866 khz = cpufreq_quick_get(raw_smp_processor_id());
5867 if (!khz)
5868 khz = tsc_khz;
5869 __this_cpu_write(cpu_tsc_khz, khz);
5870}
5871
5872static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
5873 void *data)
5874{
5875 struct cpufreq_freqs *freq = data;
5876 struct kvm *kvm;
5877 struct kvm_vcpu *vcpu;
5878 int i, send_ipi = 0;
5879
	/*
	 * Guests may temporarily run on a slowing clock if we notify them
	 * afterwards, or on an accelerating clock if we notify them before,
	 * so that guest time never appears to go backwards.
	 *
	 * The per-cpu tsc_khz value cannot be updated atomically from this
	 * notifier, which may run on any CPU; each vCPU's local TSC state
	 * must be flagged for recomputation and the new frequency must be
	 * visible before any guest proceeds.  Because cpufreq transitions
	 * and CPU hotplug can race, all updates of the per-cpu variable are
	 * done via a synchronous IPI to the target CPU (tsc_khz_changed),
	 * which establishes the required ordering.
	 *
	 * Note that the correct TSC value must be written before the
	 * vCPU's request bit is set, since a guest time update may take
	 * place at any point after the request is raised.
	 */
5919 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
5920 return 0;
5921 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
5922 return 0;
5923
5924 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5925
5926 spin_lock(&kvm_lock);
5927 list_for_each_entry(kvm, &vm_list, vm_list) {
5928 kvm_for_each_vcpu(i, vcpu, kvm) {
5929 if (vcpu->cpu != freq->cpu)
5930 continue;
5931 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
5932 if (vcpu->cpu != smp_processor_id())
5933 send_ipi = 1;
5934 }
5935 }
5936 spin_unlock(&kvm_lock);
5937
5938 if (freq->old < freq->new && send_ipi) {
		/*
		 * The frequency went up.  The guest must not see old
		 * kvmclock values while running at the new frequency,
		 * otherwise time could appear to go backwards.
		 *
		 * If the CPU being updated is currently in guest context,
		 * kick it out with an IPI; kvmclock is refreshed on the
		 * next guest entry, so no stale values are observed.
		 */
5951 smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
5952 }
5953 return 0;
5954}
5955
5956static struct notifier_block kvmclock_cpufreq_notifier_block = {
5957 .notifier_call = kvmclock_cpufreq_notifier
5958};
5959
5960static int kvmclock_cpu_online(unsigned int cpu)
5961{
5962 tsc_khz_changed(NULL);
5963 return 0;
5964}
5965
5966static void kvm_timer_init(void)
5967{
5968 max_tsc_khz = tsc_khz;
5969
5970 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
5971#ifdef CONFIG_CPU_FREQ
5972 struct cpufreq_policy policy;
5973 int cpu;
5974
5975 memset(&policy, 0, sizeof(policy));
5976 cpu = get_cpu();
5977 cpufreq_get_policy(&policy, cpu);
5978 if (policy.cpuinfo.max_freq)
5979 max_tsc_khz = policy.cpuinfo.max_freq;
5980 put_cpu();
5981#endif
5982 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
5983 CPUFREQ_TRANSITION_NOTIFIER);
5984 }
5985 pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
5986
5987 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
5988 kvmclock_cpu_online, kvmclock_cpu_down_prep);
5989}
5990
5991static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
5992
5993int kvm_is_in_guest(void)
5994{
5995 return __this_cpu_read(current_vcpu) != NULL;
5996}
5997
5998static int kvm_is_user_mode(void)
5999{
6000 int user_mode = 3;
6001
6002 if (__this_cpu_read(current_vcpu))
6003 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
6004
6005 return user_mode != 0;
6006}
6007
6008static unsigned long kvm_get_guest_ip(void)
6009{
6010 unsigned long ip = 0;
6011
6012 if (__this_cpu_read(current_vcpu))
6013 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
6014
6015 return ip;
6016}
6017
6018static struct perf_guest_info_callbacks kvm_guest_cbs = {
6019 .is_in_guest = kvm_is_in_guest,
6020 .is_user_mode = kvm_is_user_mode,
6021 .get_guest_ip = kvm_get_guest_ip,
6022};
6023
6024void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
6025{
6026 __this_cpu_write(current_vcpu, vcpu);
6027}
6028EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
6029
6030void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
6031{
6032 __this_cpu_write(current_vcpu, NULL);
6033}
6034EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
6035
6036static void kvm_set_mmio_spte_mask(void)
6037{
6038 u64 mask;
6039 int maxphyaddr = boot_cpu_data.x86_phys_bits;
6040
	/*
	 * Set reserved physical-address bits (and the present bit) in the
	 * SPTE used for MMIO so that accesses fault with PFERR.RSVD = 1.
	 */
6046 mask = rsvd_bits(maxphyaddr, 51);

	/* Set the present bit. */
6049 mask |= 1ull;
6050
6051#ifdef CONFIG_X86_64
	/*
	 * With MAXPHYADDR == 52 there are no reserved physical-address bits
	 * to use, so clear the present bit as well and disable the
	 * reserved-bit based MMIO fault mechanism.
	 */
6056 if (maxphyaddr == 52)
6057 mask &= ~1ull;
6058#endif
6059
6060 kvm_mmu_set_mmio_spte_mask(mask, mask);
6061}
6062
6063#ifdef CONFIG_X86_64
6064static void pvclock_gtod_update_fn(struct work_struct *work)
6065{
6066 struct kvm *kvm;
6067
6068 struct kvm_vcpu *vcpu;
6069 int i;
6070
6071 spin_lock(&kvm_lock);
6072 list_for_each_entry(kvm, &vm_list, vm_list)
6073 kvm_for_each_vcpu(i, vcpu, kvm)
6074 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
6075 atomic_set(&kvm_guest_has_master_clock, 0);
6076 spin_unlock(&kvm_lock);
6077}
6078
6079static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
6080
/*
 * Notification that the host's pvclock gtod (timekeeping) data changed.
 */
6084static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
6085 void *priv)
6086{
6087 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
6088 struct timekeeper *tk = priv;
6089
6090 update_pvclock_gtod(tk);
6091
	/*
	 * Disable the master clock if the host stops using a TSC-based
	 * clocksource.
	 */
6095 if (gtod->clock.vclock_mode != VCLOCK_TSC &&
6096 atomic_read(&kvm_guest_has_master_clock) != 0)
6097 queue_work(system_long_wq, &pvclock_gtod_work);
6098
6099 return 0;
6100}
6101
6102static struct notifier_block pvclock_gtod_notifier = {
6103 .notifier_call = pvclock_gtod_notify,
6104};
6105#endif
6106
6107int kvm_arch_init(void *opaque)
6108{
6109 int r;
6110 struct kvm_x86_ops *ops = opaque;
6111
6112 if (kvm_x86_ops) {
6113 printk(KERN_ERR "kvm: already loaded the other module\n");
6114 r = -EEXIST;
6115 goto out;
6116 }
6117
6118 if (!ops->cpu_has_kvm_support()) {
6119 printk(KERN_ERR "kvm: no hardware support\n");
6120 r = -EOPNOTSUPP;
6121 goto out;
6122 }
6123 if (ops->disabled_by_bios()) {
6124 printk(KERN_ERR "kvm: disabled by bios\n");
6125 r = -EOPNOTSUPP;
6126 goto out;
6127 }
6128
6129 r = -ENOMEM;
6130 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
6131 if (!shared_msrs) {
6132 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
6133 goto out;
6134 }
6135
6136 r = kvm_mmu_module_init();
6137 if (r)
6138 goto out_free_percpu;
6139
6140 kvm_set_mmio_spte_mask();
6141
6142 kvm_x86_ops = ops;
6143
6144 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
6145 PT_DIRTY_MASK, PT64_NX_MASK, 0,
6146 PT_PRESENT_MASK, 0, sme_me_mask);
6147 kvm_timer_init();
6148
6149 perf_register_guest_info_callbacks(&kvm_guest_cbs);
6150
6151 if (boot_cpu_has(X86_FEATURE_XSAVE))
6152 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
6153
6154 kvm_lapic_init();
6155#ifdef CONFIG_X86_64
6156 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
6157#endif
6158
6159 return 0;
6160
6161out_free_percpu:
6162 free_percpu(shared_msrs);
6163out:
6164 return r;
6165}
6166
6167void kvm_arch_exit(void)
6168{
6169 kvm_lapic_exit();
6170 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
6171
6172 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
6173 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
6174 CPUFREQ_TRANSITION_NOTIFIER);
6175 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
6176#ifdef CONFIG_X86_64
6177 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
6178#endif
6179 kvm_x86_ops = NULL;
6180 kvm_mmu_module_exit();
6181 free_percpu(shared_msrs);
6182}
6183
6184int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
6185{
6186 ++vcpu->stat.halt_exits;
6187 if (lapic_in_kernel(vcpu)) {
6188 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
6189 return 1;
6190 } else {
6191 vcpu->run->exit_reason = KVM_EXIT_HLT;
6192 return 0;
6193 }
6194}
6195EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
6196
6197int kvm_emulate_halt(struct kvm_vcpu *vcpu)
6198{
6199 int ret = kvm_skip_emulated_instruction(vcpu);
6200
	/*
	 * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
	 * KVM_EXIT_DEBUG here.
	 */
6204 return kvm_vcpu_halt(vcpu) && ret;
6205}
6206EXPORT_SYMBOL_GPL(kvm_emulate_halt);
6207
6208#ifdef CONFIG_X86_64
6209static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
6210 unsigned long clock_type)
6211{
6212 struct kvm_clock_pairing clock_pairing;
6213 struct timespec ts;
6214 u64 cycle;
6215 int ret;
6216
6217 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
6218 return -KVM_EOPNOTSUPP;
6219
6220 if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
6221 return -KVM_EOPNOTSUPP;
6222
6223 clock_pairing.sec = ts.tv_sec;
6224 clock_pairing.nsec = ts.tv_nsec;
6225 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
6226 clock_pairing.flags = 0;
6227
6228 ret = 0;
6229 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
6230 sizeof(struct kvm_clock_pairing)))
6231 ret = -KVM_EFAULT;
6232
6233 return ret;
6234}
6235#endif
6236
/*
 * kvm_pv_kick_cpu_op: Kick a vcpu.
 *
 * @apicid - apicid of the vcpu to be kicked.
 */
6242static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
6243{
6244 struct kvm_lapic_irq lapic_irq;
6245
6246 lapic_irq.shorthand = 0;
6247 lapic_irq.dest_mode = 0;
6248 lapic_irq.level = 0;
6249 lapic_irq.dest_id = apicid;
6250 lapic_irq.msi_redir_hint = false;
6251
6252 lapic_irq.delivery_mode = APIC_DM_REMRD;
6253 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
6254}
6255
6256void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
6257{
6258 vcpu->arch.apicv_active = false;
6259 kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
6260}
6261
6262int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
6263{
6264 unsigned long nr, a0, a1, a2, a3, ret;
6265 int op_64_bit, r;
6266
6267 r = kvm_skip_emulated_instruction(vcpu);
6268
6269 if (kvm_hv_hypercall_enabled(vcpu->kvm))
6270 return kvm_hv_hypercall(vcpu);
6271
6272 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
6273 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
6274 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
6275 a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
6276 a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);
6277
6278 trace_kvm_hypercall(nr, a0, a1, a2, a3);
6279
6280 op_64_bit = is_64_bit_mode(vcpu);
6281 if (!op_64_bit) {
6282 nr &= 0xFFFFFFFF;
6283 a0 &= 0xFFFFFFFF;
6284 a1 &= 0xFFFFFFFF;
6285 a2 &= 0xFFFFFFFF;
6286 a3 &= 0xFFFFFFFF;
6287 }
6288
6289 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
6290 ret = -KVM_EPERM;
6291 goto out;
6292 }
6293
6294 switch (nr) {
6295 case KVM_HC_VAPIC_POLL_IRQ:
6296 ret = 0;
6297 break;
6298 case KVM_HC_KICK_CPU:
6299 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
6300 ret = 0;
6301 break;
6302#ifdef CONFIG_X86_64
6303 case KVM_HC_CLOCK_PAIRING:
6304 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
6305 break;
6306#endif
6307 default:
6308 ret = -KVM_ENOSYS;
6309 break;
6310 }
6311out:
6312 if (!op_64_bit)
6313 ret = (u32)ret;
6314 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
6315 ++vcpu->stat.hypercalls;
6316 return r;
6317}
6318EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
6319
6320static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
6321{
6322 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6323 char instruction[3];
6324 unsigned long rip = kvm_rip_read(vcpu);
6325
6326 kvm_x86_ops->patch_hypercall(vcpu, instruction);
6327
6328 return emulator_write_emulated(ctxt, rip, instruction, 3,
6329 &ctxt->exception);
6330}
6331
6332static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
6333{
6334 return vcpu->run->request_interrupt_window &&
6335 likely(!pic_in_kernel(vcpu->kvm));
6336}
6337
6338static void post_kvm_run_save(struct kvm_vcpu *vcpu)
6339{
6340 struct kvm_run *kvm_run = vcpu->run;
6341
6342 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
6343 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
6344 kvm_run->cr8 = kvm_get_cr8(vcpu);
6345 kvm_run->apic_base = kvm_get_apic_base(vcpu);
6346 kvm_run->ready_for_interrupt_injection =
6347 pic_in_kernel(vcpu->kvm) ||
6348 kvm_vcpu_ready_for_interrupt_injection(vcpu);
6349}
6350
6351static void update_cr8_intercept(struct kvm_vcpu *vcpu)
6352{
6353 int max_irr, tpr;
6354
6355 if (!kvm_x86_ops->update_cr8_intercept)
6356 return;
6357
6358 if (!lapic_in_kernel(vcpu))
6359 return;
6360
6361 if (vcpu->arch.apicv_active)
6362 return;
6363
6364 if (!vcpu->arch.apic->vapic_addr)
6365 max_irr = kvm_lapic_find_highest_irr(vcpu);
6366 else
6367 max_irr = -1;
6368
6369 if (max_irr != -1)
6370 max_irr >>= 4;
6371
6372 tpr = kvm_lapic_get_cr8(vcpu);
6373
6374 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
6375}
6376
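/*
 * Inject at most one pending event, in priority order: re-inject an
 * already-injected exception/NMI/IRQ first, then deliver a pending
 * exception, SMI, NMI and finally an external interrupt, consulting the
 * nested hypervisor via check_nested_events() where required.
 */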
6377static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
6378{
6379 int r;
6380
	/* Try to re-inject a previously injected event first. */
6382 if (vcpu->arch.exception.injected) {
6383 kvm_x86_ops->queue_exception(vcpu);
6384 return 0;
6385 }
6386
	/*
	 * NMIs and interrupts are only re-injected if no exception is
	 * pending; a pending exception is delivered first, below.
	 */
6391 if (!vcpu->arch.exception.pending) {
6392 if (vcpu->arch.nmi_injected) {
6393 kvm_x86_ops->set_nmi(vcpu);
6394 return 0;
6395 }
6396
6397 if (vcpu->arch.interrupt.pending) {
6398 kvm_x86_ops->set_irq(vcpu);
6399 return 0;
6400 }
6401 }
6402
6403 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
6404 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
6405 if (r != 0)
6406 return r;
6407 }
6408
	/* Try to inject a new event if one is pending. */
6410 if (vcpu->arch.exception.pending) {
6411 trace_kvm_inj_exception(vcpu->arch.exception.nr,
6412 vcpu->arch.exception.has_error_code,
6413 vcpu->arch.exception.error_code);
6414
6415 vcpu->arch.exception.pending = false;
6416 vcpu->arch.exception.injected = true;
6417
6418 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
6419 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
6420 X86_EFLAGS_RF);
6421
6422 if (vcpu->arch.exception.nr == DB_VECTOR &&
6423 (vcpu->arch.dr7 & DR7_GD)) {
6424 vcpu->arch.dr7 &= ~DR7_GD;
6425 kvm_update_dr7(vcpu);
6426 }
6427
6428 kvm_x86_ops->queue_exception(vcpu);
6429 } else if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
6430 vcpu->arch.smi_pending = false;
6431 enter_smm(vcpu);
6432 } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
6433 --vcpu->arch.nmi_pending;
6434 vcpu->arch.nmi_injected = true;
6435 kvm_x86_ops->set_nmi(vcpu);
6436 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
		/*
		 * Because interrupts can be injected asynchronously,
		 * check_nested_events is called again here to avoid a race
		 * with an interrupt that arrived after the check at the top
		 * of this function.
		 */
6444 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
6445 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
6446 if (r != 0)
6447 return r;
6448 }
6449 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
6450 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
6451 false);
6452 kvm_x86_ops->set_irq(vcpu);
6453 }
6454 }
6455
6456 return 0;
6457}
6458
6459static void process_nmi(struct kvm_vcpu *vcpu)
6460{
6461 unsigned limit = 2;
6462
	/*
	 * x86 is limited to one NMI running and one NMI pending after it.
	 * If an NMI is already in progress, limit further NMIs to just one;
	 * otherwise allow two, and the first will be injected immediately.
	 */
6468 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
6469 limit = 1;
6470
6471 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
6472 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
6473 kvm_make_request(KVM_REQ_EVENT, vcpu);
6474}
6475
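/*
 * The 512-byte buffer passed to the enter_smm helpers shadows the SMRAM
 * state-save area at offsets 0x7e00..0x7fff (written to smbase + 0xfe00),
 * so put_smstate() subtracts 0x7e00 from the architectural offset.
 */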
6476#define put_smstate(type, buf, offset, val) \
6477 *(type *)((buf) + (offset) - 0x7e00) = val
6478
6479static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
6480{
6481 u32 flags = 0;
6482 flags |= seg->g << 23;
6483 flags |= seg->db << 22;
6484 flags |= seg->l << 21;
6485 flags |= seg->avl << 20;
6486 flags |= seg->present << 15;
6487 flags |= seg->dpl << 13;
6488 flags |= seg->s << 12;
6489 flags |= seg->type << 8;
6490 return flags;
6491}
6492
6493static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
6494{
6495 struct kvm_segment seg;
6496 int offset;
6497
6498 kvm_get_segment(vcpu, &seg, n);
6499 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
6500
6501 if (n < 3)
6502 offset = 0x7f84 + n * 12;
6503 else
6504 offset = 0x7f2c + (n - 3) * 12;
6505
6506 put_smstate(u32, buf, offset + 8, seg.base);
6507 put_smstate(u32, buf, offset + 4, seg.limit);
6508 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
6509}
6510
6511#ifdef CONFIG_X86_64
6512static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
6513{
6514 struct kvm_segment seg;
6515 int offset;
6516 u16 flags;
6517
6518 kvm_get_segment(vcpu, &seg, n);
6519 offset = 0x7e00 + n * 16;
6520
6521 flags = enter_smm_get_segment_flags(&seg) >> 8;
6522 put_smstate(u16, buf, offset, seg.selector);
6523 put_smstate(u16, buf, offset + 2, flags);
6524 put_smstate(u32, buf, offset + 4, seg.limit);
6525 put_smstate(u64, buf, offset + 8, seg.base);
6526}
6527#endif
6528
6529static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
6530{
6531 struct desc_ptr dt;
6532 struct kvm_segment seg;
6533 unsigned long val;
6534 int i;
6535
6536 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
6537 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
6538 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
6539 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
6540
6541 for (i = 0; i < 8; i++)
6542 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
6543
6544 kvm_get_dr(vcpu, 6, &val);
6545 put_smstate(u32, buf, 0x7fcc, (u32)val);
6546 kvm_get_dr(vcpu, 7, &val);
6547 put_smstate(u32, buf, 0x7fc8, (u32)val);
6548
6549 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
6550 put_smstate(u32, buf, 0x7fc4, seg.selector);
6551 put_smstate(u32, buf, 0x7f64, seg.base);
6552 put_smstate(u32, buf, 0x7f60, seg.limit);
6553 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
6554
6555 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
6556 put_smstate(u32, buf, 0x7fc0, seg.selector);
6557 put_smstate(u32, buf, 0x7f80, seg.base);
6558 put_smstate(u32, buf, 0x7f7c, seg.limit);
6559 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
6560
6561 kvm_x86_ops->get_gdt(vcpu, &dt);
6562 put_smstate(u32, buf, 0x7f74, dt.address);
6563 put_smstate(u32, buf, 0x7f70, dt.size);
6564
6565 kvm_x86_ops->get_idt(vcpu, &dt);
6566 put_smstate(u32, buf, 0x7f58, dt.address);
6567 put_smstate(u32, buf, 0x7f54, dt.size);
6568
6569 for (i = 0; i < 6; i++)
6570 enter_smm_save_seg_32(vcpu, buf, i);
6571
6572 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
6573
	/* revision id */
6575 put_smstate(u32, buf, 0x7efc, 0x00020000);
6576 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
6577}
6578
6579static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
6580{
6581#ifdef CONFIG_X86_64
6582 struct desc_ptr dt;
6583 struct kvm_segment seg;
6584 unsigned long val;
6585 int i;
6586
6587 for (i = 0; i < 16; i++)
6588 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
6589
6590 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
6591 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
6592
6593 kvm_get_dr(vcpu, 6, &val);
6594 put_smstate(u64, buf, 0x7f68, val);
6595 kvm_get_dr(vcpu, 7, &val);
6596 put_smstate(u64, buf, 0x7f60, val);
6597
6598 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
6599 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
6600 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
6601
6602 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
6603
	/* revision id */
6605 put_smstate(u32, buf, 0x7efc, 0x00020064);
6606
6607 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
6608
6609 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
6610 put_smstate(u16, buf, 0x7e90, seg.selector);
6611 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
6612 put_smstate(u32, buf, 0x7e94, seg.limit);
6613 put_smstate(u64, buf, 0x7e98, seg.base);
6614
6615 kvm_x86_ops->get_idt(vcpu, &dt);
6616 put_smstate(u32, buf, 0x7e84, dt.size);
6617 put_smstate(u64, buf, 0x7e88, dt.address);
6618
6619 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
6620 put_smstate(u16, buf, 0x7e70, seg.selector);
6621 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
6622 put_smstate(u32, buf, 0x7e74, seg.limit);
6623 put_smstate(u64, buf, 0x7e78, seg.base);
6624
6625 kvm_x86_ops->get_gdt(vcpu, &dt);
6626 put_smstate(u32, buf, 0x7e64, dt.size);
6627 put_smstate(u64, buf, 0x7e68, dt.address);
6628
6629 for (i = 0; i < 6; i++)
6630 enter_smm_save_seg_64(vcpu, buf, i);
6631#else
6632 WARN_ON_ONCE(1);
6633#endif
6634}
6635
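/*
 * Emulate SMM entry: save the current register state into the SMRAM
 * state-save area, mask NMIs, and reset the segment registers, control
 * registers and RIP to the architectural SMM entry values.
 */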
6636static void enter_smm(struct kvm_vcpu *vcpu)
6637{
6638 struct kvm_segment cs, ds;
6639 struct desc_ptr dt;
6640 char buf[512];
6641 u32 cr0;
6642
6643 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
6644 vcpu->arch.hflags |= HF_SMM_MASK;
6645 memset(buf, 0, 512);
6646 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
6647 enter_smm_save_state_64(vcpu, buf);
6648 else
6649 enter_smm_save_state_32(vcpu, buf);
6650
6651 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
6652
6653 if (kvm_x86_ops->get_nmi_mask(vcpu))
6654 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
6655 else
6656 kvm_x86_ops->set_nmi_mask(vcpu, true);
6657
6658 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
6659 kvm_rip_write(vcpu, 0x8000);
6660
6661 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
6662 kvm_x86_ops->set_cr0(vcpu, cr0);
6663 vcpu->arch.cr0 = cr0;
6664
6665 kvm_x86_ops->set_cr4(vcpu, 0);
6666
	/* Undocumented: the IDT limit is set to zero on entry to SMM. */
6668 dt.address = dt.size = 0;
6669 kvm_x86_ops->set_idt(vcpu, &dt);
6670
6671 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
6672
6673 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
6674 cs.base = vcpu->arch.smbase;
6675
6676 ds.selector = 0;
6677 ds.base = 0;
6678
6679 cs.limit = ds.limit = 0xffffffff;
6680 cs.type = ds.type = 0x3;
6681 cs.dpl = ds.dpl = 0;
6682 cs.db = ds.db = 0;
6683 cs.s = ds.s = 1;
6684 cs.l = ds.l = 0;
6685 cs.g = ds.g = 1;
6686 cs.avl = ds.avl = 0;
6687 cs.present = ds.present = 1;
6688 cs.unusable = ds.unusable = 0;
6689 cs.padding = ds.padding = 0;
6690
6691 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
6692 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
6693 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
6694 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
6695 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
6696 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
6697
6698 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
6699 kvm_x86_ops->set_efer(vcpu, 0);
6700
6701 kvm_update_cpuid(vcpu);
6702 kvm_mmu_reset_context(vcpu);
6703}
6704
6705static void process_smi(struct kvm_vcpu *vcpu)
6706{
6707 vcpu->arch.smi_pending = true;
6708 kvm_make_request(KVM_REQ_EVENT, vcpu);
6709}
6710
6711void kvm_make_scan_ioapic_request(struct kvm *kvm)
6712{
6713 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
6714}
6715
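/*
 * Recompute which vectors need an EOI exit: scan the (split or in-kernel)
 * ioapic routes, merge in the Hyper-V SynIC vectors and hand the resulting
 * bitmap to the vendor code.
 */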
6716static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
6717{
6718 u64 eoi_exit_bitmap[4];
6719
6720 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
6721 return;
6722
6723 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
6724
6725 if (irqchip_split(vcpu->kvm))
6726 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
6727 else {
6728 if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
6729 kvm_x86_ops->sync_pir_to_irr(vcpu);
6730 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
6731 }
6732 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
6733 vcpu_to_synic(vcpu)->vec_bitmap, 256);
6734 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
6735}
6736
6737static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
6738{
6739 ++vcpu->stat.tlb_flush;
6740 kvm_x86_ops->tlb_flush(vcpu);
6741}
6742
6743void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
6744{
6745 struct page *page = NULL;
6746
6747 if (!lapic_in_kernel(vcpu))
6748 return;
6749
6750 if (!kvm_x86_ops->set_apic_access_page_addr)
6751 return;
6752
6753 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
6754 if (is_error_page(page))
6755 return;
6756 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
6757
6758
6759
6760
6761
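	/*
	 * Do not pin the APIC access page in memory; the MMU notifier
	 * will call us again if the page is migrated or swapped out.
	 */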
6762 put_page(page);
6763}
6764EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
6765
6766
6767
6768
6769
6770
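/*
 * Returns 1 to let vcpu_run() continue the guest execution loop without
 * exiting to userspace.  Otherwise, the value will be returned to userspace.
 */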
6771static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
6772{
6773 int r;
6774 bool req_int_win =
6775 dm_request_for_irq_injection(vcpu) &&
6776 kvm_cpu_accept_dm_intr(vcpu);
6777
6778 bool req_immediate_exit = false;
6779
6780 if (kvm_request_pending(vcpu)) {
6781 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
6782 kvm_mmu_unload(vcpu);
6783 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
6784 __kvm_migrate_timers(vcpu);
6785 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
6786 kvm_gen_update_masterclock(vcpu->kvm);
6787 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
6788 kvm_gen_kvmclock_update(vcpu);
6789 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
6790 r = kvm_guest_time_update(vcpu);
6791 if (unlikely(r))
6792 goto out;
6793 }
6794 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
6795 kvm_mmu_sync_roots(vcpu);
6796 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
6797 kvm_vcpu_flush_tlb(vcpu);
6798 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
6799 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
6800 r = 0;
6801 goto out;
6802 }
6803 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
6804 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
6805 vcpu->mmio_needed = 0;
6806 r = 0;
6807 goto out;
6808 }
6809 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
6810
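			/* Page is swapped out - do a synthetic halt. */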
6811 vcpu->arch.apf.halted = true;
6812 r = 1;
6813 goto out;
6814 }
6815 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
6816 record_steal_time(vcpu);
6817 if (kvm_check_request(KVM_REQ_SMI, vcpu))
6818 process_smi(vcpu);
6819 if (kvm_check_request(KVM_REQ_NMI, vcpu))
6820 process_nmi(vcpu);
6821 if (kvm_check_request(KVM_REQ_PMU, vcpu))
6822 kvm_pmu_handle_event(vcpu);
6823 if (kvm_check_request(KVM_REQ_PMI, vcpu))
6824 kvm_pmu_deliver_pmi(vcpu);
6825 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
6826 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
6827 if (test_bit(vcpu->arch.pending_ioapic_eoi,
6828 vcpu->arch.ioapic_handled_vectors)) {
6829 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
6830 vcpu->run->eoi.vector =
6831 vcpu->arch.pending_ioapic_eoi;
6832 r = 0;
6833 goto out;
6834 }
6835 }
6836 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
6837 vcpu_scan_ioapic(vcpu);
6838 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
6839 kvm_vcpu_reload_apic_access_page(vcpu);
6840 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
6841 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
6842 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
6843 r = 0;
6844 goto out;
6845 }
6846 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
6847 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
6848 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
6849 r = 0;
6850 goto out;
6851 }
6852 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
6853 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
6854 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
6855 r = 0;
6856 goto out;
6857 }
6858
6859
6860
6861
6862
6863
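		/*
		 * KVM_REQ_HV_STIMER has to be processed after
		 * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
		 * depend on the guest clock being up to date.
		 */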
6864 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
6865 kvm_hv_process_stimers(vcpu);
6866 }
6867
6868 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
6869 ++vcpu->stat.req_event;
6870 kvm_apic_accept_events(vcpu);
6871 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
6872 r = 1;
6873 goto out;
6874 }
6875
6876 if (inject_pending_event(vcpu, req_int_win) != 0)
6877 req_immediate_exit = true;
6878 else {
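			/*
			 * Nothing was injected this time around.  Open the
			 * NMI/IRQ window as needed; a pending SMI outside of
			 * SMM cannot be injected directly, so request an
			 * immediate exit to process it on the next entry.
			 */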
6888 if (vcpu->arch.smi_pending && !is_smm(vcpu))
6889 req_immediate_exit = true;
6890 if (vcpu->arch.nmi_pending)
6891 kvm_x86_ops->enable_nmi_window(vcpu);
6892 if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
6893 kvm_x86_ops->enable_irq_window(vcpu);
6894 WARN_ON(vcpu->arch.exception.pending);
6895 }
6896
6897 if (kvm_lapic_enabled(vcpu)) {
6898 update_cr8_intercept(vcpu);
6899 kvm_lapic_sync_to_vapic(vcpu);
6900 }
6901 }
6902
6903 r = kvm_mmu_reload(vcpu);
6904 if (unlikely(r)) {
6905 goto cancel_injection;
6906 }
6907
6908 preempt_disable();
6909
6910 kvm_x86_ops->prepare_guest_switch(vcpu);
6911 kvm_load_guest_fpu(vcpu);
6912
6913
6914
6915
6916
6917
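	/*
	 * Disable IRQs before setting IN_GUEST_MODE, so that posted
	 * interrupt IPIs sent from now on are delayed until after guest
	 * entry and therefore result in virtual interrupt delivery.
	 */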
6918 local_irq_disable();
6919 vcpu->mode = IN_GUEST_MODE;
6920
6921 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
6922
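	/*
	 * The write of IN_GUEST_MODE to vcpu->mode above must be ordered
	 * before the checks of pending requests and of PIR.ON below; the
	 * barrier after the SRCU unlock provides that ordering.
	 */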
6935 smp_mb__after_srcu_read_unlock();
6936
6937
6938
6939
6940
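	/*
	 * This handles the case where a posted interrupt was
	 * notified with kvm_vcpu_kick.
	 */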
6941 if (kvm_lapic_enabled(vcpu)) {
6942 if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
6943 kvm_x86_ops->sync_pir_to_irr(vcpu);
6944 }
6945
6946 if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
6947 || need_resched() || signal_pending(current)) {
6948 vcpu->mode = OUTSIDE_GUEST_MODE;
6949 smp_wmb();
6950 local_irq_enable();
6951 preempt_enable();
6952 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
6953 r = 1;
6954 goto cancel_injection;
6955 }
6956
6957 kvm_load_guest_xcr0(vcpu);
6958
6959 if (req_immediate_exit) {
6960 kvm_make_request(KVM_REQ_EVENT, vcpu);
6961 smp_send_reschedule(vcpu->cpu);
6962 }
6963
6964 trace_kvm_entry(vcpu->vcpu_id);
6965 wait_lapic_expire(vcpu);
6966 guest_enter_irqoff();
6967
6968 if (unlikely(vcpu->arch.switch_db_regs)) {
6969 set_debugreg(0, 7);
6970 set_debugreg(vcpu->arch.eff_db[0], 0);
6971 set_debugreg(vcpu->arch.eff_db[1], 1);
6972 set_debugreg(vcpu->arch.eff_db[2], 2);
6973 set_debugreg(vcpu->arch.eff_db[3], 3);
6974 set_debugreg(vcpu->arch.dr6, 6);
6975 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
6976 }
6977
6978 kvm_x86_ops->run(vcpu);
6979
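	/*
	 * Sync the debug registers back from hardware before the host's
	 * values are restored, so that a debug-register access vmexit can
	 * see the guest's values and re-set KVM_DEBUGREG_WONT_EXIT.
	 */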
6986 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
6987 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
6988 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
6989 kvm_update_dr0123(vcpu);
6990 kvm_update_dr6(vcpu);
6991 kvm_update_dr7(vcpu);
6992 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
6993 }
6994
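	/*
	 * If the guest used the debug registers they may differ from the
	 * host's by now; restore the host's state if it has active
	 * hardware breakpoints.
	 */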
7002 if (hw_breakpoint_active())
7003 hw_breakpoint_restore();
7004
7005 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
7006
7007 vcpu->mode = OUTSIDE_GUEST_MODE;
7008 smp_wmb();
7009
7010 kvm_put_guest_xcr0(vcpu);
7011
7012 kvm_x86_ops->handle_external_intr(vcpu);
7013
7014 ++vcpu->stat.exits;
7015
7016 guest_exit_irqoff();
7017
7018 local_irq_enable();
7019 preempt_enable();
7020
7021 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
7022
7023
7024
7025
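	/* Profile KVM exit RIPs. */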
7026 if (unlikely(prof_on == KVM_PROFILING)) {
7027 unsigned long rip = kvm_rip_read(vcpu);
7028 profile_hit(KVM_PROFILING, (void *)rip);
7029 }
7030
7031 if (unlikely(vcpu->arch.tsc_always_catchup))
7032 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7033
7034 if (vcpu->arch.apic_attention)
7035 kvm_lapic_sync_from_vapic(vcpu);
7036
7037 vcpu->arch.gpa_available = false;
7038 r = kvm_x86_ops->handle_exit(vcpu);
7039 return r;
7040
7041cancel_injection:
7042 kvm_x86_ops->cancel_injection(vcpu);
7043 if (unlikely(vcpu->arch.apic_attention))
7044 kvm_lapic_sync_from_vapic(vcpu);
7045out:
7046 return r;
7047}
7048
7049static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
7050{
7051 if (!kvm_arch_vcpu_runnable(vcpu) &&
7052 (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
7053 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
7054 kvm_vcpu_block(vcpu);
7055 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
7056
7057 if (kvm_x86_ops->post_block)
7058 kvm_x86_ops->post_block(vcpu);
7059
7060 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
7061 return 1;
7062 }
7063
7064 kvm_apic_accept_events(vcpu);
7065 switch(vcpu->arch.mp_state) {
7066 case KVM_MP_STATE_HALTED:
7067 vcpu->arch.pv.pv_unhalted = false;
7068 vcpu->arch.mp_state =
7069 KVM_MP_STATE_RUNNABLE;
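		/* fall through */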
7070 case KVM_MP_STATE_RUNNABLE:
7071 vcpu->arch.apf.halted = false;
7072 break;
7073 case KVM_MP_STATE_INIT_RECEIVED:
7074 break;
7075 default:
7076		return -EINTR;
7078 }
7079 return 1;
7080}
7081
7082static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
7083{
7084 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
7085 kvm_x86_ops->check_nested_events(vcpu, false);
7086
7087 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7088 !vcpu->arch.apf.halted);
7089}
7090
7091static int vcpu_run(struct kvm_vcpu *vcpu)
7092{
7093 int r;
7094 struct kvm *kvm = vcpu->kvm;
7095
7096 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
7097
7098 for (;;) {
7099 if (kvm_vcpu_running(vcpu)) {
7100 r = vcpu_enter_guest(vcpu);
7101 } else {
7102 r = vcpu_block(kvm, vcpu);
7103 }
7104
7105 if (r <= 0)
7106 break;
7107
7108 kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
7109 if (kvm_cpu_has_pending_timer(vcpu))
7110 kvm_inject_pending_timer_irqs(vcpu);
7111
7112 if (dm_request_for_irq_injection(vcpu) &&
7113 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
7114 r = 0;
7115 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
7116 ++vcpu->stat.request_irq_exits;
7117 break;
7118 }
7119
7120 kvm_check_async_pf_completion(vcpu);
7121
7122 if (signal_pending(current)) {
7123 r = -EINTR;
7124 vcpu->run->exit_reason = KVM_EXIT_INTR;
7125 ++vcpu->stat.signal_exits;
7126 break;
7127 }
7128 if (need_resched()) {
7129 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
7130 cond_resched();
7131 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
7132 }
7133 }
7134
7135 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
7136
7137 return r;
7138}
7139
7140static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
7141{
7142 int r;
7143 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
7144 r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
7145 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
7146 if (r != EMULATE_DONE)
7147 return 0;
7148 return 1;
7149}
7150
7151static int complete_emulated_pio(struct kvm_vcpu *vcpu)
7152{
7153 BUG_ON(!vcpu->arch.pio.count);
7154
7155 return complete_emulated_io(vcpu);
7156}
7157
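/*
 * Implements a small state machine for MMIO that userspace handles in
 * 8-byte pieces spread over one or more fragments: for each piece, fill in
 * kvm_run->mmio and exit to userspace; on re-entry copy read data back and
 * advance, and once every fragment is done resume instruction emulation.
 */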
7176static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
7177{
7178 struct kvm_run *run = vcpu->run;
7179 struct kvm_mmio_fragment *frag;
7180 unsigned len;
7181
7182 BUG_ON(!vcpu->mmio_needed);
7183
7184
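	/* Complete the piece that userspace just handled (copy read data back). */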
7185 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
7186 len = min(8u, frag->len);
7187 if (!vcpu->mmio_is_write)
7188 memcpy(frag->data, run->mmio.data, len);
7189
7190 if (frag->len <= 8) {
7191
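		/* Switch to the next fragment. */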
7192 frag++;
7193 vcpu->mmio_cur_fragment++;
7194 } else {
7195
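		/* Advance to the next 8-byte piece within this fragment. */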
7196 frag->data += len;
7197 frag->gpa += len;
7198 frag->len -= len;
7199 }
7200
7201 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
7202 vcpu->mmio_needed = 0;
7203
7204
7205 if (vcpu->mmio_is_write)
7206 return 1;
7207 vcpu->mmio_read_completed = 1;
7208 return complete_emulated_io(vcpu);
7209 }
7210
7211 run->exit_reason = KVM_EXIT_MMIO;
7212 run->mmio.phys_addr = frag->gpa;
7213 if (vcpu->mmio_is_write)
7214 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
7215 run->mmio.len = min(8u, frag->len);
7216 run->mmio.is_write = vcpu->mmio_is_write;
7217 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
7218 return 0;
7219}
7220
7221
7222int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
7223{
7224	struct fpu *fpu = &current->thread.fpu;
7225 int r;
7226 sigset_t sigsaved;
7227
7228 fpu__initialize(fpu);
7229
7230 if (vcpu->sigset_active)
7231 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
7232
7233 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
7234 if (kvm_run->immediate_exit) {
7235 r = -EINTR;
7236 goto out;
7237 }
7238 kvm_vcpu_block(vcpu);
7239 kvm_apic_accept_events(vcpu);
7240 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
7241 r = -EAGAIN;
7242 if (signal_pending(current)) {
7243 r = -EINTR;
7244 vcpu->run->exit_reason = KVM_EXIT_INTR;
7245 ++vcpu->stat.signal_exits;
7246 }
7247 goto out;
7248 }
7249
7250
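	/* No in-kernel LAPIC: re-sync the TPR from userspace's cr8. */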
7251 if (!lapic_in_kernel(vcpu)) {
7252 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
7253 r = -EINVAL;
7254 goto out;
7255 }
7256 }
7257
7258 if (unlikely(vcpu->arch.complete_userspace_io)) {
7259 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
7260 vcpu->arch.complete_userspace_io = NULL;
7261 r = cui(vcpu);
7262 if (r <= 0)
7263 goto out;
7264 } else
7265 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
7266
7267 if (kvm_run->immediate_exit)
7268 r = -EINTR;
7269 else
7270 r = vcpu_run(vcpu);
7271
7272out:
7273 post_kvm_run_save(vcpu);
7274 if (vcpu->sigset_active)
7275 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
7276
7277 return r;
7278}
7279
7280int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7281{
7282 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
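		/*
		 * Userspace is reading the registers in the middle of
		 * instruction emulation; write the emulation context's
		 * register cache back to the vcpu first.
		 */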
7290 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
7291 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
7292 }
7293 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
7294 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
7295 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
7296 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
7297 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
7298 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
7299 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
7300 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
7301#ifdef CONFIG_X86_64
7302 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
7303 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
7304 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
7305 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
7306 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
7307 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
7308 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
7309 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
7310#endif
7311
7312 regs->rip = kvm_rip_read(vcpu);
7313 regs->rflags = kvm_get_rflags(vcpu);
7314
7315 return 0;
7316}
7317
7318int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
7319{
7320 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
7321 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
7322
7323 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
7324 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
7325 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
7326 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
7327 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
7328 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
7329 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
7330 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
7331#ifdef CONFIG_X86_64
7332 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
7333 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
7334 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
7335 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
7336 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
7337 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
7338 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
7339 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
7340#endif
7341
7342 kvm_rip_write(vcpu, regs->rip);
7343 kvm_set_rflags(vcpu, regs->rflags);
7344
7345 vcpu->arch.exception.pending = false;
7346
7347 kvm_make_request(KVM_REQ_EVENT, vcpu);
7348
7349 return 0;
7350}
7351
7352void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
7353{
7354 struct kvm_segment cs;
7355
7356 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
7357 *db = cs.db;
7358 *l = cs.l;
7359}
7360EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
7361
7362int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
7363 struct kvm_sregs *sregs)
7364{
7365 struct desc_ptr dt;
7366
7367 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
7368 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
7369 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
7370 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
7371 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
7372 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
7373
7374 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
7375 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
7376
7377 kvm_x86_ops->get_idt(vcpu, &dt);
7378 sregs->idt.limit = dt.size;
7379 sregs->idt.base = dt.address;
7380 kvm_x86_ops->get_gdt(vcpu, &dt);
7381 sregs->gdt.limit = dt.size;
7382 sregs->gdt.base = dt.address;
7383
7384 sregs->cr0 = kvm_read_cr0(vcpu);
7385 sregs->cr2 = vcpu->arch.cr2;
7386 sregs->cr3 = kvm_read_cr3(vcpu);
7387 sregs->cr4 = kvm_read_cr4(vcpu);
7388 sregs->cr8 = kvm_get_cr8(vcpu);
7389 sregs->efer = vcpu->arch.efer;
7390 sregs->apic_base = kvm_get_apic_base(vcpu);
7391
7392 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
7393
7394 if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
7395 set_bit(vcpu->arch.interrupt.nr,
7396 (unsigned long *)sregs->interrupt_bitmap);
7397
7398 return 0;
7399}
7400
7401int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
7402 struct kvm_mp_state *mp_state)
7403{
7404 kvm_apic_accept_events(vcpu);
7405 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
7406 vcpu->arch.pv.pv_unhalted)
7407 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
7408 else
7409 mp_state->mp_state = vcpu->arch.mp_state;
7410
7411 return 0;
7412}
7413
7414int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
7415 struct kvm_mp_state *mp_state)
7416{
7417 if (!lapic_in_kernel(vcpu) &&
7418 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
7419 return -EINVAL;
7420
7421
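	/* INITs are latched while in SMM. */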
7422 if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
7423 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
7424 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
7425 return -EINVAL;
7426
7427 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
7428 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
7429 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
7430 } else
7431 vcpu->arch.mp_state = mp_state->mp_state;
7432 kvm_make_request(KVM_REQ_EVENT, vcpu);
7433 return 0;
7434}
7435
7436int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
7437 int reason, bool has_error_code, u32 error_code)
7438{
7439 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
7440 int ret;
7441
7442 init_emulate_ctxt(vcpu);
7443
7444 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
7445 has_error_code, error_code);
7446
7447 if (ret)
7448 return EMULATE_FAIL;
7449
7450 kvm_rip_write(vcpu, ctxt->eip);
7451 kvm_set_rflags(vcpu, ctxt->eflags);
7452 kvm_make_request(KVM_REQ_EVENT, vcpu);
7453 return EMULATE_DONE;
7454}
7455EXPORT_SYMBOL_GPL(kvm_task_switch);
7456
7457int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
7458 struct kvm_sregs *sregs)
7459{
7460 struct msr_data apic_base_msr;
7461 int mmu_reset_needed = 0;
7462 int pending_vec, max_bits, idx;
7463 struct desc_ptr dt;
7464
7465 if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
7466 (sregs->cr4 & X86_CR4_OSXSAVE))
7467 return -EINVAL;
7468
7469 apic_base_msr.data = sregs->apic_base;
7470 apic_base_msr.host_initiated = true;
7471 if (kvm_set_apic_base(vcpu, &apic_base_msr))
7472 return -EINVAL;
7473
7474 dt.size = sregs->idt.limit;
7475 dt.address = sregs->idt.base;
7476 kvm_x86_ops->set_idt(vcpu, &dt);
7477 dt.size = sregs->gdt.limit;
7478 dt.address = sregs->gdt.base;
7479 kvm_x86_ops->set_gdt(vcpu, &dt);
7480
7481 vcpu->arch.cr2 = sregs->cr2;
7482 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
7483 vcpu->arch.cr3 = sregs->cr3;
7484 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
7485
7486 kvm_set_cr8(vcpu, sregs->cr8);
7487
7488 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
7489 kvm_x86_ops->set_efer(vcpu, sregs->efer);
7490
7491 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
7492 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
7493 vcpu->arch.cr0 = sregs->cr0;
7494
7495 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
7496 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
7497 if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
7498 kvm_update_cpuid(vcpu);
7499
7500 idx = srcu_read_lock(&vcpu->kvm->srcu);
7501 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
7502 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
7503 mmu_reset_needed = 1;
7504 }
7505 srcu_read_unlock(&vcpu->kvm->srcu, idx);
7506
7507 if (mmu_reset_needed)
7508 kvm_mmu_reset_context(vcpu);
7509
7510 max_bits = KVM_NR_INTERRUPTS;
7511 pending_vec = find_first_bit(
7512 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
7513 if (pending_vec < max_bits) {
7514 kvm_queue_interrupt(vcpu, pending_vec, false);
7515 pr_debug("Set back pending irq %d\n", pending_vec);
7516 }
7517
7518 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
7519 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
7520 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
7521 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
7522 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
7523 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
7524
7525 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
7526 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
7527
7528 update_cr8_intercept(vcpu);
7529
7530
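	/* Older userspace won't unhalt the vcpu on reset. */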
7531 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
7532 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
7533 !is_protmode(vcpu))
7534 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7535
7536 kvm_make_request(KVM_REQ_EVENT, vcpu);
7537
7538 return 0;
7539}
7540
7541int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
7542 struct kvm_guest_debug *dbg)
7543{
7544 unsigned long rflags;
7545 int i, r;
7546
7547 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
7548 r = -EBUSY;
7549 if (vcpu->arch.exception.pending)
7550 goto out;
7551 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
7552 kvm_queue_exception(vcpu, DB_VECTOR);
7553 else
7554 kvm_queue_exception(vcpu, BP_VECTOR);
7555 }
7556
7557
7558
7559
7560
7561 rflags = kvm_get_rflags(vcpu);
7562
7563 vcpu->guest_debug = dbg->control;
7564 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
7565 vcpu->guest_debug = 0;
7566
7567 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
7568 for (i = 0; i < KVM_NR_DB_REGS; ++i)
7569 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
7570 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
7571 } else {
7572 for (i = 0; i < KVM_NR_DB_REGS; i++)
7573 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
7574 }
7575 kvm_update_dr7(vcpu);
7576
7577 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
7578 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
7579 get_segment_base(vcpu, VCPU_SREG_CS);
7580
7581
7582
7583
7584
7585 kvm_set_rflags(vcpu, rflags);
7586
7587 kvm_x86_ops->update_bp_intercept(vcpu);
7588
7589 r = 0;
7590
7591out:
7592
7593 return r;
7594}
7595
7596
7597
7598
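/*
 * Translate a guest virtual address to a guest physical address.
 */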
7599int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
7600 struct kvm_translation *tr)
7601{
7602 unsigned long vaddr = tr->linear_address;
7603 gpa_t gpa;
7604 int idx;
7605
7606 idx = srcu_read_lock(&vcpu->kvm->srcu);
7607 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
7608 srcu_read_unlock(&vcpu->kvm->srcu, idx);
7609 tr->physical_address = gpa;
7610 tr->valid = gpa != UNMAPPED_GVA;
7611 tr->writeable = 1;
7612 tr->usermode = 0;
7613
7614 return 0;
7615}
7616
7617int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
7618{
7619 struct fxregs_state *fxsave =
7620 &vcpu->arch.guest_fpu.state.fxsave;
7621
7622 memcpy(fpu->fpr, fxsave->st_space, 128);
7623 fpu->fcw = fxsave->cwd;
7624 fpu->fsw = fxsave->swd;
7625 fpu->ftwx = fxsave->twd;
7626 fpu->last_opcode = fxsave->fop;
7627 fpu->last_ip = fxsave->rip;
7628 fpu->last_dp = fxsave->rdp;
7629 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
7630
7631 return 0;
7632}
7633
7634int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
7635{
7636 struct fxregs_state *fxsave =
7637 &vcpu->arch.guest_fpu.state.fxsave;
7638
7639 memcpy(fxsave->st_space, fpu->fpr, 128);
7640 fxsave->cwd = fpu->fcw;
7641 fxsave->swd = fpu->fsw;
7642 fxsave->twd = fpu->ftwx;
7643 fxsave->fop = fpu->last_opcode;
7644 fxsave->rip = fpu->last_ip;
7645 fxsave->rdp = fpu->last_dp;
7646 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
7647
7648 return 0;
7649}
7650
7651static void fx_init(struct kvm_vcpu *vcpu)
7652{
7653 fpstate_init(&vcpu->arch.guest_fpu.state);
7654 if (boot_cpu_has(X86_FEATURE_XSAVES))
7655 vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
7656 host_xcr0 | XSTATE_COMPACTION_ENABLED;
7657
7658
7659
7660
7661 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
7662
7663 vcpu->arch.cr0 |= X86_CR0_ET;
7664}
7665
7666void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
7667{
7668 if (vcpu->guest_fpu_loaded)
7669 return;
7670
7671
7672
7673
7674
7675
7676 vcpu->guest_fpu_loaded = 1;
7677 __kernel_fpu_begin();
7678
7679 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
7680 ~XFEATURE_MASK_PKRU);
7681 trace_kvm_fpu(1);
7682}
7683
7684void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
7685{
7686 if (!vcpu->guest_fpu_loaded)
7687 return;
7688
7689 vcpu->guest_fpu_loaded = 0;
7690 copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
7691 __kernel_fpu_end();
7692 ++vcpu->stat.fpu_reload;
7693 trace_kvm_fpu(0);
7694}
7695
7696void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
7697{
7698 void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
7699
7700 kvmclock_reset(vcpu);
7701
7702 kvm_x86_ops->vcpu_free(vcpu);
7703 free_cpumask_var(wbinvd_dirty_mask);
7704}
7705
7706struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
7707 unsigned int id)
7708{
7709 struct kvm_vcpu *vcpu;
7710
7711 if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
7712 printk_once(KERN_WARNING
7713 "kvm: SMP vm created on host with unstable TSC; "
7714 "guest TSC will not be reliable\n");
7715
7716 vcpu = kvm_x86_ops->vcpu_create(kvm, id);
7717
7718 return vcpu;
7719}
7720
7721int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
7722{
7723 int r;
7724
7725 kvm_vcpu_mtrr_init(vcpu);
7726 r = vcpu_load(vcpu);
7727 if (r)
7728 return r;
7729 kvm_vcpu_reset(vcpu, false);
7730 kvm_mmu_setup(vcpu);
7731 vcpu_put(vcpu);
7732 return r;
7733}
7734
7735void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
7736{
7737 struct msr_data msr;
7738 struct kvm *kvm = vcpu->kvm;
7739
7740 kvm_hv_vcpu_postcreate(vcpu);
7741
7742 if (vcpu_load(vcpu))
7743 return;
7744 msr.data = 0x0;
7745 msr.index = MSR_IA32_TSC;
7746 msr.host_initiated = true;
7747 kvm_write_tsc(vcpu, &msr);
7748 vcpu_put(vcpu);
7749
7750 if (!kvmclock_periodic_sync)
7751 return;
7752
7753 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
7754 KVMCLOCK_SYNC_PERIOD);
7755}
7756
7757void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
7758{
7759 int r;
7760 vcpu->arch.apf.msr_val = 0;
7761
7762 r = vcpu_load(vcpu);
7763 BUG_ON(r);
7764 kvm_mmu_unload(vcpu);
7765 vcpu_put(vcpu);
7766
7767 kvm_x86_ops->vcpu_free(vcpu);
7768}
7769
7770void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
7771{
7772 vcpu->arch.hflags = 0;
7773
7774 vcpu->arch.smi_pending = 0;
7775 atomic_set(&vcpu->arch.nmi_queued, 0);
7776 vcpu->arch.nmi_pending = 0;
7777 vcpu->arch.nmi_injected = false;
7778 kvm_clear_interrupt_queue(vcpu);
7779 kvm_clear_exception_queue(vcpu);
7780 vcpu->arch.exception.pending = false;
7781
7782 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
7783 kvm_update_dr0123(vcpu);
7784 vcpu->arch.dr6 = DR6_INIT;
7785 kvm_update_dr6(vcpu);
7786 vcpu->arch.dr7 = DR7_FIXED_1;
7787 kvm_update_dr7(vcpu);
7788
7789 vcpu->arch.cr2 = 0;
7790
7791 kvm_make_request(KVM_REQ_EVENT, vcpu);
7792 vcpu->arch.apf.msr_val = 0;
7793 vcpu->arch.st.msr_val = 0;
7794
7795 kvmclock_reset(vcpu);
7796
7797 kvm_clear_async_pf_completion_queue(vcpu);
7798 kvm_async_pf_hash_reset(vcpu);
7799 vcpu->arch.apf.halted = false;
7800
7801 if (!init_event) {
7802 kvm_pmu_reset(vcpu);
7803 vcpu->arch.smbase = 0x30000;
7804
7805 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
7806 vcpu->arch.msr_misc_features_enables = 0;
7807 }
7808
7809 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
7810 vcpu->arch.regs_avail = ~0;
7811 vcpu->arch.regs_dirty = ~0;
7812
7813 kvm_x86_ops->vcpu_reset(vcpu, init_event);
7814}
7815
7816void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
7817{
7818 struct kvm_segment cs;
7819
7820 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
7821 cs.selector = vector << 8;
7822 cs.base = vector << 12;
7823 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
7824 kvm_rip_write(vcpu, 0);
7825}
7826
7827int kvm_arch_hardware_enable(void)
7828{
7829 struct kvm *kvm;
7830 struct kvm_vcpu *vcpu;
7831 int i;
7832 int ret;
7833 u64 local_tsc;
7834 u64 max_tsc = 0;
7835 bool stable, backwards_tsc = false;
7836
7837 kvm_shared_msr_cpu_online();
7838 ret = kvm_x86_ops->hardware_enable();
7839 if (ret != 0)
7840 return ret;
7841
7842 local_tsc = rdtsc();
7843 stable = !check_tsc_unstable();
7844 list_for_each_entry(kvm, &vm_list, vm_list) {
7845 kvm_for_each_vcpu(i, vcpu, kvm) {
7846 if (!stable && vcpu->cpu == smp_processor_id())
7847 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7848 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
7849 backwards_tsc = true;
7850 if (vcpu->arch.last_host_tsc > max_tsc)
7851 max_tsc = vcpu->arch.last_host_tsc;
7852 }
7853 }
7854 }
7855
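	/*
	 * On hosts where the TSC is reset across suspend/resume, the TSC
	 * read above can be behind the values the vCPUs last observed.
	 * Compensate by adding the largest observed delta to every vCPU's
	 * TSC offset so guest TSCs never appear to go backwards, and clear
	 * the per-VM TSC write tracking so stale values are not reused for
	 * offset matching.
	 */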
7894 if (backwards_tsc) {
7895 u64 delta_cyc = max_tsc - local_tsc;
7896 list_for_each_entry(kvm, &vm_list, vm_list) {
7897 kvm->arch.backwards_tsc_observed = true;
7898 kvm_for_each_vcpu(i, vcpu, kvm) {
7899 vcpu->arch.tsc_offset_adjustment += delta_cyc;
7900 vcpu->arch.last_host_tsc = local_tsc;
7901 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7902 }
7903
7904
7905
7906
7907
7908
7909
7910 kvm->arch.last_tsc_nsec = 0;
7911 kvm->arch.last_tsc_write = 0;
7912 }
7913
7914 }
7915 return 0;
7916}
7917
7918void kvm_arch_hardware_disable(void)
7919{
7920 kvm_x86_ops->hardware_disable();
7921 drop_user_return_notifiers();
7922}
7923
7924int kvm_arch_hardware_setup(void)
7925{
7926 int r;
7927
7928 r = kvm_x86_ops->hardware_setup();
7929 if (r != 0)
7930 return r;
7931
7932 if (kvm_has_tsc_control) {
7933
7934
7935
7936
7937
7938
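		/*
		 * Make sure the user can only configure tsc_khz values that
		 * fit into a signed 32-bit integer; hence the clamp below.
		 */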
7939 u64 max = min(0x7fffffffULL,
7940 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
7941 kvm_max_guest_tsc_khz = max;
7942
7943 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
7944 }
7945
7946 kvm_init_msr_list();
7947 return 0;
7948}
7949
7950void kvm_arch_hardware_unsetup(void)
7951{
7952 kvm_x86_ops->hardware_unsetup();
7953}
7954
7955void kvm_arch_check_processor_compat(void *rtn)
7956{
7957 kvm_x86_ops->check_processor_compatibility(rtn);
7958}
7959
7960bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
7961{
7962 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
7963}
7964EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
7965
7966bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
7967{
7968 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
7969}
7970
7971struct static_key kvm_no_apic_vcpu __read_mostly;
7972EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
7973
7974int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
7975{
7976 struct page *page;
7977 struct kvm *kvm;
7978 int r;
7979
7980 BUG_ON(vcpu->kvm == NULL);
7981 kvm = vcpu->kvm;
7982
7983 vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
7984 vcpu->arch.pv.pv_unhalted = false;
7985 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
7986 if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
7987 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
7988 else
7989 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
7990
7991 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
7992 if (!page) {
7993 r = -ENOMEM;
7994 goto fail;
7995 }
7996 vcpu->arch.pio_data = page_address(page);
7997
7998 kvm_set_tsc_khz(vcpu, max_tsc_khz);
7999
8000 r = kvm_mmu_create(vcpu);
8001 if (r < 0)
8002 goto fail_free_pio_data;
8003
8004 if (irqchip_in_kernel(kvm)) {
8005 r = kvm_create_lapic(vcpu);
8006 if (r < 0)
8007 goto fail_mmu_destroy;
8008 } else
8009 static_key_slow_inc(&kvm_no_apic_vcpu);
8010
8011 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
8012 GFP_KERNEL);
8013 if (!vcpu->arch.mce_banks) {
8014 r = -ENOMEM;
8015 goto fail_free_lapic;
8016 }
8017 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
8018
8019 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
8020 r = -ENOMEM;
8021 goto fail_free_mce_banks;
8022 }
8023
8024 fx_init(vcpu);
8025
8026 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
8027 vcpu->arch.pv_time_enabled = false;
8028
8029 vcpu->arch.guest_supported_xcr0 = 0;
8030 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
8031
8032 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
8033
8034 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
8035
8036 kvm_async_pf_hash_reset(vcpu);
8037 kvm_pmu_init(vcpu);
8038
8039 vcpu->arch.pending_external_vector = -1;
8040 vcpu->arch.preempted_in_kernel = false;
8041
8042 kvm_hv_vcpu_init(vcpu);
8043
8044 return 0;
8045
8046fail_free_mce_banks:
8047 kfree(vcpu->arch.mce_banks);
8048fail_free_lapic:
8049 kvm_free_lapic(vcpu);
8050fail_mmu_destroy:
8051 kvm_mmu_destroy(vcpu);
8052fail_free_pio_data:
8053 free_page((unsigned long)vcpu->arch.pio_data);
8054fail:
8055 return r;
8056}
8057
8058void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
8059{
8060 int idx;
8061
8062 kvm_hv_vcpu_uninit(vcpu);
8063 kvm_pmu_destroy(vcpu);
8064 kfree(vcpu->arch.mce_banks);
8065 kvm_free_lapic(vcpu);
8066 idx = srcu_read_lock(&vcpu->kvm->srcu);
8067 kvm_mmu_destroy(vcpu);
8068 srcu_read_unlock(&vcpu->kvm->srcu, idx);
8069 free_page((unsigned long)vcpu->arch.pio_data);
8070 if (!lapic_in_kernel(vcpu))
8071 static_key_slow_dec(&kvm_no_apic_vcpu);
8072}
8073
8074void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
8075{
8076 kvm_x86_ops->sched_in(vcpu, cpu);
8077}
8078
8079int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
8080{
8081 if (type)
8082 return -EINVAL;
8083
8084 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
8085 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
8086 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
8087 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
8088 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
8089
8090
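	/*
	 * Reserve bit 0 of irq_sources_bitmap for the userspace irq source
	 * and bit 1 for the irqfd resampler.
	 */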
8091 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
8092
8093 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
8094 &kvm->arch.irq_sources_bitmap);
8095
8096 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
8097 mutex_init(&kvm->arch.apic_map_lock);
8098 mutex_init(&kvm->arch.hyperv.hv_lock);
8099 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
8100
8101 kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
8102 pvclock_update_vm_gtod_copy(kvm);
8103
8104 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
8105 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
8106
8107 kvm_page_track_init(kvm);
8108 kvm_mmu_init_vm(kvm);
8109
8110 if (kvm_x86_ops->vm_init)
8111 return kvm_x86_ops->vm_init(kvm);
8112
8113 return 0;
8114}
8115
8116static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
8117{
8118 int r;
8119 r = vcpu_load(vcpu);
8120 BUG_ON(r);
8121 kvm_mmu_unload(vcpu);
8122 vcpu_put(vcpu);
8123}
8124
8125static void kvm_free_vcpus(struct kvm *kvm)
8126{
8127 unsigned int i;
8128 struct kvm_vcpu *vcpu;
8129
8130
8131
8132
8133 kvm_for_each_vcpu(i, vcpu, kvm) {
8134 kvm_clear_async_pf_completion_queue(vcpu);
8135 kvm_unload_vcpu_mmu(vcpu);
8136 }
8137 kvm_for_each_vcpu(i, vcpu, kvm)
8138 kvm_arch_vcpu_free(vcpu);
8139
8140 mutex_lock(&kvm->lock);
8141 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
8142 kvm->vcpus[i] = NULL;
8143
8144 atomic_set(&kvm->online_vcpus, 0);
8145 mutex_unlock(&kvm->lock);
8146}
8147
8148void kvm_arch_sync_events(struct kvm *kvm)
8149{
8150 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
8151 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
8152 kvm_free_pit(kvm);
8153}
8154
8155int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
8156{
8157 int i, r;
8158 unsigned long hva;
8159 struct kvm_memslots *slots = kvm_memslots(kvm);
8160 struct kvm_memory_slot *slot, old;
8161
8162
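	/* Called with kvm->slots_lock held. */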
8163 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
8164 return -EINVAL;
8165
8166 slot = id_to_memslot(slots, id);
8167 if (size) {
8168 if (slot->npages)
8169 return -EEXIST;
8170
8171
8172
8173
8174
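		/*
		 * MAP_SHARED keeps the internal slot's pages from being
		 * moved by fork()/COW.
		 */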
8175 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
8176 MAP_SHARED | MAP_ANONYMOUS, 0);
8177 if (IS_ERR((void *)hva))
8178 return PTR_ERR((void *)hva);
8179 } else {
8180 if (!slot->npages)
8181 return 0;
8182
8183 hva = 0;
8184 }
8185
8186 old = *slot;
8187 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
8188 struct kvm_userspace_memory_region m;
8189
8190 m.slot = id | (i << 16);
8191 m.flags = 0;
8192 m.guest_phys_addr = gpa;
8193 m.userspace_addr = hva;
8194 m.memory_size = size;
8195 r = __kvm_set_memory_region(kvm, &m);
8196 if (r < 0)
8197 return r;
8198 }
8199
8200 if (!size) {
8201 r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
8202 WARN_ON(r < 0);
8203 }
8204
8205 return 0;
8206}
8207EXPORT_SYMBOL_GPL(__x86_set_memory_region);
8208
8209int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
8210{
8211 int r;
8212
8213 mutex_lock(&kvm->slots_lock);
8214 r = __x86_set_memory_region(kvm, id, gpa, size);
8215 mutex_unlock(&kvm->slots_lock);
8216
8217 return r;
8218}
8219EXPORT_SYMBOL_GPL(x86_set_memory_region);
8220
8221void kvm_arch_destroy_vm(struct kvm *kvm)
8222{
8223 if (current->mm == kvm->mm) {
8224
8225
8226
8227
8228
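		/*
		 * Free the memory regions that were allocated on behalf of
		 * userspace; the mm check above ensures the mappings still
		 * belong to this process.
		 */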
8229 x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
8230 x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
8231 x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
8232 }
8233 if (kvm_x86_ops->vm_destroy)
8234 kvm_x86_ops->vm_destroy(kvm);
8235 kvm_pic_destroy(kvm);
8236 kvm_ioapic_destroy(kvm);
8237 kvm_free_vcpus(kvm);
8238 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
8239 kvm_mmu_uninit_vm(kvm);
8240 kvm_page_track_cleanup(kvm);
8241}
8242
8243void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
8244 struct kvm_memory_slot *dont)
8245{
8246 int i;
8247
8248 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
8249 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
8250 kvfree(free->arch.rmap[i]);
8251 free->arch.rmap[i] = NULL;
8252 }
8253 if (i == 0)
8254 continue;
8255
8256 if (!dont || free->arch.lpage_info[i - 1] !=
8257 dont->arch.lpage_info[i - 1]) {
8258 kvfree(free->arch.lpage_info[i - 1]);
8259 free->arch.lpage_info[i - 1] = NULL;
8260 }
8261 }
8262
8263 kvm_page_track_free_memslot(free, dont);
8264}
8265
8266int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
8267 unsigned long npages)
8268{
8269 int i;
8270
8271 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
8272 struct kvm_lpage_info *linfo;
8273 unsigned long ugfn;
8274 int lpages;
8275 int level = i + 1;
8276
8277 lpages = gfn_to_index(slot->base_gfn + npages - 1,
8278 slot->base_gfn, level) + 1;
8279
8280 slot->arch.rmap[i] =
8281 kvzalloc(lpages * sizeof(*slot->arch.rmap[i]), GFP_KERNEL);
8282 if (!slot->arch.rmap[i])
8283 goto out_free;
8284 if (i == 0)
8285 continue;
8286
8287 linfo = kvzalloc(lpages * sizeof(*linfo), GFP_KERNEL);
8288 if (!linfo)
8289 goto out_free;
8290
8291 slot->arch.lpage_info[i - 1] = linfo;
8292
8293 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
8294 linfo[0].disallow_lpage = 1;
8295 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
8296 linfo[lpages - 1].disallow_lpage = 1;
8297 ugfn = slot->userspace_addr >> PAGE_SHIFT;
8298
8299
8300
8301
8302
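		/*
		 * If the gfn and userspace address are not aligned with
		 * respect to each other, or large pages are disabled,
		 * forbid large page mappings for this slot.
		 */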
8303 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
8304 !kvm_largepages_enabled()) {
8305 unsigned long j;
8306
8307 for (j = 0; j < lpages; ++j)
8308 linfo[j].disallow_lpage = 1;
8309 }
8310 }
8311
8312 if (kvm_page_track_create_memslot(slot, npages))
8313 goto out_free;
8314
8315 return 0;
8316
8317out_free:
8318 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
8319 kvfree(slot->arch.rmap[i]);
8320 slot->arch.rmap[i] = NULL;
8321 if (i == 0)
8322 continue;
8323
8324 kvfree(slot->arch.lpage_info[i - 1]);
8325 slot->arch.lpage_info[i - 1] = NULL;
8326 }
8327 return -ENOMEM;
8328}
8329
8330void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
8331{
8332
8333
8334
8335
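	/*
	 * The memslot generation has been bumped; invalidate cached MMIO
	 * sptes in case the MMIO generation wrapped around.
	 */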
8336 kvm_mmu_invalidate_mmio_sptes(kvm, slots);
8337}
8338
8339int kvm_arch_prepare_memory_region(struct kvm *kvm,
8340 struct kvm_memory_slot *memslot,
8341 const struct kvm_userspace_memory_region *mem,
8342 enum kvm_mr_change change)
8343{
8344 return 0;
8345}
8346
8347static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
8348 struct kvm_memory_slot *new)
8349{
8350
8351 if (new->flags & KVM_MEM_READONLY) {
8352 kvm_mmu_slot_remove_write_access(kvm, new);
8353 return;
8354 }
8355
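	/*
	 * When dirty logging is enabled, let the vendor module pick its own
	 * mechanism (e.g. PML) if it provides one; otherwise fall back to
	 * write-protecting the whole slot.  When dirty logging is disabled,
	 * give the vendor module a chance to undo its setup.
	 */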
8386 if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
8387 if (kvm_x86_ops->slot_enable_log_dirty)
8388 kvm_x86_ops->slot_enable_log_dirty(kvm, new);
8389 else
8390 kvm_mmu_slot_remove_write_access(kvm, new);
8391 } else {
8392 if (kvm_x86_ops->slot_disable_log_dirty)
8393 kvm_x86_ops->slot_disable_log_dirty(kvm, new);
8394 }
8395}
8396
8397void kvm_arch_commit_memory_region(struct kvm *kvm,
8398 const struct kvm_userspace_memory_region *mem,
8399 const struct kvm_memory_slot *old,
8400 const struct kvm_memory_slot *new,
8401 enum kvm_mr_change change)
8402{
8403 int nr_mmu_pages = 0;
8404
8405 if (!kvm->arch.n_requested_mmu_pages)
8406 nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
8407
8408 if (nr_mmu_pages)
8409 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
8410
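	/*
	 * Dirty logging tracks sptes at 4K granularity, so huge pages were
	 * split while it was enabled.  Now that logging has been turned off
	 * for this slot, zap the small sptes that can be collapsed back
	 * into huge pages; later faults will recreate them.
	 */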
8423 if ((change != KVM_MR_DELETE) &&
8424 (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
8425 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
8426 kvm_mmu_zap_collapsible_sptes(kvm, new);
8427
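	/*
	 * Set up write protection and/or dirty logging for the new slot;
	 * there is nothing to do when the slot is being deleted.  The cast
	 * only drops the const qualifier, since kvm_mmu_slot_apply_flags()
	 * takes a non-const slot.
	 */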
8438 if (change != KVM_MR_DELETE)
8439 kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
8440}
8441
8442void kvm_arch_flush_shadow_all(struct kvm *kvm)
8443{
8444 kvm_mmu_invalidate_zap_all_pages(kvm);
8445}
8446
8447void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
8448 struct kvm_memory_slot *slot)
8449{
8450 kvm_page_track_flush_slot(kvm, slot);
8451}
8452
8453static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
8454{
8455 if (!list_empty_careful(&vcpu->async_pf.done))
8456 return true;
8457
8458 if (kvm_apic_has_events(vcpu))
8459 return true;
8460
8461 if (vcpu->arch.pv.pv_unhalted)
8462 return true;
8463
8464 if (vcpu->arch.exception.pending)
8465 return true;
8466
8467 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
8468 (vcpu->arch.nmi_pending &&
8469 kvm_x86_ops->nmi_allowed(vcpu)))
8470 return true;
8471
8472 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
8473 (vcpu->arch.smi_pending && !is_smm(vcpu)))
8474 return true;
8475
8476 if (kvm_arch_interrupt_allowed(vcpu) &&
8477 kvm_cpu_has_interrupt(vcpu))
8478 return true;
8479
8480 if (kvm_hv_has_stimer_pending(vcpu))
8481 return true;
8482
8483 return false;
8484}
8485
8486int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
8487{
8488 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
8489}
8490
8491bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
8492{
8493 return vcpu->arch.preempted_in_kernel;
8494}
8495
8496int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
8497{
8498 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
8499}
8500
8501int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
8502{
8503 return kvm_x86_ops->interrupt_allowed(vcpu);
8504}
8505
8506unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
8507{
8508 if (is_64_bit_mode(vcpu))
8509 return kvm_rip_read(vcpu);
8510 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
8511 kvm_rip_read(vcpu));
8512}
8513EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
8514
8515bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
8516{
8517 return kvm_get_linear_rip(vcpu) == linear_rip;
8518}
8519EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
8520
8521unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
8522{
8523 unsigned long rflags;
8524
8525 rflags = kvm_x86_ops->get_rflags(vcpu);
8526 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
8527 rflags &= ~X86_EFLAGS_TF;
8528 return rflags;
8529}
8530EXPORT_SYMBOL_GPL(kvm_get_rflags);
8531
8532static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
8533{
8534 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
8535 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
8536 rflags |= X86_EFLAGS_TF;
8537 kvm_x86_ops->set_rflags(vcpu, rflags);
8538}
8539
8540void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
8541{
8542 __kvm_set_rflags(vcpu, rflags);
8543 kvm_make_request(KVM_REQ_EVENT, vcpu);
8544}
8545EXPORT_SYMBOL_GPL(kvm_set_rflags);
8546
8547void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
8548{
8549 int r;
8550
8551 if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
8552 work->wakeup_all)
8553 return;
8554
8555 r = kvm_mmu_reload(vcpu);
8556 if (unlikely(r))
8557 return;
8558
8559 if (!vcpu->arch.mmu.direct_map &&
8560 work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu))
8561 return;
8562
8563 vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true);
8564}
8565
8566static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
8567{
8568 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
8569}
8570
8571static inline u32 kvm_async_pf_next_probe(u32 key)
8572{
8573 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
8574}
8575
8576static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
8577{
8578 u32 key = kvm_async_pf_hash_fn(gfn);
8579
8580 while (vcpu->arch.apf.gfns[key] != ~0)
8581 key = kvm_async_pf_next_probe(key);
8582
8583 vcpu->arch.apf.gfns[key] = gfn;
8584}
8585
8586static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
8587{
8588 int i;
8589 u32 key = kvm_async_pf_hash_fn(gfn);
8590
8591 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
8592 (vcpu->arch.apf.gfns[key] != gfn &&
8593 vcpu->arch.apf.gfns[key] != ~0); i++)
8594 key = kvm_async_pf_next_probe(key);
8595
8596 return key;
8597}
8598
8599bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
8600{
8601 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
8602}
8603
8604static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
8605{
8606 u32 i, j, k;
8607
8608 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
8609 while (true) {
8610 vcpu->arch.apf.gfns[i] = ~0;
8611 do {
8612 j = kvm_async_pf_next_probe(j);
8613 if (vcpu->arch.apf.gfns[j] == ~0)
8614 return;
8615 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
8616
8617
8618
8619
8620
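			/*
			 * Keep probing while k lies cyclically in ]i, j]
			 * (i excluded, j included).
			 */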
8621 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
8622 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
8623 i = j;
8624 }
8625}
8626
8627static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
8628{
8629
8630 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
8631 sizeof(val));
8632}
8633
8634static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
8635{
8636
8637 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
8638 sizeof(u32));
8639}
8640
8641void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
8642 struct kvm_async_pf *work)
8643{
8644 struct x86_exception fault;
8645
8646 trace_kvm_async_pf_not_present(work->arch.token, work->gva);
8647 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
8648
8649 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
8650 (vcpu->arch.apf.send_user_only &&
8651 kvm_x86_ops->get_cpl(vcpu) == 0))
8652 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
8653 else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
8654 fault.vector = PF_VECTOR;
8655 fault.error_code_valid = true;
8656 fault.error_code = 0;
8657 fault.nested_page_fault = false;
8658 fault.address = work->arch.token;
8659 fault.async_page_fault = true;
8660 kvm_inject_page_fault(vcpu, &fault);
8661 }
8662}
8663
8664void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
8665 struct kvm_async_pf *work)
8666{
8667 struct x86_exception fault;
8668 u32 val;
8669
8670 if (work->wakeup_all)
8671 work->arch.token = ~0;
8672 else
8673 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
8674 trace_kvm_async_pf_ready(work->arch.token, work->gva);
8675
8676 if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
8677 !apf_get_user(vcpu, &val)) {
8678 if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
8679 vcpu->arch.exception.pending &&
8680 vcpu->arch.exception.nr == PF_VECTOR &&
8681 !apf_put_user(vcpu, 0)) {
8682 vcpu->arch.exception.injected = false;
8683 vcpu->arch.exception.pending = false;
8684 vcpu->arch.exception.nr = 0;
8685 vcpu->arch.exception.has_error_code = false;
8686 vcpu->arch.exception.error_code = 0;
8687 } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
8688 fault.vector = PF_VECTOR;
8689 fault.error_code_valid = true;
8690 fault.error_code = 0;
8691 fault.nested_page_fault = false;
8692 fault.address = work->arch.token;
8693 fault.async_page_fault = true;
8694 kvm_inject_page_fault(vcpu, &fault);
8695 }
8696 }
8697 vcpu->arch.apf.halted = false;
8698 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8699}
8700
8701bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
8702{
8703 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
8704 return true;
8705 else
8706 return kvm_can_do_async_pf(vcpu);
8707}
8708
8709void kvm_arch_start_assignment(struct kvm *kvm)
8710{
8711 atomic_inc(&kvm->arch.assigned_device_count);
8712}
8713EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
8714
8715void kvm_arch_end_assignment(struct kvm *kvm)
8716{
8717 atomic_dec(&kvm->arch.assigned_device_count);
8718}
8719EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
8720
8721bool kvm_arch_has_assigned_device(struct kvm *kvm)
8722{
8723 return atomic_read(&kvm->arch.assigned_device_count);
8724}
8725EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
8726
8727void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
8728{
8729 atomic_inc(&kvm->arch.noncoherent_dma_count);
8730}
8731EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
8732
8733void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
8734{
8735 atomic_dec(&kvm->arch.noncoherent_dma_count);
8736}
8737EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
8738
8739bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
8740{
8741 return atomic_read(&kvm->arch.noncoherent_dma_count);
8742}
8743EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
8744
8745bool kvm_arch_has_irq_bypass(void)
8746{
8747 return kvm_x86_ops->update_pi_irte != NULL;
8748}
8749
8750int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
8751 struct irq_bypass_producer *prod)
8752{
8753 struct kvm_kernel_irqfd *irqfd =
8754 container_of(cons, struct kvm_kernel_irqfd, consumer);
8755
8756 irqfd->producer = prod;
8757
8758 return kvm_x86_ops->update_pi_irte(irqfd->kvm,
8759 prod->irq, irqfd->gsi, 1);
8760}
8761
8762void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
8763 struct irq_bypass_producer *prod)
8764{
8765 int ret;
8766 struct kvm_kernel_irqfd *irqfd =
8767 container_of(cons, struct kvm_kernel_irqfd, consumer);
8768
8769 WARN_ON(irqfd->producer != prod);
8770 irqfd->producer = NULL;
8771
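	/*
	 * With the producer gone, switch the interrupt back to remapped
	 * delivery so it keeps working while masked/disabled or while
	 * disconnected from KVM.
	 */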
8778 ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
8779 if (ret)
8780 printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
8781 " fails: %d\n", irqfd->consumer.token, ret);
8782}
8783
8784int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
8785 uint32_t guest_irq, bool set)
8786{
8787 if (!kvm_x86_ops->update_pi_irte)
8788 return -EINVAL;
8789
8790 return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
8791}
8792
8793bool kvm_vector_hashing_enabled(void)
8794{
8795 return vector_hashing;
8796}
8797EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
8798
8799EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
8800EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
8801EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
8802EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
8803EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
8804EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
8805EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
8806EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
8807EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
8808EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
8809EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
8810EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
8811EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
8812EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
8813EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
8814EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
8815EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
8816EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
8817EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
8818