// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */
#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
#include "hyperv.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/moduleparam.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/cpufreq.h>
#include <linux/user-return-notifier.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
#include <linux/timekeeper_internal.h>
#include <linux/pvclock_gtod.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>

#include <trace/events/kvm.h>

#include <asm/debugreg.h>
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mce.h>
#include <linux/kernel_stat.h>
#include <asm/fpu/internal.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
#include <asm/irq_remapping.h>
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
#include <asm/emulate_prefix.h>
#include <clocksource/hyperv_timer.h>

#define CREATE_TRACE_POINTS
#include "trace.h"

#define MAX_IO_MSRS 256
#define KVM_MAX_MCE_BANKS 32
u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);

#define emul_to_vcpu(ctxt) \
	container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)

/*
 * EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64 bit KVM
 */
#ifdef CONFIG_X86_64
static
u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
#else
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
95
96#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
97#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
98
99#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
100 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
101
102static void update_cr8_intercept(struct kvm_vcpu *vcpu);
103static void process_nmi(struct kvm_vcpu *vcpu);
104static void enter_smm(struct kvm_vcpu *vcpu);
105static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
106static void store_regs(struct kvm_vcpu *vcpu);
107static int sync_regs(struct kvm_vcpu *vcpu);
108
109struct kvm_x86_ops *kvm_x86_ops __read_mostly;
110EXPORT_SYMBOL_GPL(kvm_x86_ops);
111
112static bool __read_mostly ignore_msrs = 0;
113module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
114
115static bool __read_mostly report_ignored_msrs = true;
116module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
117
118unsigned int min_timer_period_us = 200;
119module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
120
121static bool __read_mostly kvmclock_periodic_sync = true;
122module_param(kvmclock_periodic_sync, bool, S_IRUGO);
123
124bool __read_mostly kvm_has_tsc_control;
125EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
126u32 __read_mostly kvm_max_guest_tsc_khz;
127EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
128u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
129EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
130u64 __read_mostly kvm_max_tsc_scaling_ratio;
131EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
132u64 __read_mostly kvm_default_tsc_scaling_ratio;
133EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
134
135
136static u32 __read_mostly tsc_tolerance_ppm = 250;
137module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
138
139
140
141
142
143
144
145static int __read_mostly lapic_timer_advance_ns = -1;
146module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
147
148static bool __read_mostly vector_hashing = true;
149module_param(vector_hashing, bool, S_IRUGO);
150
151bool __read_mostly enable_vmware_backdoor = false;
152module_param(enable_vmware_backdoor, bool, S_IRUGO);
153EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
154
155static bool __read_mostly force_emulation_prefix = false;
156module_param(force_emulation_prefix, bool, S_IRUGO);
157
158int __read_mostly pi_inject_timer = -1;
159module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
160
161#define KVM_NR_SHARED_MSRS 16
162
163struct kvm_shared_msrs_global {
164 int nr;
165 u32 msrs[KVM_NR_SHARED_MSRS];
166};
167
168struct kvm_shared_msrs {
169 struct user_return_notifier urn;
170 bool registered;
171 struct kvm_shared_msr_values {
172 u64 host;
173 u64 curr;
174 } values[KVM_NR_SHARED_MSRS];
175};
176
177static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
178static struct kvm_shared_msrs __percpu *shared_msrs;
179
180static u64 __read_mostly host_xss;
181
182struct kvm_stats_debugfs_item debugfs_entries[] = {
183 { "pf_fixed", VCPU_STAT(pf_fixed) },
184 { "pf_guest", VCPU_STAT(pf_guest) },
185 { "tlb_flush", VCPU_STAT(tlb_flush) },
186 { "invlpg", VCPU_STAT(invlpg) },
187 { "exits", VCPU_STAT(exits) },
188 { "io_exits", VCPU_STAT(io_exits) },
189 { "mmio_exits", VCPU_STAT(mmio_exits) },
190 { "signal_exits", VCPU_STAT(signal_exits) },
191 { "irq_window", VCPU_STAT(irq_window_exits) },
192 { "nmi_window", VCPU_STAT(nmi_window_exits) },
193 { "halt_exits", VCPU_STAT(halt_exits) },
194 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
195 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
196 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
197 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
198 { "hypercalls", VCPU_STAT(hypercalls) },
199 { "request_irq", VCPU_STAT(request_irq_exits) },
200 { "irq_exits", VCPU_STAT(irq_exits) },
201 { "host_state_reload", VCPU_STAT(host_state_reload) },
202 { "fpu_reload", VCPU_STAT(fpu_reload) },
203 { "insn_emulation", VCPU_STAT(insn_emulation) },
204 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
205 { "irq_injections", VCPU_STAT(irq_injections) },
206 { "nmi_injections", VCPU_STAT(nmi_injections) },
207 { "req_event", VCPU_STAT(req_event) },
208 { "l1d_flush", VCPU_STAT(l1d_flush) },
209 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
210 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
211 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
212 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
213 { "mmu_flooded", VM_STAT(mmu_flooded) },
214 { "mmu_recycled", VM_STAT(mmu_recycled) },
215 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
216 { "mmu_unsync", VM_STAT(mmu_unsync) },
217 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
218 { "largepages", VM_STAT(lpages, .mode = 0444) },
219 { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
220 { "max_mmu_page_hash_collisions",
221 VM_STAT(max_mmu_page_hash_collisions) },
222 { NULL }
223};
224
225u64 __read_mostly host_xcr0;
226
227struct kmem_cache *x86_fpu_cache;
228EXPORT_SYMBOL_GPL(x86_fpu_cache);
229
230static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
231
232static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
233{
234 int i;
235 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
236 vcpu->arch.apf.gfns[i] = ~0;
237}
238
239static void kvm_on_user_return(struct user_return_notifier *urn)
240{
241 unsigned slot;
242 struct kvm_shared_msrs *locals
243 = container_of(urn, struct kvm_shared_msrs, urn);
244 struct kvm_shared_msr_values *values;
245 unsigned long flags;
246
247
248
249
250
251 local_irq_save(flags);
252 if (locals->registered) {
253 locals->registered = false;
254 user_return_notifier_unregister(urn);
255 }
256 local_irq_restore(flags);
257 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
258 values = &locals->values[slot];
259 if (values->host != values->curr) {
260 wrmsrl(shared_msrs_global.msrs[slot], values->host);
261 values->curr = values->host;
262 }
263 }
264}
265
266void kvm_define_shared_msr(unsigned slot, u32 msr)
267{
268 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
269 shared_msrs_global.msrs[slot] = msr;
270 if (slot >= shared_msrs_global.nr)
271 shared_msrs_global.nr = slot + 1;
272}
273EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
274
275static void kvm_shared_msr_cpu_online(void)
276{
277 unsigned int cpu = smp_processor_id();
278 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
279 u64 value;
280 int i;
281
282 for (i = 0; i < shared_msrs_global.nr; ++i) {
283 rdmsrl_safe(shared_msrs_global.msrs[i], &value);
284 smsr->values[i].host = value;
285 smsr->values[i].curr = value;
286 }
287}
288
289int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
290{
291 unsigned int cpu = smp_processor_id();
292 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
293 int err;
294
295 value = (value & mask) | (smsr->values[slot].host & ~mask);
296 if (value == smsr->values[slot].curr)
297 return 0;
298 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
299 if (err)
300 return 1;
301
302 smsr->values[slot].curr = value;
303 if (!smsr->registered) {
304 smsr->urn.on_user_return = kvm_on_user_return;
305 user_return_notifier_register(&smsr->urn);
306 smsr->registered = true;
307 }
308 return 0;
309}
310EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
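
/*
 * Usage sketch (illustrative only; the slot index and MSR choice are
 * arbitrary, not taken from this file): a vendor module defines its shared
 * MSRs once at init time and updates them on the entry path:
 *
 *	kvm_define_shared_msr(0, MSR_STAR);		// hardware setup
 *	...
 *	kvm_set_shared_msr(0, guest_star, -1ull);	// before entering guest
 *
 * The host value is then restored lazily by the user-return notifier on the
 * first return to userspace, rather than on every vmexit.
 */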

static void drop_user_return_notifiers(void)
{
	unsigned int cpu = smp_processor_id();
	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);

	if (smsr->registered)
		kvm_on_user_return(&smsr->urn);
}

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
{
	return kvm_apic_mode(kvm_get_apic_base(vcpu));
}
EXPORT_SYMBOL_GPL(kvm_get_apic_mode);

int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
	enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
	u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
		(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);

	if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
		return 1;
	if (!msr_info->host_initiated) {
		if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
			return 1;
		if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
			return 1;
	}

	kvm_lapic_set_base(vcpu, msr_info->data);
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

asmlinkage __visible void kvm_spurious_fault(void)
{
	/* Fault while not rebooting.  We want the trace. */
	BUG_ON(!kvm_rebooting);
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);

#define EXCPT_BENIGN		0
#define EXCPT_CONTRIBUTORY	1
#define EXCPT_PF		2

static int exception_class(int vector)
{
	switch (vector) {
	case PF_VECTOR:
		return EXCPT_PF;
	case DE_VECTOR:
	case TS_VECTOR:
	case NP_VECTOR:
	case SS_VECTOR:
	case GP_VECTOR:
		return EXCPT_CONTRIBUTORY;
	default:
		break;
	}
	return EXCPT_BENIGN;
}

#define EXCPT_FAULT		0
#define EXCPT_TRAP		1
#define EXCPT_ABORT		2
#define EXCPT_INTERRUPT		3

static int exception_type(int vector)
{
	unsigned int mask;

	if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
		return EXCPT_INTERRUPT;

	mask = 1 << vector;

	/* #DB is trap, as instruction watchpoints are handled elsewhere */
	if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
		return EXCPT_TRAP;

	if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
		return EXCPT_ABORT;

	/* Reserved exceptions will result in fault */
	return EXCPT_FAULT;
}

void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
{
	unsigned nr = vcpu->arch.exception.nr;
	bool has_payload = vcpu->arch.exception.has_payload;
	unsigned long payload = vcpu->arch.exception.payload;

	if (!has_payload)
		return;

	switch (nr) {
	case DB_VECTOR:
		/*
		 * "Certain debug exceptions may clear bit 0-3.  The
		 * remaining contents of the DR6 register are never
		 * cleared by the processor".
		 */
		vcpu->arch.dr6 &= ~DR_TRAP_BITS;
		/*
		 * DR6.RTM is set by all #DB exceptions that don't clear it.
		 */
		vcpu->arch.dr6 |= DR6_RTM;
		vcpu->arch.dr6 |= payload;
		/*
		 * Bit 16 should be set in the payload whenever the #DB
		 * exception should clear DR6.RTM.  This makes the payload
		 * compatible with the pending debug exceptions under VMX.
		 * Though not currently documented in the SDM, this also
		 * makes the payload compatible with the exit qualification
		 * for #DB exceptions under VMX.
		 */
		vcpu->arch.dr6 ^= payload & DR6_RTM;
		break;
	case PF_VECTOR:
		vcpu->arch.cr2 = payload;
		break;
	}

	vcpu->arch.exception.has_payload = false;
	vcpu->arch.exception.payload = 0;
}
EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
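
/*
 * Worked example for the #DB case above (values illustrative): starting
 * from dr6 = DR6_FIXED_1, a payload of (DR6_RTM | DR6_BS) gives
 *
 *	dr6 &= ~DR_TRAP_BITS;		// stale B0-B3 dropped
 *	dr6 |= DR6_RTM;			// RTM reads as 1 unless cleared
 *	dr6 |= payload;			// BS and RTM now set
 *	dr6 ^= payload & DR6_RTM;	// payload bit 16 flips RTM back to 0
 *
 * i.e. DR6.BS ends up 1 and DR6.RTM ends up 0, matching the VMX "pending
 * debug exceptions" encoding of the payload.
 */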

static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
		unsigned nr, bool has_error, u32 error_code,
		bool has_payload, unsigned long payload, bool reinject)
{
	u32 prev_nr;
	int class1, class2;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
	queue:
		if (has_error && !is_protmode(vcpu))
			has_error = false;
		if (reinject) {
			/*
			 * On vmentry, vcpu->arch.exception.pending is only
			 * true if an event injection was blocked by
			 * nested_run_pending.  In that case, however,
			 * vcpu_enter_guest requests an immediate exit,
			 * and the guest shouldn't proceed far enough to
			 * need reinjection.
			 */
			WARN_ON_ONCE(vcpu->arch.exception.pending);
			vcpu->arch.exception.injected = true;
			if (WARN_ON_ONCE(has_payload)) {
				/*
				 * A reinjected event has already
				 * delivered its payload.
				 */
				has_payload = false;
				payload = 0;
			}
		} else {
			vcpu->arch.exception.pending = true;
			vcpu->arch.exception.injected = false;
		}
		vcpu->arch.exception.has_error_code = has_error;
		vcpu->arch.exception.nr = nr;
		vcpu->arch.exception.error_code = error_code;
		vcpu->arch.exception.has_payload = has_payload;
		vcpu->arch.exception.payload = payload;
		/*
		 * In guest mode, payload delivery should be deferred,
		 * so that the L1 hypervisor can intercept #PF before
		 * CR2 is modified (or intercept #DB before DR6 is
		 * modified under nVMX).  However, for ABI compatibility
		 * with KVM_GET_VCPU_EVENTS and KVM_SET_VCPU_EVENTS, we
		 * can't delay payload delivery unless userspace has
		 * enabled this functionality via the per-VM capability,
		 * KVM_CAP_EXCEPTION_PAYLOAD.
		 */
		if (!vcpu->kvm->arch.exception_payload_enabled ||
		    !is_guest_mode(vcpu))
			kvm_deliver_exception_payload(vcpu);
		return;
	}

	/* to check exception */
	prev_nr = vcpu->arch.exception.nr;
	if (prev_nr == DF_VECTOR) {
		/* triple fault -> shutdown */
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
		return;
	}
	class1 = exception_class(prev_nr);
	class2 = exception_class(nr);
	if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
		|| (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
		/*
		 * Generate a double fault per SDM Table 5-5.  Set
		 * exception.pending = true so that the double fault
		 * can trigger a nested vmexit.
		 */
		vcpu->arch.exception.pending = true;
		vcpu->arch.exception.injected = false;
		vcpu->arch.exception.has_error_code = true;
		vcpu->arch.exception.nr = DF_VECTOR;
		vcpu->arch.exception.error_code = 0;
		vcpu->arch.exception.has_payload = false;
		vcpu->arch.exception.payload = 0;
	} else
		/*
		 * Replace the previous exception with a new one in the
		 * hope that instruction re-execution will regenerate
		 * the lost exception.
		 */
		goto queue;
}
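
/*
 * Example of the folding logic above (per SDM Table 5-5): a #GP
 * (contributory) raised while a #NP (contributory) is still pending is
 * replaced by a #DF with error code 0; any further fault while that #DF is
 * pending escalates to KVM_REQ_TRIPLE_FAULT and shuts the VM down.  A
 * benign exception such as #DB would instead take the "goto queue" path
 * and simply replace the pending one.
 */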

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);

static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
				  unsigned long payload)
{
	kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
}

static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
				    u32 error_code, unsigned long payload)
{
	kvm_multiple_exception(vcpu, nr, true, error_code,
			       true, payload, false);
}

int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
	if (err)
		kvm_inject_gp(vcpu, 0);
	else
		return kvm_skip_emulated_instruction(vcpu);

	return 1;
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	++vcpu->stat.pf_guest;
	vcpu->arch.exception.nested_apf =
		is_guest_mode(vcpu) && fault->async_page_fault;
	if (vcpu->arch.exception.nested_apf) {
		vcpu->arch.apf.nested_apf_token = fault->address;
		kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
	} else {
		kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
					fault->address);
	}
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);

static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
{
	if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
		vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
	else
		vcpu->arch.mmu->inject_page_fault(vcpu, fault);

	return fault->nested_page_fault;
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
	atomic_inc(&vcpu->arch.nmi_queued);
	kvm_make_request(KVM_REQ_NMI, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);

/*
 * Checks if cpl <= required_cpl; if true, return true.  Otherwise queue
 * a #GP and return false.
 */
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
	if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
		return true;
	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_cpl);

bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
{
	if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
		return true;

	kvm_queue_exception(vcpu, UD_VECTOR);
	return false;
}
EXPORT_SYMBOL_GPL(kvm_require_dr);

/*
 * This function will be used to read from the physical memory of the
 * currently running guest.  The difference to kvm_vcpu_read_guest_page is
 * that this function can read from guest physical or from the guest's
 * guest physical memory.
 */
int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gfn_t ngfn, void *data, int offset, int len,
			    u32 access)
{
	struct x86_exception exception;
	gfn_t real_gfn;
	gpa_t ngpa;

	ngpa = gfn_to_gpa(ngfn);
	real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
	if (real_gfn == UNMAPPED_GVA)
		return -EFAULT;

	real_gfn = gpa_to_gfn(real_gfn);

	return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);

static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
				      void *data, int offset, int len,
				      u32 access)
{
	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
				       data, offset, len, access);
}

static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
{
	return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
	       rsvd_bits(1, 2);
}
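
/*
 * Worked example (MAXPHYADDR = 36, hypothetical): the mask above becomes
 * rsvd_bits(36, 63) | rsvd_bits(5, 8) | rsvd_bits(1, 2), so bits 63:36,
 * 8:5 and 2:1 must be clear in each PDPTE; only P (bit 0), PWT/PCD
 * (bits 4:3), the ignored bits 11:9 and the page-aligned table address may
 * be set.
 */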

/*
 * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];

	ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
				      offset * sizeof(u64), sizeof(pdpte),
				      PFERR_USER_MASK|PFERR_WRITE_MASK);
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if ((pdpte[i] & PT_PRESENT_MASK) &&
		    (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);

out:

	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);

bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
	int offset;
	gfn_t gfn;
	int r;

	if (!is_pae_paging(vcpu))
		return false;

	if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
		return true;

	gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
	offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
				       PFERR_USER_MASK | PFERR_WRITE_MASK);
	if (r < 0)
		return true;

	return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
}
EXPORT_SYMBOL_GPL(pdptrs_changed);

int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	unsigned long old_cr0 = kvm_read_cr0(vcpu);
	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;

	cr0 |= X86_CR0_ET;

#ifdef CONFIG_X86_64
	if (cr0 & 0xffffffff00000000UL)
		return 1;
#endif

	cr0 &= ~CR0_RESERVED_BITS;

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
		return 1;

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
		return 1;

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->arch.efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu))
				return 1;
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l)
				return 1;
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
						 kvm_read_cr3(vcpu)))
			return 1;
	}

	if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
		return 1;

	kvm_x86_ops->set_cr0(vcpu, cr0);

	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_async_pf_hash_reset(vcpu);
	}

	if ((cr0 ^ old_cr0) & update_bits)
		kvm_mmu_reset_context(vcpu);

	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	(void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {

		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);

		if (vcpu->arch.xsaves_enabled &&
		    vcpu->arch.ia32_xss != host_xss)
			wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
	}
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);

void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
{
	if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {

		if (vcpu->arch.xcr0 != host_xcr0)
			xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);

		if (vcpu->arch.xsaves_enabled &&
		    vcpu->arch.ia32_xss != host_xss)
			wrmsrl(MSR_IA32_XSS, host_xss);
	}
}
EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state);

static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	u64 xcr0 = xcr;
	u64 old_xcr0 = vcpu->arch.xcr0;
	u64 valid_bits;

	/* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
	if (index != XCR_XFEATURE_ENABLED_MASK)
		return 1;
	if (!(xcr0 & XFEATURE_MASK_FP))
		return 1;
	if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
		return 1;

	/*
	 * Do not allow the guest to set bits that we do not support
	 * saving.  However, xcr0 bit 0 is always set, even if the
	 * emulated CPU does not support XSAVE (see fx_init).
	 */
	valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
	if (xcr0 & ~valid_bits)
		return 1;

	if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
	    (!(xcr0 & XFEATURE_MASK_BNDCSR)))
		return 1;

	if (xcr0 & XFEATURE_MASK_AVX512) {
		if (!(xcr0 & XFEATURE_MASK_YMM))
			return 1;
		if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
			return 1;
	}
	vcpu->arch.xcr0 = xcr0;

	if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
		kvm_update_cpuid(vcpu);
	return 0;
}

int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
	if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
	    __kvm_set_xcr(vcpu, index, xcr)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);
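
/*
 * Examples of the XCR0 checks in __kvm_set_xcr() (XFEATURE mask values):
 *
 *	xcr0 = 0x1 (FP)			accepted, the minimal legal value
 *	xcr0 = 0x3 (FP | SSE)		accepted
 *	xcr0 = 0x5 (FP | YMM)		rejected: YMM requires SSE
 *	xcr0 = 0x6 (SSE | YMM)		rejected: FP must always be set
 *	xcr0 = 0x27 (FP|SSE|YMM|opmask)	rejected: the three AVX-512
 *					components (0xe0) only together
 *	xcr0 = 0xe7			accepted, provided guest CPUID
 *					advertises all of these features
 */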

static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	if (cr4 & CR4_RESERVED_BITS)
		return -EINVAL;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
		return -EINVAL;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
		return -EINVAL;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
		return -EINVAL;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
		return -EINVAL;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
		return -EINVAL;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
		return -EINVAL;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
		return -EINVAL;

	return 0;
}

int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long old_cr4 = kvm_read_cr4(vcpu);
	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
				   X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;

	if (kvm_valid_cr4(vcpu, cr4))
		return 1;

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE))
			return 1;
	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
		   && ((cr4 ^ old_cr4) & pdptr_bits)
		   && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
				   kvm_read_cr3(vcpu)))
		return 1;

	if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
		if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
			return 1;

		/* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
		if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
			return 1;
	}

	if (kvm_x86_ops->set_cr4(vcpu, cr4))
		return 1;

	if (((cr4 ^ old_cr4) & pdptr_bits) ||
	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
		kvm_mmu_reset_context(vcpu);

	if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
		kvm_update_cpuid(vcpu);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);

int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	bool skip_tlb_flush = false;
#ifdef CONFIG_X86_64
	bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);

	if (pcid_enabled) {
		skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
		cr3 &= ~X86_CR3_PCID_NOFLUSH;
	}
#endif

	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
		if (!skip_tlb_flush) {
			kvm_mmu_sync_roots(vcpu);
			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
		}
		return 0;
	}

	if (is_long_mode(vcpu) &&
	    (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
		return 1;
	else if (is_pae_paging(vcpu) &&
		 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
		return 1;

	kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
	vcpu->arch.cr3 = cr3;
	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS)
		return 1;
	if (lapic_in_kernel(vcpu))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->arch.cr8 = cr8;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
{
	int i;

	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
		for (i = 0; i < KVM_NR_DB_REGS; i++)
			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
	}
}

static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
		kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
}

static void kvm_update_dr7(struct kvm_vcpu *vcpu)
{
	unsigned long dr7;

	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
		dr7 = vcpu->arch.guest_debug_dr7;
	else
		dr7 = vcpu->arch.dr7;
	kvm_x86_ops->set_dr7(vcpu, dr7);
	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
	if (dr7 & DR7_BP_EN_MASK)
		vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}

static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
{
	u64 fixed = DR6_FIXED_1;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
		fixed |= DR6_RTM;
	return fixed;
}

static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	switch (dr) {
	case 0 ... 3:
		vcpu->arch.db[dr] = val;
		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
			vcpu->arch.eff_db[dr] = val;
		break;
	case 4:
		/* fall through */
	case 6:
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
		kvm_update_dr6(vcpu);
		break;
	case 5:
		/* fall through */
	default: /* 7 */
		if (val & 0xffffffff00000000ULL)
			return -1; /* #GP */
		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
		kvm_update_dr7(vcpu);
		break;
	}

	return 0;
}

int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
	if (__kvm_set_dr(vcpu, dr, val)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_dr);

int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
	switch (dr) {
	case 0 ... 3:
		*val = vcpu->arch.db[dr];
		break;
	case 4:
		/* fall through */
	case 6:
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
			*val = vcpu->arch.dr6;
		else
			*val = kvm_x86_ops->get_dr6(vcpu);
		break;
	case 5:
		/* fall through */
	default: /* 7 */
		*val = vcpu->arch.dr7;
		break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);

bool kvm_rdpmc(struct kvm_vcpu *vcpu)
{
	u32 ecx = kvm_rcx_read(vcpu);
	u64 data;
	int err;

	err = kvm_pmu_rdpmc(vcpu, ecx, &data);
	if (err)
		return err;
	kvm_rax_write(vcpu, (u32)data);
	kvm_rdx_write(vcpu, data >> 32);
	return err;
}
EXPORT_SYMBOL_GPL(kvm_rdpmc);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features)
 * extract the supported MSRs from the related const lists.
 * msrs_to_save is selected from msrs_to_save_all to reflect the
 * capabilities of the host cpu.  This capabilities test skips MSRs that
 * are kvm-specific.  Those are put in emulated_msrs_all; filtering of
 * emulated_msrs may depend on host virtualization features rather than
 * host cpu features.
 */
static const u32 msrs_to_save_all[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
	MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
	MSR_IA32_SPEC_CTRL,
	MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
	MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
	MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
	MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
	MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
	MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
	MSR_IA32_UMWAIT_CONTROL,

	MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
	MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
	MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
	MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
	MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
	MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
	MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
	MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
	MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
	MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
	MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
	MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
	MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
	MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
	MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
	MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
	MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
	MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
	MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
	MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
	MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
};

static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
static unsigned num_msrs_to_save;

static const u32 emulated_msrs_all[] = {
	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
	HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
	HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
	HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
	HV_X64_MSR_RESET,
	HV_X64_MSR_VP_INDEX,
	HV_X64_MSR_VP_RUNTIME,
	HV_X64_MSR_SCONTROL,
	HV_X64_MSR_STIMER0_CONFIG,
	HV_X64_MSR_VP_ASSIST_PAGE,
	HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
	HV_X64_MSR_TSC_EMULATION_STATUS,

	MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
	MSR_KVM_PV_EOI_EN,

	MSR_IA32_TSC_ADJUST,
	MSR_IA32_TSCDEADLINE,
	MSR_IA32_ARCH_CAPABILITIES,
	MSR_IA32_MISC_ENABLE,
	MSR_IA32_MCG_STATUS,
	MSR_IA32_MCG_CTL,
	MSR_IA32_MCG_EXT_CTL,
	MSR_IA32_SMBASE,
	MSR_SMI_COUNT,
	MSR_PLATFORM_INFO,
	MSR_MISC_FEATURES_ENABLES,
	MSR_AMD64_VIRT_SPEC_CTRL,
	MSR_IA32_POWER_CTL,

	/*
	 * The following list leaves out MSRs whose values are determined
	 * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
	 * We always support the "true" VMX control MSRs, even if the host
	 * processor does not, so these registers are placed here rather
	 * than in msrs_to_save_all.
	 */
	MSR_IA32_VMX_BASIC,
	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
	MSR_IA32_VMX_TRUE_EXIT_CTLS,
	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
	MSR_IA32_VMX_MISC,
	MSR_IA32_VMX_CR0_FIXED0,
	MSR_IA32_VMX_CR4_FIXED0,
	MSR_IA32_VMX_VMCS_ENUM,
	MSR_IA32_VMX_PROCBASED_CTLS2,
	MSR_IA32_VMX_EPT_VPID_CAP,
	MSR_IA32_VMX_VMFUNC,

	MSR_K7_HWCR,
	MSR_KVM_POLL_CONTROL,
};

static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;

/*
 * List of msr numbers which are used to expose MSR-based features that
 * can be used by a hypervisor to validate requested CPU features.
 */
static const u32 msr_based_features_all[] = {
	MSR_IA32_VMX_BASIC,
	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
	MSR_IA32_VMX_PINBASED_CTLS,
	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
	MSR_IA32_VMX_PROCBASED_CTLS,
	MSR_IA32_VMX_TRUE_EXIT_CTLS,
	MSR_IA32_VMX_EXIT_CTLS,
	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
	MSR_IA32_VMX_ENTRY_CTLS,
	MSR_IA32_VMX_MISC,
	MSR_IA32_VMX_CR0_FIXED0,
	MSR_IA32_VMX_CR0_FIXED1,
	MSR_IA32_VMX_CR4_FIXED0,
	MSR_IA32_VMX_CR4_FIXED1,
	MSR_IA32_VMX_VMCS_ENUM,
	MSR_IA32_VMX_PROCBASED_CTLS2,
	MSR_IA32_VMX_EPT_VPID_CAP,
	MSR_IA32_VMX_VMFUNC,

	MSR_F10H_DECFG,
	MSR_IA32_UCODE_REV,
	MSR_IA32_ARCH_CAPABILITIES,
};

static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
static unsigned int num_msr_based_features;

static u64 kvm_get_arch_capabilities(void)
{
	u64 data = 0;

	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);

	/*
	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
	 * the nested hypervisor runs with NX huge pages.  If it is not,
	 * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
	 * L1 guests, so it need not worry about its own (L2) guests.
	 */
	data |= ARCH_CAP_PSCHANGE_MC_NO;

	/*
	 * If we're doing cache flushes (either "always" or "cond")
	 * we will do one whenever the guest does a vmlaunch/vmresume.
	 * If an outer hypervisor is doing the cache flush for us
	 * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
	 * capability to the guest too, and if EPT is disabled we're not
	 * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
	 * require a nested hypervisor to do a flush of its own.
	 */
	if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
		data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;

	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
		data |= ARCH_CAP_RDCL_NO;
	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
		data |= ARCH_CAP_SSB_NO;
	if (!boot_cpu_has_bug(X86_BUG_MDS))
		data |= ARCH_CAP_MDS_NO;

	/*
	 * On TAA affected systems:
	 *      - nothing to do if TSX is disabled on the host.
	 *      - we emulate TSX_CTRL if present on the host.
	 *	  This lets the guest use VERW to clear CPU buffers.
	 */
	if (!boot_cpu_has(X86_FEATURE_RTM))
		data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
	else if (!boot_cpu_has_bug(X86_BUG_TAA))
		data |= ARCH_CAP_TAA_NO;

	return data;
}

static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
	switch (msr->index) {
	case MSR_IA32_ARCH_CAPABILITIES:
		msr->data = kvm_get_arch_capabilities();
		break;
	case MSR_IA32_UCODE_REV:
		rdmsrl_safe(msr->index, &msr->data);
		break;
	default:
		if (kvm_x86_ops->get_msr_feature(msr))
			return 1;
	}
	return 0;
}

static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	struct kvm_msr_entry msr;
	int r;

	msr.index = index;
	r = kvm_get_msr_feature(&msr);
	if (r)
		return r;

	*data = msr.data;

	return 0;
}

static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
		return false;

	if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
		return false;

	if (efer & (EFER_LME | EFER_LMA) &&
	    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
		return false;

	if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
		return false;

	return true;
}

bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & efer_reserved_bits)
		return false;

	return __kvm_valid_efer(vcpu, efer);
}
EXPORT_SYMBOL_GPL(kvm_valid_efer);

static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	u64 old_efer = vcpu->arch.efer;
	u64 efer = msr_info->data;

	if (efer & efer_reserved_bits)
		return 1;

	if (!msr_info->host_initiated) {
		if (!__kvm_valid_efer(vcpu, efer))
			return 1;

		if (is_paging(vcpu) &&
		    (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
			return 1;
	}

	efer &= ~EFER_LMA;
	efer |= vcpu->arch.efer & EFER_LMA;

	kvm_x86_ops->set_efer(vcpu, efer);

	/* Update reserved bits */
	if ((efer ^ old_efer) & EFER_NX)
		kvm_mmu_reset_context(vcpu);

	return 0;
}

void kvm_enable_efer_bits(u64 mask)
{
	efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

/*
 * Write @data into the MSR specified by @index.  Select MSR specific fault
 * checks are bypassed if @host_initiated is %true.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
			 bool host_initiated)
{
	struct msr_data msr;

	switch (index) {
	case MSR_FS_BASE:
	case MSR_GS_BASE:
	case MSR_KERNEL_GS_BASE:
	case MSR_CSTAR:
	case MSR_LSTAR:
		if (is_noncanonical_address(data, vcpu))
			return 1;
		break;
	case MSR_IA32_SYSENTER_EIP:
	case MSR_IA32_SYSENTER_ESP:
		/*
		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
		 * non-canonical address is written on Intel but not on
		 * AMD (which ignores the top 32-bits, because it does
		 * not implement 64-bit SYSENTER).
		 *
		 * 64-bit code should hence be able to write a non-canonical
		 * value on AMD.  Making the address canonical ensures that
		 * vmentry does not fail on Intel after writing a non-canonical
		 * value, and that something deterministic happens if the
		 * guest invokes 64-bit SYSENTER.
		 */
		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
	}

	msr.data = data;
	msr.index = index;
	msr.host_initiated = host_initiated;

	return kvm_x86_ops->set_msr(vcpu, &msr);
}

/*
 * Read the MSR specified by @index into @data.  Select MSR specific fault
 * checks are bypassed if @host_initiated is %true.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
		  bool host_initiated)
{
	struct msr_data msr;
	int ret;

	msr.index = index;
	msr.host_initiated = host_initiated;

	ret = kvm_x86_ops->get_msr(vcpu, &msr);
	if (!ret)
		*data = msr.data;
	return ret;
}

int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
{
	return __kvm_get_msr(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_get_msr);

int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
{
	return __kvm_set_msr(vcpu, index, data, false);
}
EXPORT_SYMBOL_GPL(kvm_set_msr);

int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
{
	u32 ecx = kvm_rcx_read(vcpu);
	u64 data;

	if (kvm_get_msr(vcpu, ecx, &data)) {
		trace_kvm_msr_read_ex(ecx);
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	trace_kvm_msr_read(ecx, data);

	kvm_rax_write(vcpu, data & -1u);
	kvm_rdx_write(vcpu, (data >> 32) & -1u);
	return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);

int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
{
	u32 ecx = kvm_rcx_read(vcpu);
	u64 data = kvm_read_edx_eax(vcpu);

	if (kvm_set_msr(vcpu, ecx, data)) {
		trace_kvm_msr_write_ex(ecx, data);
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	trace_kvm_msr_write(ecx, data);
	return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return __kvm_get_msr(vcpu, index, data, true);
}

static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return __kvm_set_msr(vcpu, index, *data, true);
}

#ifdef CONFIG_X86_64
struct pvclock_clock {
	int vclock_mode;
	u64 cycle_last;
	u64 mask;
	u32 mult;
	u32 shift;
};

struct pvclock_gtod_data {
	seqcount_t	seq;

	struct pvclock_clock clock;
	struct pvclock_clock raw_clock;

	u64		boot_ns_raw;
	u64		boot_ns;
	u64		nsec_base;
	u64		wall_time_sec;
	u64		monotonic_raw_nsec;
};

static struct pvclock_gtod_data pvclock_gtod_data;

static void update_pvclock_gtod(struct timekeeper *tk)
{
	struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
	u64 boot_ns, boot_ns_raw;

	boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
	boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot));

	write_seqcount_begin(&vdata->seq);

	/* copy pvclock gtod data */
	vdata->clock.vclock_mode	= tk->tkr_mono.clock->archdata.vclock_mode;
	vdata->clock.cycle_last		= tk->tkr_mono.cycle_last;
	vdata->clock.mask		= tk->tkr_mono.mask;
	vdata->clock.mult		= tk->tkr_mono.mult;
	vdata->clock.shift		= tk->tkr_mono.shift;

	vdata->raw_clock.vclock_mode	= tk->tkr_raw.clock->archdata.vclock_mode;
	vdata->raw_clock.cycle_last	= tk->tkr_raw.cycle_last;
	vdata->raw_clock.mask		= tk->tkr_raw.mask;
	vdata->raw_clock.mult		= tk->tkr_raw.mult;
	vdata->raw_clock.shift		= tk->tkr_raw.shift;

	vdata->boot_ns			= boot_ns;
	vdata->nsec_base		= tk->tkr_mono.xtime_nsec;

	vdata->wall_time_sec		= tk->xtime_sec;

	vdata->boot_ns_raw		= boot_ns_raw;
	vdata->monotonic_raw_nsec	= tk->tkr_raw.xtime_nsec;

	write_seqcount_end(&vdata->seq);
}
#endif

void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
	kvm_vcpu_kick(vcpu);
}

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
	int version;
	int r;
	struct pvclock_wall_clock wc;
	struct timespec64 boot;

	if (!wall_clock)
		return;

	r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
	if (r)
		return;

	if (version & 1)
		++version;  /* first time write, random junk */

	++version;

	if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
		return;

	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_guest_time_update below) to the
	 * wall clock specified here.  guest system time equals host
	 * system time for us, thus we must fill in host boot time here.
	 */
	getboottime64(&boot);

	if (kvm->arch.kvmclock_offset) {
		struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
		boot = timespec64_sub(boot, ts);
	}
	wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
	wc.nsec = boot.tv_nsec;
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

	version++;
	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}
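
/*
 * The version field acts as a seqlock against the guest: an odd value means
 * an update is in flight.  A guest-side reader (sketch, assuming a mapped
 * struct pvclock_wall_clock *wc) would consume it as:
 *
 *	do {
 *		version = READ_ONCE(wc->version);
 *		rmb();
 *		sec = wc->sec;
 *		nsec = wc->nsec;
 *		rmb();
 *	} while ((version & 1) || version != READ_ONCE(wc->version));
 */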

static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	do_shl32_div32(dividend, divisor);
	return dividend;
}

static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
			       s8 *pshift, u32 *pmultiplier)
{
	uint64_t scaled64;
	int32_t  shift = 0;
	uint64_t tps64;
	uint32_t tps32;

	tps64 = base_hz;
	scaled64 = scaled_hz;
	while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
		tps64 >>= 1;
		shift--;
	}

	tps32 = (uint32_t)tps64;
	while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
		if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
			scaled64 >>= 1;
		else
			tps32 <<= 1;
		shift++;
	}

	*pshift = shift;
	*pmultiplier = div_frac(scaled64, tps32);
}
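
/*
 * Worked example (illustrative): to convert a 2 GHz clock to nanoseconds,
 *
 *	kvm_get_time_scale(NSEC_PER_SEC, 2000000000ULL, &shift, &mult);
 *
 * leaves shift = 0 and mult = div_frac(10^9, 2*10^9) = 0x80000000, so the
 * pvclock computation ns = ((cycles << shift) * mult) >> 32 halves the
 * cycle count, as expected for two cycles per nanosecond.
 */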

#ifdef CONFIG_X86_64
static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
#endif

static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
static unsigned long max_tsc_khz;

static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
	u64 v = (u64)khz * (1000000 + ppm);
	do_div(v, 1000000);
	return v;
}
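
/*
 * Example: adjust_tsc_khz(1000000, 250) returns 1000250 kHz and
 * adjust_tsc_khz(1000000, -250) returns 999750 kHz; kvm_set_tsc_khz()
 * below uses this pair as the [thresh_lo, thresh_hi] window inside which
 * no scaling or catchup is needed.
 */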

static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
	u64 ratio;

	/* Guest TSC same frequency as host TSC? */
	if (!scale) {
		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
		return 0;
	}

	/* TSC scaling supported? */
	if (!kvm_has_tsc_control) {
		if (user_tsc_khz > tsc_khz) {
			vcpu->arch.tsc_catchup = 1;
			vcpu->arch.tsc_always_catchup = 1;
			return 0;
		} else {
			pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
			return -1;
		}
	}

	/* TSC scaling required - calculate ratio */
	ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
				user_tsc_khz, tsc_khz);

	if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
		pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
				    user_tsc_khz);
		return -1;
	}

	vcpu->arch.tsc_scaling_ratio = ratio;
	return 0;
}

static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
{
	u32 thresh_lo, thresh_hi;
	int use_scaling = 0;

	/* tsc_khz can be zero if TSC calibration fails */
	if (user_tsc_khz == 0) {
		/* set tsc_scaling_ratio to a safe value */
		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
		return -1;
	}

	/* Compute a scale to convert nanoseconds in TSC cycles */
	kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
			   &vcpu->arch.virtual_tsc_shift,
			   &vcpu->arch.virtual_tsc_mult);
	vcpu->arch.virtual_tsc_khz = user_tsc_khz;

	/*
	 * Compute the variation in TSC rate which is acceptable
	 * within the range of tolerance and decide if the
	 * rate being applied is within those bounds of the hardware
	 * rate.  If so, no scaling or compensation need be done.
	 */
	thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
	thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
	if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
		pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n",
			 user_tsc_khz, thresh_lo, thresh_hi);
		use_scaling = 1;
	}
	return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
}

static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
	u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
				      vcpu->arch.virtual_tsc_mult,
				      vcpu->arch.virtual_tsc_shift);
	tsc += vcpu->arch.this_tsc_write;
	return tsc;
}

static inline int gtod_is_based_on_tsc(int mode)
{
	return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
}

static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
	bool vcpus_matched;
	struct kvm_arch *ka = &vcpu->kvm->arch;
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;

	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
			 atomic_read(&vcpu->kvm->online_vcpus));

	/*
	 * Once the masterclock is enabled, always perform request in
	 * order to update it.
	 *
	 * In order to enable masterclock, the host clocksource must be TSC
	 * and the vcpus need to have matched TSCs.  When that happens,
	 * perform request to enable masterclock.
	 */
	if (ka->use_master_clock ||
	    (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);

	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
			    atomic_read(&vcpu->kvm->online_vcpus),
			    ka->use_master_clock, gtod->clock.vclock_mode);
#endif
}

static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
{
	u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
	vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
}

/*
 * Multiply tsc by a fixed point number represented by ratio.
 *
 * The most significant 64-N bits (mult) of ratio represent the
 * integral part of the fixed point number; the remaining N bits
 * (frac) represent the fractional part, ie. ratio represents a fixed
 * point number (mult + frac * 2^(-N)).
 *
 * N equals to kvm_tsc_scaling_ratio_frac_bits.
 */
static inline u64 __scale_tsc(u64 ratio, u64 tsc)
{
	return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
}

u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
{
	u64 _tsc = tsc;
	u64 ratio = vcpu->arch.tsc_scaling_ratio;

	if (ratio != kvm_default_tsc_scaling_ratio)
		_tsc = __scale_tsc(ratio, tsc);

	return _tsc;
}
EXPORT_SYMBOL_GPL(kvm_scale_tsc);
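
/*
 * Worked example (illustrative; assumes 48 fractional bits): running a
 * 1500 MHz guest on a 1000 MHz host gives
 *
 *	ratio = (1ULL << 48) * 1500000 / 1000000;	// 1.5 * 2^48
 *
 * and __scale_tsc() computes (tsc * ratio) >> 48, so a host TSC delta of
 * 1000 cycles is observed by the guest as 1500 cycles.
 */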

static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
{
	u64 tsc;

	tsc = kvm_scale_tsc(vcpu, rdtsc());

	return target_tsc - tsc;
}

u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
	u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);

	return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
}
EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);

static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
	vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
}

static inline bool kvm_check_tsc_unstable(void)
{
#ifdef CONFIG_X86_64
	/*
	 * TSC is marked unstable when we're running on Hyper-V,
	 * 'TSC page' clocksource is good.
	 */
	if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
		return false;
#endif
	return check_tsc_unstable();
}

void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
	struct kvm *kvm = vcpu->kvm;
	u64 offset, ns, elapsed;
	unsigned long flags;
	bool matched;
	bool already_matched;
	u64 data = msr->data;
	bool synchronizing = false;

	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
	offset = kvm_compute_tsc_offset(vcpu, data);
	ns = ktime_get_boottime_ns();
	elapsed = ns - kvm->arch.last_tsc_nsec;

	if (vcpu->arch.virtual_tsc_khz) {
		if (data == 0 && msr->host_initiated) {
			/*
			 * detection of vcpu initialization -- need to sync
			 * with other vCPUs. This particularly helps to keep
			 * kvm_clock stable after CPU hotplug
			 */
			synchronizing = true;
		} else {
			u64 tsc_exp = kvm->arch.last_tsc_write +
						nsec_to_cycles(vcpu, elapsed);
			u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
			/*
			 * Special case: TSC write with a small delta (1 second)
			 * of virtual cycle time against real time is
			 * interpreted as an attempt to synchronize the CPU.
			 */
			synchronizing = data < tsc_exp + tsc_hz &&
					data + tsc_hz > tsc_exp;
		}
	}

	/*
	 * For a reliable TSC, we can match TSC offsets, and for an unstable
	 * TSC, we add elapsed time in this computation.  We could let the
	 * compensation code attempt to catch up if we fall behind, but
	 * it's better to try to match offsets from the beginning.
	 */
	if (synchronizing &&
	    vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
		if (!kvm_check_tsc_unstable()) {
			offset = kvm->arch.cur_tsc_offset;
		} else {
			u64 delta = nsec_to_cycles(vcpu, elapsed);
			data += delta;
			offset = kvm_compute_tsc_offset(vcpu, data);
		}
		matched = true;
		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
	} else {
		/*
		 * We split periods of matched TSC writes into generations.
		 * For each generation, we track the original measured
		 * nanosecond time, offset, and write, so if TSCs are in
		 * sync, we can match exact offset, and if not, we can match
		 * exact software computation in compute_guest_tsc()
		 *
		 * These values are tracked in kvm->arch.cur_xxx variables.
		 */
		kvm->arch.cur_tsc_generation++;
		kvm->arch.cur_tsc_nsec = ns;
		kvm->arch.cur_tsc_write = data;
		kvm->arch.cur_tsc_offset = offset;
		matched = false;
	}

	/*
	 * We also track the most recent recorded KHZ, write and time to
	 * allow the matching interval to be extended at each write.
	 */
	kvm->arch.last_tsc_nsec = ns;
	kvm->arch.last_tsc_write = data;
	kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;

	vcpu->arch.last_guest_tsc = data;

	/* Keep track of which generation this VCPU has synchronized to */
	vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
	vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
	vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;

	if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
		update_ia32_tsc_adjust_msr(vcpu, offset);

	kvm_vcpu_write_tsc_offset(vcpu, offset);
	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);

	spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
	if (!matched) {
		kvm->arch.nr_vcpus_matched_tsc = 0;
	} else if (!already_matched) {
		kvm->arch.nr_vcpus_matched_tsc++;
	}

	kvm_track_tsc_matching(vcpu);
	spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}

EXPORT_SYMBOL_GPL(kvm_write_tsc);

static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
					   s64 adjustment)
{
	u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
	kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
}

static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
{
	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
		WARN_ON(adjustment < 0);
	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
	adjust_tsc_offset_guest(vcpu, adjustment);
}

#ifdef CONFIG_X86_64

static u64 read_tsc(void)
{
	u64 ret = (u64)rdtsc_ordered();
	u64 last = pvclock_gtod_data.clock.cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

static inline u64 vgettsc(struct pvclock_clock *clock, u64 *tsc_timestamp,
			  int *mode)
{
	long v;
	u64 tsc_pg_val;

	switch (clock->vclock_mode) {
	case VCLOCK_HVCLOCK:
		tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
						  tsc_timestamp);
		if (tsc_pg_val != U64_MAX) {
			/* TSC page valid */
			*mode = VCLOCK_HVCLOCK;
			v = (tsc_pg_val - clock->cycle_last) &
				clock->mask;
		} else {
			/* TSC page invalid */
			*mode = VCLOCK_NONE;
		}
		break;
	case VCLOCK_TSC:
		*mode = VCLOCK_TSC;
		*tsc_timestamp = read_tsc();
		v = (*tsc_timestamp - clock->cycle_last) &
			clock->mask;
		break;
	default:
		*mode = VCLOCK_NONE;
	}

	if (*mode == VCLOCK_NONE)
		*tsc_timestamp = v = 0;

	return v * clock->mult;
}

static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		ns = gtod->monotonic_raw_nsec;
		ns += vgettsc(&gtod->raw_clock, tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
		ns += gtod->boot_ns_raw;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
	*t = ns;

	return mode;
}

static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
{
	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
	unsigned long seq;
	int mode;
	u64 ns;

	do {
		seq = read_seqcount_begin(&gtod->seq);
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->nsec_base;
		ns += vgettsc(&gtod->clock, tsc_timestamp, &mode);
		ns >>= gtod->clock.shift;
	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

/* returns true if host is using TSC based clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
{
	/* checked again under seqlock below */
	if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
		return false;

	return gtod_is_based_on_tsc(do_monotonic_raw(kernel_ns,
						     tsc_timestamp));
}

/* returns true if host is using TSC based clocksource */
static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
					   u64 *tsc_timestamp)
{
	/* checked again under seqlock below */
	if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
		return false;

	return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
}
#endif

/*
 *
 * Assuming a stable TSC across physical CPUS, and a stable TSC
 * across virtual CPUs, the following condition is possible.
 * Each numbered line represents an event visible to both
 * CPUs at the next numbered event.
 *
 * "timespecX" represents host monotonic time. "tscX" represents
 * RDTSC value.
 *
 *		VCPU0 on CPU0		|	VCPU1 on CPU1
 *
 * 1.  read timespec0,tsc0
 * 2.					| timespec1 = timespec0 + N
 *					| tsc1 = tsc0 + M
 * 3. transition to guest		| transition to guest
 * 4. ret0 = timespec0 + (rdtsc - tsc0) |
 * 5.					| ret1 = timespec1 + (rdtsc - tsc1)
 *					|      = timespec0 + N + (rdtsc - (tsc0 + M))
 *
 * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
 *
 *	- ret0 < ret1
 *	- timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
 *	- 0 < N - M => M < N
 *
 * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
 * always the case (the difference between two distinct xtime instances
 * might be smaller than the difference between corresponding TSC reads,
 * when updating guest vcpus pvclock areas).
 *
 * To avoid that problem, do not allow visibility of distinct
 * system_timestamp/tsc_timestamp values simultaneously: use a master
 * copy of host monotonic time values. Update that master copy
 * in lockstep.
 *
 * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
 *
 */
2159static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2160{
2161#ifdef CONFIG_X86_64
2162 struct kvm_arch *ka = &kvm->arch;
2163 int vclock_mode;
2164 bool host_tsc_clocksource, vcpus_matched;
2165
2166 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2167 atomic_read(&kvm->online_vcpus));
2168
2169
2170
2171
2172
2173 host_tsc_clocksource = kvm_get_time_and_clockread(
2174 &ka->master_kernel_ns,
2175 &ka->master_cycle_now);
2176
2177 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2178 && !ka->backwards_tsc_observed
2179 && !ka->boot_vcpu_runs_old_kvmclock;
2180
2181 if (ka->use_master_clock)
2182 atomic_set(&kvm_guest_has_master_clock, 1);
2183
2184 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2185 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2186 vcpus_matched);
2187#endif
2188}
2189
2190void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2191{
2192 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2193}
2194
2195static void kvm_gen_update_masterclock(struct kvm *kvm)
2196{
2197#ifdef CONFIG_X86_64
2198 int i;
2199 struct kvm_vcpu *vcpu;
2200 struct kvm_arch *ka = &kvm->arch;
2201
2202 spin_lock(&ka->pvclock_gtod_sync_lock);
2203 kvm_make_mclock_inprogress_request(kvm);
2204
2205 pvclock_update_vm_gtod_copy(kvm);
2206
2207 kvm_for_each_vcpu(i, vcpu, kvm)
2208 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2209
2210
2211 kvm_for_each_vcpu(i, vcpu, kvm)
2212 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2213
2214 spin_unlock(&ka->pvclock_gtod_sync_lock);
2215#endif
2216}
2217
2218u64 get_kvmclock_ns(struct kvm *kvm)
2219{
2220 struct kvm_arch *ka = &kvm->arch;
2221 struct pvclock_vcpu_time_info hv_clock;
2222 u64 ret;
2223
2224 spin_lock(&ka->pvclock_gtod_sync_lock);
2225 if (!ka->use_master_clock) {
2226 spin_unlock(&ka->pvclock_gtod_sync_lock);
2227 return ktime_get_boottime_ns() + ka->kvmclock_offset;
2228 }
2229
2230 hv_clock.tsc_timestamp = ka->master_cycle_now;
2231 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2232 spin_unlock(&ka->pvclock_gtod_sync_lock);
2233
2234
2235 get_cpu();
2236
2237 if (__this_cpu_read(cpu_tsc_khz)) {
2238 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2239 &hv_clock.tsc_shift,
2240 &hv_clock.tsc_to_system_mul);
2241 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
2242 } else
2243 ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
2244
2245 put_cpu();
2246
2247 return ret;
2248}
2249
2250static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
2251{
2252 struct kvm_vcpu_arch *vcpu = &v->arch;
2253 struct pvclock_vcpu_time_info guest_hv_clock;
2254
2255 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
2256 &guest_hv_clock, sizeof(guest_hv_clock))))
2257 return;
2258
2259 /* This VCPU is paused, but it's legal for a guest to read another
2260 * VCPU's kvmclock, so we really have to follow the specification where
2261 * it says that version is odd if data is being modified, and even after
2262 * it is consistent.
2263 *
2264 * Version field updates must be kept separate. This is because
2265 * kvm_write_guest_cached might use a "rep movs" instruction, and
2266 * writes within a string instruction are weakly ordered. So there
2267 * are three writes overall.
2268 *
2269 * As a small optimization, only write the version field in the first
2270 * and third write. The vcpu->pv_time cache is still valid, because the
2271 * version field is the first in the struct.
2272 */
2273 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2274
2275 if (guest_hv_clock.version & 1)
2276 ++guest_hv_clock.version;
2277
2278 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2279 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2280 &vcpu->hv_clock,
2281 sizeof(vcpu->hv_clock.version));
2282
2283 smp_wmb();
2284
2285 /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
2286 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2287
2288 if (vcpu->pvclock_set_guest_stopped_request) {
2289 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2290 vcpu->pvclock_set_guest_stopped_request = false;
2291 }
2292
2293 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2294
2295 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2296 &vcpu->hv_clock,
2297 sizeof(vcpu->hv_clock));
2298
2299 smp_wmb();
2300
2301 vcpu->hv_clock.version++;
2302 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2303 &vcpu->hv_clock,
2304 sizeof(vcpu->hv_clock.version));
2305}
2306
2307static int kvm_guest_time_update(struct kvm_vcpu *v)
2308{
2309 unsigned long flags, tgt_tsc_khz;
2310 struct kvm_vcpu_arch *vcpu = &v->arch;
2311 struct kvm_arch *ka = &v->kvm->arch;
2312 s64 kernel_ns;
2313 u64 tsc_timestamp, host_tsc;
2314 u8 pvclock_flags;
2315 bool use_master_clock;
2316
2317 kernel_ns = 0;
2318 host_tsc = 0;
2319
2320 /*
2321 * If the host uses TSC clock, then passthrough TSC as stable
2322 * to the guest.
2323 */
2324 spin_lock(&ka->pvclock_gtod_sync_lock);
2325 use_master_clock = ka->use_master_clock;
2326 if (use_master_clock) {
2327 host_tsc = ka->master_cycle_now;
2328 kernel_ns = ka->master_kernel_ns;
2329 }
2330 spin_unlock(&ka->pvclock_gtod_sync_lock);
2331
2332 /* Keep irq disabled to prevent changes to the clock */
2333 local_irq_save(flags);
2334 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2335 if (unlikely(tgt_tsc_khz == 0)) {
2336 local_irq_restore(flags);
2337 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2338 return 1;
2339 }
2340 if (!use_master_clock) {
2341 host_tsc = rdtsc();
2342 kernel_ns = ktime_get_boottime_ns();
2343 }
2344
2345 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2346 /*
2347 * We may have to catch up the TSC to match elapsed wall clock
2348 * time for two reasons, even if kvmclock is used.
2349 *   1) CPU could have been running below the maximum TSC rate.
2350 *   2) Broken TSC compensation resets the base at each VCPU
2351 *      entry to avoid unknown leaps of TSC even when running
2352 *      on the same CPU.
2353 * In catchup mode, bump the guest TSC offset forward whenever
2354 * the TSC implied by elapsed kernel time is ahead of the value
2355 * read from hardware, so the guest clock never appears to go back.
2356 */
2357 if (vcpu->tsc_catchup) {
2358 u64 tsc = compute_guest_tsc(v, kernel_ns);
2359 if (tsc > tsc_timestamp) {
2360 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
2361 tsc_timestamp = tsc;
2362 }
2363 }
2364
2365 local_irq_restore(flags);
2366
2367 /* With all the info we got, fill in the values */
2368
2369 if (kvm_has_tsc_control)
2370 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
2371
2372 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
2373 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
2374 &vcpu->hv_clock.tsc_shift,
2375 &vcpu->hv_clock.tsc_to_system_mul);
2376 vcpu->hw_tsc_khz = tgt_tsc_khz;
2377 }
2378
2379 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
2380 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
2381 vcpu->last_guest_tsc = tsc_timestamp;
2382
2383 /* If the host uses TSC clocksource, then it is stable */
2384 pvclock_flags = 0;
2385 if (use_master_clock)
2386 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
2387
2388 vcpu->hv_clock.flags = pvclock_flags;
2389
2390 if (vcpu->pv_time_enabled)
2391 kvm_setup_pvclock_page(v);
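 /* The Hyper-V TSC page is a per-VM resource; have vCPU 0 keep it in sync. */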
2392 if (v == kvm_get_vcpu(v->kvm, 0))
2393 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
2394 return 0;
2395}
2396
2397/*
2398 * kvmclock updates which are isolated to a given vcpu, such as
2399 * vcpu->cpu migration, should not allow system_timestamp from
2400 * the rest of the vcpus to remain static. Otherwise ntp frequency
2401 * correction applies to one vcpu's system_timestamp but not
2402 * the others.
2403 *
2404 * So in those cases, request a kvmclock update for all vcpus.
2405 *
2406 * We need to rate-limit these requests though, as they can
2407 * considerably slow guests that have a large number of vcpus.
2408 * The time for a remote vcpu to update its kvmclock is bound
2409 * by the delay we use to rate-limit the updates.
2410 */
2411#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2412
2413static void kvmclock_update_fn(struct work_struct *work)
2414{
2415 int i;
2416 struct delayed_work *dwork = to_delayed_work(work);
2417 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2418 kvmclock_update_work);
2419 struct kvm *kvm = container_of(ka, struct kvm, arch);
2420 struct kvm_vcpu *vcpu;
2421
2422 kvm_for_each_vcpu(i, vcpu, kvm) {
2423 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2424 kvm_vcpu_kick(vcpu);
2425 }
2426}
2427
2428static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
2429{
2430 struct kvm *kvm = v->kvm;
2431
2432 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2433 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
2434 KVMCLOCK_UPDATE_DELAY);
2435}
2436
2437#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
2438
2439static void kvmclock_sync_fn(struct work_struct *work)
2440{
2441 struct delayed_work *dwork = to_delayed_work(work);
2442 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2443 kvmclock_sync_work);
2444 struct kvm *kvm = container_of(ka, struct kvm, arch);
2445
2446 if (!kvmclock_periodic_sync)
2447 return;
2448
2449 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
2450 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
2451 KVMCLOCK_SYNC_PERIOD);
2452}
2453
2454/*
2455 * On AMD, HWCR[McStatusWrEn] (bit 18) lets software set MCi_STATUS directly.
2456 */
2457static bool can_set_mci_status(struct kvm_vcpu *vcpu)
2458{
2459 /* McStatusWrEn enabled? */
2460 if (guest_cpuid_is_amd(vcpu))
2461 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
2462
2463 return false;
2464}
2465
2466static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2467{
2468 u64 mcg_cap = vcpu->arch.mcg_cap;
2469 unsigned bank_num = mcg_cap & 0xff;
2470 u32 msr = msr_info->index;
2471 u64 data = msr_info->data;
2472
2473 switch (msr) {
2474 case MSR_IA32_MCG_STATUS:
2475 vcpu->arch.mcg_status = data;
2476 break;
2477 case MSR_IA32_MCG_CTL:
2478 if (!(mcg_cap & MCG_CTL_P) &&
2479 (data || !msr_info->host_initiated))
2480 return 1;
2481 if (data != 0 && data != ~(u64)0)
2482 return 1;
2483 vcpu->arch.mcg_ctl = data;
2484 break;
2485 default:
2486 if (msr >= MSR_IA32_MC0_CTL &&
2487 msr < MSR_IA32_MCx_CTL(bank_num)) {
2488 u32 offset = msr - MSR_IA32_MC0_CTL;
2489 /* only 0 or all 1s can be written to IA32_MCi_CTL;
2490 * some Linux kernels though clear bit 10 in bank 4 to
2491 * work around a BIOS/GART TBL issue on AMD K8s, so ignore
2492 * this to avoid an uncaught #GP in the guest
2493 */
2494 if ((offset & 0x3) == 0 &&
2495 data != 0 && (data | (1 << 10)) != ~(u64)0)
2496 return -1;
2497
2498 /* MCi_STATUS */
2499 if (!msr_info->host_initiated &&
2500 (offset & 0x3) == 1 && data != 0) {
2501 if (!can_set_mci_status(vcpu))
2502 return -1;
2503 }
2504
2505 vcpu->arch.mce_banks[offset] = data;
2506 break;
2507 }
2508 return 1;
2509 }
2510 return 0;
2511}
2512
2513static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
2514{
2515 struct kvm *kvm = vcpu->kvm;
2516 int lm = is_long_mode(vcpu);
2517 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
2518 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
2519 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
2520 : kvm->arch.xen_hvm_config.blob_size_32;
2521 u32 page_num = data & ~PAGE_MASK;
2522 u64 page_addr = data & PAGE_MASK;
2523 u8 *page;
2524 int r;
2525
2526 r = -E2BIG;
2527 if (page_num >= blob_size)
2528 goto out;
2529 r = -ENOMEM;
2530 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
2531 if (IS_ERR(page)) {
2532 r = PTR_ERR(page);
2533 goto out;
2534 }
2535 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
2536 goto out_free;
2537 r = 0;
2538out_free:
2539 kfree(page);
2540out:
2541 return r;
2542}
2543
2544static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2545{
2546 gpa_t gpa = data & ~0x3f;
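 /*
 * The low 6 bits of the MSR value are control/reserved flags; the rest
 * is the 64-byte aligned GPA of the shared async page fault word.
 */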
2547
2548 /* Bits 3:5 are reserved, should be zero */
2549 if (data & 0x38)
2550 return 1;
2551
2552 vcpu->arch.apf.msr_val = data;
2553
2554 if (!(data & KVM_ASYNC_PF_ENABLED)) {
2555 kvm_clear_async_pf_completion_queue(vcpu);
2556 kvm_async_pf_hash_reset(vcpu);
2557 return 0;
2558 }
2559
2560 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
2561 sizeof(u32)))
2562 return 1;
2563
2564 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
2565 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
2566 kvm_async_pf_wakeup_all(vcpu);
2567 return 0;
2568}
2569
2570static void kvmclock_reset(struct kvm_vcpu *vcpu)
2571{
2572 vcpu->arch.pv_time_enabled = false;
2573 vcpu->arch.time = 0;
2574}
2575
2576static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
2577{
2578 ++vcpu->stat.tlb_flush;
2579 kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
2580}
2581
2582static void record_steal_time(struct kvm_vcpu *vcpu)
2583{
2584 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2585 return;
2586
2587 if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2588 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2589 return;
2590
2591 /*
2592 * Doing a TLB flush here, on the guest's behalf, can avoid
2593 * expensive IPIs.
2594 */
2595 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
2596 vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB);
2597 if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
2598 kvm_vcpu_flush_tlb(vcpu, false);
2599
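 /*
 * As with kvmclock, the steal time record uses an odd version to mark
 * an update in progress: force it odd before writing, even afterwards.
 */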
2600 if (vcpu->arch.st.steal.version & 1)
2601 vcpu->arch.st.steal.version += 1;
2602
2603 vcpu->arch.st.steal.version += 1;
2604
2605 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2606 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2607
2608 smp_wmb();
2609
2610 vcpu->arch.st.steal.steal += current->sched_info.run_delay -
2611 vcpu->arch.st.last_steal;
2612 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2613
2614 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2615 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2616
2617 smp_wmb();
2618
2619 vcpu->arch.st.steal.version += 1;
2620
2621 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2622 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2623}
2624
2625int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2626{
2627 bool pr = false;
2628 u32 msr = msr_info->index;
2629 u64 data = msr_info->data;
2630
2631 switch (msr) {
2632 case MSR_AMD64_NB_CFG:
2633 case MSR_IA32_UCODE_WRITE:
2634 case MSR_VM_HSAVE_PA:
2635 case MSR_AMD64_PATCH_LOADER:
2636 case MSR_AMD64_BU_CFG2:
2637 case MSR_AMD64_DC_CFG:
2638 case MSR_F15H_EX_CFG:
2639 break;
2640
2641 case MSR_IA32_UCODE_REV:
2642 if (msr_info->host_initiated)
2643 vcpu->arch.microcode_version = data;
2644 break;
2645 case MSR_IA32_ARCH_CAPABILITIES:
2646 if (!msr_info->host_initiated)
2647 return 1;
2648 vcpu->arch.arch_capabilities = data;
2649 break;
2650 case MSR_EFER:
2651 return set_efer(vcpu, msr_info);
2652 case MSR_K7_HWCR:
2653 data &= ~(u64)0x40; /* ignore flush filter disable */
2654 data &= ~(u64)0x100; /* ignore ignne emulation enable */
2655 data &= ~(u64)0x8; /* ignore TLB cache disable */
2656
2657 /* Handle McStatusWrEn */
2658 if (data == BIT_ULL(18)) {
2659 vcpu->arch.msr_hwcr = data;
2660 } else if (data != 0) {
2661 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2662 data);
2663 return 1;
2664 }
2665 break;
2666 case MSR_FAM10H_MMIO_CONF_BASE:
2667 if (data != 0) {
2668 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2669 "0x%llx\n", data);
2670 return 1;
2671 }
2672 break;
2673 case MSR_IA32_DEBUGCTLMSR:
2674 if (!data) {
2675 /* We support the non-activated case already */
2676 break;
2677 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2678 /* Values other than LBR and BTF are vendor-specific,
2679 thus reserved and should throw a #GP */
2680 return 1;
2681 }
2682 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2683 __func__, data);
2684 break;
2685 case 0x200 ... 0x2ff:
2686 return kvm_mtrr_set_msr(vcpu, msr, data);
2687 case MSR_IA32_APICBASE:
2688 return kvm_set_apic_base(vcpu, msr_info);
2689 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2690 return kvm_x2apic_msr_write(vcpu, msr, data);
2691 case MSR_IA32_TSCDEADLINE:
2692 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2693 break;
2694 case MSR_IA32_TSC_ADJUST:
2695 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
2696 if (!msr_info->host_initiated) {
2697 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2698 adjust_tsc_offset_guest(vcpu, adj);
2699 }
2700 vcpu->arch.ia32_tsc_adjust_msr = data;
2701 }
2702 break;
2703 case MSR_IA32_MISC_ENABLE:
2704 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
2705 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
2706 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
2707 return 1;
2708 vcpu->arch.ia32_misc_enable_msr = data;
2709 kvm_update_cpuid(vcpu);
2710 } else {
2711 vcpu->arch.ia32_misc_enable_msr = data;
2712 }
2713 break;
2714 case MSR_IA32_SMBASE:
2715 if (!msr_info->host_initiated)
2716 return 1;
2717 vcpu->arch.smbase = data;
2718 break;
2719 case MSR_IA32_POWER_CTL:
2720 vcpu->arch.msr_ia32_power_ctl = data;
2721 break;
2722 case MSR_IA32_TSC:
2723 kvm_write_tsc(vcpu, msr_info);
2724 break;
2725 case MSR_IA32_XSS:
2726 if (!msr_info->host_initiated &&
2727 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
2728 return 1;
2729 /*
2730 * We do support PT if kvm_x86_ops->pt_supported(), but we do
2731 * not support IA32_XSS[bit 8]. Guests have to use
2732 * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
2733 * MSRs.
2734 */
2735 if (data != 0)
2736 return 1;
2737 vcpu->arch.ia32_xss = data;
2738 break;
2739 case MSR_SMI_COUNT:
2740 if (!msr_info->host_initiated)
2741 return 1;
2742 vcpu->arch.smi_count = data;
2743 break;
2744 case MSR_KVM_WALL_CLOCK_NEW:
2745 case MSR_KVM_WALL_CLOCK:
2746 vcpu->kvm->arch.wall_clock = data;
2747 kvm_write_wall_clock(vcpu->kvm, data);
2748 break;
2749 case MSR_KVM_SYSTEM_TIME_NEW:
2750 case MSR_KVM_SYSTEM_TIME: {
2751 struct kvm_arch *ka = &vcpu->kvm->arch;
2752
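 /*
 * A non-host-initiated write of the legacy MSR by the boot vCPU marks
 * the guest as an old kvmclock user; any change in that status
 * invalidates the masterclock heuristics, so request an update.
 */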
2753 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
2754 bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
2755
2756 if (ka->boot_vcpu_runs_old_kvmclock != tmp)
2757 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2758
2759 ka->boot_vcpu_runs_old_kvmclock = tmp;
2760 }
2761
2762 vcpu->arch.time = data;
2763 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2764
2765 /* we verify if the enable bit is set... */
2766 vcpu->arch.pv_time_enabled = false;
2767 if (!(data & 1))
2768 break;
2769 /* ...with a valid guest physical address */
2770 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
2771 &vcpu->arch.pv_time, data & ~1ULL,
2772 sizeof(struct pvclock_vcpu_time_info)))
2773 vcpu->arch.pv_time_enabled = true;
2774
2775 break;
2776 }
2777 case MSR_KVM_ASYNC_PF_EN:
2778 if (kvm_pv_enable_async_pf(vcpu, data))
2779 return 1;
2780 break;
2781 case MSR_KVM_STEAL_TIME:
2782
2783 if (unlikely(!sched_info_on()))
2784 return 1;
2785
2786 if (data & KVM_STEAL_RESERVED_MASK)
2787 return 1;
2788
2789 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
2790 data & KVM_STEAL_VALID_BITS,
2791 sizeof(struct kvm_steal_time)))
2792 return 1;
2793
2794 vcpu->arch.st.msr_val = data;
2795
2796 if (!(data & KVM_MSR_ENABLED))
2797 break;
2798
2799 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2800
2801 break;
2802 case MSR_KVM_PV_EOI_EN:
2803 if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
2804 return 1;
2805 break;
2806
2807 case MSR_KVM_POLL_CONTROL:
2808 /* only enable bit supported */
2809 if (data & (-1ULL << 1))
2810 return 1;
2811
2812 vcpu->arch.msr_kvm_poll_control = data;
2813 break;
2814
2815 case MSR_IA32_MCG_CTL:
2816 case MSR_IA32_MCG_STATUS:
2817 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2818 return set_msr_mce(vcpu, msr_info);
2819
2820 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2821 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2822 pr = true; /* fall through */
2823 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2824 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2825 if (kvm_pmu_is_valid_msr(vcpu, msr))
2826 return kvm_pmu_set_msr(vcpu, msr_info);
2827
2828 if (pr || data != 0)
2829 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2830 "0x%x data 0x%llx\n", msr, data);
2831 break;
2832 case MSR_K7_CLK_CTL:
2833 /*
2834 * Ignore all writes to this no longer documented MSR.
2835 * Writes are only relevant for old K7 processors,
2836 * all pre-dating SVM, but a recommended workaround from
2837 * AMD for these chips. It is possible to specify the
2838 * affected processor models on the command line, hence
2839 * the need to ignore the workaround.
2840 */
2841 break;
2842 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2843 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2844 case HV_X64_MSR_CRASH_CTL:
2845 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2846 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
2847 case HV_X64_MSR_TSC_EMULATION_CONTROL:
2848 case HV_X64_MSR_TSC_EMULATION_STATUS:
2849 return kvm_hv_set_msr_common(vcpu, msr, data,
2850 msr_info->host_initiated);
2851 case MSR_IA32_BBL_CR_CTL3:
2852 /* Drop writes to this legacy MSR -- see rdmsr
2853 * counterpart for further detail.
2854 */
2855 if (report_ignored_msrs)
2856 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
2857 msr, data);
2858 break;
2859 case MSR_AMD64_OSVW_ID_LENGTH:
2860 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2861 return 1;
2862 vcpu->arch.osvw.length = data;
2863 break;
2864 case MSR_AMD64_OSVW_STATUS:
2865 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2866 return 1;
2867 vcpu->arch.osvw.status = data;
2868 break;
2869 case MSR_PLATFORM_INFO:
2870 if (!msr_info->host_initiated ||
2871 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
2872 cpuid_fault_enabled(vcpu)))
2873 return 1;
2874 vcpu->arch.msr_platform_info = data;
2875 break;
2876 case MSR_MISC_FEATURES_ENABLES:
2877 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
2878 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
2879 !supports_cpuid_fault(vcpu)))
2880 return 1;
2881 vcpu->arch.msr_misc_features_enables = data;
2882 break;
2883 default:
2884 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2885 return xen_hvm_config(vcpu, data);
2886 if (kvm_pmu_is_valid_msr(vcpu, msr))
2887 return kvm_pmu_set_msr(vcpu, msr_info);
2888 if (!ignore_msrs) {
2889 vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
2890 msr, data);
2891 return 1;
2892 } else {
2893 if (report_ignored_msrs)
2894 vcpu_unimpl(vcpu,
2895 "ignored wrmsr: 0x%x data 0x%llx\n",
2896 msr, data);
2897 break;
2898 }
2899 }
2900 return 0;
2901}
2902EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2903
2904static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
2905{
2906 u64 data;
2907 u64 mcg_cap = vcpu->arch.mcg_cap;
2908 unsigned bank_num = mcg_cap & 0xff;
2909
2910 switch (msr) {
2911 case MSR_IA32_P5_MC_ADDR:
2912 case MSR_IA32_P5_MC_TYPE:
2913 data = 0;
2914 break;
2915 case MSR_IA32_MCG_CAP:
2916 data = vcpu->arch.mcg_cap;
2917 break;
2918 case MSR_IA32_MCG_CTL:
2919 if (!(mcg_cap & MCG_CTL_P) && !host)
2920 return 1;
2921 data = vcpu->arch.mcg_ctl;
2922 break;
2923 case MSR_IA32_MCG_STATUS:
2924 data = vcpu->arch.mcg_status;
2925 break;
2926 default:
2927 if (msr >= MSR_IA32_MC0_CTL &&
2928 msr < MSR_IA32_MCx_CTL(bank_num)) {
2929 u32 offset = msr - MSR_IA32_MC0_CTL;
2930 data = vcpu->arch.mce_banks[offset];
2931 break;
2932 }
2933 return 1;
2934 }
2935 *pdata = data;
2936 return 0;
2937}
2938
2939int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2940{
2941 switch (msr_info->index) {
2942 case MSR_IA32_PLATFORM_ID:
2943 case MSR_IA32_EBL_CR_POWERON:
2944 case MSR_IA32_DEBUGCTLMSR:
2945 case MSR_IA32_LASTBRANCHFROMIP:
2946 case MSR_IA32_LASTBRANCHTOIP:
2947 case MSR_IA32_LASTINTFROMIP:
2948 case MSR_IA32_LASTINTTOIP:
2949 case MSR_K8_SYSCFG:
2950 case MSR_K8_TSEG_ADDR:
2951 case MSR_K8_TSEG_MASK:
2952 case MSR_VM_HSAVE_PA:
2953 case MSR_K8_INT_PENDING_MSG:
2954 case MSR_AMD64_NB_CFG:
2955 case MSR_FAM10H_MMIO_CONF_BASE:
2956 case MSR_AMD64_BU_CFG2:
2957 case MSR_IA32_PERF_CTL:
2958 case MSR_AMD64_DC_CFG:
2959 case MSR_F15H_EX_CFG:
2960 msr_info->data = 0;
2961 break;
2962 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
2963 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2964 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2965 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2966 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2967 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
2968 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
2969 msr_info->data = 0;
2970 break;
2971 case MSR_IA32_UCODE_REV:
2972 msr_info->data = vcpu->arch.microcode_version;
2973 break;
2974 case MSR_IA32_ARCH_CAPABILITIES:
2975 if (!msr_info->host_initiated &&
2976 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
2977 return 1;
2978 msr_info->data = vcpu->arch.arch_capabilities;
2979 break;
2980 case MSR_IA32_POWER_CTL:
2981 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
2982 break;
2983 case MSR_IA32_TSC:
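 /* Guest-visible TSC = scaled host TSC + the guest's TSC offset. */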
2984 msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
2985 break;
2986 case MSR_MTRRcap:
2987 case 0x200 ... 0x2ff:
2988 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
2989 case 0xcd: /* fsb frequency */
2990 msr_info->data = 3;
2991 break;
2992 /*
2993 * MSR_EBC_FREQUENCY_ID
2994 * This legacy, model-specific MSR encodes the front-side
2995 * bus frequency ID of old Intel processors. KVM does not
2996 * emulate it faithfully; it simply returns a conservative
2997 * constant (bit 24 set) so that guests which read the MSR
2998 * during boot do not take an unexpected #GP.
2999 *
3000 * Guests that actually care about the bus frequency
3001 * predate hardware virtualization anyway.
3002 */
3003 case MSR_EBC_FREQUENCY_ID:
3004 msr_info->data = 1 << 24;
3005 break;
3006 case MSR_IA32_APICBASE:
3007 msr_info->data = kvm_get_apic_base(vcpu);
3008 break;
3009 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
3010 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3012 case MSR_IA32_TSCDEADLINE:
3013 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3014 break;
3015 case MSR_IA32_TSC_ADJUST:
3016 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3017 break;
3018 case MSR_IA32_MISC_ENABLE:
3019 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3020 break;
3021 case MSR_IA32_SMBASE:
3022 if (!msr_info->host_initiated)
3023 return 1;
3024 msr_info->data = vcpu->arch.smbase;
3025 break;
3026 case MSR_SMI_COUNT:
3027 msr_info->data = vcpu->arch.smi_count;
3028 break;
3029 case MSR_IA32_PERF_STATUS:
3030 /* TSC increment by tick */
3031 msr_info->data = 1000ULL;
3032 /* CPU multiplier */
3033 msr_info->data |= (((uint64_t)4ULL) << 40);
3034 break;
3035 case MSR_EFER:
3036 msr_info->data = vcpu->arch.efer;
3037 break;
3038 case MSR_KVM_WALL_CLOCK:
3039 case MSR_KVM_WALL_CLOCK_NEW:
3040 msr_info->data = vcpu->kvm->arch.wall_clock;
3041 break;
3042 case MSR_KVM_SYSTEM_TIME:
3043 case MSR_KVM_SYSTEM_TIME_NEW:
3044 msr_info->data = vcpu->arch.time;
3045 break;
3046 case MSR_KVM_ASYNC_PF_EN:
3047 msr_info->data = vcpu->arch.apf.msr_val;
3048 break;
3049 case MSR_KVM_STEAL_TIME:
3050 msr_info->data = vcpu->arch.st.msr_val;
3051 break;
3052 case MSR_KVM_PV_EOI_EN:
3053 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3054 break;
3055 case MSR_KVM_POLL_CONTROL:
3056 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3057 break;
3058 case MSR_IA32_P5_MC_ADDR:
3059 case MSR_IA32_P5_MC_TYPE:
3060 case MSR_IA32_MCG_CAP:
3061 case MSR_IA32_MCG_CTL:
3062 case MSR_IA32_MCG_STATUS:
3063 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3064 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3065 msr_info->host_initiated);
3066 case MSR_IA32_XSS:
3067 if (!msr_info->host_initiated &&
3068 !guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))
3069 return 1;
3070 msr_info->data = vcpu->arch.ia32_xss;
3071 break;
3072 case MSR_K7_CLK_CTL:
3073 /*
3074 * Provide the expected ramp-up count for K7 processors
3075 * (0x20 in bits 29:24). All other fields are zero,
3076 * indicating minimum divisors for every field.
3077 *
3078 * Returning a plausible constant keeps guest kernels
3079 * that probe this legacy AMD MSR from tripping over
3080 * an all-zero value.
3081 */
3082 msr_info->data = 0x20000000;
3083 break;
3084 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3085 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3086 case HV_X64_MSR_CRASH_CTL:
3087 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3088 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3089 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3090 case HV_X64_MSR_TSC_EMULATION_STATUS:
3091 return kvm_hv_get_msr_common(vcpu,
3092 msr_info->index, &msr_info->data,
3093 msr_info->host_initiated);
3095 case MSR_IA32_BBL_CR_CTL3:
3096 /* This legacy MSR exists but isn't fully documented in current
3097 * silicon. It is however accessed by winxp in very narrow
3098 * scenarios where it sets bit #19, itself documented as
3099 * a "reserved" bit. Best effort attempt to source coherent
3100 * read data here should the balance of the register be
3101 * interpreted by the guest:
3102 *
3103 * L2 cache control register 3: 64GB range, 256KB size,
3104 * enabled, latency 0x1, configured
3105 */
3106 msr_info->data = 0xbe702111;
3107 break;
3108 case MSR_AMD64_OSVW_ID_LENGTH:
3109 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3110 return 1;
3111 msr_info->data = vcpu->arch.osvw.length;
3112 break;
3113 case MSR_AMD64_OSVW_STATUS:
3114 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3115 return 1;
3116 msr_info->data = vcpu->arch.osvw.status;
3117 break;
3118 case MSR_PLATFORM_INFO:
3119 if (!msr_info->host_initiated &&
3120 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
3121 return 1;
3122 msr_info->data = vcpu->arch.msr_platform_info;
3123 break;
3124 case MSR_MISC_FEATURES_ENABLES:
3125 msr_info->data = vcpu->arch.msr_misc_features_enables;
3126 break;
3127 case MSR_K7_HWCR:
3128 msr_info->data = vcpu->arch.msr_hwcr;
3129 break;
3130 default:
3131 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3132 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
3133 if (!ignore_msrs) {
3134 vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
3135 msr_info->index);
3136 return 1;
3137 } else {
3138 if (report_ignored_msrs)
3139 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
3140 msr_info->index);
3141 msr_info->data = 0;
3142 }
3143 break;
3144 }
3145 return 0;
3146}
3147EXPORT_SYMBOL_GPL(kvm_get_msr_common);
3148
3149/*
3150 * Read or write a bunch of msrs. All parameters are kernel addresses.
3151 *
3152 * @return number of msrs set successfully.
3153 */
3154static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
3155 struct kvm_msr_entry *entries,
3156 int (*do_msr)(struct kvm_vcpu *vcpu,
3157 unsigned index, u64 *data))
3158{
3159 int i;
3160
3161 for (i = 0; i < msrs->nmsrs; ++i)
3162 if (do_msr(vcpu, entries[i].index, &entries[i].data))
3163 break;
3164
3165 return i;
3166}
3167
3168/*
3169 * Read or write a bunch of msrs. Parameters are user addresses.
3170 *
3171 * @return number of msrs set successfully.
3172 */
3173static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
3174 int (*do_msr)(struct kvm_vcpu *vcpu,
3175 unsigned index, u64 *data),
3176 int writeback)
3177{
3178 struct kvm_msrs msrs;
3179 struct kvm_msr_entry *entries;
3180 int r, n;
3181 unsigned size;
3182
3183 r = -EFAULT;
3184 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
3185 goto out;
3186
3187 r = -E2BIG;
3188 if (msrs.nmsrs >= MAX_IO_MSRS)
3189 goto out;
3190
3191 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
3192 entries = memdup_user(user_msrs->entries, size);
3193 if (IS_ERR(entries)) {
3194 r = PTR_ERR(entries);
3195 goto out;
3196 }
3197
3198 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
3199 if (r < 0)
3200 goto out_free;
3201
3202 r = -EFAULT;
3203 if (writeback && copy_to_user(user_msrs->entries, entries, size))
3204 goto out_free;
3205
3206 r = n;
3207
3208out_free:
3209 kfree(entries);
3210out:
3211 return r;
3212}
3213
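/*
 * MWAIT exits can only be disabled when the host has a usable
 * MONITOR/MWAIT (no MONITOR erratum) and an always-running APIC timer
 * (ARAT), so the APIC timer keeps counting in whatever C-state the
 * guest's MWAIT enters.
 */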
3214static inline bool kvm_can_mwait_in_guest(void)
3215{
3216 return boot_cpu_has(X86_FEATURE_MWAIT) &&
3217 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
3218 boot_cpu_has(X86_FEATURE_ARAT);
3219}
3220
3221int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
3222{
3223 int r = 0;
3224
3225 switch (ext) {
3226 case KVM_CAP_IRQCHIP:
3227 case KVM_CAP_HLT:
3228 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
3229 case KVM_CAP_SET_TSS_ADDR:
3230 case KVM_CAP_EXT_CPUID:
3231 case KVM_CAP_EXT_EMUL_CPUID:
3232 case KVM_CAP_CLOCKSOURCE:
3233 case KVM_CAP_PIT:
3234 case KVM_CAP_NOP_IO_DELAY:
3235 case KVM_CAP_MP_STATE:
3236 case KVM_CAP_SYNC_MMU:
3237 case KVM_CAP_USER_NMI:
3238 case KVM_CAP_REINJECT_CONTROL:
3239 case KVM_CAP_IRQ_INJECT_STATUS:
3240 case KVM_CAP_IOEVENTFD:
3241 case KVM_CAP_IOEVENTFD_NO_LENGTH:
3242 case KVM_CAP_PIT2:
3243 case KVM_CAP_PIT_STATE2:
3244 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
3245 case KVM_CAP_XEN_HVM:
3246 case KVM_CAP_VCPU_EVENTS:
3247 case KVM_CAP_HYPERV:
3248 case KVM_CAP_HYPERV_VAPIC:
3249 case KVM_CAP_HYPERV_SPIN:
3250 case KVM_CAP_HYPERV_SYNIC:
3251 case KVM_CAP_HYPERV_SYNIC2:
3252 case KVM_CAP_HYPERV_VP_INDEX:
3253 case KVM_CAP_HYPERV_EVENTFD:
3254 case KVM_CAP_HYPERV_TLBFLUSH:
3255 case KVM_CAP_HYPERV_SEND_IPI:
3256 case KVM_CAP_HYPERV_CPUID:
3257 case KVM_CAP_PCI_SEGMENT:
3258 case KVM_CAP_DEBUGREGS:
3259 case KVM_CAP_X86_ROBUST_SINGLESTEP:
3260 case KVM_CAP_XSAVE:
3261 case KVM_CAP_ASYNC_PF:
3262 case KVM_CAP_GET_TSC_KHZ:
3263 case KVM_CAP_KVMCLOCK_CTRL:
3264 case KVM_CAP_READONLY_MEM:
3265 case KVM_CAP_HYPERV_TIME:
3266 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
3267 case KVM_CAP_TSC_DEADLINE_TIMER:
3268 case KVM_CAP_DISABLE_QUIRKS:
3269 case KVM_CAP_SET_BOOT_CPU_ID:
3270 case KVM_CAP_SPLIT_IRQCHIP:
3271 case KVM_CAP_IMMEDIATE_EXIT:
3272 case KVM_CAP_PMU_EVENT_FILTER:
3273 case KVM_CAP_GET_MSR_FEATURES:
3274 case KVM_CAP_MSR_PLATFORM_INFO:
3275 case KVM_CAP_EXCEPTION_PAYLOAD:
3276 r = 1;
3277 break;
3278 case KVM_CAP_SYNC_REGS:
3279 r = KVM_SYNC_X86_VALID_FIELDS;
3280 break;
3281 case KVM_CAP_ADJUST_CLOCK:
3282 r = KVM_CLOCK_TSC_STABLE;
3283 break;
3284 case KVM_CAP_X86_DISABLE_EXITS:
3285 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
3286 KVM_X86_DISABLE_EXITS_CSTATE;
3287 if (kvm_can_mwait_in_guest())
3288 r |= KVM_X86_DISABLE_EXITS_MWAIT;
3289 break;
3290 case KVM_CAP_X86_SMM:
3291 /* SMBASE is usually relocated above 1M on modern chipsets,
3292 * and SMM handlers might indeed rely on 4G segment limits,
3293 * so do not report SMM to be available if real mode is
3294 * emulated via vm86 mode. Still, do not go to great lengths
3295 * to avoid userspace's usage of the feature, because it is a
3296 * fringe case that is not enabled except via specific settings
3297 * of the module parameters.
3298 */
3299 r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
3300 break;
3301 case KVM_CAP_VAPIC:
3302 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
3303 break;
3304 case KVM_CAP_NR_VCPUS:
3305 r = KVM_SOFT_MAX_VCPUS;
3306 break;
3307 case KVM_CAP_MAX_VCPUS:
3308 r = KVM_MAX_VCPUS;
3309 break;
3310 case KVM_CAP_MAX_VCPU_ID:
3311 r = KVM_MAX_VCPU_ID;
3312 break;
3313 case KVM_CAP_PV_MMU:
3314 r = 0;
3315 break;
3316 case KVM_CAP_MCE:
3317 r = KVM_MAX_MCE_BANKS;
3318 break;
3319 case KVM_CAP_XCRS:
3320 r = boot_cpu_has(X86_FEATURE_XSAVE);
3321 break;
3322 case KVM_CAP_TSC_CONTROL:
3323 r = kvm_has_tsc_control;
3324 break;
3325 case KVM_CAP_X2APIC_API:
3326 r = KVM_X2APIC_API_VALID_FLAGS;
3327 break;
3328 case KVM_CAP_NESTED_STATE:
3329 r = kvm_x86_ops->get_nested_state ?
3330 kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
3331 break;
3332 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
3333 r = kvm_x86_ops->enable_direct_tlbflush != NULL;
3334 break;
3335 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
3336 r = kvm_x86_ops->nested_enable_evmcs != NULL;
3337 break;
3338 default:
3339 break;
3340 }
3341 return r;
3342
3343}
3344
3345long kvm_arch_dev_ioctl(struct file *filp,
3346 unsigned int ioctl, unsigned long arg)
3347{
3348 void __user *argp = (void __user *)arg;
3349 long r;
3350
3351 switch (ioctl) {
3352 case KVM_GET_MSR_INDEX_LIST: {
3353 struct kvm_msr_list __user *user_msr_list = argp;
3354 struct kvm_msr_list msr_list;
3355 unsigned n;
3356
3357 r = -EFAULT;
3358 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3359 goto out;
3360 n = msr_list.nmsrs;
3361 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
3362 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3363 goto out;
3364 r = -E2BIG;
3365 if (n < msr_list.nmsrs)
3366 goto out;
3367 r = -EFAULT;
3368 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
3369 num_msrs_to_save * sizeof(u32)))
3370 goto out;
3371 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
3372 &emulated_msrs,
3373 num_emulated_msrs * sizeof(u32)))
3374 goto out;
3375 r = 0;
3376 break;
3377 }
3378 case KVM_GET_SUPPORTED_CPUID:
3379 case KVM_GET_EMULATED_CPUID: {
3380 struct kvm_cpuid2 __user *cpuid_arg = argp;
3381 struct kvm_cpuid2 cpuid;
3382
3383 r = -EFAULT;
3384 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
3385 goto out;
3386
3387 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
3388 ioctl);
3389 if (r)
3390 goto out;
3391
3392 r = -EFAULT;
3393 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
3394 goto out;
3395 r = 0;
3396 break;
3397 }
3398 case KVM_X86_GET_MCE_CAP_SUPPORTED:
3399 r = -EFAULT;
3400 if (copy_to_user(argp, &kvm_mce_cap_supported,
3401 sizeof(kvm_mce_cap_supported)))
3402 goto out;
3403 r = 0;
3404 break;
3405 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
3406 struct kvm_msr_list __user *user_msr_list = argp;
3407 struct kvm_msr_list msr_list;
3408 unsigned int n;
3409
3410 r = -EFAULT;
3411 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3412 goto out;
3413 n = msr_list.nmsrs;
3414 msr_list.nmsrs = num_msr_based_features;
3415 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3416 goto out;
3417 r = -E2BIG;
3418 if (n < msr_list.nmsrs)
3419 goto out;
3420 r = -EFAULT;
3421 if (copy_to_user(user_msr_list->indices, &msr_based_features,
3422 num_msr_based_features * sizeof(u32)))
3423 goto out;
3424 r = 0;
3425 break;
3426 }
3427 case KVM_GET_MSRS:
3428 r = msr_io(NULL, argp, do_get_msr_feature, 1);
3429 break;
3430
3431 default:
3432 r = -EINVAL;
3433 }
3434out:
3435 return r;
3436}
3437
3438static void wbinvd_ipi(void *garbage)
3439{
3440 wbinvd();
3441}
3442
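/*
 * WBINVD cannot simply be ignored when non-coherent DMA devices are
 * assigned to the VM: the guest may rely on it to flush CPU caches
 * that such a device reads behind the CPU's back.
 */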
3443static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
3444{
3445 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
3446}
3447
3448void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3449{
3450 /* Address WBINVD may be executed by guest */
3451 if (need_emulate_wbinvd(vcpu)) {
3452 if (kvm_x86_ops->has_wbinvd_exit())
3453 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
3454 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
3455 smp_call_function_single(vcpu->cpu,
3456 wbinvd_ipi, NULL, 1);
3457 }
3458
3459 kvm_x86_ops->vcpu_load(vcpu, cpu);
3460
3461 fpregs_assert_state_consistent();
3462 if (test_thread_flag(TIF_NEED_FPU_LOAD))
3463 switch_fpu_return();
3464
3465 /* Apply any externally detected TSC adjustments (due to suspend) */
3466 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
3467 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
3468 vcpu->arch.tsc_offset_adjustment = 0;
3469 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3470 }
3471
3472 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
3473 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
3474 rdtsc() - vcpu->arch.last_host_tsc;
3475 if (tsc_delta < 0)
3476 mark_tsc_unstable("KVM discovered backwards TSC");
3477
3478 if (kvm_check_tsc_unstable()) {
3479 u64 offset = kvm_compute_tsc_offset(vcpu,
3480 vcpu->arch.last_guest_tsc);
3481 kvm_vcpu_write_tsc_offset(vcpu, offset);
3482 vcpu->arch.tsc_catchup = 1;
3483 }
3484
3485 if (kvm_lapic_hv_timer_in_use(vcpu))
3486 kvm_lapic_restart_hv_timer(vcpu);
3487
3488 /*
3489 * On a host with synchronized TSC, there is no need
3490 * to update kvmclock on vcpu->cpu migration
3491 */
3492 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
3493 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
3494 if (vcpu->cpu != cpu)
3495 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
3496 vcpu->cpu = cpu;
3497 }
3498
3499 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3500}
3501
3502static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
3503{
3504 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3505 return;
3506
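 /*
 * Flag the vCPU as preempted in the guest-visible steal time record,
 * so paravirtualized guests can avoid spinning on its locks.
 */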
3507 vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
3508
3509 kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
3510 &vcpu->arch.st.steal.preempted,
3511 offsetof(struct kvm_steal_time, preempted),
3512 sizeof(vcpu->arch.st.steal.preempted));
3513}
3514
3515void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3516{
3517 int idx;
3518
3519 if (vcpu->preempted)
3520 vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
3521
3522 /*
3523 * Disable page faults because we're in atomic context here.
3524 * kvm_write_guest_offset_cached() would call might_fault()
3525 * that relies on pagefault_disable() to tell if there's a
3526 * bug. NOTE: the write to guest memory may not go through if
3527 * during postcopy live migration or if there's heavy guest
3528 * paging.
3529 */
3530 pagefault_disable();
3531 /*
3532 * kvm_memslots() will be called by
3533 * kvm_write_guest_offset_cached() so take the srcu lock.
3534 */
3535 idx = srcu_read_lock(&vcpu->kvm->srcu);
3536 kvm_steal_time_set_preempted(vcpu);
3537 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3538 pagefault_enable();
3539 kvm_x86_ops->vcpu_put(vcpu);
3540 vcpu->arch.last_host_tsc = rdtsc();
3541
3542 /*
3543 * Clear DR6 on the way out: the guest may have left debug status
3544 * bits set, and stale guest state must not leak into the host.
3545 */
3546 set_debugreg(0, 6);
3547}
3548
3549static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
3550 struct kvm_lapic_state *s)
3551{
3552 if (vcpu->arch.apicv_active)
3553 kvm_x86_ops->sync_pir_to_irr(vcpu);
3554
3555 return kvm_apic_get_state(vcpu, s);
3556}
3557
3558static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
3559 struct kvm_lapic_state *s)
3560{
3561 int r;
3562
3563 r = kvm_apic_set_state(vcpu, s);
3564 if (r)
3565 return r;
3566 update_cr8_intercept(vcpu);
3567
3568 return 0;
3569}
3570
3571static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
3572{
3573 return (!lapic_in_kernel(vcpu) ||
3574 kvm_apic_accept_pic_intr(vcpu));
3575}
3576
3577/*
3578 * if userspace requested an interrupt window, check that the
3579 * interrupt window is open.
3580 *
3581 * No need to exit to userspace if we already have an interrupt queued.
3582 */
3583static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
3584{
3585 return kvm_arch_interrupt_allowed(vcpu) &&
3586 !kvm_cpu_has_interrupt(vcpu) &&
3587 !kvm_event_needs_reinjection(vcpu) &&
3588 kvm_cpu_accept_dm_intr(vcpu);
3589}
3590
3591static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
3592 struct kvm_interrupt *irq)
3593{
3594 if (irq->irq >= KVM_NR_INTERRUPTS)
3595 return -EINVAL;
3596
3597 if (!irqchip_in_kernel(vcpu->kvm)) {
3598 kvm_queue_interrupt(vcpu, irq->irq, false);
3599 kvm_make_request(KVM_REQ_EVENT, vcpu);
3600 return 0;
3601 }
3602
3603 /*
3604 * With in-kernel LAPIC, we only use this to inject EXTINT, so
3605 * fail for in-kernel 8259.
3606 */
3607 if (pic_in_kernel(vcpu->kvm))
3608 return -ENXIO;
3609
3610 if (vcpu->arch.pending_external_vector != -1)
3611 return -EEXIST;
3612
3613 vcpu->arch.pending_external_vector = irq->irq;
3614 kvm_make_request(KVM_REQ_EVENT, vcpu);
3615 return 0;
3616}
3617
3618static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
3619{
3620 kvm_inject_nmi(vcpu);
3621
3622 return 0;
3623}
3624
3625static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
3626{
3627 kvm_make_request(KVM_REQ_SMI, vcpu);
3628
3629 return 0;
3630}
3631
3632static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
3633 struct kvm_tpr_access_ctl *tac)
3634{
3635 if (tac->flags)
3636 return -EINVAL;
3637 vcpu->arch.tpr_access_reporting = !!tac->enabled;
3638 return 0;
3639}
3640
3641static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
3642 u64 mcg_cap)
3643{
3644 int r;
3645 unsigned bank_num = mcg_cap & 0xff, bank;
3646
3647 r = -EINVAL;
3648 if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
3649 goto out;
3650 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
3651 goto out;
3652 r = 0;
3653 vcpu->arch.mcg_cap = mcg_cap;
3654
3655 if (mcg_cap & MCG_CTL_P)
3656 vcpu->arch.mcg_ctl = ~(u64)0;
3657
3658 for (bank = 0; bank < bank_num; bank++)
3659 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
3660
3661 kvm_x86_ops->setup_mce(vcpu);
3662out:
3663 return r;
3664}
3665
3666static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
3667 struct kvm_x86_mce *mce)
3668{
3669 u64 mcg_cap = vcpu->arch.mcg_cap;
3670 unsigned bank_num = mcg_cap & 0xff;
3671 u64 *banks = vcpu->arch.mce_banks;
3672
3673 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
3674 return -EINVAL;
3675
3676 /*
3677 * If IA32_MCG_CTL is not all 1s, uncorrected error reporting is disabled.
3678 */
3679 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
3680 vcpu->arch.mcg_ctl != ~(u64)0)
3681 return 0;
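 /* Each MCE bank is a group of four MSRs: CTL, STATUS, ADDR, MISC. */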
3682 banks += 4 * mce->bank;
3683
3684 /*
3685 * If IA32_MCi_CTL is not all 1s, uncorrected error reporting is disabled for the bank.
3686 */
3687 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
3688 return 0;
3689 if (mce->status & MCI_STATUS_UC) {
3690 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
3691 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
3692 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3693 return 0;
3694 }
3695 if (banks[1] & MCI_STATUS_VAL)
3696 mce->status |= MCI_STATUS_OVER;
3697 banks[2] = mce->addr;
3698 banks[3] = mce->misc;
3699 vcpu->arch.mcg_status = mce->mcg_status;
3700 banks[1] = mce->status;
3701 kvm_queue_exception(vcpu, MC_VECTOR);
3702 } else if (!(banks[1] & MCI_STATUS_VAL)
3703 || !(banks[1] & MCI_STATUS_UC)) {
3704 if (banks[1] & MCI_STATUS_VAL)
3705 mce->status |= MCI_STATUS_OVER;
3706 banks[2] = mce->addr;
3707 banks[3] = mce->misc;
3708 banks[1] = mce->status;
3709 } else
3710 banks[1] |= MCI_STATUS_OVER;
3711 return 0;
3712}
3713
3714static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
3715 struct kvm_vcpu_events *events)
3716{
3717 process_nmi(vcpu);
3718
3719 /*
3720 * The API doesn't provide the instruction length for software
3721 * exceptions, so don't report them. As long as the guest RIP
3722 * isn't advanced, we should expect to encounter the exception
3723 * again.
3724 */
3725 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
3726 events->exception.injected = 0;
3727 events->exception.pending = 0;
3728 } else {
3729 events->exception.injected = vcpu->arch.exception.injected;
3730 events->exception.pending = vcpu->arch.exception.pending;
3731 /*
3732 * For ABI compatibility, deliberately conflate
3733 * pending and injected exceptions when
3734 * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled.
3735 */
3736 if (!vcpu->kvm->arch.exception_payload_enabled)
3737 events->exception.injected |=
3738 vcpu->arch.exception.pending;
3739 }
3740 events->exception.nr = vcpu->arch.exception.nr;
3741 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
3742 events->exception.error_code = vcpu->arch.exception.error_code;
3743 events->exception_has_payload = vcpu->arch.exception.has_payload;
3744 events->exception_payload = vcpu->arch.exception.payload;
3745
3746 events->interrupt.injected =
3747 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
3748 events->interrupt.nr = vcpu->arch.interrupt.nr;
3749 events->interrupt.soft = 0;
3750 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
3751
3752 events->nmi.injected = vcpu->arch.nmi_injected;
3753 events->nmi.pending = vcpu->arch.nmi_pending != 0;
3754 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
3755 events->nmi.pad = 0;
3756
3757 events->sipi_vector = 0; /* never valid when reporting to user space */
3758
3759 events->smi.smm = is_smm(vcpu);
3760 events->smi.pending = vcpu->arch.smi_pending;
3761 events->smi.smm_inside_nmi =
3762 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
3763 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
3764
3765 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
3766 | KVM_VCPUEVENT_VALID_SHADOW
3767 | KVM_VCPUEVENT_VALID_SMM);
3768 if (vcpu->kvm->arch.exception_payload_enabled)
3769 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
3770
3771 memset(&events->reserved, 0, sizeof(events->reserved));
3772}
3773
3774static void kvm_smm_changed(struct kvm_vcpu *vcpu);
3775
3776static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3777 struct kvm_vcpu_events *events)
3778{
3779 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
3780 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
3781 | KVM_VCPUEVENT_VALID_SHADOW
3782 | KVM_VCPUEVENT_VALID_SMM
3783 | KVM_VCPUEVENT_VALID_PAYLOAD))
3784 return -EINVAL;
3785
3786 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
3787 if (!vcpu->kvm->arch.exception_payload_enabled)
3788 return -EINVAL;
3789 if (events->exception.pending)
3790 events->exception.injected = 0;
3791 else
3792 events->exception_has_payload = 0;
3793 } else {
3794 events->exception.pending = 0;
3795 events->exception_has_payload = 0;
3796 }
3797
3798 if ((events->exception.injected || events->exception.pending) &&
3799 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
3800 return -EINVAL;
3801
3802 /* INITs are latched while in SMM */
3803 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
3804 (events->smi.smm || events->smi.pending) &&
3805 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3806 return -EINVAL;
3807
3808 process_nmi(vcpu);
3809 vcpu->arch.exception.injected = events->exception.injected;
3810 vcpu->arch.exception.pending = events->exception.pending;
3811 vcpu->arch.exception.nr = events->exception.nr;
3812 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
3813 vcpu->arch.exception.error_code = events->exception.error_code;
3814 vcpu->arch.exception.has_payload = events->exception_has_payload;
3815 vcpu->arch.exception.payload = events->exception_payload;
3816
3817 vcpu->arch.interrupt.injected = events->interrupt.injected;
3818 vcpu->arch.interrupt.nr = events->interrupt.nr;
3819 vcpu->arch.interrupt.soft = events->interrupt.soft;
3820 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3821 kvm_x86_ops->set_interrupt_shadow(vcpu,
3822 events->interrupt.shadow);
3823
3824 vcpu->arch.nmi_injected = events->nmi.injected;
3825 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3826 vcpu->arch.nmi_pending = events->nmi.pending;
3827 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3828
3829 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3830 lapic_in_kernel(vcpu))
3831 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3832
3833 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
3834 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
3835 if (events->smi.smm)
3836 vcpu->arch.hflags |= HF_SMM_MASK;
3837 else
3838 vcpu->arch.hflags &= ~HF_SMM_MASK;
3839 kvm_smm_changed(vcpu);
3840 }
3841
3842 vcpu->arch.smi_pending = events->smi.pending;
3843
3844 if (events->smi.smm) {
3845 if (events->smi.smm_inside_nmi)
3846 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
3847 else
3848 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
3849 }
3850
3851 if (lapic_in_kernel(vcpu)) {
3852 if (events->smi.latched_init)
3853 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3854 else
3855 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3856 }
3857 }
3858
3859 kvm_make_request(KVM_REQ_EVENT, vcpu);
3860
3861 return 0;
3862}
3863
3864static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3865 struct kvm_debugregs *dbgregs)
3866{
3867 unsigned long val;
3868
3869 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3870 kvm_get_dr(vcpu, 6, &val);
3871 dbgregs->dr6 = val;
3872 dbgregs->dr7 = vcpu->arch.dr7;
3873 dbgregs->flags = 0;
3874 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3875}
3876
3877static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3878 struct kvm_debugregs *dbgregs)
3879{
3880 if (dbgregs->flags)
3881 return -EINVAL;
3882
3883 if (dbgregs->dr6 & ~0xffffffffull)
3884 return -EINVAL;
3885 if (dbgregs->dr7 & ~0xffffffffull)
3886 return -EINVAL;
3887
3888 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3889 kvm_update_dr0123(vcpu);
3890 vcpu->arch.dr6 = dbgregs->dr6;
3891 kvm_update_dr6(vcpu);
3892 vcpu->arch.dr7 = dbgregs->dr7;
3893 kvm_update_dr7(vcpu);
3894
3895 return 0;
3896}
3897
3898#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
3899
3900static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3901{
3902 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
3903 u64 xstate_bv = xsave->header.xfeatures;
3904 u64 valid;
3905
3906 /*
3907 * Copy legacy XSAVE area, to avoid complications with CPUID
3908 * leaves 0 and 1 in the loop below.
3909 */
3910 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
3911
3912 /* Set XSTATE_BV */
3913 xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
3914 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
3915
3916 /*
3917 * Copy each region from the possibly compacted offset to the
3918 * non-compacted offset.
3919 */
3920 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3921 while (valid) {
3922 u64 xfeature_mask = valid & -valid;
3923 int xfeature_nr = fls64(xfeature_mask) - 1;
3924 void *src = get_xsave_addr(xsave, xfeature_nr);
3925
3926 if (src) {
3927 u32 size, offset, ecx, edx;
3928 cpuid_count(XSTATE_CPUID, xfeature_nr,
3929 &size, &offset, &ecx, &edx);
3930 if (xfeature_nr == XFEATURE_PKRU)
3931 memcpy(dest + offset, &vcpu->arch.pkru,
3932 sizeof(vcpu->arch.pkru));
3933 else
3934 memcpy(dest + offset, src, size);
3935
3936 }
3937
3938 valid -= xfeature_mask;
3939 }
3940}
3941
3942static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3943{
3944 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
3945 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3946 u64 valid;
3947
3948 /*
3949 * Copy legacy XSAVE area, to avoid complications with CPUID
3950 * leaves 0 and 1 in the loop below.
3951 */
3952 memcpy(xsave, src, XSAVE_HDR_OFFSET);
3953
3954 /* Set XSTATE_BV and possibly XCOMP_BV. */
3955 xsave->header.xfeatures = xstate_bv;
3956 if (boot_cpu_has(X86_FEATURE_XSAVES))
3957 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
3958
3959 /*
3960 * Copy each region from the non-compacted offset to the
3961 * possibly compacted offset.
3962 */
3963 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3964 while (valid) {
3965 u64 xfeature_mask = valid & -valid;
3966 int xfeature_nr = fls64(xfeature_mask) - 1;
3967 void *dest = get_xsave_addr(xsave, xfeature_nr);
3968
3969 if (dest) {
3970 u32 size, offset, ecx, edx;
3971 cpuid_count(XSTATE_CPUID, xfeature_nr,
3972 &size, &offset, &ecx, &edx);
3973 if (xfeature_nr == XFEATURE_PKRU)
3974 memcpy(&vcpu->arch.pkru, src + offset,
3975 sizeof(vcpu->arch.pkru));
3976 else
3977 memcpy(dest, src + offset, size);
3978 }
3979
3980 valid -= xfeature_mask;
3981 }
3982}
3983
3984static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3985 struct kvm_xsave *guest_xsave)
3986{
3987 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
3988 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
3989 fill_xsave((u8 *) guest_xsave->region, vcpu);
3990 } else {
3991 memcpy(guest_xsave->region,
3992 &vcpu->arch.guest_fpu->state.fxsave,
3993 sizeof(struct fxregs_state));
3994 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3995 XFEATURE_MASK_FPSSE;
3996 }
3997}
3998
3999#define XSAVE_MXCSR_OFFSET 24
4000
4001static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
4002 struct kvm_xsave *guest_xsave)
4003{
4004 u64 xstate_bv =
4005 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
4006 u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
4007
4008 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4009 /*
4010 * Here we allow setting states that are not present in
4011 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
4012 * with old userspace.
4013 */
4014 if (xstate_bv & ~kvm_supported_xcr0() ||
4015 mxcsr & ~mxcsr_feature_mask)
4016 return -EINVAL;
4017 load_xsave(vcpu, (u8 *)guest_xsave->region);
4018 } else {
4019 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
4020 mxcsr & ~mxcsr_feature_mask)
4021 return -EINVAL;
4022 memcpy(&vcpu->arch.guest_fpu->state.fxsave,
4023 guest_xsave->region, sizeof(struct fxregs_state));
4024 }
4025 return 0;
4026}
4027
4028static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
4029 struct kvm_xcrs *guest_xcrs)
4030{
4031 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
4032 guest_xcrs->nr_xcrs = 0;
4033 return;
4034 }
4035
4036 guest_xcrs->nr_xcrs = 1;
4037 guest_xcrs->flags = 0;
4038 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
4039 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
4040}
4041
4042static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
4043 struct kvm_xcrs *guest_xcrs)
4044{
4045 int i, r = 0;
4046
4047 if (!boot_cpu_has(X86_FEATURE_XSAVE))
4048 return -EINVAL;
4049
4050 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
4051 return -EINVAL;
4052
4053 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
4054 /* Only support XCR0 currently */
4055 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
4056 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
4057 guest_xcrs->xcrs[i].value);
4058 break;
4059 }
4060 if (r)
4061 r = -EINVAL;
4062 return r;
4063}
4064
4065/*
4066 * kvm_set_guest_paused() indicates to the guest kernel that it has been
4067 * stopped by the hypervisor. This function will be called from the host
4068 * only. EINVAL is returned when the host attempts to set the flag for a
4069 * guest that does not support pv clocks.
4070 */
4071static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
4072{
4073 if (!vcpu->arch.pv_time_enabled)
4074 return -EINVAL;
4075 vcpu->arch.pvclock_set_guest_stopped_request = true;
4076 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4077 return 0;
4078}
4079
4080static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4081 struct kvm_enable_cap *cap)
4082{
4083 int r;
4084 uint16_t vmcs_version;
4085 void __user *user_ptr;
4086
4087 if (cap->flags)
4088 return -EINVAL;
4089
4090 switch (cap->cap) {
4091 case KVM_CAP_HYPERV_SYNIC2:
4092 if (cap->args[0])
4093 return -EINVAL;
4094 /* fall through */
4095
4096 case KVM_CAP_HYPERV_SYNIC:
4097 if (!irqchip_in_kernel(vcpu->kvm))
4098 return -EINVAL;
4099 return kvm_hv_activate_synic(vcpu, cap->cap ==
4100 KVM_CAP_HYPERV_SYNIC2);
4101 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4102 if (!kvm_x86_ops->nested_enable_evmcs)
4103 return -ENOTTY;
4104 r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
4105 if (!r) {
4106 user_ptr = (void __user *)(uintptr_t)cap->args[0];
4107 if (copy_to_user(user_ptr, &vmcs_version,
4108 sizeof(vmcs_version)))
4109 r = -EFAULT;
4110 }
4111 return r;
4112 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4113 if (!kvm_x86_ops->enable_direct_tlbflush)
4114 return -ENOTTY;
4115
4116 return kvm_x86_ops->enable_direct_tlbflush(vcpu);
4117
4118 default:
4119 return -EINVAL;
4120 }
4121}
4122
4123long kvm_arch_vcpu_ioctl(struct file *filp,
4124 unsigned int ioctl, unsigned long arg)
4125{
4126 struct kvm_vcpu *vcpu = filp->private_data;
4127 void __user *argp = (void __user *)arg;
4128 int r;
4129 union {
4130 struct kvm_lapic_state *lapic;
4131 struct kvm_xsave *xsave;
4132 struct kvm_xcrs *xcrs;
4133 void *buffer;
4134 } u;
4135
4136 vcpu_load(vcpu);
4137
4138 u.buffer = NULL;
4139 switch (ioctl) {
4140 case KVM_GET_LAPIC: {
4141 r = -EINVAL;
4142 if (!lapic_in_kernel(vcpu))
4143 goto out;
4144 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
4145 GFP_KERNEL_ACCOUNT);
4146
4147 r = -ENOMEM;
4148 if (!u.lapic)
4149 goto out;
4150 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
4151 if (r)
4152 goto out;
4153 r = -EFAULT;
4154 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
4155 goto out;
4156 r = 0;
4157 break;
4158 }
4159 case KVM_SET_LAPIC: {
4160 r = -EINVAL;
4161 if (!lapic_in_kernel(vcpu))
4162 goto out;
4163 u.lapic = memdup_user(argp, sizeof(*u.lapic));
4164 if (IS_ERR(u.lapic)) {
4165 r = PTR_ERR(u.lapic);
4166 goto out_nofree;
4167 }
4168
4169 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
4170 break;
4171 }
4172 case KVM_INTERRUPT: {
4173 struct kvm_interrupt irq;
4174
4175 r = -EFAULT;
4176 if (copy_from_user(&irq, argp, sizeof(irq)))
4177 goto out;
4178 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
4179 break;
4180 }
4181 case KVM_NMI: {
4182 r = kvm_vcpu_ioctl_nmi(vcpu);
4183 break;
4184 }
4185 case KVM_SMI: {
4186 r = kvm_vcpu_ioctl_smi(vcpu);
4187 break;
4188 }
4189 case KVM_SET_CPUID: {
4190 struct kvm_cpuid __user *cpuid_arg = argp;
4191 struct kvm_cpuid cpuid;
4192
4193 r = -EFAULT;
4194 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4195 goto out;
4196 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4197 break;
4198 }
4199 case KVM_SET_CPUID2: {
4200 struct kvm_cpuid2 __user *cpuid_arg = argp;
4201 struct kvm_cpuid2 cpuid;
4202
4203 r = -EFAULT;
4204 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4205 goto out;
4206 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
4207 cpuid_arg->entries);
4208 break;
4209 }
4210 case KVM_GET_CPUID2: {
4211 struct kvm_cpuid2 __user *cpuid_arg = argp;
4212 struct kvm_cpuid2 cpuid;
4213
4214 r = -EFAULT;
4215 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4216 goto out;
4217 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
4218 cpuid_arg->entries);
4219 if (r)
4220 goto out;
4221 r = -EFAULT;
4222 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4223 goto out;
4224 r = 0;
4225 break;
4226 }
4227 case KVM_GET_MSRS: {
4228 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4229 r = msr_io(vcpu, argp, do_get_msr, 1);
4230 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4231 break;
4232 }
4233 case KVM_SET_MSRS: {
4234 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4235 r = msr_io(vcpu, argp, do_set_msr, 0);
4236 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4237 break;
4238 }
4239 case KVM_TPR_ACCESS_REPORTING: {
4240 struct kvm_tpr_access_ctl tac;
4241
4242 r = -EFAULT;
4243 if (copy_from_user(&tac, argp, sizeof(tac)))
4244 goto out;
4245 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
4246 if (r)
4247 goto out;
4248 r = -EFAULT;
4249 if (copy_to_user(argp, &tac, sizeof(tac)))
4250 goto out;
4251 r = 0;
4252 break;
4253 }
4254 case KVM_SET_VAPIC_ADDR: {
4255 struct kvm_vapic_addr va;
4256 int idx;
4257
4258 r = -EINVAL;
4259 if (!lapic_in_kernel(vcpu))
4260 goto out;
4261 r = -EFAULT;
4262 if (copy_from_user(&va, argp, sizeof(va)))
4263 goto out;
4264 idx = srcu_read_lock(&vcpu->kvm->srcu);
4265 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
4266 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4267 break;
4268 }
4269 case KVM_X86_SETUP_MCE: {
4270 u64 mcg_cap;
4271
4272 r = -EFAULT;
4273 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
4274 goto out;
4275 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
4276 break;
4277 }
4278 case KVM_X86_SET_MCE: {
4279 struct kvm_x86_mce mce;
4280
4281 r = -EFAULT;
4282 if (copy_from_user(&mce, argp, sizeof(mce)))
4283 goto out;
4284 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
4285 break;
4286 }
4287 case KVM_GET_VCPU_EVENTS: {
4288 struct kvm_vcpu_events events;
4289
4290 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
4291
4292 r = -EFAULT;
4293 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
4294 break;
4295 r = 0;
4296 break;
4297 }
4298 case KVM_SET_VCPU_EVENTS: {
4299 struct kvm_vcpu_events events;
4300
4301 r = -EFAULT;
4302 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
4303 break;
4304
4305 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
4306 break;
4307 }
4308 case KVM_GET_DEBUGREGS: {
4309 struct kvm_debugregs dbgregs;
4310
4311 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
4312
4313 r = -EFAULT;
4314 if (copy_to_user(argp, &dbgregs,
4315 sizeof(struct kvm_debugregs)))
4316 break;
4317 r = 0;
4318 break;
4319 }
4320 case KVM_SET_DEBUGREGS: {
4321 struct kvm_debugregs dbgregs;
4322
4323 r = -EFAULT;
4324 if (copy_from_user(&dbgregs, argp,
4325 sizeof(struct kvm_debugregs)))
4326 break;
4327
4328 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
4329 break;
4330 }
4331 case KVM_GET_XSAVE: {
4332 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
4333 r = -ENOMEM;
4334 if (!u.xsave)
4335 break;
4336
4337 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
4338
4339 r = -EFAULT;
4340 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
4341 break;
4342 r = 0;
4343 break;
4344 }
4345 case KVM_SET_XSAVE: {
4346 u.xsave = memdup_user(argp, sizeof(*u.xsave));
4347 if (IS_ERR(u.xsave)) {
4348 r = PTR_ERR(u.xsave);
4349 goto out_nofree;
4350 }
4351
4352 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
4353 break;
4354 }
4355 case KVM_GET_XCRS: {
4356 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
4357 r = -ENOMEM;
4358 if (!u.xcrs)
4359 break;
4360
4361 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
4362
4363 r = -EFAULT;
4364 if (copy_to_user(argp, u.xcrs,
4365 sizeof(struct kvm_xcrs)))
4366 break;
4367 r = 0;
4368 break;
4369 }
4370 case KVM_SET_XCRS: {
4371 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
4372 if (IS_ERR(u.xcrs)) {
4373 r = PTR_ERR(u.xcrs);
4374 goto out_nofree;
4375 }
4376
4377 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
4378 break;
4379 }
4380 case KVM_SET_TSC_KHZ: {
4381 u32 user_tsc_khz;
4382
4383 r = -EINVAL;
4384 user_tsc_khz = (u32)arg;
4385
4386 if (kvm_has_tsc_control && user_tsc_khz >= kvm_max_guest_tsc_khz)
4387 goto out;
4388
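 /* A requested frequency of 0 means "use the host's TSC rate". */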
4389 if (user_tsc_khz == 0)
4390 user_tsc_khz = tsc_khz;
4391
4392 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
4393 r = 0;
4394
4395 goto out;
4396 }
4397 case KVM_GET_TSC_KHZ: {
4398 r = vcpu->arch.virtual_tsc_khz;
4399 goto out;
4400 }
4401 case KVM_KVMCLOCK_CTRL: {
4402 r = kvm_set_guest_paused(vcpu);
4403 goto out;
4404 }
4405 case KVM_ENABLE_CAP: {
4406 struct kvm_enable_cap cap;
4407
4408 r = -EFAULT;
4409 if (copy_from_user(&cap, argp, sizeof(cap)))
4410 goto out;
4411 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4412 break;
4413 }
4414 case KVM_GET_NESTED_STATE: {
4415 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4416 u32 user_data_size;
4417
4418 r = -EINVAL;
4419 if (!kvm_x86_ops->get_nested_state)
4420 break;
4421
4422 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
4423 r = -EFAULT;
4424 if (get_user(user_data_size, &user_kvm_nested_state->size))
4425 break;
4426
4427 r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
4428 user_data_size);
4429 if (r < 0)
4430 break;
4431
4432 if (r > user_data_size) {
4433 if (put_user(r, &user_kvm_nested_state->size))
4434 r = -EFAULT;
4435 else
4436 r = -E2BIG;
4437 break;
4438 }
4439
4440 r = 0;
4441 break;
4442 }
4443 case KVM_SET_NESTED_STATE: {
4444 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4445 struct kvm_nested_state kvm_state;
4446 int idx;
4447
4448 r = -EINVAL;
4449 if (!kvm_x86_ops->set_nested_state)
4450 break;
4451
4452 r = -EFAULT;
4453 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
4454 break;
4455
4456 r = -EINVAL;
4457 if (kvm_state.size < sizeof(kvm_state))
4458 break;
4459
4460 if (kvm_state.flags &
4461 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
4462 | KVM_STATE_NESTED_EVMCS))
4463 break;
4464
4465 /* nested_run_pending implies guest_mode. */
4466 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
4467 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
4468 break;
4469
4470 idx = srcu_read_lock(&vcpu->kvm->srcu);
4471 r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
4472 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4473 break;
4474 }
4475 case KVM_GET_SUPPORTED_HV_CPUID: {
4476 struct kvm_cpuid2 __user *cpuid_arg = argp;
4477 struct kvm_cpuid2 cpuid;
4478
4479 r = -EFAULT;
4480 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4481 goto out;
4482
4483 r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
4484 cpuid_arg->entries);
4485 if (r)
4486 goto out;
4487
4488 r = -EFAULT;
4489 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4490 goto out;
4491 r = 0;
4492 break;
4493 }
4494 default:
4495 r = -EINVAL;
4496 }
4497out:
4498 kfree(u.buffer);
4499out_nofree:
4500 vcpu_put(vcpu);
4501 return r;
4502}
4503
4504vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4505{
4506 return VM_FAULT_SIGBUS;
4507}
4508
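/*
 * KVM_SET_TSS_ADDR hands KVM a three-page slab of guest physical memory
 * for the real-mode TSS used when running non-unrestricted guests on VMX,
 * which is why the check below rejects any address that would not leave
 * room for three pages below 4GB. A minimal, illustrative userspace call
 * (assuming vm_fd is an open VM file descriptor):
 *
 *	ioctl(vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000);
 */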
4509static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
4510{
4511 int ret;
4512
4513 if (addr > (unsigned int)(-3 * PAGE_SIZE))
4514 return -EINVAL;
4515 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
4516 return ret;
4517}
4518
4519static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
4520 u64 ident_addr)
4521{
4522 return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
4523}
4524
4525static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
4526 unsigned long kvm_nr_mmu_pages)
4527{
4528 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
4529 return -EINVAL;
4530
4531 mutex_lock(&kvm->slots_lock);
4532
4533 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
4534 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
4535
4536 mutex_unlock(&kvm->slots_lock);
4537 return 0;
4538}
4539
4540static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
4541{
4542 return kvm->arch.n_max_mmu_pages;
4543}
4544
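/*
 * Copy the state of one in-kernel interrupt controller out to userspace.
 * chip->chip_id selects the source: the master PIC (pics[0]), the slave
 * PIC (pics[1]), or the IOAPIC.
 */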
4545static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4546{
4547 struct kvm_pic *pic = kvm->arch.vpic;
4548 int r;
4549
4550 r = 0;
4551 switch (chip->chip_id) {
4552 case KVM_IRQCHIP_PIC_MASTER:
4553 memcpy(&chip->chip.pic, &pic->pics[0],
4554 sizeof(struct kvm_pic_state));
4555 break;
4556 case KVM_IRQCHIP_PIC_SLAVE:
4557 memcpy(&chip->chip.pic, &pic->pics[1],
4558 sizeof(struct kvm_pic_state));
4559 break;
4560 case KVM_IRQCHIP_IOAPIC:
4561 kvm_get_ioapic(kvm, &chip->chip.ioapic);
4562 break;
4563 default:
4564 r = -EINVAL;
4565 break;
4566 }
4567 return r;
4568}
4569
4570static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4571{
4572 struct kvm_pic *pic = kvm->arch.vpic;
4573 int r;
4574
4575 r = 0;
4576 switch (chip->chip_id) {
4577 case KVM_IRQCHIP_PIC_MASTER:
4578 spin_lock(&pic->lock);
4579 memcpy(&pic->pics[0], &chip->chip.pic,
4580 sizeof(struct kvm_pic_state));
4581 spin_unlock(&pic->lock);
4582 break;
4583 case KVM_IRQCHIP_PIC_SLAVE:
4584 spin_lock(&pic->lock);
4585 memcpy(&pic->pics[1], &chip->chip.pic,
4586 sizeof(struct kvm_pic_state));
4587 spin_unlock(&pic->lock);
4588 break;
4589 case KVM_IRQCHIP_IOAPIC:
4590 kvm_set_ioapic(kvm, &chip->chip.ioapic);
4591 break;
4592 default:
4593 r = -EINVAL;
4594 break;
4595 }
4596 kvm_pic_update_irq(pic);
4597 return r;
4598}
4599
4600static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4601{
4602 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
4603
4604 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
4605
4606 mutex_lock(&kps->lock);
4607 memcpy(ps, &kps->channels, sizeof(*ps));
4608 mutex_unlock(&kps->lock);
4609 return 0;
4610}
4611
4612static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4613{
4614 int i;
4615 struct kvm_pit *pit = kvm->arch.vpit;
4616
4617 mutex_lock(&pit->pit_state.lock);
4618 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
4619 for (i = 0; i < 3; i++)
4620 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
4621 mutex_unlock(&pit->pit_state.lock);
4622 return 0;
4623}
4624
4625static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4626{
4627 mutex_lock(&kvm->arch.vpit->pit_state.lock);
4628 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
4629 sizeof(ps->channels));
4630 ps->flags = kvm->arch.vpit->pit_state.flags;
4631 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
4632 memset(&ps->reserved, 0, sizeof(ps->reserved));
4633 return 0;
4634}
4635
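/*
 * Restore PIT state from userspace. If this call newly sets the HPET
 * legacy-replacement flag, channel 0 is reloaded in HPET legacy mode
 * (the "start && i == 0" argument below); the other channels are
 * reloaded normally.
 */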
4636static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4637{
4638 int start = 0;
4639 int i;
4640 u32 prev_legacy, cur_legacy;
4641 struct kvm_pit *pit = kvm->arch.vpit;
4642
4643 mutex_lock(&pit->pit_state.lock);
4644 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
4645 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
4646 if (!prev_legacy && cur_legacy)
4647 start = 1;
4648 memcpy(&pit->pit_state.channels, &ps->channels,
4649 sizeof(pit->pit_state.channels));
4650 pit->pit_state.flags = ps->flags;
4651 for (i = 0; i < 3; i++)
4652 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
4653 start && i == 0);
4654 mutex_unlock(&pit->pit_state.lock);
4655 return 0;
4656}
4657
4658static int kvm_vm_ioctl_reinject(struct kvm *kvm,
4659 struct kvm_reinject_control *control)
4660{
4661 struct kvm_pit *pit = kvm->arch.vpit;
4662
4663 if (!pit)
4664 return -ENXIO;
4665 /*
4666  * pit->pit_state.lock was overloaded to prevent userspace from getting
4667  * an inconsistent state after running multiple KVM_REINJECT_CONTROL
4668  * ioctls in parallel.  Use a separate lock if that ioctl isn't rare.
4669  */
4670 mutex_lock(&pit->pit_state.lock);
4671 kvm_pit_set_reinject(pit, control->pit_reinject);
4672 mutex_unlock(&pit->pit_state.lock);
4673
4674 return 0;
4675}
4676
4677 /**
4678  * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot
4679  * @kvm: kvm instance
4680  * @log: slot id and address to which we copy the log
4681  *
4682  * Steps 1-4 below provide a general overview of dirty page logging. See
4683  * kvm_get_dirty_log_protect() for additional details.
4684  *
4685  * We call kvm_get_dirty_log_protect() to handle steps 1-3, and upon return
4686  * we always flush the TLB (step 4) even if a previous step failed and the
4687  * dirty bitmap may be corrupt. Regardless of previous outcome the KVM
4688  * logging API does not preclude userspace from subsequently reading the
4689  * dirty log. Flushing the TLB ensures writes are marked dirty for the next read.
4690  *
4691  *   1. Take a snapshot of the bit and clear it if needed.
4692  *   2. Write protect the corresponding page.
4693  *   3. Copy the snapshot to userspace.
4694  *   4. Flush TLBs if needed.
4695  */
4696int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
4697{
4698 bool flush = false;
4699 int r;
4700
4701 mutex_lock(&kvm->slots_lock);
4702
4703 /*
4704  * Flush potentially hardware-cached dirty pages to dirty_bitmap.
4705  */
4706 if (kvm_x86_ops->flush_log_dirty)
4707 kvm_x86_ops->flush_log_dirty(kvm);
4708
4709 r = kvm_get_dirty_log_protect(kvm, log, &flush);
4710
4711 /*
4712  * All the TLBs can be flushed out of mmu lock, see the comments in
4713  * kvm_mmu_slot_remove_write_access().
4714  */
4715 lockdep_assert_held(&kvm->slots_lock);
4716 if (flush)
4717 kvm_flush_remote_tlbs(kvm);
4718
4719 mutex_unlock(&kvm->slots_lock);
4720 return r;
4721}
4722
4723int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
4724{
4725 bool flush = false;
4726 int r;
4727
4728 mutex_lock(&kvm->slots_lock);
4729
4730 /*
4731  * Flush potentially hardware-cached dirty pages to dirty_bitmap.
4732  */
4733 if (kvm_x86_ops->flush_log_dirty)
4734 kvm_x86_ops->flush_log_dirty(kvm);
4735
4736 r = kvm_clear_dirty_log_protect(kvm, log, &flush);
4737
4738 /*
4739  * All the TLBs can be flushed out of mmu lock, see the comments in
4740  * kvm_mmu_slot_remove_write_access().
4741  */
4742 lockdep_assert_held(&kvm->slots_lock);
4743 if (flush)
4744 kvm_flush_remote_tlbs(kvm);
4745
4746 mutex_unlock(&kvm->slots_lock);
4747 return r;
4748}
4749
4750int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
4751 bool line_status)
4752{
4753 if (!irqchip_in_kernel(kvm))
4754 return -ENXIO;
4755
4756 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
4757 irq_event->irq, irq_event->level,
4758 line_status);
4759 return 0;
4760}
4761
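/*
 * Enable a VM-scoped capability. An illustrative userspace invocation
 * (vm_fd is assumed to be an open VM file descriptor) that disables HLT
 * exits, so a halted vCPU spins in guest mode instead of returning to
 * the host scheduler:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_X86_DISABLE_EXITS,
 *		.args = { KVM_X86_DISABLE_EXITS_HLT },
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */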
4762int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
4763 struct kvm_enable_cap *cap)
4764{
4765 int r;
4766
4767 if (cap->flags)
4768 return -EINVAL;
4769
4770 switch (cap->cap) {
4771 case KVM_CAP_DISABLE_QUIRKS:
4772 kvm->arch.disabled_quirks = cap->args[0];
4773 r = 0;
4774 break;
4775 case KVM_CAP_SPLIT_IRQCHIP: {
4776 mutex_lock(&kvm->lock);
4777 r = -EINVAL;
4778 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
4779 goto split_irqchip_unlock;
4780 r = -EEXIST;
4781 if (irqchip_in_kernel(kvm))
4782 goto split_irqchip_unlock;
4783 if (kvm->created_vcpus)
4784 goto split_irqchip_unlock;
4785 r = kvm_setup_empty_irq_routing(kvm);
4786 if (r)
4787 goto split_irqchip_unlock;
4788 /* Pairs with irqchip_in_kernel. */
4789 smp_wmb();
4790 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
4791 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
4792 r = 0;
4793split_irqchip_unlock:
4794 mutex_unlock(&kvm->lock);
4795 break;
4796 }
4797 case KVM_CAP_X2APIC_API:
4798 r = -EINVAL;
4799 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
4800 break;
4801
4802 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
4803 kvm->arch.x2apic_format = true;
4804 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
4805 kvm->arch.x2apic_broadcast_quirk_disabled = true;
4806
4807 r = 0;
4808 break;
4809 case KVM_CAP_X86_DISABLE_EXITS:
4810 r = -EINVAL;
4811 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
4812 break;
4813
4814 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
4815 kvm_can_mwait_in_guest())
4816 kvm->arch.mwait_in_guest = true;
4817 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
4818 kvm->arch.hlt_in_guest = true;
4819 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
4820 kvm->arch.pause_in_guest = true;
4821 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
4822 kvm->arch.cstate_in_guest = true;
4823 r = 0;
4824 break;
4825 case KVM_CAP_MSR_PLATFORM_INFO:
4826 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
4827 r = 0;
4828 break;
4829 case KVM_CAP_EXCEPTION_PAYLOAD:
4830 kvm->arch.exception_payload_enabled = cap->args[0];
4831 r = 0;
4832 break;
4833 default:
4834 r = -EINVAL;
4835 break;
4836 }
4837 return r;
4838}
4839
4840long kvm_arch_vm_ioctl(struct file *filp,
4841 unsigned int ioctl, unsigned long arg)
4842{
4843 struct kvm *kvm = filp->private_data;
4844 void __user *argp = (void __user *)arg;
4845 int r = -ENOTTY;
4846 /*
4847  * This union makes it completely explicit to the compiler that
4848  * these variables' stack usage should be combined, not added
4849  * together.
4850  */
4851 union {
4852 struct kvm_pit_state ps;
4853 struct kvm_pit_state2 ps2;
4854 struct kvm_pit_config pit_config;
4855 } u;
4856
4857 switch (ioctl) {
4858 case KVM_SET_TSS_ADDR:
4859 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
4860 break;
4861 case KVM_SET_IDENTITY_MAP_ADDR: {
4862 u64 ident_addr;
4863
4864 mutex_lock(&kvm->lock);
4865 r = -EINVAL;
4866 if (kvm->created_vcpus)
4867 goto set_identity_unlock;
4868 r = -EFAULT;
4869 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
4870 goto set_identity_unlock;
4871 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
4872set_identity_unlock:
4873 mutex_unlock(&kvm->lock);
4874 break;
4875 }
4876 case KVM_SET_NR_MMU_PAGES:
4877 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
4878 break;
4879 case KVM_GET_NR_MMU_PAGES:
4880 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
4881 break;
4882 case KVM_CREATE_IRQCHIP: {
4883 mutex_lock(&kvm->lock);
4884
4885 r = -EEXIST;
4886 if (irqchip_in_kernel(kvm))
4887 goto create_irqchip_unlock;
4888
4889 r = -EINVAL;
4890 if (kvm->created_vcpus)
4891 goto create_irqchip_unlock;
4892
4893 r = kvm_pic_init(kvm);
4894 if (r)
4895 goto create_irqchip_unlock;
4896
4897 r = kvm_ioapic_init(kvm);
4898 if (r) {
4899 kvm_pic_destroy(kvm);
4900 goto create_irqchip_unlock;
4901 }
4902
4903 r = kvm_setup_default_irq_routing(kvm);
4904 if (r) {
4905 kvm_ioapic_destroy(kvm);
4906 kvm_pic_destroy(kvm);
4907 goto create_irqchip_unlock;
4908 }
4909 /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
4910 smp_wmb();
4911 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
4912 create_irqchip_unlock:
4913 mutex_unlock(&kvm->lock);
4914 break;
4915 }
4916 case KVM_CREATE_PIT:
4917 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
4918 goto create_pit;
4919 case KVM_CREATE_PIT2:
4920 r = -EFAULT;
4921 if (copy_from_user(&u.pit_config, argp,
4922 sizeof(struct kvm_pit_config)))
4923 goto out;
4924 create_pit:
4925 mutex_lock(&kvm->lock);
4926 r = -EEXIST;
4927 if (kvm->arch.vpit)
4928 goto create_pit_unlock;
4929 r = -ENOMEM;
4930 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
4931 if (kvm->arch.vpit)
4932 r = 0;
4933 create_pit_unlock:
4934 mutex_unlock(&kvm->lock);
4935 break;
4936 case KVM_GET_IRQCHIP: {
4937 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
4938 struct kvm_irqchip *chip;
4939
4940 chip = memdup_user(argp, sizeof(*chip));
4941 if (IS_ERR(chip)) {
4942 r = PTR_ERR(chip);
4943 goto out;
4944 }
4945
4946 r = -ENXIO;
4947 if (!irqchip_kernel(kvm))
4948 goto get_irqchip_out;
4949 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
4950 if (r)
4951 goto get_irqchip_out;
4952 r = -EFAULT;
4953 if (copy_to_user(argp, chip, sizeof(*chip)))
4954 goto get_irqchip_out;
4955 r = 0;
4956 get_irqchip_out:
4957 kfree(chip);
4958 break;
4959 }
4960 case KVM_SET_IRQCHIP: {
4961 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
4962 struct kvm_irqchip *chip;
4963
4964 chip = memdup_user(argp, sizeof(*chip));
4965 if (IS_ERR(chip)) {
4966 r = PTR_ERR(chip);
4967 goto out;
4968 }
4969
4970 r = -ENXIO;
4971 if (!irqchip_kernel(kvm))
4972 goto set_irqchip_out;
4973 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
4974 set_irqchip_out:
4975 kfree(chip);
4976 break;
4977 }
4978 case KVM_GET_PIT: {
4979 r = -EFAULT;
4980 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
4981 goto out;
4982 r = -ENXIO;
4983 if (!kvm->arch.vpit)
4984 goto out;
4985 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
4986 if (r)
4987 goto out;
4988 r = -EFAULT;
4989 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
4990 goto out;
4991 r = 0;
4992 break;
4993 }
4994 case KVM_SET_PIT: {
4995 r = -EFAULT;
4996 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
4997 goto out;
4998 r = -ENXIO;
4999 if (!kvm->arch.vpit)
5000 goto out;
5001 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
5002 break;
5003 }
5004 case KVM_GET_PIT2: {
5005 r = -ENXIO;
5006 if (!kvm->arch.vpit)
5007 goto out;
5008 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
5009 if (r)
5010 goto out;
5011 r = -EFAULT;
5012 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
5013 goto out;
5014 r = 0;
5015 break;
5016 }
5017 case KVM_SET_PIT2: {
5018 r = -EFAULT;
5019 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
5020 goto out;
5021 r = -ENXIO;
5022 if (!kvm->arch.vpit)
5023 goto out;
5024 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
5025 break;
5026 }
5027 case KVM_REINJECT_CONTROL: {
5028 struct kvm_reinject_control control;
5029 r = -EFAULT;
5030 if (copy_from_user(&control, argp, sizeof(control)))
5031 goto out;
5032 r = kvm_vm_ioctl_reinject(kvm, &control);
5033 break;
5034 }
5035 case KVM_SET_BOOT_CPU_ID:
5036 r = 0;
5037 mutex_lock(&kvm->lock);
5038 if (kvm->created_vcpus)
5039 r = -EBUSY;
5040 else
5041 kvm->arch.bsp_vcpu_id = arg;
5042 mutex_unlock(&kvm->lock);
5043 break;
5044 case KVM_XEN_HVM_CONFIG: {
5045 struct kvm_xen_hvm_config xhc;
5046 r = -EFAULT;
5047 if (copy_from_user(&xhc, argp, sizeof(xhc)))
5048 goto out;
5049 r = -EINVAL;
5050 if (xhc.flags)
5051 goto out;
5052 memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
5053 r = 0;
5054 break;
5055 }
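	/*
	 * KVM_SET_CLOCK: the guest-visible kvmclock value is the host
	 * clock plus kvm->arch.kvmclock_offset, so setting the clock
	 * reduces to adjusting the offset by the difference between
	 * the requested value and the current reading.
	 */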
5056 case KVM_SET_CLOCK: {
5057 struct kvm_clock_data user_ns;
5058 u64 now_ns;
5059
5060 r = -EFAULT;
5061 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
5062 goto out;
5063
5064 r = -EINVAL;
5065 if (user_ns.flags)
5066 goto out;
5067
5068 r = 0;
5069 /*
5070  * TODO: userspace has to take care of races with VCPU_RUN, so
5071  * kvm_gen_update_masterclock() can be cut down to locked
5072  * kvm_get_time_and_clockread().
5073  */
5074 kvm_gen_update_masterclock(kvm);
5075 now_ns = get_kvmclock_ns(kvm);
5076 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
5077 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
5078 break;
5079 }
5080 case KVM_GET_CLOCK: {
5081 struct kvm_clock_data user_ns;
5082 u64 now_ns;
5083
5084 now_ns = get_kvmclock_ns(kvm);
5085 user_ns.clock = now_ns;
5086 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
5087 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
5088
5089 r = -EFAULT;
5090 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
5091 goto out;
5092 r = 0;
5093 break;
5094 }
5095 case KVM_MEMORY_ENCRYPT_OP: {
5096 r = -ENOTTY;
5097 if (kvm_x86_ops->mem_enc_op)
5098 r = kvm_x86_ops->mem_enc_op(kvm, argp);
5099 break;
5100 }
5101 case KVM_MEMORY_ENCRYPT_REG_REGION: {
5102 struct kvm_enc_region region;
5103
5104 r = -EFAULT;
5105 if (copy_from_user(&region, argp, sizeof(region)))
5106 goto out;
5107
5108 r = -ENOTTY;
5109 if (kvm_x86_ops->mem_enc_reg_region)
5110 r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
5111 break;
5112 }
5113 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
5114 struct kvm_enc_region region;
5115
5116 r = -EFAULT;
5117 if (copy_from_user(&region, argp, sizeof(region)))
5118 goto out;
5119
5120 r = -ENOTTY;
5121 if (kvm_x86_ops->mem_enc_unreg_region)
5122 r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
5123 break;
5124 }
5125 case KVM_HYPERV_EVENTFD: {
5126 struct kvm_hyperv_eventfd hvevfd;
5127
5128 r = -EFAULT;
5129 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
5130 goto out;
5131 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
5132 break;
5133 }
5134 case KVM_SET_PMU_EVENT_FILTER:
5135 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
5136 break;
5137 default:
5138 r = -ENOTTY;
5139 }
5140out:
5141 return r;
5142}
5143
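/*
 * Build the MSR lists reported by KVM_GET_MSR_INDEX_LIST and
 * KVM_GET_MSR_FEATURE_INDEX_LIST: MSRs to save/restore that must exist
 * on the host, emulated MSRs that need no hardware support, and
 * read-only feature MSRs, each filtered against what this host and
 * vendor module actually support.
 */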
5144static void kvm_init_msr_list(void)
5145{
5146 struct x86_pmu_capability x86_pmu;
5147 u32 dummy[2];
5148 unsigned i;
5149
5150 BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
5151 "Please update the fixed PMCs in msrs_to_save_all[]");
5152
5153 perf_get_x86_pmu_capability(&x86_pmu);
5154
5155 num_msrs_to_save = 0;
5156 num_emulated_msrs = 0;
5157 num_msr_based_features = 0;
5158
5159 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
5160 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
5161 continue;
5162
5163 /*
5164  * Even MSRs that are valid in the host may not be exposed
5165  * to the guests in some cases.
5166  */
5167 switch (msrs_to_save_all[i]) {
5168 case MSR_IA32_BNDCFGS:
5169 if (!kvm_mpx_supported())
5170 continue;
5171 break;
5172 case MSR_TSC_AUX:
5173 if (!kvm_x86_ops->rdtscp_supported())
5174 continue;
5175 break;
5176 case MSR_IA32_RTIT_CTL:
5177 case MSR_IA32_RTIT_STATUS:
5178 if (!kvm_x86_ops->pt_supported())
5179 continue;
5180 break;
5181 case MSR_IA32_RTIT_CR3_MATCH:
5182 if (!kvm_x86_ops->pt_supported() ||
5183 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
5184 continue;
5185 break;
5186 case MSR_IA32_RTIT_OUTPUT_BASE:
5187 case MSR_IA32_RTIT_OUTPUT_MASK:
5188 if (!kvm_x86_ops->pt_supported() ||
5189 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
5190 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
5191 continue;
5192 break;
5193 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
5194 if (!kvm_x86_ops->pt_supported() ||
5195 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
5196 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
5197 continue;
5198 break;
5199 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
5200 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
5201 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5202 continue;
5203 break;
5204 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
5205 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
5206 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5207 continue;
5208 break;
5209 default:
5210 break;
5211 }
5212
5213 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
5214 }
5215
5216 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
5217 if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
5218 continue;
5219
5220 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
5221 }
5222
5223 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
5224 struct kvm_msr_entry msr;
5225
5226 msr.index = msr_based_features_all[i];
5227 if (kvm_get_msr_feature(&msr))
5228 continue;
5229
5230 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
5231 }
5232}
5233
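/*
 * Dispatch an MMIO write in chunks of at most 8 bytes, trying the
 * in-kernel local APIC first and the rest of the MMIO bus second.
 * Returns the number of bytes handled in the kernel; anything short
 * of 'len' must be completed in userspace.
 */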
5234static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
5235 const void *v)
5236{
5237 int handled = 0;
5238 int n;
5239
5240 do {
5241 n = min(len, 8);
5242 if (!(lapic_in_kernel(vcpu) &&
5243 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
5244 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
5245 break;
5246 handled += n;
5247 addr += n;
5248 len -= n;
5249 v += n;
5250 } while (len);
5251
5252 return handled;
5253}
5254
5255static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
5256{
5257 int handled = 0;
5258 int n;
5259
5260 do {
5261 n = min(len, 8);
5262 if (!(lapic_in_kernel(vcpu) &&
5263 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
5264 addr, n, v))
5265 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
5266 break;
5267 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
5268 handled += n;
5269 addr += n;
5270 len -= n;
5271 v += n;
5272 } while (len);
5273
5274 return handled;
5275}
5276
5277static void kvm_set_segment(struct kvm_vcpu *vcpu,
5278 struct kvm_segment *var, int seg)
5279{
5280 kvm_x86_ops->set_segment(vcpu, var, seg);
5281}
5282
5283void kvm_get_segment(struct kvm_vcpu *vcpu,
5284 struct kvm_segment *var, int seg)
5285{
5286 kvm_x86_ops->get_segment(vcpu, var, seg);
5287}
5288
5289gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
5290 struct x86_exception *exception)
5291{
5292 gpa_t t_gpa;
5293
5294 BUG_ON(!mmu_is_nested(vcpu));
5295
5296 /* NPT walks are always user-mode walks. */
5297 access |= PFERR_USER_MASK;
5298 t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
5299
5300 return t_gpa;
5301}
5302
5303gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
5304 struct x86_exception *exception)
5305{
5306 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5307 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5308}
5309
5310 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
5311 struct x86_exception *exception)
5312{
5313 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5314 access |= PFERR_FETCH_MASK;
5315 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5316}
5317
5318gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
5319 struct x86_exception *exception)
5320{
5321 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5322 access |= PFERR_WRITE_MASK;
5323 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5324}
5325
5326 /* Used for implicit system (supervisor) accesses by emulated instructions. */
5327gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
5328 struct x86_exception *exception)
5329{
5330 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
5331}
5332
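/*
 * Copy guest-virtual memory into 'val' page by page: each page is
 * translated through the active walk MMU with the given PFERR_* access
 * bits. Translation failures propagate a fault via 'exception'; read
 * failures yield X86EMUL_IO_NEEDED.
 */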
5333static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5334 struct kvm_vcpu *vcpu, u32 access,
5335 struct x86_exception *exception)
5336{
5337 void *data = val;
5338 int r = X86EMUL_CONTINUE;
5339
5340 while (bytes) {
5341 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
5342 exception);
5343 unsigned offset = addr & (PAGE_SIZE-1);
5344 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
5345 int ret;
5346
5347 if (gpa == UNMAPPED_GVA)
5348 return X86EMUL_PROPAGATE_FAULT;
5349 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
5350 offset, toread);
5351 if (ret < 0) {
5352 r = X86EMUL_IO_NEEDED;
5353 goto out;
5354 }
5355
5356 bytes -= toread;
5357 data += toread;
5358 addr += toread;
5359 }
5360out:
5361 return r;
5362}
5363
5364 /* used for instruction fetching */
5365static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
5366 gva_t addr, void *val, unsigned int bytes,
5367 struct x86_exception *exception)
5368{
5369 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5370 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5371 unsigned offset;
5372 int ret;
5373
5374 /* Inline kvm_read_guest_virt_helper for speed.  */
5375 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
5376 exception);
5377 if (unlikely(gpa == UNMAPPED_GVA))
5378 return X86EMUL_PROPAGATE_FAULT;
5379
5380 offset = addr & (PAGE_SIZE-1);
5381 if (WARN_ON(offset + bytes > PAGE_SIZE))
5382 bytes = (unsigned)PAGE_SIZE - offset;
5383 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
5384 offset, bytes);
5385 if (unlikely(ret < 0))
5386 return X86EMUL_IO_NEEDED;
5387
5388 return X86EMUL_CONTINUE;
5389}
5390
5391int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
5392 gva_t addr, void *val, unsigned int bytes,
5393 struct x86_exception *exception)
5394{
5395 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5396
5397 /*
5398  * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
5399  * is returned, but our callers are not ready for that and they blindly
5400  * call kvm_inject_page_fault.  Ensure that they at least do not leak
5401  * kernel stack memory.
5402  */
5403 memset(exception, 0, sizeof(*exception));
5404 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
5405 exception);
5406}
5407EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
5408
5409static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
5410 gva_t addr, void *val, unsigned int bytes,
5411 struct x86_exception *exception, bool system)
5412{
5413 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5414 u32 access = 0;
5415
5416 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
5417 access |= PFERR_USER_MASK;
5418
5419 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
5420}
5421
5422static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
5423 unsigned long addr, void *val, unsigned int bytes)
5424{
5425 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5426 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
5427
5428 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
5429}
5430
5431static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5432 struct kvm_vcpu *vcpu, u32 access,
5433 struct x86_exception *exception)
5434{
5435 void *data = val;
5436 int r = X86EMUL_CONTINUE;
5437
5438 while (bytes) {
5439 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
5440 access,
5441 exception);
5442 unsigned offset = addr & (PAGE_SIZE-1);
5443 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
5444 int ret;
5445
5446 if (gpa == UNMAPPED_GVA)
5447 return X86EMUL_PROPAGATE_FAULT;
5448 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
5449 if (ret < 0) {
5450 r = X86EMUL_IO_NEEDED;
5451 goto out;
5452 }
5453
5454 bytes -= towrite;
5455 data += towrite;
5456 addr += towrite;
5457 }
5458out:
5459 return r;
5460}
5461
5462static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
5463 unsigned int bytes, struct x86_exception *exception,
5464 bool system)
5465{
5466 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5467 u32 access = PFERR_WRITE_MASK;
5468
5469 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
5470 access |= PFERR_USER_MASK;
5471
5472 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5473 access, exception);
5474}
5475
5476int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
5477 unsigned int bytes, struct x86_exception *exception)
5478{
5479 /* kvm_write_guest_virt_system can pull in tons of pages. */
5480 vcpu->arch.l1tf_flush_l1d = true;
5481
5482 /*
5483  * FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
5484  * is returned, but our callers are not ready for that and they blindly
5485  * call kvm_inject_page_fault.  Ensure that they at least do not leak
5486  * kernel stack memory.
5487  */
5488 memset(exception, 0, sizeof(*exception));
5489 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5490 PFERR_WRITE_MASK, exception);
5491}
5492EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
5493
5494int handle_ud(struct kvm_vcpu *vcpu)
5495{
5496 static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
5497 int emul_type = EMULTYPE_TRAP_UD;
5498 char sig[5];
5499 struct x86_exception e;
5500
5501 if (force_emulation_prefix &&
5502 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
5503 sig, sizeof(sig), &e) == 0 &&
5504 memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
5505 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
5506 emul_type = EMULTYPE_TRAP_UD_FORCED;
5507 }
5508
5509 return kvm_emulate_instruction(vcpu, emul_type);
5510}
5511EXPORT_SYMBOL_GPL(handle_ud);
5512
5513static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5514 gpa_t gpa, bool write)
5515{
5516 /* For APIC access vmexit */
5517 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5518 return 1;
5519
5520 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
5521 trace_vcpu_match_mmio(gva, gpa, write, true);
5522 return 1;
5523 }
5524
5525 return 0;
5526}
5527
5528static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5529 gpa_t *gpa, struct x86_exception *exception,
5530 bool write)
5531{
5532 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
5533 | (write ? PFERR_WRITE_MASK : 0);
5534
5535 /*
5536  * Currently PKRU is only applied to EPT-enabled guests, so
5537  * there is no pkey in the EPT page table for an L1 guest or in
5538  * the EPT shadow page table for an L2 guest.
5539  */
5540 if (vcpu_match_mmio_gva(vcpu, gva)
5541 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
5542 vcpu->arch.mmio_access, 0, access)) {
5543 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
5544 (gva & (PAGE_SIZE - 1));
5545 trace_vcpu_match_mmio(gva, *gpa, write, false);
5546 return 1;
5547 }
5548
5549 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5550
5551 if (*gpa == UNMAPPED_GVA)
5552 return -1;
5553
5554 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
5555}
5556
5557int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
5558 const void *val, int bytes)
5559{
5560 int ret;
5561
5562 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
5563 if (ret < 0)
5564 return 0;
5565 kvm_page_track_write(vcpu, gpa, val, bytes);
5566 return 1;
5567}
5568
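/*
 * Strategy table that lets emulator_read_write() drive reads and writes
 * through one code path: read_write_prepare short-circuits already
 * completed MMIO reads, read_write_emulate goes through ordinary guest
 * memory, read_write_mmio hits in-kernel devices, and
 * read_write_exit_mmio hands the remainder to userspace as KVM_EXIT_MMIO.
 */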
5569struct read_write_emulator_ops {
5570 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
5571 int bytes);
5572 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
5573 void *val, int bytes);
5574 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5575 int bytes, void *val);
5576 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5577 void *val, int bytes);
5578 bool write;
5579};
5580
5581static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
5582{
5583 if (vcpu->mmio_read_completed) {
5584 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
5585 vcpu->mmio_fragments[0].gpa, val);
5586 vcpu->mmio_read_completed = 0;
5587 return 1;
5588 }
5589
5590 return 0;
5591}
5592
5593static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5594 void *val, int bytes)
5595{
5596 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
5597}
5598
5599static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5600 void *val, int bytes)
5601{
5602 return emulator_write_phys(vcpu, gpa, val, bytes);
5603}
5604
5605static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
5606{
5607 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
5608 return vcpu_mmio_write(vcpu, gpa, bytes, val);
5609}
5610
5611static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5612 void *val, int bytes)
5613{
5614 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
5615 return X86EMUL_IO_NEEDED;
5616}
5617
5618static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5619 void *val, int bytes)
5620{
5621 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
5622
5623 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
5624 return X86EMUL_CONTINUE;
5625}
5626
5627static const struct read_write_emulator_ops read_emultor = {
5628 .read_write_prepare = read_prepare,
5629 .read_write_emulate = read_emulate,
5630 .read_write_mmio = vcpu_mmio_read,
5631 .read_write_exit_mmio = read_exit_mmio,
5632};
5633
5634static const struct read_write_emulator_ops write_emultor = {
5635 .read_write_emulate = write_emulate,
5636 .read_write_mmio = write_mmio,
5637 .read_write_exit_mmio = write_exit_mmio,
5638 .write = true,
5639};
5640
5641static int emulator_read_write_onepage(unsigned long addr, void *val,
5642 unsigned int bytes,
5643 struct x86_exception *exception,
5644 struct kvm_vcpu *vcpu,
5645 const struct read_write_emulator_ops *ops)
5646{
5647 gpa_t gpa;
5648 int handled, ret;
5649 bool write = ops->write;
5650 struct kvm_mmio_fragment *frag;
5651 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5652
5653 /*
5654  * If the exit was due to a NPF we may already have a GPA.
5655  * If the GPA is present, use it to avoid the GVA-to-GPA table
5656  * walk.  Note, this cannot be used on string operations, since
5657  * a string operation using rep will only have the initial GPA
5658  * from the NPF that occurred.
5659  */
5660 if (vcpu->arch.gpa_available &&
5661 emulator_can_use_gpa(ctxt) &&
5662 (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
5663 gpa = vcpu->arch.gpa_val;
5664 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
5665 } else {
5666 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
5667 if (ret < 0)
5668 return X86EMUL_PROPAGATE_FAULT;
5669 }
5670
5671 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
5672 return X86EMUL_CONTINUE;
5673
5674 /*
5675  * Is this MMIO handled locally?
5676  */
5677 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
5678 if (handled == bytes)
5679 return X86EMUL_CONTINUE;
5680
5681 gpa += handled;
5682 bytes -= handled;
5683 val += handled;
5684
5685 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
5686 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
5687 frag->gpa = gpa;
5688 frag->data = val;
5689 frag->len = bytes;
5690 return X86EMUL_CONTINUE;
5691}
5692
5693static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
5694 unsigned long addr,
5695 void *val, unsigned int bytes,
5696 struct x86_exception *exception,
5697 const struct read_write_emulator_ops *ops)
5698{
5699 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5700 gpa_t gpa;
5701 int rc;
5702
5703 if (ops->read_write_prepare &&
5704 ops->read_write_prepare(vcpu, val, bytes))
5705 return X86EMUL_CONTINUE;
5706
5707 vcpu->mmio_nr_fragments = 0;
5708
5709 /* Crossing a page boundary? */
5710 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
5711 int now;
5712
5713 now = -addr & ~PAGE_MASK;
5714 rc = emulator_read_write_onepage(addr, val, now, exception,
5715 vcpu, ops);
5716
5717 if (rc != X86EMUL_CONTINUE)
5718 return rc;
5719 addr += now;
5720 if (ctxt->mode != X86EMUL_MODE_PROT64)
5721 addr = (u32)addr;
5722 val += now;
5723 bytes -= now;
5724 }
5725
5726 rc = emulator_read_write_onepage(addr, val, bytes, exception,
5727 vcpu, ops);
5728 if (rc != X86EMUL_CONTINUE)
5729 return rc;
5730
5731 if (!vcpu->mmio_nr_fragments)
5732 return rc;
5733
5734 gpa = vcpu->mmio_fragments[0].gpa;
5735
5736 vcpu->mmio_needed = 1;
5737 vcpu->mmio_cur_fragment = 0;
5738
5739 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
5740 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
5741 vcpu->run->exit_reason = KVM_EXIT_MMIO;
5742 vcpu->run->mmio.phys_addr = gpa;
5743
5744 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
5745}
5746
5747static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
5748 unsigned long addr,
5749 void *val,
5750 unsigned int bytes,
5751 struct x86_exception *exception)
5752{
5753 return emulator_read_write(ctxt, addr, val, bytes,
5754 exception, &read_emultor);
5755}
5756
5757static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
5758 unsigned long addr,
5759 const void *val,
5760 unsigned int bytes,
5761 struct x86_exception *exception)
5762{
5763 return emulator_read_write(ctxt, addr, (void *)val, bytes,
5764 exception, &write_emultor);
5765}
5766
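/*
 * Compare-and-exchange *ptr against *old as a value of type t,
 * installing *new on a match; evaluates to true iff the exchange
 * happened. On 32-bit hosts the 8-byte case needs cmpxchg64().
 */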
5767#define CMPXCHG_TYPE(t, ptr, old, new) \
5768 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
5769
5770#ifdef CONFIG_X86_64
5771# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
5772#else
5773# define CMPXCHG64(ptr, old, new) \
5774 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
5775#endif
5776
5777static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
5778 unsigned long addr,
5779 const void *old,
5780 const void *new,
5781 unsigned int bytes,
5782 struct x86_exception *exception)
5783{
5784 struct kvm_host_map map;
5785 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5786 gpa_t gpa;
5787 char *kaddr;
5788 bool exchanged;
5789
5790 /* A guest cmpxchg8b has to be emulated atomically. */
5791 if (bytes > 8 || (bytes & (bytes - 1)))
5792 goto emul_write;
5793
5794 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
5795
5796 if (gpa == UNMAPPED_GVA ||
5797 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5798 goto emul_write;
5799
5800 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
5801 goto emul_write;
5802
5803 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
5804 goto emul_write;
5805
5806 kaddr = map.hva + offset_in_page(gpa);
5807
5808 switch (bytes) {
5809 case 1:
5810 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
5811 break;
5812 case 2:
5813 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
5814 break;
5815 case 4:
5816 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
5817 break;
5818 case 8:
5819 exchanged = CMPXCHG64(kaddr, old, new);
5820 break;
5821 default:
5822 BUG();
5823 }
5824
5825 kvm_vcpu_unmap(vcpu, &map, true);
5826
5827 if (!exchanged)
5828 return X86EMUL_CMPXCHG_FAILED;
5829
5830 kvm_page_track_write(vcpu, gpa, new, bytes);
5831
5832 return X86EMUL_CONTINUE;
5833
5834emul_write:
5835 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
5836
5837 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
5838}
5839
5840static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
5841{
5842 int r = 0, i;
5843
5844 for (i = 0; i < vcpu->arch.pio.count; i++) {
5845 if (vcpu->arch.pio.in)
5846 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
5847 vcpu->arch.pio.size, pd);
5848 else
5849 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
5850 vcpu->arch.pio.port, vcpu->arch.pio.size,
5851 pd);
5852 if (r)
5853 break;
5854 pd += vcpu->arch.pio.size;
5855 }
5856 return r;
5857}
5858
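/*
 * Common PIO setup for both directions: try to complete the access on
 * the in-kernel PIO bus; if no device claims it, describe the access in
 * vcpu->run so userspace can finish it through the shared KVM_EXIT_IO
 * data page. Returns 1 when handled in the kernel, 0 when exiting.
 */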
5859static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
5860 unsigned short port, void *val,
5861 unsigned int count, bool in)
5862{
5863 vcpu->arch.pio.port = port;
5864 vcpu->arch.pio.in = in;
5865 vcpu->arch.pio.count = count;
5866 vcpu->arch.pio.size = size;
5867
5868 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
5869 vcpu->arch.pio.count = 0;
5870 return 1;
5871 }
5872
5873 vcpu->run->exit_reason = KVM_EXIT_IO;
5874 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
5875 vcpu->run->io.size = size;
5876 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
5877 vcpu->run->io.count = count;
5878 vcpu->run->io.port = port;
5879
5880 return 0;
5881}
5882
5883static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
5884 int size, unsigned short port, void *val,
5885 unsigned int count)
5886{
5887 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5888 int ret;
5889
5890 if (vcpu->arch.pio.count)
5891 goto data_avail;
5892
5893 memset(vcpu->arch.pio_data, 0, size * count);
5894
5895 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
5896 if (ret) {
5897data_avail:
5898 memcpy(val, vcpu->arch.pio_data, size * count);
5899 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
5900 vcpu->arch.pio.count = 0;
5901 return 1;
5902 }
5903
5904 return 0;
5905}
5906
5907static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
5908 int size, unsigned short port,
5909 const void *val, unsigned int count)
5910{
5911 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5912
5913 memcpy(vcpu->arch.pio_data, val, size * count);
5914 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
5915 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
5916}
5917
5918static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
5919{
5920 return kvm_x86_ops->get_segment_base(vcpu, seg);
5921}
5922
5923static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
5924{
5925 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
5926}
5927
5928static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
5929{
5930 if (!need_emulate_wbinvd(vcpu))
5931 return X86EMUL_CONTINUE;
5932
5933 if (kvm_x86_ops->has_wbinvd_exit()) {
5934 int cpu = get_cpu();
5935
5936 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
5937 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
5938 wbinvd_ipi, NULL, 1);
5939 put_cpu();
5940 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
5941 } else
5942 wbinvd();
5943 return X86EMUL_CONTINUE;
5944}
5945
5946int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
5947{
5948 kvm_emulate_wbinvd_noskip(vcpu);
5949 return kvm_skip_emulated_instruction(vcpu);
5950}
5951EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
5952
5953
5954
5955static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
5956{
5957 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
5958}
5959
5960static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
5961 unsigned long *dest)
5962{
5963 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
5964}
5965
5966static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
5967 unsigned long value)
5968{
5969
5970 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
5971}
5972
5973static u64 mk_cr_64(u64 curr_cr, u32 new_val)
5974{
5975 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
5976}
5977
5978static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
5979{
5980 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5981 unsigned long value;
5982
5983 switch (cr) {
5984 case 0:
5985 value = kvm_read_cr0(vcpu);
5986 break;
5987 case 2:
5988 value = vcpu->arch.cr2;
5989 break;
5990 case 3:
5991 value = kvm_read_cr3(vcpu);
5992 break;
5993 case 4:
5994 value = kvm_read_cr4(vcpu);
5995 break;
5996 case 8:
5997 value = kvm_get_cr8(vcpu);
5998 break;
5999 default:
6000 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6001 return 0;
6002 }
6003
6004 return value;
6005}
6006
6007static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
6008{
6009 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6010 int res = 0;
6011
6012 switch (cr) {
6013 case 0:
6014 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
6015 break;
6016 case 2:
6017 vcpu->arch.cr2 = val;
6018 break;
6019 case 3:
6020 res = kvm_set_cr3(vcpu, val);
6021 break;
6022 case 4:
6023 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
6024 break;
6025 case 8:
6026 res = kvm_set_cr8(vcpu, val);
6027 break;
6028 default:
6029 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6030 res = -1;
6031 }
6032
6033 return res;
6034}
6035
6036static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
6037{
6038 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
6039}
6040
6041static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6042{
6043 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
6044}
6045
6046static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6047{
6048 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
6049}
6050
6051static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6052{
6053 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
6054}
6055
6056static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6057{
6058 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
6059}
6060
6061static unsigned long emulator_get_cached_segment_base(
6062 struct x86_emulate_ctxt *ctxt, int seg)
6063{
6064 return get_segment_base(emul_to_vcpu(ctxt), seg);
6065}
6066
6067static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
6068 struct desc_struct *desc, u32 *base3,
6069 int seg)
6070{
6071 struct kvm_segment var;
6072
6073 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
6074 *selector = var.selector;
6075
6076 if (var.unusable) {
6077 memset(desc, 0, sizeof(*desc));
6078 if (base3)
6079 *base3 = 0;
6080 return false;
6081 }
6082
6083 if (var.g)
6084 var.limit >>= 12;
6085 set_desc_limit(desc, var.limit);
6086 set_desc_base(desc, (unsigned long)var.base);
6087#ifdef CONFIG_X86_64
6088 if (base3)
6089 *base3 = var.base >> 32;
6090#endif
6091 desc->type = var.type;
6092 desc->s = var.s;
6093 desc->dpl = var.dpl;
6094 desc->p = var.present;
6095 desc->avl = var.avl;
6096 desc->l = var.l;
6097 desc->d = var.db;
6098 desc->g = var.g;
6099
6100 return true;
6101}
6102
6103static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
6104 struct desc_struct *desc, u32 base3,
6105 int seg)
6106{
6107 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6108 struct kvm_segment var;
6109
6110 var.selector = selector;
6111 var.base = get_desc_base(desc);
6112#ifdef CONFIG_X86_64
6113 var.base |= ((u64)base3) << 32;
6114#endif
6115 var.limit = get_desc_limit(desc);
6116 if (desc->g)
6117 var.limit = (var.limit << 12) | 0xfff;
6118 var.type = desc->type;
6119 var.dpl = desc->dpl;
6120 var.db = desc->d;
6121 var.s = desc->s;
6122 var.l = desc->l;
6123 var.g = desc->g;
6124 var.avl = desc->avl;
6125 var.present = desc->p;
6126 var.unusable = !var.present;
6127 var.padding = 0;
6128
6129 kvm_set_segment(vcpu, &var, seg);
6130 return;
6131}
6132
6133static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
6134 u32 msr_index, u64 *pdata)
6135{
6136 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
6137}
6138
6139static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
6140 u32 msr_index, u64 data)
6141{
6142 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
6143}
6144
6145static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
6146{
6147 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6148
6149 return vcpu->arch.smbase;
6150}
6151
6152static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
6153{
6154 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6155
6156 vcpu->arch.smbase = smbase;
6157}
6158
6159static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
6160 u32 pmc)
6161{
6162 return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
6163}
6164
6165static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
6166 u32 pmc, u64 *pdata)
6167{
6168 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
6169}
6170
6171static void emulator_halt(struct x86_emulate_ctxt *ctxt)
6172{
6173 emul_to_vcpu(ctxt)->arch.halt_request = 1;
6174}
6175
6176static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
6177 struct x86_instruction_info *info,
6178 enum x86_intercept_stage stage)
6179{
6180 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
6181}
6182
6183static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
6184 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
6185{
6186 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
6187}
6188
6189static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
6190{
6191 return kvm_register_read(emul_to_vcpu(ctxt), reg);
6192}
6193
6194static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
6195{
6196 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
6197}
6198
6199static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
6200{
6201 kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
6202}
6203
6204static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
6205{
6206 return emul_to_vcpu(ctxt)->arch.hflags;
6207}
6208
6209static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
6210{
6211 emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
6212}
6213
6214static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
6215 const char *smstate)
6216{
6217 return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
6218}
6219
6220static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
6221{
6222 kvm_smm_changed(emul_to_vcpu(ctxt));
6223}
6224
6225static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
6226{
6227 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
6228}
6229
6230static const struct x86_emulate_ops emulate_ops = {
6231 .read_gpr = emulator_read_gpr,
6232 .write_gpr = emulator_write_gpr,
6233 .read_std = emulator_read_std,
6234 .write_std = emulator_write_std,
6235 .read_phys = kvm_read_guest_phys_system,
6236 .fetch = kvm_fetch_guest_virt,
6237 .read_emulated = emulator_read_emulated,
6238 .write_emulated = emulator_write_emulated,
6239 .cmpxchg_emulated = emulator_cmpxchg_emulated,
6240 .invlpg = emulator_invlpg,
6241 .pio_in_emulated = emulator_pio_in_emulated,
6242 .pio_out_emulated = emulator_pio_out_emulated,
6243 .get_segment = emulator_get_segment,
6244 .set_segment = emulator_set_segment,
6245 .get_cached_segment_base = emulator_get_cached_segment_base,
6246 .get_gdt = emulator_get_gdt,
6247 .get_idt = emulator_get_idt,
6248 .set_gdt = emulator_set_gdt,
6249 .set_idt = emulator_set_idt,
6250 .get_cr = emulator_get_cr,
6251 .set_cr = emulator_set_cr,
6252 .cpl = emulator_get_cpl,
6253 .get_dr = emulator_get_dr,
6254 .set_dr = emulator_set_dr,
6255 .get_smbase = emulator_get_smbase,
6256 .set_smbase = emulator_set_smbase,
6257 .set_msr = emulator_set_msr,
6258 .get_msr = emulator_get_msr,
6259 .check_pmc = emulator_check_pmc,
6260 .read_pmc = emulator_read_pmc,
6261 .halt = emulator_halt,
6262 .wbinvd = emulator_wbinvd,
6263 .fix_hypercall = emulator_fix_hypercall,
6264 .intercept = emulator_intercept,
6265 .get_cpuid = emulator_get_cpuid,
6266 .set_nmi_mask = emulator_set_nmi_mask,
6267 .get_hflags = emulator_get_hflags,
6268 .set_hflags = emulator_set_hflags,
6269 .pre_leave_smm = emulator_pre_leave_smm,
6270 .post_leave_smm = emulator_post_leave_smm,
6271 .set_xcr = emulator_set_xcr,
6272};
6273
6274static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
6275{
6276 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
6277
6278 /*
6279  * An sti; sti sequence only disables interrupts for the first
6280  * instruction, so if the last instruction left the interrupt
6281  * shadow (STI blocking) set, it was an sti and the shadow must
6282  * not be re-armed when emulation completes.
6283  */
6284 if (int_shadow & mask)
6285 mask = 0;
6286 if (unlikely(int_shadow || mask)) {
6287 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
6288 if (!mask)
6289 kvm_make_request(KVM_REQ_EVENT, vcpu);
6290 }
6291}
6292
6293static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
6294{
6295 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6296 if (ctxt->exception.vector == PF_VECTOR)
6297 return kvm_propagate_fault(vcpu, &ctxt->exception);
6298
6299 if (ctxt->exception.error_code_valid)
6300 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
6301 ctxt->exception.error_code);
6302 else
6303 kvm_queue_exception(vcpu, ctxt->exception.vector);
6304 return false;
6305}
6306
6307static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
6308{
6309 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6310 int cs_db, cs_l;
6311
6312 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
6313
6314 ctxt->eflags = kvm_get_rflags(vcpu);
6315 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
6316
6317 ctxt->eip = kvm_rip_read(vcpu);
6318 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
6319 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
6320 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
6321 cs_db ? X86EMUL_MODE_PROT32 :
6322 X86EMUL_MODE_PROT16;
6323 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
6324 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
6325 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
6326
6327 init_decode_cache(ctxt);
6328 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6329}
6330
6331void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
6332{
6333 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6334 int ret;
6335
6336 init_emulate_ctxt(vcpu);
6337
6338 ctxt->op_bytes = 2;
6339 ctxt->ad_bytes = 2;
6340 ctxt->_eip = ctxt->eip + inc_eip;
6341 ret = emulate_int_real(ctxt, irq);
6342
6343 if (ret != X86EMUL_CONTINUE) {
6344 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
6345 } else {
6346 ctxt->eip = ctxt->_eip;
6347 kvm_rip_write(vcpu, ctxt->eip);
6348 kvm_set_rflags(vcpu, ctxt->eflags);
6349 }
6350}
6351EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
6352
6353static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
6354{
6355 ++vcpu->stat.insn_emulation_fail;
6356 trace_kvm_emulate_insn_failed(vcpu);
6357
6358 if (emulation_type & EMULTYPE_VMWARE_GP) {
6359 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6360 return 1;
6361 }
6362
6363 if (emulation_type & EMULTYPE_SKIP) {
6364 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6365 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6366 vcpu->run->internal.ndata = 0;
6367 return 0;
6368 }
6369
6370 kvm_queue_exception(vcpu, UD_VECTOR);
6371
6372 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
6373 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6374 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6375 vcpu->run->internal.ndata = 0;
6376 return 0;
6377 }
6378
6379 return 1;
6380}
6381
6382static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
6383 bool write_fault_to_shadow_pgtable,
6384 int emulation_type)
6385{
6386 gpa_t gpa = cr2;
6387 kvm_pfn_t pfn;
6388
6389 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
6390 return false;
6391
6392 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
6393 return false;
6394
6395 if (!vcpu->arch.mmu->direct_map) {
6396 /*
6397  * Write permission should be allowed since only
6398  * write access needs to be emulated.
6399  */
6400 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
6401
6402 /*
6403  * If the mapping is invalid in the guest, let the CPU retry
6404  * it to generate the fault.
6405  */
6406 if (gpa == UNMAPPED_GVA)
6407 return true;
6408 }
6409
6410 /*
6411  * Do not retry the unhandleable instruction if it faults on
6412  * read-only host memory; re-executing would just fail again.
6413  * Fault the page in only to learn whether it is usable; the
6414  * reference is dropped again right below.
6415  */
6416 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
6417
6418 /*
6419  * If the instruction failed on the error pfn, it can not be
6420  * fixed; report the error to userspace.
6421  */
6422 if (is_error_noslot_pfn(pfn))
6423 return false;
6424
6425 kvm_release_pfn_clean(pfn);
6426
6427 /* The instructions are well emulated on a direct MMU. */
6428 if (vcpu->arch.mmu->direct_map) {
6429 unsigned int indirect_shadow_pages;
6430
6431 spin_lock(&vcpu->kvm->mmu_lock);
6432 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
6433 spin_unlock(&vcpu->kvm->mmu_lock);
6434
6435 if (indirect_shadow_pages)
6436 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6437
6438 return true;
6439 }
6440
6441 /*
6442  * If emulation was due to access to a shadowed page table
6443  * and it failed, try to unshadow the page and re-enter the
6444  * guest to let the CPU execute the instruction.
6445  */
6446 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6447
6448 /*
6449  * If the access faults on its own page table, it can not
6450  * be fixed by unprotecting a shadow page, and it should
6451  * be reported to userspace.
6452  */
6453 return !write_fault_to_shadow_pgtable;
6454}
6455
6456static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
6457 unsigned long cr2, int emulation_type)
6458{
6459 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6460 unsigned long last_retry_eip, last_retry_addr, gpa = cr2;
6461
6462 last_retry_eip = vcpu->arch.last_retry_eip;
6463 last_retry_addr = vcpu->arch.last_retry_addr;
6464
6465 /*
6466  * If the emulation is caused by #PF and it is a
6467  * non-page-table-writing instruction, it means the VM-exit was
6468  * caused by shadow page protection; we can zap the shadow page
6469  * and retry the instruction directly.
6470  *
6471  * Note: if the guest uses a non-page-table modifying instruction
6472  * on the PDE that points to the instruction, then we will unmap
6473  * the instruction and go into an infinite loop. So, we cache the
6474  * last retried eip and the last fault address; if we meet the eip
6475  * and the address again, we can break out of the potential
6476  * infinite loop.
6477  */
6478 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
6479
6480 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
6481 return false;
6482
6483 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
6484 return false;
6485
6486 if (x86_page_table_writing_insn(ctxt))
6487 return false;
6488
6489 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2)
6490 return false;
6491
6492 vcpu->arch.last_retry_eip = ctxt->eip;
6493 vcpu->arch.last_retry_addr = cr2;
6494
6495 if (!vcpu->arch.mmu->direct_map)
6496 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
6497
6498 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6499
6500 return true;
6501}
6502
6503static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
6504static int complete_emulated_pio(struct kvm_vcpu *vcpu);
6505
6506static void kvm_smm_changed(struct kvm_vcpu *vcpu)
6507{
6508 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
6509 /* This is a good place to trace that we are exiting SMM.  */
6510 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
6511
6512 /* Process a latched INIT or SMI, if any.  */
6513 kvm_make_request(KVM_REQ_EVENT, vcpu);
6514 }
6515
6516 kvm_mmu_reset_context(vcpu);
6517}
6518
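/*
 * Match 'addr' against the four hardware breakpoints described by a
 * DR7-style value: two enable bits per breakpoint in the low word and a
 * four-bit type/length field each in the high word. Returns the DR6
 * trap bits for every armed breakpoint of the requested type that hits.
 */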
6519static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
6520 unsigned long *db)
6521{
6522 u32 dr6 = 0;
6523 int i;
6524 u32 enable, rwlen;
6525
6526 enable = dr7;
6527 rwlen = dr7 >> 16;
6528 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
6529 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
6530 dr6 |= (1 << i);
6531 return dr6;
6532}
6533
6534static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
6535{
6536 struct kvm_run *kvm_run = vcpu->run;
6537
6538 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
6539 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
6540 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
6541 kvm_run->debug.arch.exception = DB_VECTOR;
6542 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6543 return 0;
6544 }
6545 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
6546 return 1;
6547}
6548
6549int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
6550{
6551 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6552 int r;
6553
6554 r = kvm_x86_ops->skip_emulated_instruction(vcpu);
6555 if (unlikely(!r))
6556 return 0;
6557
6558 /*
6559  * rflags is the old, "raw" value of the flags.  The new value has
6560  * not been saved yet.
6561  *
6562  * This is correct even for TF set by the guest, because "the
6563  * processor will not generate this exception after the instruction
6564  * that sets the TF flag".
6565  */
6566 if (unlikely(rflags & X86_EFLAGS_TF))
6567 r = kvm_vcpu_do_singlestep(vcpu);
6568 return r;
6569}
6570EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
6571
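/*
 * Check code breakpoints before emulating an instruction. A hit on a
 * userspace-owned (guest_debug) breakpoint becomes a KVM_EXIT_DEBUG
 * with *r = 0; a hit on a guest-owned DR7 breakpoint queues a #DB and
 * sets *r = 1. Guest breakpoints are suppressed while RF is set.
 */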
6572static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
6573{
6574 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
6575 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
6576 struct kvm_run *kvm_run = vcpu->run;
6577 unsigned long eip = kvm_get_linear_rip(vcpu);
6578 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6579 vcpu->arch.guest_debug_dr7,
6580 vcpu->arch.eff_db);
6581
6582 if (dr6 != 0) {
6583 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
6584 kvm_run->debug.arch.pc = eip;
6585 kvm_run->debug.arch.exception = DB_VECTOR;
6586 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6587 *r = 0;
6588 return true;
6589 }
6590 }
6591
6592 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
6593 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
6594 unsigned long eip = kvm_get_linear_rip(vcpu);
6595 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6596 vcpu->arch.dr7,
6597 vcpu->arch.db);
6598
6599 if (dr6 != 0) {
6600 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
6601 vcpu->arch.dr6 |= dr6 | DR6_RTM;
6602 kvm_queue_exception(vcpu, DB_VECTOR);
6603 *r = 1;
6604 return true;
6605 }
6606 }
6607
6608 return false;
6609}
6610
6611static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
6612{
6613 switch (ctxt->opcode_len) {
6614 case 1:
6615 switch (ctxt->b) {
6616 case 0xe4:
6617 case 0xe5:
6618 case 0xec:
6619 case 0xed:
6620 case 0xe6:
6621 case 0xe7:
6622 case 0xee:
6623 case 0xef:
6624 case 0x6c:
6625 case 0x6d:
6626 case 0x6e:
6627 case 0x6f:
6628 return true;
6629 }
6630 break;
6631 case 2:
6632 switch (ctxt->b) {
6633 case 0x33:
6634 return true;
6635 }
6636 break;
6637 }
6638
6639 return false;
6640}
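
/*
 * For reference, the one-byte opcodes accepted above are the port I/O
 * family: 0xe4/0xe5 IN AL/eAX, imm8; 0xec/0xed IN AL/eAX, DX;
 * 0xe6/0xe7 OUT imm8, AL/eAX; 0xee/0xef OUT DX, AL/eAX; and 0x6c-0x6f
 * INS/OUTS.  The two-byte opcode 0x0f 0x33 is RDPMC.  These are the
 * only instructions emulated when a #GP is intercepted for the VMware
 * backdoor.
 */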
6641
6642int x86_emulate_instruction(struct kvm_vcpu *vcpu,
6643 unsigned long cr2,
6644 int emulation_type,
6645 void *insn,
6646 int insn_len)
6647{
6648 int r;
6649 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6650 bool writeback = true;
6651 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
6652
6653 vcpu->arch.l1tf_flush_l1d = true;
6654
6655 /*
6656  * Clear write_fault_to_shadow_pgtable here to ensure it is
6657  * never reused.
6658  */
6659 vcpu->arch.write_fault_to_shadow_pgtable = false;
6660 kvm_clear_exception_queue(vcpu);
6661
6662 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
6663 init_emulate_ctxt(vcpu);
6664
6665 /*
6666  * We will reenter on the same instruction since we do not set
6667  * complete_userspace_io.  This does not handle watchpoints yet;
6668  * those would be handled in the emulate_ops.
6669  */
6670
6671 if (!(emulation_type & EMULTYPE_SKIP) &&
6672 kvm_vcpu_check_breakpoint(vcpu, &r))
6673 return r;
6674
6675 ctxt->interruptibility = 0;
6676 ctxt->have_exception = false;
6677 ctxt->exception.vector = -1;
6678 ctxt->perm_ok = false;
6679
6680 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
6681
6682 r = x86_decode_insn(ctxt, insn, insn_len);
6683
6684 trace_kvm_emulate_insn_start(vcpu);
6685 ++vcpu->stat.insn_emulation;
6686 if (r != EMULATION_OK) {
6687 if ((emulation_type & EMULTYPE_TRAP_UD) ||
6688 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
6689 kvm_queue_exception(vcpu, UD_VECTOR);
6690 return 1;
6691 }
6692 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
6693 emulation_type))
6694 return 1;
6695 if (ctxt->have_exception) {
6696 /*
6697  * #UD should result in just EMULATION_FAILED, and trap-like
6698  * exceptions should not be encountered during decode.
6699  */
6700 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
6701 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
6702 inject_emulated_exception(vcpu);
6703 return 1;
6704 }
6705 return handle_emulation_failure(vcpu, emulation_type);
6706 }
6707 }
6708
6709 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
6710 !is_vmware_backdoor_opcode(ctxt)) {
6711 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6712 return 1;
6713 }
6714
6715 /*
6716  * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
6717  * for kvm_skip_emulated_instruction().  The caller is responsible for
6718  * updating interruptibility state and injecting single-step #DBs.
6719  */
6720 if (emulation_type & EMULTYPE_SKIP) {
6721 kvm_rip_write(vcpu, ctxt->_eip);
6722 if (ctxt->eflags & X86_EFLAGS_RF)
6723 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
6724 return 1;
6725 }
6726
6727 if (retry_instruction(ctxt, cr2, emulation_type))
6728 return 1;
6729
6730 /* The vmware backdoor interface changes register values during an I/O
6731  * operation, so the emulator's register cache must be invalidated here. */
6732 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
6733 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6734 emulator_invalidate_register_cache(ctxt);
6735 }
6736
6737restart:
6738 /* Save the faulting GPA (cr2) in the address field */
6739 ctxt->exception.address = cr2;
6740
6741 r = x86_emulate_insn(ctxt);
6742
6743 if (r == EMULATION_INTERCEPTED)
6744 return 1;
6745
6746 if (r == EMULATION_FAILED) {
6747 if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
6748 emulation_type))
6749 return 1;
6750
6751 return handle_emulation_failure(vcpu, emulation_type);
6752 }
6753
6754 if (ctxt->have_exception) {
6755 r = 1;
6756 if (inject_emulated_exception(vcpu))
6757 return r;
6758 } else if (vcpu->arch.pio.count) {
6759 if (!vcpu->arch.pio.in) {
6760 /* FIXME: return into emulator if single-stepping.  */
6761 vcpu->arch.pio.count = 0;
6762 } else {
6763 writeback = false;
6764 vcpu->arch.complete_userspace_io = complete_emulated_pio;
6765 }
6766 r = 0;
6767 } else if (vcpu->mmio_needed) {
6768 ++vcpu->stat.mmio_exits;
6769
6770 if (!vcpu->mmio_is_write)
6771 writeback = false;
6772 r = 0;
6773 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6774 } else if (r == EMULATION_RESTART)
6775 goto restart;
6776 else
6777 r = 1;
6778
6779 if (writeback) {
6780 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6781 toggle_interruptibility(vcpu, ctxt->interruptibility);
6782 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6783 if (!ctxt->have_exception ||
6784 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
6785 kvm_rip_write(vcpu, ctxt->eip);
6786 if (r && ctxt->tf)
6787 r = kvm_vcpu_do_singlestep(vcpu);
6788 __kvm_set_rflags(vcpu, ctxt->eflags);
6789 }
6790
6791 /*
6792  * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
6793  * do nothing, and it will be requested again as soon as
6794  * the shadow expires.  But we still need to check here,
6795  * because POPF has no interrupt shadow.
6796  */
6797 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
6798 kvm_make_request(KVM_REQ_EVENT, vcpu);
6799 } else
6800 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
6801
6802 return r;
6803}
6804
6805int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
6806{
6807 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
6808}
6809EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
6810
6811int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
6812 void *insn, int insn_len)
6813{
6814 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
6815}
6816EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
6817
6818static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
6819{
6820 vcpu->arch.pio.count = 0;
6821 return 1;
6822}
6823
6824static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
6825{
6826 vcpu->arch.pio.count = 0;
6827
6828 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
6829 return 1;
6830
6831 return kvm_skip_emulated_instruction(vcpu);
6832}
6833
6834static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
6835 unsigned short port)
6836{
6837 unsigned long val = kvm_rax_read(vcpu);
6838 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
6839 size, port, &val, 1);
6840 if (ret)
6841 return ret;
6842
6843 /*
6844  * Workaround userspace that relies on old KVM behavior of %rip being
6845  * incremented prior to exiting to userspace to handle "OUT 0x7e".
6846  */
6847 if (port == 0x7e &&
6848 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
6849 vcpu->arch.complete_userspace_io =
6850 complete_fast_pio_out_port_0x7e;
6851 kvm_skip_emulated_instruction(vcpu);
6852 } else {
6853 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
6854 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
6855 }
6856 return 0;
6857}
6858
6859static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
6860{
6861 unsigned long val;
6862
6863 /* We should only ever be called with arch.pio.count equal to 1 */
6864 BUG_ON(vcpu->arch.pio.count != 1);
6865
6866 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
6867 vcpu->arch.pio.count = 0;
6868 return 1;
6869 }
6870
6871 /* For size less than 4 we merge, else we zero extend */
6872 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
6873
6874 /*
6875  * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
6876  * the copy and tracing
6877  */
6878 emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
6879 vcpu->arch.pio.port, &val, 1);
6880 kvm_rax_write(vcpu, val);
6881
6882 return kvm_skip_emulated_instruction(vcpu);
6883}
6884
6885static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
6886 unsigned short port)
6887{
6888 unsigned long val;
6889 int ret;
6890
6891 /* For size less than 4 we merge, else we zero extend */
6892 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
6893
6894 ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
6895 &val, 1);
6896 if (ret) {
6897 kvm_rax_write(vcpu, val);
6898 return ret;
6899 }
6900
6901 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
6902 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
6903
6904 return 0;
6905}
6906
6907int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
6908{
6909 int ret;
6910
6911 if (in)
6912 ret = kvm_fast_pio_in(vcpu, size, port);
6913 else
6914 ret = kvm_fast_pio_out(vcpu, size, port);
6915 return ret && kvm_skip_emulated_instruction(vcpu);
6916}
6917EXPORT_SYMBOL_GPL(kvm_fast_pio);
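
/*
 * Note the return convention used by kvm_fast_pio(): the in/out helpers
 * return non-zero when the access was handled entirely in the kernel,
 * in which case the instruction is skipped immediately; a zero return
 * means an exit to userspace is pending and the RIP update is deferred
 * to the complete_userspace_io callback that runs on re-entry.
 */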
6918
6919static int kvmclock_cpu_down_prep(unsigned int cpu)
6920{
6921 __this_cpu_write(cpu_tsc_khz, 0);
6922 return 0;
6923}
6924
6925static void tsc_khz_changed(void *data)
6926{
6927 struct cpufreq_freqs *freq = data;
6928 unsigned long khz = 0;
6929
6930 if (data)
6931 khz = freq->new;
6932 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
6933 khz = cpufreq_quick_get(raw_smp_processor_id());
6934 if (!khz)
6935 khz = tsc_khz;
6936 __this_cpu_write(cpu_tsc_khz, khz);
6937}
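
/*
 * Resolution order for the per-cpu TSC frequency written above: an
 * explicit cpufreq transition value if one was passed in, otherwise the
 * current cpufreq reading (only meaningful on hosts without
 * X86_FEATURE_CONSTANT_TSC), and finally the boot-time tsc_khz as the
 * fallback.
 */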
6938
6939#ifdef CONFIG_X86_64
6940static void kvm_hyperv_tsc_notifier(void)
6941{
6942 struct kvm *kvm;
6943 struct kvm_vcpu *vcpu;
6944 int cpu;
6945
6946 mutex_lock(&kvm_lock);
6947 list_for_each_entry(kvm, &vm_list, vm_list)
6948 kvm_make_mclock_inprogress_request(kvm);
6949
6950 hyperv_stop_tsc_emulation();
6951
6952 /* TSC frequency always matches when on Hyper-V */
6953 for_each_present_cpu(cpu)
6954 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
6955 kvm_max_guest_tsc_khz = tsc_khz;
6956
6957 list_for_each_entry(kvm, &vm_list, vm_list) {
6958 struct kvm_arch *ka = &kvm->arch;
6959
6960 spin_lock(&ka->pvclock_gtod_sync_lock);
6961
6962 pvclock_update_vm_gtod_copy(kvm);
6963
6964 kvm_for_each_vcpu(cpu, vcpu, kvm)
6965 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
6966
6967 kvm_for_each_vcpu(cpu, vcpu, kvm)
6968 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
6969
6970 spin_unlock(&ka->pvclock_gtod_sync_lock);
6971 }
6972 mutex_unlock(&kvm_lock);
6973}
6974#endif
6975
6976static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
6977{
6978 struct kvm *kvm;
6979 struct kvm_vcpu *vcpu;
6980 int i, send_ipi = 0;
6981
6982 /*
6983  * We allow guests to temporarily run on slowing clocks,
6984  * provided we notify them after, or to run on accelerating
6985  * clocks, provided we notify them before.  Thus time never
6986  * goes backwards.
6987  *
6988  * However, we have a problem.  We can't atomically update
6989  * the frequency of a given CPU from this function; it is
6990  * merely a notifier, which can be called from any CPU.
6991  * Changing the TSC frequency at arbitrary points in time
6992  * requires a recomputation of local variables related to
6993  * the TSC for each VCPU.  We must flag these local variables
6994  * to be updated and be sure the update takes place with the
6995  * new frequency before any guests proceed.
6996  *
6997  * Unfortunately, the combination of CPU hotplug and frequency
6998  * change creates an awkward ordering problem: these callouts
6999  * are undefined with respect to each other and can race, so
7000  * simply writing per_cpu(cpu_tsc_khz) from the notifier's CPU
7001  * would be unreliable.
7002  *
7003  * The fix is to run the update on the CPU whose frequency
7004  * actually changed: the smp_call_function_single() below makes
7005  * sure cpu_tsc_khz is written on that CPU, and every vCPU
7006  * currently running there is flagged with KVM_REQ_CLOCK_UPDATE
7007  * so its kvmclock parameters are recomputed with the new
7008  * frequency before the guest proceeds.
7009  */
7010
7011
7012
7013
7014
7015
7016
7017
7018
7019
7020
7021 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7022
7023 mutex_lock(&kvm_lock);
7024 list_for_each_entry(kvm, &vm_list, vm_list) {
7025 kvm_for_each_vcpu(i, vcpu, kvm) {
7026 if (vcpu->cpu != cpu)
7027 continue;
7028 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7029 if (vcpu->cpu != raw_smp_processor_id())
7030 send_ipi = 1;
7031 }
7032 }
7033 mutex_unlock(&kvm_lock);
7034
7035 if (freq->old < freq->new && send_ipi) {
7036 /*
7037  * We upscale the frequency.  Must make sure the guest does not
7038  * keep running with kvmclock parameters computed for the old,
7039  * slower TSC once the new frequency takes effect, otherwise
7040  * guest time would drift ahead.  Kick the remote CPU out of
7041  * guest mode with a second IPI so the pending
7042  * KVM_REQ_CLOCK_UPDATE is processed before the next guest
7043  * entry.
7044  */
7045
7046
7047
7048 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7049 }
7050}
7051
7052static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
7053 void *data)
7054{
7055 struct cpufreq_freqs *freq = data;
7056 int cpu;
7057
7058 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
7059 return 0;
7060 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
7061 return 0;
7062
7063 for_each_cpu(cpu, freq->policy->cpus)
7064 __kvmclock_cpufreq_notifier(freq, cpu);
7065
7066 return 0;
7067}
7068
7069static struct notifier_block kvmclock_cpufreq_notifier_block = {
7070 .notifier_call = kvmclock_cpufreq_notifier
7071};
7072
7073static int kvmclock_cpu_online(unsigned int cpu)
7074{
7075 tsc_khz_changed(NULL);
7076 return 0;
7077}
7078
7079static void kvm_timer_init(void)
7080{
7081 max_tsc_khz = tsc_khz;
7082
7083 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
7084#ifdef CONFIG_CPU_FREQ
7085 struct cpufreq_policy policy;
7086 int cpu;
7087
7088 memset(&policy, 0, sizeof(policy));
7089 cpu = get_cpu();
7090 cpufreq_get_policy(&policy, cpu);
7091 if (policy.cpuinfo.max_freq)
7092 max_tsc_khz = policy.cpuinfo.max_freq;
7093 put_cpu();
7094#endif
7095 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
7096 CPUFREQ_TRANSITION_NOTIFIER);
7097 }
7098
7099 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
7100 kvmclock_cpu_online, kvmclock_cpu_down_prep);
7101}
7102
7103DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
7104EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
7105
7106int kvm_is_in_guest(void)
7107{
7108 return __this_cpu_read(current_vcpu) != NULL;
7109}
7110
7111static int kvm_is_user_mode(void)
7112{
7113 int user_mode = 3;
7114
7115 if (__this_cpu_read(current_vcpu))
7116 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
7117
7118 return user_mode != 0;
7119}
7120
7121static unsigned long kvm_get_guest_ip(void)
7122{
7123 unsigned long ip = 0;
7124
7125 if (__this_cpu_read(current_vcpu))
7126 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
7127
7128 return ip;
7129}
7130
7131static void kvm_handle_intel_pt_intr(void)
7132{
7133 struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
7134
7135 kvm_make_request(KVM_REQ_PMI, vcpu);
7136 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
7137 (unsigned long *)&vcpu->arch.pmu.global_status);
7138}
7139
7140static struct perf_guest_info_callbacks kvm_guest_cbs = {
7141 .is_in_guest = kvm_is_in_guest,
7142 .is_user_mode = kvm_is_user_mode,
7143 .get_guest_ip = kvm_get_guest_ip,
7144 .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
7145};
7146
7147#ifdef CONFIG_X86_64
7148static void pvclock_gtod_update_fn(struct work_struct *work)
7149{
7150 struct kvm *kvm;
7151
7152 struct kvm_vcpu *vcpu;
7153 int i;
7154
7155 mutex_lock(&kvm_lock);
7156 list_for_each_entry(kvm, &vm_list, vm_list)
7157 kvm_for_each_vcpu(i, vcpu, kvm)
7158 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7159 atomic_set(&kvm_guest_has_master_clock, 0);
7160 mutex_unlock(&kvm_lock);
7161}
7162
7163static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
7164
7165 /*
7166  * Notification about pvclock gtod data update.
7167  */
7168static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
7169 void *priv)
7170{
7171 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
7172 struct timekeeper *tk = priv;
7173
7174 update_pvclock_gtod(tk);
7175
7176 /* disable master clock if host does not trust, i.e. doesn't use,
7177  * TSC based clocksource.
7178  */
7179 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
7180 atomic_read(&kvm_guest_has_master_clock) != 0)
7181 queue_work(system_long_wq, &pvclock_gtod_work);
7182
7183 return 0;
7184}
7185
7186static struct notifier_block pvclock_gtod_notifier = {
7187 .notifier_call = pvclock_gtod_notify,
7188};
7189#endif
7190
7191int kvm_arch_init(void *opaque)
7192{
7193 int r;
7194 struct kvm_x86_ops *ops = opaque;
7195
7196 if (kvm_x86_ops) {
7197 printk(KERN_ERR "kvm: already loaded the other module\n");
7198 r = -EEXIST;
7199 goto out;
7200 }
7201
7202 if (!ops->cpu_has_kvm_support()) {
7203 printk(KERN_ERR "kvm: no hardware support\n");
7204 r = -EOPNOTSUPP;
7205 goto out;
7206 }
7207 if (ops->disabled_by_bios()) {
7208 printk(KERN_ERR "kvm: disabled by bios\n");
7209 r = -EOPNOTSUPP;
7210 goto out;
7211 }
7212
7213 /*
7214  * KVM explicitly assumes that the guest has an FPU and
7215  * FXSAVE/FXRSTOR.  For example, the KVM_GET_FPU ioctl casts the
7216  * vCPU's FPU state as a struct fxregs_state.
7217  */
7218 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
7219 printk(KERN_ERR "kvm: inadequate fpu\n");
7220 r = -EOPNOTSUPP;
7221 goto out;
7222 }
7223
7224 r = -ENOMEM;
7225 x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
7226 __alignof__(struct fpu), SLAB_ACCOUNT,
7227 NULL);
7228 if (!x86_fpu_cache) {
7229 printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
7230 goto out;
7231 }
7232
7233 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
7234 if (!shared_msrs) {
7235 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
7236 goto out_free_x86_fpu_cache;
7237 }
7238
7239 r = kvm_mmu_module_init();
7240 if (r)
7241 goto out_free_percpu;
7242
7243 kvm_x86_ops = ops;
7244
7245 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
7246 PT_DIRTY_MASK, PT64_NX_MASK, 0,
7247 PT_PRESENT_MASK, 0, sme_me_mask);
7248 kvm_timer_init();
7249
7250 perf_register_guest_info_callbacks(&kvm_guest_cbs);
7251
7252 if (boot_cpu_has(X86_FEATURE_XSAVE))
7253 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
7254
7255 kvm_lapic_init();
7256 if (pi_inject_timer == -1)
7257 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
7258#ifdef CONFIG_X86_64
7259 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
7260
7261 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7262 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
7263#endif
7264
7265 return 0;
7266
7267out_free_percpu:
7268 free_percpu(shared_msrs);
7269out_free_x86_fpu_cache:
7270 kmem_cache_destroy(x86_fpu_cache);
7271out:
7272 return r;
7273}
7274
7275void kvm_arch_exit(void)
7276{
7277#ifdef CONFIG_X86_64
7278 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7279 clear_hv_tscchange_cb();
7280#endif
7281 kvm_lapic_exit();
7282 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
7283
7284 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
7285 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
7286 CPUFREQ_TRANSITION_NOTIFIER);
7287 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
7288#ifdef CONFIG_X86_64
7289 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
7290#endif
7291 kvm_x86_ops = NULL;
7292 kvm_mmu_module_exit();
7293 free_percpu(shared_msrs);
7294 kmem_cache_destroy(x86_fpu_cache);
7295}
7296
7297int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
7298{
7299 ++vcpu->stat.halt_exits;
7300 if (lapic_in_kernel(vcpu)) {
7301 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
7302 return 1;
7303 } else {
7304 vcpu->run->exit_reason = KVM_EXIT_HLT;
7305 return 0;
7306 }
7307}
7308EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
7309
7310int kvm_emulate_halt(struct kvm_vcpu *vcpu)
7311{
7312 int ret = kvm_skip_emulated_instruction(vcpu);
7313 /*
7314  * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
7315  * KVM_EXIT_DEBUG here.
7316  */
7317 return kvm_vcpu_halt(vcpu) && ret;
7318}
7319EXPORT_SYMBOL_GPL(kvm_emulate_halt);
7320
7321#ifdef CONFIG_X86_64
7322static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
7323 unsigned long clock_type)
7324{
7325 struct kvm_clock_pairing clock_pairing;
7326 struct timespec64 ts;
7327 u64 cycle;
7328 int ret;
7329
7330 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
7331 return -KVM_EOPNOTSUPP;
7332
7333 if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
7334 return -KVM_EOPNOTSUPP;
7335
7336 clock_pairing.sec = ts.tv_sec;
7337 clock_pairing.nsec = ts.tv_nsec;
7338 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
7339 clock_pairing.flags = 0;
7340 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
7341
7342 ret = 0;
7343 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
7344 sizeof(struct kvm_clock_pairing)))
7345 ret = -KVM_EFAULT;
7346
7347 return ret;
7348}
7349#endif
7350
7351 /*
7352  * kvm_pv_kick_cpu_op:  Kick a vcpu.
7353  *
7354  * @apicid - apicid of vcpu to be kicked.
7355  */
7356static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
7357{
7358 struct kvm_lapic_irq lapic_irq;
7359
7360 lapic_irq.shorthand = 0;
7361 lapic_irq.dest_mode = 0;
7362 lapic_irq.level = 0;
7363 lapic_irq.dest_id = apicid;
7364 lapic_irq.msi_redir_hint = false;
7365
7366 lapic_irq.delivery_mode = APIC_DM_REMRD;
7367 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
7368}
7369
7370void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
7371{
7372 if (!lapic_in_kernel(vcpu)) {
7373 WARN_ON_ONCE(vcpu->arch.apicv_active);
7374 return;
7375 }
7376 if (!vcpu->arch.apicv_active)
7377 return;
7378
7379 vcpu->arch.apicv_active = false;
7380 kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
7381}
7382
7383static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
7384{
7385 struct kvm_vcpu *target = NULL;
7386 struct kvm_apic_map *map;
7387
7388 rcu_read_lock();
7389 map = rcu_dereference(kvm->arch.apic_map);
7390
7391 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
7392 target = map->phys_map[dest_id]->vcpu;
7393
7394 rcu_read_unlock();
7395
7396 if (target && READ_ONCE(target->ready))
7397 kvm_vcpu_yield_to(target);
7398}
7399
7400int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
7401{
7402 unsigned long nr, a0, a1, a2, a3, ret;
7403 int op_64_bit;
7404
7405 if (kvm_hv_hypercall_enabled(vcpu->kvm))
7406 return kvm_hv_hypercall(vcpu);
7407
7408 nr = kvm_rax_read(vcpu);
7409 a0 = kvm_rbx_read(vcpu);
7410 a1 = kvm_rcx_read(vcpu);
7411 a2 = kvm_rdx_read(vcpu);
7412 a3 = kvm_rsi_read(vcpu);
7413
7414 trace_kvm_hypercall(nr, a0, a1, a2, a3);
7415
7416 op_64_bit = is_64_bit_mode(vcpu);
7417 if (!op_64_bit) {
7418 nr &= 0xFFFFFFFF;
7419 a0 &= 0xFFFFFFFF;
7420 a1 &= 0xFFFFFFFF;
7421 a2 &= 0xFFFFFFFF;
7422 a3 &= 0xFFFFFFFF;
7423 }
7424
7425 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
7426 ret = -KVM_EPERM;
7427 goto out;
7428 }
7429
7430 switch (nr) {
7431 case KVM_HC_VAPIC_POLL_IRQ:
7432 ret = 0;
7433 break;
7434 case KVM_HC_KICK_CPU:
7435 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
7436 kvm_sched_yield(vcpu->kvm, a1);
7437 ret = 0;
7438 break;
7439#ifdef CONFIG_X86_64
7440 case KVM_HC_CLOCK_PAIRING:
7441 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
7442 break;
7443#endif
7444 case KVM_HC_SEND_IPI:
7445 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
7446 break;
7447 case KVM_HC_SCHED_YIELD:
7448 kvm_sched_yield(vcpu->kvm, a0);
7449 ret = 0;
7450 break;
7451 default:
7452 ret = -KVM_ENOSYS;
7453 break;
7454 }
7455out:
7456 if (!op_64_bit)
7457 ret = (u32)ret;
7458 kvm_rax_write(vcpu, ret);
7459
7460 ++vcpu->stat.hypercalls;
7461 return kvm_skip_emulated_instruction(vcpu);
7462}
7463EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
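
/*
 * Hypercall ABI, as consumed above: the hypercall number is passed in
 * RAX and up to four arguments in RBX, RCX, RDX and RSI; the return
 * value is written back to RAX.  Outside 64-bit mode all of these are
 * truncated to 32 bits, and hypercalls issued from CPL > 0 fail with
 * -KVM_EPERM.
 */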
7464
7465static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
7466{
7467 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7468 char instruction[3];
7469 unsigned long rip = kvm_rip_read(vcpu);
7470
7471 kvm_x86_ops->patch_hypercall(vcpu, instruction);
7472
7473 return emulator_write_emulated(ctxt, rip, instruction, 3,
7474 &ctxt->exception);
7475}
7476
7477static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
7478{
7479 return vcpu->run->request_interrupt_window &&
7480 likely(!pic_in_kernel(vcpu->kvm));
7481}
7482
7483static void post_kvm_run_save(struct kvm_vcpu *vcpu)
7484{
7485 struct kvm_run *kvm_run = vcpu->run;
7486
7487 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
7488 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
7489 kvm_run->cr8 = kvm_get_cr8(vcpu);
7490 kvm_run->apic_base = kvm_get_apic_base(vcpu);
7491 kvm_run->ready_for_interrupt_injection =
7492 pic_in_kernel(vcpu->kvm) ||
7493 kvm_vcpu_ready_for_interrupt_injection(vcpu);
7494}
7495
7496static void update_cr8_intercept(struct kvm_vcpu *vcpu)
7497{
7498 int max_irr, tpr;
7499
7500 if (!kvm_x86_ops->update_cr8_intercept)
7501 return;
7502
7503 if (!lapic_in_kernel(vcpu))
7504 return;
7505
7506 if (vcpu->arch.apicv_active)
7507 return;
7508
7509 if (!vcpu->arch.apic->vapic_addr)
7510 max_irr = kvm_lapic_find_highest_irr(vcpu);
7511 else
7512 max_irr = -1;
7513
7514 if (max_irr != -1)
7515 max_irr >>= 4;
7516
7517 tpr = kvm_lapic_get_cr8(vcpu);
7518
7519 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
7520}
7521
7522static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
7523{
7524 int r;
7525
7526
7527 /* try to reinject previous events if any */
7528 if (vcpu->arch.exception.injected)
7529 kvm_x86_ops->queue_exception(vcpu);
7530 /*
7531  * Do not inject an NMI or interrupt if there is a pending
7532  * exception.  Exceptions and interrupts are recognized at
7533  * instruction boundaries, i.e. the start of an instruction.
7534  * Trap-like exceptions, e.g. #DB, have higher priority than
7535  * NMIs and interrupts, i.e. traps are recognized before an
7536  * NMI/interrupt that's pending on the same instruction.
7537  * Fault-like exceptions, e.g. #GP and #PF, are the lowest
7538  * priority: a pending NMI or interrupt is not injected while
7539  * a fault-like exception is pending; the exception is
7540  * delivered first, and the NMI/interrupt is injected only
7541  * once the faulting instruction has been re-executed and a
7542  * new instruction boundary has been reached.
7543  */
7544 else if (!vcpu->arch.exception.pending) {
7545 if (vcpu->arch.nmi_injected)
7546 kvm_x86_ops->set_nmi(vcpu);
7547 else if (vcpu->arch.interrupt.injected)
7548 kvm_x86_ops->set_irq(vcpu);
7549 }
7550
7551 /*
7552  * Call check_nested_events() even if we reinjected a previous event
7553  * in order for the caller to determine if it should require an
7554  * immediate exit from L2 to L1 due to pending L1 events which
7555  * require an exit from L2.
7556  */
7557 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
7558 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
7559 if (r != 0)
7560 return r;
7561 }
7562
7563 /* try to inject new event if pending */
7564 if (vcpu->arch.exception.pending) {
7565 trace_kvm_inj_exception(vcpu->arch.exception.nr,
7566 vcpu->arch.exception.has_error_code,
7567 vcpu->arch.exception.error_code);
7568
7569 WARN_ON_ONCE(vcpu->arch.exception.injected);
7570 vcpu->arch.exception.pending = false;
7571 vcpu->arch.exception.injected = true;
7572
7573 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
7574 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
7575 X86_EFLAGS_RF);
7576
7577 if (vcpu->arch.exception.nr == DB_VECTOR) {
7578 /*
7579  * A pending #DB carries its DR6 update as an exception
7580  * payload; deliver it now so that the guest's handler
7581  * observes an architecturally correct DR6 value.
7582  *
7583  * If DR7.GD is set, delivering the #DB also clears DR7.GD,
7584  * as the architecture requires, so that the debug handler
7585  * can itself access the debug registers without immediately
7586  * re-faulting.
7587  */
7588 kvm_deliver_exception_payload(vcpu);
7589 if (vcpu->arch.dr7 & DR7_GD) {
7590 vcpu->arch.dr7 &= ~DR7_GD;
7591 kvm_update_dr7(vcpu);
7592 }
7593 }
7594
7595 kvm_x86_ops->queue_exception(vcpu);
7596 }
7597
7598 /* Don't consider new event if we re-injected an event */
7599 if (kvm_event_needs_reinjection(vcpu))
7600 return 0;
7601
7602 if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
7603 kvm_x86_ops->smi_allowed(vcpu)) {
7604 vcpu->arch.smi_pending = false;
7605 ++vcpu->arch.smi_count;
7606 enter_smm(vcpu);
7607 } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
7608 --vcpu->arch.nmi_pending;
7609 vcpu->arch.nmi_injected = true;
7610 kvm_x86_ops->set_nmi(vcpu);
7611 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
7612 /*
7613  * Because interrupts can be injected asynchronously, we are
7614  * calling check_nested_events() again here to avoid a race
7615  * condition: the interrupt may have become injectable after
7616  * the earlier nested-event check ran, and L1 may need a
7617  * VM-exit before the interrupt is delivered to L2.
7618  */
7619 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
7620 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
7621 if (r != 0)
7622 return r;
7623 }
7624 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
7625 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
7626 false);
7627 kvm_x86_ops->set_irq(vcpu);
7628 }
7629 }
7630
7631 return 0;
7632}
7633
7634static void process_nmi(struct kvm_vcpu *vcpu)
7635{
7636 unsigned limit = 2;
7637
7638 /*
7639  * x86 is limited to one NMI running, and one NMI pending after it.
7640  * If an NMI is already in progress, limit further NMIs to just one.
7641  * Otherwise, allow two (and we'll inject the first one immediately).
7642  */
7643 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
7644 limit = 1;
7645
7646 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
7647 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
7648 kvm_make_request(KVM_REQ_EVENT, vcpu);
7649}
7650
7651static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
7652{
7653 u32 flags = 0;
7654 flags |= seg->g << 23;
7655 flags |= seg->db << 22;
7656 flags |= seg->l << 21;
7657 flags |= seg->avl << 20;
7658 flags |= seg->present << 15;
7659 flags |= seg->dpl << 13;
7660 flags |= seg->s << 12;
7661 flags |= seg->type << 8;
7662 return flags;
7663}
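
/*
 * The flags word built above packs the segment descriptor attributes
 * into their native bit positions for the SMRAM state-save format:
 * type at bits 8-11 up through S, DPL, P, AVL, L, D/B and G at bit 23.
 * For example, a flat 32-bit code segment (type = 0xb, s = 1, dpl = 0,
 * present = 1, db = 1, g = 1) yields 0x00c09b00.
 */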
7664
7665static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
7666{
7667 struct kvm_segment seg;
7668 int offset;
7669
7670 kvm_get_segment(vcpu, &seg, n);
7671 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
7672
7673 if (n < 3)
7674 offset = 0x7f84 + n * 12;
7675 else
7676 offset = 0x7f2c + (n - 3) * 12;
7677
7678 put_smstate(u32, buf, offset + 8, seg.base);
7679 put_smstate(u32, buf, offset + 4, seg.limit);
7680 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
7681}
7682
7683#ifdef CONFIG_X86_64
7684static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
7685{
7686 struct kvm_segment seg;
7687 int offset;
7688 u16 flags;
7689
7690 kvm_get_segment(vcpu, &seg, n);
7691 offset = 0x7e00 + n * 16;
7692
7693 flags = enter_smm_get_segment_flags(&seg) >> 8;
7694 put_smstate(u16, buf, offset, seg.selector);
7695 put_smstate(u16, buf, offset + 2, flags);
7696 put_smstate(u32, buf, offset + 4, seg.limit);
7697 put_smstate(u64, buf, offset + 8, seg.base);
7698}
7699#endif
7700
7701static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
7702{
7703 struct desc_ptr dt;
7704 struct kvm_segment seg;
7705 unsigned long val;
7706 int i;
7707
7708 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
7709 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
7710 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
7711 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
7712
7713 for (i = 0; i < 8; i++)
7714 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
7715
7716 kvm_get_dr(vcpu, 6, &val);
7717 put_smstate(u32, buf, 0x7fcc, (u32)val);
7718 kvm_get_dr(vcpu, 7, &val);
7719 put_smstate(u32, buf, 0x7fc8, (u32)val);
7720
7721 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7722 put_smstate(u32, buf, 0x7fc4, seg.selector);
7723 put_smstate(u32, buf, 0x7f64, seg.base);
7724 put_smstate(u32, buf, 0x7f60, seg.limit);
7725 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
7726
7727 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7728 put_smstate(u32, buf, 0x7fc0, seg.selector);
7729 put_smstate(u32, buf, 0x7f80, seg.base);
7730 put_smstate(u32, buf, 0x7f7c, seg.limit);
7731 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
7732
7733 kvm_x86_ops->get_gdt(vcpu, &dt);
7734 put_smstate(u32, buf, 0x7f74, dt.address);
7735 put_smstate(u32, buf, 0x7f70, dt.size);
7736
7737 kvm_x86_ops->get_idt(vcpu, &dt);
7738 put_smstate(u32, buf, 0x7f58, dt.address);
7739 put_smstate(u32, buf, 0x7f54, dt.size);
7740
7741 for (i = 0; i < 6; i++)
7742 enter_smm_save_seg_32(vcpu, buf, i);
7743
7744 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
7745
7746 /* revision id */
7747 put_smstate(u32, buf, 0x7efc, 0x00020000);
7748 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
7749}
7750
7751#ifdef CONFIG_X86_64
7752static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
7753{
7754 struct desc_ptr dt;
7755 struct kvm_segment seg;
7756 unsigned long val;
7757 int i;
7758
7759 for (i = 0; i < 16; i++)
7760 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
7761
7762 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
7763 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
7764
7765 kvm_get_dr(vcpu, 6, &val);
7766 put_smstate(u64, buf, 0x7f68, val);
7767 kvm_get_dr(vcpu, 7, &val);
7768 put_smstate(u64, buf, 0x7f60, val);
7769
7770 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
7771 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
7772 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
7773
7774 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
7775
7776 /* revision id */
7777 put_smstate(u32, buf, 0x7efc, 0x00020064);
7778
7779 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
7780
7781 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7782 put_smstate(u16, buf, 0x7e90, seg.selector);
7783 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
7784 put_smstate(u32, buf, 0x7e94, seg.limit);
7785 put_smstate(u64, buf, 0x7e98, seg.base);
7786
7787 kvm_x86_ops->get_idt(vcpu, &dt);
7788 put_smstate(u32, buf, 0x7e84, dt.size);
7789 put_smstate(u64, buf, 0x7e88, dt.address);
7790
7791 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7792 put_smstate(u16, buf, 0x7e70, seg.selector);
7793 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
7794 put_smstate(u32, buf, 0x7e74, seg.limit);
7795 put_smstate(u64, buf, 0x7e78, seg.base);
7796
7797 kvm_x86_ops->get_gdt(vcpu, &dt);
7798 put_smstate(u32, buf, 0x7e64, dt.size);
7799 put_smstate(u64, buf, 0x7e68, dt.address);
7800
7801 for (i = 0; i < 6; i++)
7802 enter_smm_save_seg_64(vcpu, buf, i);
7803}
7804#endif
7805
7806static void enter_smm(struct kvm_vcpu *vcpu)
7807{
7808 struct kvm_segment cs, ds;
7809 struct desc_ptr dt;
7810 char buf[512];
7811 u32 cr0;
7812
7813 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
7814 memset(buf, 0, 512);
7815#ifdef CONFIG_X86_64
7816 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7817 enter_smm_save_state_64(vcpu, buf);
7818 else
7819#endif
7820 enter_smm_save_state_32(vcpu, buf);
7821
7822 /*
7823  * Give pre_enter_smm() a chance to make ISA-specific changes to the
7824  * vCPU state (e.g. leave guest mode) after we've saved the state into
7825  * the SMM state-save area.
7826  */
7827 kvm_x86_ops->pre_enter_smm(vcpu, buf);
7828
7829 vcpu->arch.hflags |= HF_SMM_MASK;
7830 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
7831
7832 if (kvm_x86_ops->get_nmi_mask(vcpu))
7833 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
7834 else
7835 kvm_x86_ops->set_nmi_mask(vcpu, true);
7836
7837 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
7838 kvm_rip_write(vcpu, 0x8000);
7839
7840 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
7841 kvm_x86_ops->set_cr0(vcpu, cr0);
7842 vcpu->arch.cr0 = cr0;
7843
7844 kvm_x86_ops->set_cr4(vcpu, 0);
7845
7846 /* Undocumented: IDT limit is set to zero on entry to SMM.  */
7847 dt.address = dt.size = 0;
7848 kvm_x86_ops->set_idt(vcpu, &dt);
7849
7850 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
7851
7852 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
7853 cs.base = vcpu->arch.smbase;
7854
7855 ds.selector = 0;
7856 ds.base = 0;
7857
7858 cs.limit = ds.limit = 0xffffffff;
7859 cs.type = ds.type = 0x3;
7860 cs.dpl = ds.dpl = 0;
7861 cs.db = ds.db = 0;
7862 cs.s = ds.s = 1;
7863 cs.l = ds.l = 0;
7864 cs.g = ds.g = 1;
7865 cs.avl = ds.avl = 0;
7866 cs.present = ds.present = 1;
7867 cs.unusable = ds.unusable = 0;
7868 cs.padding = ds.padding = 0;
7869
7870 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
7871 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
7872 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
7873 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
7874 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
7875 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
7876
7877#ifdef CONFIG_X86_64
7878 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7879 kvm_x86_ops->set_efer(vcpu, 0);
7880#endif
7881
7882 kvm_update_cpuid(vcpu);
7883 kvm_mmu_reset_context(vcpu);
7884}
7885
7886static void process_smi(struct kvm_vcpu *vcpu)
7887{
7888 vcpu->arch.smi_pending = true;
7889 kvm_make_request(KVM_REQ_EVENT, vcpu);
7890}
7891
7892void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
7893 unsigned long *vcpu_bitmap)
7894{
7895 cpumask_var_t cpus;
7896
7897 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
7898
7899 kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
7900 vcpu_bitmap, cpus);
7901
7902 free_cpumask_var(cpus);
7903}
7904
7905void kvm_make_scan_ioapic_request(struct kvm *kvm)
7906{
7907 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
7908}
7909
7910static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
7911{
7912 if (!kvm_apic_present(vcpu))
7913 return;
7914
7915 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
7916
7917 if (irqchip_split(vcpu->kvm))
7918 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
7919 else {
7920 if (vcpu->arch.apicv_active)
7921 kvm_x86_ops->sync_pir_to_irr(vcpu);
7922 if (ioapic_in_kernel(vcpu->kvm))
7923 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
7924 }
7925
7926 if (is_guest_mode(vcpu))
7927 vcpu->arch.load_eoi_exitmap_pending = true;
7928 else
7929 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
7930}
7931
7932static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
7933{
7934 u64 eoi_exit_bitmap[4];
7935
7936 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
7937 return;
7938
7939 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
7940 vcpu_to_synic(vcpu)->vec_bitmap, 256);
7941 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
7942}
7943
7944int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
7945 unsigned long start, unsigned long end,
7946 bool blockable)
7947{
7948 unsigned long apic_address;
7949
7950 /*
7951  * The physical address of the APIC access page is stored in the
7952  * VMCS.  Update it when it becomes invalid.
7953  */
7954 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
7955 if (start <= apic_address && apic_address < end)
7956 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
7957
7958 return 0;
7959}
7960
7961void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
7962{
7963 struct page *page = NULL;
7964
7965 if (!lapic_in_kernel(vcpu))
7966 return;
7967
7968 if (!kvm_x86_ops->set_apic_access_page_addr)
7969 return;
7970
7971 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
7972 if (is_error_page(page))
7973 return;
7974 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
7975
7976 /*
7977  * Do not pin the APIC access page in memory; the MMU notifier
7978  * will call us again if it is migrated or swapped out.
7979  */
7980 put_page(page);
7981}
7982
7983void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
7984{
7985 smp_send_reschedule(vcpu->cpu);
7986}
7987EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
7988
7989 /*
7990  * Returns 1 to let vcpu_run() continue the guest execution loop without
7991  * exiting to the userspace.  Otherwise, the value will be returned to the
7992  * userspace.
7993  */
7994static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
7995{
7996 int r;
7997 bool req_int_win =
7998 dm_request_for_irq_injection(vcpu) &&
7999 kvm_cpu_accept_dm_intr(vcpu);
8000
8001 bool req_immediate_exit = false;
8002
8003 if (kvm_request_pending(vcpu)) {
8004 if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
8005 if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
8006 r = 0;
8007 goto out;
8008 }
8009 }
8010 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
8011 kvm_mmu_unload(vcpu);
8012 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
8013 __kvm_migrate_timers(vcpu);
8014 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
8015 kvm_gen_update_masterclock(vcpu->kvm);
8016 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
8017 kvm_gen_kvmclock_update(vcpu);
8018 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
8019 r = kvm_guest_time_update(vcpu);
8020 if (unlikely(r))
8021 goto out;
8022 }
8023 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
8024 kvm_mmu_sync_roots(vcpu);
8025 if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
8026 kvm_mmu_load_cr3(vcpu);
8027 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
8028 kvm_vcpu_flush_tlb(vcpu, true);
8029 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
8030 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
8031 r = 0;
8032 goto out;
8033 }
8034 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
8035 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
8036 vcpu->mmio_needed = 0;
8037 r = 0;
8038 goto out;
8039 }
8040 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
8041 /* Page is swapped out. Do synthetic halt */
8042 vcpu->arch.apf.halted = true;
8043 r = 1;
8044 goto out;
8045 }
8046 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
8047 record_steal_time(vcpu);
8048 if (kvm_check_request(KVM_REQ_SMI, vcpu))
8049 process_smi(vcpu);
8050 if (kvm_check_request(KVM_REQ_NMI, vcpu))
8051 process_nmi(vcpu);
8052 if (kvm_check_request(KVM_REQ_PMU, vcpu))
8053 kvm_pmu_handle_event(vcpu);
8054 if (kvm_check_request(KVM_REQ_PMI, vcpu))
8055 kvm_pmu_deliver_pmi(vcpu);
8056 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
8057 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
8058 if (test_bit(vcpu->arch.pending_ioapic_eoi,
8059 vcpu->arch.ioapic_handled_vectors)) {
8060 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
8061 vcpu->run->eoi.vector =
8062 vcpu->arch.pending_ioapic_eoi;
8063 r = 0;
8064 goto out;
8065 }
8066 }
8067 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
8068 vcpu_scan_ioapic(vcpu);
8069 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
8070 vcpu_load_eoi_exitmap(vcpu);
8071 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
8072 kvm_vcpu_reload_apic_access_page(vcpu);
8073 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
8074 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8075 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
8076 r = 0;
8077 goto out;
8078 }
8079 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
8080 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8081 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
8082 r = 0;
8083 goto out;
8084 }
8085 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
8086 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
8087 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
8088 r = 0;
8089 goto out;
8090 }
8091
8092 /*
8093  * KVM_REQ_HV_STIMER has to be processed after
8094  * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
8095  * depend on the guest clock being up-to-date
8096  */
8097 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
8098 kvm_hv_process_stimers(vcpu);
8099 }
8100
8101 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
8102 ++vcpu->stat.req_event;
8103 kvm_apic_accept_events(vcpu);
8104 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
8105 r = 1;
8106 goto out;
8107 }
8108
8109 if (inject_pending_event(vcpu, req_int_win) != 0)
8110 req_immediate_exit = true;
8111 else {
8112 /*
8113  * Enable SMI/NMI/IRQ window open exits if needed.
8114  *
8115  * SMIs have three cases:
8116  * 1) They can be nested, and then there is nothing to do here
8117  *    because RSM will cause a vmexit anyway.
8118  * 2) There is an ISA-specific reason why SMI cannot be injected,
8119  *    and the moment when this changes can be intercepted (hence
8120  *    the enable_smi_window() callback).
8121  * 3) Or the SMI can be pending because inject_pending_event has
8122  *    completed the injection of an IRQ or NMI from the previous
8123  *    vmexit, and then we request an immediate exit to inject the
8124  *    SMI.
8125  */
8126 if (vcpu->arch.smi_pending && !is_smm(vcpu))
8127 if (!kvm_x86_ops->enable_smi_window(vcpu))
8128 req_immediate_exit = true;
8129 if (vcpu->arch.nmi_pending)
8130 kvm_x86_ops->enable_nmi_window(vcpu);
8131 if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
8132 kvm_x86_ops->enable_irq_window(vcpu);
8133 WARN_ON(vcpu->arch.exception.pending);
8134 }
8135
8136 if (kvm_lapic_enabled(vcpu)) {
8137 update_cr8_intercept(vcpu);
8138 kvm_lapic_sync_to_vapic(vcpu);
8139 }
8140 }
8141
8142 r = kvm_mmu_reload(vcpu);
8143 if (unlikely(r))
8144 goto cancel_injection;
8146
8147 preempt_disable();
8148
8149 kvm_x86_ops->prepare_guest_switch(vcpu);
8150
8151 /*
8152  * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
8153  * IPIs are then delayed until after guest entry, which ensures
8154  * that they result in virtual interrupt delivery.
8155  */
8156 local_irq_disable();
8157 vcpu->mode = IN_GUEST_MODE;
8158
8159 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8160
8161 /*
8162  * 1) We should set ->mode before checking ->requests.  Please see
8163  * the comment in kvm_vcpu_exiting_guest_mode().
8164  *
8165  * 2) For APICv, we should set ->mode before checking PID.ON.  This
8166  * pairs with the memory barrier implicit in pi_test_and_set_on
8167  * (see vmx_deliver_posted_interrupt).
8168  *
8169  * 3) This also orders the write to mode from any reads to the page
8170  * tables done while the VCPU is running.  Please see the comment
8171  * in kvm_flush_remote_tlbs.
8172  */
8173 smp_mb__after_srcu_read_unlock();
8174
8175 /*
8176  * This handles the case where a posted interrupt was
8177  * notified with kvm_vcpu_kick.
8178  */
8179 if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
8180 kvm_x86_ops->sync_pir_to_irr(vcpu);
8181
8182 if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
8183 || need_resched() || signal_pending(current)) {
8184 vcpu->mode = OUTSIDE_GUEST_MODE;
8185 smp_wmb();
8186 local_irq_enable();
8187 preempt_enable();
8188 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8189 r = 1;
8190 goto cancel_injection;
8191 }
8192
8193 if (req_immediate_exit) {
8194 kvm_make_request(KVM_REQ_EVENT, vcpu);
8195 kvm_x86_ops->request_immediate_exit(vcpu);
8196 }
8197
8198 trace_kvm_entry(vcpu->vcpu_id);
8199 guest_enter_irqoff();
8200
8201 /* The preempt notifier should have taken care of the FPU already.  */
8202 WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
8203
8204 if (unlikely(vcpu->arch.switch_db_regs)) {
8205 set_debugreg(0, 7);
8206 set_debugreg(vcpu->arch.eff_db[0], 0);
8207 set_debugreg(vcpu->arch.eff_db[1], 1);
8208 set_debugreg(vcpu->arch.eff_db[2], 2);
8209 set_debugreg(vcpu->arch.eff_db[3], 3);
8210 set_debugreg(vcpu->arch.dr6, 6);
8211 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8212 }
8213
8214 kvm_x86_ops->run(vcpu);
8215
8216 /*
8217  * Do this here before restoring debug registers on the host.  And
8218  * since we do this before handling the vmexit, a DR access vmexit
8219  * can (a) read the correct value of the debug registers, (b) set
8220  * KVM_DEBUGREG_WONT_EXIT again.
8221  */
8222 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
8223 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
8224 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
8225 kvm_update_dr0123(vcpu);
8226 kvm_update_dr6(vcpu);
8227 kvm_update_dr7(vcpu);
8228 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8229 }
8230
8231 /*
8232  * If the guest has used debug registers, at least dr7
8233  * will be disabled while returning to the host.
8234  * If we don't have active breakpoints in the host, we don't
8235  * care about the messed up debug address registers.  But if
8236  * we have some of them active, restore the old state.
8237  */
8238 if (hw_breakpoint_active())
8239 hw_breakpoint_restore();
8240
8241 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
8242
8243 vcpu->mode = OUTSIDE_GUEST_MODE;
8244 smp_wmb();
8245
8246 kvm_x86_ops->handle_exit_irqoff(vcpu);
8247
8248 /*
8249  * Consume any pending interrupts, including the possible source of
8250  * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
8251  * An instruction is required after local_irq_enable() to fully unblock
8252  * interrupts on processors that implement an interrupt shadow, the
8253  * stat.exits increment will do nicely.
8254  */
8255 kvm_before_interrupt(vcpu);
8256 local_irq_enable();
8257 ++vcpu->stat.exits;
8258 local_irq_disable();
8259 kvm_after_interrupt(vcpu);
8260
8261 guest_exit_irqoff();
8262 if (lapic_in_kernel(vcpu)) {
8263 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
8264 if (delta != S64_MIN) {
8265 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
8266 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
8267 }
8268 }
8269
8270 local_irq_enable();
8271 preempt_enable();
8272
8273 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8274
8275 /*
8276  * Profile KVM exit RIPs:
8277  */
8278 if (unlikely(prof_on == KVM_PROFILING)) {
8279 unsigned long rip = kvm_rip_read(vcpu);
8280 profile_hit(KVM_PROFILING, (void *)rip);
8281 }
8282
8283 if (unlikely(vcpu->arch.tsc_always_catchup))
8284 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8285
8286 if (vcpu->arch.apic_attention)
8287 kvm_lapic_sync_from_vapic(vcpu);
8288
8289 vcpu->arch.gpa_available = false;
8290 r = kvm_x86_ops->handle_exit(vcpu);
8291 return r;
8292
8293cancel_injection:
8294 kvm_x86_ops->cancel_injection(vcpu);
8295 if (unlikely(vcpu->arch.apic_attention))
8296 kvm_lapic_sync_from_vapic(vcpu);
8297out:
8298 return r;
8299}
8300
8301static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
8302{
8303 if (!kvm_arch_vcpu_runnable(vcpu) &&
8304 (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
8305 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8306 kvm_vcpu_block(vcpu);
8307 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8308
8309 if (kvm_x86_ops->post_block)
8310 kvm_x86_ops->post_block(vcpu);
8311
8312 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
8313 return 1;
8314 }
8315
8316 kvm_apic_accept_events(vcpu);
8317 switch (vcpu->arch.mp_state) {
8318 case KVM_MP_STATE_HALTED:
8319 vcpu->arch.pv.pv_unhalted = false;
8320 vcpu->arch.mp_state =
8321 KVM_MP_STATE_RUNNABLE;
8322 /* fall through */
8323 case KVM_MP_STATE_RUNNABLE:
8324 vcpu->arch.apf.halted = false;
8325 break;
8326 case KVM_MP_STATE_INIT_RECEIVED:
8327 break;
8328 default:
8329 return -EINTR;
8331 }
8332 return 1;
8333}
8334
8335static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
8336{
8337 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
8338 kvm_x86_ops->check_nested_events(vcpu, false);
8339
8340 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
8341 !vcpu->arch.apf.halted);
8342}
8343
8344static int vcpu_run(struct kvm_vcpu *vcpu)
8345{
8346 int r;
8347 struct kvm *kvm = vcpu->kvm;
8348
8349 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8350 vcpu->arch.l1tf_flush_l1d = true;
8351
8352 for (;;) {
8353 if (kvm_vcpu_running(vcpu)) {
8354 r = vcpu_enter_guest(vcpu);
8355 } else {
8356 r = vcpu_block(kvm, vcpu);
8357 }
8358
8359 if (r <= 0)
8360 break;
8361
8362 kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
8363 if (kvm_cpu_has_pending_timer(vcpu))
8364 kvm_inject_pending_timer_irqs(vcpu);
8365
8366 if (dm_request_for_irq_injection(vcpu) &&
8367 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
8368 r = 0;
8369 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
8370 ++vcpu->stat.request_irq_exits;
8371 break;
8372 }
8373
8374 kvm_check_async_pf_completion(vcpu);
8375
8376 if (signal_pending(current)) {
8377 r = -EINTR;
8378 vcpu->run->exit_reason = KVM_EXIT_INTR;
8379 ++vcpu->stat.signal_exits;
8380 break;
8381 }
8382 if (need_resched()) {
8383 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8384 cond_resched();
8385 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8386 }
8387 }
8388
8389 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8390
8391 return r;
8392}
8393
8394static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
8395{
8396 int r;
8397
8398 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8399 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
8400 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8401 return r;
8402}
8403
8404static int complete_emulated_pio(struct kvm_vcpu *vcpu)
8405{
8406 BUG_ON(!vcpu->arch.pio.count);
8407
8408 return complete_emulated_io(vcpu);
8409}
8410
8411 /*
8412  * Implements the following, as a state machine:
8413  *
8414  * read:
8415  *   for each fragment
8416  *     for each mmio piece in the fragment
8417  *       write gpa, len
8418  *       exit
8419  *       copy data
8420  *   execute insn
8421  *
8422  * write:
8423  *   for each fragment
8424  *     for each mmio piece in the fragment
8425  *       write gpa, len
8426  *       copy data
8427  *       exit
8428  */
8429static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
8430{
8431 struct kvm_run *run = vcpu->run;
8432 struct kvm_mmio_fragment *frag;
8433 unsigned len;
8434
8435 BUG_ON(!vcpu->mmio_needed);
8436
8437 /* Complete previous fragment */
8438 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
8439 len = min(8u, frag->len);
8440 if (!vcpu->mmio_is_write)
8441 memcpy(frag->data, run->mmio.data, len);
8442
8443 if (frag->len <= 8) {
8444 /* Switch to the next fragment. */
8445 frag++;
8446 vcpu->mmio_cur_fragment++;
8447 } else {
8448 /* Go forward to the next mmio piece. */
8449 frag->data += len;
8450 frag->gpa += len;
8451 frag->len -= len;
8452 }
8453
8454 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
8455 vcpu->mmio_needed = 0;
8456
8457 /* FIXME: return into emulator if single-stepping.  */
8458 if (vcpu->mmio_is_write)
8459 return 1;
8460 vcpu->mmio_read_completed = 1;
8461 return complete_emulated_io(vcpu);
8462 }
8463
8464 run->exit_reason = KVM_EXIT_MMIO;
8465 run->mmio.phys_addr = frag->gpa;
8466 if (vcpu->mmio_is_write)
8467 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
8468 run->mmio.len = min(8u, frag->len);
8469 run->mmio.is_write = vcpu->mmio_is_write;
8470 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
8471 return 0;
8472}
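
/*
 * Example walk of the state machine above: an emulated 16-byte MMIO
 * read produces a single fragment with len == 16.  The first pass exits
 * to userspace requesting 8 bytes; on completion the data is copied in,
 * frag->data/gpa advance by 8 and frag->len drops to 8.  The second
 * pass exits for the remaining 8 bytes, after which the fragment is
 * consumed, mmio_read_completed is set and the instruction is
 * re-emulated with the full data available.
 */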
8473
8474 /* Swap (qemu) user FPU context for the guest FPU context. */
8475static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
8476{
8477 fpregs_lock();
8478
8479 copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
8480 /* PKRU is separately restored in kvm_x86_ops->run.  */
8481 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
8482 ~XFEATURE_MASK_PKRU);
8483
8484 fpregs_mark_activate();
8485 fpregs_unlock();
8486
8487 trace_kvm_fpu(1);
8488}
8489
8490 /* When vcpu_run ends, restore user space FPU context. */
8491static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
8492{
8493 fpregs_lock();
8494
8495 copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
8496 copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
8497
8498 fpregs_mark_activate();
8499 fpregs_unlock();
8500
8501 ++vcpu->stat.fpu_reload;
8502 trace_kvm_fpu(0);
8503}
8504
8505int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
8506{
8507 int r;
8508
8509 vcpu_load(vcpu);
8510 kvm_sigset_activate(vcpu);
8511 kvm_load_guest_fpu(vcpu);
8512
8513 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
8514 if (kvm_run->immediate_exit) {
8515 r = -EINTR;
8516 goto out;
8517 }
8518 kvm_vcpu_block(vcpu);
8519 kvm_apic_accept_events(vcpu);
8520 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
8521 r = -EAGAIN;
8522 if (signal_pending(current)) {
8523 r = -EINTR;
8524 vcpu->run->exit_reason = KVM_EXIT_INTR;
8525 ++vcpu->stat.signal_exits;
8526 }
8527 goto out;
8528 }
8529
8530 if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
8531 r = -EINVAL;
8532 goto out;
8533 }
8534
8535 if (vcpu->run->kvm_dirty_regs) {
8536 r = sync_regs(vcpu);
8537 if (r != 0)
8538 goto out;
8539 }
8540
8541 /* re-sync apic's tpr */
8542 if (!lapic_in_kernel(vcpu)) {
8543 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
8544 r = -EINVAL;
8545 goto out;
8546 }
8547 }
8548
8549 if (unlikely(vcpu->arch.complete_userspace_io)) {
8550 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
8551 vcpu->arch.complete_userspace_io = NULL;
8552 r = cui(vcpu);
8553 if (r <= 0)
8554 goto out;
8555 } else
8556 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
8557
8558 if (kvm_run->immediate_exit)
8559 r = -EINTR;
8560 else
8561 r = vcpu_run(vcpu);
8562
8563out:
8564 kvm_put_guest_fpu(vcpu);
8565 if (vcpu->run->kvm_valid_regs)
8566 store_regs(vcpu);
8567 post_kvm_run_save(vcpu);
8568 kvm_sigset_deactivate(vcpu);
8569
8570 vcpu_put(vcpu);
8571 return r;
8572}
8573
8574static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8575{
8576 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
8577 /*
8578  * We are here if userspace calls get_regs() in the middle of
8579  * instruction emulation.  Registers state needs to be copied
8580  * back from the emulation context to the vcpu.  Userspace
8581  * shouldn't do that usually, but some badly designed PV devices
8582  * (the vmware backdoor interface) need this to work.
8583  */
8584 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
8585 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8586 }
8587 regs->rax = kvm_rax_read(vcpu);
8588 regs->rbx = kvm_rbx_read(vcpu);
8589 regs->rcx = kvm_rcx_read(vcpu);
8590 regs->rdx = kvm_rdx_read(vcpu);
8591 regs->rsi = kvm_rsi_read(vcpu);
8592 regs->rdi = kvm_rdi_read(vcpu);
8593 regs->rsp = kvm_rsp_read(vcpu);
8594 regs->rbp = kvm_rbp_read(vcpu);
8595#ifdef CONFIG_X86_64
8596 regs->r8 = kvm_r8_read(vcpu);
8597 regs->r9 = kvm_r9_read(vcpu);
8598 regs->r10 = kvm_r10_read(vcpu);
8599 regs->r11 = kvm_r11_read(vcpu);
8600 regs->r12 = kvm_r12_read(vcpu);
8601 regs->r13 = kvm_r13_read(vcpu);
8602 regs->r14 = kvm_r14_read(vcpu);
8603 regs->r15 = kvm_r15_read(vcpu);
8604#endif
8605
8606 regs->rip = kvm_rip_read(vcpu);
8607 regs->rflags = kvm_get_rflags(vcpu);
8608}
8609
8610int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8611{
8612 vcpu_load(vcpu);
8613 __get_regs(vcpu, regs);
8614 vcpu_put(vcpu);
8615 return 0;
8616}
8617
8618static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8619{
8620 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
8621 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8622
8623 kvm_rax_write(vcpu, regs->rax);
8624 kvm_rbx_write(vcpu, regs->rbx);
8625 kvm_rcx_write(vcpu, regs->rcx);
8626 kvm_rdx_write(vcpu, regs->rdx);
8627 kvm_rsi_write(vcpu, regs->rsi);
8628 kvm_rdi_write(vcpu, regs->rdi);
8629 kvm_rsp_write(vcpu, regs->rsp);
8630 kvm_rbp_write(vcpu, regs->rbp);
8631#ifdef CONFIG_X86_64
8632 kvm_r8_write(vcpu, regs->r8);
8633 kvm_r9_write(vcpu, regs->r9);
8634 kvm_r10_write(vcpu, regs->r10);
8635 kvm_r11_write(vcpu, regs->r11);
8636 kvm_r12_write(vcpu, regs->r12);
8637 kvm_r13_write(vcpu, regs->r13);
8638 kvm_r14_write(vcpu, regs->r14);
8639 kvm_r15_write(vcpu, regs->r15);
8640#endif
8641
8642 kvm_rip_write(vcpu, regs->rip);
8643 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
8644
8645 vcpu->arch.exception.pending = false;
8646
8647 kvm_make_request(KVM_REQ_EVENT, vcpu);
8648}
8649
8650int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8651{
8652 vcpu_load(vcpu);
8653 __set_regs(vcpu, regs);
8654 vcpu_put(vcpu);
8655 return 0;
8656}
8657
8658void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
8659{
8660 struct kvm_segment cs;
8661
8662 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
8663 *db = cs.db;
8664 *l = cs.l;
8665}
8666EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
8667
8668static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8669{
8670 struct desc_ptr dt;
8671
8672 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
8673 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
8674 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
8675 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
8676 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
8677 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
8678
8679 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
8680 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
8681
8682 kvm_x86_ops->get_idt(vcpu, &dt);
8683 sregs->idt.limit = dt.size;
8684 sregs->idt.base = dt.address;
8685 kvm_x86_ops->get_gdt(vcpu, &dt);
8686 sregs->gdt.limit = dt.size;
8687 sregs->gdt.base = dt.address;
8688
8689 sregs->cr0 = kvm_read_cr0(vcpu);
8690 sregs->cr2 = vcpu->arch.cr2;
8691 sregs->cr3 = kvm_read_cr3(vcpu);
8692 sregs->cr4 = kvm_read_cr4(vcpu);
8693 sregs->cr8 = kvm_get_cr8(vcpu);
8694 sregs->efer = vcpu->arch.efer;
8695 sregs->apic_base = kvm_get_apic_base(vcpu);
8696
8697 memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
8698
8699 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
8700 set_bit(vcpu->arch.interrupt.nr,
8701 (unsigned long *)sregs->interrupt_bitmap);
8702}
8703
8704int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
8705 struct kvm_sregs *sregs)
8706{
8707 vcpu_load(vcpu);
8708 __get_sregs(vcpu, sregs);
8709 vcpu_put(vcpu);
8710 return 0;
8711}
8712
8713int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
8714 struct kvm_mp_state *mp_state)
8715{
8716 vcpu_load(vcpu);
8717
8718 kvm_apic_accept_events(vcpu);
8719 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
8720 vcpu->arch.pv.pv_unhalted)
8721 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
8722 else
8723 mp_state->mp_state = vcpu->arch.mp_state;
8724
8725 vcpu_put(vcpu);
8726 return 0;
8727}
8728
8729int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
8730 struct kvm_mp_state *mp_state)
8731{
8732 int ret = -EINVAL;
8733
8734 vcpu_load(vcpu);
8735
8736 if (!lapic_in_kernel(vcpu) &&
8737 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
8738 goto out;
8739
8740 /*
8741  * KVM_MP_STATE_INIT_RECEIVED means the processor is in INIT state;
8742  * pending INIT or SIPI cannot be set from userspace while INIT is
8743  * latched or an SMI is pending (see kvm_vcpu_latch_init()).
8744  */
8745 if ((kvm_vcpu_latch_init(vcpu) || vcpu->arch.smi_pending) &&
8746 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
8747 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
8748 goto out;
8749
8750 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
8751 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
8752 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
8753 } else
8754 vcpu->arch.mp_state = mp_state->mp_state;
8755 kvm_make_request(KVM_REQ_EVENT, vcpu);
8756
8757 ret = 0;
8758out:
8759 vcpu_put(vcpu);
8760 return ret;
8761}
8762
8763int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
8764 int reason, bool has_error_code, u32 error_code)
8765{
8766 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
8767 int ret;
8768
8769 init_emulate_ctxt(vcpu);
8770
8771 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
8772 has_error_code, error_code);
8773 if (ret) {
8774 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
8775 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
8776 vcpu->run->internal.ndata = 0;
8777 return 0;
8778 }
8779
8780 kvm_rip_write(vcpu, ctxt->eip);
8781 kvm_set_rflags(vcpu, ctxt->eflags);
8782 kvm_make_request(KVM_REQ_EVENT, vcpu);
8783 return 1;
8784}
8785EXPORT_SYMBOL_GPL(kvm_task_switch);
8786
8787static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8788{
8789 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
8790 /*
8791  * When EFER.LME and CR0.PG are set, the processor is in
8792  * 64-bit mode (though maybe in a 32-bit code segment).
8793  * CR4.PAE and EFER.LMA must be set.
8794  */
8795 if (!(sregs->cr4 & X86_CR4_PAE)
8796 || !(sregs->efer & EFER_LMA))
8797 return -EINVAL;
8798 } else {
8799 /*
8800  * Not in 64-bit mode: EFER.LMA is clear and the code
8801  * segment cannot be 64-bit.
8802  */
8803 if (sregs->efer & EFER_LMA || sregs->cs.l)
8804 return -EINVAL;
8805 }
8806
8807 return kvm_valid_cr4(vcpu, sregs->cr4);
8808}

static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
	struct msr_data apic_base_msr;
	int mmu_reset_needed = 0;
	int cpuid_update_needed = 0;
	int pending_vec, max_bits, idx;
	struct desc_ptr dt;
	int ret = -EINVAL;

	if (kvm_valid_sregs(vcpu, sregs))
		goto out;

	apic_base_msr.data = sregs->apic_base;
	apic_base_msr.host_initiated = true;
	if (kvm_set_apic_base(vcpu, &apic_base_msr))
		goto out;

	dt.size = sregs->idt.limit;
	dt.address = sregs->idt.base;
	kvm_x86_ops->set_idt(vcpu, &dt);
	dt.size = sregs->gdt.limit;
	dt.address = sregs->gdt.base;
	kvm_x86_ops->set_gdt(vcpu, &dt);

	vcpu->arch.cr2 = sregs->cr2;
	mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
	vcpu->arch.cr3 = sregs->cr3;
	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);

	kvm_set_cr8(vcpu, sregs->cr8);

	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
	kvm_x86_ops->set_efer(vcpu, sregs->efer);

	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
	kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
	vcpu->arch.cr0 = sregs->cr0;

	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
	cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
				(X86_CR4_OSXSAVE | X86_CR4_PKE));
	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
	if (cpuid_update_needed)
		kvm_update_cpuid(vcpu);

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	if (is_pae_paging(vcpu)) {
		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
		mmu_reset_needed = 1;
	}
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (mmu_reset_needed)
		kvm_mmu_reset_context(vcpu);

	max_bits = KVM_NR_INTERRUPTS;
	pending_vec = find_first_bit(
		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
	if (pending_vec < max_bits) {
		kvm_queue_interrupt(vcpu, pending_vec, false);
		pr_debug("Set back pending irq %d\n", pending_vec);
	}

	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
	kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
	kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
	kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
	kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);

	kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
	kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);

	update_cr8_intercept(vcpu);

	/* Older userspace won't unhalt the vcpu on reset. */
	if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
	    sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
	    !is_protmode(vcpu))
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

	kvm_make_request(KVM_REQ_EVENT, vcpu);

	ret = 0;
out:
	return ret;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int ret;

	vcpu_load(vcpu);
	ret = __set_sregs(vcpu, sregs);
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	unsigned long rflags;
	int i, r;

	vcpu_load(vcpu);

	if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
		r = -EBUSY;
		if (vcpu->arch.exception.pending)
			goto out;
		if (dbg->control & KVM_GUESTDBG_INJECT_DB)
			kvm_queue_exception(vcpu, DB_VECTOR);
		else
			kvm_queue_exception(vcpu, BP_VECTOR);
	}

	/*
	 * Read rflags while the old guest debug flags are still in effect;
	 * kvm_get_rflags() filters out a TF flag that a previous
	 * KVM_GUESTDBG_SINGLESTEP request may have planted.
	 */
	rflags = kvm_get_rflags(vcpu);

	vcpu->guest_debug = dbg->control;
	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
		vcpu->guest_debug = 0;

	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
		for (i = 0; i < KVM_NR_DB_REGS; ++i)
			vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
		vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
	} else {
		for (i = 0; i < KVM_NR_DB_REGS; i++)
			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
	}
	kvm_update_dr7(vcpu);

	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
			get_segment_base(vcpu, VCPU_SREG_CS);

	/*
	 * Trigger an rflags update that will inject or remove the trace
	 * flags.
	 */
	kvm_set_rflags(vcpu, rflags);

	kvm_x86_ops->update_bp_intercept(vcpu);

	r = 0;

out:
	vcpu_put(vcpu);
	return r;
}
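
/*
 * Illustrative userspace usage (not kernel code) of the ioctl above:
 * single-stepping a vCPU.  KVM_SET_GUEST_DEBUG, struct kvm_guest_debug and
 * KVM_EXIT_DEBUG are part of the KVM API; the vcpu_fd name is an assumption.
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 *	ioctl(vcpu_fd, KVM_RUN, 0);	// returns with KVM_EXIT_DEBUG after one insn
 */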

/*
 * Translate a guest virtual address to a guest physical address.
 */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	unsigned long vaddr = tr->linear_address;
	gpa_t gpa;
	int idx;

	vcpu_load(vcpu);

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	tr->physical_address = gpa;
	tr->valid = gpa != UNMAPPED_GVA;
	tr->writeable = 1;
	tr->usermode = 0;

	vcpu_put(vcpu);
	return 0;
}
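
/*
 * Illustrative userspace usage (not kernel code) of the KVM_TRANSLATE ioctl
 * served by the function above; vcpu_fd is an assumption.
 *
 *	struct kvm_translation tr = { .linear_address = gva };
 *	if (ioctl(vcpu_fd, KVM_TRANSLATE, &tr) == 0 && tr.valid)
 *		printf("gva %llx -> gpa %llx\n",
 *		       (unsigned long long)gva,
 *		       (unsigned long long)tr.physical_address);
 */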

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxregs_state *fxsave;

	vcpu_load(vcpu);

	fxsave = &vcpu->arch.guest_fpu->state.fxsave;
	memcpy(fpu->fpr, fxsave->st_space, 128);
	fpu->fcw = fxsave->cwd;
	fpu->fsw = fxsave->swd;
	fpu->ftwx = fxsave->twd;
	fpu->last_opcode = fxsave->fop;
	fpu->last_ip = fxsave->rip;
	fpu->last_dp = fxsave->rdp;
	memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	struct fxregs_state *fxsave;

	vcpu_load(vcpu);

	fxsave = &vcpu->arch.guest_fpu->state.fxsave;

	memcpy(fxsave->st_space, fpu->fpr, 128);
	fxsave->cwd = fpu->fcw;
	fxsave->swd = fpu->fsw;
	fxsave->twd = fpu->ftwx;
	fxsave->fop = fpu->last_opcode;
	fxsave->rip = fpu->last_ip;
	fxsave->rdp = fpu->last_dp;
	memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));

	vcpu_put(vcpu);
	return 0;
}

static void store_regs(struct kvm_vcpu *vcpu)
{
	BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);

	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
		__get_regs(vcpu, &vcpu->run->s.regs.regs);

	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
		__get_sregs(vcpu, &vcpu->run->s.regs.sregs);

	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
		kvm_vcpu_ioctl_x86_get_vcpu_events(
			vcpu, &vcpu->run->s.regs.events);
}

static int sync_regs(struct kvm_vcpu *vcpu)
{
	if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
		return -EINVAL;

	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
		__set_regs(vcpu, &vcpu->run->s.regs.regs);
		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
	}
	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
		if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
			return -EINVAL;
		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
	}
	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
		if (kvm_vcpu_ioctl_x86_set_vcpu_events(
				vcpu, &vcpu->run->s.regs.events))
			return -EINVAL;
		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
	}

	return 0;
}
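
/*
 * Illustrative userspace flow (not kernel code) for the sync_regs machinery
 * above, available when KVM_CAP_SYNC_REGS is advertised: registers travel
 * through the shared kvm_run page instead of separate KVM_GET/SET_REGS
 * ioctls.  The vcpu_fd/run names are assumptions.
 *
 *	run->kvm_valid_regs = KVM_SYNC_X86_REGS;	// copy regs out on exit
 *	run->s.regs.regs.rax = 0x1234;
 *	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;	// push regs before entry
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 *	// on return, run->s.regs.regs holds the current guest GPRs
 */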

static void fx_init(struct kvm_vcpu *vcpu)
{
	fpstate_init(&vcpu->arch.guest_fpu->state);
	if (boot_cpu_has(X86_FEATURE_XSAVES))
		vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
			host_xcr0 | XSTATE_COMPACTION_ENABLED;

	/*
	 * Ensure guest xcr0 is valid for loading
	 */
	vcpu->arch.xcr0 = XFEATURE_MASK_FP;

	vcpu->arch.cr0 |= X86_CR0_ET;
}

void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
	void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;

	kvmclock_reset(vcpu);

	kvm_x86_ops->vcpu_free(vcpu);
	free_cpumask_var(wbinvd_dirty_mask);
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;

	if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
		printk_once(KERN_WARNING
			    "kvm: SMP vm created on host with unstable TSC; "
			    "guest TSC will not be reliable\n");

	vcpu = kvm_x86_ops->vcpu_create(kvm, id);

	return vcpu;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
	vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
	kvm_vcpu_mtrr_init(vcpu);
	vcpu_load(vcpu);
	kvm_vcpu_reset(vcpu, false);
	kvm_init_mmu(vcpu, false);
	vcpu_put(vcpu);
	return 0;
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	struct msr_data msr;
	struct kvm *kvm = vcpu->kvm;

	kvm_hv_vcpu_postcreate(vcpu);

	if (mutex_lock_killable(&vcpu->mutex))
		return;
	vcpu_load(vcpu);
	msr.data = 0x0;
	msr.index = MSR_IA32_TSC;
	msr.host_initiated = true;
	kvm_write_tsc(vcpu, &msr);
	vcpu_put(vcpu);

	/* poll control enabled by default */
	vcpu->arch.msr_kvm_poll_control = 1;

	mutex_unlock(&vcpu->mutex);

	if (!kvmclock_periodic_sync)
		return;

	schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
			      KVMCLOCK_SYNC_PERIOD);
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	vcpu->arch.apf.msr_val = 0;

	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);

	kvm_x86_ops->vcpu_free(vcpu);
}

void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	kvm_lapic_reset(vcpu, init_event);

	vcpu->arch.hflags = 0;

	vcpu->arch.smi_pending = 0;
	vcpu->arch.smi_count = 0;
	atomic_set(&vcpu->arch.nmi_queued, 0);
	vcpu->arch.nmi_pending = 0;
	vcpu->arch.nmi_injected = false;
	kvm_clear_interrupt_queue(vcpu);
	kvm_clear_exception_queue(vcpu);
	vcpu->arch.exception.pending = false;

	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
	kvm_update_dr0123(vcpu);
	vcpu->arch.dr6 = DR6_INIT;
	kvm_update_dr6(vcpu);
	vcpu->arch.dr7 = DR7_FIXED_1;
	kvm_update_dr7(vcpu);

	vcpu->arch.cr2 = 0;

	kvm_make_request(KVM_REQ_EVENT, vcpu);
	vcpu->arch.apf.msr_val = 0;
	vcpu->arch.st.msr_val = 0;

	kvmclock_reset(vcpu);

	kvm_clear_async_pf_completion_queue(vcpu);
	kvm_async_pf_hash_reset(vcpu);
	vcpu->arch.apf.halted = false;

	if (kvm_mpx_supported()) {
		void *mpx_state_buffer;

		/*
		 * On an INIT event the guest FPU may currently be loaded;
		 * unload it so the MPX state in the xsave buffer can be
		 * cleared below, and reload it afterwards.
		 */
		if (init_event)
			kvm_put_guest_fpu(vcpu);
		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
						  XFEATURE_BNDREGS);
		if (mpx_state_buffer)
			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
						  XFEATURE_BNDCSR);
		if (mpx_state_buffer)
			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
		if (init_event)
			kvm_load_guest_fpu(vcpu);
	}

	if (!init_event) {
		kvm_pmu_reset(vcpu);
		vcpu->arch.smbase = 0x30000;

		vcpu->arch.msr_misc_features_enables = 0;

		vcpu->arch.xcr0 = XFEATURE_MASK_FP;
	}

	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
	vcpu->arch.regs_avail = ~0;
	vcpu->arch.regs_dirty = ~0;

	vcpu->arch.ia32_xss = 0;

	kvm_x86_ops->vcpu_reset(vcpu, init_event);
}

void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct kvm_segment cs;

	kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
	cs.selector = vector << 8;
	cs.base = vector << 12;
	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
	kvm_rip_write(vcpu, 0);
}
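
/*
 * Worked example for the SIPI delivery above: for startup vector 0x12 the
 * AP gets CS.selector = 0x1200, CS.base = 0x12000 and RIP = 0, so in real
 * mode (linear = CS.base + IP) it starts fetching at physical address
 * 0x12000, i.e. vector * 4 KiB.
 */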

int kvm_arch_hardware_enable(void)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	int ret;
	u64 local_tsc;
	u64 max_tsc = 0;
	bool stable, backwards_tsc = false;

	kvm_shared_msr_cpu_online();
	ret = kvm_x86_ops->hardware_enable();
	if (ret != 0)
		return ret;

	local_tsc = rdtsc();
	stable = !kvm_check_tsc_unstable();
	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (!stable && vcpu->cpu == smp_processor_id())
				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
			if (stable && vcpu->arch.last_host_tsc > local_tsc) {
				backwards_tsc = true;
				if (vcpu->arch.last_host_tsc > max_tsc)
					max_tsc = vcpu->arch.last_host_tsc;
			}
		}
	}

	/*
	 * Sometimes, even reliable TSCs go backwards.  This happens on
	 * platforms that reset TSC during suspend or hibernate actions, but
	 * maintain synchronization.  We must compensate.  Fortunately, we can
	 * detect that condition here, which happens early in CPU bringup,
	 * before any KVM threads can be running.  Unfortunately, we can't
	 * bring the TSCs fully up to date with real time, as we aren't yet
	 * far enough into CPU bringup to know how much real time has actually
	 * elapsed.
	 *
	 * So we simply find the maximum observed TSC above, then record the
	 * adjustment to TSC in each VM.  When the VM later gets loaded, the
	 * adjustment will be applied.  Note that we accumulate adjustments,
	 * in case multiple suspend cycles happen before some VCPU gets a
	 * chance to run again.
	 */
	if (backwards_tsc) {
		u64 delta_cyc = max_tsc - local_tsc;
		list_for_each_entry(kvm, &vm_list, vm_list) {
			kvm->arch.backwards_tsc_observed = true;
			kvm_for_each_vcpu(i, vcpu, kvm) {
				vcpu->arch.tsc_offset_adjustment += delta_cyc;
				vcpu->arch.last_host_tsc = local_tsc;
				kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
			}

			/*
			 * We have to disable TSC offset matching.. if you
			 * were booting a VM while issuing an S4 host suspend,
			 * you may have some problem.  Solving this issue is
			 * left as an exercise to the reader.
			 */
			kvm->arch.last_tsc_nsec = 0;
			kvm->arch.last_tsc_write = 0;
		}

	}
	return 0;
}

void kvm_arch_hardware_disable(void)
{
	kvm_x86_ops->hardware_disable();
	drop_user_return_notifiers();
}

int kvm_arch_hardware_setup(void)
{
	int r;

	r = kvm_x86_ops->hardware_setup();
	if (r != 0)
		return r;

	if (kvm_has_tsc_control) {
		/*
		 * Make sure the user can only configure tsc_khz values that
		 * fit into a signed integer.
		 */
		u64 max = min(0x7fffffffULL,
			      __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
		kvm_max_guest_tsc_khz = max;

		kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
	}

	if (boot_cpu_has(X86_FEATURE_XSAVES))
		rdmsrl(MSR_IA32_XSS, host_xss);

	kvm_init_msr_list();
	return 0;
}
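
/*
 * Sketch of the cap computed in kvm_arch_hardware_setup() above, assuming
 * __scale_tsc(ratio, tsc) == (tsc * ratio) >> kvm_tsc_scaling_ratio_frac_bits
 * as defined earlier in this file:
 *
 *	kvm_max_guest_tsc_khz =
 *		min(0x7fffffff, host_tsc_khz * max_ratio >> frac_bits);
 *
 * i.e. the highest frequency reachable through hardware TSC scaling from
 * the host's frequency, clamped so the kHz value still fits in a signed
 * 32-bit integer.
 */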

void kvm_arch_hardware_unsetup(void)
{
	kvm_x86_ops->hardware_unsetup();
}

int kvm_arch_check_processor_compat(void)
{
	return kvm_x86_ops->check_processor_compatibility();
}

bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
{
	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
}
EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);

bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}

struct static_key kvm_no_apic_vcpu __read_mostly;
EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct page *page;
	int r;

	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	else
		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page) {
		r = -ENOMEM;
		goto fail;
	}
	vcpu->arch.pio_data = page_address(page);

	kvm_set_tsc_khz(vcpu, max_tsc_khz);

	r = kvm_mmu_create(vcpu);
	if (r < 0)
		goto fail_free_pio_data;

	if (irqchip_in_kernel(vcpu->kvm)) {
		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu->kvm);
		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
		if (r < 0)
			goto fail_mmu_destroy;
	} else
		static_key_slow_inc(&kvm_no_apic_vcpu);

	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
				       GFP_KERNEL_ACCOUNT);
	if (!vcpu->arch.mce_banks) {
		r = -ENOMEM;
		goto fail_free_lapic;
	}
	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;

	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
				GFP_KERNEL_ACCOUNT)) {
		r = -ENOMEM;
		goto fail_free_mce_banks;
	}

	fx_init(vcpu);

	vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;

	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);

	vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;

	kvm_async_pf_hash_reset(vcpu);
	kvm_pmu_init(vcpu);

	vcpu->arch.pending_external_vector = -1;
	vcpu->arch.preempted_in_kernel = false;

	kvm_hv_vcpu_init(vcpu);

	return 0;

fail_free_mce_banks:
	kfree(vcpu->arch.mce_banks);
fail_free_lapic:
	kvm_free_lapic(vcpu);
fail_mmu_destroy:
	kvm_mmu_destroy(vcpu);
fail_free_pio_data:
	free_page((unsigned long)vcpu->arch.pio_data);
fail:
	return r;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
	int idx;

	kvm_hv_vcpu_uninit(vcpu);
	kvm_pmu_destroy(vcpu);
	kfree(vcpu->arch.mce_banks);
	kvm_free_lapic(vcpu);
	idx = srcu_read_lock(&vcpu->kvm->srcu);
	kvm_mmu_destroy(vcpu);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	free_page((unsigned long)vcpu->arch.pio_data);
	if (!lapic_in_kernel(vcpu))
		static_key_slow_dec(&kvm_no_apic_vcpu);
}

void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	vcpu->arch.l1tf_flush_l1d = true;
	if (pmu->version && unlikely(pmu->event_count)) {
		pmu->need_cleanup = true;
		kvm_make_request(KVM_REQ_PMU, vcpu);
	}
	kvm_x86_ops->sched_in(vcpu, cpu);
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	if (type)
		return -EINVAL;

	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
	atomic_set(&kvm->arch.noncoherent_dma_count, 0);

	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
	set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		&kvm->arch.irq_sources_bitmap);

	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
	mutex_init(&kvm->arch.apic_map_lock);
	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);

	kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
	pvclock_update_vm_gtod_copy(kvm);

	kvm->arch.guest_can_read_msr_platform_info = true;

	INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
	INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);

	kvm_hv_init_vm(kvm);
	kvm_page_track_init(kvm);
	kvm_mmu_init_vm(kvm);

	return kvm_x86_ops->vm_init(kvm);
}

int kvm_arch_post_init_vm(struct kvm *kvm)
{
	return kvm_mmu_post_init_vm(kvm);
}

static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	kvm_mmu_unload(vcpu);
	vcpu_put(vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	/*
	 * Unpin any mmu pages first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_clear_async_pf_completion_queue(vcpu);
		kvm_unload_vcpu_mmu(vcpu);
	}
	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_free(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
	cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
	cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
	kvm_free_pit(kvm);
}

int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
	int i, r;
	unsigned long hva;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *slot, old;

	/* Called with kvm->slots_lock held.  */
	if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
		return -EINVAL;

	slot = id_to_memslot(slots, id);
	if (size) {
		if (slot->npages)
			return -EEXIST;

		/*
		 * MAP_SHARED to prevent internal slot pages from being moved
		 * by fork()/COW.
		 */
		hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
			      MAP_SHARED | MAP_ANONYMOUS, 0);
		if (IS_ERR((void *)hva))
			return PTR_ERR((void *)hva);
	} else {
		if (!slot->npages)
			return 0;

		hva = 0;
	}

	old = *slot;
	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
		struct kvm_userspace_memory_region m;

		m.slot = id | (i << 16);
		m.flags = 0;
		m.guest_phys_addr = gpa;
		m.userspace_addr = hva;
		m.memory_size = size;
		r = __kvm_set_memory_region(kvm, &m);
		if (r < 0)
			return r;
	}

	if (!size)
		vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);

	return 0;
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);

int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
	int r;

	mutex_lock(&kvm->slots_lock);
	r = __x86_set_memory_region(kvm, id, gpa, size);
	mutex_unlock(&kvm->slots_lock);

	return r;
}
EXPORT_SYMBOL_GPL(x86_set_memory_region);
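
/*
 * Usage sketch: internal slots are created with a size and destroyed by
 * passing size == 0, as kvm_arch_destroy_vm() does below for the APIC
 * access page, the identity page table and the TSS.  For example (the
 * address and size here are illustrative):
 *
 *	x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, PAGE_SIZE * 3);
 *	...
 *	x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
 */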

void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
	kvm_mmu_pre_destroy_vm(kvm);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	if (current->mm == kvm->mm) {
		/*
		 * Free memory regions allocated on behalf of userspace,
		 * unless the memory map has changed due to process exit
		 * or fd copying.
		 */
		x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
		x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
		x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
	}
	if (kvm_x86_ops->vm_destroy)
		kvm_x86_ops->vm_destroy(kvm);
	kvm_pic_destroy(kvm);
	kvm_ioapic_destroy(kvm);
	kvm_free_vcpus(kvm);
	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
	kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
	kvm_mmu_uninit_vm(kvm);
	kvm_page_track_cleanup(kvm);
	kvm_hv_destroy_vm(kvm);
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
	int i;

	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
			kvfree(free->arch.rmap[i]);
			free->arch.rmap[i] = NULL;
		}
		if (i == 0)
			continue;

		if (!dont || free->arch.lpage_info[i - 1] !=
			     dont->arch.lpage_info[i - 1]) {
			kvfree(free->arch.lpage_info[i - 1]);
			free->arch.lpage_info[i - 1] = NULL;
		}
	}

	kvm_page_track_free_memslot(free, dont);
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	int i;

	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		struct kvm_lpage_info *linfo;
		unsigned long ugfn;
		int lpages;
		int level = i + 1;

		lpages = gfn_to_index(slot->base_gfn + npages - 1,
				      slot->base_gfn, level) + 1;

		slot->arch.rmap[i] =
			kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
				 GFP_KERNEL_ACCOUNT);
		if (!slot->arch.rmap[i])
			goto out_free;
		if (i == 0)
			continue;

		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
		if (!linfo)
			goto out_free;

		slot->arch.lpage_info[i - 1] = linfo;

		if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
			linfo[0].disallow_lpage = 1;
		if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
			linfo[lpages - 1].disallow_lpage = 1;
		ugfn = slot->userspace_addr >> PAGE_SHIFT;
		/*
		 * If the gfn and userspace address are not aligned wrt each
		 * other, or if explicitly asked to, disable large page
		 * support for this slot
		 */
		if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
		    !kvm_largepages_enabled()) {
			unsigned long j;

			for (j = 0; j < lpages; ++j)
				linfo[j].disallow_lpage = 1;
		}
	}

	if (kvm_page_track_create_memslot(slot, npages))
		goto out_free;

	return 0;

out_free:
	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
		kvfree(slot->arch.rmap[i]);
		slot->arch.rmap[i] = NULL;
		if (i == 0)
			continue;

		kvfree(slot->arch.lpage_info[i - 1]);
		slot->arch.lpage_info[i - 1] = NULL;
	}
	return -ENOMEM;
}

void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
	/*
	 * memslots->generation has been incremented.
	 * mmio generation may have reached its maximum value.
	 */
	kvm_mmu_invalidate_mmio_sptes(kvm, gen);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	return 0;
}

static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
				     struct kvm_memory_slot *new)
{
	/* Still write protect RO slot */
	if (new->flags & KVM_MEM_READONLY) {
		kvm_mmu_slot_remove_write_access(kvm, new);
		return;
	}

	/*
	 * Call kvm_x86_ops dirty logging hooks when they are valid.
	 *
	 * kvm_x86_ops->slot_disable_log_dirty is called when dirty logging is
	 * disabled for the slot.  In case of PML, that hook must set the
	 * D-bit in sptes of such slots so those GPAs are not recorded in the
	 * PML buffer (avoiding unnecessary PML-buffer-full VMEXITs); this
	 * lets PML stay enabled for the guest's whole lifetime without extra
	 * overhead for slots that do not use dirty logging.
	 *
	 * kvm_x86_ops->slot_enable_log_dirty is called when switching a slot
	 * into dirty logging mode.
	 *
	 * If the hooks are invalid, fall back to write protection: all sptes
	 * covering the slot, including large sptes, are made read-only, and
	 * no new large spte can be created for this slot until logging ends.
	 * See the comments in fast_page_fault().
	 */
	if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
		if (kvm_x86_ops->slot_enable_log_dirty)
			kvm_x86_ops->slot_enable_log_dirty(kvm, new);
		else
			kvm_mmu_slot_remove_write_access(kvm, new);
	} else {
		if (kvm_x86_ops->slot_disable_log_dirty)
			kvm_x86_ops->slot_disable_log_dirty(kvm, new);
	}
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	if (!kvm->arch.n_requested_mmu_pages)
		kvm_mmu_change_mmu_pages(kvm,
				kvm_mmu_calculate_default_mmu_pages(kvm));

	/*
	 * Dirty logging tracks sptes in 4k granularity, meaning that large
	 * sptes have to be split.  If live migration is successful, the guest
	 * in the source machine will be destroyed and large sptes will be
	 * created in the destination.  However, if the guest continues to run
	 * in the source machine (for example if live migration fails), small
	 * sptes will remain around and cause bad performance.
	 *
	 * Scan sptes if dirty logging has been stopped, dropping those
	 * which can be collapsed into a single large-page spte.  Later
	 * page faults will create the large-page sptes.
	 *
	 * There is no need to do this in any of the following cases:
	 * CREATE:	No dirty mappings will already exist.
	 * MOVE/DELETE:	The old mappings will already have been cleaned up by
	 *		kvm_arch_flush_shadow_memslot().
	 */
	if (change == KVM_MR_FLAGS_ONLY &&
		(old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
		!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
		kvm_mmu_zap_collapsible_sptes(kvm, new);

	/*
	 * Set up write protection and/or dirty logging for the new slot.
	 *
	 * For KVM_MR_DELETE there is no surviving slot to apply flags to;
	 * for KVM_MR_MOVE the old slot's shadow pages have already been
	 * zapped by kvm_arch_flush_shadow_memslot().
	 *
	 * FIXME: const-ify all uses of struct kvm_memory_slot.
	 */
	if (change != KVM_MR_DELETE)
		kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_mmu_zap_all(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	kvm_page_track_flush_slot(kvm, slot);
}

static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
	return (is_guest_mode(vcpu) &&
		kvm_x86_ops->guest_apic_has_interrupt &&
		kvm_x86_ops->guest_apic_has_interrupt(vcpu));
}

static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{
	if (!list_empty_careful(&vcpu->async_pf.done))
		return true;

	if (kvm_apic_has_events(vcpu))
		return true;

	if (vcpu->arch.pv.pv_unhalted)
		return true;

	if (vcpu->arch.exception.pending)
		return true;

	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
	    (vcpu->arch.nmi_pending &&
	     kvm_x86_ops->nmi_allowed(vcpu)))
		return true;

	if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
	    (vcpu->arch.smi_pending && !is_smm(vcpu)))
		return true;

	if (kvm_arch_interrupt_allowed(vcpu) &&
	    (kvm_cpu_has_interrupt(vcpu) ||
	     kvm_guest_apic_has_interrupt(vcpu)))
		return true;

	if (kvm_hv_has_stimer_pending(vcpu))
		return true;

	return false;
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}

bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
{
	if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
		return true;

	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
	    kvm_test_request(KVM_REQ_SMI, vcpu) ||
	    kvm_test_request(KVM_REQ_EVENT, vcpu))
		return true;

	if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
		return true;

	return false;
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.preempted_in_kernel;
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}

int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	return kvm_x86_ops->interrupt_allowed(vcpu);
}

unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
{
	if (is_64_bit_mode(vcpu))
		return kvm_rip_read(vcpu);
	return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
		     kvm_rip_read(vcpu));
}
EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
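
/*
 * Example for kvm_get_linear_rip() above: in 64-bit mode the linear RIP is
 * RIP itself; outside 64-bit mode it is CS.base + RIP truncated to 32 bits,
 * e.g. CS.base = 0x12000 and RIP = 0x100 yield 0x12100.
 */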

bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
{
	return kvm_get_linear_rip(vcpu) == linear_rip;
}
EXPORT_SYMBOL_GPL(kvm_is_linear_rip);

unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
	unsigned long rflags;

	rflags = kvm_x86_ops->get_rflags(vcpu);
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
		rflags &= ~X86_EFLAGS_TF;
	return rflags;
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);

static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
	    kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
		rflags |= X86_EFLAGS_TF;
	kvm_x86_ops->set_rflags(vcpu, rflags);
}

void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	__kvm_set_rflags(vcpu, rflags);
	kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);
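
/*
 * Note on the TF round-trip above: while KVM_GUESTDBG_SINGLESTEP is active,
 * kvm_get_rflags() hides the trap flag that KVM itself planted (a reader
 * sees e.g. 0x2 rather than 0x102), and __kvm_set_rflags() re-inserts it as
 * long as the vCPU is still at singlestep_rip, so a get/set round-trip does
 * not cancel the single-step.
 */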

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
	int r;

	if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
	    work->wakeup_all)
		return;

	r = kvm_mmu_reload(vcpu);
	if (unlikely(r))
		return;

	if (!vcpu->arch.mmu->direct_map &&
	    work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
		return;

	vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true);
}

static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
{
	return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
}

static inline u32 kvm_async_pf_next_probe(u32 key)
{
	return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
}

static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 key = kvm_async_pf_hash_fn(gfn);

	while (vcpu->arch.apf.gfns[key] != ~0)
		key = kvm_async_pf_next_probe(key);

	vcpu->arch.apf.gfns[key] = gfn;
}

static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	int i;
	u32 key = kvm_async_pf_hash_fn(gfn);

	for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
		     (vcpu->arch.apf.gfns[key] != gfn &&
		      vcpu->arch.apf.gfns[key] != ~0); i++)
		key = kvm_async_pf_next_probe(key);

	return key;
}

bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
}

static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	u32 i, j, k;

	i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
	while (true) {
		vcpu->arch.apf.gfns[i] = ~0;
		do {
			j = kvm_async_pf_next_probe(j);
			if (vcpu->arch.apf.gfns[j] == ~0)
				return;
			k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
			/*
			 * k lies cyclically in ]i,j]
			 * |    i.k.j |
			 * |....j i.k.| or  |.k..j i...|
			 */
		} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
		vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
		i = j;
	}
}
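
/*
 * Worked example of the deletion above (open-addressing deletion, as in
 * Knuth), with an illustrative 4-slot table: suppose gfns A and B both hash
 * to slot 1, so A sits in slot 1 and B was pushed to slot 2 by linear
 * probing.  Deleting A empties slot 1; the scan finds B at j = 2 with home
 * slot k = 1, which does NOT lie cyclically in ]1, 2], so B is moved back
 * into slot 1 and slot 2 is emptied, keeping B reachable by future probes.
 */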

static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
{
	/* Write the async-PF reason word into the guest's shared slot. */
	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
				      sizeof(val));
}

static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
{
	/* Read back the async-PF reason word from the guest's shared slot. */
	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
				     sizeof(u32));
}

static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
		return false;

	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
	    (vcpu->arch.apf.send_user_only &&
	     kvm_x86_ops->get_cpl(vcpu) == 0))
		return false;

	return true;
}

bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
	if (unlikely(!lapic_in_kernel(vcpu) ||
		     kvm_event_needs_reinjection(vcpu) ||
		     vcpu->arch.exception.pending))
		return false;

	if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
		return false;

	/*
	 * If interrupts are off we cannot even use an artificial
	 * halt state.
	 */
	return kvm_x86_ops->interrupt_allowed(vcpu);
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	struct x86_exception fault;

	trace_kvm_async_pf_not_present(work->arch.token, work->gva);
	kvm_add_async_pf_gfn(vcpu, work->arch.gfn);

	if (kvm_can_deliver_async_pf(vcpu) &&
	    !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
		fault.vector = PF_VECTOR;
		fault.error_code_valid = true;
		fault.error_code = 0;
		fault.nested_page_fault = false;
		fault.address = work->arch.token;
		fault.async_page_fault = true;
		kvm_inject_page_fault(vcpu, &fault);
	} else {
		/*
		 * It is not possible to deliver a paravirtualized asynchronous
		 * page fault, but putting the guest in an artificial halt state
		 * can be beneficial nevertheless: if an interrupt arrives, we
		 * can deliver it timely and perhaps the guest will schedule
		 * another process.  When the instruction that triggered a page
		 * fault is retried, hopefully the page will be ready in the
		 * host.
		 */
		kvm_make_request(KVM_REQ_APF_HALT, vcpu);
	}
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	struct x86_exception fault;
	u32 val;

	if (work->wakeup_all)
		work->arch.token = ~0; /* broadcast wakeup */
	else
		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
	trace_kvm_async_pf_ready(work->arch.token, work->gva);

	if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
	    !apf_get_user(vcpu, &val)) {
		if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
		    vcpu->arch.exception.pending &&
		    vcpu->arch.exception.nr == PF_VECTOR &&
		    !apf_put_user(vcpu, 0)) {
			vcpu->arch.exception.injected = false;
			vcpu->arch.exception.pending = false;
			vcpu->arch.exception.nr = 0;
			vcpu->arch.exception.has_error_code = false;
			vcpu->arch.exception.error_code = 0;
			vcpu->arch.exception.has_payload = false;
			vcpu->arch.exception.payload = 0;
		} else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
			fault.vector = PF_VECTOR;
			fault.error_code_valid = true;
			fault.error_code = 0;
			fault.nested_page_fault = false;
			fault.address = work->arch.token;
			fault.async_page_fault = true;
			kvm_inject_page_fault(vcpu, &fault);
		}
	}
	vcpu->arch.apf.halted = false;
	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
		return true;
	else
		return kvm_can_do_async_pf(vcpu);
}

void kvm_arch_start_assignment(struct kvm *kvm)
{
	atomic_inc(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);

void kvm_arch_end_assignment(struct kvm *kvm)
{
	atomic_dec(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);

bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
	return atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);

void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
{
	atomic_inc(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);

void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
{
	atomic_dec(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);

bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
{
	return atomic_read(&kvm->arch.noncoherent_dma_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);

bool kvm_arch_has_irq_bypass(void)
{
	return true;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	irqfd->producer = prod;

	return kvm_x86_ops->update_pi_irte(irqfd->kvm,
					   prod->irq, irqfd->gsi, 1);
}

void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	int ret;
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	WARN_ON(irqfd->producer != prod);
	irqfd->producer = NULL;

	/*
	 * When the producer of a consumer is unregistered, we change back to
	 * remapped mode, so we can re-use the current implementation when
	 * the irq is masked/disabled or the consumer side (KVM in this case)
	 * doesn't want to receive the interrupts.
	 */
	ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
	if (ret)
		printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
		       " fails: %d\n", irqfd->consumer.token, ret);
}

int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
				  uint32_t guest_irq, bool set)
{
	return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
}

bool kvm_vector_hashing_enabled(void)
{
	return vector_hashing;
}
EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);

bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
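
/*
 * Example of the poll-control handshake above: bit 0 of
 * MSR_KVM_POLL_CONTROL ("allow host-side polling") starts as 1, set in
 * kvm_arch_vcpu_postcreate(), so halt polling is on by default; a guest
 * that prefers to yield the CPU on halt writes 0, after which
 * kvm_arch_no_poll() returns true and the host skips halt polling for
 * this vCPU.
 */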


EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);