// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * KVM guest paravirtual support: async page faults, steal time accounting,
 * PV EOI, PV IPIs, PV TLB flushing and PV spinlocks.
 */
#define pr_fmt(fmt) "kvm-guest: " fmt

#include <linux/context_tracking.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
#include <linux/nmi.h>
#include <linux/swait.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
#include <asm/svm.h>

DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

static int kvmapf = 1;

static int __init parse_no_kvmapf(char *arg)
{
	kvmapf = 0;
	return 0;
}

early_param("no-kvmapf", parse_no_kvmapf);

static int steal_acc = 1;
static int __init parse_no_stealacc(char *arg)
{
	steal_acc = 0;
	return 0;
}

early_param("no-steal-acc", parse_no_stealacc);

static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;

/*
 * No need for any "IO delay" on KVM
 */
static void kvm_io_delay(void)
{
}

#define KVM_TASK_SLEEP_HASHBITS 8
#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)

struct kvm_task_sleep_node {
	struct hlist_node link;
	struct swait_queue_head wq;
	u32 token;
	int cpu;
};

static struct kvm_task_sleep_head {
	raw_spinlock_t lock;
	struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];

static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
						  u32 token)
{
	struct hlist_node *p;

	hlist_for_each(p, &b->list) {
		struct kvm_task_sleep_node *n =
			hlist_entry(p, typeof(*n), link);
		if (n->token == token)
			return n;
	}

	return NULL;
}

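/*
 * Queue the current task on the hash bucket for @token so that it can be
 * woken once the host has handled the asynchronous page fault.  Returns
 * false if the wakeup for @token already arrived (a stale dummy node is
 * found in the bucket), in which case the caller must not sleep.
 */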
static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
	struct kvm_task_sleep_node *e;

	raw_spin_lock(&b->lock);
	e = _find_apf_task(b, token);
	if (e) {
		/* A wakeup for this token already arrived; consume the dummy entry. */
		hlist_del(&e->link);
		raw_spin_unlock(&b->lock);
		kfree(e);
		return false;
	}

	n->token = token;
	n->cpu = smp_processor_id();
	init_swait_queue_head(&n->wq);
	hlist_add_head(&n->link, &b->list);
	raw_spin_unlock(&b->lock);
	return true;
}

/*
 * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
 * @token:	Token to identify the sleep node entry
 *
 * Invoked from the async pagefault handling code or from the VM exit page
 * fault handler. In both cases RCU is watching.
 */
void kvm_async_pf_task_wait_schedule(u32 token)
{
	struct kvm_task_sleep_node n;
	DECLARE_SWAITQUEUE(wait);

	lockdep_assert_irqs_disabled();

	if (!kvm_async_pf_queue_task(token, &n))
		return;

	for (;;) {
		prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
		if (hlist_unhashed(&n.link))
			break;

		local_irq_enable();
		schedule();
		local_irq_disable();
	}
	finish_swait(&n.wq, &wait);
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);

static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
	hlist_del_init(&n->link);
	if (swq_has_sleeper(&n->wq))
		swake_up_one(&n->wq);
}

static void apf_task_wake_all(void)
{
	int i;

	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
		struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
		struct kvm_task_sleep_node *n;
		struct hlist_node *p, *next;

		raw_spin_lock(&b->lock);
		hlist_for_each_safe(p, next, &b->list) {
			n = hlist_entry(p, typeof(*n), link);
			if (n->cpu == smp_processor_id())
				apf_task_wake_one(n);
		}
		raw_spin_unlock(&b->lock);
	}
}

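/*
 * Wake the task sleeping on @token, if any.  A token of ~0 is a broadcast
 * from the host and wakes every sleeper queued by this CPU.  If the wakeup
 * arrives before the faulting task managed to queue itself, a dummy node is
 * inserted so that kvm_async_pf_queue_task() can detect the completed fault
 * and avoid sleeping.
 */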
void kvm_async_pf_task_wake(u32 token)
{
	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
	struct kvm_task_sleep_node *n;

	if (token == ~0) {
		apf_task_wake_all();
		return;
	}

again:
	raw_spin_lock(&b->lock);
	n = _find_apf_task(b, token);
	if (!n) {
		/*
		 * Async #PF not yet handled, add a dummy entry for the token.
		 * Busy waiting here is unlikely as the token is rarely
		 * delivered before the faulting task gets queued.
		 */
		n = kzalloc(sizeof(*n), GFP_ATOMIC);
		if (!n) {
			/*
			 * Allocation failed! Busy wait while other cpu
			 * handles async PF.
			 */
			raw_spin_unlock(&b->lock);
			cpu_relax();
			goto again;
		}
		n->token = token;
		n->cpu = smp_processor_id();
		init_swait_queue_head(&n->wq);
		hlist_add_head(&n->link, &b->list);
	} else {
		apf_task_wake_one(n);
	}
	raw_spin_unlock(&b->lock);
	return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);

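/*
 * Read and clear the per-CPU async #PF flags word that the host updates in
 * the shared apf_reason area.  Returns 0 when async page faults are not
 * enabled on this CPU.
 */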
u32 kvm_read_and_reset_apf_flags(void)
{
	u32 flags = 0;

	if (__this_cpu_read(apf_reason.enabled)) {
		flags = __this_cpu_read(apf_reason.flags);
		__this_cpu_write(apf_reason.flags, 0);
	}

	return flags;
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);
NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags);

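/*
 * Handle a "page not present" async #PF notification delivered through the
 * #PF vector.  Returns true if the fault was an async page fault and has
 * been handled, false if the caller should fall through to the regular page
 * fault path.
 */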
bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
	u32 flags = kvm_read_and_reset_apf_flags();

	if (!flags)
		return false;

	/*
	 * If the host managed to inject an async #PF into an interrupt
	 * disabled region, then die hard as this is not going to end well
	 * and the host side is seriously broken.
	 */
	if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
		panic("Host injected async #PF in interrupt disabled region\n");

	if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
		if (unlikely(!(user_mode(regs))))
			panic("Host injected async #PF in kernel mode\n");
		/* Page is swapped out by the host. */
		kvm_async_pf_task_wait_schedule(token);
		return true;
	}

	WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
	return true;
}
NOKPROBE_SYMBOL(__kvm_handle_async_pf);

__visible void __irq_entry kvm_async_pf_intr(struct pt_regs *regs)
{
	u32 token;

	entering_ack_irq();

	inc_irq_stat(irq_hv_callback_count);

	if (__this_cpu_read(apf_reason.enabled)) {
		token = __this_cpu_read(apf_reason.token);
		rcu_irq_enter();
		kvm_async_pf_task_wake(token);
		rcu_irq_exit();
		__this_cpu_write(apf_reason.token, 0);
		wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
	}

	exiting_irq();
}

static void __init paravirt_ops_setup(void)
{
	pv_info.name = "KVM";

	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
		pv_cpu_ops.io_delay = kvm_io_delay;

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif
}

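/*
 * Register this CPU's steal_time area with the host by writing its physical
 * address (with the enable bit set) to MSR_KVM_STEAL_TIME.
 */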
static void kvm_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);

	if (!has_steal_clock)
		return;

	wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
	pr_info("stealtime: cpu %d, msr %llx\n", cpu,
		(unsigned long long) slow_virt_to_phys(st));
}

static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;

static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
	/*
	 * This relies on __test_and_clear_bit to modify the memory
	 * in a way that is atomic with respect to the local CPU.
	 * The hypervisor only accesses this memory from the local CPU so
	 * there's no need for lock or memory barriers.
	 * An optimization barrier is implied in apic write.
	 */
	if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
		return;
	apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
}

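/*
 * Per-CPU guest setup: enable async page fault delivery through the
 * hypervisor callback vector, hand the PV EOI word to the host, and register
 * the steal time area.  Called for the boot CPU and from the CPU online
 * callback.
 */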
static void kvm_guest_cpu_init(void)
{
	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
		u64 pa;

		WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));

		pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
		pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;

		if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
			pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;

		wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);

		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
		__this_cpu_write(apf_reason.enabled, 1);
		pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
	}

	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
		unsigned long pa;

		/* Size alignment is implied but just to make it explicit. */
		BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
		__this_cpu_write(kvm_apic_eoi, 0);
		pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
			| KVM_MSR_ENABLED;
		wrmsrl(MSR_KVM_PV_EOI_EN, pa);
	}

	if (has_steal_clock)
		kvm_register_steal_time();
}

static void kvm_pv_disable_apf(void)
{
	if (!__this_cpu_read(apf_reason.enabled))
		return;

	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
	__this_cpu_write(apf_reason.enabled, 0);

	pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
}

static void kvm_pv_guest_cpu_reboot(void *unused)
{
	/*
	 * We disable PV EOI before we load a new kernel by kexec,
	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
	 * New kernel can re-enable when it boots.
	 */
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
	kvm_pv_disable_apf();
	kvm_disable_steal_time();
}

static int kvm_pv_reboot_notify(struct notifier_block *nb,
				unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block kvm_pv_reboot_nb = {
	.notifier_call = kvm_pv_reboot_notify,
};

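/*
 * Return the accumulated steal time for @cpu.  The host bumps an even/odd
 * version counter around each update of the steal_time record, so retry the
 * read whenever the version is odd or changes underneath us.
 */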
static u64 kvm_steal_clock(int cpu)
{
	u64 steal;
	struct kvm_steal_time *src;
	int version;

	src = &per_cpu(steal_time, cpu);
	do {
		version = src->version;
		virt_rmb();
		steal = src->steal;
		virt_rmb();
	} while ((version & 1) || (version != src->version));

	return steal;
}

void kvm_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}

static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
	early_set_memory_decrypted((unsigned long) ptr, size);
}

/*
 * Iterate through all possible CPUs and map the memory region pointed
 * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once.
 *
 * Note: we iterate through all possible CPUs to ensure that CPUs
 * hotplugged will have their per-cpu variable already mapped as
 * decrypted.
 */
static void __init sev_map_percpu_data(void)
{
	int cpu;

	if (!sev_active())
		return;

	for_each_possible_cpu(cpu) {
		__set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
		__set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
		__set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));
	}
}

static bool pv_tlb_flush_supported(void)
{
	return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
		!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
		kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
}

static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);

#ifdef CONFIG_SMP

static bool pv_ipi_supported(void)
{
	return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
}

static bool pv_sched_yield_supported(void)
{
	return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
		!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
		kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
}

#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)

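/*
 * Send an IPI to every CPU in @mask using one KVM_HC_SEND_IPI hypercall per
 * cluster.  APIC IDs are packed into a bitmap whose base is the lowest APIC
 * ID seen so far; whenever the next ID does not fit into the
 * KVM_IPI_CLUSTER_SIZE window, the accumulated bitmap is flushed and a new
 * cluster is started.
 */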
static void __send_ipi_mask(const struct cpumask *mask, int vector)
{
	unsigned long flags;
	int cpu, apic_id, icr;
	int min = 0, max = 0;
#ifdef CONFIG_X86_64
	__uint128_t ipi_bitmap = 0;
#else
	u64 ipi_bitmap = 0;
#endif
	long ret;

	if (cpumask_empty(mask))
		return;

	local_irq_save(flags);

	switch (vector) {
	default:
		icr = APIC_DM_FIXED | vector;
		break;
	case NMI_VECTOR:
		icr = APIC_DM_NMI;
		break;
	}

	for_each_cpu(cpu, mask) {
		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
		if (!ipi_bitmap) {
			min = max = apic_id;
		} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
			ipi_bitmap <<= min - apic_id;
			min = apic_id;
		} else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) {
			max = apic_id < max ? max : apic_id;
		} else {
			ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
				(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
			WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
				  ret);
			min = max = apic_id;
			ipi_bitmap = 0;
		}
		__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
	}

	if (ipi_bitmap) {
		ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
		WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
			  ret);
	}

	local_irq_restore(flags);
}

static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
{
	__send_ipi_mask(mask, vector);
}

static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
	unsigned int this_cpu = smp_processor_id();
	struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
	const struct cpumask *local_mask;

	cpumask_copy(new_mask, mask);
	cpumask_clear_cpu(this_cpu, new_mask);
	local_mask = new_mask;
	__send_ipi_mask(local_mask, vector);
}

/*
 * Set the IPI entry points
 */
static void kvm_setup_pv_ipi(void)
{
	apic->send_IPI_mask = kvm_send_ipi_mask;
	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
	pr_info("setup PV IPIs\n");
}

static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
{
	int cpu;

	native_send_call_func_ipi(mask);

	/* Make sure other vCPUs get a chance to run if they need to. */
	for_each_cpu(cpu, mask) {
		if (vcpu_is_preempted(cpu)) {
			kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
			break;
		}
	}
}

static void __init kvm_smp_prepare_boot_cpu(void)
{
	/*
	 * Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
	 * shares the guest physical address with the hypervisor.
	 */
	sev_map_percpu_data();

	kvm_guest_cpu_init();
	native_smp_prepare_boot_cpu();
	kvm_spinlock_init();
}

static void kvm_guest_cpu_offline(void)
{
	kvm_disable_steal_time();
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
	kvm_pv_disable_apf();
	apf_task_wake_all();
}

static int kvm_cpu_online(unsigned int cpu)
{
	local_irq_disable();
	kvm_guest_cpu_init();
	local_irq_enable();
	return 0;
}

static int kvm_cpu_down_prepare(unsigned int cpu)
{
	local_irq_disable();
	kvm_guest_cpu_offline();
	local_irq_enable();
	return 0;
}
#endif

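/*
 * PV TLB flush: CPUs whose vCPU is currently preempted are dropped from the
 * flush mask and only have KVM_VCPU_FLUSH_TLB set in their steal_time
 * record; the host then flushes their TLB before the vCPU runs again.
 */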
static void kvm_flush_tlb_others(const struct cpumask *cpumask,
				 const struct flush_tlb_info *info)
{
	u8 state;
	int cpu;
	struct kvm_steal_time *src;
	struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

	cpumask_copy(flushmask, cpumask);
	/*
	 * We have to call flush only on online vCPUs. And
	 * queue flush_on_enter for pre-empted vCPUs
	 */
	for_each_cpu(cpu, flushmask) {
		src = &per_cpu(steal_time, cpu);
		state = READ_ONCE(src->preempted);
		if ((state & KVM_VCPU_PREEMPTED)) {
			if (try_cmpxchg(&src->preempted, &state,
					state | KVM_VCPU_FLUSH_TLB))
				__cpumask_clear_cpu(cpu, flushmask);
		}
	}

	native_flush_tlb_others(flushmask, info);
}

static void __init kvm_guest_init(void)
{
	int i;

	paravirt_ops_setup();
	register_reboot_notifier(&kvm_pv_reboot_nb);
	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
		raw_spin_lock_init(&async_pf_sleepers[i].lock);

	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
		has_steal_clock = 1;
		pv_time_ops.steal_clock = kvm_steal_clock;
	}

	if (pv_tlb_flush_supported()) {
		pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
		pv_mmu_ops.tlb_remove_table = tlb_remove_table;
		pr_info("KVM setup pv remote TLB flush\n");
	}

	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		apic_set_eoi_write(kvm_guest_apic_eoi_write);

	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
		static_branch_enable(&kvm_async_pf_enabled);
		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, kvm_async_pf_vector);
	}

#ifdef CONFIG_SMP
	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
	if (pv_sched_yield_supported()) {
		smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
		pr_info("setup PV sched yield\n");
	}
	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
				      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
		pr_err("failed to install cpu hotplug callbacks\n");
#else
	sev_map_percpu_data();
	kvm_guest_cpu_init();
#endif

	/*
	 * Hard lockup detection is enabled by default. Disable it, as guests
	 * can get false positives too easily, for example if the host is
	 * overcommitted.
	 */
	hardlockup_detector_disable();
}

static noinline uint32_t __kvm_cpuid_base(void)
{
	if (boot_cpu_data.cpuid_level < 0)
		return 0;

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);

	return 0;
}

static inline uint32_t kvm_cpuid_base(void)
{
	static int kvm_cpuid_base = -1;

	if (kvm_cpuid_base == -1)
		kvm_cpuid_base = __kvm_cpuid_base();

	return kvm_cpuid_base;
}

bool kvm_para_available(void)
{
	return kvm_cpuid_base() != 0;
}
EXPORT_SYMBOL_GPL(kvm_para_available);

unsigned int kvm_arch_para_features(void)
{
	return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}

unsigned int kvm_arch_para_hints(void)
{
	return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}
EXPORT_SYMBOL_GPL(kvm_arch_para_hints);

static uint32_t __init kvm_detect(void)
{
	return kvm_cpuid_base();
}

static void __init kvm_apic_init(void)
{
#if defined(CONFIG_SMP)
	if (pv_ipi_supported())
		kvm_setup_pv_ipi();
#endif
}

static bool __init kvm_msi_ext_dest_id(void)
{
	return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
}

static void __init kvm_init_platform(void)
{
	kvmclock_init();
	x86_platform.apic_post_init = kvm_apic_init;
}

#if defined(CONFIG_AMD_MEM_ENCRYPT)
static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
{
	/* RAX and CPL are already in the GHCB */
	ghcb_set_rbx(ghcb, regs->bx);
	ghcb_set_rcx(ghcb, regs->cx);
	ghcb_set_rdx(ghcb, regs->dx);
	ghcb_set_rsi(ghcb, regs->si);
}

static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
	/* No checking of the return state needed */
	return true;
}
#endif

const __initconst struct hypervisor_x86 x86_hyper_kvm = {
	.name = "KVM",
	.detect = kvm_detect,
	.type = X86_HYPER_KVM,
	.init.guest_late_init = kvm_guest_init,
	.init.x2apic_available = kvm_para_available,
	.init.init_platform = kvm_init_platform,
	.init.msi_ext_dest_id = kvm_msi_ext_dest_id,
#if defined(CONFIG_AMD_MEM_ENCRYPT)
	.runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare,
	.runtime.sev_es_hcall_finish = kvm_sev_es_hcall_finish,
#endif
};

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
		static_key_slow_inc(&paravirt_steal_enabled);
		if (steal_acc)
			static_key_slow_inc(&paravirt_steal_rq_enabled);
	}

	return 0;
}
arch_initcall(activate_jump_labels);

static __init int kvm_alloc_cpumask(void)
{
	int cpu;
	bool alloc = false;

	if (!kvm_para_available() || nopv)
		return 0;

	if (pv_tlb_flush_supported())
		alloc = true;

#if defined(CONFIG_SMP)
	if (pv_ipi_supported())
		alloc = true;
#endif

	if (alloc)
		for_each_possible_cpu(cpu) {
			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
				GFP_KERNEL, cpu_to_node(cpu));
		}

	return 0;
}
arch_initcall(kvm_alloc_cpumask);

#ifdef CONFIG_PARAVIRT_SPINLOCKS

/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
static void kvm_kick_cpu(int cpu)
{
	int apicid;
	unsigned long flags = 0;

	apicid = per_cpu(x86_cpu_to_apicid, cpu);
	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}

#include <asm/qspinlock.h>

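/*
 * PV spinlock wait primitive: halt the vCPU if the byte at @ptr still holds
 * @val.  The lock holder wakes the waiter with KVM_HC_KICK_CPU (see
 * kvm_kick_cpu()); if interrupts were enabled by the caller, an interrupt
 * also ends the halt.
 */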
static void kvm_wait(u8 *ptr, u8 val)
{
	unsigned long flags;

	if (in_nmi())
		return;

	local_irq_save(flags);

	if (READ_ONCE(*ptr) != val)
		goto out;

	/*
	 * halt until it's our turn and kicked. Note that we do safe halt
	 * for irq enabled case to avoid hang when lock info is overwritten
	 * in irq spinlock slowpath and no spurious interrupt occur to save us.
	 */
	if (arch_irqs_disabled_flags(flags))
		halt();
	else
		safe_halt();

out:
	local_irq_restore(flags);
}

#ifdef CONFIG_X86_32
__visible bool __kvm_vcpu_is_preempted(long cpu)
{
	struct kvm_steal_time *src = &per_cpu(steal_time, cpu);

	return !!(src->preempted & KVM_VCPU_PREEMPTED);
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);

#else

#include <asm/asm-offsets.h>

extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);

/*
 * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
 * restoring to/from the stack.
 */
asm(
".pushsection .text;"
".global __raw_callee_save___kvm_vcpu_is_preempted;"
".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
"__raw_callee_save___kvm_vcpu_is_preempted:"
"movq __per_cpu_offset(,%rdi,8), %rax;"
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
"setne %al;"
"ret;"
".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
".popsection");

#endif

/*
 * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
 */
void __init kvm_spinlock_init(void)
{
	/*
	 * In case host doesn't support KVM_FEATURE_PV_UNHALT there is still an
	 * advantage of keeping virt_spin_lock_key enabled: virt_spin_lock() is
	 * preferred over native qspinlock when vCPU is preempted.
	 */
	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
		pr_info("PV spinlocks disabled, no host support\n");
		return;
	}

	/*
	 * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
	 * are available.
	 */
	if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
		pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
		goto out;
	}

	if (num_possible_cpus() == 1) {
		pr_info("PV spinlocks disabled, single CPU\n");
		goto out;
	}

	if (nopvspin) {
		pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
		goto out;
	}

	pr_info("PV spinlocks enabled\n");

	__pv_init_lock_hash();
	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
	pv_lock_ops.wait = kvm_wait;
	pv_lock_ops.kick = kvm_kick_cpu;

	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
		pv_lock_ops.vcpu_is_preempted =
			PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
	}

	/*
	 * When PV spinlock is enabled which is preferred over
	 * virt_spin_lock(), virt_spin_lock_key's value is meaningless.
	 * Just disable it anyway.
	 */
out:
	static_branch_disable(&virt_spin_lock_key);
}

#endif

#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL

static void kvm_disable_host_haltpoll(void *i)
{
	wrmsrl(MSR_KVM_POLL_CONTROL, 0);
}

static void kvm_enable_host_haltpoll(void *i)
{
	wrmsrl(MSR_KVM_POLL_CONTROL, 1);
}

void arch_haltpoll_enable(unsigned int cpu)
{
	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
		pr_err_once("host does not support poll control\n");
		pr_err_once("host upgrade recommended\n");
		return;
	}

	/* Enable guest halt poll disables host halt poll */
	smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_enable);

void arch_haltpoll_disable(unsigned int cpu)
{
	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
		return;

	/* Disable guest halt poll enables host halt poll */
	smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
#endif