// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/psci.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace_arm.h"

#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
#include <asm/sections.h>

#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>

#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif

static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);

static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);

/* The VMID used in the VTTBR */
static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u32 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);

static bool vgic_present;

static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
67
68int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
69{
70 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
71}
72
73int kvm_arch_hardware_setup(void *opaque)
74{
75 return 0;
76}
77
78int kvm_arch_check_processor_compat(void *opaque)
79{
80 return 0;
81}
82
83int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
84 struct kvm_enable_cap *cap)
85{
86 int r;
87
88 if (cap->flags)
89 return -EINVAL;
90
91 switch (cap->cap) {
92 case KVM_CAP_ARM_NISV_TO_USER:
93 r = 0;
94 kvm->arch.return_nisv_io_abort_to_user = true;
95 break;
96 case KVM_CAP_ARM_MTE:
97 mutex_lock(&kvm->lock);
98 if (!system_supports_mte() || kvm->created_vcpus) {
99 r = -EINVAL;
100 } else {
101 r = 0;
102 kvm->arch.mte_enabled = true;
103 }
104 mutex_unlock(&kvm->lock);
105 break;
106 default:
107 r = -EINVAL;
108 break;
109 }
110
111 return r;
112}
113
114static int kvm_arm_default_max_vcpus(void)
115{
116 return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
117}
118
119static void set_default_spectre(struct kvm *kvm)
120{
	/*
	 * The default is to expose CSV2 == 1 if the HW isn't affected.
	 * Although this is a per-CPU feature, we make it global because
	 * asymmetric systems are just a nuisance.
	 *
	 * Userspace can override this as long as it doesn't promise
	 * the impossible.
	 */
129 if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
130 kvm->arch.pfr0_csv2 = 1;
131 if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
132 kvm->arch.pfr0_csv3 = 1;
133}
134
/**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm:	pointer to the KVM struct
 */
139int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
140{
141 int ret;
142
143 ret = kvm_arm_setup_stage2(kvm, type);
144 if (ret)
145 return ret;
146
147 ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
148 if (ret)
149 return ret;
150
151 ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
152 if (ret)
153 goto out_free_stage2_pgd;
154
155 kvm_vgic_early_init(kvm);

	/* The maximum number of VCPUs is limited by the host's GIC model */
158 kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
159
160 set_default_spectre(kvm);
161
162 return ret;
163out_free_stage2_pgd:
164 kvm_free_stage2_pgd(&kvm->arch.mmu);
165 return ret;
166}
167
168vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
169{
170 return VM_FAULT_SIGBUS;
171}
172

/**
 * kvm_arch_destroy_vm - destroy the VM data structure
 * @kvm:	pointer to the KVM struct
 */
178void kvm_arch_destroy_vm(struct kvm *kvm)
179{
180 int i;
181
182 bitmap_free(kvm->arch.pmu_filter);
183
184 kvm_vgic_destroy(kvm);
185
186 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
187 if (kvm->vcpus[i]) {
188 kvm_vcpu_destroy(kvm->vcpus[i]);
189 kvm->vcpus[i] = NULL;
190 }
191 }
192 atomic_set(&kvm->online_vcpus, 0);
193}
194
195int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
196{
197 int r;
198 switch (ext) {
199 case KVM_CAP_IRQCHIP:
200 r = vgic_present;
201 break;
202 case KVM_CAP_IOEVENTFD:
203 case KVM_CAP_DEVICE_CTRL:
204 case KVM_CAP_USER_MEMORY:
205 case KVM_CAP_SYNC_MMU:
206 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
207 case KVM_CAP_ONE_REG:
208 case KVM_CAP_ARM_PSCI:
209 case KVM_CAP_ARM_PSCI_0_2:
210 case KVM_CAP_READONLY_MEM:
211 case KVM_CAP_MP_STATE:
212 case KVM_CAP_IMMEDIATE_EXIT:
213 case KVM_CAP_VCPU_EVENTS:
214 case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
215 case KVM_CAP_ARM_NISV_TO_USER:
216 case KVM_CAP_ARM_INJECT_EXT_DABT:
217 case KVM_CAP_SET_GUEST_DEBUG:
218 case KVM_CAP_VCPU_ATTRIBUTES:
219 case KVM_CAP_PTP_KVM:
220 r = 1;
221 break;
222 case KVM_CAP_SET_GUEST_DEBUG2:
223 return KVM_GUESTDBG_VALID_MASK;
224 case KVM_CAP_ARM_SET_DEVICE_ADDR:
225 r = 1;
226 break;
227 case KVM_CAP_NR_VCPUS:
228 r = num_online_cpus();
229 break;
230 case KVM_CAP_MAX_VCPUS:
231 case KVM_CAP_MAX_VCPU_ID:
232 if (kvm)
233 r = kvm->arch.max_vcpus;
234 else
235 r = kvm_arm_default_max_vcpus();
236 break;
237 case KVM_CAP_MSI_DEVID:
238 if (!kvm)
239 r = -EINVAL;
240 else
241 r = kvm->arch.vgic.msis_require_devid;
242 break;
243 case KVM_CAP_ARM_USER_IRQ:
		/*
		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
		 * (bump this number if adding more devices)
		 */
248 r = 1;
249 break;
250 case KVM_CAP_ARM_MTE:
251 r = system_supports_mte();
252 break;
253 case KVM_CAP_STEAL_TIME:
254 r = kvm_arm_pvtime_supported();
255 break;
256 case KVM_CAP_ARM_EL1_32BIT:
257 r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
258 break;
259 case KVM_CAP_GUEST_DEBUG_HW_BPS:
260 r = get_num_brps();
261 break;
262 case KVM_CAP_GUEST_DEBUG_HW_WPS:
263 r = get_num_wrps();
264 break;
265 case KVM_CAP_ARM_PMU_V3:
266 r = kvm_arm_support_pmu_v3();
267 break;
268 case KVM_CAP_ARM_INJECT_SERROR_ESR:
269 r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
270 break;
271 case KVM_CAP_ARM_VM_IPA_SIZE:
272 r = get_kvm_ipa_limit();
273 break;
274 case KVM_CAP_ARM_SVE:
275 r = system_supports_sve();
276 break;
277 case KVM_CAP_ARM_PTRAUTH_ADDRESS:
278 case KVM_CAP_ARM_PTRAUTH_GENERIC:
279 r = system_has_full_ptr_auth();
280 break;
281 default:
282 r = 0;
283 }
284
285 return r;
286}
287
288long kvm_arch_dev_ioctl(struct file *filp,
289 unsigned int ioctl, unsigned long arg)
290{
291 return -EINVAL;
292}
293
294struct kvm *kvm_arch_alloc_vm(void)
295{
296 if (!has_vhe())
297 return kzalloc(sizeof(struct kvm), GFP_KERNEL);
298
299 return vzalloc(sizeof(struct kvm));
300}
301
302void kvm_arch_free_vm(struct kvm *kvm)
303{
304 if (!has_vhe())
305 kfree(kvm);
306 else
307 vfree(kvm);
308}
309
310int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
311{
312 if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
313 return -EBUSY;
314
315 if (id >= kvm->arch.max_vcpus)
316 return -EINVAL;
317
318 return 0;
319}
320
321int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
322{
323 int err;

	/* Force users to call KVM_ARM_VCPU_INIT */
326 vcpu->arch.target = -1;
327 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
328
329 vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;

	/* Set up the timer */
332 kvm_timer_vcpu_init(vcpu);
333
334 kvm_pmu_vcpu_init(vcpu);
335
336 kvm_arm_reset_debug_ptr(vcpu);
337
338 kvm_arm_pvtime_vcpu_init(&vcpu->arch);
339
340 vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
341
342 err = kvm_vgic_vcpu_init(vcpu);
343 if (err)
344 return err;
345
346 return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP);
347}
348
349void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
350{
351}
352
353void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
354{
355 if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
356 static_branch_dec(&userspace_irqchip_in_use);
357
358 kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
359 kvm_timer_vcpu_terminate(vcpu);
360 kvm_pmu_vcpu_destroy(vcpu);
361
362 kvm_arm_vcpu_destroy(vcpu);
363}
364
365int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
366{
367 return kvm_timer_is_pending(vcpu);
368}
369
370void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
	/*
	 * If we're about to block (most likely because we've just hit a
	 * WFI), we need to sync back the state of the GIC CPU interface
	 * so that we have the latest PMR and group enables. This ensures
	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
	 * whether we have pending interrupts.
	 *
	 * For the same reason, we want to tell GICv4 that we need
	 * doorbells to be signalled, should an interrupt become pending.
	 */
382 preempt_disable();
383 kvm_vgic_vmcr_sync(vcpu);
384 vgic_v4_put(vcpu, true);
385 preempt_enable();
386}
387
388void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
389{
390 preempt_disable();
391 vgic_v4_load(vcpu);
392 preempt_enable();
393}
394
395void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
396{
397 struct kvm_s2_mmu *mmu;
398 int *last_ran;
399
400 mmu = vcpu->arch.hw_mmu;
401 last_ran = this_cpu_ptr(mmu->last_vcpu_ran);

	/*
	 * We guarantee that both TLBs and I-cache are private to each
	 * vcpu. If detecting that a vcpu from the same VM has
	 * previously run on the same physical CPU, call into the
	 * hypervisor code to nuke the relevant contexts.
	 *
	 * We might get preempted before the vCPU actually runs, but
	 * over-invalidation doesn't affect correctness.
	 */
412 if (*last_ran != vcpu->vcpu_id) {
413 kvm_call_hyp(__kvm_flush_cpu_context, mmu);
414 *last_ran = vcpu->vcpu_id;
415 }
416
417 vcpu->cpu = cpu;
418
419 kvm_vgic_load(vcpu);
420 kvm_timer_vcpu_load(vcpu);
421 if (has_vhe())
422 kvm_vcpu_load_sysregs_vhe(vcpu);
423 kvm_arch_vcpu_load_fp(vcpu);
424 kvm_vcpu_pmu_restore_guest(vcpu);
425 if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
426 kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
427
428 if (single_task_running())
429 vcpu_clear_wfx_traps(vcpu);
430 else
431 vcpu_set_wfx_traps(vcpu);
432
433 if (vcpu_has_ptrauth(vcpu))
434 vcpu_ptrauth_disable(vcpu);
435 kvm_arch_vcpu_load_debug_state_flags(vcpu);
436}
437
438void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
439{
440 kvm_arch_vcpu_put_debug_state_flags(vcpu);
441 kvm_arch_vcpu_put_fp(vcpu);
442 if (has_vhe())
443 kvm_vcpu_put_sysregs_vhe(vcpu);
444 kvm_timer_vcpu_put(vcpu);
445 kvm_vgic_put(vcpu);
446 kvm_vcpu_pmu_restore_host(vcpu);
447
448 vcpu->cpu = -1;
449}
450
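/*
 * Mark the vCPU as powered off, request that it goes to sleep on its next
 * request check, and kick it so that it notices the change.
 */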
451static void vcpu_power_off(struct kvm_vcpu *vcpu)
452{
453 vcpu->arch.power_off = true;
454 kvm_make_request(KVM_REQ_SLEEP, vcpu);
455 kvm_vcpu_kick(vcpu);
456}
457
458int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
459 struct kvm_mp_state *mp_state)
460{
461 if (vcpu->arch.power_off)
462 mp_state->mp_state = KVM_MP_STATE_STOPPED;
463 else
464 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
465
466 return 0;
467}
468
469int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
470 struct kvm_mp_state *mp_state)
471{
472 int ret = 0;
473
474 switch (mp_state->mp_state) {
475 case KVM_MP_STATE_RUNNABLE:
476 vcpu->arch.power_off = false;
477 break;
478 case KVM_MP_STATE_STOPPED:
479 vcpu_power_off(vcpu);
480 break;
481 default:
482 ret = -EINVAL;
483 }
484
485 return ret;
486}
487
/*
 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 * @v:		The VCPU pointer
 *
 * If the guest CPU is not waiting for interrupts, or an interrupt line is
 * asserted, the CPU is by definition runnable.
 */
495int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
496{
497 bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
498 return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
499 && !v->arch.power_off && !v->arch.pause);
500}
501
502bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
503{
504 return vcpu_mode_priv(vcpu);
505}

/* Just ensure a guest exit from a particular CPU */
508static void exit_vm_noop(void *info)
509{
510}
511
512void force_vm_exit(const cpumask_t *mask)
513{
514 preempt_disable();
515 smp_call_function_many(mask, exit_vm_noop, NULL, true);
516 preempt_enable();
517}
518
/**
 * need_new_vmid_gen - check that the VMID is still valid
 * @vmid: The VMID to check
 *
 * return true if there is a new generation of VMIDs being used
 *
 * The hardware supports a limited set of values with the value zero reserved
 * for the host, so we check if an assigned value belongs to a previous
 * generation, which requires us to assign a new value. If we're the first to
 * use a VMID for the new generation, we must flush necessary caches and TLBs
 * on all CPUs.
 */
531static bool need_new_vmid_gen(struct kvm_vmid *vmid)
532{
533 u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
534 smp_rmb();
535 return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
536}
537
/**
 * update_vmid - Update the vmid with a valid VMID for the current generation
 * @vmid: The stage-2 VMID information struct
 */
542static void update_vmid(struct kvm_vmid *vmid)
543{
544 if (!need_new_vmid_gen(vmid))
545 return;
546
547 spin_lock(&kvm_vmid_lock);

	/*
	 * We need to re-check the vmid_gen here to ensure that if another vcpu
	 * already allocated a valid vmid for this vm, then this vcpu should
	 * use the same vmid.
	 */
554 if (!need_new_vmid_gen(vmid)) {
555 spin_unlock(&kvm_vmid_lock);
556 return;
557 }

	/* First user of a new VMID generation? */
560 if (unlikely(kvm_next_vmid == 0)) {
561 atomic64_inc(&kvm_vmid_gen);
562 kvm_next_vmid = 1;

		/*
		 * On SMP we know no other CPUs can use this CPU's or each
		 * other's VMID after force_vm_exit returns since the
		 * kvm_vmid_lock blocks them from reentry to the guest.
		 */
		force_vm_exit(cpu_all_mask);
		/*
		 * Now broadcast TLB + ICACHE invalidation over the inner
		 * shareable domain to make sure all data structures are
		 * clean.
		 */
575 kvm_call_hyp(__kvm_flush_vm_context);
576 }
577
578 vmid->vmid = kvm_next_vmid;
579 kvm_next_vmid++;
580 kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
581
582 smp_wmb();
583 WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
584
585 spin_unlock(&kvm_vmid_lock);
586}
587
588static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
589{
590 struct kvm *kvm = vcpu->kvm;
591 int ret = 0;
592
593 if (likely(vcpu->arch.has_run_once))
594 return 0;
595
596 if (!kvm_arm_vcpu_is_finalized(vcpu))
597 return -EPERM;
598
599 vcpu->arch.has_run_once = true;
600
601 kvm_arm_vcpu_init_debug(vcpu);
602
603 if (likely(irqchip_in_kernel(kvm))) {
		/*
		 * Map the VGIC hardware resources before running a vcpu the
		 * first time on this VM.
		 */
608 ret = kvm_vgic_map_resources(kvm);
609 if (ret)
610 return ret;
611 } else {
		/*
		 * Tell the rest of the code that there are userspace irqchip
		 * VMs in the wild.
		 */
616 static_branch_inc(&userspace_irqchip_in_use);
617 }
618
619 ret = kvm_timer_enable(vcpu);
620 if (ret)
621 return ret;
622
623 ret = kvm_arm_pmu_v3_enable(vcpu);
624
625 return ret;
626}
627
628bool kvm_arch_intc_initialized(struct kvm *kvm)
629{
630 return vgic_initialized(kvm);
631}
632
633void kvm_arm_halt_guest(struct kvm *kvm)
634{
635 int i;
636 struct kvm_vcpu *vcpu;
637
638 kvm_for_each_vcpu(i, vcpu, kvm)
639 vcpu->arch.pause = true;
640 kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
641}
642
643void kvm_arm_resume_guest(struct kvm *kvm)
644{
645 int i;
646 struct kvm_vcpu *vcpu;
647
648 kvm_for_each_vcpu(i, vcpu, kvm) {
649 vcpu->arch.pause = false;
650 rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
651 }
652}
653
654static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
655{
656 struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
657
658 rcuwait_wait_event(wait,
659 (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
660 TASK_INTERRUPTIBLE);
661
662 if (vcpu->arch.power_off || vcpu->arch.pause) {
		/* Awaken to handle a signal, request we sleep again later. */
664 kvm_make_request(KVM_REQ_SLEEP, vcpu);
665 }
666
	/*
	 * Make sure we will observe a potential reset request if we've
	 * observed a change to the power state. Pairs with the smp_wmb() in
	 * kvm_psci_vcpu_on().
	 */
672 smp_rmb();
673}
674
675static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
676{
677 return vcpu->arch.target >= 0;
678}
679
680static void check_vcpu_requests(struct kvm_vcpu *vcpu)
681{
682 if (kvm_request_pending(vcpu)) {
683 if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
684 vcpu_req_sleep(vcpu);
685
686 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
687 kvm_reset_vcpu(vcpu);

		/*
		 * Clear IRQ_PENDING requests that were made to guarantee
		 * that a VCPU sees new virtual interrupts.
		 */
693 kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
694
695 if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
696 kvm_update_stolen_time(vcpu);
697
698 if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
			/* The distributor enable bits were changed */
700 preempt_disable();
701 vgic_v4_put(vcpu, false);
702 vgic_v4_load(vcpu);
703 preempt_enable();
704 }
705
706 if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
707 kvm_pmu_handle_pmcr(vcpu,
708 __vcpu_sys_reg(vcpu, PMCR_EL0));
709 }
710}
711
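/*
 * Returns true if the vCPU is executing in AArch32 state on a system where
 * 32bit EL0 support is either absent or only present on a subset of CPUs.
 */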
712static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
713{
714 if (likely(!vcpu_mode_is_32bit(vcpu)))
715 return false;
716
717 return !system_supports_32bit_el0() ||
718 static_branch_unlikely(&arm64_mismatched_32bit_el0);
719}
720
/**
 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 * @vcpu:	The VCPU pointer
 *
 * This function is called through the VCPU_RUN ioctl called from user space.
 * It will execute VM code in a loop until the time slice for the process is
 * used up or some emulated exception takes place in the guest virtual
 * processor. It returns a positive value to indicate the vcpu should keep
 * running, or zero/negative to indicate that an exit back to userspace is
 * required, with the exit reason recorded in vcpu->run.
 */
731int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
732{
733 struct kvm_run *run = vcpu->run;
734 int ret;
735
736 if (unlikely(!kvm_vcpu_initialized(vcpu)))
737 return -ENOEXEC;
738
739 ret = kvm_vcpu_first_run_init(vcpu);
740 if (ret)
741 return ret;
742
743 if (run->exit_reason == KVM_EXIT_MMIO) {
744 ret = kvm_handle_mmio_return(vcpu);
745 if (ret)
746 return ret;
747 }
748
749 vcpu_load(vcpu);
750
751 if (run->immediate_exit) {
752 ret = -EINTR;
753 goto out;
754 }
755
756 kvm_sigset_activate(vcpu);
757
758 ret = 1;
759 run->exit_reason = KVM_EXIT_UNKNOWN;
760 while (ret > 0) {
		/*
		 * Check conditions before entering the guest
		 */
764 cond_resched();
765
766 update_vmid(&vcpu->arch.hw_mmu->vmid);
767
768 check_vcpu_requests(vcpu);

		/*
		 * Preparing the interrupts to be injected also
		 * involves poking the GIC, which must be done in a
		 * non-preemptible context.
		 */
775 preempt_disable();
776
777 kvm_pmu_flush_hwstate(vcpu);
778
779 local_irq_disable();
780
781 kvm_vgic_flush_hwstate(vcpu);

		/*
		 * Exit if we have a signal pending so that we can deliver the
		 * signal to user space.
		 */
787 if (signal_pending(current)) {
788 ret = -EINTR;
789 run->exit_reason = KVM_EXIT_INTR;
790 }

		/*
		 * If we're using a userspace irqchip, then check if we need
		 * to tell a userspace irqchip about timer or PMU level
		 * changes and if so, exit to userspace (the actual level
		 * state gets updated in kvm_timer_update_run and
		 * kvm_pmu_update_run below).
		 */
799 if (static_branch_unlikely(&userspace_irqchip_in_use)) {
800 if (kvm_timer_should_notify_user(vcpu) ||
801 kvm_pmu_should_notify_user(vcpu)) {
802 ret = -EINTR;
803 run->exit_reason = KVM_EXIT_INTR;
804 }
805 }

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
813 smp_store_mb(vcpu->mode, IN_GUEST_MODE);
814
815 if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
816 kvm_request_pending(vcpu)) {
817 vcpu->mode = OUTSIDE_GUEST_MODE;
818 isb();
819 kvm_pmu_sync_hwstate(vcpu);
820 if (static_branch_unlikely(&userspace_irqchip_in_use))
821 kvm_timer_sync_user(vcpu);
822 kvm_vgic_sync_hwstate(vcpu);
823 local_irq_enable();
824 preempt_enable();
825 continue;
826 }
827
828 kvm_arm_setup_debug(vcpu);

		/**************************************************************
		 * Enter the guest
		 */
833 trace_kvm_entry(*vcpu_pc(vcpu));
834 guest_enter_irqoff();
835
836 ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
837
838 vcpu->mode = OUTSIDE_GUEST_MODE;
839 vcpu->stat.exits++;
		/*
		 * Back from guest
		 *************************************************************/

844 kvm_arm_clear_debug(vcpu);

		/*
		 * We must sync the PMU state before the vgic state so
		 * that the vgic can properly sample the updated state of the
		 * interrupt line.
		 */
851 kvm_pmu_sync_hwstate(vcpu);

		/*
		 * Sync the vgic state before syncing the timer state because
		 * the timer code needs to know if the virtual timer
		 * interrupts are active.
		 */
858 kvm_vgic_sync_hwstate(vcpu);

		/*
		 * Sync the timer hardware state before enabling interrupts as
		 * we don't want vtimer interrupts to race with syncing the
		 * timer virtual interrupt state.
		 */
865 if (static_branch_unlikely(&userspace_irqchip_in_use))
866 kvm_timer_sync_user(vcpu);
867
868 kvm_arch_vcpu_ctxsync_fp(vcpu);

		/*
		 * We may have taken a host interrupt in HYP mode (ie
		 * while executing the guest). This interrupt is still
		 * pending, as we haven't serviced it yet!
		 *
		 * We're now back in host EL1, with interrupts disabled.
		 * Enabling the interrupts now will have the effect of
		 * taking the interrupt again, in the host this time.
		 */
880 local_irq_enable();

		/*
		 * We do local_irq_enable() before calling guest_exit() so
		 * that if a timer interrupt hits while running the guest we
		 * account that tick as being spent in the guest.  We enable
		 * preemption after calling guest_exit() so that if we get
		 * preempted we make sure ticks after that is not counted as
		 * guest time.
		 */
890 guest_exit();
891 trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));

		/* Exit types that need handling before we can be preempted */
894 handle_exit_early(vcpu, ret);
895
896 preempt_enable();

		/*
		 * The ARMv8 architecture doesn't give the hypervisor
		 * a mechanism to prevent a guest from dropping to AArch32 EL0
		 * if implemented by the CPU. If we spot the guest in such
		 * state and that we decided it wasn't supposed to do so (like
		 * with the asymmetric AArch32 case), return to userspace with
		 * a fatal error.
		 */
906 if (vcpu_mode_is_bad_32bit(vcpu)) {
			/*
			 * As we have caught the guest red-handed, decide that
			 * it isn't fit for purpose anymore by making the vcpu
			 * invalid. The VMM can try and fix it by issuing a
			 * KVM_ARM_VCPU_INIT if it really wants to.
			 */
913 vcpu->arch.target = -1;
914 ret = ARM_EXCEPTION_IL;
915 }
916
917 ret = handle_exit(vcpu, ret);
918 }

	/* Tell userspace about in-kernel device output levels */
921 if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
922 kvm_timer_update_run(vcpu);
923 kvm_pmu_update_run(vcpu);
924 }
925
926 kvm_sigset_deactivate(vcpu);
927
928out:
	/*
	 * In the unlikely event that we are returning to userspace
	 * with pending exceptions or PC adjustment, commit these
	 * adjustments in order to give userspace a consistent view of
	 * the vcpu state. Note that this relies on __kvm_adjust_pc()
	 * being preempt-safe on VHE.
	 */
936 if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
937 KVM_ARM64_INCREMENT_PC)))
938 kvm_call_hyp(__kvm_adjust_pc, vcpu);
939
940 vcpu_put(vcpu);
941 return ret;
942}
943
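/*
 * Raise or lower a virtual IRQ/FIQ line for the userspace irqchip by toggling
 * the VI/VF bits in the shadow HCR_EL2, kicking the vCPU if the state changed.
 */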
944static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
945{
946 int bit_index;
947 bool set;
948 unsigned long *hcr;
949
950 if (number == KVM_ARM_IRQ_CPU_IRQ)
951 bit_index = __ffs(HCR_VI);
952 else
953 bit_index = __ffs(HCR_VF);
954
955 hcr = vcpu_hcr(vcpu);
956 if (level)
957 set = test_and_set_bit(bit_index, hcr);
958 else
959 set = test_and_clear_bit(bit_index, hcr);
960
	/*
	 * If we didn't change anything, no need to wake up or kick other CPUs
	 */
964 if (set == level)
965 return 0;

	/*
	 * The vcpu irq_lines field was updated; wake the vcpu if it is
	 * sleeping, and kick it if it is already running in the guest so
	 * that it re-evaluates the pending virtual interrupt.
	 */
972 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
973 kvm_vcpu_kick(vcpu);
974
975 return 0;
976}
977
978int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
979 bool line_status)
980{
981 u32 irq = irq_level->irq;
982 unsigned int irq_type, vcpu_idx, irq_num;
983 int nrcpus = atomic_read(&kvm->online_vcpus);
984 struct kvm_vcpu *vcpu = NULL;
985 bool level = irq_level->level;
986
987 irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
988 vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
989 vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
990 irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
991
992 trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
993
994 switch (irq_type) {
995 case KVM_ARM_IRQ_TYPE_CPU:
996 if (irqchip_in_kernel(kvm))
997 return -ENXIO;
998
999 if (vcpu_idx >= nrcpus)
1000 return -EINVAL;
1001
1002 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
1003 if (!vcpu)
1004 return -EINVAL;
1005
1006 if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
1007 return -EINVAL;
1008
1009 return vcpu_interrupt_line(vcpu, irq_num, level);
1010 case KVM_ARM_IRQ_TYPE_PPI:
1011 if (!irqchip_in_kernel(kvm))
1012 return -ENXIO;
1013
1014 if (vcpu_idx >= nrcpus)
1015 return -EINVAL;
1016
1017 vcpu = kvm_get_vcpu(kvm, vcpu_idx);
1018 if (!vcpu)
1019 return -EINVAL;
1020
1021 if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
1022 return -EINVAL;
1023
1024 return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
1025 case KVM_ARM_IRQ_TYPE_SPI:
1026 if (!irqchip_in_kernel(kvm))
1027 return -ENXIO;
1028
1029 if (irq_num < VGIC_NR_PRIVATE_IRQS)
1030 return -EINVAL;
1031
1032 return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
1033 }
1034
1035 return -EINVAL;
1036}
1037
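/*
 * Check the requested target CPU type and feature set against what the host
 * can provide, record the features on the vCPU, and reset it. Repeated calls
 * must request the same target and the same feature set.
 */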
1038static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
1039 const struct kvm_vcpu_init *init)
1040{
1041 unsigned int i, ret;
1042 int phys_target = kvm_target_cpu();
1043
1044 if (init->target != phys_target)
1045 return -EINVAL;

	/*
	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
	 * use the same target.
	 */
1051 if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
1052 return -EINVAL;

	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
1055 for (i = 0; i < sizeof(init->features) * 8; i++) {
1056 bool set = (init->features[i / 32] & (1 << (i % 32)));
1057
1058 if (set && i >= KVM_VCPU_MAX_FEATURES)
1059 return -ENOENT;

		/*
		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
		 * use the same feature set.
		 */
1065 if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
1066 test_bit(i, vcpu->arch.features) != set)
1067 return -EINVAL;
1068
1069 if (set)
1070 set_bit(i, vcpu->arch.features);
1071 }
1072
1073 vcpu->arch.target = phys_target;

	/* Now we know what it is, we can reset it. */
1076 ret = kvm_reset_vcpu(vcpu);
1077 if (ret) {
1078 vcpu->arch.target = -1;
1079 bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
1080 }
1081
1082 return ret;
1083}
1084
1085static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
1086 struct kvm_vcpu_init *init)
1087{
1088 int ret;
1089
1090 ret = kvm_vcpu_set_target(vcpu, init);
1091 if (ret)
1092 return ret;

	/*
	 * Ensure a rebooted VM will fault in RAM pages and detect if the
	 * guest MMU is turned off and flush the caches as needed.
	 *
	 * S2FWB enforces all memory accesses to RAM being cacheable,
	 * ensuring that the data side is always coherent. We still
	 * need to invalidate the I-cache though, as FWB does *not*
	 * imply CTR_EL0.DIC.
	 */
1103 if (vcpu->arch.has_run_once) {
1104 if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
1105 stage2_unmap_vm(vcpu->kvm);
1106 else
1107 icache_inval_all_pou();
1108 }
1109
1110 vcpu_reset_hcr(vcpu);

	/*
	 * Handle the "start in power-off" case.
	 */
1115 if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
1116 vcpu_power_off(vcpu);
1117 else
1118 vcpu->arch.power_off = false;
1119
1120 return 0;
1121}
1122
1123static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
1124 struct kvm_device_attr *attr)
1125{
1126 int ret = -ENXIO;
1127
1128 switch (attr->group) {
1129 default:
1130 ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
1131 break;
1132 }
1133
1134 return ret;
1135}
1136
1137static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
1138 struct kvm_device_attr *attr)
1139{
1140 int ret = -ENXIO;
1141
1142 switch (attr->group) {
1143 default:
1144 ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
1145 break;
1146 }
1147
1148 return ret;
1149}
1150
1151static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
1152 struct kvm_device_attr *attr)
1153{
1154 int ret = -ENXIO;
1155
1156 switch (attr->group) {
1157 default:
1158 ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
1159 break;
1160 }
1161
1162 return ret;
1163}
1164
1165static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
1166 struct kvm_vcpu_events *events)
1167{
1168 memset(events, 0, sizeof(*events));
1169
1170 return __kvm_arm_vcpu_get_events(vcpu, events);
1171}
1172
1173static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
1174 struct kvm_vcpu_events *events)
1175{
1176 int i;

	/* check whether the reserved field is zero */
1179 for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
1180 if (events->reserved[i])
1181 return -EINVAL;

	/* check whether the pad field is zero */
1184 for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
1185 if (events->exception.pad[i])
1186 return -EINVAL;
1187
1188 return __kvm_arm_vcpu_set_events(vcpu, events);
1189}
1190
1191long kvm_arch_vcpu_ioctl(struct file *filp,
1192 unsigned int ioctl, unsigned long arg)
1193{
1194 struct kvm_vcpu *vcpu = filp->private_data;
1195 void __user *argp = (void __user *)arg;
1196 struct kvm_device_attr attr;
1197 long r;
1198
1199 switch (ioctl) {
1200 case KVM_ARM_VCPU_INIT: {
1201 struct kvm_vcpu_init init;
1202
1203 r = -EFAULT;
1204 if (copy_from_user(&init, argp, sizeof(init)))
1205 break;
1206
1207 r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
1208 break;
1209 }
1210 case KVM_SET_ONE_REG:
1211 case KVM_GET_ONE_REG: {
1212 struct kvm_one_reg reg;
1213
1214 r = -ENOEXEC;
1215 if (unlikely(!kvm_vcpu_initialized(vcpu)))
1216 break;
1217
1218 r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arm_set_reg(vcpu, &reg);
		else
			r = kvm_arm_get_reg(vcpu, &reg);
1226 break;
1227 }
1228 case KVM_GET_REG_LIST: {
1229 struct kvm_reg_list __user *user_list = argp;
1230 struct kvm_reg_list reg_list;
1231 unsigned n;
1232
1233 r = -ENOEXEC;
1234 if (unlikely(!kvm_vcpu_initialized(vcpu)))
1235 break;
1236
1237 r = -EPERM;
1238 if (!kvm_arm_vcpu_is_finalized(vcpu))
1239 break;
1240
1241 r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_arm_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
1247 break;
1248 r = -E2BIG;
1249 if (n < reg_list.n)
1250 break;
1251 r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
1252 break;
1253 }
1254 case KVM_SET_DEVICE_ATTR: {
1255 r = -EFAULT;
1256 if (copy_from_user(&attr, argp, sizeof(attr)))
1257 break;
1258 r = kvm_arm_vcpu_set_attr(vcpu, &attr);
1259 break;
1260 }
1261 case KVM_GET_DEVICE_ATTR: {
1262 r = -EFAULT;
1263 if (copy_from_user(&attr, argp, sizeof(attr)))
1264 break;
1265 r = kvm_arm_vcpu_get_attr(vcpu, &attr);
1266 break;
1267 }
1268 case KVM_HAS_DEVICE_ATTR: {
1269 r = -EFAULT;
1270 if (copy_from_user(&attr, argp, sizeof(attr)))
1271 break;
1272 r = kvm_arm_vcpu_has_attr(vcpu, &attr);
1273 break;
1274 }
1275 case KVM_GET_VCPU_EVENTS: {
1276 struct kvm_vcpu_events events;
1277
1278 if (kvm_arm_vcpu_get_events(vcpu, &events))
1279 return -EINVAL;
1280
1281 if (copy_to_user(argp, &events, sizeof(events)))
1282 return -EFAULT;
1283
1284 return 0;
1285 }
1286 case KVM_SET_VCPU_EVENTS: {
1287 struct kvm_vcpu_events events;
1288
1289 if (copy_from_user(&events, argp, sizeof(events)))
1290 return -EFAULT;
1291
1292 return kvm_arm_vcpu_set_events(vcpu, &events);
1293 }
1294 case KVM_ARM_VCPU_FINALIZE: {
1295 int what;
1296
1297 if (!kvm_vcpu_initialized(vcpu))
1298 return -ENOEXEC;
1299
1300 if (get_user(what, (const int __user *)argp))
1301 return -EFAULT;
1302
1303 return kvm_arm_vcpu_finalize(vcpu, what);
1304 }
1305 default:
1306 r = -EINVAL;
1307 }
1308
1309 return r;
1310}
1311
1312void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
1313{
1314
1315}
1316
1317void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
1318 const struct kvm_memory_slot *memslot)
1319{
1320 kvm_flush_remote_tlbs(kvm);
1321}
1322
1323static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
1324 struct kvm_arm_device_addr *dev_addr)
1325{
1326 unsigned long dev_id, type;
1327
1328 dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
1329 KVM_ARM_DEVICE_ID_SHIFT;
1330 type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
1331 KVM_ARM_DEVICE_TYPE_SHIFT;
1332
1333 switch (dev_id) {
1334 case KVM_ARM_DEVICE_VGIC_V2:
1335 if (!vgic_present)
1336 return -ENXIO;
1337 return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
1338 default:
1339 return -ENODEV;
1340 }
1341}
1342
1343long kvm_arch_vm_ioctl(struct file *filp,
1344 unsigned int ioctl, unsigned long arg)
1345{
1346 struct kvm *kvm = filp->private_data;
1347 void __user *argp = (void __user *)arg;
1348
1349 switch (ioctl) {
1350 case KVM_CREATE_IRQCHIP: {
1351 int ret;
1352 if (!vgic_present)
1353 return -ENXIO;
1354 mutex_lock(&kvm->lock);
1355 ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
1356 mutex_unlock(&kvm->lock);
1357 return ret;
1358 }
1359 case KVM_ARM_SET_DEVICE_ADDR: {
1360 struct kvm_arm_device_addr dev_addr;
1361
1362 if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
1363 return -EFAULT;
1364 return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
1365 }
1366 case KVM_ARM_PREFERRED_TARGET: {
1367 int err;
1368 struct kvm_vcpu_init init;
1369
1370 err = kvm_vcpu_preferred_target(&init);
1371 if (err)
1372 return err;
1373
1374 if (copy_to_user(argp, &init, sizeof(init)))
1375 return -EFAULT;
1376
1377 return 0;
1378 }
1379 case KVM_ARM_MTE_COPY_TAGS: {
1380 struct kvm_arm_copy_mte_tags copy_tags;
1381
		if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
			return -EFAULT;
		return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
1385 }
1386 default:
1387 return -EINVAL;
1388 }
1389}
1390
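/* Size of the nVHE hypervisor's per-CPU region, as laid out by its linker script. */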
1391static unsigned long nvhe_percpu_size(void)
1392{
1393 return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
1394 (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_start);
1395}
1396
1397static unsigned long nvhe_percpu_order(void)
1398{
1399 unsigned long size = nvhe_percpu_size();
1400
1401 return size ? get_order(size) : 0;
1402}
1403
1404
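/* Hyp vector base chosen for each Spectre hardening slot, filled at init time. */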
1405static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
1406
1407static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
1408{
1409 hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot);
1410}
1411
1412static int kvm_init_vector_slots(void)
1413{
1414 int err;
1415 void *base;
1416
1417 base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
1418 kvm_init_vector_slot(base, HYP_VECTOR_DIRECT);
1419
1420 base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
1421 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);
1422
1423 if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
1424 return 0;
1425
1426 if (!has_vhe()) {
1427 err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
1428 __BP_HARDEN_HYP_VECS_SZ, &base);
1429 if (err)
1430 return err;
1431 }
1432
1433 kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT);
1434 kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT);
1435 return 0;
1436}
1437
1438static void cpu_prepare_hyp_mode(int cpu)
1439{
1440 struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
1441 unsigned long tcr;
1442
	/*
	 * Calculate the raw per-cpu offset without a translation from the
	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
	 * so that we can use adr_l to access per-cpu variables in EL2.
	 * Also drop the KASAN tag which gets in the way...
	 */
1449 params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) -
1450 (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
1451
1452 params->mair_el2 = read_sysreg(mair_el1);
1453
	/*
	 * The ID map may be configured to use an extended virtual address
	 * range. This is only the case if system RAM is out of range for the
	 * currently configured page size and VA_BITS, in which case we will
	 * also need the extended virtual range for the HYP ID map, or we won't
	 * be able to enable the EL2 MMU.
	 *
	 * However, at EL2, there is only one TTBR register, and we can't switch
	 * between translation tables *and* update TCR_EL2.T0SZ at the same
	 * time. Bottom line: we need to use the extended range with *both* our
	 * translation tables.
	 *
	 * So use the same T0SZ value we use for the ID map.
	 */
1468 tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
1469 tcr &= ~TCR_T0SZ_MASK;
1470 tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
1471 params->tcr_el2 = tcr;
1472
1473 params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
1474 params->pgd_pa = kvm_mmu_get_httbr();
1475 if (is_protected_kvm_enabled())
1476 params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
1477 else
1478 params->hcr_el2 = HCR_HOST_NVHE_FLAGS;
1479 params->vttbr = params->vtcr = 0;
1480
	/*
	 * Flush the init params from the data cache because the struct will
	 * be read while the MMU is off.
	 */
1485 kvm_flush_dcache_to_poc(params, sizeof(*params));
1486}
1487
1488static void hyp_install_host_vector(void)
1489{
1490 struct kvm_nvhe_init_params *params;
1491 struct arm_smccc_res res;
1492
	/* Switch from the HYP stub to our own HYP init vector */
1494 __hyp_set_vectors(kvm_get_idmap_vector());
1495
	/*
	 * Call initialization code, and switch to the full blown HYP code.
	 * If the cpucaps haven't been finalized yet, something has gone very
	 * wrong, and hyp will crash and burn when it uses any
	 * cpus_have_const_cap() wrapper.
	 */
1502 BUG_ON(!system_capabilities_finalized());
1503 params = this_cpu_ptr_nvhe_sym(kvm_init_params);
1504 arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
1505 WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
1506}
1507
1508static void cpu_init_hyp_mode(void)
1509{
1510 hyp_install_host_vector();

	/*
	 * Disabling SSBD on a non-VHE system requires us to enable SSBS
	 * while at EL2.
	 */
1516 if (this_cpu_has_cap(ARM64_SSBS) &&
1517 arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) {
1518 kvm_call_hyp_nvhe(__kvm_enable_ssbs);
1519 }
1520}
1521
1522static void cpu_hyp_reset(void)
1523{
1524 if (!is_kernel_in_hyp_mode())
1525 __hyp_reset_vectors();
1526}
1527
/*
 * EL2 vectors can be mapped and rerouted in a number of ways,
 * depending on the kernel configuration and CPU present:
 *
 * - If the CPU is affected by Spectre-v2, the hardening sequence is
 *   placed in one of the vector slots, which is executed before jumping
 *   to the real vectors.
 *
 * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot
 *   containing the hardening sequence is mapped next to the idmap page,
 *   and executed before jumping to the real vectors.
 *
 * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an
 *   empty slot is selected, mapped next to the idmap page, and
 *   executed before jumping to the real vectors.
 *
 * Note that ARM64_SPECTRE_V3A is somewhat incompatible with
 * VHE, as we don't have hypervisor-specific mappings. If the system
 * is VHE and yet selects this capability, it will be ignored.
 */
1548static void cpu_set_hyp_vector(void)
1549{
1550 struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
1551 void *vector = hyp_spectre_vector_selector[data->slot];
1552
1553 if (!is_protected_kvm_enabled())
1554 *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
1555 else
1556 kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
1557}
1558
1559static void cpu_hyp_reinit(void)
1560{
1561 kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
1562
1563 cpu_hyp_reset();
1564
1565 if (is_kernel_in_hyp_mode())
1566 kvm_timer_init_vhe();
1567 else
1568 cpu_init_hyp_mode();
1569
1570 cpu_set_hyp_vector();
1571
1572 kvm_arm_init_debug();
1573
1574 if (vgic_present)
1575 kvm_vgic_init_cpu_hardware();
1576}
1577
1578static void _kvm_arch_hardware_enable(void *discard)
1579{
1580 if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
1581 cpu_hyp_reinit();
1582 __this_cpu_write(kvm_arm_hardware_enabled, 1);
1583 }
1584}
1585
1586int kvm_arch_hardware_enable(void)
1587{
1588 _kvm_arch_hardware_enable(NULL);
1589 return 0;
1590}
1591
1592static void _kvm_arch_hardware_disable(void *discard)
1593{
1594 if (__this_cpu_read(kvm_arm_hardware_enabled)) {
1595 cpu_hyp_reset();
1596 __this_cpu_write(kvm_arm_hardware_enabled, 0);
1597 }
1598}
1599
1600void kvm_arch_hardware_disable(void)
1601{
1602 if (!is_protected_kvm_enabled())
1603 _kvm_arch_hardware_disable(NULL);
1604}
1605
1606#ifdef CONFIG_CPU_PM
1607static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
1608 unsigned long cmd,
1609 void *v)
1610{
	/*
	 * kvm_arm_hardware_enabled is left with its old value over
	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
	 * re-enable hyp.
	 */
1616 switch (cmd) {
1617 case CPU_PM_ENTER:
1618 if (__this_cpu_read(kvm_arm_hardware_enabled))
			/*
			 * don't update kvm_arm_hardware_enabled here
			 * so that the hardware will be re-enabled
			 * before suspend.
			 */
1624 cpu_hyp_reset();
1625
1626 return NOTIFY_OK;
1627 case CPU_PM_ENTER_FAILED:
1628 case CPU_PM_EXIT:
1629 if (__this_cpu_read(kvm_arm_hardware_enabled))
			/* The hardware was enabled before suspend. */
1631 cpu_hyp_reinit();
1632
1633 return NOTIFY_OK;
1634
1635 default:
1636 return NOTIFY_DONE;
1637 }
1638}
1639
1640static struct notifier_block hyp_init_cpu_pm_nb = {
1641 .notifier_call = hyp_init_cpu_pm_notifier,
1642};
1643
1644static void hyp_cpu_pm_init(void)
1645{
1646 if (!is_protected_kvm_enabled())
1647 cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
1648}
1649static void hyp_cpu_pm_exit(void)
1650{
1651 if (!is_protected_kvm_enabled())
1652 cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
1653}
1654#else
1655static inline void hyp_cpu_pm_init(void)
1656{
1657}
1658static inline void hyp_cpu_pm_exit(void)
1659{
1660}
1661#endif
1662
1663static void init_cpu_logical_map(void)
1664{
1665 unsigned int cpu;

	/*
	 * Copy the MPIDR <-> logical CPU ID mapping to hyp.
	 * Only copy the set of online CPUs whose features have been checked
	 * against the finalized system capabilities. The hypervisor will not
	 * allow any other CPUs from the `possible` set to boot.
	 */
1673 for_each_online_cpu(cpu)
1674 hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu);
1675}
1676
1677#define init_psci_0_1_impl_state(config, what) \
1678 config.psci_0_1_ ## what ## _implemented = psci_ops.what
1679
1680static bool init_psci_relay(void)
1681{
	/*
	 * If PSCI has not been initialized, protected KVM cannot install
	 * itself on newly booted CPUs.
	 */
1686 if (!psci_ops.get_version) {
1687 kvm_err("Cannot initialize protected mode without PSCI\n");
1688 return false;
1689 }
1690
1691 kvm_host_psci_config.version = psci_ops.get_version();
1692
1693 if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
1694 kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
1695 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend);
1696 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on);
1697 init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off);
1698 init_psci_0_1_impl_state(kvm_host_psci_config, migrate);
1699 }
1700 return true;
1701}
1702
1703static int init_common_resources(void)
1704{
1705 return kvm_set_ipa_limit();
1706}
1707
1708static int init_subsystems(void)
1709{
1710 int err = 0;

	/*
	 * Enable hardware so that subsystem initialisation can access EL2.
	 */
	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);

	/*
	 * Register CPU lower-power notifier
	 */
	hyp_cpu_pm_init();

	/*
	 * Init HYP view of VGIC
	 */
1725 err = kvm_vgic_hyp_init();
1726 switch (err) {
1727 case 0:
1728 vgic_present = true;
1729 break;
1730 case -ENODEV:
1731 case -ENXIO:
1732 vgic_present = false;
1733 err = 0;
1734 break;
1735 default:
1736 goto out;
1737 }

	/*
	 * Init HYP architected timer support
	 */
1742 err = kvm_timer_hyp_init(vgic_present);
1743 if (err)
1744 goto out;
1745
1746 kvm_perf_init();
1747 kvm_sys_reg_table_init();
1748
1749out:
1750 if (err || !is_protected_kvm_enabled())
1751 on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
1752
1753 return err;
1754}
1755
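/* Undo init_hyp_mode(): free the Hyp page tables, per-CPU stacks and regions. */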
1756static void teardown_hyp_mode(void)
1757{
1758 int cpu;
1759
1760 free_hyp_pgds();
1761 for_each_possible_cpu(cpu) {
1762 free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
1763 free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
1764 }
1765}
1766
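/*
 * Hand the reserved hyp memory pool and the per-CPU bases over to the nVHE
 * hypervisor so it can set up its own page tables (protected mode only).
 */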
1767static int do_pkvm_init(u32 hyp_va_bits)
1768{
1769 void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
1770 int ret;
1771
1772 preempt_disable();
1773 hyp_install_host_vector();
1774 ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
1775 num_possible_cpus(), kern_hyp_va(per_cpu_base),
1776 hyp_va_bits);
1777 preempt_enable();
1778
1779 return ret;
1780}
1781
1782static int kvm_hyp_init_protection(u32 hyp_va_bits)
1783{
1784 void *addr = phys_to_virt(hyp_mem_base);
1785 int ret;
1786
1787 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
1788 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
1789
1790 ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
1791 if (ret)
1792 return ret;
1793
1794 ret = do_pkvm_init(hyp_va_bits);
1795 if (ret)
1796 return ret;
1797
1798 free_hyp_pgds();
1799
1800 return 0;
1801}
1802
/**
 * Inits Hyp-mode on all online CPUs
 */
1806static int init_hyp_mode(void)
1807{
1808 u32 hyp_va_bits;
1809 int cpu;
1810 int err = -ENOMEM;

	/*
	 * The protected Hyp-mode cannot be initialized if the memory pool
	 * allocation has failed.
	 */
1816 if (is_protected_kvm_enabled() && !hyp_mem_base)
1817 goto out_err;

	/*
	 * Allocate Hyp PGD and setup Hyp identity mapping
	 */
1822 err = kvm_mmu_init(&hyp_va_bits);
1823 if (err)
1824 goto out_err;
1825
	/*
	 * Allocate stack pages for Hypervisor-mode
	 */
1829 for_each_possible_cpu(cpu) {
1830 unsigned long stack_page;
1831
1832 stack_page = __get_free_page(GFP_KERNEL);
1833 if (!stack_page) {
1834 err = -ENOMEM;
1835 goto out_err;
1836 }
1837
1838 per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
1839 }
1840
	/*
	 * Allocate and initialize pages for Hypervisor-mode percpu regions.
	 */
1844 for_each_possible_cpu(cpu) {
1845 struct page *page;
1846 void *page_addr;
1847
1848 page = alloc_pages(GFP_KERNEL, nvhe_percpu_order());
1849 if (!page) {
1850 err = -ENOMEM;
1851 goto out_err;
1852 }
1853
1854 page_addr = page_address(page);
1855 memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
1856 kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
1857 }
1858
	/*
	 * Map the Hyp-code called directly from the host
	 */
1862 err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
1863 kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
1864 if (err) {
1865 kvm_err("Cannot map world-switch code\n");
1866 goto out_err;
1867 }
1868
1869 err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
1870 kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
1871 if (err) {
1872 kvm_err("Cannot map .hyp.rodata section\n");
1873 goto out_err;
1874 }
1875
1876 err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
1877 kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
1878 if (err) {
1879 kvm_err("Cannot map rodata section\n");
1880 goto out_err;
1881 }
1882
	/*
	 * .hyp.bss is guaranteed to be placed at the beginning of the .bss
	 * section thanks to an assertion in the linker script. Map it RW
	 * and let the rest of the .bss section be mapped RO.
	 */
1888 err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start),
1889 kvm_ksym_ref(__hyp_bss_end), PAGE_HYP);
1890 if (err) {
1891 kvm_err("Cannot map hyp bss section: %d\n", err);
1892 goto out_err;
1893 }
1894
1895 err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end),
1896 kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
1897 if (err) {
1898 kvm_err("Cannot map bss section\n");
1899 goto out_err;
1900 }
1901
	/*
	 * Map the Hyp stack pages
	 */
1905 for_each_possible_cpu(cpu) {
1906 char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
1907 err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE,
1908 PAGE_HYP);
1909
1910 if (err) {
1911 kvm_err("Cannot map hyp stack\n");
1912 goto out_err;
1913 }
1914 }
1915
1916 for_each_possible_cpu(cpu) {
1917 char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
1918 char *percpu_end = percpu_begin + nvhe_percpu_size();

		/* Map Hyp percpu pages */
1921 err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP);
1922 if (err) {
1923 kvm_err("Cannot map hyp percpu region\n");
1924 goto out_err;
1925 }

		/* Prepare the CPU initialization parameters */
1928 cpu_prepare_hyp_mode(cpu);
1929 }
1930
1931 if (is_protected_kvm_enabled()) {
1932 init_cpu_logical_map();
1933
1934 if (!init_psci_relay()) {
1935 err = -ENODEV;
1936 goto out_err;
1937 }
1938 }
1939
1940 if (is_protected_kvm_enabled()) {
1941 err = kvm_hyp_init_protection(hyp_va_bits);
1942 if (err) {
1943 kvm_err("Failed to init hyp memory protection\n");
1944 goto out_err;
1945 }
1946 }
1947
1948 return 0;
1949
1950out_err:
1951 teardown_hyp_mode();
1952 kvm_err("error initializing Hyp mode: %d\n", err);
1953 return err;
1954}
1955
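/* Enable stage-2 protection of host memory on the calling CPU (protected mode). */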
1956static void _kvm_host_prot_finalize(void *discard)
1957{
1958 WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
1959}
1960
1961static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
1962{
1963 return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
1964}
1965
1966#define pkvm_mark_hyp_section(__section) \
1967 pkvm_mark_hyp(__pa_symbol(__section##_start), \
1968 __pa_symbol(__section##_end))
1969
1970static int finalize_hyp_mode(void)
1971{
1972 int cpu, ret;
1973
1974 if (!is_protected_kvm_enabled())
1975 return 0;
1976
1977 ret = pkvm_mark_hyp_section(__hyp_idmap_text);
1978 if (ret)
1979 return ret;
1980
1981 ret = pkvm_mark_hyp_section(__hyp_text);
1982 if (ret)
1983 return ret;
1984
1985 ret = pkvm_mark_hyp_section(__hyp_rodata);
1986 if (ret)
1987 return ret;
1988
1989 ret = pkvm_mark_hyp_section(__hyp_bss);
1990 if (ret)
1991 return ret;
1992
1993 ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
1994 if (ret)
1995 return ret;
1996
1997 for_each_possible_cpu(cpu) {
1998 phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
1999 phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
2000
2001 ret = pkvm_mark_hyp(start, end);
2002 if (ret)
2003 return ret;
2004
2005 start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
2006 end = start + PAGE_SIZE;
2007 ret = pkvm_mark_hyp(start, end);
2008 if (ret)
2009 return ret;
2010 }
2011
	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
2016 static_branch_enable(&kvm_protected_mode_initialized);
2017 on_each_cpu(_kvm_host_prot_finalize, NULL, 1);
2018
2019 return 0;
2020}
2021
2022static void check_kvm_target_cpu(void *ret)
2023{
2024 *(int *)ret = kvm_target_cpu();
2025}
2026
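/* Find the vCPU whose MPIDR affinity fields match @mpidr, or NULL if none does. */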
2027struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
2028{
2029 struct kvm_vcpu *vcpu;
2030 int i;
2031
2032 mpidr &= MPIDR_HWID_BITMASK;
2033 kvm_for_each_vcpu(i, vcpu, kvm) {
2034 if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
2035 return vcpu;
2036 }
2037 return NULL;
2038}
2039
2040bool kvm_arch_has_irq_bypass(void)
2041{
2042 return true;
2043}
2044
2045int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
2046 struct irq_bypass_producer *prod)
2047{
2048 struct kvm_kernel_irqfd *irqfd =
2049 container_of(cons, struct kvm_kernel_irqfd, consumer);
2050
2051 return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
2052 &irqfd->irq_entry);
2053}
2054void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
2055 struct irq_bypass_producer *prod)
2056{
2057 struct kvm_kernel_irqfd *irqfd =
2058 container_of(cons, struct kvm_kernel_irqfd, consumer);
2059
2060 kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
2061 &irqfd->irq_entry);
2062}
2063
2064void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
2065{
2066 struct kvm_kernel_irqfd *irqfd =
2067 container_of(cons, struct kvm_kernel_irqfd, consumer);
2068
2069 kvm_arm_halt_guest(irqfd->kvm);
2070}
2071
2072void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
2073{
2074 struct kvm_kernel_irqfd *irqfd =
2075 container_of(cons, struct kvm_kernel_irqfd, consumer);
2076
2077 kvm_arm_resume_guest(irqfd->kvm);
2078}
2079
/**
 * Initialize Hyp-mode and memory mappings on all CPUs.
 */
2083int kvm_arch_init(void *opaque)
2084{
2085 int err;
2086 int ret, cpu;
2087 bool in_hyp_mode;
2088
2089 if (!is_hyp_mode_available()) {
2090 kvm_info("HYP mode not available\n");
2091 return -ENODEV;
2092 }
2093
2094 in_hyp_mode = is_kernel_in_hyp_mode();
2095
2096 if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
2097 cpus_have_final_cap(ARM64_WORKAROUND_1508412))
2098 kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
2099 "Only trusted guests should be used on this system.\n");
2100
2101 for_each_online_cpu(cpu) {
2102 smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
2103 if (ret < 0) {
2104 kvm_err("Error, CPU %d not supported!\n", cpu);
2105 return -ENODEV;
2106 }
2107 }
2108
2109 err = init_common_resources();
2110 if (err)
2111 return err;
2112
2113 err = kvm_arm_init_sve();
2114 if (err)
2115 return err;
2116
2117 if (!in_hyp_mode) {
2118 err = init_hyp_mode();
2119 if (err)
2120 goto out_err;
2121 }
2122
2123 err = kvm_init_vector_slots();
2124 if (err) {
2125 kvm_err("Cannot initialise vector slots\n");
2126 goto out_err;
2127 }
2128
2129 err = init_subsystems();
2130 if (err)
2131 goto out_hyp;
2132
2133 if (!in_hyp_mode) {
2134 err = finalize_hyp_mode();
2135 if (err) {
2136 kvm_err("Failed to finalize Hyp protection\n");
2137 goto out_hyp;
2138 }
2139 }
2140
2141 if (is_protected_kvm_enabled()) {
2142 kvm_info("Protected nVHE mode initialized successfully\n");
2143 } else if (in_hyp_mode) {
2144 kvm_info("VHE mode initialized successfully\n");
2145 } else {
2146 kvm_info("Hyp mode initialized successfully\n");
2147 }
2148
2149 return 0;
2150
2151out_hyp:
2152 hyp_cpu_pm_exit();
2153 if (!in_hyp_mode)
2154 teardown_hyp_mode();
2155out_err:
2156 return err;
2157}

/* NOP: Compiling as a module not supported */
2160void kvm_arch_exit(void)
2161{
2162 kvm_perf_teardown();
2163}
2164
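/* Handle the "kvm-arm.mode=" early parameter: "protected" or "nvhe". */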
2165static int __init early_kvm_mode_cfg(char *arg)
2166{
2167 if (!arg)
2168 return -EINVAL;
2169
2170 if (strcmp(arg, "protected") == 0) {
2171 kvm_mode = KVM_MODE_PROTECTED;
2172 return 0;
2173 }
2174
2175 if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
2176 return 0;
2177
2178 return -EINVAL;
2179}
2180early_param("kvm-arm.mode", early_kvm_mode_cfg);
2181
2182enum kvm_mode kvm_get_mode(void)
2183{
2184 return kvm_mode;
2185}
2186
2187static int arm_init(void)
2188{
2189 int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2190 return rc;
2191}
2192
2193module_init(arm_init);
2194