// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kmemleak.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
#include <linux/psci.h>
#include <trace/events/kvm.h>

#define CREATE_TRACE_POINTS
#include "trace_arm.h"

#include <linux/uaccess.h>
#include <asm/ptrace.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_emulate.h>
#include <asm/sections.h>

#include <kvm/arm_hypercalls.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_psci.h>

static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);

DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);

static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);

static bool vgic_present;

static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
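
/*
 * A note on the helper above: kvm_vcpu_exiting_guest_mode() atomically
 * switches vcpu->mode from IN_GUEST_MODE to EXITING_GUEST_MODE and
 * returns the old value. Only when the vCPU was actually IN_GUEST_MODE
 * does the caller need to send a physical IPI to force an exit; in all
 * other cases the pending request is picked up on the next guest entry.
 */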

int kvm_arch_hardware_setup(void *opaque)
{
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
			    struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_ARM_NISV_TO_USER:
		r = 0;
		set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
			&kvm->arch.flags);
		break;
	case KVM_CAP_ARM_MTE:
		mutex_lock(&kvm->lock);
		if (!system_supports_mte() || kvm->created_vcpus) {
			r = -EINVAL;
		} else {
			r = 0;
			set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_CAP_ARM_SYSTEM_SUSPEND:
		r = 0;
		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}
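
/*
 * Userspace opts in to the capabilities above through the generic
 * KVM_ENABLE_CAP ioctl on the VM file descriptor. A minimal sketch of
 * the userspace side (error handling elided):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_MTE };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Note that KVM_CAP_ARM_MTE must be enabled before any vCPU has been
 * created, as enforced by the created_vcpus check above.
 */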

static int kvm_arm_default_max_vcpus(void)
{
	return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
}

static void set_default_spectre(struct kvm *kvm)
{
	/*
	 * The default is to expose CSV2 == 1 if the HW isn't affected.
	 * Although this is a per-CPU feature, we make it global because
	 * asymmetric systems are just a nuisance.
	 *
	 * Userspace can override this as long as it doesn't promise
	 * the impossible.
	 */
	if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
		kvm->arch.pfr0_csv2 = 1;
	if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED)
		kvm->arch.pfr0_csv3 = 1;
}

/**
 * kvm_arch_init_vm - initializes a VM data structure
 * @kvm:	pointer to the KVM struct
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int ret;

	ret = kvm_arm_setup_stage2(kvm, type);
	if (ret)
		return ret;

	ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
	if (ret)
		return ret;

	ret = kvm_share_hyp(kvm, kvm + 1);
	if (ret)
		goto out_free_stage2_pgd;

	if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto out_free_stage2_pgd;
	}
	cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);

	kvm_vgic_early_init(kvm);

	/* The maximum number of VCPUs is limited by the host's GIC model */
	kvm->max_vcpus = kvm_arm_default_max_vcpus();

	set_default_spectre(kvm);
	kvm_arm_init_hypercalls(kvm);

	return ret;
out_free_stage2_pgd:
	kvm_free_stage2_pgd(&kvm->arch.mmu);
	return ret;
}

vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}

/**
 * kvm_arch_destroy_vm - destroy the VM data structure
 * @kvm:	pointer to the KVM struct
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	bitmap_free(kvm->arch.pmu_filter);
	free_cpumask_var(kvm->arch.supported_cpus);

	kvm_vgic_destroy(kvm);

	kvm_destroy_vcpus(kvm);

	kvm_unshare_hyp(kvm, kvm + 1);
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_IRQCHIP:
		r = vgic_present;
		break;
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_SYNC_MMU:
	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ARM_PSCI:
	case KVM_CAP_ARM_PSCI_0_2:
	case KVM_CAP_READONLY_MEM:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_VCPU_EVENTS:
	case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
	case KVM_CAP_ARM_NISV_TO_USER:
	case KVM_CAP_ARM_INJECT_EXT_DABT:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_VCPU_ATTRIBUTES:
	case KVM_CAP_PTP_KVM:
	case KVM_CAP_ARM_SYSTEM_SUSPEND:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		return KVM_GUESTDBG_VALID_MASK;
	case KVM_CAP_ARM_SET_DEVICE_ADDR:
		r = 1;
		break;
	case KVM_CAP_NR_VCPUS:
		/*
		 * ARM64 treats KVM_CAP_NR_CPUS differently from all other
		 * architectures, as it does not always bound it to
		 * KVM_CAP_MAX_VCPUS. It should not matter much because
		 * this is just an advisory value.
		 */
		r = min_t(unsigned int, num_online_cpus(),
			  kvm_arm_default_max_vcpus());
		break;
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		if (kvm)
			r = kvm->max_vcpus;
		else
			r = kvm_arm_default_max_vcpus();
		break;
	case KVM_CAP_MSI_DEVID:
		if (!kvm)
			r = -EINVAL;
		else
			r = kvm->arch.vgic.msis_require_devid;
		break;
	case KVM_CAP_ARM_USER_IRQ:
		/*
		 * 1: EL1_VTIMER, EL1_PTIMER, and PMU.
		 * (bump this number if adding more devices)
		 */
		r = 1;
		break;
	case KVM_CAP_ARM_MTE:
		r = system_supports_mte();
		break;
	case KVM_CAP_STEAL_TIME:
		r = kvm_arm_pvtime_supported();
		break;
	case KVM_CAP_ARM_EL1_32BIT:
		r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
		break;
	case KVM_CAP_GUEST_DEBUG_HW_BPS:
		r = get_num_brps();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_WPS:
		r = get_num_wrps();
		break;
	case KVM_CAP_ARM_PMU_V3:
		r = kvm_arm_support_pmu_v3();
		break;
	case KVM_CAP_ARM_INJECT_SERROR_ESR:
		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
		break;
	case KVM_CAP_ARM_VM_IPA_SIZE:
		r = get_kvm_ipa_limit();
		break;
	case KVM_CAP_ARM_SVE:
		r = system_supports_sve();
		break;
	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
	case KVM_CAP_ARM_PTRAUTH_GENERIC:
		r = system_has_full_ptr_auth();
		break;
	default:
		r = 0;
	}

	return r;
}

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	return -EINVAL;
}

struct kvm *kvm_arch_alloc_vm(void)
{
	size_t sz = sizeof(struct kvm);

	if (!has_vhe())
		return kzalloc(sz, GFP_KERNEL_ACCOUNT);

	return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO);
}
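
/*
 * A note on the allocator choice above: without VHE, struct kvm is later
 * shared with the EL2 code via kvm_share_hyp(), which relies on the
 * memory being covered by the kernel linear map, hence kzalloc(). With
 * VHE the kernel itself runs at EL2, no sharing is needed, and a
 * vmalloc'd (possibly highmem-backed) allocation is fine.
 */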

int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
		return -EBUSY;

	if (id >= kvm->max_vcpus)
		return -EINVAL;

	return 0;
}

int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	int err;

	/* Force users to call KVM_ARM_VCPU_INIT */
	vcpu->arch.target = -1;
	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);

	vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;

	/* Set up the timer */
	kvm_timer_vcpu_init(vcpu);

	kvm_pmu_vcpu_init(vcpu);

	kvm_arm_reset_debug_ptr(vcpu);

	kvm_arm_pvtime_vcpu_init(&vcpu->arch);

	vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;

	err = kvm_vgic_vcpu_init(vcpu);
	if (err)
		return err;

	return kvm_share_hyp(vcpu, vcpu + 1);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm)))
		static_branch_dec(&userspace_irqchip_in_use);

	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
	kvm_timer_vcpu_terminate(vcpu);
	kvm_pmu_vcpu_destroy(vcpu);

	kvm_arm_vcpu_destroy(vcpu);
}

void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{

}

void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{

}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvm_s2_mmu *mmu;
	int *last_ran;

	mmu = vcpu->arch.hw_mmu;
	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);

	/*
	 * We guarantee that both TLBs and I-cache are private to each
	 * vcpu. If detecting that a vcpu from the same VM has
	 * previously run on the same physical CPU, call into the
	 * hypervisor code to nuke the relevant contexts.
	 *
	 * We might get preempted before the vcpu actually runs, but
	 * over-invalidation doesn't affect correctness.
	 */
	if (*last_ran != vcpu->vcpu_id) {
		kvm_call_hyp(__kvm_flush_cpu_context, mmu);
		*last_ran = vcpu->vcpu_id;
	}

	vcpu->cpu = cpu;

	kvm_vgic_load(vcpu);
	kvm_timer_vcpu_load(vcpu);
	if (has_vhe())
		kvm_vcpu_load_sysregs_vhe(vcpu);
	kvm_arch_vcpu_load_fp(vcpu);
	kvm_vcpu_pmu_restore_guest(vcpu);
	if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);

	if (single_task_running())
		vcpu_clear_wfx_traps(vcpu);
	else
		vcpu_set_wfx_traps(vcpu);

	if (vcpu_has_ptrauth(vcpu))
		vcpu_ptrauth_disable(vcpu);
	kvm_arch_vcpu_load_debug_state_flags(vcpu);

	if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
		vcpu_set_on_unsupported_cpu(vcpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_arch_vcpu_put_debug_state_flags(vcpu);
	kvm_arch_vcpu_put_fp(vcpu);
	if (has_vhe())
		kvm_vcpu_put_sysregs_vhe(vcpu);
	kvm_timer_vcpu_put(vcpu);
	kvm_vgic_put(vcpu);
	kvm_vcpu_pmu_restore_host(vcpu);
	kvm_arm_vmid_clear_active();

	vcpu_clear_on_unsupported_cpu(vcpu);
	vcpu->cpu = -1;
}

void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
	kvm_make_request(KVM_REQ_SLEEP, vcpu);
	kvm_vcpu_kick(vcpu);
}

bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
}

static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
	kvm_make_request(KVM_REQ_SUSPEND, vcpu);
	kvm_vcpu_kick(vcpu);
}

static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
}
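
/*
 * Summary of the vCPU power states manipulated above: RUNNABLE is the
 * normal executing state; STOPPED (PSCI CPU_OFF or a userspace
 * KVM_SET_MP_STATE) parks the vCPU until it is explicitly powered on
 * again; SUSPENDED models the low-power state used by PSCI
 * SYSTEM_SUSPEND, which any wake-up event exits. The KVM_REQ_SLEEP and
 * KVM_REQ_SUSPEND requests are what actually pull the vCPU out of its
 * run loop.
 */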

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	*mp_state = vcpu->arch.mp_state;

	return 0;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret = 0;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_RUNNABLE:
		vcpu->arch.mp_state = *mp_state;
		break;
	case KVM_MP_STATE_STOPPED:
		kvm_arm_vcpu_power_off(vcpu);
		break;
	case KVM_MP_STATE_SUSPENDED:
		kvm_arm_vcpu_suspend(vcpu);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}

/**
 * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
 * @v:		The VCPU pointer
 *
 * If the guest CPU is not waiting for interrupts, or an interrupt line is
 * asserted, the CPU is by definition runnable.
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
		&& !kvm_arm_vcpu_stopped(v) && !v->arch.pause);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return vcpu_mode_priv(vcpu);
}

#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
{
	return *vcpu_pc(vcpu);
}
#endif

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.target >= 0;
}

/*
 * Handle both the initialisation that is being done when the vcpu is
 * run for the first time, as well as the updates that must be
 * performed before the vcpu can be run.
 */
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	int ret;

	if (!kvm_vcpu_initialized(vcpu))
		return -ENOEXEC;

	if (!kvm_arm_vcpu_is_finalized(vcpu))
		return -EPERM;

	ret = kvm_arch_vcpu_run_map_fp(vcpu);
	if (ret)
		return ret;

	if (likely(vcpu_has_run_once(vcpu)))
		return 0;

	kvm_arm_vcpu_init_debug(vcpu);

	if (likely(irqchip_in_kernel(kvm))) {
		/*
		 * Map the VGIC hardware resources before running a vcpu the
		 * first time on this VM.
		 */
		ret = kvm_vgic_map_resources(kvm);
		if (ret)
			return ret;
	}

	ret = kvm_timer_enable(vcpu);
	if (ret)
		return ret;

	ret = kvm_arm_pmu_v3_enable(vcpu);
	if (ret)
		return ret;

	if (!irqchip_in_kernel(kvm)) {
		/*
		 * Tell the rest of the code that there are userspace irqchip
		 * VMs in the wild.
		 */
		static_branch_inc(&userspace_irqchip_in_use);
	}

	/*
	 * Initialize traps for protected VMs.
	 * NOTE: Move to run in EL2 directly, rather than via a hypercall,
	 * once the code is in place for first run initialization at EL2.
	 */
	if (kvm_vm_is_protected(kvm))
		kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);

	mutex_lock(&kvm->lock);
	set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
	mutex_unlock(&kvm->lock);

	return ret;
}

bool kvm_arch_intc_initialized(struct kvm *kvm)
{
	return vgic_initialized(kvm);
}

void kvm_arm_halt_guest(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.pause = true;
	kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}

void kvm_arm_resume_guest(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.pause = false;
		__kvm_vcpu_wake_up(vcpu);
	}
}

static void kvm_vcpu_sleep(struct kvm_vcpu *vcpu)
{
	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

	rcuwait_wait_event(wait,
			   (!kvm_arm_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
			   TASK_INTERRUPTIBLE);

	if (kvm_arm_vcpu_stopped(vcpu) || vcpu->arch.pause) {
		/* Awaken to handle a signal, request we sleep again later. */
		kvm_make_request(KVM_REQ_SLEEP, vcpu);
	}

	/*
	 * Make sure we will observe a potential reset request if we've
	 * observed a change to the power state. Pairs with the smp_wmb() in
	 * kvm_psci_vcpu_on().
	 */
	smp_rmb();
}

/**
 * kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
 * @vcpu:	The VCPU pointer
 *
 * Suspend execution of a vCPU until a valid wake event is received.
 */
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
{
	/*
	 * Sync back the state of the GIC CPU interface so that we have
	 * the latest PMR and group enables. This ensures that
	 * kvm_arch_vcpu_runnable has up-to-date data to decide whether
	 * we have pending interrupts, e.g. when determining if the
	 * vCPU should block.
	 *
	 * For the same reason, we want to tell GICv4 that we need
	 * doorbells to be signalled, should an interrupt become pending.
	 */
	preempt_disable();
	kvm_vgic_vmcr_sync(vcpu);
	vgic_v4_put(vcpu, true);
	preempt_enable();

	kvm_vcpu_halt(vcpu);
	vcpu->arch.flags &= ~KVM_ARM64_WFIT;
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	preempt_disable();
	vgic_v4_load(vcpu);
	preempt_enable();
}
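
/*
 * A note on vgic_v4_put(vcpu, true) above: the second argument requests
 * that the ITS enable the vPE doorbell while the vCPU is blocked, so a
 * directly-injected vLPI that becomes pending wakes the vCPU up instead
 * of sitting unnoticed until the next vPE load.
 */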

static int kvm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
	if (!kvm_arm_vcpu_suspended(vcpu))
		return 1;

	kvm_vcpu_wfi(vcpu);

	/*
	 * The suspend state is sticky; we do not leave it until userspace
	 * explicitly marks the vCPU as runnable. Request that we suspend
	 * again later.
	 */
	kvm_make_request(KVM_REQ_SUSPEND, vcpu);

	/*
	 * Check to make sure the vCPU is actually runnable. If so, exit to
	 * userspace informing it of the wakeup condition.
	 */
	if (kvm_arch_vcpu_runnable(vcpu)) {
		memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
		vcpu->run->system_event.type = KVM_SYSTEM_EVENT_WAKEUP;
		vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
		return 0;
	}

	/*
	 * Otherwise, we were unblocked to process a different event, such as
	 * a pending signal. Return 1 and allow kvm_arch_vcpu_ioctl_run() to
	 * process the event accordingly.
	 */
	return 1;
}

/**
 * check_vcpu_requests - check and handle pending vCPU requests
 * @vcpu:	the VCPU pointer
 *
 * Return: 1 if we should enter the guest
 *	   0 if we should exit to userspace
 *	   < 0 if we should exit to userspace, where the return value
 *	   indicates an error
 */
static int check_vcpu_requests(struct kvm_vcpu *vcpu)
{
	if (kvm_request_pending(vcpu)) {
		if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
			kvm_vcpu_sleep(vcpu);

		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);

		/*
		 * Clear IRQ_PENDING requests that were made to guarantee
		 * that a VCPU sees new virtual interrupts.
		 */
		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
			kvm_update_stolen_time(vcpu);

		if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
			/* The distributor enable bits were changed */
			preempt_disable();
			vgic_v4_put(vcpu, false);
			vgic_v4_load(vcpu);
			preempt_enable();
		}

		if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
			kvm_pmu_handle_pmcr(vcpu,
					    __vcpu_sys_reg(vcpu, PMCR_EL0));

		if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
			return kvm_vcpu_suspend(vcpu);
	}

	return 1;
}

static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
{
	if (likely(!vcpu_mode_is_32bit(vcpu)))
		return false;

	return !system_supports_32bit_el0() ||
		static_branch_unlikely(&arm64_mismatched_32bit_el0);
}
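
/*
 * Rationale for the check above: a vCPU observed in AArch32 mode is only
 * acceptable when every CPU supports 32-bit EL0. On asymmetric systems
 * (arm64_mismatched_32bit_el0) that guarantee cannot be maintained
 * across vCPU migration, so AArch32 guest mode is treated as illegal
 * there as well.
 */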

/**
 * kvm_vcpu_exit_request - returns true if the VCPU should *not* enter the
 *			   guest
 * @vcpu:	The VCPU pointer
 * @ret:	Pointer to write optional return code
 *
 * Returns: true if the VCPU needs to return to a preemptible + interruptible
 *	    kernel context and skip guest entry.
 *
 * This function disambiguates between two different types of exits: exits
 * to a preemptible + interruptible kernel context and exits to userspace.
 * For an exit to userspace, this function will write the return code to ret
 * and return true. For an exit to preemptible + interruptible kernel context
 * (i.e. check for pending work and re-enter), return true without writing
 * to ret.
 */
static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
{
	struct kvm_run *run = vcpu->run;

	/*
	 * If we're using a userspace irqchip, then check if we need
	 * to tell a userspace irqchip about timer or PMU level
	 * changes and if so, exit to userspace (the actual level
	 * state gets updated in kvm_timer_update_run and
	 * kvm_pmu_update_run below).
	 */
	if (static_branch_unlikely(&userspace_irqchip_in_use)) {
		if (kvm_timer_should_notify_user(vcpu) ||
		    kvm_pmu_should_notify_user(vcpu)) {
			*ret = -EINTR;
			run->exit_reason = KVM_EXIT_INTR;
			return true;
		}
	}

	if (unlikely(vcpu_on_unsupported_cpu(vcpu))) {
		run->exit_reason = KVM_EXIT_FAIL_ENTRY;
		run->fail_entry.hardware_entry_failure_reason = KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED;
		run->fail_entry.cpu = smp_processor_id();
		*ret = 0;
		return true;
	}

	return kvm_request_pending(vcpu) ||
			xfer_to_guest_mode_work_pending();
}

/*
 * Actually run the vCPU, entering an RCU extended quiescent state (EQS)
 * while the vCPU is running.
 *
 * This must be noinstr as instrumentation may make use of RCU, and this is
 * not safe during the EQS.
 */
static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
{
	int ret;

	guest_state_enter_irqoff();
	ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
	guest_state_exit_irqoff();

	return ret;
}
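
/*
 * A note on kvm_call_hyp_ret() above: under VHE the kernel already runs
 * at EL2, so this compiles down to an ordinary function call; under nVHE
 * it issues an HVC to the EL2 vectors, and the world switch is performed
 * by the hypervisor code.
 */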

/**
 * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
 * @vcpu:	The VCPU pointer
 *
 * This function is called through the VCPU_RUN ioctl called from user space.
 * It will execute VM code in a loop until the time slice for the process is
 * used or some emulation is needed from user space in which case the
 * function will return with return value 0 and with the kvm_run structure
 * filled in with the required data for the requested emulation.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;
	int ret;

	if (run->exit_reason == KVM_EXIT_MMIO) {
		ret = kvm_handle_mmio_return(vcpu);
		if (ret)
			return ret;
	}

	vcpu_load(vcpu);

	if (run->immediate_exit) {
		ret = -EINTR;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	ret = 1;
	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->flags = 0;
	while (ret > 0) {
		/*
		 * Check conditions before entering the guest
		 */
		ret = xfer_to_guest_mode_handle_work(vcpu);
		if (!ret)
			ret = 1;

		if (ret > 0)
			ret = check_vcpu_requests(vcpu);

		/*
		 * Preparing the interrupts to be injected also
		 * involves poking the GIC, which must be done in a
		 * non-preemptible context.
		 */
		preempt_disable();

		/*
		 * The VMID allocator only tracks active VMIDs per
		 * physical CPU, and therefore the VMID allocated may not be
		 * preserved on VMID roll-over if the task was preempted,
		 * making a thread's VMID inactive. So we need to call
		 * kvm_arm_vmid_update() in non-preemptible context.
		 */
		kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid);

		kvm_pmu_flush_hwstate(vcpu);

		local_irq_disable();

		kvm_vgic_flush_hwstate(vcpu);

		kvm_pmu_update_vcpu_events(vcpu);

		/*
		 * Ensure we set mode to IN_GUEST_MODE after we disable
		 * interrupts and before the final VCPU requests check.
		 * See the comment in kvm_vcpu_exiting_guest_mode() and
		 * Documentation/virt/kvm/vcpu-requests.rst
		 */
		smp_store_mb(vcpu->mode, IN_GUEST_MODE);

		if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
			vcpu->mode = OUTSIDE_GUEST_MODE;
			isb(); /* Ensure work in x_flush_hwstate is committed */
			kvm_pmu_sync_hwstate(vcpu);
			if (static_branch_unlikely(&userspace_irqchip_in_use))
				kvm_timer_sync_user(vcpu);
			kvm_vgic_sync_hwstate(vcpu);
			local_irq_enable();
			preempt_enable();
			continue;
		}

		kvm_arm_setup_debug(vcpu);
		kvm_arch_vcpu_ctxflush_fp(vcpu);

		/**************************************************************
		 * Enter the guest
		 */
		trace_kvm_entry(*vcpu_pc(vcpu));
		guest_timing_enter_irqoff();

		ret = kvm_arm_vcpu_enter_exit(vcpu);

		vcpu->mode = OUTSIDE_GUEST_MODE;
		vcpu->stat.exits++;
		/*
		 * Back from guest
		 *************************************************************/

		kvm_arm_clear_debug(vcpu);

		/*
		 * We must sync the PMU state before the vgic state so
		 * that the vgic can properly sample the updated state of the
		 * interrupt line.
		 */
		kvm_pmu_sync_hwstate(vcpu);

		/*
		 * Sync the vgic state before syncing the timer state because
		 * the timer code needs to know if the virtual timer
		 * interrupts are active.
		 */
		kvm_vgic_sync_hwstate(vcpu);

		/*
		 * Sync the timer hardware state before enabling interrupts as
		 * we don't want vtimer interrupts to race with syncing the
		 * timer virtual interrupt state.
		 */
		if (static_branch_unlikely(&userspace_irqchip_in_use))
			kvm_timer_sync_user(vcpu);

		kvm_arch_vcpu_ctxsync_fp(vcpu);

		/*
		 * We must ensure that any pending interrupts are taken before
		 * we exit guest timing so that timer ticks are accounted as
		 * guest time. Transiently unmask interrupts so that any
		 * pending interrupts are taken.
		 *
		 * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other
		 * context synchronization event) is necessary to ensure that
		 * pending interrupts are taken.
		 */
		if (ARM_EXCEPTION_CODE(ret) == ARM_EXCEPTION_IRQ) {
			local_irq_enable();
			isb();
			local_irq_disable();
		}

		guest_timing_exit_irqoff();

		local_irq_enable();

		trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));

		/* Exit types that need handling before we can be preempted */
		handle_exit_early(vcpu, ret);

		preempt_enable();

		/*
		 * The ARMv8 architecture doesn't give the hypervisor
		 * a mechanism to prevent a guest from dropping to AArch32 EL0
		 * if implemented by the CPU. If we spot the guest in such
		 * state and that we decided it wasn't supposed to do so (like
		 * with the asymmetric AArch32 case), return to userspace with
		 * a fatal error.
		 */
		if (vcpu_mode_is_bad_32bit(vcpu)) {
			/*
			 * As we have caught the guest red-handed, decide that
			 * it isn't fit for purpose anymore by making the vcpu
			 * invalid. The VMM can try and fix it by issuing a
			 * KVM_ARM_VCPU_INIT if it really wants to.
			 */
			vcpu->arch.target = -1;
			ret = ARM_EXCEPTION_IL;
		}

		ret = handle_exit(vcpu, ret);
	}

	/* Tell userspace about in-kernel device output levels */
	if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
		kvm_timer_update_run(vcpu);
		kvm_pmu_update_run(vcpu);
	}

	kvm_sigset_deactivate(vcpu);

out:
	/*
	 * In the unlikely event that we are returning to userspace
	 * with pending exceptions or PC adjustment, commit these
	 * adjustments in order to give userspace a consistent view of
	 * our state. Note that this relies on __kvm_adjust_pc()
	 * being preempt-safe on VHE.
	 */
	if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
					 KVM_ARM64_INCREMENT_PC)))
		kvm_call_hyp(__kvm_adjust_pc, vcpu);

	vcpu_put(vcpu);
	return ret;
}

static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
{
	int bit_index;
	bool set;
	unsigned long *hcr;

	if (number == KVM_ARM_IRQ_CPU_IRQ)
		bit_index = __ffs(HCR_VI);
	else /* KVM_ARM_IRQ_CPU_FIQ */
		bit_index = __ffs(HCR_VF);

	hcr = vcpu_hcr(vcpu);
	if (level)
		set = test_and_set_bit(bit_index, hcr);
	else
		set = test_and_clear_bit(bit_index, hcr);

	/*
	 * If we didn't change anything, no need to wake up or kick other CPUs
	 */
	if (set == level)
		return 0;

	/*
	 * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
	 * trigger a world-switch round on the running physical CPU to set
	 * the virtual IRQ/FIQ fields in the HCR appropriately.
	 */
	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return 0;
}
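
/*
 * Background for the helper above: HCR_EL2.VI and HCR_EL2.VF are the
 * architectural "virtual IRQ/FIQ pending" bits. While set, the guest
 * sees a pending IRQ/FIQ as soon as its PSTATE unmasks it, without any
 * in-kernel irqchip being involved; this is the mechanism a userspace
 * irqchip uses to inject CPU-level interrupts.
 */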

int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
			  bool line_status)
{
	u32 irq = irq_level->irq;
	unsigned int irq_type, vcpu_idx, irq_num;
	int nrcpus = atomic_read(&kvm->online_vcpus);
	struct kvm_vcpu *vcpu = NULL;
	bool level = irq_level->level;

	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
	vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;

	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);

	switch (irq_type) {
	case KVM_ARM_IRQ_TYPE_CPU:
		if (irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num > KVM_ARM_IRQ_CPU_FIQ)
			return -EINVAL;

		return vcpu_interrupt_line(vcpu, irq_num, level);
	case KVM_ARM_IRQ_TYPE_PPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (vcpu_idx >= nrcpus)
			return -EINVAL;

		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
		if (!vcpu)
			return -EINVAL;

		if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
	case KVM_ARM_IRQ_TYPE_SPI:
		if (!irqchip_in_kernel(kvm))
			return -ENXIO;

		if (irq_num < VGIC_NR_PRIVATE_IRQS)
			return -EINVAL;

		return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
	}

	return -EINVAL;
}
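
/*
 * For reference, the encoding decoded above follows KVM_IRQ_LINE in
 * Documentation/virt/kvm/api.rst:
 *
 *	bits:  |  31 ... 28  | 27 ... 24 | 23 ... 16 | 15 ... 0 |
 *	field: | vcpu2_index | irq_type  | vcpu_index|  irq_id  |
 *
 * irq_type 0 drives the CPU IRQ/FIQ lines (userspace irqchip only),
 * 1 targets an in-kernel GIC SPI, and 2 an in-kernel GIC PPI on the
 * vCPU selected by vcpu_index (extended by vcpu2_index).
 */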

static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
			       const struct kvm_vcpu_init *init)
{
	unsigned int i, ret;
	u32 phys_target = kvm_target_cpu();

	if (init->target != phys_target)
		return -EINVAL;

	/*
	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
	 * use the same target.
	 */
	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
		return -EINVAL;

	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
	for (i = 0; i < sizeof(init->features) * 8; i++) {
		bool set = (init->features[i / 32] & (1 << (i % 32)));

		if (set && i >= KVM_VCPU_MAX_FEATURES)
			return -ENOENT;

		/*
		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
		 * use the same feature set.
		 */
		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
		    test_bit(i, vcpu->arch.features) != set)
			return -EINVAL;

		if (set)
			set_bit(i, vcpu->arch.features);
	}

	vcpu->arch.target = phys_target;

	/* Now we know what it is, we can reset it. */
	ret = kvm_reset_vcpu(vcpu);
	if (ret) {
		vcpu->arch.target = -1;
		bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
	}

	return ret;
}

static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
					 struct kvm_vcpu_init *init)
{
	int ret;

	ret = kvm_vcpu_set_target(vcpu, init);
	if (ret)
		return ret;

	/*
	 * Ensure a rebooted VM will fault in RAM pages and detect if the
	 * guest MMU is turned off and flush the caches as needed.
	 *
	 * S2FWB enforces all memory accesses to RAM being cacheable,
	 * ensuring that the data side is always coherent. We still
	 * need to invalidate the I-cache though, as FWB does *not*
	 * imply CTR_EL0.DIC.
	 */
	if (vcpu_has_run_once(vcpu)) {
		if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
			stage2_unmap_vm(vcpu->kvm);
		else
			icache_inval_all_pou();
	}

	vcpu_reset_hcr(vcpu);
	vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;

	/*
	 * Handle the "start in power-off" case.
	 */
	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
		kvm_arm_vcpu_power_off(vcpu);
	else
		vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;

	return 0;
}

static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
				 struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->group) {
	default:
		ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr);
		break;
	}

	return ret;
}

static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	memset(events, 0, sizeof(*events));

	return __kvm_arm_vcpu_get_events(vcpu, events);
}

static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
				   struct kvm_vcpu_events *events)
{
	int i;

	/* check whether the reserved field is zero */
	for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
		if (events->reserved[i])
			return -EINVAL;

	/* check whether the pad field is zero */
	for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
		if (events->exception.pad[i])
			return -EINVAL;

	return __kvm_arm_vcpu_set_events(vcpu, events);
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	long r;

	switch (ioctl) {
	case KVM_ARM_VCPU_INIT: {
		struct kvm_vcpu_init init;

		r = -EFAULT;
		if (copy_from_user(&init, argp, sizeof(init)))
			break;

		r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
		break;
	}
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;

		/*
		 * We could owe a reset due to PSCI. Handle the pending reset
		 * here to ensure userspace register accesses are ordered
		 * after the reset.
		 */
		if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
			kvm_reset_vcpu(vcpu);

		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arm_set_reg(vcpu, &reg);
		else
			r = kvm_arm_get_reg(vcpu, &reg);
		break;
	}
	case KVM_GET_REG_LIST: {
		struct kvm_reg_list __user *user_list = argp;
		struct kvm_reg_list reg_list;
		unsigned n;

		r = -ENOEXEC;
		if (unlikely(!kvm_vcpu_initialized(vcpu)))
			break;

		r = -EPERM;
		if (!kvm_arm_vcpu_is_finalized(vcpu))
			break;

		r = -EFAULT;
		if (copy_from_user(&reg_list, user_list, sizeof(reg_list)))
			break;
		n = reg_list.n;
		reg_list.n = kvm_arm_num_regs(vcpu);
		if (copy_to_user(user_list, &reg_list, sizeof(reg_list)))
			break;
		r = -E2BIG;
		if (n < reg_list.n)
			break;
		r = kvm_arm_copy_reg_indices(vcpu, user_list->reg);
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_set_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_get_attr(vcpu, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, argp, sizeof(attr)))
			break;
		r = kvm_arm_vcpu_has_attr(vcpu, &attr);
		break;
	}
	case KVM_GET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (kvm_arm_vcpu_get_events(vcpu, &events))
			return -EINVAL;

		if (copy_to_user(argp, &events, sizeof(events)))
			return -EFAULT;

		return 0;
	}
	case KVM_SET_VCPU_EVENTS: {
		struct kvm_vcpu_events events;

		if (copy_from_user(&events, argp, sizeof(events)))
			return -EFAULT;

		return kvm_arm_vcpu_set_events(vcpu, &events);
	}
	case KVM_ARM_VCPU_FINALIZE: {
		int what;

		if (!kvm_vcpu_initialized(vcpu))
			return -ENOEXEC;

		if (get_user(what, (const int __user *)argp))
			return -EFAULT;

		return kvm_arm_vcpu_finalize(vcpu, what);
	}
	default:
		r = -EINVAL;
	}

	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{

}

void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
					const struct kvm_memory_slot *memslot)
{
	kvm_flush_remote_tlbs(kvm);
}

static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
					struct kvm_arm_device_addr *dev_addr)
{
	unsigned long dev_id, type;

	dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >>
		KVM_ARM_DEVICE_ID_SHIFT;
	type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >>
		KVM_ARM_DEVICE_TYPE_SHIFT;

	switch (dev_id) {
	case KVM_ARM_DEVICE_VGIC_V2:
		if (!vgic_present)
			return -ENXIO;
		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
	default:
		return -ENODEV;
	}
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_CREATE_IRQCHIP: {
		int ret;
		if (!vgic_present)
			return -ENXIO;
		mutex_lock(&kvm->lock);
		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
		mutex_unlock(&kvm->lock);
		return ret;
	}
	case KVM_ARM_SET_DEVICE_ADDR: {
		struct kvm_arm_device_addr dev_addr;

		if (copy_from_user(&dev_addr, argp, sizeof(dev_addr)))
			return -EFAULT;
		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
	}
	case KVM_ARM_PREFERRED_TARGET: {
		struct kvm_vcpu_init init;

		kvm_vcpu_preferred_target(&init);

		if (copy_to_user(argp, &init, sizeof(init)))
			return -EFAULT;

		return 0;
	}
	case KVM_ARM_MTE_COPY_TAGS: {
		struct kvm_arm_copy_mte_tags copy_tags;

		if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
			return -EFAULT;
		return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
	}
	default:
		return -EINVAL;
	}
}

static unsigned long nvhe_percpu_size(void)
{
	return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
		(unsigned long)CHOOSE_NVHE_SYM(__per_cpu_start);
}

static unsigned long nvhe_percpu_order(void)
{
	unsigned long size = nvhe_percpu_size();

	return size ? get_order(size) : 0;
}
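
/*
 * A note on the helpers above: the nVHE hypervisor carries its own copy
 * of the kernel's per-CPU region, delimited by the hyp object's
 * __per_cpu_start/__per_cpu_end markers. Each CPU gets a private copy,
 * allocated in whole pages, hence the get_order() rounding.
 */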

/* A lookup table holding the hypervisor VA for each vector slot */
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];

static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
{
	hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot);
}

static int kvm_init_vector_slots(void)
{
	int err;
	void *base;

	base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
	kvm_init_vector_slot(base, HYP_VECTOR_DIRECT);

	base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
	kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT);

	if (kvm_system_needs_idmapped_vectors() &&
	    !is_protected_kvm_enabled()) {
		err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs),
					       __BP_HARDEN_HYP_VECS_SZ, &base);
		if (err)
			return err;
	}

	kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT);
	kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT);
	return 0;
}

static void cpu_prepare_hyp_mode(int cpu)
{
	struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
	unsigned long tcr;

	/*
	 * Calculate the raw per-cpu offset without a translation from the
	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
	 * so that we can use adr_l to access per-cpu variables in EL2.
	 * Also drop the KASAN tag which gets in the way of the raw address.
	 */
	params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) -
			    (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));

	params->mair_el2 = read_sysreg(mair_el1);

	/*
	 * The ID map may be configured to use an extended virtual address
	 * range. This is only the case when system RAM is out of range for
	 * the currently configured page size and VA_BITS, in which case we
	 * will also need the extended virtual range for the HYP ID map, or
	 * we won't be able to enable the EL2 MMU.
	 *
	 * However, at EL2, there is only one TTBR register, and we can't
	 * switch between translation tables *and* update TCR_EL2.T0SZ at the
	 * same time. Bottom line: we need to use the extended range with
	 * *both* our translation tables.
	 *
	 * So use the same T0SZ value we use for the ID map.
	 */
	tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
	tcr &= ~TCR_T0SZ_MASK;
	tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
	params->tcr_el2 = tcr;

	params->pgd_pa = kvm_mmu_get_httbr();
	if (is_protected_kvm_enabled())
		params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
	else
		params->hcr_el2 = HCR_HOST_NVHE_FLAGS;
	params->vttbr = params->vtcr = 0;

	/*
	 * Flush the init params from the data cache because the struct will
	 * be read while the MMU is off.
	 */
	kvm_flush_dcache_to_poc(params, sizeof(*params));
}

static void hyp_install_host_vector(void)
{
	struct kvm_nvhe_init_params *params;
	struct arm_smccc_res res;

	/* Switch from the HYP stub to our own HYP init vector */
	__hyp_set_vectors(kvm_get_idmap_vector());

	/*
	 * Call initialization code, and switch to the full blown HYP code.
	 * If the cpucaps haven't been finalized yet, something has gone very
	 * wrong, and hyp will crash and burn when it uses any
	 * cpus_have_const_cap() wrapper.
	 */
	BUG_ON(!system_capabilities_finalized());
	params = this_cpu_ptr_nvhe_sym(kvm_init_params);
	arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
	WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
}

static void cpu_init_hyp_mode(void)
{
	hyp_install_host_vector();

	/*
	 * Disabling SSBD on a non-VHE system requires us to enable SSBS
	 * at EL2.
	 */
	if (this_cpu_has_cap(ARM64_SSBS) &&
	    arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) {
		kvm_call_hyp_nvhe(__kvm_enable_ssbs);
	}
}

static void cpu_hyp_reset(void)
{
	if (!is_kernel_in_hyp_mode())
		__hyp_reset_vectors();
}

/*
 * EL2 vectors can be mapped and rerouted in a number of ways,
 * depending on the kernel configuration and CPU present:
 *
 * - If the CPU is affected by Spectre-v2, the hardening sequence is
 *   placed in one of the vector slots, which is executed before jumping
 *   to the real vectors.
 *
 * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot
 *   containing the hardening sequence is mapped next to the idmap page,
 *   and executed before jumping to the real vectors.
 *
 * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an
 *   empty slot is selected, mapped next to the idmap page, and
 *   executed before jumping to the real vectors.
 *
 * Note that ARM64_SPECTRE_V3A is somewhat incompatible with
 * VHE, as we don't have hypervisor-specific mappings. If the system
 * is VHE and yet selects this capability, it will be ignored.
 */
static void cpu_set_hyp_vector(void)
{
	struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
	void *vector = hyp_spectre_vector_selector[data->slot];

	if (!is_protected_kvm_enabled())
		*this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
	else
		kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
}

static void cpu_hyp_init_context(void)
{
	kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);

	if (!is_kernel_in_hyp_mode())
		cpu_init_hyp_mode();
}

static void cpu_hyp_init_features(void)
{
	cpu_set_hyp_vector();
	kvm_arm_init_debug();

	if (is_kernel_in_hyp_mode())
		kvm_timer_init_vhe();

	if (vgic_present)
		kvm_vgic_init_cpu_hardware();
}

static void cpu_hyp_reinit(void)
{
	cpu_hyp_reset();
	cpu_hyp_init_context();
	cpu_hyp_init_features();
}

static void _kvm_arch_hardware_enable(void *discard)
{
	if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reinit();
		__this_cpu_write(kvm_arm_hardware_enabled, 1);
	}
}

int kvm_arch_hardware_enable(void)
{
	_kvm_arch_hardware_enable(NULL);
	return 0;
}

static void _kvm_arch_hardware_disable(void *discard)
{
	if (__this_cpu_read(kvm_arm_hardware_enabled)) {
		cpu_hyp_reset();
		__this_cpu_write(kvm_arm_hardware_enabled, 0);
	}
}

void kvm_arch_hardware_disable(void)
{
	if (!is_protected_kvm_enabled())
		_kvm_arch_hardware_disable(NULL);
}

#ifdef CONFIG_CPU_PM
static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
				    unsigned long cmd,
				    void *v)
{
	/*
	 * kvm_arm_hardware_enabled is left with its old value over
	 * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
	 * re-enable hyp.
	 */
	switch (cmd) {
	case CPU_PM_ENTER:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/*
			 * don't update kvm_arm_hardware_enabled here
			 * so that the hardware will be re-enabled
			 * when we resume. See below.
			 */
			cpu_hyp_reset();

		return NOTIFY_OK;
	case CPU_PM_ENTER_FAILED:
	case CPU_PM_EXIT:
		if (__this_cpu_read(kvm_arm_hardware_enabled))
			/* The hardware was enabled before suspend. */
			cpu_hyp_reinit();

		return NOTIFY_OK;

	default:
		return NOTIFY_DONE;
	}
}

static struct notifier_block hyp_init_cpu_pm_nb = {
	.notifier_call = hyp_init_cpu_pm_notifier,
};

static void hyp_cpu_pm_init(void)
{
	if (!is_protected_kvm_enabled())
		cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
}
static void hyp_cpu_pm_exit(void)
{
	if (!is_protected_kvm_enabled())
		cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
}
#else
static inline void hyp_cpu_pm_init(void)
{
}
static inline void hyp_cpu_pm_exit(void)
{
}
#endif

static void init_cpu_logical_map(void)
{
	unsigned int cpu;

	/*
	 * Copy the MPIDR <-> logical CPU ID mapping to hyp.
	 * Only copy the set of online CPUs whose features have been checked
	 * against the finalized system capabilities. The hypervisor will not
	 * allow any other CPUs from the `possible` set to boot.
	 */
	for_each_online_cpu(cpu)
		hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu);
}

#define init_psci_0_1_impl_state(config, what)	\
	config.psci_0_1_ ## what ## _implemented = psci_ops.what

static bool init_psci_relay(void)
{
	/*
	 * If PSCI has not been initialized, protected KVM cannot install
	 * itself on newly booted CPUs.
	 */
	if (!psci_ops.get_version) {
		kvm_err("Cannot initialize protected mode without PSCI\n");
		return false;
	}

	kvm_host_psci_config.version = psci_ops.get_version();

	if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) {
		kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids();
		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend);
		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on);
		init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off);
		init_psci_0_1_impl_state(kvm_host_psci_config, migrate);
	}
	return true;
}
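
/*
 * Why the relay probed above exists: in protected mode the host's SMCs
 * are trapped by the hypervisor, so the PSCI version and function IDs
 * discovered here are handed to the nVHE code, which proxies the host's
 * PSCI requests to the firmware and retains control of the CPU power-on
 * entry points.
 */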

static int init_subsystems(void)
{
	int err = 0;

	/*
	 * Enable hardware so that subsystem initialisation can access EL2.
	 */
	on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);

	/*
	 * Register CPU lower-power notifier
	 */
	hyp_cpu_pm_init();

	/*
	 * Init HYP view of VGIC
	 */
	err = kvm_vgic_hyp_init();
	switch (err) {
	case 0:
		vgic_present = true;
		break;
	case -ENODEV:
	case -ENXIO:
		vgic_present = false;
		err = 0;
		break;
	default:
		goto out;
	}

	/*
	 * Init HYP architected timer support
	 */
	err = kvm_timer_hyp_init(vgic_present);
	if (err)
		goto out;

	kvm_register_perf_callbacks(NULL);

out:
	if (err || !is_protected_kvm_enabled())
		on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);

	return err;
}

static void teardown_hyp_mode(void)
{
	int cpu;

	free_hyp_pgds();
	for_each_possible_cpu(cpu) {
		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
		free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
	}
}

static int do_pkvm_init(u32 hyp_va_bits)
{
	void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
	int ret;

	preempt_disable();
	cpu_hyp_init_context();
	ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
				num_possible_cpus(), kern_hyp_va(per_cpu_base),
				hyp_va_bits);
	cpu_hyp_init_features();

	/*
	 * The stub hypercalls are now disabled, so set our local flag to
	 * prevent a later re-init attempt in kvm_arch_hardware_enable().
	 */
	__this_cpu_write(kvm_arm_hardware_enabled, 1);
	preempt_enable();

	return ret;
}

static int kvm_hyp_init_protection(u32 hyp_va_bits)
{
	void *addr = phys_to_virt(hyp_mem_base);
	int ret;

	kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
	kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
	kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
	kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
	kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
	kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
	kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);

	ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
	if (ret)
		return ret;

	ret = do_pkvm_init(hyp_va_bits);
	if (ret)
		return ret;

	free_hyp_pgds();

	return 0;
}

/**
 * Inits Hyp-mode on all online CPUs
 */
static int init_hyp_mode(void)
{
	u32 hyp_va_bits;
	int cpu;
	int err = -ENOMEM;

	/*
	 * The protected Hyp-mode cannot be initialized if the memory pool
	 * allocation has failed.
	 */
	if (is_protected_kvm_enabled() && !hyp_mem_base)
		goto out_err;

	/*
	 * Allocate Hyp PGD and setup Hyp identity mapping
	 */
	err = kvm_mmu_init(&hyp_va_bits);
	if (err)
		goto out_err;

	/*
	 * Allocate stack pages for Hypervisor-mode
	 */
	for_each_possible_cpu(cpu) {
		unsigned long stack_page;

		stack_page = __get_free_page(GFP_KERNEL);
		if (!stack_page) {
			err = -ENOMEM;
			goto out_err;
		}

		per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
	}

	/*
	 * Allocate and initialize pages for Hypervisor-mode percpu regions.
	 */
	for_each_possible_cpu(cpu) {
		struct page *page;
		void *page_addr;

		page = alloc_pages(GFP_KERNEL, nvhe_percpu_order());
		if (!page) {
			err = -ENOMEM;
			goto out_err;
		}

		page_addr = page_address(page);
		memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
		kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
	}

	/*
	 * Map the Hyp-code called directly from the host
	 */
	err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start),
				  kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC);
	if (err) {
		kvm_err("Cannot map world-switch code\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
				  kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map .hyp.rodata section\n");
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__start_rodata),
				  kvm_ksym_ref(__end_rodata), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map rodata section\n");
		goto out_err;
	}

	/*
	 * .hyp.bss is guaranteed to be placed at the beginning of the .bss
	 * section thanks to an assertion in the linker script. Map it RW and
	 * let the rest of the .bss be mapped RO.
	 */
	err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start),
				  kvm_ksym_ref(__hyp_bss_end), PAGE_HYP);
	if (err) {
		kvm_err("Cannot map hyp bss section: %d\n", err);
		goto out_err;
	}

	err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end),
				  kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
	if (err) {
		kvm_err("Cannot map bss section\n");
		goto out_err;
	}

	/*
	 * Map the Hyp stack pages
	 */
	for_each_possible_cpu(cpu) {
		struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
		char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
		unsigned long hyp_addr;

		/*
		 * Allocate a contiguous HYP private VA range for the stack
		 * and guard page. The allocation is also aligned based on
		 * the order of its size.
		 */
		err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
		if (err) {
			kvm_err("Cannot allocate hyp stack guard page\n");
			goto out_err;
		}

		/*
		 * Since the stack grows downwards, map the stack to the page
		 * at the higher address and leave the lower guard page
		 * unbacked.
		 *
		 * Any valid stack address now has the PAGE_SHIFT bit as 1
		 * and addresses corresponding to the guard page have the
		 * PAGE_SHIFT bit as 0 - this is used for overflow detection.
		 */
		err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
					    __pa(stack_page), PAGE_HYP);
		if (err) {
			kvm_err("Cannot map hyp stack\n");
			goto out_err;
		}

		/*
		 * Save the stack PA in nvhe_init_params. This will be needed
		 * to recreate the stack mapping in protected nVHE mode.
		 * __hyp_pa() won't do the right thing there, since the stack
		 * has been mapped in the flexible private VA space.
		 */
		params->stack_pa = __pa(stack_page);

		params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
	}

	for_each_possible_cpu(cpu) {
		char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
		char *percpu_end = percpu_begin + nvhe_percpu_size();

		/* Map Hyp percpu pages */
		err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP);
		if (err) {
			kvm_err("Cannot map hyp percpu region\n");
			goto out_err;
		}

		/* Prepare the CPU initialization parameters */
		cpu_prepare_hyp_mode(cpu);
	}

	if (is_protected_kvm_enabled()) {
		init_cpu_logical_map();

		if (!init_psci_relay()) {
			err = -ENODEV;
			goto out_err;
		}
	}

	if (is_protected_kvm_enabled()) {
		err = kvm_hyp_init_protection(hyp_va_bits);
		if (err) {
			kvm_err("Failed to init hyp memory protection\n");
			goto out_err;
		}
	}

	return 0;

out_err:
	teardown_hyp_mode();
	kvm_err("error initializing Hyp mode: %d\n", err);
	return err;
}

static void _kvm_host_prot_finalize(void *arg)
{
	int *err = arg;

	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
		WRITE_ONCE(*err, -EINVAL);
}

static int pkvm_drop_host_privileges(void)
{
	int ret = 0;

	/*
	 * Flip the static key upfront as that may no longer be possible
	 * once the host stage 2 is installed.
	 */
	static_branch_enable(&kvm_protected_mode_initialized);
	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
	return ret;
}

static int finalize_hyp_mode(void)
{
	if (!is_protected_kvm_enabled())
		return 0;

	/*
	 * Exclude HYP sections from kmemleak so that they don't get peeked
	 * at, which would end badly once inaccessible.
	 */
	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
	kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size);
	return pkvm_drop_host_privileges();
}

struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	mpidr &= MPIDR_HWID_BITMASK;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
			return vcpu;
	}
	return NULL;
}
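
/*
 * A note on the lookup above: guests name CPUs by MPIDR affinity (for
 * example in PSCI CPU_ON arguments), not by KVM vcpu index, so the
 * search compares the requested value against each vCPU's MPIDR_EL1
 * affinity fields after masking off the non-affinity bits.
 */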

bool kvm_arch_has_irq_bypass(void)
{
	return true;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
					  &irqfd->irq_entry);
}

void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
				     &irqfd->irq_entry);
}

void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_halt_guest(irqfd->kvm);
}

void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);

	kvm_arm_resume_guest(irqfd->kvm);
}

/**
 * Initialize Hyp-mode and memory mappings on all CPUs.
 */
int kvm_arch_init(void *opaque)
{
	int err;
	bool in_hyp_mode;

	if (!is_hyp_mode_available()) {
		kvm_info("HYP mode not available\n");
		return -ENODEV;
	}

	if (kvm_get_mode() == KVM_MODE_NONE) {
		kvm_info("KVM disabled from command line\n");
		return -ENODEV;
	}

	err = kvm_sys_reg_table_init();
	if (err) {
		kvm_info("Error initializing system register tables");
		return err;
	}

	in_hyp_mode = is_kernel_in_hyp_mode();

	if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
	    cpus_have_final_cap(ARM64_WORKAROUND_1508412))
		kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
			 "Only trusted guests should be used on this system.\n");

	err = kvm_set_ipa_limit();
	if (err)
		return err;

	err = kvm_arm_init_sve();
	if (err)
		return err;

	err = kvm_arm_vmid_alloc_init();
	if (err) {
		kvm_err("Failed to initialize VMID allocator.\n");
		return err;
	}

	if (!in_hyp_mode) {
		err = init_hyp_mode();
		if (err)
			goto out_err;
	}

	err = kvm_init_vector_slots();
	if (err) {
		kvm_err("Cannot initialise vector slots\n");
		goto out_err;
	}

	err = init_subsystems();
	if (err)
		goto out_hyp;

	if (!in_hyp_mode) {
		err = finalize_hyp_mode();
		if (err) {
			kvm_err("Failed to finalize Hyp protection\n");
			goto out_hyp;
		}
	}

	if (is_protected_kvm_enabled()) {
		kvm_info("Protected nVHE mode initialized successfully\n");
	} else if (in_hyp_mode) {
		kvm_info("VHE mode initialized successfully\n");
	} else {
		kvm_info("Hyp mode initialized successfully\n");
	}

	return 0;

out_hyp:
	hyp_cpu_pm_exit();
	if (!in_hyp_mode)
		teardown_hyp_mode();
out_err:
	kvm_arm_vmid_alloc_free();
	return err;
}

/* NOP: Compiling as a module not supported */
void kvm_arch_exit(void)
{
	kvm_unregister_perf_callbacks();
}

static int __init early_kvm_mode_cfg(char *arg)
{
	if (!arg)
		return -EINVAL;

	if (strcmp(arg, "protected") == 0) {
		if (!is_kernel_in_hyp_mode())
			kvm_mode = KVM_MODE_PROTECTED;
		else
			pr_warn_once("Protected KVM not available with VHE\n");

		return 0;
	}

	if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) {
		kvm_mode = KVM_MODE_DEFAULT;
		return 0;
	}

	if (strcmp(arg, "none") == 0) {
		kvm_mode = KVM_MODE_NONE;
		return 0;
	}

	return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);
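
/*
 * For reference: "kvm-arm.mode=protected" on the kernel command line
 * selects protected (pKVM) nVHE mode, "nvhe" selects the default non-VHE
 * mode (and warns if the kernel is already running at EL2), and "none"
 * disables KVM altogether.
 */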

enum kvm_mode kvm_get_mode(void)
{
	return kvm_mode;
}

static int arm_init(void)
{
	int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
	return rc;
}

module_init(arm_init);