#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_ras.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>

#include "trace.h"

static struct kvm_pgtable *hyp_pgtable;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

static unsigned long io_map_base;

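/*
 * stage2_apply_range() - apply a callback over a range of stage-2 memory.
 *
 * Walks [addr, end) one stage-2 PGD-sized chunk at a time and invokes @fn
 * on each chunk. If @resched is true, the mmu_lock may be dropped between
 * chunks via cond_resched_lock() so that long walks do not hog the lock.
 * Returns 0 on success or the first error reported by @fn.
 */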
static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
			      phys_addr_t end,
			      int (*fn)(struct kvm_pgtable *, u64, u64),
			      bool resched)
{
	int ret;
	u64 next;

	do {
		struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
		if (!pgt)
			return -EINVAL;

		next = stage2_pgd_addr_end(kvm, addr, end);
		ret = fn(pgt, addr, next - addr);
		if (ret)
			break;

		if (resched && next != end)
			cond_resched_lock(&kvm->mmu_lock);
	} while (addr = next, addr != end);

	return ret;
}

#define stage2_apply_range_resched(kvm, addr, end, fn)			\
	stage2_apply_range(kvm, addr, end, fn, true)

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

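/*
 * kvm_flush_remote_tlbs() - flush all of this VM's stage-2 TLB entries.
 *
 * Accounts the request in the generic stats and asks the hypervisor to
 * invalidate the TLB entries tagged with this VM's VMID.
 */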
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	++kvm->stat.generic.remote_tlb_flush_requests;
	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_is_map_memory(pfn);
}

static void *stage2_memcache_zalloc_page(void *arg)
{
	struct kvm_mmu_memory_cache *mc = arg;

	return kvm_mmu_memory_cache_alloc(mc);
}

static void *kvm_host_zalloc_pages_exact(size_t size)
{
	return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
}

static void kvm_host_get_page(void *addr)
{
	get_page(virt_to_page(addr));
}

static void kvm_host_put_page(void *addr)
{
	put_page(virt_to_page(addr));
}

static int kvm_host_page_count(void *addr)
{
	return page_count(virt_to_page(addr));
}

static phys_addr_t kvm_host_pa(void *addr)
{
	return __pa(addr);
}

static void *kvm_host_va(phys_addr_t phys)
{
	return __va(phys);
}

static void clean_dcache_guest_page(void *va, size_t size)
{
	__clean_dcache_guest_page(va, size);
}

static void invalidate_icache_guest_page(void *va, size_t size)
{
	__invalidate_icache_guest_page(va, size);
}

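/*
 * __unmap_stage2_range() - clear stage-2 page table entries.
 *
 * Unmaps the IPA range [start, start + size) from the guest's stage-2
 * page tables. The caller must hold kvm->mmu_lock; when @may_block is
 * true the walk may drop the lock to reschedule between chunks.
 */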
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
				 bool may_block)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	phys_addr_t end = start + size;

	assert_spin_locked(&kvm->mmu_lock);
	WARN_ON(size & ~PAGE_MASK);
	WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
				   may_block));
}

static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
	__unmap_stage2_range(mmu, start, size, true);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;

	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_flush);
}

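/*
 * stage2_flush_vm() - clean the data cache for all stage-2 mapped memory.
 *
 * Walks every memslot and flushes the memory it maps at stage 2, e.g.
 * when the guest toggles its caches.
 */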
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

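/*
 * free_hyp_pgds() - free the Hyp-mode page tables, if the host still owns
 * them.
 */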
void free_hyp_pgds(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);
	if (hyp_pgtable) {
		kvm_pgtable_hyp_destroy(hyp_pgtable);
		kfree(hyp_pgtable);
		hyp_pgtable = NULL;
	}
	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static bool kvm_host_owns_hyp_mappings(void)
{
	if (static_branch_likely(&kvm_protected_mode_initialized))
		return false;

	if (!hyp_pgtable && is_protected_kvm_enabled())
		return false;

	WARN_ON(!hyp_pgtable);

	return true;
}

static int __create_hyp_mappings(unsigned long start, unsigned long size,
				 unsigned long phys, enum kvm_pgtable_prot prot)
{
	int err;

	if (WARN_ON(!kvm_host_owns_hyp_mappings()))
		return -EINVAL;

	mutex_lock(&kvm_hyp_pgd_mutex);
	err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
	mutex_unlock(&kvm_hyp_pgd_mutex);

	return err;
}

static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end)
{
	phys_addr_t addr;
	int ret;

	for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) {
		ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp,
					__phys_to_pfn(addr));
		if (ret)
			return ret;
	}

	return 0;
}

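/*
 * create_hyp_mappings() - duplicate a kernel virtual address range in Hyp.
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 * @prot:	The protection to be applied to this range
 *
 * Maps the same underlying physical pages at the kern_hyp_va() alias of the
 * kernel addresses. When the host no longer owns the Hyp page tables, the
 * pages are shared with the hypervisor instead (PAGE_HYP only).
 */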
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	if (!kvm_host_owns_hyp_mappings()) {
		if (WARN_ON(prot != PAGE_HYP))
			return -EPERM;
		return pkvm_share_hyp(kvm_kaddr_to_phys(from),
				      kvm_kaddr_to_phys(to));
	}

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(virt_addr, PAGE_SIZE, phys_addr,
					    prot);
		if (err)
			return err;
	}

	return 0;
}

static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr,
					enum kvm_pgtable_prot prot)
{
	unsigned long base;
	int ret = 0;

	if (!kvm_host_owns_hyp_mappings()) {
		base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
					 phys_addr, size, prot);
		if (IS_ERR_OR_NULL((void *)base))
			return PTR_ERR((void *)base);
		*haddr = base;

		return 0;
	}

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
	 * Private mappings are allocated downwards from io_map_base, which
	 * starts at the idmap. Round the size up so the mapping covers the
	 * requested range whatever its page offset.
	 */
	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	base = io_map_base - size;

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by the
	 * allocation, which would indicate that the private IO VA range
	 * has been exhausted.
	 */
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		ret = -ENOMEM;
	else
		io_map_base = base;

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret)
		goto out;

	ret = __create_hyp_mappings(base, size, phys_addr, prot);
	if (ret)
		goto out;

	*haddr = base + offset_in_page(phys_addr);
out:
	return ret;
}

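/*
 * create_hyp_io_mappings() - map device memory for both the kernel and Hyp.
 * @phys_addr:	The physical start address which gets mapped
 * @size:	Size of the region being mapped
 * @kaddr:	Kernel VA for this mapping
 * @haddr:	Hyp VA for this mapping
 *
 * ioremap()s the region for the kernel and, unless the kernel already runs
 * in Hyp mode (VHE, where both share the address), also creates a Hyp
 * private device mapping for it.
 */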
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}

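/*
 * create_hyp_exec_mappings() - map an executable region into the Hyp
 * private VA range. Only meaningful for nVHE, hence the BUG_ON() when the
 * kernel already runs at EL2.
 */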
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}

static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
	.phys_to_virt = kvm_host_va,
};

static int get_user_mapping_size(struct kvm *kvm, u64 addr)
{
	struct kvm_pgtable pgt = {
		.pgd		= (kvm_pte_t *)kvm->mm->pgd,
		.ia_bits	= VA_BITS,
		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
				   CONFIG_PGTABLE_LEVELS),
		.mm_ops		= &kvm_user_mm_ops,
	};
	kvm_pte_t pte = 0;
	u32 level = ~0;
	int ret;

	ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
	VM_BUG_ON(ret);
	VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
	VM_BUG_ON(!(pte & PTE_VALID));

	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
}

static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
	.zalloc_page		= stage2_memcache_zalloc_page,
	.zalloc_pages_exact	= kvm_host_zalloc_pages_exact,
	.free_pages_exact	= free_pages_exact,
	.get_page		= kvm_host_get_page,
	.put_page		= kvm_host_put_page,
	.page_count		= kvm_host_page_count,
	.phys_to_virt		= kvm_host_va,
	.virt_to_phys		= kvm_host_pa,
	.dcache_clean_inval_poc	= clean_dcache_guest_page,
	.icache_inval_pou	= invalidate_icache_guest_page,
};

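/*
 * kvm_init_stage2_mmu() - initialise a stage-2 MMU structure for a VM.
 *
 * Allocates and initialises the stage-2 page table and the per-CPU
 * last_vcpu_ran tracking for this MMU. Returns 0 on success or a negative
 * error code.
 */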
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
	int cpu, err;
	struct kvm_pgtable *pgt;

	if (mmu->pgt != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
	if (!pgt)
		return -ENOMEM;

	err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
	if (err)
		goto out_free_pgtable;

	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
	if (!mmu->last_vcpu_ran) {
		err = -ENOMEM;
		goto out_destroy_pgtable;
	}

	for_each_possible_cpu(cpu)
		*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;

	mmu->arch = &kvm->arch;
	mmu->pgt = pgt;
	mmu->pgd_phys = __pa(pgt->pgd);
	WRITE_ONCE(mmu->vmid.vmid_gen, 0);
	return 0;

out_destroy_pgtable:
	kvm_pgtable_stage2_destroy(pgt);
out_free_pgtable:
	kfree(pgt);
	return err;
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	do {
		struct vm_area_struct *vma;
		hva_t vm_start, vm_end;

		vma = find_vma_intersection(current->mm, hva, reg_end);
		if (!vma)
			break;

		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

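/*
 * stage2_unmap_vm() - unmap stage-2 mappings for the whole VM.
 *
 * Walks every memslot and removes its stage-2 mappings (except for
 * VM_PFNMAP regions), so that the memory is faulted in again afterwards.
 */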
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	mmap_read_lock(current->mm);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	mmap_read_unlock(current->mm);
	srcu_read_unlock(&kvm->srcu, idx);
}

void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	struct kvm_pgtable *pgt = NULL;

	spin_lock(&kvm->mmu_lock);
	pgt = mmu->pgt;
	if (pgt) {
		mmu->pgd_phys = 0;
		mmu->pgt = NULL;
		free_percpu(mmu->last_vcpu_ran);
	}
	spin_unlock(&kvm->mmu_lock);

	if (pgt) {
		kvm_pgtable_stage2_destroy(pgt);
		kfree(pgt);
	}
}

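/*
 * kvm_phys_addr_ioremap() - map a device range into the guest IPA space.
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 * @writable:	Whether or not to create a writable mapping
 */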
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr;
	int ret = 0;
	struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, };
	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
				     KVM_PGTABLE_PROT_R |
				     (writable ? KVM_PGTABLE_PROT_W : 0);

	size += offset_in_page(guest_ipa);
	guest_ipa &= PAGE_MASK;

	for (addr = guest_ipa; addr < guest_ipa + size; addr += PAGE_SIZE) {
		ret = kvm_mmu_topup_memory_cache(&cache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			break;

		spin_lock(&kvm->mmu_lock);
		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
					     &cache);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			break;

		pa += PAGE_SIZE;
	}

	kvm_mmu_free_memory_cache(&cache);
	return ret;
}

static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
{
	struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
	stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
}

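/*
 * kvm_mmu_wp_memory_region() - write-protect a memslot for dirty logging.
 *
 * Write-protects every stage-2 mapping in the given memslot and then
 * flushes the TLBs, so that subsequent guest writes fault and can be
 * tracked in the dirty log.
 */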
static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start, end;

	if (WARN_ON_ONCE(!memslot))
		return;

	start = memslot->base_gfn << PAGE_SHIFT;
	end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(&kvm->arch.mmu, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

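/*
 * kvm_mmu_write_protect_pt_masked() - write-protect selected pages.
 *
 * Write-protects the range of pages covered by the set bits in @mask (from
 * the lowest to the highest set bit), relative to the memslot base gfn plus
 * @gfn_offset. The caller must hold the mmu_lock.
 */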
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
					    struct kvm_memory_slot *slot,
					    gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(&kvm->arch.mmu, start, end);
}

void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
					     struct kvm_memory_slot *slot,
					     gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}

static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
{
	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
}

static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
					       unsigned long hva,
					       unsigned long map_size)
{
	gpa_t gpa_start;
	hva_t uaddr_start, uaddr_end;
	size_t size;

	if (map_size == PAGE_SIZE)
		return true;

	size = memslot->npages * PAGE_SIZE;

	gpa_start = memslot->base_gfn << PAGE_SHIFT;

	uaddr_start = memslot->userspace_addr;
	uaddr_end = uaddr_start + size;

	if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1)))
		return false;

	return (hva & ~(map_size - 1)) >= uaddr_start &&
	       (hva & ~(map_size - 1)) + map_size <= uaddr_end;
}

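/*
 * transparent_hugepage_adjust() - upgrade a fault to a PMD-sized mapping
 * when the userspace mapping is backed by a transparent hugepage.
 *
 * If the stage-2 fault can be served by a PMD-level block mapping, aligns
 * *ipap down to a PMD boundary and replaces *pfnp with the first pfn of the
 * PMD-sized range, taking a reference on it. Returns the mapping size
 * actually used (PMD_SIZE or PAGE_SIZE).
 */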
static unsigned long
transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
			    unsigned long hva, kvm_pfn_t *pfnp,
			    phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;

	if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
	    get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
		*ipap &= PMD_MASK;
		kvm_release_pfn_clean(pfn);
		pfn &= ~(PTRS_PER_PMD - 1);
		get_page(pfn_to_page(pfn));
		*pfnp = pfn;

		return PMD_SIZE;
	}

	return PAGE_SIZE;
}

static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
{
	unsigned long pa;

	if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
		return huge_page_shift(hstate_vma(vma));

	if (!(vma->vm_flags & VM_PFNMAP))
		return PAGE_SHIFT;

	VM_BUG_ON(is_vm_hugetlb_page(vma));

	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);

#ifndef __PAGETABLE_PMD_FOLDED
	if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
	    ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
	    ALIGN(hva, PUD_SIZE) <= vma->vm_end)
		return PUD_SHIFT;
#endif

	if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
	    ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
	    ALIGN(hva, PMD_SIZE) <= vma->vm_end)
		return PMD_SHIFT;

	return PAGE_SHIFT;
}

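/*
 * sanitise_mte_tags() - make sure MTE tags are initialised for guest pages.
 *
 * When the guest has MTE enabled, clear the allocation tags of every page
 * in the range that has not been tagged yet, marking each page with
 * PG_mte_tagged once done. Returns -EFAULT if a page is not online.
 */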
static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
			     unsigned long size)
{
	unsigned long i, nr_pages = size >> PAGE_SHIFT;
	struct page *page;

	if (!kvm_has_mte(kvm))
		return 0;

	page = pfn_to_online_page(pfn);

	if (!page)
		return -EFAULT;

	for (i = 0; i < nr_pages; i++, page++) {
		if (!test_bit(PG_mte_tagged, &page->flags)) {
			mte_clear_page_tags(page_address(page));
			set_bit(PG_mte_tagged, &page->flags);
		}
	}

	return 0;
}

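/*
 * user_mem_abort() - handle a stage-2 fault on memory backed by a memslot.
 *
 * Resolves the host mapping for the faulting IPA, picks the largest mapping
 * size the memslot and VMA allow (forcing PTE granularity while dirty
 * logging is active), and installs or relaxes the stage-2 mapping under the
 * mmu_lock, honouring MMU-notifier invalidations via mmu_notifier_seq.
 */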
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret = 0;
	bool write_fault, writable, force_pte = false;
	bool exec_fault;
	bool device = false;
	bool shared;
	unsigned long mmu_seq;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	short vma_shift;
	gfn_t gfn;
	kvm_pfn_t pfn;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
	unsigned long vma_pagesize, fault_granule;
	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
	struct kvm_pgtable *pgt;

	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
	write_fault = kvm_is_write_fault(vcpu);
	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
	VM_BUG_ON(write_fault && exec_fault);

	if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/*
	 * Look up the host VMA to work out the largest mapping size we can
	 * use for this fault.
	 */
	mmap_read_lock(current->mm);
	vma = vma_lookup(current->mm, hva);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		mmap_read_unlock(current->mm);
		return -EFAULT;
	}

	/*
	 * While dirty logging is active, map at PTE granularity so that
	 * writes can be tracked per page.
	 */
	if (logging_active) {
		force_pte = true;
		vma_shift = PAGE_SHIFT;
	} else {
		vma_shift = get_vma_page_shift(vma, hva);
	}

	shared = (vma->vm_flags & VM_SHARED);

	switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
	case PUD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE))
			break;
		fallthrough;
#endif
	case CONT_PMD_SHIFT:
		vma_shift = PMD_SHIFT;
		fallthrough;
	case PMD_SHIFT:
		if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE))
			break;
		fallthrough;
	case CONT_PTE_SHIFT:
		vma_shift = PAGE_SHIFT;
		force_pte = true;
		fallthrough;
	case PAGE_SHIFT:
		break;
	default:
		WARN_ONCE(1, "Unknown vma_shift %d", vma_shift);
	}

	vma_pagesize = 1UL << vma_shift;
	if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE)
		fault_ipa &= ~(vma_pagesize - 1);

	gfn = fault_ipa >> PAGE_SHIFT;
	mmap_read_unlock(current->mm);

	/*
	 * Permission faults only need to update an existing leaf entry, so
	 * they normally don't require memcache pages. The exception is a
	 * write fault with dirty logging active, which may need to split a
	 * block mapping into a table.
	 */
	if (fault_status != FSC_PERM || (logging_active && write_fault)) {
		ret = kvm_mmu_topup_memory_cache(memcache,
						 kvm_mmu_cache_min_pages(kvm));
		if (ret)
			return ret;
	}

	mmu_seq = vcpu->kvm->mmu_notifier_seq;

	/*
	 * Read mmu_notifier_seq before translating the gfn to a pfn, so that
	 * a concurrent MMU-notifier invalidation is caught by the
	 * mmu_notifier_retry() check below once the mmu_lock is held.
	 */
	smp_rmb();

	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
				   write_fault, &writable, NULL);
	if (pfn == KVM_PFN_ERR_HWPOISON) {
		kvm_send_hwpoison_signal(hva, vma_shift);
		return 0;
	}
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		/*
		 * Anything the kernel cannot map as normal memory is treated
		 * as a device mapping; transparent_hugepage_adjust() is
		 * skipped for these below.
		 */
		device = true;
	} else if (logging_active && !write_fault) {
		/*
		 * Only map the page writable if this was a write fault, so
		 * that dirty pages keep being tracked.
		 */
		writable = false;
	}

	if (exec_fault && device)
		return -ENOEXEC;

	spin_lock(&kvm->mmu_lock);
	pgt = vcpu->arch.hw_mmu->pgt;
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	/*
	 * If we are not forced to use page mappings, check whether the fault
	 * can be served by a block mapping backed by a THP.
	 */
	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
		if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
			vma_pagesize = fault_granule;
		else
			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
								   hva, &pfn,
								   &fault_ipa);
	}

	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
		/* VM_SHARED mappings are not allowed with MTE */
		if (!shared)
			ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
		else
			ret = -EFAULT;
		if (ret)
			goto out_unlock;
	}

	if (writable)
		prot |= KVM_PGTABLE_PROT_W;

	if (exec_fault)
		prot |= KVM_PGTABLE_PROT_X;

	if (device)
		prot |= KVM_PGTABLE_PROT_DEVICE;
	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
		prot |= KVM_PGTABLE_PROT_X;

	/*
	 * A permission fault on a mapping of the same size as the faulting
	 * granule only needs its permissions relaxed. Anything else goes
	 * through kvm_pgtable_stage2_map() so the block size can change.
	 */
	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
	} else {
		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
					     __pfn_to_phys(pfn), prot,
					     memcache);
	}

	/* Mark the page dirty only if the fault was handled successfully */
	if (writable && !ret) {
		kvm_set_pfn_dirty(pfn);
		mark_page_dirty_in_slot(kvm, memslot, gfn);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret != -EAGAIN ? ret : 0;
}

/* Resolve the access fault by making the page young again. */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pte_t pte;
	kvm_pte_t kpte;
	struct kvm_s2_mmu *mmu;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);
	mmu = vcpu->arch.hw_mmu;
	kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
	spin_unlock(&vcpu->kvm->mmu_lock);

	pte = __pte(kpte);
	if (pte_valid(pte))
		kvm_set_pfn_accessed(pte_pfn(pte));
}

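/*
 * kvm_handle_guest_abort() - handle stage-2 instruction and data aborts.
 *
 * Injects external aborts back into the guest, forwards MMIO accesses to
 * the I/O emulation path, and resolves translation/permission/access
 * faults on memslot-backed memory via user_mem_abort(). Returns 1 to
 * resume the guest, 0 to exit to userspace, or a negative error code.
 */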
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);

	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);

	if (kvm_vcpu_abt_issea(vcpu)) {
		if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
			kvm_inject_vabt(vcpu);

		return 1;
	}

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_esr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		if (is_iabt) {
			ret = -ENOEXEC;
			goto out;
		}

		if (kvm_vcpu_abt_iss1tw(vcpu)) {
			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_incr_pc(vcpu);
			ret = 1;
			goto out_unlock;
		}

		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, fault_ipa);
		goto out_unlock;
	}

	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out:
	if (ret == -ENOEXEC) {
		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
		ret = 1;
	}
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
	if (!kvm->arch.mmu.pgt)
		return false;

	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
			     (range->end - range->start) << PAGE_SHIFT,
			     range->may_block);

	return false;
}

bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	kvm_pfn_t pfn = pte_pfn(range->pte);
	int ret;

	if (!kvm->arch.mmu.pgt)
		return false;

	WARN_ON(range->end - range->start != 1);

	ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
	if (ret)
		return false;

	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
			       PAGE_SIZE, __pfn_to_phys(pfn),
			       KVM_PGTABLE_PROT_R, NULL);

	return false;
}

bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	u64 size = (range->end - range->start) << PAGE_SHIFT;
	kvm_pte_t kpte;
	pte_t pte;

	if (!kvm->arch.mmu.pgt)
		return false;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);

	kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt,
					range->start << PAGE_SHIFT);
	pte = __pte(kpte);
	return pte_valid(pte) && pte_young(pte);
}

bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
	if (!kvm->arch.mmu.pgt)
		return false;

	return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
					   range->start << PAGE_SHIFT);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	return __pa(hyp_pgtable->pgd);
}

phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}

static int kvm_map_idmap_text(void)
{
	unsigned long size = hyp_idmap_end - hyp_idmap_start;
	int err = __create_hyp_mappings(hyp_idmap_start, size, hyp_idmap_start,
					PAGE_HYP_EXEC);
	if (err)
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);

	return err;
}

static void *kvm_hyp_zalloc_page(void *arg)
{
	return (void *)get_zeroed_page(GFP_KERNEL);
}

static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
	.zalloc_page		= kvm_hyp_zalloc_page,
	.get_page		= kvm_host_get_page,
	.put_page		= kvm_host_put_page,
	.phys_to_virt		= kvm_host_va,
	.virt_to_phys		= kvm_host_pa,
};

int kvm_mmu_init(u32 *hyp_va_bits)
{
	int err;

	hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
	hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end);
	hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
	hyp_idmap_vector = __pa_symbol(__kvm_hyp_init);

	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	*hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
	kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits);
	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
	kvm_debug("HYP VA range: %lx:%lx\n",
		  kern_hyp_va(PAGE_OFFSET),
		  kern_hyp_va((unsigned long)high_memory - 1));

	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
	    hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
		err = -EINVAL;
		goto out;
	}

	hyp_pgtable = kzalloc(sizeof(*hyp_pgtable), GFP_KERNEL);
	if (!hyp_pgtable) {
		kvm_err("Hyp mode page-table not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops);
	if (err)
		goto out_free_pgtable;

	err = kvm_map_idmap_text();
	if (err)
		goto out_destroy_pgtable;

	io_map_base = hyp_idmap_start;
	return 0;

out_destroy_pgtable:
	kvm_pgtable_hyp_destroy(hyp_pgtable);
out_free_pgtable:
	kfree(hyp_pgtable);
	hyp_pgtable = NULL;
out:
	return err;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
		if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
			kvm_mmu_wp_memory_region(kvm, mem->slot);
		}
	}
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
	    change != KVM_MR_FLAGS_ONLY)
		return 0;

	if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
		return -EFAULT;

	mmap_read_lock(current->mm);

	do {
		struct vm_area_struct *vma;

		vma = find_vma_intersection(current->mm, hva, reg_end);
		if (!vma)
			break;

		if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
			ret = -EINVAL;
			break;
		}

		if (vma->vm_flags & VM_PFNMAP) {
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
				ret = -EINVAL;
				break;
			}
		}
		hva = min(reg_end, vma->vm_end);
	} while (hva < reg_end);

	mmap_read_unlock(current->mm);
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
}

void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_free_stage2_pgd(&kvm->arch.mmu);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(&kvm->arch.mmu, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}

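/*
 * kvm_set_way_flush() - handle a guest cache maintenance by set/way.
 *
 * Set/way operations cannot be virtualised sensibly, so the whole of the
 * guest's stage-2 memory is flushed instead and HCR_TVM is set so that
 * subsequent cache-state changes through the VM registers are trapped.
 */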
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = *vcpu_hcr(vcpu);

	/*
	 * First set/way operation with HCR_TVM clear: flush the whole VM
	 * and start trapping VM register writes.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		*vcpu_hcr(vcpu) = hcr | HCR_TVM;
	}
}

void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/* Flush the whole stage-2 whenever the cache state changes. */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on; stop trapping VM register writes. */
	if (now_enabled)
		*vcpu_hcr(vcpu) &= ~HCR_TVM;

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}