#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>

#include "trace.h"

extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static pgd_t *merged_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))

#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
#define kvm_pud_huge(_x)	pud_huge(_x)

#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_flush_remote_tlbs() - flush all VM TLB entries
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries.
 */
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}

static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
	/*
	 * This function also gets called when dealing with HYP page
	 * tables. As HYP doesn't have an associated struct kvm (and
	 * the HYP page tables are fairly static), we don't do
	 * anything there.
	 */
	if (kvm)
		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}

/*
 * D-Cache management functions. They take the page table entries by
 * value, as they are flushing the cache using the kernel mapping (or
 * kmap on 32bit).
 */
static void kvm_flush_dcache_pte(pte_t pte)
{
	__kvm_flush_dcache_pte(pte);
}

static void kvm_flush_dcache_pmd(pmd_t pmd)
{
	__kvm_flush_dcache_pmd(pmd);
}

static void kvm_flush_dcache_pud(pud_t pud)
{
	__kvm_flush_dcache_pud(pud);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_valid(pfn);
}

/**
 * stage2_dissolve_pmd() - clear and flush huge PMD entry
 * @kvm:	pointer to kvm structure.
 * @addr:	IPA
 * @pmd:	pmd pointer for IPA
 *
 * Clears a huge PMD entry and flushes the corresponding stage-2 TLB entries.
 */
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
	if (!kvm_pmd_huge(*pmd))
		return;

	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	put_page(virt_to_page(pmd));
}

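/*
 * Fill a memory-object cache up front so that later page-table allocations,
 * which happen while holding the MMU spinlock, can be served without
 * sleeping.
 */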
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	BUG_ON(max > KVM_NR_MEM_OBJS);
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(PGALLOC_GFP);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	BUG_ON(!mc || !mc->nobjs);
	p = mc->objects[--mc->nobjs];
	return p;
}

static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
	pgd_clear(pgd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pud_free(NULL, pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
	pmd_t *pmd_table = pmd_offset(pud, 0);
	VM_BUG_ON(pud_huge(*pud));
	pud_clear(pud);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(kvm_pmd_huge(*pmd));
	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

/*
 * Unmapping vs dcache management:
 *
 * A guest may map memory as uncached, so its writes bypass the data
 * cache, while the CPUs can still speculatively fill *clean* cache
 * lines for those pages. If such stale clean lines were left behind,
 * the cache-coherent IO subsystem could later hit them and read old
 * data. This is why, right after unmapping a page/section and
 * invalidating the corresponding TLBs, we call kvm_flush_dcache_p*()
 * to make sure the IO subsystem will never hit in the cache.
 */
static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t start_addr = addr;
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			pte_t old_pte = *pte;

			kvm_set_pte(pte, __pte(0));
			kvm_tlb_flush_vmid_ipa(kvm, addr);

			/* No need to invalidate the cache for device mappings */
			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
				kvm_flush_dcache_pte(old_pte);

			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (kvm_pte_table_empty(kvm, start_pte))
		clear_pmd_entry(kvm, pmd, start_addr);
}

static void unmap_pmds(struct kvm *kvm, pud_t *pud,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = pmd_offset(pud, addr);
	do {
		next = kvm_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (kvm_pmd_huge(*pmd)) {
				pmd_t old_pmd = *pmd;

				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);

				kvm_flush_dcache_pmd(old_pmd);

				put_page(virt_to_page(pmd));
			} else {
				unmap_ptes(kvm, pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);

	if (kvm_pmd_table_empty(kvm, start_pmd))
		clear_pud_entry(kvm, pud, start_addr);
}

static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
		       phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pud_t *pud, *start_pud;

	start_pud = pud = pud_offset(pgd, addr);
	do {
		next = kvm_pud_addr_end(addr, end);
		if (!pud_none(*pud)) {
			if (pud_huge(*pud)) {
				pud_t old_pud = *pud;

				pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);

				kvm_flush_dcache_pud(old_pud);

				put_page(virt_to_page(pud));
			} else {
				unmap_pmds(kvm, pud, addr, next);
			}
		}
	} while (pud++, addr = next, addr != end);

	if (kvm_pud_table_empty(kvm, start_pud))
		clear_pgd_entry(kvm, pgd, start_addr);
}

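/*
 * Walk the given page table over [start, start + size) and clear every
 * mapping in it, dropping page references and freeing intermediate tables
 * as they become empty. Used for both the HYP page tables (kvm == NULL)
 * and the guest stage-2 tables.
 */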
static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
			phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	pgd = pgdp + kvm_pgd_index(addr);
	do {
		next = kvm_pgd_addr_end(addr, end);
		if (!pgd_none(*pgd))
			unmap_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
			kvm_flush_dcache_pte(*pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = pmd_offset(pud, addr);
	do {
		next = kvm_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (kvm_pmd_huge(*pmd))
				kvm_flush_dcache_pmd(*pmd);
			else
				stage2_flush_ptes(kvm, pmd, addr, next);
		}
	} while (pmd++, addr = next, addr != end);
}

static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = pud_offset(pgd, addr);
	do {
		next = kvm_pud_addr_end(addr, end);
		if (!pud_none(*pud)) {
			if (pud_huge(*pud))
				kvm_flush_dcache_pud(*pud);
			else
				stage2_flush_pmds(kvm, pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
	phys_addr_t next;
	pgd_t *pgd;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	do {
		next = kvm_pgd_addr_end(addr, end);
		stage2_flush_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * free_boot_hyp_pgd - free HYP boot page tables
 *
 * Free the HYP boot page tables (idmap and trampoline) and remove the
 * trampoline mapping from the runtime HYP page tables.
 */
void free_boot_hyp_pgd(void)
{
	mutex_lock(&kvm_hyp_pgd_mutex);

	if (boot_hyp_pgd) {
		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
		boot_hyp_pgd = NULL;
	}

	if (hyp_pgd)
		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);

	mutex_unlock(&kvm_hyp_pgd_mutex);
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 *
 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
 * therefore contains either mappings in the kernel memory area (above
 * PAGE_OFFSET), or device mappings in the vmalloc range (from
 * VMALLOC_START to VMALLOC_END).
 *
 * boot_hyp_pgd should only map the idmap and trampoline pages used by
 * the init code.
 */
void free_hyp_pgds(void)
{
	unsigned long addr;

	free_boot_hyp_pgd();

	mutex_lock(&kvm_hyp_pgd_mutex);

	if (hyp_pgd) {
		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);

		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
		hyp_pgd = NULL;
	}
	if (merged_hyp_pgd) {
		clear_page(merged_hyp_pgd);
		free_page((unsigned long)merged_hyp_pgd);
		merged_hyp_pgd = NULL;
	}

	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end, unsigned long pfn,
				    pgprot_t prot)
{
	pte_t *pte;
	unsigned long addr;

	addr = start;
	do {
		pte = pte_offset_kernel(pmd, addr);
		kvm_set_pte(pte, pfn_pte(pfn, prot));
		get_page(virt_to_page(pte));
		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);
}

static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	addr = start;
	do {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
			get_page(virt_to_page(pmd));
			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
		}

		next = pmd_addr_end(addr, end);

		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int ret;

	addr = start;
	do {
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				return -ENOMEM;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pud_addr_end(addr, end);
		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (ret)
			return ret;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);

		if (pgd_none(*pgd)) {
			pud = pud_alloc_one(NULL, addr);
			if (!pud) {
				kvm_err("Cannot allocate Hyp pud\n");
				err = -ENOMEM;
				goto out;
			}
			pgd_populate(NULL, pgd, pud);
			get_page(virt_to_page(pgd));
			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}

static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
int create_hyp_mappings(void *from, void *to)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = KERN_TO_HYP((unsigned long)from);
	unsigned long end = KERN_TO_HYP((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(hyp_pgd, virt_addr,
					    virt_addr + PAGE_SIZE,
					    __phys_to_pfn(phys_addr),
					    PAGE_HYP);
		if (err)
			return err;
	}

	return 0;
}

/**
 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 * @from:	The kernel start VA of the range
 * @to:		The kernel end VA of the range (exclusive)
 * @phys_addr:	The physical start address which gets mapped
 *
 * The resulting HYP VA is the same as the kernel VA, modulo
 * HYP_PAGE_OFFSET.
 */
int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
{
	unsigned long start = KERN_TO_HYP((unsigned long)from);
	unsigned long end = KERN_TO_HYP((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	/* Check for a valid kernel IO mapping */
	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
		return -EINVAL;

	return __create_hyp_mappings(hyp_pgd, start, end,
				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}

/* Free the HW pgd, one page at a time */
static void kvm_free_hwpgd(void *hwpgd)
{
	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
}

/* Allocate the HW PGD, making sure that each page gets its own refcount */
static void *kvm_alloc_hwpgd(void)
{
	unsigned int size = kvm_get_hwpgd_size();

	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
}

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates only the stage-2 HW PGD level table(s) and clears the
 * allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;
	void *hwpgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	hwpgd = kvm_alloc_hwpgd();
	if (!hwpgd)
		return -ENOMEM;

	/*
	 * When the kernel uses more levels of page tables than the
	 * guest, we allocate a fake PGD and pre-populate it to point
	 * to the next-level page table, which covers the entire
	 * address space.
	 */
	if (KVM_PREALLOC_LEVEL > 0) {
		int i;

		/*
		 * Allocate fake pgd for the page table manipulation macros to
		 * work.  This is not used by the hardware and we have no
		 * alignment requirement for this allocation.
		 */
		pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
			      GFP_KERNEL | __GFP_ZERO);

		if (!pgd) {
			kvm_free_hwpgd(hwpgd);
			return -ENOMEM;
		}

		/* Plug the HW PGD into the fake one. */
		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
			if (KVM_PREALLOC_LEVEL == 1)
				pgd_populate(NULL, pgd + i,
					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
			else if (KVM_PREALLOC_LEVEL == 2)
				pud_populate(NULL, pud_offset(pgd, 0) + i,
					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
		}
	} else {
		/*
		 * Allocate actual first-level Stage-2 page table used by the
		 * hardware for Stage-2 page table walks.
		 */
		pgd = (pgd_t *)hwpgd;
	}

	kvm_clean_pgd(pgd);
	kvm->arch.pgd = pgd;
	return 0;
}

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:   The VM pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	unmap_range(kvm, kvm->arch.pgd, start, size);
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and
	 * any holes between them, so iterate over all of them to find
	 * out if we should unmap any of them.
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables as well as freeing the pages that
 * back the level-1 table.
 *
 * Note we don't need locking here as this is only called when the VM is
 * destroyed, which can only be done once.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	if (kvm->arch.pgd == NULL)
		return;

	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
	if (KVM_PREALLOC_LEVEL > 0)
		kfree(kvm->arch.pgd);

	kvm->arch.pgd = NULL;
}

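/*
 * Stage-2 table walkers: return the PUD/PMD entry covering @addr,
 * allocating missing intermediate levels from @cache (pre-filled by the
 * caller) when one is provided, or NULL when no table exists and no cache
 * was given.
 */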
static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	if (WARN_ON(pgd_none(*pgd))) {
		if (!cache)
			return NULL;
		pud = mmu_memory_cache_alloc(cache);
		pgd_populate(NULL, pgd, pud);
		get_page(virt_to_page(pgd));
	}

	return pud_offset(pgd, addr);
}

static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = stage2_get_pud(kvm, cache, addr);
	if (pud_none(*pud)) {
		if (!cache)
			return NULL;
		pmd = mmu_memory_cache_alloc(cache);
		pud_populate(NULL, pud, pmd);
		get_page(virt_to_page(pud));
	}

	return pmd_offset(pud, addr);
}

static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	/*
	 * Mapping in huge pages should only happen through a fault.  If a
	 * page is merged into a transparent huge page, the individual
	 * subpages of that huge page should be unmapped through MMU
	 * notifiers before we get here.
	 */
	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));

	old_pmd = *pmd;
	kvm_set_pmd(pmd, *new_pmd);
	if (pmd_present(old_pmd))
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	else
		get_page(virt_to_page(pmd));
	return 0;
}

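/*
 * Install a single stage-2 PTE for @addr. When dirty page logging is
 * active, any huge PMD covering the address is first dissolved so the
 * mapping can be tracked at page granularity. For device (IOMAP)
 * mappings, hitting an already-present PTE is treated as an error.
 */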
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte,
			  unsigned long flags)
{
	pmd_t *pmd;
	pte_t *pte, old_pte;
	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;

	VM_BUG_ON(logging_active && !cache);

	/* Create stage-2 page table mapping - Levels 0 and 1 */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
		return 0;
	}

	/*
	 * While dirty page logging - dissolve huge PMD, then continue on to
	 * allocate page.
	 */
	if (logging_active)
		stage2_dissolve_pmd(kvm, addr, pmd);

	/* Create stage-2 page mappings - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0;
		pte = mmu_memory_cache_alloc(cache);
		kvm_clean_pte(pte);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	kvm_set_pte(pte, *new_pte);
	if (pte_present(old_pte))
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	else
		get_page(virt_to_page(pte));

	return 0;
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);

		if (writable)
			kvm_set_s2pte_writable(&pte);

		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
					     KVM_NR_MEM_OBJS);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte,
				     KVM_S2PTE_FLAG_IS_IOMAP);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}

static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;
	gfn_t gfn = *ipap >> PAGE_SHIFT;

	if (PageTransCompoundMap(pfn_to_page(pfn))) {
		unsigned long mask;
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page. Because we map the compound huge page and not the
		 * individual tail page, we must transfer the refcount to the
		 * head page. The THP cannot start splitting under us:
		 * mmu_notifier_retry succeeded and we hold the mmu_lock, so
		 * any split would block in the MMU notifier first. It is
		 * therefore safe to move the refcount from the tail to the
		 * head page and to switch the pfn to the head page
		 * accordingly.
		 */
		mask = PTRS_PER_PMD - 1;
		VM_BUG_ON((gfn & mask) != (pfn & mask));
		if (pfn & mask) {
			*ipap &= PMD_MASK;
			kvm_release_pfn_clean(pfn);
			pfn &= ~mask;
			kvm_get_pfn(pfn);
			*pfnp = pfn;
		}

		return true;
	}

	return false;
}

static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_trap_is_iabt(vcpu))
		return false;

	return kvm_vcpu_dabt_iswrite(vcpu);
}

/**
 * stage2_wp_ptes - write protect PMD range
 * @pmd:	pointer to pmd entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			if (!kvm_s2pte_readonly(pte))
				kvm_set_s2pte_readonly(pte);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

/**
 * stage2_wp_pmds - write protect PUD range
 * @pud:	pointer to pud entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = pmd_offset(pud, addr);

	do {
		next = kvm_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (kvm_pmd_huge(*pmd)) {
				if (!kvm_s2pmd_readonly(pmd))
					kvm_set_s2pmd_readonly(pmd);
			} else {
				stage2_wp_ptes(pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);
}

/**
 * stage2_wp_puds - write protect PGD range
 * @pgd:	pointer to pgd entry
 * @addr:	range start address
 * @end:	range end address
 *
 * Process PUD entries; huge PUDs are not supported and trigger a BUG.
 */
static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = pud_offset(pgd, addr);
	do {
		next = kvm_pud_addr_end(addr, end);
		if (!pud_none(*pud)) {
			/* TODO: PUD not supported, revisit later if supported */
			BUG_ON(kvm_pud_huge(*pud));
			stage2_wp_pmds(pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @kvm:	The KVM pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	pgd_t *pgd;
	phys_addr_t next;

	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
	do {
		/*
		 * Release kvm_mmu_lock periodically if the memory region is
		 * large. Otherwise, we may see kernel panics with
		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
		 * will also starve other vCPUs.
		 */
		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
			cond_resched_lock(&kvm->mmu_lock);

		next = kvm_pgd_addr_end(addr, end);
		if (pgd_present(*pgd))
			stage2_wp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after the KVM_MEM_LOG_DIRTY_PAGES
 * flag is set on a memory region. On return all present PMDs and PTEs in
 * the region are write protected. Acquires kvm_mmu_lock; called with the
 * kvm->slots_lock mutex held.
 */
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(kvm, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

/**
 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this slot
 *
 * Write protects the ptes for the gfns set in mask. Caller must hold
 * kvm_mmu_lock.
 */
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(kvm, start, end);
}

/*
 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
 * dirty pages.
 *
 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
 * enable dirty logging for them.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}

static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
				      unsigned long size, bool uncached)
{
	__coherent_cache_guest_page(vcpu, pfn, size, uncached);
}

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, writable, hugetlb = false, force_pte = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	kvm_pfn_t pfn;
	pgprot_t mem_type = PAGE_S2;
	bool fault_ipa_uncached;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long flags = 0;

	write_fault = kvm_is_write_fault(vcpu);
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma) && !logging_active) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	} else {
		/*
		 * Pages in memslots whose userspace address and IPA are not
		 * equally aligned cannot be mapped with block descriptors,
		 * even if they belong to a THP: a stage-2 block would span
		 * more than the single THP and we would lose atomicity for
		 * unmapping, updates and splits.
		 */
		if ((memslot->userspace_addr & ~PMD_MASK) !=
		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
			force_pte = true;
	}
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
				     KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before the call to
	 * gfn_to_pfn_prot (which calls get_user_pages). Otherwise the page
	 * we just got a reference to could be unmapped before we grab the
	 * mmu_lock, and the retry check below would not notice. This
	 * smp_rmb() pairs with the smp_wmb() in the KVM MMU notifier
	 * invalidate callbacks.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (is_error_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		mem_type = PAGE_S2_DEVICE;
		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
	} else if (logging_active) {
		/*
		 * Faults on pages in a memslot with logging enabled
		 * should not be mapped with huge pages (it introduces churn
		 * and performance degradation), so force a pte mapping.
		 */
		force_pte = true;
		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;

		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		if (!write_fault)
			writable = false;
	}

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	if (!hugetlb && !force_pte)
		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);

	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;

	if (hugetlb) {
		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
		new_pmd = pmd_mkhuge(new_pmd);
		if (writable) {
			kvm_set_s2pmd_writable(&new_pmd);
			kvm_set_pfn_dirty(pfn);
		}
		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
	} else {
		pte_t new_pte = pfn_pte(pfn, mem_type);

		if (writable) {
			kvm_set_s2pte_writable(&new_pte);
			kvm_set_pfn_dirty(pfn);
			mark_page_dirty(kvm, gfn);
		}
		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret;
}

/*
 * Resolve the access fault by making the page young again.
 * Note that because the faulting entry is guaranteed not to be
 * cached in the TLB, we don't need to invalidate anything.
 */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pmd_t *pmd;
	pte_t *pte;
	kvm_pfn_t pfn;
	bool pfn_valid = false;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);

	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
	if (!pmd || pmd_none(*pmd))
		goto out;

	if (kvm_pmd_huge(*pmd)) {
		*pmd = pmd_mkyoung(*pmd);
		pfn = pmd_pfn(*pmd);
		pfn_valid = true;
		goto out;
	}

	pte = pte_offset_kernel(pmd, fault_ipa);
	if (pte_none(*pte))
		goto out;

	*pte = pte_mkyoung(*pte);
	pfn = pte_pfn(*pte);
	pfn_valid = true;
out:
	spin_unlock(&vcpu->kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either
 * the guest simply needs more memory and we must allocate an appropriate
 * page, or that the guest tried to access I/O memory, which is emulated by
 * user space. The distinction is based on the IPA causing the fault and
 * whether this memory region has been registered as standard RAM by user
 * space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_hsr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * Check for a cache maintenance operation. Since we ended up
		 * here, we know the range is outside of any memory slot, but
		 * not whether it targets a device or the guest is just being
		 * cautious. All we know for sure is that the range cannot be
		 * cached, so simply skip the instruction.
		 */
		if (kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, run, fault_ipa);
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

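/*
 * Apply @handler to every guest physical page that backs the host VA range
 * [start, end) in any memslot. Used by the MMU notifier callbacks below to
 * translate host-side invalidations into stage-2 updates.
 */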
static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm,
					    gpa_t gpa, void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		for (; gfn < gfn_end; ++gfn) {
			gpa_t gpa = gfn << PAGE_SHIFT;
			ret |= handler(kvm, gpa, data);
		}
	}

	return ret;
}

static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	unmap_stage2_range(kvm, gpa, PAGE_SIZE);
	return 0;
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	unsigned long end = hva + PAGE_SIZE;

	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva(hva);
	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pte_t *pte = (pte_t *)data;

	/*
	 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
	 * clear because MMU notifiers will have unmapped a huge PMD before
	 * calling ->change_pte() (which in turn calls kvm_set_spte_hva()), so
	 * stage2_set_pte() never needs to clear out a huge PMD through this
	 * calling path.
	 */
	stage2_set_pte(kvm, NULL, gpa, pte, 0);
	return 0;
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	pte_t stage2_pte;

	if (!kvm->arch.pgd)
		return;

	trace_kvm_set_spte_hva(hva);
	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))
		return 0;

	if (kvm_pmd_huge(*pmd)) {
		if (pmd_young(*pmd)) {
			*pmd = pmd_mkold(*pmd);
			return 1;
		}

		return 0;
	}

	pte = pte_offset_kernel(pmd, gpa);
	if (pte_none(*pte))
		return 0;

	if (pte_young(*pte)) {
		*pte = pte_mkold(*pte);
		return 1;
	}

	return 0;
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))
		return 0;

	if (kvm_pmd_huge(*pmd))
		return pmd_young(*pmd);

	pte = pte_offset_kernel(pmd, gpa);
	if (!pte_none(*pte))
		return pte_young(*pte);

	return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	trace_kvm_age_hva(start, end);
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	trace_kvm_test_age_hva(hva);
	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	if (__kvm_cpu_uses_extended_idmap())
		return virt_to_phys(merged_hyp_pgd);
	else
		return virt_to_phys(hyp_pgd);
}

phys_addr_t kvm_mmu_get_boot_httbr(void)
{
	if (__kvm_cpu_uses_extended_idmap())
		return virt_to_phys(merged_hyp_pgd);
	else
		return virt_to_phys(boot_hyp_pgd);
}

phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}

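/*
 * One-time initialisation of the HYP page tables: build the identity
 * mapping for the HYP init code in the boot tables, map the trampoline
 * page into both the boot and runtime tables, or, when the CPU requires an
 * extended idmap, merge the boot and runtime tables into merged_hyp_pgd.
 */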
int kvm_mmu_init(void)
{
	int err;

	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);

	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);

	if (!hyp_pgd || !boot_hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	/* Create the idmap in the boot page tables */
	err = __create_hyp_mappings(boot_hyp_pgd,
				    hyp_idmap_start, hyp_idmap_end,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP);

	if (err) {
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);
		goto out;
	}

	if (__kvm_cpu_uses_extended_idmap()) {
		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
		if (!merged_hyp_pgd) {
			kvm_err("Failed to allocate extra HYP pgd\n");
			goto out;
		}
		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
				    hyp_idmap_start);
		return 0;
	}

	/* Map the very same page at the trampoline VA */
	err = __create_hyp_mappings(boot_hyp_pgd,
				    TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP);
	if (err) {
		kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
			TRAMPOLINE_VA);
		goto out;
	}

	/* Map the same page again into the runtime page tables */
	err = __create_hyp_mappings(hyp_pgd,
				    TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP);
	if (err) {
		kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
			TRAMPOLINE_VA);
		goto out;
	}

	return 0;
out:
	free_hyp_pgds();
	return err;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	/*
	 * At this point the memslot has been committed and there is an
	 * allocated dirty_bitmap[]; dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
		kvm_mmu_wp_memory_region(kvm, mem->slot);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	bool writable = !(mem->flags & KVM_MEM_READONLY);
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
	    change != KVM_MR_FLAGS_ONLY)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the
	 * IPA space addressable by the KVM guest.
	 */
	if (memslot->base_gfn + memslot->npages >=
	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
		return -EFAULT;

	/*
	 * A memory region could potentially cover multiple VMAs, and
	 * any holes between them, so iterate over all of them to find
	 * out if we can map any of them right now.
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Mapping a read-only VMA is only allowed if the
		 * memory region is configured as read-only.
		 */
		if (writable && !(vma->vm_flags & VM_WRITE)) {
			ret = -EPERM;
			break;
		}

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (vma->vm_flags & VM_PFNMAP) {
			gpa_t gpa = mem->guest_phys_addr +
				    (vm_start - mem->userspace_addr);
			phys_addr_t pa;

			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
			pa += vm_start - vma->vm_start;

			/* IO region dirty page logging not allowed */
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
				return -EINVAL;

			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
						    vm_end - vm_start,
						    writable);
			if (ret)
				break;
		}
		hva = vm_end;
	} while (hva < reg_end);

	if (change == KVM_MR_FLAGS_ONLY)
		return ret;

	spin_lock(&kvm->mmu_lock);
	if (ret)
		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
	else
		stage2_flush_memslot(kvm, memslot);
	spin_unlock(&kvm->mmu_lock);
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	/*
	 * Readonly memslots are not incoherent with the caches by definition,
	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
	 * that the guest may consider devices and hence map as uncached.
	 * To prevent incoherency issues in these cases, tag all readonly
	 * regions as incoherent.
	 */
	if (slot->flags & KVM_MEM_READONLY)
		slot->flags |= KVM_MEMSLOT_INCOHERENT;
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(kvm, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}

/*
 * Handling of cache maintenance by set/way, which is not easily
 * virtualized: the operations are local to a CPU, cache lines can migrate
 * behind our back, and system caches do not implement set/way at all.
 *
 * The best we can do is convert set/way operations into a flush of the
 * whole VM address space. In practice guests only use set/way around
 * turning their caches on or off, so the policy is:
 *
 * - On the first trapped set/way operation, flush the VM and enable
 *   trapping of VM system register writes (HCR_TVM).
 *
 * - Flush again when the guest toggles its caches on or off.
 *
 * - Once the caches are enabled, stop trapping VM register writes.
 */
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = vcpu_get_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * Caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
	}
}

void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/*
	 * If switching the MMU+caches on, need to invalidate the caches.
	 * If switching it off, need to clean the caches.
	 * Clean + invalidate does the trick always.
	 */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
	if (now_enabled)
		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}