#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>
#include <asm/system_misc.h>

#include "trace.h"

static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static pgd_t *merged_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

static unsigned long io_map_base;

#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order	get_order(PTRS_PER_PGD * sizeof(pgd_t))

#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

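/*
 * Stage-2 TLB maintenance: ask the hypervisor (via kvm_call_hyp) to
 * invalidate either all TLB entries for this VM's VMID, or only those
 * covering a single IPA.
 */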
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}

static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}

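/*
 * D-cache maintenance for guest pages. The helpers take the page table entry
 * by value and clean+invalidate the data cache for the page it points to,
 * using the host's own mapping of that page.
 */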
static void kvm_flush_dcache_pte(pte_t pte)
{
	__kvm_flush_dcache_pte(pte);
}

static void kvm_flush_dcache_pmd(pmd_t pmd)
{
	__kvm_flush_dcache_pmd(pmd);
}

static void kvm_flush_dcache_pud(pud_t pud)
{
	__kvm_flush_dcache_pud(pud);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_valid(pfn);
}

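/*
 * stage2_dissolve_pmd - break up a huge stage-2 PMD mapping.
 * @kvm:	pointer to kvm structure
 * @addr:	IPA covered by the PMD
 * @pmd:	pmd pointer for @addr
 *
 * Clears the block entry and flushes the stage-2 TLBs so that the range can
 * later be remapped with page-sized entries, e.g. once dirty logging is
 * enabled on the memslot.
 */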
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
	if (!pmd_thp_or_huge(*pmd))
		return;

	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	put_page(virt_to_page(pmd));
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	BUG_ON(max > KVM_NR_MEM_OBJS);
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(PGALLOC_GFP);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	BUG_ON(!mc || !mc->nobjs);
	p = mc->objects[--mc->nobjs];
	return p;
}

static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
	stage2_pgd_clear(pgd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	stage2_pud_free(pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
	VM_BUG_ON(stage2_pud_huge(*pud));
	stage2_pud_clear(pud);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	stage2_pmd_free(pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(pmd_thp_or_huge(*pmd));
	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

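/*
 * Unmapping vs dcache management:
 *
 * A guest may map memory as uncacheable and write straight to RAM while the
 * host still holds cacheable aliases of the same pages. Before a page is
 * unmapped from stage 2 and handed back to the host, any cache lines for it
 * must therefore be cleaned and invalidated, except for device mappings
 * which are never cached. The unmap_stage2_* walkers below clear the entry,
 * invalidate the TLB, perform the dcache maintenance on the old entry and
 * only then drop the page reference.
 */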
static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t start_addr = addr;
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			pte_t old_pte = *pte;

			kvm_set_pte(pte, __pte(0));
			kvm_tlb_flush_vmid_ipa(kvm, addr);

			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
				kvm_flush_dcache_pte(old_pte);

			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (stage2_pte_table_empty(start_pte))
		clear_stage2_pmd_entry(kvm, pmd, start_addr);
}

static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = stage2_pmd_offset(pud, addr);
	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd)) {
				pmd_t old_pmd = *pmd;

				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);

				kvm_flush_dcache_pmd(old_pmd);

				put_page(virt_to_page(pmd));
			} else {
				unmap_stage2_ptes(kvm, pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);

	if (stage2_pmd_table_empty(start_pmd))
		clear_stage2_pud_entry(kvm, pud, start_addr);
}

static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pud_t *pud, *start_pud;

	start_pud = pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			if (stage2_pud_huge(*pud)) {
				pud_t old_pud = *pud;

				stage2_pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
				kvm_flush_dcache_pud(old_pud);
				put_page(virt_to_page(pud));
			} else {
				unmap_stage2_pmds(kvm, pud, addr, next);
			}
		}
	} while (pud++, addr = next, addr != end);

	if (stage2_pud_table_empty(start_pud))
		clear_stage2_pgd_entry(kvm, pgd, start_addr);
}

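/*
 * unmap_stage2_range - clear stage-2 page table entries for an IPA range.
 * @kvm:	the VM pointer
 * @start:	intermediate physical base address of the range to unmap
 * @size:	size of the area to unmap
 *
 * Walks the stage-2 tables clearing entries, flushing TLBs and dropping page
 * refcounts. The caller must hold kvm->mmu_lock; for large ranges the lock is
 * dropped and re-taken periodically, so kvm->arch.pgd is re-checked in case
 * the whole table was freed in the meantime.
 */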
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	assert_spin_locked(&kvm->mmu_lock);
	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		/*
		 * Make sure the page table is still active: another thread
		 * could have freed it while we dropped the lock below.
		 */
		if (!READ_ONCE(kvm->arch.pgd))
			break;
		next = stage2_pgd_addr_end(addr, end);
		if (!stage2_pgd_none(*pgd))
			unmap_stage2_puds(kvm, pgd, addr, next);
		/*
		 * If the range is too large, release kvm->mmu_lock
		 * periodically to avoid starving other vCPUs and triggering
		 * the lockup detectors.
		 */
		if (next != end)
			cond_resched_lock(&kvm->mmu_lock);
	} while (pgd++, addr = next, addr != end);
}

static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
			kvm_flush_dcache_pte(*pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = stage2_pmd_offset(pud, addr);
	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd))
				kvm_flush_dcache_pmd(*pmd);
			else
				stage2_flush_ptes(kvm, pmd, addr, next);
		}
	} while (pmd++, addr = next, addr != end);
}

static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			if (stage2_pud_huge(*pud))
				kvm_flush_dcache_pud(*pud);
			else
				stage2_flush_pmds(kvm, pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
	phys_addr_t next;
	pgd_t *pgd;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		next = stage2_pgd_addr_end(addr, end);
		stage2_flush_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

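/*
 * stage2_flush_vm - clean and invalidate the data cache for every page
 * currently mapped at stage 2, so the guest observes up-to-date memory
 * regardless of its own cacheability settings.
 */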
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

static void clear_hyp_pgd_entry(pgd_t *pgd)
{
	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
	pgd_clear(pgd);
	pud_free(NULL, pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_hyp_pud_entry(pud_t *pud)
{
	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
	VM_BUG_ON(pud_huge(*pud));
	pud_clear(pud);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_hyp_pmd_entry(pmd_t *pmd)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(pmd_thp_or_huge(*pmd));
	pmd_clear(pmd);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			kvm_set_pte(pte, __pte(0));
			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (hyp_pte_table_empty(start_pte))
		clear_hyp_pmd_entry(pmd);
}

static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);

		if (!pmd_none(*pmd))
			unmap_hyp_ptes(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);

	if (hyp_pmd_table_empty(start_pmd))
		clear_hyp_pud_entry(pud);
}

static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next;
	pud_t *pud, *start_pud;

	start_pud = pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);

		if (!pud_none(*pud))
			unmap_hyp_pmds(pud, addr, next);
	} while (pud++, addr = next, addr != end);

	if (hyp_pud_table_empty(start_pud))
		clear_hyp_pgd_entry(pgd);
}

static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd)
{
	return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1);
}

static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
			      phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
	do {
		next = pgd_addr_end(addr, end);
		if (!pgd_none(*pgd))
			unmap_hyp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
{
	__unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size);
}

static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size)
{
	__unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size);
}

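/*
 * free_hyp_pgds - free Hyp-mode page tables.
 *
 * Unmaps whatever is still mapped at EL2 (the kernel range in hyp_pgd and
 * the idmap/private-IO window in whichever table holds the idmap), then
 * frees the hyp_pgd, boot_hyp_pgd and merged_hyp_pgd pages themselves.
 */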
void free_hyp_pgds(void)
{
	pgd_t *id_pgd;

	mutex_lock(&kvm_hyp_pgd_mutex);

	id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd;

	if (id_pgd) {
		/* In case kvm_mmu_init() never ran, fall back to the idmap page */
		if (!io_map_base)
			io_map_base = hyp_idmap_start;
		unmap_hyp_idmap_range(id_pgd, io_map_base,
				      hyp_idmap_start + PAGE_SIZE - io_map_base);
	}

	if (boot_hyp_pgd) {
		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
		boot_hyp_pgd = NULL;
	}

	if (hyp_pgd) {
		unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
				(uintptr_t)high_memory - PAGE_OFFSET);

		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
		hyp_pgd = NULL;
	}
	if (merged_hyp_pgd) {
		clear_page(merged_hyp_pgd);
		free_page((unsigned long)merged_hyp_pgd);
		merged_hyp_pgd = NULL;
	}

	mutex_unlock(&kvm_hyp_pgd_mutex);
}

static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end, unsigned long pfn,
				    pgprot_t prot)
{
	pte_t *pte;
	unsigned long addr;

	addr = start;
	do {
		pte = pte_offset_kernel(pmd, addr);
		kvm_set_pte(pte, pfn_pte(pfn, prot));
		get_page(virt_to_page(pte));
		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);
}

static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	addr = start;
	do {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
			get_page(virt_to_page(pmd));
			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
		}

		next = pmd_addr_end(addr, end);

		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int ret;

	addr = start;
	do {
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				return -ENOMEM;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pud_addr_end(addr, end);
		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (ret)
			return ret;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);

		if (pgd_none(*pgd)) {
			pud = pud_alloc_one(NULL, addr);
			if (!pud) {
				kvm_err("Cannot allocate Hyp pud\n");
				err = -ENOMEM;
				goto out;
			}
			pgd_populate(NULL, pgd, pud);
			get_page(virt_to_page(pgd));
			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}

static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

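/*
 * create_hyp_mappings - mirror a kernel VA range into the Hyp page tables.
 * @from:	kernel virtual start address of the range
 * @to:		kernel virtual end address of the range (exclusive)
 * @prot:	protection attributes to apply
 *
 * The range is mapped page by page at the corresponding kern_hyp_va()
 * addresses, so EL2 can reach the same underlying physical pages as the
 * kernel. This is a no-op (returning success) when the kernel itself runs
 * in Hyp mode (VHE).
 */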
int create_hyp_mappings(void *from, void *to, pgprot_t prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
					    virt_addr, virt_addr + PAGE_SIZE,
					    __phys_to_pfn(phys_addr),
					    prot);
		if (err)
			return err;
	}

	return 0;
}

static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
					unsigned long *haddr, pgprot_t prot)
{
	pgd_t *pgd = hyp_pgd;
	unsigned long base;
	int ret = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);

	/*
	 * Private mappings are allocated downwards from io_map_base, which
	 * starts at the base of the idmap page. The allocated size is always
	 * a multiple of PAGE_SIZE and accounts for the offset of phys_addr
	 * within its page.
	 */
	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
	base = io_map_base - size;

	/*
	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by the new
	 * allocation, which would indicate that we have run out of VA space
	 * in the idmap/IO window.
	 */
	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
		ret = -ENOMEM;
	else
		io_map_base = base;

	mutex_unlock(&kvm_hyp_pgd_mutex);

	if (ret)
		goto out;

	if (__kvm_cpu_uses_extended_idmap())
		pgd = boot_hyp_pgd;

	ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
				    base, base + size,
				    __phys_to_pfn(phys_addr), prot);
	if (ret)
		goto out;

	*haddr = base + offset_in_page(phys_addr);

out:
	return ret;
}

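/*
 * create_hyp_io_mappings - map an I/O region into both the kernel and Hyp.
 * @phys_addr:	physical address of the device
 * @size:	size of the region
 * @kaddr:	returns the kernel (ioremap) mapping
 * @haddr:	returns the Hyp VA of the same region
 *
 * With VHE the kernel mapping is reused directly; otherwise a private Hyp VA
 * is allocated from the idmap/IO window and mapped with device attributes.
 */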
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
			   void __iomem **kaddr,
			   void __iomem **haddr)
{
	unsigned long addr;
	int ret;

	*kaddr = ioremap(phys_addr, size);
	if (!*kaddr)
		return -ENOMEM;

	if (is_kernel_in_hyp_mode()) {
		*haddr = *kaddr;
		return 0;
	}

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_DEVICE);
	if (ret) {
		iounmap(*kaddr);
		*kaddr = NULL;
		*haddr = NULL;
		return ret;
	}

	*haddr = (void __iomem *)addr;
	return 0;
}

int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
			     void **haddr)
{
	unsigned long addr;
	int ret;

	BUG_ON(is_kernel_in_hyp_mode());

	ret = __create_hyp_private_mapping(phys_addr, size,
					   &addr, PAGE_HYP_EXEC);
	if (ret) {
		*haddr = NULL;
		return ret;
	}

	*haddr = (void *)addr;
	return 0;
}

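/*
 * kvm_alloc_stage2_pgd - allocate the level-1 table for stage-2 translation.
 * @kvm:	the KVM struct pointer for this VM
 *
 * Only the top-level stage-2 table is allocated here; lower levels are
 * allocated lazily on the fault paths. Returns 0 on success, -EINVAL if a
 * pgd is already installed, or -ENOMEM on allocation failure.
 */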
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	/* Allocate the HW PGD, making sure that each page gets its own refcount */
	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
	if (!pgd)
		return -ENOMEM;

	kvm->arch.pgd = pgd;
	return 0;
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region may span several VMAs (and any holes between them),
	 * so walk them one by one and only unmap the parts that are backed by
	 * regular memory; PFNMAP (device) VMAs keep their mappings.
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region.
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

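/*
 * stage2_unmap_vm - unmap stage-2 RAM mappings.
 * @kvm:	the VM pointer
 *
 * Goes through the memslots and unmaps any regular RAM backing them, so that
 * (for instance after a guest reset) everything is faulted back in with fresh
 * stage-2 mappings.
 */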
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	down_read(&current->mm->mmap_sem);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	up_read(&current->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, idx);
}

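/*
 * kvm_free_stage2_pgd - free all stage-2 tables.
 * @kvm:	the VM pointer
 *
 * Unmaps the whole IPA space, freeing the intermediate tables as they become
 * empty, clears kvm->arch.pgd under the mmu_lock and finally frees the
 * top-level table itself.
 */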
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	void *pgd = NULL;

	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.pgd) {
		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
		pgd = READ_ONCE(kvm->arch.pgd);
		kvm->arch.pgd = NULL;
	}
	spin_unlock(&kvm->mmu_lock);

	/* Free the HW pgd, now that no stage-2 walker can see it */
	if (pgd)
		free_pages_exact(pgd, S2_PGD_SIZE);
}

static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	if (WARN_ON(stage2_pgd_none(*pgd))) {
		if (!cache)
			return NULL;
		pud = mmu_memory_cache_alloc(cache);
		stage2_pgd_populate(pgd, pud);
		get_page(virt_to_page(pgd));
	}

	return stage2_pud_offset(pgd, addr);
}

static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = stage2_get_pud(kvm, cache, addr);
	if (!pud)
		return NULL;

	if (stage2_pud_none(*pud)) {
		if (!cache)
			return NULL;
		pmd = mmu_memory_cache_alloc(cache);
		stage2_pud_populate(pud, pmd);
		get_page(virt_to_page(pud));
	}

	return stage2_pmd_offset(pud, addr);
}

static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	/*
	 * Huge mappings are only installed through faults. If an entry is
	 * already present it must point at the same PFN we are about to
	 * install: changing the output address of a block mapping requires
	 * it to have been unmapped (via the MMU notifiers) first.
	 */
	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));

	old_pmd = *pmd;
	if (pmd_present(old_pmd)) {
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pmd));
	}

	kvm_set_pmd(pmd, *new_pmd);
	return 0;
}

static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
{
	pmd_t *pmdp;
	pte_t *ptep;

	pmdp = stage2_get_pmd(kvm, NULL, addr);
	if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
		return false;

	if (pmd_thp_or_huge(*pmdp))
		return kvm_s2pmd_exec(pmdp);

	ptep = pte_offset_kernel(pmdp, addr);
	if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
		return false;

	return kvm_s2pte_exec(ptep);
}

static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte,
			  unsigned long flags)
{
	pmd_t *pmd;
	pte_t *pte, old_pte;
	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;

	VM_BUG_ON(logging_active && !cache);

	/* Create the intermediate levels (pgd/pud) if needed */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
		return 0;
	}

	/*
	 * While dirty page logging is active, dissolve any huge PMD covering
	 * this address so the range can be mapped with PTEs below.
	 */
	if (logging_active)
		stage2_dissolve_pmd(kvm, addr, pmd);

	/* Allocate the PTE table if it is not already there */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0;
		pte = mmu_memory_cache_alloc(cache);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Install the new PTE, following break-before-make if one was present */
	old_pte = *pte;
	if (pte_present(old_pte)) {
		kvm_set_pte(pte, __pte(0));
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pte));
	}

	kvm_set_pte(pte, *new_pte);
	return 0;
}

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static int stage2_ptep_test_and_clear_young(pte_t *pte)
{
	if (pte_young(*pte)) {
		*pte = pte_mkold(*pte);
		return 1;
	}
	return 0;
}
#else
static int stage2_ptep_test_and_clear_young(pte_t *pte)
{
	return __ptep_test_and_clear_young(pte);
}
#endif

static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
{
	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
}

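/*
 * kvm_phys_addr_ioremap - map a device range into the guest's IPA space.
 * @kvm:	the VM pointer
 * @guest_ipa:	IPA at which the mapping starts
 * @pa:		physical address of the device
 * @size:	size of the mapping
 * @writable:	whether the mapping should be writable
 *
 * The range is mapped page by page with PAGE_S2_DEVICE attributes. This is
 * how, for instance, a virtual GIC CPU interface is exposed to the guest at
 * a fixed IPA.
 */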
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);

		if (writable)
			pte = kvm_s2pte_mkwrite(pte);

		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
					     KVM_NR_MEM_OBJS);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte,
				     KVM_S2PTE_FLAG_IS_IOMAP);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}

static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;
	gfn_t gfn = *ipap >> PAGE_SHIFT;

	if (PageTransCompoundMap(pfn_to_page(pfn))) {
		unsigned long mask;
		/*
		 * The faulting address is backed by a transparent huge page.
		 * We map the whole compound page rather than the individual
		 * tail page, so transfer the refcount from the tail page to
		 * the head page and align both the IPA and the PFN down to a
		 * PMD boundary. Holding the mmu_lock (with mmu_notifier_retry
		 * having succeeded) guarantees the THP cannot be split under
		 * our feet while we do this.
		 */
		mask = PTRS_PER_PMD - 1;
		VM_BUG_ON((gfn & mask) != (pfn & mask));
		if (pfn & mask) {
			*ipap &= PMD_MASK;
			kvm_release_pfn_clean(pfn);
			pfn &= ~mask;
			kvm_get_pfn(pfn);
			*pfnp = pfn;
		}

		return true;
	}

	return false;
}

static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_trap_is_iabt(vcpu))
		return false;

	return kvm_vcpu_dabt_iswrite(vcpu);
}

static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			if (!kvm_s2pte_readonly(pte))
				kvm_set_s2pte_readonly(pte);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = stage2_pmd_offset(pud, addr);

	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd)) {
				if (!kvm_s2pmd_readonly(pmd))
					kvm_set_s2pmd_readonly(pmd);
			} else {
				stage2_wp_ptes(pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);
}

static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			BUG_ON(stage2_pud_huge(*pud));
			stage2_wp_pmds(pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

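/*
 * stage2_wp_range - write protect a range of stage-2 mappings.
 * @kvm:	the KVM pointer
 * @addr:	start address of the range
 * @end:	end address of the range
 */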
static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	pgd_t *pgd;
	phys_addr_t next;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		/*
		 * Release kvm->mmu_lock periodically if the memory region is
		 * large; holding it for too long can starve other vCPUs and
		 * trigger the lockup detectors. Re-check kvm->arch.pgd after
		 * re-acquiring the lock in case the page tables were freed.
		 */
		cond_resched_lock(&kvm->mmu_lock);
		if (!READ_ONCE(kvm->arch.pgd))
			break;
		next = stage2_pgd_addr_end(addr, end);
		if (stage2_pgd_present(*pgd))
			stage2_wp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

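/*
 * kvm_mmu_wp_memory_region - write protect a whole memory slot.
 * @kvm:	the KVM pointer
 * @slot:	the memory slot to write protect
 *
 * Called when dirty logging is enabled on a slot: after it returns, every
 * page in the slot is write protected so that subsequent guest writes fault
 * and can be tracked. Must be called without holding mmu_lock; the stage-2
 * TLBs are flushed before returning.
 */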
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(kvm, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(kvm, start, end);
}

void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}

static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__clean_dcache_guest_page(pfn, size);
}

static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	__invalidate_icache_guest_page(pfn, size);
}

static void kvm_send_hwpoison_signal(unsigned long address,
				     struct vm_area_struct *vma)
{
	siginfo_t info;

	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_MCEERR_AR;
	info.si_addr = (void __user *)address;

	if (is_vm_hugetlb_page(vma))
		info.si_addr_lsb = huge_page_shift(hstate_vma(vma));
	else
		info.si_addr_lsb = PAGE_SHIFT;

	send_sig_info(SIGBUS, &info, current);
}

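/*
 * user_mem_abort - handle a stage-2 fault on memory backed by a memslot.
 *
 * Resolves the faulting IPA to a host page via gfn_to_pfn_prot(), decides
 * between a huge (PMD) and a regular (PTE) stage-2 mapping, honours dirty
 * logging (which forces PTE mappings and read-only defaults), and performs
 * the required D-cache/I-cache maintenance before installing the mapping.
 */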
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	kvm_pfn_t pfn;
	pgprot_t mem_type = PAGE_S2;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long flags = 0;

	write_fault = kvm_is_write_fault(vcpu);
	exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
	VM_BUG_ON(write_fault && exec_fault);

	if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}

	if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	} else {
		/*
		 * A memslot whose userspace address and IPA are not aligned
		 * the same way with respect to PMD_SIZE cannot use block
		 * mappings: a stage-2 block would cover more than one THP,
		 * and we would lose the ability to unmap or split it
		 * atomically.
		 */
		if ((memslot->userspace_addr & ~PMD_MASK) !=
		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
			force_pte = true;
	}
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
				     KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Read mmu_notifier_seq before calling gfn_to_pfn_prot() (which takes
	 * a reference on the page via get_user_pages). If the page is
	 * unmapped between now and mmu_notifier_retry() below, the sequence
	 * number will have changed and we will retry instead of installing a
	 * stale mapping. The smp_rmb() pairs with the write side in the MMU
	 * notifier invalidate callbacks.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (pfn == KVM_PFN_ERR_HWPOISON) {
		kvm_send_hwpoison_signal(hva, vma);
		return 0;
	}
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		mem_type = PAGE_S2_DEVICE;
		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
	} else if (logging_active) {
		/*
		 * Faults on pages in a memslot with logging enabled should
		 * not be mapped with huge pages, so force a pte mapping.
		 */
		force_pte = true;
		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;

		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		if (!write_fault)
			writable = false;
	}

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	if (!hugetlb && !force_pte)
		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);

	if (hugetlb) {
		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
		new_pmd = pmd_mkhuge(new_pmd);
		if (writable) {
			new_pmd = kvm_s2pmd_mkwrite(new_pmd);
			kvm_set_pfn_dirty(pfn);
		}

		if (fault_status != FSC_PERM)
			clean_dcache_guest_page(pfn, PMD_SIZE);

		if (exec_fault) {
			new_pmd = kvm_s2pmd_mkexec(new_pmd);
			invalidate_icache_guest_page(pfn, PMD_SIZE);
		} else if (fault_status == FSC_PERM) {
			/* Preserve execute if XN was already cleared */
			if (stage2_is_exec(kvm, fault_ipa))
				new_pmd = kvm_s2pmd_mkexec(new_pmd);
		}

		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
	} else {
		pte_t new_pte = pfn_pte(pfn, mem_type);

		if (writable) {
			new_pte = kvm_s2pte_mkwrite(new_pte);
			kvm_set_pfn_dirty(pfn);
			mark_page_dirty(kvm, gfn);
		}

		if (fault_status != FSC_PERM)
			clean_dcache_guest_page(pfn, PAGE_SIZE);

		if (exec_fault) {
			new_pte = kvm_s2pte_mkexec(new_pte);
			invalidate_icache_guest_page(pfn, PAGE_SIZE);
		} else if (fault_status == FSC_PERM) {
			/* Preserve execute if XN was already cleared */
			if (stage2_is_exec(kvm, fault_ipa))
				new_pte = kvm_s2pte_mkexec(new_pte);
		}

		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret;
}

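/*
 * handle_access_fault - resolve an access-flag fault (FSC_ACCESS).
 *
 * Marks the existing stage-2 PMD or PTE young again and reports the access
 * to the core via kvm_set_pfn_accessed(); no new mapping is created here.
 */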
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pmd_t *pmd;
	pte_t *pte;
	kvm_pfn_t pfn;
	bool pfn_valid = false;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);

	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
	if (!pmd || pmd_none(*pmd))
		goto out;

	if (pmd_thp_or_huge(*pmd)) {
		*pmd = pmd_mkyoung(*pmd);
		pfn = pmd_pfn(*pmd);
		pfn_valid = true;
		goto out;
	}

	pte = pte_offset_kernel(pmd, fault_ipa);
	if (pte_none(*pte))
		goto out;

	*pte = pte_mkyoung(*pte);
	pfn = pte_pfn(*pte);
	pfn_valid = true;
out:
	spin_unlock(&vcpu->kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
}

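/*
 * kvm_handle_guest_abort - handle all stage-2 aborts.
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort reaching this point is a translation, permission or access-flag
 * fault. Faults inside a memslot are handled by user_mem_abort(); faults
 * outside any memslot (or writes to a read-only slot) are treated as MMIO
 * and forwarded for emulation, or result in an exception being injected
 * into the guest.
 */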
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);

	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);

	/* Synchronous External Abort? */
	if (kvm_vcpu_dabt_isextabt(vcpu)) {
		/*
		 * For RAS the host kernel may handle this abort.
		 * There is no need to pass the error into the guest.
		 */
		if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
			return 1;

		if (unlikely(!is_iabt)) {
			kvm_inject_vabt(vcpu);
			return 1;
		}
	}

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Only translation, permission and access-flag faults are expected */
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_hsr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * Cache maintenance on an address that is not backed by a
		 * memslot cannot have any effect we care about (the range
		 * cannot be cached through stage 2), so simply skip the
		 * instruction.
		 */
		if (kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to complement
		 * it with the bottom 12 bits from the faulting VA. This is
		 * always 12 bits, irrespective of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, run, fault_ipa);
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm,
					    gpa_t gpa, u64 size,
					    void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gpa;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
		ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
	}

	return ret;
}

static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	unmap_stage2_range(kvm, gpa, size);
	return 0;
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	unsigned long end = hva + PAGE_SIZE;

	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva(hva);
	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pte_t *pte = (pte_t *)data;

	WARN_ON(size != PAGE_SIZE);
	/*
	 * stage2_set_pte() can be called with the logging flag clear here:
	 * the MMU notifiers will have unmapped any huge PMD covering this
	 * address before ->change_pte() is invoked, so there is never a
	 * block mapping to dissolve on this path.
	 */
	stage2_set_pte(kvm, NULL, gpa, pte, 0);
	return 0;
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	pte_t stage2_pte;

	if (!kvm->arch.pgd)
		return;

	trace_kvm_set_spte_hva(hva);
	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))
		return 0;

	if (pmd_thp_or_huge(*pmd))
		return stage2_pmdp_test_and_clear_young(pmd);

	pte = pte_offset_kernel(pmd, gpa);
	if (pte_none(*pte))
		return 0;

	return stage2_ptep_test_and_clear_young(pte);
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))
		return 0;

	if (pmd_thp_or_huge(*pmd))
		return pmd_young(*pmd);

	pte = pte_offset_kernel(pmd, gpa);
	if (!pte_none(*pte))
		return pte_young(*pte);

	return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;
	trace_kvm_age_hva(start, end);
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.pgd)
		return 0;
	trace_kvm_test_age_hva(hva);
	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	if (__kvm_cpu_uses_extended_idmap())
		return virt_to_phys(merged_hyp_pgd);
	else
		return virt_to_phys(hyp_pgd);
}

phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}

static int kvm_map_idmap_text(pgd_t *pgd)
{
	int err;

	err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
				    hyp_idmap_start, hyp_idmap_end,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP_EXEC);
	if (err)
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);

	return err;
}

int kvm_mmu_init(void)
{
	int err;

	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
	hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);

	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
	kvm_debug("HYP VA range: %lx:%lx\n",
		  kern_hyp_va(PAGE_OFFSET),
		  kern_hyp_va((unsigned long)high_memory - 1));

	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
	    hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) &&
	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
		/*
		 * The idmap page is intersecting with the VA space;
		 * it is not safe to continue further.
		 */
		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
		err = -EINVAL;
		goto out;
	}

	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
	if (!hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	if (__kvm_cpu_uses_extended_idmap()) {
		boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
							 hyp_pgd_order);
		if (!boot_hyp_pgd) {
			kvm_err("Hyp boot PGD not allocated\n");
			err = -ENOMEM;
			goto out;
		}

		err = kvm_map_idmap_text(boot_hyp_pgd);
		if (err)
			goto out;

		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
		if (!merged_hyp_pgd) {
			kvm_err("Failed to allocate extra HYP pgd\n");
			err = -ENOMEM;
			goto out;
		}
		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
				    hyp_idmap_start);
	} else {
		err = kvm_map_idmap_text(hyp_pgd);
		if (err)
			goto out;
	}

	io_map_base = hyp_idmap_start;
	return 0;
out:
	free_hyp_pgds();
	return err;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	/*
	 * At this point memslot has been committed and there is an
	 * allocated dirty_bitmap[], dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
		kvm_mmu_wp_memory_region(kvm, mem->slot);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	bool writable = !(mem->flags & KVM_MEM_READONLY);
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
	    change != KVM_MR_FLAGS_ONLY)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the
	 * IPA space addressable by the KVM guest.
	 */
	if (memslot->base_gfn + memslot->npages >=
	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
		return -EFAULT;

	down_read(&current->mm->mmap_sem);
	/*
	 * A memory region may cover multiple VMAs (and any holes between
	 * them), so iterate over all of them. Device (PFNMAP) regions are
	 * mapped up front with device attributes; anything else is mapped
	 * lazily when the guest faults on it.
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Mapping a read-only VMA is only allowed if the memslot is
		 * not writable either.
		 */
		if (writable && !(vma->vm_flags & VM_WRITE)) {
			ret = -EPERM;
			break;
		}

		/*
		 * Take the intersection of this VMA with the memory region.
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (vma->vm_flags & VM_PFNMAP) {
			gpa_t gpa = mem->guest_phys_addr +
				    (vm_start - mem->userspace_addr);
			phys_addr_t pa;

			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
			pa += vm_start - vma->vm_start;

			/* IO region dirty page logging not allowed */
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
				ret = -EINVAL;
				goto out;
			}

			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
						    vm_end - vm_start,
						    writable);
			if (ret)
				break;
		}
		hva = vm_end;
	} while (hva < reg_end);

	if (change == KVM_MR_FLAGS_ONLY)
		goto out;

	spin_lock(&kvm->mmu_lock);
	if (ret)
		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
	else
		stage2_flush_memslot(kvm, memslot);
	spin_unlock(&kvm->mmu_lock);
out:
	up_read(&current->mm->mmap_sem);
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_free_stage2_pgd(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(kvm, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}

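/*
 * Cache maintenance by set/way and guest cache toggling:
 *
 * Set/way operations are local to a CPU and cannot be virtualized sensibly.
 * The guest only tends to use them around turning its caches on or off, so
 * they are approximated by flushing the whole VM at stage 2 and then trapping
 * the VM control registers (HCR_TVM) until the MMU/caches have actually been
 * toggled, at which point the flush is repeated if needed and the trap is
 * removed again.
 */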
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = *vcpu_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		*vcpu_hcr(vcpu) = hcr | HCR_TVM;
	}
}

void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/*
	 * If switching the MMU+caches on, need to invalidate the caches.
	 * If switching it off, need to clean the caches.
	 * Clean + invalidate does the trick always.
	 */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
	if (now_enabled)
		*vcpu_hcr(vcpu) &= ~HCR_TVM;

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}