#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>
#include <asm/system_misc.h>

#include "trace.h"

static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static pgd_t *merged_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order	get_order(PTRS_PER_PGD * sizeof(pgd_t))

#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)

static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_flush_remote_tlbs() - flush all VM TLB entries
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries.
 */
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}

static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
}

/*
 * D-Cache management functions. They take the page table entries by
 * value, as they are flushing the cache using the kernel mapping (or
 * kmap on 32bit).
 */
static void kvm_flush_dcache_pte(pte_t pte)
{
	__kvm_flush_dcache_pte(pte);
}

static void kvm_flush_dcache_pmd(pmd_t pmd)
{
	__kvm_flush_dcache_pmd(pmd);
}

static void kvm_flush_dcache_pud(pud_t pud)
{
	__kvm_flush_dcache_pud(pud);
}

static bool kvm_is_device_pfn(unsigned long pfn)
{
	return !pfn_valid(pfn);
}

/**
 * stage2_dissolve_pmd() - clear and flush huge PMD entry
 * @kvm:	pointer to kvm structure.
 * @addr:	IPA
 * @pmd:	pmd pointer for IPA
 *
 * Function clears a PMD entry and flushes addr 1st and 2nd stage TLBs.
 */
static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
{
	if (!pmd_thp_or_huge(*pmd))
		return;

	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	put_page(virt_to_page(pmd));
}

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
				  int min, int max)
{
	void *page;

	BUG_ON(max > KVM_NR_MEM_OBJS);
	if (cache->nobjs >= min)
		return 0;
	while (cache->nobjs < max) {
		page = (void *)__get_free_page(PGALLOC_GFP);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
	while (mc->nobjs)
		free_page((unsigned long)mc->objects[--mc->nobjs]);
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
	void *p;

	BUG_ON(!mc || !mc->nobjs);
	p = mc->objects[--mc->nobjs];
	return p;
}

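/*
 * The helpers below tear down a single stage-2 table entry: they clear
 * the entry, invalidate the TLB for that IPA, free the now-unreferenced
 * lower-level table, and drop the refcount held on the table page.
 */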
static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
{
	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
	stage2_pgd_clear(pgd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	stage2_pud_free(pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
	VM_BUG_ON(stage2_pud_huge(*pud));
	stage2_pud_clear(pud);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	stage2_pmd_free(pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(pmd_thp_or_huge(*pmd));
	pmd_clear(pmd);
	kvm_tlb_flush_vmid_ipa(kvm, addr);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM.  However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
 * the IO subsystem will never hit in the cache.
 */
static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t start_addr = addr;
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			pte_t old_pte = *pte;

			kvm_set_pte(pte, __pte(0));
			kvm_tlb_flush_vmid_ipa(kvm, addr);

			/* No need to invalidate the cache for device mappings */
			if (!kvm_is_device_pfn(pte_pfn(old_pte)))
				kvm_flush_dcache_pte(old_pte);

			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (stage2_pte_table_empty(start_pte))
		clear_stage2_pmd_entry(kvm, pmd, start_addr);
}

static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = stage2_pmd_offset(pud, addr);
	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd)) {
				pmd_t old_pmd = *pmd;

				pmd_clear(pmd);
				kvm_tlb_flush_vmid_ipa(kvm, addr);

				kvm_flush_dcache_pmd(old_pmd);

				put_page(virt_to_page(pmd));
			} else {
				unmap_stage2_ptes(kvm, pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);

	if (stage2_pmd_table_empty(start_pmd))
		clear_stage2_pud_entry(kvm, pud, start_addr);
}

static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next, start_addr = addr;
	pud_t *pud, *start_pud;

	start_pud = pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			if (stage2_pud_huge(*pud)) {
				pud_t old_pud = *pud;

				stage2_pud_clear(pud);
				kvm_tlb_flush_vmid_ipa(kvm, addr);
				kvm_flush_dcache_pud(old_pud);
				put_page(virt_to_page(pud));
			} else {
				unmap_stage2_pmds(kvm, pud, addr, next);
			}
		}
	} while (pud++, addr = next, addr != end);

	if (stage2_pud_table_empty(start_pud))
		clear_stage2_pgd_entry(kvm, pgd, start_addr);
}

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:   The VM pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 * be called while holding mmu_lock, otherwise another faulting VCPU may come
 * in and mess with things behind our backs.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	assert_spin_locked(&kvm->mmu_lock);
	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		/*
		 * Make sure the page table is still active, as another thread
		 * could have possibly freed the page table, while we released
		 * the lock.
		 */
		if (!READ_ONCE(kvm->arch.pgd))
			break;
		next = stage2_pgd_addr_end(addr, end);
		if (!stage2_pgd_none(*pgd))
			unmap_stage2_puds(kvm, pgd, addr, next);
		/*
		 * If the range is too large, release the kvm->mmu_lock
		 * to prevent starvation and lockup detector warnings.
		 */
		if (next != end)
			cond_resched_lock(&kvm->mmu_lock);
	} while (pgd++, addr = next, addr != end);
}

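/*
 * stage2_flush_ptes/pmds/puds walk the stage-2 tables for a range and
 * clean+invalidate the data cache for every valid RAM mapping, so that
 * memory already mapped to the VM is coherent with what is in RAM.
 */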
static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
			      phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
			kvm_flush_dcache_pte(*pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
			      phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = stage2_pmd_offset(pud, addr);
	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd))
				kvm_flush_dcache_pmd(*pmd);
			else
				stage2_flush_ptes(kvm, pmd, addr, next);
		}
	} while (pmd++, addr = next, addr != end);
}

static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
			      phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			if (stage2_pud_huge(*pud))
				kvm_flush_dcache_pud(*pud);
			else
				stage2_flush_pmds(kvm, pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

static void stage2_flush_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
	phys_addr_t next;
	pgd_t *pgd;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		next = stage2_pgd_addr_end(addr, end);
		stage2_flush_puds(kvm, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
static void stage2_flush_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_flush_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
}

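/*
 * The clear_hyp_*_entry() and unmap_hyp_*() helpers below mirror the
 * stage-2 teardown above, but operate on the Hyp-mode page tables.  No
 * TLB invalidation is needed here: Hyp mappings are only removed when
 * Hyp itself is being torn down.
 */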
static void clear_hyp_pgd_entry(pgd_t *pgd)
{
	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
	pgd_clear(pgd);
	pud_free(NULL, pud_table);
	put_page(virt_to_page(pgd));
}

static void clear_hyp_pud_entry(pud_t *pud)
{
	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
	VM_BUG_ON(pud_huge(*pud));
	pud_clear(pud);
	pmd_free(NULL, pmd_table);
	put_page(virt_to_page(pud));
}

static void clear_hyp_pmd_entry(pmd_t *pmd)
{
	pte_t *pte_table = pte_offset_kernel(pmd, 0);
	VM_BUG_ON(pmd_thp_or_huge(*pmd));
	pmd_clear(pmd);
	pte_free_kernel(NULL, pte_table);
	put_page(virt_to_page(pmd));
}

static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte, *start_pte;

	start_pte = pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			kvm_set_pte(pte, __pte(0));
			put_page(virt_to_page(pte));
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);

	if (hyp_pte_table_empty(start_pte))
		clear_hyp_pmd_entry(pmd);
}

static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next;
	pmd_t *pmd, *start_pmd;

	start_pmd = pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		/* Hyp doesn't use huge pmds */
		if (!pmd_none(*pmd))
			unmap_hyp_ptes(pmd, addr, next);
	} while (pmd++, addr = next, addr != end);

	if (hyp_pmd_table_empty(start_pmd))
		clear_hyp_pud_entry(pud);
}

static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	phys_addr_t next;
	pud_t *pud, *start_pud;

	start_pud = pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		/* Hyp doesn't use huge puds */
		if (!pud_none(*pud))
			unmap_hyp_pmds(pud, addr, next);
	} while (pud++, addr = next, addr != end);

	if (hyp_pud_table_empty(start_pud))
		clear_hyp_pgd_entry(pgd);
}

static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
{
	pgd_t *pgd;
	phys_addr_t addr = start, end = start + size;
	phys_addr_t next;

	/*
	 * We don't unmap anything from HYP, except at the hyp tear down.
	 * Hence, we don't have to invalidate the TLBs here.
	 */
	pgd = pgdp + pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		if (!pgd_none(*pgd))
			unmap_hyp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * free_hyp_pgds - free Hyp-mode page tables
 *
 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
 * therefore contains either mappings in the kernel memory area (above
 * PAGE_OFFSET), or device mappings in the vmalloc range (from
 * VMALLOC_START to VMALLOC_END).
 *
 * boot_hyp_pgd should only map the idmap range, and is only used in
 * the extended idmap case.
 */
void free_hyp_pgds(void)
{
	unsigned long addr;

	mutex_lock(&kvm_hyp_pgd_mutex);

	if (boot_hyp_pgd) {
		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
		boot_hyp_pgd = NULL;
	}

	if (hyp_pgd) {
		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
			unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);

		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
		hyp_pgd = NULL;
	}
	if (merged_hyp_pgd) {
		clear_page(merged_hyp_pgd);
		free_page((unsigned long)merged_hyp_pgd);
		merged_hyp_pgd = NULL;
	}

	mutex_unlock(&kvm_hyp_pgd_mutex);
}

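/*
 * The create_hyp_*_mappings() helpers below populate a Hyp page table
 * one level at a time, allocating intermediate tables as needed and
 * cleaning updated descriptors to the PoC so a table walker running
 * with caches off still sees them.
 */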
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
				    unsigned long end, unsigned long pfn,
				    pgprot_t prot)
{
	pte_t *pte;
	unsigned long addr;

	addr = start;
	do {
		pte = pte_offset_kernel(pmd, addr);
		kvm_set_pte(pte, pfn_pte(pfn, prot));
		get_page(virt_to_page(pte));
		kvm_flush_dcache_to_poc(pte, sizeof(*pte));
		pfn++;
	} while (addr += PAGE_SIZE, addr != end);
}

static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, next;

	addr = start;
	do {
		pmd = pmd_offset(pud, addr);

		BUG_ON(pmd_sect(*pmd));

		if (pmd_none(*pmd)) {
			pte = pte_alloc_one_kernel(NULL, addr);
			if (!pte) {
				kvm_err("Cannot allocate Hyp pte\n");
				return -ENOMEM;
			}
			pmd_populate_kernel(NULL, pmd, pte);
			get_page(virt_to_page(pmd));
			kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
		}

		next = pmd_addr_end(addr, end);

		create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
				   unsigned long end, unsigned long pfn,
				   pgprot_t prot)
{
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr, next;
	int ret;

	addr = start;
	do {
		pud = pud_offset(pgd, addr);

		if (pud_none_or_clear_bad(pud)) {
			pmd = pmd_alloc_one(NULL, addr);
			if (!pmd) {
				kvm_err("Cannot allocate Hyp pmd\n");
				return -ENOMEM;
			}
			pud_populate(NULL, pud, pmd);
			get_page(virt_to_page(pud));
			kvm_flush_dcache_to_poc(pud, sizeof(*pud));
		}

		next = pud_addr_end(addr, end);
		ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
		if (ret)
			return ret;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);

	return 0;
}

static int __create_hyp_mappings(pgd_t *pgdp,
				 unsigned long start, unsigned long end,
				 unsigned long pfn, pgprot_t prot)
{
	pgd_t *pgd;
	pud_t *pud;
	unsigned long addr, next;
	int err = 0;

	mutex_lock(&kvm_hyp_pgd_mutex);
	addr = start & PAGE_MASK;
	end = PAGE_ALIGN(end);
	do {
		pgd = pgdp + pgd_index(addr);

		if (pgd_none(*pgd)) {
			pud = pud_alloc_one(NULL, addr);
			if (!pud) {
				kvm_err("Cannot allocate Hyp pud\n");
				err = -ENOMEM;
				goto out;
			}
			pgd_populate(NULL, pgd, pud);
			get_page(virt_to_page(pgd));
			kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
		}

		next = pgd_addr_end(addr, end);
		err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
		if (err)
			goto out;
		pfn += (next - addr) >> PAGE_SHIFT;
	} while (addr = next, addr != end);
out:
	mutex_unlock(&kvm_hyp_pgd_mutex);
	return err;
}

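/*
 * kvm_kaddr_to_phys() resolves the physical address behind a kernel
 * virtual address: directly for lowmem addresses, via the vmalloc page
 * tables otherwise.
 */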
static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
{
	if (!is_vmalloc_addr(kaddr)) {
		BUG_ON(!virt_addr_valid(kaddr));
		return __pa(kaddr);
	} else {
		return page_to_phys(vmalloc_to_page(kaddr)) +
		       offset_in_page(kaddr);
	}
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 * @prot:	The protection to be applied to this range
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo hyp address translation) to the same
 * underlying physical pages.
 */
int create_hyp_mappings(void *from, void *to, pgprot_t prot)
{
	phys_addr_t phys_addr;
	unsigned long virt_addr;
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	start = start & PAGE_MASK;
	end = PAGE_ALIGN(end);

	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
		int err;

		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
		err = __create_hyp_mappings(hyp_pgd, virt_addr,
					    virt_addr + PAGE_SIZE,
					    __phys_to_pfn(phys_addr),
					    prot);
		if (err)
			return err;
	}

	return 0;
}

/**
 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 * @from:	The kernel start VA of the range
 * @to:		The kernel end VA of the range (exclusive)
 * @phys_addr:	The physical start address which gets mapped
 *
 * The resulting HYP VA is the same as the kernel VA, modulo the hyp
 * address translation.
 */
int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
{
	unsigned long start = kern_hyp_va((unsigned long)from);
	unsigned long end = kern_hyp_va((unsigned long)to);

	if (is_kernel_in_hyp_mode())
		return 0;

	/* Check for a valid kernel IO mapping */
	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
		return -EINVAL;

	return __create_hyp_mappings(hyp_pgd, start, end,
				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates only the stage-2 HW PGD level table(s) and clears the
 * allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
	pgd_t *pgd;

	if (kvm->arch.pgd != NULL) {
		kvm_err("kvm_arch already initialized?\n");
		return -EINVAL;
	}

	/* Allocate the HW PGD, making sure that each page gets its own refcount */
	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
	if (!pgd)
		return -ENOMEM;

	kvm->arch.pgd = pgd;
	return 0;
}

static void stage2_unmap_memslot(struct kvm *kvm,
				 struct kvm_memory_slot *memslot)
{
	hva_t hva = memslot->userspace_addr;
	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = PAGE_SIZE * memslot->npages;
	hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (!(vma->vm_flags & VM_PFNMAP)) {
			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
		}
		hva = vm_end;
	} while (hva < reg_end);
}

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
void stage2_unmap_vm(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int idx;

	idx = srcu_read_lock(&kvm->srcu);
	down_read(&current->mm->mmap_sem);
	spin_lock(&kvm->mmu_lock);

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots)
		stage2_unmap_memslot(kvm, memslot);

	spin_unlock(&kvm->mmu_lock);
	up_read(&current->mm->mmap_sem);
	srcu_read_unlock(&kvm->srcu, idx);
}

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1
 * table and clearing the kvm->arch.pgd pointer.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
	void *pgd = NULL;

	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.pgd) {
		unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
		pgd = READ_ONCE(kvm->arch.pgd);
		kvm->arch.pgd = NULL;
	}
	spin_unlock(&kvm->mmu_lock);

	/* Free the HW pgd, one page at a time */
	if (pgd)
		free_pages_exact(pgd, S2_PGD_SIZE);
}

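/*
 * stage2_get_pud()/stage2_get_pmd() walk (and, when a memory cache is
 * provided, populate) the stage-2 tables down to the level holding the
 * entry for @addr.  They return NULL if a table is missing and no cache
 * was supplied.
 */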
static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pgd_t *pgd;
	pud_t *pud;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	if (WARN_ON(stage2_pgd_none(*pgd))) {
		if (!cache)
			return NULL;
		pud = mmu_memory_cache_alloc(cache);
		stage2_pgd_populate(pgd, pud);
		get_page(virt_to_page(pgd));
	}

	return stage2_pud_offset(pgd, addr);
}

static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			     phys_addr_t addr)
{
	pud_t *pud;
	pmd_t *pmd;

	pud = stage2_get_pud(kvm, cache, addr);
	if (!pud)
		return NULL;

	if (stage2_pud_none(*pud)) {
		if (!cache)
			return NULL;
		pmd = mmu_memory_cache_alloc(cache);
		stage2_pud_populate(pud, pmd);
		get_page(virt_to_page(pud));
	}

	return stage2_pmd_offset(pud, addr);
}

static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
{
	pmd_t *pmd, old_pmd;

	pmd = stage2_get_pmd(kvm, cache, addr);
	VM_BUG_ON(!pmd);

	/*
	 * Mapping in huge pages should only happen through a fault.  If a
	 * page is merged into a transparent huge page, the individual
	 * subpages of that huge page should be unmapped through MMU
	 * notifiers before we get here.
	 *
	 * Merging of CompoundPages is not supported; they should become
	 * splitting first, unmapped, merged, and mapped back in on-demand.
	 */
	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));

	old_pmd = *pmd;
	if (pmd_present(old_pmd)) {
		pmd_clear(pmd);
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pmd));
	}

	kvm_set_pmd(pmd, *new_pmd);
	return 0;
}

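/*
 * stage2_set_pte() installs a single stage-2 PTE for @addr.  When dirty
 * logging is active, any covering huge PMD is first dissolved so the
 * mapping can be tracked at page granularity.
 */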
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
			  phys_addr_t addr, const pte_t *new_pte,
			  unsigned long flags)
{
	pmd_t *pmd;
	pte_t *pte, old_pte;
	bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
	bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;

	VM_BUG_ON(logging_active && !cache);

	/* Create stage-2 page table mapping - Levels 0 and 1 */
	pmd = stage2_get_pmd(kvm, cache, addr);
	if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
		return 0;
	}

	/*
	 * While dirty page logging - dissolve huge PMD, then continue on to
	 * allocate page.
	 */
	if (logging_active)
		stage2_dissolve_pmd(kvm, addr, pmd);

	/* Create stage-2 page mappings - Level 2 */
	if (pmd_none(*pmd)) {
		if (!cache)
			return 0;
		pte = mmu_memory_cache_alloc(cache);
		pmd_populate_kernel(NULL, pmd, pte);
		get_page(virt_to_page(pmd));
	}

	pte = pte_offset_kernel(pmd, addr);

	if (iomap && pte_present(*pte))
		return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
	old_pte = *pte;
	if (pte_present(old_pte)) {
		kvm_set_pte(pte, __pte(0));
		kvm_tlb_flush_vmid_ipa(kvm, addr);
	} else {
		get_page(virt_to_page(pte));
	}

	kvm_set_pte(pte, *new_pte);
	return 0;
}

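/*
 * Stage-2 "young" bit helpers: use the architecture's test-and-clear
 * primitive when it exists, otherwise fall back to a plain
 * read-modify-write of the entry.
 */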
#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static int stage2_ptep_test_and_clear_young(pte_t *pte)
{
	if (pte_young(*pte)) {
		*pte = pte_mkold(*pte);
		return 1;
	}
	return 0;
}
#else
static int stage2_ptep_test_and_clear_young(pte_t *pte)
{
	return __ptep_test_and_clear_young(pte);
}
#endif

static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
{
	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 * @writable:	Whether or not to create a writable mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
			  phys_addr_t pa, unsigned long size, bool writable)
{
	phys_addr_t addr, end;
	int ret = 0;
	unsigned long pfn;
	struct kvm_mmu_memory_cache cache = { 0, };

	end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
	pfn = __phys_to_pfn(pa);

	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);

		if (writable)
			pte = kvm_s2pte_mkwrite(pte);

		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
					     KVM_NR_MEM_OBJS);
		if (ret)
			goto out;
		spin_lock(&kvm->mmu_lock);
		ret = stage2_set_pte(kvm, &cache, addr, &pte,
				     KVM_S2PTE_FLAG_IS_IOMAP);
		spin_unlock(&kvm->mmu_lock);
		if (ret)
			goto out;

		pfn++;
	}

out:
	mmu_free_memory_cache(&cache);
	return ret;
}

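/*
 * transparent_hugepage_adjust() checks whether the faulting pfn is part
 * of a transparent huge page and, if so, adjusts *pfnp/*ipap back to the
 * start of the huge page so it can be mapped with a block PMD.  Returns
 * true if a PMD-sized mapping can be used.
 */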
static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
{
	kvm_pfn_t pfn = *pfnp;
	gfn_t gfn = *ipap >> PAGE_SHIFT;

	if (PageTransCompoundMap(pfn_to_page(pfn))) {
		unsigned long mask;
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page.  We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages.
		 *
		 * We can therefore safely transfer the refcount from the tail
		 * page to the head page and switch the pfn accordingly.
		 */
		mask = PTRS_PER_PMD - 1;
		VM_BUG_ON((gfn & mask) != (pfn & mask));
		if (pfn & mask) {
			*ipap &= PMD_MASK;
			kvm_release_pfn_clean(pfn);
			pfn &= ~mask;
			kvm_get_pfn(pfn);
			*pfnp = pfn;
		}

		return true;
	}

	return false;
}

static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
{
	if (kvm_vcpu_trap_is_iabt(vcpu))
		return false;

	return kvm_vcpu_dabt_iswrite(vcpu);
}

/**
 * stage2_wp_ptes - write protect PMD range
 * @pmd:	pointer to pmd entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
{
	pte_t *pte;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if (!pte_none(*pte)) {
			if (!kvm_s2pte_readonly(pte))
				kvm_set_s2pte_readonly(pte);
		}
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

/**
 * stage2_wp_pmds - write protect PUD range
 * @pud:	pointer to pud entry
 * @addr:	range start address
 * @end:	range end address
 */
static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
{
	pmd_t *pmd;
	phys_addr_t next;

	pmd = stage2_pmd_offset(pud, addr);

	do {
		next = stage2_pmd_addr_end(addr, end);
		if (!pmd_none(*pmd)) {
			if (pmd_thp_or_huge(*pmd)) {
				if (!kvm_s2pmd_readonly(pmd))
					kvm_set_s2pmd_readonly(pmd);
			} else {
				stage2_wp_ptes(pmd, addr, next);
			}
		}
	} while (pmd++, addr = next, addr != end);
}

/**
 * stage2_wp_puds - write protect PGD range
 * @pgd:	pointer to pgd entry
 * @addr:	range start address
 * @end:	range end address
 *
 * Process PUD entries, for a huge PUD we cause a panic.
 */
static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
{
	pud_t *pud;
	phys_addr_t next;

	pud = stage2_pud_offset(pgd, addr);
	do {
		next = stage2_pud_addr_end(addr, end);
		if (!stage2_pud_none(*pud)) {
			/* TODO: PUD not supported, revisit later if supported */
			BUG_ON(stage2_pud_huge(*pud));
			stage2_wp_pmds(pud, addr, next);
		}
	} while (pud++, addr = next, addr != end);
}

/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @kvm:	The KVM pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
{
	pgd_t *pgd;
	phys_addr_t next;

	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
	do {
		/*
		 * Release kvm_mmu_lock periodically if the memory region is
		 * large. Otherwise, we may see kernel panics with
		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR,
		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
		 * will also starve other vCPUs. We have to also make sure
		 * that the page tables are not freed while we released
		 * the lock.
		 */
		cond_resched_lock(&kvm->mmu_lock);
		if (!READ_ONCE(kvm->arch.pgd))
			break;
		next = stage2_pgd_addr_end(addr, end);
		if (stage2_pgd_present(*pgd))
			stage2_wp_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after memory region
 * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
 * all present PMD and PTEs are write protected in the memory region.
 * Afterwards read of dirty page log can be called.
 *
 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
 * serializing operations for VM memory regions.
 */
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
{
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
	phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
	phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	stage2_wp_range(kvm, start, end);
	spin_unlock(&kvm->mmu_lock);
	kvm_flush_remote_tlbs(kvm);
}

/**
 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks bits set in mask and write protects the associated pte's. Caller must
 * acquire kvm_mmu_lock.
 */
static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
	phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
	phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;

	stage2_wp_range(kvm, start, end);
}

/*
 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
 * dirty pages.
 *
 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
 * enable dirty logging for them.
 */
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		gfn_t gfn_offset, unsigned long mask)
{
	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}

static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
				      unsigned long size)
{
	__coherent_cache_guest_page(vcpu, pfn, size);
}

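/*
 * kvm_send_hwpoison_signal() reports a hardware-poisoned page to the
 * VMM with SIGBUS/BUS_MCEERR_AR, encoding the size of the affected
 * mapping in si_addr_lsb.
 */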
static void kvm_send_hwpoison_signal(unsigned long address,
				     struct vm_area_struct *vma)
{
	siginfo_t info;

	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_MCEERR_AR;
	info.si_addr = (void __user *)address;

	if (is_vm_hugetlb_page(vma))
		info.si_addr_lsb = huge_page_shift(hstate_vma(vma));
	else
		info.si_addr_lsb = PAGE_SHIFT;

	send_sig_info(SIGBUS, &info, current);
}

static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
			  struct kvm_memory_slot *memslot, unsigned long hva,
			  unsigned long fault_status)
{
	int ret;
	bool write_fault, writable, hugetlb = false, force_pte = false;
	unsigned long mmu_seq;
	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
	struct vm_area_struct *vma;
	kvm_pfn_t pfn;
	pgprot_t mem_type = PAGE_S2;
	bool logging_active = memslot_is_logging(memslot);
	unsigned long flags = 0;

	write_fault = kvm_is_write_fault(vcpu);
	if (fault_status == FSC_PERM && !write_fault) {
		kvm_err("Unexpected L2 read permission error\n");
		return -EFAULT;
	}

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}

	if (is_vm_hugetlb_page(vma) && !logging_active) {
		hugetlb = true;
		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
	} else {
		/*
		 * Pages belonging to memslots that don't have the same
		 * alignment for userspace and IPA cannot be mapped using
		 * block descriptors even if the pages belong to a THP for
		 * the process, because the stage-2 block descriptor will
		 * cover more than a single THP and we lose atomicity for
		 * unmapping, updates, and splits of the THP or other pages
		 * in the stage-2 block range.
		 */
		if ((memslot->userspace_addr & ~PMD_MASK) !=
		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
			force_pte = true;
	}
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
	ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
				     KVM_NR_MEM_OBJS);
	if (ret)
		return ret;

	mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't
	 * risk the page we just got a reference to getting unmapped before
	 * we have a chance to grab the mmu_lock, which ensures that if the
	 * page gets unmapped afterwards, the call to kvm_unmap_hva will take
	 * it away from us again properly. This smp_rmb() pairs with the
	 * smp_wmb() in the mmu notifier invalidate path.
	 */
	smp_rmb();

	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
	if (pfn == KVM_PFN_ERR_HWPOISON) {
		kvm_send_hwpoison_signal(hva, vma);
		return 0;
	}
	if (is_error_noslot_pfn(pfn))
		return -EFAULT;

	if (kvm_is_device_pfn(pfn)) {
		mem_type = PAGE_S2_DEVICE;
		flags |= KVM_S2PTE_FLAG_IS_IOMAP;
	} else if (logging_active) {
		/*
		 * Faults on pages in a memslot with logging enabled
		 * should not be mapped with huge pages (it introduces churn
		 * and performance degradation), so force a pte mapping.
		 */
		force_pte = true;
		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;

		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		if (!write_fault)
			writable = false;
	}

	spin_lock(&kvm->mmu_lock);
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	if (!hugetlb && !force_pte)
		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);

	if (hugetlb) {
		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
		new_pmd = pmd_mkhuge(new_pmd);
		if (writable) {
			new_pmd = kvm_s2pmd_mkwrite(new_pmd);
			kvm_set_pfn_dirty(pfn);
		}
		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
	} else {
		pte_t new_pte = pfn_pte(pfn, mem_type);

		if (writable) {
			new_pte = kvm_s2pte_mkwrite(new_pte);
			kvm_set_pfn_dirty(pfn);
			mark_page_dirty(kvm, gfn);
		}
		coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
	}

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	kvm_set_pfn_accessed(pfn);
	kvm_release_pfn_clean(pfn);
	return ret;
}

/*
 * Resolve the access fault by making the page young again.
 * Note that because the faulting entry is guaranteed not to be
 * cached in the TLB, we don't need to invalidate anything.
 * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
 * so there is no need for atomic (pte|pmd)_mkyoung operations.
 */
static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
{
	pmd_t *pmd;
	pte_t *pte;
	kvm_pfn_t pfn;
	bool pfn_valid = false;

	trace_kvm_access_fault(fault_ipa);

	spin_lock(&vcpu->kvm->mmu_lock);

	pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
	if (!pmd || pmd_none(*pmd))
		goto out;

	if (pmd_thp_or_huge(*pmd)) {
		*pmd = pmd_mkyoung(*pmd);
		pfn = pmd_pfn(*pmd);
		pfn_valid = true;
		goto out;
	}

	pte = pte_offset_kernel(pmd, fault_ipa);
	if (pte_none(*pte))
		goto out;

	*pte = pte_mkyoung(*pte);
	pfn = pte_pfn(*pte);
	pfn_valid = true;
out:
	spin_unlock(&vcpu->kvm->mmu_lock);
	if (pfn_valid)
		kvm_set_pfn_accessed(pfn);
}

static bool is_abort_sea(unsigned long fault_status)
{
	switch (fault_status) {
	case FSC_SEA:
	case FSC_SEA_TTW0:
	case FSC_SEA_TTW1:
	case FSC_SEA_TTW2:
	case FSC_SEA_TTW3:
	case FSC_SECC:
	case FSC_SECC_TTW0:
	case FSC_SECC_TTW1:
	case FSC_SECC_TTW2:
	case FSC_SECC_TTW3:
		return true;
	default:
		return false;
	}
}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether this
 * memory region has been registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	unsigned long fault_status;
	phys_addr_t fault_ipa;
	struct kvm_memory_slot *memslot;
	unsigned long hva;
	bool is_iabt, write_fault, writable;
	gfn_t gfn;
	int ret, idx;

	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);

	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

	/*
	 * The host kernel will handle the synchronous external abort. There
	 * is no need to pass the error into the guest.
	 */
	if (is_abort_sea(fault_status)) {
		if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
			return 1;
	}

	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
	if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
		kvm_inject_vabt(vcpu);
		return 1;
	}

	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
			      kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
	if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
	    fault_status != FSC_ACCESS) {
		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
			kvm_vcpu_trap_get_class(vcpu),
			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
			(unsigned long)kvm_vcpu_get_hsr(vcpu));
		return -EFAULT;
	}

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		if (is_iabt) {
			/* Prefetch Abort on I/O address */
			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * Check for a cache maintenance operation. Since we
		 * ended-up here, we know it is outside of any memory
		 * slot. But we can't find out if that is for a device,
		 * or if the guest is just being stupid. The only thing
		 * we know for sure is that this range cannot be cached.
		 *
		 * So let's assume that the guest is just being
		 * cautious, and skip the instruction.
		 */
		if (kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, run, fault_ipa);
		goto out_unlock;
	}

	/* Userspace should not be able to register out-of-bounds IPAs */
	VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);

	if (fault_status == FSC_ACCESS) {
		handle_access_fault(vcpu, fault_ipa);
		ret = 1;
		goto out_unlock;
	}

	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
	if (ret == 0)
		ret = 1;
out_unlock:
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	return ret;
}

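/*
 * handle_hva_to_gpa() maps an HVA range onto the guest physical ranges
 * that back it (one per intersecting memslot) and invokes @handler on
 * each, OR-ing the handler results together.
 */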
static int handle_hva_to_gpa(struct kvm *kvm,
			     unsigned long start,
			     unsigned long end,
			     int (*handler)(struct kvm *kvm,
					    gpa_t gpa, u64 size,
					    void *data),
			     void *data)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int ret = 0;

	slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gpa;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;

		gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
		ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
	}

	return ret;
}

static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	unmap_stage2_range(kvm, gpa, size);
	return 0;
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
	unsigned long end = hva + PAGE_SIZE;

	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva(hva);
	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm,
			unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;

	trace_kvm_unmap_hva_range(start, end);
	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
	return 0;
}

static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pte_t *pte = (pte_t *)data;

	WARN_ON(size != PAGE_SIZE);
	/*
	 * The stage-2 PTE handed to us was built with PAGE_S2, i.e.
	 * read-only; mapping it read-only is fine, as a later write
	 * fault will make the page writable and take care of dirty
	 * tracking.
	 */
	stage2_set_pte(kvm, NULL, gpa, pte, 0);
	return 0;
}

void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	unsigned long end = hva + PAGE_SIZE;
	pte_t stage2_pte;

	if (!kvm->arch.pgd)
		return;

	trace_kvm_set_spte_hva(hva);
	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

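/*
 * MMU notifier age handlers: clear (kvm_age_hva_handler) or merely read
 * (kvm_test_age_hva_handler) the access flag of the stage-2 entry that
 * maps the notified range.
 */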
static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))
		return 0;

	if (pmd_thp_or_huge(*pmd))
		return stage2_pmdp_test_and_clear_young(pmd);

	pte = pte_offset_kernel(pmd, gpa);
	if (pte_none(*pte))
		return 0;

	return stage2_ptep_test_and_clear_young(pte);
}

static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
	pmd_t *pmd;
	pte_t *pte;

	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE);
	pmd = stage2_get_pmd(kvm, NULL, gpa);
	if (!pmd || pmd_none(*pmd))
		return 0;

	if (pmd_thp_or_huge(*pmd))
		return pmd_young(*pmd);

	pte = pte_offset_kernel(pmd, gpa);
	if (!pte_none(*pte))
		return pte_young(*pte);

	return 0;
}

int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
	if (!kvm->arch.pgd)
		return 0;
	trace_kvm_age_hva(start, end);
	return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}

int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
	if (!kvm->arch.pgd)
		return 0;
	trace_kvm_test_age_hva(hva);
	return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
	mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

phys_addr_t kvm_mmu_get_httbr(void)
{
	if (__kvm_cpu_uses_extended_idmap())
		return virt_to_phys(merged_hyp_pgd);
	else
		return virt_to_phys(hyp_pgd);
}

phys_addr_t kvm_get_idmap_vector(void)
{
	return hyp_idmap_vector;
}

static int kvm_map_idmap_text(pgd_t *pgd)
{
	int err;

	/* Create the idmap in the boot page tables */
	err = __create_hyp_mappings(pgd,
				    hyp_idmap_start, hyp_idmap_end,
				    __phys_to_pfn(hyp_idmap_start),
				    PAGE_HYP_EXEC);
	if (err)
		kvm_err("Failed to idmap %lx-%lx\n",
			hyp_idmap_start, hyp_idmap_end);

	return err;
}

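/*
 * kvm_mmu_init() builds the Hyp-mode page tables at init time: it
 * identity-maps the Hyp init code and, on systems needing an extended
 * idmap, merges the boot and runtime Hyp tables.
 */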
int kvm_mmu_init(void)
{
	int err;

	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);

	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);

	kvm_info("IDMAP page: %lx\n", hyp_idmap_start);
	kvm_info("HYP VA range: %lx:%lx\n",
		 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));

	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
	    hyp_idmap_start < kern_hyp_va(~0UL) &&
	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
		/*
		 * The idmap page is intersecting with the VA space used by
		 * HYP, so it is not safe to continue further.
		 */
		kvm_err("IDMAP intersecting with HYP VA, unable to continue\n");
		err = -EINVAL;
		goto out;
	}

	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
	if (!hyp_pgd) {
		kvm_err("Hyp mode PGD not allocated\n");
		err = -ENOMEM;
		goto out;
	}

	if (__kvm_cpu_uses_extended_idmap()) {
		boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
							 hyp_pgd_order);
		if (!boot_hyp_pgd) {
			kvm_err("Hyp boot PGD not allocated\n");
			err = -ENOMEM;
			goto out;
		}

		err = kvm_map_idmap_text(boot_hyp_pgd);
		if (err)
			goto out;

		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
		if (!merged_hyp_pgd) {
			kvm_err("Failed to allocate extra HYP pgd\n");
			err = -ENOMEM;
			goto out;
		}
		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
				    hyp_idmap_start);
	} else {
		err = kvm_map_idmap_text(hyp_pgd);
		if (err)
			goto out;
	}

	return 0;
out:
	free_hyp_pgds();
	return err;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	/*
	 * At this point memslot has been committed and there is an
	 * allocated dirty_bitmap[]; dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
	if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
		kvm_mmu_wp_memory_region(kvm, mem->slot);
}

int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	hva_t hva = mem->userspace_addr;
	hva_t reg_end = hva + mem->memory_size;
	bool writable = !(mem->flags & KVM_MEM_READONLY);
	int ret = 0;

	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
			change != KVM_MR_FLAGS_ONLY)
		return 0;

	/*
	 * Prevent userspace from creating a memory region outside of the IPA
	 * space addressable by the KVM guest IPA space.
	 */
	if (memslot->base_gfn + memslot->npages >=
	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
		return -EFAULT;

	down_read(&current->mm->mmap_sem);
	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we can map
	 * any of them right now.
	 *
	 *     +--------------------------------------------+
	 * +---------------+----------------+   +----------------+
	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
	 * +---------------+----------------+   +----------------+
	 *     |               memory region                |
	 *     +--------------------------------------------+
	 */
	do {
		struct vm_area_struct *vma = find_vma(current->mm, hva);
		hva_t vm_start, vm_end;

		if (!vma || vma->vm_start >= reg_end)
			break;

		/*
		 * Mapping a read-only VMA is only allowed if the
		 * memslot is not writable.
		 */
		if (writable && !(vma->vm_flags & VM_WRITE)) {
			ret = -EPERM;
			break;
		}

		/*
		 * Take the intersection of this VMA with the memory region
		 */
		vm_start = max(hva, vma->vm_start);
		vm_end = min(reg_end, vma->vm_end);

		if (vma->vm_flags & VM_PFNMAP) {
			gpa_t gpa = mem->guest_phys_addr +
				    (vm_start - mem->userspace_addr);
			phys_addr_t pa;

			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
			pa += vm_start - vma->vm_start;

			/* IO region dirty page logging not allowed */
			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
				ret = -EINVAL;
				goto out;
			}

			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
						    vm_end - vm_start,
						    writable);
			if (ret)
				break;
		}
		hva = vm_end;
	} while (hva < reg_end);

	if (change == KVM_MR_FLAGS_ONLY)
		goto out;

	spin_lock(&kvm->mmu_lock);
	if (ret)
		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
	else
		stage2_flush_memslot(kvm, memslot);
	spin_unlock(&kvm->mmu_lock);
out:
	up_read(&current->mm->mmap_sem);
	return ret;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
			   struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
	kvm_free_stage2_pgd(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot)
{
	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
	phys_addr_t size = slot->npages << PAGE_SHIFT;

	spin_lock(&kvm->mmu_lock);
	unmap_stage2_range(kvm, gpa, size);
	spin_unlock(&kvm->mmu_lock);
}

/*
 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
 *
 * Main problems:
 * - S/W ops are local to a CPU (not broadcast)
 * - We have line migration behind our back (speculation)
 * - System caches don't support S/W at all (damn!)
 *
 * In the face of the above, the best we can do is to try and convert
 * S/W ops to VA ops. Because the guest is not allowed to infer the
 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
 * which is a rather good thing for us.
 *
 * Also, it is only used when turning caches on/off ("The expected
 * usage of the cache maintenance instructions that operate by set/way
 * is associated with the cache maintenance instructions associated
 * with the powerdown and powerup of caches, if this is required by
 * the implementation.").
 *
 * We use the following policy:
 *
 * - If we trap a S/W operation, we enable VM trapping to detect
 *   caches being turned on/off, and do a full clean.
 *
 * - We flush the caches on both caches being turned on and off.
 *
 * - Once the caches are enabled, we stop trapping VM ops.
 */
void kvm_set_way_flush(struct kvm_vcpu *vcpu)
{
	unsigned long hcr = vcpu_get_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * Caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
	if (!(hcr & HCR_TVM)) {
		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
					vcpu_has_cache_enabled(vcpu));
		stage2_flush_vm(vcpu->kvm);
		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
	}
}

void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
{
	bool now_enabled = vcpu_has_cache_enabled(vcpu);

	/*
	 * If switching the MMU+caches on, need to invalidate the caches.
	 * If switching it off, need to clean the caches.
	 * Clean + invalidate does the trick always.
	 */
	if (now_enabled != was_enabled)
		stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
	if (now_enabled)
		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);

	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
}