/*
 * KVM/ARM MMU code: management of the stage-2 page tables that translate
 * guest physical addresses (IPAs) into host physical addresses, and of the
 * HYP-mode page tables used to map kernel code and data into the hypervisor.
 */
#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <linux/hugetlb.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>

#include "trace.h"

extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

static pgd_t *boot_hyp_pgd;
static pgd_t *hyp_pgd;
static pgd_t *merged_hyp_pgd;
static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;

#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))

#define KVM_S2PTE_FLAG_IS_IOMAP		(1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)

/* A memslot is logging dirty pages if it has a bitmap and is not read-only. */
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
	return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
}

/**
 * kvm_flush_remote_tlbs() - flush all VM TLB entries
 * @kvm:	pointer to kvm structure.
 *
 * Interface to HYP function to flush all VM TLB entries
 */
63void kvm_flush_remote_tlbs(struct kvm *kvm)
64{
65 kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
66}
67
68static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
69{
70 kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
71}
72

/*
 * D-Cache management functions. They take the page table entries by
 * value, as they are flushing the cache using the kernel mapping (or
 * kmap on 32bit).
 */
78static void kvm_flush_dcache_pte(pte_t pte)
79{
80 __kvm_flush_dcache_pte(pte);
81}
82
83static void kvm_flush_dcache_pmd(pmd_t pmd)
84{
85 __kvm_flush_dcache_pmd(pmd);
86}
87
88static void kvm_flush_dcache_pud(pud_t pud)
89{
90 __kvm_flush_dcache_pud(pud);
91}
92
93static bool kvm_is_device_pfn(unsigned long pfn)
94{
95 return !pfn_valid(pfn);
96}
97

/**
 * stage2_dissolve_pmd() - clear and flush huge PMD entry
 * @kvm:	pointer to kvm structure.
 * @addr:	IPA
 * @pmd:	pmd pointer for IPA
 *
 * Function clears a PMD entry and flushes the addr's 1st and 2nd stage TLBs,
 * so that the range can subsequently be mapped with page granularity.
 */
107static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
108{
109 if (!pmd_thp_or_huge(*pmd))
110 return;
111
112 pmd_clear(pmd);
113 kvm_tlb_flush_vmid_ipa(kvm, addr);
114 put_page(virt_to_page(pmd));
115}
116
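/*
 * Pre-fill the per-VCPU cache of page-table pages so that the fault handlers
 * can allocate stage-2 tables without sleeping while holding the MMU lock.
 */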
117static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
118 int min, int max)
119{
120 void *page;
121
122 BUG_ON(max > KVM_NR_MEM_OBJS);
123 if (cache->nobjs >= min)
124 return 0;
125 while (cache->nobjs < max) {
126 page = (void *)__get_free_page(PGALLOC_GFP);
127 if (!page)
128 return -ENOMEM;
129 cache->objects[cache->nobjs++] = page;
130 }
131 return 0;
132}
133
134static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
135{
136 while (mc->nobjs)
137 free_page((unsigned long)mc->objects[--mc->nobjs]);
138}
139
140static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
141{
142 void *p;
143
144 BUG_ON(!mc || !mc->nobjs);
145 p = mc->objects[--mc->nobjs];
146 return p;
147}
148
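/*
 * The clear_stage2_*_entry() helpers tear down one level of the stage-2
 * hierarchy: clear the entry, flush the TLB for that IPA, free the now
 * unreferenced lower-level table and drop the reference on its holder.
 */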
149static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
150{
151 pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
152 stage2_pgd_clear(pgd);
153 kvm_tlb_flush_vmid_ipa(kvm, addr);
154 stage2_pud_free(pud_table);
155 put_page(virt_to_page(pgd));
156}
157
158static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
159{
160 pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
161 VM_BUG_ON(stage2_pud_huge(*pud));
162 stage2_pud_clear(pud);
163 kvm_tlb_flush_vmid_ipa(kvm, addr);
164 stage2_pmd_free(pmd_table);
165 put_page(virt_to_page(pud));
166}
167
168static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
169{
170 pte_t *pte_table = pte_offset_kernel(pmd, 0);
171 VM_BUG_ON(pmd_thp_or_huge(*pmd));
172 pmd_clear(pmd);
173 kvm_tlb_flush_vmid_ipa(kvm, addr);
174 pte_free_kernel(NULL, pte_table);
175 put_page(virt_to_page(pmd));
176}
177

/*
 * Unmapping vs dcache management:
 *
 * If a guest maps certain memory pages as uncached, all writes will
 * bypass the data cache and go directly to RAM.  However, the CPUs
 * can still speculate reads (not writes) and fill cache lines with
 * data.
 *
 * Those cache lines will be *clean* cache lines though, so a
 * clean+invalidate operation is equivalent to an invalidate
 * operation, because no cache lines are marked dirty.
 *
 * Those clean cache lines could be filled prior to an uncached write
 * by the guest, and the cache coherent IO subsystem would therefore
 * end up writing old data to disk.
 *
 * This is why right after unmapping a page/section and invalidating
 * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
 * the IO subsystem will never hit in the cache.
 */
198static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
199 phys_addr_t addr, phys_addr_t end)
200{
201 phys_addr_t start_addr = addr;
202 pte_t *pte, *start_pte;
203
204 start_pte = pte = pte_offset_kernel(pmd, addr);
205 do {
206 if (!pte_none(*pte)) {
207 pte_t old_pte = *pte;
208
209 kvm_set_pte(pte, __pte(0));
210 kvm_tlb_flush_vmid_ipa(kvm, addr);

			/* No need to invalidate the cache for device mappings */
213 if (!kvm_is_device_pfn(pte_pfn(old_pte)))
214 kvm_flush_dcache_pte(old_pte);
215
216 put_page(virt_to_page(pte));
217 }
218 } while (pte++, addr += PAGE_SIZE, addr != end);
219
220 if (stage2_pte_table_empty(start_pte))
221 clear_stage2_pmd_entry(kvm, pmd, start_addr);
222}
223
224static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
225 phys_addr_t addr, phys_addr_t end)
226{
227 phys_addr_t next, start_addr = addr;
228 pmd_t *pmd, *start_pmd;
229
230 start_pmd = pmd = stage2_pmd_offset(pud, addr);
231 do {
232 next = stage2_pmd_addr_end(addr, end);
233 if (!pmd_none(*pmd)) {
234 if (pmd_thp_or_huge(*pmd)) {
235 pmd_t old_pmd = *pmd;
236
237 pmd_clear(pmd);
238 kvm_tlb_flush_vmid_ipa(kvm, addr);
239
240 kvm_flush_dcache_pmd(old_pmd);
241
242 put_page(virt_to_page(pmd));
243 } else {
244 unmap_stage2_ptes(kvm, pmd, addr, next);
245 }
246 }
247 } while (pmd++, addr = next, addr != end);
248
249 if (stage2_pmd_table_empty(start_pmd))
250 clear_stage2_pud_entry(kvm, pud, start_addr);
251}
252
253static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
254 phys_addr_t addr, phys_addr_t end)
255{
256 phys_addr_t next, start_addr = addr;
257 pud_t *pud, *start_pud;
258
259 start_pud = pud = stage2_pud_offset(pgd, addr);
260 do {
261 next = stage2_pud_addr_end(addr, end);
262 if (!stage2_pud_none(*pud)) {
263 if (stage2_pud_huge(*pud)) {
264 pud_t old_pud = *pud;
265
266 stage2_pud_clear(pud);
267 kvm_tlb_flush_vmid_ipa(kvm, addr);
268 kvm_flush_dcache_pud(old_pud);
269 put_page(virt_to_page(pud));
270 } else {
271 unmap_stage2_pmds(kvm, pud, addr, next);
272 }
273 }
274 } while (pud++, addr = next, addr != end);
275
276 if (stage2_pud_table_empty(start_pud))
277 clear_stage2_pgd_entry(kvm, pgd, start_addr);
278}
279

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:   The VM pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts.  Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
291static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
292{
293 pgd_t *pgd;
294 phys_addr_t addr = start, end = start + size;
295 phys_addr_t next;
296
297 pgd = kvm->arch.pgd + stage2_pgd_index(addr);
298 do {
299 next = stage2_pgd_addr_end(addr, end);
300 if (!stage2_pgd_none(*pgd))
301 unmap_stage2_puds(kvm, pgd, addr, next);
302 } while (pgd++, addr = next, addr != end);
303}
304
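/*
 * The stage2_flush_*() walkers clean the data cache for every valid leaf
 * entry, so that memory already mapped to the VM is coherent with the
 * guest's view once its caches are (re)enabled.
 */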
305static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
306 phys_addr_t addr, phys_addr_t end)
307{
308 pte_t *pte;
309
310 pte = pte_offset_kernel(pmd, addr);
311 do {
312 if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte)))
313 kvm_flush_dcache_pte(*pte);
314 } while (pte++, addr += PAGE_SIZE, addr != end);
315}
316
317static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
318 phys_addr_t addr, phys_addr_t end)
319{
320 pmd_t *pmd;
321 phys_addr_t next;
322
323 pmd = stage2_pmd_offset(pud, addr);
324 do {
325 next = stage2_pmd_addr_end(addr, end);
326 if (!pmd_none(*pmd)) {
327 if (pmd_thp_or_huge(*pmd))
328 kvm_flush_dcache_pmd(*pmd);
329 else
330 stage2_flush_ptes(kvm, pmd, addr, next);
331 }
332 } while (pmd++, addr = next, addr != end);
333}
334
335static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
336 phys_addr_t addr, phys_addr_t end)
337{
338 pud_t *pud;
339 phys_addr_t next;
340
341 pud = stage2_pud_offset(pgd, addr);
342 do {
343 next = stage2_pud_addr_end(addr, end);
344 if (!stage2_pud_none(*pud)) {
345 if (stage2_pud_huge(*pud))
346 kvm_flush_dcache_pud(*pud);
347 else
348 stage2_flush_pmds(kvm, pud, addr, next);
349 }
350 } while (pud++, addr = next, addr != end);
351}
352
353static void stage2_flush_memslot(struct kvm *kvm,
354 struct kvm_memory_slot *memslot)
355{
356 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
357 phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
358 phys_addr_t next;
359 pgd_t *pgd;
360
361 pgd = kvm->arch.pgd + stage2_pgd_index(addr);
362 do {
363 next = stage2_pgd_addr_end(addr, end);
364 stage2_flush_puds(kvm, pgd, addr, next);
365 } while (pgd++, addr = next, addr != end);
366}
367

/**
 * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
 * @kvm: The struct kvm pointer
 *
 * Go through the stage 2 page tables and invalidate any cache lines
 * backing memory already mapped to the VM.
 */
375static void stage2_flush_vm(struct kvm *kvm)
376{
377 struct kvm_memslots *slots;
378 struct kvm_memory_slot *memslot;
379 int idx;
380
381 idx = srcu_read_lock(&kvm->srcu);
382 spin_lock(&kvm->mmu_lock);
383
384 slots = kvm_memslots(kvm);
385 kvm_for_each_memslot(memslot, slots)
386 stage2_flush_memslot(kvm, memslot);
387
388 spin_unlock(&kvm->mmu_lock);
389 srcu_read_unlock(&kvm->srcu, idx);
390}
391
392static void clear_hyp_pgd_entry(pgd_t *pgd)
393{
394 pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
395 pgd_clear(pgd);
396 pud_free(NULL, pud_table);
397 put_page(virt_to_page(pgd));
398}
399
400static void clear_hyp_pud_entry(pud_t *pud)
401{
402 pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
403 VM_BUG_ON(pud_huge(*pud));
404 pud_clear(pud);
405 pmd_free(NULL, pmd_table);
406 put_page(virt_to_page(pud));
407}
408
409static void clear_hyp_pmd_entry(pmd_t *pmd)
410{
411 pte_t *pte_table = pte_offset_kernel(pmd, 0);
412 VM_BUG_ON(pmd_thp_or_huge(*pmd));
413 pmd_clear(pmd);
414 pte_free_kernel(NULL, pte_table);
415 put_page(virt_to_page(pmd));
416}
417
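/*
 * The unmap_hyp_*() walkers remove HYP-mode mappings, drop the references
 * held on intermediate table pages and free tables that become empty.
 */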
418static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
419{
420 pte_t *pte, *start_pte;
421
422 start_pte = pte = pte_offset_kernel(pmd, addr);
423 do {
424 if (!pte_none(*pte)) {
425 kvm_set_pte(pte, __pte(0));
426 put_page(virt_to_page(pte));
427 }
428 } while (pte++, addr += PAGE_SIZE, addr != end);
429
430 if (hyp_pte_table_empty(start_pte))
431 clear_hyp_pmd_entry(pmd);
432}
433
434static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
435{
436 phys_addr_t next;
437 pmd_t *pmd, *start_pmd;
438
439 start_pmd = pmd = pmd_offset(pud, addr);
440 do {
441 next = pmd_addr_end(addr, end);
442
443 if (!pmd_none(*pmd))
444 unmap_hyp_ptes(pmd, addr, next);
445 } while (pmd++, addr = next, addr != end);
446
447 if (hyp_pmd_table_empty(start_pmd))
448 clear_hyp_pud_entry(pud);
449}
450
451static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
452{
453 phys_addr_t next;
454 pud_t *pud, *start_pud;
455
456 start_pud = pud = pud_offset(pgd, addr);
457 do {
458 next = pud_addr_end(addr, end);
459
460 if (!pud_none(*pud))
461 unmap_hyp_pmds(pud, addr, next);
462 } while (pud++, addr = next, addr != end);
463
464 if (hyp_pud_table_empty(start_pud))
465 clear_hyp_pgd_entry(pgd);
466}
467
468static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
469{
470 pgd_t *pgd;
471 phys_addr_t addr = start, end = start + size;
472 phys_addr_t next;
473
	/*
	 * We don't unmap anything from HYP, except at the hyp tear down.
	 * Hence, we don't have to invalidate the TLBs here.
	 */
478 pgd = pgdp + pgd_index(addr);
479 do {
480 next = pgd_addr_end(addr, end);
481 if (!pgd_none(*pgd))
482 unmap_hyp_puds(pgd, addr, next);
483 } while (pgd++, addr = next, addr != end);
484}
485

/**
 * free_boot_hyp_pgd - free HYP boot page tables
 *
 * Free the HYP boot page tables. The trampoline mapping is also removed
 * from the runtime HYP page tables.
 */
491void free_boot_hyp_pgd(void)
492{
493 mutex_lock(&kvm_hyp_pgd_mutex);
494
495 if (boot_hyp_pgd) {
496 unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
497 unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
498 free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
499 boot_hyp_pgd = NULL;
500 }
501
502 if (hyp_pgd)
503 unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
504
505 mutex_unlock(&kvm_hyp_pgd_mutex);
506}
507

/**
 * free_hyp_pgds - free Hyp-mode page tables
 *
 * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
 * therefore contains either mappings in the kernel memory area (above
 * PAGE_OFFSET), or device mappings in the vmalloc range (from
 * VMALLOC_START to VMALLOC_END).
 *
 * boot_hyp_pgd should only map the idmap and trampoline pages used by
 * the init code.
 */
518void free_hyp_pgds(void)
519{
520 unsigned long addr;
521
522 free_boot_hyp_pgd();
523
524 mutex_lock(&kvm_hyp_pgd_mutex);
525
526 if (hyp_pgd) {
527 for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
528 unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
529 for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
530 unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
531
532 free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
533 hyp_pgd = NULL;
534 }
535 if (merged_hyp_pgd) {
536 clear_page(merged_hyp_pgd);
537 free_page((unsigned long)merged_hyp_pgd);
538 merged_hyp_pgd = NULL;
539 }
540
541 mutex_unlock(&kvm_hyp_pgd_mutex);
542}
543
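/*
 * The create_hyp_*_mappings() helpers populate the HYP page tables one level
 * at a time, allocating intermediate tables as needed and cleaning the
 * updated entries to the PoC so the page table walker sees them.
 */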
544static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
545 unsigned long end, unsigned long pfn,
546 pgprot_t prot)
547{
548 pte_t *pte;
549 unsigned long addr;
550
551 addr = start;
552 do {
553 pte = pte_offset_kernel(pmd, addr);
554 kvm_set_pte(pte, pfn_pte(pfn, prot));
555 get_page(virt_to_page(pte));
556 kvm_flush_dcache_to_poc(pte, sizeof(*pte));
557 pfn++;
558 } while (addr += PAGE_SIZE, addr != end);
559}
560
561static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
562 unsigned long end, unsigned long pfn,
563 pgprot_t prot)
564{
565 pmd_t *pmd;
566 pte_t *pte;
567 unsigned long addr, next;
568
569 addr = start;
570 do {
571 pmd = pmd_offset(pud, addr);
572
573 BUG_ON(pmd_sect(*pmd));
574
575 if (pmd_none(*pmd)) {
576 pte = pte_alloc_one_kernel(NULL, addr);
577 if (!pte) {
578 kvm_err("Cannot allocate Hyp pte\n");
579 return -ENOMEM;
580 }
581 pmd_populate_kernel(NULL, pmd, pte);
582 get_page(virt_to_page(pmd));
583 kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
584 }
585
586 next = pmd_addr_end(addr, end);
587
588 create_hyp_pte_mappings(pmd, addr, next, pfn, prot);
589 pfn += (next - addr) >> PAGE_SHIFT;
590 } while (addr = next, addr != end);
591
592 return 0;
593}
594
595static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
596 unsigned long end, unsigned long pfn,
597 pgprot_t prot)
598{
599 pud_t *pud;
600 pmd_t *pmd;
601 unsigned long addr, next;
602 int ret;
603
604 addr = start;
605 do {
606 pud = pud_offset(pgd, addr);
607
608 if (pud_none_or_clear_bad(pud)) {
609 pmd = pmd_alloc_one(NULL, addr);
610 if (!pmd) {
611 kvm_err("Cannot allocate Hyp pmd\n");
612 return -ENOMEM;
613 }
614 pud_populate(NULL, pud, pmd);
615 get_page(virt_to_page(pud));
616 kvm_flush_dcache_to_poc(pud, sizeof(*pud));
617 }
618
619 next = pud_addr_end(addr, end);
620 ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
621 if (ret)
622 return ret;
623 pfn += (next - addr) >> PAGE_SHIFT;
624 } while (addr = next, addr != end);
625
626 return 0;
627}
628
629static int __create_hyp_mappings(pgd_t *pgdp,
630 unsigned long start, unsigned long end,
631 unsigned long pfn, pgprot_t prot)
632{
633 pgd_t *pgd;
634 pud_t *pud;
635 unsigned long addr, next;
636 int err = 0;
637
638 mutex_lock(&kvm_hyp_pgd_mutex);
639 addr = start & PAGE_MASK;
640 end = PAGE_ALIGN(end);
641 do {
642 pgd = pgdp + pgd_index(addr);
643
644 if (pgd_none(*pgd)) {
645 pud = pud_alloc_one(NULL, addr);
646 if (!pud) {
647 kvm_err("Cannot allocate Hyp pud\n");
648 err = -ENOMEM;
649 goto out;
650 }
651 pgd_populate(NULL, pgd, pud);
652 get_page(virt_to_page(pgd));
653 kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
654 }
655
656 next = pgd_addr_end(addr, end);
657 err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
658 if (err)
659 goto out;
660 pfn += (next - addr) >> PAGE_SHIFT;
661 } while (addr = next, addr != end);
662out:
663 mutex_unlock(&kvm_hyp_pgd_mutex);
664 return err;
665}
666
667static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
668{
669 if (!is_vmalloc_addr(kaddr)) {
670 BUG_ON(!virt_addr_valid(kaddr));
671 return __pa(kaddr);
672 } else {
673 return page_to_phys(vmalloc_to_page(kaddr)) +
674 offset_in_page(kaddr);
675 }
676}
677

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:	The virtual kernel start address of the range
 * @to:		The virtual kernel end address of the range (exclusive)
 *
 * The same virtual address as the kernel virtual address is also used
 * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying
 * physical pages.
 */
687int create_hyp_mappings(void *from, void *to)
688{
689 phys_addr_t phys_addr;
690 unsigned long virt_addr;
691 unsigned long start = KERN_TO_HYP((unsigned long)from);
692 unsigned long end = KERN_TO_HYP((unsigned long)to);
693
694 if (is_kernel_in_hyp_mode())
695 return 0;
696
697 start = start & PAGE_MASK;
698 end = PAGE_ALIGN(end);
699
700 for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
701 int err;
702
703 phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
704 err = __create_hyp_mappings(hyp_pgd, virt_addr,
705 virt_addr + PAGE_SIZE,
706 __phys_to_pfn(phys_addr),
707 PAGE_HYP);
708 if (err)
709 return err;
710 }
711
712 return 0;
713}
714

/**
 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 * @from:	The kernel start VA of the range
 * @to:		The kernel end VA of the range (exclusive)
 * @phys_addr:	The physical start address which gets mapped
 *
 * The resulting HYP VA is the same as the kernel VA, modulo
 * HYP_PAGE_OFFSET.
 */
724int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
725{
726 unsigned long start = KERN_TO_HYP((unsigned long)from);
727 unsigned long end = KERN_TO_HYP((unsigned long)to);
728
729 if (is_kernel_in_hyp_mode())
730 return 0;
731
732
733 if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
734 return -EINVAL;
735
736 return __create_hyp_mappings(hyp_pgd, start, end,
737 __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
738}
739

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Allocates only the stage-2 HW PGD level table(s) (can support either full
 * 40-bit input addresses or limited to 32-bit input addresses). Clears the
 * allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
751int kvm_alloc_stage2_pgd(struct kvm *kvm)
752{
753 pgd_t *pgd;
754
755 if (kvm->arch.pgd != NULL) {
756 kvm_err("kvm_arch already initialized?\n");
757 return -EINVAL;
758 }

	/* Allocate the HW PGD, making sure that each page gets its own refcount */
761 pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
762 if (!pgd)
763 return -ENOMEM;
764
765 kvm_clean_pgd(pgd);
766 kvm->arch.pgd = pgd;
767 return 0;
768}
769
770static void stage2_unmap_memslot(struct kvm *kvm,
771 struct kvm_memory_slot *memslot)
772{
773 hva_t hva = memslot->userspace_addr;
774 phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
775 phys_addr_t size = PAGE_SIZE * memslot->npages;
776 hva_t reg_end = hva + size;

	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we should
	 * unmap any of them.
	 */
790 do {
791 struct vm_area_struct *vma = find_vma(current->mm, hva);
792 hva_t vm_start, vm_end;
793
794 if (!vma || vma->vm_start >= reg_end)
795 break;

		/*
		 * Take the intersection of this VMA with the memory region
		 */
800 vm_start = max(hva, vma->vm_start);
801 vm_end = min(reg_end, vma->vm_end);
802
803 if (!(vma->vm_flags & VM_PFNMAP)) {
804 gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
805 unmap_stage2_range(kvm, gpa, vm_end - vm_start);
806 }
807 hva = vm_end;
808 } while (hva < reg_end);
809}
810

/**
 * stage2_unmap_vm - Unmap Stage-2 RAM mappings
 * @kvm: The struct kvm pointer
 *
 * Go through the memregions and unmap any regular RAM
 * backing memory already mapped to the VM.
 */
818void stage2_unmap_vm(struct kvm *kvm)
819{
820 struct kvm_memslots *slots;
821 struct kvm_memory_slot *memslot;
822 int idx;
823
824 idx = srcu_read_lock(&kvm->srcu);
825 spin_lock(&kvm->mmu_lock);
826
827 slots = kvm_memslots(kvm);
828 kvm_for_each_memslot(memslot, slots)
829 stage2_unmap_memslot(kvm, memslot);
830
831 spin_unlock(&kvm->mmu_lock);
832 srcu_read_unlock(&kvm->srcu, idx);
833}
834

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:	The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables as well as freeing the allocated
 * level-1 table itself.
 */
846void kvm_free_stage2_pgd(struct kvm *kvm)
847{
848 if (kvm->arch.pgd == NULL)
849 return;
850
851 unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
852
853 free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
854 kvm->arch.pgd = NULL;
855}
856
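/*
 * stage2_get_pud()/stage2_get_pmd() walk the stage-2 tables for @addr,
 * allocating missing intermediate tables from @cache (or returning NULL
 * when no cache is provided, e.g. from the MMU notifier paths).
 */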
857static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
858 phys_addr_t addr)
859{
860 pgd_t *pgd;
861 pud_t *pud;
862
863 pgd = kvm->arch.pgd + stage2_pgd_index(addr);
864 if (WARN_ON(stage2_pgd_none(*pgd))) {
865 if (!cache)
866 return NULL;
867 pud = mmu_memory_cache_alloc(cache);
868 stage2_pgd_populate(pgd, pud);
869 get_page(virt_to_page(pgd));
870 }
871
872 return stage2_pud_offset(pgd, addr);
873}
874
875static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
876 phys_addr_t addr)
877{
878 pud_t *pud;
879 pmd_t *pmd;
880
881 pud = stage2_get_pud(kvm, cache, addr);
882 if (stage2_pud_none(*pud)) {
883 if (!cache)
884 return NULL;
885 pmd = mmu_memory_cache_alloc(cache);
886 stage2_pud_populate(pud, pmd);
887 get_page(virt_to_page(pud));
888 }
889
890 return stage2_pmd_offset(pud, addr);
891}
892
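/*
 * Install a huge (PMD-level) stage-2 mapping, replacing any existing entry
 * using a break-before-make sequence (clear + TLB invalidate).
 */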
893static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
894 *cache, phys_addr_t addr, const pmd_t *new_pmd)
895{
896 pmd_t *pmd, old_pmd;
897
898 pmd = stage2_get_pmd(kvm, cache, addr);
899 VM_BUG_ON(!pmd);

	/*
	 * Mapping in huge pages should only happen through a fault.  If a
	 * page is merged into a transparent huge page, the individual
	 * subpages of that huge page should be unmapped through MMU
	 * notifiers before we get here.
	 *
	 * Merging of CompoundPages is not supported; they should be split
	 * first, unmapped, merged, and mapped back in on-demand.
	 */
910 VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
911
912 old_pmd = *pmd;
913 if (pmd_present(old_pmd)) {
914 pmd_clear(pmd);
915 kvm_tlb_flush_vmid_ipa(kvm, addr);
916 } else {
917 get_page(virt_to_page(pmd));
918 }
919
920 kvm_set_pmd(pmd, *new_pmd);
921 return 0;
922}
923
924static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
925 phys_addr_t addr, const pte_t *new_pte,
926 unsigned long flags)
927{
928 pmd_t *pmd;
929 pte_t *pte, old_pte;
930 bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP;
931 bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE;
932
933 VM_BUG_ON(logging_active && !cache);

	/* Create stage-2 page table mapping - Levels 0 and 1 */
936 pmd = stage2_get_pmd(kvm, cache, addr);
937 if (!pmd) {
		/*
		 * Ignore calls from kvm_set_spte_hva for unallocated
		 * address ranges.
		 */
942 return 0;
943 }

	/*
	 * While dirty page logging - dissolve huge PMD, then continue on to
	 * allocate page.
	 */
949 if (logging_active)
950 stage2_dissolve_pmd(kvm, addr, pmd);

	/* Create stage-2 page mappings - Level 2 */
953 if (pmd_none(*pmd)) {
954 if (!cache)
955 return 0;
956 pte = mmu_memory_cache_alloc(cache);
957 kvm_clean_pte(pte);
958 pmd_populate_kernel(NULL, pmd, pte);
959 get_page(virt_to_page(pmd));
960 }
961
962 pte = pte_offset_kernel(pmd, addr);
963
964 if (iomap && pte_present(*pte))
965 return -EFAULT;

	/* Create 2nd stage page table mapping - Level 3 */
968 old_pte = *pte;
969 if (pte_present(old_pte)) {
970 kvm_set_pte(pte, __pte(0));
971 kvm_tlb_flush_vmid_ipa(kvm, addr);
972 } else {
973 get_page(virt_to_page(pte));
974 }
975
976 kvm_set_pte(pte, *new_pte);
977 return 0;
978}
979
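/*
 * Accessed-bit helpers used by the MMU notifier aging callbacks; fall back
 * to an open-coded test-and-clear when the architecture does not provide
 * __ptep_test_and_clear_young().
 */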
980#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
981static int stage2_ptep_test_and_clear_young(pte_t *pte)
982{
983 if (pte_young(*pte)) {
984 *pte = pte_mkold(*pte);
985 return 1;
986 }
987 return 0;
988}
989#else
990static int stage2_ptep_test_and_clear_young(pte_t *pte)
991{
992 return __ptep_test_and_clear_young(pte);
993}
994#endif
995
996static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
997{
998 return stage2_ptep_test_and_clear_young((pte_t *)pmd);
999}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:	The KVM pointer
 * @guest_ipa:	The IPA at which to insert the mapping
 * @pa:		The physical address of the device
 * @size:	The size of the mapping
 * @writable:	Whether or not to create a writable mapping
 */
1009int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
1010 phys_addr_t pa, unsigned long size, bool writable)
1011{
1012 phys_addr_t addr, end;
1013 int ret = 0;
1014 unsigned long pfn;
1015 struct kvm_mmu_memory_cache cache = { 0, };
1016
1017 end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
1018 pfn = __phys_to_pfn(pa);
1019
1020 for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
1021 pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
1022
1023 if (writable)
1024 pte = kvm_s2pte_mkwrite(pte);
1025
1026 ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
1027 KVM_NR_MEM_OBJS);
1028 if (ret)
1029 goto out;
1030 spin_lock(&kvm->mmu_lock);
1031 ret = stage2_set_pte(kvm, &cache, addr, &pte,
1032 KVM_S2PTE_FLAG_IS_IOMAP);
1033 spin_unlock(&kvm->mmu_lock);
1034 if (ret)
1035 goto out;
1036
1037 pfn++;
1038 }
1039
1040out:
1041 mmu_free_memory_cache(&cache);
1042 return ret;
1043}
1044
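/*
 * If the faulting pfn is part of a transparent huge page, adjust *ipap and
 * *pfnp to the start of the huge page so that the fault can be satisfied
 * with a single PMD mapping; returns true when a PMD mapping can be used.
 */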
1045static bool transparent_hugepage_adjust(kvm_pfn_t *pfnp, phys_addr_t *ipap)
1046{
1047 kvm_pfn_t pfn = *pfnp;
1048 gfn_t gfn = *ipap >> PAGE_SHIFT;
1049
1050 if (PageTransCompoundMap(pfn_to_page(pfn))) {
1051 unsigned long mask;
1052
		/*
		 * The address we faulted on is backed by a transparent huge
		 * page.  However, because we map the compound huge page and
		 * not the individual tail page, we need to transfer the
		 * refcount to the head page.  We have to be careful that the
		 * THP doesn't start to split while we are adjusting the
		 * refcounts.
		 *
		 * We are sure this doesn't happen, because mmu_notifier_retry
		 * was successful and we are holding the mmu_lock, so if this
		 * THP is trying to split, it will be blocked in the mmu
		 * notifier before touching any of the pages.
		 *
		 * We can therefore safely transfer the refcount from PG_tail
		 * to PG_head and switch the pfn from a tail page to the head
		 * page accordingly.
		 */
1070 mask = PTRS_PER_PMD - 1;
1071 VM_BUG_ON((gfn & mask) != (pfn & mask));
1072 if (pfn & mask) {
1073 *ipap &= PMD_MASK;
1074 kvm_release_pfn_clean(pfn);
1075 pfn &= ~mask;
1076 kvm_get_pfn(pfn);
1077 *pfnp = pfn;
1078 }
1079
1080 return true;
1081 }
1082
1083 return false;
1084}
1085
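/*
 * Instruction aborts are never write faults; for data aborts, the syndrome
 * tells us whether the access was a write.
 */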
1086static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
1087{
1088 if (kvm_vcpu_trap_is_iabt(vcpu))
1089 return false;
1090
1091 return kvm_vcpu_dabt_iswrite(vcpu);
1092}
1093

/**
 * stage2_wp_ptes - write protect PMD range
 * @pmd:	pointer to pmd entry
 * @addr:	range start address
 * @end:	range end address
 */
1100static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
1101{
1102 pte_t *pte;
1103
1104 pte = pte_offset_kernel(pmd, addr);
1105 do {
1106 if (!pte_none(*pte)) {
1107 if (!kvm_s2pte_readonly(pte))
1108 kvm_set_s2pte_readonly(pte);
1109 }
1110 } while (pte++, addr += PAGE_SIZE, addr != end);
1111}
1112

/**
 * stage2_wp_pmds - write protect PUD range
 * @pud:	pointer to pud entry
 * @addr:	range start address
 * @end:	range end address
 */
1119static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
1120{
1121 pmd_t *pmd;
1122 phys_addr_t next;
1123
1124 pmd = stage2_pmd_offset(pud, addr);
1125
1126 do {
1127 next = stage2_pmd_addr_end(addr, end);
1128 if (!pmd_none(*pmd)) {
1129 if (pmd_thp_or_huge(*pmd)) {
1130 if (!kvm_s2pmd_readonly(pmd))
1131 kvm_set_s2pmd_readonly(pmd);
1132 } else {
1133 stage2_wp_ptes(pmd, addr, next);
1134 }
1135 }
1136 } while (pmd++, addr = next, addr != end);
1137}
1138

/**
 * stage2_wp_puds - write protect PGD range
 * @pgd:	pointer to pgd entry
 * @addr:	range start address
 * @end:	range end address
 *
 * Process PUD entries; huge PUDs are not supported and trigger a BUG().
 */
1147static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
1148{
1149 pud_t *pud;
1150 phys_addr_t next;
1151
1152 pud = stage2_pud_offset(pgd, addr);
1153 do {
1154 next = stage2_pud_addr_end(addr, end);
1155 if (!stage2_pud_none(*pud)) {
			/* TODO: PUD not supported, revisit later if supported */
1157 BUG_ON(stage2_pud_huge(*pud));
1158 stage2_wp_pmds(pud, addr, next);
1159 }
1160 } while (pud++, addr = next, addr != end);
1161}
1162

/**
 * stage2_wp_range() - write protect stage2 memory region range
 * @kvm:	The KVM pointer
 * @addr:	Start address of range
 * @end:	End address of range
 */
1169static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
1170{
1171 pgd_t *pgd;
1172 phys_addr_t next;
1173
1174 pgd = kvm->arch.pgd + stage2_pgd_index(addr);
1175 do {
		/*
		 * Release kvm_mmu_lock periodically if the memory region is
		 * large. Otherwise, we may see kernel panics with
		 * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR or
		 * CONFIG_LOCKDEP. Additionally, holding the lock too long
		 * will also starve other vCPUs.
		 */
1183 if (need_resched() || spin_needbreak(&kvm->mmu_lock))
1184 cond_resched_lock(&kvm->mmu_lock);
1185
1186 next = stage2_pgd_addr_end(addr, end);
1187 if (stage2_pgd_present(*pgd))
1188 stage2_wp_puds(pgd, addr, next);
1189 } while (pgd++, addr = next, addr != end);
1190}
1191

/**
 * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot
 * @kvm:	The KVM pointer
 * @slot:	The memory slot to write protect
 *
 * Called to start logging dirty pages after memory region
 * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns
 * all present PMD and PTEs are write protected in the memory region.
 * Afterwards read of dirty page log can be called.
 *
 * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
 * serializing operations for VM memory regions.
 */
1205void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
1206{
1207 struct kvm_memslots *slots = kvm_memslots(kvm);
1208 struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
1209 phys_addr_t start = memslot->base_gfn << PAGE_SHIFT;
1210 phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
1211
1212 spin_lock(&kvm->mmu_lock);
1213 stage2_wp_range(kvm, start, end);
1214 spin_unlock(&kvm->mmu_lock);
1215 kvm_flush_remote_tlbs(kvm);
1216}
1217

/**
 * kvm_mmu_write_protect_pt_masked() - write protect dirty pages
 * @kvm:	The KVM pointer
 * @slot:	The memory slot associated with mask
 * @gfn_offset:	The gfn offset in memory slot
 * @mask:	The mask of dirty pages at offset 'gfn_offset' in this memory
 *		slot to be write protected
 *
 * Walks bits set in mask and write protects the associated pte's. Caller must
 * acquire kvm_mmu_lock.
 */
1229static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
1230 struct kvm_memory_slot *slot,
1231 gfn_t gfn_offset, unsigned long mask)
1232{
1233 phys_addr_t base_gfn = slot->base_gfn + gfn_offset;
1234 phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
1235 phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
1236
1237 stage2_wp_range(kvm, start, end);
1238}
1239

/*
 * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
 * dirty pages.
 *
 * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
 * enable dirty logging for them.
 */
1247void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
1248 struct kvm_memory_slot *slot,
1249 gfn_t gfn_offset, unsigned long mask)
1250{
1251 kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
1252}
1253
1254static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
1255 unsigned long size, bool uncached)
1256{
1257 __coherent_cache_guest_page(vcpu, pfn, size, uncached);
1258}
1259
1260static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
1261 struct kvm_memory_slot *memslot, unsigned long hva,
1262 unsigned long fault_status)
1263{
1264 int ret;
1265 bool write_fault, writable, hugetlb = false, force_pte = false;
1266 unsigned long mmu_seq;
1267 gfn_t gfn = fault_ipa >> PAGE_SHIFT;
1268 struct kvm *kvm = vcpu->kvm;
1269 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
1270 struct vm_area_struct *vma;
1271 kvm_pfn_t pfn;
1272 pgprot_t mem_type = PAGE_S2;
1273 bool fault_ipa_uncached;
1274 bool logging_active = memslot_is_logging(memslot);
1275 unsigned long flags = 0;
1276
1277 write_fault = kvm_is_write_fault(vcpu);
1278 if (fault_status == FSC_PERM && !write_fault) {
1279 kvm_err("Unexpected L2 read permission error\n");
1280 return -EFAULT;
1281 }

	/* Let's check if we will get back a huge page backed by hugetlbfs */
	down_read(&current->mm->mmap_sem);
	vma = find_vma_intersection(current->mm, hva, hva + 1);
	if (unlikely(!vma)) {
		kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
		up_read(&current->mm->mmap_sem);
		return -EFAULT;
	}
1291
1292 if (is_vm_hugetlb_page(vma) && !logging_active) {
1293 hugetlb = true;
1294 gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
1295 } else {
		/*
		 * Pages belonging to memslots that don't have the same
		 * alignment for userspace and IPA cannot be mapped using
		 * block descriptors even if the pages belong to a THP for
		 * the process, because the stage-2 block descriptor will
		 * cover more than a single THP and we lose atomicity for
		 * unmapping, updates, and splits of the THP or other pages
		 * in the stage-2 block range.
		 */
1305 if ((memslot->userspace_addr & ~PMD_MASK) !=
1306 ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
1307 force_pte = true;
1308 }
	up_read(&current->mm->mmap_sem);

	/* We need minimum second+third level pages */
1312 ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
1313 KVM_NR_MEM_OBJS);
1314 if (ret)
1315 return ret;
1316
1317 mmu_seq = vcpu->kvm->mmu_notifier_seq;
	/*
	 * Ensure the read of mmu_notifier_seq happens before we call
	 * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
	 * the page we just got a reference to getting unmapped before we have
	 * a chance to grab the mmu_lock, which ensures that if the page gets
	 * unmapped afterwards, the call to kvm_unmap_hva will take it away
	 * from us again properly. This smp_rmb() interacts with the smp_wmb()
	 * in kvm_mmu_notifier_invalidate_<page|range_end>.
	 */
1327 smp_rmb();
1328
1329 pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
1330 if (is_error_pfn(pfn))
1331 return -EFAULT;
1332
1333 if (kvm_is_device_pfn(pfn)) {
1334 mem_type = PAGE_S2_DEVICE;
1335 flags |= KVM_S2PTE_FLAG_IS_IOMAP;
1336 } else if (logging_active) {
		/*
		 * Faults on pages in a memslot with logging enabled
		 * should not be mapped with huge pages (it introduces churn
		 * and performance degradation), so force a pte mapping.
		 */
		force_pte = true;
		flags |= KVM_S2_FLAG_LOGGING_ACTIVE;

		/*
		 * Only actually map the page as writable if this was a write
		 * fault.
		 */
		if (!write_fault)
			writable = false;
1351 }
1352
1353 spin_lock(&kvm->mmu_lock);
1354 if (mmu_notifier_retry(kvm, mmu_seq))
1355 goto out_unlock;
1356
1357 if (!hugetlb && !force_pte)
1358 hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
1359
1360 fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
1361
1362 if (hugetlb) {
1363 pmd_t new_pmd = pfn_pmd(pfn, mem_type);
1364 new_pmd = pmd_mkhuge(new_pmd);
1365 if (writable) {
1366 new_pmd = kvm_s2pmd_mkwrite(new_pmd);
1367 kvm_set_pfn_dirty(pfn);
1368 }
1369 coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
1370 ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
1371 } else {
1372 pte_t new_pte = pfn_pte(pfn, mem_type);
1373
1374 if (writable) {
1375 new_pte = kvm_s2pte_mkwrite(new_pte);
1376 kvm_set_pfn_dirty(pfn);
1377 mark_page_dirty(kvm, gfn);
1378 }
1379 coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE, fault_ipa_uncached);
1380 ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
1381 }
1382
1383out_unlock:
1384 spin_unlock(&kvm->mmu_lock);
1385 kvm_set_pfn_accessed(pfn);
1386 kvm_release_pfn_clean(pfn);
1387 return ret;
1388}

/*
 * Resolve the access fault by making the page young again.
 * Note that because the faulting entry is guaranteed not to be
 * cached in the TLB, we don't need to invalidate anything.
 * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
 * so there is no need for atomic (pte|pmd)_mkyoung operations.
 */
1397static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
1398{
1399 pmd_t *pmd;
1400 pte_t *pte;
1401 kvm_pfn_t pfn;
1402 bool pfn_valid = false;
1403
1404 trace_kvm_access_fault(fault_ipa);
1405
1406 spin_lock(&vcpu->kvm->mmu_lock);
1407
1408 pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
1409 if (!pmd || pmd_none(*pmd))
1410 goto out;
1411
1412 if (pmd_thp_or_huge(*pmd)) {
1413 *pmd = pmd_mkyoung(*pmd);
1414 pfn = pmd_pfn(*pmd);
1415 pfn_valid = true;
1416 goto out;
1417 }
1418
1419 pte = pte_offset_kernel(pmd, fault_ipa);
1420 if (pte_none(*pte))
1421 goto out;
1422
1423 *pte = pte_mkyoung(*pte);
1424 pfn = pte_pfn(*pte);
1425 pfn_valid = true;
1426out:
1427 spin_unlock(&vcpu->kvm->mmu_lock);
1428 if (pfn_valid)
1429 kvm_set_pfn_accessed(pfn);
1430}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:	the VCPU pointer
 * @run:	the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry, which can mean that either the
 * guest simply needs more memory and we must allocate an appropriate page or it
 * can mean that the guest tried to access I/O memory, which is emulated by user
 * space. The distinction is based on the IPA causing the fault and whether this
 * memory region has been registered as standard RAM by user space.
 */
1444int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
1445{
1446 unsigned long fault_status;
1447 phys_addr_t fault_ipa;
1448 struct kvm_memory_slot *memslot;
1449 unsigned long hva;
1450 bool is_iabt, write_fault, writable;
1451 gfn_t gfn;
1452 int ret, idx;
1453
1454 is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
1455 fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
1456
1457 trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
1458 kvm_vcpu_get_hfar(vcpu), fault_ipa);

	/* Check the stage-2 fault is trans. fault or write fault */
1461 fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
1462 if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
1463 fault_status != FSC_ACCESS) {
1464 kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
1465 kvm_vcpu_trap_get_class(vcpu),
1466 (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
1467 (unsigned long)kvm_vcpu_get_hsr(vcpu));
1468 return -EFAULT;
1469 }
1470
1471 idx = srcu_read_lock(&vcpu->kvm->srcu);
1472
1473 gfn = fault_ipa >> PAGE_SHIFT;
1474 memslot = gfn_to_memslot(vcpu->kvm, gfn);
1475 hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
1476 write_fault = kvm_is_write_fault(vcpu);
1477 if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
1478 if (is_iabt) {
			/* Prefetch Abort on I/O address */
1480 kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
1481 ret = 1;
1482 goto out_unlock;
1483 }

		/*
		 * Check for a cache maintenance operation. Since we
		 * ended-up here, we know it is outside of any memory
		 * slot. But we can't find out if that is for a device,
		 * or if the guest is just being stupid. The only thing
		 * we know for sure is that this range cannot be cached.
		 *
		 * So let's assume that the guest is just being
		 * cautious, and skip the instruction.
		 */
1495 if (kvm_vcpu_dabt_is_cm(vcpu)) {
1496 kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
1497 ret = 1;
1498 goto out_unlock;
1499 }

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
1507 fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
1508 ret = io_mem_abort(vcpu, run, fault_ipa);
1509 goto out_unlock;
1510 }

	/* Userspace should not be able to register out-of-bounds IPAs */
1513 VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
1514
1515 if (fault_status == FSC_ACCESS) {
1516 handle_access_fault(vcpu, fault_ipa);
1517 ret = 1;
1518 goto out_unlock;
1519 }
1520
1521 ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
1522 if (ret == 0)
1523 ret = 1;
1524out_unlock:
1525 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1526 return ret;
1527}
1528
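/*
 * Apply @handler to every guest physical page that backs the host virtual
 * address range [start, end) in any memslot; the results of the individual
 * calls are OR'ed together.
 */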
1529static int handle_hva_to_gpa(struct kvm *kvm,
1530 unsigned long start,
1531 unsigned long end,
1532 int (*handler)(struct kvm *kvm,
1533 gpa_t gpa, void *data),
1534 void *data)
1535{
1536 struct kvm_memslots *slots;
1537 struct kvm_memory_slot *memslot;
1538 int ret = 0;
1539
1540 slots = kvm_memslots(kvm);

	/* we only care about the pages that the guest sees */
1543 kvm_for_each_memslot(memslot, slots) {
1544 unsigned long hva_start, hva_end;
1545 gfn_t gfn, gfn_end;
1546
1547 hva_start = max(start, memslot->userspace_addr);
1548 hva_end = min(end, memslot->userspace_addr +
1549 (memslot->npages << PAGE_SHIFT));
1550 if (hva_start >= hva_end)
1551 continue;
1552
		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
		 */
1557 gfn = hva_to_gfn_memslot(hva_start, memslot);
1558 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
1559
1560 for (; gfn < gfn_end; ++gfn) {
1561 gpa_t gpa = gfn << PAGE_SHIFT;
1562 ret |= handler(kvm, gpa, data);
1563 }
1564 }
1565
1566 return ret;
1567}
1568
1569static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
1570{
1571 unmap_stage2_range(kvm, gpa, PAGE_SIZE);
1572 return 0;
1573}
1574
1575int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
1576{
1577 unsigned long end = hva + PAGE_SIZE;
1578
1579 if (!kvm->arch.pgd)
1580 return 0;
1581
1582 trace_kvm_unmap_hva(hva);
1583 handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
1584 return 0;
1585}
1586
1587int kvm_unmap_hva_range(struct kvm *kvm,
1588 unsigned long start, unsigned long end)
1589{
1590 if (!kvm->arch.pgd)
1591 return 0;
1592
1593 trace_kvm_unmap_hva_range(start, end);
1594 handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
1595 return 0;
1596}
1597
1598static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
1599{
1600 pte_t *pte = (pte_t *)data;
1601
	/*
	 * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE
	 * flag clear because MMU notifiers will have unmapped a huge PMD before
	 * calling ->change_pte() (which in turn calls kvm_set_spte_hva) and
	 * therefore stage2_set_pte() never needs to clear out a huge PMD
	 * through this calling path.
	 */
1609 stage2_set_pte(kvm, NULL, gpa, pte, 0);
1610 return 0;
1611}
1612
1613
1614void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
1615{
1616 unsigned long end = hva + PAGE_SIZE;
1617 pte_t stage2_pte;
1618
1619 if (!kvm->arch.pgd)
1620 return;
1621
1622 trace_kvm_set_spte_hva(hva);
1623 stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
1624 handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
1625}
1626
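/*
 * MMU notifier aging callbacks: test (and, for the "age" variant, clear)
 * the accessed state of the stage-2 entry backing @gpa.
 */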
1627static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
1628{
1629 pmd_t *pmd;
1630 pte_t *pte;
1631
1632 pmd = stage2_get_pmd(kvm, NULL, gpa);
1633 if (!pmd || pmd_none(*pmd))
1634 return 0;
1635
1636 if (pmd_thp_or_huge(*pmd))
1637 return stage2_pmdp_test_and_clear_young(pmd);
1638
1639 pte = pte_offset_kernel(pmd, gpa);
1640 if (pte_none(*pte))
1641 return 0;
1642
1643 return stage2_ptep_test_and_clear_young(pte);
1644}
1645
1646static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
1647{
1648 pmd_t *pmd;
1649 pte_t *pte;
1650
1651 pmd = stage2_get_pmd(kvm, NULL, gpa);
1652 if (!pmd || pmd_none(*pmd))
1653 return 0;
1654
1655 if (pmd_thp_or_huge(*pmd))
1656 return pmd_young(*pmd);
1657
1658 pte = pte_offset_kernel(pmd, gpa);
1659 if (!pte_none(*pte))
1660 return pte_young(*pte);
1661
1662 return 0;
1663}
1664
1665int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
1666{
1667 trace_kvm_age_hva(start, end);
1668 return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
1669}
1670
1671int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
1672{
1673 trace_kvm_test_age_hva(hva);
1674 return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
1675}
1676
1677void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
1678{
1679 mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
1680}
1681
1682phys_addr_t kvm_mmu_get_httbr(void)
1683{
1684 if (__kvm_cpu_uses_extended_idmap())
1685 return virt_to_phys(merged_hyp_pgd);
1686 else
1687 return virt_to_phys(hyp_pgd);
1688}
1689
1690phys_addr_t kvm_mmu_get_boot_httbr(void)
1691{
1692 if (__kvm_cpu_uses_extended_idmap())
1693 return virt_to_phys(merged_hyp_pgd);
1694 else
1695 return virt_to_phys(boot_hyp_pgd);
1696}
1697
1698phys_addr_t kvm_get_idmap_vector(void)
1699{
1700 return hyp_idmap_vector;
1701}
1702
1703phys_addr_t kvm_get_idmap_start(void)
1704{
1705 return hyp_idmap_start;
1706}
1707
1708int kvm_mmu_init(void)
1709{
1710 int err;
1711
1712 hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
1713 hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
1714 hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
1715
	/*
	 * We rely on the linker script to ensure at build time that the HYP
	 * init code does not cross a page boundary.
	 */
1720 BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
1721
1722 hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
1723 boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
1724
1725 if (!hyp_pgd || !boot_hyp_pgd) {
1726 kvm_err("Hyp mode PGD not allocated\n");
1727 err = -ENOMEM;
1728 goto out;
1729 }

	/* Create the idmap in the boot page tables */
1732 err = __create_hyp_mappings(boot_hyp_pgd,
1733 hyp_idmap_start, hyp_idmap_end,
1734 __phys_to_pfn(hyp_idmap_start),
1735 PAGE_HYP);
1736
1737 if (err) {
1738 kvm_err("Failed to idmap %lx-%lx\n",
1739 hyp_idmap_start, hyp_idmap_end);
1740 goto out;
1741 }
1742
1743 if (__kvm_cpu_uses_extended_idmap()) {
1744 merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1745 if (!merged_hyp_pgd) {
1746 kvm_err("Failed to allocate extra HYP pgd\n");
1747 goto out;
1748 }
1749 __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
1750 hyp_idmap_start);
1751 return 0;
1752 }

	/* Map the very same page at the trampoline VA */
1755 err = __create_hyp_mappings(boot_hyp_pgd,
1756 TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
1757 __phys_to_pfn(hyp_idmap_start),
1758 PAGE_HYP);
1759 if (err) {
1760 kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n",
1761 TRAMPOLINE_VA);
1762 goto out;
1763 }

	/* Map the same page again into the runtime page tables */
1766 err = __create_hyp_mappings(hyp_pgd,
1767 TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
1768 __phys_to_pfn(hyp_idmap_start),
1769 PAGE_HYP);
1770 if (err) {
1771 kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n",
1772 TRAMPOLINE_VA);
1773 goto out;
1774 }
1775
1776 return 0;
1777out:
1778 free_hyp_pgds();
1779 return err;
1780}
1781
1782void kvm_arch_commit_memory_region(struct kvm *kvm,
1783 const struct kvm_userspace_memory_region *mem,
1784 const struct kvm_memory_slot *old,
1785 const struct kvm_memory_slot *new,
1786 enum kvm_mr_change change)
1787{
	/*
	 * At this point memslot has been committed and there is an
	 * allocated dirty_bitmap[], dirty pages will be tracked while the
	 * memory slot is write protected.
	 */
1793 if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
1794 kvm_mmu_wp_memory_region(kvm, mem->slot);
1795}
1796
1797int kvm_arch_prepare_memory_region(struct kvm *kvm,
1798 struct kvm_memory_slot *memslot,
1799 const struct kvm_userspace_memory_region *mem,
1800 enum kvm_mr_change change)
1801{
1802 hva_t hva = mem->userspace_addr;
1803 hva_t reg_end = hva + mem->memory_size;
1804 bool writable = !(mem->flags & KVM_MEM_READONLY);
1805 int ret = 0;
1806
1807 if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
1808 change != KVM_MR_FLAGS_ONLY)
1809 return 0;
1810
	/*
	 * Prevent userspace from creating a memory region outside of the IPA
	 * space addressable by the KVM guest IPA space.
	 */
1815 if (memslot->base_gfn + memslot->npages >=
1816 (KVM_PHYS_SIZE >> PAGE_SHIFT))
1817 return -EFAULT;
1818
	/*
	 * A memory region could potentially cover multiple VMAs, and any holes
	 * between them, so iterate over all of them to find out if we can map
	 * any of them right now.
	 */
1831 do {
1832 struct vm_area_struct *vma = find_vma(current->mm, hva);
1833 hva_t vm_start, vm_end;
1834
1835 if (!vma || vma->vm_start >= reg_end)
1836 break;
1837
		/*
		 * Mapping a read-only VMA is only allowed if the
		 * memory region is configured as read-only.
		 */
1842 if (writable && !(vma->vm_flags & VM_WRITE)) {
1843 ret = -EPERM;
1844 break;
1845 }
1846
		/*
		 * Take the intersection of this VMA with the memory region
		 */
1850 vm_start = max(hva, vma->vm_start);
1851 vm_end = min(reg_end, vma->vm_end);
1852
1853 if (vma->vm_flags & VM_PFNMAP) {
1854 gpa_t gpa = mem->guest_phys_addr +
1855 (vm_start - mem->userspace_addr);
1856 phys_addr_t pa;
1857
1858 pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
1859 pa += vm_start - vma->vm_start;

			/* IO region dirty page logging not allowed */
1862 if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
1863 return -EINVAL;
1864
1865 ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
1866 vm_end - vm_start,
1867 writable);
1868 if (ret)
1869 break;
1870 }
1871 hva = vm_end;
1872 } while (hva < reg_end);
1873
1874 if (change == KVM_MR_FLAGS_ONLY)
1875 return ret;
1876
1877 spin_lock(&kvm->mmu_lock);
1878 if (ret)
1879 unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
1880 else
1881 stage2_flush_memslot(kvm, memslot);
1882 spin_unlock(&kvm->mmu_lock);
1883 return ret;
1884}
1885
1886void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
1887 struct kvm_memory_slot *dont)
1888{
1889}
1890
1891int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
1892 unsigned long npages)
1893{
	/*
	 * Readonly memslots are not incoherent with the caches by definition,
	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
	 * that the guest may consider devices and hence map as uncached.
	 * To prevent incoherency issues in these cases, tag all readonly
	 * regions as incoherent.
	 */
1901 if (slot->flags & KVM_MEM_READONLY)
1902 slot->flags |= KVM_MEMSLOT_INCOHERENT;
1903 return 0;
1904}
1905
1906void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
1907{
1908}
1909
1910void kvm_arch_flush_shadow_all(struct kvm *kvm)
1911{
1912}
1913
1914void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
1915 struct kvm_memory_slot *slot)
1916{
1917 gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
1918 phys_addr_t size = slot->npages << PAGE_SHIFT;
1919
1920 spin_lock(&kvm->mmu_lock);
1921 unmap_stage2_range(kvm, gpa, size);
1922 spin_unlock(&kvm->mmu_lock);
1923}
1924

/*
 * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized).
 *
 * Main problems:
 * - S/W ops are local to a CPU (not broadcast)
 * - We have line migration behind our back (speculation)
 * - System caches don't support S/W at all (damn!)
 *
 * In the face of the above, the best we can do is to try and convert
 * S/W ops to VA ops. Because the guest is not allowed to infer the
 * S/W to PA mapping, it can only use S/W to nuke the whole cache,
 * which is a rather good thing for us.
 *
 * Also, it is only used when turning caches on/off ("The expected
 * usage of the cache maintenance instructions that operate by set/way
 * is associated with the cache maintenance instructions associated
 * with the powerdown and powerup of caches, if this is required by
 * the implementation.").
 *
 * We use the following policy:
 *
 * - If we trap a S/W operation, we enable VM trapping to detect
 *   caches being turned on/off, and do a full clean.
 *
 * - We flush the caches on both caches being turned on and off.
 *
 * - Once the caches are enabled, we stop trapping VM ops.
 */
1953void kvm_set_way_flush(struct kvm_vcpu *vcpu)
1954{
1955 unsigned long hcr = vcpu_get_hcr(vcpu);

	/*
	 * If this is the first time we do a S/W operation
	 * (i.e. HCR_TVM not set) flush the whole memory, and set the
	 * VM trapping.
	 *
	 * Otherwise, rely on the VM trapping to wait for the MMU +
	 * Caches to be turned off. At that point, we'll be able to
	 * clean the caches again.
	 */
1966 if (!(hcr & HCR_TVM)) {
1967 trace_kvm_set_way_flush(*vcpu_pc(vcpu),
1968 vcpu_has_cache_enabled(vcpu));
1969 stage2_flush_vm(vcpu->kvm);
1970 vcpu_set_hcr(vcpu, hcr | HCR_TVM);
1971 }
1972}
1973
1974void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
1975{
1976 bool now_enabled = vcpu_has_cache_enabled(vcpu);
1977
	/*
	 * If switching the MMU+caches on, need to invalidate the caches.
	 * If switching it off, need to clean the caches.
	 * Clean + invalidate does the trick always.
	 */
1983 if (now_enabled != was_enabled)
1984 stage2_flush_vm(vcpu->kvm);

	/* Caches are now on, stop trapping VM ops (until a S/W op) */
1987 if (now_enabled)
1988 vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
1989
1990 trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
1991}
1992