#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/debugfs.h>
#include <linux/slab.h>		/* kmem_cache_*, kzalloc() */
#include <linux/kmemleak.h>	/* kmemleak_ignore() */

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/ultravisor.h>
#include <asm/kvm_book3s_uvmem.h>

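/*
 * Supported radix tree geometry.
 * Like P9, we support either 5 or 9 bits at the first (lowest) level,
 * for a page size of 64k or 4k.
 */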
static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };

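/*
 * Copy up to @n bytes to or from a guest effective address.
 * A non-NULL @to makes this a load from the guest, a non-NULL @from a store.
 * When running as a nested hypervisor the work is passed to the level-0
 * hypervisor via the H_COPY_TOFROM_GUEST hcall; otherwise the host
 * temporarily switches LPID (and PID, for quadrant 1) and accesses the
 * guest address through quadrant 1, or quadrant 2 when pid == 0.
 * Returns 0 on success; a non-zero return means some bytes were not copied.
 */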
unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
					      gva_t eaddr, void *to, void *from,
					      unsigned long n)
{
	int uninitialized_var(old_pid), old_lpid;
	unsigned long quadrant, ret = n;
	bool is_load = !!to;

	/* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
	if (kvmhv_on_pseries())
		return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
					  (to != NULL) ? __pa(to) : 0,
					  (from != NULL) ? __pa(from) : 0, n);

	quadrant = 1;
	if (!pid)
		quadrant = 2;
	if (is_load)
		from = (void *) (eaddr | (quadrant << 62));
	else
		to = (void *) (eaddr | (quadrant << 62));

	preempt_disable();

	/* switch the lpid first to avoid running host with unallocated pid */
	old_lpid = mfspr(SPRN_LPID);
	if (old_lpid != lpid)
		mtspr(SPRN_LPID, lpid);
	if (quadrant == 1) {
		old_pid = mfspr(SPRN_PID);
		if (old_pid != pid)
			mtspr(SPRN_PID, pid);
	}
	isync();

	if (is_load)
		ret = probe_user_read(to, (const void __user *)from, n);
	else
		ret = probe_user_write((void __user *)to, from, n);

	/* switch the pid first to avoid running host with unallocated pid */
	if (quadrant == 1 && pid != old_pid)
		mtspr(SPRN_PID, old_pid);
	if (lpid != old_lpid)
		mtspr(SPRN_LPID, old_lpid);
	isync();

	preempt_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix);

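/*
 * Validate a guest effective address supplied on behalf of a vcpu, pick the
 * right (shadow) LPID and PID for it, then do the actual copy.
 */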
static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
					  void *to, void *from, unsigned long n)
{
	int lpid = vcpu->kvm->arch.lpid;
	int pid = vcpu->arch.pid;

	/* This would cause a data segment intr so don't allow the access */
	if (eaddr & (0x3FFUL << 52))
		return -EINVAL;

	/* For a nested guest, use its shadow LPID for the access */
	if (vcpu->arch.nested)
		lpid = vcpu->arch.nested->shadow_lpid;

	/* If accessing quadrant 3 then pid is expected to be 0 */
	if (((eaddr >> 62) & 0x3) == 0x3)
		pid = 0;

	eaddr &= ~(0xFFFUL << 52);

	return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n);
}

long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
				 unsigned long n)
{
	long ret;

	ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
	if (ret > 0)
		memset(to + (n - ret), 0, ret);

	return ret;
}
EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix);

long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
			       unsigned long n)
{
	return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
}
EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix);

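/*
 * Walk a radix tree in guest memory, rooted at @root (a doubleword in
 * partition-table/process-table format: RTS, RPDB and RPDS fields), and
 * translate @eaddr into *gpte.  On success *pte_ret_p holds the leaf PTE;
 * if a read of guest memory fails it holds the address that could not be
 * read.  Returns 0 on success, -EINVAL for an unsupported or malformed
 * tree, -ENOENT if the translation is not present.
 */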
int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
			       struct kvmppc_pte *gpte, u64 root,
			       u64 *pte_ret_p)
{
	struct kvm *kvm = vcpu->kvm;
	int ret, level, ps;
	unsigned long rts, bits, offset, index;
	u64 pte, base, gpa;
	__be64 rpte;

	rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
		((root & RTS2_MASK) >> RTS2_SHIFT);
	bits = root & RPDS_MASK;
	base = root & RPDB_MASK;

	offset = rts + 31;

	/* Current implementations only support 52-bit space */
	if (offset != 52)
		return -EINVAL;

	/* Walk each level of the radix tree */
	for (level = 3; level >= 0; --level) {
		u64 addr;

		/* Check a valid size */
		if (level && bits != p9_supported_radix_bits[level])
			return -EINVAL;
		if (level == 0 && !(bits == 5 || bits == 9))
			return -EINVAL;
		offset -= bits;
		index = (eaddr >> offset) & ((1UL << bits) - 1);
		/* Check that low bits of page table base are zero */
		if (base & ((1UL << (bits + 3)) - 1))
			return -EINVAL;
		/* Read the entry from guest memory */
		addr = base + (index * sizeof(rpte));
		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
		ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
		if (ret) {
			if (pte_ret_p)
				*pte_ret_p = addr;
			return ret;
		}
		pte = __be64_to_cpu(rpte);
		if (!(pte & _PAGE_PRESENT))
			return -ENOENT;
		/* Check if a leaf entry */
		if (pte & _PAGE_PTE)
			break;
		/* Get ready to walk the next level */
		base = pte & RPDB_MASK;
		bits = pte & RPDS_MASK;
	}

	/* Need a leaf at lowest level; 512GB pages not supported */
	if (level < 0 || level == 3)
		return -EINVAL;

	/* We found a valid leaf PTE */
	/* Offset is now log base 2 of the page size */
	gpa = pte & 0x01fffffffffff000ul;
	if (gpa & ((1ul << offset) - 1))
		return -EINVAL;
	gpa |= eaddr & ((1ul << offset) - 1);
	for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
		if (offset == mmu_psize_defs[ps].shift)
			break;
	gpte->page_size = ps;
	gpte->page_shift = offset;

	gpte->eaddr = eaddr;
	gpte->raddr = gpa;

	/* Work out permissions */
	gpte->may_read = !!(pte & _PAGE_READ);
	gpte->may_write = !!(pte & _PAGE_WRITE);
	gpte->may_execute = !!(pte & _PAGE_EXEC);

	gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);

	if (pte_ret_p)
		*pte_ret_p = pte;

	return 0;
}

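/*
 * Used to walk a partition or process table radix tree in guest memory.
 * This exploits the fact that a partition table and a process table have
 * the same layout, that partition-scoped and process-scoped page tables
 * have the same layout, and that the second doubleword of a partition
 * table entry has the same layout as the PTCR register.
 */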
int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
				     struct kvmppc_pte *gpte, u64 table,
				     int table_index, u64 *pte_ret_p)
{
	struct kvm *kvm = vcpu->kvm;
	int ret;
	unsigned long size, ptbl, root;
	struct prtb_entry entry;

	if ((table & PRTS_MASK) > 24)
		return -EINVAL;
	size = 1ul << ((table & PRTS_MASK) + 12);

	/* Is the entry within the table? */
	if ((table_index * sizeof(entry)) >= size)
		return -EINVAL;

	/* Read the table to find the root of the radix tree */
	ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
	ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
	if (ret)
		return ret;

	/* Root is stored in the first doubleword of the entry */
	root = be64_to_cpu(entry.prtb0);

	return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p);
}

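/*
 * Translate a guest effective address to a guest real address and
 * permissions, using the guest's own process-scoped tables.  This is the
 * xlate op used when KVM has to translate on the guest's behalf, for
 * example during instruction emulation.
 */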
int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
			   struct kvmppc_pte *gpte, bool data, bool iswrite)
{
	u32 pid;
	u64 pte;
	int ret;

	/* Work out effective PID */
	switch (eaddr >> 62) {
	case 0:
		pid = vcpu->arch.pid;
		break;
	case 3:
		pid = 0;
		break;
	default:
		return -EINVAL;
	}

	ret = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte,
				vcpu->kvm->arch.process_table, pid, &pte);
	if (ret)
		return ret;

	/* Check privilege (applies only to process scoped translations) */
	if (kvmppc_get_msr(vcpu) & MSR_PR) {
		if (pte & _PAGE_PRIVILEGED) {
			gpte->may_read = 0;
			gpte->may_write = 0;
			gpte->may_execute = 0;
		}
	} else {
		if (!(pte & _PAGE_PRIVILEGED)) {
			/* Check AMR/IAMR to see if strict mode is in force */
			if (vcpu->arch.amr & (1ul << 62))
				gpte->may_read = 0;
			if (vcpu->arch.amr & (1ul << 63))
				gpte->may_write = 0;
			if (vcpu->arch.iamr & (1ul << 62))	/* no execute */
				gpte->may_execute = 0;
		}
	}

	return 0;
}

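/*
 * Invalidate the partition-scoped TLB entries for one guest page: directly
 * when we own the MMU, or via the H_TLB_INVALIDATE hcall when running as a
 * nested hypervisor on pseries.
 */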
void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
			     unsigned int pshift, unsigned int lpid)
{
	unsigned long psize = PAGE_SIZE;
	int psi;
	long rc;
	unsigned long rb;

	if (pshift)
		psize = 1UL << pshift;
	else
		pshift = PAGE_SHIFT;

	addr &= ~(psize - 1);

	if (!kvmhv_on_pseries()) {
		radix__flush_tlb_lpid_page(lpid, addr, psize);
		return;
	}

	psi = shift_to_mmu_psize(pshift);
	rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
				lpid, rb);
	if (rc)
		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
}

static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
{
	long rc;

	if (!kvmhv_on_pseries()) {
		radix__flush_pwc_lpid(lpid);
		return;
	}

	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
				lpid, TLBIEL_INVAL_SET_LPID);
	if (rc)
		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
}

static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
				      unsigned long clr, unsigned long set,
				      unsigned long addr, unsigned int shift)
{
	return __radix_pte_update(ptep, clr, set);
}

static void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
				    pte_t *ptep, pte_t pte)
{
	radix__set_pte_at(kvm->mm, addr, ptep, pte, 0);
}

static struct kmem_cache *kvm_pte_cache;
static struct kmem_cache *kvm_pmd_cache;

static pte_t *kvmppc_pte_alloc(void)
{
	pte_t *pte;

	pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
	kmemleak_ignore(pte);

	return pte;
}

static void kvmppc_pte_free(pte_t *ptep)
{
	kmem_cache_free(kvm_pte_cache, ptep);
}

static pmd_t *kvmppc_pmd_alloc(void)
{
	pmd_t *pmd;

	pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
	kmemleak_ignore(pmd);

	return pmd;
}

static void kvmppc_pmd_free(pmd_t *pmdp)
{
	kmem_cache_free(kvm_pmd_cache, pmdp);
}

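/*
 * Clear a PTE in the partition-scoped (2nd-level) tree and flush the TLB.
 * For L1 guest entries this also updates the large-page statistics, removes
 * any nested-guest rmap entries covering the range, and transfers the dirty
 * bit to the memslot's dirty bitmap.
 */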
void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
		      unsigned int shift,
		      const struct kvm_memory_slot *memslot,
		      unsigned int lpid)
{
	unsigned long old;
	unsigned long gfn = gpa >> PAGE_SHIFT;
	unsigned long page_size = PAGE_SIZE;
	unsigned long hpa;

	old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
	kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);

	/* The following only applies to L1 entries */
	if (lpid != kvm->arch.lpid)
		return;

	if (!memslot) {
		memslot = gfn_to_memslot(kvm, gfn);
		if (!memslot)
			return;
	}
	if (shift) {	/* 1GB or 2MB page */
		page_size = 1ul << shift;
		if (shift == PMD_SHIFT)
			kvm->stat.num_2M_pages--;
		else if (shift == PUD_SHIFT)
			kvm->stat.num_1G_pages--;
	}

	gpa &= ~(page_size - 1);
	hpa = old & PTE_RPN_MASK;
	kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);

	if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
		kvmppc_update_dirty_map(memslot, gfn, page_size);
}

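/*
 * kvmppc_unmap_free_{pte,pmd,pud} tear down one level of a partition-scoped
 * page table and free the page-table pages.  With @full set the caller is
 * discarding the whole tree (e.g. on VM teardown) and entries are simply
 * cleared; otherwise each valid leaf is individually unmapped so that TLB
 * entries, nested rmaps and dirty bits are dealt with.
 */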
static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
				  unsigned int lpid)
{
	if (full) {
		memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE);
	} else {
		pte_t *p = pte;
		unsigned long it;

		for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
			if (pte_val(*p) == 0)
				continue;
			kvmppc_unmap_pte(kvm, p,
					 pte_pfn(*p) << PAGE_SHIFT,
					 PAGE_SHIFT, NULL, lpid);
		}
	}

	kvmppc_pte_free(pte);
}

static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
				  unsigned int lpid)
{
	unsigned long im;
	pmd_t *p = pmd;

	for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
		if (!pmd_present(*p))
			continue;
		if (pmd_is_leaf(*p)) {
			if (full) {
				pmd_clear(p);
			} else {
				WARN_ON_ONCE(1);
				kvmppc_unmap_pte(kvm, (pte_t *)p,
					 pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
					 PMD_SHIFT, NULL, lpid);
			}
		} else {
			pte_t *pte;

			pte = pte_offset_map(p, 0);
			kvmppc_unmap_free_pte(kvm, pte, full, lpid);
			pmd_clear(p);
		}
	}
	kvmppc_pmd_free(pmd);
}

static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
				  unsigned int lpid)
{
	unsigned long iu;
	pud_t *p = pud;

	for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
		if (!pud_present(*p))
			continue;
		if (pud_is_leaf(*p)) {
			pud_clear(p);
		} else {
			pmd_t *pmd;

			pmd = pmd_offset(p, 0);
			kvmppc_unmap_free_pmd(kvm, pmd, true, lpid);
			pud_clear(p);
		}
	}
	pud_free(kvm->mm, pud);
}

void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid)
{
	unsigned long ig;

	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
		pud_t *pud;

		if (!pgd_present(*pgd))
			continue;
		pud = pud_offset(pgd, 0);
		kvmppc_unmap_free_pud(kvm, pud, lpid);
		pgd_clear(pgd);
	}
}

void kvmppc_free_radix(struct kvm *kvm)
{
	if (kvm->arch.pgtable) {
		kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable,
					  kvm->arch.lpid);
		pgd_free(kvm->mm, kvm->arch.pgtable);
		kvm->arch.pgtable = NULL;
	}
}

static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
					      unsigned long gpa, unsigned int lpid)
{
	pte_t *pte = pte_offset_kernel(pmd, 0);

	/*
	 * Clearing the pmd entry then flushing the PWC ensures that the pte
	 * page can no longer be cached by the MMU, so it can be freed
	 * without flushing the PWC again.
	 */
	pmd_clear(pmd);
	kvmppc_radix_flush_pwc(kvm, lpid);

	kvmppc_unmap_free_pte(kvm, pte, false, lpid);
}

static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
					      unsigned long gpa, unsigned int lpid)
{
	pmd_t *pmd = pmd_offset(pud, 0);

	/*
	 * Clearing the pud entry then flushing the PWC ensures that the pmd
	 * page can no longer be cached by the MMU, so it can be freed
	 * without flushing the PWC again.
	 */
	pud_clear(pud);
	kvmppc_radix_flush_pwc(kvm, lpid);

	kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
}

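/*
 * A number of bits may legitimately differ between two faults that hit the
 * same partition-scoped entry: the R/C bits change as pages are referenced,
 * dirtied and cleaned, and the write bit can change when an access is
 * upgraded or when read and write faults race.  Everything else is expected
 * to match.
 */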
#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))

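/*
 * Insert @pte into the partition-scoped tree for @lpid at @gpa, at 4k, 2M
 * or 1G granularity according to @level (0, 1 or 2), allocating any missing
 * intermediate levels.  An existing translation of a different size for the
 * range is torn down first.  Returns 0 on success, -EAGAIN if the caller
 * should retry (e.g. an MMU notifier invalidation raced with us), or
 * -ENOMEM if a needed page-table page could not be allocated.
 */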
int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
		      unsigned long gpa, unsigned int level,
		      unsigned long mmu_seq, unsigned int lpid,
		      unsigned long *rmapp, struct rmap_nested **n_rmap)
{
	pgd_t *pgd;
	pud_t *pud, *new_pud = NULL;
	pmd_t *pmd, *new_pmd = NULL;
	pte_t *ptep, *new_ptep = NULL;
	int ret;

	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
	pgd = pgtable + pgd_index(gpa);
	pud = NULL;
	if (pgd_present(*pgd))
		pud = pud_offset(pgd, gpa);
	else
		new_pud = pud_alloc_one(kvm->mm, gpa);

	pmd = NULL;
	if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
		pmd = pmd_offset(pud, gpa);
	else if (level <= 1)
		new_pmd = kvmppc_pmd_alloc();

	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
		new_ptep = kvmppc_pte_alloc();

	/* Check if we might have been invalidated; let the guest retry if so */
	spin_lock(&kvm->mmu_lock);
	ret = -EAGAIN;
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	/* Now traverse again under the lock and change the tree */
	ret = -ENOMEM;
	if (pgd_none(*pgd)) {
		if (!new_pud)
			goto out_unlock;
		pgd_populate(kvm->mm, pgd, new_pud);
		new_pud = NULL;
	}
	pud = pud_offset(pgd, gpa);
	if (pud_is_leaf(*pud)) {
		unsigned long hgpa = gpa & PUD_MASK;

		/* Check if we raced and someone else has set the same thing */
		if (level == 2) {
			if (pud_raw(*pud) == pte_raw(pte)) {
				ret = 0;
				goto out_unlock;
			}
			/* Valid 1GB page here already, add our extra bits */
			WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
							PTE_BITS_MUST_MATCH);
			kvmppc_radix_update_pte(kvm, (pte_t *)pud,
					      0, pte_val(pte), hgpa, PUD_SHIFT);
			ret = 0;
			goto out_unlock;
		}
		/*
		 * If we raced with another CPU which has just put
		 * a 1GB pte in after we saw a pmd page, try again.
		 */
		if (!new_pmd) {
			ret = -EAGAIN;
			goto out_unlock;
		}
		/* Valid 1GB page here already, remove it */
		kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
				 lpid);
	}
	if (level == 2) {
		if (!pud_none(*pud)) {
			/*
			 * There's a page table page here, but we wanted to
			 * install a large page, so remove and free the page
			 * table page.
			 */
			kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
		}
		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
		if (rmapp && n_rmap)
			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
		ret = 0;
		goto out_unlock;
	}
	if (pud_none(*pud)) {
		if (!new_pmd)
			goto out_unlock;
		pud_populate(kvm->mm, pud, new_pmd);
		new_pmd = NULL;
	}
	pmd = pmd_offset(pud, gpa);
	if (pmd_is_leaf(*pmd)) {
		unsigned long lgpa = gpa & PMD_MASK;

		/* Check if we raced and someone else has set the same thing */
		if (level == 1) {
			if (pmd_raw(*pmd) == pte_raw(pte)) {
				ret = 0;
				goto out_unlock;
			}
			/* Valid 2MB page here already, add our extra bits */
			WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
							PTE_BITS_MUST_MATCH);
			kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
					0, pte_val(pte), lgpa, PMD_SHIFT);
			ret = 0;
			goto out_unlock;
		}

		/*
		 * If we raced with another CPU which has just put
		 * a 2MB pte in after we saw a pte page, try again.
		 */
		if (!new_ptep) {
			ret = -EAGAIN;
			goto out_unlock;
		}
		/* Valid 2MB page here already, remove it */
		kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
				 lpid);
	}
	if (level == 1) {
		if (!pmd_none(*pmd)) {
			/*
			 * There's a page table page here, but we wanted to
			 * install a large page, so remove and free the page
			 * table page.
			 */
			kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
		}
		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
		if (rmapp && n_rmap)
			kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
		ret = 0;
		goto out_unlock;
	}
	ptep = pte_offset_kernel(pmd, gpa);
	if (pte_present(*ptep)) {
		/* Check if someone else set the same thing */
		if (pte_raw(*ptep) == pte_raw(pte)) {
			ret = 0;
			goto out_unlock;
		}
		/* Valid page here already, add our extra bits */
		WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
							PTE_BITS_MUST_MATCH);
		kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
		ret = 0;
		goto out_unlock;
	}
	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
	if (rmapp && n_rmap)
		kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
	ret = 0;

 out_unlock:
	spin_unlock(&kvm->mmu_lock);
	if (new_pud)
		pud_free(kvm->mm, new_pud);
	if (new_pmd)
		kvmppc_pmd_free(new_pmd);
	if (new_ptep)
		kvmppc_pte_free(new_ptep);
	return ret;
}

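/*
 * Set the reference (and, for a write, change) bit in the relevant
 * 2nd-level PTE, doing for the hardware what it would have done itself had
 * the PTE been present with suitable permissions.  @nested selects the
 * shadow page table of the nested guest identified by @lpid rather than
 * the L1 partition-scoped table.  Returns true if a PTE was found and
 * updated.
 */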
bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
			     unsigned long gpa, unsigned int lpid)
{
	unsigned long pgflags;
	unsigned int shift;
	pte_t *ptep;

	pgflags = _PAGE_ACCESSED;
	if (writing)
		pgflags |= _PAGE_DIRTY;

	if (nested)
		ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
	else
		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);

	if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) {
		kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift);
		return true;
	}
	return false;
}

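/*
 * Resolve a guest real address to a host page and insert a matching PTE
 * into the partition-scoped tree, mirroring the size and attributes of the
 * host PTE where possible.  @writing requests write access; @kvm_ro says
 * the memslot is read-only so no write upgrade should be attempted.  The
 * inserted PTE and its level are reported through @inserted_pte and
 * @levelp when those pointers are non-NULL.
 */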
int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
				   unsigned long gpa,
				   struct kvm_memory_slot *memslot,
				   bool writing, bool kvm_ro,
				   pte_t *inserted_pte, unsigned int *levelp)
{
	struct kvm *kvm = vcpu->kvm;
	struct page *page = NULL;
	unsigned long mmu_seq;
	unsigned long hva, gfn = gpa >> PAGE_SHIFT;
	bool upgrade_write = false;
	bool *upgrade_p = &upgrade_write;
	pte_t pte, *ptep;
	unsigned int shift, level;
	int ret;
	bool large_enable;

	/* used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/*
	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
	 * do it with !atomic && !async, which is how we call it.
	 * We always ask for write permission since the common case
	 * is that the page is writable.
	 */
	hva = gfn_to_hva_memslot(memslot, gfn);
	if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
		upgrade_write = true;
	} else {
		unsigned long pfn;

		/* Call KVM generic code to do the slow-path check */
		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
					   writing, upgrade_p, NULL);
		if (is_error_noslot_pfn(pfn))
			return -EFAULT;
		page = NULL;
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (PageReserved(page))
				page = NULL;
		}
	}

	/*
	 * Read the PTE from the process' radix tree and use that
	 * so we get the shift and attribute bits.
	 */
	spin_lock(&kvm->mmu_lock);
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
	pte = __pte(0);
	if (ptep)
		pte = READ_ONCE(*ptep);
	spin_unlock(&kvm->mmu_lock);
	/*
	 * If the PTE disappeared temporarily due to a THP
	 * collapse, just return and let the guest try again.
	 */
	if (!pte_present(pte)) {
		if (page)
			put_page(page);
		return RESUME_GUEST;
	}

	/* If we're logging dirty pages, always map single pages */
	large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);

	/* Get pte level from shift/size */
	if (large_enable && shift == PUD_SHIFT &&
	    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
	    (hva & (PUD_SIZE - PAGE_SIZE))) {
		level = 2;
	} else if (large_enable && shift == PMD_SHIFT &&
		   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
		   (hva & (PMD_SIZE - PAGE_SIZE))) {
		level = 1;
	} else {
		level = 0;
		if (shift > PAGE_SHIFT) {
			/*
			 * If the pte maps more than one page, bring over
			 * bits from the virtual address to get the real
			 * address of the specific single page we want.
			 */
			unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
			pte = __pte(pte_val(pte) | (hva & rpnmask));
		}
	}

	pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
	if (writing || upgrade_write) {
		if (pte_val(pte) & _PAGE_WRITE)
			pte = __pte(pte_val(pte) | _PAGE_DIRTY);
	} else {
		pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
	}

	/* Allocate space in the tree and write the PTE */
	ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
				mmu_seq, kvm->arch.lpid, NULL, NULL);
	if (inserted_pte)
		*inserted_pte = pte;
	if (levelp)
		*levelp = level;

	if (page) {
		if (!ret && (pte_val(pte) & _PAGE_WRITE))
			set_page_dirty_lock(page);
		put_page(page);
	}

	/* Increment number of large pages if we (successfully) inserted one */
	if (!ret) {
		if (level == 1)
			kvm->stat.num_2M_pages++;
		else if (level == 2)
			kvm->stat.num_1G_pages++;
	}

	return ret;
}

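/*
 * Handle a hypervisor page fault for a radix guest: either reflect the
 * fault back to the guest, emulate MMIO, update the R/C bits, or fault the
 * page in and map it in the partition-scoped tree.
 */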
int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
				   unsigned long ea, unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long gpa, gfn;
	struct kvm_memory_slot *memslot;
	long ret;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;

	/* Check for unusual errors */
	if (dsisr & DSISR_UNSUPP_MMU) {
		pr_err("KVM: Got unsupported MMU fault\n");
		return -EFAULT;
	}
	if (dsisr & DSISR_BADACCESS) {
		/* Reflect to the guest as DSI */
		pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
		kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
		return RESUME_GUEST;
	}

	/* Translate the logical address */
	gpa = vcpu->arch.fault_gpa & ~0xfffUL;
	gpa &= ~0xF000000000000000ul;
	gfn = gpa >> PAGE_SHIFT;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		gpa |= ea & 0xfff;

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
		return kvmppc_send_page_to_uv(kvm, gfn);

	/* Get the corresponding memslot */
	memslot = gfn_to_memslot(kvm, gfn);

	/* No memslot means it's an emulated MMIO region */
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
			     DSISR_SET_RC)) {
			/*
			 * Bad address in guest page table tree, or other
			 * unusual error - reflect it to the guest as DSI.
			 */
			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
			return RESUME_GUEST;
		}
		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
	}

	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea, DSISR_ISSTORE |
						       DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		spin_lock(&kvm->mmu_lock);
		if (kvmppc_hv_handle_set_rc(kvm, false, writing,
					    gpa, kvm->arch.lpid))
			dsisr &= ~DSISR_SET_RC;
		spin_unlock(&kvm->mmu_lock);

		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT | DSISR_SET_RC)))
			return RESUME_GUEST;
	}

	/* Try to insert a pte */
	ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
					     kvm_ro, NULL, NULL);

	if (ret == 0 || ret == -EAGAIN)
		ret = RESUME_GUEST;
	return ret;
}

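/* Called with kvm->mmu_lock held */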
int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
		    unsigned long gfn)
{
	pte_t *ptep;
	unsigned long gpa = gfn << PAGE_SHIFT;
	unsigned int shift;

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
		return 0;
	}

	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
	if (ptep && pte_present(*ptep))
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
				 kvm->arch.lpid);
	return 0;
}

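/* Called with kvm->mmu_lock held */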
int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
		  unsigned long gfn)
{
	pte_t *ptep;
	unsigned long gpa = gfn << PAGE_SHIFT;
	unsigned int shift;
	int ref = 0;
	unsigned long old, *rmapp;

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
		return ref;

	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
	if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
					      gpa, shift);
		/* Also clear the accessed bit in nested-guest shadow PTEs */
		rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
		kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0,
					       old & PTE_RPN_MASK,
					       1UL << shift);
		ref = 1;
	}
	return ref;
}

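/* Called with kvm->mmu_lock held */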
int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
		       unsigned long gfn)
{
	pte_t *ptep;
	unsigned long gpa = gfn << PAGE_SHIFT;
	unsigned int shift;
	int ref = 0;

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
		return ref;

	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
	if (ptep && pte_present(*ptep) && pte_young(*ptep))
		ref = 1;
	return ref;
}

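/*
 * Test and clear the dirty bit for the PTE backing guest page frame
 * @pagenum of @memslot.  Returns the number of system pages to mark dirty
 * in the bitmap (currently 0 or 1, since dirty logging forces 4k mappings).
 */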
static int kvm_radix_test_clear_dirty(struct kvm *kvm,
				struct kvm_memory_slot *memslot, int pagenum)
{
	unsigned long gfn = memslot->base_gfn + pagenum;
	unsigned long gpa = gfn << PAGE_SHIFT;
	pte_t *ptep, pte;
	unsigned int shift;
	int ret = 0;
	unsigned long old, *rmapp;

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
		return ret;

	/*
	 * For performance reasons we don't hold kvm->mmu_lock while walking
	 * the partition-scoped table.
	 */
	ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
	if (!ptep)
		return 0;

	pte = READ_ONCE(*ptep);
	if (pte_present(pte) && pte_dirty(pte)) {
		spin_lock(&kvm->mmu_lock);
		/* Recheck the pte now that we hold the lock */
		if (pte_val(pte) != pte_val(*ptep)) {
			/*
			 * The pte changed under us.  Dirty logging is on for
			 * this memslot, so only PAGE_SIZE ptes can be here
			 * and the pte address from the unlocked walk is
			 * still valid; just recheck present and dirty.
			 */
			if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
				spin_unlock(&kvm->mmu_lock);
				return 0;
			}
		}

		ret = 1;
		VM_BUG_ON(shift);
		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
					      gpa, shift);
		kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
		/* Also clear the dirty bit in nested-guest shadow PTEs */
		rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
		kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0,
					       old & PTE_RPN_MASK,
					       1UL << shift);
		spin_unlock(&kvm->mmu_lock);
	}
	return ret;
}

long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
			struct kvm_memory_slot *memslot, unsigned long *map)
{
	unsigned long i, j;
	int npages;

	for (i = 0; i < memslot->npages; i = j) {
		npages = kvm_radix_test_clear_dirty(kvm, memslot, i);

		/*
		 * Note that if npages > 0 then i must be a multiple of
		 * npages, since huge pages are only used to back the guest
		 * at guest real addresses that are a multiple of their size.
		 * Since we have at most one PTE covering any given guest
		 * real address, if npages > 1 we can skip to i + npages.
		 */
		j = i + 1;
		if (npages) {
			set_dirty_bits(map, i, npages);
			j = i + npages;
		}
	}
	return 0;
}

void kvmppc_radix_flush_memslot(struct kvm *kvm,
				const struct kvm_memory_slot *memslot)
{
	unsigned long n;
	pte_t *ptep;
	unsigned long gpa;
	unsigned int shift;

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
		kvmppc_uvmem_drop_pages(memslot, kvm, true);

	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
		return;

	gpa = memslot->base_gfn << PAGE_SHIFT;
	spin_lock(&kvm->mmu_lock);
	for (n = memslot->npages; n; --n) {
		ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
		if (ptep && pte_present(*ptep))
			kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
					 kvm->arch.lpid);
		gpa += PAGE_SIZE;
	}
	/*
	 * Increase the mmu notifier sequence number to prevent any page
	 * fault that read the memslot earlier from writing a PTE.
	 */
	kvm->mmu_notifier_seq++;
	spin_unlock(&kvm->mmu_lock);
}

static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
				 int psize, int *indexp)
{
	if (!mmu_psize_defs[psize].shift)
		return;
	info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift |
		(mmu_psize_defs[psize].ap << 29);
	++(*indexp);
}

int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info)
{
	int i;

	if (!radix_enabled())
		return -EINVAL;
	memset(info, 0, sizeof(*info));

	/* 4k page size */
	info->geometries[0].page_shift = 12;
	info->geometries[0].level_bits[0] = 9;
	for (i = 1; i < 4; ++i)
		info->geometries[0].level_bits[i] = p9_supported_radix_bits[i];
	/* 64k page size */
	info->geometries[1].page_shift = 16;
	for (i = 0; i < 4; ++i)
		info->geometries[1].level_bits[i] = p9_supported_radix_bits[i];

	i = 0;
	add_rmmu_ap_encoding(info, MMU_PAGE_4K, &i);
	add_rmmu_ap_encoding(info, MMU_PAGE_64K, &i);
	add_rmmu_ap_encoding(info, MMU_PAGE_2M, &i);
	add_rmmu_ap_encoding(info, MMU_PAGE_1G, &i);

	return 0;
}

int kvmppc_init_vm_radix(struct kvm *kvm)
{
	kvm->arch.pgtable = pgd_alloc(kvm->mm);
	if (!kvm->arch.pgtable)
		return -ENOMEM;
	return 0;
}

static void pte_ctor(void *addr)
{
	memset(addr, 0, RADIX_PTE_TABLE_SIZE);
}

static void pmd_ctor(void *addr)
{
	memset(addr, 0, RADIX_PMD_TABLE_SIZE);
}

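/*
 * Debugfs interface: reading the per-VM "radix" file dumps the L1
 * partition-scoped tree and the shadow trees of any nested guests, one
 * "gpa: pte shift" line per valid leaf entry.
 */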
struct debugfs_radix_state {
	struct kvm	*kvm;
	struct mutex	mutex;
	unsigned long	gpa;
	int		lpid;
	int		chars_left;
	int		buf_index;
	char		buf[128];
	u8		hdr;
};

static int debugfs_radix_open(struct inode *inode, struct file *file)
{
	struct kvm *kvm = inode->i_private;
	struct debugfs_radix_state *p;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	kvm_get_kvm(kvm);
	p->kvm = kvm;
	mutex_init(&p->mutex);
	file->private_data = p;

	return nonseekable_open(inode, file);
}

static int debugfs_radix_release(struct inode *inode, struct file *file)
{
	struct debugfs_radix_state *p = file->private_data;

	kvm_put_kvm(p->kvm);
	kfree(p);
	return 0;
}

static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
				  size_t len, loff_t *ppos)
{
	struct debugfs_radix_state *p = file->private_data;
	ssize_t ret, r;
	unsigned long n;
	struct kvm *kvm;
	unsigned long gpa;
	pgd_t *pgt;
	struct kvm_nested_guest *nested;
	pgd_t pgd, *pgdp;
	pud_t pud, *pudp;
	pmd_t pmd, *pmdp;
	pte_t *ptep;
	int shift;
	unsigned long pte;

	kvm = p->kvm;
	if (!kvm_is_radix(kvm))
		return 0;

	ret = mutex_lock_interruptible(&p->mutex);
	if (ret)
		return ret;

	if (p->chars_left) {
		n = p->chars_left;
		if (n > len)
			n = len;
		r = copy_to_user(buf, p->buf + p->buf_index, n);
		n -= r;
		p->chars_left -= n;
		p->buf_index += n;
		buf += n;
		len -= n;
		ret = n;
		if (r) {
			if (!n)
				ret = -EFAULT;
			goto out;
		}
	}

	gpa = p->gpa;
	nested = NULL;
	pgt = NULL;
	while (len != 0 && p->lpid >= 0) {
		if (gpa >= RADIX_PGTABLE_RANGE) {
			gpa = 0;
			pgt = NULL;
			if (nested) {
				kvmhv_put_nested(nested);
				nested = NULL;
			}
			p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid);
			p->hdr = 0;
			if (p->lpid < 0)
				break;
		}
		if (!pgt) {
			if (p->lpid == 0) {
				pgt = kvm->arch.pgtable;
			} else {
				nested = kvmhv_get_nested(kvm, p->lpid, false);
				if (!nested) {
					gpa = RADIX_PGTABLE_RANGE;
					continue;
				}
				pgt = nested->shadow_pgtable;
			}
		}
		n = 0;
		if (!p->hdr) {
			if (p->lpid > 0)
				n = scnprintf(p->buf, sizeof(p->buf),
					      "\nNested LPID %d: ", p->lpid);
			n += scnprintf(p->buf + n, sizeof(p->buf) - n,
				      "pgdir: %lx\n", (unsigned long)pgt);
			p->hdr = 1;
			goto copy;
		}

		pgdp = pgt + pgd_index(gpa);
		pgd = READ_ONCE(*pgdp);
		if (!(pgd_val(pgd) & _PAGE_PRESENT)) {
			gpa = (gpa & PGDIR_MASK) + PGDIR_SIZE;
			continue;
		}

		pudp = pud_offset(&pgd, gpa);
		pud = READ_ONCE(*pudp);
		if (!(pud_val(pud) & _PAGE_PRESENT)) {
			gpa = (gpa & PUD_MASK) + PUD_SIZE;
			continue;
		}
		if (pud_val(pud) & _PAGE_PTE) {
			pte = pud_val(pud);
			shift = PUD_SHIFT;
			goto leaf;
		}

		pmdp = pmd_offset(&pud, gpa);
		pmd = READ_ONCE(*pmdp);
		if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
			gpa = (gpa & PMD_MASK) + PMD_SIZE;
			continue;
		}
		if (pmd_val(pmd) & _PAGE_PTE) {
			pte = pmd_val(pmd);
			shift = PMD_SHIFT;
			goto leaf;
		}

		ptep = pte_offset_kernel(&pmd, gpa);
		pte = pte_val(READ_ONCE(*ptep));
		if (!(pte & _PAGE_PRESENT)) {
			gpa += PAGE_SIZE;
			continue;
		}
		shift = PAGE_SHIFT;
 leaf:
		n = scnprintf(p->buf, sizeof(p->buf),
			      " %lx: %lx %d\n", gpa, pte, shift);
		gpa += 1ul << shift;
 copy:
		p->chars_left = n;
		if (n > len)
			n = len;
		r = copy_to_user(buf, p->buf, n);
		n -= r;
		p->chars_left -= n;
		p->buf_index = n;
		buf += n;
		len -= n;
		ret += n;
		if (r) {
			if (!ret)
				ret = -EFAULT;
			break;
		}
	}
	p->gpa = gpa;
	if (nested)
		kvmhv_put_nested(nested);

 out:
	mutex_unlock(&p->mutex);
	return ret;
}

static ssize_t debugfs_radix_write(struct file *file, const char __user *buf,
				   size_t len, loff_t *ppos)
{
	return -EACCES;
}

static const struct file_operations debugfs_radix_fops = {
	.owner	 = THIS_MODULE,
	.open	 = debugfs_radix_open,
	.release = debugfs_radix_release,
	.read	 = debugfs_radix_read,
	.write	 = debugfs_radix_write,
	.llseek	 = generic_file_llseek,
};

void kvmhv_radix_debugfs_init(struct kvm *kvm)
{
	debugfs_create_file("radix", 0400, kvm->arch.debugfs_dir, kvm,
			    &debugfs_radix_fops);
}

int kvmppc_radix_init(void)
{
	unsigned long size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;

	kvm_pte_cache = kmem_cache_create("kvm-pte", size, size, 0, pte_ctor);
	if (!kvm_pte_cache)
		return -ENOMEM;

	size = sizeof(void *) << RADIX_PMD_INDEX_SIZE;

	kvm_pmd_cache = kmem_cache_create("kvm-pmd", size, size, 0, pmd_ctor);
	if (!kvm_pmd_cache) {
		kmem_cache_destroy(kvm_pte_cache);
		return -ENOMEM;
	}

	return 0;
}

void kvmppc_radix_exit(void)
{
	kmem_cache_destroy(kvm_pte_cache);
	kmem_cache_destroy(kvm_pmd_cache);
}