1
2
3
4
5
6
7#include <linux/types.h>
8#include <linux/string.h>
9#include <linux/kvm.h>
10#include <linux/kvm_host.h>
11#include <linux/highmem.h>
12#include <linux/gfp.h>
13#include <linux/slab.h>
14#include <linux/hugetlb.h>
15#include <linux/vmalloc.h>
16#include <linux/srcu.h>
17#include <linux/anon_inodes.h>
18#include <linux/file.h>
19#include <linux/debugfs.h>
20
21#include <asm/kvm_ppc.h>
22#include <asm/kvm_book3s.h>
23#include <asm/book3s/64/mmu-hash.h>
24#include <asm/hvcall.h>
25#include <asm/synch.h>
26#include <asm/ppc-opcode.h>
27#include <asm/cputable.h>
28#include <asm/pte-walk.h>
29
30#include "trace_hv.h"
31
32
33
/*
 * Debug helper for the HPT resize code.
 *
 * With DEBUG_RESIZE_HPT defined, prints a "RESIZE HPT <ptr>: " prefix
 * followed by the caller's printk-style message (two separate printk
 * calls). Without it, the macro expands to an empty statement and its
 * arguments are not evaluated.
 */
#ifdef DEBUG_RESIZE_HPT
#define resize_hpt_debug(resize, ...)				\
	do {							\
		printk(KERN_DEBUG "RESIZE HPT %p: ", resize);	\
		printk(__VA_ARGS__);				\
	} while (0)
#else
#define resize_hpt_debug(resize, ...)				\
	do { } while (0)
#endif
44
/*
 * Forward declaration; the definition appears further down in this file.
 * Needed because kvmppc_map_vrma() calls it before it is defined.
 */
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
				long pte_index, unsigned long pteh,
				unsigned long ptel, unsigned long *pte_idx_ret);
48
/*
 * Bookkeeping for one hashed-page-table (HPT) resize request.
 */
struct kvm_resize_hpt {
	/*
	 * The VM being resized, the work item that runs
	 * resize_hpt_prepare_work(), and the requested HPT order that
	 * resize_hpt_allocate() hands to kvmppc_allocate_hpt().
	 */
	struct kvm *kvm;
	struct work_struct work;
	u32 order;

	/*
	 * Status of the request. This file treats -EBUSY as "not finished
	 * yet": resize_hpt_prepare_work() expects to find -EBUSY on entry and
	 * resize_hpt_release() only frees the request once the value is
	 * something else.
	 * NOTE(review): presumably protected by kvm->arch.mmu_setup_lock --
	 * confirm against the callers.
	 */
	int error;

	/*
	 * The tentative new HPT filled in by resize_hpt_allocate().
	 * resize_hpt_pivot() swaps it with kvm->arch.hpt, after which this
	 * field holds the old table.
	 */
	struct kvm_hpt_info hpt;
};
69
70int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
71{
72 unsigned long hpt = 0;
73 int cma = 0;
74 struct page *page = NULL;
75 struct revmap_entry *rev;
76 unsigned long npte;
77
78 if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
79 return -EINVAL;
80
81 page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
82 if (page) {
83 hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
84 memset((void *)hpt, 0, (1ul << order));
85 cma = 1;
86 }
87
88 if (!hpt)
89 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
90 |__GFP_NOWARN, order - PAGE_SHIFT);
91
92 if (!hpt)
93 return -ENOMEM;
94
95
96 npte = 1ul << (order - 4);
97
98
99 rev = vmalloc(array_size(npte, sizeof(struct revmap_entry)));
100 if (!rev) {
101 if (cma)
102 kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
103 else
104 free_pages(hpt, order - PAGE_SHIFT);
105 return -ENOMEM;
106 }
107
108 info->order = order;
109 info->virt = hpt;
110 info->cma = cma;
111 info->rev = rev;
112
113 return 0;
114}
115
116void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
117{
118 atomic64_set(&kvm->arch.mmio_update, 0);
119 kvm->arch.hpt = *info;
120 kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18);
121
122 pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n",
123 info->virt, (long)info->order, kvm->arch.lpid);
124}
125
126long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
127{
128 long err = -EBUSY;
129 struct kvm_hpt_info info;
130
131 mutex_lock(&kvm->arch.mmu_setup_lock);
132 if (kvm->arch.mmu_ready) {
133 kvm->arch.mmu_ready = 0;
134
135 smp_mb();
136 if (atomic_read(&kvm->arch.vcpus_running)) {
137 kvm->arch.mmu_ready = 1;
138 goto out;
139 }
140 }
141 if (kvm_is_radix(kvm)) {
142 err = kvmppc_switch_mmu_to_hpt(kvm);
143 if (err)
144 goto out;
145 }
146
147 if (kvm->arch.hpt.order == order) {
148
149
150
151 memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
152
153
154
155 kvmppc_rmap_reset(kvm);
156 err = 0;
157 goto out;
158 }
159
160 if (kvm->arch.hpt.virt) {
161 kvmppc_free_hpt(&kvm->arch.hpt);
162 kvmppc_rmap_reset(kvm);
163 }
164
165 err = kvmppc_allocate_hpt(&info, order);
166 if (err < 0)
167 goto out;
168 kvmppc_set_hpt(kvm, &info);
169
170out:
171 if (err == 0)
172
173 cpumask_setall(&kvm->arch.need_tlb_flush);
174
175 mutex_unlock(&kvm->arch.mmu_setup_lock);
176 return err;
177}
178
179void kvmppc_free_hpt(struct kvm_hpt_info *info)
180{
181 vfree(info->rev);
182 info->rev = NULL;
183 if (info->cma)
184 kvm_free_hpt_cma(virt_to_page(info->virt),
185 1 << (info->order - PAGE_SHIFT));
186 else if (info->virt)
187 free_pages(info->virt, info->order - PAGE_SHIFT);
188 info->virt = 0;
189 info->order = 0;
190}
191
192
193static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
194{
195 return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
196}
197
198
/*
 * Page-size bits for the second HPTE doubleword: 0x1000 for a 64K
 * (0x10000) page, nothing for any other size.
 */
static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
{
	if (pgsize == 0x10000)
		return 0x1000;
	return 0;
}
203
204void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
205 unsigned long porder)
206{
207 unsigned long i;
208 unsigned long npages;
209 unsigned long hp_v, hp_r;
210 unsigned long addr, hash;
211 unsigned long psize;
212 unsigned long hp0, hp1;
213 unsigned long idx_ret;
214 long ret;
215 struct kvm *kvm = vcpu->kvm;
216
217 psize = 1ul << porder;
218 npages = memslot->npages >> (porder - PAGE_SHIFT);
219
220
221 if (npages > 1ul << (40 - porder))
222 npages = 1ul << (40 - porder);
223
224 if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1)
225 npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1;
226
227 hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
228 HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
229 hp1 = hpte1_pgsize_encoding(psize) |
230 HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
231
232 for (i = 0; i < npages; ++i) {
233 addr = i << porder;
234
235 hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
236 & kvmppc_hpt_mask(&kvm->arch.hpt);
237
238
239
240
241
242
243 hash = (hash << 3) + 7;
244 hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
245 hp_r = hp1 | addr;
246 ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
247 &idx_ret);
248 if (ret != H_SUCCESS) {
249 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
250 addr, ret);
251 break;
252 }
253 }
254}
255
256int kvmppc_mmu_hv_init(void)
257{
258 unsigned long host_lpid, rsvd_lpid;
259
260 if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
261 return -EINVAL;
262
263
264 host_lpid = 0;
265 if (cpu_has_feature(CPU_FTR_HVMODE))
266 host_lpid = mfspr(SPRN_LPID);
267 rsvd_lpid = LPID_RSVD;
268
269 kvmppc_init_lpid(rsvd_lpid + 1);
270
271 kvmppc_claim_lpid(host_lpid);
272
273 kvmppc_claim_lpid(rsvd_lpid);
274
275 return 0;
276}
277
278static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
279 long pte_index, unsigned long pteh,
280 unsigned long ptel, unsigned long *pte_idx_ret)
281{
282 long ret;
283
284 preempt_disable();
285 ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
286 kvm->mm->pgd, false, pte_idx_ret);
287 preempt_enable();
288 if (ret == H_TOO_HARD) {
289
290 pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
291 ret = H_RESOURCE;
292 }
293 return ret;
294
295}
296
297static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
298 gva_t eaddr)
299{
300 u64 mask;
301 int i;
302
303 for (i = 0; i < vcpu->arch.slb_nr; i++) {
304 if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
305 continue;
306
307 if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
308 mask = ESID_MASK_1T;
309 else
310 mask = ESID_MASK;
311
312 if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
313 return &vcpu->arch.slb[i];
314 }
315 return NULL;
316}
317
318static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
319 unsigned long ea)
320{
321 unsigned long ra_mask;
322
323 ra_mask = kvmppc_actual_pgsz(v, r) - 1;
324 return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
325}
326
327static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
328 struct kvmppc_pte *gpte, bool data, bool iswrite)
329{
330 struct kvm *kvm = vcpu->kvm;
331 struct kvmppc_slb *slbe;
332 unsigned long slb_v;
333 unsigned long pp, key;
334 unsigned long v, orig_v, gr;
335 __be64 *hptep;
336 long int index;
337 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
338
339 if (kvm_is_radix(vcpu->kvm))
340 return kvmppc_mmu_radix_xlate(vcpu, eaddr, gpte, data, iswrite);
341
342
343 if (virtmode) {
344 slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
345 if (!slbe)
346 return -EINVAL;
347 slb_v = slbe->origv;
348 } else {
349
350 slb_v = vcpu->kvm->arch.vrma_slb_v;
351 }
352
353 preempt_disable();
354
355 index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
356 HPTE_V_VALID | HPTE_V_ABSENT);
357 if (index < 0) {
358 preempt_enable();
359 return -ENOENT;
360 }
361 hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
362 v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
363 if (cpu_has_feature(CPU_FTR_ARCH_300))
364 v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
365 gr = kvm->arch.hpt.rev[index].guest_rpte;
366
367 unlock_hpte(hptep, orig_v);
368 preempt_enable();
369
370 gpte->eaddr = eaddr;
371 gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);
372
373
374 pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
375 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
376 key &= slb_v;
377
378
379 gpte->may_read = hpte_read_permission(pp, key);
380 gpte->may_write = hpte_write_permission(pp, key);
381 gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
382
383
384 if (data && virtmode) {
385 int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
386 if (amrfield & 1)
387 gpte->may_read = 0;
388 if (amrfield & 2)
389 gpte->may_write = 0;
390 }
391
392
393 gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
394 return 0;
395}
396
397
398
399
400
401
402
403
404
/*
 * Heuristic store detector working on the raw instruction word.
 *
 * For primary opcode 31 (top six bits == 0x7c000000) the bit 0x100 is
 * tested; for every other opcode the bit 0x10000000 is tested.
 * Returns 1 if the selected bit is set, 0 otherwise.
 */
static int instruction_is_store(unsigned int instr)
{
	unsigned int store_bit;

	if ((instr & 0xfc000000) == 0x7c000000)
		store_bit = 0x100;
	else
		store_bit = 0x10000000;

	return (instr & store_bit) ? 1 : 0;
}
414
415int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
416 unsigned long gpa, gva_t ea, int is_store)
417{
418 u32 last_inst;
419
420
421
422
423
424
425 if (is_store) {
426 int idx, ret;
427
428 idx = srcu_read_lock(&vcpu->kvm->srcu);
429 ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0,
430 NULL);
431 srcu_read_unlock(&vcpu->kvm->srcu, idx);
432 if (!ret) {
433 kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
434 return RESUME_GUEST;
435 }
436 }
437
438
439
440
441 if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
442 EMULATE_DONE)
443 return RESUME_GUEST;
444
445
446
447
448
449
450
451
452
453
454
455
456
457 if (instruction_is_store(last_inst) != !!is_store)
458 return RESUME_GUEST;
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473 vcpu->arch.paddr_accessed = gpa;
474 vcpu->arch.vaddr_accessed = ea;
475 return kvmppc_emulate_mmio(vcpu);
476}
477
/*
 * kvmppc_book3s_hv_page_fault() - handle a guest HPT page fault in
 * virtual (host kernel) mode.
 * @vcpu:  faulting vcpu; its arch.pgfault_* fields describe the fault
 * @ea:    faulting effective address
 * @dsisr: fault status; DSISR_ISSTORE marks a store
 *
 * Radix guests are delegated to kvmppc_book3s_radix_page_fault().
 *
 * Return: RESUME_GUEST to retry in the guest, the result of
 * kvmppc_hv_emulate_mmio() for addresses without a usable memslot, or
 * -EFAULT when the page cannot be mapped.
 */
int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
				unsigned long ea, unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long hpte[3], r;
	unsigned long hnow_v, hnow_r;
	__be64 *hptep;
	unsigned long mmu_seq, psize, pte_size;
	unsigned long gpa_base, gfn_base;
	unsigned long gpa, gfn, hva, pfn, hpa;
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	struct revmap_entry *rev;
	struct page *page;
	long index, ret;
	bool is_ci;
	bool writing, write_ok;
	unsigned int shift;
	unsigned long rcbits;
	long mmio_update;
	pte_t pte, *ptep;

	if (kvm_is_radix(kvm))
		return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr);

	/*
	 * Only proceed if the address matches the one recorded for this
	 * fault; otherwise let the guest retry.
	 */
	if (ea != vcpu->arch.pgfault_addr)
		return RESUME_GUEST;

	/*
	 * Fast path: a cached MMIO entry that is still current (same
	 * mmio_update generation) goes straight to MMIO emulation.
	 */
	if (vcpu->arch.pgfault_cache) {
		mmio_update = atomic64_read(&kvm->arch.mmio_update);
		if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
			r = vcpu->arch.pgfault_cache->rpte;
			psize = kvmppc_actual_pgsz(vcpu->arch.pgfault_hpte[0],
						   r);
			gpa_base = r & HPTE_R_RPN & ~(psize - 1);
			gfn_base = gpa_base >> PAGE_SHIFT;
			gpa = gpa_base | (ea & (psize - 1));
			return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
						dsisr & DSISR_ISSTORE);
		}
	}
	/* Take a consistent snapshot of the HPTE and its guest view */
	index = vcpu->arch.pgfault_index;
	hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
	rev = &kvm->arch.hpt.rev[index];
	preempt_disable();
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
	hpte[1] = be64_to_cpu(hptep[1]);
	hpte[2] = r = rev->guest_rpte;
	unlock_hpte(hptep, hpte[0]);
	preempt_enable();

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
		hpte[1] = hpte_new_to_old_r(hpte[1]);
	}
	/* The entry changed since the fault was recorded: retry */
	if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
	    hpte[1] != vcpu->arch.pgfault_hpte[1])
		return RESUME_GUEST;

	/* Translate the guest's real page number into gpa/gfn and a memslot */
	psize = kvmppc_actual_pgsz(hpte[0], r);
	gpa_base = r & HPTE_R_RPN & ~(psize - 1);
	gfn_base = gpa_base >> PAGE_SHIFT;
	gpa = gpa_base | (ea & (psize - 1));
	gfn = gpa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(kvm, gfn);

	trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);

	/* No usable memslot: treat the access as MMIO */
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
					      dsisr & DSISR_ISSTORE);

	/* The guest page must not start below the beginning of the memslot */
	if (gfn_base < memslot->base_gfn)
		return -EFAULT;

	/* Sample the MMU notifier sequence before looking at host PTEs */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	ret = -EFAULT;
	page = NULL;
	writing = (dsisr & DSISR_ISSTORE) != 0;
	/* Unless a writable mapping turns out to be available */
	write_ok = writing;
	hva = gfn_to_hva_memslot(memslot, gfn);

	/*
	 * Try the fast-only GUP with write access first; fall back to the
	 * generic gfn->pfn lookup, which may report write_ok itself.
	 */
	if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
		write_ok = true;
	} else {
		/* Call KVM generic code to do the slow-path check */
		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
					   writing, &write_ok);
		if (is_error_noslot_pfn(pfn))
			return -EFAULT;
		page = NULL;
		if (pfn_valid(pfn)) {
			page = pfn_to_page(pfn);
			if (PageReserved(page))
				page = NULL;
		}
	}

	/* Read the host PTE for hva under mmu_lock */
	spin_lock(&kvm->mmu_lock);
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
	pte = __pte(0);
	if (ptep)
		pte = READ_ONCE(*ptep);
	spin_unlock(&kvm->mmu_lock);

	/* Host PTE not present (or not found): drop the page and retry */
	if (!pte_present(pte)) {
		if (page)
			put_page(page);
		return RESUME_GUEST;
	}
	hpa = pte_pfn(pte) << PAGE_SHIFT;
	pte_size = PAGE_SIZE;
	if (shift)
		pte_size = 1ul << shift;
	is_ci = pte_ci(pte);

	/* A guest page larger than the host mapping cannot be satisfied */
	if (psize > pte_size)
		goto out_put;
	/* Select the guest-page-sized piece of a larger host page */
	if (pte_size > psize)
		hpa |= hva & (pte_size - psize);

	/* Check WIMG vs. the actual page we're accessing */
	if (!hpte_cache_flags_ok(r, is_ci)) {
		if (is_ci)
			goto out_put;
		/* Host page is cacheable: force the M attribute only */
		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
	}

	/*
	 * Build the new second doubleword: keep the guest's attribute bits
	 * and substitute the host physical address; drop write permission
	 * if the host mapping is not writable.
	 */
	if (psize < PAGE_SIZE)
		psize = PAGE_SIZE;
	r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
	if (hpte_is_writable(r) && !write_ok)
		r = hpte_make_readonly(r);
	ret = RESUME_GUEST;
	preempt_disable();
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	hnow_v = be64_to_cpu(hptep[0]);
	hnow_r = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
		hnow_r = hpte_new_to_old_r(hnow_r);
	}

	/* Do not install anything while the MMU is marked not ready */
	if (!kvm->arch.mmu_ready)
		goto out_unlock;

	if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
	    rev->guest_rpte != hpte[2])
		/* HPTE has been changed under us; let the guest retry */
		goto out_unlock;
	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;

	/* Always put the HPTE in the rmap chain for the page base address */
	rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];
	lock_rmap(rmap);

	/* Check if we might have been invalidated; let the guest retry if so */
	ret = RESUME_GUEST;
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
		unlock_rmap(rmap);
		goto out_unlock;
	}

	/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);

	if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) {
		/* HPTE was previously valid, so we need to invalidate it */
		unlock_rmap(rmap);
		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hptep, index);
		/* don't lose previous R and C bits */
		r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
	} else {
		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		r = hpte_old_to_new_r(hpte[0], r);
		hpte[0] = hpte_old_to_new_v(hpte[0]);
	}
	/* Second word first, then the first word which also drops the lock */
	hptep[1] = cpu_to_be64(r);
	eieio();
	__unlock_hpte(hptep, hpte[0]);
	asm volatile("ptesync" : : : "memory");
	preempt_enable();
	if (page && hpte_is_writable(r))
		set_page_dirty_lock(page);

 out_put:
	trace_kvm_page_fault_exit(vcpu, hpte, ret);

	if (page)
		put_page(page);
	return ret;

 out_unlock:
	/* Release the HPTE lock without changing the entry */
	__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
	preempt_enable();
	goto out_put;
}
727
728void kvmppc_rmap_reset(struct kvm *kvm)
729{
730 struct kvm_memslots *slots;
731 struct kvm_memory_slot *memslot;
732 int srcu_idx;
733
734 srcu_idx = srcu_read_lock(&kvm->srcu);
735 slots = kvm_memslots(kvm);
736 kvm_for_each_memslot(memslot, slots) {
737
738 spin_lock(&kvm->mmu_lock);
739
740
741
742
743 memset(memslot->arch.rmap, 0,
744 memslot->npages * sizeof(*memslot->arch.rmap));
745 spin_unlock(&kvm->mmu_lock);
746 }
747 srcu_read_unlock(&kvm->srcu, srcu_idx);
748}
749
750typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot,
751 unsigned long gfn);
752
753static int kvm_handle_hva_range(struct kvm *kvm,
754 unsigned long start,
755 unsigned long end,
756 hva_handler_fn handler)
757{
758 int ret;
759 int retval = 0;
760 struct kvm_memslots *slots;
761 struct kvm_memory_slot *memslot;
762
763 slots = kvm_memslots(kvm);
764 kvm_for_each_memslot(memslot, slots) {
765 unsigned long hva_start, hva_end;
766 gfn_t gfn, gfn_end;
767
768 hva_start = max(start, memslot->userspace_addr);
769 hva_end = min(end, memslot->userspace_addr +
770 (memslot->npages << PAGE_SHIFT));
771 if (hva_start >= hva_end)
772 continue;
773
774
775
776
777 gfn = hva_to_gfn_memslot(hva_start, memslot);
778 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
779
780 for (; gfn < gfn_end; ++gfn) {
781 ret = handler(kvm, memslot, gfn);
782 retval |= ret;
783 }
784 }
785
786 return retval;
787}
788
789static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
790 hva_handler_fn handler)
791{
792 return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
793}
794
795
/*
 * kvmppc_unmap_hpte() - detach HPTE @i from the rmap chain headed by
 * @rmapp and, if it is a valid mapping of @gfn, invalidate it.
 *
 * NOTE(review): callers in this file hold both the rmap lock and the HPTE
 * lock around this call; presumably that is required -- confirm.
 */
static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
			      struct kvm_memory_slot *memslot,
			      unsigned long *rmapp, unsigned long gfn)
{
	__be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
	struct revmap_entry *rev = kvm->arch.hpt.rev;
	unsigned long j, h;
	unsigned long ptel, psize, rcbits;

	j = rev[i].forw;
	if (j == i) {
		/* chain is now empty */
		*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
	} else {
		/* remove i from chain and make j the new head */
		h = rev[i].back;
		rev[h].forw = j;
		rev[j].back = h;
		rev[i].forw = rev[i].back = i;
		*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
	}

	/* Now check and modify the HPTE */
	ptel = rev[i].guest_rpte;
	psize = kvmppc_actual_pgsz(be64_to_cpu(hptep[0]), ptel);
	if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
	    hpte_rpn(ptel, psize) == gfn) {
		/* Mark absent, invalidate, and clear the key bits */
		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hptep, i);
		hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
		/* Harvest R and C into the rmap word and the guest view */
		rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
		*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
		if ((rcbits & HPTE_R_C) && memslot->dirty_bitmap)
			kvmppc_update_dirty_map(memslot, gfn, psize);
		if (rcbits & ~rev[i].guest_rpte) {
			rev[i].guest_rpte = ptel | rcbits;
			note_hpte_modification(kvm, &rev[i]);
		}
	}
}
837
/*
 * kvm_unmap_rmapp() - remove every HPTE on the rmap chain of @gfn.
 *
 * Repeatedly takes the head of the chain and unmaps it until the chain is
 * empty. Always returns 0.
 */
static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
			   unsigned long gfn)
{
	unsigned long i;
	__be64 *hptep;
	unsigned long *rmapp;

	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	for (;;) {
		lock_rmap(rmapp);
		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
			unlock_rmap(rmapp);
			break;
		}

		/*
		 * The rmap lock is already held here, so the HPTE lock is
		 * only tried, never waited for, while holding it. On failure
		 * the rmap lock is dropped before spinning.
		 */
		i = *rmapp & KVMPPC_RMAP_INDEX;
		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
				cpu_relax();
			continue;
		}

		kvmppc_unmap_hpte(kvm, i, memslot, rmapp, gfn);
		unlock_rmap(rmapp);
		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
	}
	return 0;
}
874
875int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
876{
877 hva_handler_fn handler;
878
879 handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
880 kvm_handle_hva_range(kvm, start, end, handler);
881 return 0;
882}
883
884void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
885 struct kvm_memory_slot *memslot)
886{
887 unsigned long gfn;
888 unsigned long n;
889 unsigned long *rmapp;
890
891 gfn = memslot->base_gfn;
892 rmapp = memslot->arch.rmap;
893 if (kvm_is_radix(kvm)) {
894 kvmppc_radix_flush_memslot(kvm, memslot);
895 return;
896 }
897
898 for (n = memslot->npages; n; --n, ++gfn) {
899
900
901
902
903
904
905 if (*rmapp & KVMPPC_RMAP_PRESENT)
906 kvm_unmap_rmapp(kvm, memslot, gfn);
907 ++rmapp;
908 }
909}
910
/*
 * kvm_age_rmapp() - test and clear the "referenced" state of @gfn.
 *
 * Clears KVMPPC_RMAP_REFERENCED in the rmap word and the R bit of every
 * valid HPTE on the chain, recording R in the guest's view of the entry.
 *
 * Return: 1 if the page was referenced by any of those, 0 otherwise.
 */
static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
			 unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.hpt.rev;
	unsigned long head, i, j;
	__be64 *hptep;
	int ret = 0;
	unsigned long *rmapp;

	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
 retry:
	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
		*rmapp &= ~KVMPPC_RMAP_REFERENCED;
		ret = 1;
	}
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return ret;
	}

	/* Walk the circular chain starting from its head */
	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
		j = rev[i].forw;

		/* If this HPTE isn't referenced, ignore it */
		if (!(be64_to_cpu(hptep[1]) & HPTE_R_R))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
		    (be64_to_cpu(hptep[1]) & HPTE_R_R)) {
			kvmppc_clear_ref_hpte(kvm, hptep, i);
			if (!(rev[i].guest_rpte & HPTE_R_R)) {
				rev[i].guest_rpte |= HPTE_R_R;
				note_hpte_modification(kvm, &rev[i]);
			}
			ret = 1;
		}
		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return ret;
}
965
966int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
967{
968 hva_handler_fn handler;
969
970 handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp;
971 return kvm_handle_hva_range(kvm, start, end, handler);
972}
973
974static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
975 unsigned long gfn)
976{
977 struct revmap_entry *rev = kvm->arch.hpt.rev;
978 unsigned long head, i, j;
979 unsigned long *hp;
980 int ret = 1;
981 unsigned long *rmapp;
982
983 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
984 if (*rmapp & KVMPPC_RMAP_REFERENCED)
985 return 1;
986
987 lock_rmap(rmapp);
988 if (*rmapp & KVMPPC_RMAP_REFERENCED)
989 goto out;
990
991 if (*rmapp & KVMPPC_RMAP_PRESENT) {
992 i = head = *rmapp & KVMPPC_RMAP_INDEX;
993 do {
994 hp = (unsigned long *)(kvm->arch.hpt.virt + (i << 4));
995 j = rev[i].forw;
996 if (be64_to_cpu(hp[1]) & HPTE_R_R)
997 goto out;
998 } while ((i = j) != head);
999 }
1000 ret = 0;
1001
1002 out:
1003 unlock_rmap(rmapp);
1004 return ret;
1005}
1006
1007int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
1008{
1009 hva_handler_fn handler;
1010
1011 handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp;
1012 return kvm_handle_hva(kvm, hva, handler);
1013}
1014
1015void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
1016{
1017 hva_handler_fn handler;
1018
1019 handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
1020 kvm_handle_hva(kvm, hva, handler);
1021}
1022
1023static int vcpus_running(struct kvm *kvm)
1024{
1025 return atomic_read(&kvm->arch.vcpus_running) != 0;
1026}
1027
1028
1029
1030
1031
/*
 * kvm_test_clear_dirty_npages() - test and clear the changed (C) bit of
 * every HPTE on one rmap chain.
 *
 * Return: the number of PAGE_SIZE pages covered by the largest dirty HPTE
 * found on the chain, or 0 if none was dirty.
 */
static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
{
	struct revmap_entry *rev = kvm->arch.hpt.rev;
	unsigned long head, i, j;
	unsigned long n;
	unsigned long v, r;
	__be64 *hptep;
	int npages_dirty = 0;

 retry:
	lock_rmap(rmapp);
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return npages_dirty;
	}

	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		unsigned long hptep1;
		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
		j = rev[i].forw;

		/*
		 * Unlocked pre-check. Skip the entry if C is clear and
		 * either the entry is read-only or vcpus are currently
		 * running. A writable entry with C clear is still examined
		 * under the lock when no vcpu is running.
		 * NOTE(review): presumably because C may not have been
		 * written back yet at that point -- confirm.
		 */
		hptep1 = be64_to_cpu(hptep[1]);
		if (!(hptep1 & HPTE_R_C) &&
		    (!hpte_is_writable(hptep1) || vcpus_running(kvm)))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK))
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
			__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
			continue;
		}

		/* need to make it temporarily absent so C is stable */
		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hptep, i);
		v = be64_to_cpu(hptep[0]);
		r = be64_to_cpu(hptep[1]);
		if (r & HPTE_R_C) {
			hptep[1] = cpu_to_be64(r & ~HPTE_R_C);
			if (!(rev[i].guest_rpte & HPTE_R_C)) {
				rev[i].guest_rpte |= HPTE_R_C;
				note_hpte_modification(kvm, &rev[i]);
			}
			/* Size of this mapping in PAGE_SIZE units, rounded up */
			n = kvmppc_actual_pgsz(v, r);
			n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
			if (n > npages_dirty)
				npages_dirty = n;
			eieio();
		}
		/* Make the entry valid again as the lock is released */
		v &= ~HPTE_V_ABSENT;
		v |= HPTE_V_VALID;
		__unlock_hpte(hptep, v);
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return npages_dirty;
}
1112
1113void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
1114 struct kvm_memory_slot *memslot,
1115 unsigned long *map)
1116{
1117 unsigned long gfn;
1118
1119 if (!vpa->dirty || !vpa->pinned_addr)
1120 return;
1121 gfn = vpa->gpa >> PAGE_SHIFT;
1122 if (gfn < memslot->base_gfn ||
1123 gfn >= memslot->base_gfn + memslot->npages)
1124 return;
1125
1126 vpa->dirty = false;
1127 if (map)
1128 __set_bit_le(gfn - memslot->base_gfn, map);
1129}
1130
1131long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
1132 struct kvm_memory_slot *memslot, unsigned long *map)
1133{
1134 unsigned long i;
1135 unsigned long *rmapp;
1136
1137 preempt_disable();
1138 rmapp = memslot->arch.rmap;
1139 for (i = 0; i < memslot->npages; ++i) {
1140 int npages = kvm_test_clear_dirty_npages(kvm, rmapp);
1141
1142
1143
1144
1145
1146 if (npages)
1147 set_dirty_bits(map, i, npages);
1148 ++rmapp;
1149 }
1150 preempt_enable();
1151 return 0;
1152}
1153
1154void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
1155 unsigned long *nb_ret)
1156{
1157 struct kvm_memory_slot *memslot;
1158 unsigned long gfn = gpa >> PAGE_SHIFT;
1159 struct page *page, *pages[1];
1160 int npages;
1161 unsigned long hva, offset;
1162 int srcu_idx;
1163
1164 srcu_idx = srcu_read_lock(&kvm->srcu);
1165 memslot = gfn_to_memslot(kvm, gfn);
1166 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
1167 goto err;
1168 hva = gfn_to_hva_memslot(memslot, gfn);
1169 npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages);
1170 if (npages < 1)
1171 goto err;
1172 page = pages[0];
1173 srcu_read_unlock(&kvm->srcu, srcu_idx);
1174
1175 offset = gpa & (PAGE_SIZE - 1);
1176 if (nb_ret)
1177 *nb_ret = PAGE_SIZE - offset;
1178 return page_address(page) + offset;
1179
1180 err:
1181 srcu_read_unlock(&kvm->srcu, srcu_idx);
1182 return NULL;
1183}
1184
1185void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
1186 bool dirty)
1187{
1188 struct page *page = virt_to_page(va);
1189 struct kvm_memory_slot *memslot;
1190 unsigned long gfn;
1191 int srcu_idx;
1192
1193 put_page(page);
1194
1195 if (!dirty)
1196 return;
1197
1198
1199 gfn = gpa >> PAGE_SHIFT;
1200 srcu_idx = srcu_read_lock(&kvm->srcu);
1201 memslot = gfn_to_memslot(kvm, gfn);
1202 if (memslot && memslot->dirty_bitmap)
1203 set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
1204 srcu_read_unlock(&kvm->srcu, srcu_idx);
1205}
1206
1207
1208
1209
1210static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
1211{
1212 int rc;
1213
1214 rc = kvmppc_allocate_hpt(&resize->hpt, resize->order);
1215 if (rc < 0)
1216 return rc;
1217
1218 resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
1219 resize->hpt.virt);
1220
1221 return 0;
1222}
1223
/*
 * resize_hpt_rehash_hpte() - move one entry from the current HPT into the
 * tentative new HPT of a resize request.
 * @resize: the resize request (supplies both the VM and the new table)
 * @idx:    index of the entry in the current (old) HPT
 *
 * Empty entries are skipped. Valid entries are first unmapped so that they
 * become absent; only bolted entries are then copied to their new hash
 * location.
 *
 * Return: 0 on success or if there was nothing to copy, -EIO if the page
 * size cannot be decoded, -ENOSPC if the target slot already holds a
 * bolted entry.
 * NOTE(review): the declared return type is unsigned long although
 * negative errno values are returned; the caller in this file stores the
 * result in an int.
 */
static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
					    unsigned long idx)
{
	struct kvm *kvm = resize->kvm;
	struct kvm_hpt_info *old = &kvm->arch.hpt;
	struct kvm_hpt_info *new = &resize->hpt;
	unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
	unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
	__be64 *hptep, *new_hptep;
	unsigned long vpte, rpte, guest_rpte;
	int ret;
	struct revmap_entry *rev;
	unsigned long apsize, avpn, pteg, hash;
	unsigned long new_idx, new_pteg, replace_vpte;
	int pshift;

	hptep = (__be64 *)(old->virt + (idx << 4));

	/*
	 * Unlocked peek: skip entries that are neither valid nor absent
	 * without taking the HPTE lock.
	 */
	vpte = be64_to_cpu(hptep[0]);
	if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
		return 0; /* nothing to do */

	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();

	/* Re-read now that the entry is locked */
	vpte = be64_to_cpu(hptep[0]);

	ret = 0;
	if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
		/* Nothing to do */
		goto out;

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		rpte = be64_to_cpu(hptep[1]);
		vpte = hpte_new_to_old_v(vpte, rpte);
	}

	/* Unmap */
	rev = &old->rev[idx];
	guest_rpte = rev->guest_rpte;

	ret = -EIO;
	apsize = kvmppc_actual_pgsz(vpte, guest_rpte);
	if (!apsize)
		goto out;

	if (vpte & HPTE_V_VALID) {
		unsigned long gfn = hpte_rpn(guest_rpte, apsize);
		int srcu_idx = srcu_read_lock(&kvm->srcu);
		struct kvm_memory_slot *memslot =
			__gfn_to_memslot(kvm_memslots(kvm), gfn);

		if (memslot) {
			unsigned long *rmapp;
			rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];

			lock_rmap(rmapp);
			kvmppc_unmap_hpte(kvm, idx, memslot, rmapp, gfn);
			unlock_rmap(rmapp);
		}

		srcu_read_unlock(&kvm->srcu, srcu_idx);
	}

	/* Reload PTE after unmap; it must now be absent and not valid */
	vpte = be64_to_cpu(hptep[0]);
	BUG_ON(vpte & HPTE_V_VALID);
	BUG_ON(!(vpte & HPTE_V_ABSENT));

	/* Only bolted entries are carried over to the new table */
	ret = 0;
	if (!(vpte & HPTE_V_BOLTED))
		goto out;

	rpte = be64_to_cpu(hptep[1]);

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		vpte = hpte_new_to_old_v(vpte, rpte);
		rpte = hpte_new_to_old_r(rpte);
	}

	/*
	 * Recover the hash value from the abbreviated VPN in the entry and
	 * the PTEG index of its current location.
	 */
	pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
	pteg = idx / HPTES_PER_GROUP;
	if (vpte & HPTE_V_SECONDARY)
		pteg = ~pteg;

	if (!(vpte & HPTE_V_1TB_SEG)) {
		unsigned long offset, vsid;

		/* Non-1TB segment: low 5 bits of avpn are the offset part */
		offset = (avpn & 0x1f) << 23;
		vsid = avpn >> 5;
		/* Fill in the offset bits that avpn does not carry */
		if (pshift < 23)
			offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;

		hash = vsid ^ (offset >> pshift);
	} else {
		unsigned long offset, vsid;

		/* 1TB segment: low 17 bits of avpn are the offset part */
		offset = (avpn & 0x1ffff) << 23;
		vsid = avpn >> 17;
		if (pshift < 23)
			offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;

		hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
	}

	new_pteg = hash & new_hash_mask;
	if (vpte & HPTE_V_SECONDARY)
		new_pteg = ~hash & new_hash_mask;

	/* Same slot within the group, new group */
	new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
	new_hptep = (__be64 *)(new->virt + (new_idx << 4));

	replace_vpte = be64_to_cpu(new_hptep[0]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
		replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
	}

	if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
		/* A collision in the new table is only expected when shrinking */
		BUG_ON(new->order >= old->order);

		if (replace_vpte & HPTE_V_BOLTED) {
			if (vpte & HPTE_V_BOLTED)
				/* Bolted collision, nothing we can do */
				ret = -ENOSPC;
			/* Discard the new HPTE */
			goto out;
		}

		/* Discard the previous HPTE */
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		rpte = hpte_old_to_new_r(vpte, rpte);
		vpte = hpte_old_to_new_v(vpte);
	}

	new_hptep[1] = cpu_to_be64(rpte);
	new->rev[new_idx].guest_rpte = guest_rpte;
	/* No need for a barrier, since new HPT isn't active */
	new_hptep[0] = cpu_to_be64(vpte);
	unlock_hpte(new_hptep, vpte);

out:
	unlock_hpte(hptep, vpte);
	return ret;
}
1378
1379static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
1380{
1381 struct kvm *kvm = resize->kvm;
1382 unsigned long i;
1383 int rc;
1384
1385 for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
1386 rc = resize_hpt_rehash_hpte(resize, i);
1387 if (rc != 0)
1388 return rc;
1389 }
1390
1391 return 0;
1392}
1393
1394static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
1395{
1396 struct kvm *kvm = resize->kvm;
1397 struct kvm_hpt_info hpt_tmp;
1398
1399
1400
1401
1402 resize_hpt_debug(resize, "resize_hpt_pivot()\n");
1403
1404 spin_lock(&kvm->mmu_lock);
1405 asm volatile("ptesync" : : : "memory");
1406
1407 hpt_tmp = kvm->arch.hpt;
1408 kvmppc_set_hpt(kvm, &resize->hpt);
1409 resize->hpt = hpt_tmp;
1410
1411 spin_unlock(&kvm->mmu_lock);
1412
1413 synchronize_srcu_expedited(&kvm->srcu);
1414
1415 if (cpu_has_feature(CPU_FTR_ARCH_300))
1416 kvmppc_setup_partition_table(kvm);
1417
1418 resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
1419}
1420
1421static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
1422{
1423 if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock)))
1424 return;
1425
1426 if (!resize)
1427 return;
1428
1429 if (resize->error != -EBUSY) {
1430 if (resize->hpt.virt)
1431 kvmppc_free_hpt(&resize->hpt);
1432 kfree(resize);
1433 }
1434
1435 if (kvm->arch.resize_hpt == resize)
1436 kvm->arch.resize_hpt = NULL;
1437}
1438
1439static void resize_hpt_prepare_work(struct work_struct *work)
1440{
1441 struct kvm_resize_hpt *resize = container_of(work,
1442 struct kvm_resize_hpt,
1443 work);
1444 struct kvm *kvm = resize->kvm;
1445 int err = 0;
1446
1447 if (WARN_ON(resize->error != -EBUSY))
1448 return;
1449
1450 mutex_lock(&kvm->arch.mmu_setup_lock);
1451
1452
1453 if (kvm->arch.resize_hpt == resize) {
1454
1455
1456
1457 mutex_unlock(&kvm->arch.mmu_setup_lock);
1458
1459 resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
1460 resize->order);
1461
1462 err = resize_hpt_allocate(resize);
1463
1464
1465
1466
1467 if (WARN_ON(err == -EBUSY))
1468 err = -EINPROGRESS;
1469
1470 mutex_lock(&kvm->arch.mmu_setup_lock);
1471
1472
1473
1474 }
1475
1476 resize->error = err;
1477
1478 if (kvm->arch.resize_hpt != resize)
1479 resize_hpt_release(kvm, resize);
1480
1481 mutex_unlock(&kvm->arch.mmu_setup_lock);
1482}
1483
1484long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
1485 struct kvm_ppc_resize_hpt *rhpt)
1486{
1487 unsigned long flags = rhpt->flags;
1488 unsigned long shift = rhpt->shift;
1489 struct kvm_resize_hpt *resize;
1490 int ret;
1491
1492 if (flags != 0 || kvm_is_radix(kvm))
1493 return -EINVAL;
1494
1495 if (shift && ((shift < 18) || (shift > 46)))
1496 return -EINVAL;
1497
1498 mutex_lock(&kvm->arch.mmu_setup_lock);
1499
1500 resize = kvm->arch.resize_hpt;
1501
1502 if (resize) {
1503 if (resize->order == shift) {
1504
1505 ret = resize->error;
1506 if (ret == -EBUSY)
1507 ret = 100;
1508 else if (ret)
1509 resize_hpt_release(kvm, resize);
1510
1511 goto out;
1512 }
1513
1514
1515 resize_hpt_release(kvm, resize);
1516 }
1517
1518 ret = 0;
1519 if (!shift)
1520 goto out;
1521
1522
1523
1524 resize = kzalloc(sizeof(*resize), GFP_KERNEL);
1525 if (!resize) {
1526 ret = -ENOMEM;
1527 goto out;
1528 }
1529
1530 resize->error = -EBUSY;
1531 resize->order = shift;
1532 resize->kvm = kvm;
1533 INIT_WORK(&resize->work, resize_hpt_prepare_work);
1534 kvm->arch.resize_hpt = resize;
1535
1536 schedule_work(&resize->work);
1537
1538 ret = 100;
1539
1540out:
1541 mutex_unlock(&kvm->arch.mmu_setup_lock);
1542 return ret;
1543}
1544
/*
 * Deliberately empty callback handed to on_each_cpu() by
 * kvm_vm_ioctl_resize_hpt_commit(); @opaque is unused.
 * NOTE(review): presumably only the cross-CPU call itself matters -- confirm.
 */
static void resize_hpt_boot_vcpu(void *opaque)
{
}
1549
1550long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
1551 struct kvm_ppc_resize_hpt *rhpt)
1552{
1553 unsigned long flags = rhpt->flags;
1554 unsigned long shift = rhpt->shift;
1555 struct kvm_resize_hpt *resize;
1556 long ret;
1557
1558 if (flags != 0 || kvm_is_radix(kvm))
1559 return -EINVAL;
1560
1561 if (shift && ((shift < 18) || (shift > 46)))
1562 return -EINVAL;
1563
1564 mutex_lock(&kvm->arch.mmu_setup_lock);
1565
1566 resize = kvm->arch.resize_hpt;
1567
1568
1569 ret = -EIO;
1570 if (WARN_ON(!kvm->arch.mmu_ready))
1571 goto out_no_hpt;
1572
1573
1574 kvm->arch.mmu_ready = 0;
1575 smp_mb();
1576
1577
1578
1579 on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);
1580
1581 ret = -ENXIO;
1582 if (!resize || (resize->order != shift))
1583 goto out;
1584
1585 ret = resize->error;
1586 if (ret)
1587 goto out;
1588
1589 ret = resize_hpt_rehash(resize);
1590 if (ret)
1591 goto out;
1592
1593 resize_hpt_pivot(resize);
1594
1595out:
1596
1597 kvm->arch.mmu_ready = 1;
1598 smp_mb();
1599out_no_hpt:
1600 resize_hpt_release(kvm, resize);
1601 mutex_unlock(&kvm->arch.mmu_setup_lock);
1602 return ret;
1603}
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
/*
 * Per-open state of a "kvm-htab" file descriptor, allocated by
 * kvm_vm_ioctl_get_htab_fd() and freed by kvm_htab_release().
 */
struct kvm_htab_ctx {
	/* HPT index at which the next kvm_htab_read() resumes */
	unsigned long	index;
	/* KVM_GET_HTAB_* flags copied from the ioctl argument */
	unsigned long	flags;
	/* Owning VM; a reference is taken at creation and put on release */
	struct kvm	*kvm;
	/* Non-zero until kvm_htab_read() has reached the end of the table once */
	int		first_pass;
};
1627
/* Size in bytes of one HPT entry as handled here: two unsigned long words */
#define HPTE_SIZE	(sizeof(unsigned long) * 2)
1629
1630
1631
1632
1633
1634static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp)
1635{
1636 unsigned long rcbits_unset;
1637
1638 if (revp->guest_rpte & HPTE_GR_MODIFIED)
1639 return 1;
1640
1641
1642 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
1643 if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) &&
1644 (be64_to_cpu(hptp[1]) & rcbits_unset))
1645 return 1;
1646
1647 return 0;
1648}
1649
1650static long record_hpte(unsigned long flags, __be64 *hptp,
1651 unsigned long *hpte, struct revmap_entry *revp,
1652 int want_valid, int first_pass)
1653{
1654 unsigned long v, r, hr;
1655 unsigned long rcbits_unset;
1656 int ok = 1;
1657 int valid, dirty;
1658
1659
1660 dirty = hpte_dirty(revp, hptp);
1661 if (!first_pass && !dirty)
1662 return 0;
1663
1664 valid = 0;
1665 if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) {
1666 valid = 1;
1667 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
1668 !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED))
1669 valid = 0;
1670 }
1671 if (valid != want_valid)
1672 return 0;
1673
1674 v = r = 0;
1675 if (valid || dirty) {
1676
1677 preempt_disable();
1678 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
1679 cpu_relax();
1680 v = be64_to_cpu(hptp[0]);
1681 hr = be64_to_cpu(hptp[1]);
1682 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
1683 v = hpte_new_to_old_v(v, hr);
1684 hr = hpte_new_to_old_r(hr);
1685 }
1686
1687
1688 valid = !!(v & HPTE_V_VALID);
1689 dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
1690
1691
1692 rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
1693 if (valid && (rcbits_unset & hr)) {
1694 revp->guest_rpte |= (hr &
1695 (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
1696 dirty = 1;
1697 }
1698
1699 if (v & HPTE_V_ABSENT) {
1700 v &= ~HPTE_V_ABSENT;
1701 v |= HPTE_V_VALID;
1702 valid = 1;
1703 }
1704 if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
1705 valid = 0;
1706
1707 r = revp->guest_rpte;
1708
1709 if (valid == want_valid && dirty) {
1710 r &= ~HPTE_GR_MODIFIED;
1711 revp->guest_rpte = r;
1712 }
1713 unlock_hpte(hptp, be64_to_cpu(hptp[0]));
1714 preempt_enable();
1715 if (!(valid == want_valid && (first_pass || dirty)))
1716 ok = 0;
1717 }
1718 hpte[0] = cpu_to_be64(v);
1719 hpte[1] = cpu_to_be64(r);
1720 return ok;
1721}
1722
1723static ssize_t kvm_htab_read(struct file *file, char __user *buf,
1724 size_t count, loff_t *ppos)
1725{
1726 struct kvm_htab_ctx *ctx = file->private_data;
1727 struct kvm *kvm = ctx->kvm;
1728 struct kvm_get_htab_header hdr;
1729 __be64 *hptp;
1730 struct revmap_entry *revp;
1731 unsigned long i, nb, nw;
1732 unsigned long __user *lbuf;
1733 struct kvm_get_htab_header __user *hptr;
1734 unsigned long flags;
1735 int first_pass;
1736 unsigned long hpte[2];
1737
1738 if (!access_ok(buf, count))
1739 return -EFAULT;
1740 if (kvm_is_radix(kvm))
1741 return 0;
1742
1743 first_pass = ctx->first_pass;
1744 flags = ctx->flags;
1745
1746 i = ctx->index;
1747 hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
1748 revp = kvm->arch.hpt.rev + i;
1749 lbuf = (unsigned long __user *)buf;
1750
1751 nb = 0;
1752 while (nb + sizeof(hdr) + HPTE_SIZE < count) {
1753
1754 hptr = (struct kvm_get_htab_header __user *)buf;
1755 hdr.n_valid = 0;
1756 hdr.n_invalid = 0;
1757 nw = nb;
1758 nb += sizeof(hdr);
1759 lbuf = (unsigned long __user *)(buf + sizeof(hdr));
1760
1761
1762 if (!first_pass) {
1763 while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
1764 !hpte_dirty(revp, hptp)) {
1765 ++i;
1766 hptp += 2;
1767 ++revp;
1768 }
1769 }
1770 hdr.index = i;
1771
1772
1773 while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
1774 hdr.n_valid < 0xffff &&
1775 nb + HPTE_SIZE < count &&
1776 record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
1777
1778 ++hdr.n_valid;
1779 if (__put_user(hpte[0], lbuf) ||
1780 __put_user(hpte[1], lbuf + 1))
1781 return -EFAULT;
1782 nb += HPTE_SIZE;
1783 lbuf += 2;
1784 ++i;
1785 hptp += 2;
1786 ++revp;
1787 }
1788
1789 while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
1790 hdr.n_invalid < 0xffff &&
1791 record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
1792
1793 ++hdr.n_invalid;
1794 ++i;
1795 hptp += 2;
1796 ++revp;
1797 }
1798
1799 if (hdr.n_valid || hdr.n_invalid) {
1800
1801 if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
1802 return -EFAULT;
1803 nw = nb;
1804 buf = (char __user *)lbuf;
1805 } else {
1806 nb = nw;
1807 }
1808
1809
1810 if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
1811 i = 0;
1812 ctx->first_pass = 0;
1813 break;
1814 }
1815 }
1816
1817 ctx->index = i;
1818
1819 return nb;
1820}
1821
1822static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
1823 size_t count, loff_t *ppos)
1824{
1825 struct kvm_htab_ctx *ctx = file->private_data;
1826 struct kvm *kvm = ctx->kvm;
1827 struct kvm_get_htab_header hdr;
1828 unsigned long i, j;
1829 unsigned long v, r;
1830 unsigned long __user *lbuf;
1831 __be64 *hptp;
1832 unsigned long tmp[2];
1833 ssize_t nb;
1834 long int err, ret;
1835 int mmu_ready;
1836 int pshift;
1837
1838 if (!access_ok(buf, count))
1839 return -EFAULT;
1840 if (kvm_is_radix(kvm))
1841 return -EINVAL;
1842
1843
1844 mutex_lock(&kvm->arch.mmu_setup_lock);
1845 mmu_ready = kvm->arch.mmu_ready;
1846 if (mmu_ready) {
1847 kvm->arch.mmu_ready = 0;
1848
1849 smp_mb();
1850 if (atomic_read(&kvm->arch.vcpus_running)) {
1851 kvm->arch.mmu_ready = 1;
1852 mutex_unlock(&kvm->arch.mmu_setup_lock);
1853 return -EBUSY;
1854 }
1855 }
1856
1857 err = 0;
1858 for (nb = 0; nb + sizeof(hdr) <= count; ) {
1859 err = -EFAULT;
1860 if (__copy_from_user(&hdr, buf, sizeof(hdr)))
1861 break;
1862
1863 err = 0;
1864 if (nb + hdr.n_valid * HPTE_SIZE > count)
1865 break;
1866
1867 nb += sizeof(hdr);
1868 buf += sizeof(hdr);
1869
1870 err = -EINVAL;
1871 i = hdr.index;
1872 if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) ||
1873 i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt))
1874 break;
1875
1876 hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
1877 lbuf = (unsigned long __user *)buf;
1878 for (j = 0; j < hdr.n_valid; ++j) {
1879 __be64 hpte_v;
1880 __be64 hpte_r;
1881
1882 err = -EFAULT;
1883 if (__get_user(hpte_v, lbuf) ||
1884 __get_user(hpte_r, lbuf + 1))
1885 goto out;
1886 v = be64_to_cpu(hpte_v);
1887 r = be64_to_cpu(hpte_r);
1888 err = -EINVAL;
1889 if (!(v & HPTE_V_VALID))
1890 goto out;
1891 pshift = kvmppc_hpte_base_page_shift(v, r);
1892 if (pshift <= 0)
1893 goto out;
1894 lbuf += 2;
1895 nb += HPTE_SIZE;
1896
1897 if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
1898 kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
1899 err = -EIO;
1900 ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
1901 tmp);
1902 if (ret != H_SUCCESS) {
1903 pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
1904 "r=%lx\n", ret, i, v, r);
1905 goto out;
1906 }
1907 if (!mmu_ready && is_vrma_hpte(v)) {
1908 unsigned long senc, lpcr;
1909
1910 senc = slb_pgsize_encoding(1ul << pshift);
1911 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1912 (VRMA_VSID << SLB_VSID_SHIFT_1T);
1913 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
1914 lpcr = senc << (LPCR_VRMASD_SH - 4);
1915 kvmppc_update_lpcr(kvm, lpcr,
1916 LPCR_VRMASD);
1917 } else {
1918 kvmppc_setup_partition_table(kvm);
1919 }
1920 mmu_ready = 1;
1921 }
1922 ++i;
1923 hptp += 2;
1924 }
1925
1926 for (j = 0; j < hdr.n_invalid; ++j) {
1927 if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
1928 kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
1929 ++i;
1930 hptp += 2;
1931 }
1932 err = 0;
1933 }
1934
1935 out:
1936
1937 smp_wmb();
1938 kvm->arch.mmu_ready = mmu_ready;
1939 mutex_unlock(&kvm->arch.mmu_setup_lock);
1940
1941 if (err)
1942 return err;
1943 return nb;
1944}
1945
1946static int kvm_htab_release(struct inode *inode, struct file *filp)
1947{
1948 struct kvm_htab_ctx *ctx = filp->private_data;
1949
1950 filp->private_data = NULL;
1951 if (!(ctx->flags & KVM_GET_HTAB_WRITE))
1952 atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
1953 kvm_put_kvm(ctx->kvm);
1954 kfree(ctx);
1955 return 0;
1956}
1957
1958static const struct file_operations kvm_htab_fops = {
1959 .read = kvm_htab_read,
1960 .write = kvm_htab_write,
1961 .llseek = default_llseek,
1962 .release = kvm_htab_release,
1963};
1964
1965int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
1966{
1967 int ret;
1968 struct kvm_htab_ctx *ctx;
1969 int rwflag;
1970
1971
1972 if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
1973 return -EINVAL;
1974 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
1975 if (!ctx)
1976 return -ENOMEM;
1977 kvm_get_kvm(kvm);
1978 ctx->kvm = kvm;
1979 ctx->index = ghf->start_index;
1980 ctx->flags = ghf->flags;
1981 ctx->first_pass = 1;
1982
1983 rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
1984 ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
1985 if (ret < 0) {
1986 kfree(ctx);
1987 kvm_put_kvm_no_destroy(kvm);
1988 return ret;
1989 }
1990
1991 if (rwflag == O_RDONLY) {
1992 mutex_lock(&kvm->slots_lock);
1993 atomic_inc(&kvm->arch.hpte_mod_interest);
1994
1995 synchronize_srcu_expedited(&kvm->srcu);
1996 mutex_unlock(&kvm->slots_lock);
1997 }
1998
1999 return ret;
2000}
2001
2002struct debugfs_htab_state {
2003 struct kvm *kvm;
2004 struct mutex mutex;
2005 unsigned long hpt_index;
2006 int chars_left;
2007 int buf_index;
2008 char buf[64];
2009};
2010
2011static int debugfs_htab_open(struct inode *inode, struct file *file)
2012{
2013 struct kvm *kvm = inode->i_private;
2014 struct debugfs_htab_state *p;
2015
2016 p = kzalloc(sizeof(*p), GFP_KERNEL);
2017 if (!p)
2018 return -ENOMEM;
2019
2020 kvm_get_kvm(kvm);
2021 p->kvm = kvm;
2022 mutex_init(&p->mutex);
2023 file->private_data = p;
2024
2025 return nonseekable_open(inode, file);
2026}
2027
2028static int debugfs_htab_release(struct inode *inode, struct file *file)
2029{
2030 struct debugfs_htab_state *p = file->private_data;
2031
2032 kvm_put_kvm(p->kvm);
2033 kfree(p);
2034 return 0;
2035}
2036
2037static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
2038 size_t len, loff_t *ppos)
2039{
2040 struct debugfs_htab_state *p = file->private_data;
2041 ssize_t ret, r;
2042 unsigned long i, n;
2043 unsigned long v, hr, gr;
2044 struct kvm *kvm;
2045 __be64 *hptp;
2046
2047 kvm = p->kvm;
2048 if (kvm_is_radix(kvm))
2049 return 0;
2050
2051 ret = mutex_lock_interruptible(&p->mutex);
2052 if (ret)
2053 return ret;
2054
2055 if (p->chars_left) {
2056 n = p->chars_left;
2057 if (n > len)
2058 n = len;
2059 r = copy_to_user(buf, p->buf + p->buf_index, n);
2060 n -= r;
2061 p->chars_left -= n;
2062 p->buf_index += n;
2063 buf += n;
2064 len -= n;
2065 ret = n;
2066 if (r) {
2067 if (!n)
2068 ret = -EFAULT;
2069 goto out;
2070 }
2071 }
2072
2073 i = p->hpt_index;
2074 hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
2075 for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
2076 ++i, hptp += 2) {
2077 if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
2078 continue;
2079
2080
2081 preempt_disable();
2082 while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
2083 cpu_relax();
2084 v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
2085 hr = be64_to_cpu(hptp[1]);
2086 gr = kvm->arch.hpt.rev[i].guest_rpte;
2087 unlock_hpte(hptp, v);
2088 preempt_enable();
2089
2090 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
2091 continue;
2092
2093 n = scnprintf(p->buf, sizeof(p->buf),
2094 "%6lx %.16lx %.16lx %.16lx\n",
2095 i, v, hr, gr);
2096 p->chars_left = n;
2097 if (n > len)
2098 n = len;
2099 r = copy_to_user(buf, p->buf, n);
2100 n -= r;
2101 p->chars_left -= n;
2102 p->buf_index = n;
2103 buf += n;
2104 len -= n;
2105 ret += n;
2106 if (r) {
2107 if (!ret)
2108 ret = -EFAULT;
2109 goto out;
2110 }
2111 }
2112 p->hpt_index = i;
2113
2114 out:
2115 mutex_unlock(&p->mutex);
2116 return ret;
2117}
2118
2119static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
2120 size_t len, loff_t *ppos)
2121{
2122 return -EACCES;
2123}
2124
2125static const struct file_operations debugfs_htab_fops = {
2126 .owner = THIS_MODULE,
2127 .open = debugfs_htab_open,
2128 .release = debugfs_htab_release,
2129 .read = debugfs_htab_read,
2130 .write = debugfs_htab_write,
2131 .llseek = generic_file_llseek,
2132};
2133
2134void kvmppc_mmu_debugfs_init(struct kvm *kvm)
2135{
2136 debugfs_create_file("htab", 0400, kvm->arch.debugfs_dir, kvm,
2137 &debugfs_htab_fops);
2138}
2139
2140void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
2141{
2142 struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
2143
2144 vcpu->arch.slb_nr = 32;
2145
2146 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
2147
2148 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
2149}
2150