/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 *
 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/srcu.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/debugfs.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
#include <asm/pte-walk.h>

#include "trace_hv.h"

//#define DEBUG_RESIZE_HPT	1

#ifdef DEBUG_RESIZE_HPT
#define resize_hpt_debug(resize, ...)				\
	do {							\
		printk(KERN_DEBUG "RESIZE HPT %p: ", resize);	\
		printk(__VA_ARGS__);				\
	} while (0)
#else
#define resize_hpt_debug(resize, ...)				\
	do { } while (0)
#endif

static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
				long pte_index, unsigned long pteh,
				unsigned long ptel, unsigned long *pte_idx_ret);

struct kvm_resize_hpt {
	/* These fields read-only after init */
	struct kvm *kvm;
	struct work_struct work;
	u32 order;

	/* These fields protected by kvm->lock */
	int error;
	bool prepare_done;

	/* Private to the work thread, until prepare_done is true,
	 * then protected by kvm->lock */
	struct kvm_hpt_info hpt;
};

static void kvmppc_rmap_reset(struct kvm *kvm);

int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
{
	unsigned long hpt = 0;
	int cma = 0;
	struct page *page = NULL;
	struct revmap_entry *rev;
	unsigned long npte;

	if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
		return -EINVAL;

	page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
	if (page) {
		hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
		memset((void *)hpt, 0, (1ul << order));
		cma = 1;
	}

	if (!hpt)
		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
				       |__GFP_NOWARN, order - PAGE_SHIFT);

	if (!hpt)
		return -ENOMEM;

	/* HPTEs are 2**4 bytes long */
	npte = 1ul << (order - 4);

	/* Allocate reverse map array */
	rev = vmalloc(sizeof(struct revmap_entry) * npte);
	if (!rev) {
		pr_err("kvmppc_allocate_hpt: Couldn't alloc reverse map array\n");
		if (cma)
			kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
		else
			free_pages(hpt, order - PAGE_SHIFT);
		return -ENOMEM;
	}

	info->order = order;
	info->virt = hpt;
	info->cma = cma;
	info->rev = rev;

	return 0;
}

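/*
 * Worked sizing example (illustration only, not tied to any particular
 * machine): each HPTE is 16 bytes, so an HPT of order 24 (16 MiB) holds
 * 1ul << (24 - 4) = 1M HPTEs, i.e. 128K groups of HPTES_PER_GROUP (8)
 * entries, and the revmap array adds one struct revmap_entry per HPTE
 * on top of that.  The minimum order accepted here (PPC_MIN_HPT_ORDER,
 * 18) corresponds to a 256 KiB table of 16K HPTEs.
 */
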
void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
{
	atomic64_set(&kvm->arch.mmio_update, 0);
	kvm->arch.hpt = *info;
	kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18);

	pr_debug("KVM guest htab at %lx (order %ld), LPID %x\n",
		 info->virt, (long)info->order, kvm->arch.lpid);
}

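/*
 * Example of the SDR1 encoding above (a sketch, assuming the usual
 * HTABSIZE convention): the low bits of sdr1 hold HTABSIZE =
 * order - 18, i.e. log2(HPT size / 256 KiB).  So a 1 MiB HPT
 * (order 20) whose real address is 0x1000000 would be encoded as
 * sdr1 = 0x1000000 | 2.
 */
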
long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
{
	long err = -EBUSY;
	struct kvm_hpt_info info;

	if (kvm_is_radix(kvm))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->arch.hpte_setup_done) {
		kvm->arch.hpte_setup_done = 0;
		/* order hpte_setup_done vs. vcpus_running */
		smp_mb();
		if (atomic_read(&kvm->arch.vcpus_running)) {
			kvm->arch.hpte_setup_done = 1;
			goto out;
		}
	}
	if (kvm->arch.hpt.order == order) {
		/* We already have a suitable HPT */

		/* Set the entire HPT to 0, i.e. invalid HPTEs */
		memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
		/*
		 * Reset all the reverse-mapping chains for all memslots
		 */
		kvmppc_rmap_reset(kvm);
		/* Ensure that each vcpu will flush its TLB on next entry. */
		cpumask_setall(&kvm->arch.need_tlb_flush);
		err = 0;
		goto out;
	}

	if (kvm->arch.hpt.virt) {
		kvmppc_free_hpt(&kvm->arch.hpt);
		kvmppc_rmap_reset(kvm);
	}

	err = kvmppc_allocate_hpt(&info, order);
	if (err < 0)
		goto out;
	kvmppc_set_hpt(kvm, &info);

out:
	mutex_unlock(&kvm->lock);
	return err;
}

void kvmppc_free_hpt(struct kvm_hpt_info *info)
{
	vfree(info->rev);
	if (info->cma)
		kvm_free_hpt_cma(virt_to_page(info->virt),
				 1 << (info->order - PAGE_SHIFT));
	else if (info->virt)
		free_pages(info->virt, info->order - PAGE_SHIFT);
	info->virt = 0;
	info->order = 0;
}

/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
}

/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
{
	return (pgsize == 0x10000) ? 0x1000 : 0;
}

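/*
 * For illustration of the two helpers above: a 4 KiB base page
 * encodes as 0 in both dwords; a 64 KiB page (0x10000) sets
 * HPTE_V_LARGE in dword 0 and the 0x1000 LP encoding in dword 1;
 * a 16 MiB page sets HPTE_V_LARGE with an LP contribution of 0
 * in dword 1.
 */
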
void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
		     unsigned long porder)
{
	unsigned long i;
	unsigned long npages;
	unsigned long hp_v, hp_r;
	unsigned long addr, hash;
	unsigned long psize;
	unsigned long hp0, hp1;
	unsigned long idx_ret;
	long ret;
	struct kvm *kvm = vcpu->kvm;

	psize = 1ul << porder;
	npages = memslot->npages >> (porder - PAGE_SHIFT);

	/* VRMA can't be > 1TB */
	if (npages > 1ul << (40 - porder))
		npages = 1ul << (40 - porder);
	/* Can't use more than 1 HPTE per HPTEG */
	if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1)
		npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1;

	hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
		HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
	hp1 = hpte1_pgsize_encoding(psize) |
		HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;

	for (i = 0; i < npages; ++i) {
		addr = i << porder;
		/* can't use hpt_hash since va > 64 bits */
		hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
			& kvmppc_hpt_mask(&kvm->arch.hpt);
		/*
		 * We assume that the hash table is empty and no
		 * vcpus are using it at this stage.  Since we create
		 * at most one HPTE per HPTEG, we just assume entry 7
		 * is available and use it.
		 */
		hash = (hash << 3) + 7;
		hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
		hp_r = hp1 | addr;
		ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
						 &idx_ret);
		if (ret != H_SUCCESS) {
			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
			       addr, ret);
			break;
		}
	}
}

int kvmppc_mmu_hv_init(void)
{
	unsigned long host_lpid, rsvd_lpid;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
	host_lpid = mfspr(SPRN_LPID);
	rsvd_lpid = LPID_RSVD;

	kvmppc_init_lpid(rsvd_lpid + 1);

	kvmppc_claim_lpid(host_lpid);
	/* rsvd_lpid is reserved for use in partition switching */
	kvmppc_claim_lpid(rsvd_lpid);

	return 0;
}

static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
{
	unsigned long msr = vcpu->arch.intr_msr;

	/* If transactional, change to suspend mode on IRQ delivery */
	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
		msr |= MSR_TS_S;
	else
		msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
	kvmppc_set_msr(vcpu, msr);
}

static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
				long pte_index, unsigned long pteh,
				unsigned long ptel, unsigned long *pte_idx_ret)
{
	long ret;

	/* Protect linux PTE lookup from page table destruction */
	rcu_read_lock_sched();	/* this disables preemption too */
	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
				current->mm->pgd, false, pte_idx_ret);
	rcu_read_unlock_sched();
	if (ret == H_TOO_HARD) {
		/* this can't happen */
		pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
		ret = H_RESOURCE;	/* or something */
	}
	return ret;
}

static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
							 gva_t eaddr)
{
	u64 mask;
	int i;

	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (!(vcpu->arch.slb[i].orige & SLB_ESID_V))
			continue;

		if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T)
			mask = ESID_MASK_1T;
		else
			mask = ESID_MASK;

		if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0)
			return &vcpu->arch.slb[i];
	}
	return NULL;
}

static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
			unsigned long ea)
{
	unsigned long ra_mask;

	ra_mask = hpte_page_size(v, r) - 1;
	return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
}

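/*
 * Worked example for kvmppc_mmu_get_real_addr() (numbers are
 * illustrative only): for a 64 KiB HPTE, hpte_page_size() returns
 * 0x10000 and ra_mask is 0xffff, so the result keeps the RPN bits
 * above bit 16 and takes the low 16 bits from the effective address;
 * e.g. an RPN field of 0x12340000 with ea low bits 0xabcd yields a
 * real address of 0x1234abcd.
 */
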
static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
			struct kvmppc_pte *gpte, bool data, bool iswrite)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_slb *slbe;
	unsigned long slb_v;
	unsigned long pp, key;
	unsigned long v, orig_v, gr;
	__be64 *hptep;
	int index;
	int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);

	/* Get SLB entry */
	if (virtmode) {
		slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr);
		if (!slbe)
			return -EINVAL;
		slb_v = slbe->origv;
	} else {
		/* real mode access */
		slb_v = vcpu->kvm->arch.vrma_slb_v;
	}

	preempt_disable();
	/* Find the HPTE in the hash table */
	index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v,
					 HPTE_V_VALID | HPTE_V_ABSENT);
	if (index < 0) {
		preempt_enable();
		return -ENOENT;
	}
	hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
	v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
	gr = kvm->arch.hpt.rev[index].guest_rpte;

	unlock_hpte(hptep, orig_v);
	preempt_enable();

	gpte->eaddr = eaddr;
	gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff);

	/* Get PP bits and key for permission check */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
	key &= slb_v;

	/* Calculate permissions */
	gpte->may_read = hpte_read_permission(pp, key);
	gpte->may_write = hpte_write_permission(pp, key);
	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));

	/* Storage key permissions */
	if (data && virtmode) {
		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (amrfield & 1)
			gpte->may_read = 0;
		if (amrfield & 2)
			gpte->may_write = 0;
	}

	/* Get the guest physical address */
	gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr);
	return 0;
}

/*
 * Quick test for whether an instruction is a load or a store.
 * If the instruction is a load or a store, then this will indicate
 * which it is, at least on server processors.  (Embedded processors
 * have some external PID instructions that don't follow the rule
 * embodied here.)  The instruction is assumed to be a load or a
 * store, so the test only needs to look at the single opcode bit
 * that differs between loads and stores: bit 0x10000000 for D-form
 * instructions, or bit 0x100 of the extended opcode for X-form
 * (primary opcode 31) instructions.
 */
static int instruction_is_store(unsigned int instr)
{
	unsigned int mask;

	mask = 0x10000000;
	if ((instr & 0xfc000000) == 0x7c000000)
		mask = 0x100;		/* major opcode 31 */
	return (instr & mask) != 0;
}

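/*
 * Sanity-check examples for instruction_is_store(), using standard
 * PowerISA encodings (not specific to this file): stw is primary
 * opcode 36 (0x90000000), which has bit 0x10000000 set, while lwz is
 * opcode 32 (0x80000000), which doesn't.  For opcode-31 X-forms the
 * extended opcode sits at instruction bits 1-10, so stwx (XO 151,
 * 151 << 1 = 0x12e) gives instr & 0x100 != 0 and lwzx (XO 23,
 * 23 << 1 = 0x2e) gives 0.
 */
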
int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned long gpa, gva_t ea, int is_store)
{
	u32 last_inst;

	/*
	 * If we fail, we just return to the guest and try executing it again.
	 */
	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
		EMULATE_DONE)
		return RESUME_GUEST;

	/*
	 * WARNING: We do not know for sure whether the instruction we just
	 * read from memory is the same that caused the fault in the first
	 * place.  If the instruction we read is neither a load or a store,
	 * then it can't access memory, so we don't need to worry about
	 * enforcing access permissions.  So, assuming it is a load or
	 * store, we just check that its direction (load or store) is
	 * consistent with the original fault, since that's what we
	 * checked the access permissions against.  If there is a mismatch
	 * we just return and retry the instruction.
	 */
	if (instruction_is_store(last_inst) != !!is_store)
		return RESUME_GUEST;

	/*
	 * Emulated accesses are emulated by looking at the hash for
	 * translation once, then performing the access later. The
	 * translation could be invalidated in the meantime in which
	 * point performing the subsequent memory access on the old
	 * physical address could possibly be a security hole for the
	 * guest (but not the host).
	 *
	 * This is less of an issue for MMIO stores since they aren't
	 * globally visible. It could be an issue for MMIO loads to
	 * a certain extent but we'll ignore it for now.
	 */
	vcpu->arch.paddr_accessed = gpa;
	vcpu->arch.vaddr_accessed = ea;
	return kvmppc_emulate_mmio(run, vcpu);
}

int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
				unsigned long ea, unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long hpte[3], r;
	unsigned long hnow_v, hnow_r;
	__be64 *hptep;
	unsigned long mmu_seq, psize, pte_size;
	unsigned long gpa_base, gfn_base;
	unsigned long gpa, gfn, hva, pfn;
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	struct revmap_entry *rev;
	struct page *page, *pages[1];
	long index, ret, npages;
	bool is_ci;
	unsigned int writing, write_ok;
	struct vm_area_struct *vma;
	unsigned long rcbits;
	long mmio_update;

	if (kvm_is_radix(kvm))
		return kvmppc_book3s_radix_page_fault(run, vcpu, ea, dsisr);

	/*
	 * Real-mode code has already searched the HPT and found the
	 * entry we're interested in.  Lock the entry and check that
	 * it hasn't changed.  If it has, just return and re-execute the
	 * instruction.
	 */
	if (ea != vcpu->arch.pgfault_addr)
		return RESUME_GUEST;

	if (vcpu->arch.pgfault_cache) {
		mmio_update = atomic64_read(&kvm->arch.mmio_update);
		if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
			r = vcpu->arch.pgfault_cache->rpte;
			psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r);
			gpa_base = r & HPTE_R_RPN & ~(psize - 1);
			gfn_base = gpa_base >> PAGE_SHIFT;
			gpa = gpa_base | (ea & (psize - 1));
			return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
						dsisr & DSISR_ISSTORE);
		}
	}
	index = vcpu->arch.pgfault_index;
	hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
	rev = &kvm->arch.hpt.rev[index];
	preempt_disable();
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
	hpte[1] = be64_to_cpu(hptep[1]);
	hpte[2] = r = rev->guest_rpte;
	unlock_hpte(hptep, hpte[0]);
	preempt_enable();

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
		hpte[1] = hpte_new_to_old_r(hpte[1]);
	}
	if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
	    hpte[1] != vcpu->arch.pgfault_hpte[1])
		return RESUME_GUEST;

	/* Translate the logical address and get the page */
	psize = hpte_page_size(hpte[0], r);
	gpa_base = r & HPTE_R_RPN & ~(psize - 1);
	gfn_base = gpa_base >> PAGE_SHIFT;
	gpa = gpa_base | (ea & (psize - 1));
	gfn = gpa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(kvm, gfn);

	trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);

	/* No memslot means it's an emulated MMIO region */
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
					      dsisr & DSISR_ISSTORE);

	/*
	 * This should never happen, because of the slot_is_aligned()
	 * check in kvmppc_do_h_enter().
	 */
	if (gfn_base < memslot->base_gfn)
		return -EFAULT;

	/* used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	ret = -EFAULT;
	is_ci = false;
	pfn = 0;
	page = NULL;
	pte_size = PAGE_SIZE;
	writing = (dsisr & DSISR_ISSTORE) != 0;
	/* If writing != 0, then the HPTE must allow writing, if we get here */
	write_ok = writing;
	hva = gfn_to_hva_memslot(memslot, gfn);
	npages = get_user_pages_fast(hva, 1, writing, pages);
	if (npages < 1) {
		/* Check if it's an I/O mapping */
		down_read(&current->mm->mmap_sem);
		vma = find_vma(current->mm, hva);
		if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
		    (vma->vm_flags & VM_PFNMAP)) {
			pfn = vma->vm_pgoff +
				((hva - vma->vm_start) >> PAGE_SHIFT);
			pte_size = psize;
			is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot))));
			write_ok = vma->vm_flags & VM_WRITE;
		}
		up_read(&current->mm->mmap_sem);
		if (!pfn)
			goto out_put;
	} else {
		page = pages[0];
		pfn = page_to_pfn(page);
		if (PageHuge(page)) {
			page = compound_head(page);
			pte_size <<= compound_order(page);
		}
		/* if the guest wants write access, see if that is OK */
		if (!writing && hpte_is_writable(r)) {
			pte_t *ptep, pte;
			unsigned long flags;
			/*
			 * We need to protect against page table destruction
			 * hugepage split and collapse.
			 */
			local_irq_save(flags);
			ptep = find_current_mm_pte(current->mm->pgd,
						   hva, NULL, NULL);
			if (ptep) {
				pte = kvmppc_read_update_linux_pte(ptep, 1);
				if (__pte_write(pte))
					write_ok = 1;
			}
			local_irq_restore(flags);
		}
	}

	if (psize > pte_size)
		goto out_put;

	/* Check WIMG vs. the actual page we're accessing */
	if (!hpte_cache_flags_ok(r, is_ci)) {
		if (is_ci)
			goto out_put;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually use cacheable memory.
		 */
		r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M;
	}

	/*
	 * Set the HPTE to point to pfn.
	 * Since the pfn is at PAGE_SIZE granularity, make sure we
	 * don't mask out lower-order bits if psize < PAGE_SIZE.
	 */
	if (psize < PAGE_SIZE)
		psize = PAGE_SIZE;
	r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
					((pfn << PAGE_SHIFT) & ~(psize - 1));
	if (hpte_is_writable(r) && !write_ok)
		r = hpte_make_readonly(r);
	ret = RESUME_GUEST;
	preempt_disable();
	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();
	hnow_v = be64_to_cpu(hptep[0]);
	hnow_r = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
		hnow_r = hpte_new_to_old_r(hnow_r);
	}

	/*
	 * If the HPT is being resized, don't update the HPTE,
	 * instead let the guest retry after the resize operation is
	 * complete.  The synchronization for hpte_setup_done test vs.
	 * set is provided by the HPTE lock.
	 */
	if (!kvm->arch.hpte_setup_done)
		goto out_unlock;

	if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
	    rev->guest_rpte != hpte[2])
		/* HPTE has been changed under us; let the guest retry */
		goto out_unlock;
	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;

	/* Always put the HPTE in the rmap chain for the page base address */
	rmap = &memslot->arch.rmap[gfn_base - memslot->base_gfn];
	lock_rmap(rmap);

	/* Check if we might have been invalidated; let the guest retry if so */
	ret = RESUME_GUEST;
	if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
		unlock_rmap(rmap);
		goto out_unlock;
	}

	/* Only set R/C in real HPTE if set in both *rmap and guest_rpte */
	rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
	r &= rcbits | ~(HPTE_R_R | HPTE_R_C);

	if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) {
		/* HPTE was previously valid, so we need to invalidate it */
		unlock_rmap(rmap);
		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hptep, index);
		/* don't lose previous R and C bits */
		r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
	} else {
		kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		r = hpte_old_to_new_r(hpte[0], r);
		hpte[0] = hpte_old_to_new_v(hpte[0]);
	}
	hptep[1] = cpu_to_be64(r);
	eieio();
	__unlock_hpte(hptep, hpte[0]);
	asm volatile("ptesync" : : : "memory");
	preempt_enable();
	if (page && hpte_is_writable(r))
		SetPageDirty(page);

 out_put:
	trace_kvm_page_fault_exit(vcpu, hpte, ret);

	if (page) {
		/*
		 * We drop pages[0] instead of page because page might
		 * have been set to the head page of a compound, but
		 * we have to drop the reference on the correct tail
		 * page to match the get inside gup()
		 */
		put_page(pages[0]);
	}
	return ret;

 out_unlock:
	__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
	preempt_enable();
	goto out_put;
}

static void kvmppc_rmap_reset(struct kvm *kvm)
{
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		/*
		 * This assumes it is acceptable to lose reference and
		 * change bits across a reset.
		 */
		memset(memslot->arch.rmap, 0,
		       memslot->npages * sizeof(*memslot->arch.rmap));
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

typedef int (*hva_handler_fn)(struct kvm *kvm, struct kvm_memory_slot *memslot,
			      unsigned long gfn);

static int kvm_handle_hva_range(struct kvm *kvm,
				unsigned long start,
				unsigned long end,
				hva_handler_fn handler)
{
	int ret;
	int retval = 0;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;
		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn, gfn+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

		for (; gfn < gfn_end; ++gfn) {
			ret = handler(kvm, memslot, gfn);
			retval |= ret;
		}
	}

	return retval;
}

static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
			  hva_handler_fn handler)
{
	return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
}

/* Must be called with both HPTE and rmap locked */
static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
			      unsigned long *rmapp, unsigned long gfn)
{
	__be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
	struct revmap_entry *rev = kvm->arch.hpt.rev;
	unsigned long j, h;
	unsigned long ptel, psize, rcbits;

	j = rev[i].forw;
	if (j == i) {
		/* chain is now empty */
		*rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
	} else {
		/* remove i from chain */
		h = rev[i].back;
		rev[h].forw = j;
		rev[j].back = h;
		rev[i].forw = rev[i].back = i;
		*rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
	}

	/* Now check and modify the HPTE */
	ptel = rev[i].guest_rpte;
	psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
	if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
	    hpte_rpn(ptel, psize) == gfn) {
		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hptep, i);
		hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
		/* Harvest R and C */
		rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
		*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
		if (rcbits & HPTE_R_C)
			kvmppc_update_rmap_change(rmapp, psize);
		if (rcbits & ~rev[i].guest_rpte) {
			rev[i].guest_rpte = ptel | rcbits;
			note_hpte_modification(kvm, &rev[i]);
		}
	}
}

static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
			   unsigned long gfn)
{
	unsigned long i;
	__be64 *hptep;
	unsigned long *rmapp;

	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	for (;;) {
		lock_rmap(rmapp);
		if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
			unlock_rmap(rmapp);
			break;
		}

		/*
		 * To avoid an ABBA deadlock with the HPTE lock bit,
		 * we can't spin on the HPTE lock while holding the
		 * rmap chain lock.
		 */
		i = *rmapp & KVMPPC_RMAP_INDEX;
		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
				cpu_relax();
			continue;
		}

		kvmppc_unmap_hpte(kvm, i, rmapp, gfn);
		unlock_rmap(rmapp);
		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
	}
	return 0;
}

int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
{
	hva_handler_fn handler;

	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
	kvm_handle_hva(kvm, hva, handler);
	return 0;
}

int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
	hva_handler_fn handler;

	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
	kvm_handle_hva_range(kvm, start, end, handler);
	return 0;
}

void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
				  struct kvm_memory_slot *memslot)
{
	unsigned long gfn;
	unsigned long n;
	unsigned long *rmapp;

	gfn = memslot->base_gfn;
	rmapp = memslot->arch.rmap;
	for (n = memslot->npages; n; --n, ++gfn) {
		if (kvm_is_radix(kvm)) {
			kvm_unmap_radix(kvm, memslot, gfn);
			continue;
		}
		/*
		 * Testing the present bit without locking is OK because
		 * the memslot has been marked invalid already, and hence
		 * no new HPTEs referencing this page can be created,
		 * thus we can safely clean up any existing ones.
		 */
		if (*rmapp & KVMPPC_RMAP_PRESENT)
			kvm_unmap_rmapp(kvm, memslot, gfn);
		++rmapp;
	}
}

static int kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
			 unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.hpt.rev;
	unsigned long head, i, j;
	__be64 *hptep;
	int ret = 0;
	unsigned long *rmapp;

	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
 retry:
	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_REFERENCED) {
		*rmapp &= ~KVMPPC_RMAP_REFERENCED;
		ret = 1;
	}
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return ret;
	}

	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
		j = rev[i].forw;

		/* If this HPTE isn't referenced, ignore it */
		if (!(be64_to_cpu(hptep[1]) & HPTE_R_R))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK)
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
		    (be64_to_cpu(hptep[1]) & HPTE_R_R)) {
			kvmppc_clear_ref_hpte(kvm, hptep, i);
			if (!(rev[i].guest_rpte & HPTE_R_R)) {
				rev[i].guest_rpte |= HPTE_R_R;
				note_hpte_modification(kvm, &rev[i]);
			}
			ret = 1;
		}
		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return ret;
}

int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
	hva_handler_fn handler;

	handler = kvm_is_radix(kvm) ? kvm_age_radix : kvm_age_rmapp;
	return kvm_handle_hva_range(kvm, start, end, handler);
}

static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
			      unsigned long gfn)
{
	struct revmap_entry *rev = kvm->arch.hpt.rev;
	unsigned long head, i, j;
	unsigned long *hp;
	int ret = 1;
	unsigned long *rmapp;

	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	if (*rmapp & KVMPPC_RMAP_REFERENCED)
		return 1;

	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_REFERENCED)
		goto out;

	if (*rmapp & KVMPPC_RMAP_PRESENT) {
		i = head = *rmapp & KVMPPC_RMAP_INDEX;
		do {
			hp = (unsigned long *)(kvm->arch.hpt.virt + (i << 4));
			j = rev[i].forw;
			if (be64_to_cpu(hp[1]) & HPTE_R_R)
				goto out;
		} while ((i = j) != head);
	}
	ret = 0;

 out:
	unlock_rmap(rmapp);
	return ret;
}

int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
{
	hva_handler_fn handler;

	handler = kvm_is_radix(kvm) ? kvm_test_age_radix : kvm_test_age_rmapp;
	return kvm_handle_hva(kvm, hva, handler);
}

void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	hva_handler_fn handler;

	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
	kvm_handle_hva(kvm, hva, handler);
}

static int vcpus_running(struct kvm *kvm)
{
	return atomic_read(&kvm->arch.vcpus_running) != 0;
}

/*
 * Returns the number of system pages that are dirty.
 * This can be more than 1 if we find a huge-page HPTE.
 */
static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
{
	struct revmap_entry *rev = kvm->arch.hpt.rev;
	unsigned long head, i, j;
	unsigned long n;
	unsigned long v, r;
	__be64 *hptep;
	int npages_dirty = 0;

 retry:
	lock_rmap(rmapp);
	if (*rmapp & KVMPPC_RMAP_CHANGED) {
		long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
			>> KVMPPC_RMAP_CHG_SHIFT;
		*rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
		npages_dirty = 1;
		if (change_order > PAGE_SHIFT)
			npages_dirty = 1ul << (change_order - PAGE_SHIFT);
	}
	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
		unlock_rmap(rmapp);
		return npages_dirty;
	}

	i = head = *rmapp & KVMPPC_RMAP_INDEX;
	do {
		unsigned long hptep1;
		hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
		j = rev[i].forw;

		/*
		 * Checking the C (changed) bit here is racy since there
		 * is no guarantee about when the hardware writes it back.
		 * If the HPTE is not writable then it is stable since the
		 * page can't be written to, and we would have done a tlbie
		 * (which forces the hardware to complete any writeback)
		 * when making the HPTE read-only.
		 * If vcpus are running then this call is racy anyway
		 * since the page could get dirtied subsequently, so we
		 * expect there to be a further call which would pick up
		 * any delayed C bit writeback.
		 * Otherwise we need to do the tlbie even if C==0 in
		 * order to pick up any delayed writeback of C.
		 */
		hptep1 = be64_to_cpu(hptep[1]);
		if (!(hptep1 & HPTE_R_C) &&
		    (!hpte_is_writable(hptep1) || vcpus_running(kvm)))
			continue;

		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
			/* unlock rmap before spinning on the HPTE lock */
			unlock_rmap(rmapp);
			while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK))
				cpu_relax();
			goto retry;
		}

		/* Now check and modify the HPTE */
		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
			__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
			continue;
		}

		/* need to make it temporarily absent so C is stable */
		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hptep, i);
		v = be64_to_cpu(hptep[0]);
		r = be64_to_cpu(hptep[1]);
		if (r & HPTE_R_C) {
			hptep[1] = cpu_to_be64(r & ~HPTE_R_C);
			if (!(rev[i].guest_rpte & HPTE_R_C)) {
				rev[i].guest_rpte |= HPTE_R_C;
				note_hpte_modification(kvm, &rev[i]);
			}
			n = hpte_page_size(v, r);
			n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
			if (n > npages_dirty)
				npages_dirty = n;
			eieio();
		}
		v &= ~HPTE_V_ABSENT;
		v |= HPTE_V_VALID;
		__unlock_hpte(hptep, v);
	} while ((i = j) != head);

	unlock_rmap(rmapp);
	return npages_dirty;
}

void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
			      struct kvm_memory_slot *memslot,
			      unsigned long *map)
{
	unsigned long gfn;

	if (!vpa->dirty || !vpa->pinned_addr)
		return;
	gfn = vpa->gpa >> PAGE_SHIFT;
	if (gfn < memslot->base_gfn ||
	    gfn >= memslot->base_gfn + memslot->npages)
		return;

	vpa->dirty = false;
	if (map)
		__set_bit_le(gfn - memslot->base_gfn, map);
}

long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
			struct kvm_memory_slot *memslot, unsigned long *map)
{
	unsigned long i, j;
	unsigned long *rmapp;

	preempt_disable();
	rmapp = memslot->arch.rmap;
	for (i = 0; i < memslot->npages; ++i) {
		int npages = kvm_test_clear_dirty_npages(kvm, rmapp);
		/*
		 * Note that if npages > 0 then i must be a multiple of
		 * npages, since we always put huge-page HPTEs in the rmap
		 * chain corresponding to their page base address.
		 */
		if (npages && map)
			for (j = i; npages; ++j, --npages)
				__set_bit_le(j, map);
		++rmapp;
	}
	preempt_enable();
	return 0;
}

void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
			    unsigned long *nb_ret)
{
	struct kvm_memory_slot *memslot;
	unsigned long gfn = gpa >> PAGE_SHIFT;
	struct page *page, *pages[1];
	int npages;
	unsigned long hva, offset;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto err;
	hva = gfn_to_hva_memslot(memslot, gfn);
	npages = get_user_pages_fast(hva, 1, 1, pages);
	if (npages < 1)
		goto err;
	page = pages[0];
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	offset = gpa & (PAGE_SIZE - 1);
	if (nb_ret)
		*nb_ret = PAGE_SIZE - offset;
	return page_address(page) + offset;

 err:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	return NULL;
}

void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
			     bool dirty)
{
	struct page *page = virt_to_page(va);
	struct kvm_memory_slot *memslot;
	unsigned long gfn;
	unsigned long *rmap;
	int srcu_idx;

	put_page(page);

	if (!dirty)
		return;

	/* We need to mark this page dirty: in the rmap chain for HPT
	 * guests, or in the dirty bitmap for radix guests */
	gfn = gpa >> PAGE_SHIFT;
	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, gfn);
	if (memslot) {
		if (!kvm_is_radix(kvm)) {
			rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
			lock_rmap(rmap);
			*rmap |= KVMPPC_RMAP_CHANGED;
			unlock_rmap(rmap);
		} else if (memslot->dirty_bitmap) {
			mark_page_dirty(kvm, gfn);
		}
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

/*
 * HPT resizing
 */
static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
{
	int rc;

	rc = kvmppc_allocate_hpt(&resize->hpt, resize->order);
	if (rc < 0)
		return rc;

	resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
			 resize->hpt.virt);

	return 0;
}

static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
					    unsigned long idx)
{
	struct kvm *kvm = resize->kvm;
	struct kvm_hpt_info *old = &kvm->arch.hpt;
	struct kvm_hpt_info *new = &resize->hpt;
	unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
	unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
	__be64 *hptep, *new_hptep;
	unsigned long vpte, rpte, guest_rpte;
	int ret;
	struct revmap_entry *rev;
	unsigned long apsize, psize, avpn, pteg, hash;
	unsigned long new_idx, new_pteg, replace_vpte;

	hptep = (__be64 *)(old->virt + (idx << 4));

	/* Guest is stopped, so new HPTEs can't be added or faulted
	 * in, only unmapped or altered by host actions.  So, it's
	 * safe to check this before we take the HPTE lock */
	vpte = be64_to_cpu(hptep[0]);
	if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
		return 0;

	while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
		cpu_relax();

	vpte = be64_to_cpu(hptep[0]);

	ret = 0;
	if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
		/* Nothing to do */
		goto out;

	/* Unmap */
	rev = &old->rev[idx];
	guest_rpte = rev->guest_rpte;

	ret = -EIO;
	apsize = hpte_page_size(vpte, guest_rpte);
	if (!apsize)
		goto out;

	if (vpte & HPTE_V_VALID) {
		unsigned long gfn = hpte_rpn(guest_rpte, apsize);
		int srcu_idx = srcu_read_lock(&kvm->srcu);
		struct kvm_memory_slot *memslot =
			__gfn_to_memslot(kvm_memslots(kvm), gfn);

		if (memslot) {
			unsigned long *rmapp;
			rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];

			lock_rmap(rmapp);
			kvmppc_unmap_hpte(kvm, idx, rmapp, gfn);
			unlock_rmap(rmapp);
		}

		srcu_read_unlock(&kvm->srcu, srcu_idx);
	}

	/* Reload PTE after unmap */
	vpte = be64_to_cpu(hptep[0]);

	BUG_ON(vpte & HPTE_V_VALID);
	BUG_ON(!(vpte & HPTE_V_ABSENT));

	ret = 0;
	if (!(vpte & HPTE_V_BOLTED))
		goto out;

	rpte = be64_to_cpu(hptep[1]);
	psize = hpte_base_page_size(vpte, rpte);
	avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
	pteg = idx / HPTES_PER_GROUP;
	if (vpte & HPTE_V_SECONDARY)
		pteg = ~pteg;

	if (!(vpte & HPTE_V_1TB_SEG)) {
		unsigned long offset, vsid;

		/* We only have 28 - 23 bits of offset in avpn */
		offset = (avpn & 0x1f) << 23;
		vsid = avpn >> 5;
		/* We can find more bits from the pteg value */
		if (psize < (1ULL << 23))
			offset |= ((vsid ^ pteg) & old_hash_mask) * psize;

		hash = vsid ^ (offset / psize);
	} else {
		unsigned long offset, vsid;

		/* We only have 40 - 23 bits of seg_off in avpn */
		offset = (avpn & 0x1ffff) << 23;
		vsid = avpn >> 17;
		if (psize < (1ULL << 23))
			offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize;

		hash = vsid ^ (vsid << 25) ^ (offset / psize);
	}

	new_pteg = hash & new_hash_mask;
	if (vpte & HPTE_V_SECONDARY) {
		BUG_ON(~pteg != (hash & old_hash_mask));
		new_pteg = ~new_pteg;
	} else {
		BUG_ON(pteg != (hash & old_hash_mask));
	}

	new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
	new_hptep = (__be64 *)(new->virt + (new_idx << 4));

	replace_vpte = be64_to_cpu(new_hptep[0]);

	if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
		BUG_ON(new->order >= old->order);

		if (replace_vpte & HPTE_V_BOLTED) {
			if (vpte & HPTE_V_BOLTED)
				/* Bolted collision, nothing we can do */
				ret = -ENOSPC;
			/* Discard the new HPTE */
			goto out;
		}

		/* Discard the previous HPTE */
	}

	new_hptep[1] = cpu_to_be64(rpte);
	new->rev[new_idx].guest_rpte = guest_rpte;
	/* No need for a barrier, since new HPT isn't active */
	new_hptep[0] = cpu_to_be64(vpte);
	unlock_hpte(new_hptep, vpte);

out:
	unlock_hpte(hptep, vpte);
	return ret;
}

static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
{
	struct kvm *kvm = resize->kvm;
	unsigned long i;
	int rc;

	/*
	 * resize_hpt_rehash_hpte() doesn't handle the new-format HPTEs
	 * that POWER9 uses, and could well hit a BUG_ON on POWER9.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return -EIO;
	for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
		rc = resize_hpt_rehash_hpte(resize, i);
		if (rc != 0)
			return rc;
	}

	return 0;
}

static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
{
	struct kvm *kvm = resize->kvm;
	struct kvm_hpt_info hpt_tmp;

	/* Exchange the pending tables in the resize structure with
	 * the active tables */

	resize_hpt_debug(resize, "resize_hpt_pivot()\n");

	spin_lock(&kvm->mmu_lock);
	asm volatile("ptesync" : : : "memory");

	hpt_tmp = kvm->arch.hpt;
	kvmppc_set_hpt(kvm, &resize->hpt);
	resize->hpt = hpt_tmp;

	spin_unlock(&kvm->mmu_lock);

	synchronize_srcu_expedited(&kvm->srcu);

	resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
}

static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
{
	BUG_ON(kvm->arch.resize_hpt != resize);

	if (!resize)
		return;

	if (resize->hpt.virt)
		kvmppc_free_hpt(&resize->hpt);

	kvm->arch.resize_hpt = NULL;
	kfree(resize);
}

static void resize_hpt_prepare_work(struct work_struct *work)
{
	struct kvm_resize_hpt *resize = container_of(work,
						     struct kvm_resize_hpt,
						     work);
	struct kvm *kvm = resize->kvm;
	int err;

	resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
			 resize->order);

	err = resize_hpt_allocate(resize);

	mutex_lock(&kvm->lock);

	resize->error = err;
	resize->prepare_done = true;

	mutex_unlock(&kvm->lock);
}

long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
				     struct kvm_ppc_resize_hpt *rhpt)
{
	unsigned long flags = rhpt->flags;
	unsigned long shift = rhpt->shift;
	struct kvm_resize_hpt *resize;
	int ret;

	if (flags != 0)
		return -EINVAL;

	if (shift && ((shift < 18) || (shift > 46)))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	resize = kvm->arch.resize_hpt;

	if (resize) {
		if (resize->order == shift) {
			/* Suitable resize in progress */
			if (resize->prepare_done) {
				ret = resize->error;
				if (ret != 0)
					resize_hpt_release(kvm, resize);
			} else {
				ret = 100; /* estimated time in ms */
			}

			goto out;
		}

		/* not suitable, cancel it */
		resize_hpt_release(kvm, resize);
	}

	ret = 0;
	if (!shift)
		goto out; /* nothing to do */

	/* start new resize */

	resize = kzalloc(sizeof(*resize), GFP_KERNEL);
	if (!resize) {
		ret = -ENOMEM;
		goto out;
	}
	resize->order = shift;
	resize->kvm = kvm;
	INIT_WORK(&resize->work, resize_hpt_prepare_work);
	kvm->arch.resize_hpt = resize;

	schedule_work(&resize->work);

	ret = 100; /* estimated time in ms */

out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static void resize_hpt_boot_vcpu(void *opaque)
{
	/* Nothing to do, just force a KVM exit */
}

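/*
 * A sketch of the intended userspace flow around the two ioctls below
 * (per the KVM_PPC_RESIZE_HPT_* API; error handling elided): call
 * RESIZE_HPT_PREPARE with the desired shift and, while it returns a
 * positive estimate in milliseconds, sleep and retry; once it returns
 * 0, call RESIZE_HPT_COMMIT with the same shift to rehash into and
 * pivot to the new HPT.
 */
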
long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
				    struct kvm_ppc_resize_hpt *rhpt)
{
	unsigned long flags = rhpt->flags;
	unsigned long shift = rhpt->shift;
	struct kvm_resize_hpt *resize;
	long ret;

	if (flags != 0)
		return -EINVAL;

	if (shift && ((shift < 18) || (shift > 46)))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	resize = kvm->arch.resize_hpt;

	/* This shouldn't be possible */
	ret = -EIO;
	if (WARN_ON(!kvm->arch.hpte_setup_done))
		goto out_no_hpt;

	/* Stop VCPUs from running while we mess with the HPT */
	kvm->arch.hpte_setup_done = 0;
	smp_mb();

	/* Boot all CPUs out of the guest so they re-read
	 * hpte_setup_done */
	on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);

	ret = -ENXIO;
	if (!resize || (resize->order != shift))
		goto out;

	ret = -EBUSY;
	if (!resize->prepare_done)
		goto out;

	ret = resize->error;
	if (ret != 0)
		goto out;

	ret = resize_hpt_rehash(resize);
	if (ret != 0)
		goto out;

	resize_hpt_pivot(resize);

out:
	/* Let VCPUs run again */
	kvm->arch.hpte_setup_done = 1;
	smp_mb();
out_no_hpt:
	resize_hpt_release(kvm, resize);
	mutex_unlock(&kvm->lock);
	return ret;
}

/*
 * Functions for reading and writing the hash table via reads and
 * writes on a file descriptor.
 *
 * Reads return the guest's view of the hash table, which has to be
 * pieced together from the real hash table and the guest_rpte
 * values in the revmap array.
 *
 * On writes, each HPTE written is considered in turn, and if it
 * is valid, it is written to the HPT as if an H_ENTER with the
 * exact flag were performed.  When invalid HPTEs are written
 * (i.e. the V bit is clear), any existing HPTE at that index is
 * simply removed from the guest's HPT.
 */
struct kvm_htab_ctx {
	unsigned long index;
	unsigned long flags;
	struct kvm *kvm;
	int first_pass;
};

#define HPTE_SIZE	(2 * sizeof(unsigned long))

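/*
 * Layout of the stream as seen by userspace (from the KVM API
 * documentation for KVM_PPC_GET_HTAB_FD): each chunk consists of a
 * struct kvm_get_htab_header { __u32 index; __u16 n_valid;
 * __u16 n_invalid; } followed by n_valid HPTE_SIZE-byte HPTEs
 * starting at slot "index"; the next n_invalid slots after those
 * are invalid.
 */
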
/*
 * Returns 1 if this HPT entry has been modified or has pending
 * R/C bit changes.
 */
static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp)
{
	unsigned long rcbits_unset;

	if (revp->guest_rpte & HPTE_GR_MODIFIED)
		return 1;

	/* Also need to consider changes in reference and changed bits */
	rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
	if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) &&
	    (be64_to_cpu(hptp[1]) & rcbits_unset))
		return 1;

	return 0;
}

static long record_hpte(unsigned long flags, __be64 *hptp,
			unsigned long *hpte, struct revmap_entry *revp,
			int want_valid, int first_pass)
{
	unsigned long v, r, hr;
	unsigned long rcbits_unset;
	int ok = 1;
	int valid, dirty;

	/* Unmodified entries are uninteresting except on the first pass */
	dirty = hpte_dirty(revp, hptp);
	if (!first_pass && !dirty)
		return 0;

	valid = 0;
	if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) {
		valid = 1;
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
		    !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED))
			valid = 0;
	}
	if (valid != want_valid)
		return 0;

	v = r = 0;
	if (valid || dirty) {
		/* lock the HPTE so it's stable and read it */
		preempt_disable();
		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
			cpu_relax();
		v = be64_to_cpu(hptp[0]);
		hr = be64_to_cpu(hptp[1]);
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			v = hpte_new_to_old_v(v, hr);
			hr = hpte_new_to_old_r(hr);
		}

		/* re-evaluate valid and dirty from synchronized HPTE value */
		valid = !!(v & HPTE_V_VALID);
		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);

		/* Harvest R and C into guest view if necessary */
		rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
		if (valid && (rcbits_unset & hr)) {
			revp->guest_rpte |= (hr &
				(HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
			dirty = 1;
		}

		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
			valid = 1;
		}
		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
			valid = 0;

		r = revp->guest_rpte;
		/* only clear modified if this is the right sort of entry */
		if (valid == want_valid && dirty) {
			r &= ~HPTE_GR_MODIFIED;
			revp->guest_rpte = r;
		}
		unlock_hpte(hptp, be64_to_cpu(hptp[0]));
		preempt_enable();
		if (!(valid == want_valid && (first_pass || dirty)))
			ok = 0;
	}
	hpte[0] = cpu_to_be64(v);
	hpte[1] = cpu_to_be64(r);
	return ok;
}

static ssize_t kvm_htab_read(struct file *file, char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct kvm_htab_ctx *ctx = file->private_data;
	struct kvm *kvm = ctx->kvm;
	struct kvm_get_htab_header hdr;
	__be64 *hptp;
	struct revmap_entry *revp;
	unsigned long i, nb, nw;
	unsigned long __user *lbuf;
	struct kvm_get_htab_header __user *hptr;
	unsigned long flags;
	int first_pass;
	unsigned long hpte[2];

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	first_pass = ctx->first_pass;
	flags = ctx->flags;

	i = ctx->index;
	hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
	revp = kvm->arch.hpt.rev + i;
	lbuf = (unsigned long __user *)buf;

	nb = 0;
	while (nb + sizeof(hdr) + HPTE_SIZE < count) {
		/* Initialize header */
		hptr = (struct kvm_get_htab_header __user *)buf;
		hdr.n_valid = 0;
		hdr.n_invalid = 0;
		nw = nb;
		nb += sizeof(hdr);
		lbuf = (unsigned long __user *)(buf + sizeof(hdr));

		/* Skip uninteresting entries, i.e. clean on not-first pass */
		if (!first_pass) {
			while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
			       !hpte_dirty(revp, hptp)) {
				++i;
				hptp += 2;
				++revp;
			}
		}
		hdr.index = i;

		/* Grab a series of valid entries */
		while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
		       hdr.n_valid < 0xffff &&
		       nb + HPTE_SIZE < count &&
		       record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
			/* valid entry, write it out */
			++hdr.n_valid;
			if (__put_user(hpte[0], lbuf) ||
			    __put_user(hpte[1], lbuf + 1))
				return -EFAULT;
			nb += HPTE_SIZE;
			lbuf += 2;
			++i;
			hptp += 2;
			++revp;
		}
		/* Now skip invalid entries while we can */
		while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
		       hdr.n_invalid < 0xffff &&
		       record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
			/* found an invalid entry */
			++hdr.n_invalid;
			++i;
			hptp += 2;
			++revp;
		}

		if (hdr.n_valid || hdr.n_invalid) {
			/* write back the header */
			if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
				return -EFAULT;
			nw = nb;
			buf = (char __user *)lbuf;
		} else {
			nb = nw;
		}

		/* Check if we've wrapped around the hash table */
		if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
			i = 0;
			ctx->first_pass = 0;
			break;
		}
	}

	ctx->index = i;

	return nb;
}

static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct kvm_htab_ctx *ctx = file->private_data;
	struct kvm *kvm = ctx->kvm;
	struct kvm_get_htab_header hdr;
	unsigned long i, j;
	unsigned long v, r;
	unsigned long __user *lbuf;
	__be64 *hptp;
	unsigned long tmp[2];
	ssize_t nb;
	long int err, ret;
	int hpte_setup;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	/* lock out vcpus from running while we're doing this */
	mutex_lock(&kvm->lock);
	hpte_setup = kvm->arch.hpte_setup_done;
	if (hpte_setup) {
		kvm->arch.hpte_setup_done = 0;	/* temporarily */
		/* order hpte_setup_done vs. vcpus_running */
		smp_mb();
		if (atomic_read(&kvm->arch.vcpus_running)) {
			kvm->arch.hpte_setup_done = 1;
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}
	}

	err = 0;
	for (nb = 0; nb + sizeof(hdr) <= count; ) {
		err = -EFAULT;
		if (__copy_from_user(&hdr, buf, sizeof(hdr)))
			break;

		err = 0;
		if (nb + hdr.n_valid * HPTE_SIZE > count)
			break;

		nb += sizeof(hdr);
		buf += sizeof(hdr);

		err = -EINVAL;
		i = hdr.index;
		if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) ||
		    i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt))
			break;

		hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
		lbuf = (unsigned long __user *)buf;
		for (j = 0; j < hdr.n_valid; ++j) {
			__be64 hpte_v;
			__be64 hpte_r;

			err = -EFAULT;
			if (__get_user(hpte_v, lbuf) ||
			    __get_user(hpte_r, lbuf + 1))
				goto out;
			v = be64_to_cpu(hpte_v);
			r = be64_to_cpu(hpte_r);
			err = -EINVAL;
			if (!(v & HPTE_V_VALID))
				goto out;
			lbuf += 2;
			nb += HPTE_SIZE;

			if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
			err = -EIO;
			ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
							 tmp);
			if (ret != H_SUCCESS) {
				pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
				       "r=%lx\n", ret, i, v, r);
				goto out;
			}
			if (!hpte_setup && is_vrma_hpte(v)) {
				unsigned long psize = hpte_base_page_size(v, r);
				unsigned long senc = slb_pgsize_encoding(psize);
				unsigned long lpcr;

				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
					(VRMA_VSID << SLB_VSID_SHIFT_1T);
				lpcr = senc << (LPCR_VRMASD_SH - 4);
				kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
				hpte_setup = 1;
			}
			++i;
			hptp += 2;
		}

		for (j = 0; j < hdr.n_invalid; ++j) {
			if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT))
				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
			++i;
			hptp += 2;
		}
		err = 0;
	}

 out:
	/* Order HPTE updates vs. hpte_setup_done */
	smp_wmb();
	kvm->arch.hpte_setup_done = hpte_setup;
	mutex_unlock(&kvm->lock);

	if (err)
		return err;
	return nb;
}

static int kvm_htab_release(struct inode *inode, struct file *filp)
{
	struct kvm_htab_ctx *ctx = filp->private_data;

	filp->private_data = NULL;
	if (!(ctx->flags & KVM_GET_HTAB_WRITE))
		atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
	kvm_put_kvm(ctx->kvm);
	kfree(ctx);
	return 0;
}

static const struct file_operations kvm_htab_fops = {
	.read		= kvm_htab_read,
	.write		= kvm_htab_write,
	.llseek		= default_llseek,
	.release	= kvm_htab_release,
};

int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
{
	int ret;
	struct kvm_htab_ctx *ctx;
	int rwflag;

	/* reject flags we don't recognize */
	if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
		return -EINVAL;
	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	kvm_get_kvm(kvm);
	ctx->kvm = kvm;
	ctx->index = ghf->start_index;
	ctx->flags = ghf->flags;
	ctx->first_pass = 1;

	rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
	if (ret < 0) {
		kfree(ctx);
		kvm_put_kvm(kvm);
		return ret;
	}

	if (rwflag == O_RDONLY) {
		mutex_lock(&kvm->slots_lock);
		atomic_inc(&kvm->arch.hpte_mod_interest);
		/* make sure kvmppc_do_h_enter etc. see the increment */
		synchronize_srcu_expedited(&kvm->srcu);
		mutex_unlock(&kvm->slots_lock);
	}

	return ret;
}

struct debugfs_htab_state {
	struct kvm	*kvm;
	struct mutex	mutex;
	unsigned long	hpt_index;
	int		chars_left;
	int		buf_index;
	char		buf[64];
};

static int debugfs_htab_open(struct inode *inode, struct file *file)
{
	struct kvm *kvm = inode->i_private;
	struct debugfs_htab_state *p;

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	kvm_get_kvm(kvm);
	p->kvm = kvm;
	mutex_init(&p->mutex);
	file->private_data = p;

	return nonseekable_open(inode, file);
}

static int debugfs_htab_release(struct inode *inode, struct file *file)
{
	struct debugfs_htab_state *p = file->private_data;

	kvm_put_kvm(p->kvm);
	kfree(p);
	return 0;
}

static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
				 size_t len, loff_t *ppos)
{
	struct debugfs_htab_state *p = file->private_data;
	ssize_t ret, r;
	unsigned long i, n;
	unsigned long v, hr, gr;
	struct kvm *kvm;
	__be64 *hptp;

	ret = mutex_lock_interruptible(&p->mutex);
	if (ret)
		return ret;

	if (p->chars_left) {
		n = p->chars_left;
		if (n > len)
			n = len;
		r = copy_to_user(buf, p->buf + p->buf_index, n);
		n -= r;
		p->chars_left -= n;
		p->buf_index += n;
		buf += n;
		len -= n;
		ret = n;
		if (r) {
			if (!n)
				ret = -EFAULT;
			goto out;
		}
	}

	kvm = p->kvm;
	i = p->hpt_index;
	hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
	for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
	     ++i, hptp += 2) {
		if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
			continue;

		/* lock the HPTE so it's stable and read it */
		preempt_disable();
		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
			cpu_relax();
		v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
		hr = be64_to_cpu(hptp[1]);
		gr = kvm->arch.hpt.rev[i].guest_rpte;
		unlock_hpte(hptp, v);
		preempt_enable();

		if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
			continue;

		n = scnprintf(p->buf, sizeof(p->buf),
			      "%6lx %.16lx %.16lx %.16lx\n",
			      i, v, hr, gr);
		p->chars_left = n;
		if (n > len)
			n = len;
		r = copy_to_user(buf, p->buf, n);
		n -= r;
		p->chars_left -= n;
		p->buf_index = n;
		buf += n;
		len -= n;
		ret += n;
		if (r) {
			if (!ret)
				ret = -EFAULT;
			goto out;
		}
	}
	p->hpt_index = i;

 out:
	mutex_unlock(&p->mutex);
	return ret;
}

static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
				  size_t len, loff_t *ppos)
{
	return -EACCES;
}

static const struct file_operations debugfs_htab_fops = {
	.owner	 = THIS_MODULE,
	.open	 = debugfs_htab_open,
	.release = debugfs_htab_release,
	.read	 = debugfs_htab_read,
	.write	 = debugfs_htab_write,
	.llseek	 = generic_file_llseek,
};

void kvmppc_mmu_debugfs_init(struct kvm *kvm)
{
	kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
						    kvm->arch.debugfs_dir, kvm,
						    &debugfs_htab_fops);
}

void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;

	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */

	if (kvm_is_radix(vcpu->kvm))
		mmu->xlate = kvmppc_mmu_radix_xlate;
	else
		mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;

	vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}