/*
 * s390 page table handling: crst table allocation, KVM guest mapping
 * (gmap) management and 1K/2K page table fragment allocation.
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif
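
/*
 * ALLOC_ORDER is the allocation order of a full region/segment (crst)
 * table: four pages (16KB) on 64-bit, two pages (8KB) on 31-bit.
 * FRAG_MASK covers the bitmap kept in page->_mapcount for the 2KB
 * (64-bit) resp. 1KB (31-bit) page table fragments sharing one 4K page.
 */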

unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm) {
		clear_user_asce();
		set_user_asce(mm);
	}
	__tlb_flush_local();
}

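/*
 * crst_table_upgrade - grow the address space limit of an mm by stacking
 * additional region tables on top of the existing page table hierarchy
 * until the asce limit covers the requested limit (2GB -> 4TB -> 8PB).
 * Each CPU currently running the mm then reloads its user ASCE via
 * __crst_table_upgrade().
 */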
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > (1UL << 53));
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm) {
		clear_user_asce();
		__tlb_flush_mm(mm);
	}
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		set_user_asce(mm);
}
#endif

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 * @limit: maximum size of the gmap address space
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;
	unsigned long etype, atype;

	if (limit < (1UL << 31)) {
		limit = (1UL << 31) - 1;
		atype = _ASCE_TYPE_SEGMENT;
		etype = _SEGMENT_ENTRY_EMPTY;
	} else if (limit < (1UL << 42)) {
		limit = (1UL << 42) - 1;
		atype = _ASCE_TYPE_REGION3;
		etype = _REGION3_ENTRY_EMPTY;
	} else if (limit < (1UL << 53)) {
		limit = (1UL << 53) - 1;
		atype = _ASCE_TYPE_REGION2;
		etype = _REGION2_ENTRY_EMPTY;
	} else {
		limit = -1UL;
		atype = _ASCE_TYPE_REGION1;
		etype = _REGION1_ENTRY_EMPTY;
	}
	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
	spin_lock_init(&gmap->guest_table_lock);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	page->index = 0;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, etype);
	gmap->table = table;
	gmap->asce = atype | _ASCE_TABLE_LENGTH |
		_ASCE_USER_BITS | __pa(table);
	gmap->asce_end = limit;
	down_write(&mm->mmap_sem);
	list_add(&gmap->list, &mm->context.gmap_list);
	up_write(&mm->mmap_sem);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, gmap->asce);
	else
		__tlb_flush_global();
}

static void gmap_radix_tree_free(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	unsigned long indices[16];
	unsigned long index;
	void **slot;
	int i, nr;

	/* A radix tree is freed by deleting all of its entries */
	index = 0;
	do {
		nr = 0;
		radix_tree_for_each_slot(slot, root, &iter, index) {
			indices[nr] = iter.index;
			if (++nr == 16)
				break;
		}
		for (i = 0; i < nr; i++) {
			index = indices[i];
			radix_tree_delete(root, index);
		}
	} while (nr > 0);
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, gmap->asce);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
		__free_pages(page, ALLOC_ORDER);
	gmap_radix_tree_free(&gmap->guest_to_host);
	gmap_radix_tree_free(&gmap->host_to_guest);
	down_write(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	up_write(&gmap->mm->mmap_sem);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
			    unsigned long init, unsigned long gaddr)
{
	struct page *page;
	unsigned long *new;

	/* since we dont free the gmap table until gmap_free we can unlock */
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	spin_lock(&gmap->mm->page_table_lock);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
		page->index = gaddr;
		page = NULL;
	}
	spin_unlock(&gmap->mm->page_table_lock);
	if (page)
		__free_pages(page, ALLOC_ORDER);
	return 0;
}
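
/*
 * gmap_alloc_table() stores the guest address covered by the first entry
 * of each crst page in page->index; __gmap_segment_gaddr() below uses it
 * to translate a segment table entry pointer back to a guest address.
 */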

/**
 * __gmap_segment_gaddr - find virtual address from segment pointer
 * @entry: pointer to a segment table entry in the guest address space
 *
 * Returns the virtual address in the guest address space for the segment
 */
static unsigned long __gmap_segment_gaddr(unsigned long *entry)
{
	struct page *page;
	unsigned long offset, mask;

	offset = (unsigned long) entry / sizeof(unsigned long);
	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
	page = virt_to_page((void *)((unsigned long) entry & mask));
	return page->index + offset;
}

/**
 * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
 * @gmap: pointer to the guest address space structure
 * @vmaddr: address in the host process address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
{
	unsigned long *entry;
	int flush = 0;

	spin_lock(&gmap->guest_table_lock);
	entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
	if (entry) {
		flush = (*entry != _SEGMENT_ENTRY_INVALID);
		*entry = _SEGMENT_ENTRY_INVALID;
	}
	spin_unlock(&gmap->guest_table_lock);
	return flush;
}

/**
 * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
 * @gmap: pointer to the guest address space structure
 * @gaddr: address in the guest address space
 *
 * Returns 1 if a TLB flush is required
 */
static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE)
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len < from || to + len < to ||
	    from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
		return -EINVAL;

	flush = 0;
	down_write(&gmap->mm->mmap_sem);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Remove old translation */
		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
		/* Store new translation */
		if (radix_tree_insert(&gmap->guest_to_host,
				      (to + off) >> PMD_SHIFT,
				      (void *) from + off))
			break;
	}
	up_write(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	if (off >= len)
		return 0;
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr;

	vmaddr = (unsigned long)
		radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
	return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);

/*
 * gmap_unlink - disconnect a page table from the gmap shadow tables
 * @mm: pointer to the parent mm_struct
 * @table: pointer to the host page table
 * @vmaddr: vm address associated with the host page table
 */
static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
			unsigned long vmaddr)
{
	struct gmap *gmap;
	int flush;

	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
		if (flush)
			gmap_flush_tlb(gmap);
	}
}
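
/*
 * __gmap_link() below walks the gmap region/segment tables top-down: the
 * 11-bit fields of the guest address starting at bits 53, 42 and 31 index
 * the region-first, region-second and region-third tables (allocated on
 * demand), and the field starting at bit 20 selects the segment table
 * entry that is finally linked to the host page table.
 */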

/**
 * __gmap_link - set up the shadow page tables for a mapped guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @vmaddr: vm address
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
	struct mm_struct *mm;
	unsigned long *table;
	spinlock_t *ptl;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	int rc;

	/* Create higher level tables in the gmap page table */
	table = gmap->table;
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
		table += (gaddr >> 53) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
				     gaddr & 0xffe0000000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
		table += (gaddr >> 42) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
				     gaddr & 0xfffffc0000000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
		table += (gaddr >> 31) & 0x7ff;
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
				     gaddr & 0xffffffff80000000UL))
			return -ENOMEM;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	}
	table += (gaddr >> 20) & 0x7ff;
	/* Walk the parent mm page table */
	mm = gmap->mm;
	pgd = pgd_offset(mm, vmaddr);
	VM_BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, vmaddr);
	VM_BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, vmaddr);
	VM_BUG_ON(pmd_none(*pmd));
	/* large pmds cannot yet be handled */
	if (pmd_large(*pmd))
		return -EFAULT;
	/* Link gmap segment table entry location to page table. */
	rc = radix_tree_preload(GFP_KERNEL);
	if (rc)
		return rc;
	ptl = pmd_lock(mm, pmd);
	spin_lock(&gmap->guest_table_lock);
	if (*table == _SEGMENT_ENTRY_INVALID) {
		rc = radix_tree_insert(&gmap->host_to_guest,
				       vmaddr >> PMD_SHIFT, table);
		if (!rc)
			*table = pmd_val(*pmd);
	} else
		rc = 0;
	spin_unlock(&gmap->guest_table_lock);
	spin_unlock(ptl);
	radix_tree_preload_end();
	return rc;
}

/**
 * gmap_fault - resolve a fault on a guest address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 * @fault_flags: flags to pass down to handle_mm_fault()
 *
 * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
 * if the vm address is already mapped to a different guest segment.
 */
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
	       unsigned int fault_flags)
{
	unsigned long vmaddr;
	int rc;

	down_read(&gmap->mm->mmap_sem);
	vmaddr = __gmap_translate(gmap, gaddr);
	if (IS_ERR_VALUE(vmaddr)) {
		rc = vmaddr;
		goto out_up;
	}
	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
		rc = -EFAULT;
		goto out_up;
	}
	rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
	}
	free_swap_and_cache(entry);
}

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr, ptev, pgstev;
	pte_t *ptep, pte;
	spinlock_t *ptl;
	pgste_t pgste;

	/* Find the vm address for the guest address */
	vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
						   gaddr >> PMD_SHIFT);
	if (!vmaddr)
		return;
	vmaddr |= gaddr & ~PMD_MASK;
	/* Get pointer to the page table entry */
	ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	pte = *ptep;
	if (!pte_swap(pte))
		goto out_pte;
	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	ptev = pte_val(pte);
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
		gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
		pte_clear(gmap->mm, vmaddr, ptep);
	}
	pgste_set_unlock(ptep, pgste);
out_pte:
	pte_unmap_unlock(ptep, ptl);
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
	unsigned long gaddr, vmaddr, size;
	struct vm_area_struct *vma;

	down_read(&gmap->mm->mmap_sem);
	for (gaddr = from; gaddr < to;
	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
		/* Find the vm address for the guest address */
		vmaddr = (unsigned long)
			radix_tree_lookup(&gmap->guest_to_host,
					  gaddr >> PMD_SHIFT);
		if (!vmaddr)
			continue;
		vmaddr |= gaddr & ~PMD_MASK;
		/* Find vma in the parent mm */
		vma = find_vma(gmap->mm, vmaddr);
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range(vma, vmaddr, size, NULL);
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);
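
/*
 * Invalidation notifiers: gmap_ipte_notify() marks a range of guest ptes
 * with PGSTE_IN_BIT, and gmap_do_ipte_notify() invokes every registered
 * notifier_call when one of those ptes gets invalidated.
 */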

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep, entry;
	pgste_t pgste;
	int rc = 0;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		/* Convert gmap address and connect the page tables */
		addr = __gmap_translate(gmap, gaddr);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
			rc = -EFAULT;
			break;
		}
		rc = __gmap_link(gmap, gaddr, addr);
		if (rc)
			break;
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		VM_BUG_ON(!ptep);
		/* Set notification bit in the pgste of the pte */
		entry = *ptep;
		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			pgste = pgste_get_lock(ptep);
			pgste_val(pgste) |= PGSTE_IN_BIT;
			pgste_set_unlock(ptep, pgste);
			gaddr += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		pte_unmap_unlock(ptep, ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);

/**
 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @vmaddr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
{
	unsigned long offset, gaddr;
	unsigned long *table;
	struct gmap_notifier *nb;
	struct gmap *gmap;

	offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	offset = offset * (4096 / sizeof(pte_t));
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(gmap, &mm->context.gmap_list, list) {
		table = radix_tree_lookup(&gmap->host_to_guest,
					  vmaddr >> PMD_SHIFT);
		if (!table)
			continue;
		gaddr = __gmap_segment_gaddr(table) + offset;
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(gmap, gaddr);
	}
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);

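/*
 * Page table pages use page->_mapcount for bookkeeping: -1 means the page
 * is unused, 0 marks a full 4K page table with pgstes in the upper half,
 * and a positive fragment mask tracks which 1K/2K page tables of the page
 * are allocated.
 */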
static inline int page_table_with_pgste(struct page *page)
{
	return atomic_read(&page->_mapcount) == 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	if (!pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}
	atomic_set(&page->_mapcount, 0);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	__free_page(page);
}

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned long key, bool nq)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
retry:
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	if (!(pte_val(*ptep) & _PAGE_INVALID) &&
	     (pte_val(*ptep) & _PAGE_PROTECT)) {
		pte_unmap_unlock(ptep, ptl);
		if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		goto retry;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	uint64_t physaddr;
	unsigned long key = 0;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	pgste = pgste_get_lock(ptep);

	if (pte_val(*ptep) & _PAGE_INVALID) {
		key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
	} else {
		physaddr = pte_val(*ptep) & PAGE_MASK;
		key = page_get_storage_key(physaddr);

		/* Reflect guest's logical view, not physical */
		if (pgste_val(pgste) & PGSTE_GR_BIT)
			key |= _PAGE_REFERENCED;
		if (pgste_val(pgste) & PGSTE_GC_BIT)
			key |= _PAGE_CHANGED;
	}

	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return key;
}
EXPORT_SYMBOL(get_guest_storage_key);

#else /* CONFIG_PGSTE */

static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
			       unsigned long vmaddr)
{
}

#endif /* CONFIG_PGSTE */

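/*
 * atomic_xor_bits() toggles allocation bits in page->_mapcount: the low
 * FRAG_MASK bits track which 1K/2K page table fragments of a page are in
 * use, the same bits shifted left by four mark fragments that are pending
 * an RCU free.
 */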
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	unsigned long *uninitialized_var(table);
	struct page *uninitialized_var(page);
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page))
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
			 unsigned long vmaddr)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_unlink(mm, table, vmaddr);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

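/*
 * Tables queued via tlb_remove_table() carry their type in the low bits
 * of the (physical) table address: 0 for a full crst table, FRAG_MASK for
 * a 4K pgste page table, otherwise the shifted fragment bit of a 1K/2K
 * page table.  __tlb_remove_table() decodes this to pick the right free
 * routine once the batch is processed.
 */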
static void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}

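/*
 * KVM needs pgste entries for every pte, which transparent huge pages do
 * not provide.  thp_split_mm() therefore splits all THP mappings and
 * disables THP for the mm before the page tables are reallocated.
 */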
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void thp_split_vma(struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
		follow_page(vma, addr, FOLL_SPLIT);
}

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		thp_split_vma(vma);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif

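/*
 * page_table_realloc() and its pud/pmd helpers walk an address range and
 * replace every 2K page table with a full 4K page table that provides
 * room for the pgste entries needed by KVM guests.
 */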
static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
				struct mm_struct *mm, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	unsigned long next, *table, *new;
	struct page *page;
	spinlock_t *ptl;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
again:
		if (pmd_none_or_clear_bad(pmd))
			continue;
		table = (unsigned long *) pmd_deref(*pmd);
		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
		if (page_table_with_pgste(page))
			continue;
		/* Allocate new page table with pgstes */
		new = page_table_alloc_pgste(mm);
		if (!new)
			return -ENOMEM;

		ptl = pmd_lock(mm, pmd);
		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
			/* Nuke pmd entry pointing to the "short" page table */
			pmdp_flush_lazy(mm, addr, pmd);
			pmd_clear(pmd);

			/* Copy ptes from old table to new table */
			memcpy(new, table, PAGE_SIZE/2);
			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
			/* Establish new table */
			pmd_populate(mm, pmd, (pte_t *) new);
			/* free old table with rcu, there might be a walker! */
			page_table_free_rcu(tlb, table, addr);
			new = NULL;
		}
		spin_unlock(ptl);
		if (new) {
			page_table_free_pgste(new);
			goto again;
		}
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
				struct mm_struct *mm, pgd_t *pgd,
				unsigned long addr, unsigned long end)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pud++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
					unsigned long addr, unsigned long end)
{
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pgd++, addr = next, addr != end);

	return 0;
}

/*
 * switch on pgstes for its userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct mmu_gather tlb;

	/* Do we have pgstes? if yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	down_write(&mm->mmap_sem);
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	/* Reallocate the page tables with pgstes */
	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
	if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
		mm->context.has_pgste = 1;
	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
	up_write(&mm->mmap_sem);
	return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	unsigned long ptev;
	pgste_t pgste;

	pgste = pgste_get_lock(pte);
	/*
	 * Remove all zero page mappings,
	 * after establishing a policy to forbid zero page mappings
	 * following faults for that page will get fresh anonymous pages
	 */
	if (is_zero_pfn(pte_pfn(*pte))) {
		ptep_flush_direct(walk->mm, addr, pte);
		pte_val(*pte) = _PAGE_INVALID;
	}
	/* Clear storage key */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*pte);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(pte, pgste);
	return 0;
}

int s390_enable_skey(void)
{
	struct mm_walk walk = { .pte_entry = __s390_enable_skey };
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int rc = 0;

	down_write(&mm->mmap_sem);
	if (mm_use_skey(mm))
		goto out_up;

	mm->context.use_skey = 1;
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
				MADV_UNMERGEABLE, &vma->vm_flags)) {
			mm->context.use_skey = 0;
			rc = -ENOMEM;
			goto out_up;
		}
	}
	mm->def_flags &= ~VM_MERGEABLE;

	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);

out_up:
	up_write(&mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Reset CMMA state, make all pages stable again.
 */
static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	pgste_t pgste;

	pgste = pgste_get_lock(pte);
	pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(pte, pgste);
	return 0;
}

void s390_reset_cmma(struct mm_struct *mm)
{
	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };

	down_write(&mm->mmap_sem);
	walk.mm = mm;
	walk_page_range(0, TASK_SIZE, &walk);
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);

/*
 * Test and reset if a guest page is dirty
 */
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
{
	pte_t *pte;
	spinlock_t *ptl;
	bool dirty = false;

	pte = get_locked_pte(gmap->mm, address, &ptl);
	if (unlikely(!pte))
		return false;

	if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
		dirty = true;

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	/* No need to flush TLB
	 * On s390 reference bits are in storage key and never in TLB */
	return pmdp_test_and_clear_young(vma, address, pmdp);
}

int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	entry = pmd_mkyoung(entry);
	if (dirty)
		entry = pmd_mkdirty(entry);
	if (pmd_same(*pmdp, entry))
		return 0;
	pmdp_invalidate(vma, address, pmdp);
	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	return 1;
}

static void pmdp_splitting_flush_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp)) {
		/* need to serialize against gup-fast (IRQ disabled) */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
	}
}

void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */