1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#undef DEBUG
18#undef DEBUG_LOW
19
20#define pr_fmt(fmt) "hash-mmu: " fmt
21#include <linux/spinlock.h>
22#include <linux/errno.h>
23#include <linux/sched/mm.h>
24#include <linux/proc_fs.h>
25#include <linux/stat.h>
26#include <linux/sysctl.h>
27#include <linux/export.h>
28#include <linux/ctype.h>
29#include <linux/cache.h>
30#include <linux/init.h>
31#include <linux/signal.h>
32#include <linux/memblock.h>
33#include <linux/context_tracking.h>
34#include <linux/libfdt.h>
35#include <linux/pkeys.h>
36#include <linux/hugetlb.h>
37#include <linux/cpu.h>
38#include <linux/pgtable.h>
39#include <linux/debugfs.h>
40
41#include <asm/interrupt.h>
42#include <asm/processor.h>
43#include <asm/mmu.h>
44#include <asm/mmu_context.h>
45#include <asm/page.h>
46#include <asm/types.h>
47#include <linux/uaccess.h>
48#include <asm/machdep.h>
49#include <asm/prom.h>
50#include <asm/io.h>
51#include <asm/eeh.h>
52#include <asm/tlb.h>
53#include <asm/cacheflush.h>
54#include <asm/cputable.h>
55#include <asm/sections.h>
56#include <asm/copro.h>
57#include <asm/udbg.h>
58#include <asm/code-patching.h>
59#include <asm/fadump.h>
60#include <asm/firmware.h>
61#include <asm/tm.h>
62#include <asm/trace.h>
63#include <asm/ps3.h>
64#include <asm/pte-walk.h>
65#include <asm/asm-prototypes.h>
66#include <asm/ultravisor.h>
67
68#include <mm/mmu_decl.h>
69
70#include "internal.h"
71
72
/*
 * Early debug output helpers.  DBG() and DBG_LOW() forward their arguments
 * to udbg_printf() only when DEBUG / DEBUG_LOW are defined; otherwise they
 * expand to nothing, so their arguments are never evaluated.  Both symbols
 * are #undef'd near the top of this file, so the output is off unless that
 * is changed.
 */
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

/*
 * Byte-size units.  GB uses a long multiplier so the product is computed
 * in long rather than int arithmetic.
 */
#define KB (1024)
#define MB (1024*KB)
#define GB (1024L*MB)
88
89
90
91
92
93
94
95
96
97
98
99
100
/*
 * Value written to SPRN_SDR1 on CPUs that use it (see htab_initialize() and
 * hash__early_init_mmu_secondary()); left at 0 when a hypervisor owns the
 * hash table.
 */
static unsigned long _SDR1;
/* Per-page-size MMU parameters, indexed by MMU_PAGE_*. */
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);

/*
 * Filled in by init_hpte_page_sizes(): indexed by a page-size encoding
 * (penc) value, each entry packs (actual page size index << 4) | base
 * page size index.
 */
u8 hpte_page_sizes[1 << LP_BITS];
EXPORT_SYMBOL_GPL(hpte_page_sizes);

/* Kernel virtual address of the hash table; NULL when a hypervisor owns it. */
struct hash_pte *htab_address;
/* Hash table size in bytes, as returned by htab_get_table_size(). */
unsigned long htab_size_bytes;
/* (number of PTE groups - 1); used to reduce a hash to a group index. */
unsigned long htab_hash_mask;
EXPORT_SYMBOL_GPL(htab_hash_mask);
/*
 * Page-size indices (MMU_PAGE_*) used for the various kernel mappings.
 * They default to 4K and are adjusted in htab_init_page_sizes().
 */
int mmu_linear_psize = MMU_PAGE_4K;
EXPORT_SYMBOL_GPL(mmu_linear_psize);
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
#ifdef CONFIG_SPARSEMEM_VMEMMAP
int mmu_vmemmap_psize = MMU_PAGE_4K;
#endif
int mmu_io_psize = MMU_PAGE_4K;
/*
 * Segment sizes (MMU_SEGSIZE_*); both are switched to 1T in
 * htab_initialize() when MMU_FTR_1T_SEGMENT is available.
 */
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
/* NOTE(review): presumably the number of SLB entries; not modified in this part of the file. */
u16 mmu_slb_size = 64;
EXPORT_SYMBOL_GPL(mmu_slb_size);
#ifdef CONFIG_PPC_64K_PAGES
/*
 * Set to 1 by htab_init_page_sizes() when 64K pages exist but
 * MMU_FTR_CI_LARGE_PAGE is absent; hash_page_mm() then demotes
 * cache-inhibited 64K mappings to 4K.
 */
int mmu_ci_restrictions;
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * One byte per page of the linear mapping; htab_bolt_mapping() stores the
 * hpte_insert() result ORed with 0x80 for each page it maps.
 */
static u8 *linear_map_hash_slots;
/* Number of entries in linear_map_hash_slots. */
static unsigned long linear_map_hash_count;
/* NOTE(review): presumably serialises updates of linear_map_hash_slots; its users are outside this part of the file. */
static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif
/* Platform-specific hash page table operations (hpte_insert, hpte_remove, ...). */
struct mmu_hash_ops mmu_hash_ops;
EXPORT_SYMBOL(mmu_hash_ops);
136
137
138
139
140
141
142
143
144
145static struct mmu_psize_def mmu_psize_defaults[] = {
146 [MMU_PAGE_4K] = {
147 .shift = 12,
148 .sllp = 0,
149 .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
150 .avpnm = 0,
151 .tlbiel = 0,
152 },
153};
154
155
156
157
158
159
160static struct mmu_psize_def mmu_psize_defaults_gp[] = {
161 [MMU_PAGE_4K] = {
162 .shift = 12,
163 .sllp = 0,
164 .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
165 .avpnm = 0,
166 .tlbiel = 1,
167 },
168 [MMU_PAGE_16M] = {
169 .shift = 24,
170 .sllp = SLB_VSID_L,
171 .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
172 [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
173 .avpnm = 0x1UL,
174 .tlbiel = 0,
175 },
176};
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags)
192{
193 unsigned long rflags = 0;
194
195
196 if ((pteflags & _PAGE_EXEC) == 0)
197 rflags |= HPTE_R_N;
198
199
200
201
202
203
204
205 if (pteflags & _PAGE_PRIVILEGED) {
206
207
208
209 if (!(pteflags & _PAGE_WRITE)) {
210 if (mmu_has_feature(MMU_FTR_KERNEL_RO))
211 rflags |= (HPTE_R_PP0 | 0x2);
212 else
213 rflags |= 0x3;
214 }
215 } else {
216 if (pteflags & _PAGE_RWX)
217 rflags |= 0x2;
218 if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
219 rflags |= 0x1;
220 }
221
222
223
224
225 rflags |= HPTE_R_R;
226
227 if (pteflags & _PAGE_DIRTY)
228 rflags |= HPTE_R_C;
229
230
231
232
233 if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
234 rflags |= HPTE_R_I;
235 else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
236 rflags |= (HPTE_R_I | HPTE_R_G);
237 else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
238 rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
239 else
240
241
242
243 rflags |= HPTE_R_M;
244
245 rflags |= pte_to_hpte_pkey_bits(pteflags, flags);
246 return rflags;
247}
248
249int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
250 unsigned long pstart, unsigned long prot,
251 int psize, int ssize)
252{
253 unsigned long vaddr, paddr;
254 unsigned int step, shift;
255 int ret = 0;
256
257 shift = mmu_psize_defs[psize].shift;
258 step = 1 << shift;
259
260 prot = htab_convert_pte_flags(prot, HPTE_USE_KERNEL_KEY);
261
262 DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
263 vstart, vend, pstart, prot, psize, ssize);
264
265
266 vaddr = ALIGN(vstart, step);
267 paddr = ALIGN(pstart, step);
268 vend = ALIGN_DOWN(vend, step);
269
270 for (; vaddr < vend; vaddr += step, paddr += step) {
271 unsigned long hash, hpteg;
272 unsigned long vsid = get_kernel_vsid(vaddr, ssize);
273 unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
274 unsigned long tprot = prot;
275 bool secondary_hash = false;
276
277
278
279
280 if (!vsid)
281 return -1;
282
283 if (overlaps_kernel_text(vaddr, vaddr + step))
284 tprot &= ~HPTE_R_N;
285
286
287
288
289
290
291
292
293
294
295
296 if ((PHYSICAL_START > MEMORY_START) &&
297 overlaps_interrupt_vector_text(vaddr, vaddr + step))
298 tprot &= ~HPTE_R_N;
299
300 hash = hpt_hash(vpn, shift, ssize);
301 hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
302
303 BUG_ON(!mmu_hash_ops.hpte_insert);
304repeat:
305 ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
306 HPTE_V_BOLTED, psize, psize,
307 ssize);
308 if (ret == -1) {
309
310
311
312
313 ret = mmu_hash_ops.hpte_remove(hpteg);
314 if (ret != -1)
315 ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
316 HPTE_V_BOLTED, psize, psize,
317 ssize);
318 if (ret == -1 && !secondary_hash) {
319 secondary_hash = true;
320 hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP);
321 goto repeat;
322 }
323 }
324
325 if (ret < 0)
326 break;
327
328 cond_resched();
329#ifdef CONFIG_DEBUG_PAGEALLOC
330 if (debug_pagealloc_enabled() &&
331 (paddr >> PAGE_SHIFT) < linear_map_hash_count)
332 linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
333#endif
334 }
335 return ret < 0 ? ret : 0;
336}
337
338int htab_remove_mapping(unsigned long vstart, unsigned long vend,
339 int psize, int ssize)
340{
341 unsigned long vaddr, time_limit;
342 unsigned int step, shift;
343 int rc;
344 int ret = 0;
345
346 shift = mmu_psize_defs[psize].shift;
347 step = 1 << shift;
348
349 if (!mmu_hash_ops.hpte_removebolted)
350 return -ENODEV;
351
352
353 vaddr = ALIGN_DOWN(vstart, step);
354 time_limit = jiffies + HZ;
355
356 for (;vaddr < vend; vaddr += step) {
357 rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
358
359
360
361
362
363 if (time_after(jiffies, time_limit)) {
364 cond_resched();
365 time_limit = jiffies + HZ;
366 }
367 if (rc == -ENOENT) {
368 ret = -ENOENT;
369 continue;
370 }
371 if (rc < 0)
372 return rc;
373 }
374
375 return ret;
376}
377
378static bool disable_1tb_segments = false;
379
380static int __init parse_disable_1tb_segments(char *p)
381{
382 disable_1tb_segments = true;
383 return 0;
384}
385early_param("disable_1tb_segments", parse_disable_1tb_segments);
386
387static int __init htab_dt_scan_seg_sizes(unsigned long node,
388 const char *uname, int depth,
389 void *data)
390{
391 const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
392 const __be32 *prop;
393 int size = 0;
394
395
396 if (type == NULL || strcmp(type, "cpu") != 0)
397 return 0;
398
399 prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &size);
400 if (prop == NULL)
401 return 0;
402 for (; size >= 4; size -= 4, ++prop) {
403 if (be32_to_cpu(prop[0]) == 40) {
404 DBG("1T segment support detected\n");
405
406 if (disable_1tb_segments) {
407 DBG("1T segments disabled by command line\n");
408 break;
409 }
410
411 cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
412 return 1;
413 }
414 }
415 cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
416 return 0;
417}
418
419static int __init get_idx_from_shift(unsigned int shift)
420{
421 int idx = -1;
422
423 switch (shift) {
424 case 0xc:
425 idx = MMU_PAGE_4K;
426 break;
427 case 0x10:
428 idx = MMU_PAGE_64K;
429 break;
430 case 0x14:
431 idx = MMU_PAGE_1M;
432 break;
433 case 0x18:
434 idx = MMU_PAGE_16M;
435 break;
436 case 0x22:
437 idx = MMU_PAGE_16G;
438 break;
439 }
440 return idx;
441}
442
443static int __init htab_dt_scan_page_sizes(unsigned long node,
444 const char *uname, int depth,
445 void *data)
446{
447 const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
448 const __be32 *prop;
449 int size = 0;
450
451
452 if (type == NULL || strcmp(type, "cpu") != 0)
453 return 0;
454
455 prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
456 if (!prop)
457 return 0;
458
459 pr_info("Page sizes from device-tree:\n");
460 size /= 4;
461 cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
462 while(size > 0) {
463 unsigned int base_shift = be32_to_cpu(prop[0]);
464 unsigned int slbenc = be32_to_cpu(prop[1]);
465 unsigned int lpnum = be32_to_cpu(prop[2]);
466 struct mmu_psize_def *def;
467 int idx, base_idx;
468
469 size -= 3; prop += 3;
470 base_idx = get_idx_from_shift(base_shift);
471 if (base_idx < 0) {
472
473 prop += lpnum * 2; size -= lpnum * 2;
474 continue;
475 }
476 def = &mmu_psize_defs[base_idx];
477 if (base_idx == MMU_PAGE_16M)
478 cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
479
480 def->shift = base_shift;
481 if (base_shift <= 23)
482 def->avpnm = 0;
483 else
484 def->avpnm = (1 << (base_shift - 23)) - 1;
485 def->sllp = slbenc;
486
487
488
489
490 if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
491 def->tlbiel = 1;
492 else
493 def->tlbiel = 0;
494
495 while (size > 0 && lpnum) {
496 unsigned int shift = be32_to_cpu(prop[0]);
497 int penc = be32_to_cpu(prop[1]);
498
499 prop += 2; size -= 2;
500 lpnum--;
501
502 idx = get_idx_from_shift(shift);
503 if (idx < 0)
504 continue;
505
506 if (penc == -1)
507 pr_err("Invalid penc for base_shift=%d "
508 "shift=%d\n", base_shift, shift);
509
510 def->penc[idx] = penc;
511 pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
512 " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
513 base_shift, shift, def->sllp,
514 def->avpnm, def->tlbiel, def->penc[idx]);
515 }
516 }
517
518 return 1;
519}
520
521#ifdef CONFIG_HUGETLB_PAGE
522
523
524
525
526static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
527 const char *uname, int depth,
528 void *data) {
529 const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
530 const __be64 *addr_prop;
531 const __be32 *page_count_prop;
532 unsigned int expected_pages;
533 long unsigned int phys_addr;
534 long unsigned int block_size;
535
536
537 if (type == NULL || strcmp(type, "memory") != 0)
538 return 0;
539
540
541
542
543
544 page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
545 if (page_count_prop == NULL)
546 return 0;
547 expected_pages = (1 << be32_to_cpu(page_count_prop[0]));
548 addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
549 if (addr_prop == NULL)
550 return 0;
551 phys_addr = be64_to_cpu(addr_prop[0]);
552 block_size = be64_to_cpu(addr_prop[1]);
553 if (block_size != (16 * GB))
554 return 0;
555 printk(KERN_INFO "Huge page(16GB) memory: "
556 "addr = 0x%lX size = 0x%lX pages = %d\n",
557 phys_addr, block_size, expected_pages);
558 if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) {
559 memblock_reserve(phys_addr, block_size * expected_pages);
560 pseries_add_gpage(phys_addr, block_size, expected_pages);
561 }
562 return 0;
563}
564#endif
565
566static void mmu_psize_set_default_penc(void)
567{
568 int bpsize, apsize;
569 for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
570 for (apsize = 0; apsize < MMU_PAGE_COUNT; apsize++)
571 mmu_psize_defs[bpsize].penc[apsize] = -1;
572}
573
574#ifdef CONFIG_PPC_64K_PAGES
575
576static bool might_have_hea(void)
577{
578
579
580
581
582
583#ifdef CONFIG_IBMEBUS
584 return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
585 firmware_has_feature(FW_FEATURE_SPLPAR);
586#else
587 return false;
588#endif
589}
590
591#endif
592
593static void __init htab_scan_page_sizes(void)
594{
595 int rc;
596
597
598 mmu_psize_set_default_penc();
599
600
601 memcpy(mmu_psize_defs, mmu_psize_defaults,
602 sizeof(mmu_psize_defaults));
603
604
605
606
607 rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
608 if (rc == 0 && early_mmu_has_feature(MMU_FTR_16M_PAGE)) {
609
610
611
612
613 memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
614 sizeof(mmu_psize_defaults_gp));
615 }
616
617#ifdef CONFIG_HUGETLB_PAGE
618 if (!hugetlb_disabled && !early_radix_enabled() ) {
619
620 of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
621 }
622#endif
623}
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647static void init_hpte_page_sizes(void)
648{
649 long int ap, bp;
650 long int shift, penc;
651
652 for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
653 if (!mmu_psize_defs[bp].shift)
654 continue;
655 for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
656 penc = mmu_psize_defs[bp].penc[ap];
657 if (penc == -1 || !mmu_psize_defs[ap].shift)
658 continue;
659 shift = mmu_psize_defs[ap].shift - LP_SHIFT;
660 if (shift <= 0)
661 continue;
662
663
664
665
666
667 while (penc < (1 << LP_BITS)) {
668 hpte_page_sizes[penc] = (ap << 4) | bp;
669 penc += 1 << shift;
670 }
671 }
672 }
673}
674
675static void __init htab_init_page_sizes(void)
676{
677 bool aligned = true;
678 init_hpte_page_sizes();
679
680 if (!debug_pagealloc_enabled()) {
681
682
683
684
685 if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
686 (unsigned long)_stext % 0x1000000) {
687 if (mmu_psize_defs[MMU_PAGE_16M].shift)
688 pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n");
689 aligned = false;
690 }
691
692 if (mmu_psize_defs[MMU_PAGE_16M].shift && aligned)
693 mmu_linear_psize = MMU_PAGE_16M;
694 else if (mmu_psize_defs[MMU_PAGE_1M].shift)
695 mmu_linear_psize = MMU_PAGE_1M;
696 }
697
698#ifdef CONFIG_PPC_64K_PAGES
699
700
701
702
703
704
705
706
707
708 if (mmu_psize_defs[MMU_PAGE_64K].shift) {
709 mmu_virtual_psize = MMU_PAGE_64K;
710 mmu_vmalloc_psize = MMU_PAGE_64K;
711 if (mmu_linear_psize == MMU_PAGE_4K)
712 mmu_linear_psize = MMU_PAGE_64K;
713 if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
714
715
716
717
718
719 if (!might_have_hea())
720 mmu_io_psize = MMU_PAGE_64K;
721 } else
722 mmu_ci_restrictions = 1;
723 }
724#endif
725
726#ifdef CONFIG_SPARSEMEM_VMEMMAP
727
728
729
730
731 if (mmu_psize_defs[MMU_PAGE_16M].shift &&
732 memblock_phys_mem_size() >= 0x40000000)
733 mmu_vmemmap_psize = MMU_PAGE_16M;
734 else
735 mmu_vmemmap_psize = mmu_virtual_psize;
736#endif
737
738 printk(KERN_DEBUG "Page orders: linear mapping = %d, "
739 "virtual = %d, io = %d"
740#ifdef CONFIG_SPARSEMEM_VMEMMAP
741 ", vmemmap = %d"
742#endif
743 "\n",
744 mmu_psize_defs[mmu_linear_psize].shift,
745 mmu_psize_defs[mmu_virtual_psize].shift,
746 mmu_psize_defs[mmu_io_psize].shift
747#ifdef CONFIG_SPARSEMEM_VMEMMAP
748 ,mmu_psize_defs[mmu_vmemmap_psize].shift
749#endif
750 );
751}
752
753static int __init htab_dt_scan_pftsize(unsigned long node,
754 const char *uname, int depth,
755 void *data)
756{
757 const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
758 const __be32 *prop;
759
760
761 if (type == NULL || strcmp(type, "cpu") != 0)
762 return 0;
763
764 prop = of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
765 if (prop != NULL) {
766
767 ppc64_pft_size = be32_to_cpu(prop[1]);
768 return 1;
769 }
770 return 0;
771}
772
773unsigned htab_shift_for_mem_size(unsigned long mem_size)
774{
775 unsigned memshift = __ilog2(mem_size);
776 unsigned pshift = mmu_psize_defs[mmu_virtual_psize].shift;
777 unsigned pteg_shift;
778
779
780 if ((1UL << memshift) < mem_size)
781 memshift += 1;
782
783
784 pteg_shift = memshift - (pshift + 1);
785
786
787
788
789
790 return max(pteg_shift + 7, 18U);
791}
792
793static unsigned long __init htab_get_table_size(void)
794{
795
796
797
798
799
800 if (ppc64_pft_size == 0)
801 of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
802 if (ppc64_pft_size)
803 return 1UL << ppc64_pft_size;
804
805 return 1UL << htab_shift_for_mem_size(memblock_phys_mem_size());
806}
807
808#ifdef CONFIG_MEMORY_HOTPLUG
809static int resize_hpt_for_hotplug(unsigned long new_mem_size)
810{
811 unsigned target_hpt_shift;
812
813 if (!mmu_hash_ops.resize_hpt)
814 return 0;
815
816 target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
817
818
819
820
821
822
823
824
825
826 if (target_hpt_shift > ppc64_pft_size ||
827 target_hpt_shift < ppc64_pft_size - 1)
828 return mmu_hash_ops.resize_hpt(target_hpt_shift);
829
830 return 0;
831}
832
833int hash__create_section_mapping(unsigned long start, unsigned long end,
834 int nid, pgprot_t prot)
835{
836 int rc;
837
838 if (end >= H_VMALLOC_START) {
839 pr_warn("Outside the supported range\n");
840 return -1;
841 }
842
843 resize_hpt_for_hotplug(memblock_phys_mem_size());
844
845 rc = htab_bolt_mapping(start, end, __pa(start),
846 pgprot_val(prot), mmu_linear_psize,
847 mmu_kernel_ssize);
848
849 if (rc < 0) {
850 int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
851 mmu_kernel_ssize);
852 BUG_ON(rc2 && (rc2 != -ENOENT));
853 }
854 return rc;
855}
856
857int hash__remove_section_mapping(unsigned long start, unsigned long end)
858{
859 int rc = htab_remove_mapping(start, end, mmu_linear_psize,
860 mmu_kernel_ssize);
861
862 if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
863 pr_warn("Hash collision while resizing HPT\n");
864
865 return rc;
866}
867#endif
868
869static void __init hash_init_partition_table(phys_addr_t hash_table,
870 unsigned long htab_size)
871{
872 mmu_partition_table_init();
873
874
875
876
877
878 htab_size = __ilog2(htab_size) - 18;
879 mmu_partition_table_set_entry(0, hash_table | htab_size, 0, false);
880 pr_info("Partition table %p\n", partition_tb);
881}
882
883static void __init htab_initialize(void)
884{
885 unsigned long table;
886 unsigned long pteg_count;
887 unsigned long prot;
888 phys_addr_t base = 0, size = 0, end;
889 u64 i;
890
891 DBG(" -> htab_initialize()\n");
892
893 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
894 mmu_kernel_ssize = MMU_SEGSIZE_1T;
895 mmu_highuser_ssize = MMU_SEGSIZE_1T;
896 printk(KERN_INFO "Using 1TB segments\n");
897 }
898
899 if (stress_slb_enabled)
900 static_branch_enable(&stress_slb_key);
901
902
903
904
905
906 htab_size_bytes = htab_get_table_size();
907 pteg_count = htab_size_bytes >> 7;
908
909 htab_hash_mask = pteg_count - 1;
910
911 if (firmware_has_feature(FW_FEATURE_LPAR) ||
912 firmware_has_feature(FW_FEATURE_PS3_LV1)) {
913
914 htab_address = NULL;
915 _SDR1 = 0;
916#ifdef CONFIG_FA_DUMP
917
918
919
920
921
922
923 if (is_fadump_active() && mmu_hash_ops.hpte_clear_all)
924 mmu_hash_ops.hpte_clear_all();
925#endif
926 } else {
927 unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE;
928
929#ifdef CONFIG_PPC_CELL
930
931
932
933
934
935 if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) {
936 limit = 0x80000000;
937 pr_info("Hash table forced below 2G for Axon IOMMU\n");
938 }
939#endif
940
941 table = memblock_phys_alloc_range(htab_size_bytes,
942 htab_size_bytes,
943 0, limit);
944 if (!table)
945 panic("ERROR: Failed to allocate %pa bytes below %pa\n",
946 &htab_size_bytes, &limit);
947
948 DBG("Hash table allocated at %lx, size: %lx\n", table,
949 htab_size_bytes);
950
951 htab_address = __va(table);
952
953
954 _SDR1 = table + __ilog2(htab_size_bytes) - 18;
955
956
957 memset((void *)table, 0, htab_size_bytes);
958
959 if (!cpu_has_feature(CPU_FTR_ARCH_300))
960
961 mtspr(SPRN_SDR1, _SDR1);
962 else
963 hash_init_partition_table(table, htab_size_bytes);
964 }
965
966 prot = pgprot_val(PAGE_KERNEL);
967
968#ifdef CONFIG_DEBUG_PAGEALLOC
969 if (debug_pagealloc_enabled()) {
970 linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
971 linear_map_hash_slots = memblock_alloc_try_nid(
972 linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
973 ppc64_rma_size, NUMA_NO_NODE);
974 if (!linear_map_hash_slots)
975 panic("%s: Failed to allocate %lu bytes max_addr=%pa\n",
976 __func__, linear_map_hash_count, &ppc64_rma_size);
977 }
978#endif
979
980
981 for_each_mem_range(i, &base, &end) {
982 size = end - base;
983 base = (unsigned long)__va(base);
984
985 DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
986 base, size, prot);
987
988 if ((base + size) >= H_VMALLOC_START) {
989 pr_warn("Outside the supported range\n");
990 continue;
991 }
992
993 BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
994 prot, mmu_linear_psize, mmu_kernel_ssize));
995 }
996 memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
997
998
999
1000
1001
1002
1003
1004
1005 if (tce_alloc_start) {
1006 tce_alloc_start = (unsigned long)__va(tce_alloc_start);
1007 tce_alloc_end = (unsigned long)__va(tce_alloc_end);
1008
1009 if (base + size >= tce_alloc_start)
1010 tce_alloc_start = base + size + 1;
1011
1012 BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
1013 __pa(tce_alloc_start), prot,
1014 mmu_linear_psize, mmu_kernel_ssize));
1015 }
1016
1017
1018 DBG(" <- htab_initialize()\n");
1019}
1020#undef KB
1021#undef MB
1022
1023void __init hash__early_init_devtree(void)
1024{
1025
1026 of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
1027
1028
1029 htab_scan_page_sizes();
1030}
1031
1032static struct hash_mm_context init_hash_mm_context;
1033void __init hash__early_init_mmu(void)
1034{
1035#ifndef CONFIG_PPC_64K_PAGES
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047 BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3)));
1048#endif
1049
1050 htab_init_page_sizes();
1051
1052
1053
1054
1055 __pte_frag_nr = H_PTE_FRAG_NR;
1056 __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
1057 __pmd_frag_nr = H_PMD_FRAG_NR;
1058 __pmd_frag_size_shift = H_PMD_FRAG_SIZE_SHIFT;
1059
1060 __pte_index_size = H_PTE_INDEX_SIZE;
1061 __pmd_index_size = H_PMD_INDEX_SIZE;
1062 __pud_index_size = H_PUD_INDEX_SIZE;
1063 __pgd_index_size = H_PGD_INDEX_SIZE;
1064 __pud_cache_index = H_PUD_CACHE_INDEX;
1065 __pte_table_size = H_PTE_TABLE_SIZE;
1066 __pmd_table_size = H_PMD_TABLE_SIZE;
1067 __pud_table_size = H_PUD_TABLE_SIZE;
1068 __pgd_table_size = H_PGD_TABLE_SIZE;
1069
1070
1071
1072
1073 __pmd_val_bits = HASH_PMD_VAL_BITS;
1074 __pud_val_bits = HASH_PUD_VAL_BITS;
1075 __pgd_val_bits = HASH_PGD_VAL_BITS;
1076
1077 __kernel_virt_start = H_KERN_VIRT_START;
1078 __vmalloc_start = H_VMALLOC_START;
1079 __vmalloc_end = H_VMALLOC_END;
1080 __kernel_io_start = H_KERN_IO_START;
1081 __kernel_io_end = H_KERN_IO_END;
1082 vmemmap = (struct page *)H_VMEMMAP_START;
1083 ioremap_bot = IOREMAP_BASE;
1084
1085#ifdef CONFIG_PCI
1086 pci_io_base = ISA_IO_BASE;
1087#endif
1088
1089
1090 if (firmware_has_feature(FW_FEATURE_PS3_LV1))
1091 ps3_early_mm_init();
1092 else if (firmware_has_feature(FW_FEATURE_LPAR))
1093 hpte_init_pseries();
1094 else if (IS_ENABLED(CONFIG_PPC_NATIVE))
1095 hpte_init_native();
1096
1097 if (!mmu_hash_ops.hpte_insert)
1098 panic("hash__early_init_mmu: No MMU hash ops defined!\n");
1099
1100
1101
1102
1103
1104
1105 htab_initialize();
1106
1107 init_mm.context.hash_context = &init_hash_mm_context;
1108 mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
1109
1110 pr_info("Initializing hash mmu with SLB\n");
1111
1112 slb_initialize();
1113
1114 if (cpu_has_feature(CPU_FTR_ARCH_206)
1115 && cpu_has_feature(CPU_FTR_HVMODE))
1116 tlbiel_all();
1117}
1118
1119#ifdef CONFIG_SMP
1120void hash__early_init_mmu_secondary(void)
1121{
1122
1123 if (!firmware_has_feature(FW_FEATURE_LPAR)) {
1124
1125 if (!cpu_has_feature(CPU_FTR_ARCH_300))
1126 mtspr(SPRN_SDR1, _SDR1);
1127 else
1128 set_ptcr_when_no_uv(__pa(partition_tb) |
1129 (PATB_SIZE_SHIFT - 12));
1130 }
1131
1132 slb_initialize();
1133
1134 if (cpu_has_feature(CPU_FTR_ARCH_206)
1135 && cpu_has_feature(CPU_FTR_HVMODE))
1136 tlbiel_all();
1137
1138#ifdef CONFIG_PPC_MEM_KEYS
1139 if (mmu_has_feature(MMU_FTR_PKEY))
1140 mtspr(SPRN_UAMOR, default_uamor);
1141#endif
1142}
1143#endif
1144
1145
1146
1147
1148unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
1149{
1150 struct page *page;
1151
1152 if (!pfn_valid(pte_pfn(pte)))
1153 return pp;
1154
1155 page = pte_page(pte);
1156
1157
1158 if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) {
1159 if (trap == INTERRUPT_INST_STORAGE) {
1160 flush_dcache_icache_page(page);
1161 set_bit(PG_dcache_clean, &page->flags);
1162 } else
1163 pp |= HPTE_R_N;
1164 }
1165 return pp;
1166}
1167
1168#ifdef CONFIG_PPC_MM_SLICES
1169static unsigned int get_paca_psize(unsigned long addr)
1170{
1171 unsigned char *psizes;
1172 unsigned long index, mask_index;
1173
1174 if (addr < SLICE_LOW_TOP) {
1175 psizes = get_paca()->mm_ctx_low_slices_psize;
1176 index = GET_LOW_SLICE_INDEX(addr);
1177 } else {
1178 psizes = get_paca()->mm_ctx_high_slices_psize;
1179 index = GET_HIGH_SLICE_INDEX(addr);
1180 }
1181 mask_index = index & 0x1;
1182 return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
1183}
1184
1185#else
1186unsigned int get_paca_psize(unsigned long addr)
1187{
1188 return get_paca()->mm_ctx_user_psize;
1189}
1190#endif
1191
1192
1193
1194
1195
1196#ifdef CONFIG_PPC_64K_PAGES
1197void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
1198{
1199 if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
1200 return;
1201 slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
1202 copro_flush_all_slbs(mm);
1203 if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
1204
1205 copy_mm_to_paca(mm);
1206 slb_flush_and_restore_bolted();
1207 }
1208}
1209#endif
1210
#ifdef CONFIG_PPC_SUBPAGE_PROT
/*
 * Look up the subpage protection of the 4K subpage containing @ea.
 *
 * Returns the Linux PTE permission bits the subpage denies: _PAGE_RWX when
 * bit 1 of the subpage's 2-bit field is set, _PAGE_WRITE when bit 0 is
 * set, 0 when neither is set or when no protection entry covers @ea.
 */
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
	struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
	u32 **l2_table;
	u32 *l1_table;
	u32 field;
	u32 denied = 0;

	if (!spt || ea >= spt->maxaddr)
		return 0;

	/* Addresses below 4GB use the dedicated low table. */
	if (ea >= 0x100000000UL) {
		l2_table = spt->protptrs[ea >> SBP_L3_SHIFT];
		if (!l2_table)
			return 0;
	} else {
		l2_table = spt->low_prot;
	}

	l1_table = l2_table[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
	if (!l1_table)
		return 0;

	field = l1_table[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];

	/*
	 * Each word holds sixteen 2-bit fields, the first subpage in the
	 * top two bits; move this subpage's field to the bottom.
	 */
	field >>= 30 - 2 * ((ea >> 12) & 0xf);

	if (field & 2)
		denied |= _PAGE_RWX;
	if (field & 1)
		denied |= _PAGE_WRITE;

	return denied;
}

#else
/* Subpage protection is not configured: nothing is ever denied. */
static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
	return 0;
}
#endif
1262
1263void hash_failure_debug(unsigned long ea, unsigned long access,
1264 unsigned long vsid, unsigned long trap,
1265 int ssize, int psize, int lpsize, unsigned long pte)
1266{
1267 if (!printk_ratelimit())
1268 return;
1269 pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
1270 ea, access, current->comm);
1271 pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
1272 trap, vsid, ssize, psize, lpsize, pte);
1273}
1274
1275static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
1276 int psize, bool user_region)
1277{
1278 if (user_region) {
1279 if (psize != get_paca_psize(ea)) {
1280 copy_mm_to_paca(mm);
1281 slb_flush_and_restore_bolted();
1282 }
1283 } else if (get_paca()->vmalloc_sllp !=
1284 mmu_psize_defs[mmu_vmalloc_psize].sllp) {
1285 get_paca()->vmalloc_sllp =
1286 mmu_psize_defs[mmu_vmalloc_psize].sllp;
1287 slb_vmalloc_update();
1288 }
1289}
1290
1291
1292
1293
1294
1295
1296
1297
1298int hash_page_mm(struct mm_struct *mm, unsigned long ea,
1299 unsigned long access, unsigned long trap,
1300 unsigned long flags)
1301{
1302 bool is_thp;
1303 pgd_t *pgdir;
1304 unsigned long vsid;
1305 pte_t *ptep;
1306 unsigned hugeshift;
1307 int rc, user_region = 0;
1308 int psize, ssize;
1309
1310 DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
1311 ea, access, trap);
1312 trace_hash_fault(ea, access, trap);
1313
1314
1315 switch (get_region_id(ea)) {
1316 case USER_REGION_ID:
1317 user_region = 1;
1318 if (! mm) {
1319 DBG_LOW(" user region with no mm !\n");
1320 rc = 1;
1321 goto bail;
1322 }
1323 psize = get_slice_psize(mm, ea);
1324 ssize = user_segment_size(ea);
1325 vsid = get_user_vsid(&mm->context, ea, ssize);
1326 break;
1327 case VMALLOC_REGION_ID:
1328 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
1329 psize = mmu_vmalloc_psize;
1330 ssize = mmu_kernel_ssize;
1331 flags |= HPTE_USE_KERNEL_KEY;
1332 break;
1333
1334 case IO_REGION_ID:
1335 vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
1336 psize = mmu_io_psize;
1337 ssize = mmu_kernel_ssize;
1338 flags |= HPTE_USE_KERNEL_KEY;
1339 break;
1340 default:
1341
1342
1343
1344
1345 rc = 1;
1346 goto bail;
1347 }
1348 DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
1349
1350
1351 if (!vsid) {
1352 DBG_LOW("Bad address!\n");
1353 rc = 1;
1354 goto bail;
1355 }
1356
1357 pgdir = mm->pgd;
1358 if (pgdir == NULL) {
1359 rc = 1;
1360 goto bail;
1361 }
1362
1363
1364 if (user_region && mm_is_thread_local(mm))
1365 flags |= HPTE_LOCAL_UPDATE;
1366
1367#ifndef CONFIG_PPC_64K_PAGES
1368
1369
1370
1371
1372
1373
1374
1375
1376 if (psize != MMU_PAGE_4K)
1377 ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
1378#endif
1379
1380
1381 ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift);
1382 if (ptep == NULL || !pte_present(*ptep)) {
1383 DBG_LOW(" no PTE !\n");
1384 rc = 1;
1385 goto bail;
1386 }
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396 access |= _PAGE_PRESENT | _PAGE_PTE;
1397
1398
1399
1400
1401
1402 if (!check_pte_access(access, pte_val(*ptep))) {
1403 DBG_LOW(" no access !\n");
1404 rc = 1;
1405 goto bail;
1406 }
1407
1408 if (hugeshift) {
1409 if (is_thp)
1410 rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
1411 trap, flags, ssize, psize);
1412#ifdef CONFIG_HUGETLB_PAGE
1413 else
1414 rc = __hash_page_huge(ea, access, vsid, ptep, trap,
1415 flags, ssize, hugeshift, psize);
1416#else
1417 else {
1418
1419
1420
1421
1422 rc = 1;
1423 WARN_ON(1);
1424 }
1425#endif
1426 if (current->mm == mm)
1427 check_paca_psize(ea, mm, psize, user_region);
1428
1429 goto bail;
1430 }
1431
1432#ifndef CONFIG_PPC_64K_PAGES
1433 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
1434#else
1435 DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
1436 pte_val(*(ptep + PTRS_PER_PTE)));
1437#endif
1438
1439#ifdef CONFIG_PPC_64K_PAGES
1440
1441 if ((pte_val(*ptep) & H_PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
1442 demote_segment_4k(mm, ea);
1443 psize = MMU_PAGE_4K;
1444 }
1445
1446
1447
1448
1449
1450 if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
1451 if (user_region) {
1452 demote_segment_4k(mm, ea);
1453 psize = MMU_PAGE_4K;
1454 } else if (ea < VMALLOC_END) {
1455
1456
1457
1458
1459
1460 printk(KERN_ALERT "Reducing vmalloc segment "
1461 "to 4kB pages because of "
1462 "non-cacheable mapping\n");
1463 psize = mmu_vmalloc_psize = MMU_PAGE_4K;
1464 copro_flush_all_slbs(mm);
1465 }
1466 }
1467
1468#endif
1469
1470 if (current->mm == mm)
1471 check_paca_psize(ea, mm, psize, user_region);
1472
1473#ifdef CONFIG_PPC_64K_PAGES
1474 if (psize == MMU_PAGE_64K)
1475 rc = __hash_page_64K(ea, access, vsid, ptep, trap,
1476 flags, ssize);
1477 else
1478#endif
1479 {
1480 int spp = subpage_protection(mm, ea);
1481 if (access & spp)
1482 rc = -2;
1483 else
1484 rc = __hash_page_4K(ea, access, vsid, ptep, trap,
1485 flags, ssize, spp);
1486 }
1487
1488
1489
1490
1491
1492 if (rc == -1)
1493 hash_failure_debug(ea, access, vsid, trap, ssize, psize,
1494 psize, pte_val(*ptep));
1495#ifndef CONFIG_PPC_64K_PAGES
1496 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
1497#else
1498 DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
1499 pte_val(*(ptep + PTRS_PER_PTE)));
1500#endif
1501 DBG_LOW(" -> rc=%d\n", rc);
1502
1503bail:
1504 return rc;
1505}
1506EXPORT_SYMBOL_GPL(hash_page_mm);
1507
1508int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
1509 unsigned long dsisr)
1510{
1511 unsigned long flags = 0;
1512 struct mm_struct *mm = current->mm;
1513
1514 if ((get_region_id(ea) == VMALLOC_REGION_ID) ||
1515 (get_region_id(ea) == IO_REGION_ID))
1516 mm = &init_mm;
1517
1518 if (dsisr & DSISR_NOHPTE)
1519 flags |= HPTE_NOHPTE_UPDATE;
1520
1521 return hash_page_mm(mm, ea, access, trap, flags);
1522}
1523EXPORT_SYMBOL_GPL(hash_page);
1524
1525DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
1526DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
1527{
1528 unsigned long ea = regs->dar;
1529 unsigned long dsisr = regs->dsisr;
1530 unsigned long access = _PAGE_PRESENT | _PAGE_READ;
1531 unsigned long flags = 0;
1532 struct mm_struct *mm;
1533 unsigned int region_id;
1534 long err;
1535
1536 if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
1537 hash__do_page_fault(regs);
1538 return;
1539 }
1540
1541 region_id = get_region_id(ea);
1542 if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
1543 mm = &init_mm;
1544 else
1545 mm = current->mm;
1546
1547 if (dsisr & DSISR_NOHPTE)
1548 flags |= HPTE_NOHPTE_UPDATE;
1549
1550 if (dsisr & DSISR_ISSTORE)
1551 access |= _PAGE_WRITE;
1552
1553
1554
1555
1556
1557
1558
1559
1560 access |= _PAGE_PRIVILEGED;
1561 if (user_mode(regs) || (region_id == USER_REGION_ID))
1562 access &= ~_PAGE_PRIVILEGED;
1563
1564 if (TRAP(regs) == INTERRUPT_INST_STORAGE)
1565 access |= _PAGE_EXEC;
1566
1567 err = hash_page_mm(mm, ea, access, TRAP(regs), flags);
1568 if (unlikely(err < 0)) {
1569
1570 if (user_mode(regs)) {
1571 if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
1572 _exception(SIGSEGV, regs, SEGV_ACCERR, ea);
1573 else
1574 _exception(SIGBUS, regs, BUS_ADRERR, ea);
1575 } else {
1576 bad_page_fault(regs, SIGBUS);
1577 }
1578 err = 0;
1579
1580 } else if (err) {
1581 hash__do_page_fault(regs);
1582 }
1583}
1584
1585
1586
1587
1588
1589DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
1590{
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604 if (unlikely(in_nmi())) {
1605 do_bad_page_fault_segv(regs);
1606 return 0;
1607 }
1608
1609 __do_hash_fault(regs);
1610
1611 return 0;
1612}
1613
1614#ifdef CONFIG_PPC_MM_SLICES
1615static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
1616{
1617 int psize = get_slice_psize(mm, ea);
1618
1619
1620 if (unlikely(psize != mm_ctx_user_psize(&mm->context)))
1621 return false;
1622
1623
1624
1625
1626 if (unlikely((psize == MMU_PAGE_4K) && subpage_protection(mm, ea)))
1627 return false;
1628
1629 return true;
1630}
1631#else
1632static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
1633{
1634 return true;
1635}
1636#endif
1637
1638static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
1639 bool is_exec, unsigned long trap)
1640{
1641 unsigned long vsid;
1642 pgd_t *pgdir;
1643 int rc, ssize, update_flags = 0;
1644 unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
1645 unsigned long flags;
1646
1647 BUG_ON(get_region_id(ea) != USER_REGION_ID);
1648
1649 if (!should_hash_preload(mm, ea))
1650 return;
1651
1652 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
1653 " trap=%lx\n", mm, mm->pgd, ea, access, trap);
1654
1655
1656 pgdir = mm->pgd;
1657 if (pgdir == NULL)
1658 return;
1659
1660
1661 ssize = user_segment_size(ea);
1662 vsid = get_user_vsid(&mm->context, ea, ssize);
1663 if (!vsid)
1664 return;
1665
1666#ifdef CONFIG_PPC_64K_PAGES
1667
1668
1669
1670
1671
1672
1673
1674
1675 if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep))
1676 return;
1677#endif
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699 local_irq_save(flags);
1700
1701
1702 if (mm_is_thread_local(mm))
1703 update_flags |= HPTE_LOCAL_UPDATE;
1704
1705
1706#ifdef CONFIG_PPC_64K_PAGES
1707 if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
1708 rc = __hash_page_64K(ea, access, vsid, ptep, trap,
1709 update_flags, ssize);
1710 else
1711#endif
1712 rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags,
1713 ssize, subpage_protection(mm, ea));
1714
1715
1716
1717
1718 if (rc == -1)
1719 hash_failure_debug(ea, access, vsid, trap, ssize,
1720 mm_ctx_user_psize(&mm->context),
1721 mm_ctx_user_psize(&mm->context),
1722 pte_val(*ptep));
1723
1724 local_irq_restore(flags);
1725}
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
1736 pte_t *ptep)
1737{
1738
1739
1740
1741
1742 unsigned long trap;
1743 bool is_exec;
1744
1745 if (radix_enabled())
1746 return;
1747
1748
1749 if (!pte_young(*ptep) || address >= TASK_SIZE)
1750 return;
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761 trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
1762 switch (trap) {
1763 case 0x300:
1764 is_exec = false;
1765 break;
1766 case 0x400:
1767 is_exec = true;
1768 break;
1769 default:
1770 return;
1771 }
1772
1773 hash_preload(vma->vm_mm, ptep, address, is_exec, trap);
1774}
1775
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * After a local hash flush, abort the current task's transaction if one is
 * active: TM is enabled and tm_abort() is issued with cause TM_CAUSE_TLBI.
 * Nothing happens for non-local flushes, on CPUs without CPU_FTR_TM, or
 * when the task has no saved registers.
 */
static inline void tm_flush_hash_page(int local)
{
	if (!local)
		return;

	if (!cpu_has_feature(CPU_FTR_TM))
		return;

	if (!current->thread.regs)
		return;

	if (!MSR_TM_ACTIVE(current->thread.regs->msr))
		return;

	tm_enable();
	tm_abort(TM_CAUSE_TLBI);
}
#else
/* Transactional memory not configured: nothing to abort. */
static inline void tm_flush_hash_page(int local)
{
}
#endif
1798
1799
1800
1801
1802
1803unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
1804 int ssize, real_pte_t rpte, unsigned int subpg_index)
1805{
1806 unsigned long hash, gslot, hidx;
1807
1808 hash = hpt_hash(vpn, shift, ssize);
1809 hidx = __rpte_to_hidx(rpte, subpg_index);
1810 if (hidx & _PTEIDX_SECONDARY)
1811 hash = ~hash;
1812 gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
1813 gslot += hidx & _PTEIDX_GROUP_IX;
1814 return gslot;
1815}
1816
1817void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
1818 unsigned long flags)
1819{
1820 unsigned long index, shift, gslot;
1821 int local = flags & HPTE_LOCAL_UPDATE;
1822
1823 DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
1824 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
1825 gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
1826 DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
1827
1828
1829
1830
1831 mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
1832 ssize, local);
1833 } pte_iterate_hashed_end();
1834
1835 tm_flush_hash_page(local);
1836}
1837
1838#ifdef CONFIG_TRANSPARENT_HUGEPAGE
1839void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
1840 pmd_t *pmdp, unsigned int psize, int ssize,
1841 unsigned long flags)
1842{
1843 int i, max_hpte_count, valid;
1844 unsigned long s_addr;
1845 unsigned char *hpte_slot_array;
1846 unsigned long hidx, shift, vpn, hash, slot;
1847 int local = flags & HPTE_LOCAL_UPDATE;
1848
1849 s_addr = addr & HPAGE_PMD_MASK;
1850 hpte_slot_array = get_hpte_slot_array(pmdp);
1851
1852
1853
1854
1855
1856 if (!hpte_slot_array)
1857 return;
1858
1859 if (mmu_hash_ops.hugepage_invalidate) {
1860 mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
1861 psize, ssize, local);
1862 goto tm_abort;
1863 }
1864
1865
1866
1867 shift = mmu_psize_defs[psize].shift;
1868 max_hpte_count = HPAGE_PMD_SIZE >> shift;
1869 for (i = 0; i < max_hpte_count; i++) {
1870
1871
1872
1873
1874 valid = hpte_valid(hpte_slot_array, i);
1875 if (!valid)
1876 continue;
1877 hidx = hpte_hash_index(hpte_slot_array, i);
1878
1879
1880 addr = s_addr + (i * (1ul << shift));
1881 vpn = hpt_vpn(addr, vsid, ssize);
1882 hash = hpt_hash(vpn, shift, ssize);
1883 if (hidx & _PTEIDX_SECONDARY)
1884 hash = ~hash;
1885
1886 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
1887 slot += hidx & _PTEIDX_GROUP_IX;
1888 mmu_hash_ops.hpte_invalidate(slot, vpn, psize,
1889 MMU_PAGE_16M, ssize, local);
1890 }
1891tm_abort:
1892 tm_flush_hash_page(local);
1893}
1894#endif
1895
1896void flush_hash_range(unsigned long number, int local)
1897{
1898 if (mmu_hash_ops.flush_hash_range)
1899 mmu_hash_ops.flush_hash_range(number, local);
1900 else {
1901 int i;
1902 struct ppc64_tlb_batch *batch =
1903 this_cpu_ptr(&ppc64_tlb_batch);
1904
1905 for (i = 0; i < number; i++)
1906 flush_hash_page(batch->vpn[i], batch->pte[i],
1907 batch->psize, batch->ssize, local);
1908 }
1909}
1910
1911long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
1912 unsigned long pa, unsigned long rflags,
1913 unsigned long vflags, int psize, int ssize)
1914{
1915 unsigned long hpte_group;
1916 long slot;
1917
1918repeat:
1919 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
1920
1921
1922 slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
1923 psize, psize, ssize);
1924
1925
1926 if (unlikely(slot == -1)) {
1927 hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
1928 slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
1929 vflags | HPTE_V_SECONDARY,
1930 psize, psize, ssize);
1931 if (slot == -1) {
1932 if (mftb() & 0x1)
1933 hpte_group = (hash & htab_hash_mask) *
1934 HPTES_PER_GROUP;
1935
1936 mmu_hash_ops.hpte_remove(hpte_group);
1937 goto repeat;
1938 }
1939 }
1940
1941 return slot;
1942}
1943
1944#ifdef CONFIG_DEBUG_PAGEALLOC
1945static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
1946{
1947 unsigned long hash;
1948 unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
1949 unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
1950 unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
1951 long ret;
1952
1953 hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
1954
1955
1956 if (!vsid)
1957 return;
1958
1959 ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
1960 HPTE_V_BOLTED,
1961 mmu_linear_psize, mmu_kernel_ssize);
1962
1963 BUG_ON (ret < 0);
1964 spin_lock(&linear_map_hash_lock);
1965 BUG_ON(linear_map_hash_slots[lmi] & 0x80);
1966 linear_map_hash_slots[lmi] = ret | 0x80;
1967 spin_unlock(&linear_map_hash_lock);
1968}
1969
1970static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
1971{
1972 unsigned long hash, hidx, slot;
1973 unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
1974 unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
1975
1976 hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
1977 spin_lock(&linear_map_hash_lock);
1978 BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
1979 hidx = linear_map_hash_slots[lmi] & 0x7f;
1980 linear_map_hash_slots[lmi] = 0;
1981 spin_unlock(&linear_map_hash_lock);
1982 if (hidx & _PTEIDX_SECONDARY)
1983 hash = ~hash;
1984 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
1985 slot += hidx & _PTEIDX_GROUP_IX;
1986 mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize,
1987 mmu_linear_psize,
1988 mmu_kernel_ssize, 0);
1989}
1990
1991void __kernel_map_pages(struct page *page, int numpages, int enable)
1992{
1993 unsigned long flags, vaddr, lmi;
1994 int i;
1995
1996 local_irq_save(flags);
1997 for (i = 0; i < numpages; i++, page++) {
1998 vaddr = (unsigned long)page_address(page);
1999 lmi = __pa(vaddr) >> PAGE_SHIFT;
2000 if (lmi >= linear_map_hash_count)
2001 continue;
2002 if (enable)
2003 kernel_map_linear_page(vaddr, lmi);
2004 else
2005 kernel_unmap_linear_page(vaddr, lmi);
2006 }
2007 local_irq_restore(flags);
2008}
2009#endif
2010
2011void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
2012 phys_addr_t first_memblock_size)
2013{
2014
2015
2016
2017
2018 BUG_ON(first_memblock_base != 0);
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034 if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
2035 ppc64_rma_size = first_memblock_size;
2036 if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
2037 ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
2038 else
2039 ppc64_rma_size = min_t(u64, ppc64_rma_size,
2040 1UL << SID_SHIFT_1T);
2041
2042
2043 memblock_set_current_limit(ppc64_rma_size);
2044 } else {
2045 ppc64_rma_size = ULONG_MAX;
2046 }
2047}
2048
2049#ifdef CONFIG_DEBUG_FS
2050
2051static int hpt_order_get(void *data, u64 *val)
2052{
2053 *val = ppc64_pft_size;
2054 return 0;
2055}
2056
2057static int hpt_order_set(void *data, u64 val)
2058{
2059 int ret;
2060
2061 if (!mmu_hash_ops.resize_hpt)
2062 return -ENODEV;
2063
2064 cpus_read_lock();
2065 ret = mmu_hash_ops.resize_hpt(val);
2066 cpus_read_unlock();
2067
2068 return ret;
2069}
2070
2071DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
2072
2073static int __init hash64_debugfs(void)
2074{
2075 debugfs_create_file("hpt_order", 0600, arch_debugfs_dir, NULL,
2076 &fops_hpt_order);
2077 return 0;
2078}
2079machine_device_initcall(pseries, hash64_debugfs);
2080#endif
2081
2082void __init print_system_hash_info(void)
2083{
2084 pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
2085
2086 if (htab_hash_mask)
2087 pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
2088}
2089