// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Page table handling routines for radix page table.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#define pr_fmt(fmt) "radix-mmu: " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/string_helpers.h>
#include <linux/stop_machine.h>

#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/dma.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/smp.h>
#include <asm/trace.h>
#include <asm/uaccess.h>
#include <asm/ultravisor.h>

#include <trace/events/thp.h>

unsigned int mmu_pid_bits;
unsigned int mmu_base_pid;

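/*
 * Boot-time page table allocator: grabs naturally aligned memory from
 * memblock, optionally constrained to a node/region, and panics on
 * failure since the kernel cannot run without its page tables.
 */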
static __ref void *early_alloc_pgtable(unsigned long size, int nid,
                        unsigned long region_start, unsigned long region_end)
{
        phys_addr_t min_addr = MEMBLOCK_LOW_LIMIT;
        phys_addr_t max_addr = MEMBLOCK_ALLOC_ANYWHERE;
        void *ptr;

        if (region_start)
                min_addr = region_start;
        if (region_end)
                max_addr = region_end;

        ptr = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);

        if (!ptr)
                panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
                      __func__, size, size, nid, &min_addr, &max_addr);

        return ptr;
}

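/*
 * Map a single kernel page before the slab allocator is up, creating any
 * missing intermediate page table levels from memblock. A PUD_SIZE or
 * PMD_SIZE request installs a leaf entry at the corresponding level.
 */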
static int early_map_kernel_page(unsigned long ea, unsigned long pa,
                          pgprot_t flags,
                          unsigned int map_page_size,
                          int nid,
                          unsigned long region_start, unsigned long region_end)
{
        unsigned long pfn = pa >> PAGE_SHIFT;
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;

        pgdp = pgd_offset_k(ea);
        p4dp = p4d_offset(pgdp, ea);
        if (p4d_none(*p4dp)) {
                pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
                                           region_start, region_end);
                p4d_populate(&init_mm, p4dp, pudp);
        }
        pudp = pud_offset(p4dp, ea);
        if (map_page_size == PUD_SIZE) {
                ptep = (pte_t *)pudp;
                goto set_the_pte;
        }
        if (pud_none(*pudp)) {
                pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
                                           region_start, region_end);
                pud_populate(&init_mm, pudp, pmdp);
        }
        pmdp = pmd_offset(pudp, ea);
        if (map_page_size == PMD_SIZE) {
                ptep = pmdp_ptep(pmdp);
                goto set_the_pte;
        }
        if (!pmd_present(*pmdp)) {
                ptep = early_alloc_pgtable(PAGE_SIZE, nid,
                                           region_start, region_end);
                pmd_populate_kernel(&init_mm, pmdp, ptep);
        }
        ptep = pte_offset_kernel(pmdp, ea);

set_the_pte:
        set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
        smp_wmb();
        return 0;
}

/*
 * nid, region_start, and region_end are hints to try to place the page
 * table memory in the same node or region.
 */
static int __map_kernel_page(unsigned long ea, unsigned long pa,
                      pgprot_t flags,
                      unsigned int map_page_size,
                      int nid,
                      unsigned long region_start, unsigned long region_end)
{
        unsigned long pfn = pa >> PAGE_SHIFT;
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;

        /*
         * Make sure task size is correct as per the max addr
         */
        BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);

#ifdef CONFIG_PPC_64K_PAGES
        BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
#endif

        if (unlikely(!slab_is_available()))
                return early_map_kernel_page(ea, pa, flags, map_page_size,
                                             nid, region_start, region_end);

        /*
         * Should make page table allocation functions be able to take a
         * node, so we can place kernel page tables on the right nodes
         * after boot.
         */
        pgdp = pgd_offset_k(ea);
        p4dp = p4d_offset(pgdp, ea);
        pudp = pud_alloc(&init_mm, p4dp, ea);
        if (!pudp)
                return -ENOMEM;
        if (map_page_size == PUD_SIZE) {
                ptep = (pte_t *)pudp;
                goto set_the_pte;
        }
        pmdp = pmd_alloc(&init_mm, pudp, ea);
        if (!pmdp)
                return -ENOMEM;
        if (map_page_size == PMD_SIZE) {
                ptep = pmdp_ptep(pmdp);
                goto set_the_pte;
        }
        ptep = pte_alloc_kernel(pmdp, ea);
        if (!ptep)
                return -ENOMEM;

set_the_pte:
        set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
        smp_wmb();
        return 0;
}

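/*
 * Public wrapper: map a kernel page with no NUMA or region placement hint.
 */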
int radix__map_kernel_page(unsigned long ea, unsigned long pa,
                         pgprot_t flags,
                         unsigned int map_page_size)
{
        return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
}

#ifdef CONFIG_STRICT_KERNEL_RWX
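/*
 * Clear the given PTE bits on every page in [start, end), stopping at
 * whatever leaf level (PUD/PMD/PTE) actually maps each address, then
 * flush the kernel TLB for the range.
 */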
void radix__change_memory_range(unsigned long start, unsigned long end,
                                unsigned long clear)
{
        unsigned long idx;
        pgd_t *pgdp;
        p4d_t *p4dp;
        pud_t *pudp;
        pmd_t *pmdp;
        pte_t *ptep;

        start = ALIGN_DOWN(start, PAGE_SIZE);
        end = PAGE_ALIGN(end);

        pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
                 start, end, clear);

        for (idx = start; idx < end; idx += PAGE_SIZE) {
                pgdp = pgd_offset_k(idx);
                p4dp = p4d_offset(pgdp, idx);
                pudp = pud_alloc(&init_mm, p4dp, idx);
                if (!pudp)
                        continue;
                if (pud_is_leaf(*pudp)) {
                        ptep = (pte_t *)pudp;
                        goto update_the_pte;
                }
                pmdp = pmd_alloc(&init_mm, pudp, idx);
                if (!pmdp)
                        continue;
                if (pmd_is_leaf(*pmdp)) {
                        ptep = pmdp_ptep(pmdp);
                        goto update_the_pte;
                }
                ptep = pte_alloc_kernel(pmdp, idx);
                if (!ptep)
                        continue;
update_the_pte:
                radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
        }

        radix__flush_tlb_kernel_range(start, end);
}

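/*
 * STRICT_KERNEL_RWX: strip write permission from kernel text/rodata and
 * execute permission from initmem once it is no longer needed.
 */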
void radix__mark_rodata_ro(void)
{
        unsigned long start, end;

        start = (unsigned long)_stext;
        end = (unsigned long)__init_begin;

        radix__change_memory_range(start, end, _PAGE_WRITE);
}

void radix__mark_initmem_nx(void)
{
        unsigned long start = (unsigned long)__init_begin;
        unsigned long end = (unsigned long)__init_end;

        radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */

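/*
 * Log one contiguous run of the linear mapping, e.g.
 * "Mapped 0x0000000000000000-0x0000000040000000 with 1.00 GiB pages".
 */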
static inline void __meminit
print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
{
        char buf[10];

        if (end <= start)
                return;

        string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));

        pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
                exec ? " (exec)" : "");
}

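/*
 * With STRICT_KERNEL_RWX the linear map must not cross __init_begin with
 * a single large page, so that text and data can get different
 * protections; cap the current mapping at that boundary.
 */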
static unsigned long next_boundary(unsigned long addr, unsigned long end)
{
#ifdef CONFIG_STRICT_KERNEL_RWX
        if (addr < __pa_symbol(__init_begin))
                return __pa_symbol(__init_begin);
#endif
        return end;
}

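/*
 * Map a physical range into the linear mapping, greedily using the
 * largest page size (1G, then 2M, then base pages) that alignment,
 * remaining gap and MMU support allow. Ranges overlapping kernel text or
 * the interrupt vectors get PAGE_KERNEL_X.
 */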
static int __meminit create_physical_mapping(unsigned long start,
                                             unsigned long end,
                                             int nid, pgprot_t _prot)
{
        unsigned long vaddr, addr, mapping_size = 0;
        bool prev_exec, exec = false;
        pgprot_t prot;
        int psize;

        start = ALIGN(start, PAGE_SIZE);
        for (addr = start; addr < end; addr += mapping_size) {
                unsigned long gap, previous_size;
                int rc;

                gap = next_boundary(addr, end) - addr;
                previous_size = mapping_size;
                prev_exec = exec;

                if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
                    mmu_psize_defs[MMU_PAGE_1G].shift) {
                        mapping_size = PUD_SIZE;
                        psize = MMU_PAGE_1G;
                } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
                           mmu_psize_defs[MMU_PAGE_2M].shift) {
                        mapping_size = PMD_SIZE;
                        psize = MMU_PAGE_2M;
                } else {
                        mapping_size = PAGE_SIZE;
                        psize = mmu_virtual_psize;
                }

                vaddr = (unsigned long)__va(addr);

                if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
                    overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
                        prot = PAGE_KERNEL_X;
                        exec = true;
                } else {
                        prot = _prot;
                        exec = false;
                }

                if (mapping_size != previous_size || exec != prev_exec) {
                        print_mapping(start, addr, previous_size, prev_exec);
                        start = addr;
                }

                rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
                if (rc)
                        return rc;

                update_page_count(psize, 1);
        }

        print_mapping(start, addr, mapping_size, exec);
        return 0;
}

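/*
 * Build the initial radix page tables: create the linear mapping for all
 * memblock regions, size the PID space, and allocate the process table.
 */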
static void __init radix_init_pgtable(void)
{
        unsigned long rts_field;
        struct memblock_region *reg;

        /* We don't support slb for radix */
        mmu_slb_size = 0;
        /*
         * Create the linear mapping, using standard page size for now
         */
        for_each_memblock(memory, reg) {
                /*
                 * The memblock allocator is up at this point, so the
                 * page tables will be allocated within the range; no
                 * node hint is needed (we don't have one yet).
                 */
                if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
                        pr_warn("Outside the supported range\n");
                        continue;
                }

                WARN_ON(create_physical_mapping(reg->base,
                                                reg->base + reg->size,
                                                -1, PAGE_KERNEL));
        }

        /* Find out how many PID bits are supported */
        if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
                if (!mmu_pid_bits)
                        mmu_pid_bits = 20;
                mmu_base_pid = 1;
        } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
                if (!mmu_pid_bits)
                        mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
                /*
                 * When KVM is possible, we only use the top half of the
                 * PID space to avoid collisions between host and guest
                 * PIDs, which can cause problems due to prefetch when
                 * exiting the guest with AIL=3.
                 */
                mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
                mmu_base_pid = 1;
#endif
        } else {
                /* The guest MMU supports half the PID space */
                if (!mmu_pid_bits)
                        mmu_pid_bits = 19;
                mmu_base_pid = 1;
        }

        /*
         * Allocate Partition table and process table for the
         * host.
         */
        BUG_ON(PRTB_SIZE_SHIFT > 36);
        process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
        /*
         * Fill in the process table.
         */
        rts_field = radix__get_tree_size();
        process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);

        /*
         * The init_mm context is given the first available (non-zero) PID,
         * which is the "guard PID" and contains no page table. PIDR should
         * never be set to zero because that duplicates the kernel address
         * space at the 0x0... offset (quadrant 0)!
         *
         * An arbitrary PID that may later be allocated by the PID allocator
         * for userspace processes must not be used either, because that
         * would cause stale user mappings for that PID on CPUs outside of
         * the TLB invalidation scheme (because it won't be in mm_cpumask).
         *
         * So permanently carve out one PID for the purpose of a guard PID.
         */
        init_mm.context.id = mmu_base_pid;
        mmu_base_pid++;
}

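/*
 * Bare-metal (HV mode) only: install partition table entry 0 pointing at
 * the host radix tree and the process table.
 */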
static void __init radix_init_partition_table(void)
{
        unsigned long rts_field, dw0, dw1;

        mmu_partition_table_init();
        rts_field = radix__get_tree_size();
        dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
        dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
        mmu_partition_table_set_entry(0, dw0, dw1, false);

        pr_info("Initializing Radix MMU\n");
}

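/* Translate a page-size shift from the device tree into an MMU_PAGE_* index. */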
static int __init get_idx_from_shift(unsigned int shift)
{
        int idx = -1;

        switch (shift) {
        case 0xc:
                idx = MMU_PAGE_4K;
                break;
        case 0x10:
                idx = MMU_PAGE_64K;
                break;
        case 0x15:
                idx = MMU_PAGE_2M;
                break;
        case 0x1e:
                idx = MMU_PAGE_1G;
                break;
        }
        return idx;
}

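/*
 * Flat device tree scanner: read the supported radix page sizes (and PID
 * width) from the first "cpu" node's ibm,processor-radix-AP-encodings.
 */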
static int __init radix_dt_scan_page_sizes(unsigned long node,
                                           const char *uname, int depth,
                                           void *data)
{
        int size = 0;
        int shift, idx;
        unsigned int ap;
        const __be32 *prop;
        const char *type = of_get_flat_dt_prop(node, "device_type", NULL);

        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
                return 0;

        /* Find MMU PID size */
        prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
        if (prop && size == 4)
                mmu_pid_bits = be32_to_cpup(prop);

        /* Grab page size encodings */
        prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
        if (!prop)
                return 0;

        pr_info("Page sizes from device-tree:\n");
        for (; size >= 4; size -= 4, ++prop) {
                struct mmu_psize_def *def;

                /* top 3 bits are the AP encoding */
                shift = be32_to_cpu(prop[0]) & ~(0xe << 28);
                ap = be32_to_cpu(prop[0]) >> 29;
                pr_info("Page size shift = %d AP=0x%x\n", shift, ap);

                idx = get_idx_from_shift(shift);
                if (idx < 0)
                        continue;

                def = &mmu_psize_defs[idx];
                def->shift = shift;
                def->ap = ap;
        }

        /* needed ? */
        cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
        return 1;
}

void __init radix__early_init_devtree(void)
{
        int rc;

        /*
         * Try to find the available page sizes in the device-tree
         */
        rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
        if (rc != 0)
                goto found;

        /*
         * No page size details found in the device tree; let's assume we
         * have page 4k and 64k support.
         */
        mmu_psize_defs[MMU_PAGE_4K].shift = 12;
        mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;

        mmu_psize_defs[MMU_PAGE_64K].shift = 16;
        mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
found:
        return;
}

static void radix_init_amor(void)
{
        /*
         * In HV mode, we init AMOR (Authority Mask Override Register) so
         * that the hypervisor and guest can set up IAMR (Instruction
         * Authority Mask Register), enable key 0 and set it to 1.
         *
         * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
         */
        mtspr(SPRN_AMOR, (3ul << 62));
}

#ifdef CONFIG_PPC_KUEP
void setup_kuep(bool disabled)
{
        if (disabled || !early_radix_enabled())
                return;

        if (smp_processor_id() == boot_cpuid)
                pr_info("Activating Kernel Userspace Execution Prevention\n");

        /*
         * Radix always uses key0 of the IAMR to determine if an access is
         * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
         * fetch.
         */
        mtspr(SPRN_IAMR, (1ul << 62));
}
#endif

#ifdef CONFIG_PPC_KUAP
void setup_kuap(bool disabled)
{
        if (disabled || !early_radix_enabled())
                return;

        if (smp_processor_id() == boot_cpuid) {
                pr_info("Activating Kernel Userspace Access Prevention\n");
                cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
        }

        /* Make sure userspace can't change the AMR */
        mtspr(SPRN_UAMOR, 0);
        mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
        isync();
}
#endif

void __init radix__early_init_mmu(void)
{
        unsigned long lpcr;

#ifdef CONFIG_PPC_64K_PAGES
        /* PAGE_SIZE mappings */
        mmu_virtual_psize = MMU_PAGE_64K;
#else
        mmu_virtual_psize = MMU_PAGE_4K;
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
        /* vmemmap mapping */
        if (mmu_psize_defs[MMU_PAGE_2M].shift) {
                /*
                 * map vmemmap using 2M if available
                 */
                mmu_vmemmap_psize = MMU_PAGE_2M;
        } else
                mmu_vmemmap_psize = mmu_virtual_psize;
#endif

        /*
         * initialize page table size
         */
        __pte_index_size = RADIX_PTE_INDEX_SIZE;
        __pmd_index_size = RADIX_PMD_INDEX_SIZE;
        __pud_index_size = RADIX_PUD_INDEX_SIZE;
        __pgd_index_size = RADIX_PGD_INDEX_SIZE;
        __pud_cache_index = RADIX_PUD_INDEX_SIZE;
        __pte_table_size = RADIX_PTE_TABLE_SIZE;
        __pmd_table_size = RADIX_PMD_TABLE_SIZE;
        __pud_table_size = RADIX_PUD_TABLE_SIZE;
        __pgd_table_size = RADIX_PGD_TABLE_SIZE;

        __pmd_val_bits = RADIX_PMD_VAL_BITS;
        __pud_val_bits = RADIX_PUD_VAL_BITS;
        __pgd_val_bits = RADIX_PGD_VAL_BITS;

        __kernel_virt_start = RADIX_KERN_VIRT_START;
        __vmalloc_start = RADIX_VMALLOC_START;
        __vmalloc_end = RADIX_VMALLOC_END;
        __kernel_io_start = RADIX_KERN_IO_START;
        __kernel_io_end = RADIX_KERN_IO_END;
        vmemmap = (struct page *)RADIX_VMEMMAP_START;
        ioremap_bot = IOREMAP_BASE;

#ifdef CONFIG_PCI
        pci_io_base = ISA_IO_BASE;
#endif
        __pte_frag_nr = RADIX_PTE_FRAG_NR;
        __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
        __pmd_frag_nr = RADIX_PMD_FRAG_NR;
        __pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;

        radix_init_pgtable();

        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
                radix_init_partition_table();
                radix_init_amor();
        } else {
                radix_init_pseries();
        }

        memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);

        /* Switch to the guard PID before turning on MMU */
        radix__switch_mmu_context(NULL, &init_mm);
        tlbiel_all();
}

void radix__early_init_mmu_secondary(void)
{
        unsigned long lpcr;

        /*
         * update partition table control register and UPRT
         */
        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);

                set_ptcr_when_no_uv(__pa(partition_tb) |
                                    (PATB_SIZE_SHIFT - 12));

                radix_init_amor();
        }

        radix__switch_mmu_context(NULL, &init_mm);
        tlbiel_all();
}

void radix__mmu_cleanup_all(void)
{
        unsigned long lpcr;

        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
                set_ptcr_when_no_uv(0);
                powernv_set_nmmu_ptcr(0);
                radix__flush_tlb_all();
        }
}

void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
                                       phys_addr_t first_memblock_size)
{
        /*
         * We don't currently support the first MEMBLOCK not mapping 0
         * physical on those processors
         */
        BUG_ON(first_memblock_base != 0);

        /*
         * Radix mode is not limited by RMA / VRMA addressing.
         */
        ppc64_rma_size = ULONG_MAX;
}

#ifdef CONFIG_MEMORY_HOTPLUG
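/*
 * Free an intermediate table (and clear the entry pointing at it) once
 * every slot in it is empty; used when tearing down hot-unplugged ranges.
 */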
static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
        pte_t *pte;
        int i;

        for (i = 0; i < PTRS_PER_PTE; i++) {
                pte = pte_start + i;
                if (!pte_none(*pte))
                        return;
        }

        pte_free_kernel(&init_mm, pte_start);
        pmd_clear(pmd);
}

static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
        pmd_t *pmd;
        int i;

        for (i = 0; i < PTRS_PER_PMD; i++) {
                pmd = pmd_start + i;
                if (!pmd_none(*pmd))
                        return;
        }

        pmd_free(&init_mm, pmd_start);
        pud_clear(pud);
}

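/*
 * Parameters for splitting a huge linear mapping around a removed range:
 * [aligned_start, start) and [end, aligned_end) must be remapped with
 * smaller pages after the huge PTE at 'pte' is cleared.
 */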
struct change_mapping_params {
        pte_t *pte;
        unsigned long start;
        unsigned long end;
        unsigned long aligned_start;
        unsigned long aligned_end;
};

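/*
 * Runs under stop_machine(), so no other CPU can be walking these page
 * tables; init_mm.page_table_lock is dropped across the remap because
 * create_physical_mapping() may need to take it when allocating tables.
 */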
static int __meminit stop_machine_change_mapping(void *data)
{
        struct change_mapping_params *params =
                (struct change_mapping_params *)data;

        if (!data)
                return -1;

        spin_unlock(&init_mm.page_table_lock);
        pte_clear(&init_mm, params->aligned_start, params->pte);
        create_physical_mapping(__pa(params->aligned_start),
                                __pa(params->start), -1, PAGE_KERNEL);
        create_physical_mapping(__pa(params->end), __pa(params->aligned_end),
                                -1, PAGE_KERNEL);
        spin_lock(&init_mm.page_table_lock);
        return 0;
}

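/*
 * Clear all PTEs in [addr, end). Callers pass page-aligned ranges;
 * anything else is flagged and skipped rather than partially cleared.
 */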
static void remove_pte_table(pte_t *pte_start, unsigned long addr,
                             unsigned long end)
{
        unsigned long next;
        pte_t *pte;

        pte = pte_start + pte_index(addr);
        for (; addr < end; addr = next, pte++) {
                next = (addr + PAGE_SIZE) & PAGE_MASK;
                if (next > end)
                        next = end;

                if (!pte_present(*pte))
                        continue;

                if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
                        /*
                         * The vmemmap_free() and remove_section_mapping()
                         * codepaths call us with aligned addresses.
                         */
                        WARN_ONCE(1, "%s: unaligned range\n", __func__);
                        continue;
                }

                pte_clear(&init_mm, addr, pte);
        }
}

/*
 * Clear the pte and potentially split the mapping helper
 */
static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
                                           unsigned long size, pte_t *pte)
{
        unsigned long mask = ~(size - 1);
        unsigned long aligned_start = addr & mask;
        unsigned long aligned_end = addr + size;
        struct change_mapping_params params;
        bool split_region = false;

        if ((end - addr) < size) {
                /*
                 * The removed range is smaller than the mapping size, so
                 * the huge page must be split: clear it and remap the
                 * surviving head and tail with smaller pages, under
                 * stop_machine() since other CPUs may be running through
                 * the mapping we are about to clear.
                 */
                if (overlaps_kernel_text(aligned_start, addr) ||
                    overlaps_kernel_text(end, aligned_end)) {
                        /*
                         * If the split mapping would cover kernel text we
                         * cannot safely clear and remap it: warn and keep
                         * the original huge mapping instead.
                         */
                        WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
                                  "text, not splitting\n", addr, end);
                        return;
                }
                split_region = true;
        }

        if (split_region) {
                params.pte = pte;
                params.start = addr;
                params.end = end;
                params.aligned_start = addr & ~(size - 1);
                params.aligned_end = min_t(unsigned long, aligned_end,
                                           (unsigned long)__va(memblock_end_of_DRAM()));
                stop_machine(stop_machine_change_mapping, &params, NULL);
                return;
        }

        pte_clear(&init_mm, addr, pte);
}

static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                             unsigned long end)
{
        unsigned long next;
        pte_t *pte_base;
        pmd_t *pmd;

        pmd = pmd_start + pmd_index(addr);
        for (; addr < end; addr = next, pmd++) {
                next = pmd_addr_end(addr, end);

                if (!pmd_present(*pmd))
                        continue;

                if (pmd_is_leaf(*pmd)) {
                        split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
                        continue;
                }

                pte_base = (pte_t *)pmd_page_vaddr(*pmd);
                remove_pte_table(pte_base, addr, next);
                free_pte_table(pte_base, pmd);
        }
}

static void remove_pud_table(pud_t *pud_start, unsigned long addr,
                             unsigned long end)
{
        unsigned long next;
        pmd_t *pmd_base;
        pud_t *pud;

        pud = pud_start + pud_index(addr);
        for (; addr < end; addr = next, pud++) {
                next = pud_addr_end(addr, end);

                if (!pud_present(*pud))
                        continue;

                if (pud_is_leaf(*pud)) {
                        split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
                        continue;
                }

                pmd_base = (pmd_t *)pud_page_vaddr(*pud);
                remove_pmd_table(pmd_base, addr, next);
                free_pmd_table(pmd_base, pud);
        }
}

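/*
 * Top-level teardown: walk [start, end) from the PGD down, unmapping leaf
 * entries (splitting huge mappings where needed), freeing emptied tables,
 * then flushing the TLB for the whole range.
 */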
static void __meminit remove_pagetable(unsigned long start, unsigned long end)
{
        unsigned long addr, next;
        pud_t *pud_base;
        pgd_t *pgd;
        p4d_t *p4d;

        spin_lock(&init_mm.page_table_lock);

        for (addr = start; addr < end; addr = next) {
                next = pgd_addr_end(addr, end);

                pgd = pgd_offset_k(addr);
                p4d = p4d_offset(pgd, addr);
                if (!p4d_present(*p4d))
                        continue;

                if (p4d_is_leaf(*p4d)) {
                        split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d);
                        continue;
                }

                pud_base = (pud_t *)p4d_page_vaddr(*p4d);
                remove_pud_table(pud_base, addr, next);
        }

        spin_unlock(&init_mm.page_table_lock);
        radix__flush_tlb_kernel_range(start, end);
}

int __meminit radix__create_section_mapping(unsigned long start,
                                            unsigned long end, int nid,
                                            pgprot_t prot)
{
        if (end >= RADIX_VMALLOC_START) {
                pr_warn("Outside the supported range\n");
                return -1;
        }

        return create_physical_mapping(__pa(start), __pa(end), nid, prot);
}

int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
{
        remove_pagetable(start, end);
        return 0;
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
                                 pgprot_t flags, unsigned int map_page_size,
                                 int nid)
{
        return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
}

int __meminit radix__vmemmap_create_mapping(unsigned long start,
                                            unsigned long page_size,
                                            unsigned long phys)
{
        /* Create a PTE encoding */
        unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
        int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
        int ret;

        if ((start + page_size) >= RADIX_VMEMMAP_END) {
                pr_warn("Outside the supported range\n");
                return -1;
        }

        ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
        BUG_ON(ret);

        return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
{
        remove_pagetable(start, start + page_size);
}
#endif
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

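/*
 * Atomically clear/set bits in a huge PMD, with tracing; the caller must
 * hold the PMD lock, and the entry must be a THP or devmap huge PMD.
 */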
unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
                                         pmd_t *pmdp, unsigned long clr,
                                         unsigned long set)
{
        unsigned long old;

#ifdef CONFIG_DEBUG_VM
        WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
        assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

        old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
        trace_hugepage_update(addr, old, clr, set);

        return old;
}

pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
                                 pmd_t *pmdp)
{
        pmd_t pmd;

        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
        VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
        VM_BUG_ON(pmd_devmap(*pmdp));
        /*
         * khugepaged calls this for a normal pmd
         */
        pmd = *pmdp;
        pmd_clear(pmdp);

        /*
         * pmdp_collapse_flush must ensure there is no parallel GUP walk
         * after this call, so that the page refcount is stable while the
         * page is collapsed; a collapse is not allowed while GUP holds the
         * page. Sending an IPI guarantees this, because the GUP walk runs
         * with IRQs disabled.
         */
        serialize_against_pte_lookup(vma->vm_mm);

        radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);

        return pmd;
}

/*
 * For us pgtable_t is pte_t *. To save a deposited page table, we treat
 * the allocated page table itself as a list head. On withdraw we zero out
 * the used part of the page table so that we don't keep stale entries
 * around after it is freed.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                       pgtable_t pgtable)
{
        struct list_head *lh = (struct list_head *) pgtable;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        if (!pmd_huge_pte(mm, pmdp))
                INIT_LIST_HEAD(lh);
        else
                list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
        pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
        pte_t *ptep;
        pgtable_t pgtable;
        struct list_head *lh;

        assert_spin_locked(pmd_lockptr(mm, pmdp));

        /* FIFO */
        pgtable = pmd_huge_pte(mm, pmdp);
        lh = (struct list_head *) pgtable;
        if (list_empty(lh))
                pmd_huge_pte(mm, pmdp) = NULL;
        else {
                pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
                list_del(lh);
        }
        /* Zero the two entries the list_head occupied before reuse. */
        ptep = (pte_t *) pgtable;
        *ptep = __pte(0);
        ptep++;
        *ptep = __pte(0);
        return pgtable;
}

pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
                                     unsigned long addr, pmd_t *pmdp)
{
        pmd_t old_pmd;
        unsigned long old;

        old = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
        old_pmd = __pmd(old);
        return old_pmd;
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
                                  pte_t entry, unsigned long address, int psize)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
                                              _PAGE_RW | _PAGE_EXEC);

        unsigned long change = pte_val(entry) ^ pte_val(*ptep);
        /*
         * To avoid NMMU hang while relaxing access, we need to mark the
         * pte invalid in between.
         */
        if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
                unsigned long old_pte, new_pte;

                old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
                /*
                 * new value of pte
                 */
                new_pte = old_pte | set;
                radix__flush_tlb_page_psize(mm, address, psize);
                __radix_pte_update(ptep, _PAGE_INVALID, new_pte);
        } else {
                __radix_pte_update(ptep, 0, set);

                /*
                 * Book3S does not require a TLB flush when relaxing access
                 * restrictions when the address space is not attached to a
                 * NMMU, because the core MMU will reload the pte after
                 * taking an access fault, which is defined by the
                 * architecture.
                 */
        }
}

void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
                                    unsigned long addr, pte_t *ptep,
                                    pte_t old_pte, pte_t pte)
{
        struct mm_struct *mm = vma->vm_mm;

        /*
         * To avoid NMMU hang while relaxing access we need to flush the
         * tlb before we set the new value. We need to do this only for
         * radix, because hash translation does flush when updating the
         * linux pte.
         */
        if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
            (atomic_read(&mm->context.copros) > 0))
                radix__flush_tlb_page(vma, addr);

        set_pte_at(mm, addr, ptep, pte);
}

int __init arch_ioremap_pud_supported(void)
{
        /* HPT does not cope with large pages in the vmalloc area */
        return radix_enabled();
}

int __init arch_ioremap_pmd_supported(void)
{
        return radix_enabled();
}

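/*
 * arch_ioremap_p4d_supported() returns false, so huge P4D mappings are
 * never created and there is never a PUD table to free here; returning 0
 * tells the caller nothing was freed.
 */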
int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
{
        return 0;
}

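/*
 * Install a huge ioremap entry at the PUD level by writing a leaf PTE
 * directly; the address argument of set_pte_at() is not used for kernel
 * mappings here, hence the literal 0.
 */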
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
        pte_t *ptep = (pte_t *)pud;
        pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);

        if (!radix_enabled())
                return 0;

        set_pte_at(&init_mm, 0, ptep, new_pud);

        return 1;
}

int pud_clear_huge(pud_t *pud)
{
        if (pud_huge(*pud)) {
                pud_clear(pud);
                return 1;
        }

        return 0;
}

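/*
 * Tear down a PUD slot for ioremap: detach the PMD table, flush the
 * range, free any PTE tables hanging off it, then free the PMD table.
 */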
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
        pmd_t *pmd;
        int i;

        pmd = (pmd_t *)pud_page_vaddr(*pud);
        pud_clear(pud);

        flush_tlb_kernel_range(addr, addr + PUD_SIZE);

        for (i = 0; i < PTRS_PER_PMD; i++) {
                if (!pmd_none(pmd[i])) {
                        pte_t *pte;
                        pte = (pte_t *)pmd_page_vaddr(pmd[i]);

                        pte_free_kernel(&init_mm, pte);
                }
        }

        pmd_free(&init_mm, pmd);

        return 1;
}

int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
        pte_t *ptep = (pte_t *)pmd;
        pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);

        if (!radix_enabled())
                return 0;

        set_pte_at(&init_mm, 0, ptep, new_pmd);

        return 1;
}

int pmd_clear_huge(pmd_t *pmd)
{
        if (pmd_huge(*pmd)) {
                pmd_clear(pmd);
                return 1;
        }

        return 0;
}

int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
        pte_t *pte;

        pte = (pte_t *)pmd_page_vaddr(*pmd);
        pmd_clear(pmd);

        flush_tlb_kernel_range(addr, addr + PMD_SIZE);

        pte_free_kernel(&init_mm, pte);

        return 1;
}

int __init arch_ioremap_p4d_supported(void)
{
        return 0;
}