1
2
3
4
5
6
7
8
9
10
11#include <linux/mm.h>
12#include <linux/io.h>
13#include <linux/slab.h>
14#include <linux/hugetlb.h>
15#include <linux/export.h>
16#include <linux/of_fdt.h>
17#include <linux/memblock.h>
18#include <linux/bootmem.h>
19#include <linux/moduleparam.h>
20#include <asm/pgtable.h>
21#include <asm/pgalloc.h>
22#include <asm/tlb.h>
23#include <asm/setup.h>
24#include <asm/hugetlb.h>
25
26#ifdef CONFIG_HUGETLB_PAGE
27
/* Page-size shifts for the sizes handled specially below */
#define PAGE_SHIFT_64K 16
#define PAGE_SHIFT_16M 24
#define PAGE_SHIFT_16G 34

/* Set to disable hugetlb support; checked in hugetlbpage_init(). */
bool hugetlb_disabled = false;

/* Shift of the default huge page size, chosen in hugetlbpage_init(). */
unsigned int HPAGE_SHIFT;

/*
 * Tracking of boot-time reserved huge pages ("gpages").
 *
 * On FSL Book3E several huge page sizes may have boot-reserved pages,
 * so keep a per-psize list.  Elsewhere a single flat array of physical
 * addresses suffices.
 */
#ifdef CONFIG_PPC_FSL_BOOK3E
#define MAX_NUMBER_GPAGES 128
struct psize_gpages {
	u64 gpage_list[MAX_NUMBER_GPAGES];
	unsigned int nr_gpages;
};
static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
#else
#define MAX_NUMBER_GPAGES 1024
static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif

/* A huge page directory entry is unpopulated when its value is zero. */
#define hugepd_none(hpd) ((hpd).pd == 0)
57
58#ifdef CONFIG_PPC_BOOK3S_64
59
60
61
62
63
64
65
66
67
68int pmd_huge(pmd_t pmd)
69{
70
71
72
73 return ((pmd_val(pmd) & 0x3) != 0x0);
74}
75
76int pud_huge(pud_t pud)
77{
78
79
80
81 return ((pud_val(pud) & 0x3) != 0x0);
82}
83
84int pgd_huge(pgd_t pgd)
85{
86
87
88
89 return ((pgd_val(pgd) & 0x3) != 0x0);
90}
91#else
/*
 * Outside Book3S 64, huge pages are reached via hugepd directories
 * (see huge_pte_alloc() below), never as leaf pgd/pud/pmd entries,
 * so these predicates are always false.
 */
int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

int pgd_huge(pgd_t pgd)
{
	return 0;
}
106#endif
107
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	/*
	 * Walk the page tables for @addr.  The THP and shift out-params
	 * are passed as NULL because only the (huge) PTE pointer is
	 * needed here.
	 */
	return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
}
113
114static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
115 unsigned long address, unsigned pdshift, unsigned pshift)
116{
117 struct kmem_cache *cachep;
118 pte_t *new;
119
120#ifdef CONFIG_PPC_FSL_BOOK3E
121 int i;
122 int num_hugepd = 1 << (pshift - pdshift);
123 cachep = hugepte_cache;
124#else
125 cachep = PGT_CACHE(pdshift - pshift);
126#endif
127
128 new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
129
130 BUG_ON(pshift > HUGEPD_SHIFT_MASK);
131 BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
132
133 if (! new)
134 return -ENOMEM;
135
136 spin_lock(&mm->page_table_lock);
137#ifdef CONFIG_PPC_FSL_BOOK3E
138
139
140
141
142
143
144 for (i = 0; i < num_hugepd; i++, hpdp++) {
145 if (unlikely(!hugepd_none(*hpdp)))
146 break;
147 else
148
149 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
150 }
151
152 if (i < num_hugepd) {
153 for (i = i - 1 ; i >= 0; i--, hpdp--)
154 hpdp->pd = 0;
155 kmem_cache_free(cachep, new);
156 }
157#else
158 if (!hugepd_none(*hpdp))
159 kmem_cache_free(cachep, new);
160 else {
161#ifdef CONFIG_PPC_BOOK3S_64
162 hpdp->pd = (unsigned long)new |
163 (shift_to_mmu_psize(pshift) << 2);
164#else
165 hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
166#endif
167 }
168#endif
169 spin_unlock(&mm->page_table_lock);
170 return 0;
171}
172
173
174
175
176
177#ifdef CONFIG_PPC_FSL_BOOK3E
178#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
179#define HUGEPD_PUD_SHIFT PUD_SHIFT
180#else
181#define HUGEPD_PGD_SHIFT PUD_SHIFT
182#define HUGEPD_PUD_SHIFT PMD_SHIFT
183#endif
184
185#ifdef CONFIG_PPC_BOOK3S_64
186
187
188
189
/*
 * Book3S 64 placement: a huge page whose size exactly matches a
 * page-table level is stored as a leaf entry at that level; sizes in
 * between levels go through a hugepd directory hanging off the level
 * above.
 */
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	hugepd_t *hpdp = NULL;
	unsigned pshift = __ffs(sz);	/* sz is a power of two */
	unsigned pdshift = PGDIR_SHIFT;

	addr &= ~(sz-1);
	pg = pgd_offset(mm, addr);

	if (pshift == PGDIR_SHIFT)
		/* Page covers a whole pgd slot: the pgd entry is the pte. */
		return (pte_t *) pg;
	else if (pshift > PUD_SHIFT)
		/* Between pud and pgd coverage: hugepd off the pgd. */
		hpdp = (hugepd_t *)pg;
	else {
		pdshift = PUD_SHIFT;
		pu = pud_alloc(mm, pg, addr);
		if (pshift == PUD_SHIFT)
			return (pte_t *)pu;
		else if (pshift > PMD_SHIFT)
			hpdp = (hugepd_t *)pu;
		else {
			pdshift = PMD_SHIFT;
			pm = pmd_alloc(mm, pu, addr);
			if (pshift == PMD_SHIFT)
				/* Page covers a whole pmd slot. */
				return (pte_t *)pm;
			else
				hpdp = (hugepd_t *)pm;
		}
	}
	if (!hpdp)
		return NULL;

	/* Entry must be empty or already a valid hugepd. */
	BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));

	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
		return NULL;

	return hugepte_offset(hpdp, addr, pdshift);
}
237
238#else
239
240pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
241{
242 pgd_t *pg;
243 pud_t *pu;
244 pmd_t *pm;
245 hugepd_t *hpdp = NULL;
246 unsigned pshift = __ffs(sz);
247 unsigned pdshift = PGDIR_SHIFT;
248
249 addr &= ~(sz-1);
250
251 pg = pgd_offset(mm, addr);
252
253 if (pshift >= HUGEPD_PGD_SHIFT) {
254 hpdp = (hugepd_t *)pg;
255 } else {
256 pdshift = PUD_SHIFT;
257 pu = pud_alloc(mm, pg, addr);
258 if (pshift >= HUGEPD_PUD_SHIFT) {
259 hpdp = (hugepd_t *)pu;
260 } else {
261 pdshift = PMD_SHIFT;
262 pm = pmd_alloc(mm, pu, addr);
263 hpdp = (hugepd_t *)pm;
264 }
265 }
266
267 if (!hpdp)
268 return NULL;
269
270 BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
271
272 if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
273 return NULL;
274
275 return hugepte_offset(hpdp, addr, pdshift);
276}
277#endif
278
279#ifdef CONFIG_PPC_FSL_BOOK3E
280
281
282
283void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
284{
285 unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
286 int i;
287
288 if (addr == 0)
289 return;
290
291 gpage_freearray[idx].nr_gpages = number_of_pages;
292
293 for (i = 0; i < number_of_pages; i++) {
294 gpage_freearray[idx].gpage_list[i] = addr;
295 addr += page_size;
296 }
297}
298
299
300
301
302
/*
 * Move one reserved gpage of this hstate's size onto huge_boot_pages.
 * Returns 1 if a page was queued, 0 if none remain.
 */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
	struct huge_bootmem_page *m;
	int idx = shift_to_mmu_psize(huge_page_shift(hstate));
	int nr_gpages = gpage_freearray[idx].nr_gpages;

	if (nr_gpages == 0)
		return 0;

#ifdef CONFIG_HIGHMEM
	/*
	 * The gpage may be in highmem (not directly addressable), so the
	 * bookkeeping struct is allocated separately and only records the
	 * physical address of the page.
	 */
	m = alloc_bootmem(sizeof(struct huge_bootmem_page));
	m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
#else
	/* Lowmem: reuse the (direct-mapped) page itself as bookkeeping. */
	m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
#endif

	list_add(&m->list, &huge_boot_pages);
	/* Pop the entry off the free list. */
	gpage_freearray[idx].nr_gpages = nr_gpages;
	gpage_freearray[idx].gpage_list[nr_gpages] = 0;
	m->hstate = hstate;

	return 1;
}
330
331
332
333
334
/* Per-psize counts of gpages requested on the kernel command line. */
unsigned long gpage_npages[MMU_PAGE_COUNT];

static int __init do_gpage_early_setup(char *param, char *val,
				       const char *unused)
{
	/*
	 * "hugepagesz=" and "hugepages=" options are interleaved on the
	 * command line: a size followed by its count.  The static 'size'
	 * remembers the most recent size so the next "hugepages=" can be
	 * credited to it; it is reset once consumed.
	 */
	static phys_addr_t size;
	unsigned long npages;

	if ((strcmp(param, "default_hugepagesz") == 0) ||
	    (strcmp(param, "hugepagesz") == 0)) {
		size = memparse(val, NULL);
	} else if (strcmp(param, "hugepages") == 0) {
		if (size != 0) {
			if (sscanf(val, "%lu", &npages) <= 0)
				npages = 0;	/* unparseable count */
			gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
			size = 0;	/* consumed */
		}
	}
	return 0;
}
363
364
365
366
367
368
369
370
371
/*
 * At boot, reserve physical memory (via memblock) for the huge pages
 * requested on the command line, before the page allocator exists.
 */
void __init reserve_hugetlb_gpages(void)
{
	static __initdata char cmdline[COMMAND_LINE_SIZE];
	phys_addr_t size, base;
	int i;

	/* Re-parse a private copy of the command line for gpage options. */
	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
	parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
			&do_gpage_early_setup);

	/*
	 * Walk psizes from largest to smallest, skipping unsupported sizes
	 * and sizes with no pages requested.  Stop once sizes are small
	 * enough (< MAX_ORDER + PAGE_SHIFT) for the normal page allocator
	 * to handle them later.
	 */
	for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
		if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
			continue;
		else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
			break;

		/* Reserve naturally-aligned memory for all pages of this size. */
		size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
		base = memblock_alloc_base(size * gpage_npages[i], size,
					   MEMBLOCK_ALLOC_ANYWHERE);
		add_gpage(base, size, gpage_npages[i]);
	}
}
400
401#else
402
403
404
405
/*
 * Record boot-time reserved huge pages on the flat free list.
 * NOTE(review): no bound check against MAX_NUMBER_GPAGES - callers are
 * trusted not to reserve more than that; confirm.
 */
void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
	if (!addr)
		return;
	while (number_of_pages > 0) {
		gpage_freearray[nr_gpages] = addr;
		nr_gpages++;
		number_of_pages--;
		addr += page_size;
	}
}
417
418
419
420
421int alloc_bootmem_huge_page(struct hstate *hstate)
422{
423 struct huge_bootmem_page *m;
424 if (nr_gpages == 0)
425 return 0;
426 m = phys_to_virt(gpage_freearray[--nr_gpages]);
427 gpage_freearray[nr_gpages] = 0;
428 list_add(&m->list, &huge_boot_pages);
429 m->hstate = hstate;
430 return 1;
431}
432#endif
433
/*
 * powerpc never shares huge page tables between processes, so there is
 * never anything to unshare: always report "not unshared".
 */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	return 0;
}
438
439#ifdef CONFIG_PPC_FSL_BOOK3E
/* Number of hugepte-table pointers that fit in one batch page. */
#define HUGEPD_FREELIST_SIZE \
	((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))

/* One page-sized batch of hugepte tables awaiting an RCU grace period. */
struct hugepd_freelist {
	struct rcu_head rcu;
	unsigned int index;	/* number of entries used in ptes[] */
	void *ptes[0];		/* flexible array filling the rest of the page */
};

/* Per-CPU batch currently being filled by hugepd_free(). */
static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
450
451static void hugepd_free_rcu_callback(struct rcu_head *head)
452{
453 struct hugepd_freelist *batch =
454 container_of(head, struct hugepd_freelist, rcu);
455 unsigned int i;
456
457 for (i = 0; i < batch->index; i++)
458 kmem_cache_free(hugepte_cache, batch->ptes[i]);
459
460 free_page((unsigned long)batch);
461}
462
/*
 * Free a hugepte table, deferring through RCU when other CPUs might
 * still be walking the page tables locklessly.
 */
static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
{
	struct hugepd_freelist **batchp;

	batchp = &get_cpu_var(hugepd_freelist_cur);

	/*
	 * Single-user mm, or only this CPU has it active: no lockless
	 * walker can be looking at the table, free it immediately.
	 */
	if (atomic_read(&tlb->mm->mm_users) < 2 ||
	    cpumask_equal(mm_cpumask(tlb->mm),
			  cpumask_of(smp_processor_id()))) {
		kmem_cache_free(hugepte_cache, hugepte);
		put_cpu_var(hugepd_freelist_cur);
		return;
	}

	if (*batchp == NULL) {
		/*
		 * NOTE(review): __get_free_page(GFP_ATOMIC) can fail and the
		 * result is dereferenced unchecked below - NULL deref under
		 * memory pressure.  A fallback is not trivial (a synchronous
		 * free here would defeat the RCU deferral); confirm intent.
		 */
		*batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
		(*batchp)->index = 0;
	}

	(*batchp)->ptes[(*batchp)->index++] = hugepte;
	if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
		/* Batch full: hand it to RCU and start a new one. */
		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
		*batchp = NULL;
	}
	put_cpu_var(hugepd_freelist_cur);
}
489#endif
490
/*
 * Tear down one hugepd directory, but only if [start,end) together with
 * the [floor,ceiling) limits shows the whole region it maps is going
 * away (mirrors the guard logic of the generic free_p*_range helpers).
 */
static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
			      unsigned long start, unsigned long end,
			      unsigned long floor, unsigned long ceiling)
{
	pte_t *hugepte = hugepd_page(*hpdp);
	int i;

	unsigned long pdmask = ~((1UL << pdshift) - 1);
	unsigned int num_hugepd = 1;

#ifdef CONFIG_PPC_FSL_BOOK3E
	/* On FSL one hugepd covers several consecutive directory slots. */
	num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
#else
	unsigned int shift = hugepd_shift(*hpdp);
#endif

	/* Keep the hugepd if the region below/above the range is still live. */
	start &= pdmask;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= pdmask;
		if (! ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	/* Unhook every slot pointing at this table, then free the table. */
	for (i = 0; i < num_hugepd; i++, hpdp++)
		hpdp->pd = 0;

#ifdef CONFIG_PPC_FSL_BOOK3E
	hugepd_free(tlb, hugepte);
#else
	pgtable_free_tlb(tlb, hugepte, pdshift - shift);
#endif
}
528
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		pmd = pmd_offset(pud, addr);
		next = pmd_addr_end(addr, end);
		if (!is_hugepd(pmd)) {
			/*
			 * Not a hugepd pointer: in a hugetlb range it should
			 * already have been cleared.
			 */
			WARN_ON(!pmd_none_or_clear_bad(pmd));
			continue;
		}
#ifdef CONFIG_PPC_FSL_BOOK3E
		/*
		 * On FSL one hugepd spans several pmd slots that all point
		 * at the same table, so advance by the full mapping size
		 * rather than one pmd at a time (avoids double frees).
		 */
		next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
#endif
		free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
				  addr, next, floor, ceiling);
	} while (addr = next, addr != end);

	/* Free the pmd page itself if the whole pud span is going away. */
	start &= PUD_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd, start);
}
577
static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	pud_t *pud;
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		pud = pud_offset(pgd, addr);
		next = pud_addr_end(addr, end);
		if (!is_hugepd(pud)) {
			/* Ordinary pud: recurse into the pmd level. */
			if (pud_none_or_clear_bad(pud))
				continue;
			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
					       ceiling);
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			/*
			 * On FSL one hugepd spans several pud slots that all
			 * point at the same table, so advance by the full
			 * mapping size rather than one pud at a time.
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif
			free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);

	/* Free the pud page itself if the whole pgd span is going away. */
	start &= PGDIR_MASK;
	if (start < floor)
		return;
	if (ceiling) {
		ceiling &= PGDIR_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)
		return;

	pud = pud_offset(pgd, start);
	pgd_clear(pgd);
	pud_free_tlb(tlb, pud, start);
}
625
626
627
628
629
630
/*
 * Free the page-table pages backing a hugetlb range.  The layout of a
 * hugepage range is only fully known to the allocation path
 * (huge_pte_alloc() above); here the structure is rediscovered from the
 * hugepd entries as the walk proceeds, with [floor,ceiling) limiting
 * which directory pages may actually be freed - compare the big comment
 * on the generic free_pgd_range().
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
			    unsigned long addr, unsigned long end,
			    unsigned long floor, unsigned long ceiling)
{
	pgd_t *pgd;
	unsigned long next;

	do {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset(tlb->mm, addr);
		if (!is_hugepd(pgd)) {
			/* Ordinary pgd: recurse into the pud level. */
			if (pgd_none_or_clear_bad(pgd))
				continue;
			hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
		} else {
#ifdef CONFIG_PPC_FSL_BOOK3E
			/*
			 * On FSL one hugepd spans several pgd slots that all
			 * point at the same table, so advance by the full
			 * mapping size rather than one pgd at a time.
			 */
			next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
#endif
			free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
					  addr, next, floor, ceiling);
		}
	} while (addr = next, addr != end);
}
677
678
679
680
681
/*
 * Look up the struct page backing a huge mapping at @address.
 * Returns ERR_PTR(-EINVAL) if the address is not a huge mapping,
 * NULL if the huge pte is not present.
 */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	bool is_thp;
	pte_t *ptep, pte;
	unsigned shift;
	unsigned long mask, flags;
	struct page *page = ERR_PTR(-EINVAL);

	/* IRQs off so the page tables can't be freed under the walk. */
	local_irq_save(flags);
	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift);
	if (!ptep)
		goto no_page;
	pte = READ_ONCE(*ptep);

	/*
	 * Verify it is a huge page else bail.  Transparent hugepages are
	 * handled by generic code and can be skipped here.
	 */
	if (!shift || is_thp)
		goto no_page;

	if (!pte_present(pte)) {
		page = NULL;
		goto no_page;
	}
	/* Offset of the 4K-sized subpage within the huge page. */
	mask = (1UL << shift) - 1;
	page = pte_page(pte);
	if (page)
		page += (address & mask) / PAGE_SIZE;

no_page:
	local_irq_restore(flags);
	return page;
}
717
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	/* Huge pages are found via follow_huge_addr() on powerpc;
	 * reaching this is a bug. */
	BUG();
	return NULL;
}
725
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int write)
{
	/* Huge pages are found via follow_huge_addr() on powerpc;
	 * reaching this is a bug. */
	BUG();
	return NULL;
}
733
/*
 * Next sz-aligned boundary after addr, clamped to end.  The "- 1"
 * comparison keeps the result correct when the boundary wraps to 0 at
 * the top of the address space.
 */
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
				      unsigned long sz)
{
	unsigned long boundary = (addr + sz) & ~(sz - 1);

	if (boundary - 1 < end - 1)
		return boundary;
	return end;
}
740
/*
 * Lockless get_user_pages over one hugepd directory: walk every huge
 * pte covering [addr,end) and grab references via gup_hugepte().
 * Returns 1 on success, 0 to make the caller fall back to the slow path.
 */
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
	       unsigned long addr, unsigned long end,
	       int write, struct page **pages, int *nr)
{
	pte_t *ptep;
	unsigned long sz = 1UL << hugepd_shift(*hugepd);
	unsigned long next;

	ptep = hugepte_offset(hugepd, addr, pdshift);
	do {
		next = hugepte_addr_end(addr, end, sz);
		if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
			return 0;
	} while (ptep++, addr = next, addr != end);

	return 1;
}
758
759#ifdef CONFIG_PPC_MM_SLICES
/* Delegate to the slice allocator, asking for this file's huge psize. */
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	struct hstate *h = hstate_file(file);

	return slice_get_unmapped_area(addr, len, flags,
				       shift_to_mmu_psize(huge_page_shift(h)), 1);
}
769#endif
770
/* Report the MMU page size actually backing this VMA. */
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
#ifdef CONFIG_PPC_MM_SLICES
	/* With slices the psize is recorded per address-space slice. */
	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

	return 1UL << mmu_psize_to_shift(psize);
#else
	if (!is_vm_hugetlb_page(vma))
		return PAGE_SIZE;

	return huge_page_size(hstate_vma(vma));
#endif
}
784
785static inline bool is_power_of_4(unsigned long x)
786{
787 if (is_power_of_2(x))
788 return (__ilog2(x) % 2) ? false : true;
789 return false;
790}
791
/*
 * Register one huge page size with the generic hugetlb layer, after
 * validating that hardware and the page-table layout support it.
 * Returns 0 on success (including "already registered"), -EINVAL if the
 * size is unusable.
 */
static int __init add_huge_page_size(unsigned long long size)
{
	int shift = __ffs(size);
	int mmu_psize;

	/* Size must be a supported power of two/four within limits. */
#ifdef CONFIG_PPC_FSL_BOOK3E
	if ((size < PAGE_SIZE) || !is_power_of_4(size))
		return -EINVAL;
#else
	if (!is_power_of_2(size)
	    || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
		return -EINVAL;
#endif

	/* The hardware must define this exact page size. */
	if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
		return -EINVAL;

#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * On Book3S 64 only the 16M and 16G sizes are wired up as hugetlb
	 * sizes here, whatever else the firmware may report.
	 */
	if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
		return -EINVAL;
#endif

#ifdef CONFIG_SPU_FS_64K_LS
	/*
	 * 64K huge pages are excluded when 64K SPU local store support is
	 * enabled (the two features conflict).
	 */
	if (shift == PAGE_SHIFT_64K)
		return -EINVAL;
#endif

	BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);

	/* Nothing to do if this size has already been registered. */
	if (size_to_hstate(size))
		return 0;

	hugetlb_add_hstate(shift - PAGE_SHIFT);

	return 0;
}
841
842static int __init hugepage_setup_sz(char *str)
843{
844 unsigned long long size;
845
846 size = memparse(str, &str);
847
848 if (add_huge_page_size(size) != 0) {
849 hugetlb_bad_size();
850 pr_err("Invalid huge page size specified(%llu)\n", size);
851 }
852
853 return 1;
854}
855__setup("hugepagesz=", hugepage_setup_sz);
856
857#ifdef CONFIG_PPC_FSL_BOOK3E
struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
	int psize;

	/* Register every hardware page size, except the base page size. */
	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		/* Don't treat the normal page size as huge. */
		if (shift != PAGE_SHIFT)
			if (add_huge_page_size(1ULL << shift) < 0)
				continue;
	}

	/*
	 * Create a kmem cache for hugeptes.  The bottom bits of the
	 * table pointer encode the page-size shift (see __hugepte_alloc),
	 * so align allocations to keep those bits free.
	 */
	hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
					  HUGEPD_SHIFT_MASK + 1, 0, NULL);
	if (hugepte_cache == NULL)
		panic("%s: Unable to create kmem cache for hugeptes\n",
		      __func__);

	/* Default huge page size: 4M (must be supported by the MMU). */
	if (mmu_psize_defs[MMU_PAGE_4M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
	else
		panic("%s: Unable to set default huge page size\n", __func__);


	return 0;
}
896#else
static int __init hugetlbpage_init(void)
{
	int psize;

	/* Honour an administrative "disable hugetlb" request. */
	if (hugetlb_disabled) {
		pr_info("HugeTLB support is disabled!\n");
		return 0;
	}

	/* Without 16M page support there is nothing to set up. */
	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
		return -ENODEV;

	for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
		unsigned shift;
		unsigned pdshift;

		if (!mmu_psize_defs[psize].shift)
			continue;

		shift = mmu_psize_to_shift(psize);

		if (add_huge_page_size(1ULL << shift) < 0)
			continue;

		/* Page-table level whose slots will hold this size's hugepd. */
		if (shift < PMD_SHIFT)
			pdshift = PMD_SHIFT;
		else if (shift < PUD_SHIFT)
			pdshift = PUD_SHIFT;
		else
			pdshift = PGDIR_SHIFT;

		/*
		 * Sizes below the directory coverage need a hugepte table;
		 * pre-create a pgtable cache of the right order for it.
		 */
		if (pdshift != shift) {
			pgtable_cache_add(pdshift - shift, NULL);
			if (!PGT_CACHE(pdshift - shift))
				panic("hugetlbpage_init(): could not create "
				      "pgtable cache for %d bit pagesize\n", shift);
		}
	}

	/*
	 * Set the default huge page size: 16M if available, else 1M.
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
	else if (mmu_psize_defs[MMU_PAGE_1M].shift)
		HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;

	return 0;
}
949#endif
950module_init(hugetlbpage_init);
951
952void flush_dcache_icache_hugepage(struct page *page)
953{
954 int i;
955 void *start;
956
957 BUG_ON(!PageCompound(page));
958
959 for (i = 0; i < (1UL << compound_order(page)); i++) {
960 if (!PageHighMem(page)) {
961 __flush_dcache_icache(page_address(page+i));
962 } else {
963 start = kmap_atomic(page+i);
964 __flush_dcache_icache(start);
965 kunmap_atomic(start);
966 }
967 }
968}
969
970#endif
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
986 bool *is_thp, unsigned *shift)
987{
988 pgd_t pgd, *pgdp;
989 pud_t pud, *pudp;
990 pmd_t pmd, *pmdp;
991 pte_t *ret_pte;
992 hugepd_t *hpdp = NULL;
993 unsigned pdshift = PGDIR_SHIFT;
994
995 if (shift)
996 *shift = 0;
997
998 if (is_thp)
999 *is_thp = false;
1000
1001 pgdp = pgdir + pgd_index(ea);
1002 pgd = READ_ONCE(*pgdp);
1003
1004
1005
1006
1007
1008
1009 if (pgd_none(pgd))
1010 return NULL;
1011 else if (pgd_huge(pgd)) {
1012 ret_pte = (pte_t *) pgdp;
1013 goto out;
1014 } else if (is_hugepd(&pgd))
1015 hpdp = (hugepd_t *)&pgd;
1016 else {
1017
1018
1019
1020
1021
1022 pdshift = PUD_SHIFT;
1023 pudp = pud_offset(&pgd, ea);
1024 pud = ACCESS_ONCE(*pudp);
1025
1026 if (pud_none(pud))
1027 return NULL;
1028 else if (pud_huge(pud)) {
1029 ret_pte = (pte_t *) pudp;
1030 goto out;
1031 } else if (is_hugepd(&pud))
1032 hpdp = (hugepd_t *)&pud;
1033 else {
1034 pdshift = PMD_SHIFT;
1035 pmdp = pmd_offset(&pud, ea);
1036 pmd = ACCESS_ONCE(*pmdp);
1037
1038
1039
1040
1041
1042
1043
1044
1045 if (pmd_none(pmd))
1046 return NULL;
1047
1048 if (pmd_trans_huge(pmd)) {
1049 if (is_thp)
1050 *is_thp = true;
1051 ret_pte = (pte_t *) pmdp;
1052 goto out;
1053 }
1054
1055 if (pmd_huge(pmd)) {
1056 ret_pte = (pte_t *) pmdp;
1057 goto out;
1058 } else if (is_hugepd(&pmd))
1059 hpdp = (hugepd_t *)&pmd;
1060 else
1061 return pte_offset_kernel(&pmd, ea);
1062 }
1063 }
1064 if (!hpdp)
1065 return NULL;
1066
1067 ret_pte = hugepte_offset(hpdp, ea, pdshift);
1068 pdshift = hugepd_shift(*hpdp);
1069out:
1070 if (shift)
1071 *shift = pdshift;
1072 return ret_pte;
1073}
1074EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
1075
/*
 * Lockless get_user_pages on one huge pte: take @refs page references
 * speculatively, then re-check the pte did not change underneath us.
 * Returns 1 on success, 0 to send the caller to the slow path.
 */
int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask;
	unsigned long pte_end;
	struct page *head, *page, *tail;
	pte_t pte;
	int refs;

	/* Clamp end to the region this single huge pte maps. */
	pte_end = (addr + sz) & ~(sz-1);
	if (pte_end < end)
		end = pte_end;

	pte = READ_ONCE(*ptep);
	/* Required permission bits: present + user, plus write if asked. */
	mask = _PAGE_PRESENT | _PAGE_USER;
	if (write)
		mask |= _PAGE_RW;

	if ((pte_val(pte) & mask) != mask)
		return 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	/*
	 * A pmd in the middle of a THP split cannot be gup'd locklessly.
	 */
	if (pmd_trans_splitting(pte_pmd(pte)))
		return 0;
#endif

	/* Huge pages are never "special", so the pfn must be valid. */
	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

	refs = 0;
	head = pte_page(pte);

	/* First base page of interest within the huge page. */
	page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
	tail = page;
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

	/* Grab all references at once on the head page, or back out. */
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

	if (unlikely(pte_val(pte) != pte_val(*ptep))) {
		/* The pte changed under us: drop everything taken above. */
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	/*
	 * Tail pages need their mapcount reference taken before we
	 * return.
	 */
	while (refs--) {
		if (PageTail(tail))
			get_huge_page_tail(tail);
		tail++;
	}

	return 1;
}
1146