/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 */
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/spu.h>

#define PAGE_SHIFT_64K	16
#define PAGE_SHIFT_16M	24
#define PAGE_SHIFT_16G	34

#define NUM_LOW_AREAS		(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS		(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
#define MAX_NUMBER_GPAGES	1024

/* Physical addresses of gigantic pages set aside at early boot, handed
 * out later by alloc_bootmem_huge_page() (see add_gpage() below). */
static unsigned long gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;

/* Non-zero for every huge page size supported on this system.  The value
 * stored is the log2 of the number of hugepte entries in one hugepte
 * table for that size, so the array doubles as hugepte_shift[] below. */
unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { };

#define hugepte_shift			mmu_huge_psizes
#define PTRS_PER_HUGEPTE(psize)		(1 << hugepte_shift[psize])
#define HUGEPTE_TABLE_SIZE(psize)	(sizeof(pte_t) << hugepte_shift[psize])

#define HUGEPD_SHIFT(psize)		(mmu_psize_to_shift(psize) \
						+ hugepte_shift[psize])
#define HUGEPD_SIZE(psize)		(1UL << HUGEPD_SHIFT(psize))
#define HUGEPD_MASK(psize)		(~(HUGEPD_SIZE(psize)-1))

/* One kmem cache of hugepte tables per huge page size, indexed into
 * pgtable_cache[] after the standard page table caches. */
#define HUGE_PGTABLE_INDEX(psize)	(HUGEPTE_CACHE_NUM + psize - 1)
#define HUGEPTE_CACHE_NAME(psize)	(huge_pgtable_cache_name[psize])

static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = {
        [MMU_PAGE_64K]	= "hugepte_cache_64K",
        [MMU_PAGE_1M]	= "hugepte_cache_1M",
        [MMU_PAGE_16M]	= "hugepte_cache_16M",
        [MMU_PAGE_16G]	= "hugepte_cache_16G",
};

/* Flag kept in the low bit of a page directory entry to mark it as
 * pointing to a hugepte table rather than to a normal page table page. */
#define HUGEPD_OK	0x1

typedef struct { unsigned long pd; } hugepd_t;

#define hugepd_none(hpd)	((hpd).pd == 0)

static inline int shift_to_mmu_psize(unsigned int shift)
{
        switch (shift) {
#ifndef CONFIG_PPC_64K_PAGES
        case PAGE_SHIFT_64K:
                return MMU_PAGE_64K;
#endif
        case PAGE_SHIFT_16M:
                return MMU_PAGE_16M;
        case PAGE_SHIFT_16G:
                return MMU_PAGE_16G;
        }
        return -1;
}

static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
{
        if (mmu_psize_defs[mmu_psize].shift)
                return mmu_psize_defs[mmu_psize].shift;
        BUG();
}

static inline pte_t *hugepd_page(hugepd_t hpd)
{
        BUG_ON(!(hpd.pd & HUGEPD_OK));
        return (pte_t *)(hpd.pd & ~HUGEPD_OK);
}

static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
                                    struct hstate *hstate)
{
        unsigned int shift = huge_page_shift(hstate);
        int psize = shift_to_mmu_psize(shift);
        unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1));
        pte_t *dir = hugepd_page(*hpdp);

        return dir + idx;
}

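/* Allocate a fresh hugepte table and install it in *hpdp.  Returns 0 on
 * success or -ENOMEM; if another thread installs one first, theirs is
 * kept and our newly allocated table is freed. */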
static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
                           unsigned long address, unsigned int psize)
{
        pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)],
                                       GFP_KERNEL|__GFP_REPEAT);

        if (!new)
                return -ENOMEM;

        spin_lock(&mm->page_table_lock);
        if (!hugepd_none(*hpdp))
                kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new);
        else
                hpdp->pd = (unsigned long)new | HUGEPD_OK;
        spin_unlock(&mm->page_table_lock);
        return 0;
}

/* The page-table level a hugepte table hangs off depends on the huge page
 * size: sizes that fit below a PUD/PMD use the normal intermediate levels,
 * larger ones attach directly to the higher-level entry. */
static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate)
{
        if (huge_page_shift(hstate) < PUD_SHIFT)
                return pud_offset(pgd, addr);
        else
                return (pud_t *) pgd;
}

static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr,
                         struct hstate *hstate)
{
        if (huge_page_shift(hstate) < PUD_SHIFT)
                return pud_alloc(mm, pgd, addr);
        else
                return (pud_t *) pgd;
}

static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate)
{
        if (huge_page_shift(hstate) < PMD_SHIFT)
                return pmd_offset(pud, addr);
        else
                return (pmd_t *) pud;
}

static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr,
                         struct hstate *hstate)
{
        if (huge_page_shift(hstate) < PMD_SHIFT)
                return pmd_alloc(mm, pud, addr);
        else
                return (pmd_t *) pud;
}

/* Build the list of addresses of gigantic pages.  This is used early in
 * boot, before the buddy or bootmem allocator is set up. */
void add_gpage(unsigned long addr, unsigned long page_size,
               unsigned long number_of_pages)
{
        if (!addr)
                return;
        while (number_of_pages > 0) {
                gpage_freearray[nr_gpages] = addr;
                nr_gpages++;
                number_of_pages--;
                addr += page_size;
        }
}

/* Move a gigantic page from the boot-time free list onto the
 * huge_boot_pages list consumed by the generic hugetlb code. */
int alloc_bootmem_huge_page(struct hstate *hstate)
{
        struct huge_bootmem_page *m;
        if (nr_gpages == 0)
                return 0;
        m = phys_to_virt(gpage_freearray[--nr_gpages]);
        gpage_freearray[nr_gpages] = 0;
        list_add(&m->list, &huge_boot_pages);
        m->hstate = hstate;
        return 1;
}

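/* Find the hugepage PTE for @addr, or NULL if none is mapped.  The huge
 * page size is taken from the address slice, not supplied by the caller. */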
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;

        unsigned int psize;
        unsigned int shift;
        unsigned long sz;
        struct hstate *hstate;
        psize = get_slice_psize(mm, addr);
        shift = mmu_psize_to_shift(psize);
        sz = ((1UL) << shift);
        hstate = size_to_hstate(sz);

        addr &= hstate->mask;

        pg = pgd_offset(mm, addr);
        if (!pgd_none(*pg)) {
                pu = hpud_offset(pg, addr, hstate);
                if (!pud_none(*pu)) {
                        pm = hpmd_offset(pu, addr, hstate);
                        if (!pmd_none(*pm))
                                return hugepte_offset((hugepd_t *)pm, addr,
                                                      hstate);
                }
        }

        return NULL;
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
{
        pgd_t *pg;
        pud_t *pu;
        pmd_t *pm;
        hugepd_t *hpdp = NULL;
        struct hstate *hstate;
        unsigned int psize;
        hstate = size_to_hstate(sz);

        psize = get_slice_psize(mm, addr);
        BUG_ON(!mmu_huge_psizes[psize]);

        addr &= hstate->mask;

        pg = pgd_offset(mm, addr);
        pu = hpud_alloc(mm, pg, addr, hstate);

        if (pu) {
                pm = hpmd_alloc(mm, pu, addr, hstate);
                if (pm)
                        hpdp = (hugepd_t *)pm;
        }

        if (!hpdp)
                return NULL;

        if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize))
                return NULL;

        return hugepte_offset(hpdp, addr, hstate);
}

int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
        return 0;
}

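/* Detach a hugepte table from its directory entry and hand it to the TLB
 * gather, so it is only freed once the TLB has been flushed. */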
static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp,
                               unsigned int psize)
{
        pte_t *hugepte = hugepd_page(*hpdp);

        hpdp->pd = 0;
        tlb->need_flush = 1;
        pgtable_free_tlb(tlb, pgtable_free_cache(hugepte,
                                                 HUGEPTE_CACHE_NUM+psize-1,
                                                 PGF_CACHENUM_MASK));
}

static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling,
                                   unsigned int psize)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long start;

        start = addr;
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none(*pmd))
                        continue;
                free_hugepte_range(tlb, (hugepd_t *)pmd, psize);
        } while (pmd++, addr = next, addr != end);

        start &= PUD_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PUD_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pmd = pmd_offset(pud, start);
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
}

static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
                                   unsigned long addr, unsigned long end,
                                   unsigned long floor, unsigned long ceiling)
{
        pud_t *pud;
        unsigned long next;
        unsigned long start;
        unsigned int shift;
        unsigned int psize = get_slice_psize(tlb->mm, addr);
        shift = mmu_psize_to_shift(psize);

        start = addr;
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (shift < PMD_SHIFT) {
                        if (pud_none_or_clear_bad(pud))
                                continue;
                        hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
                                               ceiling, psize);
                } else {
                        if (pud_none(*pud))
                                continue;
                        free_hugepte_range(tlb, (hugepd_t *)pud, psize);
                }
        } while (pud++, addr = next, addr != end);

        start &= PGDIR_MASK;
        if (start < floor)
                return;
        if (ceiling) {
                ceiling &= PGDIR_MASK;
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                return;

        pud = pud_offset(pgd, start);
        pgd_clear(pgd);
        pud_free_tlb(tlb, pud, start);
}

/*
 * This function frees user-level page tables of a process.
 */
void hugetlb_free_pgd_range(struct mmu_gather *tlb,
                            unsigned long addr, unsigned long end,
                            unsigned long floor, unsigned long ceiling)
{
        pgd_t *pgd;
        unsigned long next;
        unsigned long start;

        /*
         * The tests against HUGEPD_MASK below are essential: we do *not*
         * test at the bottom level, so without them we would free a hugepte
         * table when unmapping only part of it, even though other active
         * mappings may still be using it.
         *
         * Testing at this top level also means we can often discover that
         * there is no work to do without walking all the way down.
         *
         * The "- 1"s are needed because 0 represents both the bottom and
         * the top of the address space: addr 0 and floor 0 mean the bottom,
         * while end 0 and ceiling 0 mean the top, so comparisons must use
         * "end - 1" and "ceiling - 1".
         */
        unsigned int psize = get_slice_psize(tlb->mm, addr);

        addr &= HUGEPD_MASK(psize);
        if (addr < floor) {
                addr += HUGEPD_SIZE(psize);
                if (!addr)
                        return;
        }
        if (ceiling) {
                ceiling &= HUGEPD_MASK(psize);
                if (!ceiling)
                        return;
        }
        if (end - 1 > ceiling - 1)
                end -= HUGEPD_SIZE(psize);
        if (addr > end - 1)
                return;

        start = addr;
        pgd = pgd_offset(tlb->mm, addr);
        do {
                psize = get_slice_psize(tlb->mm, addr);
                BUG_ON(!mmu_huge_psizes[psize]);
                next = pgd_addr_end(addr, end);
                if (mmu_psize_to_shift(psize) < PUD_SHIFT) {
                        if (pgd_none_or_clear_bad(pgd))
                                continue;
                        hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
                } else {
                        if (pgd_none(*pgd))
                                continue;
                        free_hugepte_range(tlb, (hugepd_t *)pgd, psize);
                }
        } while (pgd++, addr = next, addr != end);
}

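/* Install a hugepage PTE, flushing any existing hash table entry first. */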
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
                     pte_t *ptep, pte_t pte)
{
        if (pte_present(*ptep)) {
                /* Open-code pte_clear() so that pte_update() sees huge = 1
                 * and an address aligned to the hugepage boundary; the hash
                 * flush code then treats this as a hugepage entry. */
                unsigned int psize = get_slice_psize(mm, addr);
                unsigned int shift = mmu_psize_to_shift(psize);
                unsigned long sz = ((1UL) << shift);
                struct hstate *hstate = size_to_hstate(sz);
                pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1);
        }
        *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
}

pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1);
        return __pte(old);
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
        pte_t *ptep;
        struct page *page;
        unsigned int mmu_psize = get_slice_psize(mm, address);

        /* Verify it is a huge page, else bail. */
        if (!mmu_huge_psizes[mmu_psize])
                return ERR_PTR(-EINVAL);

        ptep = huge_pte_offset(mm, address);
        page = pte_page(*ptep);
        if (page) {
                unsigned int shift = mmu_psize_to_shift(mmu_psize);
                unsigned long sz = ((1UL) << shift);
                page += (address % sz) / PAGE_SIZE;
        }

        return page;
}

int pmd_huge(pmd_t pmd)
{
        return 0;
}

int pud_huge(pud_t pud)
{
        return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
{
        BUG();
        return NULL;
}

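/* Hugepage mappings are placed by the slice allocator, which also ensures
 * the chosen slice uses the right MMU page size. */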
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long len, unsigned long pgoff,
                                        unsigned long flags)
{
        struct hstate *hstate = hstate_file(file);
        int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));

        if (!mmu_huge_psizes[mmu_psize])
                return -EINVAL;
        return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
}

unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
        unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);

        return 1UL << mmu_psize_to_shift(psize);
}

/*
 * Lazy icache flushing for hugepages: on an instruction fault, flush the
 * whole huge page and remember it via PG_arch_1; on a data fault, map the
 * page no-execute instead.
 */
static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags,
                                                  pte_t pte, int trap, unsigned long sz)
{
        struct page *page;
        int i;

        if (!pfn_valid(pte_pfn(pte)))
                return rflags;

        page = pte_page(pte);

        /* page is dirty */
        if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
                if (trap == 0x400) {
                        /* instruction fault: flush the icache for every
                         * base page making up the huge page */
                        for (i = 0; i < (sz / PAGE_SIZE); i++)
                                __flush_dcache_icache(page_address(page+i));
                        set_bit(PG_arch_1, &page->flags);
                } else {
                        rflags |= HPTE_R_N;
                }
        }
        return rflags;
}

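/* Called from the low-level hash fault path to build or update the hash
 * PTE for a hugepage access.  Returns 0 on success, or 1 to hand the
 * fault up to do_page_fault(). */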
int hash_huge_page(struct mm_struct *mm, unsigned long access,
                   unsigned long ea, unsigned long vsid, int local,
                   unsigned long trap)
{
        pte_t *ptep;
        unsigned long old_pte, new_pte;
        unsigned long va, rflags, pa, sz;
        long slot;
        int err = 1;
        int ssize = user_segment_size(ea);
        unsigned int mmu_psize;
        int shift;
        mmu_psize = get_slice_psize(mm, ea);

        if (!mmu_huge_psizes[mmu_psize])
                goto out;
        ptep = huge_pte_offset(mm, ea);

        /* Compute the virtual address used to index the hash table */
        va = hpt_va(ea, vsid, ssize);

        /*
         * If no pte was found or it is not present, send the problem up to
         * do_page_fault.
         */
        if (unlikely(!ptep || pte_none(*ptep)))
                goto out;

        /*
         * Check the user's access rights to the page.  If access should be
         * prevented then send the problem up to do_page_fault.
         */
        if (unlikely(access & ~pte_val(*ptep)))
                goto out;

        /*
         * At this point we have a pte (old_pte) which can be used to build
         * or update an HPTE.  There are 2 cases:
         *
         * 1. There is a valid (present) pte with no associated HPTE (this
         *    is the most common case).
         * 2. There is a valid (present) pte with an associated HPTE, but
         *    the current pp bits in the HPTE prevent access because we are
         *    doing software DIRTY bit management and the page is not
         *    currently DIRTY.
         */

        /* Mark the pte busy and accessed, atomically */
        do {
                old_pte = pte_val(*ptep);
                if (old_pte & _PAGE_BUSY)
                        goto out;
                new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
        } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
                                         old_pte, new_pte));

        rflags = 0x2 | (!(new_pte & _PAGE_RW));
        /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
        rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
        shift = mmu_psize_to_shift(mmu_psize);
        sz = ((1UL) << shift);
        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
                /* No CPU has hugepages but lacks no-execute, so we
                 * don't need to worry about that case */
                rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte),
                                                       trap, sz);

        /* Check if pte already has an hpte (case 2) */
        if (unlikely(old_pte & _PAGE_HASHPTE)) {
                /* There MIGHT be an HPTE for this pte */
                unsigned long hash, slot;

                hash = hpt_hash(va, shift, ssize);
                if (old_pte & _PAGE_F_SECOND)
                        hash = ~hash;
                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += (old_pte & _PAGE_F_GIX) >> 12;

                if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize,
                                         ssize, local) == -1)
                        old_pte &= ~_PAGE_HPTEFLAGS;
        }

        if (likely(!(old_pte & _PAGE_HASHPTE))) {
                unsigned long hash = hpt_hash(va, shift, ssize);
                unsigned long hpte_group;

                pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;

repeat:
                hpte_group = ((hash & htab_hash_mask) *
                              HPTES_PER_GROUP) & ~0x7UL;

                /* clear HPTE slot information in the new PTE */
#ifdef CONFIG_PPC_64K_PAGES
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
#else
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
#endif
                /* Add in WIMG bits */
                rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
                                      _PAGE_COHERENT | _PAGE_GUARDED));

                /* Insert into the hash table, primary slot */
                slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
                                          mmu_psize, ssize);

                /* Primary is full, try the secondary */
                if (unlikely(slot == -1)) {
                        hpte_group = ((~hash & htab_hash_mask) *
                                      HPTES_PER_GROUP) & ~0x7UL;
                        slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
                                                  HPTE_V_SECONDARY,
                                                  mmu_psize, ssize);
                        if (slot == -1) {
                                /* Both groups full: evict a random entry
                                 * and retry */
                                if (mftb() & 0x1)
                                        hpte_group = ((hash & htab_hash_mask) *
                                                      HPTES_PER_GROUP)&~0x7UL;

                                ppc_md.hpte_remove(hpte_group);
                                goto repeat;
                        }
                }

                if (unlikely(slot == -2))
                        panic("hash_huge_page: pte_insert failed\n");

                new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
        }

        /*
         * No need to use ldarx/stdcx here
         */
        *ptep = __pte(new_pte & ~_PAGE_BUSY);

        err = 0;

 out:
        return err;
}

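/* Validate a huge page size against the MMU and register it: add an hstate
 * and record which page-table level its hugepte tables hang off. */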
static void __init set_huge_psize(int psize)
{
        /* Check that it is a page size supported by the hardware and
         * that it fits within pagetable and slice limits. */
        if (mmu_psize_defs[psize].shift &&
            mmu_psize_defs[psize].shift < SID_SHIFT_1T &&
            (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
             mmu_psize_defs[psize].shift == PAGE_SHIFT_64K ||
             mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) {
                /* Return if the huge page size has already been set up or
                 * is the same as the base page size. */
                if (mmu_huge_psizes[psize] ||
                    mmu_psize_defs[psize].shift == PAGE_SHIFT)
                        return;
                if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL))
                        return;
                hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT);

                switch (mmu_psize_defs[psize].shift) {
                case PAGE_SHIFT_64K:
                        /* 64K hugepages are only allowed with a 4K base
                         * page (checked above) and always sit at the PMD */
                        hugepte_shift[psize] = PMD_SHIFT;
                        break;
                case PAGE_SHIFT_16M:
                        /* 16M pages sit at different page-table levels
                         * depending on the base page size */
                        if (PAGE_SHIFT == PAGE_SHIFT_64K)
                                hugepte_shift[psize] = PMD_SHIFT;
                        else /* 4K base page */
                                hugepte_shift[psize] = PUD_SHIFT;
                        break;
                case PAGE_SHIFT_16G:
                        /* 16G pages are always at the PGD level */
                        hugepte_shift[psize] = PGDIR_SHIFT;
                        break;
                }
                hugepte_shift[psize] -= mmu_psize_defs[psize].shift;
        } else
                hugepte_shift[psize] = 0;
}

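/* Parse the "hugepagesz=" kernel command line option. */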
static int __init hugepage_setup_sz(char *str)
{
        unsigned long long size;
        int mmu_psize;
        int shift;

        size = memparse(str, &str);

        shift = __ffs(size);
        mmu_psize = shift_to_mmu_psize(shift);
        if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift)
                set_huge_psize(mmu_psize);
        else
                printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);

        return 1;
}
__setup("hugepagesz=", hugepage_setup_sz);

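/* Arch initialisation: register the supported huge page sizes and create
 * one hugepte-table kmem cache per size. */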
static int __init hugetlbpage_init(void)
{
        unsigned int psize;

        if (!cpu_has_feature(CPU_FTR_16M_PAGE))
                return -ENODEV;

        /* Register the huge page sizes supported by the hardware. */
        set_huge_psize(MMU_PAGE_16M);
        set_huge_psize(MMU_PAGE_16G);

        /* 64K hugepages conflict with 64K SPU local store support, so only
         * enable them when that option is off. */
#ifndef CONFIG_SPU_FS_64K_LS
        set_huge_psize(MMU_PAGE_64K);
#endif

        for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
                if (mmu_huge_psizes[psize]) {
                        pgtable_cache[HUGE_PGTABLE_INDEX(psize)] =
                                kmem_cache_create(
                                                HUGEPTE_CACHE_NAME(psize),
                                                HUGEPTE_TABLE_SIZE(psize),
                                                HUGEPTE_TABLE_SIZE(psize),
                                                0,
                                                NULL);
                        if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)])
                                panic("hugetlbpage_init(): could not create %s"\
                                      "\n", HUGEPTE_CACHE_NAME(psize));
                }
        }

        return 0;
}

module_init(hugetlbpage_init);