#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
#include <asm/mtrr.h>

#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif

#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)

#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
#else
#define PGALLOC_USER_GFP 0
#endif

gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;

pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
}

pgtable_t pte_alloc_one(struct mm_struct *mm)
{
	struct page *pte;

	pte = alloc_pages(__userpte_alloc_gfp, 0);
	if (!pte)
		return NULL;
	if (!pgtable_page_ctor(pte)) {
		__free_page(pte);
		return NULL;
	}
	return pte;
}

static int __init setup_userpte(char *arg)
{
	if (!arg)
		return -EINVAL;

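	/*
	 * "userpte=nohigh" disables allocation of user page-table pages
	 * from high memory (CONFIG_HIGHPTE).
	 */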
	if (strcmp(arg, "nohigh") == 0)
		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
	else
		return -EINVAL;
	return 0;
}
early_param("userpte", setup_userpte);

void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	pgtable_page_dtor(pte);
	paravirt_release_pte(page_to_pfn(pte));
	paravirt_tlb_remove_table(tlb, pte);
}

#if CONFIG_PGTABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	struct page *page = virt_to_page(pmd);
	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
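	/*
	 * NOTE: with PAE, the pmd page is referenced by a top-level PDPT
	 * entry, and the CPU caches the PDPTEs when CR3 is loaded.  Freeing
	 * a pmd therefore needs a full TLB flush (CR3 reload), not just
	 * per-page invalidations.
	 */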
#ifdef CONFIG_X86_PAE
	tlb->need_flush_all = 1;
#endif
	pgtable_pmd_page_dtor(page);
	paravirt_tlb_remove_table(tlb, page);
}

#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
}

#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
	paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
	paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
}
#endif	/* CONFIG_PGTABLE_LEVELS > 4 */
#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
#endif	/* CONFIG_PGTABLE_LEVELS > 2 */

static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_del(&page->lru);
}

#define UNSHARED_PTRS_PER_PGD \
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)

static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
	virt_to_page(pgd)->pt_mm = mm;
}

struct mm_struct *pgd_page_get_mm(struct page *page)
{
	return page->pt_mm;
}

static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
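	/*
	 * When the kernel half of the address space is shared with the
	 * reference page table (the ptes on 2-level paging, the shared
	 * kernel pmds on 3-level paging, or the kernel entries on 4/5-level
	 * paging), just copy those references from swapper_pg_dir.
	 */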
	if (CONFIG_PGTABLE_LEVELS == 2 ||
	    (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
	    CONFIG_PGTABLE_LEVELS >= 4) {
		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
				KERNEL_PGD_PTRS);
	}

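	/*
	 * When the kernel pmds are not shared, remember which mm owns this
	 * pgd and put it on pgd_list so that later updates to the kernel
	 * part of the page tables can be propagated to every pgd.
	 */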
	if (!SHARED_KERNEL_PMD) {
		pgd_set_mm(pgd, mm);
		pgd_list_add(pgd);
	}
}

static void pgd_dtor(pgd_t *pgd)
{
	if (SHARED_KERNEL_PMD)
		return;

	spin_lock(&pgd_lock);
	pgd_list_del(pgd);
	spin_unlock(&pgd_lock);
}

#ifdef CONFIG_X86_PAE
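/*
 * With PAE paging, an update to a top-level (PDPT) entry only takes effect
 * after the CPU reloads its cached PDPTEs via a CR3 write, which is
 * expensive.  Since a new process touches all of its few top-level entries
 * almost immediately anyway, the pmd pages they point to are pre-allocated
 * and installed at pgd_alloc() time instead of being faulted in later.
 */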
#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD

void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);

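	/*
	 * Note: almost every attribute bit is reserved at the PDPT level,
	 * so only _PAGE_PRESENT is set here.
	 */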
	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));

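	/*
	 * The processor only re-reads the PDPT entries when CR3 is written,
	 * so force a TLB flush for this mm to make the update visible.
	 */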
	flush_tlb_mm(mm);
}
#else	/* !CONFIG_X86_PAE */

/* No need to prepopulate any pagetable entries in non-PAE modes. */
#define PREALLOCATED_PMDS 0

#endif	/* CONFIG_X86_PAE */

static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++)
		if (pmds[i]) {
			pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
			free_page((unsigned long)pmds[i]);
			mm_dec_nr_pmds(mm);
		}
}

static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;
	bool failed = false;
	gfp_t gfp = PGALLOC_GFP;

	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
		if (!pmd)
			failed = true;
		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
			free_page((unsigned long)pmd);
			pmd = NULL;
			failed = true;
		}
		if (pmd)
			mm_inc_nr_pmds(mm);
		pmds[i] = pmd;
	}

	if (failed) {
		free_pmds(mm, pmds);
		return -ENOMEM;
	}

	return 0;
}
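/*
 * Mop up any pmd pages which may still be attached to the pgd.  Normally
 * they are freed when the mm tears down its page tables, but a pmd that
 * was preallocated here and never handed to a vma has to be released
 * explicitly.
 */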
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pgd_t pgd = pgdp[i];

		if (pgd_val(pgd) != 0) {
			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

			pgdp[i] = native_make_pgd(0);

			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
			pmd_free(mm, pmd);
			mm_dec_nr_pmds(mm);
		}
	}
}

static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
	p4d_t *p4d;
	pud_t *pud;
	int i;

	if (PREALLOCATED_PMDS == 0)
		return;

	p4d = p4d_offset(pgd, 0);
	pud = pud_offset(p4d, 0);

	for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
		pmd_t *pmd = pmds[i];

		if (i >= KERNEL_PGD_BOUNDARY)
			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
			       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, pud, pmd);
	}
}

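/*
 * When the kernel pmd is not shared (e.g. when running as a Xen PV guest),
 * the pgd is expected to occupy a whole page.  A native PAE kernel with a
 * shared kernel pmd only needs the four 8-byte PDPT entries (32 bytes), so
 * it can use a small slab cache instead.
 */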
#ifdef CONFIG_X86_PAE

#include <linux/slab.h>

#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#define PGD_ALIGN 32

static struct kmem_cache *pgd_cache;

void __init pgd_cache_init(void)
{
	/*
	 * When the kernel pmds are not shared, _pgd_alloc() hands out a
	 * full page per pgd, so no slab cache is needed.
	 */
	if (!SHARED_KERNEL_PMD)
		return;

	/*
	 * With a shared kernel pmd, a PAE pgd is just the four PDPT entries
	 * (32 bytes), which the hardware requires to be 32-byte aligned, so
	 * allocate them from a small, aligned slab cache.
	 */
	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
				      SLAB_PANIC, NULL);
}

static inline pgd_t *_pgd_alloc(void)
{
	/*
	 * If the kernel pmd is not shared, the pgd has to sit in a page of
	 * its own; otherwise the 32-byte pgd comes from pgd_cache.
	 */
	if (!SHARED_KERNEL_PMD)
		return (pgd_t *)__get_free_page(PGALLOC_GFP);

	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
}

static inline void _pgd_free(pgd_t *pgd)
{
	if (!SHARED_KERNEL_PMD)
		free_page((unsigned long)pgd);
	else
		kmem_cache_free(pgd_cache, pgd);
}
#else	/* !CONFIG_X86_PAE */

void __init pgd_cache_init(void)
{
}

static inline pgd_t *_pgd_alloc(void)
{
	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}

static inline void _pgd_free(pgd_t *pgd)
{
	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif	/* CONFIG_X86_PAE */

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd;
	pmd_t *pmds[PREALLOCATED_PMDS];

	pgd = _pgd_alloc();

	if (pgd == NULL)
		goto out;

	mm->pgd = pgd;

	if (preallocate_pmds(mm, pmds) != 0)
		goto out_free_pgd;

	if (paravirt_pgd_alloc(mm) != 0)
		goto out_free_pmds;

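	/*
	 * Take pgd_lock so that constructing the pgd and pre-populating its
	 * pmds looks atomic to anything walking pgd_list: nobody should
	 * ever see a partially populated pgd.
	 */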
	spin_lock(&pgd_lock);

	pgd_ctor(mm, pgd);
	pgd_prepopulate_pmd(mm, pgd, pmds);

	spin_unlock(&pgd_lock);

	return pgd;

out_free_pmds:
	free_pmds(mm, pmds);
out_free_pgd:
	_pgd_free(pgd);
out:
	return NULL;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_mop_up_pmds(mm, pgd);
	pgd_dtor(pgd);
	paravirt_pgd_free(mm, pgd);
	_pgd_free(pgd);
}

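/*
 * The access-flag helpers below update an entry in place when a fault
 * handler wants to set additional flags (typically making a read-only
 * entry writable after a write-protection fault).  No TLB flush is done
 * here: making an entry more permissive can at worst cause a spurious
 * page fault on x86.
 */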
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(*ptep, entry);

	if (changed && dirty)
		*ptep = entry;

	return changed;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (changed && dirty) {
		*pmdp = entry;
		/*
		 * We took a write-protection fault and made the pmd more
		 * permissive; no TLB flush is needed, the worst case is a
		 * spurious fault.
		 */
	}

	return changed;
}

int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pud_t *pudp, pud_t entry, int dirty)
{
	int changed = !pud_same(*pudp, entry);

	VM_BUG_ON(address & ~HPAGE_PUD_MASK);

	if (changed && dirty) {
		*pudp = entry;
		/*
		 * As with pmdp_set_access_flags(): the entry only became
		 * more permissive, so skip the TLB flush.
		 */
	}

	return changed;
}
#endif

int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
	int ret = 0;

	if (pte_young(*ptep))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *) &ptep->pte);

	return ret;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp)
{
	int ret = 0;

	if (pmd_young(*pmdp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pmdp);

	return ret;
}

int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pud_t *pudp)
{
	int ret = 0;

	if (pud_young(*pudp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pudp);

	return ret;
}
#endif

int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep)
{
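	/*
	 * On x86 a stale Accessed bit in the TLB is harmless: clearing the
	 * bit in the pte without flushing cannot cause data corruption,
	 * only slightly inaccurate page aging.  The flush is therefore
	 * skipped as an optimization; the TLB gets flushed soon enough by
	 * context switches and other VM operations anyway.
	 */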
	return ptep_test_and_clear_young(vma, address, ptep);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	int young;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	young = pmdp_test_and_clear_young(vma, address, pmdp);
	if (young)
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	return young;
}
#endif

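/*
 * reserve_top_address - reserve a hole at the top of the kernel address space
 * @reserve: size of hole to reserve
 *
 * Lowers __FIXADDR_TOP (32-bit only) so that, for example, a hypervisor can
 * occupy the topmost part of the virtual address space.  Must be called
 * before any fixmap entries have been set up.
 */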
void __init reserve_top_address(unsigned long reserve)
{
#ifdef CONFIG_X86_32
	BUG_ON(fixmaps_set > 0);
	__FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
	printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
	       -reserve, __FIXADDR_TOP + PAGE_SIZE);
#endif
}

int fixmaps_set;

void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
	unsigned long address = __fix_to_virt(idx);

#ifdef CONFIG_X86_64
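	/*
	 * Make sure the permanent fixmap entries all fit into the page
	 * tables statically reserved for the fixmap region (FIXMAP_PMD_NUM
	 * pmds' worth of ptes).
	 */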
	BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
		     (FIXMAP_PMD_NUM * PTRS_PER_PTE));
#endif

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_vaddr(address, pte);
	fixmaps_set++;
}

void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
		       pgprot_t flags)
{
	/* Drop protection bits that are not enabled for kernel mappings: */
	pgprot_val(flags) &= __default_kernel_pte_mask;

	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
#ifdef CONFIG_X86_5LEVEL
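/*
 * p4d_set_huge - attempt to set up a huge mapping at the p4d level.
 * There is no 512GiB hardware page size, so this always returns 0.
 */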
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}

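/*
 * p4d_clear_huge - clear a huge p4d mapping.
 * Since no p4d-level huge mappings can exist, there is never anything
 * to clear.
 */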
int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
#endif

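/*
 * pud_set_huge - set up a 1GiB kernel mapping at the pud level.
 *
 * MTRRs can override PAT memory types at 4KiB granularity, so the huge
 * mapping is only created when MTRRs are disabled, the range is covered
 * uniformly by a single MTRR, or the MTRR type is write-back (and thus
 * does not conflict with the requested PAT type).  It also refuses to
 * overwrite a pud that already points to a page-table page.
 *
 * Returns 1 on success; on failure (0) the caller falls back to smaller
 * page sizes.
 */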
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK))
		return 0;

	/* Bail out if we are on a populated non-leaf entry: */
	if (pud_present(*pud) && !pud_huge(*pud))
		return 0;

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pud, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}

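/*
 * pmd_set_huge - set up a 2MiB kernel mapping at the pmd level.
 *
 * Applies the same MTRR/PAT and non-leaf checks as pud_set_huge(); a
 * conflicting, non-uniform MTRR is reported once via pr_warn_once().
 * Returns 1 on success, 0 if the caller should fall back to 4KiB pages.
 */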
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK)) {
		pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
			     __func__, addr, addr + PMD_SIZE);
		return 0;
	}

	/* Bail out if we are on a populated non-leaf entry: */
	if (pmd_present(*pmd) && !pmd_huge(*pmd))
		return 0;

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pmd, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}

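/*
 * pud_clear_huge - clear a huge (leaf) pud entry.
 * Returns 1 if a huge mapping was cleared, 0 if the pud was not huge.
 */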
int pud_clear_huge(pud_t *pud)
{
	if (pud_large(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}

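/*
 * pmd_clear_huge - clear a huge (leaf) pmd entry.
 * Returns 1 if a huge mapping was cleared, 0 if the pmd was not huge.
 */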
int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_large(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}

#ifdef CONFIG_X86_64
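/*
 * pud_free_pmd_page - clear a pud entry and free the pmd page beneath it.
 * @pud:  pud entry to clear
 * @addr: virtual address covered by the entry
 *
 * The pmd entries are saved, the pud and pmds are cleared, the mapping is
 * flushed, and only then are the pte pages and the pmd page freed.
 * Returns 1 on success, 0 if the temporary save page cannot be allocated.
 */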
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	pmd_t *pmd, *pmd_sv;
	pte_t *pte;
	int i;

	if (pud_none(*pud))
		return 1;

	pmd = (pmd_t *)pud_page_vaddr(*pud);
	pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
	if (!pmd_sv)
		return 0;

	/* Save the pmd entries, then clear them and the pud itself. */
	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd_sv[i] = pmd[i];
		if (!pmd_none(pmd[i]))
			pmd_clear(&pmd[i]);
	}

	pud_clear(pud);

	/* INVLPG to clear all paging-structure caches */
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

	/* Now it is safe to free the pte pages the saved pmds pointed to. */
	for (i = 0; i < PTRS_PER_PMD; i++) {
		if (!pmd_none(pmd_sv[i])) {
			pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
			free_page((unsigned long)pte);
		}
	}

	free_page((unsigned long)pmd_sv);

	pgtable_pmd_page_dtor(virt_to_page(pmd));
	free_page((unsigned long)pmd);

	return 1;
}

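/*
 * pmd_free_pte_page - clear a pmd entry and free the pte page beneath it.
 * @pmd:  pmd entry to clear
 * @addr: virtual address covered by the entry
 *
 * Returns 1 after clearing the entry, flushing the mapping and freeing the
 * pte page (or immediately if the pmd was already clear).
 */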
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	pte_t *pte;

	if (pmd_none(*pmd))
		return 1;

	pte = (pte_t *)pmd_page_vaddr(*pmd);
	pmd_clear(pmd);

	/* INVLPG to clear all paging-structure caches */
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);

	free_page((unsigned long)pte);

	return 1;
}

#else	/* !CONFIG_X86_64 */

/*
 * On 32-bit, freeing pre-existing page-table pages under a huge mapping is
 * not supported: only report success when the entry is already clear.
 */
int pud_free_pmd_page(pud_t *pud, unsigned long addr)
{
	return pud_none(*pud);
}

/*
 * Disable free page handling on x86-PAE as well: only report success when
 * the pmd is already clear.
 */
int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
{
	return pmd_none(*pmd);
}

#endif	/* CONFIG_X86_64 */
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */