1
2#include <linux/mm.h>
3#include <linux/gfp.h>
4#include <linux/hugetlb.h>
5#include <asm/pgalloc.h>
6#include <asm/pgtable.h>
7#include <asm/tlb.h>
8#include <asm/fixmap.h>
9#include <asm/mtrr.h>
10
#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
/*
 * Physical address mask, initialised with the low __PHYSICAL_MASK_SHIFT
 * bits set. It is marked __ro_after_init and exported.
 */
phys_addr_t physical_mask __ro_after_init =
	(1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif
15
16#ifdef CONFIG_HIGHPTE
17#define PGTABLE_HIGHMEM __GFP_HIGHMEM
18#else
19#define PGTABLE_HIGHMEM 0
20#endif
21
22gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
23
24pgtable_t pte_alloc_one(struct mm_struct *mm)
25{
26 return __pte_alloc_one(mm, __userpte_alloc_gfp);
27}
28
29static int __init setup_userpte(char *arg)
30{
31 if (!arg)
32 return -EINVAL;
33
34
35
36
37
38 if (strcmp(arg, "nohigh") == 0)
39 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
40 else
41 return -EINVAL;
42 return 0;
43}
44early_param("userpte", setup_userpte);
45
/*
 * Release a PTE page through the mmu_gather @tlb: run the page-table page
 * destructor, notify paravirt of the release, then queue the page for
 * removal via paravirt_tlb_remove_table().
 */
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	unsigned long pfn;

	pgtable_page_dtor(pte);

	pfn = page_to_pfn(pte);
	paravirt_release_pte(pfn);

	paravirt_tlb_remove_table(tlb, pte);
}
52
53#if CONFIG_PGTABLE_LEVELS > 2
54void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
55{
56 struct page *page = virt_to_page(pmd);
57 paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
58
59
60
61
62#ifdef CONFIG_X86_PAE
63 tlb->need_flush_all = 1;
64#endif
65 pgtable_pmd_page_dtor(page);
66 paravirt_tlb_remove_table(tlb, page);
67}
68
69#if CONFIG_PGTABLE_LEVELS > 3
70void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
71{
72 paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
73 paravirt_tlb_remove_table(tlb, virt_to_page(pud));
74}
75
76#if CONFIG_PGTABLE_LEVELS > 4
77void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
78{
79 paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
80 paravirt_tlb_remove_table(tlb, virt_to_page(p4d));
81}
82#endif
83#endif
84#endif
85
86static inline void pgd_list_add(pgd_t *pgd)
87{
88 struct page *page = virt_to_page(pgd);
89
90 list_add(&page->lru, &pgd_list);
91}
92
93static inline void pgd_list_del(pgd_t *pgd)
94{
95 struct page *page = virt_to_page(pgd);
96
97 list_del(&page->lru);
98}
99
/*
 * UNSHARED_PTRS_PER_PGD is KERNEL_PGD_BOUNDARY when SHARED_KERNEL_PMD is
 * set and PTRS_PER_PGD otherwise.
 */
#define UNSHARED_PTRS_PER_PGD						\
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)

/* The larger of the two possible UNSHARED_PTRS_PER_PGD values. */
#define MAX_UNSHARED_PTRS_PER_PGD					\
	max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
104
105
106static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
107{
108 virt_to_page(pgd)->pt_mm = mm;
109}
110
111struct mm_struct *pgd_page_get_mm(struct page *page)
112{
113 return page->pt_mm;
114}
115
116static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
117{
118
119
120
121 if (CONFIG_PGTABLE_LEVELS == 2 ||
122 (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
123 CONFIG_PGTABLE_LEVELS >= 4) {
124 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
125 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
126 KERNEL_PGD_PTRS);
127 }
128
129
130 if (!SHARED_KERNEL_PMD) {
131 pgd_set_mm(pgd, mm);
132 pgd_list_add(pgd);
133 }
134}
135
136static void pgd_dtor(pgd_t *pgd)
137{
138 if (SHARED_KERNEL_PMD)
139 return;
140
141 spin_lock(&pgd_lock);
142 pgd_list_del(pgd);
143 spin_unlock(&pgd_lock);
144}
145
146
147
148
149
150
151
152
153
154
155
156
157#ifdef CONFIG_X86_PAE
158
159
160
161
162
163
164
165
166
167
168
/*
 * With PAE, pgd_alloc() preallocates one PMD page for each unshared pgd
 * entry. MAX_PREALLOCATED_PMDS is the matching upper bound, used to size
 * the on-stack array.
 */
#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
#define MAX_PREALLOCATED_PMDS	MAX_UNSHARED_PTRS_PER_PGD

/*
 * Extra PMD pages for the kernel part of the user page table. They are only
 * allocated when the CPU has X86_FEATURE_PTI; otherwise the count is zero.
 */
#define PREALLOCATED_USER_PMDS	 (boot_cpu_has(X86_FEATURE_PTI) ? \
					KERNEL_PGD_PTRS : 0)
#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
180
181void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
182{
183 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
184
185
186
187 set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
188
189
190
191
192
193
194
195 flush_tlb_mm(mm);
196}
197#else
198
199
/* Without PAE no PMD pages are preallocated: every count is zero. */
#define PREALLOCATED_PMDS		0
#define MAX_PREALLOCATED_PMDS		0
#define PREALLOCATED_USER_PMDS		0
#define MAX_PREALLOCATED_USER_PMDS	0
204#endif
205
206static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
207{
208 int i;
209
210 for (i = 0; i < count; i++)
211 if (pmds[i]) {
212 pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
213 free_page((unsigned long)pmds[i]);
214 mm_dec_nr_pmds(mm);
215 }
216}
217
218static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
219{
220 int i;
221 bool failed = false;
222 gfp_t gfp = GFP_PGTABLE_USER;
223
224 if (mm == &init_mm)
225 gfp &= ~__GFP_ACCOUNT;
226
227 for (i = 0; i < count; i++) {
228 pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
229 if (!pmd)
230 failed = true;
231 if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
232 free_page((unsigned long)pmd);
233 pmd = NULL;
234 failed = true;
235 }
236 if (pmd)
237 mm_inc_nr_pmds(mm);
238 pmds[i] = pmd;
239 }
240
241 if (failed) {
242 free_pmds(mm, pmds, count);
243 return -ENOMEM;
244 }
245
246 return 0;
247}
248
249
250
251
252
253
254
255static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
256{
257 pgd_t pgd = *pgdp;
258
259 if (pgd_val(pgd) != 0) {
260 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
261
262 pgd_clear(pgdp);
263
264 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
265 pmd_free(mm, pmd);
266 mm_dec_nr_pmds(mm);
267 }
268}
269
270static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
271{
272 int i;
273
274 for (i = 0; i < PREALLOCATED_PMDS; i++)
275 mop_up_one_pmd(mm, &pgdp[i]);
276
277#ifdef CONFIG_PAGE_TABLE_ISOLATION
278
279 if (!boot_cpu_has(X86_FEATURE_PTI))
280 return;
281
282 pgdp = kernel_to_user_pgdp(pgdp);
283
284 for (i = 0; i < PREALLOCATED_USER_PMDS; i++)
285 mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]);
286#endif
287}
288
289static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
290{
291 p4d_t *p4d;
292 pud_t *pud;
293 int i;
294
295 if (PREALLOCATED_PMDS == 0)
296 return;
297
298 p4d = p4d_offset(pgd, 0);
299 pud = pud_offset(p4d, 0);
300
301 for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
302 pmd_t *pmd = pmds[i];
303
304 if (i >= KERNEL_PGD_BOUNDARY)
305 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
306 sizeof(pmd_t) * PTRS_PER_PMD);
307
308 pud_populate(mm, pud, pmd);
309 }
310}
311
312#ifdef CONFIG_PAGE_TABLE_ISOLATION
313static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
314 pgd_t *k_pgd, pmd_t *pmds[])
315{
316 pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir);
317 pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
318 p4d_t *u_p4d;
319 pud_t *u_pud;
320 int i;
321
322 u_p4d = p4d_offset(u_pgd, 0);
323 u_pud = pud_offset(u_p4d, 0);
324
325 s_pgd += KERNEL_PGD_BOUNDARY;
326 u_pud += KERNEL_PGD_BOUNDARY;
327
328 for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) {
329 pmd_t *pmd = pmds[i];
330
331 memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd),
332 sizeof(pmd_t) * PTRS_PER_PMD);
333
334 pud_populate(mm, u_pud, pmd);
335 }
336
337}
338#else
339static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
340 pgd_t *k_pgd, pmd_t *pmds[])
341{
342}
343#endif
344
345
346
347
348
349
350
351#ifdef CONFIG_X86_PAE
352
353#include <linux/slab.h>
354
/* Size in bytes of a full pgd table, and the alignment used for its slab. */
#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#define PGD_ALIGN	32

/* Slab cache for pgds; only created when SHARED_KERNEL_PMD is set. */
static struct kmem_cache *pgd_cache;
359
360void __init pgd_cache_init(void)
361{
362
363
364
365
366 if (!SHARED_KERNEL_PMD)
367 return;
368
369
370
371
372
373
374
375 pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
376 SLAB_PANIC, NULL);
377}
378
379static inline pgd_t *_pgd_alloc(void)
380{
381
382
383
384
385 if (!SHARED_KERNEL_PMD)
386 return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
387 PGD_ALLOCATION_ORDER);
388
389
390
391
392
393 return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
394}
395
396static inline void _pgd_free(pgd_t *pgd)
397{
398 if (!SHARED_KERNEL_PMD)
399 free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
400 else
401 kmem_cache_free(pgd_cache, pgd);
402}
403#else
404
405void __init pgd_cache_init(void)
406{
407}
408
409static inline pgd_t *_pgd_alloc(void)
410{
411 return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
412 PGD_ALLOCATION_ORDER);
413}
414
415static inline void _pgd_free(pgd_t *pgd)
416{
417 free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
418}
419#endif
420
421pgd_t *pgd_alloc(struct mm_struct *mm)
422{
423 pgd_t *pgd;
424 pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
425 pmd_t *pmds[MAX_PREALLOCATED_PMDS];
426
427 pgd = _pgd_alloc();
428
429 if (pgd == NULL)
430 goto out;
431
432 mm->pgd = pgd;
433
434 if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
435 goto out_free_pgd;
436
437 if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
438 goto out_free_pmds;
439
440 if (paravirt_pgd_alloc(mm) != 0)
441 goto out_free_user_pmds;
442
443
444
445
446
447
448 spin_lock(&pgd_lock);
449
450 pgd_ctor(mm, pgd);
451 pgd_prepopulate_pmd(mm, pgd, pmds);
452 pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
453
454 spin_unlock(&pgd_lock);
455
456 return pgd;
457
458out_free_user_pmds:
459 free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
460out_free_pmds:
461 free_pmds(mm, pmds, PREALLOCATED_PMDS);
462out_free_pgd:
463 _pgd_free(pgd);
464out:
465 return NULL;
466}
467
468void pgd_free(struct mm_struct *mm, pgd_t *pgd)
469{
470 pgd_mop_up_pmds(mm, pgd);
471 pgd_dtor(pgd);
472 paravirt_pgd_free(mm, pgd);
473 _pgd_free(pgd);
474}
475
476
477
478
479
480
481
482
483int ptep_set_access_flags(struct vm_area_struct *vma,
484 unsigned long address, pte_t *ptep,
485 pte_t entry, int dirty)
486{
487 int changed = !pte_same(*ptep, entry);
488
489 if (changed && dirty)
490 set_pte(ptep, entry);
491
492 return changed;
493}
494
495#ifdef CONFIG_TRANSPARENT_HUGEPAGE
496int pmdp_set_access_flags(struct vm_area_struct *vma,
497 unsigned long address, pmd_t *pmdp,
498 pmd_t entry, int dirty)
499{
500 int changed = !pmd_same(*pmdp, entry);
501
502 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
503
504 if (changed && dirty) {
505 set_pmd(pmdp, entry);
506
507
508
509
510
511
512 }
513
514 return changed;
515}
516
517int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
518 pud_t *pudp, pud_t entry, int dirty)
519{
520 int changed = !pud_same(*pudp, entry);
521
522 VM_BUG_ON(address & ~HPAGE_PUD_MASK);
523
524 if (changed && dirty) {
525 set_pud(pudp, entry);
526
527
528
529
530
531
532 }
533
534 return changed;
535}
536#endif
537
538int ptep_test_and_clear_young(struct vm_area_struct *vma,
539 unsigned long addr, pte_t *ptep)
540{
541 int ret = 0;
542
543 if (pte_young(*ptep))
544 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
545 (unsigned long *) &ptep->pte);
546
547 return ret;
548}
549
550#ifdef CONFIG_TRANSPARENT_HUGEPAGE
551int pmdp_test_and_clear_young(struct vm_area_struct *vma,
552 unsigned long addr, pmd_t *pmdp)
553{
554 int ret = 0;
555
556 if (pmd_young(*pmdp))
557 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
558 (unsigned long *)pmdp);
559
560 return ret;
561}
562int pudp_test_and_clear_young(struct vm_area_struct *vma,
563 unsigned long addr, pud_t *pudp)
564{
565 int ret = 0;
566
567 if (pud_young(*pudp))
568 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
569 (unsigned long *)pudp);
570
571 return ret;
572}
573#endif
574
575int ptep_clear_flush_young(struct vm_area_struct *vma,
576 unsigned long address, pte_t *ptep)
577{
578
579
580
581
582
583
584
585
586
587
588
589
590
591 return ptep_test_and_clear_young(vma, address, ptep);
592}
593
594#ifdef CONFIG_TRANSPARENT_HUGEPAGE
595int pmdp_clear_flush_young(struct vm_area_struct *vma,
596 unsigned long address, pmd_t *pmdp)
597{
598 int young;
599
600 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
601
602 young = pmdp_test_and_clear_young(vma, address, pmdp);
603 if (young)
604 flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
605
606 return young;
607}
608#endif
609
610
611
612
613
614
615
616
617void __init reserve_top_address(unsigned long reserve)
618{
619#ifdef CONFIG_X86_32
620 BUG_ON(fixmaps_set > 0);
621 __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
622 printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
623 -reserve, __FIXADDR_TOP + PAGE_SIZE);
624#endif
625}
626
/* Count of fixmap entries installed so far by __native_set_fixmap(). */
int fixmaps_set;
628
629void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
630{
631 unsigned long address = __fix_to_virt(idx);
632
633#ifdef CONFIG_X86_64
634
635
636
637
638 BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
639 (FIXMAP_PMD_NUM * PTRS_PER_PTE));
640#endif
641
642 if (idx >= __end_of_fixed_addresses) {
643 BUG();
644 return;
645 }
646 set_pte_vaddr(address, pte);
647 fixmaps_set++;
648}
649
650void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
651 pgprot_t flags)
652{
653
654 pgprot_val(flags) &= __default_kernel_pte_mask;
655
656 __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
657}
658
659#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
660#ifdef CONFIG_X86_5LEVEL
661
662
663
664
665
666int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
667{
668 return 0;
669}
670
671
672
673
674
675
676int p4d_clear_huge(p4d_t *p4d)
677{
678 return 0;
679}
680#endif
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
701{
702 u8 mtrr, uniform;
703
704 mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
705 if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
706 (mtrr != MTRR_TYPE_WRBACK))
707 return 0;
708
709
710 if (pud_present(*pud) && !pud_huge(*pud))
711 return 0;
712
713 prot = pgprot_4k_2_large(prot);
714
715 set_pte((pte_t *)pud, pfn_pte(
716 (u64)addr >> PAGE_SHIFT,
717 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
718
719 return 1;
720}
721
722
723
724
725
726
727
728
729int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
730{
731 u8 mtrr, uniform;
732
733 mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
734 if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
735 (mtrr != MTRR_TYPE_WRBACK)) {
736 pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
737 __func__, addr, addr + PMD_SIZE);
738 return 0;
739 }
740
741
742 if (pmd_present(*pmd) && !pmd_huge(*pmd))
743 return 0;
744
745 prot = pgprot_4k_2_large(prot);
746
747 set_pte((pte_t *)pmd, pfn_pte(
748 (u64)addr >> PAGE_SHIFT,
749 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
750
751 return 1;
752}
753
754
755
756
757
758
759int pud_clear_huge(pud_t *pud)
760{
761 if (pud_large(*pud)) {
762 pud_clear(pud);
763 return 1;
764 }
765
766 return 0;
767}
768
769
770
771
772
773
774int pmd_clear_huge(pmd_t *pmd)
775{
776 if (pmd_large(*pmd)) {
777 pmd_clear(pmd);
778 return 1;
779 }
780
781 return 0;
782}
783
784
785
786
787int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
788{
789 return 0;
790}
791
792#ifdef CONFIG_X86_64
793
794
795
796
797
798
799
800
801
802
803int pud_free_pmd_page(pud_t *pud, unsigned long addr)
804{
805 pmd_t *pmd, *pmd_sv;
806 pte_t *pte;
807 int i;
808
809 pmd = (pmd_t *)pud_page_vaddr(*pud);
810 pmd_sv = (pmd_t *)__get_free_page(GFP_KERNEL);
811 if (!pmd_sv)
812 return 0;
813
814 for (i = 0; i < PTRS_PER_PMD; i++) {
815 pmd_sv[i] = pmd[i];
816 if (!pmd_none(pmd[i]))
817 pmd_clear(&pmd[i]);
818 }
819
820 pud_clear(pud);
821
822
823 flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
824
825 for (i = 0; i < PTRS_PER_PMD; i++) {
826 if (!pmd_none(pmd_sv[i])) {
827 pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]);
828 free_page((unsigned long)pte);
829 }
830 }
831
832 free_page((unsigned long)pmd_sv);
833 free_page((unsigned long)pmd);
834
835 return 1;
836}
837
838
839
840
841
842
843
844
845
846int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
847{
848 pte_t *pte;
849
850 pte = (pte_t *)pmd_page_vaddr(*pmd);
851 pmd_clear(pmd);
852
853
854 flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1);
855
856 free_page((unsigned long)pte);
857
858 return 1;
859}
860
861#else
862
863int pud_free_pmd_page(pud_t *pud, unsigned long addr)
864{
865 return pud_none(*pud);
866}
867
868
869
870
871
872int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
873{
874 return pmd_none(*pmd);
875}
876
877#endif
878#endif
879