1#include <linux/mm.h>
2#include <linux/gfp.h>
3#include <asm/pgalloc.h>
4#include <asm/pgtable.h>
5#include <asm/tlb.h>
6#include <asm/fixmap.h>
7#include <asm/mtrr.h>
8
9#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | __GFP_ZERO)
10
11#ifdef CONFIG_HIGHPTE
12#define PGALLOC_USER_GFP __GFP_HIGHMEM
13#else
14#define PGALLOC_USER_GFP 0
15#endif
16
17gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
18
19pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
20{
21 return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
22}
23
24pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
25{
26 struct page *pte;
27
28 pte = alloc_pages(__userpte_alloc_gfp, 0);
29 if (!pte)
30 return NULL;
31 if (!pgtable_page_ctor(pte)) {
32 __free_page(pte);
33 return NULL;
34 }
35 return pte;
36}
37
38static int __init setup_userpte(char *arg)
39{
40 if (!arg)
41 return -EINVAL;
42
43
44
45
46
47 if (strcmp(arg, "nohigh") == 0)
48 __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
49 else
50 return -EINVAL;
51 return 0;
52}
53early_param("userpte", setup_userpte);
54
/*
 * Release a PTE page through the mmu_gather @tlb.
 *
 * Runs the page-table destructor, notifies the paravirt layer that the
 * frame is no longer a PTE page, then queues the page on @tlb for freeing.
 */
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	unsigned long pfn = page_to_pfn(pte);

	pgtable_page_dtor(pte);
	paravirt_release_pte(pfn);
	tlb_remove_page(tlb, pte);
}
61
62#if CONFIG_PGTABLE_LEVELS > 2
63void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
64{
65 struct page *page = virt_to_page(pmd);
66 paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
67
68
69
70
71#ifdef CONFIG_X86_PAE
72 tlb->need_flush_all = 1;
73#endif
74 pgtable_pmd_page_dtor(page);
75 tlb_remove_page(tlb, page);
76}
77
78#if CONFIG_PGTABLE_LEVELS > 3
79void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
80{
81 paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
82 tlb_remove_page(tlb, virt_to_page(pud));
83}
84
85#if CONFIG_PGTABLE_LEVELS > 4
86void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
87{
88 paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
89 tlb_remove_page(tlb, virt_to_page(p4d));
90}
91#endif
92#endif
93#endif
94
95static inline void pgd_list_add(pgd_t *pgd)
96{
97 struct page *page = virt_to_page(pgd);
98
99 list_add(&page->lru, &pgd_list);
100}
101
102static inline void pgd_list_del(pgd_t *pgd)
103{
104 struct page *page = virt_to_page(pgd);
105
106 list_del(&page->lru);
107}
108
/*
 * Number of pgd slots that are private to each pgd: only those below
 * KERNEL_PGD_BOUNDARY when the kernel PMD is shared, otherwise all of them.
 */
#define UNSHARED_PTRS_PER_PGD \
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
111
112
113static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
114{
115 BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
116 virt_to_page(pgd)->index = (pgoff_t)mm;
117}
118
119struct mm_struct *pgd_page_get_mm(struct page *page)
120{
121 return (struct mm_struct *)page->index;
122}
123
124static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
125{
126
127
128
129 if (CONFIG_PGTABLE_LEVELS == 2 ||
130 (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
131 CONFIG_PGTABLE_LEVELS >= 4) {
132 clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
133 swapper_pg_dir + KERNEL_PGD_BOUNDARY,
134 KERNEL_PGD_PTRS);
135 }
136
137
138 if (!SHARED_KERNEL_PMD) {
139 pgd_set_mm(pgd, mm);
140 pgd_list_add(pgd);
141 }
142}
143
144static void pgd_dtor(pgd_t *pgd)
145{
146 if (SHARED_KERNEL_PMD)
147 return;
148
149 spin_lock(&pgd_lock);
150 pgd_list_del(pgd);
151 spin_unlock(&pgd_lock);
152}
153
154
155
156
157
158
159
160
161
162
163
164
165#ifdef CONFIG_X86_PAE
166
167
168
169
170
171
172
173
174
175
176
/* PAE: one PMD page is preallocated for every unshared pgd slot. */
#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
178
179void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
180{
181 paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
182
183
184
185 set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
186
187
188
189
190
191
192
193 flush_tlb_mm(mm);
194}
195#else
196
197
/* Non-PAE: no PMD pages are preallocated when a pgd is created. */
#define PREALLOCATED_PMDS	0
199
200#endif
201
202static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
203{
204 int i;
205
206 for(i = 0; i < PREALLOCATED_PMDS; i++)
207 if (pmds[i]) {
208 pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
209 free_page((unsigned long)pmds[i]);
210 mm_dec_nr_pmds(mm);
211 }
212}
213
214static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
215{
216 int i;
217 bool failed = false;
218 gfp_t gfp = PGALLOC_GFP;
219
220 if (mm == &init_mm)
221 gfp &= ~__GFP_ACCOUNT;
222
223 for(i = 0; i < PREALLOCATED_PMDS; i++) {
224 pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
225 if (!pmd)
226 failed = true;
227 if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
228 free_page((unsigned long)pmd);
229 pmd = NULL;
230 failed = true;
231 }
232 if (pmd)
233 mm_inc_nr_pmds(mm);
234 pmds[i] = pmd;
235 }
236
237 if (failed) {
238 free_pmds(mm, pmds);
239 return -ENOMEM;
240 }
241
242 return 0;
243}
244
245
246
247
248
249
250
251static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
252{
253 int i;
254
255 for(i = 0; i < PREALLOCATED_PMDS; i++) {
256 pgd_t pgd = pgdp[i];
257
258 if (pgd_val(pgd) != 0) {
259 pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
260
261 pgdp[i] = native_make_pgd(0);
262
263 paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
264 pmd_free(mm, pmd);
265 mm_dec_nr_pmds(mm);
266 }
267 }
268}
269
270static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
271{
272 p4d_t *p4d;
273 pud_t *pud;
274 int i;
275
276 if (PREALLOCATED_PMDS == 0)
277 return;
278
279 p4d = p4d_offset(pgd, 0);
280 pud = pud_offset(p4d, 0);
281
282 for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
283 pmd_t *pmd = pmds[i];
284
285 if (i >= KERNEL_PGD_BOUNDARY)
286 memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
287 sizeof(pmd_t) * PTRS_PER_PMD);
288
289 pud_populate(mm, pud, pmd);
290 }
291}
292
293
294
295
296
297
298
299
300#ifdef CONFIG_X86_PAE
301
302#include <linux/slab.h>
303
/* Size in bytes of a full pgd: one pgd_t per pgd slot. */
#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
/* Alignment requested for objects in pgd_cache. */
#define PGD_ALIGN	32

/* Slab cache for pgds; only created when the kernel PMD is shared. */
static struct kmem_cache *pgd_cache;
308
309static int __init pgd_cache_init(void)
310{
311
312
313
314
315 if (!SHARED_KERNEL_PMD)
316 return 0;
317
318
319
320
321
322
323
324 pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
325 SLAB_PANIC, NULL);
326 if (!pgd_cache)
327 return -ENOMEM;
328
329 return 0;
330}
331core_initcall(pgd_cache_init);
332
333static inline pgd_t *_pgd_alloc(void)
334{
335
336
337
338
339 if (!SHARED_KERNEL_PMD)
340 return (pgd_t *)__get_free_page(PGALLOC_GFP);
341
342
343
344
345
346 return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
347}
348
349static inline void _pgd_free(pgd_t *pgd)
350{
351 if (!SHARED_KERNEL_PMD)
352 free_page((unsigned long)pgd);
353 else
354 kmem_cache_free(pgd_cache, pgd);
355}
356#else
357static inline pgd_t *_pgd_alloc(void)
358{
359 return (pgd_t *)__get_free_page(PGALLOC_GFP);
360}
361
362static inline void _pgd_free(pgd_t *pgd)
363{
364 free_page((unsigned long)pgd);
365}
366#endif
367
368pgd_t *pgd_alloc(struct mm_struct *mm)
369{
370 pgd_t *pgd;
371 pmd_t *pmds[PREALLOCATED_PMDS];
372
373 pgd = _pgd_alloc();
374
375 if (pgd == NULL)
376 goto out;
377
378 mm->pgd = pgd;
379
380 if (preallocate_pmds(mm, pmds) != 0)
381 goto out_free_pgd;
382
383 if (paravirt_pgd_alloc(mm) != 0)
384 goto out_free_pmds;
385
386
387
388
389
390
391 spin_lock(&pgd_lock);
392
393 pgd_ctor(mm, pgd);
394 pgd_prepopulate_pmd(mm, pgd, pmds);
395
396 spin_unlock(&pgd_lock);
397
398 return pgd;
399
400out_free_pmds:
401 free_pmds(mm, pmds);
402out_free_pgd:
403 _pgd_free(pgd);
404out:
405 return NULL;
406}
407
408void pgd_free(struct mm_struct *mm, pgd_t *pgd)
409{
410 pgd_mop_up_pmds(mm, pgd);
411 pgd_dtor(pgd);
412 paravirt_pgd_free(mm, pgd);
413 _pgd_free(pgd);
414}
415
416
417
418
419
420
421
422
423int ptep_set_access_flags(struct vm_area_struct *vma,
424 unsigned long address, pte_t *ptep,
425 pte_t entry, int dirty)
426{
427 int changed = !pte_same(*ptep, entry);
428
429 if (changed && dirty) {
430 *ptep = entry;
431 pte_update(vma->vm_mm, address, ptep);
432 }
433
434 return changed;
435}
436
437#ifdef CONFIG_TRANSPARENT_HUGEPAGE
438int pmdp_set_access_flags(struct vm_area_struct *vma,
439 unsigned long address, pmd_t *pmdp,
440 pmd_t entry, int dirty)
441{
442 int changed = !pmd_same(*pmdp, entry);
443
444 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
445
446 if (changed && dirty) {
447 *pmdp = entry;
448
449
450
451
452
453
454 }
455
456 return changed;
457}
458
459int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
460 pud_t *pudp, pud_t entry, int dirty)
461{
462 int changed = !pud_same(*pudp, entry);
463
464 VM_BUG_ON(address & ~HPAGE_PUD_MASK);
465
466 if (changed && dirty) {
467 *pudp = entry;
468
469
470
471
472
473
474 }
475
476 return changed;
477}
478#endif
479
480int ptep_test_and_clear_young(struct vm_area_struct *vma,
481 unsigned long addr, pte_t *ptep)
482{
483 int ret = 0;
484
485 if (pte_young(*ptep))
486 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
487 (unsigned long *) &ptep->pte);
488
489 if (ret)
490 pte_update(vma->vm_mm, addr, ptep);
491
492 return ret;
493}
494
495#ifdef CONFIG_TRANSPARENT_HUGEPAGE
496int pmdp_test_and_clear_young(struct vm_area_struct *vma,
497 unsigned long addr, pmd_t *pmdp)
498{
499 int ret = 0;
500
501 if (pmd_young(*pmdp))
502 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
503 (unsigned long *)pmdp);
504
505 return ret;
506}
507int pudp_test_and_clear_young(struct vm_area_struct *vma,
508 unsigned long addr, pud_t *pudp)
509{
510 int ret = 0;
511
512 if (pud_young(*pudp))
513 ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
514 (unsigned long *)pudp);
515
516 return ret;
517}
518#endif
519
520int ptep_clear_flush_young(struct vm_area_struct *vma,
521 unsigned long address, pte_t *ptep)
522{
523
524
525
526
527
528
529
530
531
532
533
534
535
536 return ptep_test_and_clear_young(vma, address, ptep);
537}
538
539#ifdef CONFIG_TRANSPARENT_HUGEPAGE
540int pmdp_clear_flush_young(struct vm_area_struct *vma,
541 unsigned long address, pmd_t *pmdp)
542{
543 int young;
544
545 VM_BUG_ON(address & ~HPAGE_PMD_MASK);
546
547 young = pmdp_test_and_clear_young(vma, address, pmdp);
548 if (young)
549 flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
550
551 return young;
552}
553#endif
554
555
556
557
558
559
560
561
562void __init reserve_top_address(unsigned long reserve)
563{
564#ifdef CONFIG_X86_32
565 BUG_ON(fixmaps_set > 0);
566 __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
567 printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
568 -reserve, __FIXADDR_TOP + PAGE_SIZE);
569#endif
570}
571
/*
 * Count of fixmap entries installed through __native_set_fixmap();
 * reserve_top_address() refuses to run once this is non-zero.
 */
int fixmaps_set;
573
574void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
575{
576 unsigned long address = __fix_to_virt(idx);
577
578 if (idx >= __end_of_fixed_addresses) {
579 BUG();
580 return;
581 }
582 set_pte_vaddr(address, pte);
583 fixmaps_set++;
584}
585
586void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
587 pgprot_t flags)
588{
589 __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
590}
591
592#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
593#ifdef CONFIG_X86_5LEVEL
594
595
596
597
598
599int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
600{
601 return 0;
602}
603
604
605
606
607
608
609int p4d_clear_huge(p4d_t *p4d)
610{
611 return 0;
612}
613#endif
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
634{
635 u8 mtrr, uniform;
636
637 mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
638 if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
639 (mtrr != MTRR_TYPE_WRBACK))
640 return 0;
641
642 prot = pgprot_4k_2_large(prot);
643
644 set_pte((pte_t *)pud, pfn_pte(
645 (u64)addr >> PAGE_SHIFT,
646 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
647
648 return 1;
649}
650
651
652
653
654
655
656
657
658int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
659{
660 u8 mtrr, uniform;
661
662 mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
663 if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
664 (mtrr != MTRR_TYPE_WRBACK)) {
665 pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
666 __func__, addr, addr + PMD_SIZE);
667 return 0;
668 }
669
670 prot = pgprot_4k_2_large(prot);
671
672 set_pte((pte_t *)pmd, pfn_pte(
673 (u64)addr >> PAGE_SHIFT,
674 __pgprot(pgprot_val(prot) | _PAGE_PSE)));
675
676 return 1;
677}
678
679
680
681
682
683
684int pud_clear_huge(pud_t *pud)
685{
686 if (pud_large(*pud)) {
687 pud_clear(pud);
688 return 1;
689 }
690
691 return 0;
692}
693
694
695
696
697
698
699int pmd_clear_huge(pmd_t *pmd)
700{
701 if (pmd_large(*pmd)) {
702 pmd_clear(pmd);
703 return 1;
704 }
705
706 return 0;
707}
708#endif
709