#include <linux/mm.h>
#include <linux/gfp.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/fixmap.h>
#include <asm/mtrr.h>

#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)

#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
#else
#define PGALLOC_USER_GFP 0
#endif

gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
	return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
}

pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *pte;

	pte = alloc_pages(__userpte_alloc_gfp, 0);
	if (!pte)
		return NULL;
	if (!pgtable_page_ctor(pte)) {
		__free_page(pte);
		return NULL;
	}
	return pte;
}

static int __init setup_userpte(char *arg)
{
	if (!arg)
		return -EINVAL;

	/*
	 * "userpte=nohigh" disables allocation of user page tables in
	 * high memory.
	 */
	if (strcmp(arg, "nohigh") == 0)
		__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
	else
		return -EINVAL;
	return 0;
}
early_param("userpte", setup_userpte);

void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
	pgtable_page_dtor(pte);
	paravirt_release_pte(page_to_pfn(pte));
	tlb_remove_table(tlb, pte);
}

#if CONFIG_PGTABLE_LEVELS > 2
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
	struct page *page = virt_to_page(pmd);
	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
	/*
	 * NOTE! For PAE, any change to the top page-directory-pointer-table
	 * entries needs a full TLB flush, since the CPU caches the PDPT
	 * entries when %cr3 is loaded.
	 */
#ifdef CONFIG_X86_PAE
	tlb->need_flush_all = 1;
#endif
	pgtable_pmd_page_dtor(page);
	tlb_remove_table(tlb, page);
}

#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
	tlb_remove_table(tlb, virt_to_page(pud));
}

#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
	paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
	tlb_remove_table(tlb, virt_to_page(p4d));
}
#endif	/* CONFIG_PGTABLE_LEVELS > 4 */
#endif	/* CONFIG_PGTABLE_LEVELS > 3 */
#endif	/* CONFIG_PGTABLE_LEVELS > 2 */

static inline void pgd_list_add(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_add(&page->lru, &pgd_list);
}

static inline void pgd_list_del(pgd_t *pgd)
{
	struct page *page = virt_to_page(pgd);

	list_del(&page->lru);
}

#define UNSHARED_PTRS_PER_PGD \
	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)


static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
{
	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
	virt_to_page(pgd)->index = (pgoff_t)mm;
}

struct mm_struct *pgd_page_get_mm(struct page *page)
{
	return (struct mm_struct *)page->index;
}

static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
	/*
	 * If the pgd points to a shared pagetable level (either the
	 * ptes in non-PAE, or the shared PMD in PAE), then just copy
	 * the references from swapper_pg_dir.
	 */
	if (CONFIG_PGTABLE_LEVELS == 2 ||
	    (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
	    CONFIG_PGTABLE_LEVELS >= 4) {
		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
				KERNEL_PGD_PTRS);
	}

	/* List used to synchronize updates of the kernel mappings */
	if (!SHARED_KERNEL_PMD) {
		pgd_set_mm(pgd, mm);
		pgd_list_add(pgd);
	}
}

static void pgd_dtor(pgd_t *pgd)
{
	if (SHARED_KERNEL_PMD)
		return;

	spin_lock(&pgd_lock);
	pgd_list_del(pgd);
	spin_unlock(&pgd_lock);
}
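
/*
 * List of all pgd's needed so that kernel pagetable updates (e.g. for
 * the vmalloc area) can be propagated to every pgd when the kernel pmd
 * is not shared between pagetables. With a shared kernel pmd this list
 * is unnecessary, since updating the shared pmd updates all pgds at once.
 */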
#ifdef CONFIG_X86_PAE
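
/*
 * In PAE mode, updating a top-level (PDPT) entry requires a cr3 reload
 * to guarantee the processor picks up the change, so all pmds are
 * preallocated here at pgd creation time and the top level is never
 * modified afterwards during normal operation.
 */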
#define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD

void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
{
	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);

	/*
	 * Note: almost everything apart from _PAGE_PRESENT is
	 * reserved at the pmd (PDPT) level.
	 */
	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));

	/*
	 * In PAE mode the CPU caches the four PDPT entries when %cr3 is
	 * loaded, so a change to a top-level entry is only guaranteed to
	 * be visible after the TLB (and with it the PDPT cache) is flushed.
	 */
	flush_tlb_mm(mm);
}
#else  /* !CONFIG_X86_PAE */

/* No need to prealloc */
#define PREALLOCATED_PMDS	0

#endif	/* CONFIG_X86_PAE */

static void free_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++)
		if (pmds[i]) {
			pgtable_pmd_page_dtor(virt_to_page(pmds[i]));
			free_page((unsigned long)pmds[i]);
			mm_dec_nr_pmds(mm);
		}
}

static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[])
{
	int i;
	bool failed = false;
	gfp_t gfp = PGALLOC_GFP;

	if (mm == &init_mm)
		gfp &= ~__GFP_ACCOUNT;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pmd_t *pmd = (pmd_t *)__get_free_page(gfp);
		if (!pmd)
			failed = true;
		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
			free_page((unsigned long)pmd);
			pmd = NULL;
			failed = true;
		}
		if (pmd)
			mm_inc_nr_pmds(mm);
		pmds[i] = pmd;
	}

	if (failed) {
		free_pmds(mm, pmds);
		return -ENOMEM;
	}

	return 0;
}
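
/*
 * Mop up any pmd pages which may still be attached to the pgd.
 * Normally they will be freed by munmap/exit_mmap, but any pmd we
 * preallocated which never got a corresponding vma will need to be
 * freed manually.
 */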
static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
{
	int i;

	for (i = 0; i < PREALLOCATED_PMDS; i++) {
		pgd_t pgd = pgdp[i];

		if (pgd_val(pgd) != 0) {
			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);

			pgdp[i] = native_make_pgd(0);

			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
			pmd_free(mm, pmd);
			mm_dec_nr_pmds(mm);
		}
	}
}

static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
	p4d_t *p4d;
	pud_t *pud;
	int i;

	if (PREALLOCATED_PMDS == 0)
		return;

	p4d = p4d_offset(pgd, 0);
	pud = pud_offset(p4d, 0);

	for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
		pmd_t *pmd = pmds[i];

		if (i >= KERNEL_PGD_BOUNDARY)
			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
			       sizeof(pmd_t) * PTRS_PER_PMD);

		pud_populate(mm, pud, pmd);
	}
}
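
/*
 * Xen paravirt assumes that the pgd occupies a whole page, as does the
 * 64-bit kernel. A PAE kernel that is not running as a Xen domain only
 * needs 32 bytes for its pgd (the four PDPT entries), so it can use a
 * small slab cache instead of a full page.
 */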
#ifdef CONFIG_X86_PAE

#include <linux/slab.h>

#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#define PGD_ALIGN	32

static struct kmem_cache *pgd_cache;

static int __init pgd_cache_init(void)
{
	/*
	 * When a PAE kernel runs as a Xen domain the kernel pmd is not
	 * shared; the pgd then needs a whole page and no slab cache is
	 * required.
	 */
	if (!SHARED_KERNEL_PMD)
		return 0;

	/*
	 * With a shared kernel pmd the pgd is only 32 bytes (the four
	 * PDPT entries), so a small, suitably aligned slab cache is
	 * enough for pgd allocations.
	 */
	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
				      SLAB_PANIC, NULL);
	if (!pgd_cache)
		return -ENOMEM;

	return 0;
}
core_initcall(pgd_cache_init);

static inline pgd_t *_pgd_alloc(void)
{
	/*
	 * If the kernel pmd is not shared (PAE kernel running as a Xen
	 * domain), the pgd takes a whole page.
	 */
	if (!SHARED_KERNEL_PMD)
		return (pgd_t *)__get_free_page(PGALLOC_GFP);

	/*
	 * Otherwise allocate the 32-byte pgd from the slab cache set up
	 * in pgd_cache_init().
	 */
	return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
}

static inline void _pgd_free(pgd_t *pgd)
{
	if (!SHARED_KERNEL_PMD)
		free_page((unsigned long)pgd);
	else
		kmem_cache_free(pgd_cache, pgd);
}
#else

static inline pgd_t *_pgd_alloc(void)
{
	return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}

static inline void _pgd_free(pgd_t *pgd)
{
	free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif	/* CONFIG_X86_PAE */

pgd_t *pgd_alloc(struct mm_struct *mm)
{
	pgd_t *pgd;
	pmd_t *pmds[PREALLOCATED_PMDS];

	pgd = _pgd_alloc();

	if (pgd == NULL)
		goto out;

	mm->pgd = pgd;

	if (preallocate_pmds(mm, pmds) != 0)
		goto out_free_pgd;

	if (paravirt_pgd_alloc(mm) != 0)
		goto out_free_pmds;

	/*
	 * Make sure that pre-populating the pmds is atomic with
	 * respect to anything walking the pgd_list, so that they
	 * never see a partially populated pgd.
	 */
	spin_lock(&pgd_lock);

	pgd_ctor(mm, pgd);
	pgd_prepopulate_pmd(mm, pgd, pmds);

	spin_unlock(&pgd_lock);

	return pgd;

out_free_pmds:
	free_pmds(mm, pmds);
out_free_pgd:
	_pgd_free(pgd);
out:
	return NULL;
}

void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	pgd_mop_up_pmds(mm, pgd);
	pgd_dtor(pgd);
	paravirt_pgd_free(mm, pgd);
	_pgd_free(pgd);
}
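
/*
 * Used to set accessed or dirty bits in the page table entries
 * on other architectures. On x86, the accessed and dirty bits
 * are tracked by hardware. However, do_wp_page calls this function
 * to also make the pte writeable at the same time the dirty bit is
 * set. In that case we do actually need to write the PTE.
 */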
int ptep_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pte_t *ptep,
			  pte_t entry, int dirty)
{
	int changed = !pte_same(*ptep, entry);

	if (changed && dirty)
		*ptep = entry;

	return changed;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	int changed = !pmd_same(*pmdp, entry);

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (changed && dirty) {
		*pmdp = entry;
		/*
		 * We had a write-protection fault here and changed the pmd
		 * to be more permissive. No TLB flush is needed for that;
		 * at worst a spurious fault is taken and resolved.
		 */
	}

	return changed;
}

int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pud_t *pudp, pud_t entry, int dirty)
{
	int changed = !pud_same(*pudp, entry);

	VM_BUG_ON(address & ~HPAGE_PUD_MASK);

	if (changed && dirty) {
		*pudp = entry;
		/*
		 * As above for the pmd case: the pud only became more
		 * permissive, so no TLB flush is needed.
		 */
	}

	return changed;
}
#endif

int ptep_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pte_t *ptep)
{
	int ret = 0;

	if (pte_young(*ptep))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *) &ptep->pte);

	return ret;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pmd_t *pmdp)
{
	int ret = 0;

	if (pmd_young(*pmdp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pmdp);

	return ret;
}

int pudp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long addr, pud_t *pudp)
{
	int ret = 0;

	if (pud_young(*pudp))
		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
					 (unsigned long *)pudp);

	return ret;
}
#endif

int ptep_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pte_t *ptep)
{
	/*
	 * On x86 CPUs, clearing the accessed bit without a TLB flush
	 * doesn't cause data corruption. [ It could cause incorrect
	 * page aging and the (mistaken) reclaim of hot pages, but the
	 * chance of that should be relatively low. ]
	 *
	 * So as a performance optimization don't flush the TLB when
	 * clearing the accessed bit; it will eventually be flushed by
	 * a context switch or a VM operation anyway.
	 */
	return ptep_test_and_clear_young(vma, address, ptep);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma,
			   unsigned long address, pmd_t *pmdp)
{
	int young;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	young = pmdp_test_and_clear_young(vma, address, pmdp);
	if (young)
		flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	return young;
}
#endif
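
/*
 * reserve_top_address - reserves a hole at the top of the kernel address space
 * @reserve: size of hole to reserve
 *
 * Can be used to relocate the fixmap area and poke a hole in the top
 * of the kernel address space to make room for a hypervisor.
 */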
void __init reserve_top_address(unsigned long reserve)
{
#ifdef CONFIG_X86_32
	BUG_ON(fixmaps_set > 0);
	__FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE;
	printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n",
	       -reserve, __FIXADDR_TOP + PAGE_SIZE);
#endif
}

int fixmaps_set;

void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
	unsigned long address = __fix_to_virt(idx);

	if (idx >= __end_of_fixed_addresses) {
		BUG();
		return;
	}
	set_pte_vaddr(address, pte);
	fixmaps_set++;
}

void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
		       pgprot_t flags)
{
	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
#ifdef CONFIG_X86_5LEVEL
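
/*
 * p4d_set_huge - set up a kernel P4D mapping
 *
 * There are no 512GB huge pages yet, so this always returns 0.
 */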
int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
{
	return 0;
}
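
/*
 * p4d_clear_huge - clear a kernel P4D mapping when it is set
 *
 * There are no 512GB huge pages yet, so this always returns 0.
 */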
int p4d_clear_huge(p4d_t *p4d)
{
	return 0;
}
#endif	/* CONFIG_X86_5LEVEL */
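
/*
 * pud_set_huge - set up a kernel PUD mapping
 *
 * MTRRs can override PAT memory types with 4KiB granularity, so a huge
 * page is only installed if one of the following holds:
 *
 *  - MTRRs are disabled, or
 *  - the range is covered uniformly by a single MTRR, or
 *  - the covering MTRR memory type is WB, which has no effect on the
 *    requested PAT memory type.
 *
 * Callers should try a smaller page size (1GB -> 2MB -> 4K) if the
 * huge mapping attempt fails.
 *
 * Returns 1 on success and 0 on failure.
 */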
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK))
		return 0;

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pud, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}
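
/*
 * pmd_set_huge - set up a kernel PMD mapping
 *
 * See the comment above pud_set_huge() for the MTRR constraints.
 *
 * Returns 1 on success and 0 on failure.
 */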
int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
{
	u8 mtrr, uniform;

	mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE, &uniform);
	if ((mtrr != MTRR_TYPE_INVALID) && (!uniform) &&
	    (mtrr != MTRR_TYPE_WRBACK)) {
		pr_warn_once("%s: Cannot satisfy [mem %#010llx-%#010llx] with a huge-page mapping due to MTRR override.\n",
			     __func__, addr, addr + PMD_SIZE);
		return 0;
	}

	prot = pgprot_4k_2_large(prot);

	set_pte((pte_t *)pmd, pfn_pte(
		(u64)addr >> PAGE_SHIFT,
		__pgprot(pgprot_val(prot) | _PAGE_PSE)));

	return 1;
}
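
/*
 * pud_clear_huge - clear a kernel PUD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PUD map found).
 */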
int pud_clear_huge(pud_t *pud)
{
	if (pud_large(*pud)) {
		pud_clear(pud);
		return 1;
	}

	return 0;
}
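
/*
 * pmd_clear_huge - clear a kernel PMD mapping when it is set
 *
 * Returns 1 on success and 0 on failure (no PMD map found).
 */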
int pmd_clear_huge(pmd_t *pmd)
{
	if (pmd_large(*pmd)) {
		pmd_clear(pmd);
		return 1;
	}

	return 0;
}
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */