1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21#include <linux/mm.h>
22#include <linux/mmzone.h>
23#include <linux/memblock.h>
24#include <linux/memremap.h>
25#include <linux/highmem.h>
26#include <linux/slab.h>
27#include <linux/spinlock.h>
28#include <linux/vmalloc.h>
29#include <linux/sched.h>
30#include <linux/pgtable.h>
31#include <linux/bootmem_info.h>
32
33#include <asm/dma.h>
34#include <asm/pgalloc.h>
35#include <asm/tlbflush.h>
36
37#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
38
39
40
41
42
43
44
45
46
47
/*
 * State carried through one walk of the kernel page tables over a vmemmap
 * virtual address range.
 */
struct vmemmap_remap_walk {
	/* Per-PTE callback; vmemmap_pte_range() invokes it for each remapped entry. */
	void (*remap_pte)(pte_t *pte, unsigned long addr,
			  struct vmemmap_remap_walk *walk);
	/* Number of PTEs visited so far, counting the skipped reuse PTE. */
	unsigned long nr_walked;
	/* Page behind the first PTE of the walk; latched while it is still NULL. */
	struct page *reuse_page;
	/* Virtual address the walk starts at; copy source when restoring pages. */
	unsigned long reuse_addr;
	/* List the callbacks append displaced pages to, or take fresh pages from. */
	struct list_head *vmemmap_pages;
};
56
57static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
58{
59 pmd_t __pmd;
60 int i;
61 unsigned long addr = start;
62 struct page *page = pmd_page(*pmd);
63 pte_t *pgtable = pte_alloc_one_kernel(&init_mm);
64
65 if (!pgtable)
66 return -ENOMEM;
67
68 pmd_populate_kernel(&init_mm, &__pmd, pgtable);
69
70 for (i = 0; i < PMD_SIZE / PAGE_SIZE; i++, addr += PAGE_SIZE) {
71 pte_t entry, *pte;
72 pgprot_t pgprot = PAGE_KERNEL;
73
74 entry = mk_pte(page + i, pgprot);
75 pte = pte_offset_kernel(&__pmd, addr);
76 set_pte_at(&init_mm, addr, pte, entry);
77 }
78
79 spin_lock(&init_mm.page_table_lock);
80 if (likely(pmd_leaf(*pmd))) {
81
82
83
84
85
86 if (!PageReserved(page))
87 split_page(page, get_order(PMD_SIZE));
88
89
90 smp_wmb();
91 pmd_populate_kernel(&init_mm, pmd, pgtable);
92 flush_tlb_kernel_range(start, start + PMD_SIZE);
93 } else {
94 pte_free_kernel(&init_mm, pgtable);
95 }
96 spin_unlock(&init_mm.page_table_lock);
97
98 return 0;
99}
100
101static int split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start)
102{
103 int leaf;
104
105 spin_lock(&init_mm.page_table_lock);
106 leaf = pmd_leaf(*pmd);
107 spin_unlock(&init_mm.page_table_lock);
108
109 if (!leaf)
110 return 0;
111
112 return __split_vmemmap_huge_pmd(pmd, start);
113}
114
115static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr,
116 unsigned long end,
117 struct vmemmap_remap_walk *walk)
118{
119 pte_t *pte = pte_offset_kernel(pmd, addr);
120
121
122
123
124
125 if (!walk->reuse_page) {
126 walk->reuse_page = pte_page(*pte);
127
128
129
130
131 addr += PAGE_SIZE;
132 pte++;
133 walk->nr_walked++;
134 }
135
136 for (; addr != end; addr += PAGE_SIZE, pte++) {
137 walk->remap_pte(pte, addr, walk);
138 walk->nr_walked++;
139 }
140}
141
142static int vmemmap_pmd_range(pud_t *pud, unsigned long addr,
143 unsigned long end,
144 struct vmemmap_remap_walk *walk)
145{
146 pmd_t *pmd;
147 unsigned long next;
148
149 pmd = pmd_offset(pud, addr);
150 do {
151 int ret;
152
153 ret = split_vmemmap_huge_pmd(pmd, addr & PMD_MASK);
154 if (ret)
155 return ret;
156
157 next = pmd_addr_end(addr, end);
158 vmemmap_pte_range(pmd, addr, next, walk);
159 } while (pmd++, addr = next, addr != end);
160
161 return 0;
162}
163
164static int vmemmap_pud_range(p4d_t *p4d, unsigned long addr,
165 unsigned long end,
166 struct vmemmap_remap_walk *walk)
167{
168 pud_t *pud;
169 unsigned long next;
170
171 pud = pud_offset(p4d, addr);
172 do {
173 int ret;
174
175 next = pud_addr_end(addr, end);
176 ret = vmemmap_pmd_range(pud, addr, next, walk);
177 if (ret)
178 return ret;
179 } while (pud++, addr = next, addr != end);
180
181 return 0;
182}
183
184static int vmemmap_p4d_range(pgd_t *pgd, unsigned long addr,
185 unsigned long end,
186 struct vmemmap_remap_walk *walk)
187{
188 p4d_t *p4d;
189 unsigned long next;
190
191 p4d = p4d_offset(pgd, addr);
192 do {
193 int ret;
194
195 next = p4d_addr_end(addr, end);
196 ret = vmemmap_pud_range(p4d, addr, next, walk);
197 if (ret)
198 return ret;
199 } while (p4d++, addr = next, addr != end);
200
201 return 0;
202}
203
204static int vmemmap_remap_range(unsigned long start, unsigned long end,
205 struct vmemmap_remap_walk *walk)
206{
207 unsigned long addr = start;
208 unsigned long next;
209 pgd_t *pgd;
210
211 VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
212 VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
213
214 pgd = pgd_offset_k(addr);
215 do {
216 int ret;
217
218 next = pgd_addr_end(addr, end);
219 ret = vmemmap_p4d_range(pgd, addr, next, walk);
220 if (ret)
221 return ret;
222 } while (pgd++, addr = next, addr != end);
223
224
225
226
227
228
229 flush_tlb_kernel_range(start + PAGE_SIZE, end);
230
231 return 0;
232}
233
234
235
236
237
238
239
/*
 * Release one vmemmap page: reserved pages go through free_bootmem_page(),
 * all others through __free_page().
 */
static inline void free_vmemmap_page(struct page *page)
{
	if (!PageReserved(page)) {
		__free_page(page);
		return;
	}

	free_bootmem_page(page);
}
247
248
249static void free_vmemmap_page_list(struct list_head *list)
250{
251 struct page *page, *next;
252
253 list_for_each_entry_safe(page, next, list, lru) {
254 list_del(&page->lru);
255 free_vmemmap_page(page);
256 }
257}
258
259static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
260 struct vmemmap_remap_walk *walk)
261{
262
263
264
265
266 pgprot_t pgprot = PAGE_KERNEL_RO;
267 pte_t entry = mk_pte(walk->reuse_page, pgprot);
268 struct page *page = pte_page(*pte);
269
270 list_add_tail(&page->lru, walk->vmemmap_pages);
271 set_pte_at(&init_mm, addr, pte, entry);
272}
273
274
275
276
277
278
279
280
281
282
283#define NR_RESET_STRUCT_PAGE 3
284
285static inline void reset_struct_pages(struct page *start)
286{
287 int i;
288 struct page *from = start + NR_RESET_STRUCT_PAGE;
289
290 for (i = 0; i < NR_RESET_STRUCT_PAGE; i++)
291 memcpy(start + i, from, sizeof(*from));
292}
293
294static void vmemmap_restore_pte(pte_t *pte, unsigned long addr,
295 struct vmemmap_remap_walk *walk)
296{
297 pgprot_t pgprot = PAGE_KERNEL;
298 struct page *page;
299 void *to;
300
301 BUG_ON(pte_page(*pte) != walk->reuse_page);
302
303 page = list_first_entry(walk->vmemmap_pages, struct page, lru);
304 list_del(&page->lru);
305 to = page_to_virt(page);
306 copy_page(to, (void *)walk->reuse_addr);
307 reset_struct_pages(to);
308
309 set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot));
310}
311
312
313
314
315
316
317
318
319
320
321
322
323
324int vmemmap_remap_free(unsigned long start, unsigned long end,
325 unsigned long reuse)
326{
327 int ret;
328 LIST_HEAD(vmemmap_pages);
329 struct vmemmap_remap_walk walk = {
330 .remap_pte = vmemmap_remap_pte,
331 .reuse_addr = reuse,
332 .vmemmap_pages = &vmemmap_pages,
333 };
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348 BUG_ON(start - reuse != PAGE_SIZE);
349
350 mmap_read_lock(&init_mm);
351 ret = vmemmap_remap_range(reuse, end, &walk);
352 if (ret && walk.nr_walked) {
353 end = reuse + walk.nr_walked * PAGE_SIZE;
354
355
356
357
358
359
360 walk = (struct vmemmap_remap_walk) {
361 .remap_pte = vmemmap_restore_pte,
362 .reuse_addr = reuse,
363 .vmemmap_pages = &vmemmap_pages,
364 };
365
366 vmemmap_remap_range(reuse, end, &walk);
367 }
368 mmap_read_unlock(&init_mm);
369
370 free_vmemmap_page_list(&vmemmap_pages);
371
372 return ret;
373}
374
375static int alloc_vmemmap_page_list(unsigned long start, unsigned long end,
376 gfp_t gfp_mask, struct list_head *list)
377{
378 unsigned long nr_pages = (end - start) >> PAGE_SHIFT;
379 int nid = page_to_nid((struct page *)start);
380 struct page *page, *next;
381
382 while (nr_pages--) {
383 page = alloc_pages_node(nid, gfp_mask, 0);
384 if (!page)
385 goto out;
386 list_add_tail(&page->lru, list);
387 }
388
389 return 0;
390out:
391 list_for_each_entry_safe(page, next, list, lru)
392 __free_pages(page, 0);
393 return -ENOMEM;
394}
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409int vmemmap_remap_alloc(unsigned long start, unsigned long end,
410 unsigned long reuse, gfp_t gfp_mask)
411{
412 LIST_HEAD(vmemmap_pages);
413 struct vmemmap_remap_walk walk = {
414 .remap_pte = vmemmap_restore_pte,
415 .reuse_addr = reuse,
416 .vmemmap_pages = &vmemmap_pages,
417 };
418
419
420 BUG_ON(start - reuse != PAGE_SIZE);
421
422 if (alloc_vmemmap_page_list(start, end, gfp_mask, &vmemmap_pages))
423 return -ENOMEM;
424
425 mmap_read_lock(&init_mm);
426 vmemmap_remap_range(reuse, end, &walk);
427 mmap_read_unlock(&init_mm);
428
429 return 0;
430}
431#endif
432
433
434
435
436
437
438
439static void * __ref __earlyonly_bootmem_alloc(int node,
440 unsigned long size,
441 unsigned long align,
442 unsigned long goal)
443{
444 return memblock_alloc_try_nid_raw(size, align, goal,
445 MEMBLOCK_ALLOC_ACCESSIBLE, node);
446}
447
448void * __meminit vmemmap_alloc_block(unsigned long size, int node)
449{
450
451 if (slab_is_available()) {
452 gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
453 int order = get_order(size);
454 static bool warned;
455 struct page *page;
456
457 page = alloc_pages_node(node, gfp_mask, order);
458 if (page)
459 return page_address(page);
460
461 if (!warned) {
462 warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
463 "vmemmap alloc failure: order:%u", order);
464 warned = true;
465 }
466 return NULL;
467 } else
468 return __earlyonly_bootmem_alloc(node, size, size,
469 __pa(MAX_DMA_ADDRESS));
470}
471
472static void * __meminit altmap_alloc_block_buf(unsigned long size,
473 struct vmem_altmap *altmap);
474
475
476void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
477 struct vmem_altmap *altmap)
478{
479 void *ptr;
480
481 if (altmap)
482 return altmap_alloc_block_buf(size, altmap);
483
484 ptr = sparse_buffer_alloc(size);
485 if (!ptr)
486 ptr = vmemmap_alloc_block(size, node);
487 return ptr;
488}
489
490static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
491{
492 return altmap->base_pfn + altmap->reserve + altmap->alloc
493 + altmap->align;
494}
495
496static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
497{
498 unsigned long allocated = altmap->alloc + altmap->align;
499
500 if (altmap->free > allocated)
501 return altmap->free - allocated;
502 return 0;
503}
504
505static void * __meminit altmap_alloc_block_buf(unsigned long size,
506 struct vmem_altmap *altmap)
507{
508 unsigned long pfn, nr_pfns, nr_align;
509
510 if (size & ~PAGE_MASK) {
511 pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
512 __func__, size);
513 return NULL;
514 }
515
516 pfn = vmem_altmap_next_pfn(altmap);
517 nr_pfns = size >> PAGE_SHIFT;
518 nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
519 nr_align = ALIGN(pfn, nr_align) - pfn;
520 if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
521 return NULL;
522
523 altmap->alloc += nr_pfns;
524 altmap->align += nr_align;
525 pfn += nr_align;
526
527 pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
528 __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
529 return __va(__pfn_to_phys(pfn));
530}
531
532void __meminit vmemmap_verify(pte_t *pte, int node,
533 unsigned long start, unsigned long end)
534{
535 unsigned long pfn = pte_pfn(*pte);
536 int actual_node = early_pfn_to_nid(pfn);
537
538 if (node_distance(actual_node, node) > LOCAL_DISTANCE)
539 pr_warn("[%lx-%lx] potential offnode page_structs\n",
540 start, end - 1);
541}
542
543pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
544 struct vmem_altmap *altmap,
545 struct page *reuse)
546{
547 pte_t *pte = pte_offset_kernel(pmd, addr);
548 if (pte_none(*pte)) {
549 pte_t entry;
550 void *p;
551
552 if (!reuse) {
553 p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
554 if (!p)
555 return NULL;
556 } else {
557
558
559
560
561
562
563
564
565
566 get_page(reuse);
567 p = page_to_virt(reuse);
568 }
569 entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
570 set_pte_at(&init_mm, addr, pte, entry);
571 }
572 return pte;
573}
574
575static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
576{
577 void *p = vmemmap_alloc_block(size, node);
578
579 if (!p)
580 return NULL;
581 memset(p, 0, size);
582
583 return p;
584}
585
586pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
587{
588 pmd_t *pmd = pmd_offset(pud, addr);
589 if (pmd_none(*pmd)) {
590 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
591 if (!p)
592 return NULL;
593 pmd_populate_kernel(&init_mm, pmd, p);
594 }
595 return pmd;
596}
597
598pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
599{
600 pud_t *pud = pud_offset(p4d, addr);
601 if (pud_none(*pud)) {
602 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
603 if (!p)
604 return NULL;
605 pud_populate(&init_mm, pud, p);
606 }
607 return pud;
608}
609
610p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
611{
612 p4d_t *p4d = p4d_offset(pgd, addr);
613 if (p4d_none(*p4d)) {
614 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
615 if (!p)
616 return NULL;
617 p4d_populate(&init_mm, p4d, p);
618 }
619 return p4d;
620}
621
622pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
623{
624 pgd_t *pgd = pgd_offset_k(addr);
625 if (pgd_none(*pgd)) {
626 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
627 if (!p)
628 return NULL;
629 pgd_populate(&init_mm, pgd, p);
630 }
631 return pgd;
632}
633
634static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
635 struct vmem_altmap *altmap,
636 struct page *reuse)
637{
638 pgd_t *pgd;
639 p4d_t *p4d;
640 pud_t *pud;
641 pmd_t *pmd;
642 pte_t *pte;
643
644 pgd = vmemmap_pgd_populate(addr, node);
645 if (!pgd)
646 return NULL;
647 p4d = vmemmap_p4d_populate(pgd, addr, node);
648 if (!p4d)
649 return NULL;
650 pud = vmemmap_pud_populate(p4d, addr, node);
651 if (!pud)
652 return NULL;
653 pmd = vmemmap_pmd_populate(pud, addr, node);
654 if (!pmd)
655 return NULL;
656 pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
657 if (!pte)
658 return NULL;
659 vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
660
661 return pte;
662}
663
664static int __meminit vmemmap_populate_range(unsigned long start,
665 unsigned long end, int node,
666 struct vmem_altmap *altmap,
667 struct page *reuse)
668{
669 unsigned long addr = start;
670 pte_t *pte;
671
672 for (; addr < end; addr += PAGE_SIZE) {
673 pte = vmemmap_populate_address(addr, node, altmap, reuse);
674 if (!pte)
675 return -ENOMEM;
676 }
677
678 return 0;
679}
680
681int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
682 int node, struct vmem_altmap *altmap)
683{
684 return vmemmap_populate_range(start, end, node, altmap, NULL);
685}
686
687
688
689
690
691
692
693
694
695
696
697static bool __meminit reuse_compound_section(unsigned long start_pfn,
698 struct dev_pagemap *pgmap)
699{
700 unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
701 unsigned long offset = start_pfn -
702 PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);
703
704 return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
705}
706
707static pte_t * __meminit compound_section_tail_page(unsigned long addr)
708{
709 pte_t *pte;
710
711 addr -= PAGE_SIZE;
712
713
714
715
716
717 pte = pte_offset_kernel(pmd_off_k(addr), addr);
718 if (!pte)
719 return NULL;
720
721 return pte;
722}
723
724static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
725 unsigned long start,
726 unsigned long end, int node,
727 struct dev_pagemap *pgmap)
728{
729 unsigned long size, addr;
730 pte_t *pte;
731 int rc;
732
733 if (reuse_compound_section(start_pfn, pgmap)) {
734 pte = compound_section_tail_page(start);
735 if (!pte)
736 return -ENOMEM;
737
738
739
740
741
742 return vmemmap_populate_range(start, end, node, NULL,
743 pte_page(*pte));
744 }
745
746 size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
747 for (addr = start; addr < end; addr += size) {
748 unsigned long next = addr, last = addr + size;
749
750
751 pte = vmemmap_populate_address(addr, node, NULL, NULL);
752 if (!pte)
753 return -ENOMEM;
754
755
756 next = addr + PAGE_SIZE;
757 pte = vmemmap_populate_address(next, node, NULL, NULL);
758 if (!pte)
759 return -ENOMEM;
760
761
762
763
764
765 next += PAGE_SIZE;
766 rc = vmemmap_populate_range(next, last, node, NULL,
767 pte_page(*pte));
768 if (rc)
769 return -ENOMEM;
770 }
771
772 return 0;
773}
774
775struct page * __meminit __populate_section_memmap(unsigned long pfn,
776 unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
777 struct dev_pagemap *pgmap)
778{
779 unsigned long start = (unsigned long) pfn_to_page(pfn);
780 unsigned long end = start + nr_pages * sizeof(struct page);
781 int r;
782
783 if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
784 !IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
785 return NULL;
786
787 if (is_power_of_2(sizeof(struct page)) &&
788 pgmap && pgmap_vmemmap_nr(pgmap) > 1 && !altmap)
789 r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
790 else
791 r = vmemmap_populate(start, end, nid, altmap);
792
793 if (r < 0)
794 return NULL;
795
796 return pfn_to_page(pfn);
797}
798