1
2
3
4
5
6
7
8
9
10
11
12#include <linux/init.h>
13#include <linux/pfn.h>
14#include <linux/slab.h>
15#include <linux/export.h>
16#include <linux/kmemleak.h>
17#include <linux/range.h>
18#include <linux/bug.h>
19#include <linux/io.h>
20#include <linux/bootmem.h>
21
22#include "internal.h"
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71#ifndef CONFIG_NEED_MULTIPLE_NODES
72struct pglist_data __refdata contig_page_data = {
73 .bdata = &bootmem_node_data[0]
74};
75EXPORT_SYMBOL(contig_page_data);
76#endif
77
78unsigned long max_low_pfn;
79unsigned long min_low_pfn;
80unsigned long max_pfn;
81unsigned long long max_possible_pfn;
82
83bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
84
85static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
86
87static int bootmem_debug;
88
89static int __init bootmem_debug_setup(char *buf)
90{
91 bootmem_debug = 1;
92 return 0;
93}
94early_param("bootmem_debug", bootmem_debug_setup);
95
/*
 * Trace helper: when bootmem_debug is set, print @fmt through pr_info(),
 * prefixed with "bootmem::<calling function> ".  Does nothing otherwise.
 */
#define bdebug(fmt, ...) ({						\
	if (unlikely(bootmem_debug))					\
		pr_info("bootmem::%s " fmt, __func__, ##__VA_ARGS__);	\
})
101
102static unsigned long __init bootmap_bytes(unsigned long pages)
103{
104 unsigned long bytes = DIV_ROUND_UP(pages, BITS_PER_BYTE);
105
106 return ALIGN(bytes, sizeof(long));
107}
108
109
110
111
112
113
114
115unsigned long __init bootmem_bootmap_pages(unsigned long pages)
116{
117 unsigned long bytes = bootmap_bytes(pages);
118
119 return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
120}
121
122
123
124
125static void __init link_bootmem(bootmem_data_t *bdata)
126{
127 bootmem_data_t *ent;
128
129 list_for_each_entry(ent, &bdata_list, list) {
130 if (bdata->node_min_pfn < ent->node_min_pfn) {
131 list_add_tail(&bdata->list, &ent->list);
132 return;
133 }
134 }
135
136 list_add_tail(&bdata->list, &bdata_list);
137}
138
139
140
141
142static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
143 unsigned long mapstart, unsigned long start, unsigned long end)
144{
145 unsigned long mapsize;
146
147 mminit_validate_memmodel_limits(&start, &end);
148 bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
149 bdata->node_min_pfn = start;
150 bdata->node_low_pfn = end;
151 link_bootmem(bdata);
152
153
154
155
156
157 mapsize = bootmap_bytes(end - start);
158 memset(bdata->node_bootmem_map, 0xff, mapsize);
159
160 bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
161 bdata - bootmem_node_data, start, mapstart, end, mapsize);
162
163 return mapsize;
164}
165
166
167
168
169
170
171
172
173
174
175unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
176 unsigned long startpfn, unsigned long endpfn)
177{
178 return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
179}
180
181
182
183
184
185
186
187
188unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
189{
190 max_low_pfn = pages;
191 min_low_pfn = start;
192 return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
193}
194
195void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
196{
197 unsigned long cursor, end;
198
199 kmemleak_free_part_phys(physaddr, size);
200
201 cursor = PFN_UP(physaddr);
202 end = PFN_DOWN(physaddr + size);
203
204 for (; cursor < end; cursor++) {
205 __free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
206 totalram_pages++;
207 }
208}
209
210static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
211{
212 struct page *page;
213 unsigned long *map, start, end, pages, cur, count = 0;
214
215 if (!bdata->node_bootmem_map)
216 return 0;
217
218 map = bdata->node_bootmem_map;
219 start = bdata->node_min_pfn;
220 end = bdata->node_low_pfn;
221
222 bdebug("nid=%td start=%lx end=%lx\n",
223 bdata - bootmem_node_data, start, end);
224
225 while (start < end) {
226 unsigned long idx, vec;
227 unsigned shift;
228
229 idx = start - bdata->node_min_pfn;
230 shift = idx & (BITS_PER_LONG - 1);
231
232
233
234
235 vec = ~map[idx / BITS_PER_LONG];
236
237 if (shift) {
238 vec >>= shift;
239 if (end - start >= BITS_PER_LONG)
240 vec |= ~map[idx / BITS_PER_LONG + 1] <<
241 (BITS_PER_LONG - shift);
242 }
243
244
245
246
247
248 if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
249 int order = ilog2(BITS_PER_LONG);
250
251 __free_pages_bootmem(pfn_to_page(start), start, order);
252 count += BITS_PER_LONG;
253 start += BITS_PER_LONG;
254 } else {
255 cur = start;
256
257 start = ALIGN(start + 1, BITS_PER_LONG);
258 while (vec && cur != start) {
259 if (vec & 1) {
260 page = pfn_to_page(cur);
261 __free_pages_bootmem(page, cur, 0);
262 count++;
263 }
264 vec >>= 1;
265 ++cur;
266 }
267 }
268 }
269
270 cur = bdata->node_min_pfn;
271 page = virt_to_page(bdata->node_bootmem_map);
272 pages = bdata->node_low_pfn - bdata->node_min_pfn;
273 pages = bootmem_bootmap_pages(pages);
274 count += pages;
275 while (pages--)
276 __free_pages_bootmem(page++, cur++, 0);
277 bdata->node_bootmem_map = NULL;
278
279 bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
280
281 return count;
282}
283
284static int reset_managed_pages_done __initdata;
285
286void reset_node_managed_pages(pg_data_t *pgdat)
287{
288 struct zone *z;
289
290 for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
291 z->managed_pages = 0;
292}
293
294void __init reset_all_zones_managed_pages(void)
295{
296 struct pglist_data *pgdat;
297
298 if (reset_managed_pages_done)
299 return;
300
301 for_each_online_pgdat(pgdat)
302 reset_node_managed_pages(pgdat);
303
304 reset_managed_pages_done = 1;
305}
306
307unsigned long __init free_all_bootmem(void)
308{
309 unsigned long total_pages = 0;
310 bootmem_data_t *bdata;
311
312 reset_all_zones_managed_pages();
313
314 list_for_each_entry(bdata, &bdata_list, list)
315 total_pages += free_all_bootmem_core(bdata);
316
317 totalram_pages += total_pages;
318
319 return total_pages;
320}
321
322static void __init __free(bootmem_data_t *bdata,
323 unsigned long sidx, unsigned long eidx)
324{
325 unsigned long idx;
326
327 bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
328 sidx + bdata->node_min_pfn,
329 eidx + bdata->node_min_pfn);
330
331 if (WARN_ON(bdata->node_bootmem_map == NULL))
332 return;
333
334 if (bdata->hint_idx > sidx)
335 bdata->hint_idx = sidx;
336
337 for (idx = sidx; idx < eidx; idx++)
338 if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
339 BUG();
340}
341
342static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
343 unsigned long eidx, int flags)
344{
345 unsigned long idx;
346 int exclusive = flags & BOOTMEM_EXCLUSIVE;
347
348 bdebug("nid=%td start=%lx end=%lx flags=%x\n",
349 bdata - bootmem_node_data,
350 sidx + bdata->node_min_pfn,
351 eidx + bdata->node_min_pfn,
352 flags);
353
354 if (WARN_ON(bdata->node_bootmem_map == NULL))
355 return 0;
356
357 for (idx = sidx; idx < eidx; idx++)
358 if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
359 if (exclusive) {
360 __free(bdata, sidx, idx);
361 return -EBUSY;
362 }
363 bdebug("silent double reserve of PFN %lx\n",
364 idx + bdata->node_min_pfn);
365 }
366 return 0;
367}
368
369static int __init mark_bootmem_node(bootmem_data_t *bdata,
370 unsigned long start, unsigned long end,
371 int reserve, int flags)
372{
373 unsigned long sidx, eidx;
374
375 bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
376 bdata - bootmem_node_data, start, end, reserve, flags);
377
378 BUG_ON(start < bdata->node_min_pfn);
379 BUG_ON(end > bdata->node_low_pfn);
380
381 sidx = start - bdata->node_min_pfn;
382 eidx = end - bdata->node_min_pfn;
383
384 if (reserve)
385 return __reserve(bdata, sidx, eidx, flags);
386 else
387 __free(bdata, sidx, eidx);
388 return 0;
389}
390
391static int __init mark_bootmem(unsigned long start, unsigned long end,
392 int reserve, int flags)
393{
394 unsigned long pos;
395 bootmem_data_t *bdata;
396
397 pos = start;
398 list_for_each_entry(bdata, &bdata_list, list) {
399 int err;
400 unsigned long max;
401
402 if (pos < bdata->node_min_pfn ||
403 pos >= bdata->node_low_pfn) {
404 BUG_ON(pos != start);
405 continue;
406 }
407
408 max = min(bdata->node_low_pfn, end);
409
410 err = mark_bootmem_node(bdata, pos, max, reserve, flags);
411 if (reserve && err) {
412 mark_bootmem(start, pos, 0, 0);
413 return err;
414 }
415
416 if (max == end)
417 return 0;
418 pos = bdata->node_low_pfn;
419 }
420 BUG();
421}
422
423void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
424 unsigned long size)
425{
426 unsigned long start, end;
427
428 kmemleak_free_part_phys(physaddr, size);
429
430 start = PFN_UP(physaddr);
431 end = PFN_DOWN(physaddr + size);
432
433 mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
434}
435
436void __init free_bootmem(unsigned long physaddr, unsigned long size)
437{
438 unsigned long start, end;
439
440 kmemleak_free_part_phys(physaddr, size);
441
442 start = PFN_UP(physaddr);
443 end = PFN_DOWN(physaddr + size);
444
445 mark_bootmem(start, end, 0, 0);
446}
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
462 unsigned long size, int flags)
463{
464 unsigned long start, end;
465
466 start = PFN_DOWN(physaddr);
467 end = PFN_UP(physaddr + size);
468
469 return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
470}
471
472
473
474
475
476
477
478
479
480
481
482
483
484int __init reserve_bootmem(unsigned long addr, unsigned long size,
485 int flags)
486{
487 unsigned long start, end;
488
489 start = PFN_DOWN(addr);
490 end = PFN_UP(addr + size);
491
492 return mark_bootmem(start, end, 1, flags);
493}
494
495static unsigned long __init align_idx(struct bootmem_data *bdata,
496 unsigned long idx, unsigned long step)
497{
498 unsigned long base = bdata->node_min_pfn;
499
500
501
502
503
504
505 return ALIGN(base + idx, step) - base;
506}
507
508static unsigned long __init align_off(struct bootmem_data *bdata,
509 unsigned long off, unsigned long align)
510{
511 unsigned long base = PFN_PHYS(bdata->node_min_pfn);
512
513
514
515 return ALIGN(base + off, align) - base;
516}
517
518static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata,
519 unsigned long size, unsigned long align,
520 unsigned long goal, unsigned long limit)
521{
522 unsigned long fallback = 0;
523 unsigned long min, max, start, sidx, midx, step;
524
525 bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
526 bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
527 align, goal, limit);
528
529 BUG_ON(!size);
530 BUG_ON(align & (align - 1));
531 BUG_ON(limit && goal + size > limit);
532
533 if (!bdata->node_bootmem_map)
534 return NULL;
535
536 min = bdata->node_min_pfn;
537 max = bdata->node_low_pfn;
538
539 goal >>= PAGE_SHIFT;
540 limit >>= PAGE_SHIFT;
541
542 if (limit && max > limit)
543 max = limit;
544 if (max <= min)
545 return NULL;
546
547 step = max(align >> PAGE_SHIFT, 1UL);
548
549 if (goal && min < goal && goal < max)
550 start = ALIGN(goal, step);
551 else
552 start = ALIGN(min, step);
553
554 sidx = start - bdata->node_min_pfn;
555 midx = max - bdata->node_min_pfn;
556
557 if (bdata->hint_idx > sidx) {
558
559
560
561
562 fallback = sidx + 1;
563 sidx = align_idx(bdata, bdata->hint_idx, step);
564 }
565
566 while (1) {
567 int merge;
568 void *region;
569 unsigned long eidx, i, start_off, end_off;
570find_block:
571 sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
572 sidx = align_idx(bdata, sidx, step);
573 eidx = sidx + PFN_UP(size);
574
575 if (sidx >= midx || eidx > midx)
576 break;
577
578 for (i = sidx; i < eidx; i++)
579 if (test_bit(i, bdata->node_bootmem_map)) {
580 sidx = align_idx(bdata, i, step);
581 if (sidx == i)
582 sidx += step;
583 goto find_block;
584 }
585
586 if (bdata->last_end_off & (PAGE_SIZE - 1) &&
587 PFN_DOWN(bdata->last_end_off) + 1 == sidx)
588 start_off = align_off(bdata, bdata->last_end_off, align);
589 else
590 start_off = PFN_PHYS(sidx);
591
592 merge = PFN_DOWN(start_off) < sidx;
593 end_off = start_off + size;
594
595 bdata->last_end_off = end_off;
596 bdata->hint_idx = PFN_UP(end_off);
597
598
599
600
601 if (__reserve(bdata, PFN_DOWN(start_off) + merge,
602 PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
603 BUG();
604
605 region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
606 start_off);
607 memset(region, 0, size);
608
609
610
611
612 kmemleak_alloc(region, size, 0, 0);
613 return region;
614 }
615
616 if (fallback) {
617 sidx = align_idx(bdata, fallback - 1, step);
618 fallback = 0;
619 goto find_block;
620 }
621
622 return NULL;
623}
624
625static void * __init alloc_bootmem_core(unsigned long size,
626 unsigned long align,
627 unsigned long goal,
628 unsigned long limit)
629{
630 bootmem_data_t *bdata;
631 void *region;
632
633 if (WARN_ON_ONCE(slab_is_available()))
634 return kzalloc(size, GFP_NOWAIT);
635
636 list_for_each_entry(bdata, &bdata_list, list) {
637 if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
638 continue;
639 if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
640 break;
641
642 region = alloc_bootmem_bdata(bdata, size, align, goal, limit);
643 if (region)
644 return region;
645 }
646
647 return NULL;
648}
649
650static void * __init ___alloc_bootmem_nopanic(unsigned long size,
651 unsigned long align,
652 unsigned long goal,
653 unsigned long limit)
654{
655 void *ptr;
656
657restart:
658 ptr = alloc_bootmem_core(size, align, goal, limit);
659 if (ptr)
660 return ptr;
661 if (goal) {
662 goal = 0;
663 goto restart;
664 }
665
666 return NULL;
667}
668
669void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
670 unsigned long goal)
671{
672 unsigned long limit = 0;
673
674 return ___alloc_bootmem_nopanic(size, align, goal, limit);
675}
676
677static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
678 unsigned long goal, unsigned long limit)
679{
680 void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
681
682 if (mem)
683 return mem;
684
685
686
687 pr_alert("bootmem alloc of %lu bytes failed!\n", size);
688 panic("Out of memory");
689 return NULL;
690}
691
692void * __init __alloc_bootmem(unsigned long size, unsigned long align,
693 unsigned long goal)
694{
695 unsigned long limit = 0;
696
697 return ___alloc_bootmem(size, align, goal, limit);
698}
699
700void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
701 unsigned long size, unsigned long align,
702 unsigned long goal, unsigned long limit)
703{
704 void *ptr;
705
706 if (WARN_ON_ONCE(slab_is_available()))
707 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
708again:
709
710
711 if (limit && goal + size > limit)
712 limit = 0;
713
714 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
715 if (ptr)
716 return ptr;
717
718 ptr = alloc_bootmem_core(size, align, goal, limit);
719 if (ptr)
720 return ptr;
721
722 if (goal) {
723 goal = 0;
724 goto again;
725 }
726
727 return NULL;
728}
729
730void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
731 unsigned long align, unsigned long goal)
732{
733 return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
734}
735
736void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
737 unsigned long align, unsigned long goal,
738 unsigned long limit)
739{
740 void *ptr;
741
742 ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
743 if (ptr)
744 return ptr;
745
746 pr_alert("bootmem alloc of %lu bytes failed!\n", size);
747 panic("Out of memory");
748 return NULL;
749}
750
751void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
752 unsigned long align, unsigned long goal)
753{
754 if (WARN_ON_ONCE(slab_is_available()))
755 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
756
757 return ___alloc_bootmem_node(pgdat, size, align, goal, 0);
758}
759
760void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
761 unsigned long align, unsigned long goal)
762{
763#ifdef MAX_DMA32_PFN
764 unsigned long end_pfn;
765
766 if (WARN_ON_ONCE(slab_is_available()))
767 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
768
769
770 end_pfn = pgdat_end_pfn(pgdat);
771
772 if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
773 (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
774 void *ptr;
775 unsigned long new_goal;
776
777 new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
778 ptr = alloc_bootmem_bdata(pgdat->bdata, size, align,
779 new_goal, 0);
780 if (ptr)
781 return ptr;
782 }
783#endif
784
785 return __alloc_bootmem_node(pgdat, size, align, goal);
786
787}
788
789void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
790 unsigned long goal)
791{
792 return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
793}
794
795void * __init __alloc_bootmem_low_nopanic(unsigned long size,
796 unsigned long align,
797 unsigned long goal)
798{
799 return ___alloc_bootmem_nopanic(size, align, goal,
800 ARCH_LOW_ADDRESS_LIMIT);
801}
802
803void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
804 unsigned long align, unsigned long goal)
805{
806 if (WARN_ON_ONCE(slab_is_available()))
807 return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
808
809 return ___alloc_bootmem_node(pgdat, size, align,
810 goal, ARCH_LOW_ADDRESS_LIMIT);
811}
812