1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/nmi.h>
20#include <linux/swap.h>
21#include <linux/bootmem.h>
22#include <linux/acpi.h>
23#include <linux/efi.h>
24#include <linux/nodemask.h>
25#include <linux/slab.h>
26#include <asm/pgalloc.h>
27#include <asm/tlb.h>
28#include <asm/meminit.h>
29#include <asm/numa.h>
30#include <asm/sections.h>
31
32
33
34
35
/*
 * Per-node bookkeeping used while memory is being brought up.
 *
 * One entry per node lives in mem_data[].  fill_pernode() records the
 * per-node area (node_data, pernode_addr, pernode_size) and
 * count_node_pages() accumulates the page counts and pfn bounds.
 */
struct early_node_data {
	/* virtual address of this node's ia64_node_data (set by fill_pernode()) */
	struct ia64_node_data *node_data;
	/* start of the per-node area; fill_pernode() runs it through __va() */
	unsigned long pernode_addr;
	/* size of the per-node area in bytes */
	unsigned long pernode_size;
	/* number of pages counted on this node */
	unsigned long num_physpages;
#ifdef CONFIG_ZONE_DMA
	/* number of counted pages that lie below MAX_DMA_ADDRESS */
	unsigned long num_dma_physpages;
#endif
	/* lowest granule-rounded start pfn seen on this node */
	unsigned long min_pfn;
	/* highest granule-rounded end pfn seen on this node */
	unsigned long max_pfn;
};
47
48static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
49static nodemask_t memory_less_mask __initdata;
50
51pg_data_t *pgdat_list[MAX_NUMNODES];
52
53
54
55
56
/*
 * NODEDATA_ALIGN() rounds @addr up to the next 1MB boundary and then adds
 * a per-node stride of (@node * PERCPU_PAGE_SIZE), wrapped so that it never
 * reaches MAX_NODE_ALIGN_OFFSET.  The per-node areas of different nodes
 * therefore start at different offsets from a 1MB boundary.
 */
#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)					\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) +			\
	 (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75static int __init build_node_maps(unsigned long start, unsigned long len,
76 int node)
77{
78 unsigned long spfn, epfn, end = start + len;
79 struct bootmem_data *bdp = &bootmem_node_data[node];
80
81 epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
82 spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
83
84 if (!bdp->node_low_pfn) {
85 bdp->node_min_pfn = spfn;
86 bdp->node_low_pfn = epfn;
87 } else {
88 bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
89 bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
90 }
91
92 return 0;
93}
94
95
96
97
98
99
100
101
102
103static int __meminit early_nr_cpus_node(int node)
104{
105 int cpu, n = 0;
106
107 for_each_possible_early_cpu(cpu)
108 if (node == node_cpuid[cpu].nid)
109 n++;
110
111 return n;
112}
113
114
115
116
117
118static unsigned long __meminit compute_pernodesize(int node)
119{
120 unsigned long pernodesize = 0, cpus;
121
122 cpus = early_nr_cpus_node(node);
123 pernodesize += PERCPU_PAGE_SIZE * cpus;
124 pernodesize += node * L1_CACHE_BYTES;
125 pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
126 pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
127 pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
128 pernodesize = PAGE_ALIGN(pernodesize);
129 return pernodesize;
130}
131
132
133
134
135
136
137
138
139
140
/**
 * per_cpu_node_setup - populate the per-cpu areas of one node
 * @cpu_data: address of the first per-cpu area for this node; it is passed
 *	through __va() before being written to
 * @node: node whose cpus are set up
 *
 * For every early-possible cpu on @node, copies the per-cpu image into the
 * next PERCPU_PAGE_SIZE slot and records the resulting offset in
 * __per_cpu_offset[].  Returns @cpu_data advanced past the slots that were
 * used.  Without CONFIG_SMP nothing is done and @cpu_data is returned as is.
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
	int cpu;

	for_each_possible_early_cpu(cpu) {
		void *image;
		void *area;

		if (node_cpuid[cpu].nid != node)
			continue;

		/* cpu 0 is copied from its own image, everyone else from the template */
		image = (cpu == 0) ? __cpu0_per_cpu : __phys_per_cpu_start;
		area = __va(cpu_data);

		memcpy(area, image, __per_cpu_end - __per_cpu_start);
		__per_cpu_offset[cpu] = (char *)area - __per_cpu_start;

		/*
		 * cpu 0 now lives in its new area, so the per-cpu data kernel
		 * register is repointed.  Note that it is computed from the
		 * untranslated cpu_data value, not from __va(cpu_data).
		 */
		if (cpu == 0)
			ia64_set_kr(IA64_KR_PER_CPU_DATA,
				    (unsigned long)cpu_data -
				    (unsigned long)__per_cpu_start);

		cpu_data += PERCPU_PAGE_SIZE;
	}
#endif
	return cpu_data;
}
175
176#ifdef CONFIG_SMP
177
178
179
180
181
182
183
184
185void __init setup_per_cpu_areas(void)
186{
187 struct pcpu_alloc_info *ai;
188 struct pcpu_group_info *uninitialized_var(gi);
189 unsigned int *cpu_map;
190 void *base;
191 unsigned long base_offset;
192 unsigned int cpu;
193 ssize_t static_size, reserved_size, dyn_size;
194 int node, prev_node, unit, nr_units, rc;
195
196 ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
197 if (!ai)
198 panic("failed to allocate pcpu_alloc_info");
199 cpu_map = ai->groups[0].cpu_map;
200
201
202 base = (void *)ULONG_MAX;
203 for_each_possible_cpu(cpu)
204 base = min(base,
205 (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
206 base_offset = (void *)__per_cpu_start - base;
207
208
209 unit = 0;
210 for_each_node(node)
211 for_each_possible_cpu(cpu)
212 if (node == node_cpuid[cpu].nid)
213 cpu_map[unit++] = cpu;
214 nr_units = unit;
215
216
217 static_size = __per_cpu_end - __per_cpu_start;
218 reserved_size = PERCPU_MODULE_RESERVE;
219 dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
220 if (dyn_size < 0)
221 panic("percpu area overflow static=%zd reserved=%zd\n",
222 static_size, reserved_size);
223
224 ai->static_size = static_size;
225 ai->reserved_size = reserved_size;
226 ai->dyn_size = dyn_size;
227 ai->unit_size = PERCPU_PAGE_SIZE;
228 ai->atom_size = PAGE_SIZE;
229 ai->alloc_size = PERCPU_PAGE_SIZE;
230
231
232
233
234
235 prev_node = -1;
236 ai->nr_groups = 0;
237 for (unit = 0; unit < nr_units; unit++) {
238 cpu = cpu_map[unit];
239 node = node_cpuid[cpu].nid;
240
241 if (node == prev_node) {
242 gi->nr_units++;
243 continue;
244 }
245 prev_node = node;
246
247 gi = &ai->groups[ai->nr_groups++];
248 gi->nr_units = 1;
249 gi->base_offset = __per_cpu_offset[cpu] + base_offset;
250 gi->cpu_map = &cpu_map[unit];
251 }
252
253 rc = pcpu_setup_first_chunk(ai, base);
254 if (rc)
255 panic("failed to setup percpu area (err=%d)", rc);
256
257 pcpu_free_alloc_info(ai);
258}
259#endif
260
261
262
263
264
265
266
267static void __init fill_pernode(int node, unsigned long pernode,
268 unsigned long pernodesize)
269{
270 void *cpu_data;
271 int cpus = early_nr_cpus_node(node);
272 struct bootmem_data *bdp = &bootmem_node_data[node];
273
274 mem_data[node].pernode_addr = pernode;
275 mem_data[node].pernode_size = pernodesize;
276 memset(__va(pernode), 0, pernodesize);
277
278 cpu_data = (void *)pernode;
279 pernode += PERCPU_PAGE_SIZE * cpus;
280 pernode += node * L1_CACHE_BYTES;
281
282 pgdat_list[node] = __va(pernode);
283 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
284
285 mem_data[node].node_data = __va(pernode);
286 pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
287
288 pgdat_list[node]->bdata = bdp;
289 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
290
291 cpu_data = per_cpu_node_setup(cpu_data, node);
292
293 return;
294}
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324static int __init find_pernode_space(unsigned long start, unsigned long len,
325 int node)
326{
327 unsigned long spfn, epfn;
328 unsigned long pernodesize = 0, pernode, pages, mapsize;
329 struct bootmem_data *bdp = &bootmem_node_data[node];
330
331 spfn = start >> PAGE_SHIFT;
332 epfn = (start + len) >> PAGE_SHIFT;
333
334 pages = bdp->node_low_pfn - bdp->node_min_pfn;
335 mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
336
337
338
339
340
341 if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
342 return 0;
343
344
345 if (mem_data[node].pernode_addr)
346 return 0;
347
348
349
350
351
352 pernodesize = compute_pernodesize(node);
353 pernode = NODEDATA_ALIGN(start, node);
354
355
356 if (start + len > (pernode + pernodesize + mapsize))
357 fill_pernode(node, pernode, pernodesize);
358
359 return 0;
360}
361
362
363
364
365
366
367
368
369
370
371
372
373static int __init free_node_bootmem(unsigned long start, unsigned long len,
374 int node)
375{
376 free_bootmem_node(pgdat_list[node], start, len);
377
378 return 0;
379}
380
381
382
383
384
385
386
387
388static void __init reserve_pernode_space(void)
389{
390 unsigned long base, size, pages;
391 struct bootmem_data *bdp;
392 int node;
393
394 for_each_online_node(node) {
395 pg_data_t *pdp = pgdat_list[node];
396
397 if (node_isset(node, memory_less_mask))
398 continue;
399
400 bdp = pdp->bdata;
401
402
403 pages = bdp->node_low_pfn - bdp->node_min_pfn;
404 size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
405 base = __pa(bdp->node_bootmem_map);
406 reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
407
408
409 size = mem_data[node].pernode_size;
410 base = __pa(mem_data[node].pernode_addr);
411 reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
412 }
413}
414
415static void __meminit scatter_node_data(void)
416{
417 pg_data_t **dst;
418 int node;
419
420
421
422
423
424
425
426
427
428 for_each_node(node) {
429 if (pgdat_list[node]) {
430 dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
431 memcpy(dst, pgdat_list, sizeof(pgdat_list));
432 }
433 }
434}
435
436
437
438
439
440
441
442
443
444static void __init initialize_pernode_data(void)
445{
446 int cpu, node;
447
448 scatter_node_data();
449
450#ifdef CONFIG_SMP
451
452 for_each_possible_early_cpu(cpu) {
453 node = node_cpuid[cpu].nid;
454 per_cpu(ia64_cpu_info, cpu).node_data =
455 mem_data[node].node_data;
456 }
457#else
458 {
459 struct cpuinfo_ia64 *cpu0_cpu_info;
460 cpu = 0;
461 node = node_cpuid[cpu].nid;
462 cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
463 ((char *)&ia64_cpu_info - __per_cpu_start));
464 cpu0_cpu_info->node_data = mem_data[node].node_data;
465 }
466#endif
467}
468
469
470
471
472
473
474
475
476static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
477{
478 void *ptr = NULL;
479 u8 best = 0xff;
480 int bestnode = -1, node, anynode = 0;
481
482 for_each_online_node(node) {
483 if (node_isset(node, memory_less_mask))
484 continue;
485 else if (node_distance(nid, node) < best) {
486 best = node_distance(nid, node);
487 bestnode = node;
488 }
489 anynode = node;
490 }
491
492 if (bestnode == -1)
493 bestnode = anynode;
494
495 ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
496 PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
497
498 return ptr;
499}
500
501
502
503
504
505static void __init memory_less_nodes(void)
506{
507 unsigned long pernodesize;
508 void *pernode;
509 int node;
510
511 for_each_node_mask(node, memory_less_mask) {
512 pernodesize = compute_pernodesize(node);
513 pernode = memory_less_node_alloc(node, pernodesize);
514 fill_pernode(node, __pa(pernode), pernodesize);
515 }
516
517 return;
518}
519
520
521
522
523
524
525
526void __init find_memory(void)
527{
528 int node;
529
530 reserve_memory();
531
532 if (num_online_nodes() == 0) {
533 printk(KERN_ERR "node info missing!\n");
534 node_set_online(0);
535 }
536
537 nodes_or(memory_less_mask, memory_less_mask, node_online_map);
538 min_low_pfn = -1;
539 max_low_pfn = 0;
540
541
542 efi_memmap_walk(filter_rsvd_memory, build_node_maps);
543 efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
544 efi_memmap_walk(find_max_min_low_pfn, NULL);
545
546 for_each_online_node(node)
547 if (bootmem_node_data[node].node_low_pfn) {
548 node_clear(node, memory_less_mask);
549 mem_data[node].min_pfn = ~0UL;
550 }
551
552 efi_memmap_walk(filter_memory, register_active_ranges);
553
554
555
556
557
558 for (node = MAX_NUMNODES - 1; node >= 0; node--) {
559 unsigned long pernode, pernodesize, map;
560 struct bootmem_data *bdp;
561
562 if (!node_online(node))
563 continue;
564 else if (node_isset(node, memory_less_mask))
565 continue;
566
567 bdp = &bootmem_node_data[node];
568 pernode = mem_data[node].pernode_addr;
569 pernodesize = mem_data[node].pernode_size;
570 map = pernode + pernodesize;
571
572 init_bootmem_node(pgdat_list[node],
573 map>>PAGE_SHIFT,
574 bdp->node_min_pfn,
575 bdp->node_low_pfn);
576 }
577
578 efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
579
580 reserve_pernode_space();
581 memory_less_nodes();
582 initialize_pernode_data();
583
584 max_pfn = max_low_pfn;
585
586 find_initrd();
587}
588
589#ifdef CONFIG_SMP
590
591
592
593
594
595
596void __cpuinit *per_cpu_init(void)
597{
598 int cpu;
599 static int first_time = 1;
600
601 if (first_time) {
602 first_time = 0;
603 for_each_possible_early_cpu(cpu)
604 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
605 }
606
607 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
608}
609#endif
610
611
612
613
614
615
616
617void show_mem(void)
618{
619 int i, total_reserved = 0;
620 int total_shared = 0, total_cached = 0;
621 unsigned long total_present = 0;
622 pg_data_t *pgdat;
623
624 printk(KERN_INFO "Mem-info:\n");
625 show_free_areas();
626 printk(KERN_INFO "Node memory in pages:\n");
627 for_each_online_pgdat(pgdat) {
628 unsigned long present;
629 unsigned long flags;
630 int shared = 0, cached = 0, reserved = 0;
631
632 pgdat_resize_lock(pgdat, &flags);
633 present = pgdat->node_present_pages;
634 for(i = 0; i < pgdat->node_spanned_pages; i++) {
635 struct page *page;
636 if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
637 touch_nmi_watchdog();
638 if (pfn_valid(pgdat->node_start_pfn + i))
639 page = pfn_to_page(pgdat->node_start_pfn + i);
640 else {
641 i = vmemmap_find_next_valid_pfn(pgdat->node_id,
642 i) - 1;
643 continue;
644 }
645 if (PageReserved(page))
646 reserved++;
647 else if (PageSwapCache(page))
648 cached++;
649 else if (page_count(page))
650 shared += page_count(page)-1;
651 }
652 pgdat_resize_unlock(pgdat, &flags);
653 total_present += present;
654 total_reserved += reserved;
655 total_cached += cached;
656 total_shared += shared;
657 printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, "
658 "shrd: %10d, swpd: %10d\n", pgdat->node_id,
659 present, reserved, shared, cached);
660 }
661 printk(KERN_INFO "%ld pages of RAM\n", total_present);
662 printk(KERN_INFO "%d reserved pages\n", total_reserved);
663 printk(KERN_INFO "%d pages shared\n", total_shared);
664 printk(KERN_INFO "%d pages swap cached\n", total_cached);
665 printk(KERN_INFO "Total of %ld pages in page table cache\n",
666 quicklist_total_size());
667 printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
668}
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
684{
685 unsigned long rs, re, end = start + len;
686 void (*func)(unsigned long, unsigned long, int);
687 int i;
688
689 start = PAGE_ALIGN(start);
690 end &= PAGE_MASK;
691 if (start >= end)
692 return;
693
694 func = arg;
695
696 if (!num_node_memblks) {
697
698 if (start < end)
699 (*func)(start, end - start, 0);
700 return;
701 }
702
703 for (i = 0; i < num_node_memblks; i++) {
704 rs = max(start, node_memblk[i].start_paddr);
705 re = min(end, node_memblk[i].start_paddr +
706 node_memblk[i].size);
707
708 if (rs < re)
709 (*func)(rs, re - rs, node_memblk[i].nid);
710
711 if (re == end)
712 break;
713 }
714}
715
716
717
718
719
720
721
722
723
724
725
726
727static __init int count_node_pages(unsigned long start, unsigned long len, int node)
728{
729 unsigned long end = start + len;
730
731 mem_data[node].num_physpages += len >> PAGE_SHIFT;
732#ifdef CONFIG_ZONE_DMA
733 if (start <= __pa(MAX_DMA_ADDRESS))
734 mem_data[node].num_dma_physpages +=
735 (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
736#endif
737 start = GRANULEROUNDDOWN(start);
738 end = GRANULEROUNDUP(end);
739 mem_data[node].max_pfn = max(mem_data[node].max_pfn,
740 end >> PAGE_SHIFT);
741 mem_data[node].min_pfn = min(mem_data[node].min_pfn,
742 start >> PAGE_SHIFT);
743
744 return 0;
745}
746
747
748
749
750
751
752
753void __init paging_init(void)
754{
755 unsigned long max_dma;
756 unsigned long pfn_offset = 0;
757 unsigned long max_pfn = 0;
758 int node;
759 unsigned long max_zone_pfns[MAX_NR_ZONES];
760
761 max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
762
763 efi_memmap_walk(filter_rsvd_memory, count_node_pages);
764
765 sparse_memory_present_with_active_regions(MAX_NUMNODES);
766 sparse_init();
767
768#ifdef CONFIG_VIRTUAL_MEM_MAP
769 VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
770 sizeof(struct page));
771 vmem_map = (struct page *) VMALLOC_END;
772 efi_memmap_walk(create_mem_map_page_table, NULL);
773 printk("Virtual mem_map starts at 0x%p\n", vmem_map);
774#endif
775
776 for_each_online_node(node) {
777 num_physpages += mem_data[node].num_physpages;
778 pfn_offset = mem_data[node].min_pfn;
779
780#ifdef CONFIG_VIRTUAL_MEM_MAP
781 NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
782#endif
783 if (mem_data[node].max_pfn > max_pfn)
784 max_pfn = mem_data[node].max_pfn;
785 }
786
787 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
788#ifdef CONFIG_ZONE_DMA
789 max_zone_pfns[ZONE_DMA] = max_dma;
790#endif
791 max_zone_pfns[ZONE_NORMAL] = max_pfn;
792 free_area_init_nodes(max_zone_pfns);
793
794 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
795}
796
797#ifdef CONFIG_MEMORY_HOTPLUG
798pg_data_t *arch_alloc_nodedata(int nid)
799{
800 unsigned long size = compute_pernodesize(nid);
801
802 return kzalloc(size, GFP_KERNEL);
803}
804
805void arch_free_nodedata(pg_data_t *pgdat)
806{
807 kfree(pgdat);
808}
809
810void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
811{
812 pgdat_list[update_node] = update_pgdat;
813 scatter_node_data();
814}
815#endif
816
817#ifdef CONFIG_SPARSEMEM_VMEMMAP
818int __meminit vmemmap_populate(struct page *start_page,
819 unsigned long size, int node)
820{
821 return vmemmap_populate_basepages(start_page, size, node);
822}
823#endif
824