1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/nmi.h>
20#include <linux/swap.h>
21#include <linux/bootmem.h>
22#include <linux/acpi.h>
23#include <linux/efi.h>
24#include <linux/nodemask.h>
25#include <linux/slab.h>
26#include <asm/pgalloc.h>
27#include <asm/tlb.h>
28#include <asm/meminit.h>
29#include <asm/numa.h>
30#include <asm/sections.h>
31
32
33
34
35
/*
 * Boot-time bookkeeping kept per node while the memory map is being built.
 * Instances live in the mem_data[] table below.
 */
struct early_node_data {
	/* Location of this node's struct ia64_node_data; set by fill_pernode(). */
	struct ia64_node_data *node_data;
	/* Address of the per-node area as handed to fill_pernode(). */
	unsigned long pernode_addr;
	/* Size in bytes of the per-node area (see compute_pernodesize()). */
	unsigned long pernode_size;
#ifdef CONFIG_ZONE_DMA
	/* Pages below MAX_DMA_ADDRESS, accumulated by count_node_pages(). */
	unsigned long num_dma_physpages;
#endif
	/* Lowest and highest page frame numbers seen by count_node_pages(). */
	unsigned long min_pfn;
	unsigned long max_pfn;
};
46
47static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
48static nodemask_t memory_less_mask __initdata;
49
50pg_data_t *pgdat_list[MAX_NUMNODES];
51
52
53
54
55
/*
 * NODEDATA_ALIGN(addr, node): round @addr up to a 1MB boundary, then add a
 * per-node offset of node * PERCPU_PAGE_SIZE, wrapped to stay below
 * MAX_NODE_ALIGN_OFFSET, so that different nodes get different offsets.
 */
#define MAX_NODE_ALIGN_OFFSET	(32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)					\
	((((addr) + 1024*1024-1) & ~(1024*1024-1)) +			\
	     (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74static int __init build_node_maps(unsigned long start, unsigned long len,
75 int node)
76{
77 unsigned long spfn, epfn, end = start + len;
78 struct bootmem_data *bdp = &bootmem_node_data[node];
79
80 epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
81 spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;
82
83 if (!bdp->node_low_pfn) {
84 bdp->node_min_pfn = spfn;
85 bdp->node_low_pfn = epfn;
86 } else {
87 bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
88 bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
89 }
90
91 return 0;
92}
93
94
95
96
97
98
99
100
101
102static int __meminit early_nr_cpus_node(int node)
103{
104 int cpu, n = 0;
105
106 for_each_possible_early_cpu(cpu)
107 if (node == node_cpuid[cpu].nid)
108 n++;
109
110 return n;
111}
112
113
114
115
116
117static unsigned long __meminit compute_pernodesize(int node)
118{
119 unsigned long pernodesize = 0, cpus;
120
121 cpus = early_nr_cpus_node(node);
122 pernodesize += PERCPU_PAGE_SIZE * cpus;
123 pernodesize += node * L1_CACHE_BYTES;
124 pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
125 pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
126 pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
127 pernodesize = PAGE_ALIGN(pernodesize);
128 return pernodesize;
129}
130
131
132
133
134
135
136
137
138
139
/**
 * per_cpu_node_setup - populate the per-cpu areas of one node
 * @cpu_data: address of the first per-cpu slot inside the node's area
 * @node: node whose CPUs are to be set up
 *
 * For every early-possible CPU on @node, copy the per-cpu image into the next
 * PERCPU_PAGE_SIZE slot and record the resulting offset in
 * __per_cpu_offset[].  Returns the address just past the last slot used
 * (or @cpu_data unchanged when CONFIG_SMP is off).
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
	int cpu;

	for_each_possible_early_cpu(cpu) {
		void *image;

		if (node_cpuid[cpu].nid != node)
			continue;

		/* CPU 0 is seeded from its own image, all others from the template. */
		image = (cpu == 0) ? __cpu0_per_cpu : __phys_per_cpu_start;

		memcpy(__va(cpu_data), image, __per_cpu_end - __per_cpu_start);
		__per_cpu_offset[cpu] = (char *)__va(cpu_data) -
			__per_cpu_start;

		/*
		 * CPU 0's area has just moved, so the kernel register that
		 * tracks its per-cpu data is refreshed here.  Note that this
		 * uses cpu_data itself, not __va(cpu_data).
		 */
		if (cpu == 0) {
			unsigned long kr_val = (unsigned long)cpu_data -
					       (unsigned long)__per_cpu_start;

			ia64_set_kr(IA64_KR_PER_CPU_DATA, kr_val);
		}

		cpu_data += PERCPU_PAGE_SIZE;
	}
#endif
	return cpu_data;
}
174
175#ifdef CONFIG_SMP
176
177
178
179
180
181
182
183
184void __init setup_per_cpu_areas(void)
185{
186 struct pcpu_alloc_info *ai;
187 struct pcpu_group_info *uninitialized_var(gi);
188 unsigned int *cpu_map;
189 void *base;
190 unsigned long base_offset;
191 unsigned int cpu;
192 ssize_t static_size, reserved_size, dyn_size;
193 int node, prev_node, unit, nr_units, rc;
194
195 ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
196 if (!ai)
197 panic("failed to allocate pcpu_alloc_info");
198 cpu_map = ai->groups[0].cpu_map;
199
200
201 base = (void *)ULONG_MAX;
202 for_each_possible_cpu(cpu)
203 base = min(base,
204 (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
205 base_offset = (void *)__per_cpu_start - base;
206
207
208 unit = 0;
209 for_each_node(node)
210 for_each_possible_cpu(cpu)
211 if (node == node_cpuid[cpu].nid)
212 cpu_map[unit++] = cpu;
213 nr_units = unit;
214
215
216 static_size = __per_cpu_end - __per_cpu_start;
217 reserved_size = PERCPU_MODULE_RESERVE;
218 dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
219 if (dyn_size < 0)
220 panic("percpu area overflow static=%zd reserved=%zd\n",
221 static_size, reserved_size);
222
223 ai->static_size = static_size;
224 ai->reserved_size = reserved_size;
225 ai->dyn_size = dyn_size;
226 ai->unit_size = PERCPU_PAGE_SIZE;
227 ai->atom_size = PAGE_SIZE;
228 ai->alloc_size = PERCPU_PAGE_SIZE;
229
230
231
232
233
234 prev_node = -1;
235 ai->nr_groups = 0;
236 for (unit = 0; unit < nr_units; unit++) {
237 cpu = cpu_map[unit];
238 node = node_cpuid[cpu].nid;
239
240 if (node == prev_node) {
241 gi->nr_units++;
242 continue;
243 }
244 prev_node = node;
245
246 gi = &ai->groups[ai->nr_groups++];
247 gi->nr_units = 1;
248 gi->base_offset = __per_cpu_offset[cpu] + base_offset;
249 gi->cpu_map = &cpu_map[unit];
250 }
251
252 rc = pcpu_setup_first_chunk(ai, base);
253 if (rc)
254 panic("failed to setup percpu area (err=%d)", rc);
255
256 pcpu_free_alloc_info(ai);
257}
258#endif
259
260
261
262
263
264
265
266static void __init fill_pernode(int node, unsigned long pernode,
267 unsigned long pernodesize)
268{
269 void *cpu_data;
270 int cpus = early_nr_cpus_node(node);
271 struct bootmem_data *bdp = &bootmem_node_data[node];
272
273 mem_data[node].pernode_addr = pernode;
274 mem_data[node].pernode_size = pernodesize;
275 memset(__va(pernode), 0, pernodesize);
276
277 cpu_data = (void *)pernode;
278 pernode += PERCPU_PAGE_SIZE * cpus;
279 pernode += node * L1_CACHE_BYTES;
280
281 pgdat_list[node] = __va(pernode);
282 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
283
284 mem_data[node].node_data = __va(pernode);
285 pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
286
287 pgdat_list[node]->bdata = bdp;
288 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
289
290 cpu_data = per_cpu_node_setup(cpu_data, node);
291
292 return;
293}
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323static int __init find_pernode_space(unsigned long start, unsigned long len,
324 int node)
325{
326 unsigned long spfn, epfn;
327 unsigned long pernodesize = 0, pernode, pages, mapsize;
328 struct bootmem_data *bdp = &bootmem_node_data[node];
329
330 spfn = start >> PAGE_SHIFT;
331 epfn = (start + len) >> PAGE_SHIFT;
332
333 pages = bdp->node_low_pfn - bdp->node_min_pfn;
334 mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
335
336
337
338
339
340 if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
341 return 0;
342
343
344 if (mem_data[node].pernode_addr)
345 return 0;
346
347
348
349
350
351 pernodesize = compute_pernodesize(node);
352 pernode = NODEDATA_ALIGN(start, node);
353
354
355 if (start + len > (pernode + pernodesize + mapsize))
356 fill_pernode(node, pernode, pernodesize);
357
358 return 0;
359}
360
361
362
363
364
365
366
367
368
369
370
371
372static int __init free_node_bootmem(unsigned long start, unsigned long len,
373 int node)
374{
375 free_bootmem_node(pgdat_list[node], start, len);
376
377 return 0;
378}
379
380
381
382
383
384
385
386
387static void __init reserve_pernode_space(void)
388{
389 unsigned long base, size, pages;
390 struct bootmem_data *bdp;
391 int node;
392
393 for_each_online_node(node) {
394 pg_data_t *pdp = pgdat_list[node];
395
396 if (node_isset(node, memory_less_mask))
397 continue;
398
399 bdp = pdp->bdata;
400
401
402 pages = bdp->node_low_pfn - bdp->node_min_pfn;
403 size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
404 base = __pa(bdp->node_bootmem_map);
405 reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
406
407
408 size = mem_data[node].pernode_size;
409 base = __pa(mem_data[node].pernode_addr);
410 reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
411 }
412}
413
414static void __meminit scatter_node_data(void)
415{
416 pg_data_t **dst;
417 int node;
418
419
420
421
422
423
424
425
426
427 for_each_node(node) {
428 if (pgdat_list[node]) {
429 dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
430 memcpy(dst, pgdat_list, sizeof(pgdat_list));
431 }
432 }
433}
434
435
436
437
438
439
440
441
442
443static void __init initialize_pernode_data(void)
444{
445 int cpu, node;
446
447 scatter_node_data();
448
449#ifdef CONFIG_SMP
450
451 for_each_possible_early_cpu(cpu) {
452 node = node_cpuid[cpu].nid;
453 per_cpu(ia64_cpu_info, cpu).node_data =
454 mem_data[node].node_data;
455 }
456#else
457 {
458 struct cpuinfo_ia64 *cpu0_cpu_info;
459 cpu = 0;
460 node = node_cpuid[cpu].nid;
461 cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
462 ((char *)&ia64_cpu_info - __per_cpu_start));
463 cpu0_cpu_info->node_data = mem_data[node].node_data;
464 }
465#endif
466}
467
468
469
470
471
472
473
474
475static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
476{
477 void *ptr = NULL;
478 u8 best = 0xff;
479 int bestnode = -1, node, anynode = 0;
480
481 for_each_online_node(node) {
482 if (node_isset(node, memory_less_mask))
483 continue;
484 else if (node_distance(nid, node) < best) {
485 best = node_distance(nid, node);
486 bestnode = node;
487 }
488 anynode = node;
489 }
490
491 if (bestnode == -1)
492 bestnode = anynode;
493
494 ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
495 PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
496
497 return ptr;
498}
499
500
501
502
503
504static void __init memory_less_nodes(void)
505{
506 unsigned long pernodesize;
507 void *pernode;
508 int node;
509
510 for_each_node_mask(node, memory_less_mask) {
511 pernodesize = compute_pernodesize(node);
512 pernode = memory_less_node_alloc(node, pernodesize);
513 fill_pernode(node, __pa(pernode), pernodesize);
514 }
515
516 return;
517}
518
519
520
521
522
523
524
525void __init find_memory(void)
526{
527 int node;
528
529 reserve_memory();
530
531 if (num_online_nodes() == 0) {
532 printk(KERN_ERR "node info missing!\n");
533 node_set_online(0);
534 }
535
536 nodes_or(memory_less_mask, memory_less_mask, node_online_map);
537 min_low_pfn = -1;
538 max_low_pfn = 0;
539
540
541 efi_memmap_walk(filter_rsvd_memory, build_node_maps);
542 efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
543 efi_memmap_walk(find_max_min_low_pfn, NULL);
544
545 for_each_online_node(node)
546 if (bootmem_node_data[node].node_low_pfn) {
547 node_clear(node, memory_less_mask);
548 mem_data[node].min_pfn = ~0UL;
549 }
550
551 efi_memmap_walk(filter_memory, register_active_ranges);
552
553
554
555
556
557 for (node = MAX_NUMNODES - 1; node >= 0; node--) {
558 unsigned long pernode, pernodesize, map;
559 struct bootmem_data *bdp;
560
561 if (!node_online(node))
562 continue;
563 else if (node_isset(node, memory_less_mask))
564 continue;
565
566 bdp = &bootmem_node_data[node];
567 pernode = mem_data[node].pernode_addr;
568 pernodesize = mem_data[node].pernode_size;
569 map = pernode + pernodesize;
570
571 init_bootmem_node(pgdat_list[node],
572 map>>PAGE_SHIFT,
573 bdp->node_min_pfn,
574 bdp->node_low_pfn);
575 }
576
577 efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
578
579 reserve_pernode_space();
580 memory_less_nodes();
581 initialize_pernode_data();
582
583 max_pfn = max_low_pfn;
584
585 find_initrd();
586}
587
588#ifdef CONFIG_SMP
589
590
591
592
593
594
595void *per_cpu_init(void)
596{
597 int cpu;
598 static int first_time = 1;
599
600 if (first_time) {
601 first_time = 0;
602 for_each_possible_early_cpu(cpu)
603 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
604 }
605
606 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
607}
608#endif
609
610
611
612
613
614
615
616void show_mem(unsigned int filter)
617{
618 int i, total_reserved = 0;
619 int total_shared = 0, total_cached = 0;
620 unsigned long total_present = 0;
621 pg_data_t *pgdat;
622
623 printk(KERN_INFO "Mem-info:\n");
624 show_free_areas(filter);
625 if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
626 return;
627 printk(KERN_INFO "Node memory in pages:\n");
628 for_each_online_pgdat(pgdat) {
629 unsigned long present;
630 unsigned long flags;
631 int shared = 0, cached = 0, reserved = 0;
632 int nid = pgdat->node_id;
633
634 if (skip_free_areas_node(filter, nid))
635 continue;
636 pgdat_resize_lock(pgdat, &flags);
637 present = pgdat->node_present_pages;
638 for(i = 0; i < pgdat->node_spanned_pages; i++) {
639 struct page *page;
640 if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
641 touch_nmi_watchdog();
642 if (pfn_valid(pgdat->node_start_pfn + i))
643 page = pfn_to_page(pgdat->node_start_pfn + i);
644 else {
645 i = vmemmap_find_next_valid_pfn(nid, i) - 1;
646 continue;
647 }
648 if (PageReserved(page))
649 reserved++;
650 else if (PageSwapCache(page))
651 cached++;
652 else if (page_count(page))
653 shared += page_count(page)-1;
654 }
655 pgdat_resize_unlock(pgdat, &flags);
656 total_present += present;
657 total_reserved += reserved;
658 total_cached += cached;
659 total_shared += shared;
660 printk(KERN_INFO "Node %4d: RAM: %11ld, rsvd: %8d, "
661 "shrd: %10d, swpd: %10d\n", nid,
662 present, reserved, shared, cached);
663 }
664 printk(KERN_INFO "%ld pages of RAM\n", total_present);
665 printk(KERN_INFO "%d reserved pages\n", total_reserved);
666 printk(KERN_INFO "%d pages shared\n", total_shared);
667 printk(KERN_INFO "%d pages swap cached\n", total_cached);
668 printk(KERN_INFO "Total of %ld pages in page table cache\n",
669 quicklist_total_size());
670 printk(KERN_INFO "%ld free buffer pages\n", nr_free_buffer_pages());
671}
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
687{
688 unsigned long rs, re, end = start + len;
689 void (*func)(unsigned long, unsigned long, int);
690 int i;
691
692 start = PAGE_ALIGN(start);
693 end &= PAGE_MASK;
694 if (start >= end)
695 return;
696
697 func = arg;
698
699 if (!num_node_memblks) {
700
701 if (start < end)
702 (*func)(start, end - start, 0);
703 return;
704 }
705
706 for (i = 0; i < num_node_memblks; i++) {
707 rs = max(start, node_memblk[i].start_paddr);
708 re = min(end, node_memblk[i].start_paddr +
709 node_memblk[i].size);
710
711 if (rs < re)
712 (*func)(rs, re - rs, node_memblk[i].nid);
713
714 if (re == end)
715 break;
716 }
717}
718
719
720
721
722
723
724
725
726
727
728
729
730static __init int count_node_pages(unsigned long start, unsigned long len, int node)
731{
732 unsigned long end = start + len;
733
734#ifdef CONFIG_ZONE_DMA
735 if (start <= __pa(MAX_DMA_ADDRESS))
736 mem_data[node].num_dma_physpages +=
737 (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
738#endif
739 start = GRANULEROUNDDOWN(start);
740 end = GRANULEROUNDUP(end);
741 mem_data[node].max_pfn = max(mem_data[node].max_pfn,
742 end >> PAGE_SHIFT);
743 mem_data[node].min_pfn = min(mem_data[node].min_pfn,
744 start >> PAGE_SHIFT);
745
746 return 0;
747}
748
749
750
751
752
753
754
755void __init paging_init(void)
756{
757 unsigned long max_dma;
758 unsigned long pfn_offset = 0;
759 unsigned long max_pfn = 0;
760 int node;
761 unsigned long max_zone_pfns[MAX_NR_ZONES];
762
763 max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
764
765 efi_memmap_walk(filter_rsvd_memory, count_node_pages);
766
767 sparse_memory_present_with_active_regions(MAX_NUMNODES);
768 sparse_init();
769
770#ifdef CONFIG_VIRTUAL_MEM_MAP
771 VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
772 sizeof(struct page));
773 vmem_map = (struct page *) VMALLOC_END;
774 efi_memmap_walk(create_mem_map_page_table, NULL);
775 printk("Virtual mem_map starts at 0x%p\n", vmem_map);
776#endif
777
778 for_each_online_node(node) {
779 pfn_offset = mem_data[node].min_pfn;
780
781#ifdef CONFIG_VIRTUAL_MEM_MAP
782 NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
783#endif
784 if (mem_data[node].max_pfn > max_pfn)
785 max_pfn = mem_data[node].max_pfn;
786 }
787
788 memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
789#ifdef CONFIG_ZONE_DMA
790 max_zone_pfns[ZONE_DMA] = max_dma;
791#endif
792 max_zone_pfns[ZONE_NORMAL] = max_pfn;
793 free_area_init_nodes(max_zone_pfns);
794
795 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
796}
797
798#ifdef CONFIG_MEMORY_HOTPLUG
799pg_data_t *arch_alloc_nodedata(int nid)
800{
801 unsigned long size = compute_pernodesize(nid);
802
803 return kzalloc(size, GFP_KERNEL);
804}
805
806void arch_free_nodedata(pg_data_t *pgdat)
807{
808 kfree(pgdat);
809}
810
811void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
812{
813 pgdat_list[update_node] = update_pgdat;
814 scatter_node_data();
815}
816#endif
817
818#ifdef CONFIG_SPARSEMEM_VMEMMAP
819int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
820{
821 return vmemmap_populate_basepages(start, end, node);
822}
823
/**
 * vmemmap_free - counterpart of vmemmap_populate()
 * @start: start of the range
 * @end: end of the range
 *
 * Intentionally empty: nothing is released here.
 */
void vmemmap_free(unsigned long start, unsigned long end)
{
	/* nothing to do */
}
827#endif
828