1
2
3
4
5
6
7
8
9
10
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
18#include <linux/bootmem.h>
19#include <linux/mm.h>
20#include <asm/proto.h>
21#include <asm/numa.h>
22#include <asm/e820.h>
23
/*
 * SRAT usability state. bad_srat() sets it to -1, accepting a CPU affinity
 * entry sets it to 1, srat_disabled() treats a negative value as "disabled"
 * and acpi_scan_nodes() refuses to run unless it is positive.
 */
int acpi_numa __initdata;

/*
 * SLIT accepted by acpi_numa_slit_init(); stays NULL if no table passed
 * slit_valid(). Read by __node_distance().
 */
static struct acpi_table_slit *acpi_slit;

/* Nodes for which a memory range has been recorded in nodes[]. */
static nodemask_t nodes_parsed __initdata;
/* Per-node memory range built from the SRAT memory affinity entries. */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
/*
 * Per-node hot-add range filled in by reserve_hotadd(). Also read by
 * memory_add_physaddr_to_nid(), which is not an __init function.
 */
static struct bootnode nodes_add[MAX_NUMNODES];
/* Set by update_end_of_memory() (hotplug-reserve build), cleared by bad_srat(). */
static int found_add_area __initdata;
/*
 * Percentage used by hotadd_enough_memory() to bound hot-add bookkeeping;
 * save_add_info() (hotplug-reserve build) is true only when it is > 0.
 */
int hotadd_percent __initdata = 0;
33
34
35
/*
 * Smallest range, in bytes (4 MiB), accepted as a node by acpi_scan_nodes()
 * and as a hot-plug area by reserve_hotadd(); anything smaller is dropped.
 */
#define NODE_MIN_SIZE (4*1024*1024)
37
38static __init int setup_node(int pxm)
39{
40 return acpi_map_pxm_to_node(pxm);
41}
42
43static __init int conflicting_nodes(unsigned long start, unsigned long end)
44{
45 int i;
46 for_each_node_mask(i, nodes_parsed) {
47 struct bootnode *nd = &nodes[i];
48 if (nd->start == nd->end)
49 continue;
50 if (nd->end > start && nd->start < end)
51 return i;
52 if (nd->end == end && nd->start == start)
53 return i;
54 }
55 return -1;
56}
57
58static __init void cutoff_node(int i, unsigned long start, unsigned long end)
59{
60 struct bootnode *nd = &nodes[i];
61
62 if (found_add_area)
63 return;
64
65 if (nd->start < start) {
66 nd->start = start;
67 if (nd->end < nd->start)
68 nd->start = nd->end;
69 }
70 if (nd->end > end) {
71 nd->end = end;
72 if (nd->start > nd->end)
73 nd->start = nd->end;
74 }
75}
76
77static __init void bad_srat(void)
78{
79 int i;
80 printk(KERN_ERR "SRAT: SRAT not used.\n");
81 acpi_numa = -1;
82 found_add_area = 0;
83 for (i = 0; i < MAX_LOCAL_APIC; i++)
84 apicid_to_node[i] = NUMA_NO_NODE;
85 for (i = 0; i < MAX_NUMNODES; i++)
86 nodes_add[i].start = nodes[i].end = 0;
87 remove_all_active_ranges();
88}
89
90static __init inline int srat_disabled(void)
91{
92 return numa_off || acpi_numa < 0;
93}
94
95
96
97
98
99
100
101static __init int slit_valid(struct acpi_table_slit *slit)
102{
103 int i, j;
104 int d = slit->locality_count;
105 for (i = 0; i < d; i++) {
106 for (j = 0; j < d; j++) {
107 u8 val = slit->entry[d*i + j];
108 if (i == j) {
109 if (val != LOCAL_DISTANCE)
110 return 0;
111 } else if (val <= LOCAL_DISTANCE)
112 return 0;
113 }
114 }
115 return 1;
116}
117
118
119void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
120{
121 if (!slit_valid(slit)) {
122 printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
123 return;
124 }
125 acpi_slit = slit;
126}
127
128
129void __init
130acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
131{
132 int pxm, node;
133 if (srat_disabled())
134 return;
135 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
136 bad_srat();
137 return;
138 }
139 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
140 return;
141 pxm = pa->proximity_domain_lo;
142 node = setup_node(pxm);
143 if (node < 0) {
144 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
145 bad_srat();
146 return;
147 }
148 apicid_to_node[pa->apic_id] = node;
149 acpi_numa = 1;
150 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
151 pxm, pa->apic_id, node);
152}
153
154#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
155
156
157
158static int hotadd_enough_memory(struct bootnode *nd)
159{
160 static unsigned long allocated;
161 static unsigned long last_area_end;
162 unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
163 long mem = pages * sizeof(struct page);
164 unsigned long addr;
165 unsigned long allowed;
166 unsigned long oldpages = pages;
167
168 if (mem < 0)
169 return 0;
170 allowed = (end_pfn - absent_pages_in_range(0, end_pfn)) * PAGE_SIZE;
171 allowed = (allowed / 100) * hotadd_percent;
172 if (allocated + mem > allowed) {
173 unsigned long range;
174
175
176
177
178 if (allocated >= allowed)
179 return 0;
180 range = allowed - allocated;
181 pages = (range / PAGE_SIZE);
182 mem = pages * sizeof(struct page);
183 nd->end = nd->start + range;
184 }
185
186 addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
187 if (addr == -1UL)
188 return 0;
189 if (pages != oldpages)
190 printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
191 pages << PAGE_SHIFT);
192 last_area_end = addr + mem;
193 allocated += mem;
194 return 1;
195}
196
197static int update_end_of_memory(unsigned long end)
198{
199 found_add_area = 1;
200 if ((end >> PAGE_SHIFT) > end_pfn)
201 end_pfn = end >> PAGE_SHIFT;
202 return 1;
203}
204
205static inline int save_add_info(void)
206{
207 return hotadd_percent > 0;
208}
209#else
/*
 * Stub used without CONFIG_MEMORY_HOTPLUG_RESERVE: always reports
 * failure, which reserve_hotadd() passes straight back to its caller.
 * NOTE(review): unlike the hotplug-reserve variant this one is not
 * static; confirm whether external linkage is intended.
 */
int update_end_of_memory(unsigned long end)
{
	return -1;
}
/*
 * Stub used without CONFIG_MEMORY_HOTPLUG_RESERVE: no budget is enforced,
 * every range is accepted.
 */
static int hotadd_enough_memory(struct bootnode *nd)
{
	return 1;
}
/*
 * Without CONFIG_MEMORY_HOTPLUG_RESERVE, hot-pluggable SRAT ranges are
 * recorded only when CONFIG_MEMORY_HOTPLUG_SPARSE is enabled.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
217#endif
218
219
220
221
222
223static int reserve_hotadd(int node, unsigned long start, unsigned long end)
224{
225 unsigned long s_pfn = start >> PAGE_SHIFT;
226 unsigned long e_pfn = end >> PAGE_SHIFT;
227 int ret = 0, changed = 0;
228 struct bootnode *nd = &nodes_add[node];
229
230
231
232
233
234
235
236 if ((signed long)(end - start) < NODE_MIN_SIZE) {
237 printk(KERN_ERR "SRAT: Hotplug area too small\n");
238 return -1;
239 }
240
241
242 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
243 printk(KERN_ERR
244 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
245 s_pfn, e_pfn);
246 return -1;
247 }
248
249 if (!hotadd_enough_memory(&nodes_add[node])) {
250 printk(KERN_ERR "SRAT: Hotplug area too large\n");
251 return -1;
252 }
253
254
255
256 if (nd->start == nd->end) {
257 nd->start = start;
258 nd->end = end;
259 changed = 1;
260 } else {
261 if (nd->start == end) {
262 nd->start = start;
263 changed = 1;
264 }
265 if (nd->end == start) {
266 nd->end = end;
267 changed = 1;
268 }
269 if (!changed)
270 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
271 }
272
273 ret = update_end_of_memory(nd->end);
274
275 if (changed)
276 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
277 return ret;
278}
279
280
281void __init
282acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
283{
284 struct bootnode *nd, oldnode;
285 unsigned long start, end;
286 int node, pxm;
287 int i;
288
289 if (srat_disabled())
290 return;
291 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
292 bad_srat();
293 return;
294 }
295 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
296 return;
297
298 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
299 return;
300 start = ma->base_address;
301 end = start + ma->length;
302 pxm = ma->proximity_domain;
303 node = setup_node(pxm);
304 if (node < 0) {
305 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
306 bad_srat();
307 return;
308 }
309 i = conflicting_nodes(start, end);
310 if (i == node) {
311 printk(KERN_WARNING
312 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
313 pxm, start, end, nodes[i].start, nodes[i].end);
314 } else if (i >= 0) {
315 printk(KERN_ERR
316 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
317 pxm, start, end, node_to_pxm(i),
318 nodes[i].start, nodes[i].end);
319 bad_srat();
320 return;
321 }
322 nd = &nodes[node];
323 oldnode = *nd;
324 if (!node_test_and_set(node, nodes_parsed)) {
325 nd->start = start;
326 nd->end = end;
327 } else {
328 if (start < nd->start)
329 nd->start = start;
330 if (nd->end < end)
331 nd->end = end;
332 }
333
334 printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
335 nd->start, nd->end);
336 e820_register_active_regions(node, nd->start >> PAGE_SHIFT,
337 nd->end >> PAGE_SHIFT);
338 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
339 nd->end >> PAGE_SHIFT);
340
341 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
342 (reserve_hotadd(node, start, end) < 0)) {
343
344 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
345 *nd = oldnode;
346 if ((nd->start | nd->end) == 0)
347 node_clear(node, nodes_parsed);
348 }
349}
350
351
352
353static int __init nodes_cover_memory(const struct bootnode *nodes)
354{
355 int i;
356 unsigned long pxmram, e820ram;
357
358 pxmram = 0;
359 for_each_node_mask(i, nodes_parsed) {
360 unsigned long s = nodes[i].start >> PAGE_SHIFT;
361 unsigned long e = nodes[i].end >> PAGE_SHIFT;
362 pxmram += e - s;
363 pxmram -= absent_pages_in_range(s, e);
364 if ((long)pxmram < 0)
365 pxmram = 0;
366 }
367
368 e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
369
370 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
371 printk(KERN_ERR
372 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
373 (pxmram << PAGE_SHIFT) >> 20,
374 (e820ram << PAGE_SHIFT) >> 20);
375 return 0;
376 }
377 return 1;
378}
379
380static void unparse_node(int node)
381{
382 int i;
383 node_clear(node, nodes_parsed);
384 for (i = 0; i < MAX_LOCAL_APIC; i++) {
385 if (apicid_to_node[i] == node)
386 apicid_to_node[i] = NUMA_NO_NODE;
387 }
388}
389
390void __init acpi_numa_arch_fixup(void) {}
391
392
393int __init acpi_scan_nodes(unsigned long start, unsigned long end)
394{
395 int i;
396
397 if (acpi_numa <= 0)
398 return -1;
399
400
401 for (i = 0; i < MAX_NUMNODES; i++) {
402 cutoff_node(i, start, end);
403 if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
404 unparse_node(i);
405 node_set_offline(i);
406 }
407 }
408
409 if (!nodes_cover_memory(nodes)) {
410 bad_srat();
411 return -1;
412 }
413
414 memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
415 if (memnode_shift < 0) {
416 printk(KERN_ERR
417 "SRAT: No NUMA node hash function found. Contact maintainer\n");
418 bad_srat();
419 return -1;
420 }
421
422 node_possible_map = nodes_parsed;
423
424
425 for_each_node_mask(i, node_possible_map)
426 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
427
428
429 for_each_node_mask(i, node_possible_map)
430 if (!node_online(i))
431 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
432
433 for (i = 0; i < NR_CPUS; i++) {
434 if (cpu_to_node(i) == NUMA_NO_NODE)
435 continue;
436 if (!node_isset(cpu_to_node(i), node_possible_map))
437 numa_set_node(i, NUMA_NO_NODE);
438 }
439 numa_init_array();
440 return 0;
441}
442
443#ifdef CONFIG_NUMA_EMU
444static int __init find_node_by_addr(unsigned long addr)
445{
446 int ret = NUMA_NO_NODE;
447 int i;
448
449 for_each_node_mask(i, nodes_parsed) {
450
451
452
453
454
455 if (addr >= nodes[i].start && addr < nodes[i].end) {
456 ret = i;
457 break;
458 }
459 }
460 return i;
461}
462
463
464
465
466
467
468
469
470
471void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
472{
473 int i, j;
474 int fake_node_to_pxm_map[MAX_NUMNODES] = {
475 [0 ... MAX_NUMNODES-1] = PXM_INVAL
476 };
477 unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] = {
478 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
479 };
480
481 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
482 "topology.\n");
483 for (i = 0; i < num_nodes; i++) {
484 int nid, pxm;
485
486 nid = find_node_by_addr(fake_nodes[i].start);
487 if (nid == NUMA_NO_NODE)
488 continue;
489 pxm = node_to_pxm(nid);
490 if (pxm == PXM_INVAL)
491 continue;
492 fake_node_to_pxm_map[i] = pxm;
493
494
495
496
497 for (j = 0; j < MAX_LOCAL_APIC; j++)
498 if (apicid_to_node[j] == nid)
499 fake_apicid_to_node[j] = i;
500 }
501 for (i = 0; i < num_nodes; i++)
502 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
503 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
504
505 nodes_clear(nodes_parsed);
506 for (i = 0; i < num_nodes; i++)
507 if (fake_nodes[i].start != fake_nodes[i].end)
508 node_set(i, nodes_parsed);
509 WARN_ON(!nodes_cover_memory(fake_nodes));
510}
511
/*
 * NUMA emulation build: without a SLIT, two nodes count as "the same"
 * when they map to the same proximity domain.
 */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
516#else
/* Without a SLIT (and without NUMA emulation) a node only matches itself. */
static int null_slit_node_compare(int a, int b)
{
	if (a == b)
		return 1;
	return 0;
}
521#endif
522
523void __init srat_reserve_add_area(int nodeid)
524{
525 if (found_add_area && nodes_add[nodeid].end) {
526 u64 total_mb;
527
528 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
529 "for node %d at %Lx-%Lx\n",
530 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
531 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
532 >> PAGE_SHIFT;
533 total_mb *= sizeof(struct page);
534 total_mb >>= 20;
535 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
536 "pre-allocated memory.\n", (unsigned long long)total_mb);
537 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
538 nodes_add[nodeid].end - nodes_add[nodeid].start);
539 }
540}
541
542int __node_distance(int a, int b)
543{
544 int index;
545
546 if (!acpi_slit)
547 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
548 REMOTE_DISTANCE;
549 index = acpi_slit->locality_count * node_to_pxm(a);
550 return acpi_slit->entry[index + node_to_pxm(b)];
551}
552
553EXPORT_SYMBOL(__node_distance);
554
555int memory_add_physaddr_to_nid(u64 start)
556{
557 int i, ret = 0;
558
559 for_each_node(i)
560 if (nodes_add[i].start <= start && nodes_add[i].end > start)
561 ret = i;
562
563 return ret;
564}
565EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
566
567