1
2
3
4
5
6
7
8
9
10
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
18#include <linux/bootmem.h>
19#include <linux/mm.h>
20#include <asm/proto.h>
21#include <asm/numa.h>
22#include <asm/e820.h>
23#include <asm/apic.h>
24#include <asm/uv/uv.h>
25
26int acpi_numa __initdata;
27
28static struct acpi_table_slit *acpi_slit;
29
30static nodemask_t nodes_parsed __initdata;
31static nodemask_t cpu_nodes_parsed __initdata;
32static struct bootnode nodes[MAX_NUMNODES] __initdata;
33static struct bootnode nodes_add[MAX_NUMNODES];
34
35static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
39static __init int setup_node(int pxm)
40{
41 return acpi_map_pxm_to_node(pxm);
42}
43
44static __init int conflicting_memblks(unsigned long start, unsigned long end)
45{
46 int i;
47 for (i = 0; i < num_node_memblks; i++) {
48 struct bootnode *nd = &node_memblk_range[i];
49 if (nd->start == nd->end)
50 continue;
51 if (nd->end > start && nd->start < end)
52 return memblk_nodeid[i];
53 if (nd->end == end && nd->start == start)
54 return memblk_nodeid[i];
55 }
56 return -1;
57}
58
59static __init void cutoff_node(int i, unsigned long start, unsigned long end)
60{
61 struct bootnode *nd = &nodes[i];
62
63 if (nd->start < start) {
64 nd->start = start;
65 if (nd->end < nd->start)
66 nd->start = nd->end;
67 }
68 if (nd->end > end) {
69 nd->end = end;
70 if (nd->start > nd->end)
71 nd->start = nd->end;
72 }
73}
74
75static __init void bad_srat(void)
76{
77 int i;
78 printk(KERN_ERR "SRAT: SRAT not used.\n");
79 acpi_numa = -1;
80 for (i = 0; i < MAX_LOCAL_APIC; i++)
81 apicid_to_node[i] = NUMA_NO_NODE;
82 for (i = 0; i < MAX_NUMNODES; i++) {
83 nodes[i].start = nodes[i].end = 0;
84 nodes_add[i].start = nodes_add[i].end = 0;
85 }
86 remove_all_active_ranges();
87}
88
89static __init inline int srat_disabled(void)
90{
91 return numa_off || acpi_numa < 0;
92}
93
94
95void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
96{
97 unsigned length;
98 unsigned long phys;
99
100 length = slit->header.length;
101 phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
102 PAGE_SIZE);
103
104 if (phys == -1L)
105 panic(" Can not save slit!\n");
106
107 acpi_slit = __va(phys);
108 memcpy(acpi_slit, slit, length);
109 reserve_early(phys, phys + length, "ACPI SLIT");
110}
111
112
113void __init
114acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
115{
116 int pxm, node;
117 int apic_id;
118
119 if (srat_disabled())
120 return;
121 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
122 bad_srat();
123 return;
124 }
125 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
126 return;
127 pxm = pa->proximity_domain;
128 node = setup_node(pxm);
129 if (node < 0) {
130 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
131 bad_srat();
132 return;
133 }
134
135 apic_id = pa->apic_id;
136 apicid_to_node[apic_id] = node;
137 node_set(node, cpu_nodes_parsed);
138 acpi_numa = 1;
139 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
140 pxm, apic_id, node);
141}
142
143
144void __init
145acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
146{
147 int pxm, node;
148 int apic_id;
149
150 if (srat_disabled())
151 return;
152 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
153 bad_srat();
154 return;
155 }
156 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
157 return;
158 pxm = pa->proximity_domain_lo;
159 node = setup_node(pxm);
160 if (node < 0) {
161 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
162 bad_srat();
163 return;
164 }
165
166 if (get_uv_system_type() >= UV_X2APIC)
167 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
168 else
169 apic_id = pa->apic_id;
170 apicid_to_node[apic_id] = node;
171 node_set(node, cpu_nodes_parsed);
172 acpi_numa = 1;
173 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
174 pxm, apic_id, node);
175}
176
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
/*
 * Hot-pluggable SRAT memory ranges are only worth recording when
 * sparse memory hotplug support is compiled in; otherwise they are
 * dropped by the memory affinity parser.
 */
static inline int save_add_info(void) {return 1;}
#else
static inline int save_add_info(void) {return 0;}
#endif
182
183
184
185
/*
 * Record a hot-pluggable SRAT range in nodes_add[node], growing an
 * already-recorded region when the new range touches one of its edges.
 */
static void __init
update_nodes_add(int node, unsigned long start, unsigned long end)
{
	unsigned long s_pfn = start >> PAGE_SHIFT;
	unsigned long e_pfn = end >> PAGE_SHIFT;
	int changed = 0;
	struct bootnode *nd = &nodes_add[node];

	/*
	 * Reject tiny areas: the signed cast also catches a bogus
	 * range whose end wrapped below its start.
	 */
	if ((signed long)(end - start) < NODE_MIN_SIZE) {
		printk(KERN_ERR "SRAT: Hotplug area too small\n");
		return;
	}

	/* The whole range must be absent (no memory registered yet). */
	if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
		printk(KERN_ERR
			"SRAT: Hotplug area %lu -> %lu has existing memory\n",
			s_pfn, e_pfn);
		return;
	}

	/* Looks good. */
	if (nd->start == nd->end) {
		/* Node had no hotplug range yet: adopt this one verbatim. */
		nd->start = start;
		nd->end = end;
		changed = 1;
	} else {
		/* Only grow at a touching edge; keep the region contiguous. */
		if (nd->start == end) {
			nd->start = start;
			changed = 1;
		}
		if (nd->end == start) {
			nd->end = end;
			changed = 1;
		}
		if (!changed)
			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
	}

	if (changed)
		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
				 nd->start, nd->end);
}
236
237
238void __init
239acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
240{
241 struct bootnode *nd, oldnode;
242 unsigned long start, end;
243 int node, pxm;
244 int i;
245
246 if (srat_disabled())
247 return;
248 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
249 bad_srat();
250 return;
251 }
252 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
253 return;
254
255 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
256 return;
257 start = ma->base_address;
258 end = start + ma->length;
259 pxm = ma->proximity_domain;
260 node = setup_node(pxm);
261 if (node < 0) {
262 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
263 bad_srat();
264 return;
265 }
266 i = conflicting_memblks(start, end);
267 if (i == node) {
268 printk(KERN_WARNING
269 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
270 pxm, start, end, nodes[i].start, nodes[i].end);
271 } else if (i >= 0) {
272 printk(KERN_ERR
273 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
274 pxm, start, end, node_to_pxm(i),
275 nodes[i].start, nodes[i].end);
276 bad_srat();
277 return;
278 }
279 nd = &nodes[node];
280 oldnode = *nd;
281 if (!node_test_and_set(node, nodes_parsed)) {
282 nd->start = start;
283 nd->end = end;
284 } else {
285 if (start < nd->start)
286 nd->start = start;
287 if (nd->end < end)
288 nd->end = end;
289 }
290
291 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
292 start, end);
293 e820_register_active_regions(node, start >> PAGE_SHIFT,
294 end >> PAGE_SHIFT);
295
296 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
297 update_nodes_add(node, start, end);
298
299 *nd = oldnode;
300 if ((nd->start | nd->end) == 0)
301 node_clear(node, nodes_parsed);
302 }
303
304 node_memblk_range[num_node_memblks].start = start;
305 node_memblk_range[num_node_memblks].end = end;
306 memblk_nodeid[num_node_memblks] = node;
307 num_node_memblks++;
308}
309
310
311
/*
 * Sanity check: do the parsed node ranges cover (nearly) all of the
 * e820 RAM? Returns 1 if yes, 0 if the SRAT should be rejected.
 */
static int __init nodes_cover_memory(const struct bootnode *nodes)
{
	int i;
	unsigned long pxmram, e820ram;

	/* Sum the present pages claimed by every parsed node. */
	pxmram = 0;
	for_each_node_mask(i, nodes_parsed) {
		unsigned long s = nodes[i].start >> PAGE_SHIFT;
		unsigned long e = nodes[i].end >> PAGE_SHIFT;
		pxmram += e - s;
		pxmram -= absent_pages_in_range(s, e);
		/* Clamp unsigned underflow from fully-absent ranges. */
		if ((long)pxmram < 0)
			pxmram = 0;
	}

	e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
	/* Allow up to 1MB of slack before declaring the SRAT unusable. */
	if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
		printk(KERN_ERR
	"SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
			(pxmram << PAGE_SHIFT) >> 20,
			(e820ram << PAGE_SHIFT) >> 20);
		return 0;
	}
	return 1;
}
338
339void __init acpi_numa_arch_fixup(void) {}
340
341
342int __init acpi_scan_nodes(unsigned long start, unsigned long end)
343{
344 int i;
345
346 if (acpi_numa <= 0)
347 return -1;
348
349
350 for (i = 0; i < MAX_NUMNODES; i++)
351 cutoff_node(i, start, end);
352
353 if (!nodes_cover_memory(nodes)) {
354 bad_srat();
355 return -1;
356 }
357
358 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
359 memblk_nodeid);
360 if (memnode_shift < 0) {
361 printk(KERN_ERR
362 "SRAT: No NUMA node hash function found. Contact maintainer\n");
363 bad_srat();
364 return -1;
365 }
366
367
368 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
369
370
371 for_each_node_mask(i, node_possible_map)
372 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
373
374
375 for_each_node_mask(i, node_possible_map)
376 if (!node_online(i))
377 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
378
379 for (i = 0; i < nr_cpu_ids; i++) {
380 int node = early_cpu_to_node(i);
381
382 if (node == NUMA_NO_NODE)
383 continue;
384 if (!node_online(node))
385 numa_clear_node(i);
386 }
387 numa_init_array();
388 return 0;
389}
390
391#ifdef CONFIG_NUMA_EMU
/* Fake node id -> real PXM mapping, filled in by acpi_fake_nodes(). */
static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
	[0 ... MAX_NUMNODES-1] = PXM_INVAL
};
/* Replacement apicid_to_node[] table for the emulated topology. */
static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
398static int __init find_node_by_addr(unsigned long addr)
399{
400 int ret = NUMA_NO_NODE;
401 int i;
402
403 for_each_node_mask(i, nodes_parsed) {
404
405
406
407
408
409 if (addr >= nodes[i].start && addr < nodes[i].end) {
410 ret = i;
411 break;
412 }
413 }
414 return ret;
415}
416
417
418
419
420
421
422
423
424
/*
 * For NUMA emulation: re-point the PXM and apicid tables at the fake
 * nodes so the emulated topology inherits the real SRAT affinity.
 */
void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
{
	int i, j;

	printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
	       "topology.\n");
	for (i = 0; i < num_nodes; i++) {
		int nid, pxm;

		/* Which real node does this fake node's start fall into? */
		nid = find_node_by_addr(fake_nodes[i].start);
		if (nid == NUMA_NO_NODE)
			continue;
		pxm = node_to_pxm(nid);
		if (pxm == PXM_INVAL)
			continue;
		fake_node_to_pxm_map[i] = pxm;
		/*
		 * Redirect every apicid that mapped to the real node to
		 * this fake node instead.
		 */
		for (j = 0; j < MAX_LOCAL_APIC; j++)
			if (apicid_to_node[j] == nid)
				fake_apicid_to_node[j] = i;
	}
	/* Install the fake PXM and apicid tables as the live ones. */
	for (i = 0; i < num_nodes; i++)
		__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
	memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));

	/* Rebuild nodes_parsed from the non-empty fake nodes. */
	nodes_clear(nodes_parsed);
	for (i = 0; i < num_nodes; i++)
		if (fake_nodes[i].start != fake_nodes[i].end)
			node_set(i, nodes_parsed);
	WARN_ON(!nodes_cover_memory(fake_nodes));
}
459
/*
 * With NUMA emulation several fake nodes may share one real PXM;
 * treat those as "local" to each other when there is no SLIT.
 */
static int null_slit_node_compare(int a, int b)
{
	return node_to_pxm(a) == node_to_pxm(b);
}
#else
/* Without a SLIT, a node is only local to itself. */
static int null_slit_node_compare(int a, int b)
{
	return a == b;
}
#endif
470
471int __node_distance(int a, int b)
472{
473 int index;
474
475 if (!acpi_slit)
476 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
477 REMOTE_DISTANCE;
478 index = acpi_slit->locality_count * node_to_pxm(a);
479 return acpi_slit->entry[index + node_to_pxm(b)];
480}
481
482EXPORT_SYMBOL(__node_distance);
483
484#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
485int memory_add_physaddr_to_nid(u64 start)
486{
487 int i, ret = 0;
488
489 for_each_node(i)
490 if (nodes_add[i].start <= start && nodes_add[i].end > start)
491 ret = i;
492
493 return ret;
494}
495EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
496#endif
497