linux/drivers/acpi/numa.c
<<
>>
Prefs
   1/*
   2 *  acpi_numa.c - ACPI NUMA support
   3 *
   4 *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
   5 *
   6 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   7 *
   8 *  This program is free software; you can redistribute it and/or modify
   9 *  it under the terms of the GNU General Public License as published by
  10 *  the Free Software Foundation; either version 2 of the License, or
  11 *  (at your option) any later version.
  12 *
  13 *  This program is distributed in the hope that it will be useful,
  14 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 *  GNU General Public License for more details.
  17 *
  18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  19 *
  20 */
  21
  22#define pr_fmt(fmt) "ACPI: " fmt
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/kernel.h>
  27#include <linux/types.h>
  28#include <linux/errno.h>
  29#include <linux/acpi.h>
  30#include <linux/bootmem.h>
  31#include <linux/memblock.h>
  32#include <linux/numa.h>
  33#include <linux/nodemask.h>
  34#include <linux/topology.h>
  35
/*
 * Logical node IDs that acpi_map_pxm_to_node() has already handed out;
 * a new proximity domain gets the first node not yet set in this mask.
 */
static nodemask_t nodes_found_map = NODE_MASK_NONE;

/*
 * maps to convert between proximity domain and logical node ID
 *
 * Every slot starts out unmapped (NUMA_NO_NODE / PXM_INVAL) and is only
 * written by __acpi_map_pxm_to_node().
 */
static int pxm_to_node_map[MAX_PXM_DOMAINS]
                        = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE };
static int node_to_pxm_map[MAX_NUMNODES]
                        = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };

/* Revision byte copied from the SRAT header by acpi_parse_srat(). */
unsigned char acpi_srat_revision __initdata;
/* Set to -1 by bad_srat(); srat_disabled() reports any negative value. */
int acpi_numa __initdata;
  46
  47int pxm_to_node(int pxm)
  48{
  49        if (pxm < 0)
  50                return NUMA_NO_NODE;
  51        return pxm_to_node_map[pxm];
  52}
  53
  54int node_to_pxm(int node)
  55{
  56        if (node < 0)
  57                return PXM_INVAL;
  58        return node_to_pxm_map[node];
  59}
  60
  61static void __acpi_map_pxm_to_node(int pxm, int node)
  62{
  63        if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm])
  64                pxm_to_node_map[pxm] = node;
  65        if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node])
  66                node_to_pxm_map[node] = pxm;
  67}
  68
  69int acpi_map_pxm_to_node(int pxm)
  70{
  71        int node;
  72
  73        if (pxm < 0 || pxm >= MAX_PXM_DOMAINS || numa_off)
  74                return NUMA_NO_NODE;
  75
  76        node = pxm_to_node_map[pxm];
  77
  78        if (node == NUMA_NO_NODE) {
  79                if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
  80                        return NUMA_NO_NODE;
  81                node = first_unset_node(nodes_found_map);
  82                __acpi_map_pxm_to_node(pxm, node);
  83                node_set(node, nodes_found_map);
  84        }
  85
  86        return node;
  87}
  88
  89/**
  90 * acpi_map_pxm_to_online_node - Map proximity ID to online node
  91 * @pxm: ACPI proximity ID
  92 *
  93 * This is similar to acpi_map_pxm_to_node(), but always returns an online
  94 * node.  When the mapped node from a given proximity ID is offline, it
  95 * looks up the node distance table and returns the nearest online node.
  96 *
  97 * ACPI device drivers, which are called after the NUMA initialization has
  98 * completed in the kernel, can call this interface to obtain their device
  99 * NUMA topology from ACPI tables.  Such drivers do not have to deal with
 100 * offline nodes.  A node may be offline when a device proximity ID is
 101 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
 102 * "numa=off" on x86.
 103 */
 104int acpi_map_pxm_to_online_node(int pxm)
 105{
 106        int node, min_node;
 107
 108        node = acpi_map_pxm_to_node(pxm);
 109
 110        if (node == NUMA_NO_NODE)
 111                node = 0;
 112
 113        min_node = node;
 114        if (!node_online(node)) {
 115                int min_dist = INT_MAX, dist, n;
 116
 117                for_each_online_node(n) {
 118                        dist = node_distance(node, n);
 119                        if (dist < min_dist) {
 120                                min_dist = dist;
 121                                min_node = n;
 122                        }
 123                }
 124        }
 125
 126        return min_node;
 127}
 128EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
 129
 130static void __init
 131acpi_table_print_srat_entry(struct acpi_subtable_header *header)
 132{
 133        switch (header->type) {
 134        case ACPI_SRAT_TYPE_CPU_AFFINITY:
 135                {
 136                        struct acpi_srat_cpu_affinity *p =
 137                            (struct acpi_srat_cpu_affinity *)header;
 138                        pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
 139                                 p->apic_id, p->local_sapic_eid,
 140                                 p->proximity_domain_lo,
 141                                 (p->flags & ACPI_SRAT_CPU_ENABLED) ?
 142                                 "enabled" : "disabled");
 143                }
 144                break;
 145
 146        case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
 147                {
 148                        struct acpi_srat_mem_affinity *p =
 149                            (struct acpi_srat_mem_affinity *)header;
 150                        pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
 151                                 (unsigned long)p->base_address,
 152                                 (unsigned long)p->length,
 153                                 p->proximity_domain,
 154                                 (p->flags & ACPI_SRAT_MEM_ENABLED) ?
 155                                 "enabled" : "disabled",
 156                                 (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
 157                                 " hot-pluggable" : "",
 158                                 (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ?
 159                                 " non-volatile" : "");
 160                }
 161                break;
 162
 163        case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
 164                {
 165                        struct acpi_srat_x2apic_cpu_affinity *p =
 166                            (struct acpi_srat_x2apic_cpu_affinity *)header;
 167                        pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
 168                                 p->apic_id,
 169                                 p->proximity_domain,
 170                                 (p->flags & ACPI_SRAT_CPU_ENABLED) ?
 171                                 "enabled" : "disabled");
 172                }
 173                break;
 174
 175        case ACPI_SRAT_TYPE_GICC_AFFINITY:
 176                {
 177                        struct acpi_srat_gicc_affinity *p =
 178                            (struct acpi_srat_gicc_affinity *)header;
 179                        pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
 180                                 p->acpi_processor_uid,
 181                                 p->proximity_domain,
 182                                 (p->flags & ACPI_SRAT_GICC_ENABLED) ?
 183                                 "enabled" : "disabled");
 184                }
 185                break;
 186
 187        default:
 188                pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
 189                        header->type);
 190                break;
 191        }
 192}
 193
 194/*
 195 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
 196 * up the NUMA heuristics which wants the local node to have a smaller
 197 * distance than the others.
 198 * Do some quick checks here and only use the SLIT if it passes.
 199 */
 200static int __init slit_valid(struct acpi_table_slit *slit)
 201{
 202        int i, j;
 203        int d = slit->locality_count;
 204        for (i = 0; i < d; i++) {
 205                for (j = 0; j < d; j++)  {
 206                        u8 val = slit->entry[d*i + j];
 207                        if (i == j) {
 208                                if (val != LOCAL_DISTANCE)
 209                                        return 0;
 210                        } else if (val <= LOCAL_DISTANCE)
 211                                return 0;
 212                }
 213        }
 214        return 1;
 215}
 216
 217void __init bad_srat(void)
 218{
 219        pr_err("SRAT: SRAT not used.\n");
 220        acpi_numa = -1;
 221}
 222
 223int __init srat_disabled(void)
 224{
 225        return acpi_numa < 0;
 226}
 227
 228#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
 229/*
 230 * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
 231 * I/O localities since SRAT does not list them.  I/O localities are
 232 * not supported at this point.
 233 */
 234void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 235{
 236        int i, j;
 237
 238        for (i = 0; i < slit->locality_count; i++) {
 239                const int from_node = pxm_to_node(i);
 240
 241                if (from_node == NUMA_NO_NODE)
 242                        continue;
 243
 244                for (j = 0; j < slit->locality_count; j++) {
 245                        const int to_node = pxm_to_node(j);
 246
 247                        if (to_node == NUMA_NO_NODE)
 248                                continue;
 249
 250                        numa_set_distance(from_node, to_node,
 251                                slit->entry[slit->locality_count * i + j]);
 252                }
 253        }
 254}
 255
/*
 * Default callback for parsing of the Proximity Domain <-> Memory
 * Area mappings
 *
 * Checks one SRAT memory affinity entry, maps its proximity domain to a
 * logical node and registers the range starting at base_address, length
 * bytes long, with that node.
 *
 * Returns 0 when the range was registered and -EINVAL otherwise.  Entries
 * that are simply skipped (SRAT already disabled, entry not enabled,
 * hot-pluggable memory without CONFIG_MEMORY_HOTPLUG) do not touch the
 * SRAT state; a short header, a failed node mapping or a failed
 * numa_add_memblk() additionally call bad_srat().
 */
int __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
        u64 start, end;
        u32 hotpluggable;
        int node, pxm;

        if (srat_disabled())
                goto out_err;
        /* An entry shorter than the structure cannot be trusted. */
        if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) {
                pr_err("SRAT: Unexpected header length: %d\n",
                       ma->header.length);
                goto out_err_bad_srat;
        }
        if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
                goto out_err;
        hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
        if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
                goto out_err;

        start = ma->base_address;
        /* Exclusive end of the range; messages below print end - 1. */
        end = start + ma->length;
        pxm = ma->proximity_domain;
        /* For SRAT revision <= 1 only the low 8 bits of the domain are kept. */
        if (acpi_srat_revision <= 1)
                pxm &= 0xff;

        node = acpi_map_pxm_to_node(pxm);
        if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
                pr_err("SRAT: Too many proximity domains.\n");
                goto out_err_bad_srat;
        }

        if (numa_add_memblk(node, start, end) < 0) {
                pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
                       node, (unsigned long long) start,
                       (unsigned long long) end - 1);
                goto out_err_bad_srat;
        }

        node_set(node, numa_nodes_parsed);

        pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
                node, pxm,
                (unsigned long long) start, (unsigned long long) end - 1,
                hotpluggable ? " hotplug" : "",
                ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");

        /* Mark hotplug range in memblock. */
        /* A failure here is only warned about; the entry still counts. */
        if (hotpluggable && memblock_mark_hotplug(start, ma->length))
                pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
                        (unsigned long long)start, (unsigned long long)end - 1);

        /* Grow max_possible_pfn so it covers the last byte of this range. */
        max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));

        return 0;
out_err_bad_srat:
        /* Fatal for the whole SRAT: mark it unusable, then fall through. */
        bad_srat();
out_err:
        return -EINVAL;
}
 320#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
 321
 322static int __init acpi_parse_slit(struct acpi_table_header *table)
 323{
 324        struct acpi_table_slit *slit = (struct acpi_table_slit *)table;
 325
 326        if (!slit_valid(slit)) {
 327                pr_info("SLIT table looks invalid. Not used.\n");
 328                return -EINVAL;
 329        }
 330        acpi_numa_slit_init(slit);
 331
 332        return 0;
 333}
 334
 335void __init __weak
 336acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 337{
 338        pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id);
 339}
 340
 341static int __init
 342acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
 343                           const unsigned long end)
 344{
 345        struct acpi_srat_x2apic_cpu_affinity *processor_affinity;
 346
 347        processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header;
 348        if (!processor_affinity)
 349                return -EINVAL;
 350
 351        acpi_table_print_srat_entry(header);
 352
 353        /* let architecture-dependent part to do it */
 354        acpi_numa_x2apic_affinity_init(processor_affinity);
 355
 356        return 0;
 357}
 358
 359static int __init
 360acpi_parse_processor_affinity(struct acpi_subtable_header *header,
 361                              const unsigned long end)
 362{
 363        struct acpi_srat_cpu_affinity *processor_affinity;
 364
 365        processor_affinity = (struct acpi_srat_cpu_affinity *)header;
 366        if (!processor_affinity)
 367                return -EINVAL;
 368
 369        acpi_table_print_srat_entry(header);
 370
 371        /* let architecture-dependent part to do it */
 372        acpi_numa_processor_affinity_init(processor_affinity);
 373
 374        return 0;
 375}
 376
 377static int __init
 378acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
 379                         const unsigned long end)
 380{
 381        struct acpi_srat_gicc_affinity *processor_affinity;
 382
 383        processor_affinity = (struct acpi_srat_gicc_affinity *)header;
 384        if (!processor_affinity)
 385                return -EINVAL;
 386
 387        acpi_table_print_srat_entry(header);
 388
 389        /* let architecture-dependent part to do it */
 390        acpi_numa_gicc_affinity_init(processor_affinity);
 391
 392        return 0;
 393}
 394
 395static int __initdata parsed_numa_memblks;
 396
 397static int __init
 398acpi_parse_memory_affinity(struct acpi_subtable_header * header,
 399                           const unsigned long end)
 400{
 401        struct acpi_srat_mem_affinity *memory_affinity;
 402
 403        memory_affinity = (struct acpi_srat_mem_affinity *)header;
 404        if (!memory_affinity)
 405                return -EINVAL;
 406
 407        acpi_table_print_srat_entry(header);
 408
 409        /* let architecture-dependent part to do it */
 410        if (!acpi_numa_memory_affinity_init(memory_affinity))
 411                parsed_numa_memblks++;
 412        return 0;
 413}
 414
 415static int __init acpi_parse_srat(struct acpi_table_header *table)
 416{
 417        struct acpi_table_srat *srat = (struct acpi_table_srat *)table;
 418
 419        acpi_srat_revision = srat->header.revision;
 420
 421        /* Real work done in acpi_table_parse_srat below. */
 422
 423        return 0;
 424}
 425
 426static int __init
 427acpi_table_parse_srat(enum acpi_srat_type id,
 428                      acpi_tbl_entry_handler handler, unsigned int max_entries)
 429{
 430        return acpi_table_parse_entries(ACPI_SIG_SRAT,
 431                                            sizeof(struct acpi_table_srat), id,
 432                                            handler, max_entries);
 433}
 434
 435int __init acpi_numa_init(void)
 436{
 437        int cnt = 0;
 438
 439        if (acpi_disabled)
 440                return -EINVAL;
 441
 442        /*
 443         * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
 444         * SRAT cpu entries could have different order with that in MADT.
 445         * So go over all cpu entries in SRAT to get apicid to node mapping.
 446         */
 447
 448        /* SRAT: System Resource Affinity Table */
 449        if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
 450                struct acpi_subtable_proc srat_proc[3];
 451
 452                memset(srat_proc, 0, sizeof(srat_proc));
 453                srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
 454                srat_proc[0].handler = acpi_parse_processor_affinity;
 455                srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
 456                srat_proc[1].handler = acpi_parse_x2apic_affinity;
 457                srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
 458                srat_proc[2].handler = acpi_parse_gicc_affinity;
 459
 460                acpi_table_parse_entries_array(ACPI_SIG_SRAT,
 461                                        sizeof(struct acpi_table_srat),
 462                                        srat_proc, ARRAY_SIZE(srat_proc), 0);
 463
 464                cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
 465                                            acpi_parse_memory_affinity, 0);
 466        }
 467
 468        /* SLIT: System Locality Information Table */
 469        acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
 470
 471        if (cnt < 0)
 472                return cnt;
 473        else if (!parsed_numa_memblks)
 474                return -ENOENT;
 475        return 0;
 476}
 477
 478static int acpi_get_pxm(acpi_handle h)
 479{
 480        unsigned long long pxm;
 481        acpi_status status;
 482        acpi_handle handle;
 483        acpi_handle phandle = h;
 484
 485        do {
 486                handle = phandle;
 487                status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
 488                if (ACPI_SUCCESS(status))
 489                        return pxm;
 490                status = acpi_get_parent(handle, &phandle);
 491        } while (ACPI_SUCCESS(status));
 492        return -1;
 493}
 494
 495int acpi_get_node(acpi_handle handle)
 496{
 497        int pxm;
 498
 499        pxm = acpi_get_pxm(handle);
 500
 501        return acpi_map_pxm_to_node(pxm);
 502}
 503EXPORT_SYMBOL(acpi_get_node);
 504