linux/drivers/acpi/numa.c
<<
>>
Prefs
   1/*
   2 *  acpi_numa.c - ACPI NUMA support
   3 *
   4 *  Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
   5 *
   6 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   7 *
   8 *  This program is free software; you can redistribute it and/or modify
   9 *  it under the terms of the GNU General Public License as published by
  10 *  the Free Software Foundation; either version 2 of the License, or
  11 *  (at your option) any later version.
  12 *
  13 *  This program is distributed in the hope that it will be useful,
  14 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 *  GNU General Public License for more details.
  17 *
  18 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  19 *
  20 */
  21
  22#define pr_fmt(fmt) "ACPI: " fmt
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/kernel.h>
  27#include <linux/types.h>
  28#include <linux/errno.h>
  29#include <linux/acpi.h>
  30#include <linux/bootmem.h>
  31#include <linux/memblock.h>
  32#include <linux/numa.h>
  33#include <linux/nodemask.h>
  34#include <linux/topology.h>
  35
/* Logical node IDs already assigned to a proximity domain in this file. */
static nodemask_t nodes_found_map = NODE_MASK_NONE;

/*
 * Maps to convert between proximity domain and logical node ID.
 * Every slot starts out unmapped (NUMA_NO_NODE / PXM_INVAL) and is filled
 * in by __acpi_map_pxm_to_node().
 */
static int pxm_to_node_map[MAX_PXM_DOMAINS]
                        = { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE };
static int node_to_pxm_map[MAX_NUMNODES]
                        = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };

/* Revision field of the SRAT header, recorded by acpi_parse_srat(). */
unsigned char acpi_srat_revision __initdata;
/* Set to -1 by bad_srat(); srat_disabled() is true while it is negative. */
int acpi_numa __initdata;
  46
  47int pxm_to_node(int pxm)
  48{
  49        if (pxm < 0)
  50                return NUMA_NO_NODE;
  51        return pxm_to_node_map[pxm];
  52}
  53
  54int node_to_pxm(int node)
  55{
  56        if (node < 0)
  57                return PXM_INVAL;
  58        return node_to_pxm_map[node];
  59}
  60
  61static void __acpi_map_pxm_to_node(int pxm, int node)
  62{
  63        if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm])
  64                pxm_to_node_map[pxm] = node;
  65        if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node])
  66                node_to_pxm_map[node] = pxm;
  67}
  68
  69int acpi_map_pxm_to_node(int pxm)
  70{
  71        int node;
  72
  73        if (pxm < 0 || pxm >= MAX_PXM_DOMAINS)
  74                return NUMA_NO_NODE;
  75
  76        node = pxm_to_node_map[pxm];
  77
  78        if (node == NUMA_NO_NODE) {
  79                if (nodes_weight(nodes_found_map) >= MAX_NUMNODES)
  80                        return NUMA_NO_NODE;
  81                node = first_unset_node(nodes_found_map);
  82                __acpi_map_pxm_to_node(pxm, node);
  83                node_set(node, nodes_found_map);
  84        }
  85
  86        return node;
  87}
  88
  89/**
  90 * acpi_map_pxm_to_online_node - Map proximity ID to online node
  91 * @pxm: ACPI proximity ID
  92 *
  93 * This is similar to acpi_map_pxm_to_node(), but always returns an online
  94 * node.  When the mapped node from a given proximity ID is offline, it
  95 * looks up the node distance table and returns the nearest online node.
  96 *
  97 * ACPI device drivers, which are called after the NUMA initialization has
  98 * completed in the kernel, can call this interface to obtain their device
  99 * NUMA topology from ACPI tables.  Such drivers do not have to deal with
 100 * offline nodes.  A node may be offline when a device proximity ID is
 101 * unique, SRAT memory entry does not exist, or NUMA is disabled, ex.
 102 * "numa=off" on x86.
 103 */
 104int acpi_map_pxm_to_online_node(int pxm)
 105{
 106        int node, n, dist, min_dist;
 107
 108        node = acpi_map_pxm_to_node(pxm);
 109
 110        if (node == NUMA_NO_NODE)
 111                node = 0;
 112
 113        if (!node_online(node)) {
 114                min_dist = INT_MAX;
 115                for_each_online_node(n) {
 116                        dist = node_distance(node, n);
 117                        if (dist < min_dist) {
 118                                min_dist = dist;
 119                                node = n;
 120                        }
 121                }
 122        }
 123
 124        return node;
 125}
 126EXPORT_SYMBOL(acpi_map_pxm_to_online_node);
 127
 128static void __init
 129acpi_table_print_srat_entry(struct acpi_subtable_header *header)
 130{
 131        switch (header->type) {
 132        case ACPI_SRAT_TYPE_CPU_AFFINITY:
 133                {
 134                        struct acpi_srat_cpu_affinity *p =
 135                            (struct acpi_srat_cpu_affinity *)header;
 136                        pr_debug("SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n",
 137                                 p->apic_id, p->local_sapic_eid,
 138                                 p->proximity_domain_lo,
 139                                 (p->flags & ACPI_SRAT_CPU_ENABLED) ?
 140                                 "enabled" : "disabled");
 141                }
 142                break;
 143
 144        case ACPI_SRAT_TYPE_MEMORY_AFFINITY:
 145                {
 146                        struct acpi_srat_mem_affinity *p =
 147                            (struct acpi_srat_mem_affinity *)header;
 148                        pr_debug("SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n",
 149                                 (unsigned long)p->base_address,
 150                                 (unsigned long)p->length,
 151                                 p->proximity_domain,
 152                                 (p->flags & ACPI_SRAT_MEM_ENABLED) ?
 153                                 "enabled" : "disabled",
 154                                 (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
 155                                 " hot-pluggable" : "",
 156                                 (p->flags & ACPI_SRAT_MEM_NON_VOLATILE) ?
 157                                 " non-volatile" : "");
 158                }
 159                break;
 160
 161        case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY:
 162                {
 163                        struct acpi_srat_x2apic_cpu_affinity *p =
 164                            (struct acpi_srat_x2apic_cpu_affinity *)header;
 165                        pr_debug("SRAT Processor (x2apicid[0x%08x]) in proximity domain %d %s\n",
 166                                 p->apic_id,
 167                                 p->proximity_domain,
 168                                 (p->flags & ACPI_SRAT_CPU_ENABLED) ?
 169                                 "enabled" : "disabled");
 170                }
 171                break;
 172
 173        case ACPI_SRAT_TYPE_GICC_AFFINITY:
 174                {
 175                        struct acpi_srat_gicc_affinity *p =
 176                            (struct acpi_srat_gicc_affinity *)header;
 177                        pr_debug("SRAT Processor (acpi id[0x%04x]) in proximity domain %d %s\n",
 178                                 p->acpi_processor_uid,
 179                                 p->proximity_domain,
 180                                 (p->flags & ACPI_SRAT_GICC_ENABLED) ?
 181                                 "enabled" : "disabled");
 182                }
 183                break;
 184
 185        default:
 186                pr_warn("Found unsupported SRAT entry (type = 0x%x)\n",
 187                        header->type);
 188                break;
 189        }
 190}
 191
 192/*
 193 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
 194 * up the NUMA heuristics which wants the local node to have a smaller
 195 * distance than the others.
 196 * Do some quick checks here and only use the SLIT if it passes.
 197 */
 198static int __init slit_valid(struct acpi_table_slit *slit)
 199{
 200        int i, j;
 201        int d = slit->locality_count;
 202        for (i = 0; i < d; i++) {
 203                for (j = 0; j < d; j++)  {
 204                        u8 val = slit->entry[d*i + j];
 205                        if (i == j) {
 206                                if (val != LOCAL_DISTANCE)
 207                                        return 0;
 208                        } else if (val <= LOCAL_DISTANCE)
 209                                return 0;
 210                }
 211        }
 212        return 1;
 213}
 214
 215void __init bad_srat(void)
 216{
 217        pr_err("SRAT: SRAT not used.\n");
 218        acpi_numa = -1;
 219}
 220
 221int __init srat_disabled(void)
 222{
 223        return acpi_numa < 0;
 224}
 225
 226#if defined(CONFIG_X86) || defined(CONFIG_ARM64)
 227/*
 228 * Callback for SLIT parsing.  pxm_to_node() returns NUMA_NO_NODE for
 229 * I/O localities since SRAT does not list them.  I/O localities are
 230 * not supported at this point.
 231 */
 232void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 233{
 234        int i, j;
 235
 236        for (i = 0; i < slit->locality_count; i++) {
 237                const int from_node = pxm_to_node(i);
 238
 239                if (from_node == NUMA_NO_NODE)
 240                        continue;
 241
 242                for (j = 0; j < slit->locality_count; j++) {
 243                        const int to_node = pxm_to_node(j);
 244
 245                        if (to_node == NUMA_NO_NODE)
 246                                continue;
 247
 248                        numa_set_distance(from_node, to_node,
 249                                slit->entry[slit->locality_count * i + j]);
 250                }
 251        }
 252}
 253
 254/*
 255 * Default callback for parsing of the Proximity Domain <-> Memory
 256 * Area mappings
 257 */
 258int __init
 259acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
 260{
 261        u64 start, end;
 262        u32 hotpluggable;
 263        int node, pxm;
 264
 265        if (srat_disabled())
 266                goto out_err;
 267        if (ma->header.length < sizeof(struct acpi_srat_mem_affinity)) {
 268                pr_err("SRAT: Unexpected header length: %d\n",
 269                       ma->header.length);
 270                goto out_err_bad_srat;
 271        }
 272        if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
 273                goto out_err;
 274        hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
 275        if (hotpluggable && !IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
 276                goto out_err;
 277
 278        start = ma->base_address;
 279        end = start + ma->length;
 280        pxm = ma->proximity_domain;
 281        if (acpi_srat_revision <= 1)
 282                pxm &= 0xff;
 283
 284        node = acpi_map_pxm_to_node(pxm);
 285        if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
 286                pr_err("SRAT: Too many proximity domains.\n");
 287                goto out_err_bad_srat;
 288        }
 289
 290        if (numa_add_memblk(node, start, end) < 0) {
 291                pr_err("SRAT: Failed to add memblk to node %u [mem %#010Lx-%#010Lx]\n",
 292                       node, (unsigned long long) start,
 293                       (unsigned long long) end - 1);
 294                goto out_err_bad_srat;
 295        }
 296
 297        node_set(node, numa_nodes_parsed);
 298
 299        pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n",
 300                node, pxm,
 301                (unsigned long long) start, (unsigned long long) end - 1,
 302                hotpluggable ? " hotplug" : "",
 303                ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : "");
 304
 305        /* Mark hotplug range in memblock. */
 306        if (hotpluggable && memblock_mark_hotplug(start, ma->length))
 307                pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n",
 308                        (unsigned long long)start, (unsigned long long)end - 1);
 309
 310        max_possible_pfn = max(max_possible_pfn, PFN_UP(end - 1));
 311
 312        return 0;
 313out_err_bad_srat:
 314        bad_srat();
 315out_err:
 316        return -EINVAL;
 317}
 318#endif /* defined(CONFIG_X86) || defined (CONFIG_ARM64) */
 319
 320static int __init acpi_parse_slit(struct acpi_table_header *table)
 321{
 322        struct acpi_table_slit *slit = (struct acpi_table_slit *)table;
 323
 324        if (!slit_valid(slit)) {
 325                pr_info("SLIT table looks invalid. Not used.\n");
 326                return -EINVAL;
 327        }
 328        acpi_numa_slit_init(slit);
 329
 330        return 0;
 331}
 332
 333void __init __weak
 334acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
 335{
 336        pr_warn("Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id);
 337}
 338
 339static int __init
 340acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
 341                           const unsigned long end)
 342{
 343        struct acpi_srat_x2apic_cpu_affinity *processor_affinity;
 344
 345        processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header;
 346        if (!processor_affinity)
 347                return -EINVAL;
 348
 349        acpi_table_print_srat_entry(header);
 350
 351        /* let architecture-dependent part to do it */
 352        acpi_numa_x2apic_affinity_init(processor_affinity);
 353
 354        return 0;
 355}
 356
 357static int __init
 358acpi_parse_processor_affinity(struct acpi_subtable_header *header,
 359                              const unsigned long end)
 360{
 361        struct acpi_srat_cpu_affinity *processor_affinity;
 362
 363        processor_affinity = (struct acpi_srat_cpu_affinity *)header;
 364        if (!processor_affinity)
 365                return -EINVAL;
 366
 367        acpi_table_print_srat_entry(header);
 368
 369        /* let architecture-dependent part to do it */
 370        acpi_numa_processor_affinity_init(processor_affinity);
 371
 372        return 0;
 373}
 374
 375static int __init
 376acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
 377                         const unsigned long end)
 378{
 379        struct acpi_srat_gicc_affinity *processor_affinity;
 380
 381        processor_affinity = (struct acpi_srat_gicc_affinity *)header;
 382        if (!processor_affinity)
 383                return -EINVAL;
 384
 385        acpi_table_print_srat_entry(header);
 386
 387        /* let architecture-dependent part to do it */
 388        acpi_numa_gicc_affinity_init(processor_affinity);
 389
 390        return 0;
 391}
 392
 393static int __initdata parsed_numa_memblks;
 394
 395static int __init
 396acpi_parse_memory_affinity(struct acpi_subtable_header * header,
 397                           const unsigned long end)
 398{
 399        struct acpi_srat_mem_affinity *memory_affinity;
 400
 401        memory_affinity = (struct acpi_srat_mem_affinity *)header;
 402        if (!memory_affinity)
 403                return -EINVAL;
 404
 405        acpi_table_print_srat_entry(header);
 406
 407        /* let architecture-dependent part to do it */
 408        if (!acpi_numa_memory_affinity_init(memory_affinity))
 409                parsed_numa_memblks++;
 410        return 0;
 411}
 412
 413static int __init acpi_parse_srat(struct acpi_table_header *table)
 414{
 415        struct acpi_table_srat *srat = (struct acpi_table_srat *)table;
 416
 417        acpi_srat_revision = srat->header.revision;
 418
 419        /* Real work done in acpi_table_parse_srat below. */
 420
 421        return 0;
 422}
 423
 424static int __init
 425acpi_table_parse_srat(enum acpi_srat_type id,
 426                      acpi_tbl_entry_handler handler, unsigned int max_entries)
 427{
 428        return acpi_table_parse_entries(ACPI_SIG_SRAT,
 429                                            sizeof(struct acpi_table_srat), id,
 430                                            handler, max_entries);
 431}
 432
 433int __init acpi_numa_init(void)
 434{
 435        int cnt = 0;
 436
 437        if (acpi_disabled)
 438                return -EINVAL;
 439
 440        /*
 441         * Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
 442         * SRAT cpu entries could have different order with that in MADT.
 443         * So go over all cpu entries in SRAT to get apicid to node mapping.
 444         */
 445
 446        /* SRAT: Static Resource Affinity Table */
 447        if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
 448                struct acpi_subtable_proc srat_proc[3];
 449
 450                memset(srat_proc, 0, sizeof(srat_proc));
 451                srat_proc[0].id = ACPI_SRAT_TYPE_CPU_AFFINITY;
 452                srat_proc[0].handler = acpi_parse_processor_affinity;
 453                srat_proc[1].id = ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY;
 454                srat_proc[1].handler = acpi_parse_x2apic_affinity;
 455                srat_proc[2].id = ACPI_SRAT_TYPE_GICC_AFFINITY;
 456                srat_proc[2].handler = acpi_parse_gicc_affinity;
 457
 458                acpi_table_parse_entries_array(ACPI_SIG_SRAT,
 459                                        sizeof(struct acpi_table_srat),
 460                                        srat_proc, ARRAY_SIZE(srat_proc), 0);
 461
 462                cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
 463                                            acpi_parse_memory_affinity,
 464                                            NR_NODE_MEMBLKS);
 465        }
 466
 467        /* SLIT: System Locality Information Table */
 468        acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
 469
 470        if (cnt < 0)
 471                return cnt;
 472        else if (!parsed_numa_memblks)
 473                return -ENOENT;
 474        return 0;
 475}
 476
 477static int acpi_get_pxm(acpi_handle h)
 478{
 479        unsigned long long pxm;
 480        acpi_status status;
 481        acpi_handle handle;
 482        acpi_handle phandle = h;
 483
 484        do {
 485                handle = phandle;
 486                status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm);
 487                if (ACPI_SUCCESS(status))
 488                        return pxm;
 489                status = acpi_get_parent(handle, &phandle);
 490        } while (ACPI_SUCCESS(status));
 491        return -1;
 492}
 493
 494int acpi_get_node(acpi_handle handle)
 495{
 496        int pxm;
 497
 498        pxm = acpi_get_pxm(handle);
 499
 500        return acpi_map_pxm_to_node(pxm);
 501}
 502EXPORT_SYMBOL(acpi_get_node);
 503