linux/arch/arm64/mm/numa.c
/*
 * NUMA support, based on the x86 implementation.
 *
 * Copyright (C) 2015 Cavium Inc.
 * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/module.h>
#include <linux/of.h>

#include <asm/acpi.h>

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
nodemask_t numa_nodes_parsed __initdata;
static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };

static int numa_distance_cnt;
static u8 *numa_distance;
static bool numa_off;

static __init int numa_parse_early_param(char *opt)
{
        if (!opt)
                return -EINVAL;
        if (!strncmp(opt, "off", 3)) {
                pr_info("%s\n", "NUMA turned off");
                numa_off = true;
        }
        return 0;
}
early_param("numa", numa_parse_early_param);

cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);

#ifdef CONFIG_DEBUG_PER_CPU_MAPS

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const struct cpumask *cpumask_of_node(int node)
{
        if (WARN_ON(node >= nr_node_ids))
                return cpu_none_mask;

        if (WARN_ON(node_to_cpumask_map[node] == NULL))
                return cpu_online_mask;

        return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);

#endif

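/*
 * Record the CPU's node id and, for a valid node, add the CPU to that
 * node's cpumask.
 */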
static void map_cpu_to_node(unsigned int cpu, int nid)
{
        set_cpu_numa_node(cpu, nid);
        if (nid >= 0)
                cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
}

void numa_clear_node(unsigned int cpu)
{
        int nid = cpu_to_node(cpu);

        if (nid >= 0)
                cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]);
        set_cpu_numa_node(cpu, NUMA_NO_NODE);
}

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
 */
static void __init setup_node_to_cpumask_map(void)
{
        unsigned int cpu;
        int node;

        /* setup nr_node_ids if not done yet */
        if (nr_node_ids == MAX_NUMNODES)
                setup_nr_node_ids();

        /* allocate and clear the mapping */
        for (node = 0; node < nr_node_ids; node++) {
                alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
                cpumask_clear(node_to_cpumask_map[node]);
        }

        for_each_possible_cpu(cpu)
                set_cpu_numa_node(cpu, NUMA_NO_NODE);

        /* cpumask_of_node() will now work */
        pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids);
}

/*
 * Set the cpu-to-node mapping.
 */
void numa_store_cpu_info(unsigned int cpu)
{
        map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]);
}

void __init early_map_cpu_to_node(unsigned int cpu, int nid)
{
        /* fallback to node 0 */
        if (nid < 0 || nid >= MAX_NUMNODES)
                nid = 0;

        cpu_to_node_map[cpu] = nid;
}

/**
 * numa_add_memblk - Set the node id for a memory block
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end: End address of the new memblk
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
        int ret;

        ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
        if (ret < 0) {
                pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n",
                        start, (end - 1), nid);
                return ret;
        }

        node_set(nid, numa_nodes_parsed);
        pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n",
                        start, (end - 1), nid);
        return ret;
}

/**
 * Initialize NODE_DATA for a node, allocating it from that node's memory
 * where possible.
 */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
        const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
        u64 nd_pa;
        void *nd;
        int tnid;

        pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
                        nid, start_pfn << PAGE_SHIFT,
                        (end_pfn << PAGE_SHIFT) - 1);

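        /*
         * Try to place the pg_data_t on the node it describes; the
         * allocation may fall back to another node.
         */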
        nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
        nd = __va(nd_pa);

        /* report and initialize */
        pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n",
                nd_pa, nd_pa + nd_size - 1);
        tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
        if (tnid != nid)
                pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid);

        node_data[nid] = nd;
        memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
        NODE_DATA(nid)->node_id = nid;
        NODE_DATA(nid)->node_start_pfn = start_pfn;
        NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
}

/**
 * numa_free_distance - Free the current NUMA distance table.
 */
void __init numa_free_distance(void)
{
        size_t size;

        if (!numa_distance)
                return;

        size = numa_distance_cnt * numa_distance_cnt *
                sizeof(numa_distance[0]);

        memblock_free(__pa(numa_distance), size);
        numa_distance_cnt = 0;
        numa_distance = NULL;
}

/**
 * numa_alloc_distance - Create a new NUMA distance table.
 */
static int __init numa_alloc_distance(void)
{
        size_t size;
        u64 phys;
        int i, j;

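        /*
         * The distance table is a flat nr_node_ids * nr_node_ids array of
         * u8 values; entry [from * numa_distance_cnt + to] holds the
         * distance from node 'from' to node 'to'.
         */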
        size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
        phys = memblock_find_in_range(0, PFN_PHYS(max_pfn),
                                      size, PAGE_SIZE);
        if (WARN_ON(!phys))
                return -ENOMEM;

        memblock_reserve(phys, size);

        numa_distance = __va(phys);
        numa_distance_cnt = nr_node_ids;

        /* fill with the default distances */
        for (i = 0; i < numa_distance_cnt; i++)
                for (j = 0; j < numa_distance_cnt; j++)
                        numa_distance[i * numa_distance_cnt + j] = i == j ?
                                LOCAL_DISTANCE : REMOTE_DISTANCE;

        pr_debug("NUMA: Initialized distance table, cnt=%d\n",
                        numa_distance_cnt);

        return 0;
}

/**
 * numa_set_distance - Set inter-node NUMA distance from node to node.
 * @from: the 'from' node to set distance
 * @to: the 'to' node to set distance
 * @distance: NUMA distance
 *
 * Set the distance from node @from to @to to @distance.
 * If the distance table doesn't exist, a warning is printed and the call
 * is ignored.
 *
 * If @from or @to is higher than the highest known node or lower than zero,
 * or if @distance doesn't make sense, the call is ignored.
 */
void __init numa_set_distance(int from, int to, int distance)
{
        if (!numa_distance) {
                pr_warn_once("NUMA: Warning: distance table not allocated yet\n");
                return;
        }

        if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
                        from < 0 || to < 0) {
                pr_warn_once("NUMA: Warning: node ids are out of bounds, from=%d to=%d distance=%d\n",
                            from, to, distance);
                return;
        }

        if ((u8)distance != distance ||
            (from == to && distance != LOCAL_DISTANCE)) {
                pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
                             from, to, distance);
                return;
        }

        numa_distance[from * numa_distance_cnt + to] = distance;
}

/**
 * Return the NUMA distance from @from to @to.
 */
int __node_distance(int from, int to)
{
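        /* Nodes outside the table get the default local/remote distance. */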
        if (from >= numa_distance_cnt || to >= numa_distance_cnt)
                return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
        return numa_distance[from * numa_distance_cnt + to];
}
EXPORT_SYMBOL(__node_distance);

static int __init numa_register_nodes(void)
{
        int nid;
        struct memblock_region *mblk;

        /* Check that every memblk has a valid node id set */
        for_each_memblock(memory, mblk)
                if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) {
                        pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
                                mblk->nid, mblk->base,
                                mblk->base + mblk->size - 1);
                        return -EINVAL;
                }

        /* Finally register nodes. */
        for_each_node_mask(nid, numa_nodes_parsed) {
                unsigned long start_pfn, end_pfn;

                get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
                setup_node_data(nid, start_pfn, end_pfn);
                node_set_online(nid);
        }

        /* Set the possible nodes to the nodes actually parsed. */
        node_possible_map = numa_nodes_parsed;

        return 0;
}

static int __init numa_init(int (*init_func)(void))
{
        int ret;

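        /* Start from a clean state: drop any previously parsed node info. */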
        nodes_clear(numa_nodes_parsed);
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
        numa_free_distance();

        ret = numa_alloc_distance();
        if (ret < 0)
                return ret;

        ret = init_func();
        if (ret < 0)
                return ret;

        if (nodes_empty(numa_nodes_parsed))
                return -EINVAL;

        ret = numa_register_nodes();
        if (ret < 0)
                return ret;

        setup_node_to_cpumask_map();

        /* init boot processor */
        cpu_to_node_map[0] = 0;
        map_cpu_to_node(0, 0);

        return 0;
}

/**
 * dummy_numa_init - Fallback dummy NUMA init
 *
 * Used if there's no underlying NUMA architecture, NUMA initialization
 * fails, or NUMA is disabled on the command line.
 *
 * Must online at least one node (node 0) and add memory blocks that cover all
 * allowed memory. It is unlikely that this function fails.
 */
static int __init dummy_numa_init(void)
{
        int ret;
        struct memblock_region *mblk;

        if (numa_off)
                pr_info("NUMA disabled\n"); /* Forced off on command line. */
        else
                pr_info("No NUMA configuration found\n");
        pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
               0LLU, PFN_PHYS(max_pfn) - 1);

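        /* Put every memory block on node 0. */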
        for_each_memblock(memory, mblk) {
                ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
                if (!ret)
                        continue;

                pr_err("NUMA init failed\n");
                return ret;
        }

        numa_off = true;
        return 0;
}

/**
 * arm64_numa_init - Initialize NUMA
 *
 * Try each configured NUMA initialization method until one succeeds. The
 * last fallback is a dummy single-node configuration encompassing all of
 * memory.
 */
void __init arm64_numa_init(void)
{
        if (!numa_off) {
                if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
                        return;
                if (acpi_disabled && !numa_init(of_numa_init))
                        return;
        }

        numa_init(dummy_numa_init);
}