linux/arch/x86/mm/amdtopology_64.c
<<
>>
Prefs
   1/*
   2 * AMD NUMA support.
   3 * Discover the memory map and associated nodes.
   4 *
   5 * This version reads it directly from the AMD northbridge.
   6 *
   7 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
   8 */
   9#include <linux/kernel.h>
  10#include <linux/init.h>
  11#include <linux/string.h>
  12#include <linux/module.h>
  13#include <linux/nodemask.h>
  14#include <linux/memblock.h>
  15
  16#include <asm/io.h>
  17#include <linux/pci_ids.h>
  18#include <linux/acpi.h>
  19#include <asm/types.h>
  20#include <asm/mmzone.h>
  21#include <asm/proto.h>
  22#include <asm/e820.h>
  23#include <asm/pci-direct.h>
  24#include <asm/numa.h>
  25#include <asm/mpspec.h>
  26#include <asm/apic.h>
  27#include <asm/amd_nb.h>
  28
  29static struct bootnode __initdata nodes[8];
  30static unsigned char __initdata nodeids[8];
  31static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE;
  32
  33static __init int find_northbridge(void)
  34{
  35        int num;
  36
  37        for (num = 0; num < 32; num++) {
  38                u32 header;
  39
  40                header = read_pci_config(0, num, 0, 0x00);
  41                if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)) &&
  42                        header != (PCI_VENDOR_ID_AMD | (0x1200<<16)) &&
  43                        header != (PCI_VENDOR_ID_AMD | (0x1300<<16)))
  44                        continue;
  45
  46                header = read_pci_config(0, num, 1, 0x00);
  47                if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)) &&
  48                        header != (PCI_VENDOR_ID_AMD | (0x1201<<16)) &&
  49                        header != (PCI_VENDOR_ID_AMD | (0x1301<<16)))
  50                        continue;
  51                return num;
  52        }
  53
  54        return -1;
  55}
  56
  57static __init void early_get_boot_cpu_id(void)
  58{
  59        /*
  60         * need to get the APIC ID of the BSP so can use that to
  61         * create apicid_to_node in amd_scan_nodes()
  62         */
  63#ifdef CONFIG_X86_MPPARSE
  64        /*
  65         * get boot-time SMP configuration:
  66         */
  67        if (smp_found_config)
  68                early_get_smp_config();
  69#endif
  70}
  71
  72int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn)
  73{
  74        unsigned long start = PFN_PHYS(start_pfn);
  75        unsigned long end = PFN_PHYS(end_pfn);
  76        unsigned numnodes;
  77        unsigned long prevbase;
  78        int i, nb, found = 0;
  79        u32 nodeid, reg;
  80
  81        if (!early_pci_allowed())
  82                return -1;
  83
  84        nb = find_northbridge();
  85        if (nb < 0)
  86                return nb;
  87
  88        pr_info("Scanning NUMA topology in Northbridge %d\n", nb);
  89
  90        reg = read_pci_config(0, nb, 0, 0x60);
  91        numnodes = ((reg >> 4) & 0xF) + 1;
  92        if (numnodes <= 1)
  93                return -1;
  94
  95        pr_info("Number of physical nodes %d\n", numnodes);
  96
  97        prevbase = 0;
  98        for (i = 0; i < 8; i++) {
  99                unsigned long base, limit;
 100
 101                base = read_pci_config(0, nb, 1, 0x40 + i*8);
 102                limit = read_pci_config(0, nb, 1, 0x44 + i*8);
 103
 104                nodeids[i] = nodeid = limit & 7;
 105                if ((base & 3) == 0) {
 106                        if (i < numnodes)
 107                                pr_info("Skipping disabled node %d\n", i);
 108                        continue;
 109                }
 110                if (nodeid >= numnodes) {
 111                        pr_info("Ignoring excess node %d (%lx:%lx)\n", nodeid,
 112                                base, limit);
 113                        continue;
 114                }
 115
 116                if (!limit) {
 117                        pr_info("Skipping node entry %d (base %lx)\n",
 118                                i, base);
 119                        continue;
 120                }
 121                if ((base >> 8) & 3 || (limit >> 8) & 3) {
 122                        pr_err("Node %d using interleaving mode %lx/%lx\n",
 123                               nodeid, (base >> 8) & 3, (limit >> 8) & 3);
 124                        return -1;
 125                }
 126                if (node_isset(nodeid, nodes_parsed)) {
 127                        pr_info("Node %d already present, skipping\n",
 128                                nodeid);
 129                        continue;
 130                }
 131
 132                limit >>= 16;
 133                limit <<= 24;
 134                limit |= (1<<24)-1;
 135                limit++;
 136
 137                if (limit > end)
 138                        limit = end;
 139                if (limit <= base)
 140                        continue;
 141
 142                base >>= 16;
 143                base <<= 24;
 144
 145                if (base < start)
 146                        base = start;
 147                if (limit > end)
 148                        limit = end;
 149                if (limit == base) {
 150                        pr_err("Empty node %d\n", nodeid);
 151                        continue;
 152                }
 153                if (limit < base) {
 154                        pr_err("Node %d bogus settings %lx-%lx.\n",
 155                               nodeid, base, limit);
 156                        continue;
 157                }
 158
 159                /* Could sort here, but pun for now. Should not happen anyroads. */
 160                if (prevbase > base) {
 161                        pr_err("Node map not sorted %lx,%lx\n",
 162                               prevbase, base);
 163                        return -1;
 164                }
 165
 166                pr_info("Node %d MemBase %016lx Limit %016lx\n",
 167                        nodeid, base, limit);
 168
 169                found++;
 170
 171                nodes[nodeid].start = base;
 172                nodes[nodeid].end = limit;
 173
 174                prevbase = base;
 175
 176                node_set(nodeid, nodes_parsed);
 177        }
 178
 179        if (!found)
 180                return -1;
 181        return 0;
 182}
 183
 184#ifdef CONFIG_NUMA_EMU
 185static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
 186        [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 187};
 188
 189void __init amd_get_nodes(struct bootnode *physnodes)
 190{
 191        int i;
 192
 193        for_each_node_mask(i, nodes_parsed) {
 194                physnodes[i].start = nodes[i].start;
 195                physnodes[i].end = nodes[i].end;
 196        }
 197}
 198
 199static int __init find_node_by_addr(unsigned long addr)
 200{
 201        int ret = NUMA_NO_NODE;
 202        int i;
 203
 204        for (i = 0; i < 8; i++)
 205                if (addr >= nodes[i].start && addr < nodes[i].end) {
 206                        ret = i;
 207                        break;
 208                }
 209        return ret;
 210}
 211
 212/*
 213 * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be
 214 * setup to represent the physical topology but reflect the emulated
 215 * environment.  For each emulated node, the real node which it appears on is
 216 * found and a fake pxm to nid mapping is created which mirrors the actual
 217 * locality.  node_distance() then represents the correct distances between
 218 * emulated nodes by using the fake acpi mappings to pxms.
 219 */
 220void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
 221{
 222        unsigned int bits;
 223        unsigned int cores;
 224        unsigned int apicid_base = 0;
 225        int i;
 226
 227        bits = boot_cpu_data.x86_coreid_bits;
 228        cores = 1 << bits;
 229        early_get_boot_cpu_id();
 230        if (boot_cpu_physical_apicid > 0)
 231                apicid_base = boot_cpu_physical_apicid;
 232
 233        for (i = 0; i < nr_nodes; i++) {
 234                int index;
 235                int nid;
 236                int j;
 237
 238                nid = find_node_by_addr(nodes[i].start);
 239                if (nid == NUMA_NO_NODE)
 240                        continue;
 241
 242                index = nodeids[nid] << bits;
 243                if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE)
 244                        for (j = apicid_base; j < cores + apicid_base; j++)
 245                                fake_apicid_to_node[index + j] = i;
 246#ifdef CONFIG_ACPI_NUMA
 247                __acpi_map_pxm_to_node(nid, i);
 248#endif
 249        }
 250        memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
 251}
 252#endif /* CONFIG_NUMA_EMU */
 253
 254int __init amd_scan_nodes(void)
 255{
 256        unsigned int bits;
 257        unsigned int cores;
 258        unsigned int apicid_base;
 259        int i;
 260
 261        BUG_ON(nodes_empty(nodes_parsed));
 262        node_possible_map = nodes_parsed;
 263        memnode_shift = compute_hash_shift(nodes, 8, NULL);
 264        if (memnode_shift < 0) {
 265                pr_err("No NUMA node hash function found. Contact maintainer\n");
 266                return -1;
 267        }
 268        pr_info("Using node hash shift of %d\n", memnode_shift);
 269
 270        /* use the coreid bits from early_identify_cpu */
 271        bits = boot_cpu_data.x86_coreid_bits;
 272        cores = (1<<bits);
 273        apicid_base = 0;
 274        /* get the APIC ID of the BSP early for systems with apicid lifting */
 275        early_get_boot_cpu_id();
 276        if (boot_cpu_physical_apicid > 0) {
 277                pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
 278                apicid_base = boot_cpu_physical_apicid;
 279        }
 280
 281        for_each_node_mask(i, node_possible_map) {
 282                int j;
 283
 284                memblock_x86_register_active_regions(i,
 285                                nodes[i].start >> PAGE_SHIFT,
 286                                nodes[i].end >> PAGE_SHIFT);
 287                for (j = apicid_base; j < cores + apicid_base; j++)
 288                        apicid_to_node[(i << bits) + j] = i;
 289                setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 290        }
 291
 292        numa_init_array();
 293        return 0;
 294}
 295