linux/arch/ia64/mm/discontig.c
/*
 * Copyright (c) 2000, 2003 Silicon Graphics, Inc.  All rights reserved.
 * Copyright (c) 2001 Intel Corp.
 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
 * Copyright (c) 2002 NEC Corp.
 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
 * Copyright (c) 2004 Silicon Graphics, Inc
 *      Russ Anderson <rja@sgi.com>
 *      Jesse Barnes <jbarnes@sgi.com>
 *      Jack Steiner <steiner@sgi.com>
 */

/*
 * Platform initialization for Discontig Memory
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/nmi.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/nodemask.h>
#include <linux/slab.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/meminit.h>
#include <asm/numa.h>
#include <asm/sections.h>

/*
 * Track per-node information needed to set up the boot memory allocator, the
 * per-node areas, and the real VM.
 */
struct early_node_data {
        struct ia64_node_data *node_data;
        unsigned long pernode_addr;
        unsigned long pernode_size;
        unsigned long num_physpages;
#ifdef CONFIG_ZONE_DMA
        unsigned long num_dma_physpages;
#endif
        unsigned long min_pfn;
        unsigned long max_pfn;
};

static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;

pg_data_t *pgdat_list[MAX_NUMNODES];

/*
 * To prevent cache aliasing effects, align per-node structures so that they
 * start at addresses that are strided by node number.
 */
#define MAX_NODE_ALIGN_OFFSET   (32 * 1024 * 1024)
#define NODEDATA_ALIGN(addr, node)                                              \
        ((((addr) + 1024*1024-1) & ~(1024*1024-1)) +                            \
             (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1)))
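
/*
 * Worked example (illustrative, assuming the usual ia64 PERCPU_PAGE_SIZE of
 * 64KB): NODEDATA_ALIGN(0x04000300, 2) first rounds the address up to the
 * next 1MB boundary, 0x04100000, and then adds the node stride
 * 2 * 0x10000 = 0x20000, giving 0x04120000.  The mask against
 * (MAX_NODE_ALIGN_OFFSET - 1) keeps the stride below 32MB, so the offsets
 * wrap around for very large node numbers.
 */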

/**
 * build_node_maps - callback to setup bootmem structs for each node
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * We allocate a struct bootmem_data for each piece of memory that we wish to
 * treat as a virtually contiguous block (i.e. each node). Each such block
 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
 * if necessary.  Any non-existent pages will simply be part of the virtual
 * memmap.  We also track each node's minimum and maximum page frame numbers
 * here as we receive memory ranges from the caller.
 */
static int __init build_node_maps(unsigned long start, unsigned long len,
                                  int node)
{
        unsigned long spfn, epfn, end = start + len;
        struct bootmem_data *bdp = &bootmem_node_data[node];

        epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
        spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT;

        if (!bdp->node_low_pfn) {
                bdp->node_min_pfn = spfn;
                bdp->node_low_pfn = epfn;
        } else {
                bdp->node_min_pfn = min(spfn, bdp->node_min_pfn);
                bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
        }

        return 0;
}

/**
 * early_nr_cpus_node - return number of cpus on a given node
 * @node: node to check
 *
 * Count the number of cpus on @node.  We can't use nr_cpus_node() here because
 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
 * called yet.  Note that node 0 will also count all non-existent cpus.
 */
static int __meminit early_nr_cpus_node(int node)
{
        int cpu, n = 0;

        for_each_possible_early_cpu(cpu)
                if (node == node_cpuid[cpu].nid)
                        n++;

        return n;
}

/**
 * compute_pernodesize - compute size of pernode data
 * @node: the node id.
 */
static unsigned long __meminit compute_pernodesize(int node)
{
        unsigned long pernodesize = 0, cpus;

        cpus = early_nr_cpus_node(node);
        pernodesize += PERCPU_PAGE_SIZE * cpus;
        pernodesize += node * L1_CACHE_BYTES;
        pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
        pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
        pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
        pernodesize = PAGE_ALIGN(pernodesize);
        return pernodesize;
}
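
/*
 * The size terms above mirror, in order, the regions that fill_pernode()
 * later carves out of this block: one PERCPU_PAGE_SIZE area per cpu on the
 * node, the node-strided cache-alias padding, the node's pg_data_t, its
 * ia64_node_data, and one more cache-line-aligned pg_data_t-sized slot, all
 * rounded up to a whole page.
 */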

/**
 * per_cpu_node_setup - setup per-cpu areas on each node
 * @cpu_data: per-cpu area on this node
 * @node: node to setup
 *
 * Copy the static per-cpu data into the region we just set aside and then
 * set up __per_cpu_offset for each CPU on this node.  Return a pointer to
 * the end of the area.
 */
static void *per_cpu_node_setup(void *cpu_data, int node)
{
#ifdef CONFIG_SMP
        int cpu;

        for_each_possible_early_cpu(cpu) {
                void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;

                if (node != node_cpuid[cpu].nid)
                        continue;

                memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
                __per_cpu_offset[cpu] = (char *)__va(cpu_data) -
                        __per_cpu_start;

                /*
                 * The percpu area for cpu0 is moved from the __init area,
                 * which is set up by head.S and used until this point.
                 * Update ar.k3.  This move ensures that the percpu area
                 * for cpu0 is on the correct node and that its virtual
                 * address isn't insanely far from the other percpu areas,
                 * which is important for the congruent percpu allocator.
                 */
                if (cpu == 0)
                        ia64_set_kr(IA64_KR_PER_CPU_DATA,
                                    (unsigned long)cpu_data -
                                    (unsigned long)__per_cpu_start);

                cpu_data += PERCPU_PAGE_SIZE;
        }
#endif
        return cpu_data;
}
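
/*
 * Once per_cpu_node_setup() has run for a node, __per_cpu_offset[] for each
 * cpu on that node points at the node-local copy made above, so per-cpu
 * references from those cpus resolve into this node's pernode block rather
 * than into the original static percpu image.
 */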

#ifdef CONFIG_SMP
/**
 * setup_per_cpu_areas - setup percpu areas
 *
 * Arch code has already allocated and initialized percpu areas.  All
 * this function has to do is to teach the determined layout to the
 * dynamic percpu allocator, which happens to be more complex than
 * creating whole new ones using helpers.
 */
void __init setup_per_cpu_areas(void)
{
        struct pcpu_alloc_info *ai;
        struct pcpu_group_info *uninitialized_var(gi);
        unsigned int *cpu_map;
        void *base;
        unsigned long base_offset;
        unsigned int cpu;
        ssize_t static_size, reserved_size, dyn_size;
        int node, prev_node, unit, nr_units, rc;

        ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
        if (!ai)
                panic("failed to allocate pcpu_alloc_info");
        cpu_map = ai->groups[0].cpu_map;

        /* determine base */
        base = (void *)ULONG_MAX;
        for_each_possible_cpu(cpu)
                base = min(base,
                           (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
        base_offset = (void *)__per_cpu_start - base;

        /* build cpu_map, units are grouped by node */
        unit = 0;
        for_each_node(node)
                for_each_possible_cpu(cpu)
                        if (node == node_cpuid[cpu].nid)
                                cpu_map[unit++] = cpu;
        nr_units = unit;

        /* set basic parameters */
        static_size = __per_cpu_end - __per_cpu_start;
        reserved_size = PERCPU_MODULE_RESERVE;
        dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
        if (dyn_size < 0)
                panic("percpu area overflow static=%zd reserved=%zd\n",
                      static_size, reserved_size);

        ai->static_size         = static_size;
        ai->reserved_size       = reserved_size;
        ai->dyn_size            = dyn_size;
        ai->unit_size           = PERCPU_PAGE_SIZE;
        ai->atom_size           = PAGE_SIZE;
        ai->alloc_size          = PERCPU_PAGE_SIZE;

        /*
         * CPUs are put into groups according to node.  Walk cpu_map
         * and create new groups at node boundaries.
         */
        prev_node = -1;
        ai->nr_groups = 0;
        for (unit = 0; unit < nr_units; unit++) {
                cpu = cpu_map[unit];
                node = node_cpuid[cpu].nid;

                if (node == prev_node) {
                        gi->nr_units++;
                        continue;
                }
                prev_node = node;

                gi = &ai->groups[ai->nr_groups++];
                gi->nr_units            = 1;
                gi->base_offset         = __per_cpu_offset[cpu] + base_offset;
                gi->cpu_map             = &cpu_map[unit];
        }

        rc = pcpu_setup_first_chunk(ai, base);
        if (rc)
                panic("failed to setup percpu area (err=%d)", rc);

        pcpu_free_alloc_info(ai);
}
#endif
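
/*
 * Illustrative example of the grouping in setup_per_cpu_areas(): with cpus
 * 0-1 on node 0 and cpus 2-3 on node 1, the walk builds
 * cpu_map = {0, 1, 2, 3} and two pcpu groups, each with nr_units == 2 and a
 * base_offset pointing at that node's first per-cpu unit relative to the
 * lowest unit address found above.
 */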

/**
 * fill_pernode - initialize pernode data.
 * @node: the node id.
 * @pernode: physical address of pernode data
 * @pernodesize: size of the pernode data
 */
static void __init fill_pernode(int node, unsigned long pernode,
        unsigned long pernodesize)
{
        void *cpu_data;
        int cpus = early_nr_cpus_node(node);
        struct bootmem_data *bdp = &bootmem_node_data[node];

        mem_data[node].pernode_addr = pernode;
        mem_data[node].pernode_size = pernodesize;
        memset(__va(pernode), 0, pernodesize);

        cpu_data = (void *)pernode;
        pernode += PERCPU_PAGE_SIZE * cpus;
        pernode += node * L1_CACHE_BYTES;

        pgdat_list[node] = __va(pernode);
        pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

        mem_data[node].node_data = __va(pernode);
        pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));

        pgdat_list[node]->bdata = bdp;
        pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

        cpu_data = per_cpu_node_setup(cpu_data, node);

        return;
}

/**
 * find_pernode_space - allocate memory for memory map and per-node structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * This routine reserves space for the per-cpu data struct, the list of
 * pg_data_ts and the per-node data struct.  Each node will have something like
 * the following in the first chunk of address space large enough to hold it.
 *
 *    ________________________
 *   |                        |
 *   |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
 *   |    PERCPU_PAGE_SIZE *  |     start and length big enough
 *   |    cpus_on_this_node   | Node 0 will also have entries for all non-existent cpus.
 *   |------------------------|
 *   |   local pg_data_t *    |
 *   |------------------------|
 *   |  local ia64_node_data  |
 *   |------------------------|
 *   |          ???           |
 *   |________________________|
 *
 * Once this space has been set aside, the bootmem maps are initialized.  We
 * could probably move the allocation of the per-cpu and ia64_node_data space
 * outside of this function and use alloc_bootmem_node(), but doing it here
 * is straightforward and we get the alignments we want so...
 */
static int __init find_pernode_space(unsigned long start, unsigned long len,
                                     int node)
{
        unsigned long spfn, epfn;
        unsigned long pernodesize = 0, pernode, pages, mapsize;
        struct bootmem_data *bdp = &bootmem_node_data[node];

        spfn = start >> PAGE_SHIFT;
        epfn = (start + len) >> PAGE_SHIFT;

        pages = bdp->node_low_pfn - bdp->node_min_pfn;
        mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;

        /*
         * Make sure this memory falls within this node's usable memory
         * since we may have thrown some away in build_maps().
         */
        if (spfn < bdp->node_min_pfn || epfn > bdp->node_low_pfn)
                return 0;

        /* Don't setup this node's local space twice... */
        if (mem_data[node].pernode_addr)
                return 0;

        /*
         * Calculate total size needed, incl. what's necessary
         * for good alignment and alias prevention.
         */
        pernodesize = compute_pernodesize(node);
        pernode = NODEDATA_ALIGN(start, node);

        /* Is this range big enough for what we want to store here? */
        if (start + len > (pernode + pernodesize + mapsize))
                fill_pernode(node, pernode, pernodesize);

        return 0;
}

/**
 * free_node_bootmem - free bootmem allocator memory for use
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Simply calls the bootmem allocator to free the specified range from
 * the given pg_data_t's bdata struct.  After this function has been called
 * for all the entries in the EFI memory map, the bootmem allocator will
 * be ready to service allocation requests.
 */
static int __init free_node_bootmem(unsigned long start, unsigned long len,
                                    int node)
{
        free_bootmem_node(pgdat_list[node], start, len);

        return 0;
}

/**
 * reserve_pernode_space - reserve memory for per-node space
 *
 * Reserve the space used by the bootmem maps & per-node space in the boot
 * allocator so that when we actually create the real mem maps we don't
 * use their memory.
 */
static void __init reserve_pernode_space(void)
{
        unsigned long base, size, pages;
        struct bootmem_data *bdp;
        int node;

        for_each_online_node(node) {
                pg_data_t *pdp = pgdat_list[node];

                if (node_isset(node, memory_less_mask))
                        continue;

                bdp = pdp->bdata;

                /* First the bootmem_map itself */
                pages = bdp->node_low_pfn - bdp->node_min_pfn;
                size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
                base = __pa(bdp->node_bootmem_map);
                reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);

                /* Now the per-node space */
                size = mem_data[node].pernode_size;
                base = __pa(mem_data[node].pernode_addr);
                reserve_bootmem_node(pdp, base, size, BOOTMEM_DEFAULT);
        }
}

static void __meminit scatter_node_data(void)
{
        pg_data_t **dst;
        int node;

        /*
         * for_each_online_node() can't be used here because node_online_map
         * is not yet set for hot-added nodes at this point; we are halfway
         * through initialization of the new node's structures.  If
         * for_each_online_node() were used, a new node's pg_data_ptrs would
         * not be initialized.  Instead, pgdat_list[] is checked.
         */
        for_each_node(node) {
                if (pgdat_list[node]) {
                        dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
                        memcpy(dst, pgdat_list, sizeof(pgdat_list));
                }
        }
}

/**
 * initialize_pernode_data - fixup per-cpu & per-node pointers
 *
 * Each node's per-node area has a copy of the global pg_data_t list, so
 * we copy that to each node here, as well as setting the per-cpu pointer
 * to the local node data structure.  The active_cpus field of the per-node
 * structure gets set up by the platform_cpu_init() function later.
 */
static void __init initialize_pernode_data(void)
{
        int cpu, node;

        scatter_node_data();

#ifdef CONFIG_SMP
        /* Set the node_data pointer for each per-cpu struct */
        for_each_possible_early_cpu(cpu) {
                node = node_cpuid[cpu].nid;
                per_cpu(ia64_cpu_info, cpu).node_data =
                        mem_data[node].node_data;
        }
#else
        {
                struct cpuinfo_ia64 *cpu0_cpu_info;
                cpu = 0;
                node = node_cpuid[cpu].nid;
                cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
                        ((char *)&ia64_cpu_info - __per_cpu_start));
                cpu0_cpu_info->node_data = mem_data[node].node_data;
        }
#endif /* CONFIG_SMP */
}

/**
 * memory_less_node_alloc - attempt to allocate memory on the best NUMA SLIT
 *      node, but fall back to any other node when __alloc_bootmem_node()
 *      fails for the best node.
 * @nid: node id
 * @pernodesize: size of this node's pernode data
 */
static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
{
        void *ptr = NULL;
        u8 best = 0xff;
        int bestnode = -1, node, anynode = 0;

        for_each_online_node(node) {
                if (node_isset(node, memory_less_mask))
                        continue;
                else if (node_distance(nid, node) < best) {
                        best = node_distance(nid, node);
                        bestnode = node;
                }
                anynode = node;
        }

        if (bestnode == -1)
                bestnode = anynode;

        ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
                PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));

        return ptr;
}
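
/*
 * node_distance() reports the ACPI SLIT distance between two nodes, where a
 * smaller value means "closer".  The loop above therefore picks the nearest
 * online node that actually has memory; if no candidate reports a distance
 * below 0xff, any memory-bearing online node is used instead.
 */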

/**
 * memory_less_nodes - allocate and initialize CPU-only nodes' pernode
 *      information.
 */
static void __init memory_less_nodes(void)
{
        unsigned long pernodesize;
        void *pernode;
        int node;

        for_each_node_mask(node, memory_less_mask) {
                pernodesize = compute_pernodesize(node);
                pernode = memory_less_node_alloc(node, pernodesize);
                fill_pernode(node, __pa(pernode), pernodesize);
        }

        return;
}

/**
 * find_memory - walk the EFI memory map and setup the bootmem allocator
 *
 * Called early in boot to setup the bootmem allocator, and to
 * allocate the per-cpu and per-node structures.
 */
void __init find_memory(void)
{
        int node;

        reserve_memory();

        if (num_online_nodes() == 0) {
                printk(KERN_ERR "node info missing!\n");
                node_set_online(0);
        }

        nodes_or(memory_less_mask, memory_less_mask, node_online_map);
        min_low_pfn = -1;
        max_low_pfn = 0;

        /* These actually end up getting called by call_pernode_memory() */
        efi_memmap_walk(filter_rsvd_memory, build_node_maps);
        efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
        efi_memmap_walk(find_max_min_low_pfn, NULL);

        for_each_online_node(node)
                if (bootmem_node_data[node].node_low_pfn) {
                        node_clear(node, memory_less_mask);
                        mem_data[node].min_pfn = ~0UL;
                }

        efi_memmap_walk(filter_memory, register_active_ranges);

        /*
         * Initialize the boot memory maps in reverse order since that's
         * what the bootmem allocator expects
         */
        for (node = MAX_NUMNODES - 1; node >= 0; node--) {
                unsigned long pernode, pernodesize, map;
                struct bootmem_data *bdp;

                if (!node_online(node))
                        continue;
                else if (node_isset(node, memory_less_mask))
                        continue;

                bdp = &bootmem_node_data[node];
                pernode = mem_data[node].pernode_addr;
                pernodesize = mem_data[node].pernode_size;
                map = pernode + pernodesize;

                init_bootmem_node(pgdat_list[node],
                                  map>>PAGE_SHIFT,
                                  bdp->node_min_pfn,
                                  bdp->node_low_pfn);
        }

        efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);

        reserve_pernode_space();
        memory_less_nodes();
        initialize_pernode_data();

        max_pfn = max_low_pfn;

        find_initrd();
}

#ifdef CONFIG_SMP
/**
 * per_cpu_init - setup per-cpu variables
 *
 * find_pernode_space() does most of this already; we just need to set
 * local_per_cpu_offset
 */
void __cpuinit *per_cpu_init(void)
{
        int cpu;
        static int first_time = 1;

        if (first_time) {
                first_time = 0;
                for_each_possible_early_cpu(cpu)
                        per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
        }

        return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
#endif /* CONFIG_SMP */

/**
 * show_mem - give short summary of memory stats
 *
 * Shows a simple page count of reserved and used pages in the system.
 * For discontig machines, it does this on a per-pgdat basis.
 */
void show_mem(void)
{
        int i, total_reserved = 0;
        int total_shared = 0, total_cached = 0;
        unsigned long total_present = 0;
        pg_data_t *pgdat;

        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Node memory in pages:\n");
        for_each_online_pgdat(pgdat) {
                unsigned long present;
                unsigned long flags;
                int shared = 0, cached = 0, reserved = 0;

                pgdat_resize_lock(pgdat, &flags);
                present = pgdat->node_present_pages;
                for(i = 0; i < pgdat->node_spanned_pages; i++) {
                        struct page *page;
                        if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
                                touch_nmi_watchdog();
                        if (pfn_valid(pgdat->node_start_pfn + i))
                                page = pfn_to_page(pgdat->node_start_pfn + i);
                        else {
                                i = vmemmap_find_next_valid_pfn(pgdat->node_id,
                                         i) - 1;
                                continue;
                        }
                        if (PageReserved(page))
                                reserved++;
                        else if (PageSwapCache(page))
                                cached++;
                        else if (page_count(page))
                                shared += page_count(page)-1;
                }
                pgdat_resize_unlock(pgdat, &flags);
                total_present += present;
                total_reserved += reserved;
                total_cached += cached;
                total_shared += shared;
                printk(KERN_INFO "Node %4d:  RAM: %11ld, rsvd: %8d, "
                       "shrd: %10d, swpd: %10d\n", pgdat->node_id,
                       present, reserved, shared, cached);
        }
        printk(KERN_INFO "%ld pages of RAM\n", total_present);
        printk(KERN_INFO "%d reserved pages\n", total_reserved);
        printk(KERN_INFO "%d pages shared\n", total_shared);
        printk(KERN_INFO "%d pages swap cached\n", total_cached);
        printk(KERN_INFO "Total of %ld pages in page table cache\n",
               quicklist_total_size());
        printk(KERN_INFO "%d free buffer pages\n", nr_free_buffer_pages());
}

/**
 * call_pernode_memory - use SRAT to call callback functions with node info
 * @start: physical start of range
 * @len: length of range
 * @arg: function to call for each range
 *
 * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
 * out to which node a block of memory belongs.  Ignore memory that we cannot
 * identify, and split blocks that run across multiple nodes.
 *
 * Take this opportunity to round the start address up and the end address
 * down to page boundaries.
 */
void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
{
        unsigned long rs, re, end = start + len;
        void (*func)(unsigned long, unsigned long, int);
        int i;

        start = PAGE_ALIGN(start);
        end &= PAGE_MASK;
        if (start >= end)
                return;

        func = arg;

        if (!num_node_memblks) {
                /* No SRAT table, so assume one node (node 0) */
                if (start < end)
                        (*func)(start, end - start, 0);
                return;
        }

        for (i = 0; i < num_node_memblks; i++) {
                rs = max(start, node_memblk[i].start_paddr);
                re = min(end, node_memblk[i].start_paddr +
                         node_memblk[i].size);

                if (rs < re)
                        (*func)(rs, re - rs, node_memblk[i].nid);

                if (re == end)
                        break;
        }
}
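
/*
 * Worked example: if the EFI map hands us a block [0x1000000, 0x3000000)
 * and the SRAT memblks say node 0 owns [0x0, 0x2000000) while node 1 owns
 * [0x2000000, 0x4000000), the loop above invokes the callback twice: once
 * as (*func)(0x1000000, 0x1000000, 0) and once as
 * (*func)(0x2000000, 0x1000000, 1).
 */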

/**
 * count_node_pages - callback to build per-node memory info structures
 * @start: physical start of range
 * @len: length of range
 * @node: node where this range resides
 *
 * Each node has its own number of physical pages, DMAable pages, start, and
 * end page frame number.  This routine will be called by call_pernode_memory()
 * for each piece of usable memory and will set up these values for each node.
 * Very similar to build_maps().
 */
static __init int count_node_pages(unsigned long start, unsigned long len, int node)
{
        unsigned long end = start + len;

        mem_data[node].num_physpages += len >> PAGE_SHIFT;
#ifdef CONFIG_ZONE_DMA
        if (start <= __pa(MAX_DMA_ADDRESS))
                mem_data[node].num_dma_physpages +=
                        (min(end, __pa(MAX_DMA_ADDRESS)) - start) >> PAGE_SHIFT;
#endif
        start = GRANULEROUNDDOWN(start);
        end = GRANULEROUNDUP(end);
        mem_data[node].max_pfn = max(mem_data[node].max_pfn,
                                     end >> PAGE_SHIFT);
        mem_data[node].min_pfn = min(mem_data[node].min_pfn,
                                     start >> PAGE_SHIFT);

        return 0;
}

/**
 * paging_init - setup page tables
 *
 * paging_init() sets up the page tables for each node of the system and frees
 * the bootmem allocator memory for general use.
 */
void __init paging_init(void)
{
        unsigned long max_dma;
        unsigned long pfn_offset = 0;
        unsigned long max_pfn = 0;
        int node;
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;

        efi_memmap_walk(filter_rsvd_memory, count_node_pages);

        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();

#ifdef CONFIG_VIRTUAL_MEM_MAP
        VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
                sizeof(struct page));
        vmem_map = (struct page *) VMALLOC_END;
        efi_memmap_walk(create_mem_map_page_table, NULL);
        printk("Virtual mem_map starts at 0x%p\n", vmem_map);
#endif
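
        /*
         * The VMALLOC_END adjustment above carves out room at the top of the
         * vmalloc region for a virtually mapped mem_map covering every pfn up
         * to max_low_pfn (rounded up to a MAX_ORDER block).  Only the parts of
         * that range backed by real memory get page tables, via the
         * create_mem_map_page_table() walk; holes between nodes stay unmapped.
         */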

        for_each_online_node(node) {
                num_physpages += mem_data[node].num_physpages;
                pfn_offset = mem_data[node].min_pfn;

#ifdef CONFIG_VIRTUAL_MEM_MAP
                NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
#endif
                if (mem_data[node].max_pfn > max_pfn)
                        max_pfn = mem_data[node].max_pfn;
        }

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
#ifdef CONFIG_ZONE_DMA
        max_zone_pfns[ZONE_DMA] = max_dma;
#endif
        max_zone_pfns[ZONE_NORMAL] = max_pfn;
        free_area_init_nodes(max_zone_pfns);

        zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}

#ifdef CONFIG_MEMORY_HOTPLUG
pg_data_t *arch_alloc_nodedata(int nid)
{
        unsigned long size = compute_pernodesize(nid);

        return kzalloc(size, GFP_KERNEL);
}

void arch_free_nodedata(pg_data_t *pgdat)
{
        kfree(pgdat);
}

void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
        pgdat_list[update_node] = update_pgdat;
        scatter_node_data();
}
#endif

#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(struct page *start_page,
                                                unsigned long size, int node)
{
        return vmemmap_populate_basepages(start_page, size, node);
}
#endif