linux/mm/sparse.c
/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Permanent SPARSEMEM data:
 *
 * 1) mem_section       - memory sections, mem_map's for valid memory
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section *mem_section[NR_SECTION_ROOTS]
        ____cacheline_internodealigned_in_smp;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
        ____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

int page_to_nid(struct page *page)
{
        return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

static void set_section_nid(unsigned long section_nr, int nid)
{
        section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
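/*
 * With SPARSEMEM_EXTREME the mem_section roots are allocated on demand,
 * one root (SECTIONS_PER_ROOT entries) at a time and node-locally: from
 * the slab allocator once it is available, otherwise from bootmem.
 */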
static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
{
        struct mem_section *section = NULL;
        unsigned long array_size = SECTIONS_PER_ROOT *
                                   sizeof(struct mem_section);

        if (slab_is_available())
                section = kmalloc_node(array_size, GFP_KERNEL, nid);
        else
                section = alloc_bootmem_node(NODE_DATA(nid), array_size);

        if (section)
                memset(section, 0, array_size);

        return section;
}

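/*
 * Make sure the mem_section root covering @section_nr exists.  Returns
 * 0 on success, -EEXIST if the root was already allocated and -ENOMEM
 * if the allocation failed.
 */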
static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
        static DEFINE_SPINLOCK(index_init_lock);
        unsigned long root = SECTION_NR_TO_ROOT(section_nr);
        struct mem_section *section;
        int ret = 0;

        if (mem_section[root])
                return -EEXIST;

        section = sparse_index_alloc(nid);
        if (!section)
                return -ENOMEM;
        /*
         * This lock keeps two different callers from installing
         * a root for the same index at the same time.
         */
        spin_lock(&index_init_lock);

        if (mem_section[root]) {
                ret = -EEXIST;
                goto out;
        }

        mem_section[root] = section;
out:
        spin_unlock(&index_init_lock);
        return ret;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
        return 0;
}
#endif

/*
 * Although written for the SPARSEMEM_EXTREME case, this happens
 * to also work for the flat array case because
 * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
 */
int __section_nr(struct mem_section* ms)
{
        unsigned long root_nr;
        struct mem_section* root;

        for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
                root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
                if (!root)
                        continue;

                if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
                     break;
        }

        return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}

/*
 * During early boot, before section_mem_map is used for an actual
 * mem_map, we use section_mem_map to store the section's NUMA
 * node.  This keeps us from having to use another data structure.  The
 * node information is cleared just before we store the real mem_map.
 */
static inline unsigned long sparse_encode_early_nid(int nid)
{
        return (nid << SECTION_NID_SHIFT);
}

static inline int sparse_early_nid(struct mem_section *section)
{
        return (section->section_mem_map >> SECTION_NID_SHIFT);
}

/* Record a memory area against a node. */
void __init memory_present(int nid, unsigned long start, unsigned long end)
{
        unsigned long pfn;

        start &= PAGE_SECTION_MASK;
        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
                unsigned long section = pfn_to_section_nr(pfn);
                struct mem_section *ms;

                sparse_index_init(section, nid);
                set_section_nid(section, nid);

                ms = __nr_to_section(section);
                if (!ms->section_mem_map)
                        ms->section_mem_map = sparse_encode_early_nid(nid) |
                                                        SECTION_MARKED_PRESENT;
        }
}

/*
 * Only used by the i386 NUMA architectures, but relatively
 * generic code.
 */
unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
                                                     unsigned long end_pfn)
{
        unsigned long pfn;
        unsigned long nr_pages = 0;

        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                if (nid != early_pfn_to_nid(pfn))
                        continue;

                if (pfn_present(pfn))
                        nr_pages += PAGES_PER_SECTION;
        }

        return nr_pages * sizeof(struct page);
}

/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
        return (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
}

/*
 * We need this if we ever free the mem_maps.  While not implemented yet,
 * this function is included for parity with its sibling.
 */
static __attribute((unused))
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
        return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}

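/*
 * Hook a mem_map and pageblock bitmap up to an already-present section.
 * Returns 1 when the section was initialized and -EINVAL if it was never
 * marked present.
 */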
static int __meminit sparse_init_one_section(struct mem_section *ms,
                unsigned long pnum, struct page *mem_map,
                unsigned long *pageblock_bitmap)
{
        if (!present_section(ms))
                return -EINVAL;

        ms->section_mem_map &= ~SECTION_MAP_MASK;
        ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
                                                        SECTION_HAS_MEM_MAP;
        ms->pageblock_flags = pageblock_bitmap;

        return 1;
}

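/*
 * Size, in bytes, of one section's pageblock bitmap: SECTION_BLOCKFLAGS_BITS
 * rounded up to whole bytes and then to an unsigned long boundary.
 */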
static unsigned long usemap_size(void)
{
        unsigned long size_bytes;
        size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
        size_bytes = roundup(size_bytes, sizeof(unsigned long));
        return size_bytes;
}

#ifdef CONFIG_MEMORY_HOTPLUG
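/* The usemap for a hot-added section comes from the slab allocator. */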
static unsigned long *__kmalloc_section_usemap(void)
{
        return kmalloc(usemap_size(), GFP_KERNEL);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

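/*
 * Boot-time usemap allocation: take it from bootmem on the section's own
 * node, as recorded by sparse_encode_early_nid().
 */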
static unsigned long *sparse_early_usemap_alloc(unsigned long pnum)
{
        unsigned long *usemap;
        struct mem_section *ms = __nr_to_section(pnum);
        int nid = sparse_early_nid(ms);

        usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
        if (usemap)
                return usemap;

        /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
        nid = 0;

        printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
        return NULL;
}

#ifndef CONFIG_SPARSEMEM_VMEMMAP
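/*
 * Non-vmemmap mem_map allocation: prefer the architecture's remapped
 * area (alloc_remap), otherwise fall back to node-local bootmem.
 */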
struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
{
        struct page *map;

        map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
        if (map)
                return map;

        map = alloc_bootmem_node(NODE_DATA(nid),
                        sizeof(struct page) * PAGES_PER_SECTION);
        return map;
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */

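/*
 * Allocate the mem_map for one present section at boot.  On failure the
 * section is cleared so it is treated as absent from then on.
 */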
struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
{
        struct page *map;
        struct mem_section *ms = __nr_to_section(pnum);
        int nid = sparse_early_nid(ms);

        map = sparse_mem_map_populate(pnum, nid);
        if (map)
                return map;

        printk(KERN_ERR "%s: sparsemem memory map backing failed "
                        "some memory will not be available.\n", __FUNCTION__);
        ms->section_mem_map = 0;
        return NULL;
}

/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
        unsigned long pnum;
        struct page *map;
        unsigned long *usemap;

        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
                if (!present_section_nr(pnum))
                        continue;

                map = sparse_early_mem_map_alloc(pnum);
                if (!map)
                        continue;

                usemap = sparse_early_usemap_alloc(pnum);
                if (!usemap)
                        continue;

                sparse_init_one_section(__nr_to_section(pnum), pnum, map,
                                                                usemap);
        }
}

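/*
 * Memory hotplug support: the helpers below allocate and free a section's
 * mem_map at runtime.  With SPARSEMEM_VMEMMAP the vmemmap populate path is
 * reused; otherwise the mem_map comes from the page allocator or vmalloc.
 */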
#ifdef CONFIG_MEMORY_HOTPLUG
#ifdef CONFIG_SPARSEMEM_VMEMMAP
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
                                                 unsigned long nr_pages)
{
        /* This will make the necessary allocations eventually. */
        return sparse_mem_map_populate(pnum, nid);
}
static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
{
        return; /* XXX: Not implemented yet */
}
#else
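/*
 * Try to get a physically contiguous mem_map from the page allocator
 * first; if that fails, fall back to a virtually contiguous one from
 * vmalloc.
 */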
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
{
        struct page *page, *ret;
        unsigned long memmap_size = sizeof(struct page) * nr_pages;

        page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
        if (page)
                goto got_map_page;

        ret = vmalloc(memmap_size);
        if (ret)
                goto got_map_ptr;

        return NULL;
got_map_page:
        ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
got_map_ptr:
        memset(ret, 0, memmap_size);

        return ret;
}

static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
                                                  unsigned long nr_pages)
{
        return __kmalloc_section_memmap(nr_pages);
}

static int vaddr_in_vmalloc_area(void *addr)
{
        if (addr >= (void *)VMALLOC_START &&
            addr < (void *)VMALLOC_END)
                return 1;
        return 0;
}

static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
{
        if (vaddr_in_vmalloc_area(memmap))
                vfree(memmap);
        else
                free_pages((unsigned long)memmap,
                           get_order(sizeof(struct page) * nr_pages));
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

/*
 * returns the number of sections whose mem_maps were properly
 * set.  If this is <=0, then that means that the passed-in
 * map was not consumed and must be freed.
 */
int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
                           int nr_pages)
{
        unsigned long section_nr = pfn_to_section_nr(start_pfn);
        struct pglist_data *pgdat = zone->zone_pgdat;
        struct mem_section *ms;
        struct page *memmap;
        unsigned long *usemap;
        unsigned long flags;
        int ret;

        /*
         * no locking for this, because it does its own
         * plus, it does a kmalloc
         */
        ret = sparse_index_init(section_nr, pgdat->node_id);
        if (ret < 0 && ret != -EEXIST)
                return ret;
        memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
        if (!memmap)
                return -ENOMEM;
        usemap = __kmalloc_section_usemap();
        if (!usemap) {
                __kfree_section_memmap(memmap, nr_pages);
                return -ENOMEM;
        }

        pgdat_resize_lock(pgdat, &flags);

        ms = __pfn_to_section(start_pfn);
        if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
                ret = -EEXIST;
                goto out;
        }

        ms->section_mem_map |= SECTION_MARKED_PRESENT;

        ret = sparse_init_one_section(ms, section_nr, memmap, usemap);

out:
        pgdat_resize_unlock(pgdat, &flags);
        if (ret <= 0) {
                kfree(usemap);
                __kfree_section_memmap(memmap, nr_pages);
        }
        return ret;
}
#endif