linux/mm/sparse-vmemmap.c
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
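/*
 * Illustrative sketch (not part of this file): with a virtually
 * contiguous memory map, the pfn <-> struct page conversions reduce to
 * pointer arithmetic against a fixed base, roughly along the lines of
 * the SPARSEMEM_VMEMMAP variant in asm-generic/memory_model.h:
 *
 *	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
 *	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)
 *
 * where "vmemmap" is an architecture-provided struct page * pointing at
 * the base of the virtual map.
 */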
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */
static void * __init_refok __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
}

/*
 * Scratch buffer used to batch early memory map allocations; set up and
 * released by sparse_mem_maps_populate_node() below and consumed by
 * vmemmap_alloc_block_buf().
 */
static void *vmemmap_buf;
static void *vmemmap_buf_end;

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, otherwise fall back to bootmem. */
	if (slab_is_available()) {
		struct page *page;

		if (node_state(node, N_HIGH_MEMORY))
			page = alloc_pages_node(node,
				GFP_KERNEL | __GFP_ZERO, get_order(size));
		else
			page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
				get_order(size));
		if (page)
			return page_address(page);
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

/* Allocations must all be the same size during the early (buffered) stage. */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{
	void *ptr;

	if (!vmemmap_buf)
		return vmemmap_alloc_block(size, node);

	/* take from the buffer */
	ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
	if (ptr + size > vmemmap_buf_end)
		return vmemmap_alloc_block(size, node);

	vmemmap_buf = ptr + size;

	return ptr;
}

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		printk(KERN_WARNING "[%lx-%lx] potential offnode "
			"page_structs\n", start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
		if (!p)
			return NULL;
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

int __meminit vmemmap_populate_basepages(struct page *start_page,
						unsigned long size, int node)
{
	unsigned long addr = (unsigned long)start_page;
	unsigned long end = (unsigned long)(start_page + size);
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;
		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;
		pte = vmemmap_pte_populate(pmd, addr, node);
		if (!pte)
			return -ENOMEM;
		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}

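/*
 * Illustrative sketch (assumption, not part of this file): an
 * architecture that backs the memory map with base pages could provide
 * its vmemmap_populate() hook simply by forwarding to the helper above:
 *
 *	int __meminit vmemmap_populate(struct page *start_page,
 *				       unsigned long size, int node)
 *	{
 *		return vmemmap_populate_basepages(start_page, size, node);
 *	}
 *
 * Architectures that map the vmemmap with larger pages (e.g. PMD-sized
 * mappings) supply their own implementation instead.
 */
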
struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
{
	struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION);
	int error = vmemmap_populate(map, PAGES_PER_SECTION, nid);
	if (error)
		return NULL;

	return map;
}

void __init sparse_mem_maps_populate_node(struct page **map_map,
					  unsigned long pnum_begin,
					  unsigned long pnum_end,
					  unsigned long map_count, int nodeid)
{
	unsigned long pnum;
	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
	void *vmemmap_buf_start;

	size = ALIGN(size, PMD_SIZE);
	vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
			 PMD_SIZE, __pa(MAX_DMA_ADDRESS));

	if (vmemmap_buf_start) {
		vmemmap_buf = vmemmap_buf_start;
		vmemmap_buf_end = vmemmap_buf_start + size * map_count;
	}

	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
		struct mem_section *ms;

		if (!present_section_nr(pnum))
			continue;

		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
		if (map_map[pnum])
			continue;
		ms = __nr_to_section(pnum);
		printk(KERN_ERR "%s: sparsemem memory map backing failed, "
			"some memory will not be available.\n", __func__);
		ms->section_mem_map = 0;
	}

	if (vmemmap_buf_start) {
		/* free the unused remainder of the buffer */
		free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
		vmemmap_buf = NULL;
		vmemmap_buf_end = NULL;
	}
}