/* linux/arch/x86/mm/init.c */
#include <linux/gfp.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
#include <linux/memblock.h>

#include <asm/cacheflush.h>
#include <asm/e820.h>
#include <asm/init.h>
#include <asm/page.h>
#include <asm/page_types.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/proto.h>

unsigned long __initdata pgt_buf_start;
unsigned long __meminitdata pgt_buf_end;
unsigned long __meminitdata pgt_buf_top;

int after_bootmem;

int direct_gbpages
#ifdef CONFIG_DIRECT_GBPAGES
                                = 1
#endif
;

static void __init find_early_table_space(unsigned long end, int use_pse,
                                          int use_gbpages)
{
        unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
        phys_addr_t base;

        puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
        tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);

        if (use_gbpages) {
                unsigned long extra;

                extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
                pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
        } else
                pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;

        tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);

        if (use_pse) {
                unsigned long extra;

                extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
#ifdef CONFIG_X86_32
                extra += PMD_SIZE;
#endif
                ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
        } else
                ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;

        tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);

#ifdef CONFIG_X86_32
        /* for fixmap */
        tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
#endif
        good_end = max_pfn_mapped << PAGE_SHIFT;

        base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
        if (base == MEMBLOCK_ERROR)
                panic("Cannot find space for the kernel page tables");

        pgt_buf_start = base >> PAGE_SHIFT;
        pgt_buf_end = pgt_buf_start;
        pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);

        printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
                end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
}
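
/*
 * Worked example (illustrative only, not part of the original source):
 * for a hypothetical end = 4 GiB on x86_64 (PAGE_SHIFT = 12,
 * PMD_SHIFT = 21, PUD_SHIFT = 30, 8-byte table entries) with
 * use_pse = use_gbpages = 0, the arithmetic above gives:
 *
 *   puds = 4        -> roundup(4 * 8, 4096)       =    4 KiB
 *   pmds = 2048     -> roundup(2048 * 8, 4096)    =   16 KiB
 *   ptes = 1048576  -> roundup(1048576 * 8, 4096) = 8192 KiB
 *
 * i.e. roughly 8 MiB of page-table space is reserved when a 4 GiB
 * identity mapping is built entirely from 4k pages; with PSE the PTE
 * term disappears for the 2M-aligned bulk of the range.
 */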

void __init native_pagetable_reserve(u64 start, u64 end)
{
        memblock_x86_reserve_range(start, end, "PGTABLE");
}

struct map_range {
        unsigned long start;
        unsigned long end;
        unsigned page_size_mask;
};

#ifdef CONFIG_X86_32
#define NR_RANGE_MR 3
#else /* CONFIG_X86_64 */
#define NR_RANGE_MR 5
#endif

static int __meminit save_mr(struct map_range *mr, int nr_range,
                             unsigned long start_pfn, unsigned long end_pfn,
                             unsigned long page_size_mask)
{
        if (start_pfn < end_pfn) {
                if (nr_range >= NR_RANGE_MR)
                        panic("run out of range for init_memory_mapping\n");
                mr[nr_range].start = start_pfn<<PAGE_SHIFT;
                mr[nr_range].end   = end_pfn<<PAGE_SHIFT;
                mr[nr_range].page_size_mask = page_size_mask;
                nr_range++;
        }

        return nr_range;
}

/*
 * Set up the direct mapping of physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and takes its pages directly
 * from physical memory; to access them, they are temporarily mapped.
 */
unsigned long __init_refok init_memory_mapping(unsigned long start,
                                               unsigned long end)
{
        unsigned long page_size_mask = 0;
        unsigned long start_pfn, end_pfn;
        unsigned long ret = 0;
        unsigned long pos;

        struct map_range mr[NR_RANGE_MR];
        int nr_range, i;
        int use_pse, use_gbpages;

        printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);

#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
        /*
         * For CONFIG_DEBUG_PAGEALLOC, the identity mapping uses small pages
         * only. This simplifies cpa(), which otherwise would need to support
         * splitting large pages into small ones in interrupt context, etc.
         */
        use_pse = use_gbpages = 0;
#else
        use_pse = cpu_has_pse;
        use_gbpages = direct_gbpages;
#endif

        /* Enable PSE if available */
        if (cpu_has_pse)
                set_in_cr4(X86_CR4_PSE);

        /* Enable PGE if available */
        if (cpu_has_pge) {
                set_in_cr4(X86_CR4_PGE);
                __supported_pte_mask |= _PAGE_GLOBAL;
        }

        if (use_gbpages)
                page_size_mask |= 1 << PG_LEVEL_1G;
        if (use_pse)
                page_size_mask |= 1 << PG_LEVEL_2M;
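        /*
         * Note (illustrative, assuming the usual enum pg_level layout with
         * PG_LEVEL_2M == 2 and PG_LEVEL_1G == 3): a machine with both PSE
         * and gbpages available ends up with page_size_mask == 0xc, i.e.
         * both 2M and 1G mappings are permitted for suitably aligned
         * ranges below.
         */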

        memset(mr, 0, sizeof(mr));
        nr_range = 0;

        /* head: handle a start that is not big-page aligned */
        start_pfn = start >> PAGE_SHIFT;
        pos = start_pfn << PAGE_SHIFT;
#ifdef CONFIG_X86_32
        /*
         * Don't use a large page for the first 2/4MB of memory
         * because there are often fixed-size MTRRs there, and
         * MTRRs overlapping large pages can cause slowdowns.
         */
        if (pos == 0)
                end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
        else
                end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
                                 << (PMD_SHIFT - PAGE_SHIFT);
#else /* CONFIG_X86_64 */
        end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
                        << (PMD_SHIFT - PAGE_SHIFT);
#endif
        if (end_pfn > (end >> PAGE_SHIFT))
                end_pfn = end >> PAGE_SHIFT;
        if (start_pfn < end_pfn) {
                nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
                pos = end_pfn << PAGE_SHIFT;
        }

        /* big page (2M) range */
        start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
                         << (PMD_SHIFT - PAGE_SHIFT);
#ifdef CONFIG_X86_32
        end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
#else /* CONFIG_X86_64 */
        end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
                         << (PUD_SHIFT - PAGE_SHIFT);
        if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
                end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
#endif

        if (start_pfn < end_pfn) {
                nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
                                page_size_mask & (1<<PG_LEVEL_2M));
                pos = end_pfn << PAGE_SHIFT;
        }

#ifdef CONFIG_X86_64
        /* big page (1G) range */
        start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
                         << (PUD_SHIFT - PAGE_SHIFT);
        end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
        if (start_pfn < end_pfn) {
                nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
                                page_size_mask &
                                 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
                pos = end_pfn << PAGE_SHIFT;
        }

        /* tail that is not 1G-page aligned */
        start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
                         << (PMD_SHIFT - PAGE_SHIFT);
        end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
        if (start_pfn < end_pfn) {
                nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
                                page_size_mask & (1<<PG_LEVEL_2M));
                pos = end_pfn << PAGE_SHIFT;
        }
#endif

        /* tail that is not 2M-page aligned */
        start_pfn = pos>>PAGE_SHIFT;
        end_pfn = end>>PAGE_SHIFT;
        nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);

        /* try to merge contiguous ranges with the same page size */
        for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
                unsigned long old_start;
                if (mr[i].end != mr[i+1].start ||
                    mr[i].page_size_mask != mr[i+1].page_size_mask)
                        continue;
                /* merge: fold mr[i+1..] down over mr[i], keeping mr[i].start */
                old_start = mr[i].start;
                memmove(&mr[i], &mr[i+1],
                        (nr_range - 1 - i) * sizeof(struct map_range));
                mr[i--].start = old_start;
                nr_range--;
        }

        for (i = 0; i < nr_range; i++)
                printk(KERN_DEBUG " %010lx - %010lx page %s\n",
                                mr[i].start, mr[i].end,
                        (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
                         (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));

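        /*
         * Illustrative example (not from the original source): on x86_64
         * with PSE and gbpages both enabled, a call covering
         * [0, 0x7ff00000) is split into three ranges, and the debug
         * output above would read roughly:
         *
         *    0000000000 - 0040000000 page 1G
         *    0040000000 - 007fe00000 page 2M
         *    007fe00000 - 007ff00000 page 4k
         *
         * The merge loop leaves these alone because their page size
         * masks differ.
         */
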
        /*
         * Find space for the kernel direct mapping tables.
         *
         * Ideally these tables would be allocated on the node whose memory
         * they map; unfortunately this currently runs before the nodes are
         * discovered.
         */
        if (!after_bootmem)
                find_early_table_space(end, use_pse, use_gbpages);

        for (i = 0; i < nr_range; i++)
                ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
                                                   mr[i].page_size_mask);

#ifdef CONFIG_X86_32
        early_ioremap_page_table_range_init();

        load_cr3(swapper_pg_dir);
#endif

        __flush_tlb_all();

        /*
         * Reserve the kernel pagetable pages we used (pgt_buf_start -
         * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
         * so that they can be reused for other purposes.
         *
         * On native this just means calling memblock_x86_reserve_range;
         * on Xen it also means marking RW the pagetable pages that we
         * allocated before but that haven't been used yet.
         *
         * In fact, on Xen we mark RO the whole range pgt_buf_start -
         * pgt_buf_top, because we have to make sure that when
         * init_memory_mapping reaches the pagetable pages area, it maps
         * RO all the pagetable pages, including the ones that are beyond
         * pgt_buf_end at that time.
         */
        if (!after_bootmem && pgt_buf_end > pgt_buf_start)
                x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
                                PFN_PHYS(pgt_buf_end));

        if (!after_bootmem)
                early_memtest(start, end);

        return ret >> PAGE_SHIFT;
}
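
/*
 * For reference (a sketch, not verbatim from the original tree): in this
 * era of the kernel, setup_arch() drives the direct mapping roughly like
 *
 *      max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 *      max_pfn_mapped = max_low_pfn_mapped;
 *      if (max_pfn > max_low_pfn)      // 64-bit, memory above 4 GiB
 *              max_pfn_mapped = init_memory_mapping(1UL<<32,
 *                                                   max_pfn<<PAGE_SHIFT);
 *
 * using the returned highest mapped pfn to track how far the identity
 * mapping extends.
 */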

/*
 * devmem_is_allowed() checks to see if /dev/mem access to a certain address
 * is valid. The argument is a physical page number.
 *
 * On x86, access has to be given to the first megabyte of RAM because that
 * area contains BIOS code and data regions used by X, dosemu and similar
 * apps. Access also has to be given to non-kernel-RAM areas, which contain
 * the PCI mmio resources as well as potential BIOS/ACPI data regions.
 */
int devmem_is_allowed(unsigned long pagenr)
{
        if (pagenr <= 256)
                return 1;
        if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
                return 0;
        if (!page_is_ram(pagenr))
                return 1;
        return 0;
}
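
/*
 * Illustrative examples (not part of the original source): a page in the
 * legacy VGA/BIOS area within the first megabyte (pagenr <= 256) is always
 * allowed; a page that is ordinary kernel RAM is denied; a PCI MMIO page
 * is allowed unless a driver has claimed the region exclusively, in which
 * case iomem_is_exclusive() makes the check fail.
 */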

void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
        unsigned long addr;
        unsigned long begin_aligned, end_aligned;

        /* Make sure boundaries are page aligned */
        begin_aligned = PAGE_ALIGN(begin);
        end_aligned   = end & PAGE_MASK;

        if (WARN_ON(begin_aligned != begin || end_aligned != end)) {
                begin = begin_aligned;
                end   = end_aligned;
        }

        if (begin >= end)
                return;

        addr = begin;

        /*
         * If debugging page accesses, do not free this memory; instead
         * mark it not present so that any buggy init-section access
         * creates a kernel page fault:
         */
#ifdef CONFIG_DEBUG_PAGEALLOC
        printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
                begin, end);
        set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
#else
        /*
         * The kernel text was just marked read-only above; now that we
         * are going to free part of it, we need to make that part
         * writable and non-executable first.
         */
        set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
        set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);

        printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);

        for (; addr < end; addr += PAGE_SIZE) {
                ClearPageReserved(virt_to_page(addr));
                init_page_count(virt_to_page(addr));
                memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
                free_page(addr);
                totalram_pages++;
        }
#endif
}

void free_initmem(void)
{
        free_init_pages("unused kernel memory",
                        (unsigned long)(&__init_begin),
                        (unsigned long)(&__init_end));
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
        /*
         * end may not be page aligned, and we cannot align it here: an
         * aligned initrd_end could confuse the decompressor. The trailing
         * partial page has already been reserved in
         *   - i386_start_kernel()
         *   - x86_64_start_kernel()
         *   - relocate_initrd()
         * so it is safe to PAGE_ALIGN() here and free up to that page.
         */
        free_init_pages("initrd memory", start, PAGE_ALIGN(end));
}
#endif