linux/arch/x86/mm/init_32.c
/*
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/pci.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>

#include <asm/asm.h>
#include <asm/bios_ebda.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
#include <asm/fixmap.h>
#include <asm/e820/api.h>
#include <asm/apic.h>
#include <asm/bugs.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/olpc_ofw.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/page_types.h>
#include <asm/cpu_entry_area.h>
#include <asm/init.h>

#include "mm_internal.h"

unsigned long highstart_pfn, highend_pfn;

bool __read_mostly __vmalloc_start_set = false;

/*
 * Creates a middle page table and puts a pointer to it in the
 * given global directory entry. This only returns the pgd entry
 * in non-PAE compilation mode, since the middle layer is folded.
 */
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
        if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
                pmd_table = (pmd_t *)alloc_low_page();
                paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
                set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
                p4d = p4d_offset(pgd, 0);
                pud = pud_offset(p4d, 0);
                BUG_ON(pmd_table != pmd_offset(pud, 0));

                return pmd_table;
        }
#endif
        p4d = p4d_offset(pgd, 0);
        pud = pud_offset(p4d, 0);
        pmd_table = pmd_offset(pud, 0);

        return pmd_table;
}

/*
 * Create a page table and place a pointer to it in a middle page
 * directory entry:
 */
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
        if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
                pte_t *page_table = (pte_t *)alloc_low_page();

                paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                BUG_ON(page_table != pte_offset_kernel(pmd, 0));
        }

        return pte_offset_kernel(pmd, 0);
}

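/*
 * Ensure that the intermediate page-table pages covering @vaddr exist in
 * swapper_pg_dir, allocating them from the early allocator if necessary,
 * and return a pointer to the pmd entry (populate_extra_pmd) or the pte
 * entry (populate_extra_pte) for that address.
 */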
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
        int pgd_idx = pgd_index(vaddr);
        int pmd_idx = pmd_index(vaddr);

        return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
}

pte_t * __init populate_extra_pte(unsigned long vaddr)
{
        int pte_idx = pte_index(vaddr);
        pmd_t *pmd;

        pmd = populate_extra_pmd(vaddr);
        return one_page_table_init(pmd) + pte_idx;
}

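/*
 * Count how many pte pages are needed for the part of [start, end) that
 * overlaps the kmap fixmap area, so that page_table_range_init() can
 * pre-allocate them as one contiguous block (see page_table_kmap_check()
 * below).
 */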
static unsigned long __init
page_table_range_init_count(unsigned long start, unsigned long end)
{
        unsigned long count = 0;
#ifdef CONFIG_HIGHMEM
        int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
        int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
        int pgd_idx, pmd_idx;
        unsigned long vaddr;

        if (pmd_idx_kmap_begin == pmd_idx_kmap_end)
                return 0;

        vaddr = start;
        pgd_idx = pgd_index(vaddr);
        pmd_idx = pmd_index(vaddr);

        for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) {
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
                                                        pmd_idx++) {
                        if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin &&
                            (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end)
                                count++;
                        vaddr += PMD_SIZE;
                }
                pmd_idx = 0;
        }
#endif
        return count;
}

static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
                                           unsigned long vaddr, pte_t *lastpte,
                                           void **adr)
{
#ifdef CONFIG_HIGHMEM
        /*
         * Something (early fixmap) may already have put a pte
         * page here, which causes the page table allocation
         * to become nonlinear. Attempt to fix it, and if it
         * is still nonlinear then we have to BUG().
         */
        int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
        int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;

        if (pmd_idx_kmap_begin != pmd_idx_kmap_end
            && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
            && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) {
                pte_t *newpte;
                int i;

                BUG_ON(after_bootmem);
                newpte = *adr;
                for (i = 0; i < PTRS_PER_PTE; i++)
                        set_pte(newpte + i, pte[i]);
                *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE);

                paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
                set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
                BUG_ON(newpte != pte_offset_kernel(pmd, 0));
                __flush_tlb_all();

                paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
                pte = newpte;
        }
        BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
               && vaddr > fix_to_virt(FIX_KMAP_END)
               && lastpte && lastpte + PTRS_PER_PTE != pte);
#endif
        return pte;
}

/*
 * This function initializes a range of kernel virtual memory with new
 * bootmem page tables, wherever page tables are missing in that range.
 *
 * NOTE: The pagetables are allocated contiguously in physical memory,
 * so we can cache the location of the first one and move around
 * without checking the pgd every time.
 */
static void __init
page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
        int pgd_idx, pmd_idx;
        unsigned long vaddr;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte = NULL;
        unsigned long count = page_table_range_init_count(start, end);
        void *adr = NULL;

        if (count)
                adr = alloc_low_pages(count);

        vaddr = start;
        pgd_idx = pgd_index(vaddr);
        pmd_idx = pmd_index(vaddr);
        pgd = pgd_base + pgd_idx;

        for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
                pmd = one_md_table_init(pgd);
                pmd = pmd + pmd_index(vaddr);
                for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
                                                        pmd++, pmd_idx++) {
                        pte = page_table_kmap_check(one_page_table_init(pmd),
                                                    pmd, vaddr, pte, &adr);

                        vaddr += PMD_SIZE;
                }
                pmd_idx = 0;
        }
}

static inline int is_kernel_text(unsigned long addr)
{
        if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
                return 1;
        return 0;
}

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET:
 */
unsigned long __init
kernel_physical_mapping_init(unsigned long start,
                             unsigned long end,
                             unsigned long page_size_mask)
{
        int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
        unsigned long last_map_addr = end;
        unsigned long start_pfn, end_pfn;
        pgd_t *pgd_base = swapper_pg_dir;
        int pgd_idx, pmd_idx, pte_ofs;
        unsigned long pfn;
        pgd_t *pgd;
        pmd_t *pmd;
        pte_t *pte;
        unsigned pages_2m, pages_4k;
        int mapping_iter;

        start_pfn = start >> PAGE_SHIFT;
        end_pfn = end >> PAGE_SHIFT;

        /*
         * The first iteration sets up the identity mapping using
         * large/small pages based on use_pse, with the other attributes
         * the same as set by the early code in head_32.S.
         *
         * The second iteration sets up the appropriate attributes
         * (NX, GLOBAL, ...) as desired for the kernel identity mapping.
         *
         * This two-pass mechanism conforms to the TLB app note which says:
         *
         *     "Software should not write to a paging-structure entry in a way
         *      that would change, for any linear address, both the page size
         *      and either the page frame or attributes."
         */
        mapping_iter = 1;

        if (!boot_cpu_has(X86_FEATURE_PSE))
                use_pse = 0;

repeat:
        pages_2m = pages_4k = 0;
        pfn = start_pfn;
        pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
        pgd = pgd_base + pgd_idx;
        for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
                pmd = one_md_table_init(pgd);

                if (pfn >= end_pfn)
                        continue;
#ifdef CONFIG_X86_PAE
                pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
                pmd += pmd_idx;
#else
                pmd_idx = 0;
#endif
                for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
                     pmd++, pmd_idx++) {
                        unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;

                        /*
                         * Map with big pages if possible, otherwise
                         * create normal page tables:
                         */
                        if (use_pse) {
                                unsigned int addr2;
                                pgprot_t prot = PAGE_KERNEL_LARGE;
                                /*
                                 * The first pass uses the same initial
                                 * identity mapping attribute + _PAGE_PSE.
                                 */
                                pgprot_t init_prot =
                                        __pgprot(PTE_IDENT_ATTR |
                                                 _PAGE_PSE);

                                pfn &= PMD_MASK >> PAGE_SHIFT;
                                addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
                                        PAGE_OFFSET + PAGE_SIZE-1;

                                if (is_kernel_text(addr) ||
                                    is_kernel_text(addr2))
                                        prot = PAGE_KERNEL_LARGE_EXEC;

                                pages_2m++;
                                if (mapping_iter == 1)
                                        set_pmd(pmd, pfn_pmd(pfn, init_prot));
                                else
                                        set_pmd(pmd, pfn_pmd(pfn, prot));

                                pfn += PTRS_PER_PTE;
                                continue;
                        }
                        pte = one_page_table_init(pmd);

                        pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
                        pte += pte_ofs;
                        for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
                             pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
                                pgprot_t prot = PAGE_KERNEL;
                                /*
                                 * The first pass uses the same initial
                                 * identity mapping attribute.
                                 */
                                pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);

                                if (is_kernel_text(addr))
                                        prot = PAGE_KERNEL_EXEC;

                                pages_4k++;
                                if (mapping_iter == 1) {
                                        set_pte(pte, pfn_pte(pfn, init_prot));
                                        last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
                                } else
                                        set_pte(pte, pfn_pte(pfn, prot));
                        }
                }
        }
        if (mapping_iter == 1) {
                /*
                 * Update the direct mapping page count only in the
                 * first iteration.
                 */
                update_page_count(PG_LEVEL_2M, pages_2m);
                update_page_count(PG_LEVEL_4K, pages_4k);

                /*
                 * Do a local global TLB flush, which flushes the previous
                 * mappings present in both the small and large page TLBs.
                 */
                __flush_tlb_all();

                /*
                 * The second iteration will set the actual desired PTE
                 * attributes.
                 */
                mapping_iter = 2;
                goto repeat;
        }
        return last_map_addr;
}

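/*
 * kmap_pte caches the pte backing the first FIX_KMAP_* fixmap slot;
 * kmap_atomic() in highmem_32.c indexes from it. kmap_get_fixmap_pte()
 * simply walks the (folded) page-table levels to find the pte for a
 * fixmap address.
 */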
pte_t *kmap_pte;

static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
        pgd_t *pgd = pgd_offset_k(vaddr);
        p4d_t *p4d = p4d_offset(pgd, vaddr);
        pud_t *pud = pud_offset(p4d, vaddr);
        pmd_t *pmd = pmd_offset(pud, vaddr);
        return pte_offset_kernel(pmd, vaddr);
}

static void __init kmap_init(void)
{
        unsigned long kmap_vstart;

        /*
         * Cache the first kmap pte:
         */
        kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
        kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
}

#ifdef CONFIG_HIGHMEM
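/*
 * Build the page tables that back the persistent kmap (PKMAP) window and
 * cache a pointer to its pte page in pkmap_page_table, which kmap() and
 * kunmap() use to install and remove persistent highmem mappings.
 */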
static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
        unsigned long vaddr;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        vaddr = PKMAP_BASE;
        page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);

        pgd = swapper_pg_dir + pgd_index(vaddr);
        p4d = p4d_offset(pgd, vaddr);
        pud = pud_offset(p4d, vaddr);
        pmd = pmd_offset(pud, vaddr);
        pte = pte_offset_kernel(pmd, vaddr);
        pkmap_page_table = pte;
}

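/*
 * Hand every free highmem page in [start_pfn, end_pfn) that memblock
 * knows about on node @nid over to the buddy allocator.
 */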
void __init add_highpages_with_active_regions(int nid,
                         unsigned long start_pfn, unsigned long end_pfn)
{
        phys_addr_t start, end;
        u64 i;

        for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &start, &end, NULL) {
                unsigned long pfn = clamp_t(unsigned long, PFN_UP(start),
                                            start_pfn, end_pfn);
                unsigned long e_pfn = clamp_t(unsigned long, PFN_DOWN(end),
                                              start_pfn, end_pfn);
                for ( ; pfn < e_pfn; pfn++)
                        if (pfn_valid(pfn))
                                free_highmem_page(pfn_to_page(pfn));
        }
}
#else
static inline void permanent_kmaps_init(pgd_t *pgd_base)
{
}
#endif /* CONFIG_HIGHMEM */

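/*
 * Copy the kernel half of swapper_pg_dir back into initial_page_table so
 * the boot-time page table stays in sync with the one the kernel runs on.
 */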
void __init sync_initial_page_table(void)
{
        clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
                        swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
                        KERNEL_PGD_PTRS);

        /*
         * sync back low identity map too.  It is used for example
         * in the 32-bit EFI stub.
         */
        clone_pgd_range(initial_page_table,
                        swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
                        min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
}

void __init native_pagetable_init(void)
{
        unsigned long pfn, va;
        pgd_t *pgd, *base = swapper_pg_dir;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        /*
         * Remove any mappings which extend past the end of physical
         * memory from the boot time page table.
         * In the virtual address space there are at least two pages
         * between VMALLOC_END and the pkmap or fixmap area, by the
         * definition of VMALLOC_END, and max_low_pfn is set to the
         * physical address of VMALLOC_END. If the initial memory
         * mapping did its job, we should find a pte in use near
         * max_low_pfn, or a pmd that is not present.
         */
        for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
                va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
                pgd = base + pgd_index(va);
                if (!pgd_present(*pgd))
                        break;

                p4d = p4d_offset(pgd, va);
                pud = pud_offset(p4d, va);
                pmd = pmd_offset(pud, va);
                if (!pmd_present(*pmd))
                        break;

                /* There should not be a large page here */
                if (pmd_large(*pmd)) {
                        pr_warn("try to clear pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx, but pmd is big page and is not using pte !\n",
                                pfn, pmd, __pa(pmd));
                        BUG_ON(1);
                }

                pte = pte_offset_kernel(pmd, va);
                if (!pte_present(*pte))
                        break;

                printk(KERN_DEBUG "clearing pte for ram above max_low_pfn: pfn: %lx pmd: %p pmd phys: %lx pte: %p pte phys: %lx\n",
                                pfn, pmd, __pa(pmd), pte, __pa(pte));
                pte_clear(NULL, va, pte);
        }
        paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
        paging_init();
}

/*
 * Build a proper pagetable for the kernel mappings.  Up until this
 * point, we've been running on some set of pagetables constructed by
 * the boot process.
 *
 * If we're booting on native hardware, this will be a pagetable
 * constructed in arch/x86/kernel/head_32.S.  The root of the
 * pagetable will be swapper_pg_dir.
 *
 * If we're booting paravirtualized under a hypervisor, then there are
 * more options: we may already be running PAE, and the pagetable may
 * or may not be based in swapper_pg_dir.  In any case,
 * paravirt_pagetable_init() will set up swapper_pg_dir
 * appropriately for the rest of the initialization to work.
 *
 * In general, pagetable_init() assumes that the pagetable may already
 * be partially populated, and so it avoids stomping on any existing
 * mappings.
 */
void __init early_ioremap_page_table_range_init(void)
{
        pgd_t *pgd_base = swapper_pg_dir;
        unsigned long vaddr, end;

        /*
         * Fixed mappings, only the page table structure has to be
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
        end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
        page_table_range_init(vaddr, end, pgd_base);
        early_ioremap_reset();
}

static void __init pagetable_init(void)
{
        pgd_t *pgd_base = swapper_pg_dir;

        permanent_kmaps_init(pgd_base);
}

#define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
/* Bits supported by the hardware: */
pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
/* Bits allowed in normal kernel mappings: */
pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
EXPORT_SYMBOL(__default_kernel_pte_mask);

/* user-defined highmem size */
static unsigned int highmem_pages = -1;

/*
 * highmem=size forces highmem to be exactly 'size' bytes.
 * This works even on boxes that have no highmem otherwise.
 * This also works to reduce highmem size on bigger boxes.
 */
static int __init parse_highmem(char *arg)
{
        if (!arg)
                return -EINVAL;

        highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
        return 0;
}
early_param("highmem", parse_highmem);

#define MSG_HIGHMEM_TOO_BIG \
        "highmem size (%luMB) is bigger than pages available (%luMB)!\n"

#define MSG_LOWMEM_TOO_SMALL \
        "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
/*
 * All of RAM fits into lowmem - but if the user wants highmem
 * artificially via the highmem=x boot parameter then create
 * it:
 */
static void __init lowmem_pfn_init(void)
{
        /* max_low_pfn is 0, we already have early_res support */
        max_low_pfn = max_pfn;

        if (highmem_pages == -1)
                highmem_pages = 0;
#ifdef CONFIG_HIGHMEM
        if (highmem_pages >= max_pfn) {
                printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
                        pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
                highmem_pages = 0;
        }
        if (highmem_pages) {
                if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
                        printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
                                pages_to_mb(highmem_pages));
                        highmem_pages = 0;
                }
                max_low_pfn -= highmem_pages;
        }
#else
        if (highmem_pages)
                printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
#endif
}

#define MSG_HIGHMEM_TOO_SMALL \
        "only %luMB highmem pages available, ignoring highmem size of %luMB!\n"

#define MSG_HIGHMEM_TRIMMED \
        "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
/*
 * We have more RAM than fits into lowmem - we try to put it into
 * highmem, also taking the highmem=x boot parameter into account:
 */
static void __init highmem_pfn_init(void)
{
        max_low_pfn = MAXMEM_PFN;

        if (highmem_pages == -1)
                highmem_pages = max_pfn - MAXMEM_PFN;

        if (highmem_pages + MAXMEM_PFN < max_pfn)
                max_pfn = MAXMEM_PFN + highmem_pages;

        if (highmem_pages + MAXMEM_PFN > max_pfn) {
                printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
                        pages_to_mb(max_pfn - MAXMEM_PFN),
                        pages_to_mb(highmem_pages));
                highmem_pages = 0;
        }
#ifndef CONFIG_HIGHMEM
        /* Maximum memory usable is what is directly addressable */
        printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
        if (max_pfn > MAX_NONPAE_PFN)
                printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
        else
                printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
        max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_HIGHMEM64G
        if (max_pfn > MAX_NONPAE_PFN) {
                max_pfn = MAX_NONPAE_PFN;
                printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
        }
#endif /* !CONFIG_HIGHMEM64G */
#endif /* !CONFIG_HIGHMEM */
}

/*
 * Determine low and high memory ranges:
 */
void __init find_low_pfn_range(void)
{
        /* This can update max_pfn: */

        if (max_pfn <= MAXMEM_PFN)
                lowmem_pfn_init();
        else
                highmem_pfn_init();
}

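/*
 * Register all of memory with node 0 and work out the lowmem/highmem
 * boundaries (highstart_pfn, highend_pfn, high_memory) for configurations
 * without multiple NUMA nodes.
 */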
#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init initmem_init(void)
{
#ifdef CONFIG_HIGHMEM
        highstart_pfn = highend_pfn = max_pfn;
        if (max_pfn > max_low_pfn)
                highstart_pfn = max_low_pfn;
        printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
                pages_to_mb(highend_pfn - highstart_pfn));
        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif

        memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
        sparse_memory_present_with_active_regions(0);

#ifdef CONFIG_FLATMEM
        max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
#endif
        __vmalloc_start_set = true;

        printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
                        pages_to_mb(max_low_pfn));

        setup_bootmem_allocator();
}
#endif /* !CONFIG_NEED_MULTIPLE_NODES */

void __init setup_bootmem_allocator(void)
{
        printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
                 max_pfn_mapped<<PAGE_SHIFT);
        printk(KERN_INFO "  low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
}

/*
 * paging_init() sets up the page tables - note that the first 8MB are
 * already mapped by head.S.
 *
 * This routine also unmaps the page at virtual kernel address 0, so
 * that we can trap those pesky NULL-reference errors in the kernel.
 */
void __init paging_init(void)
{
        pagetable_init();

        __flush_tlb_all();

        kmap_init();

        /*
         * NOTE: at this point the bootmem allocator is fully available.
         */
        olpc_dt_build_devicetree();
        sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
        zone_sizes_init();
}

/*
 * Test if the WP bit works in supervisor mode. It isn't supported on 386's
 * and also on some strange 486's. All 586+'s are OK. This used to involve
 * black magic jumps to work around some nasty CPU bugs, but fortunately the
 * switch to using exceptions got rid of all that.
 */
static void __init test_wp_bit(void)
{
        char z = 0;

        printk(KERN_INFO "Checking if this processor honours the WP bit even in supervisor mode...");

        __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO);

        if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) {
                clear_fixmap(FIX_WP_TEST);
                printk(KERN_CONT "Ok.\n");
                return;
        }

        printk(KERN_CONT "No.\n");
        panic("Linux doesn't support CPUs with broken WP.");
}

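/*
 * Late memory setup: release all low (and, with CONFIG_HIGHMEM, high)
 * memory to the buddy allocator, print and sanity-check the virtual
 * memory layout, then verify that the CPU honours the WP bit.
 */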
void __init mem_init(void)
{
        pci_iommu_alloc();

#ifdef CONFIG_FLATMEM
        BUG_ON(!mem_map);
#endif
        /*
         * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
         * be done before free_all_bootmem(). Memblock uses free low memory
         * for temporary data (see find_range_array()) and for this purpose
         * can use pages that were already passed to the buddy allocator and
         * hence are marked as not accessible in the page tables when
         * compiled with CONFIG_DEBUG_PAGEALLOC. Otherwise the order of
         * initialization is not important here.
         */
        set_highmem_pages_init();

        /* this will put all low memory onto the freelists */
        free_all_bootmem();

        after_bootmem = 1;
        x86_init.hyper.init_after_bootmem();

        mem_init_print_info(NULL);
        printk(KERN_INFO "virtual kernel memory layout:\n"
                "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
                "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
                "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
#endif
                "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
                "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
                "      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
                "      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
                "      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
                FIXADDR_START, FIXADDR_TOP,
                (FIXADDR_TOP - FIXADDR_START) >> 10,

                CPU_ENTRY_AREA_BASE,
                CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
                CPU_ENTRY_AREA_MAP_SIZE >> 10,

#ifdef CONFIG_HIGHMEM
                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
                (LAST_PKMAP*PAGE_SIZE) >> 10,
#endif

                VMALLOC_START, VMALLOC_END,
                (VMALLOC_END - VMALLOC_START) >> 20,

                (unsigned long)__va(0), (unsigned long)high_memory,
                ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,

                (unsigned long)&__init_begin, (unsigned long)&__init_end,
                ((unsigned long)&__init_end -
                 (unsigned long)&__init_begin) >> 10,

                (unsigned long)&_etext, (unsigned long)&_edata,
                ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,

                (unsigned long)&_text, (unsigned long)&_etext,
                ((unsigned long)&_etext - (unsigned long)&_text) >> 10);

        /*
         * Check boundaries twice: Some fundamental inconsistencies can
         * be detected at build time already.
         */
#define __FIXADDR_TOP (-PAGE_SIZE)
#ifdef CONFIG_HIGHMEM
        BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE  > FIXADDR_START);
        BUILD_BUG_ON(VMALLOC_END                        > PKMAP_BASE);
#endif
#define high_memory (-128UL << 20)
        BUILD_BUG_ON(VMALLOC_START                      >= VMALLOC_END);
#undef high_memory
#undef __FIXADDR_TOP

#ifdef CONFIG_HIGHMEM
        BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE        > FIXADDR_START);
        BUG_ON(VMALLOC_END                              > PKMAP_BASE);
#endif
        BUG_ON(VMALLOC_START                            >= VMALLOC_END);
        BUG_ON((unsigned long)high_memory               > VMALLOC_START);

        test_wp_bit();
}

#ifdef CONFIG_MEMORY_HOTPLUG
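/*
 * Memory hotplug: hand a newly added physical range over to the core MM
 * (arch_add_memory), or remove it again (arch_remove_memory). Both work
 * at page-frame granularity.
 */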
int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
                bool want_memblock)
{
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;

        return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}

#ifdef CONFIG_MEMORY_HOTREMOVE
int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        struct zone *zone;

        zone = page_zone(pfn_to_page(start_pfn));
        return __remove_pages(zone, start_pfn, nr_pages, altmap);
}
#endif
#endif

int kernel_set_to_readonly __read_mostly;

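/*
 * Flip the protection of the kernel text mapping between read-write and
 * read-only, e.g. around runtime code patching. These only act once
 * mark_rodata_ro() has set kernel_set_to_readonly.
 */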
void set_kernel_text_rw(void)
{
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;

        if (!kernel_set_to_readonly)
                return;

        pr_debug("Set kernel text: %lx - %lx for read write\n",
                 start, start+size);

        set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
}

void set_kernel_text_ro(void)
{
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;

        if (!kernel_set_to_readonly)
                return;

        pr_debug("Set kernel text: %lx - %lx for read only\n",
                 start, start+size);

        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
}

static void mark_nxdata_nx(void)
{
        /*
         * When this is called, init has already been executed and released,
         * so everything past _etext should be NX.
         */
        unsigned long start = PFN_ALIGN(_etext);
        /*
         * This comes from the is_kernel_text() upper limit. Also use HPAGE
         * alignment here, since large pages may have been used for the
         * mapping:
         */
        unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

        if (__supported_pte_mask & _PAGE_NX)
                printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10);
        set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT);
}

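/*
 * Write-protect the kernel: first .text, then the read-only data up to
 * __end_rodata, and finally mark everything after _etext non-executable.
 */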
void mark_rodata_ro(void)
{
        unsigned long start = PFN_ALIGN(_text);
        unsigned long size = PFN_ALIGN(_etext) - start;

        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
        printk(KERN_INFO "Write protecting the kernel text: %luk\n",
                size >> 10);

        kernel_set_to_readonly = 1;

#ifdef CONFIG_CPA_DEBUG
        printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
                start, start+size);
        set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);

        printk(KERN_INFO "Testing CPA: write protecting again\n");
        set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif

        start += size;
        size = (unsigned long)__end_rodata - start;
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
        printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
                size >> 10);

#ifdef CONFIG_CPA_DEBUG
        printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size);
        set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);

        printk(KERN_INFO "Testing CPA: write protecting again\n");
        set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
#endif
        mark_nxdata_nx();
        if (__supported_pte_mask & _PAGE_NX)
                debug_checkwx();
}