linux/arch/arm/mm/mmu.c
/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mman.h>
#include <linux/nodemask.h>

#include <asm/mach-types.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#include "mm.h"

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

extern void _stext, _etext, __data_start, _end;
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

#define CPOLICY_UNCACHED        0
#define CPOLICY_BUFFERED        1
#define CPOLICY_WRITETHROUGH    2
#define CPOLICY_WRITEBACK       3
#define CPOLICY_WRITEALLOC      4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
        const char      policy[16];
        unsigned int    cr_mask;
        unsigned int    pmd;
        unsigned int    pte;
};

static struct cachepolicy cache_policies[] __initdata = {
        {
                .policy         = "uncached",
                .cr_mask        = CR_W|CR_C,
                .pmd            = PMD_SECT_UNCACHED,
                .pte            = 0,
        }, {
                .policy         = "buffered",
                .cr_mask        = CR_C,
                .pmd            = PMD_SECT_BUFFERED,
                .pte            = PTE_BUFFERABLE,
        }, {
                .policy         = "writethrough",
                .cr_mask        = 0,
                .pmd            = PMD_SECT_WT,
                .pte            = PTE_CACHEABLE,
        }, {
                .policy         = "writeback",
                .cr_mask        = 0,
                .pmd            = PMD_SECT_WB,
                .pte            = PTE_BUFFERABLE|PTE_CACHEABLE,
        }, {
                .policy         = "writealloc",
                .cr_mask        = 0,
                .pmd            = PMD_SECT_WBWA,
                .pte            = PTE_BUFFERABLE|PTE_CACHEABLE,
        }
};

/*
 * These are useful for identifying cache coherency problems
 * by allowing the cache, or the cache and write buffer, to be
 * turned off.  (Note: the write buffer should not be enabled
 * while the cache is off.)
 */
static void __init early_cachepolicy(char **p)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
                int len = strlen(cache_policies[i].policy);

                if (memcmp(*p, cache_policies[i].policy, len) == 0) {
                        cachepolicy = i;
                        cr_alignment &= ~cache_policies[i].cr_mask;
                        cr_no_alignment &= ~cache_policies[i].cr_mask;
                        *p += len;
                        break;
                }
        }
        if (i == ARRAY_SIZE(cache_policies))
                printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
        if (cpu_architecture() >= CPU_ARCH_ARMv6) {
                printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
                cachepolicy = CPOLICY_WRITEBACK;
        }
        flush_cache_all();
        set_cr(cr_alignment);
}
__early_param("cachepolicy=", early_cachepolicy);
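/*
 * Example: booting with "cachepolicy=writethrough" matches the
 * "writethrough" entry above and clears any control register bits
 * named in its cr_mask; on ARMv6 and later the policy is forced
 * back to writeback.
 */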

static void __init early_nocache(char **__unused)
{
        char *p = "buffered";
        printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
        early_cachepolicy(&p);
}
__early_param("nocache", early_nocache);

static void __init early_nowrite(char **__unused)
{
        char *p = "uncached";
        printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
        early_cachepolicy(&p);
}
__early_param("nowb", early_nowrite);

static void __init early_ecc(char **p)
{
        if (memcmp(*p, "on", 2) == 0) {
                ecc_mask = PMD_PROTECTION;
                *p += 2;
        } else if (memcmp(*p, "off", 3) == 0) {
                ecc_mask = 0;
                *p += 3;
        }
}
__early_param("ecc=", early_ecc);
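/*
 * Example: "ecc=on" sets ecc_mask to PMD_PROTECTION, which
 * build_mem_type_table() below ORs into the level 1 descriptors.
 */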

static int __init noalign_setup(char *__unused)
{
        cr_alignment &= ~CR_A;
        cr_no_alignment &= ~CR_A;
        set_cr(cr_alignment);
        return 1;
}
__setup("noalign", noalign_setup);

#ifndef CONFIG_SMP
void adjust_cr(unsigned long mask, unsigned long set)
{
        unsigned long flags;

        mask &= ~CR_A;

        set &= mask;

        local_irq_save(flags);

        cr_no_alignment = (cr_no_alignment & ~mask) | set;
        cr_alignment = (cr_alignment & ~mask) | set;

        set_cr((get_cr() & ~mask) | set);

        local_irq_restore(flags);
}
#endif

#define PROT_PTE_DEVICE         L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE
#define PROT_SECT_DEVICE        PMD_TYPE_SECT|PMD_SECT_XN|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] = {
        [MT_DEVICE] = {           /* Strongly ordered / ARMv6 shared device */
                .prot_pte       = PROT_PTE_DEVICE,
                .prot_l1        = PMD_TYPE_TABLE,
                .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_UNCACHED,
                .domain         = DOMAIN_IO,
        },
        [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
                .prot_pte       = PROT_PTE_DEVICE,
                .prot_pte_ext   = PTE_EXT_TEX(2),
                .prot_l1        = PMD_TYPE_TABLE,
                .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_TEX(2),
                .domain         = DOMAIN_IO,
        },
        [MT_DEVICE_CACHED] = {    /* ioremap_cached */
                .prot_pte       = PROT_PTE_DEVICE | L_PTE_CACHEABLE | L_PTE_BUFFERABLE,
                .prot_l1        = PMD_TYPE_TABLE,
                .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_WB,
                .domain         = DOMAIN_IO,
        },
        [MT_DEVICE_IXP2000] = {   /* IXP2400 requires XCB=101 for on-chip I/O */
                .prot_pte       = PROT_PTE_DEVICE,
                .prot_l1        = PMD_TYPE_TABLE,
                .prot_sect      = PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE |
                                  PMD_SECT_TEX(1),
                .domain         = DOMAIN_IO,
        },
        [MT_CACHECLEAN] = {
                .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
                .domain    = DOMAIN_KERNEL,
        },
        [MT_MINICLEAN] = {
                .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
                .domain    = DOMAIN_KERNEL,
        },
        [MT_LOW_VECTORS] = {
                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
                                L_PTE_EXEC,
                .prot_l1   = PMD_TYPE_TABLE,
                .domain    = DOMAIN_USER,
        },
        [MT_HIGH_VECTORS] = {
                .prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
                                L_PTE_USER | L_PTE_EXEC,
                .prot_l1   = PMD_TYPE_TABLE,
                .domain    = DOMAIN_USER,
        },
        [MT_MEMORY] = {
                .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
                .domain    = DOMAIN_KERNEL,
        },
        [MT_ROM] = {
                .prot_sect = PMD_TYPE_SECT,
                .domain    = DOMAIN_KERNEL,
        },
};

const struct mem_type *get_mem_type(unsigned int type)
{
        return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
        struct cachepolicy *cp;
        unsigned int cr = get_cr();
        unsigned int user_pgprot, kern_pgprot;
        int cpu_arch = cpu_architecture();
        int i;

        if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
                if (cachepolicy > CPOLICY_BUFFERED)
                        cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
                if (cachepolicy > CPOLICY_WRITETHROUGH)
                        cachepolicy = CPOLICY_WRITETHROUGH;
#endif
        }
        if (cpu_arch < CPU_ARCH_ARMv5) {
                if (cachepolicy >= CPOLICY_WRITEALLOC)
                        cachepolicy = CPOLICY_WRITEBACK;
                ecc_mask = 0;
        }

        /*
         * ARMv5 and lower, bit 4 must be set for page tables.
         * (was: cache "update-able on write" bit on ARM610)
         * However, Xscale cores require this bit to be cleared.
         */
        if (cpu_is_xscale()) {
                for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
                        mem_types[i].prot_sect &= ~PMD_BIT4;
                        mem_types[i].prot_l1 &= ~PMD_BIT4;
                }
        } else if (cpu_arch < CPU_ARCH_ARMv6) {
                for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
                        if (mem_types[i].prot_l1)
                                mem_types[i].prot_l1 |= PMD_BIT4;
                        if (mem_types[i].prot_sect)
                                mem_types[i].prot_sect |= PMD_BIT4;
                }
        }

        cp = &cache_policies[cachepolicy];
        kern_pgprot = user_pgprot = cp->pte;

        /*
         * Enable CPU-specific coherency if supported.
         * (Only available on XSC3 at the moment.)
         */
        if (arch_is_coherent()) {
                if (cpu_is_xsc3()) {
                        mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
                        mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
                }
        }

        /*
         * ARMv6 and above have extended page tables.
         */
        if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
                /*
                 * Mark cache clean areas and XIP ROM read only
                 * from SVC mode and no access from userspace.
                 */
                mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
                mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;

                /*
                 * Mark the device area as "shared device"
                 */
                mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE;
                mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;

#ifdef CONFIG_SMP
                /*
                 * Mark memory with the "shared" attribute for SMP systems
                 */
                user_pgprot |= L_PTE_SHARED;
                kern_pgprot |= L_PTE_SHARED;
                mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
#endif
        }

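        /*
         * Fold the chosen cache attributes into all 16 entries of
         * protection_map[], the table the core VM uses to turn
         * VM_READ/WRITE/EXEC/SHARED combinations into user page
         * protections.
         */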
        for (i = 0; i < 16; i++) {
                unsigned long v = pgprot_val(protection_map[i]);
                v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
                protection_map[i] = __pgprot(v);
        }

        mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
        mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;

        if (cpu_arch >= CPU_ARCH_ARMv5) {
#ifndef CONFIG_SMP
                /*
                 * Only use write-through for non-SMP systems
                 */
                mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
                mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
#endif
        } else {
                mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
        }

        pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
        pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
                                 L_PTE_DIRTY | L_PTE_WRITE |
                                 L_PTE_EXEC | kern_pgprot);

        mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
        mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
        mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
        mem_types[MT_ROM].prot_sect |= cp->pmd;

        switch (cp->pmd) {
        case PMD_SECT_WT:
                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
                break;
        case PMD_SECT_WB:
        case PMD_SECT_WBWA:
                mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
                break;
        }
        printk("Memory policy: ECC %sabled, Data cache %s\n",
                ecc_mask ? "en" : "dis", cp->policy);

        for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
                struct mem_type *t = &mem_types[i];
                if (t->prot_l1)
                        t->prot_l1 |= PMD_DOMAIN(t->domain);
                if (t->prot_sect)
                        t->prot_sect |= PMD_DOMAIN(t->domain);
        }
}

#define vectors_base()  (vectors_high() ? 0xffff0000 : 0)

static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
                                  unsigned long end, unsigned long pfn,
                                  const struct mem_type *type)
{
        pte_t *pte;

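        /*
         * Note: the double-sized allocation below is one full page; it
         * holds the hardware L2 tables for this region together with the
         * Linux copies of the PTEs, which carry state (young/dirty) that
         * the hardware PTE format has no room for.
         */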
        if (pmd_none(*pmd)) {
                pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
                __pmd_populate(pmd, __pa(pte) | type->prot_l1);
        }

        pte = pte_offset_kernel(pmd, addr);
        do {
                set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)),
                            type->prot_pte_ext);
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
}

static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
                                      unsigned long end, unsigned long phys,
                                      const struct mem_type *type)
{
        pmd_t *pmd = pmd_offset(pgd, addr);

        /*
         * Try a section mapping - end, addr and phys must all be aligned
         * to a section boundary.  Note that PMDs refer to the individual
         * L1 entries, whereas PGDs refer to a group of L1 entries making
         * up one logical pointer to an L2 table.
         */
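        /*
         * Each pgd slot spans 2MB and is backed by a pair of 1MB L1
         * section entries; when the mapping starts on an odd megabyte,
         * the first entry to write is the second of that pair.
         */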
        if (((addr | end | phys) & ~SECTION_MASK) == 0) {
                pmd_t *p = pmd;

                if (addr & SECTION_SIZE)
                        pmd++;

                do {
                        *pmd = __pmd(phys | type->prot_sect);
                        phys += SECTION_SIZE;
                } while (pmd++, addr += SECTION_SIZE, addr != end);

                flush_pmd_entry(p);
        } else {
                /*
                 * No need to loop; pte's aren't interested in the
                 * individual L1 entries.
                 */
                alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
        }
}

static void __init create_36bit_mapping(struct map_desc *md,
                                        const struct mem_type *type)
{
        unsigned long phys, addr, length, end;
        pgd_t *pgd;

        addr = md->virtual;
        phys = (unsigned long)__pfn_to_phys(md->pfn);
        length = PAGE_ALIGN(md->length);

        if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
                printk(KERN_ERR "MM: CPU does not support supersection "
                       "mapping for 0x%08llx at 0x%08lx\n",
                       __pfn_to_phys((u64)md->pfn), addr);
                return;
        }

        /* N.B. ARMv6 supersections are only defined to work with domain 0.
         *      Since domain assignments can in fact be arbitrary, the
         *      'domain == 0' check below is required to ensure that ARMv6
         *      supersections are only allocated for domain 0 regardless
         *      of the actual domain assignments in use.
         */
        if (type->domain) {
                printk(KERN_ERR "MM: invalid domain in supersection "
                       "mapping for 0x%08llx at 0x%08lx\n",
                       __pfn_to_phys((u64)md->pfn), addr);
                return;
        }

        if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
                printk(KERN_ERR "MM: cannot create mapping for "
                       "0x%08llx at 0x%08lx invalid alignment\n",
                       __pfn_to_phys((u64)md->pfn), addr);
                return;
        }

        /*
         * Shift bits [35:32] of address into bits [23:20] of PMD
         * (See ARMv6 spec).
         */
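        /*
         * With 4K pages (PAGE_SHIFT == 12), pfn >> 20 is PA[35:32]; e.g.
         * pfn 0x140000 (PA 0x1_4000_0000) contributes 0x1 at bits [23:20]
         * of each supersection descriptor.
         */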
        phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

        pgd = pgd_offset_k(addr);
        end = addr + length;
        do {
                pmd_t *pmd = pmd_offset(pgd, addr);
                int i;

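                /*
                 * A supersection maps 16MB and its descriptor must be
                 * repeated in all 16 consecutive L1 entries it covers.
                 */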
                for (i = 0; i < 16; i++)
                        *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

                addr += SUPERSECTION_SIZE;
                phys += SUPERSECTION_SIZE;
                pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
        } while (addr != end);
}

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
void __init create_mapping(struct map_desc *md)
{
        unsigned long phys, addr, length, end;
        const struct mem_type *type;
        pgd_t *pgd;

        if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
                printk(KERN_WARNING "BUG: not creating mapping for "
                       "0x%08llx at 0x%08lx in user region\n",
                       __pfn_to_phys((u64)md->pfn), md->virtual);
                return;
        }

        if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
            md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
                printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
                       "overlaps vmalloc space\n",
                       __pfn_to_phys((u64)md->pfn), md->virtual);
        }

        type = &mem_types[md->type];

        /*
         * Catch 36-bit addresses
         */
        if (md->pfn >= 0x100000) {
                create_36bit_mapping(md, type);
                return;
        }

        addr = md->virtual & PAGE_MASK;
        phys = (unsigned long)__pfn_to_phys(md->pfn);
        length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

        if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
                printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
                       "be mapped using pages, ignoring.\n",
                       __pfn_to_phys(md->pfn), addr);
                return;
        }

        pgd = pgd_offset_k(addr);
        end = addr + length;
        do {
                unsigned long next = pgd_addr_end(addr, end);

                alloc_init_section(pgd, addr, next, phys, type);

                phys += next - addr;
                addr = next;
        } while (pgd++, addr != end);
}

/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
        int i;

        for (i = 0; i < nr; i++)
                create_mapping(io_desc + i);
}

static inline void prepare_page_table(struct meminfo *mi)
{
        unsigned long addr;

        /*
         * Clear out all the mappings below the kernel image.
         */
        for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
        /* The XIP kernel is mapped in the module area -- skip over it */
        addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
#endif
        for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));

        /*
         * Clear out all the kernel space mappings, except for the first
         * memory bank, up to the end of the vmalloc region.
         */
        for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
             addr < VMALLOC_END; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));
}

/*
 * Reserve the various regions of node 0
 */
void __init reserve_node_zero(pg_data_t *pgdat)
{
        unsigned long res_size = 0;

        /*
         * Register the kernel text and data with bootmem.
         * Note that this can only be in node 0.
         */
#ifdef CONFIG_XIP_KERNEL
        reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
#else
        reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
#endif

        /*
         * Reserve the page tables.  These are already in use,
         * and can only be in node 0.
         */
        reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
                             PTRS_PER_PGD * sizeof(pgd_t));

        /*
         * Hmm... This should go elsewhere, but we really really need to
         * stop things allocating the low memory; ideally we need a better
         * implementation of GFP_DMA which does not assume that DMA-able
         * memory starts at zero.
         */
        if (machine_is_integrator() || machine_is_cintegrator())
                res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;

        /*
         * These should likewise go elsewhere.  They pre-reserve the
         * screen memory region at the start of main system memory.
         */
        if (machine_is_edb7211())
                res_size = 0x00020000;
        if (machine_is_p720t())
                res_size = 0x00014000;

        /* H1940 and RX3715 need to reserve this for suspend */

        if (machine_is_h1940() || machine_is_rx3715()) {
                reserve_bootmem_node(pgdat, 0x30003000, 0x1000);
                reserve_bootmem_node(pgdat, 0x30081000, 0x1000);
        }

#ifdef CONFIG_SA1111
        /*
         * Because of the SA1111 DMA bug, we want to preserve our
         * precious DMA-able memory...
         */
        res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
#endif
        if (res_size)
                reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
}

/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_END, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function: you can't use any function or debugging method which
 * may touch any device, otherwise the kernel _will_ crash.
 */
static void __init devicemaps_init(struct machine_desc *mdesc)
{
        struct map_desc map;
        unsigned long addr;
        void *vectors;

        /*
         * Allocate the vector page early.
         */
        vectors = alloc_bootmem_low_pages(PAGE_SIZE);
        BUG_ON(!vectors);

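        /*
         * Clear every mapping above VMALLOC_END; the loop ends when addr
         * wraps around to zero at the top of the 32-bit address space.
         */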
        for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
                pmd_clear(pmd_off_k(addr));

        /*
         * Map the kernel if it is XIP.
         * It is always first in the module area.
         */
#ifdef CONFIG_XIP_KERNEL
        map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
        map.virtual = MODULE_START;
        map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
        map.type = MT_ROM;
        create_mapping(&map);
#endif

        /*
         * Map the cache flushing regions.
         */
#ifdef FLUSH_BASE
        map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
        map.virtual = FLUSH_BASE;
        map.length = SZ_1M;
        map.type = MT_CACHECLEAN;
        create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
        map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
        map.virtual = FLUSH_BASE_MINICACHE;
        map.length = SZ_1M;
        map.type = MT_MINICLEAN;
        create_mapping(&map);
#endif

        /*
         * Create a mapping for the machine vectors at the high-vectors
         * location (0xffff0000).  If we aren't using high-vectors, also
         * create a mapping at the low-vectors virtual address.
         */
        map.pfn = __phys_to_pfn(virt_to_phys(vectors));
        map.virtual = 0xffff0000;
        map.length = PAGE_SIZE;
        map.type = MT_HIGH_VECTORS;
        create_mapping(&map);

        if (!vectors_high()) {
                map.virtual = 0;
                map.type = MT_LOW_VECTORS;
                create_mapping(&map);
        }

        /*
         * Ask the machine support to map in the statically mapped devices.
         */
        if (mdesc->map_io)
                mdesc->map_io();

        /*
         * Finally flush the caches and tlb to ensure that we're in a
         * consistent state wrt the writebuffer.  This also ensures that
         * any write-allocated cache lines in the vector page are written
         * back.  After this point, we can start to touch devices again.
         */
        local_flush_tlb_all();
        flush_cache_all();
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
        void *zero_page;

        build_mem_type_table();
        prepare_page_table(mi);
        bootmem_init(mi);
        devicemaps_init(mdesc);

        top_pmd = pmd_off_k(0xffff0000);

        /*
         * allocate the zero page.  Note that we count on this going ok.
         */
        zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
        memzero(zero_page, PAGE_SIZE);
        empty_zero_page = virt_to_page(zero_page);
        flush_dcache_page(empty_zero_page);
}

/*
 * In order to soft-boot, we need to insert a 1:1 mapping in place of
 * the user-mode pages.  This will then ensure that we have predictable
 * results when turning the mmu off
 */
void setup_mm_for_reboot(char mode)
{
        unsigned long base_pmdval;
        pgd_t *pgd;
        int i;

        if (current->mm && current->mm->pgd)
                pgd = current->mm->pgd;
        else
                pgd = init_mm.pgd;

        base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
        if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
                base_pmdval |= PMD_BIT4;

        for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
                unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
                pmd_t *pmd;

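                /*
                 * Each pgd slot is 2MB wide and is implemented as two 1MB
                 * section entries; 1 << (PGDIR_SHIFT - 1) is one section.
                 */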
                pmd = pmd_off(pgd, i << PGDIR_SHIFT);
                pmd[0] = __pmd(pmdval);
                pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
                flush_pmd_entry(pmd);
        }
}