linux/arch/arm/mm/mmu.c
<<
>>
Prefs
   1/*
   2 *  linux/arch/arm/mm/mmu.c
   3 *
   4 *  Copyright (C) 1995-2005 Russell King
   5 *
   6 * This program is free software; you can redistribute it and/or modify
   7 * it under the terms of the GNU General Public License version 2 as
   8 * published by the Free Software Foundation.
   9 */
  10#include <linux/module.h>
  11#include <linux/kernel.h>
  12#include <linux/errno.h>
  13#include <linux/init.h>
  14#include <linux/mman.h>
  15#include <linux/nodemask.h>
  16#include <linux/memblock.h>
  17#include <linux/fs.h>
  18#include <linux/vmalloc.h>
  19#include <linux/sizes.h>
  20
  21#include <asm/cp15.h>
  22#include <asm/cputype.h>
  23#include <asm/sections.h>
  24#include <asm/cachetype.h>
  25#include <asm/sections.h>
  26#include <asm/setup.h>
  27#include <asm/smp_plat.h>
  28#include <asm/tlb.h>
  29#include <asm/highmem.h>
  30#include <asm/system_info.h>
  31#include <asm/traps.h>
  32#include <asm/procinfo.h>
  33#include <asm/memory.h>
  34
  35#include <asm/mach/arch.h>
  36#include <asm/mach/map.h>
  37#include <asm/mach/pci.h>
  38#include <asm/fixmap.h>
  39
  40#include "mm.h"
  41#include "tcm.h"
  42
  43/*
  44 * empty_zero_page is a special page that is used for
  45 * zero-initialized data and COW.
  46 */
  47struct page *empty_zero_page;
  48EXPORT_SYMBOL(empty_zero_page);
  49
  50/*
  51 * The pmd table for the upper-most set of pages.
  52 */
  53pmd_t *top_pmd;
  54
/*
 * Cache policy selectors.  The numeric value of each CPOLICY_* constant is
 * the index of the corresponding entry in cache_policies[] below, so the
 * two must be kept in the same order.
 */
#define CPOLICY_UNCACHED        0
#define CPOLICY_BUFFERED        1
#define CPOLICY_WRITETHROUGH    2
#define CPOLICY_WRITEBACK       3
#define CPOLICY_WRITEALLOC      4

/* Index into cache_policies[] of the policy currently in effect. */
static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
/* Extra bits OR-ed into memory descriptors by build_mem_type_table(). */
static unsigned int ecc_mask __initdata = 0;
/* Page protections computed by build_mem_type_table(). */
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
pgprot_t pgprot_hyp_device;
pgprot_t pgprot_s2;
pgprot_t pgprot_s2_device;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
  71
/*
 * One selectable cache policy.
 *
 * @policy:  name matched against the "cachepolicy=" argument
 * @cr_mask: control register bits cleared when switching to this policy
 * @pmd:     section descriptor attribute bits for this policy
 * @pte:     page table entry attribute bits for this policy
 * @pte_s2:  page table entry attribute bits used to build pgprot_s2
 */
struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
	pteval_t	pte_s2;
};
  79
/*
 * s2_policy() passes its argument through when CONFIG_ARM_LPAE is enabled
 * and evaluates to 0 otherwise, so the L_PTE_S2_* names are only expanded
 * in LPAE builds.
 */
#ifdef CONFIG_ARM_LPAE
#define s2_policy(policy)       policy
#else
#define s2_policy(policy)       0
#endif
  85
/*
 * Table of supported cache policies, indexed by the CPOLICY_* constants
 * (entry 0 is "uncached", entry 4 is "writealloc").  The order matters:
 * cachepolicy is used directly as an index into this array.
 */
static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
		.pte_s2		= s2_policy(L_PTE_S2_MT_UNCACHED),
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
		.pte_s2		= s2_policy(L_PTE_S2_MT_UNCACHED),
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITETHROUGH),
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITEBACK),
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITEBACK),
	}
};
 119
 120#ifdef CONFIG_CPU_CP15
 121static unsigned long initial_pmd_value __initdata = 0;
 122
 123/*
 124 * Initialise the cache_policy variable with the initial state specified
 125 * via the "pmd" value.  This is used to ensure that on ARMv6 and later,
 126 * the C code sets the page tables up with the same policy as the head
 127 * assembly code, which avoids an illegal state where the TLBs can get
 128 * confused.  See comments in early_cachepolicy() for more information.
 129 */
 130void __init init_default_cache_policy(unsigned long pmd)
 131{
 132        int i;
 133
 134        initial_pmd_value = pmd;
 135
 136        pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE;
 137
 138        for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
 139                if (cache_policies[i].pmd == pmd) {
 140                        cachepolicy = i;
 141                        break;
 142                }
 143
 144        if (i == ARRAY_SIZE(cache_policies))
 145                pr_err("ERROR: could not find cache policy\n");
 146}
 147
 148/*
 149 * These are useful for identifying cache coherency problems by allowing
 150 * the cache or the cache and writebuffer to be turned off.  (Note: the
 151 * write buffer should not be on and the cache off).
 152 */
 153static int __init early_cachepolicy(char *p)
 154{
 155        int i, selected = -1;
 156
 157        for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
 158                int len = strlen(cache_policies[i].policy);
 159
 160                if (memcmp(p, cache_policies[i].policy, len) == 0) {
 161                        selected = i;
 162                        break;
 163                }
 164        }
 165
 166        if (selected == -1)
 167                pr_err("ERROR: unknown or unsupported cache policy\n");
 168
 169        /*
 170         * This restriction is partly to do with the way we boot; it is
 171         * unpredictable to have memory mapped using two different sets of
 172         * memory attributes (shared, type, and cache attribs).  We can not
 173         * change these attributes once the initial assembly has setup the
 174         * page tables.
 175         */
 176        if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) {
 177                pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n",
 178                        cache_policies[cachepolicy].policy);
 179                return 0;
 180        }
 181
 182        if (selected != cachepolicy) {
 183                unsigned long cr = __clear_cr(cache_policies[selected].cr_mask);
 184                cachepolicy = selected;
 185                flush_cache_all();
 186                set_cr(cr);
 187        }
 188        return 0;
 189}
 190early_param("cachepolicy", early_cachepolicy);
 191
 192static int __init early_nocache(char *__unused)
 193{
 194        char *p = "buffered";
 195        printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
 196        early_cachepolicy(p);
 197        return 0;
 198}
 199early_param("nocache", early_nocache);
 200
 201static int __init early_nowrite(char *__unused)
 202{
 203        char *p = "uncached";
 204        printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
 205        early_cachepolicy(p);
 206        return 0;
 207}
 208early_param("nowb", early_nowrite);
 209
 210#ifndef CONFIG_ARM_LPAE
 211static int __init early_ecc(char *p)
 212{
 213        if (memcmp(p, "on", 2) == 0)
 214                ecc_mask = PMD_PROTECTION;
 215        else if (memcmp(p, "off", 3) == 0)
 216                ecc_mask = 0;
 217        return 0;
 218}
 219early_param("ecc", early_ecc);
 220#endif
 221
 222#else /* ifdef CONFIG_CPU_CP15 */
 223
 224static int __init early_cachepolicy(char *p)
 225{
 226        pr_warn("cachepolicy kernel parameter not supported without cp15\n");
 227}
 228early_param("cachepolicy", early_cachepolicy);
 229
 230static int __init noalign_setup(char *__unused)
 231{
 232        pr_warn("noalign kernel parameter not supported without cp15\n");
 233}
 234__setup("noalign", noalign_setup);
 235
 236#endif /* ifdef CONFIG_CPU_CP15 / else */
 237
 238#define PROT_PTE_DEVICE         L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
 239#define PROT_PTE_S2_DEVICE      PROT_PTE_DEVICE
 240#define PROT_SECT_DEVICE        PMD_TYPE_SECT|PMD_SECT_AP_WRITE
 241
/*
 * Memory type table, indexed by the MT_* constants.
 *
 * Each entry gives the protection bits used when mapping a region of that
 * type: prot_pte for page table entries, prot_pte_s2 for the values that
 * feed pgprot_s2_device, prot_l1 for first-level descriptors that point to
 * a page table, prot_sect for section descriptors, and the domain.
 *
 * The values here are only a starting point: build_mem_type_table() edits
 * the entries in place according to the CPU and the selected cache policy,
 * which is why the table is not const.
 */
static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_pte_s2	= s2_policy(PROT_PTE_S2_DEVICE) |
				  s2_policy(L_PTE_S2_MT_DEV_SHARED) |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	/* Section-only type: no prot_pte/prot_l1 are provided. */
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
#ifndef CONFIG_ARM_LPAE
	/* Section-only type: no prot_pte/prot_l1 are provided. */
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain    = DOMAIN_KERNEL,
	},
#endif
	/* Page-mapped only: no section descriptor is provided. */
	[MT_LOW_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	/* Page-mapped only: no section descriptor is provided. */
	[MT_HIGH_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_USER | L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_MEMORY_RWX] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	/* Same as MT_MEMORY_RWX, with L_PTE_XN added to the page bits. */
	[MT_MEMORY_RW] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			     L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	/* Section-only type: no prot_pte/prot_l1 are provided. */
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RWX_NONCACHED] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_BUFFERABLE,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_DTCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	/* Page-mapped only: no section descriptor is provided. */
	[MT_MEMORY_RWX_ITCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_SO] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_UNCACHED | L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
				PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	/* Page-mapped only: no section descriptor is provided. */
	[MT_MEMORY_DMA_READY] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_KERNEL,
	},
};
 350
 351const struct mem_type *get_mem_type(unsigned int type)
 352{
 353        return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
 354}
 355EXPORT_SYMBOL(get_mem_type);
 356
 357#define PTE_SET_FN(_name, pteop) \
 358static int pte_set_##_name(pte_t *ptep, pgtable_t token, unsigned long addr, \
 359                        void *data) \
 360{ \
 361        pte_t pte = pteop(*ptep); \
 362\
 363        set_pte_ext(ptep, pte, 0); \
 364        return 0; \
 365} \
 366
 367#define SET_MEMORY_FN(_name, callback) \
 368int set_memory_##_name(unsigned long addr, int numpages) \
 369{ \
 370        unsigned long start = addr; \
 371        unsigned long size = PAGE_SIZE*numpages; \
 372        unsigned end = start + size; \
 373\
 374        if (start < MODULES_VADDR || start >= MODULES_END) \
 375                return -EINVAL;\
 376\
 377        if (end < MODULES_VADDR || end >= MODULES_END) \
 378                return -EINVAL; \
 379\
 380        apply_to_page_range(&init_mm, start, size, callback, NULL); \
 381        flush_tlb_kernel_range(start, end); \
 382        return 0;\
 383}
 384
/* Per-PTE callbacks: pte_set_ro(), pte_set_rw(), pte_set_x(), pte_set_nx(). */
PTE_SET_FN(ro, pte_wrprotect)
PTE_SET_FN(rw, pte_mkwrite)
PTE_SET_FN(x, pte_mkexec)
PTE_SET_FN(nx, pte_mknexec)

/* Range entry points: set_memory_ro(), set_memory_rw(), set_memory_x(), set_memory_nx(). */
SET_MEMORY_FN(ro, pte_set_ro)
SET_MEMORY_FN(rw, pte_set_rw)
SET_MEMORY_FN(x, pte_set_x)
SET_MEMORY_FN(nx, pte_set_nx)
 394
/*
 * Adjust the PMD section entries according to the CPU in use.
 *
 * Starting from the static defaults in mem_types[], this function edits
 * the table in place using the CPU architecture, the control register
 * value, the selected cache policy, ecc_mask and initial_pmd_value.  It
 * also merges the user cache attributes into protection_map[] and
 * computes pgprot_user, pgprot_kernel, pgprot_s2, pgprot_s2_device and
 * pgprot_hyp_device.
 *
 * The steps depend on one another's results, so their order matters.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	pteval_t hyp_device_pgprot, s2_pgprot, s2_device_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	/*
	 * Before ARMv6, clamp the policy to what the configured D-cache
	 * mode allows.
	 */
	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	/* Before ARMv5, fall back from write-allocate and drop ecc_mask. */
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}

	/* SMP requires write-allocate caching and shared mappings. */
	if (is_smp()) {
		if (cachepolicy != CPOLICY_WRITEALLOC) {
			pr_warn("Forcing write-allocate cache policy for SMP\n");
			cachepolicy = CPOLICY_WRITEALLOC;
		}
		if (!(initial_pmd_value & PMD_SECT_S)) {
			pr_warn("Forcing shared mappings for SMP\n");
			initial_pmd_value |= PMD_SECT_S;
		}
	}

	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * ARMv5 and lower, bit 4 must be set for page tables (was: cache
	 * "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			/* Leave unused (zero) descriptors at zero. */
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}

	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;

			/* Also setup NX memory mapping */
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
	s2_pgprot = cp->pte_s2;
	hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte;
	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;

	/*
	 * We don't use domains on ARMv6 (since this causes problems with
	 * v6/v7 kernels), so we must use a separate memory type for user
	 * r/o, kernel r/w to map the vectors page.
	 */
#ifndef CONFIG_ARM_LPAE
	if (cpu_arch == CPU_ARCH_ARMv6)
		vecs_pgprot |= L_PTE_MT_VECTORS;
#endif

	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
#ifndef CONFIG_ARM_LPAE
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif

		/*
		 * If the initial page tables were created with the S bit
		 * set, then we need to do the same here for the same
		 * reasons given in early_cachepolicy().
		 */
		if (initial_pmd_value & PMD_SECT_S) {
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			s2_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}

#ifdef CONFIG_ARM_LPAE
	/*
	 * Do not generate access flag faults for the kernel mappings.
	 */
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		mem_types[i].prot_pte |= PTE_EXT_AF;
		if (mem_types[i].prot_sect)
			mem_types[i].prot_sect |= PMD_SECT_AF;
	}
	kern_pgprot |= PTE_EXT_AF;
	vecs_pgprot |= PTE_EXT_AF;
#endif

	/* Merge the user cache attributes into every protection_map[] slot. */
	for (i = 0; i < 16; i++) {
		pteval_t v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	/* Publish the protections derived above. */
	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);
	pgprot_s2  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | s2_pgprot);
	pgprot_s2_device  = __pgprot(s2_device_pgprot);
	pgprot_hyp_device  = __pgprot(hyp_device_pgprot);

	/* Apply ecc_mask and the policy's cache bits to the memory types. */
	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	/*
	 * MT_CACHECLEAN follows the policy: write-through stays
	 * write-through, both write-back variants become write-back, and
	 * any other policy leaves it without cache bits.
	 */
	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	pr_info("Memory policy: %sData cache %s\n",
		ecc_mask ? "ECC enabled, " : "", cp->policy);

	/*
	 * Finally fold each type's domain into its descriptors; descriptors
	 * that are still zero (unused) are left at zero.
	 */
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}
 655
 656#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
 657pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 658                              unsigned long size, pgprot_t vma_prot)
 659{
 660        if (!pfn_valid(pfn))
 661                return pgprot_noncached(vma_prot);
 662        else if (file->f_flags & O_SYNC)
 663                return pgprot_writecombine(vma_prot);
 664        return vma_prot;
 665}
 666EXPORT_SYMBOL(phys_mem_access_prot);
 667#endif
 668
 669#define vectors_base()  (vectors_high() ? 0xffff0000 : 0)
 670
 671static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
 672{
 673        void *ptr = __va(memblock_alloc(sz, align));
 674        memset(ptr, 0, sz);
 675        return ptr;
 676}
 677
 678static void __init *early_alloc(unsigned long sz)
 679{
 680        return early_alloc_aligned(sz, sz);
 681}
 682
 683static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
 684{
 685        if (pmd_none(*pmd)) {
 686                pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
 687                __pmd_populate(pmd, __pa(pte), prot);
 688        }
 689        BUG_ON(pmd_bad(*pmd));
 690        return pte_offset_kernel(pmd, addr);
 691}
 692
 693static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 694                                  unsigned long end, unsigned long pfn,
 695                                  const struct mem_type *type)
 696{
 697        pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
 698        do {
 699                set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
 700                pfn++;
 701        } while (pte++, addr += PAGE_SIZE, addr != end);
 702}
 703
 704static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
 705                        unsigned long end, phys_addr_t phys,
 706                        const struct mem_type *type)
 707{
 708        pmd_t *p = pmd;
 709
 710#ifndef CONFIG_ARM_LPAE
 711        /*
 712         * In classic MMU format, puds and pmds are folded in to
 713         * the pgds. pmd_offset gives the PGD entry. PGDs refer to a
 714         * group of L1 entries making up one logical pointer to
 715         * an L2 table (2MB), where as PMDs refer to the individual
 716         * L1 entries (1MB). Hence increment to get the correct
 717         * offset for odd 1MB sections.
 718         * (See arch/arm/include/asm/pgtable-2level.h)
 719         */
 720        if (addr & SECTION_SIZE)
 721                pmd++;
 722#endif
 723        do {
 724                *pmd = __pmd(phys | type->prot_sect);
 725                phys += SECTION_SIZE;
 726        } while (pmd++, addr += SECTION_SIZE, addr != end);
 727
 728        flush_pmd_entry(p);
 729}
 730
 731static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
 732                                      unsigned long end, phys_addr_t phys,
 733                                      const struct mem_type *type)
 734{
 735        pmd_t *pmd = pmd_offset(pud, addr);
 736        unsigned long next;
 737
 738        do {
 739                /*
 740                 * With LPAE, we must loop over to map
 741                 * all the pmds for the given range.
 742                 */
 743                next = pmd_addr_end(addr, end);
 744
 745                /*
 746                 * Try a section mapping - addr, next and phys must all be
 747                 * aligned to a section boundary.
 748                 */
 749                if (type->prot_sect &&
 750                                ((addr | next | phys) & ~SECTION_MASK) == 0) {
 751                        __map_init_section(pmd, addr, next, phys, type);
 752                } else {
 753                        alloc_init_pte(pmd, addr, next,
 754                                                __phys_to_pfn(phys), type);
 755                }
 756
 757                phys += next - addr;
 758
 759        } while (pmd++, addr = next, addr != end);
 760}
 761
 762static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
 763                                  unsigned long end, phys_addr_t phys,
 764                                  const struct mem_type *type)
 765{
 766        pud_t *pud = pud_offset(pgd, addr);
 767        unsigned long next;
 768
 769        do {
 770                next = pud_addr_end(addr, end);
 771                alloc_init_pmd(pud, addr, next, phys, type);
 772                phys += next - addr;
 773        } while (pud++, addr = next, addr != end);
 774}
 775
 776#ifndef CONFIG_ARM_LPAE
 777static void __init create_36bit_mapping(struct map_desc *md,
 778                                        const struct mem_type *type)
 779{
 780        unsigned long addr, length, end;
 781        phys_addr_t phys;
 782        pgd_t *pgd;
 783
 784        addr = md->virtual;
 785        phys = __pfn_to_phys(md->pfn);
 786        length = PAGE_ALIGN(md->length);
 787
 788        if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
 789                printk(KERN_ERR "MM: CPU does not support supersection "
 790                       "mapping for 0x%08llx at 0x%08lx\n",
 791                       (long long)__pfn_to_phys((u64)md->pfn), addr);
 792                return;
 793        }
 794
 795        /* N.B. ARMv6 supersections are only defined to work with domain 0.
 796         *      Since domain assignments can in fact be arbitrary, the
 797         *      'domain == 0' check below is required to insure that ARMv6
 798         *      supersections are only allocated for domain 0 regardless
 799         *      of the actual domain assignments in use.
 800         */
 801        if (type->domain) {
 802                printk(KERN_ERR "MM: invalid domain in supersection "
 803                       "mapping for 0x%08llx at 0x%08lx\n",
 804                       (long long)__pfn_to_phys((u64)md->pfn), addr);
 805                return;
 806        }
 807
 808        if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
 809                printk(KERN_ERR "MM: cannot create mapping for 0x%08llx"
 810                       " at 0x%08lx invalid alignment\n",
 811                       (long long)__pfn_to_phys((u64)md->pfn), addr);
 812                return;
 813        }
 814
 815        /*
 816         * Shift bits [35:32] of address into bits [23:20] of PMD
 817         * (See ARMv6 spec).
 818         */
 819        phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
 820
 821        pgd = pgd_offset_k(addr);
 822        end = addr + length;
 823        do {
 824                pud_t *pud = pud_offset(pgd, addr);
 825                pmd_t *pmd = pmd_offset(pud, addr);
 826                int i;
 827
 828                for (i = 0; i < 16; i++)
 829                        *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
 830
 831                addr += SUPERSECTION_SIZE;
 832                phys += SUPERSECTION_SIZE;
 833                pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
 834        } while (addr != end);
 835}
 836#endif  /* !CONFIG_ARM_LPAE */
 837
 838/*
 839 * Create the page directory entries and any necessary
 840 * page tables for the mapping specified by `md'.  We
 841 * are able to cope here with varying sizes and address
 842 * offsets, and we take full advantage of sections and
 843 * supersections.
 844 */
 845static void __init create_mapping(struct map_desc *md)
 846{
 847        unsigned long addr, length, end;
 848        phys_addr_t phys;
 849        const struct mem_type *type;
 850        pgd_t *pgd;
 851
 852        if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
 853                printk(KERN_WARNING "BUG: not creating mapping for 0x%08llx"
 854                       " at 0x%08lx in user region\n",
 855                       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 856                return;
 857        }
 858
 859        if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
 860            md->virtual >= PAGE_OFFSET &&
 861            (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
 862                printk(KERN_WARNING "BUG: mapping for 0x%08llx"
 863                       " at 0x%08lx out of vmalloc space\n",
 864                       (long long)__pfn_to_phys((u64)md->pfn), md->virtual);
 865        }
 866
 867        type = &mem_types[md->type];
 868
 869#ifndef CONFIG_ARM_LPAE
 870        /*
 871         * Catch 36-bit addresses
 872         */
 873        if (md->pfn >= 0x100000) {
 874                create_36bit_mapping(md, type);
 875                return;
 876        }
 877#endif
 878
 879        addr = md->virtual & PAGE_MASK;
 880        phys = __pfn_to_phys(md->pfn);
 881        length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
 882
 883        if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
 884                printk(KERN_WARNING "BUG: map for 0x%08llx at 0x%08lx can not "
 885                       "be mapped using pages, ignoring.\n",
 886                       (long long)__pfn_to_phys(md->pfn), addr);
 887                return;
 888        }
 889
 890        pgd = pgd_offset_k(addr);
 891        end = addr + length;
 892        do {
 893                unsigned long next = pgd_addr_end(addr, end);
 894
 895                alloc_init_pud(pgd, addr, next, phys, type);
 896
 897                phys += next - addr;
 898                addr = next;
 899        } while (pgd++, addr != end);
 900}
 901
 902/*
 903 * Create the architecture specific mappings
 904 */
 905void __init iotable_init(struct map_desc *io_desc, int nr)
 906{
 907        struct map_desc *md;
 908        struct vm_struct *vm;
 909        struct static_vm *svm;
 910
 911        if (!nr)
 912                return;
 913
 914        svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm));
 915
 916        for (md = io_desc; nr; md++, nr--) {
 917                create_mapping(md);
 918
 919                vm = &svm->vm;
 920                vm->addr = (void *)(md->virtual & PAGE_MASK);
 921                vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
 922                vm->phys_addr = __pfn_to_phys(md->pfn);
 923                vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
 924                vm->flags |= VM_ARM_MTYPE(md->type);
 925                vm->caller = iotable_init;
 926                add_static_vm_early(svm++);
 927        }
 928}
 929
 930void __init vm_reserve_area_early(unsigned long addr, unsigned long size,
 931                                  void *caller)
 932{
 933        struct vm_struct *vm;
 934        struct static_vm *svm;
 935
 936        svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm));
 937
 938        vm = &svm->vm;
 939        vm->addr = (void *)addr;
 940        vm->size = size;
 941        vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
 942        vm->caller = caller;
 943        add_static_vm_early(svm);
 944}
 945
 946#ifndef CONFIG_ARM_LPAE
 947
 948/*
 949 * The Linux PMD is made of two consecutive section entries covering 2MB
 950 * (see definition in include/asm/pgtable-2level.h).  However a call to
 951 * create_mapping() may optimize static mappings by using individual
 952 * 1MB section mappings.  This leaves the actual PMD potentially half
 953 * initialized if the top or bottom section entry isn't used, leaving it
 954 * open to problems if a subsequent ioremap() or vmalloc() tries to use
 955 * the virtual space left free by that unused section entry.
 956 *
 957 * Let's avoid the issue by inserting dummy vm entries covering the unused
 958 * PMD halves once the static mappings are in place.
 959 */
 960
 961static void __init pmd_empty_section_gap(unsigned long addr)
 962{
 963        vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap);
 964}
 965
 966static void __init fill_pmd_gaps(void)
 967{
 968        struct static_vm *svm;
 969        struct vm_struct *vm;
 970        unsigned long addr, next = 0;
 971        pmd_t *pmd;
 972
 973        list_for_each_entry(svm, &static_vmlist, list) {
 974                vm = &svm->vm;
 975                addr = (unsigned long)vm->addr;
 976                if (addr < next)
 977                        continue;
 978
 979                /*
 980                 * Check if this vm starts on an odd section boundary.
 981                 * If so and the first section entry for this PMD is free
 982                 * then we block the corresponding virtual address.
 983                 */
 984                if ((addr & ~PMD_MASK) == SECTION_SIZE) {
 985                        pmd = pmd_off_k(addr);
 986                        if (pmd_none(*pmd))
 987                                pmd_empty_section_gap(addr & PMD_MASK);
 988                }
 989
 990                /*
 991                 * Then check if this vm ends on an odd section boundary.
 992                 * If so and the second section entry for this PMD is empty
 993                 * then we block the corresponding virtual address.
 994                 */
 995                addr += vm->size;
 996                if ((addr & ~PMD_MASK) == SECTION_SIZE) {
 997                        pmd = pmd_off_k(addr) + 1;
 998                        if (pmd_none(*pmd))
 999                                pmd_empty_section_gap(addr);
1000                }
1001
1002                /* no need to look at any vm entry until we hit the next PMD */
1003                next = (addr + PMD_SIZE - 1) & PMD_MASK;
1004        }
1005}
1006
1007#else
1008#define fill_pmd_gaps() do { } while (0)
1009#endif
1010
1011#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H)
1012static void __init pci_reserve_io(void)
1013{
1014        struct static_vm *svm;
1015
1016        svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE);
1017        if (svm)
1018                return;
1019
1020        vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io);
1021}
1022#else
1023#define pci_reserve_io() do { } while (0)
1024#endif
1025
1026#ifdef CONFIG_DEBUG_LL
1027void __init debug_ll_io_init(void)
1028{
1029        struct map_desc map;
1030
1031        debug_ll_addr(&map.pfn, &map.virtual);
1032        if (!map.pfn || !map.virtual)
1033                return;
1034        map.pfn = __phys_to_pfn(map.pfn);
1035        map.virtual &= PAGE_MASK;
1036        map.length = PAGE_SIZE;
1037        map.type = MT_DEVICE;
1038        iotable_init(&map, 1);
1039}
1040#endif
1041
1042static void * __initdata vmalloc_min =
1043        (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
1044
1045/*
1046 * vmalloc=size forces the vmalloc area to be exactly 'size'
1047 * bytes. This can be used to increase (or decrease) the vmalloc
1048 * area - the default is 240m.
1049 */
1050static int __init early_vmalloc(char *arg)
1051{
1052        unsigned long vmalloc_reserve = memparse(arg, NULL);
1053
1054        if (vmalloc_reserve < SZ_16M) {
1055                vmalloc_reserve = SZ_16M;
1056                printk(KERN_WARNING
1057                        "vmalloc area too small, limiting to %luMB\n",
1058                        vmalloc_reserve >> 20);
1059        }
1060
1061        if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
1062                vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
1063                printk(KERN_WARNING
1064                        "vmalloc area is too big, limiting to %luMB\n",
1065                        vmalloc_reserve >> 20);
1066        }
1067
1068        vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
1069        return 0;
1070}
1071early_param("vmalloc", early_vmalloc);
1072
1073phys_addr_t arm_lowmem_limit __initdata = 0;
1074
1075void __init sanity_check_meminfo(void)
1076{
1077        phys_addr_t memblock_limit = 0;
1078        int highmem = 0;
1079        phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
1080        struct memblock_region *reg;
1081
1082        for_each_memblock(memory, reg) {
1083                phys_addr_t block_start = reg->base;
1084                phys_addr_t block_end = reg->base + reg->size;
1085                phys_addr_t size_limit = reg->size;
1086
1087                if (reg->base >= vmalloc_limit)
1088                        highmem = 1;
1089                else
1090                        size_limit = vmalloc_limit - reg->base;
1091
1092
1093                if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
1094
1095                        if (highmem) {
1096                                pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
1097                                        &block_start, &block_end);
1098                                memblock_remove(reg->base, reg->size);
1099                                continue;
1100                        }
1101
1102                        if (reg->size > size_limit) {
1103                                phys_addr_t overlap_size = reg->size - size_limit;
1104
1105                                pr_notice("Truncating RAM at %pa-%pa to -%pa",
1106                                      &block_start, &block_end, &vmalloc_limit);
1107                                memblock_remove(vmalloc_limit, overlap_size);
1108                                block_end = vmalloc_limit;
1109                        }
1110                }
1111
1112                if (!highmem) {
1113                        if (block_end > arm_lowmem_limit) {
1114                                if (reg->size > size_limit)
1115                                        arm_lowmem_limit = vmalloc_limit;
1116                                else
1117                                        arm_lowmem_limit = block_end;
1118                        }
1119
1120                        /*
1121                         * Find the first non-section-aligned page, and point
1122                         * memblock_limit at it. This relies on rounding the
1123                         * limit down to be section-aligned, which happens at
1124                         * the end of this function.
1125                         *
1126                         * With this algorithm, the start or end of almost any
1127                         * bank can be non-section-aligned. The only exception
1128                         * is that the start of the bank 0 must be section-
1129                         * aligned, since otherwise memory would need to be
1130                         * allocated when mapping the start of bank 0, which
1131                         * occurs before any free memory is mapped.
1132                         */
1133                        if (!memblock_limit) {
1134                                if (!IS_ALIGNED(block_start, SECTION_SIZE))
1135                                        memblock_limit = block_start;
1136                                else if (!IS_ALIGNED(block_end, SECTION_SIZE))
1137                                        memblock_limit = arm_lowmem_limit;
1138                        }
1139
1140                }
1141        }
1142
1143        high_memory = __va(arm_lowmem_limit - 1) + 1;
1144
1145        /*
1146         * Round the memblock limit down to a section size.  This
1147         * helps to ensure that we will allocate memory from the
1148         * last full section, which should be mapped.
1149         */
1150        if (memblock_limit)
1151                memblock_limit = round_down(memblock_limit, SECTION_SIZE);
1152        if (!memblock_limit)
1153                memblock_limit = arm_lowmem_limit;
1154
1155        memblock_set_current_limit(memblock_limit);
1156}
1157
1158static inline void prepare_page_table(void)
1159{
1160        unsigned long addr;
1161        phys_addr_t end;
1162
1163        /*
1164         * Clear out all the mappings below the kernel image.
1165         */
1166        for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
1167                pmd_clear(pmd_off_k(addr));
1168
1169#ifdef CONFIG_XIP_KERNEL
1170        /* The XIP kernel is mapped in the module area -- skip over it */
1171        addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
1172#endif
1173        for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
1174                pmd_clear(pmd_off_k(addr));
1175
1176        /*
1177         * Find the end of the first block of lowmem.
1178         */
1179        end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
1180        if (end >= arm_lowmem_limit)
1181                end = arm_lowmem_limit;
1182
1183        /*
1184         * Clear out all the kernel space mappings, except for the first
1185         * memory bank, up to the vmalloc region.
1186         */
1187        for (addr = __phys_to_virt(end);
1188             addr < VMALLOC_START; addr += PMD_SIZE)
1189                pmd_clear(pmd_off_k(addr));
1190}
1191
1192#ifdef CONFIG_ARM_LPAE
1193/* the first page is reserved for pgd */
1194#define SWAPPER_PG_DIR_SIZE     (PAGE_SIZE + \
1195                                 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
1196#else
1197#define SWAPPER_PG_DIR_SIZE     (PTRS_PER_PGD * sizeof(pgd_t))
1198#endif
1199
1200/*
1201 * Reserve the special regions of memory
1202 */
1203void __init arm_mm_memblock_reserve(void)
1204{
1205        /*
1206         * Reserve the page tables.  These are already in use,
1207         * and can only be in node 0.
1208         */
1209        memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);
1210
1211#ifdef CONFIG_SA1111
1212        /*
1213         * Because of the SA1111 DMA bug, we want to preserve our
1214         * precious DMA-able memory...
1215         */
1216        memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
1217#endif
1218}
1219
1220/*
1221 * Set up the device mappings.  Since we clear out the page tables for all
1222 * mappings above VMALLOC_START, we will remove any debug device mappings.
1223 * This means you have to be careful how you debug this function, or any
1224 * called function.  This means you can't use any function or debugging
1225 * method which may touch any device, otherwise the kernel _will_ crash.
1226 */
1227static void __init devicemaps_init(const struct machine_desc *mdesc)
1228{
1229        struct map_desc map;
1230        unsigned long addr;
1231        void *vectors;
1232
1233        /*
1234         * Allocate the vector page early.
1235         */
1236        vectors = early_alloc(PAGE_SIZE * 2);
1237
1238        early_trap_init(vectors);
1239
1240        for (addr = VMALLOC_START; addr; addr += PMD_SIZE)
1241                pmd_clear(pmd_off_k(addr));
1242
1243        /*
1244         * Map the kernel if it is XIP.
1245         * It is always first in the modulearea.
1246         */
1247#ifdef CONFIG_XIP_KERNEL
1248        map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
1249        map.virtual = MODULES_VADDR;
1250        map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
1251        map.type = MT_ROM;
1252        create_mapping(&map);
1253#endif
1254
1255        /*
1256         * Map the cache flushing regions.
1257         */
1258#ifdef FLUSH_BASE
1259        map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
1260        map.virtual = FLUSH_BASE;
1261        map.length = SZ_1M;
1262        map.type = MT_CACHECLEAN;
1263        create_mapping(&map);
1264#endif
1265#ifdef FLUSH_BASE_MINICACHE
1266        map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
1267        map.virtual = FLUSH_BASE_MINICACHE;
1268        map.length = SZ_1M;
1269        map.type = MT_MINICLEAN;
1270        create_mapping(&map);
1271#endif
1272
1273        /*
1274         * Create a mapping for the machine vectors at the high-vectors
1275         * location (0xffff0000).  If we aren't using high-vectors, also
1276         * create a mapping at the low-vectors virtual address.
1277         */
1278        map.pfn = __phys_to_pfn(virt_to_phys(vectors));
1279        map.virtual = 0xffff0000;
1280        map.length = PAGE_SIZE;
1281#ifdef CONFIG_KUSER_HELPERS
1282        map.type = MT_HIGH_VECTORS;
1283#else
1284        map.type = MT_LOW_VECTORS;
1285#endif
1286        create_mapping(&map);
1287
1288        if (!vectors_high()) {
1289                map.virtual = 0;
1290                map.length = PAGE_SIZE * 2;
1291                map.type = MT_LOW_VECTORS;
1292                create_mapping(&map);
1293        }
1294
1295        /* Now create a kernel read-only mapping */
1296        map.pfn += 1;
1297        map.virtual = 0xffff0000 + PAGE_SIZE;
1298        map.length = PAGE_SIZE;
1299        map.type = MT_LOW_VECTORS;
1300        create_mapping(&map);
1301
1302        /*
1303         * Ask the machine support to map in the statically mapped devices.
1304         */
1305        if (mdesc->map_io)
1306                mdesc->map_io();
1307        else
1308                debug_ll_io_init();
1309        fill_pmd_gaps();
1310
1311        /* Reserve fixed i/o space in VMALLOC region */
1312        pci_reserve_io();
1313
1314        /*
1315         * Finally flush the caches and tlb to ensure that we're in a
1316         * consistent state wrt the writebuffer.  This also ensures that
1317         * any write-allocated cache lines in the vector page are written
1318         * back.  After this point, we can start to touch devices again.
1319         */
1320        local_flush_tlb_all();
1321        flush_cache_all();
1322}
1323
1324static void __init kmap_init(void)
1325{
1326#ifdef CONFIG_HIGHMEM
1327        pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
1328                PKMAP_BASE, _PAGE_KERNEL_TABLE);
1329
1330        fixmap_page_table = early_pte_alloc(pmd_off_k(FIXADDR_START),
1331                FIXADDR_START, _PAGE_KERNEL_TABLE);
1332#endif
1333}
1334
1335static void __init map_lowmem(void)
1336{
1337        struct memblock_region *reg;
1338        unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
1339        unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
1340
1341        /* Map all the lowmem memory banks. */
1342        for_each_memblock(memory, reg) {
1343                phys_addr_t start = reg->base;
1344                phys_addr_t end = start + reg->size;
1345                struct map_desc map;
1346
1347                if (end > arm_lowmem_limit)
1348                        end = arm_lowmem_limit;
1349                if (start >= end)
1350                        break;
1351
1352                if (end < kernel_x_start || start >= kernel_x_end) {
1353                        map.pfn = __phys_to_pfn(start);
1354                        map.virtual = __phys_to_virt(start);
1355                        map.length = end - start;
1356                        map.type = MT_MEMORY_RWX;
1357
1358                        create_mapping(&map);
1359                } else {
1360                        /* This better cover the entire kernel */
1361                        if (start < kernel_x_start) {
1362                                map.pfn = __phys_to_pfn(start);
1363                                map.virtual = __phys_to_virt(start);
1364                                map.length = kernel_x_start - start;
1365                                map.type = MT_MEMORY_RW;
1366
1367                                create_mapping(&map);
1368                        }
1369
1370                        map.pfn = __phys_to_pfn(kernel_x_start);
1371                        map.virtual = __phys_to_virt(kernel_x_start);
1372                        map.length = kernel_x_end - kernel_x_start;
1373                        map.type = MT_MEMORY_RWX;
1374
1375                        create_mapping(&map);
1376
1377                        if (kernel_x_end < end) {
1378                                map.pfn = __phys_to_pfn(kernel_x_end);
1379                                map.virtual = __phys_to_virt(kernel_x_end);
1380                                map.length = end - kernel_x_end;
1381                                map.type = MT_MEMORY_RW;
1382
1383                                create_mapping(&map);
1384                        }
1385                }
1386        }
1387}
1388
1389#ifdef CONFIG_ARM_LPAE
1390/*
1391 * early_paging_init() recreates boot time page table setup, allowing machines
1392 * to switch over to a high (>4G) address space on LPAE systems
1393 */
1394void __init early_paging_init(const struct machine_desc *mdesc,
1395                              struct proc_info_list *procinfo)
1396{
1397        pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
1398        unsigned long map_start, map_end;
1399        pgd_t *pgd0, *pgdk;
1400        pud_t *pud0, *pudk, *pud_start;
1401        pmd_t *pmd0, *pmdk;
1402        phys_addr_t phys;
1403        int i;
1404
1405        if (!(mdesc->init_meminfo))
1406                return;
1407
1408        /* remap kernel code and data */
1409        map_start = init_mm.start_code & PMD_MASK;
1410        map_end   = ALIGN(init_mm.brk, PMD_SIZE);
1411
1412        /* get a handle on things... */
1413        pgd0 = pgd_offset_k(0);
1414        pud_start = pud0 = pud_offset(pgd0, 0);
1415        pmd0 = pmd_offset(pud0, 0);
1416
1417        pgdk = pgd_offset_k(map_start);
1418        pudk = pud_offset(pgdk, map_start);
1419        pmdk = pmd_offset(pudk, map_start);
1420
1421        mdesc->init_meminfo();
1422
1423        /* Run the patch stub to update the constants */
1424        fixup_pv_table(&__pv_table_begin,
1425                (&__pv_table_end - &__pv_table_begin) << 2);
1426
1427        /*
1428         * Cache cleaning operations for self-modifying code
1429         * We should clean the entries by MVA but running a
1430         * for loop over every pv_table entry pointer would
1431         * just complicate the code.
1432         */
1433        flush_cache_louis();
1434        dsb(ishst);
1435        isb();
1436
1437        /*
1438         * FIXME: This code is not architecturally compliant: we modify
1439         * the mappings in-place, indeed while they are in use by this
1440         * very same code.  This may lead to unpredictable behaviour of
1441         * the CPU.
1442         *
1443         * Even modifying the mappings in a separate page table does
1444         * not resolve this.
1445         *
1446         * The architecture strongly recommends that when a mapping is
1447         * changed, that it is changed by first going via an invalid
1448         * mapping and back to the new mapping.  This is to ensure that
1449         * no TLB conflicts (caused by the TLB having more than one TLB
1450         * entry match a translation) can occur.  However, doing that
1451         * here will result in unmapping the code we are running.
1452         */
1453        pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
1454        add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
1455
1456        /*
1457         * Remap level 1 table.  This changes the physical addresses
1458         * used to refer to the level 2 page tables to the high
1459         * physical address alias, leaving everything else the same.
1460         */
1461        for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
1462                set_pud(pud0,
1463                        __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
1464                pmd0 += PTRS_PER_PMD;
1465        }
1466
1467        /*
1468         * Remap the level 2 table, pointing the mappings at the high
1469         * physical address alias of these pages.
1470         */
1471        phys = __pa(map_start);
1472        do {
1473                *pmdk++ = __pmd(phys | pmdprot);
1474                phys += PMD_SIZE;
1475        } while (phys < map_end);
1476
1477        /*
1478         * Ensure that the above updates are flushed out of the cache.
1479         * This is not strictly correct; on a system where the caches
1480         * are coherent with each other, but the MMU page table walks
1481         * may not be coherent, flush_cache_all() may be a no-op, and
1482         * this will fail.
1483         */
1484        flush_cache_all();
1485
1486        /*
1487         * Re-write the TTBR values to point them at the high physical
1488         * alias of the page tables.  We expect __va() will work on
1489         * cpu_get_pgd(), which returns the value of TTBR0.
1490         */
1491        cpu_switch_mm(pgd0, &init_mm);
1492        cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
1493
1494        /* Finally flush any stale TLB values. */
1495        local_flush_bp_all();
1496        local_flush_tlb_all();
1497}
1498
1499#else
1500
1501void __init early_paging_init(const struct machine_desc *mdesc,
1502                              struct proc_info_list *procinfo)
1503{
1504        if (mdesc->init_meminfo)
1505                mdesc->init_meminfo();
1506}
1507
1508#endif
1509
1510/*
1511 * paging_init() sets up the page tables, initialises the zone memory
1512 * maps, and sets up the zero page, bad page and bad page tables.
1513 */
1514void __init paging_init(const struct machine_desc *mdesc)
1515{
1516        void *zero_page;
1517
1518        build_mem_type_table();
1519        prepare_page_table();
1520        map_lowmem();
1521        dma_contiguous_remap();
1522        devicemaps_init(mdesc);
1523        kmap_init();
1524        tcm_init();
1525
1526        top_pmd = pmd_off_k(0xffff0000);
1527
1528        /* allocate the zero page. */
1529        zero_page = early_alloc(PAGE_SIZE);
1530
1531        bootmem_init();
1532
1533        empty_zero_page = virt_to_page(zero_page);
1534        __flush_dcache_page(NULL, empty_zero_page);
1535}
1536