linux/arch/arm64/mm/mmu.c
/*
 * Based on arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/cache.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kexec.h>
#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#include <asm/barrier.h>
#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/memblock.h>
#include <asm/mmu_context.h>
#include <asm/ptdump.h>

#define NO_BLOCK_MAPPINGS       BIT(0)
#define NO_CONT_MAPPINGS        BIT(1)

u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;

u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);

/*
 * Empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);

static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;

pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                              unsigned long size, pgprot_t vma_prot)
{
        if (!pfn_valid(pfn))
                return pgprot_noncached(vma_prot);
        else if (file->f_flags & O_SYNC)
                return pgprot_writecombine(vma_prot);
        return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);

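/*
 * Allocate a zeroed page to be used as a page table during early boot.
 * The new page is zeroed via the fixmap, since it may not be covered by
 * the linear map yet at this stage.
 */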
static phys_addr_t __init early_pgtable_alloc(void)
{
        phys_addr_t phys;
        void *ptr;

        phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);

        /*
         * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
         * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
         * any level of table.
         */
        ptr = pte_set_fixmap(phys);

        memset(ptr, 0, PAGE_SIZE);

        /*
         * Implicit barriers also ensure the zeroed page is visible to the page
         * table walker
         */
        pte_clear_fixmap();

        return phys;
}

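/*
 * Check whether a live kernel mapping may be changed from 'old' to 'new'
 * directly, i.e. without going through a break-before-make sequence.
 */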
static bool pgattr_change_is_safe(u64 old, u64 new)
{
        /*
         * The following mapping attributes may be updated in live
         * kernel mappings without the need for break-before-make.
         */
        static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;

        /* creating or taking down mappings is always safe */
        if (old == 0 || new == 0)
                return true;

        /* live contiguous mappings may not be manipulated at all */
        if ((old | new) & PTE_CONT)
                return false;

        /* Transitioning from Non-Global to Global is unsafe */
        if (old & ~new & PTE_NG)
                return false;

        return ((old ^ new) & ~mask) == 0;
}

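/*
 * Fill in the PTEs covering [addr, end) below *pmdp, mapping them to 'phys'
 * with the given protection. The PTE table is accessed via the fixmap.
 */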
static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot)
{
        pte_t *ptep;

        ptep = pte_set_fixmap_offset(pmdp, addr);
        do {
                pte_t old_pte = READ_ONCE(*ptep);

                set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));

                /*
                 * After the PTE entry has been populated once, we
                 * only allow updates to the permission attributes.
                 */
                BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
                                              READ_ONCE(pte_val(*ptep))));

                phys += PAGE_SIZE;
        } while (ptep++, addr += PAGE_SIZE, addr != end);

        pte_clear_fixmap();
}

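/*
 * Allocate the PTE level table if necessary, then create the page mappings,
 * adding the PTE_CONT hint to each naturally aligned CONT_PTE_SIZE chunk
 * unless the caller passed NO_CONT_MAPPINGS.
 */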
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void),
                                int flags)
{
        unsigned long next;
        pmd_t pmd = READ_ONCE(*pmdp);

        BUG_ON(pmd_sect(pmd));
        if (pmd_none(pmd)) {
                phys_addr_t pte_phys;
                BUG_ON(!pgtable_alloc);
                pte_phys = pgtable_alloc();
                __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
                pmd = READ_ONCE(*pmdp);
        }
        BUG_ON(pmd_bad(pmd));

        do {
                pgprot_t __prot = prot;

                next = pte_cont_addr_end(addr, end);

                /* use a contiguous mapping if the range is suitably aligned */
                if ((((addr | next | phys) & ~CONT_PTE_MASK) == 0) &&
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);

                init_pte(pmdp, addr, next, phys, __prot);

                phys += next - addr;
        } while (addr = next, addr != end);
}

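/*
 * Fill in the PMD entries covering [addr, end), using section mappings where
 * the range and physical address are suitably aligned, and falling back to
 * PTE level tables otherwise.
 */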
static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot,
                     phys_addr_t (*pgtable_alloc)(void), int flags)
{
        unsigned long next;
        pmd_t *pmdp;

        pmdp = pmd_set_fixmap_offset(pudp, addr);
        do {
                pmd_t old_pmd = READ_ONCE(*pmdp);

                next = pmd_addr_end(addr, end);

                /* try section mapping first */
                if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
                        pmd_set_huge(pmdp, phys, prot);

                        /*
                         * After the PMD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
                                                      READ_ONCE(pmd_val(*pmdp))));
                } else {
                        alloc_init_cont_pte(pmdp, addr, next, phys, prot,
                                            pgtable_alloc, flags);

                        BUG_ON(pmd_val(old_pmd) != 0 &&
                               pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
                }
                phys += next - addr;
        } while (pmdp++, addr = next, addr != end);

        pmd_clear_fixmap();
}

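/*
 * Allocate the PMD level table if necessary, then initialise it, adding the
 * PTE_CONT hint to each naturally aligned CONT_PMD_SIZE chunk unless
 * contiguous mappings have been disallowed.
 */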
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void), int flags)
{
        unsigned long next;
        pud_t pud = READ_ONCE(*pudp);

        /*
         * Check for initial section mappings in the pgd/pud.
         */
        BUG_ON(pud_sect(pud));
        if (pud_none(pud)) {
                phys_addr_t pmd_phys;
                BUG_ON(!pgtable_alloc);
                pmd_phys = pgtable_alloc();
                __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
                pud = READ_ONCE(*pudp);
        }
        BUG_ON(pud_bad(pud));

        do {
                pgprot_t __prot = prot;

                next = pmd_cont_addr_end(addr, end);

                /* use a contiguous mapping if the range is suitably aligned */
                if ((((addr | next | phys) & ~CONT_PMD_MASK) == 0) &&
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);

                init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);

                phys += next - addr;
        } while (addr = next, addr != end);
}

static inline bool use_1G_block(unsigned long addr, unsigned long next,
                        unsigned long phys)
{
        if (PAGE_SHIFT != 12)
                return false;

        if (((addr | next | phys) & ~PUD_MASK) != 0)
                return false;

        return true;
}

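/*
 * Fill in the PUD entries covering [addr, end), using 1GB block mappings
 * where possible and descending to the PMD level otherwise.
 */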
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
                           phys_addr_t phys, pgprot_t prot,
                           phys_addr_t (*pgtable_alloc)(void),
                           int flags)
{
        unsigned long next;
        pud_t *pudp;
        pgd_t pgd = READ_ONCE(*pgdp);

        if (pgd_none(pgd)) {
                phys_addr_t pud_phys;
                BUG_ON(!pgtable_alloc);
                pud_phys = pgtable_alloc();
                __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
                pgd = READ_ONCE(*pgdp);
        }
        BUG_ON(pgd_bad(pgd));

        pudp = pud_set_fixmap_offset(pgdp, addr);
        do {
                pud_t old_pud = READ_ONCE(*pudp);

                next = pud_addr_end(addr, end);

                /*
                 * For 4K granule only, attempt to put down a 1GB block
                 */
                if (use_1G_block(addr, next, phys) &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
                        pud_set_huge(pudp, phys, prot);

                        /*
                         * After the PUD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
                                                      READ_ONCE(pud_val(*pudp))));
                } else {
                        alloc_init_cont_pmd(pudp, addr, next, phys, prot,
                                            pgtable_alloc, flags);

                        BUG_ON(pud_val(old_pud) != 0 &&
                               pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
                }
                phys += next - addr;
        } while (pudp++, addr = next, addr != end);

        pud_clear_fixmap();
}

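/*
 * Create a mapping of [phys, phys + size) at 'virt' in the page tables
 * rooted at 'pgdir', one PGD entry at a time. 'flags' may forbid block
 * mappings and/or the contiguous hint; 'pgtable_alloc' supplies new table
 * pages where needed.
 */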
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
                                 unsigned long virt, phys_addr_t size,
                                 pgprot_t prot,
                                 phys_addr_t (*pgtable_alloc)(void),
                                 int flags)
{
        unsigned long addr, length, end, next;
        pgd_t *pgdp = pgd_offset_raw(pgdir, virt);

        /*
         * If the virtual and physical address don't have the same offset
         * within a page, we cannot map the region as the caller expects.
         */
        if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
                return;

        phys &= PAGE_MASK;
        addr = virt & PAGE_MASK;
        length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

        end = addr + length;
        do {
                next = pgd_addr_end(addr, end);
                alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
                               flags);
                phys += next - addr;
        } while (pgdp++, addr = next, addr != end);
}

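/*
 * Page table allocator for mappings created after boot: grab a zeroed page
 * from the page allocator and run the pgtable constructor on it, so the
 * page is set up like any other page table page.
 */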
static phys_addr_t pgd_pgtable_alloc(void)
{
        void *ptr = (void *)__get_free_page(PGALLOC_GFP);
        if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
                BUG();

        /* Ensure the zeroed page is visible to the page table walker */
        dsb(ishst);
        return __pa(ptr);
}

/*
 * This function can only be used to modify existing table entries,
 * without allocating new levels of table. Note that this permits the
 * creation of new section or page entries.
 */
static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
                                  phys_addr_t size, pgprot_t prot)
{
        if (virt < VMALLOC_START) {
                pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
                        &phys, virt);
                return;
        }
        __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
                             NO_CONT_MAPPINGS);
}

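/*
 * Create a mapping of [phys, phys + size) at 'virt' in the page tables of
 * 'mm', which must not be init_mm (callers include, for instance, the EFI
 * runtime services mm). 'page_mappings_only' restricts the mapping to page
 * granularity.
 */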
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
                               unsigned long virt, phys_addr_t size,
                               pgprot_t prot, bool page_mappings_only)
{
        int flags = 0;

        BUG_ON(mm == &init_mm);

        if (page_mappings_only)
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

        __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
                             pgd_pgtable_alloc, flags);
}

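/*
 * Change the attributes of an existing kernel mapping in place. No new
 * table levels are allocated, so the range must already be mapped; the TLB
 * is flushed once the update is done.
 */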
static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
                                phys_addr_t size, pgprot_t prot)
{
        if (virt < VMALLOC_START) {
                pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n",
                        &phys, virt);
                return;
        }

        __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL,
                             NO_CONT_MAPPINGS);

        /* flush the TLBs after updating live kernel mappings */
        flush_tlb_kernel_range(virt, virt + size);
}

static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
                                  phys_addr_t end, pgprot_t prot, int flags)
{
        __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
                             prot, early_pgtable_alloc, flags);
}

void __init mark_linear_text_alias_ro(void)
{
        /*
         * Remove the write permissions from the linear alias of .text/.rodata
         */
        update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
                            (unsigned long)__init_begin - (unsigned long)_text,
                            PAGE_KERNEL_RO);
}

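/*
 * Create the linear mapping for all usable memblock regions. The kernel
 * text/rodata image (and the crash kernel region, if any) is temporarily
 * marked NOMAP so it can be mapped separately with stricter options below.
 */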
static void __init map_mem(pgd_t *pgdp)
{
        phys_addr_t kernel_start = __pa_symbol(_text);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
        struct memblock_region *reg;
        int flags = 0;

        if (debug_pagealloc_enabled())
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;

        /*
         * Take care not to create a writable alias for the
         * read-only text and rodata sections of the kernel image.
         * So temporarily mark them as NOMAP to skip mappings in
         * the following for-loop
         */
        memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
#ifdef CONFIG_KEXEC_CORE
        if (crashk_res.end)
                memblock_mark_nomap(crashk_res.start,
                                    resource_size(&crashk_res));
#endif

        /* map all the memory banks */
        for_each_memblock(memory, reg) {
                phys_addr_t start = reg->base;
                phys_addr_t end = start + reg->size;

                if (start >= end)
                        break;
                if (memblock_is_nomap(reg))
                        continue;

                __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
        }

        /*
         * Map the linear alias of the [_text, __init_begin) interval
         * as non-executable now, and remove the write permission in
         * mark_linear_text_alias_ro() below (which will be called after
         * alternative patching has completed). This makes the contents
         * of the region accessible to subsystems such as hibernate,
         * but protects it from inadvertent modification or execution.
         * Note that contiguous mappings cannot be remapped in this way,
         * so we should avoid them here.
         */
        __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);

#ifdef CONFIG_KEXEC_CORE
        /*
         * Use page-level mappings here so that we can shrink the region
         * in page granularity and put back unused memory to buddy system
         * through /sys/kernel/kexec_crash_size interface.
         */
        if (crashk_res.end) {
                __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
                               PAGE_KERNEL,
                               NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
                memblock_clear_nomap(crashk_res.start,
                                     resource_size(&crashk_res));
        }
#endif
}

void mark_rodata_ro(void)
{
        unsigned long section_size;

        /*
         * mark .rodata as read only. Use __init_begin rather than __end_rodata
         * to cover NOTES and EXCEPTION_TABLE.
         */
        section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
        update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
                            section_size, PAGE_KERNEL_RO);

        debug_checkwx();
}

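/*
 * Map a single segment of the kernel image with the given protection and
 * register the corresponding VM area early, so the region is reserved in
 * the vmalloc space.
 */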
static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
                                      pgprot_t prot, struct vm_struct *vma,
                                      int flags, unsigned long vm_flags)
{
        phys_addr_t pa_start = __pa_symbol(va_start);
        unsigned long size = va_end - va_start;

        BUG_ON(!PAGE_ALIGNED(pa_start));
        BUG_ON(!PAGE_ALIGNED(size));

        __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
                             early_pgtable_alloc, flags);

        if (!(vm_flags & VM_NO_GUARD))
                size += PAGE_SIZE;

        vma->addr       = va_start;
        vma->phys_addr  = pa_start;
        vma->size       = size;
        vma->flags      = VM_MAP | vm_flags;
        vma->caller     = __builtin_return_address(0);

        vm_area_add_early(vma);
}

static int __init parse_rodata(char *arg)
{
        return strtobool(arg, &rodata_enabled);
}
early_param("rodata", parse_rodata);

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
        pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
        phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);

        /* The trampoline is always mapped and can therefore be global */
        pgprot_val(prot) &= ~PTE_NG;

        /* Map only the text into the trampoline page table */
        memset(tramp_pg_dir, 0, PGD_SIZE);
        __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
                             prot, pgd_pgtable_alloc, 0);

        /* Map both the text and data into the kernel page table */
        __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
                extern char __entry_tramp_data_start[];

                __set_fixmap(FIX_ENTRY_TRAMP_DATA,
                             __pa_symbol(__entry_tramp_data_start),
                             PAGE_KERNEL_RO);
        }

        return 0;
}
core_initcall(map_entry_trampoline);
#endif

/*
 * Create fine-grained mappings for the kernel.
 */
static void __init map_kernel(pgd_t *pgdp)
{
        static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
                                vmlinux_initdata, vmlinux_data;

        /*
         * External debuggers may need to write directly to the text
         * mapping to install SW breakpoints. Allow this (only) when
         * explicitly requested with rodata=off.
         */
        pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;

        /*
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
        map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
        map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
        map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
                           &vmlinux_inittext, 0, VM_NO_GUARD);
        map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
                           &vmlinux_initdata, 0, VM_NO_GUARD);
        map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);

        if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
                /*
                 * The fixmap falls in a separate pgd to the kernel, and doesn't
                 * live in the carveout for the swapper_pg_dir. We can simply
                 * re-use the existing dir for the fixmap.
                 */
                set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
                        READ_ONCE(*pgd_offset_k(FIXADDR_START)));
        } else if (CONFIG_PGTABLE_LEVELS > 3) {
                /*
                 * The fixmap shares its top level pgd entry with the kernel
                 * mapping. This can really only occur when we are running
                 * with 16k/4 levels, so we can simply reuse the pud level
                 * entry instead.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
                pud_populate(&init_mm,
                             pud_set_fixmap_offset(pgdp, FIXADDR_START),
                             lm_alias(bm_pmd));
                pud_clear_fixmap();
        } else {
                BUG();
        }

        kasan_copy_shadow(pgdp);
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps and sets up the zero page.
 */
void __init paging_init(void)
{
        phys_addr_t pgd_phys = early_pgtable_alloc();
        pgd_t *pgdp = pgd_set_fixmap(pgd_phys);

        map_kernel(pgdp);
        map_mem(pgdp);

        /*
         * We want to reuse the original swapper_pg_dir so we don't have to
         * communicate the new address to non-coherent secondaries in
         * secondary_entry, and so cpu_switch_mm can generate the address with
         * adrp+add rather than a load from some global variable.
         *
         * To do this we need to go via a temporary pgd.
         */
        cpu_replace_ttbr1(__va(pgd_phys));
        memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
        cpu_replace_ttbr1(lm_alias(swapper_pg_dir));

        pgd_clear_fixmap();
        memblock_free(pgd_phys, PAGE_SIZE);

        /*
         * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
         * allocated with it.
         */
        memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
                      __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
                      - PAGE_SIZE);
}

/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
int kern_addr_valid(unsigned long addr)
{
        pgd_t *pgdp;
        pud_t *pudp, pud;
        pmd_t *pmdp, pmd;
        pte_t *ptep, pte;

        if ((((long)addr) >> VA_BITS) != -1UL)
                return 0;

        pgdp = pgd_offset_k(addr);
        if (pgd_none(READ_ONCE(*pgdp)))
                return 0;

        pudp = pud_offset(pgdp, addr);
        pud = READ_ONCE(*pudp);
        if (pud_none(pud))
                return 0;

        if (pud_sect(pud))
                return pfn_valid(pud_pfn(pud));

        pmdp = pmd_offset(pudp, addr);
        pmd = READ_ONCE(*pmdp);
        if (pmd_none(pmd))
                return 0;

        if (pmd_sect(pmd))
                return pfn_valid(pmd_pfn(pmd));

        ptep = pte_offset_kernel(pmdp, addr);
        pte = READ_ONCE(*ptep);
        if (pte_none(pte))
                return 0;

        return pfn_valid(pte_pfn(pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                struct vmem_altmap *altmap)
{
        return vmemmap_populate_basepages(start, end, node);
}
#else   /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                struct vmem_altmap *altmap)
{
        unsigned long addr = start;
        unsigned long next;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;

        do {
                next = pmd_addr_end(addr, end);

                pgdp = vmemmap_pgd_populate(addr, node);
                if (!pgdp)
                        return -ENOMEM;

                pudp = vmemmap_pud_populate(pgdp, addr, node);
                if (!pudp)
                        return -ENOMEM;

                pmdp = pmd_offset(pudp, addr);
                if (pmd_none(READ_ONCE(*pmdp))) {
                        void *p = NULL;

                        p = vmemmap_alloc_block_buf(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;

                        pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
                } else
                        vmemmap_verify((pte_t *)pmdp, node, addr, next);
        } while (addr = next, addr != end);

        return 0;
}
#endif  /* !ARM64_SWAPPER_USES_SECTION_MAPS */
void vmemmap_free(unsigned long start, unsigned long end,
                struct vmem_altmap *altmap)
{
}
#endif  /* CONFIG_SPARSEMEM_VMEMMAP */

static inline pud_t * fixmap_pud(unsigned long addr)
{
        pgd_t *pgdp = pgd_offset_k(addr);
        pgd_t pgd = READ_ONCE(*pgdp);

        BUG_ON(pgd_none(pgd) || pgd_bad(pgd));

        return pud_offset_kimg(pgdp, addr);
}

static inline pmd_t * fixmap_pmd(unsigned long addr)
{
        pud_t *pudp = fixmap_pud(addr);
        pud_t pud = READ_ONCE(*pudp);

        BUG_ON(pud_none(pud) || pud_bad(pud));

        return pmd_offset_kimg(pudp, addr);
}

static inline pte_t * fixmap_pte(unsigned long addr)
{
        return &bm_pte[pte_index(addr)];
}

/*
 * The p*d_populate functions call virt_to_phys implicitly so they can't be used
 * directly on kernel symbols (bm_p*d). This function is called too early to use
 * lm_alias so __p*d_populate functions must be used to populate with the
 * physical address from __pa_symbol.
 */
void __init early_fixmap_init(void)
{
        pgd_t *pgdp, pgd;
        pud_t *pudp;
        pmd_t *pmdp;
        unsigned long addr = FIXADDR_START;

        pgdp = pgd_offset_k(addr);
        pgd = READ_ONCE(*pgdp);
        if (CONFIG_PGTABLE_LEVELS > 3 &&
            !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
                /*
                 * We only end up here if the kernel mapping and the fixmap
                 * share the top level pgd entry, which should only happen on
                 * 16k/4 levels configurations.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
                pudp = pud_offset_kimg(pgdp, addr);
        } else {
                if (pgd_none(pgd))
                        __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
                pudp = fixmap_pud(addr);
        }
        if (pud_none(READ_ONCE(*pudp)))
                __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
        pmdp = fixmap_pmd(addr);
        __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);

        /*
         * The boot-ioremap range spans multiple pmds, for which
         * we are not prepared:
         */
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

        if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
             || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
                WARN_ON(1);
                pr_warn("pmdp %p != %p, %p\n",
                        pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
                        fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
                pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
                        fix_to_virt(FIX_BTMAP_BEGIN));
                pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
                        fix_to_virt(FIX_BTMAP_END));

                pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
                pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
        }
}

/*
 * Unusually, this is also called in IRQ context (ghes_iounmap_irq) so if we
 * ever need to use IPIs for TLB broadcasting, then we're in trouble here.
 */
void __set_fixmap(enum fixed_addresses idx,
                               phys_addr_t phys, pgprot_t flags)
{
        unsigned long addr = __fix_to_virt(idx);
        pte_t *ptep;

        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

        ptep = fixmap_pte(addr);

        if (pgprot_val(flags)) {
                set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
        } else {
                pte_clear(&init_mm, addr, ptep);
                flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
        }
}

void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
        const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
        int offset;
        void *dt_virt;

        /*
         * Check whether the physical FDT address is set and meets the minimum
         * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
         * at least 8 bytes so that we can always access the magic and size
         * fields of the FDT header after mapping the first chunk, double check
         * here if that is indeed the case.
         */
        BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
        if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
                return NULL;

        /*
         * Make sure that the FDT region can be mapped without the need to
         * allocate additional translation table pages, so that it is safe
         * to call create_mapping_noalloc() this early.
         *
         * On 64k pages, the FDT will be mapped using PTEs, so we need to
         * be in the same PMD as the rest of the fixmap.
         * On 4k pages, we'll use section mappings for the FDT so we only
         * have to be in the same PUD.
         */
        BUILD_BUG_ON(dt_virt_base % SZ_2M);

        BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
                     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);

        offset = dt_phys % SWAPPER_BLOCK_SIZE;
        dt_virt = (void *)dt_virt_base + offset;

        /* map the first chunk so we can read the size from the header */
        create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
                        dt_virt_base, SWAPPER_BLOCK_SIZE, prot);

        if (fdt_magic(dt_virt) != FDT_MAGIC)
                return NULL;

        *size = fdt_totalsize(dt_virt);
        if (*size > MAX_FDT_SIZE)
                return NULL;

        if (offset + *size > SWAPPER_BLOCK_SIZE)
                create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
                               round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);

        return dt_virt;
}

void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
{
        void *dt_virt;
        int size;

        dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
        if (!dt_virt)
                return NULL;

        memblock_reserve(dt_phys, size);
        return dt_virt;
}

int __init arch_ioremap_pud_supported(void)
{
        /* only 4k granule supports level 1 block mappings */
        return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
}

int __init arch_ioremap_pmd_supported(void)
{
        return 1;
}

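/*
 * Huge-mapping helpers used by ioremap/vmap. Since ioremap_page_range()
 * does not perform break-before-make, an entry that is already present is
 * left untouched (returning 0).
 */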
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
        pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));

        /* ioremap_page_range doesn't honour BBM */
        if (pud_present(READ_ONCE(*pudp)))
                return 0;

        BUG_ON(phys & ~PUD_MASK);
        set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
        return 1;
}

int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
        pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));

        /* ioremap_page_range doesn't honour BBM */
        if (pmd_present(READ_ONCE(*pmdp)))
                return 0;

        BUG_ON(phys & ~PMD_MASK);
        set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
        return 1;
}

int pud_clear_huge(pud_t *pudp)
{
        if (!pud_sect(READ_ONCE(*pudp)))
                return 0;
        pud_clear(pudp);
        return 1;
}

int pmd_clear_huge(pmd_t *pmdp)
{
        if (!pmd_sect(READ_ONCE(*pmdp)))
                return 0;
        pmd_clear(pmdp);
        return 1;
}

int pud_free_pmd_page(pud_t *pud)
{
        return pud_none(*pud);
}

int pmd_free_pte_page(pmd_t *pmd)
{
        return pmd_none(*pmd);
}