/* linux/arch/arm64/mm/init.c */
   1/*
   2 * Based on arch/arm/mm/init.c
   3 *
   4 * Copyright (C) 1995-2005 Russell King
   5 * Copyright (C) 2012 ARM Ltd.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License version 2 as
   9 * published by the Free Software Foundation.
  10 *
  11 * This program is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 * GNU General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU General Public License
  17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include <linux/kernel.h>
  21#include <linux/export.h>
  22#include <linux/errno.h>
  23#include <linux/swap.h>
  24#include <linux/init.h>
  25#include <linux/bootmem.h>
  26#include <linux/mman.h>
  27#include <linux/nodemask.h>
  28#include <linux/initrd.h>
  29#include <linux/gfp.h>
  30#include <linux/memblock.h>
  31#include <linux/sort.h>
  32#include <linux/of_fdt.h>
  33#include <linux/dma-mapping.h>
  34#include <linux/dma-contiguous.h>
  35#include <linux/efi.h>
  36#include <linux/swiotlb.h>
  37
  38#include <asm/boot.h>
  39#include <asm/fixmap.h>
  40#include <asm/kasan.h>
  41#include <asm/kernel-pgtable.h>
  42#include <asm/memory.h>
  43#include <asm/numa.h>
  44#include <asm/sections.h>
  45#include <asm/setup.h>
  46#include <asm/sizes.h>
  47#include <asm/tlb.h>
  48#include <asm/alternative.h>
  49
  50#include "mm.h"
  51
  52/*
  53 * We need to be able to catch inadvertent references to memstart_addr
  54 * that occur (potentially in generic code) before arm64_memblock_init()
  55 * executes, which assigns it its actual value. So use a default value
  56 * that cannot be mistaken for a real physical address.
  57 */
  58s64 memstart_addr __read_mostly = -1;
  59phys_addr_t arm64_dma_phys_limit __read_mostly;
  60
  61#ifdef CONFIG_BLK_DEV_INITRD
  62static int __init early_initrd(char *p)
  63{
  64        unsigned long start, size;
  65        char *endp;
  66
  67        start = memparse(p, &endp);
  68        if (*endp == ',') {
  69                size = memparse(endp + 1, NULL);
  70
  71                initrd_start = start;
  72                initrd_end = start + size;
  73        }
  74        return 0;
  75}
  76early_param("initrd", early_initrd);
  77#endif
  78
  79/*
  80 * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
  81 * currently assumes that for memory starting above 4G, 32-bit devices will
  82 * use a DMA offset.
  83 */
  84static phys_addr_t __init max_zone_dma_phys(void)
  85{
  86        phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
  87        return min(offset + (1ULL << 32), memblock_end_of_DRAM());
  88}
  89
  90#ifdef CONFIG_NUMA
  91
  92static void __init zone_sizes_init(unsigned long min, unsigned long max)
  93{
  94        unsigned long max_zone_pfns[MAX_NR_ZONES]  = {0};
  95
  96        if (IS_ENABLED(CONFIG_ZONE_DMA))
  97                max_zone_pfns[ZONE_DMA] = PFN_DOWN(max_zone_dma_phys());
  98        max_zone_pfns[ZONE_NORMAL] = max;
  99
 100        free_area_init_nodes(max_zone_pfns);
 101}
 102
 103#else
 104
 105static void __init zone_sizes_init(unsigned long min, unsigned long max)
 106{
 107        struct memblock_region *reg;
 108        unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
 109        unsigned long max_dma = min;
 110
 111        memset(zone_size, 0, sizeof(zone_size));
 112
 113        /* 4GB maximum for 32-bit only capable devices */
 114#ifdef CONFIG_ZONE_DMA
 115        max_dma = PFN_DOWN(arm64_dma_phys_limit);
 116        zone_size[ZONE_DMA] = max_dma - min;
 117#endif
 118        zone_size[ZONE_NORMAL] = max - max_dma;
 119
 120        memcpy(zhole_size, zone_size, sizeof(zhole_size));
 121
 122        for_each_memblock(memory, reg) {
 123                unsigned long start = memblock_region_memory_base_pfn(reg);
 124                unsigned long end = memblock_region_memory_end_pfn(reg);
 125
 126                if (start >= max)
 127                        continue;
 128
 129#ifdef CONFIG_ZONE_DMA
 130                if (start < max_dma) {
 131                        unsigned long dma_end = min(end, max_dma);
 132                        zhole_size[ZONE_DMA] -= dma_end - start;
 133                }
 134#endif
 135                if (end > max_dma) {
 136                        unsigned long normal_end = min(end, max);
 137                        unsigned long normal_start = max(start, max_dma);
 138                        zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
 139                }
 140        }
 141
 142        free_area_init_node(0, zone_size, min, zhole_size);
 143}
 144
 145#endif /* CONFIG_NUMA */
 146
 147#ifdef CONFIG_HAVE_ARCH_PFN_VALID
 148int pfn_valid(unsigned long pfn)
 149{
 150        return memblock_is_map_memory(pfn << PAGE_SHIFT);
 151}
 152EXPORT_SYMBOL(pfn_valid);
 153#endif
 154
 155#ifndef CONFIG_SPARSEMEM
 156static void __init arm64_memory_present(void)
 157{
 158}
 159#else
 160static void __init arm64_memory_present(void)
 161{
 162        struct memblock_region *reg;
 163
 164        for_each_memblock(memory, reg) {
 165                int nid = memblock_get_region_node(reg);
 166
 167                memory_present(nid, memblock_region_memory_base_pfn(reg),
 168                                memblock_region_memory_end_pfn(reg));
 169        }
 170}
 171#endif
 172
 173static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX;
 174
 175/*
 176 * Limit the memory size that was specified via FDT.
 177 */
 178static int __init early_mem(char *p)
 179{
 180        if (!p)
 181                return 1;
 182
 183        memory_limit = memparse(p, &p) & PAGE_MASK;
 184        pr_notice("Memory limited to %lldMB\n", memory_limit >> 20);
 185
 186        return 0;
 187}
 188early_param("mem", early_mem);
 189
 190void __init arm64_memblock_init(void)
 191{
 192        const s64 linear_region_size = -(s64)PAGE_OFFSET;
 193
 194        /*
 195         * Ensure that the linear region takes up exactly half of the kernel
 196         * virtual address space. This way, we can distinguish a linear address
 197         * from a kernel/module/vmalloc address by testing a single bit.
 198         */
 199        BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1));
 200
 201        /*
 202         * Select a suitable value for the base of physical memory.
 203         */
 204        memstart_addr = round_down(memblock_start_of_DRAM(),
 205                                   ARM64_MEMSTART_ALIGN);
 206
 207        /*
 208         * Remove the memory that we will not be able to cover with the
 209         * linear mapping. Take care not to clip the kernel which may be
 210         * high in memory.
 211         */
 212        memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
 213                        ULLONG_MAX);
 214        if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
 215                /* ensure that memstart_addr remains sufficiently aligned */
 216                memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
 217                                         ARM64_MEMSTART_ALIGN);
 218                memblock_remove(0, memstart_addr);
 219        }
 220
 221        /*
 222         * Apply the memory limit if it was set. Since the kernel may be loaded
 223         * high up in memory, add back the kernel region that must be accessible
 224         * via the linear mapping.
 225         */
 226        if (memory_limit != (phys_addr_t)ULLONG_MAX) {
 227                memblock_mem_limit_remove_map(memory_limit);
 228                memblock_add(__pa(_text), (u64)(_end - _text));
 229        }
 230
 231        if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) {
 232                /*
 233                 * Add back the memory we just removed if it results in the
 234                 * initrd to become inaccessible via the linear mapping.
 235                 * Otherwise, this is a no-op
 236                 */
 237                u64 base = initrd_start & PAGE_MASK;
 238                u64 size = PAGE_ALIGN(initrd_end) - base;
 239
 240                /*
 241                 * We can only add back the initrd memory if we don't end up
 242                 * with more memory than we can address via the linear mapping.
 243                 * It is up to the bootloader to position the kernel and the
 244                 * initrd reasonably close to each other (i.e., within 32 GB of
 245                 * each other) so that all granule/#levels combinations can
 246                 * always access both.
 247                 */
 248                if (WARN(base < memblock_start_of_DRAM() ||
 249                         base + size > memblock_start_of_DRAM() +
 250                                       linear_region_size,
 251                        "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
 252                        initrd_start = 0;
 253                } else {
 254                        memblock_remove(base, size); /* clear MEMBLOCK_ flags */
 255                        memblock_add(base, size);
 256                        memblock_reserve(base, size);
 257                }
 258        }
 259
 260        if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
 261                extern u16 memstart_offset_seed;
 262                u64 range = linear_region_size -
 263                            (memblock_end_of_DRAM() - memblock_start_of_DRAM());
 264
 265                /*
 266                 * If the size of the linear region exceeds, by a sufficient
 267                 * margin, the size of the region that the available physical
 268                 * memory spans, randomize the linear region as well.
 269                 */
 270                if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
 271                        range = range / ARM64_MEMSTART_ALIGN + 1;
 272                        memstart_addr -= ARM64_MEMSTART_ALIGN *
 273                                         ((range * memstart_offset_seed) >> 16);
 274                }
 275        }
 276
 277        /*
 278         * Register the kernel text, kernel data, initrd, and initial
 279         * pagetables with memblock.
 280         */
 281        memblock_reserve(__pa(_text), _end - _text);
 282#ifdef CONFIG_BLK_DEV_INITRD
 283        if (initrd_start) {
 284                memblock_reserve(initrd_start, initrd_end - initrd_start);
 285
 286                /* the generic initrd code expects virtual addresses */
 287                initrd_start = __phys_to_virt(initrd_start);
 288                initrd_end = __phys_to_virt(initrd_end);
 289        }
 290#endif
 291
 292        early_init_fdt_scan_reserved_mem();
 293
 294        /* 4GB maximum for 32-bit only capable devices */
 295        if (IS_ENABLED(CONFIG_ZONE_DMA))
 296                arm64_dma_phys_limit = max_zone_dma_phys();
 297        else
 298                arm64_dma_phys_limit = PHYS_MASK + 1;
 299        dma_contiguous_reserve(arm64_dma_phys_limit);
 300
 301        memblock_allow_resize();
 302}
 303
 304void __init bootmem_init(void)
 305{
 306        unsigned long min, max;
 307
 308        min = PFN_UP(memblock_start_of_DRAM());
 309        max = PFN_DOWN(memblock_end_of_DRAM());
 310
 311        early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
 312
 313        max_pfn = max_low_pfn = max;
 314
 315        arm64_numa_init();
 316        /*
 317         * Sparsemem tries to allocate bootmem in memory_present(), so must be
 318         * done after the fixed reservations.
 319         */
 320        arm64_memory_present();
 321
 322        sparse_init();
 323        zone_sizes_init(min, max);
 324
 325        high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
 326        memblock_dump_all();
 327}
 328
 329#ifndef CONFIG_SPARSEMEM_VMEMMAP
 330static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 331{
 332        struct page *start_pg, *end_pg;
 333        unsigned long pg, pgend;
 334
 335        /*
 336         * Convert start_pfn/end_pfn to a struct page pointer.
 337         */
 338        start_pg = pfn_to_page(start_pfn - 1) + 1;
 339        end_pg = pfn_to_page(end_pfn - 1) + 1;
 340
 341        /*
 342         * Convert to physical addresses, and round start upwards and end
 343         * downwards.
 344         */
 345        pg = (unsigned long)PAGE_ALIGN(__pa(start_pg));
 346        pgend = (unsigned long)__pa(end_pg) & PAGE_MASK;
 347
 348        /*
 349         * If there are free pages between these, free the section of the
 350         * memmap array.
 351         */
 352        if (pg < pgend)
 353                free_bootmem(pg, pgend - pg);
 354}
 355
 356/*
 357 * The mem_map array can get very big. Free the unused area of the memory map.
 358 */
 359static void __init free_unused_memmap(void)
 360{
 361        unsigned long start, prev_end = 0;
 362        struct memblock_region *reg;
 363
 364        for_each_memblock(memory, reg) {
 365                start = __phys_to_pfn(reg->base);
 366
 367#ifdef CONFIG_SPARSEMEM
 368                /*
 369                 * Take care not to free memmap entries that don't exist due
 370                 * to SPARSEMEM sections which aren't present.
 371                 */
 372                start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
 373#endif
 374                /*
 375                 * If we had a previous bank, and there is a space between the
 376                 * current bank and the previous, free it.
 377                 */
 378                if (prev_end && prev_end < start)
 379                        free_memmap(prev_end, start);
 380
 381                /*
 382                 * Align up here since the VM subsystem insists that the
 383                 * memmap entries are valid from the bank end aligned to
 384                 * MAX_ORDER_NR_PAGES.
 385                 */
 386                prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
 387                                 MAX_ORDER_NR_PAGES);
 388        }
 389
 390#ifdef CONFIG_SPARSEMEM
 391        if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
 392                free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
 393#endif
 394}
 395#endif  /* !CONFIG_SPARSEMEM_VMEMMAP */
 396
 397/*
 398 * mem_init() marks the free areas in the mem_map and tells us how much memory
 399 * is free.  This is done after various parts of the system have claimed their
 400 * memory after the kernel image.
 401 */
 402void __init mem_init(void)
 403{
 404        if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
 405                swiotlb_init(1);
 406
 407        set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
 408
 409#ifndef CONFIG_SPARSEMEM_VMEMMAP
 410        free_unused_memmap();
 411#endif
 412        /* this will put all unused low memory onto the freelists */
 413        free_all_bootmem();
 414
 415        mem_init_print_info(NULL);
 416
 417#define MLK(b, t) b, t, ((t) - (b)) >> 10
 418#define MLM(b, t) b, t, ((t) - (b)) >> 20
 419#define MLG(b, t) b, t, ((t) - (b)) >> 30
 420#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 421
 422        pr_notice("Virtual kernel memory layout:\n");
 423#ifdef CONFIG_KASAN
 424        pr_cont("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
 425                MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
 426#endif
 427        pr_cont("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 428                MLM(MODULES_VADDR, MODULES_END));
 429        pr_cont("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
 430                MLG(VMALLOC_START, VMALLOC_END));
 431        pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 432                MLK_ROUNDUP(_text, _etext));
 433        pr_cont("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 434                MLK_ROUNDUP(__start_rodata, __init_begin));
 435        pr_cont("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 436                MLK_ROUNDUP(__init_begin, __init_end));
 437        pr_cont("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 438                MLK_ROUNDUP(_sdata, _edata));
 439        pr_cont("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
 440                MLK_ROUNDUP(__bss_start, __bss_stop));
 441        pr_cont("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
 442                MLK(FIXADDR_START, FIXADDR_TOP));
 443        pr_cont("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 444                MLM(PCI_IO_START, PCI_IO_END));
 445#ifdef CONFIG_SPARSEMEM_VMEMMAP
 446        pr_cont("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
 447                MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
 448        pr_cont("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
 449                MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
 450                    (unsigned long)virt_to_page(high_memory)));
 451#endif
 452        pr_cont("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 453                MLM(__phys_to_virt(memblock_start_of_DRAM()),
 454                    (unsigned long)high_memory));
 455
 456#undef MLK
 457#undef MLM
 458#undef MLK_ROUNDUP
 459
 460        /*
 461         * Check boundaries twice: Some fundamental inconsistencies can be
 462         * detected at build time already.
 463         */
 464#ifdef CONFIG_COMPAT
 465        BUILD_BUG_ON(TASK_SIZE_32                       > TASK_SIZE_64);
 466#endif
 467
 468        /*
 469         * Make sure we chose the upper bound of sizeof(struct page)
 470         * correctly.
 471         */
 472        BUILD_BUG_ON(sizeof(struct page) > (1 << STRUCT_PAGE_MAX_SHIFT));
 473
 474        if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 475                extern int sysctl_overcommit_memory;
 476                /*
 477                 * On a machine this small we won't get anywhere without
 478                 * overcommit, so turn it on by default.
 479                 */
 480                sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
 481        }
 482}
 483
 484void free_initmem(void)
 485{
 486        free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)),
 487                           0, "unused kernel");
 488        fixup_init();
 489}
 490
 491#ifdef CONFIG_BLK_DEV_INITRD
 492
 493static int keep_initrd __initdata;
 494
 495void __init free_initrd_mem(unsigned long start, unsigned long end)
 496{
 497        if (!keep_initrd)
 498                free_reserved_area((void *)start, (void *)end, 0, "initrd");
 499}
 500
 501static int __init keepinitrd_setup(char *__unused)
 502{
 503        keep_initrd = 1;
 504        return 1;
 505}
 506
 507__setup("keepinitrd", keepinitrd_setup);
 508#endif
 509
 510/*
 511 * Dump out memory limit information on panic.
 512 */
 513static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
 514{
 515        if (memory_limit != (phys_addr_t)ULLONG_MAX) {
 516                pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
 517        } else {
 518                pr_emerg("Memory Limit: none\n");
 519        }
 520        return 0;
 521}
 522
 523static struct notifier_block mem_limit_notifier = {
 524        .notifier_call = dump_mem_limit,
 525};
 526
 527static int __init register_mem_limit_dumper(void)
 528{
 529        atomic_notifier_chain_register(&panic_notifier_list,
 530                                       &mem_limit_notifier);
 531        return 0;
 532}
 533__initcall(register_mem_limit_dumper);
 534