linux/mm/sparse.c
// SPDX-License-Identifier: GPL-2.0
/*
 * sparse memory mappings.
 */
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>

#include "internal.h"
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Permanent SPARSEMEM data:
 *
 * 1) mem_section       - memory sections, mem_map's for valid memory
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
        ____cacheline_internodealigned_in_smp;
#endif
EXPORT_SYMBOL(mem_section);

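/*
 * Note (illustrative): with SPARSEMEM_EXTREME the table is two-level and the
 * root arrays are populated lazily, so a section lookup roughly does
 *
 *      root = mem_section[SECTION_NR_TO_ROOT(nr)];
 *      ms   = &root[nr & SECTION_ROOT_MASK];
 *
 * while the flat !EXTREME layout indexes the static array directly.  See
 * __nr_to_section() in include/linux/mmzone.h.
 */
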
#ifdef NODE_NOT_IN_PAGE_FLAGS
/*
 * If we did not store the node number in the page then we have to
 * do a lookup in the section_to_node_table in order to find which
 * node the page belongs to.
 */
#if MAX_NUMNODES <= 256
static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#else
static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
#endif

int page_to_nid(const struct page *page)
{
        return section_to_node_table[page_to_section(page)];
}
EXPORT_SYMBOL(page_to_nid);

static void set_section_nid(unsigned long section_nr, int nid)
{
        section_to_node_table[section_nr] = nid;
}
#else /* !NODE_NOT_IN_PAGE_FLAGS */
static inline void set_section_nid(unsigned long section_nr, int nid)
{
}
#endif

#ifdef CONFIG_SPARSEMEM_EXTREME
static noinline struct mem_section __ref *sparse_index_alloc(int nid)
{
        struct mem_section *section = NULL;
        unsigned long array_size = SECTIONS_PER_ROOT *
                                   sizeof(struct mem_section);

        if (slab_is_available())
                section = kzalloc_node(array_size, GFP_KERNEL, nid);
        else
                section = memblock_virt_alloc_node(array_size, nid);

        return section;
}

static int __meminit sparse_index_init(unsigned long section_nr, int nid)
{
        unsigned long root = SECTION_NR_TO_ROOT(section_nr);
        struct mem_section *section;

        if (mem_section[root])
                return -EEXIST;

        section = sparse_index_alloc(nid);
        if (!section)
                return -ENOMEM;

        mem_section[root] = section;

        return 0;
}
#else /* !SPARSEMEM_EXTREME */
static inline int sparse_index_init(unsigned long section_nr, int nid)
{
        return 0;
}
#endif

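/*
 * Note: a return value of -EEXIST from sparse_index_init() is not treated as
 * an error by the hotplug path; it just means that another section sharing
 * the same root has already allocated the per-root array (see
 * sparse_add_one_section() below).
 */
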
#ifdef CONFIG_SPARSEMEM_EXTREME
int __section_nr(struct mem_section *ms)
{
        unsigned long root_nr;
        struct mem_section *root = NULL;

        for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
                root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
                if (!root)
                        continue;

                if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
                        break;
        }

        VM_BUG_ON(!root);

        return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
#else
int __section_nr(struct mem_section *ms)
{
        return (int)(ms - mem_section[0]);
}
#endif

/*
 * During early boot, before section_mem_map is used for an actual
 * mem_map, we use section_mem_map to store the section's NUMA
 * node.  This keeps us from having to use another data structure.  The
 * node information is cleared just before we store the real mem_map.
 */
static inline unsigned long sparse_encode_early_nid(int nid)
{
        return (nid << SECTION_NID_SHIFT);
}

static inline int sparse_early_nid(struct mem_section *section)
{
        return (section->section_mem_map >> SECTION_NID_SHIFT);
}

/* Validate the physical addressing limitations of the model */
void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
                                                unsigned long *end_pfn)
{
        unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);

        /*
         * Sanity checks - do not allow an architecture to pass
         * in larger pfns than the maximum scope of sparsemem:
         */
        if (*start_pfn > max_sparsemem_pfn) {
                mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
                        "Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
                        *start_pfn, *end_pfn, max_sparsemem_pfn);
                WARN_ON_ONCE(1);
                *start_pfn = max_sparsemem_pfn;
                *end_pfn = max_sparsemem_pfn;
        } else if (*end_pfn > max_sparsemem_pfn) {
                mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
                        "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
                        *start_pfn, *end_pfn, max_sparsemem_pfn);
                WARN_ON_ONCE(1);
                *end_pfn = max_sparsemem_pfn;
        }
}

/*
 * There are a number of times that we loop over NR_MEM_SECTIONS,
 * looking for section_present() on each.  But, when we have very
 * large physical address spaces, NR_MEM_SECTIONS can also be
 * very large which makes the loops quite long.
 *
 * Keeping track of this gives us an easy way to break out of
 * those loops early.
 */
int __highest_present_section_nr;
static void section_mark_present(struct mem_section *ms)
{
        int section_nr = __section_nr(ms);

        if (section_nr > __highest_present_section_nr)
                __highest_present_section_nr = section_nr;

        ms->section_mem_map |= SECTION_MARKED_PRESENT;
}

static inline int next_present_section_nr(int section_nr)
{
        do {
                section_nr++;
                if (present_section_nr(section_nr))
                        return section_nr;
        } while ((section_nr <= __highest_present_section_nr));

        return -1;
}
#define for_each_present_section_nr(start, section_nr)          \
        for (section_nr = next_present_section_nr(start-1);     \
             ((section_nr >= 0) &&                              \
              (section_nr <= __highest_present_section_nr));    \
             section_nr = next_present_section_nr(section_nr))

static inline unsigned long first_present_section_nr(void)
{
        return next_present_section_nr(-1);
}

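/*
 * Typical use of the iteration helpers above (illustrative only):
 *
 *      unsigned long pnum;
 *
 *      for_each_present_section_nr(0, pnum)
 *              do_something(__nr_to_section(pnum));
 *
 * The walk stops after __highest_present_section_nr instead of scanning all
 * NR_MEM_SECTIONS entries.
 */
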
 207
 208/* Record a memory area against a node. */
 209void __init memory_present(int nid, unsigned long start, unsigned long end)
 210{
 211        unsigned long pfn;
 212
 213#ifdef CONFIG_SPARSEMEM_EXTREME
 214        if (unlikely(!mem_section)) {
 215                unsigned long size, align;
 216
 217                size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
 218                align = 1 << (INTERNODE_CACHE_SHIFT);
 219                mem_section = memblock_virt_alloc(size, align);
 220        }
 221#endif
 222
 223        start &= PAGE_SECTION_MASK;
 224        mminit_validate_memmodel_limits(&start, &end);
 225        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 226                unsigned long section = pfn_to_section_nr(pfn);
 227                struct mem_section *ms;
 228
 229                sparse_index_init(section, nid);
 230                set_section_nid(section, nid);
 231
 232                ms = __nr_to_section(section);
 233                if (!ms->section_mem_map) {
 234                        ms->section_mem_map = sparse_encode_early_nid(nid) |
 235                                                        SECTION_IS_ONLINE;
 236                        section_mark_present(ms);
 237                }
 238        }
 239}
 240
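/*
 * At this point a present section only records its node id and the
 * PRESENT/ONLINE flags; the real mem_map and pageblock bitmap are attached
 * later, when sparse_init() (at boot) or sparse_add_one_section() (on
 * hotplug) calls sparse_init_one_section().
 */
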
/*
 * Subtle, we encode the real pfn into the mem_map such that
 * the identity pfn - section_mem_map will return the actual
 * physical page frame number.
 */
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
        unsigned long coded_mem_map =
                (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
        BUILD_BUG_ON(SECTION_MAP_LAST_BIT > (1UL<<PFN_SECTION_SHIFT));
        BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
        return coded_mem_map;
}

/*
 * Decode mem_map from the coded memmap
 */
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
        /* mask off the extra low bits of information */
        coded_mem_map &= SECTION_MAP_MASK;
        return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}

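/*
 * In other words (illustrative): if 'map' is the mem_map of section 'pnum',
 * then for the encoded value
 *
 *      coded = sparse_encode_mem_map(map, pnum);
 *
 * the identity
 *
 *      (struct page *)(coded & SECTION_MAP_MASK) + pfn ==
 *              &map[pfn - section_nr_to_pfn(pnum)]
 *
 * holds for any pfn inside the section, which is what __pfn_to_page() relies
 * on; sparse_decode_mem_map() simply undoes the encoding.
 */
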
static void __meminit sparse_init_one_section(struct mem_section *ms,
                unsigned long pnum, struct page *mem_map,
                unsigned long *pageblock_bitmap)
{
        ms->section_mem_map &= ~SECTION_MAP_MASK;
        ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
                                                        SECTION_HAS_MEM_MAP;
        ms->pageblock_flags = pageblock_bitmap;
}

unsigned long usemap_size(void)
{
        return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
}

#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long *__kmalloc_section_usemap(void)
{
        return kmalloc(usemap_size(), GFP_KERNEL);
}
#endif /* CONFIG_MEMORY_HOTPLUG */

#ifdef CONFIG_MEMORY_HOTREMOVE
static unsigned long * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
                                         unsigned long size)
{
        unsigned long goal, limit;
        unsigned long *p;
        int nid;
        /*
         * A page may contain usemaps for other sections preventing the
         * page being freed and making a section unremovable while
         * other sections referencing the usemap remain active. Similarly,
         * a pgdat can prevent a section being removed. If section A
         * contains a pgdat and section B contains the usemap, both
         * sections become inter-dependent. This allocates usemaps
         * from the same section as the pgdat where possible to avoid
         * this problem.
         */
        goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
        limit = goal + (1UL << PA_SECTION_SHIFT);
        nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
again:
        p = memblock_virt_alloc_try_nid_nopanic(size,
                                                SMP_CACHE_BYTES, goal, limit,
                                                nid);
        if (!p && limit) {
                limit = 0;
                goto again;
        }
        return p;
}

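/*
 * Note: the first attempt above is confined to [goal, limit), i.e. to the
 * section that contains the pgdat itself; if that fails, limit is cleared
 * and the allocation is retried with no upper address bound.
 */
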
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
        unsigned long usemap_snr, pgdat_snr;
        static unsigned long old_usemap_snr;
        static unsigned long old_pgdat_snr;
        struct pglist_data *pgdat = NODE_DATA(nid);
        int usemap_nid;

        /* First call */
        if (!old_usemap_snr) {
                old_usemap_snr = NR_MEM_SECTIONS;
                old_pgdat_snr = NR_MEM_SECTIONS;
        }

        usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
        pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
        if (usemap_snr == pgdat_snr)
                return;

        if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
                /* skip redundant message */
                return;

        old_usemap_snr = usemap_snr;
        old_pgdat_snr = pgdat_snr;

        usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
        if (usemap_nid != nid) {
                pr_info("node %d must be removed before remove section %ld\n",
                        nid, usemap_snr);
                return;
        }
        /*
         * There is a circular dependency.  Some platforms tolerate an
         * un-removable section because they simply gather other removable
         * sections for dynamic partitioning, so just report the number of
         * the un-removable section here.
         */
        pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
                usemap_snr, pgdat_snr, nid);
}
#else
static unsigned long * __init
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
                                         unsigned long size)
{
        return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
}

static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
}
#endif /* CONFIG_MEMORY_HOTREMOVE */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
static unsigned long __init section_map_size(void)
{
        return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
}

#else
static unsigned long __init section_map_size(void)
{
        return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
}

struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
                struct vmem_altmap *altmap)
{
        unsigned long size = section_map_size();
        struct page *map = sparse_buffer_alloc(size);

        if (map)
                return map;

        map = memblock_virt_alloc_try_nid(size,
                                          PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
                                          BOOTMEM_ALLOC_ACCESSIBLE, nid);
        return map;
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */

static void *sparsemap_buf __meminitdata;
static void *sparsemap_buf_end __meminitdata;

static void __init sparse_buffer_init(unsigned long size, int nid)
{
        WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
        sparsemap_buf =
                memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE,
                                                __pa(MAX_DMA_ADDRESS),
                                                BOOTMEM_ALLOC_ACCESSIBLE, nid);
        sparsemap_buf_end = sparsemap_buf + size;
}

static void __init sparse_buffer_fini(void)
{
        unsigned long size = sparsemap_buf_end - sparsemap_buf;

        if (sparsemap_buf && size > 0)
                memblock_free_early(__pa(sparsemap_buf), size);
        sparsemap_buf = NULL;
}

void * __meminit sparse_buffer_alloc(unsigned long size)
{
        void *ptr = NULL;

        if (sparsemap_buf) {
                ptr = PTR_ALIGN(sparsemap_buf, size);
                if (ptr + size > sparsemap_buf_end)
                        ptr = NULL;
                else
                        sparsemap_buf = ptr + size;
        }
        return ptr;
}

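/*
 * Boot-time buffer for mem_maps: sparse_init_nid() sizes it for all present
 * sections of one node via sparse_buffer_init(), sparse_mem_map_populate()
 * carves per-section mem_maps out of it with sparse_buffer_alloc(), and
 * sparse_buffer_fini() returns any unused tail to memblock.  Note that
 * sparse_buffer_alloc() also uses the requested size as the alignment of the
 * pointer it returns.
 */
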
void __weak __meminit vmemmap_populate_print_last(void)
{
}

/*
 * Initialize sparse for one node.  The node's sections span
 * [pnum_begin, pnum_end), of which map_count are present.
 */
static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
                                   unsigned long pnum_end,
                                   unsigned long map_count)
{
        unsigned long pnum, usemap_longs, *usemap;
        struct page *map;

        usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS);
        usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
                                                          usemap_size() *
                                                          map_count);
        if (!usemap) {
                pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
                goto failed;
        }
        sparse_buffer_init(map_count * section_map_size(), nid);
        for_each_present_section_nr(pnum_begin, pnum) {
                if (pnum >= pnum_end)
                        break;

                map = sparse_mem_map_populate(pnum, nid, NULL);
                if (!map) {
                        pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
                               __func__, nid);
                        pnum_begin = pnum;
                        goto failed;
                }
                check_usemap_section_nr(nid, usemap);
                sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap);
                usemap += usemap_longs;
        }
        sparse_buffer_fini();
        return;
failed:
        /* We failed to allocate, mark all the following pnums as not present */
        for_each_present_section_nr(pnum_begin, pnum) {
                struct mem_section *ms;

                if (pnum >= pnum_end)
                        break;
                ms = __nr_to_section(pnum);
                ms->section_mem_map = 0;
        }
}

/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
        unsigned long pnum_begin = first_present_section_nr();
        int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
        unsigned long pnum_end, map_count = 1;

        /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
        set_pageblock_order();

        for_each_present_section_nr(pnum_begin + 1, pnum_end) {
                int nid = sparse_early_nid(__nr_to_section(pnum_end));

                if (nid == nid_begin) {
                        map_count++;
                        continue;
                }
                /* Init node with sections in range [pnum_begin, pnum_end) */
                sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
                nid_begin = nid;
                pnum_begin = pnum_end;
                map_count = 1;
        }
        /* cover the last node */
        sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
        vmemmap_populate_print_last();
}

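/*
 * sparse_init() walks the present sections in ascending order and batches
 * consecutive sections belonging to the same node, so each node gets one
 * usemap allocation and one mem_map buffer covering all of its present
 * sections.  The final sparse_init_nid() call uses the open-ended pnum_end
 * left by the loop to cover the last node.
 */
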
#ifdef CONFIG_MEMORY_HOTPLUG

/* Mark all memory sections within the pfn range as online */
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long pfn;

        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                unsigned long section_nr = pfn_to_section_nr(pfn);
                struct mem_section *ms;

                /* onlining code should never touch invalid ranges */
                if (WARN_ON(!valid_section_nr(section_nr)))
                        continue;

                ms = __nr_to_section(section_nr);
                ms->section_mem_map |= SECTION_IS_ONLINE;
        }
}

#ifdef CONFIG_MEMORY_HOTREMOVE
/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
        unsigned long pfn;

        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                unsigned long section_nr = pfn_to_section_nr(pfn);
                struct mem_section *ms;

                /*
                 * TODO this needs some double checking. Offlining code makes
                 * sure to check pfn_valid but those checks might be just bogus
                 */
                if (WARN_ON(!valid_section_nr(section_nr)))
                        continue;

                ms = __nr_to_section(section_nr);
                ms->section_mem_map &= ~SECTION_IS_ONLINE;
        }
}
#endif

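/*
 * SECTION_IS_ONLINE set/cleared above is what online_section() and therefore
 * pfn_to_online_page() test, so pfn walkers relying on pfn_to_online_page()
 * stop seeing a range as soon as it has been marked offline.
 */
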
#ifdef CONFIG_SPARSEMEM_VMEMMAP
static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
                struct vmem_altmap *altmap)
{
        /* This will make the necessary allocations eventually. */
        return sparse_mem_map_populate(pnum, nid, altmap);
}
static void __kfree_section_memmap(struct page *memmap,
                struct vmem_altmap *altmap)
{
        unsigned long start = (unsigned long)memmap;
        unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

        vmemmap_free(start, end, altmap);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
        unsigned long start = (unsigned long)memmap;
        unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);

        vmemmap_free(start, end, NULL);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#else
static struct page *__kmalloc_section_memmap(void)
{
        struct page *page, *ret;
        unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;

        page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
        if (page)
                goto got_map_page;

        ret = vmalloc(memmap_size);
        if (ret)
                goto got_map_ptr;

        return NULL;
got_map_page:
        ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
got_map_ptr:

        return ret;
}

static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
                struct vmem_altmap *altmap)
{
        return __kmalloc_section_memmap();
}

static void __kfree_section_memmap(struct page *memmap,
                struct vmem_altmap *altmap)
{
        if (is_vmalloc_addr(memmap))
                vfree(memmap);
        else
                free_pages((unsigned long)memmap,
                           get_order(sizeof(struct page) * PAGES_PER_SECTION));
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static void free_map_bootmem(struct page *memmap)
{
        unsigned long maps_section_nr, removing_section_nr, i;
        unsigned long magic, nr_pages;
        struct page *page = virt_to_page(memmap);

        nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
                >> PAGE_SHIFT;

        for (i = 0; i < nr_pages; i++, page++) {
                magic = (unsigned long) page->freelist;

                BUG_ON(magic == NODE_INFO);

                maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
                removing_section_nr = page_private(page);

                /*
                 * When this function is called, the section being removed is
                 * in the logically offlined state, i.e. all of its pages are
                 * isolated from the page allocator.  If the memmap of the
                 * section being removed lives in that same section, it must
                 * not be freed here: the page allocator could hand it out
                 * again even though the memory is about to be removed
                 * physically.
                 */
                if (maps_section_nr != removing_section_nr)
                        put_page_bootmem(page);
        }
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

/*
 * Hot-add a single memory section: allocate its mem_map and usemap and hook
 * them up.  Returns 0 on success, -EEXIST if the section is already present,
 * or -ENOMEM if an allocation failed.
 */
int __meminit sparse_add_one_section(struct pglist_data *pgdat,
                unsigned long start_pfn, struct vmem_altmap *altmap)
{
        unsigned long section_nr = pfn_to_section_nr(start_pfn);
        struct mem_section *ms;
        struct page *memmap;
        unsigned long *usemap;
        unsigned long flags;
        int ret;

        /*
         * No locking here: sparse_index_init() does its own serialization,
         * and it may allocate with kmalloc(), so it has to run before we
         * take the pgdat resize spinlock below.
         */
        ret = sparse_index_init(section_nr, pgdat->node_id);
        if (ret < 0 && ret != -EEXIST)
                return ret;
        ret = 0;
        memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
        if (!memmap)
                return -ENOMEM;
        usemap = __kmalloc_section_usemap();
        if (!usemap) {
                __kfree_section_memmap(memmap, altmap);
                return -ENOMEM;
        }

        pgdat_resize_lock(pgdat, &flags);

        ms = __pfn_to_section(start_pfn);
        if (ms->section_mem_map & SECTION_MARKED_PRESENT) {
                ret = -EEXIST;
                goto out;
        }

#ifdef CONFIG_DEBUG_VM
        /*
         * Poison uninitialized struct pages in order to catch invalid flags
         * combinations.
         */
        memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION);
#endif

        section_mark_present(ms);
        sparse_init_one_section(ms, section_nr, memmap, usemap);

out:
        pgdat_resize_unlock(pgdat, &flags);
        if (ret < 0) {
                kfree(usemap);
                __kfree_section_memmap(memmap, altmap);
        }
        return ret;
}

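/*
 * Note: apart from the optional poisoning above, the struct pages of a
 * hot-added section are not initialized here; that happens later, when the
 * range is onlined and moved into a zone (see move_pfn_range_to_zone()).
 */
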
#ifdef CONFIG_MEMORY_HOTREMOVE
#ifdef CONFIG_MEMORY_FAILURE
static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
        int i;

        if (!memmap)
                return;

        for (i = 0; i < nr_pages; i++) {
                if (PageHWPoison(&memmap[i])) {
                        atomic_long_sub(1, &num_poisoned_pages);
                        ClearPageHWPoison(&memmap[i]);
                }
        }
}
#else
static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
{
}
#endif

static void free_section_usemap(struct page *memmap, unsigned long *usemap,
                struct vmem_altmap *altmap)
{
        struct page *usemap_page;

        if (!usemap)
                return;

        usemap_page = virt_to_page(usemap);
        /*
         * Check to see if allocation came from hot-plug-add
         */
        if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
                kfree(usemap);
                if (memmap)
                        __kfree_section_memmap(memmap, altmap);
                return;
        }

        /*
         * The usemap came from bootmem. This is packed with other usemaps
         * on the section which has pgdat at boot time. Just keep it as is now.
         */

        if (memmap)
                free_map_bootmem(memmap);
}

void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
                unsigned long map_offset, struct vmem_altmap *altmap)
{
        struct page *memmap = NULL;
        unsigned long *usemap = NULL, flags;
        struct pglist_data *pgdat = zone->zone_pgdat;

        pgdat_resize_lock(pgdat, &flags);
        if (ms->section_mem_map) {
                usemap = ms->pageblock_flags;
                memmap = sparse_decode_mem_map(ms->section_mem_map,
                                                __section_nr(ms));
                ms->section_mem_map = 0;
                ms->pageblock_flags = NULL;
        }
        pgdat_resize_unlock(pgdat, &flags);

        clear_hwpoisoned_pages(memmap + map_offset,
                        PAGES_PER_SECTION - map_offset);
        free_section_usemap(memmap, usemap, altmap);
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
#endif /* CONFIG_MEMORY_HOTPLUG */