linux/drivers/base/memory.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Memory subsystem support
 *
 * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
 *            Dave Hansen <haveblue@us.ibm.com>
 *
 * This file provides the necessary infrastructure to represent
 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
 * All arch-independent code that assumes MEMORY_HOTPLUG requires
 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/xarray.h>

#include <linux/atomic.h>
#include <linux/uaccess.h>

#define MEMORY_CLASS_NAME       "memory"

static const char *const online_type_to_str[] = {
        [MMOP_OFFLINE] = "offline",
        [MMOP_ONLINE] = "online",
        [MMOP_ONLINE_KERNEL] = "online_kernel",
        [MMOP_ONLINE_MOVABLE] = "online_movable",
};

int memhp_online_type_from_str(const char *str)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
                if (sysfs_streq(str, online_type_to_str[i]))
                        return i;
        }
        return -EINVAL;
}
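
/*
 * For example, writing "online_kernel" to a state file parses as follows
 * (a sketch; sysfs_streq() tolerates the trailing newline userspace sends):
 *
 *        memhp_online_type_from_str("online_kernel\n") == MMOP_ONLINE_KERNEL
 *        memhp_online_type_from_str("bogus") == -EINVAL
 */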

#define to_memory_block(dev) container_of(dev, struct memory_block, dev)

static int sections_per_block;

static inline unsigned long memory_block_id(unsigned long section_nr)
{
        return section_nr / sections_per_block;
}

static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
        return memory_block_id(pfn_to_section_nr(pfn));
}

static inline unsigned long phys_to_block_id(unsigned long phys)
{
        return pfn_to_block_id(PFN_DOWN(phys));
}
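
/*
 * A worked example of the arithmetic above, assuming x86-64 defaults
 * (128 MiB sections, 4 KiB pages) and a hypothetical 2 GiB memory block
 * size: sections_per_block is 2 GiB / 128 MiB = 16, so physical address
 * 0x100000000 (4 GiB) maps to PFN 0x100000, section 32, and memory block
 * id 32 / 16 = 2.
 */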

static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

static struct bus_type memory_subsys = {
        .name = MEMORY_CLASS_NAME,
        .dev_name = MEMORY_CLASS_NAME,
        .online = memory_subsys_online,
        .offline = memory_subsys_offline,
};

/*
 * Memory blocks are cached in a local xarray to avoid a costly linear
 * search for the corresponding device on the subsystem bus.
 */
static DEFINE_XARRAY(memory_blocks);

static BLOCKING_NOTIFIER_HEAD(memory_chain);

int register_memory_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);

void unregister_memory_notifier(struct notifier_block *nb)
{
        blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);

static void memory_block_release(struct device *dev)
{
        struct memory_block *mem = to_memory_block(dev);

        kfree(mem);
}

unsigned long __weak memory_block_size_bytes(void)
{
        return MIN_MEMORY_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);

/*
 * Show the first physical section index (number) of this memory block.
 */
static ssize_t phys_index_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
        struct memory_block *mem = to_memory_block(dev);
        unsigned long phys_index;

        phys_index = mem->start_section_nr / sections_per_block;

        return sysfs_emit(buf, "%08lx\n", phys_index);
}

/*
 * Legacy interface that we cannot remove. Always indicate "removable"
 * when CONFIG_MEMORY_HOTREMOVE is enabled - a bad heuristic.
 */
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
{
        return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}

/*
 * online, offline, going offline, etc.
 */
static ssize_t state_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
{
        struct memory_block *mem = to_memory_block(dev);
        const char *output;

        /*
         * We could probably put these states in a nice little array
         * so that they're not open-coded.
         */
        switch (mem->state) {
        case MEM_ONLINE:
                output = "online";
                break;
        case MEM_OFFLINE:
                output = "offline";
                break;
        case MEM_GOING_OFFLINE:
                output = "going-offline";
                break;
        default:
                WARN_ON(1);
                return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state);
        }

        return sysfs_emit(buf, "%s\n", output);
}

int memory_notify(unsigned long val, void *v)
{
        return blocking_notifier_call_chain(&memory_chain, val, v);
}
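
/*
 * A minimal sketch of how a subsystem might consume these notifications
 * (hypothetical callback and helper names, not part of this file). A
 * callback can veto an offline request by returning NOTIFY_BAD from
 * MEM_GOING_OFFLINE:
 *
 *        static int example_mem_callback(struct notifier_block *nb,
 *                                        unsigned long action, void *arg)
 *        {
 *                struct memory_notify *mn = arg;
 *
 *                switch (action) {
 *                case MEM_GOING_OFFLINE:
 *                        if (example_range_in_use(mn->start_pfn, mn->nr_pages))
 *                                return NOTIFY_BAD;        // veto the offline
 *                        break;
 *                case MEM_ONLINE:
 *                case MEM_OFFLINE:
 *                        break;
 *                }
 *                return NOTIFY_OK;
 *        }
 *
 *        static struct notifier_block example_mem_nb = {
 *                .notifier_call = example_mem_callback,
 *        };
 *
 *        register_memory_notifier(&example_mem_nb);
 */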

/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
static int
memory_block_action(unsigned long start_section_nr, unsigned long action,
                    int online_type, int nid)
{
        unsigned long start_pfn;
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
        int ret;

        start_pfn = section_nr_to_pfn(start_section_nr);

        switch (action) {
        case MEM_ONLINE:
                ret = online_pages(start_pfn, nr_pages, online_type, nid);
                break;
        case MEM_OFFLINE:
                ret = offline_pages(start_pfn, nr_pages);
                break;
        default:
                WARN(1, "%s(%ld, %ld) unknown action: %ld\n",
                     __func__, start_section_nr, action, action);
                ret = -EINVAL;
        }

        return ret;
}

static int memory_block_change_state(struct memory_block *mem,
                unsigned long to_state, unsigned long from_state_req)
{
        int ret = 0;

        if (mem->state != from_state_req)
                return -EINVAL;

        if (to_state == MEM_OFFLINE)
                mem->state = MEM_GOING_OFFLINE;

        ret = memory_block_action(mem->start_section_nr, to_state,
                                  mem->online_type, mem->nid);

        mem->state = ret ? from_state_req : to_state;

        return ret;
}

/* The device lock serializes operations on memory_subsys_[online|offline] */
static int memory_subsys_online(struct device *dev)
{
        struct memory_block *mem = to_memory_block(dev);
        int ret;

        if (mem->state == MEM_ONLINE)
                return 0;

        /*
         * When called via device_online() without configuring the online_type,
         * we want to default to MMOP_ONLINE.
         */
        if (mem->online_type == MMOP_OFFLINE)
                mem->online_type = MMOP_ONLINE;

        ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
        mem->online_type = MMOP_OFFLINE;

        return ret;
}

static int memory_subsys_offline(struct device *dev)
{
        struct memory_block *mem = to_memory_block(dev);

        if (mem->state == MEM_OFFLINE)
                return 0;

        return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}

static ssize_t state_store(struct device *dev, struct device_attribute *attr,
                           const char *buf, size_t count)
{
        const int online_type = memhp_online_type_from_str(buf);
        struct memory_block *mem = to_memory_block(dev);
        int ret;

        if (online_type < 0)
                return -EINVAL;

        ret = lock_device_hotplug_sysfs();
        if (ret)
                return ret;

        switch (online_type) {
        case MMOP_ONLINE_KERNEL:
        case MMOP_ONLINE_MOVABLE:
        case MMOP_ONLINE:
                /* mem->online_type is protected by device_hotplug_lock */
                mem->online_type = online_type;
                ret = device_online(&mem->dev);
                break;
        case MMOP_OFFLINE:
                ret = device_offline(&mem->dev);
                break;
        default:
                ret = -EINVAL; /* should never happen */
        }

        unlock_device_hotplug();

        if (ret < 0)
                return ret;
        if (ret)
                return -EINVAL;

        return count;
}
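
/*
 * From userspace, the state machine above is driven through sysfs, e.g.
 * (paths shown for an illustrative block 32):
 *
 *        cat /sys/devices/system/memory/memory32/state
 *        echo online_movable > /sys/devices/system/memory/memory32/state
 *        echo offline > /sys/devices/system/memory/memory32/state
 */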

/*
 * phys_device is a bad name for this.  What I really want
 * is a way to differentiate between memory ranges that
 * are part of physical devices that constitute
 * a complete removable unit or FRU (field replaceable unit),
 * i.e. do these ranges belong to the same physical device,
 * such that if I offline all of these sections I can then
 * remove the physical device?
 */
static ssize_t phys_device_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct memory_block *mem = to_memory_block(dev);

        return sysfs_emit(buf, "%d\n", mem->phys_device);
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static int print_allowed_zone(char *buf, int len, int nid,
                              unsigned long start_pfn, unsigned long nr_pages,
                              int online_type, struct zone *default_zone)
{
        struct zone *zone;

        zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
        if (zone == default_zone)
                return 0;

        return sysfs_emit_at(buf, len, " %s", zone->name);
}

static ssize_t valid_zones_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct memory_block *mem = to_memory_block(dev);
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
        struct zone *default_zone;
        int len = 0;
        int nid;

        /*
         * Check the existing zone. Make sure that we do that only on
         * online nodes; otherwise, page_zone is not reliable.
         */
        if (mem->state == MEM_ONLINE) {
                /*
                 * A block that contains more than one zone cannot be
                 * offlined. This can happen e.g. for ZONE_DMA and ZONE_DMA32.
                 */
                default_zone = test_pages_in_a_zone(start_pfn,
                                                    start_pfn + nr_pages);
                if (!default_zone)
                        return sysfs_emit(buf, "%s\n", "none");
                len += sysfs_emit_at(buf, len, "%s", default_zone->name);
                goto out;
        }

        nid = mem->nid;
        default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
                                          nr_pages);

        len += sysfs_emit_at(buf, len, "%s", default_zone->name);
        len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
                                  MMOP_ONLINE_KERNEL, default_zone);
        len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
                                  MMOP_ONLINE_MOVABLE, default_zone);
out:
        len += sysfs_emit_at(buf, len, "\n");
        return len;
}
static DEVICE_ATTR_RO(valid_zones);
#endif

static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);

/*
 * Show the memory block size (shared by all memory blocks).
 */
static ssize_t block_size_bytes_show(struct device *dev,
                                     struct device_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "%lx\n", memory_block_size_bytes());
}

static DEVICE_ATTR_RO(block_size_bytes);

/*
 * Memory auto online policy.
 */

static ssize_t auto_online_blocks_show(struct device *dev,
                                       struct device_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "%s\n",
                          online_type_to_str[memhp_default_online_type]);
}

static ssize_t auto_online_blocks_store(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf, size_t count)
{
        const int online_type = memhp_online_type_from_str(buf);

        if (online_type < 0)
                return -EINVAL;

        memhp_default_online_type = online_type;
        return count;
}

static DEVICE_ATTR_RW(auto_online_blocks);
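
/*
 * Typical usage of the two global attributes, e.g. assuming a
 * 0x8000000-byte (128 MiB) block size:
 *
 *        $ cat /sys/devices/system/memory/block_size_bytes
 *        8000000
 *        $ echo online_movable > /sys/devices/system/memory/auto_online_blocks
 *
 * The latter makes newly hot-added blocks come up online_movable without
 * udev/userspace intervention.
 */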

/*
 * Some architectures will have custom drivers to do this, and
 * will not need to do it from userspace.  The fake hot-add code
 * as well as ppc64 will do all of their discovery in userspace
 * and will require this interface.
 */
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
                           const char *buf, size_t count)
{
        u64 phys_addr;
        int nid, ret;
        unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;

        ret = kstrtoull(buf, 0, &phys_addr);
        if (ret)
                return ret;

        if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
                return -EINVAL;

        ret = lock_device_hotplug_sysfs();
        if (ret)
                return ret;

        nid = memory_add_physaddr_to_nid(phys_addr);
        ret = __add_memory(nid, phys_addr,
                           MIN_MEMORY_BLOCK_SIZE * sections_per_block,
                           MHP_NONE);

        if (ret)
                goto out;

        ret = count;
out:
        unlock_device_hotplug();
        return ret;
}

static DEVICE_ATTR_WO(probe);
#endif
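
/*
 * For example, probing one block at physical address 0x100000000 (the
 * address must be aligned to the memory block size, per the check above):
 *
 *        echo 0x100000000 > /sys/devices/system/memory/probe
 */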

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Support for offlining pages of memory
 */

/* Soft offline a page */
static ssize_t soft_offline_page_store(struct device *dev,
                                       struct device_attribute *attr,
                                       const char *buf, size_t count)
{
        int ret;
        u64 pfn;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (kstrtoull(buf, 0, &pfn) < 0)
                return -EINVAL;
        pfn >>= PAGE_SHIFT;
        ret = soft_offline_page(pfn, 0);
        return ret == 0 ? count : ret;
}

/* Forcibly offline a page, including killing processes. */
static ssize_t hard_offline_page_store(struct device *dev,
                                       struct device_attribute *attr,
                                       const char *buf, size_t count)
{
        int ret;
        u64 pfn;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (kstrtoull(buf, 0, &pfn) < 0)
                return -EINVAL;
        pfn >>= PAGE_SHIFT;
        ret = memory_failure(pfn, 0);
        return ret ? ret : count;
}

static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
#endif
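
/*
 * Note that despite the variable name, what userspace writes to these two
 * files is a physical address, not a PFN; the stores shift it down by
 * PAGE_SHIFT before use. E.g. to soft-offline the page containing
 * physical address 0x200000000:
 *
 *        echo 0x200000000 > /sys/devices/system/memory/soft_offline_page
 */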

/*
 * Note that phys_device is optional.  It is here to allow for
 * differentiation between which *physical* devices each
 * section belongs to...
 */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
        return 0;
}

/*
 * A reference for the returned memory block device is acquired.
 *
 * Called under device_hotplug_lock.
 */
static struct memory_block *find_memory_block_by_id(unsigned long block_id)
{
        struct memory_block *mem;

        mem = xa_load(&memory_blocks, block_id);
        if (mem)
                get_device(&mem->dev);
        return mem;
}

/*
 * Called under device_hotplug_lock.
 */
struct memory_block *find_memory_block(struct mem_section *section)
{
        unsigned long block_id = memory_block_id(__section_nr(section));

        return find_memory_block_by_id(block_id);
}
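
/*
 * Callers own the reference these lookups take; a minimal sketch of the
 * expected pattern (hypothetical caller, device_hotplug_lock held):
 *
 *        struct memory_block *mem = find_memory_block_by_id(block_id);
 *
 *        if (mem) {
 *                // ... inspect mem->state, mem->nid, etc. ...
 *                put_device(&mem->dev);        // drop the lookup reference
 *        }
 */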

static struct attribute *memory_memblk_attrs[] = {
        &dev_attr_phys_index.attr,
        &dev_attr_state.attr,
        &dev_attr_phys_device.attr,
        &dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
        &dev_attr_valid_zones.attr,
#endif
        NULL
};

static struct attribute_group memory_memblk_attr_group = {
        .attrs = memory_memblk_attrs,
};

static const struct attribute_group *memory_memblk_attr_groups[] = {
        &memory_memblk_attr_group,
        NULL,
};

/*
 * register_memory - Setup a sysfs device for a memory block
 */
static int register_memory(struct memory_block *memory)
{
        int ret;

        memory->dev.bus = &memory_subsys;
        memory->dev.id = memory->start_section_nr / sections_per_block;
        memory->dev.release = memory_block_release;
        memory->dev.groups = memory_memblk_attr_groups;
        memory->dev.offline = memory->state == MEM_OFFLINE;

        ret = device_register(&memory->dev);
        if (ret) {
                put_device(&memory->dev);
                return ret;
        }
        ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
                              GFP_KERNEL));
        if (ret) {
                put_device(&memory->dev);
                device_unregister(&memory->dev);
        }
        return ret;
}

static int init_memory_block(unsigned long block_id, unsigned long state)
{
        struct memory_block *mem;
        unsigned long start_pfn;
        int ret = 0;

        mem = find_memory_block_by_id(block_id);
        if (mem) {
                put_device(&mem->dev);
                return -EEXIST;
        }
        mem = kzalloc(sizeof(*mem), GFP_KERNEL);
        if (!mem)
                return -ENOMEM;

        mem->start_section_nr = block_id * sections_per_block;
        mem->state = state;
        start_pfn = section_nr_to_pfn(mem->start_section_nr);
        mem->phys_device = arch_get_memory_phys_device(start_pfn);
        mem->nid = NUMA_NO_NODE;

        ret = register_memory(mem);

        return ret;
}

static int add_memory_block(unsigned long base_section_nr)
{
        int section_count = 0;
        unsigned long nr;

        for (nr = base_section_nr; nr < base_section_nr + sections_per_block;
             nr++)
                if (present_section_nr(nr))
                        section_count++;

        if (section_count == 0)
                return 0;
        return init_memory_block(memory_block_id(base_section_nr),
                                 MEM_ONLINE);
}

static void unregister_memory(struct memory_block *memory)
{
        if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
                return;

        WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);

        /* drop the ref. we got via find_memory_block() */
        put_device(&memory->dev);
        device_unregister(&memory->dev);
}

/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 *
 * Called under device_hotplug_lock.
 */
int create_memory_block_devices(unsigned long start, unsigned long size)
{
        const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
        unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
        struct memory_block *mem;
        unsigned long block_id;
        int ret = 0;

        if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
                         !IS_ALIGNED(size, memory_block_size_bytes())))
                return -EINVAL;

        for (block_id = start_block_id; block_id != end_block_id; block_id++) {
                ret = init_memory_block(block_id, MEM_OFFLINE);
                if (ret)
                        break;
        }
        if (ret) {
                end_block_id = block_id;
                for (block_id = start_block_id; block_id != end_block_id;
                     block_id++) {
                        mem = find_memory_block_by_id(block_id);
                        if (WARN_ON_ONCE(!mem))
                                continue;
                        unregister_memory(mem);
                }
        }
        return ret;
}

/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 *
 * Called under device_hotplug_lock.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
        const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
        const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
        struct memory_block *mem;
        unsigned long block_id;

        if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
                         !IS_ALIGNED(size, memory_block_size_bytes())))
                return;

        for (block_id = start_block_id; block_id != end_block_id; block_id++) {
                mem = find_memory_block_by_id(block_id);
                if (WARN_ON_ONCE(!mem))
                        continue;
                unregister_memory_block_under_nodes(mem);
                unregister_memory(mem);
        }
}

/* Return true if the memory block is offlined, otherwise return false. */
bool is_memblock_offlined(struct memory_block *mem)
{
        return mem->state == MEM_OFFLINE;
}

static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
        &dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
        &dev_attr_soft_offline_page.attr,
        &dev_attr_hard_offline_page.attr,
#endif

        &dev_attr_block_size_bytes.attr,
        &dev_attr_auto_online_blocks.attr,
        NULL
};

static struct attribute_group memory_root_attr_group = {
        .attrs = memory_root_attrs,
};

static const struct attribute_group *memory_root_attr_groups[] = {
        &memory_root_attr_group,
        NULL,
};

/*
 * Initialize the sysfs support for memory devices. At the time this function
 * is called, we cannot have concurrent creation/deletion of memory block
 * devices, so the device_hotplug_lock is not needed.
 */
void __init memory_dev_init(void)
{
        int ret;
        unsigned long block_sz, nr;

        /* Validate the configured memory block size */
        block_sz = memory_block_size_bytes();
        if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
                panic("Memory block size not suitable: 0x%lx\n", block_sz);
        sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;

        ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
        if (ret)
                panic("%s() failed to register subsystem: %d\n", __func__, ret);

        /*
         * Create entries for memory sections that were found
         * during boot and have been initialized.
         */
        for (nr = 0; nr <= __highest_present_section_nr;
             nr += sections_per_block) {
                ret = add_memory_block(nr);
                if (ret)
                        panic("%s() failed to add memory block: %d\n", __func__,
                              ret);
        }
}

/**
 * walk_memory_blocks - walk through all present memory blocks overlapped
 *                      by the range [start, start + size)
 *
 * @start: start address of the memory range
 * @size: size of the memory range
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks overlapped by the
 * range [start, start + size), calling func on each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 *
 * Called under device_hotplug_lock.
 */
int walk_memory_blocks(unsigned long start, unsigned long size,
                       void *arg, walk_memory_blocks_func_t func)
{
        const unsigned long start_block_id = phys_to_block_id(start);
        const unsigned long end_block_id = phys_to_block_id(start + size - 1);
        struct memory_block *mem;
        unsigned long block_id;
        int ret = 0;

        if (!size)
                return 0;

        for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
                mem = find_memory_block_by_id(block_id);
                if (!mem)
                        continue;

                ret = func(mem, arg);
                put_device(&mem->dev);
                if (ret)
                        break;
        }
        return ret;
}
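
/*
 * A minimal sketch of a walker (hypothetical helper; device_hotplug_lock
 * must be held by the caller). It counts how many blocks in a range are
 * still online:
 *
 *        static int count_online_cb(struct memory_block *mem, void *arg)
 *        {
 *                unsigned int *count = arg;
 *
 *                if (mem->state == MEM_ONLINE)
 *                        (*count)++;
 *                return 0;        // keep walking
 *        }
 *
 *        unsigned int count = 0;
 *
 *        walk_memory_blocks(start, size, &count, count_online_cb);
 */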

struct for_each_memory_block_cb_data {
        walk_memory_blocks_func_t func;
        void *arg;
};

static int for_each_memory_block_cb(struct device *dev, void *data)
{
        struct memory_block *mem = to_memory_block(dev);
        struct for_each_memory_block_cb_data *cb_data = data;

        return cb_data->func(mem, cb_data->arg);
}

/**
 * for_each_memory_block - walk through all present memory blocks
 *
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks, calling func on
 * each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 */
int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
{
        struct for_each_memory_block_cb_data cb_data = {
                .func = func,
                .arg = arg,
        };

        return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
                                for_each_memory_block_cb);
}
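
/*
 * Unlike walk_memory_blocks(), this iterates every block registered on the
 * subsystem bus rather than an xarray range, but the same callback type
 * plugs into both, e.g. (reusing the hypothetical counter from above):
 *
 *        for_each_memory_block(&count, count_online_cb);
 */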