linux/drivers/base/memory.c
<<
>>
Prefs
   1/*
   2 * drivers/base/memory.c - basic Memory class support
   3 *
   4 * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
   5 *            Dave Hansen <haveblue@us.ibm.com>
   6 *
   7 * This file provides the necessary infrastructure to represent
   8 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
   9 * All arch-independent code that assumes MEMORY_HOTPLUG requires
  10 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
  11 */
  12
  13#include <linux/sysdev.h>
  14#include <linux/module.h>
  15#include <linux/init.h>
  16#include <linux/topology.h>
  17#include <linux/capability.h>
  18#include <linux/device.h>
  19#include <linux/memory.h>
  20#include <linux/kobject.h>
  21#include <linux/memory_hotplug.h>
  22#include <linux/mm.h>
  23#include <linux/mutex.h>
  24#include <linux/stat.h>
  25
  26#include <asm/atomic.h>
  27#include <asm/uaccess.h>
  28
  29#define MEMORY_CLASS_NAME       "memory"
  30
  31static struct sysdev_class memory_sysdev_class = {
  32        .name = MEMORY_CLASS_NAME,
  33};
  34
  35static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
  36{
  37        return MEMORY_CLASS_NAME;
  38}
  39
  40static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env)
  41{
  42        int retval = 0;
  43
  44        return retval;
  45}
  46
  47static struct kset_uevent_ops memory_uevent_ops = {
  48        .name           = memory_uevent_name,
  49        .uevent         = memory_uevent,
  50};
  51
  52static BLOCKING_NOTIFIER_HEAD(memory_chain);
  53
  54int register_memory_notifier(struct notifier_block *nb)
  55{
  56        return blocking_notifier_chain_register(&memory_chain, nb);
  57}
  58EXPORT_SYMBOL(register_memory_notifier);
  59
  60void unregister_memory_notifier(struct notifier_block *nb)
  61{
  62        blocking_notifier_chain_unregister(&memory_chain, nb);
  63}
  64EXPORT_SYMBOL(unregister_memory_notifier);
  65
  66/*
  67 * register_memory - Setup a sysfs device for a memory block
  68 */
  69static
  70int register_memory(struct memory_block *memory, struct mem_section *section)
  71{
  72        int error;
  73
  74        memory->sysdev.cls = &memory_sysdev_class;
  75        memory->sysdev.id = __section_nr(section);
  76
  77        error = sysdev_register(&memory->sysdev);
  78        return error;
  79}
  80
  81static void
  82unregister_memory(struct memory_block *memory, struct mem_section *section)
  83{
  84        BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
  85        BUG_ON(memory->sysdev.id != __section_nr(section));
  86
  87        /* drop the ref. we got in remove_memory_block() */
  88        kobject_put(&memory->sysdev.kobj);
  89        sysdev_unregister(&memory->sysdev);
  90}
  91
  92/*
  93 * use this as the physical section index that this memsection
  94 * uses.
  95 */
  96
  97static ssize_t show_mem_phys_index(struct sys_device *dev,
  98                        struct sysdev_attribute *attr, char *buf)
  99{
 100        struct memory_block *mem =
 101                container_of(dev, struct memory_block, sysdev);
 102        return sprintf(buf, "%08lx\n", mem->phys_index);
 103}
 104
 105/*
 106 * Show whether the section of memory is likely to be hot-removable
 107 */
 108static ssize_t show_mem_removable(struct sys_device *dev,
 109                        struct sysdev_attribute *attr, char *buf)
 110{
 111        unsigned long start_pfn;
 112        int ret;
 113        struct memory_block *mem =
 114                container_of(dev, struct memory_block, sysdev);
 115
 116        start_pfn = section_nr_to_pfn(mem->phys_index);
 117        ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);
 118        return sprintf(buf, "%d\n", ret);
 119}
 120
 121/*
 122 * online, offline, going offline, etc.
 123 */
 124static ssize_t show_mem_state(struct sys_device *dev,
 125                        struct sysdev_attribute *attr, char *buf)
 126{
 127        struct memory_block *mem =
 128                container_of(dev, struct memory_block, sysdev);
 129        ssize_t len = 0;
 130
 131        /*
 132         * We can probably put these states in a nice little array
 133         * so that they're not open-coded
 134         */
 135        switch (mem->state) {
 136                case MEM_ONLINE:
 137                        len = sprintf(buf, "online\n");
 138                        break;
 139                case MEM_OFFLINE:
 140                        len = sprintf(buf, "offline\n");
 141                        break;
 142                case MEM_GOING_OFFLINE:
 143                        len = sprintf(buf, "going-offline\n");
 144                        break;
 145                default:
 146                        len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
 147                                        mem->state);
 148                        WARN_ON(1);
 149                        break;
 150        }
 151
 152        return len;
 153}
 154
 155int memory_notify(unsigned long val, void *v)
 156{
 157        return blocking_notifier_call_chain(&memory_chain, val, v);
 158}
 159
 160/*
 161 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 162 * OK to have direct references to sparsemem variables in here.
 163 */
 164static int
 165memory_block_action(struct memory_block *mem, unsigned long action)
 166{
 167        int i;
 168        unsigned long psection;
 169        unsigned long start_pfn, start_paddr;
 170        struct page *first_page;
 171        int ret;
 172        int old_state = mem->state;
 173
 174        psection = mem->phys_index;
 175        first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
 176
 177        /*
 178         * The probe routines leave the pages reserved, just
 179         * as the bootmem code does.  Make sure they're still
 180         * that way.
 181         */
 182        if (action == MEM_ONLINE) {
 183                for (i = 0; i < PAGES_PER_SECTION; i++) {
 184                        if (PageReserved(first_page+i))
 185                                continue;
 186
 187                        printk(KERN_WARNING "section number %ld page number %d "
 188                                "not reserved, was it already online? \n",
 189                                psection, i);
 190                        return -EBUSY;
 191                }
 192        }
 193
 194        switch (action) {
 195                case MEM_ONLINE:
 196                        start_pfn = page_to_pfn(first_page);
 197                        ret = online_pages(start_pfn, PAGES_PER_SECTION);
 198                        break;
 199                case MEM_OFFLINE:
 200                        mem->state = MEM_GOING_OFFLINE;
 201                        start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
 202                        ret = remove_memory(start_paddr,
 203                                            PAGES_PER_SECTION << PAGE_SHIFT);
 204                        if (ret) {
 205                                mem->state = old_state;
 206                                break;
 207                        }
 208                        break;
 209                default:
 210                        WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
 211                                        __func__, mem, action, action);
 212                        ret = -EINVAL;
 213        }
 214
 215        return ret;
 216}
 217
 218static int memory_block_change_state(struct memory_block *mem,
 219                unsigned long to_state, unsigned long from_state_req)
 220{
 221        int ret = 0;
 222        mutex_lock(&mem->state_mutex);
 223
 224        if (mem->state != from_state_req) {
 225                ret = -EINVAL;
 226                goto out;
 227        }
 228
 229        ret = memory_block_action(mem, to_state);
 230        if (!ret)
 231                mem->state = to_state;
 232
 233out:
 234        mutex_unlock(&mem->state_mutex);
 235        return ret;
 236}
 237
 238static ssize_t
 239store_mem_state(struct sys_device *dev,
 240                struct sysdev_attribute *attr, const char *buf, size_t count)
 241{
 242        struct memory_block *mem;
 243        unsigned int phys_section_nr;
 244        int ret = -EINVAL;
 245
 246        mem = container_of(dev, struct memory_block, sysdev);
 247        phys_section_nr = mem->phys_index;
 248
 249        if (!present_section_nr(phys_section_nr))
 250                goto out;
 251
 252        if (!strncmp(buf, "online", min((int)count, 6)))
 253                ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
 254        else if(!strncmp(buf, "offline", min((int)count, 7)))
 255                ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
 256out:
 257        if (ret)
 258                return ret;
 259        return count;
 260}
 261
 262/*
 263 * phys_device is a bad name for this.  What I really want
 264 * is a way to differentiate between memory ranges that
 265 * are part of physical devices that constitute
 266 * a complete removable unit or fru.
 267 * i.e. do these ranges belong to the same physical device,
 268 * s.t. if I offline all of these sections I can then
 269 * remove the physical device?
 270 */
 271static ssize_t show_phys_device(struct sys_device *dev,
 272                                struct sysdev_attribute *attr, char *buf)
 273{
 274        struct memory_block *mem =
 275                container_of(dev, struct memory_block, sysdev);
 276        return sprintf(buf, "%d\n", mem->phys_device);
 277}
 278
 279static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
 280static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
 281static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
 282static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
 283
 284#define mem_create_simple_file(mem, attr_name)  \
 285        sysdev_create_file(&mem->sysdev, &attr_##attr_name)
 286#define mem_remove_simple_file(mem, attr_name)  \
 287        sysdev_remove_file(&mem->sysdev, &attr_##attr_name)
 288
 289/*
 290 * Block size attribute stuff
 291 */
 292static ssize_t
 293print_block_size(struct class *class, char *buf)
 294{
 295        return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
 296}
 297
 298static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
 299
 300static int block_size_init(void)
 301{
 302        return sysfs_create_file(&memory_sysdev_class.kset.kobj,
 303                                &class_attr_block_size_bytes.attr);
 304}
 305
 306/*
 307 * Some architectures will have custom drivers to do this, and
 308 * will not need to do it from userspace.  The fake hot-add code
 309 * as well as ppc64 will do all of their discovery in userspace
 310 * and will require this interface.
 311 */
 312#ifdef CONFIG_ARCH_MEMORY_PROBE
 313static ssize_t
 314memory_probe_store(struct class *class, const char *buf, size_t count)
 315{
 316        u64 phys_addr;
 317        int nid;
 318        int ret;
 319
 320        phys_addr = simple_strtoull(buf, NULL, 0);
 321
 322        nid = memory_add_physaddr_to_nid(phys_addr);
 323        ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);
 324
 325        if (ret)
 326                count = ret;
 327
 328        return count;
 329}
 330static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
 331
 332static int memory_probe_init(void)
 333{
 334        return sysfs_create_file(&memory_sysdev_class.kset.kobj,
 335                                &class_attr_probe.attr);
 336}
 337#else
 338static inline int memory_probe_init(void)
 339{
 340        return 0;
 341}
 342#endif
 343
 344/*
 345 * Note that phys_device is optional.  It is here to allow for
 346 * differentiation between which *physical* devices each
 347 * section belongs to...
 348 */
 349
 350static int add_memory_block(int nid, struct mem_section *section,
 351                        unsigned long state, int phys_device,
 352                        enum mem_add_context context)
 353{
 354        struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 355        int ret = 0;
 356
 357        if (!mem)
 358                return -ENOMEM;
 359
 360        mem->phys_index = __section_nr(section);
 361        mem->state = state;
 362        mutex_init(&mem->state_mutex);
 363        mem->phys_device = phys_device;
 364
 365        ret = register_memory(mem, section);
 366        if (!ret)
 367                ret = mem_create_simple_file(mem, phys_index);
 368        if (!ret)
 369                ret = mem_create_simple_file(mem, state);
 370        if (!ret)
 371                ret = mem_create_simple_file(mem, phys_device);
 372        if (!ret)
 373                ret = mem_create_simple_file(mem, removable);
 374        if (!ret) {
 375                if (context == HOTPLUG)
 376                        ret = register_mem_sect_under_node(mem, nid);
 377        }
 378
 379        return ret;
 380}
 381
 382/*
 383 * For now, we have a linear search to go find the appropriate
 384 * memory_block corresponding to a particular phys_index. If
 385 * this gets to be a real problem, we can always use a radix
 386 * tree or something here.
 387 *
 388 * This could be made generic for all sysdev classes.
 389 */
 390struct memory_block *find_memory_block(struct mem_section *section)
 391{
 392        struct kobject *kobj;
 393        struct sys_device *sysdev;
 394        struct memory_block *mem;
 395        char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
 396
 397        /*
 398         * This only works because we know that section == sysdev->id
 399         * slightly redundant with sysdev_register()
 400         */
 401        sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
 402
 403        kobj = kset_find_obj(&memory_sysdev_class.kset, name);
 404        if (!kobj)
 405                return NULL;
 406
 407        sysdev = container_of(kobj, struct sys_device, kobj);
 408        mem = container_of(sysdev, struct memory_block, sysdev);
 409
 410        return mem;
 411}
 412
 413int remove_memory_block(unsigned long node_id, struct mem_section *section,
 414                int phys_device)
 415{
 416        struct memory_block *mem;
 417
 418        mem = find_memory_block(section);
 419        unregister_mem_sect_under_nodes(mem);
 420        mem_remove_simple_file(mem, phys_index);
 421        mem_remove_simple_file(mem, state);
 422        mem_remove_simple_file(mem, phys_device);
 423        mem_remove_simple_file(mem, removable);
 424        unregister_memory(mem, section);
 425
 426        return 0;
 427}
 428
 429/*
 430 * need an interface for the VM to add new memory regions,
 431 * but without onlining it.
 432 */
 433int register_new_memory(int nid, struct mem_section *section)
 434{
 435        return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG);
 436}
 437
 438int unregister_memory_section(struct mem_section *section)
 439{
 440        if (!present_section(section))
 441                return -EINVAL;
 442
 443        return remove_memory_block(0, section, 0);
 444}
 445
 446/*
 447 * Initialize the sysfs support for memory devices...
 448 */
 449int __init memory_dev_init(void)
 450{
 451        unsigned int i;
 452        int ret;
 453        int err;
 454
 455        memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
 456        ret = sysdev_class_register(&memory_sysdev_class);
 457        if (ret)
 458                goto out;
 459
 460        /*
 461         * Create entries for memory sections that were found
 462         * during boot and have been initialized
 463         */
 464        for (i = 0; i < NR_MEM_SECTIONS; i++) {
 465                if (!present_section_nr(i))
 466                        continue;
 467                err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
 468                                        0, BOOT);
 469                if (!ret)
 470                        ret = err;
 471        }
 472
 473        err = memory_probe_init();
 474        if (!ret)
 475                ret = err;
 476        err = block_size_init();
 477        if (!ret)
 478                ret = err;
 479out:
 480        if (ret)
 481                printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
 482        return ret;
 483}
 484