linux/drivers/base/memory.c
<<
>>
Prefs
   1/*
   2 * drivers/base/memory.c - basic Memory class support
   3 *
   4 * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
   5 *            Dave Hansen <haveblue@us.ibm.com>
   6 *
   7 * This file provides the necessary infrastructure to represent
   8 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
   9 * All arch-independent code that assumes MEMORY_HOTPLUG requires
  10 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
  11 */
  12
  13#include <linux/sysdev.h>
  14#include <linux/module.h>
  15#include <linux/init.h>
  16#include <linux/topology.h>
  17#include <linux/capability.h>
  18#include <linux/device.h>
  19#include <linux/memory.h>
  20#include <linux/kobject.h>
  21#include <linux/memory_hotplug.h>
  22#include <linux/mm.h>
  23#include <asm/atomic.h>
  24#include <asm/uaccess.h>
  25
  26#define MEMORY_CLASS_NAME       "memory"
  27
/*
 * The sysdev class that every memory block device is registered under.
 * Its kset is named MEMORY_CLASS_NAME.
 */
static struct sysdev_class memory_sysdev_class = {
        set_kset_name(MEMORY_CLASS_NAME),
};
  31
  32static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
  33{
  34        return MEMORY_CLASS_NAME;
  35}
  36
  37static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env)
  38{
  39        int retval = 0;
  40
  41        return retval;
  42}
  43
/*
 * uevent hooks for the memory kset; installed on memory_sysdev_class
 * by memory_dev_init().
 */
static struct kset_uevent_ops memory_uevent_ops = {
        .name           = memory_uevent_name,
        .uevent         = memory_uevent,
};
  48
/* Notifier chain that memory_notify() calls; see register_memory_notifier(). */
static BLOCKING_NOTIFIER_HEAD(memory_chain);
  50
  51int register_memory_notifier(struct notifier_block *nb)
  52{
  53        return blocking_notifier_chain_register(&memory_chain, nb);
  54}
  55
  56void unregister_memory_notifier(struct notifier_block *nb)
  57{
  58        blocking_notifier_chain_unregister(&memory_chain, nb);
  59}
  60
  61/*
  62 * register_memory - Setup a sysfs device for a memory block
  63 */
  64int register_memory(struct memory_block *memory, struct mem_section *section,
  65                struct node *root)
  66{
  67        int error;
  68
  69        memory->sysdev.cls = &memory_sysdev_class;
  70        memory->sysdev.id = __section_nr(section);
  71
  72        error = sysdev_register(&memory->sysdev);
  73
  74        if (root && !error)
  75                error = sysfs_create_link(&root->sysdev.kobj,
  76                                          &memory->sysdev.kobj,
  77                                          kobject_name(&memory->sysdev.kobj));
  78
  79        return error;
  80}
  81
  82static void
  83unregister_memory(struct memory_block *memory, struct mem_section *section,
  84                struct node *root)
  85{
  86        BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
  87        BUG_ON(memory->sysdev.id != __section_nr(section));
  88
  89        sysdev_unregister(&memory->sysdev);
  90        if (root)
  91                sysfs_remove_link(&root->sysdev.kobj,
  92                                  kobject_name(&memory->sysdev.kobj));
  93}
  94
  95/*
  96 * use this as the physical section index that this memsection
  97 * uses.
  98 */
  99
 100static ssize_t show_mem_phys_index(struct sys_device *dev, char *buf)
 101{
 102        struct memory_block *mem =
 103                container_of(dev, struct memory_block, sysdev);
 104        return sprintf(buf, "%08lx\n", mem->phys_index);
 105}
 106
 107/*
 108 * online, offline, going offline, etc.
 109 */
 110static ssize_t show_mem_state(struct sys_device *dev, char *buf)
 111{
 112        struct memory_block *mem =
 113                container_of(dev, struct memory_block, sysdev);
 114        ssize_t len = 0;
 115
 116        /*
 117         * We can probably put these states in a nice little array
 118         * so that they're not open-coded
 119         */
 120        switch (mem->state) {
 121                case MEM_ONLINE:
 122                        len = sprintf(buf, "online\n");
 123                        break;
 124                case MEM_OFFLINE:
 125                        len = sprintf(buf, "offline\n");
 126                        break;
 127                case MEM_GOING_OFFLINE:
 128                        len = sprintf(buf, "going-offline\n");
 129                        break;
 130                default:
 131                        len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
 132                                        mem->state);
 133                        WARN_ON(1);
 134                        break;
 135        }
 136
 137        return len;
 138}
 139
 140int memory_notify(unsigned long val, void *v)
 141{
 142        return blocking_notifier_call_chain(&memory_chain, val, v);
 143}
 144
 145/*
 146 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 147 * OK to have direct references to sparsemem variables in here.
 148 */
 149static int
 150memory_block_action(struct memory_block *mem, unsigned long action)
 151{
 152        int i;
 153        unsigned long psection;
 154        unsigned long start_pfn, start_paddr;
 155        struct page *first_page;
 156        int ret;
 157        int old_state = mem->state;
 158
 159        psection = mem->phys_index;
 160        first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
 161
 162        /*
 163         * The probe routines leave the pages reserved, just
 164         * as the bootmem code does.  Make sure they're still
 165         * that way.
 166         */
 167        if (action == MEM_ONLINE) {
 168                for (i = 0; i < PAGES_PER_SECTION; i++) {
 169                        if (PageReserved(first_page+i))
 170                                continue;
 171
 172                        printk(KERN_WARNING "section number %ld page number %d "
 173                                "not reserved, was it already online? \n",
 174                                psection, i);
 175                        return -EBUSY;
 176                }
 177        }
 178
 179        switch (action) {
 180                case MEM_ONLINE:
 181                        start_pfn = page_to_pfn(first_page);
 182                        ret = online_pages(start_pfn, PAGES_PER_SECTION);
 183                        break;
 184                case MEM_OFFLINE:
 185                        mem->state = MEM_GOING_OFFLINE;
 186                        start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
 187                        ret = remove_memory(start_paddr,
 188                                            PAGES_PER_SECTION << PAGE_SHIFT);
 189                        if (ret) {
 190                                mem->state = old_state;
 191                                break;
 192                        }
 193                        break;
 194                default:
 195                        printk(KERN_WARNING "%s(%p, %ld) unknown action: %ld\n",
 196                                        __FUNCTION__, mem, action, action);
 197                        WARN_ON(1);
 198                        ret = -EINVAL;
 199        }
 200
 201        return ret;
 202}
 203
 204static int memory_block_change_state(struct memory_block *mem,
 205                unsigned long to_state, unsigned long from_state_req)
 206{
 207        int ret = 0;
 208        down(&mem->state_sem);
 209
 210        if (mem->state != from_state_req) {
 211                ret = -EINVAL;
 212                goto out;
 213        }
 214
 215        ret = memory_block_action(mem, to_state);
 216        if (!ret)
 217                mem->state = to_state;
 218
 219out:
 220        up(&mem->state_sem);
 221        return ret;
 222}
 223
 224static ssize_t
 225store_mem_state(struct sys_device *dev, const char *buf, size_t count)
 226{
 227        struct memory_block *mem;
 228        unsigned int phys_section_nr;
 229        int ret = -EINVAL;
 230
 231        mem = container_of(dev, struct memory_block, sysdev);
 232        phys_section_nr = mem->phys_index;
 233
 234        if (!present_section_nr(phys_section_nr))
 235                goto out;
 236
 237        if (!strncmp(buf, "online", min((int)count, 6)))
 238                ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
 239        else if(!strncmp(buf, "offline", min((int)count, 7)))
 240                ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
 241out:
 242        if (ret)
 243                return ret;
 244        return count;
 245}
 246
 247/*
 248 * phys_device is a bad name for this.  What I really want
 249 * is a way to differentiate between memory ranges that
 250 * are part of physical devices that constitute
 251 * a complete removable unit or fru.
 252 * i.e. do these ranges belong to the same physical device,
 253 * s.t. if I offline all of these sections I can then
 254 * remove the physical device?
 255 */
 256static ssize_t show_phys_device(struct sys_device *dev, char *buf)
 257{
 258        struct memory_block *mem =
 259                container_of(dev, struct memory_block, sysdev);
 260        return sprintf(buf, "%d\n", mem->phys_device);
 261}
 262
/*
 * Per-block sysfs attributes.  phys_index and phys_device are read-only;
 * state is also writable by its owner and drives online/offline requests.
 */
static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL);
static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
 266
 267#define mem_create_simple_file(mem, attr_name)  \
 268        sysdev_create_file(&mem->sysdev, &attr_##attr_name)
 269#define mem_remove_simple_file(mem, attr_name)  \
 270        sysdev_remove_file(&mem->sysdev, &attr_##attr_name)
 271
 272/*
 273 * Block size attribute stuff
 274 */
 275static ssize_t
 276print_block_size(struct class *class, char *buf)
 277{
 278        return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE);
 279}
 280
 281static CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
 282
 283static int block_size_init(void)
 284{
 285        return sysfs_create_file(&memory_sysdev_class.kset.kobj,
 286                                &class_attr_block_size_bytes.attr);
 287}
 288
 289/*
 290 * Some architectures will have custom drivers to do this, and
 291 * will not need to do it from userspace.  The fake hot-add code
 292 * as well as ppc64 will do all of their discovery in userspace
 293 * and will require this interface.
 294 */
 295#ifdef CONFIG_ARCH_MEMORY_PROBE
 296static ssize_t
 297memory_probe_store(struct class *class, const char *buf, size_t count)
 298{
 299        u64 phys_addr;
 300        int nid;
 301        int ret;
 302
 303        phys_addr = simple_strtoull(buf, NULL, 0);
 304
 305        nid = memory_add_physaddr_to_nid(phys_addr);
 306        ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);
 307
 308        if (ret)
 309                count = ret;
 310
 311        return count;
 312}
 313static CLASS_ATTR(probe, 0700, NULL, memory_probe_store);
 314
 315static int memory_probe_init(void)
 316{
 317        return sysfs_create_file(&memory_sysdev_class.kset.kobj,
 318                                &class_attr_probe.attr);
 319}
 320#else
 321static inline int memory_probe_init(void)
 322{
 323        return 0;
 324}
 325#endif
 326
 327/*
 328 * Note that phys_device is optional.  It is here to allow for
 329 * differentiation between which *physical* devices each
 330 * section belongs to...
 331 */
 332
 333static int add_memory_block(unsigned long node_id, struct mem_section *section,
 334                     unsigned long state, int phys_device)
 335{
 336        struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 337        int ret = 0;
 338
 339        if (!mem)
 340                return -ENOMEM;
 341
 342        mem->phys_index = __section_nr(section);
 343        mem->state = state;
 344        init_MUTEX(&mem->state_sem);
 345        mem->phys_device = phys_device;
 346
 347        ret = register_memory(mem, section, NULL);
 348        if (!ret)
 349                ret = mem_create_simple_file(mem, phys_index);
 350        if (!ret)
 351                ret = mem_create_simple_file(mem, state);
 352        if (!ret)
 353                ret = mem_create_simple_file(mem, phys_device);
 354
 355        return ret;
 356}
 357
 358/*
 359 * For now, we have a linear search to go find the appropriate
 360 * memory_block corresponding to a particular phys_index. If
 361 * this gets to be a real problem, we can always use a radix
 362 * tree or something here.
 363 *
 364 * This could be made generic for all sysdev classes.
 365 */
 366static struct memory_block *find_memory_block(struct mem_section *section)
 367{
 368        struct kobject *kobj;
 369        struct sys_device *sysdev;
 370        struct memory_block *mem;
 371        char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
 372
 373        /*
 374         * This only works because we know that section == sysdev->id
 375         * slightly redundant with sysdev_register()
 376         */
 377        sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
 378
 379        kobj = kset_find_obj(&memory_sysdev_class.kset, name);
 380        if (!kobj)
 381                return NULL;
 382
 383        sysdev = container_of(kobj, struct sys_device, kobj);
 384        mem = container_of(sysdev, struct memory_block, sysdev);
 385
 386        return mem;
 387}
 388
 389int remove_memory_block(unsigned long node_id, struct mem_section *section,
 390                int phys_device)
 391{
 392        struct memory_block *mem;
 393
 394        mem = find_memory_block(section);
 395        mem_remove_simple_file(mem, phys_index);
 396        mem_remove_simple_file(mem, state);
 397        mem_remove_simple_file(mem, phys_device);
 398        unregister_memory(mem, section, NULL);
 399
 400        return 0;
 401}
 402
 403/*
 404 * need an interface for the VM to add new memory regions,
 405 * but without onlining it.
 406 */
 407int register_new_memory(struct mem_section *section)
 408{
 409        return add_memory_block(0, section, MEM_OFFLINE, 0);
 410}
 411
 412int unregister_memory_section(struct mem_section *section)
 413{
 414        if (!present_section(section))
 415                return -EINVAL;
 416
 417        return remove_memory_block(0, section, 0);
 418}
 419
 420/*
 421 * Initialize the sysfs support for memory devices...
 422 */
 423int __init memory_dev_init(void)
 424{
 425        unsigned int i;
 426        int ret;
 427        int err;
 428
 429        memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
 430        ret = sysdev_class_register(&memory_sysdev_class);
 431        if (ret)
 432                goto out;
 433
 434        /*
 435         * Create entries for memory sections that were found
 436         * during boot and have been initialized
 437         */
 438        for (i = 0; i < NR_MEM_SECTIONS; i++) {
 439                if (!present_section_nr(i))
 440                        continue;
 441                err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0);
 442                if (!ret)
 443                        ret = err;
 444        }
 445
 446        err = memory_probe_init();
 447        if (!ret)
 448                ret = err;
 449        err = block_size_init();
 450        if (!ret)
 451                ret = err;
 452out:
 453        if (ret)
 454                printk(KERN_ERR "%s() failed: %d\n", __FUNCTION__, ret);
 455        return ret;
 456}
 457