linux/arch/powerpc/platforms/powernv/memtrace.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Copyright (C) IBM Corporation, 2014, 2017
   4 * Anton Blanchard, Rashmica Gupta.
   5 */
   6
   7#define pr_fmt(fmt) "memtrace: " fmt
   8
   9#include <linux/bitops.h>
  10#include <linux/string.h>
  11#include <linux/memblock.h>
  12#include <linux/init.h>
  13#include <linux/moduleparam.h>
  14#include <linux/fs.h>
  15#include <linux/debugfs.h>
  16#include <linux/slab.h>
  17#include <linux/memory.h>
  18#include <linux/memory_hotplug.h>
  19#include <linux/numa.h>
  20#include <asm/machdep.h>
  21#include <asm/debugfs.h>
  22
  23/* This enables us to keep track of the memory removed from each node. */
  24struct memtrace_entry {
  25        void *mem;
  26        u64 start;
  27        u64 size;
  28        u32 nid;
  29        struct dentry *dir;
  30        char name[16];
  31};
  32
  33static u64 memtrace_size;
  34
  35static struct memtrace_entry *memtrace_array;
  36static unsigned int memtrace_array_nr;
  37
  38
  39static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
  40                             size_t count, loff_t *ppos)
  41{
  42        struct memtrace_entry *ent = filp->private_data;
  43
  44        return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
  45}
  46
  47static const struct file_operations memtrace_fops = {
  48        .llseek = default_llseek,
  49        .read   = memtrace_read,
  50        .open   = simple_open,
  51};
  52
  53static int check_memblock_online(struct memory_block *mem, void *arg)
  54{
  55        if (mem->state != MEM_ONLINE)
  56                return -1;
  57
  58        return 0;
  59}
  60
  61static int change_memblock_state(struct memory_block *mem, void *arg)
  62{
  63        unsigned long state = (unsigned long)arg;
  64
  65        mem->state = state;
  66
  67        return 0;
  68}
  69
  70/* called with device_hotplug_lock held */
  71static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
  72{
  73        const unsigned long start = PFN_PHYS(start_pfn);
  74        const unsigned long size = PFN_PHYS(nr_pages);
  75
  76        if (walk_memory_blocks(start, size, NULL, check_memblock_online))
  77                return false;
  78
  79        walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE,
  80                           change_memblock_state);
  81
  82        if (offline_pages(start_pfn, nr_pages)) {
  83                walk_memory_blocks(start, size, (void *)MEM_ONLINE,
  84                                   change_memblock_state);
  85                return false;
  86        }
  87
  88        walk_memory_blocks(start, size, (void *)MEM_OFFLINE,
  89                           change_memblock_state);
  90
  91
  92        return true;
  93}
  94
  95static u64 memtrace_alloc_node(u32 nid, u64 size)
  96{
  97        u64 start_pfn, end_pfn, nr_pages, pfn;
  98        u64 base_pfn;
  99        u64 bytes = memory_block_size_bytes();
 100
 101        if (!node_spanned_pages(nid))
 102                return 0;
 103
 104        start_pfn = node_start_pfn(nid);
 105        end_pfn = node_end_pfn(nid);
 106        nr_pages = size >> PAGE_SHIFT;
 107
 108        /* Trace memory needs to be aligned to the size */
 109        end_pfn = round_down(end_pfn - nr_pages, nr_pages);
 110
 111        lock_device_hotplug();
 112        for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
 113                if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
 114                        /*
 115                         * Remove memory in memory block size chunks so that
 116                         * iomem resources are always split to the same size and
 117                         * we never try to remove memory that spans two iomem
 118                         * resources.
 119                         */
 120                        end_pfn = base_pfn + nr_pages;
 121                        for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
 122                                __remove_memory(nid, pfn << PAGE_SHIFT, bytes);
 123                        }
 124                        unlock_device_hotplug();
 125                        return base_pfn << PAGE_SHIFT;
 126                }
 127        }
 128        unlock_device_hotplug();
 129
 130        return 0;
 131}
 132
 133static int memtrace_init_regions_runtime(u64 size)
 134{
 135        u32 nid;
 136        u64 m;
 137
 138        memtrace_array = kcalloc(num_online_nodes(),
 139                                sizeof(struct memtrace_entry), GFP_KERNEL);
 140        if (!memtrace_array) {
 141                pr_err("Failed to allocate memtrace_array\n");
 142                return -EINVAL;
 143        }
 144
 145        for_each_online_node(nid) {
 146                m = memtrace_alloc_node(nid, size);
 147
 148                /*
 149                 * A node might not have any local memory, so warn but
 150                 * continue on.
 151                 */
 152                if (!m) {
 153                        pr_err("Failed to allocate trace memory on node %d\n", nid);
 154                        continue;
 155                }
 156
 157                pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
 158
 159                memtrace_array[memtrace_array_nr].start = m;
 160                memtrace_array[memtrace_array_nr].size = size;
 161                memtrace_array[memtrace_array_nr].nid = nid;
 162                memtrace_array_nr++;
 163        }
 164
 165        return 0;
 166}
 167
 168static struct dentry *memtrace_debugfs_dir;
 169
 170static int memtrace_init_debugfs(void)
 171{
 172        int ret = 0;
 173        int i;
 174
 175        for (i = 0; i < memtrace_array_nr; i++) {
 176                struct dentry *dir;
 177                struct memtrace_entry *ent = &memtrace_array[i];
 178
 179                ent->mem = ioremap(ent->start, ent->size);
 180                /* Warn but continue on */
 181                if (!ent->mem) {
 182                        pr_err("Failed to map trace memory at 0x%llx\n",
 183                                 ent->start);
 184                        ret = -1;
 185                        continue;
 186                }
 187
 188                snprintf(ent->name, 16, "%08x", ent->nid);
 189                dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
 190                if (!dir) {
 191                        pr_err("Failed to create debugfs directory for node %d\n",
 192                                ent->nid);
 193                        return -1;
 194                }
 195
 196                ent->dir = dir;
 197                debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
 198                debugfs_create_x64("start", 0400, dir, &ent->start);
 199                debugfs_create_x64("size", 0400, dir, &ent->size);
 200        }
 201
 202        return ret;
 203}
 204
 205static int online_mem_block(struct memory_block *mem, void *arg)
 206{
 207        return device_online(&mem->dev);
 208}
 209
 210/*
 211 * Iterate through the chunks of memory we have removed from the kernel
 212 * and attempt to add them back to the kernel.
 213 */
 214static int memtrace_online(void)
 215{
 216        int i, ret = 0;
 217        struct memtrace_entry *ent;
 218
 219        for (i = memtrace_array_nr - 1; i >= 0; i--) {
 220                ent = &memtrace_array[i];
 221
 222                /* We have onlined this chunk previously */
 223                if (ent->nid == NUMA_NO_NODE)
 224                        continue;
 225
 226                /* Remove from io mappings */
 227                if (ent->mem) {
 228                        iounmap(ent->mem);
 229                        ent->mem = 0;
 230                }
 231
 232                if (add_memory(ent->nid, ent->start, ent->size)) {
 233                        pr_err("Failed to add trace memory to node %d\n",
 234                                ent->nid);
 235                        ret += 1;
 236                        continue;
 237                }
 238
 239                /*
 240                 * If kernel isn't compiled with the auto online option
 241                 * we need to online the memory ourselves.
 242                 */
 243                if (!memhp_auto_online) {
 244                        lock_device_hotplug();
 245                        walk_memory_blocks(ent->start, ent->size, NULL,
 246                                           online_mem_block);
 247                        unlock_device_hotplug();
 248                }
 249
 250                /*
 251                 * Memory was added successfully so clean up references to it
 252                 * so on reentry we can tell that this chunk was added.
 253                 */
 254                debugfs_remove_recursive(ent->dir);
 255                pr_info("Added trace memory back to node %d\n", ent->nid);
 256                ent->size = ent->start = ent->nid = NUMA_NO_NODE;
 257        }
 258        if (ret)
 259                return ret;
 260
 261        /* If all chunks of memory were added successfully, reset globals */
 262        kfree(memtrace_array);
 263        memtrace_array = NULL;
 264        memtrace_size = 0;
 265        memtrace_array_nr = 0;
 266        return 0;
 267}
 268
 269static int memtrace_enable_set(void *data, u64 val)
 270{
 271        u64 bytes;
 272
 273        /*
 274         * Don't attempt to do anything if size isn't aligned to a memory
 275         * block or equal to zero.
 276         */
 277        bytes = memory_block_size_bytes();
 278        if (val & (bytes - 1)) {
 279                pr_err("Value must be aligned with 0x%llx\n", bytes);
 280                return -EINVAL;
 281        }
 282
 283        /* Re-add/online previously removed/offlined memory */
 284        if (memtrace_size) {
 285                if (memtrace_online())
 286                        return -EAGAIN;
 287        }
 288
 289        if (!val)
 290                return 0;
 291
 292        /* Offline and remove memory */
 293        if (memtrace_init_regions_runtime(val))
 294                return -EINVAL;
 295
 296        if (memtrace_init_debugfs())
 297                return -EINVAL;
 298
 299        memtrace_size = val;
 300
 301        return 0;
 302}
 303
 304static int memtrace_enable_get(void *data, u64 *val)
 305{
 306        *val = memtrace_size;
 307        return 0;
 308}
 309
 310DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
 311                                        memtrace_enable_set, "0x%016llx\n");
 312
 313static int memtrace_init(void)
 314{
 315        memtrace_debugfs_dir = debugfs_create_dir("memtrace",
 316                                                  powerpc_debugfs_root);
 317        if (!memtrace_debugfs_dir)
 318                return -1;
 319
 320        debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
 321                            NULL, &memtrace_init_fops);
 322
 323        return 0;
 324}
 325machine_device_initcall(powernv, memtrace_init);
 326