linux/arch/powerpc/platforms/powernv/memtrace.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) IBM Corporation, 2014, 2017
   3 * Anton Blanchard, Rashmica Gupta.
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License as published by
   7 * the Free Software Foundation; either version 2 of the License, or
   8 * (at your option) any later version.
   9 */
  10
  11#define pr_fmt(fmt) "memtrace: " fmt
  12
  13#include <linux/bitops.h>
  14#include <linux/string.h>
  15#include <linux/memblock.h>
  16#include <linux/init.h>
  17#include <linux/moduleparam.h>
  18#include <linux/fs.h>
  19#include <linux/debugfs.h>
  20#include <linux/slab.h>
  21#include <linux/memory.h>
  22#include <linux/memory_hotplug.h>
  23#include <asm/machdep.h>
  24#include <asm/debugfs.h>
  25
  26/* This enables us to keep track of the memory removed from each node. */
  27struct memtrace_entry {
  28        void *mem;
  29        u64 start;
  30        u64 size;
  31        u32 nid;
  32        struct dentry *dir;
  33        char name[16];
  34};
  35
  36static u64 memtrace_size;
  37
  38static struct memtrace_entry *memtrace_array;
  39static unsigned int memtrace_array_nr;
  40
  41
  42static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
  43                             size_t count, loff_t *ppos)
  44{
  45        struct memtrace_entry *ent = filp->private_data;
  46
  47        return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
  48}
  49
  50static bool valid_memtrace_range(struct memtrace_entry *dev,
  51                                 unsigned long start, unsigned long size)
  52{
  53        if ((start >= dev->start) &&
  54            ((start + size) <= (dev->start + dev->size)))
  55                return true;
  56
  57        return false;
  58}
  59
  60static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
  61{
  62        unsigned long size = vma->vm_end - vma->vm_start;
  63        struct memtrace_entry *dev = filp->private_data;
  64
  65        if (!valid_memtrace_range(dev, vma->vm_pgoff << PAGE_SHIFT, size))
  66                return -EINVAL;
  67
  68        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  69
  70        if (remap_pfn_range(vma, vma->vm_start,
  71                            vma->vm_pgoff + (dev->start >> PAGE_SHIFT),
  72                            size, vma->vm_page_prot))
  73                return -EAGAIN;
  74
  75        return 0;
  76}
  77
  78static const struct file_operations memtrace_fops = {
  79        .llseek = default_llseek,
  80        .read   = memtrace_read,
  81        .mmap   = memtrace_mmap,
  82        .open   = simple_open,
  83};
  84
  85static void flush_memory_region(u64 base, u64 size)
  86{
  87        unsigned long line_size = ppc64_caches.l1d.size;
  88        u64 end = base + size;
  89        u64 addr;
  90
  91        base = round_down(base, line_size);
  92        end = round_up(end, line_size);
  93
  94        for (addr = base; addr < end; addr += line_size)
  95                asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
  96}
  97
  98static int check_memblock_online(struct memory_block *mem, void *arg)
  99{
 100        if (mem->state != MEM_ONLINE)
 101                return -1;
 102
 103        return 0;
 104}
 105
 106static int change_memblock_state(struct memory_block *mem, void *arg)
 107{
 108        unsigned long state = (unsigned long)arg;
 109
 110        mem->state = state;
 111
 112        return 0;
 113}
 114
 115static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
 116{
 117        u64 end_pfn = start_pfn + nr_pages - 1;
 118
 119        if (walk_memory_range(start_pfn, end_pfn, NULL,
 120            check_memblock_online))
 121                return false;
 122
 123        walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE,
 124                          change_memblock_state);
 125
 126        if (offline_pages(start_pfn, nr_pages)) {
 127                walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE,
 128                                  change_memblock_state);
 129                return false;
 130        }
 131
 132        walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
 133                          change_memblock_state);
 134
 135        /* RCU grace period? */
 136        flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
 137                            nr_pages << PAGE_SHIFT);
 138
 139        lock_device_hotplug();
 140        remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
 141        unlock_device_hotplug();
 142
 143        return true;
 144}
 145
 146static u64 memtrace_alloc_node(u32 nid, u64 size)
 147{
 148        u64 start_pfn, end_pfn, nr_pages;
 149        u64 base_pfn;
 150
 151        if (!NODE_DATA(nid) || !node_spanned_pages(nid))
 152                return 0;
 153
 154        start_pfn = node_start_pfn(nid);
 155        end_pfn = node_end_pfn(nid);
 156        nr_pages = size >> PAGE_SHIFT;
 157
 158        /* Trace memory needs to be aligned to the size */
 159        end_pfn = round_down(end_pfn - nr_pages, nr_pages);
 160
 161        for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
 162                if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true)
 163                        return base_pfn << PAGE_SHIFT;
 164        }
 165
 166        return 0;
 167}
 168
 169static int memtrace_init_regions_runtime(u64 size)
 170{
 171        u32 nid;
 172        u64 m;
 173
 174        memtrace_array = kcalloc(num_online_nodes(),
 175                                sizeof(struct memtrace_entry), GFP_KERNEL);
 176        if (!memtrace_array) {
 177                pr_err("Failed to allocate memtrace_array\n");
 178                return -EINVAL;
 179        }
 180
 181        for_each_online_node(nid) {
 182                m = memtrace_alloc_node(nid, size);
 183
 184                /*
 185                 * A node might not have any local memory, so warn but
 186                 * continue on.
 187                 */
 188                if (!m) {
 189                        pr_err("Failed to allocate trace memory on node %d\n", nid);
 190                        continue;
 191                }
 192
 193                pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
 194
 195                memtrace_array[memtrace_array_nr].start = m;
 196                memtrace_array[memtrace_array_nr].size = size;
 197                memtrace_array[memtrace_array_nr].nid = nid;
 198                memtrace_array_nr++;
 199        }
 200
 201        return 0;
 202}
 203
 204static struct dentry *memtrace_debugfs_dir;
 205
 206static int memtrace_init_debugfs(void)
 207{
 208        int ret = 0;
 209        int i;
 210
 211        for (i = 0; i < memtrace_array_nr; i++) {
 212                struct dentry *dir;
 213                struct memtrace_entry *ent = &memtrace_array[i];
 214
 215                ent->mem = ioremap(ent->start, ent->size);
 216                /* Warn but continue on */
 217                if (!ent->mem) {
 218                        pr_err("Failed to map trace memory at 0x%llx\n",
 219                                 ent->start);
 220                        ret = -1;
 221                        continue;
 222                }
 223
 224                snprintf(ent->name, 16, "%08x", ent->nid);
 225                dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
 226                if (!dir)
 227                        return -1;
 228
 229                ent->dir = dir;
 230                debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
 231                debugfs_create_x64("start", 0400, dir, &ent->start);
 232                debugfs_create_x64("size", 0400, dir, &ent->size);
 233        }
 234
 235        return ret;
 236}
 237
 238static int memtrace_enable_set(void *data, u64 val)
 239{
 240        if (memtrace_size)
 241                return -EINVAL;
 242
 243        if (!val)
 244                return -EINVAL;
 245
 246        /* Make sure size is aligned to a memory block */
 247        if (val & (memory_block_size_bytes() - 1))
 248                return -EINVAL;
 249
 250        if (memtrace_init_regions_runtime(val))
 251                return -EINVAL;
 252
 253        if (memtrace_init_debugfs())
 254                return -EINVAL;
 255
 256        memtrace_size = val;
 257
 258        return 0;
 259}
 260
 261static int memtrace_enable_get(void *data, u64 *val)
 262{
 263        *val = memtrace_size;
 264        return 0;
 265}
 266
 267DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
 268                                        memtrace_enable_set, "0x%016llx\n");
 269
 270static int memtrace_init(void)
 271{
 272        memtrace_debugfs_dir = debugfs_create_dir("memtrace",
 273                                                  powerpc_debugfs_root);
 274        if (!memtrace_debugfs_dir)
 275                return -1;
 276
 277        debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
 278                            NULL, &memtrace_init_fops);
 279
 280        return 0;
 281}
 282machine_device_initcall(powernv, memtrace_init);
 283