// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) IBM Corporation, 2014, 2017
 * Author: Anshuman Khandual <khandual@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) "memtrace: " fmt

#include <linux/bitops.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/numa.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>
#include <asm/cacheflush.h>

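/* This enables us to keep track of the memory removed from each node. */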
struct memtrace_entry {
        void *mem;
        u64 start;
        u64 size;
        u32 nid;
        struct dentry *dir;
        char name[16];
};

static DEFINE_MUTEX(memtrace_mutex);
static u64 memtrace_size;

static struct memtrace_entry *memtrace_array;
static unsigned int memtrace_array_nr;

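/* Dump a node's trace buffer to user space via the debugfs "trace" file. */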
static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
                             size_t count, loff_t *ppos)
{
        struct memtrace_entry *ent = filp->private_data;

        return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
}

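/* Map a node's trace buffer into user space as non-cached memory. */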
static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct memtrace_entry *ent = filp->private_data;

        if (ent->size < vma->vm_end - vma->vm_start)
                return -EINVAL;

        if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
                return -EINVAL;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff,
                               vma->vm_end - vma->vm_start, vma->vm_page_prot);
}

static const struct file_operations memtrace_fops = {
        .llseek = default_llseek,
        .read   = memtrace_read,
        .open   = simple_open,
        .mmap   = memtrace_mmap,
};

#define FLUSH_CHUNK_SIZE SZ_1G
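/**
 * flush_dcache_range_chunked(): Write any modified data cache blocks out to
 * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE.
 * Does not invalidate the corresponding instruction cache blocks.
 *
 * @start: the start address
 * @stop: the stop address (exclusive)
 * @chunk: the max size of the chunks
 */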
static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
                                       unsigned long chunk)
{
        unsigned long i;

        for (i = start; i < stop; i += chunk) {
                flush_dcache_range(i, min(stop, i + chunk));
                cond_resched();
        }
}

static void memtrace_clear_range(unsigned long start_pfn,
                                 unsigned long nr_pages)
{
        unsigned long pfn;

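        /* As HIGHMEM does not apply, use clear_page() directly. */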
        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
                if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
                        cond_resched();
                clear_page(__va(PFN_PHYS(pfn)));
        }
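        /*
         * Before we go ahead and use this range as a cache-inhibited range,
         * flush the cache.
         */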
        flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn),
                                   (unsigned long)pfn_to_kaddr(start_pfn + nr_pages),
                                   FLUSH_CHUNK_SIZE);
}

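/*
 * Allocate a contiguous, size-aligned chunk on the given node, zero it, mark
 * its pages offline and remove them from the kernel linear mapping.
 */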
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
        const unsigned long nr_pages = PHYS_PFN(size);
        unsigned long pfn, start_pfn;
        struct page *page;

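        /*
         * Trace memory needs to be aligned to the size, which is guaranteed
         * by alloc_contig_pages().
         */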
        page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
                                  __GFP_NOWARN, nid, NULL);
        if (!page)
                return 0;
        start_pfn = page_to_pfn(page);

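        /* Clear the range while we still have a linear mapping. */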
        memtrace_clear_range(start_pfn, nr_pages);

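        /*
         * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
         * dumping, ...) should be touching these pages.
         */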
        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
                __SetPageOffline(pfn_to_page(pfn));

        arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);

        return PFN_PHYS(start_pfn);
}

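/* Carve out one chunk of trace memory on each online node. */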
static int memtrace_init_regions_runtime(u64 size)
{
        u32 nid;
        u64 m;

        memtrace_array = kcalloc(num_online_nodes(),
                                 sizeof(struct memtrace_entry), GFP_KERNEL);
        if (!memtrace_array) {
                pr_err("Failed to allocate memtrace_array\n");
                return -EINVAL;
        }

        for_each_online_node(nid) {
                m = memtrace_alloc_node(nid, size);

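                /*
                 * A node might not have any local memory, so warn but
                 * continue on.
                 */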
                if (!m) {
                        pr_err("Failed to allocate trace memory on node %d\n", nid);
                        continue;
                }

                pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);

                memtrace_array[memtrace_array_nr].start = m;
                memtrace_array[memtrace_array_nr].size = size;
                memtrace_array[memtrace_array_nr].nid = nid;
                memtrace_array_nr++;
        }

        return 0;
}

static struct dentry *memtrace_debugfs_dir;

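/* Create a per-node debugfs directory exposing "trace", "start" and "size". */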
static int memtrace_init_debugfs(void)
{
        int ret = 0;
        int i;

        for (i = 0; i < memtrace_array_nr; i++) {
                struct dentry *dir;
                struct memtrace_entry *ent = &memtrace_array[i];

                ent->mem = ioremap(ent->start, ent->size);
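                /* Warn but continue on */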
                if (!ent->mem) {
                        pr_err("Failed to map trace memory at 0x%llx\n",
                               ent->start);
                        ret = -1;
                        continue;
                }

                snprintf(ent->name, 16, "%08x", ent->nid);
                dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);

                ent->dir = dir;
                debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops);
                debugfs_create_x64("start", 0400, dir, &ent->start);
                debugfs_create_x64("size", 0400, dir, &ent->size);
        }

        return ret;
}

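/* Restore the linear mapping for a chunk and return its pages to the buddy. */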
static int memtrace_free(int nid, u64 start, u64 size)
{
        struct mhp_params params = { .pgprot = PAGE_KERNEL };
        const unsigned long nr_pages = PHYS_PFN(size);
        const unsigned long start_pfn = PHYS_PFN(start);
        unsigned long pfn;
        int ret;

        ret = arch_create_linear_mapping(nid, start, size, &params);
        if (ret)
                return ret;

        for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
                __ClearPageOffline(pfn_to_page(pfn));

        free_contig_range(start_pfn, nr_pages);
        return 0;
}

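/*
 * Iterate through the chunks of memory we have allocated and attempt to expose
 * them back to the kernel.
 */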
static int memtrace_free_regions(void)
{
        int i, ret = 0;
        struct memtrace_entry *ent;

        for (i = memtrace_array_nr - 1; i >= 0; i--) {
                ent = &memtrace_array[i];

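                /* We have freed this chunk previously */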
                if (ent->nid == NUMA_NO_NODE)
                        continue;

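                /* Remove from io mappings */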
                if (ent->mem) {
                        iounmap(ent->mem);
                        ent->mem = NULL;
                }

                if (memtrace_free(ent->nid, ent->start, ent->size)) {
                        pr_err("Failed to free trace memory on node %d\n",
                               ent->nid);
                        ret += 1;
                        continue;
                }

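                /*
                 * Memory was freed successfully, so clean up references to it;
                 * on reentry we can then tell that this chunk was already freed.
                 */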
                debugfs_remove_recursive(ent->dir);
                pr_info("Freed trace memory back on node %d\n", ent->nid);
                ent->size = ent->start = ent->nid = NUMA_NO_NODE;
        }
        if (ret)
                return ret;

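        /* If all chunks of memory were freed successfully, reset globals. */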
        kfree(memtrace_array);
        memtrace_array = NULL;
        memtrace_size = 0;
        memtrace_array_nr = 0;
        return 0;
}

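/*
 * The "enable" debugfs write handler: writing a size frees any current trace
 * memory and, for a non-zero value, allocates that many bytes per online node.
 */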
static int memtrace_enable_set(void *data, u64 val)
{
        int rc = -EAGAIN;
        u64 bytes;

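        /*
         * The requested size must be aligned to the memory block size;
         * zero simply frees any previously allocated trace memory.
         */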
        bytes = memory_block_size_bytes();
        if (val & (bytes - 1)) {
                pr_err("Value must be aligned with 0x%llx\n", bytes);
                return -EINVAL;
        }

        mutex_lock(&memtrace_mutex);

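        /* Free all previously allocated memory. */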
        if (memtrace_size && memtrace_free_regions())
                goto out_unlock;

        if (!val) {
                rc = 0;
                goto out_unlock;
        }

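        /* Allocate memory. */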
        if (memtrace_init_regions_runtime(val))
                goto out_unlock;

        if (memtrace_init_debugfs())
                goto out_unlock;

        memtrace_size = val;
        rc = 0;
out_unlock:
        mutex_unlock(&memtrace_mutex);
        return rc;
}

static int memtrace_enable_get(void *data, u64 *val)
{
        *val = memtrace_size;
        return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
                        memtrace_enable_set, "0x%016llx\n");

static int memtrace_init(void)
{
        memtrace_debugfs_dir = debugfs_create_dir("memtrace",
                                                  powerpc_debugfs_root);

        debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
                            NULL, &memtrace_init_fops);

        return 0;
}
machine_device_initcall(powernv, memtrace_init);