/*
 * Support for MMIO probes.
 * Derived from the kprobes infrastructure.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
#include <asm/debugreg.h>
#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
	struct list_head list;
	struct kmmio_fault_page *release_next;
	unsigned long page;	/* location of the fault page */
	pteval_t old_presence;	/* page presence prior to arming */
	bool armed;

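	/*
	 * Number of times this page has been registered as a part
	 * of a probe. If zero, page is disarmed and this may be freed.
	 * Used only by writers (RCU) and post_kmmio_handler().
	 * Protected by kmmio_lock, when linked into kmmio_page_table.
	 */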
	int count;

	bool scheduled_for_release;
};

struct kmmio_delayed_release {
	struct rcu_head rcu;
	struct kmmio_fault_page *release_list;
};

struct kmmio_context {
	struct kmmio_fault_page *fpage;
	struct kmmio_probe *probe;
	unsigned long saved_flags;
	unsigned long addr;
	int active;
};

static DEFINE_SPINLOCK(kmmio_lock);

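/* Protected by kmmio_lock */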
unsigned int kmmio_count;

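/* Read-protected by RCU, write-protected by kmmio_lock. */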
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

static struct list_head *kmmio_page_list(unsigned long page)
{
	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
}

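/* Accessed per-cpu */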
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

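/*
 * This is basically a dynamic stabbing problem:
 * Could use an existing prio tree to store the ranges, but
 * in this case we need to trigger probes at every address inside an address
 * range. Let's keep it simple by not storing the ranges, but only the
 * distinct pages.
 */

/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */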
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
	struct kmmio_probe *p;
	list_for_each_entry_rcu(p, &kmmio_probes, list) {
		if (addr >= p->addr && addr < (p->addr + p->len))
			return p;
	}
	return NULL;
}

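/* You must be holding RCU read lock. */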
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
{
	struct list_head *head;
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	head = kmmio_page_list(page);
	list_for_each_entry_rcu(f, head, list) {
		if (f->page == page)
			return f;
	}
	return NULL;
}

static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
{
	pmdval_t v = pmd_val(*pmd);
	if (clear) {
		*old = v & _PAGE_PRESENT;
		v &= ~_PAGE_PRESENT;
	} else	/* presume this has been called with clear==true before */
		v |= *old;
	set_pmd(pmd, __pmd(v));
}

static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
{
	pteval_t v = pte_val(*pte);
	if (clear) {
		*old = v & _PAGE_PRESENT;
		v &= ~_PAGE_PRESENT;
	} else	/* presume this has been called with clear==true before */
		v |= *old;
	set_pte_atomic(pte, __pte(v));
}

static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
	unsigned int level;
	pte_t *pte = lookup_address(f->page, &level);

	if (!pte) {
		pr_err("no pte for page 0x%08lx\n", f->page);
		return -1;
	}

	switch (level) {
	case PG_LEVEL_2M:
		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
		break;
	case PG_LEVEL_4K:
		clear_pte_presence(pte, clear, &f->old_presence);
		break;
	default:
		pr_err("unexpected page level 0x%x.\n", level);
		return -1;
	}

	__flush_tlb_one(f->page);
	return 0;
}

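/*
 * Mark the given page as not present. Access to it will trigger a fault.
 *
 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
 * protection is ignored here. RCU read lock is assumed held, so the struct
 * will not disappear unexpectedly. Furthermore, the caller must guarantee
 * that the struct will not be modified or freed while arming takes place.
 */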
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret;
	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
	if (f->armed) {
		pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
			   f->page, f->count, !!f->old_presence);
	}
	ret = clear_page_presence(f, true);
	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
		  f->page);
	f->armed = true;
	return ret;
}

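/* Restore the given page to saved presence state. */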
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret = clear_page_presence(f, false);
	WARN_ONCE(ret < 0,
			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
	f->armed = false;
}

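/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could be executing especially
 * within a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, keep watchdog happy.
 */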
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx;
	struct kmmio_fault_page *faultpage;
	int ret = 0; /* default to fault not handled */

	/*
	 * Preemption is now disabled to prevent process switch during
	 * single stepping. We can only handle one active kmmio trace
	 * per cpu, so ensure that we finish it before something else
	 * comes in for instance from do_page_fault().
	 */
	preempt_disable();
	rcu_read_lock();

	faultpage = get_kmmio_fault_page(addr);
	if (!faultpage) {
		/*
		 * Either this page fault is not caused by kmmio, or
		 * another CPU just pulled the kmmio probe from under
		 * our feet. The latter case should not be possible.
		 */
		goto no_kmmio;
	}

	ctx = &get_cpu_var(kmmio_ctx);
	if (ctx->active) {
		if (addr == ctx->addr) {
			/*
			 * A second fault on the same page means some other
			 * condition needs handling by do_page_fault(), the
			 * page really not being present is the most common.
			 */
			pr_debug("secondary hit for 0x%08lx CPU %d.\n",
				 addr, smp_processor_id());

			if (!faultpage->old_presence)
				pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
					addr, smp_processor_id());
		} else {
			/*
			 * Prevent overwriting the already in-flight context.
			 * This should not happen, let's hope disarming at
			 * least prevents a panic.
			 */
			pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
				 smp_processor_id(), addr);
			pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
			disarm_kmmio_fault_page(faultpage);
		}
		goto no_kmmio_ctx;
	}
	ctx->active++;

	ctx->fpage = faultpage;
	ctx->probe = get_kmmio_probe(addr);
	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
	ctx->addr = addr;

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	/*
	 * Enable single-stepping and disable interrupts for the faulting
	 * context. Local interrupts must not get enabled during stepping.
	 */
	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;

	/* Now we set present bit in PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage);

	/*
	 * If another cpu accesses the same page while we are stepping,
	 * the access will not be caught. It will simply succeed and the
	 * only downside is we lose the event. If this becomes a problem,
	 * the user should drop to single cpu before tracing.
	 */

	put_cpu_var(kmmio_ctx);
	return 1; /* fault handled */

no_kmmio_ctx:
	put_cpu_var(kmmio_ctx);
no_kmmio:
	rcu_read_unlock();
	preempt_enable_no_resched();
	return ret;
}

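/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */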
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

	if (!ctx->active) {
		/*
		 * Debug traps without an active context are due to either
		 * something external causing them (f.e. using a debugger
		 * while mmio tracing is enabled), or erroneous behaviour.
		 */
		pr_warning("unexpected debug trap on CPU %d.\n",
			   smp_processor_id());
		goto out;
	}

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	/* Prevent racing against release_kmmio_fault_page(). */
	spin_lock(&kmmio_lock);
	if (ctx->fpage->count)
		arm_kmmio_fault_page(ctx->fpage);
	spin_unlock(&kmmio_lock);

	regs->flags &= ~X86_EFLAGS_TF;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	BUG_ON(ctx->active);
	rcu_read_unlock();
	preempt_enable_no_resched();

	/*
	 * If somebody else is single-stepping across a probe point, flags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		ret = 1;
out:
	put_cpu_var(kmmio_ctx);
	return ret;
}

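/* You must be holding kmmio_lock. */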
static int add_kmmio_fault_page(unsigned long page)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (f) {
		if (!f->count)
			arm_kmmio_fault_page(f);
		f->count++;
		return 0;
	}

	f = kzalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return -1;

	f->count = 1;
	f->page = page;

	if (arm_kmmio_fault_page(f)) {
		kfree(f);
		return -1;
	}

	list_add_rcu(&f->list, kmmio_page_list(f->page));

	return 0;
}

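/* You must be holding kmmio_lock. */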
static void release_kmmio_fault_page(unsigned long page,
				struct kmmio_fault_page **release_list)
{
	struct kmmio_fault_page *f;

	page &= PAGE_MASK;
	f = get_kmmio_fault_page(page);
	if (!f)
		return;

	f->count--;
	BUG_ON(f->count < 0);
	if (!f->count) {
		disarm_kmmio_fault_page(f);
		if (!f->scheduled_for_release) {
			f->release_next = *release_list;
			*release_list = f;
			f->scheduled_for_release = true;
		}
	}
}

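/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped. The events may result only from programming
 * mistakes by accessing addresses before the beginning or past the end of a
 * mapping.
 */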
int register_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	int ret = 0;
	unsigned long size = 0;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);

	spin_lock_irqsave(&kmmio_lock, flags);
	if (get_kmmio_probe(p->addr)) {
		ret = -EEXIST;
		goto out;
	}
	kmmio_count++;
	list_add_rcu(&p->list, &kmmio_probes);
	while (size < size_lim) {
		if (add_kmmio_fault_page(p->addr + size))
			pr_err("Unable to set page fault.\n");
		size += PAGE_SIZE;
	}
out:
	spin_unlock_irqrestore(&kmmio_lock, flags);
	/*
	 * XXX: What should I do here?
	 * Here was a call to global_flush_tlb(), but it does not exist
	 * anymore. It seems it's not needed after all.
	 */
	return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
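
/*
 * Example usage (an illustrative sketch only: my_pre(), my_probe and the
 * iomem cookie are hypothetical, not part of this file). A client such as
 * mmiotrace fills in a struct kmmio_probe covering an ioremapped window;
 * the pre/post handlers then run around every access to it:
 *
 *	static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
 *				unsigned long addr)
 *	{
 *		pr_info("mmio access at 0x%08lx\n", addr);
 *	}
 *
 *	static struct kmmio_probe my_probe = {
 *		.addr = (unsigned long)iomem,
 *		.len = PAGE_SIZE,
 *		.pre_handler = my_pre,
 *	};
 *
 *	register_kmmio_probe(&my_probe);
 */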

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *f = dr->release_list;
	while (f) {
		struct kmmio_fault_page *next = f->release_next;
		BUG_ON(f->count);
		kfree(f);
		f = next;
	}
	kfree(dr);
}

static void remove_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr =
		container_of(head, struct kmmio_delayed_release, rcu);
	struct kmmio_fault_page *f = dr->release_list;
	struct kmmio_fault_page **prevp = &dr->release_list;
	unsigned long flags;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (f) {
		if (!f->count) {
			list_del_rcu(&f->list);
			prevp = &f->release_next;
		} else {
			*prevp = f->release_next;
			f->release_next = NULL;
			f->scheduled_for_release = false;
		}
		f = *prevp;
	}
	spin_unlock_irqrestore(&kmmio_lock, flags);

	/* This is the real RCU */
	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

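/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs as with RCU.
 */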
void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	unsigned long size = 0;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	struct kmmio_fault_page *release_list = NULL;
	struct kmmio_delayed_release *drelease;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (size < size_lim) {
		release_kmmio_fault_page(p->addr + size, &release_list);
		size += PAGE_SIZE;
	}
	list_del_rcu(&p->list);
	kmmio_count--;
	spin_unlock_irqrestore(&kmmio_lock, flags);

	if (!release_list)
		return;

	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
	if (!drelease) {
		pr_crit("leaking kmmio_fault_page objects.\n");
		return;
	}
	drelease->release_list = release_list;

	/*
	 * This is not really RCU here. We have just disarmed a set of
	 * pages so that they cannot trigger page faults anymore. However,
	 * we cannot remove the pages from kmmio_page_table,
	 * because a probe hit might be in flight on another CPU. The
	 * pages are collected into a list, and they will be removed from
	 * kmmio_page_table when it is certain that no probe hit related to
	 * these pages can be in flight. RCU grace period sounds like a
	 * good choice.
	 */
	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);

static int
kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
{
	struct die_args *arg = args;
	unsigned long *dr6_p = (unsigned long *)ERR_PTR(arg->err);

	if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
		if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
			/*
			 * Reset the BS bit in dr6 (pointed by args->err) to
			 * denote completion of processing
			 */
			*dr6_p &= ~DR_STEP;
			return NOTIFY_STOP;
		}

	return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};

int kmmio_init(void)
{
	int i;

	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
		INIT_LIST_HEAD(&kmmio_page_table[i]);

	return register_die_notifier(&nb_die);
}

void kmmio_cleanup(void)
{
	int i;

	unregister_die_notifier(&nb_die);
	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
	}
}
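
/*
 * Overview of the fault-handling flow implemented above (a descriptive
 * summary of this file, not upstream documentation):
 *
 *   1. arm_kmmio_fault_page() clears _PAGE_PRESENT on a traced page, so
 *      any access to it faults.
 *   2. do_page_fault() hands the fault to kmmio_handler(), which runs the
 *      probe's pre_handler, sets TF and clears IF, and disarms the page
 *      (restores presence) so the faulting instruction can complete under
 *      single-stepping.
 *   3. The single step raises a debug trap; kmmio_die_notifier() calls
 *      post_kmmio_handler(), which runs the post_handler and re-arms the
 *      page for the next access.
 */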