/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>

#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif

#include <linux/atomic.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/cache.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>

struct nmi_desc {
	spinlock_t lock;
	struct list_head head;
};

/* One handler list per NMI type: NMI_LOCAL, NMI_UNKNOWN, NMI_SERR, NMI_IO_CHECK. */
static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},
};

struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis __read_mostly;

int unknown_nmi_panic;

/*
 * Serializes access to the NMI reason port (0x61); may only be taken
 * from NMI context.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;

static int __init nmi_warning_debugfs(void)
{
	debugfs_create_u64("nmi_longest_ns", 0644,
			arch_debugfs_dir, &nmi_longest_ns);
	return 0;
}
fs_initcall(nmi_warning_debugfs);
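
/*
 * Note: the nmi_longest_ns threshold above can be adjusted at run time
 * through debugfs; with debugfs mounted in the usual place this is
 * /sys/kernel/debug/x86/nmi_longest_ns (arch_debugfs_dir is the "x86"
 * directory created by the arch debugfs setup code).
 */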

static void nmi_max_handler(struct irq_work *w)
{
	struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
	int remainder_ns, decimal_msecs;
	u64 whole_msecs = ACCESS_ONCE(a->max_duration);

	remainder_ns = do_div(whole_msecs, (1000 * 1000));
	decimal_msecs = remainder_ns / 1000;

	printk_ratelimited(KERN_INFO
		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
		a->handler, whole_msecs, decimal_msecs);
}

static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, so back-to-back NMIs can be lost if
	 * one arrives while another is still being serviced.  Walk the
	 * whole list and let every handler have a look instead of
	 * stopping at the first one that claims the NMI.
	 */
	list_for_each_entry_rcu(a, &desc->head, list) {
		int thishandled;
		u64 delta;

		delta = sched_clock();
		thishandled = a->handler(type, regs);
		handled += thishandled;
		delta = sched_clock() - delta;
		trace_nmi_handler(a->handler, (int)delta, thishandled);

		if (delta < nmi_longest_ns || delta < a->max_duration)
			continue;

		a->max_duration = delta;
		irq_work_queue(&a->irq_work);
	}

	rcu_read_unlock();

	return handled;
}
NOKPROBE_SYMBOL(nmi_handle);

int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	if (!action->handler)
		return -EINVAL;

	init_irq_work(&action->irq_work, nmi_max_handler);

	spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Most handlers of type NMI_UNKNOWN never return because they
	 * just assume the NMI is theirs.  The SERR and IO_CHECK chains
	 * are internal.  Warn if anyone stacks a second handler on any
	 * of these lists.
	 */
	WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * Some handlers need to run before everybody else, otherwise a
	 * spurious event confuses the later handlers (kdump relies on
	 * NMI_FLAG_FIRST for this).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);
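
/*
 * A minimal usage sketch, not part of this file: callers normally use
 * the register_nmi_handler() wrapper from <asm/nmi.h> rather than
 * calling __register_nmi_handler() directly.  The "mydev" name and the
 * mydev_*() helpers below are made up purely for illustration.
 *
 *	static int mydev_nmi_handler(unsigned int type, struct pt_regs *regs)
 *	{
 *		if (!mydev_caused_this_nmi())
 *			return NMI_DONE;	(not ours, let other handlers look)
 *		mydev_ack_nmi();
 *		return NMI_HANDLED;		(claimed; summed up by nmi_handle())
 *	}
 *
 *	err = register_nmi_handler(NMI_LOCAL, mydev_nmi_handler, 0, "mydev");
 *	...
 *	unregister_nmi_handler(NMI_LOCAL, "mydev");
 */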

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n;
	unsigned long flags;

	spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * The name passed in at registration time is used as
		 * the lookup key.
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			break;
		}
	}

	spin_unlock_irqrestore(&desc->lock, flags);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);

static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* Give any registered SERR handlers a chance first. */
	if (nmi_handle(NMI_SERR, regs))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	/*
	 * On some machines the PCI SERR line is used to report memory
	 * errors; let EDAC deal with it if a handler is installed.
	 */
#if defined(CONFIG_EDAC)
	if (edac_handler_set()) {
		edac_atomic_assert_error();
		return;
	}
#endif

	if (panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(pci_serr_error);

static void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* Give any registered IO-check handlers a chance first. */
	if (nmi_handle(NMI_IO_CHECK, regs))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_regs(regs);

	if (panic_on_io_nmi) {
		nmi_panic(regs, "NMI IOCK error: Not continuing");

		/*
		 * If we return from nmi_panic(), it means we received an
		 * NMI while processing panic().  Return without delaying
		 * and without re-enabling the IOCK line.
		 */
		return;
	}

	/* Re-enable the IOCK line and wait for a couple of seconds. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(io_check_error);

static void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * As a last resort, let the handlers registered for NMI_UNKNOWN
	 * have a look.  Having several of them is of limited use, since
	 * most such handlers simply assume the NMI is theirs.
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	pr_emerg("Do you have a strange power saving mode enabled?\n");
	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);

static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMIs are processed before the non-CPU-specific
	 * ones; they cannot be handled on another CPU.
	 *
	 * Back-to-back NMIs are detected by comparing the interrupted
	 * RIP with the one recorded for the previous NMI.  Because NMIs
	 * are edge-triggered, anything beyond two concurrent NMIs is
	 * dropped, so a back-to-back NMI may stand for several lost
	 * ones.  If this looks like the second half of a back-to-back
	 * pair, keep the 'swallow' state; otherwise reset it.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	handled = nmi_handle(NMI_LOCAL, regs);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * A handler may service more than one event in a single
		 * NMI; one of those events may already have latched the
		 * next NMI.  Since that event has been dealt with, flag
		 * the next NMI as a candidate to be swallowed instead of
		 * reporting it as unknown.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		return;
	}

	/*
	 * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
	 *
	 * Another CPU may be spinning in panic routines while holding
	 * nmi_reason_lock.  Check whether that CPU issued the crash-dump
	 * NMI IPI so this CPU does not wait for the lock forever.
	 */
	while (!raw_spin_trylock(&nmi_reason_lock)) {
		run_crash_ipi_callback(regs);
		cpu_relax();
	}

	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		return;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Nothing claimed this NMI.  Because only one NMI can be latched
	 * at a time, a handler may already have serviced the event whose
	 * NMI is still pending; that pending NMI then shows up here with
	 * no reason bits set.  If the previous NMI handled multiple
	 * events and this one is back-to-back with it, silently swallow
	 * it instead of reporting a scary "unknown NMI".  The downside
	 * is that a genuine unknown NMI arriving in that window is
	 * swallowed too, which is considered an acceptable trade-off.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);
}
NOKPROBE_SYMBOL(default_do_nmi);

/*
 * NMI handling can fault or hit a breakpoint; when the fault or
 * breakpoint handler returns with IRET, the CPU unmasks NMIs, so NMIs
 * can effectively nest.  The x86_64 entry code protects against nesting
 * when the outer NMI interrupted kernel mode, but nesting can still
 * happen if the outer NMI came from user mode.
 *
 * To cope with this, per-CPU NMI handling is a small state machine:
 *
 *  1) NMI_NOT_RUNNING - no NMI is being processed
 *  2) NMI_EXECUTING   - an NMI handler is running on this CPU
 *  3) NMI_LATCHED     - another NMI arrived while one was executing
 *
 * A nested NMI simply sets the state to NMI_LATCHED and returns.  When
 * the outer handler finishes it decrements the state; if the result is
 * not NMI_NOT_RUNNING, a nested NMI was latched and the handler is run
 * again.  The hardware latch is binary, so at most one pending NMI is
 * replayed this way, matching what the hardware itself would do.
 *
 * CR2 is saved and restored around the handler because the NMI may have
 * interrupted page-fault handling before CR2 was read, and the NMI
 * handler itself may fault and clobber CR2.
 */
enum nmi_states {
	NMI_NOT_RUNNING = 0,
	NMI_EXECUTING,
	NMI_LATCHED,
};
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);

#ifdef CONFIG_X86_64
/*
 * On x86_64 a breakpoint uses a fixed IST stack.  If an NMI interrupts
 * a breakpoint handler and then hits a breakpoint itself, the inner
 * breakpoint would reuse that same fixed stack and corrupt the outer
 * one.  When the NMI interrupted the debug stack, the IDT is therefore
 * temporarily switched so that new breakpoints stay on the current
 * stack, and it is restored when the NMI returns.
 */
static DEFINE_PER_CPU(int, update_debug_stack);
#endif

dotraplinkage notrace void
do_nmi(struct pt_regs *regs, long error_code)
{
	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
		this_cpu_write(nmi_state, NMI_LATCHED);
		return;
	}
	this_cpu_write(nmi_state, NMI_EXECUTING);
	this_cpu_write(nmi_cr2, read_cr2());
nmi_restart:

#ifdef CONFIG_X86_64
	/*
	 * If we interrupted code that was using the debug (breakpoint)
	 * stack, switch the debug IDT entry to a zero IST so that any
	 * breakpoint hit inside this NMI does not reuse and corrupt
	 * that stack; the change is undone below before returning.
	 */
	if (unlikely(is_debug_stack(regs->sp))) {
		debug_stack_set_zero();
		this_cpu_write(update_debug_stack, 1);
	}
#endif

	nmi_enter();

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	nmi_exit();

#ifdef CONFIG_X86_64
	if (unlikely(this_cpu_read(update_debug_stack))) {
		debug_stack_reset();
		this_cpu_write(update_debug_stack, 0);
	}
#endif

	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));
	if (this_cpu_dec_return(nmi_state))
		goto nmi_restart;
}
NOKPROBE_SYMBOL(do_nmi);

void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* Reset the back-to-back NMI detection logic. */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}
EXPORT_SYMBOL_GPL(local_touch_nmi);