1
2
3
4
5
6
7
8
9
10
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13#include <linux/thread_info.h>
14#include <linux/capability.h>
15#include <linux/miscdevice.h>
16#include <linux/ratelimit.h>
17#include <linux/kallsyms.h>
18#include <linux/rcupdate.h>
19#include <linux/kobject.h>
20#include <linux/uaccess.h>
21#include <linux/kdebug.h>
22#include <linux/kernel.h>
23#include <linux/percpu.h>
24#include <linux/string.h>
25#include <linux/device.h>
26#include <linux/syscore_ops.h>
27#include <linux/delay.h>
28#include <linux/ctype.h>
29#include <linux/sched.h>
30#include <linux/sysfs.h>
31#include <linux/types.h>
32#include <linux/slab.h>
33#include <linux/init.h>
34#include <linux/kmod.h>
35#include <linux/poll.h>
36#include <linux/nmi.h>
37#include <linux/cpu.h>
38#include <linux/smp.h>
39#include <linux/fs.h>
40#include <linux/mm.h>
41#include <linux/debugfs.h>
42#include <linux/irq_work.h>
43#include <linux/export.h>
44
45#include <asm/intel-family.h>
46#include <asm/processor.h>
47#include <asm/traps.h>
48#include <asm/mce.h>
49#include <asm/msr.h>
50#include <asm/reboot.h>
51#include <asm/cacheflush.h>
52
53#include "mce-internal.h"
54
/* Serializes /dev/mcelog readers against writers of the mcelog buffer. */
static DEFINE_MUTEX(mce_chrdev_read_mutex);

/*
 * Load mcelog's index with the locking contract checked: the caller must
 * either hold mce_chrdev_read_mutex or be inside an sched-RCU read-side
 * critical section.  The acquire load pairs with the cmpxchg()/wmb()
 * sequence in mce_log() so that a reader seeing the index also sees the
 * record data written before it.
 */
#define mce_log_get_idx_check(p) \
({ \
	rcu_lockdep_assert(rcu_read_lock_sched_held() || \
			   lockdep_is_held(&mce_chrdev_read_mutex), \
			   "suspicious mce_log_get_idx_check() usage"); \
	smp_load_acquire(&(p)); \
})
64
65#define CREATE_TRACE_POINTS
66#include <trace/events/mce.h>
67
#define SPINUNIT 100	/* spin-wait step (ns) while rendezvousing CPUs */

/* Number of machine-check exceptions handled, per CPU. */
DEFINE_PER_CPU(unsigned, mce_exception_count);

struct mce_bank *mce_banks __read_mostly;	/* per-bank control/init state */
struct mce_vendor_flags mce_flags __read_mostly;

struct mca_config mca_cfg __read_mostly = {
	.bootlog = -1,
	/*
	 * Tolerant levels:
	 * 0: always panic on uncorrected errors, log corrected errors
	 * 1: panic or SIGBUS on uncorrected errors, log corrected errors
	 * 2: SIGBUS or log uncorrected errors, log corrected errors
	 * 3: never panic or SIGBUS, log all errors (for testing only)
	 */
	.tolerant = 1,
	.monarch_timeout = -1
};

/* Bit 0 set when new events were logged; consumed by mce_notify_irq(). */
static unsigned long mce_need_notify;
static char mce_helper[128];		/* path of the usermode trigger helper */
static char *mce_helper_argv[2] = { mce_helper, NULL };

/* Waitqueue for /dev/mcelog poll()ers. */
static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);

/* Worst event each CPU saw in the current broadcast exception; see mce_reign(). */
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;

/*
 * MCA banks polled by the period polling timer for corrected events.
 * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
 */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};

/*
 * MCA banks for which corrected-error reporting is disabled (handled by
 * firmware instead); these banks are excluded from CMCI/polling by
 * vendor code.
 */
mce_banks_t mce_banks_ce_disabled;

static struct work_struct mce_work;
static struct irq_work mce_irq_work;

/* Vendor quirk hook invoked from mce_no_way_out() for banks with valid status. */
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);

/* Vendor quirk hook: return non-zero to suppress printing of a record. */
static int (*quirk_noprint)(struct mce *m);

/*
 * CPU/chipset specific EDAC code can register a notifier call here to print
 * MCE errors in a human-readable form.
 */
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
128
129
/* Do initial initialization of a struct mce: zero it and record per-CPU facts. */
void mce_setup(struct mce *m)
{
	memset(m, 0, sizeof(struct mce));
	m->cpu = m->extcpu = smp_processor_id();
	m->tsc = rdtsc();
	/* We hope get_seconds() stays lockless */
	m->time = get_seconds();
	m->cpuvendor = boot_cpu_data.x86_vendor;
	m->cpuid = cpuid_eax(1);
	m->socketid = cpu_data(m->extcpu).phys_proc_id;
	m->apicid = cpu_data(m->extcpu).initial_apicid;
	rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);

	/* Protected Processor Inventory Number, where supported. */
	if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
		rdmsrl(MSR_PPIN, m->ppin);

	m->microcode = boot_cpu_data.microcode;
}
148
/* Per-CPU fake MCE record used by the software error injector. */
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);

/*
 * Legacy /dev/mcelog ring buffer.  Filled locklessly by mce_log() (which
 * may run in NMI context), drained by the character device code.
 */
static struct mce_log mcelog = {
	.signature	= MCE_LOG_SIGNATURE,
	.len		= MCE_LOG_LEN,
	.recordlen	= sizeof(struct mce),
};
163
/*
 * Log an MCE record: trace it, queue it into the genpool for deferred
 * processing, and append it to the legacy mcelog ring buffer.  Lockless;
 * safe to call from NMI context.
 */
void mce_log(struct mce *mce)
{
	unsigned next, entry;

	/* Emit the trace record: */
	trace_mce_record(mce);

	/*
	 * If the genpool is full, fall back to kicking the irq_work so
	 * notification still happens.
	 */
	if (!mce_gen_pool_add(mce))
		irq_work_queue(&mce_irq_work);

	wmb();
	for (;;) {
		entry = mce_log_get_idx_check(mcelog.next);
		for (;;) {
			/*
			 * When the buffer fills up discard new entries.
			 * Assume that the earlier errors are the more
			 * interesting ones:
			 */
			if (entry >= MCE_LOG_LEN) {
				set_bit(MCE_OVERFLOW,
					(unsigned long *)&mcelog.flags);
				return;
			}
			/* Old left over entry. Skip: */
			if (mcelog.entry[entry].finished) {
				entry++;
				continue;
			}
			break;
		}
		smp_rmb();
		next = entry + 1;
		/* Claim the slot; retry from the new index if we raced. */
		if (cmpxchg(&mcelog.next, entry, next) == entry)
			break;
	}
	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	wmb();
	/* Publish only after the record data is globally visible. */
	mcelog.entry[entry].finished = 1;
	wmb();

	set_bit(0, &mce_need_notify);
}
208
/*
 * Log an injected MCE.  Taking the mutex satisfies the locking contract
 * asserted by mce_log_get_idx_check() for non-NMI callers.
 */
void mce_inject_log(struct mce *m)
{
	mutex_lock(&mce_chrdev_read_mutex);
	mce_log(m);
	mutex_unlock(&mce_chrdev_read_mutex);
}
EXPORT_SYMBOL_GPL(mce_inject_log);
216
static struct notifier_block mce_srao_nb;

/* Count of notifiers on the decode chain; read by mce_default_notifier(). */
static atomic_t num_notifiers;
220
/* Register an MCE decoder/consumer on the decode chain. */
void mce_register_decode_chain(struct notifier_block *nb)
{
	atomic_inc(&num_notifiers);

	/* Priorities strictly between LOWEST and EDAC are reserved. */
	WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC);

	blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_decode_chain);
230
/* Remove a previously registered decoder from the decode chain. */
void mce_unregister_decode_chain(struct notifier_block *nb)
{
	atomic_dec(&num_notifiers);

	blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
238
/* Default accessors: architectural IA32_MCi_* MSR numbering. */
static inline u32 ctl_reg(int bank)
{
	return MSR_IA32_MCx_CTL(bank);
}

static inline u32 status_reg(int bank)
{
	return MSR_IA32_MCx_STATUS(bank);
}

static inline u32 addr_reg(int bank)
{
	return MSR_IA32_MCx_ADDR(bank);
}

static inline u32 misc_reg(int bank)
{
	return MSR_IA32_MCx_MISC(bank);
}

/* Scalable MCA (AMD) accessors: different MSR space per bank. */
static inline u32 smca_ctl_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_CTL(bank);
}

static inline u32 smca_status_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_STATUS(bank);
}

static inline u32 smca_addr_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_ADDR(bank);
}

static inline u32 smca_misc_reg(int bank)
{
	return MSR_AMD64_SMCA_MCx_MISC(bank);
}

/*
 * Bank MSR accessor indirection; overridden with the smca_* variants on
 * SMCA-capable CPUs in __mcheck_cpu_init_vendor().
 */
struct mca_msr_regs msr_ops = {
	.ctl	= ctl_reg,
	.status	= status_reg,
	.addr	= addr_reg,
	.misc	= misc_reg
};
285
/* Dump one MCE record to the console in the traditional mcelog format. */
static void __print_mce(struct mce *m)
{
	/* Give vendor quirks a chance to suppress known-spurious records. */
	if (quirk_noprint && quirk_noprint(m))
		return;

	pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
		 m->extcpu,
		 (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
		 m->mcgstatus, m->bank, m->status);

	if (m->ip) {
		/* Without EIPV the reported IP is only approximate. */
		pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
			 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
			 m->cs, m->ip);

		if (m->cs == __KERNEL_CS)
			print_symbol("{%s}", m->ip);
		pr_cont("\n");
	}

	pr_emerg(HW_ERR "TSC %llx ", m->tsc);
	if (m->addr)
		pr_cont("ADDR %llx ", m->addr);
	if (m->misc)
		pr_cont("MISC %llx ", m->misc);

	/* SMCA-only auxiliary registers. */
	if (mce_flags.smca) {
		if (m->synd)
			pr_cont("SYND %llx ", m->synd);
		if (m->ipid)
			pr_cont("IPID %llx ", m->ipid);
	}

	pr_cont("\n");
	/*
	 * NOTE(review): this output format is presumably parsed by external
	 * tools (mcelog) -- keep field order/format stable.
	 */
	pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
		 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
		 m->microcode);
}
328
/* Print a record plus the (ratelimited) hint to decode it with mcelog. */
static void print_mce(struct mce *m)
{
	__print_mce(m);
	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}
334
#define PANIC_TIMEOUT 5		/* seconds to spin in wait_for_panic() */

static atomic_t mce_panicked;

/* Fake panic mode: exercise the panic path without actually panicking. */
static int fake_panic;
static atomic_t mce_fake_panicked;
342
/* Panic in progress on another CPU. Enable interrupts and wait for the final IPI. */
static void wait_for_panic(void)
{
	long timeout = PANIC_TIMEOUT*USEC_PER_SEC;

	preempt_disable();
	local_irq_enable();
	while (timeout-- > 0)
		udelay(1);
	/* Nobody killed us within the timeout; panic ourselves. */
	if (panic_timeout == 0)
		panic_timeout = mca_cfg.panic_timeout;
	panic("Panicing machine check CPU died");
}
355
/*
 * Panic because of a fatal machine check: print all pending records (the
 * final/worst one last), mirror them to APEI persistent storage, and panic
 * unless fake_panic testing mode is enabled.
 */
static void mce_panic(const char *msg, struct mce *final, char *exp)
{
	int apei_err = 0;
	struct llist_node *pending;
	struct mce_evt_llist *l;

	if (!fake_panic) {
		/*
		 * Make sure only one CPU runs in machine check panic
		 */
		if (atomic_inc_return(&mce_panicked) > 1)
			wait_for_panic();
		barrier();

		bust_spinlocks(1);
		console_verbose();
	} else {
		/* Don't log too much for fake panic */
		if (atomic_inc_return(&mce_fake_panicked) > 1)
			return;
	}
	pending = mce_gen_pool_prepare_records();
	/* First print corrected ones that are still unlogged */
	llist_for_each_entry(l, pending, llnode) {
		struct mce *m = &l->mce;
		if (!(m->status & MCI_STATUS_UC)) {
			print_mce(m);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
	}
	/* Now print uncorrected but with the final one last */
	llist_for_each_entry(l, pending, llnode) {
		struct mce *m = &l->mce;
		if (!(m->status & MCI_STATUS_UC))
			continue;
		if (!final || mce_cmp(m, final)) {
			print_mce(m);
			if (!apei_err)
				apei_err = apei_write_mce(m);
		}
	}
	if (final) {
		print_mce(final);
		if (!apei_err)
			apei_err = apei_write_mce(final);
	}
	if (cpu_missing)
		pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
	if (exp)
		pr_emerg(HW_ERR "Machine check: %s\n", exp);
	if (!fake_panic) {
		if (panic_timeout == 0)
			panic_timeout = mca_cfg.panic_timeout;
		panic(msg);
	} else
		pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
}
414
415
416
417static int msr_to_offset(u32 msr)
418{
419 unsigned bank = __this_cpu_read(injectm.bank);
420
421 if (msr == mca_cfg.rip_msr)
422 return offsetof(struct mce, ip);
423 if (msr == msr_ops.status(bank))
424 return offsetof(struct mce, status);
425 if (msr == msr_ops.addr(bank))
426 return offsetof(struct mce, addr);
427 if (msr == msr_ops.misc(bank))
428 return offsetof(struct mce, misc);
429 if (msr == MSR_IA32_MCG_STATUS)
430 return offsetof(struct mce, mcgstatus);
431 return -1;
432}
433
434
/* MSR read wrapper that honors software error injection. */
static u64 mce_rdmsrl(u32 msr)
{
	u64 v;

	/* An injection is pending on this CPU: read from injectm instead. */
	if (__this_cpu_read(injectm.finished)) {
		int offset = msr_to_offset(msr);

		if (offset < 0)
			return 0;
		return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
	}

	if (rdmsrl_safe(msr, &v)) {
		WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
		/*
		 * Return zero in case the access faulted. This should
		 * not happen normally but can happen if the CPU does
		 * something weird, or if the code is buggy.
		 */
		v = 0;
	}

	return v;
}
459
/* MSR write wrapper that honors software error injection. */
static void mce_wrmsrl(u32 msr, u64 v)
{
	if (__this_cpu_read(injectm.finished)) {
		int offset = msr_to_offset(msr);

		/* Writes to unemulated MSRs are silently dropped. */
		if (offset >= 0)
			*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
		return;
	}
	wrmsrl(msr, v);
}
471
472
473
474
475
476
477static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
478{
479 mce_setup(m);
480
481 m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
482 if (regs) {
483
484
485
486
487 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
488 m->ip = regs->ip;
489 m->cs = regs->cs;
490
491
492
493
494
495
496 if (v8086_mode(regs))
497 m->cs |= 3;
498 }
499
500 if (mca_cfg.rip_msr)
501 m->ip = mce_rdmsrl(mca_cfg.rip_msr);
502 }
503}
504
505int mce_available(struct cpuinfo_x86 *c)
506{
507 if (mca_cfg.disabled)
508 return 0;
509 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
510}
511
512static void mce_schedule_work(void)
513{
514 if (!mce_gen_pool_empty())
515 schedule_work(&mce_work);
516}
517
/* irq_work callback: notify userspace and schedule deferred processing. */
static void mce_irq_work_cb(struct irq_work *entry)
{
	mce_notify_irq();
	mce_schedule_work();
}
523
/* Arrange for userspace notification after a machine check event. */
static void mce_report_event(struct pt_regs *regs)
{
	if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
		mce_notify_irq();
		/*
		 * Triggering the work queue here is just an insurance
		 * policy in case the syscall exit notify handler
		 * doesn't run soon enough or ends up running on the
		 * wrong CPU (can happen when audit sleeps)
		 */
		mce_schedule_work();
		return;
	}

	/* Interrupts disabled: defer everything through irq_work. */
	irq_work_queue(&mce_irq_work);
}
540
541
542
543
544
545
546
547int mce_usable_address(struct mce *m)
548{
549 if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
550 return 0;
551
552
553 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
554 return 1;
555
556 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
557 return 0;
558 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
559 return 0;
560 return 1;
561}
562EXPORT_SYMBOL_GPL(mce_usable_address);
563
/*
 * Notifier for SRAO (Software Recoverable Action Optional) errors: if the
 * record carries a usable address, try to poison/offline the affected page.
 */
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *mce = (struct mce *)data;
	unsigned long pfn;

	if (!mce)
		return NOTIFY_DONE;

	if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
		pfn = mce->addr >> PAGE_SHIFT;
		/* On successful offline, unmap the page from speculation. */
		if (!memory_failure(pfn, MCE_VECTOR, 0))
			set_mce_nospec(pfn);
	}

	return NOTIFY_OK;
}
static struct notifier_block mce_srao_nb = {
	.notifier_call	= srao_decode_notifier,
	.priority	= MCE_PRIO_SRAO,
};
585
/*
 * Fallback notifier: print the record ourselves, but only when no
 * additional decoder is registered on the chain.
 */
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *m = (struct mce *)data;

	if (!m)
		return NOTIFY_DONE;

	/*
	 * More than two registered notifiers means a dedicated decoder
	 * (e.g. EDAC) is present, which will do the printing -- presumably
	 * the 2 accounts for the SRAO and default notifiers themselves.
	 */
	if (atomic_read(&num_notifiers) > 2)
		return NOTIFY_DONE;

	__print_mce(m);

	return NOTIFY_DONE;
}

static struct notifier_block mce_default_nb = {
	.notifier_call	= mce_default_notifier,
	/* lowest prio, we want it to run last: */
	.priority	= MCE_PRIO_LOWEST,
};
611
612
613
614
615static void mce_read_aux(struct mce *m, int i)
616{
617 if (m->status & MCI_STATUS_MISCV)
618 m->misc = mce_rdmsrl(msr_ops.misc(i));
619
620 if (m->status & MCI_STATUS_ADDRV) {
621 m->addr = mce_rdmsrl(msr_ops.addr(i));
622
623
624
625
626 if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
627 u8 shift = MCI_MISC_ADDR_LSB(m->misc);
628 m->addr >>= shift;
629 m->addr <<= shift;
630 }
631
632
633
634
635
636 if (mce_flags.smca) {
637 u8 lsb = (m->addr >> 56) & 0x3f;
638
639 m->addr &= GENMASK_ULL(55, lsb);
640 }
641 }
642
643 if (mce_flags.smca) {
644 m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
645
646 if (m->status & MCI_STATUS_SYNDV)
647 m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
648 }
649}
650
/* Classify whether @m reports a memory (DRAM/cache-data) error. */
bool mce_is_memory_error(struct mce *m)
{
	if (m->cpuvendor == X86_VENDOR_AMD) {
		/* ErrCodeExt[20:16] */
		u8 xec = (m->status >> 16) & 0x1f;

		return (xec == 0x0 || xec == 0x8);
	} else if (m->cpuvendor == X86_VENDOR_INTEL) {
		/*
		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
		 *
		 * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
		 * indicating a memory error. Bit 8 is used for indicating a
		 * cache hierarchy error. The combination of bit 2 and bit 3
		 * is used for indicating a `generic' cache hierarchy error
		 * But we can't just blindly check the above bits, because if
		 * bit 11 is set, then it is a bus/interconnect error - and
		 * either way the above bits just gives more detail on what
		 * bus/interconnect error happened. Note that bit 12 can be
		 * ignored, as it's the "filter" bit.
		 */
		return (m->status & 0xef80) == BIT(7) ||
		       (m->status & 0xef00) == BIT(8) ||
		       (m->status & 0xeffc) == 0xc;
	}

	return false;
}
EXPORT_SYMBOL_GPL(mce_is_memory_error);
680
681bool mce_is_correctable(struct mce *m)
682{
683 if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
684 return false;
685
686 if (m->status & MCI_STATUS_UC)
687 return false;
688
689 return true;
690}
691EXPORT_SYMBOL_GPL(mce_is_correctable);
692
/* Number of poll passes executed, per CPU. */
DEFINE_PER_CPU(unsigned, mce_poll_count);

/*
 * Poll for corrected events or events that happened before reset.
 * Those are just logged through /dev/mcelog.
 *
 * This is executed in standard interrupt context.
 *
 * Note: spec recommends to panic for fatal unsignalled
 * errors here. However this would be quite problematic --
 * we would need to reimplement the Monarch handling and
 * it would mess up the exclusion between exception handler
 * and poll handler -- so we skip this for now.
 * These cases should not happen anyways, or only when the CPU
 * is already totally confused. In this case it's likely it will
 * not fully execute the machine check handler either.
 */
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
	bool error_seen = false;
	struct mce m;
	int severity;
	int i;

	this_cpu_inc(mce_poll_count);

	mce_gather_info(&m, NULL);

	for (i = 0; i < mca_cfg.banks; i++) {
		if (!mce_banks[i].ctl || !test_bit(i, *b))
			continue;

		m.misc = 0;
		m.addr = 0;
		m.bank = i;
		m.tsc = 0;

		barrier();
		m.status = mce_rdmsrl(msr_ops.status(i));
		/* If this entry is not valid, ignore it. */
		if (!(m.status & MCI_STATUS_VAL))
			continue;

		/*
		 * Uncorrected or signalled events are handled by the
		 * exception handler when it is enabled, so don't process
		 * those here.
		 */
		if (!(flags & MCP_UC) &&
		    (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
			continue;

		error_seen = true;

		mce_read_aux(&m, i);

		if (!(flags & MCP_TIMESTAMP))
			m.tsc = 0;

		severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);

		/* Record severity for deferred memory errors with an address. */
		if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m))
			if (m.status & MCI_STATUS_ADDRV)
				m.severity = severity;

		/*
		 * Don't get the IP here because it's unlikely to
		 * have anything to do with the actual error location.
		 */
		if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
			mce_log(&m);
		else if (mce_usable_address(&m)) {
			/*
			 * Although we skipped logging this, we still want
			 * to take action. Add to the pool so the registered
			 * notifiers will see it.
			 */
			if (!mce_gen_pool_add(&m))
				mce_schedule_work();
		}

		/*
		 * Clear state for this bank.
		 */
		mce_wrmsrl(msr_ops.status(i), 0);
	}

	/*
	 * Don't clear MCG_STATUS here because it's only defined for
	 * exceptions.
	 */

	sync_core();

	return error_seen;
}
EXPORT_SYMBOL_GPL(machine_check_poll);
791
792
793
794
795
796static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
797 struct pt_regs *regs)
798{
799 int i, ret = 0;
800 char *tmp;
801
802 for (i = 0; i < mca_cfg.banks; i++) {
803 m->status = mce_rdmsrl(msr_ops.status(i));
804 if (m->status & MCI_STATUS_VAL) {
805 __set_bit(i, validp);
806 if (quirk_no_way_out)
807 quirk_no_way_out(i, m, regs);
808 }
809
810 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
811 *msg = tmp;
812 ret = 1;
813 }
814 }
815 return ret;
816}
817
818
819
820
821
822static atomic_t mce_executing;
823
824
825
826
827static atomic_t mce_callin;
828
829
830
831
832static int mce_timed_out(u64 *t, const char *msg)
833{
834
835
836
837
838
839
840 rmb();
841 if (atomic_read(&mce_panicked))
842 wait_for_panic();
843 if (!mca_cfg.monarch_timeout)
844 goto out;
845 if ((s64)*t < SPINUNIT) {
846 if (mca_cfg.tolerant <= 1)
847 mce_panic(msg, NULL, NULL);
848 cpu_missing = 1;
849 return 1;
850 }
851 *t -= SPINUNIT;
852out:
853 touch_nmi_watchdog();
854 return 0;
855}
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881static void mce_reign(void)
882{
883 int cpu;
884 struct mce *m = NULL;
885 int global_worst = 0;
886 char *msg = NULL;
887 char *nmsg = NULL;
888
889
890
891
892
893
894 for_each_possible_cpu(cpu) {
895 int severity = mce_severity(&per_cpu(mces_seen, cpu),
896 mca_cfg.tolerant,
897 &nmsg, true);
898 if (severity > global_worst) {
899 msg = nmsg;
900 global_worst = severity;
901 m = &per_cpu(mces_seen, cpu);
902 }
903 }
904
905
906
907
908
909
910 if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
911 mce_panic("Fatal machine check", m, msg);
912
913
914
915
916
917
918
919
920
921
922
923 if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
924 mce_panic("Fatal machine check from unknown source", NULL, NULL);
925
926
927
928
929
930 for_each_possible_cpu(cpu)
931 memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
932}
933
/* Count of CPUs that saw a no-way-out condition in the current rendezvous. */
static atomic_t global_nwo;

/*
 * Start of Monarch synchronization. This waits until all CPUs have
 * entered the exception handler and then determines if any of them
 * saw a fatal event that requires panic. Then it executes them
 * in the entry order.
 *
 * Returns this CPU's order number (1 == Monarch), or -1 on timeout or
 * when rendezvous is disabled (monarch_timeout == 0).
 */
static int mce_start(int *no_way_out)
{
	int order;
	int cpus = num_online_cpus();
	u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;

	if (!timeout)
		return -1;

	atomic_add(*no_way_out, &global_nwo);
	/*
	 * global_nwo should be updated before mce_callin
	 */
	smp_wmb();
	order = atomic_inc_return(&mce_callin);

	/*
	 * Wait for everyone.
	 */
	while (atomic_read(&mce_callin) != cpus) {
		if (mce_timed_out(&timeout,
				  "Timeout: Not all CPUs entered broadcast exception handler")) {
			atomic_set(&global_nwo, 0);
			return -1;
		}
		ndelay(SPINUNIT);
	}

	/*
	 * mce_callin should be read before global_nwo
	 */
	smp_rmb();

	if (order == 1) {
		/*
		 * Monarch: Starts executing now, the others wait.
		 */
		atomic_set(&mce_executing, 1);
	} else {
		/*
		 * Subject: Now start the scanning loop one by one in
		 * the original callin order.
		 * This way when there are any shared banks it will be
		 * only seen by one CPU before cleared, avoiding duplicates.
		 */
		while (atomic_read(&mce_executing) < order) {
			if (mce_timed_out(&timeout,
					  "Timeout: Subject CPUs unable to finish machine check processing")) {
				atomic_set(&global_nwo, 0);
				return -1;
			}
			ndelay(SPINUNIT);
		}
	}

	/*
	 * Cache the global no_way_out state.
	 */
	*no_way_out = atomic_read(&global_nwo);

	return order;
}
1005
1006
1007
1008
1009
1010static int mce_end(int order)
1011{
1012 int ret = -1;
1013 u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
1014
1015 if (!timeout)
1016 goto reset;
1017 if (order < 0)
1018 goto reset;
1019
1020
1021
1022
1023 atomic_inc(&mce_executing);
1024
1025 if (order == 1) {
1026
1027 int cpus = num_online_cpus();
1028
1029
1030
1031
1032
1033 while (atomic_read(&mce_executing) <= cpus) {
1034 if (mce_timed_out(&timeout,
1035 "Timeout: Monarch CPU unable to finish machine check processing"))
1036 goto reset;
1037 ndelay(SPINUNIT);
1038 }
1039
1040 mce_reign();
1041 barrier();
1042 ret = 0;
1043 } else {
1044
1045
1046
1047 while (atomic_read(&mce_executing) != 0) {
1048 if (mce_timed_out(&timeout,
1049 "Timeout: Monarch CPU did not finish machine check processing"))
1050 goto reset;
1051 ndelay(SPINUNIT);
1052 }
1053
1054
1055
1056
1057 return 0;
1058 }
1059
1060
1061
1062
1063reset:
1064 atomic_set(&global_nwo, 0);
1065 atomic_set(&mce_callin, 0);
1066 barrier();
1067
1068
1069
1070
1071 atomic_set(&mce_executing, 0);
1072 return ret;
1073}
1074
1075static void mce_clear_state(unsigned long *toclear)
1076{
1077 int i;
1078
1079 for (i = 0; i < mca_cfg.banks; i++) {
1080 if (test_bit(i, toclear))
1081 mce_wrmsrl(msr_ops.status(i), 0);
1082 }
1083}
1084
/*
 * Attempt recovery of a user-mode memory error by offlining the affected
 * page.  Returns the memory_failure() result (0 on success).
 */
static int do_memory_failure(struct mce *m)
{
	int flags = MF_ACTION_REQUIRED;
	int ret;

	pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr);
	/* Without a valid restart IP the task must be killed. */
	if (!(m->mcgstatus & MCG_STATUS_RIPV))
		flags |= MF_MUST_KILL;
	ret = memory_failure(m->addr >> PAGE_SHIFT, MCE_VECTOR, flags);
	if (ret)
		pr_err("Memory error not recovered");
	else
		set_mce_nospec(m->addr >> PAGE_SHIFT);
	return ret;
}
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113void do_machine_check(struct pt_regs *regs, long error_code)
1114{
1115 struct mca_config *cfg = &mca_cfg;
1116 struct mce m, *final;
1117 int i;
1118 int worst = 0;
1119 int severity;
1120
1121
1122
1123
1124
1125 int order = -1;
1126
1127
1128
1129
1130 int no_way_out = 0;
1131
1132
1133
1134
1135 int kill_it = 0;
1136 DECLARE_BITMAP(toclear, MAX_NR_BANKS);
1137 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
1138 char *msg = "Unknown";
1139
1140
1141
1142
1143
1144 int lmce = 1;
1145 int cpu = smp_processor_id();
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159 if (cpu_is_offline(cpu) ||
1160 (crashing_cpu != -1 && crashing_cpu != cpu)) {
1161 u64 mcgstatus;
1162
1163 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
1164 if (mcgstatus & MCG_STATUS_RIPV) {
1165 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
1166 return;
1167 }
1168 }
1169
1170 this_cpu_inc(mce_exception_count);
1171
1172 if (!cfg->banks)
1173 goto out;
1174
1175 mce_gather_info(&m, regs);
1176
1177 final = &__get_cpu_var(mces_seen);
1178 *final = m;
1179
1180 memset(valid_banks, 0, sizeof(valid_banks));
1181 no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
1182
1183 barrier();
1184
1185
1186
1187
1188
1189
1190 if (!(m.mcgstatus & MCG_STATUS_RIPV))
1191 kill_it = 1;
1192
1193
1194
1195
1196
1197 if (m.cpuvendor == X86_VENDOR_INTEL)
1198 lmce = m.mcgstatus & MCG_STATUS_LMCES;
1199
1200
1201
1202
1203
1204
1205
1206 if (!lmce)
1207 order = mce_start(&no_way_out);
1208
1209 for (i = 0; i < cfg->banks; i++) {
1210 __clear_bit(i, toclear);
1211 if (!test_bit(i, valid_banks))
1212 continue;
1213 if (!mce_banks[i].ctl)
1214 continue;
1215
1216 m.misc = 0;
1217 m.addr = 0;
1218 m.bank = i;
1219
1220 m.status = mce_rdmsrl(msr_ops.status(i));
1221 if ((m.status & MCI_STATUS_VAL) == 0)
1222 continue;
1223
1224
1225
1226
1227
1228 if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
1229 !no_way_out)
1230 continue;
1231
1232
1233
1234
1235 add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
1236
1237 severity = mce_severity(&m, cfg->tolerant, NULL, true);
1238
1239
1240
1241
1242
1243 if ((severity == MCE_KEEP_SEVERITY ||
1244 severity == MCE_UCNA_SEVERITY) && !no_way_out)
1245 continue;
1246 __set_bit(i, toclear);
1247 if (severity == MCE_NO_SEVERITY) {
1248
1249
1250
1251
1252 continue;
1253 }
1254
1255 mce_read_aux(&m, i);
1256
1257
1258 m.severity = severity;
1259
1260 mce_log(&m);
1261
1262 if (severity > worst) {
1263 *final = m;
1264 worst = severity;
1265 }
1266 }
1267
1268
1269 m = *final;
1270
1271 if (!no_way_out)
1272 mce_clear_state(toclear);
1273
1274
1275
1276
1277
1278 if (!lmce) {
1279 if (mce_end(order) < 0)
1280 no_way_out = worst >= MCE_PANIC_SEVERITY;
1281 } else {
1282
1283
1284
1285
1286 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
1287 mce_panic("Machine check from unknown source",
1288 NULL, NULL);
1289 }
1290
1291
1292
1293
1294
1295 if (cfg->tolerant == 3)
1296 kill_it = 0;
1297 else if (no_way_out)
1298 mce_panic("Fatal machine check on current CPU", &m, msg);
1299
1300 if (worst > 0)
1301 mce_report_event(regs);
1302 mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
1303out:
1304 sync_core();
1305
1306 if (worst != MCE_AR_SEVERITY && !kill_it)
1307 return;
1308
1309
1310 if ((m.cs & 3) == 3) {
1311 local_irq_enable();
1312
1313 if (kill_it || do_memory_failure(&m))
1314 force_sig(SIGBUS, current);
1315 local_irq_disable();
1316 } else {
1317 if (!mc_fixup_exception(regs, X86_TRAP_MC))
1318 mce_panic("Failed kernel mode recovery", &m, NULL);
1319 }
1320}
1321EXPORT_SYMBOL_GPL(do_machine_check);
1322
#ifndef CONFIG_MEMORY_FAILURE
/* Stub used when hwpoison support is not built in: log and ignore. */
int memory_failure(unsigned long pfn, int vector, int flags)
{
	/* mce_severity() should not hand us an ACTION_REQUIRED error */
	BUG_ON(flags & MF_ACTION_REQUIRED);
	pr_err("Uncorrected memory error in page 0x%lx ignored\n"
	       "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
	       pfn);

	return 0;
}
#endif
1335
1336
1337
1338
1339
1340
1341static void mce_process_work(struct work_struct *dummy)
1342{
1343 mce_gen_pool_process();
1344}
1345
1346
1347
1348
1349
1350
1351static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
1352
1353static DEFINE_PER_CPU(unsigned long, mce_next_interval);
1354static DEFINE_PER_CPU(struct timer_list, mce_timer);
1355
1356static unsigned long mce_adjust_timer_default(unsigned long interval)
1357{
1358 return interval;
1359}
1360
1361static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
1362
/*
 * (Re)arm the per-CPU poll timer to fire in @interval jiffies, but never
 * later than an already-pending expiry.
 */
static void __restart_timer(struct timer_list *t, unsigned long interval)
{
	unsigned long when = jiffies + interval;
	unsigned long flags;

	local_irq_save(flags);

	if (timer_pending(t)) {
		/* Only pull an already-pending timer earlier, never push it out. */
		if (time_before(when, t->expires))
			mod_timer_pinned(t, when);
	} else {
		t->expires = round_jiffies(when);
		add_timer_on(t, smp_processor_id());
	}

	local_irq_restore(flags);
}
1380
/* Periodic poll timer: scan the poll banks and adapt the interval. */
static void mce_timer_fn(unsigned long data)
{
	struct timer_list *t = &__get_cpu_var(mce_timer);
	int cpu = smp_processor_id();
	unsigned long iv;

	/* The timer is pinned; data carries the owning CPU. */
	WARN_ON(cpu != data);

	iv = __this_cpu_read(mce_next_interval);

	if (mce_available(__this_cpu_ptr(&cpu_info))) {
		machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));

		/* CMCI poll saw events: let the vendor hook pick the interval. */
		if (mce_intel_cmci_poll()) {
			iv = mce_adjust_timer(iv);
			goto done;
		}
	}

	/*
	 * Alert userspace if needed. If we logged an MCE, reduce the polling
	 * interval, otherwise increase the polling interval.
	 */
	if (mce_notify_irq())
		iv = max(iv / 2, (unsigned long) HZ/100);
	else
		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));

done:
	__this_cpu_write(mce_next_interval, iv);
	__restart_timer(t, iv);
}
1413
1414
1415
1416
1417void mce_timer_kick(unsigned long interval)
1418{
1419 struct timer_list *t = &__get_cpu_var(mce_timer);
1420 unsigned long iv = __this_cpu_read(mce_next_interval);
1421
1422 __restart_timer(t, interval);
1423
1424 if (interval < iv)
1425 __this_cpu_write(mce_next_interval, interval);
1426}
1427
1428
1429static void mce_timer_delete_all(void)
1430{
1431 int cpu;
1432
1433 for_each_online_cpu(cpu)
1434 del_timer_sync(&per_cpu(mce_timer, cpu));
1435}
1436
/* Run the usermode trigger helper in process context; don't wait for it. */
static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
1443
1444
1445
1446
1447
1448
1449int mce_notify_irq(void)
1450{
1451
1452 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
1453
1454 if (test_and_clear_bit(0, &mce_need_notify)) {
1455
1456 wake_up_interruptible(&mce_chrdev_wait);
1457
1458 if (mce_helper[0])
1459 schedule_work(&mce_trigger_work);
1460
1461 if (__ratelimit(&ratelimit))
1462 pr_info(HW_ERR "Machine check events logged\n");
1463
1464 return 1;
1465 }
1466 return 0;
1467}
1468EXPORT_SYMBOL_GPL(mce_notify_irq);
1469
1470static int __mcheck_cpu_mce_banks_init(void)
1471{
1472 int i;
1473
1474 mce_banks = kzalloc(MAX_NR_BANKS * sizeof(struct mce_bank), GFP_KERNEL);
1475 if (!mce_banks)
1476 return -ENOMEM;
1477
1478 for (i = 0; i < MAX_NR_BANKS; i++) {
1479 struct mce_bank *b = &mce_banks[i];
1480
1481 b->ctl = -1ULL;
1482 b->init = 1;
1483 }
1484 return 0;
1485}
1486
1487
1488
1489
1490static int __mcheck_cpu_cap_init(void)
1491{
1492 u64 cap;
1493 u8 b;
1494
1495 rdmsrl(MSR_IA32_MCG_CAP, cap);
1496
1497 b = cap & MCG_BANKCNT_MASK;
1498 if (WARN_ON_ONCE(b > MAX_NR_BANKS))
1499 b = MAX_NR_BANKS;
1500
1501 mca_cfg.banks = max(mca_cfg.banks, b);
1502
1503 if (!mce_banks) {
1504 int err = __mcheck_cpu_mce_banks_init();
1505 if (err)
1506 return err;
1507 }
1508
1509
1510 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
1511 mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
1512
1513 if (cap & MCG_SER_P)
1514 mca_cfg.ser = true;
1515
1516 return 0;
1517}
1518
/* Architectural MCA setup: flush leftover events and enable MCE in CR4. */
static void __mcheck_cpu_init_generic(void)
{
	enum mcp_flags m_fl = 0;
	mce_banks_t all_banks;
	u64 cap;

	if (!mca_cfg.bootlog)
		m_fl = MCP_DONTLOG;

	/*
	 * Log the machine checks left over from the previous reset.
	 */
	bitmap_fill(all_banks, MAX_NR_BANKS);
	machine_check_poll(MCP_UC | m_fl, &all_banks);

	set_in_cr4(X86_CR4_MCE);

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	/* Enable all global machine-check features where MCG_CTL exists. */
	if (cap & MCG_CTL_P)
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
}
1540
1541static void __mcheck_cpu_init_clear_banks(void)
1542{
1543 int i;
1544
1545 for (i = 0; i < mca_cfg.banks; i++) {
1546 struct mce_bank *b = &mce_banks[i];
1547
1548 if (!b->init)
1549 continue;
1550 wrmsrl(msr_ops.ctl(i), b->ctl);
1551 wrmsrl(msr_ops.status(i), 0);
1552 }
1553}
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
1564{
1565 if (bank != 0)
1566 return;
1567 if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
1568 return;
1569 if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
1570 MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
1571 MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
1572 MCACOD)) !=
1573 (MCI_STATUS_UC|MCI_STATUS_EN|
1574 MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
1575 MCI_STATUS_AR|MCACOD_INSTR))
1576 return;
1577
1578 m->mcgstatus |= MCG_STATUS_EIPV;
1579 m->ip = regs->ip;
1580 m->cs = regs->cs;
1581}
1582
1583static int quirk_haswell_noprint(struct mce *m)
1584{
1585 if (m->bank == 0 &&
1586 (m->status & 0xa0000000ffffffff) == 0x80000000000f0005)
1587 return 1;
1588
1589 return 0;
1590}
1591
1592
1593static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1594{
1595 struct mca_config *cfg = &mca_cfg;
1596
1597 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1598 pr_info("unknown CPU type - not enabling MCE support\n");
1599 return -EOPNOTSUPP;
1600 }
1601
1602
1603 if (c->x86_vendor == X86_VENDOR_AMD) {
1604 if (c->x86 == 15 && cfg->banks > 4) {
1605
1606
1607
1608
1609
1610 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1611 }
1612 if (c->x86 <= 17 && cfg->bootlog < 0) {
1613
1614
1615
1616
1617 cfg->bootlog = 0;
1618 }
1619
1620
1621
1622
1623 if (c->x86 == 6 && cfg->banks > 0)
1624 mce_banks[0].ctl = 0;
1625
1626
1627
1628
1629
1630 if (c->x86 == 0x15 && c->x86_model <= 0xf)
1631 mce_flags.overflow_recov = 1;
1632
1633
1634
1635
1636
1637 if (c->x86 == 0x15 &&
1638 (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
1639 int i;
1640 u64 hwcr;
1641 bool need_toggle;
1642 u32 msrs[] = {
1643 0x00000413,
1644 0xc0000408,
1645 };
1646
1647 rdmsrl(MSR_K7_HWCR, hwcr);
1648
1649
1650 need_toggle = !(hwcr & BIT(18));
1651
1652 if (need_toggle)
1653 wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
1654
1655
1656 for (i = 0; i < ARRAY_SIZE(msrs); i++)
1657 msr_clear_bit(msrs[i], 62);
1658
1659
1660 if (need_toggle)
1661 wrmsrl(MSR_K7_HWCR, hwcr);
1662 }
1663 }
1664
1665 if (c->x86_vendor == X86_VENDOR_INTEL) {
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675 if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
1676 mce_banks[0].init = 0;
1677
1678
1679
1680
1681
1682 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
1683 cfg->monarch_timeout < 0)
1684 cfg->monarch_timeout = USEC_PER_SEC;
1685
1686
1687
1688
1689
1690 if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
1691 cfg->bootlog = 0;
1692
1693 if (c->x86 == 6 && c->x86_model == 45)
1694 quirk_no_way_out = quirk_sandybridge_ifu;
1695
1696 if (c->x86 == 6) {
1697 switch (c->x86_model) {
1698 case 0x3c:
1699 case 0x3d:
1700 case 0x45:
1701 case 0x46:
1702 pr_info("Detected Haswell CPU. MCE quirk HSD131, HSM142, HSW131, BDM48, or HSM142 enabled.\n");
1703 quirk_noprint = quirk_haswell_noprint;
1704 break;
1705 }
1706 }
1707 }
1708 if (cfg->monarch_timeout < 0)
1709 cfg->monarch_timeout = 0;
1710 if (cfg->bootlog != 0)
1711 cfg->panic_timeout = 30;
1712
1713 return 0;
1714}
1715
1716static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
1717{
1718 if (c->x86 != 5)
1719 return 0;
1720
1721 switch (c->x86_vendor) {
1722 case X86_VENDOR_INTEL:
1723 intel_p5_mcheck_init(c);
1724 return 1;
1725 break;
1726 case X86_VENDOR_CENTAUR:
1727 winchip_mcheck_init(c);
1728 return 1;
1729 break;
1730 default:
1731 return 0;
1732 }
1733
1734 return 0;
1735}
1736
/* Vendor-specific per-CPU MCE feature setup. */
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
	switch (c->x86_vendor) {
	case X86_VENDOR_INTEL:
		mce_intel_feature_init(c);
		/* CMCI wants its own polling-interval policy */
		mce_adjust_timer = cmci_intel_adjust_timer;
		break;

	case X86_VENDOR_AMD: {
		mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
		mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
		mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);

		/*
		 * Scalable MCA uses a different MSR layout for the bank
		 * registers, so swap the accessor functions.  Must happen
		 * before mce_amd_feature_init() uses them.
		 */
		if (mce_flags.smca) {
			msr_ops.ctl = smca_ctl_reg;
			msr_ops.status = smca_status_reg;
			msr_ops.addr = smca_addr_reg;
			msr_ops.misc = smca_misc_reg;
		}
		mce_amd_feature_init(c);

		break;
	}

	default:
		break;
	}
}
1768
1769static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
1770{
1771 switch (c->x86_vendor) {
1772 case X86_VENDOR_INTEL:
1773 mce_intel_feature_clear(c);
1774 break;
1775 default:
1776 break;
1777 }
1778}
1779
1780static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1781{
1782 unsigned long iv = check_interval * HZ;
1783
1784 if (mca_cfg.ignore_ce || !iv)
1785 return;
1786
1787 per_cpu(mce_next_interval, cpu) = iv;
1788
1789 t->expires = round_jiffies(jiffies + iv);
1790 add_timer_on(t, cpu);
1791}
1792
/* Initialize and arm this CPU's polling timer (runs on the CPU itself). */
static void __mcheck_cpu_init_timer(void)
{
	struct timer_list *t = &__get_cpu_var(mce_timer);
	unsigned int cpu = smp_processor_id();

	setup_timer(t, mce_timer_fn, cpu);
	mce_start_timer(cpu, t);
}
1801
1802
/* Fallback handler: an int18 arrived before MCE was set up on this CPU. */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
	pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
	       smp_processor_id());
}

/*
 * Active machine-check handler.  Starts as the "unexpected" stub and is
 * switched to do_machine_check() by mcheck_cpu_init() once setup succeeds.
 */
void (*machine_check_vector)(struct pt_regs *, long error_code) =
						unexpected_machine_check;

/* #MC trap entry point: dispatch through the currently installed vector. */
dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
{
	machine_check_vector(regs, error_code);
}
1817
1818
1819
1820
1821
/*
 * Per-CPU machine-check initialization.  Bails out early when MCE is
 * disabled, handled by a legacy (pre-MCA) vendor handler, or unsupported.
 * Any setup failure flips mca_cfg.disabled so later CPUs skip init too.
 */
void mcheck_cpu_init(struct cpuinfo_x86 *c)
{
	if (mca_cfg.disabled)
		return;

	/* P5/WinChip style: legacy handler installed, nothing more to do */
	if (__mcheck_cpu_ancient_init(c))
		return;

	if (!mce_available(c))
		return;

	if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
		mca_cfg.disabled = true;
		return;
	}

	if (mce_gen_pool_init()) {
		mca_cfg.disabled = true;
		pr_emerg("Couldn't allocate MCE records pool!\n");
		return;
	}

	/* From here on, int18 is handled for real */
	machine_check_vector = do_machine_check;

	__mcheck_cpu_init_generic();
	__mcheck_cpu_init_vendor(c);
	__mcheck_cpu_init_clear_banks();
	__mcheck_cpu_init_timer();
}
1851
1852
1853
1854
1855void mcheck_cpu_clear(struct cpuinfo_x86 *c)
1856{
1857 if (mca_cfg.disabled)
1858 return;
1859
1860 if (!mce_available(c))
1861 return;
1862
1863
1864
1865
1866
1867 __mcheck_cpu_clear_vendor(c);
1868
1869}
1870
1871
1872
1873
1874
/*
 * /dev/mcelog open bookkeeping: multiple readers are allowed, or a
 * single exclusive (O_EXCL) opener.
 */
static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;	/* current number of opens */
static int mce_chrdev_open_exclu;	/* device held exclusively? */
1878
1879static int mce_chrdev_open(struct inode *inode, struct file *file)
1880{
1881 spin_lock(&mce_chrdev_state_lock);
1882
1883 if (mce_chrdev_open_exclu ||
1884 (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
1885 spin_unlock(&mce_chrdev_state_lock);
1886
1887 return -EBUSY;
1888 }
1889
1890 if (file->f_flags & O_EXCL)
1891 mce_chrdev_open_exclu = 1;
1892 mce_chrdev_open_count++;
1893
1894 spin_unlock(&mce_chrdev_state_lock);
1895
1896 return nonseekable_open(inode, file);
1897}
1898
1899static int mce_chrdev_release(struct inode *inode, struct file *file)
1900{
1901 spin_lock(&mce_chrdev_state_lock);
1902
1903 mce_chrdev_open_count--;
1904 mce_chrdev_open_exclu = 0;
1905
1906 spin_unlock(&mce_chrdev_state_lock);
1907
1908 return 0;
1909}
1910
/* IPI callback: record this CPU's current TSC into the shared array. */
static void collect_tscs(void *data)
{
	unsigned long *cpu_tsc = (unsigned long *)data;

	cpu_tsc[smp_processor_id()] = rdtsc();
}

/* Set once the firmware (APEI) error log has been drained or failed. */
static int mce_apei_read_done;
1919
1920
/*
 * Copy one MCE record from the firmware (APEI) log to user space,
 * advancing *ubuf on success.  Returns 0 on success or when no record
 * is available, negative errno otherwise; sets mce_apei_read_done when
 * the log is exhausted or unusable.
 */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
	int rc;
	u64 record_id;
	struct mce m;

	if (usize < sizeof(struct mce))
		return -EINVAL;

	rc = apei_read_mce(&m, &record_id);
	/* Error or no more MCE record */
	if (rc <= 0) {
		mce_apei_read_done = 1;
		/*
		 * When APEI/ERST is unavailable, report "no record"
		 * rather than "no device" to the reader.
		 */
		if (rc == -ENODEV)
			return 0;
		return rc;
	}
	rc = -EFAULT;
	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
		return rc;
	/*
	 * Clear the record right away to avoid handing out duplicates;
	 * there is no interface for user space to acknowledge it later.
	 */
	rc = apei_clear_mce(record_id);
	if (rc) {
		mce_apei_read_done = 1;
		return rc;
	}
	*ubuf += sizeof(struct mce);

	return 0;
}
1960
/*
 * Read MCE records for /dev/mcelog.  Firmware (APEI) records are drained
 * first; then the in-kernel mcelog buffer is copied out and cleared.
 * Only whole-buffer reads at offset 0 are supported.
 */
static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
				size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned long *cpu_tsc;
	unsigned prev, next;
	int i, err;

	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
	if (!cpu_tsc)
		return -ENOMEM;

	mutex_lock(&mce_chrdev_read_mutex);

	if (!mce_apei_read_done) {
		err = __mce_read_apei(&buf, usize);
		if (err || buf != ubuf)
			goto out;
	}

	next = mce_log_get_idx_check(mcelog.next);

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
		goto out;

	err = 0;
	prev = 0;
	do {
		for (i = prev; i < next; i++) {
			unsigned long start = jiffies;
			struct mce *m = &mcelog.entry[i];

			/*
			 * A writer may still be filling this entry; give
			 * it a couple of jiffies, then drop the record.
			 */
			while (!m->finished) {
				if (time_after_eq(jiffies, start + 2)) {
					memset(m, 0, sizeof(*m));
					goto timeout;
				}
				cpu_relax();
			}
			smp_rmb();	/* read entry only after ->finished */
			err |= copy_to_user(buf, m, sizeof(*m));
			buf += sizeof(*m);
timeout:
			;
		}

		/* Wipe what we consumed and try to claim any new entries. */
		memset(mcelog.entry + prev, 0,
		       (next - prev) * sizeof(struct mce));
		prev = next;
		next = cmpxchg(&mcelog.next, prev, 0);
	} while (next != prev);

	synchronize_sched();

	/*
	 * Collect entries that were still being written before the
	 * synchronize: anything finished with a TSC older than the
	 * per-CPU snapshot is safe to hand out.
	 */
	on_each_cpu(collect_tscs, cpu_tsc, 1);

	for (i = next; i < MCE_LOG_LEN; i++) {
		struct mce *m = &mcelog.entry[i];

		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
			err |= copy_to_user(buf, m, sizeof(*m));
			smp_rmb();
			buf += sizeof(*m);
			memset(m, 0, sizeof(*m));
		}
	}

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mce_chrdev_read_mutex);
	kfree(cpu_tsc);

	return err ? err : buf - ubuf;
}
2043
2044static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
2045{
2046 poll_wait(file, &mce_chrdev_wait, wait);
2047 if (READ_ONCE(mcelog.next))
2048 return POLLIN | POLLRDNORM;
2049 if (!mce_apei_read_done && apei_check_mce())
2050 return POLLIN | POLLRDNORM;
2051 return 0;
2052}
2053
/* ioctl interface for /dev/mcelog; restricted to CAP_SYS_ADMIN. */
static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
				unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		/* Atomically fetch-and-clear the flags word */
		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
2080
/* Optional handler for writes to /dev/mcelog (e.g. error injection). */
static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
			    size_t usize, loff_t *off);

/*
 * Install (or, with NULL, remove) the write handler for /dev/mcelog.
 */
void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
			     const char __user *ubuf,
			     size_t usize, loff_t *off))
{
	mce_write = fn;
}
EXPORT_SYMBOL_GPL(register_mce_write_callback);
2091
2092static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
2093 size_t usize, loff_t *off)
2094{
2095 if (mce_write)
2096 return mce_write(filp, ubuf, usize, off);
2097 else
2098 return -EINVAL;
2099}
2100
/* File operations for the legacy /dev/mcelog character device. */
static const struct file_operations mce_chrdev_ops = {
	.open = mce_chrdev_open,
	.release = mce_chrdev_release,
	.read = mce_chrdev_read,
	.write = mce_chrdev_write,
	.poll = mce_chrdev_poll,
	.unlocked_ioctl = mce_chrdev_ioctl,
	.llseek = no_llseek,
};

/* Misc device registration for /dev/mcelog (fixed minor). */
static struct miscdevice mce_chrdev_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};
2116
/* Per-CPU part of mce_disable_bank(): stop polling and CMCI for @bank. */
static void __mce_disable_bank(void *arg)
{
	int bank = *((int *)arg);
	__clear_bit(bank, __get_cpu_var(mce_poll_banks));
	cmci_disable_bank(bank);
}

/*
 * Disable corrected-error reporting for one MCA bank on all CPUs,
 * after validating the bank number (typically requested by firmware
 * quirk tables, hence the FW_BUG tag on the warning).
 */
void mce_disable_bank(int bank)
{
	if (bank >= mca_cfg.banks) {
		pr_warn(FW_BUG
			"Ignoring request to disable invalid MCA bank %d.\n",
			bank);
		return;
	}
	set_bit(bank, mce_banks_ce_disabled);
	on_each_cpu(__mce_disable_bank, &bank, 1);
}
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
/*
 * mce=off			Disable machine check
 * mce=no_cmci			Disable CMCI
 * mce=no_lmce			Disable local MCE
 * mce=dont_log_ce		Don't create a log entry for corrected errors
 * mce=ignore_ce		Disable polling and CMCI entirely
 * mce=bootlog / nobootlog	(Don't) log MCEs left over from before boot
 * mce=bios_cmci_threshold	Keep the BIOS-programmed CMCI threshold
 * mce=recovery			Enable the recovery (mcsafe) code paths
 * mce=TOLERANCELEVEL[,monarchtimeout]	Numeric tolerance and timeout
 * A bare "mce" enables P5-style machine checks.
 */
static int __init mcheck_enable(char *str)
{
	struct mca_config *cfg = &mca_cfg;

	if (*str == 0) {
		enable_p5_mce();
		return 1;
	}
	if (*str == '=')
		str++;
	if (!strcmp(str, "off"))
		cfg->disabled = true;
	else if (!strcmp(str, "no_cmci"))
		cfg->cmci_disabled = true;
	else if (!strcmp(str, "no_lmce"))
		cfg->lmce_disabled = true;
	else if (!strcmp(str, "dont_log_ce"))
		cfg->dont_log_ce = true;
	else if (!strcmp(str, "ignore_ce"))
		cfg->ignore_ce = true;
	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
		cfg->bootlog = (str[0] == 'b');
	else if (!strcmp(str, "bios_cmci_threshold"))
		cfg->bios_cmci_threshold = true;
	else if (!strcmp(str, "recovery"))
		cfg->recovery = true;
	else if (isdigit(str[0])) {
		if (get_option(&str, &cfg->tolerant) == 2)
			get_option(&str, &(cfg->monarch_timeout));
	} else {
		pr_info("mce argument %s ignored. Please use /sys\n", str);
		return 0;
	}
	return 1;
}
__setup("mce", mcheck_enable);
2186
/* One-time boot init: decode chains, severity tables, deferred work. */
int __init mcheck_init(void)
{
	mcheck_intel_therm_init();
	mce_register_decode_chain(&mce_srao_nb);
	mce_register_decode_chain(&mce_default_nb);
	mcheck_vendor_init_severity();

	INIT_WORK(&mce_work, mce_process_work);
	init_irq_work(&mce_irq_work, mce_irq_work_cb);

	return 0;
}
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208static void mce_disable_error_reporting(void)
2209{
2210 int i;
2211
2212 for (i = 0; i < mca_cfg.banks; i++) {
2213 struct mce_bank *b = &mce_banks[i];
2214
2215 if (b->init)
2216 wrmsrl(msr_ops.ctl(i), 0);
2217 }
2218 return;
2219}
2220
2221static void vendor_disable_error_reporting(void)
2222{
2223
2224
2225
2226
2227
2228
2229 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2230 return;
2231
2232 mce_disable_error_reporting();
2233}
2234
/* Syscore suspend hook: quiesce MCE reporting before sleep. */
static int mce_syscore_suspend(void)
{
	vendor_disable_error_reporting();
	return 0;
}

/* Syscore shutdown hook: quiesce MCE reporting before power-off/reboot. */
static void mce_syscore_shutdown(void)
{
	vendor_disable_error_reporting();
}
2245
2246
2247
2248
2249
2250
/*
 * Syscore resume hook: re-run generic, vendor, and bank init on the
 * boot CPU (runs with interrupts disabled, before other CPUs are up).
 */
static void mce_syscore_resume(void)
{
	__mcheck_cpu_init_generic();
	__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
	__mcheck_cpu_init_clear_banks();
}

static struct syscore_ops mce_syscore_ops = {
	.suspend = mce_syscore_suspend,
	.shutdown = mce_syscore_shutdown,
	.resume = mce_syscore_resume,
};
2263
2264
2265
2266
2267
/* Per-CPU callback: reprogram banks and rearm the poll timer. */
static void mce_cpu_restart(void *data)
{
	if (!mce_available(__this_cpu_ptr(&cpu_info)))
		return;
	__mcheck_cpu_init_generic();
	__mcheck_cpu_init_clear_banks();
	__mcheck_cpu_init_timer();
}

/* Reinitialize MCE on all CPUs after a configuration change. */
static void mce_restart(void)
{
	mce_timer_delete_all();
	on_each_cpu(mce_cpu_restart, NULL, 1);
}
2283
2284
/* Per-CPU callback: turn off CMCI on this CPU. */
static void mce_disable_cmci(void *data)
{
	if (!mce_available(__this_cpu_ptr(&cpu_info)))
		return;
	cmci_clear();
}

/*
 * Per-CPU callback: re-enable corrected-error handling (CMCI); when
 * @all is non-NULL, also rearm the polling timer.
 */
static void mce_enable_ce(void *all)
{
	if (!mce_available(__this_cpu_ptr(&cpu_info)))
		return;
	cmci_reenable();
	cmci_recheck();
	if (all)
		__mcheck_cpu_init_timer();
}
2301
/* sysfs bus: /sys/devices/system/machinecheck */
static struct bus_type mce_subsys = {
	.name = "machinecheck",
	.dev_name = "machinecheck",
};

/* Per-CPU sysfs device under the machinecheck subsystem. */
DEFINE_PER_CPU(struct device *, mce_device);

/* Optional hotplug hook installed by vendor-specific threshold code. */
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);

/* Map a bankN device_attribute back to its containing mce_bank. */
static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
{
	return container_of(attr, struct mce_bank, attr);
}
2315
/* sysfs show: bank control mask as hex. */
static ssize_t show_bank(struct device *s, struct device_attribute *attr,
			 char *buf)
{
	return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
}
2321
2322static ssize_t set_bank(struct device *s, struct device_attribute *attr,
2323 const char *buf, size_t size)
2324{
2325 u64 new;
2326
2327 if (kstrtou64(buf, 0, &new) < 0)
2328 return -EINVAL;
2329
2330 attr_to_bank(attr)->ctl = new;
2331 mce_restart();
2332
2333 return size;
2334}
2335
2336static ssize_t
2337show_trigger(struct device *s, struct device_attribute *attr, char *buf)
2338{
2339 strcpy(buf, mce_helper);
2340 strcat(buf, "\n");
2341 return strlen(mce_helper) + 1;
2342}
2343
/*
 * sysfs store: set the user-space trigger helper path; a trailing
 * newline is stripped.  NOTE(review): mce_helper is updated without a
 * lock here -- concurrent writers/readers could see a mixed string;
 * confirm whether sysfs serialization is relied upon.
 */
static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
				const char *buf, size_t siz)
{
	char *p;

	strncpy(mce_helper, buf, sizeof(mce_helper));
	mce_helper[sizeof(mce_helper)-1] = 0;	/* strncpy may not terminate */
	p = strchr(mce_helper, '\n');

	if (p)
		*p = 0;

	/* Count the consumed newline so sysfs sees the full write. */
	return strlen(mce_helper) + !!p;
}
2358
2359static ssize_t set_ignore_ce(struct device *s,
2360 struct device_attribute *attr,
2361 const char *buf, size_t size)
2362{
2363 u64 new;
2364
2365 if (kstrtou64(buf, 0, &new) < 0)
2366 return -EINVAL;
2367
2368 if (mca_cfg.ignore_ce ^ !!new) {
2369 if (new) {
2370
2371 mce_timer_delete_all();
2372 on_each_cpu(mce_disable_cmci, NULL, 1);
2373 mca_cfg.ignore_ce = true;
2374 } else {
2375
2376 mca_cfg.ignore_ce = false;
2377 on_each_cpu(mce_enable_ce, (void *)1, 1);
2378 }
2379 }
2380 return size;
2381}
2382
2383static ssize_t set_cmci_disabled(struct device *s,
2384 struct device_attribute *attr,
2385 const char *buf, size_t size)
2386{
2387 u64 new;
2388
2389 if (kstrtou64(buf, 0, &new) < 0)
2390 return -EINVAL;
2391
2392 if (mca_cfg.cmci_disabled ^ !!new) {
2393 if (new) {
2394
2395 on_each_cpu(mce_disable_cmci, NULL, 1);
2396 mca_cfg.cmci_disabled = true;
2397 } else {
2398
2399 mca_cfg.cmci_disabled = false;
2400 on_each_cpu(mce_enable_ce, NULL, 1);
2401 }
2402 }
2403 return size;
2404}
2405
2406static ssize_t store_int_with_restart(struct device *s,
2407 struct device_attribute *attr,
2408 const char *buf, size_t size)
2409{
2410 ssize_t ret = device_store_int(s, attr, buf, size);
2411 mce_restart();
2412 return ret;
2413}
2414
/* Per-CPU sysfs attributes for the machinecheck devices. */
static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);

/* check_interval writes must re-arm the timers -> custom store. */
static struct dev_ext_attribute dev_attr_check_interval = {
	__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
	&check_interval
};

/* ignore_ce / cmci_disabled need side effects on toggle -> custom stores. */
static struct dev_ext_attribute dev_attr_ignore_ce = {
	__ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
	&mca_cfg.ignore_ce
};

static struct dev_ext_attribute dev_attr_cmci_disabled = {
	__ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
	&mca_cfg.cmci_disabled
};

/* NULL-terminated list used by mce_device_create()/remove(). */
static struct device_attribute *mce_device_attrs[] = {
	&dev_attr_tolerant.attr,
	&dev_attr_check_interval.attr,
	&dev_attr_trigger,
	&dev_attr_monarch_timeout.attr,
	&dev_attr_dont_log_ce.attr,
	&dev_attr_ignore_ce.attr,
	&dev_attr_cmci_disabled.attr,
	NULL
};
2445
/* Tracks which CPUs currently have a registered mce sysfs device. */
static cpumask_var_t mce_device_initialized;

/* device release callback: the device struct was kzalloc'd, just free it. */
static void mce_device_release(struct device *dev)
{
	kfree(dev);
}
2452
2453
/*
 * Create the per-CPU machinecheck sysfs device and all its attribute
 * files.  On any failure the files created so far are removed and the
 * device unregistered.  Returns 0 or a negative errno.
 */
static int mce_device_create(unsigned int cpu)
{
	struct device *dev;
	int err;
	int i, j;

	if (!mce_available(&boot_cpu_data))
		return -EIO;

	dev = kzalloc(sizeof *dev, GFP_KERNEL);
	if (!dev)
		return -ENOMEM;
	dev->id = cpu;
	dev->bus = &mce_subsys;
	dev->release = &mce_device_release;

	err = device_register(dev);
	if (err) {
		/* after device_register() failure, only put_device() is allowed */
		put_device(dev);
		return err;
	}

	for (i = 0; mce_device_attrs[i]; i++) {
		err = device_create_file(dev, mce_device_attrs[i]);
		if (err)
			goto error;
	}
	for (j = 0; j < mca_cfg.banks; j++) {
		err = device_create_file(dev, &mce_banks[j].attr);
		if (err)
			goto error2;
	}
	cpumask_set_cpu(cpu, mce_device_initialized);
	per_cpu(mce_device, cpu) = dev;

	return 0;
error2:
	/* unwind bank attribute files created so far */
	while (--j >= 0)
		device_remove_file(dev, &mce_banks[j].attr);
error:
	/* unwind generic attribute files created so far */
	while (--i >= 0)
		device_remove_file(dev, mce_device_attrs[i]);

	device_unregister(dev);

	return err;
}
2501
/*
 * Remove the per-CPU machinecheck sysfs device created by
 * mce_device_create(); a no-op for CPUs that never got one.
 */
static void mce_device_remove(unsigned int cpu)
{
	struct device *dev = per_cpu(mce_device, cpu);
	int i;

	if (!cpumask_test_cpu(cpu, mce_device_initialized))
		return;

	for (i = 0; mce_device_attrs[i]; i++)
		device_remove_file(dev, mce_device_attrs[i]);

	for (i = 0; i < mca_cfg.banks; i++)
		device_remove_file(dev, &mce_banks[i].attr);

	device_unregister(dev);
	cpumask_clear_cpu(cpu, mce_device_initialized);
	per_cpu(mce_device, cpu) = NULL;
}
2520
2521
/*
 * Hotplug callback, runs on the CPU going down: stop MCE reporting.
 * CMCI is only cleared on a real offline, not for suspend/resume
 * (CPU_TASKS_FROZEN).
 */
static void mce_disable_cpu(void *h)
{
	unsigned long action = *(unsigned long *)h;

	if (!mce_available(__this_cpu_ptr(&cpu_info)))
		return;

	if (!(action & CPU_TASKS_FROZEN))
		cmci_clear();

	vendor_disable_error_reporting();
}
2534
/*
 * Hotplug callback, runs on a CPU whose offlining failed: restore the
 * state torn down by mce_disable_cpu() (CMCI, then bank control MSRs).
 */
static void mce_reenable_cpu(void *h)
{
	unsigned long action = *(unsigned long *)h;
	int i;

	if (!mce_available(__this_cpu_ptr(&cpu_info)))
		return;

	if (!(action & CPU_TASKS_FROZEN))
		cmci_reenable();
	for (i = 0; i < mca_cfg.banks; i++) {
		struct mce_bank *b = &mce_banks[i];

		/* only re-enable banks this kernel initialized */
		if (b->init)
			wrmsrl(msr_ops.ctl(i), b->ctl);
	}
}
2552
2553
/* CPU hotplug notifier: manage sysfs devices, timers and CMCI state. */
static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct timer_list *t = &per_cpu(mce_timer, cpu);

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
		mce_device_create(cpu);
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		break;
	case CPU_DEAD:
		if (threshold_cpu_callback)
			threshold_cpu_callback(action, cpu);
		mce_device_remove(cpu);
		mce_intel_hcpu_update(cpu);
		break;
	case CPU_DOWN_PREPARE:
		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
		del_timer_sync(t);
		break;
	case CPU_DOWN_FAILED:
		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
		mce_start_timer(cpu, t);
		break;
	}

	if (action == CPU_POST_DEAD) {
		/* intentionally ignoring frozen here: rescan CMCI banks */
		cmci_rediscover();
	}

	return NOTIFY_OK;
}

static struct notifier_block mce_cpu_notifier = {
	.notifier_call = mce_cpu_callback,
};
2593
2594static __init void mce_init_banks(void)
2595{
2596 int i;
2597
2598 for (i = 0; i < mca_cfg.banks; i++) {
2599 struct mce_bank *b = &mce_banks[i];
2600 struct device_attribute *a = &b->attr;
2601
2602 sysfs_attr_init(&a->attr);
2603 a->attr.name = b->attrname;
2604 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
2605
2606 a->attr.mode = 0644;
2607 a->show = show_bank;
2608 a->store = set_bank;
2609 }
2610}
2611
/*
 * Register the machinecheck sysfs subsystem, per-CPU devices, hotplug
 * notifier, syscore ops, and the /dev/mcelog misc device.  Error paths
 * unwind everything set up so far.
 */
static __init int mcheck_init_device(void)
{
	int err;
	int i = 0;

	if (!mce_available(&boot_cpu_data)) {
		err = -EIO;
		goto err_out;
	}

	if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
		err = -ENOMEM;
		goto err_out;
	}

	mce_init_banks();

	err = subsys_system_register(&mce_subsys, NULL);
	if (err)
		goto err_out_mem;

	cpu_notifier_register_begin();
	for_each_online_cpu(i) {
		err = mce_device_create(i);
		if (err) {
			/*
			 * Register notifier anyway (and drop the
			 * registration lock) so that the notifier is not
			 * left leaked and devices created so far can be
			 * cleaned up below via the hotplug path.
			 */
			__register_hotcpu_notifier(&mce_cpu_notifier);
			cpu_notifier_register_done();
			goto err_device_create;
		}
	}

	__register_hotcpu_notifier(&mce_cpu_notifier);
	cpu_notifier_register_done();

	register_syscore_ops(&mce_syscore_ops);

	/* register character device /dev/mcelog */
	err = misc_register(&mce_chrdev_device);
	if (err)
		goto err_register;

	return 0;

err_register:
	unregister_syscore_ops(&mce_syscore_ops);

err_device_create:
	/*
	 * We didn't keep track of which devices were created above, but
	 * mce_device_remove() is a no-op for CPUs without one, so just
	 * sweep all possible CPUs.
	 */
	for_each_possible_cpu(i)
		mce_device_remove(i);

err_out_mem:
	free_cpumask_var(mce_device_initialized);

err_out:
	pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);

	return err;
}
device_initcall_sync(mcheck_init_device);
2682
2683
2684
2685
/* "nomce" boot parameter: shorthand for mce=off. */
static int __init mcheck_disable(char *str)
{
	mca_cfg.disabled = true;
	return 1;
}
__setup("nomce", mcheck_disable);
2692
2693#ifdef CONFIG_DEBUG_FS
/* Return (creating on first use) the shared "mce" debugfs directory. */
struct dentry *mce_get_debugfs_dir(void)
{
	static struct dentry *dmce;

	if (!dmce)
		dmce = debugfs_create_dir("mce", NULL);

	return dmce;
}
2703
/* Reset the global MCE rendezvous state so a new (fake) panic can run. */
static void mce_reset(void)
{
	cpu_missing = 0;
	atomic_set(&mce_fake_panicked, 0);
	atomic_set(&mce_executing, 0);
	atomic_set(&mce_callin, 0);
	atomic_set(&global_nwo, 0);
}

static int fake_panic_get(void *data, u64 *val)
{
	*val = fake_panic;
	return 0;
}

static int fake_panic_set(void *data, u64 val)
{
	/* clear rendezvous state before switching modes */
	mce_reset();
	fake_panic = val;
	return 0;
}

/* debugfs file: read/write the fake_panic test knob. */
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
			fake_panic_set, "%llu\n");
2728
2729static int __init mcheck_debugfs_init(void)
2730{
2731 struct dentry *dmce, *ffake_panic;
2732
2733 dmce = mce_get_debugfs_dir();
2734 if (!dmce)
2735 return -ENOMEM;
2736 ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
2737 &fake_panic_fops);
2738 if (!ffake_panic)
2739 return -ENOMEM;
2740
2741 return 0;
2742}
#else
/* No debugfs support: stub; callers ignore the return value. */
static int __init mcheck_debugfs_init(void) { return -EINVAL; }
#endif
2746
/* Static key gating the machine-check-safe memcpy path. */
struct static_key mcsafe_key = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(mcsafe_key);

static int __init mcheck_late_init(void)
{
	pr_info("Using %d MCE banks\n", mca_cfg.banks);

	/* mce=recovery enables the mcsafe memcpy fast path */
	if (mca_cfg.recovery)
		static_key_slow_inc(&mcsafe_key);

	mcheck_debugfs_init();

	/*
	 * Flush out everything that has been logged during early boot,
	 * now that everything needed to process it is set up.
	 */
	mce_schedule_work();

	return 0;
}
late_initcall(mcheck_late_init);
2768