1
2
3
4
5
6
7
8
9
10
11
12#include <linux/thread_info.h>
13#include <linux/capability.h>
14#include <linux/miscdevice.h>
15#include <linux/ratelimit.h>
16#include <linux/rcupdate.h>
17#include <linux/kobject.h>
18#include <linux/uaccess.h>
19#include <linux/kdebug.h>
20#include <linux/kernel.h>
21#include <linux/percpu.h>
22#include <linux/string.h>
23#include <linux/device.h>
24#include <linux/syscore_ops.h>
25#include <linux/delay.h>
26#include <linux/ctype.h>
27#include <linux/sched.h>
28#include <linux/sysfs.h>
29#include <linux/types.h>
30#include <linux/slab.h>
31#include <linux/init.h>
32#include <linux/kmod.h>
33#include <linux/poll.h>
34#include <linux/nmi.h>
35#include <linux/cpu.h>
36#include <linux/ras.h>
37#include <linux/smp.h>
38#include <linux/fs.h>
39#include <linux/mm.h>
40#include <linux/debugfs.h>
41#include <linux/irq_work.h>
42#include <linux/export.h>
43#include <linux/set_memory.h>
44#include <linux/sync_core.h>
45#include <linux/task_work.h>
46#include <linux/hardirq.h>
47
48#include <asm/intel-family.h>
49#include <asm/processor.h>
50#include <asm/traps.h>
51#include <asm/tlbflush.h>
52#include <asm/mce.h>
53#include <asm/msr.h>
54#include <asm/reboot.h>
55
56#include "internal.h"
57
58
/* NOTE(review): no user of this mutex is visible in this part of the file. */
static DEFINE_MUTEX(mce_sysfs_mutex);

/* Pull in the MCE trace events; trace_mce_record() is used below. */
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>

/*
 * Granularity of the rendezvous spin loops: passed to ndelay() by the
 * waiters and subtracted from the remaining timeout in mce_timed_out().
 */
#define SPINUNIT 100

/* Per-CPU count of machine check exceptions, bumped in do_machine_check(). */
DEFINE_PER_CPU(unsigned, mce_exception_count);

/* Per-CPU number of MCA banks in use, written by __mcheck_cpu_cap_init(). */
DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
69
/*
 * Per-CPU, per-bank bookkeeping.
 *
 * @ctl:  a bank whose @ctl is zero is skipped by machine_check_poll() and
 *        __mc_scan_banks(); __mcheck_cpu_mce_banks_init() sets it to all ones.
 * @init: set to true by __mcheck_cpu_mce_banks_init().
 */
struct mce_bank {
	u64 ctl;
	bool init;
};
static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
75
/* Size of the attribute name buffer in struct mce_bank_dev. */
#define ATTR_LEN 16

/*
 * A device attribute together with the storage for its name and the bank
 * number it belongs to.
 * NOTE(review): the users are outside this excerpt; presumably one sysfs
 * file per bank - confirm.
 */
struct mce_bank_dev {
	struct device_attribute attr;
	char attrname[ATTR_LEN];
	u8 bank;
};
static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS];
84
/* Vendor/quirk flags (smca, p5, winchip, snb_ifu_quirk, skx_repmov_quirk, ...). */
struct mce_vendor_flags mce_flags __read_mostly;

/* Global MCA configuration; bootlog and monarch_timeout start out as -1. */
struct mca_config mca_cfg __read_mostly = {
	.bootlog = -1,
	.monarch_timeout = -1
};

/*
 * Per-CPU record filled in by do_machine_check() and evaluated across all
 * CPUs by mce_reign().
 */
static DEFINE_PER_CPU(struct mce, mces_seen);
/* Bit 0 is set by mce_early_notifier() and consumed by mce_notify_irq(). */
static unsigned long mce_need_notify;

/*
 * Per-CPU bitmap of banks handed to machine_check_poll() by the timer;
 * every bit is set initially.
 */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};

/*
 * NOTE(review): not referenced in this excerpt; by its name, the banks for
 * which corrected-error reporting is disabled - confirm.
 */
mce_banks_t mce_banks_ce_disabled;

/* mce_work is scheduled by mce_schedule_work(); mce_irq_work is queued by mce_log(). */
static struct work_struct mce_work;
static struct irq_work mce_irq_work;

/* Notifier chain that mce_register_decode_chain() adds consumers to. */
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
120
121
122void mce_setup(struct mce *m)
123{
124 memset(m, 0, sizeof(struct mce));
125 m->cpu = m->extcpu = smp_processor_id();
126
127 m->time = __ktime_get_real_seconds();
128 m->cpuvendor = boot_cpu_data.x86_vendor;
129 m->cpuid = cpuid_eax(1);
130 m->socketid = cpu_data(m->extcpu).phys_proc_id;
131 m->apicid = cpu_data(m->extcpu).initial_apicid;
132 m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
133 m->ppin = cpu_data(m->extcpu).ppin;
134 m->microcode = boot_cpu_data.microcode;
135}
136
/*
 * Per-CPU injected MCE record. While its ->finished field is non-zero,
 * mce_rdmsrl()/mce_wrmsrl() operate on this record instead of the real MSRs.
 */
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);
139
140void mce_log(struct mce *m)
141{
142 if (!mce_gen_pool_add(m))
143 irq_work_queue(&mce_irq_work);
144}
145EXPORT_SYMBOL_GPL(mce_log);
146
147void mce_register_decode_chain(struct notifier_block *nb)
148{
149 if (WARN_ON(nb->priority < MCE_PRIO_LOWEST ||
150 nb->priority > MCE_PRIO_HIGHEST))
151 return;
152
153 blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
154}
155EXPORT_SYMBOL_GPL(mce_register_decode_chain);
156
157void mce_unregister_decode_chain(struct notifier_block *nb)
158{
159 blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
160}
161EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
162
163static void __print_mce(struct mce *m)
164{
165 pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
166 m->extcpu,
167 (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
168 m->mcgstatus, m->bank, m->status);
169
170 if (m->ip) {
171 pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
172 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
173 m->cs, m->ip);
174
175 if (m->cs == __KERNEL_CS)
176 pr_cont("{%pS}", (void *)(unsigned long)m->ip);
177 pr_cont("\n");
178 }
179
180 pr_emerg(HW_ERR "TSC %llx ", m->tsc);
181 if (m->addr)
182 pr_cont("ADDR %llx ", m->addr);
183 if (m->misc)
184 pr_cont("MISC %llx ", m->misc);
185 if (m->ppin)
186 pr_cont("PPIN %llx ", m->ppin);
187
188 if (mce_flags.smca) {
189 if (m->synd)
190 pr_cont("SYND %llx ", m->synd);
191 if (m->ipid)
192 pr_cont("IPID %llx ", m->ipid);
193 }
194
195 pr_cont("\n");
196
197
198
199
200
201 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
202 m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
203 m->microcode);
204}
205
206static void print_mce(struct mce *m)
207{
208 __print_mce(m);
209
210 if (m->cpuvendor != X86_VENDOR_AMD && m->cpuvendor != X86_VENDOR_HYGON)
211 pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
212}
213
/* Seconds wait_for_panic() spins before panicking on its own. */
#define PANIC_TIMEOUT 5

/* Number of CPUs that have entered mce_panic() for a real panic. */
static atomic_t mce_panicked;

/*
 * When fake_panic is set, mce_panic() only prints and returns instead of
 * calling panic(); mce_fake_panicked counts those fake panics.
 */
static int fake_panic;
static atomic_t mce_fake_panicked;
220
221
222static void wait_for_panic(void)
223{
224 long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
225
226 preempt_disable();
227 local_irq_enable();
228 while (timeout-- > 0)
229 udelay(1);
230 if (panic_timeout == 0)
231 panic_timeout = mca_cfg.panic_timeout;
232 panic("Panicing machine check CPU died");
233}
234
235static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
236{
237 struct llist_node *pending;
238 struct mce_evt_llist *l;
239 int apei_err = 0;
240
241
242
243
244
245 instrumentation_begin();
246
247 if (!fake_panic) {
248
249
250
251 if (atomic_inc_return(&mce_panicked) > 1)
252 wait_for_panic();
253 barrier();
254
255 bust_spinlocks(1);
256 console_verbose();
257 } else {
258
259 if (atomic_inc_return(&mce_fake_panicked) > 1)
260 goto out;
261 }
262 pending = mce_gen_pool_prepare_records();
263
264 llist_for_each_entry(l, pending, llnode) {
265 struct mce *m = &l->mce;
266 if (!(m->status & MCI_STATUS_UC)) {
267 print_mce(m);
268 if (!apei_err)
269 apei_err = apei_write_mce(m);
270 }
271 }
272
273 llist_for_each_entry(l, pending, llnode) {
274 struct mce *m = &l->mce;
275 if (!(m->status & MCI_STATUS_UC))
276 continue;
277 if (!final || mce_cmp(m, final)) {
278 print_mce(m);
279 if (!apei_err)
280 apei_err = apei_write_mce(m);
281 }
282 }
283 if (final) {
284 print_mce(final);
285 if (!apei_err)
286 apei_err = apei_write_mce(final);
287 }
288 if (exp)
289 pr_emerg(HW_ERR "Machine check: %s\n", exp);
290 if (!fake_panic) {
291 if (panic_timeout == 0)
292 panic_timeout = mca_cfg.panic_timeout;
293 panic(msg);
294 } else
295 pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
296
297out:
298 instrumentation_end();
299}
300
301
302
303static int msr_to_offset(u32 msr)
304{
305 unsigned bank = __this_cpu_read(injectm.bank);
306
307 if (msr == mca_cfg.rip_msr)
308 return offsetof(struct mce, ip);
309 if (msr == mca_msr_reg(bank, MCA_STATUS))
310 return offsetof(struct mce, status);
311 if (msr == mca_msr_reg(bank, MCA_ADDR))
312 return offsetof(struct mce, addr);
313 if (msr == mca_msr_reg(bank, MCA_MISC))
314 return offsetof(struct mce, misc);
315 if (msr == MSR_IA32_MCG_STATUS)
316 return offsetof(struct mce, mcgstatus);
317 return -1;
318}
319
320void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr)
321{
322 if (wrmsr) {
323 pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
324 (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
325 regs->ip, (void *)regs->ip);
326 } else {
327 pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
328 (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
329 }
330
331 show_stack_regs(regs);
332
333 panic("MCA architectural violation!\n");
334
335 while (true)
336 cpu_relax();
337}
338
339
340noinstr u64 mce_rdmsrl(u32 msr)
341{
342 DECLARE_ARGS(val, low, high);
343
344 if (__this_cpu_read(injectm.finished)) {
345 int offset;
346 u64 ret;
347
348 instrumentation_begin();
349
350 offset = msr_to_offset(msr);
351 if (offset < 0)
352 ret = 0;
353 else
354 ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
355
356 instrumentation_end();
357
358 return ret;
359 }
360
361
362
363
364
365
366 asm volatile("1: rdmsr\n"
367 "2:\n"
368 _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_RDMSR_IN_MCE)
369 : EAX_EDX_RET(val, low, high) : "c" (msr));
370
371
372 return EAX_EDX_VAL(val, low, high);
373}
374
375static noinstr void mce_wrmsrl(u32 msr, u64 v)
376{
377 u32 low, high;
378
379 if (__this_cpu_read(injectm.finished)) {
380 int offset;
381
382 instrumentation_begin();
383
384 offset = msr_to_offset(msr);
385 if (offset >= 0)
386 *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
387
388 instrumentation_end();
389
390 return;
391 }
392
393 low = (u32)v;
394 high = (u32)(v >> 32);
395
396
397 asm volatile("1: wrmsr\n"
398 "2:\n"
399 _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR_IN_MCE)
400 : : "c" (msr), "a"(low), "d" (high) : "memory");
401}
402
403
404
405
406
407
408static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
409{
410
411
412
413
414 instrumentation_begin();
415 mce_setup(m);
416 instrumentation_end();
417
418 m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
419 if (regs) {
420
421
422
423
424 if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
425 m->ip = regs->ip;
426 m->cs = regs->cs;
427
428
429
430
431
432
433 if (v8086_mode(regs))
434 m->cs |= 3;
435 }
436
437 if (mca_cfg.rip_msr)
438 m->ip = mce_rdmsrl(mca_cfg.rip_msr);
439 }
440}
441
442int mce_available(struct cpuinfo_x86 *c)
443{
444 if (mca_cfg.disabled)
445 return 0;
446 return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
447}
448
449static void mce_schedule_work(void)
450{
451 if (!mce_gen_pool_empty())
452 schedule_work(&mce_work);
453}
454
455static void mce_irq_work_cb(struct irq_work *entry)
456{
457 mce_schedule_work();
458}
459
460
461
462
463
464
465
466int mce_usable_address(struct mce *m)
467{
468 if (!(m->status & MCI_STATUS_ADDRV))
469 return 0;
470
471
472 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
473 boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
474 return 1;
475
476 if (!(m->status & MCI_STATUS_MISCV))
477 return 0;
478
479 if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
480 return 0;
481
482 if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
483 return 0;
484
485 return 1;
486}
487EXPORT_SYMBOL_GPL(mce_usable_address);
488
489bool mce_is_memory_error(struct mce *m)
490{
491 switch (m->cpuvendor) {
492 case X86_VENDOR_AMD:
493 case X86_VENDOR_HYGON:
494 return amd_mce_is_memory_error(m);
495
496 case X86_VENDOR_INTEL:
497 case X86_VENDOR_ZHAOXIN:
498
499
500
501
502
503
504
505
506
507
508
509
510
511 return (m->status & 0xef80) == BIT(7) ||
512 (m->status & 0xef00) == BIT(8) ||
513 (m->status & 0xeffc) == 0xc;
514
515 default:
516 return false;
517 }
518}
519EXPORT_SYMBOL_GPL(mce_is_memory_error);
520
521static bool whole_page(struct mce *m)
522{
523 if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV))
524 return true;
525
526 return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT;
527}
528
529bool mce_is_correctable(struct mce *m)
530{
531 if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
532 return false;
533
534 if (m->cpuvendor == X86_VENDOR_HYGON && m->status & MCI_STATUS_DEFERRED)
535 return false;
536
537 if (m->status & MCI_STATUS_UC)
538 return false;
539
540 return true;
541}
542EXPORT_SYMBOL_GPL(mce_is_correctable);
543
544static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
545 void *data)
546{
547 struct mce *m = (struct mce *)data;
548
549 if (!m)
550 return NOTIFY_DONE;
551
552
553 trace_mce_record(m);
554
555 set_bit(0, &mce_need_notify);
556
557 mce_notify_irq();
558
559 return NOTIFY_DONE;
560}
561
562static struct notifier_block early_nb = {
563 .notifier_call = mce_early_notifier,
564 .priority = MCE_PRIO_EARLY,
565};
566
567static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
568 void *data)
569{
570 struct mce *mce = (struct mce *)data;
571 unsigned long pfn;
572
573 if (!mce || !mce_usable_address(mce))
574 return NOTIFY_DONE;
575
576 if (mce->severity != MCE_AO_SEVERITY &&
577 mce->severity != MCE_DEFERRED_SEVERITY)
578 return NOTIFY_DONE;
579
580 pfn = mce->addr >> PAGE_SHIFT;
581 if (!memory_failure(pfn, 0)) {
582 set_mce_nospec(pfn, whole_page(mce));
583 mce->kflags |= MCE_HANDLED_UC;
584 }
585
586 return NOTIFY_OK;
587}
588
589static struct notifier_block mce_uc_nb = {
590 .notifier_call = uc_decode_notifier,
591 .priority = MCE_PRIO_UC,
592};
593
594static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
595 void *data)
596{
597 struct mce *m = (struct mce *)data;
598
599 if (!m)
600 return NOTIFY_DONE;
601
602 if (mca_cfg.print_all || !m->kflags)
603 __print_mce(m);
604
605 return NOTIFY_DONE;
606}
607
608static struct notifier_block mce_default_nb = {
609 .notifier_call = mce_default_notifier,
610
611 .priority = MCE_PRIO_LOWEST,
612};
613
614
615
616
617static noinstr void mce_read_aux(struct mce *m, int i)
618{
619 if (m->status & MCI_STATUS_MISCV)
620 m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
621
622 if (m->status & MCI_STATUS_ADDRV) {
623 m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR));
624
625
626
627
628 if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
629 u8 shift = MCI_MISC_ADDR_LSB(m->misc);
630 m->addr >>= shift;
631 m->addr <<= shift;
632 }
633
634
635
636
637
638 if (mce_flags.smca) {
639 u8 lsb = (m->addr >> 56) & 0x3f;
640
641 m->addr &= GENMASK_ULL(55, lsb);
642 }
643 }
644
645 if (mce_flags.smca) {
646 m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
647
648 if (m->status & MCI_STATUS_SYNDV)
649 m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
650 }
651}
652
/* Per-CPU count of machine_check_poll() invocations. */
DEFINE_PER_CPU(unsigned, mce_poll_count);

/*
 * Poll the MCA banks of the current CPU for errors.
 *
 * @flags: MCP_TIMESTAMP - record the TSC in the logged records
 *         MCP_UC        - also log errors that have MCI_STATUS_UC set
 *         MCP_DONTLOG   - do not log, only clear the bank
 *         MCP_QUEUE_LOG - only add the record to the pool (mce_gen_pool_add())
 *                         instead of going through mce_log()
 * @b:     bitmap of the banks to look at
 *
 * Banks whose ->ctl is zero or whose bit is clear in @b are skipped. A bank
 * that passes the filter below has its status register cleared afterwards.
 *
 * Returns true if at least one bank passed the filter.
 */
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
	bool error_seen = false;
	struct mce m;
	int i;

	this_cpu_inc(mce_poll_count);

	/* No pt_regs while polling, hence no IP/CS in the record. */
	mce_gather_info(&m, NULL);

	if (flags & MCP_TIMESTAMP)
		m.tsc = rdtsc();

	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
		if (!mce_banks[i].ctl || !test_bit(i, *b))
			continue;

		/* Reset the per-bank fields left over from the previous bank. */
		m.misc = 0;
		m.addr = 0;
		m.bank = i;

		barrier();
		m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));

		/* Nothing latched in this bank. */
		if (!(m.status & MCI_STATUS_VAL))
			continue;

		/* Log everything when asked to, and always log non-UC errors. */
		if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
			goto log_it;

		/*
		 * From here on UC is set and MCP_UC was not requested.
		 * Without SER support such an error is left alone.
		 */
		if (!mca_cfg.ser) {
			if (m.status & MCI_STATUS_UC)
				continue;
			goto log_it;
		}

		/* With SER: log UC errors whose EN bit is clear ... */
		if (!(m.status & MCI_STATUS_EN))
			goto log_it;

		/* ... and those with neither PCC nor S set. */
		if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
			goto log_it;

		/* Everything else is left untouched in the bank. */
		continue;

log_it:
		error_seen = true;

		if (flags & MCP_DONTLOG)
			goto clear_it;

		mce_read_aux(&m, i);
		m.severity = mce_severity(&m, NULL, NULL, false);

		/* With dont_log_ce set, only records with a usable address are logged. */
		if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
			goto clear_it;

		if (flags & MCP_QUEUE_LOG)
			mce_gen_pool_add(&m);
		else
			mce_log(&m);

clear_it:
		/* Clear the bank's status register. */
		mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
	}

	/* MCG_STATUS is not touched here; only do_machine_check() clears it. */
	sync_core();

	return error_seen;
}
EXPORT_SYMBOL_GPL(machine_check_poll);
774
775
776
777
778
779
780
781
782
783static __always_inline void
784quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
785{
786 if (bank != 0)
787 return;
788 if ((m->mcgstatus & (MCG_STATUS_EIPV|MCG_STATUS_RIPV)) != 0)
789 return;
790 if ((m->status & (MCI_STATUS_OVER|MCI_STATUS_UC|
791 MCI_STATUS_EN|MCI_STATUS_MISCV|MCI_STATUS_ADDRV|
792 MCI_STATUS_PCC|MCI_STATUS_S|MCI_STATUS_AR|
793 MCACOD)) !=
794 (MCI_STATUS_UC|MCI_STATUS_EN|
795 MCI_STATUS_MISCV|MCI_STATUS_ADDRV|MCI_STATUS_S|
796 MCI_STATUS_AR|MCACOD_INSTR))
797 return;
798
799 m->mcgstatus |= MCG_STATUS_EIPV;
800 m->ip = regs->ip;
801 m->cs = regs->cs;
802}
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819static noinstr bool quirk_skylake_repmov(void)
820{
821 u64 mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
822 u64 misc_enable = mce_rdmsrl(MSR_IA32_MISC_ENABLE);
823 u64 mc1_status;
824
825
826
827
828
829 if (!(mcgstatus & MCG_STATUS_LMCES) ||
830 !(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING))
831 return false;
832
833 mc1_status = mce_rdmsrl(MSR_IA32_MCx_STATUS(1));
834
835
836 if ((mc1_status &
837 (MCI_STATUS_VAL | MCI_STATUS_OVER | MCI_STATUS_UC | MCI_STATUS_EN |
838 MCI_STATUS_ADDRV | MCI_STATUS_MISCV | MCI_STATUS_PCC |
839 MCI_STATUS_AR | MCI_STATUS_S)) ==
840 (MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
841 MCI_STATUS_ADDRV | MCI_STATUS_MISCV |
842 MCI_STATUS_AR | MCI_STATUS_S)) {
843 misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING;
844 mce_wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
845 mce_wrmsrl(MSR_IA32_MCx_STATUS(1), 0);
846
847 instrumentation_begin();
848 pr_err_once("Erratum detected, disable fast string copy instructions.\n");
849 instrumentation_end();
850
851 return true;
852 }
853
854 return false;
855}
856
857
858
859
860
861static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
862 struct pt_regs *regs)
863{
864 char *tmp = *msg;
865 int i;
866
867 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
868 m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
869 if (!(m->status & MCI_STATUS_VAL))
870 continue;
871
872 arch___set_bit(i, validp);
873 if (mce_flags.snb_ifu_quirk)
874 quirk_sandybridge_ifu(i, m, regs);
875
876 m->bank = i;
877 if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
878 mce_read_aux(m, i);
879 *msg = tmp;
880 return 1;
881 }
882 }
883 return 0;
884}
885
886
887
888
889
/*
 * Rendezvous progress counter: set to 1 by the first CPU in mce_start(),
 * incremented by every CPU in mce_end() and reset to 0 once the rendezvous
 * is over.
 */
static atomic_t mce_executing;

/* Number of CPUs that have entered mce_start() for the current event. */
static atomic_t mce_callin;

/*
 * CPUs that have not checked in yet; each CPU clears its own bit in
 * mce_start(), mce_end() sets all bits again.
 */
static cpumask_t mce_missing_cpus = CPU_MASK_ALL;
902
903
904
905
906static noinstr int mce_timed_out(u64 *t, const char *msg)
907{
908 int ret = 0;
909
910
911 instrumentation_begin();
912
913
914
915
916
917
918
919 rmb();
920 if (atomic_read(&mce_panicked))
921 wait_for_panic();
922 if (!mca_cfg.monarch_timeout)
923 goto out;
924 if ((s64)*t < SPINUNIT) {
925 if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
926 pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
927 cpumask_pr_args(&mce_missing_cpus));
928 mce_panic(msg, NULL, NULL);
929
930 ret = 1;
931 goto out;
932 }
933 *t -= SPINUNIT;
934
935out:
936 touch_nmi_watchdog();
937
938 instrumentation_end();
939
940 return ret;
941}
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967static void mce_reign(void)
968{
969 int cpu;
970 struct mce *m = NULL;
971 int global_worst = 0;
972 char *msg = NULL;
973
974
975
976
977
978
979 for_each_possible_cpu(cpu) {
980 struct mce *mtmp = &per_cpu(mces_seen, cpu);
981
982 if (mtmp->severity > global_worst) {
983 global_worst = mtmp->severity;
984 m = &per_cpu(mces_seen, cpu);
985 }
986 }
987
988
989
990
991
992
993 if (m && global_worst >= MCE_PANIC_SEVERITY) {
994
995 mce_severity(m, NULL, &msg, true);
996 mce_panic("Fatal machine check", m, msg);
997 }
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009 if (global_worst <= MCE_KEEP_SEVERITY)
1010 mce_panic("Fatal machine check from unknown source", NULL, NULL);
1011
1012
1013
1014
1015
1016 for_each_possible_cpu(cpu)
1017 memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
1018}
1019
/* Sum of the no_way_out values contributed by all CPUs in mce_start(). */
static atomic_t global_nwo;

/*
 * Enter the cross-CPU rendezvous for a broadcast machine check.
 *
 * @no_way_out: in: this CPU's verdict; out (on success): the verdict
 *              accumulated over all CPUs.
 *
 * Waits until every online CPU has checked in, then lets the CPUs proceed
 * one at a time in the order they arrived. The CPU with order 1 is the one
 * the timeout messages call the Monarch.
 *
 * Returns this CPU's order (starting at 1), or -1 when the rendezvous is
 * disabled (zero timeout) or timed out.
 */
static noinstr int mce_start(int *no_way_out)
{
	/* monarch_timeout scaled by NSEC_PER_USEC; consumed in SPINUNIT steps. */
	u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
	int order, ret = -1;

	if (!timeout)
		return ret;

	/* Publish this CPU's verdict before checking in. */
	arch_atomic_add(*no_way_out, &global_nwo);

	/* The check-in counter doubles as the arrival order. */
	order = arch_atomic_inc_return(&mce_callin);
	arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);

	/* mce_timed_out() and ndelay() may be instrumented. */
	instrumentation_begin();

	/* Wait for all online CPUs to check in. */
	while (arch_atomic_read(&mce_callin) != num_online_cpus()) {
		if (mce_timed_out(&timeout,
				  "Timeout: Not all CPUs entered broadcast exception handler")) {
			arch_atomic_set(&global_nwo, 0);
			goto out;
		}
		ndelay(SPINUNIT);
	}

	/* Order the reads below after the check-in counter read. */
	smp_rmb();

	if (order == 1) {
		/* First CPU in: open the sequence. */
		arch_atomic_set(&mce_executing, 1);
	} else {
		/*
		 * Everybody else waits for its turn; mce_executing is bumped
		 * by each CPU in mce_end().
		 */
		while (arch_atomic_read(&mce_executing) < order) {
			if (mce_timed_out(&timeout,
					  "Timeout: Subject CPUs unable to finish machine check processing")) {
				arch_atomic_set(&global_nwo, 0);
				goto out;
			}
			ndelay(SPINUNIT);
		}
	}

	/* Hand the accumulated verdict back to the caller. */
	*no_way_out = arch_atomic_read(&global_nwo);

	ret = order;

out:
	instrumentation_end();

	return ret;
}
1099
1100
1101
1102
1103
/*
 * Leave the cross-CPU rendezvous entered through mce_start().
 *
 * @order: value returned by mce_start() (negative if it failed)
 *
 * The CPU with order 1 waits for all others to finish, evaluates the
 * collected records in mce_reign() and then resets the rendezvous state.
 * All other CPUs pass the turn on and wait for that reset.
 *
 * Returns 0 on a completed rendezvous, -1 if it was disabled, had already
 * failed in mce_start() or timed out here; in those cases the shared state
 * is reset by this CPU.
 */
static noinstr int mce_end(int order)
{
	u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
	int ret = -1;

	/* The atomics and mce_timed_out() used below may be instrumented. */
	instrumentation_begin();

	if (!timeout)
		goto reset;
	if (order < 0)
		goto reset;

	/* This CPU is done scanning: let the next one in line proceed. */
	atomic_inc(&mce_executing);

	if (order == 1) {
		/*
		 * Wait until every CPU has bumped the counter, i.e. it
		 * exceeds the number of online CPUs.
		 */
		while (atomic_read(&mce_executing) <= num_online_cpus()) {
			if (mce_timed_out(&timeout,
					  "Timeout: Monarch CPU unable to finish machine check processing"))
				goto reset;
			ndelay(SPINUNIT);
		}

		mce_reign();
		barrier();
		ret = 0;
	} else {
		/* Wait for the order-1 CPU to reset mce_executing to 0. */
		while (atomic_read(&mce_executing) != 0) {
			if (mce_timed_out(&timeout,
					  "Timeout: Monarch CPU did not finish machine check processing"))
				goto reset;
			ndelay(SPINUNIT);
		}

		/* The shared state has been reset by the order-1 CPU. */
		ret = 0;
		goto out;
	}

	/* Reset all rendezvous state for the next event. */
reset:
	atomic_set(&global_nwo, 0);
	atomic_set(&mce_callin, 0);
	cpumask_setall(&mce_missing_cpus);
	barrier();

	/* Last, because the waiting CPUs key off this counter reaching 0. */
	atomic_set(&mce_executing, 0);

out:
	instrumentation_end();

	return ret;
}
1174
1175static __always_inline void mce_clear_state(unsigned long *toclear)
1176{
1177 int i;
1178
1179 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1180 if (arch_test_bit(i, toclear))
1181 mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
1182 }
1183}
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197static noinstr bool mce_check_crashing_cpu(void)
1198{
1199 unsigned int cpu = smp_processor_id();
1200
1201 if (arch_cpu_is_offline(cpu) ||
1202 (crashing_cpu != -1 && crashing_cpu != cpu)) {
1203 u64 mcgstatus;
1204
1205 mcgstatus = __rdmsr(MSR_IA32_MCG_STATUS);
1206
1207 if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
1208 if (mcgstatus & MCG_STATUS_LMCES)
1209 return false;
1210 }
1211
1212 if (mcgstatus & MCG_STATUS_RIPV) {
1213 __wrmsr(MSR_IA32_MCG_STATUS, 0, 0);
1214 return true;
1215 }
1216 }
1217 return false;
1218}
1219
/*
 * Walk the banks flagged in @valid_banks, grade and log what they contain.
 *
 * @m:           scratch record; holds a copy of *@final on return
 * @regs:        registers at the time of the exception
 * @final:       receives the record with the highest severity seen
 * @toclear:     out: banks whose status may be cleared afterwards
 * @valid_banks: banks that had a valid status in mce_no_way_out()
 * @no_way_out:  non-zero if a panic is already inevitable; disables the
 *               filters below so that everything gets logged
 * @worst:       in/out: highest severity seen so far
 *
 * Returns the number of banks that passed the S/UC filter.
 */
static __always_inline int
__mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
		unsigned long *toclear, unsigned long *valid_banks, int no_way_out,
		int *worst)
{
	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
	struct mca_config *cfg = &mca_cfg;
	int severity, i, taint = 0;

	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
		arch___clear_bit(i, toclear);
		if (!arch_test_bit(i, valid_banks))
			continue;

		/* Bank disabled in software. */
		if (!mce_banks[i].ctl)
			continue;

		m->misc = 0;
		m->addr = 0;
		m->bank = i;

		/* Re-read: the status may have changed since mce_no_way_out(). */
		m->status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
		if (!(m->status & MCI_STATUS_VAL))
			continue;

		/*
		 * Unless a panic is pending, skip errors without the S bit
		 * (SER systems) or without the UC bit (others).
		 */
		if (!(m->status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
			!no_way_out)
			continue;

		/* Counted by the caller to decide whether to taint the kernel. */
		taint++;

		severity = mce_severity(m, regs, NULL, true);

		/* KEEP and UCNA errors stay in the bank unless we panic anyway. */
		if ((severity == MCE_KEEP_SEVERITY ||
		     severity == MCE_UCNA_SEVERITY) && !no_way_out)
			continue;

		arch___set_bit(i, toclear);

		/* Nothing worth logging, but the bank may be cleared. */
		if (severity == MCE_NO_SEVERITY)
			continue;

		mce_read_aux(m, i);

		m->severity = severity;

		/* mce_log() is instrumentable; we are in noinstr context. */
		instrumentation_begin();
		mce_log(m);
		instrumentation_end();

		if (severity > *worst) {
			*final = *m;
			*worst = severity;
		}
	}

	/* Leave the worst record in @m for the caller. */
	*m = *final;

	return taint;
}
1296
1297static void kill_me_now(struct callback_head *ch)
1298{
1299 struct task_struct *p = container_of(ch, struct task_struct, mce_kill_me);
1300
1301 p->mce_count = 0;
1302 force_sig(SIGBUS);
1303}
1304
1305static void kill_me_maybe(struct callback_head *cb)
1306{
1307 struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
1308 int flags = MF_ACTION_REQUIRED;
1309 int ret;
1310
1311 p->mce_count = 0;
1312 pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
1313
1314 if (!p->mce_ripv)
1315 flags |= MF_MUST_KILL;
1316
1317 ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
1318 if (!ret) {
1319 set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
1320 sync_core();
1321 return;
1322 }
1323
1324
1325
1326
1327
1328
1329
1330
1331 if (ret == -EHWPOISON || ret == -EOPNOTSUPP)
1332 return;
1333
1334 pr_err("Memory error not recovered");
1335 kill_me_now(cb);
1336}
1337
1338static void kill_me_never(struct callback_head *cb)
1339{
1340 struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
1341
1342 p->mce_count = 0;
1343 pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
1344 if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
1345 set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
1346}
1347
1348static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
1349{
1350 int count = ++current->mce_count;
1351
1352
1353 if (count == 1) {
1354 current->mce_addr = m->addr;
1355 current->mce_kflags = m->kflags;
1356 current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
1357 current->mce_whole_page = whole_page(m);
1358 current->mce_kill_me.func = func;
1359 }
1360
1361
1362 if (count > 10)
1363 mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
1364
1365
1366 if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
1367 mce_panic("Consecutive machine checks to different user pages", m, msg);
1368
1369
1370 if (count > 1)
1371 return;
1372
1373 task_work_add(current, ¤t->mce_kill_me, TWA_RESUME);
1374}
1375
1376
1377static noinstr void unexpected_machine_check(struct pt_regs *regs)
1378{
1379 instrumentation_begin();
1380 pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
1381 smp_processor_id());
1382 instrumentation_end();
1383}
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
/*
 * The machine check exception handler.
 *
 * Gathers the global state, determines whether any bank holds a fatal
 * error, synchronises with the other CPUs for broadcast events, scans and
 * logs the banks, and finally either panics, queues task work to deal with
 * the current task, runs the kernel exception fixup, or just returns.
 *
 * @regs: register state at the time of the exception
 */
noinstr void do_machine_check(struct pt_regs *regs)
{
	int worst = 0, order, no_way_out, kill_current_task, lmce, taint = 0;
	DECLARE_BITMAP(valid_banks, MAX_NR_BANKS) = { 0 };
	DECLARE_BITMAP(toclear, MAX_NR_BANKS) = { 0 };
	struct mce m, *final;
	char *msg = NULL;

	/* Legacy P5/WinChip handlers and the not-yet-initialised case. */
	if (unlikely(mce_flags.p5))
		return pentium_machine_check(regs);
	else if (unlikely(mce_flags.winchip))
		return winchip_machine_check(regs);
	else if (unlikely(!mca_cfg.initialized))
		return unexpected_machine_check(regs);

	/* Erratum fully handled by the quirk: only MCG_STATUS is left to clear. */
	if (mce_flags.skx_repmov_quirk && quirk_skylake_repmov())
		goto clear;

	/* Rendezvous order; stays -1 unless mce_start() assigns one. */
	order = -1;

	/* Becomes non-zero once a panic is inevitable. */
	no_way_out = 0;

	/* Set when the interrupted context cannot be resumed (RIPV clear). */
	kill_current_task = 0;

	/* Treated as a local machine check unless the vendor says otherwise. */
	lmce = 1;

	this_cpu_inc(mce_exception_count);

	mce_gather_info(&m, regs);
	m.tsc = rdtsc();

	/* Seed this CPU's mces_seen slot; mce_reign() reads it later. */
	final = this_cpu_ptr(&mces_seen);
	*final = m;

	no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);

	barrier();

	/* Without RIPV there is nowhere valid to return to. */
	if (!(m.mcgstatus & MCG_STATUS_RIPV))
		kill_current_task = 1;

	/* Intel and Zhaoxin report whether the event is local in LMCES. */
	if (m.cpuvendor == X86_VENDOR_INTEL ||
	    m.cpuvendor == X86_VENDOR_ZHAOXIN)
		lmce = m.mcgstatus & MCG_STATUS_LMCES;

	/*
	 * A local event is handled by this CPU alone, so a fatal one panics
	 * right away. A broadcast event first rendezvouses with the others.
	 */
	if (lmce) {
		if (no_way_out)
			mce_panic("Fatal local machine check", &m, msg);
	} else {
		order = mce_start(&no_way_out);
	}

	taint = __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);

	/* Leave the banks untouched when we are going to panic. */
	if (!no_way_out)
		mce_clear_state(toclear);

	if (!lmce) {
		/*
		 * Rendezvous failed or timed out: judge by what this CPU saw
		 * on its own.
		 */
		if (mce_end(order) < 0) {
			if (!no_way_out)
				no_way_out = worst >= MCE_PANIC_SEVERITY;

			if (no_way_out)
				mce_panic("Fatal machine check on current CPU", &m, msg);
		}
	} else {
		/*
		 * Local event: the scan may have found a fatal error that
		 * mce_no_way_out() did not; fetch the message and panic.
		 */
		if (worst >= MCE_PANIC_SEVERITY) {
			mce_severity(&m, regs, &msg, true);
			mce_panic("Local fatal machine check!", &m, msg);
		}
	}

	/* Everything from here to "out" may call instrumentable code. */
	instrumentation_begin();

	if (taint)
		add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);

	/* Nothing further to do unless action is required or the task must die. */
	if (worst != MCE_AR_SEVERITY && !kill_current_task)
		goto out;

	/* CPL 3 in the recorded CS: the fault hit user mode. */
	if ((m.cs & 3) == 3) {
		/* Task work is only valid when we came straight from user mode. */
		BUG_ON(!on_thread_stack() || !user_mode(regs));

		if (kill_current_task)
			queue_task_work(&m, msg, kill_me_now);
		else
			queue_task_work(&m, msg, kill_me_maybe);

	} else {
		/*
		 * Kernel mode: recovery is only possible through an exception
		 * table fixup; panic if there is none.
		 */
		if (m.kflags & MCE_IN_KERNEL_RECOV) {
			if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
				mce_panic("Failed kernel mode recovery", &m, msg);
		}

		/* Kernel was copying from user space: deal with the page later. */
		if (m.kflags & MCE_IN_KERNEL_COPYIN)
			queue_task_work(&m, msg, kill_me_never);
	}

out:
	instrumentation_end();

clear:
	/* Acknowledge the exception. */
	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
1575
1576#ifndef CONFIG_MEMORY_FAILURE
1577int memory_failure(unsigned long pfn, int flags)
1578{
1579
1580 BUG_ON(flags & MF_ACTION_REQUIRED);
1581 pr_err("Uncorrected memory error in page 0x%lx ignored\n"
1582 "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
1583 pfn);
1584
1585 return 0;
1586}
1587#endif
1588
1589
1590
1591
1592
1593
/*
 * Upper bound for the polling interval; multiplied by HZ in mce_timer_fn(),
 * i.e. expressed in seconds.
 */
static unsigned long check_interval = INITIAL_CHECK_INTERVAL;

/* Current per-CPU polling interval in jiffies, and the polling timer. */
static DEFINE_PER_CPU(unsigned long, mce_next_interval);
static DEFINE_PER_CPU(struct timer_list, mce_timer);

/* Default interval adjustment: leave the interval unchanged. */
static unsigned long mce_adjust_timer_default(unsigned long interval)
{
	return interval;
}

/* Hook applied to the interval in mce_timer_fn() when mce_intel_cmci_poll() returns true. */
static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
1605
1606static void __start_timer(struct timer_list *t, unsigned long interval)
1607{
1608 unsigned long when = jiffies + interval;
1609 unsigned long flags;
1610
1611 local_irq_save(flags);
1612
1613 if (!timer_pending(t) || time_before(when, t->expires))
1614 mod_timer(t, round_jiffies(when));
1615
1616 local_irq_restore(flags);
1617}
1618
1619static void mce_timer_fn(struct timer_list *t)
1620{
1621 struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
1622 unsigned long iv;
1623
1624 WARN_ON(cpu_t != t);
1625
1626 iv = __this_cpu_read(mce_next_interval);
1627
1628 if (mce_available(this_cpu_ptr(&cpu_info))) {
1629 machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
1630
1631 if (mce_intel_cmci_poll()) {
1632 iv = mce_adjust_timer(iv);
1633 goto done;
1634 }
1635 }
1636
1637
1638
1639
1640
1641 if (mce_notify_irq())
1642 iv = max(iv / 2, (unsigned long) HZ/100);
1643 else
1644 iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
1645
1646done:
1647 __this_cpu_write(mce_next_interval, iv);
1648 __start_timer(t, iv);
1649}
1650
1651
1652
1653
1654void mce_timer_kick(unsigned long interval)
1655{
1656 struct timer_list *t = this_cpu_ptr(&mce_timer);
1657 unsigned long iv = __this_cpu_read(mce_next_interval);
1658
1659 __start_timer(t, interval);
1660
1661 if (interval < iv)
1662 __this_cpu_write(mce_next_interval, interval);
1663}
1664
1665
1666static void mce_timer_delete_all(void)
1667{
1668 int cpu;
1669
1670 for_each_online_cpu(cpu)
1671 del_timer_sync(&per_cpu(mce_timer, cpu));
1672}
1673
1674
1675
1676
1677
1678
1679int mce_notify_irq(void)
1680{
1681
1682 static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
1683
1684 if (test_and_clear_bit(0, &mce_need_notify)) {
1685 mce_work_trigger();
1686
1687 if (__ratelimit(&ratelimit))
1688 pr_info(HW_ERR "Machine check events logged\n");
1689
1690 return 1;
1691 }
1692 return 0;
1693}
1694EXPORT_SYMBOL_GPL(mce_notify_irq);
1695
1696static void __mcheck_cpu_mce_banks_init(void)
1697{
1698 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1699 u8 n_banks = this_cpu_read(mce_num_banks);
1700 int i;
1701
1702 for (i = 0; i < n_banks; i++) {
1703 struct mce_bank *b = &mce_banks[i];
1704
1705
1706
1707
1708
1709
1710 b->ctl = -1ULL;
1711 b->init = true;
1712 }
1713}
1714
1715
1716
1717
1718static void __mcheck_cpu_cap_init(void)
1719{
1720 u64 cap;
1721 u8 b;
1722
1723 rdmsrl(MSR_IA32_MCG_CAP, cap);
1724
1725 b = cap & MCG_BANKCNT_MASK;
1726
1727 if (b > MAX_NR_BANKS) {
1728 pr_warn("CPU%d: Using only %u machine check banks out of %u\n",
1729 smp_processor_id(), MAX_NR_BANKS, b);
1730 b = MAX_NR_BANKS;
1731 }
1732
1733 this_cpu_write(mce_num_banks, b);
1734
1735 __mcheck_cpu_mce_banks_init();
1736
1737
1738 if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
1739 mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
1740
1741 if (cap & MCG_SER_P)
1742 mca_cfg.ser = 1;
1743}
1744
1745static void __mcheck_cpu_init_generic(void)
1746{
1747 enum mcp_flags m_fl = 0;
1748 mce_banks_t all_banks;
1749 u64 cap;
1750
1751 if (!mca_cfg.bootlog)
1752 m_fl = MCP_DONTLOG;
1753
1754
1755
1756
1757
1758
1759 bitmap_fill(all_banks, MAX_NR_BANKS);
1760 machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
1761
1762 cr4_set_bits(X86_CR4_MCE);
1763
1764 rdmsrl(MSR_IA32_MCG_CAP, cap);
1765 if (cap & MCG_CTL_P)
1766 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
1767}
1768
1769static void __mcheck_cpu_init_clear_banks(void)
1770{
1771 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1772 int i;
1773
1774 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1775 struct mce_bank *b = &mce_banks[i];
1776
1777 if (!b->init)
1778 continue;
1779 wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
1780 wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
1781 }
1782}
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794static void __mcheck_cpu_check_banks(void)
1795{
1796 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1797 u64 msrval;
1798 int i;
1799
1800 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
1801 struct mce_bank *b = &mce_banks[i];
1802
1803 if (!b->init)
1804 continue;
1805
1806 rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
1807 b->init = !!msrval;
1808 }
1809}
1810
1811
1812static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1813{
1814 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1815 struct mca_config *cfg = &mca_cfg;
1816
1817 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1818 pr_info("unknown CPU type - not enabling MCE support\n");
1819 return -EOPNOTSUPP;
1820 }
1821
1822
1823 if (c->x86_vendor == X86_VENDOR_AMD) {
1824 if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
1825
1826
1827
1828
1829
1830 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1831 }
1832 if (c->x86 < 0x11 && cfg->bootlog < 0) {
1833
1834
1835
1836
1837 cfg->bootlog = 0;
1838 }
1839
1840
1841
1842
1843 if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0)
1844 mce_banks[0].ctl = 0;
1845
1846
1847
1848
1849
1850 if (c->x86 == 0x15 && c->x86_model <= 0xf)
1851 mce_flags.overflow_recov = 1;
1852
1853 }
1854
1855 if (c->x86_vendor == X86_VENDOR_INTEL) {
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865 if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
1866 mce_banks[0].init = false;
1867
1868
1869
1870
1871
1872 if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
1873 cfg->monarch_timeout < 0)
1874 cfg->monarch_timeout = USEC_PER_SEC;
1875
1876
1877
1878
1879
1880 if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
1881 cfg->bootlog = 0;
1882
1883 if (c->x86 == 6 && c->x86_model == 45)
1884 mce_flags.snb_ifu_quirk = 1;
1885
1886
1887
1888
1889
1890 if (c->x86 == 6 && c->x86_model == INTEL_FAM6_SKYLAKE_X)
1891 mce_flags.skx_repmov_quirk = 1;
1892 }
1893
1894 if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
1895
1896
1897
1898
1899 if (c->x86 > 6 || (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
1900 if (cfg->monarch_timeout < 0)
1901 cfg->monarch_timeout = USEC_PER_SEC;
1902 }
1903 }
1904
1905 if (cfg->monarch_timeout < 0)
1906 cfg->monarch_timeout = 0;
1907 if (cfg->bootlog != 0)
1908 cfg->panic_timeout = 30;
1909
1910 return 0;
1911}
1912
1913static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
1914{
1915 if (c->x86 != 5)
1916 return 0;
1917
1918 switch (c->x86_vendor) {
1919 case X86_VENDOR_INTEL:
1920 intel_p5_mcheck_init(c);
1921 mce_flags.p5 = 1;
1922 return 1;
1923 case X86_VENDOR_CENTAUR:
1924 winchip_mcheck_init(c);
1925 mce_flags.winchip = 1;
1926 return 1;
1927 default:
1928 return 0;
1929 }
1930
1931 return 0;
1932}
1933
1934
1935
1936
1937static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
1938{
1939 if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
1940 mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
1941 mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
1942 mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
1943 mce_flags.amd_threshold = 1;
1944 }
1945}
1946
1947static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
1948{
1949 struct mca_config *cfg = &mca_cfg;
1950
1951
1952
1953
1954
1955 if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
1956 c->x86 > 6) {
1957 if (cfg->monarch_timeout < 0)
1958 cfg->monarch_timeout = USEC_PER_SEC;
1959 }
1960}
1961
1962static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
1963{
1964 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975 if ((c->x86 == 7 && c->x86_model == 0x1b) ||
1976 (c->x86_model == 0x19 || c->x86_model == 0x1f)) {
1977 if (this_cpu_read(mce_num_banks) > 8)
1978 mce_banks[8].ctl = 0;
1979 }
1980
1981 intel_init_cmci();
1982 intel_init_lmce();
1983 mce_adjust_timer = cmci_intel_adjust_timer;
1984}
1985
/* Zhaoxin teardown: only LMCE is cleared, via the Intel helper. @c is unused. */
static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
{
	intel_clear_lmce();
}
1990
1991static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1992{
1993 switch (c->x86_vendor) {
1994 case X86_VENDOR_INTEL:
1995 mce_intel_feature_init(c);
1996 mce_adjust_timer = cmci_intel_adjust_timer;
1997 break;
1998
1999 case X86_VENDOR_AMD: {
2000 mce_amd_feature_init(c);
2001 break;
2002 }
2003
2004 case X86_VENDOR_HYGON:
2005 mce_hygon_feature_init(c);
2006 break;
2007
2008 case X86_VENDOR_CENTAUR:
2009 mce_centaur_feature_init(c);
2010 break;
2011
2012 case X86_VENDOR_ZHAOXIN:
2013 mce_zhaoxin_feature_init(c);
2014 break;
2015
2016 default:
2017 break;
2018 }
2019}
2020
2021static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
2022{
2023 switch (c->x86_vendor) {
2024 case X86_VENDOR_INTEL:
2025 mce_intel_feature_clear(c);
2026 break;
2027
2028 case X86_VENDOR_ZHAOXIN:
2029 mce_zhaoxin_feature_clear(c);
2030 break;
2031
2032 default:
2033 break;
2034 }
2035}
2036
2037static void mce_start_timer(struct timer_list *t)
2038{
2039 unsigned long iv = check_interval * HZ;
2040
2041 if (mca_cfg.ignore_ce || !iv)
2042 return;
2043
2044 this_cpu_write(mce_next_interval, iv);
2045 __start_timer(t, iv);
2046}
2047
2048static void __mcheck_cpu_setup_timer(void)
2049{
2050 struct timer_list *t = this_cpu_ptr(&mce_timer);
2051
2052 timer_setup(t, mce_timer_fn, TIMER_PINNED);
2053}
2054
2055static void __mcheck_cpu_init_timer(void)
2056{
2057 struct timer_list *t = this_cpu_ptr(&mce_timer);
2058
2059 timer_setup(t, mce_timer_fn, TIMER_PINNED);
2060 mce_start_timer(t);
2061}
2062
2063bool filter_mce(struct mce *m)
2064{
2065 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
2066 return amd_filter_mce(m);
2067 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2068 return intel_filter_mce(m);
2069
2070 return false;
2071}
2072
2073static __always_inline void exc_machine_check_kernel(struct pt_regs *regs)
2074{
2075 irqentry_state_t irq_state;
2076
2077 WARN_ON_ONCE(user_mode(regs));
2078
2079
2080
2081
2082
2083 if (mca_cfg.initialized && mce_check_crashing_cpu())
2084 return;
2085
2086 irq_state = irqentry_nmi_enter(regs);
2087
2088 do_machine_check(regs);
2089
2090 irqentry_nmi_exit(regs, irq_state);
2091}
2092
2093static __always_inline void exc_machine_check_user(struct pt_regs *regs)
2094{
2095 irqentry_enter_from_user_mode(regs);
2096
2097 do_machine_check(regs);
2098
2099 irqentry_exit_to_user_mode(regs);
2100}
2101
2102#ifdef CONFIG_X86_64
2103
2104DEFINE_IDTENTRY_MCE(exc_machine_check)
2105{
2106 unsigned long dr7;
2107
2108 dr7 = local_db_save();
2109 exc_machine_check_kernel(regs);
2110 local_db_restore(dr7);
2111}
2112
2113
2114DEFINE_IDTENTRY_MCE_USER(exc_machine_check)
2115{
2116 unsigned long dr7;
2117
2118 dr7 = local_db_save();
2119 exc_machine_check_user(regs);
2120 local_db_restore(dr7);
2121}
2122#else
2123
2124DEFINE_IDTENTRY_RAW(exc_machine_check)
2125{
2126 unsigned long dr7;
2127
2128 dr7 = local_db_save();
2129 if (user_mode(regs))
2130 exc_machine_check_user(regs);
2131 else
2132 exc_machine_check_kernel(regs);
2133 local_db_restore(dr7);
2134}
2135#endif
2136
2137
2138
2139
2140
2141void mcheck_cpu_init(struct cpuinfo_x86 *c)
2142{
2143 if (mca_cfg.disabled)
2144 return;
2145
2146 if (__mcheck_cpu_ancient_init(c))
2147 return;
2148
2149 if (!mce_available(c))
2150 return;
2151
2152 __mcheck_cpu_cap_init();
2153
2154 if (__mcheck_cpu_apply_quirks(c) < 0) {
2155 mca_cfg.disabled = 1;
2156 return;
2157 }
2158
2159 if (mce_gen_pool_init()) {
2160 mca_cfg.disabled = 1;
2161 pr_emerg("Couldn't allocate MCE records pool!\n");
2162 return;
2163 }
2164
2165 mca_cfg.initialized = 1;
2166
2167 __mcheck_cpu_init_early(c);
2168 __mcheck_cpu_init_generic();
2169 __mcheck_cpu_init_vendor(c);
2170 __mcheck_cpu_init_clear_banks();
2171 __mcheck_cpu_check_banks();
2172 __mcheck_cpu_setup_timer();
2173}
2174
2175
2176
2177
2178void mcheck_cpu_clear(struct cpuinfo_x86 *c)
2179{
2180 if (mca_cfg.disabled)
2181 return;
2182
2183 if (!mce_available(c))
2184 return;
2185
2186
2187
2188
2189
2190 __mcheck_cpu_clear_vendor(c);
2191
2192}
2193
2194static void __mce_disable_bank(void *arg)
2195{
2196 int bank = *((int *)arg);
2197 __clear_bit(bank, this_cpu_ptr(mce_poll_banks));
2198 cmci_disable_bank(bank);
2199}
2200
2201void mce_disable_bank(int bank)
2202{
2203 if (bank >= this_cpu_read(mce_num_banks)) {
2204 pr_warn(FW_BUG
2205 "Ignoring request to disable invalid MCA bank %d.\n",
2206 bank);
2207 return;
2208 }
2209 set_bit(bank, mce_banks_ce_disabled);
2210 on_each_cpu(__mce_disable_bank, &bank, 1);
2211}
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229static int __init mcheck_enable(char *str)
2230{
2231 struct mca_config *cfg = &mca_cfg;
2232
2233 if (*str == 0) {
2234 enable_p5_mce();
2235 return 1;
2236 }
2237 if (*str == '=')
2238 str++;
2239 if (!strcmp(str, "off"))
2240 cfg->disabled = 1;
2241 else if (!strcmp(str, "no_cmci"))
2242 cfg->cmci_disabled = true;
2243 else if (!strcmp(str, "no_lmce"))
2244 cfg->lmce_disabled = 1;
2245 else if (!strcmp(str, "dont_log_ce"))
2246 cfg->dont_log_ce = true;
2247 else if (!strcmp(str, "print_all"))
2248 cfg->print_all = true;
2249 else if (!strcmp(str, "ignore_ce"))
2250 cfg->ignore_ce = true;
2251 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
2252 cfg->bootlog = (str[0] == 'b');
2253 else if (!strcmp(str, "bios_cmci_threshold"))
2254 cfg->bios_cmci_threshold = 1;
2255 else if (!strcmp(str, "recovery"))
2256 cfg->recovery = 1;
2257 else if (isdigit(str[0]))
2258 get_option(&str, &(cfg->monarch_timeout));
2259 else {
2260 pr_info("mce argument %s ignored. Please use /sys\n", str);
2261 return 0;
2262 }
2263 return 1;
2264}
2265__setup("mce", mcheck_enable);
2266
2267int __init mcheck_init(void)
2268{
2269 mce_register_decode_chain(&early_nb);
2270 mce_register_decode_chain(&mce_uc_nb);
2271 mce_register_decode_chain(&mce_default_nb);
2272
2273 INIT_WORK(&mce_work, mce_gen_pool_process);
2274 init_irq_work(&mce_irq_work, mce_irq_work_cb);
2275
2276 return 0;
2277}
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287static void mce_disable_error_reporting(void)
2288{
2289 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
2290 int i;
2291
2292 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
2293 struct mce_bank *b = &mce_banks[i];
2294
2295 if (b->init)
2296 wrmsrl(mca_msr_reg(i, MCA_CTL), 0);
2297 }
2298 return;
2299}
2300
2301static void vendor_disable_error_reporting(void)
2302{
2303
2304
2305
2306
2307
2308
2309
2310 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
2311 boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ||
2312 boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
2313 boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN)
2314 return;
2315
2316 mce_disable_error_reporting();
2317}
2318
/* syscore suspend hook: apply the vendor-dependent reporting shutdown. Returns 0. */
static int mce_syscore_suspend(void)
{
	vendor_disable_error_reporting();

	return 0;
}
2324
/* syscore shutdown hook: apply the vendor-dependent reporting shutdown. */
static void mce_syscore_shutdown(void)
{
	vendor_disable_error_reporting();
}
2329
2330
2331
2332
2333
2334
2335static void mce_syscore_resume(void)
2336{
2337 __mcheck_cpu_init_generic();
2338 __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
2339 __mcheck_cpu_init_clear_banks();
2340}
2341
2342static struct syscore_ops mce_syscore_ops = {
2343 .suspend = mce_syscore_suspend,
2344 .shutdown = mce_syscore_shutdown,
2345 .resume = mce_syscore_resume,
2346};
2347
2348
2349
2350
2351
2352static void mce_cpu_restart(void *data)
2353{
2354 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2355 return;
2356 __mcheck_cpu_init_generic();
2357 __mcheck_cpu_init_clear_banks();
2358 __mcheck_cpu_init_timer();
2359}
2360
2361
2362static void mce_restart(void)
2363{
2364 mce_timer_delete_all();
2365 on_each_cpu(mce_cpu_restart, NULL, 1);
2366}
2367
2368
2369static void mce_disable_cmci(void *data)
2370{
2371 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2372 return;
2373 cmci_clear();
2374}
2375
2376static void mce_enable_ce(void *all)
2377{
2378 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2379 return;
2380 cmci_reenable();
2381 cmci_recheck();
2382 if (all)
2383 __mcheck_cpu_init_timer();
2384}
2385
2386static struct bus_type mce_subsys = {
2387 .name = "machinecheck",
2388 .dev_name = "machinecheck",
2389};
2390
2391DEFINE_PER_CPU(struct device *, mce_device);
2392
2393static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr)
2394{
2395 return container_of(attr, struct mce_bank_dev, attr);
2396}
2397
2398static ssize_t show_bank(struct device *s, struct device_attribute *attr,
2399 char *buf)
2400{
2401 u8 bank = attr_to_bank(attr)->bank;
2402 struct mce_bank *b;
2403
2404 if (bank >= per_cpu(mce_num_banks, s->id))
2405 return -EINVAL;
2406
2407 b = &per_cpu(mce_banks_array, s->id)[bank];
2408
2409 if (!b->init)
2410 return -ENODEV;
2411
2412 return sprintf(buf, "%llx\n", b->ctl);
2413}
2414
2415static ssize_t set_bank(struct device *s, struct device_attribute *attr,
2416 const char *buf, size_t size)
2417{
2418 u8 bank = attr_to_bank(attr)->bank;
2419 struct mce_bank *b;
2420 u64 new;
2421
2422 if (kstrtou64(buf, 0, &new) < 0)
2423 return -EINVAL;
2424
2425 if (bank >= per_cpu(mce_num_banks, s->id))
2426 return -EINVAL;
2427
2428 b = &per_cpu(mce_banks_array, s->id)[bank];
2429
2430 if (!b->init)
2431 return -ENODEV;
2432
2433 b->ctl = new;
2434 mce_restart();
2435
2436 return size;
2437}
2438
2439static ssize_t set_ignore_ce(struct device *s,
2440 struct device_attribute *attr,
2441 const char *buf, size_t size)
2442{
2443 u64 new;
2444
2445 if (kstrtou64(buf, 0, &new) < 0)
2446 return -EINVAL;
2447
2448 mutex_lock(&mce_sysfs_mutex);
2449 if (mca_cfg.ignore_ce ^ !!new) {
2450 if (new) {
2451
2452 mce_timer_delete_all();
2453 on_each_cpu(mce_disable_cmci, NULL, 1);
2454 mca_cfg.ignore_ce = true;
2455 } else {
2456
2457 mca_cfg.ignore_ce = false;
2458 on_each_cpu(mce_enable_ce, (void *)1, 1);
2459 }
2460 }
2461 mutex_unlock(&mce_sysfs_mutex);
2462
2463 return size;
2464}
2465
2466static ssize_t set_cmci_disabled(struct device *s,
2467 struct device_attribute *attr,
2468 const char *buf, size_t size)
2469{
2470 u64 new;
2471
2472 if (kstrtou64(buf, 0, &new) < 0)
2473 return -EINVAL;
2474
2475 mutex_lock(&mce_sysfs_mutex);
2476 if (mca_cfg.cmci_disabled ^ !!new) {
2477 if (new) {
2478
2479 on_each_cpu(mce_disable_cmci, NULL, 1);
2480 mca_cfg.cmci_disabled = true;
2481 } else {
2482
2483 mca_cfg.cmci_disabled = false;
2484 on_each_cpu(mce_enable_ce, NULL, 1);
2485 }
2486 }
2487 mutex_unlock(&mce_sysfs_mutex);
2488
2489 return size;
2490}
2491
2492static ssize_t store_int_with_restart(struct device *s,
2493 struct device_attribute *attr,
2494 const char *buf, size_t size)
2495{
2496 unsigned long old_check_interval = check_interval;
2497 ssize_t ret = device_store_ulong(s, attr, buf, size);
2498
2499 if (check_interval == old_check_interval)
2500 return ret;
2501
2502 mutex_lock(&mce_sysfs_mutex);
2503 mce_restart();
2504 mutex_unlock(&mce_sysfs_mutex);
2505
2506 return ret;
2507}
2508
2509static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
2510static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
2511static DEVICE_BOOL_ATTR(print_all, 0644, mca_cfg.print_all);
2512
2513static struct dev_ext_attribute dev_attr_check_interval = {
2514 __ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
2515 &check_interval
2516};
2517
2518static struct dev_ext_attribute dev_attr_ignore_ce = {
2519 __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
2520 &mca_cfg.ignore_ce
2521};
2522
2523static struct dev_ext_attribute dev_attr_cmci_disabled = {
2524 __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
2525 &mca_cfg.cmci_disabled
2526};
2527
2528static struct device_attribute *mce_device_attrs[] = {
2529 &dev_attr_check_interval.attr,
2530#ifdef CONFIG_X86_MCELOG_LEGACY
2531 &dev_attr_trigger,
2532#endif
2533 &dev_attr_monarch_timeout.attr,
2534 &dev_attr_dont_log_ce.attr,
2535 &dev_attr_print_all.attr,
2536 &dev_attr_ignore_ce.attr,
2537 &dev_attr_cmci_disabled.attr,
2538 NULL
2539};
2540
2541static cpumask_var_t mce_device_initialized;
2542
2543static void mce_device_release(struct device *dev)
2544{
2545 kfree(dev);
2546}
2547
2548
2549static int mce_device_create(unsigned int cpu)
2550{
2551 struct device *dev;
2552 int err;
2553 int i, j;
2554
2555 if (!mce_available(&boot_cpu_data))
2556 return -EIO;
2557
2558 dev = per_cpu(mce_device, cpu);
2559 if (dev)
2560 return 0;
2561
2562 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
2563 if (!dev)
2564 return -ENOMEM;
2565 dev->id = cpu;
2566 dev->bus = &mce_subsys;
2567 dev->release = &mce_device_release;
2568
2569 err = device_register(dev);
2570 if (err) {
2571 put_device(dev);
2572 return err;
2573 }
2574
2575 for (i = 0; mce_device_attrs[i]; i++) {
2576 err = device_create_file(dev, mce_device_attrs[i]);
2577 if (err)
2578 goto error;
2579 }
2580 for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) {
2581 err = device_create_file(dev, &mce_bank_devs[j].attr);
2582 if (err)
2583 goto error2;
2584 }
2585 cpumask_set_cpu(cpu, mce_device_initialized);
2586 per_cpu(mce_device, cpu) = dev;
2587
2588 return 0;
2589error2:
2590 while (--j >= 0)
2591 device_remove_file(dev, &mce_bank_devs[j].attr);
2592error:
2593 while (--i >= 0)
2594 device_remove_file(dev, mce_device_attrs[i]);
2595
2596 device_unregister(dev);
2597
2598 return err;
2599}
2600
2601static void mce_device_remove(unsigned int cpu)
2602{
2603 struct device *dev = per_cpu(mce_device, cpu);
2604 int i;
2605
2606 if (!cpumask_test_cpu(cpu, mce_device_initialized))
2607 return;
2608
2609 for (i = 0; mce_device_attrs[i]; i++)
2610 device_remove_file(dev, mce_device_attrs[i]);
2611
2612 for (i = 0; i < per_cpu(mce_num_banks, cpu); i++)
2613 device_remove_file(dev, &mce_bank_devs[i].attr);
2614
2615 device_unregister(dev);
2616 cpumask_clear_cpu(cpu, mce_device_initialized);
2617 per_cpu(mce_device, cpu) = NULL;
2618}
2619
2620
2621static void mce_disable_cpu(void)
2622{
2623 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2624 return;
2625
2626 if (!cpuhp_tasks_frozen)
2627 cmci_clear();
2628
2629 vendor_disable_error_reporting();
2630}
2631
2632static void mce_reenable_cpu(void)
2633{
2634 struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
2635 int i;
2636
2637 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2638 return;
2639
2640 if (!cpuhp_tasks_frozen)
2641 cmci_reenable();
2642 for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
2643 struct mce_bank *b = &mce_banks[i];
2644
2645 if (b->init)
2646 wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
2647 }
2648}
2649
2650static int mce_cpu_dead(unsigned int cpu)
2651{
2652 mce_intel_hcpu_update(cpu);
2653
2654
2655 if (!cpuhp_tasks_frozen)
2656 cmci_rediscover();
2657 return 0;
2658}
2659
2660static int mce_cpu_online(unsigned int cpu)
2661{
2662 struct timer_list *t = this_cpu_ptr(&mce_timer);
2663 int ret;
2664
2665 mce_device_create(cpu);
2666
2667 ret = mce_threshold_create_device(cpu);
2668 if (ret) {
2669 mce_device_remove(cpu);
2670 return ret;
2671 }
2672 mce_reenable_cpu();
2673 mce_start_timer(t);
2674 return 0;
2675}
2676
2677static int mce_cpu_pre_down(unsigned int cpu)
2678{
2679 struct timer_list *t = this_cpu_ptr(&mce_timer);
2680
2681 mce_disable_cpu();
2682 del_timer_sync(t);
2683 mce_threshold_remove_device(cpu);
2684 mce_device_remove(cpu);
2685 return 0;
2686}
2687
2688static __init void mce_init_banks(void)
2689{
2690 int i;
2691
2692 for (i = 0; i < MAX_NR_BANKS; i++) {
2693 struct mce_bank_dev *b = &mce_bank_devs[i];
2694 struct device_attribute *a = &b->attr;
2695
2696 b->bank = i;
2697
2698 sysfs_attr_init(&a->attr);
2699 a->attr.name = b->attrname;
2700 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
2701
2702 a->attr.mode = 0644;
2703 a->show = show_bank;
2704 a->store = set_bank;
2705 }
2706}
2707
2708
2709
2710
2711
2712
2713
2714
2715static __init int mcheck_init_device(void)
2716{
2717 int err;
2718
2719
2720
2721
2722
2723 MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
2724
2725 if (!mce_available(&boot_cpu_data)) {
2726 err = -EIO;
2727 goto err_out;
2728 }
2729
2730 if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) {
2731 err = -ENOMEM;
2732 goto err_out;
2733 }
2734
2735 mce_init_banks();
2736
2737 err = subsys_system_register(&mce_subsys, NULL);
2738 if (err)
2739 goto err_out_mem;
2740
2741 err = cpuhp_setup_state(CPUHP_X86_MCE_DEAD, "x86/mce:dead", NULL,
2742 mce_cpu_dead);
2743 if (err)
2744 goto err_out_mem;
2745
2746
2747
2748
2749
2750 err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/mce:online",
2751 mce_cpu_online, mce_cpu_pre_down);
2752 if (err < 0)
2753 goto err_out_online;
2754
2755 register_syscore_ops(&mce_syscore_ops);
2756
2757 return 0;
2758
2759err_out_online:
2760 cpuhp_remove_state(CPUHP_X86_MCE_DEAD);
2761
2762err_out_mem:
2763 free_cpumask_var(mce_device_initialized);
2764
2765err_out:
2766 pr_err("Unable to init MCE device (rc: %d)\n", err);
2767
2768 return err;
2769}
2770device_initcall_sync(mcheck_init_device);
2771
2772
2773
2774
2775static int __init mcheck_disable(char *str)
2776{
2777 mca_cfg.disabled = 1;
2778 return 1;
2779}
2780__setup("nomce", mcheck_disable);
2781
2782#ifdef CONFIG_DEBUG_FS
2783struct dentry *mce_get_debugfs_dir(void)
2784{
2785 static struct dentry *dmce;
2786
2787 if (!dmce)
2788 dmce = debugfs_create_dir("mce", NULL);
2789
2790 return dmce;
2791}
2792
2793static void mce_reset(void)
2794{
2795 atomic_set(&mce_fake_panicked, 0);
2796 atomic_set(&mce_executing, 0);
2797 atomic_set(&mce_callin, 0);
2798 atomic_set(&global_nwo, 0);
2799 cpumask_setall(&mce_missing_cpus);
2800}
2801
2802static int fake_panic_get(void *data, u64 *val)
2803{
2804 *val = fake_panic;
2805 return 0;
2806}
2807
2808static int fake_panic_set(void *data, u64 val)
2809{
2810 mce_reset();
2811 fake_panic = val;
2812 return 0;
2813}
2814
2815DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
2816 "%llu\n");
2817
2818static void __init mcheck_debugfs_init(void)
2819{
2820 struct dentry *dmce;
2821
2822 dmce = mce_get_debugfs_dir();
2823 debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL,
2824 &fake_panic_fops);
2825}
2826#else
2827static void __init mcheck_debugfs_init(void) { }
2828#endif
2829
2830static int __init mcheck_late_init(void)
2831{
2832 if (mca_cfg.recovery)
2833 enable_copy_mc_fragile();
2834
2835 mcheck_debugfs_init();
2836
2837
2838
2839
2840
2841 mce_schedule_work();
2842
2843 return 0;
2844}
2845late_initcall(mcheck_late_init);
2846