1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/cpu.h>
27#include <linux/debugfs.h>
28#include <linux/kernel.h>
29#include <linux/module.h>
30#include <linux/notifier.h>
31#include <linux/pci.h>
32#include <linux/uaccess.h>
33
34#include <asm/amd_nb.h>
35#include <asm/apic.h>
36#include <asm/irq_vectors.h>
37#include <asm/mce.h>
38#include <asm/nmi.h>
39#include <asm/smp.h>
40
41#include "mce-internal.h"
42
43
44
45
46static struct mce i_mce;
47static struct dentry *dfs_inj;
48
49static u8 n_banks;
50
51#define MAX_FLAG_OPT_SIZE 3
52#define NBCFG 0x44
53
54enum injection_type {
55 SW_INJ = 0,
56 HW_INJ,
57 DFR_INT_INJ,
58 THR_INT_INJ,
59 N_INJ_TYPES,
60};
61
62static const char * const flags_options[] = {
63 [SW_INJ] = "sw",
64 [HW_INJ] = "hw",
65 [DFR_INT_INJ] = "df",
66 [THR_INT_INJ] = "th",
67 NULL
68};
69
70
71static enum injection_type inj_type = SW_INJ;
72
73#define MCE_INJECT_SET(reg) \
74static int inj_##reg##_set(void *data, u64 val) \
75{ \
76 struct mce *m = (struct mce *)data; \
77 \
78 m->reg = val; \
79 return 0; \
80}
81
82MCE_INJECT_SET(status);
83MCE_INJECT_SET(misc);
84MCE_INJECT_SET(addr);
85MCE_INJECT_SET(synd);
86
87#define MCE_INJECT_GET(reg) \
88static int inj_##reg##_get(void *data, u64 *val) \
89{ \
90 struct mce *m = (struct mce *)data; \
91 \
92 *val = m->reg; \
93 return 0; \
94}
95
96MCE_INJECT_GET(status);
97MCE_INJECT_GET(misc);
98MCE_INJECT_GET(addr);
99MCE_INJECT_GET(synd);
100
101DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
102DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
103DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
104DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
105
106static void setup_inj_struct(struct mce *m)
107{
108 memset(m, 0, sizeof(struct mce));
109
110 m->cpuvendor = boot_cpu_data.x86_vendor;
111}
112
113
114static void inject_mce(struct mce *m)
115{
116 struct mce *i = &per_cpu(injectm, m->extcpu);
117
118
119 i->finished = 0;
120 mb();
121 m->finished = 0;
122
123 i->extcpu = m->extcpu;
124 mb();
125
126 memcpy(i, m, sizeof(struct mce));
127
128 mb();
129 i->finished = 1;
130}
131
132static void raise_poll(struct mce *m)
133{
134 unsigned long flags;
135 mce_banks_t b;
136
137 memset(&b, 0xff, sizeof(mce_banks_t));
138 local_irq_save(flags);
139 machine_check_poll(0, &b);
140 local_irq_restore(flags);
141 m->finished = 0;
142}
143
144static void raise_exception(struct mce *m, struct pt_regs *pregs)
145{
146 struct pt_regs regs;
147 unsigned long flags;
148
149 if (!pregs) {
150 memset(®s, 0, sizeof(struct pt_regs));
151 regs.ip = m->ip;
152 regs.cs = m->cs;
153 pregs = ®s;
154 }
155
156 local_irq_save(flags);
157 do_machine_check(pregs, 0);
158 local_irq_restore(flags);
159 m->finished = 0;
160}
161
162static cpumask_var_t mce_inject_cpumask;
163static DEFINE_MUTEX(mce_inject_mutex);
164
165static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
166{
167 int cpu = smp_processor_id();
168 struct mce *m = this_cpu_ptr(&injectm);
169 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
170 return NMI_DONE;
171 cpumask_clear_cpu(cpu, mce_inject_cpumask);
172 if (m->inject_flags & MCJ_EXCEPTION)
173 raise_exception(m, regs);
174 else if (m->status)
175 raise_poll(m);
176 return NMI_HANDLED;
177}
178
179static void mce_irq_ipi(void *info)
180{
181 int cpu = smp_processor_id();
182 struct mce *m = this_cpu_ptr(&injectm);
183
184 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
185 m->inject_flags & MCJ_EXCEPTION) {
186 cpumask_clear_cpu(cpu, mce_inject_cpumask);
187 raise_exception(m, NULL);
188 }
189}
190
191
192static int raise_local(void)
193{
194 struct mce *m = this_cpu_ptr(&injectm);
195 int context = MCJ_CTX(m->inject_flags);
196 int ret = 0;
197 int cpu = m->extcpu;
198
199 if (m->inject_flags & MCJ_EXCEPTION) {
200 pr_info("Triggering MCE exception on CPU %d\n", cpu);
201 switch (context) {
202 case MCJ_CTX_IRQ:
203
204
205
206
207
208
209 case MCJ_CTX_PROCESS:
210 raise_exception(m, NULL);
211 break;
212 default:
213 pr_info("Invalid MCE context\n");
214 ret = -EINVAL;
215 }
216 pr_info("MCE exception done on CPU %d\n", cpu);
217 } else if (m->status) {
218 pr_info("Starting machine check poll CPU %d\n", cpu);
219 raise_poll(m);
220 mce_notify_irq();
221 pr_info("Machine check poll done on CPU %d\n", cpu);
222 } else
223 m->finished = 0;
224
225 return ret;
226}
227
228static void __maybe_unused raise_mce(struct mce *m)
229{
230 int context = MCJ_CTX(m->inject_flags);
231
232 inject_mce(m);
233
234 if (context == MCJ_CTX_RANDOM)
235 return;
236
237 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
238 unsigned long start;
239 int cpu;
240
241 get_online_cpus();
242 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
243 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
244 for_each_online_cpu(cpu) {
245 struct mce *mcpu = &per_cpu(injectm, cpu);
246 if (!mcpu->finished ||
247 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
248 cpumask_clear_cpu(cpu, mce_inject_cpumask);
249 }
250 if (!cpumask_empty(mce_inject_cpumask)) {
251 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
252
253
254
255
256 preempt_disable();
257 smp_call_function_many(mce_inject_cpumask,
258 mce_irq_ipi, NULL, 0);
259 preempt_enable();
260 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
261 apic->send_IPI_mask(mce_inject_cpumask,
262 NMI_VECTOR);
263 }
264 start = jiffies;
265 while (!cpumask_empty(mce_inject_cpumask)) {
266 if (!time_before(jiffies, start + 2*HZ)) {
267 pr_err("Timeout waiting for mce inject %lx\n",
268 *cpumask_bits(mce_inject_cpumask));
269 break;
270 }
271 cpu_relax();
272 }
273 raise_local();
274 put_cpu();
275 put_online_cpus();
276 } else {
277 preempt_disable();
278 raise_local();
279 preempt_enable();
280 }
281}
282
283static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
284 void *data)
285{
286 struct mce *m = (struct mce *)data;
287
288 if (!m)
289 return NOTIFY_DONE;
290
291 mutex_lock(&mce_inject_mutex);
292 raise_mce(m);
293 mutex_unlock(&mce_inject_mutex);
294
295 return NOTIFY_DONE;
296}
297
298static struct notifier_block inject_nb = {
299 .notifier_call = mce_inject_raise,
300};
301
302
303
304
305
306static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
307{
308 u32 l, h;
309 int err;
310
311 err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
312 if (err) {
313 pr_err("%s: error reading HWCR\n", __func__);
314 return err;
315 }
316
317 enable ? (l |= BIT(18)) : (l &= ~BIT(18));
318
319 err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
320 if (err)
321 pr_err("%s: error writing HWCR\n", __func__);
322
323 return err;
324}
325
326static int __set_inj(const char *buf)
327{
328 int i;
329
330 for (i = 0; i < N_INJ_TYPES; i++) {
331 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
332 inj_type = i;
333 return 0;
334 }
335 }
336 return -EINVAL;
337}
338
339static ssize_t flags_read(struct file *filp, char __user *ubuf,
340 size_t cnt, loff_t *ppos)
341{
342 char buf[MAX_FLAG_OPT_SIZE];
343 int n;
344
345 n = sprintf(buf, "%s\n", flags_options[inj_type]);
346
347 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
348}
349
350static ssize_t flags_write(struct file *filp, const char __user *ubuf,
351 size_t cnt, loff_t *ppos)
352{
353 char buf[MAX_FLAG_OPT_SIZE], *__buf;
354 int err;
355
356 if (cnt > MAX_FLAG_OPT_SIZE)
357 return -EINVAL;
358
359 if (copy_from_user(&buf, ubuf, cnt))
360 return -EFAULT;
361
362 buf[cnt - 1] = 0;
363
364
365 __buf = strstrip(buf);
366
367 err = __set_inj(__buf);
368 if (err) {
369 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
370 return err;
371 }
372
373 *ppos += cnt;
374
375 return cnt;
376}
377
378static const struct file_operations flags_fops = {
379 .read = flags_read,
380 .write = flags_write,
381 .llseek = generic_file_llseek,
382};
383
384
385
386
387MCE_INJECT_GET(extcpu);
388
389static int inj_extcpu_set(void *data, u64 val)
390{
391 struct mce *m = (struct mce *)data;
392
393 if (val >= nr_cpu_ids || !cpu_online(val)) {
394 pr_err("%s: Invalid CPU: %llu\n", __func__, val);
395 return -EINVAL;
396 }
397 m->extcpu = val;
398 return 0;
399}
400
401DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
402
403static void trigger_mce(void *info)
404{
405 asm volatile("int $18");
406}
407
408static void trigger_dfr_int(void *info)
409{
410 asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
411}
412
413static void trigger_thr_int(void *info)
414{
415 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
416}
417
418static u32 get_nbc_for_node(int node_id)
419{
420 struct cpuinfo_x86 *c = &boot_cpu_data;
421 u32 cores_per_node;
422
423 cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
424
425 return cores_per_node * node_id;
426}
427
428static void toggle_nb_mca_mst_cpu(u16 nid)
429{
430 struct amd_northbridge *nb;
431 struct pci_dev *F3;
432 u32 val;
433 int err;
434
435 nb = node_to_amd_nb(nid);
436 if (!nb)
437 return;
438
439 F3 = nb->misc;
440 if (!F3)
441 return;
442
443 err = pci_read_config_dword(F3, NBCFG, &val);
444 if (err) {
445 pr_err("%s: Error reading F%dx%03x.\n",
446 __func__, PCI_FUNC(F3->devfn), NBCFG);
447 return;
448 }
449
450 if (val & BIT(27))
451 return;
452
453 pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
454 __func__);
455
456 val |= BIT(27);
457 err = pci_write_config_dword(F3, NBCFG, val);
458 if (err)
459 pr_err("%s: Error writing F%dx%03x.\n",
460 __func__, PCI_FUNC(F3->devfn), NBCFG);
461}
462
463static void prepare_msrs(void *info)
464{
465 struct mce m = *(struct mce *)info;
466 u8 b = m.bank;
467
468 wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
469
470 if (boot_cpu_has(X86_FEATURE_SMCA)) {
471 if (m.inject_flags == DFR_INT_INJ) {
472 wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
473 wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
474 } else {
475 wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
476 wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
477 }
478
479 wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
480 wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
481 } else {
482 wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
483 wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
484 wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
485 }
486}
487
488static void do_inject(void)
489{
490 u64 mcg_status = 0;
491 unsigned int cpu = i_mce.extcpu;
492 u8 b = i_mce.bank;
493
494 i_mce.tsc = rdtsc_ordered();
495
496 if (i_mce.misc)
497 i_mce.status |= MCI_STATUS_MISCV;
498
499 if (i_mce.synd)
500 i_mce.status |= MCI_STATUS_SYNDV;
501
502 if (inj_type == SW_INJ) {
503 mce_inject_log(&i_mce);
504 return;
505 }
506
507
508 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
509
510 if (!(i_mce.status & MCI_STATUS_PCC))
511 mcg_status |= MCG_STATUS_RIPV;
512
513
514
515
516
517
518 if (inj_type == DFR_INT_INJ) {
519 i_mce.status |= MCI_STATUS_DEFERRED;
520 i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
521 }
522
523
524
525
526
527
528 if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
529 b == 4 &&
530 boot_cpu_data.x86 < 0x17) {
531 toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
532 cpu = get_nbc_for_node(amd_get_nb_id(cpu));
533 }
534
535 get_online_cpus();
536 if (!cpu_online(cpu))
537 goto err;
538
539 toggle_hw_mce_inject(cpu, true);
540
541 i_mce.mcgstatus = mcg_status;
542 i_mce.inject_flags = inj_type;
543 smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
544
545 toggle_hw_mce_inject(cpu, false);
546
547 switch (inj_type) {
548 case DFR_INT_INJ:
549 smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
550 break;
551 case THR_INT_INJ:
552 smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
553 break;
554 default:
555 smp_call_function_single(cpu, trigger_mce, NULL, 0);
556 }
557
558err:
559 put_online_cpus();
560
561}
562
563
564
565
566
567static int inj_bank_set(void *data, u64 val)
568{
569 struct mce *m = (struct mce *)data;
570
571 if (val >= n_banks) {
572 pr_err("Non-existent MCE bank: %llu\n", val);
573 return -EINVAL;
574 }
575
576 m->bank = val;
577 do_inject();
578
579 return 0;
580}
581
582MCE_INJECT_GET(bank);
583
584DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
585
/* Text returned verbatim by the read-only "README" debugfs file. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature is \n"
"\t present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
"\n";
630
631static ssize_t
632inj_readme_read(struct file *filp, char __user *ubuf,
633 size_t cnt, loff_t *ppos)
634{
635 return simple_read_from_buffer(ubuf, cnt, ppos,
636 readme_msg, strlen(readme_msg));
637}
638
639static const struct file_operations readme_fops = {
640 .read = inj_readme_read,
641};
642
643static struct dfs_node {
644 char *name;
645 struct dentry *d;
646 const struct file_operations *fops;
647 umode_t perm;
648} dfs_fls[] = {
649 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
650 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
651 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
652 { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
653 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
654 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
655 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
656 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
657};
658
659static int __init debugfs_init(void)
660{
661 unsigned int i;
662 u64 cap;
663
664 rdmsrl(MSR_IA32_MCG_CAP, cap);
665 n_banks = cap & MCG_BANKCNT_MASK;
666
667 dfs_inj = debugfs_create_dir("mce-inject", NULL);
668 if (!dfs_inj)
669 return -EINVAL;
670
671 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
672 dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
673 dfs_fls[i].perm,
674 dfs_inj,
675 &i_mce,
676 dfs_fls[i].fops);
677
678 if (!dfs_fls[i].d)
679 goto err_dfs_add;
680 }
681
682 return 0;
683
684err_dfs_add:
685 while (i-- > 0)
686 debugfs_remove(dfs_fls[i].d);
687
688 debugfs_remove(dfs_inj);
689 dfs_inj = NULL;
690
691 return -ENODEV;
692}
693
694static int __init inject_init(void)
695{
696 int err;
697
698 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
699 return -ENOMEM;
700
701 err = debugfs_init();
702 if (err) {
703 free_cpumask_var(mce_inject_cpumask);
704 return err;
705 }
706
707 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
708 mce_register_injector_chain(&inject_nb);
709
710 setup_inj_struct(&i_mce);
711
712 pr_info("Machine check injector initialized\n");
713
714 return 0;
715}
716
717static void __exit inject_exit(void)
718{
719
720 mce_unregister_injector_chain(&inject_nb);
721 unregister_nmi_handler(NMI_LOCAL, "mce_notify");
722
723 debugfs_remove_recursive(dfs_inj);
724 dfs_inj = NULL;
725
726 memset(&dfs_fls, 0, sizeof(dfs_fls));
727
728 free_cpumask_var(mce_inject_cpumask);
729}
730
731module_init(inject_init);
732module_exit(inject_exit);
733MODULE_LICENSE("GPL");
734