1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/cpu.h>
20#include <linux/debugfs.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/notifier.h>
24#include <linux/pci.h>
25#include <linux/uaccess.h>
26
27#include <asm/amd_nb.h>
28#include <asm/apic.h>
29#include <asm/irq_vectors.h>
30#include <asm/mce.h>
31#include <asm/nmi.h>
32#include <asm/smp.h>
33
34#include "internal.h"
35
36
37
38
39static struct mce i_mce;
40static struct dentry *dfs_inj;
41
42#define MAX_FLAG_OPT_SIZE 4
43#define NBCFG 0x44
44
/*
 * Injection modes selectable through the "flags" file.  The values are also
 * used as indices into flags_options[], so both must stay in the same order.
 */
enum injection_type {
	SW_INJ,		/* "sw": software only, the record is just logged */
	HW_INJ,		/* "hw": raise a machine check on the target CPU */
	DFR_INT_INJ,	/* "df": raise the deferred error interrupt */
	THR_INT_INJ,	/* "th": raise the threshold interrupt */
	N_INJ_TYPES,	/* number of modes, not a mode itself */
};
52
53static const char * const flags_options[] = {
54 [SW_INJ] = "sw",
55 [HW_INJ] = "hw",
56 [DFR_INT_INJ] = "df",
57 [THR_INT_INJ] = "th",
58 NULL
59};
60
61
62static enum injection_type inj_type = SW_INJ;
63
64#define MCE_INJECT_SET(reg) \
65static int inj_##reg##_set(void *data, u64 val) \
66{ \
67 struct mce *m = (struct mce *)data; \
68 \
69 m->reg = val; \
70 return 0; \
71}
72
73MCE_INJECT_SET(status);
74MCE_INJECT_SET(misc);
75MCE_INJECT_SET(addr);
76MCE_INJECT_SET(synd);
77
78#define MCE_INJECT_GET(reg) \
79static int inj_##reg##_get(void *data, u64 *val) \
80{ \
81 struct mce *m = (struct mce *)data; \
82 \
83 *val = m->reg; \
84 return 0; \
85}
86
87MCE_INJECT_GET(status);
88MCE_INJECT_GET(misc);
89MCE_INJECT_GET(addr);
90MCE_INJECT_GET(synd);
91
92DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
93DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
94DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
95DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
96
97static void setup_inj_struct(struct mce *m)
98{
99 memset(m, 0, sizeof(struct mce));
100
101 m->cpuvendor = boot_cpu_data.x86_vendor;
102 m->time = ktime_get_real_seconds();
103 m->cpuid = cpuid_eax(1);
104 m->microcode = boot_cpu_data.microcode;
105}
106
107
108static void inject_mce(struct mce *m)
109{
110 struct mce *i = &per_cpu(injectm, m->extcpu);
111
112
113 i->finished = 0;
114 mb();
115 m->finished = 0;
116
117 i->extcpu = m->extcpu;
118 mb();
119
120 memcpy(i, m, sizeof(struct mce));
121
122 mb();
123 i->finished = 1;
124}
125
126static void raise_poll(struct mce *m)
127{
128 unsigned long flags;
129 mce_banks_t b;
130
131 memset(&b, 0xff, sizeof(mce_banks_t));
132 local_irq_save(flags);
133 machine_check_poll(0, &b);
134 local_irq_restore(flags);
135 m->finished = 0;
136}
137
138static void raise_exception(struct mce *m, struct pt_regs *pregs)
139{
140 struct pt_regs regs;
141 unsigned long flags;
142
143 if (!pregs) {
144 memset(®s, 0, sizeof(struct pt_regs));
145 regs.ip = m->ip;
146 regs.cs = m->cs;
147 pregs = ®s;
148 }
149
150 local_irq_save(flags);
151 do_machine_check(pregs, 0);
152 local_irq_restore(flags);
153 m->finished = 0;
154}
155
156static cpumask_var_t mce_inject_cpumask;
157static DEFINE_MUTEX(mce_inject_mutex);
158
159static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
160{
161 int cpu = smp_processor_id();
162 struct mce *m = this_cpu_ptr(&injectm);
163 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
164 return NMI_DONE;
165 cpumask_clear_cpu(cpu, mce_inject_cpumask);
166 if (m->inject_flags & MCJ_EXCEPTION)
167 raise_exception(m, regs);
168 else if (m->status)
169 raise_poll(m);
170 return NMI_HANDLED;
171}
172
173static void mce_irq_ipi(void *info)
174{
175 int cpu = smp_processor_id();
176 struct mce *m = this_cpu_ptr(&injectm);
177
178 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
179 m->inject_flags & MCJ_EXCEPTION) {
180 cpumask_clear_cpu(cpu, mce_inject_cpumask);
181 raise_exception(m, NULL);
182 }
183}
184
185
186static int raise_local(void)
187{
188 struct mce *m = this_cpu_ptr(&injectm);
189 int context = MCJ_CTX(m->inject_flags);
190 int ret = 0;
191 int cpu = m->extcpu;
192
193 if (m->inject_flags & MCJ_EXCEPTION) {
194 pr_info("Triggering MCE exception on CPU %d\n", cpu);
195 switch (context) {
196 case MCJ_CTX_IRQ:
197
198
199
200
201
202
203 case MCJ_CTX_PROCESS:
204 raise_exception(m, NULL);
205 break;
206 default:
207 pr_info("Invalid MCE context\n");
208 ret = -EINVAL;
209 }
210 pr_info("MCE exception done on CPU %d\n", cpu);
211 } else if (m->status) {
212 pr_info("Starting machine check poll CPU %d\n", cpu);
213 raise_poll(m);
214 mce_notify_irq();
215 pr_info("Machine check poll done on CPU %d\n", cpu);
216 } else
217 m->finished = 0;
218
219 return ret;
220}
221
222static void __maybe_unused raise_mce(struct mce *m)
223{
224 int context = MCJ_CTX(m->inject_flags);
225
226 inject_mce(m);
227
228 if (context == MCJ_CTX_RANDOM)
229 return;
230
231 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
232 unsigned long start;
233 int cpu;
234
235 get_online_cpus();
236 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
237 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
238 for_each_online_cpu(cpu) {
239 struct mce *mcpu = &per_cpu(injectm, cpu);
240 if (!mcpu->finished ||
241 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
242 cpumask_clear_cpu(cpu, mce_inject_cpumask);
243 }
244 if (!cpumask_empty(mce_inject_cpumask)) {
245 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
246
247
248
249
250 preempt_disable();
251 smp_call_function_many(mce_inject_cpumask,
252 mce_irq_ipi, NULL, 0);
253 preempt_enable();
254 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
255 apic->send_IPI_mask(mce_inject_cpumask,
256 NMI_VECTOR);
257 }
258 start = jiffies;
259 while (!cpumask_empty(mce_inject_cpumask)) {
260 if (!time_before(jiffies, start + 2*HZ)) {
261 pr_err("Timeout waiting for mce inject %lx\n",
262 *cpumask_bits(mce_inject_cpumask));
263 break;
264 }
265 cpu_relax();
266 }
267 raise_local();
268 put_cpu();
269 put_online_cpus();
270 } else {
271 preempt_disable();
272 raise_local();
273 preempt_enable();
274 }
275}
276
277static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
278 void *data)
279{
280 struct mce *m = (struct mce *)data;
281
282 if (!m)
283 return NOTIFY_DONE;
284
285 mutex_lock(&mce_inject_mutex);
286 raise_mce(m);
287 mutex_unlock(&mce_inject_mutex);
288
289 return NOTIFY_DONE;
290}
291
292static struct notifier_block inject_nb = {
293 .notifier_call = mce_inject_raise,
294};
295
296
297
298
299
300static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
301{
302 u32 l, h;
303 int err;
304
305 err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
306 if (err) {
307 pr_err("%s: error reading HWCR\n", __func__);
308 return err;
309 }
310
311 enable ? (l |= BIT(18)) : (l &= ~BIT(18));
312
313 err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
314 if (err)
315 pr_err("%s: error writing HWCR\n", __func__);
316
317 return err;
318}
319
320static int __set_inj(const char *buf)
321{
322 int i;
323
324 for (i = 0; i < N_INJ_TYPES; i++) {
325 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
326 inj_type = i;
327 return 0;
328 }
329 }
330 return -EINVAL;
331}
332
333static ssize_t flags_read(struct file *filp, char __user *ubuf,
334 size_t cnt, loff_t *ppos)
335{
336 char buf[MAX_FLAG_OPT_SIZE];
337 int n;
338
339 n = sprintf(buf, "%s\n", flags_options[inj_type]);
340
341 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
342}
343
344static ssize_t flags_write(struct file *filp, const char __user *ubuf,
345 size_t cnt, loff_t *ppos)
346{
347 char buf[MAX_FLAG_OPT_SIZE], *__buf;
348 int err;
349
350 if (cnt > MAX_FLAG_OPT_SIZE)
351 return -EINVAL;
352
353 if (copy_from_user(&buf, ubuf, cnt))
354 return -EFAULT;
355
356 buf[cnt - 1] = 0;
357
358
359 __buf = strstrip(buf);
360
361 err = __set_inj(__buf);
362 if (err) {
363 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
364 return err;
365 }
366
367 *ppos += cnt;
368
369 return cnt;
370}
371
372static const struct file_operations flags_fops = {
373 .read = flags_read,
374 .write = flags_write,
375 .llseek = generic_file_llseek,
376};
377
378
379
380
381MCE_INJECT_GET(extcpu);
382
383static int inj_extcpu_set(void *data, u64 val)
384{
385 struct mce *m = (struct mce *)data;
386
387 if (val >= nr_cpu_ids || !cpu_online(val)) {
388 pr_err("%s: Invalid CPU: %llu\n", __func__, val);
389 return -EINVAL;
390 }
391 m->extcpu = val;
392 return 0;
393}
394
395DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
396
397static void trigger_mce(void *info)
398{
399 asm volatile("int $18");
400}
401
402static void trigger_dfr_int(void *info)
403{
404 asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
405}
406
407static void trigger_thr_int(void *info)
408{
409 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
410}
411
412static u32 get_nbc_for_node(int node_id)
413{
414 struct cpuinfo_x86 *c = &boot_cpu_data;
415 u32 cores_per_node;
416
417 cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
418
419 return cores_per_node * node_id;
420}
421
422static void toggle_nb_mca_mst_cpu(u16 nid)
423{
424 struct amd_northbridge *nb;
425 struct pci_dev *F3;
426 u32 val;
427 int err;
428
429 nb = node_to_amd_nb(nid);
430 if (!nb)
431 return;
432
433 F3 = nb->misc;
434 if (!F3)
435 return;
436
437 err = pci_read_config_dword(F3, NBCFG, &val);
438 if (err) {
439 pr_err("%s: Error reading F%dx%03x.\n",
440 __func__, PCI_FUNC(F3->devfn), NBCFG);
441 return;
442 }
443
444 if (val & BIT(27))
445 return;
446
447 pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
448 __func__);
449
450 val |= BIT(27);
451 err = pci_write_config_dword(F3, NBCFG, val);
452 if (err)
453 pr_err("%s: Error writing F%dx%03x.\n",
454 __func__, PCI_FUNC(F3->devfn), NBCFG);
455}
456
457static void prepare_msrs(void *info)
458{
459 struct mce m = *(struct mce *)info;
460 u8 b = m.bank;
461
462 wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
463
464 if (boot_cpu_has(X86_FEATURE_SMCA)) {
465 if (m.inject_flags == DFR_INT_INJ) {
466 wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
467 wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
468 } else {
469 wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
470 wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
471 }
472
473 wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
474 wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
475 } else {
476 wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
477 wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
478 wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
479 }
480}
481
482static void do_inject(void)
483{
484 u64 mcg_status = 0;
485 unsigned int cpu = i_mce.extcpu;
486 u8 b = i_mce.bank;
487
488 i_mce.tsc = rdtsc_ordered();
489
490 if (i_mce.misc)
491 i_mce.status |= MCI_STATUS_MISCV;
492
493 if (i_mce.synd)
494 i_mce.status |= MCI_STATUS_SYNDV;
495
496 if (inj_type == SW_INJ) {
497 mce_inject_log(&i_mce);
498 return;
499 }
500
501
502 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
503
504 if (!(i_mce.status & MCI_STATUS_PCC))
505 mcg_status |= MCG_STATUS_RIPV;
506
507
508
509
510
511
512 if (inj_type == DFR_INT_INJ) {
513 i_mce.status |= MCI_STATUS_DEFERRED;
514 i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
515 }
516
517
518
519
520
521
522 if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
523 b == 4 &&
524 boot_cpu_data.x86 < 0x17) {
525 toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
526 cpu = get_nbc_for_node(amd_get_nb_id(cpu));
527 }
528
529 get_online_cpus();
530 if (!cpu_online(cpu))
531 goto err;
532
533 toggle_hw_mce_inject(cpu, true);
534
535 i_mce.mcgstatus = mcg_status;
536 i_mce.inject_flags = inj_type;
537 smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
538
539 toggle_hw_mce_inject(cpu, false);
540
541 switch (inj_type) {
542 case DFR_INT_INJ:
543 smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
544 break;
545 case THR_INT_INJ:
546 smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
547 break;
548 default:
549 smp_call_function_single(cpu, trigger_mce, NULL, 0);
550 }
551
552err:
553 put_online_cpus();
554
555}
556
557
558
559
560
561static int inj_bank_set(void *data, u64 val)
562{
563 struct mce *m = (struct mce *)data;
564 u8 n_banks;
565 u64 cap;
566
567
568 rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
569 n_banks = cap & MCG_BANKCNT_MASK;
570
571 if (val >= n_banks) {
572 pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
573 return -EINVAL;
574 }
575
576 m->bank = val;
577 do_inject();
578
579
580 setup_inj_struct(&i_mce);
581
582 return 0;
583}
584
585MCE_INJECT_GET(bank);
586
587DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
588
/* Text served by the README debugfs file; describes every other file. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature is \n"
"\t present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
"\n";
633
634static ssize_t
635inj_readme_read(struct file *filp, char __user *ubuf,
636 size_t cnt, loff_t *ppos)
637{
638 return simple_read_from_buffer(ubuf, cnt, ppos,
639 readme_msg, strlen(readme_msg));
640}
641
642static const struct file_operations readme_fops = {
643 .read = inj_readme_read,
644};
645
646static struct dfs_node {
647 char *name;
648 const struct file_operations *fops;
649 umode_t perm;
650} dfs_fls[] = {
651 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
652 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
653 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
654 { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
655 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
656 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
657 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
658 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
659};
660
661static void __init debugfs_init(void)
662{
663 unsigned int i;
664
665 dfs_inj = debugfs_create_dir("mce-inject", NULL);
666
667 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
668 debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
669 &i_mce, dfs_fls[i].fops);
670}
671
672static int __init inject_init(void)
673{
674 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
675 return -ENOMEM;
676
677 debugfs_init();
678
679 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
680 mce_register_injector_chain(&inject_nb);
681
682 setup_inj_struct(&i_mce);
683
684 pr_info("Machine check injector initialized\n");
685
686 return 0;
687}
688
689static void __exit inject_exit(void)
690{
691
692 mce_unregister_injector_chain(&inject_nb);
693 unregister_nmi_handler(NMI_LOCAL, "mce_notify");
694
695 debugfs_remove_recursive(dfs_inj);
696 dfs_inj = NULL;
697
698 memset(&dfs_fls, 0, sizeof(dfs_fls));
699
700 free_cpumask_var(mce_inject_cpumask);
701}
702
703module_init(inject_init);
704module_exit(inject_exit);
705MODULE_LICENSE("GPL");
706