1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/cpu.h>
20#include <linux/debugfs.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/notifier.h>
24#include <linux/pci.h>
25#include <linux/uaccess.h>
26
27#include <asm/amd_nb.h>
28#include <asm/apic.h>
29#include <asm/irq_vectors.h>
30#include <asm/mce.h>
31#include <asm/nmi.h>
32#include <asm/smp.h>
33
34#include "internal.h"
35
36
37
38
39static struct mce i_mce;
40static struct dentry *dfs_inj;
41
42#define MAX_FLAG_OPT_SIZE 4
43#define NBCFG 0x44
44
45enum injection_type {
46 SW_INJ = 0,
47 HW_INJ,
48 DFR_INT_INJ,
49 THR_INT_INJ,
50 N_INJ_TYPES,
51};
52
53static const char * const flags_options[] = {
54 [SW_INJ] = "sw",
55 [HW_INJ] = "hw",
56 [DFR_INT_INJ] = "df",
57 [THR_INT_INJ] = "th",
58 NULL
59};
60
61
62static enum injection_type inj_type = SW_INJ;
63
64#define MCE_INJECT_SET(reg) \
65static int inj_##reg##_set(void *data, u64 val) \
66{ \
67 struct mce *m = (struct mce *)data; \
68 \
69 m->reg = val; \
70 return 0; \
71}
72
73MCE_INJECT_SET(status);
74MCE_INJECT_SET(misc);
75MCE_INJECT_SET(addr);
76MCE_INJECT_SET(synd);
77MCE_INJECT_SET(ipid);
78
79#define MCE_INJECT_GET(reg) \
80static int inj_##reg##_get(void *data, u64 *val) \
81{ \
82 struct mce *m = (struct mce *)data; \
83 \
84 *val = m->reg; \
85 return 0; \
86}
87
88MCE_INJECT_GET(status);
89MCE_INJECT_GET(misc);
90MCE_INJECT_GET(addr);
91MCE_INJECT_GET(synd);
92MCE_INJECT_GET(ipid);
93
94DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
95DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
96DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
97DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
98DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
99
100static void setup_inj_struct(struct mce *m)
101{
102 memset(m, 0, sizeof(struct mce));
103
104 m->cpuvendor = boot_cpu_data.x86_vendor;
105 m->time = ktime_get_real_seconds();
106 m->cpuid = cpuid_eax(1);
107 m->microcode = boot_cpu_data.microcode;
108}
109
110
111static void inject_mce(struct mce *m)
112{
113 struct mce *i = &per_cpu(injectm, m->extcpu);
114
115
116 i->finished = 0;
117 mb();
118 m->finished = 0;
119
120 i->extcpu = m->extcpu;
121 mb();
122
123 memcpy(i, m, sizeof(struct mce));
124
125 mb();
126 i->finished = 1;
127}
128
129static void raise_poll(struct mce *m)
130{
131 unsigned long flags;
132 mce_banks_t b;
133
134 memset(&b, 0xff, sizeof(mce_banks_t));
135 local_irq_save(flags);
136 machine_check_poll(0, &b);
137 local_irq_restore(flags);
138 m->finished = 0;
139}
140
141static void raise_exception(struct mce *m, struct pt_regs *pregs)
142{
143 struct pt_regs regs;
144 unsigned long flags;
145
146 if (!pregs) {
147 memset(®s, 0, sizeof(struct pt_regs));
148 regs.ip = m->ip;
149 regs.cs = m->cs;
150 pregs = ®s;
151 }
152
153 local_irq_save(flags);
154 do_machine_check(pregs);
155 local_irq_restore(flags);
156 m->finished = 0;
157}
158
159static cpumask_var_t mce_inject_cpumask;
160static DEFINE_MUTEX(mce_inject_mutex);
161
162static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
163{
164 int cpu = smp_processor_id();
165 struct mce *m = this_cpu_ptr(&injectm);
166 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
167 return NMI_DONE;
168 cpumask_clear_cpu(cpu, mce_inject_cpumask);
169 if (m->inject_flags & MCJ_EXCEPTION)
170 raise_exception(m, regs);
171 else if (m->status)
172 raise_poll(m);
173 return NMI_HANDLED;
174}
175
176static void mce_irq_ipi(void *info)
177{
178 int cpu = smp_processor_id();
179 struct mce *m = this_cpu_ptr(&injectm);
180
181 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
182 m->inject_flags & MCJ_EXCEPTION) {
183 cpumask_clear_cpu(cpu, mce_inject_cpumask);
184 raise_exception(m, NULL);
185 }
186}
187
188
189static int raise_local(void)
190{
191 struct mce *m = this_cpu_ptr(&injectm);
192 int context = MCJ_CTX(m->inject_flags);
193 int ret = 0;
194 int cpu = m->extcpu;
195
196 if (m->inject_flags & MCJ_EXCEPTION) {
197 pr_info("Triggering MCE exception on CPU %d\n", cpu);
198 switch (context) {
199 case MCJ_CTX_IRQ:
200
201
202
203
204
205 fallthrough;
206 case MCJ_CTX_PROCESS:
207 raise_exception(m, NULL);
208 break;
209 default:
210 pr_info("Invalid MCE context\n");
211 ret = -EINVAL;
212 }
213 pr_info("MCE exception done on CPU %d\n", cpu);
214 } else if (m->status) {
215 pr_info("Starting machine check poll CPU %d\n", cpu);
216 raise_poll(m);
217 mce_notify_irq();
218 pr_info("Machine check poll done on CPU %d\n", cpu);
219 } else
220 m->finished = 0;
221
222 return ret;
223}
224
225static void __maybe_unused raise_mce(struct mce *m)
226{
227 int context = MCJ_CTX(m->inject_flags);
228
229 inject_mce(m);
230
231 if (context == MCJ_CTX_RANDOM)
232 return;
233
234 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
235 unsigned long start;
236 int cpu;
237
238 cpus_read_lock();
239 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
240 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
241 for_each_online_cpu(cpu) {
242 struct mce *mcpu = &per_cpu(injectm, cpu);
243 if (!mcpu->finished ||
244 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
245 cpumask_clear_cpu(cpu, mce_inject_cpumask);
246 }
247 if (!cpumask_empty(mce_inject_cpumask)) {
248 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
249
250
251
252
253 preempt_disable();
254 smp_call_function_many(mce_inject_cpumask,
255 mce_irq_ipi, NULL, 0);
256 preempt_enable();
257 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
258 apic->send_IPI_mask(mce_inject_cpumask,
259 NMI_VECTOR);
260 }
261 start = jiffies;
262 while (!cpumask_empty(mce_inject_cpumask)) {
263 if (!time_before(jiffies, start + 2*HZ)) {
264 pr_err("Timeout waiting for mce inject %lx\n",
265 *cpumask_bits(mce_inject_cpumask));
266 break;
267 }
268 cpu_relax();
269 }
270 raise_local();
271 put_cpu();
272 cpus_read_unlock();
273 } else {
274 preempt_disable();
275 raise_local();
276 preempt_enable();
277 }
278}
279
280static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
281 void *data)
282{
283 struct mce *m = (struct mce *)data;
284
285 if (!m)
286 return NOTIFY_DONE;
287
288 mutex_lock(&mce_inject_mutex);
289 raise_mce(m);
290 mutex_unlock(&mce_inject_mutex);
291
292 return NOTIFY_DONE;
293}
294
295static struct notifier_block inject_nb = {
296 .notifier_call = mce_inject_raise,
297};
298
299
300
301
302
303static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
304{
305 u32 l, h;
306 int err;
307
308 err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
309 if (err) {
310 pr_err("%s: error reading HWCR\n", __func__);
311 return err;
312 }
313
314 enable ? (l |= BIT(18)) : (l &= ~BIT(18));
315
316 err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
317 if (err)
318 pr_err("%s: error writing HWCR\n", __func__);
319
320 return err;
321}
322
323static int __set_inj(const char *buf)
324{
325 int i;
326
327 for (i = 0; i < N_INJ_TYPES; i++) {
328 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
329 inj_type = i;
330 return 0;
331 }
332 }
333 return -EINVAL;
334}
335
336static ssize_t flags_read(struct file *filp, char __user *ubuf,
337 size_t cnt, loff_t *ppos)
338{
339 char buf[MAX_FLAG_OPT_SIZE];
340 int n;
341
342 n = sprintf(buf, "%s\n", flags_options[inj_type]);
343
344 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
345}
346
347static ssize_t flags_write(struct file *filp, const char __user *ubuf,
348 size_t cnt, loff_t *ppos)
349{
350 char buf[MAX_FLAG_OPT_SIZE], *__buf;
351 int err;
352
353 if (cnt > MAX_FLAG_OPT_SIZE)
354 return -EINVAL;
355
356 if (copy_from_user(&buf, ubuf, cnt))
357 return -EFAULT;
358
359 buf[cnt - 1] = 0;
360
361
362 __buf = strstrip(buf);
363
364 err = __set_inj(__buf);
365 if (err) {
366 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
367 return err;
368 }
369
370 *ppos += cnt;
371
372 return cnt;
373}
374
375static const struct file_operations flags_fops = {
376 .read = flags_read,
377 .write = flags_write,
378 .llseek = generic_file_llseek,
379};
380
381
382
383
384MCE_INJECT_GET(extcpu);
385
386static int inj_extcpu_set(void *data, u64 val)
387{
388 struct mce *m = (struct mce *)data;
389
390 if (val >= nr_cpu_ids || !cpu_online(val)) {
391 pr_err("%s: Invalid CPU: %llu\n", __func__, val);
392 return -EINVAL;
393 }
394 m->extcpu = val;
395 return 0;
396}
397
398DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
399
400static void trigger_mce(void *info)
401{
402 asm volatile("int $18");
403}
404
405static void trigger_dfr_int(void *info)
406{
407 asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
408}
409
410static void trigger_thr_int(void *info)
411{
412 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
413}
414
415static u32 get_nbc_for_node(int node_id)
416{
417 struct cpuinfo_x86 *c = &boot_cpu_data;
418 u32 cores_per_node;
419
420 cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
421
422 return cores_per_node * node_id;
423}
424
425static void toggle_nb_mca_mst_cpu(u16 nid)
426{
427 struct amd_northbridge *nb;
428 struct pci_dev *F3;
429 u32 val;
430 int err;
431
432 nb = node_to_amd_nb(nid);
433 if (!nb)
434 return;
435
436 F3 = nb->misc;
437 if (!F3)
438 return;
439
440 err = pci_read_config_dword(F3, NBCFG, &val);
441 if (err) {
442 pr_err("%s: Error reading F%dx%03x.\n",
443 __func__, PCI_FUNC(F3->devfn), NBCFG);
444 return;
445 }
446
447 if (val & BIT(27))
448 return;
449
450 pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
451 __func__);
452
453 val |= BIT(27);
454 err = pci_write_config_dword(F3, NBCFG, val);
455 if (err)
456 pr_err("%s: Error writing F%dx%03x.\n",
457 __func__, PCI_FUNC(F3->devfn), NBCFG);
458}
459
460static void prepare_msrs(void *info)
461{
462 struct mce m = *(struct mce *)info;
463 u8 b = m.bank;
464
465 wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
466
467 if (boot_cpu_has(X86_FEATURE_SMCA)) {
468 if (m.inject_flags == DFR_INT_INJ) {
469 wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
470 wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
471 } else {
472 wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
473 wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
474 }
475
476 wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
477 wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
478 } else {
479 wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
480 wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
481 wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
482 }
483}
484
485static void do_inject(void)
486{
487 u64 mcg_status = 0;
488 unsigned int cpu = i_mce.extcpu;
489 u8 b = i_mce.bank;
490
491 i_mce.tsc = rdtsc_ordered();
492
493 if (i_mce.misc)
494 i_mce.status |= MCI_STATUS_MISCV;
495
496 if (i_mce.synd)
497 i_mce.status |= MCI_STATUS_SYNDV;
498
499 if (inj_type == SW_INJ) {
500 mce_log(&i_mce);
501 return;
502 }
503
504
505 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
506
507 if (!(i_mce.status & MCI_STATUS_PCC))
508 mcg_status |= MCG_STATUS_RIPV;
509
510
511
512
513
514
515 if (inj_type == DFR_INT_INJ) {
516 i_mce.status |= MCI_STATUS_DEFERRED;
517 i_mce.status &= ~MCI_STATUS_UC;
518 }
519
520
521
522
523
524
525 if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
526 b == 4 &&
527 boot_cpu_data.x86 < 0x17) {
528 toggle_nb_mca_mst_cpu(topology_die_id(cpu));
529 cpu = get_nbc_for_node(topology_die_id(cpu));
530 }
531
532 cpus_read_lock();
533 if (!cpu_online(cpu))
534 goto err;
535
536 toggle_hw_mce_inject(cpu, true);
537
538 i_mce.mcgstatus = mcg_status;
539 i_mce.inject_flags = inj_type;
540 smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
541
542 toggle_hw_mce_inject(cpu, false);
543
544 switch (inj_type) {
545 case DFR_INT_INJ:
546 smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
547 break;
548 case THR_INT_INJ:
549 smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
550 break;
551 default:
552 smp_call_function_single(cpu, trigger_mce, NULL, 0);
553 }
554
555err:
556 cpus_read_unlock();
557
558}
559
560
561
562
563
564static int inj_bank_set(void *data, u64 val)
565{
566 struct mce *m = (struct mce *)data;
567 u8 n_banks;
568 u64 cap;
569
570
571 rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
572 n_banks = cap & MCG_BANKCNT_MASK;
573
574 if (val >= n_banks) {
575 pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
576 return -EINVAL;
577 }
578
579 m->bank = val;
580 do_inject();
581
582
583 setup_inj_struct(&i_mce);
584
585 return 0;
586}
587
588MCE_INJECT_GET(bank);
589
590DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
591
/* Text returned by the README debugfs file. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature \n"
"\t is present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
"\n"
"ipid:\t IPID (AMD-specific)\n"
"\n";
638
639static ssize_t
640inj_readme_read(struct file *filp, char __user *ubuf,
641 size_t cnt, loff_t *ppos)
642{
643 return simple_read_from_buffer(ubuf, cnt, ppos,
644 readme_msg, strlen(readme_msg));
645}
646
647static const struct file_operations readme_fops = {
648 .read = inj_readme_read,
649};
650
651static struct dfs_node {
652 char *name;
653 const struct file_operations *fops;
654 umode_t perm;
655} dfs_fls[] = {
656 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
657 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
658 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
659 { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
660 { .name = "ipid", .fops = &ipid_fops, .perm = S_IRUSR | S_IWUSR },
661 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
662 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
663 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
664 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
665};
666
667static void __init debugfs_init(void)
668{
669 unsigned int i;
670
671 dfs_inj = debugfs_create_dir("mce-inject", NULL);
672
673 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
674 debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
675 &i_mce, dfs_fls[i].fops);
676}
677
678static int __init inject_init(void)
679{
680 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
681 return -ENOMEM;
682
683 debugfs_init();
684
685 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
686 mce_register_injector_chain(&inject_nb);
687
688 setup_inj_struct(&i_mce);
689
690 pr_info("Machine check injector initialized\n");
691
692 return 0;
693}
694
695static void __exit inject_exit(void)
696{
697
698 mce_unregister_injector_chain(&inject_nb);
699 unregister_nmi_handler(NMI_LOCAL, "mce_notify");
700
701 debugfs_remove_recursive(dfs_inj);
702 dfs_inj = NULL;
703
704 memset(&dfs_fls, 0, sizeof(dfs_fls));
705
706 free_cpumask_var(mce_inject_cpumask);
707}
708
709module_init(inject_init);
710module_exit(inject_exit);
711MODULE_LICENSE("GPL");
712