1#include <linux/module.h>
2#include <linux/slab.h>
3
4#include "mce_amd.h"
5
/* Per-family decode callbacks; allocated and populated by mce_amd_init(). */
static struct amd_decoder_ops *fam_ops;

/*
 * Mask handed to XEC() when extracting the extended error code from a
 * status value; mce_amd_init() widens it for some CPU families.
 */
static u8 xec_mask = 0xf;

/* When false, amd_filter_mce() drops bank 4 errors with extended code 0x5. */
static bool report_gart_errors;
/* Optional DRAM ECC decoder; set/cleared by amd_{,un}register_ecc_decoder(). */
static void (*decode_dram_ecc)(int node_id, struct mce *m);
12
/*
 * Select whether GART errors are reported.
 *
 * @v: new value of report_gart_errors. While it is false, amd_filter_mce()
 *     filters out bank 4 errors whose extended error code is 0x5.
 */
void amd_report_gart_errors(bool v)
{
	report_gart_errors = v;
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);
18
/*
 * Install the callback used to decode DRAM ECC errors.
 *
 * @f: function invoked as f(node_id, m) from the bank 4 and SMCA decoders
 *     in this file. Any previously stored callback is overwritten without
 *     a check.
 */
void amd_register_ecc_decoder(void (*f)(int, struct mce *))
{
	decode_dram_ecc = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);
24
25void amd_unregister_ecc_decoder(void (*f)(int, struct mce *))
26{
27 if (decode_dram_ecc) {
28 WARN_ON(decode_dram_ecc != f);
29
30 decode_dram_ecc = NULL;
31 }
32}
33EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);
34
35
36
37
38
39
40
/*
 * String tables for the individual fields of an error code. They are not
 * referenced by name in this file; presumably the *_MSG() macros from
 * mce_amd.h index them (TODO confirm against the header).
 */

/* Presumably used by TT_MSG(); printed after "tx:" in amd_decode_err_code(). */
static const char * const tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };

/* Presumably used by LL_MSG(); printed after "cache level:". */
static const char * const ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };

/* Presumably used by R4_MSG(); printed after "mem-tx:". */
static const char * const rrrr_msgs[] = {
	"GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};

/* Presumably used by PP_MSG(); printed after "part-proc:". Exported. */
const char * const pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);

/* Presumably used by TO_MSG(); printed in parentheses after the PP string. */
static const char * const to_msgs[] = { "no timeout", "timed out" };

/* Presumably used by II_MSG(); printed after "mem/io:". */
static const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };

/* Presumably used by UU_MSG(); printed after "internal:". */
static const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
63
/*
 * Descriptions printed by f15h_mc1_mce(). The table is packed, so the
 * extended error code is not a direct index:
 *   xec 0x00..0x0a -> entries 0..10
 *   xec 0x0d       -> entry 11
 *   xec 0x10       -> entry 12
 *   xec 0x11..0x15 -> entries 13..17 (printed as "Decoder <entry> parity error.")
 */
static const char * const f15h_mc1_mce_desc[] = {
	"UC during a demand linefill from L2",
	"Parity error during data load from IC",
	"Parity error for IC valid bit",
	"Main tag parity error",
	"Parity error in prediction queue",
	"PFB data/address parity error",
	"Parity error in the branch status reg",
	"PFB promotion address error",
	"Tag error during probe/victimization",
	"Parity error for IC probe tag valid bit",
	"PFB non-cacheable bit parity error",
	"PFB valid bit parity error",			/* xec = 0xd */
	"Microcode Patch Buffer",			/* xec = 0x10 */
	"uop queue",
	"insn buffer",
	"predecode buffer",
	"fetch address FIFO",
	"dispatch uop queue"
};
84
/*
 * Descriptions printed by f15h_mc2_mce() for memory errors. Packed table:
 *   xec 0x04..0x0c -> entries 0..8
 *   xec 0x10..0x14 -> entries 9..13
 */
static const char * const f15h_mc2_mce_desc[] = {
	"Fill ECC error on data fills",			/* xec = 0x4 */
	"Fill parity error on insn fills",
	"Prefetcher request FIFO parity error",
	"PRQ address parity error",
	"PRQ data parity error",
	"WCC Tag ECC error",
	"WCC Data ECC error",
	"WCB Data parity error",
	"VB Data ECC or parity error",
	"L2 Tag ECC error",				/* xec = 0x10 */
	"Hard L2 Tag ECC error",
	"Multiple hits on L2 tag",
	"XAB parity error",
	"PRB address parity error"
};
101
/*
 * Descriptions printed by decode_mc4_mce(). Packed table:
 *   xec 0x00..0x0e -> entries 0..14 (direct index)
 *   xec 0x1c..0x1f -> entries 15..18 (index is xec - 13)
 * Extended codes 0x0f and 0x19 are handled separately by the decoder and
 * have no entry here.
 */
static const char * const mc4_mce_desc[] = {
	"DRAM ECC error detected on the NB",
	"CRC error detected on HT link",
	"Link-defined sync error packets detected on HT link",
	"HT Master abort",
	"HT Target abort",
	"Invalid GART PTE entry during GART table walk",
	"Unsupported atomic RMW received from an IO link",
	"Watchdog timeout due to lack of progress",
	"DRAM ECC error detected on the NB",
	"SVM DMA Exclusion Vector error",
	"HT data error detected on link",
	"Protocol error (link, L3, probe filter)",
	"NB internal arrays parity error",
	"DRAM addr/ctl signals parity error",
	"IO link transmission error",
	"L3 data cache ECC error",			/* xec = 0x1c */
	"L3 cache tag error",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};
123
/*
 * Descriptions printed by decode_mc5_mce(), indexed directly by extended
 * error codes 0x0..0xd. Entries 0x0 and 0xc are printed verbatim; all the
 * others are followed by " parity error".
 */
static const char * const mc5_mce_desc[] = {
	"CPU Watchdog timer expire",
	"Wakeup array dest tag",
	"AG payload array",
	"EX payload array",
	"IDRF array",
	"Retire dispatch queue",
	"Mapper checkpoint array",
	"Physical register file EX0 port",
	"Physical register file EX1 port",
	"Physical register file AG0 port",
	"Physical register file AG1 port",
	"Flag register file",
	"DE error occurred",
	"Retire status queue"
};
140
141
/*
 * Descriptions for the SMCA_LS bank type, indexed directly by extended
 * error code; looked up through smca_mce_descs[] in decode_smca_errors().
 *
 * Entry 12 previously read "poison comsumption"; the spelling is corrected
 * because this text is emitted verbatim in the kernel log.
 */
static const char * const smca_ls_mce_desc[] = {
	"Load queue parity",
	"Store queue parity",
	"Miss address buffer payload parity",
	"L1 TLB parity",
	"Reserved",
	"DC tag error type 6",
	"DC tag error type 1",
	"Internal error type 1",
	"Internal error type 2",
	"Sys Read data error thread 0",
	"Sys read data error thread 1",
	"DC tag error type 2",
	"DC data error type 1 (poison consumption)",
	"DC data error type 2",
	"DC data error type 3",
	"DC tag error type 4",
	"L2 TLB parity",
	"PDC parity error",
	"DC tag error type 3",
	"DC tag error type 5",
	"L2 fill data error",
};
165
/*
 * Per-bank-type SMCA error descriptions. Each table is indexed directly by
 * the extended error code and is reached through smca_mce_descs[] from
 * decode_smca_errors().
 */

/* SMCA_IF bank type. */
static const char * const smca_if_mce_desc[] = {
	"microtag probe port parity error",
	"IC microtag or full tag multi-hit error",
	"IC full tag parity",
	"IC data array parity",
	"Decoupling queue phys addr parity error",
	"L0 ITLB parity error",
	"L1 ITLB parity error",
	"L2 ITLB parity error",
	"BPQ snoop parity on Thread 0",
	"BPQ snoop parity on Thread 1",
	"L1 BTB multi-match error",
	"L2 BTB multi-match error",
	"L2 Cache Response Poison error",
	"System Read Data error",
};

/* SMCA_L2_CACHE bank type. */
static const char * const smca_l2_mce_desc[] = {
	"L2M tag multi-way-hit error",
	"L2M tag ECC error",
	"L2M data ECC error",
	"HW assert",
};

/* SMCA_DE bank type. */
static const char * const smca_de_mce_desc[] = {
	"uop cache tag parity error",
	"uop cache data parity error",
	"Insn buffer parity error",
	"uop queue parity error",
	"Insn dispatch queue parity error",
	"Fetch address FIFO parity",
	"Patch RAM data parity",
	"Patch RAM sequencer parity",
	"uop buffer parity"
};

/* SMCA_EX bank type. */
static const char * const smca_ex_mce_desc[] = {
	"Watchdog timeout error",
	"Phy register file parity",
	"Flag register file parity",
	"Immediate displacement register file parity",
	"Address generator payload parity",
	"EX payload parity",
	"Checkpoint queue parity",
	"Retire dispatch queue parity",
	"Retire status queue parity error",
	"Scheduling queue parity error",
	"Branch buffer queue parity error",
};

/* SMCA_FP bank type. */
static const char * const smca_fp_mce_desc[] = {
	"Physical register file parity",
	"Freelist parity error",
	"Schedule queue parity",
	"NSQ parity error",
	"Retire queue parity",
	"Status register file parity",
	"Hardware assertion",
};

/* SMCA_L3_CACHE bank type. */
static const char * const smca_l3_mce_desc[] = {
	"Shadow tag macro ECC error",
	"Shadow tag macro multi-way-hit error",
	"L3M tag ECC error",
	"L3M tag multi-way-hit error",
	"L3M data ECC error",
	"XI parity, L3 fill done channel error",
	"L3 victim queue parity",
	"L3 HW assert",
};

/* SMCA_CS bank type. */
static const char * const smca_cs_mce_desc[] = {
	"Illegal request from transport layer",
	"Address violation",
	"Security violation",
	"Illegal response from transport layer",
	"Unexpected response",
	"Parity error on incoming request or probe response data",
	"Parity error on incoming read response data",
	"Atomic request parity",
	"ECC error on probe filter access",
};

/* SMCA_PIE bank type. */
static const char * const smca_pie_mce_desc[] = {
	"HW assert",
	"Internal PIE register security violation",
	"Error on GMI link",
	"Poison data written to internal PIE register",
};

/* SMCA_UMC bank type; entry 0 additionally triggers the DRAM ECC decoder. */
static const char * const smca_umc_mce_desc[] = {
	"DRAM ECC error",
	"Data poison error on DRAM",
	"SDP parity error",
	"Advanced peripheral bus error",
	"Command/address parity error",
	"Write data CRC error",
};

/* SMCA_PB bank type. */
static const char * const smca_pb_mce_desc[] = {
	"Parameter Block RAM ECC error",
};

/* SMCA_PSP bank type. */
static const char * const smca_psp_mce_desc[] = {
	"PSP RAM ECC or parity error",
};

/* SMCA_SMU bank type. */
static const char * const smca_smu_mce_desc[] = {
	"SMU RAM ECC or parity error",
};
276
/*
 * One description table together with its length.
 *
 * @descs:     array of description strings, indexed by extended error code
 * @num_descs: number of entries in @descs; decode_smca_errors() uses it as
 *             the upper bound for the extended error code
 */
struct smca_mce_desc {
	const char * const *descs;
	unsigned int num_descs;
};

/*
 * Description tables keyed by SMCA bank type. decode_smca_errors() indexes
 * this array with hwid->bank_type.
 */
static struct smca_mce_desc smca_mce_descs[] = {
	[SMCA_LS]	= { smca_ls_mce_desc,	ARRAY_SIZE(smca_ls_mce_desc)	},
	[SMCA_IF]	= { smca_if_mce_desc,	ARRAY_SIZE(smca_if_mce_desc)	},
	[SMCA_L2_CACHE]	= { smca_l2_mce_desc,	ARRAY_SIZE(smca_l2_mce_desc)	},
	[SMCA_DE]	= { smca_de_mce_desc,	ARRAY_SIZE(smca_de_mce_desc)	},
	[SMCA_EX]	= { smca_ex_mce_desc,	ARRAY_SIZE(smca_ex_mce_desc)	},
	[SMCA_FP]	= { smca_fp_mce_desc,	ARRAY_SIZE(smca_fp_mce_desc)	},
	[SMCA_L3_CACHE]	= { smca_l3_mce_desc,	ARRAY_SIZE(smca_l3_mce_desc)	},
	[SMCA_CS]	= { smca_cs_mce_desc,	ARRAY_SIZE(smca_cs_mce_desc)	},
	[SMCA_PIE]	= { smca_pie_mce_desc,	ARRAY_SIZE(smca_pie_mce_desc)	},
	[SMCA_UMC]	= { smca_umc_mce_desc,	ARRAY_SIZE(smca_umc_mce_desc)	},
	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
	[SMCA_SMU]	= { smca_smu_mce_desc,	ARRAY_SIZE(smca_smu_mce_desc)	},
};
297
298static bool f12h_mc0_mce(u16 ec, u8 xec)
299{
300 bool ret = false;
301
302 if (MEM_ERROR(ec)) {
303 u8 ll = LL(ec);
304 ret = true;
305
306 if (ll == LL_L2)
307 pr_cont("during L1 linefill from L2.\n");
308 else if (ll == LL_L1)
309 pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
310 else
311 ret = false;
312 }
313 return ret;
314}
315
316static bool f10h_mc0_mce(u16 ec, u8 xec)
317{
318 if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
319 pr_cont("during data scrub.\n");
320 return true;
321 }
322 return f12h_mc0_mce(ec, xec);
323}
324
325static bool k8_mc0_mce(u16 ec, u8 xec)
326{
327 if (BUS_ERROR(ec)) {
328 pr_cont("during system linefill.\n");
329 return true;
330 }
331
332 return f10h_mc0_mce(ec, xec);
333}
334
335static bool cat_mc0_mce(u16 ec, u8 xec)
336{
337 u8 r4 = R4(ec);
338 bool ret = true;
339
340 if (MEM_ERROR(ec)) {
341
342 if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
343 return false;
344
345 switch (r4) {
346 case R4_DRD:
347 case R4_DWR:
348 pr_cont("Data/Tag parity error due to %s.\n",
349 (r4 == R4_DRD ? "load/hw prf" : "store"));
350 break;
351 case R4_EVICT:
352 pr_cont("Copyback parity error on a tag miss.\n");
353 break;
354 case R4_SNOOP:
355 pr_cont("Tag parity error during snoop.\n");
356 break;
357 default:
358 ret = false;
359 }
360 } else if (BUS_ERROR(ec)) {
361
362 if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
363 return false;
364
365 pr_cont("System read data error on a ");
366
367 switch (r4) {
368 case R4_RD:
369 pr_cont("TLB reload.\n");
370 break;
371 case R4_DWR:
372 pr_cont("store.\n");
373 break;
374 case R4_DRD:
375 pr_cont("load.\n");
376 break;
377 default:
378 ret = false;
379 }
380 } else {
381 ret = false;
382 }
383
384 return ret;
385}
386
387static bool f15h_mc0_mce(u16 ec, u8 xec)
388{
389 bool ret = true;
390
391 if (MEM_ERROR(ec)) {
392
393 switch (xec) {
394 case 0x0:
395 pr_cont("Data Array access error.\n");
396 break;
397
398 case 0x1:
399 pr_cont("UC error during a linefill from L2/NB.\n");
400 break;
401
402 case 0x2:
403 case 0x11:
404 pr_cont("STQ access error.\n");
405 break;
406
407 case 0x3:
408 pr_cont("SCB access error.\n");
409 break;
410
411 case 0x10:
412 pr_cont("Tag error.\n");
413 break;
414
415 case 0x12:
416 pr_cont("LDQ access error.\n");
417 break;
418
419 default:
420 ret = false;
421 }
422 } else if (BUS_ERROR(ec)) {
423
424 if (!xec)
425 pr_cont("System Read Data Error.\n");
426 else
427 pr_cont(" Internal error condition type %d.\n", xec);
428 } else if (INT_ERROR(ec)) {
429 if (xec <= 0x1f)
430 pr_cont("Hardware Assert.\n");
431 else
432 ret = false;
433
434 } else
435 ret = false;
436
437 return ret;
438}
439
440static void decode_mc0_mce(struct mce *m)
441{
442 u16 ec = EC(m->status);
443 u8 xec = XEC(m->status, xec_mask);
444
445 pr_emerg(HW_ERR "MC0 Error: ");
446
447
448 if (TLB_ERROR(ec)) {
449 if (TT(ec) == TT_DATA) {
450 pr_cont("%s TLB %s.\n", LL_MSG(ec),
451 ((xec == 2) ? "locked miss"
452 : (xec ? "multimatch" : "parity")));
453 return;
454 }
455 } else if (fam_ops->mc0_mce(ec, xec))
456 ;
457 else
458 pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
459}
460
461static bool k8_mc1_mce(u16 ec, u8 xec)
462{
463 u8 ll = LL(ec);
464 bool ret = true;
465
466 if (!MEM_ERROR(ec))
467 return false;
468
469 if (ll == 0x2)
470 pr_cont("during a linefill from L2.\n");
471 else if (ll == 0x1) {
472 switch (R4(ec)) {
473 case R4_IRD:
474 pr_cont("Parity error during data load.\n");
475 break;
476
477 case R4_EVICT:
478 pr_cont("Copyback Parity/Victim error.\n");
479 break;
480
481 case R4_SNOOP:
482 pr_cont("Tag Snoop error.\n");
483 break;
484
485 default:
486 ret = false;
487 break;
488 }
489 } else
490 ret = false;
491
492 return ret;
493}
494
495static bool cat_mc1_mce(u16 ec, u8 xec)
496{
497 u8 r4 = R4(ec);
498 bool ret = true;
499
500 if (!MEM_ERROR(ec))
501 return false;
502
503 if (TT(ec) != TT_INSTR)
504 return false;
505
506 if (r4 == R4_IRD)
507 pr_cont("Data/tag array parity error for a tag hit.\n");
508 else if (r4 == R4_SNOOP)
509 pr_cont("Tag error during snoop/victimization.\n");
510 else if (xec == 0x0)
511 pr_cont("Tag parity error from victim castout.\n");
512 else if (xec == 0x2)
513 pr_cont("Microcode patch RAM parity error.\n");
514 else
515 ret = false;
516
517 return ret;
518}
519
520static bool f15h_mc1_mce(u16 ec, u8 xec)
521{
522 bool ret = true;
523
524 if (!MEM_ERROR(ec))
525 return false;
526
527 switch (xec) {
528 case 0x0 ... 0xa:
529 pr_cont("%s.\n", f15h_mc1_mce_desc[xec]);
530 break;
531
532 case 0xd:
533 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-2]);
534 break;
535
536 case 0x10:
537 pr_cont("%s.\n", f15h_mc1_mce_desc[xec-4]);
538 break;
539
540 case 0x11 ... 0x15:
541 pr_cont("Decoder %s parity error.\n", f15h_mc1_mce_desc[xec-4]);
542 break;
543
544 default:
545 ret = false;
546 }
547 return ret;
548}
549
550static void decode_mc1_mce(struct mce *m)
551{
552 u16 ec = EC(m->status);
553 u8 xec = XEC(m->status, xec_mask);
554
555 pr_emerg(HW_ERR "MC1 Error: ");
556
557 if (TLB_ERROR(ec))
558 pr_cont("%s TLB %s.\n", LL_MSG(ec),
559 (xec ? "multimatch" : "parity error"));
560 else if (BUS_ERROR(ec)) {
561 bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
562
563 pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
564 } else if (INT_ERROR(ec)) {
565 if (xec <= 0x3f)
566 pr_cont("Hardware Assert.\n");
567 else
568 goto wrong_mc1_mce;
569 } else if (fam_ops->mc1_mce(ec, xec))
570 ;
571 else
572 goto wrong_mc1_mce;
573
574 return;
575
576wrong_mc1_mce:
577 pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
578}
579
580static bool k8_mc2_mce(u16 ec, u8 xec)
581{
582 bool ret = true;
583
584 if (xec == 0x1)
585 pr_cont(" in the write data buffers.\n");
586 else if (xec == 0x3)
587 pr_cont(" in the victim data buffers.\n");
588 else if (xec == 0x2 && MEM_ERROR(ec))
589 pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
590 else if (xec == 0x0) {
591 if (TLB_ERROR(ec))
592 pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n",
593 TT_MSG(ec));
594 else if (BUS_ERROR(ec))
595 pr_cont(": %s/ECC error in data read from NB: %s.\n",
596 R4_MSG(ec), PP_MSG(ec));
597 else if (MEM_ERROR(ec)) {
598 u8 r4 = R4(ec);
599
600 if (r4 >= 0x7)
601 pr_cont(": %s error during data copyback.\n",
602 R4_MSG(ec));
603 else if (r4 <= 0x1)
604 pr_cont(": %s parity/ECC error during data "
605 "access from L2.\n", R4_MSG(ec));
606 else
607 ret = false;
608 } else
609 ret = false;
610 } else
611 ret = false;
612
613 return ret;
614}
615
616static bool f15h_mc2_mce(u16 ec, u8 xec)
617{
618 bool ret = true;
619
620 if (TLB_ERROR(ec)) {
621 if (xec == 0x0)
622 pr_cont("Data parity TLB read error.\n");
623 else if (xec == 0x1)
624 pr_cont("Poison data provided for TLB fill.\n");
625 else
626 ret = false;
627 } else if (BUS_ERROR(ec)) {
628 if (xec > 2)
629 ret = false;
630
631 pr_cont("Error during attempted NB data read.\n");
632 } else if (MEM_ERROR(ec)) {
633 switch (xec) {
634 case 0x4 ... 0xc:
635 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x4]);
636 break;
637
638 case 0x10 ... 0x14:
639 pr_cont("%s.\n", f15h_mc2_mce_desc[xec - 0x7]);
640 break;
641
642 default:
643 ret = false;
644 }
645 } else if (INT_ERROR(ec)) {
646 if (xec <= 0x3f)
647 pr_cont("Hardware Assert.\n");
648 else
649 ret = false;
650 }
651
652 return ret;
653}
654
655static bool f16h_mc2_mce(u16 ec, u8 xec)
656{
657 u8 r4 = R4(ec);
658
659 if (!MEM_ERROR(ec))
660 return false;
661
662 switch (xec) {
663 case 0x04 ... 0x05:
664 pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
665 break;
666
667 case 0x09 ... 0x0b:
668 case 0x0d ... 0x0f:
669 pr_cont("ECC error in L2 tag (%s).\n",
670 ((r4 == R4_GEN) ? "BankReq" :
671 ((r4 == R4_SNOOP) ? "Prb" : "Fill")));
672 break;
673
674 case 0x10 ... 0x19:
675 case 0x1b:
676 pr_cont("ECC error in L2 data array (%s).\n",
677 (((r4 == R4_RD) && !(xec & 0x3)) ? "Hit" :
678 ((r4 == R4_GEN) ? "Attr" :
679 ((r4 == R4_EVICT) ? "Vict" : "Fill"))));
680 break;
681
682 case 0x1c ... 0x1d:
683 case 0x1f:
684 pr_cont("Parity error in L2 attribute bits (%s).\n",
685 ((r4 == R4_RD) ? "Hit" :
686 ((r4 == R4_GEN) ? "Attr" : "Fill")));
687 break;
688
689 default:
690 return false;
691 }
692
693 return true;
694}
695
696static void decode_mc2_mce(struct mce *m)
697{
698 u16 ec = EC(m->status);
699 u8 xec = XEC(m->status, xec_mask);
700
701 pr_emerg(HW_ERR "MC2 Error: ");
702
703 if (!fam_ops->mc2_mce(ec, xec))
704 pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
705}
706
707static void decode_mc3_mce(struct mce *m)
708{
709 u16 ec = EC(m->status);
710 u8 xec = XEC(m->status, xec_mask);
711
712 if (boot_cpu_data.x86 >= 0x14) {
713 pr_emerg("You shouldn't be seeing MC3 MCE on this cpu family,"
714 " please report on LKML.\n");
715 return;
716 }
717
718 pr_emerg(HW_ERR "MC3 Error");
719
720 if (xec == 0x0) {
721 u8 r4 = R4(ec);
722
723 if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
724 goto wrong_mc3_mce;
725
726 pr_cont(" during %s.\n", R4_MSG(ec));
727 } else
728 goto wrong_mc3_mce;
729
730 return;
731
732 wrong_mc3_mce:
733 pr_emerg(HW_ERR "Corrupted MC3 MCE info?\n");
734}
735
736static void decode_mc4_mce(struct mce *m)
737{
738 struct cpuinfo_x86 *c = &boot_cpu_data;
739 int node_id = amd_get_nb_id(m->extcpu);
740 u16 ec = EC(m->status);
741 u8 xec = XEC(m->status, 0x1f);
742 u8 offset = 0;
743
744 pr_emerg(HW_ERR "MC4 Error (node %d): ", node_id);
745
746 switch (xec) {
747 case 0x0 ... 0xe:
748
749
750 if (xec == 0x0 || xec == 0x8) {
751
752 if (c->x86 == 0x11)
753 goto wrong_mc4_mce;
754
755 pr_cont("%s.\n", mc4_mce_desc[xec]);
756
757 if (decode_dram_ecc)
758 decode_dram_ecc(node_id, m);
759 return;
760 }
761 break;
762
763 case 0xf:
764 if (TLB_ERROR(ec))
765 pr_cont("GART Table Walk data error.\n");
766 else if (BUS_ERROR(ec))
767 pr_cont("DMA Exclusion Vector Table Walk error.\n");
768 else
769 goto wrong_mc4_mce;
770 return;
771
772 case 0x19:
773 if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
774 pr_cont("Compute Unit Data Error.\n");
775 else
776 goto wrong_mc4_mce;
777 return;
778
779 case 0x1c ... 0x1f:
780 offset = 13;
781 break;
782
783 default:
784 goto wrong_mc4_mce;
785 }
786
787 pr_cont("%s.\n", mc4_mce_desc[xec - offset]);
788 return;
789
790 wrong_mc4_mce:
791 pr_emerg(HW_ERR "Corrupted MC4 MCE info?\n");
792}
793
794static void decode_mc5_mce(struct mce *m)
795{
796 struct cpuinfo_x86 *c = &boot_cpu_data;
797 u16 ec = EC(m->status);
798 u8 xec = XEC(m->status, xec_mask);
799
800 if (c->x86 == 0xf || c->x86 == 0x11)
801 goto wrong_mc5_mce;
802
803 pr_emerg(HW_ERR "MC5 Error: ");
804
805 if (INT_ERROR(ec)) {
806 if (xec <= 0x1f) {
807 pr_cont("Hardware Assert.\n");
808 return;
809 } else
810 goto wrong_mc5_mce;
811 }
812
813 if (xec == 0x0 || xec == 0xc)
814 pr_cont("%s.\n", mc5_mce_desc[xec]);
815 else if (xec <= 0xd)
816 pr_cont("%s parity error.\n", mc5_mce_desc[xec]);
817 else
818 goto wrong_mc5_mce;
819
820 return;
821
822 wrong_mc5_mce:
823 pr_emerg(HW_ERR "Corrupted MC5 MCE info?\n");
824}
825
826static void decode_mc6_mce(struct mce *m)
827{
828 u8 xec = XEC(m->status, xec_mask);
829
830 pr_emerg(HW_ERR "MC6 Error: ");
831
832 switch (xec) {
833 case 0x0:
834 pr_cont("Hardware Assertion");
835 break;
836
837 case 0x1:
838 pr_cont("Free List");
839 break;
840
841 case 0x2:
842 pr_cont("Physical Register File");
843 break;
844
845 case 0x3:
846 pr_cont("Retire Queue");
847 break;
848
849 case 0x4:
850 pr_cont("Scheduler table");
851 break;
852
853 case 0x5:
854 pr_cont("Status Register File");
855 break;
856
857 default:
858 goto wrong_mc6_mce;
859 break;
860 }
861
862 pr_cont(" parity error.\n");
863
864 return;
865
866 wrong_mc6_mce:
867 pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
868}
869
870
871static void decode_smca_errors(struct mce *m)
872{
873 struct smca_hwid *hwid;
874 unsigned int bank_type;
875 const char *ip_name;
876 u8 xec = XEC(m->status, xec_mask);
877
878 if (m->bank >= ARRAY_SIZE(smca_banks))
879 return;
880
881 hwid = smca_banks[m->bank].hwid;
882 if (!hwid)
883 return;
884
885 bank_type = hwid->bank_type;
886 ip_name = smca_names[bank_type].long_name;
887
888 pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
889
890
891 if (xec < smca_mce_descs[bank_type].num_descs &&
892 (hwid->xec_bitmap & BIT_ULL(xec))) {
893 pr_emerg(HW_ERR "%s Error: ", ip_name);
894 pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
895 }
896
897
898
899
900
901 if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
902 decode_dram_ecc(amd_get_nb_id(m->extcpu) >> 1, m);
903}
904
905static inline void amd_decode_err_code(u16 ec)
906{
907 if (INT_ERROR(ec)) {
908 pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
909 return;
910 }
911
912 pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
913
914 if (BUS_ERROR(ec))
915 pr_cont(", mem/io: %s", II_MSG(ec));
916 else
917 pr_cont(", tx: %s", TT_MSG(ec));
918
919 if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
920 pr_cont(", mem-tx: %s", R4_MSG(ec));
921
922 if (BUS_ERROR(ec))
923 pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
924 }
925
926 pr_cont("\n");
927}
928
929
930
931
932static bool amd_filter_mce(struct mce *m)
933{
934 u8 xec = (m->status >> 16) & 0x1f;
935
936
937
938
939 if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
940 return true;
941
942 return false;
943}
944
945static const char *decode_error_status(struct mce *m)
946{
947 if (m->status & MCI_STATUS_UC) {
948 if (m->status & MCI_STATUS_PCC)
949 return "System Fatal error.";
950 if (m->mcgstatus & MCG_STATUS_RIPV)
951 return "Uncorrected, software restartable error.";
952 return "Uncorrected, software containable error.";
953 }
954
955 if (m->status & MCI_STATUS_DEFERRED)
956 return "Deferred error.";
957
958 return "Corrected error, no action required.";
959}
960
961int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
962{
963 struct mce *m = (struct mce *)data;
964 struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
965 int ecc;
966
967 if (amd_filter_mce(m))
968 return NOTIFY_STOP;
969
970 pr_emerg(HW_ERR "%s\n", decode_error_status(m));
971
972 pr_emerg(HW_ERR "CPU:%d (%x:%x:%x) MC%d_STATUS[%s|%s|%s|%s|%s",
973 m->extcpu,
974 c->x86, c->x86_model, c->x86_mask,
975 m->bank,
976 ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
977 ((m->status & MCI_STATUS_UC) ? "UE" :
978 (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
979 ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
980 ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
981 ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
982
983 if (c->x86 >= 0x15)
984 pr_cont("|%s|%s",
985 ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
986 ((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
987
988 if (boot_cpu_has(X86_FEATURE_SMCA)) {
989 u32 low, high;
990 u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
991
992 pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
993
994 if (!rdmsr_safe(addr, &low, &high) &&
995 (low & MCI_CONFIG_MCAX))
996 pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
997 }
998
999
1000 ecc = (m->status >> 45) & 0x3;
1001 if (ecc)
1002 pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
1003
1004 pr_cont("]: 0x%016llx\n", m->status);
1005
1006 if (m->status & MCI_STATUS_ADDRV)
1007 pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr);
1008
1009 if (boot_cpu_has(X86_FEATURE_SMCA)) {
1010 if (m->status & MCI_STATUS_SYNDV)
1011 pr_cont(", Syndrome: 0x%016llx", m->synd);
1012
1013 pr_cont(", IPID: 0x%016llx", m->ipid);
1014
1015 pr_cont("\n");
1016
1017 decode_smca_errors(m);
1018 goto err_code;
1019 } else
1020 pr_cont("\n");
1021
1022 if (!fam_ops)
1023 goto err_code;
1024
1025 switch (m->bank) {
1026 case 0:
1027 decode_mc0_mce(m);
1028 break;
1029
1030 case 1:
1031 decode_mc1_mce(m);
1032 break;
1033
1034 case 2:
1035 decode_mc2_mce(m);
1036 break;
1037
1038 case 3:
1039 decode_mc3_mce(m);
1040 break;
1041
1042 case 4:
1043 decode_mc4_mce(m);
1044 break;
1045
1046 case 5:
1047 decode_mc5_mce(m);
1048 break;
1049
1050 case 6:
1051 decode_mc6_mce(m);
1052 break;
1053
1054 default:
1055 break;
1056 }
1057
1058 err_code:
1059 amd_decode_err_code(m->status & 0xffff);
1060
1061 return NOTIFY_STOP;
1062}
1063EXPORT_SYMBOL_GPL(amd_decode_mce);
1064
/*
 * Notifier block that hooks amd_decode_mce() into the MCE decode chain;
 * registered in mce_amd_init() and unregistered in mce_amd_exit().
 */
static struct notifier_block amd_mce_dec_nb = {
	.notifier_call	= amd_decode_mce,
	.priority	= MCE_PRIO_EDAC,
};
1069
1070static int __init mce_amd_init(void)
1071{
1072 struct cpuinfo_x86 *c = &boot_cpu_data;
1073
1074 if (c->x86_vendor != X86_VENDOR_AMD)
1075 return -ENODEV;
1076
1077 fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
1078 if (!fam_ops)
1079 return -ENOMEM;
1080
1081 switch (c->x86) {
1082 case 0xf:
1083 fam_ops->mc0_mce = k8_mc0_mce;
1084 fam_ops->mc1_mce = k8_mc1_mce;
1085 fam_ops->mc2_mce = k8_mc2_mce;
1086 break;
1087
1088 case 0x10:
1089 fam_ops->mc0_mce = f10h_mc0_mce;
1090 fam_ops->mc1_mce = k8_mc1_mce;
1091 fam_ops->mc2_mce = k8_mc2_mce;
1092 break;
1093
1094 case 0x11:
1095 fam_ops->mc0_mce = k8_mc0_mce;
1096 fam_ops->mc1_mce = k8_mc1_mce;
1097 fam_ops->mc2_mce = k8_mc2_mce;
1098 break;
1099
1100 case 0x12:
1101 fam_ops->mc0_mce = f12h_mc0_mce;
1102 fam_ops->mc1_mce = k8_mc1_mce;
1103 fam_ops->mc2_mce = k8_mc2_mce;
1104 break;
1105
1106 case 0x14:
1107 fam_ops->mc0_mce = cat_mc0_mce;
1108 fam_ops->mc1_mce = cat_mc1_mce;
1109 fam_ops->mc2_mce = k8_mc2_mce;
1110 break;
1111
1112 case 0x15:
1113 xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
1114
1115 fam_ops->mc0_mce = f15h_mc0_mce;
1116 fam_ops->mc1_mce = f15h_mc1_mce;
1117 fam_ops->mc2_mce = f15h_mc2_mce;
1118 break;
1119
1120 case 0x16:
1121 xec_mask = 0x1f;
1122 fam_ops->mc0_mce = cat_mc0_mce;
1123 fam_ops->mc1_mce = cat_mc1_mce;
1124 fam_ops->mc2_mce = f16h_mc2_mce;
1125 break;
1126
1127 case 0x17:
1128 xec_mask = 0x3f;
1129 if (!boot_cpu_has(X86_FEATURE_SMCA)) {
1130 printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
1131 goto err_out;
1132 }
1133 break;
1134
1135 default:
1136 printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
1137 goto err_out;
1138 }
1139
1140 pr_info("MCE: In-kernel MCE decoding enabled.\n");
1141
1142 mce_register_decode_chain(&amd_mce_dec_nb);
1143
1144 return 0;
1145
1146err_out:
1147 kfree(fam_ops);
1148 fam_ops = NULL;
1149 return -EINVAL;
1150}
1151early_initcall(mce_amd_init);
1152
/* Teardown and module metadata; compiled only when MODULE is defined. */
#ifdef MODULE
/*
 * Undo mce_amd_init(): remove the decoder from the MCE decode chain and
 * release the family ops.
 */
static void __exit mce_amd_exit(void)
{
	mce_unregister_decode_chain(&amd_mce_dec_nb);
	kfree(fam_ops);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif
1165