1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* Prefix every pr_*() message emitted from this file with "AER: ". */
#define pr_fmt(fmt) "AER: " fmt
/* dev_*() messages share the pr_*() prefix. */
#define dev_fmt pr_fmt
17
18#include <linux/bitops.h>
19#include <linux/cper.h>
20#include <linux/pci.h>
21#include <linux/pci-acpi.h>
22#include <linux/sched.h>
23#include <linux/kernel.h>
24#include <linux/errno.h>
25#include <linux/pm.h>
26#include <linux/init.h>
27#include <linux/interrupt.h>
28#include <linux/delay.h>
29#include <linux/kfifo.h>
30#include <linux/slab.h>
31#include <acpi/apei.h>
32#include <ras/ras_event.h>
33
34#include "../pci.h"
35#include "portdrv.h"
36
/* Number of entries in the FIFO declared inside struct aer_rpc */
#define AER_ERROR_SOURCES_MAX 128

/* Sizes of the per-status-bit counter arrays in struct aer_stats */
#define AER_MAX_TYPEOF_COR_ERRS 16
#define AER_MAX_TYPEOF_UNCOR_ERRS 27
41
/*
 * One error report captured by aer_irq(): the Root Error Status value and
 * the Error Source Identification value read at interrupt time.
 */
struct aer_err_source {
	unsigned int status;	/* read from PCI_ERR_ROOT_STATUS */
	unsigned int id;	/* read from PCI_ERR_ROOT_ERR_SRC */
};
46
/*
 * Per-port service data allocated in aer_probe().  aer_irq() pushes error
 * sources into aer_fifo and aer_isr() drains them.
 */
struct aer_rpc {
	struct pci_dev *rpd;		/* port this service is bound to */
	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};
51
52
/* AER statistics kept per pci_dev (allocated in pci_aer_init()) */
struct aer_stats {

	/*
	 * Per-device counters, updated by pci_dev_aer_stats_incr().
	 * Each array has one slot per status bit; the slot is bumped for
	 * every unmasked status bit set in a reported error.
	 */

	/* Counters for individual correctable status bits */
	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
	/* Counters for individual uncorrectable status bits (fatal) */
	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Counters for individual uncorrectable status bits (non-fatal) */
	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Incremented once per pci_dev_aer_stats_incr() call, by severity */
	u64 dev_total_cor_errs;
	u64 dev_total_fatal_errs;
	u64 dev_total_nonfatal_errs;

	/*
	 * Root-level totals, updated by pci_rootport_aer_stats_incr() from
	 * the Root Error Status bits captured with each error source.  The
	 * matching sysfs attributes are only visible on Root Ports.
	 */
	u64 rootport_total_cor_errs;
	u64 rootport_total_fatal_errs;
	u64 rootport_total_nonfatal_errs;
};
87
/*
 * Uncorrectable status bits for which this file treats the TLP header log
 * as valid (see aer_get_device_error_info() and cper_print_aer()).
 */
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
	PCI_ERR_UNC_ECRC| \
	PCI_ERR_UNC_UNSUP| \
	PCI_ERR_UNC_COMP_ABORT| \
	PCI_ERR_UNC_UNX_COMP| \
	PCI_ERR_UNC_MALF_TLP)

/* Root Control bits cleared by aer_enable_rootport() */
#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \
	PCI_EXP_RTCTL_SENFEE| \
	PCI_EXP_RTCTL_SEFEE)
/* Root Error Command enable bits toggled when enabling/disabling the port */
#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \
	PCI_ERR_ROOT_CMD_NONFATAL_EN| \
	PCI_ERR_ROOT_CMD_FATAL_EN)
/*
 * Split the 32-bit error source value: aer_isr_one_error() uses the low
 * 16 bits as the correctable source ID and the high 16 bits as the
 * uncorrectable source ID.  The argument is parenthesized so that
 * expressions such as ERR_COR_ID(a | b) evaluate as intended.
 */
#define ERR_COR_ID(d)			((d) & 0xffff)
#define ERR_UNCOR_ID(d)			((d) >> 16)
103
/* Set by pci_no_aer(); when nonzero pci_aer_available() returns false */
static int pcie_aer_disable;
/* Defined later in this file; referenced earlier as a recovery callback */
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
106
107void pci_no_aer(void)
108{
109 pcie_aer_disable = 1;
110}
111
112bool pci_aer_available(void)
113{
114 return !pcie_aer_disable && pci_msi_enabled();
115}
116
117#ifdef CONFIG_PCIE_ECRC
118
/* ECRC policy values; also used as indices into ecrc_policy_str[] */
#define ECRC_POLICY_DEFAULT 0
#define ECRC_POLICY_OFF 1
#define ECRC_POLICY_ON 2

/* Current policy; changed by pcie_ecrc_get_policy() */
static int ecrc_policy = ECRC_POLICY_DEFAULT;

/* Strings accepted by pcie_ecrc_get_policy(), indexed by policy value */
static const char * const ecrc_policy_str[] = {
	[ECRC_POLICY_DEFAULT] = "bios",
	[ECRC_POLICY_OFF] = "off",
	[ECRC_POLICY_ON] = "on"
};
130
131
132
133
134
135
136
137static int enable_ecrc_checking(struct pci_dev *dev)
138{
139 int aer = dev->aer_cap;
140 u32 reg32;
141
142 if (!aer)
143 return -ENODEV;
144
145 pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32);
146 if (reg32 & PCI_ERR_CAP_ECRC_GENC)
147 reg32 |= PCI_ERR_CAP_ECRC_GENE;
148 if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
149 reg32 |= PCI_ERR_CAP_ECRC_CHKE;
150 pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);
151
152 return 0;
153}
154
155
156
157
158
159
160
161static int disable_ecrc_checking(struct pci_dev *dev)
162{
163 int aer = dev->aer_cap;
164 u32 reg32;
165
166 if (!aer)
167 return -ENODEV;
168
169 pci_read_config_dword(dev, aer + PCI_ERR_CAP, ®32);
170 reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
171 pci_write_config_dword(dev, aer + PCI_ERR_CAP, reg32);
172
173 return 0;
174}
175
176
177
178
179
180void pcie_set_ecrc_checking(struct pci_dev *dev)
181{
182 switch (ecrc_policy) {
183 case ECRC_POLICY_DEFAULT:
184 return;
185 case ECRC_POLICY_OFF:
186 disable_ecrc_checking(dev);
187 break;
188 case ECRC_POLICY_ON:
189 enable_ecrc_checking(dev);
190 break;
191 default:
192 return;
193 }
194}
195
196
197
198
199
200void pcie_ecrc_get_policy(char *str)
201{
202 int i;
203
204 i = match_string(ecrc_policy_str, ARRAY_SIZE(ecrc_policy_str), str);
205 if (i < 0)
206 return;
207
208 ecrc_policy = i;
209}
210#endif
211
/*
 * Device Control bits set/cleared by pci_{enable,disable}_pcie_error_reporting()
 * and tested by is_error_source().
 */
#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
	PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
214
215int pcie_aer_is_native(struct pci_dev *dev)
216{
217 struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
218
219 if (!dev->aer_cap)
220 return 0;
221
222 return pcie_ports_native || host->native_aer;
223}
224
225int pci_enable_pcie_error_reporting(struct pci_dev *dev)
226{
227 int rc;
228
229 if (!pcie_aer_is_native(dev))
230 return -EIO;
231
232 rc = pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
233 return pcibios_err_to_errno(rc);
234}
235EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
236
237int pci_disable_pcie_error_reporting(struct pci_dev *dev)
238{
239 int rc;
240
241 if (!pcie_aer_is_native(dev))
242 return -EIO;
243
244 rc = pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
245 return pcibios_err_to_errno(rc);
246}
247EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
248
249int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
250{
251 int aer = dev->aer_cap;
252 u32 status, sev;
253
254 if (!pcie_aer_is_native(dev))
255 return -EIO;
256
257
258 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
259 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
260 status &= ~sev;
261 if (status)
262 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
263
264 return 0;
265}
266EXPORT_SYMBOL_GPL(pci_aer_clear_nonfatal_status);
267
268void pci_aer_clear_fatal_status(struct pci_dev *dev)
269{
270 int aer = dev->aer_cap;
271 u32 status, sev;
272
273 if (!pcie_aer_is_native(dev))
274 return;
275
276
277 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
278 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sev);
279 status &= sev;
280 if (status)
281 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
282}
283
284
285
286
287
288
289
290
291
292
293int pci_aer_raw_clear_status(struct pci_dev *dev)
294{
295 int aer = dev->aer_cap;
296 u32 status;
297 int port_type;
298
299 if (!aer)
300 return -EIO;
301
302 port_type = pci_pcie_type(dev);
303 if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
304 pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, &status);
305 pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, status);
306 }
307
308 pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
309 pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, status);
310
311 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
312 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status);
313
314 return 0;
315}
316
317int pci_aer_clear_status(struct pci_dev *dev)
318{
319 if (!pcie_aer_is_native(dev))
320 return -EIO;
321
322 return pci_aer_raw_clear_status(dev);
323}
324
325void pci_save_aer_state(struct pci_dev *dev)
326{
327 int aer = dev->aer_cap;
328 struct pci_cap_saved_state *save_state;
329 u32 *cap;
330
331 if (!aer)
332 return;
333
334 save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
335 if (!save_state)
336 return;
337
338 cap = &save_state->cap.data[0];
339 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, cap++);
340 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, cap++);
341 pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, cap++);
342 pci_read_config_dword(dev, aer + PCI_ERR_CAP, cap++);
343 if (pcie_cap_has_rtctl(dev))
344 pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, cap++);
345}
346
347void pci_restore_aer_state(struct pci_dev *dev)
348{
349 int aer = dev->aer_cap;
350 struct pci_cap_saved_state *save_state;
351 u32 *cap;
352
353 if (!aer)
354 return;
355
356 save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_ERR);
357 if (!save_state)
358 return;
359
360 cap = &save_state->cap.data[0];
361 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, *cap++);
362 pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, *cap++);
363 pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, *cap++);
364 pci_write_config_dword(dev, aer + PCI_ERR_CAP, *cap++);
365 if (pcie_cap_has_rtctl(dev))
366 pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, *cap++);
367}
368
369void pci_aer_init(struct pci_dev *dev)
370{
371 int n;
372
373 dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
374 if (!dev->aer_cap)
375 return;
376
377 dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
378
379
380
381
382
383
384
385 n = pcie_cap_has_rtctl(dev) ? 5 : 4;
386 pci_add_ext_cap_save_buffer(dev, PCI_EXT_CAP_ID_ERR, sizeof(u32) * n);
387
388 pci_aer_clear_status(dev);
389}
390
391void pci_aer_exit(struct pci_dev *dev)
392{
393 kfree(dev->aer_stats);
394 dev->aer_stats = NULL;
395}
396
/* Agent codes; used as indices into aer_agent_string[] */
#define AER_AGENT_RECEIVER		0
#define AER_AGENT_REQUESTER		1
#define AER_AGENT_COMPLETER		2
#define AER_AGENT_TRANSMITTER		3

/*
 * Status bits attributed to each agent for severity @t.  Macro arguments
 * are parenthesized so that arbitrary expressions can be passed safely.
 */
#define AER_AGENT_REQUESTER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t)	(((t) == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

/*
 * Map severity @t and status @e to an agent code.  Completer is tested
 * first, then requester, then transmitter; receiver is the fallback.
 */
#define AER_GET_AGENT(t, e)						\
	(((e) & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER :	\
	((e) & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER :	\
	((e) & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER :	\
	AER_AGENT_RECEIVER)
414
/* Layer codes; used as indices into aer_error_layer[] */
#define AER_PHYSICAL_LAYER_ERROR	0
#define AER_DATA_LINK_LAYER_ERROR	1
#define AER_TRANSACTION_LAYER_ERROR	2

/*
 * Status bits attributed to each layer for severity @t.  Macro arguments
 * are parenthesized so that arbitrary expressions can be passed safely.
 */
#define AER_PHYSICAL_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ?	\
	PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ?	\
	(PCI_ERR_COR_BAD_TLP|						\
	PCI_ERR_COR_BAD_DLLP|						\
	PCI_ERR_COR_REP_ROLL|						\
	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

/*
 * Map severity @t and status @e to a layer code.  Physical is tested
 * first, then data link; transaction layer is the fallback.
 */
#define AER_GET_LAYER_ERROR(t, e)					\
	(((e) & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	((e) & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)
431
432
433
434
/*
 * Severity names, indexed by the severity value stored in struct
 * aer_err_info.  NOTE(review): the order presumably matches AER_NONFATAL,
 * AER_FATAL, AER_CORRECTABLE as defined in the AER header -- confirm.
 * The table itself is const, like ecrc_policy_str[].
 */
static const char * const aer_error_severity_string[] = {
	"Uncorrected (Non-Fatal)",
	"Uncorrected (Fatal)",
	"Corrected"
};
440
/*
 * Layer names, indexed by the AER_*_LAYER_ERROR code returned by
 * AER_GET_LAYER_ERROR().  The table itself is const, like
 * ecrc_policy_str[].
 */
static const char * const aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};
446
/*
 * Names of correctable error status bits, indexed by bit position.
 * Positions without a name are NULL.  The table keeps 32 entries because
 * __aer_print_error() may index it with any bit number below 32.
 */
static const char *aer_correctable_error_string[32] = {
	[0]  = "RxErr",
	[6]  = "BadTLP",
	[7]  = "BadDLLP",
	[8]  = "Rollover",
	[12] = "Timeout",
	[13] = "NonFatalErr",
	[14] = "CorrIntErr",
	[15] = "HeaderOF",
};
481
/*
 * Names of uncorrectable error status bits, indexed by bit position.
 * Positions without a name are NULL.  The table keeps 32 entries because
 * __aer_print_error() may index it with any bit number below 32.
 */
static const char *aer_uncorrectable_error_string[32] = {
	[0]  = "Undefined",
	[4]  = "DLP",
	[5]  = "SDES",
	[12] = "TLP",
	[13] = "FCP",
	[14] = "CmpltTO",
	[15] = "CmpltAbrt",
	[16] = "UnxCmplt",
	[17] = "RxOF",
	[18] = "MalfTLP",
	[19] = "ECRC",
	[20] = "UnsupReq",
	[21] = "ACSViol",
	[22] = "UncorrIntErr",
	[23] = "BlockedTLP",
	[24] = "AtomicOpBlocked",
	[25] = "TLPBlockedErr",
	[26] = "PoisonTLPBlocked",
};
516
/*
 * Agent names, indexed by the AER_AGENT_* code returned by
 * AER_GET_AGENT().  The table itself is const, like ecrc_policy_str[].
 */
static const char * const aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};
523
/*
 * Generate a read-only sysfs attribute @name that prints one line per
 * counter of pdev->aer_stats->@stats_array followed by a TOTAL line.
 * Named bits print as "<name> <count>"; unnamed bits are printed only
 * when their counter is nonzero.
 *
 * The loop is bounded by the counter array, not by @strings_array: the
 * string tables have 32 entries while the counter arrays are shorter, so
 * iterating over the strings would read past the end of the counters.
 */
#define aer_stats_dev_attr(name, stats_array, strings_array,		\
			   total_string, total_field)			\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		     char *buf)						\
{									\
	unsigned int i;							\
	char *str = buf;						\
	struct pci_dev *pdev = to_pci_dev(dev);				\
	u64 *stats = pdev->aer_stats->stats_array;			\
									\
	/* Every counter slot must have a string table slot */		\
	BUILD_BUG_ON(ARRAY_SIZE(strings_array) <			\
		     ARRAY_SIZE(pdev->aer_stats->stats_array));		\
									\
	for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
		if (strings_array[i])					\
			str += sprintf(str, "%s %llu\n",		\
				       strings_array[i], stats[i]);	\
		else if (stats[i])					\
			str += sprintf(str, #stats_array "_bit[%d] %llu\n",\
				       i, stats[i]);			\
	}								\
	str += sprintf(str, "TOTAL_%s %llu\n", total_string,		\
		       pdev->aer_stats->total_field);			\
	return str-buf;							\
}									\
static DEVICE_ATTR_RO(name)
548
/*
 * Per-device sysfs attributes: aer_dev_correctable, aer_dev_fatal and
 * aer_dev_nonfatal.  Fatal and non-fatal share the uncorrectable names.
 */
aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
		   aer_correctable_error_string, "ERR_COR",
		   dev_total_cor_errs);
aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
		   aer_uncorrectable_error_string, "ERR_FATAL",
		   dev_total_fatal_errs);
aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
		   aer_uncorrectable_error_string, "ERR_NONFATAL",
		   dev_total_nonfatal_errs);
558
/*
 * Generate a read-only sysfs attribute @name that prints the single
 * counter pdev->aer_stats->@field followed by a newline.
 */
#define aer_stats_rootport_attr(name, field)				\
	static ssize_t							\
	name##_show(struct device *dev, struct device_attribute *attr,	\
		     char *buf)						\
{									\
	struct aer_stats *stats = to_pci_dev(dev)->aer_stats;		\
									\
	return sprintf(buf, "%llu\n", stats->field);			\
}									\
static DEVICE_ATTR_RO(name)
568
/* Root-level totals; only made visible on Root Ports (see is_visible) */
aer_stats_rootport_attr(aer_rootport_total_err_cor,
			 rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
			 rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
			 rootport_total_nonfatal_errs);
575
/* All AER statistics attributes; filtered by aer_stats_attrs_are_visible() */
static struct attribute *aer_stats_attrs[] __ro_after_init = {
	&dev_attr_aer_dev_correctable.attr,
	&dev_attr_aer_dev_fatal.attr,
	&dev_attr_aer_dev_nonfatal.attr,
	&dev_attr_aer_rootport_total_err_cor.attr,
	&dev_attr_aer_rootport_total_err_fatal.attr,
	&dev_attr_aer_rootport_total_err_nonfatal.attr,
	NULL
};
585
586static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
587 struct attribute *a, int n)
588{
589 struct device *dev = kobj_to_dev(kobj);
590 struct pci_dev *pdev = to_pci_dev(dev);
591
592 if (!pdev->aer_stats)
593 return 0;
594
595 if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
596 a == &dev_attr_aer_rootport_total_err_fatal.attr ||
597 a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
598 pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
599 return 0;
600
601 return a->mode;
602}
603
/* Attribute group exposing the AER statistics; not static, used elsewhere */
const struct attribute_group aer_stats_attr_group = {
	.attrs = aer_stats_attrs,
	.is_visible = aer_stats_attrs_are_visible,
};
608
609static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
610 struct aer_err_info *info)
611{
612 unsigned long status = info->status & ~info->mask;
613 int i, max = -1;
614 u64 *counter = NULL;
615 struct aer_stats *aer_stats = pdev->aer_stats;
616
617 if (!aer_stats)
618 return;
619
620 switch (info->severity) {
621 case AER_CORRECTABLE:
622 aer_stats->dev_total_cor_errs++;
623 counter = &aer_stats->dev_cor_errs[0];
624 max = AER_MAX_TYPEOF_COR_ERRS;
625 break;
626 case AER_NONFATAL:
627 aer_stats->dev_total_nonfatal_errs++;
628 counter = &aer_stats->dev_nonfatal_errs[0];
629 max = AER_MAX_TYPEOF_UNCOR_ERRS;
630 break;
631 case AER_FATAL:
632 aer_stats->dev_total_fatal_errs++;
633 counter = &aer_stats->dev_fatal_errs[0];
634 max = AER_MAX_TYPEOF_UNCOR_ERRS;
635 break;
636 }
637
638 for_each_set_bit(i, &status, max)
639 counter[i]++;
640}
641
642static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
643 struct aer_err_source *e_src)
644{
645 struct aer_stats *aer_stats = pdev->aer_stats;
646
647 if (!aer_stats)
648 return;
649
650 if (e_src->status & PCI_ERR_ROOT_COR_RCV)
651 aer_stats->rootport_total_cor_errs++;
652
653 if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
654 if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
655 aer_stats->rootport_total_fatal_errs++;
656 else
657 aer_stats->rootport_total_nonfatal_errs++;
658 }
659}
660
661static void __print_tlp_header(struct pci_dev *dev,
662 struct aer_header_log_regs *t)
663{
664 pci_err(dev, " TLP Header: %08x %08x %08x %08x\n",
665 t->dw0, t->dw1, t->dw2, t->dw3);
666}
667
668static void __aer_print_error(struct pci_dev *dev,
669 struct aer_err_info *info)
670{
671 const char **strings;
672 unsigned long status = info->status & ~info->mask;
673 const char *level, *errmsg;
674 int i;
675
676 if (info->severity == AER_CORRECTABLE) {
677 strings = aer_correctable_error_string;
678 level = KERN_WARNING;
679 } else {
680 strings = aer_uncorrectable_error_string;
681 level = KERN_ERR;
682 }
683
684 for_each_set_bit(i, &status, 32) {
685 errmsg = strings[i];
686 if (!errmsg)
687 errmsg = "Unknown Error Bit";
688
689 pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
690 info->first_error == i ? " (First)" : "");
691 }
692 pci_dev_aer_stats_incr(dev, info);
693}
694
695void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
696{
697 int layer, agent;
698 int id = ((dev->bus->number << 8) | dev->devfn);
699 const char *level;
700
701 if (!info->status) {
702 pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
703 aer_error_severity_string[info->severity]);
704 goto out;
705 }
706
707 layer = AER_GET_LAYER_ERROR(info->severity, info->status);
708 agent = AER_GET_AGENT(info->severity, info->status);
709
710 level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
711
712 pci_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
713 aer_error_severity_string[info->severity],
714 aer_error_layer[layer], aer_agent_string[agent]);
715
716 pci_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
717 dev->vendor, dev->device, info->status, info->mask);
718
719 __aer_print_error(dev, info);
720
721 if (info->tlp_header_valid)
722 __print_tlp_header(dev, &info->tlp);
723
724out:
725 if (info->id && info->error_dev_num > 1 && info->id == id)
726 pci_err(dev, " Error of this Agent is reported first\n");
727
728 trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
729 info->severity, info->tlp_header_valid, &info->tlp);
730}
731
732static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
733{
734 u8 bus = info->id >> 8;
735 u8 devfn = info->id & 0xff;
736
737 pci_info(dev, "%s%s error received: %04x:%02x:%02x.%d\n",
738 info->multi_error_valid ? "Multiple " : "",
739 aer_error_severity_string[info->severity],
740 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn),
741 PCI_FUNC(devfn));
742}
743
744#ifdef CONFIG_ACPI_APEI_PCIEAER
745int cper_severity_to_aer(int cper_severity)
746{
747 switch (cper_severity) {
748 case CPER_SEV_RECOVERABLE:
749 return AER_NONFATAL;
750 case CPER_SEV_FATAL:
751 return AER_FATAL;
752 default:
753 return AER_CORRECTABLE;
754 }
755}
756EXPORT_SYMBOL_GPL(cper_severity_to_aer);
757
758void cper_print_aer(struct pci_dev *dev, int aer_severity,
759 struct aer_capability_regs *aer)
760{
761 int layer, agent, tlp_header_valid = 0;
762 u32 status, mask;
763 struct aer_err_info info;
764
765 if (aer_severity == AER_CORRECTABLE) {
766 status = aer->cor_status;
767 mask = aer->cor_mask;
768 } else {
769 status = aer->uncor_status;
770 mask = aer->uncor_mask;
771 tlp_header_valid = status & AER_LOG_TLP_MASKS;
772 }
773
774 layer = AER_GET_LAYER_ERROR(aer_severity, status);
775 agent = AER_GET_AGENT(aer_severity, status);
776
777 memset(&info, 0, sizeof(info));
778 info.severity = aer_severity;
779 info.status = status;
780 info.mask = mask;
781 info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
782
783 pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
784 __aer_print_error(dev, &info);
785 pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
786 aer_error_layer[layer], aer_agent_string[agent]);
787
788 if (aer_severity != AER_CORRECTABLE)
789 pci_err(dev, "aer_uncor_severity: 0x%08x\n",
790 aer->uncor_severity);
791
792 if (tlp_header_valid)
793 __print_tlp_header(dev, &aer->header_log);
794
795 trace_aer_event(dev_name(&dev->dev), (status & ~mask),
796 aer_severity, tlp_header_valid, &aer->header_log);
797}
798#endif
799
800
801
802
803
804
805static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
806{
807 if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
808 e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
809 e_info->error_dev_num++;
810 return 0;
811 }
812 return -ENOSPC;
813}
814
815
816
817
818
819
820static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
821{
822 int aer = dev->aer_cap;
823 u32 status, mask;
824 u16 reg16;
825
826
827
828
829
830 if ((PCI_BUS_NUM(e_info->id) != 0) &&
831 !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
832
833 if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
834 return true;
835
836
837 if (!e_info->multi_error_valid)
838 return false;
839 }
840
841
842
843
844
845
846
847
848
849 if (atomic_read(&dev->enable_cnt) == 0)
850 return false;
851
852
853 pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16);
854 if (!(reg16 & PCI_EXP_AER_FLAGS))
855 return false;
856
857 if (!aer)
858 return false;
859
860
861 if (e_info->severity == AER_CORRECTABLE) {
862 pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS, &status);
863 pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
864 } else {
865 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &status);
866 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
867 }
868 if (status & ~mask)
869 return true;
870
871 return false;
872}
873
874static int find_device_iter(struct pci_dev *dev, void *data)
875{
876 struct aer_err_info *e_info = (struct aer_err_info *)data;
877
878 if (is_error_source(dev, e_info)) {
879
880 if (add_error_device(e_info, dev)) {
881
882
883 return 1;
884 }
885
886
887 if (!e_info->multi_error_valid)
888 return 1;
889 }
890 return 0;
891}
892
893
894
895
896
897
898
899
900
901
902
903
904
905static bool find_source_device(struct pci_dev *parent,
906 struct aer_err_info *e_info)
907{
908 struct pci_dev *dev = parent;
909 int result;
910
911
912 e_info->error_dev_num = 0;
913
914
915 result = find_device_iter(dev, e_info);
916 if (result)
917 return true;
918
919 pci_walk_bus(parent->subordinate, find_device_iter, e_info);
920
921 if (!e_info->error_dev_num) {
922 pci_info(parent, "can't find device of ID%04x\n", e_info->id);
923 return false;
924 }
925 return true;
926}
927
928
929
930
931
932
933
934
935static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
936{
937 int aer = dev->aer_cap;
938
939 if (info->severity == AER_CORRECTABLE) {
940
941
942
943
944 if (aer)
945 pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
946 info->status);
947 if (pcie_aer_is_native(dev))
948 pcie_clear_device_status(dev);
949 } else if (info->severity == AER_NONFATAL)
950 pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset);
951 else if (info->severity == AER_FATAL)
952 pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset);
953 pci_dev_put(dev);
954}
955
956#ifdef CONFIG_ACPI_APEI_PCIEAER
957
/* The recovery ring holds 1 << AER_RECOVER_RING_ORDER entries */
#define AER_RECOVER_RING_ORDER 4
#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER)

/* One queued recovery request, as passed to aer_recover_queue() */
struct aer_recover_entry {
	u8 bus;
	u8 devfn;
	u16 domain;
	int severity;
	struct aer_capability_regs *regs;	/* pointer is stored, not copied */
};

/* Filled by aer_recover_queue(), drained by aer_recover_work_func() */
static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
		    AER_RECOVER_RING_SIZE);
971
972static void aer_recover_work_func(struct work_struct *work)
973{
974 struct aer_recover_entry entry;
975 struct pci_dev *pdev;
976
977 while (kfifo_get(&aer_recover_ring, &entry)) {
978 pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
979 entry.devfn);
980 if (!pdev) {
981 pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
982 entry.domain, entry.bus,
983 PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
984 continue;
985 }
986 cper_print_aer(pdev, entry.severity, entry.regs);
987 if (entry.severity == AER_NONFATAL)
988 pcie_do_recovery(pdev, pci_channel_io_normal,
989 aer_root_reset);
990 else if (entry.severity == AER_FATAL)
991 pcie_do_recovery(pdev, pci_channel_io_frozen,
992 aer_root_reset);
993 pci_dev_put(pdev);
994 }
995}
996
997
998
999
1000
1001
/* Serializes producers of aer_recover_ring (see aer_recover_queue()) */
static DEFINE_SPINLOCK(aer_recover_ring_lock);
/* Work item that runs aer_recover_work_func() */
static DECLARE_WORK(aer_recover_work, aer_recover_work_func);
1004
1005void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
1006 int severity, struct aer_capability_regs *aer_regs)
1007{
1008 struct aer_recover_entry entry = {
1009 .bus = bus,
1010 .devfn = devfn,
1011 .domain = domain,
1012 .severity = severity,
1013 .regs = aer_regs,
1014 };
1015
1016 if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
1017 &aer_recover_ring_lock))
1018 schedule_work(&aer_recover_work);
1019 else
1020 pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
1021 domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1022}
1023EXPORT_SYMBOL_GPL(aer_recover_queue);
1024#endif
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
1036{
1037 int aer = dev->aer_cap;
1038 int temp;
1039
1040
1041 info->status = 0;
1042 info->tlp_header_valid = 0;
1043
1044
1045 if (!aer)
1046 return 0;
1047
1048 if (info->severity == AER_CORRECTABLE) {
1049 pci_read_config_dword(dev, aer + PCI_ERR_COR_STATUS,
1050 &info->status);
1051 pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK,
1052 &info->mask);
1053 if (!(info->status & ~info->mask))
1054 return 0;
1055 } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
1056 pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
1057 info->severity == AER_NONFATAL) {
1058
1059
1060 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS,
1061 &info->status);
1062 pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
1063 &info->mask);
1064 if (!(info->status & ~info->mask))
1065 return 0;
1066
1067
1068 pci_read_config_dword(dev, aer + PCI_ERR_CAP, &temp);
1069 info->first_error = PCI_ERR_CAP_FEP(temp);
1070
1071 if (info->status & AER_LOG_TLP_MASKS) {
1072 info->tlp_header_valid = 1;
1073 pci_read_config_dword(dev,
1074 aer + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
1075 pci_read_config_dword(dev,
1076 aer + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
1077 pci_read_config_dword(dev,
1078 aer + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
1079 pci_read_config_dword(dev,
1080 aer + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
1081 }
1082 }
1083
1084 return 1;
1085}
1086
1087static inline void aer_process_err_devices(struct aer_err_info *e_info)
1088{
1089 int i;
1090
1091
1092 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
1093 if (aer_get_device_error_info(e_info->dev[i], e_info))
1094 aer_print_error(e_info->dev[i], e_info);
1095 }
1096 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
1097 if (aer_get_device_error_info(e_info->dev[i], e_info))
1098 handle_error_source(e_info->dev[i], e_info);
1099 }
1100}
1101
1102
1103
1104
1105
1106
1107static void aer_isr_one_error(struct aer_rpc *rpc,
1108 struct aer_err_source *e_src)
1109{
1110 struct pci_dev *pdev = rpc->rpd;
1111 struct aer_err_info e_info;
1112
1113 pci_rootport_aer_stats_incr(pdev, e_src);
1114
1115
1116
1117
1118
1119 if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
1120 e_info.id = ERR_COR_ID(e_src->id);
1121 e_info.severity = AER_CORRECTABLE;
1122
1123 if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
1124 e_info.multi_error_valid = 1;
1125 else
1126 e_info.multi_error_valid = 0;
1127 aer_print_port_info(pdev, &e_info);
1128
1129 if (find_source_device(pdev, &e_info))
1130 aer_process_err_devices(&e_info);
1131 }
1132
1133 if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
1134 e_info.id = ERR_UNCOR_ID(e_src->id);
1135
1136 if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
1137 e_info.severity = AER_FATAL;
1138 else
1139 e_info.severity = AER_NONFATAL;
1140
1141 if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
1142 e_info.multi_error_valid = 1;
1143 else
1144 e_info.multi_error_valid = 0;
1145
1146 aer_print_port_info(pdev, &e_info);
1147
1148 if (find_source_device(pdev, &e_info))
1149 aer_process_err_devices(&e_info);
1150 }
1151}
1152
1153
1154
1155
1156
1157
1158
1159
1160static irqreturn_t aer_isr(int irq, void *context)
1161{
1162 struct pcie_device *dev = (struct pcie_device *)context;
1163 struct aer_rpc *rpc = get_service_data(dev);
1164 struct aer_err_source e_src;
1165
1166 if (kfifo_is_empty(&rpc->aer_fifo))
1167 return IRQ_NONE;
1168
1169 while (kfifo_get(&rpc->aer_fifo, &e_src))
1170 aer_isr_one_error(rpc, &e_src);
1171 return IRQ_HANDLED;
1172}
1173
1174
1175
1176
1177
1178
1179
1180
1181static irqreturn_t aer_irq(int irq, void *context)
1182{
1183 struct pcie_device *pdev = (struct pcie_device *)context;
1184 struct aer_rpc *rpc = get_service_data(pdev);
1185 struct pci_dev *rp = rpc->rpd;
1186 int aer = rp->aer_cap;
1187 struct aer_err_source e_src = {};
1188
1189 pci_read_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, &e_src.status);
1190 if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV)))
1191 return IRQ_NONE;
1192
1193 pci_read_config_dword(rp, aer + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
1194 pci_write_config_dword(rp, aer + PCI_ERR_ROOT_STATUS, e_src.status);
1195
1196 if (!kfifo_put(&rpc->aer_fifo, e_src))
1197 return IRQ_HANDLED;
1198
1199 return IRQ_WAKE_THREAD;
1200}
1201
1202static int set_device_error_reporting(struct pci_dev *dev, void *data)
1203{
1204 bool enable = *((bool *)data);
1205 int type = pci_pcie_type(dev);
1206
1207 if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
1208 (type == PCI_EXP_TYPE_UPSTREAM) ||
1209 (type == PCI_EXP_TYPE_DOWNSTREAM)) {
1210 if (enable)
1211 pci_enable_pcie_error_reporting(dev);
1212 else
1213 pci_disable_pcie_error_reporting(dev);
1214 }
1215
1216 if (enable)
1217 pcie_set_ecrc_checking(dev);
1218
1219 return 0;
1220}
1221
1222
1223
1224
1225
1226
1227static void set_downstream_devices_error_reporting(struct pci_dev *dev,
1228 bool enable)
1229{
1230 set_device_error_reporting(dev, &enable);
1231
1232 if (!dev->subordinate)
1233 return;
1234 pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
1235}
1236
1237
1238
1239
1240
1241
1242
1243static void aer_enable_rootport(struct aer_rpc *rpc)
1244{
1245 struct pci_dev *pdev = rpc->rpd;
1246 int aer = pdev->aer_cap;
1247 u16 reg16;
1248 u32 reg32;
1249
1250
1251 pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, ®16);
1252 pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);
1253
1254
1255 pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
1256 SYSTEM_ERROR_INTR_ON_MESG_MASK);
1257
1258
1259 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32);
1260 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
1261 pci_read_config_dword(pdev, aer + PCI_ERR_COR_STATUS, ®32);
1262 pci_write_config_dword(pdev, aer + PCI_ERR_COR_STATUS, reg32);
1263 pci_read_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, ®32);
1264 pci_write_config_dword(pdev, aer + PCI_ERR_UNCOR_STATUS, reg32);
1265
1266
1267
1268
1269
1270 set_downstream_devices_error_reporting(pdev, true);
1271
1272
1273 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32);
1274 reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
1275 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
1276}
1277
1278
1279
1280
1281
1282
1283
1284static void aer_disable_rootport(struct aer_rpc *rpc)
1285{
1286 struct pci_dev *pdev = rpc->rpd;
1287 int aer = pdev->aer_cap;
1288 u32 reg32;
1289
1290
1291
1292
1293
1294 set_downstream_devices_error_reporting(pdev, false);
1295
1296
1297 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, ®32);
1298 reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
1299 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_COMMAND, reg32);
1300
1301
1302 pci_read_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, ®32);
1303 pci_write_config_dword(pdev, aer + PCI_ERR_ROOT_STATUS, reg32);
1304}
1305
1306
1307
1308
1309
1310
1311
/**
 * aer_remove - tear down the AER service of a port
 * @dev: the port service device
 *
 * Disables error reporting through aer_disable_rootport().
 */
static void aer_remove(struct pcie_device *dev)
{
	aer_disable_rootport(get_service_data(dev));
}
1318
1319
1320
1321
1322
1323
1324
1325static int aer_probe(struct pcie_device *dev)
1326{
1327 int status;
1328 struct aer_rpc *rpc;
1329 struct device *device = &dev->device;
1330 struct pci_dev *port = dev->port;
1331
1332 rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
1333 if (!rpc)
1334 return -ENOMEM;
1335
1336 rpc->rpd = port;
1337 INIT_KFIFO(rpc->aer_fifo);
1338 set_service_data(dev, rpc);
1339
1340 status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,
1341 IRQF_SHARED, "aerdrv", dev);
1342 if (status) {
1343 pci_err(port, "request AER IRQ %d failed\n", dev->irq);
1344 return status;
1345 }
1346
1347 aer_enable_rootport(rpc);
1348 pci_info(port, "enabled with IRQ %d\n", dev->irq);
1349 return 0;
1350}
1351
1352
1353
1354
1355
1356
1357
1358static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
1359{
1360 int aer = dev->aer_cap;
1361 u32 reg32;
1362 int rc;
1363
1364
1365
1366 pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32);
1367 reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
1368 pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
1369
1370 rc = pci_bus_error_reset(dev);
1371 pci_info(dev, "Root Port link has been reset\n");
1372
1373
1374 pci_read_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, ®32);
1375 pci_write_config_dword(dev, aer + PCI_ERR_ROOT_STATUS, reg32);
1376
1377
1378 pci_read_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, ®32);
1379 reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
1380 pci_write_config_dword(dev, aer + PCI_ERR_ROOT_COMMAND, reg32);
1381
1382 return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
1383}
1384
/* Port service driver for AER; bound to Root Ports only */
static struct pcie_port_service_driver aerdriver = {
	.name = "aer",
	.port_type = PCI_EXP_TYPE_ROOT_PORT,
	.service = PCIE_PORT_SERVICE_AER,

	.probe = aer_probe,
	.remove = aer_remove,
};
1393
1394
1395
1396
1397
1398
1399int __init pcie_aer_init(void)
1400{
1401 if (!pci_aer_available())
1402 return -ENXIO;
1403 return pcie_port_service_register(&aerdriver);
1404}
1405