1
2
3
4
5
6
7
8
9
10
11
12
13
14
15#include <linux/cper.h>
16#include <linux/pci.h>
17#include <linux/pci-acpi.h>
18#include <linux/sched.h>
19#include <linux/kernel.h>
20#include <linux/errno.h>
21#include <linux/pm.h>
22#include <linux/init.h>
23#include <linux/interrupt.h>
24#include <linux/delay.h>
25#include <linux/kfifo.h>
26#include <linux/slab.h>
27#include <acpi/apei.h>
28#include <ras/ras_event.h>
29
30#include "../pci.h"
31#include "portdrv.h"
32
/* Number of entries in the per-Root-Port error source FIFO (struct aer_rpc) */
#define AER_ERROR_SOURCES_MAX 128

/*
 * Sizes of the per-bit error name tables and of the per-bit error counter
 * arrays in struct aer_stats.
 */
#define AER_MAX_TYPEOF_COR_ERRS 16
#define AER_MAX_TYPEOF_UNCOR_ERRS 26
37
/* One error report captured by aer_irq() and consumed by aer_isr() */
struct aer_err_source {
	unsigned int status;	/* value read from PCI_ERR_ROOT_STATUS */
	unsigned int id;	/* value read from PCI_ERR_ROOT_ERR_SRC */
};
42
/* Per-Root-Port state of the AER service driver (allocated in aer_probe()) */
struct aer_rpc {
	struct pci_dev *rpd;	/* Root Port device, taken from the pcie_device's ->port */
	/* Error sources queued by aer_irq() and drained by aer_isr() */
	DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};
47
48
/* AER statistics kept per device (allocated in pci_aer_init()) */
struct aer_stats {

	/*
	 * Counters updated by pci_dev_aer_stats_incr() whenever an error is
	 * printed for this device.  The per-bit arrays are indexed by the bit
	 * position in the unmasked error status.
	 */

	/* Per-bit counters of correctable errors */
	u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
	/* Per-bit counters of uncorrectable errors reported as fatal */
	u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Per-bit counters of uncorrectable errors reported as non-fatal */
	u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
	/* Number of correctable error reports processed for this device */
	u64 dev_total_cor_errs;
	/* Number of fatal error reports processed for this device */
	u64 dev_total_fatal_errs;
	/* Number of non-fatal error reports processed for this device */
	u64 dev_total_nonfatal_errs;

	/*
	 * Counters updated by pci_rootport_aer_stats_incr() from the Root
	 * Error Status value captured in the interrupt handler.  They are
	 * exposed through sysfs only for Root Ports.
	 */
	u64 rootport_total_cor_errs;
	u64 rootport_total_fatal_errs;
	u64 rootport_total_nonfatal_errs;
};
83
/*
 * Uncorrectable error status bits for which the TLP header log is
 * captured and printed.
 */
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
	PCI_ERR_UNC_ECRC| \
	PCI_ERR_UNC_UNSUP| \
	PCI_ERR_UNC_COMP_ABORT| \
	PCI_ERR_UNC_UNX_COMP| \
	PCI_ERR_UNC_MALF_TLP)

/* Root Control bits cleared by aer_enable_rootport() */
#define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \
	PCI_EXP_RTCTL_SENFEE| \
	PCI_EXP_RTCTL_SEFEE)
/* Root Error Command bits set/cleared to enable/disable the AER interrupt */
#define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \
	PCI_ERR_ROOT_CMD_NONFATAL_EN| \
	PCI_ERR_ROOT_CMD_FATAL_EN)
/*
 * Split the Error Source Identification register value into its two
 * 16-bit requester IDs: the low half is the correctable error source,
 * the high half is the uncorrectable error source.  The argument is
 * parenthesized so that compound expressions are evaluated as a whole.
 */
#define ERR_COR_ID(d) ((d) & 0xffff)
#define ERR_UNCOR_ID(d) ((d) >> 16)
99
/* Non-zero once AER has been disabled via pci_no_aer() */
static int pcie_aer_disable;

/* Disable AER support; pci_aer_available() returns false afterwards. */
void pci_no_aer(void)
{
	pcie_aer_disable = 1;
}
106
107bool pci_aer_available(void)
108{
109 return !pcie_aer_disable && pci_msi_enabled();
110}
111
112#ifdef CONFIG_PCIE_ECRC
113
/* ECRC policy values; they double as indices into ecrc_policy_str[] */
#define ECRC_POLICY_DEFAULT 0	/* leave the device configuration untouched */
#define ECRC_POLICY_OFF 1	/* disable ECRC generation and checking */
#define ECRC_POLICY_ON 2	/* enable ECRC where the device is capable */

/* Currently selected policy, changed by pcie_ecrc_get_policy() */
static int ecrc_policy = ECRC_POLICY_DEFAULT;

/* Policy names matched against the string given to pcie_ecrc_get_policy() */
static const char *ecrc_policy_str[] = {
	[ECRC_POLICY_DEFAULT] = "bios",
	[ECRC_POLICY_OFF] = "off",
	[ECRC_POLICY_ON] = "on"
};
125
126
127
128
129
130
131
132static int enable_ecrc_checking(struct pci_dev *dev)
133{
134 int pos;
135 u32 reg32;
136
137 if (!pci_is_pcie(dev))
138 return -ENODEV;
139
140 pos = dev->aer_cap;
141 if (!pos)
142 return -ENODEV;
143
144 pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32);
145 if (reg32 & PCI_ERR_CAP_ECRC_GENC)
146 reg32 |= PCI_ERR_CAP_ECRC_GENE;
147 if (reg32 & PCI_ERR_CAP_ECRC_CHKC)
148 reg32 |= PCI_ERR_CAP_ECRC_CHKE;
149 pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
150
151 return 0;
152}
153
154
155
156
157
158
159
160static int disable_ecrc_checking(struct pci_dev *dev)
161{
162 int pos;
163 u32 reg32;
164
165 if (!pci_is_pcie(dev))
166 return -ENODEV;
167
168 pos = dev->aer_cap;
169 if (!pos)
170 return -ENODEV;
171
172 pci_read_config_dword(dev, pos + PCI_ERR_CAP, ®32);
173 reg32 &= ~(PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
174 pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
175
176 return 0;
177}
178
179
180
181
182
183void pcie_set_ecrc_checking(struct pci_dev *dev)
184{
185 switch (ecrc_policy) {
186 case ECRC_POLICY_DEFAULT:
187 return;
188 case ECRC_POLICY_OFF:
189 disable_ecrc_checking(dev);
190 break;
191 case ECRC_POLICY_ON:
192 enable_ecrc_checking(dev);
193 break;
194 default:
195 return;
196 }
197}
198
199
200
201
202void pcie_ecrc_get_policy(char *str)
203{
204 int i;
205
206 for (i = 0; i < ARRAY_SIZE(ecrc_policy_str); i++)
207 if (!strncmp(str, ecrc_policy_str[i],
208 strlen(ecrc_policy_str[i])))
209 break;
210 if (i >= ARRAY_SIZE(ecrc_policy_str))
211 return;
212
213 ecrc_policy = i;
214}
215#endif
216
217#ifdef CONFIG_ACPI_APEI
218static inline int hest_match_pci(struct acpi_hest_aer_common *p,
219 struct pci_dev *pci)
220{
221 return ACPI_HEST_SEGMENT(p->bus) == pci_domain_nr(pci->bus) &&
222 ACPI_HEST_BUS(p->bus) == pci->bus->number &&
223 p->device == PCI_SLOT(pci->devfn) &&
224 p->function == PCI_FUNC(pci->devfn);
225}
226
227static inline bool hest_match_type(struct acpi_hest_header *hest_hdr,
228 struct pci_dev *dev)
229{
230 u16 hest_type = hest_hdr->type;
231 u8 pcie_type = pci_pcie_type(dev);
232
233 if ((hest_type == ACPI_HEST_TYPE_AER_ROOT_PORT &&
234 pcie_type == PCI_EXP_TYPE_ROOT_PORT) ||
235 (hest_type == ACPI_HEST_TYPE_AER_ENDPOINT &&
236 pcie_type == PCI_EXP_TYPE_ENDPOINT) ||
237 (hest_type == ACPI_HEST_TYPE_AER_BRIDGE &&
238 (dev->class >> 16) == PCI_BASE_CLASS_BRIDGE))
239 return true;
240 return false;
241}
242
/* Context passed to aer_hest_parse() through apei_hest_parse() */
struct aer_hest_parse_info {
	struct pci_dev *pci_dev;	/* device to look up, or NULL to scan all AER entries */
	int firmware_first;		/* result: non-zero if firmware-first applies */
};
247
248static int hest_source_is_pcie_aer(struct acpi_hest_header *hest_hdr)
249{
250 if (hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
251 hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
252 hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE)
253 return 1;
254 return 0;
255}
256
257static int aer_hest_parse(struct acpi_hest_header *hest_hdr, void *data)
258{
259 struct aer_hest_parse_info *info = data;
260 struct acpi_hest_aer_common *p;
261 int ff;
262
263 if (!hest_source_is_pcie_aer(hest_hdr))
264 return 0;
265
266 p = (struct acpi_hest_aer_common *)(hest_hdr + 1);
267 ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);
268
269
270
271
272
273 if (!info->pci_dev) {
274 info->firmware_first |= ff;
275 return 0;
276 }
277
278
279 if (p->flags & ACPI_HEST_GLOBAL) {
280 if (hest_match_type(hest_hdr, info->pci_dev))
281 info->firmware_first = ff;
282 } else
283 if (hest_match_pci(p, info->pci_dev))
284 info->firmware_first = ff;
285
286 return 0;
287}
288
289static void aer_set_firmware_first(struct pci_dev *pci_dev)
290{
291 int rc;
292 struct aer_hest_parse_info info = {
293 .pci_dev = pci_dev,
294 .firmware_first = 0,
295 };
296
297 rc = apei_hest_parse(aer_hest_parse, &info);
298
299 if (rc)
300 pci_dev->__aer_firmware_first = 0;
301 else
302 pci_dev->__aer_firmware_first = info.firmware_first;
303 pci_dev->__aer_firmware_first_valid = 1;
304}
305
306int pcie_aer_get_firmware_first(struct pci_dev *dev)
307{
308 if (!pci_is_pcie(dev))
309 return 0;
310
311 if (pcie_ports_native)
312 return 0;
313
314 if (!dev->__aer_firmware_first_valid)
315 aer_set_firmware_first(dev);
316 return dev->__aer_firmware_first;
317}
318
319static bool aer_firmware_first;
320
321
322
323
324bool aer_acpi_firmware_first(void)
325{
326 static bool parsed = false;
327 struct aer_hest_parse_info info = {
328 .pci_dev = NULL,
329 .firmware_first = 0,
330 };
331
332 if (pcie_ports_native)
333 return false;
334
335 if (!parsed) {
336 apei_hest_parse(aer_hest_parse, &info);
337 aer_firmware_first = info.firmware_first;
338 parsed = true;
339 }
340 return aer_firmware_first;
341}
342#endif
343
/* Device Control error reporting enable bits toggled by this driver */
#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
	PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
346
347int pci_enable_pcie_error_reporting(struct pci_dev *dev)
348{
349 if (pcie_aer_get_firmware_first(dev))
350 return -EIO;
351
352 if (!dev->aer_cap)
353 return -EIO;
354
355 return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS);
356}
357EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting);
358
359int pci_disable_pcie_error_reporting(struct pci_dev *dev)
360{
361 if (pcie_aer_get_firmware_first(dev))
362 return -EIO;
363
364 return pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
365 PCI_EXP_AER_FLAGS);
366}
367EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
368
369void pci_aer_clear_device_status(struct pci_dev *dev)
370{
371 u16 sta;
372
373 pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
374 pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
375}
376
377int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
378{
379 int pos;
380 u32 status, sev;
381
382 pos = dev->aer_cap;
383 if (!pos)
384 return -EIO;
385
386 if (pcie_aer_get_firmware_first(dev))
387 return -EIO;
388
389
390 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
391 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
392 status &= ~sev;
393 if (status)
394 pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
395
396 return 0;
397}
398EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
399
400void pci_aer_clear_fatal_status(struct pci_dev *dev)
401{
402 int pos;
403 u32 status, sev;
404
405 pos = dev->aer_cap;
406 if (!pos)
407 return;
408
409 if (pcie_aer_get_firmware_first(dev))
410 return;
411
412
413 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
414 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
415 status &= sev;
416 if (status)
417 pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
418}
419
420int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
421{
422 int pos;
423 u32 status;
424 int port_type;
425
426 if (!pci_is_pcie(dev))
427 return -ENODEV;
428
429 pos = dev->aer_cap;
430 if (!pos)
431 return -EIO;
432
433 if (pcie_aer_get_firmware_first(dev))
434 return -EIO;
435
436 port_type = pci_pcie_type(dev);
437 if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
438 pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
439 pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status);
440 }
441
442 pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
443 pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, status);
444
445 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
446 pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
447
448 return 0;
449}
450
451void pci_aer_init(struct pci_dev *dev)
452{
453 dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
454
455 if (dev->aer_cap)
456 dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
457
458 pci_cleanup_aer_error_status_regs(dev);
459}
460
/* Free the AER statistics allocated by pci_aer_init(). */
void pci_aer_exit(struct pci_dev *dev)
{
	kfree(dev->aer_stats);
	dev->aer_stats = NULL;	/* guard against reuse of the freed block */
}
466
/* Indices into aer_agent_string[] */
#define AER_AGENT_RECEIVER 0
#define AER_AGENT_REQUESTER 1
#define AER_AGENT_COMPLETER 2
#define AER_AGENT_TRANSMITTER 3

/*
 * Status bits attributed to each agent for severity @t.  Macro parameters
 * are parenthesized so that compound expressions expand safely.
 */
#define AER_AGENT_REQUESTER_MASK(t) (((t) == AER_CORRECTABLE) ? \
	0 : (PCI_ERR_UNC_COMP_TIME|PCI_ERR_UNC_UNSUP))
#define AER_AGENT_COMPLETER_MASK(t) (((t) == AER_CORRECTABLE) ? \
	0 : PCI_ERR_UNC_COMP_ABORT)
#define AER_AGENT_TRANSMITTER_MASK(t) (((t) == AER_CORRECTABLE) ? \
	(PCI_ERR_COR_REP_ROLL|PCI_ERR_COR_REP_TIMER) : 0)

/* Map severity @t and error status @e to an AER_AGENT_* value */
#define AER_GET_AGENT(t, e) \
	(((e) & AER_AGENT_COMPLETER_MASK(t)) ? AER_AGENT_COMPLETER : \
	((e) & AER_AGENT_REQUESTER_MASK(t)) ? AER_AGENT_REQUESTER : \
	((e) & AER_AGENT_TRANSMITTER_MASK(t)) ? AER_AGENT_TRANSMITTER : \
	AER_AGENT_RECEIVER)

/* Indices into aer_error_layer[] */
#define AER_PHYSICAL_LAYER_ERROR 0
#define AER_DATA_LINK_LAYER_ERROR 1
#define AER_TRANSACTION_LAYER_ERROR 2

/* Status bits attributed to each layer for severity @t */
#define AER_PHYSICAL_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ? \
	PCI_ERR_COR_RCVR : 0)
#define AER_DATA_LINK_LAYER_ERROR_MASK(t) (((t) == AER_CORRECTABLE) ? \
	(PCI_ERR_COR_BAD_TLP| \
	PCI_ERR_COR_BAD_DLLP| \
	PCI_ERR_COR_REP_ROLL| \
	PCI_ERR_COR_REP_TIMER) : PCI_ERR_UNC_DLP)

/* Map severity @t and error status @e to an AER_*_LAYER_ERROR value */
#define AER_GET_LAYER_ERROR(t, e) \
	(((e) & AER_PHYSICAL_LAYER_ERROR_MASK(t)) ? AER_PHYSICAL_LAYER_ERROR : \
	((e) & AER_DATA_LINK_LAYER_ERROR_MASK(t)) ? AER_DATA_LINK_LAYER_ERROR : \
	AER_TRANSACTION_LAYER_ERROR)
501
502
503
504
/* Severity names, indexed by the severity value stored in aer_err_info */
static const char *aer_error_severity_string[] = {
	"Uncorrected (Non-Fatal)",
	"Uncorrected (Fatal)",
	"Corrected"
};

/* Layer names, indexed by the AER_*_LAYER_ERROR values */
static const char *aer_error_layer[] = {
	"Physical Layer",
	"Data Link Layer",
	"Transaction Layer"
};

/*
 * Names of correctable errors, indexed by bit position in the correctable
 * error status.  NULL marks a bit without a name.
 */
static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
	"RxErr",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"BadTLP",			/* Bit Position 6	*/
	"BadDLLP",			/* Bit Position 7	*/
	"Rollover",			/* Bit Position 8	*/
	NULL,
	NULL,
	NULL,
	"Timeout",			/* Bit Position 12	*/
	"NonFatalErr",			/* Bit Position 13	*/
	"CorrIntErr",			/* Bit Position 14	*/
	"HeaderOF",			/* Bit Position 15	*/
};

/*
 * Names of uncorrectable errors, indexed by bit position in the
 * uncorrectable error status.  NULL marks a bit without a name.
 */
static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
	"Undefined",			/* Bit Position 0	*/
	NULL,
	NULL,
	NULL,
	"DLP",				/* Bit Position 4	*/
	"SDES",				/* Bit Position 5	*/
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"TLP",				/* Bit Position 12	*/
	"FCP",				/* Bit Position 13	*/
	"CmpltTO",			/* Bit Position 14	*/
	"CmpltAbrt",			/* Bit Position 15	*/
	"UnxCmplt",			/* Bit Position 16	*/
	"RxOF",				/* Bit Position 17	*/
	"MalfTLP",			/* Bit Position 18	*/
	"ECRC",				/* Bit Position 19	*/
	"UnsupReq",			/* Bit Position 20	*/
	"ACSViol",			/* Bit Position 21	*/
	"UncorrIntErr",			/* Bit Position 22	*/
	"BlockedTLP",			/* Bit Position 23	*/
	"AtomicOpBlocked",		/* Bit Position 24	*/
	"TLPBlockedErr",		/* Bit Position 25	*/
};

/* Agent names, indexed by the AER_AGENT_* values */
static const char *aer_agent_string[] = {
	"Receiver ID",
	"Requester ID",
	"Completer ID",
	"Transmitter ID"
};
571
572#define aer_stats_dev_attr(name, stats_array, strings_array, \
573 total_string, total_field) \
574 static ssize_t \
575 name##_show(struct device *dev, struct device_attribute *attr, \
576 char *buf) \
577{ \
578 unsigned int i; \
579 char *str = buf; \
580 struct pci_dev *pdev = to_pci_dev(dev); \
581 u64 *stats = pdev->aer_stats->stats_array; \
582 \
583 for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \
584 if (strings_array[i]) \
585 str += sprintf(str, "%s %llu\n", \
586 strings_array[i], stats[i]); \
587 else if (stats[i]) \
588 str += sprintf(str, #stats_array "_bit[%d] %llu\n",\
589 i, stats[i]); \
590 } \
591 str += sprintf(str, "TOTAL_%s %llu\n", total_string, \
592 pdev->aer_stats->total_field); \
593 return str-buf; \
594} \
595static DEVICE_ATTR_RO(name)
596
597aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
598 aer_correctable_error_string, "ERR_COR",
599 dev_total_cor_errs);
600aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
601 aer_uncorrectable_error_string, "ERR_FATAL",
602 dev_total_fatal_errs);
603aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
604 aer_uncorrectable_error_string, "ERR_NONFATAL",
605 dev_total_nonfatal_errs);
606
/*
 * aer_stats_rootport_attr - define a read-only sysfs attribute that prints
 * a single counter from struct aer_stats
 * @name:  attribute name (the show function is <name>_show)
 * @field: member of struct aer_stats to print
 */
#define aer_stats_rootport_attr(name, field) \
	static ssize_t \
	name##_show(struct device *dev, struct device_attribute *attr, \
		     char *buf) \
{ \
	struct pci_dev *pdev = to_pci_dev(dev); \
	return sprintf(buf, "%llu\n", pdev->aer_stats->field); \
} \
static DEVICE_ATTR_RO(name)

aer_stats_rootport_attr(aer_rootport_total_err_cor,
			 rootport_total_cor_errs);
aer_stats_rootport_attr(aer_rootport_total_err_fatal,
			 rootport_total_fatal_errs);
aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
			 rootport_total_nonfatal_errs);
623
/* All AER statistics attributes; visibility is decided per device below */
static struct attribute *aer_stats_attrs[] __ro_after_init = {
	&dev_attr_aer_dev_correctable.attr,
	&dev_attr_aer_dev_fatal.attr,
	&dev_attr_aer_dev_nonfatal.attr,
	&dev_attr_aer_rootport_total_err_cor.attr,
	&dev_attr_aer_rootport_total_err_fatal.attr,
	&dev_attr_aer_rootport_total_err_nonfatal.attr,
	NULL
};
633
634static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
635 struct attribute *a, int n)
636{
637 struct device *dev = kobj_to_dev(kobj);
638 struct pci_dev *pdev = to_pci_dev(dev);
639
640 if (!pdev->aer_stats)
641 return 0;
642
643 if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
644 a == &dev_attr_aer_rootport_total_err_fatal.attr ||
645 a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
646 pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
647 return 0;
648
649 return a->mode;
650}
651
/* sysfs attribute group exposing the AER statistics of a PCI device */
const struct attribute_group aer_stats_attr_group = {
	.attrs = aer_stats_attrs,
	.is_visible = aer_stats_attrs_are_visible,
};
656
657static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
658 struct aer_err_info *info)
659{
660 int status, i, max = -1;
661 u64 *counter = NULL;
662 struct aer_stats *aer_stats = pdev->aer_stats;
663
664 if (!aer_stats)
665 return;
666
667 switch (info->severity) {
668 case AER_CORRECTABLE:
669 aer_stats->dev_total_cor_errs++;
670 counter = &aer_stats->dev_cor_errs[0];
671 max = AER_MAX_TYPEOF_COR_ERRS;
672 break;
673 case AER_NONFATAL:
674 aer_stats->dev_total_nonfatal_errs++;
675 counter = &aer_stats->dev_nonfatal_errs[0];
676 max = AER_MAX_TYPEOF_UNCOR_ERRS;
677 break;
678 case AER_FATAL:
679 aer_stats->dev_total_fatal_errs++;
680 counter = &aer_stats->dev_fatal_errs[0];
681 max = AER_MAX_TYPEOF_UNCOR_ERRS;
682 break;
683 }
684
685 status = (info->status & ~info->mask);
686 for (i = 0; i < max; i++)
687 if (status & (1 << i))
688 counter[i]++;
689}
690
691static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
692 struct aer_err_source *e_src)
693{
694 struct aer_stats *aer_stats = pdev->aer_stats;
695
696 if (!aer_stats)
697 return;
698
699 if (e_src->status & PCI_ERR_ROOT_COR_RCV)
700 aer_stats->rootport_total_cor_errs++;
701
702 if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
703 if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
704 aer_stats->rootport_total_fatal_errs++;
705 else
706 aer_stats->rootport_total_nonfatal_errs++;
707 }
708}
709
/* Log the four dwords of a captured TLP header for @dev. */
static void __print_tlp_header(struct pci_dev *dev,
			 struct aer_header_log_regs *t)
{
	pci_err(dev, " TLP Header: %08x %08x %08x %08x\n",
		t->dw0, t->dw1, t->dw2, t->dw3);
}
716
717static void __aer_print_error(struct pci_dev *dev,
718 struct aer_err_info *info)
719{
720 int i, status;
721 const char *errmsg = NULL;
722 status = (info->status & ~info->mask);
723
724 for (i = 0; i < 32; i++) {
725 if (!(status & (1 << i)))
726 continue;
727
728 if (info->severity == AER_CORRECTABLE)
729 errmsg = i < ARRAY_SIZE(aer_correctable_error_string) ?
730 aer_correctable_error_string[i] : NULL;
731 else
732 errmsg = i < ARRAY_SIZE(aer_uncorrectable_error_string) ?
733 aer_uncorrectable_error_string[i] : NULL;
734
735 if (errmsg)
736 pci_err(dev, " [%2d] %-22s%s\n", i, errmsg,
737 info->first_error == i ? " (First)" : "");
738 else
739 pci_err(dev, " [%2d] Unknown Error Bit%s\n",
740 i, info->first_error == i ? " (First)" : "");
741 }
742 pci_dev_aer_stats_incr(dev, info);
743}
744
745void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
746{
747 int layer, agent;
748 int id = ((dev->bus->number << 8) | dev->devfn);
749
750 if (!info->status) {
751 pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
752 aer_error_severity_string[info->severity]);
753 goto out;
754 }
755
756 layer = AER_GET_LAYER_ERROR(info->severity, info->status);
757 agent = AER_GET_AGENT(info->severity, info->status);
758
759 pci_err(dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
760 aer_error_severity_string[info->severity],
761 aer_error_layer[layer], aer_agent_string[agent]);
762
763 pci_err(dev, " device [%04x:%04x] error status/mask=%08x/%08x\n",
764 dev->vendor, dev->device,
765 info->status, info->mask);
766
767 __aer_print_error(dev, info);
768
769 if (info->tlp_header_valid)
770 __print_tlp_header(dev, &info->tlp);
771
772out:
773 if (info->id && info->error_dev_num > 1 && info->id == id)
774 pci_err(dev, " Error of this Agent is reported first\n");
775
776 trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
777 info->severity, info->tlp_header_valid, &info->tlp);
778}
779
780static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
781{
782 u8 bus = info->id >> 8;
783 u8 devfn = info->id & 0xff;
784
785 pci_info(dev, "AER: %s%s error received: %04x:%02x:%02x.%d\n",
786 info->multi_error_valid ? "Multiple " : "",
787 aer_error_severity_string[info->severity],
788 pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
789}
790
791#ifdef CONFIG_ACPI_APEI_PCIEAER
792int cper_severity_to_aer(int cper_severity)
793{
794 switch (cper_severity) {
795 case CPER_SEV_RECOVERABLE:
796 return AER_NONFATAL;
797 case CPER_SEV_FATAL:
798 return AER_FATAL;
799 default:
800 return AER_CORRECTABLE;
801 }
802}
803EXPORT_SYMBOL_GPL(cper_severity_to_aer);
804
805void cper_print_aer(struct pci_dev *dev, int aer_severity,
806 struct aer_capability_regs *aer)
807{
808 int layer, agent, tlp_header_valid = 0;
809 u32 status, mask;
810 struct aer_err_info info;
811
812 if (aer_severity == AER_CORRECTABLE) {
813 status = aer->cor_status;
814 mask = aer->cor_mask;
815 } else {
816 status = aer->uncor_status;
817 mask = aer->uncor_mask;
818 tlp_header_valid = status & AER_LOG_TLP_MASKS;
819 }
820
821 layer = AER_GET_LAYER_ERROR(aer_severity, status);
822 agent = AER_GET_AGENT(aer_severity, status);
823
824 memset(&info, 0, sizeof(info));
825 info.severity = aer_severity;
826 info.status = status;
827 info.mask = mask;
828 info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
829
830 pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
831 __aer_print_error(dev, &info);
832 pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
833 aer_error_layer[layer], aer_agent_string[agent]);
834
835 if (aer_severity != AER_CORRECTABLE)
836 pci_err(dev, "aer_uncor_severity: 0x%08x\n",
837 aer->uncor_severity);
838
839 if (tlp_header_valid)
840 __print_tlp_header(dev, &aer->header_log);
841
842 trace_aer_event(dev_name(&dev->dev), (status & ~mask),
843 aer_severity, tlp_header_valid, &aer->header_log);
844}
845#endif
846
847
848
849
850
851
852static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
853{
854 if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
855 e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
856 e_info->error_dev_num++;
857 return 0;
858 }
859 return -ENOSPC;
860}
861
862
863
864
865
866
867static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info)
868{
869 int pos;
870 u32 status, mask;
871 u16 reg16;
872
873
874
875
876
877 if ((PCI_BUS_NUM(e_info->id) != 0) &&
878 !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) {
879
880 if (e_info->id == ((dev->bus->number << 8) | dev->devfn))
881 return true;
882
883
884 if (!e_info->multi_error_valid)
885 return false;
886 }
887
888
889
890
891
892
893
894
895
896 if (atomic_read(&dev->enable_cnt) == 0)
897 return false;
898
899
900 pcie_capability_read_word(dev, PCI_EXP_DEVCTL, ®16);
901 if (!(reg16 & PCI_EXP_AER_FLAGS))
902 return false;
903
904 pos = dev->aer_cap;
905 if (!pos)
906 return false;
907
908
909 if (e_info->severity == AER_CORRECTABLE) {
910 pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, &status);
911 pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &mask);
912 } else {
913 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
914 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
915 }
916 if (status & ~mask)
917 return true;
918
919 return false;
920}
921
922static int find_device_iter(struct pci_dev *dev, void *data)
923{
924 struct aer_err_info *e_info = (struct aer_err_info *)data;
925
926 if (is_error_source(dev, e_info)) {
927
928 if (add_error_device(e_info, dev)) {
929
930
931 return 1;
932 }
933
934
935 if (!e_info->multi_error_valid)
936 return 1;
937 }
938 return 0;
939}
940
941
942
943
944
945
946
947
948
949
950
951
952
953static bool find_source_device(struct pci_dev *parent,
954 struct aer_err_info *e_info)
955{
956 struct pci_dev *dev = parent;
957 int result;
958
959
960 e_info->error_dev_num = 0;
961
962
963 result = find_device_iter(dev, e_info);
964 if (result)
965 return true;
966
967 pci_walk_bus(parent->subordinate, find_device_iter, e_info);
968
969 if (!e_info->error_dev_num) {
970 pci_printk(KERN_DEBUG, parent, "can't find device of ID%04x\n",
971 e_info->id);
972 return false;
973 }
974 return true;
975}
976
977
978
979
980
981
982
983
984static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
985{
986 int pos;
987
988 if (info->severity == AER_CORRECTABLE) {
989
990
991
992
993 pos = dev->aer_cap;
994 if (pos)
995 pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
996 info->status);
997 pci_aer_clear_device_status(dev);
998 } else if (info->severity == AER_NONFATAL)
999 pcie_do_recovery(dev, pci_channel_io_normal,
1000 PCIE_PORT_SERVICE_AER);
1001 else if (info->severity == AER_FATAL)
1002 pcie_do_recovery(dev, pci_channel_io_frozen,
1003 PCIE_PORT_SERVICE_AER);
1004 pci_dev_put(dev);
1005}
1006
1007#ifdef CONFIG_ACPI_APEI_PCIEAER
1008
/* The recovery ring holds 1 << AER_RECOVER_RING_ORDER entries */
#define AER_RECOVER_RING_ORDER 4
#define AER_RECOVER_RING_SIZE (1 << AER_RECOVER_RING_ORDER)

/* One queued recovery request, filled in by aer_recover_queue() */
struct aer_recover_entry {
	u8 bus;
	u8 devfn;
	u16 domain;
	int severity;				/* AER severity of the error */
	struct aer_capability_regs *regs;	/* registers handed to cper_print_aer() */
};

/* FIFO between aer_recover_queue() and aer_recover_work_func() */
static DEFINE_KFIFO(aer_recover_ring, struct aer_recover_entry,
		    AER_RECOVER_RING_SIZE);
1022
1023static void aer_recover_work_func(struct work_struct *work)
1024{
1025 struct aer_recover_entry entry;
1026 struct pci_dev *pdev;
1027
1028 while (kfifo_get(&aer_recover_ring, &entry)) {
1029 pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
1030 entry.devfn);
1031 if (!pdev) {
1032 pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n",
1033 entry.domain, entry.bus,
1034 PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
1035 continue;
1036 }
1037 cper_print_aer(pdev, entry.severity, entry.regs);
1038 if (entry.severity == AER_NONFATAL)
1039 pcie_do_recovery(pdev, pci_channel_io_normal,
1040 PCIE_PORT_SERVICE_AER);
1041 else if (entry.severity == AER_FATAL)
1042 pcie_do_recovery(pdev, pci_channel_io_frozen,
1043 PCIE_PORT_SERVICE_AER);
1044 pci_dev_put(pdev);
1045 }
1046}
1047
1048
1049
1050
1051
1052
1053static DEFINE_SPINLOCK(aer_recover_ring_lock);
1054static DECLARE_WORK(aer_recover_work, aer_recover_work_func);
1055
1056void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
1057 int severity, struct aer_capability_regs *aer_regs)
1058{
1059 struct aer_recover_entry entry = {
1060 .bus = bus,
1061 .devfn = devfn,
1062 .domain = domain,
1063 .severity = severity,
1064 .regs = aer_regs,
1065 };
1066
1067 if (kfifo_in_spinlocked(&aer_recover_ring, &entry, 1,
1068 &aer_recover_ring_lock))
1069 schedule_work(&aer_recover_work);
1070 else
1071 pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n",
1072 domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1073}
1074EXPORT_SYMBOL_GPL(aer_recover_queue);
1075#endif
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
1087{
1088 int pos, temp;
1089
1090
1091 info->status = 0;
1092 info->tlp_header_valid = 0;
1093
1094 pos = dev->aer_cap;
1095
1096
1097 if (!pos)
1098 return 0;
1099
1100 if (info->severity == AER_CORRECTABLE) {
1101 pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS,
1102 &info->status);
1103 pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK,
1104 &info->mask);
1105 if (!(info->status & ~info->mask))
1106 return 0;
1107 } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
1108 pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM ||
1109 info->severity == AER_NONFATAL) {
1110
1111
1112 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS,
1113 &info->status);
1114 pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK,
1115 &info->mask);
1116 if (!(info->status & ~info->mask))
1117 return 0;
1118
1119
1120 pci_read_config_dword(dev, pos + PCI_ERR_CAP, &temp);
1121 info->first_error = PCI_ERR_CAP_FEP(temp);
1122
1123 if (info->status & AER_LOG_TLP_MASKS) {
1124 info->tlp_header_valid = 1;
1125 pci_read_config_dword(dev,
1126 pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0);
1127 pci_read_config_dword(dev,
1128 pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1);
1129 pci_read_config_dword(dev,
1130 pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2);
1131 pci_read_config_dword(dev,
1132 pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3);
1133 }
1134 }
1135
1136 return 1;
1137}
1138
1139static inline void aer_process_err_devices(struct aer_err_info *e_info)
1140{
1141 int i;
1142
1143
1144 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
1145 if (aer_get_device_error_info(e_info->dev[i], e_info))
1146 aer_print_error(e_info->dev[i], e_info);
1147 }
1148 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
1149 if (aer_get_device_error_info(e_info->dev[i], e_info))
1150 handle_error_source(e_info->dev[i], e_info);
1151 }
1152}
1153
1154
1155
1156
1157
1158
1159static void aer_isr_one_error(struct aer_rpc *rpc,
1160 struct aer_err_source *e_src)
1161{
1162 struct pci_dev *pdev = rpc->rpd;
1163 struct aer_err_info e_info;
1164
1165 pci_rootport_aer_stats_incr(pdev, e_src);
1166
1167
1168
1169
1170
1171 if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
1172 e_info.id = ERR_COR_ID(e_src->id);
1173 e_info.severity = AER_CORRECTABLE;
1174
1175 if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
1176 e_info.multi_error_valid = 1;
1177 else
1178 e_info.multi_error_valid = 0;
1179 aer_print_port_info(pdev, &e_info);
1180
1181 if (find_source_device(pdev, &e_info))
1182 aer_process_err_devices(&e_info);
1183 }
1184
1185 if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
1186 e_info.id = ERR_UNCOR_ID(e_src->id);
1187
1188 if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
1189 e_info.severity = AER_FATAL;
1190 else
1191 e_info.severity = AER_NONFATAL;
1192
1193 if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
1194 e_info.multi_error_valid = 1;
1195 else
1196 e_info.multi_error_valid = 0;
1197
1198 aer_print_port_info(pdev, &e_info);
1199
1200 if (find_source_device(pdev, &e_info))
1201 aer_process_err_devices(&e_info);
1202 }
1203}
1204
1205
1206
1207
1208
1209
1210
1211static irqreturn_t aer_isr(int irq, void *context)
1212{
1213 struct pcie_device *dev = (struct pcie_device *)context;
1214 struct aer_rpc *rpc = get_service_data(dev);
1215 struct aer_err_source uninitialized_var(e_src);
1216
1217 if (kfifo_is_empty(&rpc->aer_fifo))
1218 return IRQ_NONE;
1219
1220 while (kfifo_get(&rpc->aer_fifo, &e_src))
1221 aer_isr_one_error(rpc, &e_src);
1222 return IRQ_HANDLED;
1223}
1224
1225
1226
1227
1228
1229
1230
1231
1232static irqreturn_t aer_irq(int irq, void *context)
1233{
1234 struct pcie_device *pdev = (struct pcie_device *)context;
1235 struct aer_rpc *rpc = get_service_data(pdev);
1236 struct pci_dev *rp = rpc->rpd;
1237 struct aer_err_source e_src = {};
1238 int pos = rp->aer_cap;
1239
1240 pci_read_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, &e_src.status);
1241 if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV)))
1242 return IRQ_NONE;
1243
1244 pci_read_config_dword(rp, pos + PCI_ERR_ROOT_ERR_SRC, &e_src.id);
1245 pci_write_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, e_src.status);
1246
1247 if (!kfifo_put(&rpc->aer_fifo, e_src))
1248 return IRQ_HANDLED;
1249
1250 return IRQ_WAKE_THREAD;
1251}
1252
1253static int set_device_error_reporting(struct pci_dev *dev, void *data)
1254{
1255 bool enable = *((bool *)data);
1256 int type = pci_pcie_type(dev);
1257
1258 if ((type == PCI_EXP_TYPE_ROOT_PORT) ||
1259 (type == PCI_EXP_TYPE_UPSTREAM) ||
1260 (type == PCI_EXP_TYPE_DOWNSTREAM)) {
1261 if (enable)
1262 pci_enable_pcie_error_reporting(dev);
1263 else
1264 pci_disable_pcie_error_reporting(dev);
1265 }
1266
1267 if (enable)
1268 pcie_set_ecrc_checking(dev);
1269
1270 return 0;
1271}
1272
1273
1274
1275
1276
1277
1278static void set_downstream_devices_error_reporting(struct pci_dev *dev,
1279 bool enable)
1280{
1281 set_device_error_reporting(dev, &enable);
1282
1283 if (!dev->subordinate)
1284 return;
1285 pci_walk_bus(dev->subordinate, set_device_error_reporting, &enable);
1286}
1287
1288
1289
1290
1291
1292
1293
1294static void aer_enable_rootport(struct aer_rpc *rpc)
1295{
1296 struct pci_dev *pdev = rpc->rpd;
1297 int aer_pos;
1298 u16 reg16;
1299 u32 reg32;
1300
1301
1302 pcie_capability_read_word(pdev, PCI_EXP_DEVSTA, ®16);
1303 pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, reg16);
1304
1305
1306 pcie_capability_clear_word(pdev, PCI_EXP_RTCTL,
1307 SYSTEM_ERROR_INTR_ON_MESG_MASK);
1308
1309 aer_pos = pdev->aer_cap;
1310
1311 pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32);
1312 pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32);
1313 pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, ®32);
1314 pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32);
1315 pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, ®32);
1316 pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32);
1317
1318
1319
1320
1321
1322 set_downstream_devices_error_reporting(pdev, true);
1323
1324
1325 pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, ®32);
1326 reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
1327 pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_COMMAND, reg32);
1328}
1329
1330
1331
1332
1333
1334
1335
1336static void aer_disable_rootport(struct aer_rpc *rpc)
1337{
1338 struct pci_dev *pdev = rpc->rpd;
1339 u32 reg32;
1340 int pos;
1341
1342
1343
1344
1345
1346 set_downstream_devices_error_reporting(pdev, false);
1347
1348 pos = pdev->aer_cap;
1349
1350 pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32);
1351 reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
1352 pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, reg32);
1353
1354
1355 pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, ®32);
1356 pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32);
1357}
1358
1359
1360
1361
1362
1363
1364
/**
 * aer_remove - clean up resources
 * @dev: pointer to the pcie_dev data structure
 *
 * Disables AER interrupt generation on the Root Port.  The service data
 * and the IRQ were obtained with devm_* helpers in aer_probe().
 */
static void aer_remove(struct pcie_device *dev)
{
	struct aer_rpc *rpc = get_service_data(dev);

	aer_disable_rootport(rpc);
}
1371
1372
1373
1374
1375
1376
1377
1378static int aer_probe(struct pcie_device *dev)
1379{
1380 int status;
1381 struct aer_rpc *rpc;
1382 struct device *device = &dev->device;
1383
1384 rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL);
1385 if (!rpc) {
1386 dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n");
1387 return -ENOMEM;
1388 }
1389 rpc->rpd = dev->port;
1390 set_service_data(dev, rpc);
1391
1392 status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr,
1393 IRQF_SHARED, "aerdrv", dev);
1394 if (status) {
1395 dev_printk(KERN_DEBUG, device, "request AER IRQ %d failed\n",
1396 dev->irq);
1397 return status;
1398 }
1399
1400 aer_enable_rootport(rpc);
1401 dev_info(device, "AER enabled with IRQ %d\n", dev->irq);
1402 return 0;
1403}
1404
1405
1406
1407
1408
1409
1410
1411static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
1412{
1413 u32 reg32;
1414 int pos;
1415 int rc;
1416
1417 pos = dev->aer_cap;
1418
1419
1420 pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32);
1421 reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK;
1422 pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
1423
1424 rc = pci_bus_error_reset(dev);
1425 pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n");
1426
1427
1428 pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, ®32);
1429 pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, reg32);
1430
1431
1432 pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32);
1433 reg32 |= ROOT_PORT_INTR_ON_MESG_MASK;
1434 pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32);
1435
1436 return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
1437}
1438
/* PCIe port service driver for AER; it binds to Root Ports only */
static struct pcie_port_service_driver aerdriver = {
	.name = "aer",
	.port_type = PCI_EXP_TYPE_ROOT_PORT,
	.service = PCIE_PORT_SERVICE_AER,

	.probe = aer_probe,
	.remove = aer_remove,
	.reset_link = aer_root_reset,
};
1448
1449
1450
1451
1452
1453
1454int __init pcie_aer_init(void)
1455{
1456 if (!pci_aer_available() || aer_acpi_firmware_first())
1457 return -ENXIO;
1458 return pcie_port_service_register(&aerdriver);
1459}
1460