1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/delay.h>
25#include <linux/sched.h>
26#include <linux/init.h>
27#include <linux/list.h>
28#include <linux/pci.h>
29#include <linux/proc_fs.h>
30#include <linux/rbtree.h>
31#include <linux/seq_file.h>
32#include <linux/spinlock.h>
33#include <linux/export.h>
34#include <linux/of.h>
35
36#include <linux/atomic.h>
37#include <asm/eeh.h>
38#include <asm/eeh_event.h>
39#include <asm/io.h>
40#include <asm/machdep.h>
41#include <asm/ppc-pci.h>
42#include <asm/rtas.h>
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
 * While a PE is isolated, a diagnostic is printed each time its ignored-read
 * counter reaches a multiple of this value.
 */
#define EEH_MAX_FAILS			2100000

/* Timeout, in milliseconds, handed to eeh_ops->wait_state(). */
#define PCI_BUS_RESET_WAIT_MSEC		(60 * 1000)
88
89
/* Platform dependent EEH callbacks; NULL until eeh_ops_register() succeeds. */
struct eeh_ops *eeh_ops;

/*
 * Tested as a boolean throughout this file before any EEH work is done.
 * NOTE(review): no visible code sets it -- presumably the platform probe does.
 */
int eeh_subsystem_enabled;
EXPORT_SYMBOL(eeh_subsystem_enabled);
94
95
96
97
98
99
100
101
102
103
/*
 * Probe mode selector.
 * NOTE(review): only consulted indirectly here (eeh_probe_mode_devtree());
 * the writer is outside this file's visible code.
 */
int eeh_probe_mode;

/* Global EEH mutex; not taken by any function visible in this file. */
DEFINE_MUTEX(eeh_mutex);

/* Held by eeh_dev_check_failure() while it confirms and marks a PE failure. */
static DEFINE_RAW_SPINLOCK(confirm_error_lock);
111
112
113
114
115
/*
 * Scratch buffer that eeh_slot_error_detail() fills with a textual register
 * dump before handing it to eeh_ops->get_log().
 */
#define EEH_PCI_REGS_LOG_LEN	4096
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
118
119
120
121
122
123
124struct eeh_stats {
125 u64 no_device;
126 u64 no_dn;
127 u64 no_cfg_addr;
128 u64 ignored_check;
129 u64 total_mmio_ffs;
130 u64 false_positives;
131 u64 slot_resets;
132};
133
134static struct eeh_stats eeh_stats;
135
/*
 * True when a 24-bit PCI class code belongs to the bridge base class.
 * The base class lives in bits 23:16, so the value has to be shifted
 * right (not left) before it is compared.
 */
#define IS_BRIDGE(class_code) (((class_code) >> 16) == PCI_BASE_CLASS_BRIDGE)
137
138
139
140
141
142
143
144
145
146
147static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
148{
149 struct device_node *dn = eeh_dev_to_of_node(edev);
150 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
151 u32 cfg;
152 int cap, i;
153 int n = 0;
154
155 n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
156 printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
157
158 eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
159 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
160 printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
161
162 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
163 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
164 printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
165
166 if (!dev) {
167 printk(KERN_WARNING "EEH: no PCI device for this of node\n");
168 return n;
169 }
170
171
172 if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
173 eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
174 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
175 printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
176
177 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
178 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
179 printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
180 }
181
182
183 cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
184 if (cap) {
185 eeh_ops->read_config(dn, cap, 4, &cfg);
186 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
187 printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
188
189 eeh_ops->read_config(dn, cap+4, 4, &cfg);
190 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
191 printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
192 }
193
194
195 if (pci_is_pcie(dev)) {
196 n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
197 printk(KERN_WARNING
198 "EEH: PCI-E capabilities and status follow:\n");
199
200 for (i=0; i<=8; i++) {
201 eeh_ops->read_config(dn, dev->pcie_cap+4*i, 4, &cfg);
202 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
203 printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
204 }
205
206 cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
207 if (cap) {
208 n += scnprintf(buf+n, len-n, "pci-e AER:\n");
209 printk(KERN_WARNING
210 "EEH: PCI-E AER capability register set follows:\n");
211
212 for (i=0; i<14; i++) {
213 eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
214 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
215 printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
216 }
217 }
218 }
219
220 return n;
221}
222
223
224
225
226
227
228
229
230
231
232
233void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
234{
235 size_t loglen = 0;
236 struct eeh_dev *edev;
237
238 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
239 eeh_ops->configure_bridge(pe);
240 eeh_pe_restore_bars(pe);
241
242 pci_regs_buf[0] = 0;
243 eeh_pe_for_each_dev(pe, edev) {
244 loglen += eeh_gather_pci_data(edev, pci_regs_buf,
245 EEH_PCI_REGS_LOG_LEN);
246 }
247
248 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
249}
250
251
252
253
254
255
256
257
258static inline unsigned long eeh_token_to_phys(unsigned long token)
259{
260 pte_t *ptep;
261 unsigned long pa;
262 int hugepage_shift;
263
264
265
266
267 ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
268 if (!ptep)
269 return token;
270 WARN_ON(hugepage_shift);
271 pa = pte_pfn(*ptep) << PAGE_SHIFT;
272
273 return pa | (token & (PAGE_SIZE-1));
274}
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290int eeh_dev_check_failure(struct eeh_dev *edev)
291{
292 int ret;
293 unsigned long flags;
294 struct device_node *dn;
295 struct pci_dev *dev;
296 struct eeh_pe *pe;
297 int rc = 0;
298 const char *location;
299
300 eeh_stats.total_mmio_ffs++;
301
302 if (!eeh_subsystem_enabled)
303 return 0;
304
305 if (!edev) {
306 eeh_stats.no_dn++;
307 return 0;
308 }
309 dn = eeh_dev_to_of_node(edev);
310 dev = eeh_dev_to_pci_dev(edev);
311 pe = edev->pe;
312
313
314 if (!pe) {
315 eeh_stats.ignored_check++;
316 pr_debug("EEH: Ignored check for %s %s\n",
317 eeh_pci_name(dev), dn->full_name);
318 return 0;
319 }
320
321 if (!pe->addr && !pe->config_addr) {
322 eeh_stats.no_cfg_addr++;
323 return 0;
324 }
325
326
327
328
329
330
331
332 raw_spin_lock_irqsave(&confirm_error_lock, flags);
333 rc = 1;
334 if (pe->state & EEH_PE_ISOLATED) {
335 pe->check_count++;
336 if (pe->check_count % EEH_MAX_FAILS == 0) {
337 location = of_get_property(dn, "ibm,loc-code", NULL);
338 printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
339 "location=%s driver=%s pci addr=%s\n",
340 pe->check_count, location,
341 eeh_driver_name(dev), eeh_pci_name(dev));
342 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
343 eeh_driver_name(dev));
344 dump_stack();
345 }
346 goto dn_unlock;
347 }
348
349
350
351
352
353
354
355
356 ret = eeh_ops->get_state(pe, NULL);
357
358
359
360
361
362
363
364 if ((ret < 0) ||
365 (ret == EEH_STATE_NOT_SUPPORT) ||
366 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
367 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
368 eeh_stats.false_positives++;
369 pe->false_positives++;
370 rc = 0;
371 goto dn_unlock;
372 }
373
374 eeh_stats.slot_resets++;
375
376
377
378
379
380 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
381 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
382
383 eeh_send_failure_event(pe);
384
385
386
387
388
389 WARN(1, "EEH: failure detected\n");
390 return 1;
391
392dn_unlock:
393 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
394 return rc;
395}
396
397EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
398
399
400
401
402
403
404
405
406
407
408
409
410
411unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
412{
413 unsigned long addr;
414 struct eeh_dev *edev;
415
416
417 addr = eeh_token_to_phys((unsigned long __force) token);
418 edev = eeh_addr_cache_get_dev(addr);
419 if (!edev) {
420 eeh_stats.no_device++;
421 return val;
422 }
423
424 eeh_dev_check_failure(edev);
425
426 pci_dev_put(eeh_dev_to_pci_dev(edev));
427 return val;
428}
429
430EXPORT_SYMBOL(eeh_check_failure);
431
432
433
434
435
436
437
438
439
440
441int eeh_pci_enable(struct eeh_pe *pe, int function)
442{
443 int rc;
444
445 rc = eeh_ops->set_option(pe, function);
446 if (rc)
447 pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
448 __func__, function, pe->phb->global_number, pe->addr, rc);
449
450 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
451 if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
452 (function == EEH_OPT_THAW_MMIO))
453 return 0;
454
455 return rc;
456}
457
458
459
460
461
462
463
464
465
466int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
467{
468 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
469 struct eeh_pe *pe = edev->pe;
470
471 if (!pe) {
472 pr_err("%s: No PE found on PCI device %s\n",
473 __func__, pci_name(dev));
474 return -EINVAL;
475 }
476
477 switch (state) {
478 case pcie_deassert_reset:
479 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
480 break;
481 case pcie_hot_reset:
482 eeh_ops->reset(pe, EEH_RESET_HOT);
483 break;
484 case pcie_warm_reset:
485 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
486 break;
487 default:
488 return -EINVAL;
489 };
490
491 return 0;
492}
493
494
495
496
497
498
499
500
501
502
503
504static void *eeh_set_dev_freset(void *data, void *flag)
505{
506 struct pci_dev *dev;
507 unsigned int *freset = (unsigned int *)flag;
508 struct eeh_dev *edev = (struct eeh_dev *)data;
509
510 dev = eeh_dev_to_pci_dev(edev);
511 if (dev)
512 *freset |= dev->needs_freset;
513
514 return NULL;
515}
516
517
518
519
520
521
522
523static void eeh_reset_pe_once(struct eeh_pe *pe)
524{
525 unsigned int freset = 0;
526
527
528
529
530
531
532
533 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
534
535 if (freset)
536 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
537 else
538 eeh_ops->reset(pe, EEH_RESET_HOT);
539
540
541
542
543#define PCI_BUS_RST_HOLD_TIME_MSEC 250
544 msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
545
546
547
548
549
550 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
551
552 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
553
554
555
556
557
558#define PCI_BUS_SETTLE_TIME_MSEC 1800
559 msleep(PCI_BUS_SETTLE_TIME_MSEC);
560}
561
562
563
564
565
566
567
568
569
570int eeh_reset_pe(struct eeh_pe *pe)
571{
572 int i, rc;
573
574
575 for (i=0; i<3; i++) {
576 eeh_reset_pe_once(pe);
577
578 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
579 if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
580 return 0;
581
582 if (rc < 0) {
583 pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
584 __func__, pe->phb->global_number, pe->addr);
585 return -1;
586 }
587 pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
588 i+1, pe->phb->global_number, pe->addr, rc);
589 }
590
591 return -1;
592}
593
594
595
596
597
598
599
600
601
602
603void eeh_save_bars(struct eeh_dev *edev)
604{
605 int i;
606 struct device_node *dn;
607
608 if (!edev)
609 return;
610 dn = eeh_dev_to_of_node(edev);
611
612 for (i = 0; i < 16; i++)
613 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
614}
615
616
617
618
619
620
621
622
623
624int __init eeh_ops_register(struct eeh_ops *ops)
625{
626 if (!ops->name) {
627 pr_warning("%s: Invalid EEH ops name for %p\n",
628 __func__, ops);
629 return -EINVAL;
630 }
631
632 if (eeh_ops && eeh_ops != ops) {
633 pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
634 __func__, eeh_ops->name, ops->name);
635 return -EEXIST;
636 }
637
638 eeh_ops = ops;
639
640 return 0;
641}
642
643
644
645
646
647
648
649
650int __exit eeh_ops_unregister(const char *name)
651{
652 if (!name || !strlen(name)) {
653 pr_warning("%s: Invalid EEH ops name\n",
654 __func__);
655 return -EINVAL;
656 }
657
658 if (eeh_ops && !strcmp(eeh_ops->name, name)) {
659 eeh_ops = NULL;
660 return 0;
661 }
662
663 return -EEXIST;
664}
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681static int __init eeh_init(void)
682{
683 struct pci_controller *hose, *tmp;
684 struct device_node *phb;
685 int ret;
686
687
688 if (!eeh_ops) {
689 pr_warning("%s: Platform EEH operation not found\n",
690 __func__);
691 return -EEXIST;
692 } else if ((ret = eeh_ops->init())) {
693 pr_warning("%s: Failed to call platform init function (%d)\n",
694 __func__, ret);
695 return ret;
696 }
697
698 raw_spin_lock_init(&confirm_error_lock);
699
700
701 if (eeh_probe_mode_devtree()) {
702 list_for_each_entry_safe(hose, tmp,
703 &hose_list, list_node) {
704 phb = hose->dn;
705 traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
706 }
707 }
708
709 if (eeh_subsystem_enabled)
710 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
711 else
712 pr_warning("EEH: No capable adapters found\n");
713
714 return ret;
715}
716
717core_initcall_sync(eeh_init);
718
719
720
721
722
723
724
725
726
727
728
729
730
731static void eeh_add_device_early(struct device_node *dn)
732{
733 struct pci_controller *phb;
734
735 if (!of_node_to_eeh_dev(dn))
736 return;
737 phb = of_node_to_eeh_dev(dn)->phb;
738
739
740 if (NULL == phb || 0 == phb->buid)
741 return;
742
743
744 eeh_ops->of_probe(dn, NULL);
745}
746
747
748
749
750
751
752
753
754
755void eeh_add_device_tree_early(struct device_node *dn)
756{
757 struct device_node *sib;
758
759 for_each_child_of_node(dn, sib)
760 eeh_add_device_tree_early(sib);
761 eeh_add_device_early(dn);
762}
763EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
764
765
766
767
768
769
770
771
772static void eeh_add_device_late(struct pci_dev *dev)
773{
774 struct device_node *dn;
775 struct eeh_dev *edev;
776
777 if (!dev || !eeh_subsystem_enabled)
778 return;
779
780 pr_debug("EEH: Adding device %s\n", pci_name(dev));
781
782 dn = pci_device_to_OF_node(dev);
783 edev = of_node_to_eeh_dev(dn);
784 if (edev->pdev == dev) {
785 pr_debug("EEH: Already referenced !\n");
786 return;
787 }
788 WARN_ON(edev->pdev);
789
790 pci_dev_get(dev);
791 edev->pdev = dev;
792 dev->dev.archdata.edev = edev;
793
794 eeh_addr_cache_insert_dev(dev);
795}
796
797
798
799
800
801
802
803
804
805void eeh_add_device_tree_late(struct pci_bus *bus)
806{
807 struct pci_dev *dev;
808
809 list_for_each_entry(dev, &bus->devices, bus_list) {
810 eeh_add_device_late(dev);
811 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
812 struct pci_bus *subbus = dev->subordinate;
813 if (subbus)
814 eeh_add_device_tree_late(subbus);
815 }
816 }
817}
818EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
819
820
821
822
823
824
825
826
827
828void eeh_add_sysfs_files(struct pci_bus *bus)
829{
830 struct pci_dev *dev;
831
832 list_for_each_entry(dev, &bus->devices, bus_list) {
833 eeh_sysfs_add_device(dev);
834 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
835 struct pci_bus *subbus = dev->subordinate;
836 if (subbus)
837 eeh_add_sysfs_files(subbus);
838 }
839 }
840}
841EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
842
843
844
845
846
847
848
849
850
851
852
853
854static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
855{
856 struct eeh_dev *edev;
857
858 if (!dev || !eeh_subsystem_enabled)
859 return;
860 edev = pci_dev_to_eeh_dev(dev);
861
862
863 pr_debug("EEH: Removing device %s\n", pci_name(dev));
864
865 if (!edev || !edev->pdev) {
866 pr_debug("EEH: Not referenced !\n");
867 return;
868 }
869 edev->pdev = NULL;
870 dev->dev.archdata.edev = NULL;
871 pci_dev_put(dev);
872
873 eeh_rmv_from_parent_pe(edev, purge_pe);
874 eeh_addr_cache_rmv_dev(dev);
875 eeh_sysfs_remove_device(dev);
876}
877
878
879
880
881
882
883
884
885
886
887void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
888{
889 struct pci_bus *bus = dev->subordinate;
890 struct pci_dev *child, *tmp;
891
892 eeh_remove_device(dev, purge_pe);
893
894 if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
895 list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
896 eeh_remove_bus_device(child, purge_pe);
897 }
898}
899EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
900
901static int proc_eeh_show(struct seq_file *m, void *v)
902{
903 if (0 == eeh_subsystem_enabled) {
904 seq_printf(m, "EEH Subsystem is globally disabled\n");
905 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
906 } else {
907 seq_printf(m, "EEH Subsystem is enabled\n");
908 seq_printf(m,
909 "no device=%llu\n"
910 "no device node=%llu\n"
911 "no config address=%llu\n"
912 "check not wanted=%llu\n"
913 "eeh_total_mmio_ffs=%llu\n"
914 "eeh_false_positives=%llu\n"
915 "eeh_slot_resets=%llu\n",
916 eeh_stats.no_device,
917 eeh_stats.no_dn,
918 eeh_stats.no_cfg_addr,
919 eeh_stats.ignored_check,
920 eeh_stats.total_mmio_ffs,
921 eeh_stats.false_positives,
922 eeh_stats.slot_resets);
923 }
924
925 return 0;
926}
927
928static int proc_eeh_open(struct inode *inode, struct file *file)
929{
930 return single_open(file, proc_eeh_show, NULL);
931}
932
933static const struct file_operations proc_eeh_operations = {
934 .open = proc_eeh_open,
935 .read = seq_read,
936 .llseek = seq_lseek,
937 .release = single_release,
938};
939
940static int __init eeh_init_proc(void)
941{
942 if (machine_is(pseries))
943 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
944 return 0;
945}
946__initcall(eeh_init_proc);
947