1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24#include <linux/delay.h>
25#include <linux/debugfs.h>
26#include <linux/sched.h>
27#include <linux/init.h>
28#include <linux/list.h>
29#include <linux/pci.h>
30#include <linux/iommu.h>
31#include <linux/proc_fs.h>
32#include <linux/rbtree.h>
33#include <linux/reboot.h>
34#include <linux/seq_file.h>
35#include <linux/spinlock.h>
36#include <linux/export.h>
37#include <linux/of.h>
38
39#include <linux/atomic.h>
40#include <asm/eeh.h>
41#include <asm/eeh_event.h>
42#include <asm/io.h>
43#include <asm/iommu.h>
44#include <asm/machdep.h>
45#include <asm/ppc-pci.h>
46#include <asm/rtas.h>
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/*
 * If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS 2100000

/* Time to wait for 1st reset, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
92
93
94
95
96
97
98
99
100
101
102
/* Platform-wide EEH flags (see eeh_add_flag()/eeh_has_flag() usage below) */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * Maximum number of frozen states tolerated; tunable at runtime
 * through the "eeh_max_freezes" debugfs file created below.
 */
int eeh_max_freezes = 5;

/* Platform-dependent EEH operations, installed via eeh_ops_register() */
struct eeh_ops *eeh_ops = NULL;

/* Lock to serialize the detection and marking of frozen PEs/PHBs */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Serializes eeh_dev_open()/eeh_dev_release() pass-through accounting */
static DEFINE_MUTEX(eeh_dev_mutex);

/*
 * Buffer for capturing the textual PCI config-space dump of a failing
 * PE; filled by eeh_dump_pe_log() and handed to eeh_ops->get_log().
 */
#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
128
129
130
131
132
133
/*
 * Global EEH statistic counters, exported to user space
 * through /proc/powerpc/eeh (see proc_eeh_show() below).
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* EEH device not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE resets			*/
};

static struct eeh_stats eeh_stats;
145
146static int __init eeh_setup(char *str)
147{
148 if (!strcmp(str, "off"))
149 eeh_add_flag(EEH_FORCE_DISABLED);
150 else if (!strcmp(str, "early_log"))
151 eeh_add_flag(EEH_EARLY_DUMP_LOG);
152
153 return 1;
154}
155__setup("eeh=", eeh_setup);
156
157
158
159
160
161
/**
 * eeh_dump_dev_log - Dump the PCI configuration space of one device
 * @edev: the EEH device whose config space is captured
 * @buf: destination buffer for the textual log
 * @len: size of @buf in bytes
 *
 * Reads the standard config header plus (when present) the PCI-X,
 * PCI-E and AER capability blocks through eeh_ops->read_config(),
 * appending each value both to @buf and to the kernel log.
 * Returns the number of bytes written into @buf.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];		/* staging area for one pr_warn() line */

	/* Identify the device by domain/bus/slot/function */
	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
		       edev->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
		edev->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump the PCI-E capability block (9 dwords) */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			/* Batch four dwords per printed kernel-log line */
			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}

		}

		/* Flush the partially-filled final line */
		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump the AER capability block (14 dwords) */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i=0; i<=13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			/* Same four-dwords-per-line batching as above */
			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}
263
/**
 * eeh_dump_pe_log - Dump the config registers of every device on a PE
 * @data: the PE being traversed (struct eeh_pe *)
 * @flag: running offset into pci_regs_buf (size_t *)
 *
 * Traversal callback for eeh_pe_traverse(): appends the config-space
 * log of each device on the PE to pci_regs_buf. Always returns NULL
 * so the traversal continues to child PEs.
 */
static void *eeh_dump_pe_log(void *data, void *flag)
{
	struct eeh_pe *pe = data;
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	/*
	 * If config space access is blocked for this PE (e.g. while it
	 * is held in reset), skip the dump rather than risk bad reads.
	 */
	if (pe->state & EEH_PE_CFG_BLOCKED)
		return NULL;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}
283
284
285
286
287
288
289
290
291
292
293
/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call
 * (eeh_ops->get_log).
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB itself is fenced, collecting data from PCI
	 * config space is pointless (reads return all 0xFF), so the
	 * config dump is only attempted for non-PHB PEs.
	 *
	 * On some platforms IO must be re-enabled first, otherwise
	 * config space reads also return all 0xFF; EEH_ENABLE_IO_FOR_LOG
	 * signals that requirement.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * Restoring BARs possibly triggers PCI config accesses
		 * in firmware. If config access to this PE is blocked
		 * (EEH_PE_CFG_BLOCKED), it's pointless -- and unsafe --
		 * to restore BARs and dump config space, so both steps
		 * are skipped in that case.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	/* Hand whatever was captured (possibly nothing) to the platform */
	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}
334
335
336
337
338
339
340
341
/**
 * eeh_token_to_phys - Convert EEH address token to physical address
 * @token: I/O address (kernel virtual)
 *
 * Walks the kernel (init_mm) page table to translate the ioremapped
 * virtual address to its physical counterpart. If no PTE is found,
 * the token is returned unchanged.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here (this is iomem), so we are not
	 * worried about page splitting/collapse. We also will not hit
	 * page table free, because this is init_mm.
	 */
	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
					   NULL, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	/* Keep the in-page offset from the original token */
	return pa | (token & (PAGE_SIZE-1));
}
362
363
364
365
366
367
/*
 * On PowerNV platform, we might already have fenced PHB there.
 * For that case we should have eeh_dev_check_failure() escalate
 * the check to the whole PHB.
 *
 * Returns >0 when a PHB failure was detected and queued, 0 when
 * the PHB is healthy or already isolated, negative errno otherwise.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	/* PHB-level checking only applies in device probe mode */
	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%d\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state, nothing more to do */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/*
	 * Check PHB state: treat "can't read", "not supported" and
	 * "both MMIO and DMA still active" as healthy.
	 */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB, then report outside the lock */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device. Call this routine
 * if the result of a read was all 0xff's and you want to find out
 * if this is due to an EEH slot freeze. This routine will query
 * firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct pci_dn *pdn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			eeh_pci_name(dev));
		return 0;
	}

	/* No usable address to query the PE with */
	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On PowerNV platform, we might already have fenced PHB
	 * there and we need take care of that firstly.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/*
	 * If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		/* Periodically warn about drivers spinning on a frozen slot */
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			pdn = eeh_dev_to_pdn(edev);
			if (pdn->node)
				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure. This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/*
	 * Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt with the following conditions: failure to get
	 * PE's state, EEH not supported, or PE in good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It should be a corner case that the parent PE has been
	 * put into frozen state as well. We should take care of
	 * that first, escalating to the highest frozen ancestor
	 * below the PHB.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/*
	 * Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/*
	 * Most EEH events are due to device driver bugs. Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened. So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
584
585
586
587
588
589
590
591
592
593
594
595
596int eeh_check_failure(const volatile void __iomem *token)
597{
598 unsigned long addr;
599 struct eeh_dev *edev;
600
601
602 addr = eeh_token_to_phys((unsigned long __force) token);
603 edev = eeh_addr_cache_get_dev(addr);
604 if (!edev) {
605 eeh_stats.no_device++;
606 return 0;
607 }
608
609 return eeh_dev_check_failure(edev);
610}
611EXPORT_SYMBOL(eeh_check_failure);
612
613
614
615
616
617
618
619
620
621
/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: EEH option (EEH_OPT_THAW_MMIO/THAW_DMA/ENABLE/DISABLE/FREEZE_PE)
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * Figure out which state bit indicates "already done" for
	 * the requested function; zero means no pre/post state check
	 * is applicable for that function.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}

	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%d-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc < 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}
690
/*
 * Traversal callback used before a PE reset: put every device on
 * the PE (except the one that initiated the reset, passed via
 * @userdata) into D0, save its config state and disable it so it
 * generates no DMA or INTx traffic during the reset.
 */
static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller is expected to have disabled and saved the
	 * state of the triggering device itself already.
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}
718
719static void *eeh_restore_dev_state(void *data, void *userdata)
720{
721 struct eeh_dev *edev = data;
722 struct pci_dn *pdn = eeh_dev_to_pdn(edev);
723 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
724 struct pci_dev *dev = userdata;
725
726 if (!pdev)
727 return NULL;
728
729
730 if (pdn && eeh_ops->restore_config)
731 eeh_ops->restore_config(pdn);
732
733
734 if (pdev != dev)
735 pci_restore_state(pdev);
736
737 return NULL;
738}
739
740
741
742
743
744
745
746
747
748int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
749{
750 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
751 struct eeh_pe *pe = eeh_dev_to_pe(edev);
752
753 if (!pe) {
754 pr_err("%s: No PE found on PCI device %s\n",
755 __func__, pci_name(dev));
756 return -EINVAL;
757 }
758
759 switch (state) {
760 case pcie_deassert_reset:
761 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
762 eeh_unfreeze_pe(pe, false);
763 if (!(pe->type & EEH_PE_VF))
764 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
765 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
766 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
767 break;
768 case pcie_hot_reset:
769 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
770 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
771 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
772 if (!(pe->type & EEH_PE_VF))
773 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
774 eeh_ops->reset(pe, EEH_RESET_HOT);
775 break;
776 case pcie_warm_reset:
777 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
778 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
779 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
780 if (!(pe->type & EEH_PE_VF))
781 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
782 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
783 break;
784 default:
785 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
786 return -EINVAL;
787 };
788
789 return 0;
790}
791
792
793
794
795
796
797
798
799
800
801
802static void *eeh_set_dev_freset(void *data, void *flag)
803{
804 struct pci_dev *dev;
805 unsigned int *freset = (unsigned int *)flag;
806 struct eeh_dev *edev = (struct eeh_dev *)data;
807
808 dev = eeh_dev_to_pci_dev(edev);
809 if (dev)
810 *freset |= dev->needs_freset;
811
812 return NULL;
813}
814
815
816
817
818
819
820
821
822
823
824
825
826
827int eeh_pe_reset_full(struct eeh_pe *pe)
828{
829 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
830 int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
831 int type = EEH_RESET_HOT;
832 unsigned int freset = 0;
833 int i, state, ret;
834
835
836
837
838
839
840 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
841
842 if (freset)
843 type = EEH_RESET_FUNDAMENTAL;
844
845
846 eeh_pe_state_mark(pe, reset_state);
847
848
849 for (i = 0; i < 3; i++) {
850 ret = eeh_pe_reset(pe, type);
851 if (ret)
852 break;
853
854 ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
855 if (ret)
856 break;
857
858
859 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
860 if ((state & active_flags) == active_flags)
861 break;
862
863 if (state < 0) {
864 pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
865 __func__, pe->phb->global_number, pe->addr);
866 ret = -ENOTRECOVERABLE;
867 break;
868 }
869
870
871 ret = -EIO;
872 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
873 __func__, state, pe->phb->global_number, pe->addr, (i + 1));
874 }
875
876 eeh_pe_state_clear(pe, reset_state);
877 return ret;
878}
879
880
881
882
883
884
885
886
887
888
/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	/* Snapshot the first 16 dwords of config space (standard header) */
	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges including root port, we need to enable bus
	 * master explicitly. Otherwise, it can't fetch IODA table
	 * entries correctly. So we cache the bit in advance so that
	 * we can restore it after reset, either PHB range or PE range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}
910
911
912
913
914
915
916
917
918
919int __init eeh_ops_register(struct eeh_ops *ops)
920{
921 if (!ops->name) {
922 pr_warn("%s: Invalid EEH ops name for %p\n",
923 __func__, ops);
924 return -EINVAL;
925 }
926
927 if (eeh_ops && eeh_ops != ops) {
928 pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
929 __func__, eeh_ops->name, ops->name);
930 return -EEXIST;
931 }
932
933 eeh_ops = ops;
934
935 return 0;
936}
937
938
939
940
941
942
943
944
945int __exit eeh_ops_unregister(const char *name)
946{
947 if (!name || !strlen(name)) {
948 pr_warn("%s: Invalid EEH ops name\n",
949 __func__);
950 return -EINVAL;
951 }
952
953 if (eeh_ops && !strcmp(eeh_ops->name, name)) {
954 eeh_ops = NULL;
955 return 0;
956 }
957
958 return -EEXIST;
959}
960
/*
 * Reboot notifier: clear the EEH_ENABLED flag before the machine
 * reboots so no EEH processing happens while devices shut down.
 */
static int eeh_reboot_notifier(struct notifier_block *nb,
			unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in
 * the system. As a side effect we can determine here if EEH is
 * supported at all. Note that we need EEH for multiple PHBs.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV until after
	 * the PCI hierarchy tree has been built, because the PEs are
	 * figured out based on PCI devices instead of device tree
	 * nodes. The first invocation on PowerNV therefore bails out.
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}

	/*
	 * Call platform post-initialization. It's a good chance to
	 * inform the platform that EEH is ready to supply service
	 * once the I/O cache stuff has been built up.
	 */
	if (eeh_ops->post_init) {
		ret = eeh_ops->post_init();
		if (ret)
			return ret;
	}

	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_warn("EEH: No capable adapters found\n");

	return ret;
}

core_initcall_sync(eeh_init);
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (even config space i/o). Only applies in device-tree
 * probe mode.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev)
		return;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
		return;

	/* USB Bus children of PCI devices will not have BUIDs */
	phb = edev->phb;
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}
1082
1083
1084
1085
1086
1087
1088
1089
1090
/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device tree
 * @pdn: PCI device node
 *
 * Recursively enable EEH for the whole subtree rooted at @pdn,
 * probing children first, then the node itself.
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
1103
1104
1105
1106
1107
1108
1109
1110
1111void eeh_add_device_late(struct pci_dev *dev)
1112{
1113 struct pci_dn *pdn;
1114 struct eeh_dev *edev;
1115
1116 if (!dev || !eeh_enabled())
1117 return;
1118
1119 pr_debug("EEH: Adding device %s\n", pci_name(dev));
1120
1121 pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
1122 edev = pdn_to_eeh_dev(pdn);
1123 if (edev->pdev == dev) {
1124 pr_debug("EEH: Already referenced !\n");
1125 return;
1126 }
1127
1128
1129
1130
1131
1132
1133
1134 if (edev->pdev) {
1135 eeh_rmv_from_parent_pe(edev);
1136 eeh_addr_cache_rmv_dev(edev->pdev);
1137 eeh_sysfs_remove_device(edev->pdev);
1138 edev->mode &= ~EEH_DEV_SYSFS;
1139
1140
1141
1142
1143
1144
1145 edev->mode |= EEH_DEV_NO_HANDLER;
1146
1147 edev->pdev = NULL;
1148 dev->dev.archdata.edev = NULL;
1149 }
1150
1151 if (eeh_has_flag(EEH_PROBE_MODE_DEV))
1152 eeh_ops->probe(pdn, NULL);
1153
1154 edev->pdev = dev;
1155 dev->dev.archdata.edev = edev;
1156
1157 eeh_addr_cache_insert_dev(dev);
1158}
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168void eeh_add_device_tree_late(struct pci_bus *bus)
1169{
1170 struct pci_dev *dev;
1171
1172 list_for_each_entry(dev, &bus->devices, bus_list) {
1173 eeh_add_device_late(dev);
1174 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1175 struct pci_bus *subbus = dev->subordinate;
1176 if (subbus)
1177 eeh_add_device_tree_late(subbus);
1178 }
1179 }
1180}
1181EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191void eeh_add_sysfs_files(struct pci_bus *bus)
1192{
1193 struct pci_dev *dev;
1194
1195 list_for_each_entry(dev, &bus->devices, bus_list) {
1196 eeh_sysfs_add_device(dev);
1197 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1198 struct pci_bus *subbus = dev->subordinate;
1199 if (subbus)
1200 eeh_add_sysfs_files(subbus);
1201 }
1202 }
1203}
1204EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting the slot/PHB/bus may leave this device
 * unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;

	/*
	 * Clear in_error: it traces whether a (VF) EEH device saw an
	 * error; when unset, later reset/resume reporting for the VF
	 * EEH device is skipped.
	 */
	edev->in_error = false;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing from the PCI subsystem, which means the
	 * PCI device driver can't support EEH error recovery any
	 * more. So we needn't call into the error handler afterwards.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}
1265
/**
 * eeh_unfreeze_pe - Unfreeze the indicated PE
 * @pe: EEH PE
 * @sw_state: also clear the software EEH_PE_ISOLATED state on success
 *
 * Thaw MMIO first, then DMA, on the PE. On success, optionally clear
 * the isolated flag so the PE is regarded as functional again.
 */
int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}
1290
1291
/*
 * PCI IDs of adapters that require a full PE reset (rather than a
 * plain unfreeze) when the PE changes owner; consulted by
 * eeh_pe_change_owner() below. NOTE(review): per-device rationale
 * not visible here -- confirm against platform errata.
 */
static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE     */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};
1298
/*
 * eeh_pe_change_owner - Prepare a PE for an ownership change
 * @pe: EEH PE
 *
 * A frozen PE must be brought back to a functional state before its
 * owner changes. Most devices only need an unfreeze, but devices in
 * the eeh_reset_ids table require a full reset and recovery.
 * Healthy PEs are left untouched.
 */
static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs a PE-level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		/* Match against the quirk table, honoring PCI_ANY_ID wildcards */
		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			goto reset;
		}
	}

	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase count of passed through devices for the indicated
 * PE. As a result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);
1391
1392
1393
1394
1395
1396
1397
1398
1399
/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease count of pass through devices for the indicated PE. If
 * there are no passed through devices left on the PE, EEH errors
 * detected on the PE will again be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device, PE, or the PE was never passed through ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);
1423
1424#ifdef CONFIG_IOMMU_API
1425
1426static int dev_has_iommu_table(struct device *dev, void *data)
1427{
1428 struct pci_dev *pdev = to_pci_dev(dev);
1429 struct pci_dev **ppdev = data;
1430
1431 if (!dev)
1432 return 0;
1433
1434 if (dev->iommu_group) {
1435 *ppdev = pdev;
1436 return 1;
1437 }
1438
1439 return 0;
1440}
1441
1442
1443
1444
1445
1446
1447
1448struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
1449{
1450 struct pci_dev *pdev = NULL;
1451 struct eeh_dev *edev;
1452 int ret;
1453
1454
1455 if (!group)
1456 return NULL;
1457
1458 ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
1459 if (!ret || !pdev)
1460 return NULL;
1461
1462
1463 edev = pci_dev_to_eeh_dev(pdev);
1464 if (!edev || !edev->pe)
1465 return NULL;
1466
1467 return edev->pe;
1468}
1469EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
1470
1471#endif
1472
1473
1474
1475
1476
1477
1478
1479
1480
/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option (EEH_OPT_*)
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to thaw the frozen MMIO or DMA of the PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, just
	 * return error for that case. Also, EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			/* Make sure the PE is usable by its (new) owner */
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		/* Deliberate no-op: per-PE disable isn't supported */
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);
1522
1523
1524
1525
1526
1527
1528
1529
/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which covers 3 aspects: enabled
 * DMA, enabled IO and asserted reset. The platform bits are
 * folded into one of the coarse EEH_PE_STATE_* values.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	/*
	 * If the parent PE is owned by the host kernel and is
	 * undergoing error recovery, report this PE as temporarily
	 * unavailable so recovery on the guest side is suspended
	 * until the host-side recovery completes.
	 */
	if (pe->parent &&
	    !(pe->state & EEH_PE_REMOVED) &&
	    (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
		return EEH_PE_STATE_UNAVAIL;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	/* Map the three platform bits onto the coarse state values */
	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
1572
/*
 * Restore the config space of every device on the PE after a reset,
 * re-enable the devices and finally thaw the PE itself. Returns 0
 * on success or the first error encountered.
 */
static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices, as the devices passed through
	 * were always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state; thaw it */
	return eeh_unfreeze_pe(pe, true);
}
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type (EEH_RESET_DEACTIVATE/HOT/FUNDAMENTAL)
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		/* Unblock config space regardless of the reset outcome */
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it's always
		 * the cause of recursive EEH errors.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			__func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663int eeh_pe_configure(struct eeh_pe *pe)
1664{
1665 int ret = 0;
1666
1667
1668 if (!pe)
1669 return -ENODEV;
1670
1671 return ret;
1672}
1673EXPORT_SYMBOL_GPL(eeh_pe_configure);
1674
/* Render the EEH statistics for /proc/powerpc/eeh */
static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}
1701
/* Open handler for /proc/powerpc/eeh, backed by proc_eeh_show() */
static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

/* File operations for the /proc/powerpc/eeh seq_file entry */
static const struct file_operations proc_eeh_operations = {
	.open = proc_eeh_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1713
1714#ifdef CONFIG_DEBUG_FS
/*
 * debugfs "eeh_enable" write handler: a non-zero value clears the
 * force-disabled flag (enabling EEH), zero sets it. The platform
 * post_init hook is re-run so the backend notices the change.
 */
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the platform backend of the new setting */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}
1728
1729static int eeh_enable_dbgfs_get(void *data, u64 *val)
1730{
1731 if (eeh_enabled())
1732 *val = 0x1ul;
1733 else
1734 *val = 0x0ul;
1735 return 0;
1736}
1737
/* debugfs "eeh_max_freezes" write handler: set the tolerated freeze count */
static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

/* debugfs "eeh_max_freezes" read handler */
static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

/* Bind the get/set pairs above to the debugfs attribute files */
DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
1754#endif
1755
/*
 * Create the procfs entry and (when CONFIG_DEBUG_FS) the debugfs
 * tunables for the EEH subsystem on platforms that support EEH.
 */
static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);
1773