1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/delay.h>
26#include <linux/interrupt.h>
27#include <linux/irq.h>
28#include <linux/pci.h>
29#include <asm/eeh.h>
30#include <asm/eeh_event.h>
31#include <asm/ppc-pci.h>
32#include <asm/pci-bridge.h>
33#include <asm/prom.h>
34#include <asm/rtas.h>
35
36
37static inline const char * pcid_name (struct pci_dev *pdev)
38{
39 if (pdev && pdev->dev.driver)
40 return pdev->dev.driver->name;
41 return "";
42}
43
#if 0
/*
 * print_device_node_tree - debug dump of a pci_dn and its descendants.
 * @pdn:  node to print; NULL is silently ignored.
 * @dent: number of spaces to indent this node by.
 *
 * Prints one line for @pdn (EEH mode and config addresses), then
 * recurses into every child device node with the indent grown by 3.
 * Compiled out; kept only as a debugging aid.
 */
static void print_device_node_tree(struct pci_dn *pdn, int dent)
{
	struct device_node *child;
	int col;

	if (!pdn)
		return;

	for (col = 0; col < dent; col++)
		printk(" ");
	printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
		pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
		pdn->eeh_pe_config_addr, pdn->node->full_name);

	for (child = pdn->node->child; child; child = child->sibling)
		print_device_node_tree(PCI_DN(child), dent + 3);
}
#endif
65
66
67
68
69static int irq_in_use(unsigned int irq)
70{
71 int rc = 0;
72 unsigned long flags;
73 struct irq_desc *desc = irq_desc + irq;
74
75 spin_lock_irqsave(&desc->lock, flags);
76 if (desc->action)
77 rc = 1;
78 spin_unlock_irqrestore(&desc->lock, flags);
79 return rc;
80}
81
82
83
84
85static void eeh_disable_irq(struct pci_dev *dev)
86{
87 struct device_node *dn = pci_device_to_OF_node(dev);
88
89
90
91
92
93 if (dev->msi_enabled || dev->msix_enabled)
94 return;
95
96 if (!irq_in_use(dev->irq))
97 return;
98
99 PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
100 disable_irq_nosync(dev->irq);
101}
102
103
104
105
106static void eeh_enable_irq(struct pci_dev *dev)
107{
108 struct device_node *dn = pci_device_to_OF_node(dev);
109
110 if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {
111 PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;
112 enable_irq(dev->irq);
113 }
114}
115
116
117
118
119
120
121
122
123
124
125static int eeh_report_error(struct pci_dev *dev, void *userdata)
126{
127 enum pci_ers_result rc, *res = userdata;
128 struct pci_driver *driver = dev->driver;
129
130 dev->error_state = pci_channel_io_frozen;
131
132 if (!driver)
133 return 0;
134
135 eeh_disable_irq(dev);
136
137 if (!driver->err_handler ||
138 !driver->err_handler->error_detected)
139 return 0;
140
141 rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);
142
143
144 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
145 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
146
147 return 0;
148}
149
150
151
152
153
154
155
156
157
158static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
159{
160 enum pci_ers_result rc, *res = userdata;
161 struct pci_driver *driver = dev->driver;
162
163 if (!driver ||
164 !driver->err_handler ||
165 !driver->err_handler->mmio_enabled)
166 return 0;
167
168 rc = driver->err_handler->mmio_enabled (dev);
169
170
171 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
172 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
173
174 return 0;
175}
176
177
178
179
180
181static int eeh_report_reset(struct pci_dev *dev, void *userdata)
182{
183 enum pci_ers_result rc, *res = userdata;
184 struct pci_driver *driver = dev->driver;
185
186 if (!driver)
187 return 0;
188
189 dev->error_state = pci_channel_io_normal;
190
191 eeh_enable_irq(dev);
192
193 if (!driver->err_handler ||
194 !driver->err_handler->slot_reset)
195 return 0;
196
197 rc = driver->err_handler->slot_reset(dev);
198 if ((*res == PCI_ERS_RESULT_NONE) ||
199 (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
200 if (*res == PCI_ERS_RESULT_DISCONNECT &&
201 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
202
203 return 0;
204}
205
206
207
208
209
210static int eeh_report_resume(struct pci_dev *dev, void *userdata)
211{
212 struct pci_driver *driver = dev->driver;
213
214 dev->error_state = pci_channel_io_normal;
215
216 if (!driver)
217 return 0;
218
219 eeh_enable_irq(dev);
220
221 if (!driver->err_handler ||
222 !driver->err_handler->resume)
223 return 0;
224
225 driver->err_handler->resume(dev);
226
227 return 0;
228}
229
230
231
232
233
234
235
236
237static int eeh_report_failure(struct pci_dev *dev, void *userdata)
238{
239 struct pci_driver *driver = dev->driver;
240
241 dev->error_state = pci_channel_io_perm_failure;
242
243 if (!driver)
244 return 0;
245
246 eeh_disable_irq(dev);
247
248 if (!driver->err_handler ||
249 !driver->err_handler->error_detected)
250 return 0;
251
252 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
253
254 return 0;
255}
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
288{
289 struct device_node *dn;
290 int cnt, rc;
291
292
293 cnt = pe_dn->eeh_freeze_count;
294
295 if (bus)
296 pcibios_remove_pci_devices(bus);
297
298
299
300
301 rc = rtas_set_slot_reset(pe_dn);
302 if (rc)
303 return rc;
304
305
306 dn = pe_dn->node;
307 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
308 dn = dn->parent->child;
309
310 while (dn) {
311 struct pci_dn *ppe = PCI_DN(dn);
312
313 if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {
314 rtas_configure_bridge(ppe);
315 eeh_restore_bars(ppe);
316 }
317 dn = dn->sibling;
318 }
319
320
321
322
323
324
325
326 if (bus) {
327 ssleep (5);
328 pcibios_add_pci_devices(bus);
329 }
330 pe_dn->eeh_freeze_count = cnt;
331
332 return 0;
333}
334
335
336
337
338#define MAX_WAIT_FOR_RECOVERY 150
339
340struct pci_dn * handle_eeh_events (struct eeh_event *event)
341{
342 struct device_node *frozen_dn;
343 struct pci_dn *frozen_pdn;
344 struct pci_bus *frozen_bus;
345 int rc = 0;
346 enum pci_ers_result result = PCI_ERS_RESULT_NONE;
347 const char *location, *pci_str, *drv_str;
348
349 frozen_dn = find_device_pe(event->dn);
350 if (!frozen_dn) {
351
352 location = of_get_property(event->dn, "ibm,loc-code", NULL);
353 location = location ? location : "unknown";
354 printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
355 "for location=%s pci addr=%s\n",
356 location, pci_name(event->dev));
357 return NULL;
358 }
359
360 frozen_bus = pcibios_find_pci_bus(frozen_dn);
361 location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
362 location = location ? location : "unknown";
363
364
365
366
367
368
369
370 if (!frozen_bus)
371 frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);
372
373 if (!frozen_bus) {
374 printk(KERN_ERR "EEH: Cannot find PCI bus "
375 "for location=%s dn=%s\n",
376 location, frozen_dn->full_name);
377 return NULL;
378 }
379
380 frozen_pdn = PCI_DN(frozen_dn);
381 frozen_pdn->eeh_freeze_count++;
382
383 if (frozen_pdn->pcidev) {
384 pci_str = pci_name (frozen_pdn->pcidev);
385 drv_str = pcid_name (frozen_pdn->pcidev);
386 } else {
387 pci_str = pci_name (event->dev);
388 drv_str = pcid_name (event->dev);
389 }
390
391 if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
392 goto excess_failures;
393
394 printk(KERN_WARNING
395 "EEH: This PCI device has failed %d times in the last hour:\n",
396 frozen_pdn->eeh_freeze_count);
397 printk(KERN_WARNING
398 "EEH: location=%s driver=%s pci addr=%s\n",
399 location, drv_str, pci_str);
400
401
402
403
404
405
406
407 pci_walk_bus(frozen_bus, eeh_report_error, &result);
408
409
410
411 rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000);
412 if (rc < 0) {
413 printk(KERN_WARNING "EEH: Permanent failure\n");
414 goto hard_fail;
415 }
416
417
418
419
420
421 eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE);
422
423
424
425
426
427 if (result == PCI_ERS_RESULT_NONE) {
428 rc = eeh_reset_device(frozen_pdn, frozen_bus);
429 if (rc) {
430 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
431 goto hard_fail;
432 }
433 }
434
435
436 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
437 rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO);
438
439 if (rc < 0)
440 goto hard_fail;
441 if (rc) {
442 result = PCI_ERS_RESULT_NEED_RESET;
443 } else {
444 result = PCI_ERS_RESULT_NONE;
445 pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
446 }
447 }
448
449
450 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
451 rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA);
452
453 if (rc < 0)
454 goto hard_fail;
455 if (rc)
456 result = PCI_ERS_RESULT_NEED_RESET;
457 else
458 result = PCI_ERS_RESULT_RECOVERED;
459 }
460
461
462 if (result == PCI_ERS_RESULT_DISCONNECT) {
463 printk(KERN_WARNING "EEH: Device driver gave up\n");
464 goto hard_fail;
465 }
466
467
468 if (result == PCI_ERS_RESULT_NEED_RESET) {
469 rc = eeh_reset_device(frozen_pdn, NULL);
470 if (rc) {
471 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
472 goto hard_fail;
473 }
474 result = PCI_ERS_RESULT_NONE;
475 pci_walk_bus(frozen_bus, eeh_report_reset, &result);
476 }
477
478
479 if ((result != PCI_ERS_RESULT_RECOVERED) &&
480 (result != PCI_ERS_RESULT_NONE)) {
481 printk(KERN_WARNING "EEH: Not recovered\n");
482 goto hard_fail;
483 }
484
485
486 pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
487
488 return frozen_pdn;
489
490excess_failures:
491
492
493
494
495
496 printk(KERN_ERR
497 "EEH: PCI device at location=%s driver=%s pci addr=%s \n"
498 "has failed %d times in the last hour "
499 "and has been permanently disabled. \n"
500 "Please try reseating this device or replacing it.\n",
501 location, drv_str, pci_str, frozen_pdn->eeh_freeze_count);
502 goto perm_error;
503
504hard_fail:
505 printk(KERN_ERR
506 "EEH: Unable to recover from failure of PCI device "
507 "at location=%s driver=%s pci addr=%s \n"
508 "Please try reseating this device or replacing it.\n",
509 location, drv_str, pci_str);
510
511perm_error:
512 eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE);
513
514
515 pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
516
517
518 pcibios_remove_pci_devices(frozen_bus);
519
520 return NULL;
521}
522
523
524