[PATCH] ppc64: wait for pci error state to settle down
Linas Vepstas
linas at austin.ibm.com
Sat Feb 17 11:51:17 EST 2007
Paul,
Please apply to your ppc64 tree.
--linas
PCI devices may be attached via a "far away" PCI bus which
might itself be in the process of being reset. Wait for the
PCI bus to come back online before trying to reset the
PCI device.
Signed-off-by: Linas Vepstas <linas at austin.ibm.com>
----
arch/powerpc/platforms/pseries/eeh.c | 5 ++---
arch/powerpc/platforms/pseries/eeh_driver.c | 25 ++++++++++++++++++++++---
include/asm-powerpc/eeh.h | 2 ++
3 files changed, 26 insertions(+), 6 deletions(-)
Index: linux-2.6.20-git4/arch/powerpc/platforms/pseries/eeh.c
===================================================================
--- linux-2.6.20-git4.orig/arch/powerpc/platforms/pseries/eeh.c 2007-02-16 17:35:01.000000000 -0600
+++ linux-2.6.20-git4/arch/powerpc/platforms/pseries/eeh.c 2007-02-16 18:26:27.000000000 -0600
@@ -409,7 +409,7 @@ int eeh_dn_check_failure(struct device_n
/* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure
* out what happened. So print that out. */
- if (rets[0] != 5) dump_stack();
+ dump_stack();
return 1;
dn_unlock:
@@ -465,8 +465,7 @@ EXPORT_SYMBOL(eeh_check_failure);
* a number of milliseconds to wait until the PCI slot is
* ready to be used.
*/
-static int
-eeh_slot_availability(struct pci_dn *pdn)
+int eeh_slot_availability(struct pci_dn *pdn)
{
int rc;
int rets[3];
Index: linux-2.6.20-git4/arch/powerpc/platforms/pseries/eeh_driver.c
===================================================================
--- linux-2.6.20-git4.orig/arch/powerpc/platforms/pseries/eeh_driver.c 2007-02-09 11:41:09.000000000 -0600
+++ linux-2.6.20-git4/arch/powerpc/platforms/pseries/eeh_driver.c 2007-02-16 18:05:22.000000000 -0600
@@ -299,7 +299,7 @@ static int eeh_reset_device (struct pci_
/* The longest amount of time to wait for a pci device
* to come back on line, in seconds.
*/
-#define MAX_WAIT_FOR_RECOVERY 15
+#define MAX_WAIT_FOR_RECOVERY 150
struct pci_dn * handle_eeh_events (struct eeh_event *event)
{
@@ -362,8 +362,8 @@ struct pci_dn * handle_eeh_events (struc
if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
goto excess_failures;
- /* If the reset state is a '5' and the time to reset is 0 (infinity)
- * or is more then 15 seconds, then mark this as a permanent failure.
+ /* If the reset state is a '5' and the recovery time is 0 (infinity),
+ * or is more than 2.5 minutes, then mark this as a permanent failure.
*/
if ((event->state == pci_channel_io_perm_failure) &&
((event->time_unavail <= 0) ||
@@ -384,6 +384,25 @@ struct pci_dn * handle_eeh_events (struc
*/
pci_walk_bus(frozen_bus, eeh_report_error, &result);
+ /* If the reset state is a '5' and the recovery time is
+ * finite, then wait until the bus is in a recovered state
+ * before doing anything more.
+ */
+ if (event->state == pci_channel_io_perm_failure) {
+ int unavail_wait = 0;
+ while (unavail_wait < MAX_WAIT_FOR_RECOVERY*1000) {
+ rc = eeh_slot_availability(frozen_pdn);
+ if (rc < 0)
+ goto hard_fail;
+ if (rc == 0)
+ break;
+ unavail_wait += rc+100;
+ msleep (rc+100);
+ }
+ if (rc != 0)
+ goto hard_fail;
+ }
+
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
* go down willingly, without panicing the system.
Index: linux-2.6.20-git4/include/asm-powerpc/eeh.h
===================================================================
--- linux-2.6.20-git4.orig/include/asm-powerpc/eeh.h 2007-02-04 12:44:54.000000000 -0600
+++ linux-2.6.20-git4/include/asm-powerpc/eeh.h 2007-02-16 18:18:12.000000000 -0600
@@ -31,6 +31,7 @@ struct device_node;
#ifdef CONFIG_EEH
+struct pci_dn;
extern int eeh_subsystem_enabled;
/* Values for eeh_mode bits in device_node */
@@ -49,6 +50,7 @@ unsigned long eeh_check_failure(const vo
unsigned long val);
int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
void __init pci_addr_cache_build(void);
+int eeh_slot_availability(struct pci_dn *pdn);
/**
* eeh_add_device_early
More information about the Linuxppc-dev
mailing list