[PATCH/RFC] ppc64: EEH + SCSI recovery (IPR only)

Wed Feb 23 11:08:10 EST 2005

Hi Ben, Paul, Brian

The attached prototype code will recover from EEH errors that
would normally take out the root filesystem SCSI volume.  The
patch adds some simple hooks into the IPR SCSI device driver to
accomplish this.

This code falls back to the old/original design points, the 
basic idea being:  

-- A device driver can register some callbacks to get notified
   at various points in the EEH recovery procedure.
   See struct eeh_recovery_ops in include/asm-ppc64/eeh.h
-- A "master" recovery routine steps through the EEH recovery
   steps, notifying the device driver of the stages.  The 
   reason for a "master" routine is to handle multi-function 
   adapters (although the prototype doesn't yet handle 
   multi-function).
-- If a device driver has not registered any callbacks, then 
   the "master" routine hot-unplugs/replugs the device driver.


The code is a prototype; some things in it are broken, and those
are typically marked with XXX.

Ben, this is as close as I could get to the email you sent 
me yesterday.  My goal here was to go as generic as possible, 
so that the general shape of "struct eeh_recovery_ops" matches
what one might expect to get from a generic PCI-Express recovery 
design.

Brian, can you review the IPR portion of the patch and provide
comments or fixes?  

This applies to a circa-January BK tree. 

--linas



-------------- next part --------------
===== arch/ppc64/kernel/eeh.c 1.41 vs edited =====

--- 1.41/arch/ppc64/kernel/eeh.c	2005-01-06 13:05:42 -06:00
+++ edited/arch/ppc64/kernel/eeh.c	2005-02-22 17:27:36 -06:00
@@ -17,21 +17,19 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include <linux/bootmem.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/mm.h>
 #include <linux/notifier.h>
 #include <linux/pci.h>
 #include <linux/proc_fs.h>
 #include <linux/rbtree.h>
 #include <linux/seq_file.h>
-#include <linux/spinlock.h>
+#include <asm/atomic.h>
 #include <asm/eeh.h>
 #include <asm/io.h>
 #include <asm/machdep.h>
 #include <asm/rtas.h>
-#include <asm/atomic.h>
 #include "pci.h"
 
 #undef DEBUG
@@ -88,8 +86,7 @@ static struct notifier_block *eeh_notifi
  * is broken and panic.  This sets the threshold for how many read
  * attempts we allow before panicking.
  */
-#define EEH_MAX_FAILS	1000
-static atomic_t eeh_fail_count;
+#define EEH_MAX_FAILS	100000
 
 /* RTAS tokens */
 static int ibm_set_eeh_option;
@@ -106,6 +103,10 @@ static spinlock_t slot_errbuf_lock = SPI
 static int eeh_error_buf_size;
 
 /* System monitoring statistics */
+static DEFINE_PER_CPU(unsigned long, no_device);
+static DEFINE_PER_CPU(unsigned long, no_dn);
+static DEFINE_PER_CPU(unsigned long, no_cfg_addr);
+static DEFINE_PER_CPU(unsigned long, ignored_check);
 static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
 static DEFINE_PER_CPU(unsigned long, false_positives);
 static DEFINE_PER_CPU(unsigned long, ignored_failures);
@@ -224,9 +225,9 @@ pci_addr_cache_insert(struct pci_dev *de
 	while (*p) {
 		parent = *p;
 		piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
-		if (alo < piar->addr_lo) {
+		if (ahi < piar->addr_lo) {
 			p = &parent->rb_left;
-		} else if (ahi > piar->addr_hi) {
+		} else if (alo > piar->addr_hi) {
 			p = &parent->rb_right;
 		} else {
 			if (dev != piar->pcidev ||
@@ -244,6 +245,11 @@ pci_addr_cache_insert(struct pci_dev *de
 	piar->addr_hi = ahi;
 	piar->pcidev = dev;
 	piar->flags = flags;
+	
+#ifdef DEBUG 
+	printk (KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", 
+	               alo, ahi, pci_name (dev));
+#endif
 
 	rb_link_node(&piar->rb_node, parent, p);
 	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -368,6 +374,7 @@ void pci_addr_cache_remove_device(struct
  */
 void __init pci_addr_cache_build(void)
 {
+	struct device_node *dn;
 	struct pci_dev *dev = NULL;
 
 	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
@@ -378,6 +385,17 @@ void __init pci_addr_cache_build(void)
 			continue;
 		}
 		pci_addr_cache_insert_device(dev);
+		
+		/* Save the BAR's; firmware doesn't restore these after EEH reset */
+		dn = pci_device_to_OF_node(dev);
+		if (dn) {
+			int i;
+			for (i = 0; i < 16; i++)
+				pci_read_config_dword(dev, i * 4, &dn->config_space[i]);
+
+			if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+				dn->eeh_is_bridge = 1;
+		}
 	}
 
 #ifdef DEBUG
@@ -389,6 +407,32 @@ void __init pci_addr_cache_build(void)
 /* --------------------------------------------------------------- */
 /* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
 
+void eeh_slot_error_detail (struct device_node *dn, int severity)
+{
+	unsigned long flags;
+	int rc;
+
+	if (!dn) return;
+
+	/* Log the error with the rtas logger */
+	spin_lock_irqsave(&slot_errbuf_lock, flags);
+	memset(slot_errbuf, 0, eeh_error_buf_size);
+
+	rc = rtas_call(ibm_slot_error_detail,
+	               8, 1, NULL, dn->eeh_config_addr,
+	               BUID_HI(dn->phb->buid),
+	               BUID_LO(dn->phb->buid), NULL, 0,
+	               virt_to_phys(slot_errbuf),
+	               eeh_error_buf_size,
+	               severity);
+
+	if (rc == 0)
+		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+}
+
+EXPORT_SYMBOL(eeh_slot_error_detail);
+
 /**
  * eeh_register_notifier - Register to find out about EEH events.
  * @nb: notifier block to callback on events
@@ -421,10 +465,11 @@ static int read_slot_reset_state(struct 
 		outputs = 4;
 	} else {
 		token = ibm_read_slot_reset_state;
+		rets[2] = 0; /* fake PE Unavailable info */
 		outputs = 3;
 	}
 	
-	return rtas_call(token, 3, outputs, rets, dn->eeh_config_addr, 
+	return rtas_call(token, 3, outputs, rets, dn->eeh_config_addr,
 			 BUID_HI(dn->phb->buid), BUID_LO(dn->phb->buid));
 }
 
@@ -480,15 +525,15 @@ static void eeh_event_handler(void *dumm
 		if (event == NULL)
 			break;
 
-		printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device "
-		       "%s %s\n", event->reset_state,
-		       pci_name(event->dev), pci_pretty_name(event->dev));
-
-		atomic_set(&eeh_fail_count, 0);
-		notifier_call_chain (&eeh_notifier_chain,
-				     EEH_NOTIFY_FREEZE, event);
+		if (event->reset_state != 5) {
+			printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device "
+			       "%s %s\n", event->reset_state,
+			       pci_name(event->dev), pci_pretty_name(event->dev));
+		}
 
 		__get_cpu_var(slot_resets)++;
+		notifier_call_chain (&eeh_notifier_chain,
+		           EEH_NOTIFY_FREEZE, event);
 
 		pci_dev_put(event->dev);
 		kfree(event);
@@ -496,8 +541,8 @@ static void eeh_event_handler(void *dumm
 }
 
 /**
- * eeh_token_to_phys - convert EEH address token to phys address
- * @token i/o token, should be address in the form 0xE....
+ * eeh_token_to_phys - convert I/O address to phys address
+ * @token i/o address, should be address in the form 0xA....
  */
 static inline unsigned long eeh_token_to_phys(unsigned long token)
 {
@@ -532,7 +577,6 @@ int eeh_dn_check_failure(struct device_n
 	int ret;
 	int rets[3];
 	unsigned long flags;
-	int rc, reset_state;
 	struct eeh_event  *event;
 
 	__get_cpu_var(total_mmio_ffs)++;
@@ -540,16 +584,20 @@ int eeh_dn_check_failure(struct device_n
 	if (!eeh_subsystem_enabled)
 		return 0;
 
-	if (!dn)
+	if (!dn) {
+		__get_cpu_var(no_dn)++;
 		return 0;
+	}
 
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) ||
 	    dn->eeh_mode & EEH_MODE_NOCHECK) {
+		__get_cpu_var(ignored_check)++;
 		return 0;
 	}
 
 	if (!dn->eeh_config_addr) {
+		__get_cpu_var(no_cfg_addr)++;
 		return 0;
 	}
 
@@ -558,8 +606,11 @@ int eeh_dn_check_failure(struct device_n
 	 * slot, we know it's bad already, we don't need to check...
 	 */
 	if (dn->eeh_mode & EEH_MODE_ISOLATED) {
-		atomic_inc(&eeh_fail_count);
-		if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) {
+		dn->eeh_check_count ++;
+		if (dn->eeh_check_count >= EEH_MAX_FAILS) {
+			printk (KERN_ERR "EEH: Driver ignored %d bad reads, panicing\n",
+			        dn->eeh_check_count);
+			dump_stack();
 			/* re-read the slot reset state */
 			if (read_slot_reset_state(dn, rets) != 0)
 				rets[0] = -1;	/* reset state unknown */
@@ -576,42 +627,25 @@ int eeh_dn_check_failure(struct device_n
 	 * In any case they must share a common PHB.
 	 */
 	ret = read_slot_reset_state(dn, rets);
-	if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 || rets[0] == 4))) {
+	if (!(ret == 0 && ((rets[1] == 1 && (rets[0] == 2 || rets[0] >= 4))
+	                   || (rets[0] == 5)))) {
 		__get_cpu_var(false_positives)++;
 		return 0;
 	}
 
-	/* prevent repeated reports of this failure */
+	/* Prevent repeated reports of this failure */
 	dn->eeh_mode |= EEH_MODE_ISOLATED;
 
-	reset_state = rets[0];
-
-	spin_lock_irqsave(&slot_errbuf_lock, flags);
-	memset(slot_errbuf, 0, eeh_error_buf_size);
-
-	rc = rtas_call(ibm_slot_error_detail,
-	               8, 1, NULL, dn->eeh_config_addr,
-	               BUID_HI(dn->phb->buid),
-	               BUID_LO(dn->phb->buid), NULL, 0,
-	               virt_to_phys(slot_errbuf),
-	               eeh_error_buf_size,
-	               1 /* Temporary Error */);
-
-	if (rc == 0)
-		log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
-	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
-
-	printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n",
-	       rets[0], dn->name, dn->full_name);
 	event = kmalloc(sizeof(*event), GFP_ATOMIC);
 	if (event == NULL) {
-		eeh_panic(dev, reset_state);
+		printk (KERN_ERR "EEH: out of memory, event not handled\n");
 		return 1;
  	}
 
 	event->dev = dev;
 	event->dn = dn;
-	event->reset_state = reset_state;
+	event->reset_state = rets[0];
+	event->time_unavail = rets[2];
 
 	/* We may or may not be called in an interrupt context */
 	spin_lock_irqsave(&eeh_eventlist_lock, flags);
@@ -621,7 +655,7 @@ int eeh_dn_check_failure(struct device_n
 	/* Most EEH events are due to device driver bugs.  Having
 	 * a stack trace will help the device-driver authors figure
 	 * out what happened.  So print that out. */
-	dump_stack();
+	if (rets[0] != 5) dump_stack();
 	schedule_work(&eeh_event_wq);
 
 	return 0;
@@ -634,7 +668,6 @@ EXPORT_SYMBOL(eeh_dn_check_failure);
  * @token i/o token, should be address in the form 0xA....
  * @val value, should be all 1's (XXX why do we need this arg??)
  *
- * Check for an eeh failure at the given token address.
  * Check for an EEH failure at the given token address.  Call this
  * routine if the result of a read was all 0xff's and you want to
  * find out if this is due to an EEH slot freeze event.  This routine
@@ -642,6 +675,7 @@ EXPORT_SYMBOL(eeh_dn_check_failure);
  *
  * Note this routine is safe to call in an interrupt context.
  */
+
 unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
 {
 	unsigned long addr;
@@ -651,8 +685,10 @@ unsigned long eeh_check_failure(const vo
 	/* Finding the phys addr + pci device; this is pretty quick. */
 	addr = eeh_token_to_phys((unsigned long __force) token);
 	dev = pci_get_device_by_addr(addr);
-	if (!dev)
+	if (!dev) {
+		__get_cpu_var(no_device)++;
 		return val;
+	}
 
 	dn = pci_device_to_OF_node(dev);
 	eeh_dn_check_failure (dn, dev);
@@ -663,6 +699,218 @@ unsigned long eeh_check_failure(const vo
 
 EXPORT_SYMBOL(eeh_check_failure);
 
+/* ------------------------------------------------------------- */
+/* The code below deals with error recovery */
+
+int
+eeh_slot_is_isolated(struct pci_dev *dev)
+{ 
+	struct device_node *dn;
+	dn = pci_device_to_OF_node(dev);
+	return (dn->eeh_mode & EEH_MODE_ISOLATED);
+}
+
+/** eeh_pci_slot_reset raises/lowers the pci #RST line
+ *  state: 1/0 to raise/lower the #RST 
+ */
+void
+eeh_pci_slot_reset(struct pci_dev *dev, int state)
+{
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	rtas_pci_slot_reset (dn, state);
+}
+
+/* return negative value if a permanent error, else return 
+ * a number of milliseconds to wait until the PCI slot is 
+ * ready to be used.
+ */
+static int
+eeh_slot_availability(struct device_node *dn)
+{
+	int rc;
+	int rets[3];
+
+	rc = read_slot_reset_state(dn, rets);
+	if (rc) return rc;
+
+	if (rets[1] == 0) return -1;  /* EEH is not supported */
+	if (rets[0] == 0)  return 0;  /* Oll Korrect */
+	if (rets[0] == 5) {
+		if (rets[2] == 0) return -1; /* permanently unavailable */
+		return rets[2]; /* number of millisecs to wait */
+	}
+	return -1;
+}
+
+int
+eeh_pci_slot_availability(struct pci_dev *dev)
+{
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	if (!dn) return -1;
+	return eeh_slot_availability (dn);
+}
+
+void
+rtas_pci_slot_reset(struct device_node *dn, int state)
+{
+	int rc;
+
+	if (!dn)
+		return;
+
+	dn->eeh_mode |= EEH_MODE_RECOVERING;
+	rc = rtas_call(ibm_set_slot_reset,4,1, NULL,
+	               dn->eeh_config_addr,
+	               BUID_HI(dn->phb->buid),
+	               BUID_LO(dn->phb->buid),
+	               state);
+	if (rc) {
+		printk (KERN_WARNING "EEH: Unable to reset the failed slot, (%d) #RST=%d\n", rc, state);
+		return;
+	}
+
+	if (state == 0)
+		dn->eeh_mode &= ~(EEH_MODE_RECOVERING|EEH_MODE_ISOLATED);
+}
+
+/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second 
+ *  dn -- device node to be reset.
+ */
+
+void
+rtas_set_slot_reset(struct device_node *dn)
+{
+	int i, rc;
+
+	rtas_pci_slot_reset (dn, 1);
+
+	/* The PCI bus requires that the reset be held high for at least
+	 * a 100 milliseconds. We wait a bit longer 'just in case'.  */
+
+#define PCI_BUS_RST_HOLD_TIME_MSEC 250
+	msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+	rtas_pci_slot_reset (dn, 0); 
+	
+	/* After a PCI slot has been reset, the PCI Express spec requires
+	 * a 1.5 second idle time for the bus to stabilize, before starting 
+	 * up traffic. */
+#define PCI_BUS_SETTLE_TIME_MSEC 1800
+	msleep (PCI_BUS_SETTLE_TIME_MSEC);
+
+	/* Now double check with the firmware to make sure the device is
+	 * ready to be used; if not, wait for recovery. */
+	for (i=0; i<10; i++) {
+		rc = eeh_slot_availability (dn);
+		if (rc <= 0) return;
+
+		msleep (rc+100);
+	}
+}
+
+EXPORT_SYMBOL(rtas_set_slot_reset);
+
+void
+rtas_configure_bridge(struct device_node *dn)
+{
+	int token = rtas_token ("ibm,configure-bridge");
+	int rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return;
+	rc = rtas_call(token,3,1, NULL,
+	               dn->eeh_config_addr,
+	               BUID_HI(dn->phb->buid),
+	               BUID_LO(dn->phb->buid));
+	if (rc) {
+		printk (KERN_WARNING "EEH: Unable to configure device bridge\n");
+	}
+}
+
+EXPORT_SYMBOL(rtas_configure_bridge);
+
+/* ------------------------------------------------------- */
+/* EEH Error Recovery registration */
+
+void eeh_register_recovery_ops (struct pci_dev *dev, 
+                                struct eeh_recovery_ops *ops)
+{
+	struct device_node *dn;
+	dn = pci_device_to_OF_node(dev);
+	dn->eeh_ops = ops;
+}
+
+/* ------------------------------------------------------- */
+/** Save and restore of PCI BARs
+ *
+ * Although firmware will set up BARs during boot, it doesn't
+ * set up device BAR's after a device reset, although it will,
+ * if requested, set up bridge configuration. Thus, we need to
+ * configure the PCI devices ourselves.  Config-space setup is
+ * stored in the PCI structures which are normally deleted during
+ * device removal.  Thus, the "save" routine references the
+ * structures so that they aren't deleted.
+ */
+
+/**
+ * __restore_bars - Restore the Base Address Registers
+ * Loads the PCI configuration space base address registers,
+ * the expansion ROM base address, the latency timer, and etc.
+ * from the saved values in the device node.
+ */
+static inline void __restore_bars (struct device_node *dn)
+{
+	int i;
+
+	if (NULL==dn->phb) return;
+	for (i=4; i<10; i++) {
+		rtas_write_config(dn, i*4, 4, dn->config_space[i]);
+	}
+
+	/* 12 == Expansion ROM Address */
+	rtas_write_config(dn, 12*4, 4, dn->config_space[12]);
+	
+#define SAVED_BYTE(OFF) (((u8 *)(dn->config_space))[OFF])
+	
+	rtas_write_config (dn, PCI_CACHE_LINE_SIZE, 1,
+	            SAVED_BYTE(PCI_CACHE_LINE_SIZE));
+	
+	rtas_write_config (dn, PCI_LATENCY_TIMER, 1,
+	            SAVED_BYTE(PCI_LATENCY_TIMER));
+	
+	rtas_write_config (dn, PCI_INTERRUPT_LINE, 1,
+	            SAVED_BYTE(PCI_INTERRUPT_LINE));
+}
+
+/**
+ * eeh_restore_bars - restore the PCI config space info
+ */
+void eeh_restore_bars(struct device_node *dn)
+{
+	if (! dn->eeh_is_bridge)
+		__restore_bars (dn);
+	
+	if (dn->child)
+		eeh_restore_bars (dn->child);
+#if DO_SIBLINGS
+	if (dn->sibling)
+		eeh_restore_bars (dn->sibling);
+#endif
+}
+
+void eeh_pci_restore_bars(struct pci_dev *dev)
+{
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	eeh_restore_bars (dn);
+}
+
+/* ------------------------------------------------------------- */
+/* The code below deals with enabling EEH for devices during  the
+ * early boot sequence.  EEH must be enabled before any PCI probing
+ * can be done.
+ */
+
+#define EEH_ENABLE 1
+
 struct eeh_early_enable_info {
 	unsigned int buid_hi;
 	unsigned int buid_lo;
@@ -742,7 +990,7 @@ static void *early_enable_eeh(struct dev
 		       dn->full_name);
 	}
 
-	return NULL; 
+	return NULL;
 }
 
 /*
@@ -829,7 +1077,9 @@ void eeh_add_device_early(struct device_
 		return;
 	phb = dn->phb;
 	if (NULL == phb || 0 == phb->buid) {
-		printk(KERN_WARNING "EEH: Expected buid but found none\n");
+		printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",
+		                dn->full_name);
+		dump_stack();
 		return;
 	}
 
@@ -848,6 +1098,9 @@ EXPORT_SYMBOL(eeh_add_device_early);
  */
 void eeh_add_device_late(struct pci_dev *dev)
 {
+	int i;
+	struct device_node *dn;
+
 	if (!dev || !eeh_subsystem_enabled)
 		return;
 
@@ -857,6 +1110,14 @@ void eeh_add_device_late(struct pci_dev 
 #endif
 
 	pci_addr_cache_insert_device (dev);
+
+	/* Save the BAR's; firmware doesn't restore these after EEH reset */
+	dn = pci_device_to_OF_node(dev);
+	for (i = 0; i < 16; i++)
+		pci_read_config_dword(dev, i * 4, &dn->config_space[i]);
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		dn->eeh_is_bridge = 1;
 }
 EXPORT_SYMBOL(eeh_add_device_late);
 
@@ -886,12 +1147,17 @@ static int proc_eeh_show(struct seq_file
 	unsigned int cpu;
 	unsigned long ffs = 0, positives = 0, failures = 0;
 	unsigned long resets = 0;
+	unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
 
 	for_each_cpu(cpu) {
 		ffs += per_cpu(total_mmio_ffs, cpu);
 		positives += per_cpu(false_positives, cpu);
 		failures += per_cpu(ignored_failures, cpu);
 		resets += per_cpu(slot_resets, cpu);
+		no_dev += per_cpu(no_device, cpu);
+		no_dn += per_cpu(no_dn, cpu);
+		no_cfg += per_cpu(no_cfg_addr, cpu);
+		no_check += per_cpu(ignored_check, cpu);
 	}
 
 	if (0 == eeh_subsystem_enabled) {
@@ -899,13 +1165,17 @@ static int proc_eeh_show(struct seq_file
 		seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
 	} else {
 		seq_printf(m, "EEH Subsystem is enabled\n");
-		seq_printf(m, "eeh_total_mmio_ffs=%ld\n"
+		seq_printf(m, 
+				"no device=%ld\n"
+				"no device node=%ld\n"
+				"no config address=%ld\n"
+				"check not wanted=%ld\n"
+				"eeh_total_mmio_ffs=%ld\n"
 			   "eeh_false_positives=%ld\n"
 			   "eeh_ignored_failures=%ld\n"
-			   "eeh_slot_resets=%ld\n"
-				"eeh_fail_count=%d\n",
-			   ffs, positives, failures, resets,
-				eeh_fail_count.counter);
+			   "eeh_slot_resets=%ld\n",
+				no_dev, no_dn, no_cfg, no_check,
+			   ffs, positives, failures, resets);
 	}
 
 	return 0;
===== arch/ppc64/kernel/pSeries_pci.c 1.59 vs edited =====
--- 1.59/arch/ppc64/kernel/pSeries_pci.c	2004-11-15 21:29:10 -06:00
+++ edited/arch/ppc64/kernel/pSeries_pci.c	2005-01-20 17:25:37 -06:00
@@ -102,7 +102,7 @@ static int rtas_pci_read_config(struct p
 	return PCIBIOS_DEVICE_NOT_FOUND;
 }
 
-static int rtas_write_config(struct device_node *dn, int where, int size, u32 val)
+int rtas_write_config(struct device_node *dn, int where, int size, u32 val)
 {
 	unsigned long buid, addr;
 	int ret;
===== drivers/pci/hotplug/rpaphp.h 1.11 vs edited =====
--- 1.11/drivers/pci/hotplug/rpaphp.h	2004-10-06 11:43:44 -05:00
+++ edited/drivers/pci/hotplug/rpaphp.h	2005-01-20 17:25:37 -06:00
@@ -125,7 +125,8 @@ extern int rpaphp_enable_pci_slot(struct
 extern int register_pci_slot(struct slot *slot);
 extern int rpaphp_unconfig_pci_adapter(struct slot *slot);
 extern int rpaphp_get_pci_adapter_status(struct slot *slot, int is_init, u8 * value);
-extern struct hotplug_slot *rpaphp_find_hotplug_slot(struct pci_dev *dev);
+extern void init_eeh_handler (void);
+extern void exit_eeh_handler (void);
 
 /* rpaphp_core.c */
 extern int rpaphp_add_slot(struct device_node *dn);
===== drivers/pci/hotplug/rpaphp_core.c 1.18 vs edited =====
--- 1.18/drivers/pci/hotplug/rpaphp_core.c	2004-10-06 11:43:44 -05:00
+++ edited/drivers/pci/hotplug/rpaphp_core.c	2005-01-20 17:25:37 -06:00
@@ -443,12 +443,18 @@ static int __init rpaphp_init(void)
 {
 	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
 
+	/* Get set to handle EEH events. */
+	init_eeh_handler();
+
 	/* read all the PRA info from the system */
 	return init_rpa();
 }
 
 static void __exit rpaphp_exit(void)
 {
+	/* Let EEH know we are going away. */
+	exit_eeh_handler();
+
 	cleanup_slots();
 }
 
===== drivers/pci/hotplug/rpaphp_pci.c 1.17 vs edited =====
--- 1.17/drivers/pci/hotplug/rpaphp_pci.c	2004-11-18 02:36:18 -06:00
+++ edited/drivers/pci/hotplug/rpaphp_pci.c	2005-02-22 17:25:07 -06:00
@@ -22,8 +22,12 @@
  * Send feedback to <lxie at us.ibm.com>
  *
  */
+#include <linux/delay.h>
+#include <linux/notifier.h>
 #include <linux/pci.h>
+#include <asm/eeh.h>
 #include <asm/pci-bridge.h>
+#include <asm/prom.h>
 #include <asm/rtas.h>
 #include "../pci.h"		/* for pci_add_new_bus */
 
@@ -62,6 +66,7 @@ int rpaphp_claim_resource(struct pci_dev
 		    root ? "Address space collision on" :
 		    "No parent found for",
 		    resource, dtype, pci_name(dev), res->start, res->end);
+		dump_stack();
 	}
 	return err;
 }
@@ -184,6 +189,19 @@ rpaphp_fixup_new_pci_devices(struct pci_
 
 static int rpaphp_pci_config_bridge(struct pci_dev *dev);
 
+static void rpaphp_eeh_add_bus_device(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		eeh_add_device_late(dev);
+		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
+			struct pci_bus *subbus = dev->subordinate;
+			if (bus)
+				rpaphp_eeh_add_bus_device (subbus);
+		}
+	}
+}
+
 /*****************************************************************************
  rpaphp_pci_config_slot() will  configure all devices under the 
  given slot->dn and return the the first pci_dev.
@@ -211,6 +229,8 @@ rpaphp_pci_config_slot(struct device_nod
 		}
 		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) 
 			rpaphp_pci_config_bridge(dev);
+
+		rpaphp_eeh_add_bus_device(bus);
 	}
 	return dev;
 }
@@ -219,7 +239,6 @@ static int rpaphp_pci_config_bridge(stru
 {
 	u8 sec_busno;
 	struct pci_bus *child_bus;
-	struct pci_dev *child_dev;
 
 	dbg("Enter %s:  BRIDGE dev=%s\n", __FUNCTION__, pci_name(dev));
 
@@ -236,11 +255,7 @@ static int rpaphp_pci_config_bridge(stru
 	/* do pci_scan_child_bus */
 	pci_scan_child_bus(child_bus);
 
-	list_for_each_entry(child_dev, &child_bus->devices, bus_list) {
-		eeh_add_device_late(child_dev);
-	}
-
-	 /* fixup new pci devices without touching bus struct */
+	/* Fixup new pci devices without touching bus struct */
 	rpaphp_fixup_new_pci_devices(child_bus, 0);
 
 	/* Make the discovered devices available */
@@ -278,7 +293,7 @@ static void print_slot_pci_funcs(struct 
 	return;
 }
 #else
-static void print_slot_pci_funcs(struct slot *slot)
+static inline void print_slot_pci_funcs(struct slot *slot)
 {
 	return;
 }
@@ -360,7 +375,6 @@ static void rpaphp_eeh_remove_bus_device
 			if (pdev)
 				rpaphp_eeh_remove_bus_device(pdev);
 		}
-
 	}
 	return;
 }
@@ -562,36 +576,266 @@ exit:
 	return retval;
 }
 
-struct hotplug_slot *rpaphp_find_hotplug_slot(struct pci_dev *dev)
+/**
+ * rpaphp_search_bus_for_dev - return 1 if device is under this bus, else 0
+ * @bus: the bus to search for this device.
+ * @dev: the pci device we are looking for.
+ */
+static int rpaphp_search_bus_for_dev (struct pci_bus *bus, struct pci_dev *dev)
+{
+	struct list_head *ln;
+
+	if (!bus) return 0;
+	
+	for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
+		struct pci_dev *pdev = pci_dev_b(ln);
+		if (pdev == dev)
+			return 1;
+		if (pdev->subordinate) {
+			int rc;
+			rc = rpaphp_search_bus_for_dev (pdev->subordinate, dev);
+			if (rc)
+				return 1;
+		}
+	}
+	return 0;
+}
+
+/**
+ * rpaphp_find_slot - find and return the slot holding the device
+ * @dev: pci device for which we want the slot structure.
+ */
+static struct slot *rpaphp_find_slot(struct pci_dev *dev)
 {
-	struct list_head	*tmp, *n;
-	struct slot		*slot;
+	struct list_head *tmp, *n;
+	struct slot	*slot;
 
 	list_for_each_safe(tmp, n, &rpaphp_slot_head) {
 		struct pci_bus *bus;
-		struct list_head *ln;
 
 		slot = list_entry(tmp, struct slot, rpaphp_slot_list);
-		if (slot->bridge == NULL) {
-			if (slot->dev_type == PCI_DEV) {
-				printk(KERN_WARNING "PCI slot missing bridge %s %s \n", 
-				                    slot->name, slot->location);
-			}
+		
+		/* PHB's don't have bridges. */
+		if (slot->bridge == NULL)
 			continue;
-		}
+
+		/* The PCI device could be the slot itself. */
+		if (slot->bridge == dev)
+			return slot;
 
 		bus = slot->bridge->subordinate;
 		if (!bus) {
+			printk (KERN_WARNING "PCI bridge is missing bus: %s %s\n",
+			    pci_name (slot->bridge), pci_pretty_name (slot->bridge));
 			continue;  /* should never happen? */
 		}
-		for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) {
-                                struct pci_dev *pdev = pci_dev_b(ln);
-				if (pdev == dev)
-					return slot->hotplug_slot;
-		}
+
+		if (rpaphp_search_bus_for_dev (bus, dev))
+			return slot;
+	}
+	return NULL;
+}
+
+/* ------------------------------------------------------- */
+/**
+ * handle_eeh_events -- reset a PCI device after hard lockup.
+ *
+ * pSeries systems will isolate a PCI slot if the PCI-Host
+ * bridge detects address or data parity errors, DMA's 
+ * occurring to wild addresses (which usually happen due to
+ * bugs in device drivers or in PCI adapter firmware).
+ * Slot isolations also occur if #SERR, #PERR or other misc
+ * PCI-related errors are detected.
+ * 
+ * Recovery process consists of unplugging the device driver
+ * (which generated hotplug events to userspace), then issuing
+ * a PCI #RST to the device, then reconfiguring the PCI config 
+ * space for all bridges & devices under this slot, and then 
+ * finally restarting the device drivers (which cause a second
+ * set of hotplug events to go out to userspace).
+ */
+
+extern void rtas_set_eeh_option(struct device_node *dn, int state);
+
+int eeh_reset_device (struct pci_dev *dev, int reconfig)
+{
+	struct slot *frozen_slot;
+
+	if (!dev)
+		return 1;
+
+	frozen_slot = rpaphp_find_slot(dev);
+	if (!frozen_slot)
+	{
+		printk (KERN_ERR "EEH: Cannot find PCI slot for %s %s\n",
+				pci_name(dev), pci_pretty_name (dev));
+		return 1;
 	}
 
+	if (reconfig) rpaphp_unconfig_pci_adapter (frozen_slot);
+	
+	/* Reset the pci controller. (Asserts RST#; resets config space). 
+	 * Reconfigure bridges and devices */
+	rtas_set_slot_reset (frozen_slot->dn->child);
+	rtas_configure_bridge(frozen_slot->dn);
+	eeh_restore_bars(frozen_slot->dn->child);
+
+	/* Give the system 5 seconds to finish running the user-space
+	 * hotplug scripts, e.g. ifdown for ethernet.  Yes, this is a hack, 
+	 * but if we don't do this, weird things happen.
+	 */
+	if (reconfig) {
+		ssleep (5);
+		rpaphp_enable_pci_slot (frozen_slot);
+	}
+	return 0;
+}
+
+static inline struct pci_dev * eeh_get_pci_dev(struct device_node *dn)
+{
+	struct pci_dev *dev = NULL;
+	for_each_pci_dev(dev) {
+		if (pci_device_to_OF_node(dev) == dn)
+			return dev;
+		}
 	return NULL;
 }
 
-EXPORT_SYMBOL_GPL(rpaphp_find_hotplug_slot);
+
+/* The longest amount of time to wait for a pci device
+ * to come back on line, in seconds.
+ */
+#define MAX_WAIT_FOR_RECOVERY 15 
+
+int handle_eeh_events (struct notifier_block *self, 
+                       unsigned long reason, void *ev)
+{
+	int freeze_count=0;
+	struct slot *frozen_slot;
+	struct device_node *frozen_device;
+	struct eeh_event *event = ev;
+	struct pci_dev *dev = event->dev;
+	int perm_failure = 0;
+	int rc;
+
+	if (!dev)
+		dev = eeh_get_pci_dev (event->dn);
+
+	if (!dev)
+	{
+		if (event->dn)
+			printk ("EEH: Cannot find the PCI device for dn %s\n", 
+			        event->dn->full_name);
+		else 
+			printk ("EEH: EEH error caught, but no PCI device specified!\n");
+		return 1;
+	}
+
+	frozen_slot = rpaphp_find_slot(dev);
+	if (!frozen_slot)
+	{
+		printk (KERN_ERR "EEH: Cannot find PCI slot for %s %s\n",
+				pci_name(dev), pci_pretty_name (dev));
+		return 1;
+	}
+	frozen_device = frozen_slot->dn->child;
+	
+	/* We get "permanent failure" messages on empty slots. 
+	 * These are false alarms. Empty slots have no child dn. */
+	if ((event->reset_state == 5) && (frozen_device == NULL))
+		return 0;
+
+	if (frozen_device)
+		freeze_count = frozen_device->eeh_freeze_count;
+	freeze_count ++;
+	if (freeze_count > EEH_MAX_ALLOWED_FREEZES)
+		perm_failure = 1;
+	
+	/* If the reset state is a '5' and the time to reset is 0 (infinity) 
+	 * or is more then 15 seconds, then mark this as a permanent failure. 
+	 */
+	if ((event->reset_state == 5) && 
+	    ((event->time_unavail <= 0) ||
+	     (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000))) 
+		perm_failure = 1;
+	
+	/* Log the error with the rtas logger. */
+	if (perm_failure) {
+		/* 
+		 * About 90% of all real-life EEH failures in the field
+		 * are due to poorly seated PCI cards. Only 10% or so are
+		 * due to actual, failed cards.
+		 */
+		printk (KERN_ERR
+		   "EEH: device %s:%s has failed %d times \n"
+			"and has been permanently disabled.  Please try reseating\n"
+		   "this device or replacing it.\n",
+			pci_name (dev),
+			pci_pretty_name (dev),
+			freeze_count);
+
+		eeh_slot_error_detail (frozen_device, 2 /* Permanent Error */);
+
+		/* Notify the device that its about to go down. */
+		/* XXX this should be a recursive walk to children for 
+		 * multi-function devices */
+		if (frozen_device->eeh_ops &&
+			 frozen_device->eeh_ops->perm_failure) {
+			frozen_device->eeh_ops->perm_failure (dev, frozen_device->eeh_ops->data);
+		}
+
+		/* Unconfigure the thing and go home. */
+		rpaphp_unconfig_pci_adapter (frozen_slot);
+		return 1;
+	} else {
+		eeh_slot_error_detail (frozen_device, 1 /* Temporary Error */);
+	}
+
+	printk (KERN_WARNING
+	   "EEH: This device has failed %d times since last reboot: %s:%s\n",
+		freeze_count,
+		pci_name (dev),
+		pci_pretty_name (dev));
+
+	/* Walk the various device drivers attached to this slot through
+	 * a reset sequence, giving each an opportunity to do what it needs
+	 * to accomplish the reset */
+	/* XXX this should be a recursive walk to children for 
+	 * multi-function devices; each child should get to report
+	 * status too, if needed ... if any child can't handle the reset,
+	 * then need to hotplug it. */
+	if (frozen_device->eeh_ops) {
+		if (frozen_device->eeh_ops->frozen) {
+			frozen_device->eeh_ops->frozen (dev, frozen_device->eeh_ops->data);
+		}	
+		rc = eeh_reset_device (dev, 0);
+		if (frozen_device->eeh_ops->post_reset) {
+			frozen_device->eeh_ops->post_reset (dev, frozen_device->eeh_ops->data);
+		}	
+		
+	} else {
+		rc = eeh_reset_device (dev, 1);
+	}
+
+	/* Store the freeze count with the pci adapter, and not the slot.
+	 * This way, if the device is replaced, the count is cleared.
+	 */
+	if (frozen_slot->dn->child)
+		frozen_slot->dn->child->eeh_freeze_count = freeze_count;
+
+	return rc;
+}
+
+static struct notifier_block eeh_block;
+
+void __init init_eeh_handler (void)
+{
+	eeh_block.notifier_call = handle_eeh_events;
+	eeh_register_notifier (&eeh_block);
+}
+
+void __exit exit_eeh_handler (void)
+{
+	eeh_unregister_notifier (&eeh_block);
+}
+
===== drivers/scsi/ipr.c 1.31 vs edited =====
--- 1.31/drivers/scsi/ipr.c	2004-12-14 17:06:35 -06:00
+++ edited/drivers/scsi/ipr.c	2005-02-22 17:37:41 -06:00
@@ -80,6 +80,8 @@
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_request.h>
+
+#define CONFIG_SCSI_IPR_EEH
 #include "ipr.h"
 
 /*
@@ -2917,7 +2919,6 @@ static int ipr_eh_host_reset(struct scsi
 
 	if (WAIT_FOR_DUMP == ioa_cfg->sdt_state)
 		ioa_cfg->sdt_state = GET_DUMP;
-
 	rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV);
 
 	LEAVE;
@@ -5007,6 +5008,67 @@ static int ipr_reset_start_bist(struct i
 	return rc;
 }
 
+#ifdef CONFIG_SCSI_IPR_EEH
+
+static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd);
+
+#define IPR_WAIT_FOR_EEH_RESET (HZ)
+static int ipr_reset_poll_eeh_recovery(struct ipr_cmnd *ipr_cmd)
+{
+	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
+	int rc;
+
+	ENTER;
+	if (ioa_cfg->wait_on_eeh_reset) {	/* set by ipr_eeh_frozen, cleared by ipr_eeh_post_reset */
+		ipr_reset_start_timer(ipr_cmd, IPR_WAIT_FOR_EEH_RESET);	/* presumably re-runs this step after HZ jiffies -- confirm */
+		rc = IPR_RC_JOB_RETURN;
+	} else {
+		ipr_cmd->job_step = ipr_reset_start_bist;	/* EEH reset no longer pending: move on to BIST */
+		rc = IPR_RC_JOB_CONTINUE;
+	}
+
+	LEAVE;
+	return rc;
+}
+
+static void ipr_eeh_frozen (struct pci_dev *pdev, void * data)
+{
+	struct ipr_ioa_cfg *ioa_cfg = data;	/* data is the ioa_cfg stored in eeh_ops->data at registration */
+	ioa_cfg->wait_on_eeh_reset = 1;	/* ipr_reset_poll_eeh_recovery keeps re-arming its timer while set */
+}
+
+static void ipr_eeh_post_reset (struct pci_dev *pdev, void * data)
+{
+	struct ipr_ioa_cfg *ioa_cfg = data;	/* data is the ioa_cfg stored in eeh_ops->data at registration */
+	ioa_cfg->wait_on_eeh_reset = 0;	/* lets ipr_reset_poll_eeh_recovery proceed to ipr_reset_start_bist */
+}
+
+static void ipr_eeh_perm_failure (struct pci_dev *pdev, void * data)
+{
+	// struct ipr_ioa_cfg *ioa_cfg = data;
+	/* Currently a no-op stub: the shutdown step below is compiled out (no ipr_cmd or rc in scope here). */
+#if 0  // XXXXXXXXXXXXXXXXXXXXXXX
+	ipr_cmd->job_step = ipr_reset_shutdown_ioa;
+	rc = IPR_RC_JOB_CONTINUE;
+#endif
+}
+
+static void ipr_register_eeh_handlers (struct ipr_ioa_cfg *ioa_cfg)
+{
+	/* Register IPR's EEH recovery callbacks.  XXX broken memory management: eeh_ops is never freed */
+	struct eeh_recovery_ops *eeh_ops = kcalloc (1, sizeof(*eeh_ops), GFP_KERNEL);
+	if (!eeh_ops)
+		return;	/* allocation failed: register nothing rather than dereference NULL */
+	eeh_ops->frozen = ipr_eeh_frozen;	/* sets wait_on_eeh_reset */
+	eeh_ops->post_reset = ipr_eeh_post_reset;	/* clears wait_on_eeh_reset */
+	eeh_ops->perm_failure = ipr_eeh_perm_failure;
+	eeh_ops->data = ioa_cfg;	/* handed back to each callback as its 'data' argument */
+	eeh_register_recovery_ops (ioa_cfg->pdev, eeh_ops);
+}
+
+#endif
+
+
 /**
  * ipr_reset_allowed - Query whether or not IOA can be reset
  * @ioa_cfg:	ioa config struct
@@ -5042,6 +5104,7 @@ static int ipr_reset_wait_to_start_bist(
 	struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg;
 	int rc = IPR_RC_JOB_RETURN;
 
+#ifndef CONFIG_SCSI_IPR_EEH
 	if (!ipr_reset_allowed(ioa_cfg) && ipr_cmd->u.time_left) {
 		ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
 		ipr_reset_start_timer(ipr_cmd, IPR_CHECK_FOR_RESET_TIMEOUT);
@@ -5049,6 +5112,21 @@ static int ipr_reset_wait_to_start_bist(
 		ipr_cmd->job_step = ipr_reset_start_bist;
 		rc = IPR_RC_JOB_CONTINUE;
 	}
+#else
+	if (!ipr_reset_allowed(ioa_cfg) && ipr_cmd->u.time_left 
+	    && !eeh_slot_is_isolated (ioa_cfg->pdev)) {
+			  
+		ipr_cmd->u.time_left -= IPR_CHECK_FOR_RESET_TIMEOUT;
+		ipr_reset_start_timer(ipr_cmd, IPR_CHECK_FOR_RESET_TIMEOUT);
+	} else {
+		if (eeh_slot_is_isolated (ioa_cfg->pdev)) {
+			ipr_cmd->job_step = ipr_reset_poll_eeh_recovery;
+		} else {
+			ipr_cmd->job_step = ipr_reset_start_bist;
+		}
+		rc = IPR_RC_JOB_CONTINUE;
+	}
+#endif
 
 	return rc;
 }
@@ -5079,7 +5157,16 @@ static int ipr_reset_alert(struct ipr_cm
 		writel(IPR_UPROCI_RESET_ALERT, ioa_cfg->regs.set_uproc_interrupt_reg);
 		ipr_cmd->job_step = ipr_reset_wait_to_start_bist;
 	} else {
+#ifndef CONFIG_SCSI_IPR_EEH
 		ipr_cmd->job_step = ipr_reset_start_bist;
+#else
+		if (eeh_slot_is_isolated (ioa_cfg->pdev)) {
+			ipr_cmd->job_step = ipr_reset_poll_eeh_recovery;
+			return IPR_RC_JOB_CONTINUE;
+		} else {
+			ipr_cmd->job_step = ipr_reset_start_bist;
+		}
+#endif
 	}
 
 	ipr_cmd->u.time_left = IPR_WAIT_FOR_RESET_TIMEOUT;
@@ -5759,6 +5846,10 @@ static int __devinit ipr_probe_ioa(struc
 
 	/* Save away PCI config space for use following IOA reset */
 	rc = pci_save_state(pdev);
+
+#ifdef CONFIG_SCSI_IPR_EEH
+	ipr_register_eeh_handlers (ioa_cfg);
+#endif
 
 	if (rc != PCIBIOS_SUCCESSFUL) {
 		dev_err(&pdev->dev, "Failed to save PCI config space\n");
===== drivers/scsi/ipr.h 1.21 vs edited =====
--- 1.21/drivers/scsi/ipr.h	2004-12-14 17:09:02 -06:00
+++ edited/drivers/scsi/ipr.h	2005-02-22 15:52:36 -06:00
@@ -833,6 +833,9 @@ struct ipr_ioa_cfg {
 	u8 dump_taken:1;
 	u8 allow_cmds:1;
 	u8 allow_ml_add_del:1;
+#ifdef CONFIG_SCSI_IPR_EEH
+	u8 wait_on_eeh_reset:1;
+#endif
 
 	u16 type; /* CCIN of the card */
 
@@ -1132,9 +1135,11 @@ struct ipr_ucode_image_header {
 #define ipr_trace ipr_dbg("%s: %s: Line: %d\n",\
 	__FILE__, __FUNCTION__, __LINE__)
 
+#undef IPR_DBG_TRACE
+#define IPR_DBG_TRACE 1
 #if IPR_DBG_TRACE
-#define ENTER printk(KERN_INFO IPR_NAME": Entering %s\n", __FUNCTION__)
-#define LEAVE printk(KERN_INFO IPR_NAME": Leaving %s\n", __FUNCTION__)
+#define ENTER printk(KERN_INFO IPR_NAME": Entering %s jiffies=%lu\n", __FUNCTION__, jiffies)
+#define LEAVE printk(KERN_INFO IPR_NAME": Leaving %s jiffies=%lu\n", __FUNCTION__, jiffies)
 #else
 #define ENTER
 #define LEAVE
===== drivers/scsi/sym53c8xx_2/sym_glue.c 1.52 vs edited =====
--- 1.52/drivers/scsi/sym53c8xx_2/sym_glue.c	2004-10-24 11:08:18 -05:00
+++ edited/drivers/scsi/sym53c8xx_2/sym_glue.c	2005-02-22 17:15:38 -06:00
@@ -851,6 +851,7 @@ static int sym_eh_handler(int op, char *
 	sprintf(devname, "%s:%d:%d", sym_name(np), cmd->device->id, cmd->device->lun);
 
 	printf_warning("%s: %s operation started.\n", devname, opname);
+printk("duuuude %s: %s like operation started.\n", devname, opname);
 
 #if 0
 	/* This one should be the result of some race, thus to ignore */
@@ -896,6 +897,17 @@ prepare:
 		sts = 0;
 		break;
 	case SYM_EH_HOST_RESET:
+#define CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+printk("duuuuuude attempting symbios recovery\n");
+dump_stack();
+int rc = eeh_slot_is_isolated (np->s.device);
+
+printk ("duude symbios is isolated ??=%d\n", rc);
+if (rc) {
+	eeh_reset_device (np->s.device, 0);
+}
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
 		sym_reset_scsi_bus(np, 0);
 		sym_start_up (np, 1);
 		sts = 0;
@@ -1587,6 +1599,21 @@ out_err32:
 	return -1;
 }
 
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+static void sym_register_eeh_handlers (struct sym_device *dev)
+{
+	/* Register a callback-less ops set.  XXX broken memory management: eeh_ops is never freed */
+	struct eeh_recovery_ops *eeh_ops = kcalloc (1, sizeof(*eeh_ops), GFP_KERNEL);
+	if (!eeh_ops)
+		return;	/* allocation failed: register nothing rather than dereference NULL */
+	eeh_ops->frozen = NULL;
+	eeh_ops->post_reset = NULL;
+	eeh_ops->perm_failure = NULL;
+	eeh_ops->data = dev;	/* no callback set here to receive it */
+	eeh_register_recovery_ops (dev->pdev, eeh_ops);
+}
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
+	
 /*
  *  Host attach and initialisations.
  *
@@ -1672,6 +1699,9 @@ static struct Scsi_Host * __devinit sym_
 	strlcpy(np->s.chip_name, dev->chip.name, sizeof(np->s.chip_name));
 	sprintf(np->s.inst_name, "sym%d", np->s.unit);
 
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+	sym_register_eeh_handlers (dev);
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
 	/*
 	 *  Ask/tell the system about DMA addressing.
 	 */
===== include/asm-ppc64/eeh.h 1.23 vs edited =====
--- 1.23/include/asm-ppc64/eeh.h	2004-10-25 18:17:38 -05:00
+++ edited/include/asm-ppc64/eeh.h	2005-02-22 13:21:49 -06:00
@@ -22,8 +22,8 @@
 
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/string.h>
 #include <linux/notifier.h>
+#include <linux/string.h>
 
 struct pci_dev;
 struct device_node;
@@ -32,6 +32,11 @@ struct device_node;
 #define EEH_MODE_SUPPORTED	(1<<0)
 #define EEH_MODE_NOCHECK	(1<<1)
 #define EEH_MODE_ISOLATED	(1<<2)
+#define EEH_MODE_RECOVERING	(1<<3)
+
+/* Max number of EEH freezes allowed before we consider the device
+ * to be permanently disabled. */
+#define EEH_MAX_ALLOWED_FREEZES 5
 
 #ifdef CONFIG_PPC_PSERIES
 extern void __init eeh_init(void);
@@ -60,16 +65,76 @@ void eeh_add_device_late(struct pci_dev 
  * eeh_remove_device - undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
  *
- * This routine should be when a device is removed from a running
- * system (e.g. by hotplug or dlpar).
+ * This routine should be called when a device is removed from 
+ * a running system (e.g. by hotplug or dlpar).  It unregisters 
+ * the PCI device from the EEH subsystem.  I/O errors affecting
+ * this device will no longer be detected after this call; thus,
+ * i/o errors affecting this slot may leave this device unusable.
  */
 void eeh_remove_device(struct pci_dev *);
 
-#define EEH_DISABLE		0
-#define EEH_ENABLE		1
-#define EEH_RELEASE_LOADSTORE	2
-#define EEH_RELEASE_DMA		3
-int eeh_set_option(struct pci_dev *dev, int options);
+/**
+ * eeh_slot_is_isolated -- return non-zero value if slot is frozen
+ */
+int eeh_slot_is_isolated (struct pci_dev *dev);
+
+/**
+ * eeh_slot_error_detail -- record an EEH error condition to the log
+ * @severity: 1 if temporary, 2 if permanent failure.
+ *
+ * Obtains the EEH error details from the RTAS subsystem,
+ * and then logs these details with the RTAS error log system.
+ */
+void eeh_slot_error_detail (struct device_node *dn, int severity);
+
+/** 
+ * rtas_set_slot_reset -- unfreeze a frozen slot
+ *
+ * Clear the EEH-frozen condition on a slot.  This routine
+ * does this by asserting the PCI #RST line for 1/8th of 
+ * a second; this routine will sleep while the adapter is 
+ * being reset.
+ */
+void rtas_set_slot_reset (struct device_node *dn);
+
+/** rtas_pci_slot_reset raises/lowers the pci #RST line
+ *  state: 1/0 to raise/lower the #RST
+ *
+ * Clear the EEH-frozen condition on a slot.  This routine
+ * asserts the PCI #RST line if the 'state' argument is '1',
+ * and drops the #RST line if 'state' is '0'.  This routine is
+ * safe to call in an interrupt context.
+ *
+ */
+void rtas_pci_slot_reset(struct device_node *dn, int state);
+void eeh_pci_slot_reset(struct pci_dev *dev, int state);
+
+/** eeh_pci_slot_availability -- Indicates whether a PCI
+ *  slot is ready to be used. After a PCI reset, it may take a while 
+ *  for the PCI fabric to fully reset the communications path to the
+ *  given PCI card.  This routine can be used to determine how long
+ *  to wait before a PCI slot might become usable.  
+ *
+ *  This routine returns how long to wait (in milliseconds) before
+ *  the slot is expected to be usable.  A value of zero means the
+ *  slot is immediately usable. A negative value means that the
+ *  slot is permanently disabled.
+ */
+int eeh_pci_slot_availability(struct pci_dev *dev);
+
+/** Restore device configuration info across device resets.
+ */
+void eeh_restore_bars(struct device_node *);
+void eeh_pci_restore_bars(struct pci_dev *dev);
+
+/**
+ * rtas_configure_bridge -- firmware initialization of pci bridge
+ * 
+ * Ask the firmware to configure any PCI bridge devices 
+ * located behind the indicated node. Required after a 
+ * pci device reset.
+ */
+void rtas_configure_bridge(struct device_node *dn);
 
 
 /**
@@ -86,11 +151,27 @@ struct eeh_event {
 	struct pci_dev       *dev;
 	struct device_node   *dn;
 	int                  reset_state;
+	int                  time_unavail;
 };
 
 /** Register to find out about EEH events. */
 int eeh_register_notifier(struct notifier_block *nb);
 int eeh_unregister_notifier(struct notifier_block *nb);
+
+
+/** EEH error recovery callbacks.  These will be called on a
+ *  registered device driver during the EEH recovery procedure.
+ *  Eventually, this should be a part of struct pci_driver
+ */
+struct eeh_recovery_ops {
+	void (*frozen) (struct pci_dev *, void *);  /* called when dev is first frozen, before the reset */
+	void (*post_reset) (struct pci_dev *, void *);  /* called after card is reset */
+	void (*perm_failure) (struct pci_dev *, void *);  /* called if card is dead, before it is unconfigured */
+	void * data;  /* driver's own pointer; passed back as the 2nd argument of each callback */
+};
+
+/** Register a set of recovery ops for an EEH event */
+void eeh_register_recovery_ops (struct pci_dev *, struct eeh_recovery_ops *);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
===== include/asm-ppc64/prom.h 1.24 vs edited =====
--- 1.24/include/asm-ppc64/prom.h	2004-11-25 00:42:42 -06:00
+++ edited/include/asm-ppc64/prom.h	2005-02-22 12:07:09 -06:00
@@ -144,6 +144,7 @@ struct property {
  */
 struct pci_controller;
 struct iommu_table;
+struct eeh_recovery_ops;
 
 struct device_node {
 	char	*name;
@@ -164,8 +165,13 @@ struct device_node {
 	int	status;			/* Current device status (non-zero is bad) */
 	int	eeh_mode;		/* See eeh.h for possible EEH_MODEs */
 	int	eeh_config_addr;
+	int   eeh_check_count;    /* number of times device driver ignored error */
+	int	eeh_freeze_count;   /* number of times this device froze up. */
+	int   eeh_is_bridge;      /* device is pci-to-pci bridge */
 	struct  pci_controller *phb;	/* for pci devices */
 	struct	iommu_table *iommu_table;	/* for phb's or bridges */
+	u32      config_space[16]; /* saved PCI config space */
+	struct eeh_recovery_ops *eeh_ops;  /* recovery callbacks */
 
 	struct	property *properties;
 	struct	device_node *parent;
===== include/asm-ppc64/rtas.h 1.25 vs edited =====
--- 1.25/include/asm-ppc64/rtas.h	2004-11-25 00:42:42 -06:00
+++ edited/include/asm-ppc64/rtas.h	2005-01-20 17:25:37 -06:00
@@ -241,4 +241,6 @@ extern void rtas_stop_self(void);
 /* RMO buffer reserved for user-space RTAS use */
 extern unsigned long rtas_rmo_buf;
 
+extern int rtas_write_config(struct device_node *dn, int where, int size, u32 val);
+
 #endif /* _PPC64_RTAS_H */