[PATCH] FYI/ Re: PCI Error Recovery API Proposal (updated)

Sat May 14 02:25:56 EST 2005

Hi Linas-

Sorry for the delay.  My first comment is that patches that affect 
PCI Hotplug drivers should also be submitted to 
pcihpd-discuss at lists.sourceforge.net.

We've discussed this offline, and maybe this will generate some public
discussion.  I don't think handle_eeh_events() or eeh_reset_device()
belong in the RPA PCI hotplug driver.  I've pasted these functions at
the bottom of this note.  These are out of place compared to the rest
of the rpaphp code.  They use eeh-specific RTAS calls, structures, and
functions, which do not otherwise appear in RPA PCI Hotplug.  These
functions are using PCI Hotplug, rather than implementing it, so they
belong elsewhere.

Looking at the patch, all EEH needs from PCI Hotplug are enable and
disable functions.  The rpaphp driver could register disable/enable
functions with eeh.c, and we could avoid introducing this unrelated
code to the PCI Hotplug driver.

One other quick comment:

+static struct device_node *
+get_phb_of_device (struct pci_dev *dev)
+{
+	struct device_node *dn;
+	struct pci_bus *bus;
+
+	while (1) {
+		bus = dev->bus;
+		if (!bus)
+			break;
+		dn = pci_bus_to_OF_node(bus);
+
+		if (dn->phb)
+			return dn;
+	
+		dev = bus->self;
+		BUG_ON (dev==NULL);
+		if (dev == NULL)
+			return NULL;
+	}

Could you just use pci_device_to_OF_node(), then dn->phb?  This way, 
you could avoid the loop.

+/* ------------------------------------------------------- */
+/**
+ * handle_eeh_events -- reset a PCI device after hard lockup.
+ *
+ * pSeries systems will isolate a PCI slot if the PCI-Host
+ * bridge detects address or data parity errors, DMA's 
+ * occuring to wild addresses (which usually happen due to
+ * bugs in device drivers or in PCI adapter firmware).
+ * Slot isolations also occur if #SERR, #PERR or other misc
+ * PCI-related errors are detected.
+ * 
+ * Recovery process consists of unplugging the device driver
+ * (which generated hotplug events to userspace), then issuing
+ * a PCI #RST to the device, then reconfiguring the PCI config 
+ * space for all bridges & devices under this slot, and then 
+ * finally restarting the device drivers (which cause a second
+ * set of hotplug events to go out to userspace).
+ */
+
+int eeh_reset_device (struct pci_dev *dev, struct device_node *dn, int reconfig)
+{
+	struct slot *frozen_slot= NULL;
+
+	if (!dev)
+		return 1;
+
+	if (reconfig)
+		frozen_slot = rpaphp_find_slot(dev);
+
+	if (reconfig && frozen_slot) rpaphp_unconfig_pci_adapter (frozen_slot);
+	
+	/* Reset the pci controller. (Asserts RST#; resets config space). 
+	 * Reconfigure bridges and devices */
+	rtas_set_slot_reset (dn->child);
+	rtas_configure_bridge(dn);
+	eeh_restore_bars(dn->child);
+printk ("duude, post restore bars, for %s here's the dump\n", dn->full_name);
+{
+extern int rtas_read_config(struct device_node *dn, int where, int size, u32 *val);
+int i, rc;
+u32 val;
+struct device_node *xn=dn->child;
+for(i=0;i<16;i++) {
+rc =  rtas_read_config (xn, i*4,4,&val);
+printk ("duude read config %d rc=%d val=%x expect=%x\n", i, rc, val,xn->config_space[i]);
+}}
+
+	enable_irq (dev->irq);
+
+	/* Give the system 5 seconds to finish running the user-space
+	 * hotplug scripts, e.g. ifdown for ethernet.  Yes, this is a hack, 
+	 * but if we don't do this, weird things happen.
+	 */
+	if (reconfig && frozen_slot) {
+		ssleep (5);
+		rpaphp_enable_pci_slot (frozen_slot);
+	}
+	return 0;
+}
+
+/* The longest amount of time to wait for a pci device
+ * to come back on line, in seconds.
+ */
+#define MAX_WAIT_FOR_RECOVERY 15 
+
+int handle_eeh_events (struct notifier_block *self, 
+                       unsigned long reason, void *ev)
+{
+	int freeze_count=0;
+	struct device_node *frozen_device;
+	struct peh_event *event = ev;
+	struct pci_dev *dev = event->dev;
+	int perm_failure = 0;
+	int rc;
+
+	if (!dev)
+	{
+		printk ("EEH: EEH error caught, but no PCI device specified!\n");
+		return 1;
+	}
+
+	frozen_device = get_phb_of_device (dev);
+
+	if (!frozen_device)
+	{
+		printk (KERN_ERR "EEH: Cannot find PCI conroller for %s %s\n",
+				pci_name(dev), pci_pretty_name (dev));
+
+		return 1;
+	}
+
+	/* We get "permanent failure" messages on empty slots. 
+	 * These are false alarms. Empty slots have no child dn. */
+	if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL))
+		return 0;
+
+	if (frozen_device)
+		freeze_count = frozen_device->eeh_freeze_count;
+	freeze_count ++;
+	if (freeze_count > EEH_MAX_ALLOWED_FREEZES)
+		perm_failure = 1;
+	
+	/* If the reset state is a '5' and the time to reset is 0 (infinity) 
+	 * or is more then 15 seconds, then mark this as a permanent failure. 
+	 */
+	if ((event->state == pci_channel_io_perm_failure) && 
+	    ((event->time_unavail <= 0) ||
+	     (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000))) 
+		perm_failure = 1;
+	
+	/* Log the error with the rtas logger. */
+	if (perm_failure) {
+		/* 
+		 * About 90% of all real-life EEH failures in the field
+		 * are due to poorly seated PCI cards. Only 10% or so are
+		 * due to actual, failed cards.
+		 */
+		printk (KERN_ERR
+		   "EEH: device %s:%s has failed %d times \n"
+			"and has been permanently disabled.  Please try reseating\n"
+		   "this device or replacing it.\n",
+			pci_name (dev),
+			pci_pretty_name (dev),
+			freeze_count);
+
+		eeh_slot_error_detail (frozen_device, 2 /* Permanent Error */);
+
+		/* Notify the device that its about to go down. */
+		/* XXX this should be a recursive walk to children for 
+		 * multi-function devices */
+		if (dev->driver->err_handler.error_detected) {
+			dev->driver->err_handler.error_detected (dev, pci_channel_io_perm_failure);
+		}
+
+		/* If there's a hotplug slot, unconfigure it */
+		struct slot * frozen_slot = rpaphp_find_slot(dev);
+		rpaphp_unconfig_pci_adapter (frozen_slot);
+		return 1;
+	} else {
+		eeh_slot_error_detail (frozen_device, 1 /* Temporary Error */);
+	}
+
+	printk (KERN_WARNING
+	   "EEH: This device has failed %d times since last reboot: %s:%s\n",
+		freeze_count,
+		pci_name (dev),
+		pci_pretty_name (dev));
+
+	/* Walk the various device drivers attached to this slot through
+	 * a reset sequence, giving each an opportunity to do what it needs
+	 * to accomplish the reset */
+	/* XXX this should be a recursive walk to children for 
+	 * multi-function devices; each child should get to report
+	 * status too, if needed ... if any child can't handle the reset,
+	 * then need to hotplug it. 
+	 * XXX This does not follow flow of BenH's last email at all. 
+	 * XXX will be fixed later XXX 
+	 */
+	if (dev->driver->err_handler.error_detected) {
+		dev->driver->err_handler.error_detected (dev, pci_channel_io_frozen);
+		rc = eeh_reset_device (dev, frozen_device, 0);
+		if (dev->driver->err_handler.slot_reset) 
+			dev->driver->err_handler.slot_reset (dev);
+	} else {
+		rc = eeh_reset_device (dev, frozen_device, 1);
+	}
+
+	/* Store the freeze count with the pci adapter, and not the slot.
+	 * This way, if the device is replaced, the count is cleared.
+	 */
+	frozen_device->eeh_freeze_count = freeze_count;
+
+	return rc;
+}

Thanks-
John