[PATCH] tg3: add PCI error recovery support

Linas Vepstas linas at austin.ibm.com
Thu Jul 19 09:48:26 EST 2007


Add support for PCI Error Recovery for the tg3 ethernet
device driver. The general principles of operation are
described in Documentation/pci-error-recovery.txt.
Other drivers having similar structure include e100,
e1000, ixgb, s2io, ipr, sym53c8xx_2, and lpfc.

Signed-off-by: Linas Vepstas <linas at austin.ibm.com>
Cc: Michael Chan <mchan at broadcom.com>

----

Michael, you are listed as the tg3 maintainer; could you
please forward upstream if you agree?  

Tested on the PCI-E version of this adapter, on POWER6,
for 85 (artificial) error injections (overnight) while
ftp'ing DVD ISO images over the link. Worked well.

 drivers/net/tg3.c |  108 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 107 insertions(+), 1 deletion(-)

Index: linux-2.6.22-git2/drivers/net/tg3.c
===================================================================
--- linux-2.6.22-git2.orig/drivers/net/tg3.c	2007-07-17 11:07:30.000000000 -0500
+++ linux-2.6.22-git2/drivers/net/tg3.c	2007-07-18 15:10:09.000000000 -0500
@@ -64,7 +64,7 @@
 
 #define DRV_MODULE_NAME		"tg3"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"3.77"
+#define DRV_MODULE_VERSION	"3.77-a"
 #define DRV_MODULE_RELDATE	"May 31, 2007"
 
 #define TG3_DEF_MAC_MODE	0
@@ -12126,11 +12126,117 @@ out:
 	return err;
 }
 
+/**
+ * tg3_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * Quiesce a running interface and disable the device. Returns DISCONNECT
+ * on permanent channel failure, NEED_RESET in every other case.
+ */
+static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev,
+                                               pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	struct device *dev = &netdev->dev;
+
+	dev_info(dev, "PCI I/O error detected on %s\n", netdev->name);
+
+	if (!netif_running(netdev))
+		return PCI_ERS_RESULT_NEED_RESET; /* down: nothing to quiesce */
+
+	/* Make sure the reset task cannot run while we shut things down */
+	cancel_work_sync(&tp->reset_task);
+	tg3_netif_stop(tp);
+	del_timer_sync(&tp->timer);
+	netif_device_detach(netdev);
+	pci_disable_device(pdev);
+
+	if (state == pci_channel_io_perm_failure) {
+		/* re-enable polling to avoid a hang in dev_close() with rtnl_lock held */
+		netif_poll_enable(netdev);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * tg3_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot: the card has
+ * experienced a hard reset followed by BIOS fixups, so its config space
+ * matches cold boot. Returns RECOVERED on success (or if the interface
+ * is down), DISCONNECT if re-enabling or restarting the device fails.
+ */
+static pci_ers_result_t tg3_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+	int err;
+
+	if (!netif_running(netdev))
+		return PCI_ERS_RESULT_RECOVERED; /* down: nothing to restart */
+
+	if (pci_enable_device(pdev)) {
+		printk(KERN_ERR "tg3: %s: "
+		       "Cannot re-enable PCI device after reset.\n", netdev->name);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_set_master(pdev);
+	pci_restore_state(tp->pdev); /* NOTE(review): presumably same as pdev */
+	netif_device_attach(netdev);
+
+	tg3_full_lock(tp, 0); /* hardware restart runs under the full lock */
+	tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
+	err = tg3_restart_hw(tp, 1);
+	tg3_full_unlock(tp);
+	if (err) {
+		printk(KERN_ERR "tg3: %s: "
+		       "Cannot restart hardware after reset.\n", netdev->name);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * tg3_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * Called when the error recovery driver says it's OK to resume normal
+ * operation: wakes the queue, re-arms the timer, restarts the netif.
+ */
+static void tg3_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct tg3 *tp = netdev_priv(netdev);
+
+	if (!netif_running(netdev))
+		return; /* interface is down: nothing to resume */
+
+	netif_wake_queue(netdev);
+
+	tp->timer.expires = jiffies + tp->timer_offset; /* re-arm driver timer */
+	add_timer(&tp->timer);
+
+	tg3_netif_start(tp);
+}
+
+static struct pci_error_handlers tg3_err_handler = {
+	.error_detected = tg3_io_error_detected, /* PCI error was detected */
+	.slot_reset = tg3_io_slot_reset, /* after the pci bus has been reset */
+	.resume = tg3_io_resume, /* traffic can start flowing again */
+};
+
 static struct pci_driver tg3_driver = {
 	.name		= DRV_MODULE_NAME,
 	.id_table	= tg3_pci_tbl,
 	.probe		= tg3_init_one,
 	.remove		= __devexit_p(tg3_remove_one),
+	.err_handler = &tg3_err_handler, /* PCI error recovery callbacks */
 	.suspend	= tg3_suspend,
 	.resume		= tg3_resume
 };



More information about the Linuxppc-dev mailing list