[PATCH] EEH detection in acenic watchdog
Olof Johansson
olof at austin.ibm.com
Fri Aug 29 12:36:34 EST 2003
Paul Mackerras wrote:
> This is OK for our local ppc64 trees, but it's a bit ugly. It's an
> extra ifdef and it is putting something very pSeries-specific into a
> driver that otherwise is platform-agnostic.
Ok, so no shortcut this time then. :-)
I wasn't sure what the prevailing attitude was against adding such
specific hooks. The acenic driver is fairly clean as it is, so I suppose I
should make an effort to keep it that way.
> Maybe what we should propose is to add a "platform_error_check()"
> function which can be called in these kinds of circumstances, with
> null definitions on most architectures.
I've added a pci_check_error(), since EEH is currently limited to PCI on
our machines (and the driver in question is PCI-only). This also required a
minor shuffle in arch/ppc64/kernel/eeh.c to take a struct pci_dev *.
I also chose to #ifndef a dummy definition to be less intrusive on other
architectures. On 2.6 it could probably make sense to modify all
asm-*/pci.h instead, I'm not sure.
See attachment for the patch. Is this more like what you had in mind?
> BTW, I think Jes Sorensen did the acenic driver in the context of his
> work for a previous employer. I don't know if he has any interest
> in the acenic now (or even any acenic hardware to work on).
Ack, I guess that makes LKML the best venue for patches then? He's still
the official maintainer.
Thanks,
Olof
--
Olof Johansson Office: 4E002/905
pSeries Linux Development IBM Systems Group
Email: olof at austin.ibm.com Phone: 512-838-9858
All opinions are my own and not those of IBM
-------------- next part --------------
===== arch/ppc64/kernel/eeh.c 1.7 vs edited =====
--- 1.7/arch/ppc64/kernel/eeh.c Mon Aug 25 23:47:43 2003
+++ edited/arch/ppc64/kernel/eeh.c Thu Aug 28 21:11:51 2003
@@ -76,8 +76,6 @@
{
unsigned long addr;
struct pci_dev *dev;
- struct device_node *dn;
- unsigned long ret, rets[2];
/* IO BAR access could get us here...or if we manually force EEH
* operation on even if the hardware won't support it.
@@ -94,9 +92,21 @@
printk("EEH: no pci dev found for addr=0x%lx\n", addr);
return val;
}
+ return eeh_check_failure_dev(dev, val);
+}
+
+/* Same as eeh_check_failure(), but takes a pci_dev instead of a
+ * token address.
+ */
+
+unsigned long eeh_check_failure_dev(struct pci_dev *dev, unsigned long val)
+{
+ struct device_node *dn;
+ unsigned long ret, rets[2];
+
dn = pci_device_to_OF_node(dev);
if (!dn) {
- printk("EEH: no pci dn found for addr=0x%lx\n", addr);
+ printk("EEH: no pci dn found for device %s\n", dev->name);
return val;
}
@@ -133,7 +143,6 @@
}
eeh_false_positives++;
return val; /* good case */
-
}
struct eeh_early_enable_info {
===== drivers/net/acenic.c 1.29 vs edited =====
--- 1.29/drivers/net/acenic.c Fri Jun 20 01:00:08 2003
+++ edited/drivers/net/acenic.c Thu Aug 28 21:27:34 2003
@@ -1863,6 +1863,10 @@
dev->name, (unsigned int)readl(&regs->HostCtrl));
/* This can happen due to ieee flow control. */
} else {
+ if (pci_check_error(ap->pdev)) {
+ printk(KERN_WARNING "%s: PCI error detected\n", dev->name);
+ }
+
printk(KERN_DEBUG "%s: BUG... transmitter died. Kicking it.\n",
dev->name);
#if 0
===== include/asm-ppc64/eeh.h 1.6 vs edited =====
--- 1.6/include/asm-ppc64/eeh.h Mon Aug 25 23:47:51 2003
+++ edited/include/asm-ppc64/eeh.h Thu Aug 28 20:45:56 2003
@@ -46,6 +46,7 @@
void eeh_init(void);
int eeh_get_state(unsigned long ea);
unsigned long eeh_check_failure(void *token, unsigned long val);
+unsigned long eeh_check_failure_dev(struct pci_dev *dev, unsigned long val);
void *eeh_ioremap(unsigned long addr, void *vaddr);
#define EEH_DISABLE 0
===== include/asm-ppc64/pci.h 1.3 vs edited =====
--- 1.3/include/asm-ppc64/pci.h Fri May 10 19:46:04 2002
+++ edited/include/asm-ppc64/pci.h Thu Aug 28 21:02:10 2003
@@ -145,6 +145,14 @@
* this boolean for bounce buffer decisions.
*/
#define PCI_DMA_BUS_IS_PHYS (0)
+
+#define HAVE_PCI_CHECK_ERROR
+static inline int pci_check_error(struct pci_dev *dev)
+{
+ /* eeh_check_failure returns the second argument on non-failures */
+ return eeh_check_failure_dev(dev, 1);
+}
+
#endif /* __KERNEL__ */
===== include/linux/pci.h 1.32 vs edited =====
--- 1.32/include/linux/pci.h Mon Aug 25 23:47:53 2003
+++ edited/include/linux/pci.h Thu Aug 28 20:47:37 2003
@@ -806,5 +806,16 @@
#define PCIPCI_VSFX 16
#define PCIPCI_ALIMAGIK 32
+/* Some architectures have additional hardware support to detect problems
+ * with a PCI device and put the slot in a frozen state. This is the
+ * generic way to access that functionality.
+ *
+ * Return value is 0 for "no error detected"
+ */
+
+#ifndef HAVE_PCI_CHECK_ERROR
+static inline int pci_check_error(struct pci_dev *dev) { return 0; }
+#endif /* HAVE_PCI_CHECK_ERROR */
+
#endif /* __KERNEL__ */
#endif /* LINUX_PCI_H */
More information about the Linuxppc64-dev
mailing list