[PATCH 8/22] ppc64: Slot Marking Bugfix
linas
linas at austin.ibm.com
Fri Oct 7 09:33:20 EST 2005
08-eeh-slot-marking-bug.patch
A device that experiences a PCI outage may be just one device out
of many that were affected. In order to avoid repeated reports of
a failure, the entire tree of affected devices should be marked
as failed. This patch marks up the entire tree.
Signed-off-by: Linas Vepstas <linas at linas.org>
Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/eeh.c 2005-10-06 17:52:37.399078590 -0500
+++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/eeh.c 2005-10-06 17:53:02.164603746 -0500
@@ -480,32 +480,47 @@
* an interrupt context, which is bad.
*/
-static inline void __eeh_mark_slot (struct device_node *dn)
+static inline void __eeh_mark_slot (struct device_node *dn, int mode_flag)
{
while (dn) {
- PCI_DN(dn)->eeh_mode |= EEH_MODE_ISOLATED;
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode |= mode_flag;
- if (dn->child)
- __eeh_mark_slot (dn->child);
+ if (dn->child)
+ __eeh_mark_slot (dn->child, mode_flag);
+ }
dn = dn->sibling;
}
}
-static inline void __eeh_clear_slot (struct device_node *dn)
+void eeh_mark_slot (struct device_node *dn, int mode_flag)
+{
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode |= mode_flag;
+ __eeh_mark_slot (dn->child, mode_flag);
+}
+
+static inline void __eeh_clear_slot (struct device_node *dn, int mode_flag)
{
while (dn) {
- PCI_DN(dn)->eeh_mode &= ~EEH_MODE_ISOLATED;
- if (dn->child)
- __eeh_clear_slot (dn->child);
+ if (PCI_DN(dn)) {
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ if (dn->child)
+ __eeh_clear_slot (dn->child, mode_flag);
+ }
dn = dn->sibling;
}
}
-static inline void eeh_clear_slot (struct device_node *dn)
+void eeh_clear_slot (struct device_node *dn, int mode_flag)
{
unsigned long flags;
spin_lock_irqsave(&confirm_error_lock, flags);
- __eeh_clear_slot (dn);
+ dn = find_device_pe (dn);
+ PCI_DN(dn)->eeh_mode &= ~mode_flag;
+ PCI_DN(dn)->eeh_check_count = 0;
+ __eeh_clear_slot (dn->child, mode_flag);
spin_unlock_irqrestore(&confirm_error_lock, flags);
}
@@ -530,7 +545,6 @@
int rets[3];
unsigned long flags;
struct pci_dn *pdn;
- struct device_node *pe_dn;
int rc = 0;
__get_cpu_var(total_mmio_ffs)++;
@@ -632,8 +646,7 @@
/* Avoid repeated reports of this failure, including problems
* with other functions on this device, and functions under
* bridges. */
- pe_dn = find_device_pe (dn);
- __eeh_mark_slot (pe_dn);
+ eeh_mark_slot (dn, EEH_MODE_ISOLATED);
spin_unlock_irqrestore(&confirm_error_lock, flags);
eeh_send_failure_event (dn, dev, rets[0], rets[2]);
@@ -745,9 +758,6 @@
rc, state, pdn->node->full_name);
return;
}
-
- if (state == 0)
- eeh_clear_slot (pdn->node->parent->child);
}
/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
@@ -766,6 +776,12 @@
#define PCI_BUS_RST_HOLD_TIME_MSEC 250
msleep (PCI_BUS_RST_HOLD_TIME_MSEC);
+
+ /* We might get hit with another EEH freeze as soon as the
+ * pci slot reset line is dropped. Make sure we don't miss
+ * these, and clear the flag now. */
+ eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED);
+
rtas_pci_slot_reset (pdn, 0);
/* After a PCI slot has been reset, the PCI Express spec requires
Index: linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h
===================================================================
--- linux-2.6.14-rc2-git6.orig/arch/ppc64/kernel/pci.h 2005-10-06 17:52:37.399078590 -0500
+++ linux-2.6.14-rc2-git6/arch/ppc64/kernel/pci.h 2005-10-06 17:53:02.165603605 -0500
@@ -86,6 +86,13 @@
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
+/**
+ * mark and clear slots: find the "partitionable endpoint" (PE) and
+ * set or clear the flags for each subnode of the PE.
+ */
+void eeh_mark_slot (struct device_node *dn, int mode_flag);
+void eeh_clear_slot (struct device_node *dn, int mode_flag);
+
#endif
#endif /* __PPC_KERNEL_PCI_H__ */
More information about the Linuxppc64-dev
mailing list