[PATCH] powerpc/eeh: Dump PHB diag-data early

Gavin Shan gwshan at linux.vnet.ibm.com
Sat Nov 22 21:58:09 AEDT 2014


On PowerNV platform, PHB diag-data is dumped after stopping device
drivers. In case of recursive EEH errors, the kernel is usually
crashed before dumping PHB diag-data for the second EEH error. It's
hard to locate the root cause of the second EEH error without PHB
diag-data.

The patch adds one more EEH option "eeh=early_log", which helps
dumping PHB diag-data immediately once frozen PE is detected, in
order to get the PHB diag-data for the second EEH error.

Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h            |  1 +
 arch/powerpc/kernel/eeh.c                 |  2 ++
 arch/powerpc/platforms/powernv/eeh-ioda.c | 13 ++++++++++++-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 2e633b4..0652ebe 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -39,6 +39,7 @@ struct device_node;
 #define EEH_PROBE_MODE_DEV	0x04	/* From PCI device	*/
 #define EEH_PROBE_MODE_DEVTREE	0x08	/* From device tree	*/
 #define EEH_ENABLE_IO_FOR_LOG	0x10	/* Enable IO for log	*/
+#define EEH_EARLY_DUMP_LOG	0x20	/* Dump log immediately	*/
 
 /*
  * Delay for PE reset, all in ms
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f1c6b11..05be77d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -143,6 +143,8 @@ static int __init eeh_setup(char *str)
 {
 	if (!strcmp(str, "off"))
 		eeh_add_flag(EEH_FORCE_DISABLED);
+	else if (!strcmp(str, "early_log"))
+		eeh_add_flag(EEH_EARLY_DUMP_LOG);
 
 	return 1;
 }
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 426814a..43aba2d 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -354,6 +354,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
 	} else if (!(pe->state & EEH_PE_ISOLATED)) {
 		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 		ioda_eeh_phb_diag(pe);
+
+		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
 	}
 
 	return result;
@@ -452,6 +455,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
 
 		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 		ioda_eeh_phb_diag(pe);
+
+		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
 	}
 
 	return result;
@@ -731,7 +737,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
 static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
 			    char *drv_log, unsigned long len)
 {
-	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
+		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
 
 	return 0;
 }
@@ -1087,6 +1094,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
 		    !((*pe)->state & EEH_PE_ISOLATED)) {
 			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
 			ioda_eeh_phb_diag(*pe);
+
+			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+				pnv_pci_dump_phb_diag_data((*pe)->phb,
+							   (*pe)->data);
 		}
 
 		/*
-- 
1.8.3.2



More information about the Linuxppc-dev mailing list