[PATCH] powerpc/eeh: Prefetch PHB diag-data

Gavin Shan shangw at linux.vnet.ibm.com
Fri Feb 14 17:15:45 EST 2014


PHB diag-data is useful for locating the root cause of a frozen PE.
Unfortunately, part of that data was lost because we wrongly zapped
the LEM registers before collecting the PHB diag-data. This patch
fixes the problem by prefetching the diag-data through an extended
eeh_ops->get_log() on the PowerNV platform.

Signed-off-by: Gavin Shan <shangw at linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h               |    3 ++-
 arch/powerpc/kernel/eeh.c                    |    2 +-
 arch/powerpc/kernel/eeh_driver.c             |    4 ++++
 arch/powerpc/platforms/powernv/eeh-ioda.c    |   11 +++++++----
 arch/powerpc/platforms/powernv/eeh-powernv.c |    6 ++++--
 arch/powerpc/platforms/powernv/pci.h         |    2 +-
 arch/powerpc/platforms/pseries/eeh_pseries.c |   10 ++++++++--
 7 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d4dd41f..b0bce0b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -163,7 +163,8 @@ struct eeh_ops {
 	int (*get_state)(struct eeh_pe *pe, int *state);
 	int (*reset)(struct eeh_pe *pe, int option);
 	int (*wait_state)(struct eeh_pe *pe, int max_wait);
-	int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
+	int (*get_log)(struct eeh_pe *pe, int severity,
+		       char *drv_log, unsigned long len, bool prefetch);
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
 	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index e7b76a6..d409d9d 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -257,7 +257,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 		}
 	}
 
-	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
+	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen, false);
 }
 
 /**
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7bb30dc..7a9123a 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -502,6 +502,10 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
 		pe->freeze_count);
 
+	/* Prefetch PHB diag-data if applicable */
+	if (eeh_ops->get_log)
+		eeh_ops->get_log(pe, EEH_LOG_TEMP, NULL, 0, true);
+
 	/* Walk the various device drivers attached to this slot through
 	 * a reset sequence, giving each an opportunity to do what it needs
 	 * to accomplish the reset.  Each child gets a report of the
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f514743..46fc394 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -539,7 +539,7 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
  * The function is used to retrieve error log from P7IOC.
  */
 static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
-			    char *drv_log, unsigned long len)
+			    char *drv_log, unsigned long len, bool prefetch)
 {
 	s64 ret;
 	unsigned long flags;
@@ -548,6 +548,12 @@ static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
 
 	spin_lock_irqsave(&phb->lock, flags);
 
+	if (!prefetch) {
+		pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
+		spin_unlock_irqrestore(&phb->lock, flags);
+		return 0;
+	}
+
 	ret = opal_pci_get_phb_diag_data2(phb->opal_id,
 			phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
 	if (ret) {
@@ -557,9 +563,6 @@ static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
 		return -EIO;
 	}
 
-	/* The PHB diag-data is always indicative */
-	pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
-
 	spin_unlock_irqrestore(&phb->lock, flags);
 
 	return 0;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index a59788e..df1b73f 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -290,14 +290,16 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
  * Retrieve the temporary or permanent error from the PE.
  */
 static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
-			char *drv_log, unsigned long len)
+			       char *drv_log, unsigned long len,
+			       bool prefetch)
 {
 	struct pci_controller *hose = pe->phb;
 	struct pnv_phb *phb = hose->private_data;
 	int ret = -EEXIST;
 
 	if (phb->eeh_ops && phb->eeh_ops->get_log)
-		ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
+		ret = phb->eeh_ops->get_log(pe, severity,
+					    drv_log, len, prefetch);
 
 	return ret;
 }
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 13f1942..f1a9c2a 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -75,7 +75,7 @@ struct pnv_eeh_ops {
 	int (*get_state)(struct eeh_pe *pe);
 	int (*reset)(struct eeh_pe *pe, int option);
 	int (*get_log)(struct eeh_pe *pe, int severity,
-		       char *drv_log, unsigned long len);
+		       char *drv_log, unsigned long len, bool prefetch);
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*next_error)(struct eeh_pe **pe);
 };
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 8a8f047..d38e1ba 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -576,11 +576,17 @@ static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
  * Actually, the error will be retrieved through the dedicated
  * RTAS call.
  */
-static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
+static int pseries_eeh_get_log(struct eeh_pe *pe, int severity,
+			       char *drv_log, unsigned long len,
+			       bool prefetch)
 {
 	int config_addr;
 	unsigned long flags;
-	int ret;
+	int ret = 0;
+
+	/* We needn't do prefetch stuff */
+	if (prefetch)
+		return ret;
 
 	spin_lock_irqsave(&slot_errbuf_lock, flags);
 	memset(slot_errbuf, 0, eeh_error_buf_size);
-- 
1.7.10.4



More information about the Linuxppc-dev mailing list