[Skiboot] [PATCH] npu2: Print a wider range of registers for debug

Stewart Smith stewart at linux.vnet.ibm.com
Wed Mar 21 10:56:20 AEDT 2018


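Also, report registers that fail to read as all-ones, collapse runs of
consecutive identical register pairs into a single range line, and
suppress the XSCOM busy-reset notice when ignore_error is set.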

Cc: stable
Suggested-by: Ryan Black <rblack at us.ibm.com>
Signed-off-by: Stewart Smith <stewart at linux.vnet.ibm.com>
---
 core/hmi.c          | 65 ++++++++++++++++++++++++++++++++++++-----------------
 hw/xscom.c          |  9 +++++---
 include/npu2-regs.h |  2 +-
 3 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/core/hmi.c b/core/hmi.c
index 846d2b9270a8..672e14e64a7e 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -568,6 +568,47 @@ static void find_nx_checkstop_reason(int flat_chip_id,
 	*event_generated = true;
 }
 
+static void npu2_dump_scom_range(int flat_chip_id, uint64_t start, uint64_t end)
+{	/* Log SCOM registers two per iteration while r < end, folding repeated pairs. */
+	uint64_t npu_scom_dump[4];	/* [0],[1]: current pair; [2],[3]: previous pair */
+	uint64_t repeated_fir = 0;	/* first address of the open run of repeats; 0 = no run */
+	uint64_t r;
+	int res[2];	/* return codes of the two _xscom_read() calls */
+
+	npu_scom_dump[2] = 0;
+	npu_scom_dump[3] = 0;
+	for (r = start; r < end; r++) {
+		npu_scom_dump[0] = npu_scom_dump[1] = 0;
+		res[0] = _xscom_read(flat_chip_id, r++,
+				     &npu_scom_dump[0], false, true);
+		res[1] = _xscom_read(flat_chip_id, r,
+				     &npu_scom_dump[1], false, true); /* r is now the pair's second address */
+		if (res[0] != OPAL_SUCCESS)
+			npu_scom_dump[0] = ~0;	/* a failed read is shown as all-ones */
+		if (res[1] != OPAL_SUCCESS)
+			npu_scom_dump[1] = ~0;	/* a failed read is shown as all-ones */
+
+		if (npu_scom_dump[0] == npu_scom_dump[2]
+		    && npu_scom_dump[1] == npu_scom_dump[3]) {
+			if (repeated_fir == 0)
+				repeated_fir = r-1;	/* first address of this repeating pair */
+		} else {
+			if (repeated_fir) {	/* values changed: emit the folded run first */
+				prlog(PR_ERR, "NPU: 0x%08llx-0x%08llx: "
+				      "0x%016llx 0x%016llx\n",
+				      repeated_fir, r-2,	/* r-2: last address of the previous pair */
+				      npu_scom_dump[2], npu_scom_dump[3]);
+				repeated_fir = 0;
+			}
+			prlog(PR_ERR,
+			      "NPU: 0x%08llx: 0x%016llx 0x%016llx\n",
+			      r-1, npu_scom_dump[0], npu_scom_dump[1]);
+		}
+		npu_scom_dump[2] = npu_scom_dump[0];	/* remember this pair for the next comparison */
+		npu_scom_dump[3] = npu_scom_dump[1];
+	} /* NOTE(review): a run of repeats still open at loop exit is never logged */
+}
+
 static void find_npu2_checkstop_reason(int flat_chip_id,
 				      struct OpalHMIEvent *hmi_evt,
 				      bool *event_generated)
@@ -585,10 +626,8 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
 	uint64_t npu2_fir_action0_addr;
 	uint64_t npu2_fir_action1_addr;
 	uint64_t fatal_errors;
-	uint64_t npu_scom_dump[2];
 	bool npu2_hmi_verbose;
 	int total_errors = 0;
-	uint64_t r;
 
 	/* Find the NPU on the chip associated with the HMI. */
 	for_each_phb(phb) {
@@ -622,8 +661,8 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
 		fatal_errors = npu2_fir & ~npu2_fir_mask & npu2_fir_action0 & npu2_fir_action1;
 
 		if (fatal_errors) {
-			prlog(PR_ERR, "NPU: FIR#%d FIR 0x%016llx mask 0x%016llx\n",
-					i, npu2_fir, npu2_fir_mask);
+			prlog(PR_ERR, "NPU: Chip %d FIR#%d FIR 0x%016llx mask 0x%016llx\n",
+			      flat_chip_id, i, npu2_fir, npu2_fir_mask);
 			prlog(PR_ERR, "NPU: ACTION0 0x%016llx, ACTION1 0x%016llx\n",
 					npu2_fir_action0, npu2_fir_action1);
 			total_errors++;
@@ -646,22 +685,8 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
 
 	if (npu2_hmi_verbose) {
 		_xscom_lock();
-		for (r = NPU2_DEBUG_REG_START; r < NPU2_DEBUG_REG_END; r++) {
-			npu_scom_dump[0] = npu_scom_dump[1] = 0;
-			_xscom_read(flat_chip_id, r++, &npu_scom_dump[0], false, true);
-			_xscom_read(flat_chip_id, r,   &npu_scom_dump[1], false, true);
-			prlog(PR_ERR, "NPU: 0x%016llx=0x%016llx 0x%016llx=0x%016llx\n",
-			      r-1, npu_scom_dump[0],
-			      r, npu_scom_dump[1]);
-		}
-		for (r = NPU2_FIR_REGISTER_0; r < NPU2_FIR_REGISTER_END; r++) {
-			npu_scom_dump[0] = npu_scom_dump[1] = 0;
-			_xscom_read(flat_chip_id, r++, &npu_scom_dump[0], false, true);
-			_xscom_read(flat_chip_id, r,   &npu_scom_dump[1], false, true);
-			prlog(PR_ERR, "NPU: 0x%016llx=0x%016llx 0x%016llx=0x%016llx\n",
-			      r-1, npu_scom_dump[0],
-			      r, npu_scom_dump[1]);
-		}
+		npu2_dump_scom_range(flat_chip_id, NPU2_DEBUG_REG_START, NPU2_DEBUG_REG_END);
+		npu2_dump_scom_range(flat_chip_id, NPU2_FIR_REGISTER_0, NPU2_FIR_REGISTER_END);
 		_xscom_unlock();
 		prlog(PR_ERR, " _________________________ \n");
 		prlog(PR_ERR, "< It's Driver Debug time! >\n");
diff --git a/hw/xscom.c b/hw/xscom.c
index 1bcfd475e737..0a041e6a7b41 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -233,9 +233,12 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
 		 * retrying again.
 		 */
 		if (retries && !(retries  % XSCOM_BUSY_RESET_THRESHOLD)) {
-			prlog(PR_NOTICE, "XSCOM: Busy even after %d retries, "
-				"resetting XSCOM now. Total retries  = %lld\n",
-				XSCOM_BUSY_RESET_THRESHOLD, retries);
+			if (!ignore_error)
+				prlog(PR_NOTICE,
+				      "XSCOM: Busy even after %d retries, "
+				      "resetting XSCOM now. "
+				      "Total retries  = %lld\n",
+				      XSCOM_BUSY_RESET_THRESHOLD, retries);
 			xscom_reset(gcid, true);
 
 		}
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index e52918db1fcf..b49f5393c150 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -31,7 +31,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 
 /* SCOM Registers to dump on HMI to aid in debugging */
 #define NPU2_DEBUG_REG_START 0x5011000
-#define NPU2_DEBUG_REG_END   0x50110FF
+#define NPU2_DEBUG_REG_END   0x50117FF
 
 /* These aren't really NPU specific registers but we initialise them in NPU
  * code */
-- 
2.14.3



More information about the Skiboot mailing list