[Skiboot] [PATCH v3 6/7] hw/npu2: Dump (more) npu2 registers on link error and HMIs

Frederic Barrat fbarrat at linux.ibm.com
Sat Apr 6 01:33:03 AEDT 2019


We were already logging some NPU registers during an HMI. This patch
tidies up how that is done and separates the global registers from
those specific to nvlink or opencapi.

Since we can now receive an error interrupt when an opencapi link goes
down unexpectedly, we also dump the NPU state in that case, but limit
it to the registers of the brick which hit the error.

The list of registers to dump was worked out with the hw team to
allow for proper debugging. For each register, we print the name as
found in the NPU workbook, the scom address and the register value.

Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
---
Changelog
v3:
  - remove useless macro when listing registers to dump (Andrew)
  - some function renaming (Andrew)
  - more attempts at code beautification

v2:
  - Simplify per-stack and stack-independent register handling by
    treating the XTS register separately
  - use ARRAY_SIZE() to iterate over the registers to dump

 core/hmi.c          |  58 +----------
 hw/npu2-common.c    | 234 ++++++++++++++++++++++++++++++++++++++++++++
 include/npu2-regs.h |  10 ++
 include/npu2.h      |   1 +
 4 files changed, 246 insertions(+), 57 deletions(-)

diff --git a/core/hmi.c b/core/hmi.c
index fbb182c3..26277fa6 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -594,60 +594,6 @@ static void find_nx_checkstop_reason(int flat_chip_id,
 	queue_hmi_event(hmi_evt, 0, out_flags);
 }
 
-/*
- * If the year is 2018 and you still see all these hardcoded, you
- * should really replace this with the neat macros that's in the
- * NPU2 code rather than this horrible listing of every single
- * NPU2 register hardcoded for a specific chip.
- *
- * I feel dirty having even written it.
- */
-static uint32_t npu2_scom_dump[] = {
-	0x5011017, 0x5011047, 0x5011077, 0x50110A7,
-	0x5011217, 0x5011247, 0x5011277, 0x50112A7,
-	0x5011417, 0x5011447, 0x5011477, 0x50114A7,
-	0x50110DA, 0x50112DA, 0x50114DA,
-	0x50110DB, 0x50112DB, 0x50114DB,
-	0x5011011, 0x5011041, 0x5011071, 0x50110A1,
-	0x5011211, 0x5011241, 0x5011271, 0x50112A1,
-	0x5011411, 0x5011441, 0x5011471, 0x50114A1,
-	0x5011018, 0x5011048, 0x5011078, 0x50110A8,
-	0x5011218, 0x5011248, 0x5011278, 0x50112A8,
-	0x5011418, 0x5011448, 0x5011478, 0x50114A8,
-	0x5011640,
-	0x5011114, 0x5011134, 0x5011314, 0x5011334,
-	0x5011514, 0x5011534, 0x5011118, 0x5011138,
-	0x5011318, 0x5011338, 0x5011518, 0x5011538,
-	0x50110D8, 0x50112D8, 0x50114D8,
-	0x50110D9, 0x50112D9, 0x50114D9,
-	0x5011019, 0x5011049, 0x5011079, 0x50110A9,
-	0x5011219, 0x5011249, 0x5011279, 0x50112A9,
-	0x5011419, 0x5011449, 0x5011479, 0x50114A9,
-	0x50110F4, 0x50112F4, 0x50114F4,
-	0x50110F5, 0x50112F5, 0x50114F5,
-	0x50110F6, 0x50112F6, 0x50114F6,
-	0x50110FD, 0x50112FD, 0x50114FD,
-	0x50110FE, 0x50112FE, 0x50114FE,
-	0x00
-};
-
-static void dump_scoms(int flat_chip_id, const char *unit, uint32_t *scoms,
-			const char *loc)
-{
-	uint64_t value;
-	int r;
-
-	while (*scoms != 0) {
-		value = 0;
-		r = _xscom_read(flat_chip_id, *scoms, &value, false);
-		if (r != OPAL_SUCCESS)
-			continue;
-		prlog(PR_ERR, "%s: [Loc: %s] P:%d 0x%08x=0x%016llx\n",
-		      unit, loc, flat_chip_id, *scoms, value);
-		scoms++;
-	}
-}
-
 static bool phb_is_npu2(struct dt_node *dn)
 {
 	return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") ||
@@ -731,9 +677,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
 	npu2_hmi_verbose = true;
 
 	if (npu2_hmi_verbose) {
-		_xscom_lock();
-		dump_scoms(flat_chip_id, "NPU", npu2_scom_dump, loc);
-		_xscom_unlock();
+		npu2_dump_scoms(flat_chip_id);
 		prlog(PR_ERR, " _________________________ \n");
 		prlog(PR_ERR, "< It's Driver Debug time! >\n");
 		prlog(PR_ERR, " ------------------------- \n");
diff --git a/hw/npu2-common.c b/hw/npu2-common.c
index ccbbbbca..d4c0f851 100644
--- a/hw/npu2-common.c
+++ b/hw/npu2-common.c
@@ -103,6 +103,239 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
 			(uint64_t)new_val << 32);
 }
 
+typedef struct {	/* describes one NPU register to dump */
+	const char *name;	/* label printed next to the value in the log */
+	uint32_t block;		/* block argument given to NPU2_REG_OFFSET() */
+	uint32_t offset;	/* offset argument given to NPU2_REG_OFFSET() */
+} npu2_scom_dump_t;
+
+static npu2_scom_dump_t npu2_scom_dump_global[] = {
+	/* Dumped once per selected stack. CQ State Machine (SM0-SM3): */
+	{ "CS.SM0.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG0 },
+	{ "CS.SM1.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG0 },
+	{ "CS.SM2.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG0 },
+	{ "CS.SM3.MISC.CERR_MESSAGE0", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG0 },
+
+	{ "CS.SM0.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG1 },
+	{ "CS.SM1.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG1 },
+	{ "CS.SM2.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG1 },
+	{ "CS.SM3.MISC.CERR_MESSAGE1", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG1 },
+
+	{ "CS.SM0.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG2 },
+	{ "CS.SM1.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG2 },
+	{ "CS.SM2.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG2 },
+	{ "CS.SM3.MISC.CERR_MESSAGE2", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG2 },
+
+	{ "CS.SM0.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG3 },
+	{ "CS.SM1.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG3 },
+	{ "CS.SM2.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG3 },
+	{ "CS.SM3.MISC.CERR_MESSAGE3", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG3 },
+
+	{ "CS.SM0.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG4 },
+	{ "CS.SM1.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG4 },
+	{ "CS.SM2.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG4 },
+	{ "CS.SM3.MISC.CERR_MESSAGE4", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG4 },
+
+	{ "CS.SM0.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG5 },
+	{ "CS.SM1.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG5 },
+	{ "CS.SM2.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG5 },
+	{ "CS.SM3.MISC.CERR_MESSAGE5", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG5 },
+
+	{ "CS.SM0.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_MSG6 },
+	{ "CS.SM1.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_MSG6 },
+	{ "CS.SM2.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_MSG6 },
+	{ "CS.SM3.MISC.CERR_MESSAGE6", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_MSG6 },
+
+	{ "CS.SM0.MISC.CERR_FIRST0", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_FIRST0 },
+	{ "CS.SM1.MISC.CERR_FIRST0", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_FIRST0 },
+	{ "CS.SM2.MISC.CERR_FIRST0", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_FIRST0 },
+	{ "CS.SM3.MISC.CERR_FIRST0", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_FIRST0 },
+
+	{ "CS.SM0.MISC.CERR_FIRST1", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_FIRST1 },
+	{ "CS.SM1.MISC.CERR_FIRST1", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_FIRST1 },
+	{ "CS.SM2.MISC.CERR_FIRST1", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_FIRST1 },
+	{ "CS.SM3.MISC.CERR_FIRST1", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_FIRST1 },
+
+	{ "CS.SM0.MISC.CERR_FIRST2", NPU2_BLOCK_SM_0, NPU2_C_ERR_RPT_FIRST2 },
+	{ "CS.SM1.MISC.CERR_FIRST2", NPU2_BLOCK_SM_1, NPU2_C_ERR_RPT_FIRST2 },
+	{ "CS.SM2.MISC.CERR_FIRST2", NPU2_BLOCK_SM_2, NPU2_C_ERR_RPT_FIRST2 },
+	{ "CS.SM3.MISC.CERR_FIRST2", NPU2_BLOCK_SM_3, NPU2_C_ERR_RPT_FIRST2 },
+
+	/* CQ Control: CERR_MESSAGEn and CERR_FIRSTn registers of the CTL block */
+	{ "CS.CTL.MISC.CERR_MESSAGE0", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_MSG0 },
+	{ "CS.CTL.MISC.CERR_MESSAGE1", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_MSG1 },
+	{ "CS.CTL.MISC.CERR_FIRST0", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_FIRST0 },
+	{ "CS.CTL.MISC.CERR_FIRST1", NPU2_BLOCK_CTL, NPU2_CQ_C_ERR_RPT_FIRST1 },
+
+	/* CQ Data: ECC hold/mask/first and REMn registers of the DAT block */
+	{ "DAT.MISC.CERR_ECC_HOLD", NPU2_BLOCK_DAT, NPU2_CQ_DAT_ECC_STATUS },
+	{ "DAT.MISC.CERR_ECC_MASK", NPU2_BLOCK_DAT, NPU2_CQ_DAT_ECC_MASK },
+	{ "DAT.MISC.CERR_ECC_FIRST", NPU2_BLOCK_DAT, NPU2_CQ_DAT_ECC_FIRST },
+	{ "DAT.MISC.REM0", NPU2_BLOCK_DAT, NPU2_CQ_DAT_RAS_MSG0 },
+	{ "DAT.MISC.REM1", NPU2_BLOCK_DAT, NPU2_CQ_DAT_RAS_MSG1 },
+};
+
+static npu2_scom_dump_t npu2_scom_dump_nvlink[] = {	/* filtered by NTL block in show_nvlink_regs() */
+	{ "NTL0.REGS.CERR_FIRST1", NPU2_BLOCK_NTL0, NPU2_NTL_ERR_FIRST1_OFF },
+	{ "NTL1.REGS.CERR_FIRST1", NPU2_BLOCK_NTL1, NPU2_NTL_ERR_FIRST1_OFF },
+	{ "NTL0.REGS.CERR_FIRST2", NPU2_BLOCK_NTL0, NPU2_NTL_ERR_FIRST2_OFF },
+	{ "NTL1.REGS.CERR_FIRST2", NPU2_BLOCK_NTL1, NPU2_NTL_ERR_FIRST2_OFF },
+};
+
+static npu2_scom_dump_t npu2_scom_dump_ocapi[] = {	/* filtered by OTL block in show_opencapi_regs() */
+	{ "OTL0.MISC.C_ERR_RPT_HOLD0", NPU2_BLOCK_OTL0, NPU2_OTL_ERR_RPT_HOLD0 },
+	{ "OTL1.MISC.C_ERR_RPT_HOLD0", NPU2_BLOCK_OTL1, NPU2_OTL_ERR_RPT_HOLD0 },
+	{ "OTL0.MISC.OTL_REM0", NPU2_BLOCK_OTL0, NPU2_OTL_RAS_ERR_MSG0 },
+	{ "OTL1.MISC.OTL_REM0", NPU2_BLOCK_OTL1, NPU2_OTL_RAS_ERR_MSG0 },
+	{ "OTL0.MISC.ERROR_SIG_RXI", NPU2_BLOCK_OTL0, NPU2_OTL_RXI_ERR_SIG },
+	{ "OTL1.MISC.ERROR_SIG_RXI", NPU2_BLOCK_OTL1, NPU2_OTL_RXI_ERR_SIG },
+	{ "OTL0.MISC.ERROR_SIG_RXO", NPU2_BLOCK_OTL0, NPU2_OTL_RXO_ERR_SIG },
+	{ "OTL1.MISC.ERROR_SIG_RXO", NPU2_BLOCK_OTL1, NPU2_OTL_RXO_ERR_SIG },
+	{ "OTL0.MISC.C_ERR_RPT_HOLD1", NPU2_BLOCK_OTL0, NPU2_OTL_ERR_RPT_HOLD1 },
+	{ "OTL1.MISC.C_ERR_RPT_HOLD1", NPU2_BLOCK_OTL1, NPU2_OTL_ERR_RPT_HOLD1 },
+};
+
+static void print_one_npu_reg(struct npu2 *npu, npu2_scom_dump_t *scom, int stack)
+{
+	uint64_t reg, val;
+
+	reg = NPU2_REG_OFFSET(stack, scom->block, scom->offset);
+	val = npu2_scom_read(npu->chip_id, npu->xscom_base,
+			reg, NPU2_MISC_DA_LEN_8B);
+	/* Log name, address and value; "stack - 4" presumably turns NPU2_STACK_STCK_n into n (TODO confirm) */
+	prlog(PR_ERR, "NPU[%d] STCK%d.%s 0x%llx = 0x%016llx\n",
+		npu->chip_id, stack - 4, scom->name, reg, val);
+}
+
+/* same as above, but for direct access registers; flags a failed xscom read */
+static void print_one_reg(int chip_id, int brick_index,
+			uint64_t reg_addr, const char *reg_name)
+{
+	uint64_t val = 0;	/* known value in case the read never writes it */
+	int rc = xscom_read(chip_id, reg_addr, &val);
+
+	prlog(PR_ERR, "NPU[%d] %s brick %d 0x%llx = 0x%016llx%s\n", chip_id, reg_name,
+		brick_index, reg_addr, val, rc ? " (xscom read failed)" : "");
+}
+
+static void show_nvlink_regs(struct npu2 *npu, int brick_index)
+{
+	uint32_t stack, ntl;
+	int i;
+
+	stack = NPU2_STACK_STCK_0 + brick_index / 2;	/* two bricks per stack */
+	ntl = NPU2_BLOCK_NTL0 + (brick_index % 2) * 2;	/* NOTE(review): assumes NTL1 == NTL0 + 2 -- confirm */
+	/* Dump only the table entries that belong to this brick's NTL block */
+	for (i = 0; i < ARRAY_SIZE(npu2_scom_dump_nvlink); i++) {
+		if (npu2_scom_dump_nvlink[i].block == ntl)
+			print_one_npu_reg(npu, &npu2_scom_dump_nvlink[i], stack);
+	}
+}
+
+static void show_opencapi_regs(struct npu2 *npu, int brick_index)
+{
+	uint32_t stack, otl;
+	int i;
+
+	stack = NPU2_STACK_STCK_0 + brick_index / 2;	/* two bricks per stack */
+	otl = NPU2_BLOCK_OTL0 + (brick_index % 2);	/* even brick: OTL0, odd brick: OTL0 + 1 */
+
+	/* NPU registers: only the table entries for this brick's OTL block */
+	for (i = 0; i < ARRAY_SIZE(npu2_scom_dump_ocapi); i++) {
+		if (npu2_scom_dump_ocapi[i].block == otl)
+			print_one_npu_reg(npu, &npu2_scom_dump_ocapi[i], stack);
+	}
+
+	/* Fabric registers: ODL state of this brick, read with direct xscom accesses */
+	print_one_reg(npu->chip_id, brick_index,
+		OB_ODL_STATUS(brick_index), "ODL status");
+	print_one_reg(npu->chip_id, brick_index,
+		OB_ODL_TRAINING_STATUS(brick_index), "ODL training status");
+	print_one_reg(npu->chip_id, brick_index,
+		OB_ODL_ENDPOINT_INFO(brick_index), "ODL endpoint info");
+}
+
+static void show_all_regs(struct npu2 *npu, int brick_index)
+{
+	int i, stack, stack_min, stack_max;
+	uint64_t fir_val, mask_val, fir_addr, mask_addr;
+	struct npu2_dev *dev;
+	npu2_scom_dump_t scom_reg;
+
+	if (brick_index != -1) {	/* -1 means all bricks */
+		stack_min = stack_max = NPU2_STACK_STCK_0 + brick_index / 2;
+	} else {
+		stack_min = NPU2_STACK_STCK_0;
+		stack_max = NPU2_STACK_STCK_2;
+		/* Avoid dumping unused stacks for opencapi on Lagrange */
+		if (npu->total_devices == 2)
+			stack_min = stack_max = NPU2_STACK_STCK_1;
+	}
+
+	/* NPU FIRs: log each FIR, its mask and the unmasked bits (fir & ~mask) */
+	for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) {
+		fir_addr  = NPU2_FIR_REGISTER_0 + i * NPU2_FIR_OFFSET;
+		mask_addr = fir_addr + NPU2_FIR_MASK_OFFSET;
+		xscom_read(npu->chip_id, fir_addr, &fir_val);	/* NOTE(review): return value not checked */
+		xscom_read(npu->chip_id, mask_addr, &mask_val);	/* NOTE(review): return value not checked */
+		prlog(PR_ERR, "NPU[%d] FIR%d = 0x%016llx (mask 0x%016llx => 0x%016llx)\n",
+			npu->chip_id, i, fir_val, mask_val, fir_val & ~mask_val);
+	}
+
+	/* NPU global, per-stack registers: every table entry, for each selected stack */
+	for (i = 0; i < ARRAY_SIZE(npu2_scom_dump_global); i++) {
+		for (stack = stack_min; stack <= stack_max; stack++)
+			print_one_npu_reg(npu, &npu2_scom_dump_global[i], stack);
+	}
+
+	/*
+	 * NPU global registers, stack independent
+	 * We have only one for now, so dump it directly
+	 */
+	scom_reg.name = "XTS.REG.ERR_HOLD";
+	scom_reg.block = NPU2_BLOCK_XTS;
+	scom_reg.offset = 0;
+	print_one_npu_reg(npu, &scom_reg, NPU2_STACK_MISC);
+
+	/* nvlink- or opencapi-specific registers, for the selected brick(s) only */
+	for (i = 0; i < npu->total_devices; i++) {
+		dev = &npu->devices[i];
+		if (brick_index == -1 || dev->brick_index == brick_index) {
+			if (dev->type == NPU2_DEV_TYPE_NVLINK)
+				show_nvlink_regs(npu, dev->brick_index);
+			else if (dev->type == NPU2_DEV_TYPE_OPENCAPI)
+				show_opencapi_regs(npu, dev->brick_index);
+		}
+	}
+}
+
+void npu2_dump_scoms(int chip_id)
+{
+	struct npu2 *npu;
+	struct phb *phb;
+	struct npu2_dev *dev;
+
+	/*
+	 * Look for the npu2 structure of that chip ID. We can reach it
+	 * through the array of phbs, looking for an nvlink or opencapi
+	 * phb. Several phbs may match, but they all point to the same
+	 * npu2 structure, so we dump once and stop at the first match.
+	 */
+	for_each_phb(phb) {
+		npu = NULL;
+		if (phb->phb_type == phb_type_npu_v2) {
+			npu = phb_to_npu2_nvlink(phb);
+		} else if (phb->phb_type == phb_type_npu_v2_opencapi) {
+			dev = phb_to_npu2_dev_ocapi(phb);
+			npu = dev->npu;
+		}
+		if (npu && npu->chip_id == chip_id) {
+			show_all_regs(npu, -1 /* all bricks */);
+			break;
+		}
+	}
+}
+
 static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
 {
 	struct npu2 *p = is->data;
@@ -182,6 +415,7 @@ static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
 		brick = 2 + ((idx - 27) % 4);
 		prlog(PR_ERR, "NPU[%d] error interrupt for brick %d\n",
 			p->chip_id, brick);
+		show_all_regs(p, brick);
 		opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
 					OPAL_EVENT_PCI_ERROR);
 		break;
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 939a23f5..ba10b8ea 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -203,6 +203,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define NPU2_PERF_MASK				0x110
 #define NPU2_DBG0_CFG				0x118
 #define NPU2_DBG1_CFG				0x120
+#define NPU2_C_ERR_RPT_MSG5			0x128
+#define NPU2_C_ERR_RPT_MSG6			0x130
 
 /* CTL block registers */
 #define NPU2_CQ_CTL_MISC_CFG			0x000
@@ -295,10 +297,12 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define NPU2_NTL_MISC_CFG3(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x008)
 #define NPU2_NTL_ERR_HOLD1(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x010)
 #define NPU2_NTL_ERR_MASK1(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x018)
+#define NPU2_NTL_ERR_FIRST1_OFF			0x020
 #define NPU2_NTL_ERR_FIRST1(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x020)
 #define NPU2_NTL_ERR_FIRST1_MASK(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x028)
 #define NPU2_NTL_ERR_HOLD2(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x030)
 #define NPU2_NTL_ERR_MASK2(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x038)
+#define NPU2_NTL_ERR_FIRST2_OFF			0x040
 #define NPU2_NTL_ERR_FIRST2(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x040)
 #define NPU2_NTL_ERR_FIRST2_MASK(ndev)		NPU2_NTL_REG_OFFSET(ndev, 0x048)
 #define NPU2_NTL_SCRATCH2(ndev)			NPU2_NTL_REG_OFFSET(ndev, 0x050)
@@ -402,6 +406,12 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define NPU2_OTL_OSL_DAR(stack, block)		NPU2_REG_OFFSET(stack, block, 0x008)
 #define NPU2_OTL_OSL_TFC(stack, block)		NPU2_REG_OFFSET(stack, block, 0x010)
 #define NPU2_OTL_OSL_PEHANDLE(stack, block)	NPU2_REG_OFFSET(stack, block, 0x018)
+#define NPU2_OTL_ERR_RPT_HOLD0			0x30	/* dumped as OTLn.MISC.C_ERR_RPT_HOLD0 */
+#define NPU2_OTL_RAS_ERR_MSG0			0x68	/* dumped as OTLn.MISC.OTL_REM0 */
+#define NPU2_OTL_RXI_ERR_SIG			0x70	/* dumped as OTLn.MISC.ERROR_SIG_RXI */
+#define NPU2_OTL_RXO_ERR_SIG			0x78	/* dumped as OTLn.MISC.ERROR_SIG_RXO */
+#define NPU2_OTL_ERR_RPT_HOLD1			0xB0	/* dumped as OTLn.MISC.C_ERR_RPT_HOLD1 */
+
 
 /* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above
  * there is only a single instance of each of these in the NPU so we
diff --git a/include/npu2.h b/include/npu2.h
index ef4e7aff..d58aab47 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -248,4 +248,5 @@ int64_t npu2_freeze_status(struct phb *phb __unused,
 			   uint8_t *freeze_state,
 			   uint16_t *pci_error_type __unused,
 			   uint16_t *severity __unused);
+void npu2_dump_scoms(int chip_id);
 #endif /* __NPU2_H */
-- 
2.19.1



More information about the Skiboot mailing list