[Skiboot] [PATCH V2 3/4] nvlink: Add freeze and fence error injection

Russell Currey ruscur at russell.cc
Mon Jan 18 16:59:41 AEDT 2016


Enable NPU freeze and fence injection through debugfs.

For example, if an NPU is PCI bus 8, a freeze on PE 1 can be injected with:

echo 1:0:0:0:0 >> /sys/kernel/debug/powerpc/PCI0008/err_injct

or a fence on PE 2 on PCI bus 9 with:

echo 2:1:0:0:0 >> /sys/kernel/debug/powerpc/PCI0009/err_injct

These will cause the appropriate EEH event to occur upon a DMA to the
NVLink.

PE number was added to the npu_dev struct to enable this.

Signed-off-by: Russell Currey <ruscur at russell.cc>
---
V2: Return OPAL_PARAMETER on failures and find devices based on pe_num
---
 hw/npu.c      | 44 +++++++++++++++++++++++++++++++++++++++++++-
 include/npu.h |  2 ++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/hw/npu.c b/hw/npu.c
index 3c2c0b8..23facaf 100644
--- a/hw/npu.c
+++ b/hw/npu.c
@@ -954,6 +954,7 @@ static int64_t npu_set_pe(struct phb *phb,
 		return OPAL_PARAMETER;
 
 	link_idx = dev->index;
+	dev->pe_num = pe_num;
 
 	/* Separate links will be mapped to different PEs */
 	if (bcompare != OpalPciBusAll ||
@@ -1020,6 +1021,47 @@ static int64_t npu_freeze_status(struct phb *phb,
 	return OPAL_SUCCESS;
 }
 
+/* Inject an NPU error: type 0 freezes the PE's device, type 1 sets fence */
+static int64_t npu_err_inject(struct phb *phb, uint32_t pe_num,
+			      uint32_t type, uint32_t func __unused,
+			      uint64_t addr __unused, uint64_t mask __unused)
+{
+	struct npu *p = phb_to_npu(phb);
+	struct npu_dev *dev = NULL;
+	int i;
+
+	if (pe_num >= NPU_NUM_OF_PES) {
+		prlog(PR_ERR, "NPU: error injection failed, bad PE given\n");
+		return OPAL_PARAMETER;
+	}
+
+	for (i = 0; i < p->total_devices; i++) {
+		if (p->devices[i].pe_num == pe_num) {
+			dev = &p->devices[i];
+			break;
+		}
+	}
+
+	if (!dev) {
+		prlog(PR_ERR, "NPU: couldn't find device with PE %x\n", pe_num);
+		return OPAL_PARAMETER;
+	}
+
+	/* TODO: extend this to conform to OPAL injection standards */
+	if (type > 1) {
+		prlog(PR_ERR, "NPU: invalid error injection type\n");
+		return OPAL_PARAMETER;
+	} else if (type == 1) {
+		/* Emulate fence mode. */
+		p->fenced = true;
+	} else {
+		/* Cause a freeze with an invalid MMIO read. */
+		in_be64((void *)dev->bar.base);
+	}
+
+	return OPAL_SUCCESS;
+}
+
 static const struct phb_ops npu_ops = {
 	.lock			= npu_lock,
 	.unlock			= npu_unlock,
@@ -1059,7 +1101,7 @@ static const struct phb_ops npu_ops = {
 	.eeh_freeze_clear	= NULL,
 	.eeh_freeze_set		= NULL,
 	.next_error		= NULL,
-	.err_inject		= NULL,
+	.err_inject		= npu_err_inject,
 	.get_diag_data		= NULL,
 	.get_diag_data2		= NULL,
 	.set_capi_mode		= NULL,
diff --git a/include/npu.h b/include/npu.h
index 5d5135b..389b732 100644
--- a/include/npu.h
+++ b/include/npu.h
@@ -144,6 +144,8 @@ struct npu_dev {
 	unsigned long		procedure_tb;
 
 	uint32_t		procedure_status;
+
+	uint8_t			pe_num;
 };
 
 /* NPU PHB descriptor */
-- 
2.7.0



More information about the Skiboot mailing list