[Skiboot] [PATCH V2 3/4] nvlink: Add freeze and fence error injection
Russell Currey
ruscur at russell.cc
Mon Jan 18 16:59:41 AEDT 2016
Enable NPU freeze and fence injection through debugfs.
For example, if an NPU is PCI bus 8, a freeze on PE 1 can be injected with:
echo 1:0:0:0:0 >> /sys/kernel/debug/powerpc/PCI0008/err_injct
or a fence on PE 2 on PCI bus 9 with:
echo 2:1:0:0:0 >> /sys/kernel/debug/powerpc/PCI0009/err_injct
These will cause the appropriate EEH event to occur upon a DMA to the
NVLink.
PE number was added to the npu_dev struct to enable this.
Signed-off-by: Russell Currey <ruscur at russell.cc>
---
V2: Return OPAL_PARAMETER on failures and find devices based on pe_num
---
hw/npu.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
include/npu.h | 2 ++
2 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/hw/npu.c b/hw/npu.c
index 3c2c0b8..23facaf 100644
--- a/hw/npu.c
+++ b/hw/npu.c
@@ -954,6 +954,7 @@ static int64_t npu_set_pe(struct phb *phb,
return OPAL_PARAMETER;
link_idx = dev->index;
+ dev->pe_num = pe_num;
/* Separate links will be mapped to different PEs */
if (bcompare != OpalPciBusAll ||
@@ -1020,6 +1021,47 @@ static int64_t npu_freeze_status(struct phb *phb,
return OPAL_SUCCESS;
}
+/*
+ * Inject an error on the device mapped to pe_num: type 0 freezes it with an
+ * invalid MMIO read of its BAR, type 1 marks the whole NPU as fenced.
+ * Returns OPAL_PARAMETER on a bad PE (valid: 0..NPU_NUM_OF_PES-1) or type.
+ */
+static int64_t npu_err_inject(struct phb *phb, uint32_t pe_num,
+			      uint32_t type, uint32_t func __unused,
+			      uint64_t addr __unused, uint64_t mask __unused)
+{
+	struct npu *p = phb_to_npu(phb);
+	struct npu_dev *dev = NULL;
+	int i;
+
+	if (pe_num >= NPU_NUM_OF_PES) {
+		prlog(PR_ERR, "NPU: error injection failed, bad PE given\n");
+		return OPAL_PARAMETER;
+	}
+
+	for (i = 0; i < p->total_devices; i++) {
+		if (p->devices[i].pe_num == pe_num) {
+			dev = &p->devices[i];
+			break;
+		}
+	}
+	if (!dev) {
+		prlog(PR_ERR, "NPU: couldn't find device with PE %x\n", pe_num);
+		return OPAL_PARAMETER;
+	}
+
+	/* TODO: extend this to conform to OPAL injection standards */
+	if (type > 1) {
+		prlog(PR_ERR, "NPU: invalid error injection type\n");
+		return OPAL_PARAMETER;
+	}
+	if (type == 1)
+		p->fenced = true;		/* Emulate fence mode. */
+	else
+		in_be64((void *)dev->bar.base);	/* Invalid MMIO read -> freeze. */
+	return OPAL_SUCCESS;
+}
+
static const struct phb_ops npu_ops = {
.lock = npu_lock,
.unlock = npu_unlock,
@@ -1059,7 +1101,7 @@ static const struct phb_ops npu_ops = {
.eeh_freeze_clear = NULL,
.eeh_freeze_set = NULL,
.next_error = NULL,
- .err_inject = NULL,
+ .err_inject = npu_err_inject,
.get_diag_data = NULL,
.get_diag_data2 = NULL,
.set_capi_mode = NULL,
diff --git a/include/npu.h b/include/npu.h
index 5d5135b..389b732 100644
--- a/include/npu.h
+++ b/include/npu.h
@@ -144,6 +144,8 @@ struct npu_dev {
unsigned long procedure_tb;
uint32_t procedure_status;
+
+ uint8_t pe_num;
};
/* NPU PHB descriptor */
--
2.7.0
More information about the Skiboot
mailing list