[Skiboot] [PATCH] npu2-opencapi: Don't send commands to NPU when link is down

Frederic Barrat fbarrat at linux.ibm.com
Mon Jul 16 19:13:31 AEST 2018


Even if the link is down, the PCI scan framework always tries to access
the root device ("scan upstream only"). For opencapi, there's no root
device, so we may try to issue a config operation when the link is
down. The operation fails, but it raises a FIR bit and can trigger an
HMI.

To fix it, we now keep track of the link state and avoid sending any
configuration space operations to the NPU if we know the link is not
ready.

Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
---
 hw/npu2-opencapi.c | 10 +++++++++-
 include/npu2.h     |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index f82e6562..474ac0fb 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -1107,6 +1107,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
 
 	case OCAPI_SLOT_LINK_TRAINED:
+		dev->link_down = false;
 		otl_enabletx(chip_id, dev->npu->xscom_base, dev);
 		pci_slot_set_state(slot, OCAPI_SLOT_NORMAL);
 		return OPAL_SUCCESS;
@@ -1137,7 +1138,7 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
 	case OCAPI_SLOT_NORMAL:
 	case OCAPI_SLOT_FRESET_START:
 		OCAPIDBG(dev, "FRESET starts\n");
-
+		dev->link_down = true;
 		if (slot->ops.get_presence_state)
 			slot->ops.get_presence_state(slot, &presence);
 		if (!presence) {
@@ -1261,6 +1262,9 @@ static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn,
 	if (rc)
 		return rc;
 
+	if (dev->link_down)
+		return OPAL_HARDWARE;
+
 	genid_base = dev->bars[1].npu2_bar.base +
 		(index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
 
@@ -1319,6 +1323,9 @@ static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn,
 	if (rc)
 		return rc;
 
+	if (dev->link_down)
+		return OPAL_HARDWARE;
+
 	genid_base = dev->bars[1].npu2_bar.base +
 		(index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
 
@@ -1637,6 +1644,7 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
 	dev->bdfn = 0;
 	dev->train_need_fence = false;
 	dev->train_fenced = false;
+	dev->link_down = true;
 	n->total_devices++;
 
 	/* Find I2C port for handling device reset */
diff --git a/include/npu2.h b/include/npu2.h
index 4c2e20e0..c151f0e3 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -138,6 +138,7 @@ struct npu2_dev {
 	uint64_t		i2c_port_id_ocapi;
 	bool			train_need_fence;
 	bool			train_fenced;
+	bool			link_down;
 };
 
 struct npu2 {
-- 
2.17.1



More information about the Skiboot mailing list