[Skiboot] [RFC 05/12] npu2-opencapi: Tweak fundamental reset sequence

Frederic Barrat fbarrat at linux.ibm.com
Wed Jun 19 22:45:03 AEST 2019


Slightly modify the ordering of a few steps in our init sequence on
fundamental reset, so that it can be called from the OS when the link
is already up:

- when the card is reset, the link goes down, so we need to fence the
  brick to prevent errors propagating to the NPU and OS
- since fencing and unfencing don't require any delay, let's also
  fence/unfence during the very first reset at boot. It's useless but
  doesn't hurt and keeps the code simpler.
- resetting the PHY must be done a bit later, while the brick is fenced
  and the ODL and DLx are in reset

Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
---
 hw/npu2-opencapi.c | 48 +++++++++++++++++++++++++---------------------
 include/npu2.h     |  2 --
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 5a94c949..f9cb1d26 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -1052,6 +1052,28 @@ static int64_t npu2_opencapi_get_presence_state(struct pci_slot __unused *slot,
 	return OPAL_SUCCESS;
 }
 
+static void fence_brick(struct npu2_dev *dev)
+{
+	OCAPIDBG(dev, "Fencing brick\n");
+	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
+			dev->brick_index, 0b11);
+	/* from 13.2.1, Quiesce Fence State */
+	npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
+		PPC_BIT(dev->brick_index + 6));
+}
+
+static void unfence_brick(struct npu2_dev *dev)
+{
+	OCAPIDBG(dev, "Unfencing brick\n");
+	npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
+		PPC_BIT(dev->brick_index));
+
+	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
+			dev->brick_index, 0b10);
+	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base,
+			dev->brick_index, 0b00);
+}
+
 static enum OpalShpcLinkState get_link_width(uint64_t odl_status)
 {
 	uint64_t tx_lanes, rx_lanes, state;
@@ -1166,7 +1188,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
 	return OPAL_HARDWARE;
 }
 
-static int64_t npu2_opencapi_creset(struct pci_slot *slot __unused)
+static int64_t npu2_opencapi_creset(struct pci_slot *slot)
 {
 	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
 
@@ -1196,19 +1218,10 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
 			OCAPIINF(dev, "no card detected\n");
 			return OPAL_SUCCESS;
 		}
-		if (dev->train_need_fence) {
-			OCAPIDBG(dev, "Fencing OTL during reset\n");
-			set_fence_control(chip_id, dev->npu->xscom_base,
-					dev->brick_index, 0b11);
-			npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
-				PPC_BIT(dev->brick_index + 6));
-			dev->train_fenced = true;
-		}
-		dev->train_need_fence = true;
 		slot->link_retries = OCAPI_LINK_TRAINING_RETRIES;
-		npu2_opencapi_phy_reset(dev);
 		/* fall-through */
 	case OCAPI_SLOT_FRESET_INIT:
+		fence_brick(dev);
 		assert_odl_reset(chip_id, dev->brick_index);
 		assert_adapter_reset(dev);
 		pci_slot_set_state(slot,
@@ -1217,6 +1230,7 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(5));
 
 	case OCAPI_SLOT_FRESET_ASSERT_DELAY:
+		npu2_opencapi_phy_reset(dev);
 		deassert_odl_reset(chip_id, dev->brick_index);
 		pci_slot_set_state(slot,
 				OCAPI_SLOT_FRESET_DEASSERT_DELAY);
@@ -1234,15 +1248,7 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
 
 	case OCAPI_SLOT_FRESET_DEASSERT_DELAY2:
-		if (dev->train_fenced) {
-			OCAPIDBG(dev, "Unfencing OTL after reset\n");
-			npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
-				   PPC_BIT(dev->brick_index));
-			set_fence_control(chip_id, dev->npu->xscom_base,
-					  dev->brick_index, 0b00);
-			dev->train_fenced = false;
-		}
-
+		unfence_brick(dev);
 		set_init_pattern(chip_id, dev);
 		pci_slot_set_state(slot,
 				OCAPI_SLOT_FRESET_INIT_DELAY);
@@ -1705,8 +1711,6 @@ static void setup_device(struct npu2_dev *dev)
 
 	dev->bdfn = 0;
 	dev->linux_pe = -1;
-	dev->train_need_fence = false;
-	dev->train_fenced = false;
 
 	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
 	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
diff --git a/include/npu2.h b/include/npu2.h
index 57a9cc96..6aead6cb 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -158,8 +158,6 @@ struct npu2_dev {
 	/* OpenCAPI */
 	struct phb		phb_ocapi;
 	uint64_t		linux_pe;
-	bool			train_need_fence;
-	bool			train_fenced;
 	unsigned long		train_start;
 	unsigned long		train_timeout;
 };
-- 
2.21.0



More information about the Skiboot mailing list