[Skiboot] [PATCH 1/3] npu2-opencapi: Log extra information on link training failure
Frederic Barrat
fbarrat at linux.ibm.com
Fri Nov 23 19:54:37 AEDT 2018
Log the link training status register in case of failure to train.
It can have useful information for the hardware team.
Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>
---
hw/npu2-opencapi.c | 37 ++++++++++++++++++++++++++++++++++---
1 file changed, 34 insertions(+), 3 deletions(-)
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 8c4f3462..c771fae8 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -150,6 +150,30 @@ static uint64_t get_odl_status(uint32_t gcid, uint64_t index) {
return reg;
}
+static uint64_t get_odl_training_status(uint32_t gcid, uint64_t index)
+{
+ uint64_t status_xscom, reg;
+
+ switch (index) {
+ case 2:
+ status_xscom = OB0_ODL0_TRAINING_STATUS;
+ break;
+ case 3:
+ status_xscom = OB0_ODL1_TRAINING_STATUS;
+ break;
+ case 4:
+ status_xscom = OB3_ODL1_TRAINING_STATUS;
+ break;
+ case 5:
+ status_xscom = OB3_ODL0_TRAINING_STATUS;
+ break;
+ default:
+ assert(false);
+ }
+ xscom_read(gcid, status_xscom, ®);
+ return reg;
+}
+
static void disable_nvlink(uint32_t gcid, int index)
{
uint64_t phy_config_scom, reg;
@@ -1017,7 +1041,8 @@ static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
return rc;
}
-static int64_t npu2_opencapi_retry_state(struct pci_slot *slot)
+static int64_t npu2_opencapi_retry_state(struct pci_slot *slot,
+ uint64_t odl_status)
{
struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
uint32_t chip_id = dev->npu->chip_id;
@@ -1031,11 +1056,17 @@ static int64_t npu2_opencapi_retry_state(struct pci_slot *slot)
*/
OCAPIERR(dev,
"Link failed to train, final link status: %016llx\n",
- get_odl_status(chip_id, dev->brick_index));
+ odl_status);
+ OCAPIDBG(dev, "Final link training status: %016llx\n",
+ get_odl_training_status(chip_id, dev->brick_index));
return OPAL_HARDWARE;
}
OCAPIERR(dev, "Link failed to train, retrying\n");
+ OCAPIDBG(dev, "Link status: %016llx, training status: %016llx\n",
+ odl_status,
+ get_odl_training_status(chip_id, dev->brick_index));
+
pci_slot_set_state(slot, OCAPI_SLOT_FRESET_INIT);
return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
}
@@ -1062,7 +1093,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
}
if (slot->retries-- == 0)
- return npu2_opencapi_retry_state(slot);
+ return npu2_opencapi_retry_state(slot, reg);
return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
--
2.19.1
More information about the Skiboot
mailing list