[Skiboot] [PATCH 1/3] npu2-opencapi: Log extra information on link training failure

Andrew Donnellan andrew.donnellan at au1.ibm.com
Mon Nov 26 10:25:12 AEDT 2018


On 23/11/18 7:54 pm, Frederic Barrat wrote:
> Log the link training status register in case of failure to train.
> It can have useful information for the hardware team.
> 
> Signed-off-by: Frederic Barrat <fbarrat at linux.ibm.com>

Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>

> ---
>   hw/npu2-opencapi.c | 37 ++++++++++++++++++++++++++++++++++---
>   1 file changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 8c4f3462..c771fae8 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -150,6 +150,30 @@ static uint64_t get_odl_status(uint32_t gcid, uint64_t index) {
>   	return reg;
>   }
>   
> +static uint64_t get_odl_training_status(uint32_t gcid, uint64_t index)
> +{
> +	uint64_t status_xscom, reg;
> +
> +	switch (index) {
> +	case 2:
> +		status_xscom = OB0_ODL0_TRAINING_STATUS;
> +		break;
> +	case 3:
> +		status_xscom = OB0_ODL1_TRAINING_STATUS;
> +		break;
> +	case 4:
> +		status_xscom = OB3_ODL1_TRAINING_STATUS;
> +		break;
> +	case 5:
> +		status_xscom = OB3_ODL0_TRAINING_STATUS;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +	xscom_read(gcid, status_xscom, &reg);
> +	return reg;
> +}
> +
>   static void disable_nvlink(uint32_t gcid, int index)
>   {
>   	uint64_t phy_config_scom, reg;
> @@ -1017,7 +1041,8 @@ static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
>   	return rc;
>   }
>   
> -static int64_t npu2_opencapi_retry_state(struct pci_slot *slot)
> +static int64_t npu2_opencapi_retry_state(struct pci_slot *slot,
> +					 uint64_t odl_status)
>   {
>   	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>   	uint32_t chip_id = dev->npu->chip_id;
> @@ -1031,11 +1056,17 @@ static int64_t npu2_opencapi_retry_state(struct pci_slot *slot)
>   		 */
>   		OCAPIERR(dev,
>   			"Link failed to train, final link status: %016llx\n",
> -			get_odl_status(chip_id, dev->brick_index));
> +			odl_status);
> +		OCAPIDBG(dev, "Final link training status: %016llx\n",
> +			get_odl_training_status(chip_id, dev->brick_index));
>   		return OPAL_HARDWARE;
>   	}
>   
>   	OCAPIERR(dev, "Link failed to train, retrying\n");
> +	OCAPIDBG(dev, "Link status: %016llx, training status: %016llx\n",
> +		odl_status,
> +		get_odl_training_status(chip_id, dev->brick_index));
> +
>   	pci_slot_set_state(slot, OCAPI_SLOT_FRESET_INIT);
>   	return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
>   }
> @@ -1062,7 +1093,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
>   			return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
>   		}
>   		if (slot->retries-- == 0)
> -			return npu2_opencapi_retry_state(slot);
> +			return npu2_opencapi_retry_state(slot, reg);
>   
>   		return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
>   
> 

-- 
Andrew Donnellan              OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com  IBM Australia Limited



More information about the Skiboot mailing list