[Skiboot] [PATCH v2 2/2] npu2: hw-procedures: Add check_credits procedure

Alistair Popple alistair at popple.id.au
Wed Nov 22 13:15:53 AEDT 2017


Acked-by: Alistair Popple <alistair at popple.id.au>

On Tue, 21 Nov 2017 05:42:46 PM Reza Arbab wrote:
> As an immediate mitigator for a current hardware glitch, add a procedure
> that can be used to validate NTL credit values. This will be called as a
> safeguard to check that link training succeeded.
> 
> Assert that things are exactly as we expect, because if they aren't, the
> system will experience a catastrophic failure shortly after the start of
> link traffic.
> 
> Signed-off-by: Reza Arbab <arbab at linux.vnet.ibm.com>
> ---
>  doc/nvlink.rst          |  1 +
>  hw/npu2-hw-procedures.c | 39 ++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/nvlink.rst b/doc/nvlink.rst
> index f33574a..4c893bb 100644
> --- a/doc/nvlink.rst
> +++ b/doc/nvlink.rst
> @@ -118,6 +118,7 @@ Procedure Control Register
>      10. Naples NPU - RESET
>      11. Naples PHY - PHY preterminate
>      12. Naples PHY - PHY terminated
> +    13. Witherspoon TL credit validation
>  
>     Procedure 5 (TX_ZCAL) should only be run once. System firmware will
>     ensure this so device drivers may call this procedure mutiple
> diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c
> index 1db171a..bb8534b 100644
> --- a/hw/npu2-hw-procedures.c
> +++ b/hw/npu2-hw-procedures.c
> @@ -663,6 +663,41 @@ static uint32_t phy_rx_training_wait(struct npu2_dev *ndev)
>  }
>  DEFINE_PROCEDURE(phy_rx_training, phy_rx_training_wait);
>  
> +static uint32_t check_credit(struct npu2_dev *ndev, uint64_t reg,
> +			     const char *reg_name, uint64_t expected)
> +{
> +	uint64_t val;
> +
> +	val = npu2_read(ndev->npu, reg);
> +	if (val == expected)
> +		return 0;
> +
> +	NPU2DEVERR(ndev, "%s: expected 0x%llx, read 0x%llx\n",
> +		   reg_name, expected, val);
> +
> +	return 1;
> +}
> +
> +#define CHECK_CREDIT(ndev, reg, expected) \
> +	check_credit(ndev, reg(ndev), #reg, expected);
> +
> +static uint32_t check_credits(struct npu2_dev *ndev)
> +{
> +	int fail = 0;
> +
> +	fail += CHECK_CREDIT(ndev, NPU2_NTL_CRED_HDR_CREDIT_RX, 0x0BE0BE0000000000ULL);
> +	fail += CHECK_CREDIT(ndev, NPU2_NTL_RSP_HDR_CREDIT_RX, 0x0BE0BE0000000000ULL);
> +	fail += CHECK_CREDIT(ndev, NPU2_NTL_CRED_DATA_CREDIT_RX, 0x1001000000000000ULL);
> +	fail += CHECK_CREDIT(ndev, NPU2_NTL_RSP_DATA_CREDIT_RX, 0x1001000000000000ULL);
> +	fail += CHECK_CREDIT(ndev, NPU2_NTL_DBD_HDR_CREDIT_RX, 0x0640640000000000ULL);
> +	fail += CHECK_CREDIT(ndev, NPU2_NTL_ATSD_HDR_CREDIT_RX, 0x0200200000000000ULL);
> +
> +	assert(!fail);
> +
> +	return PROCEDURE_COMPLETE;
> +}
> +DEFINE_PROCEDURE(check_credits);
> +
>  static struct procedure *npu_procedures[] = {
>  	&procedure_stop,
>  	&procedure_nop,
> @@ -678,7 +713,9 @@ static struct procedure *npu_procedures[] = {
>  
>  	/* Place holders for pre-terminate and terminate procedures */
>  	&procedure_nop,
> -	&procedure_nop};
> +	&procedure_nop,
> +	&procedure_check_credits
> +};
>  
>  /* Run a procedure step(s) and return status */
>  static uint32_t get_procedure_status(struct npu2_dev *dev)
> 



More information about the Skiboot mailing list