[Skiboot] [PATCH V2 3/4] nvlink: Add freeze and fence error injection

Daniel Axtens dja at axtens.net
Tue Jan 26 08:39:42 AEDT 2016


Russell Currey <ruscur at russell.cc> writes:

> Enable NPU freeze and fence injection through debugfs.
>
> For example, if an NPU is PCI bus 8, a freeze on PE 1 can be injected with:
>
> echo 1:0:0:0:0 >> /sys/kernel/debug/powerpc/PCI0008/err_injct
>
> or a fence on PE 2 on PCI bus 9 with:
>
> echo 2:1:0:0:0 >> /sys/kernel/debug/powerpc/PCI0009/err_injct
>

Cool - I've never figured out how to work err_injct before.

Would it be possible to document this somewhere more
permanent/accessible than an email? Do we have a kernel driver? Or info
about NPU in Skiboot?

Regards,
Daniel

> These will cause the appropriate EEH event to occur upon a DMA to the
> NVLink.
>
> PE number was added to the npu_dev struct to enable this.
>
> Signed-off-by: Russell Currey <ruscur at russell.cc>
> ---
> V2: Return OPAL_PARAMETER on failures and find devices based on pe_num
> ---
>  hw/npu.c      | 44 +++++++++++++++++++++++++++++++++++++++++++-
>  include/npu.h |  2 ++
>  2 files changed, 45 insertions(+), 1 deletion(-)
>
> diff --git a/hw/npu.c b/hw/npu.c
> index 3c2c0b8..23facaf 100644
> --- a/hw/npu.c
> +++ b/hw/npu.c
> @@ -954,6 +954,7 @@ static int64_t npu_set_pe(struct phb *phb,
>  		return OPAL_PARAMETER;
>  
>  	link_idx = dev->index;
> +	dev->pe_num = pe_num;
>  
>  	/* Separate links will be mapped to different PEs */
>  	if (bcompare != OpalPciBusAll ||
> @@ -1020,6 +1021,47 @@ static int64_t npu_freeze_status(struct phb *phb,
>  	return OPAL_SUCCESS;
>  }
>  
> +/* Sets the NPU to trigger an error when a DMA occurs */
> +static int64_t npu_err_inject(struct phb *phb, uint32_t pe_num,
> +			      uint32_t type, uint32_t func __unused,
> +			      uint64_t addr __unused, uint64_t mask __unused)
> +{
> +	struct npu *p = phb_to_npu(phb);
> +	struct npu_dev *dev = NULL;
> +	int i;
> +
> +	if (pe_num > NPU_NUM_OF_PES) {
> +		prlog(PR_ERR, "NPU: error injection failed, bad PE given\n");
> +		return OPAL_PARAMETER;
> +	}
> +
> +	for (i = 0; i < p->total_devices; i++) {
> +		if (p->devices[i].pe_num == pe_num) {
> +			dev = &p->devices[i];
> +			break;
> +		}
> +	}
> +
> +	if (!dev) {
> +		prlog(PR_ERR, "NPU: couldn't find device with PE %x\n", pe_num);
> +		return OPAL_PARAMETER;
> +	}
> +
> +	/* TODO: extend this to conform to OPAL injection standards */
> +	if (type > 1) {
> +		prlog(PR_ERR, "NPU: invalid error injection type\n");
> +		return OPAL_PARAMETER;
> +	} else if (type == 1) {
> +		/* Emulate fence mode. */
> +		p->fenced = true;
> +	} else {
> +		/* Cause a freeze with an invalid MMIO read. */
> +		in_be64((void *)dev->bar.base);
> +	}
> +
> +	return OPAL_SUCCESS;
> +}
> +
>  static const struct phb_ops npu_ops = {
>  	.lock			= npu_lock,
>  	.unlock			= npu_unlock,
> @@ -1059,7 +1101,7 @@ static const struct phb_ops npu_ops = {
>  	.eeh_freeze_clear	= NULL,
>  	.eeh_freeze_set		= NULL,
>  	.next_error		= NULL,
> -	.err_inject		= NULL,
> +	.err_inject		= npu_err_inject,
>  	.get_diag_data		= NULL,
>  	.get_diag_data2		= NULL,
>  	.set_capi_mode		= NULL,
> diff --git a/include/npu.h b/include/npu.h
> index 5d5135b..389b732 100644
> --- a/include/npu.h
> +++ b/include/npu.h
> @@ -144,6 +144,8 @@ struct npu_dev {
>  	unsigned long		procedure_tb;
>  
>  	uint32_t		procedure_status;
> +
> +	uint8_t			pe_num;
>  };
>  
>  /* NPU PHB descriptor */
> -- 
> 2.7.0
>
> _______________________________________________
> Skiboot mailing list
> Skiboot at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 859 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/skiboot/attachments/20160126/c27d093f/attachment.sig>


More information about the Skiboot mailing list