[Skiboot] [PATCH skiboot v2] npu: Implement FLR

Alistair Popple alistair at popple.id.au
Mon Apr 10 15:19:56 AEST 2017


Alexey,

One very minor nitpick below but looks good. Thanks!

Acked-by: Alistair Popple <alistair at popple.id.au>

On Fri, 7 Apr 2017 02:11:02 PM Alexey Kardashevskiy wrote:
> As the comment in npu_dev_populate_pcie_cap() says,
> "We should support FLR" and the NPU device advertises its
> support. However, when the kernel issues FLR, skiboot does
> nothing which leaves NPU in a state which does not allow
> to use NV links again after GPU was reset.
>
> This adds basic handling of FLR (function level reset).
>
> This does not update hreset/freset handlers as they are not going to be
> called under any circumstance - EEH is not supported for NPU and
> the kernel won't issue OPAL reset otherwise.
>
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
> Changes:
> v2:
> * added npu_dev_procedure_reset() to keep procedure_xxxx handling
> in one file
> * added check that device control register access is 2 bytes long
> and aligned
> * fixed typo
>
> ---
> With this fix, the bandwidthTest reports PINNED Memory Transfers
> >= 20000MB/s, without it only first guest boot does report this,
> all consequent guests only report 6000MB/s maximum.
> ---
>  include/npu.h          |  1 +
>  hw/npu-hw-procedures.c |  8 ++++++++
>  hw/npu.c               | 29 ++++++++++++++++++++++++++++-
>  3 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/include/npu.h b/include/npu.h
> index 77beca37..a76acdfc 100644
> --- a/include/npu.h
> +++ b/include/npu.h
> @@ -167,6 +167,7 @@ int64_t npu_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
>  			  bool write);
>
>  void npu_set_fence_state(struct npu *p, bool fence);
> +void npu_dev_procedure_reset(struct npu_dev *dev);
>
>  #define NPUDBG(p, fmt, a...)	prlog(PR_DEBUG, "NPU%d: " fmt, \
>  				      (p)->phb.opal_id, ##a)
> diff --git a/hw/npu-hw-procedures.c b/hw/npu-hw-procedures.c
> index 85f09a11..cabc9c99 100644
> --- a/hw/npu-hw-procedures.c
> +++ b/hw/npu-hw-procedures.c
> @@ -606,3 +606,11 @@ int64_t npu_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
>
>  	return npu_dev_procedure_read(ndev, offset - pcrf->start, len, data);
>  }
> +
> +void npu_dev_procedure_reset(struct npu_dev *dev)
> +{
> +	dev->procedure_status = 0;
> +	dev->procedure_number = 0;
> +	dev->procedure_step = 0;
> +	dev->procedure_data = 0;
> +}
> diff --git a/hw/npu.c b/hw/npu.c
> index 302a5518..0ad86a8c 100644
> --- a/hw/npu.c
> +++ b/hw/npu.c
> @@ -271,6 +271,29 @@ static int64_t npu_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
>  	return npu_dev_cfg_bar_read(ndev, pcrf, offset, len, data);
>  }
>
> +static int64_t npu_dev_cfg_exp_devcap(void *dev,
> +		struct pci_cfg_reg_filter *pcrf __unused,
> +		uint32_t offset __unused, uint32_t size __unused,

These are actually used in the check below so we can drop the __unused
flags.

> +		uint32_t *data, bool write)
> +{
> +	struct pci_virt_device *pvd = dev;
> +	struct npu_dev *ndev = pvd->data;
> +
> +	assert(write);
> +
> +	if ((size != 2) || (offset & 1)) {
> +		/* Short config writes are not supported */
> +		prlog(PR_ERR, "NPU%d: Unsupported write to pcie control register\n",
> +		      ndev->phb->opal_id);
> +		return OPAL_PARAMETER;
> +	}
> +
> +	if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
> +		npu_dev_procedure_reset(ndev);
> +
> +	return OPAL_PARTIAL;
> +}
> +
>  static struct npu_dev *bdfn_to_npu_dev(struct npu *p, uint32_t bdfn)
>  {
>  	struct pci_virt_device *pvd;
> @@ -1187,7 +1210,7 @@ static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
>
>  	/* 0x04 - Device capability
>  	 *
> -	 * We should support FLR. Oterwhsie, it might have
> +	 * We should support FLR. Otherwise, it might have
>  	 * problem passing it through to userland via Linux
>  	 * VFIO infrastructure
>  	 */
> @@ -1198,6 +1221,10 @@ static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
>  	       (PCICAP_EXP_DEVCAP_FUNC_RESET));
>  	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_DEVCAP, 4, val);
>
> +	pci_virt_add_filter(pvd, base + PCICAP_EXP_DEVCTL, 2,
> +			    PCI_REG_FLAG_WRITE,
> +			    npu_dev_cfg_exp_devcap, NULL);
> +
>  	/* 0x08 - Device control and status */
>  	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_DEVCTL, 4, 0x00002810,
>  			 0xffff0000, 0x000f0000);
>



More information about the Skiboot mailing list