[Skiboot] [PATCH skiboot v2] npu: Implement FLR
Alistair Popple
alistair at popple.id.au
Mon Apr 10 15:19:56 AEST 2017
Alexey,
One very minor nitpick below but looks good. Thanks!
Acked-by: Alistair Popple <alistair at popple.id.au>
On Fri, 7 Apr 2017 02:11:02 PM Alexey Kardashevskiy wrote:
> As the comment in npu_dev_populate_pcie_cap() says,
> "We should support FLR" and the NPU device advertises its
> support. However, when the kernel issues FLR, skiboot does
> nothing which leaves NPU in a state which does not allow
> to use NV links again after GPU was reset.
>
> This adds basic handling of FLR (function level reset).
>
> This does not update hreset/freset handlers as they are not going to be
> called under any circumstance - EEH is not supported for NPU and
> the kernel won't issue OPAL reset otherwise.
>
> Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
> ---
> Changes:
> v2:
> * added npu_dev_procedure_reset() to keep procedure_xxxx handling
> in one file
> * added check that device control register access is 2 bytes long
> and aligned
> * fixed typo
>
> ---
> With this fix, the bandwidthTest reports PINNED Memory Transfers
> >= 20000MB/s, without it only first guest boot does report this,
> all consequent guests only report 6000MB/s maximum.
> ---
> include/npu.h | 1 +
> hw/npu-hw-procedures.c | 8 ++++++++
> hw/npu.c | 29 ++++++++++++++++++++++++++++-
> 3 files changed, 37 insertions(+), 1 deletion(-)
>
> diff --git a/include/npu.h b/include/npu.h
> index 77beca37..a76acdfc 100644
> --- a/include/npu.h
> +++ b/include/npu.h
> @@ -167,6 +167,7 @@ int64_t npu_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
> bool write);
>
> void npu_set_fence_state(struct npu *p, bool fence);
> +void npu_dev_procedure_reset(struct npu_dev *dev);
>
> #define NPUDBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \
> (p)->phb.opal_id, ##a)
> diff --git a/hw/npu-hw-procedures.c b/hw/npu-hw-procedures.c
> index 85f09a11..cabc9c99 100644
> --- a/hw/npu-hw-procedures.c
> +++ b/hw/npu-hw-procedures.c
> @@ -606,3 +606,11 @@ int64_t npu_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
>
> return npu_dev_procedure_read(ndev, offset - pcrf->start, len, data);
> }
> +
> +void npu_dev_procedure_reset(struct npu_dev *dev)
> +{
> + dev->procedure_status = 0;
> + dev->procedure_number = 0;
> + dev->procedure_step = 0;
> + dev->procedure_data = 0;
> +}
> diff --git a/hw/npu.c b/hw/npu.c
> index 302a5518..0ad86a8c 100644
> --- a/hw/npu.c
> +++ b/hw/npu.c
> @@ -271,6 +271,29 @@ static int64_t npu_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
> return npu_dev_cfg_bar_read(ndev, pcrf, offset, len, data);
> }
>
> +static int64_t npu_dev_cfg_exp_devcap(void *dev,
> + struct pci_cfg_reg_filter *pcrf __unused,
> + uint32_t offset __unused, uint32_t size __unused,
These are actually used in the check below so we can drop the __unused
flags.
> + uint32_t *data, bool write)
> +{
> + struct pci_virt_device *pvd = dev;
> + struct npu_dev *ndev = pvd->data;
> +
> + assert(write);
> +
> + if ((size != 2) || (offset & 1)) {
> + /* Short config writes are not supported */
> + prlog(PR_ERR, "NPU%d: Unsupported write to pcie control register\n",
> + ndev->phb->opal_id);
> + return OPAL_PARAMETER;
> + }
> +
> + if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
> + npu_dev_procedure_reset(ndev);
> +
> + return OPAL_PARTIAL;
> +}
> +
> static struct npu_dev *bdfn_to_npu_dev(struct npu *p, uint32_t bdfn)
> {
> struct pci_virt_device *pvd;
> @@ -1187,7 +1210,7 @@ static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
>
> /* 0x04 - Device capability
> *
> - * We should support FLR. Oterwhsie, it might have
> + * We should support FLR. Otherwise, it might have
> * problem passing it through to userland via Linux
> * VFIO infrastructure
> */
> @@ -1198,6 +1221,10 @@ static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
> (PCICAP_EXP_DEVCAP_FUNC_RESET));
> PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_DEVCAP, 4, val);
>
> + pci_virt_add_filter(pvd, base + PCICAP_EXP_DEVCTL, 2,
> + PCI_REG_FLAG_WRITE,
> + npu_dev_cfg_exp_devcap, NULL);
> +
> /* 0x08 - Device control and status */
> PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_DEVCTL, 4, 0x00002810,
> 0xffff0000, 0x000f0000);
>
More information about the Skiboot
mailing list