[Skiboot] [PATCH skiboot v3] npu: Implement FLR
Alexey Kardashevskiy
aik at ozlabs.ru
Mon Apr 10 17:40:57 AEST 2017
As the comment in npu_dev_populate_pcie_cap() says,
"We should support FLR" and the NPU device advertises its
support. However, when the kernel issues FLR, skiboot does
nothing which leaves NPU in a state which does not allow
to use NV links again after GPU was reset.
This adds basic handling of FLR (function level reset).
This does not update hreset/freset handlers as they are not going to be
called under any circumstance - EEH is not supported for NPU and
the kernel won't issue OPAL reset otherwise.
Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
Acked-by: Alistair Popple <alistair at popple.id.au>
---
Changes:
v3:
* dropped "__unused" - leftovers from v1
v2:
* added npu_dev_procedure_reset() to keep procedure_xxxx handling
in one file
* added check that device control register access is 2 bytes long
and aligned
* fixed typo
---
With this fix, the bandwidthTest reports PINNED Memory Transfers
>= 20000MB/s, without it only first guest boot does report this,
all consequent guests only report 6000MB/s maximum.
---
include/npu.h | 1 +
hw/npu-hw-procedures.c | 8 ++++++++
hw/npu.c | 29 ++++++++++++++++++++++++++++-
3 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/include/npu.h b/include/npu.h
index 77beca37..a76acdfc 100644
--- a/include/npu.h
+++ b/include/npu.h
@@ -167,6 +167,7 @@ int64_t npu_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
bool write);
void npu_set_fence_state(struct npu *p, bool fence);
+void npu_dev_procedure_reset(struct npu_dev *dev);
#define NPUDBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \
(p)->phb.opal_id, ##a)
diff --git a/hw/npu-hw-procedures.c b/hw/npu-hw-procedures.c
index 85f09a11..cabc9c99 100644
--- a/hw/npu-hw-procedures.c
+++ b/hw/npu-hw-procedures.c
@@ -606,3 +606,11 @@ int64_t npu_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
return npu_dev_procedure_read(ndev, offset - pcrf->start, len, data);
}
+
+void npu_dev_procedure_reset(struct npu_dev *dev)
+{
+ dev->procedure_status = 0;
+ dev->procedure_number = 0;
+ dev->procedure_step = 0;
+ dev->procedure_data = 0;
+}
diff --git a/hw/npu.c b/hw/npu.c
index 302a5518..f00e9c5c 100644
--- a/hw/npu.c
+++ b/hw/npu.c
@@ -271,6 +271,29 @@ static int64_t npu_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
return npu_dev_cfg_bar_read(ndev, pcrf, offset, len, data);
}
+static int64_t npu_dev_cfg_exp_devcap(void *dev,
+ struct pci_cfg_reg_filter *pcrf __unused,
+ uint32_t offset, uint32_t size,
+ uint32_t *data, bool write)
+{
+ struct pci_virt_device *pvd = dev;
+ struct npu_dev *ndev = pvd->data;
+
+ assert(write);
+
+ if ((size != 2) || (offset & 1)) {
+ /* Short config writes are not supported */
+ prlog(PR_ERR, "NPU%d: Unsupported write to pcie control register\n",
+ ndev->phb->opal_id);
+ return OPAL_PARAMETER;
+ }
+
+ if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
+ npu_dev_procedure_reset(ndev);
+
+ return OPAL_PARTIAL;
+}
+
static struct npu_dev *bdfn_to_npu_dev(struct npu *p, uint32_t bdfn)
{
struct pci_virt_device *pvd;
@@ -1187,7 +1210,7 @@ static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
/* 0x04 - Device capability
*
- * We should support FLR. Oterwhsie, it might have
+ * We should support FLR. Otherwise, it might have
* problem passing it through to userland via Linux
* VFIO infrastructure
*/
@@ -1198,6 +1221,10 @@ static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
(PCICAP_EXP_DEVCAP_FUNC_RESET));
PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_DEVCAP, 4, val);
+ pci_virt_add_filter(pvd, base + PCICAP_EXP_DEVCTL, 2,
+ PCI_REG_FLAG_WRITE,
+ npu_dev_cfg_exp_devcap, NULL);
+
/* 0x08 - Device control and status */
PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_DEVCTL, 4, 0x00002810,
0xffff0000, 0x000f0000);
--
2.11.0
More information about the Skiboot
mailing list