[Skiboot] [PATCH skiboot] npu2: Reset NVLinks when resetting a GPU

Alexey Kardashevskiy aik at ozlabs.ru
Mon May 20 14:19:21 AEST 2019


Resetting a V100 GPU brings its NVLinks down, and if an NPU then tries to
use them, an HMI (Hypervisor Maintenance Interrupt) occurs. We were lucky
not to observe this: on bare metal the host does not normally reset a GPU,
and when GPUs are passed through, they are usually listed before the NPUs
on the QEMU command line or in the Libvirt XML, and because of that the
NPUs are naturally reset first. However, simply changing the device order
triggers HMIs.

This defines a bridge control register filter for a PCI slot that holds a
GPU with NVLinks, so that when the host system issues a secondary bus reset
to the slot, the filter also resets the associated NVLinks.

Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
---
 hw/npu2.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/hw/npu2.c b/hw/npu2.c
index e444bc66cfd3..4aa1231dfc2b 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -537,6 +537,48 @@ static int __npu2_dev_bind_pci_dev(struct phb *phb __unused,
 	return 0;
 }
 
+static int64_t npu2_gpu_brigde_sec_bus_reset(void *dev,
+		struct pci_cfg_reg_filter *pcrf __unused,
+		uint32_t offset, uint32_t len,
+		uint32_t *data, bool write)
+{
+	struct pci_device *pd = dev;
+	struct pci_device *gpu;
+	struct phb *npphb;
+	struct npu2 *npu;
+	struct dt_node *np;
+	struct npu2_dev	*ndev;
+	int i;
+
+	assert(write);	/* filter is registered with PCI_REG_FLAG_WRITE */
+
+	if ((len != 2) || (offset & 1)) {
+		/* Only 2-byte writes at an even offset are handled here */
+		PCIERR(pd->phb, pd->bdfn,
+		       "Unsupported write to bridge control register\n");
+		return OPAL_PARAMETER;
+	}
+
+	gpu = list_top(&pd->children, struct pci_device, link);
+	if (gpu && (*data & PCI_CFG_BRCTL_SECONDARY_RESET)) {
+		dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex") {
+			npphb = pci_get_phb(dt_prop_get_cell(np,
+					"ibm,opal-phbid", 1));
+			if (!npphb || npphb->phb_type != phb_type_npu_v2)
+				continue;
+
+			npu = phb_to_npu2_nvlink(npphb);
+			for (i = 0; i < npu->total_devices; ++i) {
+				ndev = &npu->devices[i];
+				if (ndev->nvlink.pd == gpu)
+					npu2_dev_procedure_reset(ndev);
+			}
+		}
+	}
+
+	return OPAL_PARTIAL;	/* TODO confirm: write then proceeds? */
+}
+
 static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
 {
 	struct phb *phb;
@@ -558,6 +600,19 @@ static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
 			dev->nvlink.phb = phb;
 			/* Found the device, set the bit in config space */
 			npu2_set_link_flag(dev, NPU2_DEV_PCI_LINKED);
+
+			/*
+			 * We define a custom sec bus reset handler for a slot
+			 * with an NVLink-connected GPU to prevent HMIs which
+			 * will otherwise happen if we reset GPU before
+			 * resetting NVLinks.
+			 */
+			if (dev->nvlink.pd->parent &&
+			    dev->nvlink.pd->parent->slot)
+				pci_add_cfg_reg_filter(dev->nvlink.pd->parent,
+						PCI_CFG_BRCTL, 2,
+						PCI_REG_FLAG_WRITE,
+						npu2_gpu_brigde_sec_bus_reset);
 			return;
 		}
 	}
-- 
2.17.1



More information about the Skiboot mailing list