[Skiboot] [PATCH 2/3] npu: Rework finding the paired PCI devices

Tue Aug 22 18:15:42 AEST 2017

The NVLink DT nodes and the PCI device node include a phandle to
the PCIe slot node that contains the device. We can use that to identify
which NVLink and PCIe devices are bound together.

This patch reworks the npu-dev <-> gpu-dev binding logic so make use of
the new device-tree based PCIe slot information and moves some common
functionality out of npu.c and npu2.c into npu-common.c

Signed-off-by: Oliver O'Halloran <oohall at gmail.com>
---
 hw/Makefile.inc |   2 +-
 hw/npu-common.c | 123 ++++++++++++++++++++++++++++++++++++++
 hw/npu.c        | 158 ++++++++++++------------------------------------
 hw/npu2.c       | 182 ++++++++++++++++----------------------------------------
 4 files changed, 211 insertions(+), 254 deletions(-)
 create mode 100644 hw/npu-common.c

diff --git a/hw/Makefile.inc b/hw/Makefile.inc
index 40be37f5b4ef..2c86dc42f91d 100644
--- a/hw/Makefile.inc
+++ b/hw/Makefile.inc
@@ -7,7 +7,7 @@ HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o
 HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
 HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
 HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
-HW_OBJS += phys-map.o sbe-p9.o capp.o occ-sensor.o
+HW_OBJS += phys-map.o sbe-p9.o capp.o occ-sensor.o npu-common.o
 HW=hw/built-in.o
 
 # FIXME hack this for now
diff --git a/hw/npu-common.c b/hw/npu-common.c
new file mode 100644
index 000000000000..9418f39a9e1f
--- /dev/null
+++ b/hw/npu-common.c
@@ -0,0 +1,123 @@
+/* Copyright 2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <io.h>
+#include <timebase.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-virt.h>
+#include <pci-slot.h>
+#include <interrupts.h>
+#include <opal.h>
+#include <opal-api.h>
+#include <cpu.h>
+#include <device.h>
+#include <affinity.h>
+#include <npu-regs.h>
+#include <npu.h>
+#include <string.h>
+#include <npu-common.h>
+
+struct pci_device *npu_find_gpu_dev(struct pci_device *npu_pd)
+{
+	const char *npu_slot, *gpu_slot;
+	struct pci_device *pd;
+	struct phb *phb;
+	int tmp;
+
+	/*
+	 * We want to find the GPU device with a slot label that matches
+	 * this NPU (virtual PCI) device.
+	 */
+	npu_slot = dt_prop_get(npu_pd->dn, "ibm,slot-label");
+	if (!npu_slot) {
+		char *c = dt_get_path(npu_pd->dn);
+
+		prerror("%s has no slot label\n", c);
+		free(c);
+
+		return NULL;
+	}
+
+	for_each_phb(phb) {
+		/* Don't match the device to itself */
+		if (npu_pd->phb->opal_id == phb->opal_id)
+			continue;
+
+		for_each_pci_dev(phb, pd, &tmp) {
+			/* skip non-nvidia devices */
+			if ((pd->vdid & 0xffff) != 0x10de)
+				continue;
+
+			gpu_slot = dt_prop_get(pd->dn, "ibm,slot-label");
+			if (!gpu_slot) {
+				char *c = dt_get_path(pd->dn);
+
+				prerror("%s has no slot label!\n", c);
+				free(c);
+				continue;
+			}
+
+			if (streq(gpu_slot, npu_slot))
+				return pd;
+		}
+	}
+
+	return NULL;
+}
+
+static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED;
+
+/*
+ * Appends an NPU phandle to the given PCI device node's ibm,npu
+ * property. We need this since there can be (and usually is) multiple
+ * links per GPU.
+ */
+void npu_append_pci_phandle(struct dt_node *dn, u32 phandle)
+{
+	struct dt_property *pci_npu_phandle_prop;
+	uint32_t *npu_phandles;
+	size_t prop_len;
+
+	/*
+	 * Use a lock to make sure no one else has a reference to an
+	 * ibm,npu property (this assumes this is the only function
+	 * that holds a reference to it).
+	 */
+	lock(&pci_npu_phandle_lock);
+
+	/* This function shouldn't be called unless ibm,npu exists */
+	pci_npu_phandle_prop = (struct dt_property *)
+		dt_find_property(dn, "ibm,npu");
+
+	if (!pci_npu_phandle_prop) {
+		dt_add_property_cells(dn, "ibm,npu", phandle);
+		unlock(&pci_npu_phandle_lock);
+		return;
+	}
+
+	/* Need to append to the properties */
+	prop_len = pci_npu_phandle_prop->len;
+	prop_len += sizeof(*npu_phandles);
+	dt_resize_property(&pci_npu_phandle_prop, prop_len);
+	pci_npu_phandle_prop->len = prop_len;
+
+	npu_phandles = (uint32_t *) pci_npu_phandle_prop->prop;
+	npu_phandles[prop_len/sizeof(*npu_phandles) - 1] = phandle;
+
+	unlock(&pci_npu_phandle_lock);
+}
diff --git a/hw/npu.c b/hw/npu.c
index b113800b9afd..ad44a56000a2 100644
--- a/hw/npu.c
+++ b/hw/npu.c
@@ -33,6 +33,8 @@
 #include <npu.h>
 #include <xscom.h>
 #include <string.h>
+#include <npu-common.h>
+
 
 /*
  * Terminology:
@@ -336,134 +338,48 @@ NPU_CFG_WRITE(8,  u8);
 NPU_CFG_WRITE(16, u16);
 NPU_CFG_WRITE(32, u32);
 
-static int __npu_dev_bind_pci_dev(struct phb *phb __unused,
-				  struct pci_device *pd,
-				  void *data)
-{
-	struct npu_dev *dev = data;
-	struct dt_node *pci_dt_node;
-	char *pcislot;
-
-	/* Ignore non-nvidia PCI devices */
-	if ((pd->vdid & 0xffff) != 0x10de)
-		return 0;
-
-	/* Find the PCI device's slot location */
-	for (pci_dt_node = pd->dn;
-	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-label");
-	     pci_dt_node = pci_dt_node->parent);
-
-	if (!pci_dt_node)
-		return 0;
-
-	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");
-
-	prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n",
-	      pcislot, dev->slot_label);
-
-	if (streq(pcislot, dev->slot_label))
-		return 1;
-
-	return 0;
-}
-
-static void npu_dev_bind_pci_dev(struct npu_dev *dev)
+/*
+ * Locate the real PCI device targeted by this NVlink by matching devices
+ * against slots.
+ */
+static void npu_phb_final_fixup(struct phb *phb)
 {
-	struct phb *phb;
-	uint32_t i;
-
-	if (dev->pd)
-		return;
-
-	for (i = 0; i < 64; i++) {
-		if (dev->npu->phb.opal_id == i)
-			continue;
-
-		phb = pci_get_phb(i);
-		if (!phb)
+	struct npu *npu = phb_to_npu(phb);
+	struct pci_device *npu_pd, *pd;
+	const char *label;
+	int tmp;
+
+	for_each_pci_dev(phb, npu_pd, &tmp) {
+		struct npu_dev *dev = bdfn_to_npu_dev(npu, npu_pd->bdfn);
+		uint32_t phandle;
+
+		/* copy the slot info from the link at x to the pci node */
+		label = dt_prop_get(dev->dt_node, "ibm,slot-label");
+		dt_add_property_string(npu_pd->dn, "ibm,slot-label", label);
+
+		phandle = dt_prop_get_u32_def(dev->dt_node, "ibm,pcie-slot", 0);
+		if (phandle)
+			dt_add_property_cells(npu_pd->dn, "ibm,pcie-slot",
+						phandle);
+
+		pd = npu_find_gpu_dev(npu_pd);
+		if (!pd) {
+			prerror("%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n",
+				__func__, dev->npu->phb.opal_id, dev->index);
 			continue;
-
-		dev->pd = pci_walk_dev(phb, NULL, __npu_dev_bind_pci_dev, dev);
-		if (dev->pd) {
-			dev->phb = phb;
-			/* Found the device, set the bit in config space */
-			PCI_VIRT_CFG_INIT_RO(dev->pvd, VENDOR_CAP_START +
-				VENDOR_CAP_PCI_DEV_OFFSET, 1, 0x01);
-			return;
 		}
-	}
-
-	prlog(PR_INFO, "%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n",
-	      __func__, dev->npu->phb.opal_id, dev->index);
-}
-
-static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED;
-
-/* Appends an NPU phandle to the given PCI device node ibm,npu
- * property */
-static void npu_append_pci_phandle(struct dt_node *dn, u32 phandle)
-{
-	uint32_t *npu_phandles;
-	struct dt_property *pci_npu_phandle_prop;
-	size_t prop_len;
-
-	/* Use a lock to make sure no one else has a reference to an
-	 * ibm,npu property (this assumes this is the only function
-	 * that holds a reference to it). */
-	lock(&pci_npu_phandle_lock);
-
-	/* This function shouldn't be called unless ibm,npu exists */
-	pci_npu_phandle_prop = (struct dt_property *)
-		dt_require_property(dn, "ibm,npu", -1);
-
-	/* Need to append to the properties */
-	prop_len = pci_npu_phandle_prop->len;
-	prop_len += sizeof(*npu_phandles);
-	dt_resize_property(&pci_npu_phandle_prop, prop_len);
-	pci_npu_phandle_prop->len = prop_len;
-
-	npu_phandles = (uint32_t *) pci_npu_phandle_prop->prop;
-	npu_phandles[prop_len/sizeof(*npu_phandles) - 1] = phandle;
-	unlock(&pci_npu_phandle_lock);
-}
-
-static int npu_dn_fixup(struct phb *phb,
-			struct pci_device *pd,
-			void *data __unused)
-{
-	struct npu *p = phb_to_npu(phb);
-	struct npu_dev *dev;
 
-	dev = bdfn_to_npu_dev(p, pd->bdfn);
-	assert(dev);
+		/* Now bind this nvlink to this GPU */
+		dev->phb = pd->phb;
+		dev->pd = pd;
 
-	if (dev->phb || dev->pd)
-		return 0;
+		npu_append_pci_phandle(pd->dn, npu_pd->dn->phandle);
+		dt_add_property_cells(npu_pd->dn, "ibm,gpu", pd->dn->phandle);
 
-	/* NPU devices require a slot location to associate with GPUs */
-	dev->slot_label = dt_prop_get(pd->dn, "ibm,slot-label");
-
-	/* Bind the emulated PCI device with the real one, which can't
-	 * be done until the PCI devices are populated. Once the real
-	 * PCI device is identified, we also need fix the device-tree
-	 * for it
-	 */
-	npu_dev_bind_pci_dev(dev);
-	if (dev->phb && dev->pd && dev->pd->dn) {
-		if (dt_find_property(dev->pd->dn, "ibm,npu"))
-			npu_append_pci_phandle(dev->pd->dn, pd->dn->phandle);
-		else
-			dt_add_property_cells(dev->pd->dn, "ibm,npu", pd->dn->phandle);
-
-		dt_add_property_cells(pd->dn, "ibm,gpu", dev->pd->dn->phandle);
+		/* Mark the link as in use in cfg space */
+		PCI_VIRT_CFG_INIT_RO(dev->pvd, VENDOR_CAP_START +
+				VENDOR_CAP_PCI_DEV_OFFSET, 1, 0x01);
 	}
-
-	return 0;
-}
-
-static void npu_phb_final_fixup(struct phb *phb)
-{
-	pci_walk_dev(phb, NULL, npu_dn_fixup, NULL);
 }
 
 static void npu_ioda_init(struct npu *p)
diff --git a/hw/npu2.c b/hw/npu2.c
index 74e332551085..9f83fd046c4c 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -36,6 +36,7 @@
 #include <chip.h>
 #include <phys-map.h>
 #include <nvram.h>
+#include <npu-common.h>
 
 /*
  * NPU2 BAR layout definition. We have 3 stacks and each of them
@@ -429,95 +430,6 @@ NPU2_CFG_WRITE(8, u8);
 NPU2_CFG_WRITE(16, u16);
 NPU2_CFG_WRITE(32, u32);
 
-static int __npu2_dev_bind_pci_dev(struct phb *phb __unused,
-				  struct pci_device *pd,
-				  void *data)
-{
-	struct npu2_dev *dev = data;
-	struct dt_node *pci_dt_node;
-	char *pcislot;
-
-	/* Ignore non-nvidia PCI devices */
-	if ((pd->vdid & 0xffff) != 0x10de)
-		return 0;
-
-	/* Find the PCI device's slot location */
-	for (pci_dt_node = pd->dn;
-	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-label");
-	     pci_dt_node = pci_dt_node->parent);
-
-	if (!pci_dt_node)
-		return 0;
-
-	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");
-
-	prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n",
-	      pcislot, dev->slot_label);
-
-	if (streq(pcislot, dev->slot_label))
-		return 1;
-
-	return 0;
-}
-
-static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
-{
-	struct phb *phb;
-	uint32_t i;
-
-	if (dev->pd)
-		return;
-
-	for (i = 0; i < 64; i++) {
-		if (dev->npu->phb.opal_id == i)
-			continue;
-
-		phb = pci_get_phb(i);
-		if (!phb)
-			continue;
-
-		dev->pd = pci_walk_dev(phb, NULL, __npu2_dev_bind_pci_dev, dev);
-		if (dev->pd) {
-			dev->phb = phb;
-			/* Found the device, set the bit in config space */
-			npu2_set_link_flag(dev, NPU2_DEV_PCI_LINKED);
-			return;
-		}
-	}
-
-	prlog(PR_INFO, "%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n",
-	      __func__, dev->npu->phb.opal_id, dev->index);
-}
-
-static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED;
-
-static void npu2_append_phandle(struct dt_node *dn,
-				u32 phandle)
-{
-	struct dt_property *prop;
-	uint32_t *npu_phandles;
-	size_t len;
-
-	/*
-	 * Use a lock to make sure no one else has a reference to an
-	 * ibm,npu property (this assumes this is the only function
-	 * that holds a reference to it)
-	 */
-	lock(&pci_npu_phandle_lock);
-
-	/* This function shouldn't be called unless ibm,npu exists */
-	prop = (struct dt_property *)dt_require_property(dn, "ibm,npu", -1);
-
-	/* Need to append to the properties */
-	len = prop->len + sizeof(*npu_phandles);
-	dt_resize_property(&prop, len);
-	prop->len = len;
-
-	npu_phandles = (uint32_t *)prop->prop;
-	npu_phandles[len / sizeof(*npu_phandles) - 1] = phandle;
-	unlock(&pci_npu_phandle_lock);
-}
-
 static struct dt_node *npu2_create_memory_dn(uint64_t addr, uint64_t size)
 {
 	struct dt_node *mem;
@@ -667,57 +579,63 @@ static int npu2_assign_gmb(struct npu2_dev *ndev)
 	return 0;
 }
 
-static int npu2_dn_fixup(struct phb *phb,
-			 struct pci_device *pd,
-			 void *data __unused)
+/*
+ * Locate the real PCI device targeted by this NVlink by matching devices
+ * against slots.
+ */
+static void npu2_phb_final_fixup(struct phb *phb)
 {
-	struct npu2 *p = phb_to_npu2(phb);
-	struct npu2_dev *dev;
+	struct npu2 *npu = phb_to_npu2(phb);
+	struct pci_device *npu_pd, *gpu;
+	uint32_t phandle;
+	int tmp;
 
-	dev = npu2_bdf_to_dev(p, pd->bdfn);
-	assert(dev);
-	if (dev->phb || dev->pd)
-		return 0;
+	/*
+	 * For each "pci_virt_device" on the PHB we want to find the probed
+	 * PCI device that matches it.
+	 *
+	 * XXX: Can we have virtual and real devices on a PHB at the same time?
+	 * the virtual config space design seems to preclude it and there could
+	 * be bus numbering conflicts.
+	 *
+	 * actual PCI device and add the node cross references.
+	 */
 
-	npu2_assign_gmb(dev);
-	npu2_dn_fixup_gmb(pd->dn, dev);
-	dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dt_node->phandle);
+	for_each_pci_dev(phb, npu_pd, &tmp) {
+		struct npu2_dev *dev = npu2_bdf_to_dev(npu, npu_pd->bdfn);
 
-	/* NPU devices require a slot location to associate with GPUs */
-	dev->slot_label = dt_prop_get_def(pd->dn, "ibm,slot-label", NULL);
-	if (!dev->slot_label) {
-		/**
-		 * @fwts-label NPUNoPHBSlotLabel
-		 * @fwts-advice No GPU/NPU slot information was found.
-		 * NVLink2 functionality will not work.
-		 */
-		prlog(PR_ERR, "NPU: Cannot find GPU slot information\n");
-		return 0;
-	}
+		assert(dev);
+		if (dev->phb || dev->pd)
+			continue;
 
-	/*
-	 * Bind the emulated PCI device with the real one, which can't
-	 * be done until the PCI devices are populated. Once the real
-	 * PCI device is identified, we also need fix the device-tree
-	 * for it
-	 */
-	npu2_dev_bind_pci_dev(dev);
-	if (dev->phb && dev->pd && dev->pd->dn) {
-		if (dt_find_property(dev->pd->dn, "ibm,npu"))
-			npu2_append_phandle(dev->pd->dn, pd->dn->phandle);
-		else
-			dt_add_property_cells(dev->pd->dn, "ibm,npu", pd->dn->phandle);
+		npu2_assign_gmb(dev);
+		npu2_dn_fixup_gmb(npu_pd->dn, dev);
 
-		dt_add_property_cells(pd->dn, "ibm,gpu", dev->pd->dn->phandle);
-		dev->gpu_bdfn = dev->pd->bdfn;
-	}
+		/* copy the pcie-slot from the link to the emulated pci  */
+		phandle = dt_prop_get_u32(dev->dt_node, "ibm,pcie-slot");
+		dt_add_property_cells(npu_pd->dn, "ibm,pcie-slot", phandle);
 
-	return 0;
-}
+		gpu = npu_find_gpu_dev(npu_pd);
+		if (!gpu) {
+			prerror("%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n",
+				__func__, dev->npu->phb.opal_id, dev->index);
+			continue;
+		}
 
-static void npu2_phb_final_fixup(struct phb *phb)
-{
-	pci_walk_dev(phb, NULL, npu2_dn_fixup, NULL);
+		/* Found the device, set the bit in config space */
+		npu2_set_link_flag(dev, NPU2_DEV_PCI_LINKED);
+
+		dt_add_property_cells(npu_pd->dn, "ibm,nvlink",
+				dev->dt_node->phandle);
+
+		/* Now bind this nvlink to this GPU */
+		dev->phb = gpu->phb;
+		dev->pd = gpu;
+		dev->gpu_bdfn = gpu->bdfn;
+
+		npu_append_pci_phandle(gpu->dn, npu_pd->dn->phandle);
+		dt_add_property_cells(npu_pd->dn, "ibm,gpu", gpu->dn->phandle);
+	}
 }
 
 static void npu2_init_ioda_cache(struct npu2 *p)
-- 
2.9.5