[Skiboot] [PATCH v3] nvlink: Associate and allocate NPUs using slots

Russell Currey ruscur at russell.cc
Tue Jul 5 15:05:35 AEST 2016


Allocating BDFNs to NPU devices and associating NPU devices with PCI
devices of GPUs both rely on comparing PBCQ handles.  This will fail if a
system has multiple sets of GPUs behind a single PHB.

Rework this to instead use slot locations.  The following changes are
introduced:

 - Groups of NPU links that connect to the same GPU are presented in the
   slot table entries as st_npu_slot, using ST_LOC_NPU_GROUP

 - NPU links are created with the ibm,npu-group-id property replacing the
   ibm,pbcq property, which is used in BDFN allocation and GPU association

 - Slot comparison is handled slightly differently for NPU devices as the
   function of the BDFN is ignored, since the device number represents the
   physical GPU the link is connected to

 - BDFN allocation for NPU devices is now derived from the groups in the
   slot table.  For Garrison, the same BDFNs are generated as before.

 - Association with GPU PCI devices is performed by comparing the slot
   label.  This means for future machines with NPUs that slot labels
   are compulsory to have NVLink functionality working.

Signed-off-by: Russell Currey <ruscur at russell.cc>
---
This patch has too much in it, but I can't find a way to separate it
without breaking functionality.  Let me know if you see a way to do so.

This supercedes the series starting with "astbmc: Add NPU slot type and
use it in Garrison".  It's all a bit of a mess...
---
 hw/npu.c                    | 59 +++++++++++++++++++--------------------------
 include/npu.h               |  3 +++
 platforms/astbmc/astbmc.h   |  8 ++++++
 platforms/astbmc/garrison.c | 59 +++++++++++++--------------------------------
 platforms/astbmc/slots.c    | 10 +++++++-
 5 files changed, 62 insertions(+), 77 deletions(-)

diff --git a/hw/npu.c b/hw/npu.c
index f535f8a..2ab6ac2 100644
--- a/hw/npu.c
+++ b/hw/npu.c
@@ -30,6 +30,7 @@
 #include <npu-regs.h>
 #include <npu.h>
 #include <xscom.h>
+#include <string.h>
 
 /*
  * Terminology:
@@ -503,25 +504,27 @@ static int __npu_dev_bind_pci_dev(struct phb *phb __unused,
 {
 	struct npu_dev *dev = data;
 	struct dt_node *pci_dt_node;
-	uint32_t npu_npcq_phandle;
+	char *pcislot;
 
 	/* Ignore non-nvidia PCI devices */
 	if ((pd->vdid & 0xffff) != 0x10de)
 		return 0;
 
-	/* Find the PCI devices pbcq */
-	for (pci_dt_node = pd->dn->parent;
-	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,pbcq");
+	/* Find the PCI device's slot location */
+	for (pci_dt_node = pd->dn;
+	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-label");
 	     pci_dt_node = pci_dt_node->parent);
 
 	if (!pci_dt_node)
 		return 0;
 
-	npu_npcq_phandle = dt_prop_get_u32(dev->dt_node, "ibm,npu-pbcq");
+	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");
 
-	if (dt_prop_get_u32(pci_dt_node, "ibm,pbcq") == npu_npcq_phandle &&
-	    (pd->vdid & 0xffff) == 0x10de)
-			return 1;
+	prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n",
+	      pcislot, dev->slot_label);
+
+	if (streq(pcislot, dev->slot_label))
+		return 1;
 
 	return 0;
 }
@@ -604,6 +607,9 @@ static int npu_dn_fixup(struct phb *phb,
 	if (dev->phb || dev->pd)
 		return 0;
 
+	/* NPU devices require a slot location to associate with GPUs */
+	dev->slot_label = dt_prop_get(pd->dn, "ibm,slot-label");
+
 	/* Bind the emulated PCI device with the real one, which can't
 	 * be done until the PCI devices are populated. Once the real
 	 * PCI device is identified, we also need fix the device-tree
@@ -1619,34 +1625,19 @@ static void npu_dev_create_cfg(struct npu_dev *dev)
 		NPU_DEV_CFG_INIT_RO(dev, PCI_CFG_INT_LINE, 4, 0x00000200);
 }
 
-static uint32_t npu_allocate_bdfn(struct npu *p, uint32_t pbcq)
+static uint32_t npu_allocate_bdfn(struct npu *p, uint32_t group)
 {
 	int i;
-	int dev = -1;
-	int bdfn = -1;
+	int bdfn = (group << 3);
 
-	/* Find the highest function number alloacted to emulated PCI
-	 * devices associated with this GPU. */
-	for(i = 0; i < p->total_devices; i++) {
-		int dev_bdfn = p->devices[i].bdfn;
-		dev = MAX(dev, dev_bdfn & 0xf8);
-
-		if (dt_prop_get_u32(p->devices[i].dt_node,
-				    "ibm,npu-pbcq") == pbcq)
-			bdfn = MAX(bdfn, dev_bdfn);
+	for (i = 0; i < p->total_devices; i++) {
+		if (p->devices[i].bdfn == bdfn) {
+			bdfn++;
+			break;
+		}
 	}
 
-	if (bdfn >= 0)
-		/* Device has already been allocated for this GPU so
-		 * assign the emulated PCI device the next
-		 * function. */
-		return bdfn + 1;
-	else if (dev >= 0)
-		/* Otherwise allocate a new device and allocate
-		 * function 0. */
-		return dev + (1 << 3);
-	else
-		return 0;
+	return bdfn;
 }
 
 static void npu_create_devices(struct dt_node *dn, struct npu *p)
@@ -1674,7 +1665,7 @@ static void npu_create_devices(struct dt_node *dn, struct npu *p)
 	p->phb.scan_map = 0;
 	dt_for_each_compatible(npu_dn, link, "ibm,npu-link") {
 		struct npu_dev_bar *bar;
-		uint32_t pbcq;
+		uint32_t group_id;
 		uint64_t val;
 		uint32_t j;
 
@@ -1689,8 +1680,8 @@ static void npu_create_devices(struct dt_node *dn, struct npu *p)
 		/* We don't support MMIO PHY access yet */
 		dev->pl_base = NULL;
 
-		pbcq = dt_prop_get_u32(link, "ibm,npu-pbcq");
-		dev->bdfn = npu_allocate_bdfn(p, pbcq);
+		group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
+		dev->bdfn = npu_allocate_bdfn(p, group_id);
 
 		/* This must be done after calling
 		 * npu_allocate_bdfn() */
diff --git a/include/npu.h b/include/npu.h
index 778c985..800515a 100644
--- a/include/npu.h
+++ b/include/npu.h
@@ -148,6 +148,9 @@ struct npu_dev {
 	uint32_t		procedure_status;
 
 	uint8_t			pe_num;
+
+	/* Used to associate the NPU device with GPU PCI devices */
+	const char		*slot_label;
 };
 
 /* NPU PHB descriptor */
diff --git a/platforms/astbmc/astbmc.h b/platforms/astbmc/astbmc.h
index 23c31c7..322282e 100644
--- a/platforms/astbmc/astbmc.h
+++ b/platforms/astbmc/astbmc.h
@@ -20,6 +20,13 @@
 
 #define ST_LOC_PHB(chip_id, phb_idx)    ((chip_id) << 16 | (phb_idx))
 #define ST_LOC_DEVFN(dev, fn)	        ((dev) << 3 | (fn))
+/*
+ * NPU groups are used to allocate device numbers.  There is a 1 to 1
+ * correlation between a NPU group and a physical GPU.  Links within a group
+ * are allocated as functions within a device, so groups must be numbered
+ * sequentially starting at 0.
+ */
+#define ST_LOC_NPU_GROUP(group_id)	(group_id << 3)
 
 struct slot_table_entry {
 	enum slot_table_etype {
@@ -27,6 +34,7 @@ struct slot_table_entry {
 		st_phb,
 		st_pluggable_slot,
 		st_builtin_dev,
+		st_npu_slot
 	} etype;
 	uint32_t location;
 	const char *name;
diff --git a/platforms/astbmc/garrison.c b/platforms/astbmc/garrison.c
index 3ff84a3..f400a51 100644
--- a/platforms/astbmc/garrison.c
+++ b/platforms/astbmc/garrison.c
@@ -63,23 +63,13 @@ static const struct slot_table_entry garrison_phb0_3_slot[] = {
 
 static const struct slot_table_entry garrison_npu0_slots[] = {
 	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(0,0),
-		.name = "GPU2",
-	},
-	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(0,1),
+		.etype = st_npu_slot,
+		.location = ST_LOC_NPU_GROUP(0),
 		.name = "GPU2",
 	},
 	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(1,0),
-		.name = "GPU1",
-	},
-	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(1,1),
+		.etype = st_npu_slot,
+		.location = ST_LOC_NPU_GROUP(1),
 		.name = "GPU1",
 	},
 	{ .etype = st_end },
@@ -152,23 +142,13 @@ static const struct slot_table_entry garrison_phb1_3_slot[] = {
 
 static const struct slot_table_entry garrison_npu1_slots[] = {
 	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(0,0),
-		.name = "GPU4",
-	},
-	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(0,1),
+		.etype = st_npu_slot,
+		.location = ST_LOC_NPU_GROUP(0),
 		.name = "GPU4",
 	},
 	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(1,0),
-		.name = "GPU3",
-	},
-	{
-		.etype = st_pluggable_slot,
-		.location = ST_LOC_DEVFN(1,1),
+		.etype = st_npu_slot,
+		.location = ST_LOC_NPU_GROUP(1),
 		.name = "GPU3",
 	},
 	{ .etype = st_end },
@@ -233,7 +213,7 @@ static const struct slot_table_entry garrison_phb_table[] = {
 #define NPU_INDIRECT0	0x8000000008010c3f
 #define NPU_INDIRECT1	0x8000000008010c7f
 
-static void create_link(struct dt_node *npu, struct dt_node *pbcq, int index)
+static void create_link(struct dt_node *npu, int group, int index)
 {
 	struct dt_node *link;
 	uint32_t lane_mask;
@@ -255,12 +235,12 @@ static void create_link(struct dt_node *npu, struct dt_node *pbcq, int index)
 	}
 	dt_add_property_u64s(link, "ibm,npu-phy", phy);
 	dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
-	dt_add_property_cells(link, "ibm,npu-pbcq", pbcq->phandle);
+	dt_add_property_cells(link, "ibm,npu-group-id", group);
 }
 
 static void dt_create_npu(void)
 {
-        struct dt_node *xscom, *npu, *pbcq;
+        struct dt_node *xscom, *npu;
         char namebuf[32];
 
 	dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
@@ -275,17 +255,12 @@ static void dt_create_npu(void)
 		dt_add_property_cells(npu, "ibm,npu-index", 0);
 		dt_add_property_cells(npu, "ibm,npu-links", 4);
 
-		/* On Garrison we have 2 links per GPU device. The
-		 * first 2 links go to the GPU connected via
-		 * pbcq at 2012c00 the second two via pbcq at 2012800. */
-		pbcq = dt_find_by_name(xscom, "pbcq at 2012c00");
-		assert(pbcq);
-		create_link(npu, pbcq, 0);
-		create_link(npu, pbcq, 1);
-		pbcq = dt_find_by_name(xscom, "pbcq at 2012800");
-		assert(pbcq);
-		create_link(npu, pbcq, 4);
-		create_link(npu, pbcq, 5);
+		/* On Garrison we have 2 links per GPU device.  These are
+		 * grouped together as per the slot tables above. */
+		create_link(npu, 0, 0);
+		create_link(npu, 0, 1);
+		create_link(npu, 1, 4);
+		create_link(npu, 1, 5);
 	}
 }
 
diff --git a/platforms/astbmc/slots.c b/platforms/astbmc/slots.c
index 36547e1..678a3cc 100644
--- a/platforms/astbmc/slots.c
+++ b/platforms/astbmc/slots.c
@@ -54,6 +54,7 @@ static const struct slot_table_entry *match_slot_dev_entry(struct phb *phb,
 							   struct pci_device *pd)
 {
 	const struct slot_table_entry *parent, *ent;
+	uint32_t bdfn;
 
 	/* Find a parent recursively */
 	if (pd->parent)
@@ -70,7 +71,14 @@ static const struct slot_table_entry *match_slot_dev_entry(struct phb *phb,
 			prerror("SLOT: Bad PHB entry type in table !\n");
 			continue;
 		}
-		if (ent->location == (pd->bdfn & 0xff))
+
+		/* NPU slots match on device, not function */
+		if (ent->etype == st_npu_slot)
+			bdfn = pd->bdfn & 0xf8;
+		else
+			bdfn = pd->bdfn & 0xff;
+
+		if (ent->location == bdfn)
 			return ent;
 	}
 	return NULL;
-- 
2.9.0



More information about the Skiboot mailing list