[Skiboot] [PATCH v3] nvlink: Associate and allocate NPUs using slots
Russell Currey
ruscur at russell.cc
Tue Jul 5 15:05:35 AEST 2016
Allocating BDFNs to NPU devices and associating NPU devices with PCI
devices of GPUs both rely on comparing PBCQ handles. This will fail if a
system has multiple sets of GPUs behind a single PHB.
Rework this to instead use slot locations. The following changes are
introduced:
- Groups of NPU links that connect to the same GPU are presented in the
  slot table entries as st_npu_slot, using ST_LOC_NPU_GROUP
- NPU links are created with an ibm,npu-group-id property replacing the
  ibm,npu-pbcq property; the group ID is used in BDFN allocation and GPU
  association
- Slot comparison is handled slightly differently for NPU devices: the
  function part of the BDFN is ignored, since the device number alone
  identifies the physical GPU the link is connected to
- BDFN allocation for NPU devices is now derived from the groups in the
  slot table (see the sketch after this list). For Garrison, the same
  BDFNs are generated as before.
- Association with GPU PCI devices is performed by comparing slot
  labels. This means that on future machines with NPUs, slot labels are
  required for NVLink functionality to work.
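
To make the BDFN scheme concrete, here is a small standalone sketch
(illustrative C only, not part of the patch; struct npu_link and
assign_bdfns() are made-up names for this example). It shows the
arithmetic the change relies on: ST_LOC_NPU_GROUP(group) selects the
device number (group << 3), and each additional link in the same group
takes the next function number, so Garrison's two links per GPU come
out as 00:00.0/00:00.1 and 00:01.0/00:01.1, as before.

    /* Illustrative only -- not skiboot code.  struct npu_link and
     * assign_bdfns() are hypothetical names for this sketch. */
    #include <stdint.h>
    #include <stdio.h>

    struct npu_link {
            uint32_t group_id;      /* from the ibm,npu-group-id property */
            uint32_t bdfn;          /* bus:device.function, allocated below */
    };

    /* Device number = group ID (cf. ST_LOC_NPU_GROUP(group) == group << 3);
     * function number = how many earlier links share the same group. */
    static void assign_bdfns(struct npu_link *links, int n)
    {
            int i, j;

            for (i = 0; i < n; i++) {
                    uint32_t bdfn = links[i].group_id << 3;

                    for (j = 0; j < i; j++)
                            if (links[j].group_id == links[i].group_id)
                                    bdfn++;

                    links[i].bdfn = bdfn;
            }
    }

    int main(void)
    {
            /* Garrison layout: two links per GPU, two GPUs per NPU */
            struct npu_link links[] = {
                    { .group_id = 0 }, { .group_id = 0 },
                    { .group_id = 1 }, { .group_id = 1 },
            };
            int i;

            assign_bdfns(links, 4);
            for (i = 0; i < 4; i++)
                    printf("link %d: group %u -> %02x:%02x.%x\n", i,
                           links[i].group_id, links[i].bdfn >> 8,
                           (links[i].bdfn >> 3) & 0x1f, links[i].bdfn & 0x7);
            return 0;
    }

For Garrison's two-links-per-GPU layout, the patch's npu_allocate_bdfn()
arrives at the same numbers by scanning the already-created devices; the
sketch above just spells the mapping out.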
Signed-off-by: Russell Currey <ruscur at russell.cc>
---
This patch has too much in it, but I can't find a way to separate it
without breaking functionality. Let me know if you see a way to do so.
This supersedes the series starting with "astbmc: Add NPU slot type and
use it in Garrison". It's all a bit of a mess...
---
hw/npu.c | 59 +++++++++++++++++++--------------------------
include/npu.h | 3 +++
platforms/astbmc/astbmc.h | 8 ++++++
platforms/astbmc/garrison.c | 59 +++++++++++++--------------------------------
platforms/astbmc/slots.c | 10 +++++++-
5 files changed, 62 insertions(+), 77 deletions(-)
diff --git a/hw/npu.c b/hw/npu.c
index f535f8a..2ab6ac2 100644
--- a/hw/npu.c
+++ b/hw/npu.c
@@ -30,6 +30,7 @@
#include <npu-regs.h>
#include <npu.h>
#include <xscom.h>
+#include <string.h>
/*
* Terminology:
@@ -503,25 +504,27 @@ static int __npu_dev_bind_pci_dev(struct phb *phb __unused,
{
struct npu_dev *dev = data;
struct dt_node *pci_dt_node;
- uint32_t npu_npcq_phandle;
+ char *pcislot;
/* Ignore non-nvidia PCI devices */
if ((pd->vdid & 0xffff) != 0x10de)
return 0;
- /* Find the PCI devices pbcq */
- for (pci_dt_node = pd->dn->parent;
- pci_dt_node && !dt_find_property(pci_dt_node, "ibm,pbcq");
+ /* Find the PCI device's slot location */
+ for (pci_dt_node = pd->dn;
+ pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-label");
pci_dt_node = pci_dt_node->parent);
if (!pci_dt_node)
return 0;
- npu_npcq_phandle = dt_prop_get_u32(dev->dt_node, "ibm,npu-pbcq");
+ pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");
- if (dt_prop_get_u32(pci_dt_node, "ibm,pbcq") == npu_npcq_phandle &&
- (pd->vdid & 0xffff) == 0x10de)
- return 1;
+ prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n",
+ pcislot, dev->slot_label);
+
+ if (streq(pcislot, dev->slot_label))
+ return 1;
return 0;
}
@@ -604,6 +607,9 @@ static int npu_dn_fixup(struct phb *phb,
if (dev->phb || dev->pd)
return 0;
+ /* NPU devices require a slot location to associate with GPUs */
+ dev->slot_label = dt_prop_get(pd->dn, "ibm,slot-label");
+
/* Bind the emulated PCI device with the real one, which can't
* be done until the PCI devices are populated. Once the real
* PCI device is identified, we also need fix the device-tree
@@ -1619,34 +1625,19 @@ static void npu_dev_create_cfg(struct npu_dev *dev)
NPU_DEV_CFG_INIT_RO(dev, PCI_CFG_INT_LINE, 4, 0x00000200);
}
-static uint32_t npu_allocate_bdfn(struct npu *p, uint32_t pbcq)
+static uint32_t npu_allocate_bdfn(struct npu *p, uint32_t group)
{
int i;
- int dev = -1;
- int bdfn = -1;
+ int bdfn = (group << 3);
- /* Find the highest function number alloacted to emulated PCI
- * devices associated with this GPU. */
- for(i = 0; i < p->total_devices; i++) {
- int dev_bdfn = p->devices[i].bdfn;
- dev = MAX(dev, dev_bdfn & 0xf8);
-
- if (dt_prop_get_u32(p->devices[i].dt_node,
- "ibm,npu-pbcq") == pbcq)
- bdfn = MAX(bdfn, dev_bdfn);
+ for (i = 0; i < p->total_devices; i++) {
+ if (p->devices[i].bdfn == bdfn) {
+ bdfn++;
+ break;
+ }
}
- if (bdfn >= 0)
- /* Device has already been allocated for this GPU so
- * assign the emulated PCI device the next
- * function. */
- return bdfn + 1;
- else if (dev >= 0)
- /* Otherwise allocate a new device and allocate
- * function 0. */
- return dev + (1 << 3);
- else
- return 0;
+ return bdfn;
}
static void npu_create_devices(struct dt_node *dn, struct npu *p)
@@ -1674,7 +1665,7 @@ static void npu_create_devices(struct dt_node *dn, struct npu *p)
p->phb.scan_map = 0;
dt_for_each_compatible(npu_dn, link, "ibm,npu-link") {
struct npu_dev_bar *bar;
- uint32_t pbcq;
+ uint32_t group_id;
uint64_t val;
uint32_t j;
@@ -1689,8 +1680,8 @@ static void npu_create_devices(struct dt_node *dn, struct npu *p)
/* We don't support MMIO PHY access yet */
dev->pl_base = NULL;
- pbcq = dt_prop_get_u32(link, "ibm,npu-pbcq");
- dev->bdfn = npu_allocate_bdfn(p, pbcq);
+ group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
+ dev->bdfn = npu_allocate_bdfn(p, group_id);
/* This must be done after calling
* npu_allocate_bdfn() */
diff --git a/include/npu.h b/include/npu.h
index 778c985..800515a 100644
--- a/include/npu.h
+++ b/include/npu.h
@@ -148,6 +148,9 @@ struct npu_dev {
uint32_t procedure_status;
uint8_t pe_num;
+
+ /* Used to associate the NPU device with GPU PCI devices */
+ const char *slot_label;
};
/* NPU PHB descriptor */
diff --git a/platforms/astbmc/astbmc.h b/platforms/astbmc/astbmc.h
index 23c31c7..322282e 100644
--- a/platforms/astbmc/astbmc.h
+++ b/platforms/astbmc/astbmc.h
@@ -20,6 +20,13 @@
#define ST_LOC_PHB(chip_id, phb_idx) ((chip_id) << 16 | (phb_idx))
#define ST_LOC_DEVFN(dev, fn) ((dev) << 3 | (fn))
+/*
+ * NPU groups are used to allocate device numbers. There is a 1 to 1
+ * correlation between a NPU group and a physical GPU. Links within a group
+ * are allocated as functions within a device, so groups must be numbered
+ * sequentially starting at 0.
+ */
+#define ST_LOC_NPU_GROUP(group_id) ((group_id) << 3)
struct slot_table_entry {
enum slot_table_etype {
@@ -27,6 +34,7 @@ struct slot_table_entry {
st_phb,
st_pluggable_slot,
st_builtin_dev,
+ st_npu_slot
} etype;
uint32_t location;
const char *name;
diff --git a/platforms/astbmc/garrison.c b/platforms/astbmc/garrison.c
index 3ff84a3..f400a51 100644
--- a/platforms/astbmc/garrison.c
+++ b/platforms/astbmc/garrison.c
@@ -63,23 +63,13 @@ static const struct slot_table_entry garrison_phb0_3_slot[] = {
static const struct slot_table_entry garrison_npu0_slots[] = {
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,0),
- .name = "GPU2",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(0),
.name = "GPU2",
},
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,0),
- .name = "GPU1",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(1),
.name = "GPU1",
},
{ .etype = st_end },
@@ -152,23 +142,13 @@ static const struct slot_table_entry garrison_phb1_3_slot[] = {
static const struct slot_table_entry garrison_npu1_slots[] = {
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,0),
- .name = "GPU4",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(0,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(0),
.name = "GPU4",
},
{
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,0),
- .name = "GPU3",
- },
- {
- .etype = st_pluggable_slot,
- .location = ST_LOC_DEVFN(1,1),
+ .etype = st_npu_slot,
+ .location = ST_LOC_NPU_GROUP(1),
.name = "GPU3",
},
{ .etype = st_end },
@@ -233,7 +213,7 @@ static const struct slot_table_entry garrison_phb_table[] = {
#define NPU_INDIRECT0 0x8000000008010c3f
#define NPU_INDIRECT1 0x8000000008010c7f
-static void create_link(struct dt_node *npu, struct dt_node *pbcq, int index)
+static void create_link(struct dt_node *npu, int group, int index)
{
struct dt_node *link;
uint32_t lane_mask;
@@ -255,12 +235,12 @@ static void create_link(struct dt_node *npu, struct dt_node *pbcq, int index)
}
dt_add_property_u64s(link, "ibm,npu-phy", phy);
dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
- dt_add_property_cells(link, "ibm,npu-pbcq", pbcq->phandle);
+ dt_add_property_cells(link, "ibm,npu-group-id", group);
}
static void dt_create_npu(void)
{
- struct dt_node *xscom, *npu, *pbcq;
+ struct dt_node *xscom, *npu;
char namebuf[32];
dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
@@ -275,17 +255,12 @@ static void dt_create_npu(void)
dt_add_property_cells(npu, "ibm,npu-index", 0);
dt_add_property_cells(npu, "ibm,npu-links", 4);
- /* On Garrison we have 2 links per GPU device. The
- * first 2 links go to the GPU connected via
- * pbcq@2012c00 the second two via pbcq@2012800. */
- pbcq = dt_find_by_name(xscom, "pbcq@2012c00");
- assert(pbcq);
- create_link(npu, pbcq, 0);
- create_link(npu, pbcq, 1);
- pbcq = dt_find_by_name(xscom, "pbcq@2012800");
- assert(pbcq);
- create_link(npu, pbcq, 4);
- create_link(npu, pbcq, 5);
+ /* On Garrison we have 2 links per GPU device. These are
+ * grouped together as per the slot tables above. */
+ create_link(npu, 0, 0);
+ create_link(npu, 0, 1);
+ create_link(npu, 1, 4);
+ create_link(npu, 1, 5);
}
}
diff --git a/platforms/astbmc/slots.c b/platforms/astbmc/slots.c
index 36547e1..678a3cc 100644
--- a/platforms/astbmc/slots.c
+++ b/platforms/astbmc/slots.c
@@ -54,6 +54,7 @@ static const struct slot_table_entry *match_slot_dev_entry(struct phb *phb,
struct pci_device *pd)
{
const struct slot_table_entry *parent, *ent;
+ uint32_t bdfn;
/* Find a parent recursively */
if (pd->parent)
@@ -70,7 +71,14 @@ static const struct slot_table_entry *match_slot_dev_entry(struct phb *phb,
prerror("SLOT: Bad PHB entry type in table !\n");
continue;
}
- if (ent->location == (pd->bdfn & 0xff))
+
+ /* NPU slots match on device, not function */
+ if (ent->etype == st_npu_slot)
+ bdfn = pd->bdfn & 0xf8;
+ else
+ bdfn = pd->bdfn & 0xff;
+
+ if (ent->location == bdfn)
return ent;
}
return NULL;
--
2.9.0