[Skiboot] [PATCH v2 2/2] nvlink: Allocate and associate NPUs using slots instead of pbcq

Russell Currey ruscur at russell.cc
Wed Jun 22 15:22:57 AEST 2016


On Wed, 2016-06-22 at 12:20 +1000, Alistair Popple wrote:
> Thanks Russell, a few comments below.
> 
> On Tue, 21 Jun 2016 18:04:48 Russell Currey wrote:
> > NPU links (npu_dev) need to be associated with their corresponding
> > physical GPU's PCI devices.  On Garrison systems, this works by comparing
> > the PBCQ handle of the NPU and GPU.  However, this will fail if a system
> > has multiple GPUs behind a single PHB.  Instead, associate GPUs and NPUs
> > by comparing slot locations.
> > 
> > Signed-off-by: Russell Currey <ruscur at russell.cc>
> > ---
> > V2: Change NPU device allocation to completely remove the pbcq code
> > ---
> >  hw/npu.c                    | 32 ++++++++++++++++++--------------
> >  include/npu.h               |  3 +++
> >  platforms/astbmc/garrison.c | 23 +++++++++--------------
> >  3 files changed, 30 insertions(+), 28 deletions(-)
> > 
> > diff --git a/hw/npu.c b/hw/npu.c
> > index f535f8a..5ced560 100644
> > --- a/hw/npu.c
> > +++ b/hw/npu.c
> > @@ -30,6 +30,7 @@
> >  #include <npu-regs.h>
> >  #include <npu.h>
> >  #include <xscom.h>
> > +#include <string.h>
> >  
> >  /*
> >   * Terminology:
> > @@ -503,25 +504,25 @@ static int __npu_dev_bind_pci_dev(struct phb *phb 
> __unused,
> >  {
> >  	struct npu_dev *dev = data;
> >  	struct dt_node *pci_dt_node;
> > -	uint32_t npu_npcq_phandle;
> > +	char *pcislot;
> >  
> >  	/* Ignore non-nvidia PCI devices */
> >  	if ((pd->vdid & 0xffff) != 0x10de)
> >  		return 0;
> >  
> > -	/* Find the PCI devices pbcq */
> > +	/* Find the PCI device's slot location */
> >  	for (pci_dt_node = pd->dn->parent;
> 
> This starts the search for "ibm,slot-label" at a GPU's parent device-node. 
> Shouldn't it start at pd->dn instead? This was an optimisation when searching 
> for pbcq which at a minimum would be in a GPU's parent device-node, but is 
> that true for slot-label as well?

No, I was lazy and just replaced the property name.  It should definitely start
at pd->dn.

> 
> > -	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,pbcq");
> > -	     pci_dt_node = pci_dt_node->parent);
> > +             pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-
> label");
> > +             pci_dt_node = pci_dt_node->parent);
> >  
> >  	if (!pci_dt_node)
> >  		return 0;
> >  
> > -	npu_npcq_phandle = dt_prop_get_u32(dev->dt_node, "ibm,npu-pbcq");
> > +	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");
> >  
> > -	if (dt_prop_get_u32(pci_dt_node, "ibm,pbcq") == npu_npcq_phandle &&
> > -	    (pd->vdid & 0xffff) == 0x10de)
> > -			return 1;
> > +	if (streq(pcislot, dev->slot_label)) {
> > +		return 1;
> > +	}
> >  
> >  	return 0;
> >  }
> > @@ -604,6 +605,9 @@ static int npu_dn_fixup(struct phb *phb,
> >  	if (dev->phb || dev->pd)
> >  		return 0;
> >  
> > +	/* NPU devices require a slot location to associate with GPUs */
> > +	dev->slot_label = dt_prop_get(pd->dn, "ibm,slot-label");
> > +
> >  	/* Bind the emulated PCI device with the real one, which can't
> >  	 * be done until the PCI devices are populated. Once the real
> >  	 * PCI device is identified, we also need fix the device-tree
> > @@ -1619,20 +1623,20 @@ static void npu_dev_create_cfg(struct npu_dev *dev)
> >  		NPU_DEV_CFG_INIT_RO(dev, PCI_CFG_INT_LINE, 4, 0x00000200);
> >  }
> >  
> > -static uint32_t npu_allocate_bdfn(struct npu *p, uint32_t pbcq)
> > +static uint32_t npu_allocate_bdfn(struct npu *p, int group)
> >  {
> >  	int i;
> >  	int dev = -1;
> >  	int bdfn = -1;
> >  
> > -	/* Find the highest function number alloacted to emulated PCI
> > +	/* Find the highest function number allocated to emulated PCI
> >  	 * devices associated with this GPU. */
> >  	for(i = 0; i < p->total_devices; i++) {
> >  		int dev_bdfn = p->devices[i].bdfn;
> >  		dev = MAX(dev, dev_bdfn & 0xf8);
> >  
> >  		if (dt_prop_get_u32(p->devices[i].dt_node,
> > -				    "ibm,npu-pbcq") == pbcq)
> > +				    "ibm,npu-group-id") == group)
> 
> To fix the comment on the previous patch you could allocate device numbers 
> based on group-id and we just mandate that each slot_table_entry has 
> sequentially numbered groups starting at 0.

That seems easier.

> 
> >  			bdfn = MAX(bdfn, dev_bdfn);
> >  	}
> >  
> > @@ -1674,7 +1678,7 @@ static void npu_create_devices(struct dt_node *dn, 
> struct npu *p)
> >  	p->phb.scan_map = 0;
> >  	dt_for_each_compatible(npu_dn, link, "ibm,npu-link") {
> >  		struct npu_dev_bar *bar;
> > -		uint32_t pbcq;
> > +		uint32_t group_id;
> >  		uint64_t val;
> >  		uint32_t j;
> >  
> > @@ -1689,8 +1693,8 @@ static void npu_create_devices(struct dt_node *dn, 
> struct npu *p)
> >  		/* We don't support MMIO PHY access yet */
> >  		dev->pl_base = NULL;
> >  
> > -		pbcq = dt_prop_get_u32(link, "ibm,npu-pbcq");
> > -		dev->bdfn = npu_allocate_bdfn(p, pbcq);
> > +		group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
> 
> The alternate solution is to store group_id in dev->group_id and compare 
> st_npu_slot locations using dev->group_id instead.

Storing it in a struct instead of going in and out of the device tree is nice,
though as I mentioned before I'd rather restrict that to npu structs... I'll do
some refactoring and see how it turns out.

> 
> > +		dev->bdfn = npu_allocate_bdfn(p, group_id);
> >  
> >  		/* This must be done after calling
> >  		 * npu_allocate_bdfn() */
> > diff --git a/include/npu.h b/include/npu.h
> > index 778c985..800515a 100644
> > --- a/include/npu.h
> > +++ b/include/npu.h
> > @@ -148,6 +148,9 @@ struct npu_dev {
> >  	uint32_t		procedure_status;
> >  
> >  	uint8_t			pe_num;
> > +
> > +	/* Used to associate the NPU device with GPU PCI devices */
> > +	const char		*slot_label;
> >  };
> >  
> >  /* NPU PHB descriptor */
> > diff --git a/platforms/astbmc/garrison.c b/platforms/astbmc/garrison.c
> > index fb006db..f400a51 100644
> > --- a/platforms/astbmc/garrison.c
> > +++ b/platforms/astbmc/garrison.c
> > @@ -213,7 +213,7 @@ static const struct slot_table_entry 
> garrison_phb_table[] = {
> >  #define NPU_INDIRECT0	0x8000000008010c3f
> >  #define NPU_INDIRECT1	0x8000000008010c7f
> >  
> > -static void create_link(struct dt_node *npu, struct dt_node *pbcq, int 
> index)
> > +static void create_link(struct dt_node *npu, int group, int index)
> >  {
> >  	struct dt_node *link;
> >  	uint32_t lane_mask;
> > @@ -235,12 +235,12 @@ static void create_link(struct dt_node *npu, struct 
> dt_node *pbcq, int index)
> >  	}
> >  	dt_add_property_u64s(link, "ibm,npu-phy", phy);
> >  	dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
> > -	dt_add_property_cells(link, "ibm,npu-pbcq", pbcq->phandle);
> > +	dt_add_property_cells(link, "ibm,npu-group-id", group);
> >  }
> >  
> >  static void dt_create_npu(void)
> >  {
> > -        struct dt_node *xscom, *npu, *pbcq;
> > +        struct dt_node *xscom, *npu;
> >          char namebuf[32];
> >  
> >  	dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
> > @@ -255,17 +255,12 @@ static void dt_create_npu(void)
> >  		dt_add_property_cells(npu, "ibm,npu-index", 0);
> >  		dt_add_property_cells(npu, "ibm,npu-links", 4);
> >  
> > -		/* On Garrison we have 2 links per GPU device. The
> > -		 * first 2 links go to the GPU connected via
> > -		 * pbcq at 2012c00 the second two via pbcq at 2012800. */
> > -		pbcq = dt_find_by_name(xscom, "pbcq at 2012c00");
> > -		assert(pbcq);
> > -		create_link(npu, pbcq, 0);
> > -		create_link(npu, pbcq, 1);
> > -		pbcq = dt_find_by_name(xscom, "pbcq at 2012800");
> > -		assert(pbcq);
> > -		create_link(npu, pbcq, 4);
> > -		create_link(npu, pbcq, 5);
> > +		/* On Garrison we have 2 links per GPU device.  These are
> > +		 * grouped together as per the slot tables above. */
> > +		create_link(npu, 0, 0);
> > +		create_link(npu, 0, 1);
> > +		create_link(npu, 1, 4);
> > +		create_link(npu, 1, 5);
> >  	}
> >  }
> >  
> > 



More information about the Skiboot mailing list