[Skiboot] [PATCH] npu2: Use phys-map to get MMIO BARs

Alistair Popple alistair at popple.id.au
Fri Jun 30 10:38:24 AEST 2017


On Thu, 29 Jun 2017 03:14:12 PM Michael Neuling wrote:
> To be honest, there is a bunch of abstraction here that can probably go away now
> at that we have phys_map_get.  I'm not really sure assign_mmio_bars() adds
> anything now we have phys_map_get().  It seems to make the code harder to trace
> IMHO.

Right, I tend to agree. I'd like to go through and really clean up all the BAR
code. A lot of the abstractions come from the original code which supports
rewritting/remapping the BARs from the OS via the emulated PCIe device.

However it's not obvious that has ever actually worked as the OS doesn't
actually rewrite the BARs. Removing this would simplify the code a lot as we
wouldn't have to keep a cache of the BARs.

- Alistair

> Mikey
> 
> On Mon, 2017-06-26 at 16:57 +1000, Andrew Donnellan wrote:
> > Commit bdea201a4c4b ("hw/npu2.c: Use phys-map to get GPU memory BARs")
> > added use of phys-map for setting GPU memory BARs.
> > 
> > Move the MMIO BARs over to using phys-map as well.
> > 
> > Acked-by: Alistair Popple <alistair at popple.id.au>
> > Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> > ---
> > v2-v3: use dev->index rather than macros when setting npu2_bar->index for 
> > NTL bars
> > 
> > ---
> >  hw/npu2.c      | 96 ++++++++++++++++++++++++++++-----------------------------
> > -
> >  include/npu2.h |  4 +++
> >  2 files changed, 50 insertions(+), 50 deletions(-)
> > 
> > diff --git a/hw/npu2.c b/hw/npu2.c
> > index 8786cc1e..be9f55c1 100644
> > --- a/hw/npu2.c
> > +++ b/hw/npu2.c
> > @@ -145,6 +145,11 @@ static struct npu2_dev *npu2_bdf_to_dev(struct npu2 *p,
> >  	return NULL;
> >  }
> >  
> > +static inline void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
> > +{
> > +	phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
> > +}
> > +
> >  static void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
> >  {
> >  	uint64_t reg, val;
> > @@ -1101,55 +1106,45 @@ static const struct phb_ops npu_ops = {
> >  
> >  static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2],
> > uint64_t mm_win[2])
> >  {
> > -	uint64_t mem_start;
> >  	uint32_t i;
> >  	struct npu2_bar *bar;
> >  	struct npu2_bar npu2_bars[] = {
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2,
> > 0,   NPU2_PHY_BAR), .size = 0x1000000,
> > +		/*
> > +		 * NPU_REGS must be first in this list, at least on DD1.
> > +		 * On DD2, stack 0 will be used for NPU_REGS, stack 1/2 for
> > NPU_PHY.
> > +		 */
> > +		{ .type = NPU_REGS, .index = 0,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
> >  		  .flags = NPU2_BAR_FLAG_ENABLED },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0,
> > 0,   NPU2_PHY_BAR), .size =  0x200000,
> > +		{ .type = NPU_PHY, .index = 0,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
> >  		  .flags = NPU2_BAR_FLAG_ENABLED },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1,
> > 0,   NPU2_PHY_BAR), .size =  0x200000,
> > +		{ .type = NPU_PHY, .index = 1,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
> >  		  .flags = NPU2_BAR_FLAG_ENABLED },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0,
> > 0,  NPU2_NTL0_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0,
> > 0,  NPU2_NTL1_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1,
> > 0,  NPU2_NTL0_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1,
> > 0,  NPU2_NTL1_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2,
> > 0,  NPU2_NTL0_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2,
> > 0,  NPU2_NTL1_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0,
> > NPU2_GENID_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0,
> > NPU2_GENID_BAR), .size =   0x20000 },
> > -		{ .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0,
> > NPU2_GENID_BAR), .size =   0x20000 },
> > +		{ .type = NPU_NTL, .index = 0,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR)
> > },
> > +		{ .type = NPU_NTL, .index = 1,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR)
> > },
> > +		{ .type = NPU_NTL, .index = 2,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR)
> > },
> > +		{ .type = NPU_NTL, .index = 3,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR)
> > },
> > +		{ .type = NPU_NTL, .index = 4,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR)
> > },
> > +		{ .type = NPU_NTL, .index = 5,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR)
> > },
> > +		{ .type = NPU_GENID, .index = 0,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0,
> > NPU2_GENID_BAR) },
> > +		{ .type = NPU_GENID, .index = 1,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0,
> > NPU2_GENID_BAR) },
> > +		{ .type = NPU_GENID, .index = 2,
> > +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0,
> > NPU2_GENID_BAR) },
> >  	};
> >  
> > -	mem_start = 0x6030200000000;
> > -	mem_start |= gcid << PPC_BITLSHIFT(21);
> > -
> > -	/*
> > -	 * We're going to assign the BARs in reversed order according
> > -	 * to their sizes, just like the order we have in npu_bars[].
> > -	 * In that way, all BARs will be aligned perfectly without
> > -	 * wasting resources. Also, the Linux kernel won't change
> > -	 * anything though it attempts to reassign the BARs that
> > -	 * it can see, which are NTL and GENID BARs.
> > -	 *
> > -	 * GLOBAL MMIO (16MB)
> > -	 *        PHY0 (2MB)
> > -	 *        PHB1 (2MB)
> > -	 *        NTL0 (128KB)
> > -	 *        NTL1 (128KB)
> > -	 *        NTL2 (128KB)
> > -	 *        NTL3 (128KB)
> > -	 *        NTL4 (128KB)
> > -	 *        NTL5 (128KB)
> > -	 *      GENID0 (128KB)
> > -	 *      GENID1 (128KB)
> > -	 *      GENID2 (128KB)
> > -	 */
> >  	for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
> >  		bar = &npu2_bars[i];
> > -		bar->base = mem_start;
> > -		mem_start += bar->size;
> > +		npu2_get_bar(gcid, bar);
> >  		npu2_write_bar(NULL, bar, gcid, scom);
> >  	}
> >  
> > @@ -1442,6 +1437,7 @@ static void npu2_populate_devices(struct npu2 *p,
> >  	struct npu2_dev *dev;
> >  	struct dt_node *npu2_dn, *link;
> >  	uint32_t npu_phandle, index = 0;
> > +	int stack;
> >  
> >  	/*
> >  	 * Get the npu node which has the links which we expand here
> > @@ -1474,23 +1470,23 @@ static void npu2_populate_devices(struct npu2 *p,
> >  		dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
> >  		dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
> >  
> > -		/* Populate BARs. BAR0/1 is the NTL bar. We initialise
> > -		 * it from the HW. */
> > +		/* Populate BARs. BAR0/1 is the NTL bar. */
> > +		stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
> >  		npu2_bar = &dev->bars[0].npu2_bar;
> > -		if (NPU2DEV_BRICK(dev) == 0)
> > -			/* Leave the block as 0 - the read/write bar
> > -			 * functions fill it in */
> > -			npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 +
> > NPU2DEV_STACK(dev), 0, NPU2_NTL0_BAR);
> > -		else
> > -			npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 +
> > NPU2DEV_STACK(dev), 0, NPU2_NTL1_BAR);
> > +		npu2_bar->type = NPU_NTL;
> > +		npu2_bar->index = dev->index;
> > +		npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev)
> > == 0 ?
> > +						NPU2_NTL0_BAR :
> > NPU2_NTL1_BAR);
> > +	        npu2_get_bar(p->chip_id, npu2_bar);
> >  
> > -		npu2_read_bar(p, npu2_bar);
> >  		dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM |
> > PCI_CFG_BAR_MEM64;
> >  
> >  		/* BAR2/3 is the GENID bar. */
> >  		npu2_bar = &dev->bars[1].npu2_bar;
> > -		npu2_bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 +
> > NPU2DEV_STACK(dev), 0, NPU2_GENID_BAR);
> > -		npu2_read_bar(p, npu2_bar);
> > +		npu2_bar->type = NPU_GENID;
> > +		npu2_bar->index = NPU2DEV_STACK(dev);
> > +		npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
> > +	        npu2_get_bar(p->chip_id, npu2_bar);
> >  
> >  		/* The GENID is a single physical BAR that we split
> >  		 * for each emulated device */
> > diff --git a/include/npu2.h b/include/npu2.h
> > index 6476c729..88ebe9e1 100644
> > --- a/include/npu2.h
> > +++ b/include/npu2.h
> > @@ -17,6 +17,8 @@
> >  #ifndef __NPU2_H
> >  #define __NPU2_H
> >  
> > +#include <phys-map.h>
> > +
> >  /* Debugging options */
> >  #define NPU2DBG(p, fmt, a...)	prlog(PR_DEBUG, "NPU%d: " fmt, \
> >  				      (p)->phb.opal_id, ##a)
> > @@ -45,6 +47,8 @@
> >   * emulated PCIe BARs. The is a subtle difference between the two as
> >   * not all BARs are exposed outside of skiboot. */
> >  struct npu2_bar {
> > +	enum phys_map_type	type;
> > +	int			index;
> >  #define NPU2_BAR_FLAG_ENABLED	0x0010
> >  
> >  /* Generation ID's are a single space in the hardware but we split



More information about the Skiboot mailing list