[Skiboot] [PATCH v3 02/10] npu2: Rework NPU data structures for OpenCAPI

Fri Feb 2 18:23:54 AEDT 2018

On 02/02/18 16:46, Alistair Popple wrote:
> Hi Andrew,
> 
> I'm just curious ... given that we aren't really sharing much code what is
> gained from sharing data structures?
> 
> How many fields in struct npu2_dev_nvlink, npu2_dev, npu2 are actually shared
> vs. exclusively used for NVLink or OpenCAPI? Which fields are used for NVLink,
> which for OpenCAPI and which are common to both?
> 
> I'm a little worried that as the code evolves it will become difficult to work
> out which fields are used by what (and how). Given there isn't code
> sharing, perhaps it would be best to not share the structs? Although I'm
> interested in your thoughts.

Yeah, I admit this is a bit messier than I'd like it to be.

Whether you're an OpenCAPI or an NVLink device, you're going to have a 
device tree node, BARs, an NPU pointer, a SCOM base, a lane mask, and 
the hw procedure tracking. NVLink has an IODA cache at the NPU level and 
the pci-virt stuff at the device level. OCAPI has a PHB per device.

Obviously when we eventually support a mixed NVLink/OCAPI setup on the 
same NPU we're going to need to share whole-npu data somehow.

The hw procedures code is shared and takes npu2_dev pointers in lots of 
places, so we'd have to refactor all of that. I know you've suggested 
splitting the stuff that's relevant to hw procedures out of npu2_dev into 
a separate npu2_brick or npu2_link or something, though that'd also 
involve a fair bit of code churn.

I'd like to get this series in sooner rather than later, especially given 
the corresponding kernel driver has already been merged, so I want to 
avoid as much code churn as possible unless the result is obviously better.

Andrew

> 
> - Alistair
> 
> On Wednesday, 31 January 2018 8:34:42 PM AEDT Andrew Donnellan wrote:
>> Unlike NVLink, OpenCAPI registers a separate PHB for each device, in order
>> to allow us to force Linux to use the correct MMIO windows for each NPU
>> link. This requires some reworking of NPU data structures to account for
>> the fact that a PHB could correspond to either an NPU (NVLink) or a single
>> link (OpenCAPI).
>>
>> At some later point, we may want to rework the NVLink code to present a
>> separate PHB per device in order to simplify this. For now, we split
>> NVLink-specific device data into a separate struct in order to make it
>> clear which fields are NVLink-only.
>>
>> Additionally, add helper functions to correctly translate between
>> OpenCAPI/NVLink PHBs and the underlying structures, and various fields
>> for OpenCAPI data that we're going to need later on.
>>
>> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
>>
>> ---
>>
>> v1->v2:
>>
>>    - rebase on master
>>    - add fields for i2c port
>> ---
>>   hw/npu2-hw-procedures.c |   6 +-
>>   hw/npu2.c               | 129 +++++++++++++++++++++--------------------
>>   include/npu2.h          |  76 ++++++++++++++++++------
>>   include/pci.h           |   1 +-
>>   4 files changed, 129 insertions(+), 83 deletions(-)
>>
>> diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c
>> index b21c399..d66632f 100644
>> --- a/hw/npu2-hw-procedures.c
>> +++ b/hw/npu2-hw-procedures.c
>> @@ -790,7 +790,7 @@ static int64_t npu_dev_procedure_read(struct npu2_dev *dev, uint32_t offset,
>>   
>>   	if (size != 4) {
>>   		/* Short config reads are not supported */
>> -		prlog(PR_ERR, "NPU%d: Short read of procedure register\n", dev->npu->phb.opal_id);
>> +		prlog(PR_ERR, "NPU%d: Short read of procedure register\n", npu2_dev_to_phb(dev)->opal_id);
>>   		return OPAL_PARAMETER;
>>   	}
>>   
>> @@ -812,7 +812,7 @@ static int64_t npu_dev_procedure_read(struct npu2_dev *dev, uint32_t offset,
>>   
>>   	default:
>>   		prlog(PR_ERR, "NPU%d: Invalid vendor specific offset 0x%08x\n",
>> -		      dev->npu->phb.opal_id, offset);
>> +		      npu2_dev_to_phb(dev)->opal_id, offset);
>>   		rc = OPAL_PARAMETER;
>>   	}
>>   
>> @@ -828,7 +828,7 @@ static int64_t npu_dev_procedure_write(struct npu2_dev *dev, uint32_t offset,
>>   	if (size != 4) {
>>   		/* Short config writes are not supported */
>>   		prlog(PR_ERR, "NPU%d: Short read of procedure register\n",
>> -		      dev->npu->phb.opal_id);
>> +		      npu2_dev_to_phb(dev)->opal_id);
>>   		return OPAL_PARAMETER;
>>   	}
>>   
>> diff --git a/hw/npu2.c b/hw/npu2.c
>> index 41c30f4..18bd63d 100644
>> --- a/hw/npu2.c
>> +++ b/hw/npu2.c
>> @@ -67,16 +67,16 @@
>>   /* Set a specific flag in the vendor config space */
>>   void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag)
>>   {
>> -	ndev->link_flags |= flag;
>> -	PCI_VIRT_CFG_INIT_RO(ndev->pvd, VENDOR_CAP_START +
>> -			     VENDOR_CAP_PCI_DEV_OFFSET, 1, ndev->link_flags);
>> +	ndev->nvlink.link_flags |= flag;
>> +	PCI_VIRT_CFG_INIT_RO(ndev->nvlink.pvd, VENDOR_CAP_START +
>> +			     VENDOR_CAP_PCI_DEV_OFFSET, 1, ndev->nvlink.link_flags);
>>   }
>>   
>>   void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag)
>>   {
>> -	ndev->link_flags &= ~flag;
>> -	PCI_VIRT_CFG_INIT_RO(ndev->pvd, VENDOR_CAP_START +
>> -			     VENDOR_CAP_PCI_DEV_OFFSET, 1, ndev->link_flags);
>> +	ndev->nvlink.link_flags &= ~flag;
>> +	PCI_VIRT_CFG_INIT_RO(ndev->nvlink.pvd, VENDOR_CAP_START +
>> +			     VENDOR_CAP_PCI_DEV_OFFSET, 1, ndev->nvlink.link_flags);
>>   }
>>   
>>   static inline void npu2_ioda_sel(struct npu2 *p, uint32_t table,
>> @@ -97,7 +97,7 @@ static struct npu2_dev *npu2_bdf_to_dev(struct npu2 *p,
>>   	if (bdfn & ~0xff)
>>   		return NULL;
>>   
>> -	pvd = pci_virt_find_device(&p->phb, bdfn);
>> +	pvd = pci_virt_find_device(&p->phb_nvlink, bdfn);
>>   	if (pvd)
>>   		return pvd->data;
>>   
>> @@ -270,7 +270,7 @@ static int64_t npu2_cfg_write_bar(struct npu2_dev *dev,
>>   				  uint32_t offset, uint32_t size,
>>   				  uint32_t data)
>>   {
>> -	struct pci_virt_device *pvd = dev->pvd;
>> +	struct pci_virt_device *pvd = dev->nvlink.pvd;
>>   	struct npu2_pcie_bar *bar = (struct npu2_pcie_bar *) pcrf->data;
>>   	struct npu2_bar old_bar, *npu2_bar = &bar->npu2_bar;
>>   	uint32_t pci_cmd;
>> @@ -343,7 +343,7 @@ static int64_t npu2_dev_cfg_exp_devcap(void *dev,
>>   	if ((size != 2) || (offset & 1)) {
>>   		/* Short config writes are not supported */
>>   		prlog(PR_ERR, "NPU%d: Unsupported write to pcie control register\n",
>> -		      ndev->phb->opal_id);
>> +		      ndev->nvlink.phb->opal_id);
>>   		return OPAL_PARAMETER;
>>   	}
>>   
>> @@ -407,9 +407,9 @@ static int __npu2_dev_bind_pci_dev(struct phb *phb __unused,
>>   	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");
>>   
>>   	prlog(PR_DEBUG, "NPU2: comparing GPU '%s' and NPU2 '%s'\n",
>> -	      pcislot, dev->slot_label);
>> +	      pcislot, dev->nvlink.slot_label);
>>   
>> -	if (streq(pcislot, dev->slot_label))
>> +	if (streq(pcislot, dev->nvlink.slot_label))
>>   		return 1;
>>   
>>   	return 0;
>> @@ -420,20 +420,20 @@ static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
>>   	struct phb *phb;
>>   	uint32_t i;
>>   
>> -	if (dev->pd)
>> +	if (dev->nvlink.pd)
>>   		return;
>>   
>>   	for (i = 0; i < 64; i++) {
>> -		if (dev->npu->phb.opal_id == i)
>> +		if (dev->npu->phb_nvlink.opal_id == i)
>>   			continue;
>>   
>>   		phb = pci_get_phb(i);
>>   		if (!phb)
>>   			continue;
>>   
>> -		dev->pd = pci_walk_dev(phb, NULL, __npu2_dev_bind_pci_dev, dev);
>> -		if (dev->pd) {
>> -			dev->phb = phb;
>> +		dev->nvlink.pd = pci_walk_dev(phb, NULL, __npu2_dev_bind_pci_dev, dev);
>> +		if (dev->nvlink.pd) {
>> +			dev->nvlink.phb = phb;
>>   			/* Found the device, set the bit in config space */
>>   			npu2_set_link_flag(dev, NPU2_DEV_PCI_LINKED);
>>   			return;
>> @@ -441,7 +441,7 @@ static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
>>   	}
>>   
>>   	prlog(PR_INFO, "%s: No PCI device for NPU2 device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n",
>> -	      __func__, dev->npu->phb.opal_id, dev->index);
>> +	      __func__, dev->npu->phb_nvlink.opal_id, dev->index);
>>   }
>>   
>>   static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED;
>> @@ -628,14 +628,14 @@ static int npu2_dn_fixup(struct phb *phb,
>>   			 struct pci_device *pd,
>>   			 void *data __unused)
>>   {
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	struct npu2_dev *dev;
>>   	uint32_t speed;
>>   	const char *label;
>>   
>>   	dev = npu2_bdf_to_dev(p, pd->bdfn);
>>   	assert(dev);
>> -	if (dev->phb || dev->pd)
>> +	if (dev->nvlink.phb || dev->nvlink.pd)
>>   		return 0;
>>   
>>   	npu2_assign_gmb(dev);
>> @@ -675,7 +675,7 @@ static int npu2_dn_fixup(struct phb *phb,
>>   		return 0;
>>   	}
>>   
>> -	dev->slot_label = label;
>> +	dev->nvlink.slot_label = label;
>>   
>>   	/*
>>   	 * Bind the emulated PCI device with the real one, which can't
>> @@ -684,14 +684,14 @@ static int npu2_dn_fixup(struct phb *phb,
>>   	 * for it
>>   	 */
>>   	npu2_dev_bind_pci_dev(dev);
>> -	if (dev->phb && dev->pd && dev->pd->dn) {
>> -		if (dt_find_property(dev->pd->dn, "ibm,npu"))
>> -			npu2_append_phandle(dev->pd->dn, pd->dn->phandle);
>> +	if (dev->nvlink.phb && dev->nvlink.pd && dev->nvlink.pd->dn) {
>> +		if (dt_find_property(dev->nvlink.pd->dn, "ibm,npu"))
>> +			npu2_append_phandle(dev->nvlink.pd->dn, pd->dn->phandle);
>>   		else
>> -			dt_add_property_cells(dev->pd->dn, "ibm,npu", pd->dn->phandle);
>> +			dt_add_property_cells(dev->nvlink.pd->dn, "ibm,npu", pd->dn->phandle);
>>   
>> -		dt_add_property_cells(pd->dn, "ibm,gpu", dev->pd->dn->phandle);
>> -		dev->gpu_bdfn = dev->pd->bdfn;
>> +		dt_add_property_cells(pd->dn, "ibm,gpu", dev->nvlink.pd->dn->phandle);
>> +		dev->nvlink.gpu_bdfn = dev->nvlink.pd->bdfn;
>>   	}
>>   
>>   	return 0;
>> @@ -739,7 +739,7 @@ static void npu2_init_ioda_cache(struct npu2 *p)
>>   
>>   static int64_t npu2_ioda_reset(struct phb *phb, bool purge)
>>   {
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	uint32_t i;
>>   
>>   	if (purge) {
>> @@ -814,7 +814,7 @@ static void npu2_hw_init(struct npu2 *p)
>>   {
>>   	uint64_t val;
>>   
>> -	npu2_ioda_reset(&p->phb, false);
>> +	npu2_ioda_reset(&p->phb_nvlink, false);
>>   
>>   	/* Enable XTS retry mode */
>>   	val = npu2_read(p, NPU2_XTS_CFG);
>> @@ -881,7 +881,7 @@ static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
>>   					   uint64_t pci_start_addr,
>>   					   uint64_t pci_mem_size)
>>   {
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	uint64_t end;
>>   	uint64_t tve;
>>   
>> @@ -939,7 +939,7 @@ static int64_t npu2_map_pe_dma_window(struct phb *phb,
>>   				      uint64_t tce_table_size,
>>   				      uint64_t tce_page_size)
>>   {
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	uint64_t tts_encoded;
>>   	uint64_t data64 = 0;
>>   
>> @@ -1008,7 +1008,7 @@ static int64_t npu2_set_pe(struct phb *phb,
>>   			   uint8_t fcompare,
>>   			   uint8_t action)
>>   {
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p;
>>   	struct npu2_dev *dev;
>>   	uint64_t reg, val;
>>   
>> @@ -1025,13 +1025,19 @@ static int64_t npu2_set_pe(struct phb *phb,
>>   		return OPAL_UNSUPPORTED;
>>   
>>   	/* Get the NPU2 device */
>> -	dev = npu2_bdf_to_dev(p, bdfn);
>> +	if (phb->phb_type == phb_type_npu_v2_opencapi) {
>> +		dev = phb_to_npu2_dev_ocapi(phb);
>> +		p = dev->npu;
>> +	} else {
>> +		p = phb_to_npu2_nvlink(phb);
>> +		dev = npu2_bdf_to_dev(p, bdfn);
>> +	}
>>   	if (!dev)
>>   		return OPAL_PARAMETER;
>>   
>>   	val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
>>   	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
>> -	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->gpu_bdfn);
>> +	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
>>   
>>   	if (!NPU2DEV_BRICK(dev))
>>   		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
>> @@ -1043,7 +1049,7 @@ static int64_t npu2_set_pe(struct phb *phb,
>>   	npu2_write(p, reg, val);
>>   	val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
>>   	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
>> -	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->gpu_bdfn);
>> +	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
>>   	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
>>   			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
>>   	p->bdf2pe_cache[dev->index] = val;
>> @@ -1126,7 +1132,7 @@ static int64_t npu2_eeh_next_error(struct phb *phb,
>>   				   uint16_t *pci_error_type,
>>   				   uint16_t *severity)
>>   {
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	int i;
>>   	uint64_t result = 0;
>>   
>> @@ -1154,7 +1160,7 @@ static int64_t npu2_tce_kill(struct phb *phb, uint32_t kill_type,
>>   			     uint64_t pe_number, uint32_t tce_size,
>>   			     uint64_t dma_addr, uint32_t npages)
>>   {
>> -	struct npu2 *npu = phb_to_npu2(phb);
>> +	struct npu2 *npu = phb_to_npu2_nvlink(phb);
>>   	uint32_t tce_page_size;
>>   	uint64_t val;
>>   
>> @@ -1389,7 +1395,7 @@ static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev,
>>   				       uint32_t start,
>>   				       uint32_t prev_cap)
>>   {
>> -	struct pci_virt_device *pvd = dev->pvd;
>> +	struct pci_virt_device *pvd = dev->nvlink.pvd;
>>   	uint32_t val;
>>   
>>   	/* Add capability list */
>> @@ -1471,12 +1477,12 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
>>   					 uint32_t start,
>>   					 uint32_t prev_cap)
>>   {
>> -	struct pci_virt_device *pvd = dev->pvd;
>> +	struct pci_virt_device *pvd = dev->nvlink.pvd;
>>   
>>   	/* Capbility list */
>>   	PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start);
>>   	PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_VENDOR);
>> -	dev->vendor_cap = start;
>> +	dev->nvlink.vendor_cap = start;
>>   
>>   	/* Length and version */
>>   	PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, VENDOR_CAP_LEN);
>> @@ -1503,7 +1509,7 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
>>   
>>   static void npu2_populate_cfg(struct npu2_dev *dev)
>>   {
>> -	struct pci_virt_device *pvd = dev->pvd;
>> +	struct pci_virt_device *pvd = dev->nvlink.pvd;
>>   	struct npu2_pcie_bar *bar;
>>   	uint32_t pos;
>>   
>> @@ -1611,12 +1617,13 @@ static void npu2_populate_devices(struct npu2 *p,
>>   
>>   	/* Walk the link@x nodes to initialize devices */
>>   	p->total_devices = 0;
>> -	p->phb.scan_map = 0;
>> +	p->phb_nvlink.scan_map = 0;
>>   	dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") {
>>   		uint32_t group_id;
>>   		struct npu2_bar *npu2_bar;
>>   
>>   		dev = &p->devices[index];
>> +		dev->type = NPU2_DEV_TYPE_NVLINK;
>>   		dev->npu = p;
>>   		dev->dt_node = link;
>>   		dev->index = dt_prop_get_u32(link, "ibm,npu-link-index");
>> @@ -1627,7 +1634,7 @@ static void npu2_populate_devices(struct npu2 *p,
>>   		/* This must be done after calling
>>   		 * npu_allocate_bdfn() */
>>   		p->total_devices++;
>> -		p->phb.scan_map |= 0x1 << ((dev->bdfn & 0xf8) >> 3);
>> +		p->phb_nvlink.scan_map |= 0x1 << ((dev->bdfn & 0xf8) >> 3);
>>   
>>   		dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
>>   		dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
>> @@ -1658,10 +1665,10 @@ static void npu2_populate_devices(struct npu2 *p,
>>   		dev->bars[1].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
>>   
>>   		/* Initialize PCI virtual device */
>> -		dev->pvd = pci_virt_add_device(&p->phb, dev->bdfn, 0x100, dev);
>> -		if (dev->pvd) {
>> -			p->phb.scan_map |=
>> -				0x1 << ((dev->pvd->bdfn & 0xf8) >> 3);
>> +		dev->nvlink.pvd = pci_virt_add_device(&p->phb_nvlink, dev->bdfn, 0x100, dev);
>> +		if (dev->nvlink.pvd) {
>> +			p->phb_nvlink.scan_map |=
>> +				0x1 << ((dev->nvlink.pvd->bdfn & 0xf8) >> 3);
>>   			npu2_populate_cfg(dev);
>>   		}
>>   
>> @@ -1679,8 +1686,8 @@ static void npu2_add_interrupt_map(struct npu2 *p,
>>   	size_t map_size;
>>   	uint32_t mask[] = {0xff00, 0x0, 0x0, 0x7};
>>   
>> -	assert(p->phb.dt_node);
>> -	phb_dn = p->phb.dt_node;
>> +	assert(p->phb_nvlink.dt_node);
>> +	phb_dn = p->phb_nvlink.dt_node;
>>   
>>   	npu2_phandle = dt_prop_get_u32(dn, "ibm,npcq");
>>   	npu2_dn = dt_find_by_phandle(dt_root, npu2_phandle);
>> @@ -1707,7 +1714,7 @@ static void npu2_add_interrupt_map(struct npu2 *p,
>>   
>>   static void npu2_add_phb_properties(struct npu2 *p)
>>   {
>> -	struct dt_node *np = p->phb.dt_node;
>> +	struct dt_node *np = p->phb_nvlink.dt_node;
>>   	uint32_t icsp = get_ics_phandle();
>>   	uint64_t mm_base, mm_size, mmio_atsd;
>>   
>> @@ -1871,20 +1878,20 @@ static void npu2_create_phb(struct dt_node *dn)
>>   	p->devices = pmem + sizeof(struct npu2);
>>   
>>   	/* Generic PHB */
>> -	p->phb.dt_node = dn;
>> -	p->phb.ops = &npu_ops;
>> -	p->phb.phb_type = phb_type_npu_v2;
>> +	p->phb_nvlink.dt_node = dn;
>> +	p->phb_nvlink.ops = &npu_ops;
>> +	p->phb_nvlink.phb_type = phb_type_npu_v2;
>>   	init_lock(&p->lock);
>> -	init_lock(&p->phb.lock);
>> -	list_head_init(&p->phb.devices);
>> -	list_head_init(&p->phb.virt_devices);
>> +	init_lock(&p->phb_nvlink.lock);
>> +	list_head_init(&p->phb_nvlink.devices);
>> +	list_head_init(&p->phb_nvlink.virt_devices);
>>   
>>   	npu2_setup_irqs(p);
>>   	npu2_populate_devices(p, dn);
>>   	npu2_add_interrupt_map(p, dn);
>>   	npu2_add_phb_properties(p);
>>   
>> -	slot = npu2_slot_create(&p->phb);
>> +	slot = npu2_slot_create(&p->phb_nvlink);
>>   	if (!slot)
>>   	{
>>   		/**
>> @@ -1895,7 +1902,7 @@ static void npu2_create_phb(struct dt_node *dn)
>>   		prlog(PR_ERR, "NPU2: Cannot create PHB slot\n");
>>   	}
>>   
>> -	pci_register_phb(&p->phb, OPAL_DYNAMIC_PHB_ID);
>> +	pci_register_phb(&p->phb_nvlink, OPAL_DYNAMIC_PHB_ID);
>>   
>>   	npu2_init_ioda_cache(p);
>>   	npu2_hw_init(p);
>> @@ -1955,7 +1962,7 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
>>   				     uint64_t bdf)
>>   {
>>   	struct phb *phb = pci_get_phb(phb_id);
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	uint64_t xts_bdf, xts_bdf_pid = 0;
>>   	int id, lparshort;
>>   
>> @@ -2033,7 +2040,7 @@ opal_call(OPAL_NPU_INIT_CONTEXT, opal_npu_init_context, 4);
>>   static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf)
>>   {
>>   	struct phb *phb = pci_get_phb(phb_id);
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	uint64_t xts_bdf, xts_bdf_pid;
>>   	uint64_t lparshort;
>>   	int id, rc = 0;
>> @@ -2081,7 +2088,7 @@ static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
>>   			     uint64_t lpcr)
>>   {
>>   	struct phb *phb = pci_get_phb(phb_id);
>> -	struct npu2 *p = phb_to_npu2(phb);
>> +	struct npu2 *p = phb_to_npu2_nvlink(phb);
>>   	struct npu2_dev *ndev = NULL;
>>   	uint64_t xts_bdf_lpar, rc = OPAL_SUCCESS;
>>   	int i;
>> @@ -2128,7 +2135,7 @@ static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
>>   
>>   	/* Need to find an NVLink to send the ATSDs for this device over */
>>   	for (i = 0; i < p->total_devices; i++) {
>> -		if (p->devices[i].gpu_bdfn == bdf) {
>> +		if (p->devices[i].nvlink.gpu_bdfn == bdf) {
>>   			ndev = &p->devices[i];
>>   			break;
>>   		}
>> diff --git a/include/npu2.h b/include/npu2.h
>> index d0c9ac5..94e537b 100644
>> --- a/include/npu2.h
>> +++ b/include/npu2.h
>> @@ -21,14 +21,14 @@
>>   
>>   /* Debugging options */
>>   #define NPU2DBG(p, fmt, a...)	prlog(PR_DEBUG, "NPU%d: " fmt, \
>> -				      (p)->phb.opal_id, ##a)
>> +				      (p)->phb_nvlink.opal_id, ##a)
>>   #define NPU2INF(p, fmt, a...)	prlog(PR_INFO,  "NPU%d: " fmt, \
>> -				      (p)->phb.opal_id, ##a)
>> +				      (p)->phb_nvlink.opal_id, ##a)
>>   #define NPU2ERR(p, fmt, a...)	prlog(PR_ERR,   "NPU%d: " fmt, \
>> -				      (p)->phb.opal_id, ##a)
>> +				      (p)->phb_nvlink.opal_id, ##a)
>>   
>>   #define NPU2DEVLOG(l, p, fmt, a...)	prlog(l, "NPU%d:%d:%d.%d " fmt, \
>> -					      (p)->npu->phb.opal_id, \
>> +					      (p)->npu->phb_nvlink.opal_id, \
>>   					      ((p)->bdfn >> 8) & 0xff, \
>>   					      ((p)->bdfn >> 3) & 0x1f, \
>>   					      (p)->bdfn & 0x7, ##a)
>> @@ -80,18 +80,18 @@ struct npu2_pcie_bar {
>>   	struct npu2_bar		npu2_bar;
>>   };
>>   
>> +enum npu2_dev_type {
>> +	NPU2_DEV_TYPE_NVLINK,
>> +	NPU2_DEV_TYPE_OPENCAPI,
>> +};
>> +
>>   struct npu2;
>> -struct npu2_dev {
>> -	uint32_t		index;
>> -	uint64_t		pl_xscom_base;
>> -	struct dt_node		*dt_node;
>> -	struct npu2_pcie_bar	bars[2];
>> -	struct npu2		*npu;
>>   
>> -	/* Device and function numbers are allocated based on GPU
>> -	 * association. Links to connected to the same GPU will be
>> -	 * exposed as different functions of the same bus/device. */
>> -	uint32_t		bdfn;
>> +struct npu2_dev_nvlink {
>> +	/* For NVLink, device and function numbers are allocated based
>> +	 * on GPU association. Links to connected to the same GPU will
>> +	 * be exposed as different functions of the same
>> +	 * bus/device. */
>>   	uint32_t		gpu_bdfn;
>>   
>>   	/* PCI virtual device and the associated GPU device */
>> @@ -104,6 +104,20 @@ struct npu2_dev {
>>   	/* Vendor specific capability */
>>   	uint32_t		vendor_cap;
>>   
>> +	/* Used to associate the NPU device with GPU PCI devices */
>> +	const char		*slot_label;
>> +};
>> +
>> +struct npu2_dev {
>> +	enum npu2_dev_type	type;
>> +	uint32_t		index;
>> +	uint64_t		pl_xscom_base;
>> +	struct dt_node		*dt_node;
>> +	struct npu2_pcie_bar	bars[2];
>> +	struct npu2		*npu;
>> +
>> +	uint32_t		bdfn;
>> +
>>   	/* Which PHY lanes this device is associated with */
>>   	uint32_t		lane_mask;
>>   
>> @@ -114,8 +128,12 @@ struct npu2_dev {
>>   	unsigned long		procedure_tb;
>>   	uint32_t		procedure_status;
>>   
>> -	/* Used to associate the NPU device with GPU PCI devices */
>> -	const char		*slot_label;
>> +	/* NVLink */
>> +	struct npu2_dev_nvlink	nvlink;
>> +
>> +	/* OpenCAPI */
>> +	struct phb		phb_ocapi;
>> +	uint64_t		i2c_port_id_ocapi;
>>   };
>>   
>>   struct npu2 {
>> @@ -142,12 +160,32 @@ struct npu2 {
>>   	 * tables. */
>>   	struct lock	lock;
>>   
>> -	struct phb	phb;
>> +	/* NVLink */
>> +	struct phb	phb_nvlink;
>>   };
>>   
>> -static inline struct npu2 *phb_to_npu2(struct phb *phb)
>> +static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
>> +{
>> +	assert(phb->phb_type == phb_type_npu_v2);
>> +	return container_of(phb, struct npu2, phb_nvlink);
>> +}
>> +
>> +static inline struct npu2_dev *phb_to_npu2_dev_ocapi(struct phb *phb)
>> +{
>> +	assert(phb->phb_type == phb_type_npu_v2_opencapi);
>> +	return container_of(phb, struct npu2_dev, phb_ocapi);
>> +}
>> +
>> +static inline struct phb *npu2_dev_to_phb(struct npu2_dev *ndev)
>>   {
>> -	return container_of(phb, struct npu2, phb);
>> +	switch (ndev->type) {
>> +	case NPU2_DEV_TYPE_NVLINK:
>> +		return &ndev->npu->phb_nvlink;
>> +	case NPU2_DEV_TYPE_OPENCAPI:
>> +		return &ndev->phb_ocapi;
>> +	default:
>> +		assert(false);
>> +	}
>>   }
>>   
>>   void npu2_write_4b(struct npu2 *p, uint64_t reg, uint32_t val);
>> diff --git a/include/pci.h b/include/pci.h
>> index c085b6b..695f8f7 100644
>> --- a/include/pci.h
>> +++ b/include/pci.h
>> @@ -344,6 +344,7 @@ enum phb_type {
>>   	phb_type_pcie_v3,
>>   	phb_type_pcie_v4,
>>   	phb_type_npu_v2,
>> +	phb_type_npu_v2_opencapi,
>>   };
>>   
>>   struct phb {
>>
> 
> 

-- 
Andrew Donnellan              OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com  IBM Australia Limited