[Skiboot] [PATCH v2 2/6] npu2: Split device index into brick and link index

Frederic Barrat fbarrat at linux.ibm.com
Wed Aug 29 22:58:59 AEST 2018



Le 27/08/2018 à 10:55, Andrew Donnellan a écrit :
> On Witherspoon, OpenCAPI devices attached to link indexes 0 and 1 are
> handled by bricks 2 and 3.
> 
> Rename index to brick_index, and add a new field, link_index, to
> refer to the link index. For now, we set those values identically.
> 

This looks good.
On a related topic, I'm a bit concerned that the link_index will change
in the future on ZZ and Zaius if/when we ever get the NPU link info
from HDAT. It seems we'd need a patch in skiboot first to avoid breaking
anything during the switch. On the other hand, it's hard to do anything
until the dust has settled on their side. We'll worry about it
when it happens, I guess.

Reviewed-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>



> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> ---
>   hw/npu2-hw-procedures.c |  8 ++--
>   hw/npu2-opencapi.c      | 77 +++++++++++++++++++++---------------------
>   hw/npu2.c               | 21 ++++++-----
>   include/npu2-regs.h     | 14 ++++----
>   include/npu2.h          |  7 ++--
>   5 files changed, 67 insertions(+), 60 deletions(-)
> 
> diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c
> index 86864629a66b..8767d9259984 100644
> --- a/hw/npu2-hw-procedures.c
> +++ b/hw/npu2-hw-procedures.c
> @@ -198,7 +198,7 @@ DEFINE_PROCEDURE(nop);
>   /* Return the brick number (0-2) within an obus chiplet */
>   static int obus_brick_index(struct npu2_dev *ndev)
>   {
> -	int index = ndev->index % 3;
> +	int index = ndev->brick_index % 3;
> 
>   	/* On the second obus chiplet, index is reversed */
>   	if ((ndev->pl_xscom_base & 0x3F000000) != 0x09000000)
> @@ -432,7 +432,7 @@ DEFINE_PROCEDURE(phy_reset, phy_reset_wait, phy_reset_complete);
>   /* Procedure 1.2.6 - I/O PHY Tx Impedance Calibration */
>   static uint32_t phy_tx_zcal(struct npu2_dev *ndev)
>   {
> -	if (ndev->npu->tx_zcal_complete[ndev->index > 2])
> +	if (ndev->npu->tx_zcal_complete[ndev->brick_index > 2])
>   		return PROCEDURE_COMPLETE;
> 
>   	/* Turn off SW enable and enable zcal state machine */
> @@ -603,7 +603,7 @@ static uint32_t phy_tx_zcal_calculate(struct npu2_dev *ndev)
>   	phy_write(ndev, &NPU2_PHY_TX_MARGINPU_SELECT, therm(margin_select + 1)/2);
>   	phy_write(ndev, &NPU2_PHY_TX_MARGINPD_SELECT, therm(margin_select + 1)/2);
> 
> -	ndev->npu->tx_zcal_complete[ndev->index > 2] = 1;
> +	ndev->npu->tx_zcal_complete[ndev->brick_index > 2] = 1;
>   	return PROCEDURE_COMPLETE;
>   }
>   DEFINE_PROCEDURE(phy_tx_zcal, phy_tx_zcal_wait, phy_tx_zcal_calculate);
> @@ -977,7 +977,7 @@ void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev)
>   	uint64_t status_xscom;
>   	int lane, bit = 7;
> 
> -	switch (dev->index) {
> +	switch (dev->brick_index) {
>   	case 2:
>   		status_xscom = OB0_ODL0_TRAINING_STATUS;
>   		break;
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 57c2f2fcc075..68ae93a2f836 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -53,11 +53,11 @@
>   #include <nvram.h>
> 
>   #define OCAPIDBG(dev, fmt, a...)    prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt, \
> -					  dev->npu->chip_id, dev->index, ## a)
> +					  dev->npu->chip_id, dev->brick_index, ## a)
>   #define OCAPIINF(dev, fmt, a...)    prlog(PR_INFO, "OCAPI[%d:%d]: " fmt, \
> -					  dev->npu->chip_id, dev->index, ## a)
> +					  dev->npu->chip_id, dev->brick_index, ## a)
>   #define OCAPIERR(dev, fmt, a...)    prlog(PR_ERR, "OCAPI[%d:%d]: " fmt, \
> -					  dev->npu->chip_id, dev->index, ## a)
> +					  dev->npu->chip_id, dev->brick_index, ## a)
> 
> 
>   #define NPU_IRQ_LEVELS		35
> @@ -753,16 +753,16 @@ static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
>   static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
>   				struct npu2_dev *dev)
>   {
> -	uint64_t stack = index_to_stack(dev->index);
> -	uint64_t offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
> +	uint64_t stack = index_to_stack(dev->brick_index);
> +	uint64_t offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
>   		NPU2_NTL0_BAR : NPU2_NTL1_BAR;
> -	uint64_t pa_offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
> +	uint64_t pa_offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
>   		NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
>   		NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
>   	uint64_t addr, size, reg;
> 
>   	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
> -	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size);
> +	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size);
> 
>   	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
>   	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
> @@ -783,7 +783,7 @@ static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
>   static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
>   				  struct npu2_dev *dev)
>   {
> -	uint64_t stack = index_to_stack(dev->index);
> +	uint64_t stack = index_to_stack(dev->brick_index);
>   	int stack_num = stack - NPU2_STACK_STCK_0;
>   	uint64_t addr, size;
> 
> @@ -799,8 +799,8 @@ static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
>   static void otl_enabletx(uint32_t gcid, uint32_t scom_base,
>   			struct npu2_dev *dev)
>   {
> -	uint64_t stack = index_to_stack(dev->index);
> -	uint64_t block = index_to_block(dev->index);
> +	uint64_t stack = index_to_stack(dev->brick_index);
> +	uint64_t block = index_to_block(dev->brick_index);
>   	uint64_t reg;
> 
>   	/* OTL Config 2 Register */
> @@ -822,7 +822,7 @@ static void assert_reset(struct npu2_dev *dev)
>   	uint8_t pin, data;
>   	int rc;
> 
> -	switch (dev->index) {
> +	switch (dev->brick_index) {
>   	case 2:
>   	case 4:
>   		pin = platform.ocapi->i2c_reset_odl0;
> @@ -910,7 +910,7 @@ static bool i2c_presence_detect(struct npu2_dev *dev)
> 
>   	OCAPIDBG(dev, "I2C presence detect: 0x%x\n", state);
> 
> -	switch (dev->index) {
> +	switch (dev->brick_index) { // TODO(ajd): Link or brick index?
>   	case 2:
>   		data = platform.ocapi->i2c_presence_odl0;
>   		break;
> @@ -929,7 +929,7 @@ static void reset_odl(uint32_t gcid, struct npu2_dev *dev)
>   {
>   	uint64_t reg, config_xscom;
> 
> -	switch (dev->index) {
> +	switch (dev->brick_index) {
>   	case 2:
>   		config_xscom = OB0_ODL0_CONFIG;
>   		break;
> @@ -965,7 +965,7 @@ static void set_init_pattern(uint32_t gcid, struct npu2_dev *dev)
>   {
>   	uint64_t reg, config_xscom;
> 
> -	switch (dev->index) {
> +	switch (dev->brick_index) {
>   	case 2:
>   		config_xscom = OB0_ODL0_CONFIG;
>   		break;
> @@ -992,7 +992,7 @@ static void start_training(uint32_t gcid, struct npu2_dev *dev)
>   {
>   	uint64_t reg, config_xscom;
> 
> -	switch (dev->index) {
> +	switch (dev->brick_index) {
>   	case 2:
>   		config_xscom = OB0_ODL0_CONFIG;
>   		break;
> @@ -1035,7 +1035,7 @@ static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
>   	uint64_t reg;
>   	int64_t link_width, training_status, rc = OPAL_SUCCESS;
> 
> -	reg = get_odl_status(dev->npu->chip_id, dev->index);
> +	reg = get_odl_status(dev->npu->chip_id, dev->brick_index);
>   	link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg);
>   	training_status = GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg);
> 
> @@ -1071,7 +1071,7 @@ static int64_t npu2_opencapi_retry_state(struct pci_slot *slot)
>   		 */
>   		OCAPIERR(dev,
>   			"Link failed to train, final link status: %016llx\n",
> -			get_odl_status(chip_id, dev->index));
> +			get_odl_status(chip_id, dev->brick_index));
>   		return OPAL_HARDWARE;
>   	}
> 
> @@ -1093,7 +1093,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
>   		pci_slot_set_state(slot, OCAPI_SLOT_LINK_WAIT);
>   		/* fall-through */
>   	case OCAPI_SLOT_LINK_WAIT:
> -		reg = get_odl_status(chip_id, dev->index);
> +		reg = get_odl_status(chip_id, dev->brick_index);
>   		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) ==
>   			OCAPI_LINK_STATE_TRAINED) {
>   			OCAPIINF(dev, "link trained in %lld ms\n",
> @@ -1153,9 +1153,9 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
>   		if (dev->train_need_fence) {
>   			OCAPIDBG(dev, "Fencing OTL during reset\n");
>   			set_fence_control(chip_id, dev->npu->xscom_base,
> -					dev->index, 0b11);
> +					dev->brick_index, 0b11);
>   			npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> -				PPC_BIT(dev->index + 6));
> +				PPC_BIT(dev->brick_index + 6));
>   			dev->train_fenced = true;
>   		}
>   		dev->train_need_fence = true;
> @@ -1180,9 +1180,10 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
>   	case OCAPI_SLOT_FRESET_DEASSERT_DELAY:
>   		if (dev->train_fenced) {
>   			OCAPIDBG(dev, "Unfencing OTL after reset\n");
> -			npu2_write(dev->npu, NPU2_MISC_FENCE_STATE, PPC_BIT(dev->index));
> +			npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
> +				   PPC_BIT(dev->brick_index));
>   			set_fence_control(chip_id, dev->npu->xscom_base,
> -					dev->index, 0b00);
> +					  dev->brick_index, 0b00);
>   			dev->train_fenced = false;
>   		}
> 
> @@ -1263,7 +1264,7 @@ static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn,
>   		return rc;
> 
>   	genid_base = dev->bars[1].npu2_bar.base +
> -		(index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
> +		(index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0);
> 
>   	cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
>   	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
> @@ -1321,7 +1322,7 @@ static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn,
>   		return rc;
> 
>   	genid_base = dev->bars[1].npu2_bar.base +
> -		(index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
> +		(index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0);
> 
>   	cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
>   	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
> @@ -1409,13 +1410,14 @@ static int64_t npu2_opencapi_set_pe(struct phb *phb,
>   	p = dev->npu;
> 
>   	pe_bdfn = dev->bdfn;
> -	
> +
>   	val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
>   	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
>   	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
>   	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
> -			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
> -	p->bdf2pe_cache[dev->index] = val;
> +			      NPU2_MISC_BRICK0_BDF2PE_MAP0 +
> +			      (dev->brick_index * 0x18));
> +	p->bdf2pe_cache[dev->brick_index] = val;
>   	npu2_write(p, reg, val);
> 
>   	return OPAL_SUCCESS;
> @@ -1426,8 +1428,8 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
>   {
>   	uint32_t irq;
>   	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
> -	uint64_t block = index_to_block(dev->index);
> -	uint64_t stacku = index_to_stacku(dev->index);
> +	uint64_t block = index_to_block(dev->brick_index);
> +	uint64_t stacku = index_to_stacku(dev->brick_index);
>   	uint64_t dsisr, dar, tfc, handle;
> 
>   	/*
> @@ -1631,7 +1633,8 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   	dev->phb_ocapi.ops = &npu2_opencapi_ops;
>   	dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
>   	dev->phb_ocapi.scan_map = 0;
> -	dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> +	dev->link_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> +	dev->brick_index = dev->link_index;
>   	dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
>   	dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
>   	dev->link_speed = dt_prop_get_u64(dn_link, "ibm,link-speed");
> @@ -1664,7 +1667,7 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   	/* Procedure 13.1.3.9 - AFU Config BARs */
>   	setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
> 
> -	set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00);
> +	set_fence_control(n->chip_id, n->xscom_base, dev->brick_index, 0b00);
> 
>   	if (npu2_ocapi_training_state != NPU2_TRAIN_DEFAULT) {
>   		setup_debug_training_state(dev);
> @@ -1847,8 +1850,8 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn,
>   	if (!dev)
>   		return OPAL_PARAMETER;
> 
> -	block = index_to_block(dev->index);
> -	stack = index_to_stack(dev->index);
> +	block = index_to_block(dev->brick_index);
> +	stack = index_to_stack(dev->brick_index);
>   	if (block == NPU2_BLOCK_OTL1)
>   		offset = NPU2_XSL_PSL_SPAP_A1;
>   	else
> @@ -1912,8 +1915,8 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn,
>   	if (!dev)
>   		return OPAL_PARAMETER;
> 
> -	block = index_to_block(dev->index);
> -	stack = index_to_stack(dev->index);
> +	block = index_to_block(dev->brick_index);
> +	stack = index_to_stack(dev->brick_index);
>   	cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0);
> 
>   	lock(&dev->npu->lock);
> @@ -1987,8 +1990,8 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t __unused bdfn,
>   	if (!dev)
>   		return OPAL_PARAMETER;
> 
> -	block = index_to_block(dev->index);
> -	stack = index_to_stack(dev->index);
> +	block = index_to_block(dev->brick_index);
> +	stack = index_to_stack(dev->brick_index);
>   	/*
>   	 * The 'capabilities' argument defines what TL template the
>   	 * device can receive. OpenCAPI 3.0 and 4.0 define 64 templates, so
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 2b3540583ac1..be1c304420fc 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -1075,10 +1075,10 @@ static int64_t npu2_set_pe(struct phb *phb,
>   	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
> 
>   	if (!NPU2DEV_BRICK(dev))
> -		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
> +		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2,
>   				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0);
>   	else
> -		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
> +		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2,
>   				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0);
> 
>   	npu2_write(p, reg, val);
> @@ -1086,8 +1086,8 @@ static int64_t npu2_set_pe(struct phb *phb,
>   	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
>   	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
>   	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
> -			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
> -	p->bdf2pe_cache[dev->index] = val;
> +			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->brick_index * 0x18));
> +	p->bdf2pe_cache[dev->brick_index] = val;
>   	npu2_write(p, reg, val);
> 
>   	return OPAL_SUCCESS;
> @@ -1601,7 +1601,7 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
>   			    NULL);
> 
>   	/* Link index */
> -	PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->index);
> +	PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->link_index);
> 
>   	return start + VENDOR_CAP_LEN;
>   }
> @@ -1725,7 +1725,8 @@ static void npu2_populate_devices(struct npu2 *p,
>   		dev->type = NPU2_DEV_TYPE_NVLINK;
>   		dev->npu = p;
>   		dev->dt_node = link;
> -		dev->index = dt_prop_get_u32(link, "ibm,npu-link-index");
> +		dev->link_index = dt_prop_get_u32(link, "ibm,npu-link-index");
> +		dev->brick_index = dev->link_index;
> 
>   		group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
>   		dev->bdfn = npu_allocate_bdfn(p, group_id);
> @@ -1742,7 +1743,7 @@ static void npu2_populate_devices(struct npu2 *p,
>   		stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
>   		npu2_bar = &dev->bars[0].npu2_bar;
>   		npu2_bar->type = NPU_NTL;
> -		npu2_bar->index = dev->index;
> +		npu2_bar->index = dev->brick_index;
>   		npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
>   						NPU2_NTL0_BAR : NPU2_NTL1_BAR);
>   	        npu2_get_bar(p->chip_id, npu2_bar);
> @@ -2248,8 +2249,10 @@ static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
>   		goto out;
>   	}
> 
> -	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar, 0x4 >> (ndev->index / 2));
> -	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar, (ndev->index % 2));
> +	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar,
> +				0x4 >> (ndev->brick_index / 2));
> +	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar,
> +				(ndev->brick_index % 2));
> 
>   	NPU2DBG(p, "XTS_BDF_MAP[%03d] = 0x%08llx\n", id, xts_bdf_lpar);
>   	npu2_write(p, NPU2_XTS_BDF_MAP + id*8, xts_bdf_lpar);
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 6bd77e48bf4f..8c1ba5fff37a 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -44,19 +44,19 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   	(((stack) << 20) | ((block) << 16) | (offset))
> 
>   #define NPU2_NTL_REG_OFFSET(ndev, offset)				\
> -	NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->index >> 1),       \
> -			NPU2_BLOCK_NTL0 + ((ndev)->index % 2)*2, offset)
> +	NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->brick_index >> 1),       \
> +			NPU2_BLOCK_NTL0 + ((ndev)->brick_index % 2)*2, offset)
> 
>   #define NPU2_NTLU_REG_OFFSET(ndev, offset)				\
> -	NPU2_REG_OFFSET(NPU2_STACK_STCK_0U + ((ndev)->index >> 1),       \
> -			NPU2_BLOCK_NTL0 + ((ndev)->index % 2)*2, offset)
> +	NPU2_REG_OFFSET(NPU2_STACK_STCK_0U + ((ndev)->brick_index >> 1),       \
> +			NPU2_BLOCK_NTL0 + ((ndev)->brick_index % 2)*2, offset)
> 
>   #define NPU2_DL_REG_OFFSET(ndev, offset)	\
> -	NPU2_REG_OFFSET(((ndev)->index >> 1),				\
> -			8 + ((ndev)->index % 2)*2, offset)
> +	NPU2_REG_OFFSET(((ndev)->brick_index >> 1),				\
> +			8 + ((ndev)->brick_index % 2)*2, offset)
> 
>   #define NPU2_SM_REG_OFFSET(ndev, sm, offset)				\
> -	NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->index >> 1),	\
> +	NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->brick_index >> 1),	\
>   			NPU2_BLOCK_SM_0 + (sm), offset)
> 
>   /* Get the offset for this register */
> diff --git a/include/npu2.h b/include/npu2.h
> index 4c2e20e0e2f7..10742031ec0f 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -47,10 +47,10 @@
>   #define NPU2_DEV_DL_RESET	0x2
> 
>   /* Return the stack (0-2) of a device */
> -#define NPU2DEV_STACK(ndev) ((ndev)->index / 2)
> +#define NPU2DEV_STACK(ndev) ((ndev)->brick_index / 2)
> 
>   /* Return the brick number (0-1) within a stack */
> -#define NPU2DEV_BRICK(ndev) ((ndev)->index % 2)
> +#define NPU2DEV_BRICK(ndev) ((ndev)->brick_index % 2)
> 
>   /* This represents the state of the actual hardware BARs not the
>    * emulated PCIe BARs. The is a subtle difference between the two as
> @@ -111,7 +111,8 @@ struct npu2_dev_nvlink {
> 
>   struct npu2_dev {
>   	enum npu2_dev_type	type;
> -	uint32_t		index;
> +	uint32_t		link_index;
> +	uint32_t		brick_index;
>   	uint64_t		pl_xscom_base;
>   	struct dt_node		*dt_node;
>   	struct npu2_pcie_bar	bars[2];
> 



More information about the Skiboot mailing list