[Skiboot] [PATCH v2 2/6] npu2: Split device index into brick and link index
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Mon Aug 27 18:55:51 AEST 2018
On Witherspoon, OpenCAPI devices attached to link indexes 0 and 1 are
handled by bricks 2 and 3, so a device's link index and its brick index
are not necessarily the same number.
Rename index to brick_index, and add a new field, link_index, to
refer to the link index. For now, both fields are set to the same value,
read from the "ibm,npu-link-index" device tree property.
Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
---
hw/npu2-hw-procedures.c | 8 ++--
hw/npu2-opencapi.c | 77 +++++++++++++++++++++---------------------
hw/npu2.c | 21 ++++++-----
include/npu2-regs.h | 14 ++++----
include/npu2.h | 7 ++--
5 files changed, 67 insertions(+), 60 deletions(-)
diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c
index 86864629a66b..8767d9259984 100644
--- a/hw/npu2-hw-procedures.c
+++ b/hw/npu2-hw-procedures.c
@@ -198,7 +198,7 @@ DEFINE_PROCEDURE(nop);
/* Return the brick number (0-2) within an obus chiplet */
static int obus_brick_index(struct npu2_dev *ndev)
{
- int index = ndev->index % 3;
+ int index = ndev->brick_index % 3;
/* On the second obus chiplet, index is reversed */
if ((ndev->pl_xscom_base & 0x3F000000) != 0x09000000)
@@ -432,7 +432,7 @@ DEFINE_PROCEDURE(phy_reset, phy_reset_wait, phy_reset_complete);
/* Procedure 1.2.6 - I/O PHY Tx Impedance Calibration */
static uint32_t phy_tx_zcal(struct npu2_dev *ndev)
{
- if (ndev->npu->tx_zcal_complete[ndev->index > 2])
+ if (ndev->npu->tx_zcal_complete[ndev->brick_index > 2])
return PROCEDURE_COMPLETE;
/* Turn off SW enable and enable zcal state machine */
@@ -603,7 +603,7 @@ static uint32_t phy_tx_zcal_calculate(struct npu2_dev *ndev)
phy_write(ndev, &NPU2_PHY_TX_MARGINPU_SELECT, therm(margin_select + 1)/2);
phy_write(ndev, &NPU2_PHY_TX_MARGINPD_SELECT, therm(margin_select + 1)/2);
- ndev->npu->tx_zcal_complete[ndev->index > 2] = 1;
+ ndev->npu->tx_zcal_complete[ndev->brick_index > 2] = 1;
return PROCEDURE_COMPLETE;
}
DEFINE_PROCEDURE(phy_tx_zcal, phy_tx_zcal_wait, phy_tx_zcal_calculate);
@@ -977,7 +977,7 @@ void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev)
uint64_t status_xscom;
int lane, bit = 7;
- switch (dev->index) {
+ switch (dev->brick_index) {
case 2:
status_xscom = OB0_ODL0_TRAINING_STATUS;
break;
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 57c2f2fcc075..68ae93a2f836 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -53,11 +53,11 @@
#include <nvram.h>
#define OCAPIDBG(dev, fmt, a...) prlog(PR_DEBUG, "OCAPI[%d:%d]: " fmt, \
- dev->npu->chip_id, dev->index, ## a)
+ dev->npu->chip_id, dev->brick_index, ## a)
#define OCAPIINF(dev, fmt, a...) prlog(PR_INFO, "OCAPI[%d:%d]: " fmt, \
- dev->npu->chip_id, dev->index, ## a)
+ dev->npu->chip_id, dev->brick_index, ## a)
#define OCAPIERR(dev, fmt, a...) prlog(PR_ERR, "OCAPI[%d:%d]: " fmt, \
- dev->npu->chip_id, dev->index, ## a)
+ dev->npu->chip_id, dev->brick_index, ## a)
#define NPU_IRQ_LEVELS 35
@@ -753,16 +753,16 @@ static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
struct npu2_dev *dev)
{
- uint64_t stack = index_to_stack(dev->index);
- uint64_t offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
+ uint64_t stack = index_to_stack(dev->brick_index);
+ uint64_t offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
NPU2_NTL0_BAR : NPU2_NTL1_BAR;
- uint64_t pa_offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
+ uint64_t pa_offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
uint64_t addr, size, reg;
prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
- phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size);
+ phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size);
prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
@@ -783,7 +783,7 @@ static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
struct npu2_dev *dev)
{
- uint64_t stack = index_to_stack(dev->index);
+ uint64_t stack = index_to_stack(dev->brick_index);
int stack_num = stack - NPU2_STACK_STCK_0;
uint64_t addr, size;
@@ -799,8 +799,8 @@ static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
static void otl_enabletx(uint32_t gcid, uint32_t scom_base,
struct npu2_dev *dev)
{
- uint64_t stack = index_to_stack(dev->index);
- uint64_t block = index_to_block(dev->index);
+ uint64_t stack = index_to_stack(dev->brick_index);
+ uint64_t block = index_to_block(dev->brick_index);
uint64_t reg;
/* OTL Config 2 Register */
@@ -822,7 +822,7 @@ static void assert_reset(struct npu2_dev *dev)
uint8_t pin, data;
int rc;
- switch (dev->index) {
+ switch (dev->brick_index) {
case 2:
case 4:
pin = platform.ocapi->i2c_reset_odl0;
@@ -910,7 +910,7 @@ static bool i2c_presence_detect(struct npu2_dev *dev)
OCAPIDBG(dev, "I2C presence detect: 0x%x\n", state);
- switch (dev->index) {
+ switch (dev->brick_index) { // TODO(ajd): Link or brick index?
case 2:
data = platform.ocapi->i2c_presence_odl0;
break;
@@ -929,7 +929,7 @@ static void reset_odl(uint32_t gcid, struct npu2_dev *dev)
{
uint64_t reg, config_xscom;
- switch (dev->index) {
+ switch (dev->brick_index) {
case 2:
config_xscom = OB0_ODL0_CONFIG;
break;
@@ -965,7 +965,7 @@ static void set_init_pattern(uint32_t gcid, struct npu2_dev *dev)
{
uint64_t reg, config_xscom;
- switch (dev->index) {
+ switch (dev->brick_index) {
case 2:
config_xscom = OB0_ODL0_CONFIG;
break;
@@ -992,7 +992,7 @@ static void start_training(uint32_t gcid, struct npu2_dev *dev)
{
uint64_t reg, config_xscom;
- switch (dev->index) {
+ switch (dev->brick_index) {
case 2:
config_xscom = OB0_ODL0_CONFIG;
break;
@@ -1035,7 +1035,7 @@ static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
uint64_t reg;
int64_t link_width, training_status, rc = OPAL_SUCCESS;
- reg = get_odl_status(dev->npu->chip_id, dev->index);
+ reg = get_odl_status(dev->npu->chip_id, dev->brick_index);
link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg);
training_status = GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg);
@@ -1071,7 +1071,7 @@ static int64_t npu2_opencapi_retry_state(struct pci_slot *slot)
*/
OCAPIERR(dev,
"Link failed to train, final link status: %016llx\n",
- get_odl_status(chip_id, dev->index));
+ get_odl_status(chip_id, dev->brick_index));
return OPAL_HARDWARE;
}
@@ -1093,7 +1093,7 @@ static int64_t npu2_opencapi_poll_link(struct pci_slot *slot)
pci_slot_set_state(slot, OCAPI_SLOT_LINK_WAIT);
/* fall-through */
case OCAPI_SLOT_LINK_WAIT:
- reg = get_odl_status(chip_id, dev->index);
+ reg = get_odl_status(chip_id, dev->brick_index);
if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) ==
OCAPI_LINK_STATE_TRAINED) {
OCAPIINF(dev, "link trained in %lld ms\n",
@@ -1153,9 +1153,9 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
if (dev->train_need_fence) {
OCAPIDBG(dev, "Fencing OTL during reset\n");
set_fence_control(chip_id, dev->npu->xscom_base,
- dev->index, 0b11);
+ dev->brick_index, 0b11);
npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
- PPC_BIT(dev->index + 6));
+ PPC_BIT(dev->brick_index + 6));
dev->train_fenced = true;
}
dev->train_need_fence = true;
@@ -1180,9 +1180,10 @@ static int64_t npu2_opencapi_freset(struct pci_slot *slot)
case OCAPI_SLOT_FRESET_DEASSERT_DELAY:
if (dev->train_fenced) {
OCAPIDBG(dev, "Unfencing OTL after reset\n");
- npu2_write(dev->npu, NPU2_MISC_FENCE_STATE, PPC_BIT(dev->index));
+ npu2_write(dev->npu, NPU2_MISC_FENCE_STATE,
+ PPC_BIT(dev->brick_index));
set_fence_control(chip_id, dev->npu->xscom_base,
- dev->index, 0b00);
+ dev->brick_index, 0b00);
dev->train_fenced = false;
}
@@ -1263,7 +1264,7 @@ static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn,
return rc;
genid_base = dev->bars[1].npu2_bar.base +
- (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
+ (index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0);
cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
@@ -1321,7 +1322,7 @@ static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn,
return rc;
genid_base = dev->bars[1].npu2_bar.base +
- (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
+ (index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0);
cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
@@ -1409,13 +1410,14 @@ static int64_t npu2_opencapi_set_pe(struct phb *phb,
p = dev->npu;
pe_bdfn = dev->bdfn;
-
+
val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
- NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
- p->bdf2pe_cache[dev->index] = val;
+ NPU2_MISC_BRICK0_BDF2PE_MAP0 +
+ (dev->brick_index * 0x18));
+ p->bdf2pe_cache[dev->brick_index] = val;
npu2_write(p, reg, val);
return OPAL_SUCCESS;
@@ -1426,8 +1428,8 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
{
uint32_t irq;
struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
- uint64_t block = index_to_block(dev->index);
- uint64_t stacku = index_to_stacku(dev->index);
+ uint64_t block = index_to_block(dev->brick_index);
+ uint64_t stacku = index_to_stacku(dev->brick_index);
uint64_t dsisr, dar, tfc, handle;
/*
@@ -1631,7 +1633,8 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
dev->phb_ocapi.ops = &npu2_opencapi_ops;
dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
dev->phb_ocapi.scan_map = 0;
- dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
+ dev->link_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
+ dev->brick_index = dev->link_index;
dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
dev->link_speed = dt_prop_get_u64(dn_link, "ibm,link-speed");
@@ -1664,7 +1667,7 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
/* Procedure 13.1.3.9 - AFU Config BARs */
setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
- set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00);
+ set_fence_control(n->chip_id, n->xscom_base, dev->brick_index, 0b00);
if (npu2_ocapi_training_state != NPU2_TRAIN_DEFAULT) {
setup_debug_training_state(dev);
@@ -1847,8 +1850,8 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn,
if (!dev)
return OPAL_PARAMETER;
- block = index_to_block(dev->index);
- stack = index_to_stack(dev->index);
+ block = index_to_block(dev->brick_index);
+ stack = index_to_stack(dev->brick_index);
if (block == NPU2_BLOCK_OTL1)
offset = NPU2_XSL_PSL_SPAP_A1;
else
@@ -1912,8 +1915,8 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn,
if (!dev)
return OPAL_PARAMETER;
- block = index_to_block(dev->index);
- stack = index_to_stack(dev->index);
+ block = index_to_block(dev->brick_index);
+ stack = index_to_stack(dev->brick_index);
cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0);
lock(&dev->npu->lock);
@@ -1987,8 +1990,8 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t __unused bdfn,
if (!dev)
return OPAL_PARAMETER;
- block = index_to_block(dev->index);
- stack = index_to_stack(dev->index);
+ block = index_to_block(dev->brick_index);
+ stack = index_to_stack(dev->brick_index);
/*
* The 'capabilities' argument defines what TL template the
* device can receive. OpenCAPI 3.0 and 4.0 define 64 templates, so
diff --git a/hw/npu2.c b/hw/npu2.c
index 2b3540583ac1..be1c304420fc 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1075,10 +1075,10 @@ static int64_t npu2_set_pe(struct phb *phb,
val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
if (!NPU2DEV_BRICK(dev))
- reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2,
NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0);
else
- reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2,
NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0);
npu2_write(p, reg, val);
@@ -1086,8 +1086,8 @@ static int64_t npu2_set_pe(struct phb *phb,
val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
- NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
- p->bdf2pe_cache[dev->index] = val;
+ NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->brick_index * 0x18));
+ p->bdf2pe_cache[dev->brick_index] = val;
npu2_write(p, reg, val);
return OPAL_SUCCESS;
@@ -1601,7 +1601,7 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
NULL);
/* Link index */
- PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->index);
+ PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->link_index);
return start + VENDOR_CAP_LEN;
}
@@ -1725,7 +1725,8 @@ static void npu2_populate_devices(struct npu2 *p,
dev->type = NPU2_DEV_TYPE_NVLINK;
dev->npu = p;
dev->dt_node = link;
- dev->index = dt_prop_get_u32(link, "ibm,npu-link-index");
+ dev->link_index = dt_prop_get_u32(link, "ibm,npu-link-index");
+ dev->brick_index = dev->link_index;
group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
dev->bdfn = npu_allocate_bdfn(p, group_id);
@@ -1742,7 +1743,7 @@ static void npu2_populate_devices(struct npu2 *p,
stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
npu2_bar = &dev->bars[0].npu2_bar;
npu2_bar->type = NPU_NTL;
- npu2_bar->index = dev->index;
+ npu2_bar->index = dev->brick_index;
npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
NPU2_NTL0_BAR : NPU2_NTL1_BAR);
npu2_get_bar(p->chip_id, npu2_bar);
@@ -2248,8 +2249,10 @@ static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
goto out;
}
- xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar, 0x4 >> (ndev->index / 2));
- xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar, (ndev->index % 2));
+ xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar,
+ 0x4 >> (ndev->brick_index / 2));
+ xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar,
+ (ndev->brick_index % 2));
NPU2DBG(p, "XTS_BDF_MAP[%03d] = 0x%08llx\n", id, xts_bdf_lpar);
npu2_write(p, NPU2_XTS_BDF_MAP + id*8, xts_bdf_lpar);
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 6bd77e48bf4f..8c1ba5fff37a 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -44,19 +44,19 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
(((stack) << 20) | ((block) << 16) | (offset))
#define NPU2_NTL_REG_OFFSET(ndev, offset) \
- NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->index >> 1), \
- NPU2_BLOCK_NTL0 + ((ndev)->index % 2)*2, offset)
+ NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->brick_index >> 1), \
+ NPU2_BLOCK_NTL0 + ((ndev)->brick_index % 2)*2, offset)
#define NPU2_NTLU_REG_OFFSET(ndev, offset) \
- NPU2_REG_OFFSET(NPU2_STACK_STCK_0U + ((ndev)->index >> 1), \
- NPU2_BLOCK_NTL0 + ((ndev)->index % 2)*2, offset)
+ NPU2_REG_OFFSET(NPU2_STACK_STCK_0U + ((ndev)->brick_index >> 1), \
+ NPU2_BLOCK_NTL0 + ((ndev)->brick_index % 2)*2, offset)
#define NPU2_DL_REG_OFFSET(ndev, offset) \
- NPU2_REG_OFFSET(((ndev)->index >> 1), \
- 8 + ((ndev)->index % 2)*2, offset)
+ NPU2_REG_OFFSET(((ndev)->brick_index >> 1), \
+ 8 + ((ndev)->brick_index % 2)*2, offset)
#define NPU2_SM_REG_OFFSET(ndev, sm, offset) \
- NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->index >> 1), \
+ NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + ((ndev)->brick_index >> 1), \
NPU2_BLOCK_SM_0 + (sm), offset)
/* Get the offset for this register */
diff --git a/include/npu2.h b/include/npu2.h
index 4c2e20e0e2f7..10742031ec0f 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -47,10 +47,10 @@
#define NPU2_DEV_DL_RESET 0x2
/* Return the stack (0-2) of a device */
-#define NPU2DEV_STACK(ndev) ((ndev)->index / 2)
+#define NPU2DEV_STACK(ndev) ((ndev)->brick_index / 2)
/* Return the brick number (0-1) within a stack */
-#define NPU2DEV_BRICK(ndev) ((ndev)->index % 2)
+#define NPU2DEV_BRICK(ndev) ((ndev)->brick_index % 2)
/* This represents the state of the actual hardware BARs not the
* emulated PCIe BARs. The is a subtle difference between the two as
@@ -111,7 +111,8 @@ struct npu2_dev_nvlink {
struct npu2_dev {
enum npu2_dev_type type;
- uint32_t index;
+ uint32_t link_index;
+ uint32_t brick_index;
uint64_t pl_xscom_base;
struct dt_node *dt_node;
struct npu2_pcie_bar bars[2];
--
git-series 0.9.1
More information about the Skiboot
mailing list