[Skiboot] [PATCH v2 7/9] npu2-opencapi: Train OpenCAPI links and setup devices
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Fri Jan 19 13:50:45 AEDT 2018
Scan the OpenCAPI links under the NPU, and for each link, reset the card,
set up a device, train the link and register a PHB.
Implement the necessary operations for the OpenCAPI PHB type, and adapt the
existing npu2_set_pe() function to let us assign a PE number to an OpenCAPI
device. (For now, the PE numbers we assign to OpenCAPI devices are somewhat
meaningless - later on, we'll implement a mapping between Linux-allocated
PE numbers and a separate numbering space for PE numbers we actually
configure in the NPU.)
Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
---
v1->v2:
- remove all-devices case for device reset (Fred)
- remove i2c 1.8v hack (Fred)
- find the right I2C port ID for resets only once per device rather than
once per reset (Fred)
---
hw/npu2-opencapi.c | 578 ++++++++++++++++++++++++++++++++++++++++++++-
hw/npu2.c | 60 ++---
include/npu2-regs.h | 53 +++-
include/npu2.h | 14 +-
4 files changed, 677 insertions(+), 28 deletions(-)
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 26f55e9..057da69 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -53,6 +53,9 @@
#include <i2c.h>
#define NPU_IRQ_LEVELS 35
+#define NPU_IRQ_LEVELS_XSL 23
+
+static const struct phb_ops npu2_opencapi_ops;
static inline uint64_t index_to_stack(uint64_t index) {
switch (index) {
@@ -99,6 +102,28 @@ static inline uint64_t index_to_block(uint64_t index) {
}
}
+static uint64_t get_odl_status(uint32_t gcid, uint64_t index) {
+ uint64_t reg, status_xscom;
+ switch (index) {
+ case 2:
+ status_xscom = OB0_ODL0_STATUS;
+ break;
+ case 3:
+ status_xscom = OB0_ODL1_STATUS;
+ break;
+ case 4:
+ status_xscom = OB3_ODL0_STATUS;
+ break;
+ case 5:
+ status_xscom = OB3_ODL1_STATUS;
+ break;
+ default:
+ assert(false);
+ }
+ xscom_read(gcid, status_xscom, &reg);
+ return reg;
+}
+
/* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */
static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base,
@@ -673,6 +698,390 @@ static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
reg[1] = size;
}
+/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
+static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
+ struct npu2_dev *dev)
+{
+ uint64_t stack = index_to_stack(dev->index);
+ uint64_t offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
+ NPU2_NTL0_BAR : NPU2_NTL1_BAR;
+ uint64_t pa_offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
+ NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
+ NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
+ uint64_t addr, size, reg;
+
+ prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
+ phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size);
+
+ prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
+ write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
+ size);
+ dev->bars[0].npu2_bar.base = addr;
+ dev->bars[0].npu2_bar.size = size;
+
+ reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16);
+ reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >> 16));
+ prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg);
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
+ pa_offset),
+ NPU2_MISC_DA_LEN_8B, reg);
+}
+
+/* Procedure 13.1.3.9 - AFU Config BARs */
+static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
+ struct npu2_dev *dev)
+{
+ uint64_t stack = index_to_stack(dev->index);
+ int stack_num = stack - NPU2_STACK_STCK_0;
+ uint64_t addr, size;
+
+ prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__);
+ phys_map_get(gcid, NPU_GENID, stack_num, &addr, &size);
+ prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", addr);
+ write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR),
+ addr, size);
+ dev->bars[1].npu2_bar.base = addr;
+ dev->bars[1].npu2_bar.size = size;
+}
+
+static void otl_enabletx(uint32_t gcid, uint32_t scom_base, uint64_t index)
+{
+ uint64_t stack = index_to_stack(index);
+ uint64_t block = index_to_block(index);
+ uint64_t reg;
+
+ /* OTL Config 2 Register */
+ /* Transmit Enable */
+ prlog(PR_DEBUG, "OCAPI: %s: Enabling TX\n", __func__);
+ reg = 0;
+ reg |= NPU2_OTL_CONFIG2_TX_SEND_EN;
+ npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG2(stack, block),
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ reg = npu2_scom_read(gcid, scom_base, NPU2_OTL_VC_CREDITS(stack, block),
+ NPU2_MISC_DA_LEN_8B);
+ prlog(PR_DEBUG, "OCAPI: credit counter: %llx\n", reg);
+ /* TODO: Abort if credits are zero */
+}
+
+static void reset_ocapi_device(struct npu2_dev *dev)
+{
+ uint8_t data[3];
+ int rc;
+ int i;
+
+ switch (dev->index) {
+ case 2:
+ case 4:
+ memcpy(data, platform.ocapi->i2c_odl0_data, sizeof(data));
+ break;
+ case 3:
+ case 5:
+ memcpy(data, platform.ocapi->i2c_odl1_data, sizeof(data));
+ break;
+ default:
+ assert(false);
+ }
+
+ for (i = 0; i < 3; i++) {
+ rc = i2c_request_send(dev->i2c_port_id_ocapi, 0x20, SMBUS_WRITE,
+ platform.ocapi->i2c_offset[i], 1,
+ &data[i], sizeof(data[i]), 120);
+ if (rc) {
+ /**
+ * @fwts-label OCAPIDeviceResetFailed
+ * @fwts-advice There was an error attempting to send
+ * a reset signal over I2C to the OpenCAPI device.
+ */
+ prlog(PR_ERR, "OCAPI: Error writing I2C reset signal: %d\n", rc);
+ break;
+ }
+ if (i != 0)
+ time_wait_ms(5);
+ }
+}
+
+static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
+{
+ uint64_t reg, config_xscom;
+ int timeout = 3000;
+ prlog(PR_DEBUG, "OCAPI: %s: Training ODL\n", __func__);
+
+ switch (index) {
+ case 2:
+ config_xscom = OB0_ODL0_CONFIG;
+ break;
+ case 3:
+ config_xscom = OB0_ODL1_CONFIG;
+ break;
+ case 4:
+ config_xscom = OB3_ODL0_CONFIG;
+ break;
+ case 5:
+ config_xscom = OB3_ODL1_CONFIG;
+ break;
+ default:
+ assert(false);
+ }
+
+ /* Reset ODL */
+ reg = OB_ODL_CONFIG_RESET;
+ reg = SETFIELD(OB_ODL_CONFIG_VERSION, reg, 0b000001);
+ reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0110);
+ reg = SETFIELD(OB_ODL_CONFIG_SUPPORTED_MODES, reg, 0b0010);
+ reg |= OB_ODL_CONFIG_X4_BACKOFF_ENABLE;
+ reg = SETFIELD(OB_ODL_CONFIG_PHY_CNTR_LIMIT, reg, 0b1111);
+ reg |= OB_ODL_CONFIG_DEBUG_ENABLE;
+ reg = SETFIELD(OB_ODL_CONFIG_FWD_PROGRESS_TIMER, reg, 0b0110);
+ xscom_write(gcid, config_xscom, reg);
+
+ reg &= ~OB_ODL_CONFIG_RESET;
+ xscom_write(gcid, config_xscom, reg);
+
+ reset_ocapi_device(dev);
+
+ /* Transmit Pattern A */
+ reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0001);
+ xscom_write(gcid, config_xscom, reg);
+ time_wait_ms(5);
+
+ /* Bump lanes - this improves training reliability */
+ npu2_opencapi_bump_ui_lane(dev);
+
+ /* Start training */
+ reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b1000);
+ xscom_write(gcid, config_xscom, reg);
+
+ do {
+ reg = get_odl_status(gcid, index);
+ if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == 0x7) {
+ prlog(PR_NOTICE,
+ "OCAPI: Link %d on chip %u trained in %dms\n",
+ index, gcid, 3000 - timeout);
+ return OPAL_SUCCESS;
+ }
+ time_wait_ms(1);
+ } while (timeout--);
+ prlog(PR_INFO, "OCAPI: Link %d on chip %u failed to train, retrying\n",
+ index, gcid);
+ prlog(PR_INFO, "OCAPI: Link status: %016llx\n", reg);
+ return OPAL_HARDWARE;
+}
+
+static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
+{
+ struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
+ uint64_t reg;
+ int64_t link_width, rc = OPAL_SUCCESS;
+
+ reg = get_odl_status(dev->npu->chip_id, dev->index);
+ link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg);
+ switch (link_width) {
+ case 0b0001:
+ *val = OPAL_SHPC_LINK_UP_x4;
+ break;
+ case 0b0010:
+ *val = OPAL_SHPC_LINK_UP_x8;
+ break;
+ default:
+ rc = OPAL_HARDWARE;
+ }
+ return rc;
+}
+
+static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb)
+{
+ struct pci_slot *slot;
+
+ slot = pci_slot_alloc(phb, NULL);
+ if (!slot)
+ return slot;
+
+ /* TODO: Figure out other slot functions */
+ slot->ops.get_presence_state = NULL;
+ slot->ops.get_link_state = npu2_opencapi_get_link_state;
+ slot->ops.get_power_state = NULL;
+ slot->ops.get_attention_state = NULL;
+ slot->ops.get_latch_state = NULL;
+ slot->ops.set_power_state = NULL;
+ slot->ops.set_attention_state = NULL;
+
+ return slot;
+}
+
+static int64_t npu2_opencapi_pcicfg_check(struct npu2_dev *dev, uint32_t offset,
+ uint32_t size)
+{
+ if (!dev || offset > 0xfff || (offset & (size - 1)))
+ return OPAL_PARAMETER;
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn,
+ uint32_t offset, uint32_t size,
+ void *data)
+{
+ uint64_t cfg_addr;
+ struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
+ uint64_t genid_base;
+ int64_t rc;
+
+ rc = npu2_opencapi_pcicfg_check(dev, offset, size);
+ if (rc)
+ return rc;
+
+ genid_base = dev->bars[1].npu2_bar.base +
+ (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
+
+ cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
+ cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
+ NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER |
+ NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER,
+ cfg_addr, bdfn);
+ cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER,
+ cfg_addr, offset & ~3u);
+
+ out_be64((uint64_t *)genid_base, cfg_addr);
+ sync();
+
+ switch (size) {
+ case 1:
+ *((uint8_t *)data) =
+ in_8((volatile uint8_t *)(genid_base + 128 + (offset & 3)));
+ break;
+ case 2:
+ *((uint16_t *)data) =
+ in_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2)));
+ break;
+ case 4:
+ *((uint32_t *)data) = in_le32((volatile uint32_t *)(genid_base + 128));
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+#define NPU2_OPENCAPI_PCI_CFG_READ(size, type) \
+static int64_t npu2_opencapi_pcicfg_read##size(struct phb *phb, \
+ uint32_t bdfn, \
+ uint32_t offset, \
+ type *data) \
+{ \
+ /* Initialize data in case of error */ \
+ *data = (type)0xffffffff; \
+ return npu2_opencapi_pcicfg_read(phb, bdfn, offset, \
+ sizeof(type), data); \
+}
+
+static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn,
+ uint32_t offset, uint32_t size,
+ uint32_t data)
+{
+ uint64_t cfg_addr;
+ struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
+ uint64_t genid_base;
+ int64_t rc;
+
+ rc = npu2_opencapi_pcicfg_check(dev, offset, size);
+ if (rc)
+ return rc;
+
+ genid_base = dev->bars[1].npu2_bar.base +
+ (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
+
+ cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
+ cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
+ NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER |
+ NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER,
+ cfg_addr, bdfn);
+ cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER,
+ cfg_addr, offset & ~3u);
+
+ out_be64((uint64_t *)genid_base, cfg_addr);
+ sync();
+
+ switch (size) {
+ case 1:
+ out_8((volatile uint8_t *)(genid_base + 128 + (offset & 3)),
+ data);
+ break;
+ case 2:
+ out_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2)),
+ data);
+ break;
+ case 4:
+ out_le32((volatile uint32_t *)(genid_base + 128), data);
+ break;
+ default:
+ return OPAL_PARAMETER;
+ }
+
+ return OPAL_SUCCESS;
+}
+
+#define NPU2_OPENCAPI_PCI_CFG_WRITE(size, type) \
+static int64_t npu2_opencapi_pcicfg_write##size(struct phb *phb, \
+ uint32_t bdfn, \
+ uint32_t offset, \
+ type data) \
+{ \
+ return npu2_opencapi_pcicfg_write(phb, bdfn, offset, \
+ sizeof(type), data); \
+}
+
+NPU2_OPENCAPI_PCI_CFG_READ(8, u8)
+NPU2_OPENCAPI_PCI_CFG_READ(16, u16)
+NPU2_OPENCAPI_PCI_CFG_READ(32, u32)
+NPU2_OPENCAPI_PCI_CFG_WRITE(8, u8)
+NPU2_OPENCAPI_PCI_CFG_WRITE(16, u16)
+NPU2_OPENCAPI_PCI_CFG_WRITE(32, u32)
+
+static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
+ void *data __unused)
+{
+ uint32_t irq;
+ struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
+ uint64_t block = index_to_block(dev->index);
+ uint64_t stacku = index_to_stacku(dev->index);
+ uint64_t dsisr, dar, tfc, handle;
+
+ /*
+ * Pass the hw irq number for the translation fault irq
+ * irq levels 23 -> 26 are for translation faults, 1 per brick
+ */
+ irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL;
+ if (stacku == NPU2_STACK_STCK_2U)
+ irq += 2;
+ if (block == NPU2_BLOCK_OTL1)
+ irq++;
+
+ /*
+ * Add the addresses of the registers needed by the OS to handle
+ * faults. The OS accesses them by mmio.
+ */
+ dsisr = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DSISR(stacku, block);
+ dar = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block);
+ tfc = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block);
+ handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku,
+ block);
+ dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq);
+ dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio",
+ hi32(dsisr), lo32(dsisr),
+ hi32(dar), lo32(dar),
+ hi32(tfc), lo32(tfc),
+ hi32(handle), lo32(handle));
+ return 0;
+}
+
+static void npu2_opencapi_final_fixup(struct phb *phb)
+{
+ pci_walk_dev(phb, NULL, npu2_add_mmio_regs, NULL);
+}
+
static int setup_irq(struct npu2 *p)
{
uint64_t reg, mmio_addr;
@@ -721,6 +1130,130 @@ static int setup_irq(struct npu2 *p)
return 0;
}
+static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
+ struct npu2_dev *dev)
+{
+ uint32_t dev_index, npu_index;
+ struct dt_node *dn_phb, *dn;
+ struct pci_slot *slot;
+ char port_name[17];
+ uint64_t mm_win[2];
+ int retries = 20;
+ int rc;
+
+ dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
+ npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index");
+
+ /* Populate PHB device node */
+ phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0],
+ &mm_win[1]);
+ prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
+ mm_win[0], mm_win[1]);
+ dn_phb = dt_new_addr(dt_root, "pciex", mm_win[0]);
+ assert(dn_phb);
+ dt_add_property_strings(dn_phb,
+ "compatible",
+ "ibm,power9-npu-opencapi-pciex",
+ "ibm,ioda2-npu2-opencapi-phb");
+ dt_add_property_strings(dn_phb, "device_type", "pciex");
+ dt_add_property(dn_phb, "reg", &n->regs, sizeof(n->regs));
+ dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index);
+ dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id);
+ dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base);
+ dt_add_property_cells(dn_phb, "ibm,npcq", dn_link->phandle);
+ dt_add_property_cells(dn_phb, "ibm,links", 1);
+ dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win));
+ dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0);
+ dt_add_property_cells(dn_phb, "bus-range", 0, 0xff);
+ dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM);
+
+ n->mm_base = mm_win[0];
+ n->mm_size = mm_win[1];
+
+ dt_add_property_cells(dn_phb, "ranges", 0x02000000,
+ hi32(n->mm_base), lo32(n->mm_base),
+ hi32(n->mm_base), lo32(n->mm_base),
+ hi32(n->mm_size), lo32(n->mm_size));
+
+ dev->type = NPU2_DEV_TYPE_OPENCAPI;
+ dev->npu = n;
+ dev->dt_node = dn_link;
+ dev->phb_ocapi.dt_node = dn_phb;
+ dev->phb_ocapi.ops = &npu2_opencapi_ops;
+ dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
+ dev->phb_ocapi.scan_map = 1;
+ dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
+ dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
+ dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
+ dev->bdfn = 0;
+ n->total_devices++;
+
+ /* Find I2C port for handling device reset */
+ snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d",
+ dev->npu->chip_id, platform.ocapi->i2c_engine,
+ platform.ocapi->i2c_port);
+ prlog(PR_DEBUG, "OCAPI: Looking for I2C port %s\n", port_name);
+
+ dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") {
+ if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) {
+ dev->i2c_port_id_ocapi = dt_prop_get_u32(dn, "ibm,opal-id");
+ break;
+ }
+ }
+
+ if (!dev->i2c_port_id_ocapi) {
+ prlog(PR_ERR, "OCAPI: Couldn't find I2C port %s\n", port_name);
+ goto failed;
+ }
+
+ /* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
+ /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
+ setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev);
+ /* Procedure 13.1.3.9 - AFU Config BARs */
+ setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
+
+ set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00);
+
+ npu2_opencapi_phy_setup(dev);
+
+ do {
+ rc = odl_train(n->chip_id, dev->index, dev);
+ } while (rc != OPAL_SUCCESS && --retries);
+
+ if (rc != OPAL_SUCCESS && retries == 0) {
+ /**
+ * @fwts-label OCAPILinkTrainingFailed
+ * @fwts-advice The OpenCAPI link training procedure failed.
+ * This indicates a hardware or firmware bug. OpenCAPI
+ * functionality will not be available on this link.
+ */
+ prlog(PR_ERR, "OCAPI: Link %d on chip %u failed to train\n",
+ dev->index, n->chip_id);
+ prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
+ get_odl_status(n->chip_id, dev->index));
+ goto failed;
+ }
+
+ otl_enabletx(n->chip_id, n->xscom_base, dev->index);
+
+ slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
+ if (!slot)
+ {
+ /**
+ * @fwts-label OCAPICannotCreatePHBSlot
+ * @fwts-advice Firmware probably ran out of memory creating
+ * NPU slot. OpenCAPI functionality could be broken.
+ */
+ prlog(PR_ERR, "OCAPI: Cannot create PHB slot\n");
+ }
+
+ pci_register_phb(&dev->phb_ocapi, OPAL_DYNAMIC_PHB_ID);
+ return;
+failed:
+ dt_add_property_string(dn_phb, "status", "error");
+ return;
+}
+
static void npu2_opencapi_probe(struct dt_node *dn)
{
struct dt_node *link;
@@ -729,7 +1262,7 @@ static void npu2_opencapi_probe(struct dt_node *dn)
uint64_t reg[2];
uint64_t dev_index;
struct npu2 *n;
- int rc;
+ int rc, i = 0;
path = dt_get_path(dn);
gcid = dt_get_chip_id(dn);
@@ -747,6 +1280,8 @@ static void npu2_opencapi_probe(struct dt_node *dn)
gcid, index, links, path);
free(path);
+ assert(platform.ocapi);
+
scom_base = dt_get_address(dn, 0, NULL);
prlog(PR_INFO, "OCAPI: SCOM Base: %08x\n", scom_base);
@@ -779,6 +1314,11 @@ static void npu2_opencapi_probe(struct dt_node *dn)
if (rc)
goto failed;
+ dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") {
+ npu2_opencapi_setup_device(link, n, &n->devices[i]);
+ i++;
+ }
+
return;
failed:
free(n);
@@ -791,3 +1331,39 @@ void probe_npu2_opencapi(void)
dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
npu2_opencapi_probe(np_npu);
}
+
+static const struct phb_ops npu2_opencapi_ops = {
+ .cfg_read8 = npu2_opencapi_pcicfg_read8,
+ .cfg_read16 = npu2_opencapi_pcicfg_read16,
+ .cfg_read32 = npu2_opencapi_pcicfg_read32,
+ .cfg_write8 = npu2_opencapi_pcicfg_write8,
+ .cfg_write16 = npu2_opencapi_pcicfg_write16,
+ .cfg_write32 = npu2_opencapi_pcicfg_write32,
+ .choose_bus = NULL,
+ .device_init = NULL,
+ .phb_final_fixup = npu2_opencapi_final_fixup,
+ .ioda_reset = NULL,
+ .papr_errinjct_reset = NULL,
+ .pci_reinit = NULL,
+ .set_phb_mem_window = NULL,
+ .phb_mmio_enable = NULL,
+ .map_pe_mmio_window = NULL,
+ .map_pe_dma_window = NULL,
+ .map_pe_dma_window_real = NULL,
+ .pci_msi_eoi = NULL,
+ .set_xive_pe = NULL,
+ .get_msi_32 = NULL,
+ .get_msi_64 = NULL,
+ .set_pe = npu2_set_pe,
+ .set_peltv = NULL,
+ .eeh_freeze_status = npu2_freeze_status, /* TODO */
+ .eeh_freeze_clear = NULL,
+ .eeh_freeze_set = NULL,
+ .next_error = NULL,
+ .err_inject = NULL,
+ .get_diag_data = NULL,
+ .get_diag_data2 = NULL,
+ .set_capi_mode = NULL,
+ .set_capp_recovery = NULL,
+ .tce_kill = NULL,
+};
diff --git a/hw/npu2.c b/hw/npu2.c
index 3e30f19..7f209de 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1001,17 +1001,17 @@ static int64_t npu2_map_pe_dma_window(struct phb *phb,
return OPAL_SUCCESS;
}
-static int64_t npu2_set_pe(struct phb *phb,
- uint64_t pe_num,
- uint64_t bdfn,
- uint8_t bcompare,
- uint8_t dcompare,
- uint8_t fcompare,
- uint8_t action)
+int64_t npu2_set_pe(struct phb *phb,
+ uint64_t pe_num,
+ uint64_t bdfn,
+ uint8_t bcompare,
+ uint8_t dcompare,
+ uint8_t fcompare,
+ uint8_t action)
{
struct npu2 *p;
struct npu2_dev *dev;
- uint64_t reg, val;
+ uint64_t reg, val, pe_bdfn;
/* Sanity check */
if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
@@ -1036,21 +1036,31 @@ static int64_t npu2_set_pe(struct phb *phb,
if (!dev)
return OPAL_PARAMETER;
- val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
- val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
- val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
-
- if (!NPU2DEV_BRICK(dev))
- reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
- NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0);
+ if (dev->type == NPU2_DEV_TYPE_OPENCAPI)
+ pe_bdfn = dev->bdfn;
else
- reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
- NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0);
+ pe_bdfn = dev->nvlink.gpu_bdfn;
+
+ if (dev->type == NPU2_DEV_TYPE_NVLINK) {
+ val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
+ val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
+ val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
+
+ if (!NPU2DEV_BRICK(dev))
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
+ NPU2_BLOCK_CTL,
+ NPU2_CQ_BRICK0_BDF2PE_MAP0);
+ else
+ reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
+ NPU2_BLOCK_CTL,
+ NPU2_CQ_BRICK1_BDF2PE_MAP0);
+
+ npu2_write(p, reg, val);
+ }
- npu2_write(p, reg, val);
val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
- val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
+ val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
p->bdf2pe_cache[dev->index] = val;
@@ -1111,12 +1121,12 @@ static struct pci_slot *npu2_slot_create(struct phb *phb)
return slot;
}
-static int64_t npu2_freeze_status(struct phb *phb __unused,
- uint64_t pe_number __unused,
- uint8_t *freeze_state,
- uint16_t *pci_error_type __unused,
- uint16_t *severity __unused,
- uint64_t *phb_status __unused)
+int64_t npu2_freeze_status(struct phb *phb __unused,
+ uint64_t pe_number __unused,
+ uint8_t *freeze_state,
+ uint16_t *pci_error_type __unused,
+ uint16_t *severity __unused,
+ uint64_t *phb_status __unused)
{
/*
* FIXME: When it's called by skiboot PCI config accessor,
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index f8158c7..00a72aa 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -116,6 +116,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38)
#define NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57)
#define NPU2_CQ_SM_MISC_CFG1 0x008
+#define NPU2_CQ_SM_MISC_CFG2 0x148
#define NPU2_PB_EPSILON 0x010
#define NPU2_TIMER_CFG 0x018
#define NPU2_GPU0_MEM_BAR 0x020
@@ -187,7 +188,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52)
#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55)
#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56)
-#define NPU2_CQ_FUTURE_CFG1 0x008
+#define NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG 0x0B0
+#define NPU2_CQ_CTL_MISC_MMIOPA_ADDR PPC_BITMASK(1,35)
+#define NPU2_CQ_CTL_MISC_MMIOPA_SIZE PPC_BITMASK(39,43)
+#define NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG 0x0B8
+#define NPU2_CQ_CTL_MISC_CFG1 0x008
#define NPU2_CQ_FUTURE_CFG2 0x010
#define NPU2_CQ_FUTURE_CFG3 0x018
#define NPU2_CQ_PERF_MATCH 0x020
@@ -219,6 +224,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_CQ_C_ERR_RPT_MASK1 0x0E8
#define NPU2_CQ_C_ERR_RPT_HOLD0 0x0F0
#define NPU2_CQ_C_ERR_RPT_HOLD1 0x0F8
+#define NPU2_CQ_CTL_CONFIG_ADDR0 0x120
+#define NPU2_CQ_CTL_CONFIG_ADDR1 0x128
+#define NPU2_CQ_CTL_CONFIG_ADDR_ENABLE PPC_BIT(0)
+#define NPU2_CQ_CTL_CONFIG_ADDR_STATUS PPC_BITMASK(1, 3)
+#define NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER PPC_BITMASK(4, 11)
+#define NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER PPC_BITMASK(12, 16)
+#define NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER PPC_BITMASK(17, 19)
+#define NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER PPC_BITMASK(20, 31)
+#define NPU2_CQ_CTL_CONFIG_ADDR_TYPE PPC_BIT(32)
#define NPU2_CQ_CTL_FENCE_CONTROL_0 0x140
#define NPU2_CQ_CTL_FENCE_CONTROL_1 0x148
#define NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1)
@@ -331,9 +345,16 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_OTL_TLX_CREDITS_VC3_CREDITS PPC_BITMASK(24, 31)
#define NPU2_OTL_TLX_CREDITS_DCP0_CREDITS PPC_BITMASK(32, 39)
#define NPU2_OTL_TLX_CREDITS_DCP1_CREDITS PPC_BITMASK(56, 63)
+#define NPU2_OTL_VC_CREDITS(stack, block) NPU2_REG_OFFSET(stack, block, 0x090)
#define NPU2_OTL_CONFIG1(stack, block) NPU2_REG_OFFSET(stack, block, 0x058)
+#define NPU2_OTL_CONFIG1_TX_TEMP1_EN PPC_BIT(1)
+#define NPU2_OTL_CONFIG1_TX_TEMP2_EN PPC_BIT(2)
+#define NPU2_OTL_CONFIG1_TX_TEMP3_EN PPC_BIT(3)
#define NPU2_OTL_CONFIG1_TX_DRDY_WAIT PPC_BITMASK(5, 7)
#define NPU2_OTL_CONFIG1_TX_TEMP0_RATE PPC_BITMASK(8, 11)
+#define NPU2_OTL_CONFIG1_TX_TEMP1_RATE PPC_BITMASK(12, 15)
+#define NPU2_OTL_CONFIG1_TX_TEMP2_RATE PPC_BITMASK(16, 19)
+#define NPU2_OTL_CONFIG1_TX_TEMP3_RATE PPC_BITMASK(20, 23)
#define NPU2_OTL_CONFIG1_TX_CRET_FREQ PPC_BITMASK(32, 34)
#define NPU2_OTL_CONFIG1_TX_AGE_FREQ PPC_BITMASK(35, 39)
#define NPU2_OTL_CONFIG1_TX_RS2_HPWAIT PPC_BITMASK(40, 45)
@@ -342,6 +363,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS PPC_BIT(58)
#define NPU2_OTL_CONFIG1_TX_STOP_LINK PPC_BIT(59)
#define NPU2_OTL_CONFIG1_TX_STOP_ON_UE PPC_BIT(60)
+#define NPU2_OTL_CONFIG1_TX_T0_MASK_CRTN0 PPC_BIT(61)
+#define NPU2_OTL_CONFIG1_TX_T123_MASK_CRTN0 PPC_BIT(62)
+#define NPU2_OTL_CONFIG2(stack, block) NPU2_REG_OFFSET(stack, block, 0x0C0)
+#define NPU2_OTL_CONFIG2_TX_SEND_EN PPC_BIT(0)
+
+#define NPU2_OTL_OSL_DSISR(stack, block) NPU2_REG_OFFSET(stack, block, 0x000)
+#define NPU2_OTL_OSL_DAR(stack, block) NPU2_REG_OFFSET(stack, block, 0x008)
+#define NPU2_OTL_OSL_TFC(stack, block) NPU2_REG_OFFSET(stack, block, 0x010)
+#define NPU2_OTL_OSL_PEHANDLE(stack, block) NPU2_REG_OFFSET(stack, block, 0x018)
/* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above
* there is only a single instance of each of these in the NPU so we
@@ -420,6 +450,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_MISC_IRQ_LOG13 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x368)
#define NPU2_MISC_IRQ_LOG14 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x370)
#define NPU2_MISC_IRQ_LOG15 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x378)
+#define NPU2_MISC_FENCE_ON_ERROR_EN_FIR2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x400)
#define NPU2_MISC_IRQ_ON_ERROR_EN_FIR2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x408)
@@ -551,6 +582,26 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE PPC_BIT(52)
#define PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE PPC_BIT(57)
+#define OB0_ODL0_CONFIG 0x901082A
+#define OB0_ODL1_CONFIG 0x901082B
+#define OB3_ODL0_CONFIG 0xC01082A
+#define OB3_ODL1_CONFIG 0xC01082B
+#define OB_ODL_CONFIG_RESET PPC_BIT(0)
+#define OB_ODL_CONFIG_VERSION PPC_BITMASK(2, 7)
+#define OB_ODL_CONFIG_TRAIN_MODE PPC_BITMASK(8, 11)
+#define OB_ODL_CONFIG_SUPPORTED_MODES PPC_BITMASK(12, 15)
+#define OB_ODL_CONFIG_X4_BACKOFF_ENABLE PPC_BIT(16)
+#define OB_ODL_CONFIG_PHY_CNTR_LIMIT PPC_BITMASK(20, 23)
+#define OB_ODL_CONFIG_DEBUG_ENABLE PPC_BIT(33)
+#define OB_ODL_CONFIG_FWD_PROGRESS_TIMER PPC_BITMASK(40, 43)
+
+#define OB0_ODL0_STATUS 0x901082C
+#define OB0_ODL1_STATUS 0x901082D
+#define OB3_ODL0_STATUS 0xC01082C
+#define OB3_ODL1_STATUS 0xC01082D
+#define OB_ODL_STATUS_TRAINED_MODE PPC_BITMASK(0,3)
+#define OB_ODL_STATUS_TRAINING_STATE_MACHINE PPC_BITMASK(49, 51)
+
#define OB0_ODL0_TRAINING_STATUS 0x901082E
#define OB0_ODL1_TRAINING_STATUS 0x901082F
#define OB3_ODL0_TRAINING_STATUS 0xC01082E
diff --git a/include/npu2.h b/include/npu2.h
index 2922d21..bd5bced 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -207,5 +207,17 @@ extern int nv_zcal_nominal;
bool is_p9dd1(void);
void npu2_opencapi_phy_setup(struct npu2_dev *dev);
void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
-
+int64_t npu2_set_pe(struct phb *phb,
+ uint64_t pe_num,
+ uint64_t bdfn,
+ uint8_t bcompare,
+ uint8_t dcompare,
+ uint8_t fcompare,
+ uint8_t action);
+int64_t npu2_freeze_status(struct phb *phb __unused,
+ uint64_t pe_number __unused,
+ uint8_t *freeze_state,
+ uint16_t *pci_error_type __unused,
+ uint16_t *severity __unused,
+ uint64_t *phb_status __unused);
#endif /* __NPU2_H */
--
git-series 0.9.1
More information about the Skiboot
mailing list