[Skiboot] [PATCH v3 05/10] npu2-opencapi: Configure NPU for OpenCAPI
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Wed Jan 31 20:34:45 AEDT 2018
Scan the device tree for NPUs with OpenCAPI links and configure the NPU per
the initialisation sequence in the NPU OpenCAPI workbook.
Training of individual links and setup of per-AFU/link configuration will
be in a later patch.
Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
---
v1->v2:
- fix coding style (Fred)
v2->v3:
- Mask FIRs for repurposed nvlink signals (Fred)
- Fix IRQ register mnemonic (Fred)
---
core/init.c | 2 +-
hw/Makefile.inc | 3 +-
hw/npu2-opencapi.c | 838 +++++++++++++++++++++++++++++++++++++++++++++-
hw/npu2.c | 33 +-
include/npu2-regs.h | 90 +++++-
include/npu2.h | 2 +-
include/skiboot.h | 1 +-
7 files changed, 956 insertions(+), 13 deletions(-)
create mode 100644 hw/npu2-opencapi.c
diff --git a/core/init.c b/core/init.c
index ec9f329..fcf7a3f 100644
--- a/core/init.c
+++ b/core/init.c
@@ -1042,6 +1042,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
/* Probe NPUs */
probe_npu();
probe_npu2();
+ /* TODO: Eventually, we'll do NVLink and OpenCAPI together */
+ probe_npu2_opencapi();
/* Initialize PCI */
pci_init_slots();
diff --git a/hw/Makefile.inc b/hw/Makefile.inc
index 27d8a38..cfc8646 100644
--- a/hw/Makefile.inc
+++ b/hw/Makefile.inc
@@ -7,7 +7,8 @@ HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o
HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
-HW_OBJS += npu2-common.o phys-map.o sbe-p9.o capp.o occ-sensor.o vas.o p9-adu.o
+HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o occ-sensor.o
+HW_OBJS += vas.o p9-adu.o
HW=hw/built-in.o
# FIXME hack this for now
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
new file mode 100644
index 0000000..a66c8f7
--- /dev/null
+++ b/hw/npu2-opencapi.c
@@ -0,0 +1,838 @@
+/* Copyright 2013-2018 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Support for OpenCAPI on POWER9 NPUs
+ *
+ * This file provides support for OpenCAPI as implemented on POWER9.
+ *
+ * At present, we initialise the NPU separately from the NVLink code in npu2.c.
+ * As such, we don't currently support mixed NVLink and OpenCAPI configurations
+ * on the same NPU for machines such as Witherspoon.
+ *
+ * Procedure references in this file are to the POWER9 OpenCAPI NPU Workbook
+ * (IBM internal document).
+ *
+ * TODO:
+ * - Support for mixed NVLink and OpenCAPI on the same NPU
+ * - Support for link ganging (one AFU using multiple links)
+ * - Link reset and error handling
+ * - Presence detection
+ * - Consume HDAT NPU information
+ * - LPC Memory support
+ */
+
+#include <skiboot.h>
+#include <xscom.h>
+#include <io.h>
+#include <timebase.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-slot.h>
+#include <interrupts.h>
+#include <opal.h>
+#include <opal-api.h>
+#include <npu2.h>
+#include <npu2-regs.h>
+#include <phys-map.h>
+#include <xive.h>
+#include <p9-adu.h>
+#include <i2c.h>
+
+#define NPU_IRQ_LEVELS 35
+
+/*
+ * Map an OpenCAPI-capable brick index to the NPU stack that hosts it.
+ *
+ * Bricks 2 and 3 map to NPU2_STACK_STCK_1, bricks 4 and 5 to
+ * NPU2_STACK_STCK_2. Any other index trips an assertion.
+ */
+static inline uint64_t index_to_stack(uint64_t index)
+{
+	if (index == 2 || index == 3)
+		return NPU2_STACK_STCK_1;
+
+	if (index == 4 || index == 5)
+		return NPU2_STACK_STCK_2;
+
+	assert(false);
+}
+
+/*
+ * Map an OpenCAPI-capable brick index to the "U" variant of its NPU stack.
+ *
+ * Bricks 2 and 3 map to NPU2_STACK_STCK_1U, bricks 4 and 5 to
+ * NPU2_STACK_STCK_2U. Any other index trips an assertion.
+ */
+static inline uint64_t index_to_stacku(uint64_t index)
+{
+	if (index == 2 || index == 3)
+		return NPU2_STACK_STCK_1U;
+
+	if (index == 4 || index == 5)
+		return NPU2_STACK_STCK_2U;
+
+	assert(false);
+}
+
+/*
+ * Map an OpenCAPI-capable brick index to its OTL block within the stack.
+ *
+ * The even bricks (2, 4) use NPU2_BLOCK_OTL0 and the odd bricks (3, 5) use
+ * NPU2_BLOCK_OTL1. Any other index trips an assertion.
+ */
+static inline uint64_t index_to_block(uint64_t index)
+{
+	if (index == 2 || index == 4)
+		return NPU2_BLOCK_OTL0;
+
+	if (index == 3 || index == 5)
+		return NPU2_BLOCK_OTL1;
+
+	assert(false);
+}
+
+/*
+ * Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5
+ *
+ * Step 1 of the brick configuration: update the transport MUX fields in
+ * NPU2_MISC_OPTICAL_IO_CFG0 for brick @index and, for bricks 4 and 5, set
+ * the matching SEL_0x_NPU_NOT_PB field in PU_IOE_PB_MISC_CFG.
+ *
+ * @gcid:      chip to operate on
+ * @scom_base: SCOM base of the NPU
+ * @index:     brick index; only 2-5 are valid, anything else asserts
+ * @type:      must be NPU2_DEV_TYPE_OPENCAPI (asserted)
+ */
+static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base,
+				       int index, enum npu2_dev_type type)
+{
+	/* Step 1 - Set Transport MUX controls to select correct OTL or NTL */
+	uint64_t reg;
+	uint64_t field;
+
+	/* TODO: Rework this to select for NVLink too */
+	assert(type == NPU2_DEV_TYPE_OPENCAPI);
+
+	prlog(PR_DEBUG, "OCAPI: %s: Setting transport mux controls\n", __func__);
+
+	/* Optical IO Transport Mux Config for Bricks 0-2 and 4-5 */
+	reg = npu2_scom_read(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0,
+			     NPU2_MISC_DA_LEN_8B);
+	switch (index) {
+	case 0:
+	case 1:
+		/* not valid for OpenCAPI */
+		assert(false);
+		break;
+	case 2:	/* OTL1.0 */
+		/* Clear this brick's bit in the NDL mux, set it in the OC mux */
+		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg);
+		field &= ~0b100;
+		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg,
+			       field);
+		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg);
+		field |= 0b10;
+		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg,
+			       field);
+		break;
+	case 3:	/* OTL1.1 */
+		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg);
+		field &= ~0b010;
+		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg,
+			       field);
+		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg);
+		field |= 0b01;
+		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg,
+			       field);
+		break;
+	case 4:	/* OTL2.0 */
+		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg);
+		field |= 0b10;
+		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg,
+			       field);
+		break;
+	case 5:	/* OTL2.1 */
+		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg);
+		field |= 0b01;
+		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg,
+			       field);
+		break;
+	default:
+		assert(false);
+	}
+	npu2_scom_write(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0,
+			NPU2_MISC_DA_LEN_8B, reg);
+
+	/*
+	 * PowerBus Optical Miscellaneous Config Register - select
+	 * OpenCAPI for b4/5 and A-Link for b3
+	 */
+	xscom_read(gcid, PU_IOE_PB_MISC_CFG, &reg);
+	switch (index) {
+	case 0:
+	case 1:
+	case 2:
+	case 3:
+		/* Nothing to change; the value read is written back as-is */
+		break;
+	case 4:
+		reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB, reg, 1);
+		break;
+	case 5:
+		reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB, reg, 1);
+		break;
+	}
+	xscom_write(gcid, PU_IOE_PB_MISC_CFG, reg);
+}
+
+/*
+ * Step 2 of the brick configuration: enable the MUXes that connect the ODLs
+ * to the PHY for the OBUS serving brick @index.
+ *
+ * Bricks 2-3 use OBUS_LL0_IOOL_PHY_CONFIG and bricks 4-5 use
+ * OBUS_LL3_IOOL_PHY_CONFIG; any other index asserts. Both ODLs are enabled,
+ * the ODL/PHY swap bit follows platform.ocapi->odl_phy_swap, and the A-Link
+ * and NVLink link layer enables are cleared.
+ */
+static void enable_odl_phy_mux(uint32_t gcid, int index)
+{
+	uint64_t reg;
+	uint64_t phy_config_scom;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Enabling ODL to PHY MUXes\n", __func__);
+	/* Step 2 - Enable MUXes for ODL to PHY connection */
+	switch (index) {
+	case 2:
+	case 3:
+		phy_config_scom = OBUS_LL0_IOOL_PHY_CONFIG;
+		break;
+	case 4:
+	case 5:
+		phy_config_scom = OBUS_LL3_IOOL_PHY_CONFIG;
+		break;
+	default:
+		assert(false);
+	}
+
+	/* PowerBus OLL PHY Training Config Register */
+	xscom_read(gcid, phy_config_scom, &reg);
+
+	/* Enable ODLs to use shared PHYs */
+	reg |= OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED;
+	reg |= OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED;
+
+	/*
+	 * Swap ODL1 to use brick 2 lanes instead of brick 1 lanes if using a
+	 * 22-pin cable for OpenCAPI connection.
+	 */
+	if (platform.ocapi->odl_phy_swap)
+		reg |= OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP;
+	else
+		reg &= ~OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP;
+
+	/* Disable A-Link link layers */
+	reg &= ~OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED;
+	reg &= ~OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED;
+
+	/* Disable NV-Link link layers */
+	reg &= ~OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED;
+	reg &= ~OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED;
+	reg &= ~OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED;
+	xscom_write(gcid, phy_config_scom, reg);
+}
+
+/*
+ * Step 3 of the brick configuration: disable the A-Link framers/parsers.
+ *
+ * The same value (framer and parser disable bits for FP0 and FP1, all other
+ * bits zero) is written to each of the four PU_IOE_PB_FPxx_CFG registers on
+ * chip @gcid.
+ */
+static void disable_alink_fp(uint32_t gcid)
+{
+	uint64_t disable_all;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Disabling A-Link framer/parsers\n", __func__);
+	/* Step 3 - Disable A-Link framers/parsers */
+	/* TODO: Confirm if needed on OPAL system */
+
+	disable_all = 0;
+	disable_all |= PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE |
+		       PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE;
+	disable_all |= PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE |
+		       PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE;
+
+	xscom_write(gcid, PU_IOE_PB_FP01_CFG, disable_all);
+	xscom_write(gcid, PU_IOE_PB_FP23_CFG, disable_all);
+	xscom_write(gcid, PU_IOE_PB_FP45_CFG, disable_all);
+	xscom_write(gcid, PU_IOE_PB_FP67_CFG, disable_all);
+}
+
+/*
+ * Step 4 of the brick configuration: set the PowerBus HotPlug Mode registers.
+ *
+ * Reads PB_WEST_HP_MODE_CURR, sets the two-bit field for the optical link
+ * serving brick @index to 0b01 (bits 32-33 for bricks 2-3, bits 38-39 for
+ * bricks 4-5) and hands the result to p9_adu_set_pb_hp_mode(). Any other
+ * index asserts.
+ */
+static void set_pb_hp_opencapi(uint32_t gcid, int index)
+{
+	/* Step 4 - Set PowerBus HotPlug Mode Registers */
+	uint64_t reg;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Setting PowerBus Hotplug Mode registers\n", __func__);
+
+	xscom_read(gcid, PB_WEST_HP_MODE_CURR, &reg);
+	switch (index) {
+	case 2:
+	case 3:
+		/* Configure OPT0 as an OpenCAPI link */
+		reg = SETFIELD(PPC_BITMASK(32, 33), reg, 0b01);
+		break;
+	case 4:
+	case 5:
+		/* Configure OPT3 as an OpenCAPI link */
+		reg = SETFIELD(PPC_BITMASK(38, 39), reg, 0b01);
+		break;
+	default:
+		assert(false);
+	}
+
+	p9_adu_set_pb_hp_mode(gcid, reg);
+}
+
+/*
+ * Step 5 of the brick configuration: enable clocks in the XSL.
+ *
+ * Writes NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE (and nothing else) to the
+ * XSL wrap config register of the stack that hosts brick @index.
+ */
+static void enable_xsl_clocks(uint32_t gcid, uint32_t scom_base, int index)
+{
+	uint64_t wrap_cfg;
+
+	/* Step 5 - Enable Clocks in XSL */
+
+	prlog(PR_DEBUG, "OCAPI: %s: Enable clocks in XSL\n", __func__);
+
+	wrap_cfg = NPU2_REG_OFFSET(index_to_stack(index), NPU2_BLOCK_XSL,
+				   NPU2_XSL_WRAP_CFG);
+	npu2_scom_write(gcid, scom_base, wrap_cfg, NPU2_MISC_DA_LEN_8B,
+			NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE);
+}
+
+#define CQ_CTL_STATUS_TIMEOUT 10 /* milliseconds */
+
+/*
+ * Request a fence state for a brick and wait for the hardware to report it.
+ *
+ * Writes @status into the REQUEST_FENCE field of the brick's fence control
+ * register, then polls the brick's AM_FENCED field in NPU2_CQ_CTL_STATUS
+ * once per millisecond until it equals @status or CQ_CTL_STATUS_TIMEOUT
+ * milliseconds have elapsed.
+ *
+ * Returns OPAL_SUCCESS when the status matches, OPAL_HARDWARE on timeout.
+ */
+static int set_fence_control(uint32_t gcid, uint32_t scom_base,
+			     int index, uint8_t status)
+{
+	uint64_t deadline = mftb() + msecs_to_tb(CQ_CTL_STATUS_TIMEOUT);
+	int stack = index_to_stack(index);
+	int block = index_to_block(index);
+	bool brick0 = (block == NPU2_BLOCK_OTL0);
+	uint64_t fence_reg, status_reg, fenced_mask, val;
+	uint8_t observed;
+
+	/* Each brick of the stack has its own fence control register */
+	fence_reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
+				    brick0 ? NPU2_CQ_CTL_FENCE_CONTROL_0 :
+					     NPU2_CQ_CTL_FENCE_CONTROL_1);
+	status_reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
+				     NPU2_CQ_CTL_STATUS);
+	fenced_mask = brick0 ? NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED :
+			       NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED;
+
+	val = SETFIELD(NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE, 0ull, status);
+	npu2_scom_write(gcid, scom_base, fence_reg, NPU2_MISC_DA_LEN_8B, val);
+
+	/* Wait for fence status to update */
+	for (;;) {
+		val = npu2_scom_read(gcid, scom_base, status_reg,
+				     NPU2_MISC_DA_LEN_8B);
+		observed = GETFIELD(fenced_mask, val);
+		if (observed == status)
+			return OPAL_SUCCESS;
+
+		time_wait_ms(1);
+		if (tb_compare(mftb(), deadline) != TB_ABEFOREB)
+			break;
+	}
+
+	/**
+	 * @fwts-label OCAPIFenceStatusTimeout
+	 * @fwts-advice The NPU fence status did not update as expected. This
+	 * could be the result of a firmware or hardware bug. OpenCAPI
+	 * functionality could be broken.
+	 */
+	prlog(PR_ERR,
+	      "OCAPI: Fence status for brick %d stuck: expected 0x%x, got 0x%x\n",
+	      index, status, observed);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Step 6 of the brick configuration: set the NPCQ configuration for the
+ * brick at @index.
+ *
+ * In order: enables the brick's OTL, requests fence state 0b01, sets OCAPI
+ * mode plus the brick's OTL enable bit in CQ_CTL misc config, requests fence
+ * states 0b11 then 0b10, and finally sets OCAPI mode in CQ_DAT misc config
+ * and in CQ_SM misc config 0 of every SM block (SM_0..SM_3) of the stack.
+ *
+ * NOTE(review): the return value of set_fence_control() is ignored at all
+ * three call sites, so a fence timeout is only logged, not propagated.
+ */
+static void set_npcq_config(uint32_t gcid, uint32_t scom_base, int index)
+{
+ uint64_t reg, stack, block;
+
+ prlog(PR_DEBUG, "OCAPI: %s: Set NPCQ Config\n", __func__);
+ /* Step 6 - Set NPCQ configuration */
+ /* CQ_CTL Misc Config Register #0 */
+ stack = index_to_stack(index);
+ block = index_to_block(index);
+
+ /* Enable OTL */
+ npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block),
+ NPU2_MISC_DA_LEN_8B, NPU2_OTL_CONFIG0_EN);
+ set_fence_control(gcid, scom_base, index, 0b01);
+ reg = npu2_scom_read(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
+ NPU2_CQ_CTL_MISC_CFG),
+ NPU2_MISC_DA_LEN_8B);
+ /* Set OCAPI mode */
+ reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE;
+ if (block == NPU2_BLOCK_OTL0)
+ reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE;
+ else
+ reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE;
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
+ NPU2_CQ_CTL_MISC_CFG),
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ /* NPU Fenced */
+ set_fence_control(gcid, scom_base, index, 0b11);
+
+ /* NPU Half Fenced */
+ set_fence_control(gcid, scom_base, index, 0b10);
+
+ /* CQ_DAT Misc Config Register #1 */
+ reg = npu2_scom_read(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT,
+ NPU2_CQ_DAT_MISC_CFG),
+ NPU2_MISC_DA_LEN_8B);
+ /* Set OCAPI mode for bricks 2-5 */
+ reg |= NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE;
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT,
+ NPU2_CQ_DAT_MISC_CFG),
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ /* CQ_SM Misc Config Register #0 */
+ /* 'block' is reused here as the SM block iterator; the OTL block is no longer needed */
+ for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
+ reg = npu2_scom_read(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, block,
+ NPU2_CQ_SM_MISC_CFG0),
+ NPU2_MISC_DA_LEN_8B);
+ /* Set OCAPI mode for bricks 2-5 */
+ reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE;
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, block,
+ NPU2_CQ_SM_MISC_CFG0),
+ NPU2_MISC_DA_LEN_8B, reg);
+ }
+}
+
+/*
+ * Step 7 of the brick configuration: enable the XSL-XTS interfaces.
+ *
+ * Sets NPU2_XTS_CFG_OPENCAPI in the XTS config register, then sets the XSL
+ * enable bit in XTS config 2 that matches the stack hosting brick @index
+ * (XSL1 for stack 1, XSL2 for stack 2).
+ */
+static void enable_xsl_xts_interfaces(uint32_t gcid, uint32_t scom_base, int index)
+{
+	uint64_t cfg, cfg2, stack;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Enable XSL-XTS Interfaces\n", __func__);
+	/* Step 7 - Enable XSL-XTS interfaces */
+	/* XTS Config Register - Enable XSL-XTS interface */
+	cfg = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B);
+	cfg |= NPU2_XTS_CFG_OPENCAPI;
+	npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B, cfg);
+
+	/* XTS Config2 Register - Enable XSL1/2 */
+	cfg2 = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B);
+	stack = index_to_stack(index);
+	if (stack == NPU2_STACK_STCK_1)
+		cfg2 |= NPU2_XTS_CFG2_XSL1_ENA;
+	else if (stack == NPU2_STACK_STCK_2)
+		cfg2 |= NPU2_XTS_CFG2_XSL2_ENA;
+	npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B, cfg2);
+}
+
+/*
+ * Step 8 of the brick configuration: enable state-machine allocation.
+ *
+ * Sets NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC in the low-water marks
+ * register of every SM block (SM_0..SM_3) of the stack hosting brick @index.
+ */
+static void enable_sm_allocation(uint32_t gcid, uint32_t scom_base, int index)
+{
+	int stack = index_to_stack(index);
+	uint64_t sm, lwm_reg, val;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Enable State Machine Allocation\n", __func__);
+	/* Step 8 - Enable state-machine allocation */
+	/* Low-Water Marks Registers - Enable state machine allocation */
+	for (sm = NPU2_BLOCK_SM_0; sm <= NPU2_BLOCK_SM_3; sm++) {
+		lwm_reg = NPU2_REG_OFFSET(stack, sm, NPU2_LOW_WATER_MARKS);
+
+		val = npu2_scom_read(gcid, scom_base, lwm_reg,
+				     NPU2_MISC_DA_LEN_8B);
+		val |= NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC;
+		npu2_scom_write(gcid, scom_base, lwm_reg,
+				NPU2_MISC_DA_LEN_8B, val);
+	}
+}
+
+/*
+ * Step 9 of the brick configuration: enable PowerBus snooping.
+ *
+ * Sets NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS in CQ_SM misc config 0 of
+ * every SM block (SM_0..SM_3) of the stack hosting brick @index.
+ */
+static void enable_pb_snooping(uint32_t gcid, uint32_t scom_base, int index)
+{
+	int stack = index_to_stack(index);
+	uint64_t sm, cfg0_reg, val;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Enable PowerBus snooping\n", __func__);
+	/* Step 9 - Enable PowerBus snooping */
+	/* CQ_SM Misc Config Register #0 - Enable PowerBus snooping */
+	for (sm = NPU2_BLOCK_SM_0; sm <= NPU2_BLOCK_SM_3; sm++) {
+		cfg0_reg = NPU2_REG_OFFSET(stack, sm, NPU2_CQ_SM_MISC_CFG0);
+
+		val = npu2_scom_read(gcid, scom_base, cfg0_reg,
+				     NPU2_MISC_DA_LEN_8B);
+		val |= NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS;
+		npu2_scom_write(gcid, scom_base, cfg0_reg,
+				NPU2_MISC_DA_LEN_8B, val);
+	}
+}
+
+/*
+ * Run the nine brick configuration steps for the brick at @index, in the
+ * order the steps are numbered in the helpers' own comments (Step 1 through
+ * Step 9). The caller refers to this sequence as Procedure 13.1.3.1.
+ *
+ * The device type passed to the first step is always
+ * NPU2_DEV_TYPE_OPENCAPI. None of the helpers report errors back, so this
+ * function has no failure path.
+ */
+static void brick_config(uint32_t gcid, uint32_t scom_base, int index)
+{
+ /* Step 1 - transport MUX controls */
+ set_transport_mux_controls(gcid, scom_base, index, NPU2_DEV_TYPE_OPENCAPI);
+ /* Step 2 - ODL to PHY MUXes */
+ enable_odl_phy_mux(gcid, index);
+ /* Step 3 - A-Link framers/parsers (chip-wide, not per brick) */
+ disable_alink_fp(gcid);
+ /* Step 4 - PowerBus HotPlug mode */
+ set_pb_hp_opencapi(gcid, index);
+ /* Step 5 - XSL clocks */
+ enable_xsl_clocks(gcid, scom_base, index);
+ /* Step 6 - NPCQ configuration */
+ set_npcq_config(gcid, scom_base, index);
+ /* Step 7 - XSL-XTS interfaces */
+ enable_xsl_xts_interfaces(gcid, scom_base, index);
+ /* Step 8 - state-machine allocation */
+ enable_sm_allocation(gcid, scom_base, index);
+ /* Step 9 - PowerBus snooping */
+ enable_pb_snooping(gcid, scom_base, index);
+}
+
+/*
+ * Procedure 13.1.3.5 - TL Configuration
+ *
+ * Programs three OTL registers of the brick at @index, each built from zero
+ * rather than read-modify-written:
+ *  - OTL Config 0: OTL enable, block PE handle, brick ID and ERAT hashes
+ *  - OTL Config 1: template 0 transmit rate and transmit tuning fields
+ *  - TLX Credit Configuration: VC0/VC3/DCP0/DCP1 credit counts
+ */
+static void tl_config(uint32_t gcid, uint32_t scom_base, uint64_t index)
+{
+ uint64_t reg;
+ uint64_t stack = index_to_stack(index);
+ uint64_t block = index_to_block(index);
+
+ prlog(PR_DEBUG, "OCAPI: %s: TL Configuration\n", __func__);
+ /* OTL Config 0 Register */
+ reg = 0;
+ /* OTL Enable */
+ reg |= NPU2_OTL_CONFIG0_EN;
+ /* Block PE Handle from ERAT Index */
+ reg |= NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE;
+ /* OTL Brick ID */
+ /* Valid brick indexes are 2-5, so the ID written is 0-3 */
+ reg = SETFIELD(NPU2_OTL_CONFIG0_BRICKID, reg, index - 2);
+ /* ERAT Hash 0 */
+ reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_0, reg, 0b011001);
+ /* ERAT Hash 1 */
+ reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_1, reg, 0b000111);
+ /* ERAT Hash 2 */
+ reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_2, reg, 0b101100);
+ /* ERAT Hash 3 */
+ reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_3, reg, 0b100110);
+ npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block),
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ /* OTL Config 1 Register */
+ reg = 0;
+ /*
+ * We leave Template 1-3 bits at 0 to force template 0 as required
+ * for unknown devices.
+ *
+ * Template 0 Transmit Rate is set to most conservative setting which
+ * will always be supported. Other Template Transmit rates are left
+ * unset and will be set later by OS.
+ */
+ reg = SETFIELD(NPU2_OTL_CONFIG1_TX_TEMP0_RATE, reg, 0b1111);
+ /* Extra wait cycles TXI-TXO - varied from workbook */
+ reg = SETFIELD(NPU2_OTL_CONFIG1_TX_DRDY_WAIT, reg, 0b001);
+ /* Minimum Frequency to Return TLX Credits to AFU */
+ reg = SETFIELD(NPU2_OTL_CONFIG1_TX_CRET_FREQ, reg, 0b001);
+ /* Frequency to add age to Transmit Requests */
+ reg = SETFIELD(NPU2_OTL_CONFIG1_TX_AGE_FREQ, reg, 0b11000);
+ /* Response High Priority Threshold */
+ reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RS2_HPWAIT, reg, 0b011011);
+ /* 4-slot Request High Priority Threshold */
+ reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT, reg, 0b011011);
+ /* 6-slot Request High Priority */
+ reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT, reg, 0b011011);
+ /* Stop the OCAPI Link on Uncorrectable Error
+ * TODO: Confirm final value - disabled for debug */
+ /* No stop-on-UE bit is set below, so that field stays 0 in the value written */
+
+ npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG1(stack, block),
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ /* TLX Credit Configuration Register */
+ reg = 0;
+ /* VC0/VC3/DCP0/DCP1 credits to send to AFU */
+ reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC0_CREDITS, reg, 0x40);
+ reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC3_CREDITS, reg, 0x40);
+ reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP0_CREDITS, reg, 0x80);
+ reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP1_CREDITS, reg, 0x80);
+ npu2_scom_write(gcid, scom_base, NPU2_OTL_TLX_CREDITS(stack, block),
+ NPU2_MISC_DA_LEN_8B, reg);
+}
+
+/*
+ * Report the EC level used to detect early Nimbus revisions.
+ *
+ * Looks at the chip returned by next_chip(NULL) only. If that chip is a
+ * P9 Nimbus, returns the low byte of its ec_level (callers compare it
+ * against 0x20 and 0x21); otherwise, or if there is no chip, returns -1.
+ */
+static int get_nimbus_level(void)
+{
+	struct proc_chip *first = next_chip(NULL);
+
+	if (!first || first->type != PROC_CHIP_P9_NIMBUS)
+		return -1;
+
+	return first->ec_level & 0xff;
+}
+
+/*
+ * Procedure 13.1.3.6 - Address Translation Configuration
+ *
+ * Sets the MULTI_AFU_DIAL bit in PSL_SCNTL_A0 of the XSL block of the stack
+ * hosting brick @index, then applies two revision-specific workarounds keyed
+ * on get_nimbus_level():
+ *  - level 0x20 only: clear the bloom filter enable bit in XSL_GP
+ *  - level 0x20 or 0x21: write a fixed value to XSL_DEF
+ */
+static void address_translation_config(uint32_t gcid, uint32_t scom_base,
+ uint64_t index)
+{
+ int chip_level;
+ uint64_t reg;
+ uint64_t stack = index_to_stack(index);
+
+ prlog(PR_DEBUG, "OCAPI: %s: Address Translation Configuration\n", __func__);
+ /* PSL_SCNTL_A0 Register */
+ /*
+ * ERAT shared between multiple AFUs
+ *
+ * The workbook has this bit around the wrong way from the hardware.
+ *
+ * TODO: handle correctly with link ganging
+ */
+ reg = npu2_scom_read(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL,
+ NPU2_XSL_PSL_SCNTL_A0),
+ NPU2_MISC_DA_LEN_8B);
+ reg |= NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL;
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL,
+ NPU2_XSL_PSL_SCNTL_A0),
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ /* -1 (not a Nimbus / no chip) matches neither workaround below */
+ chip_level = get_nimbus_level();
+ if (chip_level == 0x20) {
+ /*
+ * Errata HW408041 (section 15.1.10 of NPU workbook)
+ * "RA mismatch when both tlbie and checkout response
+ * are seen in same cycle"
+ */
+ /* XSL_GP Register - Bloom Filter Disable */
+ /* The current value is read before the magic write so it can be written back modified */
+ reg = npu2_scom_read(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP),
+ NPU2_MISC_DA_LEN_8B);
+ /* To update XSL_GP, we must first write a magic value to it */
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP),
+ NPU2_MISC_DA_LEN_8B, 0x0523790323000000);
+ reg &= ~NPU2_XSL_GP_BLOOM_FILTER_ENABLE;
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP),
+ NPU2_MISC_DA_LEN_8B, reg);
+ }
+
+ if (chip_level == 0x20 || chip_level == 0x21) {
+ /*
+ * DD2.0/2.1 EOA Bug. Fixed in DD2.2
+ */
+ /* The whole register is overwritten; the previous XSL_DEF contents are not preserved */
+ reg = 0x32F8000000000001;
+ npu2_scom_write(gcid, scom_base,
+ NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL,
+ NPU2_XSL_DEF),
+ NPU2_MISC_DA_LEN_8B, reg);
+ }
+}
+
+/*
+ * TODO: Merge this with NVLink implementation - we don't use the npu2_bar
+ * wrapper for the PHY BARs yet
+ *
+ * Encode a BAR value for register @reg from @addr and @size and write it to
+ * that register in every SM block (SM_0..SM_3).
+ *
+ * The encoding depends on NPU2_REG(reg): PHY and GENID BARs take a shifted
+ * address plus an enable bit, NTL BARs additionally encode the size. Any
+ * other register is written as 0.
+ */
+static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg,
+		      uint64_t addr, uint64_t size)
+{
+	uint64_t bar = 0ul;
+	uint64_t offset;
+	int sm;
+
+	switch (NPU2_REG(reg)) {
+	case NPU2_PHY_BAR:
+		bar = SETFIELD(NPU2_PHY_BAR_ADDR, bar, addr >> 21);
+		bar = SETFIELD(NPU2_PHY_BAR_ENABLE, bar, 1);
+		break;
+	case NPU2_NTL0_BAR:
+	case NPU2_NTL1_BAR:
+		bar = SETFIELD(NPU2_NTL_BAR_ADDR, bar, addr >> 16);
+		bar = SETFIELD(NPU2_NTL_BAR_SIZE, bar, ilog2(size >> 16));
+		bar = SETFIELD(NPU2_NTL_BAR_ENABLE, bar, 1);
+		break;
+	case NPU2_GENID_BAR:
+		bar = SETFIELD(NPU2_GENID_BAR_ADDR, bar, addr >> 16);
+		bar = SETFIELD(NPU2_GENID_BAR_ENABLE, bar, 1);
+		break;
+	default:
+		/* Unknown BAR register: leave the value at 0 */
+		break;
+	}
+
+	for (sm = NPU2_BLOCK_SM_0; sm <= NPU2_BLOCK_SM_3; sm++) {
+		offset = NPU2_REG_OFFSET(0, sm, reg);
+
+		npu2_scom_write(gcid, scom_base, offset,
+				NPU2_MISC_DA_LEN_8B, bar);
+		prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n",
+		      offset, bar);
+	}
+}
+
+/*
+ * Program the two PHY BARs and the global MMIO BAR of the NPU on chip @gcid
+ * from the ranges returned by phys_map_get().
+ *
+ * On return, @reg[0] holds the address and @reg[1] the size of the global
+ * MMIO (NPU_REGS) range.
+ */
+static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
+				  uint64_t reg[])
+{
+	uint64_t base, len;
+
+	/* PHY range 0 goes into the PHY BAR of stack 2 */
+	prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__);
+	phys_map_get(gcid, NPU_PHY, 0, &base, &len);
+	write_bar(gcid, scom_base,
+		  NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
+		  base, len);
+
+	/* PHY range 1 goes into the PHY BAR of stack 1 */
+	prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__);
+	phys_map_get(gcid, NPU_PHY, 1, &base, &len);
+	write_bar(gcid, scom_base,
+		  NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
+		  base, len);
+
+	/* The NPU register range goes into the PHY BAR slot of stack 0 */
+	prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__);
+	phys_map_get(gcid, NPU_REGS, 0, &base, &len);
+	write_bar(gcid, scom_base,
+		  NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
+		  base, len);
+
+	reg[0] = base;
+	reg[1] = len;
+}
+
+/*
+ * Mask the NPU FIR register 1 bits that are repurposed when a link runs
+ * OpenCAPI, and stop them from causing freeze, fence or interrupt actions.
+ *
+ * Bits 0-11 are set in NPU2_MISC_FIR_MASK1 and cleared in the freeze, fence
+ * and IRQ enable registers of NPU @p. The FIR mask is accessed with plain
+ * xscom_read()/xscom_write() at xscom_base + offset, the enable registers
+ * through npu2_scom_read()/npu2_scom_write().
+ */
+static void mask_nvlink_fir(struct npu2 *p)
+{
+	uint64_t reg;
+
+	/*
+	 * From section 13.1.3.10 of the NPU workbook: "the NV-Link
+	 * Datalink Layer Stall and NoStall signals are used for a
+	 * different purpose when the link is configured for
+	 * OpenCAPI. Therefore, the corresponding bits in NPU FIR
+	 * Register 1 must be masked and configured to NOT cause the
+	 * NPU to go into Freeze or Fence mode or send an Interrupt."
+	 *
+	 * FIXME: will need to revisit when mixing nvlink with
+	 * opencapi. Assumes an opencapi-only setup on both PHYs for
+	 * now.
+	 */
+
+	/* Mask FIRs */
+	xscom_read(p->chip_id, p->xscom_base + NPU2_MISC_FIR_MASK1, &reg);
+	reg = SETFIELD(PPC_BITMASK(0, 11), reg, 0xFFF);
+	xscom_write(p->chip_id, p->xscom_base + NPU2_MISC_FIR_MASK1, reg);
+
+	/* freeze disable */
+	reg = npu2_scom_read(p->chip_id, p->xscom_base,
+			     NPU2_MISC_FREEZE_ENABLE1, NPU2_MISC_DA_LEN_8B);
+	reg = SETFIELD(PPC_BITMASK(0, 11), reg, 0);
+	npu2_scom_write(p->chip_id, p->xscom_base,
+			NPU2_MISC_FREEZE_ENABLE1, NPU2_MISC_DA_LEN_8B, reg);
+
+	/* fence disable */
+	reg = npu2_scom_read(p->chip_id, p->xscom_base,
+			     NPU2_MISC_FENCE_ENABLE1, NPU2_MISC_DA_LEN_8B);
+	reg = SETFIELD(PPC_BITMASK(0, 11), reg, 0);
+	npu2_scom_write(p->chip_id, p->xscom_base,
+			NPU2_MISC_FENCE_ENABLE1, NPU2_MISC_DA_LEN_8B, reg);
+
+	/* irq disable */
+	reg = npu2_scom_read(p->chip_id, p->xscom_base,
+			     NPU2_MISC_IRQ_ENABLE1, NPU2_MISC_DA_LEN_8B);
+	reg = SETFIELD(PPC_BITMASK(0, 11), reg, 0);
+	npu2_scom_write(p->chip_id, p->xscom_base,
+			NPU2_MISC_IRQ_ENABLE1, NPU2_MISC_DA_LEN_8B, reg);
+}
+
+/*
+ * Interrupt configuration for NPU @p.
+ *
+ * Allocates NPU_IRQ_LEVELS XIVE IPIs, records the first one in p->irq_base,
+ * registers them as an IPI source, programs the NPU IRQ base register from
+ * the XIVE trigger port address, adjusts the IPI page-size/OS bits in
+ * NPU2_MISC_CFG, sets bits 0-3 of NPU2_MISC_IRQ_ENABLE2 and finally masks
+ * the repurposed NVLink FIR bits.
+ *
+ * Returns 0 on success, -1 if the interrupts could not be allocated (in
+ * which case no register has been touched).
+ */
+static int setup_irq(struct npu2 *p)
+{
+ uint64_t reg, mmio_addr;
+ uint32_t base;
+
+ base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64);
+ if (base == XIVE_IRQ_ERROR) {
+ /**
+ * @fwts-label OCAPIIRQAllocationFailed
+ * @fwts-advice OpenCAPI IRQ setup failed. This is probably
+ * a firmware bug. OpenCAPI functionality will be broken.
+ */
+ prlog(PR_ERR, "OCAPI: Couldn't allocate interrupts for NPU\n");
+ return -1;
+ }
+ p->irq_base = base;
+
+ /* No source ops or private data are supplied for this IPI source */
+ xive_register_ipi_source(base, NPU_IRQ_LEVELS, NULL, NULL);
+ mmio_addr = (uint64_t ) xive_get_trigger_port(base);
+ prlog(PR_DEBUG, "OCAPI: NPU base irq %d @%llx\n", base, mmio_addr);
+ /* NOTE(review): the << 13 presumably positions the address within the IRQ_BASE register field - confirm against the register layout */
+ reg = (mmio_addr & NPU2_MISC_IRQ_BASE_MASK) << 13;
+ npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_BASE,
+ NPU2_MISC_DA_LEN_8B, reg);
+ /*
+ * setup page size = 64k
+ *
+ * OS type is set to AIX: opal also runs with 2 pages per interrupt,
+ * so to cover the max offset for 35 levels of interrupt, we need
+ * bits 41 to 46, which is what the AIX setting does. There's no
+ * other meaning for that AIX setting.
+ */
+ reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
+ NPU2_MISC_DA_LEN_8B);
+ reg |= NPU2_MISC_CFG_IPI_PS;
+ reg &= ~NPU2_MISC_CFG_IPI_OS;
+ npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ /* enable translation interrupts for all bricks */
+ reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
+ NPU2_MISC_DA_LEN_8B);
+ reg |= PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3);
+ npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
+ NPU2_MISC_DA_LEN_8B, reg);
+
+ mask_nvlink_fir(p);
+ return 0;
+}
+
+/*
+ * Probe one "ibm,power9-npu" device tree node and, if it carries no NVLink
+ * links, configure it for OpenCAPI.
+ *
+ * Sets up the global MMIO and PHY BARs, allocates the struct npu2 (with room
+ * for "ibm,npu-links" devices), runs the brick, TL and address translation
+ * configuration for every "ibm,npu-link-opencapi" child and finally sets up
+ * interrupts. The struct npu2 is freed again if interrupt setup fails.
+ *
+ * NOTE(review): on success the struct npu2 is not stored anywhere in this
+ * function; the commit message says link training and per-link setup come in
+ * a later patch, which presumably registers it - confirm.
+ */
+static void npu2_opencapi_probe(struct dt_node *dn)
+{
+	struct dt_node *link;
+	char *path;
+	uint32_t gcid, index, links, scom_base;
+	uint64_t reg[2];
+	uint64_t dev_index;
+	struct npu2 *n;
+	int rc;
+
+	gcid = dt_get_chip_id(dn);
+	index = dt_prop_get_u32(dn, "ibm,npu-index");
+	links = dt_prop_get_u32(dn, "ibm,npu-links");
+
+	/* Don't try to init when we have an NVLink link */
+	dt_for_each_compatible(dn, link, "ibm,npu-link") {
+		prlog(PR_DEBUG, "OCAPI: NPU%d: NVLink link found, skipping\n",
+		      index);
+		return;
+	}
+
+	/*
+	 * The path is only needed for the message below, so fetch it here:
+	 * the early return above then has nothing to free.
+	 */
+	path = dt_get_path(dn);
+	prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n",
+	      gcid, index, links, path);
+	free(path);
+
+	scom_base = dt_get_address(dn, 0, NULL);
+	prlog(PR_INFO, "OCAPI: SCOM Base: %08x\n", scom_base);
+
+	setup_global_mmio_bar(gcid, scom_base, reg);
+
+	/* The npu2_dev array lives directly behind the struct npu2 */
+	n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
+	if (!n) {
+		/**
+		 * @fwts-label OCAPINPUAllocationFailed
+		 * @fwts-advice Out of memory while setting up an OpenCAPI
+		 * NPU. OpenCAPI functionality will be broken.
+		 */
+		prlog(PR_ERR, "OCAPI: Couldn't allocate memory for NPU%d\n",
+		      index);
+		return;
+	}
+	n->devices = (struct npu2_dev *)(n + 1);
+	n->chip_id = gcid;
+	n->xscom_base = scom_base;
+	n->regs = (void *)reg[0];
+	n->dt_node = dn;
+
+	dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") {
+		dev_index = dt_prop_get_u32(link, "ibm,npu-link-index");
+		prlog(PR_INFO, "OCAPI: Configuring link index %lld\n",
+		      dev_index);
+
+		/* Procedure 13.1.3.1 - Select OCAPI vs NVLink */
+		brick_config(gcid, scom_base, dev_index);
+
+		/* Procedure 13.1.3.5 - Transaction Layer Configuration */
+		tl_config(gcid, scom_base, dev_index);
+
+		/* Procedure 13.1.3.6 - Address Translation Configuration */
+		address_translation_config(gcid, scom_base, dev_index);
+	}
+
+	/* Procedure 13.1.3.10 - Interrupt Configuration */
+	rc = setup_irq(n);
+	if (rc)
+		goto failed;
+
+	return;
+failed:
+	free(n);
+}
+
+/*
+ * Entry point: run the OpenCAPI probe on every device tree node under
+ * dt_root that is compatible with "ibm,power9-npu".
+ */
+void probe_npu2_opencapi(void)
+{
+	struct dt_node *npu_node;
+
+	dt_for_each_compatible(dt_root, npu_node, "ibm,power9-npu") {
+		npu2_opencapi_probe(npu_node);
+	}
+}
diff --git a/hw/npu2.c b/hw/npu2.c
index e3ab594..3e30f19 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1303,7 +1303,7 @@ static void npu2_probe_phb(struct dt_node *dn)
{
struct proc_chip *proc_chip;
struct dt_node *np;
- uint32_t gcid, scom, index, phb_index, links;
+ uint32_t gcid, scom, index, phb_index, links = 0;
uint64_t reg[2], mm_win[2];
char *path;
@@ -1317,6 +1317,26 @@ static void npu2_probe_phb(struct dt_node *dn)
return;
}
+ dt_for_each_compatible(dn, np, "ibm,npu-link") {
+ links++;
+ }
+
+ index = dt_prop_get_u32(dn, "ibm,npu-index");
+ phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
+
+ prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n",
+ gcid, index, links, path);
+ free(path);
+
+ /* Retrieve scom base address */
+ scom = dt_get_address(dn, 0, NULL);
+ prlog(PR_INFO, " SCOM Base: %08x\n", scom);
+
+ if (!links) {
+ prlog(PR_INFO, " No NVLink links found\n");
+ return;
+ }
+
if (!is_p9dd1()) {
/* TODO: Clean this up with register names, etc. when we get
* time. This just turns NVLink mode on in each brick and should
@@ -1353,17 +1373,6 @@ static void npu2_probe_phb(struct dt_node *dn)
xscom_write_mask(gcid, 0x5011530, PPC_BIT(0), PPC_BIT(0));
}
- index = dt_prop_get_u32(dn, "ibm,npu-index");
- phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
- links = dt_prop_get_u32(dn, "ibm,npu-links");
- prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n",
- gcid, index, links, path);
- free(path);
-
- /* Retrieve scom base address */
- scom = dt_get_address(dn, 0, NULL);
- prlog(PR_INFO, " SCOM Base: %08x\n", scom);
-
/* Reassign the BARs */
assign_mmio_bars(gcid, scom, reg, mm_win);
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index b6dae7b..9732ec9 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -90,6 +90,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_BLOCK_DAT 0x5
#define NPU2_BLOCK_NTL0 0x9
#define NPU2_BLOCK_NTL1 0xb
+#define NPU2_BLOCK_OTL0 0xc
+#define NPU2_BLOCK_OTL1 0xd
+
+/* OpenCAPI blocks */
+#define NPU2_BLOCK_XSL 0xe
/* MISC stack blocks */
#define NPU2_BLOCK_ATS 0
@@ -108,6 +113,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
/* SM block registers */
#define NPU2_CQ_SM_MISC_CFG0 0x000
+#define NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38)
+#define NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57)
#define NPU2_CQ_SM_MISC_CFG1 0x008
#define NPU2_PB_EPSILON 0x010
#define NPU2_TIMER_CFG 0x018
@@ -138,6 +145,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_GENID_BAR_ADDR PPC_BITMASK(3,35)
#define NPU2_GENID_BAR_POISON PPC_BIT(39)
#define NPU2_LOW_WATER_MARKS 0x040
+#define NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC PPC_BIT(51)
#define NPU2_HIGH_WATER_MARKS 0x048
#define NPU2_RELAXED_ORDERING_CFG0 0x050
#define NPU2_RELAXED_ORDERING_CFG1 0x058
@@ -176,6 +184,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
/* CTL block registers */
#define NPU2_CQ_CTL_MISC_CFG 0x000
+#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52)
+#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55)
+#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56)
#define NPU2_CQ_FUTURE_CFG1 0x008
#define NPU2_CQ_FUTURE_CFG2 0x010
#define NPU2_CQ_FUTURE_CFG3 0x018
@@ -198,6 +209,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_CQ_LPC_THRESHOLD_CFG 0x080
#define NPU2_CQ_INHIBIT_CFG 0x088
#define NPU2_CQ_CTL_STATUS 0x090
+#define NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED PPC_BITMASK(48, 49)
+#define NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED PPC_BITMASK(50, 51)
#define NPU2_CQ_C_ERR_RPT_MSG0 0x0C0
#define NPU2_CQ_C_ERR_RPT_MSG1 0x0C8
#define NPU2_CQ_C_ERR_RPT_FIRST0 0x0D0
@@ -206,9 +219,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_CQ_C_ERR_RPT_MASK1 0x0E8
#define NPU2_CQ_C_ERR_RPT_HOLD0 0x0F0
#define NPU2_CQ_C_ERR_RPT_HOLD1 0x0F8
+#define NPU2_CQ_CTL_FENCE_CONTROL_0 0x140
+#define NPU2_CQ_CTL_FENCE_CONTROL_1 0x148
+#define NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1)
/* DAT block registers */
#define NPU2_CQ_DAT_MISC_CFG 0x008
+#define NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(40)
#define NPU2_CQ_DAT_ECC_CFG 0x010
#define NPU2_CQ_DAT_SCRATCH0 0x018
#define NPU2_CQ_DAT_ECC_STATUS 0x020
@@ -289,6 +306,43 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_NTL_DL_CONFIG(ndev) NPU2_DL_REG_OFFSET(ndev, 0xFFF8)
#define NPU2_NTL_DL_CLK_CTRL(ndev) NPU2_DL_REG_OFFSET(ndev, 0x001C)
+/* OpenCAPI - XSL registers */
+#define NPU2_XSL_PSL_SCNTL_A0 0x010
+#define NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL PPC_BIT(0)
+#define NPU2_XSL_DEF 0x040
+#define NPU2_XSL_GP 0x058
+#define NPU2_XSL_GP_BLOOM_FILTER_ENABLE PPC_BIT(16)
+#define NPU2_XSL_WRAP_CFG 0x0C0
+#define NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE PPC_BIT(0)
+
+/* OpenCAPI - OTL registers */
+#define NPU2_OTL_CONFIG0(stack, block) NPU2_REG_OFFSET(stack, block, 0x000)
+#define NPU2_OTL_CONFIG0_EN PPC_BIT(0)
+#define NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE PPC_BIT(1)
+#define NPU2_OTL_CONFIG0_BRICKID PPC_BITMASK(2, 3)
+#define NPU2_OTL_CONFIG0_PE_MASK PPC_BITMASK(4, 7)
+#define NPU2_OTL_CONFIG0_ERAT_HASH_0 PPC_BITMASK(8, 13)
+#define NPU2_OTL_CONFIG0_ERAT_HASH_1 PPC_BITMASK(14, 19)
+#define NPU2_OTL_CONFIG0_ERAT_HASH_2 PPC_BITMASK(20, 25)
+#define NPU2_OTL_CONFIG0_ERAT_HASH_3 PPC_BITMASK(26, 31)
+#define NPU2_OTL_CONFIG0_BLOCK_TID_OVERRIDE PPC_BIT(50)
+#define NPU2_OTL_TLX_CREDITS(stack, block) NPU2_REG_OFFSET(stack, block, 0x050)
+#define NPU2_OTL_TLX_CREDITS_VC0_CREDITS PPC_BITMASK(0, 7)
+#define NPU2_OTL_TLX_CREDITS_VC3_CREDITS PPC_BITMASK(24, 31)
+#define NPU2_OTL_TLX_CREDITS_DCP0_CREDITS PPC_BITMASK(32, 39)
+#define NPU2_OTL_TLX_CREDITS_DCP1_CREDITS PPC_BITMASK(56, 63)
+#define NPU2_OTL_CONFIG1(stack, block) NPU2_REG_OFFSET(stack, block, 0x058)
+#define NPU2_OTL_CONFIG1_TX_DRDY_WAIT PPC_BITMASK(5, 7)
+#define NPU2_OTL_CONFIG1_TX_TEMP0_RATE PPC_BITMASK(8, 11)
+#define NPU2_OTL_CONFIG1_TX_CRET_FREQ PPC_BITMASK(32, 34)
+#define NPU2_OTL_CONFIG1_TX_AGE_FREQ PPC_BITMASK(35, 39)
+#define NPU2_OTL_CONFIG1_TX_RS2_HPWAIT PPC_BITMASK(40, 45)
+#define NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT PPC_BITMASK(46, 51)
+#define NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT PPC_BITMASK(52, 57)
+#define NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS PPC_BIT(58)
+#define NPU2_OTL_CONFIG1_TX_STOP_LINK PPC_BIT(59)
+#define NPU2_OTL_CONFIG1_TX_STOP_ON_UE PPC_BIT(60)
+
/* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above
* there is only a single instance of each of these in the NPU so we
* define them as absolute offsets. */
@@ -296,6 +350,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_MISC_RELAXTED_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x008)
#define NPU2_MISC_LCO_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x010)
#define NPU2_MISC_OPTICAL_IO_CFG0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x018)
+#define NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2 PPC_BITMASK(0, 2)
+#define NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1 PPC_BITMASK(3, 4)
+#define NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5 PPC_BITMASK(5, 6)
#define NPU2_MISC_ERR_RPT_HOLD NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020)
#define NPU2_MISC_ERR_RPT_MASK NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028)
#define NPU2_MISC_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030)
@@ -320,6 +377,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_MISC_ERR_CTL_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x088)
#define NPU2_MISC_RING_ACK_ERR NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x090)
#define NPU2_MISC_IRQ_BASE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x098)
+#define NPU2_MISC_IRQ_BASE_MASK PPC_BITMASK(13, 51)
#define NPU2_MISC_ERR_BRICK_GROUP NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A0)
#define NPU2_MISC_FREEZE_STATE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A8)
#define NPU2_MISC_FENCE_STATE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0B0)
@@ -362,6 +420,10 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_MISC_IRQ_LOG13 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x368)
#define NPU2_MISC_IRQ_LOG14 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x370)
#define NPU2_MISC_IRQ_LOG15 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x378)
+#define NPU2_MISC_IRQ_ENABLE2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x408)
+
+/* Misc register, direct access only */
+#define NPU2_MISC_FIR_MASK1 0x2C43
/* ATS block registers */
#define NPU2_ATS_PMU_CTL NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_ATS, 0x000)
@@ -406,8 +468,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_XTS_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x020)
#define NPU2_XTS_CFG_MMIOSD PPC_BIT(1)
#define NPU2_XTS_CFG_TRY_ATR_RO PPC_BIT(6)
+#define NPU2_XTS_CFG_OPENCAPI PPC_BIT(15)
#define NPU2_XTS_CFG2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x028)
#define NPU2_XTS_CFG2_NO_FLUSH_ENA PPC_BIT(49)
+#define NPU2_XTS_CFG2_XSL1_ENA PPC_BIT(54)
+#define NPU2_XTS_CFG2_XSL2_ENA PPC_BIT(55)
#define NPU2_XTS_DBG_CFG0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x030)
#define NPU2_XTS_DBG_CFG1 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x038)
#define NPU2_XTS_PMU_CNT NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x040)
@@ -463,4 +528,29 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_DD1_MISC_SCOM_IND_SCOM_DATA 0x38f
#define NPU2_MISC_SCOM_IND_SCOM_DATA 0x68f
+
+/* OpenCAPI - PowerBus and OBus configuration SCOM addresses */
+#define PU_IOE_PB_MISC_CFG 0x5013823
+#define PU_IOE_PB_MISC_CFG_SEL_03_NPU_NOT_PB PPC_BIT(13)
+#define PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB PPC_BIT(14)
+#define PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB PPC_BIT(15)
+#define OBUS_LL0_IOOL_PHY_CONFIG 0x901080C
+#define OBUS_LL3_IOOL_PHY_CONFIG 0xC01080C
+#define OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED PPC_BIT(52)
+#define OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED PPC_BIT(53)
+#define OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP PPC_BIT(54)
+#define OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED PPC_BIT(58)
+#define OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED PPC_BIT(59)
+#define OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED PPC_BIT(61)
+#define OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED PPC_BIT(62)
+#define OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED PPC_BIT(63)
+#define PU_IOE_PB_FP01_CFG 0x501380A
+#define PU_IOE_PB_FP23_CFG 0x501380B
+#define PU_IOE_PB_FP45_CFG 0x501380C
+#define PU_IOE_PB_FP67_CFG 0x501380D
+#define PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE PPC_BIT(20)
+#define PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE PPC_BIT(25)
+#define PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE PPC_BIT(52)
+#define PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE PPC_BIT(57)
+
#endif /* __NPU2_REGS_H */
diff --git a/include/npu2.h b/include/npu2.h
index 94e537b..7b2b509 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -138,6 +138,7 @@ struct npu2_dev {
struct npu2 {
uint32_t index;
+ struct dt_node *dt_node;
uint32_t flags;
uint32_t chip_id;
uint64_t xscom_base;
@@ -146,6 +147,7 @@ struct npu2 {
uint64_t mm_base;
uint64_t mm_size;
uint32_t base_lsi;
+ uint32_t irq_base;
uint32_t total_devices;
struct npu2_dev *devices;
enum phys_map_type gpu_map_type;
diff --git a/include/skiboot.h b/include/skiboot.h
index e94f212..1218367 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -232,6 +232,7 @@ extern int preload_capp_ucode(void);
extern void preload_io_vpd(void);
extern void probe_npu(void);
extern void probe_npu2(void);
+extern void probe_npu2_opencapi(void);
extern void uart_init(void);
extern void mbox_init(void);
extern void early_uart_init(void);
--
git-series 0.9.1
More information about the Skiboot mailing list