[Skiboot] [PATCH 5/9] npu2-opencapi: Configure NPU for OpenCAPI

Frederic Barrat fbarrat at linux.vnet.ibm.com
Fri Jan 12 00:31:55 AEDT 2018


FYI, I didn't re-check all the register and bit settings for the various 
procedures. I had checked some in the past when debugging, and I'm 
willing to accept that getting it working is good enough.

A few nitpicks below.


Le 18/12/2017 à 08:07, Andrew Donnellan a écrit :
> Scan the device tree for NPUs with OpenCAPI links and configure the NPU per
> the initialisation sequence in the NPU OpenCAPI workbook.
> 
> Training of individual links and setup of per-AFU/link configuration will
> be in a later patch.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> ---
>   core/init.c         |   2 +-
>   hw/Makefile.inc     |   3 +-
>   hw/npu2-opencapi.c  | 794 +++++++++++++++++++++++++++++++++++++++++++++-
>   hw/npu2.c           |  33 +-
>   include/npu2-regs.h |  88 +++++-
>   include/npu2.h      |   2 +-
>   include/skiboot.h   |   1 +-
>   7 files changed, 910 insertions(+), 13 deletions(-)
>   create mode 100644 hw/npu2-opencapi.c
> 
> diff --git a/core/init.c b/core/init.c
> index 9eb8e25..1522ebe 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -1048,6 +1048,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>   	/* Probe NPUs */
>   	probe_npu();
>   	probe_npu2();
> +	/* TODO: Eventually, we'll do NVLink and OpenCAPI together */
> +	probe_npu2_opencapi();
> 
>   	/* Initialize PCI */
>   	pci_init_slots();
> diff --git a/hw/Makefile.inc b/hw/Makefile.inc
> index 27d8a38..cfc8646 100644
> --- a/hw/Makefile.inc
> +++ b/hw/Makefile.inc
> @@ -7,7 +7,8 @@ HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o
>   HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
>   HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o
>   HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
> -HW_OBJS += npu2-common.o phys-map.o sbe-p9.o capp.o occ-sensor.o vas.o p9-adu.o
> +HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o occ-sensor.o
> +HW_OBJS += vas.o p9-adu.o
>   HW=hw/built-in.o
> 
>   # FIXME hack this for now
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> new file mode 100644
> index 0000000..55d5cd9
> --- /dev/null
> +++ b/hw/npu2-opencapi.c
> @@ -0,0 +1,794 @@
> +/* Copyright 2013-2017 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *      http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +/*
> + * Support for OpenCAPI on POWER9 NPUs
> + *
> + * This file provides support for OpenCAPI as implemented on POWER9.
> + *
> + * At present, we initialise the NPU separately from the NVLink code in npu2.c.
> + * As such, we don't currently support mixed NVLink and OpenCAPI configurations
> + * on the same NPU for machines such as Witherspoon.
> + *
> + * Procedure references in this file are to the POWER9 OpenCAPI NPU Workbook
> + * (IBM internal document).
> + *
> + * TODO:
> + *   - Support for mixed NVLink and OpenCAPI on the same NPU
> + *   - Support for link ganging (one AFU using multiple links)
> + *   - Link reset and error handling
> + *   - Presence detection
> + *   - Consume HDAT NPU information
> + *   - LPC Memory support
> + */
> +
> +#include <skiboot.h>
> +#include <xscom.h>
> +#include <io.h>
> +#include <timebase.h>
> +#include <pci.h>
> +#include <pci-cfg.h>
> +#include <pci-slot.h>
> +#include <interrupts.h>
> +#include <opal.h>
> +#include <opal-api.h>
> +#include <npu2.h>
> +#include <npu2-regs.h>
> +#include <phys-map.h>
> +#include <xive.h>
> +#include <p9-adu.h>
> +#include <i2c.h>
> +
> +#define NPU_IRQ_LEVELS		35
> +
> +static inline uint64_t index_to_stack(uint64_t index) {
> +	switch (index) {
> +	case 2:
> +	case 3:
> +		return NPU2_STACK_STCK_1;
> +		break;
> +	case 4:
> +	case 5:
> +		return NPU2_STACK_STCK_2;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +}
> +
> +static inline uint64_t index_to_stacku(uint64_t index) {
> +	switch (index) {
> +	case 2:
> +	case 3:
> +		return NPU2_STACK_STCK_1U;
> +		break;
> +	case 4:
> +	case 5:
> +		return NPU2_STACK_STCK_2U;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +}
> +
> +static inline uint64_t index_to_block(uint64_t index) {
> +	switch (index) {
> +	case 2:
> +	case 4:
> +		return NPU2_BLOCK_OTL0;
> +		break;
> +	case 3:
> +	case 5:
> +		return NPU2_BLOCK_OTL1;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +}
> +
> +/* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */
> +
> +static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base,
> +				       int index, enum npu2_dev_type type)
> +{
> +	/* Step 1 - Set Transport MUX controls to select correct OTL or NTL */
> +	uint64_t reg;
> +	uint64_t field;
> +
> +	/* TODO: Rework this to select for NVLink too */
> +	assert(type == NPU2_DEV_TYPE_OPENCAPI);
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Setting transport mux controls\n", __func__);
> +
> +	/* Optical IO Transport Mux Config for Bricks 0-2 and 4-5 */
> +	reg = npu2_scom_read(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0,
> +			     NPU2_MISC_DA_LEN_8B);
> +	switch (index) {
> +	case 0:
> +	case 1:
> +		/* not valid for OpenCAPI */
> +		assert(false);
> +		break;
> +	case 2:	 /* OTL1.0 */
> +		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg);
> +		field &= ~0b100;
> +		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg,
> +			       field);
> +		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg);
> +		field |= 0b10;
> +		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg,
> +			       field);
> +		break;
> +	case 3:	 /* OTL1.1 */
> +		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg);
> +		field &= ~0b010;
> +		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg,
> +			       field);
> +		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg);
> +		field |= 0b01;
> +		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg,
> +			       field);
> +		break;
> +	case 4:	 /* OTL2.0 */
> +		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg);
> +		field |= 0b10;
> +		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg,
> +			       field);
> +		break;
> +	case 5:	 /* OTL2.1 */
> +		field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg);
> +		field |= 0b01;
> +		reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg,
> +			       field);
> +		break;
> +	default:
> +		assert(false);
> +	}
> +	npu2_scom_write(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0,
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/*
> +	 * PowerBus Optical Miscellaneous Config Register - select
> +	 * OpenCAPI for b4/5 and A-Link for b3
> +	 */
> +	xscom_read(gcid, PU_IOE_PB_MISC_CFG, &reg);
> +	switch (index) {
> +	case 0:
> +	case 1:
> +	case 2:
> +	case 3:
> +		break;
> +	case 4:
> +		reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB, reg, 1);
> +		break;
> +	case 5:
> +		reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB, reg, 1);
> +		break;
> +	}
> +	xscom_write(gcid, PU_IOE_PB_MISC_CFG, reg);
> +}
> +
> +static void enable_odl_phy_mux(uint32_t gcid, int index)
> +{
> +	uint64_t reg;
> +	uint64_t phy_config_scom;
> +	prlog(PR_DEBUG, "OCAPI: %s: Enabling ODL to PHY MUXes\n", __func__);
> +	/* Step 2 - Enable MUXes for ODL to PHY connection */
> +	switch (index) {
> +	case 2:
> +	case 3:
> +		phy_config_scom = OBUS_LL0_IOOL_PHY_CONFIG;
> +		break;
> +	case 4:
> +	case 5:
> +		phy_config_scom = OBUS_LL3_IOOL_PHY_CONFIG;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +
> +	/* PowerBus OLL PHY Training Config Register */
> +	xscom_read(gcid, phy_config_scom, &reg);
> +
> +	/* Enable ODLs to use shared PHYs */
> +	reg |= OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED;
> +	reg |= OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED;
> +
> +	/*
> +	 * Swap ODL1 to use brick 2 lanes instead of brick 1 lanes if using a
> +	 * 22-pin cable for OpenCAPI connection.
> +	 */
> +	if (platform.ocapi->odl_phy_swap)
> +		reg |= OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP;
> +	else
> +		reg &= ~OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP;
> +
> +	/* Disable A-Link link layers */
> +	reg &= ~OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED;
> +	reg &= ~OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED;
> +
> +	/* Disable NV-Link link layers */
> +	reg &= ~OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED;
> +	reg &= ~OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED;
> +	reg &= ~OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED;
> +	xscom_write(gcid, phy_config_scom, reg);
> +}
> +
> +static void disable_alink_fp(uint32_t gcid)
> +{
> +	uint64_t reg = 0;
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Disabling A-Link framer/parsers\n", __func__);
> +	/* Step 3 - Disable A-Link framers/parsers */
> +	/* TODO: Confirm if needed on OPAL system */
> +
> +	reg |= PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE;
> +	reg |= PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE;
> +	reg |= PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE;
> +	reg |= PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE;
> +	xscom_write(gcid, PU_IOE_PB_FP01_CFG, reg);
> +	xscom_write(gcid, PU_IOE_PB_FP23_CFG, reg);
> +	xscom_write(gcid, PU_IOE_PB_FP45_CFG, reg);
> +	xscom_write(gcid, PU_IOE_PB_FP67_CFG, reg);
> +}
> +
> +static void set_pb_hp_opencapi(uint32_t gcid, int index)
> +{
> +	/* Step 4 - Set PowerBus HotPlug Mode Registers */
> +	uint64_t reg;
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Setting PowerBus Hotplug Mode registers\n", __func__);
> +
> +	xscom_read(gcid, PB_WEST_HP_MODE_CURR, &reg);
> +	switch (index) {
> +	case 2:
> +	case 3:
> +		/* Configure OPT0 as an OpenCAPI link */
> +		reg = SETFIELD(PPC_BITMASK(32, 33), reg, 0b01);
> +		break;
> +	case 4:
> +	case 5:
> +		/* Configure OPT3 as an OpenCAPI link */
> +		reg = SETFIELD(PPC_BITMASK(38, 39), reg, 0b01);
> +		break;
> +	default:
> +		assert(false);
> +	}
> +
> +	p9_adu_set_pb_hp_mode(gcid, reg);
> +}
> +
> +static void enable_xsl_clocks(uint32_t gcid, uint32_t scom_base, int index)
> +{
> +	/* Step 5 - Enable Clocks in XSL */
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Enable clocks in XSL\n", __func__);
> +
> +	npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(index_to_stack(index),
> +							 NPU2_BLOCK_XSL,
> +							 NPU2_XSL_WRAP_CFG),
> +			NPU2_MISC_DA_LEN_8B, NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE);
> +}
> +
> +#define CQ_CTL_STATUS_TIMEOUT	10 /* milliseconds */
> +
> +static int set_fence_control(uint32_t gcid, uint32_t scom_base,
> +			     int index, uint8_t status)
> +{
> +	int stack, block;
> +	uint64_t reg, status_field;
> +	uint8_t status_val;
> +	uint64_t fence_control;
> +	uint64_t timeout = mftb() + msecs_to_tb(CQ_CTL_STATUS_TIMEOUT);
> +
> +	stack = index_to_stack(index);
> +	block = index_to_block(index);
> +
> +	fence_control = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
> +					block == NPU2_BLOCK_OTL0 ?
> +					NPU2_CQ_CTL_FENCE_CONTROL_0 :
> +					NPU2_CQ_CTL_FENCE_CONTROL_1);
> +
> +	reg = SETFIELD(NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE, 0ull, status);
> +	npu2_scom_write(gcid, scom_base, fence_control,
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/* Wait for fence status to update */
> +	if (index_to_block(index) == NPU2_BLOCK_OTL0)
> +		status_field = NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED;
> +	else
> +		status_field = NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED;
> +
> +	do {
> +		reg = npu2_scom_read(gcid, scom_base,
> +				     NPU2_REG_OFFSET(index_to_stack(index),
> +						     NPU2_BLOCK_CTL,
> +						     NPU2_CQ_CTL_STATUS),
> +				     NPU2_MISC_DA_LEN_8B);
> +		status_val = GETFIELD(status_field, reg);
> +		if (status_val == status)
> +			return OPAL_SUCCESS;
> +		time_wait_ms(1);
> +	} while (tb_compare(mftb(), timeout) == TB_ABEFOREB);
> +
> +	/**
> +	 * @fwts-label OCAPIFenceStatusTimeout
> +	 * @fwts-advice The NPU fence status did not update as expected. This
> +	 * could be the result of a firmware or hardware bug. OpenCAPI
> +	 * functionality could be broken.
> +	 */
> +	prlog(PR_ERR,
> +	      "OCAPI: Fence status for brick %d stuck: expected 0x%x, got 0x%x\n",
> +	      index, status, status_val);
> +	return OPAL_HARDWARE;
> +}
> +
> +static void set_npcq_config(uint32_t gcid, uint32_t scom_base, int index)
> +{
> +	uint64_t reg, stack, block;
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Set NPCQ Config\n", __func__);
> +	/* Step 6 - Set NPCQ configuration */
> +	/* CQ_CTL Misc Config Register #0 */
> +	stack = index_to_stack(index);
> +	block = index_to_block(index);
> +
> +	/* Enable OTL */
> +	npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block),
> +			NPU2_MISC_DA_LEN_8B, NPU2_OTL_CONFIG0_EN);
> +	set_fence_control(gcid, scom_base, index, 0b01);
> +	reg = npu2_scom_read(gcid, scom_base,
> +			     NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
> +					     NPU2_CQ_CTL_MISC_CFG),
> +			     NPU2_MISC_DA_LEN_8B);
> +	/* Set OCAPI mode */
> +	reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE;
> +	if (block == NPU2_BLOCK_OTL0)
> +		reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE;
> +	else
> +		reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE;
> +	npu2_scom_write(gcid, scom_base,
> +			NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
> +					NPU2_CQ_CTL_MISC_CFG),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/* NPU Fenced */
> +	set_fence_control(gcid, scom_base, index, 0b11);
> +
> +	/* NPU Half Fenced */
> +	set_fence_control(gcid, scom_base, index, 0b10);
> +
> +	/* CQ_DAT Misc Config Register #1 */
> +	reg = npu2_scom_read(gcid, scom_base,
> +			     NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT,
> +					     NPU2_CQ_DAT_MISC_CFG),
> +			     NPU2_MISC_DA_LEN_8B);
> +	/* Set OCAPI mode for bricks 2-5 */
> +	reg |= NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE;
> +	npu2_scom_write(gcid, scom_base,
> +			NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT,
> +					NPU2_CQ_DAT_MISC_CFG),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/* CQ_SM Misc Config Register #0 */
> +	for (uint64_t block = NPU2_BLOCK_SM_0;
> +	     block <= NPU2_BLOCK_SM_3; block++) {
> +		reg = npu2_scom_read(gcid, scom_base,
> +				     NPU2_REG_OFFSET(stack, block,
> +						     NPU2_CQ_SM_MISC_CFG0),
> +				     NPU2_MISC_DA_LEN_8B);
> +		/* Set OCAPI mode for bricks 2-5 */
> +		reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE;
> +		npu2_scom_write(gcid, scom_base,
> +				NPU2_REG_OFFSET(stack, block,
> +						NPU2_CQ_SM_MISC_CFG0),
> +				NPU2_MISC_DA_LEN_8B, reg);
> +	}
> +}
> +
> +static void enable_xsl_xts_interfaces(uint32_t gcid, uint32_t scom_base, int index)
> +{
> +	uint64_t reg;
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Enable XSL-XTS Interfaces\n", __func__);
> +	/* Step 7 - Enable XSL-XTS interfaces */
> +	/* XTS Config Register - Enable XSL-XTS interface */
> +	reg = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B);
> +	reg |= NPU2_XTS_CFG_OPENCAPI;
> +	npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/* XTS Config2 Register - Enable XSL1/2 */
> +	reg = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B);
> +	switch (index_to_stack(index)) {
> +	case NPU2_STACK_STCK_1:
> +		reg |= NPU2_XTS_CFG2_XSL1_ENA;
> +		break;
> +	case NPU2_STACK_STCK_2:
> +		reg |= NPU2_XTS_CFG2_XSL2_ENA;
> +		break;
> +	}
> +	npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B, reg);
> +}
> +
> +static void enable_sm_allocation(uint32_t gcid, uint32_t scom_base, int index)
> +{
> +	uint64_t reg;
> +	int stack = index_to_stack(index);
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Enable State Machine Allocation\n", __func__);
> +	/* Step 8 - Enable state-machine allocation */
> +	/* Low-Water Marks Registers - Enable state machine allocation */
> +	for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {

The 'block' declaration follows an unusual coding style.

> +		reg = npu2_scom_read(gcid, scom_base,
> +				     NPU2_REG_OFFSET(stack, block,
> +						     NPU2_LOW_WATER_MARKS),
> +				     NPU2_MISC_DA_LEN_8B);
> +		reg |= NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC;
> +		npu2_scom_write(gcid, scom_base,
> +				NPU2_REG_OFFSET(stack, block,
> +						NPU2_LOW_WATER_MARKS),
> +				NPU2_MISC_DA_LEN_8B, reg);
> +	}
> +}
> +
> +static void enable_pb_snooping(uint32_t gcid, uint32_t scom_base, int index)
> +{
> +	uint64_t reg;
> +	int stack = index_to_stack(index);
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Enable PowerBus snooping\n", __func__);
> +	/* Step 9 - Enable PowerBus snooping */
> +	/* CQ_SM Misc Config Register #0 - Enable PowerBus snooping */
> +	for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {

Same here.

> +		reg = npu2_scom_read(gcid, scom_base,
> +				     NPU2_REG_OFFSET(stack, block,
> +						     NPU2_CQ_SM_MISC_CFG0),
> +				     NPU2_MISC_DA_LEN_8B);
> +		reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS;
> +		npu2_scom_write(gcid, scom_base,
> +				NPU2_REG_OFFSET(stack, block,
> +						NPU2_CQ_SM_MISC_CFG0),
> +				NPU2_MISC_DA_LEN_8B, reg);
> +	}
> +}
> +
> +static void brick_config(uint32_t gcid, uint32_t scom_base, int index)
> +{
> +	set_transport_mux_controls(gcid, scom_base, index, NPU2_DEV_TYPE_OPENCAPI);
> +	enable_odl_phy_mux(gcid, index);
> +	disable_alink_fp(gcid);
> +	set_pb_hp_opencapi(gcid, index);
> +	enable_xsl_clocks(gcid, scom_base, index);
> +	set_npcq_config(gcid, scom_base, index);
> +	enable_xsl_xts_interfaces(gcid, scom_base, index);
> +	enable_sm_allocation(gcid, scom_base, index);
> +	enable_pb_snooping(gcid, scom_base, index);
> +}
> +
> +/* Procedure 13.1.3.5 - TL Configuration */
> +static void tl_config(uint32_t gcid, uint32_t scom_base, uint64_t index)
> +{
> +	uint64_t reg;
> +	uint64_t stack = index_to_stack(index);
> +	uint64_t block = index_to_block(index);
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: TL Configuration\n", __func__);
> +	/* OTL Config 0 Register */
> +	reg = 0;
> +	/* OTL Enable */
> +	reg |= NPU2_OTL_CONFIG0_EN;
> +	/* Block PE Handle from ERAT Index */
> +	reg |= NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE;
> +	/* OTL Brick ID */
> +	reg = SETFIELD(NPU2_OTL_CONFIG0_BRICKID, reg, index - 2);
> +	/* ERAT Hash 0 */
> +	reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_0, reg, 0b011001);
> +	/* ERAT Hash 1 */
> +	reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_1, reg, 0b000111);
> +	/* ERAT Hash 2 */
> +	reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_2, reg, 0b101100);
> +	/* ERAT Hash 3 */
> +	reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_3, reg, 0b100110);
> +	npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/* OTL Config 1 Register */
> +	reg = 0;
> +	/*
> +	 * We leave Template 1-3 bits at 0 to force template 0 as required
> +	 * for unknown devices.
> +	 *
> +	 * Template 0 Transmit Rate is set to most conservative setting which
> +	 * will always be supported. Other Template Transmit rates are left
> +	 * unset and will be set later by OS.
> +	 */
> +	reg = SETFIELD(NPU2_OTL_CONFIG1_TX_TEMP0_RATE, reg, 0b1111);
> +	/* Extra wait cycles TXI-TXO - varied from workbook */
> +	reg = SETFIELD(NPU2_OTL_CONFIG1_TX_DRDY_WAIT, reg, 0b001);
> +	/* Minimum Frequency to Return TLX Credits to AFU */
> +	reg = SETFIELD(NPU2_OTL_CONFIG1_TX_CRET_FREQ, reg, 0b001);
> +	/* Frequency to add age to Transmit Requests */
> +	reg = SETFIELD(NPU2_OTL_CONFIG1_TX_AGE_FREQ, reg, 0b11000);
> +	/* Response High Priority Threshold */
> +	reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RS2_HPWAIT, reg, 0b011011);
> +	/* 4-slot Request High Priority Threshold */
> +	reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT, reg, 0b011011);
> +	/* 6-slot Request High Priority */
> +	reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT, reg, 0b011011);
> +	/* Stop the OCAPI Link on Uncorrectable Error
> +	 * TODO: Confirm final value - disabled for debug */
> +
> +	npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG1(stack, block),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/* TLX Credit Configuration Register */
> +	reg = 0;
> +	/* VC0/VC3/DCP0/DCP1 credits to send to AFU */
> +	reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC0_CREDITS, reg, 0x40);
> +	reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC3_CREDITS, reg, 0x40);
> +	reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP0_CREDITS, reg, 0x80);
> +	reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP1_CREDITS, reg, 0x80);
> +	npu2_scom_write(gcid, scom_base, NPU2_OTL_TLX_CREDITS(stack, block),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +}
> +
> +/* Detect Nimbus DD2.0 and DD2.01 */
> +static int get_nimbus_level(void)
> +{
> +	struct proc_chip *chip = next_chip(NULL);
> +
> +	if (chip && chip->type == PROC_CHIP_P9_NIMBUS)
> +		return chip->ec_level & 0xff;
> +	return -1;
> +}
> +
> +/* Procedure 13.1.3.6 - Address Translation Configuration */
> +static void address_translation_config(uint32_t gcid, uint32_t scom_base,
> +				       uint64_t index)
> +{
> +	int chip_level;
> +	uint64_t reg;
> +	uint64_t stack = index_to_stack(index);
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Address Translation Configuration\n", __func__);
> +	/* PSL_SCNTL_A0 Register */
> +	/*
> +	 * ERAT shared between multiple AFUs
> +	 *
> +	 * The workbook has this bit around the wrong way from the hardware.
> +	 *
> +	 * TODO: handle correctly with link ganging
> +	 */
> +	reg = npu2_scom_read(gcid, scom_base,
> +			     NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL,
> +					     NPU2_XSL_PSL_SCNTL_A0),
> +			     NPU2_MISC_DA_LEN_8B);
> +	reg |= NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL;
> +	npu2_scom_write(gcid, scom_base,
> +			NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL,
> +					NPU2_XSL_PSL_SCNTL_A0),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	chip_level = get_nimbus_level();
> +	if (chip_level == 0x20) {
> +		/*
> +		 * Errata HW408041 (section 15.1.10 of NPU workbook)
> +		 * "RA mismatch when both tlbie and checkout response
> +		 * are seen in same cycle"
> +		 */
> +		/* XSL_GP Register - Bloom Filter Disable */
> +		reg = npu2_scom_read(gcid, scom_base,
> +				NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP),
> +				NPU2_MISC_DA_LEN_8B);
> +		/* To update XSL_GP, we must first write a magic value to it */
> +		npu2_scom_write(gcid, scom_base,
> +				NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP),
> +				NPU2_MISC_DA_LEN_8B, 0x0523790323000000);
> +		reg &= ~NPU2_XSL_GP_BLOOM_FILTER_ENABLE;
> +		npu2_scom_write(gcid, scom_base,
> +				NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP),
> +				NPU2_MISC_DA_LEN_8B, reg);
> +	}
> +
> +	if (chip_level == 0x20 || chip_level == 0x21) {
> +		/*
> +		 * DD2.0/2.1 EOA Bug. Fixed in DD2.2
> +		 */
> +		reg = 0x32F8000000000001;
> +		npu2_scom_write(gcid, scom_base,
> +				NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL,
> +						NPU2_XSL_DEF),
> +				NPU2_MISC_DA_LEN_8B, reg);
> +	}
> +}
> +
> +/* TODO: Merge this with NVLink implementation - we don't use the npu2_bar
> + * wrapper for the PHY BARs yet */
> +static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg,
> +		      uint64_t addr, uint64_t size)
> +{
> +	uint64_t val;
> +	int block;
> +	switch (NPU2_REG(reg)) {
> +	case NPU2_PHY_BAR:
> +		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, addr >> 21);
> +		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, 1);
> +		break;
> +	case NPU2_NTL0_BAR:
> +	case NPU2_NTL1_BAR:
> +		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, addr >> 16);
> +		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(size >> 16));
> +		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, 1);
> +		break;
> +	case NPU2_GENID_BAR:
> +		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, addr >> 16);
> +		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, 1);
> +		break;
> +	default:
> +		val = 0ul;
> +	}
> +
> +	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
> +		npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(0, block, reg),
> +				NPU2_MISC_DA_LEN_8B, val);
> +		prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n",
> +		      NPU2_REG_OFFSET(0, block, reg), val);
> +	}
> +}
> +
> +static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
> +				  uint64_t reg[])
> +{
> +	uint64_t addr, size;
> +
> +	prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__);
> +	phys_map_get(gcid, NPU_PHY, 0, &addr, &size);
> +	write_bar(gcid, scom_base,
> +		  NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
> +		addr, size);
> +	prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__);
> +	phys_map_get(gcid, NPU_PHY, 1, &addr, &size);
> +	write_bar(gcid, scom_base,
> +		  NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
> +		addr, size);
> +
> +	prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__);
> +	phys_map_get(gcid, NPU_REGS, 0, &addr, &size);
> +	write_bar(gcid, scom_base,
> +		  NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
> +		addr, size);
> +	reg[0] = addr;
> +	reg[1] = size;
> +}
> +
> +static int setup_irq(struct npu2 *p)
> +{
> +	uint64_t reg, mmio_addr;
> +	uint32_t base;
> +
> +	base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64);

Mea culpa: we should use a macro instead of "64". That's likely 
temporary anyway, as it should be merged down the road with nvlink 
(Alistair has just sent a related patch).


> +	if (base == XIVE_IRQ_ERROR) {
> +		/**
> +		 * @fwts-label OCAPIIRQAllocationFailed
> +		 * @fwts-advice OpenCAPI IRQ setup failed. This is probably
> +		 * a firmware bug. OpenCAPI functionality will be broken.
> +		 */
> +		prlog(PR_ERR, "OCAPI: Couldn't allocate interrupts for NPU\n");
> +		return -1;
> +	}
> +	p->irq_base = base;
> +
> +	xive_register_ipi_source(base, NPU_IRQ_LEVELS, NULL, NULL);
> +	mmio_addr = (uint64_t ) xive_get_trigger_port(base);
> +	prlog(PR_DEBUG, "OCAPI: NPU base irq %d @%llx\n", base, mmio_addr);
> +	reg = (mmio_addr & NPU2_MISC_IRQ_BASE_MASK) << 13;
> +	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_BASE,
> +			NPU2_MISC_DA_LEN_8B, reg);
> +	/*
> +	 * setup page size = 64k
> +	 *
> +	 * OS type is set to AIX: opal also runs with 2 pages per interrupt,
> +	 * so to cover the max offset for 35 levels of interrupt, we need
> +	 * bits 41 to 46, which is what the AIX setting does. There's no
> +	 * other meaning for that AIX setting.
> +	 */
> +	reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
> +			NPU2_MISC_DA_LEN_8B);
> +	reg |= NPU2_MISC_CFG_IPI_PS;
> +	reg &= ~NPU2_MISC_CFG_IPI_OS;
> +	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	/* enable translation interrupts for all bricks */
> +	reg = npu2_scom_read(p->chip_id, p->xscom_base,
> +			NPU2_MISC_IRQ_ON_ERROR_EN_FIR2, NPU2_MISC_DA_LEN_8B);
> +	reg |= PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3);
> +	npu2_scom_write(p->chip_id, p->xscom_base,
> +			NPU2_MISC_IRQ_ON_ERROR_EN_FIR2, NPU2_MISC_DA_LEN_8B,
> +			reg);
> +	return 0;
> +}
> +
> +static void npu2_opencapi_probe(struct dt_node *dn)
> +{
> +	struct dt_node *link;
> +	char *path;
> +	uint32_t gcid, index, links, scom_base;
> +	uint64_t reg[2];
> +	uint64_t dev_index;
> +	struct npu2 *n;
> +	int rc;
> +
> +	path = dt_get_path(dn);
> +	gcid = dt_get_chip_id(dn);
> +	index = dt_prop_get_u32(dn, "ibm,npu-index");
> +	links = dt_prop_get_u32(dn, "ibm,npu-links");
> +
> +	/* Don't try to init when we have an NVLink link */
> +	dt_for_each_compatible(dn, link, "ibm,npu-link") {
> +		prlog(PR_DEBUG, "OCAPI: NPU%d: NVLink link found, skipping\n",
> +		      index);
> +		return;
> +	}
> +
> +	prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n",
> +	      gcid, index, links, path);
> +	free(path);
> +
> +	scom_base = dt_get_address(dn, 0, NULL);
> +	prlog(PR_INFO, "OCAPI:	 SCOM Base:  %08x\n", scom_base);
> +
> +	setup_global_mmio_bar(gcid, scom_base, reg);
> +
> +	n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev));
> +	n->devices = (struct npu2_dev *)(n + 1);
> +	n->chip_id = gcid;
> +	n->xscom_base = scom_base;
> +	n->regs = (void *)reg[0];
> +	n->dt_node = dn;
> +
> +	dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") {
> +		dev_index = dt_prop_get_u32(link, "ibm,npu-link-index");
> +		prlog(PR_INFO, "OCAPI: Configuring link index %lld\n",
> +		      dev_index);
> +
> +		/* Procedure 13.1.3.1 - Select OCAPI vs NVLink */
> +		brick_config(gcid, scom_base, dev_index);
> +
> +		/* Procedure 13.1.3.5 - Transaction Layer Configuration */
> +		tl_config(gcid, scom_base, dev_index);
> +
> +		/* Procedure 13.1.3.6 - Address Translation Configuration */
> +		address_translation_config(gcid, scom_base, dev_index);
> +	}
> +
> +	/* Procedure 13.1.3.10 - Interrupt Configuration */
> +	rc = setup_irq(n);
> +	if (rc)
> +		goto failed;
> +
> +	return;
> +failed:
> +	free(n);
> +}
> +
> +void probe_npu2_opencapi(void)
> +{
> +	struct dt_node *np_npu;
> +
> +	dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
> +		npu2_opencapi_probe(np_npu);
> +}
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 12d239a..218ac86 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -1274,7 +1274,7 @@ static void npu2_probe_phb(struct dt_node *dn)
>   {
>   	struct proc_chip *proc_chip;
>   	struct dt_node *np;
> -	uint32_t gcid, scom, index, phb_index, links;
> +	uint32_t gcid, scom, index, phb_index, links = 0;
>   	uint64_t reg[2], mm_win[2];
>   	char *path;
> 
> @@ -1288,6 +1288,26 @@ static void npu2_probe_phb(struct dt_node *dn)
>   		return;
>   	}
> 
> +	dt_for_each_compatible(dn, np, "ibm,npu-link") {
> +		links++;
> +	}
> +
> +	index = dt_prop_get_u32(dn, "ibm,npu-index");
> +	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> +
> +	prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n",
> +	      gcid, index, links, path);
> +	free(path);
> +
> +	/* Retrieve scom base address */
> +	scom = dt_get_address(dn, 0, NULL);
> +	prlog(PR_INFO, "   SCOM Base:  %08x\n", scom);
> +
> +	if (!links) {
> +		prlog(PR_INFO, "   No NVLink links found\n");
> +		return;
> +	}
> +
>   	if (!is_p9dd1()) {
>   		/* TODO: Clean this up with register names, etc. when we get
>   		 * time. This just turns NVLink mode on in each brick and should
> @@ -1324,17 +1344,6 @@ static void npu2_probe_phb(struct dt_node *dn)
>   		xscom_write_mask(gcid, 0x5011530, PPC_BIT(0), PPC_BIT(0));
>   	}
> 
> -	index = dt_prop_get_u32(dn, "ibm,npu-index");
> -	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
> -	links = dt_prop_get_u32(dn, "ibm,npu-links");
> -	prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n",
> -	      gcid, index, links, path);
> -	free(path);
> -
> -	/* Retrieve scom base address */
> -	scom = dt_get_address(dn, 0, NULL);
> -	prlog(PR_INFO, "   SCOM Base:  %08x\n", scom);
> -
>   	/* Reassign the BARs */
>   	assign_mmio_bars(gcid, scom, reg, mm_win);
> 
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 27956f2..b2812d5 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -90,6 +90,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_BLOCK_DAT	0x5
>   #define NPU2_BLOCK_NTL0	0x9
>   #define NPU2_BLOCK_NTL1	0xb
> +#define NPU2_BLOCK_OTL0 0xc
> +#define NPU2_BLOCK_OTL1 0xd
> +
> +/* OpenCAPI blocks */
> +#define NPU2_BLOCK_XSL 0xe
> 
>   /* MISC stack blocks */
>   #define NPU2_BLOCK_ATS	0
> @@ -108,6 +113,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
> 
>   /* SM block registers */
>   #define NPU2_CQ_SM_MISC_CFG0			0x000
> +#define   NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38)
> +#define   NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57)
>   #define NPU2_CQ_SM_MISC_CFG1			0x008
>   #define NPU2_PB_EPSILON				0x010
>   #define NPU2_TIMER_CFG				0x018
> @@ -138,6 +145,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define   NPU2_GENID_BAR_ADDR			PPC_BITMASK(3,35)
>   #define   NPU2_GENID_BAR_POISON			PPC_BIT(39)
>   #define NPU2_LOW_WATER_MARKS			0x040
> +#define   NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC PPC_BIT(51)
>   #define NPU2_HIGH_WATER_MARKS			0x048
>   #define NPU2_RELAXED_ORDERING_CFG0		0x050
>   #define NPU2_RELAXED_ORDERING_CFG1		0x058
> @@ -176,6 +184,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
> 
>   /* CTL block registers */
>   #define NPU2_CQ_CTL_MISC_CFG			0x000
> +#define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52)
> +#define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55)
> +#define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56)
>   #define NPU2_CQ_FUTURE_CFG1			0x008
>   #define NPU2_CQ_FUTURE_CFG2			0x010
>   #define NPU2_CQ_FUTURE_CFG3			0x018
> @@ -198,6 +209,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_CQ_LPC_THRESHOLD_CFG		0x080
>   #define NPU2_CQ_INHIBIT_CFG			0x088
>   #define NPU2_CQ_CTL_STATUS			0x090
> +#define   NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED	PPC_BITMASK(48, 49)
> +#define   NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED	PPC_BITMASK(50, 51)
>   #define NPU2_CQ_C_ERR_RPT_MSG0			0x0C0
>   #define NPU2_CQ_C_ERR_RPT_MSG1			0x0C8
>   #define NPU2_CQ_C_ERR_RPT_FIRST0		0x0D0
> @@ -206,9 +219,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_CQ_C_ERR_RPT_MASK1			0x0E8
>   #define NPU2_CQ_C_ERR_RPT_HOLD0			0x0F0
>   #define NPU2_CQ_C_ERR_RPT_HOLD1			0x0F8
> +#define NPU2_CQ_CTL_FENCE_CONTROL_0		0x140
> +#define NPU2_CQ_CTL_FENCE_CONTROL_1		0x148
> +#define   NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1)
> 
>   /* DAT block registers */
>   #define NPU2_CQ_DAT_MISC_CFG			0x008
> +#define	  NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(40)
>   #define NPU2_CQ_DAT_ECC_CFG			0x010
>   #define NPU2_CQ_DAT_SCRATCH0			0x018
>   #define NPU2_CQ_DAT_ECC_STATUS			0x020
> @@ -289,6 +306,43 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_NTL_DL_CONFIG(ndev)		NPU2_DL_REG_OFFSET(ndev, 0xFFF8)
>   #define NPU2_NTL_DL_CLK_CTRL(ndev)		NPU2_DL_REG_OFFSET(ndev, 0x001C)
> 
> +/* OpenCAPI - XSL registers */
> +#define NPU2_XSL_PSL_SCNTL_A0			0x010
> +#define   NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL	PPC_BIT(0)
> +#define NPU2_XSL_DEF				0x040
> +#define NPU2_XSL_GP				0x058
> +#define   NPU2_XSL_GP_BLOOM_FILTER_ENABLE	PPC_BIT(16)
> +#define NPU2_XSL_WRAP_CFG			0x0C0
> +#define   NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE	PPC_BIT(0)
> +
> +/* OpenCAPI - OTL registers */
> +#define NPU2_OTL_CONFIG0(stack, block)		NPU2_REG_OFFSET(stack, block, 0x000)
> +#define   NPU2_OTL_CONFIG0_EN			PPC_BIT(0)
> +#define   NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE	PPC_BIT(1)
> +#define   NPU2_OTL_CONFIG0_BRICKID		PPC_BITMASK(2, 3)
> +#define   NPU2_OTL_CONFIG0_PE_MASK		PPC_BITMASK(4, 7)
> +#define   NPU2_OTL_CONFIG0_ERAT_HASH_0		PPC_BITMASK(8, 13)
> +#define   NPU2_OTL_CONFIG0_ERAT_HASH_1		PPC_BITMASK(14, 19)
> +#define   NPU2_OTL_CONFIG0_ERAT_HASH_2		PPC_BITMASK(20, 25)
> +#define   NPU2_OTL_CONFIG0_ERAT_HASH_3		PPC_BITMASK(26, 31)
> +#define   NPU2_OTL_CONFIG0_BLOCK_TID_OVERRIDE	PPC_BIT(50)
> +#define NPU2_OTL_TLX_CREDITS(stack, block)	NPU2_REG_OFFSET(stack, block, 0x050)
> +#define   NPU2_OTL_TLX_CREDITS_VC0_CREDITS	PPC_BITMASK(0, 7)
> +#define   NPU2_OTL_TLX_CREDITS_VC3_CREDITS	PPC_BITMASK(24, 31)
> +#define   NPU2_OTL_TLX_CREDITS_DCP0_CREDITS	PPC_BITMASK(32, 39)
> +#define   NPU2_OTL_TLX_CREDITS_DCP1_CREDITS	PPC_BITMASK(56, 63)
> +#define NPU2_OTL_CONFIG1(stack, block)		NPU2_REG_OFFSET(stack, block, 0x058)
> +#define   NPU2_OTL_CONFIG1_TX_DRDY_WAIT		PPC_BITMASK(5, 7)
> +#define   NPU2_OTL_CONFIG1_TX_TEMP0_RATE	PPC_BITMASK(8, 11)
> +#define   NPU2_OTL_CONFIG1_TX_CRET_FREQ 	PPC_BITMASK(32, 34)
> +#define   NPU2_OTL_CONFIG1_TX_AGE_FREQ		PPC_BITMASK(35, 39)
> +#define   NPU2_OTL_CONFIG1_TX_RS2_HPWAIT	PPC_BITMASK(40, 45)
> +#define   NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT	PPC_BITMASK(46, 51)
> +#define   NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT	PPC_BITMASK(52, 57)
> +#define   NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS	PPC_BIT(58)
> +#define   NPU2_OTL_CONFIG1_TX_STOP_LINK		PPC_BIT(59)
> +#define   NPU2_OTL_CONFIG1_TX_STOP_ON_UE	PPC_BIT(60)
> +
>   /* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above
>    * there is only a single instance of each of these in the NPU so we
>    * define them as absolute offsets. */
> @@ -296,6 +350,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_MISC_RELAXTED_CFG			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x008)
>   #define NPU2_MISC_LCO_CFG			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x010)
>   #define NPU2_MISC_OPTICAL_IO_CFG0		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x018)
> +#define   NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2 PPC_BITMASK(0, 2)
> +#define   NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1 PPC_BITMASK(3, 4)
> +#define   NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5 PPC_BITMASK(5, 6)
>   #define NPU2_MISC_ERR_RPT_HOLD			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020)
>   #define NPU2_MISC_ERR_RPT_MASK			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028)
>   #define NPU2_MISC_CFG				NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030)
> @@ -320,6 +377,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_MISC_ERR_CTL_CFG			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x088)
>   #define NPU2_MISC_RING_ACK_ERR			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x090)
>   #define NPU2_MISC_IRQ_BASE			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x098)
> +#define   NPU2_MISC_IRQ_BASE_MASK		PPC_BITMASK(13, 51)
>   #define NPU2_MISC_ERR_BRICK_GROUP		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A0)
>   #define NPU2_MISC_FREEZE_STATE			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A8)
>   #define NPU2_MISC_FENCE_STATE			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0B0)
> @@ -377,6 +435,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_MISC_IRQ_LOG13			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x368)
>   #define NPU2_MISC_IRQ_LOG14			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x370)
>   #define NPU2_MISC_IRQ_LOG15			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x378)
> +#define NPU2_MISC_IRQ_ON_ERROR_EN_FIR2		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x408)
> +
> 
>   /* ATS block registers */
>   #define NPU2_ATS_PMU_CTL			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_ATS, 0x000)
> @@ -421,8 +481,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_XTS_CFG				NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x020)
>   #define   NPU2_XTS_CFG_MMIOSD			PPC_BIT(1)
>   #define   NPU2_XTS_CFG_TRY_ATR_RO		PPC_BIT(6)
> +#define   NPU2_XTS_CFG_OPENCAPI			PPC_BIT(15)
>   #define NPU2_XTS_CFG2				NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x028)
>   #define   NPU2_XTS_CFG2_NO_FLUSH_ENA		PPC_BIT(49)
> +#define   NPU2_XTS_CFG2_XSL1_ENA		PPC_BIT(54)
> +#define   NPU2_XTS_CFG2_XSL2_ENA		PPC_BIT(55)
>   #define NPU2_XTS_DBG_CFG0			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x030)
>   #define NPU2_XTS_DBG_CFG1			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x038)
>   #define NPU2_XTS_PMU_CNT			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x040)
> @@ -478,4 +541,29 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_DD1_MISC_SCOM_IND_SCOM_DATA	0x38f
>   #define NPU2_MISC_SCOM_IND_SCOM_DATA		0x68f
> 
> +
> +/* OpenCAPI - PowerBus and OBus configuration SCOM addresses */
> +#define PU_IOE_PB_MISC_CFG			0x5013823
> +#define   PU_IOE_PB_MISC_CFG_SEL_03_NPU_NOT_PB	PPC_BIT(13)
> +#define   PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB	PPC_BIT(14)
> +#define   PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB	PPC_BIT(15)
> +#define OBUS_LL0_IOOL_PHY_CONFIG		0x901080C
> +#define OBUS_LL3_IOOL_PHY_CONFIG		0xC01080C
> +#define   OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED	PPC_BIT(52)
> +#define   OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED	PPC_BIT(53)
> +#define   OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP	PPC_BIT(54)
> +#define   OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED PPC_BIT(58)
> +#define   OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED PPC_BIT(59)
> +#define   OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED	PPC_BIT(61)
> +#define   OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED	PPC_BIT(62)
> +#define   OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED	PPC_BIT(63)
> +#define PU_IOE_PB_FP01_CFG			0x501380A
> +#define PU_IOE_PB_FP23_CFG			0x501380B
> +#define PU_IOE_PB_FP45_CFG			0x501380C
> +#define PU_IOE_PB_FP67_CFG			0x501380D
> +#define    PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE	PPC_BIT(20)
> +#define    PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE	PPC_BIT(25)
> +#define    PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE	PPC_BIT(52)
> +#define    PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE	PPC_BIT(57)
> +
>   #endif /* __NPU2_REGS_H */
> diff --git a/include/npu2.h b/include/npu2.h
> index e19fab6..b28f91b 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -134,6 +134,7 @@ struct npu2_dev {
> 
>   struct npu2 {
>   	uint32_t	index;
> +	struct dt_node	*dt_node;
>   	uint32_t	flags;
>   	uint32_t	chip_id;
>   	uint64_t	xscom_base;
> @@ -142,6 +143,7 @@ struct npu2 {
>   	uint64_t	mm_base;
>   	uint64_t	mm_size;
>   	uint32_t	base_lsi;
> +	uint32_t	irq_base;
>   	uint32_t	total_devices;
>   	struct npu2_dev	*devices;
>   	enum phys_map_type gpu_map_type;
> diff --git a/include/skiboot.h b/include/skiboot.h
> index db91325..6b14c82 100644
> --- a/include/skiboot.h
> +++ b/include/skiboot.h
> @@ -230,6 +230,7 @@ extern int preload_capp_ucode(void);
>   extern void preload_io_vpd(void);
>   extern void probe_npu(void);
>   extern void probe_npu2(void);
> +extern void probe_npu2_opencapi(void);
>   extern void uart_init(void);
>   extern void mbox_init(void);
>   extern void early_uart_init(void);
> 



More information about the Skiboot mailing list