[Skiboot] [PATCH 14/16] opencapi5: add opal functions

Frederic Barrat fbarrat at linux.ibm.com
Wed Sep 8 23:20:16 AEST 2021



On 20/08/2021 11:45, Christophe Lombard wrote:
> Add three OPAL API calls that are required by the ocxl driver.
> 
>    - OPAL_PAU_SPA_SETUP
> 
>      The Shared Process Area (SPA) is a table containing one entry (a
>      "Process Element") per memory context which can be accessed by the
>      OpenCAPI device.
> 
>    - OPAL_PAU_SPA_CLEAR_CACHE
> 
>      The PAU keeps a cache of recently accessed memory contexts. When a
>      Process Element is removed from the SPA, the cache for the link must
>      be cleared.
> 
>    - OPAL_PAU_TL_SET
> 
>      The Transaction Layer specification defines several templates for
>      messages to be exchanged on the link. During link setup, the host
>      and device must negotiate what templates are supported on both sides
>      and at what rates those messages can be sent.
> 
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---


Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>


>   hw/npu-opal.c      |   8 +++
>   hw/pau.c           | 159 +++++++++++++++++++++++++++++++++++++++++++++
>   include/pau-regs.h |  13 ++++
>   include/pau.h      |   9 +++
>   4 files changed, 189 insertions(+)
> 
> diff --git a/hw/npu-opal.c b/hw/npu-opal.c
> index 64e36852..4fc4c662 100644
> --- a/hw/npu-opal.c
> +++ b/hw/npu-opal.c
> @@ -8,6 +8,7 @@
>   #include <phb4.h>
>   #include <npu2.h>
>   #include <npu3.h>
> +#include <pau.h>
> 
>   static int64_t opal_npu_init_context(uint64_t phb_id, int pid __unused,
>   				     uint64_t msr, uint64_t bdf)
> @@ -195,6 +196,8 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
> 
>   	if (phb->phb_type == phb_type_npu_v2_opencapi)
>   		rc = npu2_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
> +	else if (phb->phb_type == phb_type_pau_opencapi)
> +		rc = pau_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
>   	else
>   		return OPAL_PARAMETER;
> 
> @@ -216,6 +219,8 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
> 
>   	if (phb->phb_type == phb_type_npu_v2_opencapi)
>   		rc = npu2_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
> +	else if (phb->phb_type == phb_type_pau_opencapi)
> +		rc = pau_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
>   	else
>   		return OPAL_PARAMETER;
> 
> @@ -235,6 +240,9 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
>   	if (phb->phb_type == phb_type_npu_v2_opencapi)
>   		rc = npu2_opencapi_tl_set(phb, bdfn, capabilities,
>   					  rate_phys, rate_sz);
> +	else if (phb->phb_type == phb_type_pau_opencapi)
> +		rc = pau_opencapi_tl_set(phb, bdfn, capabilities,
> +					 rate_phys, rate_sz);
>   	else
>   		return OPAL_PARAMETER;
> 
> diff --git a/hw/pau.c b/hw/pau.c
> index 63655118..33d33c65 100644
> --- a/hw/pau.c
> +++ b/hw/pau.c
> @@ -15,6 +15,9 @@
>   #define PAU_MAX_PE_NUM		16
>   #define PAU_RESERVED_PE_NUM	15
> 
> +#define PAU_TL_MAX_TEMPLATE	63
> +#define PAU_TL_RATE_BUF_SIZE	32
> +
>   #define PAU_SLOT_NORMAL			PCI_SLOT_STATE_NORMAL
>   #define PAU_SLOT_LINK			PCI_SLOT_STATE_LINK
>   #define   PAU_SLOT_LINK_START			(PAU_SLOT_LINK + 1)
> @@ -271,6 +274,162 @@ static void pau_device_detect_fixup(struct pau_dev *dev)
>   	dt_add_property_strings(dn, "ibm,pau-link-type", "unknown");
>   }
> 
> +int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
> +			       uint64_t addr, uint64_t PE_mask)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +	struct pau *pau = dev->pau;
> +	uint64_t reg, val;
> +	int64_t rc;
> +
> +	lock(&pau->lock);
> +
> +	reg = PAU_XSL_OSL_SPAP_AN(dev->index);
> +	val = pau_read(pau, reg);
> +	if ((addr && (val & PAU_XSL_OSL_SPAP_AN_EN)) ||
> +		(!addr && !(val & PAU_XSL_OSL_SPAP_AN_EN))) {
> +		rc = OPAL_BUSY;
> +		goto out;
> +	}
> +
> +	/* SPA is disabled by passing a NULL address */
> +	val = addr;
> +	if (addr)
> +		val = addr | PAU_XSL_OSL_SPAP_AN_EN;
> +	pau_write(pau, reg, val);
> +
> +	/*
> +	 * set the PE mask that the OS uses for PASID -> PE handle
> +	 * conversion
> +	 */
> +	reg = PAU_OTL_MISC_CFG0(dev->index);
> +	val = pau_read(pau, reg);
> +	val = SETFIELD(PAU_OTL_MISC_CFG0_PE_MASK, val, PE_mask);
> +	pau_write(pau, reg, val);
> +	rc = OPAL_SUCCESS;
> +out:
> +	unlock(&pau->lock);
> +	return rc;
> +}
> +
> +int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
> +				     uint32_t __unused bdfn,
> +				     uint64_t PE_handle)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +	struct pau *pau = dev->pau;
> +	uint64_t reg, val;
> +	int64_t rc, retries = 5;
> +
> +	lock(&pau->lock);
> +
> +	reg = PAU_XSL_OSL_CCINV;
> +	val = pau_read(pau, reg);
> +	if (val & PAU_XSL_OSL_CCINV_PENDING) {
> +		rc = OPAL_BUSY;
> +		goto out;
> +	}
> +
> +	val = PAU_XSL_OSL_CCINV_REMOVE;
> +	val |= SETFIELD(PAU_XSL_OSL_CCINV_PE_HANDLE, val, PE_handle);
> +	if (dev->index)
> +		val |= PAU_XSL_OSL_CCINV_BRICK;
> +	pau_write(pau, reg, val);
> +
> +	rc = OPAL_HARDWARE;
> +	while (retries--) {
> +		val = pau_read(pau, reg);
> +		if (!(val & PAU_XSL_OSL_CCINV_PENDING)) {
> +			rc = OPAL_SUCCESS;
> +			break;
> +		}
> +		/* the bit expected to flip in less than 200us */
> +		time_wait_us(200);
> +	}
> +out:
> +	unlock(&pau->lock);
> +	return rc;
> +}
> +
> +static int pau_opencapi_get_templ_rate(unsigned int templ,
> +				       char *rate_buf)
> +{
> +	int shift, idx, val;
> +
> +	/*
> +	 * Each rate is encoded over 4 bits (0->15), with 15 being the
> +	 * slowest. The buffer is a succession of rates for all the
> +	 * templates. The first 4 bits are for template 63, followed
> +	 * by 4 bits for template 62, ... etc. So the rate for
> +	 * template 0 is at the very end of the buffer.
> +	 */
> +	idx = (PAU_TL_MAX_TEMPLATE - templ) / 2;
> +	shift = 4 * (1 - ((PAU_TL_MAX_TEMPLATE - templ) % 2));
> +	val = rate_buf[idx] >> shift;
> +	return val;
> +}
> +
> +static bool pau_opencapi_is_templ_supported(unsigned int templ,
> +					    long capabilities)
> +{
> +	return !!(capabilities & (1ull << templ));
> +}
> +
> +int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
> +			    long capabilities, uint64_t rate_phys,
> +			    int rate_sz)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +	struct pau *pau;
> +	char *rate = (char *) rate_phys;
> +	uint64_t reg, val, templ_rate;
> +	int i, rate_pos;
> +
> +	if (!dev)
> +		return OPAL_PARAMETER;
> +	pau = dev->pau;
> +
> +	if (!opal_addr_valid(rate) || rate_sz != PAU_TL_RATE_BUF_SIZE)
> +		return OPAL_PARAMETER;
> +
> +	/* The 'capabilities' argument defines what TL template the
> +	 * device can receive. OpenCAPI 5.0 defines 64 templates, so
> +	 * that's one bit per template.
> +	 *
> +	 * For each template, the device processing time may vary, so
> +	 * the device advertises at what rate a message of a given
> +	 * template can be sent. That's encoded in the 'rate' buffer.
> +	 *
> +	 * On P10, PAU only knows about TL templates 0 -> 3.
> +	 * Per the spec, template 0 must be supported.
> +	 */
> +	if (!pau_opencapi_is_templ_supported(0, capabilities))
> +		return OPAL_PARAMETER;
> +
> +	reg = PAU_OTL_MISC_CFG_TX(dev->index);
> +	val = pau_read(pau, reg);
> +	val &= ~PAU_OTL_MISC_CFG_TX_TEMP1_EN;
> +	val &= ~PAU_OTL_MISC_CFG_TX_TEMP2_EN;
> +	val &= ~PAU_OTL_MISC_CFG_TX_TEMP3_EN;
> +
> +	for (i = 0; i < 4; i++) {
> +		/* Skip template 0 as it is implicitly enabled.
> +		 * Enable other template If supported by AFU
> +		 */
> +		if (i && pau_opencapi_is_templ_supported(i, capabilities))
> +			val |= PAU_OTL_MISC_CFG_TX_TEMP_EN(i);
> +		/* The tx rate should still be set for template 0 */
> +		templ_rate = pau_opencapi_get_templ_rate(i, rate);
> +		rate_pos = 8 + i * 4;
> +		val = SETFIELD(PAU_OTL_MISC_CFG_TX_TEMP_RATE(rate_pos, rate_pos + 3),
> +			       val, templ_rate);
> +	}
> +	pau_write(pau, reg, val);
> +	PAUDEVDBG(dev, "OTL configuration register set to %llx\n", val);
> +
> +	return OPAL_SUCCESS;
> +}
> +
>   #define CQ_CTL_STATUS_TIMEOUT  10 /* milliseconds */
> 
>   static int pau_opencapi_set_fence_control(struct pau_dev *dev,
> diff --git a/include/pau-regs.h b/include/pau-regs.h
> index 7a5aaa5f..57c2d723 100644
> --- a/include/pau-regs.h
> +++ b/include/pau-regs.h
> @@ -118,6 +118,7 @@
>   #define   PAU_OTL_MISC_CFG0_EN			PPC_BIT(0)
>   #define   PAU_OTL_MISC_CFG0_BLOCK_PE_HANDLE	PPC_BIT(1)
>   #define   PAU_OTL_MISC_CFG0_BRICKID		PPC_BITMASK(2, 3)
> +#define   PAU_OTL_MISC_CFG0_PE_MASK		PPC_BITMASK(4, 7)
>   #define   PAU_OTL_MISC_CFG0_ENABLE_4_0		PPC_BIT(51)
>   #define   PAU_OTL_MISC_CFG0_XLATE_RELEASE	PPC_BIT(62)
>   #define   PAU_OTL_MISC_CFG0_ENABLE_5_0		PPC_BIT(63)
> @@ -132,11 +133,16 @@
>   #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP2	PPC_BITMASK(48, 55)
>   #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP3	PPC_BITMASK(56, 63)
>   #define PAU_OTL_MISC_CFG_TX(brk)		(PAU_BLOCK_OTL(brk) + 0x058)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP1_EN		PPC_BIT(1)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP2_EN		PPC_BIT(2)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP3_EN		PPC_BIT(3)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP_EN(n)	PPC_BIT(n)
>   #define   PAU_OTL_MISC_CFG_TX_DRDY_WAIT		PPC_BITMASK(5, 7)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP0_RATE	PPC_BITMASK(8, 11)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP1_RATE	PPC_BITMASK(12, 15)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP2_RATE	PPC_BITMASK(16, 19)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP3_RATE	PPC_BITMASK(20, 23)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP_RATE(nib0, nib1)	PPC_BITMASK(nib0, nib1)
>   #define   PAU_OTL_MISC_CFG_TX_CRET_FREQ		PPC_BITMASK(32, 34)
>   #define PAU_OTL_MISC_OTL_REM0(brk)		(PAU_BLOCK_OTL(brk) + 0x068)
>   #define PAU_OTL_MISC_ERROR_SIG_RXI(brk)		(PAU_BLOCK_OTL(brk) + 0x070)
> @@ -150,11 +156,18 @@
>   #define PAU_OTL_MISC_PSL_PEHANDLE_AN(brk)	(PAU_BLOCK_OTL_PSL(brk) + 0x018)
> 
>   /* XSL block registers */
> +#define PAU_XSL_OSL_SPAP_AN(brk)		(PAU_BLOCK_XSL + 0x000 + (brk) * 8)
> +#define   PAU_XSL_OSL_SPAP_AN_EN		PPC_BIT(63)
>   #define PAU_XSL_WRAP_CFG			(PAU_BLOCK_XSL + 0x100)
>   #define   PAU_XSL_WRAP_CFG_CLOCK_ENABLE		PPC_BIT(0)
>   #define PAU_XSL_OSL_XLATE_CFG(brk)		(PAU_BLOCK_XSL + 0x040 + (brk) * 8)
>   #define   PAU_XSL_OSL_XLATE_CFG_AFU_DIAL	PPC_BIT(0)
>   #define   PAU_XSL_OSL_XLATE_CFG_OPENCAPI3	PPC_BIT(32)
> +#define PAU_XSL_OSL_CCINV			(PAU_BLOCK_XSL + 0x070)
> +#define   PAU_XSL_OSL_CCINV_REMOVE		PPC_BIT(15)
> +#define   PAU_XSL_OSL_CCINV_PENDING		PPC_BIT(16)
> +#define   PAU_XSL_OSL_CCINV_BRICK		PPC_BIT(47)
> +#define   PAU_XSL_OSL_CCINV_PE_HANDLE		PPC_BITMASK(48, 62)
> 
>   /* XTS block registers */
>   #define PAU_XTS_CFG				(PAU_BLOCK_PAU_XTS + 0x020)
> diff --git a/include/pau.h b/include/pau.h
> index 8b978bd6..61b17925 100644
> --- a/include/pau.h
> +++ b/include/pau.h
> @@ -200,6 +200,15 @@ static inline uint64_t pau_read(struct pau *pau, uint64_t reg)
>   }
> 
>   void pau_opencapi_dump_scoms(struct phb *phb);
> +int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
> +			       uint64_t addr, uint64_t PE_mask);
> +int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
> +				     uint32_t __unused bdfn,
> +				     uint64_t PE_handle);
> +int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
> +			    long capabilities, uint64_t rate_phys,
> +			    int rate_sz);
> +
> 
>   /* PHY */
>   int pau_dev_phy_reset(struct pau_dev *dev);
> 


More information about the Skiboot mailing list