[Skiboot] [PATCH 14/16] opencapi5: add opal functions
Frederic Barrat
fbarrat at linux.ibm.com
Wed Sep 8 23:20:16 AEST 2021
On 20/08/2021 11:45, Christophe Lombard wrote:
> Add three OPAL API calls that are required by the ocxl driver.
>
> - OPAL_PAU_SPA_SETUP
>
> The Shared Process Area (SPA) is a table containing one entry (a
> "Process Element") per memory context which can be accessed by the
> OpenCAPI device.
>
> - OPAL_PAU_SPA_CLEAR_CACHE
>
> The PAU keeps a cache of recently accessed memory contexts. When a
> Process Element is removed from the SPA, the cache for the link must
> be cleared.
>
> - OPAL_PAU_TL_SET
>
> The Transaction Layer specification defines several templates for
> messages to be exchanged on the link. During link setup, the host
> and device must negotiate what templates are supported on both sides
> and at what rates those messages can be sent.
>
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---
Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>
> hw/npu-opal.c | 8 +++
> hw/pau.c | 159 +++++++++++++++++++++++++++++++++++++++++++++
> include/pau-regs.h | 13 ++++
> include/pau.h | 9 +++
> 4 files changed, 189 insertions(+)
>
> diff --git a/hw/npu-opal.c b/hw/npu-opal.c
> index 64e36852..4fc4c662 100644
> --- a/hw/npu-opal.c
> +++ b/hw/npu-opal.c
> @@ -8,6 +8,7 @@
> #include <phb4.h>
> #include <npu2.h>
> #include <npu3.h>
> +#include <pau.h>
>
> static int64_t opal_npu_init_context(uint64_t phb_id, int pid __unused,
> uint64_t msr, uint64_t bdf)
> @@ -195,6 +196,8 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
>
> if (phb->phb_type == phb_type_npu_v2_opencapi)
> rc = npu2_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
> + else if (phb->phb_type == phb_type_pau_opencapi)
> + rc = pau_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
> else
> return OPAL_PARAMETER;
>
> @@ -216,6 +219,8 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
>
> if (phb->phb_type == phb_type_npu_v2_opencapi)
> rc = npu2_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
> + else if (phb->phb_type == phb_type_pau_opencapi)
> + rc = pau_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
> else
> return OPAL_PARAMETER;
>
> @@ -235,6 +240,9 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
> if (phb->phb_type == phb_type_npu_v2_opencapi)
> rc = npu2_opencapi_tl_set(phb, bdfn, capabilities,
> rate_phys, rate_sz);
> + else if (phb->phb_type == phb_type_pau_opencapi)
> + rc = pau_opencapi_tl_set(phb, bdfn, capabilities,
> + rate_phys, rate_sz);
> else
> return OPAL_PARAMETER;
>
> diff --git a/hw/pau.c b/hw/pau.c
> index 63655118..33d33c65 100644
> --- a/hw/pau.c
> +++ b/hw/pau.c
> @@ -15,6 +15,9 @@
> #define PAU_MAX_PE_NUM 16
> #define PAU_RESERVED_PE_NUM 15
>
> +#define PAU_TL_MAX_TEMPLATE 63
> +#define PAU_TL_RATE_BUF_SIZE 32
> +
> #define PAU_SLOT_NORMAL PCI_SLOT_STATE_NORMAL
> #define PAU_SLOT_LINK PCI_SLOT_STATE_LINK
> #define PAU_SLOT_LINK_START (PAU_SLOT_LINK + 1)
> @@ -271,6 +274,162 @@ static void pau_device_detect_fixup(struct pau_dev *dev)
> dt_add_property_strings(dn, "ibm,pau-link-type", "unknown");
> }
>
> +int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
> + uint64_t addr, uint64_t PE_mask)
> +{
> + struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> + struct pau *pau = dev->pau;
> + uint64_t reg, val;
> + int64_t rc;
> +
> + lock(&pau->lock);
> +
> + reg = PAU_XSL_OSL_SPAP_AN(dev->index);
> + val = pau_read(pau, reg);
> + if ((addr && (val & PAU_XSL_OSL_SPAP_AN_EN)) ||
> + (!addr && !(val & PAU_XSL_OSL_SPAP_AN_EN))) {
> + rc = OPAL_BUSY;
> + goto out;
> + }
> +
> + /* SPA is disabled by passing a NULL address */
> + val = addr;
> + if (addr)
> + val = addr | PAU_XSL_OSL_SPAP_AN_EN;
> + pau_write(pau, reg, val);
> +
> + /*
> + * set the PE mask that the OS uses for PASID -> PE handle
> + * conversion
> + */
> + reg = PAU_OTL_MISC_CFG0(dev->index);
> + val = pau_read(pau, reg);
> + val = SETFIELD(PAU_OTL_MISC_CFG0_PE_MASK, val, PE_mask);
> + pau_write(pau, reg, val);
> + rc = OPAL_SUCCESS;
> +out:
> + unlock(&pau->lock);
> + return rc;
> +}
> +
> +int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
> + uint32_t __unused bdfn,
> + uint64_t PE_handle)
> +{
> + struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> + struct pau *pau = dev->pau;
> + uint64_t reg, val;
> + int64_t rc, retries = 5;
> +
> + lock(&pau->lock);
> +
> + reg = PAU_XSL_OSL_CCINV;
> + val = pau_read(pau, reg);
> + if (val & PAU_XSL_OSL_CCINV_PENDING) {
> + rc = OPAL_BUSY;
> + goto out;
> + }
> +
> + val = PAU_XSL_OSL_CCINV_REMOVE;
> + val |= SETFIELD(PAU_XSL_OSL_CCINV_PE_HANDLE, val, PE_handle);
> + if (dev->index)
> + val |= PAU_XSL_OSL_CCINV_BRICK;
> + pau_write(pau, reg, val);
> +
> + rc = OPAL_HARDWARE;
> + while (retries--) {
> + val = pau_read(pau, reg);
> + if (!(val & PAU_XSL_OSL_CCINV_PENDING)) {
> + rc = OPAL_SUCCESS;
> + break;
> + }
> + /* The bit is expected to flip in less than 200us */
> + time_wait_us(200);
> + }
> +out:
> + unlock(&pau->lock);
> + return rc;
> +}
> +
> +static int pau_opencapi_get_templ_rate(unsigned int templ,
> + char *rate_buf)
> +{
> + int shift, idx, val;
> +
> + /*
> + * Each rate is encoded over 4 bits (0->15), with 15 being the
> + * slowest. The buffer is a succession of rates for all the
> + * templates. The high nibble of the first byte is for template
> + * 63, the low nibble for template 62, and so on. So the rate
> + * for template 0 is at the very end of the buffer.
> + */
> + idx = (PAU_TL_MAX_TEMPLATE - templ) / 2;
> + shift = 4 * (1 - ((PAU_TL_MAX_TEMPLATE - templ) % 2));
> + val = rate_buf[idx] >> shift;
> + return val;
> +}
> +
> +static bool pau_opencapi_is_templ_supported(unsigned int templ,
> + long capabilities)
> +{
> + return !!(capabilities & (1ull << templ));
> +}
> +
> +int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
> + long capabilities, uint64_t rate_phys,
> + int rate_sz)
> +{
> + struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> + struct pau *pau;
> + char *rate = (char *) rate_phys;
> + uint64_t reg, val, templ_rate;
> + int i, rate_pos;
> +
> + if (!dev)
> + return OPAL_PARAMETER;
> + pau = dev->pau;
> +
> + if (!opal_addr_valid(rate) || rate_sz != PAU_TL_RATE_BUF_SIZE)
> + return OPAL_PARAMETER;
> +
> + /* The 'capabilities' argument defines which TL templates the
> + * device can receive. OpenCAPI 5.0 defines 64 templates, so
> + * that's one bit per template.
> + *
> + * For each template, the device processing time may vary, so
> + * the device advertises at what rate a message of a given
> + * template can be sent. That's encoded in the 'rate' buffer.
> + *
> + * On P10, PAU only knows about TL templates 0 -> 3.
> + * Per the spec, template 0 must be supported.
> + */
> + if (!pau_opencapi_is_templ_supported(0, capabilities))
> + return OPAL_PARAMETER;
> +
> + reg = PAU_OTL_MISC_CFG_TX(dev->index);
> + val = pau_read(pau, reg);
> + val &= ~PAU_OTL_MISC_CFG_TX_TEMP1_EN;
> + val &= ~PAU_OTL_MISC_CFG_TX_TEMP2_EN;
> + val &= ~PAU_OTL_MISC_CFG_TX_TEMP3_EN;
> +
> + for (i = 0; i < 4; i++) {
> + /* Skip template 0 as it is implicitly enabled.
> + * Enable the other templates if supported by the AFU.
> + */
> + if (i && pau_opencapi_is_templ_supported(i, capabilities))
> + val |= PAU_OTL_MISC_CFG_TX_TEMP_EN(i);
> + /* The tx rate should still be set for template 0 */
> + templ_rate = pau_opencapi_get_templ_rate(i, rate);
> + rate_pos = 8 + i * 4;
> + val = SETFIELD(PAU_OTL_MISC_CFG_TX_TEMP_RATE(rate_pos, rate_pos + 3),
> + val, templ_rate);
> + }
> + pau_write(pau, reg, val);
> + PAUDEVDBG(dev, "OTL configuration register set to %llx\n", val);
> +
> + return OPAL_SUCCESS;
> +}
> +
> #define CQ_CTL_STATUS_TIMEOUT 10 /* milliseconds */
>
> static int pau_opencapi_set_fence_control(struct pau_dev *dev,
> diff --git a/include/pau-regs.h b/include/pau-regs.h
> index 7a5aaa5f..57c2d723 100644
> --- a/include/pau-regs.h
> +++ b/include/pau-regs.h
> @@ -118,6 +118,7 @@
> #define PAU_OTL_MISC_CFG0_EN PPC_BIT(0)
> #define PAU_OTL_MISC_CFG0_BLOCK_PE_HANDLE PPC_BIT(1)
> #define PAU_OTL_MISC_CFG0_BRICKID PPC_BITMASK(2, 3)
> +#define PAU_OTL_MISC_CFG0_PE_MASK PPC_BITMASK(4, 7)
> #define PAU_OTL_MISC_CFG0_ENABLE_4_0 PPC_BIT(51)
> #define PAU_OTL_MISC_CFG0_XLATE_RELEASE PPC_BIT(62)
> #define PAU_OTL_MISC_CFG0_ENABLE_5_0 PPC_BIT(63)
> @@ -132,11 +133,16 @@
> #define PAU_OTL_MISC_CFG_TLX_CREDITS_DCP2 PPC_BITMASK(48, 55)
> #define PAU_OTL_MISC_CFG_TLX_CREDITS_DCP3 PPC_BITMASK(56, 63)
> #define PAU_OTL_MISC_CFG_TX(brk) (PAU_BLOCK_OTL(brk) + 0x058)
> +#define PAU_OTL_MISC_CFG_TX_TEMP1_EN PPC_BIT(1)
> +#define PAU_OTL_MISC_CFG_TX_TEMP2_EN PPC_BIT(2)
> +#define PAU_OTL_MISC_CFG_TX_TEMP3_EN PPC_BIT(3)
> +#define PAU_OTL_MISC_CFG_TX_TEMP_EN(n) PPC_BIT(n)
> #define PAU_OTL_MISC_CFG_TX_DRDY_WAIT PPC_BITMASK(5, 7)
> #define PAU_OTL_MISC_CFG_TX_TEMP0_RATE PPC_BITMASK(8, 11)
> #define PAU_OTL_MISC_CFG_TX_TEMP1_RATE PPC_BITMASK(12, 15)
> #define PAU_OTL_MISC_CFG_TX_TEMP2_RATE PPC_BITMASK(16, 19)
> #define PAU_OTL_MISC_CFG_TX_TEMP3_RATE PPC_BITMASK(20, 23)
> +#define PAU_OTL_MISC_CFG_TX_TEMP_RATE(nib0, nib1) PPC_BITMASK(nib0, nib1)
> #define PAU_OTL_MISC_CFG_TX_CRET_FREQ PPC_BITMASK(32, 34)
> #define PAU_OTL_MISC_OTL_REM0(brk) (PAU_BLOCK_OTL(brk) + 0x068)
> #define PAU_OTL_MISC_ERROR_SIG_RXI(brk) (PAU_BLOCK_OTL(brk) + 0x070)
> @@ -150,11 +156,18 @@
> #define PAU_OTL_MISC_PSL_PEHANDLE_AN(brk) (PAU_BLOCK_OTL_PSL(brk) + 0x018)
>
> /* XSL block registers */
> +#define PAU_XSL_OSL_SPAP_AN(brk) (PAU_BLOCK_XSL + 0x000 + (brk) * 8)
> +#define PAU_XSL_OSL_SPAP_AN_EN PPC_BIT(63)
> #define PAU_XSL_WRAP_CFG (PAU_BLOCK_XSL + 0x100)
> #define PAU_XSL_WRAP_CFG_CLOCK_ENABLE PPC_BIT(0)
> #define PAU_XSL_OSL_XLATE_CFG(brk) (PAU_BLOCK_XSL + 0x040 + (brk) * 8)
> #define PAU_XSL_OSL_XLATE_CFG_AFU_DIAL PPC_BIT(0)
> #define PAU_XSL_OSL_XLATE_CFG_OPENCAPI3 PPC_BIT(32)
> +#define PAU_XSL_OSL_CCINV (PAU_BLOCK_XSL + 0x070)
> +#define PAU_XSL_OSL_CCINV_REMOVE PPC_BIT(15)
> +#define PAU_XSL_OSL_CCINV_PENDING PPC_BIT(16)
> +#define PAU_XSL_OSL_CCINV_BRICK PPC_BIT(47)
> +#define PAU_XSL_OSL_CCINV_PE_HANDLE PPC_BITMASK(48, 62)
>
> /* XTS block registers */
> #define PAU_XTS_CFG (PAU_BLOCK_PAU_XTS + 0x020)
> diff --git a/include/pau.h b/include/pau.h
> index 8b978bd6..61b17925 100644
> --- a/include/pau.h
> +++ b/include/pau.h
> @@ -200,6 +200,15 @@ static inline uint64_t pau_read(struct pau *pau, uint64_t reg)
> }
>
> void pau_opencapi_dump_scoms(struct phb *phb);
> +int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
> + uint64_t addr, uint64_t PE_mask);
> +int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
> + uint32_t __unused bdfn,
> + uint64_t PE_handle);
> +int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
> + long capabilities, uint64_t rate_phys,
> + int rate_sz);
> +
>
> /* PHY */
> int pau_dev_phy_reset(struct pau_dev *dev);
>
More information about the Skiboot
mailing list