[Skiboot] [PATCH 8/9] npu2-opencapi: Add OpenCAPI OPAL API calls
Frederic Barrat
fbarrat at linux.vnet.ibm.com
Fri Jan 12 02:05:57 AEDT 2018
Le 18/12/2017 à 08:07, Andrew Donnellan a écrit :
> From: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
>
> Add three OPAL API calls that are required by the ocxl driver.
>
> - OPAL_NPU_SPA_SETUP
>
> The Shared Process Area (SPA) is a table containing one entry (a
> "Process Element") per memory context which can be accessed by the
> OpenCAPI device.
>
> - OPAL_NPU_SPA_CLEAR_CACHE
>
> The NPU keeps a cache of recently accessed memory contexts. When a
> Process Element is removed from the SPA, the cache for the link must be
> cleared.
>
> - OPAL_NPU_TL_SET
>
> The Transaction Layer specification defines several templates for
> messages to be exchanged on the link. During link setup, the host and
> device must negotiate what templates are supported on both sides and at
> what rates those messages can be sent.
>
> Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> ---
It still looks mostly ok to me.
Fred
> doc/opal-api/opal-npu2-opencapi-159-160-161.rst | 126 ++++++++++-
> hw/npu2-opencapi.c | 206 +++++++++++++++++-
> include/npu2-regs.h | 4 +-
> include/opal-api.h | 5 +-
> 4 files changed, 340 insertions(+), 1 deletion(-)
> create mode 100644 doc/opal-api/opal-npu2-opencapi-159-160-161.rst
>
> diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
> new file mode 100644
> index 0000000..4db3d3e
> --- /dev/null
> +++ b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
> @@ -0,0 +1,126 @@
> +.. _OPAL_NPU_SPA_SETUP:
> +
> +OPAL_NPU_SPA_SETUP
> +==================
> +
> +OpenCAPI devices only.
> +
> +Sets up a Shared Process Area (SPA) with the Shared Process Area
> +Pointer (SPAP) set to the provided address `addr`, and sets the OTL PE
> +mask (used for PASID to PE handle conversion) to `PE_mask`.
> +
> +If `addr` is NULL, the SPA will be disabled. `addr` must be 4K aligned.
> +
> +Parameters
> +----------
> +::
> +
> + uint64_t phb_id
> + uint32_t bdfn
> + uint64_t addr
> + uint64_t PE_mask
> +
> +``phb_id``
> + OPAL ID of PHB
> +
> +``bdfn``
> + Bus-Device-Function number of OpenCAPI AFU
> +
> +``addr``
> + Address of Shared Process Area, or NULL to disable SPA. Must be 4K aligned.
> +
> +``PE_mask``
> + Process Element mask for PASID to PE handle conversion (must be in the range 0-15)
> +
> +Return Values
> +-------------
> +
> +OPAL_SUCCESS
> + SPAP and PE mask were successfully set
> +
> +OPAL_PARAMETER
> + A provided parameter was invalid
> +
> +OPAL_BUSY
> + SPA is already enabled (or if addr is NULL, SPA is already disabled)
> +
> +.. _OPAL_NPU_SPA_CLEAR_CACHE:
> +
> +OPAL_NPU_SPA_CLEAR_CACHE
> +========================
> +
> +OpenCAPI devices only.
> +
> +Invalidates the Process Element with the given `PE_handle` from the NPU's SPA cache.
> +
> +Parameters
> +----------
> +::
> +
> + uint64_t phb_id
> + uint32_t bdfn
> + uint64_t PE_handle
> +
> +``phb_id``
> + OPAL ID of PHB
> +
> +``bdfn``
> + Bus-Device-Function number of OpenCAPI AFU
> +
> +``PE_handle``
> + Handle of Process Element being cleared from SPA cache
> +
> +Return Values
> +-------------
> +
> +OPAL_SUCCESS
> + PE was successfully cleared from SPA cache
> +
> +OPAL_PARAMETER
> + A provided parameter was invalid
> +
> +OPAL_BUSY
> + XSLO is currently invalidating a previously requested entry
> +
> +.. _OPAL_NPU_TL_SET:
> +
> +OPAL_NPU_TL_SET
> +===============
> +
> +OpenCAPI devices only.
> +
> +Update the NPU OTL configuration with device capabilities.
> +
> +Parameters
> +----------
> +::
> +
> + uint64_t phb_id
> + uint32_t bdfn
> + long capabilities
> + uint64_t rate_phys
> + int rate_sz
> +
> +``phb_id``
> + OPAL ID of PHB
> +
> +``bdfn``
> + Bus-Device-Function number of OpenCAPI AFU
> +
> +``capabilities``
> + Bitmap of TL templates the device can receive
> +
> +``rate_phys``
> + Physical address of rates buffer
> +
> +``rate_sz``
> + Size of rates buffer (must be equal to 32)
> +
> +Return Values
> +-------------
> +
> +OPAL_SUCCESS
> + OTL configuration was successfully updated
> +
> +OPAL_PARAMETER
> + A provided parameter was invalid
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index ea55dad..196b7f7 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -54,6 +54,9 @@
>
> #define NPU_IRQ_LEVELS 35
> #define NPU_IRQ_LEVELS_XSL 23
> +#define MAX_PE_HANDLE ((1 << 15) - 1)
> +#define TL_MAX_TEMPLATE 63
> +#define TL_RATE_BUF_SIZE 32
>
> static const struct phb_ops npu2_opencapi_ops;
>
> @@ -1377,3 +1380,206 @@ static const struct phb_ops npu2_opencapi_ops = {
> .set_capp_recovery = NULL,
> .tce_kill = NULL,
> };
> +
> +static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn,
> + uint64_t addr, uint64_t PE_mask)
> +{
> + uint64_t stack, block, offset, reg;
> + struct phb *phb = pci_get_phb(phb_id);
> + struct npu2_dev *dev;
> + int rc;
> +
> + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
> + return OPAL_PARAMETER;
> +
> + /* 4k aligned */
> + if (addr & 0xFFF)
> + return OPAL_PARAMETER;
> +
> + if (PE_mask > 15)
> + return OPAL_PARAMETER;
> +
> + dev = phb_to_npu2_dev_ocapi(phb);
> + if (!dev)
> + return OPAL_PARAMETER;
> +
> + block = index_to_block(dev->index);
> + stack = index_to_stack(dev->index);
> + if (block == NPU2_BLOCK_OTL1)
> + offset = NPU2_XSL_PSL_SPAP_A1;
> + else
> + offset = NPU2_XSL_PSL_SPAP_A0;
> +
> +
> + lock(&dev->npu->lock);
> + /*
> + * set the SPAP used by the device
> + */
> + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
> + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset),
> + NPU2_MISC_DA_LEN_8B);
> + if ((addr && (reg & NPU2_XSL_PSL_SPAP_EN)) ||
> + (!addr && !(reg & NPU2_XSL_PSL_SPAP_EN))) {
> + rc = OPAL_BUSY;
> + goto out;
> + }
> + /* SPA is disabled by passing a NULL address */
> + reg = addr;
> + if (addr)
> + reg = addr | NPU2_XSL_PSL_SPAP_EN;
> +
> + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
> + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset),
> + NPU2_MISC_DA_LEN_8B, reg);
> +
> + /*
> + * set the PE mask that the OS uses for PASID -> PE handle
> + * conversion
> + */
> + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
> + NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B);
> + reg &= ~NPU2_OTL_CONFIG0_PE_MASK;
> + reg |= (PE_mask << (63-7));
> + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
> + NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B,
> + reg);
> + rc = OPAL_SUCCESS;
> +out:
> + unlock(&dev->npu->lock);
> + return rc;
> +}
> +opal_call(OPAL_NPU_SPA_SETUP, opal_npu_spa_setup, 4);
> +
> +static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn,
> + uint64_t PE_handle)
> +{
> + uint64_t cc_inv, stack, block, reg, rc;
> + uint32_t retries = 5;
> + struct phb *phb = pci_get_phb(phb_id);
> + struct npu2_dev *dev;
> +
> + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
> + return OPAL_PARAMETER;
> +
> + if (PE_handle > MAX_PE_HANDLE)
> + return OPAL_PARAMETER;
> +
> + dev = phb_to_npu2_dev_ocapi(phb);
> + if (!dev)
> + return OPAL_PARAMETER;
> +
> + block = index_to_block(dev->index);
> + stack = index_to_stack(dev->index);
> + cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0);
> +
> + lock(&dev->npu->lock);
> + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, cc_inv,
> + NPU2_MISC_DA_LEN_8B);
> + if (reg & PPC_BIT(16)) {
> + rc = OPAL_BUSY;
> + goto out;
> + }
> +
> + reg = PE_handle | PPC_BIT(15);
> + if (block == NPU2_BLOCK_OTL1)
> + reg |= PPC_BIT(48);
> + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, cc_inv,
> + NPU2_MISC_DA_LEN_8B, reg);
> +
> + rc = OPAL_HARDWARE;
> + while (retries--) {
> + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
> + cc_inv, NPU2_MISC_DA_LEN_8B);
> + if (!(reg & PPC_BIT(16))) {
> + rc = OPAL_SUCCESS;
> + break;
> + }
> + /* the bit is expected to flip in less than 200us */
> + time_wait_us(200);
> + }
> +out:
> + unlock(&dev->npu->lock);
> + return rc;
> +}
> +opal_call(OPAL_NPU_SPA_CLEAR_CACHE, opal_npu_spa_clear_cache, 3);
> +
> +static int get_template_rate(unsigned int templ, char *rate_buf)
> +{
> + int shift, idx, val;
> +
> + /*
> + * Each rate is encoded over 4 bits (0->15), with 15 being the
> + * slowest. The buffer is a succession of rates for all the
> + * templates. The first 4 bits are for template 63, followed
> + * by 4 bits for template 62, ... etc. So the rate for
> + * template 0 is at the very end of the buffer.
> + */
> + idx = (TL_MAX_TEMPLATE - templ) / 2;
> + shift = 4 * (1 - ((TL_MAX_TEMPLATE - templ) % 2));
> + val = rate_buf[idx] >> shift;
> + return val;
> +}
> +
> +static bool is_template_supported(unsigned int templ, long capabilities)
> +{
> + return !!(capabilities & (1ull << templ));
> +}
> +
> +static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
> + long capabilities, uint64_t rate_phys, int rate_sz)
> +{
> + struct phb *phb = pci_get_phb(phb_id);
> + struct npu2_dev *dev;
> + uint64_t stack, block, reg, templ_rate;
> + int i, rate_pos;
> + char *rate = (char *) rate_phys;
> +
> + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
> + return OPAL_PARAMETER;
> + if (!opal_addr_valid(rate) || rate_sz != TL_RATE_BUF_SIZE)
> + return OPAL_PARAMETER;
> +
> + dev = phb_to_npu2_dev_ocapi(phb);
> + if (!dev)
> + return OPAL_PARAMETER;
> +
> + block = index_to_block(dev->index);
> + stack = index_to_stack(dev->index);
> + /*
> + * The 'capabilities' argument defines what TL template the
> + * device can receive. OpenCAPI 3.0 and 4.0 define 64 templates, so
> + * that's one bit per template.
> + *
> + * For each template, the device processing time may vary, so
> + * the device advertises at what rate a message of a given
> + * template can be sent. That's encoded in the 'rate' buffer.
> + *
> + * On P9, NPU only knows about TL templates 0 -> 3.
> + * Per the spec, template 0 must be supported.
> + */
> + if (!is_template_supported(0, capabilities))
> + return OPAL_PARAMETER;
> +
> + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base,
> + NPU2_OTL_CONFIG1(stack, block),
> + NPU2_MISC_DA_LEN_8B);
> + reg &= ~(NPU2_OTL_CONFIG1_TX_TEMP1_EN | NPU2_OTL_CONFIG1_TX_TEMP3_EN |
> + NPU2_OTL_CONFIG1_TX_TEMP1_EN);
> + for (i = 0; i < 4; i++) {
> + /* Skip template 0 as it is implicitly enabled */
> + if (i && is_template_supported(i, capabilities))
> + reg |= PPC_BIT(i);
> + /* The tx rate should still be set for template 0 */
> + templ_rate = get_template_rate(i, rate);
> + rate_pos = 8 + i * 4;
> + reg = SETFIELD(PPC_BITMASK(rate_pos, rate_pos + 3), reg,
> + templ_rate);
> + }
> + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base,
> + NPU2_OTL_CONFIG1(stack, block), NPU2_MISC_DA_LEN_8B,
> + reg);
> + prlog(PR_DEBUG, "OCAPI: Link %llx:%x, TL conf1 register set to %llx\n",
> + phb_id, bdfn, reg);
> + return OPAL_SUCCESS;
> +}
> +opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 876d7f0..5d99a3e 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -321,6 +321,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
> #define NPU2_NTL_DL_CLK_CTRL(ndev) NPU2_DL_REG_OFFSET(ndev, 0x001C)
>
> /* OpenCAPI - XSL registers */
> +#define NPU2_XSL_PSL_LLCMD_A0 0x008
> #define NPU2_XSL_PSL_SCNTL_A0 0x010
> #define NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL PPC_BIT(0)
> #define NPU2_XSL_DEF 0x040
> @@ -328,6 +329,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
> #define NPU2_XSL_GP_BLOOM_FILTER_ENABLE PPC_BIT(16)
> #define NPU2_XSL_WRAP_CFG 0x0C0
> #define NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE PPC_BIT(0)
> +#define NPU2_XSL_PSL_SPAP_A0 0
> +#define NPU2_XSL_PSL_SPAP_A1 0x18
> +#define NPU2_XSL_PSL_SPAP_EN PPC_BIT(63)
>
> /* OpenCAPI - OTL registers */
> #define NPU2_OTL_CONFIG0(stack, block) NPU2_REG_OFFSET(stack, block, 0x000)
> diff --git a/include/opal-api.h b/include/opal-api.h
> index 1c1b9cc..05d6f4c 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -215,7 +215,10 @@
> #define OPAL_SENSOR_GROUP_CLEAR 156
> #define OPAL_PCI_SET_P2P 157
> #define OPAL_QUIESCE 158
> -#define OPAL_LAST 158
> +#define OPAL_NPU_SPA_SETUP 159
> +#define OPAL_NPU_SPA_CLEAR_CACHE 160
> +#define OPAL_NPU_TL_SET 161
> +#define OPAL_LAST 161
>
> #define QUIESCE_HOLD 1 /* Spin all calls at entry */
> #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
>
More information about the Skiboot
mailing list