[Skiboot] [PATCH v3] hw/npu2-opencapi: Add initial support for allocating OpenCAPI LPC memory

Wed May 8 00:32:09 AEST 2019

Le 01/05/2019 à 08:56, Andrew Donnellan a écrit :
> Lowest Point of Coherency (LPC) memory allows the host to access memory on
> an OpenCAPI device.
> 
> Define 2 OPAL calls, OPAL_NPU_MEM_ALLOC and OPAL_NPU_MEM_RELEASE, for
> assigning and clearing the memory BAR. (We try to avoid using the term
> "LPC" to avoid confusion with Low Pin Count.)
> 
> At present, we use a fixed location in the address space, which means we
> are restricted to a single range of 4TB, on a single OpenCAPI device per
> chip. In future, we'll use some chip ID extension magic to give us more
> space, and some sort of allocator to assign ranges to more than one device.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> 
> ---

I'm wondering about concurrent access to the GPU BAR. We don't prevent 
it in skiboot (nor were we preventing it for the SPA setup for example), 
and it could result in a garbage setting. It would reflect what the OS 
is asking. However, in this case and unlike the SPA setup, I'm worried 
that it could bubble up to userland and that user threads could mess it 
up. I guess it depends on the exact implementation of the driver and 
libocxl. The issue is that an LPC memory access that the NPU fails could 
turn into an SUE, with potentially bad consequences. Thoughts?

Other than that, the code looks good to me.

   Fred

> 
> This code is currently being used for some internal testing of LPC memory
> devices and seems to work acceptably for that purpose. We haven't tested
> all the corner cases... this is really just intended to enable prototyping
> and bringup at this stage.
> 
> v1->v2:
> - Comments from Fred
> - Simplify BAR release
> - Make the BAR maths more sensible
> - Use OCAPI log macro
> 
> v2->v3:
> - Add documentation
> - got rid of a spurious TODO
> ---
>   ...pal-npu2-opencapi-159-160-161-171-172.rst} |  77 ++++++++
>   hw/npu2-opencapi.c                            | 165 ++++++++++++++++++
>   hw/phys-map.c                                 |  11 ++
>   include/npu2-regs.h                           |   7 +
>   include/npu2.h                                |   5 +
>   include/opal-api.h                            |   4 +-
>   include/phys-map.h                            |   1 +
>   7 files changed, 268 insertions(+), 2 deletions(-)
>   rename doc/opal-api/{opal-npu2-opencapi-159-160-161.rst => opal-npu2-opencapi-159-160-161-171-172.rst} (64%)
> 
> diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst
> similarity index 64%
> rename from doc/opal-api/opal-npu2-opencapi-159-160-161.rst
> rename to doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst
> index 4db3d3e8961e..6d603f173a4b 100644
> --- a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
> +++ b/doc/opal-api/opal-npu2-opencapi-159-160-161-171-172.rst
> @@ -124,3 +124,80 @@ OPAL_SUCCESS
>   
>   OPAL_PARAMETER
>      A provided parameter was invalid
> +
> +.. _OPAL_NPU_MEM_ALLOC:
> +
> +OPAL_NPU_MEM_ALLOC
> +==================
> +
> +OpenCAPI devices only.
> +
> +Sets up the NPU memory BAR for Lowest Point of Coherency (LPC) memory.
> +
> +At present, only one device per CPU can use LPC memory, and a maximum of 4TB
> +can be allocated.
> +
> +Parameters
> +----------
> +::
> +
> +   uint64_t phb_id
> +   uint32_t bdfn
> +   uint64_t size
> +   uint64_t *bar
> +
> +``phb_id``
> +   OPAL ID of PHB
> +
> +``bdfn``
> +   Bus-Device-Function number of OpenCAPI AFU
> +
> +``size``
> +   Size of requested LPC memory area in bytes
> +
> +``bar``
> +   Pointer to variable where base of LPC memory area will be returned
> +
> +Return Values
> +-------------
> +
> +OPAL_SUCCESS
> +   BAR setup completed successfully
> +
> +OPAL_PARAMETER
> +   A provided parameter was invalid
> +
> +OPAL_RESOURCE
> +   The BAR could not be assigned due to limitations
> +
> +.. _OPAL_NPU_MEM_RELEASE:
> +
> +OPAL_NPU_MEM_RELEASE
> +====================
> +
> +OpenCAPI devices only.
> +
> +Releases NPU memory BAR.
> +
> +Parameters
> +----------
> +::
> +
> +   uint64_t phb_id
> +   uint32_t bdfn
> +
> +``phb_id``
> +   OPAL ID of PHB
> +
> +``bdfn``
> +   Bus-Device-Function number of OpenCAPI AFU
> +
> +Return Values
> +-------------
> +
> +OPAL_SUCCESS
> +   BAR release completed successfully
> +
> +OPAL_PARAMETER
> +   A provided parameter was invalid, or the specified device does not currently
> +   have LPC memory assigned
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 9df51b22eda5..dad4f04a422f 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -2025,3 +2025,168 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t __unused bdfn,
>   	return OPAL_SUCCESS;
>   }
>   opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
> +
> +/*
> + * Program the NPU memory BAR and the matching PA config register for the
> + * stack that owns this device's brick.
> + *
> + * @dev:  OpenCAPI device; dev->brick_index selects the stack and which of
> + *        the GPU0/GPU1 BAR and PA0/PA1 config registers is written
> + * @base: base address of the window, or 0 to disable the BAR
> + * @size: size of the window in bytes, or 0 to disable the BAR
> + *
> + * base and size must be both zero or both non-zero (asserted below).
> + * NOTE(review): the size is encoded as ilog2(size >> 30), so callers are
> + * presumably expected to pass a power-of-two size of at least 1GB - confirm.
> + */
> +static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t size)
> +{
> +	uint64_t stack, val, reg, bar_offset, pa_config_offset;
> +	uint8_t memsel;
> +
> +	stack = index_to_stack(dev->brick_index);
> +	/*
> +	 * Bricks 2 and 4 use the GPU0 BAR and PA0 config register, bricks 3
> +	 * and 5 the GPU1 BAR and PA1 config register.  Any other brick index
> +	 * trips the assert.
> +	 */
> +	switch (dev->brick_index) {
> +	case 2:
> +	case 4:
> +		bar_offset = NPU2_GPU0_MEM_BAR;
> +		pa_config_offset = NPU2_CQ_CTL_MISC_PA0_CONFIG;
> +		break;
> +	case 3:
> +	case 5:
> +		bar_offset = NPU2_GPU1_MEM_BAR;
> +		pa_config_offset = NPU2_CQ_CTL_MISC_PA1_CONFIG;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +
> +	/* Either enable with a real base and size, or disable with both zero */
> +	assert((!size && !base) || (size && base));
> +
> +	/*
> +	 * Memory select configuration:
> +	 * - 0b000 - BAR disabled
> +	 * - 0b001 - match 0b00, 0b01
> +	 * - 0b010 - match 0b01, 0b10
> +	 * - 0b011 - match 0b00, 0b10
> +	 * - 0b100 - match 0b00
> +	 * - 0b101 - match 0b01
> +	 * - 0b110 - match 0b10
> +	 * - 0b111 - match 0b00, 0b01, 0b10
> +	 */
> +	/*
> +	 * memsel is taken from bits 13-14 of base (PPC_BITMASK numbering).
> +	 * For memsel values 0..2, 0b100 + memsel picks the single-match
> +	 * encodings from the table above.
> +	 */
> +	memsel = GETFIELD(PPC_BITMASK(13, 14), base);
> +	if (size)
> +		val = SETFIELD(NPU2_MEM_BAR_EN | NPU2_MEM_BAR_SEL_MEM, 0ULL, 0b100 + memsel);
> +	else
> +		val = 0;
> +
> +	/* Base address - 12 bits, 1G aligned */
> +	val = SETFIELD(NPU2_MEM_BAR_NODE_ADDR, val, GETFIELD(PPC_BITMASK(22, 33), base));
> +
> +	/* GCID */
> +	val = SETFIELD(NPU2_MEM_BAR_GROUP, val, GETFIELD(PPC_BITMASK(15, 18), base));
> +	val = SETFIELD(NPU2_MEM_BAR_CHIP, val, GETFIELD(PPC_BITMASK(19, 21), base));
> +
> +	/* Other settings */
> +	/* Note: the poison bit is set on the disable path as well */
> +	val = SETFIELD(NPU2_MEM_BAR_POISON, val, 1);
> +	val = SETFIELD(NPU2_MEM_BAR_GRANULE, val, 0);
> +	/*
> +	 * NOTE(review): on the disable path size is 0, so this evaluates
> +	 * ilog2(0) - confirm ilog2() is well defined for a zero argument.
> +	 */
> +	val = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, val, ilog2(size >> 30));
> +	val = SETFIELD(NPU2_MEM_BAR_MODE, val, 0);
> +
> +	/* Write the same BAR value to SM blocks 0 through 3 of this stack */
> +	for (int block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
> +		reg = NPU2_REG_OFFSET(stack, block, bar_offset);
> +		npu2_write(dev->npu, reg, val);
> +	}
> +
> +	/* Set PA config */
> +	if (size)
> +		val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH, 0ULL, 0b100 + memsel);
> +	else
> +		val = 0;
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE, val, 0);
> +	/* NOTE(review): same ilog2(0) question as above when size is 0 */
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE, val, ilog2(size >> 30));
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MODE, val, 0);
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MASK, val, 0);
> +	reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_config_offset);
> +	npu2_write(dev->npu, reg, val);
> +}
> +
> +/*
> + * Assign the chip's OpenCAPI LPC memory range to a device and program its
> + * memory BAR.
> + *
> + * @dev:  device requesting the allocation
> + * @size: requested size in bytes; raised to at least 1GB and then rounded
> + *        up to the next power of two before the BAR is programmed
> + * @bar:  on success, receives the base address of the assigned range
> + *
> + * Returns OPAL_SUCCESS on success, OPAL_RESOURCE if this NPU already has an
> + * LPC allocation, or OPAL_PARAMETER if size exceeds the OCAPI_MEM range
> + * reported by phys_map_get().
> + */
> +static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
> +{
> +	uint64_t phys_map_base, phys_map_size;
> +
> +	/*
> +	 * Right now, we support 1 allocation per chip, of up to 4TB.
> +	 *
> +	 * In future, we will use chip address extension to support
> +	 * >4TB ranges, and we will implement a more sophisticated
> +	 * allocator to allow an allocation for every link on a chip.
> +	 */
> +
> +	if (dev->npu->lpc_mem_allocated)
> +		return OPAL_RESOURCE;
> +
> +	phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size);
> +
> +	if (size > phys_map_size) {
> +		/**
> +		 * @fwts-label OCAPIInvalidLPCMemoryBARSize
> +		 * @fwts-advice The operating system requested an unsupported
> +		 * amount of OpenCAPI LPC memory. This is possibly a kernel
> +		 * bug, or you may need to upgrade your firmware.
> +		 */
> +		OCAPIERR(dev, "Invalid LPC memory BAR allocation size requested: 0x%llx bytes (limit 0x%llx)\n",
> +			 size, phys_map_size);
> +		return OPAL_PARAMETER;
> +	}
> +
> +	/* Minimum BAR size is 1 GB */
> +	if (size < (1ULL << 30)) {
> +		size = 1ULL << 30;
> +	}
> +
> +	/*
> +	 * Round up to the next power of two.  The shift must be done on a
> +	 * 64-bit value: sizes above 1GB need shift counts of 31 or more,
> +	 * which is undefined behaviour on a plain int constant.
> +	 */
> +	if (!is_pow2(size)) {
> +		size = 1ULL << (ilog2(size) + 1);
> +	}
> +
> +	set_mem_bar(dev, phys_map_base, size);
> +	*bar = phys_map_base;
> +	/* Record the owner so that only this device can release the range */
> +	dev->npu->lpc_mem_allocated = dev;
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +/*
> + * Tear down the LPC memory BAR owned by @dev.
> + *
> + * Returns OPAL_PARAMETER unless @dev is the device recorded as holding this
> + * NPU's LPC allocation.  Otherwise the BAR is disabled, the ownership record
> + * is cleared and OPAL_SUCCESS is returned.
> + */
> +static int64_t release_mem_bar(struct npu2_dev *dev)
> +{
> +	struct npu2 *npu = dev->npu;
> +
> +	if (npu->lpc_mem_allocated == dev) {
> +		/* Zero base and size disable the BAR */
> +		set_mem_bar(dev, 0, 0);
> +		npu->lpc_mem_allocated = NULL;
> +		return OPAL_SUCCESS;
> +	}
> +
> +	return OPAL_PARAMETER;
> +}
> +
> +/*
> + * OPAL_NPU_MEM_ALLOC entry point.
> + *
> + * Looks up the OpenCAPI device behind @phb_id, checks that @bar is a valid
> + * OPAL address and hands the request to alloc_mem_bar().  @bdfn is unused.
> + *
> + * Returns OPAL_PARAMETER if the PHB does not exist, is not an OpenCAPI PHB,
> + * has no device, or @bar is not valid; otherwise returns the result of
> + * alloc_mem_bar().
> + */
> +static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn,
> +				  uint64_t size, uint64_t *bar)
> +{
> +	struct npu2_dev *dev = NULL;
> +	struct phb *phb;
> +
> +	phb = pci_get_phb(phb_id);
> +	if (phb && phb->phb_type == phb_type_npu_v2_opencapi)
> +		dev = phb_to_npu2_dev_ocapi(phb);
> +
> +	/* The output pointer is only checked once a device has been found */
> +	if (!dev || !opal_addr_valid(bar))
> +		return OPAL_PARAMETER;
> +
> +	return alloc_mem_bar(dev, size, bar);
> +}
> +opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
> +
> +/*
> + * OPAL_NPU_MEM_RELEASE entry point.
> + *
> + * Looks up the OpenCAPI device behind @phb_id and hands the request to
> + * release_mem_bar().  @bdfn is unused.
> + *
> + * Returns OPAL_PARAMETER if the PHB does not exist, is not an OpenCAPI PHB
> + * or has no device; otherwise returns the result of release_mem_bar().
> + */
> +static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t __unused bdfn)
> +{
> +	struct phb *phb = pci_get_phb(phb_id);
> +	struct npu2_dev *dev;
> +
> +	if (!phb)
> +		return OPAL_PARAMETER;
> +	if (phb->phb_type != phb_type_npu_v2_opencapi)
> +		return OPAL_PARAMETER;
> +
> +	dev = phb_to_npu2_dev_ocapi(phb);
> +
> +	return dev ? release_mem_bar(dev) : OPAL_PARAMETER;
> +}
> +opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
> diff --git a/hw/phys-map.c b/hw/phys-map.c
> index fe949e4043ff..75836297c2f9 100644
> --- a/hw/phys-map.c
> +++ b/hw/phys-map.c
> @@ -52,6 +52,17 @@ static const struct phys_map_entry phys_map_table_nimbus[] = {
>   	{ GPU_MEM_4T_UP,   2, 0x0000044000000000ull, 0x0000002000000000ull },
>   	{ GPU_MEM_4T_UP,   3, 0x0000046000000000ull, 0x0000002000000000ull },
>   
> +	/*
> +	 * OpenCAPI LPC Memory - single 4TB range per chip, fills
> +	 * whole second non-mirrored region.
> +	 *
> +	 * Longer term, we're going to use chip address extension to
> +	 * enable >4TB to be allocated per chip.  At that point, we
> +	 * may have to find another way of assigning these ranges
> +	 * outside of phys-map.
> +	 */
> +	{ OCAPI_MEM,	   0, 0x0002000000000000ull, 0x0000040000000000ull },
> +
>   	/* 0 TB offset @ MMIO 0x0006000000000000ull */
>   	{ PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
>   	{ PHB4_64BIT_MMIO, 1, 0x0006004000000000ull, 0x0000004000000000ull },
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index ba10b8eaf88d..ecf47abf6c96 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -239,6 +239,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_CQ_CTL_STATUS			0x090
>   #define   NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED	PPC_BITMASK(48, 49)
>   #define   NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED	PPC_BITMASK(50, 51)
> +#define NPU2_CQ_CTL_MISC_PA0_CONFIG		0x0A0 /* or should that be CS */
> +#define NPU2_CQ_CTL_MISC_PA1_CONFIG		0x0A8 /* or should that be CS */
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH PPC_BITMASK(0,2)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE	PPC_BIT(3)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE	PPC_BITMASK(4,7)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_MODE	PPC_BITMASK(8,11)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_MASK	PPC_BITMASK(13,19)
>   #define NPU2_CQ_C_ERR_RPT_MSG0			0x0C0
>   #define NPU2_CQ_C_ERR_RPT_MSG1			0x0C8
>   #define NPU2_CQ_C_ERR_RPT_FIRST0		0x0D0
> diff --git a/include/npu2.h b/include/npu2.h
> index d58aab47bb30..9febf1a343ef 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -193,6 +193,11 @@ struct npu2 {
>   	struct lock	i2c_lock;
>   	uint8_t		i2c_pin_mode;
>   	uint8_t		i2c_pin_wr_state;
> +	/*
> +	 * Which device currently has an LPC allocation.
> +	 * Temporary as long as we only support 1 LPC alloc per chip.
> +	 */
> +	struct npu2_dev	*lpc_mem_allocated;
>   };
>   
>   static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
> diff --git a/include/opal-api.h b/include/opal-api.h
> index e461c9d278c2..b0ad435539fb 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -227,8 +227,8 @@
>   #define OPAL_NPU_SET_RELAXED_ORDER		168
>   #define OPAL_NPU_GET_RELAXED_ORDER		169
>   #define OPAL_XIVE_GET_VP_STATE			170 /* Get NVT state */
> -#define OPAL_NPU_RESERVED1			171  /* LPC Allocate */
> -#define OPAL_NPU_RESERVED2			172  /* LPC Release */
> +#define OPAL_NPU_MEM_ALLOC			171
> +#define OPAL_NPU_MEM_RELEASE			172
>   #define OPAL_LAST				172
>   
>   #define QUIESCE_HOLD			1 /* Spin all calls at entry */
> diff --git a/include/phys-map.h b/include/phys-map.h
> index 73adda079e23..0cf48b6628af 100644
> --- a/include/phys-map.h
> +++ b/include/phys-map.h
> @@ -28,6 +28,7 @@ enum phys_map_type {
>   	SYSTEM_MEM,
>   	GPU_MEM_4T_DOWN,
>   	GPU_MEM_4T_UP,
> +	OCAPI_MEM,
>   	PHB4_64BIT_MMIO,
>   	PHB4_32BIT_MMIO,
>   	PHB4_XIVE_ESB,
>