[Skiboot] [PATCH] hw/npu2-opencapi: Add initial support for allocating OpenCAPI LPC memory

Wed Apr 24 23:56:22 AEST 2019

Hi Andrew,

A few minor comments below.

Le 24/04/2019 à 09:20, Andrew Donnellan a écrit :
> Lowest Point of Coherency (LPC) memory allows the host to access memory on
> an OpenCAPI device.
> 
> Define 2 OPAL calls, OPAL_NPU_MEM_ALLOC and OPAL_NPU_MEM_RELEASE, for
> assigning and clearing the memory BAR. (We try to avoid using the term
> "LPC" to avoid confusion with Low Pin Count.)
> 
> At present, we use a fixed location in the address space, which means we
> are restricted to a single range of 4TB, on a single OpenCAPI device per
> chip. In future, we'll use some chip ID extension magic to give us more
> space, and some sort of allocator to assign ranges to more than one device.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> 
> ---
> 
> This code is currently being used for some internal testing of LPC memory
> devices and seems to work acceptably for that purpose. We haven't tested
> all the corner cases... this is really just intended to enable prototyping
> and bringup at this stage.
> ---
>   hw/npu2-opencapi.c  | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>   hw/phys-map.c       |  11 ++++
>   include/npu2-regs.h |   7 ++
>   include/npu2.h      |   5 ++
>   include/opal-api.h  |   4 +-
>   include/phys-map.h  |   1 +
>   6 files changed, 208 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 9df51b22eda5..b98335e48daf 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -2025,3 +2025,185 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t __unused bdfn,
>   	return OPAL_SUCCESS;
>   }
>   opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
> +
> +static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t size)
> +{
> +	uint64_t stack, val, reg, bar_offset, pa_config_offset;
> +	uint8_t memsel;
> +
> +	stack = index_to_stack(dev->brick_index);
> +	switch (dev->brick_index) {
> +	case 2:
> +	case 4:
> +		bar_offset = NPU2_GPU0_MEM_BAR;
> +		pa_config_offset = NPU2_CQ_CTL_MISC_PA0_CONFIG;
> +		break;
> +	case 3:
> +	case 5:
> +		bar_offset = NPU2_GPU1_MEM_BAR;
> +		pa_config_offset = NPU2_CQ_CTL_MISC_PA1_CONFIG;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +
> +	/*
> +	 * Memory select configuration:
> +	 * - 0b000 - BAR disabled
> +	 * - 0b001 - match 0b00, 0b01
> +	 * - 0b010 - match 0b01, 0b10
> +	 * - 0b011 - match 0b00, 0b10
> +	 * - 0b100 - match 0b00
> +	 * - 0b101 - match 0b01
> +	 * - 0b110 - match 0b10
> +	 * - 0b111 - match 0b00, 0b01, 0b10
> +	 */
> +	memsel = GETFIELD(PPC_BITMASK(13, 14), base);
> +	val = SETFIELD(NPU2_MEM_BAR_EN | NPU2_MEM_BAR_SEL_MEM, 0ULL, 0b100 + memsel);
> +
> +	/* Base address - 12 bits, 1G aligned */
> +	val = SETFIELD(NPU2_MEM_BAR_NODE_ADDR, val, GETFIELD(PPC_BITMASK(22, 33), base));
> +
> +	/* GCID */
> +	val = SETFIELD(NPU2_MEM_BAR_GROUP, val, GETFIELD(PPC_BITMASK(15, 18), base));
> +	val = SETFIELD(NPU2_MEM_BAR_CHIP, val, GETFIELD(PPC_BITMASK(19, 21), base));
> +
> +	/* Other settings */
> +	val = SETFIELD(NPU2_MEM_BAR_POISON, val, 1);
> +	val = SETFIELD(NPU2_MEM_BAR_GRANULE, val, 0);
> +	val = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, val, ilog2(size >> 30));
> +	val = SETFIELD(NPU2_MEM_BAR_MODE, val, 0);
> +
> +	for (int block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
> +		reg = NPU2_REG_OFFSET(stack, block, bar_offset);
> +		npu2_write(dev->npu, reg, val);
> +	}
> +
> +	/* Set PA config */
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH, 0ULL, 0b100 + memsel);
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE, val, 0);
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE, val, ilog2(size >> 30));
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MODE, val, 0);
> +	val = SETFIELD(NPU2_CQ_CTL_MISC_PA_CONFIG_MASK, val, 0);
> +	reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_config_offset);
> +	npu2_write(dev->npu, reg, val);
> +}
> +
> +static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
> +{
> +	uint64_t phys_map_base, phys_map_size;
> +
> +	/*
> +	 * Right now, we support 1 allocation per chip, of up to 4TB.
> +	 *
> +	 * In future, we will use chip address extension to support
> +	 * >4TB ranges, and we will implement a more sophisticated
> +	 * allocator to allow an allocation for every link on a chip.
> +	 */
> +
> +	if (dev->npu->lpc_mem_allocated)
> +		return OPAL_RESOURCE;
> +
> +	phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size);
> +
> +	if (size > phys_map_size) {
> +		/**
> +		 * @fwts-label OCAPIInvalidLPCMemoryBARSize
> +		 * @fwts-advice The operating system requested an unsupported
> +		 * amount of OpenCAPI LPC memory. This is possibly a kernel
> +		 * bug, or you may need to upgrade your firmware.
> +		 */
> +		prlog(PR_ERR,
> +		      "OCAPI: Invalid LPC memory BAR allocation size requested: 0x%llx bytes (limit 0x%llx)\n",
> +		      size, phys_map_size);

Could this use OCAPIERR(dev, ...) instead of a bare prlog()?

> +		return OPAL_PARAMETER;
> +	}
> +
> +	/* Minimum BAR size is 1 GB */
> +	if (size < (2 << 29)) {
> +		size = 2 << 29;

Interesting... Is that how it's perceived down under? 1 << 30 would seem 
more natural to me :-)

> +	}
> +
> +	if (!is_pow2(size)) {
> +		size = 2 << ilog2(size);
> +	}
> +
> +	set_mem_bar(dev, phys_map_base, size);
> +	*bar = phys_map_base;
> +	dev->npu->lpc_mem_allocated = dev;
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +static int64_t release_mem_bar(struct npu2_dev *dev)
> +{
> +	uint64_t stack, reg, bar_offset, pa_config_offset;
> +
> +	if (dev->npu->lpc_mem_allocated != dev)
> +		return OPAL_PARAMETER;

Should this return OPAL_PERMISSION instead?

> +
> +	stack = index_to_stack(dev->brick_index);
> +	switch (dev->brick_index) {
> +	case 2:
> +	case 4:
> +		bar_offset = NPU2_GPU0_MEM_BAR;
> +		pa_config_offset = NPU2_CQ_CTL_MISC_PA0_CONFIG;
> +		break;
> +	case 3:
> +	case 5:
> +		bar_offset = NPU2_GPU1_MEM_BAR;
> +		pa_config_offset = NPU2_CQ_CTL_MISC_PA1_CONFIG;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +
> +	for (int block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
> +		reg = NPU2_REG_OFFSET(stack, block, bar_offset);
> +		npu2_write(dev->npu, reg, 0ull);
> +	}
> +	reg = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_config_offset);
> +	npu2_write(dev->npu, reg, 0ull);

I was expecting a call to set_mem_bar(dev, 0, 0) here.

> +
> +	dev->npu->lpc_mem_allocated = NULL;
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn,
> +				  uint64_t size, uint64_t *bar)
> +{
> +	struct phb *phb = pci_get_phb(phb_id);
> +	struct npu2_dev *dev;
> +
> +
> +	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
> +		return OPAL_PARAMETER;
> +
> +	dev = phb_to_npu2_dev_ocapi(phb);
> +	if (!dev)
> +		return OPAL_PARAMETER;
> +
> +	if (!opal_addr_valid(bar))
> +		return OPAL_PARAMETER;
> +
> +	return alloc_mem_bar(dev, size, bar);
> +}
> +opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
> +
> +static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t __unused bdfn)
> +{
> +	struct phb *phb = pci_get_phb(phb_id);
> +	struct npu2_dev *dev;
> +
> +
> +	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
> +		return OPAL_PARAMETER;
> +
> +	dev = phb_to_npu2_dev_ocapi(phb);
> +	if (!dev)
> +		return OPAL_PARAMETER;
> +
> +	return release_mem_bar(dev);
> +}
> +opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
> diff --git a/hw/phys-map.c b/hw/phys-map.c
> index fe949e4043ff..75836297c2f9 100644
> --- a/hw/phys-map.c
> +++ b/hw/phys-map.c
> @@ -52,6 +52,17 @@ static const struct phys_map_entry phys_map_table_nimbus[] = {
>   	{ GPU_MEM_4T_UP,   2, 0x0000044000000000ull, 0x0000002000000000ull },
>   	{ GPU_MEM_4T_UP,   3, 0x0000046000000000ull, 0x0000002000000000ull },
>   
> +	/*
> +	 * OpenCAPI LPC Memory - single 4TB range per chip, fills
> +	 * whole second non-mirrored region.
> +	 *
> +	 * Longer term, we're going to use chip address extension to
> +	 * enable >4TB to be allocated per chip.  At that point, we
> +	 * may have to find another way of assigning these ranges
> +	 * outside of phys-map.
> +	 */
> +	{ OCAPI_MEM,	   0, 0x0002000000000000ull, 0x0000040000000000ull },
> +

I found some old discussions between Balbir and PowerVM about a 
suitable spot for the LPC memory, and it matches that range. Is that 
also your source, or are there some other considerations as well?

   Fred

>   	/* 0 TB offset @ MMIO 0x0006000000000000ull */
>   	{ PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
>   	{ PHB4_64BIT_MMIO, 1, 0x0006004000000000ull, 0x0000004000000000ull },
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index ba10b8eaf88d..ecf47abf6c96 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -239,6 +239,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_CQ_CTL_STATUS			0x090
>   #define   NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED	PPC_BITMASK(48, 49)
>   #define   NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED	PPC_BITMASK(50, 51)
> +#define NPU2_CQ_CTL_MISC_PA0_CONFIG		0x0A0 /* or should that be CS */
> +#define NPU2_CQ_CTL_MISC_PA1_CONFIG		0x0A8 /* or should that be CS */
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_MEMSELMATCH PPC_BITMASK(0,2)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_GRANULE	PPC_BIT(3)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_SIZE	PPC_BITMASK(4,7)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_MODE	PPC_BITMASK(8,11)
> +#define   NPU2_CQ_CTL_MISC_PA_CONFIG_MASK	PPC_BITMASK(13,19)
>   #define NPU2_CQ_C_ERR_RPT_MSG0			0x0C0
>   #define NPU2_CQ_C_ERR_RPT_MSG1			0x0C8
>   #define NPU2_CQ_C_ERR_RPT_FIRST0		0x0D0
> diff --git a/include/npu2.h b/include/npu2.h
> index d58aab47bb30..9febf1a343ef 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -193,6 +193,11 @@ struct npu2 {
>   	struct lock	i2c_lock;
>   	uint8_t		i2c_pin_mode;
>   	uint8_t		i2c_pin_wr_state;
> +	/*
> +	 * Which device currently has an LPC allocation.
> +	 * Temporary as long as we only support 1 LPC alloc per chip.
> +	 */
> +	struct npu2_dev	*lpc_mem_allocated;
>   };
>   
>   static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
> diff --git a/include/opal-api.h b/include/opal-api.h
> index e461c9d278c2..b0ad435539fb 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -227,8 +227,8 @@
>   #define OPAL_NPU_SET_RELAXED_ORDER		168
>   #define OPAL_NPU_GET_RELAXED_ORDER		169
>   #define OPAL_XIVE_GET_VP_STATE			170 /* Get NVT state */
> -#define OPAL_NPU_RESERVED1			171  /* LPC Allocate */
> -#define OPAL_NPU_RESERVED2			172  /* LPC Release */
> +#define OPAL_NPU_MEM_ALLOC			171
> +#define OPAL_NPU_MEM_RELEASE			172
>   #define OPAL_LAST				172
>   
>   #define QUIESCE_HOLD			1 /* Spin all calls at entry */
> diff --git a/include/phys-map.h b/include/phys-map.h
> index 73adda079e23..0cf48b6628af 100644
> --- a/include/phys-map.h
> +++ b/include/phys-map.h
> @@ -28,6 +28,7 @@ enum phys_map_type {
>   	SYSTEM_MEM,
>   	GPU_MEM_4T_DOWN,
>   	GPU_MEM_4T_UP,
> +	OCAPI_MEM,
>   	PHB4_64BIT_MMIO,
>   	PHB4_32BIT_MMIO,
>   	PHB4_XIVE_ESB,
>