[Skiboot] [PATCH V2 15/15] pau: Add support for OpenCAPI Persistent Memory devices.

Frederic Barrat fbarrat at linux.ibm.com
Tue Oct 12 21:52:08 AEDT 2021



On 23/09/2021 11:03, Christophe Lombard wrote:
> Lowest Point of Coherency (LPC) memory allows the host to access memory on
> an OpenCAPI device.
> 
> When the P10 chip accesses memory addresses on the AFU, the Real Address
> on the PowerBus must hit a BAR in the PAU such as GPU-Memory BAR. The BAR
> defines the range of Real Addresses that represent AFU memory.
> 
> The two existing OPAL calls, OPAL_NPU_MEM_ALLOC and OPAL_NPU_MEM_RELEASE,
> are used to manage the AFU memory.
> 
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---


[ I only sent it to Christophe before, sorry ]

Reviewed-by: Frederic Barrat <fbarrat at linux.ibm.com>


>   hw/npu-opal.c      | 35 +++++++++++++++++
>   hw/npu2-opencapi.c | 18 ++-------
>   hw/pau.c           | 93 ++++++++++++++++++++++++++++++++++++++++++++++
>   hw/phys-map.c      |  7 +++-
>   include/npu2.h     |  3 ++
>   include/pau-regs.h |  8 ++++
>   include/pau.h      |  4 ++
>   7 files changed, 152 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/npu-opal.c b/hw/npu-opal.c
> index 2e455dc9..1b66857f 100644
> --- a/hw/npu-opal.c
> +++ b/hw/npu-opal.c
> @@ -239,3 +239,38 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
>   	return OPAL_PARAMETER;
>   }
>   opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
> +
> +static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t bdfn,
> +				  uint64_t size, uint64_t *bar)
> +{
> +	struct phb *phb = pci_get_phb(phb_id);
> +
> +	if (!phb)
> +		return OPAL_PARAMETER;
> +
> +	if (phb->phb_type == phb_type_npu_v2_opencapi)
> +		return npu2_opencapi_mem_alloc(phb, bdfn, size, bar);
> +
> +	if (phb->phb_type == phb_type_pau_opencapi)
> +		return pau_opencapi_mem_alloc(phb, bdfn, size, bar);
> +
> +	return OPAL_PARAMETER;
> +}
> +opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
> +
> +static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t bdfn)
> +{
> +	struct phb *phb = pci_get_phb(phb_id);;
> +
> +	if (!phb)
> +		return OPAL_PARAMETER;
> +
> +	if (phb->phb_type == phb_type_npu_v2_opencapi)
> +		return npu2_opencapi_mem_release(phb, bdfn);
> +
> +	if (phb->phb_type == phb_type_pau_opencapi)
> +		return pau_opencapi_mem_release(phb, bdfn);
> +
> +	return OPAL_PARAMETER;
> +}
> +opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 272f924b..8e7bcca9 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -2295,18 +2295,13 @@ out:
>   	return rc;
>   }
>   
> -static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn,
> -				  uint64_t size, __be64 *__bar)
> +int64_t npu2_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
> +				uint64_t size, uint64_t *__bar)
>   {
> -	struct phb *phb = pci_get_phb(phb_id);
>   	struct npu2_dev *dev;
>   	uint64_t bar;
>   	int64_t rc;
>   
> -
> -	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
> -		return OPAL_PARAMETER;
> -
>   	dev = phb_to_npu2_dev_ocapi(phb);
>   	if (!dev)
>   		return OPAL_PARAMETER;
> @@ -2320,21 +2315,14 @@ static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn,
>   
>   	return rc;
>   }
> -opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
>   
> -static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t __unused bdfn)
> +int64_t npu2_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn)
>   {
> -	struct phb *phb = pci_get_phb(phb_id);
>   	struct npu2_dev *dev;
>   
> -
> -	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
> -		return OPAL_PARAMETER;
> -
>   	dev = phb_to_npu2_dev_ocapi(phb);
>   	if (!dev)
>   		return OPAL_PARAMETER;
>   
>   	return release_mem_bar(dev);
>   }
> -opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
> diff --git a/hw/pau.c b/hw/pau.c
> index 75dacaff..26813371 100644
> --- a/hw/pau.c
> +++ b/hw/pau.c
> @@ -443,6 +443,99 @@ int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
>   	return OPAL_SUCCESS;
>   }
>   
> +static int64_t pau_opencapi_afu_memory_bars(struct pau_dev *dev,
> +					    uint64_t size,
> +					    uint64_t *bar)
> +{
> +	struct pau *pau = dev->pau;
> +	uint64_t addr, psize;
> +	uint64_t reg, val;
> +
> +	PAUDEVDBG(dev, "Setup AFU Memory BARs\n");
> +
> +	if (dev->memory_bar.enable) {
> +		PAUDEVERR(dev, "AFU memory allocation failed - BAR already in use\n");
> +		return OPAL_RESOURCE;
> +	}
> +
> +	phys_map_get(pau->chip_id, OCAPI_MEM,
> +		     dev->index,
> +		     &addr, &psize);
> +
> +	if (size > psize) {
> +		PAUDEVERR(dev, "Invalid AFU memory BAR allocation size "
> +			       "requested: 0x%llx bytes (limit 0x%llx)\n",
> +			  size, psize);
> +		return OPAL_PARAMETER;
> +	}
> +
> +	if (size < (1 << 30))
> +		size = 1 << 30;
> +
> +	dev->memory_bar.enable = true;
> +	dev->memory_bar.addr = addr;
> +	dev->memory_bar.size = size;
> +
> +	reg = PAU_GPU_MEM_BAR(dev->index);
> +	val = PAU_GPU_MEM_BAR_ENABLE |
> +	      PAU_GPU_MEM_BAR_POISON;
> +	val = SETFIELD(PAU_GPU_MEM_BAR_ADDR, val, addr >> 30);
> +	if (!is_pow2(size))
> +		size = 1ull << (ilog2(size) + 1);
> +
> +	size = (size >> 30) - 1;
> +	val = SETFIELD(PAU_GPU_MEM_BAR_SIZE, val, size);
> +	pau_write(pau, reg, val);
> +
> +	reg = PAU_CTL_MISC_GPU_MEM_BAR(dev->index);
> +	pau_write(pau, reg, val);
> +
> +	reg = PAU_XSL_GPU_MEM_BAR(dev->index);
> +	pau_write(pau, reg, val);
> +
> +	*bar = addr;
> +	return OPAL_SUCCESS;
> +}
> +
> +int64_t pau_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
> +			       uint64_t size, uint64_t *bar)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +	int64_t rc;
> +
> +	if (!dev)
> +		return OPAL_PARAMETER;
> +
> +	if (!opal_addr_valid(bar))
> +		return OPAL_PARAMETER;
> +
> +	lock(&dev->pau->lock);
> +	rc = pau_opencapi_afu_memory_bars(dev, size, bar);
> +
> +	unlock(&dev->pau->lock);
> +	return rc;
> +}
> +
> +int64_t pau_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +
> +	if (!dev)
> +		return OPAL_PARAMETER;
> +
> +	lock(&dev->pau->lock);
> +	pau_write(dev->pau, PAU_GPU_MEM_BAR(dev->index), 0ull);
> +	pau_write(dev->pau, PAU_CTL_MISC_GPU_MEM_BAR(dev->index), 0ull);
> +	pau_write(dev->pau, PAU_XSL_GPU_MEM_BAR(dev->index), 0ull);
> +
> +	dev->memory_bar.enable = false;
> +	dev->memory_bar.addr = 0ull;
> +	dev->memory_bar.size = 0ull;
> +	unlock(&dev->pau->lock);
> +
> +	return OPAL_SUCCESS;
> +}
> +
>   #define CQ_CTL_STATUS_TIMEOUT  10 /* milliseconds */
>   
>   static int pau_opencapi_set_fence_control(struct pau_dev *dev,
> diff --git a/hw/phys-map.c b/hw/phys-map.c
> index 7b44fc61..1e9d9255 100644
> --- a/hw/phys-map.c
> +++ b/hw/phys-map.c
> @@ -30,7 +30,12 @@ static const struct phys_map_entry phys_map_table_p10[] = {
>   	/* System memory upto 4TB minus GPU memory */
>   	{ SYSTEM_MEM,      0, 0x0000000000000000ull, 0x0000034000000000ull },
>   
> -	/* TODO: Figure out GPU memory */
> +	/* Configure OpenCapi memory as before with the old chips.
> +	 * Keep in mind that we would need to rework this by allocating
> +	 * a new topology id.
> +	 */
> +	{ OCAPI_MEM,	   0, 0x0002000000000000ull, 0x0000040000000000ull },
> +	{ OCAPI_MEM,	   1, 0x0002040000000000ull, 0x0000040000000000ull },
>   
>   	/* 0 TB offset @ MMIO 0x0006000000000000ull */
>   	{ PHB5_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
> diff --git a/include/npu2.h b/include/npu2.h
> index a12bf98a..b302108b 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -277,5 +277,8 @@ int64_t npu2_opencapi_spa_clear_cache(struct phb *phb, uint32_t __unused bdfn,
>   				      uint64_t PE_handle);
>   int64_t npu2_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
>   		    long capabilities, char *rate);
> +int64_t npu2_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
> +				uint64_t size, uint64_t *bar);
> +int64_t npu2_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn);
>   
>   #endif /* __NPU2_H */
> diff --git a/include/pau-regs.h b/include/pau-regs.h
> index da83ad44..45c36037 100644
> --- a/include/pau-regs.h
> +++ b/include/pau-regs.h
> @@ -64,6 +64,12 @@
>   #define   PAU_SNP_MISC_CFG0_ENABLE_PBUS		PPC_BIT(2)
>   #define   PAU_SNP_MISC_CFG0_OCAPI_MODE		PPC_BITMASK(32, 36)
>   #define   PAU_SNP_MISC_CFG0_OCAPI_C2		PPC_BITMASK(45, 49)
> +#define PAU_GPU_MEM_BAR(brk)			(PAU_BLOCK_CQ_SM(0) + 0x190 + (brk) * 8)
> +#define   PAU_GPU_MEM_BAR_ENABLE		PPC_BIT(0)
> +#define   PAU_GPU_MEM_BAR_ADDR_MASK		PPC_BITMASK(1, 35)
> +#define     PAU_GPU_MEM_BAR_ADDR		PPC_BITMASK(1, 21)
> +#define     PAU_GPU_MEM_BAR_SIZE		PPC_BITMASK(22, 35)
> +#define   PAU_GPU_MEM_BAR_POISON		PPC_BIT(45)
>   #define PAU_NTL_BAR(brk)			(PAU_BLOCK_CQ_SM(0) + 0x1b8 + (brk) * 8)
>   #define   PAU_NTL_BAR_ENABLE			PPC_BIT(0)
>   #define   PAU_NTL_BAR_ADDR			PPC_BITMASK(3, 35)
> @@ -88,6 +94,7 @@
>   #define   PAU_CTL_MISC_CFG2_OCAPI_MEM_OS_BIT	PPC_BITMASK(25, 29)
>   #define PAU_CTL_MISC_STATUS(brk)		(PAU_BLOCK_CQ_CTL + 0x060 + (brk) * 8)
>   #define   PAU_CTL_MISC_STATUS_AM_FENCED(brk)	(PPC_BITMASK(41, 42) << ((brk)*32))
> +#define PAU_CTL_MISC_GPU_MEM_BAR(brk)		(PAU_BLOCK_CQ_CTL + 0x070 + (brk) * 8)
>   #define PAU_CTL_MISC_MMIOPA_CONFIG(brk)		(PAU_BLOCK_CQ_CTL + 0x098 + (brk) * 8)
>   #define   PAU_CTL_MISC_MMIOPA_CONFIG_BAR_ADDR	PPC_BITMASK(1, 35)
>   #define   PAU_CTL_MISC_MMIOPA_CONFIG_BAR_SIZE	PPC_BITMASK(39, 43)
> @@ -159,6 +166,7 @@
>   /* XSL block registers */
>   #define PAU_XSL_OSL_SPAP_AN(brk)		(PAU_BLOCK_XSL + 0x000 + (brk) * 8)
>   #define   PAU_XSL_OSL_SPAP_AN_EN		PPC_BIT(63)
> +#define PAU_XSL_GPU_MEM_BAR(brk)		(PAU_BLOCK_XSL + 0x0D0 + (brk) * 8)
>   #define PAU_XSL_WRAP_CFG			(PAU_BLOCK_XSL + 0x100)
>   #define   PAU_XSL_WRAP_CFG_CLOCK_ENABLE		PPC_BIT(0)
>   #define PAU_XSL_OSL_XLATE_CFG(brk)		(PAU_BLOCK_XSL + 0x040 + (brk) * 8)
> diff --git a/include/pau.h b/include/pau.h
> index a70058f2..4a6087cb 100644
> --- a/include/pau.h
> +++ b/include/pau.h
> @@ -45,6 +45,7 @@ struct pau_dev {
>   
>   	struct pau_bar		ntl_bar;
>   	struct pau_bar		genid_bar;
> +	struct pau_bar		memory_bar;
>   
>   	/* Associated I2C information */
>   	uint8_t			i2c_bus_id;
> @@ -209,6 +210,9 @@ int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
>   				     uint64_t PE_handle);
>   int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
>   			    long capabilities, char *rate_buf);
> +int64_t pau_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
> +			       uint64_t size, uint64_t *bar);
> +int64_t pau_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn);
>   
>   /* PHY */
>   int pau_dev_phy_reset(struct pau_dev *dev);
> 


More information about the Skiboot mailing list