[Skiboot] [PATCH 16/16] opencapi5: Add support for OpenCAPI Persistent Memory devices.

Christophe Lombard clombard at linux.vnet.ibm.com
Fri Aug 20 19:45:57 AEST 2021


Lowest Point of Coherency (LPC) memory allows the host to access memory on
an OpenCAPI device.

When the P10 chip accesses memory addresses on the AFU, the Real Address
on the PowerBus must hit a BAR in the PAU, such as the GPU-Memory BAR. The
BAR defines the range of Real Addresses that represent AFU memory.

The two existing OPAL calls, OPAL_NPU_MEM_ALLOC and OPAL_NPU_MEM_RELEASE,
are used to manage the AFU memory.

Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
---
 hw/npu-opal.c      | 35 +++++++++++++++++
 hw/npu2-opencapi.c | 18 ++-------
 hw/pau.c           | 93 ++++++++++++++++++++++++++++++++++++++++++++++
 hw/phys-map.c      |  3 ++
 include/npu2.h     |  3 ++
 include/pau-regs.h |  8 ++++
 include/pau.h      |  4 ++
 7 files changed, 149 insertions(+), 15 deletions(-)

diff --git a/hw/npu-opal.c b/hw/npu-opal.c
index 50aa8675..0f0b7bbe 100644
--- a/hw/npu-opal.c
+++ b/hw/npu-opal.c
@@ -252,3 +252,38 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
 	return rc;
 }
 opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5);
+
+static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t bdfn,
+				  uint64_t size, uint64_t *bar)
+{	/* OPAL_NPU_MEM_ALLOC entry point: dispatch on the PHB type */
+	struct phb *phb = pci_get_phb(phb_id);
+	int64_t rc = OPAL_PARAMETER; /* returned for PHBs that are not OpenCAPI */
+
+	if (!phb)
+		return OPAL_PARAMETER;
+
+	if (phb->phb_type == phb_type_npu_v2_opencapi)
+		rc = npu2_opencapi_mem_alloc(phb, bdfn, size, bar);
+	else if (phb->phb_type == phb_type_pau_opencapi)
+		rc = pau_opencapi_mem_alloc(phb, bdfn, size, bar);
+
+	return rc; /* callee status, or OPAL_PARAMETER if no branch matched */
+}
+opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
+
+static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t bdfn)
+{	/* OPAL_NPU_MEM_RELEASE entry point: dispatch on the PHB type */
+	struct phb *phb = pci_get_phb(phb_id);
+	int64_t rc = OPAL_PARAMETER; /* returned for PHBs that are not OpenCAPI */
+
+	if (!phb)
+		return OPAL_PARAMETER;
+
+	if (phb->phb_type == phb_type_npu_v2_opencapi)
+		rc = npu2_opencapi_mem_release(phb, bdfn);
+	else if (phb->phb_type == phb_type_pau_opencapi)
+		rc = pau_opencapi_mem_release(phb, bdfn);
+
+	return rc; /* callee status, or OPAL_PARAMETER if no branch matched */
+}
+opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 686f2e22..5a0d060e 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -2300,18 +2300,13 @@ out:
 	return rc;
 }
 
-static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn,
-				  uint64_t size, __be64 *__bar)
+int64_t npu2_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
+				uint64_t size, uint64_t *__bar)
 {
-	struct phb *phb = pci_get_phb(phb_id);
 	struct npu2_dev *dev;
 	uint64_t bar;
 	int64_t rc;
 
-
-	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
-		return OPAL_PARAMETER;
-
 	dev = phb_to_npu2_dev_ocapi(phb);
 	if (!dev)
 		return OPAL_PARAMETER;
@@ -2325,21 +2320,14 @@ static int64_t opal_npu_mem_alloc(uint64_t phb_id, uint32_t __unused bdfn,
 
 	return rc;
 }
-opal_call(OPAL_NPU_MEM_ALLOC, opal_npu_mem_alloc, 4);
 
-static int64_t opal_npu_mem_release(uint64_t phb_id, uint32_t __unused bdfn)
+int64_t npu2_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn)
 {
-	struct phb *phb = pci_get_phb(phb_id);
 	struct npu2_dev *dev;
 
-
-	if (!phb || phb->phb_type != phb_type_npu_v2_opencapi)
-		return OPAL_PARAMETER;
-
 	dev = phb_to_npu2_dev_ocapi(phb);
 	if (!dev)
 		return OPAL_PARAMETER;
 
 	return release_mem_bar(dev);
 }
-opal_call(OPAL_NPU_MEM_RELEASE, opal_npu_mem_release, 2);
diff --git a/hw/pau.c b/hw/pau.c
index 1d11aeac..874cf85a 100644
--- a/hw/pau.c
+++ b/hw/pau.c
@@ -453,6 +453,99 @@ int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
 	return OPAL_SUCCESS;
 }
 
+static int64_t pau_opencapi_afu_memory_bars(struct pau_dev *dev,
+					    uint64_t size,
+					    uint64_t *bar)
+{	/* Program the AFU memory BAR registers for dev; base address goes to *bar */
+	struct pau *pau = dev->pau;
+	uint64_t addr, psize;
+	uint64_t reg, val;
+
+	PAUDEVDBG(dev, "Setup AFU Memory BARs\n");
+
+	if (dev->memory_bar.enable) { /* only one allocation per device at a time */
+		PAUDEVERR(dev, "AFU memory allocation failed - BAR already in use\n");
+		return OPAL_RESOURCE;
+	}
+
+	phys_map_get(pau->chip_id, OCAPI_MEM, /* OCAPI_MEM window for this chip/index */
+		     dev->index,
+		     &addr, &psize);
+
+	if (size > psize) { /* request does not fit in the window */
+		PAUDEVERR(dev, "Invalid AFU memory BAR allocation size "
+			       "requested: 0x%llx bytes (limit 0x%llx)\n",
+			  size, psize);
+		return OPAL_PARAMETER;
+	}
+
+	if (size < (1 << 30)) /* raise small requests to 1 << 30 bytes (1 GiB) */
+		size = 1 << 30;
+
+	dev->memory_bar.enable = true;
+	dev->memory_bar.addr = addr;
+	dev->memory_bar.size = size; /* recorded before the power-of-two rounding below */
+
+	reg = PAU_GPU_MEM_BAR(dev->index);
+	val = PAU_GPU_MEM_BAR_ENABLE |
+	      PAU_GPU_MEM_BAR_POISON;
+	val = SETFIELD(PAU_GPU_MEM_BAR_ADDR, val, addr >> 30); /* address field holds addr >> 30 */
+	if (!is_pow2(size)) /* round up to the next power of two */
+		size = 1ull << (ilog2(size) + 1);
+
+	size = (size >> 30) - 1; /* size field is encoded as (size >> 30) - 1 */
+	val = SETFIELD(PAU_GPU_MEM_BAR_SIZE, val, size);
+	pau_write(pau, reg, val);
+
+	reg = PAU_CTL_MISC_GPU_MEM_BAR(dev->index); /* same value goes to the CTL MISC copy */
+	pau_write(pau, reg, val);
+
+	reg = PAU_XSL_GPU_MEM_BAR(dev->index); /* ... and to the XSL copy */
+	pau_write(pau, reg, val);
+
+	*bar = addr; /* NOTE(review): stored in CPU byte order; npu2 took a __be64 * before this patch - confirm */
+	return OPAL_SUCCESS;
+}
+
+int64_t pau_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
+			       uint64_t size, uint64_t *bar)
+{	/* Set up the AFU memory BAR for the OpenCAPI device behind phb */
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	int64_t rc;
+
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	if (!opal_addr_valid(bar)) /* reject a bad output pointer before touching hardware */
+		return OPAL_PARAMETER;
+
+	lock(&dev->pau->lock); /* BAR state and registers are updated under the PAU lock */
+	rc = pau_opencapi_afu_memory_bars(dev, size, bar);
+
+	unlock(&dev->pau->lock);
+	return rc;
+}
+
+int64_t pau_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn)
+{	/* Clear the AFU memory BAR registers and the cached BAR state for phb's device */
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	lock(&dev->pau->lock);
+	pau_write(dev->pau, PAU_GPU_MEM_BAR(dev->index), 0ull); /* zero all three BAR copies */
+	pau_write(dev->pau, PAU_CTL_MISC_GPU_MEM_BAR(dev->index), 0ull);
+	pau_write(dev->pau, PAU_XSL_GPU_MEM_BAR(dev->index), 0ull);
+
+	dev->memory_bar.enable = false; /* allows a later allocation on this device */
+	dev->memory_bar.addr = 0ull;
+	dev->memory_bar.size = 0ull;
+	unlock(&dev->pau->lock);
+
+	return OPAL_SUCCESS; /* no check that a BAR was actually allocated beforehand */
+}
+
 #define CQ_CTL_STATUS_TIMEOUT  10 /* milliseconds */
 
 static int pau_opencapi_set_fence_control(struct pau_dev *dev,
diff --git a/hw/phys-map.c b/hw/phys-map.c
index 7b44fc61..68d7cd0d 100644
--- a/hw/phys-map.c
+++ b/hw/phys-map.c
@@ -32,6 +32,9 @@ static const struct phys_map_entry phys_map_table_p10[] = {
 
 	/* TODO: Figure out GPU memory */
 
+	{ OCAPI_MEM,	   0, 0x0002000000000000ull, 0x0000040000000000ull },
+	{ OCAPI_MEM,	   1, 0x0002004000000000ull, 0x0000040000000000ull },
+
 	/* 0 TB offset @ MMIO 0x0006000000000000ull */
 	{ PHB5_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
 	{ PHB5_64BIT_MMIO, 1, 0x0006004000000000ull, 0x0000004000000000ull },
diff --git a/include/npu2.h b/include/npu2.h
index abe88747..c24861ab 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -277,5 +277,8 @@ int64_t npu2_opencapi_spa_clear_cache(struct phb *phb, uint32_t __unused bdfn,
 				      uint64_t PE_handle);
 int64_t npu2_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
 		    long capabilities, uint64_t rate_phys, int rate_sz);
+int64_t npu2_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
+				uint64_t size, uint64_t *bar);
+int64_t npu2_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn);
 
 #endif /* __NPU2_H */
diff --git a/include/pau-regs.h b/include/pau-regs.h
index da83ad44..45c36037 100644
--- a/include/pau-regs.h
+++ b/include/pau-regs.h
@@ -64,6 +64,12 @@
 #define   PAU_SNP_MISC_CFG0_ENABLE_PBUS		PPC_BIT(2)
 #define   PAU_SNP_MISC_CFG0_OCAPI_MODE		PPC_BITMASK(32, 36)
 #define   PAU_SNP_MISC_CFG0_OCAPI_C2		PPC_BITMASK(45, 49)
+#define PAU_GPU_MEM_BAR(brk)			(PAU_BLOCK_CQ_SM(0) + 0x190 + (brk) * 8)
+#define   PAU_GPU_MEM_BAR_ENABLE		PPC_BIT(0)
+#define   PAU_GPU_MEM_BAR_ADDR_MASK		PPC_BITMASK(1, 35)
+#define     PAU_GPU_MEM_BAR_ADDR		PPC_BITMASK(1, 21)
+#define     PAU_GPU_MEM_BAR_SIZE		PPC_BITMASK(22, 35)
+#define   PAU_GPU_MEM_BAR_POISON		PPC_BIT(45)
 #define PAU_NTL_BAR(brk)			(PAU_BLOCK_CQ_SM(0) + 0x1b8 + (brk) * 8)
 #define   PAU_NTL_BAR_ENABLE			PPC_BIT(0)
 #define   PAU_NTL_BAR_ADDR			PPC_BITMASK(3, 35)
@@ -88,6 +94,7 @@
 #define   PAU_CTL_MISC_CFG2_OCAPI_MEM_OS_BIT	PPC_BITMASK(25, 29)
 #define PAU_CTL_MISC_STATUS(brk)		(PAU_BLOCK_CQ_CTL + 0x060 + (brk) * 8)
 #define   PAU_CTL_MISC_STATUS_AM_FENCED(brk)	(PPC_BITMASK(41, 42) << ((brk)*32))
+#define PAU_CTL_MISC_GPU_MEM_BAR(brk)		(PAU_BLOCK_CQ_CTL + 0x070 + (brk) * 8)
 #define PAU_CTL_MISC_MMIOPA_CONFIG(brk)		(PAU_BLOCK_CQ_CTL + 0x098 + (brk) * 8)
 #define   PAU_CTL_MISC_MMIOPA_CONFIG_BAR_ADDR	PPC_BITMASK(1, 35)
 #define   PAU_CTL_MISC_MMIOPA_CONFIG_BAR_SIZE	PPC_BITMASK(39, 43)
@@ -159,6 +166,7 @@
 /* XSL block registers */
 #define PAU_XSL_OSL_SPAP_AN(brk)		(PAU_BLOCK_XSL + 0x000 + (brk) * 8)
 #define   PAU_XSL_OSL_SPAP_AN_EN		PPC_BIT(63)
+#define PAU_XSL_GPU_MEM_BAR(brk)		(PAU_BLOCK_XSL + 0x0D0 + (brk) * 8)
 #define PAU_XSL_WRAP_CFG			(PAU_BLOCK_XSL + 0x100)
 #define   PAU_XSL_WRAP_CFG_CLOCK_ENABLE		PPC_BIT(0)
 #define PAU_XSL_OSL_XLATE_CFG(brk)		(PAU_BLOCK_XSL + 0x040 + (brk) * 8)
diff --git a/include/pau.h b/include/pau.h
index 9b612fc2..c601f7ea 100644
--- a/include/pau.h
+++ b/include/pau.h
@@ -45,6 +45,7 @@ struct pau_dev {
 
 	struct pau_bar		ntl_bar;
 	struct pau_bar		genid_bar;
+	struct pau_bar		memory_bar;
 
 	/* Associated I2C information */
 	uint8_t			i2c_bus_id;
@@ -210,6 +211,9 @@ int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
 int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
 			    long capabilities, uint64_t rate_phys,
 			    int rate_sz);
+int64_t pau_opencapi_mem_alloc(struct phb *phb, uint32_t __unused bdfn,
+				uint64_t size, uint64_t *bar);
+int64_t pau_opencapi_mem_release(struct phb *phb, uint32_t __unused bdfn);
 
 
 /* PHY */
-- 
2.31.1



More information about the Skiboot mailing list