[Skiboot] [PATCH] phb4, doc: Make GPU-Direct bandwidth optimizations Witherspoon specific

Vaibhav Jain vaibhav at linux.ibm.com
Sat Jul 28 18:17:50 AEST 2018


Patch 5690c5a8980f("phb4: Reallocate PEC2 DMA-Read engines to improve
GPU-Direct bandwidth") introduced allocation of extra DMA-read engines
for improving Mellanox CX5 GPU-Direct bandwidth. At present CX5 is the
only card that's using these optimizations, so these changes will only
impact Witherspoon systems.

However, the hardware team has raised the possibility of other
non-Witherspoon systems in the future that may be using a similar card,
where these optimizations won't be needed. So they have asked us to
make these changes Witherspoon-specific.

Hence this patch updates phb4_init_capp_regs() and
enable_capi_mode() to configure the extra DMA-read engine allocation
if and only if skiboot is running on a Witherspoon platform.

Cc: stable #5.0.6+
Fixes: 5690c5a8980f("phb4: Reallocate PEC2 DMA-Read engines to improve
GPU-Direct bandwidth")
Signed-off-by: Vaibhav Jain <vaibhav at linux.ibm.com>
---
 .../opal-pci-set-phb-capi-mode-93.rst         |  8 ++--
 hw/phb4.c                                     | 47 ++++++++++++-------
 2 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/doc/opal-api/opal-pci-set-phb-capi-mode-93.rst b/doc/opal-api/opal-pci-set-phb-capi-mode-93.rst
index 6a8d2be8..09ecd69c 100644
--- a/doc/opal-api/opal-pci-set-phb-capi-mode-93.rst
+++ b/doc/opal-api/opal-pci-set-phb-capi-mode-93.rst
@@ -44,10 +44,10 @@ CAPP-PSL transactions.
 
 Notes:
 -----
-* If PHB is in PEC2 then requesting mode `OPAL_PHB_CAPI_MODE_DMA_TVT1` will
-  allocate extra 16/8 dma read engines to the PHB depending on its stack
-  (stack 0/ stack 1). This is needed to improve the Direct-GPU DMA read
-  performance for the Mellanox CX5 card.
+* On a Witherspoon system if PHB is in PEC2 then requesting mode
+  `OPAL_PHB_CAPI_MODE_DMA_TVT1` will allocate extra 16/8 dma read engines to the
+  PHB depending on its stack (stack 0/ stack 1). This is needed to improve the
+  Direct-GPU DMA read performance for the Mellanox CX5 card.
 * Mode `OPAL_PHB_CAPI_MODE_PCIE` not yet supported on Power-9.
 * Requesting mode `OPAL_PHB_CAPI_MODE_CAPI` on Power-9 will disable fast-reboot.
 * Modes `OPAL_PHB_CAPI_MODE_DMA`, `OPAL_PHB_CAPI_MODE_SNOOP_OFF` are
diff --git a/hw/phb4.c b/hw/phb4.c
index a3aa8b80..ee238109 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -148,6 +148,9 @@ static void phb4_init_hw(struct phb4 *p);
 #define PHB4_CAN_STORE_EOI(p) \
 	(XIVE_STORE_EOI_ENABLED && ((p)->rev >= PHB4_REV_NIMBUS_DD20))
 
+/* Are we running on a Witherspoon system */
+#define IS_WITHERSPOON() (strcmp(platform.name, "Witherspoon") == 0)
+
 static bool verbose_eeh;
 static bool pci_tracing;
 static bool pci_eeh_mmio;
@@ -3937,24 +3940,29 @@ static void phb4_init_capp_regs(struct phb4 *p, uint32_t capp_eng)
 			    0xDCE0280428000000);
 	}
 
-	/* capp owns PHB read buffers */
-	if (p->index == CAPP0_PHB_INDEX) {
+
+	/* assigned capp owned PHB read buffers */
+	reg = 0;
+	if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
+		/* In case of Mellanox CX5 card on witherspoon assign
+		 * just 4 phb read buffers to CAPP. On other systems allocate
+		 * 8 read phb read buffers
+		 */
+		reg = IS_WITHERSPOON() ? 0xF000000000000000 : /*4 Read buffers*/
+			0xFF00000000000000; /*8 PHB Read buffers*/
+
+	} else	if (p->index == CAPP0_PHB_INDEX) {
 		/* max PHB read buffers 0-47 */
 		reg = 0xFFFFFFFFFFFF0000;
-		if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
-			reg = 0xF000000000000000;
-		xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
-		xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
-	}
-	if (p->index == CAPP1_PHB_INDEX) {
+
+	} else if (p->index == CAPP1_PHB_INDEX) {
 		/* Set 30 Read machines for CAPP Minus 20-27 for DMA */
 		reg = 0xFFFFF00E00000000;
-		if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
-			reg = 0xF000000000000000;
-		xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
-		xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
 	}
 
+	xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
+	xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
+
 	/* CAPP FIR Action 0 */
 	xscom_write(p->chip_id, CAPP_FIR_ACTION0 + offset, 0x0b1c000104060000);
 
@@ -4111,8 +4119,13 @@ static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
 	/* CAPP Control Register. Enable CAPP Mode */
 	reg = 0x8000000000000000ULL; /* PEC works in CAPP Mode */
 	reg |= stq_eng;
-	if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
-		dma_eng = 0x0000F00000000000ULL; /* 4 CAPP Read machines */
+	if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
+		/* For Mellanox CX5 running on witherspoon allocate 4 CAPP read
+		 * machines. On other systems allocate 8 CAPP Read machines
+		 */
+		dma_eng = IS_WITHERSPOON() ? 0x0000F00000000000ULL :
+			0x0000FF0000000000ULL;
+	}
 	reg |= dma_eng;
 	xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, reg);
 
@@ -4120,9 +4133,11 @@ static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
 	 * x8+x8 (bifurcated) or x8+x4+x4 (trifurcated) mode. When
 	 * Mellanox CX5 card is attached to stack0 of this PEC, indicated by
 	 * request to allocate CAPP_MAX_DMA_READ_ENGINES; we tweak the default
-	 * dma-read engines allocations to maximize the DMA read performance
+	 * dma-read engines allocations to maximize the DMA read performance.
+	 * Do this only on a witherspoon system.
 	 */
-	if ((p->index == CAPP1_PHB_INDEX) &&
+	if (IS_WITHERSPOON() &&
+	    (p->index == CAPP1_PHB_INDEX) &&
 	    (capp_eng & CAPP_MAX_DMA_READ_ENGINES)) {
 
 		/*
-- 
2.17.1



More information about the Skiboot mailing list