[Skiboot] [PATCH skiboot v2] npu2: Disable Probe.I.MO snarfing by default

Fri Apr 26 14:31:15 AEST 2019

V100 GPUs are known to violate the NVLink2 protocol in some cases (one is
when memory was accessed by the CPU and then by the GPU using so-called
block linear mapping) and issue double probes to the NPU, which can still
handle this but only if CONFIG_ENABLE_SNARF_CPM is not set in the CQ_SM
Misc Config register #0. If the bit is set (which is the case today), the
NPU issues a machine check stop.

The snarfing feature is designed to detect 2 probes in flight and combine
them into one.

This adds a new "opal-npu2-snarf-cpm" nvram variable which controls
CONFIG_ENABLE_SNARF_CPM for all NVLinks to prevent the machine check
stop from happening. By default snarfing is disabled. To enable it (there
might be a small, unconfirmed performance impact), the user has
to run:
sudo nvram -p ibm,skiboot --update-config opal-npu2-snarf-cpm=enable
and reboot the host system.

While at it, define macros for the register names as well to avoid touching
the same lines over and over again.

Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
---
Changes:
v2:
* disable snarfing by default
* add a comment block
---
 include/npu2-regs.h | 14 ++++++++++++
 hw/npu2.c           | 52 +++++++++++++++++++++++++++++++++------------
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index ba10b8eaf88d..61e8ea8615f8 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -791,4 +791,18 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define L3_PRD_PURGE_TTYPE_MASK 		PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4)
 #define L3_FULL_PURGE				0x0
 
+/* Config registers for NPU2 */
+#define NPU_STCK0_CS_SM0_MISC_CONFIG0		0x5011000
+#define NPU_STCK0_CS_SM1_MISC_CONFIG0		0x5011030
+#define NPU_STCK0_CS_SM2_MISC_CONFIG0		0x5011060
+#define NPU_STCK0_CS_SM3_MISC_CONFIG0		0x5011090
+#define NPU_STCK1_CS_SM0_MISC_CONFIG0		0x5011200
+#define NPU_STCK1_CS_SM1_MISC_CONFIG0		0x5011230
+#define NPU_STCK1_CS_SM2_MISC_CONFIG0		0x5011260
+#define NPU_STCK1_CS_SM3_MISC_CONFIG0		0x5011290
+#define NPU_STCK2_CS_SM0_MISC_CONFIG0		0x5011400
+#define NPU_STCK2_CS_SM1_MISC_CONFIG0		0x5011430
+#define NPU_STCK2_CS_SM2_MISC_CONFIG0		0x5011460
+#define NPU_STCK2_CS_SM3_MISC_CONFIG0		0x5011490
+
 #endif /* __NPU2_REGS_H */
diff --git a/hw/npu2.c b/hw/npu2.c
index d532c4da3532..6829428bc9dd 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1452,7 +1452,7 @@ static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint
 int npu2_nvlink_init_npu(struct npu2 *npu)
 {
 	struct dt_node *np;
-	uint64_t reg[2], mm_win[2], val;
+	uint64_t reg[2], mm_win[2], val, mask;
 
 	/* TODO: Clean this up with register names, etc. when we get
 	 * time. This just turns NVLink mode on in each brick and should
@@ -1461,18 +1461,44 @@ int npu2_nvlink_init_npu(struct npu2 *npu)
 	 *
 	 * Obviously if the year is now 2020 that didn't happen and you
 	 * should fix this :-) */
-	xscom_write_mask(npu->chip_id, 0x5011000, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011030, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011060, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011090, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011200, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011230, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011260, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011290, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011400, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011430, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011460, PPC_BIT(58), PPC_BIT(58));
-	xscom_write_mask(npu->chip_id, 0x5011490, PPC_BIT(58), PPC_BIT(58));
+
+	val = PPC_BIT(58);
+	mask = PPC_BIT(58); /* CONFIG_NVLINK_MODE */
+
+	/*
+	 * V100 GPUs are known to violate NVLink2 protocol if some GPU memory
+	 * mapped by a CPU was also "linear-block" mapped by a GPU. When this
+	 * happens, it breaks the NPU2 cache coherency state machine and
+	 * it throws machine checkstop. Disabling snarfing helps so let's
+	 * disable it by default.
+	 */
+	if (!nvram_query_eq("opal-npu2-snarf-cpm", "enable"))
+		mask |= PPC_BIT(40); /* CONFIG_ENABLE_SNARF_CPM */
+
+	xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM0_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM1_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM2_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM3_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM0_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM1_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM2_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM3_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM0_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM1_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM2_MISC_CONFIG0,
+			 val, mask);
+	xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM3_MISC_CONFIG0,
+			 val, mask);
 
 	xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
 	xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
-- 
2.17.1