[Skiboot] [PATCH] Add in new OPAL call to flush the L2 and L3 caches.

Rashmica Gupta rashmica.g at gmail.com
Mon Oct 29 17:19:07 AEDT 2018


Flushing the caches with the dcbf instruction is really slow. Triggering the
L2 and L3 purge registers over XSCOM, as this new OPAL call does, is much faster.

Suggested-by: Alistair Popple <alistair at popple.id.au>
Signed-off-by: Rashmica Gupta <rashmica.g at gmail.com>
---
Stewart: I realise that cpu.c is probably not where this should live...
Thoughts on where it should go?

Context: When resetting a GPU we want to make sure all dirty cache lines
in the CPU cache are cleared. Hit up Alexey and Alistair for the nitty
gritty details.

 core/cpu.c         | 108 +++++++++++++++++++++++++++++++++++++++++++++
 include/cpu.h      |   2 +
 include/opal-api.h |   3 +-
 3 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/core/cpu.c b/core/cpu.c
index 4f518a4c..bc4fcaf8 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -1630,3 +1630,111 @@ static int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr)
 	return rc;
 }
 opal_call(OPAL_NMMU_SET_PTCR, opal_nmmu_set_ptcr, 2);
+
+
+#define L2_PRD_PURGE_CMD_REG 0x1080E /* L2 purge register, passed to XSCOM_ADDR_P9_EX() */
+#define L2_PRD_PURGE_CMD_REG_BUSY 0x0040000000000000 /* polled until clear after a trigger */
+#define L2_PRD_PURGE_CMD_TRIGGER 0x8000000000000000 /* set to start the purge; cleared by software */
+#define L3_PRD_PURGE_REG 0x1180E /* L3 purge register, passed to XSCOM_ADDR_P9_EX() */
+#define L3_PRD_PURGE_REQ 0x8000000000000000 /* set to request the purge; polled until clear */
+#define TIMEOUT_MS 2 /* poll budget in milliseconds, see time_expired() */
+
+/* True once more than TIMEOUT_MS milliseconds have passed since 'start'. */
+static inline bool time_expired(unsigned long start)
+{
+	/* 'start' is expected to be an earlier tb_to_msecs(mftb()) reading */
+	unsigned long now = tb_to_msecs(mftb());
+	unsigned long elapsed = now - start;
+
+	return elapsed > TIMEOUT_MS;
+}
+
+/* Trigger an L2 purge on one core over XSCOM and poll until it is no longer
+ * busy. Returns 0 on success, OPAL_BUSY on timeout, or the XSCOM error code. */
+static int flush_l2_caches(uint32_t chip_id, uint32_t core_id)
+{
+	int rc;
+	unsigned long start_time;
+	uint64_t val = L2_PRD_PURGE_CMD_REG_BUSY; /* so the poll loop is entered */
+	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
+
+	rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER, L2_PRD_PURGE_CMD_TRIGGER);
+	if (rc) {
+		prlog(PR_ERR, "FLUSH L2 on core 0x%x: XSCOM write_mask failed %i\n", core_id, rc);
+		return rc; /* the purge never started, so there is nothing to poll */
+	}
+	start_time = tb_to_msecs(mftb());
+	while (val & L2_PRD_PURGE_CMD_REG_BUSY) {
+		if (time_expired(start_time)) {
+			prlog(PR_ERR, "FLUSH L2 on core 0x%x timed out %i\n", core_id, rc);
+			return OPAL_BUSY;
+		}
+		rc = xscom_read(chip_id, addr, &val);
+		if (rc) {
+			prlog(PR_ERR, "FLUSH L2 on core 0x%x: XSCOM read failed %i\n", core_id, rc);
+			return rc; /* val was not read back, so do not write it to the register */
+		}
+	}
+
+	/* We have to clear the trigger bit ourselves */
+	rc = xscom_write(chip_id, addr, val & ~L2_PRD_PURGE_CMD_TRIGGER);
+	if (rc)
+		prlog(PR_ERR, "FLUSH L2 on core 0x%x: XSCOM write failed %i\n", core_id, rc);
+	return rc;
+}
+
+/* Request an L3 purge on one core over XSCOM and poll until the request bit
+ * clears. Returns 0 on success, OPAL_BUSY on timeout, or the XSCOM error code. */
+static int flush_l3_caches(uint32_t chip_id, uint32_t core_id)
+{
+	int rc;
+	unsigned long start_time;
+	uint64_t val = L3_PRD_PURGE_REQ; /* so the poll loop is entered */
+	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
+
+	rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ, L3_PRD_PURGE_REQ);
+	if (rc) {
+		prlog(PR_ERR, "FLUSH L3 on core 0x%x: XSCOM write_mask failed %i\n", core_id, rc);
+		return rc; /* the purge was never requested, so there is nothing to poll */
+	}
+	start_time = tb_to_msecs(mftb());
+	while (val & L3_PRD_PURGE_REQ) { /* trigger bit is automatically set to zero when flushing is done */
+		if (time_expired(start_time)) {
+			prlog(PR_ERR, "FLUSH L3 on core 0x%x timed out %i\n", core_id, rc);
+			return OPAL_BUSY;
+		}
+		rc = xscom_read(chip_id, addr, &val);
+		if (rc) {
+			prlog(PR_ERR, "FLUSH L3 on core 0x%x: XSCOM read failed %i\n", core_id, rc);
+			return rc; /* report the failure instead of claiming success */
+		}
+	}
+	return 0;
+}
+
+int flush_caches(void)
+{
+	int rc = 0;
+	struct cpu_thread *t;
+	uint64_t chip_id, core_id, prev_core_id = 0xdeadbeef;
+
+	if ((mfspr(SPR_PVR) & PVR_TYPE_P9) != PVR_TYPE_P9)
+		return OPAL_UNSUPPORTED;
+
+	for_each_cpu(t) {
+		/* Only need to do it once per core chiplet */
+		core_id = pir_to_core_id(t->pir);
+		if (prev_core_id == core_id)
+			continue;
+		prev_core_id = core_id;
+		chip_id = t->chip_id;
+
+		rc |= flush_l2_caches(chip_id, core_id);
+		rc |= flush_l3_caches(chip_id, core_id);
+	}
+
+	return rc;
+}
+
+
+opal_call(OPAL_CLEAR_CACHE, flush_caches, 0); /* binds the call to flush_caches(); 0 looks like the argument count (cf. OPAL_NMMU_SET_PTCR above) */
diff --git a/include/cpu.h b/include/cpu.h
index 2fe47982..04c862c5 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -329,4 +329,6 @@ int dctl_set_special_wakeup(struct cpu_thread *t);
 int dctl_clear_special_wakeup(struct cpu_thread *t);
 int dctl_core_is_gated(struct cpu_thread *t);
 
+extern int flush_caches(void); /* flushes the L2 and L3 caches; defined in core/cpu.c */
+
 #endif /* __CPU_H */
diff --git a/include/opal-api.h b/include/opal-api.h
index 5f397c8e..c24838d2 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -226,7 +226,8 @@
 #define OPAL_NX_COPROC_INIT			167
 #define OPAL_NPU_SET_RELAXED_ORDER		168
 #define OPAL_NPU_GET_RELAXED_ORDER		169
-#define OPAL_LAST				169
+#define OPAL_CLEAR_CACHE			170 /* flush the L2 and L3 caches */
+#define OPAL_LAST				170 /* raised from 169 to cover OPAL_CLEAR_CACHE */
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
-- 
2.17.2



More information about the Skiboot mailing list