[Skiboot] [PATCH] Add in new OPAL call to flush the L2 and L3 caches.
Rashmica Gupta
rashmica.g at gmail.com
Mon Oct 29 17:19:07 AEDT 2018
Using the dcbf instruction is really slow... This is much faster.
Suggested-by: Alistair Popple <alistair at popple.id.au>
Signed-off-by: Rashmica Gupta <rashmica.g at gmail.com>
---
Stewart: I realise that cpu.c is probably not where this should live...
Thoughts on where it should go?
Context: When resetting a GPU we want to make sure all dirty cache lines
in the CPU cache are cleared. Hit up Alexey and Alistair for the nitty
gritty details.
core/cpu.c | 108 +++++++++++++++++++++++++++++++++++++++++++++
include/cpu.h | 2 +
include/opal-api.h | 3 +-
3 files changed, 112 insertions(+), 1 deletion(-)
diff --git a/core/cpu.c b/core/cpu.c
index 4f518a4c..bc4fcaf8 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -1630,3 +1630,111 @@ static int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr)
return rc;
}
opal_call(OPAL_NMMU_SET_PTCR, opal_nmmu_set_ptcr, 2);
+
+
+#define L2_PRD_PURGE_CMD_REG 0x1080E
+#define L2_PRD_PURGE_CMD_REG_BUSY 0x0040000000000000
+#define L2_PRD_PURGE_CMD_TRIGGER 0x8000000000000000
+#define L3_PRD_PURGE_REG 0x1180E
+#define L3_PRD_PURGE_REQ 0x8000000000000000
+#define TIMEOUT_MS 2
+
+static inline bool time_expired(unsigned long start)
+{
+ unsigned long time = tb_to_msecs(mftb());
+
+ if (time - start > TIMEOUT_MS) {
+ return true;
+ }
+ return false;
+}
+
+static int flush_l2_caches(uint32_t chip_id, uint32_t core_id)
+{
+ int rc, timeout = 0;
+ unsigned long start_time;
+ uint64_t val = L2_PRD_PURGE_CMD_REG_BUSY;
+ uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
+
+ rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER,
+ L2_PRD_PURGE_CMD_TRIGGER);
+ if (rc) {
+ prlog(PR_ERR, "FLUSH L2 on core 0x%x: XSCOM write_mask failed %i\n", core_id, rc);
+ }
+ start_time = tb_to_msecs(mftb());
+ while ((val & L2_PRD_PURGE_CMD_REG_BUSY) && !(timeout = time_expired(start_time))) {
+ rc = xscom_read(chip_id, addr, &val);
+ if (rc) {
+ prlog(PR_ERR, "FLUSH L2 on core 0x%x: XSCOM read failed %i\n", core_id, rc);
+ break;
+ }
+ }
+ if (timeout) {
+ prlog(PR_ERR, "FLUSH L3 on core 0x%x timed out %i\n", core_id, rc);
+ return OPAL_BUSY;
+ }
+
+ /* We have to clear the trigger bit ourselves */
+ val &= ~L2_PRD_PURGE_CMD_TRIGGER;
+ rc = xscom_write(chip_id, addr, val);
+ if (rc)
+ prlog(PR_ERR, "FLUSH L2 on core 0x%x: XSCOM write failed %i\n", core_id, rc);
+ return 0;
+
+}
+
+static int flush_l3_caches(uint32_t chip_id, uint32_t core_id)
+{
+ int rc, timeout = 0;
+ unsigned long start_time;
+ uint64_t val = L3_PRD_PURGE_REQ;
+ uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
+
+ rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ, L3_PRD_PURGE_REQ);
+ if (rc) {
+ prlog(PR_ERR, "FLUSH L3 on core 0x%x: XSCOM write_mask failed %i\n", core_id, rc);
+ }
+
+ /* Trigger bit is automatically set to zero when flushing is done*/
+ start_time = tb_to_msecs(mftb());
+ while ((val & L3_PRD_PURGE_REQ) && !(timeout = time_expired(start_time) )) {
+ rc = xscom_read(chip_id, addr, &val);
+ if (rc) {
+ prlog(PR_ERR, "FLUSH L3 on core 0x%x: XSCOM read failed %i\n", core_id, rc);
+ break;
+ }
+ }
+ if (timeout) {
+ prlog(PR_ERR, "FLUSH L3 on core 0x%x timed out %i\n", core_id, rc);
+ return OPAL_BUSY;
+ }
+
+ return 0;
+}
+
+int flush_caches(void)
+{
+ int rc = 0;
+ struct cpu_thread *t;
+ uint64_t chip_id, core_id, prev_core_id = 0xdeadbeef;
+
+ if ((mfspr(SPR_PVR) & PVR_TYPE_P9) != PVR_TYPE_P9)
+ return OPAL_UNSUPPORTED;
+
+ for_each_cpu(t) {
+ /* Only need to do it once per core chiplet */
+ core_id = pir_to_core_id(t->pir);
+ if (prev_core_id == core_id)
+ continue;
+ prev_core_id = core_id;
+ chip_id = t->chip_id;
+
+ rc |= flush_l2_caches(chip_id, core_id);
+ rc |= flush_l3_caches(chip_id, core_id);
+ }
+
+ return rc;
+}
+
+
+opal_call(OPAL_CLEAR_CACHE, flush_caches, 0);
diff --git a/include/cpu.h b/include/cpu.h
index 2fe47982..04c862c5 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -329,4 +329,6 @@ int dctl_set_special_wakeup(struct cpu_thread *t);
int dctl_clear_special_wakeup(struct cpu_thread *t);
int dctl_core_is_gated(struct cpu_thread *t);
+extern int flush_caches(void);
+
#endif /* __CPU_H */
diff --git a/include/opal-api.h b/include/opal-api.h
index 5f397c8e..c24838d2 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -226,7 +226,8 @@
#define OPAL_NX_COPROC_INIT 167
#define OPAL_NPU_SET_RELAXED_ORDER 168
#define OPAL_NPU_GET_RELAXED_ORDER 169
-#define OPAL_LAST 169
+#define OPAL_CLEAR_CACHE 170
+#define OPAL_LAST 170
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
--
2.17.2
More information about the Skiboot
mailing list