[Skiboot] [RFC PATCH] direct-controls: Use P8 sequence from workbook

Stewart Smith stewart at linux.ibm.com
Tue Mar 12 11:30:31 AEDT 2019


From: Stewart Smith <stewart at linux.vnet.ibm.com>

When attempting to do SRESET under load, our existing implementation
fell down and would fail to SRESET some CPUs, causing the fast-reboot
to fail. This is reproduced by running the (new) op-test test
testcases.OpTestFastReboot.FastRebootHostStressTorture - and especially
so if you up the `stress` invocation to be 120 seconds.

See PR for op-test: https://github.com/open-power/op-test-framework/pull/437

Unfortunately, this patch itself isn't the whole story. It makes things
*better* but there's still a window. Mind you, with this patch I've
seen it pass 24 fast-reboots, which is around 22 more than previously.

The same problem has been observed with pdbg, see discussion there
https://lists.ozlabs.org/pipermail/pdbg/2019-March/001076.html

Signed-off-by: Stewart Smith <stewart at linux.vnet.ibm.com>
---
 core/direct-controls.c | 44 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/core/direct-controls.c b/core/direct-controls.c
index 1d0f6818e2c8..8783a83a25ac 100644
--- a/core/direct-controls.c
+++ b/core/direct-controls.c
@@ -1,4 +1,4 @@
-/* Copyright 2017 IBM Corp.
+/* Copyright 2017-2019 IBM Corp.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -59,6 +59,12 @@ static void mambo_stop_cpu(struct cpu_thread *cpu)
 #define P8_DIRECT_CTL_STOP		PPC_BIT(63)
 #define P8_DIRECT_CTL_PRENAP		PPC_BIT(47)
 #define P8_DIRECT_CTL_SRESET		PPC_BIT(60)
+#define P8_EX_TCTL_RAS_STATUS(t)	(0x10013002 + (t) * 0x10)
+#define P8_RAS_STATUS_SRQ_EMPTY		PPC_BIT(8)
+#define P8_RAS_STATUS_LSU_QUIESCED	PPC_BIT(9)
+#define P8_RAS_STATUS_INST_COMPLETE	PPC_BIT(12)
+#define P8_RAS_STATUS_THREAD_ACTIVE	PPC_BIT(48)
+#define P8_RAS_STATUS_TS_QUIESCE	PPC_BIT(49)
 
 static int p8_core_set_special_wakeup(struct cpu_thread *cpu)
 {
@@ -224,6 +230,9 @@ static int p8_stop_thread(struct cpu_thread *cpu)
 	uint32_t chip_id = pir_to_chip_id(cpu->pir);
 	uint32_t thread_id = pir_to_thread_id(cpu->pir);
 	uint32_t xscom_addr;
+	uint64_t val;
+	int i;
+	int rc;
 
 	xscom_addr = XSCOM_ADDR_P8_EX(core_id,
 				      P8_EX_TCTL_DIRECT_CONTROLS(thread_id));
@@ -235,7 +244,38 @@ static int p8_stop_thread(struct cpu_thread *cpu)
 		return OPAL_HARDWARE;
 	}
 
-	return OPAL_SUCCESS;
+	xscom_addr = XSCOM_ADDR_P8_EX(core_id,
+				      P8_EX_TCTL_RAS_STATUS(thread_id));
+	for (i=0; i < 1000; i++) {
+		rc = xscom_read(chip_id, xscom_addr, &val);
+		if (rc) {
+			prlog(PR_ERR, "Could not check state of thread "
+			      "%u:%u:%u\n", chip_id, core_id, thread_id);
+		return OPAL_HARDWARE;
+		}
+		prlog(PR_INFO, "RAS_STATUS for %u:%u:%u = 0x%llx\n",
+		      chip_id, core_id, thread_id, val);
+		if (val & P8_RAS_STATUS_INST_COMPLETE)
+			break;
+		time_wait_ms(10);
+	}
+	if (!(val & P8_RAS_STATUS_INST_COMPLETE))
+		return OPAL_HARDWARE;
+
+	for (i=0; i < 1000; i++) {
+		rc = xscom_read(chip_id, xscom_addr, &val);
+		if (rc) {
+			prlog(PR_ERR, "Could not check state of thread "
+			      "%u:%u:%u\n", chip_id, core_id, thread_id);
+		return OPAL_HARDWARE;
+		}
+		prlog(PR_INFO, "RAS_STATUS for %u:%u:%u = 0x%llx\n",
+		      chip_id, core_id, thread_id, val);
+		if (val & P8_RAS_STATUS_TS_QUIESCE)
+			return OPAL_SUCCESS;
+		time_wait_ms(10);
+	}
+	return OPAL_HARDWARE;
 }
 
 static int p8_sreset_thread(struct cpu_thread *cpu)
-- 
2.20.1



More information about the Skiboot mailing list