[Skiboot] [PATCH v3] core: POWER9 implement OPAL_SIGNAL_SYSTEM_RESET

Nicholas Piggin npiggin at gmail.com
Mon Sep 18 17:45:41 AEST 2017


This implements OPAL_SIGNAL_SYSTEM_RESET, using scom registers to
quiesce the target thread and raise a system reset exception on it.

This has been tested on DD1 and DD2 including ESL=0 and ESL=1 power
saving modes.

It will have to be tested with deep idle states when those are enabled.
If those cannot be supported, it should be possible to work around in
Linux.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
Hi,

I'd like this to be considered for merge. It's been working for me
without issue. It requires a Linux side patch for enablement as well.
Without the Linux patch, this path won't be used. With the Linux patch,
there remain fallbacks so this API can be disabled at any time.

I have some follow up patches that bring some of the P8 sreset scoms
from fast-reboot.c into here, then generalize them so fast reboot just
calls sreset facilities. But let's get this patch in first.

Thanks,
Nick

Changes since v2:
- More comments, documentation, error handling.
- Hardware requires all threads on a core to be quiesced before sending
  the sreset, to avoid issues with thread reconfiguration. sresetting
  a sibling quiesces all siblings except self, because we are not in
  idle.
- DD1 works! Requires a small workaround on the Linux side, but does
  not require bouncing the IPI as I first thought.



 core/Makefile.inc                             |   2 +-
 core/sreset.c                                 | 327 ++++++++++++++++++++++++++
 doc/opal-api/opal-signal-system-reset-145.rst |  26 +-
 hw/xscom.c                                    |   4 +
 include/skiboot.h                             |   3 +
 5 files changed, 351 insertions(+), 11 deletions(-)
 create mode 100644 core/sreset.c

diff --git a/core/Makefile.inc b/core/Makefile.inc
index e32c0a0a..c12cf3a2 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -9,7 +9,7 @@ CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
 CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
 CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
 CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
-CORE_OBJS += pci-dt-slot.o
+CORE_OBJS += pci-dt-slot.o sreset.o
 
 ifeq ($(SKIBOOT_GCOV),1)
 CORE_OBJS += gcov-profiling.o
diff --git a/core/sreset.c b/core/sreset.c
new file mode 100644
index 00000000..2d1bc191
--- /dev/null
+++ b/core/sreset.c
@@ -0,0 +1,327 @@
+/* Copyright 2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <cpu.h>
+#include <xscom.h>
+#include <timebase.h>
+#include <chip.h>
+
+#define P9_RAS_STATUS			0x10a02
+#define P9_RSTAT_QUIESCED(t)		PPC_BITMASK(0 + 8*(t), 3 + 8*(t))
+#define P9_RSTAT_RETRIES		100	/* quiesce polls per stop */
+
+#define P9_RAS_MODEREG			0x10a9d	/* not used in this file */
+#define P9_DIRECT_CONTROLS		0x10a9c
+#define P9_DCTL_STOP(t)			PPC_BIT(7 + 8*(t))
+#define P9_DCTL_CONT(t)			PPC_BIT(6 + 8*(t))
+#define P9_DCTL_SRESET(t)		PPC_BIT(4 + 8*(t))
+#define P9_DCTL_PWR(t)			PPC_BIT(32 + 8*(t))
+
+#define P9_CORE_THREAD_STATE		0x10ab3
+#define P9_CTS_STOP(t)			PPC_BIT(56 + (t))
+
+#define P9_PPM_GPMMR			0xf0100
+#define P9_GPMMR_SPWKUP_DONE		PPC_BIT(1)
+#define P9_GPMMR_SPWKUP_TIMEOUT		10	/* polls, 1us apart */
+
+#define P9_PPM_SPWKUP_OTR		0xf010a
+#define P9_SPWKUP_SET			PPC_BIT(0)
+
+/*
+ * This implements the ability for a thread to raise system reset exceptions
+ * on other threads using scoms on POWER9.
+ *
+ * We assert special wakeup on the core first, to ensure direct controls work.
+ * Then stop all threads on the core and wait for them to quiesce.
+ * Then sreset the target thread, which resumes execution on that thread.
+ * Then continue execution on the non-target threads.
+ * Then de-assert special wakeup on the core.
+ *
+ * When resetting threads on the same core, we don't stop ourself.
+ *
+ * Some CPU revisions have a race where a sreset can fail to cause the thread
+ * to wake from ESL=0 stop unless all threads on the core are sreset.
+ * XXX: describe how this affects resetting all-but-self in a crash/xmon
+ * situation (the original sentence here was left unfinished).
+ */
+
+/*
+ * De-assert special wakeup on @cpu's core. Best effort: scom errors are
+ * ignored, following the read-write-read pattern inherited from P8 code.
+ */
+static void p9_core_clear_special_wakeup(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t swake_addr = XSCOM_ADDR_P9_EC(core_id, P9_PPM_SPWKUP_OTR);
+	uint64_t val;
+
+	xscom_read(chip_id, swake_addr, &val);
+	xscom_write(chip_id, swake_addr, 0);
+	xscom_read(chip_id, swake_addr, &val);
+}
+
+/*
+ * Assert special wakeup on @cpu's core so that direct controls work, then
+ * poll GPMMR (up to P9_GPMMR_SPWKUP_TIMEOUT times, 1us apart) for completion.
+ * Once the request has been written, every failure path de-asserts it again.
+ * Returns 0 on success, OPAL_HARDWARE on scom error or timeout.
+ */
+static int p9_core_set_special_wakeup(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t swake_addr;
+	uint32_t gpmmr_addr;
+	uint64_t val;
+	int i;
+
+	swake_addr = XSCOM_ADDR_P9_EC(core_id, P9_PPM_SPWKUP_OTR);
+	gpmmr_addr = XSCOM_ADDR_P9_EC(core_id, P9_PPM_GPMMR);
+
+	/* Read-write-read with read errors ignored comes from P8; revisit. */
+	xscom_read(chip_id, swake_addr, &val);
+	if (xscom_write(chip_id, swake_addr, P9_SPWKUP_SET)) {
+		prlog(PR_DEBUG, "SRESET: Unable to write SPWKUP_OTR register\n");
+		return OPAL_HARDWARE;
+	}
+	xscom_read(chip_id, swake_addr, &val);
+
+	for (i = 0; i < P9_GPMMR_SPWKUP_TIMEOUT; i++) {
+		if (xscom_read(chip_id, gpmmr_addr, &val)) {
+			prlog(PR_DEBUG, "SRESET: Unable to read GPMMR register\n");
+			goto out_clear;
+		}
+		if (val & P9_GPMMR_SPWKUP_DONE)
+			return 0;
+
+		time_wait_us(1);
+	}
+
+	prlog(PR_DEBUG, "SRESET: Special wakeup mode could not be set.\n");
+out_clear:
+	p9_core_clear_special_wakeup(cpu);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Report whether @cpu is quiesced, i.e. every bit of its field in the core's
+ * RAS_STATUS scom is set. Returns 1 or 0, or OPAL_HARDWARE on read failure.
+ */
+static int p9_thread_quiesced(struct cpu_thread *cpu)
+{
+	uint32_t chip = pir_to_chip_id(cpu->pir);
+	uint32_t core = pir_to_core_id(cpu->pir);
+	uint32_t thread = pir_to_thread_id(cpu->pir);
+	uint32_t addr = XSCOM_ADDR_P9_EC(core, P9_RAS_STATUS);
+	uint64_t mask = P9_RSTAT_QUIESCED(thread);
+	uint64_t status;
+
+	if (xscom_read(chip, addr, &status) != 0) {
+		prlog(PR_DEBUG, "SRESET: Unable to read status register\n");
+		return OPAL_HARDWARE;
+	}
+
+	return (status & mask) == mask;
+}
+
+/*
+ * Stop @cpu and poll for quiesce; if it never quiesces, resume it and fail.
+ */
+static int p9_stop_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t thread_id = pir_to_thread_id(cpu->pir);
+	uint32_t dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_DIRECT_CONTROLS);
+	int rc, i;
+
+	rc = p9_thread_quiesced(cpu);
+	if (rc < 0)
+		return rc;	/* status unreadable: nothing was stopped */
+	if (rc)
+		prlog(PR_WARNING, "SRESET: Thread is quiesced already\n");
+
+	if (xscom_write(chip_id, dctl_addr, P9_DCTL_STOP(thread_id))) {
+		prlog(PR_DEBUG, "SRESET: Unable to write DIRECT_CONTROLS register\n");
+		return OPAL_HARDWARE;
+	}
+
+	for (i = 0; i < P9_RSTAT_RETRIES; i++) {
+		rc = p9_thread_quiesced(cpu);	/* reuse rc, no shadowing */
+		if (rc < 0)
+			break;	/* scom error: give up polling */
+		if (rc)
+			return 0;
+	}
+
+	xscom_write(chip_id, dctl_addr, P9_DCTL_CONT(thread_id));
+	prlog(PR_DEBUG, "SRESET: Could not quiesce thread\n");
+	return OPAL_HARDWARE;
+}
+
+/* Resume @cpu; returns 0, or OPAL_HARDWARE if the scom write fails. */
+static int p9_cont_thread(struct cpu_thread *cpu)
+{
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_DIRECT_CONTROLS);
+	uint64_t cont = P9_DCTL_CONT(pir_to_thread_id(cpu->pir));
+
+	if (!xscom_write(pir_to_chip_id(cpu->pir), dctl_addr, cont))
+		return 0;
+	prlog(PR_DEBUG, "SRESET: Unable to write DIRECT_CONTROLS register\n");
+	return OPAL_HARDWARE;
+}
+
+static int p9_sreset_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t thread_id = pir_to_thread_id(cpu->pir);
+	uint32_t dctl_addr;
+	uint32_t cts_addr;
+	uint64_t cts_val;
+
+	dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_DIRECT_CONTROLS);
+	cts_addr = XSCOM_ADDR_P9_EC(core_id, P9_CORE_THREAD_STATE);
+
+	if (xscom_read(chip_id, cts_addr, &cts_val)) {
+		prlog(PR_DEBUG, "SRESET: Unable to read CORE_THREAD_STATE register\n");
+		return OPAL_HARDWARE;
+	}
+	if (!(cts_val & P9_CTS_STOP(thread_id))) {
+		/*
+		 * Quiescing a thread causes SRR1[46:47] to be set by the
+		 * system reset interrupt as though it was in a power saving
+		 * mode even if it was not.
+		 *
+		 * Setting the DCTL_PWR bit causes SRR1[46:47] to be clear,
+		 * so poke that if thread state says we were not in stop.
+		 */
+		if (xscom_write(chip_id, dctl_addr, P9_DCTL_PWR(thread_id))) {
+			prlog(PR_DEBUG, "SRESET: Unable to set power saving mode\n");
+			return OPAL_HARDWARE;
+		}
+	}
+
+	if (xscom_write(chip_id, dctl_addr, P9_DCTL_SRESET(thread_id))) {
+		prlog(PR_DEBUG, "SRESET: Unable to write DIRECT_CONTROLS register\n");
+		return OPAL_HARDWARE;
+	}
+
+	return 0;
+}
+
+/*
+ * Raise a system reset exception on @cpu, which must not be the calling
+ * thread (OPAL_PARAMETER otherwise).
+ *
+ * Sequence: assert special wakeup on the target core, stop every thread of
+ * that core except the caller, sreset the target, let the stopped siblings
+ * continue, then de-assert special wakeup.
+ *
+ * Returns 0 on success or the error of the first step that failed. After
+ * special wakeup has been asserted, failures still run the continue and
+ * de-assert steps so the core is not left stopped.
+ */
+static int64_t p9_sreset_cpu(struct cpu_thread *cpu)
+{
+	struct cpu_thread *self = this_cpu();
+	struct cpu_thread *t;
+	int rc;
+
+	if (cpu == self) {
+		prlog(PR_DEBUG, "SRESET: Unable to reset self\n");
+		return OPAL_PARAMETER;
+	}
+
+	/*
+	 * We can not just assert the sreset direct control bit, because that
+	 * does not appear to set SRR1[46:47] bits correctly for idle vs
+	 * running threads. So quiesce it and check the core thread state for
+	 * idle, see above.
+	 */
+	rc = p9_core_set_special_wakeup(cpu);
+	if (rc)
+		return rc;
+
+	/*
+	 * P9 must stop all threads in the core to prevent thread
+	 * reconfiguration. Can't stop our own thread, but it won't
+	 * go in or out of idle so it should not reconfigure.
+	 */
+	for_each_cpu(t) {
+		if (t == self || t->primary != cpu->primary)
+			continue;
+		rc = p9_stop_thread(t);
+		if (rc)
+			goto out_cont;
+	}
+
+	rc = p9_sreset_thread(cpu);
+
+out_cont:
+	/*
+	 * Resume the other threads on the core. On success the target is
+	 * skipped, it has been restarted by the sreset; on failure it is
+	 * continued along with its siblings.
+	 */
+	for_each_cpu(t) {
+		if (t == self || t->primary != cpu->primary)
+			continue;
+		if (!rc && t == cpu)
+			continue;
+		p9_cont_thread(t);
+	}
+
+	p9_core_clear_special_wakeup(cpu);
+
+	return rc;
+}
+
+/* Serialises callers so only one sreset scom sequence runs at a time */
+static struct lock sreset_lock = LOCK_UNLOCKED;
+
+/* OPAL_SIGNAL_SYSTEM_RESET handler: sreset the thread with server @cpu_nr. */
+int64_t signal_system_reset(int cpu_nr)
+{
+	struct cpu_thread *target;
+	int64_t rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	/*
+	 * Broadcasts unsupported. Not clear what threads should be signaled,
+	 * so it's better for the OS to perform one-at-a-time for now.
+	 */
+	if (cpu_nr < 0)
+		return OPAL_CONSTRAINED;
+
+	target = find_cpu_by_server(cpu_nr);
+	if (!target) {
+		prlog(PR_DEBUG, "SRESET: could not find cpu by server %d\n", cpu_nr);
+		return OPAL_PARAMETER;
+	}
+
+	lock(&sreset_lock);
+	rc = p9_sreset_cpu(target);
+	unlock(&sreset_lock);
+
+	return rc;
+}
diff --git a/doc/opal-api/opal-signal-system-reset-145.rst b/doc/opal-api/opal-signal-system-reset-145.rst
index 3ddb6845..28e5e2f4 100644
--- a/doc/opal-api/opal-signal-system-reset-145.rst
+++ b/doc/opal-api/opal-signal-system-reset-145.rst
@@ -9,12 +9,13 @@ OPAL_SIGNAL_SYSTEM_RESET
 This OPAL call causes the specified cpu(s) to be reset to the system
 reset exception handler (0x100).
 
-The exact contents of system registers (e.g., SRR1 wakeup causes) may
-vary depending on implementation and should not be relied upon.
+The SRR1 register will indicate a power-saving wakeup when appropriate,
+and the wake reason will be System Reset (see Power ISA).
 
-Resetting active threads on the same core as this call is run may
-not be supported by some platforms. In that case, OPAL_PARTIAL will be
-returned and NONE of the interrupts will be delivered.
+This interrupt may not be recoverable in some cases (e.g., if it is
+raised when the target has MSR[RI]=0), so it should not be used in
+normal operation, but only for crashing, debugging, and similar
+exceptional cases.
 
 Arguments
 ---------
@@ -28,18 +29,23 @@ Arguments
 Returns
 -------
 OPAL_SUCCESS
-  The power down was updated successful.
+  The system reset request to the target CPU(s) was successful. This returns
+  asynchronously without acknowledgement from targets that system reset
+  interrupt processing has completed or even started.
 
 OPAL_PARAMETER
   A parameter was incorrect.
 
 OPAL_HARDWARE
-  Hardware indicated failure during reset.
+  Hardware indicated failure during reset. Some or all of the target CPUs
+  may still have had the system reset delivered.
+
+OPAL_CONSTRAINED
+  Platform does not support broadcast operations.
 
 OPAL_PARTIAL
-  Platform can not reset all requested CPUs at this time. This requires
-  platform-specific code to work around, otherwise to be treated as
-  failure. No CPUs are reset.
+  Platform can not reset sibling threads on the same core as requested.
+  None of the specified CPUs are reset in this case.
 
 OPAL_UNSUPPORTED
   This processor/platform is not supported.
diff --git a/hw/xscom.c b/hw/xscom.c
index 7bd78bf9..4a6d91f4 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -705,6 +705,10 @@ static void xscom_init_chip_info(struct proc_chip *chip)
 		printf("P9 DD%i.%i%d detected\n", 0xf & (chip->ec_level >> 4),
 		       chip->ec_level & 0xf, rev);
 		chip->ec_rev = rev;
+
+		if (!chip_quirk(QUIRK_MAMBO_CALLOUTS))
+			opal_register(OPAL_SIGNAL_SYSTEM_RESET,
+					signal_system_reset, 1);
 	}
 }
 
diff --git a/include/skiboot.h b/include/skiboot.h
index 3040c517..b989edd5 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -205,6 +205,9 @@ extern char __sym_map_end[];
 extern unsigned long get_symbol(unsigned long addr,
 				char **sym, char **sym_end);
 
+/* System reset */
+extern int64_t signal_system_reset(int cpu_nr);
+
 /* Fast reboot support */
 extern void disable_fast_reboot(const char *reason);
 extern void fast_reboot(void);
-- 
2.13.3



More information about the Skiboot mailing list