[Skiboot] [PATCH v4] core: POWER9 implement OPAL_SIGNAL_SYSTEM_RESET

Nicholas Piggin npiggin at gmail.com
Wed Sep 20 16:56:03 AEST 2017


This implements OPAL_SIGNAL_SYSTEM_RESET, using scom registers to
quiesce the target thread and raise a system reset exception on it.
It has been tested on DD2 with stop0 ESL=0 and ESL=1 shallow power
saving modes.

DD1 is not implemented because it is sufficiently different as to
make support difficult.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
Since v3:
- Rework the special wakeup and direct control code so it is in more of
  a framework that can be reused and extended. Provide some refcounting
  and locking for multiple users.
- Use the HYP special wakeup register rather than OTR (which was copied
  from pdbg).
- Use the SSHHYP register to poll for special wakeup done, rather than
  GPMMR, which may be the better approach because GPMMR seems to be
  multiplexing all special wakeup registers.
- Add a microsecond delay before de-asserting special wakeup, which may
  avoid some failures (Vaidy is looking into this further).
- Remove read-write-read sequence on the special wakeup register that
  came from P8.
- Remove DD1 support again. This allows the SRR1 power saving workaround
  to be removed, which makes things very simple now.
- Improve comments, error messages and check a few more xscom failure
  registers.


 core/Makefile.inc                             |   2 +-
 core/cpu.c                                    |   1 +
 core/direct-controls.c                        | 409 ++++++++++++++++++++++++++
 core/init.c                                   |   6 +
 doc/opal-api/opal-signal-system-reset-145.rst |  26 +-
 hdata/hdata.h                                 |  14 -
 include/cpu.h                                 |   7 +
 include/processor.h                           |  15 +
 include/skiboot.h                             |   4 +
 9 files changed, 459 insertions(+), 25 deletions(-)
 create mode 100644 core/direct-controls.c

diff --git a/core/Makefile.inc b/core/Makefile.inc
index e32c0a0a..d6a7269f 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -9,7 +9,7 @@ CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
 CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
 CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
 CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
-CORE_OBJS += pci-dt-slot.o
+CORE_OBJS += pci-dt-slot.o direct-controls.o
 
 ifeq ($(SKIBOOT_GCOV),1)
 CORE_OBJS += gcov-profiling.o
diff --git a/core/cpu.c b/core/cpu.c
index bcf84ad5..358356a2 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -625,6 +625,7 @@ static void init_cpu_thread(struct cpu_thread *t,
 			    enum cpu_thread_state state,
 			    unsigned int pir)
 {
+	init_lock(&t->dctl_lock);
 	init_lock(&t->job_lock);
 	list_head_init(&t->job_queue);
 	t->state = state;
diff --git a/core/direct-controls.c b/core/direct-controls.c
new file mode 100644
index 00000000..9acdab5e
--- /dev/null
+++ b/core/direct-controls.c
@@ -0,0 +1,409 @@
+/* Copyright 2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <cpu.h>
+#include <xscom.h>
+#include <timebase.h>
+#include <chip.h>
+
+/*
+ * Core RAS status register, read by p9_thread_quiesced().
+ * Each thread has an 8-bit field in it; a thread is treated as quiesced
+ * only when bits 0-3 of its field (PPC_BITMASK numbering) are all set.
+ * P9_QUIESCE_RETRIES is how many times p9_stop_thread() polls for that.
+ */
+#define P9_RAS_STATUS			0x10a02
+#define P9_THREAD_QUIESCED(t)		PPC_BITMASK(0 + 8*(t), 3 + 8*(t))
+#define P9_QUIESCE_RETRIES		100
+
+/*
+ * Core direct controls register. The request bits below are per thread,
+ * at a stride of 8 bits per thread, and are written by p9_stop_thread(),
+ * p9_cont_thread() and p9_sreset_thread().
+ * P9_THREAD_PWR is defined but not used anywhere in this file.
+ */
+#define P9_EC_DIRECT_CONTROLS		0x10a9c
+#define P9_THREAD_STOP(t)		PPC_BIT(7 + 8*(t))
+#define P9_THREAD_CONT(t)		PPC_BIT(6 + 8*(t))
+#define P9_THREAD_SRESET(t)		PPC_BIT(4 + 8*(t))
+#define P9_THREAD_PWR(t)		PPC_BIT(32 + 8*(t))
+
+/* EC_PPM_SPECIAL_WKUP_HYP: value written to request special wakeup */
+#define P9_SPWKUP_SET			PPC_BIT(0)
+
+/*
+ * PPM_SSHHYP register, polled by p9_core_set_special_wakeup() for the
+ * P9_SPECIAL_WKUP_DONE bit. P9_SPWKUP_TIMEOUT is the number of polls,
+ * which are spaced 1us apart.
+ */
+#define P9_EC_PPM_SSHHYP		0x0114
+#define P9_SPECIAL_WKUP_DONE		PPC_BIT(1)
+#define P9_SPWKUP_TIMEOUT		10
+
+/*
+ * This implements direct control facilities of processor cores and threads
+ * using scom registers.
+ */
+
+/*
+ * Assert special wakeup on the core that @cpu belongs to and wait for the
+ * hardware to acknowledge it.
+ *
+ * P9_SPWKUP_SET is written to the core's PPM_SPECIAL_WKUP_HYP register,
+ * then PPM_SSHHYP is polled up to P9_SPWKUP_TIMEOUT times, 1us apart,
+ * until P9_SPECIAL_WKUP_DONE is seen.
+ *
+ * Returns 0 once the done bit is set. Returns OPAL_HARDWARE if a scom
+ * access fails or the poll times out; unless the initial write itself
+ * failed, the request is de-asserted again before returning.
+ */
+static int p9_core_set_special_wakeup(struct cpu_thread *cpu)
+{
+	const uint32_t chip = pir_to_chip_id(cpu->pir);
+	const uint32_t core = pir_to_core_id(cpu->pir);
+	const uint32_t wkup_reg =
+		XSCOM_ADDR_P9_EC_SLAVE(core, EC_PPM_SPECIAL_WKUP_HYP);
+	const uint32_t status_reg =
+		XSCOM_ADDR_P9_EC_SLAVE(core, P9_EC_PPM_SSHHYP);
+	uint64_t status;
+	int polls = 0;
+
+	if (xscom_write(chip, wkup_reg, P9_SPWKUP_SET)) {
+		prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+				" Unable to write PPM_SPECIAL_WKUP_HYP.\n",
+				chip, core);
+		return OPAL_HARDWARE;
+	}
+
+	while (polls < P9_SPWKUP_TIMEOUT) {
+		if (xscom_read(chip, status_reg, &status)) {
+			prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+					" Unable to read PPM_SSHHYP.\n",
+					chip, core);
+			goto deassert;
+		}
+		if (status & P9_SPECIAL_WKUP_DONE)
+			return 0;
+
+		time_wait_us(1);
+		polls++;
+	}
+
+	prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+			" timeout waiting for SPECIAL_WKUP_DONE.\n",
+			chip, core);
+
+deassert:
+	/* Withdraw the request again, after a small delay. */
+	time_wait_us(1);
+	xscom_write(chip, wkup_reg, 0);
+
+	return OPAL_HARDWARE;
+}
+
+/*
+ * De-assert special wakeup on the core that @cpu belongs to by writing 0
+ * to its PPM_SPECIAL_WKUP_HYP register.
+ *
+ * Returns OPAL_SUCCESS, or OPAL_HARDWARE if the scom write fails.
+ */
+static int p9_core_clear_special_wakeup(struct cpu_thread *cpu)
+{
+	const uint32_t chip = pir_to_chip_id(cpu->pir);
+	const uint32_t core = pir_to_core_id(cpu->pir);
+	const uint32_t wkup_reg =
+		XSCOM_ADDR_P9_EC_SLAVE(core, EC_PPM_SPECIAL_WKUP_HYP);
+
+	/*
+	 * Wait briefly before de-asserting. The delay may help avoid
+	 * problems when special wakeup is set and cleared back-to-back;
+	 * this still needs to be confirmed.
+	 */
+	time_wait_us(1);
+
+	if (!xscom_write(chip, wkup_reg, 0))
+		return OPAL_SUCCESS;
+
+	prlog(PR_ERR, "Could not clear special wakeup on %u:%u:"
+			" Unable to write PPM_SPECIAL_WKUP_HYP.\n",
+			chip, core);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Report whether the thread @cpu is quiesced according to the core's
+ * RAS_STATUS register.
+ *
+ * Returns 1 if every bit of P9_THREAD_QUIESCED() for this thread is set,
+ * 0 if not, and OPAL_HARDWARE if the register cannot be read (callers
+ * test for a negative value to detect that case).
+ */
+static int p9_thread_quiesced(struct cpu_thread *cpu)
+{
+	const uint32_t chip = pir_to_chip_id(cpu->pir);
+	const uint32_t core = pir_to_core_id(cpu->pir);
+	const uint32_t thread = pir_to_thread_id(cpu->pir);
+	const uint64_t quiesced_mask = P9_THREAD_QUIESCED(thread);
+	uint64_t ras;
+
+	if (xscom_read(chip, XSCOM_ADDR_P9_EC(core, P9_RAS_STATUS), &ras)) {
+		prlog(PR_ERR, "Could not check thread state on %u:%u:"
+				" Unable to read RAS_STATUS.\n",
+				chip, core);
+		return OPAL_HARDWARE;
+	}
+
+	/* All quiesce bits must be set, not just some of them. */
+	return ((ras & quiesced_mask) == quiesced_mask) ? 1 : 0;
+}
+
+/*
+ * Stop the thread @cpu through the core's direct controls register and
+ * wait for it to report quiesced.
+ *
+ * Returns 0 once the thread is quiesced. Returns the error from
+ * p9_thread_quiesced() if the initial state check fails, and
+ * OPAL_HARDWARE if the stop request cannot be written or the thread is
+ * not seen quiesced within P9_QUIESCE_RETRIES polls; in that last case a
+ * continue request is written to try to resume the thread.
+ */
+static int p9_stop_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t thread_id = pir_to_thread_id(cpu->pir);
+	uint32_t dctl_addr;
+	int rc;
+	int i;
+
+	dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS);
+
+	/* An already-quiesced thread is unexpected but not fatal. */
+	rc = p9_thread_quiesced(cpu);
+	if (rc < 0)
+		return rc;
+	if (rc)
+		prlog(PR_WARNING, "Stopping thread %u:%u:%u warning:"
+				" thread is quiesced already.\n",
+				chip_id, core_id, thread_id);
+
+	if (xscom_write(chip_id, dctl_addr, P9_THREAD_STOP(thread_id))) {
+		prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+				" Unable to write EC_DIRECT_CONTROLS.\n",
+				chip_id, core_id, thread_id);
+		return OPAL_HARDWARE;
+	}
+
+	/*
+	 * Poll for quiesce. The function-scope rc is reused here rather
+	 * than declaring a second rc that shadows it.
+	 */
+	for (i = 0; i < P9_QUIESCE_RETRIES; i++) {
+		rc = p9_thread_quiesced(cpu);
+		if (rc < 0)
+			break;
+		if (rc)
+			return 0;
+	}
+
+	prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+			" Unable to quiesce thread.\n",
+			chip_id, core_id, thread_id);
+
+	/* Best effort: ask the thread to continue again. */
+	if (xscom_write(chip_id, dctl_addr, P9_THREAD_CONT(thread_id))) {
+		prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+				" Unable to write EC_DIRECT_CONTROLS.\n",
+				chip_id, core_id, thread_id);
+	}
+
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Ask the thread @cpu to continue by writing its P9_THREAD_CONT bit to
+ * the core's direct controls register.
+ *
+ * Returns 0 on success, or OPAL_HARDWARE if the scom write fails, so that
+ * callers do not record the thread as running when the request never
+ * reached the hardware.
+ */
+static int p9_cont_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t thread_id = pir_to_thread_id(cpu->pir);
+	uint32_t dctl_addr;
+
+	dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS);
+	if (xscom_write(chip_id, dctl_addr, P9_THREAD_CONT(thread_id))) {
+		prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+				" Unable to write EC_DIRECT_CONTROLS.\n",
+				chip_id, core_id, thread_id);
+		return OPAL_HARDWARE;
+	}
+
+	return 0;
+}
+
+/*
+ * Request a system reset on the thread @cpu by writing its
+ * P9_THREAD_SRESET bit to the core's direct controls register.
+ *
+ * Returns 0 on success, or OPAL_HARDWARE if the scom write fails.
+ */
+static int p9_sreset_thread(struct cpu_thread *cpu)
+{
+	const uint32_t chip = pir_to_chip_id(cpu->pir);
+	const uint32_t core = pir_to_core_id(cpu->pir);
+	const uint32_t thread = pir_to_thread_id(cpu->pir);
+	const uint32_t ctl_reg = XSCOM_ADDR_P9_EC(core, P9_EC_DIRECT_CONTROLS);
+
+	if (!xscom_write(chip, ctl_reg, P9_THREAD_SRESET(thread)))
+		return 0;
+
+	prlog(PR_ERR, "Could not sreset thread %u:%u:%u:"
+			" Unable to write EC_DIRECT_CONTROLS.\n",
+			chip, core, thread);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Take a reference on special wakeup for the core containing @t.
+ *
+ * The count lives on the core's primary thread and is protected by its
+ * dctl_lock. Only the first reference touches the hardware; later ones
+ * just bump the count. The count is left unchanged if the hardware
+ * request fails.
+ *
+ * Returns OPAL_SUCCESS, OPAL_UNSUPPORTED when not running on POWER9, or
+ * the error from p9_core_set_special_wakeup().
+ */
+static int dctl_set_special_wakeup(struct cpu_thread *t)
+{
+	struct cpu_thread *core = t->primary;
+	int err = OPAL_SUCCESS;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&core->dctl_lock);
+	if (core->special_wakeup_count > 0) {
+		/* Already asserted on behalf of another user. */
+		core->special_wakeup_count++;
+	} else {
+		err = p9_core_set_special_wakeup(core);
+		if (!err)
+			core->special_wakeup_count = 1;
+	}
+	unlock(&core->dctl_lock);
+
+	return err;
+}
+
+/*
+ * Drop a reference on special wakeup for the core containing @t.
+ *
+ * The count lives on the core's primary thread and is protected by its
+ * dctl_lock. Special wakeup is de-asserted in hardware only when the last
+ * reference goes away, i.e. when the count is about to drop from 1 to 0.
+ * If de-asserting fails the count is left unchanged. A call made with no
+ * reference held is ignored so the unsigned count cannot wrap around.
+ *
+ * Returns OPAL_SUCCESS, OPAL_UNSUPPORTED when not running on POWER9, or
+ * the error from p9_core_clear_special_wakeup().
+ */
+static int dctl_clear_special_wakeup(struct cpu_thread *t)
+{
+	struct cpu_thread *c = t->primary;
+	int rc = OPAL_SUCCESS;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&c->dctl_lock);
+	if (c->special_wakeup_count > 0) {
+		/* Last user: this is the release that touches hardware. */
+		if (c->special_wakeup_count == 1)
+			rc = p9_core_clear_special_wakeup(c);
+		if (!rc)
+			c->special_wakeup_count--;
+	}
+	unlock(&c->dctl_lock);
+
+	return rc;
+}
+
+/*
+ * Stop thread @t using direct controls, under the dctl_lock of the
+ * core's primary thread.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_BUSY if the
+ * thread is already recorded as stopped, otherwise the result of
+ * p9_stop_thread(). dctl_stopped is set only when the stop succeeds.
+ */
+static int dctl_stop(struct cpu_thread *t)
+{
+	struct cpu_thread *core = t->primary;
+	int err = OPAL_BUSY;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&core->dctl_lock);
+	if (!t->dctl_stopped) {
+		err = p9_stop_thread(t);
+		if (!err)
+			t->dctl_stopped = true;
+	}
+	unlock(&core->dctl_lock);
+
+	return err;
+}
+
+/*
+ * Resume thread @t after a dctl_stop(), under the dctl_lock of the
+ * core's primary thread.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_BUSY if the
+ * thread is not recorded as stopped, otherwise the result of
+ * p9_cont_thread(). dctl_stopped is cleared only when that call
+ * returns 0.
+ */
+static int dctl_cont(struct cpu_thread *t)
+{
+	struct cpu_thread *core = t->primary;
+	int err = OPAL_BUSY;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&core->dctl_lock);
+	if (t->dctl_stopped) {
+		err = p9_cont_thread(t);
+		if (!err)
+			t->dctl_stopped = false;
+	}
+	unlock(&core->dctl_lock);
+
+	return err;
+}
+
+/*
+ * Raise a system reset on thread @t, which must have been stopped with
+ * dctl_stop() first. Runs under the dctl_lock of the core's primary
+ * thread.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_BUSY if the
+ * thread is not recorded as stopped, otherwise the result of
+ * p9_sreset_thread(). On success the thread is no longer recorded as
+ * stopped.
+ */
+static int dctl_sreset(struct cpu_thread *t)
+{
+	struct cpu_thread *core = t->primary;
+	int err = OPAL_BUSY;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&core->dctl_lock);
+	if (t->dctl_stopped) {
+		err = p9_sreset_thread(t);
+		if (!err)
+			t->dctl_stopped = false;
+	}
+	unlock(&core->dctl_lock);
+
+	return err;
+}
+
+/*
+ * Raise a system reset exception on another thread using the POWER9
+ * direct control scoms.
+ *
+ * Sequence:
+ *  1. assert special wakeup on the target's core;
+ *  2. stop the target thread and wait for it to quiesce;
+ *  3. sreset the target thread, which resumes execution on that thread;
+ *  4. de-assert special wakeup on the core.
+ *
+ * If the sreset step fails the thread is asked to continue instead.
+ * Special wakeup is released on every path once it has been taken.
+ *
+ * Returns 0 on success, OPAL_PARAMETER if @cpu is the calling thread, or
+ * the error code of the first step that failed.
+ */
+static int64_t p9_sreset_cpu(struct cpu_thread *cpu)
+{
+	int err;
+
+	if (this_cpu() == cpu) {
+		prlog(PR_ERR, "SRESET: Unable to reset self\n");
+		return OPAL_PARAMETER;
+	}
+
+	err = dctl_set_special_wakeup(cpu);
+	if (err)
+		return err;
+
+	err = dctl_stop(cpu);
+	if (!err) {
+		err = dctl_sreset(cpu);
+		/* The thread is stopped but was not reset: let it run on. */
+		if (err)
+			dctl_cont(cpu);
+	}
+
+	dctl_clear_special_wakeup(cpu);
+
+	return err;
+}
+
+/* Serialises callers of opal_signal_system_reset(). */
+static struct lock sreset_lock = LOCK_UNLOCKED;
+
+/*
+ * OPAL_SIGNAL_SYSTEM_RESET entry point: raise a system reset on the CPU
+ * whose server number is @cpu_nr.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_CONSTRAINED
+ * for a negative @cpu_nr (broadcast requests), OPAL_PARAMETER if no CPU
+ * has that server number, otherwise the result of p9_sreset_cpu().
+ */
+int64_t opal_signal_system_reset(int cpu_nr)
+{
+	struct cpu_thread *target;
+	int64_t rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	/*
+	 * Broadcasts are not supported. It is not clear which threads
+	 * should be signaled, so for now the OS is better off resetting
+	 * them one at a time.
+	 */
+	if (cpu_nr < 0)
+		return OPAL_CONSTRAINED;
+
+	/* Single-CPU reset. */
+	target = find_cpu_by_server(cpu_nr);
+	if (!target) {
+		prlog(PR_ERR, "SRESET: could not find cpu by server %d\n", cpu_nr);
+		return OPAL_PARAMETER;
+	}
+
+	lock(&sreset_lock);
+	rc = p9_sreset_cpu(target);
+	unlock(&sreset_lock);
+
+	return rc;
+}
+
+/*
+ * Register the OPAL_SIGNAL_SYSTEM_RESET call where it is supported.
+ *
+ * Nothing is registered when the QUIRK_MAMBO_CALLOUTS chip quirk is set,
+ * when the processor is not a POWER9, or on a POWER9 Nimbus whose PVR
+ * major version is 1 (DD1).
+ */
+void direct_controls_init(void)
+{
+	uint32_t pvr;
+
+	if (chip_quirk(QUIRK_MAMBO_CALLOUTS) || proc_gen != proc_gen_p9)
+		return;
+
+	/* DD1 has some sreset quirks we do not support */
+	pvr = mfspr(SPR_PVR);
+	if (!(is_power9n(pvr) && PVR_VERS_MAJ(pvr) == 1))
+		opal_register(OPAL_SIGNAL_SYSTEM_RESET,
+			      opal_signal_system_reset, 1);
+}
diff --git a/core/init.c b/core/init.c
index 8951e17b..ab260d40 100644
--- a/core/init.c
+++ b/core/init.c
@@ -881,6 +881,12 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	mfsi_init();
 
 	/*
+	 * The direct controls facility provides some control over CPUs
+	 * using scoms.
+	 */
+	direct_controls_init();
+
+	/*
 	 * Put various bits & pieces in device-tree that might not
 	 * already be there such as the /chosen node if not there yet,
 	 * the ICS node, etc... This can potentially use XSCOM
diff --git a/doc/opal-api/opal-signal-system-reset-145.rst b/doc/opal-api/opal-signal-system-reset-145.rst
index 3ddb6845..28e5e2f4 100644
--- a/doc/opal-api/opal-signal-system-reset-145.rst
+++ b/doc/opal-api/opal-signal-system-reset-145.rst
@@ -9,12 +9,13 @@ OPAL_SIGNAL_SYSTEM_RESET
 This OPAL call causes the specified cpu(s) to be reset to the system
 reset exception handler (0x100).
 
-The exact contents of system registers (e.g., SRR1 wakeup causes) may
-vary depending on implementation and should not be relied upon.
+The SRR1 register will indicate a power-saving wakeup when appropriate,
+and the wake reason will be System Reset (see Power ISA).
 
-Resetting active threads on the same core as this call is run may
-not be supported by some platforms. In that case, OPAL_PARTIAL will be
-returned and NONE of the interrupts will be delivered.
+This interrupt may not be recoverable in some cases (e.g., if it is
+raised when the target has MSR[RI]=0), so it should not be used in
+normal operation, but only for crashing, debugging, and similar
+exceptional cases.
 
 Arguments
 ---------
@@ -28,18 +29,23 @@ Arguments
 Returns
 -------
 OPAL_SUCCESS
-  The power down was updated successful.
+  The system reset request to the target CPU(s) was successful. This returns
+  asynchronously without acknowledgement from targets that system reset
+  interrupt processing has completed or even started.
 
 OPAL_PARAMETER
   A parameter was incorrect.
 
 OPAL_HARDWARE
-  Hardware indicated failure during reset.
+  Hardware indicated failure during reset. Some or all of the target CPUs
+  may have had the system reset delivered.
+
+OPAL_CONSTRAINED
+  Platform does not support broadcast operations.
 
 OPAL_PARTIAL
-  Platform can not reset all requested CPUs at this time. This requires
-  platform-specific code to work around, otherwise to be treated as
-  failure. No CPUs are reset.
+  Platform can not reset sibling threads on the same core as requested.
+  None of the specified CPUs are reset in this case.
 
 OPAL_UNSUPPORTED
   This processor/platform is not supported.
diff --git a/hdata/hdata.h b/hdata/hdata.h
index 82479f52..2e4840f4 100644
--- a/hdata/hdata.h
+++ b/hdata/hdata.h
@@ -36,20 +36,6 @@ extern void vpd_data_parse(struct dt_node *node,
 extern struct dt_node *find_xscom_for_chip(uint32_t chip_id);
 extern uint32_t pcid_to_chip_id(uint32_t proc_chip_id);
 
-static __unused bool is_power9n(uint32_t version)
-{
-	if (PVR_TYPE(version) != PVR_TYPE_P9)
-		return false;
-	/*
-	 * Bit 13 tells us:
-	 *   0 = Scale out (aka Nimbus)
-	 *   1 = Scale up  (aka Cumulus)
-	 */
-	if ((version >> 13) & 1)
-		return false;
-	return true;
-}
-
 extern struct dt_node *add_core_common(struct dt_node *cpus,
 				       const struct sppaca_cpu_cache *cache,
 				       const struct sppaca_cpu_timebase *tb,
diff --git a/include/cpu.h b/include/cpu.h
index fd3acf77..1b622a55 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -95,6 +95,13 @@ struct cpu_thread {
 
 	/* For use by XICS emulation on XIVE */
 	struct xive_cpu_state		*xstate;
+
+	/*
+	 * For direct controls scoms, including special wakeup.
+	 */
+	struct lock			dctl_lock; /* primary only */
+	bool				dctl_stopped; /* per thread */
+	uint32_t			special_wakeup_count; /* primary */
 };
 
 /* This global is set to 1 to allow secondaries to callin,
diff --git a/include/processor.h b/include/processor.h
index 2e1ac37d..da483046 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -206,6 +206,21 @@
 
 #include <compiler.h>
 #include <stdint.h>
+#include <stdbool.h>
+
+/*
+ * Return true if the PVR value @version identifies a scale-out (Nimbus)
+ * POWER9, and false for a scale-up POWER9 or any other processor type.
+ */
+static inline bool is_power9n(uint32_t version)
+{
+	/*
+	 * Bit 13 tells us:
+	 *   0 = Scale out (aka Nimbus)
+	 *   1 = Scale up  (aka Cumulus)
+	 */
+	const bool scale_up = (version >> 13) & 1;
+
+	return PVR_TYPE(version) == PVR_TYPE_P9 && !scale_up;
+}
 
 /*
  * SMT priority
diff --git a/include/skiboot.h b/include/skiboot.h
index 3040c517..a32af6a5 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -205,6 +205,10 @@ extern char __sym_map_end[];
 extern unsigned long get_symbol(unsigned long addr,
 				char **sym, char **sym_end);
 
+/* Direct controls */
+extern void direct_controls_init(void);
+extern int64_t opal_signal_system_reset(int cpu_nr);
+
 /* Fast reboot support */
 extern void disable_fast_reboot(const char *reason);
 extern void fast_reboot(void);
-- 
2.13.3



More information about the Skiboot mailing list