[Skiboot] [PATCH v2] core: POWER9 implement OPAL_SIGNAL_SYSTEM_RESET

Nicholas Piggin npiggin at gmail.com
Thu Sep 14 20:51:23 AEST 2017


This implements OPAL_SIGNAL_SYSTEM_RESET, using scom registers to
quiesce the target thread and raise a system reset exception on it.

This has been tested on DD1 and DD2 including ESL=0 and ESL=1 power
saving modes.

It will have to be tested with deep idle states when those are enabled.
If those cannot be supported, it should be possible to work around in
Linux.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---

Changes since v1:
- Set messages to PR_DEBUG
- DD2 can sreset threads on the same core

- Initial support for Linux now will NOT bounce sreset back to the
  originating core, because that adds a lot of complexity. This can
  be done afterwards.
 
 core/Makefile.inc                             |   1 +
 core/sreset.c                                 | 280 ++++++++++++++++++++++++++
 doc/opal-api/opal-signal-system-reset-145.rst |  23 ++-
 hdata/hdata.h                                 |  14 --
 hw/xscom.c                                    |   4 +
 include/processor.h                           |  15 ++
 include/skiboot.h                             |   3 +
 7 files changed, 316 insertions(+), 24 deletions(-)
 create mode 100644 core/sreset.c

diff --git a/core/Makefile.inc b/core/Makefile.inc
index f2de2f64..16204978 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -9,6 +9,7 @@ CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
 CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
 CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
 CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
+CORE_OBJS += sreset.o
 
 ifeq ($(SKIBOOT_GCOV),1)
 CORE_OBJS += gcov-profiling.o
diff --git a/core/sreset.c b/core/sreset.c
new file mode 100644
index 00000000..13e91ee1
--- /dev/null
+++ b/core/sreset.c
@@ -0,0 +1,280 @@
+/* Copyright 2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <fsp.h>
+#include <psi.h>
+#include <opal.h>
+#include <xscom.h>
+#include <interrupts.h>
+#include <cec.h>
+#include <timebase.h>
+#include <pci.h>
+#include <chip.h>
+#include <chiptod.h>
+#include <ipmi.h>
+
+#define P9_RAS_STATUS			0x10a02
+#define P9_RSTAT_QUIESCED(t)		PPC_BITMASK(0 + 8*(t), 3 + 8*(t)) /* t: value of pir_to_thread_id() */
+#define P9_RSTAT_RETRIES		100 /* RAS_STATUS polls in p9_stop_thread() before giving up */
+
+#define P9_RAS_MODEREG			0x10a9d /* not referenced anywhere in this file */
+#define P9_DIRECT_CONTROLS		0x10a9c
+#define P9_DCTL_STOP(t)			PPC_BIT(7 + 8*(t))
+#define P9_DCTL_CONT(t)			PPC_BIT(6 + 8*(t))
+#define P9_DCTL_SRESET(t)		PPC_BIT(4 + 8*(t))
+#define P9_DCTL_PWR(t)			PPC_BIT(32 + 8*(t))
+
+#define P9_CORE_THREAD_STATE		0x10ab3
+#define P9_CTS_STOP(t)			PPC_BIT(56 + (t))
+
+#define P9_PPM_GPMMR			0xf0100
+#define P9_GPMMR_SPWKUP_DONE		PPC_BIT(1)
+#define P9_GPMMR_SPWKUP_TIMEOUT		10 /* GPMMR polls, with time_wait_us(1) between them */
+
+#define P9_PPM_SPWKUP_OTR		0xf010a
+#define P9_SPWKUP_SET			PPC_BIT(0)
+
+
+/* Assert special wakeup on cpu's core and poll GPMMR until it is done; 0 on success, else OPAL_HARDWARE. */
+static int p9_core_set_special_wakeup(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t swake_addr;
+	uint32_t gpmmr_addr;
+	uint64_t val;
+	int i;
+
+	swake_addr = XSCOM_ADDR_P9_EC(core_id, P9_PPM_SPWKUP_OTR);
+	gpmmr_addr = XSCOM_ADDR_P9_EC(core_id, P9_PPM_GPMMR);
+
+	/*
+	 * The read-write-read pattern with read errors ignored comes from
+	 * P8 code. This should be revisited, but it does not appear to have
+	 * any ill effects.
+	 */
+	xscom_read(chip_id, swake_addr, &val);
+	if (xscom_write(chip_id, swake_addr, P9_SPWKUP_SET)) {
+		prlog(PR_DEBUG, "SRESET: Unable to write SPWKUP_OTR register\n");
+		return OPAL_HARDWARE;
+	}
+	xscom_read(chip_id, swake_addr, &val);
+
+	for (i = 0; i < P9_GPMMR_SPWKUP_TIMEOUT; i++) {
+		if (xscom_read(chip_id, gpmmr_addr, &val)) {
+			prlog(PR_DEBUG, "SRESET: Unable to read GPMMR register\n");
+			goto out_deassert;
+		}
+		if (val & P9_GPMMR_SPWKUP_DONE)
+			return 0;
+
+		time_wait_us(1);
+	}
+	prlog(PR_DEBUG, "SRESET: Special wakeup mode could not be set.\n");
+
+out_deassert:
+	/* De-assert special wakeup bit so that no failure path leaves it set */
+	xscom_read(chip_id, swake_addr, &val);
+	xscom_write(chip_id, swake_addr, 0);
+	xscom_read(chip_id, swake_addr, &val);
+	return OPAL_HARDWARE;
+}
+/* Drop the special wakeup request on cpu's core. Nothing is returned to the caller. */
+static void p9_core_clear_special_wakeup(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t swake_addr;
+	uint64_t val;
+
+	swake_addr = XSCOM_ADDR_P9_EC(core_id, P9_PPM_SPWKUP_OTR);
+
+	/* De-assert special wakeup bit; the xscom return codes are not checked */
+	xscom_read(chip_id, swake_addr, &val);
+	xscom_write(chip_id, swake_addr, 0);
+	xscom_read(chip_id, swake_addr, &val);
+}
+
+/* 1 if every quiesce bit of cpu's thread is set in RAS_STATUS, 0 if not, OPAL_HARDWARE if the read fails. */
+static int p9_thread_quiesced(struct cpu_thread *cpu)
+{
+	uint32_t chip = pir_to_chip_id(cpu->pir);
+	uint32_t core = pir_to_core_id(cpu->pir);
+	uint32_t thread = pir_to_thread_id(cpu->pir);
+	uint32_t status_addr = XSCOM_ADDR_P9_EC(core, P9_RAS_STATUS);
+	uint64_t quiesce_bits = P9_RSTAT_QUIESCED(thread);
+	uint64_t status;
+
+	if (xscom_read(chip, status_addr, &status)) {
+		prlog(PR_DEBUG, "SRESET: Unable to read status register\n");
+		return OPAL_HARDWARE;
+	}
+
+	/* All bits of the thread's field have to be set, not just some. */
+	if ((status & quiesce_bits) != quiesce_bits)
+		return 0;
+	return 1;
+}
+
+/* Ask cpu's thread to stop and poll until it is quiesced; if it never is, tell it to continue. 0 or OPAL_HARDWARE. */
+static int p9_stop_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t thread_id = pir_to_thread_id(cpu->pir);
+	uint32_t dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_DIRECT_CONTROLS);
+	int i;
+
+	if (xscom_write(chip_id, dctl_addr, P9_DCTL_STOP(thread_id))) {
+		prlog(PR_DEBUG, "SRESET: Unable to write DIRECT_CONTROLS register\n");
+		return OPAL_HARDWARE;
+	}
+	for (i = 0; i < P9_RSTAT_RETRIES; i++) {
+		int rc = p9_thread_quiesced(cpu);
+		if (rc < 0)
+			break;
+		if (rc)
+			return 0;
+	}
+	xscom_write(chip_id, dctl_addr, P9_DCTL_CONT(thread_id));
+	prlog(PR_DEBUG, "SRESET: Could not quiesce thread\n");
+	return OPAL_HARDWARE;
+}
+/* Raise a system reset on cpu's thread through DIRECT_CONTROLS; p9_sreset_cpu() stops the thread first. 0 or OPAL_HARDWARE. */
+static int p9_sreset_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t thread_id = pir_to_thread_id(cpu->pir);
+	uint32_t dctl_addr;
+	uint32_t cts_addr;
+	uint64_t cts_val;
+
+	dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_DIRECT_CONTROLS);
+	cts_addr = XSCOM_ADDR_P9_EC(core_id, P9_CORE_THREAD_STATE);
+
+	if (xscom_read(chip_id, cts_addr, &cts_val)) {
+		prlog(PR_DEBUG, "SRESET: Unable to read CORE_THREAD_STATE register\n");
+		return OPAL_HARDWARE;
+	}
+	if (!(cts_val & P9_CTS_STOP(thread_id))) {
+		/*
+		 * Quiescing a thread causes SRR1[46:47] to be set by the
+		 * system reset interrupt as though it was in a power saving
+		 * mode even if it was not.
+		 *
+		 * Setting the DCTL_PWR bit causes SRR1[46:47] to be clear,
+		 * so poke that if thread state says we were NOT in stop.
+		 */
+		if (xscom_write(chip_id, dctl_addr, P9_DCTL_PWR(thread_id))) {
+			prlog(PR_DEBUG, "SRESET: Unable to set power saving mode\n");
+			return OPAL_HARDWARE;
+		}
+	}
+
+	if (xscom_write(chip_id, dctl_addr, P9_DCTL_SRESET(thread_id))) {
+		prlog(PR_DEBUG, "SRESET: Unable to write DIRECT_CONTROLS register\n");
+		return OPAL_HARDWARE;
+	}
+
+	return 0;
+}
+
+/*
+ * Quiesce cpu's thread and raise a system reset on it. Returns 0 on success
+ * or the OPAL error of the step that failed, the sreset request included.
+ */
+static int64_t p9_sreset_cpu(struct cpu_thread *cpu)
+{
+	uint32_t version;
+	int rc;
+
+	if (this_cpu() == cpu) {
+		prlog(PR_DEBUG, "SRESET: Unable to reset self\n");
+		return OPAL_PARAMETER;
+	}
+
+	version = mfspr(SPR_PVR);
+	if (is_power9n(version) && (PVR_VERS_MAJ(version) == 1)) {
+		/* DD1 is unable to reset threads on the same core. */
+		if (this_cpu()->primary == cpu->primary) {
+			prlog(PR_DEBUG, "SRESET: Unable to reset threads on same core\n");
+			return OPAL_PARTIAL;
+		}
+	}
+
+	rc = p9_thread_quiesced(cpu);
+	if (rc < 0)
+		return rc;
+	if (rc) {
+		prlog(PR_DEBUG, "SRESET: Thread is quiesced already\n");
+		return OPAL_WRONG_STATE;
+	}
+
+	/*
+	 * We can not just assert the sreset direct control bit, because that
+	 * does not appear to set SRR1[46:47] bits correctly for idle vs
+	 * running threads. So quiesce it and check the core thread state for
+	 * idle, see above.
+	 */
+	rc = p9_core_set_special_wakeup(cpu);
+	if (rc)
+		return rc;
+
+	rc = p9_stop_thread(cpu);
+	if (rc) {
+		p9_core_clear_special_wakeup(cpu);
+		return rc;
+	}
+
+	rc = p9_sreset_thread(cpu);
+	/* Special wakeup is dropped whether or not the sreset request worked. */
+	p9_core_clear_special_wakeup(cpu);
+	return rc;
+}
+
+static struct lock sreset_lock = LOCK_UNLOCKED; /* held around p9_sreset_cpu() in signal_system_reset() */
+
+/*
+ * OPAL_SIGNAL_SYSTEM_RESET handler: raise a system reset on the thread whose
+ * server number is cpu_nr. POWER9 only.
+ */
+int64_t signal_system_reset(int cpu_nr)
+{
+	struct cpu_thread *target;
+	int64_t rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	/*
+	 * No broadcast support: it is unclear which threads ought to be
+	 * signaled, so for now the OS has to reset them one at a time.
+	 */
+	if (cpu_nr < 0)
+		return OPAL_PARTIAL;
+
+	target = find_cpu_by_server(cpu_nr);
+	if (target) {
+		lock(&sreset_lock);
+		rc = p9_sreset_cpu(target);
+		unlock(&sreset_lock);
+		return rc;
+	}
+	prlog(PR_DEBUG, "SRESET: could not find cpu by server %d\n", cpu_nr);
+	return OPAL_PARAMETER;
+}
diff --git a/doc/opal-api/opal-signal-system-reset-145.rst b/doc/opal-api/opal-signal-system-reset-145.rst
index 3ddb6845..6fc7a20b 100644
--- a/doc/opal-api/opal-signal-system-reset-145.rst
+++ b/doc/opal-api/opal-signal-system-reset-145.rst
@@ -9,12 +9,13 @@ OPAL_SIGNAL_SYSTEM_RESET
 This OPAL call causes the specified cpu(s) to be reset to the system
 reset exception handler (0x100).
 
-The exact contents of system registers (e.g., SRR1 wakeup causes) may
-vary depending on implementation and should not be relied upon.
+The SRR1 register will indicate a power-saving wakeup when appropriate,
+and the wake reason will be System Reset (see Power ISA).
 
-Resetting active threads on the same core as this call is run may
-not be supported by some platforms. In that case, OPAL_PARTIAL will be
-returned and NONE of the interrupts will be delivered.
+This interrupt may not be recoverable in some cases (e.g., if it is
+raised when the target has MSR[RI]=0), so it should not be used in
+normal operation, but only for crashing, debugging, and similar
+exceptional cases.
 
 Arguments
 ---------
@@ -28,18 +29,20 @@ Arguments
 Returns
 -------
 OPAL_SUCCESS
-  The power down was updated successful.
+  The system reset request to the target CPU(s) was successful. This returns
+  asynchronously without acknowledgement that system reset interrupt
+  processing has completed or even started.
 
 OPAL_PARAMETER
   A parameter was incorrect.
 
 OPAL_HARDWARE
-  Hardware indicated failure during reset.
+  Hardware indicated failure during reset. Some or all of the target CPUs
+  may have had the system reset delivered.
 
 OPAL_PARTIAL
-  Platform can not reset all requested CPUs at this time. This requires
-  platform-specific code to work around, otherwise to be treated as
-  failure. No CPUs are reset.
+  Platform can not reset sibling threads on the same core as requested.
+  None of the specified CPUs are reset in this case.
 
 OPAL_UNSUPPORTED
   This processor/platform is not supported.
diff --git a/hdata/hdata.h b/hdata/hdata.h
index cab06264..888d0637 100644
--- a/hdata/hdata.h
+++ b/hdata/hdata.h
@@ -36,20 +36,6 @@ extern void vpd_data_parse(struct dt_node *node,
 extern struct dt_node *find_xscom_for_chip(uint32_t chip_id);
 extern uint32_t pcid_to_chip_id(uint32_t proc_chip_id);
 
-static __unused bool is_power9n(uint32_t version)
-{
-	if (PVR_TYPE(version) != PVR_TYPE_P9)
-		return false;
-	/*
-	 * Bit 13 tells us:
-	 *   0 = Scale out (aka Nimbus)
-	 *   1 = Scale up  (aka Cumulus)
-	 */
-	if ((version >> 13) & 1)
-		return false;
-	return true;
-}
-
 extern struct dt_node *add_core_common(struct dt_node *cpus,
 				       const struct sppaca_cpu_cache *cache,
 				       const struct sppaca_cpu_timebase *tb,
diff --git a/hw/xscom.c b/hw/xscom.c
index 7bd78bf9..4a6d91f4 100644
--- a/hw/xscom.c
+++ b/hw/xscom.c
@@ -705,6 +705,10 @@ static void xscom_init_chip_info(struct proc_chip *chip)
 		printf("P9 DD%i.%i%d detected\n", 0xf & (chip->ec_level >> 4),
 		       chip->ec_level & 0xf, rev);
 		chip->ec_rev = rev;
+
+		if (!chip_quirk(QUIRK_MAMBO_CALLOUTS))
+			opal_register(OPAL_SIGNAL_SYSTEM_RESET,
+					signal_system_reset, 1);
 	}
 }
 
diff --git a/include/processor.h b/include/processor.h
index 2e1ac37d..da483046 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -206,6 +206,21 @@
 
 #include <compiler.h>
 #include <stdint.h>
+#include <stdbool.h>
+
+/*
+ * True when the PVR value describes a POWER9 of the scale-out flavour.
+ *
+ * Bit 13 tells the two POWER9 flavours apart:
+ *   0 = Scale out (aka Nimbus)
+ *   1 = Scale up  (aka Cumulus)
+ */
+static inline bool is_power9n(uint32_t version)
+{
+	bool scale_up = (version >> 13) & 1;
+
+	return PVR_TYPE(version) == PVR_TYPE_P9 && !scale_up;
+}
 
 /*
  * SMT priority
diff --git a/include/skiboot.h b/include/skiboot.h
index 0ab9f388..55aa9b8e 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -198,6 +198,9 @@ extern char __sym_map_end[];
 extern unsigned long get_symbol(unsigned long addr,
 				char **sym, char **sym_end);
 
+/* System reset */
+extern int64_t signal_system_reset(int cpu_nr);
+
 /* Fast reboot support */
 extern void disable_fast_reboot(const char *reason);
 extern void fast_reboot(void);
-- 
2.13.3



More information about the Skiboot mailing list