[Skiboot] [PATCH v4] core: POWER9 implement OPAL_SIGNAL_SYSTEM_RESET
Nicholas Piggin
npiggin at gmail.com
Wed Sep 20 16:56:03 AEST 2017
This implements OPAL_SIGNAL_SYSTEM_RESET, using scom registers to
quiesce the target thread and raise a system reset exception on it.
It has been tested on DD2 with stop0 ESL=0 and ESL=1 shallow power
saving modes.
DD1 is not implemented because it is sufficiently different as to
make support difficult.
Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
Since v3:
- Rework the special wakeup and direct control code so it is in more of
a framework that can be reused and extended. Provide some refcounting
and locking for multiple users.
- Use the HYP special wakeup register rather than OTR (which was copied
from pdbg).
- Use the SSHHYP register to poll for special wakeup done, rather than
GPMMR, which may be the better approach because GPMMR seems to be
multiplexing all special wakeup registers.
- Add a microsecond delay before de-asserting special wakeup, which may
avoid some failures (Vaidy is looking into this further).
- Remove read-write-read sequence on the special wakeup register that
came from P8.
- Remove DD1 support again. This allows the SRR1 power saving workaround
to be removed, which makes things very simple now.
- Improve comments, error messages and check a few more xscom failure
registers.
core/Makefile.inc | 2 +-
core/cpu.c | 1 +
core/direct-controls.c | 409 ++++++++++++++++++++++++++
core/init.c | 6 +
doc/opal-api/opal-signal-system-reset-145.rst | 26 +-
hdata/hdata.h | 14 -
include/cpu.h | 7 +
include/processor.h | 15 +
include/skiboot.h | 4 +
9 files changed, 459 insertions(+), 25 deletions(-)
create mode 100644 core/direct-controls.c
diff --git a/core/Makefile.inc b/core/Makefile.inc
index e32c0a0a..d6a7269f 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -9,7 +9,7 @@ CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
-CORE_OBJS += pci-dt-slot.o
+CORE_OBJS += pci-dt-slot.o direct-controls.o
ifeq ($(SKIBOOT_GCOV),1)
CORE_OBJS += gcov-profiling.o
diff --git a/core/cpu.c b/core/cpu.c
index bcf84ad5..358356a2 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -625,6 +625,7 @@ static void init_cpu_thread(struct cpu_thread *t,
enum cpu_thread_state state,
unsigned int pir)
{
+ init_lock(&t->dctl_lock);
init_lock(&t->job_lock);
list_head_init(&t->job_queue);
t->state = state;
diff --git a/core/direct-controls.c b/core/direct-controls.c
new file mode 100644
index 00000000..9acdab5e
--- /dev/null
+++ b/core/direct-controls.c
@@ -0,0 +1,409 @@
+/* Copyright 2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <cpu.h>
+#include <xscom.h>
+#include <timebase.h>
+#include <chip.h>
+
+/*
+ * RAS_STATUS core scom, read by p9_thread_quiesced(). Each thread t owns
+ * an 8-bit-wide slot; P9_THREAD_QUIESCED(t) selects the first four bits of
+ * that slot, all of which must be set for the thread to count as quiesced.
+ * P9_QUIESCE_RETRIES is how many times p9_stop_thread() polls for that
+ * state after requesting a stop.
+ */
+#define P9_RAS_STATUS 0x10a02
+#define P9_THREAD_QUIESCED(t) PPC_BITMASK(0 + 8*(t), 3 + 8*(t))
+#define P9_QUIESCE_RETRIES 100
+
+/*
+ * EC_DIRECT_CONTROLS core scom. Command bits are laid out per thread with
+ * a stride of 8 bits; writing one of them requests that action on thread t.
+ * P9_THREAD_PWR is not referenced by the code visible in this file.
+ */
+#define P9_EC_DIRECT_CONTROLS 0x10a9c
+#define P9_THREAD_STOP(t) PPC_BIT(7 + 8*(t))
+#define P9_THREAD_CONT(t) PPC_BIT(6 + 8*(t))
+#define P9_THREAD_SRESET(t) PPC_BIT(4 + 8*(t))
+#define P9_THREAD_PWR(t) PPC_BIT(32 + 8*(t))
+
+/*
+ * EC_PPM_SPECIAL_WKUP_HYP: P9_SPWKUP_SET is written to assert special
+ * wakeup; 0 is written to de-assert it.
+ */
+#define P9_SPWKUP_SET PPC_BIT(0)
+
+/*
+ * PPM_SSHHYP is polled for P9_SPECIAL_WKUP_DONE after special wakeup has
+ * been asserted. P9_SPWKUP_TIMEOUT is the number of polls, each followed
+ * by a 1us wait, before giving up.
+ */
+#define P9_EC_PPM_SSHHYP 0x0114
+#define P9_SPECIAL_WKUP_DONE PPC_BIT(1)
+#define P9_SPWKUP_TIMEOUT 10
+
+/*
+ * This implements direct control facilities of processor cores and threads
+ * using scom registers.
+ */
+
+/*
+ * Assert hypervisor special wakeup on the core that contains @cpu and
+ * wait for the hardware to acknowledge it.
+ *
+ * P9_SPWKUP_SET is written to EC_PPM_SPECIAL_WKUP_HYP, then PPM_SSHHYP is
+ * polled up to P9_SPWKUP_TIMEOUT times, 1us apart, for
+ * P9_SPECIAL_WKUP_DONE.
+ *
+ * Returns 0 once the done bit is observed. Returns OPAL_HARDWARE if a
+ * scom access fails or the poll limit is reached; once the request has
+ * been written, every failure path de-asserts special wakeup again
+ * before returning.
+ */
+static int p9_core_set_special_wakeup(struct cpu_thread *cpu)
+{
+	uint32_t chip = pir_to_chip_id(cpu->pir);
+	uint32_t core = pir_to_core_id(cpu->pir);
+	uint32_t wkup_reg;
+	uint32_t status_reg;
+	uint64_t status;
+	int attempt;
+
+	wkup_reg = XSCOM_ADDR_P9_EC_SLAVE(core, EC_PPM_SPECIAL_WKUP_HYP);
+	status_reg = XSCOM_ADDR_P9_EC_SLAVE(core, P9_EC_PPM_SSHHYP);
+
+	if (xscom_write(chip, wkup_reg, P9_SPWKUP_SET) != 0) {
+		/* Nothing was asserted, so there is nothing to undo. */
+		prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+				" Unable to write PPM_SPECIAL_WKUP_HYP.\n",
+				chip, core);
+		return OPAL_HARDWARE;
+	}
+
+	for (attempt = 0; attempt < P9_SPWKUP_TIMEOUT; attempt++) {
+		if (xscom_read(chip, status_reg, &status) != 0) {
+			prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+					" Unable to read PPM_SSHHYP.\n",
+					chip, core);
+			break;
+		}
+		if (status & P9_SPECIAL_WKUP_DONE)
+			return 0;
+
+		time_wait_us(1);
+	}
+
+	/* Leaving the loop without a break means every poll was used up. */
+	if (attempt >= P9_SPWKUP_TIMEOUT) {
+		prlog(PR_ERR, "Could not set special wakeup on %u:%u:"
+				" timeout waiting for SPECIAL_WKUP_DONE.\n",
+				chip, core);
+	}
+
+	/* De-assert special wakeup after a small delay. */
+	time_wait_us(1);
+	xscom_write(chip, wkup_reg, 0);
+
+	return OPAL_HARDWARE;
+}
+
+/*
+ * De-assert hypervisor special wakeup on the core that contains @cpu by
+ * writing 0 to EC_PPM_SPECIAL_WKUP_HYP.
+ *
+ * Returns OPAL_SUCCESS, or OPAL_HARDWARE if the scom write fails.
+ */
+static int p9_core_clear_special_wakeup(struct cpu_thread *cpu)
+{
+	uint32_t chip = pir_to_chip_id(cpu->pir);
+	uint32_t core = pir_to_core_id(cpu->pir);
+	uint32_t wkup_reg;
+
+	wkup_reg = XSCOM_ADDR_P9_EC_SLAVE(core, EC_PPM_SPECIAL_WKUP_HYP);
+
+	/*
+	 * Wait briefly before de-asserting. The delay may help avoid
+	 * problems when special wakeup is set and cleared back-to-back;
+	 * this still needs to be confirmed.
+	 */
+	time_wait_us(1);
+
+	if (!xscom_write(chip, wkup_reg, 0))
+		return OPAL_SUCCESS;
+
+	prlog(PR_ERR, "Could not clear special wakeup on %u:%u:"
+			" Unable to write PPM_SPECIAL_WKUP_HYP.\n",
+			chip, core);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Report whether the thread @cpu is quiesced according to RAS_STATUS.
+ *
+ * Returns 1 when every bit of P9_THREAD_QUIESCED() for this thread is
+ * set, 0 when at least one is clear, and OPAL_HARDWARE if RAS_STATUS
+ * cannot be read.
+ */
+static int p9_thread_quiesced(struct cpu_thread *cpu)
+{
+	uint32_t chip = pir_to_chip_id(cpu->pir);
+	uint32_t core = pir_to_core_id(cpu->pir);
+	uint32_t thread = pir_to_thread_id(cpu->pir);
+	uint32_t ras_reg = XSCOM_ADDR_P9_EC(core, P9_RAS_STATUS);
+	uint64_t status;
+
+	if (xscom_read(chip, ras_reg, &status) != 0) {
+		prlog(PR_ERR, "Could not check thread state on %u:%u:"
+				" Unable to read RAS_STATUS.\n",
+				chip, core);
+		return OPAL_HARDWARE;
+	}
+
+	/* A partially set mask does not count as quiesced. */
+	if ((status & P9_THREAD_QUIESCED(thread)) != P9_THREAD_QUIESCED(thread))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Request a stop of thread @cpu through EC_DIRECT_CONTROLS and wait for
+ * it to quiesce.
+ *
+ * If the thread already reads as quiesced a warning is logged and the
+ * stop is issued anyway. After the stop request, RAS_STATUS is polled up
+ * to P9_QUIESCE_RETRIES times with no delay between polls.
+ *
+ * Returns 0 once the thread is quiesced. Returns the error from
+ * p9_thread_quiesced() if the initial state check fails, and
+ * OPAL_HARDWARE if the stop cannot be written or the thread does not
+ * quiesce; in the latter case a continue request is written so the
+ * thread is not left half-stopped.
+ */
+static int p9_stop_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip_id = pir_to_chip_id(cpu->pir);
+	uint32_t core_id = pir_to_core_id(cpu->pir);
+	uint32_t thread_id = pir_to_thread_id(cpu->pir);
+	uint32_t dctl_addr;
+	int rc;
+	int i;
+
+	dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS);
+
+	rc = p9_thread_quiesced(cpu);
+	if (rc < 0)
+		return rc;
+	if (rc)
+		prlog(PR_WARNING, "Stopping thread %u:%u:%u warning:"
+				" thread is quiesced already.\n",
+				chip_id, core_id, thread_id);
+
+	if (xscom_write(chip_id, dctl_addr, P9_THREAD_STOP(thread_id))) {
+		prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+				" Unable to write EC_DIRECT_CONTROLS.\n",
+				chip_id, core_id, thread_id);
+		return OPAL_HARDWARE;
+	}
+
+	for (i = 0; i < P9_QUIESCE_RETRIES; i++) {
+		/* Reuse the outer rc rather than shadowing it. */
+		rc = p9_thread_quiesced(cpu);
+		if (rc < 0)
+			break;
+		if (rc)
+			return 0;
+	}
+
+	prlog(PR_ERR, "Could not stop thread %u:%u:%u:"
+			" Unable to quiesce thread.\n",
+			chip_id, core_id, thread_id);
+
+	/* Best effort: undo the stop request before reporting failure. */
+	if (xscom_write(chip_id, dctl_addr, P9_THREAD_CONT(thread_id))) {
+		prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+				" Unable to write EC_DIRECT_CONTROLS.\n",
+				chip_id, core_id, thread_id);
+	}
+
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Request that thread @cpu continue by writing P9_THREAD_CONT to
+ * EC_DIRECT_CONTROLS.
+ *
+ * Always returns 0: a failed scom write is logged but not reported to
+ * the caller.
+ */
+static int p9_cont_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip = pir_to_chip_id(cpu->pir);
+	uint32_t core = pir_to_core_id(cpu->pir);
+	uint32_t thread = pir_to_thread_id(cpu->pir);
+	uint32_t ctl_reg = XSCOM_ADDR_P9_EC(core, P9_EC_DIRECT_CONTROLS);
+
+	if (xscom_write(chip, ctl_reg, P9_THREAD_CONT(thread)) != 0)
+		prlog(PR_ERR, "Could not resume thread %u:%u:%u:"
+				" Unable to write EC_DIRECT_CONTROLS.\n",
+				chip, core, thread);
+
+	return 0;
+}
+
+/*
+ * Request a system reset on thread @cpu by writing P9_THREAD_SRESET to
+ * EC_DIRECT_CONTROLS.
+ *
+ * Returns 0 on success, or OPAL_HARDWARE if the scom write fails.
+ */
+static int p9_sreset_thread(struct cpu_thread *cpu)
+{
+	uint32_t chip = pir_to_chip_id(cpu->pir);
+	uint32_t core = pir_to_core_id(cpu->pir);
+	uint32_t thread = pir_to_thread_id(cpu->pir);
+	uint32_t ctl_reg = XSCOM_ADDR_P9_EC(core, P9_EC_DIRECT_CONTROLS);
+
+	if (!xscom_write(chip, ctl_reg, P9_THREAD_SRESET(thread)))
+		return 0;
+
+	prlog(PR_ERR, "Could not sreset thread %u:%u:%u:"
+			" Unable to write EC_DIRECT_CONTROLS.\n",
+			chip, core, thread);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Take a reference on special wakeup for the core that contains @t.
+ *
+ * The reference count and lock live on the core's primary thread. Only
+ * the first reference touches the hardware; later ones just bump the
+ * count. The count is incremented only when the result is success.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, otherwise
+ * OPAL_SUCCESS or the error from p9_core_set_special_wakeup().
+ */
+static int dctl_set_special_wakeup(struct cpu_thread *t)
+{
+	struct cpu_thread *primary = t->primary;
+	int rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&primary->dctl_lock);
+
+	if (primary->special_wakeup_count != 0)
+		rc = OPAL_SUCCESS;
+	else
+		rc = p9_core_set_special_wakeup(primary);
+
+	if (rc == OPAL_SUCCESS)
+		primary->special_wakeup_count++;
+
+	unlock(&primary->dctl_lock);
+
+	return rc;
+}
+
+/*
+ * Drop a reference on special wakeup for the core that contains @t.
+ *
+ * The reference count and lock live on the core's primary thread. When
+ * the reference being dropped is the last one (count == 1), special
+ * wakeup is de-asserted in hardware; otherwise only the count changes.
+ * The count is decremented only when the result is success, so a failed
+ * de-assert leaves the reference in place.
+ *
+ * The count is not checked for underflow: each call is expected to pair
+ * with an earlier successful dctl_set_special_wakeup().
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, otherwise
+ * OPAL_SUCCESS or the error from p9_core_clear_special_wakeup().
+ */
+static int dctl_clear_special_wakeup(struct cpu_thread *t)
+{
+	struct cpu_thread *c = t->primary;
+	int rc = OPAL_SUCCESS;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&c->dctl_lock);
+	/* Last user: this is the one that must release the hardware. */
+	if (c->special_wakeup_count == 1)
+		rc = p9_core_clear_special_wakeup(c);
+	if (!rc)
+		c->special_wakeup_count--;
+	unlock(&c->dctl_lock);
+
+	return rc;
+}
+
+/*
+ * Stop thread @t via direct controls and record that it is stopped.
+ *
+ * Serialised by the dctl_lock of the core's primary thread. dctl_stopped
+ * is set only when p9_stop_thread() succeeds.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_BUSY if the
+ * thread is already marked stopped, otherwise the result of
+ * p9_stop_thread().
+ */
+static int dctl_stop(struct cpu_thread *t)
+{
+	struct cpu_thread *primary = t->primary;
+	int rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&primary->dctl_lock);
+	if (t->dctl_stopped) {
+		rc = OPAL_BUSY;
+	} else {
+		rc = p9_stop_thread(t);
+		if (rc == 0)
+			t->dctl_stopped = true;
+	}
+	unlock(&primary->dctl_lock);
+
+	return rc;
+}
+
+/*
+ * Resume thread @t, which must previously have been stopped through
+ * dctl_stop().
+ *
+ * Serialised by the dctl_lock of the core's primary thread. dctl_stopped
+ * is cleared only when p9_cont_thread() returns 0.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_BUSY if the
+ * thread is not marked stopped, otherwise the result of
+ * p9_cont_thread().
+ */
+static int dctl_cont(struct cpu_thread *t)
+{
+	struct cpu_thread *primary = t->primary;
+	int rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&primary->dctl_lock);
+	if (t->dctl_stopped) {
+		rc = p9_cont_thread(t);
+		if (rc == 0)
+			t->dctl_stopped = false;
+	} else {
+		rc = OPAL_BUSY;
+	}
+	unlock(&primary->dctl_lock);
+
+	return rc;
+}
+
+/*
+ * Raise a system reset on thread @t, which must previously have been
+ * stopped through dctl_stop().
+ *
+ * Serialised by the dctl_lock of the core's primary thread. On success
+ * the thread is no longer considered stopped, so dctl_stopped is
+ * cleared.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_BUSY if the
+ * thread is not marked stopped, otherwise the result of
+ * p9_sreset_thread().
+ */
+static int dctl_sreset(struct cpu_thread *t)
+{
+	struct cpu_thread *primary = t->primary;
+	int rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	lock(&primary->dctl_lock);
+	if (t->dctl_stopped) {
+		rc = p9_sreset_thread(t);
+		if (rc == 0)
+			t->dctl_stopped = false;
+	} else {
+		rc = OPAL_BUSY;
+	}
+	unlock(&primary->dctl_lock);
+
+	return rc;
+}
+
+/*
+ * Raise a system reset exception on another thread using the POWER9
+ * direct control scoms.
+ *
+ * Sequence:
+ *  1. Assert special wakeup on the target's core.
+ *  2. Stop the target thread and wait for it to quiesce.
+ *  3. Sreset the target thread, which resumes execution on it.
+ *  4. De-assert special wakeup on the core.
+ *
+ * If the sreset step fails the thread is asked to continue instead.
+ * Special wakeup is released on every path that acquired it; the result
+ * of that release is not reflected in the return value.
+ *
+ * Returns 0 on success, OPAL_PARAMETER if @cpu is the calling thread,
+ * otherwise the error from the step that failed.
+ */
+static int64_t p9_sreset_cpu(struct cpu_thread *cpu)
+{
+	int rc;
+
+	if (this_cpu() == cpu) {
+		prlog(PR_ERR, "SRESET: Unable to reset self\n");
+		return OPAL_PARAMETER;
+	}
+
+	rc = dctl_set_special_wakeup(cpu);
+	if (rc != 0)
+		return rc;
+
+	rc = dctl_stop(cpu);
+	if (rc == 0) {
+		rc = dctl_sreset(cpu);
+		/* The thread is stopped but was not reset: let it run again. */
+		if (rc != 0)
+			dctl_cont(cpu);
+	}
+
+	dctl_clear_special_wakeup(cpu);
+
+	return rc;
+}
+
+/* Serialises system reset requests made through the OPAL call below. */
+static struct lock sreset_lock = LOCK_UNLOCKED;
+
+/*
+ * OPAL_SIGNAL_SYSTEM_RESET entry point.
+ *
+ * @cpu_nr is the server number of the single thread to reset. Negative
+ * values request a broadcast, which is not supported: it is not clear
+ * which threads should be signaled, so for now the OS is expected to
+ * reset threads one at a time.
+ *
+ * Returns OPAL_UNSUPPORTED when not running on POWER9, OPAL_CONSTRAINED
+ * for broadcast requests, OPAL_PARAMETER when no cpu matches @cpu_nr,
+ * otherwise the result of p9_sreset_cpu().
+ */
+int64_t opal_signal_system_reset(int cpu_nr)
+{
+	struct cpu_thread *target;
+	int64_t rc;
+
+	if (proc_gen != proc_gen_p9)
+		return OPAL_UNSUPPORTED;
+
+	if (cpu_nr < 0)
+		return OPAL_CONSTRAINED;
+
+	target = find_cpu_by_server(cpu_nr);
+	if (!target) {
+		prlog(PR_ERR, "SRESET: could not find cpu by server %d\n", cpu_nr);
+		return OPAL_PARAMETER;
+	}
+
+	lock(&sreset_lock);
+	rc = p9_sreset_cpu(target);
+	unlock(&sreset_lock);
+
+	return rc;
+}
+
+/*
+ * Register the OPAL_SIGNAL_SYSTEM_RESET call where it is supported.
+ *
+ * Nothing is registered when the QUIRK_MAMBO_CALLOUTS chip quirk is set,
+ * when not running on POWER9, or on a POWER9 Nimbus whose PVR major
+ * version is 1 (DD1), which has sreset quirks that are not supported.
+ */
+void direct_controls_init(void)
+{
+	uint32_t pvr;
+
+	if (chip_quirk(QUIRK_MAMBO_CALLOUTS) || proc_gen != proc_gen_p9)
+		return;
+
+	pvr = mfspr(SPR_PVR);
+
+	/* Register on everything except DD1. */
+	if (!is_power9n(pvr) || PVR_VERS_MAJ(pvr) != 1)
+		opal_register(OPAL_SIGNAL_SYSTEM_RESET,
+				opal_signal_system_reset, 1);
+}
diff --git a/core/init.c b/core/init.c
index 8951e17b..ab260d40 100644
--- a/core/init.c
+++ b/core/init.c
@@ -881,6 +881,12 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
mfsi_init();
/*
+ * The direct controls facility provides some control over CPUs
+ * using scoms.
+ */
+ direct_controls_init();
+
+ /*
* Put various bits & pieces in device-tree that might not
* already be there such as the /chosen node if not there yet,
* the ICS node, etc... This can potentially use XSCOM
diff --git a/doc/opal-api/opal-signal-system-reset-145.rst b/doc/opal-api/opal-signal-system-reset-145.rst
index 3ddb6845..28e5e2f4 100644
--- a/doc/opal-api/opal-signal-system-reset-145.rst
+++ b/doc/opal-api/opal-signal-system-reset-145.rst
@@ -9,12 +9,13 @@ OPAL_SIGNAL_SYSTEM_RESET
This OPAL call causes the specified cpu(s) to be reset to the system
reset exception handler (0x100).
-The exact contents of system registers (e.g., SRR1 wakeup causes) may
-vary depending on implementation and should not be relied upon.
+The SRR1 register will indicate a power-saving wakeup when appropriate,
+and the wake reason will be System Reset (see Power ISA).
-Resetting active threads on the same core as this call is run may
-not be supported by some platforms. In that case, OPAL_PARTIAL will be
-returned and NONE of the interrupts will be delivered.
+This interrupt may not be recoverable in some cases (e.g., if it is
+raised when the target has MSR[RI]=0), so it should not be used in
+normal operation, but only for crashing, debugging, and similar
+exceptional cases.
Arguments
---------
@@ -28,18 +29,23 @@ Arguments
Returns
-------
OPAL_SUCCESS
- The power down was updated successful.
+ The system reset request to the target CPU(s) was successful. This returns
+ asynchronously without acknowledgement from targets that system reset
+ interrupt processing has completed or even started.
OPAL_PARAMETER
A parameter was incorrect.
OPAL_HARDWARE
- Hardware indicated failure during reset.
+ Hardware indicated failure during reset. Some or all of the target CPUs
+ may still have had the system reset delivered.
+
+OPAL_CONSTRAINED
+ Platform does not support broadcast operations.
OPAL_PARTIAL
- Platform can not reset all requested CPUs at this time. This requires
- platform-specific code to work around, otherwise to be treated as
- failure. No CPUs are reset.
+ Platform can not reset sibling threads on the same core as requested.
+ None of the specified CPUs are reset in this case.
OPAL_UNSUPPORTED
This processor/platform is not supported.
diff --git a/hdata/hdata.h b/hdata/hdata.h
index 82479f52..2e4840f4 100644
--- a/hdata/hdata.h
+++ b/hdata/hdata.h
@@ -36,20 +36,6 @@ extern void vpd_data_parse(struct dt_node *node,
extern struct dt_node *find_xscom_for_chip(uint32_t chip_id);
extern uint32_t pcid_to_chip_id(uint32_t proc_chip_id);
-static __unused bool is_power9n(uint32_t version)
-{
- if (PVR_TYPE(version) != PVR_TYPE_P9)
- return false;
- /*
- * Bit 13 tells us:
- * 0 = Scale out (aka Nimbus)
- * 1 = Scale up (aka Cumulus)
- */
- if ((version >> 13) & 1)
- return false;
- return true;
-}
-
extern struct dt_node *add_core_common(struct dt_node *cpus,
const struct sppaca_cpu_cache *cache,
const struct sppaca_cpu_timebase *tb,
diff --git a/include/cpu.h b/include/cpu.h
index fd3acf77..1b622a55 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -95,6 +95,13 @@ struct cpu_thread {
/* For use by XICS emulation on XIVE */
struct xive_cpu_state *xstate;
+
+ /*
+ * For direct controls scoms, including special wakeup.
+ */
+ struct lock dctl_lock; /* primary only */
+ bool dctl_stopped; /* per thread */
+ uint32_t special_wakeup_count; /* primary */
};
/* This global is set to 1 to allow secondaries to callin,
diff --git a/include/processor.h b/include/processor.h
index 2e1ac37d..da483046 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -206,6 +206,21 @@
#include <compiler.h>
#include <stdint.h>
+#include <stdbool.h>
+
+/*
+ * Return true when the PVR value @version identifies a POWER9 scale-out
+ * (Nimbus) part.
+ *
+ * The PVR type must be PVR_TYPE_P9, and bit 13 distinguishes the two
+ * variants:
+ *   0 = Scale out (aka Nimbus)
+ *   1 = Scale up (aka Cumulus)
+ */
+static inline bool is_power9n(uint32_t version)
+{
+	return PVR_TYPE(version) == PVR_TYPE_P9 &&
+		((version >> 13) & 1) == 0;
+}
/*
* SMT priority
diff --git a/include/skiboot.h b/include/skiboot.h
index 3040c517..a32af6a5 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -205,6 +205,10 @@ extern char __sym_map_end[];
extern unsigned long get_symbol(unsigned long addr,
char **sym, char **sym_end);
+/* Direct controls */
+extern void direct_controls_init(void);
+extern int64_t opal_signal_system_reset(int cpu_nr);
+
/* Fast reboot support */
extern void disable_fast_reboot(const char *reason);
extern void fast_reboot(void);
--
2.13.3
More information about the Skiboot
mailing list