[Skiboot] [PATCH v2 23/25] core: Add support for quiescing OPAL

Nicholas Piggin npiggin at gmail.com
Sat Nov 25 01:08:32 AEDT 2017


Quiescing is ensuring all host controlled CPUs (except the current
one) are out of OPAL and prevented from entering. This can be used in
debug and shutdown paths, particularly with system reset sequences.

This patch adds per-CPU entry and exit tracking for OPAL calls, and
adds logic to "hold" or "reject" at entry time, if OPAL is quiesced.

An OPAL call is added, to expose the functionality to Linux, where it
can be used for shutdown, kexec, and before generating sreset IPIs for
debugging (so the debug code does not recurse into OPAL).

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 core/cpu.c                                    |   5 +
 core/opal.c                                   | 166 +++++++++++++++++++++++++-
 doc/opal-api/opal-quiesce-158.rst             |  59 +++++++++
 doc/opal-api/opal-signal-system-reset-145.rst |   7 ++
 include/cpu.h                                 |   2 +
 include/opal-api.h                            |   9 +-
 include/opal-internal.h                       |   2 +
 7 files changed, 246 insertions(+), 4 deletions(-)
 create mode 100644 doc/opal-api/opal-quiesce-158.rst

diff --git a/core/cpu.c b/core/cpu.c
index 2dba2f504..b94e04ef2 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -1220,6 +1220,11 @@ static int64_t opal_return_cpu(void)
 {
 	prlog(PR_DEBUG, "OPAL: Returning CPU 0x%04x\n", this_cpu()->pir);
 
+	this_cpu()->in_opal_call--;
+	if (this_cpu()->in_opal_call != 0) {
+		printf("OPAL in_opal_call=%u\n", this_cpu()->in_opal_call);
+	}
+
 	__secondary_cpu_entry();
 
 	return OPAL_HARDWARE; /* Should not happen */
diff --git a/core/opal.c b/core/opal.c
index 752dd3023..dab5b78ac 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -113,6 +113,14 @@ static void opal_trace_entry(struct stack_frame *eframe __unused)
 #endif
 }
 
+/*
+ * opal_quiesce_state is claimed with cmpxchg32() in opal_quiesce() and so
+ * acts as a lock. Don't use an actual lock to avoid lock busting.
+ */
+static uint32_t opal_quiesce_state;	/* 0 or QUIESCE_HOLD/QUIESCE_REJECT */
+static int32_t opal_quiesce_owner;	/* PIR */
+static int32_t opal_quiesce_target;	/* -1 or PIR */
+
 static int64_t opal_check_token(uint64_t token);
 
 /* Called from head.S, thus no prototype */
@@ -134,16 +142,168 @@ int64_t opal_entry_check(struct stack_frame *eframe)
 	if (!opal_check_token(token))
 		return opal_bad_token(token);
 
+	if (!opal_quiesce_state && cpu->in_opal_call) {
+		printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir, token);
+		return OPAL_BUSY;
+	}
+
+again:
+	cpu->in_opal_call++;
+	/*
+	 * Order the store in_opal_call vs load quiesce_opal_call.
+	 * This also provides an acquire barrier for opal entry vs
+	 * another thread quiescing opal. In this way, quiescing
+	 * can behave as mutual exclusion.
+	 */
+	sync();
+	if (cpu->quiesce_opal_call) {
+		cpu->in_opal_call--;
+		if (opal_quiesce_state == QUIESCE_REJECT)
+			return OPAL_BUSY;
+		smt_lowest();
+		while (cpu->quiesce_opal_call)
+			barrier();
+		smt_medium();
+		goto again;
+	}
+
 	return OPAL_SUCCESS;
 }
 
 void opal_exit_check(int64_t retval, struct stack_frame *eframe);
 
-void __attrconst opal_exit_check(int64_t retval, struct stack_frame *eframe)
+void opal_exit_check(int64_t retval, struct stack_frame *eframe)
 {
-	(void)retval;
-	(void)eframe;
+	struct cpu_thread *cpu = this_cpu();
+	uint64_t token = eframe->gpr[0]; /* only used in the diagnostic below */
+
+	if (!cpu->in_opal_call) { /* nothing to undo: report instead of wrapping to UINT32_MAX */
+		printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
+	} else {
+		sync(); /* release barrier vs quiescing */
+		cpu->in_opal_call--; /* balances the increment in opal_entry_check() */
+	}
+}
+
+/* OPAL_QUIESCE handler: quiesce, lock-break or resume OPAL on cpu_target. */
+int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
+{
+	struct cpu_thread *cpu = this_cpu();
+	struct cpu_thread *c = NULL;	/* target CPU; NULL means all other CPUs */
+	uint64_t end;
+	bool stuck = false;
+
+	if (cpu_target >= 0) {
+		c = find_cpu_by_server(cpu_target);
+		if (!c)
+			return OPAL_PARAMETER;
+	} else if (cpu_target != -1) {
+		return OPAL_PARAMETER;
+	}
+
+	if (quiesce_type == QUIESCE_HOLD || quiesce_type == QUIESCE_REJECT) {
+		if (cmpxchg32(&opal_quiesce_state, 0, quiesce_type) != 0) {
+			if (opal_quiesce_owner != cpu->pir) {
+				/*
+				 * Nested calls (same owner) are for internal
+				 * use only: they also get OPAL_BUSY, just
+				 * without the error message.
+				 */
+				printf("opal_quiesce already quiescing\n");
+			}
+			return OPAL_BUSY;
+		}
+		opal_quiesce_owner = cpu->pir;
+		opal_quiesce_target = cpu_target;
+	}
+
+	if (opal_quiesce_owner != cpu->pir) {
+		printf("opal_quiesce CPU does not own quiesce state (must call QUIESCE_HOLD or QUIESCE_REJECT)\n");
+		return OPAL_BUSY;
+	}
+
+	/* Okay now we own the quiesce state */
+
+	if (quiesce_type == QUIESCE_RESUME || quiesce_type == QUIESCE_RESUME_FAST_REBOOT) {
+		bust_locks = false;
+		sync(); /* release barrier vs opal entry */
+		if (c) {
+			c->quiesce_opal_call = false;
+		} else {
+			for_each_cpu(c) {
+				if (quiesce_type == QUIESCE_RESUME_FAST_REBOOT)
+					c->in_opal_call = 0; /* every CPU, not just ours */
+
+				if (c == cpu)
+					continue;
+				c->quiesce_opal_call = false;
+			}
+		}
+		sync();
+		opal_quiesce_state = 0;
+		return OPAL_SUCCESS;
+	}
+
+	if (quiesce_type == QUIESCE_LOCK_BREAK) {
+		if (opal_quiesce_target != -1) {
+			printf("opal_quiesce has not quiesced all CPUs (must target -1)\n");
+			return OPAL_BUSY;
+		}
+		bust_locks = true;
+		return OPAL_SUCCESS;
+	}
+
+	if (c) {
+		c->quiesce_opal_call = true;
+	} else {
+		for_each_cpu(c) {
+			if (c == cpu)
+				continue;
+			c->quiesce_opal_call = true;
+		}
+	}
+
+	sync(); /* Order stores to quiesce_opal_call vs loads of in_opal_call */
+
+	end = mftb() + msecs_to_tb(1000); /* one deadline shared by all targets */
+
+	smt_lowest();
+	if (c) { /* NOTE(review): all-CPU case assumes for_each_cpu() left c NULL */
+		while (c->in_opal_call) {
+			if (tb_compare(mftb(), end) == TB_AAFTERB) {
+				printf("OPAL quiesce CPU:%04x stuck in OPAL\n", c->pir);
+				stuck = true;
+				break;
+			}
+			barrier();
+		}
+	} else {
+		for_each_cpu(c) {
+			if (c == cpu)
+				continue;
+			while (c->in_opal_call) {
+				if (tb_compare(mftb(), end) == TB_AAFTERB) {
+					printf("OPAL quiesce CPU:%04x stuck in OPAL\n", c->pir);
+					stuck = true;
+					break;
+				}
+				barrier();
+			}
+		}
+	}
+	smt_medium();
+	sync(); /* acquire barrier vs opal entry */
+
+	if (stuck) {
+		printf("OPAL quiesce could not kick all CPUs out of OPAL\n");
+		return OPAL_PARTIAL;
+	}
+
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_QUIESCE, opal_quiesce, 2);
 
 void __opal_register(uint64_t token, void *func, unsigned int nargs)
 {
diff --git a/doc/opal-api/opal-quiesce-158.rst b/doc/opal-api/opal-quiesce-158.rst
new file mode 100644
index 000000000..19604396b
--- /dev/null
+++ b/doc/opal-api/opal-quiesce-158.rst
@@ -0,0 +1,59 @@
+.. _opal-quiesce:
+
+OPAL_QUIESCE
+============
+
+The host OS can use OPAL_QUIESCE to ensure CPUs under host control are not
+executing OPAL. This is useful in crash or shutdown scenarios to try to
+ensure that CPUs are not holding locks, and is intended to be used with
+OPAL_SIGNAL_SYSTEM_RESET, for example.
+
+Arguments
+---------
+::
+
+  uint32_t quiesce_type
+    QUIESCE_HOLD        Wait for all target(s) currently executing OPAL to
+                        return to the host. Any new OPAL call that is made
+                        will be held off until QUIESCE_RESUME.
+    QUIESCE_REJECT      Wait for all target(s) currently executing OPAL to
+                        return to the host. Any new OPAL call that is made
+                        will fail with OPAL_BUSY until QUIESCE_RESUME.
+    QUIESCE_LOCK_BREAK  After QUIESCE_HOLD or QUIESCE_REJECT is successful,
+                        the CPU can call QUIESCE_LOCK_BREAK to skip all
+                        locking in OPAL to give the best chance of making
+                        progress in the crash/debug paths. The host should
+                        ensure all other CPUs are stopped (e.g., with
+                        OPAL_SIGNAL_SYSTEM_RESET) before this call is made, to
+                        avoid concurrency.
+    QUIESCE_RESUME      Undo the effects of QUIESCE_HOLD/QUIESCE_REJECT and
+                        QUIESCE_LOCK_BREAK calls.
+    QUIESCE_RESUME_FAST_REBOOT
+                        As above, but also reset the tracking of OS calls
+                        into firmware as part of fast reboot (secondaries
+                        will never return to OS, but instead be released
+                        into a new OS boot).
+
+  int32_t target_cpu
+    cpu_nr >= 0        The cpu server number of the target cpu to quiesce.
+    -1                 All cpus except the current one should be quiesced.
+
+Returns
+-------
+OPAL_SUCCESS
+  The quiesce call was successful.
+
+OPAL_PARTIAL
+  Some or all of the CPUs executing OPAL when the call was made did not
+  return to the host after a timeout of 1 second. This is a best effort
+  at quiescing OPAL, and QUIESCE_RESUME must be called to resume normal
+  firmware operation.
+
+OPAL_PARAMETER
+  A parameter was incorrect.
+
+OPAL_BUSY
+  This CPU was not able to complete the operation, either because another
+  CPU has concurrently started quiescing the system, or because it has not
+  successfully called QUIESCE_HOLD or QUIESCE_REJECT before attempting
+  QUIESCE_LOCK_BREAK or QUIESCE_RESUME.
diff --git a/doc/opal-api/opal-signal-system-reset-145.rst b/doc/opal-api/opal-signal-system-reset-145.rst
index 28e5e2f41..98baef72a 100644
--- a/doc/opal-api/opal-signal-system-reset-145.rst
+++ b/doc/opal-api/opal-signal-system-reset-145.rst
@@ -17,6 +17,13 @@ raised when the target has MSR[RI]=0), so it should not be used in
 normal operation, but only for crashing, debugging, and similar
 exceptional cases.
 
+OPAL_SIGNAL_SYSTEM_RESET can pull CPUs out of OPAL, which may be
+undesirable in a crash or shutdown situation (e.g., because they may
+hold locks which are required to access the console, or may be halfway
+through setting hardware registers), so OPAL_QUIESCE can be used
+before OPAL_SIGNAL_SYSTEM_RESET to (attempt to) ensure all CPUs are
+out of OPAL before being interrupted.
+
 Arguments
 ---------
 ::
diff --git a/include/cpu.h b/include/cpu.h
index 3d5dbd40d..28041057d 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -55,9 +55,11 @@ struct cpu_thread {
 	struct trace_info		*trace;
 	uint64_t			save_r1;
 	void				*icp_regs;
+	uint32_t			in_opal_call;
 	uint32_t			lock_depth;
 	uint32_t			con_suspend;
 	bool				con_need_flush;
+	bool				quiesce_opal_call;
 	bool				in_mcount;
 	bool				in_poller;
 	bool				in_reinit;
diff --git a/include/opal-api.h b/include/opal-api.h
index 0bc036ed7..5013b71b5 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -214,7 +214,14 @@
 #define OPAL_SET_POWER_SHIFT_RATIO		155
 #define OPAL_SENSOR_GROUP_CLEAR			156
 #define OPAL_PCI_SET_P2P			157
-#define OPAL_LAST				157
+#define OPAL_QUIESCE				158
+#define OPAL_LAST				158
+
+#define QUIESCE_HOLD			1 /* Spin all calls at entry */
+#define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
+#define QUIESCE_LOCK_BREAK		3 /* Set to ignore locks. */
+#define QUIESCE_RESUME			4 /* Un-quiesce */
+#define QUIESCE_RESUME_FAST_REBOOT	5 /* Un-quiesce, fast reboot */
 
 /* Device tree flags */
 
diff --git a/include/opal-internal.h b/include/opal-internal.h
index 583e99948..8d3d0a177 100644
--- a/include/opal-internal.h
+++ b/include/opal-internal.h
@@ -61,6 +61,8 @@ extern void add_opal_node(void);
 			(func), (nargs))
 extern void __opal_register(uint64_t token, void *func, unsigned num_args);
 
+int64_t opal_quiesce(uint32_t shutdown_type, int32_t cpu);
+
 /* Warning: no locking at the moment, do at init time only
  *
  * XXX TODO: Add the big RCU-ish "opal API lock" to protect us here
-- 
2.15.0



More information about the Skiboot mailing list