[Skiboot] [RFC PATCH v2] Add support for quiescing OPAL calls

Nicholas Piggin npiggin at gmail.com
Sun Oct 15 23:37:31 AEDT 2017


This is a bit more fleshed out. It quiesces CPUs by holding them
at OPAL entry, and can release them again if we want to recover. The
OPAL API has been added now, and it seems to work when called from Linux.
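
From the OS side the intended sequence is to quiesce before tearing
things down, and to resume if the teardown has to be abandoned. A rough
sketch of a caller is below; opal_shutdown_call() stands in for whatever
wrapper the OS generates for OPAL_SHUTDOWN, and the error handling is
illustrative rather than taken from an actual Linux patch:

	/* Hypothetical OS-side caller, not part of this patch */
	static int hold_opal_for_reboot(void)
	{
		int64_t rc;

		/* Hold all other CPUs at OPAL entry (-1 means all CPUs) */
		rc = opal_shutdown_call(SHUTDOWN_QUIESCE, -1);
		if (rc == OPAL_PARTIAL) {
			/*
			 * A CPU is stuck inside OPAL and may hold locks,
			 * so back out and let everyone run again.
			 */
			opal_shutdown_call(SHUTDOWN_RESUME, -1);
			return -1;
		}
		return (rc == OPAL_SUCCESS) ? 0 : -1;
	}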

---
 asm/head.S              |  32 ++-------
 core/fast-reboot.c      |  21 +++---
 core/lock.c             |  10 ++-
 core/opal.c             | 171 +++++++++++++++++++++++++++++++++++++++++++++---
 core/platform.c         |   4 ++
 include/cpu.h           |   2 +
 include/lock.h          |  12 ++++
 include/opal-api.h      |   7 +-
 include/opal-internal.h |   2 +
 9 files changed, 212 insertions(+), 49 deletions(-)
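
The heart of the patch is the handshake between opal_entry_check() and
opal_shutdown(): entry bumps the per-CPU in_opal_call count before
testing quiesce_opal_call, the quiescer sets the flag before draining
the counts, and the sync() on each side orders the store against the
load, so at least one side must observe the other. A standalone model
of just that protocol, with C11 atomics standing in for the explicit
barriers (illustrative only, not skiboot code):

	#include <stdatomic.h>
	#include <stdbool.h>

	struct cpu {
		atomic_uint in_opal_call;      /* nonzero while inside OPAL */
		atomic_bool quiesce_opal_call; /* hold this CPU at the door */
	};

	/* OPAL entry: claim entry first, then re-check the flag */
	void entry_check(struct cpu *c)
	{
		for (;;) {
			atomic_fetch_add(&c->in_opal_call, 1);
			/* seq_cst ordering plays the role of the sync() */
			if (!atomic_load(&c->quiesce_opal_call))
				return;		/* allowed in */
			atomic_fetch_sub(&c->in_opal_call, 1);
			while (atomic_load(&c->quiesce_opal_call))
				;		/* spin at OPAL entry */
		}
	}

	/* Quiescer: raise the flag, then wait for in-flight calls */
	void quiesce(struct cpu *c)
	{
		atomic_store(&c->quiesce_opal_call, true);
		while (atomic_load(&c->in_opal_call))
			;	/* the CPU finishes its current call */
	}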

diff --git a/asm/head.S b/asm/head.S
index d6b58be9..a221bab1 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -913,8 +913,7 @@ opal_entry:
 	std	%r1,STACK_GPR1(%r12)
 	mr	%r1,%r12
 
-	/* May save arguments for tracing */
-#ifdef OPAL_TRACE_ENTRY
+	/* Save arguments because we call C */
 	std	%r3,STACK_GPR3(%r1)
 	std	%r4,STACK_GPR4(%r1)
 	std	%r5,STACK_GPR5(%r1)
@@ -923,7 +922,7 @@ opal_entry:
 	std	%r8,STACK_GPR8(%r1)
 	std	%r9,STACK_GPR9(%r1)
 	std	%r10,STACK_GPR10(%r1)
-#endif
+
 	/* Save Token (r0), LR and r13 */
 	mflr	%r12
 	std	%r0,STACK_GPR0(%r1)
@@ -944,15 +943,10 @@ opal_entry:
 	addis	%r2,%r2,(__toc_start - __head)@ha
 	addi	%r2,%r2,(__toc_start - __head)@l
 
-	/* Check for a reboot in progress */
-	LOAD_ADDR_FROM_TOC(%r12, reboot_in_progress)
-	lbz	%r12,0(%r12)
-	cmpwi	%r12,0
-	bne	3f
-
-#ifdef OPAL_TRACE_ENTRY
 	mr	%r3,%r1
-	bl	opal_trace_entry
+	bl	opal_entry_check
+	cmpdi	%r3,0
+	bne	1f
 	ld	%r0,STACK_GPR0(%r1)
 	ld	%r3,STACK_GPR3(%r1)
 	ld	%r4,STACK_GPR4(%r1)
@@ -962,39 +956,25 @@ opal_entry:
 	ld	%r8,STACK_GPR8(%r1)
 	ld	%r9,STACK_GPR9(%r1)
 	ld	%r10,STACK_GPR10(%r1)
-#endif /* OPAL_TRACE_ENTRY */
 
 	/* Convert our token into a table entry and get the
 	 * function pointer. Also check the token.
 	 * For ELFv2 ABI, the local entry point is used so no need for r12.
 	 */
-	cmpldi	%r0,OPAL_LAST
-	bgt-	2f
 	sldi	%r0,%r0,3
 	LOAD_ADDR_FROM_TOC(%r12, opal_branch_table)
 	ldx	%r0,%r12,%r0
-	cmpldi	%r0,0
-	beq-	2f
 	mtctr	%r0
 
 	/* Jump ! */
 	bctrl
-
+	bl	opal_exit_check	/* takes and returns retval in r3 */
 1:	ld	%r12,STACK_LR(%r1)
 	mtlr	%r12
 	ld	%r13,STACK_GPR13(%r1)
 	ld	%r1,STACK_GPR1(%r1)
 	blr
 
-2:	/* Bad token */
-	ld	%r3,STACK_GPR0(%r1)
-	bl	opal_bad_token
-	b	1b
-
-3:	/* Reboot in progress, reject all calls */
-	li	%r3,OPAL_BUSY
-	b	1b
-
 .global start_kernel
 start_kernel:
 	sync
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 8af5c590..7c89dc4c 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -35,7 +35,5 @@
 
 
-/* Flag tested by the OPAL entry code */
-uint8_t reboot_in_progress;
 static volatile bool fast_boot_release;
 static struct cpu_thread *last_man_standing;
 static struct lock reset_lock = LOCK_UNLOCKED;
@@ -333,19 +332,17 @@ void fast_reboot(void)
 	}
 	unlock(&fast_reboot_disabled_lock);
 
+	/*
+	 * Ensure all other CPUs have left OPAL calls.
+	 */
+	if (opal_shutdown(SHUTDOWN_QUIESCE, -1) != OPAL_SUCCESS) {
+		prlog(PR_DEBUG, "RESET: Fast reboot aborted because OPAL calls could not be quiesced\n");
+		return;
+	}
+
 	prlog(PR_NOTICE, "RESET: Initiating fast reboot %d...\n", ++fast_reboot_count);
 	free(fdt);
 
-	/* XXX We need a way to ensure that no other CPU is in skiboot
-	 * holding locks (via the OPAL APIs) and if they are, we need
-	 * for them to get out. Hopefully that isn't happening, but...
-	 *
-	 * To fix this properly, we want to keep track of OPAL entry/exit
-	 * on all CPUs.
-	 */
-	reboot_in_progress = 1;
-	time_wait_ms(200);
-
 	/* Lock so the new guys coming don't reset us */
 	lock(&reset_lock);
 
@@ -552,7 +549,7 @@ void __noreturn fast_reboot_entry(void)
 
 	/* Clear release flag for next time */
 	fast_boot_release = false;
-	reboot_in_progress = 0;
+	opal_shutdown(SHUTDOWN_RESUME, -1);
 
 	/* Cleanup ourselves */
 	cleanup_cpu_state();
diff --git a/core/lock.c b/core/lock.c
index 916a0241..b7fdaa74 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -93,6 +93,9 @@ bool try_lock(struct lock *l)
 {
 	struct cpu_thread *cpu = this_cpu();
 
+	if (bust_locks)
+		return true;
+
 	if (__try_lock(cpu, l)) {
 		if (l->in_con_path)
 			cpu->con_suspend++;
@@ -112,8 +115,13 @@ void lock(struct lock *l)
 		if (try_lock(l))
 			break;
 		smt_lowest();
-		while (l->lock_val)
+		while (l->lock_val) {
+			if (bust_locks) {
+				smt_medium();
+				return;
+			}
 			barrier();
+		}
 		smt_medium();
 	}
 }
diff --git a/core/opal.c b/core/opal.c
index 8095f731..2afa2e6b 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -92,19 +92,12 @@ long opal_bad_token(uint64_t token)
 	return OPAL_PARAMETER;
 }
 
-/* Called from head.S, thus no prototype */
-void opal_trace_entry(struct stack_frame *eframe);
-
-void opal_trace_entry(struct stack_frame *eframe)
+static void opal_trace_entry(struct stack_frame *eframe __unused)
 {
+#ifdef OPAL_TRACE_ENTRY
 	union trace t;
 	unsigned nargs, i;
 
-	if (this_cpu()->pir != mfspr(SPR_PIR)) {
-		printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x\n",
-		       mfspr(SPR_PIR), this_cpu(), this_cpu()->pir);
-		abort();
-	}
 	if (eframe->gpr[0] > OPAL_LAST)
 		nargs = 0;
 	else
@@ -117,7 +110,167 @@ void opal_trace_entry(struct stack_frame *eframe)
 		t.opal.r3_to_11[i] = cpu_to_be64(eframe->gpr[3+i]);
 
 	trace_add(&t, TRACE_OPAL, offsetof(struct trace_opal, r3_to_11[nargs]));
+#endif
+}
+
+/* Shutdown state has PIR in top 32-bits, 0/1 in bottom bit */
+static uint64_t opal_shutdown_state = 0;
+
+/* Called from head.S, thus no prototype */
+int64_t opal_entry_check(struct stack_frame *eframe);
+
+static int64_t opal_check_token(uint64_t token);
+
+int64_t opal_entry_check(struct stack_frame *eframe)
+{
+	struct cpu_thread *cpu = this_cpu();
+	uint64_t token = eframe->gpr[0];
+
+	if (cpu->pir != mfspr(SPR_PIR)) {
+		printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir);
+		abort();
+	}
+
+	opal_trace_entry(eframe);
+
+	if (!opal_check_token(token))
+		return opal_bad_token(token);
+
+	if (!opal_shutdown_state && cpu->in_opal_call) {
+		printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir);
+		return OPAL_BUSY;
+	}
+
+again:
+	cpu->in_opal_call++;
+	sync(); /* Order store to in_opal_call vs load of quiesce_opal_call */
+	if (cpu->quiesce_opal_call) {
+		cpu->in_opal_call--;
+		smt_lowest();
+		while (cpu->quiesce_opal_call)
+			barrier();
+		smt_medium();
+		goto again;
+	}
+
+	return OPAL_SUCCESS;
+}
+
+int64_t opal_exit_check(int64_t retval);
+
+int64_t opal_exit_check(int64_t retval)
+{
+	struct cpu_thread *cpu = this_cpu();
+
+	if (!cpu->in_opal_call)
+		printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir);
+	else
+		cpu->in_opal_call--;
+	return retval;
+}
+
+int64_t opal_shutdown(uint64_t shutdown_type, int cpu_target)
+{
+	struct cpu_thread *cpu = this_cpu();
+	struct cpu_thread *c = NULL;
+	uint64_t end;
+	bool stuck = false;
+
+	if (cpu_target >= 0) {
+		c = find_cpu_by_server(cpu_target);
+		if (!c)
+			return OPAL_PARAMETER;
+	} else if (cpu_target != -1) {
+		return OPAL_PARAMETER;
+	}
+
+	if (shutdown_type == SHUTDOWN_QUIESCE) {
+		if (cmpxchg64(&opal_shutdown_state, 0,
+					((uint64_t)cpu->pir << 32) | 1) != 0) {
+			printf("opal_shutdown another CPU already in shutdown\n");
+			return OPAL_BUSY;
+		}
+	}
+
+	if (opal_shutdown_state != (((uint64_t)cpu->pir << 32) | 1)) {
+		printf("opal_shutdown CPU does not own shutdown state (must call SHUTDOWN_QUIESCE)\n");
+		return OPAL_BUSY;
+	}
+
+	/* Okay now we own the shutdown state */
+
+	if (shutdown_type == SHUTDOWN_RESUME) {
+		bust_locks = false;
+		sync();
+		if (c) {
+			c->quiesce_opal_call = false;
+		} else {
+			for_each_cpu(c) {
+				if (c == cpu)
+					continue;
+				c->quiesce_opal_call = false;
+			}
+		}
+		sync();
+		opal_shutdown_state = 0;
+		return OPAL_SUCCESS;
+	}
+
+	if (shutdown_type == SHUTDOWN_SINGLE_THREADED) {
+		bust_locks = true;
+		return OPAL_SUCCESS;
+	}
+
+	if (c) {
+		c->quiesce_opal_call = true;
+	} else {
+		for_each_cpu(c) {
+			if (c == cpu)
+				continue;
+			c->quiesce_opal_call = true;
+		}
+	}
+	sync();
+
+	end = mftb() + msecs_to_tb(1000);
+
+	smt_lowest();
+	if (c) {
+		while (c->in_opal_call) {
+			if (tb_compare(mftb(), end) == TB_AAFTERB) {
+				printf("OPAL shutdown CPU:%04x stuck in OPAL\n", c->pir);
+				stuck = true;
+				break;
+			}
+			barrier();
+		}
+	} else {
+		for_each_cpu(c) {
+			if (c == cpu)
+				continue;
+			while (c->in_opal_call) {
+				if (tb_compare(mftb(), end) == TB_AAFTERB) {
+					printf("OPAL shutdown CPU:%04x stuck in OPAL\n", c->pir);
+					stuck = true;
+					break;
+				}
+				barrier();
+			}
+		}
+	}
+	smt_medium();
+
+	if (stuck) {
+		printf("OPAL shutdown could not kick all CPUs out of OPAL\n");
+		return OPAL_PARTIAL;
+	}
+
+	return OPAL_SUCCESS;
 }
+opal_call(OPAL_SHUTDOWN, opal_shutdown, 2);
 
 void __opal_register(uint64_t token, void *func, unsigned int nargs)
 {
diff --git a/core/platform.c b/core/platform.c
index 732f67e5..2672b252 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -54,6 +54,8 @@ static int64_t opal_cec_reboot(void)
 {
 	prlog(PR_NOTICE, "OPAL: Reboot request...\n");
 
+	opal_shutdown(SHUTDOWN_QUIESCE, -1);
+
 	console_complete_flush();
 
 	/* Try fast-reset unless explicitly disabled */
@@ -71,6 +73,8 @@ static int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag)
 {
 	struct errorlog *buf;
 
+	opal_shutdown(SHUTDOWN_QUIESCE, -1);
+
 	switch (reboot_type) {
 	case OPAL_REBOOT_NORMAL:
 		return opal_cec_reboot();
diff --git a/include/cpu.h b/include/cpu.h
index 168fa994..bb0b4eaa 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -54,9 +54,11 @@ struct cpu_thread {
 	struct trace_info		*trace;
 	uint64_t			save_r1;
 	void				*icp_regs;
+	uint32_t			in_opal_call;
 	uint32_t			lock_depth;
 	uint32_t			con_suspend;
 	bool				con_need_flush;
+	bool				quiesce_opal_call;
 	bool				in_mcount;
 	bool				in_poller;
 	bool				in_reinit;
diff --git a/include/lock.h b/include/lock.h
index c209e7d1..863b4066 100644
--- a/include/lock.h
+++ b/include/lock.h
@@ -18,6 +18,7 @@
 #define __LOCK_H
 
 #include <stdbool.h>
+#include <processor.h>
 
 struct lock {
 	/* Lock value has bit 63 as lock bit and the PIR of the owner
@@ -86,6 +87,17 @@ static inline uint64_t __cmpxchg64(uint64_t *mem, uint64_t old, uint64_t new)
 	return prev;
 }
 
+static inline uint64_t cmpxchg64(uint64_t *mem, uint64_t old, uint64_t new)
+{
+	uint64_t prev;
+
+	sync();
+	prev = __cmpxchg64(mem, old, new);
+	sync();
+
+	return prev;
+}
+
 extern bool try_lock(struct lock *l);
 extern void lock(struct lock *l);
 extern void unlock(struct lock *l);
diff --git a/include/opal-api.h b/include/opal-api.h
index 0ff0db02..47c8427b 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -214,7 +214,12 @@
 #define OPAL_SET_POWER_SHIFT_RATIO		155
 #define OPAL_SENSOR_GROUP_CLEAR			156
 #define OPAL_PCI_SET_P2P			157
-#define OPAL_LAST				157
+#define OPAL_SHUTDOWN				158
+#define OPAL_LAST				158
+
+#define SHUTDOWN_QUIESCE		1 /* CPUs out of OPAL calls */
+#define SHUTDOWN_SINGLE_THREADED	2 /* Call after smp_send_stop() */
+#define SHUTDOWN_RESUME			3 /* Un-quiesce */
 
 /* Device tree flags */
 
diff --git a/include/opal-internal.h b/include/opal-internal.h
index 583e9994..8ca89d93 100644
--- a/include/opal-internal.h
+++ b/include/opal-internal.h
@@ -61,6 +61,8 @@ extern void add_opal_node(void);
 			(func), (nargs))
 extern void __opal_register(uint64_t token, void *func, unsigned num_args);
 
+extern int64_t opal_shutdown(uint64_t shutdown_type, int cpu_target);
+
 /* Warning: no locking at the moment, do at init time only
  *
  * XXX TODO: Add the big RCU-ish "opal API lock" to protect us here
-- 
2.13.3


