[Skiboot] [RFC PATCH] support for quiescing OPAL calls

Nicholas Piggin npiggin at gmail.com
Sun Oct 15 17:44:12 AEDT 2017


This patch is a quick hack at the moment, just want to get comments on
the idea.

It tracks all host OS entries into and exits from OPAL calls, and with
that adds a shutdown API to quiesce OPAL for all other CPUs, while allowing
the caller CPU to continue making OPAL calls. A forceful mode will
break locks and hopefully allow the crash/shutdown primary to print
something to the console and trigger a hardware restart.

This has become more important now with OPAL_SIGNAL_SYSTEM_RESET,
OPAL can be interrupted and it can leave us in a state where we can't
get crash information out.

The idea is that the host OS will call shutdown to mark itself as the
shutdown CPU (allowed to re-enter OPAL) and gracefully try to quiesce
OPAL calls without taking any locks or waiting indefinitely anywhere.

It will then call OPAL_SIGNAL_SYSTEM_RESET to crash or stop all other
CPUs.

It will then call shutdown again to disable locks in order to have the
best chance of making progress.

At the moment this is probably not very recoverable, because secondaries
get OPAL_BUSY errors everywhere. It may be better, instead of failing
with OPAL_BUSY, to just spin at the OPAL entry and continue if the shutdown
flag gets cleared. That way it should be usable for debugging a live
system with xmon, for example.

I haven't wired it up to an OPAL call yet, but we can use it in fast
reboot. If graceful shutdown fails, we can disable fast reboot.

Thanks,
Nick

---
 asm/head.S              |  32 +++----------
 core/fast-reboot.c      |  21 ++++-----
 core/lock.c             |  10 +++-
 core/opal.c             | 120 ++++++++++++++++++++++++++++++++++++++++++++----
 core/platform.c         |   4 ++
 include/cpu.h           |   1 +
 include/opal-api.h      |   3 +-
 include/opal-internal.h |   6 +++
 8 files changed, 148 insertions(+), 49 deletions(-)

diff --git a/asm/head.S b/asm/head.S
index d6b58be9..a221bab1 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -913,8 +913,7 @@ opal_entry:
 	std	%r1,STACK_GPR1(%r12)
 	mr	%r1,%r12
 
-	/* May save arguments for tracing */
-#ifdef OPAL_TRACE_ENTRY
+	/* Save arguments because we call C */
 	std	%r3,STACK_GPR3(%r1)
 	std	%r4,STACK_GPR4(%r1)
 	std	%r5,STACK_GPR5(%r1)
@@ -923,7 +922,7 @@ opal_entry:
 	std	%r8,STACK_GPR8(%r1)
 	std	%r9,STACK_GPR9(%r1)
 	std	%r10,STACK_GPR10(%r1)
-#endif
+
 	/* Save Token (r0), LR and r13 */
 	mflr	%r12
 	std	%r0,STACK_GPR0(%r1)
@@ -944,15 +943,10 @@ opal_entry:
 	addis	%r2,%r2,(__toc_start - __head)@ha
 	addi	%r2,%r2,(__toc_start - __head)@l
 
-	/* Check for a reboot in progress */
-	LOAD_ADDR_FROM_TOC(%r12, reboot_in_progress)
-	lbz	%r12,0(%r12)
-	cmpwi	%r12,0
-	bne	3f
-
-#ifdef OPAL_TRACE_ENTRY
 	mr	%r3,%r1
-	bl	opal_trace_entry
+	bl	opal_entry_check
+	cmpdi	%r3,0
+	bne	1f
 	ld	%r0,STACK_GPR0(%r1)
 	ld	%r3,STACK_GPR3(%r1)
 	ld	%r4,STACK_GPR4(%r1)
@@ -962,39 +956,25 @@ opal_entry:
 	ld	%r8,STACK_GPR8(%r1)
 	ld	%r9,STACK_GPR9(%r1)
 	ld	%r10,STACK_GPR10(%r1)
-#endif /* OPAL_TRACE_ENTRY */
 
 	/* Convert our token into a table entry and get the
 	 * function pointer. Also check the token.
 	 * For ELFv2 ABI, the local entry point is used so no need for r12.
 	 */
-	cmpldi	%r0,OPAL_LAST
-	bgt-	2f
 	sldi	%r0,%r0,3
 	LOAD_ADDR_FROM_TOC(%r12, opal_branch_table)
 	ldx	%r0,%r12,%r0
-	cmpldi	%r0,0
-	beq-	2f
 	mtctr	%r0
 
 	/* Jump ! */
 	bctrl
-
+	bl	opal_exit_check
 1:	ld	%r12,STACK_LR(%r1)
 	mtlr	%r12
 	ld	%r13,STACK_GPR13(%r1)
 	ld	%r1,STACK_GPR1(%r1)
 	blr
 
-2:	/* Bad token */
-	ld	%r3,STACK_GPR0(%r1)
-	bl	opal_bad_token
-	b	1b
-
-3:	/* Reboot in progress, reject all calls */
-	li	%r3,OPAL_BUSY
-	b	1b
-
 .global start_kernel
 start_kernel:
 	sync
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 8af5c590..7850c921 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -35,7 +35,6 @@
 
 
 /* Flag tested by the OPAL entry code */
-uint8_t reboot_in_progress;
 static volatile bool fast_boot_release;
 static struct cpu_thread *last_man_standing;
 static struct lock reset_lock = LOCK_UNLOCKED;
@@ -333,19 +332,17 @@ void fast_reboot(void)
 	}
 	unlock(&fast_reboot_disabled_lock);
 
+	/*
+	 * Ensure all other CPUs have left OPAL calls.
+	 */
+	if (!opal_shutdown(SHUTDOWN_TYPE_GRACEFUL)) {
+		prlog(PR_DEBUG, "RESET: Fast reboot disabled because OPAL calls not quiescing\n");
+		return;
+	}
+
 	prlog(PR_NOTICE, "RESET: Initiating fast reboot %d...\n", ++fast_reboot_count);
 	free(fdt);
 
-	/* XXX We need a way to ensure that no other CPU is in skiboot
-	 * holding locks (via the OPAL APIs) and if they are, we need
-	 * for them to get out. Hopefully that isn't happening, but...
-	 *
-	 * To fix this properly, we want to keep track of OPAL entry/exit
-	 * on all CPUs.
-	 */
-	reboot_in_progress = 1;
-	time_wait_ms(200);
-
 	/* Lock so the new guys coming don't reset us */
 	lock(&reset_lock);
 
@@ -552,7 +549,7 @@ void __noreturn fast_reboot_entry(void)
 
 	/* Clear release flag for next time */
 	fast_boot_release = false;
-	reboot_in_progress = 0;
+	opal_shutdown(SHUTDOWN_TYPE_RESET);
 
 	/* Cleanup ourselves */
 	cleanup_cpu_state();
diff --git a/core/lock.c b/core/lock.c
index 0868f2ba..bf0c27b5 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -75,6 +75,9 @@ bool lock_held_by_me(struct lock *l)
 
 bool try_lock(struct lock *l)
 {
+	if (bust_locks)
+		return true;
+
 	if (__try_lock(l)) {
 		if (l->in_con_path)
 			this_cpu()->con_suspend++;
@@ -94,8 +97,13 @@ void lock(struct lock *l)
 		if (try_lock(l))
 			break;
 		smt_lowest();
-		while (l->lock_val)
+		while (l->lock_val) {
+			if (bust_locks) {
+				smt_medium();
+				return;
+			}
 			barrier();
+		}
 		smt_medium();
 	}
 }
diff --git a/core/opal.c b/core/opal.c
index 8095f731..9160836e 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -92,19 +92,12 @@ long opal_bad_token(uint64_t token)
 	return OPAL_PARAMETER;
 }
 
-/* Called from head.S, thus no prototype */
-void opal_trace_entry(struct stack_frame *eframe);
-
-void opal_trace_entry(struct stack_frame *eframe)
+static void opal_trace_entry(struct stack_frame *eframe __unused)
 {
+#ifdef OPAL_TRACE_ENTRY
 	union trace t;
 	unsigned nargs, i;
 
-	if (this_cpu()->pir != mfspr(SPR_PIR)) {
-		printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x\n",
-		       mfspr(SPR_PIR), this_cpu(), this_cpu()->pir);
-		abort();
-	}
 	if (eframe->gpr[0] > OPAL_LAST)
 		nargs = 0;
 	else
@@ -117,6 +110,115 @@ void opal_trace_entry(struct stack_frame *eframe)
 		t.opal.r3_to_11[i] = cpu_to_be64(eframe->gpr[3+i]);
 
 	trace_add(&t, TRACE_OPAL, offsetof(struct trace_opal, r3_to_11[nargs]));
+#endif
+}
+
+static uint8_t opal_shutdown_state = 0;
+static uint32_t opal_shutdown_pir; /* Only CPU which may enter OPAL */
+
+/* Called from head.S, thus no prototype */
+int64_t opal_entry_check(struct stack_frame *eframe);
+
+static int64_t opal_check_token(uint64_t token);
+
+int64_t opal_entry_check(struct stack_frame *eframe)
+{
+	struct cpu_thread *cpu = this_cpu();
+	uint64_t token = eframe->gpr[0];
+
+	if (cpu->pir != mfspr(SPR_PIR)) {
+		printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir);
+		abort();
+	}
+
+	opal_trace_entry(eframe);
+
+	if (!opal_check_token(token))
+		return opal_bad_token(token);
+
+	if (cpu->in_opal_call) {
+		printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir);
+		return OPAL_BUSY;
+	}
+	cpu->in_opal_call++;
+	sync(); /* Store in_opal_call vs load opal_shutdown_state */
+	if (opal_shutdown_state != 0) {
+		if (cpu->pir != opal_shutdown_pir) {
+			cpu->in_opal_call--;
+			return OPAL_BUSY;
+		}
+	}
+
+	return OPAL_SUCCESS;
+}
+
+void opal_exit_check(void);
+
+void opal_exit_check(void)
+{
+	struct cpu_thread *cpu = this_cpu();
+
+	if (!cpu->in_opal_call) {
+		printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x\n",
+		       mfspr(SPR_PIR), cpu, cpu->pir);
+	} else {
+		cpu->in_opal_call--;
+	}
+}
+
+int64_t opal_shutdown(uint64_t shutdown_type)
+{
+	struct cpu_thread *cpu = this_cpu();
+	struct cpu_thread *c;
+	uint64_t end;
+	bool stuck = false;
+
+	if (shutdown_type == SHUTDOWN_TYPE_RESET) {
+		opal_shutdown_state = 0;
+		return OPAL_SUCCESS;
+	}
+	if (shutdown_type == SHUTDOWN_TYPE_LIMP_HOME) {
+		if (!opal_shutdown_state) {
+			printf("Should try graceful first!\n");
+			return OPAL_PARAMETER;
+		}
+		bust_locks = true;
+		return OPAL_SUCCESS;
+	}
+
+	printf("OPAL shutdown\n");
+	opal_shutdown_pir = cpu->pir; /* XXX: use atomic cmpxchg */
+	sync();
+	opal_shutdown_state = 1;
+	sync();
+
+	end = mftb() + msecs_to_tb(1000);
+
+	smt_lowest();
+	for_each_cpu(c) {
+		if (c == cpu)
+			continue;
+		while (c->in_opal_call) {
+			if (tb_compare(mftb(), end) == TB_AAFTERB) {
+				printf("OPAL shutdown CPU:%04x stuck in OPAL\n", c->pir);
+				stuck = true;
+				break;
+			}
+			barrier();
+		}
+	}
+	smt_medium();
+
+	if (stuck) {
+		printf("OPAL shutdown could not kick all CPUs out of OPAL\n");
+		return OPAL_BUSY;
+	}
+
+	printf("OPAL shutdown kicked all CPUs out of OPAL\n");
+
+	return OPAL_SUCCESS;
 }
 
 void __opal_register(uint64_t token, void *func, unsigned int nargs)
diff --git a/core/platform.c b/core/platform.c
index 732f67e5..8287a08d 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -54,6 +54,8 @@ static int64_t opal_cec_reboot(void)
 {
 	prlog(PR_NOTICE, "OPAL: Reboot request...\n");
 
+	opal_shutdown(SHUTDOWN_TYPE_GRACEFUL);
+
 	console_complete_flush();
 
 	/* Try fast-reset unless explicitly disabled */
@@ -71,6 +73,8 @@ static int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag)
 {
 	struct errorlog *buf;
 
+	opal_shutdown(SHUTDOWN_TYPE_GRACEFUL);
+
 	switch (reboot_type) {
 	case OPAL_REBOOT_NORMAL:
 		return opal_cec_reboot();
diff --git a/include/cpu.h b/include/cpu.h
index 168fa994..698d83bf 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -54,6 +54,7 @@ struct cpu_thread {
 	struct trace_info		*trace;
 	uint64_t			save_r1;
 	void				*icp_regs;
+	uint32_t			in_opal_call;
 	uint32_t			lock_depth;
 	uint32_t			con_suspend;
 	bool				con_need_flush;
diff --git a/include/opal-api.h b/include/opal-api.h
index 0ff0db02..0917c0e3 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -214,7 +214,8 @@
 #define OPAL_SET_POWER_SHIFT_RATIO		155
 #define OPAL_SENSOR_GROUP_CLEAR			156
 #define OPAL_PCI_SET_P2P			157
-#define OPAL_LAST				157
+#define OPAL_SHUTDOWN				158
+#define OPAL_LAST				158
 
 /* Device tree flags */
 
diff --git a/include/opal-internal.h b/include/opal-internal.h
index 583e9994..5ef8d554 100644
--- a/include/opal-internal.h
+++ b/include/opal-internal.h
@@ -61,6 +61,12 @@ extern void add_opal_node(void);
 			(func), (nargs))
 extern void __opal_register(uint64_t token, void *func, unsigned num_args);
 
+#define SHUTDOWN_TYPE_GRACEFUL	1 /* Kick all other CPUs out of OPAL calls */
+#define SHUTDOWN_TYPE_LIMP_HOME	2 /* Break locks. Call after smp_send_stop() */
+#define SHUTDOWN_TYPE_RESET	3 /* Bring shutdown state back after fast reboot */
+
+int64_t opal_shutdown(uint64_t shutdown_type);
+
 /* Warning: no locking at the moment, do at init time only
  *
  * XXX TODO: Add the big RCU-ish "opal API lock" to protect us here
-- 
2.13.3



More information about the Skiboot mailing list