[Skiboot] [RFC PATCH] core/opal: Emergency stack, OPAL_DEBUG support

Nicholas Piggin npiggin at gmail.com
Tue Mar 27 01:37:31 AEDT 2018


This is still a bit rough, but it gets some decent results already,
so I would like to ask for comments and ideas.

This is at least 2 patches at the moment. One is to detect opal
re-entrancy, and if this CPU was already in OPAL, then move the
stack down 12kB and use that instead. This allows a CPU to debug
itself if we get interrupted from within OPAL, making OPAL calls
to print data, use OPAL_DEBUG, etc., without destroying our
stack.

Second part is to implement OPAL_DEBUG. My idea is to have maybe
a set of debug sub-functions, and use this call for the OS to
assist with debugging firmware. Currently only this stack
unwinding is implemented. It allows Linux to specify r1 and nip,
and skiboot will print a stack dump, and return the r1 of OPAL's
caller to Linux.

With some Linux work to use this call, we can use 0x100 to break
into xmon from OPAL (using pdbg or NMI IPIs or mambo injection),
and get a sane unwinding of both the skiboot and Linux stacks.

The sreset is recoverable too because the main stack was not
trashed, so 'x' returns to the system (assuming recursion has
not caused other problems).

opped in OPAL!
cpu 0x0: Vector: 100 (System Reset) at [c0000000fffcfd80]
pc: 000000003001298c
lr: 000000003001bb5c
sp: 31c03ba0
msr: 9000000002803002
current = 0xc0000000f0862600
paca    = 0xc00000000ffe0000	 softe: 3	 irq_happened: 0x01
pid   = 16, comm = kopald
Linux version ...
enter ? for help
SP is in OPAL, calling OPAL to dump stack
CPU 0000 PC: 000000003001298c .dummy_console_poll+0x24 Backtrace:
 S: 0000000031c03c20 R: 000000003001bb5c   .opal_run_pollers+0x148
 S: 0000000031c03ca0 R: 000000003001bc38   .opal_poll_events+0xc4
 S: 0000000031c03d20 R: 00000000300051d8   opal_entry+0x128
 --- OPAL call token: 0xa caller R1: 0xc0000000f081fd20 ---
[c0000000f081fd20] c00000000006dfb0 opal_handle_events+0x70/0x130 (unreliable)
[c0000000f081fd80] c00000000006847c kopald+0x5c/0xc0
[c0000000f081fdc0] c0000000000fda9c kthread+0x18c/0x1a0
[c0000000f081fe30] c00000000000b3a8 ret_from_kernel_thread+0x5c/0xb4
0:mon>

This patch is actually applied on top of a recent version of
"[RFC PATCH] asm/head: implement quiescing without stack or clobbering
regs", so it won't apply as is. It also needs Linux patches to test
properly, so I'd just like to get feedback at the moment.

Thanks,
Nick
---
 asm/head.S         |  6 ++++++
 core/opal.c        | 42 +++++++++++++++++++++++++++++++++++++++++-
 core/stack.c       | 14 ++++++++++----
 include/opal-api.h |  3 ++-
 include/stack.h    |  6 +++---
 5 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/asm/head.S b/asm/head.S
index 26dc11c3..ca9f4466 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -1013,8 +1013,14 @@ opal_entry:
 	b	1b
 
 4:	/* Quiesce protocol done, get our per CPU stack */
+	lwz	%r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+	cmpwi	%r11,1
+
 	mfspr	%r12,SPR_PIR
 	GET_STACK(%r12,%r12)
+	beq	5f
+	subi	%r12,%r12,(12*1024)
+5:
 	stdu	%r12,-STACK_FRAMESIZE(%r12)
 
 	/* Save caller r1, establish new r1 */
diff --git a/core/opal.c b/core/opal.c
index 465656ad..6ffe35c3 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -154,6 +154,7 @@ int64_t opal_entry_check(struct stack_frame *eframe)
 		case OPAL_CEC_REBOOT:
 		case OPAL_CEC_REBOOT2:
 		case OPAL_SIGNAL_SYSTEM_RESET:
+		case OPAL_DEBUG:
 			break;
 		default:
 			printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n",
@@ -177,16 +178,55 @@ int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
 		printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
 		       mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
 		cpu->in_opal_call++; /* avoid exit path underflowing */
-	} else {
+	} else if (cpu->in_opal_call == 1) {
 		if (!list_empty(&cpu->locks_held)) {
 			prlog(PR_ERR, "OPAL exiting with locks held, token=%llu retval=%lld\n",
 			      token, retval);
 			drop_my_locks(true);
 		}
+	} else {
+		/* Re-entry could be debugger, so don't trash locks */
 	}
 	return retval;
 }
 
+#include <stack.h>
+#define STACK_BUF_ENTRIES	60
+static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
+static char debug_buf[128];
+
+#define OPAL_DEBUG_DUMP_STACK 1
+
+struct debug_struct {
+	unsigned long nip;
+	unsigned long r1;
+	unsigned long r1_caller;
+};
+
+static int64_t opal_debug(uint32_t debug_type, uint64_t arg)
+{
+	unsigned int ents = STACK_BUF_ENTRIES;
+	struct debug_struct *db = (struct debug_struct *)arg;
+	unsigned long nip;
+	unsigned long r1;
+	unsigned long token;
+	unsigned long r1_caller;
+
+	if (debug_type != OPAL_DEBUG_DUMP_STACK)
+		return OPAL_PARAMETER;
+
+	nip = be64_to_cpu(db->nip);
+	r1 = be64_to_cpu(db->r1);
+	snprintf_symbol(debug_buf, 128, nip);
+	___backtrace(bt_buf, &ents, r1, &token, &r1_caller);
+	db->r1_caller = cpu_to_be64(r1_caller);
+	___print_backtrace(mfspr(SPR_PIR), bt_buf, ents, token, nip, r1_caller,
+			NULL, NULL, true);
+
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_DEBUG, opal_debug, 2);
+
 int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
 {
 	struct cpu_thread *cpu = this_cpu();
diff --git a/core/stack.c b/core/stack.c
index 10118e42..14eb0f76 100644
--- a/core/stack.c
+++ b/core/stack.c
@@ -65,8 +65,8 @@ void __nomcount ___backtrace(struct bt_entry *entries, unsigned int *count,
 
 void ___print_backtrace(unsigned int pir, struct bt_entry *entries,
 			      unsigned int count, unsigned long token,
-			      unsigned long r1_caller, char *out_buf,
-			      unsigned int *len, bool symbols)
+			      unsigned long nip, unsigned long r1_caller,
+			      char *out_buf, unsigned int *len, bool symbols)
 {
 	static char bt_text_buf[4096];
 	int i, l = 0, max;
@@ -85,7 +85,13 @@ void ___print_backtrace(unsigned int pir, struct bt_entry *entries,
 	tbot = SKIBOOT_BASE;
 	ttop = (unsigned long)&_etext;
 
-	l += snprintf(buf, max, "CPU %04x Backtrace:\n", pir);
+	if (nip) {
+		l += snprintf(buf, max, "CPU %04x PC: %016lx ", pir, nip);
+		l += snprintf_symbol(buf + l, max - l, nip);
+		l += snprintf(buf + l, max - l, " Backtrace:\n");
+	} else {
+		l += snprintf(buf, max, "CPU %04x Backtrace:\n", pir);
+	}
 	for (i = 0; i < count && l < max; i++) {
 		if (entries->sp < bottom || entries->sp > top)
 			mark = '!';
@@ -129,7 +135,7 @@ void backtrace(void)
 
 	___backtrace(bt_buf, &ents, (unsigned long)__builtin_frame_address(0),
 			&token, &r1_caller);
-	___print_backtrace(mfspr(SPR_PIR), bt_buf, ents, token, r1_caller,
+	___print_backtrace(mfspr(SPR_PIR), bt_buf, ents, token, 0, r1_caller,
 			NULL, NULL, true);
 
 	unlock(&bt_lock);
diff --git a/include/opal-api.h b/include/opal-api.h
index df71cf2d..726cdfa3 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -223,7 +223,8 @@
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
 #define OPAL_HANDLE_HMI2			166
-#define OPAL_LAST				166
+#define OPAL_DEBUG				167
+#define OPAL_LAST				167
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
diff --git a/include/stack.h b/include/stack.h
index 4d3e504d..55ee8e07 100644
--- a/include/stack.h
+++ b/include/stack.h
@@ -124,14 +124,14 @@ static inline void __backtrace(struct bt_entry *entries, unsigned int *count)
 /* Convert a backtrace to ASCII */
 extern void ___print_backtrace(unsigned int pir, struct bt_entry *entries,
 			      unsigned int count, unsigned long token,
-			      unsigned long r1_caller, char *out_buf,
-			      unsigned int *len, bool symbols);
+			      unsigned long nip, unsigned long r1_caller,
+			      char *out_buf, unsigned int *len, bool symbols);
 
 static inline void __print_backtrace(unsigned int pir, struct bt_entry *entries,
 			      unsigned int count, char *out_buf,
 			      unsigned int *len, bool symbols)
 {
-	___print_backtrace(pir, entries, count, OPAL_LAST + 1, 0, out_buf, len, symbols);
+	___print_backtrace(pir, entries, count, OPAL_LAST + 1, 0, 0, out_buf, len, symbols);
 }
 
 /* For use by debug code, create and print backtrace, uses a static buffer */
-- 
2.16.1



More information about the Skiboot mailing list