[Pdbg] [PATCH v3] stack: guess endian for stack frame walking

Nicholas Piggin npiggin at gmail.com
Mon Oct 29 19:27:42 AEDT 2018


The stack unwinder currently does not do any endian conversion, which
means it won't work correctly if the endianness of the stack does not
match that of the host running pdbg.

This patch attempts an endian flip if the stack looks wrong, and goes
with that if it's an improvement. It also has some magic hackery to
take OPAL->Linux into account. Unfortunately this is not a "clean"
fully general solution, but works reasonably well in practice.

This is the regs --backtrace output for a test that has a CPU hang in
an OPAL call from Linux:

STACK:           SP                NIA
 0x0000000031c43cb0 0x000000003002b324 (big-endian)
 0x0000000031c43d20 0x00000000300051e4 (big-endian)
 0xc000200006283b60 0xc00000000008f1c8 (little-endian)
 0xc000200006283c40 0xc00000000002af18 (little-endian)
 0xc000200006283c70 0xc000000000114064 (little-endian)
 0xc000200006283ce0 0xc0000000001144d0 (little-endian)
SP:0xc000200006283e30 points to 0x00007fffe28d0cb0, not unwinding

We can see the stack unwind from OPAL to Linux to userspace (which
does not get decoded -- yet).

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
v3: improve detection a bit more to cross OPAL/Linux boundary
(suggested by mpe)

 src/thread.c | 80 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 70 insertions(+), 10 deletions(-)

diff --git a/src/thread.c b/src/thread.c
index e755620..dcf4f3a 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -105,10 +105,19 @@ static int load8(struct pdbg_target *target, uint64_t addr, uint64_t *value)
 	return 1;
 }
 
+uint64_t flip_endian(uint64_t v)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	return be64toh(v);
+#else
+	return le64toh(v);
+#endif
+}
+
 static int dump_stack(struct thread_regs *regs)
 {
 	struct pdbg_target *target;
-	uint64_t sp = regs->gprs[1];
+	uint64_t next_sp = regs->gprs[1];
 	uint64_t pc;
 
 	pdbg_for_each_class_target("adu", target) {
@@ -117,22 +126,73 @@ static int dump_stack(struct thread_regs *regs)
 		break;
 	}
 
-	printf("STACK:\n");
+	printf("STACK:           SP                NIA\n");
 	if (!target)
 		pdbg_log(PDBG_ERROR, "Unable to read memory (no ADU found)\n");
 
-	if (sp && is_real_address(regs, sp)) {
-		if (!load8(target, sp, &sp))
+	if (!(next_sp && is_real_address(regs, next_sp))) {
+		printf("SP:0x%016" PRIx64 " does not appear to be a stack\n", next_sp);
+		return 0;
+	}
+
+	for (;;) {
+		uint64_t sp = next_sp;
+		uint64_t tmp, tmp2;
+		bool flip = false;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+		bool be = false;
+#else
+		bool be = true;
+#endif
+
+		if (!load8(target, sp, &tmp))
+			return 1;
+		if (!load8(target, sp + 16, &pc))
 			return 1;
-		while (sp && is_real_address(regs, sp)) {
-			if (!load8(target, sp + 16, &pc))
-				return 1;
 
-			printf(" 0x%016" PRIx64 " 0x%16" PRIx64 "\n", sp, pc);
+		if (!tmp) {
+badstack:
+			printf("SP:0x%016" PRIx64 " points to 0x%016" PRIx64 ", not unwinding\n", sp, tmp);
+			return 0;
+		}
+
+		tmp2 = flip_endian(tmp);
+
+		/*
+		 * Basic endian detection.
+		 * Stack grows down, so as we unwind it we expect to see
+		 * increasing addresses without huge jumps.  The stack may
+		 * switch endian-ness across frames in some cases (e.g., LE
+		 * kernel calling BE OPAL).
+		 */
+		if (sp >= 0x30000000UL && sp < 0x40000000UL) {
+			/* Check for OPAL stack -> Linux stack */
+			if (tmp >> 60 == 0xc)
+				goto no_flip;
+			else if (tmp2 >> 60 == 0xc)
+				goto do_flip;
+		}
 
-			if (!load8(target, sp, &sp))
-				return 1;
+		if (tmp < sp || (tmp - sp > 0xffffffffUL)) {
+			if (tmp2 < sp || (tmp2 - sp > 0xffffffffUL))
+				goto badstack;
+do_flip:
+			next_sp = tmp2;
+			flip = true;
+			be = !be;
+		} else {
+no_flip:
+			next_sp = tmp;
 		}
+
+		if (!is_real_address(regs, sp))
+			break;
+
+		if (flip)
+			pc = flip_endian(pc);
+
+		printf(" 0x%016" PRIx64 " 0x%016" PRIx64 " (%s)\n",
+			sp, pc, be ? "big-endian" : "little-endian");
 	}
 
 	return 0;
-- 
2.18.0



More information about the Pdbg mailing list