System Reset exception rewrite

Anton Blanchard anton at samba.org
Tue Mar 11 13:27:28 EST 2003


Hi,

I've been chasing a bug in 2.5 where we would hit the exception stack
guard page. The problem with the guard page is that it's impossible to
debug the problem without a hardware debugger: every exception will
hit the unmapped guard page and end up looping in a 0x300.

I spoke to Paulus and he suggested we should just do a compare and
branch to the debugger if we have overflowed our exception stack, and
I agree. However, the whole thing got me thinking about the system reset
exception.

Since this is our last-ditch effort to get debug out, we should make it
as failsafe as possible. I've started writing a handler that:

- uses a page in the paca for its stack
- does an slbia and enters slb entries manually before accessing areas
  of memory (eg the serial port ioremap segment and the kernel stack
  segment)
- serialises all cpu output but has a timeout in case one cpu locks up
  before dropping the lock
- starts with the safer things (registers, exception stack) then moves
  to more adventurous things (kernel stack backtrace)

I also moved the RTAS allocation into the first (256MB) segment. With
FWNMI it is critical to complete the rtas unlock method or all cpus
behind it will lock up. Allocating the rtas segment ensures we won't
take an SLB miss when reading the error info.

TODO:
- write the POWER3 equivalent for the segment entry code
- perhaps print more info
- pass control off to debugger once all cpus have dumped state

The patch was good enough to catch my bug: notice the exception stack,
where we took a 0x380 in fast_exception_return after we loaded srr0.

Anton

cpu 6: System Reset
pc    = 000000000000036c  lr    = 000000001002e2ac  sp    = 00000000ffffdbc0
msr   = 9000000000001000  dar   = c0000000005ab03c  dsisr = 000000000a000000
cr    = 0000000028022448  ctr   = 0000000000000000  xer   = 0000000020000000
sprg0 = 0000000000000000  sprg1 = 00000000ffffe808  sprg2 = 0000000000000000
sprg3 = c0000000005aa000  curr  = c000000000009f94  dec   = 000000009492b228
exsp  = c0000000005aafc0  ksp   = c0000007b4c23e30
R00   = 0000000000000002  R10   = 0000000000000000  R20   = c0000000005aa000
R01   = 00000000ffffdbc0  R11   = 0000000000000000  R21   = c0000000005ab000
R02   = 0000000000000000  R12   = 0000000020022448  R22   = c0000000000089d4
R03   = 0000000000000000  R13   = 000000001009c498  R23   = b000000000001032
R04   = 00000000ffffe788  R14   = 0000000000000000  R24   = 0000000000000000
R05   = 0000000000000000  R15   = 0000000000000000  R25   = 0000000000000000
R06   = 00000049beae2dba  R16   = 0000000000000000  R26   = 00000000ffffffff
R07   = 0000000000000001  R17   = 0000000000000000  R27   = 0000000000000006
R08   = ffe55f5d00000080  R18   = 0000000000000000  R28   = 000000001009c7a0
R09   = 0000000000000000  R19   = 00000000100a11f0  R29   = 00000000100a11f0
R31   = 0000000000000000  R32   = 000000000000036c
exception stack:
c0000000000089bc 9000000000001030 00000000100a16b8 00000000ffffffff
0000000000000000 00000000ffffffff 00000000ffffe890 0020000000000380
c000000000008b34 9000000000001032 c0000000005aa000 c0000000005aaf00
00000000ffffe780 0000000028022448 00000000ffffe508 0220000000000380
exception stack overflow:
c000000000008b34 9000000000001032 c0000000005aa000 c0000000005aaf40
00000000ffffe490 0000000028022448 00000000ffffe218 0220000000000380



diff -ru ppc64-2.5-old/arch/ppc64/kernel/asm-offsets.c ppc64-2.5/arch/ppc64/kernel/asm-offsets.c
--- ppc64-2.5-old/arch/ppc64/kernel/asm-offsets.c	2003-02-12 07:02:23.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/asm-offsets.c	2003-03-10 15:30:04.000000000 -0600
@@ -96,6 +96,7 @@
         DEFINE(PACAREGSAV, offsetof(struct paca_struct, xRegSav));
         DEFINE(PACAEXC, offsetof(struct paca_struct, exception_stack));
         DEFINE(PACAGUARD, offsetof(struct paca_struct, guard));
+	DEFINE(PACA_SYSRESET_STACK, offsetof(struct paca_struct, system_reset_stack) + 0x1000);
         DEFINE(LPPACASRR0, offsetof(struct ItLpPaca, xSavedSrr0));
         DEFINE(LPPACASRR1, offsetof(struct ItLpPaca, xSavedSrr1));
 	DEFINE(LPPACAANYINT, offsetof(struct ItLpPaca, xIntDword.xAnyInt));
@@ -135,6 +136,9 @@
 	DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21]));
 	DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22]));
 	DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23]));
+	DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30]));
+	DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31]));
+
 	/*
 	 * Note: these symbols include _ because they overlap with special
 	 * register names
diff -ru ppc64-2.5-old/arch/ppc64/kernel/head.S ppc64-2.5/arch/ppc64/kernel/head.S
--- ppc64-2.5-old/arch/ppc64/kernel/head.S	2003-03-02 17:25:01.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/head.S	2003-03-10 16:19:54.000000000 -0600
@@ -209,6 +209,39 @@
 	mfcr    r23;                    /* save CR in r23             */ \
 	rfid

+#define SYSTEMRESET_PROLOG_PSERIES(label) /* save state, rfid to label */ \
+	mtspr	SPRG1,r30;		/* stash r30/r31 in SPRG1/SPRG2 */ \
+	mtspr	SPRG2,r31; \
+	mfspr	r30,SPRG3;		/* r30 = SPRG3, used as paca pointer */ \
+	addi	r31,r30,PACA_SYSRESET_STACK-INT_FRAME_SIZE; /* frame inside stack */ \
+	SAVE_2GPRS(28, r31);		/* r28-r29; r29 is scratch below */ \
+	mfspr	r29,SRR0;		/* interrupted pc and msr */ \
+	std	r29,_NIP(r31); \
+	mfspr	r29,SRR1; \
+	std	r29,_MSR(r31); \
+	mfspr	r29,DAR; \
+	std	r29,_DAR(r31);\
+	mfspr	r29,DSISR; \
+	std	r29,_DSISR(r31); \
+	mflr	r29; \
+	std	r29,_LINK(r31); \
+	mfcr	r29; \
+	std	r29,_CCR(r31); \
+	mfctr	r29; \
+	std	r29,_CTR(r31); \
+	mfspr	r29,XER; \
+	std	r29,_XER(r31); \
+	SAVE_8GPRS(0, r31);		/* r0-r15 */ \
+	SAVE_8GPRS(8, r31); \
+	LOADADDR(r29,(label));		/* rfid target: label ... */ \
+	mtspr	SRR0,r29; \
+	SET_REG_TO_CONST(r29,MSR_KERNEL); /* ... entered with MSR_KERNEL */ \
+	mtspr	SRR1,r29; \
+	isync; \
+	slbia;				/* invalidate SLB entries */ \
+	isync; \
+	rfid
+
 /*
  * This is the start of the interrupt handlers for iSeries
  * This code runs with relocation on.
@@ -342,7 +375,9 @@
 	.globl __start_interrupts
 __start_interrupts:

-	STD_EXCEPTION_PSERIES( 0x100, SystemReset )
+	.globl SystemReset_Pseries
+	SYSTEMRESET_PROLOG_PSERIES(system_reset_handler)
+
 	STD_EXCEPTION_PSERIES( 0x200, MachineCheck )
 	STD_EXCEPTION_PSERIES( 0x300, DataAccess )
 	STD_EXCEPTION_PSERIES( 0x380, DataAccessSLB )
@@ -524,7 +559,7 @@
 	. = 0x8000
 	.globl SystemReset_FWNMI
 SystemReset_FWNMI:
-	EXCEPTION_PROLOG_PSERIES(0x100, SystemReset_common)
+	SYSTEMRESET_PROLOG_PSERIES(system_reset_handler)
 	.globl MachineCheck_FWNMI
 MachineCheck_FWNMI:
 	EXCEPTION_PROLOG_PSERIES(0x200, MachineCheck_common)
@@ -1921,6 +1956,31 @@
 	blr
 #endif

+#ifdef CONFIG_PPC_PSERIES
+system_reset_handler:		/* rfid target of SYSTEMRESET_PROLOG_PSERIES */
+	/* save remaining (r16-r27, r30-r31) GPRs */
+	SAVE_8GPRS(16, r31)
+	SAVE_4GPRS(24, r31)
+	mfspr	r29,SPRG1		/* original r30 was stashed in SPRG1 */
+	std	r29,GPR30(r31)
+	mfspr	r29,SPRG2		/* original r31 was stashed in SPRG2 */
+	std	r29,GPR31(r31)
+	/* load up stack and paca pointer */
+	mr	r1,r31
+	ld	r2,PACATOC(r30)
+	mr	r13,r30
+	/* *regs in r3 */
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	bl	.system_reset_entry	/* does not return: ends in a while(1) */
+
+	#if 0
+	/* fixme - return from system reset */
+	ld	r31,PACASYSRESETSP(r13);
+	addi	r31,r31,INT_FRAME_SIZE;
+	std	r31,PACASYSRESETSP(r30);
+	#endif
+#endif
+
 /*
  * We put a few things here that have to be page-aligned.
  * This stuff goes at the beginning of the data segment,
diff -ru ppc64-2.5-old/arch/ppc64/kernel/stab.c ppc64-2.5/arch/ppc64/kernel/stab.c
--- ppc64-2.5-old/arch/ppc64/kernel/stab.c	2003-03-05 20:11:03.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/stab.c	2003-03-09 18:28:25.000000000 -0600
@@ -228,8 +228,8 @@
 	asm volatile("isync" : : : "memory" );   /* Order update */
 }

-static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
-				  int kernel_segment)
+void __ste_allocate(unsigned long esid, unsigned long vsid,
+		    int kernel_segment)
 {
 	if (cpu_has_slb()) {
 #ifndef CONFIG_PPC_ISERIES
diff -ru ppc64-2.5-old/arch/ppc64/kernel/traps.c ppc64-2.5/arch/ppc64/kernel/traps.c
--- ppc64-2.5-old/arch/ppc64/kernel/traps.c	2003-02-12 07:02:23.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/traps.c	2003-03-09 20:30:51.000000000 -0600
@@ -96,7 +96,7 @@
  * the actual r3 if possible, and a ptr to the error log entry
  * will be returned if found.
  */
-static struct rtas_error_log *FWNMI_get_errinfo(struct pt_regs *regs)
+struct rtas_error_log *FWNMI_get_errinfo(struct pt_regs *regs)
 {
 	unsigned long errdata = regs->gpr[3];
 	struct rtas_error_log *errhdr = NULL;
@@ -117,7 +117,7 @@
  * It will release the saved data area for other CPUs in the
  * partition to receive FWNMI errors.
  */
-static void FWNMI_release_errinfo(void)
+void FWNMI_release_errinfo(void)
 {
 	unsigned long ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
 	if (ret != 0)
diff -ru ppc64-2.5-old/arch/ppc64/xmon/privinst.h ppc64-2.5/arch/ppc64/xmon/privinst.h
--- ppc64-2.5-old/arch/ppc64/xmon/privinst.h	2003-02-12 07:02:23.000000000 -0600
+++ ppc64-2.5/arch/ppc64/xmon/privinst.h	2003-03-09 18:26:03.000000000 -0600
@@ -33,7 +33,7 @@
 GSETSPR(9, ctr)
 GSETSPR(18, dsisr)
 GSETSPR(19, dar)
-GSETSPR(22, dec)
+/* GSETSPR(22, dec) */
 GSETSPR(25, sdr1)
 GSETSPR(26, srr0)
 GSETSPR(27, srr1)
diff -ru ppc64-2.5-old/arch/ppc64/xmon/xmon.c ppc64-2.5/arch/ppc64/xmon/xmon.c
--- ppc64-2.5-old/arch/ppc64/xmon/xmon.c	2003-03-02 17:25:01.000000000 -0600
+++ ppc64-2.5/arch/ppc64/xmon/xmon.c	2003-03-10 18:12:00.000000000 -0600
@@ -26,6 +26,7 @@
 #include <asm/naca.h>
 #include <asm/paca.h>
 #include <asm/ppcdebug.h>
+#include <asm/time.h>
 #include "nonstdio.h"
 #include "privinst.h"

@@ -2446,3 +2447,194 @@
 		cmd = skipbl();
 	}
 }
+
+#ifdef CONFIG_PPC_PSERIES
+extern void *udbg_comport;
+void __ste_allocate(unsigned long esid, unsigned long vsid,
+		    int kernel_segment);
+extern int fwnmi_active;
+
+static unsigned long system_reset_serialise;
+
+/* We wait this long before declaring the cpu to be locked up */
+#define SECONDS_TO_SPIN 20
+
+static void insert_kernel_segment(unsigned long ea)
+{
+	unsigned long seg = ea >> SID_SHIFT;	/* segment id of ea */
+
+	/* Map the segment holding ea. XXX FIXME: non-SLB cpus do nothing */
+	if (!cpu_has_slb())
+		return;
+
+	/* first kernel segment is always mapped, nothing to insert */
+	if (seg == (PAGE_OFFSET >> SID_SHIFT))
+		return;
+
+	/*
+	 * XXX should search to see if we have this mapped
+	 */
+	__ste_allocate(seg, get_kernel_vsid(ea), 0);
+}
+
+static void print_exception_stack(void)
+{
+	/* View this cpu's paca exception stack as unsigned long words */
+	unsigned long *ex = (unsigned long *)&get_paca()->exception_stack[0];
+	int overflow = 0;
+	int i;
+
+	printf("exception stack:\n");
+
+	/* Dump the first 16 words, four per line */
+	for (i = 0; i < 4; i++)
+		printf("%.16lx %.16lx %.16lx %.16lx\n", ex[i*4], ex[i*4+1],
+		       ex[i*4+2], ex[i*4+3]);
+
+	/* Any non-zero word among the next 8 is reported as an overflow */
+	for (i = 0; i < 8; i++) {
+		if (ex[16+i]) {
+			overflow = 1;
+			break;
+		}
+	}
+
+	if (overflow) {
+		printf("exception stack overflow:\n");
+
+		for (i = 0; i < 2; i++)
+			printf("%.16lx %.16lx %.16lx %.16lx\n", ex[i*4+16],
+			       ex[i*4+17], ex[i*4+18], ex[i*4+19]);
+	}
+}
+
+static void print_backtrace(unsigned long sp)
+{
+	unsigned long bottom, top;	/* bounds of the stack containing sp */
+	unsigned char *c;		/* byte view of that stack */
+	int i = 1;			/* 1-based count of entries printed */
+
+	/* Only walk a stack pointer that lies in the kernel region ... */
+	if (REGION_ID(sp) != KERNEL_REGION_ID)
+		return;
+	/* ... and whose page frame number is valid */
+	if (!pfn_valid(__pa(sp) >> PAGE_SHIFT))
+		return;
+
+	insert_kernel_segment(sp);
+
+	bottom = sp & ~(THREAD_SIZE-1UL);
+	top = bottom + THREAD_SIZE;
+
+	/* very simple backtrace which doesn't handle irq stacks */
+	printf("Backtrace:\n");	/* printf, like the rest of this dump */
+	while (sp >= bottom && sp + 16 + sizeof(unsigned long) <= top) {
+		printf("%.16lx ", *(unsigned long *)(sp + 16));
+		sp = *(unsigned long *)sp;	/* follow the word stored at sp */
+		if (!(i % 4))
+			printf("\n");
+		if (i++ == 32)		/* at most 32 entries */
+			break;
+	}
+	i--;
+	if (i % 4)			/* finish a partially filled row */
+		printf("\n");
+
+	/* warn if we went close to bottom of stack */
+	c = (unsigned char *)bottom;
+
+	/* XXX * 4 temporary */
+	for (i = sizeof(struct thread_info); i < 4096*4; i++) {
+		if (c[i]) {
+			printf("warning stack usage %d bytes from end\n", i);
+			break;
+		}
+	}
+}
+
+void system_reset_entry(struct pt_regs *regs)
+{
+	/* Dump this cpu's state from the saved regs frame, then spin forever */
+	int cpu = smp_processor_id();
+	unsigned long startticks, spinticks;
+	int i;
+
+	spinticks = (SECONDS_TO_SPIN * (cpu+1)) * tb_ticks_per_sec;
+
+	if (fwnmi_active) {
+		struct rtas_error_log *errhdr = FWNMI_get_errinfo(regs);
+		if (errhdr) {
+			/* XXX Should look at FWNMI information */
+		}
+		FWNMI_release_errinfo();
+	}
+
+	/*
+	 * Make sure the segment that maps the serial port is loaded.
+	 * We assume it is bolted into the hashed pagetable
+	 */
+	if (udbg_comport)
+		insert_kernel_segment((unsigned long)udbg_comport);
+
+#ifdef CONFIG_SMP
+	/* Wait for other cpus to dump state; the wait is bounded (and longer
+	 * for higher cpu numbers) in case the lock holder has locked up.
+	 */
+	startticks = get_tb();
+	while ((get_tb() - startticks) < spinticks) {
+		if (!test_and_set_bit(0, &system_reset_serialise))
+			break;
+	}
+
+	printf("cpu %d: ", cpu);
+#endif
+
+	printf("System Reset\n");
+	printf("pc    = %.16lx  lr    = %.16lx  sp    = %.16lx\n", regs->nip,
+	       regs->link, regs->gpr[1]);
+	printf("msr   = %.16lx  dar   = %.16lx  dsisr = %.16lx\n", regs->msr,
+	       regs->dar, regs->dsisr);
+	printf("cr    = %.16lx  ctr   = %.16lx  xer   = %.16lx\n", regs->ccr,
+	       regs->ctr, regs->xer);
+	printf("sprg0 = %.16lx  sprg1 = %.16lx  sprg2 = %.16lx\n", get_sprg0(),
+	       get_sprg1(), get_sprg2());
+	printf("sprg3 = %.16lx  curr  = %.16lx  dec   = %.16lx\n", get_sprg3(),
+	       (unsigned long)current, get_dec());
+	printf("exsp  = %.16lx  ksp   = %.16lx\n", get_paca()->exception_sp,
+	       get_paca()->xKsave);
+
+	for (i = 0; i < 10; i++) {	/* three columns: r0-r29 */
+		printf("R%.2d   = %.16lx  R%.2d   = %.16lx  "
+		       "R%.2d   = %.16lx\n", i, regs->gpr[i],
+		       i+10, regs->gpr[i+10], i+20, regs->gpr[i+20]);
+	}
+	printf("R30   = %.16lx  R31   = %.16lx\n", regs->gpr[30],
+	       regs->gpr[31]);		/* the two registers the loop omits */
+
+	print_exception_stack();
+	print_backtrace(regs->gpr[1]);
+
+#if 0
+	if (!cpu_has_slb()) {
+		printf("asr  = %.16lx\n", mfasr());
+		for (i = 0; i < 8; ++i)
+			printf("sr%.2ld = %.16lx  sr%.2ld = %.16lx\n", i,
+			       get_sr(i), i+8, get_sr(i+8));
+	}
+
+	printf("pid = %ld, comm = %s\n", current->pid, current->comm);
+#endif
+
+	/* Warn if we took an exception */
+	if ((get_srr0() != regs->nip) || (get_srr1() != regs->msr)) {
+		printf("warning srr0/srr1 is now\n");
+		printf("srr0  = %.16lx   srr1  = %.16lx\n", get_srr0(),
+		       get_srr1());
+	}
+
+	clear_bit(0, &system_reset_serialise);	/* let the next cpu print */
+
+	while(1)
+		; /* loop forever */
+}
+#endif
diff -ru ppc64-2.5-old/include/asm-ppc64/paca.h ppc64-2.5/include/asm-ppc64/paca.h
--- ppc64-2.5-old/include/asm-ppc64/paca.h	2003-02-12 07:02:43.000000000 -0600
+++ ppc64-2.5/include/asm-ppc64/paca.h	2003-03-10 15:27:41.000000000 -0600
@@ -160,6 +160,8 @@
  *=====================================================================================
  */
 	u8 guard[0x1000];               /* ... and then hang 'em         */
+
+	u8 system_reset_stack[0x1000];
 };

 #endif /* _PPC64_PACA_H */
diff -ru ppc64-2.5-old/include/asm-ppc64/rtas.h ppc64-2.5/include/asm-ppc64/rtas.h
--- ppc64-2.5-old/include/asm-ppc64/rtas.h	2003-02-13 05:15:03.000000000 -0600
+++ ppc64-2.5/include/asm-ppc64/rtas.h	2003-03-10 18:10:25.000000000 -0600
@@ -17,7 +17,7 @@
  */

 #define RTAS_UNKNOWN_SERVICE (-1)
-#define RTAS_INSTANTIATE_MAX (1UL<<30) /* Don't instantiate rtas at/above this value */
+#define RTAS_INSTANTIATE_MAX (1UL<<28) /* Don't instantiate rtas at/above this value */

 /*
  * In general to call RTAS use rtas_token("string") to lookup

** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/




More information about the Linuxppc64-dev mailing list