System Reset exception rewrite
Anton Blanchard
anton at samba.org
Tue Mar 11 13:27:28 EST 2003
Hi,
I've been chasing a bug in 2.5 where we would hit the exception stack
guard page. The problem with the guard page is that it's impossible to
debug the problem without a hardware debugger: every exception will
hit the unmapped guard page and end up looping in a 0x300.
I spoke to Paulus and he suggested we should just do a compare and
branch to the debugger if we have overflowed our exception stack and
I agree. However the whole thing got me thinking about the system reset
exception.
Since this is our last-ditch effort to get debug output, we should make it
as failsafe as possible. I've started writing a handler that:
- uses a page in the paca for its stack
- does an slbia and enters slb entries manually before accessing areas
of memory (eg the serial port ioremap segment and the kernel stack
segment)
- serialises all cpu output but has a timeout in case one cpu locks up
before dropping the lock
- starts with the safer things (registers, exception stack) then moves
to more adventurous things (kernel stack backtrace)
I also moved the RTAS allocation into the first (256MB) segment. With
FWNMI it is critical to complete the rtas unlock method or all cpus
behind it will lock up. Keeping RTAS inside the first segment, which is
always mapped, ensures we won't take an SLB miss when reading the error info.
TODO:
- write the POWER3 equivalent for the segment entry code
- perhaps print more info
- pass control off to debugger once all cpus have dumped state
The patch was good enough to catch my bug, notice the exception stack
where we took a 0x380 in fast_exception_return after we loaded srr0.
Anton
cpu 6: System Reset
pc = 000000000000036c lr = 000000001002e2ac sp = 00000000ffffdbc0
msr = 9000000000001000 dar = c0000000005ab03c dsisr = 000000000a000000
cr = 0000000028022448 ctr = 0000000000000000 xer = 0000000020000000
sprg0 = 0000000000000000 sprg1 = 00000000ffffe808 sprg2 = 0000000000000000
sprg3 = c0000000005aa000 curr = c000000000009f94 dec = 000000009492b228
exsp = c0000000005aafc0 ksp = c0000007b4c23e30
R00 = 0000000000000002 R10 = 0000000000000000 R20 = c0000000005aa000
R01 = 00000000ffffdbc0 R11 = 0000000000000000 R21 = c0000000005ab000
R02 = 0000000000000000 R12 = 0000000020022448 R22 = c0000000000089d4
R03 = 0000000000000000 R13 = 000000001009c498 R23 = b000000000001032
R04 = 00000000ffffe788 R14 = 0000000000000000 R24 = 0000000000000000
R05 = 0000000000000000 R15 = 0000000000000000 R25 = 0000000000000000
R06 = 00000049beae2dba R16 = 0000000000000000 R26 = 00000000ffffffff
R07 = 0000000000000001 R17 = 0000000000000000 R27 = 0000000000000006
R08 = ffe55f5d00000080 R18 = 0000000000000000 R28 = 000000001009c7a0
R09 = 0000000000000000 R19 = 00000000100a11f0 R29 = 00000000100a11f0
R31 = 0000000000000000 R32 = 000000000000036c
exception stack:
c0000000000089bc 9000000000001030 00000000100a16b8 00000000ffffffff
0000000000000000 00000000ffffffff 00000000ffffe890 0020000000000380
c000000000008b34 9000000000001032 c0000000005aa000 c0000000005aaf00
00000000ffffe780 0000000028022448 00000000ffffe508 0220000000000380
exception stack overflow:
c000000000008b34 9000000000001032 c0000000005aa000 c0000000005aaf40
00000000ffffe490 0000000028022448 00000000ffffe218 0220000000000380
diff -ru ppc64-2.5-old/arch/ppc64/kernel/asm-offsets.c ppc64-2.5/arch/ppc64/kernel/asm-offsets.c
--- ppc64-2.5-old/arch/ppc64/kernel/asm-offsets.c 2003-02-12 07:02:23.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/asm-offsets.c 2003-03-10 15:30:04.000000000 -0600
@@ -96,6 +96,7 @@
DEFINE(PACAREGSAV, offsetof(struct paca_struct, xRegSav));
DEFINE(PACAEXC, offsetof(struct paca_struct, exception_stack));
DEFINE(PACAGUARD, offsetof(struct paca_struct, guard));
+ DEFINE(PACA_SYSRESET_STACK, offsetof(struct paca_struct, system_reset_stack) + 0x1000);
DEFINE(LPPACASRR0, offsetof(struct ItLpPaca, xSavedSrr0));
DEFINE(LPPACASRR1, offsetof(struct ItLpPaca, xSavedSrr1));
DEFINE(LPPACAANYINT, offsetof(struct ItLpPaca, xIntDword.xAnyInt));
@@ -135,6 +136,9 @@
DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21]));
DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22]));
DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23]));
+ DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30]));
+ DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31]));
+
/*
* Note: these symbols include _ because they overlap with special
* register names
diff -ru ppc64-2.5-old/arch/ppc64/kernel/head.S ppc64-2.5/arch/ppc64/kernel/head.S
--- ppc64-2.5-old/arch/ppc64/kernel/head.S 2003-03-02 17:25:01.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/head.S 2003-03-10 16:19:54.000000000 -0600
@@ -209,6 +209,39 @@
mfcr r23; /* save CR in r23 */ \
rfid
+/*
+ * Entry prolog for the pSeries system reset (0x100) exception.
+ *
+ * Parks r30/r31 in SPRG1/SPRG2, loads SPRG3 into r30 and points r31 at
+ * r30 + PACA_SYSRESET_STACK. It then stores SRR0, SRR1, DAR, DSISR, LR,
+ * CR, CTR and XER (each staged through r29) into the _NIP/_MSR/_DAR/
+ * _DSISR/_LINK/_CCR/_CTR/_XER slots relative to r31, followed by the
+ * SAVE_2GPRS/SAVE_8GPRS register saves (presumably r28-r29 and r0-r15).
+ * Finally it sets SRR0 = label and SRR1 = MSR_KERNEL, issues
+ * isync; slbia; isync and executes rfid to continue at "label".
+ *
+ * NOTE(review): PACA_SYSRESET_STACK is defined in asm-offsets.c as the
+ * offset of system_reset_stack plus 0x1000, i.e. the end of that array,
+ * and the save slots are presumably at positive offsets from r31 (GPR30
+ * and GPR31 are STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, ...)).
+ * Confirm the frame does not land beyond system_reset_stack; an
+ * INT_FRAME_SIZE adjustment may be missing.
+ */
+#define SYSTEMRESET_PROLOG_PSERIES(label) \
+ /* free r30/r31 for use as paca and frame pointers */ \
+ mtspr SPRG1,r30; \
+ mtspr SPRG2,r31; \
+ mfspr r30,SPRG3; \
+ addi r31,r30,PACA_SYSRESET_STACK; \
+ /* r29 is saved here and then reused as scratch below */ \
+ SAVE_2GPRS(28, r31); \
+ mfspr r29,SRR0; \
+ std r29,_NIP(r31); \
+ mfspr r29,SRR1; \
+ std r29,_MSR(r31); \
+ mfspr r29,DAR; \
+ std r29,_DAR(r31);\
+ mfspr r29,DSISR; \
+ std r29,_DSISR(r31); \
+ mflr r29; \
+ std r29,_LINK(r31); \
+ mfcr r29; \
+ std r29,_CCR(r31); \
+ mfctr r29; \
+ std r29,_CTR(r31); \
+ mfspr r29,XER; \
+ std r29,_XER(r31); \
+ SAVE_8GPRS(0, r31); \
+ SAVE_8GPRS(8, r31); \
+ /* rfid target: "label", entered with MSR_KERNEL */ \
+ LOADADDR(r29,(label)); \
+ mtspr SRR0,r29; \
+ SET_REG_TO_CONST(r29,MSR_KERNEL); \
+ mtspr SRR1,r29; \
+ /* drop SLB entries; the C handler re-enters the ones it needs */ \
+ isync; \
+ slbia; \
+ isync; \
+ rfid
+
/*
* This is the start of the interrupt handlers for iSeries
* This code runs with relocation on.
@@ -342,7 +375,9 @@
.globl __start_interrupts
__start_interrupts:
- STD_EXCEPTION_PSERIES( 0x100, SystemReset )
+ .globl SystemReset_Pseries
+ SYSTEMRESET_PROLOG_PSERIES(system_reset_handler)
+
STD_EXCEPTION_PSERIES( 0x200, MachineCheck )
STD_EXCEPTION_PSERIES( 0x300, DataAccess )
STD_EXCEPTION_PSERIES( 0x380, DataAccessSLB )
@@ -524,7 +559,7 @@
. = 0x8000
.globl SystemReset_FWNMI
SystemReset_FWNMI:
- EXCEPTION_PROLOG_PSERIES(0x100, SystemReset_common)
+ SYSTEMRESET_PROLOG_PSERIES(system_reset_handler)
.globl MachineCheck_FWNMI
MachineCheck_FWNMI:
EXCEPTION_PROLOG_PSERIES(0x200, MachineCheck_common)
@@ -1921,6 +1956,31 @@
blr
#endif
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Second half of the system reset entry path; this is the label passed
+ * to SYSTEMRESET_PROLOG_PSERIES, so it is reached via that macro's rfid.
+ * On entry r30 holds the SPRG3 value and r31 the save-frame pointer, and
+ * the original r30/r31 are still parked in SPRG1/SPRG2.
+ *
+ * Saves the remaining GPRs, switches r1 to the frame, loads the TOC from
+ * PACATOC(r30), copies r30 to r13 and calls system_reset_entry() with
+ * r3 = r1 + STACK_FRAME_OVERHEAD (the pt_regs pointer).
+ *
+ * There is no return path: system_reset_entry() ends in an endless loop
+ * and the code under "#if 0" is an unfinished sketch.
+ */
+system_reset_handler:
+ /*
+  * save remaining (r16-r27, r30-r31) GPRs
+  * NOTE(review): the backslash after this comment looks like a leftover
+  * from macro form; it only splices the next line onto this one.
+  */ \
+ SAVE_8GPRS(16, r31)
+ SAVE_4GPRS(24, r31)
+ /* original r30/r31 were stashed in SPRG1/SPRG2 by the prolog */
+ mfspr r29,SPRG1
+ std r29,GPR30(r31)
+ mfspr r29,SPRG2
+ std r29,GPR31(r31)
+ /* load up stack and paca pointer */
+ mr r1,r31
+ ld r2,PACATOC(r30)
+ mr r13,r30
+ /* *regs in r3 */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .system_reset_entry
+
+ #if 0
+ /*
+  * fixme - return from system reset
+  * NOTE(review): PACASYSRESETSP is not defined anywhere in this patch.
+  */
+ ld r31,PACASYSRESETSP(r13);
+ addi r31,r31,INT_FRAME_SIZE;
+ std r31,PACASYSRESETSP(r30);
+ #endif
+#endif
+
/*
* We put a few things here that have to be page-aligned.
* This stuff goes at the beginning of the data segment,
diff -ru ppc64-2.5-old/arch/ppc64/kernel/stab.c ppc64-2.5/arch/ppc64/kernel/stab.c
--- ppc64-2.5-old/arch/ppc64/kernel/stab.c 2003-03-05 20:11:03.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/stab.c 2003-03-09 18:28:25.000000000 -0600
@@ -228,8 +228,8 @@
asm volatile("isync" : : : "memory" ); /* Order update */
}
-static inline void __ste_allocate(unsigned long esid, unsigned long vsid,
- int kernel_segment)
+void __ste_allocate(unsigned long esid, unsigned long vsid,
+ int kernel_segment)
{
if (cpu_has_slb()) {
#ifndef CONFIG_PPC_ISERIES
diff -ru ppc64-2.5-old/arch/ppc64/kernel/traps.c ppc64-2.5/arch/ppc64/kernel/traps.c
--- ppc64-2.5-old/arch/ppc64/kernel/traps.c 2003-02-12 07:02:23.000000000 -0600
+++ ppc64-2.5/arch/ppc64/kernel/traps.c 2003-03-09 20:30:51.000000000 -0600
@@ -96,7 +96,7 @@
* the actual r3 if possible, and a ptr to the error log entry
* will be returned if found.
*/
-static struct rtas_error_log *FWNMI_get_errinfo(struct pt_regs *regs)
+struct rtas_error_log *FWNMI_get_errinfo(struct pt_regs *regs)
{
unsigned long errdata = regs->gpr[3];
struct rtas_error_log *errhdr = NULL;
@@ -117,7 +117,7 @@
* It will release the saved data area for other CPUs in the
* partition to receive FWNMI errors.
*/
-static void FWNMI_release_errinfo(void)
+void FWNMI_release_errinfo(void)
{
unsigned long ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
if (ret != 0)
diff -ru ppc64-2.5-old/arch/ppc64/xmon/privinst.h ppc64-2.5/arch/ppc64/xmon/privinst.h
--- ppc64-2.5-old/arch/ppc64/xmon/privinst.h 2003-02-12 07:02:23.000000000 -0600
+++ ppc64-2.5/arch/ppc64/xmon/privinst.h 2003-03-09 18:26:03.000000000 -0600
@@ -33,7 +33,7 @@
GSETSPR(9, ctr)
GSETSPR(18, dsisr)
GSETSPR(19, dar)
-GSETSPR(22, dec)
+/* GSETSPR(22, dec) */
GSETSPR(25, sdr1)
GSETSPR(26, srr0)
GSETSPR(27, srr1)
diff -ru ppc64-2.5-old/arch/ppc64/xmon/xmon.c ppc64-2.5/arch/ppc64/xmon/xmon.c
--- ppc64-2.5-old/arch/ppc64/xmon/xmon.c 2003-03-02 17:25:01.000000000 -0600
+++ ppc64-2.5/arch/ppc64/xmon/xmon.c 2003-03-10 18:12:00.000000000 -0600
@@ -26,6 +26,7 @@
#include <asm/naca.h>
#include <asm/paca.h>
#include <asm/ppcdebug.h>
+#include <asm/time.h>
#include "nonstdio.h"
#include "privinst.h"
@@ -2446,3 +2447,194 @@
cmd = skipbl();
}
}
+
+#ifdef CONFIG_PPC_PSERIES
+extern void *udbg_comport;
+void __ste_allocate(unsigned long esid, unsigned long vsid,
+ int kernel_segment);
+extern int fwnmi_active;
+
+static unsigned long system_reset_serialise;
+
+/* We wait this long before declaring the cpu to be locked up */
+#define SECONDS_TO_SPIN 20
+
+/*
+ * Make sure the segment containing effective address "ea" has a
+ * translation entry, by handing its ESID and kernel VSID to
+ * __ste_allocate().  Nothing is done for the first kernel segment
+ * (always mapped) or on cpus without an SLB (not implemented yet).
+ */
+static void insert_kernel_segment(unsigned long ea)
+{
+	unsigned long segment;
+
+	if (!cpu_has_slb()) {
+		/* XXX FIXME */
+		return;
+	}
+
+	segment = ea >> SID_SHIFT;
+
+	/* first kernel segment is always mapped */
+	if (segment == (PAGE_OFFSET >> SID_SHIFT))
+		return;
+
+	/* XXX should search to see if we have this mapped */
+	__ste_allocate(segment, get_kernel_vsid(ea), 0);
+}
+
+/*
+ * Dump the start of this cpu's paca exception stack area.
+ *
+ * The first 16 unsigned longs are always printed, four per line.  The
+ * next 8 are treated as the following frame: if any of them is
+ * non-zero they are printed too, under an "exception stack overflow"
+ * heading.
+ */
+static void print_exception_stack(void)
+{
+	unsigned long *ex = (unsigned long *)&get_paca()->exception_stack[0];
+	int overflow = 0;
+	int i;
+
+	printf("exception stack:\n");
+
+	for (i = 0; i < 16; i += 4)
+		printf("%.16lx %.16lx %.16lx %.16lx\n", ex[i], ex[i+1],
+		       ex[i+2], ex[i+3]);
+
+	/* check next frame for exception stack overflow */
+	for (i = 16; i < 24; i++) {
+		if (ex[i]) {
+			overflow = 1;
+			break;
+		}
+	}
+
+	if (!overflow)
+		return;
+
+	printf("exception stack overflow:\n");
+
+	for (i = 16; i < 24; i += 4)
+		printf("%.16lx %.16lx %.16lx %.16lx\n", ex[i], ex[i+1],
+		       ex[i+2], ex[i+3]);
+}
+
+/*
+ * Print a simple backtrace starting at stack pointer "sp".
+ *
+ * Nothing is printed unless sp is in the kernel region and refers to a
+ * valid page frame.  The segment holding sp is entered with
+ * insert_kernel_segment() before the stack is read.  The walk follows
+ * the back chain at *sp, printing the word at sp + 16 of each frame
+ * (presumably the LR save slot), and stops after 32 frames or as soon
+ * as sp leaves the THREAD_SIZE-aligned stack.  Afterwards the stack is
+ * scanned upwards from just past struct thread_info and the offset of
+ * the first non-zero byte is reported.
+ */
+static void print_backtrace(unsigned long sp)
+{
+	unsigned long bottom, top;
+	unsigned char *c;
+	int i = 1;
+
+	if (REGION_ID(sp) != KERNEL_REGION_ID)
+		return;
+
+	if (!pfn_valid(__pa(sp) >> PAGE_SHIFT))
+		return;
+
+	insert_kernel_segment(sp);
+
+	bottom = sp & ~(THREAD_SIZE-1UL);
+	top = bottom + THREAD_SIZE;
+
+	/* very simple backtrace which doesnt handle irq stacks */
+
+	/* use printf like the rest of the dump so all output takes one path */
+	printf("Backtrace:\n");
+	while (sp < top && sp >= bottom) {
+		printf("%.16lx ", *(unsigned long *)(sp + 16));
+		sp = *(unsigned long *)sp;
+		/* four entries per output line */
+		if (!(i % 4))
+			printf("\n");
+		if (i++ == 32)
+			break;
+	}
+	/* terminate a partially filled last line */
+	i--;
+	if (i % 4)
+		printf("\n");
+
+	/* warn if we went close to bottom of stack */
+	c = (unsigned char *)bottom;
+
+	/* XXX * 4 temporary */
+	for (i = sizeof(struct thread_info); i < 4096*4; i++) {
+		if (c[i]) {
+			printf("warning stack usage %d bytes from end\n", i);
+			break;
+		}
+	}
+}
+
+/*
+ * C part of the pSeries system reset handler, called from
+ * system_reset_handler with "regs" pointing at the saved register
+ * frame.  Never returns.
+ *
+ * Steps, in order:
+ *  - if FWNMI is active, fetch and then release the firmware error
+ *    info so other cpus are not held up behind it;
+ *  - enter the segment that maps the serial port (udbg_comport);
+ *  - on SMP, take the system_reset_serialise bit so cpus print one at
+ *    a time, giving up after SECONDS_TO_SPIN * (cpu + 1) seconds of
+ *    timebase ticks in case the holder has locked up;
+ *  - print registers, SPRGs, the exception stack and a backtrace;
+ *  - warn if SRR0/SRR1 no longer match the saved nip/msr;
+ *  - release the serialise bit and spin forever.
+ *
+ * NOTE(review): the bit is cleared even when the wait timed out and
+ * this cpu never acquired it - confirm that is intended.
+ */
+void system_reset_entry(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+#ifdef CONFIG_SMP
+	unsigned long startticks;
+	unsigned long spinticks;
+#endif
+	int i;
+
+	if (fwnmi_active) {
+		struct rtas_error_log *errhdr = FWNMI_get_errinfo(regs);
+		if (errhdr) {
+			/* XXX Should look at FWNMI information */
+		}
+		FWNMI_release_errinfo();
+	}
+
+	/*
+	 * Make sure the segment that maps the serial port is loaded.
+	 * We assume it is bolted into the hashed pagetable
+	 */
+	if (udbg_comport)
+		insert_kernel_segment((unsigned long)udbg_comport);
+
+#ifdef CONFIG_SMP
+	/*
+	 * Wait for other cpus to dump state, but timeout in case they lock up
+	 */
+	spinticks = (SECONDS_TO_SPIN * (cpu+1)) * tb_ticks_per_sec;
+	startticks = get_tb();
+	while ((get_tb() - startticks) < spinticks) {
+		if (!test_and_set_bit(0, &system_reset_serialise))
+			break;
+	}
+
+	printf("cpu %d: ", cpu);
+#endif
+
+	printf("System Reset\n");
+	printf("pc = %.16lx lr = %.16lx sp = %.16lx\n", regs->nip,
+	       regs->link, regs->gpr[1]);
+	printf("msr = %.16lx dar = %.16lx dsisr = %.16lx\n", regs->msr,
+	       regs->dar, regs->dsisr);
+	printf("cr = %.16lx ctr = %.16lx xer = %.16lx\n", regs->ccr,
+	       regs->ctr, regs->xer);
+	printf("sprg0 = %.16lx sprg1 = %.16lx sprg2 = %.16lx\n", get_sprg0(),
+	       get_sprg1(), get_sprg2());
+	printf("sprg3 = %.16lx curr = %.16lx dec = %.16lx\n", get_sprg3(),
+	       (unsigned long)current, get_dec());
+	printf("exsp = %.16lx ksp = %.16lx\n", get_paca()->exception_sp,
+	       get_paca()->xKsave);
+
+	/* three columns: r0-r9, r10-r19, r20-r29 (i is an int, hence %d) */
+	for (i = 0; i < 10; i++) {
+		printf("R%.2d = %.16lx R%.2d = %.16lx "
+		       "R%.2d = %.16lx\n", i, regs->gpr[i],
+		       i+10, regs->gpr[i+10], i+20, regs->gpr[i+20]);
+	}
+	/* last two GPRs; gpr[] has 32 entries, so index 32 is out of range */
+	printf("R30 = %.16lx R31 = %.16lx\n", regs->gpr[30],
+	       regs->gpr[31]);
+
+	print_exception_stack();
+	print_backtrace(regs->gpr[1]);
+
+#if 0
+	if (!cpu_has_slb()) {
+		printf("asr = %.16lx\n", mfasr());
+		for (i = 0; i < 8; ++i)
+			printf("sr%.2ld = %.16lx sr%.2ld = %.16lx\n", i,
+			       get_sr(i), i+8, get_sr(i+8));
+	}
+
+	printf("pid = %ld, comm = %s\n", current->pid, current->comm);
+#endif
+
+	/* Warn if we took an exception */
+	if ((get_srr0() != regs->nip) || (get_srr1() != regs->msr)) {
+		printf("warning srr0/srr1 is now\n");
+		printf("srr0 = %.16lx srr1 = %.16lx\n", get_srr0(),
+		       get_srr1());
+	}
+
+	clear_bit(0, &system_reset_serialise);
+
+	while(1)
+		; /* loop forever */
+}
+#endif
diff -ru ppc64-2.5-old/include/asm-ppc64/paca.h ppc64-2.5/include/asm-ppc64/paca.h
--- ppc64-2.5-old/include/asm-ppc64/paca.h 2003-02-12 07:02:43.000000000 -0600
+++ ppc64-2.5/include/asm-ppc64/paca.h 2003-03-10 15:27:41.000000000 -0600
@@ -160,6 +160,8 @@
*=====================================================================================
*/
u8 guard[0x1000]; /* ... and then hang 'em */
+
+ u8 system_reset_stack[0x1000];
};
#endif /* _PPC64_PACA_H */
diff -ru ppc64-2.5-old/include/asm-ppc64/rtas.h ppc64-2.5/include/asm-ppc64/rtas.h
--- ppc64-2.5-old/include/asm-ppc64/rtas.h 2003-02-13 05:15:03.000000000 -0600
+++ ppc64-2.5/include/asm-ppc64/rtas.h 2003-03-10 18:10:25.000000000 -0600
@@ -17,7 +17,7 @@
*/
#define RTAS_UNKNOWN_SERVICE (-1)
-#define RTAS_INSTANTIATE_MAX (1UL<<30) /* Don't instantiate rtas at/above this value */
+#define RTAS_INSTANTIATE_MAX (1UL<<28) /* Don't instantiate rtas at/above this value */
/*
* In general to call RTAS use rtas_token("string") to lookup
** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/
More information about the Linuxppc64-dev
mailing list