[BUG] 2.6.25-rc2-git4 - Regression Kernel oops while running kernbench and tbench on powerpc

Paul Mackerras paulus at samba.org
Wed Apr 23 18:16:34 EST 2008


Kamalesh Babulal writes:

> After applying the patch above and the patch posted on
> http://lkml.org/lkml/2008/4/8/42
> the bug had the following information,

Thanks.  The patch below, against Linus' current git tree, fixes one
bug that might be the cause of the problem, and also attempts to
detect the erroneous situation earlier and fix it up, and also print
some debug information.  Please try to reproduce the problem with this
patch applied, and if there are any console log messages starting with
SLB: or FWNMI:, please send me the console log.

Paul.

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c0db5b7..f7f0962 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -439,6 +439,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
 	mr	r1,r8		/* start using new stack pointer */
 	std	r7,PACAKSAVE(r13)
 
+	/* check that SLB entry 2 contains the right thing */
+	clrrdi	r6,r1,28
+	clrldi.	r0,r6,2
+	beq	3f
+	li	r0,2
+	slbmfee	r7,r0
+	oris	r6,r6,SLB_ESID_V@h
+	cmpd	r6,r7
+	beq	3f
+	bl	bad_slb_switch
+	ld	r3,PACACURRENT(r13)
+	addi	r3,r3,THREAD
+3:
 	ld	r6,_CCR(r1)
 	mtcrf	0xFF,r6
 
@@ -540,6 +553,19 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
 	ld	r4,_XER(r1)
 	mtspr	SPRN_XER,r4
 
+	/* check that SLB entry 2 contains the right thing */
+	clrrdi	r6,r1,28	/* stack ESID */
+	clrldi.	r0,r6,2
+	beq	57f
+	li	r0,2
+	slbmfee	r7,r0
+	oris	r6,r6,SLB_ESID_V@h
+	cmpd	r6,r7
+	beq	57f
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	bad_slb_exc
+	ld	r3,_MSR(r1)
+57:
 	REST_8GPRS(5, r1)
 
 	andi.	r0,r3,MSR_RI
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index be35ffa..c938134 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -45,6 +45,7 @@
 #include <asm/system.h>
 #include <asm/mpic.h>
 #include <asm/vdso_datapage.h>
+#include <asm/mmu.h>
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #endif
@@ -580,6 +581,10 @@ int __devinit start_secondary(void *unused)
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 
+	/* Bolt in the entry for the kernel stack now */
+	if (cpu_has_feature(CPU_FTR_SLB))
+		slb_flush_and_rebolt();
+
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 906daed..bb7765b 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -309,3 +309,34 @@ void slb_initialize(void)
 	 * one. */
 	asm volatile("isync":::"memory");
 }
+
+/* Debug aid: print index, ESID word and VSID word of each non-empty SLB entry. */
+static void dump_slb(void)
+{
+	long entry;
+	unsigned long esid, vsid;
+
+	printk(KERN_EMERG "SLB contents now:\n");
+	for (entry = 0; entry < 64; ++entry) {
+		asm volatile("slbmfee  %0,%1" : "=r" (esid) : "r" (entry));
+		if (esid == 0)	/* valid bit is clear along with everything else */
+			continue;
+		asm volatile("slbmfev  %0,%1" : "=r" (vsid) : "r" (entry));
+		printk(KERN_EMERG "%ld: %.16lx %.16lx\n", entry, esid, vsid);
+	}
+}
+
+void bad_slb_exc(struct pt_regs *regs)	/* entry_64.S: SLB entry 2 check failed */
+{
+	printk(KERN_EMERG "SLB: stack not bolted on exception return\n");
+	dump_slb();		/* log the SLB as found, before touching it */
+	slb_flush_and_rebolt();	/* fix-up step; presumably re-bolts the stack entry */
+	show_regs(regs);	/* regs is r1 + STACK_FRAME_OVERHEAD at the call site */
+}
+
+void bad_slb_switch(void)	/* entry_64.S: SLB entry 2 check failed */
+{
+	printk(KERN_EMERG "SLB: stack not bolted on context switch\n");
+	dump_slb();		/* log the SLB as found, before touching it */
+	slb_flush_and_rebolt();	/* fix-up step; presumably re-bolts the stack entry */
+}
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index a1ab25c..ed68083 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -325,6 +325,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
 
 	if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
 		/* Platform corrected itself */
+		printk(KERN_ALERT "FWNMI: platform corrected error %.16lx\n",
+		       *(unsigned long *)err);
 		nonfatal = 1;
 	} else if ((regs->msr & MSR_RI) &&
 		   user_mode(regs) &&



More information about the Linuxppc-dev mailing list