[PATCH] Handle I-TLB Error and Miss separately on 8xx

Sat Jan 8 03:22:31 EST 2005

As the code stands currently, there is a bug in the 2.4 and 2.6 handling
of I-TLB Miss and Error exceptions on 8xx.  The problem is that since we
treat both of them as the same exception when we hit do_page_fault,
there is a case where we can incorrectly find that a protection fault
has occurred, when it hasn't.  This is because we check bit 4 of SRR1 in
both cases, but in the case of an I-TLB Miss, this bit is always set,
and it only indicates a protection fault on an I-TLB Error.

Originally from Grigori Tolstolytkin <gtolstolytkin at ru.mvista.com>.
Signed-off-by: Tom Rini <trini at kernel.crashing.org>

Patch vs 2.4-current:

--- 1.19/arch/ppc/kernel/head_8xx.S	2003-10-27 12:31:25 -07:00
+++ edited/arch/ppc/kernel/head_8xx.S	2005-01-07 08:57:31 -07:00
@@ -501,10 +501,18 @@
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
+ * But we can't just jump from the InstructionAccess fault (0x400) as
+ * do_page_fault() needs to know.
  */
 	. = 0x1300
 InstructionTLBError:
-	b	InstructionAccess
+	EXCEPTION_PROLOG
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r22
+	mr	r5,r23
+	li	r20,MSR_KERNEL
+	rlwimi	r20,r23,0,16,16		/* copy EE bit from saved MSR */
+	FINISH_EXCEPTION(do_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We can catch that
--- 1.15/arch/ppc/mm/fault.c	2003-08-29 03:37:49 -07:00
+++ edited/arch/ppc/mm/fault.c	2005-01-07 08:59:25 -07:00
@@ -91,7 +91,8 @@
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault. For 400-family processors
  * the error_code parameter is ESR for a data fault, 0 for an instruction
- * fault.
+ * fault.  On 800-family processors, we fudge an I-TLB Miss (0x1100) as
+ * being at 0x400 for space reasons.
  */
 void do_page_fault(struct pt_regs *regs, unsigned long address,
 		   unsigned long error_code)
@@ -111,7 +112,11 @@
 	 * bits we are interested in.  But there are some bits which
 	 * indicate errors in DSISR but can validly be set in SRR1.
 	 */
+#ifdef CONFIG_8xx
+	if (regs->trap == 0x400 || regs->trap == 0x1300)
+#else
 	if (regs->trap == 0x400)
+#endif
 		error_code &= 0x48200000;
 	else
 		is_write = error_code & 0x02000000;
@@ -204,8 +209,17 @@
 			goto bad_area;
 	/* a read */
 	} else {
-		/* protection fault */
+		/*
+		 * On non-8xx, a protection fault.  On 8xx, this bit is
+		 * always set on I-TLB Miss, but indicates a protection
+		 * fault on an I-TLB Error.  So we only check this bit
+		 * if we aren't an I-TLB Miss.
+		 */
+#ifdef CONFIG_8xx
+		if ((error_code & 0x08000000) && regs->trap != 0x400)
+#else
 		if (error_code & 0x08000000)
+#endif
 			goto bad_area;
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;

Patch vs 2.6-current:
--- 1.18/arch/ppc/kernel/head_8xx.S	2004-11-11 01:25:53 -07:00
+++ edited/arch/ppc/kernel/head_8xx.S	2005-01-07 09:13:05 -07:00
@@ -445,10 +445,15 @@
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
+ * But we can't just jump from the InstructionAccess fault (0x400) as
+ * do_page_fault() needs to know.
  */
 	. = 0x1300
 InstructionTLBError:
-	b	InstructionAccess
+	EXCEPTION_PROLOG
+	mr	r4,r12
+	mr	r5,r9
+	EXC_XFER_EE_LITE(0x1300, handle_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We can catch that
--- 1.21/arch/ppc/mm/fault.c	2004-07-26 14:43:22 -07:00
+++ edited/arch/ppc/mm/fault.c	2005-01-07 09:11:44 -07:00
@@ -90,7 +90,8 @@
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault. For 400-family processors
  * the error_code parameter is ESR for a data fault, 0 for an instruction
- * fault.
+ * fault.  On 800-family processors, we fudge an I-TLB Miss (0x1100) as
+ * being at 0x400 for space reasons.
  */
 int do_page_fault(struct pt_regs *regs, unsigned long address,
 		  unsigned long error_code)
@@ -110,7 +111,11 @@
 	 * bits we are interested in.  But there are some bits which
 	 * indicate errors in DSISR but can validly be set in SRR1.
 	 */
-	if (TRAP(regs) == 0x400)
+#ifdef CONFIG_8xx
+	if (TRAP(regs) == 0x400 || TRAP(regs) == 0x1300)
+#else
+ 	if (TRAP(regs) == 0x400)
+#endif
 		error_code &= 0x48200000;
 	else
 		is_write = error_code & 0x02000000;
@@ -235,8 +240,17 @@
 #endif
 	/* a read */
 	} else {
-		/* protection fault */
+		/*
+		 * On non-8xx, a protection fault.  On 8xx, this bit is
+		 * always set on I-TLB Miss, but indicates a protection
+		 * fault on an I-TLB Error.  So we only check this bit
+		 * if we aren't an I-TLB Miss.
+		 */
+#ifdef CONFIG_8xx
+		if ((error_code & 0x08000000) && regs->trap != 0x400)
+#else
 		if (error_code & 0x08000000)
+#endif
 			goto bad_area;
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;

-- 
Tom Rini
http://gate.crashing.org/~trini/