mpc8xx DCBZ (&friends) hw bug. Tests, analysis + conclusions.
Joakim Tjernlund
joakim.tjernlund at lumentis.se
Fri May 9 22:37:15 EST 2003
> Me again :-)
>
> I have completed and tested my workaround for the dcbx instructions. The workaround
> handles ALL dcbx instructions and ANY register combination, and works on both
> kernel-space and user-space addresses.
>
> I also did some benchmarking using copy_page(dcbz enabled) and memcpy to
> memory allocated with kmalloc and/or vmalloc. copy_page is about 30% faster
> than memcpy even with the workaround applied.
Here I go again :)
I have been running this patch on some 20-30 custom MPC860/862 boards in our test lab
since I posted this message and it is stable.
I made some changes since then:
- Made it configurable, #define CONFIG_8xx_DCBxFIXED to enable it.
- Tagging in the fast path in the DTLBMiss handler is just one(1) instruction.
- Test and branch if TAG present is two instructions in the DTLB Error handler.
- Enabled the use of the dcbz instruction in copy_tofrom_user(), cacheable_memzero(),
cacheable_memcpy(), clear_page() and copy_page()
Feedback most welcome!
Patch against linuxppc_2_4_devel follows.
Jocke
--- a/arch/ppc/kernel/head_8xx.S Tue Apr 29 00:45:35 2003
+++ b/arch/ppc/kernel/head_8xx.S Fri May 9 14:16:44 2003
@@ -31,6 +31,27 @@
#include <asm/ppc_asm.h>
#include "ppc_defs.h"
+#ifdef CONFIG_8xx_DCBxFIXED
+/* TAG_DAR_R20 writes the known value TAG_VAL to DAR (clobbering r20) so that
+ * the DataTLBError handler can recognize a buggy dcbx instruction that did not
+ * set DAR and work around the problem. It expands to nothing when disabled.
+ */
+ #define TAG_VAL 0x00f0 /* TLB miss handlers test TAG_VAL == 0x00f0 to reuse a value already in r21 */
+ #define TAG_DAR_R20 \
+ li r20, TAG_VAL;\
+ mtspr DAR, r20;
+#else
+ #define TAG_DAR_R20
+#endif
+/* Macro to make the code more readable: with CONFIG_8xx_CPU6 it loads val into reg, stores it to 12(r0) and reads it back (clobbers reg); otherwise it is empty. NOTE(review): presumably the CPU6 erratum sequence needed before mtspr - confirm. */
+#ifdef CONFIG_8xx_CPU6
+ #define DO_8xx_CPU6(val, reg) \
+ li reg, val; \
+ stw reg, 12(r0); \
+ lwz reg, 12(r0);
+#else
+ #define DO_8xx_CPU6(val, reg)
+#endif
.text
.globl _stext
_stext:
@@ -166,6 +187,7 @@
. = n; \
label: \
EXCEPTION_PROLOG; \
+ TAG_DAR_R20; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
li r20,MSR_KERNEL; \
FINISH_EXCEPTION(hdlr)
@@ -188,6 +210,7 @@
mr r5,r20
mfspr r4,DAR
stw r4,_DAR(r21)
+ TAG_DAR_R20
addi r3,r1,STACK_FRAME_OVERHEAD
li r20,MSR_KERNEL
rlwimi r20,r23,0,16,16 /* copy EE bit from saved MSR */
@@ -226,6 +249,7 @@
EXCEPTION_PROLOG
mfspr r4,DAR
stw r4,_DAR(r21)
+ TAG_DAR_R20
mfspr r5,DSISR
stw r5,_DSISR(r21)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -457,6 +481,13 @@
#endif
mtspr MD_RPN, r20 /* Update TLB entry */
+#ifdef CONFIG_8xx_DCBxFIXED
+#if TAG_VAL == 0x00f0 /* Save 1 instr. by reusing the val loaded in r21 above */
+ mtspr DAR, r21
+#else
+ TAG_DAR_R20
+#endif
+#endif
mfspr r20, M_TW /* Restore registers */
lwz r21, 0(r0)
mtcr r21
@@ -466,7 +497,17 @@
#endif
rfi
-2: mfspr r20, M_TW /* Restore registers */
+2:
+#ifdef CONFIG_8xx_DCBxFIXED
+ /* Copy 20 msb from MD_EPN to DAR since the dcbx instructions fail
+ * to update DAR when they cause a DTLB Miss.
+ */
+ mfspr r21, MD_EPN
+ mfspr r20, DAR
+ rlwimi r20, r21, 0, 0, 19
+ mtspr DAR, r20
+#endif
+ mfspr r20, M_TW /* Restore registers */
lwz r21, 0(r0)
mtcr r21
lwz r21, 4(r0)
@@ -504,10 +545,19 @@
stw r20, 0(r0)
stw r21, 4(r0)
+ mfspr r20, DAR
+#ifdef CONFIG_8xx_DCBxFIXED
+ /* If DAR contains TAG_VAL, this implies a buggy dcbx instruction
+ * that did not set DAR.
+ */
+ cmplwi cr0, r20, TAG_VAL
+ beq- 100f /* Branch if TAG_VAL to dcbx workaround procedure */
+101: /* return from dcbx instruction bug workaround, r20 holds value of DAR */
/* First, make sure this was a store operation.
*/
- mfspr r20, DSISR
- andis. r21, r20, 0x0200 /* If set, indicates store op */
+#endif
+ mfspr r21, DSISR
+ andis. r21, r21, 0x0200 /* If set, indicates store op */
beq 2f
/* The EA of a data TLB miss is automatically stored in the MD_EPN
@@ -526,7 +576,7 @@
* are initialized in mapin_ram(). This will avoid the problem,
* assuming we only use the dcbi instruction on kernel addresses.
*/
- mfspr r20, DAR
+ /* DAR is in r20 already */
rlwinm r21, r20, 0, 0, 19
ori r21, r21, MD_EVALID
mfspr r20, M_CASID
@@ -591,6 +641,13 @@
#endif
mtspr MD_RPN, r20 /* Update TLB entry */
+#ifdef CONFIG_8xx_DCBxFIXED
+#if TAG_VAL == 0x00f0 /* Save 1 instr. by reusing the val loaded in r21 above */
+ mtspr DAR, r21
+#else
+ TAG_DAR_R20
+#endif
+#endif
mfspr r20, M_TW /* Restore registers */
lwz r21, 0(r0)
mtcr r21
@@ -628,6 +685,149 @@
. = 0x2000
+#ifdef CONFIG_8xx_DCBxFIXED
+/* Workaround procedure: fetch the faulting instruction at SRR0 and, if it is a dcbx, compute
+ * its data EA by decoding the RA/RB register fields and adding those registers ((RA|0) + RB).
+ * On exit (b 101b) DAR and r20 both hold the EA; CTR is parked in DAR while the jump table runs.
+ */
+139: /* fetch instruction from userspace memory: translate SRR0 (in r20) via the page tables */
+ DO_8xx_CPU6(0x3780, r3)
+ mtspr MD_EPN, r20
+ mfspr r21, M_TWB /* Get level 1 table entry address */
+ lwz r21, 0(r21) /* Get the level 1 entry */
+ tophys (r21, r21)
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr MD_TWC, r21 /* Load pte table base address */
+ mfspr r21, MD_TWC /* ....and get the pte address */
+ lwz r21, 0(r21) /* Get the pte */
+ /* concat physical page address(r21) and page offset(r20) */
+ rlwimi r21, r20, 0, 20, 31
+ b 140f
+100: /* Entry point for dcbx workaround. */
+ /* fetch instruction from memory. */
+ mfspr r20,SRR0 /* r20 = address of the faulting instruction */
+ andis. r21, r20, 0x8000 /* top address bit clear => user space */
+ beq- 139b /* Branch if user space address */
+ tophys (r21, r20)
+140: lwz r21,0(r21) /* r21 = the faulting instruction word */
+
+/* Check if it really is a dcbx instruction */
+ rlwinm r20, r21, 0, 21, 30 /* isolate the extended opcode field in place (XO << 1) */
+ cmpwi cr0, r20, 2028 /* Is dcbz? */
+ beq+ 142f
+ cmpwi cr0, r20, 940 /* Is dcbi? */
+ beq+ 142f
+ cmpwi cr0, r20, 556 /* Is dcbt? */
+ beq+ 142f
+ cmpwi cr0, r20, 172 /* Is dcbf? */
+ beq+ 142f
+#ifdef DEBUG_DCBX_INSTRUCTIONS
+ cmpwi cr0, r20, 108 /* Is dcbst? Should never cause a DTLB Miss/Error */
+ beq+ 142f
+ cmpwi cr0, r20, 492 /* Is dcbtst? Should never cause a DTLB Miss/Error */
+ beq+ 142f
+
+141: b 141b /* Stop here if no dcbx instruction */
+#endif
+ mfspr r20, DAR /* r20 must hold DAR at exit */
+ b 101b /* None of the above, go back to normal TLB processing */
+142: /* continue, it was a dcbx instruction. */
+
+#ifdef CONFIG_8xx_CPU6
+ lwz r3, 8(r0) /* restore r3 from memory */
+#endif
+ mfctr r20
+ mtdar r20 /* save ctr reg in DAR */
+ rlwinm r20, r21, 24, 24, 28 /* offset into jump table for reg RB (RB * 8, two instructions per entry) */
+ addi r20, r20, 150f@l /* add start of table */
+ mtctr r20 /* load ctr with jump address */
+ xor r20, r20, r20 /* sum starts at zero */
+ bctr /* jump into table */
+150: /* jump table: entry N adds rN to the sum in r20, then continues at 151 */
+ add r20, r20, r0
+ b 151f
+ add r20, r20, r1
+ b 151f
+ add r20, r20, r2
+ b 151f
+ add r20, r20, r3
+ b 151f
+ add r20, r20, r4
+ b 151f
+ add r20, r20, r5
+ b 151f
+ add r20, r20, r6
+ b 151f
+ add r20, r20, r7
+ b 151f
+ add r20, r20, r8
+ b 151f
+ add r20, r20, r9
+ b 151f
+ add r20, r20, r10
+ b 151f
+ add r20, r20, r11
+ b 151f
+ add r20, r20, r12
+ b 151f
+ add r20, r20, r13
+ b 151f
+ add r20, r20, r14
+ b 151f
+ add r20, r20, r15
+ b 151f
+ add r20, r20, r16
+ b 151f
+ add r20, r20, r17
+ b 151f
+ add r20, r20, r18
+ b 151f
+ add r20, r20, r19
+ b 151f
+ mtctr r21 /* reg 20 needs special handling */
+ b 154f
+ mtctr r21 /* reg 21 needs special handling */
+ b 153f
+ add r20, r20, r22
+ b 151f
+ add r20, r20, r23
+ b 151f
+ add r20, r20, r24
+ b 151f
+ add r20, r20, r25
+ b 151f
+ add r20, r20, r26
+ b 151f
+ add r20, r20, r27
+ b 151f
+ add r20, r20, r28
+ b 151f
+ add r20, r20, r29
+ b 151f
+ add r20, r20, r30
+ b 151f
+ add r20, r20, r31
+151:
+ rlwinm. r21,r21,19,24,28 /* offset into jump table for reg RA */
+ beq 152f /* if reg RA is zero, don't add it */
+ addi r21, r21, 150b@l /* add start of table */
+ mtctr r21 /* load ctr with jump address */
+ rlwinm r21,r21,0,16,10 /* clear bits 11-15 so the next pass through 151 sees RA == 0: don't execute this more than once */
+ bctr /* jump into table */
+152:
+ mfdar r21
+ mtctr r21 /* restore ctr reg from DAR */
+ mtdar r20 /* save fault EA to DAR */
+ b 101b /* Go back to normal TLB handling */
+
+ /* special handling for r20,r21 since these are modified already */
+153: lwz r21, 4(r0) /* load saved r21 from memory */
+ b 155f
+154: mfspr r21, M_TW /* load saved r20 from M_TW (into r21) */
+155: add r20, r20, r21 /* add it */
+ mfctr r21 /* restore r21 (stashed in ctr by the table entry) */
+ b 151b
+#endif
/*
* This code finishes saving the registers to the exception frame
* and jumps to the appropriate handler for the exception, turning
--- a/arch/ppc/lib/string.S Tue Apr 29 00:45:35 2003
+++ b/arch/ppc/lib/string.S Fri May 9 14:17:07 2003
@@ -151,7 +151,7 @@
bdnz 4b
3: mtctr r9
li r7,4
-#if !defined(CONFIG_8xx)
+#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED)
10: dcbz r7,r6
#else
10: stw r4, 4(r6)
@@ -253,7 +253,7 @@
mtctr r0
beq 63f
53:
-#if !defined(CONFIG_8xx)
+#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED)
dcbz r11,r6
#endif
COPY_16_BYTES
@@ -452,6 +452,8 @@
53:
#if !defined(CONFIG_8xx)
dcbt r3,r4
+#endif
+#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED)
54: dcbz r11,r6
#endif
/* had to move these to keep extable in order */
@@ -461,7 +463,7 @@
.long 71b,101f
.long 72b,102f
.long 73b,103f
-#if !defined(CONFIG_8xx)
+#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED)
.long 54b,105f
#endif
.text
--- a/arch/ppc/kernel/misc.S Tue Apr 29 00:45:35 2003
+++ b/arch/ppc/kernel/misc.S Fri May 9 14:16:23 2003
@@ -657,7 +657,7 @@
_GLOBAL(clear_page)
li r0,4096/L1_CACHE_LINE_SIZE
mtctr r0
-#ifdef CONFIG_8xx
+#if defined(CONFIG_8xx) && !defined(CONFIG_8xx_DCBxFIXED)
li r4, 0
1: stw r4, 0(r3)
stw r4, 4(r3)
@@ -710,6 +710,8 @@
1:
#ifndef CONFIG_8xx
dcbt r11,r4
+#endif
+#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED)
dcbz r5,r3
#endif
COPY_16_BYTES
** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/
More information about the Linuxppc-embedded
mailing list