[PATCH] [RFC] workaround buggy dcbX instructions in 8xx
Joakim Tjernlund
joakim.tjernlund at lumentis.se
Thu Apr 7 01:22:57 EST 2005
All cache instructions in 8xx are somewhat buggy as they
do not update the DAR register when causing a DTLB Miss/Error.
This is a forward port of my workaround from 2.4 to 2.6 to fix this problem.
The patch tags DAR with a known value which is tested for in the DTLB Error handler.
If DAR matches the tag, an instruction decode routine is invoked to calculate the
faulting address. There are two versions of the decode procedure, selected
by a #define. Read the patch and try the different versions.
I have not tested this on 2.6 since I haven't ported our board
from 2.4 yet.
Jocke
===== head_8xx.S 1.21 vs edited =====
--- 1.21/arch/ppc/kernel/head_8xx.S 2005-03-29 00:21:20 +02:00
+++ edited/head_8xx.S 2005-04-06 17:01:51 +02:00
@@ -32,6 +32,8 @@
#include <asm/ppc_asm.h>
#include <asm/offsets.h>
+#define CONFIG_8xx_DCBxFIXED
+
/* Macro to make the code more readable. */
#ifdef CONFIG_8xx_CPU6
#define DO_8xx_CPU6(val, reg) \
@@ -41,6 +43,20 @@
#else
#define DO_8xx_CPU6(val, reg)
#endif
+
+#ifdef CONFIG_8xx_DCBxFIXED
+/* TAG_DAR_R10 loads TAG_VAL into r10 (clobbering r10) and writes it to DAR, so that
+ * DataTLBError can recognize a buggy dcbx instruction (DAR still equal to TAG_VAL)
+ * and work around the problem. Without CONFIG_8xx_DCBxFIXED it expands to nothing.
+ */
+#define TAG_VAL 0x00f0 /* -1 may also be used */
+#define TAG_DAR_R10 \
+ li r10, TAG_VAL;\
+ mtspr SPRN_DAR, r10;
+#else
+#define TAG_DAR_R10
+#endif
+
.text
.globl _stext
_stext:
@@ -174,6 +190,7 @@
xfer(n, hdlr)
#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
+ TAG_DAR_R10; \
li r10,trap; \
stw r10,TRAP(r11); \
li r10,MSR_KERNEL; \
@@ -214,6 +231,7 @@
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
+ TAG_DAR_R10
EXC_XFER_STD(0x200, MachineCheckException)
/* Data access exception.
@@ -227,6 +245,7 @@
stw r10,_DSISR(r11)
mr r5,r10
mfspr r4,SPRN_DAR
+ TAG_DAR_R10
EXC_XFER_EE_LITE(0x300, handle_page_fault)
/* Instruction access exception.
@@ -252,6 +271,7 @@
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
+ TAG_DAR_R10
EXC_XFER_EE(0x600, AlignmentException)
/* Program check exception */
@@ -414,7 +434,13 @@
rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
DO_8xx_CPU6(0x3d80, r3)
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
+#ifdef CONFIG_8xx_DCBxFIXED
+ #if TAG_VAL == 0x00f0 /* Save 1 instr. by reusing the val loaded in r11 above */
+ mtspr SPRN_DAR, r11
+ #else
+ TAG_DAR_R10
+ #endif
+#endif
mfspr r10, SPRN_M_TW /* Restore registers */
lwz r11, 0(r0)
mtcr r11
@@ -450,11 +476,20 @@
mfcr r10
stw r10, 0(r0)
stw r11, 4(r0)
+ mfspr r10, SPRN_DAR
+#ifdef CONFIG_8xx_DCBxFIXED
+ /* If DAR contains TAG_VAL implies a buggy dcbx instruction
+ * that did not set DAR.
+ */
+ cmpwi cr0, r10, TAG_VAL
+ beq- 100f /* Branch if TAG_VAL to dcbx workaround procedure */
+101: /* return from dcbx instruction bug workaround, r10 holds value of DAR */
+#endif
/* First, make sure this was a store operation.
*/
- mfspr r10, SPRN_DSISR
- andis. r11, r10, 0x0200 /* If set, indicates store op */
+ mfspr r11, SPRN_DSISR
+ andis. r11, r11, 0x0200 /* If set, indicates store op */
beq 2f
/* The EA of a data TLB miss is automatically stored in the MD_EPN
@@ -473,7 +508,7 @@
* are initialized in mapin_ram(). This will avoid the problem,
* assuming we only use the dcbi instruction on kernel addresses.
*/
- mfspr r10, SPRN_DAR
+ /* DAR is in r10 already */
rlwinm r11, r10, 0, 0, 19
ori r11, r11, MD_EVALID
mfspr r10, SPRN_M_CASID
@@ -523,7 +558,13 @@
rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
DO_8xx_CPU6(0x3d80, r3)
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
-
+#ifdef CONFIG_8xx_DCBxFIXED
+ #if TAG_VAL == 0x00f0 /* Save 1 instr. by reusing the val loaded in r11 above */
+ mtspr SPRN_DAR, r11
+ #else
+ TAG_DAR_R10
+ #endif
+#endif
mfspr r10, SPRN_M_TW /* Restore registers */
lwz r11, 0(r0)
mtcr r11
@@ -561,6 +602,185 @@
. = 0x2000
+#ifdef CONFIG_8xx_DCBxFIXED
+/* Workaround procedure (entered at 100:) that calculates the data EA for buggy dcbx,dcbi
+ * instructions by decoding the RA/RB registers named in the faulting instruction and adding them.
+ * DAR is set to the calculated address and r10 also holds the EA on exit (b 101b).
+ */
+//#define INSTR_CHECK /* define to verify if it is a dcbx instr. Should not be needed. */
+//#define NO_SELF_MODIFYING_CODE /* define if you don't want to use self modifying code */
+//#define DEBUG_DCBX_INSTRUCTIONS /* for debugging only. Needs INSTR_CHECK defined as well. */
+//#define KERNEL_SPACE_ONLY /* define if user space do NOT contain dcbx instructions. */
+
+#ifndef KERNEL_SPACE_ONLY
+ nop /* A few nops to make the modified_instr: space below cache line aligned */
+ nop
+139: /* fetch instruction from userspace memory: walk the page tables for the address in r10 */
+ DO_8xx_CPU6(0x3780, r3)
+ mtspr SPRN_MD_EPN, r10
+ mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */
+ lwz r11, 0(r11) /* Get the level 1 entry */
+ DO_8xx_CPU6(0x3b80, r3)
+ mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
+ mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
+ lwz r11, 0(r11) /* Get the pte */
+ /* concat physical page address(r11) and page offset(r10) */
+ rlwimi r11, r10, 0, 20, 31
+ b 140f
+#endif
+100: /* Entry point for dcbx workaround. */
+ /* fetch instruction from memory. */
+ mfspr r10,SPRN_SRR0 /* r10 = address of the faulting instruction */
+#ifndef KERNEL_SPACE_ONLY
+ andis. r11, r10, 0x8000 /* cr0.eq is set when the top address bit is clear */
+ tophys (r11, r10)
+ beq- 139b /* Branch if user space address */
+#else
+ tophys (r11, r10)
+#endif
+140: lwz r11,0(r11) /* r11 = the faulting instruction word */
+#ifdef INSTR_CHECK
+/* Check if it really is a dcbx instruction. This is not needed as far as I can tell */
+/* dcbt and dcbtst does not generate DTLB Misses/Errors, no need to include them here */
+ rlwinm r10, r11, 0, 21, 30 /* isolate the extended opcode field (XO << 1) */
+ cmpwi cr0, r10, 2028 /* Is dcbz? */
+ beq+ 142f
+ cmpwi cr0, r10, 940 /* Is dcbi? */
+ beq+ 142f
+ cmpwi cr0, r10, 108 /* Is dcbst? */
+ beq+ 142f
+ cmpwi cr0, r10, 172 /* Is dcbf? */
+ beq+ 142f
+ cmpwi cr0, r10, 1964 /* Is icbi? */
+ beq+ 142f
+#ifdef DEBUG_DCBX_INSTRUCTIONS
+141: b 141b /* Stop here if no dcbx instruction */
+#endif
+ mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */
+ b 101b /* None of the above, go back to normal TLB processing */
+142: /* continue, it was a dcbx instruction. */
+#endif
+#ifdef CONFIG_8xx_CPU6
+ lwz r3, 8(r0) /* restore r3 from memory */
+#endif
+#ifndef NO_SELF_MODIFYING_CODE
+ andis. r10,r11,0x1f /* test if reg RA is r0; cr0 is consumed by the bne+ below */
+ li r10,modified_instr@l
+ dcbtst r0,r10 /* touch for store */
+ rlwinm r11,r11,0,0,20 /* Zero lower 11 bits (OE, XO and Rc fields) */
+ oris r11,r11,320 /* Transform instr. to a "add r10,RA,RB": RT=r10 is 10<<5 in the upper half */
+ ori r11,r11,532 /* extended opcode of add (266 << 1) */
+ stw r11,0(r10) /* store add instruction */
+ dcbf 0,r10 /* flush new instr. to memory. */
+ icbi 0,r10 /* invalidate instr. cache line */
+ lwz r11, 4(r0) /* restore r11 from memory */
+ mfspr r10, SPRN_M_TW /* restore r10 from M_TW */
+ isync /* Wait until new instr is loaded from memory */
+modified_instr:
+ .space 4 /* this is where the add instr. is stored */
+#ifdef DEBUG_DCBX_INSTRUCTIONS
+ /* fill with some garbage */
+ li r11,0xffff
+ stw r11,0(r11)
+#endif
+ bne+ 143f /* RA field was non-zero: the sum in r10 is already the EA */
+ subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
+143: mtdar r10 /* store faulting EA in DAR */
+ b 101b /* Go back to normal TLB handling */
+#else
+ mfctr r10
+ mtdar r10 /* save ctr reg in DAR */
+ rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB (RB * 8, two instr. per entry) */
+ addi r10, r10, 150f@l /* add start of table */
+ mtctr r10 /* load ctr with jump address */
+ xor r10, r10, r10 /* sum starts at zero */
+ bctr /* jump into table */
+150: /* jump table: entry N adds rN to the sum in r10, then continues at 151: */
+ add r10, r10, r0
+ b 151f
+ add r10, r10, r1
+ b 151f
+ add r10, r10, r2
+ b 151f
+ add r10, r10, r3
+ b 151f
+ add r10, r10, r4
+ b 151f
+ add r10, r10, r5
+ b 151f
+ add r10, r10, r6
+ b 151f
+ add r10, r10, r7
+ b 151f
+ add r10, r10, r8
+ b 151f
+ add r10, r10, r9
+ b 151f
+ mtctr r11 /* reg 10 needs special handling */
+ b 154f
+ mtctr r11 /* reg 11 needs special handling */
+ b 153f
+ add r10, r10, r12
+ b 151f
+ add r10, r10, r13
+ b 151f
+ add r10, r10, r14
+ b 151f
+ add r10, r10, r15
+ b 151f
+ add r10, r10, r16
+ b 151f
+ add r10, r10, r17
+ b 151f
+ add r10, r10, r18
+ b 151f
+ add r10, r10, r19
+ b 151f
+ add r10, r10, r20
+ b 151f
+ add r10, r10, r21
+ b 151f
+ add r10, r10, r22
+ b 151f
+ add r10, r10, r23
+ b 151f
+ add r10, r10, r24
+ b 151f
+ add r10, r10, r25
+ b 151f
+ add r10, r10, r26
+ b 151f
+ add r10, r10, r27
+ b 151f
+ add r10, r10, r28
+ b 151f
+ add r10, r10, r29
+ b 151f
+ add r10, r10, r30
+ b 151f
+ add r10, r10, r31
+151:
+ rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */
+ beq 152f /* if reg RA is zero, don't add it */
+ addi r11, r11, 150b@l /* add start of table */
+ mtctr r11 /* load ctr with jump address */
+ rlwinm r11,r11,0,16,10 /* clear bits 11-15 so the next pass through 151: exits; don't execute this more than once */
+ bctr /* jump into table */
+152:
+ mfdar r11
+ mtctr r11 /* restore ctr reg from DAR */
+ mtdar r10 /* save fault EA to DAR */
+ b 101b /* Go back to normal TLB handling */
+
+ /* special handling for r10,r11 since these are modified already */
+153: lwz r11, 4(r0) /* load r11 from memory */
+ b 155f
+154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */
+155: add r10, r10, r11 /* add it */
+ mfctr r11 /* restore r11 */
+ b 151b
+#endif
+#endif
.globl giveup_fpu
giveup_fpu:
blr
More information about the Linuxppc-embedded
mailing list