[PATCH] powerpc: Update 64bit __copy_tofrom_user() using CPU_FTR_UNALIGNED_LD_STD
Mark Nelson
markn at au1.ibm.com
Tue Nov 11 21:53:34 EST 2008
In exactly the same way that we updated memcpy() with new feature sections
in commit 25d6e2d7c58ddc4a3b614fc5381591c0cfe66556, we do the same thing
here for __copy_tofrom_user(). Once again this is purely a performance
tweak for Cell and Power6 - it has no effect on any of the other 64bit
powerpc chips.
We can make these same changes to __copy_tofrom_user() because the basic
copy algorithm is the same as in memcpy() - this version just has all the
exception handling logic needed when copying to or from userspace as well
as a special case for copying whole 4K pages that are page aligned.
CPU_FTR_UNALIGNED_LD_STD was added in commit
4ec577a28980a0790df3c3dfe9c81f6e2222acfb
We also make the same simple one-line change from cmpldi r1,... to cmpldi
cr1,... for consistency.
Signed-off-by: Mark Nelson <markn at au1.ibm.com>
---
arch/powerpc/lib/copyuser_64.S | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
Index: upstream/arch/powerpc/lib/copyuser_64.S
===================================================================
--- upstream.orig/arch/powerpc/lib/copyuser_64.S
+++ upstream/arch/powerpc/lib/copyuser_64.S
@@ -26,11 +26,24 @@ _GLOBAL(__copy_tofrom_user)
andi. r6,r6,7
PPC_MTOCRF 0x01,r5
blt cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+ * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ * cleared.
+ * At the time of writing the only CPU that has this combination of bits
+ * set is Power6.
+ */
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
bne .Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+ CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
- andi. r0,r4,7
addi r3,r3,-16
+BEGIN_FTR_SECTION
+ andi. r0,r4,7
bne .Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
srdi r7,r5,4
20: ld r9,0(r4)
addi r4,r4,-8
@@ -138,7 +151,7 @@ _GLOBAL(__copy_tofrom_user)
PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
- cmpldi r1,r5,16
+ cmpldi cr1,r5,16
bf cr7*4+3,1f
35: lbz r0,0(r4)
81: stb r0,0(r3)
More information about the Linuxppc-dev
mailing list