[PATCH v2 5/6] powerpc32: cacheable_memcpy becomes memcpy
Christophe Leroy
christophe.leroy at c-s.fr
Tue May 19 20:07:55 AEST 2015
cacheable_memcpy uses the dcbz instruction and is more efficient than
memcpy when the destination is in RAM. If the destination is in an
I/O area, memcpy_toio() is normally used, not memcpy.
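
(Illustration only, not part of the patch: a minimal kernel-style sketch of
that distinction, assuming a hypothetical helper copy_example() with made-up
buffer names; "buf" is an ordinary cacheable RAM buffer, "regs" is an
ioremap()ed MMIO region.)

    #include <linux/io.h>
    #include <linux/string.h>

    /*
     * "buf" lives in cacheable RAM, so plain memcpy() (now the
     * dcbz-optimised copy) is fine.  "regs" is an I/O mapping, so
     * memcpy_toio() must be used instead: dcbz may raise an alignment
     * exception on cache-inhibited storage.
     */
    static void copy_example(void *buf, void __iomem *regs,
    			 const void *src, size_t len)
    {
    	memcpy(buf, src, len);		/* destination in RAM */
    	memcpy_toio(regs, src, len);	/* destination in an I/O area */
    }
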
This patch renames memcpy to generic_memcpy, and renames
cacheable_memcpy to memcpy.
On MPC885, we get approximately a 7% increase in transfer rate
on FTP reception.
Signed-off-by: Christophe Leroy <christophe.leroy at c-s.fr>
---
arch/powerpc/lib/copy_32.S | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 9262071..1d49c74 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -129,13 +129,18 @@ _GLOBAL(memset)
* We only use this version if the source and dest don't overlap.
* -- paulus.
*/
-_GLOBAL(cacheable_memcpy)
+_GLOBAL(memmove)
+ cmplw 0,r3,r4
+ bgt backwards_memcpy
+ /* fall through */
+
+_GLOBAL(memcpy)
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
- blt memcpy /* if regions overlap */
+ blt generic_memcpy /* if regions overlap */
addi r4,r4,-4
addi r6,r3,-4
@@ -201,12 +206,7 @@ _GLOBAL(cacheable_memcpy)
bdnz 40b
65: blr
-_GLOBAL(memmove)
- cmplw 0,r3,r4
- bgt backwards_memcpy
- /* fall through */
-
-_GLOBAL(memcpy)
+_GLOBAL(generic_memcpy)
srwi. r7,r5,3
addi r6,r3,-4
addi r4,r4,-4
--
2.1.0