[PATCH 4/4] powerpc32: memcpy: use cacheable_memcpy

Christophe Leroy christophe.leroy at c-s.fr
Tue May 12 23:32:56 AEST 2015


cacheable_memcpy uses the dcbz instruction and is more efficient than
memcpy when the destination is in RAM.

This patch renames memcpy to generic_memcpy, and defines memcpy as a
prolog to cacheable_memcpy. This prolog checks whether the destination
buffer is in RAM. If it is not, it falls back to generic_memcpy().

On MPC885, we get an approximately 7% increase in the transfer rate
when receiving over FTP.

Signed-off-by: Christophe Leroy <christophe.leroy at c-s.fr>
---
 arch/powerpc/lib/copy_32.S | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index d8a9a86..8f76d49 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -161,13 +161,27 @@ _GLOBAL(generic_memset)
  * We only use this version if the source and dest don't overlap.
  * -- paulus.
  */
+_GLOBAL(memmove)
+	cmplw	0,r3,r4
+	bgt	backwards_memcpy
+	/* fall through */
+
+_GLOBAL(memcpy)
+	cmplwi	r5,L1_CACHE_BYTES
+	blt-	generic_memcpy
+	lis	r8,max_pfn@ha
+	lwz	r8,max_pfn@l(r8)
+	tophys	(r9,r3)
+	srwi	r9,r9,PAGE_SHIFT
+	cmplw	r9,r8
+	bge-	generic_memcpy
 _GLOBAL(cacheable_memcpy)
 	add	r7,r3,r5		/* test if the src & dst overlap */
 	add	r8,r4,r5
 	cmplw	0,r4,r7
 	cmplw	1,r3,r8
 	crand	0,0,4			/* cr0.lt &= cr1.lt */
-	blt	memcpy			/* if regions overlap */
+	blt	generic_memcpy		/* if regions overlap */
 
 	addi	r4,r4,-4
 	addi	r6,r3,-4
@@ -233,12 +247,7 @@ _GLOBAL(cacheable_memcpy)
 	bdnz	40b
 65:	blr
 
-_GLOBAL(memmove)
-	cmplw	0,r3,r4
-	bgt	backwards_memcpy
-	/* fall through */
-
-_GLOBAL(memcpy)
+_GLOBAL(generic_memcpy)
 	srwi.	r7,r5,3
 	addi	r6,r3,-4
 	addi	r4,r4,-4
-- 
2.1.0



More information about the Linuxppc-dev mailing list