[PATCH v2 6/6] powerpc32: Few optimisations in memcpy

Christophe Leroy christophe.leroy at c-s.fr
Tue May 19 20:07:57 AEST 2015


This patch adds a few optimisations to the memcpy functions by using
lbzu/stbu instead of lbz/stb and by re-ordering instructions inside a loop
to reduce latency due to loading.

Signed-off-by: Christophe Leroy <christophe.leroy at c-s.fr>
---
 arch/powerpc/lib/copy_32.S | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 1d49c74..2ef50c6 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -155,9 +155,9 @@ _GLOBAL(memcpy)
 	mtctr	r8
 	beq+	61f
 70:	lbz	r9,4(r4)		/* do some bytes */
-	stb	r9,4(r6)
 	addi	r4,r4,1
 	addi	r6,r6,1
+	stb	r9,3(r6)
 	bdnz	70b
 61:	srwi.	r0,r0,2
 	mtctr	r0
@@ -199,10 +199,10 @@ _GLOBAL(memcpy)
 64:	andi.	r0,r5,3
 	mtctr	r0
 	beq+	65f
-40:	lbz	r0,4(r4)
-	stb	r0,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
+	addi	r4,r4,3
+	addi	r6,r6,3
+40:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
 	bdnz	40b
 65:	blr
 
-- 
2.1.0



More information about the Linuxppc-dev mailing list