[PATCH] powerpc32: use stmw/lmw for non-volatile register save/restore

Christophe Leroy christophe.leroy at c-s.fr
Mon May 23 18:46:36 AEST 2016


lmw/stmw take one cycle more (two for lmw on some PPCs) and imply
serialisation, but they reduce the number of instructions, and hence
the amount of instruction fetching, compared to the equivalent
sequence of individual lwz/stw. That means less pressure on the cache
and fewer fetch delays from slow memory. When transferring 20
registers, it is worth it.
GCC uses stmw/lmw at function entry/exit to save/restore the
non-volatile registers, so let's do it the same way.
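
As an illustration (a sketch, not taken from the patch; the base
register and the offsets are arbitrary), a single store-multiple
covering the non-volatile range:

	stmw	r13, 0(r4)	/* stores r13..r31 at 0(r4)..72(r4) */

replaces the equivalent 19-instruction sequence:

	stw	r13, 0(r4)
	stw	r14, 4(r4)
	...
	stw	r31, 72(r4)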

On powerpc64 we can't use lmw/stmw, as they only handle 32-bit
values, so we move longjmp() and setjmp() from misc.S to misc_64.S
and write a 32-bit version in misc_32.S using stmw/lmw.
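
There is no 64-bit form of these instructions: lmw/stmw always
transfer 32-bit words, so on ppc64 they would lose the upper halves
of the GPRs. The 64-bit version therefore keeps the PPC_STL/PPC_LL
accessors, which expand to std/ld there; e.g. the

	PPC_STL	r14,5*SZL(r3)

in the moved code below is a std r14,40(r3), since SZL = 8 on ppc64.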

Signed-off-by: Christophe Leroy <christophe.leroy at c-s.fr>
---
The patch goes on top of "powerpc: inline current_stack_pointer()",
or else requires a trivial manual merge in arch/powerpc/kernel/misc.S.
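
For reference, the 32-bit version keeps the jmp_buf layout of the old
open-coded implementation (SZL = 4 on ppc32):

	0*SZL:	LR
	1*SZL:	r1
	2*SZL:	r2
	3*SZL:	CR (staged through r12 so stmw/lmw cover it too)
	4*SZL:	r13 ... up to r31 at 22*SZL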

 arch/powerpc/include/asm/ppc_asm.h |  6 ++--
 arch/powerpc/kernel/misc.S         | 61 --------------------------------------
 arch/powerpc/kernel/misc_32.S      | 22 ++++++++++++++
 arch/powerpc/kernel/misc_64.S      | 61 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 85 insertions(+), 65 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 2b31632..e29b649 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -82,10 +82,8 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #else
 #define SAVE_GPR(n, base)	stw	n,GPR0+4*(n)(base)
 #define REST_GPR(n, base)	lwz	n,GPR0+4*(n)(base)
-#define SAVE_NVGPRS(base)	SAVE_GPR(13, base); SAVE_8GPRS(14, base); \
-				SAVE_10GPRS(22, base)
-#define REST_NVGPRS(base)	REST_GPR(13, base); REST_8GPRS(14, base); \
-				REST_10GPRS(22, base)
+#define SAVE_NVGPRS(base)	stmw	13, GPR0+4*13(base)
+#define REST_NVGPRS(base)	lmw	13, GPR0+4*13(base)
 #endif
 
 #define SAVE_2GPRS(n, base)	SAVE_GPR(n, base); SAVE_GPR(n+1, base)
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 7ce26d4..9de71d8 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -53,64 +53,3 @@ _GLOBAL(add_reloc_offset)
 
 	.align	3
 2:	PPC_LONG 1b
-
-_GLOBAL(setjmp)
-	mflr	r0
-	PPC_STL	r0,0(r3)
-	PPC_STL	r1,SZL(r3)
-	PPC_STL	r2,2*SZL(r3)
-	mfcr	r0
-	PPC_STL	r0,3*SZL(r3)
-	PPC_STL	r13,4*SZL(r3)
-	PPC_STL	r14,5*SZL(r3)
-	PPC_STL	r15,6*SZL(r3)
-	PPC_STL	r16,7*SZL(r3)
-	PPC_STL	r17,8*SZL(r3)
-	PPC_STL	r18,9*SZL(r3)
-	PPC_STL	r19,10*SZL(r3)
-	PPC_STL	r20,11*SZL(r3)
-	PPC_STL	r21,12*SZL(r3)
-	PPC_STL	r22,13*SZL(r3)
-	PPC_STL	r23,14*SZL(r3)
-	PPC_STL	r24,15*SZL(r3)
-	PPC_STL	r25,16*SZL(r3)
-	PPC_STL	r26,17*SZL(r3)
-	PPC_STL	r27,18*SZL(r3)
-	PPC_STL	r28,19*SZL(r3)
-	PPC_STL	r29,20*SZL(r3)
-	PPC_STL	r30,21*SZL(r3)
-	PPC_STL	r31,22*SZL(r3)
-	li	r3,0
-	blr
-
-_GLOBAL(longjmp)
-	PPC_LCMPI r4,0
-	bne	1f
-	li	r4,1
-1:	PPC_LL	r13,4*SZL(r3)
-	PPC_LL	r14,5*SZL(r3)
-	PPC_LL	r15,6*SZL(r3)
-	PPC_LL	r16,7*SZL(r3)
-	PPC_LL	r17,8*SZL(r3)
-	PPC_LL	r18,9*SZL(r3)
-	PPC_LL	r19,10*SZL(r3)
-	PPC_LL	r20,11*SZL(r3)
-	PPC_LL	r21,12*SZL(r3)
-	PPC_LL	r22,13*SZL(r3)
-	PPC_LL	r23,14*SZL(r3)
-	PPC_LL	r24,15*SZL(r3)
-	PPC_LL	r25,16*SZL(r3)
-	PPC_LL	r26,17*SZL(r3)
-	PPC_LL	r27,18*SZL(r3)
-	PPC_LL	r28,19*SZL(r3)
-	PPC_LL	r29,20*SZL(r3)
-	PPC_LL	r30,21*SZL(r3)
-	PPC_LL	r31,22*SZL(r3)
-	PPC_LL	r0,3*SZL(r3)
-	mtcrf	0x38,r0
-	PPC_LL	r0,0(r3)
-	PPC_LL	r1,SZL(r3)
-	PPC_LL	r2,2*SZL(r3)
-	mtlr	r0
-	mr	r3,r4
-	blr
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index d9c912b..de419e9 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -1086,3 +1086,25 @@ relocate_new_kernel_end:
 relocate_new_kernel_size:
 	.long relocate_new_kernel_end - relocate_new_kernel
 #endif
+
+_GLOBAL(setjmp)
+	mflr	r0
+	stw	r0, 0(r3)	/* save LR */
+	stw	r1, 4(r3)	/* save stack pointer */
+	stw	r2, 8(r3)	/* save r2 */
+	mfcr	r12		/* stage CR through r12 ... */
+	stmw	r12, 12(r3)	/* ... so one stmw saves CR and r13..r31 */
+	li	r3, 0		/* setjmp() itself returns 0 */
+	blr
+
+_GLOBAL(longjmp)
+	lwz	r0, 0(r3)	/* saved LR */
+	lwz	r1, 4(r3)	/* restore stack pointer */
+	lwz	r2, 8(r3)	/* restore r2 */
+	lmw	r12, 12(r3)	/* CR into r12, restore r13..r31 */
+	mtcrf	0x38, r12	/* restore non-volatile cr2..cr4 */
+	mtlr	r0
+	mr.	r3, r4		/* return 'val', setting CR0 */
+	bnelr			/* done if val != 0 */
+	li	r3, 1		/* longjmp(buf, 0) must return 1 */
+	blr
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index f28754c..7e25249 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -701,3 +701,64 @@ _GLOBAL(kexec_sequence)
 	li	r5,0
 	blr	/* image->start(physid, image->start, 0); */
 #endif /* CONFIG_KEXEC */
+
+_GLOBAL(setjmp)
+	mflr	r0
+	PPC_STL	r0,0(r3)
+	PPC_STL	r1,SZL(r3)
+	PPC_STL	r2,2*SZL(r3)
+	mfcr	r0
+	PPC_STL	r0,3*SZL(r3)
+	PPC_STL	r13,4*SZL(r3)
+	PPC_STL	r14,5*SZL(r3)
+	PPC_STL	r15,6*SZL(r3)
+	PPC_STL	r16,7*SZL(r3)
+	PPC_STL	r17,8*SZL(r3)
+	PPC_STL	r18,9*SZL(r3)
+	PPC_STL	r19,10*SZL(r3)
+	PPC_STL	r20,11*SZL(r3)
+	PPC_STL	r21,12*SZL(r3)
+	PPC_STL	r22,13*SZL(r3)
+	PPC_STL	r23,14*SZL(r3)
+	PPC_STL	r24,15*SZL(r3)
+	PPC_STL	r25,16*SZL(r3)
+	PPC_STL	r26,17*SZL(r3)
+	PPC_STL	r27,18*SZL(r3)
+	PPC_STL	r28,19*SZL(r3)
+	PPC_STL	r29,20*SZL(r3)
+	PPC_STL	r30,21*SZL(r3)
+	PPC_STL	r31,22*SZL(r3)
+	li	r3,0
+	blr
+
+_GLOBAL(longjmp)
+	PPC_LCMPI r4,0
+	bne	1f
+	li	r4,1
+1:	PPC_LL	r13,4*SZL(r3)
+	PPC_LL	r14,5*SZL(r3)
+	PPC_LL	r15,6*SZL(r3)
+	PPC_LL	r16,7*SZL(r3)
+	PPC_LL	r17,8*SZL(r3)
+	PPC_LL	r18,9*SZL(r3)
+	PPC_LL	r19,10*SZL(r3)
+	PPC_LL	r20,11*SZL(r3)
+	PPC_LL	r21,12*SZL(r3)
+	PPC_LL	r22,13*SZL(r3)
+	PPC_LL	r23,14*SZL(r3)
+	PPC_LL	r24,15*SZL(r3)
+	PPC_LL	r25,16*SZL(r3)
+	PPC_LL	r26,17*SZL(r3)
+	PPC_LL	r27,18*SZL(r3)
+	PPC_LL	r28,19*SZL(r3)
+	PPC_LL	r29,20*SZL(r3)
+	PPC_LL	r30,21*SZL(r3)
+	PPC_LL	r31,22*SZL(r3)
+	PPC_LL	r0,3*SZL(r3)
+	mtcrf	0x38,r0
+	PPC_LL	r0,0(r3)
+	PPC_LL	r1,SZL(r3)
+	PPC_LL	r2,2*SZL(r3)
+	mtlr	r0
+	mr	r3,r4
+	blr
-- 
2.1.0

