[PATCH 2/2] powerpc/32s: Unroll kuep_lock and kuep_unlock macros

Christophe Leroy christophe.leroy at csgroup.eu
Wed Jan 20 18:34:00 AEDT 2021


Unroll the loops in kuep_lock and kuep_unlock.

Benchmarked on an mpc 8321 with a standard kernel having a
3M/1M user/kernel memory split, i.e. 12 segments for user.

Without KUEP, null_syscall benchmark is 220 cycles.
With KUEP, null_syscall benchmark is 439 cycles.

Once loops are unrolled, null_syscall benchmark is 366 cycles.
This is almost 17% reduction.

It is assumed that userspace covers at least 4 segments and
at most 14 segments.

The isync is removed, it saves 8 cycles. For kuep_unlock, the rfi
will do the synchronisation. For kuep_lock, we get a small window
during which exec is still possible, but is won't last more than a
few instructions.

Both macros are called two times so the size increase is in
the noise (approx 120 instructions).

Signed-off-by: Christophe Leroy <christophe.leroy at csgroup.eu>
---
 arch/powerpc/include/asm/book3s/32/kup.h | 67 ++++++++++++++++++------
 1 file changed, 52 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index a0117a9d5b06..e800b515ac02 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -7,21 +7,61 @@
 
 #ifdef __ASSEMBLY__
 
-.macro kuep_update_sr	gpr1, gpr2		/* NEVER use r0 as gpr2 due to addis */
-101:	mtsrin	\gpr1, \gpr2
-	addi	\gpr1, \gpr1, 0x111		/* next VSID */
-	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* clear VSID overflow */
-	addis	\gpr2, \gpr2, 0x1000		/* address of next segment */
-	bdnz	101b
-	isync
+.macro kuep_increment gpr1, gpr2
+	addi	\gpr1, \gpr1, 0x222		/* Next second VSID */
+	addi	\gpr2, \gpr2, 0x222		/* Next second VSID */
+	rlwinm	\gpr1, \gpr1, 0, 0xf0ffffff	/* Clear VSID overflow */
+	rlwinm	\gpr2, \gpr2, 0, 0xf0ffffff	/* Clear VSID overflow */
+.endm
+
+.macro kuep_update_sr gpr1, gpr2		/* NEVER use r0 as gpr1 or gpr2 due to addi */
+	addi	\gpr2, \gpr1, 0x111		/* Next VSID */
+	rlwinm	\gpr2, \gpr2, 0, 0xf0ffffff	/* Clear VSID overflow */
+	mtsr	0, \gpr1
+	mtsr	1, \gpr2
+	kuep_increment \gpr1, \gpr2
+	mtsr	2, \gpr1
+	mtsr	3, \gpr2
+#if NUM_USER_SEGMENTS > 4
+	kuep_increment \gpr1, \gpr2
+	mtsr	4, \gpr1
+#if NUM_USER_SEGMENTS > 5
+	mtsr	5, \gpr2
+#if NUM_USER_SEGMENTS > 6
+	kuep_increment \gpr1, \gpr2
+	mtsr	6, \gpr1
+#if NUM_USER_SEGMENTS > 7
+	mtsr	7, \gpr2
+#if NUM_USER_SEGMENTS > 8
+	kuep_increment \gpr1, \gpr2
+	mtsr	8, \gpr1
+#if NUM_USER_SEGMENTS > 9
+	mtsr	9, \gpr2
+#if NUM_USER_SEGMENTS > 10
+	kuep_increment \gpr1, \gpr2
+	mtsr	10, \gpr1
+#if NUM_USER_SEGMENTS > 11
+	mtsr	11, \gpr2
+#if NUM_USER_SEGMENTS > 12
+	kuep_increment \gpr1, \gpr2
+	mtsr	12, \gpr1
+#if NUM_USER_SEGMENTS > 13
+	mtsr	13, \gpr2
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
 .endm
 
 .macro kuep_lock	gpr1, gpr2
 #ifdef CONFIG_PPC_KUEP
-	li	\gpr1, NUM_USER_SEGMENTS
-	li	\gpr2, 0
-	mtctr	\gpr1
-	mfsrin	\gpr1, \gpr2
+	mfsr	\gpr1, 0
 	oris	\gpr1, \gpr1, SR_NX at h		/* set Nx */
 	kuep_update_sr \gpr1, \gpr2
 #endif
@@ -29,10 +69,7 @@
 
 .macro kuep_unlock	gpr1, gpr2
 #ifdef CONFIG_PPC_KUEP
-	li	\gpr1, NUM_USER_SEGMENTS
-	li	\gpr2, 0
-	mtctr	\gpr1
-	mfsrin	\gpr1, \gpr2
+	mfsr	\gpr1, 0
 	rlwinm	\gpr1, \gpr1, 0, ~SR_NX		/* Clear Nx */
 	kuep_update_sr \gpr1, \gpr2
 #endif
-- 
2.25.0



More information about the Linuxppc-dev mailing list