[PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path

Sayali Patil sayalip at linux.ibm.com
Tue Feb 17 23:44:56 AEDT 2026


On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing enabled,
KUAP warnings can be triggered from the VMX usercopy path under memory
stress workloads.

KUAP requires that no subfunctions are called once userspace access has
been enabled. The existing VMX copy implementation violates this
requirement by invoking enter_vmx_usercopy() from the assembly path after
userspace access has already been enabled. If preemption occurs
in this window, the AMR state may not be preserved correctly,
leaving userspace access in an unexpected state and resulting in
KUAP warnings.

Fix this by moving VMX selection and enter_vmx_usercopy()/
exit_vmx_usercopy() handling into the raw_copy_{to,from,in}_user()
wrappers in uaccess.h. The new flow is:

  - Decide whether to use the VMX path based on size and CPU capability
  - Call enter_vmx_usercopy() before enabling userspace access
  - Enable userspace access and perform the VMX copy
  - Disable userspace access
  - Call exit_vmx_usercopy()
  - Fall back to the base copy routine if the VMX copy faults

With this change, the VMX assembly routines no longer perform VMX state
management or call helper functions; they only implement the
copy operations.
The previous feature-section-based VMX selection inside
__copy_tofrom_user_power7() is removed, and a dedicated
__copy_tofrom_user_power7_vmx() entry point is introduced.

This ensures correct KUAP ordering, avoids subfunction calls
while KUAP is unlocked, and eliminates the warnings while preserving
the VMX fast path.

Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection")
Reported-by: Shrikanth Hegde <sshegde at linux.ibm.com>
Closes: https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/
Suggested-by: Christophe Leroy (CS GROUP) <chleroy at kernel.org>
Co-developed-by: Aboorva Devarajan <aboorvad at linux.ibm.com>
Signed-off-by: Aboorva Devarajan <aboorvad at linux.ibm.com>
Signed-off-by: Sayali Patil <sayalip at linux.ibm.com>
---
 arch/powerpc/include/asm/uaccess.h | 67 ++++++++++++++++++++++++++++++
 arch/powerpc/lib/copyuser_64.S     |  1 +
 arch/powerpc/lib/copyuser_power7.S | 45 +++++++-------------
 arch/powerpc/lib/vmx-helper.c      |  2 +
 4 files changed, 85 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 784a00e681fa..52e4a784d148 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -13,6 +13,11 @@
 #define TASK_SIZE_MAX		TASK_SIZE_USER64
 #endif
 
+#ifdef CONFIG_ALTIVEC
+/* Threshold above which VMX copy path is used */
+#define VMX_COPY_THRESHOLD 3328
+#endif
+
 #include <asm-generic/access_ok.h>
 
 /*
@@ -323,12 +328,42 @@ do {								\
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
+extern unsigned long __copy_tofrom_user_base(void __user *to,
+		const void __user *from, unsigned long size);
+
+#ifdef CONFIG_ALTIVEC
+extern unsigned long __copy_tofrom_user_power7_vmx(void __user *to,
+		const void __user *from, unsigned long size);
+
+static inline bool will_use_vmx(unsigned long n)
+{
+	return cpu_has_feature(CPU_FTR_VMX_COPY) &&
+		n > VMX_COPY_THRESHOLD;
+}
+#endif
+
 #ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
 	unsigned long ret;
 
+#ifdef CONFIG_ALTIVEC
+	if (will_use_vmx(n) && enter_vmx_usercopy()) {
+		allow_read_write_user(to, from, n);
+		ret = __copy_tofrom_user_power7_vmx(to, from, n);
+		prevent_read_write_user(to, from, n);
+		exit_vmx_usercopy();
+		if (unlikely(ret)) {
+			allow_read_write_user(to, from, n);
+			ret = __copy_tofrom_user_base(to, from, n);
+			prevent_read_write_user(to, from, n);
+		}
+
+		return ret;
+	}
+#endif
+
 	allow_read_write_user(to, from, n);
 	ret = __copy_tofrom_user(to, from, n);
 	prevent_read_write_user(to, from, n);
@@ -341,6 +376,22 @@ static inline unsigned long raw_copy_from_user(void *to,
 {
 	unsigned long ret;
 
+#ifdef CONFIG_ALTIVEC
+	if (will_use_vmx(n) && enter_vmx_usercopy()) {
+		allow_read_from_user(from, n);
+		ret = __copy_tofrom_user_power7_vmx((__force void __user *)to, from, n);
+		prevent_read_from_user(from, n);
+		exit_vmx_usercopy();
+		if (unlikely(ret)) {
+			allow_read_from_user(from, n);
+			ret = __copy_tofrom_user_base((__force void __user *)to, from, n);
+			prevent_read_from_user(from, n);
+		}
+
+		return ret;
+	}
+#endif
+
 	allow_read_from_user(from, n);
 	ret = __copy_tofrom_user((__force void __user *)to, from, n);
 	prevent_read_from_user(from, n);
@@ -352,6 +403,22 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	unsigned long ret;
 
+#ifdef CONFIG_ALTIVEC
+	if (will_use_vmx(n) && enter_vmx_usercopy()) {
+		allow_write_to_user(to, n);
+		ret = __copy_tofrom_user_power7_vmx(to, (__force const void __user *)from, n);
+		prevent_write_to_user(to, n);
+		exit_vmx_usercopy();
+		if (unlikely(ret)) {
+			allow_write_to_user(to, n);
+			ret = __copy_tofrom_user_base(to, (__force const void __user *)from, n);
+			prevent_write_to_user(to, n);
+		}
+
+		return ret;
+	}
+#endif
+
 	allow_write_to_user(to, n);
 	ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
 	prevent_write_to_user(to, n);
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 9af969d2cc0c..25a99108caff 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -562,3 +562,4 @@ exc;	std	r10,32(3)
 	li	r5,4096
 	b	.Ldst_aligned
 EXPORT_SYMBOL(__copy_tofrom_user)
+EXPORT_SYMBOL(__copy_tofrom_user_base)
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 8474c682a178..17dbcfbae25f 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -5,13 +5,9 @@
  *
  * Author: Anton Blanchard <anton at au.ibm.com>
  */
+#include <linux/export.h>
 #include <asm/ppc_asm.h>
 
-#ifndef SELFTEST_CASE
-/* 0 == don't use VMX, 1 == use VMX */
-#define SELFTEST_CASE	0
-#endif
-
 #ifdef __BIG_ENDIAN__
 #define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
 #define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
@@ -47,10 +43,14 @@
 	ld	r15,STK_REG(R15)(r1)
 	ld	r14,STK_REG(R14)(r1)
 .Ldo_err3:
-	bl	CFUNC(exit_vmx_usercopy)
+	ld      r6,STK_REG(R31)(r1)	/* original destination pointer */
+	ld      r5,STK_REG(R29)(r1)	/* original number of bytes */
+	subf    r7,r6,r3		/* #bytes copied */
+	subf    r3,r7,r5		/* #bytes not copied in r3 */
 	ld	r0,STACKFRAMESIZE+16(r1)
 	mtlr	r0
-	b	.Lexit
+	addi    r1,r1,STACKFRAMESIZE
+	blr
 #endif /* CONFIG_ALTIVEC */
 
 .Ldo_err2:
@@ -74,7 +74,6 @@
 
 _GLOBAL(__copy_tofrom_user_power7)
 	cmpldi	r5,16
-	cmpldi	cr1,r5,3328
 
 	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
 	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
@@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7)
 
 	blt	.Lshort_copy
 
-#ifdef CONFIG_ALTIVEC
-test_feature = SELFTEST_CASE
-BEGIN_FTR_SECTION
-	bgt	cr1,.Lvmx_copy
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif
 
 .Lnonvmx_copy:
 	/* Get the source 8B aligned */
@@ -263,23 +256,14 @@ err1;	stb	r0,0(r3)
 15:	li	r3,0
 	blr
 
-.Lunwind_stack_nonvmx_copy:
-	addi	r1,r1,STACKFRAMESIZE
-	b	.Lnonvmx_copy
-
-.Lvmx_copy:
 #ifdef CONFIG_ALTIVEC
+_GLOBAL(__copy_tofrom_user_power7_vmx)
 	mflr	r0
 	std	r0,16(r1)
 	stdu	r1,-STACKFRAMESIZE(r1)
-	bl	CFUNC(enter_vmx_usercopy)
-	cmpwi	cr1,r3,0
-	ld	r0,STACKFRAMESIZE+16(r1)
-	ld	r3,STK_REG(R31)(r1)
-	ld	r4,STK_REG(R30)(r1)
-	ld	r5,STK_REG(R29)(r1)
-	mtlr	r0
 
+	std     r3,STK_REG(R31)(r1)
+	std     r5,STK_REG(R29)(r1)
 	/*
 	 * We prefetch both the source and destination using enhanced touch
 	 * instructions. We use a stream ID of 0 for the load side and
@@ -300,8 +284,6 @@ err1;	stb	r0,0(r3)
 
 	DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
 
-	beq	cr1,.Lunwind_stack_nonvmx_copy
-
 	/*
 	 * If source and destination are not relatively aligned we use a
 	 * slower permute loop.
@@ -478,7 +460,8 @@ err3;	lbz	r0,0(r4)
 err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
+	li r3,0
+	blr
 
 .Lvmx_unaligned_copy:
 	/* Get the destination 16B aligned */
@@ -681,5 +664,7 @@ err3;	lbz	r0,0(r4)
 err3;	stb	r0,0(r3)
 
 15:	addi	r1,r1,STACKFRAMESIZE
-	b	CFUNC(exit_vmx_usercopy)	/* tail call optimise */
+	li r3,0
+	blr
+EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx)
 #endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index 54340912398f..554b248002b4 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -27,6 +27,7 @@ int enter_vmx_usercopy(void)
 
 	return 1;
 }
+EXPORT_SYMBOL(enter_vmx_usercopy);
 
 /*
  * This function must return 0 because we tail call optimise when calling
@@ -49,6 +50,7 @@ int exit_vmx_usercopy(void)
 		set_dec(1);
 	return 0;
 }
+EXPORT_SYMBOL(exit_vmx_usercopy);
 
 int enter_vmx_ops(void)
 {
-- 
2.52.0



More information about the Linuxppc-dev mailing list