[PATCH 1/2] powerpc: fix KUAP warning in VMX usercopy path

Christophe Leroy (CS GROUP) chleroy at kernel.org
Fri Feb 27 20:22:28 AEDT 2026


Hi,

On 17/02/2026 at 13:44, Sayali Patil wrote:
> 
> On powerpc with PREEMPT_FULL or PREEMPT_LAZY and function tracing enabled,
> KUAP warnings can be triggered from the VMX usercopy path under memory
> stress workloads.
> 
> KUAP requires that no subfunctions are called once userspace access has
> been enabled. The existing VMX copy implementation violates this
> requirement by invoking enter_vmx_usercopy() from the assembly path after
> userspace access has already been enabled. If preemption occurs
> in this window, the AMR state may not be preserved correctly,
> leading to unexpected userspace access state and resulting in
> KUAP warnings.
> 
> Fix this by moving VMX selection and enter_vmx_usercopy()/
> exit_vmx_usercopy() handling into the raw_copy_{to,from,in}_user()
> wrappers in uaccess.h. The new flow is:
> 
>    - Decide whether to use the VMX path based on size and CPU capability
>    - Call enter_vmx_usercopy() before enabling userspace access
>    - Enable userspace access and perform the VMX copy
>    - Disable userspace access
>    - Call exit_vmx_usercopy()
>    - Fall back to the base copy routine if the VMX copy faults
> 
> With this change, the VMX assembly routines no longer perform VMX state
> management or call helper functions; they only implement the
> copy operations.
> The previous feature-section based VMX selection inside
> __copy_tofrom_user_power7() is removed, and a dedicated
> __copy_tofrom_user_power7_vmx() entry point is introduced.
> 
> This ensures correct KUAP ordering, avoids subfunction calls
> while KUAP is unlocked, and eliminates the warnings while preserving
> the VMX fast path.

Your patch conflicts with the changes adding masked user access.

Can you rebase on top of v7.0-rc1?

Comments below

> 
> Fixes: de78a9c42a79 ("powerpc: Add a framework for Kernel Userspace Access Protection")
> Reported-by: Shrikanth Hegde <sshegde at linux.ibm.com>
> Closes: https://lore.kernel.org/all/20260109064917.777587-2-sshegde@linux.ibm.com/
> Suggested-by: Christophe Leroy (CS GROUP) <chleroy at kernel.org>
> Co-developed-by: Aboorva Devarajan <aboorvad at linux.ibm.com>
> Signed-off-by: Aboorva Devarajan <aboorvad at linux.ibm.com>
> Signed-off-by: Sayali Patil <sayalip at linux.ibm.com>
> ---
>   arch/powerpc/include/asm/uaccess.h | 67 ++++++++++++++++++++++++++++++
>   arch/powerpc/lib/copyuser_64.S     |  1 +
>   arch/powerpc/lib/copyuser_power7.S | 45 +++++++-------------
>   arch/powerpc/lib/vmx-helper.c      |  2 +
>   4 files changed, 85 insertions(+), 30 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 784a00e681fa..52e4a784d148 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -13,6 +13,11 @@
>   #define TASK_SIZE_MAX          TASK_SIZE_USER64
>   #endif
> 
> +#ifdef CONFIG_ALTIVEC

Remove the ifdef, to avoid needing the matching ifdefs later in the file.

> +/* Threshold above which VMX copy path is used */
> +#define VMX_COPY_THRESHOLD 3328
> +#endif
> +
>   #include <asm-generic/access_ok.h>
> 
>   /*
> @@ -323,12 +328,42 @@ do {                                                              \
>   extern unsigned long __copy_tofrom_user(void __user *to,
>                  const void __user *from, unsigned long size);
> 
> +extern unsigned long __copy_tofrom_user_base(void __user *to,
> +               const void __user *from, unsigned long size);
> +

The extern keyword is pointless for function prototypes; don't add new ones.
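
That is, just declare it as:

	unsigned long __copy_tofrom_user_base(void __user *to,
			const void __user *from, unsigned long size);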

> +#ifdef CONFIG_ALTIVEC

Remove the ifdef

> +extern unsigned long __copy_tofrom_user_power7_vmx(void __user *to,
> +               const void __user *from, unsigned long size);
> +
> +static inline bool will_use_vmx(unsigned long n)
> +{
> +       return cpu_has_feature(CPU_FTR_VMX_COPY) &&
> +               n > VMX_COPY_THRESHOLD;

Change to:

	return IS_ENABLED(CONFIG_ALTIVEC) && cpu_has_feature(CPU_FTR_VMX_COPY) &&
	       n > VMX_COPY_THRESHOLD;

Then will_use_vmx() will return false when CONFIG_ALTIVEC is not set
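
Putting it together (and with the ifdefs gone), the helper would simply read:

	/*
	 * Compile-time false when CONFIG_ALTIVEC is not set, so the compiler
	 * drops the whole VMX branch in the callers.
	 */
	static inline bool will_use_vmx(unsigned long n)
	{
		return IS_ENABLED(CONFIG_ALTIVEC) &&
		       cpu_has_feature(CPU_FTR_VMX_COPY) &&
		       n > VMX_COPY_THRESHOLD;
	}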

> +}
> +#endif
> +
>   #ifdef __powerpc64__
>   static inline unsigned long
>   raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
>   {
>          unsigned long ret;
> 
> +#ifdef CONFIG_ALTIVEC

Remove the ifdef; with the above change, will_use_vmx() will return false
when CONFIG_ALTIVEC is not set.

> +       if (will_use_vmx(n) && enter_vmx_usercopy()) {
> +               allow_read_write_user(to, from, n);
> +               ret = __copy_tofrom_user_power7_vmx(to, from, n);
> +               prevent_read_write_user(to, from, n);
> +               exit_vmx_usercopy();
> +               if (unlikely(ret)) {
> +                       allow_read_write_user(to, from, n);
> +                       ret = __copy_tofrom_user_base(to, from, n);
> +                       prevent_read_write_user(to, from, n);
> +               }
> +
> +               return ret;
> +       }

This block is starting to be a bit big for an inline function.
I think we should just have:

	if (will_use_vmx(n))
		return __copy_tofrom_user_vmx(to, from, n);

and then define a __copy_tofrom_user_vmx() in, for instance,
arch/powerpc/lib/vmx-helper.c.

This would also avoid having to export enter_vmx_usercopy() and
exit_vmx_usercopy().
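
A rough sketch of what that could look like in vmx-helper.c, assuming the
flow from your patch and, for simplicity, a single helper that uses the
read/write KUAP variant for all three callers (per-direction helpers would
work just as well):

	unsigned long __copy_tofrom_user_vmx(void __user *to,
			const void __user *from, unsigned long n)
	{
		unsigned long ret;

		/* VMX not usable right now: plain base copy */
		if (!enter_vmx_usercopy()) {
			allow_read_write_user(to, from, n);
			ret = __copy_tofrom_user_base(to, from, n);
			prevent_read_write_user(to, from, n);
			return ret;
		}

		allow_read_write_user(to, from, n);
		ret = __copy_tofrom_user_power7_vmx(to, from, n);
		prevent_read_write_user(to, from, n);
		exit_vmx_usercopy();

		/* VMX copy faulted: retry with the base copy */
		if (unlikely(ret)) {
			allow_read_write_user(to, from, n);
			ret = __copy_tofrom_user_base(to, from, n);
			prevent_read_write_user(to, from, n);
		}

		return ret;
	}
	EXPORT_SYMBOL(__copy_tofrom_user_vmx);

Then only __copy_tofrom_user_vmx() needs an export, and
enter_vmx_usercopy()/exit_vmx_usercopy() stay internal.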

Christophe

> +#endif
> +
>          allow_read_write_user(to, from, n);
>          ret = __copy_tofrom_user(to, from, n);
>          prevent_read_write_user(to, from, n);
> @@ -341,6 +376,22 @@ static inline unsigned long raw_copy_from_user(void *to,
>   {
>          unsigned long ret;
> 
> +#ifdef CONFIG_ALTIVEC
> +       if (will_use_vmx(n) && enter_vmx_usercopy()) {
> +               allow_read_from_user(from, n);
> +               ret = __copy_tofrom_user_power7_vmx((__force void __user *)to, from, n);
> +               prevent_read_from_user(from, n);
> +               exit_vmx_usercopy();
> +               if (unlikely(ret)) {
> +                       allow_read_from_user(from, n);
> +                       ret = __copy_tofrom_user_base((__force void __user *)to, from, n);
> +                       prevent_read_from_user(from, n);
> +               }
> +
> +               return ret;
> +       }
> +#endif
> +
>          allow_read_from_user(from, n);
>          ret = __copy_tofrom_user((__force void __user *)to, from, n);
>          prevent_read_from_user(from, n);
> @@ -352,6 +403,22 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
>   {
>          unsigned long ret;
> 
> +#ifdef CONFIG_ALTIVEC
> +       if (will_use_vmx(n) && enter_vmx_usercopy()) {
> +               allow_write_to_user(to, n);
> +               ret = __copy_tofrom_user_power7_vmx(to, (__force const void __user *)from, n);
> +               prevent_write_to_user(to, n);
> +               exit_vmx_usercopy();
> +               if (unlikely(ret)) {
> +                       allow_write_to_user(to, n);
> +                       ret = __copy_tofrom_user_base(to, (__force const void __user *)from, n);
> +                       prevent_write_to_user(to, n);
> +               }
> +
> +               return ret;
> +       }
> +#endif
> +
>          allow_write_to_user(to, n);
>          ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
>          prevent_write_to_user(to, n);
> diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
> index 9af969d2cc0c..25a99108caff 100644
> --- a/arch/powerpc/lib/copyuser_64.S
> +++ b/arch/powerpc/lib/copyuser_64.S
> @@ -562,3 +562,4 @@ exc;        std     r10,32(3)
>          li      r5,4096
>          b       .Ldst_aligned
>   EXPORT_SYMBOL(__copy_tofrom_user)
> +EXPORT_SYMBOL(__copy_tofrom_user_base)
> diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
> index 8474c682a178..17dbcfbae25f 100644
> --- a/arch/powerpc/lib/copyuser_power7.S
> +++ b/arch/powerpc/lib/copyuser_power7.S
> @@ -5,13 +5,9 @@
>    *
>    * Author: Anton Blanchard <anton at au.ibm.com>
>    */
> +#include <linux/export.h>
>   #include <asm/ppc_asm.h>
> 
> -#ifndef SELFTEST_CASE
> -/* 0 == don't use VMX, 1 == use VMX */
> -#define SELFTEST_CASE  0
> -#endif
> -
>   #ifdef __BIG_ENDIAN__
>   #define LVS(VRT,RA,RB)         lvsl    VRT,RA,RB
>   #define VPERM(VRT,VRA,VRB,VRC) vperm   VRT,VRA,VRB,VRC
> @@ -47,10 +43,14 @@
>          ld      r15,STK_REG(R15)(r1)
>          ld      r14,STK_REG(R14)(r1)
>   .Ldo_err3:
> -       bl      CFUNC(exit_vmx_usercopy)
> +       ld      r6,STK_REG(R31)(r1)     /* original destination pointer */
> +       ld      r5,STK_REG(R29)(r1)     /* original number of bytes */
> +       subf    r7,r6,r3                /* #bytes copied */
> +       subf    r3,r7,r5                /* #bytes not copied in r3 */
>          ld      r0,STACKFRAMESIZE+16(r1)
>          mtlr    r0
> -       b       .Lexit
> +       addi    r1,r1,STACKFRAMESIZE
> +       blr
>   #endif /* CONFIG_ALTIVEC */
> 
>   .Ldo_err2:
> @@ -74,7 +74,6 @@
> 
>   _GLOBAL(__copy_tofrom_user_power7)
>          cmpldi  r5,16
> -       cmpldi  cr1,r5,3328
> 
>          std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
>          std     r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
> @@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7)
> 
>          blt     .Lshort_copy
> 
> -#ifdef CONFIG_ALTIVEC
> -test_feature = SELFTEST_CASE
> -BEGIN_FTR_SECTION
> -       bgt     cr1,.Lvmx_copy
> -END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
> -#endif
> 
>   .Lnonvmx_copy:
>          /* Get the source 8B aligned */
> @@ -263,23 +256,14 @@ err1;     stb     r0,0(r3)
>   15:    li      r3,0
>          blr
> 
> -.Lunwind_stack_nonvmx_copy:
> -       addi    r1,r1,STACKFRAMESIZE
> -       b       .Lnonvmx_copy
> -
> -.Lvmx_copy:
>   #ifdef CONFIG_ALTIVEC
> +_GLOBAL(__copy_tofrom_user_power7_vmx)
>          mflr    r0
>          std     r0,16(r1)
>          stdu    r1,-STACKFRAMESIZE(r1)
> -       bl      CFUNC(enter_vmx_usercopy)
> -       cmpwi   cr1,r3,0
> -       ld      r0,STACKFRAMESIZE+16(r1)
> -       ld      r3,STK_REG(R31)(r1)
> -       ld      r4,STK_REG(R30)(r1)
> -       ld      r5,STK_REG(R29)(r1)
> -       mtlr    r0
> 
> +       std     r3,STK_REG(R31)(r1)
> +       std     r5,STK_REG(R29)(r1)
>          /*
>           * We prefetch both the source and destination using enhanced touch
>           * instructions. We use a stream ID of 0 for the load side and
> @@ -300,8 +284,6 @@ err1;       stb     r0,0(r3)
> 
>          DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
> 
> -       beq     cr1,.Lunwind_stack_nonvmx_copy
> -
>          /*
>           * If source and destination are not relatively aligned we use a
>           * slower permute loop.
> @@ -478,7 +460,8 @@ err3;       lbz     r0,0(r4)
>   err3;  stb     r0,0(r3)
> 
>   15:    addi    r1,r1,STACKFRAMESIZE
> -       b       CFUNC(exit_vmx_usercopy)        /* tail call optimise */
> +       li r3,0
> +       blr
> 
>   .Lvmx_unaligned_copy:
>          /* Get the destination 16B aligned */
> @@ -681,5 +664,7 @@ err3;       lbz     r0,0(r4)
>   err3;  stb     r0,0(r3)
> 
>   15:    addi    r1,r1,STACKFRAMESIZE
> -       b       CFUNC(exit_vmx_usercopy)        /* tail call optimise */
> +       li r3,0
> +       blr
> +EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx)
>   #endif /* CONFIG_ALTIVEC */
> diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
> index 54340912398f..554b248002b4 100644
> --- a/arch/powerpc/lib/vmx-helper.c
> +++ b/arch/powerpc/lib/vmx-helper.c
> @@ -27,6 +27,7 @@ int enter_vmx_usercopy(void)
> 
>          return 1;
>   }
> +EXPORT_SYMBOL(enter_vmx_usercopy);
> 
>   /*
>    * This function must return 0 because we tail call optimise when calling
> @@ -49,6 +50,7 @@ int exit_vmx_usercopy(void)
>                  set_dec(1);
>          return 0;
>   }
> +EXPORT_SYMBOL(exit_vmx_usercopy);
> 
>   int enter_vmx_ops(void)
>   {
> --
> 2.52.0
> 


