[patch V3 02/12] uaccess: Provide ASM GOTO safe wrappers for unsafe_*_user()

Mathieu Desnoyers mathieu.desnoyers at efficios.com
Fri Oct 17 23:43:31 AEDT 2025


On 2025-10-17 06:08, Thomas Gleixner wrote:
> ASM GOTO is miscompiled by GCC when it is used inside an auto cleanup scope:
> 
> bool foo(u32 __user *p, u32 val)
> {
> 	scoped_guard(pagefault)
> 		unsafe_put_user(val, p, efault);
> 	return true;
> efault:
> 	return false;
> }
> 
>   e80:	e8 00 00 00 00       	call   e85 <foo+0x5>
>   e85:	65 48 8b 05 00 00 00 00 mov    %gs:0x0(%rip),%rax
>   e8d:	83 80 04 14 00 00 01 	addl   $0x1,0x1404(%rax)   // pf_disable++
>   e94:	89 37                	mov    %esi,(%rdi)
>   e96:	83 a8 04 14 00 00 01 	subl   $0x1,0x1404(%rax)   // pf_disable--
>   e9d:	b8 01 00 00 00       	mov    $0x1,%eax           // success
>   ea2:	e9 00 00 00 00       	jmp    ea7 <foo+0x27>      // ret
>   ea7:	31 c0                	xor    %eax,%eax           // fail
>   ea9:	e9 00 00 00 00       	jmp    eae <foo+0x2e>      // ret
> 
> which is broken as it leaks the pagefault disable counter on failure.
> 
> Clang at least fails the build.
> 
> Linus suggested to add a local label into the macro scope and let that
> jump to the actual caller supplied error label.
> 
>         	__label__ local_label;                                  \
>          arch_unsafe_get_user(x, ptr, local_label);              \
> 	if (0) {                                                \
> 	local_label:                                            \
> 		goto label;                                     \
> 
> That works for both GCC and clang.
> 
> clang:
> 
>   c80:	0f 1f 44 00 00       	   nopl   0x0(%rax,%rax,1)	
>   c85:	65 48 8b 0c 25 00 00 00 00 mov    %gs:0x0,%rcx
>   c8e:	ff 81 04 14 00 00    	   incl   0x1404(%rcx)	   // pf_disable++
>   c94:	31 c0                	   xor    %eax,%eax        // set retval to false
>   c96:	89 37                      mov    %esi,(%rdi)      // write
>   c98:	b0 01                	   mov    $0x1,%al         // set retval to true
>   c9a:	ff 89 04 14 00 00    	   decl   0x1404(%rcx)     // pf_disable--
>   ca0:	2e e9 00 00 00 00    	   cs jmp ca6 <foo+0x26>   // ret
> 
> The exception table entry points correctly to c9a
> 
> GCC:
> 
>   f70:   e8 00 00 00 00          call   f75 <baz+0x5>
>   f75:   65 48 8b 05 00 00 00 00 mov    %gs:0x0(%rip),%rax
>   f7d:   83 80 04 14 00 00 01    addl   $0x1,0x1404(%rax)  // pf_disable++
>   f84:   8b 17                   mov    (%rdi),%edx
>   f86:   89 16                   mov    %edx,(%rsi)
>   f88:   83 a8 04 14 00 00 01    subl   $0x1,0x1404(%rax) // pf_disable--
>   f8f:   b8 01 00 00 00          mov    $0x1,%eax         // success
>   f94:   e9 00 00 00 00          jmp    f99 <baz+0x29>    // ret
>   f99:   83 a8 04 14 00 00 01    subl   $0x1,0x1404(%rax) // pf_disable--
>   fa0:   31 c0                   xor    %eax,%eax         // fail
>   fa2:   e9 00 00 00 00          jmp    fa7 <baz+0x37>    // ret
> 
> The exception table entry points correctly to f99
> 
> So both compilers optimize out the extra goto and emit correct and
> efficient code.
> 
> Provide a generic wrapper to do that to avoid modifying all the affected
> architecture specific implementation with that workaround.
> 
> The only change required for architectures is to rename unsafe_*_user() to
> arch_unsafe_*_user(). That's done in subsequent changes.
> 
> Suggested-by: Linus Torvalds <torvalds at linux-foundation.org>
> Signed-off-by: Thomas Gleixner <tglx at linutronix.de>

Adding a link to a gcc bugzilla entry may be relevant here. Ditto for
clang if there is a bug tracker entry for this.

Other than that:

Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers at efficios.com>

> ---
>   include/linux/uaccess.h |   72 +++++++++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 68 insertions(+), 4 deletions(-)
> 
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -518,7 +518,34 @@ long strncpy_from_user_nofault(char *dst
>   		long count);
>   long strnlen_user_nofault(const void __user *unsafe_addr, long count);
>   
> -#ifndef __get_kernel_nofault
> +#ifdef arch_get_kernel_nofault
> +/*
> + * Wrap the architecture implementation so that @label can be outside of a
> + * cleanup() scope. A regular C goto works correctly, but ASM goto does
> + * not. Clang rejects such an attempt, but GCC silently emits buggy code.
> + */
> +#define __get_kernel_nofault(dst, src, type, label)		\
> +do {								\
> +	__label__ local_label;					\
> +	arch_get_kernel_nofault(dst, src, type, local_label);	\
> +	if (0) {						\
> +	local_label:						\
> +		goto label;					\
> +	}							\
> +} while (0)
> +
> +#define __put_kernel_nofault(dst, src, type, label)		\
> +do {								\
> +	__label__ local_label;					\
> +	arch_put_kernel_nofault(dst, src, type, local_label);	\
> +	if (0) {						\
> +	local_label:						\
> +		goto label;					\
> +	}							\
> +} while (0)
> +
> +#elif !defined(__get_kernel_nofault) /* arch_get_kernel_nofault */
> +
>   #define __get_kernel_nofault(dst, src, type, label)	\
>   do {							\
>   	type __user *p = (type __force __user *)(src);	\
> @@ -535,7 +562,8 @@ do {							\
>   	if (__put_user(data, p))			\
>   		goto label;				\
>   } while (0)
> -#endif
> +
> +#endif  /* !__get_kernel_nofault */
>   
>   /**
>    * get_kernel_nofault(): safely attempt to read from a location
> @@ -549,7 +577,42 @@ do {							\
>   	copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\
>   })
>   
> -#ifndef user_access_begin
> +#ifdef user_access_begin
> +
> +#ifdef arch_unsafe_get_user
> +/*
> + * Wrap the architecture implementation so that @label can be outside of a
> + * cleanup() scope. A regular C goto works correctly, but ASM goto does
> + * not. Clang rejects such an attempt, but GCC silently emits buggy code.
> + *
> + * Some architectures use internal local labels already, but this extra
> + * indirection here is harmless because the compiler optimizes it out
> + * completely in any case. This construct just ensures that the ASM GOTO
> + * target is always in the local scope. The C goto 'label' works correctly
> + * when leaving a cleanup() scope.
> + */
> +#define unsafe_get_user(x, ptr, label)			\
> +do {							\
> +	__label__ local_label;				\
> +	arch_unsafe_get_user(x, ptr, local_label);	\
> +	if (0) {					\
> +	local_label:					\
> +		goto label;				\
> +	}						\
> +} while (0)
> +
> +#define unsafe_put_user(x, ptr, label)			\
> +do {							\
> +	__label__ local_label;				\
> +	arch_unsafe_put_user(x, ptr, local_label);	\
> +	if (0) {					\
> +	local_label:					\
> +		goto label;				\
> +	}						\
> +} while (0)
> +#endif /* arch_unsafe_get_user */
> +
> +#else /* user_access_begin */
>   #define user_access_begin(ptr,len) access_ok(ptr, len)
>   #define user_access_end() do { } while (0)
>   #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
> @@ -559,7 +622,8 @@ do {							\
>   #define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e)
>   static inline unsigned long user_access_save(void) { return 0UL; }
>   static inline void user_access_restore(unsigned long flags) { }
> -#endif
> +#endif /* !user_access_begin */
> +
>   #ifndef user_write_access_begin
>   #define user_write_access_begin user_access_begin
>   #define user_write_access_end user_access_end
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com


More information about the Linuxppc-dev mailing list