[PATCH v2 10/14] powerpc: Provide syscall wrapper

Andrew Donnellan ajd at linux.ibm.com
Wed Aug 10 20:32:58 AEST 2022


On Mon, 2022-07-25 at 16:30 +1000, Rohan McLure wrote:
> Implement syscall wrapper as per s390, x86, arm64. When enabled
> cause handlers to accept parameters from a stack frame rather than
> from user scratch register state. This allows for user registers to
> be
> safely cleared in order to reduce caller influence on speculation
> within syscall routine. The wrapper is a macro that emits syscall
> handler symbols that call into the target handler, obtaining its
> parameters from a struct pt_regs on the stack.
> 
> As registers are already saved to the stack prior to calling
> system_call_exception, it appears that this function is executed more
> efficiently with the new stack-pointer convention than with
> parameters
> passed by registers, avoiding the allocation of a stack frame for
> this
> method. On a 32-bit system, we see >20% performance increases on the
> null_syscall microbenchmark, and on a Power 8 the performance gains
> amortise the cost of clearing and restoring registers which is
> implemented at the end of this series, seeing final result of ~5.6%
> performance improvement on null_syscall.
> 
> Syscalls are wrapped in this fashion on all platforms except for the
> Cell processor as this commit does not provide SPU support. This can
> be
> quickly fixed in a successive patch, but requires spu_sys_callback to
> allocate a pt_regs structure to satisfy the wrapped calling
> convention.
> 
> Co-developed-by: Andrew Donnellan <ajd at linux.ibm.com>
> Signed-off-by: Andrew Donnellan <ajd at linux.ibm.com>
> Signed-off-by: Rohan McLure <rmclure at linux.ibm.com>

[This patch needs some trivial changes to rebase on top of commit
1547db7d1f44 ("powerpc: Move system_call_exception() to syscall.c")]

> ---
> V1 -> V2: Generate prototypes for symbols produced by the wrapper.
> ---

Thanks for fixing this!

>  arch/powerpc/Kconfig                       |  1 +
>  arch/powerpc/include/asm/interrupt.h       |  3 +-
>  arch/powerpc/include/asm/syscall.h         |  4 +
>  arch/powerpc/include/asm/syscall_wrapper.h | 94 ++++++++++++++++++++
>  arch/powerpc/include/asm/syscalls.h        | 25 +++++-
>  arch/powerpc/kernel/entry_32.S             |  6 +-
>  arch/powerpc/kernel/interrupt.c            | 31 +++----
>  arch/powerpc/kernel/interrupt_64.S         | 30 +++----
>  arch/powerpc/kernel/systbl.c               |  2 +
>  arch/powerpc/kernel/vdso.c                 |  2 +
>  10 files changed, 156 insertions(+), 42 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 26331cdd3642..447bd34b5b87 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -137,6 +137,7 @@ config PPC
>         select ARCH_HAS_STRICT_KERNEL_RWX       if (PPC_BOOK3S ||
> PPC_8xx || 40x) && !HIBERNATION
>         select ARCH_HAS_STRICT_KERNEL_RWX       if FSL_BOOKE &&
> !HIBERNATION && !RANDOMIZE_BASE
>         select ARCH_HAS_STRICT_MODULE_RWX       if
> ARCH_HAS_STRICT_KERNEL_RWX
> +       select ARCH_HAS_SYSCALL_WRAPPER         if !SPU_BASE
>         select ARCH_HAS_TICK_BROADCAST          if
> GENERIC_CLOCKEVENTS_BROADCAST
>         select ARCH_HAS_UACCESS_FLUSHCACHE
>         select ARCH_HAS_UBSAN_SANITIZE_ALL
> diff --git a/arch/powerpc/include/asm/interrupt.h
> b/arch/powerpc/include/asm/interrupt.h
> index 8069dbc4b8d1..3f9cad81585c 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -665,8 +665,7 @@ static inline void
> interrupt_cond_local_irq_enable(struct pt_regs *regs)
>                 local_irq_enable();
>  }
>  
> -long system_call_exception(long r3, long r4, long r5, long r6, long
> r7, long r8,
> -                          unsigned long r0, struct pt_regs *regs);
> +long system_call_exception(unsigned long r0, struct pt_regs *regs);
>  notrace unsigned long syscall_exit_prepare(unsigned long r3, struct
> pt_regs *regs, long scv);
>  notrace unsigned long interrupt_exit_user_prepare(struct pt_regs
> *regs);
>  notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs
> *regs);
> diff --git a/arch/powerpc/include/asm/syscall.h
> b/arch/powerpc/include/asm/syscall.h
> index d2a8dfd5de33..3dd36c5e334a 100644
> --- a/arch/powerpc/include/asm/syscall.h
> +++ b/arch/powerpc/include/asm/syscall.h
> @@ -14,8 +14,12 @@
>  #include <linux/sched.h>
>  #include <linux/thread_info.h>
>  
> +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +typedef long (*syscall_fn)(const struct pt_regs *);
> +#else
>  typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned
> long,
>                            unsigned long, unsigned long, unsigned
> long);
> +#endif
>  
>  /* ftrace syscalls requires exporting the sys_call_table */
>  extern const syscall_fn sys_call_table[];
> diff --git a/arch/powerpc/include/asm/syscall_wrapper.h
> b/arch/powerpc/include/asm/syscall_wrapper.h
> new file mode 100644
> index 000000000000..ebeffcb08d3d
> --- /dev/null
> +++ b/arch/powerpc/include/asm/syscall_wrapper.h
> @@ -0,0 +1,94 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * syscall_wrapper.h - powerpc specific wrappers to syscall
> definitions
> + *
> + * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
> + */
> +
> +#ifndef __ASM_SYSCALL_WRAPPER_H
> +#define __ASM_SYSCALL_WRAPPER_H
> +
> +struct pt_regs;
> +
> +#define SC_POWERPC_REGS_TO_ARGS(x,
> ...)                                \
> +       __MAP(x,__SC_ARGS                                       \
> +             ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]        \
> +             ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
> +
> +#ifdef CONFIG_COMPAT
> +
> +#define COMPAT_SYSCALL_DEFINEx(x, name,
> ...)                                           \
> +       asmlinkage long __powerpc_compat_sys##name(const struct
> pt_regs *regs);         \
> +       ALLOW_ERROR_INJECTION(__powerpc_compat_sys##name,
> ERRNO);                       \
> +       static long
> __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));              \
> +       static inline long
> __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));       \
> +       asmlinkage long __powerpc_compat_sys##name(const struct
> pt_regs *regs)          \
> +       {                                                            
>                    \
> +               return
> __se_compat_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));   \
> +       }                                                            
>                    \
> +       static long
> __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))               \
> +       {                                                            
>                    \
> +               return
> __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));        \
> +       }                                                            
>                    \
> +       static inline long
> __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +
> +#define
> COMPAT_SYSCALL_DEFINE0(sname)                                        
>           \
> +       asmlinkage long __powerpc_compat_sys_##sname(const struct
> pt_regs *__unused);   \
> +       ALLOW_ERROR_INJECTION(__powerpc_compat_sys_##sname,
> ERRNO);                     \
> +       asmlinkage long __powerpc_compat_sys_##sname(const struct
> pt_regs *__unused)
> +
> +#define
> COND_SYSCALL_COMPAT(name)                                            
>           \
> +       long __powerpc_compat_sys_##name(const struct pt_regs
> *regs);                   \
> +       asmlinkage long __weak __powerpc_compat_sys_##name(const
> struct pt_regs *regs)  \
> +       {                                                            
>                    \
> +               return
> sys_ni_syscall();                                                \
> +       }
> +#define COMPAT_SYS_NI(name) \
> +       SYSCALL_ALIAS(__powerpc_compat_sys_##name,
> sys_ni_posix_timers);
> +
> +#endif /* CONFIG_COMPAT */
> +
> +#define __SYSCALL_DEFINEx(x, name,
> ...)                                                \
> +       asmlinkage long __powerpc_sys##name(const struct pt_regs
> *regs);        \
> +       ALLOW_ERROR_INJECTION(__powerpc_sys##name,
> ERRNO);                      \
> +       long
> sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));                         \
> +       static long
> __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));             \
> +       static inline long
> __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));      \
> +       asmlinkage long __powerpc_sys##name(const struct pt_regs
> *regs)         \
> +       {                                                            
>            \
> +               return
> __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));  \
> +       }                                                            
>            \
> +       long
> sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))                          \
> +       {                                                            
>            \
> +               return
> __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));          \
> +       }                                                            
>            \
> +       static long
> __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))              \
> +       {                                                            
>            \
> +               long ret =
> __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));      \
> +               __MAP(x,__SC_TEST,__VA_ARGS__);                      
>            \
> +               __PROTECT(x,
> ret,__MAP(x,__SC_ARGS,__VA_ARGS__));               \
> +               return
> ret;                                                     \
> +       }                                                            
>            \
> +       static inline long
> __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
> +
> +#define
> SYSCALL_DEFINE0(sname)                                               
>   \
> +       SYSCALL_METADATA(_##sname,
> 0);                                          \
> +       long
> sys_##sname(void);                                                 \
> +       asmlinkage long __powerpc_sys_##sname(const struct pt_regs
> *__unused);  \
> +       ALLOW_ERROR_INJECTION(__powerpc_sys_##sname,
> ERRNO);                    \
> +       long
> sys_##sname(void)                                                  \
> +       {                                                            
>            \
> +               return
> __powerpc_sys_##sname(NULL);                             \
> +       }                                                            
>            \
> +       asmlinkage long __powerpc_sys_##sname(const struct pt_regs
> *__unused)
> +
> +#define
> COND_SYSCALL(name)                                                   
>   \
> +       long __powerpc_sys_##name(const struct pt_regs
> *regs);                  \
> +       asmlinkage long __weak __powerpc_sys_##name(const struct
> pt_regs *regs) \
> +       {                                                            
>            \
> +               return
> sys_ni_syscall();                                        \
> +       }
> +
> +#define SYS_NI(name) SYSCALL_ALIAS(__powerpc_sys_##name,
> sys_ni_posix_timers);
> +
> +#endif /* __ASM_SYSCALL_WRAPPER_H */
> diff --git a/arch/powerpc/include/asm/syscalls.h
> b/arch/powerpc/include/asm/syscalls.h
> index fbeab8e88c5e..7b66f9fdfa04 100644
> --- a/arch/powerpc/include/asm/syscalls.h
> +++ b/arch/powerpc/include/asm/syscalls.h
> @@ -15,6 +15,12 @@
>  #include <asm/unistd.h>
>  #include <asm/ucontext.h>
>  
> +#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +asmlinkage long sys_ni_syscall(void);
> +#else
> +asmlinkage long sys_ni_syscall(const struct pt_regs *regs);
> +#endif
> +
>  struct rtas_args;
>  
>  #ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> @@ -24,7 +30,6 @@ struct rtas_args;
>   */
>  
>  asmlinkage long sys_rtas(struct rtas_args __user *uargs);
> -asmlinkage long sys_ni_syscall(void);
>  
>  #ifdef CONFIG_PPC64
>  asmlinkage long sys_ppc64_personality(unsigned long personality);
> @@ -95,6 +100,24 @@ asmlinkage long
> compat_sys_ppc_sync_file_range2(int fd, unsigned int flags,
>                                                 unsigned int
> nbytes2);
>  #endif /* CONFIG_COMPAT */
>  
> +#else
> +
> +#define __SYSCALL_WITH_COMPAT(nr, native, compat)      __SYSCALL(nr,
> native)
> +#define __SYSCALL(nr, entry) \
> +       asmlinkage long __powerpc_##entry(const struct pt_regs
> *regs);
> +
> +#ifdef CONFIG_PPC64
> +#include <asm/syscall_table_64.h>
> +#else
> +#include <asm/syscall_table_32.h>
> +#endif /* CONFIG_PPC64 */
> +
> +#ifdef CONFIG_COMPAT
> +#undef __SYSCALL_WITH_COMPAT
> +#define __SYSCALL_WITH_COMPAT(nr, native, compat)      __SYSCALL(nr,
> compat)
> +#include <asm/syscall_table_32.h>
> +#endif /* CONFIG_COMPAT */
> +
>  #endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
>  
>  #endif /* __KERNEL__ */
> diff --git a/arch/powerpc/kernel/entry_32.S
> b/arch/powerpc/kernel/entry_32.S
> index 1d599df6f169..be66040f7708 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -121,9 +121,9 @@ transfer_to_syscall:
>         SAVE_NVGPRS(r1)
>         kuep_lock
>  
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
> +       mr      r3,r0
>         bl      system_call_exception
>  
>  ret_from_syscall:
> diff --git a/arch/powerpc/kernel/interrupt.c
> b/arch/powerpc/kernel/interrupt.c
> index 62e636e0f684..faa94a7e1edc 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -72,15 +72,13 @@ static notrace __always_inline bool
> prep_irq_for_enabled_exit(bool restartable)
>  }
>  
>  /* Has to run notrace because it is entered not completely
> "reconciled" */
> -notrace long system_call_exception(long r3, long r4, long r5,
> -                                  long r6, long r7, long r8,
> -                                  unsigned long r0, struct pt_regs
> *regs)
> +notrace long system_call_exception(unsigned long r0, struct pt_regs
> *regs)
>  {
>         syscall_fn f;
>  
>         kuap_lock();
>  
> -       regs->orig_gpr3 = r3;
> +       regs->orig_gpr3 = regs->gpr[3];
>  
>         if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
>                 BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
> @@ -194,12 +192,6 @@ notrace long system_call_exception(long r3, long
> r4, long r5,
>                 r0 = do_syscall_trace_enter(regs);
>                 if (unlikely(r0 >= NR_syscalls))
>                         return regs->gpr[3];
> -               r3 = regs->gpr[3];
> -               r4 = regs->gpr[4];
> -               r5 = regs->gpr[5];
> -               r6 = regs->gpr[6];
> -               r7 = regs->gpr[7];
> -               r8 = regs->gpr[8];
>  
>         } else if (unlikely(r0 >= NR_syscalls)) {
>                 if (unlikely(trap_is_unsupported_scv(regs))) {
> @@ -216,18 +208,23 @@ notrace long system_call_exception(long r3,
> long r4, long r5,
>         if (unlikely(is_compat_task())) {
>                 f = (void *)compat_sys_call_table[r0];
>  
> -               r3 &= 0x00000000ffffffffULL;
> -               r4 &= 0x00000000ffffffffULL;
> -               r5 &= 0x00000000ffffffffULL;
> -               r6 &= 0x00000000ffffffffULL;
> -               r7 &= 0x00000000ffffffffULL;
> -               r8 &= 0x00000000ffffffffULL;
> +               regs->gpr[3] &= 0x00000000ffffffffULL;
> +               regs->gpr[4] &= 0x00000000ffffffffULL;
> +               regs->gpr[5] &= 0x00000000ffffffffULL;
> +               regs->gpr[6] &= 0x00000000ffffffffULL;
> +               regs->gpr[7] &= 0x00000000ffffffffULL;
> +               regs->gpr[8] &= 0x00000000ffffffffULL;
>  
>         } else {
>                 f = (void *)sys_call_table[r0];
>         }
>  
> -       return f(r3, r4, r5, r6, r7, r8);
> +       #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
> +       return f(regs);
> +       #else
> +       return f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
> +                regs->gpr[6], regs->gpr[7], regs->gpr[8]);
> +       #endif

Kernel coding style generally aligns #ifdefs to column 0

>  }
>  
>  static notrace void booke_load_dbcr0(void)
> diff --git a/arch/powerpc/kernel/interrupt_64.S
> b/arch/powerpc/kernel/interrupt_64.S
> index ce25b28cf418..3e8a811e09c4 100644
> --- a/arch/powerpc/kernel/interrupt_64.S
> +++ b/arch/powerpc/kernel/interrupt_64.S
> @@ -91,9 +91,9 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
>         li      r11,\trapnr
>         std     r11,_TRAP(r1)
>         std     r12,_CCR(r1)
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
>         ld      r11,exception_marker at toc(r2)
> -       std     r11,-16(r10)            /* "regshere" marker */
> +       std     r11,-16(r4)             /* "regshere" marker */
>  
>  BEGIN_FTR_SECTION
>         HMT_MEDIUM
> @@ -108,8 +108,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>          * but this is the best we can do.
>          */
>  
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       mr      r3,r0
>         bl      system_call_exception
>  
>  .Lsyscall_vectored_\name\()_exit:
> @@ -275,9 +275,9 @@ END_BTB_FLUSH_SECTION
>         std     r10,_LINK(r1)
>         std     r11,_TRAP(r1)
>         std     r12,_CCR(r1)
> -       addi    r10,r1,STACK_FRAME_OVERHEAD
> +       addi    r4,r1,STACK_FRAME_OVERHEAD
>         ld      r11,exception_marker at toc(r2)
> -       std     r11,-16(r10)            /* "regshere" marker */
> +       std     r11,-16(r4)             /* "regshere" marker */
>  
>  #ifdef CONFIG_PPC_BOOK3S
>         li      r11,1
> @@ -298,8 +298,8 @@ END_BTB_FLUSH_SECTION
>         wrteei  1
>  #endif
>  
> -       /* Calling convention has r9 = orig r0, r10 = regs */
> -       mr      r9,r0
> +       /* Calling convention has r3 = orig r0, r4 = regs */
> +       mr      r3,r0
>         bl      system_call_exception
>  
>  .Lsyscall_exit:
> @@ -343,16 +343,8 @@
> END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>         cmpdi   r3,0
>         bne     .Lsyscall_restore_regs
>         /* Zero volatile regs that may contain sensitive kernel data
> */
> -       li      r0,0
> -       li      r4,0
> -       li      r5,0
> -       li      r6,0
> -       li      r7,0
> -       li      r8,0
> -       li      r9,0
> -       li      r10,0
> -       li      r11,0
> -       li      r12,0
> +       NULLIFY_GPR(0)
> +       NULLIFY_GPRS(4, 12)

Move this to a different patch

>         mtctr   r0
>         mtspr   SPRN_XER,r0
>  .Lsyscall_restore_regs_cont:
> @@ -378,7 +370,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>         REST_NVGPRS(r1)
>         mtctr   r3
>         mtspr   SPRN_XER,r4
> -       ld      r0,GPR0(r1)
> +       REST_GPR(0, r1)

Same for this

>         REST_GPRS(4, 12, r1)
>         b       .Lsyscall_restore_regs_cont
>  .Lsyscall_rst_end:
> diff --git a/arch/powerpc/kernel/systbl.c
> b/arch/powerpc/kernel/systbl.c
> index b88a9c2a1f50..cb05e302ea58 100644
> --- a/arch/powerpc/kernel/systbl.c
> +++ b/arch/powerpc/kernel/systbl.c
> @@ -15,8 +15,10 @@
>  #include <asm/unistd.h>
>  #include <asm/syscalls.h>
>  
> +#undef __SYSCALL_WITH_COMPAT
>  #define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr,
> entry)
>  
> +#undef __SYSCALL
>  #ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
>  #define __SYSCALL(nr, entry) [nr] = __powerpc_##entry,
>  #define __powerpc_sys_ni_syscall       sys_ni_syscall
> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> index 080c9e7de0c0..900dc8ed1f14 100644
> --- a/arch/powerpc/kernel/vdso.c
> +++ b/arch/powerpc/kernel/vdso.c
> @@ -39,6 +39,8 @@
>  extern char vdso32_start, vdso32_end;
>  extern char vdso64_start, vdso64_end;
>  
> +asmlinkage long sys_ni_syscall(void);
> +
>  /*
>   * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
>   * Once the early boot kernel code no longer needs to muck around

-- 
Andrew Donnellan    OzLabs, ADL Canberra
ajd at linux.ibm.com   IBM Australia Limited



More information about the Linuxppc-dev mailing list