[PATCH v2 3/4] powerpc/64: system call remove non-volatile GPR save optimisation

Christophe Leroy christophe.leroy at c-s.fr
Wed Aug 28 19:02:51 AEST 2019



Le 27/08/2019 à 15:55, Nicholas Piggin a écrit :
> powerpc has an optimisation where interrupts avoid saving the
> non-volatile (or callee saved) registers to the interrupt stack frame if
> they are not required.
> 
> Two problems with this are that an interrupt does not always know
> whether it will need non-volatiles; and if it does need them, they can
> only be saved from the entry-scoped asm code (because we don't control
> what the C compiler does with these registers).
> 
> system calls are the most difficult: some system calls always require
> all registers (e.g., fork, to copy regs into the child).  Sometimes
> registers are only required under certain conditions (e.g., tracing,
> signal delivery). These cases require ugly logic in the call chains
> (e.g., ppc_fork), and require a lot of logic to be implemented in asm.

Do you really find it ugly to just call function nvgprs() before calling 
sys_fork() ? I guess there are things a lot uglier.

> 
> So remove the optimisation for system calls, and always save NVGPRs on
> entry. Modern high performance CPUs are not so sensitive, because the
> stores are dense in cache and can be hidden by other expensive work in
> the syscall path -- the null syscall selftests benchmark on POWER9 is
> not slowed (124.40ns before and 123.64ns after, i.e., within the noise).

I did the test on PPC32:

On an 885, null_syscall reports 2227ns (132MHz)
If saving non-volatile regs, it goes to 2419, ie +8.6%

On an 8321, null_syscall reports 1021ns (333MHz)
If saving non-volatile regs, it goes to 1100, ie +7.7%

So unless going to C compensates this degradation, I guess it is not 
worth it on PPC32.

> 
> Other interrupts retain the NVGPR optimisation for now.
> 
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> Changes since v1:
> - Improve changelog
> - Fix clone3 spu table entry (Segher)
> 
>   arch/powerpc/kernel/entry_64.S           | 72 +++++-------------------
>   arch/powerpc/kernel/syscalls/syscall.tbl | 22 +++++---
>   2 files changed, 28 insertions(+), 66 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 6467bdab8d40..5a3e0b5c9ad1 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -98,13 +98,14 @@ END_BTB_FLUSH_SECTION
>   	std	r11,_XER(r1)
>   	std	r11,_CTR(r1)
>   	std	r9,GPR13(r1)
> +	SAVE_NVGPRS(r1)
>   	mflr	r10
>   	/*
>   	 * This clears CR0.SO (bit 28), which is the error indication on
>   	 * return from this system call.
>   	 */
>   	rldimi	r2,r11,28,(63-28)
> -	li	r11,0xc01
> +	li	r11,0xc00
>   	std	r10,_LINK(r1)
>   	std	r11,_TRAP(r1)
>   	std	r3,ORIG_GPR3(r1)
> @@ -323,7 +324,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>   
>   /* Traced system call support */
>   .Lsyscall_dotrace:
> -	bl	save_nvgprs
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	do_syscall_trace_enter
>   
> @@ -408,7 +408,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>   	mtmsrd	r10,1
>   #endif /* CONFIG_PPC_BOOK3E */
>   
> -	bl	save_nvgprs
>   	addi	r3,r1,STACK_FRAME_OVERHEAD
>   	bl	do_syscall_trace_leave
>   	b	ret_from_except
> @@ -442,62 +441,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>   _ASM_NOKPROBE_SYMBOL(system_call_common);
>   _ASM_NOKPROBE_SYMBOL(system_call_exit);
>   
> -/* Save non-volatile GPRs, if not already saved. */
> -_GLOBAL(save_nvgprs)
> -	ld	r11,_TRAP(r1)
> -	andi.	r0,r11,1
> -	beqlr-
> -	SAVE_NVGPRS(r1)
> -	clrrdi	r0,r11,1
> -	std	r0,_TRAP(r1)
> -	blr
> -_ASM_NOKPROBE_SYMBOL(save_nvgprs);

I see it is added back somewhere below. Why don't you leave it where it is ?

> -
> -	
> -/*
> - * The sigsuspend and rt_sigsuspend system calls can call do_signal
> - * and thus put the process into the stopped state where we might
> - * want to examine its user state with ptrace.  Therefore we need
> - * to save all the nonvolatile registers (r14 - r31) before calling
> - * the C code.  Similarly, fork, vfork and clone need the full
> - * register state on the stack so that it can be copied to the child.
> - */
> -
> -_GLOBAL(ppc_fork)
> -	bl	save_nvgprs
> -	bl	sys_fork
> -	b	.Lsyscall_exit
> -
> -_GLOBAL(ppc_vfork)
> -	bl	save_nvgprs
> -	bl	sys_vfork
> -	b	.Lsyscall_exit
> -
> -_GLOBAL(ppc_clone)
> -	bl	save_nvgprs
> -	bl	sys_clone
> -	b	.Lsyscall_exit
> -
> -_GLOBAL(ppc_clone3)
> -       bl      save_nvgprs
> -       bl      sys_clone3
> -       b       .Lsyscall_exit
> -
> -_GLOBAL(ppc32_swapcontext)
> -	bl	save_nvgprs
> -	bl	compat_sys_swapcontext
> -	b	.Lsyscall_exit
> -
> -_GLOBAL(ppc64_swapcontext)
> -	bl	save_nvgprs
> -	bl	sys_swapcontext
> -	b	.Lsyscall_exit
> -
> -_GLOBAL(ppc_switch_endian)
> -	bl	save_nvgprs
> -	bl	sys_switch_endian
> -	b	.Lsyscall_exit
> -
>   _GLOBAL(ret_from_fork)
>   	bl	schedule_tail
>   	REST_NVGPRS(r1)
> @@ -516,6 +459,17 @@ _GLOBAL(ret_from_kernel_thread)
>   	li	r3,0
>   	b	.Lsyscall_exit
>   
> +/* Save non-volatile GPRs, if not already saved. */
> +_GLOBAL(save_nvgprs)
> +	ld	r11,_TRAP(r1)
> +	andi.	r0,r11,1
> +	beqlr-
> +	SAVE_NVGPRS(r1)
> +	clrrdi	r0,r11,1
> +	std	r0,_TRAP(r1)
> +	blr
> +_ASM_NOKPROBE_SYMBOL(save_nvgprs);
> +

Moved here.

>   #ifdef CONFIG_PPC_BOOK3S_64
>   
>   #define FLUSH_COUNT_CACHE	\

Christophe



More information about the Linuxppc-dev mailing list