[PATCH] powerpc/vdso64: inline __get_datapage()

Christophe Leroy christophe.leroy at c-s.fr
Wed Aug 21 19:46:38 AEST 2019



Le 21/08/2019 à 11:29, Santosh Sivaraj a écrit :
> __get_datapage() is only a few instructions to retrieve the
> address of the page where the kernel stores data to the VDSO.
> 
> By inlining this function into its users, a bl/blr pair and
> a mflr/mtlr pair is avoided, plus a few reg moves.
> 
> clock-gettime-monotonic: syscall: 514 nsec/call  396 nsec/call
> clock-gettime-monotonic:    libc: 25 nsec/call   24 nsec/call
> clock-gettime-monotonic:    vdso: 20 nsec/call   20 nsec/call
> clock-getres-monotonic: syscall: 347 nsec/call   372 nsec/call
> clock-getres-monotonic:    libc: 19 nsec/call    19 nsec/call
> clock-getres-monotonic:    vdso: 10 nsec/call    10 nsec/call
> clock-gettime-monotonic-coarse: syscall: 511 nsec/call   396 nsec/call
> clock-gettime-monotonic-coarse:    libc: 23 nsec/call    21 nsec/call
> clock-gettime-monotonic-coarse:    vdso: 15 nsec/call    13 nsec/call
> clock-gettime-realtime: syscall: 526 nsec/call   405 nsec/call
> clock-gettime-realtime:    libc: 24 nsec/call    23 nsec/call
> clock-gettime-realtime:    vdso: 18 nsec/call    18 nsec/call
> clock-getres-realtime: syscall: 342 nsec/call    372 nsec/call
> clock-getres-realtime:    libc: 19 nsec/call     19 nsec/call
> clock-getres-realtime:    vdso: 10 nsec/call     10 nsec/call
> clock-gettime-realtime-coarse: syscall: 515 nsec/call    373 nsec/call
> clock-gettime-realtime-coarse:    libc: 23 nsec/call     22 nsec/call
> clock-gettime-realtime-coarse:    vdso: 14 nsec/call     13 nsec/call

I think you should only put the measurements on vdso calls, and only the
ones that are impacted by the change. For example, the getres function
doesn't use __get_datapage() so showing it here is pointless.

gettimeofday should be shown here, as it uses __get_datapage().


> 
> Based on the patch by Christophe Leroy <christophe.leroy at c-s.fr> for vdso32.
> 
> Signed-off-by: Santosh Sivaraj <santosh at fossix.org>
> ---
> 
> except for a couple of calls (1 or 2 nsec reduction), there are no
> improvements in the call times. Or is 10 nsec the minimum granularity??

Maybe the ones that show no improvements are the ones that don't use 
__get_datapage() at all ...

> 
> So I don't know if it's even worth updating vdso64 except to keep vdso32 and
> vdso64 equal.

2ns on a 15ns call is 13% so it is worth it I think.

Christophe


> 
> 
>   arch/powerpc/kernel/vdso64/cacheflush.S   | 10 ++++----
>   arch/powerpc/kernel/vdso64/datapage.S     | 29 ++++-------------------
>   arch/powerpc/kernel/vdso64/datapage.h     | 10 ++++++++
>   arch/powerpc/kernel/vdso64/gettimeofday.S |  8 ++++---
>   4 files changed, 24 insertions(+), 33 deletions(-)
>   create mode 100644 arch/powerpc/kernel/vdso64/datapage.h
> 
> diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
> index 3f92561a64c4..30e8b0d29bea 100644
> --- a/arch/powerpc/kernel/vdso64/cacheflush.S
> +++ b/arch/powerpc/kernel/vdso64/cacheflush.S
> @@ -10,6 +10,8 @@
>   #include <asm/vdso.h>
>   #include <asm/asm-offsets.h>
>   
> +#include "datapage.h"
> +
>   	.text
>   
>   /*
> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>     .cfi_startproc
>   	mflr	r12
>     .cfi_register lr,r12
> -	mr	r11,r3
> -	bl	V_LOCAL_FUNC(__get_datapage)
> +	get_datapage	r11, r0
>   	mtlr	r12
> -	mr	r10,r3
>   
>   	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
>   	addi	r5,r7,-1
> -	andc	r6,r11,r5		/* round low to line bdy */
> +	andc	r6,r3,r5		/* round low to line bdy */
>   	subf	r8,r6,r4		/* compute length */
>   	add	r8,r8,r5		/* ensure we get enough */
>   	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>   
>   	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
>   	addi	r5,r7,-1
> -	andc	r6,r11,r5		/* round low to line bdy */
> +	andc	r6,r3,r5		/* round low to line bdy */
>   	subf	r8,r6,r4		/* compute length */
>   	add	r8,r8,r5
>   	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
> diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
> index dc84f5ae3802..8712f57c931c 100644
> --- a/arch/powerpc/kernel/vdso64/datapage.S
> +++ b/arch/powerpc/kernel/vdso64/datapage.S
> @@ -11,34 +11,13 @@
>   #include <asm/unistd.h>
>   #include <asm/vdso.h>
>   
> +#include "datapage.h"
> +
>   	.text
>   .global	__kernel_datapage_offset;
>   __kernel_datapage_offset:
>   	.long	0
>   
> -V_FUNCTION_BEGIN(__get_datapage)
> -  .cfi_startproc
> -	/* We don't want that exposed or overridable as we want other objects
> -	 * to be able to bl directly to here
> -	 */
> -	.protected __get_datapage
> -	.hidden __get_datapage
> -
> -	mflr	r0
> -  .cfi_register lr,r0
> -
> -	bcl	20,31,data_page_branch
> -data_page_branch:
> -	mflr	r3
> -	mtlr	r0
> -	addi	r3, r3, __kernel_datapage_offset-data_page_branch
> -	lwz	r0,0(r3)
> -  .cfi_restore lr
> -	add	r3,r0,r3
> -	blr
> -  .cfi_endproc
> -V_FUNCTION_END(__get_datapage)
> -
>   /*
>    * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>    *
> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>   	mflr	r12
>     .cfi_register lr,r12
>   	mr	r4,r3
> -	bl	V_LOCAL_FUNC(__get_datapage)
> +	get_datapage	r3, r0
>   	mtlr	r12
>   	addi	r3,r3,CFG_SYSCALL_MAP64
>   	cmpldi	cr0,r4,0
> @@ -75,7 +54,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>     .cfi_startproc
>   	mflr	r12
>     .cfi_register lr,r12
> -	bl	V_LOCAL_FUNC(__get_datapage)
> +	get_datapage	r3, r0
>   	ld	r3,CFG_TB_TICKS_PER_SEC(r3)
>   	mtlr	r12
>   	crclr	cr0*4+so
> diff --git a/arch/powerpc/kernel/vdso64/datapage.h b/arch/powerpc/kernel/vdso64/datapage.h
> new file mode 100644
> index 000000000000..f2f0da0f65f3
> --- /dev/null
> +++ b/arch/powerpc/kernel/vdso64/datapage.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +.macro get_datapage ptr, tmp
> +	bcl	20,31,888f
> +888:
> +	mflr	\ptr
> +	addi	\ptr, \ptr, __kernel_datapage_offset - 888b
> +	lwz	\tmp, 0(\ptr)
> +	add	\ptr, \tmp, \ptr
> +.endm
> diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
> index 07bfe33fe874..7bcc879392cc 100644
> --- a/arch/powerpc/kernel/vdso64/gettimeofday.S
> +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
> @@ -12,6 +12,8 @@
>   #include <asm/asm-offsets.h>
>   #include <asm/unistd.h>
>   
> +#include "datapage.h"
> +
>   	.text
>   /*
>    * Exact prototype of gettimeofday
> @@ -26,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>   
>   	mr	r11,r3			/* r11 holds tv */
>   	mr	r10,r4			/* r10 holds tz */
> -	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
> +	get_datapage	r3, r0
>   	cmpldi	r11,0			/* check if tv is NULL */
>   	beq	2f
>   	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
> @@ -71,7 +73,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>   	mflr	r12			/* r12 saves lr */
>     .cfi_register lr,r12
>   	mr	r11,r4			/* r11 saves tp */
> -	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
> +	get_datapage	r3, r0		/* get data page */
>   	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
>   	ori	r7,r7,NSEC_PER_SEC@l
>   	beq	cr5,70f
> @@ -218,7 +220,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>     .cfi_register lr,r12
>   
>   	mr	r11,r3			/* r11 holds t */
> -	bl	V_LOCAL_FUNC(__get_datapage)
> +	get_datapage	r3, r0
>   
>   	ld	r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
>   
> 


More information about the Linuxppc-dev mailing list