[PATCH v2 4/8] powerpc/vdso32: inline __get_datapage()

Christophe Leroy christophe.leroy at c-s.fr
Wed Oct 30 03:12:52 AEDT 2019


Hi Santosh,

Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit :
> Hi Christophe,
> 
> Christophe Leroy <christophe.leroy at c-s.fr> writes:
> 
>> __get_datapage() is only a few instructions to retrieve the
>> address of the page where the kernel stores data for the VDSO.
>>
>> By inlining this function into its users, a bl/blr pair and
>> a mflr/mtlr pair are avoided, plus a few reg moves.
>>
>> The improvement is noticeable (about 55 nsec/call on an 8xx)
>>
>> vdsotest before the patch:
>> gettimeofday:    vdso: 731 nsec/call
>> clock-gettime-realtime-coarse:    vdso: 668 nsec/call
>> clock-gettime-monotonic-coarse:    vdso: 745 nsec/call
>>
>> vdsotest after the patch:
>> gettimeofday:    vdso: 677 nsec/call
>> clock-gettime-realtime-coarse:    vdso: 613 nsec/call
>> clock-gettime-monotonic-coarse:    vdso: 690 nsec/call
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy at c-s.fr>
>> ---
>>   arch/powerpc/kernel/vdso32/cacheflush.S   | 10 +++++-----
>>   arch/powerpc/kernel/vdso32/datapage.S     | 29 ++++-------------------------
>>   arch/powerpc/kernel/vdso32/datapage.h     | 11 +++++++++++
>>   arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++-------
>>   4 files changed, 26 insertions(+), 37 deletions(-)
>>   create mode 100644 arch/powerpc/kernel/vdso32/datapage.h
> 
> The datapage.h file should ideally be moved under include/asm, so that we can
> use the same for powerpc64 too.

In the end, instead of creating a new header, I added the get_datapage macro to the existing asm/vdso_datapage.h.

Christophe

> 
> Santosh
> 
>>
>> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S
>> index 7f882e7b9f43..e9453837e4ee 100644
>> --- a/arch/powerpc/kernel/vdso32/cacheflush.S
>> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S
>> @@ -10,6 +10,8 @@
>>   #include <asm/vdso.h>
>>   #include <asm/asm-offsets.h>
>>   
>> +#include "datapage.h"
>> +
>>   	.text
>>   
>>   /*
>> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>     .cfi_startproc
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	mr	r11,r3
>> -	bl	__get_datapage@local
>> +	get_datapage	r10, r0
>>   	mtlr	r12
>> -	mr	r10,r3
>>   
>>   	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
>>   	addi	r5,r7,-1
>> -	andc	r6,r11,r5		/* round low to line bdy */
>> +	andc	r6,r3,r5		/* round low to line bdy */
>>   	subf	r8,r6,r4		/* compute length */
>>   	add	r8,r8,r5		/* ensure we get enough */
>>   	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
>> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
>>   
>>   	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
>>   	addi	r5,r7,-1
>> -	andc	r6,r11,r5		/* round low to line bdy */
>> +	andc	r6,r3,r5		/* round low to line bdy */
>>   	subf	r8,r6,r4		/* compute length */
>>   	add	r8,r8,r5
>>   	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
>> index 6984125b9fc0..d480d2d4a3fe 100644
>> --- a/arch/powerpc/kernel/vdso32/datapage.S
>> +++ b/arch/powerpc/kernel/vdso32/datapage.S
>> @@ -11,34 +11,13 @@
>>   #include <asm/unistd.h>
>>   #include <asm/vdso.h>
>>   
>> +#include "datapage.h"
>> +
>>   	.text
>>   	.global	__kernel_datapage_offset;
>>   __kernel_datapage_offset:
>>   	.long	0
>>   
>> -V_FUNCTION_BEGIN(__get_datapage)
>> -  .cfi_startproc
>> -	/* We don't want that exposed or overridable as we want other objects
>> -	 * to be able to bl directly to here
>> -	 */
>> -	.protected __get_datapage
>> -	.hidden __get_datapage
>> -
>> -	mflr	r0
>> -  .cfi_register lr,r0
>> -
>> -	bcl	20,31,data_page_branch
>> -data_page_branch:
>> -	mflr	r3
>> -	mtlr	r0
>> -	addi	r3, r3, __kernel_datapage_offset-data_page_branch
>> -	lwz	r0,0(r3)
>> -  .cfi_restore lr
>> -	add	r3,r0,r3
>> -	blr
>> -  .cfi_endproc
>> -V_FUNCTION_END(__get_datapage)
>> -
>>   /*
>>    * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
>>    *
>> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
>>   	mflr	r12
>>     .cfi_register lr,r12
>>   	mr	r4,r3
>> -	bl	__get_datapage@local
>> +	get_datapage	r3, r0
>>   	mtlr	r12
>>   	addi	r3,r3,CFG_SYSCALL_MAP32
>>   	cmpli	cr0,r4,0
>> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
>>     .cfi_startproc
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	bl	__get_datapage@local
>> +	get_datapage	r3, r0
>>   	lwz	r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
>>   	lwz	r3,CFG_TB_TICKS_PER_SEC(r3)
>>   	mtlr	r12
>> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h
>> new file mode 100644
>> index 000000000000..74f4f57c2da8
>> --- /dev/null
>> +++ b/arch/powerpc/kernel/vdso32/datapage.h
>> @@ -0,0 +1,11 @@
>> +/* SPDX-License-Identifier: GPL-2.0-or-later */
>> +
>> +.macro get_datapage ptr, tmp
>> +	bcl	20,31,.+4
>> +	mflr	\ptr
>> +	addi	\ptr, \ptr, __kernel_datapage_offset - (.-4)
>> +	lwz	\tmp, 0(\ptr)
>> +	add	\ptr, \tmp, \ptr
>> +.endm
>> +
>> +
>> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> index 355b537d327a..3e55cba19f44 100644
>> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S
>> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
>> @@ -12,6 +12,8 @@
>>   #include <asm/asm-offsets.h>
>>   #include <asm/unistd.h>
>>   
>> +#include "datapage.h"
>> +
>>   /* Offset for the low 32-bit part of a field of long type */
>>   #ifdef CONFIG_PPC64
>>   #define LOPART	4
>> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
>>   
>>   	mr	r10,r3			/* r10 saves tv */
>>   	mr	r11,r4			/* r11 saves tz */
>> -	bl	__get_datapage@local	/* get data page */
>> -	mr	r9, r3			/* datapage ptr in r9 */
>> +	get_datapage	r9, r0
>>   	cmplwi	r10,0			/* check if tv is NULL */
>>   	beq	3f
>>   	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
>> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
>>   	mflr	r12			/* r12 saves lr */
>>     .cfi_register lr,r12
>>   	mr	r11,r4			/* r11 saves tp */
>> -	bl	__get_datapage@local	/* get data page */
>> -	mr	r9,r3			/* datapage ptr in r9 */
>> +	get_datapage	r9, r0
>>   	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
>>   	ori	r7,r7,NSEC_PER_SEC@l
>>   	beq	cr5, .Lcoarse_clocks
>> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres)
>>   
>>   	mflr	r12
>>     .cfi_register lr,r12
>> -	bl	__get_datapage@local	/* get data page */
>> +	get_datapage	r3, r0
>>   	lwz	r5, CLOCK_HRTIMER_RES(r3)
>>   	mtlr	r12
>>   	li	r3,0
>> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time)
>>     .cfi_register lr,r12
>>   
>>   	mr	r11,r3			/* r11 holds t */
>> -	bl	__get_datapage@local
>> -	mr	r9, r3			/* datapage ptr in r9 */
>> +	get_datapage	r9, r0
>>   
>>   	lwz	r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
>>   
>> -- 
>> 2.13.3


More information about the Linuxppc-dev mailing list