[PATCH] powerpc: memcpy optimization for 64bit LE

Michael Neuling mikey at neuling.org
Wed Nov 6 17:22:46 EST 2013


Philippe Bergheaud <felix at linux.vnet.ibm.com> wrote:

> Unaligned stores take alignment exceptions on POWER7 running in little-endian mode.
> This is a dumb, byte-at-a-time little-endian base memcpy that avoids unaligned stores.
> It is replaced by the VMX memcpy at boot on CPUs that have CPU_FTR_VMX_COPY.

Is this any faster than the generic version?

Mikey

> 
> Signed-off-by: Philippe Bergheaud <felix at linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/string.h |    4 ----
>  arch/powerpc/kernel/ppc_ksyms.c   |    2 --
>  arch/powerpc/lib/Makefile         |    2 --
>  arch/powerpc/lib/memcpy_64.S      |   19 +++++++++++++++++++
>  4 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
> index 0dffad6..e40010a 100644
> --- a/arch/powerpc/include/asm/string.h
> +++ b/arch/powerpc/include/asm/string.h
> @@ -10,9 +10,7 @@
>  #define __HAVE_ARCH_STRNCMP
>  #define __HAVE_ARCH_STRCAT
>  #define __HAVE_ARCH_MEMSET
> -#ifdef __BIG_ENDIAN__
>  #define __HAVE_ARCH_MEMCPY
> -#endif
>  #define __HAVE_ARCH_MEMMOVE
>  #define __HAVE_ARCH_MEMCMP
>  #define __HAVE_ARCH_MEMCHR
> @@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
>  extern int strncmp(const char *, const char *, __kernel_size_t);
>  extern char * strcat(char *, const char *);
>  extern void * memset(void *,int,__kernel_size_t);
> -#ifdef __BIG_ENDIAN__
>  extern void * memcpy(void *,const void *,__kernel_size_t);
> -#endif
>  extern void * memmove(void *,const void *,__kernel_size_t);
>  extern int memcmp(const void *,const void *,__kernel_size_t);
>  extern void * memchr(const void *,int,__kernel_size_t);
> diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
> index 526ad5c..0c2dd60 100644
> --- a/arch/powerpc/kernel/ppc_ksyms.c
> +++ b/arch/powerpc/kernel/ppc_ksyms.c
> @@ -147,9 +147,7 @@ EXPORT_SYMBOL(__ucmpdi2);
>  #endif
>  long long __bswapdi2(long long);
>  EXPORT_SYMBOL(__bswapdi2);
> -#ifdef __BIG_ENDIAN__
>  EXPORT_SYMBOL(memcpy);
> -#endif
>  EXPORT_SYMBOL(memset);
>  EXPORT_SYMBOL(memmove);
>  EXPORT_SYMBOL(memcmp);
> diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
> index 5310132..6670361 100644
> --- a/arch/powerpc/lib/Makefile
> +++ b/arch/powerpc/lib/Makefile
> @@ -23,9 +23,7 @@ obj-y			+= checksum_$(CONFIG_WORD_SIZE).o
>  obj-$(CONFIG_PPC64)	+= checksum_wrappers_64.o
>  endif
>  
> -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
>  obj-$(CONFIG_PPC64)		+= memcpy_power7.o memcpy_64.o 
> -endif
>  
>  obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
>  
> diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
> index d2bbbc8..358cf74 100644
> --- a/arch/powerpc/lib/memcpy_64.S
> +++ b/arch/powerpc/lib/memcpy_64.S
> @@ -12,10 +12,28 @@
>  	.align	7
>  _GLOBAL(memcpy)
>  BEGIN_FTR_SECTION
> +#ifdef __LITTLE_ENDIAN__
> +	cmpdi cr7,r5,0		/* dumb little-endian memcpy */
> +#else
>  	std	r3,48(r1)	/* save destination pointer for return value */
> +#endif
>  FTR_SECTION_ELSE
>  	b	memcpy_power7
>  ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
> +#ifdef __LITTLE_ENDIAN__
> +	addi r5,r5,-1
> +	addi r9,r3,-1
> +	add r5,r3,r5
> +	subf r5,r9,r5
> +	addi r4,r4,-1
> +	mtctr r5
> +	beqlr cr7
> +1:
> +	lbzu r10,1(r4)
> +	stbu r10,1(r9)
> +	bdnz 1b
> +	blr
> +#else
>  	PPC_MTOCRF(0x01,r5)
>  	cmpldi	cr1,r5,16
>  	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
> @@ -201,3 +219,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
>  	stb	r0,0(r3)
>  4:	ld	r3,48(r1)	/* return dest pointer */
>  	blr
> +#endif
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
> 


More information about the Linuxppc-dev mailing list