[PATCH] powerpc: Add support for popcnt instructions

Benjamin Herrenschmidt benh at kernel.crashing.org
Fri Aug 13 14:29:04 EST 2010


On Fri, 2010-08-13 at 12:28 +1000, Anton Blanchard wrote:
> POWER5 added popcntb, and POWER7 added popcntw and popcntd. As a first step
> this patch does all the work out of line, but it would be nice to implement
> them as inlines with an out of line fallback.
> 
> The performance issue with hweight was noticed when disabling SMT on a large
> (192 thread) POWER7 box. The patch improves that testcase by about 8%.

Calling these out of line will perform badly, especially from modules. If
they are kept out of line they should probably be linked in with each
module, but I'd rather have them inlined.

Cheers,
Ben.

> Signed-off-by: Anton Blanchard <anton at samba.org>
> ---
> 
> Index: powerpc.git/arch/powerpc/include/asm/cputable.h
> ===================================================================
> --- powerpc.git.orig/arch/powerpc/include/asm/cputable.h	2010-08-13 11:19:42.691991439 +1000
> +++ powerpc.git/arch/powerpc/include/asm/cputable.h	2010-08-13 11:24:55.510741618 +1000
> @@ -199,6 +199,8 @@ extern const char *powerpc_base_platform
>  #define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0080000000000000)
>  #define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0100000000000000)
>  #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0200000000000000)
> +#define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0400000000000000)
> +#define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0800000000000000)
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -403,21 +405,22 @@ extern const char *powerpc_base_platform
>  	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
>  	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
>  	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
> -	    CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS)
> +	    CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS | \
> +	    CPU_FTR_POPCNTB)
>  #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
>  	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
>  	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
>  	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
>  	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
>  	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
> -	    CPU_FTR_STCX_CHECKS_ADDRESS)
> +	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB)
>  #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
>  	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
>  	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
>  	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
>  	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
>  	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
> -	    CPU_FTR_STCX_CHECKS_ADDRESS)
> +	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD)
>  #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
>  	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
>  	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
> Index: powerpc.git/arch/powerpc/lib/Makefile
> ===================================================================
> --- powerpc.git.orig/arch/powerpc/lib/Makefile	2010-08-13 11:19:43.653241065 +1000
> +++ powerpc.git/arch/powerpc/lib/Makefile	2010-08-13 11:19:45.930743841 +1000
> @@ -18,7 +18,7 @@ obj-$(CONFIG_HAS_IOMEM)	+= devres.o
>  
>  obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
>  			   memcpy_64.o usercopy_64.o mem_64.o string.o \
> -			   checksum_wrappers_64.o
> +			   checksum_wrappers_64.o hweight_64.o
>  obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
>  obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
>  obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
> Index: powerpc.git/arch/powerpc/lib/hweight_64.S
> ===================================================================
> --- /dev/null	1970-01-01 00:00:00.000000000 +0000
> +++ powerpc.git/arch/powerpc/lib/hweight_64.S	2010-08-13 11:19:45.940741462 +1000
> @@ -0,0 +1,110 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
> + *
> + * Copyright (C) IBM Corporation, 2010
> + *
> + * Author: Anton Blanchard <anton at au.ibm.com>
> + */
> +#include <asm/processor.h>
> +#include <asm/ppc_asm.h>
> +
> +/* Note: This code relies on -mminimal-toc */
> +
> +_GLOBAL(__arch_hweight8)
> +BEGIN_FTR_SECTION
> +	b .__sw_hweight8
> +	nop
> +	nop
> +FTR_SECTION_ELSE
> +	popcntb	r3,r3
> +	clrldi	r3,r3,64-8
> +	blr
> +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
> +
> +_GLOBAL(__arch_hweight16)
> +BEGIN_FTR_SECTION
> +	b .__sw_hweight16
> +	nop
> +	nop
> +	nop
> +	nop
> +FTR_SECTION_ELSE
> +  BEGIN_FTR_SECTION_NESTED(50)
> +	popcntb r3,r3
> +	srdi	r4,r3,8
> +	add	r3,r4,r3
> +	clrldi	r3,r3,64-8
> +	blr
> +  FTR_SECTION_ELSE_NESTED(50)
> +	clrlwi  r3,r3,16
> +	popcntw	r3,r3
> +	clrldi	r3,r3,64-8
> +	blr
> +  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
> +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
> +
> +_GLOBAL(__arch_hweight32)
> +BEGIN_FTR_SECTION
> +	b .__sw_hweight32
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +FTR_SECTION_ELSE
> +  BEGIN_FTR_SECTION_NESTED(51)
> +	popcntb r3,r3
> +	srdi	r4,r3,16
> +	add	r3,r4,r3
> +	srdi	r4,r3,8
> +	add	r3,r4,r3
> +	clrldi	r3,r3,64-8
> +	blr
> +  FTR_SECTION_ELSE_NESTED(51)
> +	popcntw	r3,r3
> +	clrldi	r3,r3,64-8
> +	blr
> +  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
> +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
> +
> +_GLOBAL(__arch_hweight64)
> +BEGIN_FTR_SECTION
> +	b .__sw_hweight64
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +FTR_SECTION_ELSE
> +  BEGIN_FTR_SECTION_NESTED(52)
> +	popcntb r3,r3
> +	srdi	r4,r3,32
> +	add	r3,r4,r3
> +	srdi	r4,r3,16
> +	add	r3,r4,r3
> +	srdi	r4,r3,8
> +	add	r3,r4,r3
> +	clrldi	r3,r3,64-8
> +	blr
> +  FTR_SECTION_ELSE_NESTED(52)
> +	popcntd	r3,r3
> +	clrldi	r3,r3,64-8
> +	blr
> +  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
> +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
> Index: powerpc.git/arch/powerpc/include/asm/bitops.h
> ===================================================================
> --- powerpc.git.orig/arch/powerpc/include/asm/bitops.h	2010-08-13 11:06:20.991992998 +1000
> +++ powerpc.git/arch/powerpc/include/asm/bitops.h	2010-08-13 11:19:45.940741462 +1000
> @@ -267,7 +267,16 @@ static __inline__ int fls64(__u64 x)
>  #include <asm-generic/bitops/fls64.h>
>  #endif /* __powerpc64__ */
>  
> +#ifdef CONFIG_PPC64
> +unsigned int __arch_hweight8(unsigned int w);
> +unsigned int __arch_hweight16(unsigned int w);
> +unsigned int __arch_hweight32(unsigned int w);
> +unsigned long __arch_hweight64(__u64 w);
> +#include <asm-generic/bitops/const_hweight.h>
> +#else
>  #include <asm-generic/bitops/hweight.h>
> +#endif
> +
>  #include <asm-generic/bitops/find.h>
>  
>  /* Little-endian versions */
> Index: powerpc.git/arch/powerpc/kernel/ppc_ksyms.c
> ===================================================================
> --- powerpc.git.orig/arch/powerpc/kernel/ppc_ksyms.c	2010-08-13 11:06:21.011991745 +1000
> +++ powerpc.git/arch/powerpc/kernel/ppc_ksyms.c	2010-08-13 11:19:45.940741462 +1000
> @@ -186,3 +186,10 @@ EXPORT_SYMBOL(__mtdcr);
>  EXPORT_SYMBOL(__mfdcr);
>  #endif
>  EXPORT_SYMBOL(empty_zero_page);
> +
> +#ifdef CONFIG_PPC64
> +EXPORT_SYMBOL(__arch_hweight8);
> +EXPORT_SYMBOL(__arch_hweight16);
> +EXPORT_SYMBOL(__arch_hweight32);
> +EXPORT_SYMBOL(__arch_hweight64);
> +#endif




More information about the Linuxppc-dev mailing list