[PATCH] RFC: powerpc: expose the multi-bit ops that underlie single-bit ops.

Benjamin Herrenschmidt benh at kernel.crashing.org
Tue Jun 16 13:53:38 EST 2009


On Tue, 2009-05-26 at 14:19 -0400, Geoff Thorpe wrote:
> NOT FOR COMMIT, THIS IS A REQUEST FOR FEEDBACK.
> 
> The bitops.h functions that operate on a single bit in a bitfield are
> implemented by operating on the corresponding word location. In all cases
> the inner logic appears to be valid if the mask being applied has more
> than one bit set, so this patch exposes those inner operations. Indeed,
> set_bits() was already available, but it duplicated code from set_bit()
> (rather than making the latter a wrapper) - it was also missing the
> PPC405_ERR77() workaround and the "volatile" address qualifier present in
> other APIs. This corrects that, and exposes the other multi-bit
> equivalents.
> 
> One advantage of these multi-bit forms is that they allow word-sized
> variables to essentially be their own spinlocks.

Hi! Sorry for the delay, this was on my "have a look one of these days,
low priority" list for a bit too long :-)

> NB, the same factoring is possible in asm-generic/bitops/[non-]atomic.h.
> I would be happy to provide corresponding patches if this approach is
> deemed appropriate. Feedback would be most welcome.
> 
> Signed-off-by: Geoff Thorpe <Geoff.Thorpe at freescale.com>
> ---
>  arch/powerpc/include/asm/bitops.h |  111 +++++++++++++++++++++++--------------
>  1 files changed, 69 insertions(+), 42 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
> index 897eade..72de28c 100644
> --- a/arch/powerpc/include/asm/bitops.h
> +++ b/arch/powerpc/include/asm/bitops.h
> @@ -56,11 +56,10 @@
>  #define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
>  #define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
>  
> -static __inline__ void set_bit(int nr, volatile unsigned long *addr)
> +static __inline__ void set_bits(unsigned long mask, volatile unsigned long *_p)
>  {
>  	unsigned long old;
> -	unsigned long mask = BITOP_MASK(nr);
> -	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
> +	unsigned long *p = (unsigned long *)_p;
>  
>  	__asm__ __volatile__(
>  "1:"	PPC_LLARX "%0,0,%3	# set_bit\n"
> @@ -73,11 +72,16 @@ static __inline__ void set_bit(int nr, volatile unsigned long *addr)
>  	: "cc" );
>  }
>  
> -static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
> +static __inline__ void set_bit(int nr, volatile unsigned long *addr)
> +{
> +	set_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
> +}

No objection to the above.
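
For illustration, a caller could then update several flags in the same
word with a single atomic read-modify-write; the flag names below are
made up:

#define MY_FLAG_A	0x01UL	/* hypothetical flag bits, for illustration only */
#define MY_FLAG_B	0x04UL

static unsigned long my_flags;	/* both flags live in the same word */

static void mark_both(void)
{
	/* one atomic RMW instead of two separate set_bit() calls */
	set_bits(MY_FLAG_A | MY_FLAG_B, &my_flags);
}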

> +static __inline__ void clear_bits(unsigned long mask,
> +				volatile unsigned long *_p)
>  {
>  	unsigned long old;
> -	unsigned long mask = BITOP_MASK(nr);
> -	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
> +	unsigned long *p = (unsigned long *)_p;
>  
>  	__asm__ __volatile__(
>  "1:"	PPC_LLARX "%0,0,%3	# clear_bit\n"
> @@ -90,11 +94,16 @@ static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
>  	: "cc" );
>  }
>  
> -static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
> +static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
> +{
> +	clear_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
> +}

Looks good too.

> +static __inline__ void clear_bits_unlock(unsigned long mask,
> +					volatile unsigned long *_p)
>  {
>  	unsigned long old;
> -	unsigned long mask = BITOP_MASK(nr);
> -	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
> +	unsigned long *p = (unsigned long *)_p;
>  
>  	__asm__ __volatile__(
>  	LWSYNC_ON_SMP
> @@ -108,11 +117,16 @@ static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
>  	: "cc", "memory");
>  }
>  
> -static __inline__ void change_bit(int nr, volatile unsigned long *addr)
> +static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
> +{
> +	clear_bits_unlock(BITOP_MASK(nr), addr + BITOP_WORD(nr));
> +}

I'm not sure it's useful to provide a multi-bit variant of the
"lock" and "unlock" primitives. Do other archs do that?

> +static __inline__ void change_bits(unsigned long mask,
> +				volatile unsigned long *_p)
>  {
>  	unsigned long old;
> -	unsigned long mask = BITOP_MASK(nr);
> -	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
> +	unsigned long *p = (unsigned long *)_p;
>  
>  	__asm__ __volatile__(
>  "1:"	PPC_LLARX "%0,0,%3	# change_bit\n"
> @@ -125,12 +139,16 @@ static __inline__ void change_bit(int nr, volatile unsigned long *addr)
>  	: "cc" );
>  }
>  
> -static __inline__ int test_and_set_bit(unsigned long nr,
> -				       volatile unsigned long *addr)
> +static __inline__ void change_bit(int nr, volatile unsigned long *addr)
> +{
> +	change_bits(BITOP_MASK(nr), addr + BITOP_WORD(nr));
> +}

Ah, the patch is getting confused between change_bit and
test_and_set_bit :-)

Now, what I'm thinking is this: those are all the same except
for:

 - Barriers for lock and unlock variants
 - Barriers for "return" (aka test_and_set) variants
 - Actual op done on the mask

Maybe we can shrink that file significantly (and avoid the risk of
typos etc.) by generating them all from a macro.

Something like this (typed directly into the mailer :-)

/* expects old, t, mask and p to be in scope at the expansion site */
#define DEFINE_BITOP(op, prefix, postfix) \
	asm volatile (			  \
	prefix				  \
"1:"	PPC_LLARX "%0,0,%3\n"		  \
	stringify_in_c(op) "%1,%0,%2\n"	  \
	PPC405_ERR77(0,%3)		  \
	PPC_STLCX "%1,0,%3\n"		  \
	"bne- 1b\n"			  \
	postfix				  \
	 : "=&r" (old), "=&r" (t)	  \
	 : "r" (mask), "r" (p)		  \
	 : "cc", "memory")

and so:

static inline void set_bits(unsigned long mask, volatile unsigned long *p)
{
	unsigned long old, t;

	DEFINE_BITOP(or, "", "");
}
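
(and presumably clear_bits() would just pass a different mnemonic for
the mask op, e.g. andc to clear the masked bits:)

static inline void clear_bits(unsigned long mask, volatile unsigned long *p)
{
	unsigned long old, t;

	/* andc: t = old & ~mask */
	DEFINE_BITOP(andc, "", "");
}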

static inline int test_and_set_bits(unsigned long mask, volatile unsigned long *p)
{
	unsigned long old, t;

	DEFINE_BITOP(or, LWSYNC_ON_SMP, ISYNC_ON_SMP);

	return (old & mask) != 0;
}

etc...
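
Whether or not the multi-bit lock/unlock variants are worth keeping,
the "word-sized variables as their own spinlocks" use mentioned in the
changelog would presumably boil down to something like this
(hypothetical helpers, assuming test_and_set_bits as sketched above and
the patch's clear_bits_unlock):

#define MY_LOCK_BIT	0x1UL	/* hypothetical: one bit of the data word acts as the lock */

static void my_word_lock(volatile unsigned long *word)
{
	/* spin until we are the ones to flip the lock bit from 0 to 1;
	 * the ISYNC on success gives acquire ordering */
	while (test_and_set_bits(MY_LOCK_BIT, word))
		cpu_relax();
}

static void my_word_unlock(volatile unsigned long *word)
{
	/* the LWSYNC in clear_bits_unlock() orders prior stores before the release */
	clear_bits_unlock(MY_LOCK_BIT, word);
}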

Cheers,
Ben.



