[PATCH V4] powerpc: Implement {cmp}xchg for u8 and u16

Boqun Feng boqun.feng at gmail.com
Thu Apr 28 00:50:34 AEST 2016


On Wed, Apr 27, 2016 at 09:58:17PM +0800, Boqun Feng wrote:
> On Wed, Apr 27, 2016 at 05:16:45PM +0800, Pan Xinhui wrote:
> > From: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com>
> > 
> > Implement xchg{u8,u16}{local,relaxed}, and
> > cmpxchg{u8,u16}{,local,acquire,relaxed}.
> > 
> > It works on all ppc.
> > 
> > remove volatile of first parameter in __cmpxchg_local and __cmpxchg
> > 
> > Suggested-by: Peter Zijlstra (Intel) <peterz at infradead.org>
> > Signed-off-by: Pan Xinhui <xinhui.pan at linux.vnet.ibm.com>
> > ---
> > change from v3:
> > 	rewrite in asm for the LL/SC.
> > 	remove volatile in __cmpxchg_local and __cmpxchg.
> > change from v2:
> > 	in the do{}while(), we save one load and use corresponding cmpxchg suffix.
> > 	Also add corresponding __cmpxchg_u32 function declaration in the __XCHG_GEN 
> > change from V1:
> > 	rework totally.
> > ---
> >  arch/powerpc/include/asm/cmpxchg.h | 109 ++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 106 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> > index 44efe73..8a3735f 100644
> > --- a/arch/powerpc/include/asm/cmpxchg.h
> > +++ b/arch/powerpc/include/asm/cmpxchg.h
> > @@ -7,6 +7,71 @@
> >  #include <asm/asm-compat.h>
> >  #include <linux/bug.h>
> >  
> > +#ifdef __BIG_ENDIAN
> > +#define BITOFF_CAL(size, off)	((sizeof(u32) - size - off) * BITS_PER_BYTE)
> > +#else
> > +#define BITOFF_CAL(size, off)	(off * BITS_PER_BYTE)
> > +#endif
> > +
> > +#define XCHG_GEN(type, sfx, cl)				\
> > +static inline u32 __xchg_##type##sfx(void *p, u32 val)		\
> > +{								\
> > +	unsigned int prev, prev_mask, tmp, bitoff, off;		\
> > +								\
> > +	off = (unsigned long)p % sizeof(u32);			\
> > +	bitoff = BITOFF_CAL(sizeof(type), off);			\
> > +	p -= off;						\
> > +	val <<= bitoff;						\
> > +	prev_mask = (u32)(type)-1 << bitoff;			\
> > +								\
> > +	__asm__ __volatile__(					\
> > +"1:	lwarx   %0,0,%3\n"					\
> > +"	andc	%1,%0,%5\n"					\
> > +"	or	%1,%1,%4\n"					\
> > +	PPC405_ERR77(0,%3)					\
> > +"	stwcx.	%1,0,%3\n"					\
> > +"	bne-	1b\n"						\
> > +	: "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p)		\
> 
> I think we can save the "tmp" here by:
> 
> 	__asm__ volatile__(
> "1:	lwarx	%0,0,%2\n"
> "	andc	%0,%0,%4\n"
> "	or	%0,%0,%3\n"
> 	PPC405_ERR77(0,%2)
> "	stwcx.	%0,0,%2\n"
> "	bne-	1b\n"
> 	: "=&r" (prev), "+m" (*(u32*)p)
> 	: "r" (p), "r" (val), "r" (prev_mask)
> 	: "cc", cl);
> 
> right?
> 
> > +	: "r" (p), "r" (val), "r" (prev_mask)			\
> > +	: "cc", cl);						\
> > +								\
> > +	return prev >> bitoff;					\
> > +}
> > +
> > +#define CMPXCHG_GEN(type, sfx, br, br2, cl)			\
> > +static inline							\
> > +u32 __cmpxchg_##type##sfx(void *p, u32 old, u32 new)		\
> > +{								\
> > +	unsigned int prev, prev_mask, tmp, bitoff, off;		\
> > +								\
> > +	off = (unsigned long)p % sizeof(u32);			\
> > +	bitoff = BITOFF_CAL(sizeof(type), off);			\
> > +	p -= off;						\
> > +	old <<= bitoff;						\
> > +	new <<= bitoff;						\
> > +	prev_mask = (u32)(type)-1 << bitoff;			\
> > +								\
> > +	__asm__ __volatile__(					\
> > +	br							\
> > +"1:	lwarx   %0,0,%3\n"					\
> > +"	and	%1,%0,%6\n"					\
> > +"	cmpw	0,%1,%4\n"					\
> > +"	bne-	2f\n"						\
> > +"	andc	%1,%0,%6\n"					\
> > +"	or	%1,%1,%5\n"					\
> > +	PPC405_ERR77(0,%3)					\
> > +"	stwcx.  %1,0,%3\n"					\
> > +"	bne-    1b\n"						\
> > +	br2							\
> > +	"\n"							\
> > +"2:"								\
> > +	: "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p)		\
> 
> And "tmp" here could also be saved by:
> 
> "1:	lwarx   %0,0,%2\n"					\
> "	xor	%3,%0,%3\n"					\
> "	and.	%3,%3,%5\n"					\
> "	bne-	2f\n"						\
> "	andc	%0,%0,%5\n"					\
> "	or	%0,%0,%4\n"					\
> 	PPC405_ERR77(0,%2)					\
> "	stwcx.  %0,0,%2\n"					\
> "	bne-    1b\n"						\
> 	br2							\
> 	"\n"							\
> "2:"								\
> 	: "=&r" (prev), "+m" (*(u32*)p)		\
> 	: "r" (p), "r" (old), "r" (new), "r" (prev_mask)	\
> 	: "cc", cl);						\
> 
> right?
> 

Sorry, my bad, we can't implement cmpxchg like this.. please ignore
this, I should really go to bed soon...

But still, we can save the "tmp" for xchg() I think.

Regards,
Boqun

> IIUC, saving the local variable "tmp" will result in saving a general
> register for the compilers to use for other variables.
> 
> So thoughts?
> 
> Regards,
> Boqun
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 473 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20160427/f022d6af/attachment.sig>


More information about the Linuxppc-dev mailing list