[PATCH] powerpc: Fix atomic_xxx_return barrier semantics
Paul E. McKenney
paulmck at linux.vnet.ibm.com
Thu Nov 17 00:45:27 EST 2011
On Wed, Nov 16, 2011 at 02:11:27PM +1100, Benjamin Herrenschmidt wrote:
> The Documentation/memory-barriers.txt document requires that atomic
> operations that return a value act as a memory barrier both before
> and after the actual atomic operation.
>
> Our current implementation doesn't guarantee this. More specifically,
> while a load following the isync can not be issued before stwcx. has
> completed, that completion doesn't architecturally means that the
> result of stwcx. is visible to other processors (or any previous stores
> for that matter) (typically, the other processors L1 caches can still
> hold the old value).
>
> This has caused an actual crash in RCU torture testing on Power 7
>
> This fixes it by changing those atomic ops to use new macros instead
> of RELEASE/ACQUIRE barriers, called ATOMIC_ENTRY and ATMOIC_EXIT barriers,
> which are then defined respectively to lwsync and sync.
>
> I haven't had a chance to measure the performance impact (or rather
> what I measured with kernel compiles is in the noise, I yet have to
> find a more precise benchmark)
>
> Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Acked-by: Paul E. McKenney <paulmck at linux.vnet.ibm.com>
> ---
>
> diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
> index e2a4c26..02e41b5 100644
> --- a/arch/powerpc/include/asm/atomic.h
> +++ b/arch/powerpc/include/asm/atomic.h
> @@ -49,13 +49,13 @@ static __inline__ int atomic_add_return(int a, atomic_t *v)
> int t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: lwarx %0,0,%2 # atomic_add_return\n\
> add %0,%1,%0\n"
> PPC405_ERR77(0,%2)
> " stwcx. %0,0,%2 \n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (a), "r" (&v->counter)
> : "cc", "memory");
> @@ -85,13 +85,13 @@ static __inline__ int atomic_sub_return(int a, atomic_t *v)
> int t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: lwarx %0,0,%2 # atomic_sub_return\n\
> subf %0,%1,%0\n"
> PPC405_ERR77(0,%2)
> " stwcx. %0,0,%2 \n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (a), "r" (&v->counter)
> : "cc", "memory");
> @@ -119,13 +119,13 @@ static __inline__ int atomic_inc_return(atomic_t *v)
> int t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: lwarx %0,0,%1 # atomic_inc_return\n\
> addic %0,%0,1\n"
> PPC405_ERR77(0,%1)
> " stwcx. %0,0,%1 \n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (&v->counter)
> : "cc", "xer", "memory");
> @@ -163,13 +163,13 @@ static __inline__ int atomic_dec_return(atomic_t *v)
> int t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: lwarx %0,0,%1 # atomic_dec_return\n\
> addic %0,%0,-1\n"
> PPC405_ERR77(0,%1)
> " stwcx. %0,0,%1\n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (&v->counter)
> : "cc", "xer", "memory");
> @@ -194,7 +194,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
> int t;
>
> __asm__ __volatile__ (
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: lwarx %0,0,%1 # __atomic_add_unless\n\
> cmpw 0,%0,%3 \n\
> beq- 2f \n\
> @@ -202,7 +202,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
> PPC405_ERR77(0,%2)
> " stwcx. %0,0,%1 \n\
> bne- 1b \n"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> " subf %0,%2,%0 \n\
> 2:"
> : "=&r" (t)
> @@ -226,7 +226,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
> int t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: lwarx %0,0,%1 # atomic_dec_if_positive\n\
> cmpwi %0,1\n\
> addi %0,%0,-1\n\
> @@ -234,7 +234,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
> PPC405_ERR77(0,%1)
> " stwcx. %0,0,%1\n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> "\n\
> 2:" : "=&b" (t)
> : "r" (&v->counter)
> @@ -285,12 +285,12 @@ static __inline__ long atomic64_add_return(long a, atomic64_t *v)
> long t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: ldarx %0,0,%2 # atomic64_add_return\n\
> add %0,%1,%0\n\
> stdcx. %0,0,%2 \n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (a), "r" (&v->counter)
> : "cc", "memory");
> @@ -319,12 +319,12 @@ static __inline__ long atomic64_sub_return(long a, atomic64_t *v)
> long t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: ldarx %0,0,%2 # atomic64_sub_return\n\
> subf %0,%1,%0\n\
> stdcx. %0,0,%2 \n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (a), "r" (&v->counter)
> : "cc", "memory");
> @@ -351,12 +351,12 @@ static __inline__ long atomic64_inc_return(atomic64_t *v)
> long t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: ldarx %0,0,%1 # atomic64_inc_return\n\
> addic %0,%0,1\n\
> stdcx. %0,0,%1 \n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (&v->counter)
> : "cc", "xer", "memory");
> @@ -393,12 +393,12 @@ static __inline__ long atomic64_dec_return(atomic64_t *v)
> long t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: ldarx %0,0,%1 # atomic64_dec_return\n\
> addic %0,%0,-1\n\
> stdcx. %0,0,%1\n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> : "=&r" (t)
> : "r" (&v->counter)
> : "cc", "xer", "memory");
> @@ -418,13 +418,13 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
> long t;
>
> __asm__ __volatile__(
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: ldarx %0,0,%1 # atomic64_dec_if_positive\n\
> addic. %0,%0,-1\n\
> blt- 2f\n\
> stdcx. %0,0,%1\n\
> bne- 1b"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> "\n\
> 2:" : "=&r" (t)
> : "r" (&v->counter)
> @@ -450,14 +450,14 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
> long t;
>
> __asm__ __volatile__ (
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: ldarx %0,0,%1 # __atomic_add_unless\n\
> cmpd 0,%0,%3 \n\
> beq- 2f \n\
> add %0,%2,%0 \n"
> " stdcx. %0,0,%1 \n\
> bne- 1b \n"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> " subf %0,%2,%0 \n\
> 2:"
> : "=&r" (t)
> diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
> index e137afc..efdc926 100644
> --- a/arch/powerpc/include/asm/bitops.h
> +++ b/arch/powerpc/include/asm/bitops.h
> @@ -124,14 +124,14 @@ static __inline__ unsigned long fn( \
> return (old & mask); \
> }
>
> -DEFINE_TESTOP(test_and_set_bits, or, PPC_RELEASE_BARRIER,
> - PPC_ACQUIRE_BARRIER, 0)
> +DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
> + PPC_ATOMIC_EXIT_BARRIER, 0)
> DEFINE_TESTOP(test_and_set_bits_lock, or, "",
> PPC_ACQUIRE_BARRIER, 1)
> -DEFINE_TESTOP(test_and_clear_bits, andc, PPC_RELEASE_BARRIER,
> - PPC_ACQUIRE_BARRIER, 0)
> -DEFINE_TESTOP(test_and_change_bits, xor, PPC_RELEASE_BARRIER,
> - PPC_ACQUIRE_BARRIER, 0)
> +DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
> + PPC_ATOMIC_EXIT_BARRIER, 0)
> +DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
> + PPC_ATOMIC_EXIT_BARRIER, 0)
>
> static __inline__ int test_and_set_bit(unsigned long nr,
> volatile unsigned long *addr)
> diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
> index c94e4a3..2a9cf84 100644
> --- a/arch/powerpc/include/asm/futex.h
> +++ b/arch/powerpc/include/asm/futex.h
> @@ -11,12 +11,13 @@
>
> #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
> __asm__ __volatile ( \
> - PPC_RELEASE_BARRIER \
> + PPC_ATOMIC_ENTRY_BARRIER \
> "1: lwarx %0,0,%2\n" \
> insn \
> PPC405_ERR77(0, %2) \
> "2: stwcx. %1,0,%2\n" \
> "bne- 1b\n" \
> + PPC_ATOMIC_EXIT_BARRIER \
> "li %1,0\n" \
> "3: .section .fixup,\"ax\"\n" \
> "4: li %1,%3\n" \
> @@ -92,14 +93,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> return -EFAULT;
>
> __asm__ __volatile__ (
> - PPC_RELEASE_BARRIER
> + PPC_ATOMIC_ENTRY_BARRIER
> "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
> cmpw 0,%1,%4\n\
> bne- 3f\n"
> PPC405_ERR77(0,%3)
> "2: stwcx. %5,0,%3\n\
> bne- 1b\n"
> - PPC_ACQUIRE_BARRIER
> + PPC_ATOMIC_EXIT_BARRIER
> "3: .section .fixup,\"ax\"\n\
> 4: li %0,%6\n\
> b 3b\n\
> diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
> index d7cab44..24fc618 100644
> --- a/arch/powerpc/include/asm/synch.h
> +++ b/arch/powerpc/include/asm/synch.h
> @@ -41,11 +41,15 @@ static inline void isync(void)
> START_LWSYNC_SECTION(97); \
> isync; \
> MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
> -#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
> -#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n"
> +#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
> +#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n"
> +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n"
> +#define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n"
> #else
> #define PPC_ACQUIRE_BARRIER
> #define PPC_RELEASE_BARRIER
> +#define PPC_ATOMIC_ENTRY_BARRIER
> +#define PPC_ATOMIC_EXIT_BARRIER
> #endif
>
> #endif /* __KERNEL__ */
>
>
More information about the Linuxppc-dev
mailing list