[PATCH 03/17] powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx.

Jordan Niethe jniethe5 at gmail.com
Wed Aug 10 13:28:02 AEST 2022


On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> The first 16 bits of the lock are only modified by the owner, and other
> modifications always use atomic operations on the entire 32 bits, so
> unlocks can use plain stores on the 16 bits. This is the same kind of
> optimisation done by core qspinlock code.
> ---
>  arch/powerpc/include/asm/qspinlock.h       |  6 +-----
>  arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++--
>  2 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
> index f06117aa60e1..79a1936fb68d 100644
> --- a/arch/powerpc/include/asm/qspinlock.h
> +++ b/arch/powerpc/include/asm/qspinlock.h
> @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
>  
>  static inline void queued_spin_unlock(struct qspinlock *lock)
>  {
> -	for (;;) {
> -		int val = atomic_read(&lock->val);
> -		if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val)
> -			return;
> -	}
> +	smp_store_release(&lock->locked, 0);

Is it also possible for lock_set_locked() to use a non-atomic acquire
operation?

>  }
>  
>  #define arch_spin_is_locked(l)		queued_spin_is_locked(l)
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
> index 9630e714c70d..3425dab42576 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -3,12 +3,27 @@
>  #define _ASM_POWERPC_QSPINLOCK_TYPES_H
>  
>  #include <linux/types.h>
> +#include <asm/byteorder.h>
>  
>  typedef struct qspinlock {
> -	atomic_t val;
> +	union {
> +		atomic_t val;
> +
> +#ifdef __LITTLE_ENDIAN
> +		struct {
> +			u16	locked;
> +			u8	reserved[2];
> +		};
> +#else
> +		struct {
> +			u8	reserved[2];
> +			u16	locked;
> +		};
> +#endif
> +	};
>  } arch_spinlock_t;

Just to double-check, we have:

#define _Q_LOCKED_OFFSET	0
#define _Q_LOCKED_BITS		1
#define _Q_LOCKED_MASK		0x00000001
#define _Q_LOCKED_VAL		1

#define _Q_TAIL_CPU_OFFSET	16
#define _Q_TAIL_CPU_BITS	16
#define _Q_TAIL_CPU_MASK	0xffff0000


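so the ordering here looks correct: the locked bit lives in the
low-order 16 bits of val, which is the first halfword on little-endian
and the second halfword on big-endian, matching where the union places
the u16 locked field in each case.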

>  
> -#define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
> +#define	__ARCH_SPIN_LOCK_UNLOCKED	{ { .val = ATOMIC_INIT(0) } }
>  
>  /*
>   * Bitfields in the atomic value:



More information about the Linuxppc-dev mailing list