[PATCH 03/17] powerpc/qspinlock: use a half-word store to unlock to avoid larx/stcx.
Jordan Niethe
jniethe5 at gmail.com
Wed Aug 10 13:28:02 AEST 2022
On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> The first 16 bits of the lock are only modified by the owner, and other
> modifications always use atomic operations on the entire 32 bits, so
> unlocks can use plain stores on the 16 bits. This is the same kind of
> optimisation done by core qspinlock code.
> ---
> arch/powerpc/include/asm/qspinlock.h | 6 +-----
> arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++--
> 2 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
> index f06117aa60e1..79a1936fb68d 100644
> --- a/arch/powerpc/include/asm/qspinlock.h
> +++ b/arch/powerpc/include/asm/qspinlock.h
> @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
>
> static inline void queued_spin_unlock(struct qspinlock *lock)
> {
> - for (;;) {
> - int val = atomic_read(&lock->val);
> - if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val)
> - return;
> - }
> + smp_store_release(&lock->locked, 0);
Is it also possible for lock_set_locked() to use a non-atomic acquire
operation?
> }
>
> #define arch_spin_is_locked(l) queued_spin_is_locked(l)
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
> index 9630e714c70d..3425dab42576 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -3,12 +3,27 @@
> #define _ASM_POWERPC_QSPINLOCK_TYPES_H
>
> #include <linux/types.h>
> +#include <asm/byteorder.h>
>
> typedef struct qspinlock {
> - atomic_t val;
> + union {
> + atomic_t val;
> +
> +#ifdef __LITTLE_ENDIAN
> + struct {
> + u16 locked;
> + u8 reserved[2];
> + };
> +#else
> + struct {
> + u8 reserved[2];
> + u16 locked;
> + };
> +#endif
> + };
> } arch_spinlock_t;
Just to double-check, we have:
#define _Q_LOCKED_OFFSET 0
#define _Q_LOCKED_BITS 1
#define _Q_LOCKED_MASK 0x00000001
#define _Q_LOCKED_VAL 1
#define _Q_TAIL_CPU_OFFSET 16
#define _Q_TAIL_CPU_BITS 16
#define _Q_TAIL_CPU_MASK 0xffff0000
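so the ordering here looks correct: the locked field covers the low 16
bits of val (the first two bytes on little-endian, the last two on
big-endian), and the tail CPU field occupies the upper 16 bits.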
>
> -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) }
> +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } }
>
> /*
> * Bitfields in the atomic value:
More information about the Linuxppc-dev
mailing list