[RFC PATCH 2/4] powerpc/qspinlock: Avoid cmpxchg style patterns in queue head locking

Nicholas Piggin npiggin at gmail.com
Tue Nov 15 03:11:17 AEDT 2022


Locking by the MCS queue head must clear the tail CPU if there are
no more queue entries left, and it must also deal with concurrent lock
stealing. Implementing these with cmpxchg-style updates leaves the
possibility of unnecessary failure when the lock word changes.

Implement this instead within one larx/stcx. critical section that
tests the value and takes the appropriate action: bail out if the lock
was already taken; otherwise take the lock, clearing the tail if we are
the tail, else leaving the tail in place.
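
Roughly, the decision logic inside that critical section looks like the
sketch below. This is an illustrative userspace model only, not the
kernel code: the Q_* constants are simplified stand-ins for the real
_Q_* definitions, the function is split into "compute new value from
old" form, and the atomicity provided by larx/stcx. (store only if the
reservation still holds, retry otherwise) is assumed rather than shown.

	#include <stdint.h>

	#define Q_LOCKED_VAL    0x1u
	#define Q_TAIL_CPU_MASK 0xffff0000u

	static uint32_t trylock_clear_my_tail_model(uint32_t val, uint32_t mytail,
						    uint32_t *newval)
	{
		if (val & Q_LOCKED_VAL) {
			/* Lock was stolen: bail out without storing. */
			*newval = val;
			return val;
		}

		if ((val & Q_TAIL_CPU_MASK) == mytail)
			/* We are the queue tail: take the lock, clear the tail. */
			*newval = Q_LOCKED_VAL;
		else
			/* Another waiter has queued: take the lock, keep the tail. */
			*newval = Q_LOCKED_VAL | (val & Q_TAIL_CPU_MASK);

		return val;	/* callers test the old value, like prev from the asm */
	}

In the real primitive the "bail out" case branches past the stwcx. so
nothing is stored, and a failed stwcx. simply retries the lwarx loop,
so the only failures are genuine reservation losses rather than
spurious cmpxchg mismatches.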

With this primitive, there is no longer a significant reason to keep
the large !maybe_stealers special case, so remove it.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 arch/powerpc/lib/qspinlock.c | 92 ++++++++++++------------------------
 1 file changed, 29 insertions(+), 63 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index ff718f27cbc9..79793b3209ea 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -104,62 +104,36 @@ static inline int get_owner_cpu(u32 val)
 	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
 }
 
-/* Take the lock by setting the lock bit, no other CPUs will touch it. */
-static __always_inline void set_locked(struct qspinlock *lock)
+static __always_inline u32 trylock_clear_my_tail(struct qspinlock *lock, u32 mytail)
 {
-	u32 new = queued_spin_encode_locked_val();
+	u32 newval = queued_spin_encode_locked_val();
 	u32 prev, tmp;
 
 	asm volatile(
-"1:	lwarx	%0,0,%2,%4	# set_locked				\n"
-"	or	%1,%0,%3						\n"
-"	stwcx.	%1,0,%2							\n"
+"1:	lwarx	%0,0,%2,%7	# trylock_clear_my_tail			\n"
+	/* This test is necessary if there could be stealers */
+"	andi.	%1,%0,%5						\n"
+"	bne	3f							\n"
+	/* Test whether the lock tail == mytail */
+"	and	%1,%0,%6						\n"
+"	cmpw	0,%1,%3							\n"
+	/* Merge the new locked value */
+"	or	%1,%1,%4						\n"
+"	bne	2f							\n"
+	/* If the lock tail matched, then clear it, otherwise leave it. */
+"	andc	%1,%1,%6						\n"
+"2:	stwcx.	%1,0,%2							\n"
 "	bne-	1b							\n"
 "\t"	PPC_ACQUIRE_BARRIER "						\n"
+"3:									\n"
 	: "=&r" (prev), "=&r" (tmp)
-	: "r" (&lock->val), "r" (new),
+	: "r" (&lock->val), "r"(mytail), "r" (newval),
+	  "i" (_Q_LOCKED_VAL),
+	  "r" (_Q_TAIL_CPU_MASK),
 	  "i" (IS_ENABLED(CONFIG_PPC64))
 	: "cr0", "memory");
 
-	BUG_ON(prev & _Q_LOCKED_VAL);
-}
-
-static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 old, u32 new)
-{
-	u32 prev;
-
-	BUG_ON(old & _Q_LOCKED_VAL);
-
-	asm volatile(
-"1:	lwarx	%0,0,%1,%4	# __trylock_cmpxchg			\n"
-"	cmpw	0,%0,%2							\n"
-"	bne-	2f							\n"
-"	stwcx.	%3,0,%1							\n"
-"	bne-	1b							\n"
-"\t"	PPC_ACQUIRE_BARRIER "						\n"
-"2:									\n"
-	: "=&r" (prev)
-	: "r" (&lock->val), "r"(old), "r" (new),
-	  "i" (IS_ENABLED(CONFIG_PPC64))
-	: "cr0", "memory");
-
-	return likely(prev == old);
-}
-
-/* Take lock, clearing tail, cmpxchg with old (which must not be locked) */
-static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, u32 val)
-{
-	u32 newval = queued_spin_encode_locked_val();
-
-	return __trylock_cmpxchg(lock, val, newval);
-}
-
-/* Take lock, preserving tail, cmpxchg with val (which must not be locked) */
-static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 val)
-{
-	u32 newval = queued_spin_encode_locked_val() | (val & _Q_TAIL_CPU_MASK);
-
-	return __trylock_cmpxchg(lock, val, newval);
+	return prev;
 }
 
 /*
@@ -620,14 +594,11 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		spin_end();
 
 		/* If we're the last queued, must clean up the tail. */
-		if ((val & _Q_TAIL_CPU_MASK) == tail) {
-			if (trylock_clear_tail_cpu(lock, val))
-				goto release;
-			/* Another waiter must have enqueued. */
-		}
+		old = trylock_clear_my_tail(lock, tail);
+		BUG_ON(old & _Q_LOCKED_VAL);
+		if ((old & _Q_TAIL_CPU_MASK) == tail)
+			goto release;
 
-		/* We must be the owner, just set the lock bit and acquire */
-		set_locked(lock);
 	} else {
 		int set_yield_cpu = -1;
 		int iters = 0;
@@ -682,18 +653,13 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		spin_end();
 
 		/* If we're the last queued, must clean up the tail. */
-		if ((val & _Q_TAIL_CPU_MASK) == tail) {
-			if (trylock_clear_tail_cpu(lock, val))
-				goto release;
-			/* Another waiter must have enqueued, or lock stolen. */
-		} else {
-			if (trylock_with_tail_cpu(lock, val))
-				goto unlock_next;
-		}
-		goto again;
+		old = trylock_clear_my_tail(lock, tail);
+		if (unlikely(old & _Q_LOCKED_VAL))
+			goto again;
+		if ((old & _Q_TAIL_CPU_MASK) == tail)
+			goto release;
 	}
 
-unlock_next:
 	/* contended path; must wait for next != NULL (MCS protocol) */
 	next = READ_ONCE(node->next);
 	if (!next) {
-- 
2.37.2
