[PATCH v2 13/17] powerpc/qspinlock: trylock and initial lock attempt may steal

Nicholas Piggin npiggin at gmail.com
Mon Nov 14 13:31:33 AEDT 2022


This gives trylock slightly more strength, and it also gives most
of the benefit of passing 'val' back through the slowpath without
the complexity.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 arch/powerpc/include/asm/qspinlock.h | 44 +++++++++++++++++++++++++++-
 arch/powerpc/lib/qspinlock.c         |  9 ++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 3eff2d875bb6..56638175e49b 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -5,6 +5,15 @@
 #include <linux/compiler.h>
 #include <asm/qspinlock_types.h>
 
+/*
+ * The trylock itself may steal. This makes trylocks slightly stronger, and
+ * might make spin locks slightly more efficient when stealing.
+ *
+ * This is compile-time, so if true then there may always be stealers, so the
+ * nosteal paths become unused.
+ */
+#define _Q_SPIN_TRY_LOCK_STEAL 1
+
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	return READ_ONCE(lock->val);
@@ -26,11 +35,12 @@ static __always_inline u32 queued_spin_encode_locked_val(void)
 	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
 }
 
-static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
 {
 	u32 new = queued_spin_encode_locked_val();
 	u32 prev;
 
+	/* Trylock succeeds only when unlocked and no queued nodes */
 	asm volatile(
 "1:	lwarx	%0,0,%1,%3	# queued_spin_trylock			\n"
 "	cmpwi	0,%0,0							\n"
@@ -47,6 +57,38 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 	return likely(prev == 0);
 }
 
+static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
+{
+	u32 new = queued_spin_encode_locked_val();
+	u32 prev, tmp;
+
+	/* Trylock may get ahead of queued nodes if it finds unlocked */
+	asm volatile(
+"1:	lwarx	%0,0,%2,%5	# queued_spin_trylock			\n"
+"	andc.	%1,%0,%4						\n"
+"	bne-	2f							\n"
+"	and	%1,%0,%4						\n"
+"	or	%1,%1,%3						\n"
+"	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (prev), "=&r" (tmp)
+	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
+	  "i" (IS_ENABLED(CONFIG_PPC64))
+	: "cr0", "memory");
+
+	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
+}
+
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+	if (!_Q_SPIN_TRY_LOCK_STEAL)
+		return __queued_spin_trylock_nosteal(lock);
+	else
+		return __queued_spin_trylock_steal(lock);
+}
+
 void queued_spin_lock_slowpath(struct qspinlock *lock);
 
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 51123240da8e..830a90a66f5f 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -24,7 +24,11 @@ struct qnodes {
 
 /* Tuning parameters */
 static int steal_spins __read_mostly = (1<<5);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
 static bool maybe_stealers __read_mostly = true;
+#endif
 static int head_spins __read_mostly = (1<<8);
 
 static bool pv_yield_owner __read_mostly = true;
@@ -527,6 +531,10 @@ void pv_spinlocks_init(void)
 #include <linux/debugfs.h>
 static int steal_spins_set(void *data, u64 val)
 {
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+	/* MAYBE_STEAL remains true */
+	steal_spins = val;
+#else
 	static DEFINE_MUTEX(lock);
 
 	/*
@@ -551,6 +559,7 @@ static int steal_spins_set(void *data, u64 val)
 		steal_spins = val;
 	}
 	mutex_unlock(&lock);
+#endif
 
 	return 0;
 }
-- 
2.37.2



More information about the Linuxppc-dev mailing list