[RFC] powerpc: use ticket spin lock for !CONFIG_PPC_SPLPAR

Kevin Hao haokexin at gmail.com
Thu Mar 12 21:55:23 AEDT 2015


I know Torsten Duwe tried to add a ticket spinlock for powerpc a year
ago [1], but it made no progress due to the conflict between PPC_SPLPAR
and lockref, and we still haven't found a better way to handle that.
Rather than waiting forever for a perfect solution, can't we just use
the ticket spinlock for !CONFIG_PPC_SPLPAR?

This is a very rough patch based on the arm64 code. I want to make sure
that this approach is acceptable before going any further. It has only
passed a build and boot test on an fsl t4240rdb board. I have also run
a simple performance benchmark by executing the following command ten
times before and after applying this patch:
	./perf bench sched messaging

                                Before        After
    Averaged total time [sec]:  0.403         0.367

So we can see a ~9% performance improvement. This patch depends on [2].

[1] https://lists.ozlabs.org/pipermail/linuxppc-dev/2014-February/115195.html
[2] http://patchwork.ozlabs.org/patch/447563/
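
For reference, below is a minimal C sketch of the ticket lock scheme
that the lwarx/stwcx. assembly in the patch implements. The tkt_* names
and the GCC __atomic builtins are used purely for illustration and are
not part of the patch:

/*
 * Illustrative sketch of a ticket lock: each acquirer atomically takes
 * the current 'next' value as its ticket and bumps 'next', then spins
 * until 'owner' reaches its ticket.  Unlock advances 'owner', handing
 * the lock to the waiters in FIFO order.
 */
#include <stdint.h>

struct tkt_lock {
	uint16_t owner;		/* ticket currently being served */
	uint16_t next;		/* next ticket to hand out */
};

static void tkt_lock_acquire(struct tkt_lock *l)
{
	/* atomically grab my ticket and increment 'next' */
	uint16_t ticket = __atomic_fetch_add(&l->next, 1, __ATOMIC_RELAXED);

	/*
	 * Spin until it is my turn.  The patch's assembly additionally
	 * drops/restores SMT thread priority (or 1,1,1 / or 2,2,2)
	 * while spinning.
	 */
	while (__atomic_load_n(&l->owner, __ATOMIC_ACQUIRE) != ticket)
		;
}

static void tkt_lock_release(struct tkt_lock *l)
{
	/* only the lock holder writes 'owner', so a plain read is fine */
	__atomic_store_n(&l->owner, l->owner + 1, __ATOMIC_RELEASE);
}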

Signed-off-by: Kevin Hao <haokexin at gmail.com>
---
 arch/powerpc/include/asm/spinlock.h       | 79 ++++++++++++++++++++++++++++++-
 arch/powerpc/include/asm/spinlock_types.h | 16 +++++++
 arch/powerpc/lib/locks.c                  |  2 +-
 3 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index d303cdad2519..3faf2507abe9 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -54,6 +54,7 @@
 #define SYNC_IO
 #endif
 
+#ifdef CONFIG_PPC_SPLPAR
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
 	return lock.slock == 0;
@@ -89,6 +90,40 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
 	return tmp;
 }
 
+#else
+static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.owner == lock.next;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
+}
+
+static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval;
+
+	__asm__ __volatile__ (
+"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+	rotlwi		%1,%0,16\n\
+	xor.		%1,%1,%0\n\
+	bne-		2f\n\
+	add		%0,%0,%3\n\
+	stwcx.		%0,0,%2\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (lockval), "=&r" (tmp)
+	: "r" (lock), "r" (1 << TICKET_SHIFT)
+	: "cr0", "memory");
+
+	return tmp;
+}
+#endif
+
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
@@ -120,6 +155,7 @@ extern void __rw_yield(arch_rwlock_t *lock);
 #define SHARED_PROCESSOR	0
 #endif
 
+#ifdef CONFIG_PPC_SPLPAR
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	CLEAR_IO_SYNC;
@@ -155,16 +191,57 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
 		local_irq_restore(flags_dis);
 	}
 }
+#else
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	arch_spinlock_t lockval = READ_ONCE(*lock);
+	return (lockval.next - lockval.owner) > 1;
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval;
+
+	CLEAR_IO_SYNC;
+	__asm__ __volatile__ (
+"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+	add		%1,%0,%4\n\
+	stwcx.		%1,0,%2\n\
+	bne-		1b\n\
+	rotlwi		%1,%0,16\n\
+	cmpw		%1,%0\n\
+	beq		3f\n\
+	rlwinm		%0,%0,16,16,31\n\
+2:	or		1,1,1\n\
+	lhz		%1,0(%3)\n\
+	cmpw		%1,%0\n\
+	bne		2b\n\
+	or		2,2,2\n\
+3:"
+	PPC_ACQUIRE_BARRIER
+	: "=&r" (lockval), "=&r" (tmp)
+	: "r"(lock), "r" (&lock->owner), "r" (1 << TICKET_SHIFT)
+	: "cr0", "memory");
+}
+#endif
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	SYNC_IO;
 	__asm__ __volatile__("# arch_spin_unlock\n\t"
 				PPC_RELEASE_BARRIER: : :"memory");
+#ifdef CONFIG_PPC_SPLPAR
 	lock->slock = 0;
+#else
+	lock->owner++;
+#endif
 }
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_SPLPAR
 extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #else
 #define arch_spin_unlock_wait(lock) \
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc4fdc4..1af94f290363 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -5,11 +5,27 @@
 # error "please don't include this file directly"
 #endif
 
+#ifdef CONFIG_PPC_SPLPAR
 typedef struct {
 	volatile unsigned int slock;
 } arch_spinlock_t;
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#else
+#define TICKET_SHIFT	16
+
+typedef struct {
+#ifdef __BIG_ENDIAN__
+	u16 next;
+	u16 owner;
+#else
+	u16 owner;
+	u16 next;
+#endif
+} __aligned(4) arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 , 0 }
+#endif /*CONFIG_PPC_SPLPAR*/
 
 typedef struct {
 	volatile signed int lock;
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 170a0346f756..fe3d21eeb10d 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -66,7 +66,6 @@ void __rw_yield(arch_rwlock_t *rw)
 	plpar_hcall_norets(H_CONFER,
 		get_hard_smp_processor_id(holder_cpu), yield_count);
 }
-#endif
 
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
@@ -83,3 +82,4 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(arch_spin_unlock_wait);
+#endif
-- 
2.1.0


