[PATCH] minor lock improvements

jschopp at austin.ibm.com jschopp at austin.ibm.com
Wed Dec 17 07:22:09 EST 2003


I've attached a patch that should help locks be just a smidge faster on
ppc64 machines.  I am not a performance guy so I ran the only benchmark I
had handy (sdet) which I am unfortunately not allowed to publish a number
on to show the increase.  I got an overall throughput increase of .436%,
with a confidence of 95% that the increase is between .232% and .639%.  I
would expect other tests to show larger improvements (performance guys
welcome to help me out here).

The patch needs some feedback (comments in code show where) on how to do
a couple things correctly.
-------------- next part --------------
diff -Nru a/include/asm-ppc64/ppc_asm.h b/include/asm-ppc64/ppc_asm.h
--- a/include/asm-ppc64/ppc_asm.h	Tue Dec 16 14:15:20 2003
+++ b/include/asm-ppc64/ppc_asm.h	Tue Dec 16 14:15:20 2003
@@ -44,10 +44,25 @@
 	ld	ra,PACALPPACA+LPPACAANYINT(rb); /* Get pending interrupt flags */\
 	cmpldi	0,ra,0;

-/* Macros to adjust thread priority for Iseries hardware multithreading */
+/* Macros to adjust thread priority for RPA hardware multithreading
+ * and iSeries hardware multithreading.  This way is kind of hackish,
+ * looking for suggestions on how to do it better. Joel S.
+ */
+#ifdef CONFIG_HMT
 #define HMT_LOW		or 1,1,1
 #define HMT_MEDIUM	or 2,2,2
 #define HMT_HIGH	or 3,3,3
+#else /* CONFIG_HMT */
+#ifdef CONFIG_PPC_ISERIES
+#define HMT_LOW		or 1,1,1
+#define HMT_MEDIUM	or 2,2,2
+#define HMT_HIGH	or 3,3,3
+#else /* CONFIG_PPC_ISERIES */
+#define HMT_LOW
+#define HMT_MEDIUM
+#define HMT_HIGH
+#endif /* CONFIG_PPC_ISERIES */
+#endif /* CONFIG_HMT */

 /* Insert the high 32 bits of the MSR into what will be the new
    MSR (via SRR1 and rfid)  This preserves the MSR.SF and MSR.ISF
diff -Nru a/include/asm-ppc64/spinlock.h b/include/asm-ppc64/spinlock.h
--- a/include/asm-ppc64/spinlock.h	Tue Dec 16 14:15:20 2003
+++ b/include/asm-ppc64/spinlock.h	Tue Dec 16 14:15:20 2003
@@ -22,7 +22,18 @@
  * locking when running on an RPA platform.  As we do more performance
  * tuning, I would expect this selection mechanism to change.  Dave E.
  */
+/* XXX- Need some way to test if SPLPAR is possible on this machine
+ * this way is kind of hackish. HMT and SPLPAR don't really have anything
+ * to do with each other.  Open for suggestions.  Joel S.
+ */
+#ifdef CONFIG_PPC_PSERIES
+#ifndef CONFIG_HMT
+#undef SPLPAR_LOCKS
+#else /* CONFIG_HMT is defined */
 #define SPLPAR_LOCKS
+#endif /* CONFIG_HMT */
+#endif /* CONFIG_PPC_PSERIES */
+
 #define HVSC			".long 0x44000022\n"

 typedef struct {
@@ -107,7 +118,7 @@
 	unsigned long tmp, tmp2;

 	__asm__ __volatile__(
-	"b		2f		# spin_lock\n\
+	"b		3f		# spin_lock\n\
 1:"
 	HMT_LOW
 "       ldx		%0,0,%2         # load the lock value\n\
@@ -127,11 +138,12 @@
 "        b               1b\n\
 2: \n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%2\n\
+"3: \n\
+ 	ldarx		%0,0,%2\n\
 	cmpdi		0,%0,0\n\
 	bne-		1b\n\
 	stdcx.		13,0,%2\n\
-	bne-		2b\n\
+	bne-		3b\n\
 	isync"
 	: "=&r"(tmp), "=&r"(tmp2)
 	: "r"(&lock->lock)
@@ -148,10 +160,10 @@
 	HMT_LOW
 "       ldx		%0,0,%1         # load the lock value\n\
 	cmpdi		0,%0,0          # if not locked, try to acquire\n\
-	bne+		1b\n\
-2: \n"
+	bne+		1b\n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%1\n\
+"2: \n\
+ 	ldarx		%0,0,%1\n\
 	cmpdi		0,%0,0\n\
 	bne-		1b\n\
 	stdcx.		13,0,%1\n\
@@ -224,7 +236,7 @@
 	unsigned long tmp, tmp2;

 	__asm__ __volatile__(
-	"b		2f		# read_lock\n\
+	"b		3f		# read_lock\n\
 1:"
 	HMT_LOW
 "	ldx		%0,0,%2\n\
@@ -247,11 +259,12 @@
         sc                              # do the hcall \n\
 2: \n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%2\n\
+"3:\n\
+ 	ldarx		%0,0,%2\n\
 	addic.		%0,%0,1\n\
 	ble-		1b\n\
 	stdcx.		%0,0,%2\n\
-	bne-		2b\n\
+	bne-		3b\n\
 	isync"
 	: "=&r"(tmp), "=&r"(tmp2)
 	: "r"(&rw->lock)
@@ -265,7 +278,7 @@
 	unsigned long tmp, tmp2;

 	__asm__ __volatile__(
-	"b		2f		# read_lock\n\
+	"b		3f		# read_lock\n\
 1:"
 	HMT_LOW
 "	ldx		%0,0,%2\n\
@@ -284,11 +297,12 @@
 	HVSC
 "2: \n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%2\n\
+"3: \n\
+ 	ldarx		%0,0,%2\n\
 	addic.		%0,%0,1\n\
 	ble-		1b\n\
 	stdcx.		%0,0,%2\n\
-	bne-		2b\n\
+	bne-		3b\n\
 	isync"
 	: "=&r"(tmp), "=&r"(tmp2)
 	: "r"(&rw->lock)
@@ -305,10 +319,10 @@
 	HMT_LOW
 "	ldx		%0,0,%1\n\
 	cmpdi		0,%0,0\n\
-	blt+		1b\n\
-2: \n"
+	blt+		1b\n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%1\n\
+"2: \n\
+ 	ldarx		%0,0,%1\n\
 	addic.		%0,%0,1\n\
 	ble-		1b\n\
 	stdcx.		%0,0,%1\n\
@@ -363,7 +377,7 @@
 	unsigned long tmp, tmp2;

 	__asm__ __volatile__(
-	"b		2f		# spin_lock\n\
+	"b		3f		# spin_lock\n\
 1:"
 	HMT_LOW
 "       ldx		%0,0,%2         # load the lock value\n\
@@ -387,11 +401,12 @@
         sc                              # do the hcall \n\
 2: \n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%2\n\
+"3: \n\
+ 	ldarx		%0,0,%2\n\
 	cmpdi		0,%0,0\n\
 	bne-		1b\n\
 	stdcx.		13,0,%2\n\
-	bne-		2b\n\
+	bne-		3b\n\
 	isync"
 	: "=&r"(tmp), "=&r"(tmp2)
 	: "r"(&rw->lock)
@@ -405,7 +420,7 @@
 	unsigned long tmp, tmp2;

 	__asm__ __volatile__(
-	"b		2f		# spin_lock\n\
+	"b		3f		# spin_lock\n\
 1:"
 	HMT_LOW
 "       ldx		%0,0,%2         # load the lock value\n\
@@ -427,11 +442,12 @@
 "        b               1b\n\
 2: \n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%2\n\
+"3: \n\
+ 	ldarx		%0,0,%2\n\
 	cmpdi		0,%0,0\n\
 	bne-		1b\n\
 	stdcx.		13,0,%2\n\
-	bne-		2b\n\
+	bne-		3b\n\
 	isync"
 	: "=&r"(tmp), "=&r"(tmp2)
 	: "r"(&rw->lock)
@@ -443,7 +459,7 @@
 	unsigned long tmp;

 	__asm__ __volatile__(
-	"b		2f		# spin_lock\n\
+	"b		3f		# spin_lock\n\
 1:"
 	HMT_LOW
 "       ldx		%0,0,%1         # load the lock value\n\
@@ -451,11 +467,12 @@
 	bne+		1b\n\
 2: \n"
 	HMT_MEDIUM
-" 	ldarx		%0,0,%1\n\
+"3: \n\
+ 	ldarx		%0,0,%1\n\
 	cmpdi		0,%0,0\n\
 	bne-		1b\n\
 	stdcx.		13,0,%1\n\
-	bne-		2b\n\
+	bne-		3b\n\
 	isync"
 	: "=&r"(tmp)
 	: "r"(&rw->lock)


More information about the Linuxppc64-dev mailing list