[PATCH] minor lock improvements
jschopp at austin.ibm.com
jschopp at austin.ibm.com
Wed Dec 17 07:22:09 EST 2003
I've attached a patch that should help locks be just a smidge faster on
ppc64 machines. I am not a performance guy so I ran the only benchmark I
had handy (sdet) which I am unfortunately not allowed to publish a number
on to show the increase. I got an overall throughput increase of .436%,
with a confidence of 95% that the increase is between .232% and .639%. I
would expect other tests to show larger improvements (performance guys
welcome to help me out here).
The patch needs some feedback (comments in code show where) on how to do
a couple things correctly.
-------------- next part --------------
diff -Nru a/include/asm-ppc64/ppc_asm.h b/include/asm-ppc64/ppc_asm.h
--- a/include/asm-ppc64/ppc_asm.h Tue Dec 16 14:15:20 2003
+++ b/include/asm-ppc64/ppc_asm.h Tue Dec 16 14:15:20 2003
@@ -44,10 +44,25 @@
ld ra,PACALPPACA+LPPACAANYINT(rb); /* Get pending interrupt flags */\
cmpldi 0,ra,0;
-/* Macros to adjust thread priority for Iseries hardware multithreading */
+/* Macros to adjust thread priority for RPA hardware multithreading
+ * and iSeries hardware multithreading. This way is kind of hackish,
+ * looking for suggestions on how to do it better. Joel S.
+ */
+#ifdef CONFIG_HMT
#define HMT_LOW or 1,1,1
#define HMT_MEDIUM or 2,2,2
#define HMT_HIGH or 3,3,3
+#else /* CONFIG_HMT */
+#ifdef CONFIG_PPC_ISERIES
+#define HMT_LOW or 1,1,1
+#define HMT_MEDIUM or 2,2,2
+#define HMT_HIGH or 3,3,3
+#else /* CONFIG_PPC_ISERIES */
+#define HMT_LOW
+#define HMT_MEDIUM
+#define HMT_HIGH
+#endif /* CONFIG_PPC_ISERIES */
+#endif /* CONFIG_HMT */
/* Insert the high 32 bits of the MSR into what will be the new
MSR (via SRR1 and rfid) This preserves the MSR.SF and MSR.ISF
diff -Nru a/include/asm-ppc64/spinlock.h b/include/asm-ppc64/spinlock.h
--- a/include/asm-ppc64/spinlock.h Tue Dec 16 14:15:20 2003
+++ b/include/asm-ppc64/spinlock.h Tue Dec 16 14:15:20 2003
@@ -22,7 +22,18 @@
* locking when running on an RPA platform. As we do more performance
* tuning, I would expect this selection mechanism to change. Dave E.
*/
+/* XXX- Need some way to test if SPLPAR is possible on this machine
+ * this way is kind of hackish. HMT and SPLPAR don't really have anything
+ * to do with each other. Open for suggestions. Joel S.
+ */
+#ifdef CONFIG_PPC_PSERIES
+#ifndef CONFIG_HMT
+#undef SPLPAR_LOCKS
+#else /* CONFIG_HMT is defined */
#define SPLPAR_LOCKS
+#endif /* CONFIG_HMT */
+#endif /* CONFIG_PPC_PSERIES */
+
#define HVSC ".long 0x44000022\n"
typedef struct {
@@ -107,7 +118,7 @@
unsigned long tmp, tmp2;
__asm__ __volatile__(
- "b 2f # spin_lock\n\
+ "b 3f # spin_lock\n\
1:"
HMT_LOW
" ldx %0,0,%2 # load the lock value\n\
@@ -127,11 +138,12 @@
" b 1b\n\
2: \n"
HMT_MEDIUM
-" ldarx %0,0,%2\n\
+"3: \n\
+ ldarx %0,0,%2\n\
cmpdi 0,%0,0\n\
bne- 1b\n\
stdcx. 13,0,%2\n\
- bne- 2b\n\
+ bne- 3b\n\
isync"
: "=&r"(tmp), "=&r"(tmp2)
: "r"(&lock->lock)
@@ -148,10 +160,10 @@
HMT_LOW
" ldx %0,0,%1 # load the lock value\n\
cmpdi 0,%0,0 # if not locked, try to acquire\n\
- bne+ 1b\n\
-2: \n"
+ bne+ 1b\n"
HMT_MEDIUM
-" ldarx %0,0,%1\n\
+"2: \n\
+ ldarx %0,0,%1\n\
cmpdi 0,%0,0\n\
bne- 1b\n\
stdcx. 13,0,%1\n\
@@ -224,7 +236,7 @@
unsigned long tmp, tmp2;
__asm__ __volatile__(
- "b 2f # read_lock\n\
+ "b 3f # read_lock\n\
1:"
HMT_LOW
" ldx %0,0,%2\n\
@@ -247,11 +259,12 @@
sc # do the hcall \n\
2: \n"
HMT_MEDIUM
-" ldarx %0,0,%2\n\
+"3:\n\
+ ldarx %0,0,%2\n\
addic. %0,%0,1\n\
ble- 1b\n\
stdcx. %0,0,%2\n\
- bne- 2b\n\
+ bne- 3b\n\
isync"
: "=&r"(tmp), "=&r"(tmp2)
: "r"(&rw->lock)
@@ -265,7 +278,7 @@
unsigned long tmp, tmp2;
__asm__ __volatile__(
- "b 2f # read_lock\n\
+ "b 3f # read_lock\n\
1:"
HMT_LOW
" ldx %0,0,%2\n\
@@ -284,11 +297,12 @@
HVSC
"2: \n"
HMT_MEDIUM
-" ldarx %0,0,%2\n\
+"3: \n\
+ ldarx %0,0,%2\n\
addic. %0,%0,1\n\
ble- 1b\n\
stdcx. %0,0,%2\n\
- bne- 2b\n\
+ bne- 3b\n\
isync"
: "=&r"(tmp), "=&r"(tmp2)
: "r"(&rw->lock)
@@ -305,10 +319,10 @@
HMT_LOW
" ldx %0,0,%1\n\
cmpdi 0,%0,0\n\
- blt+ 1b\n\
-2: \n"
+ blt+ 1b\n"
HMT_MEDIUM
-" ldarx %0,0,%1\n\
+"2: \n\
+ ldarx %0,0,%1\n\
addic. %0,%0,1\n\
ble- 1b\n\
stdcx. %0,0,%1\n\
@@ -363,7 +377,7 @@
unsigned long tmp, tmp2;
__asm__ __volatile__(
- "b 2f # spin_lock\n\
+ "b 3f # spin_lock\n\
1:"
HMT_LOW
" ldx %0,0,%2 # load the lock value\n\
@@ -387,11 +401,12 @@
sc # do the hcall \n\
2: \n"
HMT_MEDIUM
-" ldarx %0,0,%2\n\
+"3: \n\
+ ldarx %0,0,%2\n\
cmpdi 0,%0,0\n\
bne- 1b\n\
stdcx. 13,0,%2\n\
- bne- 2b\n\
+ bne- 3b\n\
isync"
: "=&r"(tmp), "=&r"(tmp2)
: "r"(&rw->lock)
@@ -405,7 +420,7 @@
unsigned long tmp, tmp2;
__asm__ __volatile__(
- "b 2f # spin_lock\n\
+ "b 3f # spin_lock\n\
1:"
HMT_LOW
" ldx %0,0,%2 # load the lock value\n\
@@ -427,11 +442,12 @@
" b 1b\n\
2: \n"
HMT_MEDIUM
-" ldarx %0,0,%2\n\
+"3: \n\
+ ldarx %0,0,%2\n\
cmpdi 0,%0,0\n\
bne- 1b\n\
stdcx. 13,0,%2\n\
- bne- 2b\n\
+ bne- 3b\n\
isync"
: "=&r"(tmp), "=&r"(tmp2)
: "r"(&rw->lock)
@@ -443,7 +459,7 @@
unsigned long tmp;
__asm__ __volatile__(
- "b 2f # spin_lock\n\
+ "b 3f # spin_lock\n\
1:"
HMT_LOW
" ldx %0,0,%1 # load the lock value\n\
@@ -451,11 +467,12 @@
bne+ 1b\n\
2: \n"
HMT_MEDIUM
-" ldarx %0,0,%1\n\
+"3: \n\
+ ldarx %0,0,%1\n\
cmpdi 0,%0,0\n\
bne- 1b\n\
stdcx. 13,0,%1\n\
- bne- 2b\n\
+ bne- 3b\n\
isync"
: "=&r"(tmp)
: "r"(&rw->lock)
More information about the Linuxppc64-dev
mailing list