[RFC PATCH 1/2] powerpc: foundation code to handle CR5 for local_t
Madhavan Srinivasan
maddy at linux.vnet.ibm.com
Thu Nov 27 23:18:40 AEDT 2014
This patch creates the infrastructure to handle CR-based
local_* atomic operations. Local atomic operations are fast
and highly reentrant per-CPU counters, used for per-CPU
variable updates. Local atomic operations only guarantee
variable modification atomicity with respect to the CPU that
owns the data, and they need to be executed in a
preemption-safe way.
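For context, here is a minimal usage sketch of the local_t API this
targets (standard local_t/percpu interfaces; the counter name is
made up):

    #include <asm/local.h>
    #include <linux/percpu.h>

    static DEFINE_PER_CPU(local_t, my_counter);   /* illustrative name */

    static void bump_counter(void)
    {
        /* Stay on the owning CPU across the update; atomicity
         * wrt interrupts on this CPU is local_inc()'s job.
         */
        preempt_disable();
        local_inc(this_cpu_ptr(&my_counter));
        preempt_enable();
    }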
Here is the design of this patch. Since local_* operations
only need to be atomic with respect to interrupts (IIUC), the
patch uses one of the Condition Register (CR) fields as a flag
variable. On entering a local_* operation, a specific bit in
the CR5 field is set, and on exit the bit is cleared. The CR
bit is checked in the interrupt return path: if the CR5[EQ]
bit is set and we are returning to the kernel, the NIP is
rewound to the start of the local_* operation.
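As a rough sketch only (the actual ops and their restart bookkeeping
are in patch 2/2), a local_t increment under this scheme becomes a
plain load/modify/store bracketed by CR5[EQ] (CR bit 22), with label
1: as the restart point the return path rewinds to. -ffixed-cr5
(added below) keeps the compiler away from CR5, so no CR clobber is
needed:

    static __inline__ void local_inc(local_t *l)
    {
        long t;

        __asm__ __volatile__(
    "1: crset   22              # enter: set CR5[EQ]\n\
        ld      %0,0(%1)\n\
        addi    %0,%0,1\n\
        std     %0,0(%1)\n\
        crclr   22              # exit: clear CR5[EQ]\n"
        : "=&r" (t)
        : "b" (&(l->a.counter))
        : "memory");
    }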
The reason for this approach is that local_* operations
currently use the l[w/d]arx/st[w/d]cx. instruction pair, which
is heavy in cycle count and has no local variant. So, to see
whether the new implementation helps, I used a modified version
of Rusty's local_t benchmark code:
https://lkml.org/lkml/2008/12/16/450
Modifications:
- increased the working set size from 1MB to 8MB,
- removed cpu_local_inc test.
The test was run
- on a POWER8 1S Scale-Out System at 2.0GHz
- on OPAL v3 with a v3.18-rc4 patched kernel as host
Here are the values with the patch.

Time in ns per iteration:

              inc   add   read  add_return
atomic_long    67    67    18        69
irqsave/rest   39    39    23        39
trivalue       39    39    29        49
local_t        26    26    24        26
Since CR5 is used as a flag, I have added CFLAGS to keep the
compiler from using CR5 during the kernel build, and CR5 is
zeroed at kernel entry.
Tested the patch on
- a pSeries LPAR,
- a host with patched/unmodified guest kernels.
To check whether userspace sees any CR5 corruption, I ran a
simple test (sketched below) which does:
- set a bit in the CR5 field,
- while(1)
  - sleep or gettimeofday
  - check that the bit is still set
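A minimal version of that test might look like this (illustrative
only; build with -ffixed-cr5 so the compiler itself leaves CR5
alone, and note the ABI makes CR5 volatile across calls, so this is
indicative rather than conclusive):

    #include <stdio.h>
    #include <sys/time.h>
    #include <unistd.h>

    int main(void)
    {
        struct timeval tv;
        unsigned long cr;

        __asm__ __volatile__("crset 22");      /* set CR5[EQ] */
        for (;;) {
            sleep(1);                          /* or gettimeofday(&tv, NULL) */
            __asm__ __volatile__("mfcr %0" : "=r" (cr));
            if (!(cr & 0x200))                 /* CR5[EQ] is CR bit 22 */
                printf("CR5 corrupted: 0x%lx\n", cr);
        }
        return 0;
    }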
Signed-off-by: Madhavan Srinivasan <maddy at linux.vnet.ibm.com>
---
- I would really appreciate feedback on the patch set.
- Kindly comment if I should try any other benchmark or
  workload to check the numbers.
- Also, kindly recommend any known stress test for the CR.
Makefile | 6 ++
arch/powerpc/include/asm/exception-64s.h | 21 +++++-
arch/powerpc/kernel/entry_64.S | 106 ++++++++++++++++++++++++++++++-
arch/powerpc/kernel/exceptions-64s.S | 2 +-
arch/powerpc/kernel/head_64.S | 8 +++
5 files changed, 138 insertions(+), 5 deletions(-)
diff --git a/Makefile b/Makefile
index 00d618b..2e271ad 100644
--- a/Makefile
+++ b/Makefile
@@ -706,6 +706,12 @@ endif
KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments)
+ifdef CONFIG_PPC64
+# We need this flag to force the compiler not to use CR5, since
+# the local_t code depends on it.
+KBUILD_CFLAGS += -ffixed-cr5
+endif
+
ifdef CONFIG_DEBUG_INFO
ifdef CONFIG_DEBUG_INFO_SPLIT
KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 77f52b2..c42919a 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -306,7 +306,26 @@ do_kvm_##n: \
std r10,0(r1); /* make stack chain pointer */ \
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r10,GPR1(r1); /* save r1 in stackframe */ \
- beq 4f; /* if from kernel mode */ \
+BEGIN_FTR_SECTION; \
+ lis r9,4096; /* Create a mask with HV and PR */ \
+ rldicr r9,r9,32,31; /* bits, AND with the MSR */ \
+ mr r10,r9; /* to check for Hyp state */ \
+ ori r9,r9,16384; \
+ and r9,r12,r9; \
+ cmpd cr3,r10,r9; \
+ beq cr3,66f; /* Jump if we came from Hyp mode */ \
+ mtcrf 0x04,r10; /* Clear CR5 if coming from user */ \
+FTR_SECTION_ELSE; \
+ beq 4f; /* if from kernel mode, branch */ \
+ li r10,0; /* Clear CR5 in case we are */ \
+ mtcrf 0x04,r10; /* coming from user. */ \
+ nop; /* This part of the code is for */ \
+ nop; /* kernels with MSR[HV]=0 and */ \
+ nop; /* MSR[PR]=0, so just check for */ \
+ nop; /* MSR[PR] */ \
+ nop; \
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE); \
+66: beq 4f; /* if from kernel mode */ \
ACCOUNT_CPU_USER_ENTRY(r9, r10); \
SAVE_PPR(area, r9, r10); \
4: EXCEPTION_PROLOG_COMMON_2(area) \
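For readability, the test the added prolog code performs is, in C
terms (MSR bit positions per the ISA; sketch only):

    #define MSR_HV (1UL << 60)   /* lis r9,4096; rldicr r9,r9,32,31 */
    #define MSR_PR (1UL << 14)   /* ori r9,r9,16384 */

    /* "came from hypervisor-state kernel": HV=1 and PR=0 */
    static inline int from_hyp_kernel(unsigned long msr)
    {
        return (msr & (MSR_HV | MSR_PR)) == MSR_HV;
    }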
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0905c8d..e42bb99 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -68,7 +68,26 @@ system_call_common:
2: std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
- std r4,GPR4(r1)
+BEGIN_FTR_SECTION
+ lis r10,4096 /* build a mask with the MSR_HV and */
+ rldicr r10,r10,32,31 /* MSR_PR bits to test where the */
+ mr r11,r10 /* syscall came from (r12 = MSR) */
+ ori r10,r10,16384
+ and r10,r12,r10
+ cmpd r11,r10
+ beq 67f /* from Hyp kernel mode: leave CR5 */
+ mtcrf 0x04,r11 /* else clear CR5 (low r11 bits are 0) */
+FTR_SECTION_ELSE
+ beq 67f /* from kernel mode: leave CR5 */
+ li r11,0
+ mtcrf 0x04,r11 /* clear CR5 on entry from user */
+ nop
+ nop
+ nop
+ nop
+ nop
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+67: std r4,GPR4(r1)
std r5,GPR5(r1)
std r6,GPR6(r1)
std r7,GPR7(r1)
@@ -224,8 +243,26 @@ syscall_exit:
BEGIN_FTR_SECTION
stdcx. r0,0,r1 /* to clear the reservation */
END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+BEGIN_FTR_SECTION
+ lis r4,4096 /* build the MSR_HV|MSR_PR mask again, */
+ rldicr r4,r4,32,31 /* this time on the MSR we return */
+ mr r6,r4 /* with (r8) */
+ ori r4,r4,16384
+ and r4,r8,r4
+ cmpd cr3,r6,r4
+ beq cr3,65f /* returning to Hyp kernel mode */
+ mtcr r5 /* restore the full CR for user */
+FTR_SECTION_ELSE
andi. r6,r8,MSR_PR
- ld r4,_LINK(r1)
+ beq 65f /* returning to kernel mode */
+ mtcr r5 /* restore the full CR for user */
+ nop
+ nop
+ nop
+ nop
+ nop
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+65: ld r4,_LINK(r1)
beq- 1f
ACCOUNT_CPU_USER_EXIT(r11, r12)
@@ -234,7 +271,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
1: ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
+#ifdef CONFIG_PPC64
+ mtcrf 0xFB,r5 /* restore CR, but leave the CR5 field alone */
+#else
mtcr r5
+#endif
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
RFI
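For reference, the mtcrf masks used in this patch select 4-bit CR
fields; the FXM bit for CR field n is 0x80 >> n, so 0x04 names CR5
and 0xFB names everything except CR5. A one-liner sketch:

    /* FXM bit selecting CR field n (0..7): CR0 -> 0x80 ... CR5 -> 0x04 */
    static inline unsigned int fxm(int n)
    {
        return 0x80u >> n;
    }
    /* e.g. 0xFB == 0xFF & ~fxm(5): update all fields but CR5 */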
@@ -804,7 +845,66 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
*/
.globl fast_exception_return
fast_exception_return:
- ld r3,_MSR(r1)
+
+ /*
+ * Now that we are about to exit from the interrupt, let us check
+ * the CR5[EQ] bit. If it is set, we may be in the middle of a
+ * local_t update. In this case, we should rewind the NIP
+ * accordingly.
+ */
+ mfcr r3
+ andi. r4,r3,0x200 /* 0x200 is CR5[EQ], i.e. CR bit 22 */
+ beq 63f
+
+ /*
+ * Now that the bit is set, let us check whether we are returning to user
+ */
+ ld r4,_MSR(r1)
+BEGIN_FTR_SECTION
+ lis r3,4096 /* MSR_HV|MSR_PR mask, as at entry */
+ rldicr r3,r3,32,31
+ mr r5,r3
+ ori r3,r3,16384
+ and r3,r4,r3
+ cmpd r5,r3
+ bne 63f /* not returning to Hyp kernel mode */
+FTR_SECTION_ELSE
+ andi. r3,r4,MSR_PR
+ bne 63f /* returning to user */
+ nop
+ nop
+ nop
+ nop
+ nop
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+
+ /*
+ * Looks like we are returning to the kernel, so let us
+ * fetch the NIP and search the exception table. The NIP
+ * is then changed based on the return value.
+ */
+lookup_ex_table:
+ ld r3,_NIP(r1)
+ bl search_exception_tables
+ cmpli 0,1,r3,0 /* NULL means no entry was found */
+ bne 62f
+
+ /*
+ * This is a bug case: the CR5 bit is set, but we are
+ * not in local_* code and we are returning to the
+ * kernel, so warn about it.
+ */
+ ld r3,_NIP(r1)
+ mfcr r4
+ EMIT_BUG_ENTRY lookup_ex_table, __FILE__,__LINE__,BUGFLAG_WARNING
+
+ /*
+ * Now save the return fixup address as NIP
+ */
+62: ld r4,8(r3) /* fixup address, at offset 8 in the entry */
+ std r4,_NIP(r1)
+ crclr 22 /* clear CR5[EQ]; the restarted op sets it again */
+63: ld r3,_MSR(r1)
ld r4,_CTR(r1)
ld r0,_LINK(r1)
mtctr r4
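In C terms, the rewind logic added above amounts to the following
sketch (illustrative names; the MSR_PR check stands in for the full
HV/PR test, while search_exception_tables() and the insn/fixup entry
layout are the kernel's existing ones — the fixup sits at offset 8,
matching the ld r4,8(r3) above):

    /* kernel context: pt_regs, MSR_PR, search_exception_tables()
     * come from the usual headers
     */
    static void cr5_rewind_on_kernel_return(struct pt_regs *regs)
    {
        const struct exception_table_entry *entry;

        if (!(regs->ccr & 0x200))       /* CR5[EQ] clear: nothing to do */
            return;
        if (regs->msr & MSR_PR)         /* returning to user: skip */
            return;
        entry = search_exception_tables(regs->nip);
        if (!entry) {
            WARN_ON(1);                 /* CR5 set outside local_* code */
            return;
        }
        regs->nip = entry->fixup;       /* restart the local_* sequence */
        regs->ccr &= ~0x200UL;          /* clear CR5[EQ] */
    }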
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 72e783e..edb75a9 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -637,7 +637,7 @@ masked_##_H##interrupt: \
rldicl r10,r10,48,1; /* clear MSR_EE */ \
rotldi r10,r10,16; \
mtspr SPRN_##_H##SRR1,r10; \
-2: mtcrf 0x80,r9; \
+2: mtcrf 0x90,r9; \
ld r9,PACA_EXGEN+EX_R9(r13); \
ld r10,PACA_EXGEN+EX_R10(r13); \
ld r11,PACA_EXGEN+EX_R11(r13); \
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index d48125d..02e49b3 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -347,6 +347,14 @@ __mmu_off:
*
*/
__start_initialization_multiplatform:
+
+ /*
+ * Before we do anything, let us clear the CR5 field,
+ * so that we have a clean start at entry
+ */
+ li r11,0
+ mtcrf 0x04,r11
+
/* Make sure we are running in 64 bits mode */
bl enable_64b_mode
--
1.9.1