[RFC PATCH 1/2] powerpc: foundation code to handle CR5 for local_t
Benjamin Herrenschmidt
benh at kernel.crashing.org
Fri Nov 28 11:56:40 AEDT 2014
On Thu, 2014-11-27 at 17:48 +0530, Madhavan Srinivasan wrote:
> This patch creates the infrastructure to handle the CR based
> local_* atomic operations. Local atomic operations are fast
> and highly reentrant per CPU counters. Used for percpu
> variable updates. Local atomic operations only guarantee
> variable modification atomicity wrt the CPU which owns the
> data and these need to be executed in a preemption safe way.
>
> Here is the design of this patch. Since local_* operations
> only need to be atomic with respect to interrupts (IIUC), the patch uses
> one of the Condition Register (CR) fields as a flag variable. When
> entering the local_*, specific bit in the CR5 field is set
> and on exit, bit is cleared. CR bit checking is done in the
> interrupt return path. If CR5[EQ] bit set and if we return
> to kernel, we reset to start of local_* operation.
>
> Reason for this approach is that, currently l[w/d]arx/st[w/d]cx.
> instruction pair is used for local_* operations, which are heavy
> on cycle count and they don't support a local variant. So to
> see whether the new implementation helps, used a modified
> version of Rusty's benchmark code on local_t.
>
> https://lkml.org/lkml/2008/12/16/450
>
> Modifications:
> - increased the working set size from 1MB to 8MB,
> - removed cpu_local_inc test.
>
> Test ran
> - on POWER8 1S Scale out System 2.0GHz
> - on OPAL v3 with v3.18-rc4 patch kernel as Host
>
> Here are the values with the patch.
>
> Time in ns per iteration
>
> inc add read add_return
> atomic_long 67 67 18 69
> irqsave/rest 39 39 23 39
> trivalue 39 39 29 49
> local_t 26 26 24 26
>
> Since CR5 is used as a flag, have added CFLAGS to avoid CR5
> for the kernel compilation and CR5 is zeroed at the kernel
> entry.
>
> Tested the patch in a
> - pSeries LPAR,
> - Host with patched/unmodified guest kernel
>
> To check whether userspace sees any CR5 corruption, ran a simple
> test which does,
> - set CR5 field,
> - while(1)
> - sleep or gettimeofday
> - chk bit set
>
> Signed-off-by: Madhavan Srinivasan <maddy at linux.vnet.ibm.com>
> ---
> - I really appreciate feedback on the patchset.
> - Kindly comment if I should try with any other benchmark or
> workload to check the numbers.
> - Also, kindly recommend any known stress test for CR
>
> Makefile | 6 ++
> arch/powerpc/include/asm/exception-64s.h | 21 +++++-
> arch/powerpc/kernel/entry_64.S | 106 ++++++++++++++++++++++++++++++-
> arch/powerpc/kernel/exceptions-64s.S | 2 +-
> arch/powerpc/kernel/head_64.S | 8 +++
> 5 files changed, 138 insertions(+), 5 deletions(-)
>
> diff --git a/Makefile b/Makefile
> index 00d618b..2e271ad 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -706,6 +706,12 @@ endif
>
> KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments)
>
> +ifdef CONFIG_PPC64
> +# We need this flag to force compiler not to use CR5, since
> +# local_t type code is based on this.
> +KBUILD_CFLAGS += -ffixed-cr5
> +endif
> +
> ifdef CONFIG_DEBUG_INFO
> ifdef CONFIG_DEBUG_INFO_SPLIT
> KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
> index 77f52b2..c42919a 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -306,7 +306,26 @@ do_kvm_##n: \
> std r10,0(r1); /* make stack chain pointer */ \
> std r0,GPR0(r1); /* save r0 in stackframe */ \
> std r10,GPR1(r1); /* save r1 in stackframe */ \
> - beq 4f; /* if from kernel mode */ \
> +BEGIN_FTR_SECTION; \
> + lis r9,4096; /* Create a mask with HV and PR */ \
> + rldicr r9,r9,32,31; /* bits, AND with the MSR */ \
> + mr r10,r9; /* to check for Hyp state */ \
> + ori r9,r9,16384; \
> + and r9,r12,r9; \
> + cmpd cr3,r10,r9; \
> + beq cr3,66f; /* Jump if we come from Hyp mode*/ \
> + mtcrf 0x04,r10; /* Clear CR5 if coming from usr */ \
> +FTR_SECTION_ELSE; \
Can't we just unconditionally clear it, as long as we do that after we've
saved it ? In that case, it's just a matter for the fixup code to check
the saved version rather than the actual CR..
> + beq 4f; /* if kernel mode branch */ \
> + li r10,0; /* Clear CR5 incase of coming */ \
> + mtcrf 0x04,r10; /* from user. */ \
> + nop; /* This part of code is for */ \
> + nop; /* kernel with MSR[HV]=0, */ \
> + nop; /* MSR[PR]=0, so just chk for */ \
> + nop; /* MSR[PR] */ \
> + nop; \
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE); \
> +66: beq 4f; /* if from kernel mode */ \
> ACCOUNT_CPU_USER_ENTRY(r9, r10); \
> SAVE_PPR(area, r9, r10); \
> 4: EXCEPTION_PROLOG_COMMON_2(area) \
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 0905c8d..e42bb99 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -68,7 +68,26 @@ system_call_common:
> 2: std r2,GPR2(r1)
> std r3,GPR3(r1)
> mfcr r2
> - std r4,GPR4(r1)
> +BEGIN_FTR_SECTION
> + lis r10,4096
> + rldicr r10,r10,32,31
> + mr r11,r10
> + ori r10,r10,16384
> + and r10,r12,r10
> + cmpd r11,r10
> + beq 67f
> + mtcrf 0x04,r11
> +FTR_SECTION_ELSE
> + beq 67f
> + li r11,0
> + mtcrf 0x04,r11
> + nop
> + nop
> + nop
> + nop
> + nop
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> +67: std r4,GPR4(r1)
> std r5,GPR5(r1)
> std r6,GPR6(r1)
> std r7,GPR7(r1)
> @@ -224,8 +243,26 @@ syscall_exit:
> BEGIN_FTR_SECTION
> stdcx. r0,0,r1 /* to clear the reservation */
> END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
> +BEGIN_FTR_SECTION
> + lis r4,4096
> + rldicr r4,r4,32,31
> + mr r6,r4
> + ori r4,r4,16384
> + and r4,r8,r4
> + cmpd cr3,r6,r4
> + beq cr3,65f
> + mtcr r5
> +FTR_SECTION_ELSE
> andi. r6,r8,MSR_PR
> - ld r4,_LINK(r1)
> + beq 65f
> + mtcr r5
> + nop
> + nop
> + nop
> + nop
> + nop
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> +65: ld r4,_LINK(r1)
>
> beq- 1f
> ACCOUNT_CPU_USER_EXIT(r11, r12)
> @@ -234,7 +271,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
> 1: ld r2,GPR2(r1)
> ld r1,GPR1(r1)
> mtlr r4
> +#ifdef CONFIG_PPC64
> + mtcrf 0xFB,r5
> +#else
> mtcr r5
> +#endif
> mtspr SPRN_SRR0,r7
> mtspr SPRN_SRR1,r8
> RFI
> @@ -804,7 +845,66 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
> */
> .globl fast_exception_return
> fast_exception_return:
> - ld r3,_MSR(r1)
> +
> + /*
> + * Now that we are about to exit from interrupt, lets check for
> + * cr5 eq bit. If it is set, then we may be in the middle of
> + * local_t update. In this case, we should rewind the NIP
> + * accordingly.
> + */
> + mfcr r3
> + andi. r4,r3,0x200
> + beq 63f
> +
> + /*
> + * Now that the bit is set, lets check for return to User
> + */
> + ld r4,_MSR(r1)
> +BEGIN_FTR_SECTION
> + li r3,4096
> + rldicr r3,r3,32,31
> + mr r5,r3
> + ori r3,r3,16384
> + and r3,r4,r3
> + cmpd r5,r3
> + bne 63f
> +FTR_SECTION_ELSE
> + andi. r3,r4,MSR_PR
> + bne 63f
> + nop
> + nop
> + nop
> + nop
> + nop
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> +
> + /*
> + * Looks like we are returning to Kernel, so
> + * lets get the NIP and search the ex_table.
> + * Change the NIP based on the return value
> + */
> +lookup_ex_table:
> + ld r3,_NIP(r1)
> + bl search_exception_tables
> + cmpli 0,1,r3,0
> + bne 62f
> +
> + /*
> + * This is a panic case. Reason is that, we
> + * have the CR5 bit set, but we are not in
> + * local_* code and we are returning to Kernel.
> + */
> + ld r3,_NIP(r1)
> + mfcr r4
> + EMIT_BUG_ENTRY lookup_ex_table, __FILE__,__LINE__,BUGFLAG_WARNING
> +
> + /*
> + * Now save the return fixup address as NIP
> + */
> +62: ld r4,8(r3)
> + std r4,_NIP(r1)
> + crclr 22
> +63: ld r3,_MSR(r1)
> ld r4,_CTR(r1)
> ld r0,_LINK(r1)
> mtctr r4
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 72e783e..edb75a9 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -637,7 +637,7 @@ masked_##_H##interrupt: \
> rldicl r10,r10,48,1; /* clear MSR_EE */ \
> rotldi r10,r10,16; \
> mtspr SPRN_##_H##SRR1,r10; \
> -2: mtcrf 0x80,r9; \
> +2: mtcrf 0x90,r9; \
> ld r9,PACA_EXGEN+EX_R9(r13); \
> ld r10,PACA_EXGEN+EX_R10(r13); \
> ld r11,PACA_EXGEN+EX_R11(r13); \
> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> index d48125d..02e49b3 100644
> --- a/arch/powerpc/kernel/head_64.S
> +++ b/arch/powerpc/kernel/head_64.S
> @@ -347,6 +347,14 @@ __mmu_off:
> *
> */
> __start_initialization_multiplatform:
> +
> + /*
> + * Before we do anything, lets clear CR5 field,
> + * so that we will have a clean start at entry
> + */
> + li r11,0
> + mtcrf 0x04,r11
> +
> /* Make sure we are running in 64 bits mode */
> bl enable_64b_mode
>
More information about the Linuxppc-dev
mailing list