[RFC PATCH] powerpc/mm/radix: Optimize tlbiel flush
Anton Blanchard
anton at samba.org
Thu Apr 20 18:23:15 AEST 2017
Hi Aneesh,
> For a page walk cache flush, we don't need to loop with set number.
> The set number is ignored with RIC=1 (pwc flush).
>
> For RIC=2 (flush all), in order to flush implementation dependent
> caches, we can ignore the set number. Hence we do a RIC=2 flush with
> set no: 0, so we do both the tlb flush for set 0 and the
> implementation dependent cache flushes. This is then followed with
> tlb flush for set 1-127.
I've applied your two previous radix tlbiel optimisations as my
baseline, and using the simple exec microbenchmark from commit
a7a9dcd882a6 I see:
HPT: 100%
Radix baseline: 248%
Radix patched: 95%
So this patch fixes the large regression we see with radix, and is even
faster than our HPT number now. Nice work!
Acked-by: Anton Blanchard <anton at samba.org>
Anton
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
> ---
> Note: not yet tested.
>
> arch/powerpc/mm/tlb-radix.c | 28 +++++++++++++++++++++++-----
> 1 file changed, 23 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
> index b68b5219cf45..b827aef38b90 100644
> --- a/arch/powerpc/mm/tlb-radix.c
> +++ b/arch/powerpc/mm/tlb-radix.c
> @@ -43,12 +43,30 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
> */
> static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
> {
> - int set;
> + int set = 0;
>
> asm volatile("ptesync": : :"memory");
> - for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
> - __tlbiel_pid(pid, set, ric);
> + if (ric == RIC_FLUSH_ALL) {
> + ric = RIC_FLUSH_TLB;
> + set = 1;
> + /* Use set 0 to flush all */
> + __tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
> }
> +
> + for (; set < POWER9_TLB_SETS_RADIX ; set++)
> + __tlbiel_pid(pid, set, ric);
> +
> + asm volatile("ptesync": : :"memory");
> + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
> +}
> +
> +static inline void _tlbiel_pwc(unsigned long pid)
> +{
> + asm volatile("ptesync": : :"memory");
> + /*
> + * for PWC flush, we don't look at set number
> + */
> + __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
> asm volatile("ptesync": : :"memory");
> asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
> }
> @@ -140,7 +158,7 @@ void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
> pid = mm->context.id;
> if (pid != MMU_NO_CONTEXT)
> - _tlbiel_pid(pid, RIC_FLUSH_PWC);
> + _tlbiel_pwc(pid);
>
> preempt_enable();
> }
> @@ -222,7 +240,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
> if (lock_tlbie)
> raw_spin_unlock(&native_tlbie_lock);
> } else
> - _tlbiel_pid(pid, RIC_FLUSH_PWC);
> + _tlbiel_pwc(pid);
> no_context:
> preempt_enable();
> }
More information about the Linuxppc-dev
mailing list