[PATCH 2/2] cxl: Enable global TLBIs for cxl contexts
Frederic Barrat
fbarrat at linux.vnet.ibm.com
Tue Aug 29 03:37:16 AEST 2017
Le 28/08/2017 à 14:03, Benjamin Herrenschmidt a écrit :
> On Mon, 2017-08-28 at 10:47 +0200, Frederic Barrat wrote:
>>
>>
>> Signed-off-by: Frederic Barrat <fbarrat at linux.vnet.ibm.com>
>> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
>> index 309592589e30..6447c0df7ec4 100644
>> --- a/arch/powerpc/include/asm/mmu_context.h
>> +++ b/arch/powerpc/include/asm/mmu_context.h
>> @@ -77,6 +77,41 @@ extern void switch_cop(struct mm_struct *next);
>> extern int use_cop(unsigned long acop, struct mm_struct *mm);
>> extern void drop_cop(unsigned long acop, struct mm_struct *mm);
>>
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +static inline void inc_mm_active_cpus(struct mm_struct *mm)
>> +{
>> + atomic_inc(&mm->context.active_cpus);
>> +}
>> +
>> +static inline void dec_mm_active_cpus(struct mm_struct *mm)
>> +{
>> + atomic_dec(&mm->context.active_cpus);
>> +}
>> +
>> +static inline void mm_context_add_copro(struct mm_struct *mm)
>> +{
>> + inc_mm_active_cpus(mm);
>> +}
>> +
>> +static inline void mm_context_remove_copro(struct mm_struct *mm)
>> +{
>> + /*
>> + * Need to broadcast a global flush of the full mm before
>> + * decrementing active_cpus count, as the next TLBI may be
>> + * local and the nMMU and/or PSL need to be cleaned up.
>> + * Should be rare enough so that it's acceptable.
>> + */
>> + flush_tlb_mm(mm);
>> + dec_mm_active_cpus(mm);
>> +}
>
> You probably need to kill the PWC too. With my recent optimizations
> flush_tlb_mm() won't do that anymore. You need a bigger hammer (I don't
> have the code at hand right now to tell you what exactly :-) ). Basically
> something that does a RIC_FLUSH_ALL.
Good point, I had missed that change. It looks like I now need to call
radix__flush_all_mm(), which I would first have to export outside of
tlb-radix.c.
Would there be any problem with having a flush_all_mm() to complement
flush_tlb_mm()? It's tainted with radix, and the two would be equivalent
on hash, but it would make things easy.
Fred
>> +#else
>> +static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
>> +static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
>> +static inline void mm_context_add_copro(struct mm_struct *mm) { }
>> +static inline void mm_context_remove_copro(struct mm_struct *mm) { }
>> +#endif
>> +
>> +
>> extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>> struct task_struct *tsk);
>>
>> diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
>> index 0f613bc63c50..d60a62bf4fc7 100644
>> --- a/arch/powerpc/mm/mmu_context.c
>> +++ b/arch/powerpc/mm/mmu_context.c
>> @@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
>> struct mm_struct *mm) { }
>> #endif
>>
>> -#ifdef CONFIG_PPC_BOOK3S_64
>> -static inline void inc_mm_active_cpus(struct mm_struct *mm)
>> -{
>> - atomic_inc(&mm->context.active_cpus);
>> -}
>> -#else
>> -static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
>> -#endif
>> -
>> void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>> struct task_struct *tsk)
>> {
>> diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
>> index e0dfd1eadd70..33daf33e0e05 100644
>> --- a/drivers/misc/cxl/api.c
>> +++ b/drivers/misc/cxl/api.c
>> @@ -15,6 +15,7 @@
>> #include <linux/module.h>
>> #include <linux/mount.h>
>> #include <linux/sched/mm.h>
>> +#include <linux/mmu_context.h>
>>
>> #include "cxl.h"
>>
>> @@ -332,8 +333,11 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
>> cxl_context_mm_count_get(ctx);
>>
>> /* decrement the use count */
>> - if (ctx->mm)
>> + if (ctx->mm) {
>> mmput(ctx->mm);
>> + /* make TLBIs for this context global */
>> + mm_context_add_copro(ctx->mm);
>> + }
>> }
>>
>> /*
>> @@ -342,13 +346,25 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
>> */
>> cxl_ctx_get();
>>
>> + /*
>> + * Barrier is needed to make sure all TLBIs are global before
>> + * we attach and the context starts being used by the adapter.
>> + *
>> + * Needed after mm_context_add_copro() for radix and
>> + * cxl_ctx_get() for hash/p8
>> + */
>> + smp_mb();
>> +
>> if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
>> put_pid(ctx->pid);
>> ctx->pid = NULL;
>> cxl_adapter_context_put(ctx->afu->adapter);
>> cxl_ctx_put();
>> - if (task)
>> + if (task) {
>> cxl_context_mm_count_put(ctx);
>> + if (ctx->mm)
>> + mm_context_remove_copro(ctx->mm);
>> + }
>> goto out;
>> }
>>
>> diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
>> index 8c32040b9c09..12a41b2753f0 100644
>> --- a/drivers/misc/cxl/context.c
>> +++ b/drivers/misc/cxl/context.c
>> @@ -18,6 +18,7 @@
>> #include <linux/slab.h>
>> #include <linux/idr.h>
>> #include <linux/sched/mm.h>
>> +#include <linux/mmu_context.h>
>> #include <asm/cputable.h>
>> #include <asm/current.h>
>> #include <asm/copro.h>
>> @@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx)
>>
>> /* Decrease the mm count on the context */
>> cxl_context_mm_count_put(ctx);
>> + if (ctx->mm)
>> + mm_context_remove_copro(ctx->mm);
>> ctx->mm = NULL;
>>
>> return 0;
>> diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
>> index b76a491a485d..411e83cbbd82 100644
>> --- a/drivers/misc/cxl/file.c
>> +++ b/drivers/misc/cxl/file.c
>> @@ -19,6 +19,7 @@
>> #include <linux/mm.h>
>> #include <linux/slab.h>
>> #include <linux/sched/mm.h>
>> +#include <linux/mmu_context.h>
>> #include <asm/cputable.h>
>> #include <asm/current.h>
>> #include <asm/copro.h>
>> @@ -220,9 +221,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
>> /* ensure this mm_struct can't be freed */
>> cxl_context_mm_count_get(ctx);
>>
>> - /* decrement the use count */
>> - if (ctx->mm)
>> + if (ctx->mm) {
>> + /* decrement the use count */
>> mmput(ctx->mm);
>> + /* make TLBIs for this context global */
>> + mm_context_add_copro(ctx->mm);
>> + }
>>
>> /*
>> * Increment driver use count. Enables global TLBIs for hash
>> @@ -230,6 +234,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
>> */
>> cxl_ctx_get();
>>
>> + /*
>> + * Barrier is needed to make sure all TLBIs are global before
>> + * we attach and the context starts being used by the adapter.
>> + *
>> + * Needed after mm_context_add_copro() for radix and
>> + * cxl_ctx_get() for hash/p8
>> + */
>> + smp_mb();
>> +
>> trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
>>
>> if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
>> @@ -240,6 +253,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
>> ctx->pid = NULL;
>> cxl_ctx_put();
>> cxl_context_mm_count_put(ctx);
>> + if (ctx->mm)
>> + mm_context_remove_copro(ctx->mm);
>> goto out;
>> }
>>
>
More information about the Linuxppc-dev
mailing list