[PATCH] powerpc/64s/hash: Fix SLB preload cache vs kthread_use_mm

Nicholas Piggin npiggin at gmail.com
Tue Jul 13 00:07:46 AEST 2021


Excerpts from Nicholas Piggin's message of July 8, 2021 7:05 pm:
> It's possible for kernel threads to pick up SLB preload entries if
> they are accessing userspace with kthread_use_mm. If the kthread is
> later context switched out while using a different mm, then when it is
> switched back in it could preload SLBs belonging to the previous mm.
> 
> This could lead to data corruption, leaks, SLB multi hits, etc.
> 
> In the absence of a usable hook to clear preloads when unusing an
> mm, fix it by keeping track of the mm that the preloads belong to.
> 
> Adjust the isync() comment to make clear that it can't be skipped
> even if we had no preloads.
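
For reference, the scenario described above is the usual kthread_use_mm()
pattern. The sketch below is illustrative only; kthread_touch_user and
uptr are made-up names, not from this patch or the kernel tree:

  #include <linux/kthread.h>
  #include <linux/mm_types.h>
  #include <linux/uaccess.h>

  /* Illustrative only: a kernel thread temporarily adopting a user mm. */
  static void kthread_touch_user(struct mm_struct *mm, const void __user *uptr)
  {
          u64 val;

          kthread_use_mm(mm);             /* begin accessing userspace of @mm */
          if (copy_from_user(&val, uptr, sizeof(val)))
                  val = 0;                /* a user-address SLB fault taken here goes
                                           * through do_slb_fault() and can add a
                                           * preload entry for this thread */
          kthread_unuse_mm(mm);           /* stop using @mm; nothing here clears the
                                           * thread_info slb_preload_* state */
  }

Nothing on the kthread_unuse_mm() side clears the preload cache, which is
what the patch below tried to address by tracking the owning mm.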

I should note that this patch is wrong, and so I withdraw it. The
supposed bug is not actually a bug, because the SLB preload only
records the ESID/EA to preload, not the VA.

So this cross-mm "leak" can happen, but the worst it does is preload
SLB entries for addresses that were used by the previous mm and are
unlikely to be accessed in the new mm.
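
Concretely, the cache stores only ea >> SID_SHIFT, and the replay in
switch_slb() resolves the VSID against whatever mm is being switched in.
The loop below is a simplified paraphrase of the existing replay code in
switch_slb() (replay_preloads is just an illustrative wrapper name; in the
tree this loop lives inline in switch_slb()):

  /*
   * Simplified paraphrase of the preload replay in switch_slb(): only
   * the ESID is stored, and slb_allocate_user() looks up the VSID in
   * the mm being switched to, so a stale ESID can at worst install a
   * valid but unneeded SLB entry for the current mm.
   */
  static void replay_preloads(struct thread_info *ti, struct mm_struct *mm)
  {
          unsigned char i;

          for (i = 0; i < ti->slb_preload_nr; i++) {
                  unsigned char idx;
                  unsigned long ea;

                  idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
                  ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;

                  slb_allocate_user(mm, ea);
          }
  }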

There is an argument that clearing the preloads on an mm change is still
the "correct" thing to do for performance, but on the other hand the
situation should be rare enough that it may not be worth the extra logic.
In any case, if we did do it, it should be submitted as a performance
improvement, not a bug fix.

Thanks,
Nick

> 
> Fixes: 5434ae74629a ("powerpc/64s/hash: Add a SLB preload cache")
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
>  arch/powerpc/include/asm/thread_info.h |  1 +
>  arch/powerpc/mm/book3s64/slb.c         | 36 ++++++++++++++++++--------
>  2 files changed, 26 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
> index b4ec6c7dd72e..c3de13dde2af 100644
> --- a/arch/powerpc/include/asm/thread_info.h
> +++ b/arch/powerpc/include/asm/thread_info.h
> @@ -54,6 +54,7 @@ struct thread_info {
>  #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
>  	struct cpu_accounting_data accounting;
>  #endif
> +	struct mm_struct *slb_preload_mm;
>  	unsigned char slb_preload_nr;
>  	unsigned char slb_preload_tail;
>  	u32 slb_preload_esid[SLB_PRELOAD_NR];
> diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
> index c91bd85eb90e..4f9dbce0dd84 100644
> --- a/arch/powerpc/mm/book3s64/slb.c
> +++ b/arch/powerpc/mm/book3s64/slb.c
> @@ -294,11 +294,20 @@ static bool preload_hit(struct thread_info *ti, unsigned long esid)
>  	return false;
>  }
>  
> -static bool preload_add(struct thread_info *ti, unsigned long ea)
> +static bool preload_add(struct thread_info *ti, struct mm_struct *mm, unsigned long ea)
>  {
>  	unsigned char idx;
>  	unsigned long esid;
>  
> +	if (unlikely(ti->slb_preload_mm != mm)) {
> +		/*
> +		 * kthread_use_mm or other temporary mm switching can
> +		 * change the mm being used by a particular thread.
> +		 */
> +		ti->slb_preload_nr = 0;
> +		ti->slb_preload_mm = mm;
> +	}
> +
>  	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
>  		/* EAs are stored >> 28 so 256MB segments don't need clearing */
>  		if (ea & ESID_MASK_1T)
> @@ -362,13 +371,13 @@ void slb_setup_new_exec(void)
>  	 * 0x10000000 so it makes sense to preload this segment.
>  	 */
>  	if (!is_kernel_addr(exec)) {
> -		if (preload_add(ti, exec))
> +		if (preload_add(ti, mm, exec))
>  			slb_allocate_user(mm, exec);
>  	}
>  
>  	/* Libraries and mmaps. */
>  	if (!is_kernel_addr(mm->mmap_base)) {
> -		if (preload_add(ti, mm->mmap_base))
> +		if (preload_add(ti, mm, mm->mmap_base))
>  			slb_allocate_user(mm, mm->mmap_base);
>  	}
>  
> @@ -394,19 +403,19 @@ void preload_new_slb_context(unsigned long start, unsigned long sp)
>  
>  	/* Userspace entry address. */
>  	if (!is_kernel_addr(start)) {
> -		if (preload_add(ti, start))
> +		if (preload_add(ti, mm, start))
>  			slb_allocate_user(mm, start);
>  	}
>  
>  	/* Top of stack, grows down. */
>  	if (!is_kernel_addr(sp)) {
> -		if (preload_add(ti, sp))
> +		if (preload_add(ti, mm, sp))
>  			slb_allocate_user(mm, sp);
>  	}
>  
>  	/* Bottom of heap, grows up. */
>  	if (heap && !is_kernel_addr(heap)) {
> -		if (preload_add(ti, heap))
> +		if (preload_add(ti, mm, heap))
>  			slb_allocate_user(mm, heap);
>  	}
>  
> @@ -502,6 +511,11 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
>  
>  	copy_mm_to_paca(mm);
>  
> +	if (unlikely(ti->slb_preload_mm != mm)) {
> +		ti->slb_preload_nr = 0;
> +		ti->slb_preload_mm = mm;
> +	}
> +
>  	/*
>  	 * We gradually age out SLBs after a number of context switches to
>  	 * reduce reload overhead of unused entries (like we do with FP/VEC
> @@ -513,7 +527,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
>  		unsigned long pc = KSTK_EIP(tsk);
>  
>  		preload_age(ti);
> -		preload_add(ti, pc);
> +		preload_add(ti, mm, pc);
>  	}
>  
>  	for (i = 0; i < ti->slb_preload_nr; i++) {
> @@ -527,9 +541,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
>  	}
>  
>  	/*
> -	 * Synchronize slbmte preloads with possible subsequent user memory
> -	 * address accesses by the kernel (user mode won't happen until
> -	 * rfid, which is safe).
> +	 * Synchronize slbias and slbmte preloads with possible subsequent user
> +	 * memory address accesses by the kernel (user mode won't happen until
> +	 * rfid, which is synchronizing).
>  	 */
>  	isync();
>  }
> @@ -863,7 +877,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
>  
>  		err = slb_allocate_user(mm, ea);
>  		if (!err)
> -			preload_add(current_thread_info(), ea);
> +			preload_add(current_thread_info(), mm, ea);
>  
>  		return err;
>  	}
> -- 
> 2.23.0
> 
> 

