[RFC PATCH 2/6] powerpc/64s: Helpers to switch between linear and vmapped stack pointers

Christophe Leroy christophe.leroy at csgroup.eu
Sat Nov 5 19:00:28 AEDT 2022



Le 04/11/2022 à 18:27, Andrew Donnellan a écrit :
> powerpc unfortunately has too many places where we run stuff in real mode.
> 
> With CONFIG_VMAP_STACK enabled, this means we need to be able to swap the
> stack pointer to use the linear mapping when we enter a real mode section,
> and back afterwards.
> 
> Store the top bits of the stack pointer in both the linear map and the
> vmalloc space in the PACA, and add some helper macros/functions to swap
> between them.

That may work when the page size is 64k, because the stack is on a single
page, but I doubt it works with 4k pages, because vmalloc may allocate
non-contiguous pages.

> 
> Signed-off-by: Andrew Donnellan <ajd at linux.ibm.com>
> 
> ---
> 
> Some of the helpers that are currently unused will be used in the next
> version of the series for the KVM real mode handling
> ---
>   arch/powerpc/include/asm/book3s/64/stack.h | 71 ++++++++++++++++++++++
>   arch/powerpc/include/asm/opal.h            |  1 +
>   arch/powerpc/include/asm/paca.h            |  4 ++
>   arch/powerpc/include/asm/processor.h       |  6 ++
>   arch/powerpc/kernel/asm-offsets.c          |  8 +++
>   arch/powerpc/kernel/entry_64.S             |  7 +++
>   arch/powerpc/kernel/process.c              |  4 ++
>   arch/powerpc/kernel/smp.c                  |  7 +++
>   arch/powerpc/xmon/xmon.c                   |  4 ++
>   9 files changed, 112 insertions(+)
>   create mode 100644 arch/powerpc/include/asm/book3s/64/stack.h
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/stack.h b/arch/powerpc/include/asm/book3s/64/stack.h
> new file mode 100644
> index 000000000000..6b31adb1a026
> --- /dev/null
> +++ b/arch/powerpc/include/asm/book3s/64/stack.h
> @@ -0,0 +1,71 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +// Helpers for VMAP_STACK on book3s64
> +// Copyright (C) 2022 IBM Corporation (Andrew Donnellan)
> +
> +#ifndef _ASM_POWERPC_BOOK3S_64_STACK_H
> +#define _ASM_POWERPC_BOOK3S_64_STACK_H
> +
> +#include <asm/thread_info.h>
> +
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +
> +#ifdef __ASSEMBLY__
> +// Switch the current stack pointer in r1 between a linear map address and a
> +// vmalloc address. Used when we need to go in and out of real mode with
> +// CONFIG_VMAP_STACK enabled.
> +//
> +// tmp: scratch register that can be clobbered
> +
> +#define SWAP_STACK_LINEAR(tmp)			\
> +	ld	tmp, PACAKSTACK_LINEAR_BASE(r13);	\
> +	andi.	r1, r1, THREAD_SIZE - 1;		\

Do you assume THREAD_SIZE will never be more than 64k? andi. only takes a 16-bit immediate.

> +	or	r1, r1, tmp;

You can probably do better with rldimi instruction.

> +#define SWAP_STACK_VMALLOC(tmp)			\
> +	ld	tmp, PACAKSTACK_VMALLOC_BASE(r13);	\
> +	andi.	r1, r1, THREAD_SIZE - 1;		\
> +	or	r1, r1, tmp;

Same

> +
> +#else // __ASSEMBLY__
> +
> +#include <asm/paca.h>
> +#include <asm/reg.h>
> +#include <linux/mm.h>
> +
> +#define stack_pa(ptr) (is_vmalloc_addr((ptr)) ? (void *)vmalloc_to_phys((void *)(ptr)) : (void *)ptr)
> +
> +static __always_inline void swap_stack_linear(void)
> +{
> +	current_stack_pointer = get_paca()->kstack_linear_base |	\
> +		(current_stack_pointer & (THREAD_SIZE - 1));

That looks hacky. I don't think you can just change current_stack_pointer
on the fly. You have to provide something similar to call_do_softirq() or
call_do_irq().

> +}
> +
> +static __always_inline void swap_stack_vmalloc(void)
> +{
> +	current_stack_pointer = get_paca()->kstack_vmalloc_base |	\
> +		(current_stack_pointer & (THREAD_SIZE - 1));

Same

> +}
> +
> +#endif // __ASSEMBLY__
> +
> +#else // CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64
> +
> +#define SWAP_STACK_LINEAR(tmp)
> +#define SWAP_STACK_VMALLOC(tmp)
> +
> +static __always_inline void *stack_pa(void *ptr)
> +{
> +	return ptr;
> +}
> +
> +static __always_inline void swap_stack_linear(void)
> +{
> +}
> +
> +static __always_inline void swap_stack_vmalloc(void)
> +{
> +}
> +
> +#endif // CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64
> +
> +#endif // _ASM_POWERPC_BOOK3S_64_STACK_H
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index 726125a534de..0360360ad2cf 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -13,6 +13,7 @@
>   #ifndef __ASSEMBLY__
>   
>   #include <linux/notifier.h>
> +#include <asm/book3s/64/stack.h>
>   
>   /* We calculate number of sg entries based on PAGE_SIZE */
>   #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 09f1790d0ae1..51d060036fa1 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -163,6 +163,10 @@ struct paca_struct {
>   	 */
>   	struct task_struct *__current;	/* Pointer to current */
>   	u64 kstack;			/* Saved Kernel stack addr */
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	u64 kstack_vmalloc_base;	/* Base address of stack in the vmalloc mapping */
> +	u64 kstack_linear_base;		/* Base address of stack in the linear mapping */
> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>   	u64 saved_r1;			/* r1 save for RTAS calls or PM or EE=0 */
>   	u64 saved_msr;			/* MSR saved here by enter_rtas */
>   #ifdef CONFIG_PPC64
> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
> index 631802999d59..999078452aa4 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -132,6 +132,12 @@ struct debug_reg {
>   
>   struct thread_struct {
>   	unsigned long	ksp;		/* Kernel stack pointer */
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	// Kernel stack base addresses in vmalloc and linear mappings
> +	// Used for swapping to linear map in real mode code
> +	unsigned long	ksp_vmalloc_base;
> +	unsigned long	ksp_linear_base;
> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>   
>   #ifdef CONFIG_PPC64
>   	unsigned long	ksp_vsid;
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 4ce2a4aa3985..46ace958d3ce 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -99,6 +99,10 @@ int main(void)
>   #endif
>   
>   	OFFSET(KSP, thread_struct, ksp);
> +#ifdef CONFIG_VMAP_STACK
> +	OFFSET(KSP_VMALLOC_BASE, thread_struct, ksp_vmalloc_base);
> +	OFFSET(KSP_LINEAR_BASE, thread_struct, ksp_linear_base);
> +#endif /* CONFIG_VMAP_STACK */
>   	OFFSET(PT_REGS, thread_struct, regs);
>   #ifdef CONFIG_BOOKE
>   	OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
> @@ -181,6 +185,10 @@ int main(void)
>   	OFFSET(PACAPACAINDEX, paca_struct, paca_index);
>   	OFFSET(PACAPROCSTART, paca_struct, cpu_start);
>   	OFFSET(PACAKSAVE, paca_struct, kstack);
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	OFFSET(PACAKSTACK_VMALLOC_BASE, paca_struct, kstack_vmalloc_base);
> +	OFFSET(PACAKSTACK_LINEAR_BASE, paca_struct, kstack_linear_base);
> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>   	OFFSET(PACACURRENT, paca_struct, __current);
>   	DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) +
>   				 offsetof(struct task_struct, thread_info));
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index af25db6e0205..cd9e56b25934 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -253,6 +253,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
>   	mr	r1,r8		/* start using new stack pointer */
>   	std	r7,PACAKSAVE(r13)
>   
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	ld	r8,KSP_LINEAR_BASE(r4)
> +	std	r8,PACAKSTACK_LINEAR_BASE(r13)
> +	ld	r8,KSP_VMALLOC_BASE(r4)
> +	std	r8,PACAKSTACK_VMALLOC_BASE(r13)

Do you only have r8 to play with? If not, I'd suggest performing the
two ld first, then the two std. Or maybe that doesn't matter on ppc64.

> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
> +
>   	ld	r6,_CCR(r1)
>   	mtcrf	0xFF,r6
>   
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 67da147fe34d..07917726c629 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1782,6 +1782,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
>   	kregs = (struct pt_regs *) sp;
>   	sp -= STACK_FRAME_OVERHEAD;
>   	p->thread.ksp = sp;
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	p->thread.ksp_vmalloc_base = sp & ~(THREAD_SIZE - 1);
> +	p->thread.ksp_linear_base = (u64)__va(vmalloc_to_pfn((void *)sp) << PAGE_SHIFT);

What about:

	page_to_virt(vmalloc_to_page((void *)sp))

But is that really the linear base you want? Isn't it the physical address?
In that case you can do:

	page_to_phys(vmalloc_to_page((void *)sp))



> +#endif /* CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64 */
>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>   	for (i = 0; i < nr_wp_slots(); i++)
>   		p->thread.ptrace_bps[i] = NULL;
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 0da6e59161cd..466ccab5adb8 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -60,6 +60,7 @@
>   #include <asm/ftrace.h>
>   #include <asm/kup.h>
>   #include <asm/fadump.h>
> +#include <asm/book3s/64/stack.h>

Could we avoid including a book3s/64 header directly ? Could it come via 
a more generic one, maybe pgtable.h ?

As you can see with

	git grep "asm/book3s/64" arch/powerpc/

There are no direct inclusions of book3s/64 headers in generic C files.


>   
>   #ifdef DEBUG
>   #include <asm/udbg.h>
> @@ -1250,6 +1251,12 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
>   	paca_ptrs[cpu]->__current = idle;
>   	paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
>   				 THREAD_SIZE - STACK_FRAME_OVERHEAD;
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	paca_ptrs[cpu]->kstack_linear_base = is_vmalloc_addr((void *)paca_ptrs[cpu]->kstack) ?
> +		vmalloc_to_phys((void *)(paca_ptrs[cpu]->kstack)) :
> +		paca_ptrs[cpu]->kstack;
> +	paca_ptrs[cpu]->kstack_vmalloc_base = paca_ptrs[cpu]->kstack & (THREAD_SIZE - 1);
> +#endif // CONFIG_VMAP_STACK && CONFIG_PPC_BOOK3S_64
>   #endif
>   	task_thread_info(idle)->cpu = cpu;
>   	secondary_current = current_set[cpu] = idle;
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index f51c882bf902..236287c4a231 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -2697,6 +2697,10 @@ static void dump_one_paca(int cpu)
>   	DUMP(p, __current, "%-*px");
>   	DUMP(p, kstack, "%#-*llx");
>   	printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1));
> +#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_PPC_BOOK3S_64)
> +	DUMP(p, kstack_linear_base, "%#-*llx");
> +	DUMP(p, kstack_vmalloc_base, "%#-*llx");
> +#endif
>   #ifdef CONFIG_STACKPROTECTOR
>   	DUMP(p, canary, "%#-*lx");
>   #endif


More information about the Linuxppc-dev mailing list