[PATCH 17/17] powerpc/ftrace: Create a dummy stackframe to fix stack unwind

Christophe Leroy christophe.leroy at csgroup.eu
Fri Jun 23 15:40:34 AEST 2023



On 19/06/2023 at 11:47, Naveen N Rao wrote:
> With ppc64 -mprofile-kernel and ppc32 -pg, profiling instructions to
> call into ftrace are emitted right at function entry. The instruction
> sequence used is minimal to reduce overhead. Crucially, a stackframe is
> not created for the function being traced. This breaks stack unwinding
> since the function being traced does not have a stackframe for itself.
> As such, it never shows up in the backtrace:
> 
> /sys/kernel/debug/tracing # echo 1 > /proc/sys/kernel/stack_tracer_enabled
> /sys/kernel/debug/tracing # cat stack_trace
>          Depth    Size   Location    (17 entries)
>          -----    ----   --------
>    0)     4144      32   ftrace_call+0x4/0x44
>    1)     4112     432   get_page_from_freelist+0x26c/0x1ad0
>    2)     3680     496   __alloc_pages+0x290/0x1280
>    3)     3184     336   __folio_alloc+0x34/0x90
>    4)     2848     176   vma_alloc_folio+0xd8/0x540
>    5)     2672     272   __handle_mm_fault+0x700/0x1cc0
>    6)     2400     208   handle_mm_fault+0xf0/0x3f0
>    7)     2192      80   ___do_page_fault+0x3e4/0xbe0
>    8)     2112     160   do_page_fault+0x30/0xc0
>    9)     1952     256   data_access_common_virt+0x210/0x220
>   10)     1696     400   0xc00000000f16b100
>   11)     1296     384   load_elf_binary+0x804/0x1b80
>   12)      912     208   bprm_execve+0x2d8/0x7e0
>   13)      704      64   do_execveat_common+0x1d0/0x2f0
>   14)      640     160   sys_execve+0x54/0x70
>   15)      480      64   system_call_exception+0x138/0x350
>   16)      416     416   system_call_common+0x160/0x2c4
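
For anyone following along: the profiling sequence being described is the one
ftrace_init_nop() validates in the hunk below. At the entry of a traced
function the compiler emits only something like this (a sketch based on the
"Expected sequence" comments; note there is no stwu/stdu, so the traced
function has no frame of its own at this point):

	/* ppc32 -pg */
	mflr	r0
	stw	r0, 4(r1)		/* LR save area of the caller's frame */
	bl	_mcount

	/* ppc64 -mprofile-kernel (the std may be absent with newer toolchains) */
	mflr	r0
	std	r0, 16(r1)		/* LR save area of the caller's frame */
	bl	_mcount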
> 
> Fix this by having ftrace create a dummy stackframe for the function
> being traced. Since this is only relevant when ftrace is active, we nop
> out the instruction to store LR in the LR save area in the profiling
> instruction sequence on ppc32 (and in ppc64 with older gcc versions).
> Instead, in ftrace, we store LR in the LR save area of the previous
> stackframe, and create a minimal stackframe to represent the function
> being traced. With this, backtraces now capture the function being
> traced:
> 
> /sys/kernel/debug/tracing # cat stack_trace
>          Depth    Size   Location    (17 entries)
>          -----    ----   --------
>    0)     3888      32   _raw_spin_trylock+0x8/0x70
>    1)     3856     576   get_page_from_freelist+0x26c/0x1ad0
>    2)     3280      64   __alloc_pages+0x290/0x1280
>    3)     3216     336   __folio_alloc+0x34/0x90
>    4)     2880     176   vma_alloc_folio+0xd8/0x540
>    5)     2704     416   __handle_mm_fault+0x700/0x1cc0
>    6)     2288      96   handle_mm_fault+0xf0/0x3f0
>    7)     2192      48   ___do_page_fault+0x3e4/0xbe0
>    8)     2144     192   do_page_fault+0x30/0xc0
>    9)     1952     608   data_access_common_virt+0x210/0x220
>   10)     1344      16   0xc0000000334bbb50
>   11)     1328     416   load_elf_binary+0x804/0x1b80
>   12)      912      64   bprm_execve+0x2d8/0x7e0
>   13)      848     176   do_execveat_common+0x1d0/0x2f0
>   14)      672     192   sys_execve+0x54/0x70
>   15)      480      64   system_call_exception+0x138/0x350
>   16)      416     416   system_call_common+0x160/0x2c4
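
With the ftrace_init_nop() change below, that store is rewritten to a nop at
init time, so once tracing is enabled the entry of a traced function roughly
becomes (a sketch, assuming the usual dynamic ftrace patching of the
'bl _mcount' site to 'bl ftrace_caller'):

	mflr	r0
	nop				/* was 'stw r0,4(r1)' / 'std r0,16(r1)' */
	bl	ftrace_caller		/* nop while tracing is disabled */

and ftrace_regs_entry takes over saving r0 and setting up the frames.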
> 
> This results in two additional stores in the ftrace entry code, but
> produces reliable backtraces. Note that this change now aligns with
> other architectures (arm64, s390, x86).
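
The two additional stores are the new PPC_STL of r0 and r7 in ftrace_entry.S
below. If I read the patch right, the stack then looks roughly like this while
ftrace_caller runs (schematic only, higher addresses at the top; A is the
caller and B the traced function, as in the code comments):

	A's stack frame
	    LR save area        <- r0: return address into A
	B's dummy frame         (STACK_FRAME_MIN_SIZE, back chain -> A's frame)
	    LR save area        <- r7/NIP: address within B, so B shows up in the unwind
	ftrace pt_regs frame    (SWITCH_FRAME_SIZE, back chain -> B's dummy frame)   <- r1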
> 
> Signed-off-by: Naveen N Rao <naveen at kernel.org>

Reviewed-by: Christophe Leroy <christophe.leroy at csgroup.eu>

> ---
>   arch/powerpc/kernel/trace/ftrace.c       |  6 ++++--
>   arch/powerpc/kernel/trace/ftrace_entry.S | 11 ++++++++---
>   2 files changed, 12 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
> index 82010629cf887c..2956196c98ffdc 100644
> --- a/arch/powerpc/kernel/trace/ftrace.c
> +++ b/arch/powerpc/kernel/trace/ftrace.c
> @@ -229,13 +229,15 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
>   		/* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */
>   		ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
>   		if (!ret)
> -			ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)));
> +			ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)),
> +						 ppc_inst(PPC_RAW_NOP()));
>   	} else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
>   		/* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */
>   		ret = ftrace_read_inst(ip - 4, &old);
>   		if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) {
>   			ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
> -			ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)));
> +			ret |= ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)),
> +						  ppc_inst(PPC_RAW_NOP()));
>   		}
>   	} else {
>   		return -EINVAL;
> diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
> index bab3ab1368a33f..05e981fb526c2e 100644
> --- a/arch/powerpc/kernel/trace/ftrace_entry.S
> +++ b/arch/powerpc/kernel/trace/ftrace_entry.S
> @@ -33,6 +33,11 @@
>    * and then arrange for the ftrace function to be called.
>    */
>   .macro	ftrace_regs_entry allregs
> +	/* Create a minimal stack frame for representing B */
> +	PPC_STLU	r1, -STACK_FRAME_MIN_SIZE(r1)
> +	/* Save the original return address in A's stack frame */
> +	PPC_STL		r0, LRSAVE+STACK_FRAME_MIN_SIZE(r1)
> +
>   	/* Create our stack frame + pt_regs */
>   	PPC_STLU	r1,-SWITCH_FRAME_SIZE(r1)
>   
> @@ -41,8 +46,6 @@
>   	SAVE_GPRS(3, 10, r1)
>   
>   #ifdef CONFIG_PPC64
> -	/* Save the original return address in A's stack frame */
> -	std	r0, LRSAVE+SWITCH_FRAME_SIZE(r1)
>   	/* Ok to continue? */
>   	lbz	r3, PACA_FTRACE_ENABLED(r13)
>   	cmpdi	r3, 0
> @@ -77,6 +80,8 @@
>   	mflr	r7
>   	/* Save it as pt_regs->nip */
>   	PPC_STL	r7, _NIP(r1)
> +	/* Also save it in B's stackframe header for proper unwind */
> +	PPC_STL	r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
>   	/* Save the read LR in pt_regs->link */
>   	PPC_STL	r0, _LINK(r1)
>   
> @@ -142,7 +147,7 @@
>   #endif
>   
>   	/* Pop our stack frame */
> -	addi r1, r1, SWITCH_FRAME_SIZE
> +	addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
>   
>   #ifdef CONFIG_LIVEPATCH_64
>           /* Based on the cmpd above, if the NIP was altered handle livepatch */

