[PATCH v2 4/5] powerpc64/bpf: remove BPF redzone protection in trampoline stack

Venkat Rao Bagalkote venkat88 at linux.ibm.com
Sat Feb 21 14:43:46 AEDT 2026


On 20/02/26 12:09 pm, Hari Bathini wrote:
> Since bpf2bpf tailcall support is enabled for 64-bit powerpc with
> kernel commit 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with
> subprogs"), 'tailcalls/tailcall_bpf2bpf_hierarchy_fexit' BPF selftest
> is triggering "corrupted stack end detected inside scheduler" with the
> config option CONFIG_SCHED_STACK_END_CHECK enabled. While reviewing
> the stack layout for BPF trampoline, observed that the dummy frame is
> trying to protect the redzone of BPF program. This is because tail
> call info and NVRs save area are in redzone at the time of tailcall
> as the current BPF program stack frame is teared down before the
> tailcall. But saving this redzone in the dummy frame of trampoline
> is unnecessary because of the following reasons:
>
>    1) Firstly, trampoline can be attached to BPF entry/main program
>       or subprog. But prologue part of the BPF entry/main program,
>       where the trampoline attachpoint is, is skipped during tailcall.
>       So, protecting the redzone does not arise when the trampoline is
>       not even triggered in this scenario.
>    2) In case of subprog, the caller's stackframe is already setup
>       and the subprog's stackframe is yet to be setup. So, nothing
>       on the redzone to be protected.
>
> Also, using a dummy frame in the BPF trampoline wastes critically
> scarce kernel stack space, especially in a tailcall sequence, for
> marginal benefit in stack unwinding. So, drop setting up the dummy frame.
> Instead, save return address in bpf trampoline frame and use it as
> appropriate. Pruning this unnecessary stack usage mitigates the
> likelihood of stack overflow in scenarios where bpf2bpf tailcalls
> and fexit programs are mixed.
>
> Reported-by: Saket Kumar Bhaskar <skb99 at linux.ibm.com>
> Fixes: 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with subprogs")
> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> ---
>   arch/powerpc/net/bpf_jit_comp.c | 89 ++++++++++++---------------------
>   1 file changed, 33 insertions(+), 56 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 860b118391ed..256f9ee350eb 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -638,15 +638,10 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
>    * for the traced function (BPF subprog/callee) to fetch it.
>    */
>   static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
> -						int func_frame_offset,
> -						int bpf_dummy_frame_size, int r4_off)
> +						int bpf_frame_size, int r4_off)
>   {
>   	if (IS_ENABLED(CONFIG_PPC64)) {
> -		/*
> -		 * func_frame_offset =                                   ...(1)
> -		 *      bpf_dummy_frame_size + trampoline_frame_size
> -		 */
> -		EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
> +		EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size));
>   		/* Refer to trampoline's Generated stack layout */
>   		EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL));
>   
> @@ -657,21 +652,13 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte
>   		EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
>   		PPC_BCC_CONST_SHORT(COND_GT, 8);
>   		EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL));
> +
>   		/*
> -		 * From ...(1) above:
> -		 * trampoline_frame_bottom =                            ...(2)
> -		 *      func_frame_offset - bpf_dummy_frame_size
> -		 *
> -		 * Using ...(2) derived above:
> -		 * trampoline_tail_call_info_offset =                  ...(3)
> -		 *      trampoline_frame_bottom - BPF_PPC_TAILCALL
> -		 *
> -		 * From ...(3):
> -		 * Use trampoline_tail_call_info_offset to write reference of main's
> -		 * tail_call_info in trampoline frame.
> +		 * Trampoline's tail_call_info is at the same offset, as that of
> +		 * any bpf program, with reference to previous frame. Update the
> +		 * address of main's tail_call_info in trampoline frame.
>   		 */
> -		EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size)
> -								- BPF_PPC_TAILCALL));
> +		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL));
>   	} else {
>   		/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
>   		EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
> @@ -679,7 +666,7 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte
>   }
>   
>   static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> -						 int func_frame_offset, int r4_off)
> +						 int bpf_frame_size, int r4_off)
>   {
>   	if (IS_ENABLED(CONFIG_PPC32)) {
>   		/*
> @@ -690,12 +677,12 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont
>   	}
>   }
>   
> -static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset,
> -				     int nr_regs, int regs_off)
> +static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx,
> +				     int bpf_frame_size, int nr_regs, int regs_off)
>   {
>   	int param_save_area_offset;
>   
> -	param_save_area_offset = func_frame_offset; /* the two frames we alloted */
> +	param_save_area_offset = bpf_frame_size;
>   	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
>   
>   	for (int i = 0; i < nr_regs; i++) {
> @@ -718,11 +705,11 @@ static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context
>   
>   /* Used when we call into the traced function. Replicate parameter save area */
>   static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx,
> -					      int func_frame_offset, int nr_regs, int regs_off)
> +					      int bpf_frame_size, int nr_regs, int regs_off)
>   {
>   	int param_save_area_offset;
>   
> -	param_save_area_offset = func_frame_offset; /* the two frames we alloted */
> +	param_save_area_offset = bpf_frame_size;
>   	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
>   
>   	for (int i = 8; i < nr_regs; i++) {
> @@ -739,10 +726,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   					 void *func_addr)
>   {
>   	int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0;
> -	int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset;
>   	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
>   	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
>   	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> +	int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0;
>   	struct codegen_context codegen_ctx, *ctx;
>   	u32 *image = (u32 *)rw_image;
>   	ppc_inst_t branch_insn;
> @@ -768,16 +755,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   	 * Generated stack layout:
>   	 *
>   	 * func prev back chain         [ back chain        ]
> -	 *                              [                   ]
> -	 * bpf prog redzone/tailcallcnt [ ...               ] 64 bytes (64-bit powerpc)
> -	 *                              [                   ] --
> -	 * LR save area                 [ r0 save (64-bit)  ]   | header
> -	 *                              [ r0 save (32-bit)  ]   |
> -	 * dummy frame for unwind       [ back chain 1      ] --
>   	 *                              [ tail_call_info    ] optional - 64-bit powerpc
>   	 *                              [ padding           ] align stack frame
>   	 *       r4_off                 [ r4 (tailcallcnt)  ] optional - 32-bit powerpc
>   	 *       alt_lr_off             [ real lr (ool stub)] optional - actual lr
> +	 *       retaddr_off            [ return address    ]
>   	 *                              [ r26               ]
>   	 *       nvr_off                [ r25               ] nvr save area
>   	 *       retval_off             [ return value      ]
> @@ -841,6 +823,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   	nvr_off = bpf_frame_size;
>   	bpf_frame_size += 2 * SZL;
>   
> +	/* Save area for return address */
> +	retaddr_off = bpf_frame_size;
> +	bpf_frame_size += SZL;
> +
>   	/* Optional save area for actual LR in case of ool ftrace */
>   	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
>   		alt_lr_off = bpf_frame_size;
> @@ -867,16 +853,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   	/* Padding to align stack frame, if any */
>   	bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
>   
> -	/* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
> -	bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
> -
> -	/* Offset to the traced function's stack frame */
> -	func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;
> -
> -	/* Create dummy frame for unwind, store original return value */
> +	/*  Store original return value */
>   	EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));
> -	/* Protect red zone where tail call count goes */
> -	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size));
>   
>   	/* Create our stack frame */
>   	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));
> @@ -891,14 +869,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   	if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2)
>   		EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
>   
> -	bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off);
> +	bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off);
>   
>   	/* Save our LR/return address */
>   	EMIT(PPC_RAW_MFLR(_R3));
>   	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
>   		EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off));
>   	else
> -		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
> +		EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
>   
>   	/*
>   	 * Get IP address of the traced function.
> @@ -920,9 +898,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   		EMIT(PPC_RAW_STL(_R3, _R1, ip_off));
>   
>   	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
> -		/* Fake our LR for unwind */
> +		/* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */
>   		EMIT(PPC_RAW_ADDI(_R3, _R3, 4));
> -		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
> +		EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
>   	}
>   
>   	/* Save function arg count -- see bpf_get_func_arg_cnt() */
> @@ -961,20 +939,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   	/* Call the traced function */
>   	if (flags & BPF_TRAMP_F_CALL_ORIG) {
>   		/*
> -		 * The address in LR save area points to the correct point in the original function
> +		 * retaddr on trampoline stack points to the correct point in the original function
>   		 * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction
>   		 * sequence
>   		 */
> -		EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
> +		EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off));
>   		EMIT(PPC_RAW_MTCTR(_R3));
>   
>   		/* Replicate tail_call_cnt before calling the original BPF prog */
>   		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
> -			bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset,
> -								bpf_dummy_frame_size, r4_off);
> +			bpf_trampoline_setup_tail_call_info(image, ctx, bpf_frame_size, r4_off);
>   
>   		/* Restore args */
> -		bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off);
> +		bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, nr_regs, regs_off);
>   
>   		/* Restore TOC for 64-bit */
>   		if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
> @@ -988,7 +965,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   
>   		/* Restore updated tail_call_cnt */
>   		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
> -			bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
> +			bpf_trampoline_restore_tail_call_cnt(image, ctx, bpf_frame_size, r4_off);
>   
>   		/* Reserve space to patch branch instruction to skip fexit progs */
>   		if (ro_image) /* image is NULL for dummy pass */
> @@ -1040,7 +1017,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   		EMIT(PPC_RAW_LD(_R2, _R1, 24));
>   	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
>   		/* Skip the traced function and return to parent */
> -		EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
> +		EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
>   		EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
>   		EMIT(PPC_RAW_MTLR(_R0));
>   		EMIT(PPC_RAW_BLR());
> @@ -1048,13 +1025,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   		if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
>   			EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off));
>   			EMIT(PPC_RAW_MTLR(_R0));
> -			EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
> +			EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
>   			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
>   			EMIT(PPC_RAW_BLR());
>   		} else {
> -			EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF));
> +			EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off));
>   			EMIT(PPC_RAW_MTCTR(_R0));
> -			EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
> +			EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
>   			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
>   			EMIT(PPC_RAW_MTLR(_R0));
>   			EMIT(PPC_RAW_BCTR());


Ran the test tailcalls/tailcall_bpf2bpf_hierarchy_fexit in a loop and
the issue is not seen. Without this patch, the crash is observed.


Tested-by: Venkat Rao Bagalkote <venkat88 at linux.ibm.com>


Regards,

Venkat.




More information about the Linuxppc-dev mailing list