[PATCH v2 4/5] powerpc64/bpf: remove BPF redzone protection in trampoline stack
Venkat Rao Bagalkote
venkat88 at linux.ibm.com
Sat Feb 21 14:43:46 AEDT 2026
On 20/02/26 12:09 pm, Hari Bathini wrote:
> Since bpf2bpf tailcall support is enabled for 64-bit powerpc with
> kernel commit 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with
> subprogs"), 'tailcalls/tailcall_bpf2bpf_hierarchy_fexit' BPF selftest
> is triggering "corrupted stack end detected inside scheduler" with the
> config option CONFIG_SCHED_STACK_END_CHECK enabled. While reviewing
> the stack layout for BPF trampoline, observed that the dummy frame is
> trying to protect the redzone of BPF program. This is because tail
> call info and NVRs save area are in redzone at the time of tailcall
> as the current BPF program stack frame is teared down before the
> tailcall. But saving this redzone in the dummy frame of trampoline
> is unnecessary for the following reasons:
>
> 1) Firstly, trampoline can be attached to BPF entry/main program
> or subprog. But prologue part of the BPF entry/main program,
> where the trampoline attachpoint is, is skipped during tailcall.
> So, the need to protect the redzone does not arise when the
> trampoline is not even triggered in this scenario.
> 2) In case of subprog, the caller's stackframe is already setup
> and the subprog's stackframe is yet to be setup. So, nothing
> on the redzone to be protected.
>
> Also, using dummy frame in BPF trampoline, wastes critically scarce
> kernel stack space, especially in tailcall sequence, for marginal
> benefit in stack unwinding. So, drop setting up the dummy frame.
> Instead, save return address in bpf trampoline frame and use it as
> appropriate. Pruning this unnecessary stack usage mitigates the
> likelihood of stack overflow in scenarios where bpf2bpf tailcalls
> and fexit programs are mixed.
>
> Reported-by: Saket Kumar Bhaskar <skb99 at linux.ibm.com>
> Fixes: 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with subprogs")
> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> ---
> arch/powerpc/net/bpf_jit_comp.c | 89 ++++++++++++---------------------
> 1 file changed, 33 insertions(+), 56 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 860b118391ed..256f9ee350eb 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -638,15 +638,10 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
> * for the traced function (BPF subprog/callee) to fetch it.
> */
> static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
> - int func_frame_offset,
> - int bpf_dummy_frame_size, int r4_off)
> + int bpf_frame_size, int r4_off)
> {
> if (IS_ENABLED(CONFIG_PPC64)) {
> - /*
> - * func_frame_offset = ...(1)
> - * bpf_dummy_frame_size + trampoline_frame_size
> - */
> - EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
> + EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size));
> /* Refer to trampoline's Generated stack layout */
> EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL));
>
> @@ -657,21 +652,13 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte
> EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
> PPC_BCC_CONST_SHORT(COND_GT, 8);
> EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL));
> +
> /*
> - * From ...(1) above:
> - * trampoline_frame_bottom = ...(2)
> - * func_frame_offset - bpf_dummy_frame_size
> - *
> - * Using ...(2) derived above:
> - * trampoline_tail_call_info_offset = ...(3)
> - * trampoline_frame_bottom - BPF_PPC_TAILCALL
> - *
> - * From ...(3):
> - * Use trampoline_tail_call_info_offset to write reference of main's
> - * tail_call_info in trampoline frame.
> + * Trampoline's tail_call_info is at the same offset, as that of
> + * any bpf program, with reference to previous frame. Update the
> + * address of main's tail_call_info in trampoline frame.
> */
> - EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size)
> - - BPF_PPC_TAILCALL));
> + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL));
> } else {
> /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
> EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
> @@ -679,7 +666,7 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte
> }
>
> static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> - int func_frame_offset, int r4_off)
> + int bpf_frame_size, int r4_off)
> {
> if (IS_ENABLED(CONFIG_PPC32)) {
> /*
> @@ -690,12 +677,12 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont
> }
> }
>
> -static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset,
> - int nr_regs, int regs_off)
> +static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx,
> + int bpf_frame_size, int nr_regs, int regs_off)
> {
> int param_save_area_offset;
>
> - param_save_area_offset = func_frame_offset; /* the two frames we alloted */
> + param_save_area_offset = bpf_frame_size;
> param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
>
> for (int i = 0; i < nr_regs; i++) {
> @@ -718,11 +705,11 @@ static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context
>
> /* Used when we call into the traced function. Replicate parameter save area */
> static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx,
> - int func_frame_offset, int nr_regs, int regs_off)
> + int bpf_frame_size, int nr_regs, int regs_off)
> {
> int param_save_area_offset;
>
> - param_save_area_offset = func_frame_offset; /* the two frames we alloted */
> + param_save_area_offset = bpf_frame_size;
> param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
>
> for (int i = 8; i < nr_regs; i++) {
> @@ -739,10 +726,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> void *func_addr)
> {
> int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0;
> - int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset;
> struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> + int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0;
> struct codegen_context codegen_ctx, *ctx;
> u32 *image = (u32 *)rw_image;
> ppc_inst_t branch_insn;
> @@ -768,16 +755,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> * Generated stack layout:
> *
> * func prev back chain [ back chain ]
> - * [ ]
> - * bpf prog redzone/tailcallcnt [ ... ] 64 bytes (64-bit powerpc)
> - * [ ] --
> - * LR save area [ r0 save (64-bit) ] | header
> - * [ r0 save (32-bit) ] |
> - * dummy frame for unwind [ back chain 1 ] --
> * [ tail_call_info ] optional - 64-bit powerpc
> * [ padding ] align stack frame
> * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc
> * alt_lr_off [ real lr (ool stub)] optional - actual lr
> + * retaddr_off [ return address ]
> * [ r26 ]
> * nvr_off [ r25 ] nvr save area
> * retval_off [ return value ]
> @@ -841,6 +823,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> nvr_off = bpf_frame_size;
> bpf_frame_size += 2 * SZL;
>
> + /* Save area for return address */
> + retaddr_off = bpf_frame_size;
> + bpf_frame_size += SZL;
> +
> /* Optional save area for actual LR in case of ool ftrace */
> if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
> alt_lr_off = bpf_frame_size;
> @@ -867,16 +853,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> /* Padding to align stack frame, if any */
> bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
>
> - /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
> - bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
> -
> - /* Offset to the traced function's stack frame */
> - func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;
> -
> - /* Create dummy frame for unwind, store original return value */
> + /* Store original return value */
> EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));
> - /* Protect red zone where tail call count goes */
> - EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size));
>
> /* Create our stack frame */
> EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));
> @@ -891,14 +869,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2)
> EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
>
> - bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off);
> + bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off);
>
> /* Save our LR/return address */
> EMIT(PPC_RAW_MFLR(_R3));
> if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
> EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off));
> else
> - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
> + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
>
> /*
> * Get IP address of the traced function.
> @@ -920,9 +898,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> EMIT(PPC_RAW_STL(_R3, _R1, ip_off));
>
> if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
> - /* Fake our LR for unwind */
> + /* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */
> EMIT(PPC_RAW_ADDI(_R3, _R3, 4));
> - EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
> + EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
> }
>
> /* Save function arg count -- see bpf_get_func_arg_cnt() */
> @@ -961,20 +939,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> /* Call the traced function */
> if (flags & BPF_TRAMP_F_CALL_ORIG) {
> /*
> - * The address in LR save area points to the correct point in the original function
> + * retaddr on trampoline stack points to the correct point in the original function
> * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction
> * sequence
> */
> - EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
> + EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off));
> EMIT(PPC_RAW_MTCTR(_R3));
>
> /* Replicate tail_call_cnt before calling the original BPF prog */
> if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
> - bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset,
> - bpf_dummy_frame_size, r4_off);
> + bpf_trampoline_setup_tail_call_info(image, ctx, bpf_frame_size, r4_off);
>
> /* Restore args */
> - bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off);
> + bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, nr_regs, regs_off);
>
> /* Restore TOC for 64-bit */
> if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
> @@ -988,7 +965,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>
> /* Restore updated tail_call_cnt */
> if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
> - bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
> + bpf_trampoline_restore_tail_call_cnt(image, ctx, bpf_frame_size, r4_off);
>
> /* Reserve space to patch branch instruction to skip fexit progs */
> if (ro_image) /* image is NULL for dummy pass */
> @@ -1040,7 +1017,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> EMIT(PPC_RAW_LD(_R2, _R1, 24));
> if (flags & BPF_TRAMP_F_SKIP_FRAME) {
> /* Skip the traced function and return to parent */
> - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
> + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
> EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
> EMIT(PPC_RAW_MTLR(_R0));
> EMIT(PPC_RAW_BLR());
> @@ -1048,13 +1025,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
> if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
> EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off));
> EMIT(PPC_RAW_MTLR(_R0));
> - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
> + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
> EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
> EMIT(PPC_RAW_BLR());
> } else {
> - EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF));
> + EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off));
> EMIT(PPC_RAW_MTCTR(_R0));
> - EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
> + EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
> EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
> EMIT(PPC_RAW_MTLR(_R0));
> EMIT(PPC_RAW_BCTR());
Ran the test tailcalls/tailcall_bpf2bpf_hierarchy_fexit in a loop and the
issue is not seen. Without this patch, the crash is observed.
Tested-by: Venkat Rao Bagalkote <venkat88 at linux.ibm.com>
Regards,
Venkat.
More information about the Linuxppc-dev
mailing list