[PATCH v2 3/6] powerpc64/bpf: Tailcall handling with trampolines

Hari Bathini hbathini at linux.ibm.com
Sat Jan 17 21:39:07 AEDT 2026



On 14/01/26 5:14 pm, adubey at linux.ibm.com wrote:
> From: Abhishek Dubey <adubey at linux.ibm.com>
> 
> The trampoline mechanism sets up its own stack frame and
> an additional dummy frame. We need to have additional JIT
> instructions handling tailcall dereferencing in the
> trampoline's context.
> 
> We don't add the two stack frames pointed above, rather
> add space for tail_call_info at bottom in trampoline frame
> for ppc64. This makes the trampoline's frame consistent with
> layout of all other frames wrt tail_call_info offset.
> 
> Signed-off-by: Abhishek Dubey <adubey at linux.ibm.com>
> ---
>   arch/powerpc/net/bpf_jit_comp.c | 83 ++++++++++++++++++++++-----------
>   1 file changed, 56 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 069a8822c30d..e3088cf089d1 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -606,33 +606,58 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
>   	return 0;
>   }
>   
> -static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> -					       int func_frame_offset, int r4_off)
> -{
> -	if (IS_ENABLED(CONFIG_PPC64)) {
> -		/* See bpf_jit_stack_tailcallinfo_offset() */
> -		int tailcallcnt_offset = 7 * 8;
> -
> -		EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
> -		EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
> -	} else {
> -		/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
> -		EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
> -	}
> -}
> +/*
> + * Refer the label 'Generated stack layout' in this file for actual stack
> + * layout during trampoline invocation.
> + *
> + * Refer __arch_prepare_bpf_trampoline() for stack component details.
> + *
> + * The tailcall count/reference is present in caller's stack frame. Its required
> + * to copy the content of tail_call_info before calling the actual function
> + * to which the trampoline is attached.
> + *
> + */
>   
> -static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
> -						 int func_frame_offset, int r4_off)
> +static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
> +					       int func_frame_offset,
> +					       int bpf_dummy_frame_size, int r4_off)
>   {
>   	if (IS_ENABLED(CONFIG_PPC64)) {
>   		/* See bpf_jit_stack_tailcallinfo_offset() */

> -		int tailcallcnt_offset = 7 * 8;
> +		int tailcallinfo_offset = BPF_PPC_TAILCALL;

This offset update should have been part of patch #1.

> +		/*
> +		 * func_frame_offset =                                   ...(1)
> +		 *     bpf_dummy_frame_size + trampoline_frame_size
> +		 */
> +		EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
> +		EMIT(PPC_RAW_LD(_R3, _R4, -tailcallinfo_offset));
> +
> +		/*
> +		 * Setting the tail_call_info in trampoline's frame
> +		 * depending on if previous frame had value or reference.
> +		 */
> +		EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
> +		PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
> +		EMIT(PPC_RAW_ADDI(_R3, _R4, bpf_jit_stack_tailcallinfo_offset(ctx)));
> +		/*
> +		 * From ...(1) above:
> +		 * trampoline_frame_bottom =                            ...(2)
> +		 *     func_frame_offset - bpf_dummy_frame_size
> +		 *
> +		 * Using ...(2) derived above:
> +		 *  trampoline_tail_call_info_offset =                  ...(3)
> +		 *      trampoline_frame_bottom - tailcallinfo_offset
> +		 *
> +		 * From ...(3):
> +		 * Use trampoline_tail_call_info_offset to write reference of main's
> +		 * tail_call_info in trampoline frame.
> +		 */
> +		EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size)
> +					- tailcallinfo_offset));
>   
> -		EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
> -		EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
>   	} else {
>   		/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
> -		EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
> +		EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
>   	}
>   }
>   
> @@ -720,6 +745,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   	 * LR save area                 [ r0 save (64-bit)  ]   | header
>   	 *                              [ r0 save (32-bit)  ]   |
>   	 * dummy frame for unwind       [ back chain 1      ] --
> +	 *                              [ tail_call_info    ] non optional - 64-bit powerpc
>   	 *                              [ padding           ] align stack frame
>   	 *       r4_off                 [ r4 (tailcallcnt)  ] optional - 32-bit powerpc
>   	 *       alt_lr_off             [ real lr (ool stub)] optional - actual lr

> @@ -801,8 +827,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
>   		}
>   	}
>   
> -	/* Padding to align stack frame, if any */
> -	bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
> +	if (!(bpf_frame_size % (2 * SZL))) {
> +		/* Stack is 16-byte aligned */
> +		/* Room for padding followed by 64-bit tail_call_info */
> +		bpf_frame_size += SZL + BPF_PPC_TAILCALL;
> +	} else {
> +		/* Room for 64-bit tail_call_info */
> +		bpf_frame_size += BPF_PPC_TAILCALL;
> +	}
>   
>   	/* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
>   	bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;

This change assumes the size is at least 8-byte aligned, which is
true today, but it is better to drop that assumption by not touching
the padding part. The above hunk could simply be:

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 5e976730b2f5..266cc6f17dcc 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -795,6 +795,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
                 }
         }

+       /* Save tailcall count pointer at the same offset on the stack where subprogs expect it */
+       if ((flags & BPF_TRAMP_F_CALL_ORIG) && (flags & BPF_TRAMP_F_TAIL_CALL_CTX))
+               bpf_frame_size += SZL;
+
         /* Padding to align stack frame, if any */
         bpf_frame_size = round_up(bpf_frame_size, SZL * 2);


Patch#2 is not complete without this change. Please fold this patch
into patch#2 itself.

- Hari



More information about the Linuxppc-dev mailing list