[PATCH 1/2] powerpc64/bpf: Implement JIT support for private stack
Christophe Leroy (CS GROUP)
chleroy at kernel.org
Mon Feb 23 22:10:44 AEDT 2026
Le 16/02/2026 à 16:22, adubey at linux.ibm.com a écrit :
> From: Abhishek Dubey <adubey at linux.ibm.com>
>
> Provision the private stack as a per-CPU allocation during
> bpf_int_jit_compile(). Align the stack to 16 bytes and place guard
> regions at both ends to detect runtime stack overflow and underflow.
>
> Round the private stack size up to the nearest 16-byte boundary.
> Make each guard region 16 bytes to preserve the required overall
> 16-byte alignment. When private stack is set, skip bpf stack size
> accounting in kernel stack.
>
> There is no stack pointer in powerpc. Stack referencing during JIT
> is done using frame pointer. Frame pointer calculation goes like:
>
> BPF frame pointer = Priv stack allocation start address +
> Overflow guard +
> Actual stack size defined by verifier
>
> Memory layout:
>
> High Addr +--------------------------------------------------+
> | |
> | 16 bytes Underflow guard (0xEB9F12345678eb9fULL) |
> | |
> BPF FP -> +--------------------------------------------------+
> | |
> | Private stack - determined by verifier |
> | 16-bytes aligned |
> | |
> +--------------------------------------------------+
> | |
> Lower Addr | 16 byte Overflow guard (0xEB9F12345678eb9fULL) |
> | |
> Priv stack alloc ->+--------------------------------------------------+
> start
>
> Update BPF_REG_FP to point to the calculated offset within the
> allocated private stack buffer. Now, BPF stack usage references
> the allocated private stack.
>
> The patch is rebased on top of fixes by Hari:
> https://lore.kernel.org/bpf/20260216065639.1750181-1-hbathini@linux.ibm.com/T/#mf02cad9096fa4ad1f05610b1f464da1cddf7445a
>
> Signed-off-by: Abhishek Dubey <adubey at linux.ibm.com>
> ---
> arch/powerpc/net/bpf_jit.h | 5 +++
> arch/powerpc/net/bpf_jit_comp.c | 74 +++++++++++++++++++++++++++++++
> arch/powerpc/net/bpf_jit_comp64.c | 38 +++++++++++++---
> 3 files changed, 112 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
> index 7354e1d72f79..5a115c54e43a 100644
> --- a/arch/powerpc/net/bpf_jit.h
> +++ b/arch/powerpc/net/bpf_jit.h
> @@ -178,8 +178,13 @@ struct codegen_context {
> bool is_subprog;
> bool exception_boundary;
> bool exception_cb;
> + u64 priv_sp;
Are you sure you want a u64, not a long ?
> };
>
> +/* Memory size & magic-value to detect private stack overflow/underflow */
> +#define PRIV_STACK_GUARD_SZ 16
> +#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
> +
> #define bpf_to_ppc(r) (ctx->b2p[r])
>
> #ifdef CONFIG_PPC32
> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
> index 278e09b57560..7a78e03d482f 100644
> --- a/arch/powerpc/net/bpf_jit_comp.c
> +++ b/arch/powerpc/net/bpf_jit_comp.c
> @@ -129,6 +129,39 @@ bool bpf_jit_needs_zext(void)
> return true;
> }
>
> +static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
> +{
> + int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
> + u64 *stack_ptr;
> +
> + for_each_possible_cpu(cpu) {
> + stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
> + stack_ptr[0] = PRIV_STACK_GUARD_VAL;
> + stack_ptr[1] = PRIV_STACK_GUARD_VAL;
> + stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL;
> + stack_ptr[underflow_idx + 1] = PRIV_STACK_GUARD_VAL;
> + }
> +}
> +
> +static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size,
> + struct bpf_prog *fp)
> +{
> + int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
> + u64 *stack_ptr;
> +
> + for_each_possible_cpu(cpu) {
> + stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu);
> + if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
> + stack_ptr[1] != PRIV_STACK_GUARD_VAL ||
> + stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL ||
> + stack_ptr[underflow_idx + 1] != PRIV_STACK_GUARD_VAL) {
> + pr_err("BPF private stack overflow/underflow detected for prog %s\n",
> + bpf_jit_get_prog_name(fp));
> + break;
> + }
> + }
> +}
> +
> struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> {
> u32 proglen;
> @@ -140,6 +173,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> struct codegen_context cgctx;
> int pass;
> int flen;
> + int priv_stack_alloc_size;
> + void __percpu *priv_stack_ptr = NULL;
> struct bpf_binary_header *fhdr = NULL;
> struct bpf_binary_header *hdr = NULL;
> struct bpf_prog *org_fp = fp;
> @@ -173,6 +208,25 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> fp->aux->jit_data = jit_data;
> }
>
> + if (!priv_stack_ptr && fp->aux->jits_use_priv_stack) {
> + /*
> + * Allocate private stack of size equivalent to
> + * verifier-calculated stack size plus two memory
> + * guard regions to detect private stack overflow
> + * and underflow.
> + */
> + priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> + 2 * PRIV_STACK_GUARD_SZ;
> + priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_size, 16, GFP_KERNEL);
> + if (!priv_stack_ptr) {
> + fp = org_fp;
> + goto out_priv_stack;
> + }
> +
> + priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_size);
> + fp->aux->priv_stack_ptr = priv_stack_ptr;
> + }
> +
> flen = fp->len;
> addrs = jit_data->addrs;
> if (addrs) {
> @@ -209,6 +263,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> cgctx.is_subprog = bpf_is_subprog(fp);
> cgctx.exception_boundary = fp->aux->exception_boundary;
> cgctx.exception_cb = fp->aux->exception_cb;
> + cgctx.priv_sp = priv_stack_ptr ? (u64)priv_stack_ptr : 0;
priv_stack_ptr is a pointer, ie a long.
priv_sp is a u64, ie a long long.
This will mismatch on powerpc/32
>
> /* Scouting faux-generate pass 0 */
> if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
> @@ -306,7 +361,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
> }
> bpf_prog_fill_jited_linfo(fp, addrs);
> out_addrs:
> + if (!image && priv_stack_ptr) {
> + free_percpu(priv_stack_ptr);
> + fp->aux->priv_stack_ptr = NULL;
Shouldn't those two lines be swapped ?
> + }
> kfree(addrs);
> +out_priv_stack:
> kfree(jit_data);
> fp->aux->jit_data = NULL;
> } else {
> @@ -419,6 +479,8 @@ void bpf_jit_free(struct bpf_prog *fp)
> if (fp->jited) {
> struct powerpc_jit_data *jit_data = fp->aux->jit_data;
> struct bpf_binary_header *hdr;
> + void __percpu *priv_stack_ptr;
> + int priv_stack_alloc_size;
>
> /*
> * If we fail the final pass of JIT (from jit_subprogs),
> @@ -432,6 +494,13 @@ void bpf_jit_free(struct bpf_prog *fp)
> }
> hdr = bpf_jit_binary_pack_hdr(fp);
> bpf_jit_binary_pack_free(hdr, NULL);
> + priv_stack_ptr = fp->aux->priv_stack_ptr;
> + if (priv_stack_ptr) {
> + priv_stack_alloc_size = round_up(fp->aux->stack_depth, 16) +
> + 2 * PRIV_STACK_GUARD_SZ;
> + priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_size, fp);
> + free_percpu(priv_stack_ptr);
> + }
> WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
> }
>
> @@ -453,6 +522,11 @@ bool bpf_jit_supports_kfunc_call(void)
> return true;
> }
>
> +bool bpf_jit_supports_private_stack(void)
> +{
> + return IS_ENABLED(CONFIG_PPC64);
> +}
> +
> bool bpf_jit_supports_arena(void)
> {
> return IS_ENABLED(CONFIG_PPC64);
> diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
> index 640b84409687..206ef43b4090 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c
> @@ -183,6 +183,20 @@ void bpf_jit_realloc_regs(struct codegen_context *ctx)
> {
> }
>
> +static void emit_fp_priv_stack(u32 *image, struct codegen_context *ctx, void __percpu *ptr)
> +{
> + /* Load percpu data offset */
> + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R13,
> + offsetof(struct paca_struct, data_offset)));
> + PPC_LI64(bpf_to_ppc(BPF_REG_FP), (u64)ptr);
> + /*
> + * Set frame pointer with percpu allocated
> + * buffer for private stack.
> + */
> + EMIT(PPC_RAW_ADD(bpf_to_ppc(BPF_REG_FP),
> + bpf_to_ppc(TMP_REG_1), bpf_to_ppc(BPF_REG_FP)));
> +}
> +
> /*
> * For exception boundary & exception_cb progs:
> * return increased size to accommodate additional NVRs.
> @@ -197,6 +211,12 @@ static int bpf_jit_stack_size(struct codegen_context *ctx)
> void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> {
> int i;
> + void __percpu *priv_frame_ptr = NULL;
> +
> + if (ctx->priv_sp) {
> + priv_frame_ptr = (void*) ctx->priv_sp + PRIV_STACK_GUARD_SZ +
> + round_up(ctx->stack_size, 16);
> + }
>
> /* Instruction for trampoline attach */
> EMIT(PPC_RAW_NOP());
> @@ -251,7 +271,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> }
>
> EMIT(PPC_RAW_STDU(_R1, _R1,
> - -(bpf_jit_stack_size(ctx) + ctx->stack_size)));
> + -(bpf_jit_stack_size(ctx) + (ctx->priv_sp ? 0 : ctx->stack_size))));
> }
>
> /*
> @@ -307,9 +327,16 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
> * Exception_cb not restricted from using stack area or arena.
> * Setup frame pointer to point to the bpf stack area
> */
> - if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
> - EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
> - STACK_FRAME_MIN_SIZE + ctx->stack_size));
> + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
> + if (ctx->priv_sp && priv_frame_ptr) {
> + /* Set up private stack pointer */
> + emit_fp_priv_stack(image, ctx, priv_frame_ptr);
> + } else {
> + /* Setup frame pointer to point to the bpf stack area */
> + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
> + STACK_FRAME_MIN_SIZE + ctx->stack_size));
> + }
> + }
>
> if (ctx->arena_vm_start)
> PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);
> @@ -339,7 +366,8 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
>
> /* Tear down our stack frame */
> if (bpf_has_stack_frame(ctx)) {
> - EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_jit_stack_size(ctx) + ctx->stack_size));
> + EMIT(PPC_RAW_ADDI(_R1, _R1,
> + bpf_jit_stack_size(ctx) + (ctx->priv_sp ? 0 : ctx->stack_size)));
>
> if (ctx->seen & SEEN_FUNC || ctx->exception_cb) {
> EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF));
More information about the Linuxppc-dev
mailing list