[PATCH v2 4/4] powerpc/bpf: use bpf_jit_binary_pack_[alloc|finalize|free]

Hari Bathini hbathini at linux.ibm.com
Sat Aug 26 01:40:47 AEST 2023



On 11/03/23 4:05 am, Song Liu wrote:
> On Thu, Mar 9, 2023 at 10:03 AM Hari Bathini <hbathini at linux.ibm.com> wrote:
>>
>> Use bpf_jit_binary_pack_alloc in powerpc jit. The jit engine first
>> writes the program to the rw buffer. When the jit is done, the program
>> is copied to the final location with bpf_jit_binary_pack_finalize.
>> With multiple jit_subprogs, bpf_jit_free is called on some subprograms
>> that haven't got bpf_jit_binary_pack_finalize() yet. Implement custom
>> bpf_jit_free() like in commit 1d5f82d9dd47 ("bpf, x86: fix freeing of
>> not-finalized bpf_prog_pack") to call bpf_jit_binary_pack_finalize(),
>> if necessary. While here, correct the misnomer powerpc64_jit_data to
>> powerpc_jit_data as it is meant for both ppc32 and ppc64.
>>
>> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
>> ---
>>   arch/powerpc/net/bpf_jit.h        |   7 +-
>>   arch/powerpc/net/bpf_jit_comp.c   | 104 +++++++++++++++++++++---------
>>   arch/powerpc/net/bpf_jit_comp32.c |   4 +-
>>   arch/powerpc/net/bpf_jit_comp64.c |   6 +-
>>   4 files changed, 83 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
>> index d767e39d5645..a8b7480c4d43 100644
>> --- a/arch/powerpc/net/bpf_jit.h
>> +++ b/arch/powerpc/net/bpf_jit.h
>> @@ -168,15 +168,16 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
>>
>>   void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
>>   int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
>> -int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
>> +int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
>>                         u32 *addrs, int pass, bool extra_pass);
>>   void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
>>   void bpf_jit_realloc_regs(struct codegen_context *ctx);
>>   int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
>>
>> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
>> -                         int insn_idx, int jmp_off, int dst_reg);
>> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
>> +                         struct codegen_context *ctx, int insn_idx,
>> +                         int jmp_off, int dst_reg);
>>
>>   #endif
>>
>> diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
>> index d1794d9f0154..ece75c829499 100644
>> --- a/arch/powerpc/net/bpf_jit_comp.c
>> +++ b/arch/powerpc/net/bpf_jit_comp.c
>> @@ -42,10 +42,11 @@ int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg,
>>          return 0;
>>   }
>>
>> -struct powerpc64_jit_data {
>> -       struct bpf_binary_header *header;
>> +struct powerpc_jit_data {
>> +       struct bpf_binary_header *hdr;
>> +       struct bpf_binary_header *fhdr;
>>          u32 *addrs;
>> -       u8 *image;
>> +       u8 *fimage;
>>          u32 proglen;
>>          struct codegen_context ctx;
>>   };
> 
> Some comments about the f- prefix will be helpful. (Yes, I should have done
> better job adding comments for the x86 counterpart..)
> 
>> @@ -62,15 +63,18 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          u8 *image = NULL;
>>          u32 *code_base;
>>          u32 *addrs;
>> -       struct powerpc64_jit_data *jit_data;
>> +       struct powerpc_jit_data *jit_data;
>>          struct codegen_context cgctx;
>>          int pass;
>>          int flen;
>> -       struct bpf_binary_header *bpf_hdr;
>> +       struct bpf_binary_header *fhdr = NULL;
>> +       struct bpf_binary_header *hdr = NULL;
>>          struct bpf_prog *org_fp = fp;
>>          struct bpf_prog *tmp_fp;
>>          bool bpf_blinded = false;
>>          bool extra_pass = false;
>> +       u8 *fimage = NULL;
>> +       u32 *fcode_base;
>>          u32 extable_len;
>>          u32 fixup_len;
>>
>> @@ -100,9 +104,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          addrs = jit_data->addrs;
>>          if (addrs) {
>>                  cgctx = jit_data->ctx;
>> -               image = jit_data->image;
>> -               bpf_hdr = jit_data->header;
>> +               fimage = jit_data->fimage;
>> +               fhdr = jit_data->fhdr;
>>                  proglen = jit_data->proglen;
>> +               hdr = jit_data->hdr;
>> +               image = (void *)hdr + ((void *)fimage - (void *)fhdr);
>>                  extra_pass = true;
>>                  goto skip_init_ctx;
>>          }
>> @@ -120,7 +126,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>>
>>          /* Scouting faux-generate pass 0 */
>> -       if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
>> +       if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>>                  /* We hit something illegal or unsupported. */
>>                  fp = org_fp;
>>                  goto out_addrs;
>> @@ -135,7 +141,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>           */
>>          if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
>>                  cgctx.idx = 0;
>> -               if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
>> +               if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
>>                          fp = org_fp;
>>                          goto out_addrs;
>>                  }
>> @@ -157,17 +163,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>          proglen = cgctx.idx * 4;
>>          alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
>>
>> -       bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns);
>> -       if (!bpf_hdr) {
>> +       fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
>> +                                             bpf_jit_fill_ill_insns);
>> +       if (!fhdr) {
>>                  fp = org_fp;
>>                  goto out_addrs;
>>          }
>>
>>          if (extable_len)
>> -               fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>> +               fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
>>
>>   skip_init_ctx:
>>          code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
>> +       fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
>>
>>          /* Code generation passes 1-2 */
>>          for (pass = 1; pass < 3; pass++) {
>> @@ -175,8 +183,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                  cgctx.idx = 0;
>>                  cgctx.alt_exit_addr = 0;
>>                  bpf_jit_build_prologue(code_base, &cgctx);
>> -               if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
>> -                       bpf_jit_binary_free(bpf_hdr);
>> +               if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass, extra_pass)) {
>> +                       bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
>> +                       bpf_jit_binary_pack_free(fhdr, hdr);
>>                          fp = org_fp;
>>                          goto out_addrs;
>>                  }
>> @@ -192,21 +201,23 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                   * Note that we output the base address of the code_base
>>                   * rather than image, since opcodes are in code_base.
>>                   */
> Maybe update the comment above with fcode_base to avoid
> confusion.
> 
>> -               bpf_jit_dump(flen, proglen, pass, code_base);
>> +               bpf_jit_dump(flen, proglen, pass, fcode_base);
>>
>>   #ifdef CONFIG_PPC64_ELF_ABI_V1
>>          /* Function descriptor nastiness: Address + TOC */
>> -       ((u64 *)image)[0] = (u64)code_base;
>> +       ((u64 *)image)[0] = (u64)fcode_base;
>>          ((u64 *)image)[1] = local_paca->kernel_toc;
>>   #endif
>>
>> -       fp->bpf_func = (void *)image;
>> +       fp->bpf_func = (void *)fimage;
>>          fp->jited = 1;
>>          fp->jited_len = proglen + FUNCTION_DESCR_SIZE;
>>
>> -       bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size);
>>          if (!fp->is_func || extra_pass) {
>> -               bpf_jit_binary_lock_ro(bpf_hdr);
>> +               if (bpf_jit_binary_pack_finalize(fp, fhdr, hdr)) {
>> +                       fp = org_fp;
>> +                       goto out_addrs;
>> +               }
>>                  bpf_prog_fill_jited_linfo(fp, addrs);
>>   out_addrs:
>>                  kfree(addrs);
>> @@ -216,8 +227,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>                  jit_data->addrs = addrs;
>>                  jit_data->ctx = cgctx;
>>                  jit_data->proglen = proglen;
>> -               jit_data->image = image;
>> -               jit_data->header = bpf_hdr;
>> +               jit_data->fimage = fimage;
>> +               jit_data->fhdr = fhdr;
>> +               jit_data->hdr = hdr;
>>          }
>>
>>   out:
>> @@ -231,12 +243,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
>>    * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
>>    * this function, as this only applies to BPF_PROBE_MEM, for now.
>>    */
>> -int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
>> -                         int insn_idx, int jmp_off, int dst_reg)
>> +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
>> +                         struct codegen_context *ctx, int insn_idx, int jmp_off,
>> +                         int dst_reg)
>>   {
>>          off_t offset;
>>          unsigned long pc;
>> -       struct exception_table_entry *ex;
>> +       struct exception_table_entry *ex, *ex_entry;
>>          u32 *fixup;
>>
>>          /* Populate extable entries only in the last pass */
>> @@ -247,9 +260,16 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>>              WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
>>                  return -EINVAL;
>>
>> +       /*
>> +        * Program is firt written to image before copying to the
> s/firt/first/
> 
>> +        * final location (fimage). Accordingly, update in the image first.
>> +        * As all offsets used are relative, copying as is to the
>> +        * final location should be alright.
>> +        */
>>          pc = (unsigned long)&image[insn_idx];
>> +       ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
>>
>> -       fixup = (void *)fp->aux->extable -
>> +       fixup = (void *)ex -
>>                  (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
>>                  (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
>>
>> @@ -260,17 +280,17 @@ int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct code
>>          fixup[BPF_FIXUP_LEN - 1] =
>>                  PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
>>
>> -       ex = &fp->aux->extable[ctx->exentry_idx];
>> +       ex_entry = &ex[ctx->exentry_idx];
>>
>> -       offset = pc - (long)&ex->insn;
>> +       offset = pc - (long)&ex_entry->insn;
>>          if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>>                  return -ERANGE;
>> -       ex->insn = offset;
>> +       ex_entry->insn = offset;
>>
>> -       offset = (long)fixup - (long)&ex->fixup;
>> +       offset = (long)fixup - (long)&ex_entry->fixup;
>>          if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
>>                  return -ERANGE;
>> -       ex->fixup = offset;
>> +       ex_entry->fixup = offset;
>>
>>          ctx->exentry_idx++;
>>          return 0;
>> @@ -308,3 +328,27 @@ int bpf_arch_text_invalidate(void *dst, size_t len)
>>
>>          return ret;
>>   }
>> +
>> +void bpf_jit_free(struct bpf_prog *fp)
>> +{
>> +       if (fp->jited) {
>> +               struct powerpc_jit_data *jit_data = fp->aux->jit_data;
>> +               struct bpf_binary_header *hdr;
>> +
>> +               /*
>> +                * If we fail the final pass of JIT (from jit_subprogs),
>> +                * the program may not be finalized yet. Call finalize here
>> +                * before freeing it.
>> +                */
>> +               if (jit_data) {
>> +                       bpf_jit_binary_pack_finalize(fp, jit_data->fhdr, jit_data->hdr);
> 
> I just realized x86 is the same. But I think we only need the following
> here?
> 
> bpf_arch_text_copy(&jit_data->fhdr->size, &jit_data->hdr->size,
> sizeof(jit_data->hdr->size));
> 
> Right?

Thanks for reviewing.
Better off with bpf_jit_binary_pack_finalize, probably?
Kept it that way for v3. Posted v3. Please review.

Thanks
Hari


More information about the Linuxppc-dev mailing list