[RFC PATCH 3/4] powerpc/64: Add support for out-of-line static calls

Christophe Leroy christophe.leroy at csgroup.eu
Thu Sep 1 17:33:28 AEST 2022



Le 01/09/2022 à 07:58, Benjamin Gray a écrit :
> [Vous ne recevez pas souvent de courriers de bgray at linux.ibm.com. Découvrez pourquoi ceci est important à https://aka.ms/LearnAboutSenderIdentification ]
> 
> Implement static call support for 64 bit V2 ABI. This requires
> making sure the TOC is kept correct across kernel-module
> boundaries. As a secondary concern, it tries to use the local
> entry point of a target wherever possible. It does so by
> checking if both tramp & target are kernel code, and falls
> back to detecting the common global entry point patterns
> if modules are involved. Detecting the global entry point is
> also required for setting the local entry point as the trampoline
> target: if we cannot detect the local entry point, then we need to
> conservatively initialise r12 and use the global entry point.
> 
> The implementation is incorrect as-is, because a trampoline cannot
> use the stack. Doing so has the same issue described in 85baa095,
> where parameters passed relative to the stack pointer (large arg count
> or varargs) are broken. However the trampoline must guarantee the
> TOC be restored before the caller uses it again.

And the implementation as-is wouldn't have much added value 
performance-wise because of the additional save/restore on the stack.

> 
> Static call sites are themselves static, so it is feasible to handle
> this by patching the callsites. However the linker currently
> does not seem to realise that the trampoline treats r2 as volatile
> (even with an alternative trampoline that does not have a separate
> local entry point), and so does not insert the appropriate restoration
> at the call site.
> 
> Signed-off-by: Benjamin Gray <bgray at linux.ibm.com>
> ---
>   arch/powerpc/Kconfig                     | 13 +++-
>   arch/powerpc/include/asm/code-patching.h |  1 +
>   arch/powerpc/include/asm/static_call.h   | 45 ++++++++++++-
>   arch/powerpc/kernel/Makefile             |  3 +-
>   arch/powerpc/kernel/static_call.c        | 80 ++++++++++++++++++++++--
>   5 files changed, 132 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 4c466acdc70d..1d5abbeb2c40 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -248,7 +248,7 @@ config PPC
>          select HAVE_SOFTIRQ_ON_OWN_STACK
>          select HAVE_STACKPROTECTOR              if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
>          select HAVE_STACKPROTECTOR              if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
> -       select HAVE_STATIC_CALL                 if PPC32
> +       select HAVE_STATIC_CALL                 if PPC_ENABLE_STATIC_CALL
>          select HAVE_SYSCALL_TRACEPOINTS
>          select HAVE_VIRT_CPU_ACCOUNTING
>          select HUGETLB_PAGE_SIZE_VARIABLE       if PPC_BOOK3S_64 && HUGETLB_PAGE
> @@ -1023,6 +1023,17 @@ config PPC_RTAS_FILTER
>            Say Y unless you know what you are doing and the filter is causing
>            problems for you.
> 
> +config PPC_ENABLE_STATIC_CALL
> +       bool "Enable static calls"
> +       default y
> +       depends on PPC32 || PPC64_ELF_ABI_V2
> +       help
> +         PowerPC static calls with the ELF V2 ABI are not as straightforward
> +         as checking if the target is in range of a branch or not. They must
> +         also ensure the TOC remains consistent. This leads to complex
> +         performance results, so it's useful to make them configurable to
> +         allow testing with the generic implementation too.
> +

Only as part of your RFC I guess; we shouldn't make it configurable in 
the end.

>   endmenu
> 
>   config ISA_DMA_API
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 3de90748bce7..319cb1eef71c 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -126,6 +126,7 @@ int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
>   bool is_conditional_branch(ppc_inst_t instr);
> 
>   #define OP_RT_RA_MASK  0xffff0000UL
> +#define OP_SI_MASK     0x0000ffffUL
>   #define LIS_R2         (PPC_RAW_LIS(_R2, 0))
>   #define ADDIS_R2_R12   (PPC_RAW_ADDIS(_R2, _R12, 0))
>   #define ADDI_R2_R2     (PPC_RAW_ADDI(_R2, _R2, 0))
> diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
> index de1018cc522b..d85ff3f88c8e 100644
> --- a/arch/powerpc/include/asm/static_call.h
> +++ b/arch/powerpc/include/asm/static_call.h
> @@ -2,12 +2,49 @@
>   #ifndef _ASM_POWERPC_STATIC_CALL_H
>   #define _ASM_POWERPC_STATIC_CALL_H
> 
> +#if defined(CONFIG_PPC64_ELF_ABI_V2)
> +
> +#define __PPC_SCT(name, inst)                                  \
> +       asm(".pushsection .text, \"ax\"                         \n"     \
> +           ".align 6                                           \n"     \
> +           ".globl " STATIC_CALL_TRAMP_STR(name) "             \n"     \
> +           STATIC_CALL_TRAMP_STR(name) ":                      \n"     \
> +           "   addis   2, 12, (.TOC.-" STATIC_CALL_TRAMP_STR(name) ")@ha \n"   \
> +           "   addi    2, 2, (.TOC.-" STATIC_CALL_TRAMP_STR(name) ")@l   \n"   \
> +           ".localentry " STATIC_CALL_TRAMP_STR(name) ", .-" STATIC_CALL_TRAMP_STR(name) "\n" \
> +           "   " inst "                                        \n"     \
> +           "   mflr    0                                       \n"     \
> +           "   std     0, 16(1)                                \n"     \

Use PPC_LR_STKOFF

> +           "   stdu    1, -32(1)                               \n"     \

Is that correct? For PPC64 we have PPC_MIN_STKFRM    112

> +           "   std     2, 24(1)                                \n"     \

Use R2_STACK_OFFSET

> +           "   addis   12, 2, 2f@toc@ha                        \n"     \
> +           "   ld      12, 2f@toc@l(12)                        \n"     \
> +           "   mtctr   12                                      \n"     \
> +           "   bctrl                                           \n"     \
> +           "   ld      2, 24(1)                                \n"     \
> +           "   addi    1, 1, 32                                \n"     \
> +           "   ld      0, 16(1)                                \n"     \
> +           "   mtlr    0                                       \n"     \
> +           "   blr                                             \n"     \
> +           "1: li      3, 0                                    \n"     \
> +           "   blr                                             \n"     \
> +           ".balign 8                                          \n"     \
> +           "2: .8byte 0                                        \n"     \
> +           ".type " STATIC_CALL_TRAMP_STR(name) ", @function   \n"     \
> +           ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
> +           ".popsection                                        \n")
> +
> +#define PPC_SCT_RET0           64              /* Offset of label 1 */
> +#define PPC_SCT_DATA           72              /* Offset of label 2 (aligned) */
> +
> +#elif defined(PPC32)
> +
>   #define __PPC_SCT(name, inst)                                  \
>          asm(".pushsection .text, \"ax\"                         \n"     \
>              ".align 5                                           \n"     \
>              ".globl " STATIC_CALL_TRAMP_STR(name) "             \n"     \
>              STATIC_CALL_TRAMP_STR(name) ":                      \n"     \
> -           inst "                                              \n"     \
> +           "   " inst "                                        \n"     \
>              "   lis     12,2f@ha                                \n"     \
>              "   lwz     12,2f@l(12)                             \n"     \
>              "   mtctr   12                                      \n"     \
> @@ -22,8 +59,12 @@
>   #define PPC_SCT_RET0           20              /* Offset of label 1 */
>   #define PPC_SCT_DATA           28              /* Offset of label 2 */
> 
> +#else /* !CONFIG_PPC64_ELF_ABI_V2 && !PPC32 */
> +#error "Unsupported ABI"
> +#endif /* CONFIG_PPC64_ELF_ABI_V2 */
> +
>   #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func)      __PPC_SCT(name, "b " #func)
>   #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name)       __PPC_SCT(name, "blr")
> -#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)       __PPC_SCT(name, "b .+20")
> +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name)       __PPC_SCT(name, "b 1f")
> 
>   #endif /* _ASM_POWERPC_STATIC_CALL_H */
> diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
> index 06d2d1f78f71..a30d0d0f5499 100644
> --- a/arch/powerpc/kernel/Makefile
> +++ b/arch/powerpc/kernel/Makefile
> @@ -128,8 +128,9 @@ extra-y                             += vmlinux.lds
> 
>   obj-$(CONFIG_RELOCATABLE)      += reloc_$(BITS).o
> 
> -obj-$(CONFIG_PPC32)            += entry_32.o setup_32.o early_32.o static_call.o
> +obj-$(CONFIG_PPC32)            += entry_32.o setup_32.o early_32.o
>   obj-$(CONFIG_PPC64)            += dma-iommu.o iommu.o
> +obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
>   obj-$(CONFIG_KGDB)             += kgdb.o
>   obj-$(CONFIG_BOOTX_TEXT)       += btext.o
>   obj-$(CONFIG_SMP)              += smp.o
> diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
> index 863a7aa24650..ed3bc361fdb0 100644
> --- a/arch/powerpc/kernel/static_call.c
> +++ b/arch/powerpc/kernel/static_call.c
> @@ -1,33 +1,101 @@
>   // SPDX-License-Identifier: GPL-2.0
> +#include <asm/code-patching.h>
> +#include <linux/export.h>
> +#include <linux/kconfig.h>
> +#include <linux/kernel.h>
>   #include <linux/memory.h>
>   #include <linux/static_call.h>
> +#include <linux/syscalls.h>
> 
> -#include <asm/code-patching.h>
> +#ifdef CONFIG_PPC64_ELF_ABI_V2
> +
> +static void* ppc_function_toc(u32 *func) {
> +       u32 insn1 = *func;
> +       u32 insn2 = *(func+1);
> +       u64 si1 = sign_extend64((insn1 & OP_SI_MASK) << 16, 31);
> +       u64 si2 = sign_extend64(insn2 & OP_SI_MASK, 15);
> +       u64 addr = ((u64) func + si1) + si2;
> +
> +       if ((((insn1 & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
> +            ((insn1 & OP_RT_RA_MASK) == LIS_R2)) &&
> +           ((insn2 & OP_RT_RA_MASK) == ADDI_R2_R2))
> +               return (void*) addr;
> +       else
> +               return NULL;
> +}
> +
> +static bool shares_toc(void *func1, void *func2) {
> +       void* func1_toc;
> +       void* func2_toc;
> +
> +       if (func1 == NULL || func2 == NULL)
> +               return false;
> +
> +       /* Assume the kernel only uses a single TOC */
> +       if (core_kernel_text((unsigned long)func1) &&
> +           core_kernel_text((unsigned long)func2))
> +               return true;
> +
> +       /* Fall back to calculating the TOC from common patterns
> +        * if modules are involved
> +        */
> +       func1_toc = ppc_function_toc(func1);
> +       func2_toc = ppc_function_toc(func2);
> +       return func1_toc != NULL && func2_toc != NULL && (func1_toc == func2_toc);
> +}
> +
> +#endif /* CONFIG_PPC64_ELF_ABI_V2 */
> 
>   void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
>   {
>          int err;
>          bool is_ret0 = (func == __static_call_return0);
>          unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func);
> -       bool is_short = is_offset_in_branch_range((long)target - (long)tramp);
> +       bool is_short;
> +       void* tramp_entry;
> 
>          if (!tramp)
>                  return;
> 
> +       tramp_entry = (void*)ppc_function_entry(tramp);
> +
> +#ifdef CONFIG_PPC64_ELF_ABI_V2
> +       if (shares_toc(tramp, (void*)target)) {
> +               /* Confirm that the local entry point is in range */
> +               is_short = is_offset_in_branch_range(
> +                       (long)ppc_function_entry((void*)target) - (long)tramp_entry);
> +       } else {
> +               /* Combine out-of-range with not sharing a TOC. Though it's possible an
> +                * out-of-range target shares a TOC, handling this separately complicates
> +                * the trampoline. It's simpler to always use the global entry point
> +                * in this case.
> +                */
> +               is_short = false;
> +       }
> +#else /* !CONFIG_PPC64_ELF_ABI_V2 */
> +       is_short = is_offset_in_branch_range((long)target - (long)tramp);
> +#endif /* CONFIG_PPC64_ELF_ABI_V2 */
> +
>          mutex_lock(&text_mutex);
> 
>          if (func && !is_short) {
> -               err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
> +               /* This assumes that the update is atomic. The current implementation uses
> +                * stw/std and the store location is aligned. A sync is issued by one of the
> +                * patch_instruction/patch_branch functions below.
> +                */
> +               err = PTR_ERR_OR_ZERO(patch_memory(
> +                       tramp + PPC_SCT_DATA, &target, sizeof(target)));
>                  if (err)
>                          goto out;
>          }
> 
>          if (!func)
> -               err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
> +               err = patch_instruction(tramp_entry, ppc_inst(PPC_RAW_BLR()));
>          else if (is_short)
> -               err = patch_branch(tramp, target, 0);
> +               err = patch_branch(tramp_entry, ppc_function_entry((void*) target), 0);
>          else
> -               err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
> +               err = patch_instruction(tramp_entry, ppc_inst(PPC_RAW_NOP()));
> +
>   out:
>          mutex_unlock(&text_mutex);
> 
> --
> 2.37.2
> 


More information about the Linuxppc-dev mailing list