[PATCH 1/3] powerpc/ftrace: Reserve instructions from function entry for ftrace
Masami Hiramatsu
mhiramat at kernel.org
Mon Feb 21 11:10:02 AEDT 2022
Hi Naveen,
On Thu, 17 Feb 2022 17:06:23 +0530
"Naveen N. Rao" <naveen.n.rao at linux.vnet.ibm.com> wrote:
> On some architectures, enabling function tracing results in multiple
> instructions being emitted at function entry. As an example, on
> powerpc64 with -mprofile-kernel, two instructions are emitted at
> function entry:
> mflr r0
> bl _mcount
>
> It is desirable to nop out both these instructions when ftrace is not
> active. For that purpose, it is essential to mark both these
> instructions as belonging to ftrace so that other kernel subsystems
> (such as kprobes) do not modify these instructions.
Indeed, kprobes must handle this. However, to keep kprobes usage consistent
with/without CONFIG_FUNCTION_TRACER, I think KPROBES_ON_FTRACE should
handle these instructions as a virtual single instruction.
More specifically, it should allow the user to put a kprobe on the 'mflr r0'
address, while a kprobe on 'bl _mcount' should return -EILSEQ (because it is
not an instruction boundary). The kprobe's ftrace handler then temporarily
modifies the instruction pointer to the address of 'mflr'.
Thank you,
>
> Add support for this by allowing architectures to override
> ftrace_cmp_recs() and to match against address ranges over and above a
> single MCOUNT_INSN_SIZE.
>
> For powerpc32, we mark the two instructions preceding the call to
> _mcount() as belonging to ftrace.
>
> For powerpc64, an additional aspect to consider is that functions can
> have a global entry point for setting up the TOC when invoked from other
> modules. If present, global entry point always involves two instructions
> (addis/lis and addi). To handle this, we provide a custom
> ftrace_init_nop() for powerpc64 where we identify functions having a
> global entry point and record this information in the LSB of
> dyn_ftrace->arch.mod. This information is used in ftrace_cmp_recs() to
> reserve instructions from the global entry point.
>
> Suggested-by: Steven Rostedt <rostedt at goodmis.org>
> Signed-off-by: Naveen N. Rao <naveen.n.rao at linux.vnet.ibm.com>
> ---
> arch/powerpc/include/asm/ftrace.h | 15 ++++
> arch/powerpc/kernel/trace/ftrace.c | 110 ++++++++++++++++++++++++++---
> kernel/trace/ftrace.c | 2 +
> 3 files changed, 117 insertions(+), 10 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
> index debe8c4f706260..8eb3235831633d 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -59,6 +59,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
> struct dyn_arch_ftrace {
> struct module *mod;
> };
> +
> +struct dyn_ftrace;
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec);
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod);
> +
> +#ifdef CONFIG_MPROFILE_KERNEL
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> +#define ftrace_init_nop ftrace_init_nop
> +#endif
> +
> +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_PPC32)
> +int ftrace_cmp_recs(const void *a, const void *b);
> +#define ftrace_cmp_recs ftrace_cmp_recs
> +#endif
> +
> #endif /* __ASSEMBLY__ */
>
> #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
> index 80b6285769f27c..11ce9296ce3cf2 100644
> --- a/arch/powerpc/kernel/trace/ftrace.c
> +++ b/arch/powerpc/kernel/trace/ftrace.c
> @@ -428,21 +428,21 @@ int ftrace_make_nop(struct module *mod,
> * We should either already have a pointer to the module
> * or it has been passed in.
> */
> - if (!rec->arch.mod) {
> + if (!ftrace_mod_addr_get(rec)) {
> if (!mod) {
> pr_err("No module loaded addr=%lx\n", addr);
> return -EFAULT;
> }
> - rec->arch.mod = mod;
> + ftrace_mod_addr_set(rec, mod);
> } else if (mod) {
> - if (mod != rec->arch.mod) {
> + if (mod != ftrace_mod_addr_get(rec)) {
> pr_err("Record mod %p not equal to passed in mod %p\n",
> - rec->arch.mod, mod);
> + ftrace_mod_addr_get(rec), mod);
> return -EINVAL;
> }
> /* nothing to do if mod == rec->arch.mod */
> } else
> - mod = rec->arch.mod;
> + mod = ftrace_mod_addr_get(rec);
>
> return __ftrace_make_nop(mod, rec, addr);
> #else
> @@ -451,6 +451,96 @@ int ftrace_make_nop(struct module *mod,
> #endif /* CONFIG_MODULES */
> }
>
> +#define FUNC_MCOUNT_OFFSET_PPC32 8
> +#define FUNC_MCOUNT_OFFSET_PPC64_LEP 4
> +#define FUNC_MCOUNT_OFFSET_PPC64_GEP 12
> +
> +#ifdef CONFIG_MPROFILE_KERNEL
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec)
> +{
> + return (struct module *)((unsigned long)rec->arch.mod & ~0x1);
> +}
> +
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod)
> +{
> + rec->arch.mod = (struct module *)(((unsigned long)rec->arch.mod & 0x1) | (unsigned long)mod);
> +}
> +
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
> +{
> + unsigned long offset, ip = rec->ip;
> + ppc_inst_t op1, op2;
> + int ret;
> +
> + if (!kallsyms_lookup_size_offset(rec->ip, NULL, &offset) ||
> + (offset != FUNC_MCOUNT_OFFSET_PPC64_GEP && offset != FUNC_MCOUNT_OFFSET_PPC64_LEP)) {
> + ip -= FUNC_MCOUNT_OFFSET_PPC64_GEP;
> + ret = copy_inst_from_kernel_nofault(&op1, (void *)ip);
> + ret |= copy_inst_from_kernel_nofault(&op2, (void *)(ip + MCOUNT_INSN_SIZE));
> + if (!ret &&
> + ((ppc_inst_val(op1) & 0xffff0000) == PPC_RAW_LIS(_R2, 0) ||
> + (ppc_inst_val(op1) & 0xffff0000) == PPC_RAW_ADDIS(_R2, _R12, 0)) &&
> + (ppc_inst_val(op2) & 0xffff0000) == PPC_RAW_ADDI(_R2, _R2, 0))
> + ftrace_mod_addr_set(rec, (struct module *)1);
> + } else if (offset == FUNC_MCOUNT_OFFSET_PPC64_GEP) {
> + ftrace_mod_addr_set(rec, (struct module *)1);
> + }
> +
> + return ftrace_make_nop(mod, rec, MCOUNT_ADDR);
> +}
> +#else
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec)
> +{
> + return rec->arch.mod;
> +}
> +
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod)
> +{
> + rec->arch.mod = mod;
> +}
> +#endif /* CONFIG_MPROFILE_KERNEL */
> +
> +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_PPC32)
> +int ftrace_location_get_offset(const struct dyn_ftrace *rec)
> +{
> + if (IS_ENABLED(CONFIG_MPROFILE_KERNEL))
> + /*
> + * On ppc64le with -mprofile-kernel, function entry can have:
> + * addis r2, r12, M
> + * addi r2, r2, N
> + * mflr r0
> + * bl _mcount
> + *
> + * The first two instructions are for TOC setup and represent the global entry
> + * point for cross-module calls, and may be missing if the function is never called
> + * from other modules.
> + */
> + return ((unsigned long)rec->arch.mod & 0x1) ? FUNC_MCOUNT_OFFSET_PPC64_GEP :
> + FUNC_MCOUNT_OFFSET_PPC64_LEP;
> + else
> + /*
> + * On ppc32, function entry always has:
> + * mflr r0
> + * stw r0, 4(r1)
> + * bl _mcount
> + */
> + return FUNC_MCOUNT_OFFSET_PPC32;
> +}
> +
> +int ftrace_cmp_recs(const void *a, const void *b)
> +{
> + const struct dyn_ftrace *key = a;
> + const struct dyn_ftrace *rec = b;
> + int offset = ftrace_location_get_offset(rec);
> +
> + if (key->flags < rec->ip - offset)
> + return -1;
> + if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
> + return 1;
> + return 0;
> +}
> +#endif
> +
> #ifdef CONFIG_MODULES
> #ifdef CONFIG_PPC64
> /*
> @@ -494,7 +584,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> ppc_inst_t instr;
> void *ip = (void *)rec->ip;
> unsigned long entry, ptr, tramp;
> - struct module *mod = rec->arch.mod;
> + struct module *mod = ftrace_mod_addr_get(rec);
>
> /* read where this goes */
> if (copy_inst_from_kernel_nofault(op, ip))
> @@ -561,7 +651,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> int err;
> ppc_inst_t op;
> u32 *ip = (u32 *)rec->ip;
> - struct module *mod = rec->arch.mod;
> + struct module *mod = ftrace_mod_addr_get(rec);
> unsigned long tramp;
>
> /* read where this goes */
> @@ -678,7 +768,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
> * Being that we are converting from nop, it had better
> * already have a module defined.
> */
> - if (!rec->arch.mod) {
> + if (!ftrace_mod_addr_get(rec)) {
> pr_err("No module loaded\n");
> return -EINVAL;
> }
> @@ -699,7 +789,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
> ppc_inst_t op;
> unsigned long ip = rec->ip;
> unsigned long entry, ptr, tramp;
> - struct module *mod = rec->arch.mod;
> + struct module *mod = ftrace_mod_addr_get(rec);
>
> /* If we never set up ftrace trampolines, then bail */
> if (!mod->arch.tramp || !mod->arch.tramp_regs) {
> @@ -814,7 +904,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
> /*
> * Out of range jumps are called from modules.
> */
> - if (!rec->arch.mod) {
> + if (!ftrace_mod_addr_get(rec)) {
> pr_err("No module loaded\n");
> return -EINVAL;
> }
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index f9feb197b2daaf..68f20cf34b0c47 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1510,6 +1510,7 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
> }
>
>
> +#ifndef ftrace_cmp_recs
> static int ftrace_cmp_recs(const void *a, const void *b)
> {
> const struct dyn_ftrace *key = a;
> @@ -1521,6 +1522,7 @@ static int ftrace_cmp_recs(const void *a, const void *b)
> return 1;
> return 0;
> }
> +#endif
>
> static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
> {
> --
> 2.35.1
>
--
Masami Hiramatsu <mhiramat at kernel.org>
More information about the Linuxppc-dev
mailing list