[PATCH 1/3] powerpc/ftrace: Reserve instructions from function entry for ftrace

Masami Hiramatsu mhiramat at kernel.org
Mon Feb 21 11:10:02 AEDT 2022


Hi Naveen,

On Thu, 17 Feb 2022 17:06:23 +0530
"Naveen N. Rao" <naveen.n.rao at linux.vnet.ibm.com> wrote:

> On some architectures, enabling function tracing results in multiple
> instructions being emitted at function entry. As an example, on
> powerpc64 with -mprofile-kernel, two instructions are emitted at
> function entry:
> 	mflr	r0
> 	bl	_mcount
> 
> It is desirable to nop out both these instructions when ftrace is not
> active. For that purpose, it is essential to mark both these
> instructions as belonging to ftrace so that other kernel subsystems
> (such as kprobes) do not modify these instructions.

Indeed, kprobes must handle this. However, to keep consistency of kprobes
usage with/without CONFIG_FUNCTION_TRACER, I think KPROBES_ON_FTRACE should
handle these instructions are virutal single instruction.
More specifically, it should allow user to put a kprobe on 'mflr r0' address
and the kprobes on 'bl _mcount' should return -EILSEQ. (because it is not an 
instruction boundary.) And the kprobe's ftrace handler temporarily modifies
the instruction pointer to the address of 'mflr'.

Thank you,

> 
> Add support for this by allowing architectures to override
> ftrace_cmp_recs() and to match against address ranges over and above a
> single MCOUNT_INSN_SIZE.
> 
> For powerpc32, we mark the two instructions preceding the call to
> _mcount() as belonging to ftrace.
> 
> For powerpc64, an additional aspect to consider is that functions can
> have a global entry point for setting up the TOC when invoked from other
> modules. If present, global entry point always involves two instructions
> (addis/lis and addi). To handle this, we provide a custom
> ftrace_init_nop() for powerpc64 where we identify functions having a
> global entry point and record this information in the LSB of
> dyn_ftrace->arch.mod. This information is used in ftrace_cmp_recs() to
> reserve instructions from the global entry point.
> 
> Suggested-by: Steven Rostedt <rostedt at goodmis.org>
> Signed-off-by: Naveen N. Rao <naveen.n.rao at linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/ftrace.h  |  15 ++++
>  arch/powerpc/kernel/trace/ftrace.c | 110 ++++++++++++++++++++++++++---
>  kernel/trace/ftrace.c              |   2 +
>  3 files changed, 117 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
> index debe8c4f706260..8eb3235831633d 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -59,6 +59,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
>  struct dyn_arch_ftrace {
>  	struct module *mod;
>  };
> +
> +struct dyn_ftrace;
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec);
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod);
> +
> +#ifdef CONFIG_MPROFILE_KERNEL
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> +#define ftrace_init_nop ftrace_init_nop
> +#endif
> +
> +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_PPC32)
> +int ftrace_cmp_recs(const void *a, const void *b);
> +#define ftrace_cmp_recs ftrace_cmp_recs
> +#endif
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
> index 80b6285769f27c..11ce9296ce3cf2 100644
> --- a/arch/powerpc/kernel/trace/ftrace.c
> +++ b/arch/powerpc/kernel/trace/ftrace.c
> @@ -428,21 +428,21 @@ int ftrace_make_nop(struct module *mod,
>  	 * We should either already have a pointer to the module
>  	 * or it has been passed in.
>  	 */
> -	if (!rec->arch.mod) {
> +	if (!ftrace_mod_addr_get(rec)) {
>  		if (!mod) {
>  			pr_err("No module loaded addr=%lx\n", addr);
>  			return -EFAULT;
>  		}
> -		rec->arch.mod = mod;
> +		ftrace_mod_addr_set(rec, mod);
>  	} else if (mod) {
> -		if (mod != rec->arch.mod) {
> +		if (mod != ftrace_mod_addr_get(rec)) {
>  			pr_err("Record mod %p not equal to passed in mod %p\n",
> -			       rec->arch.mod, mod);
> +			       ftrace_mod_addr_get(rec), mod);
>  			return -EINVAL;
>  		}
>  		/* nothing to do if mod == rec->arch.mod */
>  	} else
> -		mod = rec->arch.mod;
> +		mod = ftrace_mod_addr_get(rec);
>  
>  	return __ftrace_make_nop(mod, rec, addr);
>  #else
> @@ -451,6 +451,96 @@ int ftrace_make_nop(struct module *mod,
>  #endif /* CONFIG_MODULES */
>  }
>  
> +#define FUNC_MCOUNT_OFFSET_PPC32	8
> +#define FUNC_MCOUNT_OFFSET_PPC64_LEP	4
> +#define FUNC_MCOUNT_OFFSET_PPC64_GEP	12
> +
> +#ifdef CONFIG_MPROFILE_KERNEL
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec)
> +{
> +	return (struct module *)((unsigned long)rec->arch.mod & ~0x1);
> +}
> +
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod)
> +{
> +	rec->arch.mod = (struct module *)(((unsigned long)rec->arch.mod & 0x1) | (unsigned long)mod);
> +}
> +
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
> +{
> +	unsigned long offset, ip = rec->ip;
> +	ppc_inst_t op1, op2;
> +	int ret;
> +
> +	if (!kallsyms_lookup_size_offset(rec->ip, NULL, &offset) ||
> +	    (offset != FUNC_MCOUNT_OFFSET_PPC64_GEP && offset != FUNC_MCOUNT_OFFSET_PPC64_LEP)) {
> +		ip -= FUNC_MCOUNT_OFFSET_PPC64_GEP;
> +		ret = copy_inst_from_kernel_nofault(&op1, (void *)ip);
> +		ret |= copy_inst_from_kernel_nofault(&op2, (void *)(ip + MCOUNT_INSN_SIZE));
> +		if (!ret &&
> +		    ((ppc_inst_val(op1) & 0xffff0000) == PPC_RAW_LIS(_R2, 0) ||
> +		     (ppc_inst_val(op1) & 0xffff0000) == PPC_RAW_ADDIS(_R2, _R12, 0)) &&
> +		    (ppc_inst_val(op2) & 0xffff0000) == PPC_RAW_ADDI(_R2, _R2, 0))
> +			ftrace_mod_addr_set(rec, (struct module *)1);
> +	} else if (offset == FUNC_MCOUNT_OFFSET_PPC64_GEP) {
> +		ftrace_mod_addr_set(rec, (struct module *)1);
> +	}
> +
> +	return ftrace_make_nop(mod, rec, MCOUNT_ADDR);
> +}
> +#else
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec)
> +{
> +	return rec->arch.mod;
> +}
> +
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod)
> +{
> +	rec->arch.mod = mod;
> +}
> +#endif /* CONFIG_MPROFILE_KERNEL */
> +
> +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_PPC32)
> +int ftrace_location_get_offset(const struct dyn_ftrace *rec)
> +{
> +	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL))
> +		/*
> +		 * On ppc64le with -mprofile-kernel, function entry can have:
> +		 *   addis r2, r12, M
> +		 *   addi  r2, r2, N
> +		 *   mflr  r0
> +		 *   bl    _mcount
> +		 *
> +		 * The first two instructions are for TOC setup and represent the global entry
> +		 * point for cross-module calls, and may be missing if the function is never called
> +		 * from other modules.
> +		 */
> +		return ((unsigned long)rec->arch.mod & 0x1) ? FUNC_MCOUNT_OFFSET_PPC64_GEP :
> +							      FUNC_MCOUNT_OFFSET_PPC64_LEP;
> +	else
> +		/*
> +		 * On ppc32, function entry always has:
> +		 *   mflr r0
> +		 *   stw  r0, 4(r1)
> +		 *   bl   _mcount
> +		 */
> +		return FUNC_MCOUNT_OFFSET_PPC32;
> +}
> +
> +int ftrace_cmp_recs(const void *a, const void *b)
> +{
> +	const struct dyn_ftrace *key = a;
> +	const struct dyn_ftrace *rec = b;
> +	int offset = ftrace_location_get_offset(rec);
> +
> +	if (key->flags < rec->ip - offset)
> +		return -1;
> +	if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
> +		return 1;
> +	return 0;
> +}
> +#endif
> +
>  #ifdef CONFIG_MODULES
>  #ifdef CONFIG_PPC64
>  /*
> @@ -494,7 +584,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  	ppc_inst_t instr;
>  	void *ip = (void *)rec->ip;
>  	unsigned long entry, ptr, tramp;
> -	struct module *mod = rec->arch.mod;
> +	struct module *mod = ftrace_mod_addr_get(rec);
>  
>  	/* read where this goes */
>  	if (copy_inst_from_kernel_nofault(op, ip))
> @@ -561,7 +651,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  	int err;
>  	ppc_inst_t op;
>  	u32 *ip = (u32 *)rec->ip;
> -	struct module *mod = rec->arch.mod;
> +	struct module *mod = ftrace_mod_addr_get(rec);
>  	unsigned long tramp;
>  
>  	/* read where this goes */
> @@ -678,7 +768,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
>  	 * Being that we are converting from nop, it had better
>  	 * already have a module defined.
>  	 */
> -	if (!rec->arch.mod) {
> +	if (!ftrace_mod_addr_get(rec)) {
>  		pr_err("No module loaded\n");
>  		return -EINVAL;
>  	}
> @@ -699,7 +789,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
>  	ppc_inst_t op;
>  	unsigned long ip = rec->ip;
>  	unsigned long entry, ptr, tramp;
> -	struct module *mod = rec->arch.mod;
> +	struct module *mod = ftrace_mod_addr_get(rec);
>  
>  	/* If we never set up ftrace trampolines, then bail */
>  	if (!mod->arch.tramp || !mod->arch.tramp_regs) {
> @@ -814,7 +904,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
>  	/*
>  	 * Out of range jumps are called from modules.
>  	 */
> -	if (!rec->arch.mod) {
> +	if (!ftrace_mod_addr_get(rec)) {
>  		pr_err("No module loaded\n");
>  		return -EINVAL;
>  	}
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index f9feb197b2daaf..68f20cf34b0c47 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1510,6 +1510,7 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
>  	}
>  
>  
> +#ifndef ftrace_cmp_recs
>  static int ftrace_cmp_recs(const void *a, const void *b)
>  {
>  	const struct dyn_ftrace *key = a;
> @@ -1521,6 +1522,7 @@ static int ftrace_cmp_recs(const void *a, const void *b)
>  		return 1;
>  	return 0;
>  }
> +#endif
>  
>  static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
>  {
> -- 
> 2.35.1
> 


-- 
Masami Hiramatsu <mhiramat at kernel.org>


More information about the Linuxppc-dev mailing list