[PATCH RFC] powerpc/ftrace: Handle large kernel configs

Naveen N. Rao naveen.n.rao at linux.vnet.ibm.com
Tue Oct 16 05:57:25 AEDT 2018


Currently, we expect to be able to reach ftrace_caller() from all
ftrace-enabled functions through a single relative branch. With large
kernel configs, we see functions located more than 32MB away from
ftrace_caller(), causing ftrace_init() to bail.
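
For context, the relative branch instruction ('bl') on powerpc encodes a
24-bit, word-aligned offset, which is what limits its reach to +/-32MB
of the call site. A minimal userspace sketch of that range check
(modeled on the kernel's is_offset_in_branch_range(); branch_reachable()
is an illustrative name, not a kernel helper):

	#include <stdbool.h>
	#include <stdint.h>

	static bool is_offset_in_branch_range(int64_t offset)
	{
		/* 24-bit LI field, shifted left by 2: a signed 26-bit offset */
		return offset >= -0x2000000 && offset <= 0x1fffffc &&
		       !(offset & 0x3);
	}

	static bool branch_reachable(uint64_t ip, uint64_t target)
	{
		return is_offset_in_branch_range((int64_t)(target - ip));
	}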

One way to solve this is by adding additional trampolines around .text,
.init.text and any other sections with profiled functions. However,
since each trampoline is itself only reachable from within +/-32MB of
it, such trampolines only help if a section does not exceed 64MB. With
allyesconfig, the .text section alone can grow beyond 100MB, which would
then require us to insert trampolines in the middle of .text... somehow.

In such configurations, gcc/ld emits two types of trampolines for mcount():
1. A long_branch, which has a single branch to mcount() for functions that
   are one hop away from mcount():
	c0000000019e8544 <00031b56.long_branch._mcount>:
	c0000000019e8544:	4a 69 3f ac 	b       c00000000007c4f0 <._mcount>

2. A plt_branch, for functions that are farther away from mcount():
	c0000000051f33f8 <0008ba04.plt_branch._mcount>:
	c0000000051f33f8:	3d 82 ff a4 	addis   r12,r2,-92
	c0000000051f33fc:	e9 8c 04 20 	ld      r12,1056(r12)
	c0000000051f3400:	7d 89 03 a6 	mtctr   r12
	c0000000051f3404:	4e 80 04 20 	bctr
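
In both cases, the call site itself still just has a plain 'bl' to the
trampoline. The branch target can be recovered by decoding the LI field
of the instruction; a standalone sketch of that decoding (mirroring what
the kernel's find_bl_target() computes; bl_target() is an illustrative
name):

	/* Decode the target of a relative 'b'/'bl' (AA bit clear) */
	static uint64_t bl_target(uint64_t ip, uint32_t insn)
	{
		int64_t offset = insn & 0x03fffffc;	/* LI field, word-aligned */

		if (offset & 0x02000000)		/* sign-extend 26 bits */
			offset -= 0x04000000;

		return ip + offset;
	}

Feeding it the long_branch above (ip c0000000019e8544, instruction
0x4a693fac) yields c00000000007c4f0, i.e. ._mcount.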

We can reuse those trampolines for ftrace if we can have them branch to
ftrace_caller() instead. On powerpc, we no longer support
!CONFIG_DYNAMIC_FTRACE, so we can simply patch mcount() itself to branch
to ftrace_caller() (or to ftrace_regs_caller() with -mprofile-kernel),
allowing us to use those gcc-generated trampolines for ftrace.
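
The patching itself goes through create_branch()/patch_branch(). As a
rough sketch of the encoding they compute for an unconditional relative
branch (reusing is_offset_in_branch_range() from the earlier sketch;
simplified, not the actual kernel implementation):

	/* Encode 'b target' ('bl' if link is set), or 0 if out of range */
	static uint32_t encode_branch(uint64_t ip, uint64_t target, bool link)
	{
		int64_t offset = (int64_t)(target - ip);

		if (!is_offset_in_branch_range(offset))
			return 0;

		/* opcode 18, LI field, AA=0, LK=link */
		return 0x48000000 | (offset & 0x03fffffc) | (link ? 1 : 0);
	}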

We note down all the existing gcc-generated trampolines during
ftrace_init() and, for call sites from which ftrace_caller() is not
reachable, patch in a branch to one of those trampolines instead.

Signed-off-by: Naveen N. Rao <naveen.n.rao at linux.vnet.ibm.com>
---
The one aspect I am not entirely sure about is whether the plt_branch
is fine for -mprofile-kernel, as it depends on r2 (the TOC pointer)
being properly set up. If it isn't, we will have to set up separate
trampolines just for -mprofile-kernel.

- Naveen


 arch/powerpc/kernel/trace/ftrace.c | 131 ++++++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 4bfbb54dee51..5fcc05866a23 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -30,6 +30,10 @@
 
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+
+#define	NUM_FTRACE_TRAMPS	8
+static unsigned long ftrace_cc_tramps[NUM_FTRACE_TRAMPS];
+
 static unsigned int
 ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
 {
@@ -270,6 +274,52 @@ __ftrace_make_nop(struct module *mod,
 #endif /* PPC64 */
 #endif /* CONFIG_MODULES */
 
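+/*
+ * Record a gcc/ld-generated trampoline to mcount() so that call sites
+ * out of range of ftrace_caller() can be redirected through it later.
+ */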
+static void add_ftrace_cc_tramp(unsigned long tramp)
+{
+	int i;
+
+	for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+		if (!ftrace_cc_tramps[i]) {
+			ftrace_cc_tramps[i] = tramp;
+			return;
+		} else if (ftrace_cc_tramps[i] == tramp)
+			return;
+
+	WARN(1, "No ftrace cc tramp slots available");
+}
+
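+/*
+ * Nop out the 'bl <trampoline>' at a kernel call site, noting down the
+ * trampoline so that ftrace_make_call() can reuse it.
+ */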
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long tramp, ip = rec->ip;
+	unsigned int op;
+
+	/* read where this goes */
+	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24-bit branch */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %x\n", op);
+		return -EINVAL;
+	}
+
+	/* let's find where the branch goes */
+	tramp = find_bl_target(ip, op);
+
+	pr_devel("ip:%lx jumps to %lx\n", ip, tramp);
+
+	add_ftrace_cc_tramp(tramp);
+
+	if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
 int ftrace_make_nop(struct module *mod,
 		    struct dyn_ftrace *rec, unsigned long addr)
 {
@@ -286,7 +336,8 @@ int ftrace_make_nop(struct module *mod,
 		old = ftrace_call_replace(ip, addr, 1);
 		new = PPC_INST_NOP;
 		return ftrace_modify_code(ip, old, new);
-	}
+	} else if (core_kernel_text(ip))
+		return __ftrace_make_nop_kernel(rec, addr);
 
 #ifdef CONFIG_MODULES
 	/*
@@ -456,6 +507,40 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 #endif /* CONFIG_PPC64 */
 #endif /* CONFIG_MODULES */
 
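+/*
+ * Patch the nop at a kernel call site to branch to one of the recorded
+ * gcc-generated trampolines, for when ftrace_caller() is out of range.
+ */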
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned int op, i;
+	void *ip = (void *)rec->ip;
+
+	/* read where this goes */
+	if (probe_kernel_read(&op, ip, sizeof(op)))
+		return -EFAULT;
+
+	if (op != PPC_INST_NOP) {
+		pr_err("Unexpected call sequence at %p: %x\n", ip, op);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < NUM_FTRACE_TRAMPS; i++) {
+		if (!ftrace_cc_tramps[i])
+			break;
+
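+		/* Skip trampolines that are out of range of this call site */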
+		if (!create_branch(ip, ftrace_cc_tramps[i], BRANCH_SET_LINK))
+			continue;
+
+		if (patch_branch(ip, ftrace_cc_tramps[i], BRANCH_SET_LINK)) {
+			pr_err("Error patching branch to ftrace tramp!\n");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	pr_err("No trampolines are reachable from %p\n", ip);
+
+	return -EINVAL;
+}
+
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	unsigned long ip = rec->ip;
@@ -471,7 +556,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		old = PPC_INST_NOP;
 		new = ftrace_call_replace(ip, addr, 1);
 		return ftrace_modify_code(ip, old, new);
-	}
+	} else if (core_kernel_text(ip))
+		return __ftrace_make_call_kernel(rec, addr);
 
 #ifdef CONFIG_MODULES
 	/*
@@ -603,6 +689,12 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 		old = ftrace_call_replace(ip, old_addr, 1);
 		new = ftrace_call_replace(ip, addr, 1);
 		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		/*
+		 * We always patch out of range locations to go to the regs
+		 * variant, so there is nothing to do here
+		 */
+		return 0;
 	}
 
 #ifdef CONFIG_MODULES
@@ -654,8 +746,43 @@ void arch_ftrace_update_code(int command)
 	ftrace_modify_all_code(command);
 }
 
+/*
+ * Patch _mcount() to jump to ftrace_caller/ftrace_regs_caller for catching
+ * ftrace entry from far functions in a large kernel.
+ */
 int __init ftrace_dyn_arch_init(void)
 {
+	unsigned long ip = ppc_global_function_entry((void *)_mcount);
+	unsigned long ftrace_call_entry;
+	unsigned int op;
+
+#ifdef CONFIG_MPROFILE_KERNEL
+	ftrace_call_entry = (unsigned long)ftrace_regs_caller;
+#else
+	ftrace_call_entry = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+
+	if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) {
+		pr_err("Fetching instruction at %lx failed.\n", ip);
+		return -EFAULT;
+	}
+
+	/* We expect either a mflr r0 (ppc32), or a mflr r12 (ppc64) */
+	if ((op & 0xfc1fffff) != PPC_INST_MFLR) {
+		pr_err("Unexpected instruction %08x in _mcount()\n", op);
+		return -EINVAL;
+	}
+
+	if (!create_branch((unsigned int *)ip, ftrace_call_entry, 0)) {
+		pr_err("Branch out of range\n");
+		return -EINVAL;
+	}
+
+	if (patch_branch((unsigned int *)ip, ftrace_call_entry, 0)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
-- 
2.19.1


