[RFC][PATCH] perf: Add 'merge-recursive' callchain option

Sukadev Bhattiprolu sukadev at linux.vnet.ibm.com
Sat Mar 22 13:01:56 EST 2014


>From 9ad9432dab2bf4d1c8e6ff9201e88d5ae9f3994a Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
Date: Wed, 19 Mar 2014 20:24:22 -0500
Subject: [PATCH 1/1] perf: Add 'merge-recursive' callchain option

Powerpc saves the link register (LR) with each sample to help resolve
callchains for programs involving tail calls. Sometimes the value in the
LR is same as the NIP register. Other times it is not. This results in
callchains samples like these:

3547953334790 0x1ec0 [0x78]: PERF_RECORD_SAMPLE(IP, 2): 4667/4667: 0x80a7be3c58 period: 1773985 addr: 0
... chain: nr:9
.....  0: fffffffffffffe00
.....  1: 00000080a7be3c58	__random
.....  2: 00000080a7be3c40	__random
.....  3: 00000080a7be4270	rand
.....  4: 0000000010000784	do_my_sprintf
.....  5: 00000000100009d8	main
.....  6: 00000080a7bc482c
.....  7: 00000080a7bc4a34
.....  8: 0000000000000000
 ... thread: sprintft:4667
 ...... dso: /usr/lib64/libc-2.18.so

67470723107802 0x2f10 [0x78]: PERF_RECORD_SAMPLE(IP, 0x2): 5706/5706: 0x80a7be3c20 period: 872629 addr: 0
... chain: nr:9
.....  0: fffffffffffffe00
.....  1: 00000080a7be3c20	__random
.....  2: 00000080a7be4270	rand
.....  3: 00000080a7be4270	rand
.....  4: 0000000010000784
.....  5: 00000000100009d8
.....  6: 00000080a7bc482c
.....  7: 00000080a7bc4a34
.....  8: 0000000000000000
 ... thread: sprintft:5706
 ...... dso: /usr/lib64/libc-2.18.so

67470738362072 0x4cf8 [0x78]: PERF_RECORD_SAMPLE(IP, 0x2): 5706/5706: 0x80a7be3c14 period: 869774 addr: 0
... chain: nr:9
.....  0: fffffffffffffe00
.....  1: 00000080a7be3c14	__random
.....  2: 00000080a7be4270	rand
.....  3: 00000080a7be4270	rand
.....  4: 0000000010000784	do_my_sprintf
.....  5: 00000000100009d8	main
.....  6: 00000080a7bc482c
.....  7: 00000080a7bc4a34
.....  8: 0000000000000000
 ... thread: sprintft:5706
 ...... dso: /usr/lib64/libc-2.18.so

In the perf tool, these samples end up on different branches of the RB-Tree
resulting in duplicated arcs in the final call-graph.

    15.02%  sprintft  libc-2.18.so       [.] __random
            |
            --- __random
               |
               |--58.45%-- rand
               |          rand
               |          do_my_sprintf
               |          main
               |          generic_start_main.isra.0
               |          __libc_start_main
               |          0x0
               |
                --41.55%-- __random
                          rand
                          do_my_sprintf
                          main
                          generic_start_main.isra.0
                          __libc_start_main
                          0x0

This is an RFC for adding a 'merge-recursive' call-graph option that would
discard the duplicate entries resulting in a more compact call-graph. The
duplicates can be either identical instruction pointer values or IP values
within the same function.

	perf report --call-graph=fractal,0.5,callee,function,merge-recursive

    15.02%  sprintft  libc-2.18.so       [.] __random
            |
            ---__random
               rand
               do_my_sprintf
               main
               generic_start_main.isra.0
               __libc_start_main
                (nil)

This option could also be used to collapse call-graphs of recursive functions.

AFAICT, the existing CCKEY_FUNCTION does not address this case because
the callchain lengths can vary across samples.

Signed-off-by: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
---
 tools/perf/builtin-report.c |   14 ++++++++++++--
 tools/perf/util/callchain.h |    1 +
 tools/perf/util/machine.c   |   39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index d882b6f..0b68749 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -647,6 +647,16 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 		callchain_param.key = CCKEY_ADDRESS;
 	else
 		return -1;
+
+	tok2 = strtok(NULL, ",");
+	if (!tok2)
+		goto setup;
+
+	if (!strncmp(tok2, "merge-recursive", strlen("merge-recursive")))
+		callchain_param.merge_recursive = 1;
+	else
+		return -1;
+
 setup:
 	if (callchain_register_param(&callchain_param) < 0) {
 		pr_err("Can't register callchain params\n");
@@ -762,8 +772,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "regex filter to identify parent, see: '--sort parent'"),
 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
 		    "Only display entries with parent-match"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
-		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order,merge-recursive",
+		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), merge or don't merge recursive calls"
 		     "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
 	OPT_INTEGER(0, "max-stack", &report.max_stack,
 		    "Set the maximum stack depth when parsing the callchain, "
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 8ad97e9..d8d0822 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -53,6 +53,7 @@ struct callchain_param {
 	sort_chain_func_t	sort;
 	enum chain_order	order;
 	enum chain_key		key;
+	u32			merge_recursive;
 };
 
 struct callchain_list {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index eb26544..e74ad95 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1272,6 +1272,32 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+static int duplicate_symbol(struct symbol *sym, char **prev_name)
+{
+	char *prev = *prev_name;
+
+	if (sym) {
+		if (prev) {
+			if (!strcmp(sym->name, prev))
+				return 1;		/* duplicate */
+			free(prev);
+		}
+
+		*prev_name = prev = strdup(sym->name);
+		if (!prev) {
+			pr_debug("%s: Unable to alloc memory\n", __func__);
+			return -ENOMEM;
+		}
+	} else if (prev) {
+		/* new symbol is NULL, so forget prev name */
+		free(prev);
+		*prev_name = NULL;
+	}
+	
+	/* Not duplicate */
+	return 0;
+}
+
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
@@ -1283,6 +1309,8 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 	int chain_nr = min(max_stack, (int)chain->nr);
 	int i;
 	int err;
+	u64 prev_ip = 0;
+	char *prev_name = NULL;
 
 	callchain_cursor_reset(&callchain_cursor);
 
@@ -1324,6 +1352,10 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 			continue;
 		}
 
+		if (callchain_param.merge_recursive && prev_ip == ip)
+			continue;
+		prev_ip = ip;
+
 		al.filtered = false;
 		thread__find_addr_location(thread, machine, cpumode,
 					   MAP__FUNCTION, ip, &al);
@@ -1340,6 +1372,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 			}
 		}
 
+		if (callchain_param.merge_recursive) {
+			if ((err = duplicate_symbol(al.sym, &prev_name)) > 0)
+				continue;	/* duplicate */
+			else if (err < 0)
+				return err;
+		}
+
 		err = callchain_cursor_append(&callchain_cursor,
 					      ip, al.map, al.sym);
 		if (err)
-- 
1.7.9.5



More information about the Linuxppc-dev mailing list