[PATCH V8 10/10] selftests, powerpc: Add test for BHRB branch filters (HW & SW)

Daniel Axtens dja at axtens.net
Thu Jun 11 12:09:21 AEST 2015


Hi,

On Mon, 2015-06-08 at 17:08 +0530, Anshuman Khandual wrote:
> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
> new file mode 100644
> index 0000000..13e6b72
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.c
> @@ -0,0 +1,513 @@
> +/*
> + * BHRB filter test (HW & SW)
> + *
> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */

This should also be GPLv2 only.
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <signal.h>
> +#include <poll.h>
> +#include <sys/shm.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <sys/mman.h>
> +
> +#include "bhrb_filters.h"
> +#include "utils.h"
> +#include "../event.h"
> +#include "../lib.h"
> +
> +/* Fetched address counts */
> +#define ALL_MAX		32
> +#define CALL_MAX	12
> +#define RET_MAX		10
> +#define COND_MAX	8
> +#define IND_MAX		4
> +
> +/* Test tunables */
> +#define LOOP_COUNT	10
> +#define SAMPLE_PERIOD	10000
> +
> +static int branch_sample_type;
> +static int branch_test_set[27] = {
Do you need to explicitly provide the count here?
> +		PERF_SAMPLE_BRANCH_ANY_CALL,		/* Single filters */
> +		PERF_SAMPLE_BRANCH_ANY_RETURN,
> +		PERF_SAMPLE_BRANCH_COND,
> +		PERF_SAMPLE_BRANCH_IND_CALL,
> +		PERF_SAMPLE_BRANCH_ANY,
> +

> +		PERF_SAMPLE_BRANCH_ANY_CALL |		/* Tripple filters */
s/Tripple/Triple/
> +		PERF_SAMPLE_BRANCH_ANY_RETURN |
> +		PERF_SAMPLE_BRANCH_COND,
> +


> +
> +static void *ring_buffer_mask(struct ring_buffer *r, void *p)
Is this actually returning a mask? It looks more like it's calculating
an offset, and that seems to be how you use it below.
> +{
> +	unsigned long l = (unsigned long)p;
> +
> +	return (void *)(r->ring_base + ((l - r->ring_base) & r->mask));
> +}
That's a lot of casts, especially when you then load it into an int64_t
pointer below...
> +
> +static void dump_sample(struct perf_event_header *hdr, struct ring_buffer *r)
> +{
> +	unsigned long from, to, flag;
> +	int i, nr;
> +	int64_t *v;
> +
> +	/* NR Branches */
> +	v = ring_buffer_mask(r, hdr + 1);
...here. (and everywhere else I can see that you're using the
ring_buffer_mask function)
> +
> +	nr = *v;
You are dereferencing an int64_t pointer into an int. Should nr be an
int64_t? Or should v be a different pointer type?

> +
> +	/* Branches */
> +	for (i = 0; i < nr; i++) {
> +		v = ring_buffer_mask(r, v + 1);
> +		from = *v;
Now you're dereferencing an int64_t * into an unsigned long.
> +
> +		v = ring_buffer_mask(r, v + 1);
> +		to = *v;
> +
> +		v = ring_buffer_mask(r, v + 1);
> +		flag = *v;
> +
> +		if (!check_branch(from, to)) {
> +			has_failed = true;
> +			printf("[Filter: %d] From: %lx To: %lx Flags: %lx\n",
> +					branch_sample_type, from, to, flag);
> +		}
> +	}
> +}
> +
> +static void read_ring_buffer(struct event *e)
> +{
> +	struct ring_buffer *r = &e->ring_buffer;
> +	struct perf_event_header *hdr;
> +	int old, head;
Why not tail and head?
> +
> +	head = r->page->data_head & r->mask;
> +
> +	asm volatile ("sync": : :"memory");
> +
> +	old = r->page->data_tail & r->mask;
> +
Can you explain the logic of syncing between reading head and tail? Is
there an expectation that head is not likely to change?

As a more general comment, what is sync trying to achieve? If you're
trying to synchronise something, what's the sync actually achieving? Is
there a corresponding memory barrier somewhere else? What race
conditions are you trying to guard against and does this actually guard
against them?

> +	while (old != head) {
> +		hdr = (struct perf_event_header *)(r->ring_base + old);
> +
> +		if ((old & r->mask) + hdr->size !=
> +					((old + hdr->size) & r->mask))
> +			++record_overlap;
> +
> +		if (hdr->type == PERF_RECORD_SAMPLE) {
> +			++record_sample;
> +			dump_sample(hdr, r);
> +		}
> +
> +		if (hdr->type == PERF_RECORD_MMAP)
> +			++record_mmap;
> +
> +		if (hdr->type == PERF_RECORD_LOST)
> +			++record_lost;
> +
> +		if (hdr->type == PERF_RECORD_THROTTLE)
> +			++record_throttle;
> +
> +		if (hdr->type == PERF_RECORD_UNTHROTTLE)
> +			++record_unthrottle;
> +
> +		old = (old + hdr->size) & r->mask;
> +	}
> +	r->page->data_tail = old;
What happens if data_tail moves while you're doing the loop?



> +static int filter_test(void)
> +{
> +	struct pollfd pollfd;
> +	struct event ebhrb;
> +	pid_t pid;
> +	int ret, loop = 0;
> +
> +	has_failed = false;
> +	pid = fork();
> +	if (pid == -1) {
> +		perror("fork() failed");
> +		return 1;
> +	}
> +
> +	/* Run child */
> +	if (pid == 0) {
> +		start_loop();
> +		exit(0);
> +	}
> +
> +	/* Prepare event */
> +	event_init_opts(&ebhrb, PERF_COUNT_HW_INSTRUCTIONS,
> +				PERF_TYPE_HARDWARE, "insturctions");
Is instructions deliberately spelled incorrectly?

> +	ebhrb.attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
> +	ebhrb.attr.disabled = 1;
> +	ebhrb.attr.mmap = 1;
> +	ebhrb.attr.mmap_data = 1;
> +	ebhrb.attr.sample_period = SAMPLE_PERIOD;
> +	ebhrb.attr.exclude_user = 0;
> +	ebhrb.attr.exclude_kernel = 1;
> +	ebhrb.attr.exclude_hv = 1;
> +	ebhrb.attr.branch_sample_type = branch_sample_type;
> +
> +	/* Open event */
> +	event_open_with_pid(&ebhrb, pid);
> +
> +	/* Mmap ring buffer and enable event */
> +	event_mmap(&ebhrb);
> +	FAIL_IF(event_enable(&ebhrb));
> +
> +	/* Prepare polling */
> +	pollfd.fd = ebhrb.fd;
> +	pollfd.events = POLLIN;
> +
> +	for (loop = 0; loop < LOOP_COUNT; loop++) {
> +		ret = poll(&pollfd, 1, -1);
> +		if (ret == -1) {
> +			perror("poll() failed");
> +			return 1;
> +		}
> +		if (ret == 0) {
> +			perror("poll() timeout");
> +			return 1;
> +		}
> +		read_ring_buffer(&ebhrb);
> +		if (has_failed)
> +			return 1;
1) I don't see anything that sets has_failed after it's initialised.
2) Should these error cases also explicitly terminate the child? Do you
need something like this perhaps?

		if (ret == 0) {
			perror("poll() failed");
			goto err;
		}

		...
	
	}

	...

	return 0;

	err:
	kill(pid, SIGTERM); // maybe even SIGKILL in the error case?
	return 1;

> +	}
> +
> +	/* Disable and close event */
> +	FAIL_IF(event_disable(&ebhrb));
> +	event_close(&ebhrb);
Again, do these need to be replicated in the error path?
> +
> +	/* Terminate child */
> +	kill(pid, SIGKILL);
SIGKILL seems a bit harsh: wouldn't SIGTERM work?
> +	return 0;
> +}
> +
> +static int  bhrb_filters_test(void)
> +{
> +	int i;
> +
> +	/* Fetch branches */
> +	fetch_branches();
> +	init_branch_stats();
> +	init_perf_mmap_stats();
> +
> +	for (i = 0; i < sizeof(branch_test_set)/sizeof(int); i++) {
> +		branch_sample_type = branch_test_set[i];
> +		if (filter_test())
Couldn't branch_sample_type be passed to filter_test as a parameter,
rather than as a global?
> +			return 1;
> +	}
> +


> diff --git a/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
> new file mode 100644
> index 0000000..072375a
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/pmu/bhrb/bhrb_filters.h
> @@ -0,0 +1,16 @@
> +/*
> + * Copyright 2015 Anshuman Khandual, IBM Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
License again. (And in the other files in this patch.)


> +_GLOBAL(start_loop)
> +label:
> +	b label0			/* ANY */
> +	blr				/* ANY_RETURN */
> +label0:
> +	b label1			/* ANY */
> +
> +label1:
> +	b label2			/* ANY */
> +
> +label2:
> +	b label3			/* ANY */
> +
> +label3:
> +	mflr LR_SAVE
> +	bl label4			/* ANY | ANY_CALL */
> +	mtlr LR_SAVE
> +	b start_loop			/* ANY */
> +label4:
> +	mflr LR_SAVE
> +	li 20, 12
> +	cmpi 3, 20, 12
> +	bcl 12, 4 * cr3+2, label5	/* ANY | ANY_CALL | COND */
> +	li 20, 12
> +	cmpi 4, 20, 20
> +	bcl 12, 4 * cr4+0, label5	/* ANY | ANY_CALL | COND */
> +	LOAD_ADDR(20, label5)
> +	mtctr 20
> +	li 22, 10
> +	cmpi 0, 22, 10
> +	bcctrl 12, 4*cr0+2		/* ANY | NY_CALL | IND_CALL | COND */
> +	LOAD_ADDR(20, label5)
> +	mtlr 20
> +	li      20, 10
> +	cmpi    0, 20, 10
> +	bclrl   12, 4*cr0+2		/* ANY | ANY_CALL | IND_CALL | COND */
> +	mtlr LR_SAVE
> +	blr				/* ANY | ANY_RETURN */
> +
> +label5:
> +	blr				/* ANY | ANY_RETURN */
> +
Could these labels have more descriptive names?


Regards,
Daniel
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 860 bytes
Desc: This is a digitally signed message part
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20150611/8e2a9959/attachment.sig>


More information about the Linuxppc-dev mailing list