[PATCH v4 4/7] powerpc/powernv: detect supported nest pmus and its events

Sukadev Bhattiprolu sukadev at linux.vnet.ibm.com
Thu Jul 9 08:01:25 AEST 2015


Madhavan Srinivasan [maddy at linux.vnet.ibm.com] wrote:
| Parse device tree to detect supported nest pmu units. Traverse
| through each nest pmu unit folder to find supported events and
| corresponding unit/scale files (if any).
| 
| The nest unit event file from DT, will contain the offset in the
| reserved memory region to get the counter data for a given event.
| Kernel code uses this offset as event configuration value.
| 
| Device tree parser code also looks for scale/unit in the file name and
| passes on the file as an event attr for perf tool to use in the post
| processing.
| 
| Cc: Michael Ellerman <mpe at ellerman.id.au>
| Cc: Benjamin Herrenschmidt <benh at kernel.crashing.org>
| Cc: Paul Mackerras <paulus at samba.org>
| Cc: Anton Blanchard <anton at samba.org>
| Cc: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
| Cc: Anshuman Khandual <khandual at linux.vnet.ibm.com>
| Cc: Stephane Eranian <eranian at google.com>
| Signed-off-by: Madhavan Srinivasan <maddy at linux.vnet.ibm.com>
| ---
|  arch/powerpc/perf/nest-pmu.c | 124 ++++++++++++++++++++++++++++++++++++++++++-
|  1 file changed, 123 insertions(+), 1 deletion(-)
| 
| diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c
| index e7d45ed..6116ff3 100644
| --- a/arch/powerpc/perf/nest-pmu.c
| +++ b/arch/powerpc/perf/nest-pmu.c
| @@ -11,6 +11,119 @@
|  #include "nest-pmu.h"
| 
|  static struct perchip_nest_info p8_nest_perchip_info[P8_NEST_MAX_CHIPS];
| +static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS];
| +
| +static int nest_event_info(struct property *pp, char *start,

nit: s/start/name/?

| +			struct nest_ima_events *p8_events, int flg, u32 val)

nit: s/flg/string/?
| +{
| +	char *buf;
| +
| +	/* memory for event name */
| +	buf = kzalloc(P8_NEST_MAX_PMU_NAME_LEN, GFP_KERNEL);
| +	if (!buf)
| +		return -ENOMEM;
| +
| +	strncpy(buf, start, strlen(start));
| +	p8_events->ev_name = buf;
| +
| +	/* memory for content */
| +	buf = kzalloc(P8_NEST_MAX_PMU_NAME_LEN, GFP_KERNEL);
| +	if (!buf)
| +		return -ENOMEM;
| +
| +	if (flg) {
| +		/* string content*/
| +		if (!pp->value ||
| +		   (strnlen(pp->value, pp->length) == pp->length))
| +			return -EINVAL;
| +
| +		strncpy(buf, (const char *)pp->value, pp->length);
| +	} else
| +		sprintf(buf, "event=0x%x", val);
| +
| +	p8_events->ev_value = buf;
| +	return 0;
| +}
| +
| +static int nest_pmu_create(struct device_node *dev, int pmu_index)
| +{
| +	struct nest_ima_events **p8_events_arr, *p8_events;
| +	struct nest_pmu *pmu_ptr;
| +	struct property *pp;
| +	char *buf, *start;
| +	const __be32 *lval;
| +	u32 val;
| +	int idx = 0, ret;
| +
| +	if (!dev)
| +		return -EINVAL;
| +
| +	/* memory for nest pmus */
| +	pmu_ptr = kzalloc(sizeof(struct nest_pmu), GFP_KERNEL);
| +	if (!pmu_ptr)
| +		return -ENOMEM;
| +
| +	/* Needed for hotplug/migration */
| +	per_nest_pmu_arr[pmu_index] = pmu_ptr;
| +
| +	/* memory for nest pmu events */
| +	p8_events_arr = kzalloc((sizeof(struct nest_ima_events) * 64),
| +								GFP_KERNEL);
| +	if (!p8_events_arr)
| +		return -ENOMEM;
| +	p8_events = (struct nest_ima_events *)p8_events_arr;
| +
| +	/*
| +	 * Loop through each property
| +	 */
| +	for_each_property_of_node(dev, pp) {
| +		start = pp->name;
| +
| +		if (!strcmp(pp->name, "name")) {
| +			if (!pp->value ||
| +			   (strnlen(pp->value, pp->length) == pp->length))
| +				return -EINVAL;

Do we need to check the string length here? If so, should we check against
size we are going to allocate below (P8_NEST_MAX_PMU_NAME_LEN)? Or is it
possible pp->value is not NULL terminated?

| +
| +			buf = kzalloc(P8_NEST_MAX_PMU_NAME_LEN, GFP_KERNEL);
| +			if (!buf)
| +				return -ENOMEM;
| +
| +			/* Save the name to register it later */
| +			sprintf(buf, "Nest_%s", (char *)pp->value);
| +			pmu_ptr->pmu.name = (char *)buf;
| +			continue;
| +		}
| +
| +		/* Skip these, we dont need it */
| +		if (!strcmp(pp->name, "phandle") ||
| +		    !strcmp(pp->name, "device_type") ||
| +		    !strcmp(pp->name, "linux,phandle"))
| +			continue;
| +
| +		if (strncmp(pp->name, "unit.", 5) == 0) {
| +			/* Skip first few chars in the name */
| +			start += 5;
| +			ret = nest_event_info(pp, start, p8_events++, 1, 0);
| +		} else if (strncmp(pp->name, "scale.", 6) == 0) {
| +			/* Skip first few chars in the name */
| +			start += 6;
| +			ret = nest_event_info(pp, start, p8_events++, 1, 0);

Are the 'start.*' and 'unit.*' files events by themselves or just attributes
of events?

These will show up in sysfs as:

	$ ls /sys/bus/event_source/devices/Nest_Alink_BW//events/
	Alink0        Alink0.unit  Alink1.scale  Alink2        Alink2.unit
	Alink0.scale  Alink1       Alink1.unit   Alink2.scale

>From the other PMUs, the 'events' directory contains just events.

Attributes of events, like descriptions go into a separate directory:
Eg: For 24x7 counters:

	$ ls -d /sys/bus/event_source/devices/hv_24x7/event*
	/sys/bus/event_source/devices/hv_24x7/event_descs
	/sys/bus/event_source/devices/hv_24x7/event_long_descs
	/sys/bus/event_source/devices/hv_24x7/events

If events have/need parameters, they go into the event file itself:
Eg on an x86 box:

	$ cat /sys/bus/event_source/devices/cpu/events/cache-misses
	event=0x2e,umask=0x41

For the nest events, are the 'units'  and 'scale' files needed to
identify the event (like the umask in x86 example above) or are they
informational attributes (like descriptions for 24x7 counters)?

IOW, following works on my x86 system (and with 24x7 counters):

	for i in `ls /sys/bus/event_source/devices/cpu/events/`; do
		perf stat -e cpu/$i/ sleep 1;
	done

This loop will fail for Nest events, when it hits files like Alink0.unit.

That said, I am not sure if the above for loop is supposed to work always!
Maybe Peter Zijlstra can comment on that.

Are the units and scales needed/used by perf in computations? If just
informational and, given that we can locate them from the device-tree,
can we drop them altogether?

If they are needed by perf and are attributes of an event, can we
move them to separate directories?

	/sys/bus/event_source/devices/Nest_Alink_BW/events 
	/sys/bus/event_source/devices/Nest_Alink_BW/units
	/sys/bus/event_source/devices/Nest_Alink_BW/scales

Sukadev



| +		} else {
| +			lval = of_get_property(dev, pp->name, NULL);
| +			val = (uint32_t)be32_to_cpup(lval);
| +
| +			ret = nest_event_info(pp, start, p8_events++, 0, val);
| +		}
| +
| +		if (ret)
| +			return ret;
| +
| +		/* book keeping */
| +		idx++;

I don't see idx being used after the increment?

| +	}
| +
| +	return 0;
| +}
| 
|  static int nest_ima_dt_parser(void)
|  {
| @@ -19,7 +132,7 @@ static int nest_ima_dt_parser(void)
|  	const __be64 *chip_ima_size;
|  	struct device_node *dev;
|  	struct perchip_nest_info *p8ni;
| -	int idx;
| +	int idx, ret;
| 
|  	/*
|  	 * "nest-ima" folder contains two things,
| @@ -50,6 +163,15 @@ static int nest_ima_dt_parser(void)
|  		p8ni->vbase = (uint64_t) phys_to_virt(p8ni->pbase);
|  	}
| 
| +	/* Look for supported Nest PMU units */
| +	idx = 0;
| +	for_each_node_by_type(dev, "nest-ima-unit") {
| +		ret = nest_pmu_create(dev, idx);
| +		if (ret)
| +			return ret;
| +		idx++;

idx not used?

| +	}
| +
|  	return 0;
|  }
| 
| -- 
| 1.9.1



More information about the Linuxppc-dev mailing list