[Skiboot] [PATCH] occ: sensors: Add sensors to IMC framework

Shilpasri G Bhat shilpa.bhat at linux.vnet.ibm.com
Thu May 30 15:06:15 AEST 2019


This patch exports the non-hwmon On-Chip-Controller (OCC) inband
sensors to the In-Memory Collection (IMC) counter framework. The OCC
copies the sensors to one of two buffers, and the sensor values can be
read from the other buffer. This patch adds an OPAL call that copies
the reading buffer to the kernel, and exports the nest (chip) and core
sensors to IMC.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com>
---
 doc/device-tree/imc.rst                          |  69 +++++
 doc/opal-api/opal-copy-occ-sensor-buffer-173.rst |  30 +++
 hw/occ-sensor.c                                  | 330 +++++++++++++++++++++--
 include/occ.h                                    |   6 +
 include/opal-api.h                               |   3 +-
 5 files changed, 422 insertions(+), 16 deletions(-)
 create mode 100644 doc/opal-api/opal-copy-occ-sensor-buffer-173.rst

diff --git a/doc/device-tree/imc.rst b/doc/device-tree/imc.rst
index 6046b45..2cfb951 100644
--- a/doc/device-tree/imc.rst
+++ b/doc/device-tree/imc.rst
@@ -120,3 +120,72 @@ determines the event to count. *BUFFRSIZE* indicates the memory range.
    b’010’ - 16K entries * 64 per entry = 1M
    b’011’ - 32K entries * 64 per entry = 2M
    b’100’ - 64K entries * 64 per entry = 4M
+
+OCC inband sensor based IMC counters
+------------------------------------
+
+The following properties and nodes are added to /imc-counters to export
+the OCC inband sensors to the IMC framework:
+
+.. code-block:: dts
+
+        imc-counters {
+                imc,occ-update-interval-us = <0x186a0>;
+                imc,occ-sensor-buffer-size = <0x14000>;
+                ...
+                nest-occ-events {
+                        #address-cells = <0x1>;
+                        #size-cells = <0x1>;
+                        phandle = <0x829>;
+
+                        event_FREQA@1628 {
+                                desc = "Average of all core frequencies for Processor";
+                                scale = [31 00];
+                                unit = "MHz";
+                                reg = <0x0 0x10>;
+                                phandle = <0x82d>;
+                                offsets = <0x1632 0x1632>;
+                                event-name = "FREQA";
+                        };
+                        ...
+                };
+
+                nest@200fffd80000 {
+                        type = <0x10>;
+                        compatible = "ibm,occ-counters";
+                        size = <0xa000>;
+                        events-prefix = [00];
+                        reg = <0x0 0x0>;
+                        phandle = <0x828>;
+                        proc-id = <0x0 0x8>;
+                        events = <0x829>;
+                };
+
+                core-occ-events {
+                        #address-cells = <0x1>;
+                        #size-cells = <0x1>;
+                        phandle = <0x81e>;
+
+                        event_NUTIL@11a8 {
+                                desc = "Normalized average utilization, rolling average";
+                                scale = "0.01";
+                                unit = [25 00];
+                                reg = <0x0 0x10>;
+                                phandle = <0x826>;
+                                offsets = <0x11b2 0x11e2 0x1212 0x1242 0x1272 0x12a2 0x12d2 0x1302 0x1332 0x1362 0x1392 0x13c2 0x13f2 0x1422 0x1452 0x1482 0x1512 0x1542 0x1572 0x15a2 0x15d2 0x1602 0x11b2 0x11e2 0x1212 0x1242 0x1272 0x12a2 0x12d2 0x1302 0x1332 0x1362 0x1392 0x13c2 0x13f2 0x1422 0x1452 0x1482 0x14b2 0x14e2 0x1572 0x15a2 0x15d2 0x1602>;
+                                event-name = "NUTIL";
+                        };
+                        ...
+                };
+
+                core@200fffd80000 {
+                        type = <0x4>;
+                        compatible = "ibm,occ-counters";
+                        size = <0xa000>;
+                        events-prefix = [00];
+                        core-id = <0x0 0x4 0x8 0xc 0x10 0x14 0x18 0x1c 0x20 0x24 0x28 0x2c 0x30 0x34 0x38 0x3c 0x48 0x4c 0x50 0x54 0x58 0x5c 0x800 0x804 0x808 0x80c 0x810 0x814 0x818 0x81c 0x820 0x824 0x828 0x82c 0x830 0x834 0x838 0x83c 0x840 0x844 0x850 0x854 0x858 0x85c>;
+                        reg = <0x0 0x0>;
+                        phandle = <0x81d>;
+                        events = <0x81e>;
+                };
+        };
diff --git a/doc/opal-api/opal-copy-occ-sensor-buffer-173.rst b/doc/opal-api/opal-copy-occ-sensor-buffer-173.rst
new file mode 100644
index 0000000..299f8a6
--- /dev/null
+++ b/doc/opal-api/opal-copy-occ-sensor-buffer-173.rst
@@ -0,0 +1,30 @@
+.. _opal-copy-occ-sensor-buffer:
+
+OPAL_COPY_OCC_SENSOR_BUFFER
+===========================
+OPAL call used to copy the active sensor reading buffer to the kernel.
+This buffer is consumed by the IMC framework, which exports the OCC
+inband sensors as perf events. The total size of the buffer is provided
+to the kernel by the DT property "/imc-counters/imc,occ-sensor-buffer-size".
+
+Parameters
+----------
+::
+        void *buff
+        u32 size
+
+buff - The pointer to the buffer passed by the kernel into which the
+       active reading sensor buffer needs to be copied.
+
+size - Size of buff in bytes; only whole per-chip sensor buffers are copied
+
+Returns
+-------
+OPAL_SUCCESS
+  Success
+
+OPAL_PARAMETER
+  Invalid size or invalid pointer (buff)
+
+OPAL_HARDWARE
+  Unable to proceed due to the current hardware state
diff --git a/hw/occ-sensor.c b/hw/occ-sensor.c
index 0337294..b8f45d6 100644
--- a/hw/occ-sensor.c
+++ b/hw/occ-sensor.c
@@ -21,6 +21,7 @@
 #include <device.h>
 #include <cpu.h>
 #include <occ.h>
+#include <imc.h>
 
 enum sensor_attr {
 	SENSOR_SAMPLE,
@@ -201,17 +202,11 @@ static u64 read_sensor(struct occ_sensor_record *sensor, int attr)
 	return 0;
 }
 
-static void *select_sensor_buffer(struct occ_sensor_data_header *hb, int id)
+static void *select_sensor_buffer(struct occ_sensor_data_header *hb, u32 offset)
 {
-	struct occ_sensor_name *md;
 	u8 *ping, *pong;
 	void *buffer = NULL;
 
-	if (!hb)
-		return NULL;
-
-	md = get_names_block(hb);
-
 	ping = (u8 *)((u64)hb + hb->reading_ping_offset);
 	pong = (u8 *)((u64)hb + hb->reading_pong_offset);
 
@@ -225,8 +220,8 @@ static void *select_sensor_buffer(struct occ_sensor_data_header *hb, int id)
 
 	if (*ping && *pong) {
 		u64 tping, tpong;
-		u64 ping_buf = (u64)ping + md[id].reading_offset;
-		u64 pong_buf = (u64)pong + md[id].reading_offset;
+		u64 ping_buf = (u64)ping + offset;
+		u64 pong_buf = (u64)pong + offset;
 
 		tping = ((struct occ_sensor_record *)ping_buf)->timestamp;
 		tpong = ((struct occ_sensor_record *)pong_buf)->timestamp;
@@ -239,14 +234,12 @@ static void *select_sensor_buffer(struct occ_sensor_data_header *hb, int id)
 		buffer = ping;
 	} else if (!*ping && *pong) {
 		buffer = pong;
-	} else if (!*ping && !*pong) {
+	} else {
 		prlog(PR_DEBUG, "OCC: Both ping and pong sensor buffers are invalid\n");
 		return NULL;
 	}
 
 	assert(buffer);
-	buffer = (void *)((u64)buffer + md[id].reading_offset);
-
 	return buffer;
 }
 
@@ -276,15 +269,16 @@ int occ_sensor_read(u32 handle, u64 *data)
 	if (id > hb->nr_sensors)
 		return OPAL_PARAMETER;
 
-	buff = select_sensor_buffer(hb, id);
+	md = get_names_block(hb);
+	buff = select_sensor_buffer(hb, md[id].reading_offset);
 	if (!buff)
 		return OPAL_HARDWARE;
 
+	buff = (void *)((u64)buff + md[id].reading_offset);
 	*data = read_sensor(buff, attr);
 	if (!*data)
 		return OPAL_SUCCESS;
 
-	md = get_names_block(hb);
 	if (md[id].type == OCC_SENSOR_TYPE_POWER && attr == SENSOR_ACCUMULATOR)
 		scale_energy(&md[id], data);
 	else
@@ -494,10 +488,303 @@ static void add_sensor_node(const char *loc, const char *type, int i, int attr,
 	*phandle = node->phandle;
 }
 
+static const struct occ_imc_sensors_desc imc_core_sensors[] = {
+	{ "STOPDEEPACT",	/* per-core sensors: name prefix, then "desc" */
+	"Deepest actual stop state that was entered during OCC sample time" },
+	{ "STOPDEEPREQ",
+	"Deepest stop state that has been requested during OCC sample time" },
+	{ "IPS", "Instructions per second for core" },
+	{ "NOTBZE", "Not Busy (stall) cycles counter for core" },
+	{ "NOTFIN", "Not Finished (stall) cycles counter for core" },
+	{ "FREQA", "Average/actual frequency for core" },
+	{ "UTIL", "Utilization of the core" },
+	{ "NUTIL", "Normalized average utilization, rolling average" },
+	{ "VOLTDROOPCNT", "Small voltage droop count for core" },
+};
+
+static const struct occ_imc_sensors_desc imc_nest_sensors[] = { /* per-chip */
+	{ "IPS", "Vector sensor that takes average of all IPS core sensor" },
+	{ "PROCPWRTHROT", "Throttle count due to power" },
+	{ "PROCOTTHROT", "Throttle count for temperature" },
+	{ "FREQA", "Average of all core frequencies for Processor" },
+	{ "UTIL", "Average of all cores UTIL sensor" },
+};
+
+/*
+ * Create node "event_<sname>" for OCC sensor @md under @events and add its
+ * "desc", "event-name", "unit", "reg" and "scale" properties. "scale" is the
+ * exact decimal string of mantissa * 10^exp from md->scale_factor (signed
+ * exponent in the low byte, mantissa above it). Returns NULL on failure.
+ */
+static struct dt_node *occ_add_imc_event(struct dt_node *events,
+					 struct occ_sensor_name *md,
+					 const char *sname, const char *desc)
+{
+	/* str holds the longest result: 10 digits followed by 127 zeros */
+	char name[30], digits[12], str[144];
+	unsigned int mant = md->scale_factor >> 8;
+	int exp = (s8)(md->scale_factor & 0xFF);
+	struct dt_node *event;
+	int len, skip, pos = 0;
+
+	snprintf(name, sizeof(name), "event_%s", sname);
+	event = dt_new_addr(events, name, md->reading_offset);
+	if (!event)
+		return NULL;
+
+	dt_add_property_string(event, "desc", desc);
+	dt_add_property_string(event, "event-name", sname);
+	dt_add_property_string(event, "unit", md->units);
+	dt_add_property_cells(event, "reg", 0,
+		md->structure_type == OCC_SENSOR_READING_FULL ? 16 : 64);
+
+	/* Cancel trailing zeros against a negative exponent: 100e-1 is 10 */
+	while (exp < 0 && mant && !(mant % 10)) {
+		mant /= 10;
+		exp++;
+	}
+	snprintf(digits, sizeof(digits), "%u", mant);
+	len = strlen(digits);
+
+	if (exp >= 0) {
+		/* Integer: the digits, then exp zeros unless the value is 0 */
+		memcpy(str, digits, len);
+		for (pos = len; mant && exp > 0; exp--)
+			str[pos++] = '0';
+	} else {
+		/* skip = number of digits left of the decimal point */
+		skip = len + exp > 0 ? len + exp : 0;
+		memcpy(str, skip ? digits : "0", skip ? skip : 1);
+		pos = skip ? skip : 1;
+		str[pos++] = '.';
+		/* Values below 0.1 need zeros between the point and digits */
+		for (exp += len; exp < 0; exp++)
+			str[pos++] = '0';
+		memcpy(str + pos, digits + skip, len - skip);
+		pos += len - skip;
+	}
+	str[pos] = '\0';
+	dt_add_property_string(event, "scale", str);
+
+	return event;
+}
+
+/*
+ * Add the IMC event for nest sensor imc_nest_sensors[index] under @events.
+ * Its "offsets" property has one entry per chip: the offset of the sensor's
+ * sample/accumulator field, or 0 for a chip that does not have the sensor.
+ */
+static void occ_add_imc_nest_nodes(struct dt_node *events, int index)
+{
+	const char *sname = imc_nest_sensors[index].name;
+	struct dt_node *event = NULL;
+	struct proc_chip *chip;
+	u32 addr[MAX_OCCS] = { 0 };
+	int occ_num = 0;
+
+	for_each_chip(chip) {
+		struct occ_sensor_data_header *hb;
+		struct occ_sensor_name *md;
+		int i;
+
+		hb = get_sensor_header_block(occ_num);
+		md = get_names_block(hb);
+		for (i = 0; i < hb->nr_sensors; i++)
+			if (md[i].location == OCC_SENSOR_LOC_PROCESSOR &&
+			    !strncmp(md[i].name, sname, strlen(sname)))
+				break;
+
+		/* No match: md[i] would be past the last sensor name */
+		if (i == hb->nr_sensors) {
+			occ_num++;
+			continue;
+		}
+
+		if (!event)
+			event = occ_add_imc_event(events, &md[i], sname,
+						  imc_nest_sensors[index].desc);
+		addr[occ_num++] = md[i].reading_offset +
+			(md[i].structure_type == OCC_SENSOR_READING_FULL ?
+			 offsetof(struct occ_sensor_record, sample) :
+			 offsetof(struct occ_sensor_counter, accumulator));
+	}
+
+	if (event)
+		dt_add_property(event, "offsets", addr, occ_num * sizeof(u32));
+}
+
+/*
+ * Add the IMC event for core sensor imc_core_sensors[sindex] under @events.
+ * @core_id holds @nr_cores PIRs; "offsets" gets one entry per PIR, in the
+ * same order: the offset of that core's sample/accumulator field, or 0 when
+ * no sensor matched the core.
+ */
+static void occ_add_imc_core_nodes(struct dt_node *events, u32 *core_id,
+				   int nr_cores, int sindex)
+{
+	const char *sname = imc_core_sensors[sindex].name;
+	const char *sdesc = imc_core_sensors[sindex].desc;
+	struct dt_node *node = NULL;
+	struct proc_chip *chip;
+	u32 *offs = zalloc(nr_cores * sizeof(u32));
+	int occ = 0;
+
+	assert(offs);
+	for_each_chip(chip) {
+		struct occ_sensor_data_header *hb = get_sensor_header_block(occ);
+		struct occ_sensor_name *names = get_names_block(hb);
+		int s;
+
+		for (s = 0; s < hb->nr_sensors; s++) {
+			struct occ_sensor_name *sensor = &names[s];
+			int core, slot = 0;
+
+			if (sensor->location != OCC_SENSOR_LOC_CORE ||
+			    strncmp(sensor->name, sname, strlen(sname)))
+				continue;
+
+			/* Look for the PIR of this core on this chip */
+			core = parse_entity(sensor->name, NULL);
+			while (slot < nr_cores &&
+			       (pir_to_core_id(core_id[slot]) != core ||
+				pir_to_chip_id(core_id[slot]) != chip->id))
+				slot++;
+			if (slot == nr_cores)
+				continue;
+
+			if (!node)
+				node = occ_add_imc_event(events, sensor, sname,
+							 sdesc);
+
+			offs[slot] = sensor->reading_offset;
+			if (sensor->structure_type == OCC_SENSOR_READING_FULL)
+				offs[slot] += offsetof(struct occ_sensor_record,
+						       sample);
+			else
+				offs[slot] += offsetof(struct occ_sensor_counter,
+						       accumulator);
+		}
+		occ++;
+	}
+
+	if (node)
+		dt_add_property(node, "offsets", offs, nr_cores * sizeof(u32));
+
+	free(offs);
+}
+
+/*
+ * Create the IMC device node @sdev and events node @sevent under @imc for
+ * OCC sensors at @loc (OCC_SENSOR_LOC_CORE or OCC_SENSOR_LOC_PROCESSOR) and
+ * add one event per entry of the matching sensor table. Returns early if
+ * either node cannot be created.
+ */
+static void occ_add_imc_sensors(struct dt_node *imc, const char *sdev,
+				const char *sevent, int loc)
+{
+	struct dt_node *dev, *events;
+	struct proc_chip *chip;
+	u32 proc_id[MAX_OCCS], nr_chips = 0, *core_id;
+	int i, nr_cores = 0;
+
+	dev = dt_new_addr(imc, sdev, occ_sensor_base);
+	events = dev ? dt_new(imc, sevent) : NULL;
+	if (!events)
+		return;
+
+	dt_add_property_cells(dev, "events", events->phandle);
+	dt_add_property_string(dev, "compatible", "ibm,occ-counters");
+	dt_add_property_cells(dev, "reg", 0, 0);
+	dt_add_property_cells(dev, "size", OCC_SENSOR_READING_BUFFER_SIZE);
+	dt_add_property_string(dev, "events-prefix", "");
+	dt_add_property_cells(events, "#address-cells", 1);
+	dt_add_property_cells(events, "#size-cells", 1);
+
+	if (loc == OCC_SENSOR_LOC_PROCESSOR) {
+		for_each_chip(chip)
+			proc_id[nr_chips++] = chip->id;
+		dt_add_property(dev, "proc-id", proc_id,
+				nr_chips * sizeof(u32));
+		dt_add_property_cells(dev, "type", IMC_COUNTER_CHIP);
+		for (i = 0; i < ARRAY_SIZE(imc_nest_sensors); i++)
+			occ_add_imc_nest_nodes(events, i);
+		return;
+	}
+	if (loc != OCC_SENSOR_LOC_CORE)
+		return;
+
+	/* First pass: count the available cores to size the PIR array */
+	for_each_chip(chip) {
+		struct cpu_thread *c = NULL;
+
+		for_each_available_core_in_chip(c, chip->id)
+			nr_cores++;
+	}
+	core_id = zalloc(nr_cores * sizeof(u32));
+	assert(core_id);
+
+	/* Second pass: record each core's PIR in iteration order */
+	nr_cores = 0;
+	for_each_chip(chip) {
+		struct cpu_thread *c = NULL;
+
+		for_each_available_core_in_chip(c, chip->id)
+			core_id[nr_cores++] = c->pir;
+	}
+	dt_add_property(dev, "core-id", core_id, nr_cores * sizeof(u32));
+	dt_add_property_cells(dev, "type", IMC_COUNTER_CORE);
+
+	for (i = 0; i < ARRAY_SIZE(imc_core_sensors); i++)
+		occ_add_imc_core_nodes(events, core_id, nr_cores, i);
+
+	free(core_id);
+}
+
+/*
+ * OPAL_COPY_OCC_SENSOR_BUFFER: copy each chip's selected sensor reading
+ * buffer into consecutive OCC_SENSOR_READING_BUFFER_SIZE slots of @hbuff.
+ * Only the whole slots that fit in @size are filled.
+ */
+static int opal_copy_occ_sensor_buffer(void *hbuff, u32 size)
+{
+	struct proc_chip *chip;
+	u8 occ_num = 0, i;
+
+	if (!hbuff || !opal_addr_valid(hbuff))
+		return OPAL_PARAMETER;
+	for_each_chip(chip)
+		occ_num++;
+
+	if (size < OCC_SENSOR_READING_BUFFER_SIZE ||
+	    size > occ_num * OCC_SENSOR_READING_BUFFER_SIZE)
+		return OPAL_PARAMETER;
+
+	/* The end of the destination must be a valid address as well */
+	if (!opal_addr_valid((u8 *)hbuff + size - 1))
+		return OPAL_PARAMETER;
+
+	for (i = 0; i < size / OCC_SENSOR_READING_BUFFER_SIZE; i++) {
+		struct occ_sensor_data_header *hb = get_sensor_header_block(i);
+		void *buffer = NULL;
+
+		if (hb->valid == 1)
+			buffer = select_sensor_buffer(hb,
+				get_names_block(hb)[0].reading_offset);
+		if (!buffer)
+			return OPAL_HARDWARE;
+
+		memcpy((u8 *)hbuff + i * OCC_SENSOR_READING_BUFFER_SIZE, buffer,
+		       OCC_SENSOR_READING_BUFFER_SIZE);
+	}
+	return OPAL_SUCCESS;
+}
+
+opal_call(OPAL_COPY_OCC_SENSOR_BUFFER, opal_copy_occ_sensor_buffer, 2);
+
 bool occ_sensors_init(void)
 {
 	struct proc_chip *chip;
-	struct dt_node *sg, *exports;
+	struct dt_node *sg, *exports, *imc;
 	int occ_num = 0, i;
 	bool has_gpu = false;
 
@@ -609,5 +896,18 @@ bool occ_sensors_init(void)
 	dt_add_property_u64s(exports, "occ_inband_sensors", occ_sensor_base,
 			     OCC_SENSOR_DATA_BLOCK_SIZE * occ_num);
 
+	imc = dt_find_by_path(dt_root, "/imc-counters");
+	if (!imc)
+		goto out;
+
+	dt_add_property_cells(imc, "imc,occ-sensor-buffer-size",
+			      OCC_SENSOR_READING_BUFFER_SIZE * occ_num);
+	dt_add_property_cells(imc, "imc,occ-update-interval-us", 100000);
+
+	occ_add_imc_sensors(imc, "core", "core-occ-events",
+			    OCC_SENSOR_LOC_CORE);
+	occ_add_imc_sensors(imc, "nest", "nest-occ-events",
+			    OCC_SENSOR_LOC_PROCESSOR);
+out:
 	return true;
 }
diff --git a/include/occ.h b/include/occ.h
index 6b831e0..bd70a05 100644
--- a/include/occ.h
+++ b/include/occ.h
@@ -133,6 +133,7 @@ extern bool is_occ_reset(void);
 
 #define OCC_SENSOR_DATA_BLOCK_OFFSET		0x00580000
 #define OCC_SENSOR_DATA_BLOCK_SIZE		0x00025800
+#define OCC_SENSOR_READING_BUFFER_SIZE		0xA000
 
 /*
  * These should match the definitions inside the OCC source:
@@ -300,3 +301,8 @@ struct occ_sensor_counter {
 	u8 sample;
 	u8 pad[5];
 } __attribute__((__packed__));
+
+struct occ_imc_sensors_desc {
+	const char *name;	/* IMC event name; matched as OCC sensor-name prefix */
+	const char *desc;	/* text of the event's "desc" property */
+};
diff --git a/include/opal-api.h b/include/opal-api.h
index b0ad435..e8adce9 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -229,7 +229,8 @@
 #define OPAL_XIVE_GET_VP_STATE			170 /* Get NVT state */
 #define OPAL_NPU_MEM_ALLOC			171
 #define OPAL_NPU_MEM_RELEASE			172
-#define OPAL_LAST				172
+#define OPAL_COPY_OCC_SENSOR_BUFFER		173
+#define OPAL_LAST				173
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
-- 
1.8.3.1



More information about the Skiboot mailing list