[Skiboot] [PATCH V7 3/3] occ: Add support for Version 0x90 OCC_OPAL shared memory region

Shilpasri G Bhat shilpa.bhat at linux.vnet.ibm.com
Tue Feb 14 07:31:42 AEDT 2017


This patch enables pstate table parsing support for P9. It
introduces the following device tree changes.
- Add a new node per chip in /ibm,opal/power-mgt called occ.
	occ@3ffd9f8000 {
		reg = <0x3f 0xfd9f8000>;
		ibm,chip-id = <0x0>;
		ibm,pstate-vdds = <0x4e4e4f4f 0x4f505152 0x53545556
		0x5758595a 0x5b5c5d5e 0x5f606162 0x63646565 0x66676868
		0x696a6a6b 0x6c6d6d6e 0x6f6f7071 0x72727374 0x74757677
		0x7778797a 0x7a7b7c7c>;
		ibm,pstate-vcss = <0x4446484a 0x4c4e4f50 0x50515253
		0x54555556 0x5758595a 0x5a5b5c5d 0x5d5e5e5e 0x5e5f5f5f
		0x5f606060 0x61616161 0x62626263 0x63636364 0x64646465
		0x65656666 0x66666767>;
		phandle = <0x10000326>;
	};
- Move VID (ibm,pstate-vdds, ibm,pstate-vcss) into the new per-chip
  /ibm,opal/power-mgt/occ node as these properties are unique to
  each chip.
- WOF is supported from version 0x02. Till now we have been adding
  max ultra-turbo pstate (ibm,pstate-ultra-turbo), max turbo pstate
  (ibm,pstate-turbo) and max pstate-per-n-core (ibm,pstate-core-max)
  only when WOF is enabled. This patch will add these properties
  even when WOF is disabled. When WOF is disabled, the max ultra-turbo
  pstate equals the max turbo pstate and all entries of the max
  pstate-per-n-core array equal the max turbo pstate. So the above
  three properties are added whenever WOF is supported, to ease the
  reporting of this data in the host.

Signed-off-by: Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com>
---
Changes from V6:
- Remove all the macro definitions for code blocks.
- Retain the links to OCC pstate table header file
- Define OPAL_DYNAMIC_DATA_OFFSET and remove the large padding between
  static and dynamic table to keep the structure definition consistent
  between OCC and OPAL code.
- Use malloc() followed by assert()
- Move pstate-core-max property back to the "power-mgt" node
- Add documentation for device-tree properties

 doc/device-tree/ibm,opal/power-mgt.rst     |  20 +-
 doc/device-tree/ibm,opal/power-mgt/occ.rst |  38 ++
 hw/occ.c                                   | 633 ++++++++++++++++++++---------
 3 files changed, 492 insertions(+), 199 deletions(-)
 create mode 100644 doc/device-tree/ibm,opal/power-mgt/occ.rst

diff --git a/doc/device-tree/ibm,opal/power-mgt.rst b/doc/device-tree/ibm,opal/power-mgt.rst
index 275f92b..af99a22 100644
--- a/doc/device-tree/ibm,opal/power-mgt.rst
+++ b/doc/device-tree/ibm,opal/power-mgt.rst
@@ -41,8 +41,6 @@ Example:
                 ibm,cpu-idle-state-flags = <0x11000 0x81003 0x47003>;
                 ibm,cpu-idle-state-names = "nap", "fastsleep_", "winkle";
                 ibm,cpu-idle-state-pmicr = <0x0 0x0 0x20 0x0 0x0 0x0>;
-                ibm,pstate-vdds = <0x5758595a 0x5a5b5c5d 0x5e5e5f60 0x61626263 0x64656566 0x66676768 0x6869696a 0x6a6b6b6c 0x6c6d6d6e 0x6e6f6f70 0x70717272>;
-                ibm,pstate-vcss = <0x4d4e4f4f 0x50505152 0x52535354 0x55555657 0x57585859 0x59595a5a 0x5a5b5b5c 0x5c5c5d5d 0x5d5e5e5f 0x5f5f6060 0x60616162>;
                 ibm,pstate-nominal = <0xffffffef>;
                 ibm,cpu-idle-state-residency-ns = <0x186a0 0x11e1a300 0x3b9aca00>;
                 ibm,cpu-idle-state-pmicr-mask = <0x0 0x0 0x30 0x0 0x0 0x0>;
@@ -104,20 +102,22 @@ ibm,pstate-max ibm,pstate-min ibm,pstate-nominal
 These properties give the maximum, minimum and nominal pstate values, as an id
 specified in the ibm,pstate-ids file.
 
-
-ibm,pstate-vcss ibm,pstate-vdds
--------------------------------
-
-These properties list a voltage-identifier of each of the pstates listed in
-ibm,pstate-ids for the Vcs and Vdd values used for that pstate. Each VID is a
-single byte.
-
 ibm,pstate-ultra-turbo ibm,pstate-turbo
 ---------------------------------------
 
 These properties are added when ultra-turbo(WOF) is enabled. These properties
 give the max turbo and max ultra-turbo pstate.
 
+Example:
+
+.. code-block:: dts
+
+  power-mgt {
+        ibm,pstate-core-max = <0x0 0x0 0x0 0x0 0x0 0x0 0x0>;
+        ibm,pstate-turbo = <0xfffffffb>;
+        ibm,pstate-ultra-turbo = <0x0>;
+  };
+
 ibm,pstate-core-max
 -------------------
 
diff --git a/doc/device-tree/ibm,opal/power-mgt/occ.rst b/doc/device-tree/ibm,opal/power-mgt/occ.rst
new file mode 100644
index 0000000..ba04f05
--- /dev/null
+++ b/doc/device-tree/ibm,opal/power-mgt/occ.rst
@@ -0,0 +1,38 @@
+ibm,opal/power-mgt/occ device tree entries
+==========================================
+
+This node exports the per-chip pstate table properties to the kernel.
+
+Example:
+
+.. code-block:: dts
+
+  occ@7ffddf8000 {
+        ibm,pstate-vdds = [45 45 46 46 46 47 48 49 4a 4b 4c 4d 4f 50 51 52 53 54 55 57 58 59 5a 5b 5c 5d 5e 5f 5f 60 61 62 63 64 65 65 66 67 68 69 6a 6a 6b 6c 6d 6e 6f 70 70 71];
+        ibm,chip-id = <0x1>;
+        phandle = <0x100003b8>;
+        ibm,pstate-vcss = [3b 3d 3f 41 42 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 50 50 51 52 53 54 55 56 56 57 57 58 58 59 59 5a 5a 5b 5b 5c 5c 5d 5d 5e 5e 5f 5f 60 60 61 61 62 62];
+        reg = <0x7f 0xfddf8000 0xb98>;
+  };
+
+ibm,chip-id
+-----------
+
+This property denotes the ID of the chip to which the OCC belongs.
+
+reg
+---
+
+This tuple gives the starting address of the OPAL data in HOMER and
+the size of the OPAL data.
+
+The top-level /ibm,opal/power-mgt contains :
+ #size-cells = <1>
+ #address-cells = <2>
+
+ibm,pstate-vcss ibm,pstate-vdds
+-------------------------------
+
+These properties list a voltage-identifier of each of the pstates listed in
+ibm,pstate-ids for the Vcs and Vdd values used for that pstate in that chip.
+Each VID is a single byte.
diff --git a/hw/occ.c b/hw/occ.c
index 9eb32b4..7b502b4 100644
--- a/hw/occ.c
+++ b/hw/occ.c
@@ -30,44 +30,133 @@
 
 /* OCC Communication Area for PStates */
 
-#define P8_HOMER_SAPPHIRE_DATA_OFFSET	0x1F8000
+#define P8_HOMER_OPAL_DATA_OFFSET	0x1F8000
+#define P9_HOMER_OPAL_DATA_OFFSET	0x0E2000
 
-#define MAX_PSTATES 256
+#define OPAL_DYNAMIC_DATA_OFFSET	0x0B80
+/* relative to HOMER_OPAL_DATA_OFFSET */
 
-#define chip_occ_data(chip) \
-		((struct occ_pstate_table *)(chip->homer_base + \
-				P8_HOMER_SAPPHIRE_DATA_OFFSET))
+#define MAX_PSTATES			256
+#define MAX_P8_CORES			12
+#define MAX_P9_CORES			24
 
-static bool occ_reset;
-static struct lock occ_lock = LOCK_UNLOCKED;
-
-struct occ_pstate_entry {
-	s8 id;
-	u8 flags;
-	u8 vdd;
-	u8 vcs;
-	u32 freq_khz;
-} __packed;
-
-/*
- * OCC-OPAL Shared Memory Region Version 2
- * https://github.com/open-power/occ/blob/master/src/occ/proc/proc_pstate.h
- * Interface defined in 'sapphire_table_t'
+/**
+ * OCC-OPAL Shared Memory Region
+ *
+ * Reference document :
+ * https://github.com/open-power/docs/blob/master/occ/OCC_OpenPwr_FW_Interfaces.pdf
+ *
+ * Supported layout versions:
+ * - 0x01, 0x02 : P8
+ * https://github.com/open-power/occ/blob/master_p8/src/occ/proc/proc_pstate.h
+ *
+ * - 0x90 : P9
+ * https://github.com/open-power/occ/blob/master/src/occ_405/proc/proc_pstate.h
+ *   In 0x90 the data is separated into :-
+ *   -- Static Data (struct occ_pstate_table): Data is written once by OCC
+ *   -- Dynamic Data (struct occ_dynamic_data): Data is updated at runtime
+ *
+ * struct occ_pstate_table -	Pstate table layout
+ * @valid:			Indicates if data is valid
+ * @version:			Layout version
+ * @v2.throttle:		Reason for limiting the max pstate
+ * @v9.occ_role:		OCC role (Master/Slave)
+ * @v#.pstate_min:		Minimum pstate ever allowed
+ * @v#.pstate_nom:		Nominal pstate
+ * @v#.pstate_turbo:		Maximum turbo pstate
+ * @v#.pstate_ultra_turbo:	Maximum ultra turbo pstate and the maximum
+ *				pstate ever allowed
+ * @v#.pstates:			Pstate-id and frequency list from Pmax to Pmin
+ * @v#.pstates.id:		Pstate-id
+ * @v#.pstates.flags:		Pstate-flag(reserved)
+ * @v2.pstates.vdd:		Voltage Identifier
+ * @v2.pstates.vcs:		Voltage Identifier
+ * @v#.pstates.freq_khz:	Frequency in KHz
+ * @v#.core_max[1..N]:		Max pstate with N active cores
+ * @spare/reserved/pad:		Unused data
  */
 struct occ_pstate_table {
 	u8 valid;
 	u8 version;
-	u8 throttle;
-	s8 pstate_min;
-	s8 pstate_nom;
-	s8 pstate_turbo;
-	s8 pstate_ultra_turbo;
-	u8 spare;
+	union __packed {
+		struct __packed { /* Version 0x01 and 0x02 */
+			u8 throttle;
+			s8 pstate_min;
+			s8 pstate_nom;
+			s8 pstate_turbo;
+			s8 pstate_ultra_turbo;
+			u8 spare;
+			u64 reserved;
+			struct __packed {
+				s8 id;
+				u8 flags;
+				u8 vdd;
+				u8 vcs;
+				u32 freq_khz;
+			} pstates[MAX_PSTATES];
+			s8 core_max[MAX_P8_CORES];
+			u8 pad[100];
+		} v2;
+		struct __packed { /* Version 0x90 */
+			u8 occ_role;
+			u8 pstate_min;
+			u8 pstate_nom;
+			u8 pstate_turbo;
+			u8 pstate_ultra_turbo;
+			u8 spare;
+			u64 reserved1;
+			u64 reserved2;
+			struct __packed {
+				u8 id;
+				u8 flags;
+				u16 reserved;
+				u32 freq_khz;
+			} pstates[MAX_PSTATES];
+			u8 core_max[MAX_P9_CORES];
+			u8 pad[56];
+		} v9;
+	};
+} __packed;
+
+/**
+ * OCC-OPAL Shared Memory Interface Dynamic Data Vx90
+ *
+ * struct occ_dynamic_data -	Contains runtime attributes
+ * @occ_state:			Current state of OCC
+ * @cpu_throttle:		Reason for limiting the max pstate
+ * @mem_throttle:		Reason for throttling memory
+ * @quick_pwr_drop:		Indicates if QPD is asserted
+ * @pwr_shifting_ratio:		Indicates the current percentage of power to
+ *				take away from the CPU vs GPU when shifting
+ *				power to maintain a power cap. Value of 100
+ *				means take all power from CPU.
+ * @pwr_cap_type:		Indicates type of power cap in effect
+ * @min_pwr_cap:		Minimum allowed system power cap in Watts
+ * @max_pwr_cap:		Maximum allowed system power cap in Watts
+ * @cur_pwr_cap:		Current system power cap
+ * @spare/reserved:		Unused data
+ */
+struct occ_dynamic_data {
+	u8 occ_state;
+	u8 spare1;
+	u8 spare2;
+	u8 spare3;
+	u8 spare4;
+	u8 cpu_throttle;
+	u8 mem_throttle;
+	u8 quick_pwr_drop;
+	u8 pwr_shifting_ratio;
+	u8 pwr_cap_type;
+	u16 min_pwr_cap;
+	u16 max_pwr_cap;
+	u16 cur_pwr_cap;
 	u64 reserved;
-	struct occ_pstate_entry pstates[MAX_PSTATES];
-	s8 core_max[16];
 } __packed;
 
+static bool occ_reset;
+static struct lock occ_lock = LOCK_UNLOCKED;
+static unsigned long homer_opal_data_offset;
+
 DEFINE_LOG_ENTRY(OPAL_RC_OCC_LOAD, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
 		OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
 		OPAL_NA);
@@ -118,12 +207,26 @@ static int cmp_negative_pstates(int a, int b)
 	return 0;
 }
 
+static inline
+struct occ_pstate_table *get_occ_pstate_table(struct proc_chip *chip)
+{
+	return (struct occ_pstate_table *)
+	       (chip->homer_base + homer_opal_data_offset);
+}
+
+static inline
+struct occ_dynamic_data *get_occ_dynamic_data(struct proc_chip *chip)
+{
+	return (struct occ_dynamic_data *)
+	       (chip->homer_base + homer_opal_data_offset +
+		OPAL_DYNAMIC_DATA_OFFSET);
+}
+
 /* Check each chip's HOMER/Sapphire area for PState valid bit */
 static bool wait_for_all_occ_init(void)
 {
 	struct proc_chip *chip;
 	struct dt_node *xn;
-	uint64_t occ_data_area;
 	struct occ_pstate_table *occ_data;
 	int tries;
 	uint64_t start_time, end_time;
@@ -151,8 +254,7 @@ static bool wait_for_all_occ_init(void)
 		}
 
 		/* Get PState table address */
-		occ_data_area = chip->homer_base + P8_HOMER_SAPPHIRE_DATA_OFFSET;
-		occ_data = (struct occ_pstate_table *)occ_data_area;
+		occ_data = get_occ_pstate_table(chip);
 
 		/*
 		 * Checking for occ_data->valid == 1 is ok because we clear all
@@ -183,8 +285,7 @@ static bool wait_for_all_occ_init(void)
 			chip->occ_functional = true;
 
 		prlog(PR_DEBUG, "OCC: Chip %02x Data (%016llx) = %016llx\n",
-		      chip->id, occ_data_area,
-		      *(uint64_t *)occ_data_area);
+		      chip->id, (uint64_t)occ_data, *(uint64_t *)occ_data);
 	}
 	end_time = mftb();
 	prlog(PR_NOTICE, "OCC: All Chip Rdy after %lu ms\n",
@@ -200,39 +301,117 @@ static bool wait_for_all_occ_init(void)
 	return true;
 }
 
+/*
+ * OCC provides pstate table entries in continuous descending order.
+ * Parse the pstate table to skip pstate_ids that are greater
+ * than Pmax. If a pstate_id is equal to Pmin then add it to
+ * the list and break from the loop as this is the last valid
+ * element in the pstate table.
+ */
+static void parse_pstates_v2(struct occ_pstate_table *data, u32 *dt_id,
+			     u32 *dt_freq, int nr_pstates, int pmax, int pmin)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
+		if (cmp_pstates(data->v2.pstates[i].id, pmax) > 0)
+			continue;
+
+		dt_id[j] = data->v2.pstates[i].id;
+		dt_freq[j] = data->v2.pstates[i].freq_khz / 1000;
+		j++;
+
+		if (data->v2.pstates[i].id == pmin)
+			break;
+	}
+
+	if (j != nr_pstates)
+		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
+			nr_pstates, j);
+}
+
+static void parse_pstates_v9(struct occ_pstate_table *data, u32 *dt_id,
+			     u32 *dt_freq, int nr_pstates, int pmax, int pmin)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
+		if (cmp_pstates(data->v9.pstates[i].id, pmax) > 0)
+			continue;
+
+		dt_id[j] = data->v9.pstates[i].id;
+		dt_freq[j] = data->v9.pstates[i].freq_khz / 1000;
+		j++;
+
+		if (data->v9.pstates[i].id == pmin)
+			break;
+	}
+
+	if (j != nr_pstates)
+		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
+			nr_pstates, j);
+}
+
+static void parse_vid(struct occ_pstate_table *occ_data,
+		      struct dt_node *node, u8 nr_pstates,
+		      int pmax, int pmin)
+{
+	u8 *dt_vdd, *dt_vcs;
+	int i, j;
+
+	dt_vdd = malloc(nr_pstates);
+	assert(dt_vdd);
+	dt_vcs = malloc(nr_pstates);
+	assert(dt_vcs);
+
+	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
+		if (cmp_pstates(occ_data->v2.pstates[i].id, pmax) > 0)
+			continue;
+
+		dt_vdd[j] = occ_data->v2.pstates[i].vdd;
+		dt_vcs[j] = occ_data->v2.pstates[i].vcs;
+		j++;
+
+		if (occ_data->v2.pstates[i].id == pmin)
+			break;
+	}
+
+	dt_add_property(node, "ibm,pstate-vdds", dt_vdd, nr_pstates);
+	dt_add_property(node, "ibm,pstate-vcss", dt_vcs, nr_pstates);
+
+	free(dt_vdd);
+	free(dt_vcs);
+}
+
 /* Add device tree properties to describe pstates states */
-/* Retrun nominal pstate to set in each core */
-static bool add_cpu_pstate_properties(s8 *pstate_nom)
+/* Return nominal pstate to set in each core */
+static bool add_cpu_pstate_properties(int *pstate_nom)
 {
 	struct proc_chip *chip;
 	uint64_t occ_data_area;
 	struct occ_pstate_table *occ_data;
 	struct dt_node *power_mgt;
-	u8 nr_pstates, nr_cores = 0;
-	s8 pmax;
 	/* Arrays for device tree */
 	u32 *dt_id, *dt_freq;
-	u8 *dt_vdd, *dt_vcs;
-	s8 *dt_core_max = NULL;
-	bool rc, ultra_turbo_en;
-	int i, j;
+	int pmax, pmin, pnom;
+	u8 nr_pstates;
+	bool ultra_turbo_supported;
+	int i;
 
 	prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
 
-	/* Find first chip and core */
+	/* Find first chip */
 	chip = next_chip(NULL);
 
 	/* Extract PState information from OCC */
+	occ_data = get_occ_pstate_table(chip);
 
-	/* Dump state table */
-	occ_data_area = chip->homer_base + P8_HOMER_SAPPHIRE_DATA_OFFSET;
-
+	/* Dump first 16 bytes of PState table */
+	occ_data_area = (uint64_t)occ_data;
 	prlog(PR_DEBUG, "OCC: Data (%16llx) = %16llx %16llx\n",
 	      occ_data_area,
 	      *(uint64_t *)occ_data_area,
-	      *(uint64_t *)(occ_data_area+8));
-	
-	occ_data = (struct occ_pstate_table *)occ_data_area;
+	      *(uint64_t *)(occ_data_area + 8));
 
 	if (!occ_data->valid) {
 		/**
@@ -250,21 +429,70 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
 
 	/*
 	 * Workload-Optimized-Frequency(WOF) or Ultra-Turbo is supported
-	 * from version 2 onwards. If WOF is disabled then, the max
+	 * from version 0x02 onwards. If WOF is disabled then, the max
 	 * ultra_turbo pstate will be equal to max turbo pstate.
 	 */
-	if (occ_data->version > 1 &&
-	    cmp_pstates(occ_data->pstate_ultra_turbo,
-			occ_data->pstate_turbo) > 0)
-		ultra_turbo_en = true;
-	else
-		ultra_turbo_en = false;
-
-	pmax = ultra_turbo_en ? occ_data->pstate_ultra_turbo :
-				occ_data->pstate_turbo;
+	ultra_turbo_supported = true;
+
+	/* Parse Pmax, Pmin and Pnominal */
+	switch (occ_data->version) {
+	case 0x01:
+		ultra_turbo_supported = false;
+	case 0x02:
+		if (proc_gen == proc_gen_p9) {
+			/**
+			 * @fwts-label OCCInvalidVersion02
+			 * @fwts-advice The PState table layout version is not
+			 * supported in P9. So OPAL will not parse the PState
+			 * table. CPU frequency scaling will not be functional
+			 * as frequency and pstate-ids are not added to DT.
+			 */
+			prerror("OCC: Version %x is not supported in P9\n",
+				occ_data->version);
+			return false;
+		}
+		pmin = occ_data->v2.pstate_min;
+		pnom = occ_data->v2.pstate_nom;
+		if (ultra_turbo_supported)
+			pmax = occ_data->v2.pstate_ultra_turbo;
+		else
+			pmax = occ_data->v2.pstate_turbo;
+		break;
+	case 0x90:
+		if (proc_gen == proc_gen_p8) {
+			/**
+			 * @fwts-label OCCInvalidVersion90
+			 * @fwts-advice The PState table layout version is not
+			 * supported in P8. So OPAL will not parse the PState
+			 * table. CPU frequency scaling will not be functional
+			 * as frequency and pstate-ids are not added to DT.
+			 */
+			prerror("OCC: Version %x is not supported in P8\n",
+				occ_data->version);
+			return false;
+		}
+		pmin = occ_data->v9.pstate_min;
+		pnom = occ_data->v9.pstate_nom;
+		if (ultra_turbo_supported)
+			pmax = occ_data->v9.pstate_ultra_turbo;
+		else
+			pmax = occ_data->v9.pstate_turbo;
+		break;
+	default:
+		/**
+		 * @fwts-label OCCUnsupportedVersion
+		 * @fwts-advice The PState table layout version is not
+		 * supported. So OPAL will not parse the PState table.
+		 * CPU frequency scaling will not be functional as OPAL
+		 * doesn't populate the device tree with pstates.
+		 */
+		prerror("OCC: Unsupported pstate table layout version %d\n",
+			occ_data->version);
+		return false;
+	}
 
 	/* Sanity check for pstate limits */
-	if (cmp_pstates(occ_data->pstate_min, pmax) > 0) {
+	if (cmp_pstates(pmin, pmax) > 0) {
 		/**
 		 * @fwts-label OCCInvalidPStateLimits
 		 * @fwts-advice The min pstate is greater than the
@@ -274,16 +502,27 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
 		 * CPU Frequency management will not be functional in
 		 * the host.
 		 */
-		prlog(PR_ERR, "OCC: Invalid Pstate Limits. Pmin(%d) > Pmax (%d)\n",
-		      occ_data->pstate_min, pmax);
+		prerror("OCC: Invalid pstate limits. Pmin(%d) > Pmax (%d)\n",
+			pmin, pmax);
 		return false;
 	}
 
-	nr_pstates = labs(pmax - occ_data->pstate_min) + 1;
-	prlog(PR_DEBUG, "OCC: Min %d Nom %d Max %d Nr States %d\n", 
-	      occ_data->pstate_min, occ_data->pstate_nom,
-	      pmax, nr_pstates);
+	if (cmp_pstates(pnom, pmax) > 0) {
+		/**
+		 * @fwts-label OCCInvalidNominalPState
+		 * @fwts-advice The nominal pstate is greater than the
+		 * max pstate, this could be due to corrupted/invalid
+		 * data in OCC-OPAL shared memory region. So OPAL has
+		 * limited the nominal pstate to max pstate.
+		 */
+		prerror("OCC: Clipping nominal pstate(%d) to Pmax(%d)\n",
+			pnom, pmax);
+		pnom = pmax;
+	}
 
+	nr_pstates = labs(pmax - pmin) + 1;
+	prlog(PR_DEBUG, "OCC: Version %x Min %d Nom %d Max %d Nr States %d\n",
+	      occ_data->version, pmin, pnom, pmax, nr_pstates);
 	if (nr_pstates <= 1 || nr_pstates > 128) {
 		/**
 		 * @fwts-label OCCInvalidPStateRange
@@ -294,7 +533,8 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
 		 * that CPU idle states and CPU frequency scaling
 		 * will not be functional.
 		 */
-		prlog(PR_ERR, "OCC: OCC range is not valid\n");
+		prerror("OCC: OCC range is not valid; No of pstates = %d\n",
+			nr_pstates);
 		return false;
 	}
 
@@ -310,141 +550,134 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
 		return false;
 	}
 
-	rc = false;
-
-	/* Setup arrays for device-tree */
-	/* Allocate memory */
 	dt_id = malloc(nr_pstates * sizeof(u32));
-	if (!dt_id) {
-		/**
-		 * @fwts-label OCCdt_idENOMEM
-		 * @fwts-advice Out of memory when allocating pstates array.
-		 * No Pstates added to device tree, pstates not functional.
-		 */
-		prlog(PR_ERR, "OCC: dt_id array alloc failure\n");
-		goto out;
-	}
-
+	assert(dt_id);
 	dt_freq = malloc(nr_pstates * sizeof(u32));
-	if (!dt_freq) {
-		/**
-		 * @fwts-label OCCdt_freqENOMEM
-		 * @fwts-advice Out of memory when allocating pstates array.
-		 * No Pstates added to device tree, pstates not functional.
-		 */
-		prlog(PR_ERR, "OCC: dt_freq array alloc failure\n");
-		goto out_free_id;
-	}
+	assert(dt_freq);
 
-	dt_vdd = malloc(nr_pstates * sizeof(u8));
-	if (!dt_vdd) {
-		/**
-		 * @fwts-label OCCdt_vddENOMEM
-		 * @fwts-advice Out of memory when allocating pstates array.
-		 * No Pstates added to device tree, pstates not functional.
-		 */
-		prlog(PR_ERR, "OCC: dt_vdd array alloc failure\n");
-		goto out_free_freq;
-	}
-
-	dt_vcs = malloc(nr_pstates * sizeof(u8));
-	if (!dt_vcs) {
-		/**
-		 * @fwts-label OCCdt_vcsENOMEM
-		 * @fwts-advice Out of memory when allocating pstates array.
-		 * No Pstates added to device tree, pstates not functional.
-		 */
-		prlog(PR_ERR, "OCC: dt_vcs array alloc failure\n");
-		goto out_free_vdd;
+	switch (occ_data->version) {
+	case 0x01:
+	case 0x02:
+		parse_pstates_v2(occ_data, dt_id, dt_freq, nr_pstates,
+				 pmax, pmin);
+		break;
+	case 0x90:
+		parse_pstates_v9(occ_data, dt_id, dt_freq, nr_pstates,
+				 pmax, pmin);
+		break;
+	default:
+		return false;
 	}
 
-	if (ultra_turbo_en) {
-		nr_cores = get_available_nr_cores_in_chip(chip->id);
-		dt_core_max = malloc(nr_cores * sizeof(s8));
-		if (!dt_core_max) {
-			/**
-			 * @fwts-label OCCdt_core_maxENOMEM
-			 * @fwts-advice Out of memory allocating dt_core_max
-			 * array. No PStates in Device Tree: non-functional
-			 * power/frequency management.
-			 */
-			prlog(PR_ERR, "OCC: dt_core_max alloc failure\n");
-			goto out_free_vcs;
-		}
+	/* Add the device-tree entries */
+	dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
+			nr_pstates * sizeof(u32));
+	dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
+			nr_pstates * sizeof(u32));
+	dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
+	dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
+	dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
 
-		for (i = 0; i < nr_cores; i++)
-			dt_core_max[i] = occ_data->core_max[i];
-	}
+	free(dt_freq);
+	free(dt_id);
 
 	/*
-	 * OCC provides pstate table entries in continuous descending order.
-	 * Parse the pstate table to skip pstate_ids that are greater
-	 * than Pmax. If a pstate_id is equal to Pmin then add it to
-	 * the list and break from the loop as this is the last valid
-	 * element in the pstate table.
+	 * Parse and add WOF properties: turbo, ultra-turbo and core_max array.
+	 * core_max[1..n] array provides the max sustainable pstate that can be
+	 * achieved with i active cores in the chip.
 	 */
-	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
-		if (cmp_pstates(occ_data->pstates[i].id, pmax) > 0)
-			continue;
+	if (ultra_turbo_supported) {
+		int pturbo, pultra_turbo;
+		u8 nr_cores = get_available_nr_cores_in_chip(chip->id);
+		u32 *dt_cmax;
+
+		dt_cmax = malloc(nr_cores * sizeof(u32));
+		assert(dt_cmax);
+		switch (occ_data->version) {
+		case 0x02:
+			pturbo = occ_data->v2.pstate_turbo;
+			pultra_turbo = occ_data->v2.pstate_ultra_turbo;
+			for (i = 0; i < nr_cores; i++)
+				dt_cmax[i] = occ_data->v2.core_max[i];
+			break;
+		case 0x90:
+			pturbo = occ_data->v9.pstate_turbo;
+			pultra_turbo = occ_data->v9.pstate_ultra_turbo;
+			for (i = 0; i < nr_cores; i++)
+				dt_cmax[i] = occ_data->v9.core_max[i];
+			break;
+		default:
+			return false;
+		}
 
-		dt_id[j] = occ_data->pstates[i].id;
-		dt_freq[j] = occ_data->pstates[i].freq_khz / 1000;
-		dt_vdd[j] = occ_data->pstates[i].vdd;
-		dt_vcs[j] = occ_data->pstates[i].vcs;
-		j++;
+		if (cmp_pstates(pturbo, pmax) > 0) {
+			prerror("OCC: Clipping turbo pstate(%d) to Pmax(%d)\n",
+				pturbo, pmax);
+			dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
+					      pmax);
+		} else {
+			dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
+					      pturbo);
+		}
 
-		if (occ_data->pstates[i].id == occ_data->pstate_min)
-			break;
-	}
+		dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
+				      pultra_turbo);
+		dt_add_property(power_mgt, "ibm,pstate-core-max", dt_cmax,
+				nr_cores * sizeof(u32));
 
-	if (j != nr_pstates) {
-		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
-			nr_pstates, j);
-		goto out_free_vcs;
+		free(dt_cmax);
 	}
 
-	/* Add the device-tree entries */
-	dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
-			nr_pstates * sizeof(u32));
-	dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
-			nr_pstates * sizeof(u32));
-	dt_add_property(power_mgt, "ibm,pstate-vdds", dt_vdd, nr_pstates);
-	dt_add_property(power_mgt, "ibm,pstate-vcss", dt_vcs, nr_pstates);
-	dt_add_property_cells(power_mgt, "ibm,pstate-min", occ_data->pstate_min);
-	dt_add_property_cells(power_mgt, "ibm,pstate-nominal", occ_data->pstate_nom);
-	dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
+	if (occ_data->version > 0x02)
+		goto out;
 
-	if (ultra_turbo_en) {
-		dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
-				      occ_data->pstate_turbo);
-		dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
-				      occ_data->pstate_ultra_turbo);
-		dt_add_property(power_mgt, "ibm,pstate-core-max", dt_core_max,
-				nr_cores);
-		free(dt_core_max);
-	}
+	dt_add_property_cells(power_mgt, "#address-cells", 2);
+	dt_add_property_cells(power_mgt, "#size-cells", 1);
 
-	/* Return pstate to set for each core */
-	*pstate_nom = occ_data->pstate_nom;
-	rc = true;
+	/* Add chip specific pstate properties */
+	for_each_chip(chip) {
+		struct dt_node *occ_node;
 
-out_free_vcs:
-	free(dt_vcs);
-out_free_vdd:
-	free(dt_vdd);
-out_free_id:
-	free(dt_id);
-out_free_freq:
-	free(dt_freq);
+		occ_data = get_occ_pstate_table(chip);
+		occ_node = dt_new_addr(power_mgt, "occ", (uint64_t)occ_data);
+		if (!occ_node) {
+			/**
+			 * @fwts-label OCCDTFailedNodeCreation
+			 * @fwts-advice Failed to create
+			 * /ibm,opal/power-mgt/occ. Per-chip pstate properties
+			 * are not added to Device Tree.
+			 */
+			prerror("OCC: Failed to create /ibm,opal/power-mgt/occ@%llx\n",
+				(uint64_t)occ_data);
+			return false;
+		}
+
+		dt_add_property_cells(occ_node, "reg",
+				      hi32((uint64_t)occ_data),
+				      lo32((uint64_t)occ_data),
+				      OPAL_DYNAMIC_DATA_OFFSET +
+				      sizeof(struct occ_dynamic_data));
+		dt_add_property_cells(occ_node, "ibm,chip-id", chip->id);
+
+		/*
+		 * Parse and add pstate Voltage Identifiers (VID) to DT which
+		 * are provided by OCC in version 0x01 and 0x02
+		 */
+		parse_vid(occ_data, occ_node, nr_pstates, pmax, pmin);
+	}
 out:
-	return rc;
+	/* Return pstate to set for each core */
+	*pstate_nom = pnom;
+	return true;
 }
 
 /*
  * Prepare chip for pstate transitions
  */
 
-static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *c, s8 pstate_nom)
+static bool cpu_pstates_prepare_core(struct proc_chip *chip,
+				     struct cpu_thread *c,
+				     int pstate_nom)
 {
 	uint32_t core = pir_to_core_id(c->pir);
 	uint64_t tmp, pstate;
@@ -532,6 +765,23 @@ static void occ_msg_consumed(void *data __unused)
 	unlock(&occ_lock);
 }
 
+static inline u8 get_cpu_throttle(struct proc_chip *chip)
+{
+	struct occ_pstate_table *pdata = get_occ_pstate_table(chip);
+	struct occ_dynamic_data *data;
+
+	switch (pdata->version) {
+	case 0x01:
+	case 0x02:
+		return pdata->v2.throttle;
+	case 0x90:
+		data = get_occ_dynamic_data(chip);
+		return data->cpu_throttle;
+	default:
+		return 0;
+	};
+}
+
 static void occ_throttle_poll(void *data __unused)
 {
 	struct proc_chip *chip;
@@ -545,7 +795,7 @@ static void occ_throttle_poll(void *data __unused)
 		int inactive = 0;
 
 		for_each_chip(chip) {
-			occ_data = chip_occ_data(chip);
+			occ_data = get_occ_pstate_table(chip);
 			if (occ_data->valid != 1) {
 				inactive = 1;
 				break;
@@ -568,18 +818,21 @@ static void occ_throttle_poll(void *data __unused)
 		if (occ_opal_msg_outstanding)
 			goto done;
 		for_each_chip(chip) {
-			occ_data = chip_occ_data(chip);
+			u8 throttle;
+
+			occ_data = get_occ_pstate_table(chip);
+			throttle = get_cpu_throttle(chip);
 			if ((occ_data->valid == 1) &&
-			    (chip->throttle != occ_data->throttle) &&
-			    (occ_data->throttle <= OCC_MAX_THROTTLE_STATUS)) {
+			    (chip->throttle != throttle) &&
+			    (throttle <= OCC_MAX_THROTTLE_STATUS)) {
 				occ_msg.type = cpu_to_be64(OCC_THROTTLE);
 				occ_msg.chip = cpu_to_be64(chip->id);
-				occ_msg.throttle_status = cpu_to_be64(occ_data->throttle);
+				occ_msg.throttle_status = cpu_to_be64(throttle);
 				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL,
 						     occ_msg_consumed,
 						     3, (uint64_t *)&occ_msg);
 				if (!rc) {
-					chip->throttle = occ_data->throttle;
+					chip->throttle = throttle;
 					occ_opal_msg_outstanding = true;
 					break;
 				}
@@ -591,16 +844,16 @@ done:
 }
 
 /* CPU-OCC PState init */
-/* Called after OCC init on P8 */
+/* Called after OCC init on P8 and P9 */
 void occ_pstates_init(void)
 {
 	struct proc_chip *chip;
 	struct cpu_thread *c;
-	s8 pstate_nom;
+	int pstate_nom;
 	static bool occ_pstates_initialized;
 
-	/* OCC is P8 only */
-	if (proc_gen != proc_gen_p8)
+	/* OCC is supported in P8 and P9 */
+	if (proc_gen < proc_gen_p8)
 		return;
 	/* Handle fast reboots */
 	if (occ_pstates_initialized)
@@ -609,9 +862,11 @@ void occ_pstates_init(void)
 	switch (proc_gen) {
 	case proc_gen_p8:
 		cmp_pstates = cmp_negative_pstates;
+		homer_opal_data_offset = P8_HOMER_OPAL_DATA_OFFSET;
 		break;
 	case proc_gen_p9:
 		cmp_pstates = cmp_positive_pstates;
+		homer_opal_data_offset = P9_HOMER_OPAL_DATA_OFFSET;
 		break;
 	default:
 		return;
@@ -852,7 +1107,7 @@ int occ_msg_queue_occ_reset(void)
 		goto out;
 	}
 	/*
-	 * Set 'valid' byte of chip_occ_data to 0 since OCC
+	 * Set 'valid' byte of occ_pstate_table to 0 since OCC
 	 * may not clear this byte on a reset.
 	 * OCC will set the 'valid' byte to 1 when it becomes
 	 * active again.
@@ -860,7 +1115,7 @@ int occ_msg_queue_occ_reset(void)
 	for_each_chip(chip) {
 		struct occ_pstate_table *occ_data;
 
-		occ_data = chip_occ_data(chip);
+		occ_data = get_occ_pstate_table(chip);
 		occ_data->valid = 0;
 		chip->throttle = 0;
 	}
-- 
1.8.3.1



More information about the Skiboot mailing list