[Skiboot-stable] [PATCH] occ: Fix false negatives in wait_for_all_occ_init()

Gautham R. Shenoy ego at linux.vnet.ibm.com
Tue May 26 17:21:50 AEST 2020


From: "Gautham R. Shenoy" <ego at linux.vnet.ibm.com>

Currently the wait_for_all_occ_init() function determines that the
OCCs associated with every Chip has been initialized by verifying if
the "Valid" bit in pstate table of that OCC is set.

However, on chips where all the EX units are guarded, the OCC, even
though it is active, does not update the pstate_table. Currently as a
result of this, OPAL concludes that the OCC is not functional and not
only disable Pstate initialization, but incorrectly report that that
OCCs were not initialized, thereby cutting other features such as
sensors.

Fix this by ensuring that

   * We check if there is atleast one active EX unit in the chip
     before checking if the OCC is active.

   * On platforms with OCC-OPAL communication interface version 0x90

        * wait_for_all_occ_init() only checks if the occ_state in the
          OCC dynamic area is set to "Active State".

        * move the "Valid" bit check to add_cpu_pstate_properties(),
          which is where we create the device-tree entries for
          frequency scaling.

Cc: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
Tested-by: Pavaman Subramaniyam <pavsubra at in.ibm.com>
Signed-off-by: Gautham R. Shenoy <ego at linux.vnet.ibm.com>
---
 core/cpu.c     |   6 +-
 hw/occ.c       | 177 +++++++++++++++++++++++++++++++++++++++++++++++----------
 include/chip.h |   3 +
 3 files changed, 154 insertions(+), 32 deletions(-)

diff --git a/core/cpu.c b/core/cpu.c
index 37d9f41..1b38124 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -1171,10 +1171,12 @@ void init_all_cpus(void)
 		chip_id = dt_get_chip_id(cpu);
 
 		/* Only use operational CPUs */
-		if (!strcmp(dt_prop_get(cpu, "status"), "okay"))
+		if (!strcmp(dt_prop_get(cpu, "status"), "okay")) {
 			state = cpu_state_present;
-		else
+			get_chip(chip_id)->ex_present = true;
+		} else {
 			state = cpu_state_unavailable;
+		}
 
 		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
 		      " State=%d\n", pir, server_no, state);
diff --git a/hw/occ.c b/hw/occ.c
index 76c7736..a1e4982 100644
--- a/hw/occ.c
+++ b/hw/occ.c
@@ -312,12 +312,17 @@ struct occ_dynamic_data *get_occ_dynamic_data(struct proc_chip *chip)
 		OPAL_DYNAMIC_DATA_OFFSET);
 }
 
-/* Check each chip's HOMER/Sapphire area for PState valid bit */
+/*
+ * On Chips which have at least one active EX unit, check the
+ * HOMER area for pstate-table valid bit on versions 0x1 and 0x2, or
+ * HOMER dynamic area occ_state on version 0x90.
+ */
 static bool wait_for_all_occ_init(void)
 {
 	struct proc_chip *chip;
 	struct dt_node *xn;
 	struct occ_pstate_table *occ_data;
+	struct occ_dynamic_data *occ_dyn_data;
 	int tries;
 	uint64_t start_time, end_time;
 	uint32_t timeout = 0;
@@ -327,6 +332,19 @@ static bool wait_for_all_occ_init(void)
 
 	start_time = mftb();
 	for_each_chip(chip) {
+		u8 version;
+
+		/*
+		 * If the chip doesn't any EX unit present, then OCC
+		 * will not update the pstate-table. So, skip the
+		 * check.
+		 */
+		if (!chip->ex_present) {
+			prlog(PR_DEBUG, "OCC: Chip %02x has no active EX units. Skipping check\n",
+			      chip->id);
+			continue;
+		}
+
 		/* Check for valid homer address */
 		if (!chip->homer_base) {
 			/**
@@ -347,27 +365,108 @@ static bool wait_for_all_occ_init(void)
 		occ_data = get_occ_pstate_table(chip);
 
 		/*
-		 * Checking for occ_data->valid == 1 is ok because we clear all
-		 * homer_base+size before passing memory to host services.
-		 * This ensures occ_data->valid == 0 before OCC load
+		 * Wait for the OCC to set an appropriate version bit.
+		 * The wait is needed since on some platforms (such P8
+		 * Tuletta), OCC is not loaded before OPAL boot. Hence
+		 * initialization can take a while.
+		 *
+		 * Note: Checking for occ_data->version == (0x01/0x02/0x90)
+		 * is ok because we clear all of
+		 * homer_base+size before passing memory to host
+		 * services.  This ensures occ_data->version == 0x0
+		 * before OCC load.
 		 */
 		tries = timeout * 10;
-		while((occ_data->valid != 1) && tries--) {
+		while (tries--) {
+			version = occ_data->version;
+
+			if (version == 0x01 || version == 0x02 ||
+			    version == 0x90)
+				break;
+
 			time_wait_ms(100);
 		}
-		if (occ_data->valid != 1) {
-			/**
-			 * @fwts-label OCCInvalidPStateTable
-			 * @fwts-advice The pstate table for a chip
-			 * was not valid. This means that OCC (On Chip
-			 * Controller) will be non-functional and CPU
-			 * frequency scaling will not be functional. CPU may
-			 * be set to a low, safe frequency. This means
-			 * that CPU idle states and CPU frequency scaling
-			 * may not be functional.
+
+		version = occ_data->version;
+		switch (version) {
+		case 0x1:
+		case 0x2:
+		/*
+		 * OCC-OPAL interface version 0x1 and 0x2 do not have
+		 * the dynamic data.  Hence the the only way to figure out
+		 * if the OCC is up or not is to check the valid-bit
+		 * in the pstate table.
+		 */
+			if (occ_data->valid != 1) {
+				/**
+				 * @fwts-label OCCInvalidPStateTable
+				 * @fwts-advice The pstate table for a chip
+				 * was not valid. This means that OCC (On Chip
+				 * Controller) will be non-functional and CPU
+				 * frequency scaling will not be functional. CPU may
+				 * be set to a low, safe frequency. This means
+				 * that CPU idle states and CPU frequency scaling
+				 * may not be functional.
+				 */
+				prlog(PR_ERR, "OCC: Chip: %x PState table is not valid\n",
+				      chip->id);
+				return false;
+			}
+			break;
+
+		case 0x90:
+			/*
+			 * OCC-OPAL interface version 0x90 has a
+			 * dynamic data section.  This has an
+			 * occ_state field whose values inform about
+			 * the state of the OCC.
+			 *
+			 * 0x00 = OCC not running. No communication
+			 *        allowed.
+			 *
+			 * 0x01 = Standby. No communication allowed.
+			 *
+			 * 0x02 = Observation State. Communication
+			 *        allowed and is command dependent.
+			 *
+			 * 0x03 = Active State. Communication allowed
+			 *        and is command dependent.
+			 *
+			 * 0x04 = Safe State. No communication
+			 *        allowed. Just like CPU throttle
+			 *        status, some failures will not allow
+			 *        for OCC to update state to safe.
+			 *
+			 * 0x05 = Characterization State.
+			 *        Communication allowed and is command
+			 *        dependent.
+			 *
+			 * We will error out if OCC is not in the
+			 * Active State.
+			 *
+			 * XXX : Should we error out only if no
+			 *       communication is allowed with the
+			 *       OCC ?
 			 */
-			prlog(PR_ERR, "OCC: Chip: %x PState table is not valid\n",
-				chip->id);
+			occ_dyn_data = get_occ_dynamic_data(chip);
+			if (occ_dyn_data->occ_state != 0x3) {
+				/**
+				 * @fwts-label OCCInactive
+				 * @fwts-advice The OCC for a chip was not active.
+				 * This means that CPU frequency scaling will
+				 * not be functional. CPU may be set to a low,
+				 * safe frequency. This means that CPU idle
+				 * states and CPU frequency scaling may not be
+				 * functional.
+				 */
+				prlog(PR_ERR, "OCC: Chip: %x: OCC not active\n",
+				      chip->id);
+				return false;
+			}
+			break;
+
+		default:
+			prlog(PR_ERR, "OCC: Unknown OCC-OPAL interface version.\n");
 			return false;
 		}
 
@@ -376,7 +475,15 @@ static bool wait_for_all_occ_init(void)
 
 		prlog(PR_DEBUG, "OCC: Chip %02x Data (%016llx) = %016llx\n",
 		      chip->id, (uint64_t)occ_data, be64_to_cpu(*(__be64 *)occ_data));
+
+		if (version == 0x90) {
+			occ_dyn_data = get_occ_dynamic_data(chip);
+			prlog(PR_DEBUG, "OCC: Chip %02x Dynamic Data (%016llx) = %016llx\n",
+			      chip->id, (uint64_t)occ_dyn_data,
+			      be64_to_cpu(*(__be64 *)occ_dyn_data));
+		}
 	}
+
 	end_time = mftb();
 	prlog(PR_NOTICE, "OCC: All Chip Rdy after %lu ms\n",
 	      tb_to_msecs(end_time - start_time));
@@ -480,7 +587,7 @@ static bool add_cpu_pstate_properties(struct dt_node *power_mgt,
 {
 	struct proc_chip *chip;
 	uint64_t occ_data_area;
-	struct occ_pstate_table *occ_data;
+	struct occ_pstate_table *occ_data = NULL;
 	/* Arrays for device tree */
 	__be32 *dt_id, *dt_freq;
 	int pmax, pmin, pnom;
@@ -490,24 +597,34 @@ static bool add_cpu_pstate_properties(struct dt_node *power_mgt,
 
 	prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
 
-	/* Find first chip */
-	chip = next_chip(NULL);
+	/*
+	 * Find first chip with an OCC which has as a valid
+	 * pstate-table
+	 */
+	for_each_chip(chip) {
+		occ_data = get_occ_pstate_table(chip);
 
-	/* Extract PState information from OCC */
-	occ_data = get_occ_pstate_table(chip);
+		/* Dump first 16 bytes of PState table */
+		occ_data_area = (uint64_t)occ_data;
+		prlog(PR_DEBUG, "OCC: Chip %02d :Data (%16llx) = %16llx %16llx\n",
+			chip->id, occ_data_area,
+			be64_to_cpu(*(__be64 *)occ_data_area),
+			be64_to_cpu(*(__be64 *)(occ_data_area + 8)));
 
-	/* Dump first 16 bytes of PState table */
-	occ_data_area = (uint64_t)occ_data;
-	prlog(PR_DEBUG, "OCC: Data (%16llx) = %16llx %16llx\n",
-	      occ_data_area,
-	      be64_to_cpu(*(__be64 *)occ_data_area),
-	      be64_to_cpu(*(__be64 *)(occ_data_area + 8)));
+		if (occ_data->valid)
+			break;
+		/*
+		 * XXX : Error out if !occ_data->valid but Chip has at
+		 * least one EX Unit?
+		 */
+	}
 
+	assert(occ_data);
 	if (!occ_data->valid) {
 		/**
 		 * @fwts-label OCCInvalidPStateTableDT
-		 * @fwts-advice The pstate table for the first chip
-		 * was not valid. This means that OCC (On Chip
+		 * @fwts-advice The pstate tables for none of the chips
+		 * are valid. This means that OCC (On Chip
 		 * Controller) will be non-functional. This means
 		 * that CPU idle states and CPU frequency scaling
 		 * will not be functional as OPAL doesn't populate
diff --git a/include/chip.h b/include/chip.h
index f76c837..b79b63e 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -192,6 +192,9 @@ struct proc_chip {
 
 	/* Used by hw/dio-p9.c */
 	struct p9_dio		*dio;
+
+	/* Used during OCC init */
+	bool			ex_present;
 };
 
 extern uint32_t pir_to_chip_id(uint32_t pir);
-- 
1.8.3.1



More information about the Skiboot-stable mailing list