[Skiboot] [PATCH V7 3/3] occ: Add support for Version 0x90 OCC_OPAL shared memory region

Shilpasri G Bhat shilpa.bhat at linux.vnet.ibm.com
Sat Apr 1 19:42:03 AEDT 2017


Hi,

On 02/14/2017 02:01 AM, Shilpasri G Bhat wrote:
> This patch enables pstate table parsing support for P9. It
> introduces below device tree changes.
> - Add a new node per chip in /ibm,opal/power-mgt called occ.
> 	occ@3ffd9f8000 {
> 		reg = <0x3f 0xfd9f8000>;
> 		ibm,chip-id = <0x0>;
> 		ibm,pstate-vdds = <0x4e4e4f4f 0x4f505152 0x53545556
> 		0x5758595a 0x5b5c5d5e 0x5f606162 0x63646565 0x66676868
> 		0x696a6a6b 0x6c6d6d6e 0x6f6f7071 0x72727374 0x74757677
> 		0x7778797a 0x7a7b7c7c>;
> 		ibm,pstate-vcss = <0x4446484a 0x4c4e4f50 0x50515253
> 		0x54555556 0x5758595a 0x5a5b5c5d 0x5d5e5e5e 0x5e5f5f5f
> 		0x5f606060 0x61616161 0x62626263 0x63636364 0x64646465
> 		0x65656666 0x66666767>;
> 		phandle = <0x10000326>;
> 	};
> - Move VID (ibm,pstate-vdds, ibm,pstate-vcss) into the new per-chip
>   /ibm,opal/power-mgt/occ node as these properties are unique to
>   each chip.
> - WOF is supported from version 0x02. Till now we have been adding
>   max ultra-turbo pstate(ibm,pstate-ultra-turbo), max turbo pstate
>   (ibm,pstate-turbo) and max pstate-per-n-core (ibm,pstate-core-max)
>   only when WOF is enabled. This patch will add these properties
>   even when WOF is disabled. When WOF is disabled, the max ultra-turbo
>   pstate equals the max turbo pstate and the max pstate-per-n-core array has
>   all entries equal to the max turbo pstate. So the above three properties
>   are added whenever WOF is supported to ease the reporting of these
>   data in host.
> 
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com>
> ---

The pstate table parsing for the new layout has been verified on Witherspoon.

[    9.327912896,7] OCC: Chip 00 Data (00000017fd8e2000) = 019001230b0b0b00
[    9.327916144,5] OCC: All Chip Rdy after 0 ms
[    9.327970720,7] OCC: CPU pstate state device tree init
[    9.327972992,7] OCC: Data (      17fd8e2000) =  19001230b0b0b00                0
[    9.327976576,7] OCC: Version 90 Min 35 Nom 11 Max 11 Nr States 25

Thanks and Regards,
Shilpa
> Changes from V6:
> - Remove all the macro definitions for code blocks.
> - Retain the links to OCC pstate table header file
> - Define OPAL_DYNAMIC_DATA_OFFSET and remove the large padding between
>   static and dynamic table to keep the structure definition consistent
>   between OCC and OPAL code.
> - Use malloc() followed by assert()
> - Move pstate-core-max property back to "power-mgt"node
> - Add documentation for device-tree properties
> 
>  doc/device-tree/ibm,opal/power-mgt.rst     |  20 +-
>  doc/device-tree/ibm,opal/power-mgt/occ.rst |  38 ++
>  hw/occ.c                                   | 633 ++++++++++++++++++++---------
>  3 files changed, 492 insertions(+), 199 deletions(-)
>  create mode 100644 doc/device-tree/ibm,opal/power-mgt/occ.rst
> 
> diff --git a/doc/device-tree/ibm,opal/power-mgt.rst b/doc/device-tree/ibm,opal/power-mgt.rst
> index 275f92b..af99a22 100644
> --- a/doc/device-tree/ibm,opal/power-mgt.rst
> +++ b/doc/device-tree/ibm,opal/power-mgt.rst
> @@ -41,8 +41,6 @@ Example:
>                  ibm,cpu-idle-state-flags = <0x11000 0x81003 0x47003>;
>                  ibm,cpu-idle-state-names = "nap", "fastsleep_", "winkle";
>                  ibm,cpu-idle-state-pmicr = <0x0 0x0 0x20 0x0 0x0 0x0>;
> -                ibm,pstate-vdds = <0x5758595a 0x5a5b5c5d 0x5e5e5f60 0x61626263 0x64656566 0x66676768 0x6869696a 0x6a6b6b6c 0x6c6d6d6e 0x6e6f6f70 0x70717272>;
> -                ibm,pstate-vcss = <0x4d4e4f4f 0x50505152 0x52535354 0x55555657 0x57585859 0x59595a5a 0x5a5b5b5c 0x5c5c5d5d 0x5d5e5e5f 0x5f5f6060 0x60616162>;
>                  ibm,pstate-nominal = <0xffffffef>;
>                  ibm,cpu-idle-state-residency-ns = <0x186a0 0x11e1a300 0x3b9aca00>;
>                  ibm,cpu-idle-state-pmicr-mask = <0x0 0x0 0x30 0x0 0x0 0x0>;
> @@ -104,20 +102,22 @@ ibm,pstate-max ibm,pstate-min ibm,pstate-nominal
>  These properties give the maximum, minimum and nominal pstate values, as an id
>  specified in the ibm,pstate-ids file.
> 
> -
> -ibm,pstate-vcss ibm,pstate-vdds
> --------------------------------
> -
> -These properties list a voltage-identifier of each of the pstates listed in
> -ibm,pstate-ids for the Vcs and Vdd values used for that pstate. Each VID is a
> -single byte.
> -
>  ibm,pstate-ultra-turbo ibm,pstate-turbo
>  ---------------------------------------
> 
>  These properties are added when ultra-turbo(WOF) is enabled. These properties
>  give the max turbo and max ultra-turbo pstate.
> 
> +Example:
> +
> +.. code-block:: dts
> +
> +  power-mgt {
> +        ibm,pstate-core-max = <0x0 0x0 0x0 0x0 0x0 0x0 0x0>;
> +        ibm,pstate-turbo = <0xfffffffb>;
> +        ibm,pstate-ultra-turbo = <0x0>;
> +  };
> +
>  ibm,pstate-core-max
>  -------------------
> 
> diff --git a/doc/device-tree/ibm,opal/power-mgt/occ.rst b/doc/device-tree/ibm,opal/power-mgt/occ.rst
> new file mode 100644
> index 0000000..ba04f05
> --- /dev/null
> +++ b/doc/device-tree/ibm,opal/power-mgt/occ.rst
> @@ -0,0 +1,38 @@
> +ibm,opal/power-mgt/occ device tree entries
> +==========================================
> +
> +This node exports the per-chip pstate table properties to kernel.
> +
> +Example:
> +
> +.. code-block:: dts
> +
> +  occ@7ffddf8000 {
> +          ibm,pstate-vdds = [45 45 46 46 46 47 48 49 4a 4b 4c 4d 4f 50 51 52 53 54 55 57 58 59 5a 5b 5c 5d 5e 5f 5f 60 61 62 63 64 65 65 66 67 68 69 6a 6a 6b 6c 6d 6e 6f 70 70 71];
> +          ibm,chip-id = <0x1>;
> +          phandle = <0x100003b8>;
> +          ibm,pstate-vcss = [3b 3d 3f 41 42 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f 50 50 51 52 53 54 55 56 56 57 57 58 58 59 59 5a 5a 5b 5b 5c 5c 5d 5d 5e 5e 5f 5f 60 60 61 61 62 62];
> +          reg = <0x7f 0xfddf8000 0xb98>;
> +  };
> +
> +ibm,chip-id
> +-----------
> +
> +This property denotes the ID of the chip to which the OCC belongs.
> +
> +reg
> +---
> +
> +This tuple gives the starting address of the OPAL data in HOMER and
> +the size of the OPAL data.
> +
> +The top-level /ibm,opal/power-mgt contains :
> + #size-cells = <1>
> + #address-cells = <2>
> +
> +ibm,pstate-vcss ibm,pstate-vdds
> +-------------------------------
> +
> +These properties list a voltage-identifier of each of the pstates listed in
> +ibm,pstate-ids for the Vcs and Vdd values used for that pstate in that chip.
> +Each VID is a single byte.
> diff --git a/hw/occ.c b/hw/occ.c
> index 9eb32b4..7b502b4 100644
> --- a/hw/occ.c
> +++ b/hw/occ.c
> @@ -30,44 +30,133 @@
> 
>  /* OCC Communication Area for PStates */
> 
> -#define P8_HOMER_SAPPHIRE_DATA_OFFSET	0x1F8000
> +#define P8_HOMER_OPAL_DATA_OFFSET	0x1F8000
> +#define P9_HOMER_OPAL_DATA_OFFSET	0x0E2000
> 
> -#define MAX_PSTATES 256
> +#define OPAL_DYNAMIC_DATA_OFFSET	0x0B80
> +/* relative to HOMER_OPAL_DATA_OFFSET */
> 
> -#define chip_occ_data(chip) \
> -		((struct occ_pstate_table *)(chip->homer_base + \
> -				P8_HOMER_SAPPHIRE_DATA_OFFSET))
> +#define MAX_PSTATES			256
> +#define MAX_P8_CORES			12
> +#define MAX_P9_CORES			24
> 
> -static bool occ_reset;
> -static struct lock occ_lock = LOCK_UNLOCKED;
> -
> -struct occ_pstate_entry {
> -	s8 id;
> -	u8 flags;
> -	u8 vdd;
> -	u8 vcs;
> -	u32 freq_khz;
> -} __packed;
> -
> -/*
> - * OCC-OPAL Shared Memory Region Version 2
> - * https://github.com/open-power/occ/blob/master/src/occ/proc/proc_pstate.h
> - * Interface defined in 'sapphire_table_t'
> +/**
> + * OCC-OPAL Shared Memory Region
> + *
> + * Reference document :
> + * https://github.com/open-power/docs/blob/master/occ/OCC_OpenPwr_FW_Interfaces.pdf
> + *
> + * Supported layout versions:
> + * - 0x01, 0x02 : P8
> + * https://github.com/open-power/occ/blob/master_p8/src/occ/proc/proc_pstate.h
> + *
> + * - 0x90 : P9
> + * https://github.com/open-power/occ/blob/master/src/occ_405/proc/proc_pstate.h
> + *   In 0x90 the data is separated into :-
> + *   -- Static Data (struct occ_pstate_table): Data is written once by OCC
> + *   -- Dynamic Data (struct occ_dynamic_data): Data is updated at runtime
> + *
> + * struct occ_pstate_table -	Pstate table layout
> + * @valid:			Indicates if data is valid
> + * @version:			Layout version
> + * @v2.throttle:		Reason for limiting the max pstate
> + * @v9.occ_role:		OCC role (Master/Slave)
> + * @v#.pstate_min:		Minimum pstate ever allowed
> + * @v#.pstate_nom:		Nominal pstate
> + * @v#.pstate_turbo:		Maximum turbo pstate
> + * @v#.pstate_ultra_turbo:	Maximum ultra turbo pstate and the maximum
> + *				pstate ever allowed
> + * @v#.pstates:			Pstate-id and frequency list from Pmax to Pmin
> + * @v#.pstates.id:		Pstate-id
> + * @v#.pstates.flags:		Pstate-flag(reserved)
> + * @v2.pstates.vdd:		Voltage Identifier
> + * @v2.pstates.vcs:		Voltage Identifier
> + * @v#.pstates.freq_khz:	Frequency in KHz
> + * @v#.core_max[1..N]:		Max pstate with N active cores
> + * @spare/reserved/pad:		Unused data
>   */
>  struct occ_pstate_table {
>  	u8 valid;
>  	u8 version;
> -	u8 throttle;
> -	s8 pstate_min;
> -	s8 pstate_nom;
> -	s8 pstate_turbo;
> -	s8 pstate_ultra_turbo;
> -	u8 spare;
> +	union __packed {
> +		struct __packed { /* Version 0x01 and 0x02 */
> +			u8 throttle;
> +			s8 pstate_min;
> +			s8 pstate_nom;
> +			s8 pstate_turbo;
> +			s8 pstate_ultra_turbo;
> +			u8 spare;
> +			u64 reserved;
> +			struct __packed {
> +				s8 id;
> +				u8 flags;
> +				u8 vdd;
> +				u8 vcs;
> +				u32 freq_khz;
> +			} pstates[MAX_PSTATES];
> +			s8 core_max[MAX_P8_CORES];
> +			u8 pad[100];
> +		} v2;
> +		struct __packed { /* Version 0x90 */
> +			u8 occ_role;
> +			u8 pstate_min;
> +			u8 pstate_nom;
> +			u8 pstate_turbo;
> +			u8 pstate_ultra_turbo;
> +			u8 spare;
> +			u64 reserved1;
> +			u64 reserved2;
> +			struct __packed {
> +				u8 id;
> +				u8 flags;
> +				u16 reserved;
> +				u32 freq_khz;
> +			} pstates[MAX_PSTATES];
> +			u8 core_max[MAX_P9_CORES];
> +			u8 pad[56];
> +		} v9;
> +	};
> +} __packed;
> +
> +/**
> + * OCC-OPAL Shared Memory Interface Dynamic Data Vx90
> + *
> + * struct occ_dynamic_data -	Contains runtime attributes
> + * @occ_state:			Current state of OCC
> + * @cpu_throttle:		Reason for limiting the max pstate
> + * @mem_throttle:		Reason for throttling memory
> + * @quick_pwr_drop:		Indicates if QPD is asserted
> + * @pwr_shifting_ratio:		Indicates the current percentage of power to
> + *				take away from the CPU vs GPU when shifting
> + *				power to maintain a power cap. Value of 100
> + *				means take all power from CPU.
> + * @pwr_cap_type:		Indicates type of power cap in effect
> + * @min_pwr_cap:		Minimum allowed system power cap in Watts
> + * @max_pwr_cap:		Maximum allowed system power cap in Watts
> + * @cur_pwr_cap:		Current system power cap
> + * @spare/reserved:		Unused data
> + */
> +struct occ_dynamic_data {
> +	u8 occ_state;
> +	u8 spare1;
> +	u8 spare2;
> +	u8 spare3;
> +	u8 spare4;
> +	u8 cpu_throttle;
> +	u8 mem_throttle;
> +	u8 quick_pwr_drop;
> +	u8 pwr_shifting_ratio;
> +	u8 pwr_cap_type;
> +	u16 min_pwr_cap;
> +	u16 max_pwr_cap;
> +	u16 cur_pwr_cap;
>  	u64 reserved;
> -	struct occ_pstate_entry pstates[MAX_PSTATES];
> -	s8 core_max[16];
>  } __packed;
> 
> +static bool occ_reset;
> +static struct lock occ_lock = LOCK_UNLOCKED;
> +static unsigned long homer_opal_data_offset;
> +
>  DEFINE_LOG_ENTRY(OPAL_RC_OCC_LOAD, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
>  		OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
>  		OPAL_NA);
> @@ -118,12 +207,26 @@ static int cmp_negative_pstates(int a, int b)
>  	return 0;
>  }
> 
> +static inline
> +struct occ_pstate_table *get_occ_pstate_table(struct proc_chip *chip)
> +{
> +	return (struct occ_pstate_table *)
> +	       (chip->homer_base + homer_opal_data_offset);
> +}
> +
> +static inline
> +struct occ_dynamic_data *get_occ_dynamic_data(struct proc_chip *chip)
> +{
> +	return (struct occ_dynamic_data *)
> +	       (chip->homer_base + homer_opal_data_offset +
> +		OPAL_DYNAMIC_DATA_OFFSET);
> +}
> +
>  /* Check each chip's HOMER/Sapphire area for PState valid bit */
>  static bool wait_for_all_occ_init(void)
>  {
>  	struct proc_chip *chip;
>  	struct dt_node *xn;
> -	uint64_t occ_data_area;
>  	struct occ_pstate_table *occ_data;
>  	int tries;
>  	uint64_t start_time, end_time;
> @@ -151,8 +254,7 @@ static bool wait_for_all_occ_init(void)
>  		}
> 
>  		/* Get PState table address */
> -		occ_data_area = chip->homer_base + P8_HOMER_SAPPHIRE_DATA_OFFSET;
> -		occ_data = (struct occ_pstate_table *)occ_data_area;
> +		occ_data = get_occ_pstate_table(chip);
> 
>  		/*
>  		 * Checking for occ_data->valid == 1 is ok because we clear all
> @@ -183,8 +285,7 @@ static bool wait_for_all_occ_init(void)
>  			chip->occ_functional = true;
> 
>  		prlog(PR_DEBUG, "OCC: Chip %02x Data (%016llx) = %016llx\n",
> -		      chip->id, occ_data_area,
> -		      *(uint64_t *)occ_data_area);
> +		      chip->id, (uint64_t)occ_data, *(uint64_t *)occ_data);
>  	}
>  	end_time = mftb();
>  	prlog(PR_NOTICE, "OCC: All Chip Rdy after %lu ms\n",
> @@ -200,39 +301,117 @@ static bool wait_for_all_occ_init(void)
>  	return true;
>  }
> 
> +/*
> + * OCC provides pstate table entries in continuous descending order.
> + * Parse the pstate table to skip pstate_ids that are greater
> + * than Pmax. If a pstate_id is equal to Pmin then add it to
> + * the list and break from the loop as this is the last valid
> + * element in the pstate table.
> + */
> +static void parse_pstates_v2(struct occ_pstate_table *data, u32 *dt_id,
> +			     u32 *dt_freq, int nr_pstates, int pmax, int pmin)
> +{
> +	int i, j;
> +
> +	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
> +		if (cmp_pstates(data->v2.pstates[i].id, pmax) > 0)
> +			continue;
> +
> +		dt_id[j] = data->v2.pstates[i].id;
> +		dt_freq[j] = data->v2.pstates[i].freq_khz / 1000;
> +		j++;
> +
> +		if (data->v2.pstates[i].id == pmin)
> +			break;
> +	}
> +
> +	if (j != nr_pstates)
> +		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
> +			nr_pstates, j);
> +}
> +
> +static void parse_pstates_v9(struct occ_pstate_table *data, u32 *dt_id,
> +			     u32 *dt_freq, int nr_pstates, int pmax, int pmin)
> +{
> +	int i, j;
> +
> +	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
> +		if (cmp_pstates(data->v9.pstates[i].id, pmax) > 0)
> +			continue;
> +
> +		dt_id[j] = data->v9.pstates[i].id;
> +		dt_freq[j] = data->v9.pstates[i].freq_khz / 1000;
> +		j++;
> +
> +		if (data->v9.pstates[i].id == pmin)
> +			break;
> +	}
> +
> +	if (j != nr_pstates)
> +		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
> +			nr_pstates, j);
> +}
> +
> +static void parse_vid(struct occ_pstate_table *occ_data,
> +		      struct dt_node *node, u8 nr_pstates,
> +		      int pmax, int pmin)
> +{
> +	u8 *dt_vdd, *dt_vcs;
> +	int i, j;
> +
> +	dt_vdd = malloc(nr_pstates);
> +	assert(dt_vdd);
> +	dt_vcs = malloc(nr_pstates);
> +	assert(dt_vcs);
> +
> +	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
> +		if (cmp_pstates(occ_data->v2.pstates[i].id, pmax) > 0)
> +			continue;
> +
> +		dt_vdd[j] = occ_data->v2.pstates[i].vdd;
> +		dt_vcs[j] = occ_data->v2.pstates[i].vcs;
> +		j++;
> +
> +		if (occ_data->v2.pstates[i].id == pmin)
> +			break;
> +	}
> +
> +	dt_add_property(node, "ibm,pstate-vdds", dt_vdd, nr_pstates);
> +	dt_add_property(node, "ibm,pstate-vcss", dt_vcs, nr_pstates);
> +
> +	free(dt_vdd);
> +	free(dt_vcs);
> +}
> +
>  /* Add device tree properties to describe pstates states */
> -/* Retrun nominal pstate to set in each core */
> -static bool add_cpu_pstate_properties(s8 *pstate_nom)
> +/* Return nominal pstate to set in each core */
> +static bool add_cpu_pstate_properties(int *pstate_nom)
>  {
>  	struct proc_chip *chip;
>  	uint64_t occ_data_area;
>  	struct occ_pstate_table *occ_data;
>  	struct dt_node *power_mgt;
> -	u8 nr_pstates, nr_cores = 0;
> -	s8 pmax;
>  	/* Arrays for device tree */
>  	u32 *dt_id, *dt_freq;
> -	u8 *dt_vdd, *dt_vcs;
> -	s8 *dt_core_max = NULL;
> -	bool rc, ultra_turbo_en;
> -	int i, j;
> +	int pmax, pmin, pnom;
> +	u8 nr_pstates;
> +	bool ultra_turbo_supported;
> +	int i;
> 
>  	prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
> 
> -	/* Find first chip and core */
> +	/* Find first chip */
>  	chip = next_chip(NULL);
> 
>  	/* Extract PState information from OCC */
> +	occ_data = get_occ_pstate_table(chip);
> 
> -	/* Dump state table */
> -	occ_data_area = chip->homer_base + P8_HOMER_SAPPHIRE_DATA_OFFSET;
> -
> +	/* Dump first 16 bytes of PState table */
> +	occ_data_area = (uint64_t)occ_data;
>  	prlog(PR_DEBUG, "OCC: Data (%16llx) = %16llx %16llx\n",
>  	      occ_data_area,
>  	      *(uint64_t *)occ_data_area,
> -	      *(uint64_t *)(occ_data_area+8));
> -	
> -	occ_data = (struct occ_pstate_table *)occ_data_area;
> +	      *(uint64_t *)(occ_data_area + 8));
> 
>  	if (!occ_data->valid) {
>  		/**
> @@ -250,21 +429,70 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
> 
>  	/*
>  	 * Workload-Optimized-Frequency(WOF) or Ultra-Turbo is supported
> -	 * from version 2 onwards. If WOF is disabled then, the max
> +	 * from version 0x02 onwards. If WOF is disabled then, the max
>  	 * ultra_turbo pstate will be equal to max turbo pstate.
>  	 */
> -	if (occ_data->version > 1 &&
> -	    cmp_pstates(occ_data->pstate_ultra_turbo,
> -			occ_data->pstate_turbo) > 0)
> -		ultra_turbo_en = true;
> -	else
> -		ultra_turbo_en = false;
> -
> -	pmax = ultra_turbo_en ? occ_data->pstate_ultra_turbo :
> -				occ_data->pstate_turbo;
> +	ultra_turbo_supported = true;
> +
> +	/* Parse Pmax, Pmin and Pnominal */
> +	switch (occ_data->version) {
> +	case 0x01:
> +		ultra_turbo_supported = false;
> +	case 0x02:
> +		if (proc_gen == proc_gen_p9) {
> +			/**
> +			 * @fwts-label OCCInvalidVersion02
> +			 * @fwts-advice The PState table layout version is not
> +			 * supported in P9. So OPAL will not parse the PState
> +			 * table. CPU frequency scaling will not be functional
> +			 * as frequency and pstate-ids are not added to DT.
> +			 */
> +			prerror("OCC: Version %x is not supported in P9\n",
> +				occ_data->version);
> +			return false;
> +		}
> +		pmin = occ_data->v2.pstate_min;
> +		pnom = occ_data->v2.pstate_nom;
> +		if (ultra_turbo_supported)
> +			pmax = occ_data->v2.pstate_ultra_turbo;
> +		else
> +			pmax = occ_data->v2.pstate_turbo;
> +		break;
> +	case 0x90:
> +		if (proc_gen == proc_gen_p8) {
> +			/**
> +			 * @fwts-label OCCInvalidVersion90
> +			 * @fwts-advice The PState table layout version is not
> +			 * supported in P8. So OPAL will not parse the PState
> +			 * table. CPU frequency scaling will not be functional
> +			 * as frequency and pstate-ids are not added to DT.
> +			 */
> +			prerror("OCC: Version %x is not supported in P8\n",
> +				occ_data->version);
> +			return false;
> +		}
> +		pmin = occ_data->v9.pstate_min;
> +		pnom = occ_data->v9.pstate_nom;
> +		if (ultra_turbo_supported)
> +			pmax = occ_data->v9.pstate_ultra_turbo;
> +		else
> +			pmax = occ_data->v9.pstate_turbo;
> +		break;
> +	default:
> +		/**
> +		 * @fwts-label OCCUnsupportedVersion
> +		 * @fwts-advice The PState table layout version is not
> +		 * supported. So OPAL will not parse the PState table.
> +		 * CPU frequency scaling will not be functional as OPAL
> +		 * doesn't populate the device tree with pstates.
> +		 */
> +		prerror("OCC: Unsupported pstate table layout version %d\n",
> +			occ_data->version);
> +		return false;
> +	}
> 
>  	/* Sanity check for pstate limits */
> -	if (cmp_pstates(occ_data->pstate_min, pmax) > 0) {
> +	if (cmp_pstates(pmin, pmax) > 0) {
>  		/**
>  		 * @fwts-label OCCInvalidPStateLimits
>  		 * @fwts-advice The min pstate is greater than the
> @@ -274,16 +502,27 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
>  		 * CPU Frequency management will not be functional in
>  		 * the host.
>  		 */
> -		prlog(PR_ERR, "OCC: Invalid Pstate Limits. Pmin(%d) > Pmax (%d)\n",
> -		      occ_data->pstate_min, pmax);
> +		prerror("OCC: Invalid pstate limits. Pmin(%d) > Pmax (%d)\n",
> +			pmin, pmax);
>  		return false;
>  	}
> 
> -	nr_pstates = labs(pmax - occ_data->pstate_min) + 1;
> -	prlog(PR_DEBUG, "OCC: Min %d Nom %d Max %d Nr States %d\n", 
> -	      occ_data->pstate_min, occ_data->pstate_nom,
> -	      pmax, nr_pstates);
> +	if (cmp_pstates(pnom, pmax) > 0) {
> +		/**
> +		 * @fwts-label OCCInvalidNominalPState
> +		 * @fwts-advice The nominal pstate is greater than the
> +		 * max pstate, this could be due to corrupted/invalid
> +		 * data in OCC-OPAL shared memory region. So OPAL has
> +		 * limited the nominal pstate to max pstate.
> +		 */
> +		prerror("OCC: Clipping nominal pstate(%d) to Pmax(%d)\n",
> +			pnom, pmax);
> +		pnom = pmax;
> +	}
> 
> +	nr_pstates = labs(pmax - pmin) + 1;
> +	prlog(PR_DEBUG, "OCC: Version %x Min %d Nom %d Max %d Nr States %d\n",
> +	      occ_data->version, pmin, pnom, pmax, nr_pstates);
>  	if (nr_pstates <= 1 || nr_pstates > 128) {
>  		/**
>  		 * @fwts-label OCCInvalidPStateRange
> @@ -294,7 +533,8 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
>  		 * that CPU idle states and CPU frequency scaling
>  		 * will not be functional.
>  		 */
> -		prlog(PR_ERR, "OCC: OCC range is not valid\n");
> +		prerror("OCC: OCC range is not valid; No of pstates = %d\n",
> +			nr_pstates);
>  		return false;
>  	}
> 
> @@ -310,141 +550,134 @@ static bool add_cpu_pstate_properties(s8 *pstate_nom)
>  		return false;
>  	}
> 
> -	rc = false;
> -
> -	/* Setup arrays for device-tree */
> -	/* Allocate memory */
>  	dt_id = malloc(nr_pstates * sizeof(u32));
> -	if (!dt_id) {
> -		/**
> -		 * @fwts-label OCCdt_idENOMEM
> -		 * @fwts-advice Out of memory when allocating pstates array.
> -		 * No Pstates added to device tree, pstates not functional.
> -		 */
> -		prlog(PR_ERR, "OCC: dt_id array alloc failure\n");
> -		goto out;
> -	}
> -
> +	assert(dt_id);
>  	dt_freq = malloc(nr_pstates * sizeof(u32));
> -	if (!dt_freq) {
> -		/**
> -		 * @fwts-label OCCdt_freqENOMEM
> -		 * @fwts-advice Out of memory when allocating pstates array.
> -		 * No Pstates added to device tree, pstates not functional.
> -		 */
> -		prlog(PR_ERR, "OCC: dt_freq array alloc failure\n");
> -		goto out_free_id;
> -	}
> +	assert(dt_freq);
> 
> -	dt_vdd = malloc(nr_pstates * sizeof(u8));
> -	if (!dt_vdd) {
> -		/**
> -		 * @fwts-label OCCdt_vddENOMEM
> -		 * @fwts-advice Out of memory when allocating pstates array.
> -		 * No Pstates added to device tree, pstates not functional.
> -		 */
> -		prlog(PR_ERR, "OCC: dt_vdd array alloc failure\n");
> -		goto out_free_freq;
> -	}
> -
> -	dt_vcs = malloc(nr_pstates * sizeof(u8));
> -	if (!dt_vcs) {
> -		/**
> -		 * @fwts-label OCCdt_vcsENOMEM
> -		 * @fwts-advice Out of memory when allocating pstates array.
> -		 * No Pstates added to device tree, pstates not functional.
> -		 */
> -		prlog(PR_ERR, "OCC: dt_vcs array alloc failure\n");
> -		goto out_free_vdd;
> +	switch (occ_data->version) {
> +	case 0x01:
> +	case 0x02:
> +		parse_pstates_v2(occ_data, dt_id, dt_freq, nr_pstates,
> +				 pmax, pmin);
> +		break;
> +	case 0x90:
> +		parse_pstates_v9(occ_data, dt_id, dt_freq, nr_pstates,
> +				 pmax, pmin);
> +		break;
> +	default:
> +		return false;
>  	}
> 
> -	if (ultra_turbo_en) {
> -		nr_cores = get_available_nr_cores_in_chip(chip->id);
> -		dt_core_max = malloc(nr_cores * sizeof(s8));
> -		if (!dt_core_max) {
> -			/**
> -			 * @fwts-label OCCdt_core_maxENOMEM
> -			 * @fwts-advice Out of memory allocating dt_core_max
> -			 * array. No PStates in Device Tree: non-functional
> -			 * power/frequency management.
> -			 */
> -			prlog(PR_ERR, "OCC: dt_core_max alloc failure\n");
> -			goto out_free_vcs;
> -		}
> +	/* Add the device-tree entries */
> +	dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
> +			nr_pstates * sizeof(u32));
> +	dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
> +			nr_pstates * sizeof(u32));
> +	dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
> +	dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
> +	dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
> 
> -		for (i = 0; i < nr_cores; i++)
> -			dt_core_max[i] = occ_data->core_max[i];
> -	}
> +	free(dt_freq);
> +	free(dt_id);
> 
>  	/*
> -	 * OCC provides pstate table entries in continuous descending order.
> -	 * Parse the pstate table to skip pstate_ids that are greater
> -	 * than Pmax. If a pstate_id is equal to Pmin then add it to
> -	 * the list and break from the loop as this is the last valid
> -	 * element in the pstate table.
> +	 * Parse and add WOF properties: turbo, ultra-turbo and core_max array.
> +	 * core_max[1..n] array provides the max sustainable pstate that can be
> +	 * achieved with i active cores in the chip.
>  	 */
> -	for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) {
> -		if (cmp_pstates(occ_data->pstates[i].id, pmax) > 0)
> -			continue;
> +	if (ultra_turbo_supported) {
> +		int pturbo, pultra_turbo;
> +		u8 nr_cores = get_available_nr_cores_in_chip(chip->id);
> +		u32 *dt_cmax;
> +
> +		dt_cmax = malloc(nr_cores * sizeof(u32));
> +		assert(dt_cmax);
> +		switch (occ_data->version) {
> +		case 0x02:
> +			pturbo = occ_data->v2.pstate_turbo;
> +			pultra_turbo = occ_data->v2.pstate_ultra_turbo;
> +			for (i = 0; i < nr_cores; i++)
> +				dt_cmax[i] = occ_data->v2.core_max[i];
> +			break;
> +		case 0x90:
> +			pturbo = occ_data->v9.pstate_turbo;
> +			pultra_turbo = occ_data->v9.pstate_ultra_turbo;
> +			for (i = 0; i < nr_cores; i++)
> +				dt_cmax[i] = occ_data->v9.core_max[i];
> +			break;
> +		default:
> +			return false;
> +		}
> 
> -		dt_id[j] = occ_data->pstates[i].id;
> -		dt_freq[j] = occ_data->pstates[i].freq_khz / 1000;
> -		dt_vdd[j] = occ_data->pstates[i].vdd;
> -		dt_vcs[j] = occ_data->pstates[i].vcs;
> -		j++;
> +		if (cmp_pstates(pturbo, pmax) > 0) {
> +			prerror("OCC: Clipping turbo pstate(%d) to Pmax(%d)\n",
> +				pturbo, pmax);
> +			dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
> +					      pmax);
> +		} else {
> +			dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
> +					      pturbo);
> +		}
> 
> -		if (occ_data->pstates[i].id == occ_data->pstate_min)
> -			break;
> -	}
> +		dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
> +				      pultra_turbo);
> +		dt_add_property(power_mgt, "ibm,pstate-core-max", dt_cmax,
> +				nr_cores * sizeof(u32));
> 
> -	if (j != nr_pstates) {
> -		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
> -			nr_pstates, j);
> -		goto out_free_vcs;
> +		free(dt_cmax);
>  	}
> 
> -	/* Add the device-tree entries */
> -	dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
> -			nr_pstates * sizeof(u32));
> -	dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
> -			nr_pstates * sizeof(u32));
> -	dt_add_property(power_mgt, "ibm,pstate-vdds", dt_vdd, nr_pstates);
> -	dt_add_property(power_mgt, "ibm,pstate-vcss", dt_vcs, nr_pstates);
> -	dt_add_property_cells(power_mgt, "ibm,pstate-min", occ_data->pstate_min);
> -	dt_add_property_cells(power_mgt, "ibm,pstate-nominal", occ_data->pstate_nom);
> -	dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
> +	if (occ_data->version > 0x02)
> +		goto out;
> 
> -	if (ultra_turbo_en) {
> -		dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
> -				      occ_data->pstate_turbo);
> -		dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
> -				      occ_data->pstate_ultra_turbo);
> -		dt_add_property(power_mgt, "ibm,pstate-core-max", dt_core_max,
> -				nr_cores);
> -		free(dt_core_max);
> -	}
> +	dt_add_property_cells(power_mgt, "#address-cells", 2);
> +	dt_add_property_cells(power_mgt, "#size-cells", 1);
> 
> -	/* Return pstate to set for each core */
> -	*pstate_nom = occ_data->pstate_nom;
> -	rc = true;
> +	/* Add chip specific pstate properties */
> +	for_each_chip(chip) {
> +		struct dt_node *occ_node;
> 
> -out_free_vcs:
> -	free(dt_vcs);
> -out_free_vdd:
> -	free(dt_vdd);
> -out_free_id:
> -	free(dt_id);
> -out_free_freq:
> -	free(dt_freq);
> +		occ_data = get_occ_pstate_table(chip);
> +		occ_node = dt_new_addr(power_mgt, "occ", (uint64_t)occ_data);
> +		if (!occ_node) {
> +			/**
> +			 * @fwts-label OCCDTFailedNodeCreation
> +			 * @fwts-advice Failed to create
> +			 * /ibm,opal/power-mgt/occ. Per-chip pstate properties
> +			 * are not added to Device Tree.
> +			 */
> +			prerror("OCC: Failed to create /ibm,opal/power-mgt/occ@%llx\n",
> +				(uint64_t)occ_data);
> +			return false;
> +		}
> +
> +		dt_add_property_cells(occ_node, "reg",
> +				      hi32((uint64_t)occ_data),
> +				      lo32((uint64_t)occ_data),
> +				      OPAL_DYNAMIC_DATA_OFFSET +
> +				      sizeof(struct occ_dynamic_data));
> +		dt_add_property_cells(occ_node, "ibm,chip-id", chip->id);
> +
> +		/*
> +		 * Parse and add pstate Voltage Identifiers (VID) to DT which
> +		 * are provided by OCC in version 0x01 and 0x02
> +		 */
> +		parse_vid(occ_data, occ_node, nr_pstates, pmax, pmin);
> +	}
>  out:
> -	return rc;
> +	/* Return pstate to set for each core */
> +	*pstate_nom = pnom;
> +	return true;
>  }
> 
>  /*
>   * Prepare chip for pstate transitions
>   */
> 
> -static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *c, s8 pstate_nom)
> +static bool cpu_pstates_prepare_core(struct proc_chip *chip,
> +				     struct cpu_thread *c,
> +				     int pstate_nom)
>  {
>  	uint32_t core = pir_to_core_id(c->pir);
>  	uint64_t tmp, pstate;
> @@ -532,6 +765,23 @@ static void occ_msg_consumed(void *data __unused)
>  	unlock(&occ_lock);
>  }
> 
> +static inline u8 get_cpu_throttle(struct proc_chip *chip)
> +{
> +	struct occ_pstate_table *pdata = get_occ_pstate_table(chip);
> +	struct occ_dynamic_data *data;
> +
> +	switch (pdata->version) {
> +	case 0x01:
> +	case 0x02:
> +		return pdata->v2.throttle;
> +	case 0x90:
> +		data = get_occ_dynamic_data(chip);
> +		return data->cpu_throttle;
> +	default:
> +		return 0;
> +	};
> +}
> +
>  static void occ_throttle_poll(void *data __unused)
>  {
>  	struct proc_chip *chip;
> @@ -545,7 +795,7 @@ static void occ_throttle_poll(void *data __unused)
>  		int inactive = 0;
> 
>  		for_each_chip(chip) {
> -			occ_data = chip_occ_data(chip);
> +			occ_data = get_occ_pstate_table(chip);
>  			if (occ_data->valid != 1) {
>  				inactive = 1;
>  				break;
> @@ -568,18 +818,21 @@ static void occ_throttle_poll(void *data __unused)
>  		if (occ_opal_msg_outstanding)
>  			goto done;
>  		for_each_chip(chip) {
> -			occ_data = chip_occ_data(chip);
> +			u8 throttle;
> +
> +			occ_data = get_occ_pstate_table(chip);
> +			throttle = get_cpu_throttle(chip);
>  			if ((occ_data->valid == 1) &&
> -			    (chip->throttle != occ_data->throttle) &&
> -			    (occ_data->throttle <= OCC_MAX_THROTTLE_STATUS)) {
> +			    (chip->throttle != throttle) &&
> +			    (throttle <= OCC_MAX_THROTTLE_STATUS)) {
>  				occ_msg.type = cpu_to_be64(OCC_THROTTLE);
>  				occ_msg.chip = cpu_to_be64(chip->id);
> -				occ_msg.throttle_status = cpu_to_be64(occ_data->throttle);
> +				occ_msg.throttle_status = cpu_to_be64(throttle);
>  				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL,
>  						     occ_msg_consumed,
>  						     3, (uint64_t *)&occ_msg);
>  				if (!rc) {
> -					chip->throttle = occ_data->throttle;
> +					chip->throttle = throttle;
>  					occ_opal_msg_outstanding = true;
>  					break;
>  				}
> @@ -591,16 +844,16 @@ done:
>  }
> 
>  /* CPU-OCC PState init */
> -/* Called after OCC init on P8 */
> +/* Called after OCC init on P8 and P9 */
>  void occ_pstates_init(void)
>  {
>  	struct proc_chip *chip;
>  	struct cpu_thread *c;
> -	s8 pstate_nom;
> +	int pstate_nom;
>  	static bool occ_pstates_initialized;
> 
> -	/* OCC is P8 only */
> -	if (proc_gen != proc_gen_p8)
> +	/* OCC is supported in P8 and P9 */
> +	if (proc_gen < proc_gen_p8)
>  		return;
>  	/* Handle fast reboots */
>  	if (occ_pstates_initialized)
> @@ -609,9 +862,11 @@ void occ_pstates_init(void)
>  	switch (proc_gen) {
>  	case proc_gen_p8:
>  		cmp_pstates = cmp_negative_pstates;
> +		homer_opal_data_offset = P8_HOMER_OPAL_DATA_OFFSET;
>  		break;
>  	case proc_gen_p9:
>  		cmp_pstates = cmp_positive_pstates;
> +		homer_opal_data_offset = P9_HOMER_OPAL_DATA_OFFSET;
>  		break;
>  	default:
>  		return;
> @@ -852,7 +1107,7 @@ int occ_msg_queue_occ_reset(void)
>  		goto out;
>  	}
>  	/*
> -	 * Set 'valid' byte of chip_occ_data to 0 since OCC
> +	 * Set 'valid' byte of occ_pstate_table to 0 since OCC
>  	 * may not clear this byte on a reset.
>  	 * OCC will set the 'valid' byte to 1 when it becomes
>  	 * active again.
> @@ -860,7 +1115,7 @@ int occ_msg_queue_occ_reset(void)
>  	for_each_chip(chip) {
>  		struct occ_pstate_table *occ_data;
> 
> -		occ_data = chip_occ_data(chip);
> +		occ_data = get_occ_pstate_table(chip);
>  		occ_data->valid = 0;
>  		chip->throttle = 0;
>  	}
> 



More information about the Skiboot mailing list