[Skiboot] [PATCH V6 3/3] occ: Add support for Version 0x90 OCC_OPAL shared memory region
Stewart Smith
stewart at linux.vnet.ibm.com
Thu Feb 2 16:01:25 AEDT 2017
Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com> writes:
> This patch enables pstate table parsing support for P9. It
> introduces below device tree changes.
> - Add a new node per chip in /ibm,opal/power-mgt called occ.
> occ at 3ffd9f8000 {
> reg = <0x3f 0xfd9f8000>;
> ibm,chip-id = <0x0>;
> ibm,pstate-vdds = <0x4e4e4f4f 0x4f505152 0x53545556
> 0x5758595a 0x5b5c5d5e 0x5f606162 0x63646565 0x66676868
> 0x696a6a6b 0x6c6d6d6e 0x6f6f7071 0x72727374 0x74757677
> 0x7778797a 0x7a7b7c7c>;
> ibm,pstate-vcss = <0x4446484a 0x4c4e4f50 0x50515253
> 0x54555556 0x5758595a 0x5a5b5c5d 0x5d5e5e5e 0x5e5f5f5f
> 0x5f606060 0x61616161 0x62626263 0x63636364 0x64646465
> 0x65656666 0x66666767>;
> ibm,pstate-core-max = <0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0>;
> phandle = <0x10000326>;
> };
Ben - any thoughts on device tree layout for ^ ?
I'm wondering if we need it per chip or if the information in the
ibm,pstate-* is mostly common, and we should just point to it rather
than have a copy per chip?
> - Move VID (ibm,pstate-vdds, ibm,pstate-vcss) and max pstate for
> #n active cores array (ibm,pstate-core-max) in to the new per-chip
> /ibm,opal/power-mgt/occ node as these properties are unique to
> chip.
> - WOF is supported from version 0x02. Till now we have been adding
> max ultra-turbo pstate(ibm,pstate-ultra-turbo), max turbo pstate
> (ibm,pstate-turbo) and max pstate-per-n-core (ibm,pstate-core-max)
> only when WOF is enabled. This patch will add these properties
> even when WOF is disabled. When WOF is disabled max ultra turbo pstate
> equals to max turbo pstate and max pstate-per-n-core array has all
> entries equal to max turbo pstate. So the above three properties
> are added whenever WOF is supported to ease the reporting of these
> data in host.
Please add/enhance documentation of the applicable device tree entries
in doc/device-tree/ibm,opal/power-mgt.rst
> Signed-off-by: Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com>
> ---
> No changes from V5.
>
> Changes from V4:
> - s/prerror/prlog(PR_ERR) which have fwts-annotations
> - Removed fwts_annotations in macro definitions
> - Added out-of-bound sanity checks for nominal and turbo pstate
> - Added size to the "reg" property of /ibm,opal/power-mgt/occ
>
> hw/occ.c | 588 +++++++++++++++++++++++++++++++++++++++++++--------------------
> 1 file changed, 402 insertions(+), 186 deletions(-)
>
> diff --git a/hw/occ.c b/hw/occ.c
> index 9eb32b4..f3b7e7c 100644
> --- a/hw/occ.c
> +++ b/hw/occ.c
> @@ -30,44 +30,137 @@
>
> /* OCC Communication Area for PStates */
>
> -#define P8_HOMER_SAPPHIRE_DATA_OFFSET 0x1F8000
> +#define P8_HOMER_OPAL_DATA_OFFSET 0x1F8000
> +#define P9_HOMER_OPAL_DATA_OFFSET 0x0E2000
> +#define MAX_PSTATES 256
> +#define MAX_P8_CORES 12
> +#define MAX_P9_CORES 24
>
> -#define MAX_PSTATES 256
> -
> -#define chip_occ_data(chip) \
> - ((struct occ_pstate_table *)(chip->homer_base + \
> - P8_HOMER_SAPPHIRE_DATA_OFFSET))
> -
> -static bool occ_reset;
> -static struct lock occ_lock = LOCK_UNLOCKED;
> -
> -struct occ_pstate_entry {
> - s8 id;
> - u8 flags;
> - u8 vdd;
> - u8 vcs;
> - u32 freq_khz;
> -} __packed;
> -
> -/*
> - * OCC-OPAL Shared Memory Region Version 2
> - * https://github.com/open-power/occ/blob/master/src/occ/proc/proc_pstate.h
> - * Interface defined in 'sapphire_table_t'
> +/**
> + * OCC-OPAL Shared Memory Region
> + *
> + * Reference document :
> + *
> https://github.com/open-power/docs/blob/master/occ/OCC_OpenPwr_FW_Interfaces.pdf
I don't see any 0x90 version info in that document.
Could we also point to the OCC header file?
> + *
> + * Supported layout versions:
> + * - 0x01 : P8
> + * - 0x02 : P8
> + * - 0x90 : P9
> + * In 0x90 the data is separated into :-
> + * -- Static Data (struct occ_pstate_table): Data is written once by OCC
> + * -- Dynamic Data (struct occ_dynamic_data): Data is updated at runtime
> + *
> + * struct occ_pstate_table - Pstate table layout
> + * @valid: Indicates if data is valid
> + * @version: Layout version
> + * @v2.throttle: Reason for limiting the max pstate
> + * @v90.occ_role: OCC role (Master/Slave)
> + * @v#.pstate_min: Minimum pstate ever allowed
> + * @v#.pstate_nom: Nominal pstate
> + * @v#.pstate_turbo: Maximum turbo pstate
> + * @v#.pstate_ultra_turbo: Maximum ultra turbo pstate and the maximum
> + * pstate ever allowed
> + * @v#.pstates: Pstate-id and frequency list from Pmax to Pmin
> + * @v#.pstates.id: Pstate-id
> + * @v#.pstates.flags: Pstate-flag(reserved)
> + * @v2.pstates.vdd: Voltage Identifier
> + * @v2.pstates.vcs: Voltage Identifier
> + * @v#.pstates.freq_khz: Frequency in KHz
> + * @v#.core_max[1..N]: Max pstate with N active cores
> + * @spare/reserved/pad: Unused data
> */
> struct occ_pstate_table {
> u8 valid;
> u8 version;
> - u8 throttle;
> - s8 pstate_min;
> - s8 pstate_nom;
> - s8 pstate_turbo;
> - s8 pstate_ultra_turbo;
> - u8 spare;
> + union __packed {
> + struct __packed { /* Version 0x01 and 0x02 */
> + u8 throttle;
> + s8 pstate_min;
> + s8 pstate_nom;
> + s8 pstate_turbo;
> + s8 pstate_ultra_turbo;
> + u8 spare;
> + u64 reserved;
> + struct __packed {
> + s8 id;
> + u8 flags;
> + u8 vdd;
> + u8 vcs;
> + u32 freq_khz;
> + } pstates[MAX_PSTATES];
> + s8 core_max[MAX_P8_CORES];
> + u8 pad[100];
> + } v2;
> + struct __packed { /* Version 0x90 */
> + u8 occ_role;
> + u8 pstate_min;
> + u8 pstate_nom;
> + u8 pstate_turbo;
> + u8 pstate_ultra_turbo;
> + u8 spare;
> + u64 reserved1;
> + u64 reserved2;
> + struct __packed {
> + u8 id;
> + u8 flags;
> + u16 reserved;
> + u32 freq_khz;
> + } pstates[MAX_PSTATES];
> + u8 core_max[MAX_P9_CORES];
> + u8 pad[1024];
> + } v90;
> + };
> +} __packed;
> +
> +/**
> + * OCC-OPAL Shared Memory Interface Dynamic Data Vx90
> + *
> + * struct occ_dynamic_data - Contains runtime attributes
> + * @occ_state: Current state of OCC
> + * @cpu_throttle: Reason for limiting the max pstate
> + * @mem_throttle: Reason for throttling memory
> + * @quick_pwr_drop: Indicates if QPD is asserted
> + * @pwr_shifting_ratio: Indicates the current percentage of power to
> + * take away from the CPU vs GPU when shifting
> + * power to maintain a power cap. Value of 100
> + * means take all power from CPU.
> + * @pwr_cap_type: Indicates type of power cap in effect
> + * @min_pwr_cap: Minimum allowed system power cap in Watts
> + * @max_pwr_cap: Maximum allowed system power cap in Watts
> + * @cur_pwr_cap: Current system power cap
> + * @spare/reserved: Unused data
> + */
> +struct occ_dynamic_data {
> + u8 occ_state;
> + u8 spare1;
> + u8 spare2;
> + u8 spare3;
> + u8 spare4;
> + u8 cpu_throttle;
> + u8 mem_throttle;
> + u8 quick_pwr_drop;
> + u8 pwr_shifting_ratio;
> + u8 pwr_cap_type;
> + u16 min_pwr_cap;
> + u16 max_pwr_cap;
> + u16 cur_pwr_cap;
> u64 reserved;
> - struct occ_pstate_entry pstates[MAX_PSTATES];
> - s8 core_max[16];
> } __packed;
>
> +#define get_homer_opal_data(chip) \
> + (chip->homer_base + homer_opal_data_offset)
> +
> +#define get_occ_pstate_table(chip) \
> + ((struct occ_pstate_table *)(get_homer_opal_data(chip)))
> +
> +#define get_occ_dynamic_data(chip) \
> + ((struct occ_dynamic_data *)(get_homer_opal_data(chip) + \
> + sizeof(struct occ_pstate_table)))
I'd prefer these three to be static inline instead.
> @@ -200,40 +290,119 @@ static bool wait_for_all_occ_init(void)
> return true;
> }
>
> +#define parse_pstate_limits(pstate_data) \
> +do { \
> + pmin = pstate_data.pstate_min; \
> + pnom = pstate_data.pstate_nom; \
> + pmax = ultra_turbo_supported ? pstate_data.pstate_ultra_turbo : \
> + pstate_data.pstate_turbo; \
> +} while (0)
For clarity of reading the code, I'd prefer just to open code this twice
rather than hide in a macro that has side-effects.
> +
> +/*
> + * OCC provides pstate table entries in continuous descending order.
> + * Parse the pstate table to skip pstate_ids that are greater
> + * than Pmax. If a pstate_id is equal to Pmin then add it to
> + * the list and break from the loop as this is the last valid
> + * element in the pstate table.
> + */
> +#define parse_cpu_pstate_properties(pstate_data) \
> +do { \
> + for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) { \
> + if (cmp_pstates(pstate_data.pstates[i].id, pmax) > 0) \
> + continue; \
> + \
> + dt_id[j] = pstate_data.pstates[i].id; \
> + dt_freq[j] = pstate_data.pstates[i].freq_khz / 1000; \
> + j++; \
> + if (pstate_data.pstates[i].id == pmin) \
> + break; \
> + } \
> + \
> + if (j != nr_pstates) { \
> + prerror("OCC: Expected pstates(%d) is not equal to" \
> + "parsed pstates(%d)\n", nr_pstates, j); \
> + goto out_free_vcs; \
> + } \
> + \
> + if (!ultra_turbo_supported) \
> + break; \
> + \
> + if (cmp_pstates(pstate_data.pstate_turbo, pmax) > 0) { \
> + prlog(PR_WARNING, "OCC: Invalid turbo pstate. Clipping " \
> + "turbo pstate(%d) to Pmax(%d)\n", \
> + pstate_data.pstate_turbo, pmax); \
> + dt_add_property_cells(power_mgt, "ibm,pstate-turbo", \
> + pmax); \
> + } else { \
> + dt_add_property_cells(power_mgt, "ibm,pstate-turbo", \
> + pstate_data.pstate_turbo); \
> + dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",\
> + pstate_data.pstate_ultra_turbo); \
> + } \
> +} while (0)
Hrm... soo... I'm not a fan of this as a macro, but I'm unsure of a
better way to do it... could the macro be PARSE_CPU_PSTATE_PROPERTIES
(caps) rather than lower case though? parse_cpu_pstate_properties
> +
> +/* core_max[1..n] array provides the max sustainable pstate that can be
> + * achieved with i active cores in the chip.
> + */
> +#define parse_core_max(pstate_data) \
> +do { \
> + for (i = 0; i < nr_cores; i++) \
> + dt_core_max[i] = pstate_data.core_max[i]; \
> +} while (0)
I'd prefer static inline function to a macro here.
> +
> +#define parse_vid(pstate_data) \
> +do { \
> + for (i = 0, j = 0; i < MAX_PSTATES && j < nr_pstates; i++) { \
> + if (cmp_pstates(pstate_data.pstates[i].id, pmax) > 0) \
> + continue; \
> + \
> + dt_vdd[j] = pstate_data.pstates[i].vdd; \
> + dt_vcs[j] = pstate_data.pstates[i].vcs; \
> + j++; \
> + \
> + if (pstate_data.pstates[i].id == pmin) \
> + break; \
> + } \
> +} while (0)
I'd prefer this code inline rather than hiding in macro - or in its own function.
> +
> +#define allocate(ptr, size, fail_label) \
> +do { \
> + ptr = malloc((size)); \
> + if (!ptr) { \
> + prerror("OCC:" #ptr " array alloc failure\n"); \
> + goto fail_label; \
> + } \
> +} while (0)
Just use malloc() and assert() on failure. Since this is at boot time,
that's okay.
> +
> /* Add device tree properties to describe pstates states */
> -/* Retrun nominal pstate to set in each core */
> -static bool add_cpu_pstate_properties(s8 *pstate_nom)
> +/* Return nominal pstate to set in each core */
> +static bool add_cpu_pstate_properties(int *pstate_nom)
> {
> struct proc_chip *chip;
> uint64_t occ_data_area;
> struct occ_pstate_table *occ_data;
> struct dt_node *power_mgt;
> - u8 nr_pstates, nr_cores = 0;
> - s8 pmax;
> - /* Arrays for device tree */
> - u32 *dt_id, *dt_freq;
> - u8 *dt_vdd, *dt_vcs;
> - s8 *dt_core_max = NULL;
> - bool rc, ultra_turbo_en;
> + u32 *dt_id, *dt_freq, *dt_core_max = NULL;
> + int pmax, pmin, pnom;
> + u8 *dt_vdd = NULL, *dt_vcs = NULL, nr_pstates, max_cores;
> + bool ultra_turbo_supported, rc = false;
> int i, j;
>
> prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
>
> - /* Find first chip and core */
> + /* Find first chip */
> chip = next_chip(NULL);
>
> /* Extract PState information from OCC */
> + occ_data = get_occ_pstate_table(chip);
>
> - /* Dump state table */
> - occ_data_area = chip->homer_base + P8_HOMER_SAPPHIRE_DATA_OFFSET;
> -
> + /* Dump first 16 bytes of PState table */
> + occ_data_area = (uint64_t)occ_data;
> prlog(PR_DEBUG, "OCC: Data (%16llx) = %16llx %16llx\n",
> occ_data_area,
> *(uint64_t *)occ_data_area,
> *(uint64_t *)(occ_data_area+8));
>
may as well remove the trailing whitespace here while you're at it.
--
Stewart Smith
OPAL Architect, IBM.
More information about the Skiboot
mailing list