[Skiboot] [PATCH v3 3/6] slw: Add Power9 idle states to power-mgt dt node

Shreyas B Prabhu shreyas at linux.vnet.ibm.com
Tue Jun 7 21:41:55 AEST 2016



On 06/06/2016 11:31 AM, Michael Neuling wrote:
> This looks mostly ok to me with some minor comments below.  It seems
> to follow much of what we do currently
> 
>> POWER ISA v3 defines a new idle processor core mechanism. In summary,
>>  a) new instruction named stop is added. This instruction replaces
>>  	instructions like nap, sleep, rvwinkle.
>>  b) new per thread SPR named PSSCR is added which controls the behavior
>>  	of stop instruction. This SPR subsumes PMICR.
>>  
>> This patch adds the supported idle states to power-mgt dt node.
>>  
>> It also introduces ibm,cpu-idle-state-psscr and
>> ibm,cpu-idle-state-psscr-mask entries which exposes the value to be
>> written to PSSCR to enter a given stop state. These entries replaces
>> POWER8's counterparts ibm,cpu-idle-state-pmicr and
>> ibm,cpu-idle-state-pmicr-mask.
> 
> Are these backwards and forwards compatible with the kernel?
> 
> Or don't we have to worry since it'll be invalid boot an older kernel
> on p9 anyway, or a newer p9 kernel without upgrading your firmware to
> p9?

We are backward compatible on P8, of course. On P9, as you mentioned
older kernel will be an invalid boot. And FWIW this patch won't be
breaking newer kernel with old firmware since kernel would not discover
any idle states and hence not look for psscr dt nodes.

> 
>> Signed-off-by: Shreyas B. Prabhu <shreyas at linux.vnet.ibm.com>
>> ---
>>  hw/slw.c           | 178 ++++++++++++++++++++++++++++++++++++++++++-----
> ------
>>  include/opal-api.h |  10 +++
>>  2 files changed, 153 insertions(+), 35 deletions(-)
>>  
>> diff --git a/hw/slw.c b/hw/slw.c
>> index 68b61ea80ccd..d391cd70cc98 100644
>> --- a/hw/slw.c
>> +++ b/hw/slw.c
>> @@ -397,8 +397,12 @@ struct cpu_idle_states {
>>  	u32 latency_ns;
>>  	u32 residency_ns;
>>  	u32 flags;
>> -	u64 pmicr;
>> -	u64 pmicr_mask;
>> +	/*
>> +	 * Register value/mask used to select different idle states.
>> +	 * PMICR in POWER8 and PSSCR in POWER9
>> +	 */
>> +	u64 pm_ctrl_reg_val;
>> +	u64 pm_ctrl_reg_mask;
> 
> 
> You might want to pack this struct a bit to ensure there aren't holes
> in it. There is currently a 32bit hole between flags and
> pm_ctrl_reg_val.
> 
> The same goes for some local variables in your functions.
>

Ok. Will fix that.

>>  };
>>  
>>  /* Flag definitions */
>> @@ -435,8 +439,8 @@ static struct cpu_idle_states
> power7_cpu_idle_states[] = {
>>  		       | 0*OPAL_PM_SLEEP_ENABLED \
>>  		       | 0*OPAL_PM_WINKLE_ENABLED \
>>  		       | 0*IDLE_USE_PMICR,
>> -		.pmicr = 0,
>> -		.pmicr_mask = 0 },
>> +		.pm_ctrl_reg_val = 0,
>> +		.pm_ctrl_reg_mask = 0 },
>>  };
>>  
>>  static struct cpu_idle_states power8_cpu_idle_states[] = {
>> @@ -451,8 +455,8 @@ static struct cpu_idle_states
> power8_cpu_idle_states[] = {
>>  		       | 0*IDLE_LOSE_FULL_CONTEXT \
>>  		       | 1*OPAL_PM_NAP_ENABLED \
>>  		       | 0*IDLE_USE_PMICR,
>> -		.pmicr = 0,
>> -		.pmicr_mask = 0 },
>> +		.pm_ctrl_reg_val = 0,
>> +		.pm_ctrl_reg_mask = 0 },
>>  	{ /* fast sleep (with workaround) */
>>  		.name = "fastsleep_",
>>  		.latency_ns = 40000,
>> @@ -465,8 +469,8 @@ static struct cpu_idle_states
> power8_cpu_idle_states[] = {
>>  		       | 1*OPAL_PM_SLEEP_ENABLED_ER1 \
>>  		       | 0*IDLE_USE_PMICR, /* Not enabled until deep
>>  						states are available */
>> -		.pmicr = IDLE_FASTSLEEP_PMICR,
>> -		.pmicr_mask = IDLE_SLEEP_PMICR_MASK },
>> +		.pm_ctrl_reg_val = IDLE_FASTSLEEP_PMICR,
>> +		.pm_ctrl_reg_mask = IDLE_SLEEP_PMICR_MASK },
>>  	{ /* Winkle */
>>  		.name = "winkle",
>>  		.latency_ns = 10000000,
>> @@ -485,10 +489,93 @@ static struct cpu_idle_states
> power8_cpu_idle_states[] = {
>>  		       | 1*OPAL_PM_WINKLE_ENABLED \
>>  		       | 0*IDLE_USE_PMICR, /* Currently choosing deep vs
>>  						fast via EX_PM_GP1 reg
> */
>> -		.pmicr = 0,
>> -		.pmicr_mask = 0 },
>> +		.pm_ctrl_reg_val = 0,
>> +		.pm_ctrl_reg_mask = 0 },
>>  };
>>  
>> +/*
>> + * cpu_idle_states for POWER9
>> + * Note latency_ns and residency_ns are estimated values for now.
>> + */
>> +static struct cpu_idle_states power9_cpu_idle_states[] = {
>> +	{
>> +		.name = "stop0",
>> +		.latency_ns = 300,
>> +		.residency_ns = 3000,
>> +		.flags = 0*IDLE_DEC_STOP \
>> +		       | 0*IDLE_TB_STOP  \
>> +		       | 0*IDLE_LOSE_USER_CONTEXT \
>> +		       | 0*IDLE_LOSE_HYP_CONTEXT \
>> +		       | 0*IDLE_LOSE_FULL_CONTEXT \
>> +		       | 1*OPAL_PM_STOP_INST_FAST,
>> +		.pm_ctrl_reg_val = 0,
>> +		.pm_ctrl_reg_mask = 0xF },
>> +	{
>> +		.name = "stop1",
>> +		.latency_ns = 5000,
>> +		.residency_ns = 50000,
>> +		.flags = 0*IDLE_DEC_STOP \
>> +		       | 0*IDLE_TB_STOP  \
>> +		       | 1*IDLE_LOSE_USER_CONTEXT \
>> +		       | 0*IDLE_LOSE_HYP_CONTEXT \
>> +		       | 0*IDLE_LOSE_FULL_CONTEXT \
>> +		       | 1*OPAL_PM_STOP_INST_FAST,
>> +		.pm_ctrl_reg_val = 1,
>> +		.pm_ctrl_reg_mask = 0xF },
>> +	{
>> +		.name = "stop2",
>> +		.latency_ns = 10000,
>> +		.residency_ns = 100000,
>> +		.flags = 0*IDLE_DEC_STOP \
>> +		       | 0*IDLE_TB_STOP  \
>> +		       | 1*IDLE_LOSE_USER_CONTEXT \
>> +		       | 0*IDLE_LOSE_HYP_CONTEXT \
>> +		       | 0*IDLE_LOSE_FULL_CONTEXT \
>> +		       | 1*OPAL_PM_STOP_INST_FAST,
>> +		.pm_ctrl_reg_val = 2,
>> +		.pm_ctrl_reg_mask = 0xF },
>> +
>> +	{
>> +		.name = "stop4",
>> +		.latency_ns = 100000,
>> +		.residency_ns = 1000000,
>> +		.flags = 1*IDLE_DEC_STOP \
>> +		       | 1*IDLE_TB_STOP  \
>> +		       | 1*IDLE_LOSE_USER_CONTEXT \
>> +		       | 1*IDLE_LOSE_HYP_CONTEXT \
>> +		       | 1*IDLE_LOSE_FULL_CONTEXT \
>> +		       | 1*OPAL_PM_STOP_INST_DEEP,
>> +		.pm_ctrl_reg_val = 4,
>> +		.pm_ctrl_reg_mask = 0xF },
>> +
>> +	{
>> +		.name = "stop8",
>> +		.latency_ns = 2000000,
>> +		.residency_ns = 20000000,
>> +		.flags = 1*IDLE_DEC_STOP \
>> +		       | 1*IDLE_TB_STOP  \
>> +		       | 1*IDLE_LOSE_USER_CONTEXT \
>> +		       | 1*IDLE_LOSE_HYP_CONTEXT \
>> +		       | 1*IDLE_LOSE_FULL_CONTEXT \
>> +		       | 1*OPAL_PM_STOP_INST_DEEP,
>> +		.pm_ctrl_reg_val = 0x8,
>> +		.pm_ctrl_reg_mask = 0xF },
>> +
>> +
>> +	{
>> +		.name = "stop11",
>> +		.latency_ns = 10000000,
>> +		.residency_ns = 100000000,
>> +		.flags = 1*IDLE_DEC_STOP \
>> +		       | 1*IDLE_TB_STOP  \
>> +		       | 1*IDLE_LOSE_USER_CONTEXT \
>> +		       | 1*IDLE_LOSE_HYP_CONTEXT \
>> +		       | 1*IDLE_LOSE_FULL_CONTEXT \
>> +		       | 1*OPAL_PM_STOP_INST_DEEP,
>> +		.pm_ctrl_reg_val = 0xB,
>> +		.pm_ctrl_reg_mask = 0xF },
> 
> Why 0, 1, 2, 4, 8 and 11?  Why don't we use any of the others?
> 

These are the key idle states that we want to exploit initially.
>> +
>> +};
>>  /* Add device tree properties to describe idle states */
>>  void add_cpu_idle_state_properties(void)
>>  {
>> @@ -499,6 +586,7 @@ void add_cpu_idle_state_properties(void)
>>  
>>  	bool can_sleep = true;
>>  	bool has_slw = true;
>> +	bool has_stop_inst = false;
>>  	u8 i;
>>  
>>  	u32 supported_states_mask;
>> @@ -508,8 +596,8 @@ void add_cpu_idle_state_properties(void)
>>  	u32 *latency_ns_buf;
>>  	u32 *residency_ns_buf;
>>  	u32 *flags_buf;
>> -	u64 *pmicr_buf;
>> -	u64 *pmicr_mask_buf;
>> +	u64 *pm_ctrl_reg_val_buf;
>> +	u64 *pm_ctrl_reg_mask_buf;
>>  
>>  	/* Variables to track buffer length */
>>  	u8 name_buf_len;
>> @@ -538,7 +626,12 @@ void add_cpu_idle_state_properties(void)
>>  	 */
>>  	chip = next_chip(NULL);
>>  	assert(chip);
>> -	if (chip->type == PROC_CHIP_P8_MURANO ||
>> +	if (chip->type == PROC_CHIP_P9_NIMBUS ||
>> +	    chip->type == PROC_CHIP_P9_CUMULUS) {
>> +		states = power9_cpu_idle_states;
>> +		nr_states = ARRAY_SIZE(power9_cpu_idle_states);
>> +		has_stop_inst = true;
>> +	} else if (chip->type == PROC_CHIP_P8_MURANO ||
>>  	    chip->type == PROC_CHIP_P8_VENICE ||
>>  	    chip->type == PROC_CHIP_P8_NAPLES) {
>>  		const struct dt_property *p;
>> @@ -586,8 +679,8 @@ void add_cpu_idle_state_properties(void)
>>  	latency_ns_buf	=  (u32 *) malloc(nr_states *
> sizeof(u32));
>>  	residency_ns_buf=  (u32 *) malloc(nr_states * sizeof(u32));
>>  	flags_buf	=  (u32 *) malloc(nr_states * sizeof(u32));
>> -	pmicr_buf	=  (u64 *) malloc(nr_states * sizeof(u64));
>> -	pmicr_mask_buf	=  (u64 *) malloc(nr_states *
> sizeof(u64));
>> +	pm_ctrl_reg_val_buf	=  (u64 *) malloc(nr_states *
> sizeof(u64));
>> +	pm_ctrl_reg_mask_buf	=  (u64 *) malloc(nr_states *
> sizeof(u64));
> 
> malloc returns a void *, so you shouldn't need these casts.
>

I'll drop the cast.

> We don't check any of these returns either, although this is at boot,
> so if we can't malloc, something is really screwed, so probably not
> worth bothering with
> 
Yes. Probably not worth bothering.

>>  
>>  	name_buf_len = 0;
>>  	num_supported_idle_states = 0;
>> @@ -597,13 +690,18 @@ void add_cpu_idle_state_properties(void)
>>  	 * set. Use this to only add supported idle states to the
>>  	 * device-tree
>>  	 */
>> -	supported_states_mask = OPAL_PM_NAP_ENABLED;
>> -	if (can_sleep)
>> -		supported_states_mask |= OPAL_PM_SLEEP_ENABLED |
>> -					OPAL_PM_SLEEP_ENABLED_ER1;
>> -	if (has_slw)
>> -		supported_states_mask |= OPAL_PM_WINKLE_ENABLED;
>> -
>> +	if (has_stop_inst) {
>> +		supported_states_mask = OPAL_PM_STOP_INST_FAST;
>> +		if (has_slw)
>> +			supported_states_mask |= OPAL_PM_STOP_INST_DEEP;
>> +	} else {
>> +		supported_states_mask = OPAL_PM_NAP_ENABLED;
>> +		if (can_sleep)
>> +			supported_states_mask |= OPAL_PM_SLEEP_ENABLED |
>> +						OPAL_PM_SLEEP_ENABLED_ER
> 1;
>> +		if (has_slw)
>> +			supported_states_mask |= OPAL_PM_WINKLE_ENABLED;
>> +	}
>>  	for (i = 0; i < nr_states; i++) {
>>  		/* For each state, check if it is one of the supported
> states. */
>>  		if (states[i].flags & supported_states_mask) {
>> @@ -623,11 +721,11 @@ void add_cpu_idle_state_properties(void)
> 
> A couple of unrelated changes...
> 
> You can avoid the indentation here by changing the if statement to
> 
> 	     if (!(states[i].flags & supported_states_mask))
> 		continue;
> 
> also, the
>   strcpy(name_buf, states[i].name);
> should be:
>   strncpy(name_buf, states[i].name, MAX_NAME_LEN);
> 
> 

Ok. Will make the change.

Thanks for the review!

-Shreyas



More information about the Skiboot mailing list