[Skiboot] [PATCH v3] occ: Poll OCC throttle status and queue OCC events to host

Preeti U Murthy preeti at linux.vnet.ibm.com
Tue May 26 14:41:35 AEST 2015


On 05/21/2015 07:22 PM, Shilpasri G Bhat wrote:
> Hi Preeti,
> 
> On 05/21/2015 06:06 PM, Preeti U Murthy wrote:
>> Hi Shilpa,
>>
>> On 05/21/2015 03:50 PM, Shilpasri G Bhat wrote:
>>> Add a new class of message definition OPAL_MSG_OCC to
>>> opal_message_type to notify the following OCC events to host:
>>> 1) OCC Reset
>>> 2) OCC Load
>>> 3) OCC Throttle Status Change
>>>
>>> Add an opal poller to periodically read throttle status updated by OCC
>>> for each chip and notify any change in throttle status to host. The
>>> throttle status indicates the reason why OCC may have limited the max
>>> Pstate of the chip.
>>>
>>> Signed-off-by: Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com>
>>> ---
>>> Changes from V2:
>>> - Added documentation of OPAL_MSG_OCC to doc/opal-api/opal-messages.txt
>>> - Moved macros OCC_RESET, OCC_LOAD, OCC_THROTTLE to opal-api.h
>>> - Added a structure opal_occ_msg.
>>>
>>> Changes from V1:
>>> - Initialize prev_throttle to 0 instead of 0xF
>>> - Add pr_log when opal_queue_msg() fails on OCC reset and load.
>>> - Do not update occ_reset and prev_throttle if opal_queue_msg fails in
>>>   occ_throttle_poll() 
>>> - Do not queue if throttle reason is greater than 5.
>>>
>>>  doc/opal-api/opal-messages.txt | 37 ++++++++++++++++++
>>>  hw/occ.c                       | 89 ++++++++++++++++++++++++++++++++++++++++++
>>>  include/chip.h                 |  1 +
>>>  include/opal-api.h             | 24 ++++++++++++
>>>  4 files changed, 151 insertions(+)
>>>
>>> diff --git a/doc/opal-api/opal-messages.txt b/doc/opal-api/opal-messages.txt
>>> index fdde247..451b217 100644
>>> --- a/doc/opal-api/opal-messages.txt
>>> +++ b/doc/opal-api/opal-messages.txt
>>> @@ -158,3 +158,40 @@ struct opal_prd_msg:
>>>
>>>  Responses from the kernel use the same message format, but are passed
>>>  through the opal_prd_msg call.
>>> +
>>> +OPAL_MSG_OCC
>>> +------------
>>> +
>>> +This is used by OPAL to inform host about OCC events like OCC reset,
>>> +OCC load and throttle status change by OCC which can indicate the
>>> +host the reason for frequency throttling/unthrottling.
>>> +
>>> +#define OCC_RESET	0
>>> +#define OCC_LOAD 	1
>>> +#define OCC_THROTTLE 	2
>>> +
>>> +/*
>>> + * struct opal_occ_msg:
>>> + * type: OCC_RESET, OCC_LOAD, OCC_THROTTLE
>>> + * chip: chip id
>>> + * throttle status: Indicates the reason why OCC may have limited
>>> + * the max Pstate of the chip.
>>> + * 0x00 = No throttle
>>> + * 0x01 = Power Cap
>>> + * 0x02 = Processor Over Temperature
>>> + * 0x03 = Power Supply Failure (currently not used)
>>> + * 0x04 = Over current (currently not used)
>>> + * 0x05 = OCC Reset (not reliable as some failures will not allow for
>>> + * OCC to update throttle status)
>>> + */
>>> +struct opal_occ_msg {
>>> +	__be64 type;
>>> +	__be64 chip;
>>> +	__be64 throttle_status;
>>> +};
>>> +
>>> +Host should read opal_occ_msg.chip and opal_occ_msg.throttle_status
>>> +only when opal_occ_msg.type = OCC_THROTTLE.
>>> +If opal_occ_msg.type > 2 then host should ignore the message for now,
>>> +new events can be defined for opal_occ_msg.type in the future
>>> +versions of OPAL.
>>> diff --git a/hw/occ.c b/hw/occ.c
>>> index fe513cb..7d11c4f 100644
>>> --- a/hw/occ.c
>>> +++ b/hw/occ.c
>>> @@ -24,6 +24,8 @@
>>>  #include <timebase.h>
>>>  #include <hostservices.h>
>>>  #include <errorlog.h>
>>> +#include <opal-api.h>
>>> +#include <opal-msg.h>
>>>
>>>  /* OCC Communication Area for PStates */
>>>
>>> @@ -31,6 +33,13 @@
>>>
>>>  #define MAX_PSTATES 256
>>>
>>> +struct opal_occ_msg occ_msg;
>>> +#define chip_occ_data(chip) \
>>> +		((struct occ_pstate_table *)(chip->homer_base + \
>>> +				P8_HOMER_SAPPHIRE_DATA_OFFSET))
>>> +
>>> +static bool occ_reset;
>>> +
>>>  struct occ_pstate_entry {
>>>  	s8 id;
>>>  	u8 flags;
>>> @@ -302,6 +311,54 @@ static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *
>>>  	return true;
>>>  }
>>>
>>> +static void occ_throttle_poll(void *data __unused)
>>> +{
>>> +	struct proc_chip *chip;
>>> +	struct occ_pstate_table *occ_data;
>>> +	int rc;
>>> +
>>> +	if (occ_reset) {
>>> +		int inactive = 0;
>>> +
>>> +		for_each_chip(chip) {
>>> +			occ_data = chip_occ_data(chip);
>>> +			if (occ_data->valid != 1) {
>>> +				inactive = 1;
>>> +				break;
>>> +			}
>>> +		}
>>> +		if (!inactive) {
>>> +			/*
>>> +			 * Queue OCC_THROTTLE with throttle status as 0 to
>>> +			 * indicate all OCCs are active after a reset.
>>> +			 */
>>> +			occ_msg.type = OCC_THROTTLE;
>>> +			occ_msg.chip = 0;
>>> +			occ_msg.throttle_status = 0;
>>> +			rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
>>> +					     (uint64_t *)&occ_msg);
>>
>> Note that the corresponding patch in the kernel is expecting three
>> parameters, while now we are passing just one.
> 
> No.
> The kernel will receive the message via opal_msg.params[], here 'occ_msg' will
> be copied to opal_msg.params[].
> 
> Sorry I have missed this point in the change log.
> - Using '_opal_queue_msg()' instead of 'opal_queue_msg()'

Ok got it.

Reviewed-by: Preeti U Murthy <preeti at linux.vnet.ibm.com>
> 
> Thanks and Regards,
> Shilpa
> 



More information about the Skiboot mailing list