[Skiboot] [PATCH v4] occ: Poll OCC throttle status and queue OCC events to host

Fri Jun 12 18:46:17 AEST 2015

Hi Preeti,

On 06/05/2015 05:02 PM, Preeti U Murthy wrote:
> Hi Shilpa,
> 
>> diff --git a/hw/occ.c b/hw/occ.c
>> index fe513cb..7953753 100644
>> --- a/hw/occ.c
>> +++ b/hw/occ.c
>> @@ -24,6 +24,8 @@
>>  #include <timebase.h>
>>  #include <hostservices.h>
>>  #include <errorlog.h>
>> +#include <opal-api.h>
>> +#include <opal-msg.h>
>>
>>  /* OCC Communication Area for PStates */
>>
>> @@ -31,6 +33,12 @@
>>
>>  #define MAX_PSTATES 256
>>
>> +#define chip_occ_data(chip) \
>> +		((struct occ_pstate_table *)(chip->homer_base + \
>> +				P8_HOMER_SAPPHIRE_DATA_OFFSET))
>> +
>> +static bool occ_reset;
>> +
>>  struct occ_pstate_entry {
>>  	s8 id;
>>  	u8 flags;
>> @@ -302,6 +310,63 @@ static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *
>>  	return true;
>>  }
>>
>> +static void occ_throttle_poll(void *data __unused)
>> +{
>> +	struct proc_chip *chip;
>> +	struct occ_pstate_table *occ_data;
>> +	struct opal_occ_msg occ_msg;
>> +	int rc;
>> +
>> +	if (occ_reset) {
>> +		int inactive = 0;
>> +
>> +		for_each_chip(chip) {
>> +			occ_data = chip_occ_data(chip);
>> +			if (occ_data->valid != 1) {
>> +				inactive = 1;
>> +				break;
>> +			}
>> +		}
>> +		if (!inactive) {
>> +			/*
>> +			 * Queue OCC_THROTTLE with throttle status as 0 to
>> +			 * indicate all OCCs are active after a reset.
>> +			 */
>> +			occ_msg.type = OCC_THROTTLE;
>> +			occ_msg.chip = 0;
>> +			occ_msg.throttle_status = 0;
>> +			rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
>> +					     (uint64_t *)&occ_msg);
>> +			if (!rc)
>> +				occ_reset = false;
>> +		}
>> +	} else {
>> +		for_each_chip(chip) {
>> +			occ_data = chip_occ_data(chip);
>> +			if ((occ_data->valid == 1) &&
>> +			    (chip->prev_throttle != occ_data->throttle) &&
>> +			    (occ_data->throttle <= OCC_MAX_THROTTLE_STATUS)) {
>> +				occ_msg.type = OCC_THROTTLE;
>> +				occ_msg.chip = chip->id;
>> +				occ_msg.throttle_status = occ_data->throttle;
>> +				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
>> +						     3, (uint64_t *)&occ_msg);
>> +				if (!rc)
>> +					chip->prev_throttle =
>> +						occ_data->throttle;
>> +			} else if (occ_data->valid == 0) {
>> +				occ_msg.type = OCC_RESET;
>> +				occ_msg.chip = 0;
>> +				occ_msg.throttle_status = 0;
>> +				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
>> +						     3, (uint64_t *)&occ_msg);
> 
> Now that Neelesh brought out the possibilities of race conditions, FWIW
> is the following possible :
> 
> occ gets reset                                  opal_poller
>   queues occ_reset message                   checks occ_reset: false
>   sets occ_data->valid = 0                   queues throttle msg
>   sets occ_reset = true
>                                            checks occ_reset next time
>                                          queues unthrottle msg
> 
> My concern is the order of msg will be:
> occ_reset, occ throttle(may happen), occ_unthrottle/occ_load.
> 
> It appears that we were throttled before the occ loaded itself. The
> point is can there be such races that the ordering of the queued
> messages appear confusing to the user ?
> 

Yes this race is possible and also the above ordering of messages can be wrongly
interpreted in the host. The first throttle message after an OCC_RESET is used
to indicate that all OCCs are active. But in the above case an OCC_RESET
followed by an OCC_THROTTLE of chip can be wrongly interpreted as active message
in the host. So we have a dire need to synchronize queuing of OCC_RESET and
OCC_THROTTLE along with read/write of occ_reset in opal_poller  and
FSP_OCC_RESET event. Something like below:

FSP_OCC_RESET()
  stop_occ()
  lock()
  queue(OCC_RESET)
  clear occ_data->valid and chip->throttle
  occ_reset = true
  unlock()

occ_throttle_poll()
  if(!try_lock())
    return
  read/write occ_reset
  queue OCC_THROTTLE
  unlock()

>> +		
> 	if (!rc)
>> +					occ_reset = true;
>> +			}
>> +		}
>> +	}
>> +}
>> +
>>  /* CPU-OCC PState init */
>>  /* Called after OCC init on P8 */
>>  void occ_pstates_init(void)
>> @@ -345,6 +410,11 @@ void occ_pstates_init(void)
>>  			cpu_pstates_prepare_core(chip, c, pstate_nom);
>>  		}
>>  	}
>> +
>> +	/* Add opal_poller to poll OCC throttle status of each chip */
>> +	for_each_chip(chip)
>> +		chip->prev_throttle = 0;
>> +	opal_add_poller(occ_throttle_poll, NULL);
>>  }
>>
>>  struct occ_load_req {
>> @@ -386,6 +456,14 @@ static void __occ_do_load(u8 scope, u32 dbob_id __unused, u32 seq_id)
>>  		prlog(PR_INFO, "OCC: Load: Fallback to preloaded image\n");
>>  		rc = 0;
>>  	} else if (!rc) {
>> +		struct opal_occ_msg occ_msg = { OCC_LOAD, 0, 0 };
>> +
>> +		rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
>> +				     (uint64_t *)&occ_msg);
>> +		if (rc)
>> +			prlog(PR_INFO, "OCC: Failed to queue message %d\n",
>> +			      OCC_LOAD);
>> +
>>  		/* Success, start OCC */
>>  		rc = host_services_occ_start();
>>  	}
>> @@ -509,6 +587,27 @@ static void occ_do_reset(u8 scope, u32 dbob_id, u32 seq_id)
>>  		rc = 0;
>>  	}
>>  	if (!rc) {
>> +		struct opal_occ_msg occ_msg = { OCC_RESET, 0, 0 };
>> +
>> +		rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
>> +				     (uint64_t *)&occ_msg);
>> +		if (rc)
>> +			prlog(PR_INFO, "OCC: Failed to queue message %d\n",
>> +			      OCC_RESET);
>> +		/*
>> +		 * Set 'valid' byte of chip_occ_data to 0 since OCC
>> +		 * may not clear this byte on a reset.
>> +		 * OCC will set the 'valid' byte to 1 when it becomes
>> +		 * active again.
>> +		 */
>> +		for_each_chip(chip) {
>> +			struct occ_pstate_table *occ_data;
>> +
>> +			occ_data = chip_occ_data(chip);
>> +			occ_data->valid = 0;
> 
> I recall you explaining this before, nevertheless; can this race with
> the valid bit being overwritten by 1, if the OCC becomes active by this
> time ? In which case, the OCC would be perfectly valid, but we think its
> in an invalid state all the time no ?
> 

No this race cannot happen as we call stop_occ() before updating 'valid' byte to
'0'. So OCC cannot overwrite it to '1' immediately after an consecutive
FSP_RESET sets it to '0'.

Thanks and Regards,
Shilpa