[Skiboot] [PATCH v4] occ: Poll OCC throttle status and queue OCC events to host

Fri Jun 5 21:32:02 AEST 2015

Hi Shilpa,

> diff --git a/hw/occ.c b/hw/occ.c
> index fe513cb..7953753 100644
> --- a/hw/occ.c
> +++ b/hw/occ.c
> @@ -24,6 +24,8 @@
>  #include <timebase.h>
>  #include <hostservices.h>
>  #include <errorlog.h>
> +#include <opal-api.h>
> +#include <opal-msg.h>
> 
>  /* OCC Communication Area for PStates */
> 
> @@ -31,6 +33,12 @@
> 
>  #define MAX_PSTATES 256
> 
> +#define chip_occ_data(chip) \
> +		((struct occ_pstate_table *)(chip->homer_base + \
> +				P8_HOMER_SAPPHIRE_DATA_OFFSET))
> +
> +static bool occ_reset;
> +
>  struct occ_pstate_entry {
>  	s8 id;
>  	u8 flags;
> @@ -302,6 +310,63 @@ static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *
>  	return true;
>  }
> 
> +static void occ_throttle_poll(void *data __unused)
> +{
> +	struct proc_chip *chip;
> +	struct occ_pstate_table *occ_data;
> +	struct opal_occ_msg occ_msg;
> +	int rc;
> +
> +	if (occ_reset) {
> +		int inactive = 0;
> +
> +		for_each_chip(chip) {
> +			occ_data = chip_occ_data(chip);
> +			if (occ_data->valid != 1) {
> +				inactive = 1;
> +				break;
> +			}
> +		}
> +		if (!inactive) {
> +			/*
> +			 * Queue OCC_THROTTLE with throttle status as 0 to
> +			 * indicate all OCCs are active after a reset.
> +			 */
> +			occ_msg.type = OCC_THROTTLE;
> +			occ_msg.chip = 0;
> +			occ_msg.throttle_status = 0;
> +			rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
> +					     (uint64_t *)&occ_msg);
> +			if (!rc)
> +				occ_reset = false;
> +		}
> +	} else {
> +		for_each_chip(chip) {
> +			occ_data = chip_occ_data(chip);
> +			if ((occ_data->valid == 1) &&
> +			    (chip->prev_throttle != occ_data->throttle) &&
> +			    (occ_data->throttle <= OCC_MAX_THROTTLE_STATUS)) {
> +				occ_msg.type = OCC_THROTTLE;
> +				occ_msg.chip = chip->id;
> +				occ_msg.throttle_status = occ_data->throttle;
> +				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
> +						     3, (uint64_t *)&occ_msg);
> +				if (!rc)
> +					chip->prev_throttle =
> +						occ_data->throttle;
> +			} else if (occ_data->valid == 0) {
> +				occ_msg.type = OCC_RESET;
> +				occ_msg.chip = 0;
> +				occ_msg.throttle_status = 0;
> +				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
> +						     3, (uint64_t *)&occ_msg);

Now that Neelesh brought out the possibilities of race conditions, FWIW
is the following possible :

occ gets reset                                  opal_poller
  queues occ_reset message                   checks occ_reset: false
  sets occ_data->valid = 0                   queues throttle msg
  sets occ_reset = true
                                           checks occ_reset next time
                                         queues unthrottle msg

My concern is the order of msg will be:
occ_reset, occ throttle(may happen), occ_unthrottle/occ_load.

It appears that we were throttled before the occ loaded itself. The
point is can there be such races that the ordering of the queued
messages appear confusing to the user ?

> +		
	if (!rc)
> +					occ_reset = true;
> +			}
> +		}
> +	}
> +}
> +
>  /* CPU-OCC PState init */
>  /* Called after OCC init on P8 */
>  void occ_pstates_init(void)
> @@ -345,6 +410,11 @@ void occ_pstates_init(void)
>  			cpu_pstates_prepare_core(chip, c, pstate_nom);
>  		}
>  	}
> +
> +	/* Add opal_poller to poll OCC throttle status of each chip */
> +	for_each_chip(chip)
> +		chip->prev_throttle = 0;
> +	opal_add_poller(occ_throttle_poll, NULL);
>  }
> 
>  struct occ_load_req {
> @@ -386,6 +456,14 @@ static void __occ_do_load(u8 scope, u32 dbob_id __unused, u32 seq_id)
>  		prlog(PR_INFO, "OCC: Load: Fallback to preloaded image\n");
>  		rc = 0;
>  	} else if (!rc) {
> +		struct opal_occ_msg occ_msg = { OCC_LOAD, 0, 0 };
> +
> +		rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
> +				     (uint64_t *)&occ_msg);
> +		if (rc)
> +			prlog(PR_INFO, "OCC: Failed to queue message %d\n",
> +			      OCC_LOAD);
> +
>  		/* Success, start OCC */
>  		rc = host_services_occ_start();
>  	}
> @@ -509,6 +587,27 @@ static void occ_do_reset(u8 scope, u32 dbob_id, u32 seq_id)
>  		rc = 0;
>  	}
>  	if (!rc) {
> +		struct opal_occ_msg occ_msg = { OCC_RESET, 0, 0 };
> +
> +		rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
> +				     (uint64_t *)&occ_msg);
> +		if (rc)
> +			prlog(PR_INFO, "OCC: Failed to queue message %d\n",
> +			      OCC_RESET);
> +		/*
> +		 * Set 'valid' byte of chip_occ_data to 0 since OCC
> +		 * may not clear this byte on a reset.
> +		 * OCC will set the 'valid' byte to 1 when it becomes
> +		 * active again.
> +		 */
> +		for_each_chip(chip) {
> +			struct occ_pstate_table *occ_data;
> +
> +			occ_data = chip_occ_data(chip);
> +			occ_data->valid = 0;

I recall you explaining this before, nevertheless; can this race with
the valid bit being overwritten by 1, if the OCC becomes active by this
time ? In which case, the OCC would be perfectly valid, but we think its
in an invalid state all the time no ?

Regards
Preeti U Murthy