[Skiboot] [PATCH v4] occ: Poll OCC throttle status and queue OCC events to host

Neelesh Gupta neelegup at linux.vnet.ibm.com
Fri Jun 5 21:33:14 AEST 2015



On 06/03/2015 06:24 PM, Shilpasri G Bhat wrote:
>
> +static void occ_throttle_poll(void *data __unused)
> +{
> +	struct proc_chip *chip;
> +	struct occ_pstate_table *occ_data;
> +	struct opal_occ_msg occ_msg;
> +	int rc;
> +
> +	if (occ_reset) {
> +		int inactive = 0;
> +
> +		for_each_chip(chip) {
> +			occ_data = chip_occ_data(chip);
> +			if (occ_data->valid != 1) {
> +				inactive = 1;
> +				break;
> +			}
> +		}
> +		if (!inactive) {
> +			/*
> +			 * Queue OCC_THROTTLE with throttle status as 0 to
> +			 * indicate all OCCs are active after a reset.
> +			 */
> +			occ_msg.type = OCC_THROTTLE;
> +			occ_msg.chip = 0;
> +			occ_msg.throttle_status = 0;
> +			rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
> +					     (uint64_t *)&occ_msg);
> +			if (!rc)
> +				occ_reset = false;
> +		}
> +	} else {
> +		for_each_chip(chip) {
> +			occ_data = chip_occ_data(chip);
> +			if ((occ_data->valid == 1) &&
> +			    (chip->prev_throttle != occ_data->throttle) &&
> +			    (occ_data->throttle <= OCC_MAX_THROTTLE_STATUS)) {
> +				occ_msg.type = OCC_THROTTLE;
> +				occ_msg.chip = chip->id;
> +				occ_msg.throttle_status = occ_data->throttle;
> +				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
> +						     3, (uint64_t *)&occ_msg);
> +				if (!rc)
> +					chip->prev_throttle =
> +						occ_data->throttle;
> +			} else if (occ_data->valid == 0) {
> +				occ_msg.type = OCC_RESET;
> +				occ_msg.chip = 0;
> +				occ_msg.throttle_status = 0;
> +				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
> +						     3, (uint64_t *)&occ_msg);
> +				if (!rc)
> +					occ_reset = true;

The OCC is in reset at this point, so should we 'break' out of the loop here?
Otherwise, the loop continues and may start sending 'OCC_THROTTLE' before the OCC has recovered.

> +			}
> +		}
> +	}
> +}
> +
>   /* CPU-OCC PState init */
>   /* Called after OCC init on P8 */
>   void occ_pstates_init(void)
> @@ -345,6 +410,11 @@ void occ_pstates_init(void)
>   			cpu_pstates_prepare_core(chip, c, pstate_nom);
>   		}
>   	}
> +
> +	/* Add opal_poller to poll OCC throttle status of each chip */
> +	for_each_chip(chip)
> +		chip->prev_throttle = 0;
> +	opal_add_poller(occ_throttle_poll, NULL);
>   }
>
>   struct occ_load_req {
> @@ -386,6 +456,14 @@ static void __occ_do_load(u8 scope, u32 dbob_id __unused, u32 seq_id)
>   		prlog(PR_INFO, "OCC: Load: Fallback to preloaded image\n");
>   		rc = 0;
>   	} else if (!rc) {
> +		struct opal_occ_msg occ_msg = { OCC_LOAD, 0, 0 };
> +
> +		rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
> +				     (uint64_t *)&occ_msg);
> +		if (rc)
> +			prlog(PR_INFO, "OCC: Failed to queue message %d\n",
> +			      OCC_LOAD);
> +
>   		/* Success, start OCC */
>   		rc = host_services_occ_start();
>   	}
> @@ -509,6 +587,27 @@ static void occ_do_reset(u8 scope, u32 dbob_id, u32 seq_id)
>   		rc = 0;
>   	}
>   	if (!rc) {
> +		struct opal_occ_msg occ_msg = { OCC_RESET, 0, 0 };
> +
> +		rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, 3,
> +				     (uint64_t *)&occ_msg);
> +		if (rc)
> +			prlog(PR_INFO, "OCC: Failed to queue message %d\n",
> +			      OCC_RESET);
> +		/*
> +		 * Set 'valid' byte of chip_occ_data to 0 since OCC
> +		 * may not clear this byte on a reset.
> +		 * OCC will set the 'valid' byte to 1 when it becomes
> +		 * active again.
> +		 */
> +		for_each_chip(chip) {
> +			struct occ_pstate_table *occ_data;
> +
> +			occ_data = chip_occ_data(chip);
> +			occ_data->valid = 0;
> +			chip->prev_throttle = 0;
> +		}
> +		occ_reset = true;
>   		/* Send a single success response for all chips */
>   		stat = fsp_mkmsg(FSP_CMD_RESET_OCC_STAT, 2, 0, seq_id);
>   		if (stat)
> diff --git a/include/chip.h b/include/chip.h
> index 1b4f4c4..5b57c87 100644
> --- a/include/chip.h
> +++ b/include/chip.h
> @@ -147,6 +147,7 @@ struct proc_chip {
>   	uint64_t		homer_size;
>   	uint64_t		occ_common_base;
>   	uint64_t		occ_common_size;
> +	u8			prev_throttle;

This is the common chip structure shared across the code, and the field holds
the prevailing value of 'throttle' until we read the new OCC data, so I think
the name 'throttle' is more apt here. :)

Rest looks good to me.

Neelesh.

>
>   	/* Must hold capi_lock to change */
>   	u8			capp_phb3_attached_mask;
> diff --git a/include/opal-api.h b/include/opal-api.h
> index 29421c1..ba35543 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -418,6 +418,7 @@ enum opal_msg_type {
>   	OPAL_MSG_HMI_EVT,
>   	OPAL_MSG_DPO,
>   	OPAL_MSG_PRD,
> +	OPAL_MSG_OCC,
>   	OPAL_MSG_TYPE_MAX,
>   };
>
> @@ -882,6 +883,30 @@ struct opal_prd_msg {
>   	};
>   };
>
> +#define OCC_RESET			0
> +#define OCC_LOAD			1
> +#define OCC_THROTTLE			2
> +#define OCC_MAX_THROTTLE_STATUS		5
> +/*
> + * struct opal_occ_msg:
> + * type: OCC_RESET, OCC_LOAD, OCC_THROTTLE
> + * chip: chip id
> + * throttle status: indicates the reason why OCC may have limited
> + * the max Pstate of the chip.
> + * 0x00 = No throttle
> + * 0x01 = Power Cap
> + * 0x02 = Processor Over Temperature
> + * 0x03 = Power Supply Failure (currently not used)
> + * 0x04 = Over current (currently not used)
> + * 0x05 = OCC Reset (not reliable as some failures will not allow for
> + * OCC to update throttle status)
> + */
> +struct opal_occ_msg {
> +	__be64 type;
> +	__be64 chip;
> +	__be64 throttle_status;
> +};
> +
>   /*
>    * SG entries
>    *

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ozlabs.org/pipermail/skiboot/attachments/20150605/25ba9688/attachment.html>


More information about the Skiboot mailing list