[Skiboot] [PATCH V4 2/3] occ: Add support for OPAL-OCC command/response interface
Shilpasri G Bhat
shilpa.bhat at linux.vnet.ibm.com
Mon Jul 10 17:04:04 AEST 2017
Hi Stewart,
On 07/07/2017 02:23 PM, Stewart Smith wrote:
> Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com> writes:
>> +static inline bool occ_in_progress(struct cmd_interface *chip)
>> +{
>> + return (chip->rsp->flag == OCC_IN_PROGRESS);
>> +}
>> +
>> +static int write_occ_cmd(struct cmd_interface *chip, bool retry)
>> +{
>> + struct opal_command_buffer *cmd = chip->cmd;
>> + struct opal_occ_cmd_rsp_msg *msg = chip->msg;
>> +
>> + if (!retry && occ_in_progress(chip)) {
>> + chip->cmd_in_progress = false;
>> + return OPAL_OCC_BUSY;
>> + }
>> +
>> + cmd->flag = chip->rsp->flag = 0;
>> + cmd->cmd = occ_cmds[msg->cmd].value;
>> + cmd->request_id = msg->request_id;
>> + cmd->data_size = msg->cdata_size;
>> + memcpy(&cmd->data, (u8 *)msg->cdata, msg->cdata_size);
>> + cmd->flag = OPAL_CMD_READY;
>> +
>> + schedule_timer(&chip->timeout,
>> + msecs_to_tb(occ_cmds[msg->cmd].timeout_ms));
>> + chip->prev_cmd = msg->cmd;
>> +
>> + return OPAL_ASYNC_COMPLETION;
>> +}
>> +
>> +static int64_t opal_occ_command(int chip_id, struct opal_occ_cmd_rsp_msg *msg,
>> + int token, bool retry)
>
> It looks like retry is not actually retry, but rather 'return if busy,
> don't queue' as in write_occ_cmd that's the logic.
If kernel is retrying the same command due to a command-timeout in the previous
try, then don't check the OCC status flag as OCC might not have been able to
update the status flag. 'retry' flag is used to indicate that kernel is retrying
the command.
Do you want 'retry' state to be stored in OPAL ?
>
>> +{
>> + struct cmd_interface *chip;
>> + int rc;
>> +
>> + if (!msg || !opal_addr_valid((u8 *)msg->cdata) ||
>> + !opal_addr_valid((u8 *)msg->rdata))
>> + return OPAL_PARAMETER;
>
> no check if msg is valid addr?
Yeah will add the check for 'msg'.
>
>> +
>> + if (msg->cmd >= OCC_CMD_LAST)
>> + return OPAL_UNSUPPORTED;
>> +
>> + if (msg->cdata_size > MAX_OPAL_CMD_DATA_LENGTH)
>> + return OPAL_PARAMETER;
>> +
>> + chip = get_chip_cmd_interface(chip_id);
>> + if (!chip)
>> + return OPAL_PARAMETER;
>> +
>> + if (retry && msg->cmd != chip->prev_cmd)
>> + return OPAL_PARAMETER;
>> +
>> + if (!(PPC_BIT8(chip->occ_role) & occ_cmds[msg->cmd].role_mask))
>> + return OPAL_PARAMETER;
>> +
>> + if (!(PPC_BIT16(*chip->occ_state) & occ_cmds[msg->cmd].state_mask))
>> + return OPAL_OCC_INVALID_STATE;
>> +
>> + lock(&chip->queue_lock);
>> + if (chip->cmd_in_progress) {
>> + rc = OPAL_OCC_BUSY;
>> + goto out;
>> + }
>> +
>> + chip->msg = msg;
>> + chip->token = token;
>> + chip->cmd_in_progress = true;
>> + rc = write_occ_cmd(chip, retry);
>> +out:
>> + unlock(&chip->queue_lock);
>> + return rc;
>> +}
>> +
>> +static inline bool sanity_check_opal_cmd(struct opal_command_buffer *cmd,
>> + struct opal_occ_cmd_rsp_msg *msg)
>> +{
>> + return ((cmd->cmd == occ_cmds[msg->cmd].value) &&
>> + (cmd->request_id == msg->request_id) &&
>> + (cmd->data_size == msg->cdata_size));
>> +}
>> +
>> +static inline bool check_occ_rsp(struct opal_command_buffer *cmd,
>> + struct occ_response_buffer *rsp)
>> +{
>> + if (cmd->cmd != rsp->cmd) {
>> + prlog(PR_WARNING, "OCC: Command value mismatch in OCC response"
>> + "rsp->cmd = %d cmd->cmd = %d\n", rsp->cmd, cmd->cmd);
>> + return false;
>> + }
>> +
>> + if (cmd->request_id != rsp->request_id) {
>> + prlog(PR_WARNING, "OCC: Request ID mismatch in OCC response"
>> + "rsp->request_id = %d cmd->request_id = %d\n",
>> + rsp->request_id, cmd->request_id);
>> + return false;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +static inline void queue_occ_rsp_msg(int token, int rc)
>> +{
>> + int ret;
>> +
>> + ret = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, token, rc);
>> + if (ret)
>> + prerror("OCC: Failed to queue OCC response status message\n");
>> +}
>> +
>> +static void occ_cmd_timeout_handler(struct timer *t __unused, void *data,
>> + uint64_t now __unused)
>> +{
>> + struct cmd_interface *chip = data;
>> +
>> + lock(&chip->queue_lock);
>> + if (!chip->cmd_in_progress)
>> + goto exit;
>> +
>> + chip->cmd_in_progress = false;
>> + queue_occ_rsp_msg(chip->token, OPAL_OCC_CMD_TIMEOUT);
>> +exit:
>> + unlock(&chip->queue_lock);
>> +}
>> +
>> +static void read_occ_rsp(struct occ_response_buffer *rsp,
>> + struct opal_occ_cmd_rsp_msg *msg)
>> +{
>> + /* Copy response to host buffer */
>> + msg->status = rsp->status;
>> + msg->rdata_size = rsp->data_size;
>> + memcpy((u8 *)msg->rdata, rsp->data, rsp->data_size);
>> +
>> + /* Clear the OCC response flag */
>> + rsp->flag = 0;
>> +}
>> +
>> +static void handle_occ_rsp(uint32_t chip_id)
>> +{
>> + struct cmd_interface *chip;
>> + struct opal_command_buffer *cmd;
>> + struct occ_response_buffer *rsp;
>> + struct opal_occ_cmd_rsp_msg *msg;
>> +
>> + chip = get_chip_cmd_interface(chip_id);
>> + if (!chip)
>> + return;
>> +
>> + cmd = chip->cmd;
>> + rsp = chip->rsp;
>> + msg = chip->msg;
>> +
>> + /*Read rsp*/
>> + if (rsp->flag != OCC_RSP_READY)
>> + return;
>> + lock(&chip->queue_lock);
>> + if (!chip->cmd_in_progress)
>> + goto exit;
>> +
>> + chip->cmd_in_progress = false;
>> + cancel_timer(&chip->timeout);
>> + if (!sanity_check_opal_cmd(cmd, chip->msg) ||
>> + !check_occ_rsp(cmd, rsp)) {
>> + queue_occ_rsp_msg(chip->token, OPAL_OCC_CMD_MISMATCH);
>> + goto exit;
>> + }
>> +
>> + read_occ_rsp(chip->rsp, msg);
>> + queue_occ_rsp_msg(chip->token, OPAL_SUCCESS);
>> +exit:
>> + unlock(&chip->queue_lock);
>> +}
>> +
>> +static void occ_cmd_interface_init(void)
>> +{
>> + struct dt_node *power_mgt;
>> + struct occ_dynamic_data *data;
>> + struct occ_pstate_table *pdata;
>> + struct proc_chip *chip;
>> + int i = 0;
>> +
>> + chip = next_chip(NULL);
>> + pdata = get_occ_pstate_table(chip);
>> + if (pdata->version != 0x90)
>> + return;
>> +
>> + for_each_chip(chip)
>> + nr_occs++;
>> +
>> + chips = malloc(sizeof(*chips) * nr_occs);
>> + assert(chips);
>> +
>> + for_each_chip(chip) {
>> + pdata = get_occ_pstate_table(chip);
>> + data = get_occ_dynamic_data(chip);
>> + chips[i].id = chip->id;
>> + chips[i].occ_role = pdata->v9.occ_role;
>> + chips[i].occ_state = &data->occ_state;
>> + chips[i].cmd = &data->cmd;
>> + chips[i].rsp = &data->rsp;
>> + init_lock(&chips[i].queue_lock);
>> + chips[i].cmd_in_progress = false;
>> + chips[i].prev_cmd = OCC_CMD_LAST;
>> + init_timer(&chips[i].timeout, occ_cmd_timeout_handler,
>> + &chips[i]);
>> + i++;
>> + }
>> +
>> + power_mgt = dt_find_by_path(dt_root, "/ibm,opal/power-mgt");
>> + if (!power_mgt) {
>> + prerror("OCC: dt node /ibm,opal/power-mgt not found\n");
>> + free(chips);
>> + return;
>> + }
>> +
>> + update_max_async_completions(nr_occs);
>> + alloc_opal_msg_list(nr_occs);
>> + dt_add_property_string(power_mgt, "compatible",
>> + "ibm,opal-occ-cmd-rsp-interface");
>> + opal_register(OPAL_OCC_COMMAND, opal_occ_command, 4);
>
> This means you need to add documentation to doc/opal-api/ describing the
> API.
>
>
Okay.
>> @@ -910,6 +1314,9 @@ void occ_pstates_init(void)
>> chip->throttle = 0;
>> opal_add_poller(occ_throttle_poll, NULL);
>> occ_pstates_initialized = true;
>> +
>> + /* Init OPAL-OCC command-response interface */
>> + occ_cmd_interface_init();
>> }
>>
>> struct occ_load_req {
>> @@ -1409,8 +1816,10 @@ void occ_p9_interrupt(uint32_t chip_id)
>> if (ireg & OCB_OCI_OCIMISC_IRQ_TMGT)
>> prd_tmgt_interrupt(chip_id);
>>
>> - if (ireg & OCB_OCI_OCIMISC_IRQ_SHMEM)
>> + if (ireg & OCB_OCI_OCIMISC_IRQ_SHMEM) {
>> occ_throttle_poll(NULL);
>> + handle_occ_rsp(chip_id);
>> + }
>>
>> if (ireg & OCB_OCI_OCIMISC_IRQ_I2C)
>> p9_i2c_bus_owner_change(chip_id);
>> @@ -1435,5 +1844,3 @@ void occ_fsp_init(void)
>> if (fsp_present())
>> fsp_register_client(&fsp_occ_client, FSP_MCLASS_OCC);
>> }
>> -
>> -
>> diff --git a/include/opal-api.h b/include/opal-api.h
>> index edae069..dd92cf9 100644
>> --- a/include/opal-api.h
>> +++ b/include/opal-api.h
>> @@ -55,6 +55,10 @@
>> #define OPAL_XSCOM_CTR_OFFLINED -30
>> #define OPAL_XIVE_PROVISIONING -31
>> #define OPAL_XIVE_FREE_ACTIVE -32
>> +#define OPAL_OCC_INVALID_STATE -33
>
> Instead of adding new return codes, I think existing ones suffice, or is
> there a really big need to have a way to distinguish these errors from
> other ones? e.g. insetad of OPAL_OCC_INVALID_STATE, it could be
> OPAL_WRONG_STATE - and this is consistent with other OPAL calls.
>
>> +#define OPAL_OCC_BUSY -34
>
> OPAL_BUSY
Yeah OPAL_BUSY works.
>
>> +#define OPAL_OCC_CMD_TIMEOUT -35
>
> We probably want an OPAL_TIMEOUT actually... we have OPAL_I2C_TIMEOUT
> and OPAL_XSCOM_TIMEOUT
>
>> +#define OPAL_OCC_CMD_MISMATCH -36
>
> OPAL_PARAMETER
>
> Does that make sense? Any reason not to do that?
>
OPAL_OCC_CMD_TIMEOUT and OPAL_OCC_CMD_MISMATCH is used by kerenl to retry the
OCC command. As OPAL_PARAMETER is also used for the sanity checks of the
parameters passed to opal_call, kernel wont be able to distinguish when to retry.
>>
>> /* API Tokens (in r0) */
>> #define OPAL_INVALID_CALL -1
>> @@ -207,7 +211,8 @@
>> #define OPAL_IMC_COUNTERS_INIT 149
>> #define OPAL_IMC_COUNTERS_START 150
>> #define OPAL_IMC_COUNTERS_STOP 151
>> -#define OPAL_LAST 151
>> +#define OPAL_OCC_COMMAND 152
>> +#define OPAL_LAST 152
>>
>> /* Device tree flags */
>>
>> @@ -1033,6 +1038,81 @@ typedef struct oppanel_line {
>> __be64 line_len;
>> } oppanel_line_t;
>>
>> +enum occ_cmd {
>> + OCC_CMD_AMESTER_PASS_THRU = 0,
>
> What is amester passthru for? Do we need this as part of our ABI?
This command will be used by Amester for tracing the sensor values at 250us/2ms
sampling rate. In P8, Amester used IPMI for tracing. In P9, Amester will be able
to communicate to OCC inband using this cmd/rsp interface.
Yeah we need this command to support inband Amester tracing.
>
>> + OCC_CMD_CLEAR_SENSOR_DATA,
>
> Clear what sensors?
Min-max values of sensors stored for different users like CSM, Profiler and
Job-scheduler.
How about OCC_CMD_CLEAR_SENSOR_MIN_MAX instead?
>
>> + OCC_CMD_SET_POWER_CAP,
>
> Power cap for what? System? Socket?
System.
>
>> + OCC_CMD_SET_POWER_SHIFTING_RATIO,
>> + OCC_CMD_SELECT_SENSOR_GROUPS,
>
> Basically, this all needs to be documented somewhere, notably in
> doc/opal-api/
Okay.
>
> I also wonder if this approach is the best for the OPAL API. While we do
> *currently* have a thing called an OCC that does these functions, is
> that *always* going to be the case? Elsewhere in firmware we've been
> going down the route of calling things "pm_complex" and sending commands
> to/from the pm_complex to do tasks for us.
>
> would it be better if our OPAL API instead described the intended effect
> rather than how to achieve said effect?
>
> I wouldn't be opposed to an OPAL_SET_POWER_CAP call, that's an async
> call, and accepts parameters in a range that's described in the device
> tree. That call could be implemented on P8 too (where you currently set
> power caps by doing crazy IPMI incantations to the BMC), and that would
> prove that the OPAL API is implementation independent, and we should be
> future proof with however we do the intent of capping
> system/socket/other-thing power in the future.
I agree with defining an abstract API for setting powercap as a better approach.
>
> How do the sensors work? Do they fit into the existing OPAL Sensors
> infrastructure? are they more in-memory sensors? I ask as perhaps the
> select/clear calls should be part of either the regular opal sensors API
> or the IMC api rather than their own API.
Currently, only in-memory environment sensors are exported in the OPAL sensor
infrastructure. Yes there are more perf/util/freq in-memory sensors that are not
exported.
The current sensor framework provides API to only read the sensor values. We
will need new API additions to the sensor framework then to clear min/max of
in-memory sensors and select the sensor groups to be copied to main memory.
So should we have new opal-calls defined for these sensor operations instead of
the OCC cmd/rsp interface?
Thanks and Regards,
Shilpa
>> --
>> 1.8.3.1
>>
>> _______________________________________________
>> Skiboot mailing list
>> Skiboot at lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/skiboot
>
More information about the Skiboot
mailing list