[Skiboot] [PATCH v2] occ: Poll OCC throttle status and queue OCC events to host

Patrick Williams III iawillia at us.ibm.com
Tue May 12 21:11:01 AEST 2015


Doesn't hostboot runtime already perform this function?  Is there any
risk of a collision if OPAL modifies the OCC queue while hostboot runtime
is functional?

Patrick Williams

> On May 12, 2015, at 12:31 AM, Stewart Smith <stewart at linux.vnet.ibm.com> wrote:
>
> Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com> writes:
>> Add a new class of message definition OPAL_MSG_OCC to
>> opal_message_type to notify the following OCC events to host:
>> 1) OCC Reset
>> 2) OCC Load
>> 3) OCC Throttle Status Change
>>
>> Add an opal poller to periodically read throttle status updated by OCC
>> for each chip and notify any change in throttle status to host. The
>> throttle status indicates the reason why OCC may have limited the max
>> Pstate of the chip.
>
> Major comment is that you need to add something to
> doc/opal-api/opal-messages.txt
>
> Also include a description on how we may deal with future expansion
> (e.g. chip_id and throttle_status are only valid for params[0] = 0,1,2 and
> if params[0] is > 2 then the rest of params is something defined in a
> future OPAL version and the host should just ignore the message)
>
> Do we *really* have to poll? That kind of sucks if so (and how do we get
> that fixed?)
>
>> diff --git a/hw/occ.c b/hw/occ.c
>> index 34d6de5..d346394 100644
>> --- a/hw/occ.c
>> +++ b/hw/occ.c
>> @@ -24,6 +24,8 @@
>> #include <timebase.h>
>> #include <hostservices.h>
>> #include <errorlog.h>
>> +#include <opal-api.h>
>> +#include <opal-msg.h>
>>
>> /* OCC Communication Area for PStates */
>>
>> @@ -31,6 +33,16 @@
>>
>> #define MAX_PSTATES 256
>>
>> +#define OCC_RESET    0
>> +#define OCC_LOAD    1
>> +#define OCC_THROTTLE    2
>
> Part of ABI? Please add to opal-api.h and doc/
>
>> +
>> +#define chip_occ_data(chip) \
>> +        ((struct occ_pstate_table *)(chip->homer_base + \
>> +                P8_HOMER_SAPPHIRE_DATA_OFFSET))
>> +
>> +static bool occ_reset;
>> +
>> struct occ_pstate_entry {
>>    s8 id;
>>    u8 flags;
>> @@ -302,6 +314,61 @@ static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *
>>    return true;
>> }
>>
>> +/* occ_throttle_poll: This function will queue a message of type
>> + * OPAL_MSG_OCC to notify any change in the throttle status of the
>> + * chip. Throttle status indicates the reason why OCC may have limited
>> + * the max Pstate of the chip.
>> + * 0x00 = No throttle
>> + * 0x01 = Power Cap
>> + * 0x02 = Processor Over Temperature
>> + * 0x03 = Power Supply Failure (currently not used)
>> + * 0x04 = Over current (currently not used)
>> + * 0x05 = OCC Reset (not reliable as some failures will not allow for
>> + * OCC to update throttle status, so use 'occ_reset')
>
> Looks like these are part of ABI - please add to opal-api.h and document.
>
>> + */
>> +static void occ_throttle_poll(void *data __unused)
>> +{
>> +    struct proc_chip *chip;
>> +    struct occ_pstate_table *occ_data;
>> +    int rc;
>> +
>> +    if (occ_reset) {
>> +        int inactive = 0;
>> +
>> +        for_each_chip(chip) {
>> +            occ_data = chip_occ_data(chip);
>> +            if (occ_data->valid != 1) {
>> +                inactive = 1;
>> +                break;
>> +            }
>> +        }
>> +        if (!inactive) {
>> +            /*
>> +             * Queue OCC_THROTTLE with throttle status as 0 to
>> +             * indicate all OCCs are active after a reset.
>> +             */
>> +            rc = opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
>> +                        OCC_THROTTLE, 0, 0);
>> +            if (!rc)
>> +                occ_reset = false;
>> +        }
>> +    } else {
>> +        for_each_chip(chip) {
>> +            occ_data = chip_occ_data(chip);
>> +            if ((occ_data->valid == 1) &&
>> +                (chip->prev_throttle != occ_data->throttle) &&
>> +                (occ_data->throttle <= 5)) {
>> +                rc = opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
>> +                        OCC_THROTTLE, chip->id,
>> +                        occ_data->throttle);
>> +                if (!rc)
>> +                    chip->prev_throttle =
>> +                        occ_data->throttle;
>> +            }
>> +        }
>> +    }
>> +}
>> +
>> /* CPU-OCC PState init */
>> /* Called after OCC init on P8 */
>> void occ_pstates_init(void)
>> @@ -345,6 +412,11 @@ void occ_pstates_init(void)
>>            cpu_pstates_prepare_core(chip, c, pstate_nom);
>>        }
>>    }
>> +
>> +    /* Add opal_poller to poll OCC throttle status of each chip */
>> +    for_each_chip(chip)
>> +        chip->prev_throttle = 0;
>> +    opal_add_poller(occ_throttle_poll, NULL);
>> }
>>
>> struct occ_load_req {
>> @@ -386,6 +458,11 @@ static void __occ_do_load(u8 scope, u32 dbob_id __unused, u32 seq_id)
>>        prlog(PR_INFO, "OCC: Load: Fallback to preloaded image\n");
>>        rc = 0;
>>    } else if (!rc) {
>> +        rc = opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, OCC_LOAD);
>> +        if (rc)
>> +            prlog(PR_INFO, "OCC: Failed to queue message %d\n",
>> +                        OCC_LOAD);
>> +
>>        /* Success, start OCC */
>>        rc = host_services_occ_start();
>>    }
>> @@ -509,6 +586,24 @@ static void occ_do_reset(u8 scope, u32 dbob_id, u32 seq_id)
>>        rc = 0;
>>    }
>>    if (!rc) {
>> +        rc = opal_queue_msg(OPAL_MSG_OCC, NULL, NULL, OCC_RESET);
>> +        if (rc)
>> +            prlog(PR_INFO, "OCC: Failed to queue message %d\n",
>> +                        OCC_RESET);
>> +        /*
>> +         * Set 'valid' byte of chip_occ_data to 0 since OCC
>> +         * may not clear this byte on a reset.
>> +         * OCC will set the 'valid' byte to 1 when it becomes
>> +         * active again.
>> +         */
>> +        for_each_chip(chip) {
>> +            struct occ_pstate_table *occ_data;
>> +
>> +            occ_data = chip_occ_data(chip);
>> +            occ_data->valid = 0;
>> +            chip->prev_throttle = 0;
>> +        }
>> +        occ_reset = true;
>>        /* Send a single success response for all chips */
>>        stat = fsp_mkmsg(FSP_CMD_RESET_OCC_STAT, 2, 0, seq_id);
>>        if (stat)
>> diff --git a/include/chip.h b/include/chip.h
>> index 0547902..340fdfc 100644
>> --- a/include/chip.h
>> +++ b/include/chip.h
>> @@ -147,6 +147,7 @@ struct proc_chip {
>>    uint64_t        homer_size;
>>    uint64_t        occ_common_base;
>>    uint64_t        occ_common_size;
>> +    u8            prev_throttle;
>>
>>    /* Must hold capi_lock to change */
>>    u8            capp_phb3_attached_mask;
>> diff --git a/include/opal-api.h b/include/opal-api.h
>> index 1698311..abe798e 100644
>> --- a/include/opal-api.h
>> +++ b/include/opal-api.h
>> @@ -417,6 +417,13 @@ enum opal_msg_type {
>>    OPAL_MSG_HMI_EVT,
>>    OPAL_MSG_DPO,
>>    OPAL_MSG_PRD,
>> +    OPAL_MSG_OCC,            /*
>> +                     * params[0] =    0 reset,
>> +                     *        1 load,
>> +                     *        2 throttle
>> +                     * params[1] = chip_id,
>> +                     * params[2] = throttle_status
>> +                     */
>
> Perhaps define this in a struct/union somewhere too?
>
> _______________________________________________
> Skiboot mailing list
> Skiboot at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ozlabs.org/pipermail/skiboot/attachments/20150512/b959ddb4/attachment-0001.html>


More information about the Skiboot mailing list