[Skiboot] [PATCH v2 3/3] occ: Add support for GPU presence detection
Shilpasri G Bhat
shilpa.bhat at linux.vnet.ibm.com
Wed Jun 20 15:40:28 AEST 2018
On 06/20/2018 09:19 AM, Andrew Donnellan wrote:
> On the Witherspoon platform, we need to distinguish between NVLink GPUs and
> OpenCAPI accelerators. In order to do this, we first need to find out
> whether the SXM2 socket is populated.
>
> On Witherspoon, the SXM2 socket's presence detection pin is only visible
> via I2C from the APSS, and thus can only be exposed to the host via the
> OCC. The OCC, per OCC Firmware Interface Specification for POWER9 version
> 0.22, now exposes this to skiboot through a field in the dynamic data
> shared memory.
>
> Add the necessary dynamic data changes required to read the version and
> GPU presence fields. Add a function, occ_get_gpu_presence(), that can be
> used to check GPU presence.
>
> If the OCC isn't reporting presence (old OCC firmware, or some other
> reason), we default to assuming there is a device present and wait for
> link training to fail.
>
> This will be used in later patches to fix up the NPU2 probe path for
> OpenCAPI support on Witherspoon.
>
> Signed-off-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
Reviewed-by: Shilpasri G Bhat <shilpa.bhat at linux.vnet.ibm.com>
> ---
> hw/occ.c | 23 ++++++++++++++++++++---
> include/occ.h | 4 ++++
> 2 files changed, 24 insertions(+), 3 deletions(-)
>
> diff --git a/hw/occ.c b/hw/occ.c
> index fc95d3926bb0..10b2de07dd7a 100644
> --- a/hw/occ.c
> +++ b/hw/occ.c
> @@ -229,10 +229,10 @@ struct occ_response_buffer {
> */
> struct occ_dynamic_data {
> u8 occ_state;
> + u8 major_version;
> + u8 minor_version;
> + u8 gpus_present;
> u8 spare1;
> - u8 spare2;
> - u8 spare3;
> - u8 spare4;
> u8 cpu_throttle;
> u8 mem_throttle;
> u8 quick_pwr_drop;
> @@ -1230,6 +1230,23 @@ exit:
> unlock(&chip->queue_lock);
> }
>
> +bool occ_get_gpu_presence(struct proc_chip *chip, int gpu_num)
> +{
> + struct occ_dynamic_data *ddata;
> +
> + assert(gpu_num <= 2);
> +
> + ddata = get_occ_dynamic_data(chip);
> +
> + if (ddata->major_version != 0 || ddata->minor_version < 1) {
> + prlog(PR_INFO, "OCC: OCC not reporting GPU slot presence, "
> + "assuming device is present\n");
> + return true;
> + }
> +
> + return (bool)(ddata->gpus_present & 1 << gpu_num);
> +}
> +
> static void occ_add_powercap_sensors(struct dt_node *power_mgt);
> static void occ_add_psr_sensors(struct dt_node *power_mgt);
>
> diff --git a/include/occ.h b/include/occ.h
> index c9faef9fdfb8..a46b9219fc70 100644
> --- a/include/occ.h
> +++ b/include/occ.h
> @@ -14,6 +14,8 @@
> * limitations under the License.
> */
>
> +#include <chip.h>
> +
> /* OCC Functions */
>
> extern void occ_pstates_init(void);
> @@ -36,6 +38,8 @@ enum pnor_owner {
> };
> extern void occ_pnor_set_owner(enum pnor_owner owner);
>
> +/* GPU presence detection */
> +bool occ_get_gpu_presence(struct proc_chip *chip, int gpu_num);
>
> /* OCC Inband Sensors */
> extern bool occ_sensors_init(void);
>
More information about the Skiboot
mailing list