[PATCH v2 13/15] hwmon: peci: Add dimmtemp driver

Guenter Roeck linux at roeck-us.net
Thu Aug 5 03:33:35 AEST 2021


On 8/4/21 3:46 AM, Winiarska, Iwona wrote:
> On Tue, 2021-08-03 at 08:39 -0700, Guenter Roeck wrote:
>> On Tue, Aug 03, 2021 at 01:31:32PM +0200, Iwona Winiarska wrote:
>>> Add peci-dimmtemp driver for Temperature Sensor on DIMM readings that
>>> are accessible via the processor PECI interface.
>>>
>>> The main use case for the driver (and PECI interface) is out-of-band
>>> management, where we're able to obtain thermal readings from an external
>>> entity connected with PECI, e.g. BMC on server platforms.
>>>
>>> Co-developed-by: Jae Hyun Yoo <jae.hyun.yoo at linux.intel.com>
>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo at linux.intel.com>
>>> Signed-off-by: Iwona Winiarska <iwona.winiarska at intel.com>
>>> Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart at linux.intel.com>
>>> ---
>>> Note that the timeout was completely removed - we're going to probe
>>> for detected DIMMs every 5 seconds until we reach "stable" state of
>>> either getting correct DIMM data or getting all -EINVAL (which
>>> suggests that the CPU doesn't have any DIMMs).
>>>
>>>   drivers/hwmon/peci/Kconfig    |  13 +
>>>   drivers/hwmon/peci/Makefile   |   2 +
>>>   drivers/hwmon/peci/dimmtemp.c | 614 ++++++++++++++++++++++++++++++++++
>>>   3 files changed, 629 insertions(+)
>>>   create mode 100644 drivers/hwmon/peci/dimmtemp.c
>>>
>>> diff --git a/drivers/hwmon/peci/Kconfig b/drivers/hwmon/peci/Kconfig
>>> index e10eed68d70a..9d32a57badfe 100644
>>> --- a/drivers/hwmon/peci/Kconfig
>>> +++ b/drivers/hwmon/peci/Kconfig
>>> @@ -14,5 +14,18 @@ config SENSORS_PECI_CPUTEMP
>>>            This driver can also be built as a module. If so, the module
>>>            will be called peci-cputemp.
>>>   
>>> +config SENSORS_PECI_DIMMTEMP
>>> +       tristate "PECI DIMM temperature monitoring client"
>>> +       depends on PECI
>>> +       select SENSORS_PECI
>>> +       select PECI_CPU
>>> +       help
>>> +         If you say yes here you get support for the generic Intel PECI
>>> hwmon
>>> +         driver which provides Temperature Sensor on DIMM readings that are
>>> +         accessible via the processor PECI interface.
>>> +
>>> +         This driver can also be built as a module. If so, the module
>>> +         will be called peci-dimmtemp.
>>> +
>>>   config SENSORS_PECI
>>>          tristate
>>> diff --git a/drivers/hwmon/peci/Makefile b/drivers/hwmon/peci/Makefile
>>> index e8a0ada5ab1f..191cfa0227f3 100644
>>> --- a/drivers/hwmon/peci/Makefile
>>> +++ b/drivers/hwmon/peci/Makefile
>>> @@ -1,5 +1,7 @@
>>>   # SPDX-License-Identifier: GPL-2.0-only
>>>   
>>>   peci-cputemp-y := cputemp.o
>>> +peci-dimmtemp-y := dimmtemp.o
>>>   
>>>   obj-$(CONFIG_SENSORS_PECI_CPUTEMP)     += peci-cputemp.o
>>> +obj-$(CONFIG_SENSORS_PECI_DIMMTEMP)    += peci-dimmtemp.o
>>> diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c
>>> new file mode 100644
>>> index 000000000000..6264c29bb6c0
>>> --- /dev/null
>>> +++ b/drivers/hwmon/peci/dimmtemp.c
>>> @@ -0,0 +1,614 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +// Copyright (c) 2018-2021 Intel Corporation
>>> +
>>> +#include <linux/auxiliary_bus.h>
>>> +#include <linux/bitfield.h>
>>> +#include <linux/bitops.h>
>>> +#include <linux/hwmon.h>
>>> +#include <linux/jiffies.h>
>>> +#include <linux/module.h>
>>> +#include <linux/peci.h>
>>> +#include <linux/peci-cpu.h>
>>> +#include <linux/units.h>
>>> +#include <linux/workqueue.h>
>>> +#include <linux/x86/intel-family.h>
>>> +
>>> +#include "common.h"
>>> +
>>> +#define DIMM_MASK_CHECK_DELAY_JIFFIES  msecs_to_jiffies(5000)
>>> +
>>> +/* Max number of channel ranks and DIMM index per channel */
>>> +#define CHAN_RANK_MAX_ON_HSX   8
>>> +#define DIMM_IDX_MAX_ON_HSX    3
>>> +#define CHAN_RANK_MAX_ON_BDX   4
>>> +#define DIMM_IDX_MAX_ON_BDX    3
>>> +#define CHAN_RANK_MAX_ON_BDXD  2
>>> +#define DIMM_IDX_MAX_ON_BDXD   2
>>> +#define CHAN_RANK_MAX_ON_SKX   6
>>> +#define DIMM_IDX_MAX_ON_SKX    2
>>> +#define CHAN_RANK_MAX_ON_ICX   8
>>> +#define DIMM_IDX_MAX_ON_ICX    2
>>> +#define CHAN_RANK_MAX_ON_ICXD  4
>>> +#define DIMM_IDX_MAX_ON_ICXD   2
>>> +
>>> +#define CHAN_RANK_MAX          CHAN_RANK_MAX_ON_HSX
>>> +#define DIMM_IDX_MAX           DIMM_IDX_MAX_ON_HSX
>>> +#define DIMM_NUMS_MAX          (CHAN_RANK_MAX * DIMM_IDX_MAX)
>>> +
>>> +#define CPU_SEG_MASK           GENMASK(23, 16)
>>> +#define GET_CPU_SEG(x)         (((x) & CPU_SEG_MASK) >> 16)
>>> +#define CPU_BUS_MASK           GENMASK(7, 0)
>>> +#define GET_CPU_BUS(x)         ((x) & CPU_BUS_MASK)
>>> +
>>> +#define DIMM_TEMP_MAX          GENMASK(15, 8)
>>> +#define DIMM_TEMP_CRIT         GENMASK(23, 16)
>>> +#define GET_TEMP_MAX(x)                (((x) & DIMM_TEMP_MAX) >> 8)
>>> +#define GET_TEMP_CRIT(x)       (((x) & DIMM_TEMP_CRIT) >> 16)
>>> +
>>> +struct peci_dimmtemp;
>>> +
>>> +struct dimm_info {
>>> +       int chan_rank_max;
>>> +       int dimm_idx_max;
>>> +       u8 min_peci_revision;
>>> +       int (*read_thresholds)(struct peci_dimmtemp *priv, int dimm_order,
>>> +                              int chan_rank, u32 *data);
>>> +};
>>> +
>>> +struct peci_dimm_thresholds {
>>> +       long temp_max;
>>> +       long temp_crit;
>>> +       struct peci_sensor_state state;
>>> +};
>>> +
>>> +enum peci_dimm_threshold_type {
>>> +       temp_max_type,
>>> +       temp_crit_type,
>>> +};
>>> +
>>> +struct peci_dimmtemp {
>>> +       struct peci_device *peci_dev;
>>> +       struct device *dev;
>>> +       const char *name;
>>> +       const struct dimm_info *gen_info;
>>> +       struct delayed_work detect_work;
>>> +       struct {
>>> +               struct peci_sensor_data temp;
>>> +               struct peci_dimm_thresholds thresholds;
>>> +       } dimm[DIMM_NUMS_MAX];
>>> +       char **dimmtemp_label;
>>> +       DECLARE_BITMAP(dimm_mask, DIMM_NUMS_MAX);
>>> +};
>>> +
>>> +static u8 __dimm_temp(u32 reg, int dimm_order)
>>> +{
>>> +       return (reg >> (dimm_order * 8)) & 0xff;
>>> +}
>>> +
>>> +static int get_dimm_temp(struct peci_dimmtemp *priv, int dimm_no, long
>>> *val)
>>> +{
>>> +       int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
>>> +       int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
>>> +       u32 data;
>>> +       int ret;
>>
>>          int ret = 0;
>>
>>> +
>>> +       mutex_lock(&priv->dimm[dimm_no].temp.state.lock);
>>> +       if (!peci_sensor_need_update(&priv->dimm[dimm_no].temp.state))
>>> +               goto skip_update;
>>> +
>>> +       ret = peci_pcs_read(priv->peci_dev, PECI_PCS_DDR_DIMM_TEMP,
>>> chan_rank, &data);
>>> +       if (ret) {
>>> +               mutex_unlock(&priv->dimm[dimm_no].temp.state.lock);
>>> +               return ret;
>>> +       }
>>
>>          if (ret)
>>                  goto unlock;
>>
>>> +
>>> +       priv->dimm[dimm_no].temp.value = __dimm_temp(data, dimm_order) *
>>> MILLIDEGREE_PER_DEGREE;
>>> +
>>> +       peci_sensor_mark_updated(&priv->dimm[dimm_no].temp.state);
>>> +
>>> +skip_update:
>>> +       *val = priv->dimm[dimm_no].temp.value;
>>
>> unlock:
>>> +       mutex_unlock(&priv->dimm[dimm_no].temp.state.lock);
>>> +       return 0;
>>
>>          return ret;
> 
> Ack.
> 
>>
>>> +}
>>> +
>>> +static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no)
>>> +{
>>> +       int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
>>> +       int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
>>> +       u32 data;
>>> +       int ret;
>>> +
>>> +       if (!peci_sensor_need_update(&priv->dimm[dimm_no].thresholds.state))
>>> +               return 0;
>>> +
>>> +       ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank,
>>> &data);
>>> +       if (ret == -ENODATA) /* Use default or previous value */
>>> +               return 0;
>>> +       if (ret)
>>> +               return ret;
>>> +
>>> +       priv->dimm[dimm_no].thresholds.temp_max = GET_TEMP_MAX(data) *
>>> MILLIDEGREE_PER_DEGREE;
>>> +       priv->dimm[dimm_no].thresholds.temp_crit = GET_TEMP_CRIT(data) *
>>> MILLIDEGREE_PER_DEGREE;
>>> +
>>> +       peci_sensor_mark_updated(&priv->dimm[dimm_no].thresholds.state);
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int get_dimm_thresholds(struct peci_dimmtemp *priv, enum
>>> peci_dimm_threshold_type type,
>>> +                              int dimm_no, long *val)
>>> +{
>>> +       int ret;
>>> +
>>> +       mutex_lock(&priv->dimm[dimm_no].thresholds.state.lock);
>>> +       ret = update_thresholds(priv, dimm_no);
>>> +       if (ret)
>>> +               goto unlock;
>>> +
>>> +       switch (type) {
>>> +       case temp_max_type:
>>> +               *val = priv->dimm[dimm_no].thresholds.temp_max;
>>> +               break;
>>> +       case temp_crit_type:
>>> +               *val = priv->dimm[dimm_no].thresholds.temp_crit;
>>> +               break;
>>> +       default:
>>> +               ret = -EOPNOTSUPP;
>>> +               break;
>>> +       }
>>> +unlock:
>>> +       mutex_unlock(&priv->dimm[dimm_no].thresholds.state.lock);
>>> +
>>> +       return ret;
>>> +}
>>> +
>>> +static int dimmtemp_read_string(struct device *dev,
>>> +                               enum hwmon_sensor_types type,
>>> +                               u32 attr, int channel, const char **str)
>>> +{
>>> +       struct peci_dimmtemp *priv = dev_get_drvdata(dev);
>>> +
>>> +       if (attr != hwmon_temp_label)
>>> +               return -EOPNOTSUPP;
>>> +
>>> +       *str = (const char *)priv->dimmtemp_label[channel];
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int dimmtemp_read(struct device *dev, enum hwmon_sensor_types type,
>>> +                        u32 attr, int channel, long *val)
>>> +{
>>> +       struct peci_dimmtemp *priv = dev_get_drvdata(dev);
>>> +
>>> +       switch (attr) {
>>> +       case hwmon_temp_input:
>>> +               return get_dimm_temp(priv, channel, val);
>>> +       case hwmon_temp_max:
>>> +               return get_dimm_thresholds(priv, temp_max_type, channel,
>>> val);
>>> +       case hwmon_temp_crit:
>>> +               return get_dimm_thresholds(priv, temp_crit_type, channel,
>>> val);
>>> +       default:
>>> +               break;
>>> +       }
>>> +
>>> +       return -EOPNOTSUPP;
>>> +}
>>> +
>>> +static umode_t dimmtemp_is_visible(const void *data, enum
>>> hwmon_sensor_types type,
>>> +                                  u32 attr, int channel)
>>> +{
>>> +       const struct peci_dimmtemp *priv = data;
>>> +
>>> +       if (test_bit(channel, priv->dimm_mask))
>>> +               return 0444;
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static const struct hwmon_ops peci_dimmtemp_ops = {
>>> +       .is_visible = dimmtemp_is_visible,
>>> +       .read_string = dimmtemp_read_string,
>>> +       .read = dimmtemp_read,
>>> +};
>>> +
>>> +static int check_populated_dimms(struct peci_dimmtemp *priv)
>>> +{
>>> +       int chan_rank_max = priv->gen_info->chan_rank_max;
>>> +       int dimm_idx_max = priv->gen_info->dimm_idx_max;
>>> +       u32 chan_rank_empty = 0;
>>> +       u64 dimm_mask = 0;
>>> +       int chan_rank, dimm_idx, ret;
>>> +       u32 pcs;
>>> +
>>> +       BUILD_BUG_ON(CHAN_RANK_MAX > 32);
>>> +       BUILD_BUG_ON(DIMM_NUMS_MAX > 64);
>>
>> I don't immediately see the value of those build bugs. What happens if
>> CHAN_RANK_MAX > 32 or DIMM_NUMS_MAX > 64? Where do those limits come
>> from?
> 
> Supported HW doesn't come near the limit for now - it's just an "artificial"
> limit imposed by the bit widths of the variables we're using (u64 for
> dimm_mask and u32 for chan_rank_empty).
> 

Please use a value derived from the size of those variables for the check
to clarify and explain the constraints.

Thanks,
Guenter


More information about the openbmc mailing list