[Skiboot] [PATCH 2/2] dts: add DIMM temperature sensors

Benjamin Herrenschmidt benh at au1.ibm.com
Wed Jun 24 08:02:07 AEST 2015


On Tue, 2015-06-23 at 23:50 +0200, Cedric Le Goater wrote:
> On 06/23/2015 12:07 AM, Benjamin Herrenschmidt wrote:
> > On Mon, 2015-06-22 at 13:53 +0200, Cédric Le Goater wrote:
> >> The DIMM temperatures are stored in the Centaur's sensor cache. This 
> >> patch adds the necessary interface to read theses values and expose
> >> them to the host OS through the sensor framework of OPAL.
> > 
> > So you are reading the cache via SCOMs to the Centaur. Isn't the cache
> > also readable on the PowerBus using MMIOs ? AFAIK that's the path the 
> > OCC takes no ?
> 
> Yes. This is what the OCC does and it is quite complex ... Do you think 
> we should collect the Centaur sensor cache data through a PowerBus 
> mapping ? 

I think ideally we should get it from RAM where the OCC copies it after
reading it :-) I think Vaidy is working on that with the OCC folks.

> Are there some risks of collision using SCOM on the centaur ? I should 
> probably add an extra check to see if the cache is enabled or not when 
> reading the data or use centaur->scache_disable_count from your recent 
> patchset.

I don't know, but I don't think you care. When the cache is disabled it
means it's just not updated. This is meant to stop the i2c accesses so I
can do i2c accesses to the SPVPD but you can jsut continue reading the
old cache content.

The only thing that would be good is if you could write your code so
that it deals with OPAL_BUSY coming back from xscom_read/write meaning
"try again later" (and possibly escalate that all the way to Linux so
the delay can be done as a sleeping delay in Linux).

I might have to introduce recovery delays in the error handling for
XSCOM on FSI. I'll fix I2C (the other user).

> The OCC does collect a lot of sensor values which would be interesting
> to expose in OPAL, and Linux. But, maybe, using a table in SRAM and/or 
> extend the sapphire_table whould be more appropriate.  

Talk to Vaidy, he's been looking into this too.

Cheers,
Ben.

> Cheers,
> 
> C. 
> 
>  
> >> Signed-off-by: Cédric Le Goater <clg at fr.ibm.com>
> >> ---
> >>  hw/dts.c |  149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 149 insertions(+)
> >>
> >> Index: skiboot.git/hw/dts.c
> >> ===================================================================
> >> --- skiboot.git.orig/hw/dts.c
> >> +++ skiboot.git/hw/dts.c
> >> @@ -42,6 +42,7 @@ struct dts {
> >>  	uint8_t		valid;
> >>  	uint8_t		trip;
> >>  	int16_t		temp;
> >> +	int8_t		status;
> >>  };
> >>  
> >>  /* Different sensor locations */
> >> @@ -209,6 +210,98 @@ static int dts_read_core_temp(uint32_t p
> >>  	return rc;
> >>  }
> >>  
> >> +/*
> >> + * Centaur Sensor Cache Registers
> >> + */
> >> +#define CENTAUR_SCAC_DATA0_3	0x020115CA
> >> +#define CENTAUR_SCAC_DATA4_7	0x020115CB
> >> +#define CENTAUR_SCAC_ENABLE	0x020115CC
> >> +#define CENTAUR_SCAC_LFIR	0x020115C0
> >> +
> >> +enum centaur_scac_status {
> >> +	SCAC_STATUS_STALLED_OR_DISABLED	= 0x0, /* should check LFIR */
> >> +	SCAC_STATUS_ERROR		= 0x1, /* should check LFIR */
> >> +	SCAC_STATUS_VALID_READING	= 0x2,
> >> +	SCAC_STATUS_VALID_READING_NEW	= 0x3  /* First read */
> >> +};
> >> +
> >> +/*
> >> + * 0		Critical Trip Alarm
> >> + * 1		Above Window Alarm
> >> + * 2		Below Window Alarm
> >> + * 3		Temperature Sign Bit
> >> + * [4:13]	Temperature = A(4:11) + B(12:13) (Celsius)
> >> + * [14:15]	Status Indicator (see enum centaur_scac_status)
> >> + */
> >> +static void dts_decode_one_dimm_dts(uint16_t raw, struct dts *dts)
> >> +{
> >> +	uint8_t sign;
> >> +
> >> +	dts->status = raw & 0x3;
> >> +	dts->valid  = raw & 0x2;
> >> +	dts->temp   = ((raw >> 4) & 0xff) + ((raw >> 2) & 0x3);
> >> +	dts->trip   = (raw >> 13) & 0x7;
> >> +
> >> +	sign = (raw >> 12) & 0x1;
> >> +	if (sign)
> >> +		dts->temp *= -1;
> >> +}
> >> +
> >> +#define MAX_DIMMS 8
> >> +
> >> +static const uint64_t scac_data_addrs[MAX_DIMMS] = {
> >> +	[0 ... 3] = CENTAUR_SCAC_DATA0_3,
> >> +	[4 ... 7] = CENTAUR_SCAC_DATA4_7
> >> +};
> >> +
> >> +static int dts_read_dimm_temp(uint32_t chip_id, uint8_t dimm_id,
> >> +			      struct dts *dts)
> >> +{
> >> +	uint64_t data;
> >> +	uint64_t enable;
> >> +	int rc;
> >> +	struct dts temps[4];
> >> +
> >> +	if (dimm_id >= MAX_DIMMS) {
> >> +		prerror("DTS: Chip %x Dimm %x does not exist\n", chip_id,
> >> +		       dimm_id);
> >> +		return OPAL_PARAMETER;
> >> +	}
> >> +
> >> +	rc = xscom_read(chip_id, CENTAUR_SCAC_ENABLE, &enable);
> >> +	if (rc)
> >> +		return rc;
> >> +
> >> +	if (!(enable & PPC_BIT(dimm_id))) {
> >> +		prerror("DTS: Chip %x Dimm %x is disabled\n", chip_id,
> >> +		       dimm_id);
> >> +		return OPAL_RESOURCE;
> >> +	}
> >> +
> >> +	rc = xscom_read(chip_id, scac_data_addrs[dimm_id], &data);
> >> +	if (rc)
> >> +		return rc;
> >> +
> >> +	dts_decode_one_dimm_dts(data >> 48, &temps[0]);
> >> +	dts_decode_one_dimm_dts(data >> 32, &temps[1]);
> >> +	dts_decode_one_dimm_dts(data >> 16, &temps[2]);
> >> +	dts_decode_one_dimm_dts(data >> 0,  &temps[3]);
> >> +
> >> +	memcpy(dts, &temps[dimm_id % 4], sizeof(*dts));
> >> +
> >> +	if (!dts->valid) {
> >> +		uint64_t lfir;
> >> +
> >> +		xscom_read(chip_id, CENTAUR_SCAC_LFIR, &lfir);
> >> +		prerror("DTS: Chip %x Dimm %x invalid status:%x fir:%016llx\n",
> >> +			chip_id, dimm_id, dts->status, lfir);
> >> +		return OPAL_HARDWARE;
> >> +	}
> >> +
> >> +	prlog(PR_TRACE, "DTS: Chip %x Dimm %x temp:%dC trip:%x status:%x\n",
> >> +	      chip_id, dimm_id, dts->temp, dts->trip, dts->status);
> >> +	return 0;
> >> +}
> >>  
> >>  /* Different sensor locations */
> >>  #define P8_MEM_DTS0	0
> >> @@ -261,6 +354,7 @@ static int dts_read_mem_temp(uint32_t ch
> >>  enum sensor_dts_class {
> >>  	SENSOR_DTS_CORE_TEMP,
> >>  	SENSOR_DTS_MEM_TEMP,
> >> +	SENSOR_DTS_DIMM_TEMP,
> >>  	/* To be continued */
> >>  };
> >>  
> >> @@ -277,6 +371,7 @@ enum {
> >>   * resource identifier field of the sensor handler
> >>   */
> >>  #define centaur_get_id(rid) (0x80000000 | ((rid) & 0x3ff))
> >> +#define dimm_get_id(rid) ((rid) >> 10)
> >>  
> >>  int64_t dts_sensor_read(uint32_t sensor_hndl, uint32_t *sensor_data)
> >>  {
> >> @@ -297,6 +392,10 @@ int64_t dts_sensor_read(uint32_t sensor_
> >>  	case SENSOR_DTS_MEM_TEMP:
> >>  		rc = dts_read_mem_temp(centaur_get_id(rid), &dts);
> >>  		break;
> >> +	case SENSOR_DTS_DIMM_TEMP:
> >> +		rc = dts_read_dimm_temp(centaur_get_id(rid), dimm_get_id(rid),
> >> +					&dts);
> >> +		break;
> >>  	default:
> >>  		rc = OPAL_PARAMETER;
> >>  		break;
> >> @@ -333,11 +432,16 @@ int64_t dts_sensor_read(uint32_t sensor_
> >>  	sensor_make_handler(SENSOR_DTS_MEM_TEMP|SENSOR_DTS,		\
> >>  			    centaur_make_id(chip_id, 0), attr_id)
> >>  
> >> +#define dimm_handler(cen_id, dimm_id, attr_id)				\
> >> +	sensor_make_handler(SENSOR_DTS_DIMM_TEMP|SENSOR_DTS,		\
> >> +			    centaur_make_id(chip_id, i), attr_id)
> >> +
> >>  bool dts_sensor_create_nodes(struct dt_node *sensors)
> >>  {
> >>  	struct proc_chip *chip;
> >>  	struct dt_node *cn;
> >>  	char name[64];
> >> +	int dimm_num = 1;
> >>  
> >>  	/* build the device tree nodes :
> >>  	 *
> >> @@ -375,6 +479,9 @@ bool dts_sensor_create_nodes(struct dt_n
> >>  		uint32_t chip_id;
> >>  		struct dt_node *node;
> >>  		uint32_t handler;
> >> +		uint64_t enable;
> >> +		int rc;
> >> +		int i;
> >>  
> >>  		chip_id = dt_prop_get_u32(cn, "ibm,chip-id");
> >>  
> >> @@ -391,6 +498,48 @@ bool dts_sensor_create_nodes(struct dt_n
> >>  		dt_add_property_string(node, "sensor-type", "temp");
> >>  		dt_add_property_cells(node, "ibm,chip-id", chip_id);
> >>  		dt_add_property_string(node, "label", "Centaur");
> >> +
> >> +		rc = xscom_read(chip_id, CENTAUR_SCAC_ENABLE, &enable);
> >> +		if (rc) {
> >> +			prerror("DTS: Chip %x failed to create nodes for DIMMs",
> >> +			       chip_id);
> >> +			continue;
> >> +		}
> >> +
> >> +		for (i = 0; i < MAX_DIMMS; i++, dimm_num++) {
> >> +			if (!(enable & PPC_BIT(i))) {
> >> +				prlog(PR_INFO, "DTS: Chip %x Dimm %x is disabled\n",
> >> +					chip_id, i);
> >> +				continue;
> >> +			}
> >> +
> >> +			snprintf(name, sizeof(name), "dimm-temp@%x-%x",
> >> +				 chip_id, i);
> >> +
> >> +			/*
> >> +			 * We only have two bytes for the resource
> >> +			 * identifier. Let's trunctate the centaur chip id
> >> +			 */
> >> +			handler = dimm_handler(chip_id, i,
> >> +					       SENSOR_DTS_ATTR_TEMP_MAX);
> >> +			node = dt_new(sensors, name);
> >> +			dt_add_property_string(node, "compatible",
> >> +					       "ibm,opal-sensor");
> >> +			dt_add_property_cells(node, "sensor-data", handler);
> >> +
> >> +			handler = dimm_handler(chip_id, i,
> >> +					       SENSOR_DTS_ATTR_TEMP_TRIP);
> >> +			dt_add_property_cells(node, "sensor-status", handler);
> >> +			dt_add_property_string(node, "sensor-type", "temp");
> >> +
> >> +			/*
> >> +			 *  Use a global increment for the DIMMs. It
> >> +			 *  gives a quick idea on how slots are
> >> +			 *  populated on the system.
> >> +			 */
> >> +			snprintf(name, sizeof(name), "Dimm %d", dimm_num);
> >> +			dt_add_property_string(node, "label", name);
> >> +		}
> >>  	}
> >>  
> >>  	return true;
> >>
> >> _______________________________________________
> >> Skiboot mailing list
> >> Skiboot at lists.ozlabs.org
> >> https://lists.ozlabs.org/listinfo/skiboot
> > 
> > 




More information about the Skiboot mailing list