[PATCH linux dev-4.10 v2] drivers/hwmon/occ: Add error handling
Eddie James
eajames at linux.vnet.ibm.com
Wed Jun 21 07:27:49 AEST 2017
From: "Edward A. James" <eajames at us.ibm.com>
Create occ_error sysfs attribute to describe occ error state. Add
logic to poll to check for error conditions. Fix set user powercap
endianness.
Resolves: https://github.com/openbmc/openbmc/issues/1802
Changes since v1:
- Forgot to decrement OCC counter in remove functions.
Signed-off-by: Edward A. James <eajames at us.ibm.com>
---
drivers/hwmon/occ/common.c | 85 ++++++++++++++++++++++++++++++++++++++--------
drivers/hwmon/occ/common.h | 11 ++++++
drivers/hwmon/occ/p8_i2c.c | 16 +++++++--
drivers/hwmon/occ/p9_sbe.c | 18 +++++++++-
4 files changed, 112 insertions(+), 18 deletions(-)
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index d3eb6f8..4e3e411 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -10,7 +10,7 @@
#include <asm/unaligned.h>
#include "common.h"
-#define OCC_NUM_STATUS_ATTRS 7
+#define OCC_NUM_STATUS_ATTRS 8
#define OCC_STAT_MASTER 0x80
#define OCC_STAT_ACTIVE 0x01
@@ -19,6 +19,8 @@
#define OCC_EXT_STAT_MEM_THROTTLE 0x20
#define OCC_EXT_STAT_QUICK_DROP 0x10
+atomic_t occ_num_occs = ATOMIC_INIT(0);
+
struct temp_sensor_1 {
u16 sensor_id;
u16 value;
@@ -163,6 +165,8 @@ void occ_parse_poll_response(struct occ *occ)
int occ_poll(struct occ *occ)
{
+ int rc;
+ struct occ_poll_response_header *header;
u16 checksum = occ->poll_cmd_data + 1;
u8 cmd[8];
@@ -175,7 +179,32 @@ int occ_poll(struct occ *occ)
cmd[6] = checksum & 0xFF;
cmd[7] = 0;
- return occ->send_cmd(occ, cmd);
+ rc = occ->send_cmd(occ, cmd);
+ if (rc)
+ return rc;
+
+ header = (struct occ_poll_response_header *)occ->resp.data;
+
+ if (header->occ_state == OCC_STATE_SAFE) {
+ if (occ->last_safe) {
+ if (time_after(jiffies,
+ occ->last_safe + OCC_SAFE_TIMEOUT))
+ occ->error = -EHOSTDOWN;
+ } else
+ occ->last_safe = jiffies;
+ } else
+ occ->last_safe = 0;
+
+ if (header->status & OCC_STAT_MASTER) {
+ if (hweight8(header->occs_present) !=
+ atomic_read(&occ_num_occs)) {
+ occ->error = -EXDEV;
+ occ->bad_present_count++;
+ } else
+ occ->bad_present_count = 0;
+ }
+
+ return rc;
}
int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
@@ -184,6 +213,14 @@ int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
u8 cmd[8];
u16 checksum = 0x24;
__be16 user_power_cap_be;
+ struct occ_poll_response_header *header =
+ (struct occ_poll_response_header *)occ->resp.data;
+
+ if (!(header->status & OCC_STAT_MASTER))
+ return -EPERM;
+
+ if (!(header->status & OCC_STAT_ACTIVE))
+ return -EACCES;
user_power_cap_be = cpu_to_be16(user_power_cap);
@@ -192,7 +229,7 @@ int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap)
cmd[2] = 0;
cmd[3] = 2;
- memcpy(&cmd[4], &user_power_cap, 2);
+ memcpy(&cmd[4], &user_power_cap_be, 2);
checksum += cmd[4] + cmd[5];
cmd[6] = checksum >> 8;
@@ -220,6 +257,19 @@ int occ_update_response(struct occ *occ)
return rc;
}
+static ssize_t occ_show_error(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int error = 0;
+ struct occ *occ = dev_get_drvdata(dev);
+
+ if (occ->error_count > OCC_ERROR_COUNT_THRESHOLD || occ->last_safe ||
+ occ->bad_present_count > OCC_ERROR_COUNT_THRESHOLD)
+ error = occ->error;
+
+ return snprintf(buf, PAGE_SIZE - 1, "%d\n", error);
+}
+
static ssize_t occ_show_status(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -237,22 +287,22 @@ static ssize_t occ_show_status(struct device *dev,
switch (sattr->index) {
case 0:
- val = header->status & OCC_STAT_MASTER;
+ val = (header->status & OCC_STAT_MASTER) ? 1 : 0;
break;
case 1:
- val = header->status & OCC_STAT_ACTIVE;
+ val = (header->status & OCC_STAT_ACTIVE) ? 1 : 0;
break;
case 2:
- val = header->ext_status & OCC_EXT_STAT_DVFS_OT;
+ val = (header->ext_status & OCC_EXT_STAT_DVFS_OT) ? 1 : 0;
break;
case 3:
- val = header->ext_status & OCC_EXT_STAT_DVFS_POWER;
+ val = (header->ext_status & OCC_EXT_STAT_DVFS_POWER) ? 1 : 0;
break;
case 4:
- val = header->ext_status & OCC_EXT_STAT_MEM_THROTTLE;
+ val = (header->ext_status & OCC_EXT_STAT_MEM_THROTTLE) ? 1 : 0;
break;
case 5:
- val = header->ext_status & OCC_EXT_STAT_QUICK_DROP;
+ val = (header->ext_status & OCC_EXT_STAT_QUICK_DROP) ? 1 : 0;
break;
case 6:
val = header->occ_state;
@@ -1073,30 +1123,35 @@ int occ_create_status_attrs(struct occ *occ)
occ->status_attrs[1] =
(struct sensor_device_attribute)SENSOR_ATTR(occ_active, 0444,
occ_show_status,
- NULL, 0);
+ NULL, 1);
occ->status_attrs[2] =
(struct sensor_device_attribute)SENSOR_ATTR(occ_dvfs_ot, 0444,
occ_show_status,
- NULL, 1);
+ NULL, 2);
occ->status_attrs[3] =
(struct sensor_device_attribute)SENSOR_ATTR(occ_dvfs_power,
0444,
occ_show_status,
- NULL, 2);
+ NULL, 3);
occ->status_attrs[4] =
(struct sensor_device_attribute)SENSOR_ATTR(occ_mem_throttle,
0444,
occ_show_status,
- NULL, 3);
+ NULL, 4);
occ->status_attrs[5] =
(struct sensor_device_attribute)SENSOR_ATTR(occ_quick_drop,
0444,
occ_show_status,
- NULL, 4);
+ NULL, 5);
occ->status_attrs[6] =
(struct sensor_device_attribute)SENSOR_ATTR(occ_status, 0444,
occ_show_status,
- NULL, 5);
+ NULL, 6);
+
+ occ->status_attrs[7] =
+ (struct sensor_device_attribute)SENSOR_ATTR(occ_error, 0444,
+ occ_show_error,
+ NULL, 0);
for (i = 0; i < OCC_NUM_STATUS_ATTRS; ++i) {
rc = device_create_file(dev, &occ->status_attrs[i].dev_attr);
diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
index a6582a7..a5f86c3 100644
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -13,6 +13,8 @@
#include <linux/hwmon-sysfs.h>
#include <linux/sysfs.h>
+#define OCC_ERROR_COUNT_THRESHOLD 2
+
#define OCC_UPDATE_FREQUENCY msecs_to_jiffies(1000)
#define OCC_RESP_DATA_BYTES 4089
@@ -28,6 +30,9 @@
#define RESP_RETURN_OCC_ERR 0x15
#define RESP_RETURN_STATE 0x16
+#define OCC_STATE_SAFE 0x4
+#define OCC_SAFE_TIMEOUT msecs_to_jiffies(60000)
+
struct occ_response {
u8 seq_no;
u8 cmd_type;
@@ -96,6 +101,10 @@ struct occ {
struct device *bus_dev;
struct device *hwmon;
+ int error;
+ unsigned int error_count;
+ unsigned int bad_present_count;
+ unsigned long last_safe;
unsigned long last_update;
struct mutex lock;
@@ -131,6 +140,8 @@ struct occ {
((struct sensor_device_attribute_2) \
SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index))
+extern atomic_t occ_num_occs;
+
void occ_parse_poll_response(struct occ *occ);
int occ_poll(struct occ *occ);
int occ_set_user_power_cap(struct occ *occ, u16 user_power_cap);
diff --git a/drivers/hwmon/occ/p8_i2c.c b/drivers/hwmon/occ/p8_i2c.c
index a4d9965..29233b4 100644
--- a/drivers/hwmon/occ/p8_i2c.c
+++ b/drivers/hwmon/occ/p8_i2c.c
@@ -122,12 +122,12 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
if (rc)
goto err;
+retry:
/* set sram address for response */
rc = p8_i2c_occ_putscom_u32(client, 0x6B070, 0xFFFF7000, 0);
if (rc)
goto err;
-retry:
rc = p8_i2c_occ_getscom(client, 0x6B075, (u8 *)resp);
if (rc)
goto err;
@@ -161,7 +161,10 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
rc = -EFAULT;
}
+ occ->error = resp->return_status;
+
if (rc < 0) {
+ occ->error_count++;
dev_warn(&client->dev, "occ bad response:%d\n",
resp->return_status);
return rc;
@@ -169,9 +172,11 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
data_length = get_unaligned_be16(&resp->data_length_be);
if (data_length > OCC_RESP_DATA_BYTES) {
+ occ->error_count++;
+ occ->error = -EDOM;
dev_warn(&client->dev, "occ bad data length:%d\n",
data_length);
- return -EDOM;
+ return occ->error;
}
for (i = 8; i < data_length + 7; i += 8) {
@@ -180,9 +185,12 @@ static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd)
goto err;
}
+ occ->error_count = 0;
return data_length + 7;
err:
+ occ->error_count++;
+ occ->error = rc;
dev_err(&client->dev, "i2c scom op failed rc:%d\n", rc);
return rc;
}
@@ -242,6 +250,8 @@ static int p8_i2c_occ_probe(struct i2c_client *client,
return rc;
}
+ atomic_inc(&occ_num_occs);
+
return 0;
}
@@ -251,6 +261,8 @@ static int p8_i2c_occ_remove(struct i2c_client *client)
occ_remove_status_attrs(occ);
+ atomic_dec(&occ_num_occs);
+
return 0;
}
diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c
index 6226f6f..c85f133 100644
--- a/drivers/hwmon/occ/p9_sbe.c
+++ b/drivers/hwmon/occ/p9_sbe.c
@@ -36,6 +36,7 @@ static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd)
retry:
client = occ_drv_open(p9_sbe_occ->sbe, 0);
if (!client)
+ /* don't increment occ error counter */
return -ENODEV;
rc = occ_drv_write(client, (const char *)&cmd[1], 7);
@@ -76,15 +77,21 @@ static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd)
rc = -EFAULT;
}
+ occ->error = resp->return_status;
+
if (rc < 0) {
+ occ->error_count++;
dev_warn(occ->bus_dev, "occ bad response:%d\n",
resp->return_status);
return rc;
}
+ occ->error_count = 0;
return 0;
err:
+ occ->error_count++;
+ occ->error = rc;
occ_drv_release(client);
dev_err(occ->bus_dev, "occ bus op failed rc:%d\n", rc);
return rc;
@@ -130,6 +137,7 @@ static int p9_sbe_occ_setup(struct p9_sbe_occ *p9_sbe_occ)
static int p9_sbe_occ_probe(struct platform_device *pdev)
{
+ int rc;
struct occ *occ;
struct p9_sbe_occ *p9_sbe_occ = devm_kzalloc(&pdev->dev,
sizeof(*p9_sbe_occ),
@@ -148,7 +156,13 @@ static int p9_sbe_occ_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, occ);
- return p9_sbe_occ_setup(p9_sbe_occ);
+ rc = p9_sbe_occ_setup(p9_sbe_occ);
+ if (rc)
+ return rc;
+
+ atomic_inc(&occ_num_occs);
+
+ return rc;
}
static int p9_sbe_occ_remove(struct platform_device *pdev)
@@ -157,6 +171,8 @@ static int p9_sbe_occ_remove(struct platform_device *pdev)
occ_remove_status_attrs(occ);
+ atomic_dec(&occ_num_occs);
+
return 0;
}
--
1.8.3.1
More information about the openbmc
mailing list