[Skiboot] [PATCH 3/5] centaur: Mark centaur offline after 10 consecutive access errors
Benjamin Herrenschmidt
benh at kernel.crashing.org
Tue Sep 13 14:45:26 AEST 2016
This avoids spamming the logs when the centaur is dead and PRD
constantly tries to access it.
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---
hw/centaur.c | 28 ++++++++++++++++++++++++++++
include/centaur.h | 2 ++
include/opal-api.h | 1 +
3 files changed, 31 insertions(+)
diff --git a/hw/centaur.c b/hw/centaur.c
index 1f2b9c4..8b7311f 100644
--- a/hw/centaur.c
+++ b/hw/centaur.c
@@ -39,6 +39,9 @@
/* Is that correct ? */
#define MAX_CENTAURS_PER_CHIP 8
+/* Mark the centaur offline after this many consecutive errors */
+#define CENTAUR_ERR_OFFLINE_THRESHOLD 10
+
/*
* FSI2PIB register definitions (this could be moved out if we were to
* support FSI master to other chips.
@@ -319,12 +322,24 @@ int64_t centaur_xscom_read(uint32_t id, uint64_t pcb_addr, uint64_t *val)
if (!centaur)
return OPAL_PARAMETER;
+ if (!centaur->online)
+ return OPAL_XSCOM_CTR_OFFLINED;
lock(&centaur->lock);
if (pcb_addr & XSCOM_ADDR_IND_FLAG)
rc = centaur_xscom_ind_read(centaur, pcb_addr, val);
else
rc = centaur_fsiscom_read(centaur, pcb_addr, val);
+
+ /* We mark the centaur offline if we get too many errors on
+ * consecutive accesses
+ */
+ if (rc) {
+ centaur->error_count++;
+ if (centaur->error_count > CENTAUR_ERR_OFFLINE_THRESHOLD)
+ centaur->online = false;
+ } else
+ centaur->error_count = 0;
unlock(&centaur->lock);
return rc;
@@ -337,12 +352,24 @@ int64_t centaur_xscom_write(uint32_t id, uint64_t pcb_addr, uint64_t val)
if (!centaur)
return OPAL_PARAMETER;
+ if (!centaur->online)
+ return OPAL_XSCOM_CTR_OFFLINED;
lock(&centaur->lock);
if (pcb_addr & XSCOM_ADDR_IND_FLAG)
rc = centaur_xscom_ind_write(centaur, pcb_addr, val);
else
rc = centaur_fsiscom_write(centaur, pcb_addr, val);
+
+ /* We mark the centaur offline if we get too many errors on
+ * consecutive accesses
+ */
+ if (rc) {
+ centaur->error_count++;
+ if (centaur->error_count > CENTAUR_ERR_OFFLINE_THRESHOLD)
+ centaur->online = false;
+ } else
+ centaur->error_count = 0;
unlock(&centaur->lock);
return rc;
@@ -425,6 +452,7 @@ static bool centaur_add(uint32_t part_id, uint32_t mchip, uint32_t meng,
centaur->fsi_master_chip_id = mchip;
centaur->fsi_master_port = mport;
centaur->fsi_master_engine = meng ? MFSI_cMFSI1 : MFSI_cMFSI0;
+ centaur->online = true;
init_lock(&centaur->lock);
list_head_init(&centaur->i2cms);
diff --git a/include/centaur.h b/include/centaur.h
index 6453e13..7a50bee 100644
--- a/include/centaur.h
+++ b/include/centaur.h
@@ -24,6 +24,7 @@
struct centaur_chip {
bool valid;
+ bool online;
uint8_t ec_level;
uint32_t part_id;
uint32_t fsi_master_chip_id;
@@ -31,6 +32,7 @@ struct centaur_chip {
uint32_t fsi_master_engine;
uint32_t scache_disable_count;
bool scache_was_enabled;
+ uint32_t error_count;
struct lock lock;
/* Used by hw/p8-i2c.c */
diff --git a/include/opal-api.h b/include/opal-api.h
index 7422443..4cc1544 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -52,6 +52,7 @@
#define OPAL_XSCOM_CLOCK_ERROR -27
#define OPAL_XSCOM_PARITY_ERROR -28
#define OPAL_XSCOM_TIMEOUT -29
+#define OPAL_XSCOM_CTR_OFFLINED -30
/* API Tokens (in r0) */
#define OPAL_INVALID_CALL -1
--
2.7.4
More information about the Skiboot
mailing list