[Skiboot] [PATCH] lpc: Log LPC SYNC errors as unrecoverable ones for manufacturing
Vipin K Parashar
vipin at linux.vnet.ibm.com
Fri Aug 5 08:14:30 AEST 2016
A high volume of SYNC errors on the LPC bus causes degraded system
performance and is likely due to bad hardware present in the system.
Thus, once LPC SYNC errors cross a certain threshold, OPAL should log
them to the BMC as unrecoverable errors in manufacturing mode. This
will help manufacturing screen out the bad parts causing such errors.
Cc: stable
Signed-off-by: Vipin K Parashar <vipin at linux.vnet.ibm.com>
---
core/platform.c | 8 +++++++-
hw/lpc.c | 32 +++++++++++++++++++++++++++-----
include/errorlog.h | 1 +
include/platform.h | 2 ++
4 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/core/platform.c b/core/platform.c
index de6e406..9730f8d 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -24,6 +24,7 @@
#include <xscom.h>
#include <errorlog.h>
+bool mfg_mode;
struct platform platform;
DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
@@ -124,8 +125,13 @@ void probe_platform(void)
struct platform *platforms = &__platforms_start;
unsigned int i;
- platform = generic_platform;
+ /* Detect Manufacturing mode */
+ if (dt_find_property(dt_root, "ibm,manufacturing-mode")) {
+ printf("PLAT: Manufacturing mode ON\n");
+ mfg_mode = true;
+ }
+ platform = generic_platform;
for (i = 0; &platforms[i] < &__platforms_end; i++) {
if (platforms[i].probe && platforms[i].probe()) {
platform = platforms[i];
diff --git a/hw/lpc.c b/hw/lpc.c
index 32cb7b1..4b76b4d 100644
--- a/hw/lpc.c
+++ b/hw/lpc.c
@@ -25,6 +25,7 @@
#include <timebase.h>
#include <errorlog.h>
#include <opal-api.h>
+#include <platform.h>
//#define DBG_IRQ(fmt...) prerror(fmt)
#define DBG_IRQ(fmt...) do { } while(0)
@@ -41,6 +42,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL,
OPAL_NA);
+DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC_PERF, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
+ OPAL_MISC_SUBSYSTEM, OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF,
+ OPAL_NA);
+
#define ECCB_CTL 0 /* b0020 -> b00200 */
#define ECCB_STAT 2 /* b0022 -> b00210 */
#define ECCB_DATA 3 /* b0023 -> b00218 */
@@ -110,6 +115,9 @@ DEFINE_LOG_ENTRY(OPAL_RC_LPC_SYNC, OPAL_PLATFORM_ERR_EVT, OPAL_LPC,
LPC_HC_IRQ_BM_TAR_ERR)
#define LPC_HC_ERROR_ADDRESS 0x40
+
+#define LPC_BUS_DEGRADED_PERF_THRESHOLD 5
+
struct lpc_client_entry {
struct list_node node;
const struct lpc_client *clt;
@@ -662,8 +670,10 @@ static void lpc_dispatch_reset(struct proc_chip *chip)
static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs)
{
int rc;
+ struct opal_err_info *info;
const char *sync_err = "Unknown LPC error";
uint32_t err_addr;
+ static int lpc_bus_err_count;
/* Write back to clear error interrupts, we clear SerIRQ later
* as they are handled as level interrupts
@@ -690,13 +700,19 @@ static void lpc_dispatch_err_irqs(struct proc_chip *chip, uint32_t irqs)
rc = opb_read(chip, lpc_reg_opb_base + LPC_HC_ERROR_ADDRESS,
&err_addr, 4);
+
+ lpc_bus_err_count++;
+ if (mfg_mode && (lpc_bus_err_count > LPC_BUS_DEGRADED_PERF_THRESHOLD))
+ info = &e_info(OPAL_RC_LPC_SYNC_PERF);
+ else
+ info = &e_info(OPAL_RC_LPC_SYNC);
+
if (rc)
- log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s "
- "Error address: Unknown\n", sync_err);
+ log_simple_error(info, "%s Error address: Unknown\n",
+ sync_err);
else
- log_simple_error(&e_info(OPAL_RC_LPC_SYNC), "%s "
- "Error address: 0x%08x\n",
- sync_err, err_addr);
+ log_simple_error(info, "%s Error address: 0x%08x\n",
+ sync_err, err_addr);
}
static void lpc_dispatch_ser_irqs(struct proc_chip *chip, uint32_t irqs,
@@ -869,6 +885,12 @@ void lpc_init(void)
prlog(PR_NOTICE, "Default bus on chip %d\n",
lpc_default_chip_id);
+ /* Detect Manufacturing mode */
+ if (dt_find_property(dt_root, "ibm,manufacturing-mode")) {
+ prlog(PR_INFO, "Manufacturing mode ON\n");
+ mfg_mode = true;
+ }
+
if (has_lpc) {
opal_register(OPAL_LPC_WRITE, opal_lpc_write, 5);
opal_register(OPAL_LPC_READ, opal_lpc_read, 5);
diff --git a/include/errorlog.h b/include/errorlog.h
index f89eac9..247198b 100644
--- a/include/errorlog.h
+++ b/include/errorlog.h
@@ -266,6 +266,7 @@ enum opal_reasoncode {
OPAL_RC_LPC_READ = OPAL_SRC_COMPONENT_LPC | 0x10,
OPAL_RC_LPC_WRITE = OPAL_SRC_COMPONENT_LPC | 0x11,
OPAL_RC_LPC_SYNC = OPAL_SRC_COMPONENT_LPC | 0x12,
+ OPAL_RC_LPC_SYNC_PERF = OPAL_SRC_COMPONENT_LPC | 0x13,
/* OP_PANEL */
OPAL_RC_PANEL_WRITE = OPAL_SRC_COMPONENT_OP_PANEL | 0x10,
/* PSI */
diff --git a/include/platform.h b/include/platform.h
index 062a941..a2c2fee 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -175,6 +175,8 @@ extern struct platform __platforms_end;
extern struct platform platform;
+extern bool mfg_mode;
+
#define DECLARE_PLATFORM(name)\
static const struct platform __used __section(".platforms") name ##_platform
--
2.7.4
More information about the Skiboot
mailing list