[Skiboot] [PATCH v3] opal: Do not overwrite same HMI event for multiple HMI errors.
Mahesh J Salgaonkar
mahesh at linux.vnet.ibm.com
Thu Feb 12 16:15:18 AEDT 2015
From: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
The current implementation overwrites the same HMI event if there are
multiple HMI errors reported through a single HMI interrupt. This patch
fixes that issue by sending separate HMI event per error.
Signed-off-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
---
Change in V3:
- Fixed an issue where recover variable was not set properly before calling
queue_hmi_event(). Addressed review comments from Stewart.
Change in V2:
- Removed the forward declaration for queue_hmi_event() and moved the function
on top instead.
core/hmi.c | 76 ++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 45 insertions(+), 31 deletions(-)
diff --git a/core/hmi.c b/core/hmi.c
index 1978f54..96f1cbe 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -147,6 +147,32 @@
static struct lock hmi_lock = LOCK_UNLOCKED;
+static int queue_hmi_event(struct OpalHMIEvent *hmi_evt, int recover)
+{
+ uint64_t *hmi_data;
+
+ /* Don't queue up event if recover == -1 */
+ if (recover == -1)
+ return 0;
+
+ /* set disposition */
+ if (recover == 1)
+ hmi_evt->disposition = OpalHMI_DISPOSITION_RECOVERED;
+ else if (recover == 0)
+ hmi_evt->disposition = OpalHMI_DISPOSITION_NOT_RECOVERED;
+
+ /*
+ * struct OpalHMIEvent is of (3 * 64 bits) size and well packed
+ * structure. Hence use uint64_t pointer to pass entire structure
+ * using 4 params in generic message format.
+ */
+ hmi_data = (uint64_t *)hmi_evt;
+
+ /* queue up for delivery to host. */
+ return opal_queue_msg(OPAL_MSG_HMI_EVT, NULL, NULL,
+ hmi_data[0], hmi_data[1], hmi_data[2]);
+}
+
static int is_capp_recoverable(int chip_id)
{
uint64_t reg;
@@ -215,6 +241,7 @@ static int decode_malfunction(struct OpalHMIEvent *hmi_evt)
int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
{
int recover = 1;
+ uint64_t tfmr;
printf("HMI: Received HMI interrupt: HMER = 0x%016llx\n", hmer);
if (hmi_evt)
@@ -224,6 +251,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
if (hmi_evt) {
hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
+ queue_hmi_event(hmi_evt, recover);
}
printf("HMI: Processor recovery Done.\n");
}
@@ -232,6 +260,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
if (hmi_evt) {
hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
+ queue_hmi_event(hmi_evt, recover);
}
printf("HMI: Processor recovery Done (masked).\n");
}
@@ -240,6 +269,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
if (hmi_evt) {
hmi_evt->severity = OpalHMI_SEV_NO_ERROR;
hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN;
+ queue_hmi_event(hmi_evt, recover);
}
printf("HMI: Processor recovery occurred again before"
"bit2 was cleared\n");
@@ -247,19 +277,23 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
/* Assert if we see malfunction alert, we can not continue. */
if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
+ recover = 0;
- if (hmi_evt)
+ if (hmi_evt) {
recover = decode_malfunction(hmi_evt);
+ queue_hmi_event(hmi_evt, recover);
+ }
}
/* Assert if we see Hypervisor resource error, we can not continue. */
if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
+ recover = 0;
if (hmi_evt) {
hmi_evt->severity = OpalHMI_SEV_FATAL;
hmi_evt->type = OpalHMI_ERROR_HYP_RESOURCE;
+ queue_hmi_event(hmi_evt, recover);
}
- recover = 0;
}
/*
@@ -267,22 +301,26 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
* TFMR and take corrective action wherever required.
*/
if (hmer & SPR_HMER_TFAC_ERROR) {
+ tfmr = mfspr(SPR_TFMR); /* save original TFMR */
hmer &= ~SPR_HMER_TFAC_ERROR;
+ recover = chiptod_recover_tb_errors();
if (hmi_evt) {
hmi_evt->severity = OpalHMI_SEV_ERROR_SYNC;
hmi_evt->type = OpalHMI_ERROR_TFAC;
- hmi_evt->tfmr = mfspr(SPR_TFMR);
+ hmi_evt->tfmr = tfmr;
+ queue_hmi_event(hmi_evt, recover);
}
- recover = chiptod_recover_tb_errors();
}
if (hmer & SPR_HMER_TFMR_PARITY_ERROR) {
+ tfmr = mfspr(SPR_TFMR); /* save original TFMR */
hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
+ recover = 0;
if (hmi_evt) {
hmi_evt->severity = OpalHMI_SEV_FATAL;
hmi_evt->type = OpalHMI_ERROR_TFMR_PARITY;
- hmi_evt->tfmr = mfspr(SPR_TFMR);
+ hmi_evt->tfmr = tfmr;
+ queue_hmi_event(hmi_evt, recover);
}
- recover = 0;
}
/*
@@ -294,44 +332,20 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
return recover;
}
-static int queue_hmi_event(struct OpalHMIEvent *hmi_evt)
-{
- uint64_t *hmi_data;
-
- /*
- * struct OpalHMIEvent is of (3 * 64 bits) size and well packed
- * structure. Hence use uint64_t pointer to pass entire structure
- * using 4 params in generic message format.
- */
- hmi_data = (uint64_t *)hmi_evt;
-
- /* queue up for delivery to host. */
- return opal_queue_msg(OPAL_MSG_HMI_EVT, NULL, NULL,
- hmi_data[0], hmi_data[1], hmi_data[2]);
-}
-
static int64_t opal_handle_hmi(void)
{
uint64_t hmer;
int rc = OPAL_SUCCESS;
struct OpalHMIEvent hmi_evt;
- int recover;
memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent));
hmi_evt.version = OpalHMIEvt_V1;
lock(&hmi_lock);
hmer = mfspr(SPR_HMER); /* Get HMER register value */
- recover = handle_hmi_exception(hmer, &hmi_evt);
+ handle_hmi_exception(hmer, &hmi_evt);
unlock(&hmi_lock);
- if (recover == 1)
- hmi_evt.disposition = OpalHMI_DISPOSITION_RECOVERED;
- else if (recover == 0)
- hmi_evt.disposition = OpalHMI_DISPOSITION_NOT_RECOVERED;
-
- if (recover != -1)
- queue_hmi_event(&hmi_evt);
return rc;
}
opal_call(OPAL_HANDLE_HMI, opal_handle_hmi, 0);
More information about the Skiboot
mailing list