[Skiboot] [PATCH v2 4/6] opal/hmi: HMI logging with location code info.

Wed Dec 13 16:24:35 AEDT 2017

From: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>

Add few HMI debug prints with location code info few additional info.

No functionality change.

With this patch the log messages will look like:

[210612.175196744,7] HMI: Received HMI interrupt: HMER = 0x0840000000000000
[210612.175200449,7] HMI: [Loc: UOPWR.1302LFA-Node0-Proc1]: P:8 C:16 T:1: TFMR(2d12000870e04020) Timer Facility Error

[210660.259689526,7] HMI: Received HMI interrupt: HMER = 0x2040000000000000
[210660.259695649,7] HMI: [Loc: UOPWR.1302LFA-Node0-Proc0]: P:0 C:16 T:1: Processor recovery Done.

Signed-off-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
---
 core/hmi.c |   40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/core/hmi.c b/core/hmi.c
index 826e27c..a995455 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -920,6 +920,30 @@ static void hmi_exit(void)
 	*(this_cpu()->core_hmi_state_ptr) &= ~(this_cpu()->thread_mask);
 }
 
+static void hmi_print_debug(const uint8_t *msg)
+{
+	uint64_t hmer = mfspr(SPR_HMER);
+	const char *loc;
+	uint32_t core_id, thread_index;
+
+	core_id = pir_to_core_id(this_cpu()->pir);
+	thread_index = cpu_get_thread_index(this_cpu());
+
+	loc = chip_loc_code(this_cpu()->chip_id);
+	if (!loc)
+		loc = "Not Available";
+
+	if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) {
+		prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: TFMR(%016lx) %s\n",
+			loc, this_cpu()->chip_id, core_id, thread_index,
+			mfspr(SPR_TFMR), msg);
+	} else {
+		prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: %s\n",
+			loc, this_cpu()->chip_id, core_id, thread_index,
+			msg);
+	}
+}
+
 int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 {
 	int recover = 1;
@@ -948,7 +972,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
 			queue_hmi_event(hmi_evt, recover);
 		}
-		prlog(PR_DEBUG, "Processor recovery Done.\n");
+		hmi_print_debug("Processor recovery Done.");
 	}
 	if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
 		hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
@@ -957,7 +981,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
 			queue_hmi_event(hmi_evt, recover);
 		}
-		prlog(PR_DEBUG, "Processor recovery Done (masked).\n");
+		hmi_print_debug("Processor recovery Done (masked).");
 	}
 	if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
 		hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
@@ -966,13 +990,14 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN;
 			queue_hmi_event(hmi_evt, recover);
 		}
-		prlog(PR_DEBUG, "Processor recovery occurred again before"
-			"bit2 was cleared\n");
+		hmi_print_debug("Processor recovery occurred again before"
+				"bit2 was cleared\n");
 	}
 	/* Assert if we see malfunction alert, we can not continue. */
 	if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
 		hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
 
+		hmi_print_debug("Malfunction Alert");
 		if (hmi_evt)
 			decode_malfunction(hmi_evt);
 	}
@@ -980,6 +1005,8 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	/* Assert if we see Hypervisor resource error, we can not continue. */
 	if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
 		hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
+
+		hmi_print_debug("Hypervisor resource error");
 		recover = 0;
 		if (hmi_evt) {
 			hmi_evt->severity = OpalHMI_SEV_FATAL;
@@ -994,6 +1021,9 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	 */
 	if (hmer & SPR_HMER_TFAC_ERROR) {
 		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
+
+		hmi_print_debug("Timer Facility Error");
+
 		hmer &= ~SPR_HMER_TFAC_ERROR;
 		recover = chiptod_recover_tb_errors();
 		if (hmi_evt) {
@@ -1006,6 +1036,8 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
 	if (hmer & SPR_HMER_TFMR_PARITY_ERROR) {
 		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
 		hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
+
+		hmi_print_debug("TFMR parity Error");
 		recover = chiptod_recover_tb_errors();
 		if (hmi_evt) {
 			hmi_evt->severity = OpalHMI_SEV_FATAL;