[Skiboot] [PATCH v2 1/6] FSP/ELOG: Disable event notification during kexec

Vasant Hegde hegdevasant at linux.vnet.ibm.com
Sun Jul 3 01:33:38 AEST 2016


ELOG enables event notification once a new log is available, and
disables it after the host completes reading the log (it has to complete
both fsp_opal_elog_info and fsp_opal_elog_read).

In some corner cases like kexec, the host may end up reading the same ELOG
id twice (calling fsp_opal_elog_info twice because of a resend request). The
host detects it as a duplicate and does not read the actual log
(fsp_opal_elog_read()). In such situations we fail to disable event
notification :-(

Scenario :
OPAL				Host
-------------------------------------
OPAL_EVENT_ELOG_AVAIL	-->	kexec
OPAL_EVENT_ELOG_AVAIL	-->	elog client registered
			<--	read ELOG (id=x)
			<--	resend elog (opal_resend_pending_logs())
resend all ELOG		-->	read ELOG (id=x) -- Duplicate ELOG !
				bhoom!!

kernel call trace:
------------------
[   28.055923] CPU: 10 PID: 20 Comm: irq/29-opal-elo Not tainted 4.4.0-24-generic #43-Ubuntu
[   28.056012] task: c0000000ef982a20 ti: c0000000efa38000 task.ti: c0000000efa38000
[   28.056100] NIP: c000000008010a24 LR: c000000008010a24 CTR: 0000000030033758
[   28.056188] REGS: c0000000efa3b9c0 TRAP: 0901   Not tainted  (4.4.0-24-generic)
[   28.056274] MSR: 9000000100009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 22000844  XER: 20000000
[   28.056499] CFAR: c000000008009958 SOFTE: 1
GPR00: c000000008131e8c c0000000efa3bc40 c0000000095b4200 0000000000000900
GPR04: c0000000094a63c8 0000000000000001 9000000100009033 0000000000000062
GPR08: 0000000000000000 0000000000000000 c0000000ef960400 9000000100001003
GPR12: c00000000806de48 c00000000fb45f00
[   28.057042] NIP [c000000008010a24] arch_local_irq_restore+0x74/0x90
[   28.057117] LR [c000000008010a24] arch_local_irq_restore+0x74/0x90
[   28.057189] Call Trace:
[   28.057221] [c0000000efa3bc40] [c0000000f108a980] 0xc0000000f108a980 (unreliable)
[   28.057326] [c0000000efa3bc60] [c000000008131e8c] irq_finalize_oneshot.part.2+0xbc/0x250
[   28.057429] [c0000000efa3bcb0] [c000000008132170] irq_thread_fn+0x80/0xa0
[   28.057519] [c0000000efa3bcf0] [c00000000813263c] irq_thread+0x1ac/0x280
[   28.057609] [c0000000efa3bd80] [c0000000080e61e0] kthread+0x110/0x130
[   28.057698] [c0000000efa3be30] [c000000008009538] ret_from_kernel_thread+0x5c/0xa4
[   28.057799] Instruction dump:
[   28.057844] 994d02ca 2fa30000 409e0024 e92d0020 61298000 7d210164 38210020 e8010010
[   28.057995] 7c0803a6 4e800020 60420000 4bff17ad <60000000> 4bffffe4 60420000 e92d0020

This patch adds a kexec notifier client. It disables event notification
during kexec. Once the host is ready to receive ELOGs again, it calls
fsp_opal_resend_pending_logs(), which re-enables ELOG notification.

This fixes the above issue. I will add a follow-up patch to improve event state.

Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
---
 hw/fsp/fsp-elog-read.c | 46 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/hw/fsp/fsp-elog-read.c b/hw/fsp/fsp-elog-read.c
index e9332af..a7897a8 100644
--- a/hw/fsp/fsp-elog-read.c
+++ b/hw/fsp/fsp-elog-read.c
@@ -88,6 +88,8 @@ static uint32_t elog_read_retries;	/* bad response status count */
 /* Initialize the state of the log */
 static enum elog_head_state elog_read_from_fsp_head_state = ELOG_STATE_NONE;
 
+static bool elog_enabled;
+
 /* Need forward declaration because of Circular dependency */
 static void fsp_elog_queue_fetch(void);
 
@@ -128,6 +130,9 @@ static int64_t fsp_send_elog_ack(uint32_t log_id)
 /* retrive error log from FSP with TCE for the data transfer */
 static void fsp_elog_check_and_fetch_head(void)
 {
+	if (!elog_enabled)
+		return;
+
 	lock(&elog_read_lock);
 
 	if (elog_read_from_fsp_head_state != ELOG_STATE_NONE ||
@@ -150,6 +155,10 @@ static void fsp_elog_set_head_state(enum elog_head_state state)
 
 	elog_read_from_fsp_head_state = state;
 
+	/* ELOG disabled */
+	if (!elog_enabled)
+		return;
+
 	if (state == ELOG_STATE_FETCHED_DATA &&
 			old_state != ELOG_STATE_FETCHED_DATA)
 		opal_update_pending_evt(OPAL_EVENT_ERROR_LOG_AVAIL,
@@ -375,7 +384,7 @@ static void elog_reject_head(void)
 {
 	if (elog_read_from_fsp_head_state == ELOG_STATE_FETCHING)
 		fsp_elog_set_head_state(ELOG_STATE_REJECTED);
-	if (elog_read_from_fsp_head_state == ELOG_STATE_FETCHED_DATA)
+	else
 		fsp_elog_set_head_state(ELOG_STATE_NONE);
 }
 
@@ -420,6 +429,10 @@ static void fsp_opal_resend_pending_logs(void)
 {
 	struct fsp_log_entry  *entry;
 
+	lock(&elog_read_lock);
+	elog_enabled = true;
+	unlock(&elog_read_lock);
+
 	/* Check if any Sapphire logs are pending */
 	opal_resend_pending_logs();
 
@@ -435,23 +448,25 @@ static void fsp_opal_resend_pending_logs(void)
 		list_add(&elog_read_pending, &entry->link);
 	}
 
-	/*
-	 * If the current fetched or fetching log doesn't match our
-	 * new pending list head, then reject it
-	 */
-	if (!list_empty(&elog_read_pending)) {
-		entry = list_top(&elog_read_pending,
-					 struct fsp_log_entry, link);
-		if (entry->log_id != elog_head_id)
-			elog_reject_head();
-	}
-
 	unlock(&elog_read_lock);
 
-	/* Read error log from FSP if needed */
+	/* Read error log from FSP */
+	elog_reject_head();
 	fsp_elog_check_and_fetch_head();
 }
 
+/* Disable ELOG event flag until host is ready to receive event */
+static bool opal_kexec_elog_notify(void *data __unused)
+{
+	lock(&elog_read_lock);
+	elog_reject_head();
+	elog_enabled = false;
+	opal_update_pending_evt(OPAL_EVENT_ERROR_LOG_AVAIL, 0);
+	unlock(&elog_read_lock);
+
+	return true;
+}
+
 /* fsp elog notify function  */
 static bool fsp_elog_msg(uint32_t cmd_sub_mod, struct fsp_msg *msg)
 {
@@ -573,9 +588,14 @@ void fsp_elog_read_init(void)
 	if (val != 0)
 		return;
 
+	elog_enabled = true;
+
 	/* register Eror log Class D2 */
 	fsp_register_client(&fsp_get_elog_notify, FSP_MCLASS_ERR_LOG);
 
+	/* Register for sync on host reboot call */
+	opal_add_host_sync_notifier(opal_kexec_elog_notify, NULL);
+
 	/* register opal Interface */
 	opal_register(OPAL_ELOG_READ, fsp_opal_elog_read, 3);
 	opal_register(OPAL_ELOG_ACK, fsp_opal_elog_ack, 1);
-- 
2.5.5



More information about the Skiboot mailing list