[Skiboot] [PATCH v2 1/6] FSP/ELOG: Disable event notification during kexec
Vasant Hegde
hegdevasant at linux.vnet.ibm.com
Sun Jul 3 01:33:38 AEST 2016
ELOG enables event notification once a new log is available. Notification
is disabled again after the host finishes reading the log (it has to complete
both fsp_opal_elog_info and fsp_opal_elog_read).
In some corner cases like kexec, the host may end up reading the same ELOG id twice
(calling fsp_opal_elog_info twice because of a resend request). The host treats it
as a duplicate and does not read the actual log (fsp_opal_elog_read()). In such
situations we fail to disable event notification :-(
Scenario :
OPAL Host
-------------------------------------
OPAL_EVENT_ELOG_AVAIL --> kexec
OPAL_EVENT_ELOG_AVAIL --> elog client registered
<-- read ELOG (id=x)
<-- resend elog (opal_resend_pending_logs())
resend all ELOG --> read ELOG (id=x) -- Duplicate ELOG !
bhoom!!
kernel call trace:
------------------
[ 28.055923] CPU: 10 PID: 20 Comm: irq/29-opal-elo Not tainted 4.4.0-24-generic #43-Ubuntu
[ 28.056012] task: c0000000ef982a20 ti: c0000000efa38000 task.ti: c0000000efa38000
[ 28.056100] NIP: c000000008010a24 LR: c000000008010a24 CTR: 0000000030033758
[ 28.056188] REGS: c0000000efa3b9c0 TRAP: 0901 Not tainted (4.4.0-24-generic)
[ 28.056274] MSR: 9000000100009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 22000844 XER: 20000000
[ 28.056499] CFAR: c000000008009958 SOFTE: 1
GPR00: c000000008131e8c c0000000efa3bc40 c0000000095b4200 0000000000000900
GPR04: c0000000094a63c8 0000000000000001 9000000100009033 0000000000000062
GPR08: 0000000000000000 0000000000000000 c0000000ef960400 9000000100001003
GPR12: c00000000806de48 c00000000fb45f00
[ 28.057042] NIP [c000000008010a24] arch_local_irq_restore+0x74/0x90
[ 28.057117] LR [c000000008010a24] arch_local_irq_restore+0x74/0x90
[ 28.057189] Call Trace:
[ 28.057221] [c0000000efa3bc40] [c0000000f108a980] 0xc0000000f108a980 (unreliable)
[ 28.057326] [c0000000efa3bc60] [c000000008131e8c] irq_finalize_oneshot.part.2+0xbc/0x250
[ 28.057429] [c0000000efa3bcb0] [c000000008132170] irq_thread_fn+0x80/0xa0
[ 28.057519] [c0000000efa3bcf0] [c00000000813263c] irq_thread+0x1ac/0x280
[ 28.057609] [c0000000efa3bd80] [c0000000080e61e0] kthread+0x110/0x130
[ 28.057698] [c0000000efa3be30] [c000000008009538] ret_from_kernel_thread+0x5c/0xa4
[ 28.057799] Instruction dump:
[ 28.057844] 994d02ca 2fa30000 409e0024 e92d0020 61298000 7d210164 38210020 e8010010
[ 28.057995] 7c0803a6 4e800020 60420000 4bff17ad <60000000> 4bffffe4 60420000 e92d0020
This patch adds a kexec notifier client. It disables event notification
during kexec. Once the host is ready to receive ELOGs again it will call
fsp_opal_resend_pending_logs(), which re-enables ELOG notification.
This fixes the above issue. I will add a follow-up patch to improve event state.
Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
---
hw/fsp/fsp-elog-read.c | 46 +++++++++++++++++++++++++++++++++-------------
1 file changed, 33 insertions(+), 13 deletions(-)
diff --git a/hw/fsp/fsp-elog-read.c b/hw/fsp/fsp-elog-read.c
index e9332af..a7897a8 100644
--- a/hw/fsp/fsp-elog-read.c
+++ b/hw/fsp/fsp-elog-read.c
@@ -88,6 +88,8 @@ static uint32_t elog_read_retries; /* bad response status count */
/* Initialize the state of the log */
static enum elog_head_state elog_read_from_fsp_head_state = ELOG_STATE_NONE;
+static bool elog_enabled;
+
/* Need forward declaration because of Circular dependency */
static void fsp_elog_queue_fetch(void);
@@ -128,6 +130,9 @@ static int64_t fsp_send_elog_ack(uint32_t log_id)
/* retrive error log from FSP with TCE for the data transfer */
static void fsp_elog_check_and_fetch_head(void)
{
+ if (!elog_enabled)
+ return;
+
lock(&elog_read_lock);
if (elog_read_from_fsp_head_state != ELOG_STATE_NONE ||
@@ -150,6 +155,10 @@ static void fsp_elog_set_head_state(enum elog_head_state state)
elog_read_from_fsp_head_state = state;
+ /* ELOG disabled */
+ if (!elog_enabled)
+ return;
+
if (state == ELOG_STATE_FETCHED_DATA &&
old_state != ELOG_STATE_FETCHED_DATA)
opal_update_pending_evt(OPAL_EVENT_ERROR_LOG_AVAIL,
@@ -375,7 +384,7 @@ static void elog_reject_head(void)
{
if (elog_read_from_fsp_head_state == ELOG_STATE_FETCHING)
fsp_elog_set_head_state(ELOG_STATE_REJECTED);
- if (elog_read_from_fsp_head_state == ELOG_STATE_FETCHED_DATA)
+ else
fsp_elog_set_head_state(ELOG_STATE_NONE);
}
@@ -420,6 +429,10 @@ static void fsp_opal_resend_pending_logs(void)
{
struct fsp_log_entry *entry;
+ lock(&elog_read_lock);
+ elog_enabled = true;
+ unlock(&elog_read_lock);
+
/* Check if any Sapphire logs are pending */
opal_resend_pending_logs();
@@ -435,23 +448,25 @@ static void fsp_opal_resend_pending_logs(void)
list_add(&elog_read_pending, &entry->link);
}
- /*
- * If the current fetched or fetching log doesn't match our
- * new pending list head, then reject it
- */
- if (!list_empty(&elog_read_pending)) {
- entry = list_top(&elog_read_pending,
- struct fsp_log_entry, link);
- if (entry->log_id != elog_head_id)
- elog_reject_head();
- }
-
unlock(&elog_read_lock);
- /* Read error log from FSP if needed */
+ /* Read error log from FSP */
+ elog_reject_head();
fsp_elog_check_and_fetch_head();
}
+/* Disable ELOG event flag until host is ready to receive event */
+static bool opal_kexec_elog_notify(void *data __unused)
+{
+ lock(&elog_read_lock);
+ elog_reject_head();
+ elog_enabled = false;
+ opal_update_pending_evt(OPAL_EVENT_ERROR_LOG_AVAIL, 0);
+ unlock(&elog_read_lock);
+
+ return true;
+}
+
/* fsp elog notify function */
static bool fsp_elog_msg(uint32_t cmd_sub_mod, struct fsp_msg *msg)
{
@@ -573,9 +588,14 @@ void fsp_elog_read_init(void)
if (val != 0)
return;
+ elog_enabled = true;
+
/* register Eror log Class D2 */
fsp_register_client(&fsp_get_elog_notify, FSP_MCLASS_ERR_LOG);
+ /* Register for sync on host reboot call */
+ opal_add_host_sync_notifier(opal_kexec_elog_notify, NULL);
+
/* register opal Interface */
opal_register(OPAL_ELOG_READ, fsp_opal_elog_read, 3);
opal_register(OPAL_ELOG_ACK, fsp_opal_elog_ack, 1);
--
2.5.5
More information about the Skiboot
mailing list