[Skiboot] [PATCH] hw/bt: Add backend interface to disable ipmi message retry option

Vasant Hegde hegdevasant at linux.vnet.ibm.com
Wed Feb 6 17:14:13 AEDT 2019


During boot, OPAL makes an IPMI_GET_BT_CAPS call to the BMC to get the BT
interface capabilities, which include the IPMI message max resend count,
message timeout, etc. Most of the time OPAL gets a response from the BMC
within the specified timeout. In some corner cases (like an mboxd daemon
reset in the BMC, a BMC reboot, etc.) OPAL may not get a response within
the timeout period. In such scenarios, OPAL resends the message until the
max resend count is reached.

OPAL uses synchronous IPMI messages (ipmi_queue_msg_sync()) for a few
operations like flash read, write, etc. The thread will wait in OPAL until
it gets a response from the BMC. In some corner cases like a BMC reboot,
the thread may wait in OPAL for a long time (more than 20 seconds), which
results in a kernel hard lockup.

This patch introduces a new backend interface to disable the message
resend option. We will disable the message resend option for synchronous
messages. This will greatly reduce kernel hard lockup issues.

This is a short-term fix. The long-term solution is to convert all
synchronous messages to asynchronous ones.

Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
---
 core/ipmi.c    |  2 ++
 hw/bt.c        | 15 ++++++++++++++-
 include/ipmi.h |  1 +
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/core/ipmi.c b/core/ipmi.c
index e5d38a90c..27cb4268f 100644
--- a/core/ipmi.c
+++ b/core/ipmi.c
@@ -176,6 +176,8 @@ void ipmi_queue_msg_sync(struct ipmi_msg *msg)
 	lock(&sync_lock);
 	while (sync_msg);
 	sync_msg = msg;
+	if (msg->backend->disable_retry)
+		msg->backend->disable_retry(msg);
 	ipmi_queue_msg_head(msg);
 	unlock(&sync_lock);
 
diff --git a/hw/bt.c b/hw/bt.c
index 0411d392a..33944af0a 100644
--- a/hw/bt.c
+++ b/hw/bt.c
@@ -96,6 +96,7 @@ struct bt_msg {
 	unsigned long tb;
 	uint8_t seq;
 	uint8_t send_count;
+	bool disable_retry;
 	struct ipmi_msg ipmi_msg;
 };
 
@@ -384,7 +385,8 @@ static void bt_expire_old_msg(uint64_t tb)
 	if (bt_msg && bt_msg->tb > 0 && !chip_quirk(QUIRK_SIMICS) &&
 	    (tb_compare(tb, bt_msg->tb +
 			secs_to_tb(bt.caps.msg_timeout)) == TB_AAFTERB)) {
-		if (bt_msg->send_count <= bt.caps.max_retries) {
+		if (bt_msg->send_count <= bt.caps.max_retries &&
+		    !bt_msg->disable_retry) {
 			/* A message timeout is usually due to the BMC
 			 * clearing the H2B_ATN flag without actually
 			 * doing anything. The data will still be in the
@@ -594,6 +596,16 @@ static void bt_free_ipmi_msg(struct ipmi_msg *ipmi_msg)
 	free(bt_msg);
 }
 
+/*
+ * Do not resend IPMI messages to BMC.
+ */
+static void bt_disable_ipmi_msg_retry(struct ipmi_msg *ipmi_msg)
+{
+	struct bt_msg *bt_msg = container_of(ipmi_msg, struct bt_msg, ipmi_msg);
+
+	bt_msg->disable_retry = true;
+}
+
 /*
  * Remove a message from the queue. The memory allocated for the ipmi message
  * will need to be freed by the caller with bt_free_ipmi_msg() as it will no
@@ -616,6 +628,7 @@ static struct ipmi_backend bt_backend = {
 	.queue_msg = bt_add_ipmi_msg,
 	.queue_msg_head = bt_add_ipmi_msg_head,
 	.dequeue_msg = bt_del_ipmi_msg,
+	.disable_retry = bt_disable_ipmi_msg_retry,
 };
 
 static struct lpc_client bt_lpc_client = {
diff --git a/include/ipmi.h b/include/ipmi.h
index a2735f16b..4999bb5a3 100644
--- a/include/ipmi.h
+++ b/include/ipmi.h
@@ -181,6 +181,7 @@ struct ipmi_backend {
 	int (*queue_msg)(struct ipmi_msg *);
 	int (*queue_msg_head)(struct ipmi_msg *);
 	int (*dequeue_msg)(struct ipmi_msg *);
+	void (*disable_retry)(struct ipmi_msg *);
 };
 
 extern struct ipmi_backend *ipmi_backend;
-- 
2.14.3



More information about the Skiboot mailing list