[Skiboot] [PATCH] FSP: Do not process incoming FSP messages in fsp_sync_msg path

Vasant Hegde hegdevasant at linux.vnet.ibm.com
Tue Mar 10 17:22:24 AEDT 2015


Presently we process incoming messages from FSP all the time.
Our fsp_fetch_data function holds lock and calls fsp_sync_msg.
Due to our callback mechanism it's possible that in some cases
we may hit a recursive lock issue in fsp_fetch_data.

Sample code path:
  fsp_fetch_data ->
     fsp_sync_msg ->
       Incoming message (ex: OCC LID load) ->
         notify registered client (ex: fsp_occ_msg) ->
	   fsp_fetch_data -> Lock issue!

This patch adds support to queue incoming messages in the fsp_fetch_data
path. That way we are sure that we will not hit the above scenario. Also
I have changed the locking code in fsp_fetch_data. Now we just hold the
lock to update the flag.

Note:
  We still have one lock issue in poller path. (We held capi_lock in
  capp_lid_download and call opal_run_pollers via load_resources).

  [4108781364,3] Running pollers with lock held !
  CPU 0830 Backtrace:
  S: 0000000033ac3800 R: 0000000030013520   .backtrace+0x2c
  S: 0000000033ac3890 R: 0000000030017dd8   .opal_run_pollers+0x44
  S: 0000000033ac3910 R: 0000000030046190   .fsp_sync_msg+0x40
  S: 0000000033ac39a0 R: 0000000030047c10   .fsp_fetch_data+0x188
  S: 0000000033ac3a90 R: 0000000030047ed8   .fsp_load_resource+0x12c
  S: 0000000033ac3b50 R: 0000000030021bb0   .load_resource+0x38
  S: 0000000033ac3bc0 R: 000000003003b610   .capp_load_ucode+0x168
  S: 0000000033ac3c90 R: 0000000030041140   .probe_phb3+0x1224
  S: 0000000033ac3e30 R: 000000003001420c   .main_cpu_entry+0x444
  S: 0000000033ac3f00 R: 0000000030002524   boot_entry+0x18c

Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
---
 hw/fsp/fsp.c |   71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
index fdf175a..c73a193 100644
--- a/hw/fsp/fsp.c
+++ b/hw/fsp/fsp.c
@@ -117,6 +117,13 @@ static u32 disr_last_print;
 static u32 drcr_last_print;
 static u32 hstate_last_print;
 
+/*
+ * Keep track of incoming messages during a synchronous FSP call from
+ * fsp_fetch_data and process them later.
+ */
+static LIST_HEAD(fsp_msg_list);
+static bool fsp_handle_cmd_ready = true;
+
 void fsp_handle_resp(struct fsp_msg *msg);
 
 struct fsp_cmdclass {
@@ -1254,7 +1261,7 @@ static bool fsp_local_command(u32 cmd_sub_mod, struct fsp_msg *msg)
 
 
 /* This is called without the FSP lock */
-static void fsp_handle_command(struct fsp_msg *msg)
+static void __fsp_handle_command(struct fsp_msg *msg)
 {
 	struct fsp_cmdclass *cmdclass = fsp_get_cmdclass(msg);
 	struct fsp_client *client, *next;
@@ -1300,6 +1307,49 @@ static void fsp_handle_command(struct fsp_msg *msg)
 	fsp_freemsg(msg);
 }
 
+/*
+ * Queue the incoming message instead of handling it while we are
+ * in the middle of an fsp_sync_msg call from fsp_fetch_data.
+ */
+static void fsp_handle_command(struct fsp_msg *msg)
+{
+	u32 cmd_sub_mod;
+
+	/*
+	 * If fsp_fetch_data() has cleared the ready flag, park the
+	 * message on fsp_msg_list for later replay.
+	 *
+	 * XXX: msg->link is also used by fsp_queue_msg() for the
+	 *      outgoing queue, but a message is never on both
+	 *      paths at once, so reusing the link is safe here.
+	 */
+	if (!fsp_handle_cmd_ready) {
+		cmd_sub_mod =  (msg->word0 & 0xff) << 16;
+		cmd_sub_mod |= (msg->word1 & 0xff) << 8;
+		cmd_sub_mod |= (msg->word1 >> 8) & 0xff;
+		prlog(PR_TRACE, "FSP: Queued incoming message [%x]\n",
+		      cmd_sub_mod);
+
+		list_add_tail(&fsp_msg_list, &msg->link);
+		return;
+	}
+
+	__fsp_handle_command(msg);
+}
+
+static void fsp_poke_cmd_queue(void)
+{
+	struct fsp_msg *msg;
+	/*
+	 * Pop entries one at a time: list_for_each() with list_del()
+	 * in the body would read the next pointer from an unlinked
+	 * node, and __fsp_handle_command() frees msg — use-after-free.
+	 */
+	while ((msg = list_top(&fsp_msg_list, struct fsp_msg, link))) {
+		list_del(&msg->link);
+		__fsp_handle_command(msg);
+	}
+}
+
 static void __fsp_fill_incoming(struct fsp *fsp, struct fsp_msg *msg,
 				int dlen, u32 w0, u32 w1)
 {
@@ -2166,6 +2216,19 @@ int fsp_fetch_data(uint8_t flags, uint16_t id, uint32_t sub_id,
 	 */
 	lock(&fsp_fetch_lock);
 
+	/*
+	 * XXX: This means we are re-entering this code. Ideally this
+	 * should never happen.
+	 */
+	if (!fsp_handle_cmd_ready) {
+		prlog(PR_ERR, "FSP: Re-entering fsp_fetch_data!\n");
+		unlock(&fsp_fetch_lock);
+		return -EBUSY;
+	}
+
+	fsp_handle_cmd_ready = false;
+	unlock(&fsp_fetch_lock);
+
 	while(remaining) {
 		uint32_t chunk, taddr, woffset, wlen;
 		uint8_t rc;
@@ -2201,7 +2264,10 @@ int fsp_fetch_data(uint8_t flags, uint16_t id, uint32_t sub_id,
 
 		/* XXX Is flash busy (0x3f) a reason for retry ? */
 		if (rc != 0 && rc != 2) {
+			lock(&fsp_fetch_lock);
+			fsp_handle_cmd_ready = true;
 			unlock(&fsp_fetch_lock);
+			fsp_poke_cmd_queue();
 			return -EIO;
 		}
 
@@ -2218,10 +2284,13 @@ int fsp_fetch_data(uint8_t flags, uint16_t id, uint32_t sub_id,
 		if (wlen < chunk)
 			break;
 	}
+	lock(&fsp_fetch_lock);
+	fsp_handle_cmd_ready = true;
 	unlock(&fsp_fetch_lock);
 
 	*length = total;
 
+	fsp_poke_cmd_queue();
 	return 0;
 }
 



More information about the Skiboot mailing list