[PATCH linux dev-4.13 03/10] fsi/fsi-master-gpio: Implement CRC error recovery

Benjamin Herrenschmidt benh at kernel.crashing.org
Thu May 24 15:14:22 AEST 2018


The FSI protocol defines two modes of recovery from CRC errors,
this implements both:

 - If the device returns an ECRC (it detected a CRC error in the
   command), then we simply issue the command again.

 - If the master detects a CRC error in the response, we send
   an E_POLL command which requests a resend of the response
   without actually re-executing the command (which could otherwise
   have unwanted side effects such as dequeuing a FIFO twice).

Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---

Note: This was actually tested by removing some of my fixes, thus
causing us to hit occasional CRC errors during high LPC activity.
---
 drivers/fsi/fsi-master-gpio.c          | 90 ++++++++++++++++++++------
 include/trace/events/fsi_master_gpio.h | 27 ++++++++
 2 files changed, 99 insertions(+), 18 deletions(-)

diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c
index 0a6799bda294..351c12f2ac55 100644
--- a/drivers/fsi/fsi-master-gpio.c
+++ b/drivers/fsi/fsi-master-gpio.c
@@ -22,20 +22,23 @@
 #define	FSI_BREAK_CLOCKS	256	/* Number of clocks to issue break */
 #define	FSI_POST_BREAK_CLOCKS	16000	/* Number clocks to set up cfam */
 #define	FSI_INIT_CLOCKS		5000	/* Clock out any old data */
+#define	FSI_GPIO_DPOLL_CLOCKS	50      /* < 21 will cause slave to hang */
+#define	FSI_GPIO_EPOLL_CLOCKS	50      /* Number of clocks for E_POLL retry */
 #define	FSI_GPIO_STD_DELAY	10	/* Standard GPIO delay in nS */
 					/* todo: adjust down as low as */
 					/* possible or eliminate */
+#define FSI_CRC_ERR_RETRIES	10
+
 #define	FSI_GPIO_CMD_DPOLL      0x2
+#define	FSI_GPIO_CMD_EPOLL      0x3
 #define	FSI_GPIO_CMD_TERM	0x3f
 #define FSI_GPIO_CMD_ABS_AR	0x4
 #define FSI_GPIO_CMD_REL_AR	0x5
 #define FSI_GPIO_CMD_SAME_AR	0x3	/* but only a 2-bit opcode... */
 
-
-#define	FSI_GPIO_DPOLL_CLOCKS	50      /* < 21 will cause slave to hang */
-
-/* Bus errors */
-#define	FSI_GPIO_ERR_BUSY	1	/* Slave stuck in busy state */
+/* Slave responses */
+#define	FSI_GPIO_RESP_ACK	0	/* Success */
+#define	FSI_GPIO_RESP_BUSY	1	/* Slave busy */
 #define	FSI_GPIO_RESP_ERRA	2	/* Any (misc) Error */
 #define	FSI_GPIO_RESP_ERRC	3	/* Slave reports master CRC error */
 #define	FSI_GPIO_MTOE		4	/* Master time out error */
@@ -330,6 +333,16 @@ static void build_dpoll_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
 	msg_push_crc(cmd);
 }
 
+static void build_epoll_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
+{
+	cmd->bits = 0;
+	cmd->msg = 0;
+
+	msg_push_bits(cmd, slave_id, 2);
+	msg_push_bits(cmd, FSI_GPIO_CMD_EPOLL, 3);
+	msg_push_crc(cmd);
+}
+
 static void echo_delay(struct fsi_master_gpio *master)
 {
 	set_sda_output(master, 1);
@@ -355,6 +368,12 @@ static void fsi_master_gpio_error(struct fsi_master_gpio *master, int error)
 
 }
 
+/*
+ * Note: callers rely specifically on this returning -EAGAIN for
+ * a CRC error detected in the response. Use other error code
+ * for other situations. It will be converted to something else
+ * higher up the stack before it reaches userspace.
+ */
 static int read_one_response(struct fsi_master_gpio *master,
 		uint8_t data_size, struct fsi_gpio_msg *msgp, uint8_t *tagp)
 {
@@ -379,7 +398,7 @@ static int read_one_response(struct fsi_master_gpio *master,
 			"Master time out waiting for response\n");
 		fsi_master_gpio_error(master, FSI_GPIO_MTOE);
 		spin_unlock_irqrestore(&master->bit_lock, flags);
-		return -EIO;
+		return -ETIMEDOUT;
 	}
 
 	msg.bits = 0;
@@ -405,7 +424,7 @@ static int read_one_response(struct fsi_master_gpio *master,
 	if (crc) {
 		dev_dbg(master->dev, "ERR response CRC\n");
 		fsi_master_gpio_error(master, FSI_GPIO_CRC_INVAL);
-		return -EIO;
+		return -EAGAIN;
 	}
 
 	if (msgp)
@@ -451,11 +470,33 @@ static int poll_for_response(struct fsi_master_gpio *master,
 	unsigned long flags;
 	uint8_t tag;
 	uint8_t *data_byte = data;
-
+	int crc_err_retries = 0;
 retry:
 	rc = read_one_response(master, size, &response, &tag);
-	if (rc)
-		return rc;
+
+	/* Handle retries on CRC errors */
+	if (rc == -EAGAIN) {
+		/* Too many retries ? */
+		if (crc_err_retries++ > FSI_CRC_ERR_RETRIES) {
+			/*
+			 * Pass it up as a -EIO otherwise upper level will retry
+			 * the whole command which isn't what we want here.
+			 */
+			rc = -EIO;
+			goto fail;
+		}
+		dev_dbg(master->dev,
+			 "CRC error retry %d\n", crc_err_retries);
+		trace_fsi_master_gpio_crc_rsp_error(master);
+		build_epoll_command(&cmd, slave);
+		spin_lock_irqsave(&master->bit_lock, flags);
+		clock_zeros(master, FSI_GPIO_EPOLL_CLOCKS);
+		serial_out(master, &cmd);
+		echo_delay(master);
+		spin_unlock_irqrestore(&master->bit_lock, flags);
+		goto retry;
+	} else if (rc)
+		goto fail;
 
 	switch (tag) {
 	case FSI_GPIO_RESP_ACK:
@@ -496,18 +537,21 @@ static int poll_for_response(struct fsi_master_gpio *master,
 		break;
 
 	case FSI_GPIO_RESP_ERRA:
-	case FSI_GPIO_RESP_ERRC:
-		dev_dbg(master->dev, "ERR%c received: 0x%x\n",
-			tag == FSI_GPIO_RESP_ERRA ? 'A' : 'C',
-			(int)response.msg);
+		dev_dbg(master->dev, "ERRA received: 0x%x\n", (int)response.msg);
 		fsi_master_gpio_error(master, response.msg);
 		rc = -EIO;
 		break;
+	case FSI_GPIO_RESP_ERRC:
+		dev_dbg(master->dev, "ERRC received: 0x%x\n", (int)response.msg);
+		fsi_master_gpio_error(master, response.msg);
+		trace_fsi_master_gpio_crc_cmd_error(master);
+		rc = -EAGAIN;
+		break;
 	}
 
 	if (busy_count > 0)
 		trace_fsi_master_gpio_poll_response_busy(master, busy_count);
-
+ fail:
 	/* Clock the slave enough to be ready for next operation */
 	spin_lock_irqsave(&master->bit_lock, flags);
 	clock_zeros(master, FSI_GPIO_PRIME_SLAVE_CLOCKS);
@@ -536,11 +580,21 @@ static int send_request(struct fsi_master_gpio *master,
 static int fsi_master_gpio_xfer(struct fsi_master_gpio *master, uint8_t slave,
 		struct fsi_gpio_msg *cmd, size_t resp_len, void *resp)
 {
-	int rc;
+	int rc = -EAGAIN, retries = 0;
 
-	rc = send_request(master, cmd);
-	if (!rc)
+	while ((retries++) < FSI_CRC_ERR_RETRIES) {
+		rc = send_request(master, cmd);
+		if (rc)
+			break;
 		rc = poll_for_response(master, slave, resp_len, resp);
+		if (rc != -EAGAIN)
+			break;
+		rc = -EIO;
+		dev_warn(master->dev, "ECRC retry %d\n", retries);
+
+		/* Pace it a bit before retry */
+		msleep(1);
+	}
 
 	return rc;
 }
diff --git a/include/trace/events/fsi_master_gpio.h b/include/trace/events/fsi_master_gpio.h
index 48e83e5755f4..e18b94ce85b2 100644
--- a/include/trace/events/fsi_master_gpio.h
+++ b/include/trace/events/fsi_master_gpio.h
@@ -63,6 +63,33 @@ TRACE_EVENT(fsi_master_gpio_break,
 	)
 );
 
+TRACE_EVENT(fsi_master_gpio_crc_cmd_error,
+	TP_PROTO(const struct fsi_master_gpio *master),
+	TP_ARGS(master),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+	),
+	TP_printk("fsi-gpio%d ----CRC command retry---",
+		__entry->master_idx
+	)
+);
+
+TRACE_EVENT(fsi_master_gpio_crc_rsp_error,
+	TP_PROTO(const struct fsi_master_gpio *master),
+	TP_ARGS(master),
+	TP_STRUCT__entry(
+		__field(int,	master_idx)
+	),
+	TP_fast_assign(
+		__entry->master_idx = master->master.idx;
+	),
+	TP_printk("fsi-gpio%d ----CRC response---",
+		__entry->master_idx
+	)
+);
 
 TRACE_EVENT(fsi_master_gpio_poll_response_busy,
 	TP_PROTO(const struct fsi_master_gpio *master, int busy),
-- 
2.17.0



More information about the openbmc mailing list