[PATCH linux dev-4.13 03/10] fsi/fsi-master-gpio: Implement CRC error recovery
Christopher Bostic
cbostic at linux.vnet.ibm.com
Fri May 25 01:05:15 AEST 2018
Reviewed-by: Christopher Bostic <cbostic at linux.vnet.ibm.com>
On 5/24/18 12:14 AM, Benjamin Herrenschmidt wrote:
> The FSI protocol defines two modes of recovery from CRC errors,
> this implements both:
>
> - If the device returns an ECRC (it detected a CRC error in the
> command), then we simply issue the command again.
>
> - If the master detects a CRC error in the response, we send
> an E_POLL command which requests a resend of the response
> without actually re-executing the command (which could otherwise
> have unwanted side effects such as dequeuing a FIFO twice).
>
> Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
> ---
>
> Note: This was actually tested by removing some of my fixes, thus
> causing us to hit occasional CRC errors during high LPC activity.
> ---
> drivers/fsi/fsi-master-gpio.c | 90 ++++++++++++++++++++------
> include/trace/events/fsi_master_gpio.h | 27 ++++++++
> 2 files changed, 99 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c
> index 0a6799bda294..351c12f2ac55 100644
> --- a/drivers/fsi/fsi-master-gpio.c
> +++ b/drivers/fsi/fsi-master-gpio.c
> @@ -22,20 +22,23 @@
> #define FSI_BREAK_CLOCKS 256 /* Number of clocks to issue break */
> #define FSI_POST_BREAK_CLOCKS 16000 /* Number clocks to set up cfam */
> #define FSI_INIT_CLOCKS 5000 /* Clock out any old data */
> +#define FSI_GPIO_DPOLL_CLOCKS 50 /* < 21 will cause slave to hang */
> +#define FSI_GPIO_EPOLL_CLOCKS 50 /* Number of clocks for E_POLL retry */
> #define FSI_GPIO_STD_DELAY 10 /* Standard GPIO delay in nS */
> /* todo: adjust down as low as */
> /* possible or eliminate */
> +#define FSI_CRC_ERR_RETRIES 10
> +
> #define FSI_GPIO_CMD_DPOLL 0x2
> +#define FSI_GPIO_CMD_EPOLL 0x3
> #define FSI_GPIO_CMD_TERM 0x3f
> #define FSI_GPIO_CMD_ABS_AR 0x4
> #define FSI_GPIO_CMD_REL_AR 0x5
> #define FSI_GPIO_CMD_SAME_AR 0x3 /* but only a 2-bit opcode... */
>
> -
> -#define FSI_GPIO_DPOLL_CLOCKS 50 /* < 21 will cause slave to hang */
> -
> -/* Bus errors */
> -#define FSI_GPIO_ERR_BUSY 1 /* Slave stuck in busy state */
> +/* Slave responses */
> +#define FSI_GPIO_RESP_ACK 0 /* Success */
> +#define FSI_GPIO_RESP_BUSY 1 /* Slave busy */
> #define FSI_GPIO_RESP_ERRA 2 /* Any (misc) Error */
> #define FSI_GPIO_RESP_ERRC 3 /* Slave reports master CRC error */
> #define FSI_GPIO_MTOE 4 /* Master time out error */
> @@ -330,6 +333,16 @@ static void build_dpoll_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
> msg_push_crc(cmd);
> }
>
> +static void build_epoll_command(struct fsi_gpio_msg *cmd, uint8_t slave_id)
> +{
> + cmd->bits = 0;
> + cmd->msg = 0;
> +
> + msg_push_bits(cmd, slave_id, 2);
> + msg_push_bits(cmd, FSI_GPIO_CMD_EPOLL, 3);
> + msg_push_crc(cmd);
> +}
> +
> static void echo_delay(struct fsi_master_gpio *master)
> {
> set_sda_output(master, 1);
> @@ -355,6 +368,12 @@ static void fsi_master_gpio_error(struct fsi_master_gpio *master, int error)
>
> }
>
> +/*
> + * Note: callers rely specifically on this returning -EAGAIN for
> + * a CRC error detected in the response. Use other error code
> + * for other situations. It will be converted to something else
> + * higher up the stack before it reaches userspace.
> + */
> static int read_one_response(struct fsi_master_gpio *master,
> uint8_t data_size, struct fsi_gpio_msg *msgp, uint8_t *tagp)
> {
> @@ -379,7 +398,7 @@ static int read_one_response(struct fsi_master_gpio *master,
> "Master time out waiting for response\n");
> fsi_master_gpio_error(master, FSI_GPIO_MTOE);
> spin_unlock_irqrestore(&master->bit_lock, flags);
> - return -EIO;
> + return -ETIMEDOUT;
> }
>
> msg.bits = 0;
> @@ -405,7 +424,7 @@ static int read_one_response(struct fsi_master_gpio *master,
> if (crc) {
> dev_dbg(master->dev, "ERR response CRC\n");
> fsi_master_gpio_error(master, FSI_GPIO_CRC_INVAL);
> - return -EIO;
> + return -EAGAIN;
> }
>
> if (msgp)
> @@ -451,11 +470,33 @@ static int poll_for_response(struct fsi_master_gpio *master,
> unsigned long flags;
> uint8_t tag;
> uint8_t *data_byte = data;
> -
> + int crc_err_retries = 0;
> retry:
> rc = read_one_response(master, size, &response, &tag);
> - if (rc)
> - return rc;
> +
> + /* Handle retries on CRC errors */
> + if (rc == -EAGAIN) {
> + /* Too many retries ? */
> + if (crc_err_retries++ > FSI_CRC_ERR_RETRIES) {
> + /*
> + * Pass it up as a -EIO otherwise upper level will retry
> + * the whole command which isn't what we want here.
> + */
> + rc = -EIO;
> + goto fail;
> + }
> + dev_dbg(master->dev,
> + "CRC error retry %d\n", crc_err_retries);
> + trace_fsi_master_gpio_crc_rsp_error(master);
> + build_epoll_command(&cmd, slave);
> + spin_lock_irqsave(&master->bit_lock, flags);
> + clock_zeros(master, FSI_GPIO_EPOLL_CLOCKS);
> + serial_out(master, &cmd);
> + echo_delay(master);
> + spin_unlock_irqrestore(&master->bit_lock, flags);
> + goto retry;
> + } else if (rc)
> + goto fail;
>
> switch (tag) {
> case FSI_GPIO_RESP_ACK:
> @@ -496,18 +537,21 @@ static int poll_for_response(struct fsi_master_gpio *master,
> break;
>
> case FSI_GPIO_RESP_ERRA:
> - case FSI_GPIO_RESP_ERRC:
> - dev_dbg(master->dev, "ERR%c received: 0x%x\n",
> - tag == FSI_GPIO_RESP_ERRA ? 'A' : 'C',
> - (int)response.msg);
> + dev_dbg(master->dev, "ERRA received: 0x%x\n", (int)response.msg);
> fsi_master_gpio_error(master, response.msg);
> rc = -EIO;
> break;
> + case FSI_GPIO_RESP_ERRC:
> + dev_dbg(master->dev, "ERRC received: 0x%x\n", (int)response.msg);
> + fsi_master_gpio_error(master, response.msg);
> + trace_fsi_master_gpio_crc_cmd_error(master);
> + rc = -EAGAIN;
> + break;
> }
>
> if (busy_count > 0)
> trace_fsi_master_gpio_poll_response_busy(master, busy_count);
> -
> + fail:
> /* Clock the slave enough to be ready for next operation */
> spin_lock_irqsave(&master->bit_lock, flags);
> clock_zeros(master, FSI_GPIO_PRIME_SLAVE_CLOCKS);
> @@ -536,11 +580,21 @@ static int send_request(struct fsi_master_gpio *master,
> static int fsi_master_gpio_xfer(struct fsi_master_gpio *master, uint8_t slave,
> struct fsi_gpio_msg *cmd, size_t resp_len, void *resp)
> {
> - int rc;
> + int rc = -EAGAIN, retries = 0;
>
> - rc = send_request(master, cmd);
> - if (!rc)
> + while ((retries++) < FSI_CRC_ERR_RETRIES) {
> + rc = send_request(master, cmd);
> + if (rc)
> + break;
> rc = poll_for_response(master, slave, resp_len, resp);
> + if (rc != -EAGAIN)
> + break;
> + rc = -EIO;
> + dev_warn(master->dev, "ECRC retry %d\n", retries);
> +
> + /* Pace it a bit before retry */
> + msleep(1);
> + }
>
> return rc;
> }
> diff --git a/include/trace/events/fsi_master_gpio.h b/include/trace/events/fsi_master_gpio.h
> index 48e83e5755f4..e18b94ce85b2 100644
> --- a/include/trace/events/fsi_master_gpio.h
> +++ b/include/trace/events/fsi_master_gpio.h
> @@ -63,6 +63,33 @@ TRACE_EVENT(fsi_master_gpio_break,
> )
> );
>
> +TRACE_EVENT(fsi_master_gpio_crc_cmd_error,
> + TP_PROTO(const struct fsi_master_gpio *master),
> + TP_ARGS(master),
> + TP_STRUCT__entry(
> + __field(int, master_idx)
> + ),
> + TP_fast_assign(
> + __entry->master_idx = master->master.idx;
> + ),
> + TP_printk("fsi-gpio%d ----CRC command retry---",
> + __entry->master_idx
> + )
> +);
> +
> +TRACE_EVENT(fsi_master_gpio_crc_rsp_error,
> + TP_PROTO(const struct fsi_master_gpio *master),
> + TP_ARGS(master),
> + TP_STRUCT__entry(
> + __field(int, master_idx)
> + ),
> + TP_fast_assign(
> + __entry->master_idx = master->master.idx;
> + ),
> + TP_printk("fsi-gpio%d ----CRC response---",
> + __entry->master_idx
> + )
> +);
>
> TRACE_EVENT(fsi_master_gpio_poll_response_busy,
> TP_PROTO(const struct fsi_master_gpio *master, int busy),
More information about the openbmc
mailing list