[Skiboot] [PATCH 2/3] uart: Workaround for unresponsive console

Nicholas Piggin npiggin at gmail.com
Thu May 21 00:09:41 AEST 2020


Excerpts from Vasant Hegde's message of May 20, 2020 9:51 pm:
> We use 4K internal output buffer for host console write. Whenever kernel
> makes opal_console_write() call it first write to internal buffer and
> then calls uart_con_flush() to flush data to UART console.
> 
> If kernel is doing heavy console write operation and BMC becomes unresponsive
> (like BMC reboot) then our internal buffer becomes full. Kernel will keep
> on retrying write operation. This will result in kernel lockups.

Would this be better to be done in OS, or not?

> This patch introduces new timer. It starts timer whenever our write
> operation to UART is not making progress. If situation doesn't improve
> within specified time (10 seconds) then it will send error back to
> kernel.
> 
> Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
> ---
>  hw/lpc-uart.c | 60 +++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 54 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
> index ed56a4bdc..672d4612b 100644
> --- a/hw/lpc-uart.c
> +++ b/hw/lpc-uart.c
> @@ -217,6 +217,11 @@ static uint8_t *out_buf;
>  static uint32_t out_buf_prod;
>  static uint32_t out_buf_cons;
>  
> +#define UART_BUFFER_OUT_TIMEOUT  10
> +static uint64_t out_buf_timeout;
> +static uint64_t out_buf_prev_len;
> +static uint64_t con_flush_timeout;
> +
>  /* Asynchronous flush, uart_lock must be held */
>  static int64_t uart_con_flush(void)
>  {
> @@ -246,14 +251,31 @@ static int64_t uart_con_flush(void)
>  	}
>  	if (tx_full != tx_was_full)
>  		uart_update_ier();
> +
>  	if (out_buf_prod != out_buf_cons) {
> -		/* Return busy if nothing was flushed this call */
> -		if (out_buf_cons == out_buf_cons_initial)
> -			return OPAL_BUSY;
>  		/* Return partial if there's more to flush */
> -		return OPAL_PARTIAL;
> +		if (out_buf_cons != out_buf_cons_initial) {
> +			con_flush_timeout = 0;
> +			return OPAL_PARTIAL;
> +		}
> +
> +		/*
> +		 * Nothing was flushed. Start internal timer. We will continue
> +		 * returning BUSY until timeout happens, hoping BMC will consume
> +		 * data within timeout period.
> +		 */
> +		if (con_flush_timeout == 0) {
> +			con_flush_timeout = mftb() +
> +				secs_to_tb(UART_BUFFER_OUT_TIMEOUT);
> +		}
> +
> +		if (tb_compare(mftb(), con_flush_timeout) != TB_AAFTERB)
> +			return OPAL_BUSY;
> +
> +		return OPAL_INTERNAL_ERROR;

Would OPAL_TIMEOUT be a better error code? Internal means something in 
skiboot has gone wrong or is inconsistent doesn't it, and is pretty 
broad.

>  	}
>  
> +	con_flush_timeout = 0;
>  	return OPAL_SUCCESS;
>  }
>  
> @@ -273,6 +295,11 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
>  
>  	lock(&uart_lock);
>  
> +	if (!uart_tx_buf_space()) {
> +		unlock(&uart_lock);
> +		return OPAL_INTERNAL_ERROR;
> +	}

If the uart is full from a previous write, it's possible there would 
have been no buffer space freed up and then you could get an error here 
I think?

It seems better if you check if uart_con_flush returns timeout here and 
you have zero bytes written, then return error here.

What if we do all the timing at the uart level, then the buffer 
management can just ask if it's timed out for some length of time, like 
this?

---
 hw/lpc-uart.c | 108 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 75 insertions(+), 33 deletions(-)

diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
index 979a617c3..1a25c62ea 100644
--- a/hw/lpc-uart.c
+++ b/hw/lpc-uart.c
@@ -56,6 +56,7 @@ DEFINE_LOG_ENTRY(OPAL_RC_UART_INIT, OPAL_PLATFORM_ERR_EVT, OPAL_UART,
 static struct lock uart_lock = LOCK_UNLOCKED;
 static struct dt_node *uart_node;
 static uint32_t uart_base;
+static uint64_t uart_tx_full_time;
 static bool has_irq = false, irq_ok, rx_full, tx_full;
 static uint8_t tx_room;
 static uint8_t cached_ier;
@@ -95,28 +96,60 @@ static inline void uart_write(unsigned int reg, uint8_t val)
 		lpc_outb(val, uart_base + reg);
 }
 
-static void uart_check_tx_room(void)
+static bool uart_check_tx_room(void)
 {
+	if (tx_room)
+		return true;
+
 	if (uart_read(REG_LSR) & LSR_THRE) {
 		/* FIFO is 16 entries */
 		tx_room = 16;
 		tx_full = false;
+		return true;
+	}
+
+	return false;
+}
+
+static void uart_write_thr(uint8_t val)
+{
+	assert(!tx_full);
+	assert(tx_room);
+
+	uart_write(REG_THR, val);
+
+	tx_room--;
+	if (tx_room == 0) {
+		if (!uart_check_tx_room())
+			uart_tx_full_time = mftb();
 	}
 }
 
-static void uart_wait_tx_room(void)
+static bool uart_timed_out(unsigned long msecs)
+{
+	if (uart_check_tx_room())
+		return false;
+
+	if (tb_compare(mftb(), uart_tx_full_time + msecs_to_tb(msecs)) == TB_AAFTERB)
+		return true;
+	return false;
+}
+
+static bool uart_wait_tx_room(void)
 {
-	while (!tx_room) {
-		uart_check_tx_room();
-		if (!tx_room) {
-			smt_lowest();
-			do {
-				barrier();
-				uart_check_tx_room();
-			} while (!tx_room);
+	if (uart_check_tx_room())
+		return true;
+
+	smt_lowest();
+	while (!uart_check_tx_room()) {
+		if (uart_timed_out(10)) {
 			smt_medium();
+			return false;
 		}
 	}
+	smt_medium();
+
+	return true;
 }
 
 static void uart_update_ier(void)
@@ -160,18 +193,18 @@ static size_t uart_con_write(const char *buf, size_t len)
 		return written;
 
 	lock(&uart_lock);
-	while(written < len) {
-		if (tx_room == 0) {
-			uart_wait_tx_room();
-			if (tx_room == 0)
-				goto bail;
-		} else {
-			uart_write(REG_THR, buf[written++]);
-			tx_room--;
-		}
+	while (written < len) {
+		if (!uart_wait_tx_room())
+			break;
+
+		uart_write_thr(buf[written++]);
 	}
- bail:
+
+	if (!written && uart_timed_out(1000))
+		return written; /* swallow data */
+
 	unlock(&uart_lock);
+
 	return written;
 }
 
@@ -223,25 +256,27 @@ static int64_t uart_con_flush(void)
 			 * otherwise the Linux console isn't
 			 * usable (too slow).
 			 */
-			if (irq_ok)
-				uart_check_tx_room();
-			else
-				uart_wait_tx_room();
+			if (irq_ok) {
+				if (!uart_check_tx_room())
+					break;
+			} else {
+				if (!uart_wait_tx_room())
+					break;
+			}
 		}
-		if (tx_room == 0) {
-			tx_full = true;
-			break;
-		}
-		uart_write(REG_THR, out_buf[out_buf_cons++]);
+
+		uart_write_thr(out_buf[out_buf_cons++]);
 		out_buf_cons %= OUT_BUF_SIZE;
-		tx_room--;
 	}
 	if (tx_full != tx_was_full)
 		uart_update_ier();
 	if (out_buf_prod != out_buf_cons) {
 		/* Return busy if nothing was flushed this call */
-		if (out_buf_cons == out_buf_cons_initial)
+		if (out_buf_cons == out_buf_cons_initial) {
+			if (uart_timed_out(1000))
+				return OPAL_TIMEOUT;
 			return OPAL_BUSY;
+		}
 		/* Return partial if there's more to flush */
 		return OPAL_PARTIAL;
 	}
@@ -259,6 +294,7 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
 			       const uint8_t *buffer)
 {
 	size_t written = 0, len = be64_to_cpu(*__length);
+	int64_t ret = OPAL_SUCCESS;
 
 	if (term_number != 0)
 		return OPAL_PARAMETER;
@@ -275,24 +311,30 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
 	/* Flush out buffer again */
 	uart_con_flush();
 
+	if (!written && uart_timed_out(1000))
+		ret = OPAL_TIMEOUT;
 	unlock(&uart_lock);
 
 	*__length = cpu_to_be64(written);
 
-	return OPAL_SUCCESS;
+	return ret;
 }
 
 static int64_t uart_opal_write_buffer_space(int64_t term_number,
 					    __be64 *__length)
 {
+	int64_t ret = OPAL_SUCCESS;
 	if (term_number != 0)
 		return OPAL_PARAMETER;
 
 	lock(&uart_lock);
+	uart_con_flush();
+	if (!uart_tx_buf_space() && uart_timed_out(1000))
+		ret = OPAL_TIMEOUT;
 	*__length = cpu_to_be64(uart_tx_buf_space());
 	unlock(&uart_lock);
 
-	return OPAL_SUCCESS;
+	return ret;
 }
 
 /* Must be called with UART lock held */
-- 
2.23.0



More information about the Skiboot mailing list