[Skiboot] [PATCH 2/3] uart: Workaround for unresponsive console

Vasant Hegde hegdevasant at linux.vnet.ibm.com
Thu May 21 16:23:55 AEST 2020


On 5/20/20 7:39 PM, Nicholas Piggin wrote:
> Excerpts from Vasant Hegde's message of May 20, 2020 9:51 pm:
>> We use 4K internal output buffer for host console write. Whenever kernel
>> makes opal_console_write() call it first write to internal buffer and
>> then calls uart_con_flush() to flush data to UART console.
>>
>> If kernel is doing heavy console write operation and BMC becomes unresponsive
>> (like BMC reboot) then our internal buffer becomes full. Kernel will keep
>> on retrying write operation. This will result in kernel lockups.
> 
> Would this be better to be done in OS, or not?

Yeah. Something to look from kernel side as well.

> 
>> This patch introduces new timer. It starts timer whenever our write
>> operation to UART is not making progress. If situation doesn't improve
>> within specified time (10 seconds) then it will send error back to
>> kernel.
>>
>> Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
>> ---
>>   hw/lpc-uart.c | 60 +++++++++++++++++++++++++++++++++++++++++++++------
>>   1 file changed, 54 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
>> index ed56a4bdc..672d4612b 100644
>> --- a/hw/lpc-uart.c
>> +++ b/hw/lpc-uart.c
>> @@ -217,6 +217,11 @@ static uint8_t *out_buf;
>>   static uint32_t out_buf_prod;
>>   static uint32_t out_buf_cons;
>>   
>> +#define UART_BUFFER_OUT_TIMEOUT  10
>> +static uint64_t out_buf_timeout;
>> +static uint64_t out_buf_prev_len;
>> +static uint64_t con_flush_timeout;
>> +
>>   /* Asynchronous flush, uart_lock must be held */
>>   static int64_t uart_con_flush(void)
>>   {
>> @@ -246,14 +251,31 @@ static int64_t uart_con_flush(void)
>>   	}
>>   	if (tx_full != tx_was_full)
>>   		uart_update_ier();
>> +
>>   	if (out_buf_prod != out_buf_cons) {
>> -		/* Return busy if nothing was flushed this call */
>> -		if (out_buf_cons == out_buf_cons_initial)
>> -			return OPAL_BUSY;
>>   		/* Return partial if there's more to flush */
>> -		return OPAL_PARTIAL;
>> +		if (out_buf_cons != out_buf_cons_initial) {
>> +			con_flush_timeout = 0;
>> +			return OPAL_PARTIAL;
>> +		}
>> +
>> +		/*
>> +		 * Nothing was flushed. Start internal timer. We will continue
>> +		 * returning BUSY until timeout happens, hoping BMC will consume
>> +		 * data within timeout period.
>> +		 */
>> +		if (con_flush_timeout == 0) {
>> +			con_flush_timeout = mftb() +
>> +				secs_to_tb(UART_BUFFER_OUT_TIMEOUT);
>> +		}
>> +
>> +		if (tb_compare(mftb(), con_flush_timeout) != TB_AAFTERB)
>> +			return OPAL_BUSY;
>> +
>> +		return OPAL_INTERNAL_ERROR;
> 
> Would OPAL_TIMEOUT be a better error code? Internal means something in
> skiboot has gone wrong or is inconsistent doesn't it, and is pretty
> broad.

Yeah. OPAL_TIMEOUT is better.

> 
>>   	}
>>   
>> +	con_flush_timeout = 0;
>>   	return OPAL_SUCCESS;
>>   }
>>   
>> @@ -273,6 +295,11 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
>>   
>>   	lock(&uart_lock);
>>   
>> +	if (!uart_tx_buf_space()) {
>> +		unlock(&uart_lock);
>> +		return OPAL_INTERNAL_ERROR;
>> +	}
> 
> If the uart is full from a previous write, it's possible there would
> have been no buffer space freed up and then you could get an error here
> I think?

Actually above check in redundant. Kernel calls uart_opal_write_buffer_space() 
before
calling this function... which makes sure we have space to write. Since this is 
dedicated
buffer for kernel we don't need this check.

> 
> It seems better if you check if uart_con_flush returns timeout here and
> you have zero bytes written, then return error here.
> 
> What if we do all the timing at the uart level, then the buffer
> management can just ask if it's timed out for some length of time, like
> this?


Yeah.. That's better one.


I tried below patch (with lock fix, see below). Its making console really slow. 
will look into it.



> 
> ---
>   hw/lpc-uart.c | 108 +++++++++++++++++++++++++++++++++++---------------
>   1 file changed, 75 insertions(+), 33 deletions(-)
> 
> diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
> index 979a617c3..1a25c62ea 100644
> --- a/hw/lpc-uart.c
> +++ b/hw/lpc-uart.c
> @@ -56,6 +56,7 @@ DEFINE_LOG_ENTRY(OPAL_RC_UART_INIT, OPAL_PLATFORM_ERR_EVT, OPAL_UART,
>   static struct lock uart_lock = LOCK_UNLOCKED;
>   static struct dt_node *uart_node;
>   static uint32_t uart_base;
> +static uint64_t uart_tx_full_time;
>   static bool has_irq = false, irq_ok, rx_full, tx_full;
>   static uint8_t tx_room;
>   static uint8_t cached_ier;
> @@ -95,28 +96,60 @@ static inline void uart_write(unsigned int reg, uint8_t val)
>   		lpc_outb(val, uart_base + reg);
>   }
>   
> -static void uart_check_tx_room(void)
> +static bool uart_check_tx_room(void)
>   {
> +	if (tx_room)
> +		return true;

I think this is redundant check for most cases except uart_wait_tx_room().


> +
>   	if (uart_read(REG_LSR) & LSR_THRE) {
>   		/* FIFO is 16 entries */
>   		tx_room = 16;
>   		tx_full = false;
> +		return true;
> +	}
> +
> +	return false;
> +}
> +

May be worth to mention its called with uart lock.

> +static void uart_write_thr(uint8_t val)
> +{
> +	assert(!tx_full);
> +	assert(tx_room);

IMO this is not required. Worst case we may overwrite data is FIFO buffer. which 
is ok.


> +
> +	uart_write(REG_THR, val);
> +
> +	tx_room--;
> +	if (tx_room == 0) {
> +		if (!uart_check_tx_room())
> +			uart_tx_full_time = mftb();
>   	}
>   }
>   
> -static void uart_wait_tx_room(void)
> +static bool uart_timed_out(unsigned long msecs)
> +{
> +	if (uart_check_tx_room())
> +		return false;
> +
> +	if (tb_compare(mftb(), uart_tx_full_time + msecs_to_tb(msecs)) == TB_AAFTERB)
> +		return true;
> +	return false;
> +}
> +
> +static bool uart_wait_tx_room(void)
>   {
> -	while (!tx_room) {
> -		uart_check_tx_room();
> -		if (!tx_room) {
> -			smt_lowest();
> -			do {
> -				barrier();
> -				uart_check_tx_room();
> -			} while (!tx_room);
> +	if (uart_check_tx_room())
> +		return true;
> +
> +	smt_lowest();
> +	while (!uart_check_tx_room()) {
> +		if (uart_timed_out(10)) {
>   			smt_medium();
> +			return false;
>   		}
>   	}
> +	smt_medium();
> +
> +	return true;
>   }
>   
>   static void uart_update_ier(void)
> @@ -160,18 +193,18 @@ static size_t uart_con_write(const char *buf, size_t len)
>   		return written;
>   
>   	lock(&uart_lock);
> -	while(written < len) {
> -		if (tx_room == 0) {
> -			uart_wait_tx_room();
> -			if (tx_room == 0)
> -				goto bail;
> -		} else {
> -			uart_write(REG_THR, buf[written++]);
> -			tx_room--;
> -		}
> +	while (written < len) {
> +		if (!uart_wait_tx_room())
> +			break;
> +
> +		uart_write_thr(buf[written++]);
>   	}
> - bail:
> +
> +	if (!written && uart_timed_out(1000))


+ unlock(&uart_lock);

-Vasant

> +		return written; /* swallow data */
> +
>   	unlock(&uart_lock);
> +
>   	return written;
>   }
>   
> @@ -223,25 +256,27 @@ static int64_t uart_con_flush(void)
>   			 * otherwise the Linux console isn't
>   			 * usable (too slow).
>   			 */
> -			if (irq_ok)
> -				uart_check_tx_room();
> -			else
> -				uart_wait_tx_room();
> +			if (irq_ok) {
> +				if (!uart_check_tx_room())
> +					break;
> +			} else {
> +				if (!uart_wait_tx_room())
> +					break;
> +			}
>   		}
> -		if (tx_room == 0) {
> -			tx_full = true;
> -			break;
> -		}
> -		uart_write(REG_THR, out_buf[out_buf_cons++]);
> +
> +		uart_write_thr(out_buf[out_buf_cons++]);
>   		out_buf_cons %= OUT_BUF_SIZE;
> -		tx_room--;
>   	}
>   	if (tx_full != tx_was_full)
>   		uart_update_ier();
>   	if (out_buf_prod != out_buf_cons) {
>   		/* Return busy if nothing was flushed this call */
> -		if (out_buf_cons == out_buf_cons_initial)
> +		if (out_buf_cons == out_buf_cons_initial) {
> +			if (uart_timed_out(1000))
> +				return OPAL_TIMEOUT;
>   			return OPAL_BUSY;
> +		}
>   		/* Return partial if there's more to flush */
>   		return OPAL_PARTIAL;
>   	}
> @@ -259,6 +294,7 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
>   			       const uint8_t *buffer)
>   {
>   	size_t written = 0, len = be64_to_cpu(*__length);
> +	int64_t ret = OPAL_SUCCESS;
>   
>   	if (term_number != 0)
>   		return OPAL_PARAMETER;
> @@ -275,24 +311,30 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
>   	/* Flush out buffer again */
>   	uart_con_flush();
>   
> +	if (!written && uart_timed_out(1000))
> +		ret = OPAL_TIMEOUT;
>   	unlock(&uart_lock);
>   
>   	*__length = cpu_to_be64(written);
>   
> -	return OPAL_SUCCESS;
> +	return ret;
>   }
>   
>   static int64_t uart_opal_write_buffer_space(int64_t term_number,
>   					    __be64 *__length)
>   {
> +	int64_t ret = OPAL_SUCCESS;
>   	if (term_number != 0)
>   		return OPAL_PARAMETER;
>   
>   	lock(&uart_lock);
> +	uart_con_flush();
> +	if (!uart_tx_buf_space() && uart_timed_out(1000))
> +		ret = OPAL_TIMEOUT;
>   	*__length = cpu_to_be64(uart_tx_buf_space());
>   	unlock(&uart_lock);
>   
> -	return OPAL_SUCCESS;
> +	return ret;
>   }
>   
>   /* Must be called with UART lock held */
> 



More information about the Skiboot mailing list