[Skiboot] [PATCH 2/3] uart: Workaround for unresponsive console
Vasant Hegde
hegdevasant at linux.vnet.ibm.com
Thu May 21 16:23:55 AEST 2020
On 5/20/20 7:39 PM, Nicholas Piggin wrote:
> Excerpts from Vasant Hegde's message of May 20, 2020 9:51 pm:
>> We use 4K internal output buffer for host console write. Whenever kernel
>> makes opal_console_write() call it first write to internal buffer and
>> then calls uart_con_flush() to flush data to UART console.
>>
>> If kernel is doing heavy console write operation and BMC becomes unresponsive
>> (like BMC reboot) then our internal buffer becomes full. Kernel will keep
>> on retrying write operation. This will result in kernel lockups.
>
> Would this be better to be done in OS, or not?
Yeah. Something to look from kernel side as well.
>
>> This patch introduces new timer. It starts timer whenever our write
>> operation to UART is not making progress. If situation doesn't improve
>> within specified time (10 seconds) then it will send error back to
>> kernel.
>>
>> Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
>> ---
>> hw/lpc-uart.c | 60 +++++++++++++++++++++++++++++++++++++++++++++------
>> 1 file changed, 54 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
>> index ed56a4bdc..672d4612b 100644
>> --- a/hw/lpc-uart.c
>> +++ b/hw/lpc-uart.c
>> @@ -217,6 +217,11 @@ static uint8_t *out_buf;
>> static uint32_t out_buf_prod;
>> static uint32_t out_buf_cons;
>>
>> +#define UART_BUFFER_OUT_TIMEOUT 10
>> +static uint64_t out_buf_timeout;
>> +static uint64_t out_buf_prev_len;
>> +static uint64_t con_flush_timeout;
>> +
>> /* Asynchronous flush, uart_lock must be held */
>> static int64_t uart_con_flush(void)
>> {
>> @@ -246,14 +251,31 @@ static int64_t uart_con_flush(void)
>> }
>> if (tx_full != tx_was_full)
>> uart_update_ier();
>> +
>> if (out_buf_prod != out_buf_cons) {
>> - /* Return busy if nothing was flushed this call */
>> - if (out_buf_cons == out_buf_cons_initial)
>> - return OPAL_BUSY;
>> /* Return partial if there's more to flush */
>> - return OPAL_PARTIAL;
>> + if (out_buf_cons != out_buf_cons_initial) {
>> + con_flush_timeout = 0;
>> + return OPAL_PARTIAL;
>> + }
>> +
>> + /*
>> + * Nothing was flushed. Start internal timer. We will continue
>> + * returning BUSY until timeout happens, hoping BMC will consume
>> + * data within timeout period.
>> + */
>> + if (con_flush_timeout == 0) {
>> + con_flush_timeout = mftb() +
>> + secs_to_tb(UART_BUFFER_OUT_TIMEOUT);
>> + }
>> +
>> + if (tb_compare(mftb(), con_flush_timeout) != TB_AAFTERB)
>> + return OPAL_BUSY;
>> +
>> + return OPAL_INTERNAL_ERROR;
>
> Would OPAL_TIMEOUT be a better error code? Internal means something in
> skiboot has gone wrong or is inconsistent doesn't it, and is pretty
> broad.
Yeah. OPAL_TIMEOUT is better.
>
>> }
>>
>> + con_flush_timeout = 0;
>> return OPAL_SUCCESS;
>> }
>>
>> @@ -273,6 +295,11 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
>>
>> lock(&uart_lock);
>>
>> + if (!uart_tx_buf_space()) {
>> + unlock(&uart_lock);
>> + return OPAL_INTERNAL_ERROR;
>> + }
>
> If the uart is full from a previous write, it's possible there would
> have been no buffer space freed up and then you could get an error here
> I think?
Actually above check in redundant. Kernel calls uart_opal_write_buffer_space()
before
calling this function... which makes sure we have space to write. Since this is
dedicated
buffer for kernel we don't need this check.
>
> It seems better if you check if uart_con_flush returns timeout here and
> you have zero bytes written, then return error here.
>
> What if we do all the timing at the uart level, then the buffer
> management can just ask if it's timed out for some length of time, like
> this?
Yeah.. That's better one.
I tried below patch (with lock fix, see below). Its making console really slow.
will look into it.
>
> ---
> hw/lpc-uart.c | 108 +++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 75 insertions(+), 33 deletions(-)
>
> diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
> index 979a617c3..1a25c62ea 100644
> --- a/hw/lpc-uart.c
> +++ b/hw/lpc-uart.c
> @@ -56,6 +56,7 @@ DEFINE_LOG_ENTRY(OPAL_RC_UART_INIT, OPAL_PLATFORM_ERR_EVT, OPAL_UART,
> static struct lock uart_lock = LOCK_UNLOCKED;
> static struct dt_node *uart_node;
> static uint32_t uart_base;
> +static uint64_t uart_tx_full_time;
> static bool has_irq = false, irq_ok, rx_full, tx_full;
> static uint8_t tx_room;
> static uint8_t cached_ier;
> @@ -95,28 +96,60 @@ static inline void uart_write(unsigned int reg, uint8_t val)
> lpc_outb(val, uart_base + reg);
> }
>
> -static void uart_check_tx_room(void)
> +static bool uart_check_tx_room(void)
> {
> + if (tx_room)
> + return true;
I think this is redundant check for most cases except uart_wait_tx_room().
> +
> if (uart_read(REG_LSR) & LSR_THRE) {
> /* FIFO is 16 entries */
> tx_room = 16;
> tx_full = false;
> + return true;
> + }
> +
> + return false;
> +}
> +
May be worth to mention its called with uart lock.
> +static void uart_write_thr(uint8_t val)
> +{
> + assert(!tx_full);
> + assert(tx_room);
IMO this is not required. Worst case we may overwrite data is FIFO buffer. which
is ok.
> +
> + uart_write(REG_THR, val);
> +
> + tx_room--;
> + if (tx_room == 0) {
> + if (!uart_check_tx_room())
> + uart_tx_full_time = mftb();
> }
> }
>
> -static void uart_wait_tx_room(void)
> +static bool uart_timed_out(unsigned long msecs)
> +{
> + if (uart_check_tx_room())
> + return false;
> +
> + if (tb_compare(mftb(), uart_tx_full_time + msecs_to_tb(msecs)) == TB_AAFTERB)
> + return true;
> + return false;
> +}
> +
> +static bool uart_wait_tx_room(void)
> {
> - while (!tx_room) {
> - uart_check_tx_room();
> - if (!tx_room) {
> - smt_lowest();
> - do {
> - barrier();
> - uart_check_tx_room();
> - } while (!tx_room);
> + if (uart_check_tx_room())
> + return true;
> +
> + smt_lowest();
> + while (!uart_check_tx_room()) {
> + if (uart_timed_out(10)) {
> smt_medium();
> + return false;
> }
> }
> + smt_medium();
> +
> + return true;
> }
>
> static void uart_update_ier(void)
> @@ -160,18 +193,18 @@ static size_t uart_con_write(const char *buf, size_t len)
> return written;
>
> lock(&uart_lock);
> - while(written < len) {
> - if (tx_room == 0) {
> - uart_wait_tx_room();
> - if (tx_room == 0)
> - goto bail;
> - } else {
> - uart_write(REG_THR, buf[written++]);
> - tx_room--;
> - }
> + while (written < len) {
> + if (!uart_wait_tx_room())
> + break;
> +
> + uart_write_thr(buf[written++]);
> }
> - bail:
> +
> + if (!written && uart_timed_out(1000))
+ unlock(&uart_lock);
-Vasant
> + return written; /* swallow data */
> +
> unlock(&uart_lock);
> +
> return written;
> }
>
> @@ -223,25 +256,27 @@ static int64_t uart_con_flush(void)
> * otherwise the Linux console isn't
> * usable (too slow).
> */
> - if (irq_ok)
> - uart_check_tx_room();
> - else
> - uart_wait_tx_room();
> + if (irq_ok) {
> + if (!uart_check_tx_room())
> + break;
> + } else {
> + if (!uart_wait_tx_room())
> + break;
> + }
> }
> - if (tx_room == 0) {
> - tx_full = true;
> - break;
> - }
> - uart_write(REG_THR, out_buf[out_buf_cons++]);
> +
> + uart_write_thr(out_buf[out_buf_cons++]);
> out_buf_cons %= OUT_BUF_SIZE;
> - tx_room--;
> }
> if (tx_full != tx_was_full)
> uart_update_ier();
> if (out_buf_prod != out_buf_cons) {
> /* Return busy if nothing was flushed this call */
> - if (out_buf_cons == out_buf_cons_initial)
> + if (out_buf_cons == out_buf_cons_initial) {
> + if (uart_timed_out(1000))
> + return OPAL_TIMEOUT;
> return OPAL_BUSY;
> + }
> /* Return partial if there's more to flush */
> return OPAL_PARTIAL;
> }
> @@ -259,6 +294,7 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
> const uint8_t *buffer)
> {
> size_t written = 0, len = be64_to_cpu(*__length);
> + int64_t ret = OPAL_SUCCESS;
>
> if (term_number != 0)
> return OPAL_PARAMETER;
> @@ -275,24 +311,30 @@ static int64_t uart_opal_write(int64_t term_number, __be64 *__length,
> /* Flush out buffer again */
> uart_con_flush();
>
> + if (!written && uart_timed_out(1000))
> + ret = OPAL_TIMEOUT;
> unlock(&uart_lock);
>
> *__length = cpu_to_be64(written);
>
> - return OPAL_SUCCESS;
> + return ret;
> }
>
> static int64_t uart_opal_write_buffer_space(int64_t term_number,
> __be64 *__length)
> {
> + int64_t ret = OPAL_SUCCESS;
> if (term_number != 0)
> return OPAL_PARAMETER;
>
> lock(&uart_lock);
> + uart_con_flush();
> + if (!uart_tx_buf_space() && uart_timed_out(1000))
> + ret = OPAL_TIMEOUT;
> *__length = cpu_to_be64(uart_tx_buf_space());
> unlock(&uart_lock);
>
> - return OPAL_SUCCESS;
> + return ret;
> }
>
> /* Must be called with UART lock held */
>
More information about the Skiboot
mailing list