[Skiboot] [PATCH v2 06/11] hw/sbe-p9: Change SBE lagging detection
Nicholas Piggin
npiggin at gmail.com
Tue Jan 14 22:46:47 AEDT 2025
Disabling the SBE timer entirely is counter-productive: the SBE
interrupt can be delayed for a number of reasons including booting
or OS bugs, and there is no other timer to replace it. If the SBE
timer is detected to be lagging, increase polling rate until it
fires but keep it running.
Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
core/interrupts.c | 2 +-
core/test/run-timer.c | 5 +++++
core/timer.c | 4 ++--
hw/sbe-p8.c | 3 +++
hw/sbe-p9.c | 16 +++++++++-------
hw/sbe.c | 8 +++++++-
include/sbe.h | 4 ++++
7 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/core/interrupts.c b/core/interrupts.c
index 03a37b2ab..667ab7640 100644
--- a/core/interrupts.c
+++ b/core/interrupts.c
@@ -501,7 +501,7 @@ static int64_t opal_handle_interrupt(uint32_t isn, __be64 *outstanding_event_mas
/* Run it */
is->ops->interrupt(is, isn);
- /* Check timers if SBE timer isn't working */
+ /* Check timers if SBE timer isn't working well */
if (!sbe_timer_ok())
check_timers(true);
diff --git a/core/test/run-timer.c b/core/test/run-timer.c
index 4e540caae..a9b6c85bb 100644
--- a/core/test/run-timer.c
+++ b/core/test/run-timer.c
@@ -66,6 +66,11 @@ bool sbe_timer_ok(void)
return true;
}
+bool sbe_timer_present(void)
+{
+ return true;
+}
+
int main(void)
{
unsigned int i;
diff --git a/core/timer.c b/core/timer.c
index 7f8c4b667..1119cbb5e 100644
--- a/core/timer.c
+++ b/core/timer.c
@@ -54,7 +54,7 @@ static inline void this_cpu_set_running_timer(bool running)
static inline void update_timer_expiry(uint64_t target)
{
- if (sbe_timer_ok())
+ if (sbe_timer_present())
sbe_update_timer_expiry(target);
}
@@ -313,7 +313,7 @@ void late_init_timers(void)
*/
if (platform.heartbeat_time) {
heartbeat = platform.heartbeat_time();
- } else if (sbe_timer_ok()) {
+ } else if (sbe_timer_present()) {
heartbeat = HEARTBEAT_DEFAULT_MS * 10;
}
diff --git a/hw/sbe-p8.c b/hw/sbe-p8.c
index 70edec65e..9be9eb7a5 100644
--- a/hw/sbe-p8.c
+++ b/hw/sbe-p8.c
@@ -120,6 +120,7 @@ void p8_sbe_update_timer_expiry(uint64_t new_target)
prlog(PR_DEBUG, "SLW: Stuck with odd generation !\n");
_xscom_unlock();
sbe_has_timer = false;
+ sbe_timer_good = false;
p8_sbe_dump_timer_ffdc();
return;
}
@@ -153,6 +154,7 @@ void p8_sbe_update_timer_expiry(uint64_t new_target)
prlog(PR_ERR,
"SLW: Timer appears to not be running !\n");
sbe_has_timer = false;
+ sbe_timer_good = false;
p8_sbe_dump_timer_ffdc();
}
sbe_last_gen = gen;
@@ -187,4 +189,5 @@ void p8_sbe_init_timer(void)
prlog(PR_INFO, "SLW: Timer facility on chip %d, resolution %dus\n",
sbe_timer_chip, tick_us);
sbe_has_timer = true;
+ sbe_timer_good = true;
}
diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
index f164ff8a7..bdc6a71a3 100644
--- a/hw/sbe-p9.c
+++ b/hw/sbe-p9.c
@@ -538,6 +538,10 @@ static void p9_sbe_timer_response(struct p9_sbe *sbe)
if (sbe->chip_id != sbe_default_chip_id)
return;
+ if (!sbe_timer_good) {
+ sbe_timer_good = true;
+ prlog_once(PR_WARNING, "Lagging timer fired, decreasing polling\n rate.");
+ }
sbe_timer_in_progress = false;
/* Drop lock and call timers */
unlock(&sbe->lock);
@@ -686,19 +690,16 @@ static void p9_sbe_timer_poll(struct p9_sbe *sbe)
if (sbe->chip_id != sbe_default_chip_id)
return;
- if (!sbe_has_timer || !sbe_timer_in_progress)
+ if (!sbe_timer_good || !sbe_timer_in_progress)
return;
if (tb_compare(mftb(), sbe_last_gen_stamp + msecs_to_tb(10))
!= TB_AAFTERB)
return;
- prlog(PR_ERR, "Timer stuck, falling back to OPAL pollers.\n");
- prlog(PR_ERR, "You will likely have slower I2C and may have "
- "experienced increased jitter.\n");
- p9_sbe_reg_dump(sbe->chip_id);
- sbe_has_timer = false;
- sbe_timer_in_progress = false;
+ sbe_timer_good = false;
+
+ prlog_once(PR_WARNING, "Timer is lagging, increasing polling rate.\n");
}
static void p9_sbe_timeout_poll_one(struct p9_sbe *sbe)
@@ -960,6 +961,7 @@ static void p9_sbe_timer_init(void)
assert(timer_ctrl_msg);
init_lock(&sbe_timer_lock);
sbe_has_timer = true;
+ sbe_timer_good = true;
sbe_timer_target = ~0ull;
sbe_last_gen_stamp = ~0ull;
sbe_timer_def_tb = usecs_to_tb(SBE_TIMER_DEFAULT_US);
diff --git a/hw/sbe.c b/hw/sbe.c
index 859581662..56927bf60 100644
--- a/hw/sbe.c
+++ b/hw/sbe.c
@@ -13,10 +13,11 @@
#include <stdbool.h>
bool sbe_has_timer = false;
+bool sbe_timer_good = false;
void sbe_update_timer_expiry(uint64_t target)
{
- assert(sbe_timer_ok);
+ assert(sbe_has_timer);
if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10)
p9_sbe_update_timer_expiry(target);
@@ -28,6 +29,11 @@ void sbe_update_timer_expiry(uint64_t target)
}
bool sbe_timer_ok(void)
+{
+ return sbe_timer_good;
+}
+
+bool sbe_timer_present(void)
{
return sbe_has_timer;
}
diff --git a/include/sbe.h b/include/sbe.h
index 24d21fa0b..d5f844b76 100644
--- a/include/sbe.h
+++ b/include/sbe.h
@@ -10,8 +10,12 @@
extern void sbe_update_timer_expiry(uint64_t target);
/* Is SBE timer available ? */
+extern bool sbe_timer_present(void);
+
+/* Is SBE timer keeping good time ? */
extern bool sbe_timer_ok(void);
extern bool sbe_has_timer;
+extern bool sbe_timer_good;
#endif /* __SBE_P9_H */
--
2.45.2
More information about the Skiboot
mailing list