[Skiboot] [PATCH 06/11] hw/sbe-p9: Change SBE lagging detection

Nicholas Piggin npiggin at gmail.com
Sat Nov 16 15:07:22 AEDT 2024


Disabling the SBE timer entirely is counterproductive, the SBE
interrupt can be delayed for a number of reasons including booting
or OS bugs, and there is no other timer to replace it. If the SBE
timer is detected to be lagging, increase polling rate until it
fires but keep it running.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 core/interrupts.c |  2 +-
 core/timer.c      |  4 ++--
 hw/sbe-p8.c       |  3 +++
 hw/sbe-p9.c       | 16 +++++++++-------
 hw/sbe.c          |  8 +++++++-
 include/sbe.h     |  4 ++++
 6 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/core/interrupts.c b/core/interrupts.c
index 03a37b2ab..667ab7640 100644
--- a/core/interrupts.c
+++ b/core/interrupts.c
@@ -501,7 +501,7 @@ static int64_t opal_handle_interrupt(uint32_t isn, __be64 *outstanding_event_mas
 	/* Run it */
 	is->ops->interrupt(is, isn);
 
-	/* Check timers if SBE timer isn't working */
+	/* Check timers if SBE timer isn't working well */
 	if (!sbe_timer_ok())
 		check_timers(true);
 
diff --git a/core/timer.c b/core/timer.c
index f512972eb..b1e3e6a13 100644
--- a/core/timer.c
+++ b/core/timer.c
@@ -35,7 +35,7 @@ static uint64_t timer_poll_gen;
 
 static inline void update_timer_expiry(uint64_t target)
 {
-	if (sbe_timer_ok())
+	if (sbe_timer_present())
 		sbe_update_timer_expiry(target);
 }
 
@@ -294,7 +294,7 @@ void late_init_timers(void)
 	 */
 	if (platform.heartbeat_time) {
 		heartbeat = platform.heartbeat_time();
-	} else if (sbe_timer_ok()) {
+	} else if (sbe_timer_present()) {
 		heartbeat = HEARTBEAT_DEFAULT_MS * 10;
 	}
 
diff --git a/hw/sbe-p8.c b/hw/sbe-p8.c
index 70edec65e..9be9eb7a5 100644
--- a/hw/sbe-p8.c
+++ b/hw/sbe-p8.c
@@ -120,6 +120,7 @@ void p8_sbe_update_timer_expiry(uint64_t new_target)
 				prlog(PR_DEBUG, "SLW: Stuck with odd generation !\n");
 				_xscom_unlock();
 				sbe_has_timer = false;
+				sbe_timer_good = false;
 				p8_sbe_dump_timer_ffdc();
 				return;
 			}
@@ -153,6 +154,7 @@ void p8_sbe_update_timer_expiry(uint64_t new_target)
 			prlog(PR_ERR,
 			      "SLW: Timer appears to not be running !\n");
 			sbe_has_timer = false;
+			sbe_timer_good = false;
 			p8_sbe_dump_timer_ffdc();
 		}
 		sbe_last_gen = gen;
@@ -187,4 +189,5 @@ void p8_sbe_init_timer(void)
 	prlog(PR_INFO, "SLW: Timer facility on chip %d, resolution %dus\n",
 	      sbe_timer_chip, tick_us);
 	sbe_has_timer = true;
+	sbe_timer_good = true;
 }
diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
index f164ff8a7..3bbd5a456 100644
--- a/hw/sbe-p9.c
+++ b/hw/sbe-p9.c
@@ -538,6 +538,10 @@ static void p9_sbe_timer_response(struct p9_sbe *sbe)
 	if (sbe->chip_id != sbe_default_chip_id)
 		return;
 
+	if (!sbe_timer_good) {
+		sbe_timer_good = true;
+		prlog(PR_DEBUG, "Lagging timer fired, decreasing polling\n");
+	}
 	sbe_timer_in_progress = false;
 	/* Drop lock and call timers */
 	unlock(&sbe->lock);
@@ -686,19 +690,16 @@ static void p9_sbe_timer_poll(struct p9_sbe *sbe)
 	if (sbe->chip_id != sbe_default_chip_id)
 		return;
 
-	if (!sbe_has_timer || !sbe_timer_in_progress)
+	if (!sbe_timer_good || !sbe_timer_in_progress)
 		return;
 
 	if (tb_compare(mftb(), sbe_last_gen_stamp + msecs_to_tb(10))
 	    != TB_AAFTERB)
 		return;
 
-	prlog(PR_ERR, "Timer stuck, falling back to OPAL pollers.\n");
-	prlog(PR_ERR, "You will likely have slower I2C and may have "
-	      "experienced increased jitter.\n");
-	p9_sbe_reg_dump(sbe->chip_id);
-	sbe_has_timer = false;
-	sbe_timer_in_progress = false;
+	sbe_timer_good = false;
+
+	prlog(PR_DEBUG, "Timer is lagging, increasing polling\n");
 }
 
 static void p9_sbe_timeout_poll_one(struct p9_sbe *sbe)
@@ -960,6 +961,7 @@ static void p9_sbe_timer_init(void)
 	assert(timer_ctrl_msg);
 	init_lock(&sbe_timer_lock);
 	sbe_has_timer = true;
+	sbe_timer_good = true;
 	sbe_timer_target = ~0ull;
 	sbe_last_gen_stamp = ~0ull;
 	sbe_timer_def_tb = usecs_to_tb(SBE_TIMER_DEFAULT_US);
diff --git a/hw/sbe.c b/hw/sbe.c
index 859581662..56927bf60 100644
--- a/hw/sbe.c
+++ b/hw/sbe.c
@@ -13,10 +13,11 @@
 #include <stdbool.h>
 
 bool sbe_has_timer = false;
+bool sbe_timer_good = false;
 
 void sbe_update_timer_expiry(uint64_t target)
 {
-	assert(sbe_timer_ok);
+	assert(sbe_has_timer);
 
 	if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10)
 		p9_sbe_update_timer_expiry(target);
@@ -28,6 +29,11 @@ void sbe_update_timer_expiry(uint64_t target)
 }
 
 bool sbe_timer_ok(void)
+{
+	return sbe_timer_good;
+}
+
+bool sbe_timer_present(void)
 {
 	return sbe_has_timer;
 }
diff --git a/include/sbe.h b/include/sbe.h
index 24d21fa0b..d5f844b76 100644
--- a/include/sbe.h
+++ b/include/sbe.h
@@ -10,8 +10,12 @@
 extern void sbe_update_timer_expiry(uint64_t target);
 
 /* Is SBE timer available ? */
+extern bool sbe_timer_present(void);
+
+/* Is SBE timer keeping good time ? */
 extern bool sbe_timer_ok(void);
 
 extern bool sbe_has_timer;
+extern bool sbe_timer_good;
 
 #endif	/* __SBE_P9_H */
-- 
2.45.2



More information about the Skiboot mailing list