[Skiboot] [PATCH 1/2] core/lock: fix timeout warning causing a deadlock false positive
Nicholas Piggin
npiggin at gmail.com
Sun Sep 30 13:48:20 AEST 2018
If a lock waiter exceeds the warning timeout, it prints a message
while still registered as requesting the lock. Printing the message
can take locks, so if one is held when the owner of the original
lock tries to print a message, it will get a false positive deadlock
detection, which brings down the system.
This can easily be hit when there is a lot of HMI activity from a
KVM guest, where the timebase was not returned to host timebase
before calling the HMI handler.
Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
core/lock.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/core/lock.c b/core/lock.c
index fca8f465..fc051ca4 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -28,6 +28,8 @@
*/
bool bust_locks = true;
+#define LOCK_TIMEOUT_MS 5000
+
#ifdef DEBUG_LOCKS
static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err)
@@ -78,7 +80,6 @@ static inline bool __nomcount __try_lock(struct cpu_thread *cpu, struct lock *l)
return false;
}
-#define LOCK_TIMEOUT_MS 5000
static inline bool lock_timeout(unsigned long start)
{
/* Print warning if lock has been spinning for more than TIMEOUT_MS */
@@ -92,9 +93,6 @@ static inline bool lock_timeout(unsigned long start)
*/
if( !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID))
return false;
- prlog(PR_WARNING, "WARNING: Lock has been "\
- "spinning for %lums\n", wait - start);
- backtrace();
return true;
}
@@ -252,8 +250,19 @@ void lock_caller(struct lock *l, const char *owner)
barrier();
smt_medium();
- if (start && !timeout_warn)
- timeout_warn = lock_timeout(start);
+ if (start && !timeout_warn && lock_timeout(start)) {
+ /*
+ * Holding the lock request while printing a
+ * timeout and taking console locks can result
+ * in deadlock fals positive if the lock owner
+ * tries to take the console lock. So drop it.
+ */
+ remove_lock_request();
+ prlog(PR_WARNING, "WARNING: Lock has been spinning for over %dms\n", LOCK_TIMEOUT_MS);
+ backtrace();
+ add_lock_request(l);
+ timeout_warn = true;
+ }
}
remove_lock_request();
--
2.18.0
More information about the Skiboot
mailing list