[Skiboot] [PATCH 1/2] core/lock: fix timeout warning causing a deadlock false positive

Nicholas Piggin npiggin at gmail.com
Sun Sep 30 13:48:20 AEST 2018


If a lock waiter exceeds the warning timeout, it prints a message
while still registered as requesting the lock. Printing the message
can take locks, so if one is held when the owner of the original
lock tries to print a message, it will get a false positive deadlock
detection, which brings down the system.

This can easily be hit when there is a lot of HMI activity from a
KVM guest, where the timebase was not returned to host timebase
before calling the HMI handler.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 core/lock.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/core/lock.c b/core/lock.c
index fca8f465..fc051ca4 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -28,6 +28,8 @@
  */
 bool bust_locks = true;
 
+#define LOCK_TIMEOUT_MS 5000
+
 #ifdef DEBUG_LOCKS
 
 static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err)
@@ -78,7 +80,6 @@ static inline bool __nomcount __try_lock(struct cpu_thread *cpu, struct lock *l)
 	return false;
 }
 
-#define LOCK_TIMEOUT_MS 5000
 static inline bool lock_timeout(unsigned long start)
 {
 	/* Print warning if lock has been spinning for more than TIMEOUT_MS */
@@ -92,9 +93,6 @@ static inline bool lock_timeout(unsigned long start)
 		 */
 		if( !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID))
 			return false;
-		prlog(PR_WARNING, "WARNING: Lock has been "\
-		      "spinning for %lums\n", wait - start);
-		backtrace();
 		return true;
 	}
 
@@ -252,8 +250,19 @@ void lock_caller(struct lock *l, const char *owner)
 			barrier();
 		smt_medium();
 
-		if (start && !timeout_warn)
-			timeout_warn = lock_timeout(start);
+		if (start && !timeout_warn && lock_timeout(start)) {
+			/*
+			 * Holding the lock request while printing a
+			 * timeout and taking console locks can result
+			 * in deadlock fals positive if the lock owner
+			 * tries to take the console lock. So drop it.
+			 */
+			remove_lock_request();
+			prlog(PR_WARNING, "WARNING: Lock has been spinning for over %dms\n", LOCK_TIMEOUT_MS);
+			backtrace();
+			add_lock_request(l);
+			timeout_warn = true;
+		}
 	}
 
 	remove_lock_request();
-- 
2.18.0



More information about the Skiboot mailing list