[Skiboot] [PATCH 4/5] lock: Fix interactions between lock dependency checker and stack checker

Benjamin Herrenschmidt benh at kernel.crashing.org
Wed Aug 15 15:10:38 AEST 2018


The lock dependency checker does a few nasty things that can cause
re-entrancy deadlocks in conjunction with the stack checker or
in fact other debug tests.

A lot of it revolves around taking a new lock (dl_lock) as part
of the locking process.

This tries to fix it by making sure we do not hit the stack
checker while holding dl_lock.

We achieve that in part by directly using the low-level __try_lock
and manually unlocking on the dl_lock, and making some functions
"nomcount".

In addition, we mark the dl_lock as being in the console path to
avoid deadlocks with the UART driver.

We move the enabling of the deadlock checker to a separate config
option from DEBUG_LOCKS as well, in case we choose to disable it
by default later on.

Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---
 core/cpu.c       |  7 +++++++
 core/lock.c      | 45 ++++++++++++++++++++++++++++++---------------
 include/config.h |  3 +++
 include/cpu.h    |  3 +++
 4 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/core/cpu.c b/core/cpu.c
index b5bfb277..a4c4d4ff 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -714,6 +714,13 @@ struct cpu_thread *find_cpu_by_pir(u32 pir)
 	return &cpu_stacks[pir].cpu;
 }
 
+struct cpu_thread *__nomcount find_cpu_by_pir_nomcount(u32 pir)
+{
+	if (pir > cpu_max_pir)
+		return NULL;
+	return &cpu_stacks[pir].cpu;
+}
+
 struct cpu_thread *find_cpu_by_server(u32 server_no)
 {
 	struct cpu_thread *t;
diff --git a/core/lock.c b/core/lock.c
index 1fc71a92..fca8f465 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -29,9 +29,8 @@
 bool bust_locks = true;
 
 #ifdef DEBUG_LOCKS
-static struct lock dl_lock = LOCK_UNLOCKED;
 
-static void lock_error(struct lock *l, const char *reason, uint16_t err)
+static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err)
 {
 	bust_locks = true;
 
@@ -42,13 +41,13 @@ static void lock_error(struct lock *l, const char *reason, uint16_t err)
 	abort();
 }
 
-static void lock_check(struct lock *l)
+static inline void __nomcount lock_check(struct lock *l)
 {
 	if ((l->lock_val & 1) && (l->lock_val >> 32) == this_cpu()->pir)
 		lock_error(l, "Invalid recursive lock", 0);
 }
 
-static void unlock_check(struct lock *l)
+static inline void __nomcount unlock_check(struct lock *l)
 {
 	if (!(l->lock_val & 1))
 		lock_error(l, "Unlocking unlocked lock", 1);
@@ -102,8 +101,21 @@ static inline bool lock_timeout(unsigned long start)
 	return false;
 }
 #else
+static inline void lock_check(struct lock *l) { };
+static inline void unlock_check(struct lock *l) { };
+static inline bool lock_timeout(unsigned long s) { return false; }
+#endif /* DEBUG_LOCKS */
+
+#if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS)
+
+static struct lock dl_lock = {
+	.lock_val = 0,
+	.in_con_path = true,
+	.owner = LOCK_CALLER
+};
+
 /* Find circular dependencies in the lock requests. */
-static bool check_deadlock(void)
+static __nomcount inline bool check_deadlock(void)
 {
 	uint32_t lock_owner, start, i;
 	struct cpu_thread *next_cpu;
@@ -126,7 +138,7 @@ static bool check_deadlock(void)
 		if (lock_owner == start)
 			return true;
 
-		next_cpu = find_cpu_by_pir(lock_owner);
+		next_cpu = find_cpu_by_pir_nomcount(lock_owner);
 
 		if (!next_cpu)
 			return false;
@@ -141,6 +153,7 @@ static bool check_deadlock(void)
 static void add_lock_request(struct lock *l)
 {
 	struct cpu_thread *curr = this_cpu();
+	bool dead;
 
 	if (curr->state != cpu_state_active &&
 	    curr->state != cpu_state_os)
@@ -148,10 +161,12 @@ static void add_lock_request(struct lock *l)
 
 	/*
 	 * For deadlock detection we must keep the lock states constant
-	 * while doing the deadlock check.
+	 * while doing the deadlock check. However we need to avoid
+	 * clashing with the stack checker, so no mcount and use an
+	 * inline implementation of the lock for the dl_lock
 	 */
 	for (;;) {
-		if (try_lock(&dl_lock))
+		if (__try_lock(curr, &dl_lock))
 			break;
 		smt_lowest();
 		while (dl_lock.lock_val)
@@ -161,10 +176,13 @@ static void add_lock_request(struct lock *l)
 
 	curr->requested_lock = l;
 
-	if (check_deadlock())
-		lock_error(l, "Deadlock detected", 0);
+	dead = check_deadlock();
 
-	unlock(&dl_lock);
+	lwsync();
+	dl_lock.lock_val = 0;
+
+	if (dead)
+		lock_error(l, "Deadlock detected", 0);
 }
 
 static void remove_lock_request(void)
@@ -172,12 +190,9 @@ static void remove_lock_request(void)
 	this_cpu()->requested_lock = NULL;
 }
 #else
-static inline void lock_check(struct lock *l) { };
-static inline void unlock_check(struct lock *l) { };
 static inline void add_lock_request(struct lock *l) { };
 static inline void remove_lock_request(void) { };
-static inline bool lock_timeout(unsigned long s) { return false; }
-#endif /* DEBUG_LOCKS */
+#endif /* #if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS) */
 
 bool lock_held_by_me(struct lock *l)
 {
diff --git a/include/config.h b/include/config.h
index cd8a0a65..6b36590c 100644
--- a/include/config.h
+++ b/include/config.h
@@ -39,6 +39,9 @@
 /* Enable lock debugging */
 #define DEBUG_LOCKS		1
 
+/* Enable lock dependency checker */
+#define DEADLOCK_CHECKER	1
+
 /* Enable malloc debugging */
 #define DEBUG_MALLOC		1
 
diff --git a/include/cpu.h b/include/cpu.h
index ae318572..2fe47982 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -174,6 +174,9 @@ extern struct cpu_thread *find_cpu_by_node(struct dt_node *cpu);
 extern struct cpu_thread *find_cpu_by_server(u32 server_no);
 extern struct cpu_thread *find_cpu_by_pir(u32 pir);
 
+/* Used for lock internals to avoid re-entrancy */
+extern struct cpu_thread *__nomcount find_cpu_by_pir_nomcount(u32 pir);
+
 extern struct dt_node *get_cpu_node(u32 pir);
 
 /* Iterator */
-- 
2.17.1



More information about the Skiboot mailing list