[Skiboot] [PATCH 4/5] lock: Fix interactions between lock dependency checker and stack checker
Benjamin Herrenschmidt
benh at kernel.crashing.org
Wed Aug 15 15:10:38 AEST 2018
The lock dependency checker does a few nasty things that can cause
re-entrancy deadlocks in conjunction with the stack checker or,
in fact, other debug tests.
A lot of it revolves around taking a new lock (dl_lock) as part
of the locking process.
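Roughly, the failure mode looks like this (a simplified sketch of the
call chain; names and exact paths are illustrative):

    lock(l)
      add_lock_request(l)
        lock(&dl_lock)                 /* checker takes its own lock */
          <mcount-instrumented call>   /* stack checker runs on entry */
            stack checker warns
              console output           /* takes the console lock ...  */
                lock(...)              /* ... re-entering this code   */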
This tries to fix it by making sure we do not hit the stack
checker while holding dl_lock.
We achieve that in part by directly using the low-level __try_lock
and manually unlocking the dl_lock, and by marking some functions
"nomcount".
In addition, we mark the dl_lock as being in the console path to
avoid deadlocks with the UART driver.
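Condensed, the dl_lock handling ends up looking like this (a sketch
assembled from the hunks below; the spin body with barrier() and
smt_medium() is the surrounding context, not part of this diff):

    static struct lock dl_lock = {
            .lock_val = 0,
            .in_con_path = true,    /* don't deadlock against the UART */
            .owner = LOCK_CALLER
    };

    /* in add_lock_request() */
    for (;;) {
            if (__try_lock(curr, &dl_lock)) /* low-level, no debug hooks */
                    break;
            smt_lowest();
            while (dl_lock.lock_val)
                    barrier();
            smt_medium();
    }

    curr->requested_lock = l;
    dead = check_deadlock();        /* __nomcount, stack-checker safe */

    lwsync();                       /* order the check before release */
    dl_lock.lock_val = 0;           /* manual unlock, no unlock_check() */

    if (dead)
            lock_error(l, "Deadlock detected", 0);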
We move the enabling of the deadlock checker to a separate config
option from DEBUG_LOCKS as well, in case we choose to disable it
by default later on.
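That way a build can keep the cheap DEBUG_LOCKS checks while dropping
the dependency graph walk, e.g. (illustrative include/config.h fragment):

    #define DEBUG_LOCKS 1            /* recursion/owner checks stay on */
    /* #define DEADLOCK_CHECKER 1 */ /* dependency walk compiles to stubs */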
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---
core/cpu.c | 7 +++++++
core/lock.c | 45 ++++++++++++++++++++++++++++++---------------
include/config.h | 3 +++
include/cpu.h | 3 +++
4 files changed, 43 insertions(+), 15 deletions(-)
diff --git a/core/cpu.c b/core/cpu.c
index b5bfb277..a4c4d4ff 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -714,6 +714,13 @@ struct cpu_thread *find_cpu_by_pir(u32 pir)
	return &cpu_stacks[pir].cpu;
}
+struct cpu_thread __nomcount *find_cpu_by_pir_nomcount(u32 pir)
+{
+	if (pir > cpu_max_pir)
+		return NULL;
+	return &cpu_stacks[pir].cpu;
+}
+
struct cpu_thread *find_cpu_by_server(u32 server_no)
{
	struct cpu_thread *t;
diff --git a/core/lock.c b/core/lock.c
index 1fc71a92..fca8f465 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -29,9 +29,8 @@
bool bust_locks = true;
#ifdef DEBUG_LOCKS
-static struct lock dl_lock = LOCK_UNLOCKED;
-static void lock_error(struct lock *l, const char *reason, uint16_t err)
+static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err)
{
	bust_locks = true;
@@ -42,13 +41,13 @@ static void lock_error(struct lock *l, const char *reason, uint16_t err)
	abort();
}
-static void lock_check(struct lock *l)
+static inline void __nomcount lock_check(struct lock *l)
{
	if ((l->lock_val & 1) && (l->lock_val >> 32) == this_cpu()->pir)
		lock_error(l, "Invalid recursive lock", 0);
}
-static void unlock_check(struct lock *l)
+static inline void __nomcount unlock_check(struct lock *l)
{
	if (!(l->lock_val & 1))
		lock_error(l, "Unlocking unlocked lock", 1);
@@ -102,8 +101,21 @@ static inline bool lock_timeout(unsigned long start)
	return false;
}
#else
+static inline void lock_check(struct lock *l) { };
+static inline void unlock_check(struct lock *l) { };
+static inline bool lock_timeout(unsigned long s) { return false; }
+#endif /* DEBUG_LOCKS */
+
+#if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS)
+
+static struct lock dl_lock = {
+	.lock_val = 0,
+	.in_con_path = true,
+	.owner = LOCK_CALLER
+};
+
/* Find circular dependencies in the lock requests. */
-static bool check_deadlock(void)
+static __nomcount inline bool check_deadlock(void)
{
	uint32_t lock_owner, start, i;
	struct cpu_thread *next_cpu;
@@ -126,7 +138,7 @@ static bool check_deadlock(void)
		if (lock_owner == start)
			return true;

-		next_cpu = find_cpu_by_pir(lock_owner);
+		next_cpu = find_cpu_by_pir_nomcount(lock_owner);

		if (!next_cpu)
			return false;
@@ -141,6 +153,7 @@ static bool check_deadlock(void)
static void add_lock_request(struct lock *l)
{
	struct cpu_thread *curr = this_cpu();
+	bool dead;

	if (curr->state != cpu_state_active &&
	    curr->state != cpu_state_os)
@@ -148,10 +161,12 @@ static void add_lock_request(struct lock *l)
	/*
	 * For deadlock detection we must keep the lock states constant
-	 * while doing the deadlock check.
+	 * while doing the deadlock check. However, we need to avoid
+	 * clashing with the stack checker, so we use no mcount and an
+	 * inline implementation of the lock for the dl_lock.
	 */
	for (;;) {
-		if (try_lock(&dl_lock))
+		if (__try_lock(curr, &dl_lock))
			break;
		smt_lowest();
		while (dl_lock.lock_val)
@@ -161,10 +176,13 @@ static void add_lock_request(struct lock *l)
	curr->requested_lock = l;

-	if (check_deadlock())
-		lock_error(l, "Deadlock detected", 0);
+	dead = check_deadlock();

-	unlock(&dl_lock);
+	lwsync();
+	dl_lock.lock_val = 0;
+
+	if (dead)
+		lock_error(l, "Deadlock detected", 0);
}
static void remove_lock_request(void)
@@ -172,12 +190,9 @@ static void remove_lock_request(void)
	this_cpu()->requested_lock = NULL;
}
#else
-static inline void lock_check(struct lock *l) { };
-static inline void unlock_check(struct lock *l) { };
static inline void add_lock_request(struct lock *l) { };
static inline void remove_lock_request(void) { };
-static inline bool lock_timeout(unsigned long s) { return false; }
-#endif /* DEBUG_LOCKS */
+#endif /* #if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS) */
bool lock_held_by_me(struct lock *l)
{
diff --git a/include/config.h b/include/config.h
index cd8a0a65..6b36590c 100644
--- a/include/config.h
+++ b/include/config.h
@@ -39,6 +39,9 @@
/* Enable lock debugging */
#define DEBUG_LOCKS 1
+/* Enable lock dependency checker */
+#define DEADLOCK_CHECKER 1
+
/* Enable malloc debugging */
#define DEBUG_MALLOC 1
diff --git a/include/cpu.h b/include/cpu.h
index ae318572..2fe47982 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -174,6 +174,9 @@ extern struct cpu_thread *find_cpu_by_node(struct dt_node *cpu);
extern struct cpu_thread *find_cpu_by_server(u32 server_no);
extern struct cpu_thread *find_cpu_by_pir(u32 pir);
+/* Used for lock internals to avoid re-entrancy */
+extern struct cpu_thread __nomcount *find_cpu_by_pir_nomcount(u32 pir);
+
extern struct dt_node *get_cpu_node(u32 pir);
/* Iterator */
--
2.17.1