[Skiboot] [PATCH] core/lock: Add deadlock detection

Oliver oohall at gmail.com
Fri Jun 23 17:14:34 AEST 2017


Hey nick, you're a concurrency wizard can you review this?


On Fri, Jun 23, 2017 at 2:35 PM, Matt Brown <matthew.brown.dev at gmail.com> wrote:
> This adds simple deadlock detection.
> The detection looks for circular dependencies in the lock requests.
> It will abort and display a stack trace when a deadlock occurs.
> The detection will reduce performance so only use for debugging.
> To use, enable DEBUG_DEADLOCKS in include/config.h (disabled by default).

It might be useful if we can get stack traces for every thread
involved in the deadlock. You might be able
to use the NMI injection that nick was working on to wake up
individual threads to get them to print the stack
trace.

>
> Signed-off-by: Matt Brown <matthew.brown.dev at gmail.com>
> ---
>  core/lock.c      | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/config.h |  3 ++
>  include/lock.h   |  5 ++++
>  3 files changed, 90 insertions(+), 1 deletion(-)
>
> diff --git a/core/lock.c b/core/lock.c
> index e82048b..0379fb0 100644
> --- a/core/lock.c
> +++ b/core/lock.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2013-2014 IBM Corp.
> +/* Copyright 2013-2017 IBM Corp.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -66,6 +66,73 @@ static inline void lock_check(struct lock *l) { };
>  static inline void unlock_check(struct lock *l) { };
>  #endif /* DEBUG_LOCKS */
>
> +#ifdef DEBUG_DEADLOCKS
> +
> +#define MAX_THREADS 2048
> +static struct lock *lock_table[MAX_THREADS];

Rename this to target_lock_table or something. You could probably
remove the array and track this information as a field in the
cpu_thread structure too since skiboot is effectively single thread
until we call cpu_bringup().

> +
> +/* Find circular dependencies in the lock requests. */
> +static bool check_deadlock(void)

I'd rather you passed the pir/lock that you want to check as function
arguments rather than wacking globals at random, but that's just me.

> +{
> +       int lock_owner, i, start;
> +       struct lock *next;
> +
> +       start = this_cpu()->pir;
> +       next = lock_table[start];
> +       i = 0;

I think you can remove the counter. It's being used for is to make
sure the current lock isn't the one we started from and you can do
that check in a more straight forward manner.

> +
> +       while (i < MAX_THREADS) {
> +
> +               if (!next)
> +                       return false;
> +

> +               if (!(next->lock_val & 1) || next->in_con_path)
> +                       return false;
Why is being in the console path important?

> +
> +               lock_owner = next->lock_val >> 32;


> +               if (lock_owner >= MAX_THREADS)
> +                       return false;

This should be impossible, maybe make it an assert() because if it
happens something is very broken.

> +
> +               if (lock_owner == start && i > 0)
> +                       return true;
> +
> +               next = lock_table[lock_owner];
> +               i++;
> +       }
> +
> +       return false;
> +}
> +

> +void add_lock_request(struct lock *l)
> +{
> +       if (this_cpu()->state == cpu_state_active ||
> +           this_cpu()->state == cpu_state_os) {

If we're taking locks inside skiboot is the thread ever going to be in
a different state?

> +
> +               if (this_cpu()->pir >= MAX_THREADS)
> +                       return;
Again, this should never happen so either make it a assert() or remove
the check.

> +
> +               lock_table[this_cpu()->pir] = l;
> +
> +               if (check_deadlock() && check_deadlock()) {

Why twice?

> +#ifdef DEBUG_LOCKS
> +                       lock_error(l, "Deadlock detected", 0);
> +#else
> +                       prlog(PR_EMERG, "LOCK: Deadlock detected\n");
> +                       backtrace();
> +                       abort();
> +#endif /* DEBUG_LOCKS */
> +               }
> +       }
> +}
> +
> +void remove_lock_request(void)
> +{
> +       if (this_cpu()->pir < MAX_THREADS)
> +               lock_table[this_cpu()->pir] = NULL;
> +}
> +#endif /* DEBUG_DEADLOCKS */
> +
>  bool lock_held_by_me(struct lock *l)
>  {
>         uint64_t pir64 = this_cpu()->pir;
> @@ -86,15 +153,29 @@ bool try_lock(struct lock *l)
>
>  void lock(struct lock *l)
>  {
> +
>         if (bust_locks)
>                 return;
>
>         lock_check(l);
> +
> +#ifdef DEBUG_DEADLOCKS
> +       if (try_lock(l))
> +               return;
> +
> +       add_lock_request(l);
> +#endif
> +
>         for (;;) {
>                 if (try_lock(l))
>                         break;
>                 cpu_relax();
>         }
> +
> +#ifdef DEBUG_DEADLOCKS
> +       remove_lock_request();
> +#endif
> +
>  }
>
>  void unlock(struct lock *l)
> diff --git a/include/config.h b/include/config.h
> index cd8a0a6..c2f80e9 100644
> --- a/include/config.h
> +++ b/include/config.h
> @@ -39,6 +39,9 @@
>  /* Enable lock debugging */
>  #define DEBUG_LOCKS            1
>
> +/* Enable deadlock detection */
> +//#define DEBUG_DEADLOCKS      1
> +
>  /* Enable malloc debugging */
>  #define DEBUG_MALLOC           1
>
> diff --git a/include/lock.h b/include/lock.h
> index 0ac943d..27bedc7 100644
> --- a/include/lock.h
> +++ b/include/lock.h
> @@ -81,4 +81,9 @@ extern bool lock_recursive(struct lock *l);
>  /* Called after per-cpu data structures are available */
>  extern void init_locks(void);
>
> +#ifdef DEBUG_DEADLOCKS
> +extern void add_lock_request(struct lock *l);
> +extern void remove_lock_request(void);
> +#endif /* DEBUG_DEADLOCKS*/
> +
>  #endif /* __LOCK_H */
> --
> 2.9.3
>
> _______________________________________________
> Skiboot mailing list
> Skiboot at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot


More information about the Skiboot mailing list