[PATCH v4 09/17] watchdog/hardlockup: Add a "cpu" param to watchdog_hardlockup_check()
Douglas Anderson
dianders at chromium.org
Fri May 5 08:13:41 AEST 2023
In preparation for the buddy hardlockup detector, where the CPU being
checked for a lockup might not be the currently running CPU, add a
"cpu" parameter to watchdog_hardlockup_check().
Signed-off-by: Douglas Anderson <dianders at chromium.org>
---
Changes in v4:
- ("Add a "cpu" param to watchdog_hardlockup_check()") new for v4.
include/linux/nmi.h | 2 +-
kernel/watchdog.c | 47 ++++++++++++++++++++++++++++--------------
kernel/watchdog_perf.c | 2 +-
3 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index c6cb9bc5dc80..2c9ea1ba285c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -88,7 +88,7 @@ static inline void hardlockup_detector_disable(void) {}
#endif
#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
-void watchdog_hardlockup_check(struct pt_regs *regs);
+void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
#endif
#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index f46669c1671d..367bea0167a5 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -92,14 +92,14 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(bool, watchdog_hardlockup_processed);
static unsigned long watchdog_hardlockup_dumped_stacks;
-static bool watchdog_hardlockup_is_lockedup(void)
+static bool watchdog_hardlockup_is_lockedup(unsigned int cpu)
{
- unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
+ unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);
- if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+ if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
return true;
- __this_cpu_write(hrtimer_interrupts_saved, hrint);
+ per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
return false;
}
@@ -109,7 +109,7 @@ static void watchdog_hardlockup_interrupt_count(void)
__this_cpu_inc(hrtimer_interrupts);
}
-void watchdog_hardlockup_check(struct pt_regs *regs)
+void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
{
/*
* Check for a hardlockup by making sure the CPU's timer
@@ -117,35 +117,50 @@ void watchdog_hardlockup_check(struct pt_regs *regs)
* fired multiple times before we overflow'd. If it hasn't
* then this is a good indication the cpu is stuck
*/
- if (watchdog_hardlockup_is_lockedup()) {
+ if (watchdog_hardlockup_is_lockedup(cpu)) {
unsigned int this_cpu = smp_processor_id();
+ struct cpumask backtrace_mask = *cpu_online_mask;
/* Only handle hardlockups once. */
- if (__this_cpu_read(watchdog_hardlockup_processed))
+ if (per_cpu(watchdog_hardlockup_processed, cpu))
return;
- pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", this_cpu);
+ pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", cpu);
print_modules();
print_irqtrace_events(current);
- if (regs)
+ if (regs) {
show_regs(regs);
- else
- dump_stack();
+ cpumask_clear_cpu(cpu, &backtrace_mask);
+ } else {
+ /*
+ * If the locked up CPU is different than the CPU we're
+ * running on then we'll try to backtrace the CPU that
+ * locked up and then exclude it from later backtraces.
+ * If that fails or if we're running on the locked up
+ * CPU, just do a normal backtrace.
+ */
+ if (cpu != this_cpu && trigger_single_cpu_backtrace(cpu)) {
+ cpumask_clear_cpu(cpu, &backtrace_mask);
+ } else {
+ dump_stack();
+ cpumask_clear_cpu(this_cpu, &backtrace_mask);
+ }
+ }
/*
- * Perform all-CPU dump only once to avoid multiple hardlockups
- * generating interleaving traces
+ * Perform multi-CPU dump only once to avoid multiple
+ * hardlockups generating interleaving traces
*/
if (sysctl_hardlockup_all_cpu_backtrace &&
!test_and_set_bit(0, &watchdog_hardlockup_dumped_stacks))
- trigger_allbutself_cpu_backtrace();
+ trigger_cpumask_backtrace(&backtrace_mask);
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
- __this_cpu_write(watchdog_hardlockup_processed, true);
+ per_cpu(watchdog_hardlockup_processed, cpu) = true;
} else {
- __this_cpu_write(watchdog_hardlockup_processed, false);
+ per_cpu(watchdog_hardlockup_processed, cpu) = false;
}
}
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index 5f3651b87ee7..9be90b2a2ea7 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -120,7 +120,7 @@ static void watchdog_overflow_callback(struct perf_event *event,
if (!watchdog_check_timestamp())
return;
- watchdog_hardlockup_check(regs);
+ watchdog_hardlockup_check(smp_processor_id(), regs);
}
static int hardlockup_detector_event_create(void)
--
2.40.1.521.gf1e218fcd8-goog
More information about the Linuxppc-dev
mailing list