[PATCH v4 2/2] powernv/kdump: Fix cases where the kdump kernel can get HMIs
Nicholas Piggin
npiggin at gmail.com
Fri Dec 15 19:54:26 AEDT 2017
On Fri, 15 Dec 2017 19:14:55 +1100
Balbir Singh <bsingharora at gmail.com> wrote:
> Certain HMIs, such as malfunction errors, propagate through
> all threads/cores on the system. If a thread was offline
> prior to us crashing the system and jumping to the kdump
> kernel, bad things happen when it wakes up due to an HMI
> in the kdump kernel.
>
> There are several possible ways to solve this problem:
>
> 1. Put the offline cores in a state such that they are
> not woken up for machine check and HMI errors. This
> does not work, since we might need to wake up offline
> threads to handle TB errors.
> 2. Ignore HMI errors and set up HMEER to mask HMIs, but
> this still leaves the window open for any MCEs, and
> masking them for the duration of the dump might be
> a concern.
> 3. Wake up offline CPUs, as in send them to
> crash_ipi_callback() (not wake them up as in mark them
> online as seen by hotplug). kexec does a
> wake_offline_cpus() call; this patch does something
> similar, but instead sends an IPI and forces them into
> crash_ipi_callback() (see the comparison sketch below).
>
> This patch takes approach #3.
>
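For contrast with approach #3: the regular kexec path brings offline
threads fully online via the hotplug machinery. Roughly what
wake_offline_cpus() does (abridged from memory of
arch/powerpc/kernel/machine_kexec_64.c -- check your tree):

	static void wake_offline_cpus(void)
	{
		int cpu = 0;

		for_each_present_cpu(cpu) {
			if (!cpu_online(cpu))
				WARN_ON(cpu_up(cpu));	/* full hotplug online */
		}
	}

A full cpu_up() is not something we want to attempt while crashing,
hence the bare IPI into crash_ipi_callback() below.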
> Care is taken to enable this only for powernv platforms
> via crash_wake_offline (a global value set at setup
> time). The crash code sends out IPIs to all CPUs, which
> then move to crash_ipi_callback() and kexec_smp_wait().
>
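To summarise the end-to-end flow as I read the series (schematic, not
literal code; function names are from the diff below):

	crashing CPU:
	  default_machine_crash_shutdown()
	    -> crash_kexec_prepare_cpus()
	         -> crash_send_ipi(crash_ipi_callback)
	              -> smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, ...)  /* online CPUs */
	              -> do_smp_send_nmi_ipi(cpu)                   /* each offline CPU */

	offline thread (sleeping in pnv_smp_cpu_kill_self()):
	  wakes, sees kdump_in_progress()
	    -> ppc_save_regs(&regs)
	    -> crash_ipi_callback(&regs)
	         -> kexec_smp_wait()                                /* does not return */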
Reviewed-by: Nicholas Piggin <npiggin at gmail.com>
> Signed-off-by: Balbir Singh <bsingharora at gmail.com>
> ---
>
> Changelog v4:
> - Handle the case of crash IPIs sent via non-NMI
> - Drop system reset via SCOM in non-power-saving
> mode
>
> arch/powerpc/include/asm/kexec.h | 2 ++
> arch/powerpc/kernel/crash.c | 13 ++++++++++++-
> arch/powerpc/kernel/smp.c | 18 ++++++++++++++++++
> arch/powerpc/platforms/powernv/smp.c | 28 ++++++++++++++++++++++++++++
> 4 files changed, 60 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> index 4419d435639a..9dcbfa6bbb91 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -73,6 +73,8 @@ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
> master to copy new code to 0 */
> extern int crashing_cpu;
> extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
> +extern void crash_ipi_callback(struct pt_regs *);
> +extern int crash_wake_offline;
>
> struct kimage;
> struct pt_regs;
> diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
> index 29c56ca2ddfd..00b215125d3e 100644
> --- a/arch/powerpc/kernel/crash.c
> +++ b/arch/powerpc/kernel/crash.c
> @@ -44,6 +44,14 @@
> #define REAL_MODE_TIMEOUT 10000
>
> static int time_to_dump;
> +/*
> + * crash_wake_offline should be set to 1 by platforms that intend to wake
> + * up offline cpus prior to jumping to a kdump kernel. Currently powernv
> + * sets it to 1, since we want to avoid bad things happening when an
> + * offline CPU wakes up due to something like an HMI (malfunction
> + * error), which propagates to all threads.
> + */
> +int crash_wake_offline;
>
> #define CRASH_HANDLER_MAX 3
> /* List of shutdown handles */
> @@ -63,7 +71,7 @@ static int handle_fault(struct pt_regs *regs)
> #ifdef CONFIG_SMP
>
> static atomic_t cpus_in_crash;
> -static void crash_ipi_callback(struct pt_regs *regs)
> +void crash_ipi_callback(struct pt_regs *regs)
> {
> static cpumask_t cpus_state_saved = CPU_MASK_NONE;
>
> @@ -106,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu)
>
> printk(KERN_EMERG "Sending IPI to other CPUs\n");
>
> + if (crash_wake_offline)
> + ncpus = num_present_cpus() - 1;
> +
> crash_send_ipi(crash_ipi_callback);
> smp_wmb();
>
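A note for readers without the file open: the existing context just
above this hunk (not in the diff, quoted from memory) is

	unsigned int ncpus = num_online_cpus() - 1;	/* Excluding the panic cpu */

and crash_kexec_prepare_cpus() waits for that many CPUs to enter
crash_ipi_callback(). Raising it to num_present_cpus() - 1 makes the
crashing CPU also wait for the woken offline threads; the "- 1" is
still the crashing CPU itself.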
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index e0a4c1f82e25..bbe7634b3a43 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -543,7 +543,25 @@ void smp_send_debugger_break(void)
> #ifdef CONFIG_KEXEC_CORE
> void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
> {
> + int cpu;
> +
> smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
> + if (kdump_in_progress() && crash_wake_offline) {
> + for_each_present_cpu(cpu) {
> + if (cpu_online(cpu))
> + continue;
> + /*
> + * crash_ipi_callback() will wait for
> + * all cpus, including offline CPUs.
> + * We don't care about nmi_ipi_function.
> + * Offline cpus will jump straight into
> + * crash_ipi_callback(), so we can skip
> + * the entire NMI dance and the wait for
> + * cpus to clear the pending mask, etc.
> + */
> + do_smp_send_nmi_ipi(cpu);
> + }
> + }
> }
> #endif
>
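To make the online/offline split concrete: as I read it,
smp_send_nmi_ipi() with NMI_IPI_ALL_OTHERS only covers online CPUs, so
on a hypothetical SMT4 core with CPUs 0-3 where only 0-1 are online and
CPU 0 is crashing, the calls end up being (schematically):

	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS,
			 crash_ipi_callback, 1000000);	/* reaches CPU 1 */
	do_smp_send_nmi_ipi(2);	/* bare wakeup, no NMI handshake */
	do_smp_send_nmi_ipi(3);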
> diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
> index ba030669eca1..9664c8461f03 100644
> --- a/arch/powerpc/platforms/powernv/smp.c
> +++ b/arch/powerpc/platforms/powernv/smp.c
> @@ -37,6 +37,8 @@
> #include <asm/kvm_ppc.h>
> #include <asm/ppc-opcode.h>
> #include <asm/cpuidle.h>
> +#include <asm/kexec.h>
> +#include <asm/reg.h>
>
> #include "powernv.h"
>
> @@ -209,9 +211,32 @@ static void pnv_smp_cpu_kill_self(void)
> } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
> unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
> asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
> + } else if ((srr1 & wmask) == SRR1_WAKERESET) {
> + irq_set_pending_from_srr1(srr1);
> + /* Does not return */
> }
> +
> smp_mb();
>
> + /*
> + * If a kdump is in progress, process the IPI
> + * and jump to crash_ipi_callback()
> + */
> + if (kdump_in_progress()) {
> + /*
> + * If we got to this point, we've not used
> + * NMIs; otherwise we would have gone
> + * via the SRR1_WAKERESET path. We are
> + * using regular IPIs to wake up offline
> + * threads.
> + */
> + struct pt_regs regs;
> +
> + ppc_save_regs(&regs);
> + crash_ipi_callback(&regs);
> + /* Does not return */
> + }
> +
> if (cpu_core_split_required())
> continue;
>
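For reference, the shape of what the woken thread then executes
(abridged from memory of the existing crash.c -- not part of this
diff):

	void crash_ipi_callback(struct pt_regs *regs)
	{
		int cpu = smp_processor_id();

		crash_save_cpu(regs, cpu);	/* ELF register note for vmcore */
		atomic_inc(&cpus_in_crash);

		while (!time_to_dump)		/* released by the crashing CPU */
			cpu_relax();

		if (ppc_md.kexec_cpu_down)
			ppc_md.kexec_cpu_down(0, 1);
		kexec_smp_wait();		/* does not return */
	}

so an offline thread parks itself in exactly the same way as an online
one.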
> @@ -371,5 +396,8 @@ void __init pnv_smp_init(void)
>
> #ifdef CONFIG_HOTPLUG_CPU
> ppc_md.cpu_die = pnv_smp_cpu_kill_self;
> +#ifdef CONFIG_KEXEC_CORE
> + crash_wake_offline = 1;
> +#endif
> #endif
> }