[PATCH v3 3/3] powernv/kdump: Fix cases where the kdump kernel can get HMI's
Balbir Singh
bsingharora at gmail.com
Fri Dec 15 14:34:03 AEDT 2017
On Fri, Dec 15, 2017 at 2:10 PM, Nicholas Piggin <npiggin at gmail.com> wrote:
> On Fri, 15 Dec 2017 12:27:40 +1100
> Balbir Singh <bsingharora at gmail.com> wrote:
>
>> Certain HMI's such as malfunction error propagate through
>> all threads/core on the system. If a thread was offline
>> prior to us crashing the system and jumping to the kdump
>> kernel, bad things happen when it wakes up due to an HMI
>> in the kdump kernel.
>>
>> There are several possible ways to solve this problem
>>
>> 1. Put the offline cores in a state such that they are
>> not woken up for machine check and HMI errors. This
>> does not work, since we might need to wake up offline
>> threads to handle TB errors
>> 2. Ignore HMI errors, setup HMEER to mask HMI errors,
>> but this still leads the window open for any MCEs
>> and masking them for the duration of the dump might
>> be a concern
>> 3. Wake up offline CPUs, as in send them to
>> crash_ipi_callback (not wake them up as in mark them
>> online as seen by the hotplug). kexec does a
>> wake_online_cpus() call, this patch does something
>> similar, but instead sends an IPI and forces them to
>> crash_ipi_callback()
>>
>> This patch takes approach #3.
>>
>> Care is taken to enable this only for powenv platforms
>> via crash_wake_offline (a global value set at setup
>> time). The crash code sends out IPI's to all CPU's
>> which then move to crash_ipi_callback and kexec_smp_wait().
>>
>> Signed-off-by: Balbir Singh <bsingharora at gmail.com>
>> ---
>>
>> Changelog v3
>> - Use SRR1's reason to wake up to drive replay_system_reset()
>> as a means of getting to kdump() as opposed to calling
>> crash_ipi_callback based on comments from Nick Piggin.
>>
>> arch/powerpc/include/asm/kexec.h | 2 ++
>> arch/powerpc/kernel/crash.c | 13 ++++++++++++-
>> arch/powerpc/kernel/smp.c | 18 ++++++++++++++++++
>> arch/powerpc/platforms/powernv/smp.c | 12 ++++++++++++
>> 4 files changed, 44 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
>> index 4419d435639a..9dcbfa6bbb91 100644
>> --- a/arch/powerpc/include/asm/kexec.h
>> +++ b/arch/powerpc/include/asm/kexec.h
>> @@ -73,6 +73,8 @@ extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
>> master to copy new code to 0 */
>> extern int crashing_cpu;
>> extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
>> +extern void crash_ipi_callback(struct pt_regs *);
>> +extern int crash_wake_offline;
>>
>> struct kimage;
>> struct pt_regs;
>> diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
>> index 29c56ca2ddfd..00b215125d3e 100644
>> --- a/arch/powerpc/kernel/crash.c
>> +++ b/arch/powerpc/kernel/crash.c
>> @@ -44,6 +44,14 @@
>> #define REAL_MODE_TIMEOUT 10000
>>
>> static int time_to_dump;
>> +/*
>> + * crash_wake_offline should be set to 1 by platforms that intend to wake
>> + * up offline cpus prior to jumping to a kdump kernel. Currently powernv
>> + * sets it to 1, since we want to avoid things from happening when an
>> + * offline CPU wakes up due to something like an HMI (malfunction error),
>> + * which propagates to all threads.
>> + */
>> +int crash_wake_offline;
>>
>> #define CRASH_HANDLER_MAX 3
>> /* List of shutdown handles */
>> @@ -63,7 +71,7 @@ static int handle_fault(struct pt_regs *regs)
>> #ifdef CONFIG_SMP
>>
>> static atomic_t cpus_in_crash;
>> -static void crash_ipi_callback(struct pt_regs *regs)
>> +void crash_ipi_callback(struct pt_regs *regs)
>> {
>> static cpumask_t cpus_state_saved = CPU_MASK_NONE;
>>
>> @@ -106,6 +114,9 @@ static void crash_kexec_prepare_cpus(int cpu)
>>
>> printk(KERN_EMERG "Sending IPI to other CPUs\n");
>>
>> + if (crash_wake_offline)
>> + ncpus = num_present_cpus() - 1;
>> +
>> crash_send_ipi(crash_ipi_callback);
>> smp_wmb();
>>
>> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
>> index e0a4c1f82e25..bbe7634b3a43 100644
>> --- a/arch/powerpc/kernel/smp.c
>> +++ b/arch/powerpc/kernel/smp.c
>> @@ -543,7 +543,25 @@ void smp_send_debugger_break(void)
>> #ifdef CONFIG_KEXEC_CORE
>> void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
>> {
>> + int cpu;
>> +
>> smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
>> + if (kdump_in_progress() && crash_wake_offline) {
>> + for_each_present_cpu(cpu) {
>> + if (cpu_online(cpu))
>> + continue;
>> + /*
>> + * crash_ipi_callback will wait for
>> + * all cpus, including offline CPUs.
>> + * We don't care about nmi_ipi_function.
>> + * Offline cpus will jump straight into
>> + * crash_ipi_callback, we can skip the
>> + * entire NMI dance and waiting for
>> + * cpus to clear pending mask, etc.
>> + */
>> + do_smp_send_nmi_ipi(cpu);
>> + }
>> + }
>> }
>> #endif
>>
>> diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
>> index ba030669eca1..1594bbff3dec 100644
>> --- a/arch/powerpc/platforms/powernv/smp.c
>> +++ b/arch/powerpc/platforms/powernv/smp.c
>> @@ -37,6 +37,8 @@
>> #include <asm/kvm_ppc.h>
>> #include <asm/ppc-opcode.h>
>> #include <asm/cpuidle.h>
>> +#include <asm/kexec.h>
>> +#include <asm/reg.h>
>>
>> #include "powernv.h"
>>
>> @@ -212,6 +214,13 @@ static void pnv_smp_cpu_kill_self(void)
>> }
>> smp_mb();
>>
>> + /*
>> + * For kdump kernels, we process the ipi and jump to
>> + * handling the system reset exception.
>> + */
>> + if (kdump_in_progress())
>> + irq_set_pending_from_srr1(srr1);
>> +
>> if (cpu_core_split_required())
>> continue;
>>
>
> I wonder if we want to special-case it for system reset only? Everything
> is all going down for kdump anyway I guess, but theoretically we don't
> want to put other interrupts in our pending mask.
>
> We might as well just do it unconditionally as well, so we can easily test
> and make sure we do something sane if we happen to take a stray sreset
> here for some reason (e.g., pdbg).
>
> I would do this:
>
> } else if ((srr1 & wmask) == SRR1_WAKERESET) {
> /* kdump or debug tools can sreset offline CPUs */
> irq_set_pending_from_srr1(srr1);
> }
>
> But then you still need an explicit kdump check for non-sreset wakeups
> because the platform may not implement sreset, or it may fall back to
> normal IPI if the sreset scom fails. So you then still need your
>
> if (kdump) crash_ipi_callback
Yep, its required for the the non NMI case.
How does this look -- based on your comment
+ } else if ((srr1 & wmask) == SRR1_WAKESRESET) {
+ irq_set_pending_from_srr1(srr1);
+ /* Does not return */
}
+
smp_mb();
/*
* For kdump kernels, we process the ipi and jump to
- * handling the system reset exception.
+ * crash_ipi_callback
*/
- if (kdump_in_progress())
- irq_set_pending_from_srr1(srr1);
+ if (kdump_in_progress()) {
+ /*
+ * If we got to this point, we've not used
+ * NMI's, otherwise we would have gone
+ * via the SRR1_WAKESRESET path. We are
+ * using regular IPI's for waking up offline
+ * threads.
+ */
+ struct pt_regs regs;
+
+ ppc_save_regs(®s);
+ crash_ipi_callback(regs);
+ /* Does not return */
+ }
Balbir Singh
>
> Thanks,
> Nick
More information about the Linuxppc-dev
mailing list