[PATCH] powerpc/books: Never call nmi_enter for real-mode NMIs
Nicholas Piggin
npiggin at gmail.com
Thu Oct 27 18:43:13 AEDT 2022
NMIs that are taken in real mode (the early MCE and HMI handlers)
skipped calling nmi_enter() in some configurations, in the hope that
more modern configurations like radix suffer fewer restrictions. This
just turns into whack-a-mole and is fragile whenever core kernel code
changes anything.
A recent example breaks with radix, where an HMI real-mode interrupt
tries to access vmalloc memory, causing it to take a machine check:
--- interrupt: 200 at perf_trace_rcu_dyntick+0x140/0x190
NIP: c0000000001d4720 LR: c0000000001d2bb4 CTR: c0000000001d45e0
REGS: c000000fffdbfd60 TRAP: 0200 Tainted: G M (6.0.0-dirty)
MSR: 9000000000201003 <SF,HV,ME,RI,LE> CR: 24024228 XER: 20040000
CFAR: c0000000001d4648 DAR: c009e000016e29a8 DSISR: 00000008 IRQMASK: 3
GPR00: c0000000001d2bb4 c000000fffdc7b30 c00000000255c100 c0000000023089f8
GPR04: c000000001bd0438 4000000000000000 4000000000000002 0000000000964794
GPR08: 0000000000000000 c009dff0055b29a8 0000000ffc130000 7265677368657265
GPR12: c0000000001d45e0 c000000ffffd7000 c00000000014e7c8 c00000000ab74280
GPR16: 0000000000000000 0000000000000000 0000000000000000 c0000000031a64d8
GPR20: c00000000d9f7b00 0000000000000006 c000000002446a28 c009e000016e29a8
GPR24: c000000001bd0438 4000000000000000 4000000000000002 0000000000964794
GPR28: c0000000001d2bb4 4000000000000002 c0000000023089f8 c0002000063f0668
perf_trace_rcu_dyntick+0x140/0x190
__traceiter_rcu_dyntick+0x84/0xc0
--- interrupt: 200
rcu_read_lock_sched_held+0x10/0xe0 (unreliable)
__traceiter_rcu_dyntick+0x84/0xc0
ct_nmi_enter+0x118/0x280
interrupt_nmi_enter_prepare+0x118/0x1f0
hmi_exception_realmode+0x38/0xe4
hmi_exception_early_common+0x114/0x2a0
--- interrupt: e60 at arch_local_irq_restore+0x11c/0x1b0
Just disable this entirely. It turns out the features that might be
enabled by nmi_enter(), like RCU or printk, are unlikely to be usable
in real mode anyway.
Reported-by: Michael Ellerman <mpe at ellerman.id.au>
Cc: Mahesh Salgaonkar <mahesh at linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
arch/powerpc/include/asm/interrupt.h | 49 +++++++++-------------------
1 file changed, 16 insertions(+), 33 deletions(-)
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..3e87e9ec5117 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -276,6 +276,7 @@ struct interrupt_nmi_state {
u8 irq_soft_mask;
u8 irq_happened;
u8 ftrace_enabled;
+ u8 mmu_enabled;
u64 softe;
#endif
};
@@ -303,6 +304,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
state->irq_soft_mask = local_paca->irq_soft_mask;
state->irq_happened = local_paca->irq_happened;
state->softe = regs->softe;
+ state->mmu_enabled = !!(mfmsr() & MSR_DR);
/*
* Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
@@ -333,46 +335,27 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
}
#endif
- /* If data relocations are enabled, it's safe to use nmi_enter() */
- if (mfmsr() & MSR_DR) {
- nmi_enter();
- return;
- }
-
- /*
- * But do not use nmi_enter() for pseries hash guest taking a real-mode
- * NMI because not everything it touches is within the RMA limit.
- */
- if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
- firmware_has_feature(FW_FEATURE_LPAR) &&
- !radix_enabled())
- return;
-
/*
- * Likewise, don't use it if we have some form of instrumentation (like
- * KASAN shadow) that is not safe to access in real mode (even on radix)
+ * If data relocations are enabled, it's safe to use nmi_enter().
+ * Otherwise avoid using it because the core kernel may touch
+ * vmalloc (e.g., in per-CPU variables), which is not accessible
+ * with the MMU off. Linear memory beyond the VRMA limit is also
+ * a problem for hash guests.
+ *
+ * The real-mode machine checks should not use RCU, tracing, lockdep
+ * locks, and should not printk, access per-CPU variables, among
+ * many other restrictions.
*/
- if (IS_ENABLED(CONFIG_KASAN))
- return;
-
- /* Otherwise, it should be safe to call it */
- nmi_enter();
+ if (state->mmu_enabled)
+ nmi_enter();
}
static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
{
- if (mfmsr() & MSR_DR) {
- // nmi_exit if relocations are on
- nmi_exit();
- } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
- firmware_has_feature(FW_FEATURE_LPAR) &&
- !radix_enabled()) {
- // no nmi_exit for a pseries hash guest taking a real mode exception
- } else if (IS_ENABLED(CONFIG_KASAN)) {
- // no nmi_exit for KASAN in real mode
- } else {
+ WARN_ON_ONCE(state->mmu_enabled != !!(mfmsr() & MSR_DR));
+
+ if (state->mmu_enabled)
nmi_exit();
- }
/*
* nmi does not call nap_adjust_return because nmi should not create
--
2.37.2
More information about the Linuxppc-dev
mailing list