[PATCH v3 1/2] powerpc/mce: Avoid using irq_work_queue() in realmode
Nicholas Piggin
npiggin at gmail.com
Thu Nov 25 00:03:26 AEDT 2021
Excerpts from Ganesh Goudar's message of November 24, 2021 7:54 pm:
> In realmode mce handler we use irq_work_queue() to defer
> the processing of mce events, irq_work_queue() can only
> be called when translation is enabled because it touches
> memory outside RMA, hence we enable translation before
> calling irq_work_queue and disable on return, though it
> is not safe to do in realmode.
>
> To avoid this, program the decrementer and call the event
> processing functions from timer handler.
>
> Signed-off-by: Ganesh Goudar <ganeshgr at linux.ibm.com>
> ---
> V2:
> * Use arch_irq_work_raise to raise decrementer interrupt.
> * Avoid having atomic variable.
>
> V3:
> * Fix build error.
> Reported by kernel test bot.
> ---
> arch/powerpc/include/asm/machdep.h | 2 +
> arch/powerpc/include/asm/mce.h | 2 +
> arch/powerpc/include/asm/paca.h | 1 +
> arch/powerpc/kernel/mce.c | 51 +++++++++++-------------
> arch/powerpc/kernel/time.c | 3 ++
> arch/powerpc/platforms/pseries/pseries.h | 1 +
> arch/powerpc/platforms/pseries/ras.c | 31 +-------------
> arch/powerpc/platforms/pseries/setup.c | 1 +
> 8 files changed, 34 insertions(+), 58 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index 9c3c9f04129f..d22b222ba471 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -99,6 +99,8 @@ struct machdep_calls {
> /* Called during machine check exception to retrive fixup address. */
> bool (*mce_check_early_recovery)(struct pt_regs *regs);
>
> + void (*machine_check_log_err)(void);
> +
> /* Motherboard/chipset features. This is a kind of general purpose
> * hook used to control some machine specific features (like reset
> * lines, chip power control, etc...).
> diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
> index 331d944280b8..6e306aaf58aa 100644
> --- a/arch/powerpc/include/asm/mce.h
> +++ b/arch/powerpc/include/asm/mce.h
> @@ -235,8 +235,10 @@ extern void machine_check_print_event_info(struct machine_check_event *evt,
> unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
> extern void mce_common_process_ue(struct pt_regs *regs,
> struct mce_error_info *mce_err);
> +void machine_check_raise_dec_intr(void);
> int mce_register_notifier(struct notifier_block *nb);
> int mce_unregister_notifier(struct notifier_block *nb);
> +void mce_run_late_handlers(void);
> #ifdef CONFIG_PPC_BOOK3S_64
> void flush_and_reload_slb(void);
> void flush_erat(void);
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index dc05a862e72a..d463c796f7fa 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -280,6 +280,7 @@ struct paca_struct {
> #endif
> #ifdef CONFIG_PPC_BOOK3S_64
> struct mce_info *mce_info;
> + u32 mces_to_process;
> #endif /* CONFIG_PPC_BOOK3S_64 */
> } ____cacheline_aligned;
>
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index fd829f7f25a4..8e17f29472a0 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -28,19 +28,9 @@
>
> #include "setup.h"
>
> -static void machine_check_process_queued_event(struct irq_work *work);
> -static void machine_check_ue_irq_work(struct irq_work *work);
> static void machine_check_ue_event(struct machine_check_event *evt);
> static void machine_process_ue_event(struct work_struct *work);
>
> -static struct irq_work mce_event_process_work = {
> - .func = machine_check_process_queued_event,
> -};
> -
> -static struct irq_work mce_ue_event_irq_work = {
> - .func = machine_check_ue_irq_work,
> -};
> -
> static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
>
> static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
> @@ -89,6 +79,12 @@ static void mce_set_error_info(struct machine_check_event *mce,
> }
> }
>
> +/* Raise decrementer interrupt */
> +void machine_check_raise_dec_intr(void)
> +{
> + arch_irq_work_raise();
> +}
It would be better if the name specifically related to irq work, which
is more than just dec interrupt. It might be good to set mces_to_process
here as well.
I would name it something like mce_irq_work_queue, and the paca variable
to mce_pending_irq_work...
> +void mce_run_late_handlers(void)
> +{
> + if (unlikely(local_paca->mces_to_process)) {
> + if (ppc_md.machine_check_log_err)
> + ppc_md.machine_check_log_err();
> + machine_check_process_queued_event();
> + machine_check_ue_work();
> + local_paca->mces_to_process--;
> + }
> +}
The problem with a counter is that you're clearing the irq work pending
in the timer interrupt, so you'll never call in here again to clear that
(until something else sets irq work).
But as far as I can see it does not need to be a counter, just a flag.
The machine check calls will process multiple events, right? (and the
current irq_work queue does not queue the same work multiple times).
Oh. That's actually bad, isn't it? Our irq work should be per-CPU
because the callbacks are mainly only operating on the local paca
queued events, so we have a longstanding bug there AFAIKS. Your patch
will solve it if everything is converted over.
> +
> void machine_check_print_event_info(struct machine_check_event *evt,
> bool user_mode, bool in_guest)
> {
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index cae8f03a44fe..94c591b6f9d2 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -594,6 +594,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
>
> if (test_irq_work_pending()) {
> clear_irq_work_pending();
> +#ifdef CONFIG_PPC_BOOK3S_64
> + mce_run_late_handlers();
> +#endif
Maybe create a no-op inline function for others and call unconditionally
here. I wonder if the name could be better, we have lots of handlers, of
varying earliness. real-mode, then virt mode NMI context, then IRQ
context, then workqueue context.
mce_run_irq_context_handlers() might not be much better though.
> irq_work_run();
> }
>
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 3544778e06d0..9cf0d33dfbf5 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -21,6 +21,7 @@ struct pt_regs;
> extern int pSeries_system_reset_exception(struct pt_regs *regs);
> extern int pSeries_machine_check_exception(struct pt_regs *regs);
> extern long pseries_machine_check_realmode(struct pt_regs *regs);
> +void pSeries_machine_check_log_err(void);
>
> #ifdef CONFIG_SMP
> extern void smp_init_pseries(void);
> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
> index 56092dccfdb8..8613f9cc5798 100644
> --- a/arch/powerpc/platforms/pseries/ras.c
> +++ b/arch/powerpc/platforms/pseries/ras.c
> @@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);
>
> static int ras_check_exception_token;
>
> -static void mce_process_errlog_event(struct irq_work *work);
> -static struct irq_work mce_errlog_process_work = {
> - .func = mce_process_errlog_event,
> -};
> -
> #define EPOW_SENSOR_TOKEN 9
> #define EPOW_SENSOR_INDEX 0
>
> @@ -729,40 +724,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
> error_type = mce_log->error_type;
>
> disposition = mce_handle_err_realmode(disposition, error_type);
> -
> - /*
> - * Enable translation as we will be accessing per-cpu variables
> - * in save_mce_event() which may fall outside RMO region, also
> - * leave it enabled because subsequently we will be queuing work
> - * to workqueues where again per-cpu variables accessed, besides
> - * fwnmi_release_errinfo() crashes when called in realmode on
> - * pseries.
> - * Note: All the realmode handling like flushing SLB entries for
> - * SLB multihit is done by now.
> - */
> out:
> - msr = mfmsr();
> - mtmsr(msr | MSR_IR | MSR_DR);
> -
> disposition = mce_handle_err_virtmode(regs, errp, mce_log,
> disposition);
> -
> - /*
> - * Queue irq work to log this rtas event later.
> - * irq_work_queue uses per-cpu variables, so do this in virt
> - * mode as well.
> - */
> - irq_work_queue(&mce_errlog_process_work);
> -
> - mtmsr(msr);
> -
> return disposition;
> }
>
> /*
> * Process MCE rtas errlog event.
> */
> -static void mce_process_errlog_event(struct irq_work *work)
> +void pSeries_machine_check_log_err(void)
> {
> struct rtas_error_log *err;
>
> diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
> index 8a62af5b9c24..9bdc487b8e35 100644
> --- a/arch/powerpc/platforms/pseries/setup.c
> +++ b/arch/powerpc/platforms/pseries/setup.c
> @@ -1084,6 +1084,7 @@ define_machine(pseries) {
> .system_reset_exception = pSeries_system_reset_exception,
> .machine_check_early = pseries_machine_check_realmode,
> .machine_check_exception = pSeries_machine_check_exception,
> + .machine_check_log_err = pSeries_machine_check_log_err,
> #ifdef CONFIG_KEXEC_CORE
> .machine_kexec = pSeries_machine_kexec,
> .kexec_cpu_down = pseries_kexec_cpu_down,
> --
> 2.31.1
>
>
More information about the Linuxppc-dev
mailing list