<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
<pre>On 11/8/21 19:49, Nicholas Piggin wrote:</pre>
<blockquote type="cite"
cite="mid:1636380442.sna3yrbwu3.astroid@bobo.none">
<pre class="moz-quote-pre" wrap="">Excerpts from Ganesh Goudar's message of November 8, 2021 6:38 pm:
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap="">In realmode mce handler we use irq_work_queue() to defer
the processing of mce events, irq_work_queue() can only
be called when translation is enabled because it touches
memory outside RMA, hence we enable translation before
calling irq_work_queue and disable on return, though it
is not safe to do in realmode.
To avoid this, program the decrementer and call the event
processing functions from timer handler.
Signed-off-by: Ganesh Goudar <a class="moz-txt-link-rfc2396E" href="mailto:ganeshgr@linux.ibm.com"><ganeshgr@linux.ibm.com></a>
---
arch/powerpc/include/asm/machdep.h | 2 +
arch/powerpc/include/asm/mce.h | 2 +
arch/powerpc/include/asm/paca.h | 1 +
arch/powerpc/kernel/mce.c | 51 +++++++++++-------------
arch/powerpc/kernel/time.c | 3 ++
arch/powerpc/platforms/pseries/pseries.h | 1 +
arch/powerpc/platforms/pseries/ras.c | 31 +-------------
arch/powerpc/platforms/pseries/setup.c | 1 +
8 files changed, 34 insertions(+), 58 deletions(-)
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 764f2732a821..c89cc03c0f97 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -103,6 +103,8 @@ struct machdep_calls {
/* Called during machine check exception to retrive fixup address. */
bool (*mce_check_early_recovery)(struct pt_regs *regs);
+ void (*machine_check_log_err)(void);
+
/* Motherboard/chipset features. This is a kind of general purpose
* hook used to control some machine specific features (like reset
* lines, chip power control, etc...).
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 331d944280b8..187810f13669 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -235,8 +235,10 @@ extern void machine_check_print_event_info(struct machine_check_event *evt,
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
extern void mce_common_process_ue(struct pt_regs *regs,
struct mce_error_info *mce_err);
+extern void machine_check_raise_dec_intr(void);
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
No new externs on function declarations, they tell me.</pre>
</blockquote>
<pre>ok.</pre>
<blockquote type="cite"
cite="mid:1636380442.sna3yrbwu3.astroid@bobo.none">
<pre class="moz-quote-pre" wrap="">
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap=""> int mce_register_notifier(struct notifier_block *nb);
int mce_unregister_notifier(struct notifier_block *nb);
+void mce_run_late_handlers(void);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
void flush_erat(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index dc05a862e72a..f49180f8c9be 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -280,6 +280,7 @@ struct paca_struct {
#endif
#ifdef CONFIG_PPC_BOOK3S_64
struct mce_info *mce_info;
+ atomic_t mces_to_process;
#endif /* CONFIG_PPC_BOOK3S_64 */
} ____cacheline_aligned;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd829f7f25a4..45baa062ebc0 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -28,19 +28,9 @@
#include "setup.h"
-static void machine_check_process_queued_event(struct irq_work *work);
-static void machine_check_ue_irq_work(struct irq_work *work);
static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
-static struct irq_work mce_event_process_work = {
- .func = machine_check_process_queued_event,
-};
-
-static struct irq_work mce_ue_event_irq_work = {
- .func = machine_check_ue_irq_work,
-};
-
static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
@@ -89,6 +79,12 @@ static void mce_set_error_info(struct machine_check_event *mce,
}
}
+/* Raise decrementer interrupt */
+void machine_check_raise_dec_intr(void)
+{
+ set_dec(1);
+}
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
The problem here is a timer can be scheduled (e.g., by an external
interrupt if it gets taken before the decrementer, then uses a
timer) and that set decr > 1. See logic in decrementer_set_next_event.
I _think_ the way to get around this would be to have the machine check
just use arch_irq_work_raise.
Then you could also only call the mce handler inside the
test_irq_work_pending() check and avoid the added function call on every
timer. That test should also be marked unlikely come to think of it, but
that's a side patchlet.</pre>
</blockquote>
<pre>Sure, I will use arch_irq_work_raise() and test_irq_work_pending().
</pre>
<blockquote type="cite"
cite="mid:1636380442.sna3yrbwu3.astroid@bobo.none">
<pre class="moz-quote-pre" wrap="">
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap="">+
/*
* Decode and save high level MCE information into per cpu buffer which
* is an array of machine_check_event structure.
@@ -135,6 +131,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (mce->error_type == MCE_ERROR_TYPE_UE)
mce->u.ue_error.ignore_event = mce_err->ignore_event;
+ atomic_inc(&local_paca->mces_to_process);
+
if (!addr)
return;
@@ -217,7 +215,7 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
-static void machine_check_ue_irq_work(struct irq_work *work)
+static void machine_check_ue_work(void)
{
schedule_work(&mce_ue_event_work);
}
@@ -239,7 +237,7 @@ static void machine_check_ue_event(struct machine_check_event *evt)
evt, sizeof(*evt));
/* Queue work to process this event later. */
- irq_work_queue(&mce_ue_event_irq_work);
+ machine_check_raise_dec_intr();
}
/*
@@ -249,7 +247,6 @@ void machine_check_queue_event(void)
{
int index;
struct machine_check_event evt;
- unsigned long msr;
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
@@ -263,20 +260,7 @@ void machine_check_queue_event(void)
memcpy(&local_paca->mce_info->mce_event_queue[index],
&evt, sizeof(evt));
- /*
- * Queue irq work to process this event later. Before
- * queuing the work enable translation for non radix LPAR,
- * as irq_work_queue may try to access memory outside RMO
- * region.
- */
- if (!radix_enabled() && firmware_has_feature(FW_FEATURE_LPAR)) {
- msr = mfmsr();
- mtmsr(msr | MSR_IR | MSR_DR);
- irq_work_queue(&mce_event_process_work);
- mtmsr(msr);
- } else {
- irq_work_queue(&mce_event_process_work);
- }
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
Getting rid of these things would be very nice.
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap="">+ machine_check_raise_dec_intr();
}
void mce_common_process_ue(struct pt_regs *regs,
@@ -338,7 +322,7 @@ static void machine_process_ue_event(struct work_struct *work)
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
*/
-static void machine_check_process_queued_event(struct irq_work *work)
+static void machine_check_process_queued_event(void)
{
int index;
struct machine_check_event *evt;
@@ -363,6 +347,17 @@ static void machine_check_process_queued_event(struct irq_work *work)
}
}
+void mce_run_late_handlers(void)
+{
+ if (unlikely(atomic_read(&local_paca->mces_to_process))) {
+ if (ppc_md.machine_check_log_err)
+ ppc_md.machine_check_log_err();
+ machine_check_process_queued_event();
+ machine_check_ue_work();
+ atomic_dec(&local_paca->mces_to_process);
+ }
+}
+
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 934d8ae66cc6..2dc09d75d77c 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -597,6 +597,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
irq_work_run();
}
+#ifdef CONFIG_PPC_BOOK3S_64
+ mce_run_late_handlers();
+#endif
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
It looks like if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) should work here?</pre>
</blockquote>
<pre>sure.</pre>
<blockquote type="cite"
cite="mid:1636380442.sna3yrbwu3.astroid@bobo.none">
<pre class="moz-quote-pre" wrap="">
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap=""> now = get_tb();
if (now >= *next_tb) {
*next_tb = ~(u64)0;
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 3544778e06d0..0dc4f1027b30 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -21,6 +21,7 @@ struct pt_regs;
extern int pSeries_system_reset_exception(struct pt_regs *regs);
extern int pSeries_machine_check_exception(struct pt_regs *regs);
extern long pseries_machine_check_realmode(struct pt_regs *regs);
+extern void pSeries_machine_check_log_err(void);
</pre>
</blockquote>
<pre class="moz-quote-pre" wrap="">
extern can be removed.</pre>
</blockquote>
<pre>sure, thanks.
</pre>
<blockquote type="cite"
cite="mid:1636380442.sna3yrbwu3.astroid@bobo.none">
<pre class="moz-quote-pre" wrap="">
Thanks,
Nick
</pre>
<blockquote type="cite">
<pre class="moz-quote-pre" wrap="">
#ifdef CONFIG_SMP
extern void smp_init_pseries(void);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 56092dccfdb8..8613f9cc5798 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -23,11 +23,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);
static int ras_check_exception_token;
-static void mce_process_errlog_event(struct irq_work *work);
-static struct irq_work mce_errlog_process_work = {
- .func = mce_process_errlog_event,
-};
-
#define EPOW_SENSOR_TOKEN 9
#define EPOW_SENSOR_INDEX 0
@@ -729,40 +724,16 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
error_type = mce_log->error_type;
disposition = mce_handle_err_realmode(disposition, error_type);
-
- /*
- * Enable translation as we will be accessing per-cpu variables
- * in save_mce_event() which may fall outside RMO region, also
- * leave it enabled because subsequently we will be queuing work
- * to workqueues where again per-cpu variables accessed, besides
- * fwnmi_release_errinfo() crashes when called in realmode on
- * pseries.
- * Note: All the realmode handling like flushing SLB entries for
- * SLB multihit is done by now.
- */
out:
- msr = mfmsr();
- mtmsr(msr | MSR_IR | MSR_DR);
-
disposition = mce_handle_err_virtmode(regs, errp, mce_log,
disposition);
-
- /*
- * Queue irq work to log this rtas event later.
- * irq_work_queue uses per-cpu variables, so do this in virt
- * mode as well.
- */
- irq_work_queue(&mce_errlog_process_work);
-
- mtmsr(msr);
-
return disposition;
}
/*
* Process MCE rtas errlog event.
*/
-static void mce_process_errlog_event(struct irq_work *work)
+void pSeries_machine_check_log_err(void)
{
struct rtas_error_log *err;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f79126f16258..54bd7bdb7e92 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -1085,6 +1085,7 @@ define_machine(pseries) {
.system_reset_exception = pSeries_system_reset_exception,
.machine_check_early = pseries_machine_check_realmode,
.machine_check_exception = pSeries_machine_check_exception,
+ .machine_check_log_err = pSeries_machine_check_log_err,
#ifdef CONFIG_KEXEC_CORE
.machine_kexec = pSeries_machine_kexec,
.kexec_cpu_down = pseries_kexec_cpu_down,
--
2.26.2
</pre>
</blockquote>
</blockquote>
</body>
</html>