[RFC PATCH 3/7] powerpc/book3s: mce: Process the MCE event and recover if possible.
Mahesh J Salgaonkar
mahesh at linux.vnet.ibm.com
Tue Feb 21 12:52:08 AEDT 2017
From: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
Once we get a high-level MCE error event from OPAL, process it and figure
out whether it is recoverable. If it is, take corrective action.
TODO:
- Rework the handling of asynchronous MCE errors.
- Update opal_recover_mce() to ignore async errors.
- Update flush_and_reload_slb() to avoid SLB reload in radix mode.
Signed-off-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
---
arch/powerpc/include/asm/mce.h | 3 +++
arch/powerpc/kernel/mce.c | 26 +++++++++++++++++++++++
arch/powerpc/kernel/mce_power.c | 38 +++++++++++++++++++++++++++++++++
arch/powerpc/platforms/powernv/opal.c | 2 ++
4 files changed, 69 insertions(+)
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 36db6b0..69e4a42 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -88,9 +88,12 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t nip,
uint64_t addr);
extern int get_mce_event(struct OpalMachineCheckEvent *mce, bool release);
+extern int set_mce_event(struct OpalMachineCheckEvent *mce);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct OpalMachineCheckEvent *evt);
extern uint64_t get_mce_fault_addr(struct OpalMachineCheckEvent *evt);
+extern long handle_mce_errors(struct pt_regs *regs,
+ struct OpalMachineCheckEvent *evt);
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 51a7c64..36da14a3 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -166,6 +166,32 @@ int get_mce_event(struct OpalMachineCheckEvent *mce, bool release)
return ret;
}
+/*
+ * set_mce_event - store an MCE event in this CPU's mce_event array.
+ * @mce: event to copy. srr0, srr1 and u.ue_error.effective_address are
+ *       passed through be64_to_cpu() in the stored copy. May be NULL, in
+ *       which case the slot is still claimed but its contents are left
+ *       untouched.
+ *
+ * Every call bumps the per-CPU mce_nest_count, even when the array is
+ * already full, so the slot pointer is only formed once the index is known
+ * to be within MAX_MC_EVT.
+ *
+ * Returns 1 when a slot within the array was claimed, 0 otherwise.
+ */
+int set_mce_event(struct OpalMachineCheckEvent *mce)
+{
+	int index = __this_cpu_inc_return(mce_nest_count) - 1;
+	struct OpalMachineCheckEvent *mc_evt;
+
+	/* Sanity check, then make sure the slot is within the array limit. */
+	if (index < 0 || index >= MAX_MC_EVT)
+		return 0;
+
+	if (mce) {
+		mc_evt = this_cpu_ptr(&mce_event[index]);
+
+		/* Copy the event structure into the per-CPU slot. */
+		*mc_evt = *mce;
+		/* endian conversions */
+		mc_evt->srr0 = be64_to_cpu(mce->srr0);
+		mc_evt->srr1 = be64_to_cpu(mce->srr1);
+		mc_evt->u.ue_error.effective_address =
+			be64_to_cpu(mce->u.ue_error.effective_address);
+	}
+	return 1;
+}
+
void release_mce_event(void)
{
get_mce_event(NULL, true);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 7353991..91ed2ef 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -372,3 +372,41 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
save_mce_event(regs, handled, &mce_error_info, nip, addr);
return handled;
}
+
+/*
+ * Invalidate the TLB with global scope through the current CPU spec's
+ * flush_tlb hook.
+ *
+ * Returns 1 if the hook was present and called, 0 otherwise.
+ */
+static long flush_tlb(void)
+{
+	/* Without a CPU spec or a flush hook there is nothing we can do. */
+	if (!cur_cpu_spec || !cur_cpu_spec->flush_tlb)
+		return 0;
+
+	cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL);
+	return 1;
+}
+
+/*
+ * handle_mce_errors - attempt recovery for a machine check event.
+ * @regs: register state, passed on to the UE handler.
+ * @evt:  event to act on; its disposition is set to
+ *        MCE_DISPOSITION_RECOVERED when a recovery action succeeds.
+ *
+ * Dispatches on evt->error_type: UE errors go to mce_handle_ue_error(),
+ * SLB and ERAT errors flush and reload the SLB, TLB errors flush the TLB.
+ * Any other error type is reported as not handled.
+ *
+ * Returns non-zero if the event is (or already was) recovered, 0 otherwise.
+ */
+long handle_mce_errors(struct pt_regs *regs, struct OpalMachineCheckEvent *evt)
+{
+	long rc;
+
+	/* Nothing to do when the event is already marked as recovered. */
+	if (evt->disposition == MCE_DISPOSITION_RECOVERED)
+		return 1;
+
+	switch (evt->error_type) {
+	case MCE_ERROR_TYPE_UE:
+		rc = mce_handle_ue_error(regs);
+		break;
+	case MCE_ERROR_TYPE_SLB:
+	case MCE_ERROR_TYPE_ERAT:
+		flush_and_reload_slb();
+		rc = 1;
+		break;
+	case MCE_ERROR_TYPE_TLB:
+		rc = flush_tlb();
+		break;
+	default:
+		/* No recovery action for this error type. */
+		return 0;
+	}
+
+	if (rc != 0)
+		evt->disposition = MCE_DISPOSITION_RECOVERED;
+	return rc;
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 263c57e..f1115c4 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -501,6 +501,8 @@ int opal_machine_check_early(struct pt_regs *regs, long *handled)
if (rc != OPAL_SUCCESS)
return -1;
+ *handled = handle_mce_errors(regs, &evt);
+ set_mce_event(&evt);
return 0;
}
More information about the Linuxppc-dev
mailing list