[RFC PATCH 7/7] powerpc/book3s: Display task info for MCE error in user mode.

Mahesh J Salgaonkar mahesh at linux.vnet.ibm.com
Tue Feb 21 12:53:07 AEDT 2017


From: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>

For an MCE that hits while in user mode MSR(HV=1,PR=1), print the task info in
the console MCE error log. This will help to identify the application that
stumbled upon the MCE error.

After this patch the MCE console log would look like:

[    2.246155] Severe Machine check interrupt [Recovered]
[    2.246178]   Initiator: CPU
[    2.246199]   NIP: [0000000010039778] PID: 813 Comm: ebizzy
[    2.246223]   Error type: ERAT [Multihit]
[    2.246244]     Effective address: 00003fff94070000

[114560.247515] Severe Machine check interrupt [Recovered]
[114560.247562]   Initiator: CPU
[114560.247599]   NIP [d00000000d2e019c]: init_module+0x19c/0x260 [bork_kernel]
[114560.247666]   Error type: SLB [Multihit]
[114560.247701]     Effective address: d000000023db0000


Signed-off-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/mce.h        |    3 ++-
 arch/powerpc/kernel/mce.c             |   12 +++++++++---
 arch/powerpc/platforms/powernv/opal.c |    2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 69e4a42..99dd1f3 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -91,7 +91,8 @@ extern int get_mce_event(struct OpalMachineCheckEvent *mce, bool release);
 extern int set_mce_event(struct OpalMachineCheckEvent *mce);
 extern void release_mce_event(void);
 extern void machine_check_queue_event(void);
-extern void machine_check_print_event_info(struct OpalMachineCheckEvent *evt);
+extern void machine_check_print_event_info(struct OpalMachineCheckEvent *evt,
+							bool user_mode);
 extern uint64_t get_mce_fault_addr(struct OpalMachineCheckEvent *evt);
 extern long handle_mce_errors(struct pt_regs *regs,
 					struct OpalMachineCheckEvent *evt);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 035ef53..af36824 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -235,12 +235,13 @@ static void machine_check_process_queued_event(struct irq_work *work)
 	while (__this_cpu_read(mce_queue_count) > 0) {
 		index = __this_cpu_read(mce_queue_count) - 1;
 		machine_check_print_event_info(
-				this_cpu_ptr(&mce_event_queue[index]));
+				this_cpu_ptr(&mce_event_queue[index]), false);
 		__this_cpu_dec(mce_queue_count);
 	}
 }
 
-void machine_check_print_event_info(struct OpalMachineCheckEvent *evt)
+void machine_check_print_event_info(struct OpalMachineCheckEvent *evt,
+							bool user_mode)
 {
 	const char *level, *sevstr, *subtype;
 	static const char *mc_ue_types[] = {
@@ -320,8 +321,13 @@ void machine_check_print_event_info(struct OpalMachineCheckEvent *evt)
 	       "Recovered" : "[Not recovered");
 	printk("%s  Initiator: %s\n", level,
 	       evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
-	printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
+	if (user_mode) {
+		printk("%s  NIP: [%016llx] PID: %d Comm: %s\n", level,
+				evt->srr0, current->pid, current->comm);
+	} else {
+		printk("%s  NIP [%016llx]: %pS\n", level, evt->srr0,
 							(void *)evt->srr0);
+	}
 	switch (evt->error_type) {
 	case MCE_ERROR_TYPE_UE:
 		subtype = evt->u.ue_error.ue_error_type <
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f1115c4..49f193c 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -444,7 +444,7 @@ int opal_machine_check(struct pt_regs *regs)
 		       evt.version);
 		return 0;
 	}
-	machine_check_print_event_info(&evt);
+	machine_check_print_event_info(&evt, user_mode(regs));
 
 	if (opal_recover_mce(regs, &evt))
 		return 1;



More information about the Linuxppc-dev mailing list