[Skiboot] [PATCH 15/16] core/exceptions: allow recoverable sreset exceptions

Nicholas Piggin npiggin at gmail.com
Tue Jan 8 01:04:27 AEDT 2019


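Recovering from a system reset (sreset) exception requires implementing
the MSR[RI] bit, so that the exception handler can tell whether the
interrupted context is safe to return to. With that in place, allow all
non-fatal sreset exceptions (those taken with MSR[RI] set) to recover.
Machine checks and all other exceptions remain fatal.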

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 asm/head.S         |  4 ++++
 core/cpu.c         |  1 +
 core/exceptions.c  | 48 +++++++++++++++++++++++++++++++++++-----------
 core/fast-reboot.c |  2 ++
 core/init.c        | 15 +++++++++------
 include/skiboot.h  |  4 ++--
 6 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/asm/head.S b/asm/head.S
index 85ca57cb7..4a00532df 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -175,7 +175,9 @@ _exception:
 	stw	%r3,STACK_DSISR(%r1)
 	std	%r4,STACK_DAR(%r1)
 	mfmsr	%r3
+	li	%r4,MSR_RI
 	std	%r3,STACK_MSR(%r1)
+	mtmsrd	%r4,1
 	mfspr	%r3,SPR_HSRR0
 	mfspr	%r4,SPR_HSRR1
 	std	%r3,STACK_HSRR0(%r1)
@@ -276,6 +278,8 @@ exception_entry_foo:
 	REST_GPR(29,%r1)
 	REST_GPR(30,%r1)
 	REST_GPR(31,%r1)
+	li	%r3,0
+	mtmsrd	%r3,1	/* Clear MSR[RI] */
 	ld	%r3,STACK_SRR0(%r1)
 	mtspr	SPR_SRR0,%r3
 	ld	%r3,STACK_SRR1(%r1)
diff --git a/core/cpu.c b/core/cpu.c
index 4820beb31..c1a0a0258 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -534,6 +534,7 @@ static void cpu_idle_pm(enum cpu_wake_cause wake_on)
 		default:
 			break;
 		}
+		mtmsrd(MSR_RI, 1);
 	}
 }
 
diff --git a/core/exceptions.c b/core/exceptions.c
index 1c2917357..779cd620a 100644
--- a/core/exceptions.c
+++ b/core/exceptions.c
@@ -39,8 +39,10 @@ static void dump_regs(struct stack_frame *stack)
 		       i, stack->gpr[i], i + 16, stack->gpr[i + 16]);
 }
 
-void __noreturn exception_entry(struct stack_frame *stack)
+void exception_entry(struct stack_frame *stack)
 {
+	bool fatal = false;
+	bool hv;
 	uint64_t nip;
 	uint64_t msr;
 	const size_t max = 320;
@@ -57,24 +59,40 @@ void __noreturn exception_entry(struct stack_frame *stack)
 	case 0xe80:
 	case 0xea0:
 	case 0xf80:
-		nip = stack->hsrr0;
-		msr = stack->hsrr1;
+		hv = true;
 		break;
 	default:
+		hv = false;
+		break;
+	}
+
+	if (hv) {
+		nip = stack->hsrr0;
+		msr = stack->hsrr1;
+	} else {
 		nip = stack->srr0;
 		msr = stack->srr1;
-		break;
 	}
 
+	if (!(msr & MSR_RI))
+		fatal = true;
+
 	prerror("***********************************************\n");
 	l = 0;
 	if (stack->type == 0x100) {
-		l += snprintf(buf + l, max - l,
-			"Fatal System Reset at "REG"   ", nip);
+		if (fatal) {
+			l += snprintf(buf + l, max - l,
+				"Fatal System Reset at "REG"   ", nip);
+		} else {
+			l += snprintf(buf + l, max - l,
+				"System Reset at "REG"   ", nip);
+		}
 	} else if (stack->type == 0x200) {
+		fatal = true;
 		l += snprintf(buf + l, max - l,
 			"Fatal MCE at "REG"   ", nip);
 	} else {
+		fatal = true;
 		l += snprintf(buf + l, max - l,
 			"Fatal Exception 0x%llx at "REG"  ", stack->type, nip);
 	}
@@ -83,10 +101,19 @@ void __noreturn exception_entry(struct stack_frame *stack)
 	prerror("%s\n", buf);
 	dump_regs(stack);
 
-	abort();
+	if (fatal)
+		abort();
+	else
+		backtrace();
+
+	if (hv) {
+		/* Set up for SRR return */
+		stack->srr0 = nip;
+		stack->srr1 = msr;
+	}
 }
 
-void __noreturn exception_entry_pm_sreset(void)
+void exception_entry_pm_sreset(void)
 {
 	const size_t max = 320;
 	char buf[max];
@@ -95,10 +122,9 @@ void __noreturn exception_entry_pm_sreset(void)
 	prerror("***********************************************\n");
 	l = 0;
 	l += snprintf(buf + l, max - l,
-		"Fatal System Reset in sleep");
+		"System Reset in sleep");
 	prerror("%s\n", buf);
-
-	abort();
+	backtrace();
 }
 
 
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index d841474d9..22160b65d 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -196,6 +196,7 @@ void fast_reboot(void)
 	 * crash.
 	 */
 	enable_machine_check();
+	mtmsrd(MSR_RI, 1);
 
 	/*
 	 * sreset vector has a FIXUP_ENDIAN sequence at the start, so
@@ -358,6 +359,7 @@ void __noreturn fast_reboot_entry(void)
 		sync();
 		cleanup_cpu_state();
 		enable_machine_check();
+		mtmsrd(MSR_RI, 1);
 
 		__secondary_cpu_entry();
 	}
diff --git a/core/init.c b/core/init.c
index e09550740..132a42a08 100644
--- a/core/init.c
+++ b/core/init.c
@@ -475,12 +475,13 @@ static void load_initramfs(void)
 	}
 }
 
-static void cpu_disable_ME_one(void *param __unused)
+static void cpu_disable_ME_RI_one(void *param __unused)
 {
 	disable_machine_check();
+	mtmsrd(0, 1);
 }
 
-static int64_t cpu_disable_ME_all(void)
+static int64_t cpu_disable_ME_RI_all(void)
 {
 	struct cpu_thread *cpu;
 	struct cpu_job **jobs;
@@ -491,12 +492,12 @@ static int64_t cpu_disable_ME_all(void)
 	for_each_available_cpu(cpu) {
 		if (cpu == this_cpu())
 			continue;
-		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME",
-						cpu_disable_ME_one, NULL);
+		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI",
+						cpu_disable_ME_RI_one, NULL);
 	}
 
 	/* this cpu */
-	cpu_disable_ME_one(NULL);
+	cpu_disable_ME_RI_one(NULL);
 
 	for_each_available_cpu(cpu) {
 		if (jobs[cpu->pir])
@@ -620,7 +621,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
 	       kernel_entry, fdt, fdt_totalsize(fdt));
 
 	/* Disable machine checks on all */
-	cpu_disable_ME_all();
+	cpu_disable_ME_RI_all();
 
 	debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;
 
@@ -951,6 +952,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	 * recover, but we print some useful information.
 	 */
 	enable_machine_check();
+	mtmsrd(MSR_RI, 1);
 
 	/* Setup a NULL catcher to catch accidental NULL ptr calls */
 	setup_branch_null_catcher();
@@ -1282,6 +1284,7 @@ void __noreturn __secondary_cpu_entry(void)
 	cpu_callin(cpu);
 
 	enable_machine_check();
+	mtmsrd(MSR_RI, 1);
 
 	/* Some XIVE setup */
 	xive_cpu_callin(cpu);
diff --git a/include/skiboot.h b/include/skiboot.h
index 0f6a85520..c06146d06 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -287,8 +287,8 @@ extern void fake_rtc_init(void);
 
 /* Exceptions */
 struct stack_frame;
-extern void __noreturn exception_entry(struct stack_frame *stack);
-extern void __noreturn exception_entry_pm_sreset(void);
+extern void exception_entry(struct stack_frame *stack);
+extern void exception_entry_pm_sreset(void);
 
 /* Assembly in head.S */
 extern void disable_machine_check(void);
-- 
2.18.0



More information about the Skiboot mailing list