[Skiboot] [PATCH 15/16] core/exceptions: allow recoverable sreset exceptions
Nicholas Piggin
npiggin at gmail.com
Tue Jan 8 01:04:27 AEDT 2019
This requires implementing the MSR[RI] (Recoverable Interrupt) bit:
RI is set once it is safe to take a nested exception, and cleared
around the SRR0/SRR1 restore sequence on exception return. With that
in place, all non-fatal sreset exceptions are allowed to recover
instead of aborting.
Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
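[Editor's note, not part of the patch: the recoverability rule the diff adds to
exception_entry() boils down to "a system reset (vector 0x100) taken while the
interrupted context had MSR[RI] set may return; everything else is fatal". The
standalone C model below illustrates only that decision; struct frame,
sreset_is_recoverable() and the local MSR_RI define are simplified assumptions
for the sketch, not skiboot code.]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_RI	0x2ull		/* Power ISA MSR[RI] bit (bit 62) */

struct frame {
	uint64_t type;		/* exception vector, e.g. 0x100 for sreset */
	uint64_t srr1;		/* MSR of the interrupted context */
};

/* True when the handler may return to the interrupted context. */
static bool sreset_is_recoverable(const struct frame *f)
{
	if (f->type != 0x100)		/* only system reset may recover */
		return false;
	if (!(f->srr1 & MSR_RI))	/* interrupted code had RI clear */
		return false;
	return true;
}

int main(void)
{
	struct frame ok  = { .type = 0x100, .srr1 = MSR_RI };
	struct frame bad = { .type = 0x100, .srr1 = 0 };

	printf("sreset with RI set:   %s\n",
	       sreset_is_recoverable(&ok)  ? "recover" : "fatal");
	printf("sreset with RI clear: %s\n",
	       sreset_is_recoverable(&bad) ? "recover" : "fatal");
	return 0;
}

[In the patch itself, HV-class vectors take the MSR from HSRR1 instead of SRR1,
and machine checks (0x200) and all other vectors force fatal regardless of RI.]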
asm/head.S | 4 ++++
core/cpu.c | 1 +
core/exceptions.c | 48 +++++++++++++++++++++++++++++++++++-----------
core/fast-reboot.c | 2 ++
core/init.c | 15 +++++++++------
include/skiboot.h | 4 ++--
6 files changed, 55 insertions(+), 19 deletions(-)
diff --git a/asm/head.S b/asm/head.S
index 85ca57cb7..4a00532df 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -175,7 +175,9 @@ _exception:
stw %r3,STACK_DSISR(%r1)
std %r4,STACK_DAR(%r1)
mfmsr %r3
+ li %r4,MSR_RI
std %r3,STACK_MSR(%r1)
+ mtmsrd %r4,1
mfspr %r3,SPR_HSRR0
mfspr %r4,SPR_HSRR1
std %r3,STACK_HSRR0(%r1)
@@ -276,6 +278,8 @@ exception_entry_foo:
REST_GPR(29,%r1)
REST_GPR(30,%r1)
REST_GPR(31,%r1)
+ li %r3,0
+ mtmsrd %r3,1 /* Clear MSR[RI] */
ld %r3,STACK_SRR0(%r1)
mtspr SPR_SRR0,%r3
ld %r3,STACK_SRR1(%r1)
diff --git a/core/cpu.c b/core/cpu.c
index 4820beb31..c1a0a0258 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -534,6 +534,7 @@ static void cpu_idle_pm(enum cpu_wake_cause wake_on)
default:
break;
}
+ mtmsrd(MSR_RI, 1);
}
}
diff --git a/core/exceptions.c b/core/exceptions.c
index 1c2917357..779cd620a 100644
--- a/core/exceptions.c
+++ b/core/exceptions.c
@@ -39,8 +39,10 @@ static void dump_regs(struct stack_frame *stack)
i, stack->gpr[i], i + 16, stack->gpr[i + 16]);
}
-void __noreturn exception_entry(struct stack_frame *stack)
+void exception_entry(struct stack_frame *stack)
{
+ bool fatal = false;
+ bool hv;
uint64_t nip;
uint64_t msr;
const size_t max = 320;
@@ -57,24 +59,40 @@ void __noreturn exception_entry(struct stack_frame *stack)
case 0xe80:
case 0xea0:
case 0xf80:
- nip = stack->hsrr0;
- msr = stack->hsrr1;
+ hv = true;
break;
default:
+ hv = false;
+ break;
+ }
+
+ if (hv) {
+ nip = stack->hsrr0;
+ msr = stack->hsrr1;
+ } else {
nip = stack->srr0;
msr = stack->srr1;
- break;
}
+ if (!(msr & MSR_RI))
+ fatal = true;
+
prerror("***********************************************\n");
l = 0;
if (stack->type == 0x100) {
- l += snprintf(buf + l, max - l,
- "Fatal System Reset at "REG" ", nip);
+ if (fatal) {
+ l += snprintf(buf + l, max - l,
+ "Fatal System Reset at "REG" ", nip);
+ } else {
+ l += snprintf(buf + l, max - l,
+ "System Reset at "REG" ", nip);
+ }
} else if (stack->type == 0x200) {
+ fatal = true;
l += snprintf(buf + l, max - l,
"Fatal MCE at "REG" ", nip);
} else {
+ fatal = true;
l += snprintf(buf + l, max - l,
"Fatal Exception 0x%llx at "REG" ", stack->type, nip);
}
@@ -83,10 +101,19 @@ void __noreturn exception_entry(struct stack_frame *stack)
prerror("%s\n", buf);
dump_regs(stack);
- abort();
+ if (fatal)
+ abort();
+ else
+ backtrace();
+
+ if (hv) {
+ /* Set up for SRR return */
+ stack->srr0 = nip;
+ stack->srr1 = msr;
+ }
}
-void __noreturn exception_entry_pm_sreset(void)
+void exception_entry_pm_sreset(void)
{
const size_t max = 320;
char buf[max];
@@ -95,10 +122,9 @@ void __noreturn exception_entry_pm_sreset(void)
prerror("***********************************************\n");
l = 0;
l += snprintf(buf + l, max - l,
- "Fatal System Reset in sleep");
+ "System Reset in sleep");
prerror("%s\n", buf);
-
- abort();
+ backtrace();
}
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index d841474d9..22160b65d 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -196,6 +196,7 @@ void fast_reboot(void)
* crash.
*/
enable_machine_check();
+ mtmsrd(MSR_RI, 1);
/*
* sreset vector has a FIXUP_ENDIAN sequence at the start, so
@@ -358,6 +359,7 @@ void __noreturn fast_reboot_entry(void)
sync();
cleanup_cpu_state();
enable_machine_check();
+ mtmsrd(MSR_RI, 1);
__secondary_cpu_entry();
}
diff --git a/core/init.c b/core/init.c
index e09550740..132a42a08 100644
--- a/core/init.c
+++ b/core/init.c
@@ -475,12 +475,13 @@ static void load_initramfs(void)
}
}
-static void cpu_disable_ME_one(void *param __unused)
+static void cpu_disable_ME_RI_one(void *param __unused)
{
disable_machine_check();
+ mtmsrd(0, 1);
}
-static int64_t cpu_disable_ME_all(void)
+static int64_t cpu_disable_ME_RI_all(void)
{
struct cpu_thread *cpu;
struct cpu_job **jobs;
@@ -491,12 +492,12 @@ static int64_t cpu_disable_ME_all(void)
for_each_available_cpu(cpu) {
if (cpu == this_cpu())
continue;
- jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME",
- cpu_disable_ME_one, NULL);
+ jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI",
+ cpu_disable_ME_RI_one, NULL);
}
/* this cpu */
- cpu_disable_ME_one(NULL);
+ cpu_disable_ME_RI_one(NULL);
for_each_available_cpu(cpu) {
if (jobs[cpu->pir])
@@ -620,7 +621,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
kernel_entry, fdt, fdt_totalsize(fdt));
/* Disable machine checks on all */
- cpu_disable_ME_all();
+ cpu_disable_ME_RI_all();
debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;
@@ -951,6 +952,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
* recover, but we print some useful information.
*/
enable_machine_check();
+ mtmsrd(MSR_RI, 1);
/* Setup a NULL catcher to catch accidental NULL ptr calls */
setup_branch_null_catcher();
@@ -1282,6 +1284,7 @@ void __noreturn __secondary_cpu_entry(void)
cpu_callin(cpu);
enable_machine_check();
+ mtmsrd(MSR_RI, 1);
/* Some XIVE setup */
xive_cpu_callin(cpu);
diff --git a/include/skiboot.h b/include/skiboot.h
index 0f6a85520..c06146d06 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -287,8 +287,8 @@ extern void fake_rtc_init(void);
/* Exceptions */
struct stack_frame;
-extern void __noreturn exception_entry(struct stack_frame *stack);
-extern void __noreturn exception_entry_pm_sreset(void);
+extern void exception_entry(struct stack_frame *stack);
+extern void exception_entry_pm_sreset(void);
/* Assembly in head.S */
extern void disable_machine_check(void);
--
2.18.0