[Skiboot] [RFC PATCH] asm/head: implement quiescing without stack or clobbering regs
Nicholas Piggin
npiggin at gmail.com
Thu Mar 15 01:16:50 AEDT 2018
Quiescing is currently implemented in C in opal_entry, before the
opal call handler is called. This works well enough for simple
cases such as fast reset, where one CPU wants all others out of
the way.
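For reference, a quiescer such as the fast reset path holds or
rejects the other CPUs via opal_quiesce() before doing its work.
The following is only a rough usage sketch based on the
opal_quiesce() signature and QUIESCE_* constants touched by this
patch; the -1 "all CPUs" target and the return value handling are
assumptions, not taken from this patch:

	/* Sketch: hold all other CPUs outside OPAL, do the work, resume */
	if (opal_quiesce(QUIESCE_HOLD, -1) != OPAL_SUCCESS)
		return;		/* the other CPUs could not be quiesced */

	/* ... work that must not race with concurrent OPAL calls ... */

	opal_quiesce(QUIESCE_RESUME, -1);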
Linux would like to use quiescing to prevent an sreset IPI from
interrupting firmware, which could lead to deadlocks when crash
dumping or entering the debugger. Linux interrupts do not recover
well when they return to general OPAL code, because r13 is not
restored. OPAL also can't be re-entered, which may happen, e.g.,
from the debugger.
So move the quiesce hold/reject into the entry code, before the
stack is set up and before the r1 or r13 registers are switched.
OPAL can be interrupted and returned to, or re-entered, during
this period.
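In C terms, the protocol now performed in assembly at entry (with
the matching decrement at exit) looks roughly like the
opal_entry_check() logic removed below, just done before any stack
or r13 switch. This is a sketch only; the real implementation is
the head.S code in this patch:

	again:
		cpu->in_opal_call++;
		sync();	/* order store of in_opal_call vs load of quiesce */
		if (cpu->quiesce_opal_call) {
			cpu->in_opal_call--;
			if (cpu->quiesce_opal_call == QUIESCE_REJECT)
				return OPAL_BUSY;
			smt_lowest();
			while (cpu->quiesce_opal_call == QUIESCE_HOLD)
				barrier();
			smt_medium();
			goto again;
		}
		/* ... switch stack and r13, run the call handler ... */
		sync();	/* release barrier vs quiescing */
		cpu->in_opal_call--;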
This does not completely solve all such problems: OPAL will still
be interrupted by an sreset if the quiesce times out, and it can
be interrupted by MCEs as well. Those cases still have the issues
described above.
---
asm/asm-offsets.c | 2 ++
asm/head.S | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
core/opal.c | 33 +++++--------------------
include/cpu.h | 2 +-
4 files changed, 75 insertions(+), 34 deletions(-)
diff --git a/asm/asm-offsets.c b/asm/asm-offsets.c
index 71199503..3eac592d 100644
--- a/asm/asm-offsets.c
+++ b/asm/asm-offsets.c
@@ -37,6 +37,8 @@ int main(void)
OFFSET(CPUTHREAD_PIR, cpu_thread, pir);
OFFSET(CPUTHREAD_SAVE_R1, cpu_thread, save_r1);
OFFSET(CPUTHREAD_STATE, cpu_thread, state);
+ OFFSET(CPUTHREAD_IN_OPAL_CALL, cpu_thread, in_opal_call);
+ OFFSET(CPUTHREAD_QUIESCE_OPAL_CALL, cpu_thread, quiesce_opal_call);
OFFSET(CPUTHREAD_CUR_TOKEN, cpu_thread, current_token);
DEFINE(CPUTHREAD_GAP, sizeof(struct cpu_thread) + STACK_SAFETY_GAP);
#ifdef STACK_CHECK_ENABLED
diff --git a/asm/head.S b/asm/head.S
index fb4b95f7..89af5cdd 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -952,14 +952,64 @@ opal_boot_trampoline:
* r0: Token
* r2: OPAL Base
* r3..r10: Args
- * r12: Scratch
+ * r11..r12: Scratch
* r13..r31: Preserved
- *
*/
.balign 0x10
.global opal_entry
opal_entry:
- /* Get our per CPU stack */
+ /* Get our per CPU pointer in r12 to check for quiesce */
+ mfspr %r12,SPR_PIR
+ GET_STACK(%r12,%r12)
+
+ /* Get CPU thread */
+ clrrdi %r12,%r12,STACK_SHIFT
+
+ /*
+ * OPAL entry must first increment in_opal_call, then check
+ * for quiesce, without touching the stack or clobbering
+ * registers other than r11 and r12 and cr0. In this way, OPAL
+ * is tolerant of re-entry on this same CPU while it is spinning
+ * for quiesce.
+ *
+ * Sequence goes:
+ * in_opal_call++;
+ * sync;
+ * if (quiesce_opal_call) {
+ * in_opal_call--;
+ * reject-or-spin-then-retry;
+ */
+1: lwz %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ addi %r11,%r11,1
+ stw %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ /*
+ * Order the store in_opal_call vs load quiesce_opal_call.
+ * This also provides an acquire barrier for opal entry vs
+ * another thread quiescing opal. In this way, quiescing
+ * can behave as mutual exclusion.
+ */
+ sync
+ lwz %r11,CPUTHREAD_QUIESCE_OPAL_CALL(%r12)
+ cmpwi %cr0,%r11,0
+ beq+ 4f
+ /* We are quiescing, hold or reject */
+ cmpwi %cr0,%r11,QUIESCE_REJECT
+ bne 2f
+ li %r3,OPAL_BUSY
+ b 5f /* reject */
+2: /* hold */
+ lwz %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ subi %r11,%r11,1
+ stw %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ smt_lowest
+3: lwz %r11,CPUTHREAD_QUIESCE_OPAL_CALL(%r12)
+ cmpwi %cr0,%r11,QUIESCE_HOLD
+ beq 3b
+ /* spin finished, try again */
+ smt_medium
+ b 1b
+
+4: /* Quiesce protocol done, get our per CPU stack */
mfspr %r12,SPR_PIR
GET_STACK(%r12,%r12)
stdu %r12,-STACK_FRAMESIZE(%r12)
@@ -1002,7 +1052,7 @@ opal_entry:
mr %r3,%r1
bl opal_entry_check
cmpdi %r3,0
- bne 1f
+ bne 5f
ld %r0,STACK_GPR0(%r1)
ld %r3,STACK_GPR3(%r1)
@@ -1027,12 +1077,22 @@ opal_entry:
bctrl
mr %r4,%r1
- bl opal_exit_check
+ bl opal_exit_check /* r3 is preserved */
-1: ld %r12,STACK_LR(%r1)
+ /*
+ * Restore r1 and r13 before decrementing in_opal_call.
+ * Move per-cpu pointer to volatile r12, restore lr, r1, r13.
+ */
+5:
+ ld %r12,STACK_LR(%r1)
mtlr %r12
+ mr %r12,%r13
ld %r13,STACK_GPR13(%r1)
ld %r1,STACK_GPR1(%r1)
+ sync /* release barrier vs quiescing */
+ lwz %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
+ subi %r11,%r11,1
+ stw %r11,CPUTHREAD_IN_OPAL_CALL(%r12)
blr
.global start_kernel
diff --git a/core/opal.c b/core/opal.c
index 9bf2ad84..b12b6ee3 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -142,32 +142,12 @@ int64_t opal_entry_check(struct stack_frame *eframe)
if (!opal_check_token(token))
return opal_bad_token(token);
- if (!opal_quiesce_state && cpu->in_opal_call) {
+ if (!opal_quiesce_state && cpu->in_opal_call > 1) {
printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n",
mfspr(SPR_PIR), cpu, cpu->pir, token);
return OPAL_BUSY;
}
-again:
- cpu->in_opal_call++;
- /*
- * Order the store in_opal_call vs load quiesce_opal_call.
- * This also provides an acquire barrier for opal entry vs
- * another thread quiescing opal. In this way, quiescing
- * can behave as mutual exclusion.
- */
- sync();
- if (cpu->quiesce_opal_call) {
- cpu->in_opal_call--;
- if (opal_quiesce_state == QUIESCE_REJECT)
- return OPAL_BUSY;
- smt_lowest();
- while (cpu->quiesce_opal_call)
- barrier();
- smt_medium();
- goto again;
- }
-
return OPAL_SUCCESS;
}
@@ -181,14 +161,13 @@ int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
if (!cpu->in_opal_call) {
printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n",
mfspr(SPR_PIR), cpu, cpu->pir, token, retval);
+ cpu->in_opal_call++; /* avoid exit path underflowing */
} else {
if (!list_empty(&cpu->locks_held)) {
prlog(PR_ERR, "OPAL exiting with locks held, token=%llu retval=%lld\n",
token, retval);
drop_my_locks(true);
}
- sync(); /* release barrier vs quiescing */
- cpu->in_opal_call--;
}
return retval;
}
@@ -238,7 +217,7 @@ int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
bust_locks = false;
sync(); /* release barrier vs opal entry */
if (target) {
- target->quiesce_opal_call = false;
+ target->quiesce_opal_call = 0;
} else {
for_each_cpu(c) {
if (quiesce_type == QUIESCE_RESUME_FAST_REBOOT)
@@ -248,7 +227,7 @@ int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
assert(!c->quiesce_opal_call);
continue;
}
- c->quiesce_opal_call = false;
+ c->quiesce_opal_call = 0;
}
}
sync();
@@ -266,12 +245,12 @@ int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target)
}
if (target) {
- target->quiesce_opal_call = true;
+ target->quiesce_opal_call = quiesce_type;
} else {
for_each_cpu(c) {
if (c == cpu)
continue;
- c->quiesce_opal_call = true;
+ c->quiesce_opal_call = quiesce_type;
}
}
diff --git a/include/cpu.h b/include/cpu.h
index 220fa1df..59e7f473 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -61,10 +61,10 @@ struct cpu_thread {
uint64_t save_r1;
void *icp_regs;
uint32_t in_opal_call;
+ uint32_t quiesce_opal_call;
uint32_t con_suspend;
struct list_head locks_held;
bool con_need_flush;
- bool quiesce_opal_call;
bool in_mcount;
bool in_poller;
bool in_reinit;
--
2.16.1