[Skiboot] [RFC/WIP/PATCH v2] Fast reboot for P8
Benjamin Herrenschmidt
benh at kernel.crashing.org
Fri Jul 22 19:53:50 AEST 2016
This is an experimental patch that implements "Fast reboot" on P8
machines.
The basic idea is that when the OS calls OPAL reboot, we gather all
the threads in the system using a combination of patching the reset
vector and soft-resetting them, then cleanup a few bits of hardware
(we do re-probe PCIe for example), and reload & restart the bootloader.
This is very experimental and needs a lot of testing and also auditing
code for other bits of HW that might need to be cleaned up. I also need
to check if we are properly PERST'ing PCI devices.
I've successfully fast rebooted a Habanero a few times.
This is partially based on old code I had to do that on P7. I only
support it on P8 though as there are issues with the PSI interrupts
on P7 that cannot be reliably solved.
Not-yet-signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---
v2. Properly cleanup the TLB
Handle when OS had cores in fast sleep (restore TB and undo workaround)
Untested attempt at dealing with split cores (unsplit them)
Additional cleanups
asm/head.S | 143 +++++++-------
core/device.c | 8 +
core/fast-reboot.c | 516 +++++++++++++++++++++++++++++++++------------------
core/init.c | 6 +-
core/interrupts.c | 6 +-
core/lock.c | 3 +
core/pci.c | 13 +-
core/platform.c | 4 +-
hw/fsp/fsp-console.c | 5 +
hw/occ.c | 5 +
hw/psi.c | 97 ++++------
hw/slw.c | 9 +-
include/config.h | 4 +-
include/cpu.h | 1 +
include/device.h | 2 +
include/interrupts.h | 2 +-
include/processor.h | 1 +
include/skiboot.h | 8 +-
18 files changed, 518 insertions(+), 315 deletions(-)
diff --git a/asm/head.S b/asm/head.S
index e92f9b8..2432bd4 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -294,7 +294,7 @@ boot_entry:
bne secondary_wait
/* Initialize per-core SPRs */
- bl init_shared_sprs
+ bl init_shared_sprs
/* Pick a boot CPU, cpu index in r31 */
LOAD_IMM32(%r3, boot_sem - __head)
@@ -311,7 +311,7 @@ boot_entry:
smt_medium
/* Initialize thread SPRs */
- bl init_replicated_sprs
+ bl init_replicated_sprs
/* Save the initial offset. The secondary threads will spin on boot_flag
* before relocation so we need to keep track of its location to wake
@@ -410,11 +410,11 @@ secondary_wait:
add %r3,%r3,%r30
mtctr %r3
isync
- bctr
+ bctr
1:
/* Now wait for cpu_secondary_start to be set */
LOAD_ADDR_FROM_TOC(%r3, cpu_secondary_start)
-1: smt_very_low
+1: smt_very_low
ld %r0,0(%r3)
cmpdi %r0,0
beq 1b
@@ -457,64 +457,6 @@ call_relocate:
1: /* Fatal relocate failure */
attn
-/* This is a little piece of code that is copied down to
- * 0x100 when doing a "fast reset"
- */
-.global fast_reset_patch_start
-fast_reset_patch_start:
- smt_medium
- LOAD_IMM64(%r30, SKIBOOT_BASE)
- LOAD_IMM32(%r3, fast_reset_entry - __head)
- add %r3,%r30,%r3
- mtctr %r3
- bctr
-.global fast_reset_patch_end
-fast_reset_patch_end:
-
-/* Fast reset code. We clean up the TLB and a few SPRs and
- * return to C code. All CPUs do that, the CPU triggering the
- * reset does it to itself last. The C code will sort out who
- * the master is. We come from the trampoline above with
- * r30 containing SKIBOOT_BASE
- */
-fast_reset_entry:
- /* Clear out SLB */
- li %r6,0
- slbmte %r6,%r6
- slbia
- ptesync
-
- /* Get PIR */
- mfspr %r31,SPR_PIR
-
- /* Get a stack and restore r13 */
- GET_STACK(%r1,%r31)
- li %r3,0
- std %r3,0(%r1)
- std %r3,8(%r1)
- std %r3,16(%r1)
- GET_CPU()
-
- /* Get our TOC */
- addis %r2,%r30,(__toc_start - __head)@ha
- addi %r2,%r2,(__toc_start - __head)@l
-
- /* Go to C ! */
- bl fast_reboot
- b .
-
-.global cleanup_tlb
-cleanup_tlb:
- /* Clean the TLB */
- li %r3,128
- mtctr %r3
- li %r4,0x800 /* IS field = 0b10 */
- ptesync
-1: tlbiel %r4
- addi %r4,%r4,0x1000
- bdnz 1b
- ptesync
-
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
b $+36; /* Skip trampoline if endian is good */ \
@@ -652,6 +594,66 @@ rvwinkle_restore:
mtlr %r0
blr
+/* This is a little piece of code that is copied down to
+ * 0x100 when doing a "fast reset"
+ */
+.global fast_reset_patch_start
+fast_reset_patch_start:
+ FIXUP_ENDIAN
+ smt_medium
+ LOAD_IMM64(%r30, SKIBOOT_BASE)
+ LOAD_IMM32(%r3, fast_reset_entry - __head)
+ add %r3,%r30,%r3
+ mtctr %r3
+ bctr
+.global fast_reset_patch_end
+fast_reset_patch_end:
+
+/* Fast reset code. We clean up the TLB and a few SPRs and
+ * return to C code. All CPUs do that, the CPU triggering the
+ * reset does it to itself last. The C code will sort out who
+ * the master is. We come from the trampoline above with
+ * r30 containing SKIBOOT_BASE
+ */
+fast_reset_entry:
+ /* Clear out SLB */
+ li %r6,0
+ slbmte %r6,%r6
+ slbia
+ ptesync
+
+ /* Get PIR */
+ mfspr %r31,SPR_PIR
+
+ /* Get a stack and restore r13 */
+ GET_STACK(%r1,%r31)
+ li %r3,0
+ std %r3,0(%r1)
+ std %r3,8(%r1)
+ std %r3,16(%r1)
+ GET_CPU()
+
+ /* Get our TOC */
+ addis %r2,%r30,(__toc_start - __head)@ha
+ addi %r2,%r2,(__toc_start - __head)@l
+
+ /* Go to C ! */
+ bl fast_reboot_entry
+ b .
+
+.global cleanup_tlb
+cleanup_tlb:
+ /* Clean the TLB */
+ li %r3,512
+ mtctr %r3
+ li %r4,0xc00 /* IS field = 0b11 */
+ ptesync
+1: tlbiel %r4
+ addi %r4,%r4,0x1000
+ bdnz 1b
+ ptesync
+ blr
+
/* Functions to initialize replicated and shared SPRs to sane
* values. This is called at boot and on soft-reset
*/
@@ -699,10 +701,14 @@ init_shared_sprs:
mtspr SPR_LPCR,%r3
sync
isync
- /* HID0: Clear bit 13 (enable core recovery) */
+ /* HID0: Clear bit 13 (enable core recovery)
+ * Clear bit 19 (HILE)
+ */
mfspr %r3,SPR_HID0
li %r0,1
- sldi %r0,%r0,(63-13)
+ sldi %r4,%r0,(63-13)
+ sldi %r5,%r0,(63-19)
+ or %r0,%r4,%r5
andc %r3,%r3,%r0
sync
mtspr SPR_HID0,%r3
@@ -734,6 +740,15 @@ init_replicated_sprs:
/* XXX TODO: Add more */
blr
+ .global enter_nap
+enter_nap:
+ std %r0,0(%r1)
+ ptesync
+ ld %r0,0(%r1)
+1: cmp %cr0,0,%r0,%r0
+ bne 1b
+ nap
+ b .
/*
*
* NACA structure, accessed by the FPS to find the SPIRA
diff --git a/core/device.c b/core/device.c
index 9e7ef0d..e7b53a8 100644
--- a/core/device.c
+++ b/core/device.c
@@ -581,6 +581,14 @@ const struct dt_property *dt_find_property(const struct dt_node *node,
return NULL;
}
+void dt_check_del_prop(struct dt_node *node, const char *name)
+{
+ struct dt_property *p;
+
+ p = __dt_find_property(node, name);
+ if (p)
+ dt_del_property(node, p);
+}
const struct dt_property *dt_require_property(const struct dt_node *node,
const char *name, int wanted_len)
{
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 30b77e9..ce6c967 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -25,242 +25,402 @@
#include <timebase.h>
#include <pci.h>
#include <chip.h>
+#include <chiptod.h>
+
+#define P8_EX_TCTL_DIRECT_CONTROLS(t) (0x10013000 + (t) * 0x10)
+#define P8_DIRECT_CTL_STOP PPC_BIT(63)
+#define P8_DIRECT_CTL_PRENAP PPC_BIT(47)
+#define P8_DIRECT_CTL_SRESET PPC_BIT(60)
-/*
- * To get control of all threads, we sreset them via XSCOM after
- * patching the 0x100 vector. This will work as long as the target
- * HRMOR is 0. If Linux ever uses HRMOR, we'll have to consider
- * a more messy approach.
- *
- * The SCOM register we want is called "Core RAS Control" in the doc
- * and EX0.EC.PC.TCTL_GENERATE#0.TCTL.DIRECT_CONTROLS in the SCOM list
- *
- * Bits in there change from CPU rev to CPU rev but the bit we care
- * about, bit 60 "sreset_request" appears to have stuck to the same
- * place in both P7 and P7+. The register also has the same SCOM
- * address
- */
-#define EX0_TCTL_DIRECT_CONTROLS0 0x08010400
-#define EX0_TCTL_DIRECT_CONTROLS1 0x08010440
-#define EX0_TCTL_DIRECT_CONTROLS2 0x08010480
-#define EX0_TCTL_DIRECT_CONTROLS3 0x080104c0
-#define TCTL_DC_SRESET_REQUEST PPC_BIT(60)
/* Flag tested by the OPAL entry code */
uint8_t reboot_in_progress;
-static struct cpu_thread *resettor, *resettee;
+static volatile bool fast_boot_release;
+static struct cpu_thread *last_man_standing;
+static struct lock reset_lock = LOCK_UNLOCKED;
-static void flush_caches(void)
+static int set_special_wakeup(struct cpu_thread *cpu)
{
- uint64_t base = SKIBOOT_BASE;
- uint64_t end = base + SKIBOOT_SIZE;
+ uint64_t val, poll_target, stamp;
+ uint32_t core_id;
+ int rc;
+
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
+
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
- /* Not sure what the effect of sreset is on cores, so let's
- * shoot a series of dcbf's on all cachelines that make up
- * our core memory just in case...
+ prlog(PR_DEBUG, "RESET: Waking up core 0x%x\n", core_id);
+
+ /*
+ * The original HWp reads the XSCOM first but ignores the result
+ * and error, let's do the same until I know for sure that is
+ * not necessary
*/
- while(base < end) {
- asm volatile("dcbf 0,%0" : : "r" (base) : "memory");
- base += 128;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP),
+ PPC_BIT(0));
+ if (rc) {
+ prerror("RESET: XSCOM error %d asserting special"
+ " wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
}
- sync();
+
+ /*
+ * HWP uses the history for Perf register here, dunno why it uses
+ * that one instead of the pHyp one, maybe to avoid clobbering it...
+ *
+ * In any case, it does that to check for run/nap vs. sleep/winkle/other
+ * to decide whether to poll on checkstop or not. Since we don't deal
+ * with checkstop conditions here, we ignore that part.
+ */
+
+ /*
+ * Now poll for completion of special wakeup. The HWP is nasty here,
+ * it will poll at 5ms intervals for up to 200ms. This is not quite
+ * acceptable for us at runtime, at least not until we have the
+ * ability to "context switch" HBRT. In practice, because we don't
+ * winkle, it will never take that long, so we increase the polling
+ * frequency to 1us per poll. However we do have to keep the same
+ * timeout.
+ *
+ * We don't use time_wait_ms() either for now as we don't want to
+ * poll the FSP here.
+ */
+ stamp = mftb();
+ poll_target = stamp + msecs_to_tb(200);
+ val = 0;
+ while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) {
+ /* Wait 1 us */
+ time_wait_us(1);
+
+ /* Read PM state */
+ rc = xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0),
+ &val);
+ if (rc) {
+ prerror("RESET: XSCOM error %d reading PM state on"
+ " 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+ /* Check timeout */
+ if (mftb() > poll_target)
+ break;
+ }
+
+ /* Success ? */
+ if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) {
+ uint64_t now = mftb();
+ prlog(PR_TRACE, "RESET: Special wakeup complete after %ld us\n",
+ tb_to_usecs(now - stamp));
+ return 0;
+ }
+
+ /*
+ * We timed out ...
+ *
+ * HWP has a complex workaround for HW255321 which affects
+ * Murano DD1 and Venice DD1. Ignore that for now
+ *
+ * Instead we just dump some XSCOMs for error logging
+ */
+ prerror("RESET: Timeout on special wakeup of 0x%0x\n", cpu->pir);
+ prerror("RESET: PM0 = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+ prerror("RESET: SPC_WKUP = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_IDLE_STATE_HISTORY_PHYP),
+ &val);
+ prerror("RESET: HISTORY = 0x%016llx\n", val);
+
+ return OPAL_HARDWARE;
}
-static bool do_reset_core_p7(struct cpu_thread *cpu)
+static int clr_special_wakeup(struct cpu_thread *cpu)
{
- uint32_t xscom_addr, chip;
- uint64_t ctl;
+ uint64_t val;
+ uint32_t core_id;
int rc;
- /* Add the Core# */
- xscom_addr = EX0_TCTL_DIRECT_CONTROLS0;
- xscom_addr |= ((cpu->pir >> 2) & 7) << 24;
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
+
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
- chip = pir_to_chip_id(cpu->pir);
+ prlog(PR_DEBUG, "RESET: Releasing core 0x%x wakeup\n", core_id);
- ctl = TCTL_DC_SRESET_REQUEST;
- rc = xscom_write(chip, xscom_addr, ctl);
- rc |= xscom_write(chip, xscom_addr + 0x40, ctl);
- rc |= xscom_write(chip, xscom_addr + 0x80, ctl);
- rc |= xscom_write(chip, xscom_addr + 0xc0, ctl);
+ /*
+ * The original HWp reads the XSCOM first but ignores the result
+ * and error, let's do the same until I know for sure that is
+ * not necessary
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP), 0);
if (rc) {
- prerror("RESET: Error %d resetting CPU 0x%04x\n",
- rc, cpu->pir);
- return false;
+ prerror("RESET: XSCOM error %d deasserting"
+ " special wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
}
- return true;
+
+ /*
+ * The original HWp reads the XSCOM again with the comment
+ * "This puts an inherent delay in the propagation of the reset
+ * transition"
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ return 0;
}
-static void fast_reset_p7(void)
+static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
{
- struct cpu_thread *cpu;
+ uint32_t core_id = pir_to_core_id(cpu->pir);
+ uint32_t chip_id = pir_to_chip_id(cpu->pir);
+ uint32_t thread_id = pir_to_thread_id(cpu->pir);
+ uint32_t xscom_addr;
- resettee = this_cpu();
- resettor = NULL;
+ xscom_addr = XSCOM_ADDR_P8_EX(core_id,
+ P8_EX_TCTL_DIRECT_CONTROLS(thread_id));
- /* Pick up a candidate resettor. We do that before we flush
- * the caches
- */
- for_each_cpu(cpu) {
- /*
- * Some threads might still be in skiboot.
- *
- * But because we deal with entire cores and we don't want
- * to special case things, we are just going to reset them
- * too making the assumption that this is safe, they are
- * holding no locks. This can only be true if they don't
- * have jobs scheduled which is hopefully the case.
- */
- if (cpu->state != cpu_state_os &&
- cpu->state != cpu_state_active)
- continue;
+ xscom_write(chip_id, xscom_addr, bits);
+}
- /*
- * Only hit cores and only if they aren't on the same core
- * as ourselves
- */
- if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) ||
- cpu->pir & 0x3)
- continue;
+static void patch_reset_vector(void)
+{
+ extern uint32_t fast_reset_patch_start;
+ extern uint32_t fast_reset_patch_end;
+ uint32_t *dst, *src;
- /* Pick up one of those guys as our "resettor". It will be
- * in charge of resetting this CPU. We avoid resetting
- * ourselves, not sure how well it would do with SCOM
- */
- resettor = cpu;
- break;
+ /* Copy reset trampoline */
+ prlog(PR_DEBUG, "RESET: Copying reset trampoline...\n");
+ src = &fast_reset_patch_start;
+ dst = (uint32_t *)0x100;
+ while(src < &fast_reset_patch_end)
+ *(dst++) = *(src++);
+ sync_icache();
+}
+
+static bool fast_reset_p8(void)
+{
+ struct cpu_thread *cpu;
+
+ /* Mark ourselves as last man standing in need of a reset */
+ last_man_standing = this_cpu();
+
+ prlog(PR_DEBUG, "RESET: Resetting from cpu: 0x%x (core 0x%x)\n",
+ this_cpu()->pir, pir_to_core_id(this_cpu()->pir));
+
+ /* Assert special wakeup on all cores */
+ for_each_cpu(cpu) {
+ if (cpu->primary == cpu)
+ if (set_special_wakeup(cpu) != OPAL_SUCCESS)
+ return false;
}
- if (!resettor) {
- printf("RESET: Can't find a resettor !\n");
- return;
+ prlog(PR_DEBUG, "RESET: Stopping the world...\n");
+
+ /* Put everybody in stop except myself */
+ for_each_cpu(cpu) {
+ if (cpu != this_cpu())
+ set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
}
- printf("RESET: Resetting from 0x%04x, resettor 0x%04x\n",
- this_cpu()->pir, resettor->pir);
- printf("RESET: Flushing caches...\n");
+ /* Patch reset */
+ patch_reset_vector();
- /* Is that necessary ? */
- flush_caches();
+ prlog(PR_DEBUG, "RESET: Pre-napping all threads but one...\n");
- /* Reset everybody except self and except resettor */
+ /* Put everybody in pre-nap except myself */
for_each_cpu(cpu) {
- if (cpu->state != cpu_state_os &&
- cpu->state != cpu_state_active)
- continue;
- if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) ||
- cpu->pir & 0x3)
- continue;
- if (cpu_get_thread0(cpu) == cpu_get_thread0(resettor))
- continue;
+ if (cpu != this_cpu())
+ set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
+ }
- printf("RESET: Resetting CPU 0x%04x...\n", cpu->pir);
+ prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
- if (!do_reset_core_p7(cpu))
- return;
+ /* Reset everybody except my own core threads */
+ for_each_cpu(cpu) {
+ if (cpu != this_cpu())
+ set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
}
- /* Reset the resettor last because it's going to kill me ! */
- printf("RESET: Resetting CPU 0x%04x...\n", resettor->pir);
- if (!do_reset_core_p7(resettor))
- return;
-
- /* Don't return */
- for (;;)
- ;
+ return true;
}
-void fast_reset(void)
+void fast_reboot(void)
{
- uint32_t pvr = mfspr(SPR_PVR);
- extern uint32_t fast_reset_patch_start;
- extern uint32_t fast_reset_patch_end;
- uint32_t *dst, *src;
+ bool success;
+
+ if (proc_gen != proc_gen_p8)
+ return;
- printf("RESET: Fast reboot request !\n");
+ prlog(PR_INFO, "RESET: Initiating fast reboot...\n");
/* XXX We need a way to ensure that no other CPU is in skiboot
* holding locks (via the OPAL APIs) and if they are, we need
- * for them to get out
+ * for them to get out. Hopefully that isn't happening, but...
+ *
+ * To fix this properly, we want to keep track of OPAL entry/exit
+ * on all CPUs.
*/
reboot_in_progress = 1;
time_wait_ms(200);
- /* Copy reset trampoline */
- printf("RESET: Copying reset trampoline...\n");
- src = &fast_reset_patch_start;
- dst = (uint32_t *)0x100;
- while(src < &fast_reset_patch_end)
- *(dst++) = *(src++);
- sync_icache();
+ /* Lock so the new guys coming don't reset us */
+ lock(&reset_lock);
- switch(PVR_TYPE(pvr)) {
- case PVR_TYPE_P7:
- case PVR_TYPE_P7P:
- fast_reset_p7();
- }
+ fast_boot_release = false;
+
+ success = fast_reset_p8();
+
+ /* Unlock, at this point we go away */
+ unlock(&reset_lock);
+
+ if (success)
+ /* Don't return */
+ for (;;)
+ ;
}
static void cleanup_cpu_state(void)
{
- if (cpu_is_thread0(this_cpu())) {
- cleanup_tlb();
+ struct cpu_thread *cpu = this_cpu();
+
+ cpu->current_hile = false;
+
+ /* Per core cleanup */
+ if (cpu_is_thread0(cpu)) {
+ /* Shared SPRs whacked back to normal */
+
+ /* XXX Update the SLW copies ! Also dbl check HIDs etc... */
init_shared_sprs();
+
+ /* If somebody was in fast_sleep, we may have a workaround
+ * to undo
+ */
+ if (cpu->in_sleep) {
+ prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep"
+ " undoing workarounds...\n", cpu->pir);
+ fast_sleep_exit();
+ }
+
+ /* And we might have lost TB sync */
+ chiptod_wakeup_resync();
+
+ /* The TLB surely contains garbage */
+ cleanup_tlb();
}
+
+ /* Per-thread additional cleanup */
init_replicated_sprs();
- reset_cpu_icp();
+
+ // XXX Cleanup SLW, check HIDs ...
}
-#ifdef FAST_REBOOT_CLEARS_MEMORY
-static void fast_mem_clear(uint64_t start, uint64_t end)
+void __noreturn enter_nap(void);
+
+static void check_split_core(void)
{
- printf("MEMORY: Clearing %llx..%llx\n", start, end);
+ struct cpu_thread *cpu;
+ u64 mask, hid0;
- while(start < end) {
- asm volatile("dcbz 0,%0" : : "r" (start) : "memory");
- start += 128;
+ hid0 = mfspr(SPR_HID0);
+ mask = SPR_HID0_POWER8_4LPARMODE | SPR_HID0_POWER8_2LPARMODE;
+
+ if ((hid0 & mask) == 0)
+ return;
+
+ prlog(PR_INFO, "RESET: CPU 0x%04x is split !\n", this_cpu()->pir);
+
+ /* If it's a secondary thread, just send it to nap */
+ if (this_cpu()->pir & 7) {
+ /* Prepare to be woken up */
+ icp_prep_for_pm();
+ /* Setup LPCR to wakeup on external interrupts only */
+ mtspr(SPR_LPCR, ((mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE) |
+ SPR_LPCR_P8_PECE2));
+ /* Go to nap (doesn't return) */
+ enter_nap();
}
-}
-static void memory_reset(void)
-{
- struct address_range *i;
- uint64_t skistart = SKIBOOT_BASE;
- uint64_t skiend = SKIBOOT_BASE + SKIBOOT_SIZE;
-
- printf("MEMORY: Clearing ...\n");
-
- list_for_each(&address_ranges, i, list) {
- uint64_t start = cleanup_addr(i->arange->start);
- uint64_t end = cleanup_addr(i->arange->end);
-
- if (start >= skiend || end <= skistart)
- fast_mem_clear(start, end);
- else {
- if (start < skistart)
- fast_mem_clear(start, skistart);
- if (end > skiend)
- fast_mem_clear(skiend, end);
- }
+ prlog(PR_INFO, "RESET: Primary, unsplitting... \n");
+
+ /* Trigger unsplit operation and update SLW image */
+ hid0 &= ~SPR_HID0_POWER8_DYNLPARDIS;
+ set_hid0(hid0);
+ opal_slw_set_reg(this_cpu()->pir, SPR_HID0, hid0);
+
+ /* Wait for unsplit */
+ while (mfspr(SPR_HID0) & mask)
+ cpu_relax();
+
+ /* Now the guys are sleeping, wake'em up. They will come back
+ * via reset and continue the fast reboot process normally.
+ * No need to wait.
+ */
+ prlog(PR_INFO, "RESET: Waking unsplit secondaries... \n");
+
+ for_each_cpu(cpu) {
+ if (!cpu_is_sibling(cpu, this_cpu()) || (cpu == this_cpu()))
+ continue;
+ icp_kick_cpu(cpu);
}
}
-#endif /* FAST_REBOOT_CLEARS_MEMORY */
+
/* Entry from asm after a fast reset */
-void __noreturn fast_reboot(void);
+void __noreturn fast_reboot_entry(void);
-void __noreturn fast_reboot(void)
+void __noreturn fast_reboot_entry(void)
{
- static volatile bool fast_boot_release;
struct cpu_thread *cpu;
- printf("INIT: CPU PIR 0x%04x reset in\n", this_cpu()->pir);
+ prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", this_cpu()->pir);
+ time_wait_ms(100);
+
+ lock(&reset_lock);
+ if (last_man_standing) {
+ prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
+ set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
+ set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
+ last_man_standing = NULL;
+ }
+ unlock(&reset_lock);
+
+ /* We reset our ICP first ! Otherwise we might get stray interrupts
+ * when unsplitting
+ */
+ reset_cpu_icp();
- /* If this CPU was chosen as the resettor, it must reset the
- * resettee (the one that initiated the whole process
+ /* If we are split, we need to unsplit. Since that can send us
+ * to NAP, which will come back via reset, we do it now
*/
- if (this_cpu() == resettor)
- do_reset_core_p7(resettee);
+ check_split_core();
/* Are we the original boot CPU ? If not, we spin waiting
* for a relase signal from CPU 1, then we clean ourselves
@@ -277,6 +437,8 @@ void __noreturn fast_reboot(void)
__secondary_cpu_entry();
}
+ prlog(PR_INFO, "RESET: Boot CPU waiting for everybody...\n");
+
/* We are the original boot CPU, wait for secondaries to
* be captured
*/
@@ -292,7 +454,7 @@ void __noreturn fast_reboot(void)
smt_medium();
}
- printf("INIT: Releasing secondaries...\n");
+ prlog(PR_INFO, "RESET: Releasing secondaries...\n");
/* Release everybody */
fast_boot_release = true;
@@ -310,7 +472,14 @@ void __noreturn fast_reboot(void)
}
}
- printf("INIT: All done, resetting everything else...\n");
+ prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n");
+
+ for_each_cpu(cpu) {
+ if (cpu->primary == cpu)
+ clr_special_wakeup(cpu);
+ }
+
+ prlog(PR_INFO, "RESET: All done, cleaning up...\n");
/* Clear release flag for next time */
fast_boot_release = false;
@@ -322,6 +491,8 @@ void __noreturn fast_reboot(void)
/* Set our state to active */
this_cpu()->state = cpu_state_active;
+ start_preload_kernel();
+
/* Poke the consoles (see comments in the code there) */
fsp_console_reset();
@@ -331,15 +502,6 @@ void __noreturn fast_reboot(void)
/* Remove all PCI devices */
pci_reset();
- /* Reset IO Hubs */
- cec_reset();
-
- /* Re-Initialize all discovered PCI slots */
- pci_init_slots();
-
- /* Clear memory */
-#ifdef FAST_REBOOT_CLEARS_MEMORY
- memory_reset();
-#endif
+ /* Load and boot payload */
load_and_boot_kernel(true);
}
diff --git a/core/init.c b/core/init.c
index ca3ad55..1a3d741 100644
--- a/core/init.c
+++ b/core/init.c
@@ -287,7 +287,7 @@ extern uint64_t boot_offset;
static size_t kernel_size;
static size_t initramfs_size;
-static bool start_preload_kernel(void)
+bool start_preload_kernel(void)
{
int loaded;
@@ -384,6 +384,9 @@ static void load_initramfs(void)
{
int loaded;
+ dt_check_del_prop(dt_chosen, "linux,initrd-start");
+ dt_check_del_prop(dt_chosen, "linux,initrd-end");
+
loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,
RESOURCE_SUBID_NONE);
@@ -447,6 +450,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
/* Set kernel command line argument if specified */
#ifdef KERNEL_COMMAND_LINE
+ dt_check_del_prop(dt_chosen, "bootargs");
dt_add_property_string(dt_chosen, "bootargs", KERNEL_COMMAND_LINE);
#endif
diff --git a/core/interrupts.c b/core/interrupts.c
index f93ce7b..3b919fa 100644
--- a/core/interrupts.c
+++ b/core/interrupts.c
@@ -267,10 +267,10 @@ void icp_send_eoi(uint32_t interrupt)
out_be32(icp + ICP_XIRR, interrupt & 0xffffff);
}
-/* This is called before winkle, we clear pending IPIs and set our priority
- * to 1 to mask all but the IPI
+/* This is called before winkle or nap, we clear pending IPIs and
+ * set our priority to 1 to mask all but the IPI.
*/
-void icp_prep_for_rvwinkle(void)
+void icp_prep_for_pm(void)
{
void *icp = this_cpu()->icp_regs;
diff --git a/core/lock.c b/core/lock.c
index 53cc337..e82048b 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -110,6 +110,9 @@ void unlock(struct lock *l)
this_cpu()->lock_depth--;
l->lock_val = 0;
+ /* WARNING: On fast reboot, we can be reset right at that
+ * point, so the reset_lock in there cannot be in the con path
+ */
if (l->in_con_path) {
cpu->con_suspend--;
if (cpu->con_suspend == 0 && cpu->con_need_flush)
diff --git a/core/pci.c b/core/pci.c
index cbaea35..bbf4583 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -1456,6 +1456,7 @@ static void __pci_reset(struct list_head *list)
while ((pd = list_pop(list, struct pci_device, link)) != NULL) {
__pci_reset(&pd->children);
+ dt_free(pd->dn);
free(pd);
}
}
@@ -1466,16 +1467,22 @@ void pci_reset(void)
prlog(PR_NOTICE, "PCI: Clearing all devices...\n");
- /* This is a remnant of fast-reboot, not currently used */
/* XXX Do those in parallel (at least the power up
* state machine could be done in parallel)
*/
for (i = 0; i < ARRAY_SIZE(phbs); i++) {
- if (!phbs[i])
+ struct phb *phb = phbs[i];
+ if (!phb)
continue;
- __pci_reset(&phbs[i]->devices);
+ __pci_reset(&phb->devices);
+ if (phb->ops->ioda_reset)
+ phb->ops->ioda_reset(phb, true);
}
+
+ /* Re-Initialize all discovered PCI slots */
+ pci_init_slots();
+
}
static void pci_do_jobs(void (*fn)(void *))
diff --git a/core/platform.c b/core/platform.c
index de6e406..7915857 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -52,9 +52,9 @@ static int64_t opal_cec_reboot(void)
console_complete_flush();
-#ifdef ENABLE_FAST_RESET
+#ifdef ENABLE_FAST_REBOOT
/* Try a fast reset first */
- fast_reset();
+ fast_reboot();
#endif
if (platform.cec_reboot)
return platform.cec_reboot();
diff --git a/hw/fsp/fsp-console.c b/hw/fsp/fsp-console.c
index 87e509d..5e27197 100644
--- a/hw/fsp/fsp-console.c
+++ b/hw/fsp/fsp-console.c
@@ -884,6 +884,9 @@ static void reopen_all_hvsi(void)
void fsp_console_reset(void)
{
+ if (!fsp_present())
+ return;
+
prlog(PR_NOTICE, "FSP: Console reset !\n");
/* This is called on a fast-reset. To work around issues with HVSI
@@ -985,6 +988,8 @@ void fsp_console_select_stdout(void)
*/
}
}
+ dt_check_del_prop(dt_chosen, "linux,stdout-path");
+
if (fsp_serials[1].open && use_serial) {
dt_add_property_string(dt_chosen, "linux,stdout-path",
"/ibm,opal/consoles/serial at 1");
diff --git a/hw/occ.c b/hw/occ.c
index b606a67..3d86f7a 100644
--- a/hw/occ.c
+++ b/hw/occ.c
@@ -517,10 +517,14 @@ void occ_pstates_init(void)
struct proc_chip *chip;
struct cpu_thread *c;
s8 pstate_nom;
+ static bool occ_pstates_initialized;
/* OCC is P8 only */
if (proc_gen != proc_gen_p8)
return;
+ /* Handle fast reboots */
+ if (occ_pstates_initialized)
+ return;
chip = next_chip(NULL);
if (!chip->homer_base) {
@@ -558,6 +562,7 @@ void occ_pstates_init(void)
for_each_chip(chip)
chip->throttle = 0;
opal_add_poller(occ_throttle_poll, NULL);
+ occ_pstates_initialized = true;
}
struct occ_load_req {
diff --git a/hw/psi.c b/hw/psi.c
index 3efc177..e9b32b6 100644
--- a/hw/psi.c
+++ b/hw/psi.c
@@ -432,34 +432,25 @@ static int64_t psi_p7_get_xive(struct irq_source *is, uint32_t isn __unused,
return OPAL_SUCCESS;
}
+static const uint32_t psi_p8_irq_to_xivr[P8_IRQ_PSI_ALL_COUNT] = {
+ [P8_IRQ_PSI_FSP] = PSIHB_XIVR_FSP,
+ [P8_IRQ_PSI_OCC] = PSIHB_XIVR_OCC,
+ [P8_IRQ_PSI_FSI] = PSIHB_XIVR_FSI,
+ [P8_IRQ_PSI_LPC] = PSIHB_XIVR_LPC,
+ [P8_IRQ_PSI_LOCAL_ERR] = PSIHB_XIVR_LOCAL_ERR,
+ [P8_IRQ_PSI_HOST_ERR] = PSIHB_XIVR_HOST_ERR,
+};
+
static int64_t psi_p8_set_xive(struct irq_source *is, uint32_t isn,
uint16_t server, uint8_t priority)
{
struct psi *psi = is->data;
uint64_t xivr_p, xivr;
+ uint32_t irq_idx = isn & 7;
- switch(isn & 7) {
- case P8_IRQ_PSI_FSP:
- xivr_p = PSIHB_XIVR_FSP;
- break;
- case P8_IRQ_PSI_OCC:
- xivr_p = PSIHB_XIVR_OCC;
- break;
- case P8_IRQ_PSI_FSI:
- xivr_p = PSIHB_XIVR_FSI;
- break;
- case P8_IRQ_PSI_LPC:
- xivr_p = PSIHB_XIVR_LPC;
- break;
- case P8_IRQ_PSI_LOCAL_ERR:
- xivr_p = PSIHB_XIVR_LOCAL_ERR;
- break;
- case P8_IRQ_PSI_HOST_ERR:
- xivr_p = PSIHB_XIVR_HOST_ERR;
- break;
- default:
+ if (irq_idx >= P8_IRQ_PSI_ALL_COUNT)
return OPAL_PARAMETER;
- }
+ xivr_p = psi_p8_irq_to_xivr[irq_idx];
/* Populate the XIVR */
xivr = (uint64_t)server << 40;
@@ -476,29 +467,11 @@ static int64_t psi_p8_get_xive(struct irq_source *is, uint32_t isn __unused,
{
struct psi *psi = is->data;
uint64_t xivr_p, xivr;
+ uint32_t irq_idx = isn & 7;
- switch(isn & 7) {
- case P8_IRQ_PSI_FSP:
- xivr_p = PSIHB_XIVR_FSP;
- break;
- case P8_IRQ_PSI_OCC:
- xivr_p = PSIHB_XIVR_OCC;
- break;
- case P8_IRQ_PSI_FSI:
- xivr_p = PSIHB_XIVR_FSI;
- break;
- case P8_IRQ_PSI_LPC:
- xivr_p = PSIHB_XIVR_LPC;
- break;
- case P8_IRQ_PSI_LOCAL_ERR:
- xivr_p = PSIHB_XIVR_LOCAL_ERR;
- break;
- case P8_IRQ_PSI_HOST_ERR:
- xivr_p = PSIHB_XIVR_HOST_ERR;
- break;
- default:
+ if (irq_idx >= P8_IRQ_PSI_ALL_COUNT)
return OPAL_PARAMETER;
- }
+ xivr_p = psi_p8_irq_to_xivr[irq_idx];
/* Read & decode the XIVR */
xivr = in_be64(psi->regs + xivr_p);
@@ -509,33 +482,41 @@ static int64_t psi_p8_get_xive(struct irq_source *is, uint32_t isn __unused,
return OPAL_SUCCESS;
}
+static void psi_cleanup_irq(struct psi *psi)
+{
+ uint32_t irq;
+ uint64_t xivr, xivr_p;
+
+ for (irq = 0; irq < P8_IRQ_PSI_ALL_COUNT; irq++) {
+ prlog(PR_DEBUG, "PSI[0x%03x]: Cleaning up IRQ %d\n",
+ psi->chip_id, irq);
+
+ xivr_p = psi_p8_irq_to_xivr[irq];
+ xivr = in_be64(psi->regs + xivr_p);
+ xivr |= (0xffull << 32);
+ out_be64(psi->regs + xivr_p, xivr);
+ time_wait_ms_nopoll(10);
+ xivr = in_be64(psi->regs + xivr_p);
+ if (xivr & PPC_BIT(39)) {
+ printf(" Need EOI !\n");
+ icp_send_eoi(psi->interrupt + irq);
+ }
+ }
+}
+
/* Called on a fast reset, make sure we aren't stuck with
* an accepted and never EOId PSI interrupt
*/
void psi_irq_reset(void)
{
struct psi *psi;
- uint64_t xivr;
printf("PSI: Hot reset!\n");
- assert(proc_gen == proc_gen_p7);
+ assert(proc_gen == proc_gen_p8);
list_for_each(&psis, psi, list) {
- /* Mask the interrupt & clean the XIVR */
- xivr = 0x000000ff00000000UL;
- xivr |= P7_IRQ_BUID(psi->interrupt) << 16;
- out_be64(psi->regs + PSIHB_XIVR, xivr);
-
-#if 0 /* Seems to checkstop ... */
- /*
- * Maybe not anymore; we were just blindly sending
- * this on all iopaths, not just the active one;
- * We don't even know if those psis are even correct.
- */
- /* Send a dummy EOI to make sure the ICP is clear */
- icp_send_eoi(psi->interrupt);
-#endif
+ psi_cleanup_irq(psi);
}
}
diff --git a/hw/slw.c b/hw/slw.c
index 74b9cd5..51ac223 100644
--- a/hw/slw.c
+++ b/hw/slw.c
@@ -77,7 +77,7 @@ static void slw_do_rvwinkle(void *data)
struct proc_chip *chip;
/* Setup our ICP to receive IPIs */
- icp_prep_for_rvwinkle();
+ icp_prep_for_pm();
/* Setup LPCR to wakeup on external interrupts only */
mtspr(SPR_LPCR, ((lpcr & ~SPR_LPCR_P8_PECE) | SPR_LPCR_P8_PECE2));
@@ -1061,6 +1061,8 @@ static void fast_sleep_enter(void)
}
primary_thread->save_l2_fir_action1 = tmp;
+ primary_thread->in_sleep = true;
+
tmp = tmp & ~0x0200000000000000ULL;
rc = xscom_write(chip_id, XSCOM_ADDR_P8_EX(core, L2_FIR_ACTION1),
tmp);
@@ -1083,7 +1085,7 @@ static void fast_sleep_enter(void)
/* Workarounds while exiting fast-sleep */
-static void fast_sleep_exit(void)
+void fast_sleep_exit(void)
{
uint32_t core = pir_to_core_id(this_cpu()->pir);
uint32_t chip_id = this_cpu()->chip_id;
@@ -1091,6 +1093,7 @@ static void fast_sleep_exit(void)
int rc;
primary_thread = this_cpu()->primary;
+ primary_thread->in_sleep = false;
rc = xscom_write(chip_id, XSCOM_ADDR_P8_EX(core, L2_FIR_ACTION1),
primary_thread->save_l2_fir_action1);
@@ -1132,7 +1135,7 @@ static int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t enter)
opal_call(OPAL_CONFIG_CPU_IDLE_STATE, opal_config_cpu_idle_state, 2);
#ifdef __HAVE_LIBPORE__
-static int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val)
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val)
{
struct cpu_thread *c = find_cpu_by_pir(cpu_pir);
diff --git a/include/config.h b/include/config.h
index 2524570..3163c65 100644
--- a/include/config.h
+++ b/include/config.h
@@ -72,8 +72,8 @@
*/
//#define FORCE_DUMMY_CONSOLE 1
-/* Enable this to do fast resets. Currently unreliable... */
-//#define ENABLE_FAST_RESET 1
+/* Enable this to do fast reboots. Currently unreliable... */
+#define ENABLE_FAST_REBOOT 1
/* Enable this to make fast reboot clear memory */
//#define FAST_REBOOT_CLEARS_MEMORY 1
diff --git a/include/cpu.h b/include/cpu.h
index 62f5629..3d194e4 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -61,6 +61,7 @@ struct cpu_thread {
bool in_mcount;
bool in_poller;
bool in_reinit;
+ bool in_sleep;
uint32_t hbrt_spec_wakeup; /* primary only */
uint64_t save_l2_fir_action1;
uint64_t current_token;
diff --git a/include/device.h b/include/device.h
index ed4fc46..4198a41 100644
--- a/include/device.h
+++ b/include/device.h
@@ -119,6 +119,8 @@ static inline struct dt_property *dt_add_property_u64(struct dt_node *node,
void dt_del_property(struct dt_node *node, struct dt_property *prop);
+void dt_check_del_prop(struct dt_node *node, const char *name);
+
/* Warning: moves *prop! */
void dt_resize_property(struct dt_property **prop, size_t len);
diff --git a/include/interrupts.h b/include/interrupts.h
index 3fba9d9..f269900 100644
--- a/include/interrupts.h
+++ b/include/interrupts.h
@@ -300,7 +300,7 @@ struct cpu_thread;
extern void reset_cpu_icp(void);
extern void icp_send_eoi(uint32_t interrupt);
-extern void icp_prep_for_rvwinkle(void);
+extern void icp_prep_for_pm(void);
extern void icp_kick_cpu(struct cpu_thread *cpu);
extern void init_interrupts(void);
diff --git a/include/processor.h b/include/processor.h
index caca804..fe4487b 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -163,6 +163,7 @@
/* Bits in HID0 */
#define SPR_HID0_POWER8_4LPARMODE PPC_BIT(2)
#define SPR_HID0_POWER8_2LPARMODE PPC_BIT(6)
+#define SPR_HID0_POWER8_DYNLPARDIS PPC_BIT(15)
#define SPR_HID0_POWER8_HILE PPC_BIT(19)
#define SPR_HID0_POWER9_HILE PPC_BIT(4)
#define SPR_HID0_POWER8_ENABLE_ATTN PPC_BIT(31)
diff --git a/include/skiboot.h b/include/skiboot.h
index 72cda14..5a76cb0 100644
--- a/include/skiboot.h
+++ b/include/skiboot.h
@@ -190,12 +190,13 @@ extern unsigned long get_symbol(unsigned long addr,
char **sym, char **sym_end);
/* Fast reboot support */
-extern void fast_reset(void);
+extern void fast_reboot(void);
extern void __noreturn __secondary_cpu_entry(void);
extern void __noreturn load_and_boot_kernel(bool is_reboot);
extern void cleanup_tlb(void);
extern void init_shared_sprs(void);
extern void init_replicated_sprs(void);
+extern bool start_preload_kernel(void);
/* Various probe routines, to replace with an initcall system */
extern void probe_p7ioc(void);
@@ -266,6 +267,11 @@ extern void slw_update_timer_expiry(uint64_t new_target);
/* Is SLW timer available ? */
extern bool slw_timer_ok(void);
+/* Patch SPR in SLW image */
+extern int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
+
+extern void fast_sleep_exit(void);
+
/* Fallback fake RTC */
extern void fake_rtc_init(void);
More information about the Skiboot mailing list