[Skiboot] [PATCH v3 28/29] fast-reboot: bare bones fast reboot implementation for POWER9

Nicholas Piggin npiggin at gmail.com
Wed Nov 29 16:37:06 AEDT 2017


This is an initial fast reboot implementation for POWER9. It has only
been tested on the Witherspoon platform, and without the use of NPUs,
NX/VAS, etc.

This has worked reasonably well so far, with no failures in about 100
reboots. It is hidden behind the traditional experimental fast-reboot
nvram option (experimental-fast-reset=feeling-lucky) until more
platforms and configurations are tested.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 core/fast-reboot.c   | 64 +++++++++++++++++++++++++++++++++++-----------------
 core/init.c          | 14 ++++++------
 hw/xive.c            | 32 ++++++++++++++++++++++----
 include/interrupts.h |  1 +
 include/xive.h       |  1 +
 5 files changed, 79 insertions(+), 33 deletions(-)

diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 2f1a50058..1c76c0891 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -24,10 +24,12 @@
 #include <cec.h>
 #include <timebase.h>
 #include <pci.h>
+#include <xive.h>
 #include <chip.h>
 #include <chiptod.h>
 #include <ipmi.h>
 #include <direct-controls.h>
+#include <nvram.h>
 
 /* Flag tested by the OPAL entry code */
 static volatile bool fast_boot_release;
@@ -73,8 +75,13 @@ void fast_reboot(void)
 	struct cpu_thread *cpu;
 	static int fast_reboot_count = 0;
 
+	if (proc_gen == proc_gen_p9) {
+		if (!nvram_query_eq("experimental-fast-reset","feeling-lucky"))
+			return;
+	}
+
 	if (!chip_quirk(QUIRK_MAMBO_CALLOUTS) &&
-			proc_gen != proc_gen_p8) {
+			(proc_gen != proc_gen_p8 && proc_gen != proc_gen_p9)) {
 		prlog(PR_DEBUG,
 		      "RESET: Fast reboot not available on this CPU\n");
 		return;
@@ -168,20 +175,24 @@ static void cleanup_cpu_state(void)
 		/* XXX Update the SLW copies ! Also dbl check HIDs etc... */
 		init_shared_sprs();
 
-		/* If somebody was in fast_sleep, we may have a workaround
-		 * to undo
-		 */
-		if (cpu->in_fast_sleep) {
-			prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep"
-			      " undoing workarounds...\n", cpu->pir);
-			fast_sleep_exit();
+		if (proc_gen == proc_gen_p8) {
+			/* If somebody was in fast_sleep, we may have a
+			 * workaround to undo
+			 */
+			if (cpu->in_fast_sleep) {
+				prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep"
+				      " undoing workarounds...\n", cpu->pir);
+				fast_sleep_exit();
+			}
+
+			/* The TLB surely contains garbage.
+			 * P9 clears TLBs in cpu_fast_reboot_complete
+			 */
+			cleanup_local_tlb();
 		}
 
 		/* And we might have lost TB sync */
 		chiptod_wakeup_resync();
-
-		/* The TLB surely contains garbage */
-		cleanup_local_tlb();
 	}
 
 	/* Per-thread additional cleanup */
@@ -248,15 +259,19 @@ void __noreturn fast_reboot_entry(void)
 {
 	prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", this_cpu()->pir);
 
-	/* We reset our ICP first ! Otherwise we might get stray interrupts
-	 * when unsplitting
-	 */
-	reset_cpu_icp();
+	if (proc_gen == proc_gen_p9) {
+		reset_cpu_xive();
+	} else if (proc_gen == proc_gen_p8) {
+		/* We reset our ICP first ! Otherwise we might get stray
+		 * interrupts when unsplitting
+		 */
+		reset_cpu_icp();
 
-	/* If we are split, we need to unsplit. Since that can send us
-	 * to NAP, which will come back via reset, we do it now
-	 */
-	check_split_core();
+		/* If we are split, we need to unsplit. Since that can send us
+		 * to NAP, which will come back via reset, we do it now
+		 */
+		check_split_core();
+	}
 
 	sync();
 	this_cpu()->state = cpu_state_present;
@@ -285,6 +300,10 @@ void __noreturn fast_reboot_entry(void)
 	 */
 	cpu_state_wait_all_others(cpu_state_present, 0);
 
+	if (proc_gen == proc_gen_p9) {
+		xive_reset();
+	}
+
 	prlog(PR_INFO, "RESET: Releasing secondaries...\n");
 
 	/* Release everybody */
@@ -322,8 +341,11 @@ void __noreturn fast_reboot_entry(void)
 	/* Poke the consoles (see comments in the code there) */
 	fsp_console_reset();
 
-	/* Reset/EOI the PSI interrupt */
-	psi_irq_reset();
+	if (proc_gen == proc_gen_p8) {
+		/* XXX */
+		/* Reset/EOI the PSI interrupt */
+		psi_irq_reset();
+	}
 
 	/* Remove all PCI devices */
 	pci_reset();
diff --git a/core/init.c b/core/init.c
index 51db180b0..f655fa1e2 100644
--- a/core/init.c
+++ b/core/init.c
@@ -502,6 +502,13 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
 		/* Wait for FW VPD data read to complete */
 		fsp_code_update_wait_vpd(true);
 
+		/*
+		 * OCC takes a few secs to boot.  Call this as late as
+		 * possible to avoid delay.
+		 */
+		occ_pstates_init();
+		occ_sensors_init();
+
 	} else {
 		/* fdt will be rebuilt */
 		free(fdt);
@@ -512,13 +519,6 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
 
 	fsp_console_select_stdout();
 
-	/*
-	 * OCC takes few secs to boot.  Call this as late as
-	 * as possible to avoid delay.
-	 */
-	occ_pstates_init();
-	occ_sensors_init();
-
 	/* Use nvram bootargs over device tree */
 	cmdline = nvram_query("bootargs");
 	if (cmdline) {
diff --git a/hw/xive.c b/hw/xive.c
index df38074de..db9797cae 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -4431,14 +4431,20 @@ static void xive_reset_mask_source_cb(struct irq_source *is,
 	}
 }
 
-static int64_t opal_xive_reset(uint64_t version)
+void reset_cpu_xive(void)
 {
-	struct proc_chip *chip;
+	struct cpu_thread *c = this_cpu();
+	struct xive_cpu_state *xs = c->xstate;
 
-	prlog(PR_DEBUG, "XIVE reset, version: %d...\n", (int)version);
+	xs->cppr = 0;
+	out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_CPPR, 0);
 
-	if (version > 1)
-		return OPAL_PARAMETER;
+	in_be64(xs->tm_ring1 + TM_SPC_PULL_POOL_CTX);
+}
+
+static int64_t __xive_reset(uint64_t version)
+{
+	struct proc_chip *chip;
 
 	xive_mode = version;
 
@@ -4474,6 +4480,22 @@ static int64_t opal_xive_reset(uint64_t version)
 	return OPAL_SUCCESS;
 }
 
+/* Called by fast reboot */
+int64_t xive_reset(void)
+{
+	return __xive_reset(XIVE_MODE_EMU);
+}
+
+static int64_t opal_xive_reset(uint64_t version)
+{
+	prlog(PR_DEBUG, "XIVE reset, version: %d...\n", (int)version);
+
+	if (version > 1)
+		return OPAL_PARAMETER;
+
+	return __xive_reset(version);
+}
+
 static int64_t opal_xive_free_vp_block(uint64_t vp_base)
 {
 	uint32_t blk, idx, i, count;
diff --git a/include/interrupts.h b/include/interrupts.h
index 0376e8f9c..b412812af 100644
--- a/include/interrupts.h
+++ b/include/interrupts.h
@@ -316,6 +316,7 @@ extern uint32_t get_ics_phandle(void);
 struct cpu_thread;
 
 extern void reset_cpu_icp(void);
+extern void reset_cpu_xive(void);
 extern void icp_send_eoi(uint32_t interrupt);
 extern void icp_prep_for_pm(void);
 extern void icp_kick_cpu(struct cpu_thread *cpu);
diff --git a/include/xive.h b/include/xive.h
index 63ee77b38..8c65f4750 100644
--- a/include/xive.h
+++ b/include/xive.h
@@ -469,6 +469,7 @@ struct xive_vp {
 #define XIVE_IRQ_ERROR	0xffffffff
 
 void init_xive(void);
+int64_t xive_reset(void);
 
 /* Allocate a chunk of HW sources */
 uint32_t xive_alloc_hw_irqs(uint32_t chip_id, uint32_t count, uint32_t align);
-- 
2.15.0



More information about the Skiboot mailing list