[Skiboot] [PATCH v2 04/59] p10: Workaround core recovery issue

Vasant Hegde hegdevasant at linux.vnet.ibm.com
Wed Aug 4 17:20:42 AEST 2021


From: Michael Neuling <mikey at neuling.org>

This works around a core recovery issue in P10. The workaround involves
the CME polling for a core recovery and performing the recovery
procedure itself.

For this to happen, the host leaves core recovery off (HID[5]) and
then masks the PC system checkstop. This patch does this.

Firmware starts skiboot with recovery already off, so we just leave it
off for longer and then mask the PC system checkstop. This makes the
window longer where a core recovery can cause an xstop but this
window is still small and can still only happen on boot.

Signed-off-by: Michael Neuling <mikey at neuling.org>
[Added mambo check - Vasant]
Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
---
 asm/head.S               |  4 ++--
 core/init.c              | 36 ++++++++++++++++++++++++++++++++++++
 include/xscom-p10-regs.h |  2 ++
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/asm/head.S b/asm/head.S
index f85b0fe29..fa8933b14 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -828,9 +828,9 @@ init_shared_sprs:
 
 	/* HID0:
 	 * Boot with PPC_BIT(5) set (dis_recovery).
-	 * Clear bit 5 to enable recovery.
+	 * Leave bit 5 set to disable recovery (due to HW570622)
 	 */
-	LOAD_IMM64(%r3, 0)
+	LOAD_IMM64(%r3, PPC_BIT(5))
 	sync
 	mtspr	SPR_HID0,%r3
 	isync
diff --git a/core/init.c b/core/init.c
index 65f136daa..0bf4ab269 100644
--- a/core/init.c
+++ b/core/init.c
@@ -47,6 +47,7 @@
 #include <debug_descriptor.h>
 #include <occ.h>
 #include <opal-dump.h>
+#include <xscom-p10-regs.h>
 
 enum proc_gen proc_gen;
 unsigned int pcie_max_link_speed;
@@ -989,6 +990,38 @@ bool verify_romem(void)
 	return true;
 }
 
+static void mask_pc_system_xstop(void)
+{
+        struct cpu_thread *cpu;
+        uint32_t chip_id, core_id;
+        int rc;
+
+	if (proc_gen != proc_gen_p10)
+                return;
+
+	if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
+		return;
+
+        /*
+         * HW570622 workaround (P10): recovery stays disabled via HID[5],
+         * so mask the PC system checkstop (bit 28) it can cause; CME does
+         * the recovery handling for us. One mask write is issued per cpu
+         * from for_each_cpu(); a failed write is logged, not fatal.
+         */
+        for_each_cpu(cpu) {
+                chip_id = cpu->chip_id;
+                core_id = pir_to_core_id(cpu->pir);
+
+                rc = xscom_write(chip_id,
+                                 XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIRMASK_OR),
+                                 PPC_BIT(28));
+                if (rc)
+                        prerror("Error setting FIR MASK rc:%d on PIR:%x\n",
+                                rc, cpu->pir);
+        }
+}
+
+
 /* Called from head.S, thus no prototype. */
 void __noreturn __nomcount  main_cpu_entry(const void *fdt);
 
@@ -1170,6 +1203,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10)
 		cpu_set_ipi_enable(true);
 
+        /* Once all CPUs are up, apply this workaround */
+        mask_pc_system_xstop();
+
 	/* Add the /opal node to the device-tree */
 	add_opal_node();
 
diff --git a/include/xscom-p10-regs.h b/include/xscom-p10-regs.h
index 8096b2f91..6045152d2 100644
--- a/include/xscom-p10-regs.h
+++ b/include/xscom-p10-regs.h
@@ -4,6 +4,8 @@
 /* Core FIR (Fault Isolation Register) */
 #define P10_CORE_FIR		0x440
 
+#define P10_CORE_FIRMASK_OR	0x445
+
 /* Core WOF (Whose On First) */
 #define P10_CORE_WOF		0x448
 
-- 
2.31.1



More information about the Skiboot mailing list