[Skiboot] [PATCH v2 1/9] Add basic P9 fused core support

Michael Neuling mikey at neuling.org
Tue Mar 19 17:03:57 AEDT 2019


From: Ryan Grimm <grimm at linux.vnet.ibm.com>

P9 cores can be configured into fused core mode where two core chiplets
function as an 8-threaded, single core.  So, bump four to eight in boot_entry
when in fused core mode and cpu_thread_count in init_boot_cpu.

The HID, AMOR, TSCR, RPR require the first active thread on that core chiplet
to load the copy for that core chiplet.  So, send thread 1 of a fused core to
init_shared_sprs in boot_entry.

The code checks for fused core mode in the core thread state register and puts a
field in struct cpu_thread.  This flag is checked when updating the HID and in
XIVE code when setting the special bar.

For XSCOM, the core ID is the non-fused EX.  So, create macros to arrange the
bits.  It's fairly verbose but somewhat readable.

This was tested on a P9 ZZ with 16 fused cores and ran HTX for over 24 hours.

Signed-off-by: Ryan Grimm <grimm at linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Signed-off-by: Michael Neuling <mikey at neuling.org>
---
 asm/head.S               | 24 +++++++++++++++++++++---
 core/chip.c              | 15 +++++++++++----
 core/cpu.c               | 39 ++++++++++++++++++++++++++++++++++-----
 core/fast-reboot.c       |  2 +-
 hdata/test/hdata_to_dt.c |  9 ++++++++-
 hw/xive.c                |  2 +-
 include/chip.h           | 31 +++++++++++++++++++++++++++++++
 include/cpu.h            |  6 ++++++
 include/xscom.h          |  3 +++
 9 files changed, 116 insertions(+), 15 deletions(-)

diff --git a/asm/head.S b/asm/head.S
index 0ed1acddd4..67d493ed3f 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -336,6 +336,7 @@ boot_offset:
  *   r28 :  PVR
  *   r27 :  DTB pointer (or NULL)
  *   r26 :  PIR thread mask
+ *   r25 :  P9 fused core flag
  */
 .global boot_entry
 boot_entry:
@@ -354,13 +355,21 @@ boot_entry:
 	cmpwi	cr0,%r3,PVR_TYPE_P8NVL
 	beq	2f
 	cmpwi	cr0,%r3,PVR_TYPE_P9
-	beq 	1f
+	beq 	3f
 	cmpwi	cr0,%r3,PVR_TYPE_P9P
-	beq 	1f
+	beq 	3f
 	attn		/* Unsupported CPU type... what do we do ? */
 	b 	.	/* loop here, just in case attn is disabled */
 
-	/* P8 -> 8 threads */
+	/* Check for fused core and set flag */
+3:
+	li	%r3, 0x1e0
+	mtspr   SPR_SPRC, %r3
+	mfspr	%r3, SPR_SPRD
+	andi.	%r25, %r3, 1
+	beq 1f
+
+	/* P8 or P9 fused -> 8 threads */
 2:	li	%r26,7
 
 	/* Get our reloc offset into r30 */
@@ -382,6 +391,15 @@ boot_entry:
 	LOAD_IMM64(%r3, (MSR_HV | MSR_SF))
 	mtmsrd	%r3,0
 
+	/* If fused, t1 is primary chiplet and must init shared sprs */
+	andi.	%r3,%r25,1
+	beq	not_fused
+
+	mfspr	%r31,SPR_PIR
+	andi.	%r3,%r31,1
+	bnel	init_shared_sprs
+
+not_fused:
 	/* Check our PIR, avoid threads */
 	mfspr	%r31,SPR_PIR
 	and.	%r0,%r31,%r26
diff --git a/core/chip.c b/core/chip.c
index 6526325342..2b9b6ef9e2 100644
--- a/core/chip.c
+++ b/core/chip.c
@@ -20,6 +20,7 @@
 #include <console.h>
 #include <device.h>
 #include <timebase.h>
+#include <cpu.h>
 
 static struct proc_chip *chips[MAX_CHIPS];
 enum proc_chip_quirks proc_chip_quirks;
@@ -37,7 +38,10 @@ uint32_t pir_to_chip_id(uint32_t pir)
 uint32_t pir_to_core_id(uint32_t pir)
 {
 	if (proc_gen == proc_gen_p9)
-		return P9_PIR2COREID(pir);
+		if (this_cpu()->is_fused_core)
+			return P9_PIRFUSED2NORMALCOREID(pir);
+		else
+			return P9_PIR2COREID(pir);
 	else if (proc_gen == proc_gen_p8)
 		return P8_PIR2COREID(pir);
 	else
@@ -46,9 +50,12 @@ uint32_t pir_to_core_id(uint32_t pir)
 
 uint32_t pir_to_thread_id(uint32_t pir)
 {
-	if (proc_gen == proc_gen_p9)
-		return P9_PIR2THREADID(pir);
-	else if (proc_gen == proc_gen_p8)
+	if (proc_gen == proc_gen_p9) {
+		if (this_cpu()->is_fused_core)
+			return P9_PIR2FUSEDTHREADID(pir);
+		else
+			return P9_PIR2THREADID(pir);
+	} else if (proc_gen == proc_gen_p8)
 		return P8_PIR2THREADID(pir);
 	else
 		return P7_PIR2THREADID(pir);
diff --git a/core/cpu.c b/core/cpu.c
index d9d4713372..1bcd2b6660 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -922,6 +922,14 @@ void cpu_disable_all_threads(struct cpu_thread *cpu)
 	/* XXX Do something to actually stop the core */
 }
 
+static int is_fused_core(void)
+{
+	unsigned long core_thread_state; /* full SPR width, no narrowing */
+	mtspr(SPR_SPRC, 0x00000000000001e0ULL);
+	core_thread_state = mfspr(SPR_SPRD);
+	return !!(core_thread_state & PPC_BIT(63)); /* 1 if fused, else 0 */
+}
+
 static void init_cpu_thread(struct cpu_thread *t,
 			    enum cpu_thread_state state,
 			    unsigned int pir)
@@ -941,6 +949,7 @@ static void init_cpu_thread(struct cpu_thread *t,
 #ifdef STACK_CHECK_ENABLED
 	t->stack_bot_mark = LONG_MAX;
 #endif
+	t->is_fused_core = is_fused_core();
 	assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
 }
 
@@ -1034,14 +1043,16 @@ void init_boot_cpu(void)
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	case proc_gen_p9:
-		cpu_thread_count = 4;
+		if (is_fused_core())
+			cpu_thread_count = 8;
+		else
+			cpu_thread_count = 4;
 		prlog(PR_INFO, "CPU: P9 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	default:
 		prerror("CPU: Unknown PVR, assuming 1 thread\n");
 		cpu_thread_count = 1;
-		cpu_max_pir = mfspr(SPR_PIR);
 	}
 
 	if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
@@ -1169,7 +1180,7 @@ void init_all_cpus(void)
 
 	/* Iterate all CPUs in the device-tree */
 	dt_for_each_child(cpus, cpu) {
-		unsigned int pir, server_no, chip_id;
+		unsigned int pir, server_no, chip_id, threads;
 		enum cpu_thread_state state;
 		const struct dt_property *p;
 		struct cpu_thread *t, *pt;
@@ -1197,6 +1208,14 @@ void init_all_cpus(void)
 		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
 		      " State=%d\n", pir, server_no, state);
 
+		/* Check max PIR */
+		if (cpu_max_pir < (pir + cpu_thread_count - 1)) {
+			prlog(PR_WARNING, "CPU: CPU potentially out of range"
+			      " PIR=0x%04x MAX=0x%04x !\n",
+			      pir, cpu_max_pir);
+			continue;
+		}
+
 		/* Setup thread 0 */
 		assert(pir <= cpu_max_pir);
 		t = pt = &cpu_stacks[pir].cpu;
@@ -1222,11 +1241,21 @@ void init_all_cpus(void)
 		/* Add the decrementer width property */
 		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
 
+		if (t->is_fused_core)
+			dt_add_property(t->node, "ibm,fused-core", NULL, 0);
+
 		/* Iterate threads */
 		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
 		if (!p)
 			continue;
-		for (thread = 1; thread < (p->len / 4); thread++) {
+		threads = p->len / 4;
+		if (threads > cpu_thread_count) {
+			prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x"
+			      " threads=%d max=%d\n",
+			      pir, threads, cpu_thread_count);
+			threads = cpu_thread_count;
+		}
+		for (thread = 1; thread < threads; thread++) {
 			prlog(PR_TRACE, "CPU:   secondary thread %d found\n",
 			      thread);
 			t = &cpu_stacks[pir + thread].cpu;
@@ -1412,7 +1441,7 @@ static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
 	assert(jobs);
 
 	for_each_available_cpu(cpu) {
-		if (!cpu_is_thread0(cpu))
+		if (!cpu_is_thread0(cpu) && !cpu_is_core_chiplet_primary(cpu))
 			continue;
 		if (cpu == this_cpu())
 			continue;
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 22160b65d2..be70c2271f 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -236,7 +236,7 @@ static void cleanup_cpu_state(void)
 	struct cpu_thread *cpu = this_cpu();
 
 	/* Per core cleanup */
-	if (cpu_is_thread0(cpu)) {
+	if (cpu_is_thread0(cpu) || cpu_is_core_chiplet_primary(cpu)) {
 		/* Shared SPRs whacked back to normal */
 
 		/* XXX Update the SLW copies ! Also dbl check HIDs etc... */
diff --git a/hdata/test/hdata_to_dt.c b/hdata/test/hdata_to_dt.c
index a5f152e86b..cddb1d435f 100644
--- a/hdata/test/hdata_to_dt.c
+++ b/hdata/test/hdata_to_dt.c
@@ -47,7 +47,11 @@ struct spira_ntuple;
 static void *ntuple_addr(const struct spira_ntuple *n);
 
 /* Stuff which core expects. */
-#define __this_cpu ((struct cpu_thread *)NULL)
+struct cpu_thread *my_fake_cpu;
+static struct cpu_thread *this_cpu(void)
+{
+	return my_fake_cpu;
+}
 
 unsigned long tb_hz = 512000000;
 
@@ -84,6 +88,7 @@ unsigned long tb_hz = 512000000;
 struct cpu_thread {
 	uint32_t			pir;
 	uint32_t			chip_id;
+	bool				is_fused_core;
 };
 struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
 				const char *name,
@@ -105,6 +110,8 @@ static inline struct cpu_job *cpu_queue_job(struct cpu_thread *cpu,
 struct cpu_thread __boot_cpu, *boot_cpu = &__boot_cpu;
 static unsigned long fake_pvr = PVR_P7;
 
+unsigned int cpu_thread_count = 8;
+
 static inline unsigned long mfspr(unsigned int spr)
 {
 	assert(spr == SPR_PVR);
diff --git a/hw/xive.c b/hw/xive.c
index b863b634d1..c9f3f07dfb 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -3299,7 +3299,7 @@ static void xive_init_cpu(struct cpu_thread *c)
 	 * of a pair is present we just do the setup for each of them, which
 	 * is harmless.
 	 */
-	if (cpu_is_thread0(c))
+	if (cpu_is_thread0(c) || cpu_is_core_chiplet_primary(c))
 		xive_configure_ex_special_bar(x, c);
 
 	/* Initialize the state structure */
diff --git a/include/chip.h b/include/chip.h
index d6e7e355bf..bcb080b6bf 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -91,6 +91,26 @@
  * thus we have a 6-bit core number.
  *
  * Note: XIVE Only supports 4-bit chip numbers ...
+ *
+ * Upper PIR Bits
+ * --------------
+ *
+ * Normal-Core Mode:
+ * 57:61 CoreID
+ * 62:63 ThreadID
+ *
+ * Fused-Core Mode:
+ * 57:59 FusedQuadID
+ * 60    FusedCoreID
+ * 61:63 FusedThreadID
+ *
+ * FusedCoreID 0 contains normal-core chiplet 0 and 1
+ * FusedCoreID 1 contains normal-core chiplet 2 and 3
+ *
+ * Fused cores have interleaved threads:
+ * core chiplet 0/2 = t0, t2, t4, t6
+ * core chiplet 1/3 = t1, t3, t5, t7
+ *
  */
 #define P9_PIR2GCID(pir) (((pir) >> 8) & 0x7f)
 
@@ -102,6 +122,17 @@
 
 #define P9_GCID2CHIPID(gcid) ((gcid) & 0x7)
 
+#define P9_PIR2FUSEDQUADID(pir) (((pir) >> 4) & 0x7)
+
+#define P9_PIR2FUSEDCOREID(pir) (((pir) >> 3) & 0x1)
+
+#define P9_PIR2FUSEDTHREADID(pir) ((pir) & 0x7)
+
+#define P9_PIRFUSED2NORMALCOREID(pir) \
+	((P9_PIR2FUSEDQUADID(pir) << 2) | \
+	 (P9_PIR2FUSEDCOREID(pir) << 1) | \
+	 (P9_PIR2FUSEDTHREADID(pir) & 1))
+
 /* P9 specific ones mostly used by XIVE */
 #define P9_PIR2LOCALCPU(pir) ((pir) & 0xff)
 #define P9_PIRFROMLOCALCPU(chip, cpu)	(((chip) << 8) | (cpu))
diff --git a/include/cpu.h b/include/cpu.h
index 06d5c0d112..009ae52c76 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -54,6 +54,7 @@ struct cpu_thread {
 	uint32_t			server_no;
 	uint32_t			chip_id;
 	bool				is_secondary;
+	bool				is_fused_core;
 	struct cpu_thread		*primary;
 	enum cpu_thread_state		state;
 	struct dt_node			*node;
@@ -251,6 +252,11 @@ static inline bool cpu_is_thread0(struct cpu_thread *cpu)
 	return cpu->primary == cpu;
 }
 
+static inline bool cpu_is_core_chiplet_primary(struct cpu_thread *cpu)
+{
+	return cpu->is_fused_core && (cpu_get_thread_index(cpu) == 1);
+}
+
 static inline bool cpu_is_sibling(struct cpu_thread *cpu1,
 				  struct cpu_thread *cpu2)
 {
diff --git a/include/xscom.h b/include/xscom.h
index 98532240b1..4e6ce92dd0 100644
--- a/include/xscom.h
+++ b/include/xscom.h
@@ -123,6 +123,9 @@
 
 /*
  * Additional useful definitions for P9
+ *
+ * Note: In all of these, the core numbering is the *small* core
+ *       number.
  */
 
 /* An EQ is a quad (also named an EP) */
-- 
2.20.1



More information about the Skiboot mailing list