[Skiboot] [PATCH v6 01/11] Add basic P9 fused core support

Vaidyanathan Srinivasan svaidy at linux.ibm.com
Wed Aug 5 03:32:13 AEST 2020


From: Ryan Grimm <grimm at linux.vnet.ibm.com>

P9 cores can be configured into fused core mode where two core chiplets
function as an 8-threaded, single core.  So, bump four to eight in boot_entry
when in fused core mode and cpu_thread_count in init_boot_cpu.

The HID, AMOR, TSCR, RPR require the first active thread on that core chiplet
to load the copy for that core chiplet.  So, send thread 1 of a fused core to
init_shared_sprs in boot_entry.

The code checks for fused core mode in the core thread state register and puts a
field in struct cpu_thread.  This flag is checked when updating the HID and in
XIVE code when setting the special bar.

For XSCOM, the core ID is the non-fused EX.  So, create macros to arrange the
bits.  It's fairly verbose but somewhat readable.

This was tested on a P9 ZZ with 16 fused cores and ran HTX for over 24 hours.

Signed-off-by: Ryan Grimm <grimm at linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Signed-off-by: Michael Neuling <mikey at neuling.org>
Signed-off-by: Vaidyanathan Srinivasan <svaidy at linux.ibm.com>
---
 asm/head.S               | 25 ++++++++++++++++++++++---
 core/chip.c              | 19 +++++++++++++------
 core/cpu.c               | 31 ++++++++++++++++++++++++++-----
 hdata/test/hdata_to_dt.c |  9 ++++++++-
 hw/xive.c                |  2 +-
 include/chip.h           | 31 +++++++++++++++++++++++++++++++
 include/cpu.h            |  6 ++++++
 include/xscom.h          |  3 +++
 8 files changed, 110 insertions(+), 16 deletions(-)

diff --git a/asm/head.S b/asm/head.S
index 3b41815c..0b81bb51 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -324,6 +324,7 @@ boot_offset:
  *   r28 :  PVR
  *   r27 :  DTB pointer (or NULL)
  *   r26 :  PIR thread mask
+ *   r25 :  P9 fused core flag
  */
 .global boot_entry
 boot_entry:
@@ -338,13 +339,22 @@ boot_entry:
 	cmpwi	cr0,%r3,PVR_TYPE_P8NVL
 	beq	2f
 	cmpwi	cr0,%r3,PVR_TYPE_P9
-	beq 	1f
+	beq 	3f
 	cmpwi	cr0,%r3,PVR_TYPE_P9P
-	beq 	1f
+	beq 	3f
 	attn		/* Unsupported CPU type... what do we do ? */
 	b 	.	/* loop here, just in case attn is disabled */
 
-	/* P8 -> 8 threads */
+	/* Check for fused core and set flag */
+3:
+	li	%r3, 0x1e0
+	mtspr   SPR_SPRC, %r3
+	mfspr	%r3, SPR_SPRD
+	andi.	%r25, %r3, 1
+	beq 1f
+
+	/* P8 or P9 fused -> 8 threads */
+
 2:	li	%r26,7
 
 	/* Get our reloc offset into r30 */
@@ -370,6 +380,15 @@ boot_entry:
 #endif
 	mtmsrd	%r3,0
 
+	/* If fused, t1 is primary chiplet and must init shared sprs */
+	andi.	%r3,%r25,1
+	beq	not_fused
+
+	mfspr	%r31,SPR_PIR
+	andi.	%r3,%r31,1
+	bnel	init_shared_sprs
+
+not_fused:
 	/* Check our PIR, avoid threads */
 	mfspr	%r31,SPR_PIR
 	and.	%r0,%r31,%r26
diff --git a/core/chip.c b/core/chip.c
index 191432d2..5c3276a4 100644
--- a/core/chip.c
+++ b/core/chip.c
@@ -6,6 +6,7 @@
 #include <console.h>
 #include <device.h>
 #include <timebase.h>
+#include <cpu.h>
 
 static struct proc_chip *chips[MAX_CHIPS];
 enum proc_chip_quirks proc_chip_quirks;
@@ -22,9 +23,12 @@ uint32_t pir_to_chip_id(uint32_t pir)
 
 uint32_t pir_to_core_id(uint32_t pir)
 {
-	if (proc_gen == proc_gen_p9)
-		return P9_PIR2COREID(pir);
-	else if (proc_gen == proc_gen_p8)
+	if (proc_gen == proc_gen_p9) {
+		if (this_cpu()->is_fused_core)
+			return P9_PIRFUSED2NORMALCOREID(pir);
+		else
+			return P9_PIR2COREID(pir);
+	} else if (proc_gen == proc_gen_p8)
 		return P8_PIR2COREID(pir);
 	else
 		assert(false);
@@ -32,9 +36,12 @@ uint32_t pir_to_core_id(uint32_t pir)
 
 uint32_t pir_to_thread_id(uint32_t pir)
 {
-	if (proc_gen == proc_gen_p9)
-		return P9_PIR2THREADID(pir);
-	else if (proc_gen == proc_gen_p8)
+	if (proc_gen == proc_gen_p9) {
+		if (this_cpu()->is_fused_core)
+			return P9_PIR2FUSEDTHREADID(pir);
+		else
+			return P9_PIR2THREADID(pir);
+	} else if (proc_gen == proc_gen_p8)
 		return P8_PIR2THREADID(pir);
 	else
 		assert(false);
diff --git a/core/cpu.c b/core/cpu.c
index 73777dd4..158f73e2 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -932,6 +932,7 @@ static void init_cpu_thread(struct cpu_thread *t,
 #ifdef STACK_CHECK_ENABLED
 	t->stack_bot_mark = LONG_MAX;
 #endif
+	t->is_fused_core = is_fused_core(mfspr(SPR_PVR));
 	assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
 }
 
@@ -1016,14 +1017,16 @@ void init_boot_cpu(void)
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	case proc_gen_p9:
-		cpu_thread_count = 4;
+		if (is_fused_core(pvr))
+			cpu_thread_count = 8;
+		else
+			cpu_thread_count = 4;
 		prlog(PR_INFO, "CPU: P9 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	default:
 		prerror("CPU: Unknown PVR, assuming 1 thread\n");
 		cpu_thread_count = 1;
-		cpu_max_pir = mfspr(SPR_PIR);
 	}
 
 	if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
@@ -1151,7 +1154,7 @@ void init_all_cpus(void)
 
 	/* Iterate all CPUs in the device-tree */
 	dt_for_each_child(cpus, cpu) {
-		unsigned int pir, server_no, chip_id;
+		unsigned int pir, server_no, chip_id, threads;
 		enum cpu_thread_state state;
 		const struct dt_property *p;
 		struct cpu_thread *t, *pt;
@@ -1181,6 +1184,14 @@ void init_all_cpus(void)
 		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
 		      " State=%d\n", pir, server_no, state);
 
+		/* Skip this CPU if its last thread's PIR would exceed cpu_max_pir */
+		if (cpu_max_pir < (pir + cpu_thread_count - 1)) {
+			prlog(PR_WARNING, "CPU: CPU potentially out of range"
+			      " PIR=0x%04x MAX=0x%04x !\n",
+			      pir, cpu_max_pir);
+			continue;
+		}
+
 		/* Setup thread 0 */
 		assert(pir <= cpu_max_pir);
 		t = pt = &cpu_stacks[pir].cpu;
@@ -1206,11 +1217,21 @@ void init_all_cpus(void)
 		/* Add the decrementer width property */
 		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
 
+		if (t->is_fused_core)
+			dt_add_property(t->node, "ibm,fused-core", NULL, 0);
+
 		/* Iterate threads */
 		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
 		if (!p)
 			continue;
-		for (thread = 1; thread < (p->len / 4); thread++) {
+		threads = p->len / 4;
+		if (threads > cpu_thread_count) {
+			prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x"
+			      " threads=%d max=%d\n",
+			      pir, threads, cpu_thread_count);
+			threads = cpu_thread_count;
+		}
+		for (thread = 1; thread < threads; thread++) {
 			prlog(PR_TRACE, "CPU:   secondary thread %d found\n",
 			      thread);
 			t = &cpu_stacks[pir + thread].cpu;
@@ -1396,7 +1417,7 @@ static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
 	assert(jobs);
 
 	for_each_available_cpu(cpu) {
-		if (!cpu_is_thread0(cpu))
+		if (!cpu_is_thread0(cpu) && !cpu_is_core_chiplet_primary(cpu))
 			continue;
 		if (cpu == this_cpu())
 			continue;
diff --git a/hdata/test/hdata_to_dt.c b/hdata/test/hdata_to_dt.c
index 49357cdf..90d83f93 100644
--- a/hdata/test/hdata_to_dt.c
+++ b/hdata/test/hdata_to_dt.c
@@ -38,7 +38,11 @@ struct spira_ntuple;
 static void *ntuple_addr(const struct spira_ntuple *n);
 
 /* Stuff which core expects. */
-#define __this_cpu ((struct cpu_thread *)NULL)
+struct cpu_thread *my_fake_cpu;
+static struct cpu_thread *this_cpu(void)
+{
+	return my_fake_cpu;
+}
 
 unsigned long tb_hz = 512000000;
 
@@ -74,6 +78,7 @@ unsigned long tb_hz = 512000000;
 struct cpu_thread {
 	uint32_t			pir;
 	uint32_t			chip_id;
+	bool				is_fused_core;
 };
 struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
 				const char *name,
@@ -95,6 +100,8 @@ static inline struct cpu_job *cpu_queue_job(struct cpu_thread *cpu,
 struct cpu_thread __boot_cpu, *boot_cpu = &__boot_cpu;
 static unsigned long fake_pvr = PVR_P8;
 
+unsigned int cpu_thread_count = 8;
+
 static inline unsigned long mfspr(unsigned int spr)
 {
 	assert(spr == SPR_PVR);
diff --git a/hw/xive.c b/hw/xive.c
index 8d6095c0..626ec182 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -3074,7 +3074,7 @@ static void xive_init_cpu(struct cpu_thread *c)
 	 * of a pair is present we just do the setup for each of them, which
 	 * is harmless.
 	 */
-	if (cpu_is_thread0(c))
+	if (cpu_is_thread0(c) || cpu_is_core_chiplet_primary(c))
 		xive_configure_ex_special_bar(x, c);
 
 	/* Initialize the state structure */
diff --git a/include/chip.h b/include/chip.h
index b79b63ec..38fafcf4 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -56,6 +56,26 @@
  * thus we have a 6-bit core number.
  *
  * Note: XIVE Only supports 4-bit chip numbers ...
+ *
+ * Upper PIR Bits
+ * --------------
+ *
+ * Normal-Core Mode:
+ * 57:61 CoreID
+ * 62:63 ThreadID
+ *
+ * Fused-Core Mode:
+ * 57:59 FusedQuadID
+ * 60    FusedCoreID
+ * 61:63 FusedThreadID
+ *
+ * FusedCoreID 0 contains normal-core chiplet 0 and 1
+ * FusedCoreID 1 contains normal-core chiplet 2 and 3
+ *
+ * Fused cores have interleaved threads:
+ * core chiplet 0/2 = t0, t2, t4, t6
+ * core chiplet 1/3 = t1, t3, t5, t7
+ *
  */
 #define P9_PIR2GCID(pir) (((pir) >> 8) & 0x7f)
 
@@ -67,6 +87,17 @@
 
 #define P9_GCID2CHIPID(gcid) ((gcid) & 0x7)
 
+#define P9_PIR2FUSEDQUADID(pir) (((pir) >> 4) & 0x7)
+
+#define P9_PIR2FUSEDCOREID(pir) (((pir) >> 3) & 0x1)
+
+#define P9_PIR2FUSEDTHREADID(pir) ((pir) & 0x7)
+/* Normal core ID: quad, fused core, then thread parity (odd = chiplet 1/3) */
+#define P9_PIRFUSED2NORMALCOREID(pir) \
+	((P9_PIR2FUSEDQUADID(pir) << 2) | \
+	 (P9_PIR2FUSEDCOREID(pir) << 1) | \
+	 (P9_PIR2FUSEDTHREADID(pir) & 1))
+
 /* P9 specific ones mostly used by XIVE */
 #define P9_PIR2LOCALCPU(pir) ((pir) & 0xff)
 #define P9_PIRFROMLOCALCPU(chip, cpu)	(((chip) << 8) | (cpu))
diff --git a/include/cpu.h b/include/cpu.h
index c90b961f..1863d6ad 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -42,6 +42,7 @@ struct cpu_thread {
 	uint32_t			server_no;
 	uint32_t			chip_id;
 	bool				is_secondary;
+	bool				is_fused_core;
 	struct cpu_thread		*primary;
 	enum cpu_thread_state		state;
 	struct dt_node			*node;
@@ -244,6 +245,11 @@ static inline bool cpu_is_thread0(struct cpu_thread *cpu)
 	return cpu->primary == cpu;
 }
 
+static inline bool cpu_is_core_chiplet_primary(struct cpu_thread *cpu)
+{	/* True only for thread index 1 of a CPU flagged as fused core */
+	return cpu->is_fused_core && (cpu_get_thread_index(cpu) == 1);
+}
+
 static inline bool cpu_is_sibling(struct cpu_thread *cpu1,
 				  struct cpu_thread *cpu2)
 {
diff --git a/include/xscom.h b/include/xscom.h
index bd8bb89a..db6d3fcd 100644
--- a/include/xscom.h
+++ b/include/xscom.h
@@ -110,6 +110,9 @@
 
 /*
  * Additional useful definitions for P9
+ *
+ * Note: In all of these, the core numbering is the
+ * *normal* (small) core number.
  */
 
 /*
-- 
2.26.2



More information about the Skiboot mailing list