[Skiboot] [PATCH v6 01/11] Add basic P9 fused core support
Vaidyanathan Srinivasan
svaidy at linux.ibm.com
Wed Aug 5 03:32:13 AEST 2020
From: Ryan Grimm <grimm at linux.vnet.ibm.com>
P9 cores can be configured into fused core mode, where two core chiplets
function as a single 8-threaded core. So, when in fused core mode, bump the
thread count from four to eight in boot_entry and cpu_thread_count in init_boot_cpu.
The HID, AMOR, TSCR and RPR registers require the first active thread on each
core chiplet to load the copy for that core chiplet. So, send thread 1 of a
fused core to init_shared_sprs in boot_entry.
The code checks for fused core mode in the core thread state register and puts a
flag in struct cpu_thread. This flag is checked when updating the HID and in
XIVE code when setting the special bar.
For XSCOM, the core ID is the non-fused EX. So, create macros to arrange the
bits. It's fairly verbose but somewhat readable.
This was tested on a P9 ZZ with 16 fused cores and ran HTX for over 24 hours.
Signed-off-by: Ryan Grimm <grimm at linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Signed-off-by: Michael Neuling <mikey at neuling.org>
Signed-off-by: Vaidyanathan Srinivasan <svaidy at linux.ibm.com>
---
asm/head.S | 25 ++++++++++++++++++++++---
core/chip.c | 19 +++++++++++++------
core/cpu.c | 31 ++++++++++++++++++++++++++-----
hdata/test/hdata_to_dt.c | 9 ++++++++-
hw/xive.c | 2 +-
include/chip.h | 31 +++++++++++++++++++++++++++++++
include/cpu.h | 6 ++++++
include/xscom.h | 3 +++
8 files changed, 110 insertions(+), 16 deletions(-)
diff --git a/asm/head.S b/asm/head.S
index 3b41815c..0b81bb51 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -324,6 +324,7 @@ boot_offset:
* r28 : PVR
* r27 : DTB pointer (or NULL)
* r26 : PIR thread mask
+ * r25 : P9 fused core flag
*/
.global boot_entry
boot_entry:
@@ -338,13 +339,22 @@ boot_entry:
cmpwi cr0,%r3,PVR_TYPE_P8NVL
beq 2f
cmpwi cr0,%r3,PVR_TYPE_P9
- beq 1f
+ beq 3f
cmpwi cr0,%r3,PVR_TYPE_P9P
- beq 1f
+ beq 3f
attn /* Unsupported CPU type... what do we do ? */
b . /* loop here, just in case attn is disabled */
- /* P8 -> 8 threads */
+ /* Check for fused core and set flag */
+3:
+ li %r3, 0x1e0
+ mtspr SPR_SPRC, %r3
+ mfspr %r3, SPR_SPRD
+ andi. %r25, %r3, 1
+ beq 1f
+
+ /* P8 or P9 fused -> 8 threads */
+
2: li %r26,7
/* Get our reloc offset into r30 */
@@ -370,6 +380,15 @@ boot_entry:
#endif
mtmsrd %r3,0
+ /* If fused, t1 is primary chiplet and must init shared sprs */
+ andi. %r3,%r25,1
+ beq not_fused
+
+ mfspr %r31,SPR_PIR
+ andi. %r3,%r31,1
+ bnel init_shared_sprs
+
+not_fused:
/* Check our PIR, avoid threads */
mfspr %r31,SPR_PIR
and. %r0,%r31,%r26
diff --git a/core/chip.c b/core/chip.c
index 191432d2..5c3276a4 100644
--- a/core/chip.c
+++ b/core/chip.c
@@ -6,6 +6,7 @@
#include <console.h>
#include <device.h>
#include <timebase.h>
+#include <cpu.h>
static struct proc_chip *chips[MAX_CHIPS];
enum proc_chip_quirks proc_chip_quirks;
@@ -22,9 +23,12 @@ uint32_t pir_to_chip_id(uint32_t pir)
uint32_t pir_to_core_id(uint32_t pir)
{
- if (proc_gen == proc_gen_p9)
- return P9_PIR2COREID(pir);
- else if (proc_gen == proc_gen_p8)
+ if (proc_gen == proc_gen_p9) {
+ if (this_cpu()->is_fused_core)
+ return P9_PIRFUSED2NORMALCOREID(pir);
+ else
+ return P9_PIR2COREID(pir);
+ } else if (proc_gen == proc_gen_p8)
return P8_PIR2COREID(pir);
else
assert(false);
@@ -32,9 +36,12 @@ uint32_t pir_to_core_id(uint32_t pir)
uint32_t pir_to_thread_id(uint32_t pir)
{
- if (proc_gen == proc_gen_p9)
- return P9_PIR2THREADID(pir);
- else if (proc_gen == proc_gen_p8)
+ if (proc_gen == proc_gen_p9) {
+ if (this_cpu()->is_fused_core)
+ return P9_PIR2FUSEDTHREADID(pir);
+ else
+ return P9_PIR2THREADID(pir);
+ } else if (proc_gen == proc_gen_p8)
return P8_PIR2THREADID(pir);
else
assert(false);
diff --git a/core/cpu.c b/core/cpu.c
index 73777dd4..158f73e2 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -932,6 +932,7 @@ static void init_cpu_thread(struct cpu_thread *t,
#ifdef STACK_CHECK_ENABLED
t->stack_bot_mark = LONG_MAX;
#endif
+ t->is_fused_core = is_fused_core(mfspr(SPR_PVR));
assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
}
@@ -1016,14 +1017,16 @@ void init_boot_cpu(void)
" (max %d threads/core)\n", cpu_thread_count);
break;
case proc_gen_p9:
- cpu_thread_count = 4;
+ if (is_fused_core(pvr))
+ cpu_thread_count = 8;
+ else
+ cpu_thread_count = 4;
prlog(PR_INFO, "CPU: P9 generation processor"
" (max %d threads/core)\n", cpu_thread_count);
break;
default:
prerror("CPU: Unknown PVR, assuming 1 thread\n");
cpu_thread_count = 1;
- cpu_max_pir = mfspr(SPR_PIR);
}
if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
@@ -1151,7 +1154,7 @@ void init_all_cpus(void)
/* Iterate all CPUs in the device-tree */
dt_for_each_child(cpus, cpu) {
- unsigned int pir, server_no, chip_id;
+ unsigned int pir, server_no, chip_id, threads;
enum cpu_thread_state state;
const struct dt_property *p;
struct cpu_thread *t, *pt;
@@ -1181,6 +1184,14 @@ void init_all_cpus(void)
prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
" State=%d\n", pir, server_no, state);
+ /* Check max PIR */
+ if (cpu_max_pir < (pir + cpu_thread_count - 1)) {
+ prlog(PR_WARNING, "CPU: CPU potentially out of range"
+ "PIR=0x%04x MAX=0x%04x !\n",
+ pir, cpu_max_pir);
+ continue;
+ }
+
/* Setup thread 0 */
assert(pir <= cpu_max_pir);
t = pt = &cpu_stacks[pir].cpu;
@@ -1206,11 +1217,21 @@ void init_all_cpus(void)
/* Add the decrementer width property */
dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
+ if (t->is_fused_core)
+ dt_add_property(t->node, "ibm,fused-core", NULL, 0);
+
/* Iterate threads */
p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
if (!p)
continue;
- for (thread = 1; thread < (p->len / 4); thread++) {
+ threads = p->len / 4;
+ if (threads > cpu_thread_count) {
+ prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x"
+ " threads=%d max=%d\n",
+ pir, threads, cpu_thread_count);
+ threads = cpu_thread_count;
+ }
+ for (thread = 1; thread < threads; thread++) {
prlog(PR_TRACE, "CPU: secondary thread %d found\n",
thread);
t = &cpu_stacks[pir + thread].cpu;
@@ -1396,7 +1417,7 @@ static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
assert(jobs);
for_each_available_cpu(cpu) {
- if (!cpu_is_thread0(cpu))
+ if (!cpu_is_thread0(cpu) && !cpu_is_core_chiplet_primary(cpu))
continue;
if (cpu == this_cpu())
continue;
diff --git a/hdata/test/hdata_to_dt.c b/hdata/test/hdata_to_dt.c
index 49357cdf..90d83f93 100644
--- a/hdata/test/hdata_to_dt.c
+++ b/hdata/test/hdata_to_dt.c
@@ -38,7 +38,11 @@ struct spira_ntuple;
static void *ntuple_addr(const struct spira_ntuple *n);
/* Stuff which core expects. */
-#define __this_cpu ((struct cpu_thread *)NULL)
+struct cpu_thread *my_fake_cpu;
+static struct cpu_thread *this_cpu(void)
+{
+ return my_fake_cpu;
+}
unsigned long tb_hz = 512000000;
@@ -74,6 +78,7 @@ unsigned long tb_hz = 512000000;
struct cpu_thread {
uint32_t pir;
uint32_t chip_id;
+ bool is_fused_core;
};
struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
const char *name,
@@ -95,6 +100,8 @@ static inline struct cpu_job *cpu_queue_job(struct cpu_thread *cpu,
struct cpu_thread __boot_cpu, *boot_cpu = &__boot_cpu;
static unsigned long fake_pvr = PVR_P8;
+unsigned int cpu_thread_count = 8;
+
static inline unsigned long mfspr(unsigned int spr)
{
assert(spr == SPR_PVR);
diff --git a/hw/xive.c b/hw/xive.c
index 8d6095c0..626ec182 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -3074,7 +3074,7 @@ static void xive_init_cpu(struct cpu_thread *c)
* of a pair is present we just do the setup for each of them, which
* is harmless.
*/
- if (cpu_is_thread0(c))
+ if (cpu_is_thread0(c) || cpu_is_core_chiplet_primary(c))
xive_configure_ex_special_bar(x, c);
/* Initialize the state structure */
diff --git a/include/chip.h b/include/chip.h
index b79b63ec..38fafcf4 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -56,6 +56,26 @@
* thus we have a 6-bit core number.
*
* Note: XIVE Only supports 4-bit chip numbers ...
+ *
+ * Upper PIR Bits
+ * --------------
+ *
+ * Normal-Core Mode:
+ * 57:61 CoreID
+ * 62:63 ThreadID
+ *
+ * Fused-Core Mode:
+ * 57:59 FusedQuadID
+ * 60 FusedCoreID
+ * 61:63 FusedThreadID
+ *
+ * FusedCoreID 0 contains normal-core chiplet 0 and 1
+ * FusedCoreID 1 contains normal-core chiplet 2 and 3
+ *
+ * Fused cores have interleaved threads:
+ * core chiplet 0/2 = t0, t2, t4, t6
+ * core chiplet 1/3 = t1, t3, t5, t7
+ *
*/
#define P9_PIR2GCID(pir) (((pir) >> 8) & 0x7f)
@@ -67,6 +87,17 @@
#define P9_GCID2CHIPID(gcid) ((gcid) & 0x7)
+#define P9_PIR2FUSEDQUADID(pir) (((pir) >> 4) & 0x7)
+
+#define P9_PIR2FUSEDCOREID(pir) (((pir) >> 3) & 0x1)
+
+#define P9_PIR2FUSEDTHREADID(pir) ((pir) & 0x7)
+
+#define P9_PIRFUSED2NORMALCOREID(pir) \
+ (P9_PIR2FUSEDQUADID(pir) << 2) | \
+ (P9_PIR2FUSEDCOREID(pir) << 1) | \
+ (P9_PIR2FUSEDTHREADID(pir) & 1)
+
/* P9 specific ones mostly used by XIVE */
#define P9_PIR2LOCALCPU(pir) ((pir) & 0xff)
#define P9_PIRFROMLOCALCPU(chip, cpu) (((chip) << 8) | (cpu))
diff --git a/include/cpu.h b/include/cpu.h
index c90b961f..1863d6ad 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -42,6 +42,7 @@ struct cpu_thread {
uint32_t server_no;
uint32_t chip_id;
bool is_secondary;
+ bool is_fused_core;
struct cpu_thread *primary;
enum cpu_thread_state state;
struct dt_node *node;
@@ -244,6 +245,11 @@ static inline bool cpu_is_thread0(struct cpu_thread *cpu)
return cpu->primary == cpu;
}
+static inline bool cpu_is_core_chiplet_primary(struct cpu_thread *cpu)
+{
+ return cpu->is_fused_core & (cpu_get_thread_index(cpu) == 1);
+}
+
static inline bool cpu_is_sibling(struct cpu_thread *cpu1,
struct cpu_thread *cpu2)
{
diff --git a/include/xscom.h b/include/xscom.h
index bd8bb89a..db6d3fcd 100644
--- a/include/xscom.h
+++ b/include/xscom.h
@@ -110,6 +110,9 @@
/*
* Additional useful definitions for P9
+ *
+ * Note: In all of these, the core numbering is the
+ * *normal* (small) core number.
*/
/*
--
2.26.2
More information about the Skiboot
mailing list