[Skiboot] [PATCH v2 1/9] Add basic P9 fused core support
Michael Neuling
mikey at neuling.org
Tue Mar 19 17:03:57 AEDT 2019
From: Ryan Grimm <grimm at linux.vnet.ibm.com>
P9 cores can be configured into fused core mode, where two core chiplets
function as a single 8-threaded core. So, when in fused core mode, bump the
threads per core from four to eight in boot_entry and set cpu_thread_count to
eight in init_boot_cpu.
The HID, AMOR, TSCR, RPR require the first active thread on that core chiplet
to load the copy for that core chiplet. So, send thread 1 of a fused core to
init_shared_sprs in boot_entry.
The code checks for fused core mode in the core thread state register and puts a
field in struct cpu_thread. This flag is checked when updating the HID and in
XIVE code when setting the special bar.
For XSCOM, the core ID is the non-fused EX. So, create macros to arrange the
bits. It's fairly verbose but somewhat readable.
This was tested on a P9 ZZ with 16 fused cores and ran HTX for over 24 hours.
Signed-off-by: Ryan Grimm <grimm at linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Signed-off-by: Michael Neuling <mikey at neuling.org>
---
asm/head.S | 24 +++++++++++++++++++++---
core/chip.c | 15 +++++++++++----
core/cpu.c | 39 ++++++++++++++++++++++++++++++++++-----
core/fast-reboot.c | 2 +-
hdata/test/hdata_to_dt.c | 9 ++++++++-
hw/xive.c | 2 +-
include/chip.h | 31 +++++++++++++++++++++++++++++++
include/cpu.h | 6 ++++++
include/xscom.h | 3 +++
9 files changed, 116 insertions(+), 15 deletions(-)
diff --git a/asm/head.S b/asm/head.S
index 0ed1acddd4..67d493ed3f 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -336,6 +336,7 @@ boot_offset:
* r28 : PVR
* r27 : DTB pointer (or NULL)
* r26 : PIR thread mask
+ * r25 : P9 fused core flag
*/
.global boot_entry
boot_entry:
@@ -354,13 +355,21 @@ boot_entry:
cmpwi cr0,%r3,PVR_TYPE_P8NVL
beq 2f
cmpwi cr0,%r3,PVR_TYPE_P9
- beq 1f
+ beq 3f
cmpwi cr0,%r3,PVR_TYPE_P9P
- beq 1f
+ beq 3f
attn /* Unsupported CPU type... what do we do ? */
b . /* loop here, just in case attn is disabled */
- /* P8 -> 8 threads */
+ /* Check for fused core and set flag */
+3:
+ li %r3, 0x1e0
+ mtspr SPR_SPRC, %r3
+ mfspr %r3, SPR_SPRD
+ andi. %r25, %r3, 1
+ beq 1f
+
+ /* P8 or P9 fused -> 8 threads */
2: li %r26,7
/* Get our reloc offset into r30 */
@@ -382,6 +391,15 @@ boot_entry:
LOAD_IMM64(%r3, (MSR_HV | MSR_SF))
mtmsrd %r3,0
+ /* If fused, t1 is primary chiplet and must init shared sprs */
+ andi. %r3,%r25,1
+ beq not_fused
+
+ mfspr %r31,SPR_PIR
+ andi. %r3,%r31,1
+ bnel init_shared_sprs
+
+not_fused:
/* Check our PIR, avoid threads */
mfspr %r31,SPR_PIR
and. %r0,%r31,%r26
diff --git a/core/chip.c b/core/chip.c
index 6526325342..2b9b6ef9e2 100644
--- a/core/chip.c
+++ b/core/chip.c
@@ -20,6 +20,7 @@
#include <console.h>
#include <device.h>
#include <timebase.h>
+#include <cpu.h>
static struct proc_chip *chips[MAX_CHIPS];
enum proc_chip_quirks proc_chip_quirks;
@@ -37,7 +38,10 @@ uint32_t pir_to_chip_id(uint32_t pir)
uint32_t pir_to_core_id(uint32_t pir)
{
if (proc_gen == proc_gen_p9)
- return P9_PIR2COREID(pir);
+ if (this_cpu()->is_fused_core)
+ return P9_PIRFUSED2NORMALCOREID(pir);
+ else
+ return P9_PIR2COREID(pir);
else if (proc_gen == proc_gen_p8)
return P8_PIR2COREID(pir);
else
@@ -46,9 +50,12 @@ uint32_t pir_to_core_id(uint32_t pir)
uint32_t pir_to_thread_id(uint32_t pir)
{
- if (proc_gen == proc_gen_p9)
- return P9_PIR2THREADID(pir);
- else if (proc_gen == proc_gen_p8)
+ if (proc_gen == proc_gen_p9) {
+ if (this_cpu()->is_fused_core)
+ return P9_PIR2FUSEDTHREADID(pir);
+ else
+ return P9_PIR2THREADID(pir);
+ } else if (proc_gen == proc_gen_p8)
return P8_PIR2THREADID(pir);
else
return P7_PIR2THREADID(pir);
diff --git a/core/cpu.c b/core/cpu.c
index d9d4713372..1bcd2b6660 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -922,6 +922,14 @@ void cpu_disable_all_threads(struct cpu_thread *cpu)
/* XXX Do something to actually stop the core */
}
+/*
+ * Report whether the executing core is in P9 fused-core mode.
+ *
+ * Writes 0x1e0 to SPRC, reads the selected value back through SPRD and
+ * tests PPC_BIT(63).
+ * NOTE(review): 0x1e0 presumably selects the core thread state register,
+ * as the commit message describes -- confirm against the P9 documentation.
+ *
+ * Returns 1 when the fused-core bit is set, 0 otherwise.
+ */
+static int is_fused_core(void)
+{
+ /* Keep the full SPR width; mfspr() returns an unsigned long */
+ unsigned long core_thread_state;
+
+ mtspr(SPR_SPRC, 0x00000000000001e0ULL);
+ core_thread_state = mfspr(SPR_SPRD);
+
+ /* Normalise to 0/1 so the result is safe to store in an int or a bool */
+ return !!(core_thread_state & PPC_BIT(63));
+}
+
static void init_cpu_thread(struct cpu_thread *t,
enum cpu_thread_state state,
unsigned int pir)
@@ -941,6 +949,7 @@ static void init_cpu_thread(struct cpu_thread *t,
#ifdef STACK_CHECK_ENABLED
t->stack_bot_mark = LONG_MAX;
#endif
+ t->is_fused_core = is_fused_core();
assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
}
@@ -1034,14 +1043,16 @@ void init_boot_cpu(void)
" (max %d threads/core)\n", cpu_thread_count);
break;
case proc_gen_p9:
- cpu_thread_count = 4;
+ if (is_fused_core())
+ cpu_thread_count = 8;
+ else
+ cpu_thread_count = 4;
prlog(PR_INFO, "CPU: P9 generation processor"
" (max %d threads/core)\n", cpu_thread_count);
break;
default:
prerror("CPU: Unknown PVR, assuming 1 thread\n");
cpu_thread_count = 1;
- cpu_max_pir = mfspr(SPR_PIR);
}
if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
@@ -1169,7 +1180,7 @@ void init_all_cpus(void)
/* Iterate all CPUs in the device-tree */
dt_for_each_child(cpus, cpu) {
- unsigned int pir, server_no, chip_id;
+ unsigned int pir, server_no, chip_id, threads;
enum cpu_thread_state state;
const struct dt_property *p;
struct cpu_thread *t, *pt;
@@ -1197,6 +1208,14 @@ void init_all_cpus(void)
prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
" State=%d\n", pir, server_no, state);
+ /*
+ * Check max PIR: skip this core if the PIR of its last possible
+ * thread (pir + cpu_thread_count - 1) would exceed cpu_max_pir.
+ */
+ if (cpu_max_pir < (pir + cpu_thread_count - 1)) {
+ prlog(PR_WARNING, "CPU: CPU potentially out of range"
+ " PIR=0x%04x MAX=0x%04x !\n",
+ pir, cpu_max_pir);
+ continue;
+ }
+
/* Setup thread 0 */
assert(pir <= cpu_max_pir);
t = pt = &cpu_stacks[pir].cpu;
@@ -1222,11 +1241,21 @@ void init_all_cpus(void)
/* Add the decrementer width property */
dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
+ if (t->is_fused_core)
+ dt_add_property(t->node, "ibm,fused-core", NULL, 0);
+
/* Iterate threads */
p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
if (!p)
continue;
- for (thread = 1; thread < (p->len / 4); thread++) {
+ threads = p->len / 4;
+ if (threads > cpu_thread_count) {
+ prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x"
+ " threads=%d max=%d\n",
+ pir, threads, cpu_thread_count);
+ threads = cpu_thread_count;
+ }
+ for (thread = 1; thread < threads; thread++) {
prlog(PR_TRACE, "CPU: secondary thread %d found\n",
thread);
t = &cpu_stacks[pir + thread].cpu;
@@ -1412,7 +1441,7 @@ static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
assert(jobs);
for_each_available_cpu(cpu) {
- if (!cpu_is_thread0(cpu))
+ if (!cpu_is_thread0(cpu) && !cpu_is_core_chiplet_primary(cpu))
continue;
if (cpu == this_cpu())
continue;
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
index 22160b65d2..be70c2271f 100644
--- a/core/fast-reboot.c
+++ b/core/fast-reboot.c
@@ -236,7 +236,7 @@ static void cleanup_cpu_state(void)
struct cpu_thread *cpu = this_cpu();
/* Per core cleanup */
- if (cpu_is_thread0(cpu)) {
+ if (cpu_is_thread0(cpu) || cpu_is_core_chiplet_primary(cpu)) {
/* Shared SPRs whacked back to normal */
/* XXX Update the SLW copies ! Also dbl check HIDs etc... */
diff --git a/hdata/test/hdata_to_dt.c b/hdata/test/hdata_to_dt.c
index a5f152e86b..cddb1d435f 100644
--- a/hdata/test/hdata_to_dt.c
+++ b/hdata/test/hdata_to_dt.c
@@ -47,7 +47,11 @@ struct spira_ntuple;
static void *ntuple_addr(const struct spira_ntuple *n);
/* Stuff which core expects. */
-#define __this_cpu ((struct cpu_thread *)NULL)
+struct cpu_thread *my_fake_cpu;
+static struct cpu_thread *this_cpu(void)
+{
+ return my_fake_cpu;
+}
unsigned long tb_hz = 512000000;
@@ -84,6 +88,7 @@ unsigned long tb_hz = 512000000;
struct cpu_thread {
uint32_t pir;
uint32_t chip_id;
+ bool is_fused_core;
};
struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
const char *name,
@@ -105,6 +110,8 @@ static inline struct cpu_job *cpu_queue_job(struct cpu_thread *cpu,
struct cpu_thread __boot_cpu, *boot_cpu = &__boot_cpu;
static unsigned long fake_pvr = PVR_P7;
+unsigned int cpu_thread_count = 8;
+
static inline unsigned long mfspr(unsigned int spr)
{
assert(spr == SPR_PVR);
diff --git a/hw/xive.c b/hw/xive.c
index b863b634d1..c9f3f07dfb 100644
--- a/hw/xive.c
+++ b/hw/xive.c
@@ -3299,7 +3299,7 @@ static void xive_init_cpu(struct cpu_thread *c)
* of a pair is present we just do the setup for each of them, which
* is harmless.
*/
- if (cpu_is_thread0(c))
+ if (cpu_is_thread0(c) || cpu_is_core_chiplet_primary(c))
xive_configure_ex_special_bar(x, c);
/* Initialize the state structure */
diff --git a/include/chip.h b/include/chip.h
index d6e7e355bf..bcb080b6bf 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -91,6 +91,26 @@
* thus we have a 6-bit core number.
*
* Note: XIVE Only supports 4-bit chip numbers ...
+ *
+ * Upper PIR Bits
+ * --------------
+ *
+ * Normal-Core Mode:
+ * 57:61 CoreID
+ * 62:63 ThreadID
+ *
+ * Fused-Core Mode:
+ * 57:59 FusedQuadID
+ * 60 FusedCoreID
+ * 61:63 FusedThreadID
+ *
+ * FusedCoreID 0 contains normal-core chiplet 0 and 1
+ * FusedCoreID 1 contains normal-core chiplet 2 and 3
+ *
+ * Fused cores have interleaved threads:
+ * core chiplet 0/2 = t0, t2, t4, t6
+ * core chiplet 1/3 = t1, t3, t5, t7
+ *
*/
#define P9_PIR2GCID(pir) (((pir) >> 8) & 0x7f)
@@ -102,6 +122,17 @@
#define P9_GCID2CHIPID(gcid) ((gcid) & 0x7)
+/* Fused-core PIR fields: quad (3 bits), fused core in the quad (1 bit), thread (3 bits) */
+#define P9_PIR2FUSEDQUADID(pir) (((pir) >> 4) & 0x7)
+
+#define P9_PIR2FUSEDCOREID(pir) (((pir) >> 3) & 0x1)
+
+#define P9_PIR2FUSEDTHREADID(pir) ((pir) & 0x7)
+
+/*
+ * Normal (small) core ID for a fused-core PIR. Fused threads are
+ * interleaved across the two core chiplets, so the low bit of the fused
+ * thread ID selects the chiplet. The whole expansion is parenthesized so
+ * the macro can be used safely inside a larger expression.
+ */
+#define P9_PIRFUSED2NORMALCOREID(pir) \
+ ((P9_PIR2FUSEDQUADID(pir) << 2) | \
+ (P9_PIR2FUSEDCOREID(pir) << 1) | \
+ (P9_PIR2FUSEDTHREADID(pir) & 1))
+
/* P9 specific ones mostly used by XIVE */
#define P9_PIR2LOCALCPU(pir) ((pir) & 0xff)
#define P9_PIRFROMLOCALCPU(chip, cpu) (((chip) << 8) | (cpu))
diff --git a/include/cpu.h b/include/cpu.h
index 06d5c0d112..009ae52c76 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -54,6 +54,7 @@ struct cpu_thread {
uint32_t server_no;
uint32_t chip_id;
bool is_secondary;
+ bool is_fused_core;
struct cpu_thread *primary;
enum cpu_thread_state state;
struct dt_node *node;
@@ -251,6 +252,11 @@ static inline bool cpu_is_thread0(struct cpu_thread *cpu)
return cpu->primary == cpu;
}
+/*
+ * True when @cpu belongs to a fused core and is thread index 1 of it.
+ * Per the commit message, thread 1 of a fused core is the one sent to
+ * init_shared_sprs, so callers treat it like thread 0 for per-core setup.
+ */
+static inline bool cpu_is_core_chiplet_primary(struct cpu_thread *cpu)
+{
+ return cpu->is_fused_core && (cpu_get_thread_index(cpu) == 1);
+}
+
static inline bool cpu_is_sibling(struct cpu_thread *cpu1,
struct cpu_thread *cpu2)
{
diff --git a/include/xscom.h b/include/xscom.h
index 98532240b1..4e6ce92dd0 100644
--- a/include/xscom.h
+++ b/include/xscom.h
@@ -123,6 +123,9 @@
/*
* Additional useful definitions for P9
+ *
+ * Note: In all of these, the core numbering is the *small* core
+ * number.
*/
/* An EQ is a quad (also named an EP) */
--
2.20.1
More information about the Skiboot
mailing list