[Skiboot] [RFC PATCH] core/cpu: make stack allocation fully dynamic

Nicholas Piggin npiggin at gmail.com
Fri Feb 16 17:52:50 AEDT 2018


Initial stack allocation is a bit clunky. It first allocates a memory
region for stacks that can accommodate maximum PIR of the
architecture, and initialises all of the cpu structures in them.

This is 32768 on POWER9, which is 512MB for stacks! It does then
shrink the size of the stack mem_region after all CPUs are discovered,
but this is a bit of a hack that leaves a hole in memory allocation
regions. And leaves possible allocations above the stacks quite sparse
(which isn't really desirable because relocation-off memory access
still consumes ERATs).

Instead, this parses the device tree to determine the number of CPUs
before setting up the stack memory region.
---
 core/cpu.c           | 66 ++++++++++++++++++++++++++++++----------------------
 core/init.c          |  2 ++
 core/mem_region.c    | 15 ++++--------
 include/cpu.h        |  1 +
 include/mem_region.h |  1 -
 5 files changed, 46 insertions(+), 39 deletions(-)

diff --git a/core/cpu.c b/core/cpu.c
index 3e110c3c..213f80ee 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -824,6 +824,7 @@ static void init_cpu_thread(struct cpu_thread *t,
 			    enum cpu_thread_state state,
 			    unsigned int pir)
 {
+	memset(t, 0, sizeof(struct cpu_thread));
 	init_lock(&t->dctl_lock);
 	init_lock(&t->job_lock);
 	list_head_init(&t->job_queue);
@@ -878,7 +879,7 @@ void __nomcount pre_init_boot_cpu(void)
 
 void init_boot_cpu(void)
 {
-	unsigned int i, pir, pvr;
+	unsigned int pir, pvr;
 
 	pir = mfspr(SPR_PIR);
 	pvr = mfspr(SPR_PVR);
@@ -913,23 +914,20 @@ void init_boot_cpu(void)
 		proc_gen = proc_gen_unknown;
 	}
 
-	/* Get a CPU thread count and an initial max PIR based on family */
+	/* Get a CPU thread count based on family */
 	switch(proc_gen) {
 	case proc_gen_p7:
 		cpu_thread_count = 4;
-		cpu_max_pir = SPR_PIR_P7_MASK;
 		prlog(PR_INFO, "CPU: P7 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	case proc_gen_p8:
 		cpu_thread_count = 8;
-		cpu_max_pir = SPR_PIR_P8_MASK;
 		prlog(PR_INFO, "CPU: P8 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	case proc_gen_p9:
 		cpu_thread_count = 4;
-		cpu_max_pir = SPR_PIR_P9_MASK;
 		prlog(PR_INFO, "CPU: P9 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
@@ -941,25 +939,16 @@ void init_boot_cpu(void)
 
 	prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
 	      pir, pvr);
-	prlog(PR_DEBUG, "CPU: Initial max PIR set to 0x%x\n", cpu_max_pir);
 
 	/*
-	 * Adjust top of RAM to include CPU stacks. While we *could* have
-	 * less RAM than this... during early boot, it's enough of a check
-	 * until we start parsing device tree / hdat and find out for sure
+	 * Adjust top of RAM to include the boot CPU stack. While we *could*
+	 * have less RAM than this... during early boot, it's enough of a
+	 * check until we start parsing device tree / hdat and find out for
+	 * sure
 	 */
+	cpu_max_pir = pir;
 	top_of_ram += (cpu_max_pir + 1) * STACK_SIZE;
 
-	/* Clear the CPU structs */
-	for (i = 0; i <= cpu_max_pir; i++) {
-		/* boot CPU already cleared and we don't want to clobber
-		 * its stack guard value.
-		 */
-		if (i == pir)
-			continue;
-		memset(&cpu_stacks[i].cpu, 0, sizeof(struct cpu_thread));
-	}
-
 	/* Setup boot CPU state */
 	boot_cpu = &cpu_stacks[pir].cpu;
 	init_cpu_thread(boot_cpu, cpu_state_active, pir);
@@ -1012,10 +1001,39 @@ static int find_dec_bits(void)
 	return bits;
 }
 
+void init_cpu_max_pir(void)
+{
+	struct dt_node *cpus, *cpu;
+
+	cpus = dt_find_by_path(dt_root, "/cpus");
+	assert(cpus);
+
+	/* Iterate all CPUs in the device-tree */
+	dt_for_each_child(cpus, cpu) {
+		unsigned int pir, server_no;
+
+		/* Skip cache nodes */
+		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
+			continue;
+
+		server_no = dt_prop_get_u32(cpu, "reg");
+
+		/* If PIR property is absent, assume it's the same as the
+		 * server number
+		 */
+		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);
+
+		if (cpu_max_pir < pir + cpu_thread_count - 1)
+			cpu_max_pir = pir + cpu_thread_count - 1;
+	}
+
+	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", cpu_max_pir);
+}
+
 void init_all_cpus(void)
 {
 	struct dt_node *cpus, *cpu;
-	unsigned int thread, new_max_pir = 0;
+	unsigned int thread;
 	int dec_bits = find_dec_bits();
 
 	cpus = dt_find_by_path(dt_root, "/cpus");
@@ -1052,7 +1070,6 @@ void init_all_cpus(void)
 		      " State=%d\n", pir, server_no, state);
 
 		/* Setup thread 0 */
-		assert(pir <= cpu_max_pir);
 		t = pt = &cpu_stacks[pir].cpu;
 		if (t != boot_cpu) {
 			init_cpu_thread(t, state, pir);
@@ -1074,10 +1091,6 @@ void init_all_cpus(void)
 		/* Add the decrementer width property */
 		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
 
-		/* Adjust max PIR */
-		if (new_max_pir < (pir + cpu_thread_count - 1))
-			new_max_pir = pir + cpu_thread_count - 1;
-
 		/* Iterate threads */
 		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
 		if (!p)
@@ -1098,9 +1111,6 @@ void init_all_cpus(void)
 		}
 		prlog(PR_INFO, "CPU:  %d secondary threads\n", thread);
 	}
-	cpu_max_pir = new_max_pir;
-	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", new_max_pir);
-	adjust_cpu_stacks_alloc();
 }
 
 void cpu_bringup(void)
diff --git a/core/init.c b/core/init.c
index e8c84a95..d167e4b9 100644
--- a/core/init.c
+++ b/core/init.c
@@ -919,6 +919,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	 */
 	lpc_init();
 
+	init_cpu_max_pir();
+
 	/*
 	 * Now, we init our memory map from the device-tree, and immediately
 	 * reserve areas which we know might contain data coming from
diff --git a/core/mem_region.c b/core/mem_region.c
index 3c93cd47..102a7045 100644
--- a/core/mem_region.c
+++ b/core/mem_region.c
@@ -912,15 +912,6 @@ bool mem_range_is_reserved(uint64_t start, uint64_t size)
 	return false;
 }
 
-void adjust_cpu_stacks_alloc(void)
-{
-	/* CPU stacks start at 0, then when we know max possible PIR,
-	 * we adjust, then when we bring all CPUs online we know the
-	 * runtime max PIR, so we adjust this a few times during boot.
-	 */
-	skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;
-}
-
 static void mem_region_parse_reserved_properties(void)
 {
 	const struct dt_property *names, *ranges;
@@ -1054,7 +1045,11 @@ void mem_region_init(void)
 		unlock(&mem_region_lock);
 	}
 
-	adjust_cpu_stacks_alloc();
+	/*
+	 * This is called after we know the maximum PIR of all CPUs,
+	 * so we can dynamically set the stack length.
+	 */
+	skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;
 
 	lock(&mem_region_lock);
 
diff --git a/include/cpu.h b/include/cpu.h
index bec4b030..cdb168a7 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -152,6 +152,7 @@ static inline void __nomcount cpu_relax(void)
 /* Initialize CPUs */
 void pre_init_boot_cpu(void);
 void init_boot_cpu(void);
+void init_cpu_max_pir(void);
 void init_all_cpus(void);
 
 /* This brings up our secondaries */
diff --git a/include/mem_region.h b/include/mem_region.h
index 324e98e1..4da3b6ea 100644
--- a/include/mem_region.h
+++ b/include/mem_region.h
@@ -69,7 +69,6 @@ void mem_region_release_unused(void);
 extern struct mem_region skiboot_heap;
 
 void mem_region_init(void);
-void adjust_cpu_stacks_alloc(void);
 void mem_region_add_dt_reserved(void);
 
 /* Mark memory as reserved */
-- 
2.16.1



More information about the Skiboot mailing list