[Skiboot] [PATCH] core/cpu: discover stack region size before initialising memory regions

Nicholas Piggin npiggin at gmail.com
Sun Mar 25 17:54:08 AEDT 2018


Stack allocation first allocates a memory region sized to hold stacks
for all possible CPUs up to the maximum PIR of the architecture, zeros
the region, then initialises all stacks. Max PIR is 32768 on POWER9,
which is 512MB for stacks.

The stack region is then shrunk after CPUs are discovered, but this is
a bit of a hack, and it leaves a hole in the memory allocation regions
as it's done after mem regions are initialised.

  0x000000000000..00002fffffff : ibm,os-reserve - OS
  0x000030000000..0000303fffff : ibm,firmware-code - OPAL
  0x000030400000..000030ffffff : ibm,firmware-heap - OPAL
  0x000031000000..000031bfffff : ibm,firmware-data - OPAL
  0x000031c00000..000031c0ffff : ibm,firmware-stacks - OPAL
  *** gap ***
  0x000051c00000..000051d01fff : ibm,firmware-allocs-memory at 0 - OPAL
  0x000051d02000..00007fffffff : ibm,firmware-allocs-memory at 0 - OS
  0x000080000000..000080b3cdff : initramfs - OPAL
  0x000080b3ce00..000080b7cdff : ibm,fake-nvram - OPAL
  0x000080b7ce00..0000ffffffff : ibm,firmware-allocs-memory at 0 - OS

This change moves zeroing into the per-cpu stack setup. The boot CPU
stack is set up based on the current PIR. Then the size of the stack
region is set, by discovering the maximum PIR of the system from the
device tree, before mem regions are intialised.

This results in all memory being accounted within memory regions,
and less memory fragmentation of OPAL allocations.

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 core/cpu.c           | 66 +++++++++++++++++++++++++++++-----------------------
 core/init.c          |  6 +++++
 core/mem_region.c    | 15 ++++--------
 include/cpu.h        |  1 +
 include/mem_region.h |  1 -
 5 files changed, 49 insertions(+), 40 deletions(-)

diff --git a/core/cpu.c b/core/cpu.c
index e243344a..1a7ccb0b 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -43,7 +43,7 @@ struct cpu_stack {
 	};
 } __align(STACK_SIZE);
 
-static struct cpu_stack *cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
+static struct cpu_stack * const cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
 unsigned int cpu_thread_count;
 unsigned int cpu_max_pir;
 struct cpu_thread *boot_cpu;
@@ -826,6 +826,7 @@ static void init_cpu_thread(struct cpu_thread *t,
 			    enum cpu_thread_state state,
 			    unsigned int pir)
 {
+	memset(t, 0, sizeof(struct cpu_thread));
 	init_lock(&t->dctl_lock);
 	init_lock(&t->job_lock);
 	list_head_init(&t->job_queue);
@@ -880,7 +881,7 @@ void __nomcount pre_init_boot_cpu(void)
 
 void init_boot_cpu(void)
 {
-	unsigned int i, pir, pvr;
+	unsigned int pir, pvr;
 
 	pir = mfspr(SPR_PIR);
 	pvr = mfspr(SPR_PVR);
@@ -915,23 +916,20 @@ void init_boot_cpu(void)
 		proc_gen = proc_gen_unknown;
 	}
 
-	/* Get a CPU thread count and an initial max PIR based on family */
+	/* Get a CPU thread count based on family */
 	switch(proc_gen) {
 	case proc_gen_p7:
 		cpu_thread_count = 4;
-		cpu_max_pir = SPR_PIR_P7_MASK;
 		prlog(PR_INFO, "CPU: P7 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	case proc_gen_p8:
 		cpu_thread_count = 8;
-		cpu_max_pir = SPR_PIR_P8_MASK;
 		prlog(PR_INFO, "CPU: P8 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
 	case proc_gen_p9:
 		cpu_thread_count = 4;
-		cpu_max_pir = SPR_PIR_P9_MASK;
 		prlog(PR_INFO, "CPU: P9 generation processor"
 		      " (max %d threads/core)\n", cpu_thread_count);
 		break;
@@ -943,25 +941,14 @@ void init_boot_cpu(void)
 
 	prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
 	      pir, pvr);
-	prlog(PR_DEBUG, "CPU: Initial max PIR set to 0x%x\n", cpu_max_pir);
 
 	/*
-	 * Adjust top of RAM to include CPU stacks. While we *could* have
-	 * less RAM than this... during early boot, it's enough of a check
-	 * until we start parsing device tree / hdat and find out for sure
+	 * Adjust top of RAM to include the boot CPU stack. If we have less
+	 * RAM than this, it's not possible to boot.
 	 */
+	cpu_max_pir = pir;
 	top_of_ram += (cpu_max_pir + 1) * STACK_SIZE;
 
-	/* Clear the CPU structs */
-	for (i = 0; i <= cpu_max_pir; i++) {
-		/* boot CPU already cleared and we don't want to clobber
-		 * its stack guard value.
-		 */
-		if (i == pir)
-			continue;
-		memset(&cpu_stacks[i].cpu, 0, sizeof(struct cpu_thread));
-	}
-
 	/* Setup boot CPU state */
 	boot_cpu = &cpu_stacks[pir].cpu;
 	init_cpu_thread(boot_cpu, cpu_state_active, pir);
@@ -1029,10 +1016,39 @@ static void init_tm_suspend_mode_property(void)
 		tm_suspend_enabled = false;
 }
 
+void init_cpu_max_pir(void)
+{
+	struct dt_node *cpus, *cpu;
+
+	cpus = dt_find_by_path(dt_root, "/cpus");
+	assert(cpus);
+
+	/* Iterate all CPUs in the device-tree */
+	dt_for_each_child(cpus, cpu) {
+		unsigned int pir, server_no;
+
+		/* Skip cache nodes */
+		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
+			continue;
+
+		server_no = dt_prop_get_u32(cpu, "reg");
+
+		/* If PIR property is absent, assume it's the same as the
+		 * server number
+		 */
+		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);
+
+		if (cpu_max_pir < pir + cpu_thread_count - 1)
+			cpu_max_pir = pir + cpu_thread_count - 1;
+	}
+
+	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", cpu_max_pir);
+}
+
 void init_all_cpus(void)
 {
 	struct dt_node *cpus, *cpu;
-	unsigned int thread, new_max_pir = 0;
+	unsigned int thread;
 	int dec_bits = find_dec_bits();
 
 	cpus = dt_find_by_path(dt_root, "/cpus");
@@ -1071,7 +1087,6 @@ void init_all_cpus(void)
 		      " State=%d\n", pir, server_no, state);
 
 		/* Setup thread 0 */
-		assert(pir <= cpu_max_pir);
 		t = pt = &cpu_stacks[pir].cpu;
 		if (t != boot_cpu) {
 			init_cpu_thread(t, state, pir);
@@ -1096,10 +1111,6 @@ void init_all_cpus(void)
 		/* Add the decrementer width property */
 		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);
 
-		/* Adjust max PIR */
-		if (new_max_pir < (pir + cpu_thread_count - 1))
-			new_max_pir = pir + cpu_thread_count - 1;
-
 		/* Iterate threads */
 		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
 		if (!p)
@@ -1120,9 +1131,6 @@ void init_all_cpus(void)
 		}
 		prlog(PR_INFO, "CPU:  %d secondary threads\n", thread);
 	}
-	cpu_max_pir = new_max_pir;
-	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", new_max_pir);
-	adjust_cpu_stacks_alloc();
 }
 
 void cpu_bringup(void)
diff --git a/core/init.c b/core/init.c
index d2e0e869..3e3c7ab6 100644
--- a/core/init.c
+++ b/core/init.c
@@ -933,6 +933,12 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	 */
 	lpc_init();
 
+	/*
+	 * This should be done before mem_region_init, so the stack
+	 * region length can be set according to the maximum PIR.
+	 */
+	init_cpu_max_pir();
+
 	/*
 	 * Now, we init our memory map from the device-tree, and immediately
 	 * reserve areas which we know might contain data coming from
diff --git a/core/mem_region.c b/core/mem_region.c
index 95441a63..bfb2bef8 100644
--- a/core/mem_region.c
+++ b/core/mem_region.c
@@ -937,15 +937,6 @@ bool mem_range_is_reserved(uint64_t start, uint64_t size)
 	return false;
 }
 
-void adjust_cpu_stacks_alloc(void)
-{
-	/* CPU stacks start at 0, then when we know max possible PIR,
-	 * we adjust, then when we bring all CPUs online we know the
-	 * runtime max PIR, so we adjust this a few times during boot.
-	 */
-	skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;
-}
-
 static void mem_region_parse_reserved_properties(void)
 {
 	const struct dt_property *names, *ranges;
@@ -1079,7 +1070,11 @@ void mem_region_init(void)
 		unlock(&mem_region_lock);
 	}
 
-	adjust_cpu_stacks_alloc();
+	/*
+	 * This is called after we know the maximum PIR of all CPUs,
+	 * so we can dynamically set the stack length.
+	 */
+	skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;
 
 	lock(&mem_region_lock);
 
diff --git a/include/cpu.h b/include/cpu.h
index 220fa1df..b7cd588d 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -157,6 +157,7 @@ static inline void __nomcount cpu_relax(void)
 /* Initialize CPUs */
 void pre_init_boot_cpu(void);
 void init_boot_cpu(void);
+void init_cpu_max_pir(void);
 void init_all_cpus(void);
 
 /* This brings up our secondaries */
diff --git a/include/mem_region.h b/include/mem_region.h
index 2a9453f5..9e288dac 100644
--- a/include/mem_region.h
+++ b/include/mem_region.h
@@ -73,7 +73,6 @@ void mem_dump_allocs(void);
 extern struct mem_region skiboot_heap;
 
 void mem_region_init(void);
-void adjust_cpu_stacks_alloc(void);
 void mem_region_add_dt_reserved(void);
 
 /* Mark memory as reserved */
-- 
2.16.1



More information about the Skiboot mailing list