[RFC/PATCH] Updated: ppc64: Add mem=X option

Michael Ellerman michael at ellerman.id.au
Fri Feb 25 19:14:08 EST 2005


Hi All,

Here is an updated patch for adding support for the mem=X boot option.

Nothing major's changed, just a bunch of cleanups as suggested by Olof. I've booted this a couple of times on a P5 LPAR.

Apart from the NUMA code which I'd like to test further, I'm fairly happy with this. I'll test it all a bit more next week and then hopefully it'll be ready for 2.6.12.

cheers


Summary:
 - Fixed comments
 - Rename prom_init.c variables for clarity
 - Removed likely/unlikely abuse
 - Only bolt/map up to memory_limit on iSeries/pSeries
 - Align memory_limit to PAGE_SIZE on iSeries
 - On pSeries memory_limit & tce_alloc_start/end are 16MB aligned
 - Don't offline numa nodes just for mem limit, just set spanned_pages = 0
 - Fix stupid truncation algorithm, thanks to Olof and Stephen

 arch/ppc64/kernel/iSeries_setup.c |   38 +++++++----
 arch/ppc64/kernel/lmb.c           |   33 +++++++++
 arch/ppc64/kernel/prom.c          |   15 ++++
 arch/ppc64/kernel/prom_init.c     |  131 +++++++++++++++++++++++++++++++++++---
 arch/ppc64/kernel/setup.c         |   20 ++++-
 arch/ppc64/mm/hash_utils.c        |   23 ++++++
 arch/ppc64/mm/numa.c              |   29 +++++++-
 include/asm-ppc64/lmb.h           |    1
 8 files changed, 259 insertions(+), 31 deletions(-)


Index: latest-bk-with-mem-option/arch/ppc64/kernel/setup.c
===================================================================
--- latest-bk-with-mem-option.orig/arch/ppc64/kernel/setup.c
+++ latest-bk-with-mem-option/arch/ppc64/kernel/setup.c
@@ -641,12 +641,11 @@ void __init setup_system(void)
 	early_console_initialized = 1;
 	register_console(&udbg_console);
 
-#endif /* !CONFIG_PPC_ISERIES */
-
 	/* Save unparsed command line copy for /proc/cmdline */
 	strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
 
 	parse_early_param();
+#endif /* !CONFIG_PPC_ISERIES */
 
 #if defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)
 	/*
@@ -805,20 +804,31 @@ struct seq_operations cpuinfo_op = {
 	.show =	show_cpuinfo,
 };
 
-#if 0 /* XXX not currently used */
+/*
+ * These three variables are used to save values passed to us by prom_init()
+ * via the device tree. The TCE variables are needed because with a memory_limit
+ * in force we may need to explicitly map the TCE area at the top of RAM.
+ */
 unsigned long memory_limit;
+unsigned long tce_alloc_start;
+unsigned long tce_alloc_end;
 
+#ifdef CONFIG_PPC_ISERIES
+/* 
+ * On iSeries we just parse the mem=X option from the command line.
+ * On pSeries it's a bit more complicated, see prom_init_mem()
+ */
 static int __init early_parsemem(char *p)
 {
 	if (!p)
 		return 0;
 
-	memory_limit = memparse(p, &p);
+	memory_limit = ALIGN(memparse(p, &p), PAGE_SIZE);
 
 	return 0;
 }
 early_param("mem", early_parsemem);
-#endif
+#endif /* CONFIG_PPC_ISERIES */
 
 #ifdef CONFIG_PPC_MULTIPLATFORM
 static int __init set_preferred_console(void)
Index: latest-bk-with-mem-option/arch/ppc64/kernel/lmb.c
===================================================================
--- latest-bk-with-mem-option.orig/arch/ppc64/kernel/lmb.c
+++ latest-bk-with-mem-option/arch/ppc64/kernel/lmb.c
@@ -344,3 +344,36 @@ lmb_abs_to_phys(unsigned long aa)
 
 	return pa;
 }
+
+/*
+ * Truncate the lmb list to memory_limit if it's set
+ * You must call lmb_analyze() after this.
+ */
+void __init lmb_apply_memory_limit(void)
+{
+	extern unsigned long memory_limit;
+	unsigned long i, limit;
+	struct lmb_region *mem = &(lmb.memory);
+
+	if (! memory_limit)
+		return;
+
+	limit = memory_limit;
+	for (i = 0; i < mem->cnt; i++) {
+		if (limit > mem->region[i].size) {
+			limit -= mem->region[i].size;
+			continue;
+		}
+		
+#ifdef DEBUG
+		udbg_printf("lmb_apply_memory_limit(): truncating at region %x\n", i);
+		udbg_printf("lmb_apply_memory_limit(): memory_limit = %x\n", memory_limit);
+		udbg_printf("lmb_apply_memory_limit(): size  = %x\n", mem->region[i].size);
+		udbg_printf("lmb_apply_memory_limit(): limit = %x\n", limit);
+#endif
+
+		mem->region[i].size = limit;
+		mem->cnt = i + 1;
+		break;
+	}
+}
Index: latest-bk-with-mem-option/include/asm-ppc64/lmb.h
===================================================================
--- latest-bk-with-mem-option.orig/include/asm-ppc64/lmb.h
+++ latest-bk-with-mem-option/include/asm-ppc64/lmb.h
@@ -53,6 +53,7 @@ extern unsigned long __init lmb_alloc_ba
 extern unsigned long __init lmb_phys_mem_size(void);
 extern unsigned long __init lmb_end_of_DRAM(void);
 extern unsigned long __init lmb_abs_to_phys(unsigned long);
+extern void __init lmb_apply_memory_limit(void);
 
 extern void lmb_dump_all(void);
 
Index: latest-bk-with-mem-option/arch/ppc64/kernel/iSeries_setup.c
===================================================================
--- latest-bk-with-mem-option.orig/arch/ppc64/kernel/iSeries_setup.c
+++ latest-bk-with-mem-option/arch/ppc64/kernel/iSeries_setup.c
@@ -284,7 +284,7 @@ unsigned long iSeries_process_mainstore_
 	return mem_blocks;
 }
 
-static void __init iSeries_parse_cmdline(void)
+static void __init iSeries_get_cmdline(void)
 {
 	char *p, *q;
 
@@ -304,6 +304,8 @@ static void __init iSeries_parse_cmdline
 
 /*static*/ void __init iSeries_init_early(void)
 {
+	extern unsigned long memory_limit;
+
 	DBG(" -> iSeries_init_early()\n");
 
 	ppcdbg_initialize();
@@ -351,6 +353,29 @@ static void __init iSeries_parse_cmdline
 	 */
 	build_iSeries_Memory_Map();
 
+	iSeries_get_cmdline();
+	
+	/* Save unparsed command line copy for /proc/cmdline */
+	strlcpy(saved_command_line, cmd_line, COMMAND_LINE_SIZE);
+	
+	/* Parse early parameters, in particular mem=x */
+	parse_early_param();
+	
+	if (memory_limit) {
+		if (memory_limit > systemcfg->physicalMemorySize)
+			printk("Ignoring 'mem' option, value %lu is too large.\n", memory_limit);
+		else
+			systemcfg->physicalMemorySize = memory_limit;
+	}
+
+	/* Bolt kernel mappings for all of memory (or just a bit if we've got a limit) */
+	iSeries_bolt_kernel(0, systemcfg->physicalMemorySize);
+	
+	lmb_init();
+	lmb_add(0, systemcfg->physicalMemorySize);
+	lmb_analyze();	/* ?? */
+	lmb_reserve(0, __pa(klimit));
+	
 	/* Initialize machine-dependency vectors */
 #ifdef CONFIG_SMP
 	smp_init_iSeries();
@@ -376,9 +401,6 @@ static void __init iSeries_parse_cmdline
 		initrd_start = initrd_end = 0;
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-
-	iSeries_parse_cmdline();
-
 	DBG(" <- iSeries_init_early()\n");
 }
 
@@ -539,14 +561,6 @@ static void __init build_iSeries_Memory_
 	 *   nextPhysChunk
 	 */
 	systemcfg->physicalMemorySize = chunk_to_addr(nextPhysChunk);
-
-	/* Bolt kernel mappings for all of memory */
-	iSeries_bolt_kernel(0, systemcfg->physicalMemorySize);
-
-	lmb_init();
-	lmb_add(0, systemcfg->physicalMemorySize);
-	lmb_analyze();	/* ?? */
-	lmb_reserve(0, __pa(klimit));
 }
 
 /*
Index: latest-bk-with-mem-option/arch/ppc64/kernel/prom.c
===================================================================
--- latest-bk-with-mem-option.orig/arch/ppc64/kernel/prom.c
+++ latest-bk-with-mem-option/arch/ppc64/kernel/prom.c
@@ -875,6 +875,8 @@ static int __init early_init_dt_scan_cho
 					    const char *full_path, void *data)
 {
 	u32 *prop;
+	u64 *prop64;
+	extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end;
 
 	if (strcmp(full_path, "/chosen") != 0)
 		return 0;
@@ -891,6 +893,18 @@ static int __init early_init_dt_scan_cho
 	if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL)
 		iommu_force_on = 1;
 
+	prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL);
+	if (prop64)
+		memory_limit = *prop64;
+		
+	prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL);
+	if (prop64)
+		tce_alloc_start = *prop64;
+		
+	prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL);
+	if (prop64)
+		tce_alloc_end = *prop64;
+
 #ifdef CONFIG_PPC_PSERIES
 	/* To help early debugging via the front panel, we retreive a minimal
 	 * set of RTAS infos now if available
@@ -1030,6 +1044,7 @@ void __init early_init_devtree(void *par
 	lmb_init();
 	scan_flat_dt(early_init_dt_scan_root, NULL);
 	scan_flat_dt(early_init_dt_scan_memory, NULL);
+	lmb_apply_memory_limit();
 	lmb_analyze();
 	systemcfg->physicalMemorySize = lmb_phys_mem_size();
 	lmb_reserve(0, __pa(klimit));
Index: latest-bk-with-mem-option/arch/ppc64/mm/hash_utils.c
===================================================================
--- latest-bk-with-mem-option.orig/arch/ppc64/mm/hash_utils.c
+++ latest-bk-with-mem-option/arch/ppc64/mm/hash_utils.c
@@ -140,6 +140,8 @@ void __init htab_initialize(void)
 	unsigned long pteg_count;
 	unsigned long mode_rw;
 	int i, use_largepages = 0;
+	unsigned long base = 0, size = 0;
+	extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end;
 
 	DBG(" -> htab_initialize()\n");
 
@@ -195,8 +197,6 @@ void __init htab_initialize(void)
 
 	/* create bolted the linear mapping in the hash table */
 	for (i=0; i < lmb.memory.cnt; i++) {
-		unsigned long base, size;
-
 		base = lmb.memory.region[i].physbase + KERNELBASE;
 		size = lmb.memory.region[i].size;
 
@@ -225,6 +225,25 @@ void __init htab_initialize(void)
 #endif /* CONFIG_U3_DART */
 		create_pte_mapping(base, base + size, mode_rw, use_largepages);
 	}
+
+	/*
+	 * If we have a memory_limit and we've allocated TCEs then we need to
+	 * explicitly map the TCE area at the top of RAM. We also cope with the
+	 * case that the TCEs start below memory_limit.
+	 * tce_alloc_start/end are 16MB aligned so the mapping should work
+	 * for either 4K or 16MB pages.
+	 */
+	if (tce_alloc_start) {
+		tce_alloc_start += KERNELBASE;
+		tce_alloc_end += KERNELBASE;
+		
+		if (base + size >= tce_alloc_start)
+			tce_alloc_start = base + size + 1;
+		
+		create_pte_mapping(tce_alloc_start, tce_alloc_end,
+			mode_rw, use_largepages);
+	}
+	
 	DBG(" <- htab_initialize()\n");
 }
 #undef KB
Index: latest-bk-with-mem-option/arch/ppc64/mm/numa.c
===================================================================
--- latest-bk-with-mem-option.orig/arch/ppc64/mm/numa.c
+++ latest-bk-with-mem-option/arch/ppc64/mm/numa.c
@@ -270,6 +270,7 @@ static int __init parse_numa_properties(
 	int max_domain = 0;
 	long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
 	unsigned long i;
+	extern unsigned long memory_limit;
 
 	if (numa_enabled == 0) {
 		printk(KERN_WARNING "NUMA disabled by user\n");
@@ -378,15 +379,37 @@ new_range:
 				size / PAGE_SIZE;
 		}
 
-		for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
-			numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
-				numa_domain;
+		for (i = start; i < (start+size) && i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT)
+			numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = numa_domain;
 
 		ranges--;
 		if (ranges)
 			goto new_range;
 	}
 
+	if (memory_limit) {
+		unsigned long size, limit = memory_limit;
+
+		for (i = 0; i <= max_domain; i++) {
+			size = init_node_data[i].node_spanned_pages * PAGE_SIZE;
+			if (limit > size) {
+				limit -= size;
+				continue;
+			}
+
+			init_node_data[i].node_spanned_pages = limit / PAGE_SIZE;
+
+			dbg("NUMA: truncating node %ld to 0x%lx bytes\n", i, limit);
+			break;
+		}
+
+		while (++i <= max_domain) {
+			dbg("NUMA: truncating node %ld to 0x0 bytes\n", i);
+			init_node_data[i].node_start_pfn = 0;
+			init_node_data[i].node_spanned_pages = 0;
+		}
+	}
+
 	for (i = 0; i <= max_domain; i++)
 		node_set_online(i);
 
Index: latest-bk-with-mem-option/arch/ppc64/kernel/prom_init.c
===================================================================
--- latest-bk-with-mem-option.orig/arch/ppc64/kernel/prom_init.c
+++ latest-bk-with-mem-option/arch/ppc64/kernel/prom_init.c
@@ -178,6 +178,10 @@ static int __initdata of_platform;
 
 static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
 
+static unsigned long __initdata prom_memory_limit;
+static unsigned long __initdata prom_tce_alloc_start;
+static unsigned long __initdata prom_tce_alloc_end;
+
 static unsigned long __initdata alloc_top;
 static unsigned long __initdata alloc_top_high;
 static unsigned long __initdata alloc_bottom;
@@ -385,10 +389,64 @@ static int __init prom_setprop(phandle n
 			 (u32)(unsigned long) value, (u32) valuelen);
 }
 
+/* We can't use the standard versions because of RELOC headaches. */
+#define isxdigit(c)	(('0' <= (c) && (c) <= '9') \
+			 || ('a' <= (c) && (c) <= 'f') \
+			 || ('A' <= (c) && (c) <= 'F'))
+			 
+#define isdigit(c)	('0' <= (c) && (c) <= '9')
+#define islower(c)	('a' <= (c) && (c) <= 'z')
+#define toupper(c)	(islower(c) ? ((c) - 'a' + 'A') : (c))
+
+unsigned long prom_strtoul(const char *cp, const char **endp)
+{
+	unsigned long result = 0, base = 10, value;
+	
+	if (*cp == '0') {
+		base = 8;
+		cp++;
+		if (toupper(*cp) == 'X') {
+			cp++;
+			base = 16;
+		}
+	}
+
+	while (isxdigit(*cp) &&
+	       (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
+		result = result * base + value;
+		cp++;
+	}
+	
+	if (endp)
+		*endp = cp;
+	
+	return result;
+}
+
+unsigned long prom_memparse(const char *ptr, const char **retptr)
+{
+	unsigned long ret = prom_strtoul(ptr, retptr);
+
+	switch (**retptr) {
+	case 'G':
+	case 'g':
+		ret <<= 10;
+	case 'M':
+	case 'm':
+		ret <<= 10;
+	case 'K':
+	case 'k':
+		ret <<= 10;
+		(*retptr)++;
+	default:
+		break;
+	}
+	return ret;
+}
 
 /*
  * Early parsing of the command line passed to the kernel, used for
- * the options that affect the iommu
+ * "mem=x" and the options that affect the iommu
  */
 static void __init early_cmdline_parse(void)
 {
@@ -419,6 +477,14 @@ static void __init early_cmdline_parse(v
 		else if (!strncmp(opt, RELOC("force"), 5))
 			RELOC(iommu_force_on) = 1;
 	}
+	
+	opt = strstr(RELOC(prom_cmd_line), RELOC("mem="));
+	if (opt) {
+		opt += 4;
+		RELOC(prom_memory_limit) = prom_memparse(opt, (const char **)&opt);
+		/* Align to 16 MB == size of large page */
+		RELOC(prom_memory_limit) = ALIGN(RELOC(prom_memory_limit), 0x1000000);
+	}
 }
 
 /*
@@ -665,15 +731,7 @@ static void __init prom_init_mem(void)
 		}
 	}
 
-	/* Setup our top/bottom alloc points, that is top of RMO or top of
-	 * segment 0 when running non-LPAR
-	 */
-	if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR )
-		RELOC(alloc_top) = RELOC(rmo_top);
-	else
-		RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top));
 	RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(klimit) - offset + 0x4000);
-	RELOC(alloc_top_high) = RELOC(ram_top);
 
 	/* Check if we have an initrd after the kernel, if we do move our bottom
 	 * point to after it
@@ -683,8 +741,41 @@ static void __init prom_init_mem(void)
 		    > RELOC(alloc_bottom))
 			RELOC(alloc_bottom) = PAGE_ALIGN(RELOC(prom_initrd_end));
 	}
+	
+	/*
+	 * If prom_memory_limit is set we reduce the upper limits *except* for
+	 * alloc_top_high. This must be the real top of RAM so we can put
+	 * TCE's up there.
+	 */
+	
+	RELOC(alloc_top_high) = RELOC(ram_top);
+	
+	if (RELOC(prom_memory_limit)) {
+		if (RELOC(prom_memory_limit) <= RELOC(alloc_bottom)) {
+			prom_printf("Ignoring mem=%x <= alloc_bottom.\n",
+				RELOC(prom_memory_limit));
+			RELOC(prom_memory_limit) = 0;
+		} else if (RELOC(prom_memory_limit) >= RELOC(ram_top)) {
+			prom_printf("Ignoring mem=%x >= ram_top.\n",
+				RELOC(prom_memory_limit));
+			RELOC(prom_memory_limit) = 0;
+		} else {
+			RELOC(ram_top) = RELOC(prom_memory_limit);
+			RELOC(rmo_top) = min(RELOC(rmo_top), RELOC(prom_memory_limit));
+		}
+	}
+
+	/*
+	 * Setup our top alloc point, that is top of RMO or top of
+	 * segment 0 when running non-LPAR.
+	 */
+	if ( RELOC(of_platform) == PLATFORM_PSERIES_LPAR )
+		RELOC(alloc_top) = RELOC(rmo_top);
+	else
+		RELOC(alloc_top) = RELOC(rmo_top) = min(0x40000000ul, RELOC(ram_top));
 
 	prom_printf("memory layout at init:\n");
+	prom_printf("  memory_limit : %x (16 MB aligned)\n", RELOC(prom_memory_limit));
 	prom_printf("  alloc_bottom : %x\n", RELOC(alloc_bottom));
 	prom_printf("  alloc_top    : %x\n", RELOC(alloc_top));
 	prom_printf("  alloc_top_hi : %x\n", RELOC(alloc_top_high));
@@ -873,6 +964,16 @@ static void __init prom_initialize_tce_t
 
 	reserve_mem(local_alloc_bottom, local_alloc_top - local_alloc_bottom);
 
+	if (RELOC(prom_memory_limit)) {
+		/*
+		 * We align the start to a 16MB boundary so we can map the TCE area
+		 * using large pages if possible. The end should be the top of RAM
+		 * so no need to align it.
+		 */
+		RELOC(prom_tce_alloc_start) = _ALIGN_DOWN(local_alloc_bottom, 0x1000000);
+		RELOC(prom_tce_alloc_end) = local_alloc_top;
+	}
+	
 	/* Flag the first invalid entry */
 	prom_debug("ending prom_initialize_tce_table\n");
 }
@@ -1686,9 +1787,21 @@ unsigned long __init prom_init(unsigned 
 	 */
 	if (RELOC(ppc64_iommu_off))
 		prom_setprop(_prom->chosen, "linux,iommu-off", NULL, 0);
+
 	if (RELOC(iommu_force_on))
 		prom_setprop(_prom->chosen, "linux,iommu-force-on", NULL, 0);
 
+	if (RELOC(prom_memory_limit))
+		prom_setprop(_prom->chosen, "linux,memory-limit",
+			PTRRELOC(&prom_memory_limit), sizeof(RELOC(prom_memory_limit)));
+
+	if (RELOC(prom_tce_alloc_start)) {
+		prom_setprop(_prom->chosen, "linux,tce-alloc-start",
+			PTRRELOC(&prom_tce_alloc_start), sizeof(RELOC(prom_tce_alloc_start)));
+		prom_setprop(_prom->chosen, "linux,tce-alloc-end",
+			PTRRELOC(&prom_tce_alloc_end), sizeof(RELOC(prom_tce_alloc_end)));
+	}
+
 	/*
 	 * Now finally create the flattened device-tree
 	 */



More information about the Linuxppc64-dev mailing list