[Cbe-oss-dev] Subject: [RFC] spufs: memory-add fix for CONFIG_NUMA

Arnd Bergmann arnd at arndb.de
Wed Apr 12 02:26:19 EST 2006


From: Joel H Schopp <jschopp at us.ibm.com>
Based on an older patch from Mike Kravetz <kravetz at us.ibm.com>

We need to have a mem_map for high addresses in order to make
fops->no_page work on spufs mem and register files. So far, we
have used the memory_present() function during early bootup,
but that did not work when CONFIG_NUMA was enabled.

We now use the __add_pages() function to add the mem_map
when loading the spufs module, which is a lot nicer.

Unfortunately, the memory hot-add code is currently a little
broken: sparse_index_alloc() allocates with alloc_bootmem_node(),
while the rest of the hot-add path in the same function uses
kmalloc. I had to add an ugly hack (a 'late' flag) to the
common code for that, which I don't want to submit
for upstream inclusion in its current form.

Signed-off-by: Arnd Bergmann <arnd.bergmann at de.ibm.com>

---
Index: linus-2.6/arch/powerpc/kernel/setup_64.c
===================================================================
--- linus-2.6.orig/arch/powerpc/kernel/setup_64.c
+++ linus-2.6/arch/powerpc/kernel/setup_64.c
@@ -551,7 +551,6 @@ void __init setup_arch(char **cmdline_p)
 
 	/* set up the bootmem stuff with available memory */
 	do_init_bootmem();
-	cell_spumem_init(1);
 	sparse_init();
 
 #ifdef CONFIG_DUMMY_CONSOLE
Index: linus-2.6/arch/powerpc/platforms/cell/setup.c
===================================================================
--- linus-2.6.orig/arch/powerpc/platforms/cell/setup.c
+++ linus-2.6/arch/powerpc/platforms/cell/setup.c
@@ -29,6 +29,8 @@
 #include <linux/seq_file.h>
 #include <linux/root_dev.h>
 #include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -71,73 +73,6 @@ static void cell_show_cpuinfo(struct seq
 	of_node_put(root);
 }
 
-#ifdef CONFIG_SPARSEMEM
-static int __init find_spu_node_id(struct device_node *spe)
-{
-	unsigned int *id;
-#ifdef CONFIG_NUMA
-	struct device_node *cpu;
-	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
-#else
-	id = NULL;
-#endif
-	return id ? *id : 0;
-}
-
-static void __init cell_spuprop_present(struct device_node *spe,
-				       const char *prop, int early)
-{
-	struct address_prop {
-		unsigned long address;
-		unsigned int len;
-	} __attribute__((packed)) *p;
-	int proplen;
-
-	unsigned long start_pfn, end_pfn, pfn;
-	int node_id;
-
-	p = (void*)get_property(spe, prop, &proplen);
-	WARN_ON(proplen != sizeof (*p));
-
-	node_id = find_spu_node_id(spe);
-
-	start_pfn = p->address >> PAGE_SHIFT;
-	end_pfn = (p->address + p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
-	/* We need to call memory_present *before* the call to sparse_init,
-	   but we can initialize the page structs only *after* that call.
-	   Thus, we're being called twice. */
-	if (early)
-		memory_present(node_id, start_pfn, end_pfn);
-	else {
-		/* As the pages backing SPU LS and I/O are outside the range
-		   of regular memory, their page structs were not initialized
-		   by free_area_init. Do it here instead. */
-		for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-			struct page *page = pfn_to_page(pfn);
-			set_page_links(page, ZONE_DMA, node_id, pfn);
-			init_page_count(page);
-			reset_page_mapcount(page);
-			SetPageReserved(page);
-			INIT_LIST_HEAD(&page->lru);
-		}
-	}
-}
-
-void __init cell_spumem_init(int early)
-{
-	struct device_node *node;
-	for (node = of_find_node_by_type(NULL, "spe");
-			node; node = of_find_node_by_type(node, "spe")) {
-		cell_spuprop_present(node, "local-store", early);
-		cell_spuprop_present(node, "problem", early);
-		cell_spuprop_present(node, "priv1", early);
-		cell_spuprop_present(node, "priv2", early);
-	}
-}
-#endif
-
 static void cell_progress(char *s, unsigned short hex)
 {
 	printk("*** %04x : %s\n", hex, s ? s : "");
@@ -201,8 +136,6 @@ static void __init cell_setup_arch(void)
 #endif
 
 	mmio_nvram_init();
-
-	cell_spumem_init(0);
 }
 
 /*
Index: linus-2.6/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- linus-2.6.orig/arch/powerpc/platforms/cell/spu_base.c
+++ linus-2.6/arch/powerpc/platforms/cell/spu_base.c
@@ -525,6 +525,57 @@ void spu_irq_setaffinity(struct spu *spu
 }
 EXPORT_SYMBOL_GPL(spu_irq_setaffinity);
 
+/* XXX better look for ibm,associativity properties as well */
+static int __init find_spu_node_id(struct device_node *spe)
+{
+	unsigned int *id;
+	struct device_node *cpu;
+	cpu = spe->parent->parent;
+	id = (unsigned int *)get_property(cpu, "node-id", NULL);
+	return id ? *id : 0;
+}
+
+static int __init cell_spuprop_present(struct device_node *spe,
+					const char *prop)
+{
+	static DEFINE_MUTEX(add_spumem_mutex);
+
+	struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *p;
+	int proplen;
+
+	unsigned long start_pfn, nr_pages;
+	int node_id;
+	struct pglist_data *pgdata;
+	struct zone *zone;
+	int ret;
+
+	p = (void*)get_property(spe, prop, &proplen);
+	WARN_ON(proplen != sizeof (*p));
+
+	start_pfn = p->address >> PAGE_SHIFT;
+	nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	/*
+	 * XXX need to get the correct NUMA node in here. This may
+	 * be different from the spe::node_id property, e.g. when
+	 * the host firmware is not NUMA aware.
+	 */
+	node_id = 0;
+
+	pgdata = NODE_DATA(node_id);
+	zone = pgdata->node_zones;
+
+	/* XXX rethink locking here */
+	mutex_lock(&add_spumem_mutex);
+	ret = __add_pages(zone, start_pfn, nr_pages);
+	mutex_unlock(&add_spumem_mutex);
+
+	return ret;
+}
+
 static void __iomem * __init map_spe_prop(struct device_node *n,
 						 const char *name)
 {
@@ -535,6 +586,8 @@ static void __iomem * __init map_spe_pro
 
 	void *p;
 	int proplen;
+	void* ret = NULL;
+	int err = 0;
 
 	p = get_property(n, name, &proplen);
 	if (proplen != sizeof (struct address_prop))
@@ -546,7 +599,14 @@ static void __iomem * __init map_spe_pro
 	if (strcmp (name, "priv2") == 0 && prop->len < 0x20000)
 		return ioremap(prop->address, 0x20000);
 
-	return ioremap(prop->address, prop->len);
+	err = cell_spuprop_present(n, name);
+	if (err && (err != -EEXIST))
+		goto out;
+
+	ret = ioremap(prop->address, prop->len);
+
+ out:
+	return ret;
 }
 
 static void spu_unmap(struct spu *spu)
@@ -606,17 +666,6 @@ out:
 	return ret;
 }
 
-static int __init find_spu_node_id(struct device_node *spe)
-{
-	unsigned int *id;
-	struct device_node *cpu;
-
-	cpu = spe->parent->parent;
-	id = (unsigned int *)get_property(cpu, "node-id", NULL);
-
-	return id ? *id : 0;
-}
-
 static int __init create_spu(struct device_node *spe)
 {
 	struct spu *spu;
Index: linus-2.6/mm/memory_hotplug.c
===================================================================
--- linus-2.6.orig/mm/memory_hotplug.c
+++ linus-2.6/mm/memory_hotplug.c
@@ -69,12 +69,16 @@ int __add_pages(struct zone *zone, unsig
 	for (i = 0; i < nr_pages; i += PAGES_PER_SECTION) {
 		err = __add_section(zone, phys_start_pfn + i);
 
-		if (err)
+		/* We want to keep adding the rest of the
+		 * sections if the first ones already exist
+		 */
+		if (err && (err != -EEXIST))
 			break;
 	}
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(__add_pages);
 
 static void grow_zone_span(struct zone *zone,
 		unsigned long start_pfn, unsigned long end_pfn)
Index: linus-2.6/mm/sparse.c
===================================================================
--- linus-2.6.orig/mm/sparse.c
+++ linus-2.6/mm/sparse.c
@@ -26,13 +26,16 @@ struct mem_section mem_section[NR_SECTIO
 EXPORT_SYMBOL(mem_section);
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
-static struct mem_section *sparse_index_alloc(int nid)
+static struct mem_section *sparse_index_alloc(int nid, int late)
 {
 	struct mem_section *section = NULL;
 	unsigned long array_size = SECTIONS_PER_ROOT *
 				   sizeof(struct mem_section);
 
-	section = alloc_bootmem_node(NODE_DATA(nid), array_size);
+	if (late)
+		section = kmalloc_node(array_size, GFP_KERNEL, nid);
+	else
+		section = alloc_bootmem_node(NODE_DATA(nid), array_size);
 
 	if (section)
 		memset(section, 0, array_size);
@@ -40,7 +43,7 @@ static struct mem_section *sparse_index_
 	return section;
 }
 
-static int sparse_index_init(unsigned long section_nr, int nid)
+static int sparse_index_init(unsigned long section_nr, int nid, int late)
 {
 	static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED;
 	unsigned long root = SECTION_NR_TO_ROOT(section_nr);
@@ -50,7 +53,7 @@ static int sparse_index_init(unsigned lo
 	if (mem_section[root])
 		return -EEXIST;
 
-	section = sparse_index_alloc(nid);
+	section = sparse_index_alloc(nid, late);
 	/*
 	 * This lock keeps two different sections from
 	 * reallocating for the same index
@@ -68,7 +71,8 @@ out:
 	return ret;
 }
 #else /* !SPARSEMEM_EXTREME */
-static inline int sparse_index_init(unsigned long section_nr, int nid)
+static inline int sparse_index_init(unsigned long section_nr, int nid,
+				    int late)
 {
 	return 0;
 }
@@ -109,7 +113,7 @@ void memory_present(int nid, unsigned lo
 		unsigned long section = pfn_to_section_nr(pfn);
 		struct mem_section *ms;
 
-		sparse_index_init(section, nid);
+		sparse_index_init(section, nid, 0);
 
 		ms = __nr_to_section(section);
 		if (!ms->section_mem_map)
@@ -250,8 +254,8 @@ void sparse_init(void)
 
 /*
  * returns the number of sections whose mem_maps were properly
- * set.  If this is <=0, then that means that the passed-in
- * map was not consumed and must be freed.
+ * set.  If the return value is less than 0, the section was not
+ * added; -EEXIST means the section was already present.
  */
 int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 			   int nr_pages)
@@ -267,7 +271,7 @@ int sparse_add_one_section(struct zone *
 	 * no locking for this, because it does its own
 	 * plus, it does a kmalloc
 	 */
-	sparse_index_init(section_nr, pgdat->node_id);
+	sparse_index_init(section_nr, pgdat->node_id, 1);
 	memmap = __kmalloc_section_memmap(nr_pages);
 
 	pgdat_resize_lock(pgdat, &flags);
@@ -281,9 +285,10 @@ int sparse_add_one_section(struct zone *
 
 	ret = sparse_init_one_section(ms, section_nr, memmap);
 
+out:
 	if (ret <= 0)
 		__kfree_section_memmap(memmap, nr_pages);
-out:
+
 	pgdat_resize_unlock(pgdat, &flags);
 	return ret;
 }
Index: linus-2.6/arch/powerpc/platforms/cell/Kconfig
===================================================================
--- linus-2.6.orig/arch/powerpc/platforms/cell/Kconfig
+++ linus-2.6/arch/powerpc/platforms/cell/Kconfig
@@ -22,6 +22,7 @@ config SPU_FS
 
 config SPUFS_MMAP
 	bool
+	depends on SPU_FS && SPARSEMEM && MEMORY_HOTPLUG
 	depends on SPU_FS && SPARSEMEM
 	default y
 



More information about the cbe-oss-dev mailing list