[PATCH 2/2] update cpu-to-node mappings during dlpar

Nathan Lynch ntl at pobox.com
Fri May 6 08:15:39 EST 2005


This fixes up some fallout from the preceding patch.

The sched domains reinitialization code that runs at cpu hotplug time
expects the cpu-to-node mappings to have been set up earlier than we
were setting them up.  It seems that things such as cpu_to_node() are
expected to return sane values regardless of whether the cpu is online.
That makes sense, I suppose:  we should be updating the cpu<->node
mappings when the topology changes, instead of keying on the state of
cpus.

Map logical cpus to nodes when a processor is added to the system;
tear down the mapping(s) when a processor is going away.  Remove the
numa cpu hotplug notifier, which is no longer needed now that the
mappings are maintained when processors are added and removed.

 arch/ppc64/kernel/pSeries_smp.c |    3 +
 arch/ppc64/mm/numa.c            |   61 +++++++++++++---------------------------
 include/asm-ppc64/topology.h    |   13 ++++++++
 3 files changed, 37 insertions(+), 40 deletions(-)

Signed-off-by: Nathan Lynch <ntl at pobox.com>

Index: linux-2.6.12-rc3-mm3/arch/ppc64/kernel/pSeries_smp.c
===================================================================
--- linux-2.6.12-rc3-mm3.orig/arch/ppc64/kernel/pSeries_smp.c
+++ linux-2.6.12-rc3-mm3/arch/ppc64/kernel/pSeries_smp.c
@@ -45,6 +45,7 @@
 #include <asm/rtas.h>
 #include <asm/plpar_wrappers.h>
 #include <asm/pSeries_reconfig.h>
+#include <asm/topology.h>
 
 #include "mpic.h"
 
@@ -187,6 +188,7 @@ static int pSeries_add_processor(struct 
 		BUG_ON(cpu_isset(cpu, cpu_present_map));
 		cpu_set(cpu, cpu_present_map);
 		set_hard_smp_processor_id(cpu, *intserv++);
+		numa_setup_cpu(cpu, np);
 	}
 	err = 0;
 out_unlock:
@@ -218,6 +220,7 @@ static void pSeries_remove_processor(str
 				continue;
 			BUG_ON(cpu_online(cpu));
 			cpu_clear(cpu, cpu_present_map);
+			numa_teardown_cpu(cpu);
 			set_hard_smp_processor_id(cpu, -1);
 			break;
 		}
Index: linux-2.6.12-rc3-mm3/arch/ppc64/mm/numa.c
===================================================================
--- linux-2.6.12-rc3-mm3.orig/arch/ppc64/mm/numa.c
+++ linux-2.6.12-rc3-mm3/arch/ppc64/mm/numa.c
@@ -136,6 +136,11 @@ static void unmap_cpu_from_node(unsigned
 		       cpu, node);
 	}
 }
+#else
+static void unmap_cpu_from_node(unsigned long cpu)
+{
+	return;
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 static struct device_node * __devinit find_cpu_node(unsigned int cpu)
@@ -284,19 +289,25 @@ static unsigned long read_n_cells(int n,
 }
 
 /*
- * Figure out to which node a cpu belongs and stick it there.
- * Return the id of the node used.
+ * Figure out to which node a cpu belongs and stick it there.  Return
+ * the id of the node used.  We allow the caller to optionally pass
+ * the device_node which corresponds to the logical cpu, since at
+ * DLPAR time the new node may not have been added to the device tree
+ * yet.
  */
-static int numa_setup_cpu(unsigned long lcpu)
+int numa_setup_cpu(unsigned long lcpu, struct device_node *np)
 {
 	int nid = 0, numa_domain = INVALID_DOMAIN;
-	struct device_node *cpu = find_cpu_node(lcpu);
+	struct device_node *cpu = np ? of_node_get(np) : find_cpu_node(lcpu);
 
 	if (!cpu) {
 		WARN_ON(1);
 		goto out;
 	}
 
+	if (!numa_enabled)
+		goto out;
+
 	numa_domain = of_node_numa_domain(cpu);
 
 	if (numa_domain != INVALID_DOMAIN)
@@ -312,32 +323,10 @@ out:
 	return nid;
 }
 
-static int cpu_numa_callback(struct notifier_block *nfb,
-			     unsigned long action,
-			     void *hcpu)
-{
-	unsigned long lcpu = (unsigned long)hcpu;
-	int ret = NOTIFY_DONE;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-		if (min_common_depth == -1 || !numa_enabled)
-			map_cpu_to_node(lcpu, 0);
-		else
-			numa_setup_cpu(lcpu);
-		ret = NOTIFY_OK;
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-		unmap_cpu_from_node(lcpu);
-		ret = NOTIFY_OK;
-		break;
-#endif
-	}
-	return ret;
+void numa_teardown_cpu(unsigned long lcpu)
+{
+	unmap_cpu_from_node(lcpu);
 }
-
 /*
  * Check and possibly modify a memory region to enforce the memory limit.
  *
@@ -373,7 +362,7 @@ static int __init parse_numa_properties(
 	struct device_node *memory = NULL;
 	int addr_cells, size_cells;
 	long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
-	unsigned long i;
+	unsigned long i, lcpu;
 
 	if (numa_enabled == 0) {
 		printk(KERN_WARNING "NUMA disabled by user\n");
@@ -482,10 +471,8 @@ new_range:
 			establish_domain_mapping(domain);
 	}
 
-	/* Secondary logical cpus are associated with nids later in
-	 * boot, but we need to explicitly set up the boot cpu.
-	 */
-	numa_setup_cpu(boot_cpuid);
+	for_each_present_cpu(lcpu)
+		numa_setup_cpu(lcpu, NULL);
 
 	return 0;
 }
@@ -602,10 +589,6 @@ void __init do_init_bootmem(void)
 	int nid;
 	int addr_cells, size_cells;
 	struct device_node *memory = NULL;
-	static struct notifier_block ppc64_numa_nb = {
-		.notifier_call = cpu_numa_callback,
-		.priority = 1 /* Must run before sched domains notifier. */
-	};
 
 	min_low_pfn = 0;
 	max_low_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
@@ -616,8 +599,6 @@ void __init do_init_bootmem(void)
 	else
 		dump_numa_topology();
 
-	register_cpu_notifier(&ppc64_numa_nb);
-
 	for_each_online_node(nid) {
 		unsigned long start_paddr, end_paddr;
 		int i;
Index: linux-2.6.12-rc3-mm3/include/asm-ppc64/topology.h
===================================================================
--- linux-2.6.12-rc3-mm3.orig/include/asm-ppc64/topology.h
+++ linux-2.6.12-rc3-mm3/include/asm-ppc64/topology.h
@@ -4,6 +4,8 @@
 #include <linux/config.h>
 #include <asm/mmzone.h>
 
+struct device_node; /* for numa_setup_cpu() */
+
 #ifdef CONFIG_NUMA
 
 static inline int cpu_to_node(int cpu)
@@ -51,10 +53,21 @@ static inline int node_to_first_cpu(int 
 	.nr_balance_failed	= 0,			\
 }
 
+int numa_setup_cpu(unsigned long lcpu, struct device_node *);
+void numa_teardown_cpu(unsigned long lcpu);
 #else /* !CONFIG_NUMA */
 
 #include <asm-generic/topology.h>
 
+static int inline numa_setup_cpu(unsigned long lcpu, struct device_node *np)
+{
+	return 0;
+}
+
+static void inline numa_teardown_cpu(unsigned long lcpu)
+{
+	return;
+}
 #endif /* CONFIG_NUMA */
 
 #endif /* _ASM_PPC64_TOPOLOGY_H */



More information about the Linuxppc64-dev mailing list