NUMA memory block size

Olof Johansson olof at austin.ibm.com
Sun Apr 4 04:13:33 EST 2004


On Sat, 3 Apr 2004, Anton Blanchard wrote:

> > 1. Why do we use a full int for node ID? It's quite unlikely that we will
> > have 2 billion nodes anytime soon. Current limit is 16. :-) Switching to a
> > char instead of int might be worth it.
>
> Agreed, we should switch it to an unsigned char.

If we switch it to a signed char, sign extension back to int will work
properly for DEBUG_NUMA cases where entries are initialized to -1. Node
IDs 0-127 (128 nodes) are still plenty.

> > 2. A lmb_alloc() approach has the benefit of only allocating as much table
> > as we actually have physical memory in the system. At least this way we'd
> > only allocate in proportion to how much memory the machine has. 1MB table
> > for a 2TB machine isn't too bad. On a 128GB system, size will be the same
> > as before (32KB).
>
> This does add another load:
>
>        ld 10,.LC1-.LCTOC1(30)	/* numa_cpu_lookup_table */
>        sldi 8,8,2
>        li 0,17024
>        ld 7,.LC2-.LCTOC1(30)
> here ->ld 11,0(10)		/* *numa_cpu_lookup_table */
>        lwax 9,8,11
>
> But always allocating 128kB (2TB, 16MB segments, char for a node id)
> seems excessive so I like your idea of dynamically allocating the
> bitmap.

Well, if it makes a visible impact on performance numbers we can always
fall back to a 128KB array.

New (final?) patch below.

-Olof

===== include/asm-ppc64/mmzone.h 1.18 vs edited =====
--- 1.18/include/asm-ppc64/mmzone.h	Fri Mar 12 21:18:15 2004
+++ edited/include/asm-ppc64/mmzone.h	Sat Apr  3 11:42:31 2004
@@ -19,13 +19,13 @@
  */

 extern int numa_cpu_lookup_table[];
-extern int numa_memory_lookup_table[];
+extern char *numa_memory_lookup_table;
 extern cpumask_t numa_cpumask_lookup_table[];
 extern int nr_cpus_in_node[];

 #define MAX_MEMORY (1UL << 41)
-/* 256MB regions */
-#define MEMORY_INCREMENT_SHIFT 28
+/* 16MB regions */
+#define MEMORY_INCREMENT_SHIFT 24
 #define MEMORY_INCREMENT (1UL << MEMORY_INCREMENT_SHIFT)

 /* NUMA debugging, will not work on a DLPAR machine */
===== arch/ppc64/mm/numa.c 1.30 vs edited =====
--- 1.30/arch/ppc64/mm/numa.c	Sat Mar 20 18:59:12 2004
+++ edited/arch/ppc64/mm/numa.c	Sat Apr  3 11:42:10 2004
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <asm/lmb.h>
 #include <asm/machdep.h>
+#include <asm/abs_addr.h>

 #if 1
 #define dbg(args...) udbg_printf(args)
@@ -31,9 +32,7 @@

 int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] =
 	ARRAY_INITIALISER};
-int numa_memory_lookup_table[MAX_MEMORY >> MEMORY_INCREMENT_SHIFT] =
-	{ [ 0 ... ((MAX_MEMORY >> MEMORY_INCREMENT_SHIFT) - 1)] =
-	ARRAY_INITIALISER};
+char *numa_memory_lookup_table;
 cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
 int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};

@@ -65,12 +64,20 @@
 	int *memory_associativity;
 	int depth;
 	int max_domain = 0;
+	long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
+	long i;

 	if (strstr(saved_command_line, "numa=off")) {
 		printk(KERN_WARNING "NUMA disabled by user\n");
 		return -1;
 	}

+	numa_memory_lookup_table =
+		(int *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
+
+	for (i = 0; i < entries ; i++)
+		numa_memory_lookup_table[i] = ARRAY_INITIALISER;
+
 	cpu = of_find_node_by_type(NULL, "cpu");
 	if (!cpu)
 		goto err;
@@ -243,6 +250,14 @@
 	       top_of_ram, total_ram);
 	printk(KERN_INFO "Memory hole size: %ldMB\n",
 	       (top_of_ram - total_ram) >> 20);
+
+	if (!numa_memory_lookup_table) {
+		long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT;
+		numa_memory_lookup_table =
+			(int *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
+		for (i = 0; i < entries ; i++)
+			numa_memory_lookup_table[i] = ARRAY_INITIALISER;
+	}

 	for (i = 0; i < NR_CPUS; i++)
 		map_cpu_to_node(i, 0);


** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/





More information about the Linuxppc64-dev mailing list