NUMA memory block size
Olof Johansson
olof at austin.ibm.com
Sun Apr 4 04:13:33 EST 2004
On Sat, 3 Apr 2004, Anton Blanchard wrote:
> > 1. Why do we use a full int for node ID? It's quite unlikely that we will
> > have 2 billion nodes anytime soon. Current limit is 16. :-) Switching to a
> > char instead of int might be worth it.
>
> Agreed, we should switch it to an unsigned char.
If we switch it to a signed char, sign extension back to int will work
properly for DEBUG_NUMA cases where entries are initialized to -1. 127
nodes is still plenty.
> > 2. An lmb_alloc() approach has the benefit of only allocating as much table
> > as we actually have physical memory in the system. At least this way we'd
> > only allocate in proportion to how much memory the machine has. 1MB table
> > for a 2TB machine isn't too bad. On a 128GB system, size will be the same
> > as before (32KB).
>
> This does add another load:
>
> ld 10,.LC1-.LCTOC1(30) /* numa_cpu_lookup_table */
> sldi 8,8,2
> li 0,17024
> ld 7,.LC2-.LCTOC1(30)
> here ->ld 11,0(10) /* *numa_cpu_lookup_table */
> lwax 9,8,11
>
> But always allocating 128kB (2TB, 16MB segments, char for a node id)
> seems excessive so I like your idea of dynamically allocating the
> bitmap.
Well, if it makes a visible impact on performance numbers we can always
fall back to a 128KB array.
New (final?) patch below.
-Olof
===== include/asm-ppc64/mmzone.h 1.18 vs edited =====
--- 1.18/include/asm-ppc64/mmzone.h Fri Mar 12 21:18:15 2004
+++ edited/include/asm-ppc64/mmzone.h Sat Apr 3 11:42:31 2004
@@ -19,13 +19,13 @@
*/
extern int numa_cpu_lookup_table[];
-extern int numa_memory_lookup_table[];
+extern char *numa_memory_lookup_table;
extern cpumask_t numa_cpumask_lookup_table[];
extern int nr_cpus_in_node[];
#define MAX_MEMORY (1UL << 41)
-/* 256MB regions */
-#define MEMORY_INCREMENT_SHIFT 28
+/* 16MB regions */
+#define MEMORY_INCREMENT_SHIFT 24
#define MEMORY_INCREMENT (1UL << MEMORY_INCREMENT_SHIFT)
/* NUMA debugging, will not work on a DLPAR machine */
===== arch/ppc64/mm/numa.c 1.30 vs edited =====
--- 1.30/arch/ppc64/mm/numa.c Sat Mar 20 18:59:12 2004
+++ edited/arch/ppc64/mm/numa.c Sat Apr 3 11:42:10 2004
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <asm/lmb.h>
#include <asm/machdep.h>
+#include <asm/abs_addr.h>
#if 1
#define dbg(args...) udbg_printf(args)
@@ -31,9 +32,7 @@
int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] =
ARRAY_INITIALISER};
-int numa_memory_lookup_table[MAX_MEMORY >> MEMORY_INCREMENT_SHIFT] =
- { [ 0 ... ((MAX_MEMORY >> MEMORY_INCREMENT_SHIFT) - 1)] =
- ARRAY_INITIALISER};
+char *numa_memory_lookup_table;
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};
@@ -65,12 +64,20 @@
int *memory_associativity;
int depth;
int max_domain = 0;
+ long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
+ long i;
if (strstr(saved_command_line, "numa=off")) {
printk(KERN_WARNING "NUMA disabled by user\n");
return -1;
}
+ numa_memory_lookup_table =
+ (int *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
+
+ for (i = 0; i < entries ; i++)
+ numa_memory_lookup_table[i] = ARRAY_INITIALISER;
+
cpu = of_find_node_by_type(NULL, "cpu");
if (!cpu)
goto err;
@@ -243,6 +250,14 @@
top_of_ram, total_ram);
printk(KERN_INFO "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
+
+ if (!numa_memory_lookup_table) {
+ long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT;
+ numa_memory_lookup_table =
+ (int *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
+ for (i = 0; i < entries ; i++)
+ numa_memory_lookup_table[i] = ARRAY_INITIALISER;
+ }
for (i = 0; i < NR_CPUS; i++)
map_cpu_to_node(i, 0);
** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/
More information about the Linuxppc64-dev mailing list