[PATCH v3 5/5] powerpc/numa: Fill distance_lookup_table for offline nodes

Srikar Dronamraju srikar at linux.vnet.ibm.com
Thu Aug 26 20:05:21 AEST 2021


The scheduler expects the number of unique node distances to be available
at boot, and it uses the node distances to compute that number.
On POWER, node distances for offline nodes are not available. However,
POWER already knows the possible unique node distances. Fake the offline
nodes' distance_lookup_table entries so that all possible node
distances are represented.

However this only needs to be done if the number of unique node
distances that can be computed for online nodes is less than the
number of possible unique node distances as represented by
distance_ref_points_depth. When the node is actually onlined,
distance_lookup_table will be updated with actual entries.

Cc: linuxppc-dev at lists.ozlabs.org
Cc: Nathan Lynch <nathanl at linux.ibm.com>
Cc: Michael Ellerman <mpe at ellerman.id.au>
Cc: Ingo Molnar <mingo at kernel.org>
Cc: Peter Zijlstra <peterz at infradead.org>
Cc: Valentin Schneider <valentin.schneider at arm.com>
Cc: Gautham R Shenoy <ego at linux.vnet.ibm.com>
Cc: Vincent Guittot <vincent.guittot at linaro.org>
Cc: Geetika Moolchandani <Geetika.Moolchandani1 at ibm.com>
Cc: Laurent Dufour <ldufour at linux.ibm.com>
Cc: kernel test robot <lkp at intel.com>
Signed-off-by: Srikar Dronamraju <srikar at linux.vnet.ibm.com>
---
 arch/powerpc/mm/numa.c | 70 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

Changelog:
v1: https://lore.kernel.org/linuxppc-dev/20210701041552.112072-3-srikar@linux.vnet.ibm.com/t/#u
[ Fixed a missing prototype warning Reported-by: kernel test robot <lkp at intel.com>]

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 87ade2f56f45..afa2ede4ac53 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -849,6 +849,75 @@ void __init dump_numa_cpu_topology(void)
 	}
 }
 
+/*
+ * The scheduler expects the number of unique node distances to be known at
+ * boot and derives it from the node distances. On POWER, distances for
+ * offline nodes are not available then, although the number of possible
+ * unique distances is. Fake distance_lookup_table entries for offline nodes
+ * until every possible distance is represented. Does nothing unless NUMA is
+ * enabled with form 1 affinity, or if online nodes already cover all levels.
+ */
+static void __init fake_update_distance_lookup_table(void)
+{
+	unsigned long distance_map;
+	int i, nr_levels, nr_depth, node;
+
+	if (!numa_enabled || !form1_affinity)
+		return;
+
+	/*
+	 * distance_ref_points_depth does not count LOCAL_DISTANCE, so add 1
+	 * to get the number of unique distances.
+	 */
+	nr_depth = distance_ref_points_depth + 1;
+
+	/* distance_map bit i represents distance LOCAL_DISTANCE << i. */
+	if (WARN_ON(nr_depth > BITS_PER_LONG))
+		return;
+
+	bitmap_zero(&distance_map, nr_depth);
+	bitmap_set(&distance_map, 0, 1);	/* LOCAL_DISTANCE is always present */
+
+	for_each_online_node(node) {
+		int nd, distance = LOCAL_DISTANCE;
+
+		if (node == first_online_node)
+			continue;
+
+		nd = __node_distance(node, first_online_node);
+		for (i = 0; i < nr_depth; i++, distance *= 2) {
+			if (distance == nd) {
+				bitmap_set(&distance_map, i, 1);
+				break;
+			}
+		}
+	}
+
+	/* Nothing to fake if the online nodes already cover every distance. */
+	nr_levels = bitmap_weight(&distance_map, nr_depth);
+	if (nr_levels == nr_depth)
+		return;
+
+	for_each_node(node) {
+		if (node_online(node))
+			continue;
+
+		i = find_first_zero_bit(&distance_map, nr_depth);
+		if (i >= nr_depth || i == 0) {
+			pr_warn("Levels(%d) not matching levels(%d)\n", nr_levels, nr_depth);
+			return;
+		}
+
+		bitmap_set(&distance_map, i, 1);	/* claim lowest missing level */
+		while (i--)
+			distance_lookup_table[node][i] = node;
+
+		nr_levels = bitmap_weight(&distance_map, nr_depth);
+		if (nr_levels == nr_depth)
+			return;
+	}
+}
+
 /* Initialize NODE_DATA for a node on the local memory */
 static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 {
@@ -964,6 +1033,7 @@ void __init mem_topology_setup(void)
 		 */
 		numa_setup_cpu(cpu);
 	}
+	fake_update_distance_lookup_table();
 }
 
 void __init initmem_init(void)
-- 
2.18.2



More information about the Linuxppc-dev mailing list