[PATCH 3/3] powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings

Parth Shah parth at linux.ibm.com
Tue Jun 15 17:08:04 AEST 2021


On POWER10 systems, the "ibm,thread-groups" property "2" indicates the cpus
in thread-group share both L2 and L3 caches. Hence, use cache_property = 2
itself to find both the L2 and L3 cache siblings.
Hence, rename existing macros to detect if the cache property is for L2 or
L3 and use the L2 cache map itself to find the presence of L3 siblings.

Signed-off-by: Parth Shah <parth at linux.ibm.com>
---
 arch/powerpc/include/asm/smp.h  |  2 ++
 arch/powerpc/kernel/cacheinfo.c |  3 +++
 arch/powerpc/kernel/smp.c       | 20 +++++++++++++++-----
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 1259040cc3a4..55082d343bd2 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -144,6 +144,7 @@ extern int cpu_to_core_id(int cpu);
 
 extern bool has_big_cores;
 extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
 
 #define cpu_smt_mask cpu_smt_mask
 #ifdef CONFIG_SCHED_SMT
@@ -198,6 +199,7 @@ extern void __cpu_die(unsigned int cpu);
 #define hard_smp_processor_id()		get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
 #define thread_group_shares_l2  0
+#define thread_group_shares_l3	0
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 20d91693eac1..378ae20d05a9 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level)
 	else if (thread_group_shares_l2 && level == 2)
 		return cpumask_first(per_cpu(thread_group_l2_cache_map,
 					     cpu_id));
+	else if (thread_group_shares_l3 && level == 3)
+		return cpumask_first(per_cpu(thread_group_l2_cache_map,
+					     cpu_id));
 	return -1;
 }
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a34877257f2d..d0c70fcd0068 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -78,6 +78,7 @@ struct task_struct *secondary_current;
 bool has_big_cores;
 bool coregroup_enabled;
 bool thread_group_shares_l2;
+bool thread_group_shares_l3;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
@@ -101,7 +102,7 @@ enum {
 
 #define MAX_THREAD_LIST_SIZE	8
 #define THREAD_GROUP_SHARE_L1   1
-#define THREAD_GROUP_SHARE_L2   2
+#define THREAD_GROUP_SHARE_L2_L3 2
 struct thread_groups {
 	unsigned int property;
 	unsigned int nr_groups;
@@ -887,9 +888,16 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
 	cpumask_var_t *mask = NULL;
 
 	if (cache_property != THREAD_GROUP_SHARE_L1 &&
-	    cache_property != THREAD_GROUP_SHARE_L2)
+	    cache_property != THREAD_GROUP_SHARE_L2_L3)
 		return -EINVAL;
 
+	/*
+	 * On P10 fused-core system, the L3 cache is shared between threads of a
+	 * small core only, but the "ibm,thread-groups" property is indicated as
+	 * "2" only which is interpreted as the thread-groups sharing both L2
+	 * and L3 caches. Hence cache_property of THREAD_GROUP_SHARE_L2_L3 is
+	 * used for both L2 and L3 cache sibling detection.
+	 */
 	tg = get_thread_groups(cpu, cache_property, &err);
 	if (!tg)
 		return err;
@@ -903,7 +911,7 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
 
 	if (cache_property == THREAD_GROUP_SHARE_L1)
 		mask = &per_cpu(thread_group_l1_cache_map, cpu);
-	else if (cache_property == THREAD_GROUP_SHARE_L2)
+	else if (cache_property == THREAD_GROUP_SHARE_L2_L3)
 		mask = &per_cpu(thread_group_l2_cache_map, cpu);
 
 	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
@@ -1009,14 +1017,16 @@ static int __init init_big_cores(void)
 	has_big_cores = true;
 
 	for_each_possible_cpu(cpu) {
-		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
+		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
 
 		if (err)
 			return err;
 	}
 
 	thread_group_shares_l2 = true;
-	pr_debug("L2 cache only shared by the threads in the small core\n");
+	thread_group_shares_l3 = true;
+	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
 	return 0;
 }
 
-- 
2.26.3



More information about the Linuxppc-dev mailing list