[PATCH 3/3] powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings

Gautham R Shenoy ego at linux.vnet.ibm.com
Tue Jul 20 16:42:26 AEST 2021


Hi Parth,

Sorry for the late review.

On Tue, Jun 15, 2021 at 12:38:04PM +0530, Parth Shah wrote:
> On POWER10 systems, the "ibm,thread-groups" property "2" indicates that the
> cpus in a thread-group share both the L2 and L3 caches. Hence, use
> cache_property = 2 itself to find both the L2 and L3 cache siblings.
> Accordingly, rename the existing macro to indicate that the cache property
> covers both L2 and L3, and use the L2 cache map itself to find the L3 siblings.
> 
> Signed-off-by: Parth Shah <parth at linux.ibm.com>
> ---
>  arch/powerpc/include/asm/smp.h  |  2 ++
>  arch/powerpc/kernel/cacheinfo.c |  3 +++
>  arch/powerpc/kernel/smp.c       | 20 +++++++++++++++-----
>  3 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
> index 1259040cc3a4..55082d343bd2 100644
> --- a/arch/powerpc/include/asm/smp.h
> +++ b/arch/powerpc/include/asm/smp.h
> @@ -144,6 +144,7 @@ extern int cpu_to_core_id(int cpu);
> 
>  extern bool has_big_cores;
>  extern bool thread_group_shares_l2;
> +extern bool thread_group_shares_l3;
> 
>  #define cpu_smt_mask cpu_smt_mask
>  #ifdef CONFIG_SCHED_SMT
> @@ -198,6 +199,7 @@ extern void __cpu_die(unsigned int cpu);
>  #define hard_smp_processor_id()		get_hard_smp_processor_id(0)
>  #define smp_setup_cpu_maps()
>  #define thread_group_shares_l2  0
> +#define thread_group_shares_l3	0
>  static inline void inhibit_secondary_onlining(void) {}
>  static inline void uninhibit_secondary_onlining(void) {}
>  static inline const struct cpumask *cpu_sibling_mask(int cpu)
> diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
> index 20d91693eac1..378ae20d05a9 100644
> --- a/arch/powerpc/kernel/cacheinfo.c
> +++ b/arch/powerpc/kernel/cacheinfo.c
> @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level)
>  	else if (thread_group_shares_l2 && level == 2)
>  		return cpumask_first(per_cpu(thread_group_l2_cache_map,
>  					     cpu_id));
> +	else if (thread_group_shares_l3 && level == 3)
> +		return cpumask_first(per_cpu(thread_group_l2_cache_map,
> +					     cpu_id));

We should either rename thread_group_l2_cache_map to
thread_group_l2_l3_cache_map or create a separate
thread_group_l3_cache_map. I prefer the latter approach since it makes
the code consistent.

Otherwise, the patch looks good to me.

--
Thanks and Regards
gautham.

>  	return -1;
>  }
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index a34877257f2d..d0c70fcd0068 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -78,6 +78,7 @@ struct task_struct *secondary_current;
>  bool has_big_cores;
>  bool coregroup_enabled;
>  bool thread_group_shares_l2;
> +bool thread_group_shares_l3;
> 
>  DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
>  DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
> @@ -101,7 +102,7 @@ enum {
> 
>  #define MAX_THREAD_LIST_SIZE	8
>  #define THREAD_GROUP_SHARE_L1   1
> -#define THREAD_GROUP_SHARE_L2   2
> +#define THREAD_GROUP_SHARE_L2_L3 2
>  struct thread_groups {
>  	unsigned int property;
>  	unsigned int nr_groups;
> @@ -887,9 +888,16 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
>  	cpumask_var_t *mask = NULL;
> 
>  	if (cache_property != THREAD_GROUP_SHARE_L1 &&
> -	    cache_property != THREAD_GROUP_SHARE_L2)
> +	    cache_property != THREAD_GROUP_SHARE_L2_L3)
>  		return -EINVAL;
> 
> +	/*
> +	 * On P10 fused-core system, the L3 cache is shared between threads of a
> +	 * small core only, but the "ibm,thread-groups" property is indicated as
> +	 * "2" only which is interpreted as the thread-groups sharing both L2
> +	 * and L3 caches. Hence cache_property of THREAD_GROUP_SHARE_L2_L3 is
> +	 * used for both L2 and L3 cache sibling detection.
> +	 */
>  	tg = get_thread_groups(cpu, cache_property, &err);
>  	if (!tg)
>  		return err;
> @@ -903,7 +911,7 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
> 
>  	if (cache_property == THREAD_GROUP_SHARE_L1)
>  		mask = &per_cpu(thread_group_l1_cache_map, cpu);
> -	else if (cache_property == THREAD_GROUP_SHARE_L2)
> +	else if (cache_property == THREAD_GROUP_SHARE_L2_L3)
>  		mask = &per_cpu(thread_group_l2_cache_map, cpu);
> 
>  	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> @@ -1009,14 +1017,16 @@ static int __init init_big_cores(void)
>  	has_big_cores = true;
> 
>  	for_each_possible_cpu(cpu) {
> -		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
> +		int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
> 
>  		if (err)
>  			return err;
>  	}
> 
>  	thread_group_shares_l2 = true;
> -	pr_debug("L2 cache only shared by the threads in the small core\n");
> +	thread_group_shares_l3 = true;
> +	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
> +
>  	return 0;
>  }
> 
> -- 
> 2.26.3
> 


More information about the Linuxppc-dev mailing list