[PATCHv2 3/3] powerpc/smp: Use existing L2 cache_map cpumask to find L3 cache siblings
Gautham R Shenoy
ego at linux.vnet.ibm.com
Fri Jul 30 20:08:20 AEST 2021
On Wed, Jul 28, 2021 at 11:26:07PM +0530, Parth Shah wrote:
> On POWER10 systems, the "ibm,thread-groups" property value "2" indicates
> that the CPUs in a thread-group share both the L2 and the L3 cache.
> Hence, use cache_property = 2 itself to find both the L2 and the L3
> cache siblings: create a new thread_group_l3_cache_map to keep the list
> of L3 siblings, but fill the mask using the same property "2" array.
This version looks good to me.
Reviewed-by: Gautham R. Shenoy <ego at linux.vnet.ibm.com>
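
For readers who haven't followed the whole series: "ibm,thread-groups"
is a flat array of records, each record being [property, nr_groups,
threads_per_group] followed by the thread ids of every group, and this
patch simply derives the L3 siblings from the same property == 2 record
that already gives us the L2 siblings. A minimal userspace sketch of
that lookup (the thread ids below are made up for illustration, not
taken from a real device tree):

#include <stdio.h>

/*
 * "ibm,thread-groups"-style array: records of the form
 * [property, nr_groups, threads_per_group, <thread ids>].
 */
static const int tg[] = {
        1, 2, 4,  8, 10, 12, 14,  9, 11, 13, 15,  /* property 1: L1    */
        2, 2, 4,  8, 10, 12, 14,  9, 11, 13, 15,  /* property 2: L2+L3 */
};

int main(void)
{
        int n = sizeof(tg) / sizeof(tg[0]);
        int cpu = 10, i = 0;

        while (i < n) {
                int prop = tg[i], groups = tg[i + 1], per = tg[i + 2];
                const int *list = &tg[i + 3];
                int g, t;

                for (g = 0; prop == 2 && g < groups; g++) {
                        const int *grp = &list[g * per];
                        int found = 0;

                        for (t = 0; t < per; t++)
                                found |= (grp[t] == cpu);
                        if (!found)
                                continue;
                        printf("CPU %d shares L2 and L3 with:", cpu);
                        for (t = 0; t < per; t++)
                                printf(" %d", grp[t]);
                        printf("\n");
                }
                i += 3 + groups * per;
        }
        return 0;
}

This prints "CPU 10 shares L2 and L3 with: 8 10 12 14", which is what
both thread_group_l2_cache_map and the new thread_group_l3_cache_map
end up containing for that CPU.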
>
> Signed-off-by: Parth Shah <parth at linux.ibm.com>
> ---
> arch/powerpc/include/asm/smp.h | 3 ++
> arch/powerpc/kernel/cacheinfo.c | 3 ++
> arch/powerpc/kernel/smp.c | 66 ++++++++++++++++++++++-----------
> 3 files changed, 51 insertions(+), 21 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
> index 1259040cc3a4..7ef1cd8168a0 100644
> --- a/arch/powerpc/include/asm/smp.h
> +++ b/arch/powerpc/include/asm/smp.h
> @@ -35,6 +35,7 @@ extern int *chip_id_lookup_table;
>
> DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> +DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
>
> #ifdef CONFIG_SMP
>
> @@ -144,6 +145,7 @@ extern int cpu_to_core_id(int cpu);
>
> extern bool has_big_cores;
> extern bool thread_group_shares_l2;
> +extern bool thread_group_shares_l3;
>
> #define cpu_smt_mask cpu_smt_mask
> #ifdef CONFIG_SCHED_SMT
> @@ -198,6 +200,7 @@ extern void __cpu_die(unsigned int cpu);
> #define hard_smp_processor_id() get_hard_smp_processor_id(0)
> #define smp_setup_cpu_maps()
> #define thread_group_shares_l2 0
> +#define thread_group_shares_l3 0
> static inline void inhibit_secondary_onlining(void) {}
> static inline void uninhibit_secondary_onlining(void) {}
> static inline const struct cpumask *cpu_sibling_mask(int cpu)
> diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
> index 20d91693eac1..cf1be75b7833 100644
> --- a/arch/powerpc/kernel/cacheinfo.c
> +++ b/arch/powerpc/kernel/cacheinfo.c
> @@ -469,6 +469,9 @@ static int get_group_id(unsigned int cpu_id, int level)
> else if (thread_group_shares_l2 && level == 2)
> return cpumask_first(per_cpu(thread_group_l2_cache_map,
> cpu_id));
> + else if (thread_group_shares_l3 && level == 3)
> + return cpumask_first(per_cpu(thread_group_l3_cache_map,
> + cpu_id));
> return -1;
> }
>
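One nice property of keeping a separate L3 map even though it is filled
from the same array: get_group_id() stays uniform across levels. It
just returns the first CPU of the sibling mask for the requested level,
which (as I understand it) is what lets cacheinfo tell the per-group
cache objects apart. Since the L2 and the L3 masks are identical on
P10, levels 2 and 3 resolve to the same group id for a given CPU. A toy
illustration, with a plain bitmask standing in for a cpumask (values
made up):

#include <stdio.h>

/* stand-in for cpumask_first() on a toy 64-bit "cpumask" */
static int first_cpu(unsigned long mask)
{
        int i;

        for (i = 0; i < 64; i++)
                if (mask & (1UL << i))
                        return i;
        return -1;
}

int main(void)
{
        unsigned long l2_map = 0x5500;  /* CPUs 8, 10, 12, 14 */
        unsigned long l3_map = 0x5500;  /* identical on P10   */

        printf("L2 group id: %d\n", first_cpu(l2_map)); /* 8 */
        printf("L3 group id: %d\n", first_cpu(l3_map)); /* 8 */
        return 0;
}
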
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index a7fcac44a8e2..f2abd88e0c25 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -78,6 +78,7 @@ struct task_struct *secondary_current;
> bool has_big_cores;
> bool coregroup_enabled;
> bool thread_group_shares_l2;
> +bool thread_group_shares_l3;
>
> DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
> DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
> @@ -101,7 +102,7 @@ enum {
>
> #define MAX_THREAD_LIST_SIZE 8
> #define THREAD_GROUP_SHARE_L1 1
> -#define THREAD_GROUP_SHARE_L2 2
> +#define THREAD_GROUP_SHARE_L2_L3 2
> struct thread_groups {
> unsigned int property;
> unsigned int nr_groups;
> @@ -131,6 +132,12 @@ DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
> */
> DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
>
> +/*
> + * On P10, the thread_group_l3_cache_map for each CPU is equal to
> + * the thread_group_l2_cache_map, as the same threads share both caches.
> + */
> +DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
> +
> /* SMP operations for this machine */
> struct smp_ops_t *smp_ops;
>
> @@ -889,19 +896,41 @@ static struct thread_groups *__init get_thread_groups(int cpu,
> return tg;
> }
>
> +static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
> +{
> + int first_thread = cpu_first_thread_sibling(cpu);
> + int i;
> +
> + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> +
> + for (i = first_thread; i < first_thread + threads_per_core; i++) {
> + int i_group_start = get_cpu_thread_group_start(i, tg);
> +
> + if (unlikely(i_group_start == -1)) {
> + WARN_ON_ONCE(1);
> + return -ENODATA;
> + }
> +
> + if (i_group_start == cpu_group_start)
> + cpumask_set_cpu(i, *mask);
> + }
> +
> + return 0;
> +}
> +
> static int __init init_thread_group_cache_map(int cpu, int cache_property)
>
> {
> - int first_thread = cpu_first_thread_sibling(cpu);
> - int i, cpu_group_start = -1, err = 0;
> + int cpu_group_start = -1, err = 0;
> struct thread_groups *tg = NULL;
> cpumask_var_t *mask = NULL;
>
> if (cache_property != THREAD_GROUP_SHARE_L1 &&
> - cache_property != THREAD_GROUP_SHARE_L2)
> + cache_property != THREAD_GROUP_SHARE_L2_L3)
> return -EINVAL;
>
> tg = get_thread_groups(cpu, cache_property, &err);
> +
> if (!tg)
> return err;
>
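The helper factored out here is the same walk the L1 path has always
done: visit every thread of the core and pick the ones whose thread
group starts at the same offset in the thread list as the target CPU's
group. A standalone sketch of that loop, again with a made-up SMT8
core (threads 8-15, two groups of four):

#include <stdio.h>

#define THREADS_PER_CORE 8

/* one core's property-2 thread list: 2 groups of 4 threads (made up) */
static const int per_group = 4;
static const int thread_list[] = { 8, 10, 12, 14, 9, 11, 13, 15 };

/*
 * Index into thread_list where the group containing @cpu begins,
 * i.e. the moral equivalent of get_cpu_thread_group_start().
 */
static int group_start(int cpu)
{
        int i;

        for (i = 0; i < 2 * per_group; i++)
                if (thread_list[i] == cpu)
                        return (i / per_group) * per_group;
        return -1;
}

int main(void)
{
        int cpu = 10, first_thread = 8;
        int cpu_group_start = group_start(cpu);
        unsigned long mask = 0;
        int i;

        for (i = first_thread; i < first_thread + THREADS_PER_CORE; i++)
                if (group_start(i) == cpu_group_start)
                        mask |= 1UL << i;

        /* prints 0x5500, i.e. CPUs 8, 10, 12 and 14 */
        printf("cache siblings of CPU %d: 0x%lx\n", cpu, mask);
        return 0;
}

One minor observation, not a blocker: init_thread_group_cache_map()
does not propagate the -ENODATA that the helper can return, though the
WARN_ON_ONCE() inside the helper will still fire.
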
> @@ -912,25 +941,18 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property)
> return -ENODATA;
> }
>
> - if (cache_property == THREAD_GROUP_SHARE_L1)
> + if (cache_property == THREAD_GROUP_SHARE_L1) {
> mask = &per_cpu(thread_group_l1_cache_map, cpu);
> - else if (cache_property == THREAD_GROUP_SHARE_L2)
> + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
> + }
> + else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
> mask = &per_cpu(thread_group_l2_cache_map, cpu);
> -
> - zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> -
> - for (i = first_thread; i < first_thread + threads_per_core; i++) {
> - int i_group_start = get_cpu_thread_group_start(i, tg);
> -
> - if (unlikely(i_group_start == -1)) {
> - WARN_ON_ONCE(1);
> - return -ENODATA;
> - }
> -
> - if (i_group_start == cpu_group_start)
> - cpumask_set_cpu(i, *mask);
> + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
> + mask = &per_cpu(thread_group_l3_cache_map, cpu);
> + update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
> }
>
> +
> return 0;
> }
>
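If anyone wants to eyeball the end result, the shared_cpu_map files
under the cache directories in sysfs should show the same small-core
siblings for the L2 and the L3 rows once this is applied. Something
like the below could be used for a quick check (cpu8 is an arbitrary
pick, and the index numbering can vary with the platform's cache
hierarchy):

#include <stdio.h>

int main(void)
{
        static const char *files[] = {
                "/sys/devices/system/cpu/cpu8/cache/index2/shared_cpu_map",
                "/sys/devices/system/cpu/cpu8/cache/index3/shared_cpu_map",
        };
        char buf[256];
        int i;

        for (i = 0; i < 2; i++) {
                FILE *f = fopen(files[i], "r");

                if (!f || !fgets(buf, sizeof(buf), f)) {
                        printf("%s: unavailable\n", files[i]);
                        if (f)
                                fclose(f);
                        continue;
                }
                printf("%s: %s", files[i], buf);
                fclose(f);
        }
        return 0;
}
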
> @@ -1020,14 +1042,16 @@ static int __init init_big_cores(void)
> has_big_cores = true;
>
> for_each_possible_cpu(cpu) {
> - int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2);
> + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
>
> if (err)
> return err;
> }
>
> thread_group_shares_l2 = true;
> - pr_debug("L2 cache only shared by the threads in the small core\n");
> + thread_group_shares_l3 = true;
> + pr_debug("L2/L3 cache only shared by the threads in the small core\n");
> +
> return 0;
> }
>
> --
> 2.26.3
>