[PATCH 1/3] powerpc/smp: Parse ibm,thread-groups with multiple properties

Srikar Dronamraju srikar at linux.vnet.ibm.com
Mon Dec 7 23:10:42 AEDT 2020


* Gautham R. Shenoy <ego at linux.vnet.ibm.com> [2020-12-04 10:18:45]:

> From: "Gautham R. Shenoy" <ego at linux.vnet.ibm.com>

<snipped>

> 
>  static int parse_thread_groups(struct device_node *dn,
> -			       struct thread_groups *tg,
> -			       unsigned int property)
> +			       struct thread_groups_list *tglp)
>  {
> -	int i;
> -	u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> +	int i = 0;
> +	u32 *thread_group_array;
>  	u32 *thread_list;
>  	size_t total_threads;
> -	int ret;
> +	int ret = 0, count;
> +	unsigned int property_idx = 0;

NIT:
tglx mentioned in one of his recent review comments that we should try to
keep a reverse fir tree ordering of variable declarations where possible.
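
For example (illustrative only), with the longest declaration line first:

	unsigned int property_idx = 0;
	u32 *thread_group_array;
	size_t total_threads;
	int ret = 0, count;
	u32 *thread_list;
	int i = 0;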

> 
> +	count = of_property_count_u32_elems(dn, "ibm,thread-groups");
> +	thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
>  	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -					 thread_group_array, 3);
> +					 thread_group_array, count);
>  	if (ret)
> -		return ret;
> -
> -	tg->property = thread_group_array[0];
> -	tg->nr_groups = thread_group_array[1];
> -	tg->threads_per_group = thread_group_array[2];
> -	if (tg->property != property ||
> -	    tg->nr_groups < 1 ||
> -	    tg->threads_per_group < 1)
> -		return -ENODATA;
> +		goto out_free;
> 
> -	total_threads = tg->nr_groups * tg->threads_per_group;
> +	while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
> +		int j;
> +		struct thread_groups *tg = &tglp->property_tgs[property_idx++];

NIT: same as above.

> 
> -	ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -					 thread_group_array,
> -					 3 + total_threads);
> -	if (ret)
> -		return ret;
> +		tg->property = thread_group_array[i];
> +		tg->nr_groups = thread_group_array[i + 1];
> +		tg->threads_per_group = thread_group_array[i + 2];
> +		total_threads = tg->nr_groups * tg->threads_per_group;
> +
> +		thread_list = &thread_group_array[i + 3];
> 
> -	thread_list = &thread_group_array[3];
> +		for (j = 0; j < total_threads; j++)
> +			tg->thread_list[j] = thread_list[j];
> +		i = i + 3 + total_threads;

	Can't we simply use memcpy instead?
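
	Something along these lines perhaps (untested; it assumes the
	tg->thread_list entries have the same size as the u32 words read
	from the device tree):

		memcpy(tg->thread_list, thread_list,
		       total_threads * sizeof(*thread_list));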

> +	}
> 
> -	for (i = 0 ; i < total_threads; i++)
> -		tg->thread_list[i] = thread_list[i];
> +	tglp->nr_properties = property_idx;
> 
> -	return 0;
> +out_free:
> +	kfree(thread_group_array);
> +	return ret;
>  }
> 
>  /*
> @@ -805,24 +827,39 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
>  	return -1;
>  }
> 
> -static int init_cpu_l1_cache_map(int cpu)
> +static int init_cpu_cache_map(int cpu, unsigned int cache_property)
> 
>  {
>  	struct device_node *dn = of_get_cpu_node(cpu, NULL);
> -	struct thread_groups tg = {.property = 0,
> -				   .nr_groups = 0,
> -				   .threads_per_group = 0};
> +	struct thread_groups *tg = NULL;
>  	int first_thread = cpu_first_thread_sibling(cpu);
>  	int i, cpu_group_start = -1, err = 0;
> +	cpumask_var_t *mask;
> +	struct thread_groups_list *cpu_tgl = &tgl[cpu];

NIT: same as 1st comment.

> 
>  	if (!dn)
>  		return -ENODATA;
> 
> -	err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
> -	if (err)
> -		goto out;
> +	if (!(cache_property == THREAD_GROUP_SHARE_L1))
> +		return -EINVAL;
> 
> -	cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
> +	if (!cpu_tgl->nr_properties) {
> +		err = parse_thread_groups(dn, cpu_tgl);
> +		if (err)
> +			goto out;
> +	}
> +
> +	for (i = 0; i < cpu_tgl->nr_properties; i++) {
> +		if (cpu_tgl->property_tgs[i].property == cache_property) {
> +			tg = &cpu_tgl->property_tgs[i];
> +			break;
> +		}
> +	}
> +
> +	if (!tg)
> +		return -EINVAL;
> +
> +	cpu_group_start = get_cpu_thread_group_start(cpu, tg);

This whole hunk should be moved into a new function that is called before
init_cpu_cache_map(). That would simplify the logic to a great extent.
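
Something roughly along these lines, say a get_thread_groups() helper (the
name, signature and error handling here are only illustrative):

	static struct thread_groups *get_thread_groups(int cpu, int group_property,
						       int *err)
	{
		struct device_node *dn = of_get_cpu_node(cpu, NULL);
		struct thread_groups_list *cpu_tgl = &tgl[cpu];
		struct thread_groups *tg = NULL;
		int i;

		*err = 0;
		if (!dn) {
			*err = -ENODATA;
			return NULL;
		}

		if (!cpu_tgl->nr_properties) {
			*err = parse_thread_groups(dn, cpu_tgl);
			if (*err)
				goto out;
		}

		/* Pick the thread_groups entry matching the requested property. */
		for (i = 0; i < cpu_tgl->nr_properties; i++) {
			if (cpu_tgl->property_tgs[i].property == group_property) {
				tg = &cpu_tgl->property_tgs[i];
				break;
			}
		}

		if (!tg)
			*err = -EINVAL;
	out:
		of_node_put(dn);
		return tg;
	}

init_cpu_cache_map() would then just call this helper and, on success, do
the cpumask setup.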

> 
>  	if (unlikely(cpu_group_start == -1)) {
>  		WARN_ON_ONCE(1);
> @@ -830,11 +867,12 @@ static int init_cpu_l1_cache_map(int cpu)
>  		goto out;
>  	}
> 
> -	zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
> -				GFP_KERNEL, cpu_to_node(cpu));
> +	mask = &per_cpu(cpu_l1_cache_map, cpu);
> +
> +	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> 

This hunk (and the next one) should be moved to the next patch.

>  	for (i = first_thread; i < first_thread + threads_per_core; i++) {
> -		int i_group_start = get_cpu_thread_group_start(i, &tg);
> +		int i_group_start = get_cpu_thread_group_start(i, tg);
> 
>  		if (unlikely(i_group_start == -1)) {
>  			WARN_ON_ONCE(1);
> @@ -843,7 +881,7 @@ static int init_cpu_l1_cache_map(int cpu)
>  		}
> 
>  		if (i_group_start == cpu_group_start)
> -			cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
> +			cpumask_set_cpu(i, *mask);
>  	}
> 
>  out:
> @@ -924,7 +962,7 @@ static int init_big_cores(void)
>  	int cpu;
> 
>  	for_each_possible_cpu(cpu) {
> -		int err = init_cpu_l1_cache_map(cpu);
> +		int err = init_cpu_cache_map(cpu, THREAD_GROUP_SHARE_L1);
> 
>  		if (err)
>  			return err;
> -- 
> 1.9.4
> 

-- 
Thanks and Regards
Srikar Dronamraju

