[PATCH v5 4/6] powerpc/pseries: Consolidate different NUMA distance update code paths

David Gibson david at gibson.dropbear.id.au
Tue Jul 27 15:59:12 AEST 2021


On Tue, Jul 27, 2021 at 09:02:33AM +0530, Aneesh Kumar K.V wrote:
> David Gibson <david at gibson.dropbear.id.au> writes:
> 
> > On Thu, Jul 22, 2021 at 12:37:46PM +0530, Aneesh Kumar K.V wrote:
> >> David Gibson <david at gibson.dropbear.id.au> writes:
> >> 
> >> > On Mon, Jun 28, 2021 at 08:41:15PM +0530, Aneesh Kumar K.V wrote:
> 
> ....
> 
> > 
> >> >
> >> >> +		nid = of_read_number(&aa.arrays[index], 1);
> >> >> +
> >> >> +		if (nid == 0xffff || nid >= nr_node_ids)
> >> >> +			nid = default_nid;
> >> >> +		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
> >> >> +			int i;
> >> >> +			const __be32 *associativity;
> >> >> +
> >> >> +			index = lmb->aa_index * aa.array_sz;
> >> >> +			associativity = &aa.arrays[index];
> >> >> +			/*
> >> >> +			 * lookup array associativity entries have different format
> >> >> +			 * There is no length of the array as the first element.
> >> >
> >> > The difference is very small, and this is not a hot path.  Couldn't
> >> > you reduce a chunk of code by prepending aa.array_sz, then re-using
> >> > __initialize_form1_numa_distance()?  Or even making
> >> > __initialize_form1_numa_distance() take the length as a parameter?
> >> 
> >> The changes are small but confusing w.r.t how we look at the
> >> associativity-lookup-arrays. The way we interpret associativity array
> >> and associativity lookup array using primary_domain_index is different.
> >> Hence the '-1' in the node lookup here.
> >
> > They're really not, though.  It's exactly the same interpretation of
> > the associativity array itself - it's just that one of them has the
> > array prepended with a (redundant) length.  So you can make
> > __initialize_form1_numa_distance() work on the "bare" associativity
> > array, with a given length.  Here you call it with aa.array_sz as the
> > length, and in the other place you call it with prop[0] as the length.
> >
> >> 
> >> 	index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
> >> 	nid = of_read_number(&aa.arrays[index], 1);
> >> 
> >> 
> >> >
> >> >> +			 */
> >> >> +			for (i = 0; i < max_associativity_domain_index; i++) {
> >> >> +				const __be32 *entry;
> >> >> +
> >> >> +				entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> >> >
> >> > Does anything verify that distance_ref_points[i] <= aa.array_sz for
> >> > every i?
> >> 
> >> We do check for 
> >> 
> >> 	if (primary_domain_index <= aa.array_sz &&
> >
> > Right, but that doesn't check the other distance_ref_points entries.
> > Not that there's any reason to have extra entries with Form2, but we
> > still don't want stray array accesses.
> 
> This is how the change looks. I am not convinced this makes it simpler.

It's not, but that's because the lookup_array_assoc flag is not needed...

> I will add that as the last patch and we can drop that if we find that
> not helpful? 
> 
> modified   arch/powerpc/mm/numa.c
> @@ -171,20 +171,31 @@ static void unmap_cpu_from_node(unsigned long cpu)
>  }
>  #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
>  
> -/*
> - * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
> - * info is found.
> - */
> -static int associativity_to_nid(const __be32 *associativity)
> +static int __associativity_to_nid(const __be32 *associativity,
> +				  bool lookup_array_assoc,
> +				  int max_array_index)
>  {
>  	int nid = NUMA_NO_NODE;
> +	int index;
>  
>  	if (!numa_enabled)
>  		goto out;
> +	/*
> +	 * ibm,associativity-lookup-array doesn't have element
> +	 * count at the start of the associativity. Hence
> +	 * decrement the primary_domain_index when used with
> +	 * lookup-array associativity.
> +	 */
> +	if (lookup_array_assoc)
> +		index = primary_domain_index - 1;
> +	else {
> +		index = primary_domain_index;
> +		max_array_index = of_read_number(associativity, 1);
> +	}
> +	if (index > max_array_index)
> +		goto out;

So, the associativity-array-with-length is exactly a length, followed
by an associativity-array-without-length.  What I was suggesting is
that you make this function take only an
associativity-array-without-length, with the length passed separately.

Where you want to use it on an associativity-array-with-length, stored
in __be32 *awl, you just invoke it as:
	associativity_to_nid(awl + 1, of_read_number(awl, 1));

> -	if (of_read_number(associativity, 1) >= primary_domain_index)
> -		nid = of_read_number(&associativity[primary_domain_index], 1);
> -
> +	nid = of_read_number(&associativity[index], 1);
>  	/* POWER4 LPAR uses 0xffff as invalid node */
>  	if (nid == 0xffff || nid >= nr_node_ids)
>  		nid = NUMA_NO_NODE;
> @@ -192,6 +203,15 @@ static int associativity_to_nid(const __be32 *associativity)
>  	return nid;
>  }
>  
> +/*
> + * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
> + * info is found.
> + */
> +static inline int associativity_to_nid(const __be32 *associativity)
> +{
> +	return __associativity_to_nid(associativity, false, 0);
> +}
> +
>  static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
>  {
>  	int dist;
> @@ -295,19 +315,38 @@ int of_node_to_nid(struct device_node *device)
>  }
>  EXPORT_SYMBOL(of_node_to_nid);
>  
> -static void __initialize_form1_numa_distance(const __be32 *associativity)
> +static void __initialize_form1_numa_distance(const __be32 *associativity,
> +					     bool lookup_array_assoc,
> +					     int max_array_index)
>  {
>  	int i, nid;
> +	int index_offset = 0;
>  
>  	if (affinity_form != FORM1_AFFINITY)
>  		return;
> +	/*
> +	 * ibm,associativity-lookup-array doesn't have element
> +	 * count at the start of the associativity. Hence
> +	 * decrement the distance_ref_points index when used with
> +	 * lookup-array associativity.
> +	 */
> +	if (lookup_array_assoc)
> +		index_offset = 1;
> +	else
> +		max_array_index = of_read_number(associativity, 1);
>  
> -	nid = associativity_to_nid(associativity);
> +	nid = __associativity_to_nid(associativity, lookup_array_assoc, max_array_index);
>  	if (nid != NUMA_NO_NODE) {
>  		for (i = 0; i < distance_ref_points_depth; i++) {
>  			const __be32 *entry;
> +			int index = be32_to_cpu(distance_ref_points[i]) - index_offset;
>  
> -			entry = &associativity[be32_to_cpu(distance_ref_points[i])];
> +			/*
> +			 * broken hierarchy, return with broken distance table
> +			 */
> +			if (index > max_array_index)
> +				return;
> +			entry = &associativity[index];
>  			distance_lookup_table[nid][i] = of_read_number(entry, 1);
>  		}
>  	}
> @@ -321,7 +360,7 @@ static void initialize_form1_numa_distance(struct device_node *node)
>  	if (!associativity)
>  		return;
>  
> -	__initialize_form1_numa_distance(associativity);
> +	__initialize_form1_numa_distance(associativity, false, 0);
>  }
>  
>  /*
> @@ -586,27 +625,14 @@ static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
>  
>  	if (primary_domain_index <= aa.array_sz &&
>  	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
> -		index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
> -		nid = of_read_number(&aa.arrays[index], 1);
> +		const __be32 *associativity;
>  
> -		if (nid == 0xffff || nid >= nr_node_ids)
> -			nid = default_nid;
> +		index = lmb->aa_index * aa.array_sz;
> +		associativity = &aa.arrays[index];
> +		nid = __associativity_to_nid(associativity, true, aa.array_sz - 1);
>  		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
> -			int i;
> -			const __be32 *associativity;
> -
> -			index = lmb->aa_index * aa.array_sz;
> -			associativity = &aa.arrays[index];
> -			/*
> -			 * lookup array associativity entries have different format
> -			 * There is no length of the array as the first element.
> -			 */
> -			for (i = 0; i < distance_ref_points_depth; i++) {
> -				const __be32 *entry;
> -
> -				entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> -				distance_lookup_table[nid][i] = of_read_number(entry, 1);
> -			}
> +			__initialize_form1_numa_distance(associativity,
> +							 true, aa.array_sz - 1);
>  		}
>  	}
>  	return nid;
> @@ -632,9 +658,11 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>  
>  	if (primary_domain_index <= aa.array_sz &&
>  	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
> -		index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
> -		nid = of_read_number(&aa.arrays[index], 1);
> +		const __be32 *associativity;
>  
> +		index = lmb->aa_index * aa.array_sz;
> +		associativity = &aa.arrays[index];
> +		nid = __associativity_to_nid(associativity, true, aa.array_sz - 1);
>  		if (nid == 0xffff || nid >= nr_node_ids)
>  			nid = default_nid;
>  	}
> @@ -939,7 +967,7 @@ static int __init parse_numa_properties(void)
>  
>  		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
>  			nid = associativity_to_nid(vphn_assoc);
> -			__initialize_form1_numa_distance(vphn_assoc);
> +			__initialize_form1_numa_distance(vphn_assoc, false, 0);
>  		} else {
>  
>  			/*
> @@ -953,7 +981,7 @@ static int __init parse_numa_properties(void)
>  			associativity = of_get_associativity(cpu);
>  			if (associativity) {
>  				nid = associativity_to_nid(associativity);
> -				__initialize_form1_numa_distance(associativity);
> +				__initialize_form1_numa_distance(associativity, false, 0);
>  			}
>  			of_node_put(cpu);
>  		}
> @@ -993,7 +1021,7 @@ static int __init parse_numa_properties(void)
>  		associativity = of_get_associativity(memory);
>  		if (associativity) {
>  			nid = associativity_to_nid(associativity);
> -			__initialize_form1_numa_distance(associativity);
> +			__initialize_form1_numa_distance(associativity, false, 0);
>  		} else
>  			nid = default_nid;
>  
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20210727/2dbee44b/attachment.sig>


More information about the Linuxppc-dev mailing list