[PATCH v7 3/6] powerpc/pseries: Consolidate different NUMA distance update code paths

David Gibson david at gibson.dropbear.id.au
Tue Aug 10 12:40:39 AEST 2021


On Mon, Aug 09, 2021 at 10:54:31AM +0530, Aneesh Kumar K.V wrote:
> The associativity details of the newly added resourced are collected from
> the hypervisor via "ibm,configure-connector" rtas call. Update the numa
> distance details of the newly added numa node after the above call.
> 
> Instead of updating NUMA distance every time we lookup a node id
> from the associativity property, add helpers that can be used
> during boot which does this only once. Also remove the distance
> update from node id lookup helpers.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>

It would make review easier if you'd folded the fixes/cleanups from
6/6 in here, rather than having them separate.

> ---
>  arch/powerpc/include/asm/topology.h           |   2 +
>  arch/powerpc/mm/numa.c                        | 178 +++++++++++++-----
>  arch/powerpc/platforms/pseries/hotplug-cpu.c  |   2 +
>  .../platforms/pseries/hotplug-memory.c        |   2 +
>  4 files changed, 138 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
> index e4db64c0e184..a6425a70c37b 100644
> --- a/arch/powerpc/include/asm/topology.h
> +++ b/arch/powerpc/include/asm/topology.h
> @@ -64,6 +64,7 @@ static inline int early_cpu_to_node(int cpu)
>  }
>  
>  int of_drconf_to_nid_single(struct drmem_lmb *lmb);
> +void update_numa_distance(struct device_node *node);
>  
>  #else
>  
> @@ -93,6 +94,7 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>  	return first_online_node;
>  }
>  
> +static inline void update_numa_distance(struct device_node *node) {}
>  #endif /* CONFIG_NUMA */
>  
>  #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 368719b14dcc..c695faf67d68 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -208,22 +208,6 @@ int __node_distance(int a, int b)
>  }
>  EXPORT_SYMBOL(__node_distance);
>  
> -static void initialize_distance_lookup_table(int nid,
> -		const __be32 *associativity)
> -{
> -	int i;
> -
> -	if (affinity_form != FORM1_AFFINITY)
> -		return;
> -
> -	for (i = 0; i < distance_ref_points_depth; i++) {
> -		const __be32 *entry;
> -
> -		entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> -		distance_lookup_table[nid][i] = of_read_number(entry, 1);
> -	}
> -}
> -
>  /*
>   * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
>   * info is found.
> @@ -241,15 +225,6 @@ static int associativity_to_nid(const __be32 *associativity)
>  	/* POWER4 LPAR uses 0xffff as invalid node */
>  	if (nid == 0xffff || nid >= nr_node_ids)
>  		nid = NUMA_NO_NODE;
> -
> -	if (nid > 0 &&
> -		of_read_number(associativity, 1) >= distance_ref_points_depth) {
> -		/*
> -		 * Skip the length field and send start of associativity array

So.. this highlights a point.  In your comments you're treating
"associativity array" as meaning the value including the length plus
the assoc IDs, but this comment you've removed is treating it as just
the assoc IDs.  Personally I think things become clearer to talk about
if you treat the canonical form of "associativity array" as just the
assoc IDs (with implicit length), then treat the ibm,associativity
properties as containing (length + associativity array).

> -		 */
> -		initialize_distance_lookup_table(nid, associativity + 1);
> -	}
> -
>  out:
>  	return nid;
>  }
> @@ -287,6 +262,48 @@ int of_node_to_nid(struct device_node *device)
>  }
>  EXPORT_SYMBOL(of_node_to_nid);
>  
> +static void __initialize_form1_numa_distance(const __be32 *associativity)
> +{
> +	int i, nid;
> +
> +	if (affinity_form != FORM1_AFFINITY)
> +		return;
> +
> +	nid = associativity_to_nid(associativity);
> +	if (nid != NUMA_NO_NODE) {
> +		for (i = 0; i < distance_ref_points_depth; i++) {
> +			const __be32 *entry;
> +
> +			entry = &associativity[be32_to_cpu(distance_ref_points[i])];

So, here you've now removed all checks that distance_ref_points[i]
doesn't overflow the length of associativity.  It comes back in 6/6,
but it's still not ideal.

> +			distance_lookup_table[nid][i] = of_read_number(entry, 1);
> +		}
> +	}
> +}
> +
> +static void initialize_form1_numa_distance(struct device_node *node)
> +{
> +	const __be32 *associativity;
> +
> +	associativity = of_get_associativity(node);
> +	if (!associativity)
> +		return;
> +
> +	__initialize_form1_numa_distance(associativity);
> +}
> +
> +/*
> + * Used to update distance information w.r.t newly added node.
> + */
> +void update_numa_distance(struct device_node *node)
> +{
> +	if (affinity_form == FORM0_AFFINITY)
> +		return;
> +	else if (affinity_form == FORM1_AFFINITY) {
> +		initialize_form1_numa_distance(node);
> +		return;
> +	}
> +}
> +
>  static int __init find_primary_domain_index(void)
>  {
>  	int index;
> @@ -433,6 +450,48 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
>  	return 0;
>  }
>  
> +static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
> +{
> +	struct assoc_arrays aa = { .arrays = NULL };
> +	int default_nid = NUMA_NO_NODE;

There's still no point I can see to this intermediate variable.

> +	int nid = default_nid;
> +	int rc, index;
> +
> +	if ((primary_domain_index < 0) || !numa_enabled)
> +		return default_nid;
> +
> +	rc = of_get_assoc_arrays(&aa);
> +	if (rc)
> +		return default_nid;
> +
> +	if (primary_domain_index <= aa.array_sz &&
> +	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
> +		index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
> +		nid = of_read_number(&aa.arrays[index], 1);
> +
> +		if (nid == 0xffff || nid >= nr_node_ids)
> +			nid = default_nid;
> +		if (nid > 0 && affinity_form == FORM1_AFFINITY) {
> +			int i;
> +			const __be32 *associativity;
> +
> +			index = lmb->aa_index * aa.array_sz;
> +			associativity = &aa.arrays[index];
> +			/*
> +			 * lookup array associativity entries have different format
> +			 * There is no length of the array as the first element.
> +			 */
> +			for (i = 0; i < distance_ref_points_depth; i++) {
> +				const __be32 *entry;
> +
> +				entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> +				distance_lookup_table[nid][i] = of_read_number(entry, 1);
> +			}
> +		}
> +	}
> +	return nid;
> +}
> +
>  /*
>   * This is like of_node_to_nid_single() for memory represented in the
>   * ibm,dynamic-reconfiguration-memory node.
> @@ -458,21 +517,14 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>  
>  		if (nid == 0xffff || nid >= nr_node_ids)
>  			nid = default_nid;
> -
> -		if (nid > 0) {
> -			index = lmb->aa_index * aa.array_sz;
> -			initialize_distance_lookup_table(nid,
> -							&aa.arrays[index]);
> -		}
>  	}
> -
>  	return nid;
>  }
>  
>  #ifdef CONFIG_PPC_SPLPAR
> -static int vphn_get_nid(long lcpu)
> +
> +static int __vphn_get_associativity(long lcpu, __be32 *associativity)
>  {
> -	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
>  	long rc, hwid;
>  
>  	/*
> @@ -492,12 +544,30 @@ static int vphn_get_nid(long lcpu)
>  
>  		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
>  		if (rc == H_SUCCESS)
> -			return associativity_to_nid(associativity);
> +			return 0;
>  	}
>  
> +	return -1;
> +}
> +
> +static int vphn_get_nid(long lcpu)
> +{
> +	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
> +
> +
> +	if (!__vphn_get_associativity(lcpu, associativity))
> +		return associativity_to_nid(associativity);
> +
>  	return NUMA_NO_NODE;
> +
>  }
>  #else
> +
> +static int __vphn_get_associativity(long lcpu, __be32 *associativity)
> +{
> +	return -1;
> +}
> +
>  static int vphn_get_nid(long unused)
>  {
>  	return NUMA_NO_NODE;
> @@ -692,7 +762,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
>  			size = read_n_cells(n_mem_size_cells, usm);
>  		}
>  
> -		nid = of_drconf_to_nid_single(lmb);
> +		nid = get_nid_and_numa_distance(lmb);
>  		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
>  					  &nid);
>  		node_set_online(nid);
> @@ -709,6 +779,7 @@ static int __init parse_numa_properties(void)
>  	struct device_node *memory;
>  	int default_nid = 0;
>  	unsigned long i;
> +	const __be32 *associativity;
>  
>  	if (numa_enabled == 0) {
>  		printk(KERN_WARNING "NUMA disabled by user\n");
> @@ -734,18 +805,30 @@ static int __init parse_numa_properties(void)
>  	 * each node to be onlined must have NODE_DATA etc backing it.
>  	 */
>  	for_each_present_cpu(i) {
> +		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
>  		struct device_node *cpu;
> -		int nid = vphn_get_nid(i);
> +		int nid = NUMA_NO_NODE;
>  
> -		/*
> -		 * Don't fall back to default_nid yet -- we will plug
> -		 * cpus into nodes once the memory scan has discovered
> -		 * the topology.
> -		 */
> -		if (nid == NUMA_NO_NODE) {
> +		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
> +
> +		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
> +			nid = associativity_to_nid(vphn_assoc);
> +			__initialize_form1_numa_distance(vphn_assoc);
> +		} else {
> +
> +			/*
> +			 * Don't fall back to default_nid yet -- we will plug
> +			 * cpus into nodes once the memory scan has discovered
> +			 * the topology.
> +			 */
>  			cpu = of_get_cpu_node(i, NULL);
>  			BUG_ON(!cpu);
> -			nid = of_node_to_nid_single(cpu);
> +
> +			associativity = of_get_associativity(cpu);
> +			if (associativity) {
> +				nid = associativity_to_nid(associativity);
> +				__initialize_form1_numa_distance(associativity);
> +			}
>  			of_node_put(cpu);
>  		}
>  
> @@ -781,8 +864,11 @@ static int __init parse_numa_properties(void)
>  		 * have associativity properties.  If none, then
>  		 * everything goes to default_nid.
>  		 */
> -		nid = of_node_to_nid_single(memory);
> -		if (nid < 0)
> +		associativity = of_get_associativity(memory);
> +		if (associativity) {
> +			nid = associativity_to_nid(associativity);
> +			__initialize_form1_numa_distance(associativity);
> +		} else
>  			nid = default_nid;
>  
>  		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 7e970f81d8ff..778b6ab35f0d 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -498,6 +498,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>  		return saved_rc;
>  	}
>  
> +	update_numa_distance(dn);
> +
>  	rc = dlpar_online_cpu(dn);
>  	if (rc) {
>  		saved_rc = rc;
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 377d852f5a9a..ee1d81d7e54a 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
>  		return -ENODEV;
>  	}
>  
> +	update_numa_distance(lmb_node);
> +
>  	dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
>  	if (!dr_node) {
>  		dlpar_free_cc_nodes(lmb_node);

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20210810/7ca5aac3/attachment.sig>


More information about the Linuxppc-dev mailing list