[PATCH v6 3/6] powerpc/pseries: Consolidate different NUMA distance update code paths

David Gibson david at gibson.dropbear.id.au
Fri Aug 6 16:37:47 AEST 2021


On Tue, Jul 27, 2021 at 03:33:08PM +0530, Aneesh Kumar K.V wrote:
> The associativity details of the newly added resourced are collected from
> the hypervisor via "ibm,configure-connector" rtas call. Update the numa
> distance details of the newly added numa node after the above call.
> 
> Instead of updating NUMA distance every time we lookup a node id
> from the associativity property, add helpers that can be used
> during boot which does this only once. Also remove the distance
> update from node id lookup helpers.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
> ---
>  arch/powerpc/include/asm/topology.h           |   2 +
>  arch/powerpc/mm/numa.c                        | 178 +++++++++++++-----
>  arch/powerpc/platforms/pseries/hotplug-cpu.c  |   2 +
>  .../platforms/pseries/hotplug-memory.c        |   2 +
>  4 files changed, 138 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
> index e4db64c0e184..a6425a70c37b 100644
> --- a/arch/powerpc/include/asm/topology.h
> +++ b/arch/powerpc/include/asm/topology.h
> @@ -64,6 +64,7 @@ static inline int early_cpu_to_node(int cpu)
>  }
>  
>  int of_drconf_to_nid_single(struct drmem_lmb *lmb);
> +void update_numa_distance(struct device_node *node);
>  
>  #else
>  
> @@ -93,6 +94,7 @@ static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>  	return first_online_node;
>  }
>  
> +static inline void update_numa_distance(struct device_node *node) {}
>  #endif /* CONFIG_NUMA */
>  
>  #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 368719b14dcc..c695faf67d68 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -208,22 +208,6 @@ int __node_distance(int a, int b)
>  }
>  EXPORT_SYMBOL(__node_distance);
>  
> -static void initialize_distance_lookup_table(int nid,
> -		const __be32 *associativity)
> -{
> -	int i;
> -
> -	if (affinity_form != FORM1_AFFINITY)
> -		return;
> -
> -	for (i = 0; i < distance_ref_points_depth; i++) {
> -		const __be32 *entry;
> -
> -		entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> -		distance_lookup_table[nid][i] = of_read_number(entry, 1);
> -	}
> -}
> -
>  /*
>   * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
>   * info is found.
> @@ -241,15 +225,6 @@ static int associativity_to_nid(const __be32 *associativity)
>  	/* POWER4 LPAR uses 0xffff as invalid node */
>  	if (nid == 0xffff || nid >= nr_node_ids)
>  		nid = NUMA_NO_NODE;
> -
> -	if (nid > 0 &&
> -		of_read_number(associativity, 1) >= distance_ref_points_depth) {
> -		/*
> -		 * Skip the length field and send start of associativity array
> -		 */
> -		initialize_distance_lookup_table(nid, associativity + 1);
> -	}
> -
>  out:
>  	return nid;
>  }
> @@ -287,6 +262,48 @@ int of_node_to_nid(struct device_node *device)
>  }
>  EXPORT_SYMBOL(of_node_to_nid);
>  
> +static void __initialize_form1_numa_distance(const __be32 *associativity)
> +{
> +	int i, nid;
> +
> +	if (affinity_form != FORM1_AFFINITY)
> +		return;
> +
> +	nid = associativity_to_nid(associativity);
> +	if (nid != NUMA_NO_NODE) {
> +		for (i = 0; i < distance_ref_points_depth; i++) {
> +			const __be32 *entry;
> +
> +			entry = &associativity[be32_to_cpu(distance_ref_points[i])];
> +			distance_lookup_table[nid][i] = of_read_number(entry, 1);

So, in the conversion from the old initialize_distance_lookup_table()
you change this from accepting a bare associativity list, to requiring
the length on the front.  [*]

> +		}
> +	}
> +}
> +
> +static void initialize_form1_numa_distance(struct device_node *node)
> +{
> +	const __be32 *associativity;
> +
> +	associativity = of_get_associativity(node);
> +	if (!associativity)
> +		return;
> +
> +	__initialize_form1_numa_distance(associativity);
> +}
> +
> +/*
> + * Used to update distance information w.r.t newly added node.
> + */
> +void update_numa_distance(struct device_node *node)
> +{
> +	if (affinity_form == FORM0_AFFINITY)
> +		return;
> +	else if (affinity_form == FORM1_AFFINITY) {
> +		initialize_form1_numa_distance(node);
> +		return;
> +	}
> +}
> +
>  static int __init find_primary_domain_index(void)
>  {
>  	int index;
> @@ -433,6 +450,48 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
>  	return 0;
>  }
>  
> +static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
> +{
> +	struct assoc_arrays aa = { .arrays = NULL };
> +	int default_nid = NUMA_NO_NODE;

You never change default_nid, so it seems like it would be clearer to
get rid of this and just use NUMA_NO_NODE inline everywhere below.

> +	int nid = default_nid;
> +	int rc, index;
> +
> +	if ((primary_domain_index < 0) || !numa_enabled)
> +		return default_nid;
> +
> +	rc = of_get_assoc_arrays(&aa);
> +	if (rc)
> +		return default_nid;
> +
> +	if (primary_domain_index <= aa.array_sz &&
> +	    !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
> +		index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
> +		nid = of_read_number(&aa.arrays[index], 1);
> +
> +		if (nid == 0xffff || nid >= nr_node_ids)
> +			nid = default_nid;
> +		if (nid > 0 && affinity_form == FORM1_AFFINITY) {

Is the nid > 0 test assuming something about what are and aren't valid
nids?  Should it be nid != NUMA_NO_NODE instead?

> +			int i;
> +			const __be32 *associativity;
> +
> +			index = lmb->aa_index * aa.array_sz;
> +			associativity = &aa.arrays[index];

Again, you can reuse form1_numa_distance() here if it weren't for the
fact that you broke it at [*] above to no longer accept bare
associativity lists.

> +			/*
> +			 * lookup array associativity entries have different format
> +			 * There is no length of the array as the first element.
> +			 */
> +			for (i = 0; i < distance_ref_points_depth; i++) {
> +				const __be32 *entry;
> +
> +				entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> +				distance_lookup_table[nid][i] = of_read_number(entry, 1);
> +			}
> +		}
> +	}
> +	return nid;
> +}
> +
>  /*
>   * This is like of_node_to_nid_single() for memory represented in the
>   * ibm,dynamic-reconfiguration-memory node.
> @@ -458,21 +517,14 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>  
>  		if (nid == 0xffff || nid >= nr_node_ids)
>  			nid = default_nid;
> -
> -		if (nid > 0) {
> -			index = lmb->aa_index * aa.array_sz;
> -			initialize_distance_lookup_table(nid,
> -							&aa.arrays[index]);
> -		}
>  	}
> -
>  	return nid;
>  }
>  
>  #ifdef CONFIG_PPC_SPLPAR
> -static int vphn_get_nid(long lcpu)
> +
> +static int __vphn_get_associativity(long lcpu, __be32 *associativity)
>  {
> -	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
>  	long rc, hwid;
>  
>  	/*
> @@ -492,12 +544,30 @@ static int vphn_get_nid(long lcpu)
>  
>  		rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
>  		if (rc == H_SUCCESS)
> -			return associativity_to_nid(associativity);
> +			return 0;
>  	}
>  
> +	return -1;
> +}
> +
> +static int vphn_get_nid(long lcpu)
> +{
> +	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
> +
> +
> +	if (!__vphn_get_associativity(lcpu, associativity))
> +		return associativity_to_nid(associativity);
> +
>  	return NUMA_NO_NODE;
> +
>  }
>  #else
> +
> +static int __vphn_get_associativity(long lcpu, __be32 *associativity)
> +{
> +	return -1;
> +}
> +
>  static int vphn_get_nid(long unused)
>  {
>  	return NUMA_NO_NODE;
> @@ -692,7 +762,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
>  			size = read_n_cells(n_mem_size_cells, usm);
>  		}
>  
> -		nid = of_drconf_to_nid_single(lmb);
> +		nid = get_nid_and_numa_distance(lmb);
>  		fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
>  					  &nid);
>  		node_set_online(nid);
> @@ -709,6 +779,7 @@ static int __init parse_numa_properties(void)
>  	struct device_node *memory;
>  	int default_nid = 0;
>  	unsigned long i;
> +	const __be32 *associativity;
>  
>  	if (numa_enabled == 0) {
>  		printk(KERN_WARNING "NUMA disabled by user\n");
> @@ -734,18 +805,30 @@ static int __init parse_numa_properties(void)
>  	 * each node to be onlined must have NODE_DATA etc backing it.
>  	 */
>  	for_each_present_cpu(i) {
> +		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
>  		struct device_node *cpu;
> -		int nid = vphn_get_nid(i);
> +		int nid = NUMA_NO_NODE;
>  
> -		/*
> -		 * Don't fall back to default_nid yet -- we will plug
> -		 * cpus into nodes once the memory scan has discovered
> -		 * the topology.
> -		 */
> -		if (nid == NUMA_NO_NODE) {
> +		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
> +
> +		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
> +			nid = associativity_to_nid(vphn_assoc);
> +			__initialize_form1_numa_distance(vphn_assoc);
> +		} else {
> +
> +			/*
> +			 * Don't fall back to default_nid yet -- we will plug
> +			 * cpus into nodes once the memory scan has discovered
> +			 * the topology.
> +			 */
>  			cpu = of_get_cpu_node(i, NULL);
>  			BUG_ON(!cpu);
> -			nid = of_node_to_nid_single(cpu);
> +
> +			associativity = of_get_associativity(cpu);
> +			if (associativity) {
> +				nid = associativity_to_nid(associativity);
> +				__initialize_form1_numa_distance(associativity);
> +			}
>  			of_node_put(cpu);
>  		}
>  
> @@ -781,8 +864,11 @@ static int __init parse_numa_properties(void)
>  		 * have associativity properties.  If none, then
>  		 * everything goes to default_nid.
>  		 */
> -		nid = of_node_to_nid_single(memory);
> -		if (nid < 0)
> +		associativity = of_get_associativity(memory);
> +		if (associativity) {
> +			nid = associativity_to_nid(associativity);
> +			__initialize_form1_numa_distance(associativity);
> +		} else
>  			nid = default_nid;
>  
>  		fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 7e970f81d8ff..778b6ab35f0d 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -498,6 +498,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
>  		return saved_rc;
>  	}
>  
> +	update_numa_distance(dn);
> +
>  	rc = dlpar_online_cpu(dn);
>  	if (rc) {
>  		saved_rc = rc;
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 377d852f5a9a..ee1d81d7e54a 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
>  		return -ENODEV;
>  	}
>  
> +	update_numa_distance(lmb_node);
> +
>  	dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
>  	if (!dr_node) {
>  		dlpar_free_cc_nodes(lmb_node);

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20210806/6b015458/attachment.sig>


More information about the Linuxppc-dev mailing list