[PATCH v5 4/6] powerpc/pseries: Consolidate different NUMA distance update code paths
David Gibson
david at gibson.dropbear.id.au
Thu Jul 22 11:40:33 AEST 2021
On Mon, Jun 28, 2021 at 08:41:15PM +0530, Aneesh Kumar K.V wrote:
> The associativity details of the newly added resourced are collected from
> the hypervisor via "ibm,configure-connector" rtas call. Update the numa
> distance details of the newly added numa node after the above call.
>
> Instead of updating NUMA distance every time we lookup a node id
> from the associativity property, add helpers that can be used
> during boot which does this only once. Also remove the distance
> update from node id lookup helpers.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
> ---
> arch/powerpc/mm/numa.c | 173 +++++++++++++-----
> arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 +
> .../platforms/pseries/hotplug-memory.c | 2 +
> arch/powerpc/platforms/pseries/pseries.h | 1 +
> 4 files changed, 132 insertions(+), 46 deletions(-)
>
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index 0ec16999beef..7b142f79d600 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -208,22 +208,6 @@ int __node_distance(int a, int b)
> }
> EXPORT_SYMBOL(__node_distance);
>
> -static void initialize_distance_lookup_table(int nid,
> - const __be32 *associativity)
> -{
> - int i;
> -
> - if (affinity_form != FORM1_AFFINITY)
> - return;
> -
> - for (i = 0; i < max_associativity_domain_index; i++) {
> - const __be32 *entry;
> -
> - entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
> - distance_lookup_table[nid][i] = of_read_number(entry, 1);
> - }
> -}
> -
> /*
> * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
> * info is found.
> @@ -241,15 +225,6 @@ static int associativity_to_nid(const __be32 *associativity)
> /* POWER4 LPAR uses 0xffff as invalid node */
> if (nid == 0xffff || nid >= nr_node_ids)
> nid = NUMA_NO_NODE;
> -
> - if (nid > 0 &&
> - of_read_number(associativity, 1) >= max_associativity_domain_index) {
> - /*
> - * Skip the length field and send start of associativity array
> - */
> - initialize_distance_lookup_table(nid, associativity + 1);
> - }
> -
> out:
> return nid;
> }
> @@ -287,6 +262,49 @@ int of_node_to_nid(struct device_node *device)
> }
> EXPORT_SYMBOL(of_node_to_nid);
>
> +static void __initialize_form1_numa_distance(const __be32 *associativity)
> +{
> + int i, nid;
> +
> + if (affinity_form != FORM1_AFFINITY)
Since this shouldn't be called on a !form1 system, this could be a WARN_ON().
> + return;
> +
> + if (of_read_number(associativity, 1) >= primary_domain_index) {
> + nid = of_read_number(&associativity[primary_domain_index], 1);
This computes the nid from the assoc array independently of
associativity_to_nid(), which doesn't seem like a good idea. Wouldn't
it be better to call associativity_to_nid(), then make the next bit
conditional on nid != NUMA_NO_NODE?
> +
> + for (i = 0; i < max_associativity_domain_index; i++) {
> + const __be32 *entry;
> +
> + entry = &associativity[be32_to_cpu(distance_ref_points[i])];
> + distance_lookup_table[nid][i] = of_read_number(entry, 1);
> + }
> + }
> +}
> +
> +static void initialize_form1_numa_distance(struct device_node *node)
> +{
> + const __be32 *associativity;
> +
> + associativity = of_get_associativity(node);
> + if (!associativity)
> + return;
> +
> + __initialize_form1_numa_distance(associativity);
> +}
> +
> +/*
> + * Used to update distance information w.r.t newly added node.
> + */
> +void update_numa_distance(struct device_node *node)
> +{
> + if (affinity_form == FORM0_AFFINITY)
> + return;
> + else if (affinity_form == FORM1_AFFINITY) {
> + initialize_form1_numa_distance(node);
> + return;
> + }
> +}
> +
> static int __init find_primary_domain_index(void)
> {
> int index;
> @@ -433,6 +451,48 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa)
> return 0;
> }
>
> +static int get_nid_and_numa_distance(struct drmem_lmb *lmb)
> +{
> + struct assoc_arrays aa = { .arrays = NULL };
> + int default_nid = NUMA_NO_NODE;
> + int nid = default_nid;
> + int rc, index;
> +
> + if ((primary_domain_index < 0) || !numa_enabled)
Under what circumstances could you get primary_domain_index < 0?
> + return default_nid;
> +
> + rc = of_get_assoc_arrays(&aa);
> + if (rc)
> + return default_nid;
> +
> + if (primary_domain_index <= aa.array_sz &&
> + !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
> + index = lmb->aa_index * aa.array_sz + primary_domain_index - 1;
Does anywhere verify that primary_domain_index <= aa.array_sz?
> + nid = of_read_number(&aa.arrays[index], 1);
> +
> + if (nid == 0xffff || nid >= nr_node_ids)
> + nid = default_nid;
> + if (nid > 0 && affinity_form == FORM1_AFFINITY) {
> + int i;
> + const __be32 *associativity;
> +
> + index = lmb->aa_index * aa.array_sz;
> + associativity = &aa.arrays[index];
> + /*
> + * lookup array associativity entries have different format
> + * There is no length of the array as the first element.
The difference is very small, and this is not a hot path. Couldn't
you reduce a chunk of code by prepending aa.array_sz, then re-using
__initialize_form1_numa_distance()? Or even making
__initialize_form1_numa_distance() take the length as a parameter?
> + */
> + for (i = 0; i < max_associativity_domain_index; i++) {
> + const __be32 *entry;
> +
> + entry = &associativity[be32_to_cpu(distance_ref_points[i]) - 1];
Does anywhere verify that distance_ref_points[i] <= aa.array_sz for
every i?
> + distance_lookup_table[nid][i] = of_read_number(entry, 1);
> + }
> + }
> + }
> + return nid;
> +}
> +
> /*
> * This is like of_node_to_nid_single() for memory represented in the
> * ibm,dynamic-reconfiguration-memory node.
> @@ -458,21 +518,14 @@ int of_drconf_to_nid_single(struct drmem_lmb *lmb)
>
> if (nid == 0xffff || nid >= nr_node_ids)
> nid = default_nid;
> -
> - if (nid > 0) {
> - index = lmb->aa_index * aa.array_sz;
> - initialize_distance_lookup_table(nid,
> - &aa.arrays[index]);
> - }
> }
> -
> return nid;
> }
>
> #ifdef CONFIG_PPC_SPLPAR
> -static int vphn_get_nid(long lcpu)
> +
> +static int __vphn_get_associativity(long lcpu, __be32 *associativity)
> {
> - __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
> long rc, hwid;
>
> /*
> @@ -492,10 +545,22 @@ static int vphn_get_nid(long lcpu)
>
> rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
> if (rc == H_SUCCESS)
> - return associativity_to_nid(associativity);
> + return 0;
> }
>
> + return -1;
> +}
> +
> +static int vphn_get_nid(long lcpu)
> +{
> + __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
> +
> +
> + if (!__vphn_get_associativity(lcpu, associativity))
> + return associativity_to_nid(associativity);
> +
> return NUMA_NO_NODE;
> +
> }
> #else
> static int vphn_get_nid(long unused)
> @@ -692,7 +757,7 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
> size = read_n_cells(n_mem_size_cells, usm);
> }
>
> - nid = of_drconf_to_nid_single(lmb);
> + nid = get_nid_and_numa_distance(lmb);
> fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
> &nid);
> node_set_online(nid);
> @@ -709,6 +774,7 @@ static int __init parse_numa_properties(void)
> struct device_node *memory;
> int default_nid = 0;
> unsigned long i;
> + const __be32 *associativity;
>
> if (numa_enabled == 0) {
> printk(KERN_WARNING "NUMA disabled by user\n");
> @@ -734,18 +800,30 @@ static int __init parse_numa_properties(void)
> * each node to be onlined must have NODE_DATA etc backing it.
> */
> for_each_present_cpu(i) {
> + __be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
> struct device_node *cpu;
> - int nid = vphn_get_nid(i);
> + int nid = NUMA_NO_NODE;
>
> - /*
> - * Don't fall back to default_nid yet -- we will plug
> - * cpus into nodes once the memory scan has discovered
> - * the topology.
> - */
> - if (nid == NUMA_NO_NODE) {
> + memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
What's the memset() for? AFAICT you only look at vphn_assoc in the
branch where __vphn_get_associativity() succeeds.
> +
> + if (__vphn_get_associativity(i, vphn_assoc) == 0) {
> + nid = associativity_to_nid(vphn_assoc);
> + __initialize_form1_numa_distance(vphn_assoc);
> + } else {
> +
> + /*
> + * Don't fall back to default_nid yet -- we will plug
> + * cpus into nodes once the memory scan has discovered
> + * the topology.
> + */
> cpu = of_get_cpu_node(i, NULL);
> BUG_ON(!cpu);
> - nid = of_node_to_nid_single(cpu);
> +
> + associativity = of_get_associativity(cpu);
> + if (associativity) {
> + nid = associativity_to_nid(associativity);
> + __initialize_form1_numa_distance(associativity);
> + }
> of_node_put(cpu);
> }
>
> @@ -781,8 +859,11 @@ static int __init parse_numa_properties(void)
> * have associativity properties. If none, then
> * everything goes to default_nid.
> */
> - nid = of_node_to_nid_single(memory);
> - if (nid < 0)
> + associativity = of_get_associativity(memory);
> + if (associativity) {
> + nid = associativity_to_nid(associativity);
> + __initialize_form1_numa_distance(associativity);
> + } else
> nid = default_nid;
>
> fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> index 7e970f81d8ff..778b6ab35f0d 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
> @@ -498,6 +498,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
> return saved_rc;
> }
>
> + update_numa_distance(dn);
> +
> rc = dlpar_online_cpu(dn);
> if (rc) {
> saved_rc = rc;
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 36f66556a7c6..40d350f31a34 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -180,6 +180,8 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
> return -ENODEV;
> }
>
> + update_numa_distance(lmb_node);
> +
> dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
> if (!dr_node) {
> dlpar_free_cc_nodes(lmb_node);
> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
> index 1f051a786fb3..663a0859cf13 100644
> --- a/arch/powerpc/platforms/pseries/pseries.h
> +++ b/arch/powerpc/platforms/pseries/pseries.h
> @@ -113,4 +113,5 @@ extern u32 pseries_security_flavor;
> void pseries_setup_security_mitigations(void);
> void pseries_lpar_read_hblkrm_characteristics(void);
>
> +void update_numa_distance(struct device_node *node);
> #endif /* _PSERIES_PSERIES_H */
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: not available
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20210722/63c32a92/attachment-0001.sig>
More information about the Linuxppc-dev
mailing list