[RFC PATCH 8/8] powerpc/papr_scm: Use FORM2 associativity details

Aneesh Kumar K.V aneesh.kumar at linux.ibm.com
Tue Jun 15 15:57:50 AEST 2021


David Gibson <david at gibson.dropbear.id.au> writes:

> On Mon, Jun 14, 2021 at 10:10:03PM +0530, Aneesh Kumar K.V wrote:
>> FORM2 introduces a concept of secondary domain which is identical to the
>> concept of FORM1 primary domain. Use secondary domain as the numa node
>> when using persistent memory device. For DAX kmem use the logical domain
>> id introduced in FORM2. This new numa node
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
>> ---
>>  arch/powerpc/mm/numa.c                    | 28 +++++++++++++++++++++++
>>  arch/powerpc/platforms/pseries/papr_scm.c | 26 +++++++++++++--------
>>  arch/powerpc/platforms/pseries/pseries.h  |  1 +
>>  3 files changed, 45 insertions(+), 10 deletions(-)
>> 
>> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
>> index 86cd2af014f7..b9ac6d02e944 100644
>> --- a/arch/powerpc/mm/numa.c
>> +++ b/arch/powerpc/mm/numa.c
>> @@ -265,6 +265,34 @@ static int associativity_to_nid(const __be32 *associativity)
>>  	return nid;
>>  }
>>  
>> +int get_primary_and_secondary_domain(struct device_node *node, int *primary, int *secondary)
>> +{
>> +	int secondary_index;
>> +	const __be32 *associativity;
>> +
>> +	if (!numa_enabled) {
>> +		*primary = NUMA_NO_NODE;
>> +		*secondary = NUMA_NO_NODE;
>> +		return 0;
>> +	}
>> +
>> +	associativity = of_get_associativity(node);
>> +	if (!associativity)
>> +		return -ENODEV;
>> +
>> +	if (of_read_number(associativity, 1) >= primary_domain_index) {
>> +		*primary = of_read_number(&associativity[primary_domain_index], 1);
>> +		secondary_index = of_read_number(&distance_ref_points[1], 1);
>
> Secondary ID is always the second reference point, but primary depends
> on the length of resources?  That seems very weird.

primary_domain_index is distance_ref_points[0]. With Form2 we would find
the primary and secondary domain IDs to be the same for all resources other
than persistent memory devices. The usage w.r.t. persistent memory is
explained in patch 7.

With Form2 the primary domainID and secondary domainID are used to identify the NUMA nodes
the kernel should use when using persistent memory devices. Persistent memory devices
can also be used as regular memory using the DAX KMEM driver, and the primary domainID indicates
the numa node number the OS should use when using these devices as regular memory. The secondary
domainID is the numa node number that should be used when using this device as
persistent memory. In the latter case, we are interested in the locality of the
device to an established numa node. In the above example, if the last row represents a
persistent memory device/resource, NUMA node number 40 will be used when using the device
as regular memory and NUMA node number 0 will be the device numa node when using it as
a persistent memory device.


>
>> +		*secondary = of_read_number(&associativity[secondary_index], 1);
>> +	}
>> +	if (*primary == 0xffff || *primary >= nr_node_ids)
>> +		*primary = NUMA_NO_NODE;
>> +
>> +	if (*secondary == 0xffff || *secondary >= nr_node_ids)
>> +		*secondary = NUMA_NO_NODE;
>> +	return 0;
>> +}
>> +
>>  /* Returns the nid associated with the given device tree node,
>>   * or -1 if not found.
>>   */
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
>> index ef26fe40efb0..9bf2f1f3ddc5 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -18,6 +18,7 @@
>>  #include <asm/plpar_wrappers.h>
>>  #include <asm/papr_pdsm.h>
>>  #include <asm/mce.h>
>> +#include "pseries.h"
>>  
>>  #define BIND_ANY_ADDR (~0ul)
>>  
>> @@ -88,6 +89,8 @@ struct papr_scm_perf_stats {
>>  struct papr_scm_priv {
>>  	struct platform_device *pdev;
>>  	struct device_node *dn;
>> +	int numa_node;
>> +	int target_node;
>>  	uint32_t drc_index;
>>  	uint64_t blocks;
>>  	uint64_t block_size;
>> @@ -923,7 +926,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>>  	struct nd_mapping_desc mapping;
>>  	struct nd_region_desc ndr_desc;
>>  	unsigned long dimm_flags;
>> -	int target_nid, online_nid;
>>  	ssize_t stat_size;
>>  
>>  	p->bus_desc.ndctl = papr_scm_ndctl;
>> @@ -974,10 +976,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>>  	mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
>>  
>>  	memset(&ndr_desc, 0, sizeof(ndr_desc));
>> -	target_nid = dev_to_node(&p->pdev->dev);
>> -	online_nid = numa_map_to_online_node(target_nid);
>> -	ndr_desc.numa_node = online_nid;
>> -	ndr_desc.target_node = target_nid;
>> +	ndr_desc.numa_node = p->numa_node;
>> +	ndr_desc.target_node = p->target_node;
>>  	ndr_desc.res = &p->res;
>>  	ndr_desc.of_node = p->dn;
>>  	ndr_desc.provider_data = p;
>> @@ -1001,9 +1001,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>>  				ndr_desc.res, p->dn);
>>  		goto err;
>>  	}
>> -	if (target_nid != online_nid)
>> -		dev_info(dev, "Region registered with target node %d and online node %d",
>> -			 target_nid, online_nid);
>>  
>>  	mutex_lock(&papr_ndr_lock);
>>  	list_add_tail(&p->region_list, &papr_nd_regions);
>> @@ -1096,7 +1093,7 @@ static int papr_scm_probe(struct platform_device *pdev)
>>  	struct papr_scm_priv *p;
>>  	const char *uuid_str;
>>  	u64 uuid[2];
>> -	int rc;
>> +	int rc, numa_node;
>>  
>>  	/* check we have all the required DT properties */
>>  	if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
>> @@ -1119,11 +1116,20 @@ static int papr_scm_probe(struct platform_device *pdev)
>>  		return -ENODEV;
>>  	}
>>  
>> -
>>  	p = kzalloc(sizeof(*p), GFP_KERNEL);
>>  	if (!p)
>>  		return -ENOMEM;
>>  
>> +	if (get_primary_and_secondary_domain(dn, &p->target_node, &numa_node)) {
>> +		dev_err(&pdev->dev, "%pOF: missing NUMA attributes!\n", dn);
>> +		rc = -ENODEV;
>> +		goto err;
>> +	}
>> +	p->numa_node = numa_map_to_online_node(numa_node);
>> +	if (numa_node != p->numa_node)
>> +		dev_info(&pdev->dev, "Region registered with online node %d and device tree node %d",
>> +			 p->numa_node, numa_node);
>> +
>>  	/* Initialize the dimm mutex */
>>  	mutex_init(&p->health_mutex);
>>  
>> diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
>> index 663a0859cf13..9c2a1fc9ded1 100644
>> --- a/arch/powerpc/platforms/pseries/pseries.h
>> +++ b/arch/powerpc/platforms/pseries/pseries.h
>> @@ -114,4 +114,5 @@ void pseries_setup_security_mitigations(void);
>>  void pseries_lpar_read_hblkrm_characteristics(void);
>>  
>>  void update_numa_distance(struct device_node *node);
>> +int get_primary_and_secondary_domain(struct device_node *node, int *primary, int *secondary);
>>  #endif /* _PSERIES_PSERIES_H */
>
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson


More information about the Linuxppc-dev mailing list