[PATCH v3] cpufreq: powernv: Add support of frequency domain

Rafael J. Wysocki rjw at rjwysocki.net
Fri Feb 9 23:19:04 AEDT 2018


On Monday, January 22, 2018 9:17:34 AM CET Abhishek Goel wrote:
> Frequency-domain indicates group of CPUs that would share same frequency.
> It is detected using device-tree node "frequency-domain-indicator".
> frequency-domain-indicator is a bitmask which will have different value
> depending upon the generation of the processor.
> 
> CPUs of the same chip for which the result of a bitwise AND between
> their PIR and the frequency-domain-indicator is the same share the same
> frequency.
> 
> In this patch, we define hash-table indexed by the aforementioned
> bitwise ANDed value to store the cpumask of the CPUs sharing the same
> frequency domain. Further, the cpufreq policy will be created per
> frequency-domain
> 
> So for POWER9, a cpufreq policy is created per quad while for POWER8 it
> is created per core. Governor decides frequency for each policy but
> multiple cores may come under same policy. In such case frequency needs
> to be set on each core sharing that policy.
> 
> Signed-off-by: Abhishek Goel <huntbag at linux.vnet.ibm.com>
> ---
> 
> Skiboot patch required for the corresponding device-tree changes have been
> posted here : http://patchwork.ozlabs.org/patch/862256/
> 
>  drivers/cpufreq/powernv-cpufreq.c | 104 ++++++++++++++++++++++++++++++++++----
>  1 file changed, 95 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index b6d7c4c..aab23a4 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -37,6 +37,7 @@
>  #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */
>  #include <asm/opal.h>
>  #include <linux/timer.h>
> +#include <linux/hashtable.h>
>  
>  #define POWERNV_MAX_PSTATES	256
>  #define PMSR_PSAFE_ENABLE	(1UL << 30)
> @@ -130,6 +131,9 @@ enum throttle_reason_type {
>  static int nr_chips;
>  static DEFINE_PER_CPU(struct chip *, chip_info);
>  
> +static u32 freq_domain_indicator;
> +static bool p9_occ_quirk;
> +
>  /*
>   * Note:
>   * The set of pstates consists of contiguous integers.
> @@ -194,6 +198,38 @@ static inline void reset_gpstates(struct cpufreq_policy *policy)
>  	gpstates->last_gpstate_idx = 0;
>  }
>  
> +#define SIZE NR_CPUS
> +#define ORDER_FREQ_MAP ilog2(SIZE)
> +
> +static DEFINE_HASHTABLE(freq_domain_map, ORDER_FREQ_MAP);
> +
> +struct hashmap {
> +	cpumask_t mask;
> +	int chip_id;
> +	u32 pir_key;
> +	struct hlist_node hash_node;
> +};
> +
> +static void insert(u32 key, int cpu)
> +{
> +	struct hashmap *data;
> +
> +	hash_for_each_possible(freq_domain_map, data, hash_node, key%SIZE) {
> +		if (data->chip_id == cpu_to_chip_id(cpu) &&
> +			data->pir_key == key) {
> +			cpumask_set_cpu(cpu, &data->mask);
> +			return;
> +		}
> +	}
> +
> +	data = kzalloc(sizeof(*data), GFP_KERNEL);
> +	hash_add(freq_domain_map, &data->hash_node, key%SIZE);
> +	cpumask_set_cpu(cpu, &data->mask);
> +	data->chip_id = cpu_to_chip_id(cpu);
> +	data->pir_key = key;
> +
> +}
> +
>  /*
>   * Initialize the freq table based on data obtained
>   * from the firmware passed via device-tree
> @@ -206,6 +242,7 @@ static int init_powernv_pstates(void)
>  	u32 len_ids, len_freqs;
>  	u32 pstate_min, pstate_max, pstate_nominal;
>  	u32 pstate_turbo, pstate_ultra_turbo;
> +	u32 key;
>  
>  	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
>  	if (!power_mgt) {
> @@ -246,9 +283,18 @@ static int init_powernv_pstates(void)
>  	else
>  		powernv_pstate_info.wof_enabled = true;
>  
> +	if (of_device_is_compatible(power_mgt, "freq-domain-v1") &&
> +		of_property_read_u32(power_mgt, "ibm,freq-domain-indicator",
> +				&freq_domain_indicator))
> +		pr_warn("ibm,freq-domain-indicator not found\n");
> +
> +        if (of_device_is_compatible(power_mgt, "p9-occ-quirk"))
> +		p9_occ_quirk = true;
> +
>  next:
>  	pr_info("cpufreq pstate min %d nominal %d max %d\n", pstate_min,
>  		pstate_nominal, pstate_max);
> +	pr_info("frequency domain indicator %d", freq_domain_indicator);
>  	pr_info("Workload Optimized Frequency is %s in the platform\n",
>  		(powernv_pstate_info.wof_enabled) ? "enabled" : "disabled");
>  
> @@ -276,6 +322,15 @@ static int init_powernv_pstates(void)
>  		return -ENODEV;
>  	}
>  
> +	if (freq_domain_indicator) {
> +		hash_init(freq_domain_map);
> +		for_each_possible_cpu(i) {
> +			key = ((u32) get_hard_smp_processor_id(i) &
> +				freq_domain_indicator);
> +			insert(key, i);
> +		}
> +	}
> +
>  	powernv_pstate_info.nr_pstates = nr_pstates;
>  	pr_debug("NR PStates %d\n", nr_pstates);
>  	for (i = 0; i < nr_pstates; i++) {
> @@ -760,25 +815,56 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy,
>  
>  	spin_unlock(&gpstates->gpstate_lock);
>  
> -	/*
> -	 * Use smp_call_function to send IPI and execute the
> -	 * mtspr on target CPU.  We could do that without IPI
> -	 * if current CPU is within policy->cpus (core)
> +	/* The current DVFS implementation in firmware requires that to set a
> +	 * frequency in a quad, all cores of the quad need to set frequency in
> +	 * their respective PMCR's. Ideally setting frequency on any of the
> +	 * core of that quad should change frequency for the quad.
>  	 */
> -	smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
> +	if (p9_occ_quirk) {
> +		cpumask_t temp;
> +		u32 cpu;
> +
> +		cpumask_copy(&temp, policy->cpus);
> +		while (!cpumask_empty(&temp)) {
> +			cpu = cpumask_first(&temp);
> +			smp_call_function_any(cpu_sibling_mask(cpu),
> +					set_pstate, &freq_data, 1);
> +			cpumask_andnot(&temp, &temp, cpu_sibling_mask(cpu));
> +		}
> +	} else {
> +		smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
> +	}
> +
>  	return 0;
>  }
>  
>  static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
> -	int base, i, ret;
> +	int ret;
>  	struct kernfs_node *kn;
>  	struct global_pstate_info *gpstates;
>  
> -	base = cpu_first_thread_sibling(policy->cpu);
> +	if (!freq_domain_indicator) {

IMO it would be more straightforward to write this conditional as

if (freq_domain_indicator) {
   do something
} else {
   do something else
}

> +		int base, i;
>  
> -	for (i = 0; i < threads_per_core; i++)
> -		cpumask_set_cpu(base + i, policy->cpus);
> +		base = cpu_first_thread_sibling(policy->cpu);
> +		for (i = 0; i < threads_per_core; i++)
> +			cpumask_set_cpu(base + i, policy->cpus);
> +	} else {
> +		u32 key;
> +		struct hashmap *data;
> +
> +		key = ((u32) get_hard_smp_processor_id(policy->cpu) &
> +				freq_domain_indicator);

The outer paren on the right-hand side is not necessary.

The explicit case to u32 isn't necessary as well AFAICS.

> +		hash_for_each_possible(freq_domain_map, data, hash_node,
> +								 key%SIZE) {
> +			if (data->chip_id == cpu_to_chip_id(policy->cpu) &&
> +				data->pir_key == key) {

You have slightly confusing indentation here, as the continuation of the
condition is at the same level as the statements in the conditional block.

> +				cpumask_copy(policy->cpus, &data->mask);
> +				break;
> +			}
> +		}
> +	}
>  
>  	kn = kernfs_find_and_get(policy->kobj.sd, throttle_attr_grp.name);
>  	if (!kn) {
> 

Overall, from cpufreq perspective, I don't see anything to complain about
here.

Thanks,
Rafael



More information about the Linuxppc-dev mailing list