[PATCH v13 4/5] powerpc/perf: Add core imc pmu support
Madhavan Srinivasan
maddy at linux.vnet.ibm.com
Sat Jul 22 17:12:31 AEST 2017
My bad, I forgot to change the authorship of this patch.
From: Anju T Sudhakar <anju at linux.vnet.ibm.com>
On Wednesday 19 July 2017 03:06 AM, Madhavan Srinivasan wrote:
> Add support to register Core In-Memory Collection pmu counters.
> Patch adds core imc specific data structures, along with memory
> init functions and cpuhotplug support.
>
> Signed-off-by: Anju T Sudhakar <anju at linux.vnet.ibm.com>
> Signed-off-by: Hemant Kumar <hemant at linux.vnet.ibm.com>
> Signed-off-by: Madhavan Srinivasan <maddy at linux.vnet.ibm.com>
> ---
> arch/powerpc/perf/imc-pmu.c | 303 +++++++++++++++++++++++++++++++++++++++++++-
> include/linux/cpuhotplug.h | 1 +
> 2 files changed, 299 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
> index 6e00ea7358a2..cd8ba3b98d29 100644
> --- a/arch/powerpc/perf/imc-pmu.c
> +++ b/arch/powerpc/perf/imc-pmu.c
> @@ -18,7 +18,6 @@
> #include <asm/smp.h>
> #include <linux/string.h>
>
> -//Nest imc data structures and variable
> /*
> * Used to avoid races in counting the nest-pmu units during hotplug
> * register and unregister
> @@ -30,6 +29,11 @@ static cpumask_t nest_imc_cpumask;
> struct imc_pmu_ref *nest_imc_refc;
> static int nest_pmus;
>
> +//Core imc data structs and variables
> +static cpumask_t core_imc_cpumask;
> +struct imc_pmu_ref *core_imc_refc;
> +static struct imc_pmu *core_imc_pmu;
> +
> struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
> {
> return container_of(event->pmu, struct imc_pmu, pmu);
> @@ -61,11 +65,13 @@ static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
> struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
> cpumask_t *active_mask;
>
> - /* Subsequenct patch will add more pmu types here */
> switch(imc_pmu->domain){
> case IMC_DOMAIN_NEST:
> active_mask = &nest_imc_cpumask;
> break;
> + case IMC_DOMAIN_CORE:
> + active_mask = &core_imc_cpumask;
> + break;
> default:
> return 0;
> }
> @@ -485,6 +491,240 @@ static int nest_imc_event_init(struct perf_event *event)
> return 0;
> }
>
> +/*
> + * core_imc_mem_init : Initializes memory for the current core.
> + *
> + * Uses alloc_pages_node() and uses the returned address as an argument to
> + * an opal call to configure the pdbar. The address sent as an argument is
> + * converted to physical address before the opal call is made. This is the
> + * base address at which the core imc counters are populated.
> + */
> +static int core_imc_mem_init(int cpu, int size)
> +{
> + int phys_id, rc = 0, core_id = (cpu / threads_per_core);
> + struct imc_mem_info *mem_info;
> +
> + /*
> + * alloc_pages_node() will allocate memory for core in the
> + * local node only.
> + */
> + phys_id = topology_physical_package_id(cpu);
> + mem_info = &core_imc_pmu->mem_info[core_id];
> + mem_info->id = core_id;
> +
> + /* We need only vbase for core counters */
> + mem_info->vbase = page_address(alloc_pages_node(phys_id,
> + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
> + get_order(size)));
> + if (!mem_info->vbase)
> + return -ENOMEM;
> +
> + /* Init the mutex */
> + core_imc_refc[core_id].id = core_id;
> + mutex_init(&core_imc_refc[core_id].lock);
> +
> + rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
> + __pa((void *)mem_info->vbase),
> + get_hard_smp_processor_id(cpu));
> + if (rc) {
> + free_pages((u64)mem_info->vbase, get_order(size));
> + mem_info->vbase = NULL;
> + }
> +
> + return rc;
> +}
> +
> +static bool is_core_imc_mem_inited(int cpu)
> +{
> + struct imc_mem_info *mem_info;
> + int core_id = (cpu / threads_per_core);
> +
> + mem_info = &core_imc_pmu->mem_info[core_id];
> + if (!mem_info->vbase)
> + return false;
> +
> + return true;
> +}
> +
> +static int ppc_core_imc_cpu_online(unsigned int cpu)
> +{
> + const struct cpumask *l_cpumask;
> + static struct cpumask tmp_mask;
> + int ret = 0;
> +
> + /* Get the cpumask for this core */
> + l_cpumask = cpu_sibling_mask(cpu);
> +
> + /* If a cpu for this core is already set, then, don't do anything */
> + if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
> + return 0;
> +
> + if (!is_core_imc_mem_inited(cpu)) {
> + ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
> + if (ret) {
> + pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
> + return ret;
> + }
> + }
> +
> + /* set the cpu in the mask */
> + cpumask_set_cpu(cpu, &core_imc_cpumask);
> + return 0;
> +}
> +
> +static int ppc_core_imc_cpu_offline(unsigned int cpu)
> +{
> + unsigned int ncpu, core_id;
> + struct imc_pmu_ref *ref;
> +
> + /*
> + * clear this cpu out of the mask, if not present in the mask,
> + * don't bother doing anything.
> + */
> + if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
> + return 0;
> +
> + /* Find any online cpu in that core except the current "cpu" */
> + ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
> +
> + if (ncpu >= 0 && ncpu < nr_cpu_ids) {
> + cpumask_set_cpu(ncpu, &core_imc_cpumask);
> + perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
> + } else {
> + /*
> + * If this is the last cpu in this core, then skip taking the reference
> + * count mutex lock for this core and directly zero "refc" for
> + * this core.
> + */
> + opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
> + get_hard_smp_processor_id(cpu));
> + core_id = cpu / threads_per_core;
> + ref = &core_imc_refc[core_id];
> + if (!ref)
> + return -EINVAL;
> +
> + ref->refc = 0;
> + }
> + return 0;
> +}
> +
> +static int core_imc_pmu_cpumask_init(void)
> +{
> + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
> + "perf/powerpc/imc_core:online",
> + ppc_core_imc_cpu_online,
> + ppc_core_imc_cpu_offline);
> +}
> +
> +static void core_imc_counters_release(struct perf_event *event)
> +{
> + int rc, core_id;
> + struct imc_pmu_ref *ref;
> +
> + if (event->cpu < 0)
> + return;
> + /*
> + * See if we need to disable the IMC PMU.
> + * If no events are currently in use, then we have to take a
> + * mutex to ensure that we don't race with another task doing
> + * enable or disable the core counters.
> + */
> + core_id = event->cpu / threads_per_core;
> +
> + /* Take the mutex lock and decrement the reference count for this core */
> + ref = &core_imc_refc[core_id];
> + if (!ref)
> + return;
> +
> + mutex_lock(&ref->lock);
> + ref->refc--;
> + if (ref->refc == 0) {
> + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
> + get_hard_smp_processor_id(event->cpu));
> + if (rc) {
> + mutex_unlock(&ref->lock);
> + pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
> + return;
> + }
> + } else if (ref->refc < 0) {
> + WARN(1, "core-imc: Invalid event reference count\n");
> + ref->refc = 0;
> + }
> + mutex_unlock(&ref->lock);
> +}
> +
> +static int core_imc_event_init(struct perf_event *event)
> +{
> + int core_id, rc;
> + u64 config = event->attr.config;
> + struct imc_mem_info *pcmi;
> + struct imc_pmu *pmu;
> + struct imc_pmu_ref *ref;
> +
> + if (event->attr.type != event->pmu->type)
> + return -ENOENT;
> +
> + /* Sampling not supported */
> + if (event->hw.sample_period)
> + return -EINVAL;
> +
> + /* unsupported modes and filters */
> + if (event->attr.exclude_user ||
> + event->attr.exclude_kernel ||
> + event->attr.exclude_hv ||
> + event->attr.exclude_idle ||
> + event->attr.exclude_host ||
> + event->attr.exclude_guest)
> + return -EINVAL;
> +
> + if (event->cpu < 0)
> + return -EINVAL;
> +
> + event->hw.idx = -1;
> + pmu = imc_event_to_pmu(event);
> +
> + /* Sanity check for config (event offset) */
> + if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
> + return -EINVAL;
> +
> + if (!is_core_imc_mem_inited(event->cpu))
> + return -ENODEV;
> +
> + core_id = event->cpu / threads_per_core;
> + pcmi = &core_imc_pmu->mem_info[core_id];
> + if ((!pcmi->vbase))
> + return -ENODEV;
> +
> + /* Get the core_imc mutex for this core */
> + ref = &core_imc_refc[core_id];
> + if (!ref)
> + return -EINVAL;
> +
> + /*
> + * Core pmu units are enabled only when it is used.
> + * See if this is triggered for the first time.
> + * If yes, take the mutex lock and enable the core counters.
> + * If not, just increment the count in core_imc_refc struct.
> + */
> + mutex_lock(&ref->lock);
> + if (ref->refc == 0) {
> + rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
> + get_hard_smp_processor_id(event->cpu));
> + if (rc) {
> + mutex_unlock(&ref->lock);
> + pr_err("core-imc: Unable to start the counters for core %d\n",
> + core_id);
> + return rc;
> + }
> + }
> + ++ref->refc;
> + mutex_unlock(&ref->lock);
> +
> + event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
> + event->destroy = core_imc_counters_release;
> + return 0;
> +}
> +
> static u64 * get_event_base_addr(struct perf_event *event)
> {
> /*
> @@ -563,12 +803,15 @@ static int update_pmu_ops(struct imc_pmu *pmu)
> pmu->pmu.attr_groups = pmu->attr_groups;
> pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
>
> - /* Subsequenct patch will add more pmu types here */
> switch (pmu->domain) {
> case IMC_DOMAIN_NEST:
> pmu->pmu.event_init = nest_imc_event_init;
> pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
> break;
> + case IMC_DOMAIN_CORE:
> + pmu->pmu.event_init = core_imc_event_init;
> + pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
> + break;
> default:
> break;
> }
> @@ -620,6 +863,22 @@ static int init_nest_pmu_ref(void)
> return 0;
> }
>
> +static void cleanup_all_core_imc_memory(void)
> +{
> + int i, nr_cores = num_present_cpus() / threads_per_core;
> + struct imc_mem_info *ptr = core_imc_pmu->mem_info;
> + int size = core_imc_pmu->counter_mem_size;
> +
> + /* mem_info will never be NULL */
> + for (i = 0; i < nr_cores; i++) {
> + if (ptr[i].vbase)
> + free_pages((u64)ptr->vbase, get_order(size));
> + }
> +
> + kfree(ptr);
> + kfree(core_imc_refc);
> +}
> +
> /*
> * Common function to unregister cpu hotplug callback and
> * free the memory.
> @@ -640,6 +899,12 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
> mutex_unlock(&nest_init_lock);
> }
>
> + /* Free core_imc memory */
> + if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
> + cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
> + cleanup_all_core_imc_memory();
> + }
> +
> /* Only free the attr_groups which are dynamically allocated */
> kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
> kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
> @@ -655,11 +920,11 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
> int pmu_index)
> {
> const char *s;
> + int nr_cores;
>
> if (of_property_read_string(parent, "name", &s))
> return -ENODEV;
>
> - /* Subsequenct patch will add more pmu types here */
> switch (pmu_ptr->domain) {
> case IMC_DOMAIN_NEST:
> /* Update the pmu name */
> @@ -670,6 +935,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
> /* Needed for hotplug/migration */
> per_nest_pmu_arr[pmu_index] = pmu_ptr;
> break;
> + case IMC_DOMAIN_CORE:
> + /* Update the pmu name */
> + pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
> + if (!pmu_ptr->pmu.name)
> + return -ENOMEM;
> +
> + nr_cores = num_present_cpus() / threads_per_core;
> + pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
> + GFP_KERNEL);
> +
> + if (!pmu_ptr->mem_info)
> + return -ENOMEM;
> +
> + core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
> + GFP_KERNEL);
> +
> + if (!core_imc_refc)
> + return -ENOMEM;
> +
> + core_imc_pmu = pmu_ptr;
> + break;
> default:
> return -EINVAL;
> }
> @@ -695,7 +981,6 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
> if (ret)
> goto err_free;
>
> - /* Subsequenct patch will add more pmu types here */
> switch (pmu_ptr->domain) {
> case IMC_DOMAIN_NEST:
> /*
> @@ -721,6 +1006,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
> nest_pmus++;
> mutex_unlock(&nest_init_lock);
> break;
> + case IMC_DOMAIN_CORE:
> + ret = core_imc_pmu_cpumask_init();
> + if (ret) {
> + cleanup_all_core_imc_memory();
> + return ret;
> + }
> +
> + break;
> default:
> return -1; /* Unknown domain */
> }
> diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
> index 0853a14b1fa1..1be505db0090 100644
> --- a/include/linux/cpuhotplug.h
> +++ b/include/linux/cpuhotplug.h
> @@ -140,6 +140,7 @@ enum cpuhp_state {
> CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
> CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
> CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
> + CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
> CPUHP_AP_WORKQUEUE_ONLINE,
> CPUHP_AP_RCUTREE_ONLINE,
> CPUHP_AP_ONLINE_DYN,
More information about the Linuxppc-dev
mailing list