[PATCH v2 1/4] tools/perf: Fix perf bench futex to correct usage of affinity for machines with #CPUs > 1K

Srikar Dronamraju srikar at linux.vnet.ibm.com
Fri Apr 8 22:27:46 AEST 2022


* Athira Rajeev <atrajeev at linux.vnet.ibm.com> [2022-04-06 23:21:10]:

> The perf bench futex testcase fails on systems with more than
> 1K CPUs.
> 
> Testcase: perf bench futex all
> Failure snippet:
> <<>>Running futex/hash benchmark...
> 
> perf: pthread_create: No such file or directory
> <<>>
> 
> In all the futex benchmarks (i.e. hash, lock-pi, requeue, wake,
> wake-parallel), pthread_create is invoked in the respective bench_futex_*
> function. Though the logs show a direct failure from pthread_create,
> strace logs showed that the actual failure is from "sched_setaffinity"
> returning EINVAL (invalid argument). This happens because the default
> mask size in glibc is 1024. To overcome this 1024 CPUs mask size
> limitation of cpu_set_t, change the mask size using the CPU_*_S macros.
> 
> This patch addresses this by changing all the futex benchmarks to use
> CPU_ALLOC to allocate the cpumask, CPU_ALLOC_SIZE for its size, and
> CPU_SET_S to set the mask.
> 
> Tested-by: Disha Goel <disgoel at linux.vnet.ibm.com>
> Signed-off-by: Athira Rajeev <atrajeev at linux.vnet.ibm.com>
> Reported-by: Disha Goel <disgoel at linux.vnet.ibm.com>

Looks good to me
Reviewed-by: Srikar Dronamraju <srikar at linux.vnet.ibm.com>


> ---
>  tools/perf/bench/futex-hash.c          | 26 +++++++++++++++++++-------
>  tools/perf/bench/futex-lock-pi.c       | 21 ++++++++++++++++-----
>  tools/perf/bench/futex-requeue.c       | 21 ++++++++++++++++-----
>  tools/perf/bench/futex-wake-parallel.c | 21 ++++++++++++++++-----
>  tools/perf/bench/futex-wake.c          | 22 ++++++++++++++++------
>  5 files changed, 83 insertions(+), 28 deletions(-)
> 
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index 9627b6ab8670..dfce64e551e2 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -122,12 +122,14 @@ static void print_summary(void)
>  int bench_futex_hash(int argc, const char **argv)
>  {
>  	int ret = 0;
> -	cpu_set_t cpuset;
> +	cpu_set_t *cpuset;
>  	struct sigaction act;
>  	unsigned int i;
>  	pthread_attr_t thread_attr;
>  	struct worker *worker = NULL;
>  	struct perf_cpu_map *cpu;
> +	int nrcpus;
> +	size_t size;
> 
>  	argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
>  	if (argc) {
> @@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv)
>  	threads_starting = params.nthreads;
>  	pthread_attr_init(&thread_attr);
>  	gettimeofday(&bench__start, NULL);
> +
> +	nrcpus = perf_cpu_map__nr(cpu);
> +	cpuset = CPU_ALLOC(nrcpus);
> +	BUG_ON(!cpuset);
> +	size = CPU_ALLOC_SIZE(nrcpus);
> +
>  	for (i = 0; i < params.nthreads; i++) {
>  		worker[i].tid = i;
>  		worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
>  		if (!worker[i].futex)
>  			goto errmem;
> 
> -		CPU_ZERO(&cpuset);
> -		CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> +		CPU_ZERO_S(size, cpuset);
> 
> -		ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
> -		if (ret)
> +		CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
> +		ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
> +		if (ret) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> -
> +		}
>  		ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
>  				     (void *)(struct worker *) &worker[i]);
> -		if (ret)
> +		if (ret) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_create");
> +		}
> 
>  	}
> +	CPU_FREE(cpuset);
>  	pthread_attr_destroy(&thread_attr);
> 
>  	pthread_mutex_lock(&thread_lock);
> diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
> index a512a320df74..61c3bb80d4cf 100644
> --- a/tools/perf/bench/futex-lock-pi.c
> +++ b/tools/perf/bench/futex-lock-pi.c
> @@ -120,11 +120,17 @@ static void *workerfn(void *arg)
>  static void create_threads(struct worker *w, pthread_attr_t thread_attr,
>  			   struct perf_cpu_map *cpu)
>  {
> -	cpu_set_t cpuset;
> +	cpu_set_t *cpuset;
>  	unsigned int i;
> +	int nrcpus =  perf_cpu_map__nr(cpu);
> +	size_t size;
> 
>  	threads_starting = params.nthreads;
> 
> +	cpuset = CPU_ALLOC(nrcpus);
> +	BUG_ON(!cpuset);
> +	size = CPU_ALLOC_SIZE(nrcpus);
> +
>  	for (i = 0; i < params.nthreads; i++) {
>  		worker[i].tid = i;
> 
> @@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
>  		} else
>  			worker[i].futex = &global_futex;
> 
> -		CPU_ZERO(&cpuset);
> -		CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> +		CPU_ZERO_S(size, cpuset);
> +		CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
> 
> -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> +		if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> +		}
> 
> -		if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
> +		if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_create");
> +		}
>  	}
> +	CPU_FREE(cpuset);
>  }
> 
>  int bench_futex_lock_pi(int argc, const char **argv)
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index aca47ce8b1e7..2cb013f7ffe5 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused)
>  static void block_threads(pthread_t *w,
>  			  pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
>  {
> -	cpu_set_t cpuset;
> +	cpu_set_t *cpuset;
>  	unsigned int i;
> +	int nrcpus = perf_cpu_map__nr(cpu);
> +	size_t size;
> 
>  	threads_starting = params.nthreads;
> 
> +	cpuset = CPU_ALLOC(nrcpus);
> +	BUG_ON(!cpuset);
> +	size = CPU_ALLOC_SIZE(nrcpus);
> +
>  	/* create and block all threads */
>  	for (i = 0; i < params.nthreads; i++) {
> -		CPU_ZERO(&cpuset);
> -		CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> +		CPU_ZERO_S(size, cpuset);
> +		CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
> 
> -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> +		if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> +		}
> 
> -		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
> +		if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_create");
> +		}
>  	}
> +	CPU_FREE(cpuset);
>  }
> 
>  static void toggle_done(int sig __maybe_unused,
> diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
> index 888ee6037945..efa5070a5eb3 100644
> --- a/tools/perf/bench/futex-wake-parallel.c
> +++ b/tools/perf/bench/futex-wake-parallel.c
> @@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused)
>  static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
>  			  struct perf_cpu_map *cpu)
>  {
> -	cpu_set_t cpuset;
> +	cpu_set_t *cpuset;
>  	unsigned int i;
> +	int nrcpus = perf_cpu_map__nr(cpu);
> +	size_t size;
> 
>  	threads_starting = params.nthreads;
> 
> +	cpuset = CPU_ALLOC(nrcpus);
> +	BUG_ON(!cpuset);
> +	size = CPU_ALLOC_SIZE(nrcpus);
> +
>  	/* create and block all threads */
>  	for (i = 0; i < params.nthreads; i++) {
> -		CPU_ZERO(&cpuset);
> -		CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> +		CPU_ZERO_S(size, cpuset);
> +		CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
> 
> -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> +		if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> +		}
> 
> -		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
> +		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_create");
> +		}
>  	}
> +	CPU_FREE(cpuset);
>  }
> 
>  static void print_run(struct thread_data *waking_worker, unsigned int run_num)
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index aa82db51c0ab..3a10f54900c1 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -97,22 +97,32 @@ static void print_summary(void)
>  static void block_threads(pthread_t *w,
>  			  pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
>  {
> -	cpu_set_t cpuset;
> +	cpu_set_t *cpuset;
>  	unsigned int i;
> -
> +	size_t size;
> +	int nrcpus = perf_cpu_map__nr(cpu);
>  	threads_starting = params.nthreads;
> 
> +	cpuset = CPU_ALLOC(nrcpus);
> +	BUG_ON(!cpuset);
> +	size = CPU_ALLOC_SIZE(nrcpus);
> +
>  	/* create and block all threads */
>  	for (i = 0; i < params.nthreads; i++) {
> -		CPU_ZERO(&cpuset);
> -		CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> +		CPU_ZERO_S(size, cpuset);
> +		CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
> 
> -		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> +		if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> +		}
> 
> -		if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
> +		if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
> +			CPU_FREE(cpuset);
>  			err(EXIT_FAILURE, "pthread_create");
> +		}
>  	}
> +	CPU_FREE(cpuset);
>  }
> 
>  static void toggle_done(int sig __maybe_unused,
> -- 
> 2.35.1
> 


More information about the Linuxppc-dev mailing list