Endless soft-lockups for compiling workload since next-20200519

Peter Zijlstra peterz at infradead.org
Thu May 21 21:00:27 AEST 2020


On Thu, May 21, 2020 at 12:49:37PM +0200, Peter Zijlstra wrote:
> On Thu, May 21, 2020 at 11:39:39AM +0200, Peter Zijlstra wrote:
> > On Thu, May 21, 2020 at 02:40:36AM +0200, Frederic Weisbecker wrote:
> 
> > This:
> > 
> > >         smp_call_function_single_async() {             smp_call_function_single_async() {
> > >             // verified csd->flags != CSD_LOCK             // verified csd->flags != CSD_LOCK
> > >             csd->flags = CSD_LOCK                          csd->flags = CSD_LOCK
> > 
> > Concurrent smp_call_function_single_async() calls using the same csd
> > are what I'm looking at as well.
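
For reference, the window is in the CSD_LOCK acquisition itself: it is a
plain test of csd->flags followed by a plain store, not an atomic
test-and-set, so two submitters can both pass the test before either
store becomes visible. The body of smp_call_function_single_async() is
roughly this (a simplified paraphrase of kernel/smp.c, not part of the
patch below):

	if (csd->flags & CSD_FLAG_LOCK)		/* "verified csd->flags != CSD_LOCK" */
		return -EBUSY;			/* (or warn and wait, depending on the tree) */

	csd->flags = CSD_FLAG_LOCK;		/* both racing submitters get here */
	smp_wmb();

	return generic_exec_single(cpu, csd, csd->func, csd->info);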
> 
> So something like this ought to cure the fundamental problem and make
> smp_call_function_single_async() more user friendly, but also more
> expensive.
> 
> The problem is that while the ILB case is easy to fix, I can't seem to
> find an equally nice solution for the ttwu_queue_remote() case; that
> would basically require sticking the wake_csd in task_struct. I'll also
> post that.
> 
> So it's either this:

Or this:

---
 include/linux/sched.h | 4 ++++
 kernel/sched/core.c   | 7 ++++---
 kernel/sched/fair.c   | 2 +-
 kernel/sched/sched.h  | 1 -
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f38d62c4632c..136ee400b568 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -696,6 +696,10 @@ struct task_struct {
 	struct uclamp_se		uclamp[UCLAMP_CNT];
 #endif
 
+#ifdef CONFIG_SMP
+	call_single_data_t		wake_csd;
+#endif
+
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* List of struct preempt_notifier: */
 	struct hlist_head		preempt_notifiers;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5b286469e26e..a7129652e89b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2320,7 +2320,7 @@ static void ttwu_queue_remote(struct task_struct *p, int cpu, int wake_flags)
 
 	if (llist_add(&p->wake_entry, &rq->wake_list)) {
 		if (!set_nr_if_polling(rq->idle))
-			smp_call_function_single_async(cpu, &rq->wake_csd);
+			smp_call_function_single_async(cpu, &p->wake_csd);
 		else
 			trace_sched_wake_idle_without_ipi(cpu);
 	}
@@ -2921,6 +2921,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 #if defined(CONFIG_SMP)
 	p->on_cpu = 0;
+	p->wake_csd = (struct __call_single_data) {
+		.func = wake_csd_func,
+	};
 #endif
 	init_task_preempt_count(p);
 #ifdef CONFIG_SMP
@@ -6723,8 +6726,6 @@ void __init sched_init(void)
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
 		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 
-		rq_csd_init(rq, &rq->wake_csd, wake_csd_func);
-
 		INIT_LIST_HEAD(&rq->cfs_tasks);
 
 		rq_attach_root(rq, &def_root_domain);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 01f94cf52783..b6d8a7b991f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10033,7 +10033,7 @@ static void kick_ilb(unsigned int flags)
 	 * is idle. And the softirq performing nohz idle load balance
 	 * will be run before returning from the IPI.
 	 */
-	smp_call_function_single_async(ilb_cpu, &cpu_rq(ilb_cpu)->nohz_csd);
+	smp_call_function_single_async(ilb_cpu, &this_rq()->nohz_csd);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f7ab6334e992..c35f0ef43ab0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1021,7 +1021,6 @@ struct rq {
 #endif
 
 #ifdef CONFIG_SMP
-	call_single_data_t	wake_csd;
 	struct llist_head	wake_list;
 #endif
 

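Both hunks follow the same rule: give each potential concurrent submitter
its own csd, so that no two callers can pass the same (possibly still
locked) csd to smp_call_function_single_async(). The wakeup path gets a
per-task csd, and kick_ilb() now submits with the kicking CPU's own
rq->nohz_csd rather than the target CPU's. A generic sketch of that
pattern, with made-up identifiers rather than anything from the patch:

	#include <linux/smp.h>
	#include <linux/percpu.h>

	/* Hypothetical example: one csd per submitting CPU, so two CPUs can
	 * never hand the same csd to smp_call_function_single_async().
	 */
	static void my_ipi_func(void *info)
	{
		/* runs on the target CPU when the IPI is handled */
	}

	static DEFINE_PER_CPU(call_single_data_t, my_csd) = {
		.func	= my_ipi_func,
	};

	static void kick_cpu(int cpu)
	{
		preempt_disable();
		/* submit with the local CPU's csd; no other CPU hands this one in */
		smp_call_function_single_async(cpu, this_cpu_ptr(&my_csd));
		preempt_enable();
	}

Reuse of the same csd by the same submitter before the previous IPI has
run must still be prevented by the caller; a per-submitter csd only
removes the cross-CPU sharing shown in the diagram above.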
