[Skiboot] [PATCH 16/28] cpu: Fix problem calling time_wait with lock held

Benjamin Herrenschmidt benh at kernel.crashing.org
Thu Jul 7 11:50:07 AEST 2016


Also make the locking around re-init safer, properly block the
OS from restarting a thread that was caught for re-init.

Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
---
 core/cpu.c    | 24 +++++++++++++++++-------
 include/cpu.h |  1 +
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/core/cpu.c b/core/cpu.c
index 8a7a43f..2084150 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -726,6 +726,11 @@ static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address)
 		prerror("OPAL: CPU not active in OPAL !\n");
 		return OPAL_WRONG_STATE;
 	}
+	if (cpu->in_reinit) {
+		unlock(&reinit_lock);
+		prerror("OPAL: CPU being reinitialized !\n");
+		return OPAL_WRONG_STATE;
+	}
 	job = __cpu_queue_job(cpu, "start_thread",
 			      opal_start_thread_job, (void *)start_address,
 			      true);
@@ -821,34 +826,37 @@ static int64_t opal_reinit_cpus(uint64_t flags)
 
 	prerror("OPAL: Trying a CPU re-init with flags: 0x%llx\n", flags);
 
+ again:
 	lock(&reinit_lock);
 
 	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
-		if (cpu == this_cpu())
+		if (cpu == this_cpu() || cpu->in_reinit)
 			continue;
 		if (cpu->state == cpu_state_os) {
+			unlock(&reinit_lock);
 			/*
 			 * That might be a race with return CPU during kexec
 			 * where we are still, wait a bit and try again
 			 */
 			for (i = 0; (i < 1000) &&
 				     (cpu->state == cpu_state_os); i++) {
-				unlock(&reinit_lock);
 				time_wait_ms(1);
-				lock(&reinit_lock);
 			}
 			if (cpu->state == cpu_state_os) {
 				prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir);
-				rc = OPAL_WRONG_STATE;
-				goto bail;
+				return OPAL_WRONG_STATE;
 			}
+			goto again;
 		}
+		cpu->in_reinit = true;
 	}
 	/*
 	 * Now we need to mark ourselves "active" or we'll be skipped
 	 * by the various "for_each_active_..." calls done by slw_reinit()
 	 */
 	this_cpu()->state = cpu_state_active;
+	this_cpu()->in_reinit = true;
+	unlock(&reinit_lock);
 
 	/*
 	 * If the flags affect endianness and we are on P8 DD2 or later, then
@@ -875,10 +883,12 @@ static int64_t opal_reinit_cpus(uint64_t flags)
 		rc = slw_reinit(flags);
 
 	/* And undo the above */
+	lock(&reinit_lock);
 	this_cpu()->state = cpu_state_os;
-
-bail:
+	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
+		cpu->in_reinit = false;
 	unlock(&reinit_lock);
+
 	return rc;
 }
 opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);
diff --git a/include/cpu.h b/include/cpu.h
index 59923d5..62f5629 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -60,6 +60,7 @@ struct cpu_thread {
 	bool				con_need_flush;
 	bool				in_mcount;
 	bool				in_poller;
+	bool				in_reinit;
 	uint32_t			hbrt_spec_wakeup; /* primary only */
 	uint64_t			save_l2_fir_action1;
 	uint64_t			current_token;
-- 
2.7.4



More information about the Skiboot mailing list