[Skiboot] [RFC][PATCH] wait loop improvements

Nicholas Piggin npiggin at gmail.com
Fri May 12 03:58:56 AEST 2017


This patch has a few improvements to wait loops. Primarily two
things:

- Running busy-wait loops entirely in lowest SMT priority rather than
  calling cpu_relax(), which is not all that suitable an interface
  for the job. This gives other threads more cycles to do useful work.

- Reducing the amount of unnecessary timeout waits. These take quite
  a while to run in the simulator.

These reduce time to boot a 2 core, 16 thread POWER8 sim to Linux
userspace from about 43 seconds on my laptop to 13. Haven't tested on
real hardware -- one possibly dodgy thing is changing HILE concurrently,
but there doesn't seem to be any other protection against that.

Just a request for comments at the moment. The long SMP boot times
in the simulator were bugging me so I can clean this up and split
it into pieces if there is interest to merge it. (might be some small
gains on hardware here and there, but unlikely to be too significant I
think)

Thanks,
Nick
---
 core/cpu.c          | 50 ++++++++++++++++++++++++++++++++++++++++++++------
 core/hmi.c          |  4 +++-
 core/init.c         |  4 ++--
 core/lock.c         |  5 ++++-
 core/timebase.c     | 26 +++++++-------------------
 core/timer.c        |  5 ++++-
 hw/lpc-uart.c       | 10 +++++++---
 include/cpu.h       |  1 +
 include/processor.h |  2 ++
 9 files changed, 74 insertions(+), 33 deletions(-)

diff --git a/core/cpu.c b/core/cpu.c
index c7e650da..8c24cbfc 100644
--- a/core/cpu.c
+++ b/core/cpu.c
@@ -289,10 +289,15 @@ void cpu_process_jobs(void)
 static void cpu_idle_default(enum cpu_wake_cause wake_on __unused)
 {
 	/* Maybe do something better for simulators ? */
-	cpu_relax();
-	cpu_relax();
-	cpu_relax();
-	cpu_relax();
+	if (wake_on == cpu_wake_on_job) {
+		struct cpu_thread *cpu = this_cpu();
+
+		smt_lowest();
+		/* Check for jobs again */
+		while (!cpu_check_jobs(cpu))
+			barrier();
+		smt_medium();
+	}
 }
 
 static void cpu_idle_p8(enum cpu_wake_cause wake_on)
@@ -385,6 +390,28 @@ void cpu_idle(enum cpu_wake_cause wake_on)
 	}
 }
 
+void cpu_idle_delay(unsigned long delay, unsigned long min_pm)
+{
+	unsigned long now, end = mftb() + delay;
+	if (pm_enabled && delay > min_pm) {
+		for (;;) {
+			if (delay >= 0x7fffffff)
+				delay = 0x7fffffff;
+			mtspr(SPR_DEC, delay);
+			cpu_idle(cpu_wake_on_dec);
+			now = mftb();
+			if (tb_compare(now, end) == TB_AAFTERB)
+				break;
+			delay = end - now;
+		}
+	} else {
+		smt_lowest();
+		while (tb_compare(mftb(), end) != TB_AAFTERB)
+			barrier();
+		smt_medium();
+	}
+}
+
 void cpu_process_local_jobs(void)
 {
 	struct cpu_thread *cpu = first_available_cpu();
@@ -1000,6 +1027,7 @@ static void cpu_change_hile(void *hilep)
 static int64_t cpu_change_all_hile(bool hile)
 {
 	struct cpu_thread *cpu;
+	bool done;
 
 	prlog(PR_INFO, "CPU: Switching HILE on all CPUs to %d\n", hile);
 
@@ -1010,9 +1038,19 @@ static int64_t cpu_change_all_hile(bool hile)
 			cpu_change_hile(&hile);
 			continue;
 		}
-		cpu_wait_job(cpu_queue_job(cpu, "cpu_change_hile",
-					   cpu_change_hile, &hile), true);
+		cpu_queue_job(cpu, "cpu_change_hile", cpu_change_hile, &hile);
 	}
+
+	smt_lowest();
+	do {
+		done = true;
+		for_each_available_cpu(cpu) {
+			if (cpu->current_hile != hile)
+				done = false;
+		}
+	} while (!done);
+	smt_medium();
+
 	return OPAL_SUCCESS;
 }
 
diff --git a/core/hmi.c b/core/hmi.c
index e55a85b2..06d83476 100644
--- a/core/hmi.c
+++ b/core/hmi.c
@@ -599,6 +599,7 @@ static void wait_for_subcore_threads(void)
 {
 	uint64_t timeout = 0;
 
+	smt_lowest();
 	while (!(*(this_cpu()->core_hmi_state_ptr) & HMI_STATE_CLEANUP_DONE)) {
 		/*
 		 * We use a fixed number of TIMEOUT_LOOPS rather
@@ -616,8 +617,9 @@ static void wait_for_subcore_threads(void)
 			prlog(PR_DEBUG, "HMI: TB pre-recovery timeout\n");
 			break;
 		}
-		cpu_relax();
+		barrier();
 	}
+	smt_medium();
 }
 
 /*
diff --git a/core/init.c b/core/init.c
index dce10fd6..434b4ff3 100644
--- a/core/init.c
+++ b/core/init.c
@@ -1068,9 +1068,9 @@ void __noreturn __secondary_cpu_entry(void)
 	/* Wait for work to do */
 	while(true) {
 		if (cpu_check_jobs(cpu))
-		    cpu_process_jobs();
+			cpu_process_jobs();
 		else
-		    cpu_idle(cpu_wake_on_job);
+			cpu_idle(cpu_wake_on_job);
 	}
 }
 
diff --git a/core/lock.c b/core/lock.c
index e82048bf..0868f2ba 100644
--- a/core/lock.c
+++ b/core/lock.c
@@ -93,7 +93,10 @@ void lock(struct lock *l)
 	for (;;) {
 		if (try_lock(l))
 			break;
-		cpu_relax();
+		smt_lowest();
+		while (l->lock_val)
+			barrier();
+		smt_medium();
 	}
 }
 
diff --git a/core/timebase.c b/core/timebase.c
index a3c0fec5..6c2f656e 100644
--- a/core/timebase.c
+++ b/core/timebase.c
@@ -24,8 +24,8 @@ unsigned long tb_hz = 512000000;
 
 static void time_wait_poll(unsigned long duration)
 {
-	unsigned long remaining = duration;
-	unsigned long end = mftb() + duration;
+	unsigned long now = mftb();
+	unsigned long end = now + duration;
 	unsigned long period = msecs_to_tb(5);
 
 	if (this_cpu()->tb_invalid) {
@@ -33,7 +33,9 @@ static void time_wait_poll(unsigned long duration)
 		return;
 	}
 
-	while (tb_compare(mftb(), end) != TB_AAFTERB) {
+	while (tb_compare(now, end) != TB_AAFTERB) {
+		unsigned long remaining = end - now;
+
 		/* Call pollers periodically but not continually to avoid
 		 * bouncing cachelines due to lock contention. */
 		if (remaining >= period) {
@@ -43,7 +45,7 @@ static void time_wait_poll(unsigned long duration)
 		} else
 			time_wait_nopoll(remaining);
 
-		cpu_relax();
+		now = mftb();
 	}
 }
 
@@ -64,7 +66,6 @@ void time_wait(unsigned long duration)
 
 void time_wait_nopoll(unsigned long duration)
 {
-	unsigned long end = mftb() + duration;
 	unsigned long min = usecs_to_tb(10);
 
 	if (this_cpu()->tb_invalid) {
@@ -72,20 +73,7 @@ void time_wait_nopoll(unsigned long duration)
 		return;
 	}
 
-	for (;;) {
-		uint64_t delay, tb = mftb();
-
-		if (tb_compare(tb, end) == TB_AAFTERB)
-			break;
-		delay = end - tb;
-		if (delay >= 0x7fffffff)
-			delay = 0x7fffffff;
-		if (delay >= min) {
-			mtspr(SPR_DEC, delay);
-			cpu_idle(cpu_wake_on_dec);
-		} else
-			cpu_relax();
-	}
+	cpu_idle_delay(duration, min);
 }
 
 void time_wait_ms(unsigned long ms)
diff --git a/core/timer.c b/core/timer.c
index 75489963..4b5925fe 100644
--- a/core/timer.c
+++ b/core/timer.c
@@ -45,7 +45,10 @@ static void __sync_timer(struct timer *t)
 
 	while (t->running) {
 		unlock(&timer_lock);
-		cpu_relax();
+		smt_lowest();
+		while (t->running)
+			barrier();
+		smt_medium();
 		/* Should we call the pollers here ? */
 		lock(&timer_lock);
 	}
diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
index 6e349ef9..912bffa5 100644
--- a/hw/lpc-uart.c
+++ b/hw/lpc-uart.c
@@ -115,10 +115,14 @@ static void uart_check_tx_room(void)
 
 static void uart_wait_tx_room(void)
 {
-	while(!tx_room) {
+	while (!tx_room) {
 		uart_check_tx_room();
-		if (!tx_room)
-			cpu_relax();
+		if (!tx_room) {
+			smt_lowest();
+			while (!tx_room)
+				barrier();
+			smt_medium();
+		}
 	}
 }
 
diff --git a/include/cpu.h b/include/cpu.h
index 1e147aa8..4c58e34a 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -276,5 +276,6 @@ extern unsigned long __attrconst cpu_stack_bottom(unsigned int pir);
 extern unsigned long __attrconst cpu_stack_top(unsigned int pir);
 
 extern void cpu_idle(enum cpu_wake_cause wake_on);
+extern void cpu_idle_delay(unsigned long delay, unsigned long min_pm);
 
 #endif /* __CPU_H */
diff --git a/include/processor.h b/include/processor.h
index b98dff8d..5906b865 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -197,6 +197,7 @@
 #define smt_medium_low	or 6,6,6
 #define smt_extra_high	or 7,7,7
 #define smt_very_low	or 31,31,31
+#define smt_lowest	smt_low ; smt_very_low
 
 #else /* __ASSEMBLY__ */
 
@@ -214,6 +215,7 @@ static inline void smt_medium_high(void){ asm volatile("or 5,5,5");	}
 static inline void smt_medium_low(void)	{ asm volatile("or 6,6,6");	}
 static inline void smt_extra_high(void)	{ asm volatile("or 7,7,7");	}
 static inline void smt_very_low(void)	{ asm volatile("or 31,31,31");	}
+static inline void smt_lowest(void)	{ smt_low(); smt_very_low();	}
 
 /*
  * SPR access functions
-- 
2.11.0



More information about the Skiboot mailing list