[PATCH] 2.6 timebase synchronization
Samuel Rydh
samuel at ibrium.se
Fri Aug 8 10:15:23 EST 2003
Hi,
Here is a patch that provides improved generic SMP timebase
synchronization for 2.6.
With this patch the calibration error is typically 0 ticks (at least on my machine).
Without it, the timebase difference between the CPUs is about 2 * 10^6 ticks...
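In case anyone wonders how the calibration works: each candidate offset is
scored by letting the two CPUs race to a common timebase mark; the sign of
the score says whether the offset is too large or too small, and a binary
search converges on the value where neither CPU consistently wins. Below is
a toy user-space model of just that search loop (true_skew and fake_contest()
are invented for illustration; in the patch the score comes from the
cache-line race in start_contest()):

#include <stdio.h>

#define NUM_ITER 300

static int true_skew = 1234;	/* invented "true" offset, in ticks */

/* Stand-in for start_contest(): the sign says whether the candidate offset
 * overshoots or undershoots, the magnitude shrinks as we get close. */
static int fake_contest( int offset )
{
	int d = offset - true_skew;

	if( d > NUM_ITER )
		d = NUM_ITER;
	if( d < -NUM_ITER )
		d = -NUM_ITER;
	return d;
}

int main( void )
{
	int offset, score, min=0, max=5000, next_offset=1000;
	int best=10000, best_offset=0;

	do {
		offset = next_offset;
		score = fake_contest( offset );
		if( score > 0 )
			max = offset;
		else {
			min = offset;
			score = -score;
		}
		if( score < best ) {
			best_offset = offset;
			best = score;
		}
		next_offset = (min + max)/2;
	} while( offset != next_offset );

	printf( "converged on offset %d (true skew %d)\n", best_offset, true_skew );
	return 0;
}

The real scoring is of course the timebase race rather than a formula, but
the convergence logic is the same as in smp_generic_give_timebase() below.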
/Samuel
# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.998 -> 1.999
# arch/ppc/kernel/Makefile 1.51 -> 1.52
# arch/ppc/kernel/smp.c 1.35 -> 1.36
# (new) -> 1.1 arch/ppc/kernel/smp-tbsync.c
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/08/08 samuel at ibrium.se 1.999
# SMP timebase synchronization
# --------------------------------------------
#
diff -Nru a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile
--- a/arch/ppc/kernel/Makefile Fri Aug 8 01:51:49 2003
+++ b/arch/ppc/kernel/Makefile Fri Aug 8 01:51:49 2003
@@ -25,7 +25,7 @@
obj-$(CONFIG_PCI) += pci.o
obj-$(CONFIG_PCI) += pci-dma.o
obj-$(CONFIG_KGDB) += ppc-stub.o
-obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smp.o smp-tbsync.o
obj-$(CONFIG_TAU) += temp.o
ifdef CONFIG_MATH_EMULATION
diff -Nru a/arch/ppc/kernel/smp-tbsync.c b/arch/ppc/kernel/smp-tbsync.c
--- /dev/null Wed Dec 31 16:00:00 1969
+++ b/arch/ppc/kernel/smp-tbsync.c Fri Aug 8 01:51:49 2003
@@ -0,0 +1,187 @@
+/*
+ * SMP timebase synchronization for PPC.
+ *
+ * Copyright (C) 2003 Samuel Rydh (samuel at ibrium.se)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/unistd.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+
+#define NUM_ITER 300
+
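+/* commands passed from the timebase master (CPU0) to the secondary CPU */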
+enum {
+ kExit=0, kSetAndTest, kTest
+};
+
+/* keep things aligned in order to avoid cache ping-pong */
+struct tbsync_vars {
+ /* CPU0 write */
+ volatile int tbu;
+ volatile int tbl;
+ volatile int mark;
+ volatile int handshake;
+ volatile int cmd;
+ int filler1[3+8];
+
+ /* CPU0 and CPU1 write */
+ atomic_t contest;
+};
+
+static volatile int tbsync_ack;
+static volatile int running;
+
+static struct tbsync_vars *tbsync;
+
+
+#define I_SYNC() asm volatile("sync; isync");
+#define CONTEST_CACHE_FLUSH() asm volatile("dcbf 0,%0 ; sync ; isync" : : \
+ "r" (&tbsync->contest) );
+
+
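+/* Spin until the local timebase passes <mark>, then add <add> to the shared
+ * counter; the return value shows which CPU reached the mark first. */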
+static int __devinit
+enter_contest( int mark, int add )
+{
+ while( (int)(get_tbl() - mark) < 0 )
+ ;
+ return atomic_add_return( add, &tbsync->contest );
+}
+
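+/* Secondary CPU: wait for the master to publish its timebase, copy it into
+ * TBU/TBL when asked (kSetAndTest), then race the master to the mark so the
+ * master can score the resulting offset. */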
+void __devinit
+smp_generic_take_timebase( void )
+{
+ int cmd, tbl;
+
+ while( !running )
+ ;
+ for( ;; ) {
+ tbsync_ack = 1;
+ while( !tbsync->handshake )
+ ;
+
+ I_SYNC();
+
+ cmd = tbsync->cmd;
+ tbl = tbsync->tbl;
+ tbsync_ack = 0;
+ if( cmd == kExit )
+ return;
+
+ while( tbsync->handshake )
+ ;
+ if( cmd == kSetAndTest ) {
+ asm volatile ("isync");
+ asm volatile ("mttbl %0" :: "r" (tbl) );
+ asm volatile ("mttbu %0" :: "r" (tbsync->tbu) );
+ }
+ (void) enter_contest( tbsync->mark, 2 );
+
+ CONTEST_CACHE_FLUSH();
+ }
+}
+
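+/* Master CPU: run <num> scored rounds. Each round publishes the master's
+ * timebase (TBL shifted by <offset>) and a mark, lets the secondary set its
+ * timebase, then both CPUs race to the mark. A positive score means the
+ * secondary keeps winning (offset too large), a negative score the opposite. */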
+static int __devinit
+start_contest( int cmd, int offset, int num )
+{
+ int i, tbu, tbl, mark, result, score=0;
+
+ tbsync->cmd = cmd;
+
+ for( i=-3; i<num; ) {
+ atomic_set( &tbsync->contest, 0 );
+ CONTEST_CACHE_FLUSH();
+
+ tbl = get_tbl() + 2000;
+ tbsync->tbu = tbu = get_tbu();
+ tbsync->tbl = tbl + offset;
+ tbsync->mark = mark = tbl + 500;
+
+ I_SYNC();
+
+ tbsync->handshake = 1;
+ while( tbsync_ack )
+ ;
+
+ while( (int)(get_tbl() - tbl) <= 0 )
+ ;
+ tbsync->handshake = 0;
+ result = enter_contest( mark, -1 );
+
+ while( !tbsync_ack )
+ ;
+
+ if( tbsync->tbu != get_tbu() || ((tbsync->tbl ^ get_tbl()) & 0x80000000) )
+ continue;
+
+ if( i++ > 0 )
+ score += result;
+ }
+ return score;
+}
+
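+/* Master CPU: binary-search for the offset that makes the secondary's
+ * timebase land as close as possible to the master's, then verify the final
+ * setting (guarding against an inaccurate mttb). */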
+void __devinit
+smp_generic_give_timebase( void )
+{
+ int i, offset, score, min=0, max=5000, next_offset=1000;
+ int best=10000, best_offset;
+
+ printk("Synchronizing timebase\n");
+
+ /* if this fails then this kernel won't work anyway... */
+ tbsync = kmalloc( sizeof(*tbsync), GFP_KERNEL );
+ memset( tbsync, 0, sizeof(*tbsync) );
+ I_SYNC();
+ running = 1;
+ while( !tbsync_ack )
+ ;
+
+ /* binary search for optimal offset */
+ do {
+ offset = next_offset;
+ score = start_contest( kSetAndTest, offset, NUM_ITER );
+
+ printk("score %d, offset %d [%d..%d]\n", score, offset, min, max );
+
+ if( score > 0 )
+ max = offset;
+ else {
+ min = offset;
+ score = -score;
+ }
+ if( score < best ) {
+ best_offset = offset;
+ best = score;
+ }
+ next_offset = (min + max)/2;
+ } while( offset != next_offset );
+
+ /* guard against inaccurate mttb */
+ for( i=0; i<10; i++ ) {
+ if( (score=start_contest(kTest, best_offset, NUM_ITER)) < 0 )
+ score = -score;
+ if( score <= best || score < 20 )
+ break;
+ start_contest( kSetAndTest, best_offset, NUM_ITER/10 );
+ }
+ printk("Final timbase offset: %d (%d/%d)\n", best_offset, score, NUM_ITER );
+
+ /* exiting */
+ tbsync->cmd = kExit;
+ I_SYNC();
+ tbsync->handshake = 1;
+ while( tbsync_ack )
+ ;
+ tbsync->handshake = 0;
+ kfree( tbsync );
+ tbsync = NULL;
+
+ /* all done */
+ smp_tb_synchronized = 1;
+}
diff -Nru a/arch/ppc/kernel/smp.c b/arch/ppc/kernel/smp.c
--- a/arch/ppc/kernel/smp.c Fri Aug 8 01:51:49 2003
+++ b/arch/ppc/kernel/smp.c Fri Aug 8 01:51:49 2003
@@ -58,10 +58,6 @@
/* all cpu mappings are 1-1 -- Cort */
volatile unsigned long cpu_callin_map[NR_CPUS];
-#define TB_SYNC_PASSES 4
-volatile unsigned long __initdata tb_sync_flag = 0;
-volatile unsigned long __initdata tb_offset = 0;
-
int start_secondary(void *);
extern int cpu_idle(void *unused);
void smp_call_function_interrupt(void);
@@ -288,41 +284,6 @@
atomic_inc(&call_data->finished);
}
-/* FIXME: Do this properly for all archs --RR */
-static spinlock_t timebase_lock = SPIN_LOCK_UNLOCKED;
-static unsigned int timebase_upper = 0, timebase_lower = 0;
-
-void __devinit
-smp_generic_give_timebase(void)
-{
- spin_lock(&timebase_lock);
- do {
- timebase_upper = get_tbu();
- timebase_lower = get_tbl();
- } while (timebase_upper != get_tbu());
- spin_unlock(&timebase_lock);
-
- while (timebase_upper || timebase_lower)
- rmb();
-}
-
-void __devinit
-smp_generic_take_timebase(void)
-{
- int done = 0;
-
- while (!done) {
- spin_lock(&timebase_lock);
- if (timebase_upper || timebase_lower) {
- set_tb(timebase_upper, timebase_lower);
- timebase_upper = 0;
- timebase_lower = 0;
- done = 1;
- }
- spin_unlock(&timebase_lock);
- }
-}
-
static void __devinit smp_store_cpu_info(int id)
{
struct cpuinfo_PPC *c = &cpu_data[id];
@@ -423,7 +384,7 @@
/* wake up cpu */
smp_ops->kick_cpu(cpu);
-
+
/*
* wait to see if the cpu made a callin (is actually up).
* use this value that I found through experimentation.
** Sent via the linuxppc-dev mail list. See http://lists.linuxppc.org/