[Cbe-oss-dev] [PATCH] spu preemptive scheduling problems

luke lukebr at linux.vnet.ibm.com
Tue Mar 20 13:23:32 EST 2007


This patch solves three problems with the new runqueue management
code introduced with the preemptive spu scheduler.

The first problem is a potential lost wakeup: spu_prio_wait() must
atomically put the current thread to sleep on ctx->stop_wq after
having placed the context on the runqueue.  This is done by
overlapping the context and runqueue locks in the sleep path, so that
spu_reschedule() can perform the wakeup while holding just the
runqueue lock.  Both operations need to be done atomically in the
sleep path, so that the wakeup is guaranteed to result in the
scheduling of the controlling thread.
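
For reference, this is the patched spu_prio_wait() from the diff
below, with comments of mine added to annotate where the locks
overlap:

	static void spu_prio_wait(struct spu_context *ctx)
	{
		DEFINE_WAIT(wait);

		/*
		 * Take the runqueue lock before preparing to wait, so
		 * that a wakeup from spu_reschedule() cannot slip in
		 * between queueing the context and going to sleep.
		 */
		spin_lock(&spu_prio->runq_lock);
		prepare_to_wait_exclusive(&ctx->stop_wq, &wait,
					  TASK_INTERRUPTIBLE);
		if (!signal_pending(current)) {
			__spu_add_to_rq(ctx);
			spin_unlock(&spu_prio->runq_lock);
			/* state_mutex is held by the caller */
			mutex_unlock(&ctx->state_mutex);
			schedule();
			mutex_lock(&ctx->state_mutex);
			spin_lock(&spu_prio->runq_lock);
			/* the woken thread dequeues itself */
			__spu_del_from_rq(ctx);
		}
		spin_unlock(&spu_prio->runq_lock);
		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&ctx->stop_wq, &wait);
	}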

The second problem fixed by this patch is a mismatch in runqueue
operations between spu_prio_wait() and spu_reschedule().  The former
adds the ctx to the runqueue before sleeping and removes it after
waking.  The latter also removed the ctx from the runqueue, which it
shouldn't, as that resulted in a double removal.
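
Condensed from the spu_reschedule() hunk below, the before/after of
the wakeup path (comments are mine):

	/* before: the waker dequeued the context ... */
	ctx = list_entry(rq->next, struct spu_context, rq);
	__spu_del_from_rq(ctx, best);
	wake_up(&ctx->stop_wq);
	/* ... and the woken thread dequeued it again via
	 * spu_del_from_rq() in spu_activate() -- a double removal. */

	/* after: only the woken thread removes itself, inside
	 * spu_prio_wait(). */
	ctx = list_entry(rq->next, struct spu_context, rq);
	wake_up(&ctx->stop_wq);
	need_yield = 1;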

The third problem fixed by this patch is really a performance
optimization.  I changed spu_deactivate() to return an integer so
that the caller is told whether another context was actually
scheduled or not.  This enables spu_yield() to be smarter about
calling yield().
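
Assuming spu_yield() finishes with the usual need_yield test (that
tail is outside the hunk shown below), the yield path reduces to
roughly this sketch:

	/*
	 * spu_deactivate() now returns nonzero only if freeing the
	 * spu actually woke another context, so yield() is skipped
	 * when there is nobody to yield to.
	 */
	need_yield = spu_deactivate(ctx);
	mutex_unlock(&ctx->state_mutex);
	if (need_yield)
		yield();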

I have tested this patch in a limited way.  I wrote a testcase that
creates 32 contexts and then schedules them, so that there are lots
of runqueue operations, timeslicing, context switches, yielding, and
so on.

I think this patch may fix a recently reported bug, where a DSI (data
storage interrupt) occurs when yielding.  At least, that is the
defect that drove these changes.

Signed-off-by: Luke Browning <lukebrowning at us.ibm.com>


Index: linux-2.6.20/arch/powerpc/platforms/cell/spufs/context.c
===================================================================
--- linux-2.6.20.orig/arch/powerpc/platforms/cell/spufs/context.c	2007-03-19 20:25:30.000000000 -0300
+++ linux-2.6.20/arch/powerpc/platforms/cell/spufs/context.c	2007-03-19 20:26:31.000000000 -0300
@@ -72,7 +72,7 @@
 	struct spu_context *ctx;
 	ctx = container_of(kref, struct spu_context, kref);
 	mutex_lock(&ctx->state_mutex);
-	spu_deactivate(ctx);
+	(void) spu_deactivate(ctx);
 	mutex_unlock(&ctx->state_mutex);
 	spu_fini_csa(&ctx->csa);
 	if (ctx->gang)
@@ -202,7 +202,7 @@
 {
 	spu_acquire(ctx);
 	if (ctx->state != SPU_STATE_SAVED)
-		spu_deactivate(ctx);
+		(void) spu_deactivate(ctx);
 }
 
 void spu_set_profile_private_kref(struct spu_context * ctx,
Index: linux-2.6.20/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6.20.orig/arch/powerpc/platforms/cell/spufs/sched.c	2007-03-19 20:14:08.000000000 -0300
+++ linux-2.6.20/arch/powerpc/platforms/cell/spufs/sched.c	2007-03-19 20:36:23.000000000 -0300
@@ -102,8 +102,8 @@
 	int preempted = 0;
 
 	/*
-	 * If this context is beeing stopped avoid rescheduling from the
-	 * scheduler tick because we would deadlock on the state_mutex.
+	 * If this context is being stopped avoid rescheduling from the
+	 * scheduler tick because we would block on the state_mutex.
 	 * The caller will yield the spu later on anyway.
 	 */
 	if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags))
@@ -114,7 +114,7 @@
 	if (spu) {
 		int best = sched_find_first_bit(spu_prio->bitmap);
 		if (best <= ctx->prio) {
-			spu_deactivate(ctx);
+			(void) spu_deactivate(ctx);
 			preempted = 1;
 		}
 	}
@@ -273,44 +273,40 @@
  * spu_add_to_rq - add a context to the runqueue
  * @ctx:       context to add
  */
-static void spu_add_to_rq(struct spu_context *ctx)
+static void __spu_add_to_rq(struct spu_context *ctx)
 {
-	spin_lock(&spu_prio->runq_lock);
-	list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
-	set_bit(ctx->prio, spu_prio->bitmap);
-	mb();
-	spin_unlock(&spu_prio->runq_lock);
+	int prio = ctx->prio;
+
+	list_add_tail(&ctx->rq, &spu_prio->runq[prio]);
+	set_bit(prio, spu_prio->bitmap);
 }
 
-static void __spu_del_from_rq(struct spu_context *ctx, int prio)
+static void __spu_del_from_rq(struct spu_context *ctx)
 {
+	int prio = ctx->prio;
+
 	if (!list_empty(&ctx->rq))
 		list_del_init(&ctx->rq);
 	if (list_empty(&spu_prio->runq[prio]))
-		clear_bit(ctx->prio, spu_prio->bitmap);
-}
-
-/**
- * spu_del_from_rq - remove a context from the runqueue
- * @ctx:       context to remove
- */
-static void spu_del_from_rq(struct spu_context *ctx)
-{
-	spin_lock(&spu_prio->runq_lock);
-	__spu_del_from_rq(ctx, ctx->prio);
-	spin_unlock(&spu_prio->runq_lock);
+		clear_bit(prio, spu_prio->bitmap);
 }
 
 static void spu_prio_wait(struct spu_context *ctx)
 {
 	DEFINE_WAIT(wait);
 
+	spin_lock(&spu_prio->runq_lock);
 	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
 	if (!signal_pending(current)) {
+		__spu_add_to_rq(ctx);
+		spin_unlock(&spu_prio->runq_lock);
 		mutex_unlock(&ctx->state_mutex);
 		schedule();
 		mutex_lock(&ctx->state_mutex);
+		spin_lock(&spu_prio->runq_lock);
+		__spu_del_from_rq(ctx);
 	}
+	spin_unlock(&spu_prio->runq_lock);
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&ctx->stop_wq, &wait);
 }
@@ -321,9 +317,12 @@
  *
  * This function is called whenever a spu becomes idle.	 It looks for the
  * most suitable runnable spu context and schedules it for execution.
+ *
+ * Returns 1 if a wakeup occurred so that caller can yield if desired.
  */
-static void spu_reschedule(struct spu *spu)
+static int spu_reschedule(struct spu *spu)
 {
+	int need_yield = 0;
 	int best;
 
 	spu_free(spu);
@@ -337,10 +336,12 @@
 		BUG_ON(list_empty(rq));
 
 		ctx = list_entry(rq->next, struct spu_context, rq);
-		__spu_del_from_rq(ctx, best);
 		wake_up(&ctx->stop_wq);
+		need_yield = 1;
 	}
 	spin_unlock(&spu_prio->runq_lock);
+
+	return need_yield;
 }
 
 static struct spu *spu_get_idle(struct spu_context *ctx)
@@ -437,7 +438,7 @@
  * @ctx:	spu context to schedule
  * @flags:	flags (currently ignored)
  *
- * Tries to find a free spu to run @ctx.  If no free spu is availble
+ * Tries to find a free spu to run @ctx.  If no free spu is available
  * add the context to the runqueue so it gets woken up once an spu
  * is available.
  */
@@ -462,9 +463,7 @@
 			return 0;
 		}
 
-		spu_add_to_rq(ctx);
 		spu_prio_wait(ctx);
-		spu_del_from_rq(ctx);
 	} while (!signal_pending(current));
 
 	return -ERESTARTSYS;
@@ -477,14 +476,17 @@
  * Unbind @ctx from the physical spu it is running on and schedule
  * the highest priority context to run on the freed physical spu.
  */
-void spu_deactivate(struct spu_context *ctx)
+int spu_deactivate(struct spu_context *ctx)
 {
 	struct spu *spu = ctx->spu;
+	int need_yield = 0;
 
 	if (spu) {
 		spu_unbind_context(spu, ctx);
-		spu_reschedule(spu);
+		need_yield = spu_reschedule(spu);
 	}
+
+	return need_yield;
 }
 
 /**
@@ -506,8 +508,7 @@
 			if (best < MAX_PRIO) {
 				pr_debug("%s: yielding SPU %d NODE %d\n",
 					 __FUNCTION__, spu->number, spu->node);
-				spu_deactivate(ctx);
-				need_yield = 1;
+				need_yield = spu_deactivate(ctx);
 			}
 		}
 		mutex_unlock(&ctx->state_mutex);
Index: linux-2.6.20/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- linux-2.6.20.orig/arch/powerpc/platforms/cell/spufs/spufs.h	2007-03-19 20:24:17.000000000 -0300
+++ linux-2.6.20/arch/powerpc/platforms/cell/spufs/spufs.h	2007-03-19 20:24:56.000000000 -0300
@@ -220,7 +220,7 @@
 int spu_acquire_exclusive(struct spu_context *ctx);
 
 int spu_activate(struct spu_context *ctx, unsigned long flags);
-void spu_deactivate(struct spu_context *ctx);
+int spu_deactivate(struct spu_context *ctx);
 void spu_yield(struct spu_context *ctx);
 void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
 void spu_start_tick(struct spu_context *ctx);




