[Cbe-oss-dev] [PATCH] spusched: extend list_mutex coverage

Christoph Hellwig hch at lst.de
Tue Jul 15 14:08:12 EST 2008


spu->ctx is only stable while the list_mutex is held.  Extend the
hold times of the lock in find_victim and spusched_tick to avoid potentially
dangling dereferences.  To do so we have to switch to a trylock
and opencode spu_schedule in spusched_tick which is rather ugly, but I
can't find a good way around it.


Signed-off-by: Christoph Hellwig <hch at lst.de>

Index: linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/sched.c	2008-07-15 06:02:19.000000000 +0200
+++ linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c	2008-07-15 06:03:40.000000000 +0200
@@ -633,7 +633,6 @@ static struct spu *find_victim(struct sp
 			    (!victim || tmp->prio > victim->prio))
 				victim = spu->ctx;
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
 
 		if (victim) {
 			/*
@@ -647,6 +646,7 @@ static struct spu *find_victim(struct sp
 			 * look at another context or give up after X retries.
 			 */
 			if (!mutex_trylock(&victim->state_mutex)) {
+				mutex_unlock(&cbe_spu_info[node].list_mutex);
 				victim = NULL;
 				goto restart;
 			}
@@ -659,16 +659,15 @@ static struct spu *find_victim(struct sp
 				 * restart the search.
 				 */
 				mutex_unlock(&victim->state_mutex);
+				mutex_unlock(&cbe_spu_info[node].list_mutex);
 				victim = NULL;
 				goto restart;
 			}
 
 			spu_context_trace(__spu_deactivate__unload, ctx, spu);
 
-			mutex_lock(&cbe_spu_info[node].list_mutex);
 			cbe_spu_info[node].nr_active--;
 			spu_unbind_context(spu, victim);
-			mutex_unlock(&cbe_spu_info[node].list_mutex);
 
 			victim->stats.invol_ctx_switch++;
 			spu->stats.invol_ctx_switch++;
@@ -676,9 +675,10 @@ static struct spu *find_victim(struct sp
 				spu_add_to_rq(victim);
 
 			mutex_unlock(&victim->state_mutex);
-
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
 			return spu;
 		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
 	}
 
 	return NULL;
@@ -715,16 +715,21 @@ static void spu_schedule(struct spu *spu
 	spu_release(ctx);
 }
 
-static void spu_unschedule(struct spu *spu, struct spu_context *ctx)
+static void __spu_unschedule(struct spu *spu, struct spu_context *ctx)
 {
-	int node = spu->node;
-
-	mutex_lock(&cbe_spu_info[node].list_mutex);
-	cbe_spu_info[node].nr_active--;
+	cbe_spu_info[spu->node].nr_active--;
 	spu->alloc_state = SPU_FREE;
 	spu_unbind_context(spu, ctx);
 	ctx->stats.invol_ctx_switch++;
 	spu->stats.invol_ctx_switch++;
+}
+
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx)
+{
+	int node = spu->node;
+
+	mutex_lock(&cbe_spu_info[node].list_mutex);
+	__spu_unschedule(spu, ctx);
 	mutex_unlock(&cbe_spu_info[node].list_mutex);
 }
 
@@ -875,8 +880,13 @@ static noinline void spusched_tick(struc
 	struct spu_context *new = NULL;
 	struct spu *spu = NULL;
 
-	if (spu_acquire(ctx))
-		BUG();	/* a kernel thread never has signals pending */
+	/*
+	 * If we can't lock the context give it another timeslice.
+	 */
+	if (!mutex_trylock(&ctx->state_mutex)) {
+		ctx->time_slice++;
+		return;
+	}
 
 	if (ctx->state != SPU_STATE_RUNNABLE)
 		goto out;
@@ -892,9 +902,16 @@ static noinline void spusched_tick(struc
 
 	spu_context_trace(spusched_tick__preempt, ctx, spu);
 
-	new = grab_runnable_context(ctx->prio + 1, spu->node);
+	/*
+	 * Try to find a new context on the runqueue that we can lock
+	 * without sleeping.
+	 */
+	do {
+		new = grab_runnable_context(ctx->prio + 1, spu->node);
+	} while (new && !mutex_trylock(&new->state_mutex));
+
 	if (new) {
-		spu_unschedule(spu, ctx);
+		__spu_unschedule(spu, ctx);
 		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
 			spu_add_to_rq(ctx);
 	} else {
@@ -904,8 +921,24 @@ static noinline void spusched_tick(struc
 out:
 	spu_release(ctx);
 
-	if (new)
-		spu_schedule(spu, new);
+	/*
+	 * This is an opencoded spu_schedule accounting for the fact
+	 * that we have the list_lock already taken.
+	 *
+	 * Not pretty.
+	 */
+	if (new) {
+		int node = spu->node;
+
+		BUG_ON(spu->ctx);
+
+		spu_set_timeslice(new);
+		spu_bind_context(spu, new);
+		cbe_spu_info[node].nr_active++;
+		spu->alloc_state = SPU_USED;
+		wake_up_all(&new->run_wq);
+		spu_release(new);
+	}
 }
 
 /**
@@ -972,11 +1005,8 @@ static int spusched_thread(void *unused)
 					cbe_list) {
 				struct spu_context *ctx = spu->ctx;
 
-				if (ctx) {
-					mutex_unlock(mtx);
+				if (ctx)
 					spusched_tick(ctx);
-					mutex_lock(mtx);
-				}
 			}
 			mutex_unlock(mtx);
 		}



More information about the cbe-oss-dev mailing list