[Cbe-oss-dev] [PATCH 9/11] spufs: fix access mechanism for problem state registers [take 2]

Luke Browning lukebr at linux.vnet.ibm.com
Mon May 26 21:48:55 EST 2008


Fix access mechanism for problem state registers

The current mechanism for providing access to problem state registers
is broken because it relies on the application to invoke spu_run for
the target context.  This works most of the time, as the library
repeatedly calls spu_run to redrive the context after processing an
SPU exception or event, but it is wrong because it assumes that
another SPU error or exception will occur for the target context.
There is no guarantee of that.  It is also undesirable from a
performance perspective, as it introduces delays into the
application.  This patch activates the context in spufs_ps_nopfn
regardless of whether the target context is in kernel mode or user
mode.

Signed-off-by: Luke Browning <lukebrowning at us.ibm.com>

---

This replaces the previous version of this patch.
Tested against the libspe2.mfc test.
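
For context, the path this fixes is the one taken when a thread
touches a problem state mapping directly while the target context is
scheduled out.  A minimal user-side sketch of that pattern follows.
It assumes the standard libspe2 calls (spe_context_create,
spe_program_load, spe_ps_area_get, spe_context_run); the embedded SPU
program handle "spu_program" is hypothetical, the 0xC offset of
SPU_Sig_Notify_1 within the signal-notification-1 page is per the
CBEA, and all error handling is elided:

/* ps_poke.c: build with gcc ps_poke.c -lspe2 -lpthread */
#include <stdint.h>
#include <pthread.h>
#include <libspe2.h>

extern spe_program_handle_t spu_program; /* hypothetical embedded SPU ELF */

static void *run_thread(void *arg)
{
	spe_context_ptr_t spe = arg;
	unsigned int entry = SPE_DEFAULT_ENTRY;
	spe_stop_info_t stop;

	/* Repeatedly drives spu_run(2); in between calls the context
	 * may be scheduled out (SPU_STATE_SAVED). */
	while (spe_context_run(spe, &entry, 0, NULL, NULL, &stop) > 0)
		;
	return NULL;
}

int main(void)
{
	spe_context_ptr_t spe;
	volatile uint32_t *sig1;
	pthread_t tid;

	/* SPE_MAP_PS asks spufs for direct problem state mappings. */
	spe = spe_context_create(SPE_MAP_PS, NULL);
	spe_program_load(spe, &spu_program);
	pthread_create(&tid, NULL, run_thread, spe);

	sig1 = spe_ps_area_get(spe, SPE_SIG_NOTIFY_1_AREA);

	/* This store faults into spufs_ps_nopfn whenever the context
	 * is saved.  Before this patch, resolving the fault depended
	 * on the run thread re-entering spu_run; with it, the kernel
	 * activates the context on its own. */
	sig1[0xC / sizeof(uint32_t)] = 1;

	pthread_join(tid, NULL);
	spe_context_destroy(spe);
	return 0;
}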

---

Index: linux-2.6.25/arch/powerpc/platforms/cell/spufs/file.c
===================================================================
--- linux-2.6.25.orig/arch/powerpc/platforms/cell/spufs/file.c
+++ linux-2.6.25/arch/powerpc/platforms/cell/spufs/file.c
@@ -387,26 +387,7 @@ static unsigned long spufs_ps_nopfn(stru
 	if (ctx->state == SPU_STATE_SAVED) {
 		up_read(&current->mm->mmap_sem);
 		spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx);
-
-		/*
-		 * Activate context if it is inside spu_run.  It may
-		 * not be schedulable under its own control if it has
-		 * faulted.  This should provide concurrency between the
-		 * the faulting thread and the target spu context enabling
-		 * the fault to be more quickly resolved.  find_victim
-		 * may even find a lazily loaded context to preempt
-		 * resulting in no downside at all.  On the other hand,
-		 * there is nothing to keep the context loaded if it
-		 * has faulted as it may become the target of another
-		 * activation.  For this reason, we don't have to worry
-		 * about the context be lazily loaded for an entire
-		 * quantum.  It might even be worth removing the test
-		 * for SPU_SCHED_SPU_RUN.
-		 */
-		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
-			ret = spu_activate(ctx, 0);
-		else
-			ret = 0;
+		ret = spu_activate(ctx, 0);
 		if (!ret) {
 			ret = spufs_wait(ctx->run_wq,
 					ctx->state == SPU_STATE_RUNNABLE);
Index: linux-2.6.25/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6.25.orig/arch/powerpc/platforms/cell/spufs/sched.c
+++ linux-2.6.25/arch/powerpc/platforms/cell/spufs/sched.c
@@ -107,10 +107,10 @@ void spu_set_timeslice(struct spu_contex
 void __spu_update_sched_info(struct spu_context *ctx)
 {
 	/*
-	 * assert that the context is not on the runqueue, so it is safe
-	 * to change its scheduling parameters.
+	 * Remove from the runqueue so that we can apply a new priority.  It
+	 * doesn't need to be re-queued since the caller is spu_run.
 	 */
-	BUG_ON(!list_empty(&ctx->rq));
+	spu_del_from_rq(ctx);
 
 	/*
 	 * 32-Bit assignments are atomic on powerpc, and we don't care about
@@ -220,6 +220,7 @@ void do_notify_spus_active(void)
  */
 static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
 {
+
 	spu_context_trace(spu_bind_context__enter, ctx, spu);
 
 	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
@@ -405,6 +406,10 @@ static void spu_unbind_context(struct sp
 {
 	spu_context_trace(spu_unbind_context__enter, ctx, spu);
 
+	BUG_ON(!mutex_is_locked(&ctx->state_mutex));
+	BUG_ON(spu->ctx != ctx);
+	BUG_ON(ctx->state != SPU_STATE_RUNNABLE);
+
 	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
 
  	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
@@ -464,7 +469,7 @@ static int spu_on_rq(struct spu_context 
 static void __spu_add_to_rq(struct spu_context *ctx)
 {
 	BUG_ON(ctx->state == SPU_STATE_RUNNABLE);
-	BUG_ON(!test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags));
+	BUG_ON(test_bit(SPU_SCHED_DEACTIVATE, &ctx->sched_flags));
 
 	if (list_empty(&ctx->rq)) {
 		list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
@@ -476,11 +481,9 @@ static void __spu_add_to_rq(struct spu_c
 
 static void spu_add_to_rq(struct spu_context *ctx)
 {
-	if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags)) {
-		spin_lock(&spu_prio->runq_lock);
-		__spu_add_to_rq(ctx);
-		spin_unlock(&spu_prio->runq_lock);
-	}
+	spin_lock(&spu_prio->runq_lock);
+	__spu_add_to_rq(ctx);
+	spin_unlock(&spu_prio->runq_lock);
 }
 
 static void __spu_del_from_rq(struct spu_context *ctx)
@@ -594,7 +597,7 @@ static struct spu *spu_get_idle(struct s
  */
 static struct spu *find_victim(struct spu_context *ctx)
 {
-	struct spu_context *victim = NULL;
+	struct spu_context *victim;
 	struct spu_context *victim_pf = NULL;
 	struct spu_context *victim_um = NULL;
 	struct spu *spu;
@@ -620,6 +623,8 @@ restart:
 		if (!retry)
 			continue;
 
+		victim = NULL;
+
 		mutex_lock(&cbe_spu_info[node].list_mutex);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			struct spu_context *tmp = spu->ctx;
@@ -653,7 +658,6 @@ restart:
 
 			/* Lock the context and re-check state. */
 			if (!mutex_trylock(&victim->state_mutex)) {
-				victim = NULL;
 				retry--;
 				goto restart;
 			}
@@ -665,13 +669,12 @@ restart:
 			 * fault to be resolved.
 			 */
 			spu = victim->spu;
-			if (!spu ||
+			if (!spu || (spu->node != node) ||
 			   (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags) &&
 			    (!victim->csa.class_1_dsisr) &&
 			    (victim->prio <= ctx->prio))) {
 
 				mutex_unlock(&victim->state_mutex);
-				victim = NULL;
 				retry--;
 				goto restart;
 			}
@@ -708,6 +711,7 @@ static void __spu_schedule(struct spu *s
 	int node = spu->node;
 	int success = 0;
 
+	BUG_ON(!mutex_is_locked(&ctx->state_mutex));
 	BUG_ON(ctx->state == SPU_STATE_RUNNABLE);
 	BUG_ON(spu_on_rq(ctx));
 	BUG_ON(test_bit(SPU_SCHED_DEACTIVATE, &ctx->sched_flags));
@@ -900,8 +904,8 @@ void spu_deactivate(struct spu_context *
 	 * the callers of grab_runnable_context.
 	 */
 	set_bit(SPU_SCHED_DEACTIVATE, &ctx->sched_flags);
-	__spu_deactivate(ctx, 1, MAX_PRIO);
 	spu_del_from_rq(ctx);
+	__spu_deactivate(ctx, 1, MAX_PRIO);
 }
 
 /**
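
For readers skimming the diff: with this patch applied, the fault
path in spufs_ps_nopfn reads as below (lifted from the first hunk;
the inline comment summarizes the changelog, and the remainder of the
function is unchanged):

	if (ctx->state == SPU_STATE_SAVED) {
		up_read(&current->mm->mmap_sem);
		spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx);
		/* Activate unconditionally instead of only when the
		 * target is inside spu_run, so the faulting thread is
		 * not left waiting for another spu_run invocation. */
		ret = spu_activate(ctx, 0);
		if (!ret) {
			ret = spufs_wait(ctx->run_wq,
					ctx->state == SPU_STATE_RUNNABLE);
		/* ... */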