[Cbe-oss-dev] [PATCH 3/6] spufs: fix starvation case with terminated spes

Luke Browning lukebr at linux.vnet.ibm.com
Fri Feb 15 22:01:58 EST 2008


Fix a spufs scheduler starvation case caused by the stop-n-signal code.

Add a safety net to the spufs time slice code to check for contexts that
are executing user mode code. Preempt these contexts as necessary to
service SPE jobs waiting to run, because there is no guarantee that the
user mode code will invoke the scheduler again. Among other stop-n-signal
cases, this patch deactivates terminated contexts.

Signed-off-by: Luke Browning <lukebrowning at us.ibm.com>

Index: spufs/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- spufs.orig/arch/powerpc/platforms/cell/spufs/sched.c	2008-02-14 21:03:41.000000000 -0200
+++ spufs/arch/powerpc/platforms/cell/spufs/sched.c	2008-02-15 08:29:24.000000000 -0200
@@ -857,33 +857,58 @@
 	struct spu_context *new = NULL;
 	struct spu *spu = NULL;
 	u32 status;
+	u32 stopped;
+	u64 dsisr;
+	int force;
 
 	if (spu_acquire(ctx))
 		BUG();	/* a kernel thread never has signals pending */
 
 	if (ctx->state != SPU_STATE_RUNNABLE)
 		goto out;
-	if (spu_stopped(ctx, &status))
-		goto out;
 	if (ctx->flags & SPU_CREATE_NOSCHED)
 		goto out;
-	if (ctx->policy == SCHED_FIFO)
+	if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
 		goto out;
-
-	if (--ctx->time_slice)
+	dsisr = ctx->csa.dsisr;
+	if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
+		goto out;
+	if (ctx->csa.class_0_pending)
 		goto out;
 
-	spu = ctx->spu;
-
-	spu_context_trace(spusched_tick__preempt, ctx, spu);
+	force = 0;
 
-	new = grab_runnable_context(ctx->prio + 1, spu->node);
-	if (new) {
-		spu_unschedule(spu, ctx);
-		spu_add_to_rq(ctx);
-	} else {
-		spu_context_nospu_trace(spusched_tick__newslice, ctx);
-		ctx->time_slice++;
+	/*
+	 * If the context is loaded but not running, it is executing user
+	 * mode code.  Force it out if there is another runnable context.
+	 * It would have yielded if a context had been on the runqueue
+	 * when it was returning to user mode.  Can't depend on the
+	 * library to invoke spufs scheduler again.  This is a starvation
+	 * issue as the scheduler must be able to service the runqueue.
+	 */
+	stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+		SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+	status = ctx->ops->status_read(ctx);
+	if (!(status & SPU_STATUS_RUNNING) && (status & stopped))
+		force = 1;
+	else if ((ctx->policy != SCHED_FIFO) && (--ctx->time_slice == 0))
+		force = 2;
+
+	if (force) {
+		spu = ctx->spu;
+
+		spu_context_trace(spusched_tick__preempt, ctx, spu);
+
+		new = grab_runnable_context(MAX_PRIO, spu->node);
+		if (new) {
+			spu_unschedule(spu, ctx);
+			if (force == 2)
+				spu_add_to_rq(ctx);
+		} else {
+			spu_context_nospu_trace(spusched_tick__newslice, ctx);
+			if ((ctx->policy != SCHED_FIFO) && (force == 2))
+				ctx->time_slice++;
+		}
 	}
 out:
 	spu_release(ctx);





More information about the cbe-oss-dev mailing list