[Cbe-oss-dev] [RFC] [PATCH 3:8] SPU Gang Scheduling - Change cbe_spu_info[] mutex to spin lock

Tue Mar 4 06:49:32 EST 2008

Change the mutex protecting cbe_spu_info[] to a spin lock.

This structure groups the physical spus. The list_mutex must be changed 
to a spin lock, because the runq_lock is a spin lock and a mutex may 
sleep, so a mutex cannot be taken while a spin lock is held.  The lock 
for cbe_spu_info[] is taken under the runq_lock, as many spus may need 
to be allocated to schedule a gang.

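Change spu_bind_context() and spu_unbind_context() so that they are not 
called under the new spin lock, as that would cause a deadlock if they
blocked on higher-level allocations (mmap) that are protected by mutexes.
The spu->ctx assignment moves out of these functions and into their
callers, where it is done under the list_lock.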

Signed-off-by: Luke Browning <lukebrowning at us.ibm.com>

---
Index: spufs/arch/powerpc/platforms/cell/spu_base.c
===================================================================

--- spufs.orig/arch/powerpc/platforms/cell/spu_base.c
+++ spufs/arch/powerpc/platforms/cell/spu_base.c
@@ -631,10 +631,10 @@ static int __init create_spu(void *data)
 	if (ret)
 		goto out_free_irqs;
 
-	mutex_lock(&cbe_spu_info[spu->node].list_mutex);
+	spin_lock(&cbe_spu_info[spu->node].list_lock);
 	list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
 	cbe_spu_info[spu->node].n_spus++;
-	mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+	spin_unlock(&cbe_spu_info[spu->node].list_lock);
 
 	mutex_lock(&spu_full_list_mutex);
 	spin_lock_irqsave(&spu_full_list_lock, flags);
@@ -712,7 +712,7 @@ static int __init init_spu_base(void)
 	int i, ret = 0;
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
-		mutex_init(&cbe_spu_info[i].list_mutex);
+		spin_lock_init(&cbe_spu_info[i].list_lock);
 		INIT_LIST_HEAD(&cbe_spu_info[i].spus);
 	}
 
Index: spufs/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- spufs.orig/arch/powerpc/platforms/cell/spufs/sched.c
+++ spufs/arch/powerpc/platforms/cell/spufs/sched.c
@@ -144,11 +144,11 @@ void spu_update_sched_info(struct spu_co
 		node = ctx->spu->node;
 
 		/*
-		 * Take list_mutex to sync with find_victim().
+		 * Take list_lock to sync with find_victim().
 		 */
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		__spu_update_sched_info(ctx);
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	} else {
 		__spu_update_sched_info(ctx);
 	}
@@ -190,7 +190,7 @@ void do_notify_spus_active(void)
 	for_each_online_node(node) {
 		struct spu *spu;
 
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if (spu->alloc_state != SPU_FREE) {
 				struct spu_context *ctx = spu->ctx;
@@ -200,7 +200,7 @@ void do_notify_spus_active(void)
 				wake_up_all(&ctx->stop_wq);
 			}
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 }
 
@@ -221,7 +221,6 @@ static void spu_bind_context(struct spu 
 	ctx->stats.slb_flt_base = spu->stats.slb_flt;
 	ctx->stats.class2_intr_base = spu->stats.class2_intr;
 
-	spu->ctx = ctx;
 	spu->flags = 0;
 	ctx->spu = spu;
 	ctx->ops = &spu_hw_ops;
@@ -244,11 +243,11 @@ static void spu_bind_context(struct spu 
 }
 
 /*
- * Must be used with the list_mutex held.
+ * Must be used with the list_lock held.
  */
 static inline int sched_spu(struct spu *spu)
 {
-	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+	BUG_ON(!spin_is_locked(&cbe_spu_info[spu->node].list_lock));
 
 	return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
 }
@@ -302,15 +301,15 @@ static struct spu *aff_ref_location(stru
 		node = (node < MAX_NUMNODES) ? node : 0;
 		if (!node_allowed(ctx, node))
 			continue;
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if ((!mem_aff || spu->has_mem_affinity) &&
 							sched_spu(spu)) {
-				mutex_unlock(&cbe_spu_info[node].list_mutex);
+				spin_unlock(&cbe_spu_info[node].list_lock);
 				return spu;
 			}
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 	return NULL;
 }
@@ -424,7 +423,6 @@ static void spu_unbind_context(struct sp
 	spu->tgid = 0;
 	ctx->ops = &spu_backing_ops;
 	spu->flags = 0;
-	spu->ctx = NULL;
 
 	ctx->stats.slb_flt +=
 		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
@@ -533,11 +531,11 @@ static struct spu *spu_get_idle(struct s
 			mutex_unlock(&ctx->gang->aff_mutex);
 			node = aff_ref_spu->node;
 
-			mutex_lock(&cbe_spu_info[node].list_mutex);
+			spin_lock(&cbe_spu_info[node].list_lock);
 			spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
 			if (spu && spu->alloc_state == SPU_FREE)
 				goto found;
-			mutex_unlock(&cbe_spu_info[node].list_mutex);
+			spin_unlock(&cbe_spu_info[node].list_lock);
 
 			mutex_lock(&ctx->gang->aff_mutex);
 			if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
@@ -553,12 +551,12 @@ static struct spu *spu_get_idle(struct s
 		if (!node_allowed(ctx, node))
 			continue;
 
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if (spu->alloc_state == SPU_FREE)
 				goto found;
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 
  not_found:
@@ -567,7 +565,7 @@ static struct spu *spu_get_idle(struct s
 
  found:
 	spu->alloc_state = SPU_USED;
-	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spin_unlock(&cbe_spu_info[node].list_lock);
 	spu_context_trace(spu_get_idle__found, ctx, spu);
 	spu_init_channels(spu);
 	return spu;
@@ -601,7 +599,7 @@ static struct spu *find_victim(struct sp
 		if (!node_allowed(ctx, node))
 			continue;
 
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			struct spu_context *tmp = spu->ctx;
 
@@ -610,7 +608,7 @@ static struct spu *find_victim(struct sp
 			    (!victim || tmp->prio > victim->prio))
 				victim = spu->ctx;
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 
 		if (victim) {
 			/*
@@ -642,10 +640,12 @@ static struct spu *find_victim(struct sp
 
 			spu_context_trace(__spu_deactivate__unload, ctx, spu);
 
-			mutex_lock(&cbe_spu_info[node].list_mutex);
-			cbe_spu_info[node].nr_active--;
 			spu_unbind_context(spu, victim);
-			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			spin_lock(&cbe_spu_info[node].list_lock);
+			spu->ctx = NULL;
+			cbe_spu_info[node].nr_active--;
+			spin_unlock(&cbe_spu_info[node].list_lock);
 
 			victim->stats.invol_ctx_switch++;
 			spu->stats.invol_ctx_switch++;
@@ -667,14 +667,16 @@ static void __spu_schedule(struct spu *s
 
 	spu_set_timeslice(ctx);
 
-	mutex_lock(&cbe_spu_info[node].list_mutex);
+	spin_lock(&cbe_spu_info[node].list_lock);
 	if (spu->ctx == NULL) {
-		spu_bind_context(spu, ctx);
 		cbe_spu_info[node].nr_active++;
 		spu->alloc_state = SPU_USED;
+		spu->ctx = ctx;
 		success = 1;
 	}
-	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spin_unlock(&cbe_spu_info[node].list_lock);
+
+	spu_bind_context(spu, ctx);
 
 	if (success)
 		wake_up_all(&ctx->run_wq);
@@ -695,13 +697,15 @@ static void spu_unschedule(struct spu *s
 {
 	int node = spu->node;
 
-	mutex_lock(&cbe_spu_info[node].list_mutex);
+	spu_unbind_context(spu, ctx);
+
+	spin_lock(&cbe_spu_info[node].list_lock);
 	cbe_spu_info[node].nr_active--;
 	spu->alloc_state = SPU_FREE;
-	spu_unbind_context(spu, ctx);
+	spu->ctx = NULL;
 	ctx->stats.invol_ctx_switch++;
 	spu->stats.invol_ctx_switch++;
-	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spin_unlock(&cbe_spu_info[node].list_lock);
 }
 
 /**
@@ -896,7 +900,7 @@ out:
  *
  * Return the number of tasks currently running or waiting to run.
  *
- * Note that we don't take runq_lock / list_mutex here.  Reading
+ * Note that we don't take runq_lock / list_lock here.  Reading
  * a single 32bit value is atomic on powerpc, and we don't care
  * about memory ordering issues here.
  */
@@ -948,20 +952,20 @@ static int spusched_thread(void *unused)
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule();
 		for (node = 0; node < MAX_NUMNODES; node++) {
-			struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+			spinlock_t *l = &cbe_spu_info[node].list_lock;
 
-			mutex_lock(mtx);
+			spin_lock(l);
 			list_for_each_entry(spu, &cbe_spu_info[node].spus,
 					cbe_list) {
 				struct spu_context *ctx = spu->ctx;
 
 				if (ctx) {
-					mutex_unlock(mtx);
+					spin_unlock(l);
 					spusched_tick(ctx);
-					mutex_lock(mtx);
+					spin_lock(l);
 				}
 			}
-			mutex_unlock(mtx);
+			spin_unlock(l);
 		}
 	}
 
@@ -1093,11 +1097,11 @@ void spu_sched_exit(void)
 	kthread_stop(spusched_task);
 
 	for (node = 0; node < MAX_NUMNODES; node++) {
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
 			if (spu->alloc_state != SPU_FREE)
 				spu->alloc_state = SPU_FREE;
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 	kfree(spu_prio);
 }
Index: spufs/include/asm-powerpc/spu.h
===================================================================
--- spufs.orig/include/asm-powerpc/spu.h
+++ spufs/include/asm-powerpc/spu.h
@@ -186,7 +186,7 @@ struct spu {
 };
 
 struct cbe_spu_info {
-	struct mutex list_mutex;
+	spinlock_t list_lock;
 	struct list_head spus;
 	int n_spus;
 	int nr_active;