[Cbe-oss-dev] Gang scheduling [RFC] [PATCH 1:9] cbe_spu_info spin lock

Luke Browning lukebr at linux.vnet.ibm.com
Sat Mar 15 07:59:20 EST 2008


Change the mutex protecting cbe_spu_info[] to a spin lock.

This structure groups the physical SPUs. The list_mutex must be changed
to a spin lock because the runq_lock is a spin lock, and mutexes cannot
be nested under spin locks.  The cbe_spu_info[] lock is taken under the
runq_lock because many SPUs may need to be allocated to schedule a gang.
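
As a rough illustration (not part of this patch; the gang-allocation
caller shown here is hypothetical), the nesting this series relies on
looks like:

	/*
	 * Sketch only: reserve SPUs for a gang while holding the runq_lock.
	 * Because runq_lock is a spin lock, the per-node list lock nested
	 * inside it must also be a spin lock, not a mutex.
	 */
	spin_lock(&spu_prio->runq_lock);
	for (node = 0; node < MAX_NUMNODES; node++) {
		spin_lock(&cbe_spu_info[node].list_lock);
		/* ... find and reserve free SPUs for the gang ... */
		spin_unlock(&cbe_spu_info[node].list_lock);
	}
	spin_unlock(&spu_prio->runq_lock);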

Change spu_bind_context() and spu_unbind_context() so that they are not
called under the new spin lock, as that would deadlock if they blocked
on higher-level allocations (mmap) that are protected by mutexes.
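
A minimal sketch of the resulting ordering on the context-switch paths
(it mirrors the sched.c hunks below):

	/* bookkeeping is done under the spin lock ... */
	spin_lock(&cbe_spu_info[node].list_lock);
	cbe_spu_info[node].nr_active++;
	spu->alloc_state = SPU_USED;
	spu->ctx = ctx;
	spin_unlock(&cbe_spu_info[node].list_lock);

	/* ... and the potentially blocking bind happens outside of it */
	spu_bind_context(spu, ctx);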

Signed-off-by: Luke Browning <lukebrowning at us.ibm.com>

---
Index: public_git/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- public_git.orig/arch/powerpc/platforms/cell/spu_base.c	2008-03-06 18:01:53.000000000 -0300
+++ public_git/arch/powerpc/platforms/cell/spu_base.c	2008-03-13 15:14:36.000000000 -0300
@@ -629,10 +629,10 @@
 	if (ret)
 		goto out_free_irqs;
 
-	mutex_lock(&cbe_spu_info[spu->node].list_mutex);
+	spin_lock(&cbe_spu_info[spu->node].list_lock);
 	list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
 	cbe_spu_info[spu->node].n_spus++;
-	mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+	spin_unlock(&cbe_spu_info[spu->node].list_lock);
 
 	mutex_lock(&spu_full_list_mutex);
 	spin_lock_irqsave(&spu_full_list_lock, flags);
@@ -710,7 +710,7 @@
 	int i, ret = 0;
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
-		mutex_init(&cbe_spu_info[i].list_mutex);
+		spin_lock_init(&cbe_spu_info[i].list_lock);
 		INIT_LIST_HEAD(&cbe_spu_info[i].spus);
 	}
 
Index: public_git/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- public_git.orig/arch/powerpc/platforms/cell/spufs/sched.c	2008-03-12 19:08:22.000000000 -0300
+++ public_git/arch/powerpc/platforms/cell/spufs/sched.c	2008-03-13 16:15:32.000000000 -0300
@@ -150,11 +150,11 @@
 		node = ctx->spu->node;
 
 		/*
-		 * Take list_mutex to sync with find_victim().
+		 * Take list_lock to sync with find_victim().
 		 */
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		__spu_update_sched_info(ctx);
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	} else {
 		__spu_update_sched_info(ctx);
 	}
@@ -176,9 +176,9 @@
 {
 	int rval;
 
-	spin_lock(&spu_prio->runq_lock);
+	spin_lock(&cbe_spu_info[node].list_lock);
 	rval = __node_allowed(ctx, node);
-	spin_unlock(&spu_prio->runq_lock);
+	spin_unlock(&cbe_spu_info[node].list_lock);
 
 	return rval;
 }
@@ -196,7 +196,7 @@
 	for_each_online_node(node) {
 		struct spu *spu;
 
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if (spu->alloc_state != SPU_FREE) {
 				struct spu_context *ctx = spu->ctx;
@@ -206,7 +206,7 @@
 				wake_up_all(&ctx->stop_wq);
 			}
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 }
 
@@ -227,7 +227,6 @@
 	ctx->stats.slb_flt_base = spu->stats.slb_flt;
 	ctx->stats.class2_intr_base = spu->stats.class2_intr;
 
-	spu->ctx = ctx;
 	spu->flags = 0;
 	ctx->spu = spu;
 	ctx->ops = &spu_hw_ops;
@@ -250,11 +249,11 @@
 }
 
 /*
- * Must be used with the list_mutex held.
+ * Must be used with the list_lock held.
  */
 static inline int sched_spu(struct spu *spu)
 {
-	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+	BUG_ON(!spin_is_locked(&cbe_spu_info[spu->node].list_lock));
 
 	return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
 }
@@ -308,15 +307,15 @@
 		node = (node < MAX_NUMNODES) ? node : 0;
 		if (!node_allowed(ctx, node))
 			continue;
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if ((!mem_aff || spu->has_mem_affinity) &&
 							sched_spu(spu)) {
-				mutex_unlock(&cbe_spu_info[node].list_mutex);
+				spin_unlock(&cbe_spu_info[node].list_lock);
 				return spu;
 			}
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 	return NULL;
 }
@@ -430,7 +429,6 @@
 	spu->tgid = 0;
 	ctx->ops = &spu_backing_ops;
 	spu->flags = 0;
-	spu->ctx = NULL;
 
 	ctx->stats.slb_flt +=
 		(spu->stats.slb_flt - ctx->stats.slb_flt_base);
@@ -539,11 +537,11 @@
 			mutex_unlock(&ctx->gang->aff_mutex);
 			node = aff_ref_spu->node;
 
-			mutex_lock(&cbe_spu_info[node].list_mutex);
+			spin_lock(&cbe_spu_info[node].list_lock);
 			spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
 			if (spu && spu->alloc_state == SPU_FREE)
 				goto found;
-			mutex_unlock(&cbe_spu_info[node].list_mutex);
+			spin_unlock(&cbe_spu_info[node].list_lock);
 
 			mutex_lock(&ctx->gang->aff_mutex);
 			if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
@@ -559,12 +557,12 @@
 		if (!node_allowed(ctx, node))
 			continue;
 
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if (spu->alloc_state == SPU_FREE)
 				goto found;
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 
  not_found:
@@ -573,7 +571,7 @@
 
  found:
 	spu->alloc_state = SPU_USED;
-	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spin_unlock(&cbe_spu_info[node].list_lock);
 	spu_context_trace(spu_get_idle__found, ctx, spu);
 	spu_init_channels(spu);
 	return spu;
@@ -607,7 +605,7 @@
 		if (!node_allowed(ctx, node))
 			continue;
 
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			struct spu_context *tmp = spu->ctx;
 
@@ -616,7 +614,7 @@
 			    (!victim || tmp->prio > victim->prio))
 				victim = spu->ctx;
 		}
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 
 		if (victim) {
 			/*
@@ -648,10 +646,12 @@
 
 			spu_context_trace(__spu_deactivate__unload, ctx, spu);
 
-			mutex_lock(&cbe_spu_info[node].list_mutex);
-			cbe_spu_info[node].nr_active--;
 			spu_unbind_context(spu, victim);
-			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+			spin_lock(&cbe_spu_info[node].list_lock);
+			spu->ctx = NULL;
+			cbe_spu_info[node].nr_active--;
+			spin_unlock(&cbe_spu_info[node].list_lock);
 
 			victim->stats.invol_ctx_switch++;
 			spu->stats.invol_ctx_switch++;
@@ -673,14 +673,16 @@
 
 	spu_set_timeslice(ctx);
 
-	mutex_lock(&cbe_spu_info[node].list_mutex);
+	spin_lock(&cbe_spu_info[node].list_lock);
 	if (spu->ctx == NULL) {
-		spu_bind_context(spu, ctx);
 		cbe_spu_info[node].nr_active++;
 		spu->alloc_state = SPU_USED;
+		spu->ctx = ctx;
 		success = 1;
 	}
-	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spin_unlock(&cbe_spu_info[node].list_lock);
+
+	spu_bind_context(spu, ctx);
 
 	if (success)
 		wake_up_all(&ctx->run_wq);
@@ -701,13 +703,15 @@
 {
 	int node = spu->node;
 
-	mutex_lock(&cbe_spu_info[node].list_mutex);
+	spu_unbind_context(spu, ctx);
+
+	spin_lock(&cbe_spu_info[node].list_lock);
 	cbe_spu_info[node].nr_active--;
 	spu->alloc_state = SPU_FREE;
-	spu_unbind_context(spu, ctx);
+	spu->ctx = NULL;
 	ctx->stats.invol_ctx_switch++;
 	spu->stats.invol_ctx_switch++;
-	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	spin_unlock(&cbe_spu_info[node].list_lock);
 }
 
 /**
@@ -895,7 +899,7 @@
  *
  * Return the number of tasks currently running or waiting to run.
  *
- * Note that we don't take runq_lock / list_mutex here.  Reading
+ * Note that we don't take runq_lock / list_lock here.  Reading
  * a single 32bit value is atomic on powerpc, and we don't care
  * about memory ordering issues here.
  */
@@ -947,20 +951,20 @@
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule();
 		for (node = 0; node < MAX_NUMNODES; node++) {
-			struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+			spinlock_t *l = &cbe_spu_info[node].list_lock;
 
-			mutex_lock(mtx);
+			spin_lock(l);
 			list_for_each_entry(spu, &cbe_spu_info[node].spus,
 					cbe_list) {
 				struct spu_context *ctx = spu->ctx;
 
 				if (ctx) {
-					mutex_unlock(mtx);
+					spin_unlock(l);
 					spusched_tick(ctx);
-					mutex_lock(mtx);
+					spin_lock(l);
 				}
 			}
-			mutex_unlock(mtx);
+			spin_unlock(l);
 		}
 	}
 
@@ -1092,11 +1096,11 @@
 	kthread_stop(spusched_task);
 
 	for (node = 0; node < MAX_NUMNODES; node++) {
-		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spin_lock(&cbe_spu_info[node].list_lock);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
 			if (spu->alloc_state != SPU_FREE)
 				spu->alloc_state = SPU_FREE;
-		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		spin_unlock(&cbe_spu_info[node].list_lock);
 	}
 	kfree(spu_prio);
 }
Index: public_git/include/asm-powerpc/spu.h
===================================================================
--- public_git.orig/include/asm-powerpc/spu.h	2008-03-06 18:02:07.000000000 -0300
+++ public_git/include/asm-powerpc/spu.h	2008-03-13 15:14:36.000000000 -0300
@@ -185,7 +185,7 @@
 };
 
 struct cbe_spu_info {
-	struct mutex list_mutex;
+	spinlock_t list_lock;
 	struct list_head spus;
 	int n_spus;
 	int nr_active;




