[Cbe-oss-dev] [PATCH] spufs: rework list management and associated locking

Fri Jul 20 02:12:11 EST 2007

This sorts out the various lists and related locks in the spu code.

In detail:

 - the per-node free_spus and active_list are gone.  Instead struct spu
   gained an alloc_state member telling whether the spu is free or not
 - the per-node spus array is now locked by a per-node mutex, which
   takes over from the global spu_lock and the per-node active_mutex
 - the spu_alloc* and spu_free function are gone as the state change is
   now done inline in the spufs code.  This allows some more sharing of
   code for the affinity vs normal case and more efficient locking
 - some little refactoring in the affinity code for this locking scheme

I've tested this with Jeremy's and Rui's testsuites and there are no
regressions, but neither of them has much affinity coverage. Andrew can
you review this with an eye on affinity issues and try to run some
affinity tests?


Signed-off-by: Christoph Hellwig <hch at lst.de>

Index: linux-cell/arch/powerpc/platforms/cell/spu_base.c
===================================================================

--- linux-cell.orig/arch/powerpc/platforms/cell/spu_base.c	2007-07-19 16:59:32.000000000 +0200
+++ linux-cell/arch/powerpc/platforms/cell/spu_base.c	2007-07-19 17:00:45.000000000 +0200
@@ -416,7 +416,7 @@ static void spu_free_irqs(struct spu *sp
 		free_irq(spu->irqs[2], spu);
 }
 
-static void spu_init_channels(struct spu *spu)
+void spu_init_channels(struct spu *spu)
 {
 	static const struct {
 		 unsigned channel;
@@ -449,66 +449,7 @@ static void spu_init_channels(struct spu
 		out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
 	}
 }
-
-struct spu *spu_alloc_spu(struct spu *req_spu)
-{
-	struct spu *spu, *ret = NULL;
-
-	spin_lock(&spu_lock);
-	list_for_each_entry(spu, &cbe_spu_info[req_spu->node].free_spus, list) {
-		if (spu == req_spu) {
-			list_del_init(&spu->list);
-			pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-			spu_init_channels(spu);
-			ret = spu;
-			break;
-		}
-	}
-	spin_unlock(&spu_lock);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(spu_alloc_spu);
-
-struct spu *spu_alloc_node(int node)
-{
-	struct spu *spu = NULL;
-
-	spin_lock(&spu_lock);
-	if (!list_empty(&cbe_spu_info[node].free_spus)) {
-		spu = list_entry(cbe_spu_info[node].free_spus.next, struct spu,
-									list);
-		list_del_init(&spu->list);
-		pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-	}
-	spin_unlock(&spu_lock);
-
-	if (spu)
-		spu_init_channels(spu);
-	return spu;
-}
-EXPORT_SYMBOL_GPL(spu_alloc_node);
-
-struct spu *spu_alloc(void)
-{
-	struct spu *spu = NULL;
-	int node;
-
-	for (node = 0; node < MAX_NUMNODES; node++) {
-		spu = spu_alloc_node(node);
-		if (spu)
-			break;
-	}
-
-	return spu;
-}
-
-void spu_free(struct spu *spu)
-{
-	spin_lock(&spu_lock);
-	list_add_tail(&spu->list, &cbe_spu_info[spu->node].free_spus);
-	spin_unlock(&spu_lock);
-}
-EXPORT_SYMBOL_GPL(spu_free);
+EXPORT_SYMBOL_GPL(spu_init_channels);
 
 static int spu_shutdown(struct sys_device *sysdev)
 {
@@ -603,6 +544,8 @@ static int __init create_spu(void *data)
 	if (!spu)
 		goto out;
 
+	spu->alloc_state = SPU_FREE;
+
 	spin_lock_init(&spu->register_lock);
 	spin_lock(&spu_lock);
 	spu->number = number++;
@@ -623,11 +566,10 @@ static int __init create_spu(void *data)
 	if (ret)
 		goto out_free_irqs;
 
-	spin_lock(&spu_lock);
-	list_add(&spu->list, &cbe_spu_info[spu->node].free_spus);
+	mutex_lock(&cbe_spu_info[spu->node].list_mutex);
 	list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
 	cbe_spu_info[spu->node].n_spus++;
-	spin_unlock(&spu_lock);
+	mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
 
 	mutex_lock(&spu_full_list_mutex);
 	spin_lock_irqsave(&spu_full_list_lock, flags);
@@ -833,8 +775,8 @@ static int __init init_spu_base(void)
 	int i, ret = 0;
 
 	for (i = 0; i < MAX_NUMNODES; i++) {
+		mutex_init(&cbe_spu_info[i].list_mutex);
 		INIT_LIST_HEAD(&cbe_spu_info[i].spus);
-		INIT_LIST_HEAD(&cbe_spu_info[i].free_spus);
 	}
 
 	if (!spu_management_ops)
Index: linux-cell/include/asm-powerpc/spu.h
===================================================================
--- linux-cell.orig/include/asm-powerpc/spu.h	2007-07-19 16:59:32.000000000 +0200
+++ linux-cell/include/asm-powerpc/spu.h	2007-07-19 17:00:45.000000000 +0200
@@ -127,10 +127,9 @@ struct spu {
 	unsigned long problem_phys;
 	struct spu_problem __iomem *problem;
 	struct spu_priv2 __iomem *priv2;
-	struct list_head list;
 	struct list_head cbe_list;
-	struct list_head sched_list;
 	struct list_head full_list;
+	enum { SPU_FREE, SPU_USED } alloc_state;
 	int number;
 	unsigned int irqs[3];
 	u32 node;
@@ -193,18 +192,16 @@ struct spu {
 };
 
 struct cbe_spu_info {
+	struct mutex list_mutex;
 	struct list_head spus;
-	struct list_head free_spus;
 	int n_spus;
+	int nr_active;
 	atomic_t reserved_spus;
 };
 
 extern struct cbe_spu_info cbe_spu_info[];
 
-struct spu *spu_alloc(void);
-struct spu *spu_alloc_node(int node);
-struct spu *spu_alloc_spu(struct spu *spu);
-void spu_free(struct spu *spu);
+void spu_init_channels(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);
Index: linux-cell/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-cell.orig/arch/powerpc/platforms/cell/spufs/sched.c	2007-07-19 17:00:41.000000000 +0200
+++ linux-cell/arch/powerpc/platforms/cell/spufs/sched.c	2007-07-19 17:00:45.000000000 +0200
@@ -51,9 +51,6 @@ struct spu_prio_array {
 	DECLARE_BITMAP(bitmap, MAX_PRIO);
 	struct list_head runq[MAX_PRIO];
 	spinlock_t runq_lock;
-	struct list_head active_list[MAX_NUMNODES];
-	struct mutex active_mutex[MAX_NUMNODES];
-	int nr_active[MAX_NUMNODES];
 	int nr_waiting;
 };
 
@@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_
 	ctx->policy = current->policy;
 
 	/*
-	 * A lot of places that don't hold active_mutex poke into
+	 * A lot of places that don't hold list_mutex poke into
 	 * cpus_allowed, including grab_runnable_context which
 	 * already holds the runq_lock.  So abuse runq_lock
 	 * to protect this field aswell.
@@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_co
 {
 	int node = ctx->spu->node;
 
-	mutex_lock(&spu_prio->active_mutex[node]);
+	mutex_lock(&cbe_spu_info[node].list_mutex);
 	__spu_update_sched_info(ctx);
-	mutex_unlock(&spu_prio->active_mutex[node]);
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
 }
 
 static int __node_allowed(struct spu_context *ctx, int node)
@@ -169,39 +166,6 @@ static int node_allowed(struct spu_conte
 	return rval;
 }
 
-/**
- * spu_add_to_active_list - add spu to active list
- * @spu:	spu to add to the active list
- */
-static void spu_add_to_active_list(struct spu *spu)
-{
-	int node = spu->node;
-
-	mutex_lock(&spu_prio->active_mutex[node]);
-	spu_prio->nr_active[node]++;
-	list_add_tail(&spu->list, &spu_prio->active_list[node]);
-	mutex_unlock(&spu_prio->active_mutex[node]);
-}
-
-static void __spu_remove_from_active_list(struct spu *spu)
-{
-	list_del_init(&spu->list);
-	spu_prio->nr_active[spu->node]--;
-}
-
-/**
- * spu_remove_from_active_list - remove spu from active list
- * @spu:       spu to remove from the active list
- */
-static void spu_remove_from_active_list(struct spu *spu)
-{
-	int node = spu->node;
-
-	mutex_lock(&spu_prio->active_mutex[node]);
-	__spu_remove_from_active_list(spu);
-	mutex_unlock(&spu_prio->active_mutex[node]);
-}
-
 static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
 
 void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
@@ -219,14 +183,18 @@ static void notify_spus_active(void)
 	 */
 	for (node = 0; node < MAX_NUMNODES; node++) {
 		struct spu *spu;
-		mutex_lock(&spu_prio->active_mutex[node]);
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
-			struct spu_context *ctx = spu->ctx;
-			set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
-			mb();
-			wake_up_all(&ctx->stop_wq);
+			if (spu->alloc_state != SPU_FREE) {
+				struct spu_context *ctx = spu->ctx;
+				set_bit(SPU_SCHED_NOTIFY_ACTIVE,
+					&ctx->sched_flags);
+				mb();
+				wake_up_all(&ctx->stop_wq);
+			}
 		}
-		mutex_unlock(&spu_prio->active_mutex[node]);
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
 	}
 }
 
@@ -247,10 +215,12 @@ int spu_switch_event_unregister(struct n
 EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
 
 /*
- * XXX(hch): needs locking.
+ * Must be used with the list_mutex held.
  */
 static inline int sched_spu(struct spu *spu)
 {
+	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
 	return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
 }
 
@@ -303,11 +273,15 @@ static struct spu *aff_ref_location(stru
 		node = (node < MAX_NUMNODES) ? node : 0;
 		if (!node_allowed(ctx, node))
 			continue;
+		mutex_lock(&cbe_spu_info[node].list_mutex);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			if ((!mem_aff || spu->has_mem_affinity) &&
-							sched_spu(spu))
+							sched_spu(spu)) {
+				mutex_unlock(&cbe_spu_info[node].list_mutex);
 				return spu;
+			}
 		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
 	}
 	return NULL;
 }
@@ -335,13 +309,14 @@ static void aff_set_ref_point_location(s
 	gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
 }
 
-static struct spu *ctx_location(struct spu *ref, int offset)
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
 {
 	struct spu *spu;
 
 	spu = NULL;
 	if (offset >= 0) {
 		list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+			BUG_ON(spu->node != node);
 			if (offset == 0)
 				break;
 			if (sched_spu(spu))
@@ -349,12 +324,14 @@ static struct spu *ctx_location(struct s
 		}
 	} else {
 		list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+			BUG_ON(spu->node != node);
 			if (offset == 0)
 				break;
 			if (sched_spu(spu))
 				offset++;
 		}
 	}
+
 	return spu;
 }
 
@@ -362,13 +339,13 @@ static struct spu *ctx_location(struct s
  * affinity_check is called each time a context is going to be scheduled.
  * It returns the spu ptr on which the context must run.
  */
-struct spu *affinity_check(struct spu_context *ctx)
+static int has_affinity(struct spu_context *ctx)
 {
-	struct spu_gang *gang;
+	struct spu_gang *gang = ctx->gang;
 
 	if (list_empty(&ctx->aff_list))
-		return NULL;
-	gang = ctx->gang;
+		return 0;
+
 	mutex_lock(&gang->aff_mutex);
 	if (!gang->aff_ref_spu) {
 		if (!(gang->aff_flags & AFF_MERGED))
@@ -378,9 +355,8 @@ struct spu *affinity_check(struct spu_co
 		aff_set_ref_point_location(gang);
 	}
 	mutex_unlock(&gang->aff_mutex);
-	if (!gang->aff_ref_spu)
-		return NULL;
-	return ctx_location(gang->aff_ref_spu, ctx->aff_offset);
+
+	return gang->aff_ref_spu != NULL;
 }
 
 /**
@@ -533,22 +509,41 @@ static void spu_prio_wait(struct spu_con
 
 static struct spu *spu_get_idle(struct spu_context *ctx)
 {
-	struct spu *spu = NULL;
-	int node = cpu_to_node(raw_smp_processor_id());
-	int n;
-
-	spu = affinity_check(ctx);
-	if (spu)
-		return spu_alloc_spu(spu);
+	struct spu *spu;
+	int node, n;
+
+	if (has_affinity(ctx)) {
+		node = ctx->gang->aff_ref_spu->node;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
+		if (spu && spu->alloc_state == SPU_FREE)
+			goto found;
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+		return NULL;
+	}
 
+	node = cpu_to_node(raw_smp_processor_id());
 	for (n = 0; n < MAX_NUMNODES; n++, node++) {
 		node = (node < MAX_NUMNODES) ? node : 0;
 		if (!node_allowed(ctx, node))
 			continue;
-		spu = spu_alloc_node(node);
-		if (spu)
-			break;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->alloc_state == SPU_FREE)
+				goto found;
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
 	}
+
+	return NULL;
+
+ found:
+	spu->alloc_state = SPU_USED;
+	mutex_unlock(&cbe_spu_info[node].list_mutex);
+	pr_debug("Got SPU %d %d\n", spu->number, spu->node);
+	spu_init_channels(spu);
 	return spu;
 }
 
@@ -578,7 +573,7 @@ static struct spu *find_victim(struct sp
 		if (!node_allowed(ctx, node))
 			continue;
 
-		mutex_lock(&spu_prio->active_mutex[node]);
+		mutex_lock(&cbe_spu_info[node].list_mutex);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
 			struct spu_context *tmp = spu->ctx;
 
@@ -586,7 +581,7 @@ static struct spu *find_victim(struct sp
 			    (!victim || tmp->prio > victim->prio))
 				victim = spu->ctx;
 		}
-		mutex_unlock(&spu_prio->active_mutex[node]);
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
 
 		if (victim) {
 			/*
@@ -611,7 +606,11 @@ static struct spu *find_victim(struct sp
 				victim = NULL;
 				goto restart;
 			}
-			spu_remove_from_active_list(spu);
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			cbe_spu_info[node].nr_active--;
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
 			spu_unbind_context(spu, victim);
 			victim->stats.invol_ctx_switch++;
 			spu->stats.invol_ctx_switch++;
@@ -660,8 +659,13 @@ int spu_activate(struct spu_context *ctx
 		if (!spu && rt_prio(ctx->prio))
 			spu = find_victim(ctx);
 		if (spu) {
+			int node = spu->node;
+
 			spu_bind_context(spu, ctx);
-			spu_add_to_active_list(spu);
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			cbe_spu_info[node].nr_active++;
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
 			return 0;
 		}
 
@@ -710,11 +714,17 @@ static int __spu_deactivate(struct spu_c
 	if (spu) {
 		new = grab_runnable_context(max_prio, spu->node);
 		if (new || force) {
-			spu_remove_from_active_list(spu);
+			int node = spu->node;
+
 			spu_unbind_context(spu, ctx);
 			ctx->stats.vol_ctx_switch++;
 			spu->stats.vol_ctx_switch++;
-			spu_free(spu);
+
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			spu->alloc_state = SPU_FREE;
+			cbe_spu_info[node].nr_active--;
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
+
 			if (new)
 				wake_up(&new->stop_wq);
 		}
@@ -764,7 +774,7 @@ static void spusched_tick(struct spu_con
 		return;
 
 	/*
-	 * Unfortunately active_mutex ranks outside of state_mutex, so
+	 * Unfortunately list_mutex ranks outside of state_mutex, so
 	 * we have to trylock here.  If we fail give the context another
 	 * tick and try again.
 	 */
@@ -774,12 +784,11 @@ static void spusched_tick(struct spu_con
 
 		new = grab_runnable_context(ctx->prio + 1, spu->node);
 		if (new) {
-
-			__spu_remove_from_active_list(spu);
 			spu_unbind_context(spu, ctx);
 			ctx->stats.invol_ctx_switch++;
 			spu->stats.invol_ctx_switch++;
-			spu_free(spu);
+			spu->alloc_state = SPU_FREE;
+			cbe_spu_info[spu->node].nr_active--;
 			wake_up(&new->stop_wq);
 			/*
 			 * We need to break out of the wait loop in
@@ -800,7 +809,7 @@ static void spusched_tick(struct spu_con
  *
  * Return the number of tasks currently running or waiting to run.
  *
- * Note that we don't take runq_lock / active_mutex here.  Reading
+ * Note that we don't take runq_lock / list_mutex here.  Reading
  * a single 32bit value is atomic on powerpc, and we don't care
  * about memory ordering issues here.
  */
@@ -809,7 +818,7 @@ static unsigned long count_active_contex
 	int nr_active = 0, node;
 
 	for (node = 0; node < MAX_NUMNODES; node++)
-		nr_active += spu_prio->nr_active[node];
+		nr_active += cbe_spu_info[node].nr_active;
 	nr_active += spu_prio->nr_waiting;
 
 	return nr_active;
@@ -856,10 +865,11 @@ static int spusched_thread(void *unused)
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule();
 		for (node = 0; node < MAX_NUMNODES; node++) {
-			mutex_lock(&spu_prio->active_mutex[node]);
-			list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
-				spusched_tick(spu->ctx);
-			mutex_unlock(&spu_prio->active_mutex[node]);
+			mutex_lock(&cbe_spu_info[node].list_mutex);
+			list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+				if (spu->alloc_state != SPU_FREE)
+					spusched_tick(spu->ctx);
+			mutex_unlock(&cbe_spu_info[node].list_mutex);
 		}
 	}
 
@@ -918,8 +928,6 @@ int __init spu_sched_init(void)
 		__clear_bit(i, spu_prio->bitmap);
 	}
 	__set_bit(MAX_PRIO, spu_prio->bitmap);
-	for (i = 0; i < MAX_NUMNODES; i++)
-		mutex_init(&spu_prio->active_mutex[i]);
 
 	spin_lock_init(&spu_prio->runq_lock);
 
@@ -959,10 +967,11 @@ void __exit spu_sched_exit(void)
 	kthread_stop(spusched_task);
 
 	for (node = 0; node < MAX_NUMNODES; node++) {
-		mutex_lock(&spu_prio->active_mutex[node]);
+		mutex_lock(&cbe_spu_info[node].list_mutex);
 		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
-			spu_free(spu);
-		mutex_unlock(&spu_prio->active_mutex[node]);
+			if (spu->alloc_state != SPU_FREE)
+				spu->alloc_state = SPU_FREE;
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
 	}
 	kfree(spu_prio);
 }