[Cbe-oss-dev] [PATCH] spufs: rework list management and associated locking
Christoph Hellwig
hch at lst.de
Sat Jul 21 03:55:30 EST 2007
When rebasing my quilt tree I found the patch was quite garbeled because
parts leaked into other patch in the series. Here's one that's okay
and includes the fix I just posted:
Index: linux-cell/arch/powerpc/platforms/cell/spu_base.c
===================================================================
--- linux-cell.orig/arch/powerpc/platforms/cell/spu_base.c 2007-07-20 18:17:43.000000000 +0200
+++ linux-cell/arch/powerpc/platforms/cell/spu_base.c 2007-07-20 18:39:39.000000000 +0200
@@ -416,7 +416,7 @@ static void spu_free_irqs(struct spu *sp
free_irq(spu->irqs[2], spu);
}
-static void spu_init_channels(struct spu *spu)
+void spu_init_channels(struct spu *spu)
{
static const struct {
unsigned channel;
@@ -449,66 +449,7 @@ static void spu_init_channels(struct spu
out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
}
}
-
-struct spu *spu_alloc_spu(struct spu *req_spu)
-{
- struct spu *spu, *ret = NULL;
-
- spin_lock(&spu_lock);
- list_for_each_entry(spu, &cbe_spu_info[req_spu->node].free_spus, list) {
- if (spu == req_spu) {
- list_del_init(&spu->list);
- pr_debug("Got SPU %d %d\n", spu->number, spu->node);
- spu_init_channels(spu);
- ret = spu;
- break;
- }
- }
- spin_unlock(&spu_lock);
- return ret;
-}
-EXPORT_SYMBOL_GPL(spu_alloc_spu);
-
-struct spu *spu_alloc_node(int node)
-{
- struct spu *spu = NULL;
-
- spin_lock(&spu_lock);
- if (!list_empty(&cbe_spu_info[node].free_spus)) {
- spu = list_entry(cbe_spu_info[node].free_spus.next, struct spu,
- list);
- list_del_init(&spu->list);
- pr_debug("Got SPU %d %d\n", spu->number, spu->node);
- }
- spin_unlock(&spu_lock);
-
- if (spu)
- spu_init_channels(spu);
- return spu;
-}
-EXPORT_SYMBOL_GPL(spu_alloc_node);
-
-struct spu *spu_alloc(void)
-{
- struct spu *spu = NULL;
- int node;
-
- for (node = 0; node < MAX_NUMNODES; node++) {
- spu = spu_alloc_node(node);
- if (spu)
- break;
- }
-
- return spu;
-}
-
-void spu_free(struct spu *spu)
-{
- spin_lock(&spu_lock);
- list_add_tail(&spu->list, &cbe_spu_info[spu->node].free_spus);
- spin_unlock(&spu_lock);
-}
-EXPORT_SYMBOL_GPL(spu_free);
+EXPORT_SYMBOL_GPL(spu_init_channels);
static int spu_shutdown(struct sys_device *sysdev)
{
@@ -603,6 +544,8 @@ static int __init create_spu(void *data)
if (!spu)
goto out;
+ spu->alloc_state = SPU_FREE;
+
spin_lock_init(&spu->register_lock);
spin_lock(&spu_lock);
spu->number = number++;
@@ -623,11 +566,10 @@ static int __init create_spu(void *data)
if (ret)
goto out_free_irqs;
- spin_lock(&spu_lock);
- list_add(&spu->list, &cbe_spu_info[spu->node].free_spus);
+ mutex_lock(&cbe_spu_info[spu->node].list_mutex);
list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
cbe_spu_info[spu->node].n_spus++;
- spin_unlock(&spu_lock);
+ mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
mutex_lock(&spu_full_list_mutex);
spin_lock_irqsave(&spu_full_list_lock, flags);
@@ -833,8 +775,8 @@ static int __init init_spu_base(void)
int i, ret = 0;
for (i = 0; i < MAX_NUMNODES; i++) {
+ mutex_init(&cbe_spu_info[i].list_mutex);
INIT_LIST_HEAD(&cbe_spu_info[i].spus);
- INIT_LIST_HEAD(&cbe_spu_info[i].free_spus);
}
if (!spu_management_ops)
Index: linux-cell/include/asm-powerpc/spu.h
===================================================================
--- linux-cell.orig/include/asm-powerpc/spu.h 2007-07-20 18:17:43.000000000 +0200
+++ linux-cell/include/asm-powerpc/spu.h 2007-07-20 18:39:39.000000000 +0200
@@ -127,10 +127,9 @@ struct spu {
unsigned long problem_phys;
struct spu_problem __iomem *problem;
struct spu_priv2 __iomem *priv2;
- struct list_head list;
struct list_head cbe_list;
- struct list_head sched_list;
struct list_head full_list;
+ enum { SPU_FREE, SPU_USED } alloc_state;
int number;
unsigned int irqs[3];
u32 node;
@@ -193,18 +192,16 @@ struct spu {
};
struct cbe_spu_info {
+ struct mutex list_mutex;
struct list_head spus;
- struct list_head free_spus;
int n_spus;
+ int nr_active;
atomic_t reserved_spus;
};
extern struct cbe_spu_info cbe_spu_info[];
-struct spu *spu_alloc(void);
-struct spu *spu_alloc_node(int node);
-struct spu *spu_alloc_spu(struct spu *spu);
-void spu_free(struct spu *spu);
+void spu_init_channels(struct spu *spu);
int spu_irq_class_0_bottom(struct spu *spu);
int spu_irq_class_1_bottom(struct spu *spu);
void spu_irq_setaffinity(struct spu *spu, int cpu);
Index: linux-cell/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-cell.orig/arch/powerpc/platforms/cell/spufs/sched.c 2007-07-20 18:39:39.000000000 +0200
+++ linux-cell/arch/powerpc/platforms/cell/spufs/sched.c 2007-07-20 18:40:48.000000000 +0200
@@ -51,9 +51,6 @@ struct spu_prio_array {
DECLARE_BITMAP(bitmap, MAX_PRIO);
struct list_head runq[MAX_PRIO];
spinlock_t runq_lock;
- struct list_head active_list[MAX_NUMNODES];
- struct mutex active_mutex[MAX_NUMNODES];
- int nr_active[MAX_NUMNODES];
int nr_waiting;
};
@@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_
ctx->policy = current->policy;
/*
- * A lot of places that don't hold active_mutex poke into
+ * A lot of places that don't hold list_mutex poke into
* cpus_allowed, including grab_runnable_context which
* already holds the runq_lock. So abuse runq_lock
* to protect this field aswell.
@@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_co
{
int node = ctx->spu->node;
- mutex_lock(&spu_prio->active_mutex[node]);
+ mutex_lock(&cbe_spu_info[node].list_mutex);
__spu_update_sched_info(ctx);
- mutex_unlock(&spu_prio->active_mutex[node]);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
}
static int __node_allowed(struct spu_context *ctx, int node)
@@ -169,39 +166,6 @@ static int node_allowed(struct spu_conte
return rval;
}
-/**
- * spu_add_to_active_list - add spu to active list
- * @spu: spu to add to the active list
- */
-static void spu_add_to_active_list(struct spu *spu)
-{
- int node = spu->node;
-
- mutex_lock(&spu_prio->active_mutex[node]);
- spu_prio->nr_active[node]++;
- list_add_tail(&spu->list, &spu_prio->active_list[node]);
- mutex_unlock(&spu_prio->active_mutex[node]);
-}
-
-static void __spu_remove_from_active_list(struct spu *spu)
-{
- list_del_init(&spu->list);
- spu_prio->nr_active[spu->node]--;
-}
-
-/**
- * spu_remove_from_active_list - remove spu from active list
- * @spu: spu to remove from the active list
- */
-static void spu_remove_from_active_list(struct spu *spu)
-{
- int node = spu->node;
-
- mutex_lock(&spu_prio->active_mutex[node]);
- __spu_remove_from_active_list(spu);
- mutex_unlock(&spu_prio->active_mutex[node]);
-}
-
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
@@ -213,20 +177,25 @@ void spu_switch_notify(struct spu *spu,
static void notify_spus_active(void)
{
int node;
+
/* Wake up the active spu_contexts. When the awakened processes
* see their "notify_active" flag is set, they will call
* spu_switch_notify();
*/
for (node = 0; node < MAX_NUMNODES; node++) {
struct spu *spu;
- mutex_lock(&spu_prio->active_mutex[node]);
- list_for_each_entry(spu, &spu_prio->active_list[node], list) {
- struct spu_context *ctx = spu->ctx;
- set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
- mb();
- wake_up_all(&ctx->stop_wq);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->alloc_state != SPU_FREE) {
+ struct spu_context *ctx = spu->ctx;
+ set_bit(SPU_SCHED_NOTIFY_ACTIVE,
+ &ctx->sched_flags);
+ mb();
+ wake_up_all(&ctx->stop_wq);
+ }
}
- mutex_unlock(&spu_prio->active_mutex[node]);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
}
}
@@ -247,10 +216,12 @@ int spu_switch_event_unregister(struct n
EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
/*
- * XXX(hch): needs locking.
+ * Must be used with the list_mutex held.
*/
static inline int sched_spu(struct spu *spu)
{
+ BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
}
@@ -303,11 +274,15 @@ static struct spu *aff_ref_location(stru
node = (node < MAX_NUMNODES) ? node : 0;
if (!node_allowed(ctx, node))
continue;
+ mutex_lock(&cbe_spu_info[node].list_mutex);
list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
if ((!mem_aff || spu->has_mem_affinity) &&
- sched_spu(spu))
+ sched_spu(spu)) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
return spu;
+ }
}
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
}
return NULL;
}
@@ -335,13 +310,14 @@ static void aff_set_ref_point_location(s
gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
}
-static struct spu *ctx_location(struct spu *ref, int offset)
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
{
struct spu *spu;
spu = NULL;
if (offset >= 0) {
list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+ BUG_ON(spu->node != node);
if (offset == 0)
break;
if (sched_spu(spu))
@@ -349,12 +325,14 @@ static struct spu *ctx_location(struct s
}
} else {
list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+ BUG_ON(spu->node != node);
if (offset == 0)
break;
if (sched_spu(spu))
offset++;
}
}
+
return spu;
}
@@ -362,13 +340,13 @@ static struct spu *ctx_location(struct s
* affinity_check is called each time a context is going to be scheduled.
* It returns the spu ptr on which the context must run.
*/
-struct spu *affinity_check(struct spu_context *ctx)
+static int has_affinity(struct spu_context *ctx)
{
- struct spu_gang *gang;
+ struct spu_gang *gang = ctx->gang;
if (list_empty(&ctx->aff_list))
- return NULL;
- gang = ctx->gang;
+ return 0;
+
mutex_lock(&gang->aff_mutex);
if (!gang->aff_ref_spu) {
if (!(gang->aff_flags & AFF_MERGED))
@@ -378,9 +356,8 @@ struct spu *affinity_check(struct spu_co
aff_set_ref_point_location(gang);
}
mutex_unlock(&gang->aff_mutex);
- if (!gang->aff_ref_spu)
- return NULL;
- return ctx_location(gang->aff_ref_spu, ctx->aff_offset);
+
+ return gang->aff_ref_spu != NULL;
}
/**
@@ -533,22 +510,41 @@ static void spu_prio_wait(struct spu_con
static struct spu *spu_get_idle(struct spu_context *ctx)
{
- struct spu *spu = NULL;
- int node = cpu_to_node(raw_smp_processor_id());
- int n;
-
- spu = affinity_check(ctx);
- if (spu)
- return spu_alloc_spu(spu);
+ struct spu *spu;
+ int node, n;
+
+ if (has_affinity(ctx)) {
+ node = ctx->gang->aff_ref_spu->node;
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
+ if (spu && spu->alloc_state == SPU_FREE)
+ goto found;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ return NULL;
+ }
+
+ node = cpu_to_node(raw_smp_processor_id());
for (n = 0; n < MAX_NUMNODES; n++, node++) {
node = (node < MAX_NUMNODES) ? node : 0;
if (!node_allowed(ctx, node))
continue;
- spu = spu_alloc_node(node);
- if (spu)
- break;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if (spu->alloc_state == SPU_FREE)
+ goto found;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
}
+
+ return NULL;
+
+ found:
+ spu->alloc_state = SPU_USED;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ pr_debug("Got SPU %d %d\n", spu->number, spu->node);
+ spu_init_channels(spu);
return spu;
}
@@ -578,15 +574,15 @@ static struct spu *find_victim(struct sp
if (!node_allowed(ctx, node))
continue;
- mutex_lock(&spu_prio->active_mutex[node]);
- list_for_each_entry(spu, &spu_prio->active_list[node], list) {
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
struct spu_context *tmp = spu->ctx;
if (tmp->prio > ctx->prio &&
(!victim || tmp->prio > victim->prio))
victim = spu->ctx;
}
- mutex_unlock(&spu_prio->active_mutex[node]);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
if (victim) {
/*
@@ -611,7 +607,11 @@ static struct spu *find_victim(struct sp
victim = NULL;
goto restart;
}
- spu_remove_from_active_list(spu);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
spu_unbind_context(spu, victim);
victim->stats.invol_ctx_switch++;
spu->stats.invol_ctx_switch++;
@@ -660,8 +660,12 @@ int spu_activate(struct spu_context *ctx
if (!spu && rt_prio(ctx->prio))
spu = find_victim(ctx);
if (spu) {
+ int node = spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
spu_bind_context(spu, ctx);
- spu_add_to_active_list(spu);
+ cbe_spu_info[node].nr_active++;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
return 0;
}
@@ -710,11 +714,17 @@ static int __spu_deactivate(struct spu_c
if (spu) {
new = grab_runnable_context(max_prio, spu->node);
if (new || force) {
- spu_remove_from_active_list(spu);
+ int node = spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
spu_unbind_context(spu, ctx);
+ spu->alloc_state = SPU_FREE;
+ cbe_spu_info[node].nr_active--;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
ctx->stats.vol_ctx_switch++;
spu->stats.vol_ctx_switch++;
- spu_free(spu);
+
if (new)
wake_up(&new->stop_wq);
}
@@ -753,7 +763,7 @@ void spu_yield(struct spu_context *ctx)
}
}
-static void spusched_tick(struct spu_context *ctx)
+static noinline void spusched_tick(struct spu_context *ctx)
{
if (ctx->flags & SPU_CREATE_NOSCHED)
return;
@@ -764,7 +774,7 @@ static void spusched_tick(struct spu_con
return;
/*
- * Unfortunately active_mutex ranks outside of state_mutex, so
+ * Unfortunately list_mutex ranks outside of state_mutex, so
* we have to trylock here. If we fail give the context another
* tick and try again.
*/
@@ -774,12 +784,11 @@ static void spusched_tick(struct spu_con
new = grab_runnable_context(ctx->prio + 1, spu->node);
if (new) {
-
- __spu_remove_from_active_list(spu);
spu_unbind_context(spu, ctx);
ctx->stats.invol_ctx_switch++;
spu->stats.invol_ctx_switch++;
- spu_free(spu);
+ spu->alloc_state = SPU_FREE;
+ cbe_spu_info[spu->node].nr_active--;
wake_up(&new->stop_wq);
/*
* We need to break out of the wait loop in
@@ -800,7 +809,7 @@ static void spusched_tick(struct spu_con
*
* Return the number of tasks currently running or waiting to run.
*
- * Note that we don't take runq_lock / active_mutex here. Reading
+ * Note that we don't take runq_lock / list_mutex here. Reading
* a single 32bit value is atomic on powerpc, and we don't care
* about memory ordering issues here.
*/
@@ -809,7 +818,7 @@ static unsigned long count_active_contex
int nr_active = 0, node;
for (node = 0; node < MAX_NUMNODES; node++)
- nr_active += spu_prio->nr_active[node];
+ nr_active += cbe_spu_info[node].nr_active;
nr_active += spu_prio->nr_waiting;
return nr_active;
@@ -856,12 +865,11 @@ static int spusched_thread(void *unused)
set_current_state(TASK_INTERRUPTIBLE);
schedule();
for (node = 0; node < MAX_NUMNODES; node++) {
- mutex_lock(&spu_prio->active_mutex[node]);
- list_for_each_entry_safe(spu, next,
- &spu_prio->active_list[node],
- list)
- spusched_tick(spu->ctx);
- mutex_unlock(&spu_prio->active_mutex[node]);
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+ if (spu->ctx)
+ spusched_tick(spu->ctx);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
}
}
@@ -920,10 +928,7 @@ int __init spu_sched_init(void)
__clear_bit(i, spu_prio->bitmap);
}
__set_bit(MAX_PRIO, spu_prio->bitmap);
- for (i = 0; i < MAX_NUMNODES; i++) {
- mutex_init(&spu_prio->active_mutex[i]);
- INIT_LIST_HEAD(&spu_prio->active_list[i]);
- }
+
spin_lock_init(&spu_prio->runq_lock);
setup_timer(&spusched_timer, spusched_wake, 0);
@@ -962,13 +967,11 @@ void __exit spu_sched_exit(void)
kthread_stop(spusched_task);
for (node = 0; node < MAX_NUMNODES; node++) {
- mutex_lock(&spu_prio->active_mutex[node]);
- list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
- list) {
- list_del_init(&spu->list);
- spu_free(spu);
- }
- mutex_unlock(&spu_prio->active_mutex[node]);
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+ if (spu->alloc_state != SPU_FREE)
+ spu->alloc_state = SPU_FREE;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
}
kfree(spu_prio);
}
More information about the cbe-oss-dev
mailing list