[Cbe-oss-dev] [PATCH] spusched: extend list_mutex coverage
Christoph Hellwig
hch at lst.de
Tue Jul 22 19:16:56 EST 2008
On Tue, Jul 15, 2008 at 06:08:12AM +0200, Christoph Hellwig wrote:
> spu->ctx is only stable while the list_mutex is held. Extend the
> hold times of the lock in find_victim and spusched_tick to avoid
> potentially dangling dereferences. To do so we have to switch to a
> trylock and open-code spu_schedule in spusched_tick, which is rather
> ugly, but I can't find a good way around it.
ping? This is quite an important fix for a bug found by the Boeblingen
test team.
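
To make the locking pattern in the description above concrete, here is a
minimal userspace sketch using plain pthreads. None of the names below
(struct node, struct ctx, steal_lowest_prio, and so on) are the spufs
types; they only model the shape of the change: the per-node list lock
stays held across the whole victim search, the victim's own lock is only
trylocked, and on contention both locks are dropped before restarting.
A second sketch covering the spusched_tick side follows after the quoted
patch.

/*
 * Sketch only: hold the outer list lock across the whole victim search,
 * only trylock the inner per-context lock, and drop both locks before
 * restarting on contention.  All names are illustrative, not spufs types.
 */
#include <pthread.h>
#include <stddef.h>

struct ctx {
	pthread_mutex_t state_lock;	/* cf. ctx->state_mutex */
	int prio;
};

struct node {
	pthread_mutex_t list_lock;	/* cf. cbe_spu_info[node].list_mutex */
	struct ctx *ctxs[16];
	int nr;
};

static struct ctx *steal_lowest_prio(struct node *node)
{
	struct ctx *victim;
	int i;

restart:
	victim = NULL;

	pthread_mutex_lock(&node->list_lock);
	for (i = 0; i < node->nr; i++) {
		struct ctx *tmp = node->ctxs[i];

		/* higher numeric prio means lower priority */
		if (!victim || tmp->prio > victim->prio)
			victim = tmp;
	}

	if (victim) {
		/*
		 * The list lock is still held, so the victim cannot be
		 * torn down under us.  Trylock keeps the lock ordering
		 * deadlock-free; on failure back off completely and retry
		 * (a real implementation would bound the retries).
		 */
		if (pthread_mutex_trylock(&victim->state_lock)) {
			pthread_mutex_unlock(&node->list_lock);
			goto restart;
		}

		/* ... unbind the victim here, under both locks ... */

		pthread_mutex_unlock(&victim->state_lock);
	}
	pthread_mutex_unlock(&node->list_lock);

	return victim;
}
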
> Signed-off-by: Christoph Hellwig <hch at lst.de>
>
> Index: linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c
> ===================================================================
> --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/sched.c 2008-07-15 06:02:19.000000000 +0200
> +++ linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c 2008-07-15 06:03:40.000000000 +0200
> @@ -633,7 +633,6 @@ static struct spu *find_victim(struct sp
> (!victim || tmp->prio > victim->prio))
> victim = spu->ctx;
> }
> - mutex_unlock(&cbe_spu_info[node].list_mutex);
>
> if (victim) {
> /*
> @@ -647,6 +646,7 @@ static struct spu *find_victim(struct sp
> * look at another context or give up after X retries.
> */
> if (!mutex_trylock(&victim->state_mutex)) {
> + mutex_unlock(&cbe_spu_info[node].list_mutex);
> victim = NULL;
> goto restart;
> }
> @@ -659,16 +659,15 @@ static struct spu *find_victim(struct sp
> * restart the search.
> */
> mutex_unlock(&victim->state_mutex);
> + mutex_unlock(&cbe_spu_info[node].list_mutex);
> victim = NULL;
> goto restart;
> }
>
> spu_context_trace(__spu_deactivate__unload, ctx, spu);
>
> - mutex_lock(&cbe_spu_info[node].list_mutex);
> cbe_spu_info[node].nr_active--;
> spu_unbind_context(spu, victim);
> - mutex_unlock(&cbe_spu_info[node].list_mutex);
>
> victim->stats.invol_ctx_switch++;
> spu->stats.invol_ctx_switch++;
> @@ -676,9 +675,10 @@ static struct spu *find_victim(struct sp
> spu_add_to_rq(victim);
>
> mutex_unlock(&victim->state_mutex);
> -
> + mutex_unlock(&cbe_spu_info[node].list_mutex);
> return spu;
> }
> + mutex_unlock(&cbe_spu_info[node].list_mutex);
> }
>
> return NULL;
> @@ -715,16 +715,21 @@ static void spu_schedule(struct spu *spu
> spu_release(ctx);
> }
>
> -static void spu_unschedule(struct spu *spu, struct spu_context *ctx)
> +static void __spu_unschedule(struct spu *spu, struct spu_context *ctx)
> {
> - int node = spu->node;
> -
> - mutex_lock(&cbe_spu_info[node].list_mutex);
> - cbe_spu_info[node].nr_active--;
> + cbe_spu_info[spu->node].nr_active--;
> spu->alloc_state = SPU_FREE;
> spu_unbind_context(spu, ctx);
> ctx->stats.invol_ctx_switch++;
> spu->stats.invol_ctx_switch++;
> +}
> +
> +static void spu_unschedule(struct spu *spu, struct spu_context *ctx)
> +{
> + int node = spu->node;
> +
> + mutex_lock(&cbe_spu_info[node].list_mutex);
> + __spu_unschedule(spu, ctx);
> mutex_unlock(&cbe_spu_info[node].list_mutex);
> }
>
> @@ -875,8 +880,13 @@ static noinline void spusched_tick(struc
> struct spu_context *new = NULL;
> struct spu *spu = NULL;
>
> - if (spu_acquire(ctx))
> - BUG(); /* a kernel thread never has signals pending */
> + /*
> + * If we can't lock the context give it another timeslice.
> + */
> + if (!mutex_trylock(&ctx->state_mutex)) {
> + ctx->time_slice++;
> + return;
> + }
>
> if (ctx->state != SPU_STATE_RUNNABLE)
> goto out;
> @@ -892,9 +902,16 @@ static noinline void spusched_tick(struc
>
> spu_context_trace(spusched_tick__preempt, ctx, spu);
>
> - new = grab_runnable_context(ctx->prio + 1, spu->node);
> + /*
> + * Try to find a new context on the runqueue that we can lock
> + * without sleeping.
> + */
> + do {
> + new = grab_runnable_context(ctx->prio + 1, spu->node);
> + } while (new && !mutex_trylock(&new->state_mutex));
> +
> if (new) {
> - spu_unschedule(spu, ctx);
> + __spu_unschedule(spu, ctx);
> if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
> spu_add_to_rq(ctx);
> } else {
> @@ -904,8 +921,24 @@ static noinline void spusched_tick(struc
> out:
> spu_release(ctx);
>
> - if (new)
> - spu_schedule(spu, new);
> + /*
> + * This is an opencoded spu_schedule accounting for the fact
> + * that we have the list_lock already taken.
> + *
> + * Not pretty.
> + */
> + if (new) {
> + int node = spu->node;
> +
> + BUG_ON(spu->ctx);
> +
> + spu_set_timeslice(new);
> + spu_bind_context(spu, new);
> + cbe_spu_info[node].nr_active++;
> + spu->alloc_state = SPU_USED;
> + wake_up_all(&new->run_wq);
> + spu_release(new);
> + }
> }
>
> /**
> @@ -972,11 +1005,8 @@ static int spusched_thread(void *unused)
> cbe_list) {
> struct spu_context *ctx = spu->ctx;
>
> - if (ctx) {
> - mutex_unlock(mtx);
> + if (ctx)
> spusched_tick(ctx);
> - mutex_lock(mtx);
> - }
> }
> mutex_unlock(mtx);
> }
> _______________________________________________
> cbe-oss-dev mailing list
> cbe-oss-dev at ozlabs.org
> https://ozlabs.org/mailman/listinfo/cbe-oss-dev
---end quoted text---
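
The spusched_tick side follows the same idea, sketched below with the
same made-up types as in the sketch before the quoted patch: the
scheduler thread already holds the node's list lock when it walks the
SPUs, so the tick handler can only trylock the context (giving it
another timeslice on contention), the replacement context is picked
with a trylock-in-a-loop, and the rebind has to be open-coded because
the normal schedule path would take the list lock a second time.
pick_runnable() is a stand-in for grab_runnable_context(), stubbed out
here only so the sketch is self-contained.

/* Stand-in for grab_runnable_context(); a real version would scan the
 * run queue for a context better than the given priority. */
static struct ctx *pick_runnable(struct node *node, int best_prio)
{
	(void)node;
	(void)best_prio;
	return NULL;
}

/* Caller already holds node->list_lock, like spusched_thread does. */
static void tick_one_ctx(struct node *node, struct ctx *ctx)
{
	struct ctx *new;

	/* Cannot sleep with the list lock held: on contention just give
	 * the context another timeslice and try again next tick. */
	if (pthread_mutex_trylock(&ctx->state_lock))
		return;

	/* Find a runnable replacement we can also lock without sleeping. */
	do {
		new = pick_runnable(node, ctx->prio + 1);
	} while (new && pthread_mutex_trylock(&new->state_lock));

	if (new) {
		/*
		 * Open-coded "schedule": the list lock is already held by
		 * the caller, so the normal path (which takes it again)
		 * cannot be used here.
		 */
		/* ... unbind ctx, bind new, adjust nr_active ... */
		pthread_mutex_unlock(&new->state_lock);
	}

	pthread_mutex_unlock(&ctx->state_lock);
}
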