Subject: spu sched: fix cpu/node binding From: Christoph Hellwig Add a cpus_allowed field to struct spu_context so that we always use the cpu mask of the owning thread instead of that of whichever thread happens to call into the scheduler. Also use this information in grab_runnable_context to avoid spurious wakeups. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Index: linux-2.6/arch/powerpc/platforms/cell/spufs/context.c =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/context.c +++ linux-2.6/arch/powerpc/platforms/cell/spufs/context.c @@ -57,7 +57,7 @@ struct spu_context *alloc_spu_context(st INIT_LIST_HEAD(&ctx->aff_list); if (gang) spu_gang_add_ctx(gang, ctx); - + ctx->cpus_allowed = current->cpus_allowed; spu_set_timeslice(ctx); goto out; out_free: Index: linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/sched.c +++ linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c @@ -113,6 +113,16 @@ void __spu_update_sched_info(struct spu_ else ctx->prio = current->static_prio; ctx->policy = current->policy; + + /* + * A lot of places that don't hold active_mutex poke into + * cpus_allowed, including grab_runnable_context which + * already holds the runq_lock. So abuse runq_lock + * to protect this field aswell. 
+ */ + spin_lock(&spu_prio->runq_lock); + ctx->cpus_allowed = current->cpus_allowed; + spin_unlock(&spu_prio->runq_lock); } void spu_update_sched_info(struct spu_context *ctx) @@ -124,16 +134,27 @@ void spu_update_sched_info(struct spu_co mutex_unlock(&spu_prio->active_mutex[node]); } -static inline int node_allowed(int node) +static int __node_allowed(struct spu_context *ctx, int node) { - cpumask_t mask; + if (nr_cpus_node(node)) { + cpumask_t mask = node_to_cpumask(node); - if (!nr_cpus_node(node)) - return 0; - mask = node_to_cpumask(node); - if (!cpus_intersects(mask, current->cpus_allowed)) - return 0; - return 1; + if (cpus_intersects(mask, ctx->cpus_allowed)) + return 1; + } + + return 0; +} + +static int node_allowed(struct spu_context *ctx, int node) +{ + int rval; + + spin_lock(&spu_prio->runq_lock); + rval = __node_allowed(ctx, node); + spin_unlock(&spu_prio->runq_lock); + + return rval; } /** @@ -331,7 +352,7 @@ static struct spu *spu_get_idle(struct s for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; - if (!node_allowed(node)) + if (!node_allowed(ctx, node)) continue; spu = spu_alloc_node(node); if (spu) @@ -363,7 +384,7 @@ static struct spu *find_victim(struct sp node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; - if (!node_allowed(node)) + if (!node_allowed(ctx, node)) continue; mutex_lock(&spu_prio->active_mutex[node]); @@ -458,23 +479,28 @@ int spu_activate(struct spu_context *ctx * Remove the highest priority context on the runqueue and return it * to the caller. Returns %NULL if no runnable context was found. 
*/ -static struct spu_context *grab_runnable_context(int prio) +static struct spu_context *grab_runnable_context(int prio, int node) { - struct spu_context *ctx = NULL; + struct spu_context *ctx; int best; spin_lock(&spu_prio->runq_lock); best = sched_find_first_bit(spu_prio->bitmap); - if (best < prio) { + while (best < prio) { struct list_head *rq = &spu_prio->runq[best]; - BUG_ON(list_empty(rq)); - - ctx = list_entry(rq->next, struct spu_context, rq); - __spu_del_from_rq(ctx); + list_for_each_entry(ctx, rq, rq) { + /* XXX(hch): check for affinity here aswell */ + if (__node_allowed(ctx, node)) { + __spu_del_from_rq(ctx); + goto found; + } + } + best++; } + ctx = NULL; + found: spin_unlock(&spu_prio->runq_lock); - return ctx; } @@ -484,7 +510,7 @@ static int __spu_deactivate(struct spu_c struct spu_context *new = NULL; if (spu) { - new = grab_runnable_context(max_prio); + new = grab_runnable_context(max_prio, spu->node); if (new || force) { spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); @@ -536,9 +562,11 @@ static void spusched_tick(struct spu_con * tick and try again. */ if (mutex_trylock(&ctx->state_mutex)) { - struct spu_context *new = grab_runnable_context(ctx->prio + 1); + struct spu *spu = ctx->spu; + struct spu_context *new; + + new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { - struct spu *spu = ctx->spu; __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); @@ -675,7 +703,8 @@ static inline int sched_spu(struct spu * } static struct spu * -aff_ref_location(int mem_aff, int group_size, int prio, int lowest_offset) +aff_ref_location(struct spu_context *ctx, int mem_aff, + int group_size, int lowest_offset) { struct spu *spu; int node, n; @@ -686,7 +715,7 @@ aff_ref_location(int mem_aff, int group_ node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? 
node : 0; - if (!node_allowed(node)) + if (!node_allowed(ctx, node)) continue; list_for_each_entry(spu, &be_spu_info[node].spus, be_list) { if ((!mem_aff || spu->has_mem_affinity) && @@ -716,8 +745,7 @@ static void aff_set_ref_point_location(s lowest_offset = ctx->aff_offset; } - gang->aff_ref_spu = aff_ref_location(mem_aff, gs, ctx->prio, - lowest_offset); + gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset); } static struct spu* ctx_location(struct spu *ref, int offset) Index: linux-2.6/arch/powerpc/platforms/cell/spufs/spufs.h =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/spufs.h +++ linux-2.6/arch/powerpc/platforms/cell/spufs/spufs.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -89,6 +90,7 @@ struct spu_context { unsigned long sched_flags; int policy; int prio; + cpumask_t cpus_allowed; struct list_head aff_list; int aff_head;