Subject: spu sched: fix cpu/node binding

From: Christoph Hellwig <hch@lst.de>
Add a cpus_allowed field to struct spu_context so that we always
use the cpu mask of the owning thread instead of that of whichever
thread happens to call into the scheduler.  Also use this information
in grab_runnable_context to avoid spurious wakeups.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>

Index: linux-2.6/arch/powerpc/platforms/cell/spufs/context.c
===================================================================
--- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/context.c
+++ linux-2.6/arch/powerpc/platforms/cell/spufs/context.c
@@ -57,7 +57,7 @@ struct spu_context *alloc_spu_context(st
 	INIT_LIST_HEAD(&ctx->aff_list);
 	if (gang)
 		spu_gang_add_ctx(gang, ctx);
-
+	ctx->cpus_allowed = current->cpus_allowed;
 	spu_set_timeslice(ctx);
 	goto out;
 out_free:
Index: linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c
===================================================================
--- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/sched.c
+++ linux-2.6/arch/powerpc/platforms/cell/spufs/sched.c
@@ -113,6 +113,16 @@ void __spu_update_sched_info(struct spu_
 	else
 		ctx->prio = current->static_prio;
 	ctx->policy = current->policy;
+
+	/*
+	 * A lot of places that don't hold active_mutex poke into
+	 * cpus_allowed, including grab_runnable_context which
+	 * already holds the runq_lock.  So abuse runq_lock
+	 * to protect this field as well.
+	 */
+	spin_lock(&spu_prio->runq_lock);
+	ctx->cpus_allowed = current->cpus_allowed;
+	spin_unlock(&spu_prio->runq_lock);
 }
 
 void spu_update_sched_info(struct spu_context *ctx)
@@ -124,16 +134,27 @@ void spu_update_sched_info(struct spu_co
 	mutex_unlock(&spu_prio->active_mutex[node]);
 }
 
-static inline int node_allowed(int node)
+static int __node_allowed(struct spu_context *ctx, int node)
 {
-	cpumask_t mask;
+	if (nr_cpus_node(node)) {
+		cpumask_t mask = node_to_cpumask(node);
 
-	if (!nr_cpus_node(node))
-		return 0;
-	mask = node_to_cpumask(node);
-	if (!cpus_intersects(mask, current->cpus_allowed))
-		return 0;
-	return 1;
+		if (cpus_intersects(mask, ctx->cpus_allowed))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int node_allowed(struct spu_context *ctx, int node)
+{
+	int rval;
+
+	spin_lock(&spu_prio->runq_lock);
+	rval = __node_allowed(ctx, node);
+	spin_unlock(&spu_prio->runq_lock);
+
+	return rval;
 }
 
 /**
@@ -331,7 +352,7 @@ static struct spu *spu_get_idle(struct s
 
 	for (n = 0; n < MAX_NUMNODES; n++, node++) {
 		node = (node < MAX_NUMNODES) ? node : 0;
-		if (!node_allowed(node))
+		if (!node_allowed(ctx, node))
 			continue;
 		spu = spu_alloc_node(node);
 		if (spu)
@@ -363,7 +384,7 @@ static struct spu *find_victim(struct sp
 	node = cpu_to_node(raw_smp_processor_id());
 	for (n = 0; n < MAX_NUMNODES; n++, node++) {
 		node = (node < MAX_NUMNODES) ? node : 0;
-		if (!node_allowed(node))
+		if (!node_allowed(ctx, node))
 			continue;
 
 		mutex_lock(&spu_prio->active_mutex[node]);
@@ -458,23 +479,28 @@ int spu_activate(struct spu_context *ctx
  * Remove the highest priority context on the runqueue and return it
  * to the caller.  Returns %NULL if no runnable context was found.
  */
-static struct spu_context *grab_runnable_context(int prio)
+static struct spu_context *grab_runnable_context(int prio, int node)
 {
-	struct spu_context *ctx = NULL;
+	struct spu_context *ctx;
 	int best;
 
 	spin_lock(&spu_prio->runq_lock);
 	best = sched_find_first_bit(spu_prio->bitmap);
-	if (best < prio) {
+	while (best < prio) {
 		struct list_head *rq = &spu_prio->runq[best];
 
-		BUG_ON(list_empty(rq));
-
-		ctx = list_entry(rq->next, struct spu_context, rq);
-		__spu_del_from_rq(ctx);
+		list_for_each_entry(ctx, rq, rq) {
+			/* XXX(hch): check for affinity here as well */
+			if (__node_allowed(ctx, node)) {
+				__spu_del_from_rq(ctx);
+				goto found;
+			}
+		}
+		best++;
 	}
+	ctx = NULL;
+ found:
 	spin_unlock(&spu_prio->runq_lock);
-
 	return ctx;
 }
 
@@ -484,7 +510,7 @@ static int __spu_deactivate(struct spu_c
 	struct spu_context *new = NULL;
 
 	if (spu) {
-		new = grab_runnable_context(max_prio);
+		new = grab_runnable_context(max_prio, spu->node);
 		if (new || force) {
 			spu_remove_from_active_list(spu);
 			spu_unbind_context(spu, ctx);
@@ -536,9 +562,11 @@ static void spusched_tick(struct spu_con
 	 * tick and try again.
 	 */
 	if (mutex_trylock(&ctx->state_mutex)) {
-		struct spu_context *new = grab_runnable_context(ctx->prio + 1);
+ 		struct spu *spu = ctx->spu;
+		struct spu_context *new;
+
+		new = grab_runnable_context(ctx->prio + 1, spu->node);
 		if (new) {
- 			struct spu *spu = ctx->spu;
 
 			__spu_remove_from_active_list(spu);
 			spu_unbind_context(spu, ctx);
@@ -675,7 +703,8 @@ static inline int sched_spu(struct spu *
 }
 
 static struct spu *
-aff_ref_location(int mem_aff, int group_size, int prio, int lowest_offset)
+aff_ref_location(struct spu_context *ctx, int mem_aff,
+		 int group_size, int lowest_offset)
 {
 	struct spu *spu;
 	int node, n;
@@ -686,7 +715,7 @@ aff_ref_location(int mem_aff, int group_
 	node = cpu_to_node(raw_smp_processor_id());
 	for (n = 0; n < MAX_NUMNODES; n++, node++) {
 		node = (node < MAX_NUMNODES) ? node : 0;
-		if (!node_allowed(node))
+		if (!node_allowed(ctx, node))
 			continue;
 		list_for_each_entry(spu, &be_spu_info[node].spus, be_list) {
 			if ((!mem_aff || spu->has_mem_affinity) &&
@@ -716,8 +745,7 @@ static void aff_set_ref_point_location(s
 		lowest_offset = ctx->aff_offset;
 	}
 
-	gang->aff_ref_spu = aff_ref_location(mem_aff, gs, ctx->prio,
-							lowest_offset);
+	gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
 }
 
 static struct spu* ctx_location(struct spu *ref, int offset)
Index: linux-2.6/arch/powerpc/platforms/cell/spufs/spufs.h
===================================================================
--- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/spufs.h
+++ linux-2.6/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -26,6 +26,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/fs.h>
+#include <linux/cpumask.h>
 
 #include <asm/spu.h>
 #include <asm/spu_csa.h>
@@ -89,6 +90,7 @@ struct spu_context {
 	unsigned long sched_flags;
 	int policy;
 	int prio;
+	cpumask_t cpus_allowed;
 
 	struct list_head aff_list;
 	int aff_head;