cache_reap(): Further reduction in interrupt holdoff

cache_reap() currently takes the l3->list_lock (disabling interrupts) unconditionally, then does a few checks and maybe does some cleanup. This patch makes cache_reap() take the lock only if there is work to do, and the lock is then taken and released for each cleaning action.

The check of when to do the next reaping is done without any locking and becomes racy. This should not matter, since reaping can also be skipped if the slab mutex cannot be acquired. The same is true for the touched processing. If we get this wrong once in a while we will mistakenly clean or fail to clean the shared cache, which will impact performance slightly.

Signed-off-by: Christoph Lameter
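For orientation before reading the diff, here is a minimal standalone sketch of the locking pattern that drain_array() follows after this change: check for work without taking the lock, then take the lock only around the actual cleanup. This is an illustration only, not kernel code and not part of the patch; a pthread mutex stands in for l3->list_lock, and the toy_array/toy_drain names are invented for the example (build with cc -pthread).

/*
 * Standalone sketch (not kernel code): check for work without the
 * lock, lock only around the cleanup.
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define TOY_LIMIT 16

struct toy_array {
        pthread_mutex_t lock;   /* stands in for l3->list_lock */
        int touched;            /* recently-used hint, like ac->touched */
        int limit;
        int avail;
        void *entry[TOY_LIMIT];
};

static void toy_drain(struct toy_array *ac, int force)
{
        int tofree;

        /* Racy pre-check: skip taking the lock if there is nothing to do. */
        if (!ac || !ac->avail)
                return;

        pthread_mutex_lock(&ac->lock);
        if (ac->touched && !force) {
                ac->touched = 0;
        } else if (ac->avail) {
                tofree = force ? ac->avail : (ac->limit + 4) / 5;
                if (tofree > ac->avail)
                        tofree = (ac->avail + 1) / 2;
                /* A real implementation would free the objects here. */
                ac->avail -= tofree;
                memmove(ac->entry, &ac->entry[tofree],
                        sizeof(void *) * ac->avail);
        }
        pthread_mutex_unlock(&ac->lock);
}

int main(void)
{
        struct toy_array ac = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .limit = TOY_LIMIT,
                .avail = 10,
        };

        toy_drain(&ac, 0);      /* partial drain: lock taken, limit/5 freed */
        printf("after partial drain: avail=%d\n", ac.avail);

        ac.avail = 0;
        toy_drain(&ac, 1);      /* nothing to do: lock is never taken */
        printf("after drain of empty array: avail=%d\n", ac.avail);
        return 0;
}

cache_reap() below makes the same trade-off: the next_reap and free_touched checks are done without the lock, and in the slab freeing loop l3->list_lock is only taken once list_empty(&l3->slabs_free) indicates there may be free slabs to destroy.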
Index: linux-2.6.16-rc5-mm2/mm/slab.c
===================================================================
--- linux-2.6.16-rc5-mm2.orig/mm/slab.c  2006-03-03 07:50:57.000000000 -0800
+++ linux-2.6.16-rc5-mm2/mm/slab.c       2006-03-03 15:26:45.000000000 -0800
@@ -293,13 +293,13 @@ struct kmem_list3 {
         struct list_head slabs_full;
         struct list_head slabs_free;
         unsigned long free_objects;
-        unsigned long next_reap;
-        int free_touched;
         unsigned int free_limit;
         unsigned int colour_next;       /* Per-node cache coloring */
         spinlock_t list_lock;
         struct array_cache *shared;     /* shared per node */
         struct array_cache **alien;     /* on other nodes */
+        unsigned long next_reap;        /* updated without locking */
+        int free_touched;               /* updated without locking */
 };
 
 /*
@@ -701,8 +701,7 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(struct kmem_cache *cachep, void **objpp, int len,
-                       int node);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len);
 static void enable_cpucache(struct kmem_cache *cachep);
 static void cache_reap(void *unused);
 static int __node_shrink(struct kmem_cache *cachep, int node);
@@ -942,16 +941,20 @@ static void free_alien_cache(struct arra
         kfree(ac_ptr);
 }
 
-static void __drain_alien_cache(struct kmem_cache *cachep,
-                                struct array_cache *ac, int node)
+static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+                        struct array_cache *ac, int force);
+
+static void drain_array_locked(struct kmem_cache *cachep,
+                                struct kmem_list3 *l3,
+                                struct array_cache *ac,
+                                int force)
 {
-        struct kmem_list3 *rl3 = cachep->nodelists[node];
+        unsigned long flags;
 
-        if (ac->avail) {
-                spin_lock(&rl3->list_lock);
-                free_block(cachep, ac->entry, ac->avail, node);
-                ac->avail = 0;
-                spin_unlock(&rl3->list_lock);
+        if (ac && ac->avail) {
+                spin_lock_irqsave(&ac->lock, flags);
+                drain_array(cachep, l3, ac, 0);
+                spin_unlock_irqrestore(&ac->lock, flags);
         }
 }
 
@@ -962,35 +965,23 @@ static void reap_alien(struct kmem_cache
 {
         int node = __get_cpu_var(reap_node);
 
-        if (l3->alien) {
-                struct array_cache *ac = l3->alien[node];
-                if (ac && ac->avail) {
-                        spin_lock_irq(&ac->lock);
-                        __drain_alien_cache(cachep, ac, node);
-                        spin_unlock_irq(&ac->lock);
-                }
-        }
+        drain_array_locked(cachep, l3, l3->alien[node], 0);
 }
 
 static void drain_alien_cache(struct kmem_cache *cachep,
                                 struct array_cache **alien)
 {
-        int i = 0;
-        struct array_cache *ac;
-        unsigned long flags;
+        int i;
+        struct kmem_list3 *l3 = cachep->nodelists[numa_node_id()];
 
-        for_each_online_node(i) {
-                ac = alien[i];
-                if (ac) {
-                        spin_lock_irqsave(&ac->lock, flags);
-                        __drain_alien_cache(cachep, ac, i);
-                        spin_unlock_irqrestore(&ac->lock, flags);
-                }
-        }
+        if (!l3->alien)
+                return;
+
+        for_each_online_node(i)
+                drain_array_locked(cachep, l3, alien[i], 1);
 }
 
 #else
-#define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, l3) do { } while (0)
 
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
@@ -1135,7 +1126,7 @@ static int __devinit cpuup_callback(stru
                 /* Free limit for this kmem_list3 */
                 l3->free_limit -= cachep->batchcount;
                 if (nc)
-                        free_block(cachep, nc->entry, nc->avail, node);
+                        free_block(cachep, nc->entry, nc->avail);
 
                 if (!cpus_empty(mask)) {
                         spin_unlock_irq(&l3->list_lock);
@@ -1145,7 +1136,7 @@ static int __devinit cpuup_callback(stru
                 shared = l3->shared;
                 if (shared) {
                         free_block(cachep, l3->shared->entry,
-                                   l3->shared->avail, node);
+                                   l3->shared->avail);
                         l3->shared = NULL;
                 }
 
@@ -1155,10 +1146,8 @@ static int __devinit cpuup_callback(stru
                 spin_unlock_irq(&l3->list_lock);
 
                 kfree(shared);
-                if (alien) {
-                        drain_alien_cache(cachep, alien);
-                        free_alien_cache(alien);
-                }
+                drain_alien_cache(cachep, alien);
+                free_alien_cache(alien);
 free_array_cache:
                 kfree(nc);
         }
@@ -2125,21 +2114,13 @@ static void check_spinlock_acquired_node
 #define check_spinlock_acquired_node(x, y) do { } while(0)
 #endif
 
-static void drain_array_locked(struct kmem_cache *cachep,
-                        struct array_cache *ac, int force, int node);
-
 static void do_drain(void *arg)
 {
         struct kmem_cache *cachep = arg;
-        struct array_cache *ac;
-        int node = numa_node_id();
 
         check_irq_off();
-        ac = cpu_cache_get(cachep);
-        spin_lock(&cachep->nodelists[node]->list_lock);
-        free_block(cachep, ac->entry, ac->avail, node);
-        spin_unlock(&cachep->nodelists[node]->list_lock);
-        ac->avail = 0;
+        drain_array(cachep, cachep->nodelists[numa_node_id()],
+                        cpu_cache_get(cachep), 1);
 }
 
 static void drain_cpu_caches(struct kmem_cache *cachep)
@@ -2152,11 +2133,8 @@ static void drain_cpu_caches(struct kmem
         for_each_online_node(node) {
                 l3 = cachep->nodelists[node];
                 if (l3) {
-                        spin_lock_irq(&l3->list_lock);
-                        drain_array_locked(cachep, l3->shared, 1, node);
-                        spin_unlock_irq(&l3->list_lock);
-                        if (l3->alien)
-                                drain_alien_cache(cachep, l3->alien);
+                        drain_array(cachep, l3, l3->shared, 1);
+                        drain_alien_cache(cachep, l3->alien);
                 }
         }
 }
@@ -2946,8 +2924,7 @@ done:
 /*
  * Caller needs to acquire correct kmem_list's list_lock
  */
-static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
-                       int node)
+static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects)
 {
         int i;
         struct kmem_list3 *l3;
@@ -2955,8 +2932,10 @@ static void free_block(struct kmem_cache
         for (i = 0; i < nr_objects; i++) {
                 void *objp = objpp[i];
                 struct slab *slabp;
+                int node;
 
                 slabp = virt_to_slab(objp);
+                node = slabp->nodeid;
                 l3 = cachep->nodelists[node];
                 list_del(&slabp->list);
                 check_spinlock_acquired_node(cachep, node);
@@ -3010,7 +2989,7 @@ static void cache_flusharray(struct kmem
                 }
         }
 
-        free_block(cachep, ac->entry, batchcount, node);
+        free_block(cachep, ac->entry, batchcount);
 free_done:
 #if STATS
         {
@@ -3064,14 +3043,13 @@ static inline void __cache_free(struct k
                         alien = l3->alien[nodeid];
                         spin_lock(&alien->lock);
                         if (unlikely(alien->avail == alien->limit))
-                                __drain_alien_cache(cachep,
-                                                    alien, nodeid);
+                                drain_array(cachep, l3, alien, 0);
                         alien->entry[alien->avail++] = objp;
                         spin_unlock(&alien->lock);
                 } else {
                         spin_lock(&(cachep->nodelists[nodeid])->
                                   list_lock);
-                        free_block(cachep, &objp, 1, nodeid);
+                        free_block(cachep, &objp, 1);
                         spin_unlock(&(cachep->nodelists[nodeid])->
                                     list_lock);
                 }
@@ -3403,7 +3381,7 @@ static int alloc_kmemlist(struct kmem_ca
 
                         nc = cachep->nodelists[node]->shared;
                         if (nc)
-                                free_block(cachep, nc->entry, nc->avail, node);
+                                free_block(cachep, nc->entry, nc->avail);
 
                         l3->shared = new;
                         if (!cachep->nodelists[node]->alien) {
@@ -3484,7 +3462,7 @@ static int do_tune_cpucache(struct kmem_
                 if (!ccold)
                         continue;
                 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-                free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
+                free_block(cachep, ccold->entry, ccold->avail);
                 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
                 kfree(ccold);
         }
@@ -3553,23 +3531,28 @@ static void enable_cpucache(struct kmem_
                        cachep->name, -err);
 }
 
-static void drain_array_locked(struct kmem_cache *cachep,
-                        struct array_cache *ac, int force, int node)
+static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+                        struct array_cache *ac, int force)
 {
         int tofree;
 
-        check_spinlock_acquired_node(cachep, node);
+        /* Skip draining if the array has no elements */
+        if (!ac || !ac->avail)
+                return;
+
+        spin_lock_irq(&l3->list_lock);
         if (ac->touched && !force) {
                 ac->touched = 0;
         } else if (ac->avail) {
                 tofree = force ? ac->avail : (ac->limit + 4) / 5;
                 if (tofree > ac->avail)
                         tofree = (ac->avail + 1) / 2;
-                free_block(cachep, ac->entry, tofree, node);
+                free_block(cachep, ac->entry, tofree);
                 ac->avail -= tofree;
                 memmove(ac->entry, &(ac->entry[tofree]),
                         sizeof(void *) * ac->avail);
         }
+        spin_unlock_irq(&l3->list_lock);
 }
 
 /**
@@ -3605,33 +3588,48 @@ static void cache_reap(void *unused)
                 searchp = list_entry(walk, struct kmem_cache, next);
                 check_irq_on();
 
+                /*
+                 * We only take the l3 lock if absolutely necessary and we
+                 * have established with reasonable certainty that
+                 * we can do some work if the lock was obtained.
+                 */
                 l3 = searchp->nodelists[numa_node_id()];
+
                 reap_alien(searchp, l3);
-                spin_lock_irq(&l3->list_lock);
 
-                drain_array_locked(searchp, cpu_cache_get(searchp), 0,
-                                   numa_node_id());
+                drain_array(searchp, l3, cpu_cache_get(searchp), 0);
 
+                /*
+                 * These are racy checks but it does not matter
+                 * if we skip one check or scan twice.
+                 */
                 if (time_after(l3->next_reap, jiffies))
-                        goto next_unlock;
+                        goto next;
 
                 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
 
-                if (l3->shared)
-                        drain_array_locked(searchp, l3->shared, 0,
-                                           numa_node_id());
+                drain_array(searchp, l3, l3->shared, 0);
 
                 if (l3->free_touched) {
                         l3->free_touched = 0;
-                        goto next_unlock;
+                        goto next;
                 }
 
                 tofree = (l3->free_limit + 5 * searchp->num - 1) /
                         (5 * searchp->num);
                 do {
+                        /*
+                         * Do not lock if there are no free blocks.
+                         */
+                        if (list_empty(&l3->slabs_free))
+                                break;
+
+                        spin_lock_irq(&l3->list_lock);
                         p = l3->slabs_free.next;
-                        if (p == &(l3->slabs_free))
+                        if (p == &(l3->slabs_free)) {
+                                spin_unlock_irq(&l3->list_lock);
                                 break;
+                        }
 
                         slabp = list_entry(p, struct slab, list);
                         BUG_ON(slabp->inuse);
@@ -3646,10 +3644,8 @@ static void cache_reap(void *unused)
                         l3->free_objects -= searchp->num;
                         spin_unlock_irq(&l3->list_lock);
                         slab_destroy(searchp, slabp);
-                        spin_lock_irq(&l3->list_lock);
                 } while (--tofree > 0);
-next_unlock:
-                spin_unlock_irq(&l3->list_lock);
+next:
                 cond_resched();
         }
         check_irq_on();