Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2006-12-09 18:17:58.000000000 -0800
+++ linux-2.6/include/linux/slub_def.h	2006-12-09 18:25:04.000000000 -0800
@@ -11,6 +11,20 @@
 #include
 
 /*
+ * Per cpu structure to manage active slabs.
+ */
+struct active_slab {
+	struct page *page;
+	struct kmem_cache *slab;
+	int offset;
+	int size;
+	int referenced;
+#ifdef CONFIG_SMP
+	int flush_active;
+	struct delayed_work flush;
+#endif
+} ____cacheline_aligned_in_smp;
+/*
  * Slab cache management.
  */
 struct kmem_cache {
@@ -18,7 +32,6 @@
 	struct list_head partial;
 	unsigned long nr_partial;
 	int offset;		/* Free pointer offset. */
-	struct page *active[NR_CPUS];
 	atomic_long_t nr_slabs;	/* Total slabs used */
 	unsigned int order;	/* Size of the slab page */
 	unsigned long flags;
@@ -33,11 +46,7 @@
 	int inuse;		/* Used portion of the chunk */
 	const char *name;	/* Name (only for display!) */
 	struct list_head list;	/* List of slabs */
-#ifdef CONFIG_SMP
-	struct mutex flushing;
-	atomic_t active_cpus;	/* if >0 then flusher is scheduled */
-	struct delayed_work flush;
-#endif
+	struct active_slab active[NR_CPUS] ____cacheline_aligned_in_smp;
 };
 
 /*
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2006-12-09 18:18:05.000000000 -0800
+++ linux-2.6/mm/slub.c	2006-12-09 18:32:36.000000000 -0800
@@ -448,14 +448,15 @@
 /*
  * Remove the currently active slab
  */
-static void __always_inline deactivate_slab(struct kmem_cache *s,
-					struct page *page, int cpu)
+static void __always_inline deactivate_slab(struct active_slab *a)
 {
-	s->active[cpu] = NULL;
+	struct page *page = a->page;
+
+	a->page = NULL;
+	a->referenced = 0;
 	__ClearPageActive(page);
-	__ClearPageReferenced(page);
 
-	putback_slab(s, page);
+	putback_slab(a->slab, page);
 }
 
 /*
@@ -465,13 +466,14 @@
 static void flush_active(void *d)
 {
 	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
-	struct page *page = s->active[cpu];
+	struct active_slab *a = &s->active[smp_processor_id()];
 
-	page = s->active[cpu];
-	if (likely(page)) {
-		slab_lock(page);
-		deactivate_slab(s, page, cpu);
+	if (likely(a->page)) {
+		slab_lock(a->page);
+		deactivate_slab(a);
+#ifdef CONFIG_SMP
+		a->flush_active = 0;
+#endif
 	}
 }
 
@@ -479,50 +481,33 @@
 /*
- * Called from IPI during flushing to check and flush active slabs.
+ * Delayed work handler. Deactivate the cpu's active slab unless it
+ * was referenced during the last interval.
  */
-void check_flush_active(void *d)
+void check_flush_active(struct work_struct *w)
 {
-	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
-	struct page *page = s->active[cpu];
+	struct active_slab *a = container_of(w, struct active_slab, flush.work);
 
-	if (!page)
+	if (!a->page)
 		return;
 
-	if (PageReferenced(page)) {
-		ClearPageReferenced(page);
-		atomic_inc(&s->active_cpus);
+	if (a->referenced) {
+		a->referenced = 0;
+		a->flush_active = 1;
+		schedule_delayed_work(&a->flush, 2 * HZ);
 	} else {
-		slab_lock(page);
-		deactivate_slab(s, page, cpu);
+		slab_lock(a->page);
+		deactivate_slab(a);
+		a->flush_active = 0;
 	}
 }
 
-/*
- * Called from eventd
- */
-static void flusher(struct work_struct *w)
-{
-	struct kmem_cache *s = container_of(w, struct kmem_cache, flush.work);
-
-	if (!mutex_trylock(&s->flushing))
-		return;
-
-	atomic_set(&s->active_cpus, num_online_cpus());
-	on_each_cpu(check_flush_active, s, 1, 1);
-	if (atomic_read(&s->active_cpus))
-		schedule_delayed_work(&s->flush, 2 * HZ);
-	mutex_unlock(&s->flushing);
-}
-
 static void drain_all(struct kmem_cache *s)
 {
-	if (atomic_read(&s->active_cpus)) {
-		mutex_lock(&s->flushing);
-		cancel_delayed_work(&s->flush);
-		atomic_set(&s->active_cpus, 0);
-		on_each_cpu(flush_active, s, 1, 1);
-		mutex_unlock(&s->flushing);
-	}
+	int cpu;
+
+	/* Make sure no delayed flush is left scheduled */
+	for_each_possible_cpu(cpu)
+		cancel_delayed_work(&s->active[cpu].flush);
+	on_each_cpu(flush_active, s, 1, 1);
 }
 #else
 static void drain_all(struct kmem_cache *s)
@@ -538,36 +523,35 @@
 static __always_inline void *__slab_alloc(struct kmem_cache *s,
 				gfp_t gfpflags, int node)
 {
+	struct active_slab *a;
 	struct page *page;
 	void **object;
 	void *next_object;
 	unsigned long flags;
-	int cpu;
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->active[cpu];
-	if (!page)
+	a = &s->active[smp_processor_id()];
+	if (!a->page)
 		goto new_slab;
 
-	slab_lock(page);
-	check_free_chain(s, page);
-	if (unlikely(!page->freelist))
+	slab_lock(a->page);
+	check_free_chain(s, a->page);
+	if (unlikely(!a->page->freelist))
 		goto another_slab;
 
-	if (unlikely(node != -1 && page_to_nid(page) != node))
+	if (unlikely(node != -1 && page_to_nid(a->page) != node))
 		goto another_slab;
 redo:
-	page->inuse++;
-	object = page->freelist;
-	page->freelist = next_object = object[page->offset];
-	__SetPageReferenced(page);
-	slab_unlock(page);
+	a->page->inuse++;
+	object = a->page->freelist;
+	a->page->freelist = next_object = object[a->offset];
+	a->referenced = 1;
+	slab_unlock(a->page);
 	local_irq_restore(flags);
 	return object;
 
 another_slab:
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(a);
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
@@ -586,27 +570,26 @@
 	 */
 	if (unlikely(s->objects == 1)) {
 		local_irq_restore(flags);
 		return page_address(page);
 	}
 
 	slab_lock(page);
 
 gotpage:
-	if (s->active[cpu]) {
+	if (a->page) {
 		slab_unlock(page);
 		discard_slab(s, page);
-		page = s->active[cpu];
-		slab_lock(page);
+		slab_lock(a->page);
 	} else
-		s->active[cpu] = page;
+		a->page = page;
 
-	__SetPageActive(page);
-	check_free_chain(s, page);
+	__SetPageActive(a->page);
+	check_free_chain(s, a->page);
 
 #ifdef CONFIG_SMP
-	if (keventd_up() && !atomic_read(&s->active_cpus)) {
-		atomic_inc(&s->active_cpus);
-		schedule_delayed_work(&s->flush, 2 * HZ);
+	if (keventd_up() && !a->flush_active) {
+		a->flush_active = 1;
+		schedule_delayed_work(&a->flush, 2 * HZ);
 	}
 #endif
 	goto redo;
@@ -637,7 +620,6 @@
 		return;
 
 	page = virt_to_page(x);
-
 	if (unlikely(PageCompound(page)))
 		page = page->first_page;
 
@@ -821,14 +803,7 @@
 	atomic_long_set(&s->nr_slabs, 0);
 	atomic_set(&s->refcount, 1);
 	spin_lock_init(&s->list_lock);
-	for_each_possible_cpu(cpu)
-		s->active[cpu] = NULL;
 	INIT_LIST_HEAD(&s->partial);
-#ifdef CONFIG_SMP
-	mutex_init(&s->flushing);
-	atomic_set(&s->active_cpus, 0);
-	INIT_DELAYED_WORK(&s->flush, flusher);
-#endif
 	s->name = name;
 	s->ctor = ctor;
 	s->dtor = dtor;
@@ -867,6 +842,19 @@
 	if (!s->objects)
 		goto error;
 
+	for_each_possible_cpu(cpu) {
+		struct active_slab *a = &s->active[cpu];
+
+		a->page = NULL;
+		a->size = size;
+		a->offset = s->offset;
+		a->slab = s;
+#ifdef CONFIG_SMP
+		a->flush_active = 0;
+		INIT_DELAYED_WORK(&a->flush, check_flush_active);
+#endif
+	}
+
 	register_slab(s);
 	return 1;
 
@@ -1093,11 +1081,11 @@
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		struct page *page = s->active[cpu];
+		struct active_slab *a = &s->active[cpu];
 
-		if (page) {
+		if (a->page) {
 			nr_active++;
-			active += page->inuse;
+			active += a->page->inuse;
 		}
 	}
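---

The per cpu active slab above boils down to a small second-chance protocol: __slab_alloc sets a->referenced on every hit, and the delayed work gives a referenced slab one more interval before deactivating it. Below is a minimal user-space sketch of that handshake; the names (struct active_slab_sim, alloc_hit, timer_tick) are made up for illustration, and the page itself, slab locking and the freelists are left out.

#include <stdio.h>
#include <stdbool.h>

/* Stand-in for struct active_slab: only the flush protocol state. */
struct active_slab_sim {
	bool page;		/* have an active slab */
	bool referenced;	/* hit since the last timer check */
	bool flush_active;	/* delayed work armed */
};

/* Allocation fast path: use the active slab, mark it referenced and
 * arm the timer when a slab first becomes active (__slab_alloc). */
static void alloc_hit(struct active_slab_sim *a)
{
	if (!a->page) {
		a->page = true;		/* from get_partial()/new slab */
		a->flush_active = true;	/* schedule_delayed_work(2 * HZ) */
	}
	a->referenced = true;
}

/* Timer tick (check_flush_active): second chance for a referenced
 * slab, deactivation for an idle one. Returns true if re-armed. */
static bool timer_tick(struct active_slab_sim *a)
{
	if (!a->page)
		return false;
	if (a->referenced) {
		a->referenced = false;	/* may be used again before next tick */
		return true;		/* schedule_delayed_work again */
	}
	a->page = false;		/* deactivate_slab -> putback_slab */
	a->flush_active = false;
	return false;
}

int main(void)
{
	struct active_slab_sim a = { false, false, false };
	int tick;

	alloc_hit(&a);	/* a burst of allocations, then silence */
	for (tick = 1; tick <= 3; tick++) {
		bool armed = timer_tick(&a);

		printf("tick %d: armed=%d page=%d referenced=%d\n",
			tick, armed, a.page, a.referenced);
	}
	return 0;
}

The first idle tick only clears the referenced bit, the second gives the slab back. The point of moving this state into struct active_slab is that everything the timer consults is cpu local: the global flushing mutex, the active_cpus counter and the periodic on_each_cpu() round trip drop out, leaving drain_all() as the only remaining cross-cpu operation.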