Index: linux-2.6/include/linux/slab_def.h
===================================================================
--- linux-2.6.orig/include/linux/slab_def.h	2009-10-24 14:43:16.000000000 -0500
+++ linux-2.6/include/linux/slab_def.h	2009-10-24 14:47:17.000000000 -0500
@@ -24,7 +24,7 @@
 struct kmem_cache {
 /* 1) per-cpu data, touched during every alloc/free */
-        struct array_cache *array[NR_CPUS];
+        struct array_cache *cpu_array;
 /* 2) Cache tunables. Protected by cache_chain_mutex */
         unsigned int batchcount;
         unsigned int limit;
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c	2009-10-24 14:43:16.000000000 -0500
+++ linux-2.6/mm/slab.c	2009-10-24 14:48:39.000000000 -0500
@@ -588,11 +588,6 @@
 #undef CACHE
 };
 
-static struct arraycache_init initarray_cache __initdata =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-static struct arraycache_init initarray_generic =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-
 /* internal cache of cache description objs */
 static struct kmem_cache cache_cache = {
         .batchcount = 1,
@@ -689,7 +684,7 @@
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
-        return cachep->array[smp_processor_id()];
+        return this_cpu_ptr(cachep->cpu_array);
 }
 
 static inline struct kmem_cache *__find_general_cachep(size_t size,
@@ -878,13 +873,22 @@
         }
 }
 
-static struct array_cache *alloc_arraycache(int node, int entries,
-                                            int batchcount, gfp_t gfp)
+static struct array_cache *alloc_arraycache(int node, int entries, gfp_t gfp)
+{
+        int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
+
+        return kmalloc_node(memsize, gfp, node);
+}
+
+static struct array_cache *alloc_cpu_arraycache(int entries, gfp_t gfp)
 {
         int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
-        struct array_cache *nc = NULL;
 
-        nc = kmalloc_node(memsize, gfp, node);
+        return __alloc_percpu(memsize, __alignof__ (struct array_cache));
+}
+
+static void init_arraycache(struct array_cache *nc, int entries, int batchcount)
+{
         /*
          * The array_cache structures contain pointers to free object.
          * However, when such objects are allocated or transfered to another
@@ -900,9 +904,9 @@
                 nc->touched = 0;
                 spin_lock_init(&nc->lock);
         }
-        return nc;
 }
 
+
 /*
  * Transfer objects in one arraycache to another.
  * Locking must be handled by the caller.
@@ -978,13 +982,14 @@
                                 ac_ptr[i] = NULL;
                                 continue;
                         }
-                        ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
+                        ac_ptr[i] = alloc_arraycache(node, limit, gfp);
                         if (!ac_ptr[i]) {
                                 for (i--; i >= 0; i--)
                                         kfree(ac_ptr[i]);
                                 kfree(ac_ptr);
                                 return NULL;
                         }
+                        init_arraycache(ac_ptr[i], limit, 0xbaadf00d);
                 }
         }
         return ac_ptr;
@@ -1106,8 +1111,7 @@
                 struct array_cache **alien;
 
                 /* cpu is dead; no one can alloc from it. */
-                nc = cachep->array[cpu];
-                cachep->array[cpu] = NULL;
+                nc = per_cpu_ptr(cachep->cpu_array, cpu);
                 l3 = cachep->nodelists[node];
 
                 if (!l3)
@@ -1206,32 +1210,31 @@
          * array caches
          */
         list_for_each_entry(cachep, &cache_chain, next) {
-                struct array_cache *nc;
                 struct array_cache *shared = NULL;
                 struct array_cache **alien = NULL;
 
-                nc = alloc_arraycache(node, cachep->limit,
-                                        cachep->batchcount, GFP_KERNEL);
-                if (!nc)
-                        goto bad;
+                init_arraycache(this_cpu_ptr(cachep->cpu_array), cachep->limit, cachep->batchcount);
+
                 if (cachep->shared) {
                         shared = alloc_arraycache(node,
                                 cachep->shared * cachep->batchcount,
-                                0xbaadf00d, GFP_KERNEL);
+                                GFP_KERNEL);
                         if (!shared) {
-                                kfree(nc);
+                                free_percpu(cachep->cpu_array);
                                 goto bad;
                         }
+                        init_arraycache(shared,
+                                cachep->shared * cachep->batchcount,
+                                0xbaadf00d);
                 }
                 if (use_alien_caches) {
                         alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
                         if (!alien) {
                                 kfree(shared);
-                                kfree(nc);
+                                free_percpu(cachep->cpu_array);
                                 goto bad;
                         }
                 }
-                cachep->array[cpu] = nc;
                 l3 = cachep->nodelists[node];
                 BUG_ON(!l3);
@@ -1413,7 +1416,8 @@
         INIT_LIST_HEAD(&cache_chain);
         list_add(&cache_cache.next, &cache_chain);
         cache_cache.colour_off = cache_line_size();
-        cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
+        cache_cache.cpu_array = alloc_cpu_arraycache(BOOT_CPUCACHE_ENTRIES, GFP_KERNEL);
+        init_arraycache(this_cpu_ptr(cache_cache.cpu_array), BOOT_CPUCACHE_ENTRIES, 1);
         cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
         /*
@@ -1496,36 +1500,6 @@
                 sizes++;
                 names++;
         }
-        /* 4) Replace the bootstrap head arrays */
-        {
-                struct array_cache *ptr;
-
-                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-                BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
-                memcpy(ptr, cpu_cache_get(&cache_cache),
-                       sizeof(struct arraycache_init));
-                /*
-                 * Do not assume that spinlocks can be initialized via memcpy:
-                 */
-                spin_lock_init(&ptr->lock);
-
-                cache_cache.array[smp_processor_id()] = ptr;
-
-                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-                BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
-                       != &initarray_generic.cache);
-                memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
-                       sizeof(struct arraycache_init));
-                /*
-                 * Do not assume that spinlocks can be initialized via memcpy:
-                 */
-                spin_lock_init(&ptr->lock);
-
-                malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
-                    ptr;
-        }
         /* 5) Replace the bootstrap kmem_list3's */
         {
                 int nid;
@@ -1914,8 +1888,7 @@
         int i;
         struct kmem_list3 *l3;
 
-        for_each_online_cpu(i)
-            kfree(cachep->array[i]);
+        free_percpu(cachep->cpu_array);
 
         /* NUMA: free the list3 structures */
         for_each_online_node(i) {
@@ -2011,7 +1984,8 @@
                  * that's used by kmalloc(24), otherwise the creation of
                  * further caches will BUG().
                  */
-                cachep->array[smp_processor_id()] = &initarray_generic.cache;
+                cachep->cpu_array = alloc_cpu_arraycache(BOOT_CPUCACHE_ENTRIES,
+                                                GFP_KERNEL);
 
                 /*
                  * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
@@ -2024,8 +1998,8 @@
                 else
                         g_cpucache_up = PARTIAL_AC;
         } else {
-                cachep->array[smp_processor_id()] =
-                        kmalloc(sizeof(struct arraycache_init), gfp);
+                cachep->cpu_array = alloc_cpu_arraycache(BOOT_CPUCACHE_ENTRIES,
+                                                GFP_KERNEL);
 
                 if (g_cpucache_up == PARTIAL_AC) {
                         set_up_list3s(cachep, SIZE_L3);
@@ -3807,11 +3781,13 @@
                 if (cachep->shared) {
                         new_shared = alloc_arraycache(node,
                                 cachep->shared*cachep->batchcount,
-                                        0xbaadf00d, gfp);
+                                        gfp);
                         if (!new_shared) {
                                 free_alien_cache(new_alien);
                                 goto fail;
                         }
+                        init_arraycache(new_shared, cachep->shared * cachep->batchcount,
+                                        0xbaadf00d);
                 }
 
                 l3 = cachep->nodelists[node];
@@ -3873,47 +3849,26 @@
         return -ENOMEM;
 }
 
-struct ccupdate_struct {
-        struct kmem_cache *cachep;
-        struct array_cache *new[NR_CPUS];
-};
-
-static void do_ccupdate_local(void *info)
-{
-        struct ccupdate_struct *new = info;
-        struct array_cache *old;
-
-        check_irq_off();
-        old = cpu_cache_get(new->cachep);
-
-        new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
-        new->new[smp_processor_id()] = old;
-}
-
 /* Always called with the cache_chain_mutex held */
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                             int batchcount, int shared, gfp_t gfp)
 {
-        struct ccupdate_struct *new;
+        struct array_cache *new;
+        struct array_cache *old;
         int i;
 
-        new = kzalloc(sizeof(*new), gfp);
+        new = alloc_cpu_arraycache(limit, gfp);
         if (!new)
                 return -ENOMEM;
 
-        for_each_online_cpu(i) {
-                new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
-                                                batchcount, gfp);
-                if (!new->new[i]) {
-                        for (i--; i >= 0; i--)
-                                kfree(new->new[i]);
-                        kfree(new);
-                        return -ENOMEM;
-                }
-        }
-        new->cachep = cachep;
+        for_each_online_cpu(i)
+                init_arraycache(per_cpu_ptr(new, i), limit, batchcount);
 
-        on_each_cpu(do_ccupdate_local, (void *)new, 1);
+        /* FIX ME: Cannot do individual transition anymore. Race with slab operations on each
+         * cpu.
+         */
+        old = cachep->cpu_array;
+        cachep->cpu_array = new;
 
         check_irq_on();
         cachep->batchcount = batchcount;
@@ -3921,15 +3876,14 @@
         cachep->shared = shared;
 
         for_each_online_cpu(i) {
-                struct array_cache *ccold = new->new[i];
+                struct array_cache *ccold = per_cpu_ptr(old, i);
+
                 if (!ccold)
                         continue;
                 spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
                 free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
                 spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-                kfree(ccold);
         }
-        kfree(new);
+        free_percpu(old);
         return alloc_kmemlist(cachep, gfp);
 }
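
For readers who want the per-cpu allocation pattern in isolation, here is a minimal, self-contained sketch. It is not part of the patch: struct demo_array_cache, demo_cpu_array, demo_cache_get() and the limit of 16 entries are hypothetical names used only to illustrate the idiom the patch applies to struct kmem_cache, i.e. one variably sized object per cpu from __alloc_percpu(), local access via this_cpu_ptr() (as in the new cpu_cache_get()), remote access via per_cpu_ptr() (as in cpuup_canceled() and do_tune_cpucache()), and a single free_percpu() on teardown.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/smp.h>

/* Hypothetical stand-in for struct array_cache; entries follow the header. */
struct demo_array_cache {
        unsigned int avail;
        unsigned int limit;
        void *entry[];
};

/* One instance per cpu, like the new kmem_cache.cpu_array. */
static struct demo_array_cache *demo_cpu_array;

/*
 * Fast-path access, analogous to the new cpu_cache_get(): the caller must
 * have preemption (or interrupts) disabled so the cpu cannot change.
 */
static struct demo_array_cache *demo_cache_get(void)
{
        return this_cpu_ptr(demo_cpu_array);
}

static int __init demo_init(void)
{
        size_t size = sizeof(struct demo_array_cache) + 16 * sizeof(void *);
        int cpu;

        /* Variably sized per-cpu allocation, as in alloc_cpu_arraycache(). */
        demo_cpu_array = __alloc_percpu(size,
                                        __alignof__(struct demo_array_cache));
        if (!demo_cpu_array)
                return -ENOMEM;

        /* Remote initialization, the way init_arraycache() is used in cpuup_prepare(). */
        for_each_possible_cpu(cpu) {
                struct demo_array_cache *ac = per_cpu_ptr(demo_cpu_array, cpu);

                ac->avail = 0;
                ac->limit = 16;
        }

        /* Local access on whatever cpu we are currently running on. */
        cpu = get_cpu();
        pr_info("demo: cpu %d sees limit %u\n", cpu, demo_cache_get()->limit);
        put_cpu();

        return 0;
}

static void __exit demo_exit(void)
{
        /* One call replaces the old for_each_online_cpu()/kfree() loop. */
        free_percpu(demo_cpu_array);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Compared with the NR_CPUS pointer array, the percpu object is sized by the number of possible cpus and reached through the percpu offset, which is what lets cpu_cache_get() drop smp_processor_id() indexing and lets __kmem_cache_destroy() replace its per-cpu kfree() loop with a single free_percpu().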