Index: linux-2.6.21-rc5/include/linux/mmzone.h
===================================================================
--- linux-2.6.21-rc5.orig/include/linux/mmzone.h	2007-03-26 20:07:51.000000000 -0700
+++ linux-2.6.21-rc5/include/linux/mmzone.h	2007-03-28 20:34:54.000000000 -0700
@@ -24,6 +24,15 @@
 #endif
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
+#define MAX_SLAB_CACHES 256
+
+struct kmem_cache_node {
+	spinlock_t list_lock;	/* Protect partial list and nr_partial */
+	unsigned long nr_partial;
+	atomic_long_t nr_slabs;
+	struct list_head partial;
+};
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		nr_free;
@@ -449,6 +458,9 @@
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
+#ifdef CONFIG_SLUB
+	struct kmem_cache_node slabs[MAX_SLAB_CACHES];
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
Index: linux-2.6.21-rc5/include/linux/slub_def.h
===================================================================
--- linux-2.6.21-rc5.orig/include/linux/slub_def.h	2007-03-28 20:19:19.000000000 -0700
+++ linux-2.6.21-rc5/include/linux/slub_def.h	2007-03-28 20:34:54.000000000 -0700
@@ -11,13 +11,6 @@
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
 
-struct kmem_cache_node {
-	spinlock_t list_lock;	/* Protect partial list and nr_partial */
-	unsigned long nr_partial;
-	atomic_long_t nr_slabs;
-	struct list_head partial;
-};
-
 /*
  * Slab cache management.
  */
@@ -29,13 +22,6 @@
 	int offset;		/* Free pointer offset. */
 	atomic_t cpu_slabs;	/* != 0 -> flusher scheduled. */
 	int defrag_ratio;
-
-	/*
-	 * Avoid an extra cache line for UP, SMP and for the node local to
-	 * struct kmem_cache.
-	 */
-	struct kmem_cache_node local_node;
-
 	/* Allocation and freeing of slabs */
 	unsigned int order;
 	int objects;		/* Number of objects in slab */
@@ -52,10 +38,6 @@
 	struct delayed_work flush;
 	struct mutex flushing;
 #endif
-#ifdef CONFIG_NUMA
-	struct kmem_cache_node *node[MAX_NUMNODES];
-#endif
-	struct page *cpu_slab[NR_CPUS];
 };
 
 /*
@@ -72,13 +54,11 @@
 #define KMALLOC_EXTRAS 0
 #endif
 
-#define KMALLOC_NR_CACHES (KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW \
-			+ 1 + KMALLOC_EXTRAS)
 /*
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[KMALLOC_NR_CACHES];
+extern struct kmem_cache slub_caches[MAX_SLAB_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -86,12 +66,10 @@
  */
 static inline int kmalloc_index(int size)
 {
-#ifdef KMALLOC_EXTRA
 	if (size > 64 && size <= 96)
-		return KMALLOC_SHIFT_HIGH + 1;
+		return 1;
 	if (size > 128 && size <= 192)
-		return KMALLOC_SHIFT_HIGH + 2;
-#endif
+		return 2;
 	if (size <= 8) return 3;
 	if (size <= 16) return 4;
 	if (size <= 32) return 5;
@@ -128,7 +106,7 @@
  */
 static inline struct kmem_cache *kmalloc_slab(size_t size)
 {
-	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	int index = kmalloc_index(size);
 
 	if (index < 0) {
 		/*
@@ -138,7 +116,7 @@
 		extern void __kmalloc_size_too_large(void);
 		__kmalloc_size_too_large();
 	}
-	return &kmalloc_caches[index];
+	return slub_caches + index;
 }
 
 #ifdef CONFIG_ZONE_DMA
Index: linux-2.6.21-rc5/mm/slub.c
===================================================================
--- linux-2.6.21-rc5.orig/mm/slub.c	2007-03-28 20:19:21.000000000 -0700
+++ linux-2.6.21-rc5/mm/slub.c	2007-03-28 20:52:51.000000000 -0700
@@ -103,8 +103,6 @@
 /* Internal SLUB flags */
 #define __OBJECT_POISON 0x80000000 /* Poison object */
 
-static int kmem_size = sizeof(struct kmem_cache);
-
 #ifdef CONFIG_SMP
 static struct notifier_block slab_notifier;
 #endif
@@ -122,7 +120,8 @@
 
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
-LIST_HEAD(slab_caches);
+
+static DEFINE_PER_CPU(struct page *, cpu_slab)[MAX_SLAB_CACHES];
 
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
@@ -138,13 +137,14 @@
  *			Core slab cache functions
  *******************************************************************/
 
-struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+static inline struct page **get_cpu_slab(struct kmem_cache *s)
 {
-#ifdef CONFIG_NUMA
-	return s->node[node];
-#else
-	return &s->local_node;
-#endif
+	return &__get_cpu_var(cpu_slab)[s - slub_caches];
+}
+
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+	return &NODE_DATA(node)->slabs[s - slub_caches];
 }
 
 /*
@@ -1009,39 +1009,39 @@
  * Remove the cpu slab
  */
 static void __always_inline deactivate_slab(struct kmem_cache *s,
-						struct page *page, int cpu)
+				struct page *page, struct page **ppage)
 {
-	s->cpu_slab[cpu] = NULL;
+	*ppage = NULL;
 	ClearPageActive(page);
 	ClearPageReferenced(page);
 
 	putback_slab(s, page);
 }
 
-static void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
+static void flush_slab(struct kmem_cache *s, struct page *page, struct page **ppage)
 {
 	slab_lock(page);
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, page, ppage);
 }
 
 /*
  * Flush cpu slab.
  * Called from IPI handler with interrupts disabled.
  */
-static void __flush_cpu_slab(struct kmem_cache *s, int cpu)
+static void __flush_cpu_slab(struct kmem_cache *s, struct page **ppage)
 {
-	struct page *page = s->cpu_slab[cpu];
+	struct page *page = *ppage;
 
 	if (likely(page))
-		flush_slab(s, page, cpu);
+		flush_slab(s, page, ppage);
 }
 
 static void flush_cpu_slab(void *d)
 {
 	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
+	struct page **ppage = get_cpu_slab(s);
 
-	__flush_cpu_slab(s, cpu);
+	__flush_cpu_slab(s, ppage);
 }
 
 #ifdef CONFIG_SMP
@@ -1051,13 +1051,13 @@
 static void check_flush_cpu_slab(void *private)
 {
 	struct kmem_cache *s = private;
-	int cpu = smp_processor_id();
-	struct page *page = s->cpu_slab[cpu];
+	struct page **ppage = get_cpu_slab(s);
+	struct page *page = *ppage;
 
 	if (page) {
 		if (!TestClearPageReferenced(page))
 			return;
-		flush_slab(s, page, cpu);
+		flush_slab(s, page, ppage);
 	}
 	atomic_dec(&s->cpu_slabs);
 }
@@ -1117,13 +1117,13 @@
 				gfp_t gfpflags, int node)
 {
 	struct page *page;
+	struct page **ppage;
 	void **object;
 	unsigned long flags;
-	int cpu;
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->cpu_slab[cpu];
+	ppage = get_cpu_slab(s);
+	page = *ppage;
 	if (!page)
 		goto new_slab;
 
@@ -1148,7 +1148,7 @@
 	return object;
 
 another_slab:
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, page, ppage);
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
@@ -1165,29 +1165,29 @@
 			return page_address(page);
 		}
 
-		if (s->cpu_slab[cpu]) {
+		if (*ppage) {
 			/*
 			 * Someone else populated the cpu_slab while
 			 * we enabled interrupts. The page may not
 			 * be on the required node.
 			 */
 			if (node == -1 ||
-				page_to_nid(s->cpu_slab[cpu]) == node) {
+				page_to_nid(*ppage) == node) {
 				/*
 				 * Current cpuslab is acceptable and we
 				 * want the current one since its cache hot
 				 */
 				discard_slab(s, page);
-				page = s->cpu_slab[cpu];
+				page = *ppage;
 				slab_lock(page);
 				goto redo;
 			}
-			flush_slab(s, s->cpu_slab[cpu], cpu);
+			flush_slab(s, *ppage, ppage);
 		}
 		slab_lock(page);
 	}
-	s->cpu_slab[cpu] = page;
+	*ppage = page;
 	SetPageActive(page);
 
 #ifdef CONFIG_SMP
@@ -1398,20 +1398,6 @@
 	return ALIGN(align, sizeof(void *));
 }
 
-static void free_kmem_cache_nodes(struct kmem_cache *s)
-{
-#ifdef CONFIG_NUMA
-	int node;
-
-	for_each_online_node(node) {
-		struct kmem_cache_node *n = s->node[node];
-		if (n && n != &s->local_node)
-			kfree(n);
-		s->node[node] = NULL;
-	}
-#endif
-}
-
 static void init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	memset(n, 0, sizeof(struct kmem_cache_node));
@@ -1422,58 +1408,10 @@
 
 static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 {
-#ifdef CONFIG_NUMA
 	int node;
-	int local_node;
-
-	if (slab_state >= UP)
-		local_node = page_to_nid(virt_to_page(s));
-	else
-		local_node = 0;
-
-	for_each_online_node(node) {
-		struct kmem_cache_node *n;
-
-		if (local_node == node)
-			n = &s->local_node;
-		else
-		if (slab_state == DOWN) {
-			/*
-			 * No kmalloc_node yet so do it by hand.
-			 * We know that this is the first slab on the
-			 * node for this slabcache. There are no concurrent
-			 * accesses possible. Which simplifies things.
-			 */
-			unsigned long flags;
-			struct page *page;
-
-			BUG_ON(s->size < sizeof(struct kmem_cache_node));
-			local_irq_save(flags);
-			page = new_slab(s, gfpflags, node);
-
-			BUG_ON(!page);
-			n = page->freelist;
-			page->freelist = *(void **)page->freelist;
-			page->inuse++;
-			local_irq_restore(flags);
-		} else
-			n = kmalloc_node(sizeof(struct kmem_cache_node),
-					gfpflags, node);
-
-		if (!n) {
-			free_kmem_cache_nodes(s);
-			return 0;
-		}
-
-		s->node[node] = n;
-		init_kmem_cache_node(n);
-		if (slab_state == DOWN)
-			atomic_long_inc(&n->nr_slabs);
-	}
-#else
-	init_kmem_cache_node(&s->local_node);
-#endif
+
+	for_each_online_node(node)
+		init_kmem_cache_node(get_node(s, node));
 	return 1;
 }
 
@@ -1555,7 +1493,7 @@
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
 		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
-	memset(s, 0, kmem_size);
+	memset(s, 0, sizeof(struct kmem_cache));
 	s->name = name;
 	s->ctor = ctor;
 	s->dtor = dtor;
@@ -1683,7 +1621,6 @@
 		if (atomic_long_read(&n->nr_slabs))
 			return 1;
 	}
-	free_kmem_cache_nodes(s);
 	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_close);
@@ -1711,12 +1648,8 @@
  *		Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[KMALLOC_NR_CACHES] __cacheline_aligned;
-EXPORT_SYMBOL(kmalloc_caches);
-
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[KMALLOC_NR_CACHES];
-#endif
+struct kmem_cache slub_caches[MAX_SLAB_CACHES] __cacheline_aligned;
+EXPORT_SYMBOL(slub_caches);
 
 static int __init setup_slub_min_order(char *str)
 {
@@ -1794,7 +1727,6 @@
 			flags, NULL, NULL))
 		goto panic;
 
-	list_add(&s->list, &slab_caches);
 	up_write(&slub_lock);
 	if (sysfs_slab_add(s))
 		goto panic;
@@ -1807,7 +1739,7 @@
 
 static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 {
-	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	int index = kmalloc_index(size);
 
 	if (!size)
 		return NULL;
@@ -1818,41 +1750,30 @@
 #ifdef CONFIG_ZONE_DMA
 	if ((flags & SLUB_DMA)) {
 		struct kmem_cache *s;
-		struct kmem_cache *x;
 		char *text;
 		size_t realsize;
 
-		s = kmalloc_caches_dma[index];
-		if (s)
-			return s;
+		s = &slub_caches[index + KMALLOC_SHIFT_HIGH + 1];
 
-		/* Dynamically create dma cache */
-		x = kmalloc(kmem_size, flags & ~SLUB_DMA);
-		if (!x)
-			panic("Unable to allocate memory for dma cache\n");
+		if (s->refcount)
+			return s;
 
-#ifdef KMALLOC_EXTRA
-		if (index <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW)
-#endif
-			realsize = 1 << (index + KMALLOC_SHIFT_LOW);
-#ifdef KMALLOC_EXTRA
+		if (index >= KMALLOC_SHIFT_LOW && index <= KMALLOC_SHIFT_HIGH)
+			realsize = 1 << index;
 		else {
-			index -= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW +1;
-			if (!index)
+			if (index == 1)
 				realsize = 96;
 			else
 				realsize = 192;
 		}
-#endif
 
 		text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
				(unsigned int)realsize);
-		s = create_kmalloc_cache(x, text, realsize, flags);
-		kmalloc_caches_dma[index] = s;
+		create_kmalloc_cache(s, text, realsize, flags);
		return s;
 	}
 #endif
-	return &kmalloc_caches[index];
+	return &slub_caches[index];
 }
 
 void *__kmalloc(size_t size, gfp_t flags)
@@ -1872,6 +1793,7 @@
 
 	if (s)
 		return kmem_cache_alloc_node(s, flags, node);
+	return NULL;
 }
 EXPORT_SYMBOL(__kmalloc_node);
 
@@ -1984,43 +1906,22 @@
 void __init kmem_cache_init(void)
 {
 	int i;
-	int kmem_cache_node_cache =
-		kmalloc_index(sizeof(struct kmem_cache_node));
-
-	BUG_ON(kmem_cache_node_cache < 0 ||
-		kmem_cache_node_cache > KMALLOC_SHIFT_HIGH);
-	/*
-	 * Must first have the slab cache available for the allocations of the
-	 * struct kmalloc_cache_node's. There is special bootstrap code in
-	 * kmem_cache_open for slab_state == DOWN.
-	 */
-	create_kmalloc_cache(&kmalloc_caches[kmem_cache_node_cache -
-						KMALLOC_SHIFT_LOW],
-			"kmalloc",
-			1 << kmem_cache_node_cache,
-			GFP_KERNEL);
+	for(i = 0; i < MAX_SLAB_CACHES; i++)
+		slub_caches[i].refcount = 0;
 
-	/* Now we are able to allocate the per node structures */
 	slab_state = PARTIAL;
 
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		if (i == kmem_cache_node_cache)
-			continue;
-		create_kmalloc_cache(
-			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
-			"kmalloc", 1 << i, GFP_KERNEL);
-	}
-
-#ifdef KMALLOC_EXTRA
 	/* Caches that are not of the two-to-the-power-of size */
-	create_kmalloc_cache(&kmalloc_caches
-			[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1],
+	create_kmalloc_cache(slub_caches + 1,
 				"kmalloc-96", 96, GFP_KERNEL);
 
-	create_kmalloc_cache(&kmalloc_caches
-			[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2],
+	create_kmalloc_cache(slub_caches + 2,
 				"kmalloc-192", 192, GFP_KERNEL);
-#endif
+
+	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
+		create_kmalloc_cache(slub_caches + i,
+			"kmalloc", 1 << i, GFP_KERNEL);
+
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
@@ -2028,20 +1929,15 @@
 		char *name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 		BUG_ON(!name);
-		kmalloc_caches[i - KMALLOC_SHIFT_LOW].name = name;
+		slub_caches[i].name = name;
 	};
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
 #endif
 
-	if (nr_cpu_ids) /* Remove when nr_cpu_ids was fixed ! */
-		kmem_size = offsetof(struct kmem_cache, cpu_slab)
-			+ nr_cpu_ids * sizeof(struct page *);
-
 	printk(KERN_INFO "SLUB V6: General Slabs=%ld, HW alignment=%d, "
 		"Processors=%d, Nodes=%d\n",
-		(unsigned long)KMALLOC_SHIFT_HIGH + KMALLOC_EXTRAS + 1 -
-			KMALLOC_SHIFT_LOW,
+		(unsigned long)KMALLOC_SHIFT_HIGH -1,
 		L1_CACHE_BYTES,
 		nr_cpu_ids,
 		nr_node_ids);
@@ -2055,7 +1951,7 @@
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
 		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
 		return NULL;
@@ -2067,9 +1963,9 @@
 	align = calculate_alignment(flags, align);
 	size = ALIGN(size, align);
 
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
+	for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++) {
+		if (!s->refcount)
+			continue;
 
 		if (size > s->size)
 			continue;
@@ -2098,6 +1994,18 @@
 	return NULL;
 }
 
+int reserved_slab(struct kmem_cache *s)
+{
+#ifdef CONFIG_ZONE_DMA
+	if (s > slub_caches + 2 * KMALLOC_SHIFT_HIGH)
+		return 0;
+	if (s < slub_caches + KMALLOC_SHIFT_HIGH)
+		return 0;
+	return 1;
+#else
+	return 0;
+#endif
+}
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 		size_t align, unsigned long flags,
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
@@ -2118,16 +2026,17 @@
 		if (sysfs_slab_alias(s, name))
 			goto err;
 	} else {
-		s = kmalloc(kmem_size, GFP_KERNEL);
-		if (s && kmem_cache_open(s, GFP_KERNEL, name,
+		for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++)
+			if (!s->refcount && !reserved_slab(s))
+				break;
+		BUG_ON(s >= slub_caches + MAX_SLAB_CACHES);
+		if (kmem_cache_open(s, GFP_KERNEL, name,
 				size, align, flags, ctor, dtor)) {
 			if (sysfs_slab_add(s)) {
 				kfree(s);
 				goto err;
 			}
-			list_add(&s->list, &slab_caches);
-		} else
-			kfree(s);
+		}
 	}
 	up_write(&slub_lock);
 	return s;
@@ -2154,17 +2063,15 @@
 EXPORT_SYMBOL(kmem_cache_zalloc);
 
 #ifdef CONFIG_SMP
-static void for_all_slabs(void (*func)(struct kmem_cache *, int), int cpu)
+static void for_all_slabs(void (*func)(struct kmem_cache *, struct page **),
+						struct page **ppage)
 {
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	down_read(&slub_lock);
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
-
-		func(s, cpu);
-	}
+	for (s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++)
+		if (s->refcount)
+			func(s, ppage + (s - slub_caches));
 	up_read(&slub_lock);
 }
 
@@ -2180,7 +2087,7 @@
 	switch (action) {
 	case CPU_UP_CANCELED:
 	case CPU_DEAD:
-		for_all_slabs(__flush_cpu_slab, cpu);
+		for_all_slabs(__flush_cpu_slab, per_cpu(cpu_slab, cpu));
 		break;
 	default:
 		break;
@@ -2378,7 +2285,7 @@
 
 	if (flags & SO_CPU)
 		for_each_possible_cpu(cpu) {
-			struct page *page = s->cpu_slab[cpu];
+			struct page *page = per_cpu(cpu_slab, cpu)[s - slub_caches];
 
 			if (page) {
 				int x = 0;
@@ -2410,7 +2317,7 @@
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		if (s->cpu_slab[cpu])
+		if (per_cpu(cpu_slab, cpu)[s - slub_caches])
 			return 1;
 
 	for_each_node(node) {
@@ -2813,7 +2720,7 @@
 int __init slab_sysfs_init(void)
 {
 	int err;
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	err = subsystem_register(&slab_subsys);
 	if (err) {
@@ -2823,9 +2730,9 @@
 
 	slab_state = SYSFS;
 
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
+	for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++) {
+		if (!s->refcount)
+			continue;
 
 		err = sysfs_slab_add(s);
 		BUG_ON(err);
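
For anyone tracing the new layout: after this patch every kmem_cache occupies a fixed slot in the static slub_caches[MAX_SLAB_CACHES] array, and get_cpu_slab()/get_node() locate the matching per-cpu slab pointer and per-node kmem_cache_node purely from the cache's array offset (s - slub_caches), so neither has to be allocated during bootstrap. The fragment below is a minimal user-space sketch of that offset trick, not kernel code; NR_DEMO_NODES, demo_node_data and main() are invented for the illustration.

#include <stdio.h>

#define MAX_SLAB_CACHES	256
#define NR_DEMO_NODES	2		/* assumption for the demo only */

struct kmem_cache_node {
	unsigned long nr_partial;
};

struct kmem_cache {
	const char *name;
	int refcount;			/* 0 means the slot is unused */
};

/* one static array of caches, as in the patch */
static struct kmem_cache slub_caches[MAX_SLAB_CACHES];

/* stand-in for the pg_data_t::slabs[MAX_SLAB_CACHES] arrays added above */
static struct kmem_cache_node demo_node_data[NR_DEMO_NODES][MAX_SLAB_CACHES];

/* mirrors get_node(): per-node state is found by the cache's array offset */
static struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return &demo_node_data[node][s - slub_caches];
}

int main(void)
{
	struct kmem_cache *s = &slub_caches[5];	/* pretend slot 5 is in use */

	s->name = "demo-cache";
	s->refcount = 1;
	get_node(s, 0)->nr_partial = 3;

	printf("%s: node 0 has %lu partial slabs\n",
	       s->name, get_node(s, 0)->nr_partial);
	return 0;
}

The same offset is what lets the static per-cpu cpu_slab array in the patch stand in for the old s->cpu_slab[NR_CPUS] member of struct kmem_cache.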