Index: linux-2.6.20-mm2/include/linux/slub_def.h
===================================================================
--- linux-2.6.20-mm2.orig/include/linux/slub_def.h	2007-02-23 06:02:04.000000000 -0800
+++ linux-2.6.20-mm2/include/linux/slub_def.h	2007-02-23 06:13:35.000000000 -0800
@@ -10,16 +10,19 @@
 #include
 #include
 
+struct kmem_cache_node {
+	spinlock_t list_lock;	/* Protect partial list and nr_partial */
+	unsigned long nr_partial;
+	struct list_head partial;
+	atomic_long_t nr_slabs;
+};
+
 /*
  * Slab cache management.
  */
 struct kmem_cache {
-	spinlock_t list_lock;	/* Protecty partial list and nr_partial */
-	struct list_head partial;
-	unsigned long nr_partial;
 	int offset;		/* Free pointer offset. */
 	struct page *cpu_slab[NR_CPUS];
-	atomic_long_t nr_slabs[MAX_NUMNODES];
 	unsigned int order;
 	unsigned long flags;
 	int size;		/* Total size of an object */
@@ -38,6 +41,8 @@ struct kmem_cache {
 	atomic_t cpu_slabs;	/* if >0 then flusher is scheduled */
 	struct delayed_work flush;
 #endif
+	struct kmem_cache_node *node[MAX_NUMNODES];
+	struct kmem_cache_node local_node_info;
 };
 
 /*
@@ -119,6 +124,7 @@ static inline struct kmem_cache *kmalloc
 #ifdef CONFIG_ZONE_DMA
 #define SLUB_DMA __GFP_DMA
 #else
+/* Disable DMA functionality */
 #define SLUB_DMA 0
 #endif
 
Index: linux-2.6.20-mm2/mm/slub.c
===================================================================
--- linux-2.6.20-mm2.orig/mm/slub.c	2007-02-23 06:12:55.000000000 -0800
+++ linux-2.6.20-mm2/mm/slub.c	2007-02-23 06:13:35.000000000 -0800
@@ -30,6 +30,8 @@
 #include
 #include
 #include
+#include <linux/cpuset.h>
+#include <linux/mempolicy.h>
 
 /*
  * Overloading of page flags that are otherwise used for LRU management.
@@ -102,6 +104,20 @@ static void unregister_slab(struct kmem_
 static struct notifier_block slab_notifier;
 #endif
 
+#ifdef CONFIG_NUMA
+struct kmem_cache kmem_cache_node = {
+	.flags = SLAB_PANIC,
+	.size = sizeof(struct kmem_cache_node),
+	.objects = PAGE_SIZE / sizeof(struct kmem_cache_node),
+	.objsize = sizeof(struct kmem_cache_node),
+	.inuse = sizeof(struct kmem_cache_node),
+	.name = "kmem_cache_node",
+	.node = { &local_node_info, &local_node_info .... },
+	.local_node_info = { SPIN_LOCK_INIT(), 0, INIT_LIST_HEAD(), ATOMIC_INIT(0) }
+};
+#endif
+
+
 /********************************************************************
  *			Core slab cache functions
  *******************************************************************/
@@ -243,19 +259,25 @@ static __always_inline int slab_trylock(
  */
 static void __always_inline add_partial(struct kmem_cache *s, struct page *page)
 {
-	spin_lock(&s->list_lock);
-	s->nr_partial++;
-	list_add_tail(&page->lru, &s->partial);
-	spin_unlock(&s->list_lock);
+	int node = page_to_nid(page);
+	struct kmem_cache_node *n = s->node[node];
+
+	spin_lock(&n->list_lock);
+	n->nr_partial++;
+	list_add_tail(&page->lru, &n->partial);
+	spin_unlock(&n->list_lock);
 }
 
 static void __always_inline remove_partial(struct kmem_cache *s,
						struct page *page)
 {
-	spin_lock(&s->list_lock);
+	int node = page_to_nid(page);
+	struct kmem_cache_node *n = s->node[node];
+
+	spin_lock(&n->list_lock);
 	list_del(&page->lru);
-	s->nr_partial--;
-	spin_unlock(&s->list_lock);
+	n->nr_partial--;
+	spin_unlock(&n->list_lock);
 }
 
 /*
@@ -263,78 +285,83 @@ static void __always_inline remove_parti
  *
  * Must hold list_lock
  */
-static __always_inline int lock_and_del_slab(struct kmem_cache *s,
+static __always_inline int lock_and_del_slab(struct kmem_cache_node *n,
						struct page *page)
 {
 	if (slab_trylock(page)) {
 		list_del(&page->lru);
-		s->nr_partial--;
+		n->nr_partial--;
 		return 1;
 	}
 	return 0;
 }
 
 /*
- * Get a partial page, lock it and return it.
+ * Try to get a partial slab from the indicated node
 */
-#ifdef CONFIG_NUMA
-static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *get_partial_node(struct kmem_cache_node *n)
 {
 	struct page *page;
-	int searchnode = (node == -1) ? numa_node_id() : node;
-
-	if (!s->nr_partial)
-		return NULL;
-	spin_lock(&s->list_lock);
 	/*
-	 * Search for slab on the right node
+	 * Racy check. If we mistakenly see no partial slabs then we
+	 * just allocate an empty slab. If we mistakenly try to get a
+	 * partial slab then get_partials() will return NULL.
 	 */
-	list_for_each_entry(page, &s->partial, lru)
-		if (likely(page_to_nid(page) == searchnode) &&
-			lock_and_del_slab(s, page))
-			goto out;
-
-	if (likely(!(flags & __GFP_THISNODE))) {
-		/*
-		 * We can fall back to any other node in order to
-		 * reduce the size of the partial list.
-		 */
-		list_for_each_entry(page, &s->partial, lru)
-			if (likely(lock_and_del_slab(s, page)))
-				goto out;
-	}
+	if (!n->nr_partial)
+		return NULL;
 
-	/* Nothing found */
+	spin_lock(&n->list_lock);
+	list_for_each_entry(page, &n->partial, lru)
+		if (lock_and_del_slab(n, page))
+			goto out;
 	page = NULL;
 out:
-	spin_unlock(&s->list_lock);
+	spin_unlock(&n->list_lock);
 	return page;
 }
-#else
+
+struct page *get_any_partial(struct kmem_cache *s, int node, gfp_t flags)
+{
+#ifdef CONFIG_NUMA
+	struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
+					->node_zonelists[gfp_zone(flags)];
+	struct zone **z;
+	struct page *page;
+	int nid;
+
+	/*
+	 * Look through allowed nodes for objects available
+	 * from existing per node queues.
+	 */
+	for (z = zonelist->zones; *z; z++) {
+		nid = zone_to_nid(*z);
+
+		if (cpuset_zone_allowed_hardwall(*z, flags) &&
+				s->node[nid]) {
+			page = get_partial_node(s->node[nid]);
+			if (page)
+				return page;
		}
+	}
+#endif
+	return NULL;
+}
+
+/*
+ * Get a partial page, lock it and return it.
+ */
 static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct page *page;
+	int searchnode = (node == -1) ? numa_node_id() : node;
 
-	/*
-	 * Racy check. If we mistakenly see no partial slabs then we
-	 * just allocate an empty slab.
-	 */
-	if (!s->nr_partial)
-		return NULL;
-
-	spin_lock(&s->list_lock);
-	list_for_each_entry(page, &s->partial, lru)
-		if (likely(lock_and_del_slab(s, page)))
-			goto out;
+	page = get_partial_node(s->node[searchnode]);
+	if (page || (flags & __GFP_THISNODE))
+		return page;
 
-	/* No slab or all slabs busy */
-	page = NULL;
-out:
-	spin_unlock(&s->list_lock);
-	return page;
+	return get_any_partial(s, node, flags);
 }
-#endif
 
 /*
  * Debugging checks
@@ -425,7 +452,10 @@ void check_free_chain(struct kmem_cache
 
 static void discard_slab(struct kmem_cache *s, struct page *page)
 {
-	atomic_long_dec(&s->nr_slabs[page_to_nid(page)]);
+	int node = page_to_nid(page);
+	struct kmem_cache_node *n = s->node[node];
+
+	atomic_long_dec(&n->nr_slabs);
 
 	page->mapping = NULL;
 	reset_page_mapcount(page);
@@ -438,6 +468,7 @@
 static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct page *page;
+	struct kmem_cache_node *n;
 
 	BUG_ON(flags & ~(GFP_DMA | GFP_LEVEL_MASK | __GFP_NO_GROW));
 	if (flags & __GFP_NO_GROW)
@@ -450,11 +481,14 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;
 
+	node = page_to_nid(page);
+	n = s->node[node];
+
 	page->offset = s->offset;
-	atomic_long_inc(&s->nr_slabs[page_to_nid(page)]);
+	atomic_long_inc(&n->nr_slabs);
 
-	page->slab = (struct kmem_cache *)s;
+	page->slab = s;
 	__SetPageSlab(page);
 
 	if (s->objects > 1) {
@@ -886,16 +920,28 @@ int kmem_cache_open(struct kmem_cache *s
 {
 	int cpu;
 	int node;
+	int local_node = page_to_nid(virt_to_page(s));
 
 	BUG_ON(flags & SLUB_UNIMPLEMENTED);
 	memset(s, 0, sizeof(struct kmem_cache));
-	for_each_node(node)
-		atomic_long_set(&s->nr_slabs[node], 0);
+	s->node[local_node] = &s->local_node_info;
+
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = s->node[node];
+
+		if (node != local_node)
+			n = s->node[node] = kmem_cache_alloc_node(
+						&kmem_cache_node,
+						GFP_KERNEL,
+						node);
+		atomic_long_set(&n->nr_slabs, 0);
+		spin_lock_init(&n->list_lock);
+		n->nr_partial = 0;
+		INIT_LIST_HEAD(&n->partial);
+	}
 	atomic_set(&s->refcount, 1);
-	spin_lock_init(&s->list_lock);
 	for_each_possible_cpu(cpu)
 		s->cpu_slab[cpu] = NULL;
-	INIT_LIST_HEAD(&s->partial);
 #ifdef CONFIG_SMP
 	mutex_init(&s->flushing);
 	atomic_set(&s->cpu_slabs, 0);
@@ -1003,20 +1049,21 @@ const char *kmem_cache_name(struct kmem_
 }
 EXPORT_SYMBOL(kmem_cache_name);
 
-static int free_list(struct kmem_cache *s, struct list_head *list)
+static int free_list(struct kmem_cache *s, struct kmem_cache_node *n,
+			struct list_head *list)
 {
 	int slabs_inuse = 0;
 	unsigned long flags;
 	struct page *page, *h;
 
-	spin_lock_irqsave(&s->list_lock, flags);
+	spin_lock_irqsave(&n->list_lock, flags);
 	list_for_each_entry_safe(page, h, list, lru)
 		if (!page->inuse) {
 			list_del(&page->lru);
 			discard_slab(s, page);
 		} else
 			slabs_inuse++;
-	spin_unlock_irqrestore(&s->list_lock, flags);
+	spin_unlock_irqrestore(&n->list_lock, flags);
 	return slabs_inuse;
 }
 
@@ -1027,17 +1074,29 @@ static int free_list(struct kmem_cache *
 int kmem_cache_close(struct kmem_cache *s)
 {
 	int node;
+	int local_node = page_to_nid(virt_to_page(s));
 
 	if (!atomic_dec_and_test(&s->refcount))
 		return 0;
 
 	flush_all(s);
-	free_list(s, &s->partial);
-	for_each_online_node(node)
-		if (atomic_long_read(&s->nr_slabs[node]))
+	/* Attempt to free all objects */
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = s->node[node];
+
+		free_list(s, n, &n->partial);
+		if (atomic_long_read(&n->nr_slabs))
 			return 1;
+	}
+
+	/* Free allocated metadata */
+	for_each_online_node(node) {
+		if (node != local_node)
+			kfree(s->node[node]);
+		s->node[node] = NULL;
+	}
 
 	unregister_slab(s);
 	return 0;
 }
@@ -1056,18 +1115,23 @@
 EXPORT_SYMBOL(kmem_cache_destroy);
 
 static unsigned long count_objects(struct kmem_cache *s,
-			struct list_head *list, unsigned long *nodes)
+			unsigned long *nodes)
 {
 	int count = 0;
 	struct page *page;
 	unsigned long flags;
+	int node;
 
-	spin_lock_irqsave(&s->list_lock, flags);
-	list_for_each_entry(page, list, lru) {
-		count += page->inuse;
-		nodes[page_to_nid(page)]++;
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = s->node[node];
+
+		spin_lock_irqsave(&n->list_lock, flags);
+		list_for_each_entry(page, &n->partial, lru) {
+			count += page->inuse;
+			nodes[node]++;
+		}
+		spin_unlock_irqrestore(&n->list_lock, flags);
 	}
-	spin_unlock_irqrestore(&s->list_lock, flags);
 	return count;
 }
 
@@ -1075,16 +1139,20 @@ static unsigned long slab_objects(struct
 	unsigned long *p_total, unsigned long *p_cpu_slabs,
 	unsigned long *p_partial, unsigned long *nodes)
 {
-	int in_partial_slabs = count_objects(s, &s->partial, nodes);
+	int partial = 0;
+	int in_partial_slabs = count_objects(s, nodes);
 	int nr_slabs = 0;
 	int cpu_slabs = 0;
 	int nr_in_cpu_slabs = 0;
 	int cpu;
 	int node;
 
-	for_each_online_node(node)
-		nr_slabs += nodes[node] = atomic_read(&s->nr_slabs[node]);
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = s->node[node];
+		nr_slabs += nodes[node] = atomic_long_read(&n->nr_slabs);
+		partial += n->nr_partial;
+	}
 
 	for_each_possible_cpu(cpu) {
 		struct page *page = s->cpu_slab[cpu];
 
@@ -1096,7 +1164,7 @@ static unsigned long slab_objects(struct
 	}
 
 	if (p_partial)
-		*p_partial = s->nr_partial;
+		*p_partial = partial;
 
 	if (p_cpu_slabs)
 		*p_cpu_slabs = cpu_slabs;
@@ -1105,7 +1173,7 @@ static unsigned long slab_objects(struct
 		*p_total = nr_slabs;
 
 	return in_partial_slabs + nr_in_cpu_slabs +
-		(nr_slabs - s->nr_partial - cpu_slabs) * s->objects;
+		(nr_slabs - partial - cpu_slabs) * s->objects;
 }
 
 /********************************************************************
@@ -1233,14 +1301,20 @@ void __init kmem_cache_init(void)
 {
 	int i;
 
+#ifdef CONFIG_NUMA
+	kmem_cache_open(&kmem_cache_node, "kmem_cache_node",
+		sizeof(struct kmem_cache_node),
+		ARCH_KMALLOC_MINALIGN, SLAB_PANIC, NULL, NULL);
+#endif
+
+	slab_state = PARTIAL;
 	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
 		create_kmalloc_cache(
 			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
			"kmalloc", 1 << i);
 	}
 
-#ifdef KMALLOC_EXTRA
-	slab_state = PARTIAL;
+#ifdef KMALLOC_EXTRA
 	/* Caches that are not of the two-to-the-power-of size */
 	create_kmalloc_cache(&kmalloc_caches
 		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1],