Index: linux-2.6.18-rc4-mm3/include/linux/kmalloc.h
===================================================================
--- linux-2.6.18-rc4-mm3.orig/include/linux/kmalloc.h	2006-08-26 16:38:22.102567870 -0700
+++ linux-2.6.18-rc4-mm3/include/linux/kmalloc.h	2006-08-26 17:38:03.021890407 -0700
@@ -15,16 +15,13 @@
 #define KMALLOC_ALLOCATOR slabifier_allocator
 #endif
 
-#ifdef ARCH_NEEDS_SMALL_SLABS
 #define KMALLOC_SHIFT_LOW 3
-#else
-#define KMALLOC_SHIFT_LOW 7
-#endif
 
-#define KMALLOC_SHIFT_HIGH 20
+#define KMALLOC_SHIFT_HIGH 18
 
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#if L1_CACHE_BYTES <= 64
 #define KMALLOC_EXTRAS 2
+#define KMALLOC_EXTRA
 #else
 #define KMALLOC_EXTRAS 0
 #endif
@@ -37,7 +34,7 @@
  * non DMA cache (DMA simply means memory for legacy I/O. The regular
  * caches can be used for devices that can DMA to all of memory).
  */
-extern struct slab_control kmalloc_caches[2][KMALLOC_NR_CACHES];
+extern struct slab_control kmalloc_caches[KMALLOC_NR_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but GCC has trouble
@@ -45,15 +42,15 @@ extern struct slab_control kmalloc_cache
  */
 static inline int kmalloc_index(int size)
 {
-#ifdef ARCH_NEEDS_SMALL_SLABS
 	if (size <= 8) return 3;
 	if (size <= 16) return 4;
 	if (size <= 32) return 5;
 	if (size <= 64) return 6;
+#ifdef KMALLOC_EXTRA
 	if (size <= 96) return KMALLOC_SHIFT_HIGH + 1;
 #endif
 	if (size <= 128) return 7;
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#ifdef KMALLOC_EXTRA
 	if (size <= 192) return KMALLOC_SHIFT_HIGH + 2;
 #endif
 	if (size <= 256) return 8;
@@ -67,8 +64,6 @@ static inline int kmalloc_index(int size
 	if (size <= 64 * 1024) return 16;
 	if (size <= 128 * 1024) return 17;
 	if (size <= 256 * 1024) return 18;
-	if (size <= 512 * 1024) return 19;
-	if (size <=1024 * 1024) return 20;
 	return -1;
 }
 
@@ -78,7 +73,7 @@ static inline int kmalloc_index(int size
  * This ought to end up with a global pointer to the right cache
  * in kmalloc_caches.
  */
-static inline struct slab_cache *kmalloc_slab(size_t size, gfp_t flags)
+static inline struct slab_cache *kmalloc_slab(size_t size)
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
 
@@ -90,7 +85,7 @@ static inline struct slab_cache *kmalloc
 		extern void __kmalloc_size_too_large(void);
 		__kmalloc_size_too_large();
 	}
-	return &kmalloc_caches[!!(flags & __GFP_DMA)][index].sc;
+	return &kmalloc_caches[index].sc;
 }
 
 extern void *__kmalloc(size_t, gfp_t);
@@ -98,8 +93,8 @@ extern void *__kmalloc(size_t, gfp_t);
 
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
-	if (__builtin_constant_p(size)) {
-		struct slab_cache *s = kmalloc_slab(size, flags);
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
 
 		return KMALLOC_ALLOCATOR.alloc(s, flags);
 	} else
@@ -110,8 +105,8 @@ static inline void *kmalloc(size_t size,
 extern void *__kmalloc_node(size_t, gfp_t, int);
 static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
-	if (__builtin_constant_p(size)) {
-		struct slab_cache *s = kmalloc_slab(size, flags);
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
 
 		return KMALLOC_ALLOCATOR.alloc_node(s, flags, node);
 	} else
@@ -133,4 +128,7 @@ extern void *kzalloc(size_t, gfp_t);
 /* Figure out what size the chunk is */
 extern size_t ksize(const void *);
 
+extern struct page_allocator *reclaimable_allocator;
+extern struct page_allocator *unreclaimable_allocator;
+
 #endif /* _LINUX_KMALLOC_H */
Index: linux-2.6.18-rc4-mm3/mm/kmalloc.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/kmalloc.c	2006-08-26 16:38:22.103544372 -0700
+++ linux-2.6.18-rc4-mm3/mm/kmalloc.c	2006-08-26 17:42:54.424782042 -0700
@@ -10,17 +10,97 @@
 #include
 #include
 
-struct slab_control kmalloc_caches[2][KMALLOC_NR_CACHES] __cacheline_aligned;
+#ifndef ARCH_KMALLOC_MINALIGN
+#define ARCH_KMALLOC_MINALIGN sizeof(void *)
+#endif
+
+struct slab_control kmalloc_caches[KMALLOC_NR_CACHES] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
+static struct page_allocator *dma_allocator;
+struct page_allocator *reclaimable_allocator;
+struct page_allocator *unreclaimable_allocator;
+
+static struct slab_cache *kmalloc_caches_dma[KMALLOC_NR_CACHES];
+
+/*
+ * Given a slab size find the correct order to use.
+ * We only support powers of two so there is really
+ * no need for anything special. Objects will always
+ * fit exactly into the slabs with no overhead.
+ */
+static __init int order(size_t size)
+{
+	if (size >= PAGE_SIZE)
+		/* One object per slab */
+		return fls(size -1) - PAGE_SHIFT;
+
+	/* Multiple objects per page which will fit neatly */
+	return 0;
+}
+
+static struct slab_cache *create_kmalloc_cache(struct slab_control *x,
+			const char *name,
+			const struct page_allocator *p,
+			int size)
+{
+	struct slab_cache s;
+	struct slab_cache *rs;
+
+	s.page_alloc = p;
+	s.slab_alloc = &KMALLOC_ALLOCATOR;
+	s.size = size;
+	s.align = ARCH_KMALLOC_MINALIGN;
+	s.offset = 0;
+	s.objsize = size;
+	s.inuse = size;
+	s.node = -1;
+	s.order = order(size);
+	s.name = "kmalloc";
+	rs = KMALLOC_ALLOCATOR.create(x, &s);
+	if (!rs)
+		panic("Creation of kmalloc slab %s size=%d failed.\n",
+			name, size);
+	register_slab(rs);
+	return rs;
+}
+
 static struct slab_cache *get_slab(size_t size, gfp_t flags)
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	struct slab_cache *s;
+	struct slab_control *x;
+	size_t realsize;
 
 	BUG_ON(size < 0);
 
-	return &kmalloc_caches[!!(flags & __GFP_DMA)][index].sc;
+	if (!(flags & __GFP_DMA))
+		return &kmalloc_caches[index].sc;
+
+	s = kmalloc_caches_dma[index];
+	if (s)
+		return s;
+
+	/* Dynamically create dma cache */
+	x = kmalloc(sizeof(struct slab_control), flags & ~(__GFP_DMA));
+
+	if (!x)
+		panic("Unable to allocate memory for dma cache\n");
+
+#ifdef KMALLOC_EXTRA
+	if (index <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW)
+#endif
+		realsize = 1 << (index + KMALLOC_SHIFT_LOW);
+#ifdef KMALLOC_EXTRA
+	else if (index == KMALLOC_SHIFT_HIGH + 1 - KMALLOC_SHIFT_LOW)
+		realsize = 96;
+	else
+		realsize = 192;
+#endif
+	s = create_kmalloc_cache(x, "kmalloc_dma", dma_allocator, realsize);
+	kmalloc_caches_dma[index] = s;
+	return s;
 }
 
 void *__kmalloc(size_t size, gfp_t flags)
@@ -55,27 +135,12 @@ size_t ksize(const void *object)
 EXPORT_SYMBOL(ksize);
 
 /*
- * Given a slab size find the correct order to use.
- * We only support powers of two so there is really
- * no need for anything special. Objects will always
- * fit exactly into the slabs with no overhead.
- */
-static __init int order(size_t size)
-{
-	if (size >= PAGE_SIZE)
-		/* One object per slab */
-		return fls(size -1) - PAGE_SHIFT;
-
-	/* Multiple objects per page which will fit neatly */
-	return 0;
-}
-/*
  * Provide the kmalloc array as regular slab allocator for the
  * generic allocator framework.
  */
 struct slab_allocator kmalloc_slab_allocator;
 
-struct slab_cache *kmalloc_create(struct slab_control *x,
+static struct slab_cache *kmalloc_create(struct slab_control *x,
 			const struct slab_cache *s)
 {
 	struct slab_cache *km;
@@ -86,76 +151,50 @@ struct slab_cache *kmalloc_create(struct
 		|| s->offset)
 		return NULL;
 
-	km = &kmalloc_caches[0][index].sc;
+	km = &kmalloc_caches[index].sc;
 	BUG_ON(s->size > km->size);
 	return KMALLOC_ALLOCATOR.dup(km);
 }
 
-#ifndef ARCH_KMALLOC_MINALIGN
-#define ARCH_KMALLOC_MINALIGN sizeof(void *)
-#endif
-
-void __init create_kmalloc_cache(struct slab_control *x,
-			const char *name,
-			const struct page_allocator *p,
-			int size)
-{
-	struct slab_cache s;
-	struct slab_cache *rs;
-
-	s.page_alloc = p;
-	s.slab_alloc = &KMALLOC_ALLOCATOR;
-	s.size = size;
-	s.align = ARCH_KMALLOC_MINALIGN;
-	s.offset = 0;
-	s.objsize = size;
-	s.inuse = size;
-	s.node = -1;
-	s.order = order(size);
-	s.name = "kmalloc";
-	rs = KMALLOC_ALLOCATOR.create(x, &s);
-	if (!rs)
-		panic("Creation of kmalloc slab %s size=%d failed.\n",
-			name, size);
-	register_slab(rs);
-}
+static void null_destructor(struct page_allocator *x) {}
 
-void __init kmalloc_init_array(int dma, const char *name,
-				const struct page_allocator *pa)
+void __init kmalloc_init(void)
 {
 	int i;
 
 	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
 		create_kmalloc_cache(
-			&kmalloc_caches[dma][i - KMALLOC_SHIFT_LOW],
-			name, pa, 1 << i);
+			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
+			"kmalloc", &page_allocator, 1 << i);
 	}
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#ifdef KMALLOC_EXTRA
 	/* Non-power of two caches */
-	create_kmalloc_cache(&kmalloc_caches[dma]
+	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1], name, pa, 96);
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1], "kmalloc", &page_allocator, 96);
-	create_kmalloc_cache(&kmalloc_caches[dma]
+	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2], name, pa, 192);
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2], "kmalloc", &page_allocator, 192);
 #endif
-}
-
-void __init kmalloc_init(void)
-{
-	kmalloc_init_array(0, "kmalloc", &page_allocator);
 
 	/*
 	 * The above must be done first. Deriving a page allocator requires
 	 * a working (normal) kmalloc array.
 	 */
+	unreclaimable_allocator = unreclaimable_slab(&page_allocator);
+	unreclaimable_allocator->destructor = null_destructor;
+
 	/*
-	 * On all my machines the DMA array is always empty. I wish we
-	 * could get rid of it.
+	 * Fix up the initial arrays. Because of the preceding uses we
+	 * have likely consumed a couple of pages that we cannot account
+	 * for.
 	 */
-	kmalloc_init_array(1, "kmalloc-DMA",
-			dmaify_page_allocator(&page_allocator));
+	for(i = 0; i < KMALLOC_NR_CACHES; i++)
+		kmalloc_caches[i].sc.page_alloc = unreclaimable_allocator;
+
+	reclaimable_allocator = reclaimable_slab(&page_allocator);
+	reclaimable_allocator->destructor = null_destructor;
+	dma_allocator = dmaify_page_allocator(unreclaimable_allocator);
 
 	/* And deal with the kmalloc_cache_allocator */
 	memcpy(&kmalloc_slab_allocator, &KMALLOC_ALLOCATOR,
Index: linux-2.6.18-rc4-mm3/mm/allocator.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/allocator.c	2006-08-26 16:38:18.584230641 -0700
+++ linux-2.6.18-rc4-mm3/mm/allocator.c	2006-08-26 17:23:59.646273179 -0700
@@ -12,6 +12,17 @@
  * Section One: Page Allocators
  */
 
+static char *alloc_str_combine(const char *new, const char *base)
+{
+	char *s;
+
+	s = kmalloc(strlen(new) + strlen(base) + 2, GFP_KERNEL);
+	strcpy(s, new);
+	strcat(s, ":");
+	strcat(s, base);
+	return s;
+}
+
 /* For static allocators */
 static void null_destructor(struct page_allocator *a) {}
 
@@ -65,16 +76,11 @@ struct derived_page_allocator *derive_pa
 {
 	struct derived_page_allocator *d =
 		kmalloc(sizeof(struct derived_page_allocator), GFP_KERNEL);
-	char *s;
 
 	d->base = base;
 	d->a.allocate = base->allocate;
 	d->a.free = base->free;
-	s = kmalloc(strlen(name) + strlen(base->name) + 2, GFP_KERNEL);
-	strcpy(s, name);
-	strcat(s, ":");
-	strcat(s, base->name);
-	d->a.name = s;
+	d->a.name = alloc_str_combine(name, base->name);
 	d->a.destructor = derived_destructor;
 	return d;
 };
@@ -201,16 +207,11 @@ struct page_allocator *ctor_and_dtor_for
 {
 	struct deconstructor *d =
 		kmalloc(sizeof(struct deconstructor), GFP_KERNEL);
-	char *s;
 
 	d->a.allocate = ctor ? ctor_alloc : base->allocate;
 	d->a.free = dtor ? dtor_free : base->free;
 	d->a.destructor = derived_destructor;
-	s = kmalloc(strlen(base->name) + 1 + 10, GFP_KERNEL);
-	strcpy(s, "ctor_dtor");
-	strcat(s, ":");
-	strcat(s, base->name);
-	d->a.name = s;
+	d->a.name = alloc_str_combine("ctor_dtor", base->name);
 	d->base = base;
 	d->ctor = ctor;
 	d->dtor = dtor;
@@ -223,16 +224,14 @@ struct page_allocator *ctor_and_dtor_for
  * Track reclaimable pages. This is used by the slabulator
  * to mark allocations of certain slab caches.
  */
-atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
-EXPORT_SYMBOL(slab_reclaim_pages);
-
 static struct page *rac_alloc(const struct page_allocator *a, int order,
 		gfp_t flags, int node)
 {
 	struct derived_page_allocator *d = (void *)a;
+	struct page *page = d->base->allocate(d->base, order, flags, node);
 
-	atomic_add(1 << order, &slab_reclaim_pages);
-	return d->base->allocate(d->base, order, flags, node);
+	mod_zone_page_state(page_zone(page), NR_SLAB_RECLAIMABLE, 1 << order);
+	return page;
 }
 
 static void rac_free(const struct page_allocator *a, struct page *page,
@@ -240,14 +239,15 @@
 {
 	struct derived_page_allocator *d = (void *)a;
 
-	atomic_sub(1 << order, &slab_reclaim_pages);
+	mod_zone_page_state(page_zone(page),
+			NR_SLAB_RECLAIMABLE, -(1 << order));
 	d->base->free(d->base, page, order);
 }
 
-struct page_allocator *reclaim_allocator(const struct page_allocator *base)
+struct page_allocator *reclaimable_slab(const struct page_allocator *base)
 {
 	struct derived_page_allocator *d =
-		derive_page_allocator(&page_allocator,"reclaim");
+		derive_page_allocator(&page_allocator,"reclaimable");
 
 	d->a.allocate = rac_alloc;
 	d->a.free = rac_free;
@@ -255,6 +255,41 @@
 }
 
 /*
+ * Track unreclaimable pages. This is used by the slabulator
+ * to mark allocations of certain slab caches.
+ */
+static struct page *urac_alloc(const struct page_allocator *a, int order,
+		gfp_t flags, int node)
+{
+	struct derived_page_allocator *d = (void *)a;
+	struct page *page = d->base->allocate(d->base, order, flags, node);
+
+	mod_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, 1 << order);
+	return page;
+}
+
+static void urac_free(const struct page_allocator *a, struct page *page,
+		int order)
+{
+	struct derived_page_allocator *d = (void *)a;
+
+	mod_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, -(1 << order));
+	d->base->free(d->base, page, order);
+}
+
+struct page_allocator *unreclaimable_slab(const struct page_allocator *base)
+{
+	struct derived_page_allocator *d =
+		derive_page_allocator(&page_allocator,"unreclaimable");
+
+	d->a.allocate = urac_alloc;
+	d->a.free = urac_free;
+	return &d->a;
+}
+
+/*
  * Numacontrol for allocators
  */
 struct numactl {
@@ -284,7 +319,7 @@ struct page_allocator *numactl_allocator
 	d->a.allocate = numactl_alloc;
 	d->a.destructor = derived_destructor;
-	d->a.name = "numa";
+	d->a.name = alloc_str_combine("numa", base->name);
 	d->base = base;
 	d->node = node;
 	d->flags = flags;
@@ -310,16 +345,10 @@ struct derived_slab_allocator *derive_sl
 		const char *name)
 {
 	struct derived_slab_allocator *d =
 		kmalloc(sizeof(struct derived_slab_allocator), GFP_KERNEL);
-	char *s;
 	memcpy(&d->a, base, sizeof(struct slab_allocator));
 	d->base = base;
-	s = kmalloc(strlen(name) + strlen(base->name) + 2, GFP_KERNEL);
-	strcpy(s, name);
-	strcat(s, ":");
-	strcat(s, d->base->name);
-	d->a.name = s;
-	d->a.name = "derived";
+	d->a.name = alloc_str_combine(name, base->name);
 	d->a.destructor = derived_slab_destructor;
 	return d;
 }
Index: linux-2.6.18-rc4-mm3/mm/slabulator.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/slabulator.c	2006-08-26 16:38:23.927650380 -0700
+++ linux-2.6.18-rc4-mm3/mm/slabulator.c	2006-08-26 17:52:03.742399379 -0700
@@ -69,7 +69,7 @@ struct slab_cache *kmem_cache_create(con
 		void (*ctor)(void *, struct slab_cache *, unsigned long),
 		void (*dtor)(void *, struct slab_cache *, unsigned long))
 {
-	const struct page_allocator *a = &page_allocator;
+	const struct page_allocator *a;
 	struct slab_cache s;
 	struct slab_cache *rs;
 	struct slab_control *x;
@@ -87,7 +89,9 @@ struct slab_cache *kmem_cache_create(con
 	/* Pick the right allocator for our purposes */
 	if (flags & SLAB_RECLAIM_ACCOUNT)
-		a = reclaim_allocator(a);
+		a = reclaimable_allocator;
+	else
+		a = unreclaimable_allocator;
 
 	if (flags & SLAB_CACHE_DMA)
 		a = dmaify_page_allocator(a);
 
Index: linux-2.6.18-rc4-mm3/mm/slabifier.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/slabifier.c	2006-08-26 16:38:20.821397056 -0700
+++ linux-2.6.18-rc4-mm3/mm/slabifier.c	2006-08-26 17:08:35.404562189 -0700
@@ -337,7 +337,6 @@ static void discard_slab(struct slab *s,
 	__ClearPageSlab(page);
 
 	s->sc.page_alloc->free(s->sc.page_alloc, page, s->sc.order);
-	sub_zone_page_state(page_zone(page), NR_SLAB, 1 << s->sc.order);
 }
 
 /*
@@ -355,7 +354,6 @@ static struct page *new_slab(struct slab
 
 	set_slab(page, s);
 	__SetPageSlab(page);
-	add_zone_page_state(page_zone(page), NR_SLAB, 1 << s->sc.order);
 	atomic_long_inc(&s->nr_slabs);
 	return page;
 }
Index: linux-2.6.18-rc4-mm3/include/linux/allocator.h
===================================================================
--- linux-2.6.18-rc4-mm3.orig/include/linux/allocator.h	2006-08-26 16:38:18.583254139 -0700
+++ linux-2.6.18-rc4-mm3/include/linux/allocator.h	2006-08-26 17:06:28.093094154 -0700
@@ -58,9 +58,10 @@ struct page_allocator *dmaify_page_alloc
 /*
  * Allocation and freeing is tracked with slab_reclaim_pages
  */
-extern atomic_t slab_reclaim_pages;
+struct page_allocator *reclaimable_slab
+	(const struct page_allocator *base);
 
-struct page_allocator *reclaim_allocator
+struct page_allocator *unreclaimable_slab
 	(const struct page_allocator *base);
 
 /*
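
A quick way to sanity-check the new size-to-cache mapping is to model kmalloc_index() in user space. The sketch below is an illustration only and not part of the patch; it hardcodes the KMALLOC_EXTRA configuration (L1_CACHE_BYTES <= 64, so KMALLOC_SHIFT_HIGH is 18 and the 96/192 byte caches exist) and prints the array slot that kmalloc_slab()/get_slab() would index with:

/* Illustration only -- user-space model of kmalloc_index() from the patch. */
#include <stdio.h>

#define KMALLOC_SHIFT_LOW 3
#define KMALLOC_SHIFT_HIGH 18

static int kmalloc_index(int size)
{
	if (size <= 8) return 3;
	if (size <= 16) return 4;
	if (size <= 32) return 5;
	if (size <= 64) return 6;
	if (size <= 96) return KMALLOC_SHIFT_HIGH + 1;	/* extra 96 byte cache */
	if (size <= 128) return 7;
	if (size <= 192) return KMALLOC_SHIFT_HIGH + 2;	/* extra 192 byte cache */
	if (size <= 256) return 8;
	if (size <= 512) return 9;
	if (size <= 1024) return 10;
	if (size <= 2 * 1024) return 11;
	if (size <= 4 * 1024) return 12;
	if (size <= 8 * 1024) return 13;
	if (size <= 16 * 1024) return 14;
	if (size <= 32 * 1024) return 15;
	if (size <= 64 * 1024) return 16;
	if (size <= 128 * 1024) return 17;
	if (size <= 256 * 1024) return 18;
	return -1;
}

int main(void)
{
	int sizes[] = { 8, 30, 96, 100, 192, 200, 4096, 256 * 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("size %6d -> kmalloc_index %2d -> kmalloc_caches[%d]\n",
			sizes[i], kmalloc_index(sizes[i]),
			kmalloc_index(sizes[i]) - KMALLOC_SHIFT_LOW);
	return 0;
}

The two odd sizes land past the last power-of-two slot (slots 16 and 17 here), which is presumably what KMALLOC_EXTRAS accounts for when sizing kmalloc_caches[].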
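
On the allocator.c side, reclaimable_slab() and unreclaimable_slab() are thin decorators over a base page allocator: they forward allocate/free to the wrapped allocator and adjust the per-zone NR_SLAB_RECLAIMABLE/NR_SLAB_UNRECLAIMABLE counters on the way through, and dmaify_page_allocator() can then be stacked on top of the result (kmalloc_init() stacks it on the unreclaimable allocator, while kmem_cache_create() picks one or the other from SLAB_RECLAIM_ACCOUNT). A minimal user-space sketch of that composition follows; it is an illustration only, with struct page, the zone counter and the derived-allocator layout all stubbed out (the base pointer lives in the allocator struct itself instead of a separate derived_page_allocator as in the patch):

/* Illustration only -- user-space model of the page allocator decorators. */
#include <stdio.h>
#include <stdlib.h>

struct page { int order; };			/* stand-in for struct page */
static long nr_slab_unreclaimable;		/* stand-in for the zone counter */

struct page_allocator {
	struct page *(*allocate)(const struct page_allocator *a, int order);
	void (*free)(const struct page_allocator *a, struct page *page, int order);
	const struct page_allocator *base;	/* wrapped allocator, if any */
	const char *name;
};

/* Base allocator: pretend to hand out 2^order pages. */
static struct page *base_alloc(const struct page_allocator *a, int order)
{
	struct page *page = malloc(sizeof(*page));

	page->order = order;
	return page;
}

static void base_free(const struct page_allocator *a, struct page *page, int order)
{
	free(page);
}

static const struct page_allocator page_allocator = {
	base_alloc, base_free, NULL, "page_allocator"
};

/* Decorator in the style of urac_alloc()/urac_free(): forward and account. */
static struct page *urac_alloc(const struct page_allocator *a, int order)
{
	struct page *page = a->base->allocate(a->base, order);

	nr_slab_unreclaimable += 1 << order;
	return page;
}

static void urac_free(const struct page_allocator *a, struct page *page, int order)
{
	nr_slab_unreclaimable -= 1 << order;
	a->base->free(a->base, page, order);
}

static const struct page_allocator unreclaimable_allocator = {
	urac_alloc, urac_free, &page_allocator, "unreclaimable:page_allocator"
};

int main(void)
{
	struct page *page;

	page = unreclaimable_allocator.allocate(&unreclaimable_allocator, 2);
	printf("after alloc: NR_SLAB_UNRECLAIMABLE = %ld\n", nr_slab_unreclaimable);

	unreclaimable_allocator.free(&unreclaimable_allocator, page, 2);
	printf("after free:  NR_SLAB_UNRECLAIMABLE = %ld\n", nr_slab_unreclaimable);
	return 0;
}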