Index: linux-2.6.18-rc4-mm3/include/linux/kmalloc.h
===================================================================
--- linux-2.6.18-rc4-mm3.orig/include/linux/kmalloc.h	2006-08-26 16:38:22.102567870 -0700
+++ linux-2.6.18-rc4-mm3/include/linux/kmalloc.h	2006-08-26 17:38:03.021890407 -0700
@@ -15,16 +15,13 @@
 #define KMALLOC_ALLOCATOR slabifier_allocator
 #endif
 
-#ifdef ARCH_NEEDS_SMALL_SLABS
 #define KMALLOC_SHIFT_LOW 3
-#else
-#define KMALLOC_SHIFT_LOW 7
-#endif
 
-#define KMALLOC_SHIFT_HIGH 20
+#define KMALLOC_SHIFT_HIGH 18
 
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#if L1_CACHE_BYTES <= 64
 #define KMALLOC_EXTRAS 2
+#define KMALLOC_EXTRA
 #else
 #define KMALLOC_EXTRAS 0
 #endif
@@ -37,7 +34,7 @@
  * non DMA cache (DMA simply means memory for legacy I/O. The regular
  * caches can be used for devices that can DMA to all of memory).
  */
-extern struct slab_control kmalloc_caches[2][KMALLOC_NR_CACHES];
+extern struct slab_control kmalloc_caches[KMALLOC_NR_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but GCC has trouble
@@ -45,15 +42,15 @@ extern struct slab_control kmalloc_cache
  */
 static inline int kmalloc_index(int size)
 {
-#ifdef ARCH_NEEDS_SMALL_SLABS
 	if (size <= 8) return 3;
 	if (size <= 16) return 4;
 	if (size <= 32) return 5;
 	if (size <= 64) return 6;
+#ifdef KMALLOC_EXTRA
 	if (size <= 96) return KMALLOC_SHIFT_HIGH + 1;
 #endif
 	if (size <= 128) return 7;
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#ifdef KMALLOC_EXTRA
 	if (size <= 192) return KMALLOC_SHIFT_HIGH + 2;
 #endif
 	if (size <= 256) return 8;
@@ -67,8 +64,6 @@ static inline int kmalloc_index(int size
 	if (size <= 64 * 1024) return 16;
 	if (size <= 128 * 1024) return 17;
 	if (size <= 256 * 1024) return 18;
-	if (size <= 512 * 1024) return 19;
-	if (size <=1024 * 1024) return 20;
 	return -1;
 }
 
@@ -78,7 +73,7 @@ static inline int kmalloc_index(int size
  * This ought to end up with a global pointer to the right cache
  * in kmalloc_caches.
  */
-static inline struct slab_cache *kmalloc_slab(size_t size, gfp_t flags)
+static inline struct slab_cache *kmalloc_slab(size_t size)
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
 
@@ -90,7 +85,7 @@ static inline struct slab_cache *kmalloc
 		extern void __kmalloc_size_too_large(void);
 		__kmalloc_size_too_large();
 	}
-	return &kmalloc_caches[!!(flags & __GFP_DMA)][index].sc;
+	return &kmalloc_caches[index].sc;
 }
 
 extern void *__kmalloc(size_t, gfp_t);
@@ -98,8 +93,8 @@ extern void *__kmalloc(size_t, gfp_t);
 
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
-	if (__builtin_constant_p(size)) {
-		struct slab_cache *s = kmalloc_slab(size, flags);
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
 
 		return KMALLOC_ALLOCATOR.alloc(s, flags);
 	} else
@@ -110,8 +105,8 @@ static inline void *kmalloc(size_t size,
 extern void *__kmalloc_node(size_t, gfp_t, int);
 static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
-	if (__builtin_constant_p(size)) {
-		struct slab_cache *s = kmalloc_slab(size, flags);
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
 
 		return KMALLOC_ALLOCATOR.alloc_node(s, flags, node);
 	} else
@@ -133,4 +128,7 @@ extern void *kzalloc(size_t, gfp_t);
 /* Figure out what size the chunk is */
 extern size_t ksize(const void *);
 
+extern struct page_allocator *reclaimable_allocator;
+extern struct page_allocator *unreclaimable_allocator;
+
 #endif /* _LINUX_KMALLOC_H */
Index: linux-2.6.18-rc4-mm3/mm/kmalloc.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/kmalloc.c	2006-08-26 16:38:22.103544372 -0700
+++ linux-2.6.18-rc4-mm3/mm/kmalloc.c	2006-08-26 17:42:54.424782042 -0700
@@ -10,17 +10,97 @@
 #include
 #include
 
-struct slab_control kmalloc_caches[2][KMALLOC_NR_CACHES] __cacheline_aligned;
+#ifndef ARCH_KMALLOC_MINALIGN
+#define ARCH_KMALLOC_MINALIGN sizeof(void *)
+#endif
+
+struct slab_control kmalloc_caches[KMALLOC_NR_CACHES] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
+static struct page_allocator *dma_allocator;
+struct page_allocator *reclaimable_allocator;
+struct page_allocator *unreclaimable_allocator;
+
+static struct slab_cache *kmalloc_caches_dma[KMALLOC_NR_CACHES];
+
+/*
+ * Given a slab size find the correct order to use.
+ * We only support powers of two so there is really
+ * no need for anything special. Objects will always
+ * fit exactly into the slabs with no overhead.
+ */
+static __init int order(size_t size)
+{
+	if (size >= PAGE_SIZE)
+		/* One object per slab */
+		return fls(size -1) - PAGE_SHIFT;
+
+	/* Multiple objects per page which will fit neatly */
+	return 0;
+}
+
+static struct slab_cache *create_kmalloc_cache(struct slab_control *x,
+			const char *name,
+			const struct page_allocator *p,
+			int size)
+{
+	struct slab_cache s;
+	struct slab_cache *rs;
+
+	s.page_alloc = p;
+	s.slab_alloc = &KMALLOC_ALLOCATOR;
+	s.size = size;
+	s.align = ARCH_KMALLOC_MINALIGN;
+	s.offset = 0;
+	s.objsize = size;
+	s.inuse = size;
+	s.node = -1;
+	s.order = order(size);
+	s.name = "kmalloc";
+	rs = KMALLOC_ALLOCATOR.create(x, &s);
+	if (!rs)
+		panic("Creation of kmalloc slab %s size=%d failed.\n",
+			name, size);
+	register_slab(rs);
+	return rs;
+}
+
 static struct slab_cache *get_slab(size_t size, gfp_t flags)
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	struct slab_cache *s;
+	struct slab_control *x;
+	size_t realsize;
 
 	BUG_ON(size < 0);
 
-	return &kmalloc_caches[!!(flags & __GFP_DMA)][index].sc;
+	if (!(flags & __GFP_DMA))
+		return &kmalloc_caches[index].sc;
+
+	s = kmalloc_caches_dma[index];
+	if (s)
+		return s;
+
+	/* Dynamically create dma cache */
+	x = kmalloc(sizeof(struct slab_control), flags & ~(__GFP_DMA));
+
+	if (!x)
+		panic("Unable to allocate memory for dma cache\n");
+
+#ifdef KMALLOC_EXTRA
+	if (index <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW)
+#endif
+		realsize = 1 << (index + KMALLOC_SHIFT_LOW);
+#ifdef KMALLOC_EXTRA
+	else if (index == KMALLOC_SHIFT_HIGH + 1 - KMALLOC_SHIFT_LOW)
+		realsize = 96;
+	else
+		realsize = 192;
+#endif
+	s = create_kmalloc_cache(x, "kmalloc_dma", dma_allocator, realsize);
+	kmalloc_caches_dma[index] = s;
+	return s;
 }
 
 void *__kmalloc(size_t size, gfp_t flags)
@@ -55,27 +135,12 @@ size_t ksize(const void *object)
 EXPORT_SYMBOL(ksize);
 
 /*
- * Given a slab size find the correct order to use.
- * We only support powers of two so there is really
- * no need for anything special. Objects will always
- * fit exactly into the slabs with no overhead.
- */
-static __init int order(size_t size)
-{
-	if (size >= PAGE_SIZE)
-		/* One object per slab */
-		return fls(size -1) - PAGE_SHIFT;
-
-	/* Multiple objects per page which will fit neatly */
-	return 0;
-}
-/*
  * Provide the kmalloc array as regular slab allocator for the
  * generic allocator framework.
  */
 struct slab_allocator kmalloc_slab_allocator;
 
-struct slab_cache *kmalloc_create(struct slab_control *x,
+static struct slab_cache *kmalloc_create(struct slab_control *x,
 			const struct slab_cache *s)
 {
 	struct slab_cache *km;
@@ -86,76 +151,50 @@ struct slab_cache *kmalloc_create(struct
 		|| s->offset)
 		return NULL;
 
-	km = &kmalloc_caches[0][index].sc;
+	km = &kmalloc_caches[index].sc;
 	BUG_ON(s->size > km->size);
 	return KMALLOC_ALLOCATOR.dup(km);
 }
 
-#ifndef ARCH_KMALLOC_MINALIGN
-#define ARCH_KMALLOC_MINALIGN sizeof(void *)
-#endif
-
-void __init create_kmalloc_cache(struct slab_control *x,
-			const char *name,
-			const struct page_allocator *p,
-			int size)
-{
-	struct slab_cache s;
-	struct slab_cache *rs;
-
-	s.page_alloc = p;
-	s.slab_alloc = &KMALLOC_ALLOCATOR;
-	s.size = size;
-	s.align = ARCH_KMALLOC_MINALIGN;
-	s.offset = 0;
-	s.objsize = size;
-	s.inuse = size;
-	s.node = -1;
-	s.order = order(size);
-	s.name = "kmalloc";
-	rs = KMALLOC_ALLOCATOR.create(x, &s);
-	if (!rs)
-		panic("Creation of kmalloc slab %s size=%d failed.\n",
-			name, size);
-	register_slab(rs);
-}
+static void null_destructor(struct page_allocator *x) {}
 
-void __init kmalloc_init_array(int dma, const char *name,
-				const struct page_allocator *pa)
+void __init kmalloc_init(void)
 {
 	int i;
 
 	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
 		create_kmalloc_cache(
-			&kmalloc_caches[dma][i - KMALLOC_SHIFT_LOW],
-			name, pa, 1 << i);
+			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
+			"kmalloc", &page_allocator, 1 << i);
 	}
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#ifdef KMALLOC_EXTRA
 	/* Non-power of two caches */
-	create_kmalloc_cache(&kmalloc_caches[dma]
+	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1], name, pa, 96);
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1], "kmalloc", &page_allocator, 96);
-	create_kmalloc_cache(&kmalloc_caches[dma]
+	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2], name, pa, 192);
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2], "kmalloc", &page_allocator, 192);
 #endif
-}
-
-void __init kmalloc_init(void)
-{
-	kmalloc_init_array(0, "kmalloc", &page_allocator);
 
 	/*
 	 * The above must be done first. Deriving a page allocator requires
 	 * a working (normal) kmalloc array.
 	 */
+	unreclaimable_allocator = unreclaimable_slab(&page_allocator);
+	unreclaimable_allocator->destructor = null_destructor;
+
 	/*
-	 * On all my machines the DMA array is always empty. I wish we
-	 * could get rid of it.
+	 * Fix up the initial arrays. Because of the preceding uses we
+	 * have likely consumed a couple of pages that we cannot account
+	 * for.
 	 */
-	kmalloc_init_array(1, "kmalloc-DMA",
-			dmaify_page_allocator(&page_allocator));
+	for(i = 0; i < KMALLOC_NR_CACHES; i++)
+		kmalloc_caches[i].sc.page_alloc = unreclaimable_allocator;
+
+	reclaimable_allocator = reclaimable_slab(&page_allocator);
+	reclaimable_allocator->destructor = null_destructor;
+	dma_allocator = dmaify_page_allocator(unreclaimable_allocator);
 
 	/* And deal with the kmalloc_cache_allocator */
 	memcpy(&kmalloc_slab_allocator, &KMALLOC_ALLOCATOR,
Index: linux-2.6.18-rc4-mm3/mm/allocator.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/allocator.c	2006-08-26 16:38:18.584230641 -0700
+++ linux-2.6.18-rc4-mm3/mm/allocator.c	2006-08-26 17:23:59.646273179 -0700
@@ -12,6 +12,17 @@
  * Section One: Page Allocators
  */
 
+static char *alloc_str_combine(const char *new, const char *base)
+{
+	char *s;
+
+	s = kmalloc(strlen(new) + strlen(base) + 2, GFP_KERNEL);
+	strcpy(s, new);
+	strcat(s, ":");
+	strcat(s, base);
+	return s;
+}
+
 /* For static allocators */
 static void null_destructor(struct page_allocator *a) {}
 
@@ -65,16 +76,11 @@ struct derived_page_allocator *derive_pa
 {
 	struct derived_page_allocator *d =
 		kmalloc(sizeof(struct derived_page_allocator), GFP_KERNEL);
-	char *s;
 
 	d->base = base;
 	d->a.allocate = base->allocate;
 	d->a.free = base->free;
-	s = kmalloc(strlen(name) + strlen(base->name) + 2, GFP_KERNEL);
-	strcpy(s, name);
-	strcat(s, ":");
-	strcat(s, base->name);
-	d->a.name = s;
+	d->a.name = alloc_str_combine(name, base->name);
 	d->a.destructor = derived_destructor;
 	return d;
 };
@@ -201,16 +207,11 @@ struct page_allocator *ctor_and_dtor_for
 {
 	struct deconstructor *d =
 		kmalloc(sizeof(struct deconstructor), GFP_KERNEL);
-	char *s;
 
 	d->a.allocate = ctor ? ctor_alloc : base->allocate;
 	d->a.free = dtor ? dtor_free : base->free;
 	d->a.destructor = derived_destructor;
-	s = kmalloc(strlen(base->name) + 1 + 10, GFP_KERNEL);
-	strcpy(s, "ctor_dtor");
-	strcat(s, ":");
-	strcat(s, base->name);
-	d->a.name = s;
+	d->a.name = alloc_str_combine("ctor_dtor", base->name);
 	d->base = base;
 	d->ctor = ctor;
 	d->dtor = dtor;
@@ -223,16 +224,14 @@ struct page_allocator *ctor_and_dtor_for
  * Track reclaimable pages. This is used by the slabulator
  * to mark allocations of certain slab caches.
  */
-atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
-EXPORT_SYMBOL(slab_reclaim_pages);
-
 static struct page *rac_alloc(const struct page_allocator *a, int order,
 		gfp_t flags, int node)
 {
 	struct derived_page_allocator *d = (void *)a;
+	struct page *page = d->base->allocate(d->base, order, flags, node);
 
-	atomic_add(1 << order, &slab_reclaim_pages);
-	return d->base->allocate(d->base, order, flags, node);
+	mod_zone_page_state(page_zone(page), NR_SLAB_RECLAIMABLE, 1 << order);
+	return page;
 }
 
 static void rac_free(const struct page_allocator *a, struct page *page,
@@ -240,14 +239,15 @@
 {
 	struct derived_page_allocator *d = (void *)a;
 
-	atomic_sub(1 << order, &slab_reclaim_pages);
+	mod_zone_page_state(page_zone(page),
+			NR_SLAB_RECLAIMABLE, -(1 << order));
 	d->base->free(d->base, page, order);
 }
 
-struct page_allocator *reclaim_allocator(const struct page_allocator *base)
+struct page_allocator *reclaimable_slab(const struct page_allocator *base)
 {
 	struct derived_page_allocator *d =
-		derive_page_allocator(&page_allocator,"reclaim");
+		derive_page_allocator(&page_allocator,"reclaimable");
 
 	d->a.allocate = rac_alloc;
 	d->a.free = rac_free;
@@ -255,6 +255,41 @@
 }
 
 /*
+ * Track unreclaimable pages. This is used by the slabulator
+ * to mark allocations of certain slab caches.
+ */
+static struct page *urac_alloc(const struct page_allocator *a, int order,
+		gfp_t flags, int node)
+{
+	struct derived_page_allocator *d = (void *)a;
+	struct page *page = d->base->allocate(d->base, order, flags, node);
+
+	mod_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, 1 << order);
+	return page;
+}
+
+static void urac_free(const struct page_allocator *a, struct page *page,
+		int order)
+{
+	struct derived_page_allocator *d = (void *)a;
+
+	mod_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, -(1 << order));
+	d->base->free(d->base, page, order);
+}
+
+struct page_allocator *unreclaimable_slab(const struct page_allocator *base)
+{
+	struct derived_page_allocator *d =
+		derive_page_allocator(&page_allocator,"unreclaimable");
+
+	d->a.allocate = urac_alloc;
+	d->a.free = urac_free;
+	return &d->a;
+}
+
+/*
  * Numacontrol for allocators
  */
 struct numactl {
@@ -284,7 +319,7 @@ struct page_allocator *numactl_allocator
 	d->a.allocate = numactl_alloc;
 	d->a.destructor = derived_destructor;
-	d->a.name = "numa";
+	d->a.name = alloc_str_combine("numa", base->name);
 	d->base = base;
 	d->node = node;
 	d->flags = flags;
@@ -310,16 +345,10 @@ struct derived_slab_allocator *derive_sl
 		const char *name)
 {
 	struct derived_slab_allocator *d =
 		kmalloc(sizeof(struct derived_slab_allocator), GFP_KERNEL);
-	char *s;
 	memcpy(&d->a, base, sizeof(struct slab_allocator));
 	d->base = base;
-	s = kmalloc(strlen(name) + strlen(base->name) + 2, GFP_KERNEL);
-	strcpy(s, name);
-	strcat(s, ":");
-	strcat(s, d->base->name);
-	d->a.name = s;
-	d->a.name = "derived";
+	d->a.name = alloc_str_combine(name, base->name);
 	d->a.destructor = derived_slab_destructor;
 	return d;
 }
Index: linux-2.6.18-rc4-mm3/mm/slabulator.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/slabulator.c	2006-08-26 16:38:23.927650380 -0700
+++ linux-2.6.18-rc4-mm3/mm/slabulator.c	2006-08-26 17:52:03.742399379 -0700
@@ -69,7 +69,7 @@ struct slab_cache *kmem_cache_create(con
 		void (*ctor)(void *, struct slab_cache *, unsigned long),
 		void (*dtor)(void *, struct slab_cache *, unsigned long))
 {
-	const struct page_allocator *a = &page_allocator;
+	const struct page_allocator *a;
 	struct slab_cache s;
 	struct slab_cache *rs;
 	struct slab_control *x;
@@ -87,7 +89,9 @@ struct slab_cache *kmem_cache_create(con
 	/* Pick the right allocator for our purposes */
 	if (flags & SLAB_RECLAIM_ACCOUNT)
-		a = reclaim_allocator(a);
+		a = reclaimable_allocator;
+	else
+		a = unreclaimable_allocator;
 
 	if (flags & SLAB_CACHE_DMA)
 		a = dmaify_page_allocator(a);
 
Index: linux-2.6.18-rc4-mm3/mm/slabifier.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/slabifier.c	2006-08-26 16:38:20.821397056 -0700
+++ linux-2.6.18-rc4-mm3/mm/slabifier.c	2006-08-26 17:08:35.404562189 -0700
@@ -337,7 +337,6 @@ static void discard_slab(struct slab *s,
 	__ClearPageSlab(page);
 
 	s->sc.page_alloc->free(s->sc.page_alloc, page, s->sc.order);
-	sub_zone_page_state(page_zone(page), NR_SLAB, 1 << s->sc.order);
 }
 
 /*
@@ -355,7 +354,6 @@ static struct page *new_slab(struct slab
 
 	set_slab(page, s);
 	__SetPageSlab(page);
-	add_zone_page_state(page_zone(page), NR_SLAB, 1 << s->sc.order);
 	atomic_long_inc(&s->nr_slabs);
 	return page;
 }
Index: linux-2.6.18-rc4-mm3/include/linux/allocator.h
===================================================================
--- linux-2.6.18-rc4-mm3.orig/include/linux/allocator.h	2006-08-26 16:38:18.583254139 -0700
+++ linux-2.6.18-rc4-mm3/include/linux/allocator.h	2006-08-26 17:06:28.093094154 -0700
@@ -58,9 +58,10 @@ struct page_allocator *dmaify_page_alloc
 /*
  * Allocation and freeing is tracked with slab_reclaim_pages
  */
-extern atomic_t slab_reclaim_pages;
+struct page_allocator *reclaimable_slab
+	(const struct page_allocator *base);
 
-struct page_allocator *reclaim_allocator
+struct page_allocator *unreclaimable_slab
 	(const struct page_allocator *base);
 
 /*
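
A quick way to sanity-check the new size-to-cache mapping is to model kmalloc_index() in user space. The sketch below is an illustration only and not part of the patch; it hardcodes the KMALLOC_EXTRA configuration (L1_CACHE_BYTES <= 64, so KMALLOC_SHIFT_HIGH is 18 and the 96/192 byte caches exist) and prints the array slot that kmalloc_slab()/get_slab() would index with:

/* Illustration only -- user-space model of kmalloc_index() from the patch. */
#include <stdio.h>

#define KMALLOC_SHIFT_LOW 3
#define KMALLOC_SHIFT_HIGH 18

static int kmalloc_index(int size)
{
	if (size <= 8) return 3;
	if (size <= 16) return 4;
	if (size <= 32) return 5;
	if (size <= 64) return 6;
	if (size <= 96) return KMALLOC_SHIFT_HIGH + 1;	/* extra 96 byte cache */
	if (size <= 128) return 7;
	if (size <= 192) return KMALLOC_SHIFT_HIGH + 2;	/* extra 192 byte cache */
	if (size <= 256) return 8;
	if (size <= 512) return 9;
	if (size <= 1024) return 10;
	if (size <= 2 * 1024) return 11;
	if (size <= 4 * 1024) return 12;
	if (size <= 8 * 1024) return 13;
	if (size <= 16 * 1024) return 14;
	if (size <= 32 * 1024) return 15;
	if (size <= 64 * 1024) return 16;
	if (size <= 128 * 1024) return 17;
	if (size <= 256 * 1024) return 18;
	return -1;
}

int main(void)
{
	int sizes[] = { 8, 30, 96, 100, 192, 200, 4096, 256 * 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("size %6d -> kmalloc_index %2d -> kmalloc_caches[%d]\n",
			sizes[i], kmalloc_index(sizes[i]),
			kmalloc_index(sizes[i]) - KMALLOC_SHIFT_LOW);
	return 0;
}

The two odd sizes land past the last power-of-two slot (slots 16 and 17 here), which is presumably what KMALLOC_EXTRAS accounts for when sizing kmalloc_caches[].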
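
On the allocator.c side, reclaimable_slab() and unreclaimable_slab() are thin decorators over a base page allocator: they forward allocate/free to the wrapped allocator and adjust the per-zone NR_SLAB_RECLAIMABLE/NR_SLAB_UNRECLAIMABLE counters on the way through, and dmaify_page_allocator() can then be stacked on top of the result (kmalloc_init() stacks it on the unreclaimable allocator, while kmem_cache_create() picks one or the other from SLAB_RECLAIM_ACCOUNT). A minimal user-space sketch of that composition follows; it is an illustration only, with struct page, the zone counter and the derived-allocator layout all stubbed out (the base pointer lives in the allocator struct itself instead of a separate derived_page_allocator as in the patch):

/* Illustration only -- user-space model of the page allocator decorators. */
#include <stdio.h>
#include <stdlib.h>

struct page { int order; };			/* stand-in for struct page */
static long nr_slab_unreclaimable;		/* stand-in for the zone counter */

struct page_allocator {
	struct page *(*allocate)(const struct page_allocator *a, int order);
	void (*free)(const struct page_allocator *a, struct page *page, int order);
	const struct page_allocator *base;	/* wrapped allocator, if any */
	const char *name;
};

/* Base allocator: pretend to hand out 2^order pages. */
static struct page *base_alloc(const struct page_allocator *a, int order)
{
	struct page *page = malloc(sizeof(*page));

	page->order = order;
	return page;
}

static void base_free(const struct page_allocator *a, struct page *page, int order)
{
	free(page);
}

static const struct page_allocator page_allocator = {
	base_alloc, base_free, NULL, "page_allocator"
};

/* Decorator in the style of urac_alloc()/urac_free(): forward and account. */
static struct page *urac_alloc(const struct page_allocator *a, int order)
{
	struct page *page = a->base->allocate(a->base, order);

	nr_slab_unreclaimable += 1 << order;
	return page;
}

static void urac_free(const struct page_allocator *a, struct page *page, int order)
{
	nr_slab_unreclaimable -= 1 << order;
	a->base->free(a->base, page, order);
}

static const struct page_allocator unreclaimable_allocator = {
	urac_alloc, urac_free, &page_allocator, "unreclaimable:page_allocator"
};

int main(void)
{
	struct page *page;

	page = unreclaimable_allocator.allocate(&unreclaimable_allocator, 2);
	printf("after alloc: NR_SLAB_UNRECLAIMABLE = %ld\n", nr_slab_unreclaimable);

	unreclaimable_allocator.free(&unreclaimable_allocator, page, 2);
	printf("after free:  NR_SLAB_UNRECLAIMABLE = %ld\n", nr_slab_unreclaimable);
	return 0;
}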