A generic kmalloc layer for the modular slab allocator.

Regular kmalloc allocations are optimized. DMA kmalloc slabs are created
on demand. Also re-exports the kmalloc array as a new slab_allocator that
can be used to tie into the kmalloc array (the slabulator uses this to
avoid creating new slabs for caches that are compatible with the generic
kmalloc caches).

Signed-off-by: Christoph Lameter <clameter@sgi.com>

Index: linux-2.6.19-rc4-mm2/include/linux/kmalloc.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19-rc4-mm2/include/linux/kmalloc.h	2006-11-03 13:10:00.275405277 -0600
@@ -0,0 +1,138 @@
+#ifndef _LINUX_KMALLOC_H
+#define _LINUX_KMALLOC_H
+/*
+ * In-kernel dynamic memory allocator.
+ *
+ * (C) 2006 Silicon Graphics, Inc.
+ * Christoph Lameter <clameter@sgi.com>
+ */
+
+#include <linux/cache.h>
+#include <linux/allocator.h>
+
+#ifndef KMALLOC_ALLOCATOR
+#define KMALLOC_ALLOCATOR slabifier_allocator
+#endif
+
+#define KMALLOC_SHIFT_LOW 3
+
+#define KMALLOC_SHIFT_HIGH 18
+
+#if L1_CACHE_BYTES <= 64
+#define KMALLOC_EXTRAS 2
+#define KMALLOC_EXTRA
+#else
+#define KMALLOC_EXTRAS 0
+#endif
+
+#define KMALLOC_NR_CACHES (KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW \
+				+ 1 + KMALLOC_EXTRAS)
+
+/*
+ * We keep the general caches in an array of slab caches that are used for
+ * 2^x bytes of allocations. For each size we generate a DMA and a
+ * non-DMA cache (DMA simply means memory for legacy I/O; the regular
+ * caches can be used for devices that can DMA to all of memory).
+ */
+extern struct slab_control kmalloc_caches[KMALLOC_NR_CACHES];
+
+/*
+ * Sorry that the following has to be that ugly, but GCC has trouble
+ * with constant propagation and loops.
+ */
+static inline int kmalloc_index(int size)
+{
+	if (size <= 8) return 3;
+	if (size <= 16) return 4;
+	if (size <= 32) return 5;
+	if (size <= 64) return 6;
+#ifdef KMALLOC_EXTRA
+	if (size <= 96) return KMALLOC_SHIFT_HIGH + 1;
+#endif
+	if (size <= 128) return 7;
+#ifdef KMALLOC_EXTRA
+	if (size <= 192) return KMALLOC_SHIFT_HIGH + 2;
+#endif
+	if (size <= 256) return 8;
+	if (size <= 512) return 9;
+	if (size <= 1024) return 10;
+	if (size <= 2048) return 11;
+	if (size <= 4096) return 12;
+	if (size <= 8 * 1024) return 13;
+	if (size <= 16 * 1024) return 14;
+	if (size <= 32 * 1024) return 15;
+	if (size <= 64 * 1024) return 16;
+	if (size <= 128 * 1024) return 17;
+	if (size <= 256 * 1024) return 18;
+	return -1;
+}
+
+/*
+ * Find the general slab cache for a given allocation size.
+ *
+ * For a constant size this ought to compile down to a direct reference
+ * to the right cache in kmalloc_caches.
+ */
+static inline struct slab_cache *kmalloc_slab(size_t size)
+{
+	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+
+	if (index < 0) {
+		/*
+		 * Generate a link failure. Would be great if we could
+		 * do something to stop the compile here.
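+		 *
+		 * With a constant size larger than 1 << KMALLOC_SHIFT_HIGH
+		 * the call to the undefined __kmalloc_size_too_large()
+		 * survives constant folding, so an oversized kmalloc()
+		 * fails at link time rather than at runtime.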
+		 */
+		extern void __kmalloc_size_too_large(void);
+
+		__kmalloc_size_too_large();
+	}
+	return &kmalloc_caches[index].sc;
+}
+
+extern void *__kmalloc(size_t, gfp_t);
+#define ____kmalloc __kmalloc
+
+static inline void *kmalloc(size_t size, gfp_t flags)
+{
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
+
+		return KMALLOC_ALLOCATOR.alloc(s, flags);
+	} else
+		return __kmalloc(size, flags);
+}
+
+#ifdef CONFIG_NUMA
+extern void *__kmalloc_node(size_t, gfp_t, int);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
+
+		return KMALLOC_ALLOCATOR.alloc_node(s, flags, node);
+	} else
+		return __kmalloc_node(size, flags, node);
}
+#else
+#define kmalloc_node(__size, __flags, __node) kmalloc((__size), (__flags))
+#endif
+
+/* Free an object */
+static inline void kfree(const void *x)
+{
+	KMALLOC_ALLOCATOR.free(NULL, x);
+}
+
+/* Allocate and zero the specified number of bytes */
+extern void *kzalloc(size_t, gfp_t);
+
+/* Figure out what size the chunk is */
+extern size_t ksize(const void *);
+
+extern struct page_allocator *reclaimable_allocator;
+extern struct page_allocator *unreclaimable_allocator;
+
+extern int slab_min_order;
+
+#define kmalloc_track_caller(size, flags) \
+	__kmalloc(size, flags)
+
+#endif /* _LINUX_KMALLOC_H */

Index: linux-2.6.19-rc4-mm2/mm/kmalloc.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.19-rc4-mm2/mm/kmalloc.c	2006-11-03 13:10:00.289078216 -0600
@@ -0,0 +1,225 @@
+/*
+ * Create generic slab caches for memory allocation.
+ *
+ * (C) 2006 Silicon Graphics, Inc.
+ * Christoph Lameter <clameter@sgi.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kmalloc.h>
+
+#ifndef ARCH_KMALLOC_MINALIGN
+#define ARCH_KMALLOC_MINALIGN sizeof(void *)
+#endif
+
+struct slab_control kmalloc_caches[KMALLOC_NR_CACHES] __cacheline_aligned;
+EXPORT_SYMBOL(kmalloc_caches);
+
+static struct page_allocator *dma_allocator;
+struct page_allocator *reclaimable_allocator;
+struct page_allocator *unreclaimable_allocator;
+
+/*
+ * Minimum order of slab pages. This influences locking overhead and slab
+ * fragmentation. A higher order reduces the number of partial slabs
+ * and increases the number of allocations possible without having to
+ * take the list_lock.
+ */
+int slab_min_order = 0;
+
+static struct slab_cache *kmalloc_caches_dma[KMALLOC_NR_CACHES];
+
+static int __init setup_slab_min_order(char *str)
+{
+	get_option(&str, &slab_min_order);
+
+	return 1;
+}
+
+__setup("slab_min_order=", setup_slab_min_order);
+
+/*
+ * Given a slab size find the correct order to use.
+ * We only support powers of two, so there is really
+ * no need for anything special. Objects will always
+ * fit exactly into the slabs with no overhead.
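+ *
+ * For example, with PAGE_SIZE 4096 and slab_min_order 0:
+ * order(512) = 0 (eight 512-byte objects per 4K slab) and
+ * order(8192) = fls(8191) - PAGE_SHIFT = 13 - 12 = 1 (one object
+ * per order-1 slab).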
+ */
+static int order(size_t size)
+{
+	unsigned long base_size = PAGE_SIZE << slab_min_order;
+
+	if (size >= base_size)
+		/* One object per slab */
+		return fls(size - 1) - PAGE_SHIFT;
+
+	return slab_min_order;
+}
+
+static struct slab_cache *create_kmalloc_cache(struct slab_control *x,
+			const char *name,
+			const struct page_allocator *p,
+			int size)
+{
+	struct slab_cache s;
+	struct slab_cache *rs;
+
+	s.page_alloc = p;
+	s.slab_alloc = &KMALLOC_ALLOCATOR;
+	s.size = size;
+	s.align = ARCH_KMALLOC_MINALIGN;
+	s.offset = 0;
+	s.objsize = size;
+	s.inuse = size;
+	s.node = -1;
+	s.order = order(size);
+	s.name = name;
+	rs = KMALLOC_ALLOCATOR.create(x, &s);
+	if (!rs)
+		panic("Creation of kmalloc slab %s size=%d failed.\n",
+			name, size);
+	register_slab(rs);
+	return rs;
+}
+
+static struct slab_cache *get_slab(size_t size, gfp_t flags)
+{
+	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	struct slab_cache *s;
+	struct slab_control *x;
+	size_t realsize;
+
+	BUG_ON(index < 0);
+
+	if (!(flags & __GFP_DMA))
+		return &kmalloc_caches[index].sc;
+
+	s = kmalloc_caches_dma[index];
+	if (s)
+		return s;
+
+	/* Dynamically create dma cache */
+	x = kmalloc(sizeof(struct slab_control), flags & ~__GFP_DMA);
+
+	if (!x)
+		panic("Unable to allocate memory for dma cache\n");
+
+#ifdef KMALLOC_EXTRA
+	if (index <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW)
+#endif
+		realsize = 1 << (index + KMALLOC_SHIFT_LOW);
+#ifdef KMALLOC_EXTRA
+	else if (index == KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1)
+		realsize = 96;
+	else
+		realsize = 192;
+#endif
+
+	s = create_kmalloc_cache(x, "kmalloc_dma", dma_allocator, realsize);
+	kmalloc_caches_dma[index] = s;
+	return s;
+}
+
+void *__kmalloc(size_t size, gfp_t flags)
+{
+	return KMALLOC_ALLOCATOR.alloc(get_slab(size, flags), flags);
+}
+EXPORT_SYMBOL(__kmalloc);
+
+#ifdef CONFIG_NUMA
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	return KMALLOC_ALLOCATOR.alloc_node(get_slab(size, flags),
+						flags, node);
+}
+EXPORT_SYMBOL(__kmalloc_node);
+#endif
+
+void *kzalloc(size_t size, gfp_t flags)
+{
+	void *x = __kmalloc(size, flags);
+
+	if (x)
+		memset(x, 0, size);
+	return x;
+}
+EXPORT_SYMBOL(kzalloc);
+
+size_t ksize(const void *object)
+{
+	return KMALLOC_ALLOCATOR.object_size(NULL, object);
+}
+EXPORT_SYMBOL(ksize);
+
+/*
+ * Provide the kmalloc array as a regular slab allocator for the
+ * generic allocator framework.
+ */
+struct slab_allocator kmalloc_slab_allocator;
+
+static struct slab_cache *kmalloc_create(struct slab_control *x,
+				const struct slab_cache *s)
+{
+	struct slab_cache *km;
+	int index = max(0, fls(s->size - 1) - KMALLOC_SHIFT_LOW);
+
+	if (index > KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW || s->offset)
+		return NULL;
+
+	km = &kmalloc_caches[index].sc;
+
+	BUG_ON(s->size > km->size);
+
+	return KMALLOC_ALLOCATOR.dup(km);
+}
+
+static void null_destructor(struct page_allocator *x)
+{
+}
+
+void __init kmalloc_init(void)
+{
+	int i;
+
+	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
+		create_kmalloc_cache(
+			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
+			"kmalloc", &page_allocator, 1 << i);
+	}
+#ifdef KMALLOC_EXTRA
+	/* Non-power-of-two caches */
+	create_kmalloc_cache(&kmalloc_caches
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1], "kmalloc",
+		&page_allocator, 96);
+	create_kmalloc_cache(&kmalloc_caches
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2], "kmalloc",
+		&page_allocator, 192);
+#endif
+
+	/*
+	 * The above must be done first. Deriving a page allocator requires
+	 * a working (normal) kmalloc array.
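+	 * (The derived allocators obtain their own state via kmalloc,
+	 * which is why the caches above must already be functional.)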
+	 */
+	unreclaimable_allocator = unreclaimable_slab(&page_allocator);
+	unreclaimable_allocator->destructor = null_destructor;
+
+	/*
+	 * Fix up the initial arrays. Because of the preceding uses
+	 * we have likely consumed a couple of pages that we cannot
+	 * account for.
+	 */
+	for (i = 0; i < KMALLOC_NR_CACHES; i++)
+		kmalloc_caches[i].sc.page_alloc = unreclaimable_allocator;
+
+	reclaimable_allocator = reclaimable_slab(&page_allocator);
+	reclaimable_allocator->destructor = null_destructor;
+	dma_allocator = dmaify_page_allocator(unreclaimable_allocator);
+
+	/* And deal with the kmalloc_slab_allocator */
+	memcpy(&kmalloc_slab_allocator, &KMALLOC_ALLOCATOR,
+		sizeof(struct slab_allocator));
+	kmalloc_slab_allocator.create = kmalloc_create;
+	kmalloc_slab_allocator.destructor = null_slab_allocator_destructor;
+}

Index: linux-2.6.19-rc4-mm2/mm/Makefile
===================================================================
--- linux-2.6.19-rc4-mm2.orig/mm/Makefile	2006-11-03 13:09:39.859753685 -0600
+++ linux-2.6.19-rc4-mm2/mm/Makefile	2006-11-03 13:10:00.295914686 -0600
@@ -30,4 +30,4 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_h
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
-obj-$(CONFIG_MODULAR_SLAB) += allocator.o slabifier.o slabstat.o
+obj-$(CONFIG_MODULAR_SLAB) += allocator.o slabifier.o slabstat.o kmalloc.o

Index: linux-2.6.19-rc4-mm2/include/asm-i386/page.h
===================================================================
--- linux-2.6.19-rc4-mm2.orig/include/asm-i386/page.h	2006-11-02 14:19:32.565983879 -0600
+++ linux-2.6.19-rc4-mm2/include/asm-i386/page.h	2006-11-03 13:10:00.341816698 -0600
@@ -37,6 +37,7 @@
 #define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
+#define ARCH_NEEDS_SMALL_SLABS
 
 /*
  * These are used to make use of C type-checking..
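As an illustration of the re-exported array, the sketch below (not part
of the patch) shows how a client such as the slabulator could tie into a
generic kmalloc cache through kmalloc_slab_allocator. It uses only the
ops set up in mm/kmalloc.c and assumes the extern declaration of
kmalloc_slab_allocator is visible; the control structure, the template
values and the init function are hypothetical:

#include <linux/kmalloc.h>
#include <linux/allocator.h>

/* Sketch only: reuse a generic kmalloc cache instead of a new slab */
static struct slab_control my_control;	/* hypothetical */
static struct slab_cache *my_cache;

static int __init my_cache_init(void)
{
	struct slab_cache tmpl = {
		.size = 100,	/* maps onto the 128-byte generic cache */
		.offset = 0,	/* kmalloc_create() rejects nonzero offsets */
	};
	void *obj;

	/* Returns a dup of the matching kmalloc cache, or NULL if the
	 * size does not fit the kmalloc array. */
	my_cache = kmalloc_slab_allocator.create(&my_control, &tmpl);
	if (!my_cache)
		return -EINVAL;

	obj = kmalloc_slab_allocator.alloc(my_cache, GFP_KERNEL);
	kmalloc_slab_allocator.free(my_cache, obj);
	return 0;
}

Because kmalloc_create() returns NULL rather than failing hard when a
request is incompatible, such a caller can fall back to creating a
private slab.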