Index: linux-2.6.18-rc4-mm2/include/linux/kmalloc.h
===================================================================
--- linux-2.6.18-rc4-mm2.orig/include/linux/kmalloc.h	2006-08-25 23:48:24.369666763 -0700
+++ linux-2.6.18-rc4-mm2/include/linux/kmalloc.h	2006-08-26 16:26:16.212863974 -0700
@@ -15,16 +15,13 @@
 #define KMALLOC_ALLOCATOR slabifier_allocator
 #endif
 
-#ifdef ARCH_NEEDS_SMALL_SLABS
 #define KMALLOC_SHIFT_LOW 3
-#else
-#define KMALLOC_SHIFT_LOW 7
-#endif
-#define KMALLOC_SHIFT_HIGH 20
+#define KMALLOC_SHIFT_HIGH 17
 
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#if L1_CACHE_BYTES <= 64
 #define KMALLOC_EXTRAS 2
+#define KMALLOC_EXTRA
 #else
 #define KMALLOC_EXTRAS 0
 #endif
 
@@ -37,7 +34,7 @@
  * non DMA cache (DMA simply means memory for legacy I/O. The regular
  * caches can be used for devices that can DMA to all of memory).
  */
-extern struct slab_control kmalloc_caches[2][KMALLOC_NR_CACHES];
+extern struct slab_control kmalloc_caches[KMALLOC_NR_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but GCC has trouble
@@ -45,15 +42,15 @@ extern struct slab_control kmalloc_cache
  */
 static inline int kmalloc_index(int size)
 {
-#ifdef ARCH_NEEDS_SMALL_SLABS
 	if (size <= 8) return 3;
 	if (size <= 16) return 4;
 	if (size <= 32) return 5;
 	if (size <= 64) return 6;
+#ifdef KMALLOC_EXTRA
 	if (size <= 96) return KMALLOC_SHIFT_HIGH + 1;
 #endif
 	if (size <= 128) return 7;
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#ifdef KMALLOC_EXTRA
 	if (size <= 192) return KMALLOC_SHIFT_HIGH + 2;
 #endif
 	if (size <= 256) return 8;
@@ -66,9 +63,6 @@ static inline int kmalloc_index(int size
 	if (size <= 32 * 1024) return 15;
 	if (size <= 64 * 1024) return 16;
 	if (size <= 128 * 1024) return 17;
-	if (size <= 256 * 1024) return 18;
-	if (size <= 512 * 1024) return 19;
-	if (size <=1024 * 1024) return 20;
 	return -1;
 }
 
@@ -78,7 +72,7 @@ static inline int kmalloc_index(int size
  * This ought to end up with a global pointer to the right cache
  * in kmalloc_caches.
  */
-static inline struct slab_cache *kmalloc_slab(size_t size, gfp_t flags)
+static inline struct slab_cache *kmalloc_slab(size_t size)
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
 
@@ -90,7 +84,7 @@ static inline struct slab_cache *kmalloc
 		extern void __kmalloc_size_too_large(void);
 		__kmalloc_size_too_large();
 	}
-	return &kmalloc_caches[!!(flags & __GFP_DMA)][index].sc;
+	return &kmalloc_caches[index].sc;
 }
 
 extern void *__kmalloc(size_t, gfp_t);
@@ -98,8 +92,8 @@ extern void *__kmalloc(size_t, gfp_t);
 
 static inline void *kmalloc(size_t size, gfp_t flags)
 {
-	if (__builtin_constant_p(size)) {
-		struct slab_cache *s = kmalloc_slab(size, flags);
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
 
 		return KMALLOC_ALLOCATOR.alloc(s, flags);
 	} else
@@ -110,8 +104,8 @@ static inline void *kmalloc(size_t size,
 extern void *__kmalloc_node(size_t, gfp_t, int);
 static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
-	if (__builtin_constant_p(size)) {
-		struct slab_cache *s = kmalloc_slab(size, flags);
+	if (__builtin_constant_p(size) && !(flags & __GFP_DMA)) {
+		struct slab_cache *s = kmalloc_slab(size);
 
 		return KMALLOC_ALLOCATOR.alloc_node(s, flags, node);
 	} else
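(Illustration only, not part of the patch: a userspace sketch of the size-to-cache mapping the header relies on. It mirrors the kmalloc_index()/kmalloc_slab() logic above under this patch's assumptions for a box with a 64 byte cache line, i.e. KMALLOC_SHIFT_LOW = 3, KMALLOC_SHIFT_HIGH = 17 and KMALLOC_EXTRA defined. The printf harness and the test sizes are made up, and the middle of the power-of-two chain is filled in from the obvious pattern since the hunk above only shows its tail. When size is a compile-time constant gcc folds the same if-chain, which is why the constant-size, non-DMA kmalloc() in the header reduces to a single KMALLOC_ALLOCATOR.alloc() on a cache pointer known at compile time.)

/* kmalloc-index-demo.c: illustration only, not part of the patch. */
#include <stdio.h>

#define KMALLOC_SHIFT_LOW	3
#define KMALLOC_SHIFT_HIGH	17
#define KMALLOC_EXTRA			/* assume L1_CACHE_BYTES <= 64 */

/* Same if-chain as kmalloc_index() in include/linux/kmalloc.h above. */
static int kmalloc_index(int size)
{
	if (size <= 8) return 3;
	if (size <= 16) return 4;
	if (size <= 32) return 5;
	if (size <= 64) return 6;
#ifdef KMALLOC_EXTRA
	if (size <= 96) return KMALLOC_SHIFT_HIGH + 1;
#endif
	if (size <= 128) return 7;
#ifdef KMALLOC_EXTRA
	if (size <= 192) return KMALLOC_SHIFT_HIGH + 2;
#endif
	if (size <= 256) return 8;
	if (size <= 512) return 9;
	if (size <= 1024) return 10;
	if (size <= 2 * 1024) return 11;
	if (size <= 4 * 1024) return 12;
	if (size <= 8 * 1024) return 13;
	if (size <= 16 * 1024) return 14;
	if (size <= 32 * 1024) return 15;
	if (size <= 64 * 1024) return 16;
	if (size <= 128 * 1024) return 17;
	return -1;
}

int main(void)
{
	/* Arbitrary request sizes, chosen only for the demonstration. */
	int sizes[] = { 6, 64, 100, 150, 200, 5000, 200000 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		int idx = kmalloc_index(sizes[i]);

		if (idx < 0) {
			/* the header would end up in __kmalloc_size_too_large() here */
			printf("size %6d -> too large for the kmalloc array\n",
				sizes[i]);
			continue;
		}
		/* Same offset calculation as kmalloc_slab()/get_slab(). */
		printf("size %6d -> kmalloc_caches[%2d]\n",
			sizes[i], idx - KMALLOC_SHIFT_LOW);
	}
	return 0;
}

With KMALLOC_EXTRA defined, 100 and 150 byte requests land in the 128 and 192 byte caches (array slots 4 and 16) instead of being rounded up to the next power of two.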
Index: linux-2.6.18-rc4-mm2/mm/kmalloc.c
===================================================================
--- linux-2.6.18-rc4-mm2.orig/mm/kmalloc.c	2006-08-25 23:48:24.370643265 -0700
+++ linux-2.6.18-rc4-mm2/mm/kmalloc.c	2006-08-26 16:23:21.486358615 -0700
@@ -10,17 +10,95 @@
 #include
 #include
 
-struct slab_control kmalloc_caches[2][KMALLOC_NR_CACHES] __cacheline_aligned;
+#ifndef ARCH_KMALLOC_MINALIGN
+#define ARCH_KMALLOC_MINALIGN sizeof(void *)
+#endif
+
+struct slab_control kmalloc_caches[KMALLOC_NR_CACHES] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
+static struct page_allocator *dma_allocator;
+
+static struct slab_cache *kmalloc_caches_dma[KMALLOC_NR_CACHES];
+
+/*
+ * Given a slab size find the correct order to use.
+ * We only support powers of two so there is really
+ * no need for anything special. Objects will always
+ * fit exactly into the slabs with no overhead.
+ */
+static __init int order(size_t size)
+{
+	if (size >= PAGE_SIZE)
+		/* One object per slab */
+		return fls(size -1) - PAGE_SHIFT;
+
+	/* Multiple objects per page which will fit neatly */
+	return 0;
+}
+
+static struct slab_cache *create_kmalloc_cache(struct slab_control *x,
+		const char *name,
+		const struct page_allocator *p,
+		int size)
+{
+	struct slab_cache s;
+	struct slab_cache *rs;
+
+	s.page_alloc = p;
+	s.slab_alloc = &KMALLOC_ALLOCATOR;
+	s.size = size;
+	s.align = ARCH_KMALLOC_MINALIGN;
+	s.offset = 0;
+	s.objsize = size;
+	s.inuse = size;
+	s.node = -1;
+	s.order = order(size);
+	s.name = "kmalloc";
+	rs = KMALLOC_ALLOCATOR.create(x, &s);
+	if (!rs)
+		panic("Creation of kmalloc slab %s size=%d failed.\n",
+			name, size);
+	register_slab(rs);
+	return rs;
+}
+
 static struct slab_cache *get_slab(size_t size, gfp_t flags)
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	struct slab_cache *s;
+	struct slab_control *x;
+	size_t realsize;
 
 	BUG_ON(size < 0);
-	return &kmalloc_caches[!!(flags & __GFP_DMA)][index].sc;
+	if (!(flags & __GFP_DMA))
+		return &kmalloc_caches[index].sc;
+
+	s = kmalloc_caches_dma[index];
+	if (s)
+		return s;
+
+	/* Dynamically create dma cache */
+	x = kmalloc(sizeof(struct slab_control), flags & ~(__GFP_DMA));
+
+	if (!x)
+		panic("Unable to allocate memory for dma cache\n");
+#ifdef KMALLOC_EXTRA
+	if (index <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW)
+#endif
+		realsize = 1 << (index + KMALLOC_SHIFT_LOW);
+#ifdef KMALLOC_EXTRA
+	else if (index == KMALLOC_SHIFT_HIGH + 1 - KMALLOC_SHIFT_LOW)
+		realsize = 96;
+	else
+		realsize = 192;
+#endif
+
+	s = create_kmalloc_cache(x, "kmalloc_dma", dma_allocator, realsize);
+	kmalloc_caches_dma[index] = s;
+	return s;
 }
 
 void *__kmalloc(size_t size, gfp_t flags)
@@ -55,27 +133,12 @@ size_t ksize(const void *object)
 EXPORT_SYMBOL(ksize);
 
 /*
- * Given a slab size find the correct order to use.
- * We only support powers of two so there is really
- * no need for anything special. Objects will always
- * fit exactly into the slabs with no overhead.
- */
-static __init int order(size_t size)
-{
-	if (size >= PAGE_SIZE)
-		/* One object per slab */
-		return fls(size -1) - PAGE_SHIFT;
-
-	/* Multiple objects per page which will fit neatly */
-	return 0;
-}
-/*
  * Provide the kmalloc array as regular slab allocator for the
  * generic allocator framework.
  */
 struct slab_allocator kmalloc_slab_allocator;
 
-struct slab_cache *kmalloc_create(struct slab_control *x,
+static struct slab_cache *kmalloc_create(struct slab_control *x,
 			const struct slab_cache *s)
 {
 	struct slab_cache *km;
@@ -86,53 +149,24 @@ struct slab_cache *kmalloc_create(struct
 		|| s->offset)
 		return NULL;
 
-	km = &kmalloc_caches[0][index].sc;
+	km = &kmalloc_caches[index].sc;
 	BUG_ON(s->size > km->size);
 	return KMALLOC_ALLOCATOR.dup(km);
 }
 
-#ifndef ARCH_KMALLOC_MINALIGN
-#define ARCH_KMALLOC_MINALIGN sizeof(void *)
-#endif
-
-void __init create_kmalloc_cache(struct slab_control *x,
-		const char *name,
-		const struct page_allocator *p,
-		int size)
-{
-	struct slab_cache s;
-	struct slab_cache *rs;
-
-	s.page_alloc = p;
-	s.slab_alloc = &KMALLOC_ALLOCATOR;
-	s.size = size;
-	s.align = ARCH_KMALLOC_MINALIGN;
-	s.offset = 0;
-	s.objsize = size;
-	s.inuse = size;
-	s.node = -1;
-	s.order = order(size);
-	s.name = "kmalloc";
-	rs = KMALLOC_ALLOCATOR.create(x, &s);
-	if (!rs)
-		panic("Creation of kmalloc slab %s size=%d failed.\n",
-			name, size);
-	register_slab(rs);
-}
-
-void __init kmalloc_init_array(int dma, const char *name,
+static void __init kmalloc_init_array(const char *name,
 			const struct page_allocator *pa)
 {
 	int i;
 
 	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
 		create_kmalloc_cache(
-			&kmalloc_caches[dma][i - KMALLOC_SHIFT_LOW],
+			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
 			name, pa, 1 << i);
 	}
-#ifdef ARCH_NEEDS_SMALL_SLABS
+#ifdef KMALLOC_EXTRA
 	/* Non-power of two caches */
-	create_kmalloc_cache(&kmalloc_caches[dma]
-			[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1],
+	create_kmalloc_cache(
+			&kmalloc_caches[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1],
 			name, pa, 96);
@@ -144,18 +178,12 @@ void __init kmalloc_init_array(int dma,
 
 void __init kmalloc_init(void)
 {
-	kmalloc_init_array(0, "kmalloc", &page_allocator);
+	kmalloc_init_array("kmalloc", &page_allocator);
 	/*
 	 * The above must be done first. Deriving a page allocator requires
 	 * a working (normal) kmalloc array.
 	 */
-
-	/*
-	 * On all my machines the DMA array is always empty. I wish we
-	 * could get rid of it.
-	 */
-	kmalloc_init_array(1, "kmalloc-DMA",
-		dmaify_page_allocator(&page_allocator));
+	dma_allocator = dmaify_page_allocator(&page_allocator);
 
 	/* And deal with the kmalloc_cache_allocator */
 	memcpy(&kmalloc_slab_allocator, &KMALLOC_ALLOCATOR,