From: Christoph Lameter Subject: slub: Support 4k kmallocs again to compensate for page allocator slowness Currently we hand off PAGE_SIZEd kmallocs to the page allocator in the mistaken belief that the page allocator can handle these allocations effectively. However, measurements indicate a minimum slowdown by a factor of 8 (and that is only SMP, NUMA is much worse) vs the slub fastpath which causes regressions in tbench. Increase the number of kmalloc caches by one so that we again handle 4k pages. 4k page buffering for the page allocator will be performed by slub like it is for slab. At some point the page allocator fastpath should be fixed. A lot of the kernel would benefit from faster 4k page allocations. If that is done then the 4k allocs may again be forwarded to the page allocator and this patch could be reverted. Reviewed-by: Pekka Enberg Acked-by: Mel Gorman Signed-off-by: Christoph Lameter --- include/linux/slub_def.h | 6 +++--- mm/slub.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) Index: linux-2.6/include/linux/slub_def.h =================================================================== --- linux-2.6.orig/include/linux/slub_def.h 2008-02-13 19:28:26.230777095 -0800 +++ linux-2.6/include/linux/slub_def.h 2008-02-13 19:30:34.911297582 -0800 @@ -111,7 +111,7 @@ struct kmem_cache { * We keep the general caches in an array of slab caches that are used for * 2^x bytes of allocations. 
*/ -extern struct kmem_cache kmalloc_caches[PAGE_SHIFT]; +extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1]; /* * Sorry that the following has to be that ugly but some versions of GCC @@ -197,7 +197,7 @@ static __always_inline void *kmalloc_lar static __always_inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { - if (size > PAGE_SIZE / 2) + if (size > PAGE_SIZE) return kmalloc_large(size, flags); if (!(flags & SLUB_DMA)) { @@ -219,7 +219,7 @@ void *kmem_cache_alloc_node(struct kmem_ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && - size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) { + size <= PAGE_SIZE && !(flags & SLUB_DMA)) { struct kmem_cache *s = kmalloc_slab(size); if (!s) Index: linux-2.6/mm/slub.c =================================================================== --- linux-2.6.orig/mm/slub.c 2008-02-13 19:28:59.906913253 -0800 +++ linux-2.6/mm/slub.c 2008-02-13 19:49:23.881868199 -0800 @@ -2517,11 +2517,11 @@ EXPORT_SYMBOL(kmem_cache_destroy); * Kmalloc subsystem *******************************************************************/ -struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned; +struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; EXPORT_SYMBOL(kmalloc_caches); #ifdef CONFIG_ZONE_DMA -static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT]; +static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; #endif static int __init setup_slub_min_order(char *str) @@ -2703,7 +2703,7 @@ void *__kmalloc(size_t size, gfp_t flags { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) + if (unlikely(size > PAGE_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -2720,7 +2720,7 @@ void *__kmalloc_node(size_t size, gfp_t { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) + if (unlikely(size > PAGE_SIZE)) return kmalloc_large(size, flags); s = get_slab(size, flags); @@ -3032,7 +3032,7 @@ void __init 
kmem_cache_init(void) caches++; } - for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) { + for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], "kmalloc", 1 << i, GFP_KERNEL); caches++; @@ -3059,7 +3059,7 @@ void __init kmem_cache_init(void) slab_state = UP; /* Provide the correct kmalloc names now that the caches are up */ - for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) + for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) kmalloc_caches[i]. name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); @@ -3088,7 +3088,7 @@ static int slab_unmergeable(struct kmem_ if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) return 1; - if ((s->flags & __PAGE_ALLOC_FALLBACK) + if ((s->flags & __PAGE_ALLOC_FALLBACK)) return 1; if (s->ctor) @@ -3252,7 +3252,7 @@ void *__kmalloc_track_caller(size_t size { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) + if (unlikely(size > PAGE_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags); @@ -3268,7 +3268,7 @@ void *__kmalloc_node_track_caller(size_t { struct kmem_cache *s; - if (unlikely(size > PAGE_SIZE / 2)) + if (unlikely(size > PAGE_SIZE)) return kmalloc_large(size, gfpflags); s = get_slab(size, gfpflags);