---
 include/asm-generic/slub.h               |   83 ++++++++++++++++++++++++++
 include/asm-generic/slub_cmpxchg.h       |   65 ++++++++++++++++++++
 include/asm-generic/slub_cmpxchg_local.h |   75 +++++++++++++++++++++++
 mm/slub.c                                |   98 ++++---------------------
 4 files changed, 236 insertions(+), 85 deletions(-)

Index: linux-2.6.23-rc1/include/asm-generic/slub_cmpxchg_local.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc1/include/asm-generic/slub_cmpxchg_local.h	2007-07-27 13:12:34.000000000 -0700
@@ -0,0 +1,75 @@
+/*
+ * cmpxchg_local implementation for SLUB
+ *
+ * This allows interruptless hotpaths.
+ *
+ * We disable preempt to be able to use cmpxchg without the lock
+ * prefix.
+ *
+ * (C) 2007 Silicon Graphics, Inc.
+ *	Christoph Lameter
+ */
+
+static void __always_inline *slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr)
+{
+	struct page *page;
+	void **object;
+
+	preempt_disable();
+redo:
+	page = s->cpu_slab[smp_processor_id()];
+
+	if (unlikely(!page))
+		goto slow;
+
+	object = page->lockless_freelist;
+
+	if (unlikely(!object))
+		goto slow;
+
+	if (unlikely(node != -1 && page_to_nid(page) != node))
+		goto slow;
+
+	if (cmpxchg_local(&page->lockless_freelist, object,
+			object[page->offset]) != object)
+		goto redo;
+	preempt_enable();
+
+	if (unlikely((gfpflags & __GFP_ZERO)))
+		memset(object, 0, s->objsize);
+
+	return object;
+
+slow:
+	preempt_enable();
+	return __slab_alloc(s, gfpflags, node, addr);
+}
+
+static void __always_inline slab_free(struct kmem_cache *s,
+			struct page *page, void *x, void *addr)
+{
+	void **object = (void *)x;
+	void **ll;
+
+	preempt_disable();
+redo:
+	if (unlikely(page != s->cpu_slab[smp_processor_id()]))
+		goto slow;
+
+	if (unlikely(SlabDebug(page)))
+		goto slow;
+
+	ll = page->lockless_freelist;
+	object[page->offset] = ll;
+	if (cmpxchg_local(&page->lockless_freelist, ll, object) != ll)
+		goto redo;
+
+	preempt_enable();
+	return;
+
+slow:
+	preempt_enable();
+	__slab_free(s, page, x, addr);
+}
+
Index: linux-2.6.23-rc1/mm/slub.c
===================================================================
--- linux-2.6.23-rc1.orig/mm/slub.c	2007-07-27 13:09:06.000000000 -0700
+++ linux-2.6.23-rc1/mm/slub.c	2007-07-27 13:24:18.000000000 -0700
@@ -1555,63 +1555,6 @@ debug:
 }
 
 /*
- * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
- * have the fastpath folded into their functions. So no function call
- * overhead for requests that can be satisfied on the fastpath.
- *
- * The fastpath works by first checking if the lockless freelist can be used.
- * If not then __slab_alloc is called for slow processing.
- *
- * Otherwise we can simply pick the next object from the lockless free list.
- */
-static void __always_inline *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr)
-{
-	struct page *page;
-	void **object;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	page = s->cpu_slab[smp_processor_id()];
-
-	if (unlikely(!page))
-		goto slow;
-
-	object = page->lockless_freelist;
-
-	if (unlikely(!object))
-		goto slow;
-
-	if (unlikely(node != -1 && page_to_nid(page) != node))
-		goto slow;
-
-	page->lockless_freelist = object[page->offset];
-	local_irq_restore(flags);
-
-	if (unlikely((gfpflags & __GFP_ZERO) && object))
-		memset(object, 0, s->objsize);
-
-	return object;
-slow:
-	local_irq_restore(flags);
-	return __slab_alloc(s, gfpflags, node, addr);
-}
-
-void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
-{
-	return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(kmem_cache_alloc);
-
-#ifdef CONFIG_NUMA
-void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
-{
-	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(kmem_cache_alloc_node);
-#endif
-
-/*
  * Slow patch handling. This may still be called frequently since objects
  * have a longer lifetime than the cpu slabs in most processing loads.
  *
@@ -1674,38 +1617,23 @@ debug:
 }
 
 /*
- * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
- * can perform fastpath freeing without additional function calls.
- *
- * The fastpath is only possible if we are freeing to the current cpu slab
- * of this processor. This typically the case if we have just allocated
- * the item before.
- *
- * If fastpath is not possible then fall back to __slab_free where we deal
- * with all sorts of special processing.
+ * Include the arch specific hotpath definitions
  */
-static void __always_inline slab_free(struct kmem_cache *s,
-			struct page *page, void *x, void *addr)
-{
-	void **object = (void *)x;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	if (unlikely(page != s->cpu_slab[smp_processor_id()]))
-		goto slow;
-
-	if (unlikely(SlabDebug(page)))
-		goto slow;
+#include
 
-	object[page->offset] = page->lockless_freelist;
-	page->lockless_freelist = object;
-	local_irq_restore(flags);
-	return;
+void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
+{
+	return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(kmem_cache_alloc);
 
-slow:
-	local_irq_restore(flags);
-	__slab_free(s, page, x, addr);
+#ifdef CONFIG_NUMA
+void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
+{
+	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
 }
+EXPORT_SYMBOL(kmem_cache_alloc_node);
+#endif
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
Index: linux-2.6.23-rc1/include/asm-generic/slub_cmpxchg.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc1/include/asm-generic/slub_cmpxchg.h	2007-07-27 13:15:25.000000000 -0700
@@ -0,0 +1,65 @@
+/*
+ * cmpxchg implementation of hotpath code for SLUB
+ *
+ * This allows interruptless hotpaths.
+ *
+ * (C) 2007 Silicon Graphics, Inc.
+ *	Christoph Lameter
+ */
+
+static void __always_inline *slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr)
+{
+	struct page *page;
+	void **object;
+
+redo:
+	page = s->cpu_slab[raw_smp_processor_id()];
+
+	if (unlikely(!page))
+		goto slow;
+
+	object = page->lockless_freelist;
+
+	if (unlikely(!object))
+		goto slow;
+
+	if (unlikely(node != -1 && page_to_nid(page) != node))
+		goto slow;
+
+	if (cmpxchg_local(&page->lockless_freelist, object,
+			object[page->offset]) != object)
+		goto redo;
+
+	if (unlikely((gfpflags & __GFP_ZERO)))
+		memset(object, 0, s->objsize);
+
+	return object;
+
+slow:
+	return __slab_alloc(s, gfpflags, node, addr);
+}
+
+static void __always_inline slab_free(struct kmem_cache *s,
+			struct page *page, void *x, void *addr)
+{
+	void **object = (void *)x;
+	void **ll;
+
+redo:
+	if (unlikely(page != s->cpu_slab[raw_smp_processor_id()]))
+		goto slow;
+
+	if (unlikely(SlabDebug(page)))
+		goto slow;
+
+	ll = page->lockless_freelist;
+	object[page->offset] = ll;
+	if (cmpxchg_local(&page->lockless_freelist, ll, object) != ll)
+		goto redo;
+
+	return;
+slow:
+	__slab_free(s, page, x, addr);
+}
+
Index: linux-2.6.23-rc1/include/asm-generic/slub.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.23-rc1/include/asm-generic/slub.h	2007-07-27 13:24:06.000000000 -0700
@@ -0,0 +1,83 @@
+/*
+ * Generic fastpath implementations for SLUB
+ *
+ * (C) 2007 Silicon Graphics, Inc.
+ *	Christoph Lameter
+ *
+ * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
+ * have the fastpath folded into their functions. So no function call
+ * overhead for requests that can be satisfied on the fastpath.
+ *
+ * The fastpath works by first checking if the lockless freelist can be used.
+ * If not then __slab_alloc is called for slow processing.
+ *
+ * Otherwise we can simply pick the next object from the lockless free list.
+ */
+static void __always_inline *slab_alloc(struct kmem_cache *s,
+		gfp_t gfpflags, int node, void *addr)
+{
+	struct page *page;
+	void **object;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	page = s->cpu_slab[smp_processor_id()];
+
+	if (unlikely(!page))
+		goto slow;
+
+	object = page->lockless_freelist;
+
+	if (unlikely(!object))
+		goto slow;
+
+	if (unlikely(node != -1 && page_to_nid(page) != node))
+		goto slow;
+
+	page->lockless_freelist = object[page->offset];
+	local_irq_restore(flags);
+
+	if (unlikely((gfpflags & __GFP_ZERO) && object))
+		memset(object, 0, s->objsize);
+
+	return object;
+slow:
+	local_irq_restore(flags);
+	return __slab_alloc(s, gfpflags, node, addr);
+}
+
+/*
+ * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
+ * can perform fastpath freeing without additional function calls.
+ *
+ * The fastpath is only possible if we are freeing to the current cpu slab
+ * of this processor. This is typically the case if we have just allocated
+ * the item before.
+ *
+ * If fastpath is not possible then fall back to __slab_free where we deal
+ * with all sorts of special processing.
+ */
+static void __always_inline slab_free(struct kmem_cache *s,
+			struct page *page, void *x, void *addr)
+{
+	void **object = (void *)x;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	if (unlikely(page != s->cpu_slab[smp_processor_id()]))
+		goto slow;
+
+	if (unlikely(SlabDebug(page)))
+		goto slow;
+
+	object[page->offset] = page->lockless_freelist;
+	page->lockless_freelist = object;
+	local_irq_restore(flags);
+	return;
+
+slow:
+	local_irq_restore(flags);
+	__slab_free(s, page, x, addr);
+}
+
+
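
For reference, the redo loop in the cmpxchg hotpaths above can be reproduced as a
stand-alone user-space program. This is only a sketch of the idea, not kernel code:
cmpxchg_local() is approximated with GCC's __atomic_compare_exchange_n() builtin, the
per-cpu slab page is reduced to a single freelist head, and freelist_pop() /
freelist_push() are invented names. The real hotpaths additionally rely on
preempt_disable() (or on running without the lock prefix) to keep the operation
CPU-local; the sketch only shows the structure of the retry loop.

#include <stdio.h>

#define NOBJ 4

/*
 * Each free object stores a pointer to the next free object in its first
 * word, the same way SLUB chains objects through object[page->offset].
 */
static void *freelist;

static void *freelist_pop(void)
{
	void *object;

	do {
		object = __atomic_load_n(&freelist, __ATOMIC_RELAXED);
		if (!object)
			return NULL;	/* here slab_alloc() would take the slow path */
		/*
		 * Retry when the head changed under us, mirroring the
		 * cmpxchg_local()/goto redo loop in slab_alloc().
		 */
	} while (!__atomic_compare_exchange_n(&freelist, &object,
			*(void **)object, 0,
			__ATOMIC_RELAXED, __ATOMIC_RELAXED));

	return object;
}

static void freelist_push(void *object)
{
	void *head;

	do {
		head = __atomic_load_n(&freelist, __ATOMIC_RELAXED);
		*(void **)object = head;	/* object[page->offset] = ll; */
	} while (!__atomic_compare_exchange_n(&freelist, &head, object, 0,
			__ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

int main(void)
{
	static long objects[NOBJ][8];	/* stand-ins for slab objects */
	int i;

	for (i = 0; i < NOBJ; i++)
		freelist_push(objects[i]);

	for (i = 0; i <= NOBJ; i++)
		printf("pop %d -> %p\n", i, freelist_pop());

	return 0;
}

Built with a reasonably recent gcc (the __atomic builtins exist since gcc 4.7), the
final pop returns NULL, which corresponds to the point where the kernel fastpath
falls back to __slab_alloc().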