Support concurrent local and remote frees and allocs on a slab.

About 5-7% performance gain. Or am I also seeing runtime variations?
If there was any performance benefit left for SLAB then it will be gone
with this patch since we eliminate the overhead of the atomics.

What we do is use the last free field in the page struct (the private
field that was freed up through the compound page flag rework) to set up
a separate per cpu freelist. From that one we can allocate without taking
the slab lock because we check out the complete list of free objects when
we first touch the slab and then mark the slab as completely allocated.

If we have a cpu_freelist then we can also free to that list if we run on
that processor, without taking the slab lock. This even allows concurrent
allocations and frees on the same slab, using two mutually exclusive
freelists. Allocs and frees from the processor owning the per cpu slab
bypass the slab lock via the cpu_freelist. Remote frees use the slab lock
to synchronize and use the regular freelist for marking items as free.
So local allocs and frees may run concurrently with remote frees without
synchronization.

If the allocator runs out of its per cpu freelist then it consults the
per slab freelist (which requires the slab lock) and reloads the
cpu_freelist if there are objects that were remotely freed.

Amazing how this came together.

Signed-off-by: Christoph Lameter

---
 include/linux/mm_types.h |    5 ++-
 mm/slub.c                |   67 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 59 insertions(+), 13 deletions(-)

Index: slub/include/linux/mm_types.h
===================================================================
--- slub.orig/include/linux/mm_types.h	2007-05-04 20:09:26.000000000 -0700
+++ slub/include/linux/mm_types.h	2007-05-04 20:09:33.000000000 -0700
@@ -50,9 +50,12 @@ struct page {
 		spinlock_t ptl;
 #endif
 		struct {			/* SLUB uses */
-			struct page *first_page; /* Compound pages */
+			void **cpu_freelist;	/* Per cpu freelist */
 			struct kmem_cache *slab; /* Pointer to slab */
 		};
+		struct {
+			struct page *first_page; /* Compound pages */
+		};
 	};
 	union {
 		pgoff_t index;		/* Our offset within mapping. */
Index: slub/mm/slub.c
===================================================================
--- slub.orig/mm/slub.c	2007-05-04 20:09:26.000000000 -0700
+++ slub/mm/slub.c	2007-05-04 20:14:04.000000000 -0700
@@ -81,10 +81,13 @@
  * PageActive		The slab is used as a cpu cache. Allocations
  *			may be performed from the slab. The slab is not
  *			on any slab list and cannot be moved onto one.
+ *			The cpu slab may have a cpu_freelist in order
+ *			to optimize allocations and frees on a particular
+ *			cpu.
  *
  * PageError		Slab requires special handling due to debug
  *			options set. This moves slab handling out of
- *			the fast path.
+ *			the fast path and disables cpu_freelists.
  */
 
 /*
@@ -857,6 +860,7 @@ static struct page *new_slab(struct kmem
 	set_freepointer(s, last, NULL);
 
 	page->freelist = start;
+	page->cpu_freelist = NULL;
 	page->inuse = 0;
 out:
 	if (flags & __GFP_WAIT)
@@ -1121,6 +1125,23 @@ static void putback_slab(struct kmem_cac
  */
 static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
 {
+	/*
+	 * Merge cpu freelist into freelist. Typically we get here
+	 * because both freelists are empty. So this is unlikely
+	 * to occur.
+	 */
+	while (unlikely(page->cpu_freelist)) {
+		void **object;
+
+		/* Retrieve object from cpu_freelist */
+		object = page->cpu_freelist;
+		page->cpu_freelist = page->cpu_freelist[page->offset];
+
+		/* And put onto the regular freelist */
+		object[page->offset] = page->freelist;
+		page->freelist = object;
+		page->inuse--;
+	}
 	s->cpu_slab[cpu] = NULL;
 	ClearPageActive(page);
 
@@ -1190,22 +1211,33 @@ static void *slab_alloc(struct kmem_cach
 	local_irq_save(flags);
 	cpu = smp_processor_id();
 	page = s->cpu_slab[cpu];
-	if (!page)
+	if (unlikely(!page))
 		goto new_slab;
 
-	slab_lock(page);
-	if (unlikely(node != -1 && page_to_nid(page) != node))
+	if (unlikely(node != -1 && page_to_nid(page) != node)) {
+		slab_lock(page);
 		goto another_slab;
+	}
+
+	if (likely(page->cpu_freelist)) {
+		object = page->cpu_freelist;
+		page->cpu_freelist = object[page->offset];
+		local_irq_restore(flags);
+		return object;
+	}
+
+	slab_lock(page);
 redo:
-	object = page->freelist;
-	if (unlikely(!object))
+	if (!page->freelist)
 		goto another_slab;
-	if (unlikely(PageError(page)))
+	if (PageError(page))
 		goto debug;
 
-have_object:
-	page->inuse++;
-	page->freelist = object[page->offset];
+	/* Reload the cpu freelist while allocating the next object */
+	object = page->freelist;
+	page->cpu_freelist = object[page->offset];
+	page->freelist = NULL;
+	page->inuse = s->objects;
 	slab_unlock(page);
 	local_irq_restore(flags);
 	return object;
@@ -1215,7 +1247,7 @@ another_slab:
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
-	if (likely(page)) {
+	if (page) {
 have_slab:
 		s->cpu_slab[cpu] = page;
 		SetPageActive(page);
@@ -1251,6 +1283,7 @@ have_slab:
 	local_irq_restore(flags);
 	return NULL;
 debug:
+	object = page->freelist;
 	if (!alloc_object_checks(s, page, object))
 		goto another_slab;
 	if (s->flags & SLAB_STORE_USER)
@@ -1261,8 +1294,12 @@ debug:
 			page->freelist);
 		dump_stack();
 	}
+	page->freelist = object[page->offset];
+	page->inuse++;
 	init_object(s, object, 1);
-	goto have_object;
+	slab_unlock(page);
+	local_irq_restore(flags);
+	return object;
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
@@ -1293,6 +1330,12 @@ static void slab_free(struct kmem_cache
 	unsigned long flags;
 
 	local_irq_save(flags);
+	if (page == s->cpu_slab[smp_processor_id()] && !PageError(page)) {
+		object[page->offset] = page->cpu_freelist;
+		page->cpu_freelist = object;
+		local_irq_restore(flags);
+		return;
+	}
 	slab_lock(page);
 	if (unlikely(PageError(page)))
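
For readers who want the scheme in isolation, here is a minimal user space
sketch of the two-freelist idea. It is not part of the patch: the names
(struct toy_slab, toy_alloc, toy_free_local, toy_free_remote) are invented
for illustration, a pthread mutex stands in for the slab lock, and the free
pointer is assumed to sit at offset 0 of each object rather than at
page->offset. In the kernel the local paths are additionally protected by
running with interrupts disabled on the owning cpu; in this sketch the
caller must guarantee that only the owning thread touches cpu_freelist.

/* Standalone illustration only -- not kernel code. */
#include <pthread.h>
#include <stddef.h>

struct toy_slab {
	void **cpu_freelist;	/* only touched by the owning cpu/thread */
	void **freelist;	/* shared; protected by lock (the "slab lock") */
	pthread_mutex_t lock;
};

/* Each free object stores the pointer to the next free object at offset 0. */
static void **next_object(void **object)
{
	return (void **)*object;
}

/* Local alloc fast path: pop from the cpu freelist without the lock. */
void *toy_alloc(struct toy_slab *s)
{
	void **object = s->cpu_freelist;

	if (object) {
		s->cpu_freelist = next_object(object);
		return object;
	}

	/*
	 * Slow path: take the lock and check out the whole shared freelist,
	 * which may contain remotely freed objects. The first object
	 * satisfies this allocation, the rest becomes the new cpu freelist.
	 */
	pthread_mutex_lock(&s->lock);
	object = s->freelist;
	if (object) {
		s->cpu_freelist = next_object(object);
		s->freelist = NULL;
	}
	pthread_mutex_unlock(&s->lock);
	return object;
}

/* Local free: push onto the cpu freelist, again without the lock. */
void toy_free_local(struct toy_slab *s, void *x)
{
	void **object = x;

	*object = s->cpu_freelist;
	s->cpu_freelist = object;
}

/* Remote free: serialize on the lock and push onto the shared freelist. */
void toy_free_remote(struct toy_slab *s, void *x)
{
	void **object = x;

	pthread_mutex_lock(&s->lock);
	*object = s->freelist;
	s->freelist = object;
	pthread_mutex_unlock(&s->lock);
}

The point of the split is that the lock is only taken by remote frees and
by the refill path, so the common local alloc/free pair never touches it.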