Index: linux-2.6/mm/slub.c =================================================================== --- linux-2.6.orig/mm/slub.c 2007-07-02 10:33:39.000000000 -0700 +++ linux-2.6/mm/slub.c 2007-07-02 17:03:16.000000000 -0700 @@ -99,7 +99,7 @@ * the fast path and disables lockless freelists. */ -#define LOCKLESS_OFF (void *)16 +#define LOCKLESS_OFF (void **)16 #ifdef CONFIG_SLUB_DEBUG #define SLABDEBUG (1 << PG_error) @@ -1320,7 +1320,7 @@ /* * Careful with the lockless freelist. Allocations occur without * disabling interrupts and preemption. So we need to disable - * the use of the lockess freelist with an atomic operation before + * the use of the lockless freelist in an atomic way before * we start merging objects back into the slab * * Clearing s->cpu_slab should occur as early as possible in order @@ -1329,17 +1329,20 @@ * freelist is empty. */ s->cpu_slab[cpu] = NULL; + + /* + * Optimize the case where we do not have to dirty the + * page struct. + */ if (likely(!page->lockless_freelist)) { unfreeze_slab(s, page); return; } - lockless_freelist = xchg(&page->lockless_freelist, NULL); + lockless_freelist = xchg(&page->lockless_freelist, LOCKLESS_OFF); /* - * Merge cpu freelist into freelist. Typically we get here - * because both freelists are empty. So this is unlikely - * to occur. + * Merge cpu freelist into freelist. */ while (lockless_freelist) { void **object; @@ -1524,7 +1527,8 @@ redo: page = s->cpu_slab[raw_smp_processor_id()]; - if (unlikely(!page || !page->lockless_freelist || + if (unlikely(!page || + page->lockless_freelist <= LOCKLESS_OFF || (node != -1 && page_to_nid(page) != node))) object = __slab_alloc(s, gfpflags, node, addr); @@ -1565,7 +1569,9 @@ { void *prior; void **object = (void *)x; + unsigned long flags; + local_irq_save(flags); slab_lock(page); if (unlikely(SlabDebug(page))) @@ -1591,6 +1597,7 @@ out_unlock: slab_unlock(page); + local_irq_restore(flags); return; slab_empty: @@ -1602,6 +1609,7 @@ slab_unlock(page); discard_slab(s, page); + local_irq_restore(flags); return; debug: @@ -1625,18 +1633,22 @@ struct page *page, void *x, void *addr) { void **object = (void *)x; - unsigned long flags; + void **ll; - local_irq_save(flags); - if (likely(SlabFrozen(page) && - page == s->cpu_slab[smp_processor_id()] && - !SlabDebug(page))) { - object[page->offset] = page->lockless_freelist; - page->lockless_freelist = object; +redo: + ll = page->lockless_freelist; + if (likely(ll != LOCKLESS_OFF && + /* + * Is this really needed? We could just free into + * another cpuslab if it has been setup since it + * will do the counting for us then? + */ + page == s->cpu_slab[smp_processor_id()])) { + object[page->offset] = ll; + if (cmpxchg(&page->lockless_freelist, ll, object) != ll) + goto redo; } else __slab_free(s, page, x, addr); - - local_irq_restore(flags); } void kmem_cache_free(struct kmem_cache *s, void *x)