---
 include/linux/bounds.h   |    1 
 include/linux/slub_def.h |   16 ++++++-
 kernel/bounds.c          |    4 +
 mm/slub.c                |  106 ++++++++++++++++++++++++++++++++++++-----------
 4 files changed, 103 insertions(+), 24 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2009-10-02 16:56:06.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h	2009-10-02 17:26:44.000000000 -0500
@@ -34,9 +34,23 @@ enum stat_item {
 	ORDER_FALLBACK,		/* Number of times fallback was necessary */
 	NR_SLUB_STAT_ITEMS };
 
+#define SLUB_TID_SHIFT ((BITS_PER_LONG - CPU_SHIFT) / 2)
+#define SLUB_OFFSET_SHIFT (BITS_PER_LONG - SLUB_TID_SHIFT - CPU_SHIFT)
+union slub_tid {
+	unsigned long w;
+	struct {
+		unsigned long cpu : CPU_SHIFT;
+		unsigned long tid : SLUB_TID_SHIFT;
+		unsigned long offset: SLUB_OFFSET_SHIFT;
+	};
+};
+
+#define TID_EMPTY ((1 << SLUB_OFFSET_SHIFT) - 1)
+
 struct kmem_cache_cpu {
-	void **freelist;	/* Pointer to first free per cpu object */
+	union slub_tid tid;
 	struct page *page;	/* The slab from which we are allocating */
+	void **base;		/* The start of the page */
 	int node;		/* The node of the page (or -1 for debug) */
 #ifdef CONFIG_SLUB_STATS
 	unsigned stat[NR_SLUB_STAT_ITEMS];
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2009-10-02 16:56:06.000000000 -0500
+++ linux-2.6/mm/slub.c	2009-10-02 17:25:12.000000000 -0500
@@ -1450,6 +1450,7 @@ static void deactivate_slab(struct kmem_
 {
 	struct page *page = c->page;
 	int tail = 1;
+	union slub_tid old;
 
 	if (page->freelist)
 		stat(s, DEACTIVATE_REMOTE_FREES);
@@ -1458,19 +1459,38 @@
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
 	 */
-	while (unlikely(c->freelist)) {
-		void **object;
-		tail = 0;	/* Hot objects. Put the slab first */
+	c->page = NULL;
 
-		/* Retrieve object from cpu_freelist */
-		object = c->freelist;
-		c->freelist = get_freepointer(s, c->freelist);
-
-		/* And put onto the regular freelist */
-		set_freepointer(s, object, page->freelist);
-		page->freelist = object;
-		page->inuse--;
+	/* The above stops any new activities of the slab_free
+	 * lockless hotpath. Any slab_free will now end up in the
+	 * slow path.
+	 *
+	 * We need to do a transaction to invalidate any ongoing
+	 * activity too.
+	 */
+	old = c->tid;
+	barrier();
+	c->tid.tid = old.tid + 1;
+	c->tid.offset = TID_EMPTY;
+
+	if (old.offset != TID_EMPTY) {
+		void **freelist = c->base + old.offset * 8;
+
+		while (unlikely(freelist)) {
+			void **object;
+
+			tail = 0;	/* Hot objects. Put the slab first */
+
+			/* Retrieve object from cpu_freelist */
+			object = freelist;
+			freelist = get_freepointer(s, freelist);
+
+			/* And put onto the regular freelist */
+			set_freepointer(s, object, page->freelist);
+			page->freelist = object;
+			page->inuse--;
+		}
 	}
 	c->page = NULL;
 	unfreeze_slab(s, page, tail);
@@ -1607,10 +1627,14 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	struct page *page = __this_cpu_read(s->cpu_slab->page);
+	void *base;
+	unsigned long flags;
+	union slub_tid old, new;
 
 	/* We handle __GFP_ZERO in the caller */
 	gfpflags &= ~__GFP_ZERO;
 
+	local_irq_save(flags);
 	if (!page)
 		goto new_slab;
 
@@ -1627,12 +1651,19 @@ load_freelist:
 	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
 		goto debug;
 
-	__this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
+	base = page_address(page);
+	old = this_cpu_read(s->cpu_slab->tid);
+	new.cpu = smp_processor_id();
+	new.tid = old.tid + 1;
+	new.offset = get_freepointer(s, object) - base;
+	__this_cpu_write(s->cpu_slab->tid, new);
 	page->inuse = page->objects;
 	page->freelist = NULL;
+	__this_cpu_write(s->cpu_slab->base, base);
 	__this_cpu_write(s->cpu_slab->node, page_to_nid(page));
 unlock_out:
 	slab_unlock(page);
+	local_irq_restore(flags);
 	stat(s, ALLOC_SLOWPATH);
 	return object;
 
@@ -1664,6 +1695,7 @@ new_slab:
 		__this_cpu_write(s->cpu_slab->page, page);
 		goto load_freelist;
 	}
+	local_irq_restore(flags);
 	if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
 		slab_out_of_memory(s, gfpflags, node);
 	return NULL;
@@ -1691,7 +1723,7 @@ static __always_inline void *slab_alloc(
 		gfp_t gfpflags, int node, unsigned long addr)
 {
 	void **object;
-	unsigned long flags;
+	union slub_tid old;
 
 	gfpflags &= gfp_allowed_mask;
 
@@ -1701,18 +1733,27 @@ static __always_inline void *slab_alloc(
 	if (should_failslab(s->objsize, gfpflags))
 		return NULL;
 
-	local_irq_save(flags);
-	object = __this_cpu_read(s->cpu_slab->freelist);
-	if (unlikely(!object || !node_match(s, node)))
+redo:
+	old = __this_cpu_read(s->cpu_slab->tid);
+	barrier();
+
+	if (unlikely(old.offset == TID_EMPTY || !node_match(s, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr);
 
 	else {
-		__this_cpu_write(s->cpu_slab->freelist,
-			get_freepointer(s, object));
+		union slub_tid new;
+
+		object = __this_cpu_read(s->cpu_slab->base) + old.offset * 8;
+		new.cpu = old.cpu;
+		new.tid = old.tid + 1;
+		new.offset = (void **)get_freepointer(s, object)
+				- __this_cpu_read(s->cpu_slab->base);
+		if (this_cpu_cmpxchg(s->cpu_slab->tid.w, old.w, new.w) != old.w)
+			goto redo;
+
 		stat(s, ALLOC_FASTPATH);
 	}
-	local_irq_restore(flags);
 
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, s->objsize);
@@ -1777,8 +1818,10 @@ static void __slab_free(struct kmem_cach
 {
 	void *prior;
 	void **object = (void *)x;
+	unsigned long flags;
 
 	stat(s, FREE_SLOWPATH);
+	local_irq_save(flags);
 	slab_lock(page);
 
 	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
@@ -1809,6 +1852,7 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
+	local_irq_restore(flags);
 	return;
 
 slab_empty:
@@ -1820,6 +1864,7 @@ slab_empty:
 		stat(s, FREE_REMOVE_PARTIAL);
 	}
 	slab_unlock(page);
+	local_irq_restore(flags);
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
 	return;
@@ -1845,6 +1890,7 @@ static __always_inline void slab_free(st
 		struct page *page, void *x, unsigned long addr)
 {
 	void **object = (void *)x;
+	union slub_tid old;
 	unsigned long flags;
 
 	kmemleak_free_recursive(x, s->flags);
@@ -1854,15 +1900,29 @@ static __always_inline void slab_free(st
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(object, s->objsize);
 
+redo:
+	old = this_cpu_read(s->cpu_slab->tid);
+	barrier();
 	if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
 			__this_cpu_read(s->cpu_slab->node) >= 0)) {
-		set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
-		__this_cpu_write(s->cpu_slab->freelist, object);
+		union slub_tid new;
+
+		new.cpu = old.cpu;
+		new.tid = old.tid + 1;
+
+		if (old.offset != 0xffff)
+			set_freepointer(s, object,
+				__this_cpu_read(s->cpu_slab->base) + old.offset * 8);
+		else
+			set_freepointer(s, object, NULL);
+
+		new.offset = object - __this_cpu_read(s->cpu_slab->base);
+		if (this_cpu_cmpxchg(s->cpu_slab->tid.w, old.w, new.w) != old.w)
+			goto redo;
+
 		stat(s, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, x, addr);
-
-	local_irq_restore(flags);
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
Index: linux-2.6/include/linux/bounds.h
===================================================================
--- linux-2.6.orig/include/linux/bounds.h	2009-10-02 17:13:32.000000000 -0500
+++ linux-2.6/include/linux/bounds.h	2009-10-02 17:21:12.000000000 -0500
@@ -9,5 +9,6 @@
 
 #define NR_PAGEFLAGS 23 /* __NR_PAGEFLAGS	# */
 #define MAX_NR_ZONES 3 /* __MAX_NR_ZONES	# */
+#define CPU_SHIFT 3 /* get_order(NR_CPUS << PAGE_SHIFT)	# */
 
 #endif
Index: linux-2.6/kernel/bounds.c
===================================================================
--- linux-2.6.orig/kernel/bounds.c	2009-10-02 17:14:08.000000000 -0500
+++ linux-2.6/kernel/bounds.c	2009-10-02 17:21:09.000000000 -0500
@@ -9,11 +9,15 @@
 #include <linux/page-flags.h>
 #include <linux/mmzone.h>
 #include <linux/kbuild.h>
+#include
+#include
+#include
 
 void foo(void)
 {
 	/* The enum constants to put into include/linux/bounds.h */
 	DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
 	DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+	DEFINE(CPU_SHIFT, get_order(NR_CPUS << PAGE_SHIFT));
 	/* End of constants */
 }
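
For readers who want to see the idea outside of SLUB, here is a minimal, self-contained userspace sketch of the transaction scheme the patch introduces: the per-cpu state is packed into one word holding a cpu number, a transaction id and a freelist offset, every committed update bumps the tid, and the fastpath publishes its new word with a single cmpxchg, retrying when the word changed underneath it. The names (tid_word, try_pop, next_offset) and the use of C11 atomics are assumptions made for this illustration only; the patch itself works on a per-cpu union slub_tid with this_cpu_cmpxchg() and an offset relative to the slab's base address.

#include <stdatomic.h>
#include <stdbool.h>

#define CPU_BITS	3
#define WORD_BITS	(sizeof(unsigned long) * 8)
#define TID_BITS	((WORD_BITS - CPU_BITS) / 2)
#define OFFSET_BITS	(WORD_BITS - TID_BITS - CPU_BITS)
#define EMPTY		((1UL << OFFSET_BITS) - 1)	/* plays the role of TID_EMPTY */

union tid_word {
	unsigned long w;			/* whole word, used for the cmpxchg */
	struct {
		unsigned long cpu    : CPU_BITS;	/* owning cpu */
		unsigned long tid    : TID_BITS;	/* transaction counter */
		unsigned long offset : OFFSET_BITS;	/* freelist head, EMPTY if none */
	};
};

static _Atomic unsigned long cpu_slab_tid;

/*
 * Pop the freelist head without disabling interrupts: take a snapshot of
 * the word, build the successor, commit with one compare-and-swap.  Any
 * concurrent alloc/free or a slab deactivation bumps the tid, so a stale
 * snapshot makes the CAS fail and the caller retries (the "goto redo"
 * pattern in the patch).
 */
static bool try_pop(unsigned long next_offset, unsigned long *popped_offset)
{
	union tid_word old, new;

	old.w = atomic_load(&cpu_slab_tid);
	if (old.offset == EMPTY)
		return false;		/* nothing cached: take the slow path */

	new.cpu    = old.cpu;
	new.tid    = old.tid + 1;	/* every committed transaction bumps the tid */
	new.offset = next_offset;

	if (!atomic_compare_exchange_strong(&cpu_slab_tid, &old.w, new.w))
		return false;		/* lost a race: redo with a fresh snapshot */

	*popped_offset = old.offset;	/* locates the object just taken */
	return true;
}

As a worked example of the bounds.c arithmetic: with NR_CPUS = 8, NR_CPUS << PAGE_SHIFT spans eight pages, so get_order() returns 3 and CPU_SHIFT becomes 3, matching the value in the generated bounds.h above; the remaining bits of the word are then split between the tid and the offset by SLUB_TID_SHIFT and SLUB_OFFSET_SHIFT. The tid is what makes the scheme safe without local_irq_save(): an interrupt, a preemption or a slab deactivation between the snapshot and the cmpxchg changes the word, so the commit fails instead of corrupting the freelist.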