---
 include/linux/slub_def.h |    1 
 mm/slub.c                |  116 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 112 insertions(+), 5 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2008-02-28 21:39:45.283399423 -0800
+++ linux-2.6/mm/slub.c	2008-02-28 22:23:52.385927773 -0800
@@ -1022,6 +1022,61 @@ static inline unsigned long kmem_cache_f
 #define slub_debug 0
 #endif
 
+#ifdef SLUB_FASTPATH
+/* Fastpath helpers: pack a slab offset and a version counter into a pointer */
+static inline unsigned long counter(void *x)
+{
+	return (unsigned long)(x) >> (BITS_PER_LONG / 2);
+}
+
+static inline unsigned long offset(void *x)
+{
+	return (unsigned long)(x) & ((1UL << (BITS_PER_LONG / 2)) - 1);
+}
+
+static inline void *make_freepointer(unsigned long offset, unsigned long counter)
+{
+	return (void *)(offset + (counter << (BITS_PER_LONG / 2)));
+}
+
+static inline void *get_object(struct kmem_cache_cpu *c)
+{
+	return c->addr + offset(c->freelist);
+}
+
+static inline void set_next_object(struct kmem_cache_cpu *c, void *object)
+{
+	/* Changing the offset requires bumping the version counter */
+	c->freelist = make_freepointer(object - c->addr,
+					counter(c->freelist) + 1);
+}
+#else
+static inline unsigned long counter(void *x)
+{
+	return 0;
+}
+
+static inline unsigned long offset(void *x)
+{
+	return (unsigned long)(x);
+}
+
+static inline void *make_freepointer(unsigned long offset, unsigned long counter)
+{
+	return (void *)offset;
+}
+
+static inline void *get_object(struct kmem_cache_cpu *c)
+{
+	return c->freelist;
+}
+
+static inline void set_next_object(struct kmem_cache_cpu *c, void *object)
+{
+	c->freelist = object;
+}
+#endif
+
 static inline struct page *alloc_slab_page(gfp_t flags, int node, int order)
 {
 	if (node == -1)
@@ -1393,22 +1448,23 @@ static void deactivate_slab(struct kmem_
 {
 	struct page *page = c->page;
 	int tail = 1;
+	void **freelist = get_object(c);
 
-	if (c->freelist)
+	if (freelist)
 		stat(c, DEACTIVATE_REMOTE_FREES);
 	/*
 	 * Merge cpu freelist into slab freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
 	 */
-	while (unlikely(c->freelist)) {
+	while (unlikely(freelist)) {
 		void **object;
 
 		tail = 0;	/* Hot objects. Put the slab first */
 
 		/* Retrieve object from cpu_freelist */
-		object = c->freelist;
-		c->freelist = c->freelist[c->offset];
+		object = freelist;
+		freelist = freelist[c->offset];
 
 		/* And put onto the regular freelist */
 		object[c->offset] = page->freelist;
@@ -1416,6 +1472,7 @@ static void deactivate_slab(struct kmem_
 		page->inuse--;
 	}
 	c->page = NULL;
+	set_next_object(c, NULL);
 	unfreeze_slab(s, page, tail);
 }
 
@@ -1513,7 +1570,9 @@ load_freelist:
 		goto debug;
 
 	object = c->page->freelist;
-	c->freelist = object[c->offset];
+	c->addr = page_address(c->page);
+	c->freelist = make_freepointer(object[c->offset] - c->addr,
+					counter(c->freelist) + 1);
 	c->page->inuse = slab_objects(s, c->page);
 	c->page->freelist = NULL;
 	c->node = page_to_nid(c->page);
@@ -1581,6 +1640,35 @@ static __always_inline void *slab_alloc(
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
 
+#ifdef SLUB_FASTPATH
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	do {
+		void **old;
+		void **new;
+
+		old = c->freelist;
+		if (unlikely(!old || !node_match(c, node))) {
+			object = __slab_alloc(s, gfpflags, node, addr, c);
+			break;
+		}
+		/*
+		 * Whenever c->addr is changed, the version counter
+		 * _must_ be incremented. This barrier ensures that the
+		 * freelist is read before c->addr wrt interrupts.
+		 */
+		barrier();
+		object = c->addr + offset(old);
+		stat(c, ALLOC_FASTPATH);
+
+		/*
+		 * No need to increment the counter here, because only
+		 * an object free can lead to counter re-use.
+		 */
+		new = make_freepointer(object[c->offset] - c->addr,
+					counter(old));
+	} while (cmpxchg_local(&c->freelist, old, new) != old);
+
+#else
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 	if (unlikely(!c->freelist || !node_match(c, node)))
@@ -1593,6 +1681,7 @@ static __always_inline void *slab_alloc(
 		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
+#endif
 
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, c->objsize);
@@ -1697,6 +1786,22 @@ static __always_inline void slab_free(st
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
+
+#ifdef SLUB_FASTPATH
+	void **old;
+	void **new;
+
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	debug_check_no_locks_freed(object, s->objsize);
+	do {
+		old = c->freelist;
+		barrier();
+		object[c->offset] = c->addr + offset(old);
+		stat(c, FREE_FASTPATH);
+		new = make_freepointer((void *)object - c->addr, counter(old) + 1);
+	} while (cmpxchg_local(&c->freelist, old, new) != old);
+
+#else
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -1710,6 +1815,7 @@ static __always_inline void slab_free(st
 		__slab_free(s, page, x, addr, c->offset);
 
 	local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2008-02-28 21:49:58.506708895 -0800
+++ linux-2.6/include/linux/slub_def.h	2008-02-28 21:50:45.899338411 -0800
@@ -35,6 +35,7 @@ enum stat_item {
 struct kmem_cache_cpu {
	void **freelist;	/* Pointer to first free per cpu object */
	struct page *page;	/* The slab from which we are allocating */
+	void *addr;		/* Address of the slab */
	int node;		/* The node of the page (or -1 for debug) */
	unsigned int offset;	/* Freepointer offset (in word units) */
	unsigned int objsize;	/* Size of an object (from kmem_cache) */
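
Not part of the patch: a minimal userspace sketch of the versioned free pointer
that the SLUB_FASTPATH helpers above implement. The names mirror counter(),
offset() and make_freepointer(), but BITS_PER_LONG, the slab array and the
offsets here are stand-ins for illustration, not kernel code.

/* Userspace illustration only -- not kernel code. */
#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
#define HALF_SHIFT	(BITS_PER_LONG / 2)
#define HALF_MASK	((1UL << HALF_SHIFT) - 1)

static inline unsigned long counter(void *x)
{
	return (unsigned long)x >> HALF_SHIFT;
}

static inline unsigned long offset(void *x)
{
	return (unsigned long)x & HALF_MASK;
}

static inline void *make_freepointer(unsigned long off, unsigned long ctr)
{
	return (void *)(off + (ctr << HALF_SHIFT));
}

int main(void)
{
	char slab[4096];			/* stand-in for the slab page */
	void *fp = make_freepointer(128, 7);	/* object at offset 128, version 7 */
	void *stale = fp;

	printf("offset=%lu counter=%lu object=%p\n",
		offset(fp), counter(fp), (void *)(slab + offset(fp)));

	/* A free bumps the counter, so a stale snapshot no longer matches. */
	fp = make_freepointer(offset(fp), counter(fp) + 1);
	printf("stale snapshot %s the current freelist word\n",
		stale == fp ? "matches" : "differs from");
	return 0;
}

The point of the packing is that the lower half word locates the object
relative to c->addr while the upper half word changes on every free, so a
freelist value read before an interrupt cannot silently pass the compare
afterwards (the classic ABA problem).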
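
Also for illustration only: the shape of the cmpxchg_local() retry loop in the
alloc/free fastpaths, again as a userspace sketch. C11 atomics stand in for
cmpxchg_local() and a hand-rolled "interrupt" stands in for a remote free;
only the loop structure and the way the bumped counter makes the stale
snapshot fail the compare are meant to correspond to the patch.

/* Userspace illustration only -- not kernel code. */
#include <stdio.h>
#include <stdatomic.h>
#include <limits.h>

#define HALF_SHIFT	(sizeof(unsigned long) * CHAR_BIT / 2)

static _Atomic unsigned long freelist;	/* offset | (counter << HALF_SHIFT) */

static unsigned long make_fp(unsigned long off, unsigned long ctr)
{
	return off + (ctr << HALF_SHIFT);
}

/* Pretend interrupt handler: a free installs a new head and bumps the counter */
static void simulated_irq_free(unsigned long new_off)
{
	unsigned long old = atomic_load(&freelist);

	atomic_store(&freelist, make_fp(new_off, (old >> HALF_SHIFT) + 1));
}

int main(void)
{
	unsigned long old, new;
	int attempts = 0;

	atomic_store(&freelist, make_fp(64, 1));
	do {
		old = atomic_load(&freelist);
		/* Allocation keeps the counter, as in slab_alloc() above */
		new = make_fp(128, old >> HALF_SHIFT);

		/* An "interrupt" between the read and the cmpxchg ... */
		if (attempts++ == 0)
			simulated_irq_free(256);

		/* ... changes the counter, the compare fails, the loop retries. */
	} while (!atomic_compare_exchange_strong(&freelist, &old, new));

	printf("succeeded after %d attempt(s)\n", attempts);
	return 0;
}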