---
 mm/slub.c |  173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2008-03-14 03:57:12.000000000 -0700
+++ linux-2.6/mm/slub.c	2008-03-14 03:58:59.000000000 -0700
@@ -1573,7 +1573,14 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	struct page *new;
+#ifdef SLUB_FASTPATH
+	unsigned long flags;
 
+	local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+	c = get_cpu_slab(s, raw_smp_processor_id());
+#endif
+#endif
 	if (!c->page)
 		goto new_slab;
 
@@ -1597,6 +1604,9 @@ load_freelist:
 unlock_out:
 	slab_unlock(c->page);
 	stat(c, ALLOC_SLOWPATH);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return object;
 
 another_slab:
@@ -1628,6 +1638,9 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return NULL;
 debug:
 	if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1654,6 +1667,90 @@ static __always_inline void *slab_alloc(
 {
 	void **object;
 	struct kmem_cache_cpu *c;
+
+/*
+ * The SLUB_FASTPATH path is provisional and is currently disabled if the
+ * kernel is compiled with preemption or if the arch does not support
+ * fast cmpxchg operations. There are a couple of upcoming changes that will
+ * simplify matters and allow preemption. Ultimately we may end up making
+ * SLUB_FASTPATH the default.
+ *
+ * 1. The introduction of the per cpu allocator will avoid array lookups
+ *    through get_cpu_slab(). A special register can be used instead.
+ *
+ * 2. The introduction of per cpu atomic operations (cpu_ops) means that
+ *    we can realize the logic here entirely with per cpu atomics. The
+ *    per cpu atomic ops will take care of the preemption issues.
+ */
+
+#ifdef SLUB_FASTPATH
+	void *old, *new, *result, *next_object;
+	unsigned long base;
+
+	preempt_disable();
+	c = get_cpu_slab(s, raw_smp_processor_id());
+fastpath:	/* fastpath cmpxchg loop */
+	old = c->freelist;
+	/*
+	 * Whenever c->base is changed, the sequence number
+	 * _must_ be incremented. This barrier ensures we read the
+	 * version before c->base with respect to interrupts.
+	 */
+	barrier();
+	base = c->base;
+	if (unlikely(is_end(old) || !node_match(c, node)))
+		goto slowpath;
+	if (unlikely(get_high_half((unsigned long)old) == HALF_LONG_MASK))
+		goto slowpath;
+	/*
+	 * make_ptr on base should always return a valid pointer;
+	 * ensure base has not been changed by a nested interrupt by
+	 * re-reading the freelist sequence number. This makes sure that
+	 * base and the offset will generate a valid pointer.
+	 */
+	barrier();
+	if (c->freelist != old)
+		goto fastpath;	/* retry */
+	object = make_ptr(base, old);
+	/*
+	 * Need to increment the MSB counter here, because the
+	 * object[c->offset] use is racy. We can race against
+	 * another slab_alloc fast path.
+	 * Note that the object[c->offset] read may return garbage, but
+	 * it is guaranteed to point to a valid address since pages are
+	 * always reused in the page allocator. We can tell whether the
+	 * object[c->offset] read returned garbage because the sequence
+	 * number is incremented each time the freelist is modified.
+	 */
+	next_object = object[c->offset];
+	if (unlikely(!same_base(base, next_object)))
+		goto slowpath;
+	stat(c, ALLOC_FASTPATH);
+	new = make_version(old + HALF_LONG_MASK + 1, next_object);
+	result = cmpxchg_local(&c->freelist, old, new);
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Just to be paranoid: warn if we detect that enough free or
+	 * slow paths nested on top of us to get the counter to go
+	 * half-way to overflow. It would be insane to do that many
+	 * allocations/frees in interrupt handlers, but check it anyway.
+	 */
+	WARN_ON(result - old > -1UL >> 1);
+#endif
+	if (result != old)
+		goto fastpath;	/* retry */
+	preempt_enable();
+	goto got_object;
+slowpath:
+	preempt_enable();
+	/*
+	 * __slab_alloc must make no assumption about the
+	 * tests previously done by slab_alloc: we could be
+	 * migrated to a different CPU.
+	 */
+	object = __slab_alloc(s, gfpflags, node, addr, c);
+got_object:
+#else
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -1668,6 +1765,7 @@ static __always_inline void *slab_alloc(
 		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
+#endif
 
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, c->objsize);
@@ -1704,6 +1802,11 @@ static void __slab_free(struct kmem_cach
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
 
+#ifdef SLUB_FASTPATH
+	unsigned long flags;
+
+	local_irq_save(flags);
+#endif
 	c = get_cpu_slab(s, raw_smp_processor_id());
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
@@ -1735,6 +1838,9 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return;
 
 slab_empty:
@@ -1748,6 +1854,9 @@ slab_empty:
 	slab_unlock(page);
 	stat(c, FREE_SLAB);
 	discard_slab(s, page);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return;
 
 debug:
@@ -1772,6 +1881,69 @@ static __always_inline void slab_free(st
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
+
+#ifdef SLUB_FASTPATH
+	void *old, *new, *result;
+	unsigned long base;
+
+	preempt_disable();
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	debug_check_no_locks_freed(object, c->objsize);
+	while (1) {
+		old = c->freelist;
+		/*
+		 * If the compiler were to reorder the retrieval of c->page and
+		 * c->base to come before c->freelist, then an interrupt
+		 * could change the cpu slab before we retrieve c->version.
+		 * We could be matching on a page that is no longer active and
+		 * put the object onto the freelist of the wrong slab.
+		 *
+		 * On the other hand: if we already have the version,
+		 * then any change of cpu_slab will cause the cmpxchg to fail,
+		 * since the freelist pointers are unique per slab.
+		 */
+		barrier();
+		base = c->base;
+		if (unlikely(get_high_half((unsigned long)old) == HALF_LONG_MASK
+				|| !same_base(base, object)
+				|| page != c->page || c->node < 0)) {
+			preempt_enable();
+			/*
+			 * __slab_free must make no assumption about the
+			 * tests previously done by slab_free: we could be
+			 * migrated to a different CPU.
+			 */
+			__slab_free(s, page, x, addr, c->offset);
+			break;
+		}
+		/*
+		 * It's ok to overwrite the content of object[c->offset] because
+		 * we own the object. This object won't appear in the freelist
+		 * until our cmpxchg_local succeeds. Therefore, no other part of
+		 * the slub slow path can use this object.
+		 * The result of make_ptr does not have to be dereferenced
+		 * until the cmpxchg succeeds. We don't care if base and old are
+		 * out-of-sync.
+		 */
+		object[c->offset] = make_ptr(base, old);
+		stat(c, FREE_FASTPATH);
+		new = make_version(old + HALF_LONG_MASK + 1, object);
+		result = cmpxchg_local(&c->freelist, old, new);
+#ifdef CONFIG_DEBUG_VM
+		/*
+		 * Just to be paranoid: warn if we detect that enough free or
+		 * slow paths nested on top of us to get the counter to go
+		 * half-way to overflow. It would be insane to do that many
+		 * allocations/frees in interrupt handlers, but check it anyway.
+		 */
+		WARN_ON(result - old > -1UL >> 1);
+#endif
+		if (result == old) {
+			preempt_enable();
+			break;
+		}
+	}
+#else
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -1785,6 +1957,7 @@ static __always_inline void slab_free(st
 		__slab_free(s, page, x, addr, c->offset);
 
 	local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
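
For readers unfamiliar with the encoding, here is a minimal standalone userspace sketch of the half-word "version | offset" scheme the fast paths above rely on. The patch uses get_high_half(), same_base(), make_ptr(), make_version() and HALF_LONG_MASK without defining them in the hunks shown (is_end(), which detects the end-of-list marker, is not modelled here); the definitions below are illustrative guesses consistent with how they are used, not the kernel implementation: the high half of c->freelist is a sequence counter, the low half carries the low half of the current head object's address, and c->base supplies the missing high half.

/*
 * Standalone model of the versioned freelist word. The helper
 * definitions are guesses at the semantics the patch relies on.
 */
#include <assert.h>
#include <stdio.h>

#define BITS_PER_LONG		(sizeof(unsigned long) * 8)
#define HALF_BITS_PER_LONG	(BITS_PER_LONG / 2)
#define HALF_LONG_MASK		((1UL << HALF_BITS_PER_LONG) - 1)

/* High half of a word: the sequence counter of a freelist word, or the
 * address prefix of a base pointer. */
static unsigned long get_high_half(unsigned long v)
{
	return v >> HALF_BITS_PER_LONG;
}

/* Does @object live under the same high-half address prefix as @base? */
static int same_base(unsigned long base, void *object)
{
	return get_high_half((unsigned long)object) == get_high_half(base);
}

/* Rebuild a full object pointer from the base prefix and the low half
 * of the versioned freelist word. */
static void *make_ptr(unsigned long base, void *versioned)
{
	return (void *)((base & ~HALF_LONG_MASK) |
			((unsigned long)versioned & HALF_LONG_MASK));
}

/* Keep the counter (high half) of @version, splice in the low half of
 * the new head @object. */
static void *make_version(void *version, void *object)
{
	return (void *)(((unsigned long)version & ~HALF_LONG_MASK) |
			((unsigned long)object & HALF_LONG_MASK));
}

/* Aligned so the whole array shares one high-half "base" prefix. */
static unsigned long buf[4] __attribute__((aligned(64)));

int main(void)
{
	unsigned long base = (unsigned long)buf;	/* plays the role of c->base */
	void *object = &buf[2];
	void *old, *new;

	/* Freelist word: counter 5 in the high half, object's low half below. */
	old = make_version((void *)(5UL << HALF_BITS_PER_LONG), object);

	assert(same_base(base, object));
	assert(make_ptr(base, old) == object);

	/*
	 * Pop "object" and make &buf[3] the new head: bump the counter by
	 * adding HALF_LONG_MASK + 1, exactly as the patch does, and splice
	 * in the low half of the next object.
	 */
	new = make_version((void *)((unsigned long)old + HALF_LONG_MASK + 1),
			   &buf[3]);
	assert(get_high_half((unsigned long)new) == 6);
	assert(make_ptr(base, new) == (void *)&buf[3]);

	printf("versioned freelist encoding: OK\n");
	return 0;
}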
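
As I read the patch, the counter is what makes cmpxchg_local() safe against ABA: if an interrupt frees and re-allocates the very same object between the read of c->freelist and the cmpxchg, the sequence half of the word has changed, the compare fails and the fast path retries. Since c->freelist is only modified from the owning CPU (remote frees take the __slab_free() slow path), cmpxchg_local() is sufficient and no locked bus operation is needed, which is where the win over the local_irq_save()/local_irq_restore() pair in the #else branch comes from. The WARN_ON() merely sanity-checks, per its own comment, that the counter did not advance through half of its range between the read and the cmpxchg. The sketch builds with any C compiler, e.g. "gcc -Wall -o freelist-model freelist-model.c" (file name hypothetical).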