--- mm/slub.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 177 insertions(+), 1 deletion(-) Index: linux-2.6/mm/slub.c =================================================================== --- linux-2.6.orig/mm/slub.c 2008-03-14 03:52:26.000000000 -0700 +++ linux-2.6/mm/slub.c 2008-03-14 04:40:00.000000000 -0700 @@ -301,6 +301,64 @@ static inline unsigned long slab_objects return s->max_objects; } +#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && defined(CONFIG_SMP) +/* + * For the fastpath we version the pointers. This occurs using the lower + * bits of the pointers. Versioning is impossible if we have exhausted all + * lower bits. In that case we fall back to the slow paths. + * + * The minimum pointer alignment is 8 bytes which leaves at least the + * lower 3 bits for versioning. + */ +#define SLUB_FASTPATH +#define VERSIONING_BITS 3 +#define VERSION_MASK ((1 << VERSIONING_BITS) -1) +#define LAST_VERSION VERSION_MASK + +static inline int get_version(void *p) +{ + return (unsigned long)p & VERSION_MASK; +} + +static inline void *unversion(void *p) +{ + return (void *)((unsigned long)p & ~VERSION_MASK); +} + +static inline void *set_version(void *p, int version) +{ + return p + version; +} + +static inline int next_version(int version) +{ + return (version + 1) & VERSION_MASK; +} + +static inline void *set_next_version(void *p, int version) +{ + return p + next_version(version); +} + +/* + * Setup the next freelist pointer increasing the version number + * if necessary. 
+ */ +static inline void set_freelist(struct kmem_cache_cpu *c, void **p) +{ + void *freelist = c->freelist; + int version = get_version(freelist); + + if (version < LAST_VERSION || !in_interrupt()) + version = next_version(version); + c->freelist = set_version(p, version); +} + +static inline void *get_freelist(struct kmem_cache_cpu *c) +{ + return unversion(c->freelist); +} +#else static inline void **get_freelist(struct kmem_cache_cpu *c) { return c->freelist; @@ -310,7 +368,7 @@ static inline void set_freelist(struct k { c->freelist = p; } - +#endif /* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) @@ -1518,7 +1576,14 @@ static void *__slab_alloc(struct kmem_ca { void **object; struct page *new; +#ifdef SLUB_FASTPATH + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_PREEMPT + c = get_cpu_slab(s, raw_smp_processor_id()); +#endif +#endif if (!c->page) goto new_slab; @@ -1542,6 +1607,9 @@ load_freelist: unlock_out: slab_unlock(c->page); stat(c, ALLOC_SLOWPATH); +#ifdef SLUB_FASTPATH + local_irq_restore(flags); +#endif return object; another_slab: @@ -1573,6 +1641,9 @@ new_slab: c->page = new; goto load_freelist; } +#ifdef SLUB_FASTPATH + local_irq_restore(flags); +#endif return NULL; debug: if (!alloc_debug_processing(s, c->page, object, addr)) @@ -1599,6 +1670,56 @@ static __always_inline void *slab_alloc( { void **object; struct kmem_cache_cpu *c; + +/* + * The SLUB_FASTPATH path is provisional and is currently only built on + * SMP kernels whose arch supports fast cmpxchg_local operations. Preemption + * is disabled around the fastpath. There are a couple of coming changes that + * will simplify matters and remove that need. Ultimately we may end up making + * SLUB_FASTPATH the default. + * + * 1. The introduction of the per cpu allocator will avoid array lookups + * through get_cpu_slab(). A special register can be used instead. 
+ * + * 2. The introduction of per cpu atomic operations (cpu_ops) means that + * we can realize the logic here entirely with per cpu atomics. The + * per cpu atomic ops will take care of the preemption issues. + */ + +#ifdef SLUB_FASTPATH + void *freelist; + int version; + + preempt_disable(); + c = get_cpu_slab(s, raw_smp_processor_id()); + do { + freelist = c->freelist; + object = unversion(freelist); + version = get_version(freelist); + if (unlikely(!object || !node_match(c, node) + || version == LAST_VERSION)) { + preempt_enable(); + object = __slab_alloc(s, gfpflags, node, addr, c); + goto got_object; + } + stat(c, ALLOC_FASTPATH); + /* + * Note that the cmpxchg does a speculative read to + * the freepointer of the object. That pointer may have + * been overwritten with some data if a concurrent alloc + * grabbed the object. The page may even have been freed. + * + * The cmpxchg_local will fail because of the versioning + * if any of this occurs. However, this means that + * environments that do not allow read of a page after + * it was freed (some virtualization and debugging tools) + * are not compatible with fastpath processing. 
+ */ + } while (cmpxchg_local(&c->freelist, freelist, + set_next_version(object[c->offset], version)) != freelist); + preempt_enable(); +got_object: +#else unsigned long flags; local_irq_save(flags); @@ -1613,6 +1734,7 @@ static __always_inline void *slab_alloc( stat(c, ALLOC_FASTPATH); } local_irq_restore(flags); +#endif if (unlikely((gfpflags & __GFP_ZERO) && object)) memset(object, 0, c->objsize); @@ -1649,6 +1771,11 @@ static void __slab_free(struct kmem_cach void **object = (void *)x; struct kmem_cache_cpu *c; +#ifdef SLUB_FASTPATH + unsigned long flags; + + local_irq_save(flags); +#endif c = get_cpu_slab(s, raw_smp_processor_id()); stat(c, FREE_SLOWPATH); slab_lock(page); @@ -1680,6 +1807,9 @@ checks_ok: out_unlock: slab_unlock(page); +#ifdef SLUB_FASTPATH + local_irq_restore(flags); +#endif return; slab_empty: @@ -1693,6 +1823,9 @@ slab_empty: slab_unlock(page); stat(c, FREE_SLAB); discard_slab(s, page); +#ifdef SLUB_FASTPATH + local_irq_restore(flags); +#endif return; debug: @@ -1717,6 +1850,48 @@ static __always_inline void slab_free(st { void **object = (void *)x; struct kmem_cache_cpu *c; + +#ifdef SLUB_FASTPATH + void *old, *next_free; + int version; + + preempt_disable(); + c = get_cpu_slab(s, raw_smp_processor_id()); + debug_check_no_locks_freed(object, c->objsize); + do { + old = c->freelist; + /* + * If the compiler would reorder the retrieval of c->page and + * c->node to come before c->freelist then an interrupt + * could change the cpu slab before we retrieve the freelist version. + * We could be matching on a page no longer active and put the + * object onto the freelist of the wrong slab. + * + * On the other hand: If we already have the version + * then any change of cpu_slab will cause the cmpxchg to fail + * since the freelist pointers are unique per slab. 
+ */ + barrier(); + next_free = unversion(old); + version = get_version(old); + if (unlikely(page != c->page || c->node < 0 || + version == LAST_VERSION)) { + preempt_enable(); + __slab_free(s, page, x, addr, c->offset); + return; + } + /* + * It's ok to overwrite the content of object[c->offset] because + * we own the object. This object won't appear in the freelist + * until our cmpxchg_local succeeds. Therefore, no other part of + * the slub slow path can use this object. + */ + object[c->offset] = next_free; + stat(c, FREE_FASTPATH); + } while (cmpxchg_local(&c->freelist, old, + set_next_version(object, version)) != old); + preempt_enable(); +#else unsigned long flags; local_irq_save(flags); @@ -1730,6 +1905,7 @@ static __always_inline void slab_free(st __slab_free(s, page, x, addr, c->offset); local_irq_restore(flags); +#endif } void kmem_cache_free(struct kmem_cache *s, void *x)