---
 mm/slub.c |  173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2008-03-14 03:57:12.000000000 -0700
+++ linux-2.6/mm/slub.c	2008-03-14 03:58:59.000000000 -0700
@@ -1573,7 +1573,14 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	struct page *new;
+#ifdef SLUB_FASTPATH
+	unsigned long flags;
 
+	local_irq_save(flags);
+#ifdef CONFIG_PREEMPT
+	c = get_cpu_slab(s, raw_smp_processor_id());
+#endif
+#endif
 	if (!c->page)
 		goto new_slab;
 
@@ -1597,6 +1604,9 @@ load_freelist:
 unlock_out:
 	slab_unlock(c->page);
 	stat(c, ALLOC_SLOWPATH);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return object;
 
 another_slab:
@@ -1628,6 +1638,9 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return NULL;
 debug:
 	if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1654,6 +1667,90 @@ static __always_inline void *slab_alloc(
 {
 	void **object;
 	struct kmem_cache_cpu *c;
+
+/*
+ * The SLUB_FASTPATH path is provisional and is currently disabled if the
+ * kernel is compiled with preemption or if the arch does not support
+ * fast cmpxchg operations. There are a couple of upcoming changes that will
+ * simplify matters and allow preemption. Ultimately we may end up making
+ * SLUB_FASTPATH the default.
+ *
+ * 1. The introduction of the per cpu allocator will avoid array lookups
+ *    through get_cpu_slab(). A special register can be used instead.
+ *
+ * 2. The introduction of per cpu atomic operations (cpu_ops) means that
+ *    we can realize the logic here entirely with per cpu atomics. The
+ *    per cpu atomic ops will take care of the preemption issues.
+ */
+
+#ifdef SLUB_FASTPATH
+	void *old, *new, *result, *next_object;
+	unsigned long base;
+
+	preempt_disable();
+	c = get_cpu_slab(s, raw_smp_processor_id());
+fastpath:	/* fastpath cmpxchg loop */
+	old = c->freelist;
+	/*
+	 * Whenever c->base is changed, the sequence number
+	 * _must_ be incremented. This barrier ensures we read the
+	 * version before c->base with respect to interrupts.
+	 */
+	barrier();
+	base = c->base;
+	if (unlikely(is_end(old) || !node_match(c, node)))
+		goto slowpath;
+	if (unlikely(get_high_half((unsigned long)old) == HALF_LONG_MASK))
+		goto slowpath;
+	/*
+	 * make_ptr on base should always return a valid pointer;
+	 * ensure base has not been changed by a nested interrupt by
+	 * re-reading the freelist sequence number. This makes sure that
+	 * base and the offset will generate a valid pointer.
+	 */
+	barrier();
+	if (c->freelist != old)
+		goto fastpath;	/* retry */
+	object = make_ptr(base, old);
+	/*
+	 * Need to increment the MSB counter here, because the
+	 * object[c->offset] use is racy. We can race against
+	 * another slab_alloc fast path.
+	 * Note that the object[c->offset] read may return garbage, but
+	 * it is guaranteed to point to a valid address since pages are
+	 * always reused in the page allocator. We can tell whether the
+	 * object[c->offset] read returned garbage because the sequence
+	 * number is incremented each time the freelist is modified.
+	 */
+	next_object = object[c->offset];
+	if (unlikely(!same_base(base, next_object)))
+		goto slowpath;
+	stat(c, ALLOC_FASTPATH);
+	new = make_version(old + HALF_LONG_MASK + 1, next_object);
+	result = cmpxchg_local(&c->freelist, old, new);
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Just to be paranoid: warn if we detect that enough free or
+	 * slow paths nested on top of us to get the counter to go
+	 * half-way to overflow. It would be insane to do that many
+	 * allocations/frees in interrupt handlers, but check it anyway.
+	 */
+	WARN_ON(result - old > -1UL >> 1);
+#endif
+	if (result != old)
+		goto fastpath;	/* retry */
+	preempt_enable();
+	goto got_object;
+slowpath:
+	preempt_enable();
+	/*
+	 * __slab_alloc must make no assumption about the
+	 * tests previously done by slab_alloc: we could be
+	 * migrated to a different CPU.
+	 */
+	object = __slab_alloc(s, gfpflags, node, addr, c);
+got_object:
+#else
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -1668,6 +1765,7 @@ static __always_inline void *slab_alloc(
 		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
+#endif
 
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, c->objsize);
@@ -1704,6 +1802,11 @@ static void __slab_free(struct kmem_cach
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
 
+#ifdef SLUB_FASTPATH
+	unsigned long flags;
+
+	local_irq_save(flags);
+#endif
 	c = get_cpu_slab(s, raw_smp_processor_id());
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
@@ -1735,6 +1838,9 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return;
 
 slab_empty:
@@ -1748,6 +1854,9 @@ slab_empty:
 	slab_unlock(page);
 	stat(c, FREE_SLAB);
 	discard_slab(s, page);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return;
 
 debug:
@@ -1772,6 +1881,69 @@ static __always_inline void slab_free(st
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
+
+#ifdef SLUB_FASTPATH
+	void *old, *new, *result;
+	unsigned long base;
+
+	preempt_disable();
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	debug_check_no_locks_freed(object, c->objsize);
+	while (1) {
+		old = c->freelist;
+		/*
+		 * If the compiler were to reorder the retrieval of c->page and
+		 * c->base to come before c->freelist, then an interrupt
+		 * could change the cpu slab before we retrieve c->version.
+		 * We could be matching on a page that is no longer active and
+		 * put the object onto the freelist of the wrong slab.
+		 *
+		 * On the other hand: if we already have the version,
+		 * then any change of cpu_slab will cause the cmpxchg to fail,
+		 * since the freelist pointers are unique per slab.
+		 */
+		barrier();
+		base = c->base;
+		if (unlikely(get_high_half((unsigned long)old) == HALF_LONG_MASK
+				|| !same_base(base, object)
+				|| page != c->page || c->node < 0)) {
+			preempt_enable();
+			/*
+			 * __slab_free must make no assumption about the
+			 * tests previously done by slab_free: we could be
+			 * migrated to a different CPU.
+			 */
+			__slab_free(s, page, x, addr, c->offset);
+			break;
+		}
+		/*
+		 * It's ok to overwrite the content of object[c->offset] because
+		 * we own the object. This object won't appear in the freelist
+		 * until our cmpxchg_local succeeds. Therefore, no other part of
+		 * the slub slow path can use this object.
+		 * The result of make_ptr does not have to be dereferenced
+		 * until the cmpxchg succeeds. We don't care if base and old are
+		 * out-of-sync.
+		 */
+		object[c->offset] = make_ptr(base, old);
+		stat(c, FREE_FASTPATH);
+		new = make_version(old + HALF_LONG_MASK + 1, object);
+		result = cmpxchg_local(&c->freelist, old, new);
+#ifdef CONFIG_DEBUG_VM
+		/*
+		 * Just to be paranoid: warn if we detect that enough free or
+		 * slow paths nested on top of us to get the counter to go
+		 * half-way to overflow. It would be insane to do that many
+		 * allocations/frees in interrupt handlers, but check it anyway.
+		 */
+		WARN_ON(result - old > -1UL >> 1);
+#endif
+		if (result == old) {
+			preempt_enable();
+			break;
+		}
+	}
+#else
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -1785,6 +1957,7 @@ static __always_inline void slab_free(st
 		__slab_free(s, page, x, addr, c->offset);
 
 	local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
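
For readers unfamiliar with the encoding, here is a minimal standalone userspace sketch of the half-word "version | offset" scheme the fast paths above rely on. The patch uses get_high_half(), same_base(), make_ptr(), make_version() and HALF_LONG_MASK without defining them in the hunks shown (is_end(), which detects the end-of-list marker, is not modelled here); the definitions below are illustrative guesses consistent with how they are used, not the kernel implementation: the high half of c->freelist is a sequence counter, the low half carries the low half of the current head object's address, and c->base supplies the missing high half.

/*
 * Standalone model of the versioned freelist word. The helper
 * definitions are guesses at the semantics the patch relies on.
 */
#include <assert.h>
#include <stdio.h>

#define BITS_PER_LONG		(sizeof(unsigned long) * 8)
#define HALF_BITS_PER_LONG	(BITS_PER_LONG / 2)
#define HALF_LONG_MASK		((1UL << HALF_BITS_PER_LONG) - 1)

/* High half of a word: the sequence counter of a freelist word, or the
 * address prefix of a base pointer. */
static unsigned long get_high_half(unsigned long v)
{
	return v >> HALF_BITS_PER_LONG;
}

/* Does @object live under the same high-half address prefix as @base? */
static int same_base(unsigned long base, void *object)
{
	return get_high_half((unsigned long)object) == get_high_half(base);
}

/* Rebuild a full object pointer from the base prefix and the low half
 * of the versioned freelist word. */
static void *make_ptr(unsigned long base, void *versioned)
{
	return (void *)((base & ~HALF_LONG_MASK) |
			((unsigned long)versioned & HALF_LONG_MASK));
}

/* Keep the counter (high half) of @version, splice in the low half of
 * the new head @object. */
static void *make_version(void *version, void *object)
{
	return (void *)(((unsigned long)version & ~HALF_LONG_MASK) |
			((unsigned long)object & HALF_LONG_MASK));
}

/* Aligned so the whole array shares one high-half "base" prefix. */
static unsigned long buf[4] __attribute__((aligned(64)));

int main(void)
{
	unsigned long base = (unsigned long)buf;	/* plays the role of c->base */
	void *object = &buf[2];
	void *old, *new;

	/* Freelist word: counter 5 in the high half, object's low half below. */
	old = make_version((void *)(5UL << HALF_BITS_PER_LONG), object);

	assert(same_base(base, object));
	assert(make_ptr(base, old) == object);

	/*
	 * Pop "object" and make &buf[3] the new head: bump the counter by
	 * adding HALF_LONG_MASK + 1, exactly as the patch does, and splice
	 * in the low half of the next object.
	 */
	new = make_version((void *)((unsigned long)old + HALF_LONG_MASK + 1),
			   &buf[3]);
	assert(get_high_half((unsigned long)new) == 6);
	assert(make_ptr(base, new) == (void *)&buf[3]);

	printf("versioned freelist encoding: OK\n");
	return 0;
}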
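
As I read the patch, the counter is what makes cmpxchg_local() safe against ABA: if an interrupt frees and re-allocates the very same object between the read of c->freelist and the cmpxchg, the sequence half of the word has changed, the compare fails and the fast path retries. Since c->freelist is only modified from the owning CPU (remote frees take the __slab_free() slow path), cmpxchg_local() is sufficient and no locked bus operation is needed, which is where the win over the local_irq_save()/local_irq_restore() pair in the #else branch comes from. The WARN_ON() merely sanity-checks, per its own comment, that the counter did not advance through half of its range between the read and the cmpxchg. The sketch builds with any C compiler, e.g. "gcc -Wall -o freelist-model freelist-model.c" (file name hypothetical).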