---
 include/asm-x86/percpu_32.h |    4 ++++
 mm/slub.c                   |   24 +++++++++++-------------
 2 files changed, 15 insertions(+), 13 deletions(-)

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2007-10-31 14:33:47.897626367 -0700
+++ linux-2.6/mm/slub.c	2007-10-31 15:05:58.259370890 -0700
@@ -1503,10 +1503,10 @@ static void flush_all(struct kmem_cache 
  * Check if the objects in a per cpu structure fit numa
  * locality expectations.
  */
-static inline int node_match(struct kmem_cache_cpu *c, int node)
+static inline int node_match(int n, int node)
 {
 #ifdef CONFIG_NUMA
-	if (node != -1 && c->node != node)
+	if (node != -1 && n != node)
 		return 0;
 #endif
 	return 1;
@@ -1539,7 +1539,7 @@ static noinline unsigned long get_new_sl
 		 * requested node even if __GFP_THISNODE was
 		 * specified. So we need to recheck.
 		 */
-		if (node_match(c, node)) {
+		if (node_match(c->node, node)) {
 			/*
 			 * Current cpuslab is acceptable and we
 			 * want the current one since its cache hot
@@ -1581,11 +1581,12 @@ static void *__slab_alloc(struct kmem_ca
 
 	local_irq_save(flags);
 	preempt_enable_no_resched();
+	c = get_cpu_slab(s, smp_processor_id());
 #endif
 	if (likely(c->page)) {
 		state = slab_lock(c->page);
 
-		if (unlikely(node_match(c, node) &&
+		if (unlikely(node_match(c->node, node) &&
 			c->page->freelist != c->page->end))
 				goto load_freelist;
 
@@ -1659,27 +1660,24 @@ static void __always_inline *slab_alloc(
 	struct kmem_cache_cpu *c;
 
 #ifdef CONFIG_FAST_CMPXCHG_LOCAL
-	preempt_disable();
-	c = this_cpu_slab(s);
+	c = s->cpu_slab;
 	do {
-		object = c->freelist;
-		if (unlikely(is_end(object) || !node_match(c, node))) {
-			object = __slab_alloc(s, gfpflags, node, addr, c);
+		object = percpu_read(c->freelist);
+		if (unlikely(is_end(object) || !node_match(percpu_read(c->node), node))) {
+			object = __slab_alloc(s, gfpflags, node, addr, this_cpu_slab(s));
 			if (unlikely(!object)) {
-				preempt_enable();
 				goto out;
 			}
 			break;
 		}
-	} while (cmpxchg_local(&c->freelist, object,
+	} while (percpu_cmpxchg(&c->freelist, object,
 			get_freepointer(s, object)) != object);
-	preempt_enable();
 #else
 	unsigned long flags;
 
 	local_irq_save(flags);
 	c = this_cpu_slab(s);
-	if (unlikely((is_end(c->freelist)) || !node_match(c, node))) {
+	if (unlikely((is_end(c->freelist)) || !node_match(c->node, node))) {
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 		if (unlikely(!object)) {
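
The CONFIG_FAST_CMPXCHG_LOCAL fastpath above drops the preempt_disable()/preempt_enable()
pair: the freelist head and node are read through percpu_read(), the head is advanced with
percpu_cmpxchg(), and __slab_alloc() re-derives the kmem_cache_cpu pointer via
get_cpu_slab(s, smp_processor_id()) once interrupts are off. As a rough plain-C sketch of
the same lockless pop pattern (freelist_pop and free_ptr_offset are made-up illustration
names, and __sync_val_compare_and_swap stands in for the per-cpu cmpxchg, so this shows
the shape of the loop, not the kernel code):

#include <stddef.h>

static void *freelist_pop(void **head, size_t free_ptr_offset)
{
	void *object, *next;

	do {
		object = *head;		/* percpu_read(c->freelist) in the patch */
		if (!object)
			return NULL;	/* take the slow path (__slab_alloc) */
		/* get_freepointer(): the next pointer is stored inside the object */
		next = *(void **)((char *)object + free_ptr_offset);
	} while (__sync_val_compare_and_swap(head, object, next) != object);

	return object;
}

The retry loop is what tolerates an interrupt allocating or freeing on the same cpu
between the read and the swap, without taking a lock or disabling preemption.
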
Index: linux-2.6/include/asm-x86/percpu_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/percpu_32.h	2007-10-31 15:04:25.003371191 -0700
+++ linux-2.6/include/asm-x86/percpu_32.h	2007-10-31 15:05:11.052213024 -0700
@@ -151,6 +151,10 @@ extern void __bad_percpu_size(void);
 #define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
 #define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
 #define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+
+#define percpu_read(ptr) percpu_from_op("mov", ptr)
+#define percpu_cmpxchg(ptr, o, n) percpu_from_op("cmpxchg", ptr, o, n)
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ARCH_I386_PERCPU__ */
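
As defined above, percpu_read() passes percpu_from_op() the single operand it expects,
but percpu_cmpxchg() hands it three, and a cmpxchg is a read-modify-write rather than a
load, so it most likely needs its own asm template. A minimal sketch of the 4-byte case,
assuming the __percpu_seg prefix used by the other ops in this file (percpu_cmpxchg_op is
a hypothetical name, not something this patch defines):

#define percpu_cmpxchg_op(var, old, new)			\
({								\
	typeof(var) __prev;					\
	asm volatile("cmpxchgl %2," __percpu_seg "%1"		\
		     : "=a" (__prev), "+m" (var)		\
		     : "r" (new), "0" (old)			\
		     : "memory");				\
	__prev;							\
})

A complete version would switch on sizeof(var) and cover the 1-, 2- and 4-byte cases,
the same way percpu_to_op() and percpu_from_op() do.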