Index: linux-2.6/include/linux/slab_def.h
===================================================================
--- linux-2.6.orig/include/linux/slab_def.h  2009-10-21 13:24:09.000000000 -0500
+++ linux-2.6/include/linux/slab_def.h  2009-10-21 13:24:53.000000000 -0500
@@ -24,7 +24,7 @@
 struct kmem_cache {
 /* 1) per-cpu data, touched during every alloc/free */
-        struct array_cache *array[NR_CPUS];
+        struct array_cache *array;
 /* 2) Cache tunables. Protected by cache_chain_mutex */
         unsigned int batchcount;
         unsigned int limit;
Index: linux-2.6/mm/slab.c
===================================================================
--- linux-2.6.orig/mm/slab.c  2009-10-21 13:24:56.000000000 -0500
+++ linux-2.6/mm/slab.c  2009-10-21 13:51:44.000000000 -0500
@@ -689,7 +689,7 @@
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
-        return cachep->array[smp_processor_id()];
+        return this_cpu_ptr(cachep->array);
 }
 
 static inline struct kmem_cache *__find_general_cachep(size_t size,
@@ -1106,8 +1106,8 @@
                 struct array_cache **alien;
 
                 /* cpu is dead; no one can alloc from it. */
-                nc = cachep->array[cpu];
-                cachep->array[cpu] = NULL;
+                /* The per cpu array_cache itself is never freed. */
+                nc = per_cpu_ptr(cachep->array, cpu);
                 l3 = cachep->nodelists[node];
 
                 if (!l3)
@@ -1143,7 +1143,7 @@
                         free_alien_cache(alien);
                 }
 free_array_cache:
-                kfree(nc);
+                nc->avail = 0;
         }
         /*
          * In the previous loop, all the objects were freed to
@@ -1523,8 +1522,7 @@
                  */
                 spin_lock_init(&ptr->lock);
 
-                malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
-                                ptr;
+                this_cpu_write(malloc_sizes[INDEX_AC].cs_cachep->array, ptr);
         }
         /* 5) Replace the bootstrap kmem_list3's */
         {
@@ -1914,8 +1912,7 @@
         int i;
         struct kmem_list3 *l3;
 
-        for_each_online_cpu(i)
-            kfree(cachep->array[i]);
+        free_percpu(cachep->array);
 
         /* NUMA: free the list3 structures */
         for_each_online_node(i) {
@@ -2011,7 +2008,7 @@
                  * that's used by kmalloc(24), otherwise the creation of
                  * further caches will BUG().
                  */
-                cachep->array[smp_processor_id()] = &initarray_generic.cache;
+                this_cpu_write(cachep->array, &initarray_generic.cache);
 
                 /*
                  * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
@@ -2024,8 +2021,7 @@
                 else
                         g_cpucache_up = PARTIAL_AC;
         } else {
-                cachep->array[smp_processor_id()] =
-                        kmalloc(sizeof(struct arraycache_init), gfp);
+                this_cpu_write(cachep->array, kmalloc(sizeof(struct arraycache_init), gfp));
 
                 if (g_cpucache_up == PARTIAL_AC) {
                         set_up_list3s(cachep, SIZE_L3);
@@ -2931,9 +2927,9 @@
 retry:
         check_irq_off();
         node = numa_node_id();
-        ac = cpu_cache_get(cachep);
-        batchcount = ac->batchcount;
-        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
+        ac = cachep->array;
+        batchcount = __this_cpu_read(ac->batchcount);
+        if (!__this_cpu_read(ac->touched) && batchcount > BATCHREFILL_LIMIT) {
                 /*
                  * If there was little recent activity on this cache, then
                  * perform only a partial refill.  Otherwise we could generate
@@ -2943,11 +2939,11 @@
         }
         l3 = cachep->nodelists[node];
 
-        BUG_ON(ac->avail > 0 || !l3);
+        BUG_ON(__this_cpu_read(ac->avail) > 0 || !l3);
         spin_lock(&l3->list_lock);
 
         /* See if we can refill from the shared array */
-        if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
+        if (l3->shared && transfer_objects(__this_cpu_ptr(ac), l3->shared, batchcount))
                 goto alloc_done;
 
         while (batchcount > 0) {
@@ -2978,8 +2974,9 @@
                         STATS_INC_ACTIVE(cachep);
                         STATS_SET_HIGH(cachep);
 
-                        ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
+                        __this_cpu_ptr(ac->entry)[__this_cpu_read(ac->avail)] = slab_get_obj(cachep, slabp,
                                                             node);
+                        __this_cpu_inc(ac->avail);
                 }
                 check_slabp(cachep, slabp);
@@ -2992,24 +2989,25 @@
         }
 
 must_grow:
-        l3->free_objects -= ac->avail;
+        l3->free_objects -= __this_cpu_read(ac->avail);
 alloc_done:
         spin_unlock(&l3->list_lock);
 
-        if (unlikely(!ac->avail)) {
+        if (unlikely(!__this_cpu_read(ac->avail))) {
                 int x;
                 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
                 /* cache_grow can reenable interrupts, then ac could change. */
-                ac = cpu_cache_get(cachep);
-                if (!x && ac->avail == 0)       /* no objects in sight? abort */
+                ac = cachep->array;
+                if (!x && __this_cpu_read(ac->avail) == 0)      /* no objects in sight? abort */
                         return NULL;
 
-                if (!ac->avail)         /* objects refilled by interrupt? */
+                if (!__this_cpu_read(ac->avail))        /* objects refilled by interrupt? */
                         goto retry;
         }
-        ac->touched = 1;
-        return ac->entry[--ac->avail];
+        __this_cpu_write(ac->touched, 1);
+        __this_cpu_dec(ac->avail);
+        return __this_cpu_ptr(ac->entry)[__this_cpu_read(ac->avail)];
 }
 
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3095,11 +3093,12 @@
         check_irq_off();
 
-        ac = cpu_cache_get(cachep);
-        if (likely(ac->avail)) {
+        ac = cachep->array;
+        if (likely(__this_cpu_read(ac->avail))) {
                 STATS_INC_ALLOCHIT(cachep);
-                ac->touched = 1;
-                objp = ac->entry[--ac->avail];
+                __this_cpu_write(ac->touched, 1);
+                __this_cpu_dec(ac->avail);
+                objp = __this_cpu_ptr(ac->entry)[__this_cpu_read(ac->avail)];
         } else {
                 STATS_INC_ALLOCMISS(cachep);
                 objp = cache_alloc_refill(cachep, flags);
@@ -3109,7 +3108,7 @@
          * per-CPU caches is leaked, we need to make sure kmemleak doesn't
          * treat the array pointers as a reference to the object.
          */
-        kmemleak_erase(&ac->entry[ac->avail]);
+        kmemleak_erase(&this_cpu_ptr(ac->entry)[__this_cpu_read(ac->avail)]);
         return objp;
 }
 
@@ -3504,13 +3503,13 @@
                 memmove(ac->entry, &(ac->entry[batchcount]),
                         sizeof(void *)*ac->avail);
 }
 
 /*
  * Release an obj back to its cache. If the obj has a constructed state, it must
  * be in this state _before_ it is released. Called with disabled ints.
  */
 static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 {
-        struct array_cache *ac = cpu_cache_get(cachep);
+        struct array_cache *ac = cachep->array;
 
         check_irq_off();
         kmemleak_free_recursive(objp, cachep->flags);
@@ -3528,14 +3527,16 @@
         if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
                 return;
 
-        if (likely(ac->avail < ac->limit)) {
+        if (likely(__this_cpu_read(ac->avail) < __this_cpu_read(ac->limit))) {
                 STATS_INC_FREEHIT(cachep);
-                ac->entry[ac->avail++] = objp;
+                __this_cpu_ptr(ac->entry)[__this_cpu_read(ac->avail)] = objp;
+                __this_cpu_inc(ac->avail);
                 return;
         } else {
                 STATS_INC_FREEMISS(cachep);
-                cache_flusharray(cachep, ac);
-                ac->entry[ac->avail++] = objp;
+                cache_flusharray(cachep, __this_cpu_ptr(ac));
+                __this_cpu_ptr(ac->entry)[__this_cpu_read(ac->avail)] = objp;
+                __this_cpu_inc(ac->avail);
         }
 }
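
The conversion above leans entirely on the generic percpu accessors instead of the open-coded NR_CPUS pointer array. For reference, here is a minimal, self-contained sketch of that pattern, assuming nothing beyond the stock percpu API: every demo_* name is illustrative only, and array_cache is reduced to bare counters, so this is not slab.c code, just the shape of the accesses the hunks perform.

#include <linux/percpu.h>
#include <linux/types.h>
#include <linux/errno.h>

/* Reduced stand-in for struct array_cache: counters only, no entry[]. */
struct demo_array_cache {
	unsigned int avail;
	unsigned int limit;
	unsigned int touched;
};

struct demo_cache {
	/* One demo_array_cache per possible CPU, replacing array[NR_CPUS]. */
	struct demo_array_cache __percpu *array;
};

static int demo_cache_init(struct demo_cache *c)
{
	c->array = alloc_percpu(struct demo_array_cache);
	return c->array ? 0 : -ENOMEM;
}

/* Equivalent of the patched cpu_cache_get(): this CPU's instance. */
static struct demo_array_cache *demo_cpu_cache_get(struct demo_cache *c)
{
	return this_cpu_ptr(c->array);
}

/*
 * Field access through the percpu pointer without materializing the
 * per-CPU struct pointer first, the way the patched hot paths use
 * __this_cpu_read(ac->avail) and friends.  The real allocation and
 * free paths run with interrupts off, which keeps the read/modify
 * sequence on one CPU.
 */
static bool demo_fastpath_hit(struct demo_cache *c)
{
	if (!this_cpu_read(c->array->avail))
		return false;
	this_cpu_write(c->array->touched, 1);
	this_cpu_dec(c->array->avail);
	return true;
}

static void demo_cache_destroy(struct demo_cache *c)
{
	free_percpu(c->array);	/* replaces the for_each_online_cpu() kfree loop */
}

The point of doing per-field this_cpu operations in the hot paths is that on x86 they can compile down to single segment-prefixed instructions, so the fast paths avoid both the smp_processor_id() lookup and the extra pointer dereference through the per-CPU pointer table.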