Index: linux-2.6.19-mm1/include/linux/slub_def.h
===================================================================
--- linux-2.6.19-mm1.orig/include/linux/slub_def.h	2006-12-12 15:34:24.000000000 -0800
+++ linux-2.6.19-mm1/include/linux/slub_def.h	2006-12-12 15:54:39.000000000 -0800
@@ -11,18 +11,29 @@
 #include
 
 /*
+ * Per cpu structure to manage active slabs.
+ */
+struct active_slab {
+	struct page *page;
+	struct kmem_cache *slab;
+	int referenced;
+#ifdef CONFIG_SMP
+	int flush_active;
+	struct delayed_work flush;
+#endif
+} ____cacheline_aligned_in_smp;
+/*
  * Slab cache management.
  */
 struct kmem_cache {
 	spinlock_t list_lock;	/* Protecty partial list and nr_partial */
 	struct list_head partial;
 	unsigned long nr_partial;
-	int offset;		/* Free pointer offset. */
-	struct page *active[NR_CPUS];
 	atomic_long_t nr_slabs;	/* Total slabs used */
+	int offset;		/* Free pointer offset. */
+	int size;		/* Total size of an object */
 	unsigned int order;	/* Size of the slab page */
 	unsigned long flags;
-	int size;		/* Total size of an object */
 	int objects;		/* Number of objects in slab */
 	atomic_t refcount;	/* Refcount for destroy */
 	int align;
@@ -33,10 +44,10 @@
 	int inuse;		/* Used portion of the chunk */
 	const char *name;	/* Name (only for display!) */
 	struct list_head list;	/* List of slabs */
-#ifdef CONFIG_SMP
-	struct mutex flushing;
-	atomic_t active_cpus;	/* if >0 then flusher is scheduled */
-	struct delayed_work flush;
+#ifdef CONFIG_NUMA
+	struct active_slab *active[NR_CPUS];
+#else
+	struct active_slab active[NR_CPUS] ____cacheline_aligned_in_smp;
 #endif
 };
 
Index: linux-2.6.19-mm1/mm/slub.c
===================================================================
--- linux-2.6.19-mm1.orig/mm/slub.c	2006-12-12 15:34:28.000000000 -0800
+++ linux-2.6.19-mm1/mm/slub.c	2006-12-12 15:50:37.000000000 -0800
@@ -60,6 +60,12 @@
 static void register_slab(struct kmem_cache *s);
 static void unregister_slab(struct kmem_cache *s);
 
+#ifdef CONFIG_NUMA
+#define ACTIVE_SLAB(__s,__cpu) ((__s)->active[__cpu])
+#else
+#define ACTIVE_SLAB(__s,__cpu) (&(__s)->active[__cpu])
+#endif
+
 /********************************************************************
  *			Core slab cache functions
  *******************************************************************/
@@ -450,14 +456,15 @@
 /*
  * Remove the currently active slab
  */
-static void __always_inline deactivate_slab(struct kmem_cache *s,
-					struct page *page, int cpu)
+static void __always_inline deactivate_slab(struct active_slab *a)
 {
-	s->active[cpu] = NULL;
+	struct page *page = a->page;
+
+	a->page = NULL;
+	a->referenced = 0;
 	__ClearPageActive(page);
-	__ClearPageReferenced(page);
-	putback_slab(s, page);
+	putback_slab(a->slab, page);
 }
 
 /*
@@ -467,13 +474,12 @@
 static void flush_active(void *d)
 {
 	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
-	struct page *page = s->active[cpu];
+	struct active_slab *a = ACTIVE_SLAB(s, smp_processor_id());
 
-	page = s->active[cpu];
-	if (likely(page)) {
-		slab_lock(page);
-		deactivate_slab(s, page, cpu);
+	if (likely(a->page)) {
+		slab_lock(a->page);
+		deactivate_slab(a);
+		a->flush_active = 0;
 	}
 }
 
@@ -481,50 +487,31 @@
 /*
  * Called from IPI during flushing to check and flush active slabs.
  */
-void check_flush_active(void *d)
+void check_flush_active(struct work_struct *w)
 {
-	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
-	struct page *page = s->active[cpu];
+	struct active_slab *a = container_of(w, struct active_slab, flush.work);
 
-	if (!page)
+	if (!a->page)
 		return;
 
-	if (PageReferenced(page)) {
-		ClearPageReferenced(page);
-		atomic_inc(&s->active_cpus);
+	if (a->referenced) {
+		a->referenced = 0;
+		a->flush_active = 1;
+		schedule_delayed_work(&a->flush, 2 * HZ);
 	} else {
-		slab_lock(page);
-		deactivate_slab(s, page, cpu);
+		slab_lock(a->page);
+		deactivate_slab(a);
+		a->flush_active = 0;
 	}
 }
 
 /*
  * Called from eventd
  */
-static void flusher(struct work_struct *w)
-{
-	struct kmem_cache *s = container_of(w, struct kmem_cache, flush.work);
-
-	if (!mutex_trylock(&s->flushing))
-		return;
-
-	atomic_set(&s->active_cpus, num_online_cpus());
-	on_each_cpu(check_flush_active, s, 1, 1);
-	if (atomic_read(&s->active_cpus))
-		schedule_delayed_work(&s->flush, 2 * HZ);
-	mutex_unlock(&s->flushing);
-}
-
 static void drain_all(struct kmem_cache *s)
 {
-	if (atomic_read(&s->active_cpus)) {
-		mutex_lock(&s->flushing);
-		cancel_delayed_work(&s->flush);
-		atomic_set(&s->active_cpus, 0);
-		on_each_cpu(flush_active, s, 1, 1);
-		mutex_unlock(&s->flushing);
-	}
+	on_each_cpu(flush_active, s, 1, 1);
 }
 #else
 static void drain_all(struct kmem_cache *s)
@@ -532,7 +519,7 @@
 	unsigned long flags;
 
 	local_irq_save(flags);
-	flush_active(s);
+	flush_active(&s->active[0]);
 	local_irq_restore(flags);
 }
 #endif
@@ -540,36 +527,35 @@
 static __always_inline void *__slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node)
 {
+	struct active_slab *a;
 	struct page *page;
 	void **object;
 	void *next_object;
 	unsigned long flags;
-	int cpu;
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->active[cpu];
-	if (!page)
+	a = ACTIVE_SLAB(s, smp_processor_id());
+	if (!a->page)
 		goto new_slab;
 
-	slab_lock(page);
-	check_free_chain(s, page);
-	if (unlikely(!page->freelist))
+	slab_lock(a->page);
+	check_free_chain(s, a->page);
+	if (unlikely(!a->page->freelist))
 		goto another_slab;
 
-	if (unlikely(node != -1 && page_to_nid(page) != node))
+	if (unlikely(node != -1 && page_to_nid(a->page) != node))
 		goto another_slab;
 redo:
-	page->inuse++;
-	object = page->freelist;
-	page->freelist = next_object = object[page->offset];
-	__SetPageReferenced(page);
-	slab_unlock(page);
+	a->page->inuse++;
+	object = a->page->freelist;
+	a->page->freelist = next_object = object[a->page->offset];
+	a->referenced = 1;
+	slab_unlock(a->page);
 	local_irq_restore(flags);
 	return object;
 
 another_slab:
-	deactivate_slab(s, page, cpu);
 
+	deactivate_slab(a);
 new_slab:
 	page = get_partial(s, gfpflags, node);
@@ -588,27 +574,26 @@
 	 */
 	if (unlikely(s->objects == 1)) {
 		local_irq_restore(flags);
-		return page_address(page);
+		return page_address(a->page);
 	}
 
-	slab_lock(page);
+	slab_lock(a->page);
 
 gotpage:
-	if (s->active[cpu]) {
+	if (a->page) {
 		slab_unlock(page);
 		discard_slab(s, page);
-		page = s->active[cpu];
-		slab_lock(page);
+		slab_lock(a->page);
 	} else
-		s->active[cpu] = page;
+		a->page = page;
 
-	__SetPageActive(page);
-	check_free_chain(s, page);
+	__SetPageActive(a->page);
+	check_free_chain(s, a->page);
 
 #ifdef CONFIG_SMP
-	if (keventd_up() && !atomic_read(&s->active_cpus)) {
-		atomic_inc(&s->active_cpus);
-		schedule_delayed_work(&s->flush, 2 * HZ);
+	if (keventd_up() && !a->flush_active) {
+		a->flush_active = 1;
+		schedule_delayed_work(&a->flush, 2 * HZ);
 	}
 #endif
 	goto redo;
@@ -639,7 +624,6 @@
 		return;
 
 	page = virt_to_page(x);
-
 	if (unlikely(PageCompound(page)))
 		page = page->first_page;
 
@@ -822,14 +806,7 @@
 	atomic_long_set(&s->nr_slabs, 0);
 	atomic_set(&s->refcount, 1);
 	spin_lock_init(&s->list_lock);
-	for_each_possible_cpu(cpu)
-		s->active[cpu] = NULL;
 	INIT_LIST_HEAD(&s->partial);
-#ifdef CONFIG_SMP
-	mutex_init(&s->flushing);
-	atomic_set(&s->active_cpus, 0);
-	INIT_DELAYED_WORK(&s->flush, flusher);
-#endif
 	s->name = name;
 	s->ctor = ctor;
 	s->dtor = dtor;
@@ -876,6 +853,23 @@
 	if (!s->objects)
 		goto error;
 
+	for_each_online_cpu(cpu) {
+		struct active_slab *a;
+
+#ifdef CONFIG_NUMA
+		s->active[cpu] = a = kmalloc(sizeof(struct active_slab), GFP_KERNEL);
+#else
+		a = ACTIVE_SLAB(s, cpu);
+#endif
+
+		a->page = NULL;
+		a->slab = s;
+#ifdef CONFIG_SMP
+		a->flush_active = 0;
+		INIT_DELAYED_WORK(&a->flush, check_flush_active);
+#endif
+	}
+
 	register_slab(s);
 
 	return 1;
@@ -1068,6 +1062,11 @@
 		return 1;
 
 	unregister_slab(s);
+
+#ifdef CONFIG_NUMA
+	for_each_cpu(cpu)
+		kfree(ACTIVE_SLAB(s, cpu));
+#endif
 	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_close);
@@ -1083,7 +1082,6 @@
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-
 static unsigned long count_objects(struct kmem_cache *s, struct list_head *list)
 {
 	int count = 0;
@@ -1108,11 +1106,11 @@
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		struct page *page = s->active[cpu];
+		struct active_slab *a = ACTIVE_SLAB(s, cpu);
 
-		if (page) {
+		if (a->page) {
 			nr_active++;
-			active += page->inuse;
+			active += a->page->inuse;
 		}
 	}
 
@@ -1129,6 +1127,10 @@
 		(nr_slabs - s->nr_partial - nr_active) * s->objects;
 }
 
+#ifdef CONFIG_NUMA
+/* logic to bring up per cpu portions is missing here */
+#endif
+
 /********************************************************************
  *			Kmalloc subsystem
  *******************************************************************/
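
For illustration only, not part of the patch: below is a minimal stand-alone
user-space model of the two-phase aging that the new check_flush_active()
implements. A recently used active slab only has its referenced flag cleared
and the delayed work re-armed; a slab that stayed idle for a full interval is
deactivated. The struct and function names here are invented for the example;
only the referenced/flush_active handling mirrors the patch.

/* toy_flush.c - model of the active slab aging, build with: cc toy_flush.c */
#include <stdio.h>
#include <stdbool.h>

struct toy_active_slab {
	bool has_page;		/* stands in for a->page != NULL */
	bool referenced;	/* set on allocation, cleared by the flusher */
	bool flush_active;	/* a flush pass is currently scheduled */
};

/* One pass of the flusher; returns true if another pass must be scheduled. */
static bool toy_check_flush(struct toy_active_slab *a)
{
	if (!a->has_page)
		return false;
	if (a->referenced) {
		/* Used since the last pass: age it and keep the work armed. */
		a->referenced = false;
		a->flush_active = true;
		return true;
	}
	/* Idle for a whole interval: drop the active slab. */
	a->has_page = false;
	a->flush_active = false;
	return false;
}

int main(void)
{
	struct toy_active_slab a = { .has_page = true, .referenced = true };

	/* First pass only ages the slab, the second pass deactivates it. */
	printf("rearm=%d\n", toy_check_flush(&a));	/* rearm=1 */
	printf("rearm=%d\n", toy_check_flush(&a));	/* rearm=0 */
	printf("still active=%d\n", a.has_page);	/* still active=0 */
	return 0;
}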