---
 include/linux/slub_def.h |    1 
 mm/slub.c                |  101 ++++++++++++++++++++++++++--------------------
 mm/vmscan.c              |    2 
 3 files changed, 60 insertions(+), 44 deletions(-)

Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c	2008-04-04 00:01:22.587945090 -0700
+++ linux-2.6/mm/vmscan.c	2008-04-04 00:01:34.241696254 -0700
@@ -234,7 +234,7 @@ unsigned long shrink_slab(unsigned long
 		shrinker->nr += total_scan;
 	}
 	up_read(&shrinker_rwsem);
-	if (gfp_mask & __GFP_FS)
+	if (ret && (gfp_mask & __GFP_FS))
 		kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);
 	return ret;
 }
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2008-04-04 00:01:22.597946466 -0700
+++ linux-2.6/mm/slub.c	2008-04-04 01:08:41.168660159 -0700
@@ -1813,7 +1813,7 @@ static int slub_nomerge;
  * system components. Generally order 0 allocations should be preferred since
  * order 0 does not cause fragmentation in the page allocator. Larger objects
  * be problematic to put into order 0 slabs because there may be too much
- * unused space left. We go to a higher order if more than 1/8th of the slab
+ * unused space left. We go to a higher order if more than 1/16th of the slab
  * would be wasted.
  *
  * In order to reach satisfactory performance we must ensure that a minimum
@@ -1875,8 +1875,10 @@ static inline int calculate_order(int si
 	 * we reduce the minimum objects required in a slab.
 	 */
 	min_objects = slub_min_objects;
+
 	if (!min_objects)
 		min_objects = 4 * fls(nr_cpu_ids);
+
 	while (min_objects > 1) {
 		fraction = 16;
 		while (fraction >= 4) {
@@ -2331,7 +2333,7 @@ static int kmem_cache_open(struct kmem_c
 		goto error;
 
 	s->refcount = 1;
-	s->defrag_ratio = 20;
+	s->defrag_ratio = 30;
 #ifdef CONFIG_NUMA
 	s->remote_node_defrag_ratio = 100;
 #endif
@@ -2463,10 +2465,6 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
-#endif
-
 static int __init setup_slub_min_order(char *str)
 {
 	get_option(&str, &slub_min_order);
@@ -2526,6 +2524,7 @@ panic:
 }
 
 #ifdef CONFIG_ZONE_DMA
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 
 static void sysfs_add_func(struct work_struct *w)
 {
@@ -2770,37 +2769,6 @@ void kmem_cache_setup_defrag(struct kmem
 }
 EXPORT_SYMBOL(kmem_cache_setup_defrag);
 
-#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SLABINFO)
-static unsigned long count_partial(struct kmem_cache_node *n,
-					int (*get_count)(struct page *))
-{
-	unsigned long flags;
-	unsigned long x = 0;
-	struct page *page;
-
-	spin_lock_irqsave(&n->list_lock, flags);
-	list_for_each_entry(page, &n->partial, lru)
-		x += get_count(page);
-	spin_unlock_irqrestore(&n->list_lock, flags);
-	return x;
-}
-
-static int count_inuse(struct page *page)
-{
-	return page->inuse;
-}
-
-static int count_total(struct page *page)
-{
-	return page->objects;
-}
-
-static int count_free(struct page *page)
-{
-	return page->objects - page->inuse;
-}
-#endif
-
 /*
  * Vacate all objects in the given slab.
  *
@@ -2972,13 +2940,11 @@ static unsigned long __kmem_cache_shrink
 	return freed;
 }
 
-/*
- * Defrag slabs conditional on the amount of fragmentation on each node.
- */
 int kmem_cache_defrag(int node)
 {
 	struct kmem_cache *s;
 	unsigned long slabs = 0;
+	unsigned long reclaimed;
 
 	/*
 	 * kmem_cache_defrag may be called from the reclaim path which may be
@@ -2990,14 +2956,32 @@ int kmem_cache_defrag(int node)
 		return 0;
 
 	list_for_each_entry(s, &slab_caches, list) {
+
+		if (time_before(jiffies, s->next_defrag))
+			continue;
+
+		/*
+		 * Defragmentable caches come first. If the slab cache is not
+		 * defragmentable then we can stop traversing the list.
+		 */
+		if (!s->kick)
+			break;
+
 		if (node == -1) {
 			int nid;
 
 			for_each_node_state(nid, N_NORMAL_MEMORY)
-				slabs += __kmem_cache_shrink(s, nid,
+				reclaimed = __kmem_cache_shrink(s, nid,
 								MAX_PARTIAL);
 		} else
-			slabs += __kmem_cache_shrink(s, node, MAX_PARTIAL);
+			reclaimed = __kmem_cache_shrink(s, node, MAX_PARTIAL);
+
+		if (reclaimed)
+			s->next_defrag = jiffies + HZ / 10;
+		else
+			s->next_defrag = jiffies + HZ;
+
+		slabs += reclaimed;
 	}
 	up_read(&slub_lock);
 	return slabs;
@@ -3087,7 +3071,7 @@ static int slab_mem_going_online_callbac
 		return 0;
 
 	/*
-	 * We are bringing a node online. No memory is availabe yet. We must
+	 * We are bringing a node online. No memory is available yet. We must
 	 * allocate a kmem_cache_node structure in order to bring the node
 	 * online.
 	 */
@@ -3424,6 +3408,37 @@ void *__kmalloc_node_track_caller(size_t
 	return slab_alloc(s, gfpflags, node, caller);
 }
 
+#if (defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)) || defined(CONFIG_SLABINFO)
+static unsigned long count_partial(struct kmem_cache_node *n,
+					int (*get_count)(struct page *))
+{
+	unsigned long flags;
+	unsigned long x = 0;
+	struct page *page;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry(page, &n->partial, lru)
+		x += get_count(page);
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return x;
+}
+
+static int count_inuse(struct page *page)
+{
+	return page->inuse;
+}
+
+static int count_total(struct page *page)
+{
+	return page->objects;
+}
+
+static int count_free(struct page *page)
+{
+	return page->objects - page->inuse;
+}
+#endif
+
 #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
 static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2008-04-04 00:20:38.897944829 -0700
+++ linux-2.6/include/linux/slub_def.h	2008-04-04 00:31:24.685547112 -0700
@@ -91,6 +91,7 @@ struct kmem_cache {
 	struct kmem_cache_order_objects min;
 	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
+	unsigned long next_defrag;
 	void (*ctor)(struct kmem_cache *, void *);
 	/*
 	 * Called with slab lock held and interrupts disabled.
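
A note on the throttling added to kmem_cache_defrag() above: a cache whose
pass reclaimed something becomes eligible again after HZ/10, a cache where
nothing could be reclaimed is left alone for a full HZ, and the scan stops at
the first cache without a kick callback since defragmentable caches sit at
the front of slab_caches. The stand-alone sketch below only illustrates that
backoff arithmetic; HZ, fake_jiffies and struct cache_state are invented
names for the sketch, and the plain comparison stands in for time_before(),
which in the kernel also handles jiffies wraparound.

/*
 * User-space sketch of the per-cache defrag backoff (not kernel code).
 */
#include <stdbool.h>
#include <stdio.h>

#define HZ 250				/* assumed tick rate for the sketch */

struct cache_state {
	unsigned long next_defrag;	/* mirrors kmem_cache.next_defrag */
};

/* Returns true if the cache was scanned, false if it was throttled. */
static bool defrag_pass(struct cache_state *s, unsigned long fake_jiffies,
			unsigned long reclaimed)
{
	if (fake_jiffies < s->next_defrag)	/* stand-in for time_before() */
		return false;

	if (reclaimed)
		s->next_defrag = fake_jiffies + HZ / 10;	/* retry soon */
	else
		s->next_defrag = fake_jiffies + HZ;		/* back off */
	return true;
}

int main(void)
{
	struct cache_state s = { 0 };
	unsigned long ticks[] = { 0, 10, 30, 300, 320 };
	unsigned int i;

	for (i = 0; i < sizeof(ticks) / sizeof(ticks[0]); i++)
		printf("tick %lu: %s\n", ticks[i],
		       defrag_pass(&s, ticks[i], i % 2) ? "scanned" : "throttled");
	return 0;
}

Run stand-alone, the demo scans at tick 0, backs off for a full HZ because
nothing was reclaimed, and after the successful pass at tick 300 only
throttles for HZ/10.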