Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h	2008-04-10 11:06:52.000000000 -0700
+++ linux-2.6/include/linux/mmzone.h	2008-04-10 11:08:04.000000000 -0700
@@ -263,6 +263,7 @@
 	unsigned long		nr_scan_active;
 	unsigned long		nr_scan_inactive;
 	unsigned long		pages_scanned;		/* since last reclaim */
+	unsigned long		slab_objects_freed;	/* Since last slab defrag */
 	unsigned long		flags;			/* zone flags, see below */
 
 	/* Zone statistics */
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2008-04-10 11:06:52.000000000 -0700
+++ linux-2.6/include/linux/slub_def.h	2008-04-10 11:08:04.000000000 -0700
@@ -91,7 +91,6 @@
 	struct kmem_cache_order_objects min;
 	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
-	unsigned long next_defrag;
 	void (*ctor)(struct kmem_cache *, void *);
 	/*
 	 * Called with slab lock held and interrupts disabled.
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2008-04-10 11:06:52.000000000 -0700
+++ linux-2.6/mm/slub.c	2008-04-10 11:08:04.000000000 -0700
@@ -2985,9 +2985,6 @@
 
 	list_for_each_entry(s, &slab_caches, list) {
 
-		if (time_before(jiffies, s->next_defrag))
-			continue;
-
 		/*
 		 * Defragmentable caches come first. If the slab cache is not
 		 * defragmentable then we can stop traversing the list.
@@ -3004,11 +3001,6 @@
 		} else
 			reclaimed = __kmem_cache_shrink(s, node, MAX_PARTIAL);
 
-		if (reclaimed)
-			s->next_defrag = jiffies + HZ / 10;
-		else
-			s->next_defrag = jiffies + HZ;
-
 		slabs += reclaimed;
 	}
 	up_read(&slub_lock);
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c	2008-04-10 11:06:52.000000000 -0700
+++ linux-2.6/mm/vmscan.c	2008-04-10 11:24:02.000000000 -0700
@@ -234,8 +234,34 @@
 
 		shrinker->nr += total_scan;
 	}
 	up_read(&shrinker_rwsem);
-	if (ret && (gfp_mask & __GFP_FS))
-		kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);
+
+	/*
+	 * "ret" doesn't really contain the freed object count. The shrinkers
+	 * fake it. Gotta go with what we are getting though.
+	 *
+	 * Handling of the freed object counter is also racy. If we get the
+	 * wrong counts then we may unnecessarily do a defrag pass or defer
+	 * one. "ret" is already faked. So this is just increasing
+	 * the already existing fuzziness to get some notion as to when
+	 * to initiate slab defrag which will hopefully be okay.
+	 */
+	if (zone) {
+		/* balance_pgdat running on a zone so we only scan one node */
+		zone->slab_objects_freed += ret;
+		if (zone->slab_objects_freed > 100 && (gfp_mask & __GFP_FS)) {
+			zone->slab_objects_freed = 0;
+			kmem_cache_defrag(zone_to_nid(zone));
+		}
+	} else {
+		static unsigned long global_objects_freed = 0;
+
+		/* Direct (and thus global) reclaim. Scan all nodes */
+		global_objects_freed += ret;
+		if (global_objects_freed > 100 && (gfp_mask & __GFP_FS)) {
+			global_objects_freed = 0;
+			kmem_cache_defrag(-1);
+		}
+	}
 	return ret;
 }
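
The vmscan.c hunk above replaces the per-cache next_defrag rate limiting with an accumulate-and-trigger counter: freed-object counts returned by the shrinkers are summed per zone (or in a static global for direct reclaim), and once the running total exceeds 100 and __GFP_FS permits it, the counter is reset and a defrag pass is started. The following is a minimal user-space sketch of that pattern, for illustration only; freed_accumulator, account_freed, trigger_defrag and DEFRAG_THRESHOLD are made-up names, not kernel symbols, and the real code additionally depends on gfp_mask and zone_to_nid().

#include <stdbool.h>
#include <stdio.h>

#define DEFRAG_THRESHOLD 100	/* mirrors the "> 100" check in the patch */

struct freed_accumulator {
	unsigned long objects_freed;	/* like zone->slab_objects_freed */
};

/* Stand-in for kmem_cache_defrag(); just reports that a pass would run. */
static void trigger_defrag(int node)
{
	printf("defrag pass on node %d\n", node);
}

/*
 * Add the (possibly inaccurate) freed-object count from one shrink pass.
 * Once the running total crosses the threshold and the caller is allowed
 * to do filesystem work (may_fs, playing the role of __GFP_FS), reset the
 * counter and kick off a defrag pass on the given node (-1 = all nodes).
 */
static void account_freed(struct freed_accumulator *acc, unsigned long freed,
			  bool may_fs, int node)
{
	acc->objects_freed += freed;
	if (acc->objects_freed > DEFRAG_THRESHOLD && may_fs) {
		acc->objects_freed = 0;
		trigger_defrag(node);
	}
}

int main(void)
{
	struct freed_accumulator zone_acc = { 0 };

	/*
	 * Three shrink passes of 40 objects each: defrag fires once the
	 * running total exceeds 100, i.e. on the third call (total 120).
	 */
	account_freed(&zone_acc, 40, true, 0);
	account_freed(&zone_acc, 40, true, 0);
	account_freed(&zone_acc, 40, true, 0);
	return 0;
}

The point of the pattern is that a lost or double-counted increment only shifts when the next defrag pass runs, which is the "already existing fuzziness" the comment in the patch accepts.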