SLUB: Add defrag ratio The defrag ratio determines the rate at which we defragment slabs from other nodes. The higher the percentage the more aggressively other nodes' slabs are defragmented and the higher the chance that kmalloc() will return off node memory. Signed-off-by: Christoph Lameter Index: linux-2.6.21-rc4-mm1/include/linux/slub_def.h =================================================================== --- linux-2.6.21-rc4-mm1.orig/include/linux/slub_def.h 2007-03-22 15:02:07.000000000 -0700 +++ linux-2.6.21-rc4-mm1/include/linux/slub_def.h 2007-03-22 15:06:14.000000000 -0700 @@ -28,6 +28,7 @@ struct kmem_cache { int objsize; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ atomic_t cpu_slabs; /* != 0 -> flusher scheduled. */ + int defrag_ratio; /* * Avoid an extra cache line for UP, SMP and for the node local to Index: linux-2.6.21-rc4-mm1/mm/slub.c =================================================================== --- linux-2.6.21-rc4-mm1.orig/mm/slub.c 2007-03-22 15:04:33.000000000 -0700 +++ linux-2.6.21-rc4-mm1/mm/slub.c 2007-03-22 15:04:45.000000000 -0700 @@ -913,11 +913,29 @@ out: static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) { #ifdef CONFIG_NUMA - struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy)) - ->node_zonelists[gfp_zone(flags)]; + struct zonelist *zonelist; struct zone **z; struct page *page; + /* + * The defrag ratio allows configuring the tradeoffs between + * inter node defragmentation and node local allocations. + * A lower defrag_ratio increases the tendency to do local + * allocations instead of scanning through the partial + * lists on other nodes. + * + * If defrag_ratio is set to 0 then kmalloc() always + * returns node local objects. If it is higher then kmalloc() + * may return off node objects in order to avoid fragmentation. + * + * A higher ratio means slabs may be taken from other nodes + * thus reducing the number of partial slabs on those nodes. 
+ */ + if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio) + return NULL; + + zonelist = &NODE_DATA(slab_node(current->mempolicy)) + ->node_zonelists[gfp_zone(flags)]; for (z = zonelist->zones; *z; z++) { struct kmem_cache_node *n; @@ -1536,6 +1554,9 @@ static int kmem_cache_open(struct kmem_c goto error; s->refcount = 1; +#ifdef CONFIG_NUMA + s->defrag_ratio = 100; +#endif #ifdef CONFIG_SMP mutex_init(&s->flushing); @@ -2538,6 +2559,23 @@ static ssize_t store_user_store(struct k } SLAB_ATTR(store_user); +#ifdef CONFIG_NUMA +static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%d\n", s->defrag_ratio / 10); +} + +static ssize_t defrag_ratio_store(struct kmem_cache *s, const char *buf, size_t length) +{ + int n = simple_strtoul(buf, NULL, 10); + + if (n < 100) + s->defrag_ratio = n * 10; + return length; +} +SLAB_ATTR(defrag_ratio); +#endif + static struct attribute * slab_attrs[] = { &slab_size_attr.attr, &object_size_attr.attr, @@ -2562,6 +2600,9 @@ static struct attribute * slab_attrs[] = #ifdef CONFIG_ZONE_DMA &cache_dma_attr.attr, #endif +#ifdef CONFIG_NUMA + &defrag_ratio_attr.attr, +#endif NULL };