Index: linux-2.6.21-rc4/include/linux/slub_def.h
===================================================================
--- linux-2.6.21-rc4.orig/include/linux/slub_def.h	2007-03-21 21:06:47.000000000 -0700
+++ linux-2.6.21-rc4/include/linux/slub_def.h	2007-03-21 21:16:58.000000000 -0700
@@ -27,15 +27,15 @@
 	unsigned long flags;
 	int size;		/* Total size of an object */
 	int objects;		/* Number of objects in slab */
-	int align;		/* Alignment */
 	struct kmem_cache_node local_node;
 	int refcount;		/* Refcount for destroy */
 	void (*ctor)(void *, struct kmem_cache *, unsigned long);
 	void (*dtor)(void *, struct kmem_cache *, unsigned long);
+	int align;		/* Alignment */
 	int objsize;		/* The size of an object that is in a chunk */
-	int inuse;		/* Used portion of the chunk */
-	const char *name;	/* Name (only for display!) */
+	int inuse;		/* Used portion up to first metadata */
+	const char *name;
 	struct list_head list;	/* List of slabs */
 	struct kobject kobj;	/* For sysfs */
 
 #ifdef CONFIG_SMP
@@ -47,6 +47,7 @@
 	struct delayed_work flush;
 #endif
 #ifdef CONFIG_NUMA
+	int defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
 	struct page *cpu_slab[NR_CPUS];
Index: linux-2.6.21-rc4/mm/slub.c
===================================================================
--- linux-2.6.21-rc4.orig/mm/slub.c	2007-03-21 21:16:38.000000000 -0700
+++ linux-2.6.21-rc4/mm/slub.c	2007-03-21 21:19:10.000000000 -0700
@@ -913,11 +913,29 @@
 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 {
 #ifdef CONFIG_NUMA
-	struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
-					->node_zonelists[gfp_zone(flags)];
+	struct zonelist *zonelist;
 	struct zone **z;
 	struct page *page;
 
+	/*
+	 * The defrag ratio allows configuring the tradeoff between
+	 * inter node defragmentation and node local allocations.
+	 * A lower defrag_ratio increases the tendency to do local
+	 * allocations instead of scanning through the partial
+	 * lists on other nodes.
+	 *
+	 * If defrag_ratio is set to 0 then kmalloc() always
+	 * returns node local objects. If it is higher then kmalloc()
+	 * may return off node objects in order to avoid fragmentation.
+	 *
+	 * A higher ratio means slabs may be taken from other nodes,
+	 * thus reducing the number of partial slabs on those nodes.
+	 */
+	if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
+		return NULL;
+
+	zonelist = &NODE_DATA(slab_node(current->mempolicy))
+			->node_zonelists[gfp_zone(flags)];
 	for (z = zonelist->zones; *z; z++) {
 		struct kmem_cache_node *n;
 
@@ -1536,6 +1554,9 @@
 		goto error;
 
 	s->refcount = 1;
+#ifdef CONFIG_NUMA
+	s->defrag_ratio = 100;
+#endif
 #ifdef CONFIG_SMP
 	mutex_init(&s->flushing);
 
@@ -2521,6 +2541,23 @@
 }
 SLAB_ATTR(store_user);
 
+#ifdef CONFIG_NUMA
+static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%d\n", s->defrag_ratio / 10);
+}
+
+static ssize_t defrag_ratio_store(struct kmem_cache *s, const char *buf, size_t length)
+{
+	int n = simple_strtoul(buf, NULL, 10);
+
+	if (n < 100)
+		s->defrag_ratio = n * 10;
+	return length;
+}
+SLAB_ATTR(defrag_ratio);
+#endif
+
 static struct attribute * slab_attrs[] = {
 	&slab_size_attr.attr,
 	&object_size_attr.attr,
@@ -2545,6 +2582,9 @@
 #ifdef CONFIG_ZONE_DMA
 	&cache_dma_attr.attr,
 #endif
+#ifdef CONFIG_NUMA
+	&defrag_ratio_attr.attr,
+#endif
 	NULL
 };
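
A note on what the new knob actually does: the check added to
get_any_partial() is probabilistic. get_cycles() % 1024 is compared against
defrag_ratio, so with the default of 100 roughly 100/1024 (about 10%) of the
allocations that miss the local partial lists will go scavenging on other
nodes. The userspace sketch below is illustrative only, not kernel code:
rand() stands in for get_cycles(), and may_scan_off_node() is a made-up name.

#include <stdio.h>
#include <stdlib.h>

/*
 * Userspace approximation of the gate added to get_any_partial().
 * rand() replaces get_cycles(); both serve only as a cheap source
 * of pseudo-random bits here.
 */
static int may_scan_off_node(int defrag_ratio)
{
	if (!defrag_ratio || rand() % 1024 > defrag_ratio)
		return 0;	/* stay node local (the patch returns NULL) */
	return 1;		/* scan the partial lists of other nodes */
}

int main(void)
{
	int ratio, i, hits;

	/* Sample the effective off-node scan rate for a few settings. */
	for (ratio = 0; ratio <= 1000; ratio += 200) {
		hits = 0;
		for (i = 0; i < 1000000; i++)
			hits += may_scan_off_node(ratio);
		printf("defrag_ratio=%4d -> off-node scans %5.1f%%\n",
			ratio, hits / 10000.0);
	}
	return 0;
}

One wrinkle worth keeping in mind when reading the sysfs handlers above:
defrag_ratio_store() multiplies the written value by 10 and silently ignores
anything >= 100, while defrag_ratio_show() divides by 10. The file therefore
exposes a 0-99 scale, the internal comparison runs modulo 1024, and the
in-kernel default of 100 reads back as 10.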