Index: linux-2.6.21-rc4-mm1/include/linux/slub_def.h
===================================================================
--- linux-2.6.21-rc4-mm1.orig/include/linux/slub_def.h	2007-03-21 12:04:00.000000000 -0700
+++ linux-2.6.21-rc4-mm1/include/linux/slub_def.h	2007-03-21 12:05:50.000000000 -0700
@@ -27,15 +27,15 @@ struct kmem_cache {
 	unsigned long flags;
 	int size;		/* Total size of an object */
 	int objects;		/* Number of objects in slab */
-	int align;		/* Alignment */
 	struct kmem_cache_node local_node;
 	int refcount;		/* Refcount for destroy */
 	void (*ctor)(void *, struct kmem_cache *, unsigned long);
 	void (*dtor)(void *, struct kmem_cache *, unsigned long);
+	int align;		/* Alignment */
 	int objsize;		/* The size of an object that is in a chunk */
-	int inuse;		/* Used portion of the chunk */
-	const char *name;	/* Name (only for display!) */
+	int inuse;		/* Used portion up to first metadata */
+	const char *name;
 	struct list_head list;	/* List of slabs */
 	struct kobject kobj;	/* For sysfs */
 #ifdef CONFIG_SMP
@@ -47,6 +47,7 @@ struct kmem_cache {
 	struct delayed_work flush;
 #endif
 #ifdef CONFIG_NUMA
+	int defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
 	struct page *cpu_slab[NR_CPUS];
Index: linux-2.6.21-rc4-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc4-mm1.orig/mm/slub.c	2007-03-21 12:05:59.000000000 -0700
+++ linux-2.6.21-rc4-mm1/mm/slub.c	2007-03-21 12:36:26.000000000 -0700
@@ -63,7 +63,7 @@
 #define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL)
 
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
-				SLAB_POISON)
+				SLAB_POISON | SLAB_STORE_USER)
 
 /*
  * Set of flags that will prevent slab merging
  */
@@ -385,7 +385,7 @@ static int slab_pad_check(struct kmem_ca
 	u8 *p;
 	int length, remainder;
 
-	if (!s->flags & SLAB_POISON)
+	if (!(s->flags & SLAB_POISON))
 		return 1;
 
 	p = page_address(page);
@@ -895,11 +895,29 @@ out:
 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 {
 #ifdef CONFIG_NUMA
-	struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
-					->node_zonelists[gfp_zone(flags)];
+	struct zonelist *zonelist;
 	struct zone **z;
 	struct page *page;
 
+	/*
+	 * The defrag ratio allows configuring the tradeoff between
+	 * inter-node defragmentation and node-local allocations.
+	 * A lower defrag_ratio increases the tendency to do local
+	 * allocations instead of scanning through the partial
+	 * lists on other nodes.
+	 *
+	 * If defrag_ratio is set to 0 then kmalloc() always
+	 * returns node-local objects. If it is higher then kmalloc()
+	 * may return off-node objects in order to avoid fragmentation.
+	 *
+	 * A higher ratio means slabs may be taken from other nodes,
+	 * thus reducing the number of partial slabs on those nodes.
+	 */
+	if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio)
+		return NULL;
+
+	zonelist = &NODE_DATA(slab_node(current->mempolicy))
+			->node_zonelists[gfp_zone(flags)];
+
 	for (z = zonelist->zones; *z; z++) {
 		struct kmem_cache_node *n;
@@ -1520,6 +1538,9 @@ static int kmem_cache_open(struct kmem_c
 		goto error;
 
 	s->refcount = 1;
+#ifdef CONFIG_NUMA
+	s->defrag_ratio = 100;
+#endif
 
 #ifdef CONFIG_SMP
 	mutex_init(&s->flushing);
@@ -2299,11 +2320,11 @@ struct slab_attribute {
 	__ATTR(_name, 0644, _name##_show, _name##_store)
 
-static ssize_t size_show(struct kmem_cache *s, char *buf)
+static ssize_t total_size_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%d\n", s->size);
 }
-SLAB_ATTR_RO(size);
+SLAB_ATTR_RO(total_size);
 
 static ssize_t align_show(struct kmem_cache *s, char *buf)
 {
@@ -2413,7 +2434,6 @@ static ssize_t _trace_show(struct kmem_c
 static ssize_t _trace_store(struct kmem_cache *s, const char *buf, size_t length)
 {
 	s->flags &= ~SLAB_TRACE;
-	printk("_trace_store = %s\n", buf);
 	if (buf[0] == '1')
 		s->flags |= SLAB_TRACE;
 	return length;
@@ -2525,9 +2545,27 @@ static ssize_t _store_user_store(struct
 }
 SLAB_ATTR(_store_user);
 
+#ifdef CONFIG_NUMA
+static ssize_t _defrag_ratio_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%d\n", s->defrag_ratio / 10);
+}
+
+static ssize_t _defrag_ratio_store(struct kmem_cache *s, const char *buf, size_t length)
+{
+	int n = simple_strtoul(buf, NULL, 10);
+
+	if (n < 100)
+		s->defrag_ratio = n * 10;
+	return length;
+}
+SLAB_ATTR(_defrag_ratio);
+#endif
+
 static struct attribute * slab_attrs[] = {
-	&size_attr.attr,
+	&total_size_attr.attr,
 	&object_size_attr.attr,
+	&align_attr.attr,
 	&objs_per_slab_attr.attr,
 	&order_attr.attr,
 	&objects_attr.attr,
@@ -2549,6 +2587,9 @@ static struct attribute * slab_attrs[] =
 #ifdef CONFIG_CPUSET
 	&_mem_spread_attr.attr,
 #endif
+#ifdef CONFIG_NUMA
+	&_defrag_ratio_attr.attr,
+#endif
 #ifdef CONFIG_ZONE_DMA
 	&_cache_dma_attr.attr,
 #endif
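
As a worked illustration of the gate added to get_any_partial(), here is a
minimal userspace sketch (not kernel code). rand() stands in for
get_cycles(); the modulus of 1024, the percentage-times-ten sysfs scaling
and the n < 100 cap are taken from the patch above, while the function and
variable names here are made up for the example.

#include <stdio.h>
#include <stdlib.h>

/* Mirrors: if (!s->defrag_ratio || get_cycles() % 1024 > s->defrag_ratio) */
static int attempts_off_node(int stored_ratio)
{
	if (!stored_ratio || rand() % 1024 > stored_ratio)
		return 0;	/* gate closed: allocate node locally */
	return 1;		/* gate open: scan remote partial lists */
}

int main(void)
{
	/* Percentages as written via sysfs; stored internally as pct * 10 */
	int pct[] = { 0, 10, 50, 99 };
	unsigned int i, j, hits;

	for (i = 0; i < sizeof(pct) / sizeof(pct[0]); i++) {
		int stored = pct[i] * 10;

		hits = 0;
		for (j = 0; j < 1000000; j++)
			hits += attempts_off_node(stored);
		printf("sysfs %2d%% (stored %3d): ~%5.1f%% off-node attempts\n",
			pct[i], stored, hits / 10000.0);
	}
	return 0;
}

Under these assumptions the kmem_cache_open() default of 100 reads back as
10 through sysfs and lets roughly 101/1024, i.e. about 10%, of calls go on
to scan remote partial lists. Using get_cycles() as a cheap pseudo-random
source keeps the cost of the check negligible in the allocation path.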