Index: linux-2.6.18-rc1/mm/slab.c
===================================================================
--- linux-2.6.18-rc1.orig/mm/slab.c	2006-07-05 21:09:49.000000000 -0700
+++ linux-2.6.18-rc1/mm/slab.c	2006-07-17 05:08:10.130237888 -0700
@@ -258,6 +258,21 @@ struct slab_rcu {
  * The limit is stored in the per-cpu structure to reduce the data cache
  * footprint.
  *
+ * The array cache may be used for three different purposes:
+ *
+ * A. Per cpu cache
+ * ----------------
+ * Cache is only accessed with interrupts disabled. The lock is not used.
+ *
+ * B. Shared cache
+ * ---------------
+ * Cache is only accessed when the list_lock of the corresponding kmem_list3
+ * structure is held. The lock is not used.
+ *
+ * C. Alien cache (NUMA)
+ * ---------------------
+ * The lock field is used for locking. No other locking is necessary.
+ * The batchcount is not used.
  */
 struct array_cache {
 	unsigned int avail;
@@ -285,6 +300,10 @@ struct arraycache_init {
 
 /*
  * The slab lists for all objects.
+ *
+ * Multiple list_locks may be taken if we have an OFF_SLAB cache.
+ * In that case the list_lock of the OFF_SLAB cache must be taken
+ * before the cache used for slab management.
  */
 struct kmem_list3 {
 	struct list_head slabs_partial;	/* partial list first, better asm code */
@@ -2181,8 +2200,15 @@ kmem_cache_create (const char *name, siz
 		cachep->gfpflags |= GFP_DMA;
 	cachep->buffer_size = size;
 
-	if (flags & CFLGS_OFF_SLAB)
+	if (flags & CFLGS_OFF_SLAB) {
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
+		/*
+		 * The management slab must not be OFF_SLAB, otherwise we
+		 * will have to hold a cascade of list_locks when we have to
+		 * free slabs.
+		 */
+		BUG_ON(cachep->slabp_cache->flags & CFLGS_OFF_SLAB);
+	}
 	cachep->ctor = ctor;
 	cachep->dtor = dtor;
 	cachep->name = name;
@@ -3114,11 +3140,27 @@ static void free_block(struct kmem_cache
 				list_add(&slabp->list, &l3->slabs_free);
 			}
 		} else {
-			/* Unconditionally move a slab to the end of the
-			 * partial list on free - maximum time for the
-			 * other objects to be freed, too.
+			/*
+			 * Move slab to the end of the list if the number
+			 * of allocated objects is low to increase the
+			 * likelihood of freeing the rest.
+			 *
+			 * If the number of allocated objects is high then
+			 * we want this at the start of the list in order
+			 * to fill the slab up again.
+			 *
+			 * This reduces fragmentation.
+			 *
+			 * We are not touching the links if the slab is
+			 * in between because we know the slab is already on
+			 * the partial list. No need to unnecessarily dirty
+			 * cachelines.
 			 */
-			list_add_tail(&slabp->list, &l3->slabs_partial);
+			if (cachep->num < 8 || slabp->inuse <= cachep->num / 4)
+				list_add_tail(&slabp->list, &l3->slabs_partial);
+			else
+			if (slabp->inuse > cachep->num / 2 + cachep->num / 4)
+				list_add(&slabp->list, &l3->slabs_partial);
 		}
 	}
 }
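
Note on the free_block() hunk above: the new placement heuristic keys off how full the slab still is. The standalone C sketch below restates that decision outside of the slab internals; the names partial_placement and PLACE_TAIL/PLACE_HEAD/PLACE_KEEP are invented for illustration and do not appear in the patch, which works directly on cachep->num and slabp->inuse and calls list_add_tail()/list_add().

/*
 * Illustrative sketch only -- not part of the patch.
 */
#include <stdio.h>

enum partial_placement { PLACE_TAIL, PLACE_HEAD, PLACE_KEEP };

/* num = objects per slab (cachep->num), inuse = allocated objects (slabp->inuse) */
enum partial_placement partial_placement(unsigned int num, unsigned int inuse)
{
	/* Small caches, or slabs that are mostly free, go to the tail so
	 * the remaining objects get the longest time to be freed and the
	 * slab has the best chance of becoming empty. */
	if (num < 8 || inuse <= num / 4)
		return PLACE_TAIL;

	/* Mostly full slabs go to the head so they are filled up again
	 * first, which reduces fragmentation. */
	if (inuse > num / 2 + num / 4)
		return PLACE_HEAD;

	/* Slabs in between are left where they are; not touching the
	 * list links avoids dirtying extra cachelines. */
	return PLACE_KEEP;
}

int main(void)
{
	static const char *names[] = { "tail", "head", "keep" };
	unsigned int inuse;

	/* 32 objects per slab: <= 8 in use -> tail, > 24 -> head, else keep. */
	for (inuse = 1; inuse < 32; inuse++)
		printf("inuse=%2u -> %s\n", inuse,
		       names[partial_placement(32, inuse)]);
	return 0;
}

With 32 objects per slab, for example, a slab with at most 8 objects in use goes to the tail of the partial list, one with more than 24 in use goes to the head, and anything in between is left in place.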