From: Christoph Lameter
Subject: slub: Update statistics handling for variable order slabs

Change the statistics to consider that slabs of the same slabcache
can have a different number of objects in them since they may be of
different order.

Provide a new sysfs field, total_objects, which shows the total number
of objects that the allocated slabs of a slabcache could hold.

Update the description of the objects field in the kmem_cache
structure. It now gives the maximum number of objects per slab, i.e.
the number of objects in a slab allocated with the largest possible
order.

Signed-off-by: Christoph Lameter

---
 include/linux/slub_def.h |    3 +-
 mm/slub.c                |   65 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 54 insertions(+), 14 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2008-02-15 11:33:26.849254450 -0800
+++ linux-2.6/include/linux/slub_def.h	2008-02-15 12:05:32.006287443 -0800
@@ -46,6 +46,7 @@ struct kmem_cache_node {
 	spinlock_t list_lock;	/* Protect partial list and nr_partial */
 	unsigned long nr_partial;
 	atomic_long_t nr_slabs;
+	atomic_long_t total_objects;
 	struct list_head partial;
 #ifdef CONFIG_SLUB_DEBUG
 	struct list_head full;
@@ -70,7 +71,7 @@ struct kmem_cache {
 	struct kmem_cache_node local_node;
 
 	/* Allocation and freeing of slabs */
-	int objects;		/* Number of objects in slab */
+	int objects;		/* Number of objects in a slab of maximum size */
 	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
 	void (*ctor)(struct kmem_cache *, void *);
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2008-02-15 11:34:54.429691706 -0800
+++ linux-2.6/mm/slub.c	2008-02-15 12:20:31.623873534 -0800
@@ -1129,8 +1129,10 @@ static struct page *new_slab(struct kmem
 		goto out;
 
 	n = get_node(s, page_to_nid(page));
-	if (n)
+	if (n) {
 		atomic_long_inc(&n->nr_slabs);
+		atomic_long_add(slab_objects(s, page), &n->total_objects);
+	}
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
 	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -1207,6 +1209,7 @@ static void discard_slab(struct kmem_cac
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
 
 	atomic_long_dec(&n->nr_slabs);
+	atomic_long_sub(slab_objects(s, page), &n->total_objects);
 	reset_page_mapcount(page);
 	__ClearPageSlab(page);
 	free_slab(s, page);
@@ -2798,7 +2801,7 @@ void kfree(const void *x)
 }
 EXPORT_SYMBOL(kfree);
 
-static unsigned long count_partial(struct kmem_cache_node *n)
+static unsigned long count_partial_inuse(struct kmem_cache_node *n)
 {
 	unsigned long flags;
 	unsigned long x = 0;
@@ -2812,6 +2815,23 @@ static unsigned long count_partial(struc
 }
 
 /*
+ * Count the total number of objects in the partial list
+ */
+static unsigned long count_partial_total(struct kmem_cache *s,
+					struct kmem_cache_node *n)
+{
+	unsigned long flags;
+	unsigned long x = 0;
+	struct page *page;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry(page, &n->partial, lru)
+		x += slab_objects(s, page);
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return x;
+}
+
+/*
  * kmem_cache_shrink removes empty slabs from the partial lists and sorts
  * the remaining slabs by the number of items in use. The slabs with the
  * most items in use come first. New allocations will then fill those up
@@ -3678,13 +3698,15 @@ enum slab_stat_type {
 	SL_FULL,
 	SL_PARTIAL,
 	SL_CPU,
-	SL_OBJECTS
+	SL_OBJECTS,
+	SL_TOTAL
 };
 
 #define SO_FULL		(1 << SL_FULL)
 #define SO_PARTIAL	(1 << SL_PARTIAL)
 #define SO_CPU		(1 << SL_CPU)
 #define SO_OBJECTS	(1 << SL_OBJECTS)
+#define SO_TOTAL	(1 << SL_TOTAL)
 
 static unsigned long show_slab_objects(struct kmem_cache *s,
 			char *buf, unsigned long flags)
@@ -3712,7 +3734,9 @@ static unsigned long show_slab_objects(s
 			continue;
 		if (page) {
 			if (flags & SO_CPU) {
-				if (flags & SO_OBJECTS)
+				if (flags & SO_TOTAL)
+					x = slab_objects(s, page);
+				else if (flags & SO_OBJECTS)
 					x = page->inuse;
 				else
 					x = 1;
@@ -3727,8 +3751,10 @@ static unsigned long show_slab_objects(s
 		struct kmem_cache_node *n = get_node(s, node);
 
 		if (flags & SO_PARTIAL) {
-			if (flags & SO_OBJECTS)
-				x = count_partial(n);
+			if (flags & SO_TOTAL)
+				x = count_partial_total(s, n);
+			else if (flags & SO_OBJECTS)
+				x = count_partial_inuse(n);
 			else
 				x = n->nr_partial;
 			total += x;
@@ -3739,9 +3765,12 @@ static unsigned long show_slab_objects(s
 			int full_slabs = atomic_long_read(&n->nr_slabs)
 					- per_cpu[node]
 					- n->nr_partial;
-
-			if (flags & SO_OBJECTS)
-				x = full_slabs * s->objects;
+			if (flags & SO_TOTAL)
+				x = atomic_long_read(&n->total_objects);
+			else if (flags & SO_OBJECTS)
+				x = atomic_long_read(&n->total_objects) -
+					(count_partial_total(s, n) -
+						count_partial_inuse(n));
 			else
 				x = full_slabs;
 			total += x;
@@ -3871,6 +3900,12 @@ static ssize_t objects_show(struct kmem_
 }
 SLAB_ATTR_RO(objects);
 
+static ssize_t total_objects_show(struct kmem_cache *s, char *buf)
+{
+	return show_slab_objects(s, buf, SO_TOTAL|SO_FULL);
+}
+SLAB_ATTR_RO(total_objects);
+
 static ssize_t sanity_checks_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DEBUG_FREE));
@@ -4127,6 +4162,7 @@ static struct attribute *slab_attrs[] =
 	&objs_per_slab_attr.attr,
 	&order_attr.attr,
 	&objects_attr.attr,
+	&total_objects_attr.attr,
 	&slabs_attr.attr,
 	&partial_attr.attr,
 	&cpu_slabs_attr.attr,
@@ -4455,7 +4491,9 @@ static int s_show(struct seq_file *m, vo
 	unsigned long nr_partials = 0;
 	unsigned long nr_slabs = 0;
 	unsigned long nr_inuse = 0;
-	unsigned long nr_objs;
+	unsigned long nr_objs = 0;
+	unsigned long nr_partial_inuse = 0;
+	unsigned long nr_partial_total = 0;
 	struct kmem_cache *s;
 	int node;
 
@@ -4469,11 +4507,12 @@ static int s_show(struct seq_file *m, vo
 
 		nr_partials += n->nr_partial;
 		nr_slabs += atomic_long_read(&n->nr_slabs);
-		nr_inuse += count_partial(n);
+		nr_objs += atomic_long_read(&n->total_objects);
+		nr_partial_inuse += count_partial_inuse(n);
+		nr_partial_total += count_partial_total(s, n);
 	}
 
-	nr_objs = nr_slabs * s->objects;
-	nr_inuse += (nr_slabs - nr_partials) * s->objects;
+	nr_inuse = nr_objs - (nr_partial_total - nr_partial_inuse);
 
 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
 		   nr_objs, s->size, s->objects, (1 << s->order));
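
For illustration only, and not part of the patch: a minimal userspace
sketch that reads the new total_objects attribute next to the existing
objects attribute for one cache. It assumes the usual
/sys/kernel/slab/<cache>/ location of SLUB's sysfs files, uses
"kmalloc-64" purely as an example cache name, and treats the objects
file as the number of objects currently in use.

#include <stdio.h>

/* Read the first number from a sysfs attribute file; 0 on any error. */
static unsigned long read_ulong(const char *path)
{
	unsigned long val = 0;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%lu", &val) != 1)
			val = 0;
		fclose(f);
	}
	return val;
}

int main(void)
{
	/* "kmalloc-64" is only an example; substitute any slabcache name. */
	unsigned long inuse =
		read_ulong("/sys/kernel/slab/kmalloc-64/objects");
	unsigned long total =
		read_ulong("/sys/kernel/slab/kmalloc-64/total_objects");

	printf("in use: %lu, capacity: %lu, free slots: %lu\n",
		inuse, total, total > inuse ? total - inuse : 0);
	return 0;
}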