From clameter@sgi.com Wed Apr 25 19:50:26 2007
Message-Id: <20070426024946.111308599@sgi.com>
User-Agent: quilt/0.45-1
Date: Wed, 25 Apr 2007 19:49:46 -0700
From: clameter@sgi.com
To: christoph@lameter.com
Subject: [patch 0/7] SLUB updates

--

From clameter@sgi.com Wed Apr 25 19:50:28 2007
Message-Id: <20070426025027.591133686@sgi.com>
References: <20070426024946.111308599@sgi.com>
User-Agent: quilt/0.45-1
Date: Wed, 25 Apr 2007 19:49:47 -0700
From: clameter@sgi.com
To: christoph@lameter.com
Subject: [patch 1/7] SLUB: Remove duplicate VM_BUG_ON
Content-Disposition: inline; filename=slub_duplicate

Somehow this artifact got in during the merge with mm.

Signed-off-by: Christoph Lameter

Index: linux-2.6.21-rc7-mm1/mm/slub.c
===================================================================
--- linux-2.6.21-rc7-mm1.orig/mm/slub.c	2007-04-25 09:48:40.000000000 -0700
+++ linux-2.6.21-rc7-mm1/mm/slub.c	2007-04-25 09:48:47.000000000 -0700
@@ -633,8 +633,6 @@ static void add_full(struct kmem_cache *
 
 	VM_BUG_ON(!irqs_disabled());
 
-	VM_BUG_ON(!irqs_disabled());
-
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 

--

From clameter@sgi.com Wed Apr 25 19:50:28 2007
Message-Id: <20070426025028.370851451@sgi.com>
References: <20070426024946.111308599@sgi.com>
User-Agent: quilt/0.45-1
Date: Wed, 25 Apr 2007 19:49:48 -0700
From: clameter@sgi.com
To: christoph@lameter.com
Subject: [patch 2/7] SLUB: Fix sysfs directory handling
Content-Disposition: inline; filename=slub_sysfs_dir_fix

This fixes the problem that SLUB does not track the names of aliased
slabs. It does so by changing the way that SLUB manages the files in
/sys/slab.

If the slab being operated on is not mergeable (usually the case if we
are debugging) then do not create any aliases. If an alias exists that
we conflict with then remove it before creating the directory for the
unmergeable slab. If a real slab cache (and not just an alias) already
occupies that name then we fail, since the slab cache names genuinely
collide. So debugging allows the detection of slab name duplication as
usual.

If the slab is mergeable then we create a directory with a unique name
derived from the slab size, the slab options and the pointer to the
kmem_cache structure (for disambiguation). All names referring to the
slab are then created as symlinks to that unique name. These symlinks
are not removed on kmem_cache_destroy() since we only carry a counter
for the number of aliases. If a new symlink is created then it may
simply replace an existing one.

This means that one can create a gazillion slabs with the same name (as
long as they all refer to mergeable caches); doing so only increases
the alias count. So we have the potential of not detecting duplicate
slab names (no real harm is done by that). We will detect the
duplication as soon as debugging is enabled, because we then no longer
generate symlinks and special unique names.
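(Aside, not part of the patch: the layout described above can be checked
from userspace by resolving the symlinks under /sys/slab. The sketch
below is only an illustration and assumes the layout this patch creates,
i.e. aliases are symlinks whose targets are the ":<flags>-<size>:<address>"
directories produced by create_unique_id(), while unmergeable caches show
up as real directories and are skipped here.)

/*
 * Illustrative userspace sketch, not part of the patch: list the
 * /sys/slab aliases and the unique-id directory each one points to.
 */
#include <dirent.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	DIR *dir = opendir("/sys/slab");
	struct dirent *de;

	if (!dir) {
		perror("/sys/slab");
		return 1;
	}
	while ((de = readdir(dir))) {
		char path[PATH_MAX];
		char target[PATH_MAX];
		char *base;
		ssize_t n;

		if (de->d_name[0] == '.')
			continue;
		snprintf(path, sizeof(path), "/sys/slab/%s", de->d_name);
		/* readlink() fails for real directories (unmergeable caches) */
		n = readlink(path, target, sizeof(target) - 1);
		if (n < 0)
			continue;
		target[n] = 0;
		base = strrchr(target, '/');
		printf("%-25s -> %s\n", de->d_name, base ? base + 1 : target);
	}
	closedir(dir);
	return 0;
}

On a kernel with this patch applied, that should show many similarly
sized caches collapsing onto a handful of unique-id directories.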
Signed-off-by: Christoph Lameter Index: linux-2.6.21-rc7-mm1/mm/slub.c =================================================================== --- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 19:41:23.000000000 -0700 +++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 19:41:23.000000000 -0700 @@ -3297,16 +3297,68 @@ static struct kset_uevent_ops slab_ueven decl_subsys(slab, &slab_ktype, &slab_uevent_ops); +#define ID_STR_LENGTH 64 + +/* Create a unique string id for a slab cache: + * format + * :[flags-]size:[memory address of kmemcache] + */ +static char *create_unique_id(struct kmem_cache *s) +{ + char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); + char *p = name; + + BUG_ON(!name); + + *p++ = ':'; + /* + * First flags affecting slabcache operations */ + if (s->flags & SLAB_CACHE_DMA) + *p++ = 'd'; + if (s->flags & SLAB_RECLAIM_ACCOUNT) + *p++ = 'a'; + if (s->flags & SLAB_DESTROY_BY_RCU) + *p++ = 'r';\ + /* Debug flags */ + if (s->flags & SLAB_RED_ZONE) + *p++ = 'Z'; + if (s->flags & SLAB_POISON) + *p++ = 'P'; + if (s->flags & SLAB_STORE_USER) + *p++ = 'U'; + if (p != name + 1) + *p++ = '-'; + p += sprintf(p,"%07d:0x%p" ,s->size, s); + BUG_ON(p > name + ID_STR_LENGTH - 1); + return name; +} + static int sysfs_slab_add(struct kmem_cache *s) { int err; + const char *name; if (slab_state < SYSFS) /* Defer until later */ return 0; + if (s->flags & SLUB_NEVER_MERGE) { + /* + * Slabcache can never be merged so we can use the name proper. + * This is typically the case for debug situations. In that + * case we can catch duplicate names easily. + */ + sysfs_remove_link(&slab_subsys.kset.kobj, s->name); + name = s->name; + } else + /* + * Create a unique name for the slab as a target + * for the symlinks. + */ + name = create_unique_id(s); + kobj_set_kset_s(s, slab_subsys); - kobject_set_name(&s->kobj, s->name); + kobject_set_name(&s->kobj, name); kobject_init(&s->kobj); err = kobject_add(&s->kobj); if (err) @@ -3316,6 +3368,10 @@ static int sysfs_slab_add(struct kmem_ca if (err) return err; kobject_uevent(&s->kobj, KOBJ_ADD); + if (!(s->flags & SLUB_NEVER_MERGE)) { + sysfs_slab_alias(s, s->name); + kfree(name); + } return 0; } @@ -3341,9 +3397,14 @@ static int sysfs_slab_alias(struct kmem_ { struct saved_alias *al; - if (slab_state == SYSFS) + if (slab_state == SYSFS) { + /* + * If we have a leftover link then remove it. + */ + sysfs_remove_link(&slab_subsys.kset.kobj, name); return sysfs_create_link(&slab_subsys.kset.kobj, &s->kobj, name); + } al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL); if (!al) -- From clameter@sgi.com Wed Apr 25 19:50:29 2007 Message-Id: <20070426025029.073313835@sgi.com> References: <20070426024946.111308599@sgi.com> User-Agent: quilt/0.45-1 Date: Wed, 25 Apr 2007 19:49:49 -0700 From: clameter@sgi.com To: christoph@lameter.com Subject: [patch 3/7] SLUB: debug printk cleanup Content-Disposition: inline; filename=slub_at_cleanup Set up a new function slab_err in order to report errors consistently. Consistently report corrective actions taken by SLUB by a printk starting with @@@. 
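(Aside, not part of the patch: the reporting pattern that slab_err()
introduces, rendering a printf-style message into a small fixed buffer
with vsnprintf() and emitting it with one uniform prefix, is shown below
as a standalone userspace sketch. The function name, cache name and
message are made up for illustration.)

/*
 * Userspace sketch of the slab_err() reporting pattern: the caller
 * passes a printf-style message, it is rendered into a fixed buffer
 * and printed with one consistent prefix.
 */
#include <stdarg.h>
#include <stdio.h>

static void report_slab_error(const char *cache, const void *slab,
			      const char *fmt, ...)
{
	va_list args;
	char buf[100];

	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);	/* silently truncated at 100 bytes */
	va_end(args);
	fprintf(stderr, "*** SLUB %s: %s in slab @%p\n", cache, buf, slab);
}

int main(void)
{
	int object;

	report_slab_error("kmalloc-64", &object,
			  "Freepointer 0x%p corrupt", (void *)&object);
	return 0;
}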
Signed-off-by: Christoph Lameter Index: linux-2.6.21-rc7-mm1/mm/slub.c =================================================================== --- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 11:18:00.000000000 -0700 +++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 11:20:55.000000000 -0700 @@ -324,8 +324,8 @@ static void object_err(struct kmem_cache { u8 *addr = page_address(page); - printk(KERN_ERR "*** SLUB: %s in %s@0x%p slab 0x%p\n", - reason, s->name, object, page); + printk(KERN_ERR "*** SLUB %s: %s@0x%p slab 0x%p\n", + s->name, reason, object, page); printk(KERN_ERR " offset=%tu flags=0x%04lx inuse=%u freelist=0x%p\n", object - addr, page->flags, page->inuse, page->freelist); if (object > addr + 16) @@ -335,6 +335,19 @@ static void object_err(struct kmem_cache dump_stack(); } +static void slab_err(struct kmem_cache *s, struct page *page, char *reason, ...) +{ + va_list args; + char buf[100]; + + va_start(args, reason); + vsnprintf(buf, sizeof(buf), reason, args); + va_end(args); + printk(KERN_ERR "*** SLUB %s: %s in slab @0x%p\n", s->name, buf, + page); + dump_stack(); +} + static void init_object(struct kmem_cache *s, void *object, int active) { u8 *p = object; @@ -412,7 +425,7 @@ static int check_valid_pointer(struct km static void restore_bytes(struct kmem_cache *s, char *message, u8 data, void *from, void *to) { - printk(KERN_ERR "@@@ SLUB: %s Restoring %s (0x%x) from 0x%p-0x%p\n", + printk(KERN_ERR "@@@ SLUB %s: Restoring %s (0x%x) from 0x%p-0x%p\n", s->name, message, data, from, to - 1); memset(from, data, to - from); } @@ -459,8 +472,7 @@ static int slab_pad_check(struct kmem_ca return 1; if (!check_bytes(p + length, POISON_INUSE, remainder)) { - printk(KERN_ERR "SLUB: %s slab 0x%p: Padding fails check\n", - s->name, p); + slab_err(s, page, "Padding check failed"); dump_stack(); restore_bytes(s, "slab padding", POISON_INUSE, p + length, p + length + remainder); @@ -547,30 +559,25 @@ static int check_slab(struct kmem_cache VM_BUG_ON(!irqs_disabled()); if (!PageSlab(page)) { - printk(KERN_ERR "SLUB: %s Not a valid slab page @0x%p " - "flags=%lx mapping=0x%p count=%d \n", - s->name, page, page->flags, page->mapping, + slab_err(s, page, "Not a valid slab page flags=%lx " + "mapping=0x%p count=%d", page->flags, page->mapping, page_count(page)); return 0; } if (page->offset * sizeof(void *) != s->offset) { - printk(KERN_ERR "SLUB: %s Corrupted offset %lu in slab @0x%p" - " flags=0x%lx mapping=0x%p count=%d\n", - s->name, + slab_err(s, page, "Corrupted offset %lu flags=0x%lx " + "mapping=0x%p count=%d", (unsigned long)(page->offset * sizeof(void *)), - page, page->flags, page->mapping, page_count(page)); - dump_stack(); return 0; } if (page->inuse > s->objects) { - printk(KERN_ERR "SLUB: %s inuse %u > max %u in slab " - "page @0x%p flags=%lx mapping=0x%p count=%d\n", - s->name, page->inuse, s->objects, page, page->flags, + slab_err(s, page, "inuse %u > max %u @0x%p flags=%lx " + "mapping=0x%p count=%d", + s->name, page->inuse, s->objects, page->flags, page->mapping, page_count(page)); - dump_stack(); return 0; } /* Slab_pad_check fixes things up after itself */ @@ -599,12 +606,13 @@ static int on_freelist(struct kmem_cache set_freepointer(s, object, NULL); break; } else { - printk(KERN_ERR "SLUB: %s slab 0x%p " - "freepointer 0x%p corrupted.\n", - s->name, page, fp); - dump_stack(); + slab_err(s, page, "Freepointer 0x%p corrupt", + fp); page->freelist = NULL; page->inuse = s->objects; + printk(KERN_ERR "@@@ SLUB %s: Freelist " + "cleared. 
Slab 0x%p\n", + s->name, page); return 0; } break; @@ -615,11 +623,12 @@ static int on_freelist(struct kmem_cache } if (page->inuse != s->objects - nr) { - printk(KERN_ERR "slab %s: page 0x%p wrong object count." - " counter is %d but counted were %d\n", - s->name, page, page->inuse, - s->objects - nr); + slab_err(s, page, "Wrong object count. Counter is %d but " + "counted were %d", s, page, page->inuse, + s->objects - nr); page->inuse = s->objects - nr; + printk(KERN_ERR "@@@ SLUB %s: Object count adjusted. " + "Slab @0x%p\n", s->name, page); } return search == NULL; } @@ -663,10 +672,7 @@ static int alloc_object_checks(struct km goto bad; if (object && !on_freelist(s, page, object)) { - printk(KERN_ERR "SLUB: %s Object 0x%p@0x%p " - "already allocated.\n", - s->name, object, page); - dump_stack(); + slab_err(s, page, "Object 0x%p already allocated", object); goto bad; } @@ -706,15 +712,12 @@ static int free_object_checks(struct kme goto fail; if (!check_valid_pointer(s, page, object)) { - printk(KERN_ERR "SLUB: %s slab 0x%p invalid " - "object pointer 0x%p\n", - s->name, page, object); + slab_err(s, page, "Invalid object pointer 0x%p", object); goto fail; } if (on_freelist(s, page, object)) { - printk(KERN_ERR "SLUB: %s slab 0x%p object " - "0x%p already free.\n", s->name, page, object); + slab_err(s, page, "Object 0x%p already free", object); goto fail; } @@ -723,24 +726,22 @@ static int free_object_checks(struct kme if (unlikely(s != page->slab)) { if (!PageSlab(page)) - printk(KERN_ERR "slab_free %s size %d: attempt to" - "free object(0x%p) outside of slab.\n", - s->name, s->size, object); + slab_err(s, page, "Attempt to free object(0x%p) " + "outside of slab", object); else - if (!page->slab) + if (!page->slab) { printk(KERN_ERR - "slab_free : no slab(NULL) for object 0x%p.\n", + "SLUB : no slab for object 0x%p.\n", object); + dump_stack(); + } else - printk(KERN_ERR "slab_free %s(%d): object at 0x%p" - " belongs to slab %s(%d)\n", - s->name, s->size, object, - page->slab->name, page->slab->size); + slab_err(s, page, "object at 0x%p belongs " + "to slab %s", object, page->slab->name); goto fail; } return 1; fail: - dump_stack(); printk(KERN_ERR "@@@ SLUB: %s slab 0x%p object at 0x%p not freed.\n", s->name, page, object); return 0; @@ -2478,6 +2479,8 @@ __initcall(cpucache_init); #endif #ifdef SLUB_RESILIENCY_TEST +static unsigned long validate_slab_cache(struct kmem_cache *s); + static void resiliency_test(void) { u8 *p; @@ -2527,6 +2530,9 @@ static void resiliency_test(void) p[512] = 0xab; printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->%p\n\n", p); validate_slab_cache(kmalloc_caches + 9); + + printk(KERN_ERR "\4. 
Test that kmalloc fails\n"); + p = kmalloc (1 << 30, GFP_KERNEL); } #else static void resiliency_test(void) {}; @@ -2592,16 +2598,16 @@ static void validate_slab_slab(struct km validate_slab(s, page); slab_unlock(page); } else - printk(KERN_INFO "SLUB: %s Skipped busy slab %p\n", + printk(KERN_INFO "SLUB %s: Skipped busy slab %p\n", s->name, page); if (s->flags & DEBUG_DEFAULT_FLAGS) { if (!PageError(page)) - printk(KERN_ERR "SLUB: %s PageError not set " + printk(KERN_ERR "SLUB %s: PageError not set " "on slab %p\n", s->name, page); } else { if (PageError(page)) - printk(KERN_ERR "SLUB: %s PageError set on " + printk(KERN_ERR "SLUB %s: PageError set on " "slab %p\n", s->name, page); } } @@ -2619,8 +2625,8 @@ static int validate_slab_node(struct kme count++; } if (count != n->nr_partial) - printk("SLUB: %s %ld partial slabs counted but counter=%ld\n", - s->name, count, n->nr_partial); + printk(KERN_ERR "SLUB %s: %ld partial slabs counted but " + "counter=%ld\n", s->name, count, n->nr_partial); if (!(s->flags & SLAB_STORE_USER)) goto out; @@ -2630,8 +2636,9 @@ static int validate_slab_node(struct kme count++; } if (count != atomic_long_read(&n->nr_slabs)) - printk("SLUB: %s %ld slabs counted but counter=%ld\n", - s->name, count, atomic_long_read(&n->nr_slabs)); + printk(KERN_ERR "SLUB: %s %ld slabs counted but " + "counter=%ld\n", s->name, count, + atomic_long_read(&n->nr_slabs)); out: spin_unlock_irqrestore(&n->list_lock, flags); -- From clameter@sgi.com Wed Apr 25 19:50:30 2007 Message-Id: <20070426025029.675577170@sgi.com> References: <20070426024946.111308599@sgi.com> User-Agent: quilt/0.45-1 Date: Wed, 25 Apr 2007 19:49:50 -0700 From: clameter@sgi.com To: christoph@lameter.com Subject: [patch 4/7] SLUB: Conform more to SLABs SLAB_HWCACHE_ALIGN behavior Content-Disposition: inline; filename=slub_hwalign Currently SLUB is using a strict L1_CACHE_BYTES alignment if SLAB_HWCACHE_ALIGN is specified. SLAB does not align to a cacheline if the object is smaller than half of a cacheline. Small objects are then aligned by SLAB to a fraction of a cacheline. Make SLUB just forget about the alignment requirement if the object size is less than L1_CACHE_BYTES. It seems that fractional alignments are no good because they grow the object and reduce the object density in a cache line needlessly causing additional cache line fetches. If we are already throwing the user suggestion of a cache line alignment away then lets do the best we can. Maybe SLAB_HWCACHE_ALIGN also needs to be tossed given its wishy-washy handling but doing so would require an audit of all kmem_cache_allocs throughout the kernel source. In any case one needs to explictly specify an alignment during kmem_cache_create to either slab allocator in order to ensure that the objects are cacheline aligned. Signed-off-by: Christoph Lameter Index: linux-2.6.21-rc7-mm1/mm/slub.c =================================================================== --- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 12:25:52.000000000 -0700 +++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 12:26:25.000000000 -0700 @@ -1483,9 +1483,19 @@ static int calculate_order(int size) * various ways of specifying it. */ static unsigned long calculate_alignment(unsigned long flags, - unsigned long align) + unsigned long align, unsigned long size) { - if (flags & SLAB_HWCACHE_ALIGN) + /* + * If the user wants hardware cache aligned objects then + * follow that suggestion if the object is sufficiently + * large. 
+ * + * The hardware cache alignment cannot override the + * specified alignment though. If that is greater + * then use it. + */ + if ((flags & SLAB_HWCACHE_ALIGN) && + size > L1_CACHE_BYTES / 2) return max_t(unsigned long, align, L1_CACHE_BYTES); if (align < ARCH_SLAB_MINALIGN) @@ -1674,7 +1684,7 @@ static int calculate_sizes(struct kmem_c * user specified (this is unecessarily complex due to the attempt * to be compatible with SLAB. Should be cleaned up some day). */ - align = calculate_alignment(flags, align); + align = calculate_alignment(flags, align, s->objsize); /* * SLUB stores one object immediately after another beginning from @@ -2251,7 +2261,7 @@ static struct kmem_cache *find_mergeable return NULL; size = ALIGN(size, sizeof(void *)); - align = calculate_alignment(flags, align); + align = calculate_alignment(flags, align, size); size = ALIGN(size, align); list_for_each(h, &slab_caches) { -- From clameter@sgi.com Wed Apr 25 19:50:30 2007 Message-Id: <20070426025030.187643496@sgi.com> References: <20070426024946.111308599@sgi.com> User-Agent: quilt/0.45-1 Date: Wed, 25 Apr 2007 19:49:51 -0700 From: clameter@sgi.com To: christoph@lameter.com Subject: [patch 5/7] SLUB: Add MIN_PARTIAL Content-Disposition: inline; filename=slab_partial We leave a mininum of partial slabs on nodes when we search for partial slabs on other node. Define a constant for that value. Then modify slub to keep MIN_PARTIAL slabs around. This avoids bad situations where a function frees the last object in a slab (which results in the page being returned to the page allocator) only to then allocate one again (which requires getting a page back from the page allocator if the partial list was empty). Keeping a couple of slabs on the partial list reduces overhead. Empty slabs are added to the end of the partial list to insure that partially allocated slabs are consumed first (defragmentation). Signed-off-by: Christoph Lameter Index: linux-2.6.21-rc7-mm1/mm/slub.c =================================================================== --- linux-2.6.21-rc7-mm1.orig/mm/slub.c 2007-04-25 12:26:25.000000000 -0700 +++ linux-2.6.21-rc7-mm1/mm/slub.c 2007-04-25 12:26:31.000000000 -0700 @@ -93,6 +93,9 @@ * slab handling out of the fast path. 
*/ +/* Mininum number of partial slabs */ +#define MIN_PARTIAL 2 + /* * Issues still to be resolved: * @@ -636,16 +639,8 @@ static int on_freelist(struct kmem_cache /* * Tracking of fully allocated slabs for debugging */ -static void add_full(struct kmem_cache *s, struct page *page) +static void add_full(struct kmem_cache_node *n, struct page *page) { - struct kmem_cache_node *n; - - VM_BUG_ON(!irqs_disabled()); - - if (!(s->flags & SLAB_STORE_USER)) - return; - - n = get_node(s, page_to_nid(page)); spin_lock(&n->list_lock); list_add(&page->lru, &n->full); spin_unlock(&n->list_lock); @@ -924,10 +919,16 @@ static __always_inline int slab_trylock( /* * Management of partially allocated slabs */ -static void add_partial(struct kmem_cache *s, struct page *page) +static void add_partial_tail(struct kmem_cache_node *n, struct page *page) { - struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + spin_lock(&n->list_lock); + n->nr_partial++; + list_add_tail(&page->lru, &n->partial); + spin_unlock(&n->list_lock); +} +static void add_partial(struct kmem_cache_node *n, struct page *page) +{ spin_lock(&n->list_lock); n->nr_partial++; list_add(&page->lru, &n->partial); @@ -1027,7 +1028,7 @@ static struct page *get_any_partial(stru n = get_node(s, zone_to_nid(*z)); if (n && cpuset_zone_allowed_hardwall(*z, flags) && - n->nr_partial > 2) { + n->nr_partial > MIN_PARTIAL) { page = get_partial_node(n); if (page) return page; @@ -1061,15 +1062,32 @@ static struct page *get_partial(struct k */ static void putback_slab(struct kmem_cache *s, struct page *page) { - if (page->inuse) { + struct kmem_cache_node *n = get_node(s, page_to_nid(page)); + + if (page->inuse || n->nr_partial < MIN_PARTIAL) { + if (page->freelist) - add_partial(s, page); - else if (PageError(page)) - add_full(s, page); + add_partial(n, page); + else + if (PageError(page) && (s->flags & SLAB_STORE_USER)) + add_full(n, page); slab_unlock(page); + } else { - slab_unlock(page); - discard_slab(s, page); + if (n->nr_partial < MIN_PARTIAL) { + /* + * Adding an empty page to the partial slabs in order + * to avoid page allocator overhead. This page needs to + * come after all the others that are not fully empty + * in order to make sure that we do maximum + * defragmentation. + */ + add_partial_tail(n, page); + slab_unlock(page); + } else { + slab_unlock(page); + discard_slab(s, page); + } } } @@ -1326,7 +1344,7 @@ checks_ok: * then add it. */ if (unlikely(!prior)) - add_partial(s, page); + add_partial(get_node(s, page_to_nid(page)), page); out_unlock: slab_unlock(page); @@ -1542,7 +1560,7 @@ static struct kmem_cache_node * __init e kmalloc_caches->node[node] = n; init_kmem_cache_node(n); atomic_long_inc(&n->nr_slabs); - add_partial(kmalloc_caches, page); + add_partial(n, page); return n; } -- From clameter@sgi.com Wed Apr 25 19:50:31 2007 Message-Id: <20070426025030.709166572@sgi.com> References: <20070426024946.111308599@sgi.com> User-Agent: quilt/0.45-1 Date: Wed, 25 Apr 2007 19:49:52 -0700 From: clameter@sgi.com To: christoph@lameter.com Subject: [patch 6/7] SLUB: Free slabs and sort partial slab lists in kmem_cache_shrink Content-Disposition: inline; filename=slab_shrink_cache At kmem_cache_shrink check if we have any empty slabs on the partial if so then remove them. Also--as an anti-fragmentation measure--sort the partial slabs so that the most fully allocated ones come first and the least allocated last. The next allocations may fill up the nearly full slabs. 
Having the least allocated slabs last gives them the maximum chance that their remaining objects may be freed. Thus we can hopefully minimize the partial slabs. I think this is the best one can do in terms antifragmentation measures. Real defragmentation (meaning moving objects out of slabs with the least free objects to those that are almost full) can be implemted by reverse scanning through the list produced here but that would mean that we need to provide a callback at slab cache creation that allows the deletion or moving of an object. This will involve slab API changes so defer for now. Signed-off-by: Christoph Lameter --- mm/slub.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 104 insertions(+), 14 deletions(-) Index: slub/mm/slub.c =================================================================== --- slub.orig/mm/slub.c 2007-04-25 15:15:45.000000000 -0700 +++ slub/mm/slub.c 2007-04-25 16:47:36.000000000 -0700 @@ -93,9 +93,6 @@ * slab handling out of the fast path. */ -/* Mininum number of partial slabs */ -#define MIN_PARTIAL 2 - /* * Issues still to be resolved: * @@ -112,6 +109,19 @@ /* Enable to test recovery from slab corruption on boot */ #undef SLUB_RESILIENCY_TEST +/* + * Mininum number of partial slabs. These will be left on the partial + * lists even if they are empty. kmem_cache_shrink may reclaim them. + */ +#define MIN_PARTIAL 2 + +/* + * Maximum number of desirable partial slabs. + * The existence of more partial slabs makes kmem_cache_shrink + * reorganize the slab list by object number. + */ +#define MAX_PARTIAL 10 + #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_STORE_USER) /* @@ -2165,6 +2175,77 @@ void kfree(const void *x) } EXPORT_SYMBOL(kfree); +/* + * kmem_cache_shrink removes empty slabs from the partial lists + * and then sorts the partially allocated slabs by the number + * of items in use. The slabs with the most items in use + * come first. New allocations will remove these from the + * partial list because they are full. The slabs with the + * least items are placed last. If it happens that the objects + * are freed then the page can be returned to the page allocator. + */ +int kmem_cache_shrink(struct kmem_cache *s) +{ + int node; + int i; + struct kmem_cache_node *n; + struct page *page; + struct page *t; + struct list_head *slabs_by_inuse = + kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL); + unsigned long flags; + + if (!slabs_by_inuse) + return -ENOMEM; + + flush_all(s); + for_each_online_node(node) { + n = get_node(s, node); + + if (n->nr_partial <= MIN_PARTIAL) + continue; + + for (i = 0; i < s->objects; i++) + INIT_LIST_HEAD(slabs_by_inuse + i); + + spin_lock_irqsave(&n->list_lock, flags); + + /* + * Build lists indexed by the items in use in + * each slab or free slabs if empty. + * + * Note that concurrent frees may occur while + * we hold the list_lock. page->inuse here is + * the upper limit. + */ + list_for_each_entry_safe(page, t, &n->partial, lru) { + if (!page->inuse) { + list_del(&page->lru); + discard_slab(s, page); + } else + if (n->nr_partial > MAX_PARTIAL) + list_move(&page->lru, + slabs_by_inuse + page->inuse); + } + + if (n->nr_partial <= MAX_PARTIAL) + continue; + + /* + * Rebuild the partial list with the slabs filled up + * most first and the least used slabs at the end. 
+ */ + for (i = s->objects - 1; i > 0; i--) + list_splice(slabs_by_inuse + i, n->partial.prev); + + spin_unlock_irqrestore(&n->list_lock, flags); + } + + kfree(slabs_by_inuse); + return 0; +} +EXPORT_SYMBOL(kmem_cache_shrink); + /** * krealloc - reallocate memory. The contents will remain unchanged. * @@ -2410,17 +2491,6 @@ static struct notifier_block __cpuinitda #endif -/*************************************************************** - * Compatiblility definitions - **************************************************************/ - -int kmem_cache_shrink(struct kmem_cache *s) -{ - flush_all(s); - return 0; -} -EXPORT_SYMBOL(kmem_cache_shrink); - #ifdef CONFIG_NUMA /***************************************************************** @@ -3199,6 +3269,25 @@ static ssize_t validate_store(struct kme } SLAB_ATTR(validate); +static ssize_t shrink_show(struct kmem_cache *s, char *buf) +{ + return 0; +} + +static ssize_t shrink_store(struct kmem_cache *s, + const char *buf, size_t length) +{ + if (buf[0] == '1') { + int rc = kmem_cache_shrink(s); + + if (rc) + return rc; + } else + return -EINVAL; + return length; +} +SLAB_ATTR(shrink); + static ssize_t alloc_calls_show(struct kmem_cache *s, char *buf) { if (!(s->flags & SLAB_STORE_USER)) @@ -3255,6 +3344,7 @@ static struct attribute * slab_attrs[] = &poison_attr.attr, &store_user_attr.attr, &validate_attr.attr, + &shrink_attr.attr, &alloc_calls_attr.attr, &free_calls_attr.attr, #ifdef CONFIG_ZONE_DMA -- From clameter@sgi.com Wed Apr 25 19:50:32 2007 Message-Id: <20070426025031.787956951@sgi.com> References: <20070426024946.111308599@sgi.com> User-Agent: quilt/0.45-1 Date: Wed, 25 Apr 2007 19:49:53 -0700 From: clameter@sgi.com To: christoph@lameter.com Subject: [patch 7/7] SLUB: Major slabinfo update Content-Disposition: inline; filename=slub_slabinfo_update Enhancement to slabinfo - Support for slab shrinking (-r option) - Slab summary showing system totals - Sync with new form of alias handling - Sort by size, reverse sorting etc - Alias lookups Signed-off-by: Christoph Lameter Index: linux-2.6.21-rc7-mm1/Documentation/vm/slabinfo.c =================================================================== --- linux-2.6.21-rc7-mm1.orig/Documentation/vm/slabinfo.c 2007-04-25 19:41:19.000000000 -0700 +++ linux-2.6.21-rc7-mm1/Documentation/vm/slabinfo.c 2007-04-25 19:42:27.000000000 -0700 @@ -17,15 +17,45 @@ #include #include +#define MAX_SLABS 500 +#define MAX_ALIASES 500 + +struct slabinfo { + char *name; + int alias; + int refs; + int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; + int hwcache_align, object_size, objs_per_slab; + int sanity_checks, slab_size, store_user, trace; + int order, poison, reclaim_account, red_zone; + unsigned long partial, objects, slabs; + char *numa; + char *numa_partial; +} slabinfo[MAX_SLABS]; + +struct aliasinfo { + char *name; + char *ref; + struct slabinfo *slab; +} aliasinfo[MAX_ALIASES]; + +int slabs = 0; +int aliases = 0; + char buffer[4096]; int show_alias = 0; int show_slab = 0; -int show_parameters = 0; int skip_zero = 1; int show_numa = 0; int show_track = 0; +int show_first_alias = 0; int validate = 0; +int shrink = 0; +int show_inverted = 0; +int show_single_ref = 0; +int show_totals = 0; +int sort_size = 0; int page_size; @@ -47,11 +77,16 @@ void usage(void) "-a|--aliases Show aliases\n" "-h|--help Show usage information\n" "-n|--numa Show NUMA information\n" - "-p|--parameters Show global parameters\n" + "-r|--reduce Shrink slabs\n" "-v|--validate Validate slabs\n" "-t|--tracking Show alloc/free 
information\n" + "-T|--Totals Show summary information\n" "-s|--slabs Show slabs\n" + "-S|--Size Sort by size\n" "-z|--zero Include empty slabs\n" + "-f|--first-alias Show first alias\n" + "-i|--inverted Inverted list\n" + "-1|--1ref Single reference\n" ); } @@ -86,23 +121,32 @@ unsigned long get_obj(char *name) unsigned long get_obj_and_str(char *name, char **x) { unsigned long result = 0; + char *p; + + *x = NULL; if (!read_obj(name)) { x = NULL; return 0; } - result = strtoul(buffer, x, 10); - while (**x == ' ') - (*x)++; + result = strtoul(buffer, &p, 10); + while (*p == ' ') + p++; + if (*p) + *x = strdup(p); return result; } -void set_obj(char *name, int n) +void set_obj(struct slabinfo *s, char *name, int n) { - FILE *f = fopen(name, "w"); + char x[100]; + + sprintf(x, "%s/%s", s->name, name); + + FILE *f = fopen(x, "w"); if (!f) - fatal("Cannot write to %s\n", name); + fatal("Cannot write to %s\n", x); fprintf(f, "%d\n", n); fclose(f); @@ -143,31 +187,14 @@ int store_size(char *buffer, unsigned lo return n; } -void alias(const char *name) +void slab_validate(struct slabinfo *s) { - int count; - char *p; - - if (!show_alias) - return; - - count = readlink(name, buffer, sizeof(buffer)); - - if (count < 0) - return; - - buffer[count] = 0; - - p = buffer + count; - - while (p > buffer && p[-1] != '/') - p--; - printf("%-20s -> %s\n", name, p); + set_obj(s, "validate", 1); } -void slab_validate(char *name) +void slab_shrink(struct slabinfo *s) { - set_obj("validate", 1); + set_obj(s, "shrink", 1); } int line = 0; @@ -178,132 +205,543 @@ void first_line(void) "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); } -void slab(const char *name) +/* + * Find the shortest alias of a slab + */ +struct aliasinfo *find_one_alias(struct slabinfo *find) +{ + struct aliasinfo *a; + struct aliasinfo *best = NULL; + + for(a = aliasinfo;a < aliasinfo + aliases; a++) { + if (a->slab == find && + (!best || strlen(best->name) < strlen(a->name))) { + best = a; + if (strncmp(a->name,"kmall", 5) == 0) + return best; + } + } + if (best) + return best; + fatal("Cannot find alias for %s\n", find->name); + return NULL; +} + +unsigned long slab_size(struct slabinfo *s) +{ + return s->slabs * (page_size << s->order); +} + + +void slabcache(struct slabinfo *s) { - unsigned long aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; - unsigned long hwcache_align, object_size, objects, objs_per_slab; - unsigned long order, partial, poison, reclaim_account, red_zone; - unsigned long sanity_checks, slab_size, slabs, store_user, trace; char size_str[20]; char dist_str[40]; + char nn[20]; char flags[20]; char *p = flags; + char *n = s->name; - if (!show_slab) + if (skip_zero && !s->slabs) return; - aliases = get_obj("aliases"); - align = get_obj("align"); - cache_dma = get_obj("cache_dma"); - cpu_slabs = get_obj("cpu_slabs"); - destroy_by_rcu = get_obj("destroy_by_rcu"); - hwcache_align = get_obj("hwcache_align"); - object_size = get_obj("object_size"); - objects = get_obj("objects"); - objs_per_slab = get_obj("objs_per_slab"); - order = get_obj("order"); - partial = get_obj("partial"); - poison = get_obj("poison"); - reclaim_account = get_obj("reclaim_account"); - red_zone = get_obj("red_zone"); - sanity_checks = get_obj("sanity_checks"); - slab_size = get_obj("slab_size"); - slabs = get_obj("slabs"); - store_user = get_obj("store_user"); - trace = get_obj("trace"); - - if (skip_zero && !slabs) - return; - - store_size(size_str, slabs * page_size); - sprintf(dist_str,"%lu/%lu/%lu", slabs, partial, cpu_slabs); + store_size(size_str, 
slab_size(s)); + sprintf(dist_str,"%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); if (!line++) first_line(); - if (aliases) + if (s->aliases) *p++ = '*'; - if (cache_dma) + if (s->cache_dma) *p++ = 'd'; - if (hwcache_align) + if (s->hwcache_align) *p++ = 'A'; - if (poison) + if (s->poison) *p++ = 'P'; - if (reclaim_account) + if (s->reclaim_account) *p++ = 'a'; - if (red_zone) + if (s->red_zone) *p++ = 'Z'; - if (sanity_checks) + if (s->sanity_checks) *p++ = 'F'; - if (store_user) + if (s->store_user) *p++ = 'U'; - if (trace) + if (s->trace) *p++ = 'T'; *p = 0; - printf("%-20s %8ld %7ld %8s %14s %3ld %1ld %3ld %3ld %s\n", - name, objects, object_size, size_str, dist_str, - objs_per_slab, order, - slabs ? (partial * 100) / slabs : 100, - slabs ? (objects * object_size * 100) / - (slabs * (page_size << order)) : 100, + + if (n[0] == ':') { + strncpy(nn, n, 20); + n = nn; + p = n + 4; + while (*p && *p !=':') + p++; + *p = 0; + } + printf("%-20s %8ld %7d %8s %14s %3d %1d %3ld %3ld %s\n", + n, s->objects, s->object_size, size_str, dist_str, + s->objs_per_slab, s->order, + s->slabs ? (s->partial * 100) / s->slabs : 100, + s->slabs ? (s->objects * s->object_size * 100) / + (s->slabs * (page_size << s->order)) : 100, flags); } -void slab_numa(const char *name) +void slab_numa(struct slabinfo *s) { - unsigned long slabs; - char *numainfo; - - slabs = get_obj_and_str("slabs", &numainfo); - - if (skip_zero && !slabs) + if (skip_zero && !s->slabs) return; - printf("%-20s %s", name, numainfo); -} - -void parameter(const char *name) -{ - if (!show_parameters) - return; + printf("%-20s %s", s->name, s->numa); } -void show_tracking(const char *name) +void show_tracking(struct slabinfo *s) { - printf("\n%s: Calls to allocate a slab object\n", name); + printf("\n%s: Calls to allocate a slab object\n", s->name); printf("---------------------------------------------------\n"); if (read_obj("alloc_calls")) printf(buffer); - printf("%s: Calls to free a slab object\n", name); + printf("%s: Calls to free a slab object\n", s->name); printf("-----------------------------------------------\n"); if (read_obj("free_calls")) printf(buffer); } +void totals(void) +{ + struct slabinfo *s; + + int used_slabs = 0; + char b1[20], b2[20], b3[20], b4[20]; + unsigned long long min_objsize = 0, max_objsize = 0, avg_objsize; + unsigned long long min_partial = 0, max_partial = 0, avg_partial, total_partial = 0; + unsigned long long min_slabs = 0, max_slabs = 0, avg_slabs, total_slabs = 0; + unsigned long long min_size = 0, max_size = 0, avg_size, total_size = 0; + unsigned long long min_waste = 0, max_waste = 0, avg_waste, total_waste = 0; + unsigned long long min_objects = 0, max_objects = 0, avg_objects, total_objects = 0; + unsigned long long min_objwaste = 0, max_objwaste = 0, avg_objwaste; + unsigned long long min_used = 0, max_used = 0, avg_used, total_used = 0; + unsigned long min_ppart = 0, max_ppart = 0, avg_ppart, total_ppart = 0; + unsigned long min_partobj = 0, max_partobj = 0, avg_partobj; + unsigned long total_objects_in_partial = 0; + + for (s = slabinfo; s < slabinfo + slabs; s++) { + unsigned long long size; + unsigned long partial; + unsigned long slabs; + unsigned long used; + unsigned long long wasted; + unsigned long long objwaste; + long long objects_in_partial; + unsigned long percentage_partial; + + if (!s->slabs || !s->objects) + continue; + + used_slabs++; + + size = slab_size(s); + partial = s->partial << s->order; + slabs = s->slabs << s->order; + used = s->objects * s->object_size; + wasted = size 
- used; + objwaste = wasted / s->objects; + + objects_in_partial = s->objects - (s->slabs - s->partial - s ->cpu_slabs) + * s->objs_per_slab; + + if (objects_in_partial < 0) + objects_in_partial = 0; + printf("%s oip=%ldd obj=%ld slabs=%ld part=%ld cpu=%d ops=%d\n", + s->name, objects_in_partial, s->objects, s->slabs, s->partial, + s->cpu_slabs, s->objs_per_slab); + + percentage_partial = objects_in_partial * 100 / s->objects; + + printf("ppart=%d\n",percentage_partial); + + if (s->object_size < min_objsize || !min_objsize) + min_objsize = s->object_size; + if (partial && (partial < min_partial || !min_partial)) + min_partial = partial; + if (slabs < min_slabs || !min_partial) + min_slabs = slabs; + if (size < min_size) + min_size = size; + if (wasted < min_waste && !min_waste) + min_waste = wasted; + if (objwaste < min_objwaste || !min_objwaste) + min_objwaste = objwaste; + if (s->objects < min_objects || !min_objects) + min_objects = s->objects; + if (used < min_used || !min_used) + min_used = used; + if (objects_in_partial < min_partobj || !min_partobj) + min_partobj = objects_in_partial; + if (percentage_partial < min_ppart || !min_ppart) + min_ppart = percentage_partial; + + if (s->object_size > max_objsize) + max_objsize = s->object_size; + if (partial > max_partial) + max_partial = partial; + if (slabs > max_slabs) + max_slabs = slabs; + if (size > max_size) + max_size = size; + if (wasted > max_waste) + max_waste = wasted; + if (objwaste > max_objwaste) + max_objwaste = objwaste; + if (s->objects > max_objects) + max_objects = s->objects; + if (used > max_used) + max_used = used; + if (objects_in_partial > max_partobj) + max_partobj = objects_in_partial; + if (percentage_partial > max_ppart) + max_ppart = percentage_partial; + + total_objects += s->objects; + total_partial += partial; + total_slabs += slabs; + total_used += used; + total_waste += wasted; + total_size += size; + total_ppart += percentage_partial; + total_objects_in_partial += objects_in_partial; + } + + if (!total_objects) { + printf("No objects\n"); + return; + } + if (!used_slabs) { + printf("No slabs\n"); + return; + } + avg_partial = total_partial / used_slabs; + avg_slabs = total_slabs / used_slabs; + avg_waste = total_waste / used_slabs; + avg_size = total_waste / used_slabs; + avg_objects = total_objects / used_slabs; + avg_used = total_used / used_slabs; + avg_ppart = total_ppart / used_slabs; + avg_partobj = total_objects_in_partial / used_slabs; + + avg_objsize = total_used / total_objects; + avg_objwaste = total_waste / total_objects; + + printf("Slabcache Totals\n"); + printf("----------------\n"); + printf("Slabcaches : %3d Aliases : %3d Active: %3d\n", + slabs, aliases, used_slabs); + + store_size(b1, total_used);store_size(b2, total_waste); + store_size(b3, total_waste * 100 / total_used); + printf("Memory used: %5s # Loss : %5s MRatio: %3s%%\n", b1, b2, b3); + + store_size(b1, total_objects);store_size(b2, total_objects_in_partial); + store_size(b3, total_objects_in_partial * 100 / total_objects); + printf("# Objects : %5s # PartObj: %5s ORatio: %3s%%\n", b1, b2, b3); + + printf("\n"); + printf("Per Cache Average Min Max Total\n"); + printf("---------------------------------------------------------\n"); + + store_size(b1, avg_objects);store_size(b2, min_objects); + store_size(b3, max_objects);store_size(b4, total_objects); + printf("# Objects %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_slabs);store_size(b2, min_slabs); + store_size(b3, max_slabs);store_size(b4, total_slabs); + 
printf("# Slabs %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_partial);store_size(b2, min_partial); + store_size(b3, max_partial);store_size(b4, total_partial); + printf("# Partial %10s %10s %10s %10s\n", + b1, b2, b3, b4); + store_size(b1, avg_ppart);store_size(b2, min_ppart); + store_size(b3, max_ppart); + printf("Partial %10s%% %10s%% %10s%%\n", + b1, b2, b3); + + store_size(b1, avg_size);store_size(b2, min_size); + store_size(b3, max_size);store_size(b4, total_size); + printf("Memory %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_used);store_size(b2, min_used); + store_size(b3, max_used);store_size(b4, total_used); + printf("Used %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_slabs);store_size(b2, min_slabs); + store_size(b3, max_slabs);store_size(b4, total_slabs); + printf("Waste %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + printf("\n"); + printf("Per Object Average Min Max\n"); + printf("---------------------------------------------\n"); + + store_size(b1, avg_objsize);store_size(b2, min_objsize); + store_size(b3, max_objsize); + printf("Size %10s %10s %10s\n", + b1, b2, b3); + + store_size(b1, avg_objwaste);store_size(b2, min_objwaste); + store_size(b3, max_objwaste); + printf("Loss %10s %10s %10s\n", + b1, b2, b3); +} + +void sort_slabs(void) +{ + struct slabinfo *s1,*s2; + + for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) { + for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { + int result; + + if (sort_size) + result = slab_size(s1) < slab_size(s2); + else + result = strcasecmp(s1->name, s2->name); + + if (show_inverted) + result = -result; + + if (result > 0) { + struct slabinfo t; + + memcpy(&t, s1, sizeof(struct slabinfo)); + memcpy(s1, s2, sizeof(struct slabinfo)); + memcpy(s2, &t, sizeof(struct slabinfo)); + } + } + } +} + +void sort_aliases(void) +{ + struct aliasinfo *a1,*a2; + + for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) { + for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) { + char *n1, *n2; + + n1 = a1->name; + n2 = a2->name; + if (show_alias && !show_inverted) { + n1 = a1->ref; + n2 = a2->ref; + } + if (strcasecmp(n1, n2) > 0) { + struct aliasinfo t; + + memcpy(&t, a1, sizeof(struct aliasinfo)); + memcpy(a1, a2, sizeof(struct aliasinfo)); + memcpy(a2, &t, sizeof(struct aliasinfo)); + } + } + } +} + +void link_slabs(void) +{ + struct aliasinfo *a; + struct slabinfo *s; + + for (a = aliasinfo; a < aliasinfo + aliases; a++) { + + for(s = slabinfo; s < slabinfo + slabs; s++) + if (strcmp(a->ref, s->name) == 0) { + a->slab = s; + s->refs++; + break; + } + if (s == slabinfo + slabs) + fatal("Unresolved alias %s\n", a->ref); + } +} + +void alias(void) +{ + struct aliasinfo *a; + char *active = NULL; + + sort_aliases(); + link_slabs(); + + for(a = aliasinfo; a < aliasinfo + aliases; a++) { + + if (!show_single_ref && a->slab->refs == 1) + continue; + + if (!show_inverted) { + if (active) { + if (strcmp(a->slab->name, active) == 0) { + printf(" %s", a->name); + continue; + } + } + printf("\n%-20s <- %s", a->slab->name, a->name); + active = a->slab->name; + } + else + printf("%-20s -> %s\n", a->name, a->slab->name); + } + if (active) + printf("\n"); +} + + +void rename_slabs(void) +{ + struct slabinfo *s; + struct aliasinfo *a; + + for (s = slabinfo; s < slabinfo + slabs; s++) { + if (*s->name != ':') + continue; + + if (s->refs > 1 && !show_first_alias) + continue; + + a = find_one_alias(s); + + s->name = a->name; + } +} + int slab_mismatch(char *slab) { return regexec(&pattern, slab, 0, NULL, 0); } +void 
read_slab_dir(void) +{ + DIR *dir; + struct dirent *de; + struct slabinfo *slab = slabinfo; + struct aliasinfo *alias = aliasinfo; + char *p; + int count; + + dir = opendir("."); + while ((de = readdir(dir))) { + if (de->d_name[0] == '.' || + slab_mismatch(de->d_name)) + continue; + switch (de->d_type) { + case DT_LNK: + alias->name = strdup(de->d_name); + count = readlink(de->d_name, buffer, sizeof(buffer)); + + if (count < 0) + fatal("Cannot read symlink %s\n", de->d_name); + + buffer[count] = 0; + p = buffer + count; + while (p > buffer && p[-1] != '/') + p--; + alias->ref = strdup(p); + alias++; + break; + case DT_DIR: + if (chdir(de->d_name)) + fatal("Unable to access slab %s\n", slab->name); + slab->name = strdup(de->d_name); + slab->alias = 0; + slab->refs = 0; + slab->aliases = get_obj("aliases"); + slab->align = get_obj("align"); + slab->cache_dma = get_obj("cache_dma"); + slab->cpu_slabs = get_obj("cpu_slabs"); + slab->destroy_by_rcu = get_obj("destroy_by_rcu"); + slab->hwcache_align = get_obj("hwcache_align"); + slab->object_size = get_obj("object_size"); + slab->objects = get_obj("objects"); + slab->objs_per_slab = get_obj("objs_per_slab"); + slab->order = get_obj("order"); + slab->partial = get_obj("partial"); + slab->partial = get_obj_and_str("partial", &slab->numa_partial); + slab->poison = get_obj("poison"); + slab->reclaim_account = get_obj("reclaim_account"); + slab->red_zone = get_obj("red_zone"); + slab->sanity_checks = get_obj("sanity_checks"); + slab->slab_size = get_obj("slab_size"); + slab->slabs = get_obj_and_str("slabs", &slab->numa); + slab->store_user = get_obj("store_user"); + slab->trace = get_obj("trace"); + chdir(".."); + slab++; + break; + default : + fatal("Unknown file type %lx\n", de->d_type); + } + } + closedir(dir); + slabs = slab - slabinfo; + aliases = alias - aliasinfo; + if (slabs > MAX_SLABS) + fatal("Too many slabs\n"); + if (aliases > MAX_ALIASES) + fatal("Too many aliases\n"); +} + +void output_slabs(void) +{ + struct slabinfo *slab; + + for (slab = slabinfo; slab < slabinfo + slabs; slab++) { + + if (slab->alias) + continue; + + + if (show_numa) + slab_numa(slab); + else + if (show_track) + show_tracking(slab); + else + if (validate) + slab_validate(slab); + else + if (shrink) + slab_shrink(slab); + else { + if (show_slab) + slabcache(slab); + } + } +} + struct option opts[] = { { "aliases", 0, NULL, 'a' }, { "slabs", 0, NULL, 's' }, { "numa", 0, NULL, 'n' }, - { "parameters", 0, NULL, 'p' }, { "zero", 0, NULL, 'z' }, { "help", 0, NULL, 'h' }, { "validate", 0, NULL, 'v' }, + { "first-alias", 0, NULL, 'f' }, + { "reduce", 0, NULL, 'r' }, { "track", 0, NULL, 't'}, + { "inverted", 0, NULL, 'i'}, + { "1ref", 0, NULL, '1'}, { NULL, 0, NULL, 0 } }; int main(int argc, char *argv[]) { - DIR *dir; - struct dirent *de; int c; int err; char *pattern_source; @@ -312,22 +750,31 @@ int main(int argc, char *argv[]) if (chdir("/sys/slab")) fatal("This kernel does not have SLUB support.\n"); - while ((c = getopt_long(argc, argv, "ahtvnpsz", opts, NULL)) != -1) + while ((c = getopt_long(argc, argv, "afhi1nprstvzTS", opts, NULL)) != -1) switch(c) { - case 's': - show_slab = 1; + case '1': + show_single_ref = 1; break; case 'a': show_alias = 1; break; + case 'f': + show_first_alias = 1; + break; + case 'h': + usage(); + return 0; + case 'i': + show_inverted = 1; + break; case 'n': show_numa = 1; break; - case 'p': - show_parameters = 1; + case 'r': + shrink = 1; break; - case 'z': - skip_zero = 0; + case 's': + show_slab = 1; break; case 't': show_track = 1; @@ 
-335,17 +782,23 @@ int main(int argc, char *argv[]) case 'v': validate = 1; break; - case 'h': - usage(); - return 0; + case 'z': + skip_zero = 0; + break; + case 'T': + show_totals = 1; + break; + case 'S': + sort_size = 1; + break; default: fatal("%s: Invalid option '%c'\n", argv[0], optopt); } - if (!show_slab && !show_alias && !show_parameters && !show_track - && !validate) + if (!show_slab && !show_alias && !show_track + && !validate && !shrink) show_slab = 1; if (argc > optind) @@ -357,39 +810,17 @@ int main(int argc, char *argv[]) if (err) fatal("%s: Invalid pattern '%s' code %d\n", argv[0], pattern_source, err); - - dir = opendir("."); - while ((de = readdir(dir))) { - if (de->d_name[0] == '.' || - slab_mismatch(de->d_name)) - continue; - switch (de->d_type) { - case DT_LNK: - alias(de->d_name); - break; - case DT_DIR: - if (chdir(de->d_name)) - fatal("Unable to access slab %s\n", de->d_name); - - if (show_numa) - slab_numa(de->d_name); - else - if (show_track) - show_tracking(de->d_name); - else - if (validate) - slab_validate(de->d_name); - else - slab(de->d_name); - chdir(".."); - break; - case DT_REG: - parameter(de->d_name); - break; - default : - fatal("Unknown file type %lx\n", de->d_type); - } + read_slab_dir(); + if (show_alias) + alias(); + else + if (show_totals) + totals(); + else { + link_slabs(); + rename_slabs(); + sort_slabs(); + output_slabs(); } - closedir(dir); return 0; } --
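(Aside, not part of the patch series: example invocations of the updated
slabinfo tool, assuming it has been compiled from
Documentation/vm/slabinfo.c with an ordinary gcc invocation, which the
patch itself does not cover:

	slabinfo        show all slab caches that currently hold objects
	slabinfo -z     include empty caches as well
	slabinfo -T     print the system-wide totals summary
	slabinfo -S     sort the cache listing by size
	slabinfo -a     show which aliases map to which cache
	slabinfo -r     shrink all caches by writing 1 to each cache's "shrink" file
	slabinfo -v     validate all caches

The option letters are taken from the getopt string and usage text in the
patch above.)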