Index: linux-2.6.21-rc1/mm/slub.c
===================================================================
--- linux-2.6.21-rc1.orig/mm/slub.c	2007-02-27 16:01:00.000000000 -0800
+++ linux-2.6.21-rc1/mm/slub.c	2007-02-27 19:41:02.000000000 -0800
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <linux/ctype.h>
 
 /*
  * Lock order:
@@ -58,10 +59,10 @@
 /*
  * Flags from the regular SLAB that SLUB does not support:
  */
-#define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL | SLAB_STORE_USER)
+#define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL)
 
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
-		SLAB_STORE_USER | SLAB_POISON)
+		SLAB_POISON | SLAB_SANITY)
 
 /*
  * Set of flags that will prevent slab merging
  */
@@ -121,29 +122,102 @@ struct kmem_cache_node *get_node(struct
  */
 static void print_section(char *text, u8 *addr, unsigned int length)
 {
-	int i;
+	int i, offset;
 	int newline = 1;
+	char ascii[17];
+
+	if (length > 128)
+		length = 128;
+	ascii[16] = 0;
 
 	for (i = 0; i < length; i++) {
 		if (newline) {
-			printk(KERN_ERR "%s %p: ", text, addr + i);
+			printk(KERN_ERR "%10s %p: ", text, addr + i);
 			newline = 0;
 		}
-		printk(" %2x", addr[i]);
-		if ((i % 16) == 15) {
-			printk("\n");
+		printk(" %02x", addr[i]);
+		offset = i % 16;
+		ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
+		if (offset == 15) {
+			printk(" %s\n", ascii);
 			newline = 1;
 		}
 	}
+	if (!newline) {
+		i %= 16;
+		while (i < 16) {
+			printk("   ");
+			ascii[i] = ' ';
+			i++;
+		}
+		printk(" %s\n", ascii);
+	}
+}
+
+/* Slow version of get and set free pointer. */
+static void *get_freepointer(struct kmem_cache *s, void *object)
+{
+	void **p = object;
+
+	return p[s->offset];
+}
+
+static void set_freepointer(struct kmem_cache *s, void *object, void *fp)
+{
+	void **p = object;
+
+	p[s->offset] = fp;
+}
+
+static void *get_track(struct kmem_cache *s, void *object, int alloc)
+{
+	void **p = object + s->inuse + sizeof(void *);
+
+	return p[alloc];
+}
+
+static void set_track(struct kmem_cache *s, void *object,
+				int alloc, void *addr)
+{
+	void **p = object + s->inuse + sizeof(void *);
+
+	p[alloc] = addr;
+}
+
+#define set_tracking(__s, __o, __a) set_track(__s, __o, __a, \
+				__builtin_return_address(0))
+
+static void init_tracking(struct kmem_cache *s, void *object)
+{
+	if (s->flags & SLAB_STORE_USER) {
+		set_track(s, object, 0, NULL);
+		set_track(s, object, 1, NULL);
+	}
 }
 
 static void print_trailer(struct kmem_cache *s, u8 *p)
 {
+	unsigned int off = s->inuse;
+
+	if (off == s->offset)
+		off += sizeof(void *);
+
 	if (s->flags & SLAB_RED_ZONE)
 		print_section("Redzone", p + s->objsize,
 			s->inuse - s->objsize);
-	if (s->inuse != s->size)
-		print_section("Filler", p + s->inuse, s->size - s->inuse);
+
+	printk(KERN_ERR "FreePointer %p: %p\n", p + s->inuse,
+			get_freepointer(s, p));
+
+	if (s->flags & SLAB_STORE_USER) {
+		printk(KERN_ERR "Last Allocate from %p. Last Free from %p\n",
+			get_track(s, p, 0), get_track(s, p, 1));
+		off += 2 * sizeof(void *);
+	}
+
+	if (off != s->size)
+		/* Beginning of the filler is the free pointer */
+		print_section("Filler", p + off, s->size - off);
 }
 
 static void object_err(struct kmem_cache *s, struct page *page,
@@ -151,23 +225,28 @@ static void object_err(struct kmem_cache
 {
 	u8 *addr = page_address(page);
 
-	printk(KERN_ERR "SLUB: %s failure in %s@%p offset=%ld flags=%lx "
-		"inuse=%d freelist=%p\n",
-		reason, s->name, object, object - addr, page->flags,
-		page->inuse, page->freelist);
-
-	print_section("Object", object, min(s->objsize, 128));
+	printk(KERN_ERR "*** SLUB: %s failure in %s@%p Slab %p\n",
+		reason, s->name, object, page);
+	printk(KERN_ERR " offset=%ld flags=%04lx inuse=%d freelist=%p\n",
+		object - addr, page->flags, page->inuse, page->freelist);
+	print_section("Object", object, s->objsize);
 	print_trailer(s, object);
 	if (object > addr) {
 		printk(KERN_ERR "Prior object trailer:\n");
 		print_trailer(s, object - s->size);
 	}
+	printk(KERN_ERR "dump\n");
+	dump_stack();
+	printk(KERN_ERR "dump\n");
 }
 
 static void init_object(struct kmem_cache *s, void *object, int active)
 {
 	u8 *p = object;
 
+	if (s->objects == 1)
+		return;
+
 	if (s->flags & SLAB_POISON) {
 		memset(p, POISON_FREE, s->objsize -1);
 		p[s->objsize -1] = POISON_END;
@@ -190,25 +269,82 @@ static int check_bytes(u8 *start, unsign
 	return 1;
 }
 
+/*
+ * Object layout:
+ *
+ * object
+ *	Bytes of the object to be managed.
+ * object + objsize
+ *	Filler to reach word boundary. This is also used for redzoning.
+ * object + inuse
+ *	Filler to reach required alignment boundary
+ * object + size
+ *
+ * Object layouts can only be assured if the slabcache was not merged. If any
+ * debug options are set then the slabcache will not be merged. If slabs were
+ * merged then the objsize and inuse boundaries are to be ignored.
+ */
 static int check_object(struct kmem_cache *s, struct page *page,
 					void *object, int active)
 {
 	u8 *p = object;
+	u8 *endobject = object + s->objsize;
+	u8 *page_addr;
+	u8 *fp;
 
-	if (s->flags & SLAB_RED_ZONE)
-		if (!check_bytes(p + s->objsize,
+	/* Offset of first byte after free pointer */
+	unsigned long off = s->inuse;
+
+	if (s->offset)
+		off += sizeof(void *);
+
+	/* Single object slabs do not get policed */
+	if (s->objects == 1)
+		return 1;
+
+	if (s->flags & SLAB_RED_ZONE) {
+		if (!check_bytes(endobject,
 			active ? RED_ACTIVE : RED_INACTIVE,
-			s->inuse - s->objsize)) {
-			object_err(s, page, object,
-			active ? "Redzone Active" : "Redzone Inactive");
-			return 0;
-		}
-	if ((s->flags & SLAB_POISON) && !active)
-		if (!check_bytes(p, POISON_FREE, s->objsize -1) ||
-			p[s->objsize -1] != POISON_END) {
+			s->inuse - s->objsize)) {
+			object_err(s, page, object,
+				active ? "Redzone Active" :
+				"Redzone Inactive");
+			return 0;
+		}
+	} else
+		if ((s->flags & SLAB_POISON) &&
+			s->objsize < s->inuse &&
+			!check_bytes(endobject, POISON_INUSE, s->inuse - s->objsize))
+			object_err(s, page, p, "Alignment Filler Check");
+
+	if (s->flags & SLAB_POISON) {
+		if (!active && (!check_bytes(p, POISON_FREE, s->objsize - 1) ||
+			p[s->objsize -1] != POISON_END)) {
 			object_err(s, page, p, "Poison");
 			return 0;
 		}
+		if (s->size > off && !check_bytes(p + off,
+			POISON_INUSE, s->size - off))
+			object_err(s, page, p,
+				"Interobject Filler Check");
+	}
+
+	/* Check free pointer validity */
+	fp = get_freepointer(s, p);
+	page_addr = page_address(page);
+
+	if (fp && (fp < page_addr ||
+		fp >= page_addr + (PAGE_SIZE << s->order) ||
+		((fp - page_addr) % s->size))) {
+		object_err(s, page, p, "Freepointer corrupted");
+		/*
+		 * No choice but to zap it. This may cause
+		 * another error because the object count
+		 * is now wrong.
+		 */
+		set_freepointer(s, p, NULL);
+		return 0;
+	}
 	return 1;
 }
 
@@ -217,8 +353,11 @@ static int check_valid_pointer(struct km
 {
 	void *base = page_address(page);
 
+	if (!object)
+		return 1;
+
 	if (object < base || object >= base + s->objects * s->size) {
-		printk(KERN_CRIT "slab %s size %d: pointer %p->%p not in"
+		printk(KERN_CRIT "SLUB: %s size %d: pointer %p->%p not in"
 			" range (%p-%p) in page %p\n",
 			s->name, s->size, origin, object, base,
 			base + s->objects * s->size, page);
@@ -226,23 +365,39 @@ static int check_valid_pointer(struct km
 	}
 
 	if ((object - base) % s->size) {
-		printk(KERN_CRIT "slab %s size %d: pointer %p->%p\n"
+		printk(KERN_CRIT "SLUB: %s size %d: pointer %p->%p\n"
 			"does not properly point"
-			"to an object in page %p\n",
+			" to an object in slab %p\n",
 			s->name, s->size, origin, object, page);
 		return 0;
 	}
 	return 1;
 }
 
-static void check_slab(struct page *page)
+static int check_slab(struct kmem_cache *s, struct page *page)
 {
 	if (!PageSlab(page)) {
-		printk(KERN_CRIT "Not a valid slab page @%p flags=%lx"
+		printk(KERN_CRIT "SLUB: %s Not a valid slab page @%p flags=%lx"
 			" mapping=%p count=%d \n",
-			page, page->flags, page->mapping, page_count(page));
-		BUG();
+			s->name, page, page->flags, page->mapping,
+			page_count(page));
+		return 0;
 	}
+	if (page->offset != s->offset) {
+		printk(KERN_CRIT "SLUB: %s Corrupted offset %u in slab @%p"
+			" flags=%lx mapping=%p count=%d\n",
+			s->name, page->offset, page, page->flags,
+			page->mapping, page_count(page));
+		return 0;
+	}
+	if (page->inuse > s->objects) {
+		printk(KERN_CRIT "SLUB: %s Inuse %u > max %u in slab page @%p"
+			" flags=%lx mapping=%p count=%d\n",
+			s->name, page->inuse, s->objects, page, page->flags,
+			page->mapping, page_count(page));
+		return 0;
+	}
+	return 1;
 }
 
 /*
@@ -259,29 +414,21 @@ static int on_freelist(struct kmem_cache
 	if (s->objects == 1)
 		return 0;
 
-	check_slab(page);
-
 	while (object && nr <= s->objects) {
 		if (object == search)
 			return 1;
 		if (!check_valid_pointer(s, page, object, origin))
-			goto try_recover;
+			return 0;
 		origin = object;
 		object = object[s->offset];
 		nr++;
 	}
 
-	if (page->inuse != s->objects - nr) {
+	if (page->inuse != s->objects - nr)
 		printk(KERN_CRIT "slab %s: page %p wrong object count."
 			" counter is %d but counted were %d\n",
			s->name, page, page->inuse, s->objects - nr);
 
-try_recover:
-	printk(KERN_CRIT "****** Trying to continue by marking "
-		"all objects in the slab used (memory leak!)\n");
-	page->inuse = s->objects;
-	page->freelist = NULL;
-	}
 	return 0;
 }
 
@@ -291,6 +438,90 @@ static void check_free_chain(struct kmem
 	on_freelist(s, page, NULL);
 }
 
+static void alloc_object_checks(struct kmem_cache *s, struct page *page,
+							void *object)
+{
+	if (!check_slab(s, page))
+		goto bad;
+
+	if (object && !on_freelist(s, page, object))
+		goto bad;
+
+	if (!check_valid_pointer(s, page, object, object))
+		goto bad;
+
+	if (!object)
+		return;
+
+	if (!check_object(s, page, object, 0))
+		goto bad;
+	init_object(s, object, 1);
+
+	if (s->flags & SLAB_TRACE) {
+		printk("SLUB-Trace %s alloc object=%p slab=%p inuse=%d"
+			" freelist=%p\n",
+			s->name, object, page, page->inuse,
+			page->freelist);
+		dump_stack();
+	}
+	if (s->flags & SLAB_STORE_USER)
+		set_tracking(s, object, 0);
+	return;
+bad:
+	/* Mark slab full */
+	page->inuse = s->objects;
+	page->freelist = NULL;
+}
+
+static int free_object_checks(struct kmem_cache *s, struct page *page, void *object)
+{
+	if (!check_slab(s, page))
+		return 0;
+
+	if (!check_valid_pointer(s, page, object, NULL))
+		goto dumpret;
+
+	if (!check_object(s, page, object, 1))
+		goto dumpret;
+
+	if (unlikely(s != page->slab)) {
+		if (!PageSlab(page))
+			printk(KERN_CRIT "slab_free %s size %d: attempt to "
+				"free object(%p) outside of slab.\n",
+				s->name, s->size, object);
+		else
+		if (!page->slab) {
+			printk(KERN_CRIT
+				"slab_free : no slab(NULL) for object %p.\n",
+				object);
+			goto dumpret;
+		} else
+			printk(KERN_CRIT "slab_free %s(%d): object at %p"
+				" belongs to slab %s(%d)\n",
+				s->name, s->size, object,
+				page->slab->name, page->slab->size);
+		goto dumpret;
+	}
+	if (s->flags & SLAB_TRACE) {
+		printk("SLUB-Trace %s free object=%p slab=%p "
+			"inuse=%d freelist=%p\n",
+			s->name, object, page, page->inuse,
+			page->freelist);
+		print_section("SLUB-Trace", object, min(s->objsize, 128));
+		dump_stack();
+	}
+	init_object(s, object, 0);
+	if (s->flags & SLAB_STORE_USER)
+		set_tracking(s, object, 1);
+	return 1;
+
+dumpret:
+	dump_stack();
+	printk(KERN_CRIT "***** Trying to continue by not "
+		"freeing object.\n");
+	return 0;
+}
+
 /*
  * Slab allocation and freeing
  */
@@ -318,7 +549,7 @@ static struct page *allocate_slab(struct
 			NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 			pages);
 
-	if (unlikely(s->ctor)) {
+	if (unlikely(s->ctor) || (s->flags & SLAB_STORE_USER)) {
 		void *start = page_address(page);
 		void *end = start + (pages << PAGE_SHIFT);
 		void *p;
@@ -356,7 +587,8 @@ static struct page *new_slab(struct kmem
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
 	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
-			SLAB_STORE_USER | SLAB_TRACE) || s->objects == 1)
+			SLAB_STORE_USER | SLAB_TRACE | SLAB_SANITY) ||
+			s->objects == 1)
 		page->flags |= 1 << PG_error;
 
 	if (s->objects > 1) {
@@ -365,23 +597,24 @@ static struct page *new_slab(struct kmem
 		void **last = start;
 		void *p = start + s->size;
 
+		if (unlikely(s->flags & SLAB_POISON))
+			memset(start, POISON_INUSE, PAGE_SIZE << s->order);
 		while (p < end) {
-			init_object(s, last, 0);
+			if (PageError(page)) {
+				init_object(s, last, 0);
+				init_tracking(s, last);
+			}
 			last[s->offset] = p;
 			last = p;
 			p += s->size;
 		}
-		init_object(s, last, 0);
 		last[s->offset] = NULL;
 
 		page->freelist = start;
 		page->inuse = 0;
-		check_free_chain(s, page);
-		if (s->flags & SLAB_POISON) {
-			unsigned long leftover = start +
-				(PAGE_SIZE << s->order) - end;
-
-			if (leftover)
-				memset(end, POISON_INUSE, leftover);
+		if (PageError(page)) {
+			init_object(s, last, 0);
+			init_tracking(s, last);
+			check_free_chain(s, page);
 		}
 	}
 
@@ -396,13 +629,21 @@ static void __free_slab(struct kmem_cach
 {
 	int pages = 1 << s->order;
 
-	if (unlikely(s->dtor)) {
+	if (unlikely(PageError(page))) {
 		void *start = page_address(page);
 		void *end = start + (pages << PAGE_SHIFT);
 		void *p;
 
-		for (p = start; p <= end - s->size; p += s->size)
-			s->dtor(p, s, 0);
+		for (p = start; p <= end - s->size; p += s->size) {
+			if (s->dtor)
+				s->dtor(p, s, 0);
+			else
+				check_object(s, page, p, 0);
+		}
+		if ((s->flags & SLAB_POISON) &&
+			check_bytes(end, POISON_INUSE,
+				(PAGE_SIZE << s->order) - (end - start)))
+			object_err(s, page, p, "Slabend filler");
 	}
 
 	mod_zone_page_state(page_zone(page),
@@ -704,7 +945,6 @@ static __always_inline void *__slab_allo
 {
 	struct page *page;
 	void **object;
-	void *next_object;
 	unsigned long flags;
 	int cpu;
 
@@ -715,32 +955,19 @@ static __always_inline void *__slab_allo
 		goto new_slab;
 
 	slab_lock(page);
-	if (unlikely(PageError(page)))
-		check_free_chain(s, page);
-	if (unlikely(!page->freelist))
-		goto another_slab;
-
 	if (unlikely(node != -1 && page_to_nid(page) != node))
 		goto another_slab;
 redo:
+	if (unlikely(!page->freelist))
+		goto another_slab;
+	if (unlikely(PageError(page)))
+		alloc_object_checks(s, page, page->freelist);
 	page->inuse++;
 	object = page->freelist;
-	page->freelist = next_object = object[page->offset];
+	page->freelist = object[page->offset];
 	SetPageReferenced(page);
 	slab_unlock(page);
 	local_irq_restore(flags);
-	if (unlikely(PageError(page))) {
-		if (!check_object(s, page, object, 0))
-			dump_stack();
-		init_object(s, object, 1);
-		if (s->flags & SLAB_TRACE) {
-			printk("SLUB-Trace %s alloc node=%d gfp=%4lx "
-				"object=%p slab=%p inuse=%d freelist=%p\n",
-				s->name, node, (unsigned long)gfpflags,
-				object, page, page->inuse, page->freelist);
-			dump_stack();
-		}
-	}
 	return object;
 
 another_slab:
@@ -770,6 +997,7 @@ gotpage:
 		discard_slab(s, page);
 		page = s->cpu_slab[cpu];
 		slab_lock(page);
+
 	} else
 		s->cpu_slab[cpu] = page;
 
@@ -819,23 +1047,10 @@ void kmem_cache_free(struct kmem_cache *
 	local_irq_save(flags);
 
 	if (unlikely(PageError(page))) {
-		if (unlikely(s != page->slab))
-			goto slab_mismatch;
 		if (s->objects == 1)
 			goto single_object_slab;
-		if (!check_valid_pointer(s, page, object, NULL))
-			goto dumpret;
-		if (!check_object(s, page, object, 1))
-			goto dumpret;
-		if (s->flags & SLAB_TRACE) {
-			printk("SLUB-Trace %s free object=%p slab=%p"
-				"inuse=%d freelist=%p\n",
-				s->name, object, page, page->inuse,
-				page->freelist);
-			print_section("SLUB-Trace", x, min(s->objsize, 128));
-			dump_stack();
-		}
-		init_object(s, object, 0);
+		if (!free_object_checks(s, page, x))
+			goto out;
 	}
 
 	slab_lock(page);
@@ -882,31 +1097,6 @@ double_free:
 	dump_stack();
 	goto out_unlock;
 
-slab_mismatch:
-	if (!PageSlab(page)) {
-		printk(KERN_CRIT "slab_free %s size %d: attempt to free "
-			"object(%p) outside of slab.\n",
-			s->name, s->size, object);
-		goto dumpret;
-	}
-
-	if (!page->slab) {
-		printk(KERN_CRIT
-			"slab_free : no slab(NULL) for object %p.\n",
-			object);
-		goto dumpret;
-	}
-
-	printk(KERN_CRIT "slab_free %s(%d): object at %p"
-		" belongs to slab %s(%d)\n",
-		s->name, s->size, object,
-		page->slab->name, page->slab->size);
-
-dumpret:
-	dump_stack();
-	printk(KERN_CRIT "***** Trying to continue by not "
-		"freeing object.\n");
-	goto out;
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -962,7 +1152,8 @@ static int slub_nomerge = 0;
 /*
  * Debug settings
  */
-static int slub_debug = 0;
+// static int slub_debug = 0;
+static int slub_debug = SLAB_SANITY;
 static char *slub_debug_slabs = NULL;
 
 static int calculate_order(int size)
@@ -1106,9 +1297,9 @@ int kmem_cache_open(struct kmem_cache *s
 	flags |= slub_debug;
 
 	if ((flags & SLAB_POISON) && ((flags & SLAB_DESTROY_BY_RCU) ||
-			s->ctor || s->dtor)) {
+			ctor || dtor)) {
 		if (!(slub_debug & SLAB_POISON))
-			printk(KERN_WARNING "SLUB %s: Clear SLAB_POISON "
+			printk(KERN_WARNING "SLUB %s: Clearing SLAB_POISON "
 				"because de/constructor exists.\n",
 				s->name);
 		flags &= ~SLAB_POISON;
@@ -1147,6 +1338,9 @@ int kmem_cache_open(struct kmem_cache *s
 		size += sizeof(void *);
 	}
 
+	if (flags & SLAB_STORE_USER)
+		size += 2 * sizeof(void *);
+
 	align = calculate_alignment(flags, align);
 	size = ALIGN(size, align);
 
@@ -1173,8 +1367,8 @@ int kmem_cache_open(struct kmem_cache *s
 }
 error:
 	if (flags & SLAB_PANIC)
-		panic("Cannot create slab %s size=%ld realsize=%d "
-			"order=%d offset=%d flags=%lx\n",
+		panic("Cannot create slab %s size=%lu realsize=%u "
+			"order=%u offset=%u flags=%lx\n",
 			s->name, (unsigned long)size, s->size,
 			s->order, s->offset, flags);
 	return 0;
@@ -1380,6 +1574,7 @@ static int __init setup_slub_debug(char
 		case 'p' : case 'P' : slub_debug |= SLAB_POISON;break;
 		case 'u' : case 'U' : slub_debug |= SLAB_STORE_USER;break;
 		case 't' : case 'T' : slub_debug |= SLAB_TRACE;break;
+		case 's' : case 'S' : slub_debug |= SLAB_SANITY;break;
 		default:
 			printk(KERN_CRIT "slub_debug option '%c' unknown. skipped\n",*str);
 		}
@@ -1445,7 +1640,8 @@ static struct kmem_cache *get_slab(size_
 		}
 #endif
 
-		text = kasprintf(flags, "kmalloc_dma-%d", (unsigned int)realsize);
+		text = kasprintf(flags, "kmalloc_dma-%d",
+				(unsigned int)realsize);
 		s = create_kmalloc_cache(x, text, realsize, flags);
 		kmalloc_caches_dma[index] = s;
 		return s;
@@ -1558,6 +1754,7 @@ static struct kmem_cache *kmem_cache_dup
 
 	atomic_inc(&s->refcount);
 
+	down_write(&slabstat_sem);
 	if (!s->aliases)
 		s->aliases = kstrdup(name, flags);
 	else {
@@ -1569,30 +1766,53 @@ static struct kmem_cache *kmem_cache_dup
 		kfree(s->aliases);
 		s->aliases = x;
 	}
+	up_write(&slabstat_sem);
 	return s;
 }
 
 /*
  * Find a mergeable slab cache
 */
-static struct kmem_cache *find_mergeable(unsigned long size, unsigned long flags)
+static struct kmem_cache *find_mergeable(size_t size,
+		size_t align, unsigned long flags,
+		void (*ctor)(void *, struct kmem_cache *, unsigned long),
+		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
 	struct list_head *h;
 
 	if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
 		return NULL;
 
+	if (ctor || dtor)
+		return NULL;
+
+	size = ALIGN(size, sizeof(void *));
+	align = calculate_alignment(flags, align);
+	size = ALIGN(size, align);
+
 	down_read(&slabstat_sem);
 	list_for_each(h, &slab_caches) {
 		struct kmem_cache *s =
 			container_of(h, struct kmem_cache, list);
 
-		if (s->size >= size &&
-			!(s->flags & SLUB_NEVER_MERGE) &&
-			s->size - size <= sizeof(void *)) {
-			up_read(&slabstat_sem);
-			return s;
-		}
+		if (size > s->size)
+			continue;
+
+		if (s->flags & SLUB_NEVER_MERGE)
+			continue;
+
+		/*
+		 * Check if alignment is compatible.
+		 * Courtesy of Adrian Drzewiecki
+		 */
+		if ((s->size & ~(align -1)) != s->size)
+			continue;
+
+		if (s->size - size >= sizeof(void *))
+			continue;
+
+		up_read(&slabstat_sem);
+		return s;
 	}
 	up_read(&slabstat_sem);
 	return NULL;
@@ -1603,13 +1823,9 @@ struct kmem_cache *kmem_cache_create(con
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
 		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
-	struct kmem_cache *s = NULL;
-
-	if (!ctor && !dtor)
-		s = find_mergeable(
-			ALIGN(size, calculate_alignment(flags, align)),
-			flags);
+	struct kmem_cache *s;
+	s = find_mergeable(size, align, flags, ctor, dtor);
 
 	if (s) {
 		printk(KERN_INFO "SLUB: Merging slab_cache %s size %d"
 			" with slab_cache %s size %d\n",
@@ -1617,6 +1833,7 @@ struct kmem_cache *kmem_cache_create(con
 		return kmem_cache_dup(s, GFP_KERNEL, name);
 	}
 
+	/* This needs to go elsewhere at some point */
 	if (nr_cpu_ids)
 		kmem_size = sizeof(struct kmem_cache) -
 			(NR_CPUS - nr_cpu_ids) * sizeof(struct page *);
@@ -1625,7 +1842,6 @@ struct kmem_cache *kmem_cache_create(con
 	if (s && kmem_cache_open(s, GFP_KERNEL, name,
 			size, align, flags, ctor, dtor))
 		return s;
-	kfree(s);
 	return NULL;
 }
 
@@ -1760,6 +1976,8 @@ static int s_show(struct seq_file *m, vo
 		*d++ = 'P';
 	if (s->flags & SLAB_TRACE)
 		*d++ = 'T';
+	if (s->flags & SLAB_SANITY)
+		*d++ = 's';
 	*d = 0;
 
Index: linux-2.6.21-rc1/include/linux/slab.h
===================================================================
--- linux-2.6.21-rc1.orig/include/linux/slab.h	2007-02-27 16:20:41.000000000 -0800
+++ linux-2.6.21-rc1/include/linux/slab.h	2007-02-27 16:21:06.000000000 -0800
@@ -33,6 +33,7 @@ typedef struct kmem_cache kmem_cache_t _
 #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
+#define SLAB_SANITY		0x00400000UL	/* Enable sanity checks */
 
 /* Flags passed to a constructor functions */
 #define SLAB_CTOR_CONSTRUCTOR	0x001UL	/* If not set, then deconstructor */
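
Note on usage (not part of the patch): with the above applied, the sanity
checks can be requested globally through the new 'S' letter handled by
setup_slub_debug(), or per cache by passing SLAB_SANITY at cache creation.
Below is a minimal sketch assuming the 2.6.21-era six-argument
kmem_cache_create() prototype visible in the hunks above; the cache name,
object size and module boilerplate are made up for illustration only.

	#include <linux/module.h>
	#include <linux/slab.h>

	static struct kmem_cache *example_cachep;	/* hypothetical cache */

	static int __init example_init(void)
	{
		/*
		 * SLAB_SANITY enables the alloc/free consistency checks added
		 * by this patch; SLAB_STORE_USER additionally records the
		 * last allocation and free addresses that print_trailer()
		 * reports on an error.
		 */
		example_cachep = kmem_cache_create("example_cache", 128, 0,
					SLAB_SANITY | SLAB_STORE_USER,
					NULL, NULL);
		if (!example_cachep)
			return -ENOMEM;
		return 0;
	}

	static void __exit example_exit(void)
	{
		kmem_cache_destroy(example_cachep);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");

Per the layout comment added to check_object(), caches with debug options set
are not merged, so the object/objsize/inuse/size boundaries described there
apply when reading the error reports such a cache produces.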