Index: linux-2.6.21-rc2/mm/slub.c
===================================================================
--- linux-2.6.21-rc2.orig/mm/slub.c	2007-02-28 23:41:42.000000000 -0800
+++ linux-2.6.21-rc2/mm/slub.c	2007-02-28 23:41:51.000000000 -0800
@@ -165,16 +165,12 @@
  */
 static void *get_freepointer(struct kmem_cache *s, void *object)
 {
-	void **p = object;
-
-	return p[s->offset];
+	return *(void **)(object + s->offset);
 }
 
 static void set_freepointer(struct kmem_cache *s, void *object, void *fp)
 {
-	void **p = object;
-
-	p[s->offset] = fp;
+	*(void **)(object + s->offset) = fp;
 }
 
 /*
@@ -208,7 +204,7 @@
 
 static void print_trailer(struct kmem_cache *s, u8 *p)
 {
-	unsigned int off;
+	unsigned int off;	/* Offset of last byte */
 
 	if (s->offset)
 		off = s->offset + sizeof(void *);
@@ -219,7 +215,8 @@
 		print_section("Redzone", p + s->objsize,
 			s->inuse - s->objsize);
 
-	printk(KERN_ERR "FreePointer %p: %p\n", p + s->offset,
+	printk(KERN_ERR "FreePointer %p: %p\n",
+			p + s->offset,
 			get_freepointer(s, p));
 
 	if (s->flags & SLAB_STORE_USER) {
@@ -303,30 +300,57 @@
  * 	Bytes of the object to be managed.
  * 	If the freepointer may overlay the object then the free
  * 	pointer is the first word of the object.
+ * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
+ * 	0xa5 (POISON_END)
+ *
  * object + s->objsize
  * 	Padding to reach word boundary. This is also used for Redzoning.
 * 	Padding is extended to word size if Redzoning is enabled
 * 	and objsize == inuse.
+ * 	We fill with 0x71 (RED_INACTIVE) for inactive objects and with
+ * 	0xa5 (RED_ACTIVE) for objects in use.
+ *
  * object + s->inuse
  * 	A. Free pointer (if we cannot overwrite object on free)
  * 	B. Tracking data for SLAB_STORE_USER
  * 	C. Padding to reach required alignment boundary
+ * 	Padding is done using 0x5a (POISON_INUSE)
+ *
  * object + s->size
 *
 * If slabcaches are merged then the objsize and inuse boundaries are to be ignored.
+ * And therefore no slab options that rely on these boundaries may be used with
+ * merged slabcaches.
  */
+
+static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
+{
+	unsigned long off = s->inuse;	/* The end of info */
+
+	if (s->offset)
+		/* Freepointer is placed after the object. */
+		off += sizeof(void *);
+
+	if (s->flags & SLAB_STORE_USER)
+		/* We also have user information there */
+		off += 2 * sizeof(void *);
+
+	if (s->size == off)
+		return 1;
+
+	if (check_bytes(p + off, POISON_INUSE, s->size - off))
+		return 1;
+
+	object_err(s, page, p, "Padding check fails");
+	return 0;
+}
+
 static int check_object(struct kmem_cache *s, struct page *page,
 					void *object, int active)
 {
 	u8 *p = object;
 	u8 *endobject = object + s->objsize;
-	/* Offset of first byte after free pointer */
-	unsigned long off = s->inuse;
-
-	if (s->offset)
-		off += sizeof(void *);
-
 	/* Single object slabs do not get policed */
 	if (s->objects == 1)
 		return 1;
@@ -344,7 +368,7 @@
 
 	if ((s->flags & SLAB_POISON) && s->objsize < s->inuse &&
 		!check_bytes(endobject, POISON_INUSE, s->inuse - s->objsize))
-		object_err(s, page, p, "Alignment Filler check fails");
+		object_err(s, page, p, "Alignment Padding check fails");
 
 	if (s->flags & SLAB_POISON) {
 		if (!active && (!check_bytes(p, POISON_FREE, s->objsize - 1) ||
@@ -352,16 +376,14 @@
 			object_err(s, page, p, "Poison");
 			return 0;
 		}
-		if (s->size > off && !check_bytes(p + off,
-			POISON_INUSE, s->size - off))
-			object_err(s, page, p,
-				"Interobject Filler check fails");
+		if (!check_pad_bytes(s, page, p))
+			return 0;
 	}
 
-	if (s->offset == 0 && active)
+	if (!s->offset && active)
 		/*
 		 * Object and freepointer overlap. Cannot check
-		 * if object is allocated.
+		 * freepointer while object is allocated.
 		 */
 		return 1;
 
@@ -388,11 +410,11 @@
 			page_count(page));
 		return 0;
 	}
-	if (page->offset != s->offset) {
-		printk(KERN_CRIT "SLUB: %s Corrupted offset %u in slab @%p"
+	if (page->offset * sizeof(void *) != s->offset) {
+		printk(KERN_CRIT "SLUB: %s Corrupted offset %lu in slab @%p"
 			" flags=%lx mapping=%p count=%d\n",
-			s->name, page->offset, page, page->flags,
-			page->mapping, page_count(page));
+			s->name, page->offset * sizeof(void *), page,
+			page->flags, page->mapping, page_count(page));
 		return 0;
 	}
 	if (page->inuse > s->objects) {
@@ -500,7 +522,7 @@
 	}
 
 	if (!check_valid_pointer(s, page, object)) {
-		printk(KERN_ERR "SLUB: %s slab %p invalid free pointer %p\n",
+		printk(KERN_ERR "SLUB: %s slab %p invalid object pointer %p\n",
 			s->name, page, object);
 		goto fail;
 	}
@@ -607,7 +629,7 @@
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
-	page->offset = s->offset;
+	page->offset = s->offset / sizeof(void *);
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
 	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -618,7 +640,7 @@
 	if (s->objects > 1) {
 		void *start = page_address(page);
 		void *end = start + s->objects * s->size;
-		void **last = start;
+		void *last = start;
 		void *p = start + s->size;
 
 		if (unlikely(s->flags & SLAB_POISON))
@@ -628,11 +650,11 @@
 				init_object(s, last, 0);
 				init_tracking(s, last);
 			}
-			last[s->offset] = p;
+			set_freepointer(s, last, p);
 			last = p;
 			p += s->size;
 		}
-		last[s->offset] = NULL;
+		set_freepointer(s, last, NULL);
 		page->freelist = start;
 		page->inuse = 0;
 		if (PageError(page)) {
@@ -807,7 +829,7 @@
 
 /*
  * Get a page from somewhere. Search in increasing NUMA
- * distance.
+ * distances.
  */
 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 {
@@ -823,7 +845,7 @@
 		n = get_node(s, zone_to_nid(*z));
 
 		if (n && cpuset_zone_allowed_hardwall(*z, flags) &&
-				n->nr_partial) {
+				n->nr_partial > 2) {
 			page = get_partial_node(n);
 			if (page)
 				return page;
@@ -979,9 +1001,9 @@
 	slab_lock(page);
 	if (unlikely(node != -1 && page_to_nid(page) != node))
 		goto another_slab;
+redo:
 	if (unlikely(!page->freelist))
 		goto another_slab;
-redo:
 	object = page->freelist;
 	if (unlikely(PageError(page))) {
 		if (!alloc_object_checks(s, page, object))
@@ -1001,29 +1023,39 @@
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
-	if (page)
-		goto gotpage;
+	if (unlikely(!page)) {
 
-	page = new_slab(s, gfpflags, node);
-	if (!page) {
-		local_irq_restore(flags);
-		return NULL;
-	}
-
-	if (unlikely(s->objects == 1)) {
-		local_irq_restore(flags);
-		return page_address(page);
-	}
+		page = new_slab(s, gfpflags, node);
+		if (!page) {
+			local_irq_restore(flags);
+			return NULL;
+		}
 
-	slab_lock(page);
+		if (s->objects == 1) {
+			local_irq_restore(flags);
+			return page_address(page);
+		}
 
-gotpage:
-	if (unlikely(s->cpu_slab[cpu])) {
-		slab_unlock(page);
-		discard_slab(s, page);
-		page = s->cpu_slab[cpu];
+		if (s->cpu_slab[cpu]) {
+			/*
+			 * Someone else populated the cpu_slab while
+			 * we enabled interrupts. The page may not
+			 * be on the required node.
+			 */
+			if (node == -1 ||
+				page_to_nid(s->cpu_slab[cpu]) == node) {
+				/*
+				 * Current cpuslab is acceptable and we
+				 * want the current one since it's cache hot
+				 */
+				discard_slab(s, page);
+				page = s->cpu_slab[cpu];
+				slab_lock(page);
+				goto redo;
+			} else
+				deactivate_slab(s, s->cpu_slab[cpu], cpu);
+		}
 		slab_lock(page);
-		goto redo;
 	}
 	s->cpu_slab[cpu] = page;
 
@@ -1208,8 +1240,10 @@
 static unsigned long calculate_alignment(unsigned long flags,
 		unsigned long align)
 {
-	if (flags & (SLAB_MUST_HWCACHE_ALIGN|SLAB_HWCACHE_ALIGN))
+	if (flags & SLAB_HWCACHE_ALIGN)
 		return L1_CACHE_BYTES;
 
+	if (flags & SLAB_MUST_HWCACHE_ALIGN)
+		return max(align, (unsigned long)L1_CACHE_BYTES);
 	if (align < ARCH_SLAB_MINALIGN)
 		return ARCH_SLAB_MINALIGN;
@@ -1360,9 +1394,9 @@
 		 * kmem_cache_free.
 		 *
 		 * This is the case if we do RCU, have a constructor or
-		 * destructor.
+		 * destructor or are poisoning the objects.
 		 */
-		s->offset = size / sizeof(void *);
+		s->offset = size;
 		size += sizeof(void *);
 	}
 
@@ -1772,6 +1806,9 @@
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
 #endif
+	printk("SLUB V3.1: General slabs=%d, HW alignment=%d, Options=%s\n",
+		KMALLOC_SHIFT_HIGH + KMALLOC_EXTRAS + 1 - KMALLOC_SHIFT_LOW,
+		L1_CACHE_BYTES, slub_debug);
 }
 
 static struct kmem_cache *kmem_cache_dup(struct kmem_cache *s,
@@ -1898,7 +1935,7 @@
 static void unregister_slab(struct kmem_cache *s)
 {
 	down_write(&slabstat_sem);
-	list_add(&s->list, &slab_caches);
+	list_del(&s->list);
 	up_write(&slabstat_sem);
 }
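
Editor's note on the free pointer change (illustration only, not part of the patch): after this patch s->offset is kept in bytes, so get_freepointer()/set_freepointer() simply dereference object + s->offset, while page->offset keeps the old pointer-sized units, hence the s->offset / sizeof(void *) store in the new_slab hunk and the page->offset * sizeof(void *) comparison in the slab check hunk. The standalone userspace sketch below shows only the byte-offset scheme; struct fake_cache, get_fp(), set_fp() and the chosen offsets are invented for the example and do not exist in slub.c.

#include <assert.h>
#include <stddef.h>

struct fake_cache {
	size_t offset;			/* free pointer offset in bytes, like s->offset */
};

/* Mirrors the patched get_freepointer(): read the pointer stored at a byte offset. */
static void *get_fp(struct fake_cache *s, void *object)
{
	return *(void **)((char *)object + s->offset);
}

/* Mirrors the patched set_freepointer(): write a pointer at the same byte offset. */
static void set_fp(struct fake_cache *s, void *object, void *fp)
{
	*(void **)((char *)object + s->offset) = fp;
}

int main(void)
{
	void *obj[8] = { 0 };		/* stand-in for one slab object */
	void *next_obj[8] = { 0 };	/* stand-in for the next free object */
	struct fake_cache s = { .offset = 4 * sizeof(void *) };

	set_fp(&s, obj, next_obj);		/* link obj -> next_obj */
	assert(get_fp(&s, obj) == next_obj);	/* read the link back */
	return 0;
}

Compiled with a plain cc and run, the assert passes: the pointer written at byte offset 4 * sizeof(void *) is read back from the same location, which is all the patched helpers do inside the kernel.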