--- include/linux/mm_types.h | 11 - include/linux/slub_def.h | 7 - mm/slub.c | 259 ++++++++++++++++++++--------------------------- 3 files changed, 119 insertions(+), 158 deletions(-) Index: linux-2.6/include/linux/mm_types.h =================================================================== --- linux-2.6.orig/include/linux/mm_types.h 2008-02-06 20:41:59.751732152 -0800 +++ linux-2.6/include/linux/mm_types.h 2008-02-06 20:43:45.753969012 -0800 @@ -37,13 +37,7 @@ struct page { unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ atomic_t _count; /* Usage count, see below. */ - union { - atomic_t _mapcount; /* Count of ptes mapped in mms, - * to show when page is mapped - * & limit reverse map searches. - */ - unsigned int inuse; /* SLUB: Nr of objects */ - }; + atomic_t _mapcount; /* Count of ptes mapped in mms, */ union { struct { unsigned long private; /* Mapping-private opaque data: @@ -60,6 +54,7 @@ struct page { * it points to anon_vma object: * see PAGE_MAPPING_ANON below. */ + void *address; /* SLUB page address */ }; #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS spinlock_t ptl; @@ -69,7 +64,7 @@ struct page { }; union { pgoff_t index; /* Our offset within mapping. */ - void *freelist; /* SLUB: freelist req. slab lock */ + unsigned long freemap; /* SLUB: map of free objects */ }; struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! Index: linux-2.6/include/linux/slub_def.h =================================================================== --- linux-2.6.orig/include/linux/slub_def.h 2008-02-06 20:41:59.763732392 -0800 +++ linux-2.6/include/linux/slub_def.h 2008-02-06 20:43:45.753969012 -0800 @@ -12,10 +12,11 @@ #include struct kmem_cache_cpu { - void **freelist; /* Pointer to first free per cpu object */ + unsigned long freemap; /* Bitmap of free objects */ struct page *page; /* The slab from which we are allocating */ + void *address; /* Page address */ + unsigned int size; /* Slab size */ int node; /* The node of the page (or -1 for debug) */ - unsigned int offset; /* Freepointer offset (in word units) */ unsigned int objsize; /* Size of an object (from kmem_cache) */ }; @@ -37,7 +38,7 @@ struct kmem_cache { unsigned long flags; int size; /* The size of an object including meta data */ int objsize; /* The size of an object without meta data */ - int offset; /* Free pointer offset. */ + unsigned long empty_freemap; /* Freemap of an empty slag */ int order; /* Index: linux-2.6/mm/slub.c =================================================================== --- linux-2.6.orig/mm/slub.c 2008-02-06 20:41:59.775732631 -0800 +++ linux-2.6/mm/slub.c 2008-02-06 20:53:27.916872836 -0800 @@ -288,7 +288,7 @@ static inline int check_valid_pointer(st if (!object) return 1; - base = page_address(page); + base = page->address; if (object < base || object >= base + s->objects * s->size || (object - base) % s->size) { return 0; @@ -297,31 +297,34 @@ static inline int check_valid_pointer(st return 1; } -/* - * Slow version of get and set free pointer. - * - * This version requires touching the cache lines of kmem_cache which - * we avoid to do in the fast alloc free paths. There we obtain the offset - * from the page struct. 
- */ -static inline void *get_freepointer(struct kmem_cache *s, void *object) +static inline int object_index(struct kmem_cache *s, struct page *page, + const void *addr) { - return *(void **)(object + s->offset); + unsigned long offset = addr - page->address; + + VM_BUG_ON(offset >= (PAGE_SIZE << s->order) || offset % s->size); + + return offset / s->size; } -static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) +static inline void *object_addr(struct kmem_cache *s, struct page *page, + int index) { - *(void **)(object + s->offset) = fp; + VM_BUG_ON(index < 0 || index >= s->objects); + + return page->address + index * s->size; } /* Loop over all objects in a slab */ -#define for_each_object(__p, __s, __addr) \ - for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\ - __p += (__s)->size) +#define for_each_object(__i, __p, __s, __page) \ + for (__i = 0; __i < s->objects; __i++) \ + if ((__p = object_addr(s, __page, __i)), 1) /* Scan freelist */ -#define for_each_free_object(__p, __s, __free) \ - for (__p = (__free); __p; __p = get_freepointer((__s), __p)) +#define for_each_free_object(__i, __p, __s, __page) \ + for (__i = 0; __i < s->objects; __i++) \ + if ((__p = object_addr(s, __page, __i)), \ + test_bit(__i, __page->freemap)) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) @@ -381,10 +384,7 @@ static struct track *get_track(struct km { struct track *p; - if (s->offset) - p = object + s->offset + sizeof(void *); - else - p = object + s->inuse; + p = object + s->inuse; return p + alloc; } @@ -394,10 +394,7 @@ static void set_track(struct kmem_cache { struct track *p; - if (s->offset) - p = object + s->offset + sizeof(void *); - else - p = object + s->inuse; + p = object + s->inuse; p += alloc; if (addr) { @@ -473,7 +470,7 @@ static void slab_fix(struct kmem_cache * static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) { unsigned int off; /* Offset of last byte */ - u8 *addr = page_address(page); + u8 *addr = page->address; print_tracking(s, p); @@ -491,10 +488,7 @@ static void print_trailer(struct kmem_ca print_section("Redzone", p + s->objsize, s->inuse - s->objsize); - if (s->offset) - off = s->offset + sizeof(void *); - else - off = s->inuse; + off = s->inuse; if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); @@ -625,10 +619,6 @@ static int check_pad_bytes(struct kmem_c { unsigned long off = s->inuse; /* The end of info */ - if (s->offset) - /* Freepointer is placed after the object. */ - off += sizeof(void *); - if (s->flags & SLAB_STORE_USER) /* We also have user information there */ off += 2 * sizeof(struct track); @@ -651,7 +641,7 @@ static int slab_pad_check(struct kmem_ca if (!(s->flags & SLAB_POISON)) return 1; - start = page_address(page); + start = page->address; end = start + (PAGE_SIZE << s->order); length = s->objects * s->size; remainder = end - (start + length); @@ -703,13 +693,6 @@ static int check_object(struct kmem_cach check_pad_bytes(s, page, p); } - if (!s->offset && active) - /* - * Object and freepointer overlap. Cannot check - * freepointer while object is allocated. 
- */ - return 1; - /* Check free pointer validity */ if (!check_valid_pointer(s, page, get_freepointer(s, p))) { object_err(s, page, p, "Freepointer corrupt"); @@ -835,7 +818,7 @@ static void setup_object_debug(struct km } static int alloc_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + int index, void *addr) { if (!check_slab(s, page)) goto bad; @@ -875,7 +858,7 @@ bad: } static int free_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + int index, void *addr) { if (!check_slab(s, page)) goto fail; @@ -1018,10 +1001,10 @@ static inline void setup_object_debug(st struct page *page, void *object) {} static inline int alloc_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, int index, void *addr) { return 0; } static inline int free_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, int index, void *addr) { return 0; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } @@ -1081,9 +1064,8 @@ static struct page *new_slab(struct kmem { struct page *page; struct kmem_cache_node *n; - void *start; - void *last; void *p; + int index; BUG_ON(flags & GFP_SLAB_BUG_MASK); @@ -1096,27 +1078,23 @@ static struct page *new_slab(struct kmem if (n) atomic_long_inc(&n->nr_slabs); page->slab = s; + page->address = page_address(page); page->flags |= 1 << PG_slab; if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | SLAB_TRACE)) SetSlabDebug(page); - start = page_address(page); + page->freemap = s->empty_freemap; if (unlikely(s->flags & SLAB_POISON)) - memset(start, POISON_INUSE, PAGE_SIZE << s->order); + memset(page->address, POISON_INUSE, + PAGE_SIZE << s->order); - last = start; - for_each_object(p, s, start) { - setup_object(s, page, last); - set_freepointer(s, last, p); - last = p; + if (SlabDebug(page) || s->ctor) { + for_each_object(index, p, s, page) + setup_object(s, page, p); } - setup_object(s, page, last); - set_freepointer(s, last, NULL); - page->freelist = start; - page->inuse = 0; out: return page; } @@ -1127,9 +1105,10 @@ static void __free_slab(struct kmem_cach if (unlikely(SlabDebug(page))) { void *p; + int index; slab_pad_check(s, page); - for_each_object(p, s, page_address(page)) + for_each_object(index, p, s, page) check_object(s, page, p, 0); ClearSlabDebug(page); } @@ -1139,6 +1118,7 @@ static void __free_slab(struct kmem_cach NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -pages); + page->address = NULL; __free_pages(page, s->order); } @@ -1339,9 +1319,9 @@ static void unfreeze_slab(struct kmem_ca struct kmem_cache_node *n = get_node(s, page_to_nid(page)); ClearSlabFrozen(page); - if (page->inuse) { + if (page->freemap != s->empty_freemap) { - if (page->freelist) + if (page->freemap) add_partial(n, page, tail); else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) add_full(n, page); @@ -1372,27 +1352,12 @@ static void unfreeze_slab(struct kmem_ca static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { struct page *page = c->page; - int tail = 1; - /* - * Merge cpu freelist into freelist. Typically we get here - * because both freelists are empty. So this is unlikely - * to occur. - */ - while (unlikely(c->freelist)) { - void **object; - - tail = 0; /* Hot objects. 
Put the slab first */ + int tail = c->freemap != 0; - /* Retrieve object from cpu_freelist */ - object = c->freelist; - c->freelist = c->freelist[c->offset]; - - /* And put onto the regular freelist */ - object[c->offset] = page->freelist; - page->freelist = object; - page->inuse--; - } + page->freemap |= c->freemap; + c->freemap = 0; c->page = NULL; + c->address = NULL; unfreeze_slab(s, page, tail); } @@ -1467,8 +1432,9 @@ static inline int node_match(struct kmem static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) { - void **object; struct page *new; + int index; + unsigned long map; if (!c->page) goto new_slab; @@ -1477,19 +1443,19 @@ static void *__slab_alloc(struct kmem_ca if (unlikely(!node_match(c, node))) goto another_slab; load_freelist: - object = c->page->freelist; - if (unlikely(!object)) + map = c->page->freemap; + if (unlikely(!map)) goto another_slab; if (unlikely(SlabDebug(c->page))) goto debug; - object = c->page->freelist; - c->freelist = object[c->offset]; - c->page->inuse = s->objects; - c->page->freelist = NULL; - c->node = page_to_nid(c->page); + c->page->freemap = 0; + index = __ffs(map); + __clear_bit(index, &map); + c->freemap = map; +unlock_out: slab_unlock(c->page); - return object; + return object_addr(s, c->page, index); another_slab: deactivate_slab(s, c); @@ -1498,6 +1464,8 @@ new_slab: new = get_partial(s, gfpflags, node); if (new) { c->page = new; + c->address = new->address; + c->node = page_to_nid(new); goto load_freelist; } @@ -1516,19 +1484,19 @@ new_slab: slab_lock(new); SetSlabFrozen(new); c->page = new; + c->address = new->address; + c->node = page_to_nid(new); goto load_freelist; } return NULL; debug: - object = c->page->freelist; - if (!alloc_debug_processing(s, c->page, object, addr)) + index = __ffs(c->page->freemap); + if (!alloc_debug_processing(s, c->page, index, addr)) goto another_slab; - c->page->inuse++; - c->page->freelist = object[c->offset]; + __clear_bit(index, &c->page->freemap); c->node = -1; - slab_unlock(c->page); - return object; + goto unlock_out; } /* @@ -1547,16 +1515,21 @@ static __always_inline void *slab_alloc( void **object; unsigned long flags; struct kmem_cache_cpu *c; + unsigned long map; local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); - if (unlikely(!c->freelist || !node_match(c, node))) + map = c->freemap; + if (unlikely(!map || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); else { - object = c->freelist; - c->freelist = object[c->offset]; + int index; + + index = __ffs(map); + object = c->address + index * c->size; + __clear_bit(index, &c->freemap); } local_irq_restore(flags); @@ -1589,24 +1562,22 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); * handling required then we can return immediately. 
*/ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, void *addr, unsigned int offset) + int index, void *addr) { - void *prior; - void **object = (void *)x; + unsigned long prior; slab_lock(page); if (unlikely(SlabDebug(page))) goto debug; checks_ok: - prior = object[offset] = page->freelist; - page->freelist = object; - page->inuse--; + prior = page->freemap; + __set_bit(index, &page->freemap); if (unlikely(SlabFrozen(page))) goto out_unlock; - if (unlikely(!page->inuse)) + if (unlikely(page->freemap == s->empty_freemap)) goto slab_empty; /* @@ -1633,7 +1604,7 @@ slab_empty: return; debug: - if (!free_debug_processing(s, page, x, addr)) + if (!free_debug_processing(s, page, index, addr)) goto out_unlock; goto checks_ok; } @@ -1650,20 +1621,22 @@ debug: * with all sorts of special processing. */ static __always_inline void slab_free(struct kmem_cache *s, - struct page *page, void *x, void *addr) + struct page *page, const void *object, void *addr) { - void **object = (void *)x; unsigned long flags; struct kmem_cache_cpu *c; + int index; + unsigned long offset; local_irq_save(flags); - debug_check_no_locks_freed(object, s->objsize); c = get_cpu_slab(s, smp_processor_id()); - if (likely(page == c->page && c->node >= 0)) { - object[c->offset] = c->freelist; - c->freelist = object; - } else - __slab_free(s, page, x, addr, c->offset); + offset = object - page->address; + index = offset / c->size; +// debug_check_no_locks_freed(x, s->objsize); + if (likely(page == c->page && c->node >= 0)) + __set_bit(index, &c->freemap); + else + __slab_free(s, page, index, addr); local_irq_restore(flags); } @@ -1842,10 +1815,10 @@ static void init_kmem_cache_cpu(struct k struct kmem_cache_cpu *c) { c->page = NULL; - c->freelist = NULL; + c->freemap = 0; c->node = 0; - c->offset = s->offset / sizeof(void *); c->objsize = s->objsize; + c->size = s->size; } static void init_kmem_cache_node(struct kmem_cache_node *n) @@ -1890,7 +1863,7 @@ static struct kmem_cache_cpu *alloc_kmem if (c) per_cpu(kmem_cache_cpu_free, cpu) = - (void *)c->freelist; + (void *)c->freemap; else { /* Table overflow: So allocate ourselves */ c = kmalloc_node( @@ -1911,7 +1884,7 @@ static void free_kmem_cache_cpu(struct k kfree(c); return; } - c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); + c->freemap = (unsigned long)per_cpu(kmem_cache_cpu_free, cpu); per_cpu(kmem_cache_cpu_free, cpu) = c; } @@ -2134,24 +2107,10 @@ static int calculate_sizes(struct kmem_c /* * With that we have determined the number of bytes in actual use - * by the object. This is the potential offset to the free pointer. + * by the object. */ s->inuse = size; - if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || - s->ctor)) { - /* - * Relocate free pointer after the object if it is not - * permitted to overwrite the first word of the object on - * kmem_cache_free. - * - * This is the case if we do RCU, have a constructor or - * destructor or are poisoning the objects. 
- */ - s->offset = size; - size += sizeof(void *); - } - #ifdef CONFIG_SLUB_DEBUG if (flags & SLAB_STORE_USER) /* @@ -2218,6 +2177,9 @@ static int kmem_cache_open(struct kmem_c #ifdef CONFIG_NUMA s->remote_node_defrag_ratio = 100; #endif + s->objects = min(s->objects, BITS_PER_LONG); + s->empty_freemap = -1UL >> (64 - s->objects); + BUG_ON(!s->empty_freemap); if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) goto error; @@ -2227,9 +2189,8 @@ static int kmem_cache_open(struct kmem_c error: if (flags & SLAB_PANIC) panic("Cannot create slab %s size=%lu realsize=%u " - "order=%u offset=%u flags=%lx\n", - s->name, (unsigned long)size, s->size, s->order, - s->offset, flags); + "order=%u flags=%lx\n", + s->name, (unsigned long)size, s->size, s->order, flags); return 0; } @@ -2287,7 +2248,7 @@ static int free_list(struct kmem_cache * spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry_safe(page, h, list, lru) - if (!page->inuse) { + if (page->freemap == s->empty_freemap) { list_del(&page->lru); discard_slab(s, page); } else @@ -2610,7 +2571,7 @@ void kfree(const void *x) put_page(page); return; } - slab_free(page->slab, page, (void *)x, __builtin_return_address(0)); + slab_free(page->slab, page, x, __builtin_return_address(0)); } EXPORT_SYMBOL(kfree); @@ -2622,7 +2583,7 @@ static unsigned long count_partial(struc spin_lock_irqsave(&n->list_lock, flags); list_for_each_entry(page, &n->partial, lru) - x += page->inuse; + x += BITS_PER_LONG - hweight64(page->freemap); spin_unlock_irqrestore(&n->list_lock, flags); return x; } @@ -2670,7 +2631,9 @@ int kmem_cache_shrink(struct kmem_cache * list_lock. page->inuse here is the upper limit. */ list_for_each_entry_safe(page, t, &n->partial, lru) { - if (!page->inuse && slab_trylock(page)) { + unsigned long map = page->freemap; + + if (map == s->empty_freemap && slab_trylock(page)) { /* * Must hold slab lock here because slab_free * may have freed the last object and be @@ -2681,8 +2644,8 @@ int kmem_cache_shrink(struct kmem_cache slab_unlock(page); discard_slab(s, page); } else { - list_move(&page->lru, - slabs_by_inuse + page->inuse); + list_move(&page->lru, slabs_by_inuse + + BITS_PER_LONG - hweight64(page->freemap)); } } @@ -3104,7 +3067,8 @@ static int validate_slab(struct kmem_cac unsigned long *map) { void *p; - void *addr = page_address(page); + void *addr = page->address; + int index; if (!check_slab(s, page) || !on_freelist(s, page, NULL)) @@ -3119,7 +3083,7 @@ static int validate_slab(struct kmem_cac return 0; } - for_each_object(p, s, addr) + for_each_object(ndex, p, s, page) if (!test_bit(slab_index(p, s, addr), map)) if (!check_object(s, page, p, 1)) return 0; @@ -3384,15 +3348,16 @@ static int add_location(struct loc_track static void process_slab(struct loc_track *t, struct kmem_cache *s, struct page *page, enum track_item alloc) { - void *addr = page_address(page); + void *addr = page->address; DECLARE_BITMAP(map, s->objects); void *p; + int index; bitmap_zero(map, s->objects); for_each_free_object(p, s, page->freelist) set_bit(slab_index(p, s, addr), map); - for_each_object(p, s, addr) + for_each_object(index, p, s, page) if (!test_bit(slab_index(p, s, addr), map)) add_location(t, s, get_track(s, p, alloc)); }
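
For reference, a minimal, self-contained userspace sketch of the freemap technique this patch switches to (hypothetical toy_* names and a plain struct, not the kernel code): each slab carries one unsigned long whose set bits mark free object slots; allocation takes the lowest set bit, free sets the bit back. It assumes at most BITS_PER_LONG objects per slab, which the patch enforces in kmem_cache_open() via s->objects = min(s->objects, BITS_PER_LONG).

#include <assert.h>
#include <limits.h>
#include <stddef.h>

#define BITS_PER_LONG ((unsigned int)(sizeof(unsigned long) * CHAR_BIT))

struct toy_slab {
	void *address;		/* base address of the slab memory */
	unsigned long freemap;	/* bit i set => object i is free */
	unsigned int size;	/* object size including metadata */
	unsigned int objects;	/* number of objects per slab */
};

/* Mask with one bit per object: the state of a completely free slab.
 * A portable spelling of the s->empty_freemap initialization above. */
static unsigned long toy_empty_freemap(unsigned int objects)
{
	assert(objects >= 1 && objects <= BITS_PER_LONG);
	return objects == BITS_PER_LONG ? ~0UL : (1UL << objects) - 1;
}

static void *toy_alloc(struct toy_slab *s)
{
	int index;

	if (!s->freemap)
		return NULL;			/* no set bits: slab is full */
	index = __builtin_ctzl(s->freemap);	/* stand-in for __ffs() */
	s->freemap &= ~(1UL << index);		/* stand-in for __clear_bit() */
	return (char *)s->address + (size_t)index * s->size;
}

static void toy_free(struct toy_slab *s, const void *object)
{
	size_t offset = (const char *)object - (const char *)s->address;
	unsigned int index = offset / s->size;

	assert(offset % s->size == 0 && index < s->objects);
	s->freemap |= 1UL << index;		/* stand-in for __set_bit() */
}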
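
A small follow-up to the same sketch: with a bitmap there is no separate in-use counter (the patch drops page->inuse from struct page); the number of allocated objects falls out of a population count of the freemap, which is what the count_partial() and kmem_cache_shrink() hunks above rely on via hweight. Again hypothetical toy_* naming:

/* Objects currently allocated from the slab: total slots minus the
 * number of set (free) bits. */
static unsigned int toy_inuse(const struct toy_slab *s)
{
	return s->objects - (unsigned int)__builtin_popcountl(s->freemap);
}

The trade-off captured here is that the bitmap caps a slab at BITS_PER_LONG objects, but it removes the per-object free pointer (and with it s->offset and the cpu freelist walk), so the per-cpu free state shrinks to a single word that deactivate_slab() can merge back into page->freemap with one OR.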