---
 include/linux/mm_types.h |    2 
 include/linux/slub_def.h |    1 
 mm/slub.c                |  135 +++++++++++++++++++++++++---------------------
 3 files changed, 74 insertions(+), 64 deletions(-)

Index: slub/include/linux/mm_types.h
===================================================================
--- slub.orig/include/linux/mm_types.h	2007-05-13 16:19:07.000000000 -0700
+++ slub/include/linux/mm_types.h	2007-05-13 16:38:07.000000000 -0700
@@ -26,7 +26,7 @@ struct page {
 		 */
 		struct { /* SLUB uses */
 			short unsigned int inuse;
-			short unsigned int offset;
+			short unsigned int cpu;
 		};
 	};
 	union {
Index: slub/include/linux/slub_def.h
===================================================================
--- slub.orig/include/linux/slub_def.h	2007-05-13 16:18:48.000000000 -0700
+++ slub/include/linux/slub_def.h	2007-05-13 16:19:02.000000000 -0700
@@ -13,6 +13,7 @@
 
 struct kmem_cache_cpu {
 	struct page *page;
+	unsigned long offset;
 };
 
 struct kmem_cache_node {
Index: slub/mm/slub.c
===================================================================
--- slub.orig/mm/slub.c	2007-05-13 15:15:33.000000000 -0700
+++ slub/mm/slub.c	2007-05-13 16:33:21.000000000 -0700
@@ -363,6 +363,19 @@ static inline void set_freepointer(struc
 	*(void **)(object + s->offset) = fp;
 }
 
+/*
+ * Fast version of get and set free pointer utilizing per cpu structures
+ */
+static inline void *get_fp_cpu(struct kmem_cache_cpu *c, void *object)
+{
+	return *(void **)(object + c->offset);
+}
+
+static inline void set_fp_cpu(struct kmem_cache_cpu *c, void *object, void *fp)
+{
+	*(void **)(object + c->offset) = fp;
+}
+
 /* Loop over all objects in a slab */
 #define for_each_object(__p, __s, __addr) \
 	for (__p = (__addr); __p < (__addr) + (__s)->objects * (__s)->size;\
@@ -733,15 +746,6 @@ static int check_slab(struct kmem_cache
 			page_count(page));
 		return 0;
 	}
-	if (page->offset * sizeof(void *) != s->offset) {
-		slab_err(s, page, "Corrupted offset %lu flags=0x%lx "
-			"mapping=0x%p count=%d",
-			(unsigned long)(page->offset * sizeof(void *)),
-			page->flags,
-			page->mapping,
-			page_count(page));
-		return 0;
-	}
 	if (page->inuse > s->objects) {
 		slab_err(s, page, "inuse %u > max %u @0x%p flags=%lx "
 			"mapping=0x%p count=%d",
@@ -878,8 +882,6 @@ bad:
 			s->name, page);
 		page->inuse = s->objects;
 		page->freelist = NULL;
-		/* Fix up fields that may be corrupted */
-		page->offset = s->offset / sizeof(void *);
 	}
 	return 0;
 }
@@ -977,29 +979,11 @@ __setup("slub_debug", setup_slub_debug);
 static void kmem_cache_open_debug_check(struct kmem_cache *s)
 {
 	/*
-	 * The page->offset field is only 16 bit wide. This is an offset
-	 * in units of words from the beginning of an object. If the slab
-	 * size is bigger then we cannot move the free pointer behind the
-	 * object anymore.
-	 *
-	 * On 32 bit platforms the limit is 256k. On 64bit platforms
-	 * the limit is 512k.
-	 *
-	 * Debugging or ctor may create a need to move the free
-	 * pointer. Fail if this happens.
+	 * Enable debugging if selected on the kernel commandline.
 	 */
-	if (s->size >= 65535 * sizeof(void *)) {
-		BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON |
-				SLAB_STORE_USER | SLAB_DESTROY_BY_RCU));
-		BUG_ON(s->ctor);
-	}
-	else
-		/*
-		 * Enable debugging if selected on the kernel commandline.
-		 */
-		if (slub_debug && (!slub_debug_slabs ||
-		    strncmp(slub_debug_slabs, s->name,
-			strlen(slub_debug_slabs)) == 0))
+	if (slub_debug && (!slub_debug_slabs ||
+	    strncmp(slub_debug_slabs, s->name,
+		strlen(slub_debug_slabs)) == 0))
 			s->flags |= slub_debug;
 }
 #else
@@ -1060,7 +1044,6 @@ static struct page *new_slab(struct kmem
 
 	page->inuse = 0;
 	page->lockless_freelist = NULL;
-	page->offset = s->offset / sizeof(void *);
 	page->slab = s;
 
 	start = page_address(page);
@@ -1205,7 +1188,7 @@ static void remove_partial(struct kmem_c
 static inline int lock_and_freeze_slab(struct kmem_cache_node *n, struct page *page)
 {
 	if (slab_trylock(page)) {
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 		n->nr_partial--;
 		SetSlabFrozen(page);
 		return 1;
@@ -1219,6 +1202,7 @@ static inline int lock_and_freeze_slab(s
 static struct page *get_partial_node(struct kmem_cache_node *n)
 {
 	struct page *page;
+	int nr_objects;
 
 	/*
 	 * Racy check. If we mistakenly see no partial slabs then we
@@ -1233,9 +1217,16 @@ static struct page *get_partial_node(str
 	list_for_each_entry(page, &n->partial, lru)
 		if (lock_and_freeze_slab(n, page))
 			goto out;
-	page = NULL;
-out:
 	spin_unlock(&n->list_lock);
+	return NULL;
+
+out:
+	nr_objects = s->objects - page->inuse;
+	while (nr_objects < s->min_objects && n->nr_partial) {
+		aux_page = next_page(n);
+		list_add(&aux_page->lru, &page->lru);
+		nr_objects += s->objects - page->inuse;
+	}
 	return page;
 }
 
@@ -1352,20 +1343,23 @@ static void deactivate_slab(struct kmem_
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
 	 */
-	while (unlikely(c->page->lockless_freelist)) {
-		void **object;
-
-		/* Retrieve object from cpu_freelist */
-		object = c->page->lockless_freelist;
-		c->page->lockless_freelist = c->page->lockless_freelist[c->page->offset];
-
-		/* And put onto the regular freelist */
-		object[c->page->offset] = c->page->freelist;
-		c->page->freelist = object;
-		c->page->inuse--;
+	for_each_entry_safe(p, p2, c->page, lru) {
+		while (unlikely(p->lockless_freelist)) {
+			void **object;
+
+			/* Retrieve object from cpu_freelist */
+			object = p->lockless_freelist;
+			p->lockless_freelist = get_fp_cpu(c, object);
+
+			/* And put onto the regular freelist */
+			set_fp_cpu(c, object, p->freelist);
+			p->freelist = object;
+			p->inuse--;
+		}
+		list_del(&p->lru);
+		p->cpu = -1;
+		unfreeze_slab(s, p);
 	}
-	c->page = NULL;
-	unfreeze_slab(s, c->page);
 }
 
 static void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
@@ -1441,9 +1435,10 @@ load_freelist:
 		goto debug;
 
 	object = c->page->freelist;
-	c->page->lockless_freelist = object[c->page->offset];
+	c->page->lockless_freelist = get_fp_cpu(c, object);
 	c->page->inuse = s->objects;
 	c->page->freelist = NULL;
+	c->page->cpu = smp_processor_id();
 	slab_unlock(c->page);
 	return object;
 
@@ -1493,7 +1488,7 @@ debug:
 		goto another_slab;
 
 	c->page->inuse++;
-	c->page->freelist = object[c->page->offset];
+	c->page->freelist = get_fp_cpu(c, object);
 	slab_unlock(c->page);
 	return object;
 }
@@ -1517,15 +1512,28 @@ static void __always_inline *slab_alloc(
 
 	local_irq_save(flags);
 	c = get_kcpu(s, smp_processor_id());
-	if (unlikely(!c->page || !c->page->lockless_freelist ||
+	if (unlikely(!c->page ||
 			(node != -1 && page_to_nid(c->page) != node)))
+		goto slow;
 
-		object = __slab_alloc(s, gfpflags, node, addr, c);
+	if (!c->page->lockless_freelist) {
+		/*
+		 * This page's freelist is exhausted. Maybe there are
+		 * more objects in another cpu slab?
+		 */
+		struct page *next_page = lru_to_page(c->page->lru.next);
 
-	else {
-		object = c->page->lockless_freelist;
-		c->page->lockless_freelist = object[c->page->offset];
+		if (!next_page->lockless_freelist)
+			goto slow;
+
+		c->page = next_page;
 	}
+	object = c->page->lockless_freelist;
+	c->page->lockless_freelist = get_fp_cpu(c, object);
+	local_irq_restore(flags);
+	return object;
+slow:
+	object = __slab_alloc(s, gfpflags, node, addr, c);
 	local_irq_restore(flags);
 	return object;
 }
@@ -1553,7 +1561,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-				void *x, void *addr)
+				void *x, void *addr, struct kmem_cache_cpu *c)
 {
 	void *prior;
 	void **object = (void *)x;
@@ -1563,7 +1571,8 @@ static void __slab_free(struct kmem_cach
 	if (unlikely(SlabDebug(page)))
 		goto debug;
 checks_ok:
-	prior = object[page->offset] = page->freelist;
+	prior = page->freelist;
+	set_fp_cpu(c, object, prior);
 	page->freelist = object;
 	page->inuse--;
 
@@ -1621,12 +1630,12 @@ static void __always_inline slab_free(st
 	struct kmem_cache_cpu *c;
 
 	local_irq_save(flags);
 	c = get_kcpu(s, smp_processor_id());
-	if (likely(page == c->page && !SlabDebug(page))) {
-		object[page->offset] = page->lockless_freelist;
+	if (likely(page->cpu == smp_processor_id() && !SlabDebug(page))) {
+		set_fp_cpu(c, object, page->lockless_freelist);
 		page->lockless_freelist = object;
 	} else
-		__slab_free(s, page, x, addr);
+		__slab_free(s, page, x, addr, c);
 	local_irq_restore(flags);
 }
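
For illustration, here is a stand-alone user space model of the two fast paths the patch introduces: the free pointer is now reached through the per cpu copy of the offset (what get_fp_cpu()/set_fp_cpu() do), and the free fast path keys off the new page->cpu field instead of comparing against c->page. The toy_* names, the offset of 0 and the single slab are made up for the example; this is only a sketch of the idea, not kernel code, so locking, irq handling and the list of cpu slabs are left out.

/*
 * Stand-alone model of the per cpu free pointer fast paths.
 * All toy_* types and the offset value are illustrative only.
 */
#include <stddef.h>
#include <stdio.h>

struct toy_page {			/* stands in for struct page */
	void *lockless_freelist;	/* objects usable without the slab lock */
	void *freelist;			/* regular, locked freelist */
	int inuse;			/* allocated objects in this slab */
	int cpu;			/* owning cpu, -1 if not a cpu slab */
};

struct toy_cpu {			/* stands in for struct kmem_cache_cpu */
	struct toy_page *page;		/* current cpu slab */
	unsigned long offset;		/* cached copy of s->offset */
};

/*
 * Mirror of get_fp_cpu()/set_fp_cpu(): the free pointer sits inside the
 * object at an offset taken from the per cpu structure.
 */
static void *get_fp(struct toy_cpu *c, void *object)
{
	return *(void **)((char *)object + c->offset);
}

static void set_fp(struct toy_cpu *c, void *object, void *fp)
{
	*(void **)((char *)object + c->offset) = fp;
}

/* Allocation fast path: pop the head of the lockless freelist. */
static void *toy_alloc(struct toy_cpu *c)
{
	void *object = c->page->lockless_freelist;

	if (!object)
		return NULL;		/* real code falls back to __slab_alloc() */
	c->page->lockless_freelist = get_fp(c, object);
	return object;
}

/* Free fast path: only valid when page->cpu matches the current cpu. */
static void toy_free(struct toy_cpu *c, struct toy_page *page, void *object)
{
	set_fp(c, object, page->lockless_freelist);
	page->lockless_freelist = object;
}

int main(void)
{
	/* Two 32 byte "objects" with the free pointer stored at offset 0. */
	static char objects[2][32];
	struct toy_page page = { .lockless_freelist = objects[0], .cpu = 0 };
	struct toy_cpu c = { .page = &page, .offset = 0 };

	set_fp(&c, objects[0], objects[1]);	/* chain the two objects */
	set_fp(&c, objects[1], NULL);

	void *a = toy_alloc(&c);		/* -> objects[0] */
	void *b = toy_alloc(&c);		/* -> objects[1] */
	toy_free(&c, &page, a);			/* objects[0] back on the list */
	printf("%p %p\n", a, b);
	return 0;
}

Built with plain gcc this just prints the two object addresses; the point is that alloc and free only ever touch c->offset and page->cpu, never the kmem_cache itself.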
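
In the same spirit, the reworked deactivate_slab() drains whatever is left on a slab's lockless freelist back onto the regular freelist and clears the page->cpu ownership marker before the slab is unfrozen. A sketch of that drain loop, reusing the toy types above (again an illustration only; the real function also walks the whole list of cpu slabs and runs with the slab locked):

/*
 * Drain one slab: move every object from the lockless (per cpu) freelist
 * back onto the regular freelist and drop cpu ownership.  Reuses the
 * toy_page/toy_cpu types and get_fp()/set_fp() helpers from the sketch
 * above.
 */
static void toy_deactivate(struct toy_cpu *c, struct toy_page *page)
{
	while (page->lockless_freelist) {
		void *object = page->lockless_freelist;

		/* take the object off the lockless freelist ... */
		page->lockless_freelist = get_fp(c, object);

		/* ... and push it onto the regular freelist */
		set_fp(c, object, page->freelist);
		page->freelist = object;
		page->inuse--;
	}
	page->cpu = -1;			/* no longer a cpu slab */
}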