Index: linux-2.6.18-rc4-mm3/mm/slabifier.c
===================================================================
--- linux-2.6.18-rc4-mm3.orig/mm/slabifier.c	2006-08-29 23:42:51.968310327 -0700
+++ linux-2.6.18-rc4-mm3/mm/slabifier.c	2006-08-29 23:54:02.087058393 -0700
@@ -12,7 +12,7 @@
 #include
 #include
 
-// #define SLABIFIER_DEBUG
+#define SLABIFIER_DEBUG
 
 #ifdef SLABIFIER_DEBUG
 #define DBUG_ON(_x) BUG_ON(_x)
@@ -20,6 +20,13 @@
 #define DBUG_ON(_x)
 #endif
 
+struct active_info {
+	struct page *page;
+	void *freelist;
+	int inuse;
+	int referenced;
+	ZONE_PADDING(xx);	/* Fill up the cacheline */
+};
 
 struct slab {
 	struct slab_cache sc;
@@ -37,7 +44,8 @@ struct slab {
 	spinlock_t list_lock;
 	struct list_head partial;
 	unsigned long nr_partial;
-	struct page *active[NR_CPUS];
+	ZONE_PADDING(xx);
+	struct active_info active[NR_CPUS];
 };
 
 /*
@@ -90,17 +98,6 @@ static __always_inline void set_object_p
 	page->index = (unsigned long)object;
 }
 
-static __always_inline void *get_active_pointer(struct page *page)
-{
-	return (void *)page->lru.prev;
-}
-
-static __always_inline void set_active_pointer(struct page *page,
-					void *object)
-{
-	page->lru.prev = object;
-}
-
 static __always_inline struct slab *get_slab(struct page *page)
 {
 	return (struct slab *)page->mapping;
@@ -137,32 +134,6 @@ static __always_inline int get_object_co
 	return *object_counter(page);
 }
 
-static __always_inline int *active_counter(struct page *page)
-{
-	return (int *)&page->lru.next;
-}
-
-static __always_inline void inc_active_counter(struct page *page)
-{
-	(*active_counter(page))++;
-}
-
-static __always_inline void dec_active_counter(struct page *page)
-{
-	(*active_counter(page))--;
-}
-
-static __always_inline void set_active_counter(struct page *page,
-						int counter)
-{
-	*active_counter(page) = counter;
-}
-
-static __always_inline int get_active_counter(struct page *page)
-{
-	return *active_counter(page);
-}
-
 /*
  * Locking for each individual slab using the pagelock
  */
@@ -442,34 +413,34 @@ static void __always_inline putback_slab
 	}
 }
 
-static void deactivate_slab(struct slab *s, struct page *page)
+static void deactivate_slab(struct slab *s, struct active_info *ai)
 {
-	void *freelist;
-
-	check_active_slab(page);
-	slab_lock(page);
-	freelist = get_active_pointer(page);
-	if (unlikely(freelist)) {
-		/* Merge freelists */
-		if (get_object_pointer(page)) {
-			while (freelist) {
-				void **x = freelist;
+	check_active_slab(ai->page);
+	slab_lock(ai->page);
+	if (unlikely(ai->freelist)) {
+		/* Deal with rare situations when the freelist was left */
+		printk(KERN_CRIT "Deactivating slab %p that was not fully allocated\n", ai->page);
+		if (get_object_pointer(ai->page)) {
+			printk(KERN_CRIT "Merging freelists %p act-inuse=%d page-inuse=%d\n", ai->page, ai->inuse,
+				get_object_counter(ai->page));
+			while (ai->freelist) {
+				void **x = ai->freelist;
 
 				/* Remove object from active freelist */
-				freelist = x[s->offset];
+				ai->freelist = x[s->offset];
 				/* Push onto object freelist */
-				x[s->offset] = get_object_pointer(page);
-				set_object_pointer(page, x);
-				dec_object_counter(page);
+				x[s->offset] = get_object_pointer(ai->page);
+				set_object_pointer(ai->page, x);
+				dec_object_counter(ai->page);
 			}
 		} else {
-			set_object_pointer(page, freelist);
-			set_object_counter(page, get_active_counter(page));
+			printk("Adopting active freelist %p inuse=%d\n", ai->page, ai->inuse);
+			set_object_pointer(ai->page, ai->freelist);
+			set_object_counter(ai->page, ai->inuse);
 		}
 	}
-	ClearPageReferenced(page);
-	putback_slab(s, page);
+	putback_slab(s, ai->page);
 }
 
 /*
@@ -479,13 +450,12 @@ static void deactivate_slab(struct slab
 static void flush_active(void *d)
 {
 	struct slab *s = d;
-	int cpu = smp_processor_id();
-	struct page *page = s->active[cpu];
+	struct active_info *ai = &s->active[smp_processor_id()];
 
-	if (page) {
-		s->active[cpu] = NULL;
-		ClearPageReferenced(page);
-		deactivate_slab(s, page);
+	if (ai->page) {
+		ai->referenced = 0;
+		deactivate_slab(s, ai);
+		ai->page = NULL;
 	}
 }
 
@@ -500,18 +470,17 @@ static void flush_active(void *d)
 static void check_flush_active(void *d)
 {
 	struct slab *s = d;
-	int cpu = smp_processor_id();
-	struct page *page = s->active[cpu];
+	struct active_info *ai = &s->active[smp_processor_id()];
 
-	if (!page)
+	if (!ai->page)
 		return;
 
-	if (PageReferenced(page)) {
-		ClearPageReferenced(page);
+	if (ai->referenced) {
+		ai->referenced = 0;
 		atomic_inc(&s->active_cpus);
 	} else {
-		deactivate_slab(s, page);
-		s->active[cpu] = NULL;
+		deactivate_slab(s, ai);
+		ai->page = NULL;
 	}
 }
 
@@ -605,8 +574,11 @@ static struct slab_cache *slab_create(st
 	atomic_set(&s->refcount, 1);
 	spin_lock_init(&s->list_lock);
 	mutex_init(&s->flushing);
-	for_each_possible_cpu(cpu)
-		s->active[cpu] = NULL;
+	for_each_possible_cpu(cpu) {
+		struct active_info *ai = &s->active[cpu];
+
+		ai->page = NULL;
+	}
 	return &s->sc;
 }
 
@@ -618,7 +590,7 @@ static struct slab_cache *slab_create(st
  *
  * Return NULL if we cannot reload.
  */
-static struct page *reload(struct slab *s, unsigned long cpu, gfp_t flags,
+static struct page *reload(struct slab *s, struct active_info *ai, gfp_t flags,
 	int node)
 {
 	void *p, *start, *end;
@@ -640,7 +612,7 @@ static struct page *reload(struct slab *
 	 * slab operation on the cache. Setting active slab to NULL
 	 * will allow the other slab operation to allocate a page.
 	 */
-	s->active[cpu] = NULL;
+	ai->page = NULL;
 	if ((flags & __GFP_WAIT)) {
 		local_irq_enable();
 		page = new_slab(s, flags, node);
@@ -667,30 +639,26 @@ static struct page *reload(struct slab *
 	check_free_chain(s, page);
 
 	/* We droppped the lock .... */
-	if (s->active[cpu]) {
-
+	if (ai->page) {
 		add_partial(s, page);
-		return s->active[cpu];
+		return ai->page;
 	}
 
 gotpage:
 	/*
-	 * Now we have a page that is isolated from the lists,
-	 */
-	ClearPageReferenced(page);
-
-
-	/*
 	 * An active page will appear to slab_free like
 	 * a full page but will have a shadow freelist
 	 * and a shadow counter.
 	 */
-	set_active_pointer(page, get_object_pointer(page));
+	ai->referenced = 0;
+	ai->page = page;
+	ai->inuse = get_object_counter(page);
+	ai->freelist = get_object_pointer(page);
+
 	set_object_pointer(page, NULL);
-	set_active_counter(page, get_object_counter(page));
 	set_object_counter(page, s->objects);
-	s->active[cpu] = page;
-	slab_unlock(page);
+	check_free_chain(s, page);
+	slab_unlock(page);
 
 #ifdef CONFIG_SMP
 	if (keventd_up() && !atomic_read(&s->active_cpus)) {
@@ -706,11 +674,9 @@ static __always_inline void *__slab_allo
 		gfp_t gfpflags, int node)
 {
 	struct slab *s = (void *)sc;
-	struct page *page;
 	void **object;
-	void **freelist;
+	struct active_info *ai;
 	unsigned long flags;
-	int cpu;
 
 	if (unlikely(s->objects == 1)) {
 		struct page *page = new_slab(s, gfpflags, node);
@@ -722,52 +688,50 @@ static __always_inline void *__slab_allo
 	}
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->active[cpu];
-	if (unlikely(!page)) {
+	ai = &s->active[smp_processor_id()];
+	if (unlikely(!ai->page)) {
load:
-		page = reload(s, cpu, gfpflags, node);
-		s->active[cpu] = page;
-		if (unlikely(!page)) {
+		if (unlikely(!reload(s, ai, gfpflags, node))) {
 			local_irq_restore(flags);
 			return NULL;
 		}
 	}
 
-	check_active_slab(page);
-	freelist = get_active_pointer(page);
-	if (unlikely(!freelist)) {
+	check_active_slab(ai->page);
+	if (unlikely(!ai->freelist)) {
 		/*
 		 * Free list exhausted. Now we need to see if any additional
 		 * frees have occurred in the meantime on this slab. Then
 		 * we use the build up free list.
 		 */
-		if (get_object_pointer(page)) {
-			slab_lock(page);
-			freelist = get_object_pointer(page);
-			set_object_pointer(page, NULL);
-			set_object_counter(page, s->objects);
-			set_active_counter(page, 0);
-			slab_unlock(page);
+		if (get_object_pointer(ai->page)) {
+			printk(KERN_CRIT "swizzle %s: %p concurrent frees=%d\n",
+				s->sc.name, ai->page, s->objects - get_object_counter(ai->page));
+			slab_lock(ai->page);
+			ai->freelist = get_object_pointer(ai->page);
+			ai->inuse = 0;
+			set_object_pointer(ai->page, NULL);
+			set_object_counter(ai->page, s->objects);
+			slab_unlock(ai->page);
 		}
 
 		/*
 		 * If the above did not help us then we need a new slab
 		 */
-		if (unlikely(!freelist) ||
-			(node >= 0 && page_to_nid(page) != node)) {
+		if (unlikely(!ai->freelist) ||
+			(node >= 0 && page_to_nid(ai->page) != node)) {
 			/* Return a slab unfit for further allocation */
-			deactivate_slab(s, page);
+			deactivate_slab(s, ai);
 			goto load;
 		}
 	}
-	object = freelist;
-	set_active_pointer(page, freelist[s->offset]);
-	inc_active_counter(page);
-	SetPageReferenced(page);
+	object = ai->freelist;
+	ai->freelist = object[s->offset];
+	ai->inuse++;
+	ai->referenced = 1;
 	local_irq_restore(flags);
 	return object;
 }
@@ -803,6 +767,7 @@ static void slab_free(struct slab_cache
 	struct page * page;
 	void *prior;
 	void **object = (void *)x;
+	struct active_info *ai;
 	unsigned long flags;
 
 	if (!object)
@@ -847,11 +812,16 @@ dumpret:
 	}
 
 	local_irq_save(flags);
-	if (page == s->active[smp_processor_id()]) {
-		/* fast bypass to local active slab */
-		object[s->offset] = get_active_pointer(page);
-		set_active_pointer(page, object);
-		dec_active_counter(page);
+
+	/*
+	 * If this is a free out of the current slab then there is no
+	 * need for special checks.
+	 */
+	ai = &s->active[smp_processor_id()];
+	if (ai->page == page) {
+		object[s->offset] = ai->freelist;
+		ai->freelist = object;
+		ai->inuse--;
 		local_irq_restore(flags);
 		return;
 	}
@@ -1091,7 +1061,7 @@ static unsigned long slab_objects(struct
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		struct page *page = s->active[cpu];
+		struct page *page = s->active[cpu].page;
 
 		if (page) {
 			nr_active++;
Index: linux-2.6.18-rc4-mm3/include/linux/allocator.h
===================================================================
--- linux-2.6.18-rc4-mm3.orig/include/linux/allocator.h	2006-08-29 11:08:02.691391970 -0700
+++ linux-2.6.18-rc4-mm3/include/linux/allocator.h	2006-08-29 23:46:16.395131407 -0700
@@ -132,7 +132,7 @@ struct slab_cache {
 struct slab_control {
 	struct slab_cache sc;		/* Common information */
 	void *data[50];			/* Some data */
-	void *percpu[NR_CPUS];		/* Some per cpu information. */
+	char percpu[NR_CPUS][L1_CACHE_BYTES];	/* Some per cpu information. */
 };
 
 struct slab_allocator {
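
A note for reviewers, not part of the patch: the change replaces the old trick of hiding the per-cpu active slab's freelist and counter in unused struct page fields (page->lru.prev / page->lru.next) with an explicit, cacheline-padded per-cpu struct active_info carrying page, freelist, inuse and referenced, so the hot alloc/free paths only touch cpu-local state. The standalone userspace sketch below illustrates that idea under simplified assumptions; OBJ_SIZE, NR_OBJECTS, fast_alloc and fast_free are invented names for the example, and the gcc aligned attribute stands in for ZONE_PADDING().

#include <stdio.h>

#define CACHELINE	64
#define OBJ_SIZE	32	/* must be >= sizeof(void *) and a multiple of it */
#define NR_OBJECTS	8

/* Per-cpu state; the alignment pads each entry out to its own cacheline. */
struct active_info {
	void *page;		/* memory of the slab currently active on this cpu */
	void *freelist;		/* shadow freelist of that slab */
	int inuse;		/* objects handed out from it */
	int referenced;		/* cleared by the periodic flusher */
} __attribute__((aligned(CACHELINE)));

/* Thread a next-pointer through word `offset` of every free object. */
static void init_active(struct active_info *ai, void *mem, unsigned int offset)
{
	int i;

	ai->page = mem;
	ai->freelist = mem;
	ai->inuse = 0;
	ai->referenced = 0;
	for (i = 0; i < NR_OBJECTS; i++) {
		void **object = (void **)((char *)mem + i * OBJ_SIZE);

		object[offset] = (i == NR_OBJECTS - 1) ?
			NULL : (void *)((char *)mem + (i + 1) * OBJ_SIZE);
	}
}

/* Fast-path allocation: pop the head of the cpu-local shadow freelist. */
static void *fast_alloc(struct active_info *ai, unsigned int offset)
{
	void **object = ai->freelist;

	if (!object)
		return NULL;	/* the real code would reload()/deactivate here */
	ai->freelist = object[offset];
	ai->inuse++;
	ai->referenced = 1;
	return object;
}

/* Fast-path free into the active slab: push onto the shadow freelist. */
static void fast_free(struct active_info *ai, void *x, unsigned int offset)
{
	void **object = x;

	object[offset] = ai->freelist;
	ai->freelist = object;
	ai->inuse--;
}

int main(void)
{
	static char slab[NR_OBJECTS * OBJ_SIZE] __attribute__((aligned(CACHELINE)));
	struct active_info ai;
	void *a, *b;

	init_active(&ai, slab, 0);
	a = fast_alloc(&ai, 0);
	b = fast_alloc(&ai, 0);
	printf("allocated %p and %p, inuse=%d\n", a, b, ai.inuse);
	fast_free(&ai, a, 0);
	printf("freed %p, inuse=%d, freelist head=%p\n", a, ai.inuse, ai.freelist);
	return 0;
}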