Index: linux-2.6.19-rc4-mm2/mm/slabifier.c
===================================================================
--- linux-2.6.19-rc4-mm2.orig/mm/slabifier.c	2006-11-03 13:09:22.667986532 -0600
+++ linux-2.6.19-rc4-mm2/mm/slabifier.c	2006-11-03 13:11:03.624091753 -0600
@@ -32,9 +32,9 @@ struct slab {
 	int size;		/* Total size of an object */
 	int offset;		/* Free pointer offset. */
 	int objects;		/* Number of objects in slab */
-	spinlock_t list_lock;
-	struct list_head partial;
-	unsigned long nr_partial;
+	int fallback;		/* Last fallback node */
+	atomic_long_t nr_partial;
+	struct page *partial[MAX_NUMNODES];
 	struct page *active[NR_CPUS];
 };
 
@@ -44,7 +44,6 @@ struct slab {
  *
  * Lock order:
  * 1. slab_lock(page)
- * 2. slab->list_lock
  *
  * The slabifier assigns one slab for allocation to each processor.
  * Allocations only occur from these active slabs.
@@ -81,99 +80,69 @@ static __always_inline void slab_unlock(
  */
 static void __always_inline add_partial(struct slab *s, struct page *page)
 {
-	spin_lock(&s->list_lock);
-	s->nr_partial++;
-	list_add_tail(&page->lru, &s->partial);
-	spin_unlock(&s->list_lock);
-}
-
-static void __always_inline remove_partial(struct slab *s,
-						struct page *page)
-{
-	spin_lock(&s->list_lock);
-	list_del(&page->lru);
-	s->nr_partial--;
-	spin_unlock(&s->list_lock);
-}
+	int node = page_to_nid(page);
+	struct page *oldpage;
 
-/*
- * Lock page and remove it from the partial list
- *
- * Must hold list_lock
- */
-static __always_inline int lock_and_del_slab(struct slab *s,
-					struct page *page)
-{
-	if (bit_spin_trylock(PG_locked, &page->flags)) {
-		list_del(&page->lru);
-		s->nr_partial--;
-		return 1;
-	}
-	return 0;
+	/* Lockless push onto the head of the per node partial list */
+	do {
+		oldpage = s->partial[node];
+		page->lru.next = (void *)oldpage;
+	} while (cmpxchg(&s->partial[node], oldpage, page) != oldpage);
+	atomic_long_inc(&s->nr_partial);
 }
 
 /*
  * Get a partial page, lock it and return it.
+ *
+ * node == -1 means no preference: the local node is tried first and,
+ * on NUMA, the other nodes after that. An explicit node request is
+ * only ever satisfied from that node.
  */
-#ifdef CONFIG_NUMA
 static struct page *get_partial(struct slab *s, int node)
 {
 	struct page *page;
 	int searchnode = (node == -1) ? numa_node_id() : node;
+#ifdef CONFIG_NUMA
+	int fallback, start;
+#endif
 
-	if (!s->nr_partial)
+redo:
+	if (!atomic_long_read(&s->nr_partial))
 		return NULL;
 
-	spin_lock(&s->list_lock);
-	/*
-	 * Search for slab on the right node
-	 */
-	list_for_each_entry(page, &s->partial, lru)
-		if (likely(page_to_nid(page) == searchnode) &&
-			lock_and_del_slab(s, page))
-				goto out;
+	page = s->partial[searchnode];
 
-	if (likely(node == -1)) {
-		/*
-		 * We can fall back to any other node in order to
-		 * reduce the size of the partial list.
-		 */
-		list_for_each_entry(page, &s->partial, lru)
-			if (likely(lock_and_del_slab(s, page)))
-				goto out;
+	if (page) {
+		/* Pop from the list that was read; node itself may be -1 */
+		if (cmpxchg(&s->partial[searchnode], page,
+				(void *)page->lru.next) != page)
+			goto redo;
+		atomic_long_dec(&s->nr_partial);
+		/* Callers expect a locked slab, as lock_and_del_slab() gave */
+		slab_lock(page);
+		return page;
 	}
 
-	/* Nothing found */
-	page = NULL;
-out:
-	spin_unlock(&s->list_lock);
-	return page;
-}
-#else
-static struct page *get_partial(struct slab *s, int node)
-{
-	struct page *page;
-
-	/* Racy check. If we mistakenly see no partial slabs then we
-	 * just allocate an empty slab. If we mistakenly try to get a
-	 * partial slab then get_partials() will return NULL.
-	 */
-	if (!s->nr_partial)
-		return NULL;
-
-	spin_lock(&s->list_lock);
-	list_for_each_entry(page, &s->partial, lru)
-		if (likely(lock_and_del_slab(s, page)))
-			goto out;
+#ifdef CONFIG_NUMA
+	if (node != -1)
+		return NULL;
+
+	start = fallback = s->fallback;
+	do {
+		/* sequentially allocate all partials from other nodes .... */
+		if (s->partial[fallback]) {
+			s->fallback = fallback;
+			searchnode = fallback;
+			goto redo;
+		}
+		fallback++;
+		if (fallback == MAX_NUMNODES)
+			fallback = 0;
 
-	/* No slab or all slabs busy */
-	page = NULL;
-out:
-	spin_unlock(&s->list_lock);
-	return page;
-}
+	} while (fallback != start);
 #endif
-
+	return NULL;
+}
 
 /*
  * Debugging checks
@@ -584,9 +553,12 @@ out_unlock:
 
 	/*
 	 * All object have been freed.
+	 * But we keep the block for future use or until we have a request
+	 * to shrink the slab.
 	 */
-	remove_partial(s, page);
 	slab_unlock(page);
+	local_irq_restore(flags);
+	return;
 single_object_slab:
 	discard_slab(s, page);
 	local_irq_restore(flags);
@@ -684,7 +656,7 @@ static struct slab_cache *slab_create(st
 	s->objects = (PAGE_SIZE << sc->order) / s->size;
 	BUG_ON(s->objects > 65535);
 	atomic_long_set(&s->nr_slabs, 0);
-	s->nr_partial = 0;
+	atomic_long_set(&s->nr_partial, 0);
 #ifdef CONFIG_SMP
 	atomic_set(&s->active_cpus, 0);
 	INIT_WORK(&s->flush, &flusher, s);
@@ -692,10 +664,9 @@ static struct slab_cache *slab_create(st
 	if (!s->objects)
 		return NULL;
 
-	INIT_LIST_HEAD(&s->partial);
-
+	memset(s->partial, 0, sizeof(s->partial));
+	s->fallback = 0;
 	atomic_set(&s->refcount, 1);
-	spin_lock_init(&s->list_lock);
 	mutex_init(&s->flushing);
 	for_each_possible_cpu(cpu)
 		s->active[cpu] = NULL;
@@ -809,7 +780,8 @@ static int slab_shrink(struct slab_cache
 	drain_all(s);
 
 	local_irq_save(flags);
-	for(i = 0; s->nr_partial > 1 && i < s->nr_partial - 1; i++ ) {
+	for(i = 0; atomic_long_read(&s->nr_partial) > 1 &&
+			i < atomic_long_read(&s->nr_partial) - 1; i++ ) {
 		struct page * page;
 
 		page = get_partial(s, -1);
@@ -847,20 +819,42 @@ static struct slab_cache *slab_dup(struc
 	return &s->sc;
 }
 
-static int free_list(struct slab *s, struct list_head *list)
+/*
+ * Discard all empty slabs on the partial lists and return the number of
+ * partial slabs that still have objects in use.
+ *
+ * The lists are edited with plain stores, so this must not run
+ * concurrently with add_partial() or get_partial().
+ */
+static int free_list(struct slab *s)
 {
 	int slabs_inuse = 0;
-	unsigned long flags;
-	struct page *page, *h;
+	struct page *page, *prev, *next;
+	int node;
 
-	spin_lock_irqsave(&s->list_lock, flags);
-	list_for_each_entry_safe(page, h, list, lru)
-		if (!page->inuse) {
-			list_del(&s->partial);
-			discard_slab(s, page);
-		} else
-			slabs_inuse++;
-	spin_unlock_irqrestore(&s->list_lock, flags);
+	for_each_node(node) {
+		prev = NULL;
+		page = s->partial[node];
+		while (page) {
+			BUG_ON(!pfn_valid(page_to_pfn(page)) ||
+					page->inuse > s->objects);
+			/* Fetch the link before discard_slab() may release the page */
+			next = (void *)page->lru.next;
+			if (!page->inuse) {
+				/* Unlink so that no stale pointer stays behind */
+				if (prev)
+					prev->lru.next = (void *)next;
+				else
+					s->partial[node] = next;
+				atomic_long_dec(&s->nr_partial);
+				discard_slab(s, page);
+			} else {
+				slabs_inuse++;
+				prev = page;
+			}
+			page = next;
+		}
+	}
 	return slabs_inuse;
 }
 
@@ -872,7 +866,7 @@ static int slab_destroy(struct slab_cach
 		return 0;
 
 	drain_all(s);
-	free_list(s, &s->partial);
+	free_list(s);
 
 	if (atomic_long_read(&s->nr_slabs))
 		return 1;
@@ -882,16 +876,24 @@ static int slab_destroy(struct slab_cach
 	return 0;
 }
 
-static unsigned long count_objects(struct slab *s, struct list_head *list)
+/*
+ * This is racy and may produce weird results. We check the page pointers
+ * carefully to see if they are still valid.
+ */
+static unsigned long count_objects(struct slab *s)
 {
 	int count = 0;
 	struct page *page;
-	unsigned long flags;
+	int node;
 
-	spin_lock_irqsave(&s->list_lock, flags);
-	list_for_each_entry(page, list, lru)
-		count += page->inuse;
-	spin_unlock_irqrestore(&s->list_lock, flags);
+	for_each_node(node) {
+		page = s->partial[node];
+		while (page && pfn_valid(page_to_pfn(page)) &&
+				page->inuse < s->objects) {
+			count += page->inuse;
+			page = (void *)page->lru.next;
+		}
+	}
 	return count;
 }
 
@@ -900,7 +902,7 @@ static unsigned long slab_objects(struct
 				unsigned long *p_partial)
 {
 	struct slab *s = (void *)sc;
-	int partial = count_objects(s, &s->partial);
+	int partial = count_objects(s);
 	int nr_slabs = atomic_read(&s->nr_slabs);
 	int active = 0;		/* Active slabs */
 	int nr_active = 0;	/* Objects in active slabs */
@@ -916,7 +918,7 @@ static unsigned long slab_objects(struct
 	}
 
 	if (p_partial)
-		*p_partial = s->nr_partial;
+		*p_partial = atomic_long_read(&s->nr_partial);
 
 	if (p_active)
 		*p_active = nr_active;
@@ -925,7 +927,7 @@ static unsigned long slab_objects(struct
 		*p_total = nr_slabs;
 
 	return partial + active +
-		(nr_slabs - s->nr_partial - nr_active) * s->objects;
+		(nr_slabs - atomic_long_read(&s->nr_partial) - nr_active) * s->objects;
 }
 
 const struct slab_allocator slabifier_allocator = {
Index: linux-2.6.19-rc4-mm2/include/linux/allocator.h
===================================================================
--- linux-2.6.19-rc4-mm2.orig/include/linux/allocator.h	2006-11-03 13:08:55.208820359 -0600
+++ linux-2.6.19-rc4-mm2/include/linux/allocator.h	2006-11-03 13:11:03.641671249 -0600
@@ -133,6 +133,7 @@ struct slab_control {
 	struct slab_cache sc;	/* Common information */
 	void *data[50];		/* Some data */
 	void *percpu[NR_CPUS];	/* Some per cpu information. */
+	void *pernode[MAX_NUMNODES];	/* some per node data */
 };
 
 struct slab_allocator {