Index: linux-2.6.19-rc6-mm1/mm/slabifier.c
===================================================================
--- linux-2.6.19-rc6-mm1.orig/mm/slabifier.c	2006-11-27 17:28:47.000000000 -0800
+++ linux-2.6.19-rc6-mm1/mm/slabifier.c	2006-11-27 17:28:48.000000000 -0800
@@ -33,7 +33,7 @@
 	int offset;		/* Free pointer offset. */
 	int objects;		/* Number of objects in slab */
 	int fallback;		/* Last fallback node */
-	atomic_long_t nr_partial;
+	nodemask_t partials;	/* Nodes that have partial slabs */
 	struct page *partial[MAX_NUMNODES];
 	struct page *active[NR_CPUS];
 };
@@ -87,7 +87,9 @@
 		oldpage = s->partial[node];
 		page->lru.next = (void *)oldpage;
 	} while (cmpxchg(&s->partial[node], oldpage, page) != oldpage);
-	atomic_long_inc(&s->nr_partial);
+
+	if (!oldpage)
+		node_set(s->partials, node);
 }
 
 /*
@@ -100,31 +102,42 @@
 	int fallback;
 
 redo:
-	if (!atomic_read(&s->nr_partial))
-		return NULL;
-
 	page = s->partial[searchnode];
 
-	if (page) {
-		if (cmpxchg(&s->partial[node], page, page->lru.next) != page)
+	if (likely(page)) {
+		struct page *n = page->lru.next;
+
+		if (!n)
+			/* We depopulate the partials of a node.
+			 * Must do this before the cmpxchg since a
+			 * concurrent add immediately after the cmpxchg
+			 * may add a node again and we do not want to have
+			 * to deal with cleared node with partial blocks.
+			 * We can handle a set node with no partial block though.
+			 */
+			node_clear(s->partials, node);
+
+		if (unlikely(cmpxchg(&s->partial[node], page, n) != page)) {
+			/* Hmmm... Regardless there are still partials here */
+			node_set(s->partials, node);
 			goto redo;
-		atomic_long_dec(&s->nr_partial);
+		}
+
 		return page;
 	}
-
 #ifdef CONFIG_NUMA
-	fallback = s->fallback;
-	do {
-		/* sequentially allocate all partials from other nodes .... */
-		if (s->partial[fallback]) {
-			searchnode = fallback;
-			goto redo;
-		}
-		fallback++;
+	/* Node has no partials */
+	node_clear(s->partials, node);
+	if (!nodes_empty(s->partials)) {
+		fallback = s->fallback;
+		fallback = next_node(s->fallback, s->partials);
+
 		if (fallback == MAX_NUMNODES)
-			fallback = 0;
+			fallback = first_node(s->partials);
-	} while (fallback != s->fallback);
+		s->fallback = fallback;
+		goto redo;
+	}
 #endif
 	return NULL;
 }
@@ -641,7 +654,6 @@
 	s->objects = (PAGE_SIZE << sc->order) / s->size;
 	BUG_ON(s->objects > 65535);
 	atomic_long_set(&s->nr_slabs, 0);
-	atomic_set(&s->nr_partial, 0);
 #ifdef CONFIG_SMP
 	atomic_set(&s->active_cpus, 0);
 	INIT_WORK(&s->flush, &flusher, s);
@@ -649,11 +661,11 @@
 	if (!s->objects)
 		return NULL;
 
+	nodes_clear(s->partials);
 	memset(s->partial, 0, sizeof(s->partial));
+	memset(s->active, 0, sizeof(s->active));
 	atomic_set(&s->refcount, 1);
 	mutex_init(&s->flushing);
-	for_each_possible_cpu(cpu)
-		s->active[cpu] = NULL;
 
 	return &s->sc;
 }
@@ -760,15 +772,21 @@
 	unsigned long flags;
 	int slabs_freed = 0;
 	int i;
+	struct page *lastpage = NULL;
 
 	drain_all(s);
 
 	local_irq_save(flags);
-	for(i = 0; atomic_read(&s->nr_partial) > 1 && i < atomic_read(&s->nr_partial) - 1; i++ ) {
+	for(i = 0; i < atomic_read(&s->nr_slabs); i++ ) {
 		struct page * page;
 
 		page = get_partial(s, -1);
-		if (!page)
+		/*
+		 * If there is no partial page left or we have just
+		 * a single page (same page is returned to us) then
+		 * stop.
+		 */
+		if (!page || page == lastpage)
 			break;
 
 		/*
@@ -788,6 +806,7 @@
 		 */
 		__ClearPageActive(page);
 		putback_slab(s, page);
+		lastpage = page;
 	}
 	local_irq_restore(flags);
 	return slabs_freed;
 }
@@ -808,7 +827,7 @@
 	struct page *page;
 	int node;
 
-	for_each_node(node) {
+	for_each_nodemask(node, s->partials) {
 		page = s->partial[node];
 		while (!page) {
 			BUG_ON(!pfn_valid(page_to_pfn(page)) || page->inuse > s->objects);
@@ -850,7 +869,7 @@
 	struct page *page;
 	int node;
 
-	for_each_node(node) {
+	for_each_nodemask(node, s->partials) {
 		page = s->partial[node];
 		while (!page && pfn_valid(page_to_pfn(page)) && page->inuse < s->objects)
 			count += page->inuse;
@@ -859,28 +878,51 @@
 	return count;
 }
 
-static unsigned long slab_objects(struct slab_cache *sc,
-	unsigned long *p_total, unsigned long *p_active,
-	unsigned long *p_partial)
+/*
+ * This is racy and may produce weird results. We check the page pointers
+ * carefully to see if they are still valid.
+ */
+static unsigned long count_partials(struct slab *s)
+{
+	int count = 0;
+	struct page *page;
+	int node;
+
+	for_each_nodemask(node, s->partials) {
+		page = s->partial[node];
+		while (!page && pfn_valid(page_to_pfn(page)) && page->inuse < s->objects)
+			count++;
+		page = (void *)page->lru.next;
+	}
+	return count;
+}
+
+static unsigned int count_active(struct slab *s)
 {
-	struct slab *s = (void *)sc;
-	int partial = count_objects(s);
-	int nr_slabs = atomic_read(&s->nr_slabs);
-	int active = 0;		/* Active slabs */
-	int nr_active = 0;	/* Objects in active slabs */
 	int cpu;
+	int count = 0;
 
 	for_each_possible_cpu(cpu) {
 		struct page *page = s->active[cpu];
 
-		if (page) {
-			nr_active++;
-			active += page->inuse;
-		}
+		if (page)
+			count++;
 	}
+	return count;
+}
+static unsigned long slab_objects(struct slab_cache *sc,
+	unsigned long *p_total, unsigned long *p_active,
+	unsigned long *p_partial)
+{
+	struct slab *s = (void *)sc;
+	int partial = count_objects(s);
+	int active = count_active(s);
+	int nr_partial = count_partials(s);
+	int nr_slabs = atomic_read(&s->nr_slabs);
+	int nr_active = 0;	/* Objects in active slabs */
 
 	if (p_partial)
-		*p_partial = atomic_long_read(&s->nr_partial);
+		*p_partial = nr_partial;
 
 	if (p_active)
 		*p_active = nr_active;
@@ -889,7 +931,7 @@
 		*p_total = nr_slabs;
 
 	return partial + active +
-		(nr_slabs - atomic_read(&s->nr_partial) - nr_active) * s->objects;
+		(nr_slabs - nr_partial - nr_active) * s->objects;
 }
 
 const struct slab_allocator slabifier_allocator = {
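
For reference, here is a minimal userspace sketch of the bookkeeping scheme the patch switches to: a lock-free per-node stack of partial slabs plus a mask of nodes that may still hold partials, searched round robin on fallback. This is only an illustration, not slabifier code: a plain unsigned long stands in for nodemask_t, GCC __atomic builtins stand in for cmpxchg(), and the type and helper names (struct item, partial_nodes, node_mask_set/clear) are made up to mirror the patch. The invariant is the one the comment in get_partial() above describes: a set bit on an empty node is harmless, a cleared bit on a node with partial blocks is not, so the bit is cleared before the cmpxchg and set again if the cmpxchg loses a race.

/* Illustrative userspace model only -- not kernel code. */
#include <stdio.h>
#include <stdbool.h>

#define MAX_NODES 8

struct item {
	struct item *next;
};

static struct item *partial[MAX_NODES];	/* per-node lock-free stacks */
static unsigned long partial_nodes;	/* bit n set: node n may have partials */
static int fallback;			/* last node used for fallback */

static void node_mask_set(int node)
{
	__atomic_fetch_or(&partial_nodes, 1UL << node, __ATOMIC_SEQ_CST);
}

static void node_mask_clear(int node)
{
	__atomic_fetch_and(&partial_nodes, ~(1UL << node), __ATOMIC_SEQ_CST);
}

static void add_partial(struct item *it, int node)
{
	struct item *old;

	do {
		old = __atomic_load_n(&partial[node], __ATOMIC_SEQ_CST);
		it->next = old;
	} while (!__atomic_compare_exchange_n(&partial[node], &old, it, false,
					      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));

	if (!old)		/* list was empty: mark the node */
		node_mask_set(node);
}

static struct item *get_partial(int node)
{
	struct item *it, *next;
	int n;

redo:
	it = __atomic_load_n(&partial[node], __ATOMIC_SEQ_CST);
	if (it) {
		next = it->next;
		if (!next)
			/* Taking the last entry. Clear the bit before the
			 * compare-exchange; a concurrent add_partial() sets
			 * it again, and a spuriously set bit is harmless. */
			node_mask_clear(node);

		if (!__atomic_compare_exchange_n(&partial[node], &it, next, false,
						 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
			/* Lost the race: this node still has partials. */
			node_mask_set(node);
			goto redo;
		}
		return it;
	}

	/* Node is empty: round-robin search of the remaining marked nodes. */
	node_mask_clear(node);
	n = fallback;
	do {
		n = (n + 1) % MAX_NODES;
		if (partial_nodes & (1UL << n)) {
			fallback = n;
			node = n;
			goto redo;
		}
	} while (n != fallback);

	return NULL;
}

int main(void)
{
	struct item a = { .next = NULL };
	struct item b = { .next = NULL };

	add_partial(&a, 2);
	add_partial(&b, 2);

	struct item *p1 = get_partial(3);	/* node 3 empty: falls back to node 2, pops b */
	struct item *p2 = get_partial(2);	/* pops a */
	struct item *p3 = get_partial(2);	/* nothing left: NULL */

	printf("%p %p %p\n", (void *)p1, (void *)p2, (void *)p3);
	return 0;
}

Compared with the old global nr_partial counter, keeping a mask also lets the per-node loops (the for_each_nodemask() users in the counting and validation code) skip nodes that are known to have no partial slabs.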