Index: linux-2.6.18-rc6-mm2/mm/slabifier.c
===================================================================
--- linux-2.6.18-rc6-mm2.orig/mm/slabifier.c	2006-09-13 19:44:32.008010541 -0500
+++ linux-2.6.18-rc6-mm2/mm/slabifier.c	2006-09-13 19:45:15.162021601 -0500
@@ -36,6 +36,9 @@ struct slab {
 	struct list_head partial;
 	unsigned long nr_partial;
 	struct page *active[NR_CPUS];
+#ifdef CONFIG_NUMA
+	struct page *numa_partial[MAX_NUMNODES];
+#endif
 };
 
 /*
@@ -79,7 +82,7 @@ static __always_inline void slab_unlock(
 /*
  * Management of partially allocated slabs
  */
-static void __always_inline add_partial(struct slab *s, struct page *page)
+static void __always_inline __add_partial(struct slab *s, struct page *page)
 {
 	spin_lock(&s->list_lock);
 	s->nr_partial++;
@@ -87,9 +90,28 @@ static void __always_inline add_partial(
 	spin_unlock(&s->list_lock);
 }
 
+static void __always_inline add_partial(struct slab *s, struct page *page)
+{
+#ifdef CONFIG_NUMA
+	int node = page_to_nid(page);
+
+	BUG_ON((unsigned long)page < PAGE_SIZE);
+
+	if (cmpxchg(&s->numa_partial[node], NULL, page) == NULL)
+		return;
+#endif
+	__add_partial(s, page);
+}
+
 static void __always_inline remove_partial(struct slab *s, struct page *page)
 {
+#ifdef CONFIG_NUMA
+	int node = page_to_nid(page);
+
+	if (cmpxchg(&s->numa_partial[node], page, NULL) == page)
+		return;
+#endif
 	spin_lock(&s->list_lock);
 	list_del(&page->lru);
 	s->nr_partial--;
@@ -115,44 +137,141 @@ static __always_inline int lock_and_del_
 /*
  * Get a partial page, lock it and return it.
  */
+static struct page *__get_partial(struct slab *s, int node)
+{
+	struct page *page;
+
+	spin_lock(&s->list_lock);
+	list_for_each_entry(page, &s->partial, lru)
+		if (likely(lock_and_del_slab(s, page)))
+			goto out;
+
+	/* No slab or all slabs busy */
+	page = NULL;
+out:
+	spin_unlock(&s->list_lock);
+	return page;
+}
+
+void print_numa_partial(struct slab *s)
+{
+	int node;
+	for_each_node(node)
+		if (s->numa_partial[node])
+			printk(KERN_CRIT "%d. = %p\n", node, s->numa_partial[node]);
+}
+
 #ifdef CONFIG_NUMA
 static struct page *get_partial(struct slab *s, int node)
 {
-	struct page *page;
-	int searchnode = (node == -1) ? numa_node_id() : node;
+	struct page *page, *page2;
+	struct page *last_twice = NULL;
+	struct page *on_node = NULL;
+	struct page *off_node = NULL;
+	int searchnode = (node == -1) ? numa_node_id() : node;
+
+	page = s->numa_partial[searchnode];
+//	if (page && (unsigned long)page < PAGE_SIZE) {
+//		printk(KERN_CRIT "get_partial(%s,%d) page=%p. Fixed up\n", s->sc.name, searchnode, page);
+//		print_numa_partial(s);
+//		s->numa_partial[searchnode] = NULL;
+//		page = NULL;
+//	}
+	if (page && cmpxchg(&s->numa_partial[searchnode], page, NULL) == page)
+		goto lock_return;
 
 	if (!s->nr_partial)
+		/*
+		 * This disregards the slabs we have in the numa_partial
+		 * array. But we know that we have only one available
+		 * per node. It is faster to allocate a new
+		 * slab rather than do a linear search through numa_partial
+		 */
 		return NULL;
 
 	spin_lock(&s->list_lock);
+
 	/*
-	 * Search for slab on the right node
+	 * Search for slab on the right node while repopulating the
+	 * numa_partial array.
 	 */
-	list_for_each_entry(page, &s->partial, lru)
-		if (likely(page_to_nid(page) == searchnode) &&
-			lock_and_del_slab(s, page))
-			goto out;
+	list_for_each_entry_safe(page, page2, &s->partial, lru) {
+		int node = page_to_nid(page);
 
-	if (likely(node == -1)) {
-		/*
-		 * We can fall back to any other node in order to
-		 * reduce the size of the partial list.
-		 */
-		list_for_each_entry(page, &s->partial, lru)
-			if (likely(lock_and_del_slab(s, page)))
-				goto out;
+		BUG_ON((unsigned long)page < PAGE_SIZE);
+
+		if (s->numa_partial[node]) {
+			/*
+			 * Node array element already filled up, so we
+			 * won't need to do this next time around. The
+			 * second slab for the requested node is ours.
+			 */
+			if (likely(page_to_nid(page) == searchnode))
+				goto del_unlock_lock_return;
+			/*
+			 * A second slab, but not for the requested node.
+			 * Memorize it in case we do not match at all.
+			 */
+			last_twice = page;
+		}
+		else
+		if (cmpxchg(&s->numa_partial[node], NULL, page) == NULL) {
+			/* Successful refill of the numa_partial entry */
+			list_del(&page->lru);
+			s->nr_partial--;
+			if (node == searchnode)
+				on_node = page;
+			else
+				off_node = page;
+		}
 	}
 
+	/*
+	 * If we encountered a page from the requested node during the
+	 * search then use it.
+	 */
+	page = on_node;
+	if (page && cmpxchg(&s->numa_partial[searchnode], page, NULL) == page)
+		goto unlock_lock_return;
+
+	if (node != -1)
+		goto fail;
+	/*
+	 * Nothing on our node. If we found two slabs on any other node
+	 * then take the second one. This results in a better
+	 * distribution of the per node table.
+	 */
+	page = last_twice;
+	if (page)
+		goto del_unlock_lock_return;
+
+
+	/*
+	 * Last chance:
+	 * If we encountered any off node page then use that.
+	 */
+	page = off_node;
+	if (page && cmpxchg(&s->numa_partial[page_to_nid(page)], page, NULL) == page)
+		goto unlock_lock_return;
+
+fail:
 	/* Nothing found */
-	page = NULL;
-out:
 	spin_unlock(&s->list_lock);
+	return NULL;
+
+del_unlock_lock_return:
+	list_del(&page->lru);
+	s->nr_partial--;
+unlock_lock_return:
+	spin_unlock(&s->list_lock);
+
+lock_return:
+	slab_lock(page);
 	return page;
 }
 #else
 static struct page *get_partial(struct slab *s, int node)
 {
-	struct page *page;
 
 	/* Racy check. If we mistakenly see no partial slabs then we
 	 * just allocate an empty slab. If we mistakenly try to get a
@@ -161,20 +280,10 @@ static struct page *get_partial(struct s
 	if (!s->nr_partial)
 		return NULL;
 
-	spin_lock(&s->list_lock);
-	list_for_each_entry(page, &s->partial, lru)
-		if (likely(lock_and_del_slab(s, page)))
-			goto out;
-
-	/* No slab or all slabs busy */
-	page = NULL;
-out:
-	spin_unlock(&s->list_lock);
-	return page;
+	return __get_partial(s, node);
 }
 #endif
 
-
 /*
  * Debugging checks
  */
@@ -331,6 +440,18 @@ out:
  *
  * On exit the slab lock will have been dropped.
  */
+static void __always_inline __putback_slab(struct slab *s, struct page *page)
+{
+	if (page->inuse) {
+		if (page->inuse < s->objects)
+			__add_partial(s, page);
+		slab_unlock(page);
+	} else {
+		slab_unlock(page);
+		discard_slab(s, page);
+	}
+}
+
 static void __always_inline putback_slab(struct slab *s, struct page *page)
 {
 	if (page->inuse) {
@@ -414,6 +535,24 @@ void flusher(void *d)
 
 static void drain_all(struct slab *s)
 {
+#ifdef CONFIG_NUMA
+	int node;
+
+	spin_lock(&s->list_lock);
+	for_each_node(node) {
+		struct page *page = s->numa_partial[node];
+
+		while (page) {
+			if (cmpxchg(&s->numa_partial[node], page, NULL)
+							== page) {
+				list_add(&page->lru, &s->partial);
+				s->nr_partial++;
+			}
+			page = s->numa_partial[node];
+		}
+	}
+	spin_unlock(&s->list_lock);
+#endif
 	if (atomic_read(&s->active_cpus)) {
 		mutex_lock(&s->flushing);
 		cancel_delayed_work(&s->flush);
@@ -667,6 +806,7 @@ static struct slab_cache *slab_create(st
 {
 	struct slab *s = (void *)x;
 	int cpu;
+	int node;
 
 	/* Verify that the generic structure is big enough for our data */
 	BUG_ON(sizeof(struct slab_control) < sizeof(struct slab));
@@ -699,6 +839,10 @@ static struct slab_cache *slab_create(st
 	mutex_init(&s->flushing);
 	for_each_possible_cpu(cpu)
 		s->active[cpu] = NULL;
+#ifdef CONFIG_NUMA
+	for_each_node(node)
+		s->numa_partial[node] = NULL;
+#endif
 	return &s->sc;
 }
 
@@ -812,7 +956,7 @@ static int slab_shrink(struct slab_cache
 	for(i = 0; s->nr_partial > 1 && i < s->nr_partial - 1; i++ ) {
 		struct page * page;
 
-		page = get_partial(s, -1);
+		page = __get_partial(s, -1);
 		if (!page)
 			break;
 
@@ -832,7 +976,7 @@ static int slab_shrink(struct slab_cache
 		 * list, the used list or free it.
 		 */
 		__ClearPageActive(page);
-		putback_slab(s, page);
+		__putback_slab(s, page);
 	}
 	local_irq_restore(flags);
 	return slabs_freed;
Index: linux-2.6.18-rc6-mm2/include/linux/allocator.h
===================================================================
--- linux-2.6.18-rc6-mm2.orig/include/linux/allocator.h	2006-09-13 19:33:41.015915409 -0500
+++ linux-2.6.18-rc6-mm2/include/linux/allocator.h	2006-09-13 19:45:15.178624556 -0500
@@ -133,6 +133,7 @@ struct slab_control {
 	struct slab_cache sc;		/* Common information */
 	void *data[50];			/* Some data */
 	void *percpu[NR_CPUS];		/* Some per cpu information. */
+	void *pernode[MAX_NUMNODES];	/* Some per node information. */
 };
 
 struct slab_allocator {
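
For readers following the locking scheme: the numa_partial[] entries above form a single-slot, per-node cache that is claimed and released purely with cmpxchg(), while the spinlock-protected partial list remains the fallback whenever a slot is already occupied or has been emptied by another CPU. The standalone sketch below (not part of the patch) illustrates only that slot protocol; the names slot_put/slot_get and the item type are invented for the illustration, and C11 atomics stand in for the kernel's cmpxchg().

/*
 * Userspace illustration of the single-slot, per-node cmpxchg cache
 * used by add_partial()/remove_partial() in the patch above.
 */
#include <stdatomic.h>
#include <stdio.h>

#define MAX_NODES 4

struct item {
	int id;
};

/* One cached item per node; NULL means the slot is empty. */
static _Atomic(struct item *) node_slot[MAX_NODES];

/* Park an item in its node's slot; returns 0 if the slot was occupied. */
static int slot_put(int node, struct item *it)
{
	struct item *expected = NULL;

	/* Succeeds only if the slot is currently empty (cmpxchg NULL -> it). */
	return atomic_compare_exchange_strong(&node_slot[node], &expected, it);
}

/* Take whatever is cached for this node, or NULL (cmpxchg it -> NULL). */
static struct item *slot_get(int node)
{
	struct item *it = atomic_load(&node_slot[node]);

	if (it && atomic_compare_exchange_strong(&node_slot[node], &it, NULL))
		return it;
	return NULL;	/* empty, or we lost the race: fall back to the list */
}

int main(void)
{
	struct item a = { .id = 1 };

	if (slot_put(0, &a))
		printf("cached item %d in node 0's slot\n", a.id);
	if (slot_get(0) == &a)
		printf("took it back from node 0\n");
	if (!slot_get(0))
		printf("slot empty again; a real allocator would scan the partial list\n");
	return 0;
}

In the patch, a failed slot_put() corresponds to falling through to __add_partial() (the locked list insert), and a failed slot_get() corresponds to taking s->list_lock and scanning s->partial in get_partial().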