SLUB: Make NUMA support optional

NUMA support in the slab allocators may create unnecessary overhead for
small NUMA configurations (especially those that realize multiple nodes
on a single motherboard, such as Opterons).

Signed-off-by: Christoph Lameter

Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h	2007-09-06 06:29:37.000000000 -0700
+++ linux-2.6/include/linux/slab.h	2007-09-06 06:29:46.000000000 -0700
@@ -178,7 +178,7 @@ static inline void *kcalloc(size_t n, si
 	return __kmalloc(n * size, flags | __GFP_ZERO);
 }
 
-#if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB)
+#if !defined(CONFIG_SLAB_NUMA) && !defined(CONFIG_SLOB)
 /**
  * kmalloc_node - allocate memory from a specific node
  * @size: how many bytes of memory are required.
@@ -206,7 +206,7 @@ static inline void *kmem_cache_alloc_nod
 {
 	return kmem_cache_alloc(cachep, flags);
 }
-#endif /* !CONFIG_NUMA && !CONFIG_SLOB */
+#endif /* !CONFIG_SLAB_NUMA && !CONFIG_SLOB */
 
 /*
  * kmalloc_track_caller is a special version of kmalloc that records the
@@ -225,7 +225,7 @@ extern void *__kmalloc_track_caller(size
 	__kmalloc(size, flags)
 #endif /* DEBUG_SLAB */
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 /*
  * kmalloc_node_track_caller is a special version of kmalloc_node that
  * records the calling function of the routine calling it for slab leak
@@ -244,7 +244,7 @@ extern void *__kmalloc_node_track_caller
 	__kmalloc_node(size, flags, node)
 #endif
 
-#else /* CONFIG_NUMA */
+#else /* CONFIG_SLAB_NUMA */
 
 #define kmalloc_node_track_caller(size, flags, node) \
 	kmalloc_track_caller(size, flags)
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h	2007-09-06 06:29:37.000000000 -0700
+++ linux-2.6/include/linux/slub_def.h	2007-09-06 06:29:46.000000000 -0700
@@ -50,7 +50,7 @@ struct kmem_cache {
 	struct kobject kobj;	/* For sysfs */
 #endif
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 	int defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
@@ -179,7 +179,7 @@ static __always_inline void *kmalloc(siz
 	return __kmalloc(size, flags);
 }
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node);
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2007-09-06 06:29:37.000000000 -0700
+++ linux-2.6/mm/slub.c	2007-09-06 06:29:46.000000000 -0700
@@ -137,6 +137,12 @@ static inline void ClearSlabDebug(struct
 	page->flags &= ~SLABDEBUG;
 }
 
+#ifdef CONFIG_SLAB_NUMA
+#define node(x) page_to_nid(x)
+#else
+#define node(x) 0
+#endif
+
 /*
  * Issues still to be resolved:
  *
@@ -270,7 +276,7 @@ int slab_is_available(void)
 
 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 {
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 	return s->node[node];
 #else
 	return &s->local_node;
@@ -819,7 +825,7 @@ static void remove_full(struct kmem_cach
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
-	n = get_node(s, page_to_nid(page));
+	n = get_node(s, node(page));
 
 	spin_lock(&n->list_lock);
 	list_del(&page->lru);
@@ -1088,7 +1094,7 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;
 
-	n = get_node(s, page_to_nid(page));
+	n = get_node(s, node(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
 	page->offset = s->offset / sizeof(void *);
@@ -1167,7 +1173,7 @@ static void free_slab(struct kmem_cache
 
 static void discard_slab(struct kmem_cache *s, struct page *page)
 {
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	struct kmem_cache_node *n = get_node(s, node(page));
 
 	atomic_long_dec(&n->nr_slabs);
 	reset_page_mapcount(page);
@@ -1218,7 +1224,7 @@ static void add_partial(struct kmem_cach
 
 static void remove_partial(struct kmem_cache *s, struct page *page)
 {
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	struct kmem_cache_node *n = get_node(s, node(page));
 
 	spin_lock(&n->list_lock);
 	list_del(&page->lru);
@@ -1273,7 +1279,7 @@ out:
  */
 static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 {
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 	struct zonelist *zonelist;
 	struct zone **z;
 	struct page *page;
@@ -1341,7 +1347,7 @@ static struct page *get_partial(struct k
  */
 static void unfreeze_slab(struct kmem_cache *s, struct page *page)
 {
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	struct kmem_cache_node *n = get_node(s, node(page));
 
 	ClearSlabFrozen(page);
 	if (page->inuse) {
@@ -1463,7 +1469,7 @@ static void *__slab_alloc(struct kmem_ca
 		goto new_slab;
 
 	slab_lock(page);
-	if (unlikely(node != -1 && page_to_nid(page) != node))
+	if (unlikely(node != -1 && node(page) != node))
 		goto another_slab;
 load_freelist:
 	object = page->freelist;
@@ -1501,7 +1507,7 @@ new_slab:
 		 * specified. So we need to recheck.
 		 */
 		if (node == -1 ||
-			page_to_nid(s->cpu_slab[cpu]) == node) {
+			node(s->cpu_slab[cpu]) == node) {
 			/*
 			 * Current cpuslab is acceptable and we
 			 * want the current one since its cache hot
@@ -1551,7 +1557,7 @@ static void __always_inline *slab_alloc(
 	local_irq_save(flags);
 	page = s->cpu_slab[smp_processor_id()];
 	if (unlikely(!page || !page->lockless_freelist ||
-			(node != -1 && page_to_nid(page) != node)))
+			(node != -1 && node(page) != node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, page);
 
@@ -1573,7 +1579,7 @@ void *kmem_cache_alloc(struct kmem_cache
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
@@ -1616,7 +1622,7 @@ checks_ok:
 	 * then add it.
 	 */
 	if (unlikely(!prior))
-		add_partial(get_node(s, page_to_nid(page)), page);
+		add_partial(get_node(s, node(page)), page);
 
 out_unlock:
 	slab_unlock(page);
@@ -1860,7 +1866,7 @@ static void init_kmem_cache_node(struct
 #endif
 }
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 /*
  * No kmalloc_node yet so do it by hand. We know that this is the first
  * slab on the node for this slabcache. There are no concurrent accesses
@@ -2096,7 +2102,7 @@ static int kmem_cache_open(struct kmem_c
 		goto error;
 
 	s->refcount = 1;
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 	s->defrag_ratio = 100;
 #endif
 
@@ -2415,7 +2421,7 @@ void *__kmalloc(size_t size, gfp_t flags
 }
 EXPORT_SYMBOL(__kmalloc);
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s = get_slab(size, flags);
@@ -2567,7 +2573,7 @@ void __init kmem_cache_init(void)
 	int i;
 	int caches = 0;
 
-#ifdef CONFIG_NUMA
+#ifdef CONFIG_SLAB_NUMA
 	/*
 	 * Must first have the slab cache available for the allocations of the
There is special bootstrap code in @@ -3559,7 +3565,7 @@ static ssize_t free_calls_show(struct km } SLAB_ATTR_RO(free_calls); -#ifdef CONFIG_NUMA +#ifdef CONFIG_SLAB_NUMA static ssize_t defrag_ratio_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", s->defrag_ratio / 10); @@ -3604,7 +3610,7 @@ static struct attribute * slab_attrs[] = #ifdef CONFIG_ZONE_DMA &cache_dma_attr.attr, #endif -#ifdef CONFIG_NUMA +#ifdef CONFIG_SLAB_NUMA &defrag_ratio_attr.attr, #endif NULL Index: linux-2.6/init/Kconfig =================================================================== --- linux-2.6.orig/init/Kconfig 2007-09-06 06:57:05.000000000 -0700 +++ linux-2.6/init/Kconfig 2007-09-06 07:27:11.000000000 -0700 @@ -543,6 +543,7 @@ choice config SLAB bool "SLAB" + select SLAB_NUMA help The regular slab allocator that is established and known to work well in all environments. It organizes cache hot objects in @@ -570,6 +571,19 @@ config SLOB endchoice +config SLAB_NUMA + depends on NUMA + bool "Slab NUMA Support" + default y + help + Slab NUMA support allows NUMA aware slab operations. The + NUMA logic creates overhead that may result in regressions on + systems with a small number of nodes (such as multiple nodes + on the same motherboard) but it may be essential for distributed + NUMA systems with a high NUMA factor. + WARNING: Disabling Slab NUMA support will disable all NUMA locality + controls for slab objects. + endmenu # General setup config RT_MUTEXES