Simple NUMA slab allocator

The NUMA slab allocator creates a slab cache per node on demand and uses
the slabifier to manage the per-node slab caches.

Signed-off-by: Christoph Lameter

Index: linux-2.6.18-rc4/mm/numa_slab.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.18-rc4/mm/numa_slab.c	2006-08-19 18:17:00.065868237 -0700
@@ -0,0 +1,263 @@
+/*
+ * NUMA slab implementation
+ *
+ * (C) 2006 Silicon Graphics Inc.
+ * Christoph Lameter
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/allocator.h>
+#include <linux/cpuset.h>
+#include <linux/mempolicy.h>
+#include <linux/spinlock.h>
+
+#define NUMA_SLAB_DEBUG
+
+//#define TPRINTK printk
+#define TPRINTK(x, ...)
+
+/*
+ * Allocator on which we base this functionality.
+ */
+#define base slabifier_allocator
+
+struct numa_slab {
+	struct slab_cache sc;
+	spinlock_t lock;
+	atomic_t refcount;
+	struct slab_cache *node[MAX_NUMNODES];
+};
+
+static int __numa_slab_destroy(struct numa_slab *n)
+{
+	int node;
+
+	TPRINTK(KERN_CRIT "__numa_slab_destroy(%s)\n", n->sc.name);
+
+	for_each_node(node) {
+		base.free(NULL, n->node[node]);
+		n->node[node] = NULL;
+	}
+	return 0;
+}
+
+static struct slab_cache *bring_up_node(struct numa_slab *n, int node)
+{
+	struct slab_cache *s = n->node[node];
+	struct slab_cache *sc = &n->sc;
+
+	TPRINTK(KERN_CRIT "bring_up_node(%s, %d)\n", n->sc.name, node);
+	if (s)
+		return s;
+
+	spin_lock(&n->lock);
+	s = n->node[node];
+	if (s) {
+		spin_unlock(&n->lock);
+		return s;
+	}
+	s = n->node[node] = base.create(&base, sc->page_alloc, node,
+		sc->name, sc->size, sc->align, sc->order, sc->objsize, sc->inuse,
+		sc->offset);
+
+	spin_unlock(&n->lock);
+	return s;
+}
+
+static struct slab_cache *numa_slab_create
+	(struct slab_control *x,
+	const struct slab_allocator *slab_alloc,
+	const struct page_allocator *page_alloc,
+	const struct slab_cache *s)
+{
+	struct numa_slab *n;
+
+	TPRINTK(KERN_CRIT "numa_slab_create(%s, %s, %s, %d, %d, %d, %d, %d, %d)\n",
+		slab_alloc->name, page_alloc->name, s->name, s->size,
+		s->align, s->order, s->objsize, s->inuse, s->offset);
+
+	n = base.alloc(numa_cache, in_atomic() ? GFP_ATOMIC : GFP_KERNEL);
+	if (!n)
+		return NULL;
+
+	memset(n, 0, sizeof(struct numa_slab));
+	memcpy(&n->sc, s, sizeof(struct slab_cache));
+	spin_lock_init(&n->lock);
+	atomic_set(&n->refcount, 1);
+
+	/*
+	 * Do not bring up a node here. The slabulator may set a
+	 * constructor after the fact.
+	 */
+	return &n->sc;
+}
+
+static void *numa_slab_alloc(struct slab_cache *sc, gfp_t flags);
+
+static void *numa_slab_alloc_node(struct slab_cache *sc, gfp_t flags, int node)
+{
+	struct numa_slab *n = (void *)sc;
+	struct slab_cache *s;
+
+	TPRINTK(KERN_CRIT "numa_slab_alloc_node(%s, %x, %d)\n", sc->name, flags, node);
+
+	if (node < 0)
+		node = numa_node_id();
+
+	s = n->node[node];
+
+	if (unlikely(!s)) {
+		s = bring_up_node(n, node);
+		if (!s)
+			return NULL;
+	}
+	return base.alloc(s, flags);
+}
+
+static void *numa_slab_alloc(struct slab_cache *sc, gfp_t flags)
+{
+	int node = numa_node_id();
+
+	TPRINTK(KERN_CRIT "numa_slab_alloc(%s, %x)\n", sc->name, flags);
+
+	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))
+			&& !in_interrupt()) {
+		if (cpuset_do_slab_mem_spread())
+			node = cpuset_mem_spread_node();
+		else if (current->mempolicy)
+			node = slab_node(current->mempolicy);
+	}
+	return numa_slab_alloc_node(sc, flags, node);
+}
+
+static int numa_slab_destroy(struct slab_cache *sc)
+{
+	struct numa_slab *n = (void *)sc;
+
+	TPRINTK(KERN_CRIT "numa_slab_destroy(%s)\n", sc->name);
+
+	if (!atomic_dec_and_test(&n->refcount))
+		return 0;
+
+	__numa_slab_destroy(n);
+	return 0;
+}
+
+static int numa_slab_pointer_valid(struct slab_cache *sc, const void *object)
+{
+	struct numa_slab *n = (void *)sc;
+	int node;
+
+	TPRINTK(KERN_CRIT "numa_slab_pointer_valid(%s, %p)\n", sc->name, object);
+
+	/* We can deduce from the page allocator which node this is. */
+	node = ((struct numactl *)(sc->page_alloc))->node;
+	return base.valid_pointer(n->node[node], object);
+}
+
+static unsigned long numa_slab_object_size(struct slab_cache *sc,
+	const void *object)
+{
+	struct numa_slab *n = (void *)sc;
+	int node;
+
+	TPRINTK(KERN_CRIT "numa_slab_object_size(%s, %p)\n", sc->name, object);
+
+	/* We can deduce from the page allocator which node this is. */
+	node = ((struct numactl *)(sc->page_alloc))->node;
+	return base.object_size(n->node[node], object);
+}
+
+static void numa_slab_free(struct slab_cache *sc, const void *object)
+{
+	TPRINTK(KERN_CRIT "numa_slab_free(%s, %p)\n", sc ?
+		sc->name : "", object);
+	base.free(NULL, object);
+}
+
+static struct slab_cache *numa_slab_dup(struct slab_cache *sc)
+{
+	struct numa_slab *n = (void *)sc;
+
+	TPRINTK(KERN_CRIT "numa_slab_dup(%s)\n", sc->name);
+
+	atomic_inc(&n->refcount);
+	return sc;
+}
+
+static struct slab_cache *numa_slab_node(struct slab_cache *sc, int node)
+{
+	struct numa_slab *n = (void *)sc;
+	struct slab_cache *s = n->node[node];
+
+	return s;
+}
+
+static int numa_slab_shrink(struct slab_cache *sc,
+	int (*move_object)(struct slab_cache *, void *))
+{
+	struct numa_slab *n = (void *)sc;
+	int node;
+	int count = 0;
+
+	TPRINTK(KERN_CRIT "numa_slab_shrink(%s, %p)\n", sc->name, move_object);
+
+	/*
+	 * FIXME: What you really want to do here is to
+	 * run the shrinking on each node separately.
+	 */
+	spin_lock(&n->lock);
+	for_each_node(node) {
+		struct slab_cache *s = n->node[node];
+
+		if (s)
+			count += base.shrink(s, move_object);
+	}
+	spin_unlock(&n->lock);
+	return count;
+}
+
+static unsigned long numa_slab_objects(struct slab_cache *sc,
+	unsigned long *active, unsigned long *partial)
+{
+	struct numa_slab *n = (void *)sc;
+	int node;
+	unsigned long count = 0;
+	unsigned long count_active = 0;
+	unsigned long count_partial = 0;
+
+	printk(KERN_CRIT "numa_slab_objects(%s)\n", sc->name);
+
+	for_each_node(node) {
+		unsigned long nactive, npartial;
+		struct slab_cache *s = n->node[node];
+
+		if (!s)
+			continue;
+
+		count += base.objects(s, &nactive, &npartial);
+		count_active += nactive;
+		count_partial += npartial;
+	}
+	if (active)
+		*active = count_active;
+	if (partial)
+		*partial = count_partial;
+	return count;
+}
+
+const struct slab_allocator numa_slab_allocator = {
+	.name = "NumaSlab",
+	.create = numa_slab_create,
+	.alloc = numa_slab_alloc,
+	.alloc_node = numa_slab_alloc_node,
+	.free = numa_slab_free,
+	.valid_pointer = numa_slab_pointer_valid,
+	.object_size = numa_slab_object_size,
+	.objects = numa_slab_objects,
+	.shrink = numa_slab_shrink,
+	.dup = numa_slab_dup,
+	.node = numa_slab_node,
+	.destroy = numa_slab_destroy,
+	.destructor = null_slab_allocator_destructor
+};
+EXPORT_SYMBOL(numa_slab_allocator);
Index: linux-2.6.18-rc4/mm/Makefile
===================================================================
--- linux-2.6.18-rc4.orig/mm/Makefile	2006-08-19 16:46:06.586412205 -0700
+++ linux-2.6.18-rc4/mm/Makefile	2006-08-19 18:13:59.716643596 -0700
@@ -25,4 +25,5 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_h
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_MODULAR_SLAB) += allocator.o slabifier.o kmalloc.o slabulator.o
+obj-$(CONFIG_NUMA_SLAB) += numa_slab.o
Index: linux-2.6.18-rc4/init/Kconfig
===================================================================
--- linux-2.6.18-rc4.orig/init/Kconfig	2006-08-19 16:46:06.587388707 -0700
+++ linux-2.6.18-rc4/init/Kconfig	2006-08-19 18:13:31.412725870 -0700
@@ -411,6 +411,11 @@ config SHMEM
 	  option replaces shmem and tmpfs with the much simpler ramfs code,
 	  which may be appropriate on small systems without swap.
 
+config NUMA_SLAB
+	default y
+	bool "NUMA Slab allocator (for lots of memory)"
+	depends on MODULAR_SLAB && NUMA
+
 config VM_EVENT_COUNTERS
 	default y
 	bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
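
For readers without the rest of the modular slab series at hand, here is a
brief caller-side sketch (not part of the patch) of how the per-node fan-out
is exercised through the generic slab_allocator operations table. It assumes
a slab_cache "sc" has already been set up on top of numa_slab_allocator by
the slabulator; the helper names grab_object_on_node() and drop_object() are
made up for illustration, and only the .alloc_node(), .alloc() and .free()
hooks added above are used.

static void *grab_object_on_node(struct slab_cache *sc, int nid)
{
	/*
	 * Pull an object from the slabifier cache of node nid; the
	 * per-node cache is created on first use by bring_up_node().
	 * Passing nid < 0 falls back to the local node, while .alloc()
	 * additionally honors cpuset memory spreading and mempolicies.
	 */
	return numa_slab_allocator.alloc_node(sc, GFP_KERNEL, nid);
}

static void drop_object(struct slab_cache *sc, void *object)
{
	/*
	 * numa_slab_free() uses sc only for its debug printout; the
	 * slabifier locates the slab from the object itself.
	 */
	numa_slab_allocator.free(sc, object);
}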