The slab emulation layer. This provides a layer that implements the
existing slab API on top of the modular allocator framework. We put a
hook into slab.h that redirects includes of slab.h to slabulator.h.
kmem_cache_create() dynamically derives page allocators with the
features requested for each cache. (An illustrative user-space sketch
of the flag handling is appended after the patch.)

Signed-off-by: Christoph Lameter

Index: linux-2.6.18-rc4-mm2/mm/slabulator.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.18-rc4-mm2/mm/slabulator.c	2006-08-25 19:26:44.812526130 -0700
@@ -0,0 +1,285 @@
+/*
+ * Slabulator - Emulate the Slab API.
+ *
+ * (C) 2006 Silicon Graphics, Inc. Christoph Lameter
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define SLAB_MAX_ORDER 4
+
+#define SLABULATOR_MERGE
+
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN sizeof(void *)
+#endif
+
+static int calculate_order(int size)
+{
+	int order;
+	int rem;
+
+	for (order = max(0, fls(size - 1) - PAGE_SHIFT);
+			order < MAX_ORDER; order++) {
+		unsigned long slab_size = PAGE_SIZE << order;
+
+		if (slab_size < size)
+			continue;
+
+		rem = slab_size % size;
+
+		if (rem * 8 <= PAGE_SIZE << order)
+			break;
+	}
+	if (order >= MAX_ORDER)
+		return -E2BIG;
+	return order;
+}
+
+/*
+ * We can actually operate slabs any time after the page allocator is up.
+ * slab_is_available() merely means that the kmalloc array is available.
+ *
+ * However, be aware that deriving allocators depends on kmalloc being
+ * functional.
+ */
+int slabulator_up = 0;
+
+int slab_is_available(void)
+{
+	return slabulator_up;
+}
+
+void kmem_cache_init(void)
+{
+	extern void kmalloc_init(void);
+
+	kmalloc_init();
+	slabulator_up = 1;
+}
+
+struct slab_cache *kmem_cache_create(const char *name, size_t size,
+		size_t align, unsigned long flags,
+		void (*ctor)(void *, struct slab_cache *, unsigned long),
+		void (*dtor)(void *, struct slab_cache *, unsigned long))
+{
+	const struct page_allocator *a = &page_allocator;
+	struct slab_cache s;
+	struct slab_cache *rs;
+	struct slab_control *x;
+	int page_size_slab;
+
+	s.offset = 0;
+	s.align = max(ARCH_SLAB_MINALIGN, ALIGN(align, sizeof(void *)));
+
+	if (flags & (SLAB_MUST_HWCACHE_ALIGN|SLAB_HWCACHE_ALIGN))
+		s.align = L1_CACHE_BYTES;
+
+	s.inuse = size;
+	s.objsize = size;
+	s.size = ALIGN(size, s.align);
+
+	/* Pick the right allocator for our purposes */
+	if (flags & SLAB_RECLAIM_ACCOUNT)
+		a = reclaim_allocator(a);
+
+	if (flags & SLAB_CACHE_DMA)
+		a = dmaify_page_allocator(a);
+
+	if (flags & SLAB_DESTROY_BY_RCU)
+		a = rcuify_page_allocator(a);
+
+	page_size_slab = (PAGE_SIZE << calculate_order(s.size)) > (s.size << 1);
+
+	if (page_size_slab && ((flags & SLAB_DESTROY_BY_RCU) || ctor || dtor)) {
+		/*
+		 * For RCU processing and constructors / destructors:
+		 * The object must remain intact even if it is free.
+		 * The free pointer would hurt us there.
+		 * Relocate the free object pointer out of
+		 * the space used by the object.
+		 *
+		 * Slabs with a single object do not need this since
+		 * those do not have to deal with free pointers.
+		 */
+		s.offset = s.size - sizeof(void *);
+		if (s.offset < s.objsize) {
+			/*
+			 * Would overlap the object.
+			 * We need to waste some more space to
+			 * make the object RCU safe.
+			 */
+			s.offset = s.size;
+			s.size += s.align;
+		}
+		s.inuse = s.size;
+	}
+
+	s.order = calculate_order(s.size);
+
+	if (s.order < 0)
+		goto error;
+
+	s.name = name;
+	s.node = -1;
+
+	x = kmalloc(sizeof(struct slab_control), GFP_KERNEL);
+
+	if (!x)
+		return NULL;
+	s.page_alloc = a;
+	s.slab_alloc = &SLABULATOR_ALLOCATOR;
+#ifdef SLABULATOR_MERGE
+	/*
+	 * This works but is this really something we want?
+	 */
+	if (((s.size & (s.size - 1)) == 0) && !ctor && !dtor &&
+			!(flags & (SLAB_DESTROY_BY_RCU|SLAB_RECLAIM_ACCOUNT))) {
+
+		printk(KERN_INFO "Merging slab_cache %s size %d into"
+			" kmalloc array\n", name, s.size);
+		rs = kmalloc_slab_allocator.create(x, &s);
+		kfree(x);
+		x = NULL;
+	} else
+#endif
+		rs = SLABULATOR_ALLOCATOR.create(x, &s);
+	if (!rs)
+		goto error;
+
+	/*
+	 * Now deal with constructors and destructors. We need to know the
+	 * slab_cache address in order to be able to pass the slab_cache
+	 * address down the chain.
+	 */
+	if (ctor || dtor)
+		rs->page_alloc =
+			ctor_and_dtor_for_page_allocator(rs->page_alloc,
+					rs->size, rs,
+					(void *)ctor, (void *)dtor);
+
+	if (x)
+		register_slab(rs);
+	return rs;
+
+error:
+	a->destructor((struct page_allocator *)a);
+	if (flags & SLAB_PANIC)
+		panic("Cannot create slab %s size=%ld realsize=%d "
+			"order=%d offset=%d flags=%lx\n",
+			s.name, size, s.size, s.order, s.offset, flags);
+
+	return NULL;
+}
+EXPORT_SYMBOL(kmem_cache_create);
+
+int kmem_cache_destroy(struct slab_cache *s)
+{
+	SLABULATOR_ALLOCATOR.destroy(s);
+	unregister_slab(s);
+	kfree(s);
+	return 0;
+}
+EXPORT_SYMBOL(kmem_cache_destroy);
+
+void *kmem_cache_zalloc(struct slab_cache *s, gfp_t flags)
+{
+	void *x;
+
+	x = kmem_cache_alloc(s, flags);
+	if (x)
+		memset(x, 0, s->objsize);
+	return x;
+}
+
+/*
+ * Generic reaper (the slabifier has its own way of reaping)
+ */
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+static void init_reap_node(int cpu)
+{
+	int node;
+
+	node = next_node(cpu_to_node(cpu), node_online_map);
+	if (node == MAX_NUMNODES)
+		node = first_node(node_online_map);
+
+	__get_cpu_var(reap_node) = node;
+}
+
+static void next_reap_node(void)
+{
+	int node = __get_cpu_var(reap_node);
+
+	/*
+	 * Also drain per cpu pages on remote zones
+	 */
+	if (node != numa_node_id())
+		drain_node_pages(node);
+
+	node = next_node(node, node_online_map);
+	if (unlikely(node >= MAX_NUMNODES))
+		node = first_node(node_online_map);
+	__get_cpu_var(reap_node) = node;
+}
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
+#define REAPTIMEOUT_CPUC	(2*HZ)
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct work_struct, reap_work);
+
+static void cache_reap(void *unused)
+{
+	next_reap_node();
+	refresh_cpu_vm_stats(smp_processor_id());
+
+	schedule_delayed_work(&__get_cpu_var(reap_work),
+		REAPTIMEOUT_CPUC);
+}
+
+static void __devinit start_cpu_timer(int cpu)
+{
+	struct work_struct *reap_work = &per_cpu(reap_work, cpu);
+
+	/*
+	 * When this gets called from do_initcalls via cpucache_init(),
+	 * init_workqueues() has already run, so keventd will be setup
+	 * at that time.
+	 */
+	if (keventd_up() && reap_work->func == NULL) {
+		init_reap_node(cpu);
+		INIT_WORK(reap_work, cache_reap, NULL);
+		schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
+	}
+}
+
+static int __init cpucache_init(void)
+{
+	int cpu;
+
+	/*
+	 * Register the timers that drain pcp pages and update vm statistics
+	 */
+	for_each_online_cpu(cpu)
+		start_cpu_timer(cpu);
+	return 0;
+}
+__initcall(cpucache_init);
+#endif
+
Index: linux-2.6.18-rc4-mm2/include/linux/slabulator.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.18-rc4-mm2/include/linux/slabulator.h	2006-08-25 19:25:26.427721448 -0700
@@ -0,0 +1,123 @@
+#ifndef _LINUX_SLABULATOR_H
+#define _LINUX_SLABULATOR_H
+/*
+ * Slabulator: Emulate the existing Slab API.
+ *
+ * (C) 2006 Silicon Graphics, Inc.
+ *	Christoph Lameter
+ */
+
+#include
+#include
+
+#define kmem_cache_t struct slab_cache
+#define kmem_cache slab_cache
+
+#ifndef SLABULATOR_ALLOCATOR
+#define SLABULATOR_ALLOCATOR slabifier_allocator
+#endif
+
+/*
+ * We really should be getting rid of these. This is only
+ * a select list.
+ */
+#define SLAB_KERNEL		GFP_KERNEL
+#define SLAB_ATOMIC		GFP_ATOMIC
+#define SLAB_NOFS		GFP_NOFS
+#define SLAB_NOIO		GFP_NOIO
+
+/* No debug features for now */
+#define SLAB_HWCACHE_ALIGN	0x00002000UL
+#define SLAB_CACHE_DMA		0x00004000UL
+#define SLAB_MUST_HWCACHE_ALIGN	0x00008000UL
+#define SLAB_RECLAIM_ACCOUNT	0x00020000UL
+#define SLAB_PANIC		0x00040000UL
+#define SLAB_DESTROY_BY_RCU	0x00080000UL
+#define SLAB_MEM_SPREAD		0x00100000UL
+
+/* flags passed to a constructor func */
+#define SLAB_CTOR_CONSTRUCTOR	0x001UL
+#define SLAB_CTOR_ATOMIC	0x002UL
+#define SLAB_CTOR_VERIFY	0x004UL
+
+/*
+ * slab_allocators are always available after the page allocator
+ * has been brought up. kmem_cache_init creates the kmalloc array:
+ */
+extern int slab_is_available(void);
+extern void kmem_cache_init(void);
+
+/* System wide caches (Should these really be here?) */
+extern struct slab_cache *vm_area_cachep;
+extern struct slab_cache *names_cachep;
+extern struct slab_cache *files_cachep;
+extern struct slab_cache *filp_cachep;
+extern struct slab_cache *fs_cachep;
+extern struct slab_cache *sighand_cachep;
+extern struct slab_cache *bio_cachep;
+
+extern struct slab_cache *kmem_cache_create(const char *name, size_t size,
+		size_t align, unsigned long flags,
+		void (*ctor)(void *, struct slab_cache *, unsigned long),
+		void (*dtor)(void *, struct slab_cache *, unsigned long));
+
+static inline unsigned int kmem_cache_size(struct slab_cache *s)
+{
+	return s->objsize;
+}
+
+static inline const char *kmem_cache_name(struct slab_cache *s)
+{
+	return s->name;
+}
+
+static inline void *kmem_cache_alloc(struct slab_cache *s, gfp_t flags)
+{
+	return SLABULATOR_ALLOCATOR.alloc(s, flags);
+}
+
+static inline void *kmem_cache_alloc_node(struct slab_cache *s,
+						gfp_t flags, int node)
+{
+	return SLABULATOR_ALLOCATOR.alloc_node(s, flags, node);
+}
+
+extern void *kmem_cache_zalloc(struct slab_cache *s, gfp_t flags);
+
+static inline void kmem_cache_free(struct slab_cache *s, const void *x)
+{
+	SLABULATOR_ALLOCATOR.free(s, x);
+}
+
+static inline int kmem_ptr_validate(struct slab_cache *s, void *x)
+{
+	return SLABULATOR_ALLOCATOR.valid_pointer(s, x);
+}
+
+extern int kmem_cache_destroy(struct slab_cache *s);
+
+static inline int kmem_cache_shrink(struct slab_cache *s)
+{
+	return SLABULATOR_ALLOCATOR.shrink(s, NULL);
+}
+
+/**
+ * kcalloc - allocate memory for an array.
+ * The memory is set to zero.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate.
+ */
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+	if (n != 0 && size > ULONG_MAX / n)
+		return NULL;
+	return kzalloc(n * size, flags);
+}
+
+/* No current shrink statistics */
+struct shrinker;
+static inline void kmem_set_shrinker(kmem_cache_t *cachep,
+					struct shrinker *shrinker)
+{}
+#endif /* _LINUX_SLABULATOR_H */
+
Index: linux-2.6.18-rc4-mm2/mm/Makefile
===================================================================
--- linux-2.6.18-rc4-mm2.orig/mm/Makefile	2006-08-25 19:18:23.408944973 -0700
+++ linux-2.6.18-rc4-mm2/mm/Makefile	2006-08-25 19:22:15.433664468 -0700
@@ -25,4 +25,5 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += memory_h
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
-obj-$(CONFIG_MODULAR_SLAB) += allocator.o slabifier.o slabstat.o kmalloc.o
+obj-$(CONFIG_MODULAR_SLAB) += allocator.o slabifier.o slabstat.o \
+				kmalloc.o slabulator.o
Index: linux-2.6.18-rc4-mm2/init/Kconfig
===================================================================
--- linux-2.6.18-rc4-mm2.orig/init/Kconfig	2006-08-23 12:37:01.678839129 -0700
+++ linux-2.6.18-rc4-mm2/init/Kconfig	2006-08-25 19:22:15.434640970 -0700
@@ -332,6 +332,26 @@ config CC_OPTIMIZE_FOR_SIZE
 
 	  If unsure, say N.
 
+config SLAB
+	default y
+	bool "Traditional SLAB allocator"
+	help
+	  Disabling this allows the use of alternate slab allocators
+	  with less overhead such as SLOB (very simple) or the
+	  use of the slabifier with the modular allocator framework.
+	  Note that alternate slab allocators may not provide
+	  the complete functionality of slab.
+
+config MODULAR_SLAB
+	default y
+	bool "Use the modular allocator framework"
+	depends on EXPERIMENTAL && !SLAB
+	help
+	  The modular allocator framework allows the flexible use
+	  of different slab allocators and page allocators for memory
+	  allocation. This will completely replace the existing
+	  slab allocator. Beware: this is experimental code.
+
 menuconfig EMBEDDED
 	bool "Configure standard kernel features (for small systems)"
 	help
@@ -370,7 +390,6 @@ config KALLSYMS_EXTRA_PASS
 	  reported.  KALLSYMS_EXTRA_PASS is only a temporary workaround
 	  while you wait for kallsyms to be fixed.
 
-
 config HOTPLUG
 	bool "Support for hot-pluggable devices" if EMBEDDED
 	default y
@@ -445,15 +464,6 @@ config SHMEM
 	  option replaces shmem and tmpfs with the much simpler ramfs code,
 	  which may be appropriate on small systems without swap.
 
-config SLAB
-	default y
-	bool "Use full SLAB allocator" if EMBEDDED
-	help
-	  Disabling this replaces the advanced SLAB allocator and
-	  kmalloc support with the drastically simpler SLOB allocator.
-	  SLOB is more space efficient but does not scale well and is
-	  more susceptible to fragmentation.
-
 config VM_EVENT_COUNTERS
 	default y
 	bool "Enable VM event counters for /proc/vmstat" if EMBEDDED
@@ -475,7 +485,7 @@ config BASE_SMALL
 	default 1 if !BASE_FULL
 
 config SLOB
-	default !SLAB
+	default !SLAB && !MODULAR_SLAB
 	bool
 
 menu "Loadable module support"
Index: linux-2.6.18-rc4-mm2/include/linux/slab.h
===================================================================
--- linux-2.6.18-rc4-mm2.orig/include/linux/slab.h	2006-08-23 12:37:01.493303726 -0700
+++ linux-2.6.18-rc4-mm2/include/linux/slab.h	2006-08-25 19:22:15.435617472 -0700
@@ -9,6 +9,10 @@
 
 #if defined(__KERNEL__)
 
+#ifdef CONFIG_MODULAR_SLAB
+#include <linux/slabulator.h>
+#else
+
 typedef struct kmem_cache kmem_cache_t;
 
 #include
@@ -293,6 +297,8 @@ extern kmem_cache_t *bio_cachep;
 
 extern atomic_t slab_reclaim_pages;
 
+#endif /* CONFIG_MODULAR_SLAB */
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_SLAB_H */
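
For reviewers, not part of the patch itself: the stand-alone user-space
sketch below mirrors the flag handling described above: which derived
page allocators kmem_cache_create() stacks for a given set of SLAB_*
flags, and where the free pointer ends up for RCU / constructor /
destructor caches. The flag values are copied from slabulator.h; the
alignment handling is simplified to sizeof(void *), and the wrapper
names (reclaim_allocator, dmaify_page_allocator, rcuify_page_allocator,
ctor_and_dtor_for_page_allocator) are only used as labels for the
derivation chain. The kernel code above remains authoritative.

/*
 * slabulator-sketch.c - illustrative user-space model of the slabulator's
 * cache setup decisions.  Build with: gcc -Wall slabulator-sketch.c
 */
#include <stdio.h>
#include <string.h>

#define SLAB_CACHE_DMA		0x00004000UL
#define SLAB_RECLAIM_ACCOUNT	0x00020000UL
#define SLAB_DESTROY_BY_RCU	0x00080000UL

#define MINALIGN	sizeof(void *)

static void describe_cache(const char *name, size_t size, unsigned long flags,
				int has_ctor_or_dtor)
{
	/* Simplified: the patch also honors the align argument and
	 * SLAB_HWCACHE_ALIGN / SLAB_MUST_HWCACHE_ALIGN. */
	size_t s_size = (size + MINALIGN - 1) & ~(MINALIGN - 1);
	size_t offset = 0;
	char chain[160] = "page_allocator";

	/* Same derivation order as kmem_cache_create(); each wrapper
	 * decorates the allocator built so far. */
	if (flags & SLAB_RECLAIM_ACCOUNT)
		strcat(chain, " <- reclaim_allocator");
	if (flags & SLAB_CACHE_DMA)
		strcat(chain, " <- dmaify_page_allocator");
	if (flags & SLAB_DESTROY_BY_RCU)
		strcat(chain, " <- rcuify_page_allocator");
	if (has_ctor_or_dtor)
		strcat(chain, " <- ctor_and_dtor_for_page_allocator");

	/*
	 * RCU/ctor/dtor caches must keep a free object intact, so the free
	 * pointer is moved behind the object; if that would overlap the
	 * object, one alignment unit of extra space is added.  (The patch
	 * additionally skips this for single-object slabs.)
	 */
	if ((flags & SLAB_DESTROY_BY_RCU) || has_ctor_or_dtor) {
		offset = s_size - sizeof(void *);
		if (offset < size) {
			offset = s_size;
			s_size += MINALIGN;
		}
	}

	printf("%-12s objsize=%zu realsize=%zu offset=%zu\n    %s\n",
		name, size, s_size, offset, chain);
}

int main(void)
{
	describe_cache("plain", 88, 0, 0);
	describe_cache("dma+reclaim", 192, SLAB_CACHE_DMA | SLAB_RECLAIM_ACCOUNT, 0);
	describe_cache("rcu", 256, SLAB_DESTROY_BY_RCU, 0);
	describe_cache("with-ctor", 60, 0, 1);
	return 0;
}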