Index: linux-2.6.21-rc5/include/linux/mmzone.h
===================================================================
--- linux-2.6.21-rc5.orig/include/linux/mmzone.h	2007-03-29 13:45:48.000000000 -0700
+++ linux-2.6.21-rc5/include/linux/mmzone.h	2007-03-29 13:48:01.000000000 -0700
@@ -24,6 +24,15 @@
 #endif
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
+#define MAX_SLAB_CACHES 256
+
+struct kmem_cache_node {
+	spinlock_t list_lock;	/* Protect partial list and nr_partial */
+	unsigned long nr_partial;
+	atomic_long_t nr_slabs;
+	struct list_head partial;
+};
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		nr_free;
@@ -449,6 +458,9 @@ typedef struct pglist_data {
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
+#ifdef CONFIG_SLUB
+	struct kmem_cache_node slabs[MAX_SLAB_CACHES];
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
Index: linux-2.6.21-rc5/include/linux/slub_def.h
===================================================================
--- linux-2.6.21-rc5.orig/include/linux/slub_def.h	2007-03-29 13:47:16.000000000 -0700
+++ linux-2.6.21-rc5/include/linux/slub_def.h	2007-03-29 13:48:01.000000000 -0700
@@ -11,13 +11,6 @@
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
 
-struct kmem_cache_node {
-	spinlock_t list_lock;	/* Protect partial list and nr_partial */
-	unsigned long nr_partial;
-	atomic_long_t nr_slabs;
-	struct list_head partial;
-};
-
 /*
  * Slab cache management.
  */
@@ -29,13 +22,6 @@ struct kmem_cache {
 	int offset;		/* Free pointer offset. */
 	atomic_t cpu_slabs;	/* != 0 -> flusher scheduled. */
 	int defrag_ratio;
-
-	/*
-	 * Avoid an extra cache line for UP, SMP and for the node local to
-	 * struct kmem_cache.
-	 */
-	struct kmem_cache_node local_node;
-
 	/* Allocation and freeing of slabs */
 	unsigned int order;
 	int objects;		/* Number of objects in slab */
@@ -52,33 +38,19 @@ struct kmem_cache {
 	struct delayed_work flush;
 	struct mutex flushing;
 #endif
-#ifdef CONFIG_NUMA
-	struct kmem_cache_node *node[MAX_NUMNODES];
-#endif
-	struct page *cpu_slab[NR_CPUS];
-};
+} ____cacheline_aligned;
 
 /*
  * Kmalloc subsystem.
  */
 #define KMALLOC_SHIFT_LOW 3
-#define KMALLOC_SHIFT_HIGH 18
+#define KMALLOC_SHIFT_HIGH 23
 
-#if L1_CACHE_BYTES <= 64
-#define KMALLOC_EXTRAS 2
-#define KMALLOC_EXTRA
-#else
-#define KMALLOC_EXTRAS 0
-#endif
-
-#define KMALLOC_NR_CACHES (KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW \
-			+ 1 + KMALLOC_EXTRAS)
 
 /*
- * We keep the general caches in an array of slab caches that are used for
- * 2^x bytes of allocations.
+ * The first array elements are used for the kmalloc array
  */
-extern struct kmem_cache kmalloc_caches[KMALLOC_NR_CACHES];
+extern struct kmem_cache slub_caches[MAX_SLAB_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -86,29 +58,32 @@ extern struct kmem_cache kmalloc_caches[
  */
 static inline int kmalloc_index(int size)
 {
-#ifdef KMALLOC_EXTRA
 	if (size > 64 && size <= 96)
-		return KMALLOC_SHIFT_HIGH + 1;
+		return 1;
 	if (size > 128 && size <= 192)
-		return KMALLOC_SHIFT_HIGH + 2;
-#endif
-	if (size <= 8) return 3;
-	if (size <= 16) return 4;
-	if (size <= 32) return 5;
-	if (size <= 64) return 6;
-	if (size <= 128) return 7;
-	if (size <= 256) return 8;
-	if (size <= 512) return 9;
-	if (size <= 1024) return 10;
-	if (size <= 2 * 1024) return 11;
-	if (size <= 4 * 1024) return 12;
-	if (size <= 8 * 1024) return 13;
-	if (size <= 16 * 1024) return 14;
-	if (size <= 32 * 1024) return 15;
-	if (size <= 64 * 1024) return 16;
-	if (size <= 128 * 1024) return 17;
-	if (size <= 256 * 1024) return 18;
+		return 2;
+	if (size <= 8) return 3;
+	if (size <= 16) return 4;
+	if (size <= 32) return 5;
+	if (size <= 64) return 6;
+	if (size <= 128) return 7;
+	if (size <= 256) return 8;
+	if (size <= 512) return 9;
+	if (size <= 1024) return 10;
+	if (size <= 2 * 1024) return 11;
+	if (size <= 4 * 1024) return 12;
+	if (size <= 8 * 1024) return 13;
+	if (size <= 16 * 1024) return 14;
+	if (size <= 32 * 1024) return 15;
+	if (size <= 64 * 1024) return 16;
+	if (size <= 128 * 1024) return 17;
+	if (size <= 256 * 1024) return 18;
+	if (size <= 512 * 1024) return 19;
+	if (size <= 1024 * 1024) return 20;
+	if (size <= 2 * 1024 * 1024) return 21;
+	if (size <= 4 * 1024 * 1024) return 22;
+	if (size <= 8 * 1024 * 1024) return 23;
 	return -1;
 
 /*
@@ -128,7 +103,7 @@ static inline int kmalloc_index(int size
  */
 static inline struct kmem_cache *kmalloc_slab(size_t size)
 {
-	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	int index = kmalloc_index(size);
 
 	if (index < 0) {
 		/*
@@ -138,7 +113,7 @@ static inline struct kmem_cache *kmalloc
 		extern void __kmalloc_size_too_large(void);
 		__kmalloc_size_too_large();
 	}
-	return &kmalloc_caches[index];
+	return slub_caches + index;
 }
 
 #ifdef CONFIG_ZONE_DMA
Index: linux-2.6.21-rc5/mm/slub.c
===================================================================
--- linux-2.6.21-rc5.orig/mm/slub.c	2007-03-29 13:47:50.000000000 -0700
+++ linux-2.6.21-rc5/mm/slub.c	2007-03-29 13:48:01.000000000 -0700
@@ -123,8 +123,6 @@
 /* Internal SLUB flags */
 #define __OBJECT_POISON 0x80000000	/* Poison object */
 
-static int kmem_size = sizeof(struct kmem_cache);
-
 #ifdef CONFIG_SMP
 static struct notifier_block slab_notifier;
 #endif
@@ -142,7 +140,8 @@ int slab_is_available(void) {
 
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
-LIST_HEAD(slab_caches);
+
+static DEFINE_PER_CPU(struct page *, cpu_slab)[MAX_SLAB_CACHES];
 
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
@@ -160,13 +159,14 @@ static void setup_slab_die(void) {};
  * Core slab cache functions
  *******************************************************************/
 
-struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+static inline struct page **get_cpu_slab(struct kmem_cache *s)
 {
-#ifdef CONFIG_NUMA
-	return s->node[node];
-#else
-	return &s->local_node;
-#endif
+	return &__get_cpu_var(cpu_slab)[s - slub_caches];
+}
+
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+	return &NODE_DATA(node)->slabs[s - slub_caches];
 }
 
 /*
@@ -1031,39 +1031,39 @@ static void __always_inline putback_slab
  * Remove the cpu slab
  */
 static void __always_inline deactivate_slab(struct kmem_cache *s,
-				struct page *page, int cpu)
+				struct page *page, struct page **ppage)
 {
-	s->cpu_slab[cpu] = NULL;
+	*ppage = NULL;
 	ClearPageActive(page);
 	ClearPageReferenced(page);
 	putback_slab(s, page);
 }
 
-static void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
+static void flush_slab(struct kmem_cache *s, struct page *page, struct page **ppage)
 {
 	slab_lock(page);
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, page, ppage);
 }
 
 /*
  * Flush cpu slab.
  * Called from IPI handler with interrupts disabled.
  */
-static void __flush_cpu_slab(struct kmem_cache *s, int cpu)
+static void __flush_cpu_slab(struct kmem_cache *s, struct page **ppage)
 {
-	struct page *page = s->cpu_slab[cpu];
+	struct page *page = *ppage;
 
 	if (likely(page))
-		flush_slab(s, page, cpu);
+		flush_slab(s, page, ppage);
 }
 
 static void flush_cpu_slab(void *d)
 {
 	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
+	struct page **ppage = get_cpu_slab(s);
 
-	__flush_cpu_slab(s, cpu);
+	__flush_cpu_slab(s, ppage);
 }
 
 #ifdef CONFIG_SMP
@@ -1073,13 +1073,13 @@ static void flush_cpu_slab(void *d)
 static void check_flush_cpu_slab(void *private)
 {
 	struct kmem_cache *s = private;
-	int cpu = smp_processor_id();
-	struct page *page = s->cpu_slab[cpu];
+	struct page **ppage = get_cpu_slab(s);
+	struct page *page = *ppage;
 
 	if (page) {
 		if (!TestClearPageReferenced(page))
 			return;
-		flush_slab(s, page, cpu);
+		flush_slab(s, page, ppage);
 	}
 	atomic_dec(&s->cpu_slabs);
 }
@@ -1139,13 +1139,13 @@ static __always_inline void *slab_alloc(
 		gfp_t gfpflags, int node)
 {
 	struct page *page;
+	struct page **ppage;
 	void **object;
 	unsigned long flags;
-	int cpu;
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->cpu_slab[cpu];
+	ppage = get_cpu_slab(s);
+	page = *ppage;
 	if (!page)
 		goto new_slab;
 
@@ -1170,7 +1170,7 @@ redo:
 	return object;
 
 another_slab:
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, page, ppage);
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
@@ -1187,29 +1187,29 @@ new_slab:
 			return page_address(page);
 		}
 
-		if (s->cpu_slab[cpu]) {
+		if (*ppage) {
 			/*
 			 * Someone else populated the cpu_slab while
 			 * we enabled interrupts. The page may not
 			 * be on the required node.
 			 */
 			if (node == -1 ||
-				page_to_nid(s->cpu_slab[cpu]) == node) {
+				page_to_nid(*ppage) == node) {
 				/*
 				 * Current cpuslab is acceptable and we
 				 * want the current one since its cache hot
 				 */
 				discard_slab(s, page);
-				page = s->cpu_slab[cpu];
+				page = *ppage;
 				slab_lock(page);
 				goto redo;
 			}
-			flush_slab(s, s->cpu_slab[cpu], cpu);
+			flush_slab(s, *ppage, ppage);
 		}
 		slab_lock(page);
 	}
-	s->cpu_slab[cpu] = page;
+	*ppage = page;
 	SetPageActive(page);
 
 #ifdef CONFIG_SMP
@@ -1420,20 +1420,6 @@ static unsigned long calculate_alignment
 	return ALIGN(align, sizeof(void *));
 }
 
-static void free_kmem_cache_nodes(struct kmem_cache *s)
-{
-#ifdef CONFIG_NUMA
-	int node;
-
-	for_each_online_node(node) {
-		struct kmem_cache_node *n = s->node[node];
-		if (n && n != &s->local_node)
-			kfree(n);
-		s->node[node] = NULL;
-	}
-#endif
-}
-
 static void init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	memset(n, 0, sizeof(struct kmem_cache_node));
@@ -1444,58 +1430,10 @@ static void init_kmem_cache_node(struct
 static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 {
-#ifdef CONFIG_NUMA
 	int node;
-	int local_node;
-
-	if (slab_state >= UP)
-		local_node = page_to_nid(virt_to_page(s));
-	else
-		local_node = 0;
 
-	for_each_online_node(node) {
-		struct kmem_cache_node *n;
-
-		if (local_node == node)
-			n = &s->local_node;
-		else
-		if (slab_state == DOWN) {
-			/*
-			 * No kmalloc_node yet so do it by hand.
-			 * We know that this is the first slab on the
-			 * node for this slabcache. There are no concurrent
-			 * accesses possible. Which simplifies things.
-			 */
-			unsigned long flags;
-			struct page *page;
-
-			BUG_ON(s->size < sizeof(struct kmem_cache_node));
-			local_irq_save(flags);
-			page = new_slab(s, gfpflags, node);
-
-			BUG_ON(!page);
-			n = page->freelist;
-			page->freelist = *(void **)page->freelist;
-			page->inuse++;
-			local_irq_restore(flags);
-		} else
-			n = kmalloc_node(sizeof(struct kmem_cache_node),
-					gfpflags, node);
-
-		if (!n) {
-			free_kmem_cache_nodes(s);
-			return 0;
-		}
-
-		s->node[node] = n;
-		init_kmem_cache_node(n);
-
-		if (slab_state == DOWN)
-			atomic_long_inc(&n->nr_slabs);
-	}
-#else
-	init_kmem_cache_node(&s->local_node);
-#endif
+	for_each_online_node(node)
+		init_kmem_cache_node(get_node(s, node));
 	return 1;
 }
 
@@ -1577,7 +1515,7 @@ static int kmem_cache_open(struct kmem_c
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
 		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
-	memset(s, 0, kmem_size);
+	memset(s, 0, sizeof(struct kmem_cache));
 	s->name = name;
 	s->ctor = ctor;
 	s->dtor = dtor;
@@ -1705,7 +1643,6 @@ static int kmem_cache_close(struct kmem_
 		if (atomic_long_read(&n->nr_slabs))
 			return 1;
 	}
-	free_kmem_cache_nodes(s);
 	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_close);
@@ -1733,12 +1670,8 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  * Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[KMALLOC_NR_CACHES] __cacheline_aligned;
-EXPORT_SYMBOL(kmalloc_caches);
-
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[KMALLOC_NR_CACHES];
-#endif
+struct kmem_cache slub_caches[MAX_SLAB_CACHES] __cacheline_aligned;
+EXPORT_SYMBOL(slub_caches);
 
 static int __init setup_slub_min_order(char *str)
 {
@@ -1816,7 +1749,6 @@ static struct kmem_cache *create_kmalloc
 				flags, NULL, NULL))
 		goto panic;
 
-	list_add(&s->list, &slab_caches);
 	up_write(&slub_lock);
 	if (sysfs_slab_add(s))
 		goto panic;
@@ -1829,7 +1761,7 @@ panic:
 
 static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 {
-	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	int index = kmalloc_index(size);
 
 	if (!size)
 		return NULL;
@@ -1840,41 +1772,30 @@ static struct kmem_cache *get_slab(size_
 #ifdef CONFIG_ZONE_DMA
 	if ((flags & SLUB_DMA)) {
 		struct kmem_cache *s;
-		struct kmem_cache *x;
 		char *text;
 		size_t realsize;
 
-		s = kmalloc_caches_dma[index];
-		if (s)
-			return s;
+		s = &slub_caches[index + KMALLOC_SHIFT_HIGH + 1];
 
-		/* Dynamically create dma cache */
-		x = kmalloc(kmem_size, flags & ~SLUB_DMA);
-		if (!x)
-			panic("Unable to allocate memory for dma cache\n");
+		if (s->refcount)
+			return s;
 
-#ifdef KMALLOC_EXTRA
-		if (index <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW)
-#endif
-			realsize = 1 << (index + KMALLOC_SHIFT_LOW);
-#ifdef KMALLOC_EXTRA
+		if (index >= KMALLOC_SHIFT_LOW && index <= KMALLOC_SHIFT_HIGH)
+			realsize = 1 << index;
 		else {
-			index -= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW +1;
-			if (!index)
+			if (index == 0)
 				realsize = 96;
 			else
 				realsize = 192;
 		}
-#endif
 
 		text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
 				(unsigned int)realsize);
-		s = create_kmalloc_cache(x, text, realsize, flags);
-		kmalloc_caches_dma[index] = s;
+		create_kmalloc_cache(s, text, realsize, flags);
 		return s;
 	}
 #endif
-	return &kmalloc_caches[index];
+	return &slub_caches[index];
 }
 
 void *__kmalloc(size_t size, gfp_t flags)
@@ -1894,6 +1815,7 @@ void *__kmalloc_node(size_t size, gfp_t
 	if (s)
 		return kmem_cache_alloc_node(s, flags, node);
+	return NULL;
 }
 EXPORT_SYMBOL(__kmalloc_node);
 
@@ -2006,43 +1928,22 @@ EXPORT_SYMBOL(krealloc);
 void __init kmem_cache_init(void)
 {
 	int i;
-	int kmem_cache_node_cache =
-		kmalloc_index(sizeof(struct kmem_cache_node));
 
-	BUG_ON(kmem_cache_node_cache < 0 ||
-		kmem_cache_node_cache > KMALLOC_SHIFT_HIGH);
+	for(i = 0; i < MAX_SLAB_CACHES; i++)
+		slub_caches[i].refcount = 0;
 
-	/*
-	 * Must first have the slab cache available for the allocations of the
-	 * struct kmalloc_cache_node's. There is special bootstrap code in
-	 * kmem_cache_open for slab_state == DOWN.
-	 */
-	create_kmalloc_cache(&kmalloc_caches[kmem_cache_node_cache -
-			KMALLOC_SHIFT_LOW],
-			"kmalloc",
-			1 << kmem_cache_node_cache,
-			GFP_KERNEL);
-
-	/* Now we are able to allocate the per node structures */
 	slab_state = PARTIAL;
 
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		if (i == kmem_cache_node_cache)
-			continue;
-		create_kmalloc_cache(
-			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
-			"kmalloc", 1 << i, GFP_KERNEL);
-	}
-
-#ifdef KMALLOC_EXTRA
 	/* Caches that are not of the two-to-the-power-of size */
-	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1],
+	create_kmalloc_cache(slub_caches + 1,
 		"kmalloc-96", 96, GFP_KERNEL);
-	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2],
+	create_kmalloc_cache(slub_caches + 2,
 		"kmalloc-192", 192, GFP_KERNEL);
-#endif
+
+	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
+		create_kmalloc_cache(slub_caches + i,
+			"kmalloc", 1 << i, GFP_KERNEL);
+
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
@@ -2050,22 +1951,17 @@ void __init kmem_cache_init(void)
 		char *name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
 		BUG_ON(!name);
-		kmalloc_caches[i - KMALLOC_SHIFT_LOW].name = name;
+		slub_caches[i].name = name;
 	};
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
 #endif
 
-	if (nr_cpu_ids) /* Remove when nr_cpu_ids was fixed ! */
-		kmem_size = offsetof(struct kmem_cache, cpu_slab)
-			+ nr_cpu_ids * sizeof(struct page *);
-
 	setup_slab_die();
 	printk(KERN_INFO "SLUB V6: General Slabs=%ld, HW alignment=%d, "
 		"Processors=%d, Nodes=%d\n",
-		(unsigned long)KMALLOC_SHIFT_HIGH + KMALLOC_EXTRAS + 1 -
-			KMALLOC_SHIFT_LOW,
+		(unsigned long)KMALLOC_SHIFT_HIGH - 1,
 		L1_CACHE_BYTES,
 		nr_cpu_ids,
 		nr_node_ids);
@@ -2079,7 +1975,7 @@ static struct kmem_cache *find_mergeable
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
 		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
 		return NULL;
@@ -2091,9 +1987,9 @@ static struct kmem_cache *find_mergeable
 	align = calculate_alignment(flags, align);
 	size = ALIGN(size, align);
 
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
+	for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++) {
+		if (!s->refcount)
+			continue;
 
 		if (size > s->size)
 			continue;
@@ -2122,6 +2018,18 @@ static struct kmem_cache *find_mergeable
 	return NULL;
 }
 
+int reserved_slab(struct kmem_cache *s)
+{
+#ifdef CONFIG_ZONE_DMA
+	if (s > slub_caches + 2 * KMALLOC_SHIFT_HIGH)
+		return 0;
+	if (s < slub_caches + KMALLOC_SHIFT_HIGH)
+		return 0;
+	return 1;
+#else
+	return 0;
+#endif
+}
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 		size_t align, unsigned long flags,
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
@@ -2142,16 +2050,17 @@ struct kmem_cache *kmem_cache_create(con
 		if (sysfs_slab_alias(s, name))
 			goto err;
 	} else {
-		s = kmalloc(kmem_size, GFP_KERNEL);
-		if (s && kmem_cache_open(s, GFP_KERNEL, name,
+		for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++)
+			if (!s->refcount && !reserved_slab(s))
+				break;
+		BUG_ON(s >= slub_caches + MAX_SLAB_CACHES);
+		if (kmem_cache_open(s, GFP_KERNEL, name,
 				size, align, flags, ctor, dtor)) {
 			if (sysfs_slab_add(s)) {
 				kfree(s);
 				goto err;
 			}
-			list_add(&s->list, &slab_caches);
-		} else
-			kfree(s);
+		}
 	}
 	up_write(&slub_lock);
 	return s;
@@ -2178,17 +2087,15 @@ void *kmem_cache_zalloc(struct kmem_cach
 EXPORT_SYMBOL(kmem_cache_zalloc);
 
 #ifdef CONFIG_SMP
-static void for_all_slabs(void (*func)(struct kmem_cache *, int), int cpu)
+static void for_all_slabs(void (*func)(struct kmem_cache *, struct page **),
+						struct page **ppage)
 {
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	down_read(&slub_lock);
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
-
-		func(s, cpu);
-	}
+	for (s = slub_caches; s + MAX_SLAB_CACHES; s++)
+		if (s->refcount)
+			func(s, ppage + (s - slub_caches));
 	up_read(&slub_lock);
 }
 
@@ -2204,7 +2111,7 @@ static int __cpuinit slab_cpuup_callback
 	switch (action) {
 	case CPU_UP_CANCELED:
 	case CPU_DEAD:
-		for_all_slabs(__flush_cpu_slab, cpu);
+		for_all_slabs(__flush_cpu_slab, per_cpu(cpu_slab, cpu));
 		break;
 	default:
 		break;
 	}
@@ -2402,7 +2309,7 @@ static unsigned long slab_objects(struct
 	if (flags & SO_CPU)
 		for_each_possible_cpu(cpu) {
-			struct page *page = s->cpu_slab[cpu];
+			struct page *page = per_cpu(cpu_slab, cpu)[s - slub_caches];
 
 			if (page) {
 				int x = 0;
@@ -2434,7 +2341,7 @@ static int any_slab_objects(struct kmem_
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		if (s->cpu_slab[cpu])
+		if (per_cpu(cpu_slab, cpu)[s - slub_caches])
 			return 1;
 
 	for_each_node(node) {
@@ -2509,8 +2416,11 @@ static int slab_die_call(struct notifier
 {
 	struct kmem_cache *s;
 
-	list_for_each_entry(s, &slab_caches, list)
+	for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++) {
+		if (!s->refcount)
+			continue;
 		validate_slab_cache(s);
+	}
 
 	return NOTIFY_OK;
 }
@@ -2898,7 +2808,7 @@ int sysfs_slab_alias(struct kmem_cache *
 int __init slab_sysfs_init(void)
 {
 	int err;
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	err = subsystem_register(&slab_subsys);
 	if (err) {
@@ -2908,9 +2818,9 @@ int __init slab_sysfs_init(void)
 
 	slab_state = SYSFS;
 
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
+	for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++) {
+		if (!s->refcount)
+			continue;
 		err = sysfs_slab_add(s);
 		BUG_ON(err);
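
For readers tracing the new lookup paths, the sketch below is a minimal standalone model (plain userspace C, not kernel code; all toy_* names are invented for illustration) of the indexing scheme the patch relies on: every cache is a slot in one fixed static array, a slot counts as live while its refcount is non-zero, and the per-cpu active slab and per-node state are reached through parallel tables indexed by the pointer difference s - slub_caches (here s - toy_caches).

/*
 * Standalone model of the array indexing above -- not kernel code.
 * All toy_* names are made up; the point is only to show how one
 * static cache array plus (s - caches) indexes parallel per-cpu and
 * per-node tables, replacing the old per-cache pointers and lists.
 */
#include <stdio.h>
#include <stddef.h>

#define TOY_MAX_CACHES	8
#define TOY_NR_CPUS	2
#define TOY_NR_NODES	2

struct toy_cache {
	const char *name;
	int refcount;			/* 0 -> this array slot is free */
};

struct toy_cache_node {
	unsigned long nr_partial;	/* stands in for the per-node partial list */
};

static struct toy_cache toy_caches[TOY_MAX_CACHES];

/* Parallel tables, indexed by the cache's slot number (s - toy_caches). */
static void *toy_cpu_slab[TOY_NR_CPUS][TOY_MAX_CACHES];
static struct toy_cache_node toy_node_data[TOY_NR_NODES][TOY_MAX_CACHES];

static void **toy_get_cpu_slab(struct toy_cache *s, int cpu)
{
	return &toy_cpu_slab[cpu][s - toy_caches];
}

static struct toy_cache_node *toy_get_node(struct toy_cache *s, int node)
{
	return &toy_node_data[node][s - toy_caches];
}

int main(void)
{
	struct toy_cache *s;

	/* "Create" a cache by claiming the first free slot, as kmem_cache_create now does. */
	for (s = toy_caches; s < toy_caches + TOY_MAX_CACHES; s++)
		if (!s->refcount)
			break;
	s->name = "toy-64";
	s->refcount = 1;

	toy_get_node(s, 1)->nr_partial = 3;	/* pretend node 1 holds partial slabs */
	*toy_get_cpu_slab(s, 0) = s;		/* pretend cpu 0 has an active slab */

	printf("cache %s uses slot %td: node1 nr_partial=%lu, cpu0 active=%s\n",
		s->name, s - toy_caches,
		toy_get_node(s, 1)->nr_partial,
		*toy_get_cpu_slab(s, 0) ? "yes" : "no");
	return 0;
}

In the patch itself the per-cpu table is the DEFINE_PER_CPU(cpu_slab)[MAX_SLAB_CACHES] array, the per-node table is the slabs[] array embedded in pg_data_t, and the kmalloc slots are laid out as index 1 = 96 bytes, index 2 = 192 bytes, and indexes 3..KMALLOC_SHIFT_HIGH = the power-of-two caches, with the DMA variants created on demand at index + KMALLOC_SHIFT_HIGH + 1.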