Index: linux-2.6.21-rc5/include/linux/mmzone.h
===================================================================
--- linux-2.6.21-rc5.orig/include/linux/mmzone.h	2007-03-26 20:07:51.000000000 -0700
+++ linux-2.6.21-rc5/include/linux/mmzone.h	2007-03-28 20:34:54.000000000 -0700
@@ -24,6 +24,15 @@
 #endif
 #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
 
+#define MAX_SLAB_CACHES 256
+
+struct kmem_cache_node {
+	spinlock_t list_lock;	/* Protect partial list and nr_partial */
+	unsigned long nr_partial;
+	atomic_long_t nr_slabs;
+	struct list_head partial;
+};
+
 struct free_area {
 	struct list_head	free_list;
 	unsigned long		nr_free;
@@ -449,6 +458,9 @@
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
 	int kswapd_max_order;
+#ifdef CONFIG_SLUB
+	struct kmem_cache_node slabs[MAX_SLAB_CACHES];
+#endif
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
Index: linux-2.6.21-rc5/include/linux/slub_def.h
===================================================================
--- linux-2.6.21-rc5.orig/include/linux/slub_def.h	2007-03-28 20:19:19.000000000 -0700
+++ linux-2.6.21-rc5/include/linux/slub_def.h	2007-03-28 20:34:54.000000000 -0700
@@ -11,13 +11,6 @@
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
 
-struct kmem_cache_node {
-	spinlock_t list_lock;	/* Protect partial list and nr_partial */
-	unsigned long nr_partial;
-	atomic_long_t nr_slabs;
-	struct list_head partial;
-};
-
 /*
  * Slab cache management.
  */
@@ -29,13 +22,6 @@
 	int offset;		/* Free pointer offset. */
 	atomic_t cpu_slabs;	/* != 0 -> flusher scheduled. */
 	int defrag_ratio;
-
-	/*
-	 * Avoid an extra cache line for UP, SMP and for the node local to
-	 * struct kmem_cache.
-	 */
-	struct kmem_cache_node local_node;
-
 	/* Allocation and freeing of slabs */
 	unsigned int order;
 	int objects;		/* Number of objects in slab */
@@ -52,10 +38,6 @@
 	struct delayed_work flush;
 	struct mutex flushing;
 #endif
-#ifdef CONFIG_NUMA
-	struct kmem_cache_node *node[MAX_NUMNODES];
-#endif
-	struct page *cpu_slab[NR_CPUS];
 };
 
 /*
@@ -72,13 +54,11 @@
 #define KMALLOC_EXTRAS 0
 #endif
 
-#define KMALLOC_NR_CACHES (KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW \
-			+ 1 + KMALLOC_EXTRAS)
 /*
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[KMALLOC_NR_CACHES];
+extern struct kmem_cache slub_caches[MAX_SLAB_CACHES];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -86,12 +66,10 @@
  */
 static inline int kmalloc_index(int size)
 {
-#ifdef KMALLOC_EXTRA
 	if (size > 64 && size <= 96)
-		return KMALLOC_SHIFT_HIGH + 1;
+		return 1;
 	if (size > 128 && size <= 192)
-		return KMALLOC_SHIFT_HIGH + 2;
-#endif
+		return 2;
 	if (size <= 8) return 3;
 	if (size <= 16) return 4;
 	if (size <= 32) return 5;
@@ -128,7 +106,7 @@
  */
 static inline struct kmem_cache *kmalloc_slab(size_t size)
 {
-	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	int index = kmalloc_index(size);
 
 	if (index < 0) {
 		/*
@@ -138,7 +116,7 @@
 		extern void __kmalloc_size_too_large(void);
 		__kmalloc_size_too_large();
 	}
-	return &kmalloc_caches[index];
+	return slub_caches + index;
 }
 
 #ifdef CONFIG_ZONE_DMA
Index: linux-2.6.21-rc5/mm/slub.c
===================================================================
--- linux-2.6.21-rc5.orig/mm/slub.c	2007-03-28 20:19:21.000000000 -0700
+++ linux-2.6.21-rc5/mm/slub.c	2007-03-28 20:52:51.000000000 -0700
@@ -103,8 +103,6 @@
 /* Internal SLUB flags */
 #define __OBJECT_POISON 0x80000000 /* Poison object */
 
-static int kmem_size = sizeof(struct kmem_cache);
-
 #ifdef CONFIG_SMP
 static struct notifier_block slab_notifier;
 #endif
@@ -122,7 +120,8 @@
 
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
-LIST_HEAD(slab_caches);
+
+static DEFINE_PER_CPU(struct page *, cpu_slab)[MAX_SLAB_CACHES];
 
 #ifdef CONFIG_SYSFS
 static int sysfs_slab_add(struct kmem_cache *);
@@ -138,13 +137,14 @@
  *			Core slab cache functions
  *******************************************************************/
 
-struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+static inline struct page **get_cpu_slab(struct kmem_cache *s)
 {
-#ifdef CONFIG_NUMA
-	return s->node[node];
-#else
-	return &s->local_node;
-#endif
+	return &__get_cpu_var(cpu_slab)[s - slub_caches];
+}
+
+static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
+{
+	return &NODE_DATA(node)->slabs[s - slub_caches];
 }
 
 /*
@@ -1009,39 +1009,39 @@
  * Remove the cpu slab
  */
 static void __always_inline deactivate_slab(struct kmem_cache *s,
-						struct page *page, int cpu)
+				struct page *page, struct page **ppage)
 {
-	s->cpu_slab[cpu] = NULL;
+	*ppage = NULL;
 	ClearPageActive(page);
 	ClearPageReferenced(page);
 
 	putback_slab(s, page);
 }
 
-static void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
+static void flush_slab(struct kmem_cache *s, struct page *page, struct page **ppage)
 {
 	slab_lock(page);
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, page, ppage);
 }
 
 /*
  * Flush cpu slab.
  * Called from IPI handler with interrupts disabled.
  */
-static void __flush_cpu_slab(struct kmem_cache *s, int cpu)
+static void __flush_cpu_slab(struct kmem_cache *s, struct page **ppage)
 {
-	struct page *page = s->cpu_slab[cpu];
+	struct page *page = *ppage;
 
 	if (likely(page))
-		flush_slab(s, page, cpu);
+		flush_slab(s, page, ppage);
 }
 
 static void flush_cpu_slab(void *d)
 {
 	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
+	struct page **ppage = get_cpu_slab(s);
 
-	__flush_cpu_slab(s, cpu);
+	__flush_cpu_slab(s, ppage);
 }
 
 #ifdef CONFIG_SMP
@@ -1051,13 +1051,13 @@
 static void check_flush_cpu_slab(void *private)
 {
 	struct kmem_cache *s = private;
-	int cpu = smp_processor_id();
-	struct page *page = s->cpu_slab[cpu];
+	struct page **ppage = get_cpu_slab(s);
+	struct page *page = *ppage;
 
 	if (page) {
 		if (!TestClearPageReferenced(page))
 			return;
-		flush_slab(s, page, cpu);
+		flush_slab(s, page, ppage);
 	}
 	atomic_dec(&s->cpu_slabs);
 }
@@ -1117,13 +1117,13 @@
 				gfp_t gfpflags, int node)
 {
 	struct page *page;
+	struct page **ppage;
 	void **object;
 	unsigned long flags;
-	int cpu;
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
-	page = s->cpu_slab[cpu];
+	ppage = get_cpu_slab(s);
+	page = *ppage;
 	if (!page)
 		goto new_slab;
 
@@ -1148,7 +1148,7 @@
 	return object;
 
 another_slab:
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, page, ppage);
 
 new_slab:
 	page = get_partial(s, gfpflags, node);
@@ -1165,29 +1165,29 @@
 			return page_address(page);
 		}
 
-		if (s->cpu_slab[cpu]) {
+		if (*ppage) {
 			/*
 			 * Someone else populated the cpu_slab while
 			 * we enabled interrupts. The page may not
 			 * be on the required node.
 			 */
 			if (node == -1 ||
-				page_to_nid(s->cpu_slab[cpu]) == node) {
+				page_to_nid(*ppage) == node) {
 				/*
 				 * Current cpuslab is acceptable and we
 				 * want the current one since its cache hot
 				 */
 				discard_slab(s, page);
-				page = s->cpu_slab[cpu];
+				page = *ppage;
 				slab_lock(page);
 				goto redo;
 			}
-			flush_slab(s, s->cpu_slab[cpu], cpu);
+			flush_slab(s, *ppage, ppage);
 		}
 		slab_lock(page);
 	}
-	s->cpu_slab[cpu] = page;
+	*ppage = page;
 	SetPageActive(page);
 
 #ifdef CONFIG_SMP
@@ -1398,20 +1398,6 @@
 	return ALIGN(align, sizeof(void *));
 }
 
-static void free_kmem_cache_nodes(struct kmem_cache *s)
-{
-#ifdef CONFIG_NUMA
-	int node;
-
-	for_each_online_node(node) {
-		struct kmem_cache_node *n = s->node[node];
-		if (n && n != &s->local_node)
-			kfree(n);
-		s->node[node] = NULL;
-	}
-#endif
-}
-
 static void init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	memset(n, 0, sizeof(struct kmem_cache_node));
@@ -1422,58 +1408,10 @@
 
 static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 {
-#ifdef CONFIG_NUMA
 	int node;
-	int local_node;
-
-	if (slab_state >= UP)
-		local_node = page_to_nid(virt_to_page(s));
-	else
-		local_node = 0;
-
-	for_each_online_node(node) {
-		struct kmem_cache_node *n;
-
-		if (local_node == node)
-			n = &s->local_node;
-		else
-		if (slab_state == DOWN) {
-			/*
-			 * No kmalloc_node yet so do it by hand.
-			 * We know that this is the first slab on the
-			 * node for this slabcache. There are no concurrent
-			 * accesses possible. Which simplifies things.
-			 */
-			unsigned long flags;
-			struct page *page;
-
-			BUG_ON(s->size < sizeof(struct kmem_cache_node));
-			local_irq_save(flags);
-			page = new_slab(s, gfpflags, node);
-
-			BUG_ON(!page);
-			n = page->freelist;
-			page->freelist = *(void **)page->freelist;
-			page->inuse++;
-			local_irq_restore(flags);
-		} else
-			n = kmalloc_node(sizeof(struct kmem_cache_node),
-					gfpflags, node);
-
-		if (!n) {
-			free_kmem_cache_nodes(s);
-			return 0;
-		}
-
-		s->node[node] = n;
-		init_kmem_cache_node(n);
-		if (slab_state == DOWN)
-			atomic_long_inc(&n->nr_slabs);
-	}
-#else
-	init_kmem_cache_node(&s->local_node);
-#endif
+
+	for_each_online_node(node)
+		init_kmem_cache_node(get_node(s, node));
 	return 1;
 }
 
@@ -1555,7 +1493,7 @@
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
 		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
-	memset(s, 0, kmem_size);
+	memset(s, 0, sizeof(struct kmem_cache));
 	s->name = name;
 	s->ctor = ctor;
 	s->dtor = dtor;
@@ -1683,7 +1621,6 @@
 		if (atomic_long_read(&n->nr_slabs))
 			return 1;
 	}
-	free_kmem_cache_nodes(s);
 	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_close);
@@ -1711,12 +1648,8 @@
  *		Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[KMALLOC_NR_CACHES] __cacheline_aligned;
-EXPORT_SYMBOL(kmalloc_caches);
-
-#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[KMALLOC_NR_CACHES];
-#endif
+struct kmem_cache slub_caches[MAX_SLAB_CACHES] __cacheline_aligned;
+EXPORT_SYMBOL(slub_caches);
 
 static int __init setup_slub_min_order(char *str)
 {
@@ -1794,7 +1727,6 @@
 			flags, NULL, NULL))
 		goto panic;
 
-	list_add(&s->list, &slab_caches);
 	up_write(&slub_lock);
 	if (sysfs_slab_add(s))
 		goto panic;
@@ -1807,7 +1739,7 @@
 
 static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 {
-	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
+	int index = kmalloc_index(size);
 
 	if (!size)
 		return NULL;
@@ -1818,41 +1750,30 @@
 #ifdef CONFIG_ZONE_DMA
 	if ((flags & SLUB_DMA)) {
 		struct kmem_cache *s;
-		struct kmem_cache *x;
 		char *text;
 		size_t realsize;
 
-		s = kmalloc_caches_dma[index];
-		if (s)
-			return s;
+		s = &slub_caches[index + KMALLOC_SHIFT_HIGH + 1];
 
-		/* Dynamically create dma cache */
-		x = kmalloc(kmem_size, flags & ~SLUB_DMA);
-		if (!x)
-			panic("Unable to allocate memory for dma cache\n");
+		if (s->refcount)
+			return s;
 
-#ifdef KMALLOC_EXTRA
-		if (index <= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW)
-#endif
-			realsize = 1 << (index + KMALLOC_SHIFT_LOW);
-#ifdef KMALLOC_EXTRA
+		if (index >= KMALLOC_SHIFT_LOW && index <= KMALLOC_SHIFT_HIGH)
+			realsize = 1 << index;
 		else {
-			index -= KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW +1;
-			if (!index)
+			if (index == 1)
 				realsize = 96;
 			else
 				realsize = 192;
 		}
-#endif
 
 		text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
				(unsigned int)realsize);
-		s = create_kmalloc_cache(x, text, realsize, flags);
-		kmalloc_caches_dma[index] = s;
+		create_kmalloc_cache(s, text, realsize, flags);
		return s;
 	}
 #endif
-	return &kmalloc_caches[index];
+	return &slub_caches[index];
 }
 
 void *__kmalloc(size_t size, gfp_t flags)
@@ -1872,6 +1793,7 @@
 
 	if (s)
 		return kmem_cache_alloc_node(s, flags, node);
+	return NULL;
 }
 EXPORT_SYMBOL(__kmalloc_node);
 
@@ -1984,43 +1906,22 @@
 void __init kmem_cache_init(void)
 {
 	int i;
-	int kmem_cache_node_cache =
-		kmalloc_index(sizeof(struct kmem_cache_node));
-
-	BUG_ON(kmem_cache_node_cache < 0 ||
-		kmem_cache_node_cache > KMALLOC_SHIFT_HIGH);
-	/*
-	 * Must first have the slab cache available for the allocations of the
-	 * struct kmalloc_cache_node's. There is special bootstrap code in
-	 * kmem_cache_open for slab_state == DOWN.
-	 */
-	create_kmalloc_cache(&kmalloc_caches[kmem_cache_node_cache -
-						KMALLOC_SHIFT_LOW],
-			"kmalloc",
-			1 << kmem_cache_node_cache,
-			GFP_KERNEL);
+	for(i = 0; i < MAX_SLAB_CACHES; i++)
+		slub_caches[i].refcount = 0;
 
-	/* Now we are able to allocate the per node structures */
 	slab_state = PARTIAL;
 
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		if (i == kmem_cache_node_cache)
-			continue;
-		create_kmalloc_cache(
-			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
-			"kmalloc", 1 << i, GFP_KERNEL);
-	}
-
-#ifdef KMALLOC_EXTRA
 	/* Caches that are not of the two-to-the-power-of size */
-	create_kmalloc_cache(&kmalloc_caches
-			[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1],
+	create_kmalloc_cache(slub_caches + 1,
 				"kmalloc-96", 96, GFP_KERNEL);
 
-	create_kmalloc_cache(&kmalloc_caches
-			[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2],
+	create_kmalloc_cache(slub_caches + 2,
 				"kmalloc-192", 192, GFP_KERNEL);
-#endif
+
+	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
+		create_kmalloc_cache(slub_caches + i,
+			"kmalloc", 1 << i, GFP_KERNEL);
+
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
@@ -2028,20 +1929,15 @@
 		char *name = kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 		BUG_ON(!name);
-		kmalloc_caches[i - KMALLOC_SHIFT_LOW].name = name;
+		slub_caches[i].name = name;
 	};
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
 #endif
 
-	if (nr_cpu_ids) /* Remove when nr_cpu_ids was fixed ! */
-		kmem_size = offsetof(struct kmem_cache, cpu_slab)
-			+ nr_cpu_ids * sizeof(struct page *);
-
 	printk(KERN_INFO "SLUB V6: General Slabs=%ld, HW alignment=%d, "
 		"Processors=%d, Nodes=%d\n",
-		(unsigned long)KMALLOC_SHIFT_HIGH + KMALLOC_EXTRAS + 1 -
-			KMALLOC_SHIFT_LOW,
+		(unsigned long)KMALLOC_SHIFT_HIGH -1,
 		L1_CACHE_BYTES,
 		nr_cpu_ids,
 		nr_node_ids);
@@ -2055,7 +1951,7 @@
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
 		void (*dtor)(void *, struct kmem_cache *, unsigned long))
 {
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	if (slub_nomerge || (flags & SLUB_NEVER_MERGE))
 		return NULL;
@@ -2067,9 +1963,9 @@
 	align = calculate_alignment(flags, align);
 	size = ALIGN(size, align);
 
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
+	for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++) {
+		if (!s->refcount)
+			continue;
 
 		if (size > s->size)
 			continue;
@@ -2098,6 +1994,18 @@
 	return NULL;
 }
 
+int reserved_slab(struct kmem_cache *s)
+{
+#ifdef CONFIG_ZONE_DMA
+	if (s > slub_caches + 2 * KMALLOC_SHIFT_HIGH)
+		return 0;
+	if (s < slub_caches + KMALLOC_SHIFT_HIGH)
+		return 0;
+	return 1;
+#else
+	return 0;
+#endif
+}
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 		size_t align, unsigned long flags,
 		void (*ctor)(void *, struct kmem_cache *, unsigned long),
@@ -2118,16 +2026,17 @@
 		if (sysfs_slab_alias(s, name))
 			goto err;
 	} else {
-		s = kmalloc(kmem_size, GFP_KERNEL);
-		if (s && kmem_cache_open(s, GFP_KERNEL, name,
+		for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++)
+			if (!s->refcount && !reserved_slab(s))
+				break;
+		BUG_ON(s >= slub_caches + MAX_SLAB_CACHES);
+		if (kmem_cache_open(s, GFP_KERNEL, name,
 				size, align, flags, ctor, dtor)) {
 			if (sysfs_slab_add(s)) {
 				kfree(s);
 				goto err;
 			}
-			list_add(&s->list, &slab_caches);
-		} else
-			kfree(s);
+		}
 	}
 	up_write(&slub_lock);
 	return s;
@@ -2154,17 +2063,15 @@
 EXPORT_SYMBOL(kmem_cache_zalloc);
 
 #ifdef CONFIG_SMP
-static void for_all_slabs(void (*func)(struct kmem_cache *, int), int cpu)
+static void for_all_slabs(void (*func)(struct kmem_cache *, struct page **),
+						struct page **ppage)
 {
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	down_read(&slub_lock);
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
-
-		func(s, cpu);
-	}
+	for (s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++)
+		if (s->refcount)
+			func(s, ppage + (s - slub_caches));
 	up_read(&slub_lock);
 }
 
@@ -2180,7 +2087,7 @@
 	switch (action) {
 	case CPU_UP_CANCELED:
 	case CPU_DEAD:
-		for_all_slabs(__flush_cpu_slab, cpu);
+		for_all_slabs(__flush_cpu_slab, per_cpu(cpu_slab, cpu));
 		break;
 	default:
 		break;
@@ -2378,7 +2285,7 @@
 
 	if (flags & SO_CPU)
 		for_each_possible_cpu(cpu) {
-			struct page *page = s->cpu_slab[cpu];
+			struct page *page = per_cpu(cpu_slab, cpu)[s - slub_caches];
 
 			if (page) {
 				int x = 0;
@@ -2410,7 +2317,7 @@
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		if (s->cpu_slab[cpu])
+		if (per_cpu(cpu_slab, cpu)[s - slub_caches])
 			return 1;
 
 	for_each_node(node) {
@@ -2813,7 +2720,7 @@
 int __init slab_sysfs_init(void)
 {
 	int err;
-	struct list_head *h;
+	struct kmem_cache *s;
 
 	err = subsystem_register(&slab_subsys);
 	if (err) {
@@ -2823,9 +2730,9 @@
 
 	slab_state = SYSFS;
 
-	list_for_each(h, &slab_caches) {
-		struct kmem_cache *s =
-			container_of(h, struct kmem_cache, list);
+	for(s = slub_caches; s < slub_caches + MAX_SLAB_CACHES; s++) {
+		if (!s->refcount)
+			continue;
 
 		err = sysfs_slab_add(s);
 		BUG_ON(err);
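
For anyone tracing the new layout: after this patch every kmem_cache occupies a fixed slot in the static slub_caches[MAX_SLAB_CACHES] array, and get_cpu_slab()/get_node() locate the matching per-cpu slab pointer and per-node kmem_cache_node purely from the cache's array offset (s - slub_caches), so neither has to be allocated during bootstrap. The fragment below is a minimal user-space sketch of that offset trick, not kernel code; NR_DEMO_NODES, demo_node_data and main() are invented for the illustration.

#include <stdio.h>

#define MAX_SLAB_CACHES	256
#define NR_DEMO_NODES	2		/* assumption for the demo only */

struct kmem_cache_node {
	unsigned long nr_partial;
};

struct kmem_cache {
	const char *name;
	int refcount;			/* 0 means the slot is unused */
};

/* one static array of caches, as in the patch */
static struct kmem_cache slub_caches[MAX_SLAB_CACHES];

/* stand-in for the pg_data_t::slabs[MAX_SLAB_CACHES] arrays added above */
static struct kmem_cache_node demo_node_data[NR_DEMO_NODES][MAX_SLAB_CACHES];

/* mirrors get_node(): per-node state is found by the cache's array offset */
static struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
	return &demo_node_data[node][s - slub_caches];
}

int main(void)
{
	struct kmem_cache *s = &slub_caches[5];	/* pretend slot 5 is in use */

	s->name = "demo-cache";
	s->refcount = 1;
	get_node(s, 0)->nr_partial = 3;

	printf("%s: node 0 has %lu partial slabs\n",
	       s->name, get_node(s, 0)->nr_partial);
	return 0;
}

The same offset is what lets the static per-cpu cpu_slab array in the patch stand in for the old s->cpu_slab[NR_CPUS] member of struct kmem_cache.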