Index: linux-2.6.19-mm1/mm/slub.c
===================================================================
--- linux-2.6.19-mm1.orig/mm/slub.c	2006-12-12 22:50:26.119313526 -0800
+++ linux-2.6.19-mm1/mm/slub.c	2006-12-13 15:11:26.305662154 -0800
@@ -61,6 +61,11 @@ static void register_slab(struct kmem_ca
 static void unregister_slab(struct kmem_cache *s);
 
 #ifdef CONFIG_NUMA
+
+/* We need to bootstrap the slab with the active slabs in a special way */
+#define ACTIVE_SLAB_NR kmalloc_index(sizeof(struct active_slab))
+#define ACTIVE_SLAB_SLAB &kmalloc_caches[ACTIVE_SLAB_NR - KMALLOC_SHIFT_LOW]
+
 #define ACTIVE_SLAB(__s,__cpu) ((__s)->active[__cpu])
 #else
 #define ACTIVE_SLAB(__s,__cpu) (&(__s)->active[__cpu])
@@ -180,7 +185,8 @@ static void free_slab(struct kmem_cache
  */
 static __always_inline void slab_lock(struct page *page)
 {
-	bit_spin_lock(PG_locked, &page->flags);
+//	bit_spin_lock(PG_locked, &page->flags);
+	BUG_ON(!bit_spin_trylock(PG_locked, &page->flags));
 }
 
 static __always_inline void slab_unlock(struct page *page)
@@ -555,7 +561,6 @@ static __always_inline void *__slab_allo
 		gfp_t gfpflags, int node)
 {
 	struct active_slab *a;
-	struct page *page;
 	void **object;
 	unsigned long flags;
 
@@ -576,9 +581,10 @@ static __always_inline void *__slab_allo
 	deactivate_slab(a);
 
 new_slab:
-	page = get_partial(s, gfpflags, node);
-	if (!page) {
-		page = new_slab(s, flags, node);
+	a->page = get_partial(s, gfpflags, node);
+	if (unlikely(!a->page)) {
+		struct page *page = new_slab(s, flags, node);
+
 		if (!page) {
 			local_irq_restore(flags);
 			return NULL;
@@ -593,26 +599,18 @@ new_slab:
 			return page_address(page);
 		}
 
+		a = ACTIVE_SLAB(s, smp_processor_id());
+		if (a->page)
+			discard_slab(s, page);
+		else
+			a->page = page;
+
 		slab_lock(page);
 	}
 
-	if (a->page) {
-		slab_unlock(page);
-		discard_slab(s, page);
-		slab_lock(a->page);
-	} else
-		a->page = page;
-
 	__SetPageActive(a->page);
 	check_free_chain(s, a->page);
 
-#ifdef CONFIG_SMP
-	if (keventd_up() && !a->flush_active) {
-		a->flush_active = 1;
-		schedule_delayed_work(&a->flush, 2 * HZ);
-	}
-#endif
-
 switch_freelist:
 	a->freelist = a->page->freelist;
 	a->page->freelist = NULL;
@@ -626,6 +624,12 @@ have_object:
 	a->nr_free--;
 	a->referenced = 1;
 	a->freelist = object[a->page->offset];
+#ifdef CONFIG_SMP
+	if (!a->flush_active && keventd_up()) {
+		a->flush_active = 1;
+		schedule_delayed_work(&a->flush, 2 * HZ);
+	}
+#endif
 	local_irq_restore(flags);
 	return object;
 }
@@ -637,6 +641,30 @@ void *kmem_cache_alloc(struct kmem_cache
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_NUMA
+/*
+ * Bootstrap function to allow the allocation of active_slabs without
+ * having active slabs yet.
+ */
+static void * __init early_active_slab_alloc(int node)
+{
+	struct kmem_cache *s = ACTIVE_SLAB_SLAB;
+	struct page *page;
+	void **object;
+
+	page = get_partial(s, GFP_KERNEL, node);
+	if (!page) {
+		page = new_slab(s, GFP_KERNEL, node);
+
+		BUG_ON(!page);
+		slab_lock(page);
+	}
+	object = page->freelist;
+	page->freelist = object[s->offset];
+	page->inuse++;
+	putback_slab(s, page);
+	return object;
+}
+
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	return __slab_alloc(s, gfpflags, node);
@@ -836,6 +864,26 @@ static int calculate_order(int size)
 	return order;
 }
 
+#ifdef CONFIG_NUMA
+/*
+ * Bootstrap slabs
+ */
+
+/*
+ * We can actually operate slabs any time after the page allocator is up.
+ * slab_is_available() merely means that the kmalloc array is available.
+ *
+ * However, be aware that deriving allocators depends on kmalloc being
+ * functional.
+ */
+static enum {DOWN, PARTIAL, UP } slab_state = DOWN;
+
+int slab_is_available(void) {
+	return slab_state == UP;
+}
+
+#endif
+
 int kmem_cache_open(struct kmem_cache *s,
 		const char *name, size_t size,
 		size_t align, unsigned long flags,
@@ -900,19 +948,25 @@ int kmem_cache_open(struct kmem_cache *s
 		struct active_slab *a;
 
 #ifdef CONFIG_NUMA
-		s->active[cpu] = a = kmalloc(sizeof(struct active_slab), GFP_KERNEL);
+		if (slab_state == DOWN) {
+			BUG_ON(s != ACTIVE_SLAB_SLAB);
+			a = early_active_slab_alloc(cpu_to_node(cpu));
+		} else
+			a = kmem_cache_alloc_node(ACTIVE_SLAB_SLAB,
+					GFP_KERNEL, cpu_to_node(cpu));
+		s->active[cpu] = a;
#else
 		a = ACTIVE_SLAB(s, cpu);
 #endif
 		a->page = NULL;
 		a->slab = s;
+		a->referenced = 0;
 
 #ifdef CONFIG_SMP
 		a->flush_active = 0;
 		INIT_DELAYED_WORK(&a->flush, check_flush_active);
 #endif
 	}
-
 	register_slab(s);
 	return 1;
 
@@ -1095,6 +1149,8 @@ static int free_list(struct kmem_cache *
  */
 int kmem_cache_close(struct kmem_cache *s)
 {
+	int cpu;
+
 	if (!atomic_dec_and_test(&s->refcount))
 		return 0;
 
@@ -1193,9 +1249,12 @@ static int __init setup_slab_min_order(c
 __setup("slab_min_order=", setup_slab_min_order);
 
 static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
-		const char *name, int size)
+			const char *name, int size)
 {
 
+	if (s->size)
+		return s;
+
 	if (!kmem_cache_open(s, name, size, ARCH_KMALLOC_MINALIGN,
 			0, NULL, NULL))
 		panic("Creation of kmalloc slab %s size=%d failed.\n",
@@ -1207,7 +1266,6 @@ static struct kmem_cache *get_slab(size_
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
 	struct kmem_cache *s;
-	struct kmem_cache *x;
 	size_t realsize;
 
 	BUG_ON(size < 0);
@@ -1220,9 +1278,9 @@ static struct kmem_cache *get_slab(size_
 		return s;
 
 	/* Dynamically create dma cache */
-	x = kmalloc(sizeof(struct kmem_cache), flags & ~(__GFP_DMA));
+	s = kmalloc(sizeof(struct kmem_cache), flags & ~(__GFP_DMA));
 
-	if (!x)
+	if (!s)
 		panic("Unable to allocate memory for dma cache\n");
 
 #ifdef KMALLOC_EXTRA
@@ -1236,7 +1294,9 @@ static struct kmem_cache *get_slab(size_
 		realsize = 192;
 #endif
 
-	s = create_kmalloc_cache(x, "kmalloc_dma", realsize);
+	create_kmalloc_cache(s,
+		kasprintf(flags, "kmalloc-dma-%ld", realsize),
+		realsize);
 	kmalloc_caches_dma[index] = s;
 	return s;
 }
@@ -1274,22 +1334,50 @@ void kfree(const void *object)
 }
 EXPORT_SYMBOL(kfree);
 
-void __init kmalloc_init(void)
+void __init kmem_cache_init(void)
 {
 	int i;
+	char *bootname = "kmalloc";
+
+#ifdef CONFIG_NUMA
+
+	/*
+	 * NUMA bootstrap only works if the active slab does not use an
+	 * EXTRA slab.
+	 */
+	BUG_ON(ACTIVE_SLAB_NR > KMALLOC_SHIFT_HIGH || ACTIVE_SLAB_NR < 0);
+
+	create_kmalloc_cache(ACTIVE_SLAB_SLAB,
+			"active_slab",
+			1 << ACTIVE_SLAB_NR);
+	slab_state = PARTIAL;
+#endif
 
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		create_kmalloc_cache(
+	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
+		create_kmalloc_cache(
 			&kmalloc_caches[i - KMALLOC_SHIFT_LOW],
-			"kmalloc", 1 << i);
-	}
+			bootname,
+			1 << i);
+
 #ifdef KMALLOC_EXTRA
 	/* Non-power of two caches */
 	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1], "kmalloc", 96);
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 1], bootname, 96);
 	create_kmalloc_cache(&kmalloc_caches
-		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2], "kmalloc", 192);
+		[KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + 2], bootname, 192);
 #endif
+	slab_state = UP;
+
+	/* We can provide the kmalloc names now that the caches are up */
+	for (i = 0; i < KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW +
+			KMALLOC_EXTRAS;
+			i++)
+		kmalloc_caches[i].name = kasprintf(GFP_KERNEL, "kmalloc-%d",
+			kmalloc_caches[i].size);
+	printk(KERN_INFO "Kmalloc cache initialized: Caches %d."
+		" Min_order %d.\n",
+		KMALLOC_SHIFT_HIGH - KMALLOC_SHIFT_LOW + KMALLOC_EXTRAS,
+		slab_min_order);
 }
 
 /********************************************************************
@@ -1298,25 +1386,6 @@ void __init kmalloc_init(void)
 
 #define SLAB_MAX_ORDER 4
 
-/*
- * We can actually operate slabs any time after the page allocator is up.
- * slab_is_available() merely means that the kmalloc array is available.
- *
- * However, be aware that deriving allocators depends on kmalloc being
- * functional.
- */
-static int slab_up = 0;
-
-int slab_is_available(void) {
-	return slab_up;
-}
-
-void kmem_cache_init(void)
-{
-	kmalloc_init();
-	slab_up = 1;
-}
-
 static struct kmem_cache *__kmalloc_slab(size_t size)
 {
 	int index = kmalloc_index(size) - KMALLOC_SHIFT_LOW;
@@ -1351,7 +1420,7 @@ struct kmem_cache *kmem_cache_create(con
 	 * a new slab.
 	 */
 	if (s->size - sz <= sizeof(void *)) {
-		printk(KERN_INFO "SLUB: Merging slab_cache %s size %d"
+		printk(KERN_INFO "SLUB: Merging slab_cache %s size %ld"
			" into kmalloc array size %d\n", name, size, s->size);
 		return kmem_cache_dup(s);