slab allocators: Remove SLAB_HWCACHE_ALIGN

There are two ways of specifying an alignment for a slab cache:

1. Explicitly, via the align parameter of kmem_cache_create().
2. By setting the SLAB_HWCACHE_ALIGN flag.

Option 2 is equivalent to explicitly setting the alignment to
L1_CACHE_BYTES.  If both are set (it seems this was often done just to
be on the safe side), a confusing situation arises: which alignment
should be applied?

Then there is the usual special casing in SLAB: SLAB_HWCACHE_ALIGN only
means "align to L1_CACHE_BYTES" if the object is larger than
L1_CACHE_BYTES / 2.  So SLAB_HWCACHE_ALIGN does not always align to a
cacheline.  Great, another source of confusion.

Remove SLAB_HWCACHE_ALIGN and require that the alignment of each slab
cache be specified explicitly.  Callers that want alignment to a
fraction of a cacheline can use the newly provided helper
fract_cache_align(size).  (The alternative, adding a SLAB_FRACT_ALIGN
flag that requests alignment to a fraction of a cacheline, would just
trade one special case for another.)

Signed-off-by: Christoph Lameter

Index: linux-2.6.21-rc6/include/linux/slab.h
===================================================================
--- linux-2.6.21-rc6.orig/include/linux/slab.h	2007-04-16 22:24:24.000000000 -0700
+++ linux-2.6.21-rc6/include/linux/slab.h	2007-04-16 23:09:52.000000000 -0700
@@ -24,7 +24,6 @@ typedef struct kmem_cache kmem_cache_t _
 #define SLAB_DEBUG_INITIAL	0x00000200UL	/* DEBUG: Call constructor (as verifier) */
 #define SLAB_RED_ZONE		0x00000400UL	/* DEBUG: Red zone objs in a cache */
 #define SLAB_POISON		0x00000800UL	/* DEBUG: Poison objects */
-#define SLAB_HWCACHE_ALIGN	0x00002000UL	/* Align objs on cache lines */
 #define SLAB_CACHE_DMA		0x00004000UL	/* Use GFP_DMA memory */
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
 #define SLAB_RECLAIM_ACCOUNT	0x00020000UL	/* Objects are reclaimable */
@@ -44,6 +43,22 @@ typedef struct kmem_cache kmem_cache_t _
 void __init kmem_cache_init(void);
 int slab_is_available(void);
 
+/*
+ * Determine the alignment from a slab cache size. If the object is
+ * larger than half a cacheline then use cacheline alignment.
+ * If the object is smaller then use a fraction of a cacheline
+ * for the alignment.
+ */
+static inline unsigned long fract_cache_align(unsigned long size)
+{
+	unsigned long align = L1_CACHE_BYTES;
+
+	while (size <= align / 2)
+		align /= 2;
+
+	return align;
+}
+
 struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *, struct kmem_cache *, unsigned long),
Index: linux-2.6.21-rc6/mm/slab.c
===================================================================
--- linux-2.6.21-rc6.orig/mm/slab.c	2007-04-16 22:24:24.000000000 -0700
+++ linux-2.6.21-rc6/mm/slab.c	2007-04-16 23:06:57.000000000 -0700
@@ -139,14 +139,10 @@
 /* Shouldn't this be in a header file somewhere? */
 #define	BYTES_PER_WORD		sizeof(void *)
 
-#ifndef cache_line_size
-#define cache_line_size()	L1_CACHE_BYTES
-#endif
-
 #ifndef ARCH_KMALLOC_MINALIGN
 /*
  * Enforce a minimum alignment for the kmalloc caches.
- * Usually, the kmalloc caches are cache_line_size() aligned, except when
+ * Usually, the kmalloc caches are L1_CACHE_BYTES aligned, except when
  * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
  * Some archs want to perform DMA into kmalloc caches and need a guaranteed
  * alignment larger than BYTES_PER_WORD. ARCH_KMALLOC_MINALIGN allows that.
@@ -163,24 +159,19 @@
  * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
  * some debug features.
  */
-#define ARCH_SLAB_MINALIGN 0
-#endif
-
-#ifndef ARCH_KMALLOC_FLAGS
-#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
+#define ARCH_SLAB_MINALIGN L1_CACHE_BYTES
 #endif
 
 /* Legal flag mask for kmem_cache_create(). */
 #if DEBUG
 # define CREATE_MASK	(SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
-			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
+			 SLAB_POISON | \
 			 SLAB_CACHE_DMA | \
 			 SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
 #else
-# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
-			 SLAB_CACHE_DMA | \
+# define CREATE_MASK	(SLAB_CACHE_DMA | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD)
 #endif
@@ -1440,7 +1431,7 @@ void __init kmem_cache_init(void)
 	/* 1) create the cache_cache */
 	INIT_LIST_HEAD(&cache_chain);
 	list_add(&cache_cache.next, &cache_chain);
-	cache_cache.colour_off = cache_line_size();
+	cache_cache.colour_off = L1_CACHE_BYTES;
 	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
 	cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE];
@@ -1454,13 +1445,13 @@ void __init kmem_cache_init(void)
 	cache_cache.obj_size = cache_cache.buffer_size;
 #endif
 	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
-					cache_line_size());
+					L1_CACHE_BYTES);
 	cache_cache.reciprocal_buffer_size =
 		reciprocal_value(cache_cache.buffer_size);
 
 	for (order = 0; order < MAX_ORDER; order++) {
 		cache_estimate(order, cache_cache.buffer_size,
-			cache_line_size(), 0, &left_over, &cache_cache.num);
+			L1_CACHE_BYTES, 0, &left_over, &cache_cache.num);
 		if (cache_cache.num)
 			break;
 	}
@@ -1468,7 +1459,7 @@ void __init kmem_cache_init(void)
 	cache_cache.gfporder = order;
 	cache_cache.colour = left_over / cache_cache.colour_off;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
-				      sizeof(struct slab), cache_line_size());
+				      sizeof(struct slab), L1_CACHE_BYTES);
 
 	/* 2+3) create the kmalloc caches */
 	sizes = malloc_sizes;
@@ -1482,7 +1473,7 @@ void __init kmem_cache_init(void)
 
 	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
 					sizes[INDEX_AC].cs_size,
-					ARCH_KMALLOC_MINALIGN,
+					fract_cache_align(sizes[INDEX_AC].cs_size),
 					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
 					NULL, NULL);
 
@@ -1490,7 +1481,7 @@ void __init kmem_cache_init(void)
 		sizes[INDEX_L3].cs_cachep =
 			kmem_cache_create(names[INDEX_L3].name,
 				sizes[INDEX_L3].cs_size,
-				ARCH_KMALLOC_MINALIGN,
+				fract_cache_align(sizes[INDEX_L3].cs_size),
 				ARCH_KMALLOC_FLAGS|SLAB_PANIC,
 				NULL, NULL);
 	}
@@ -1508,7 +1499,7 @@ void __init kmem_cache_init(void)
 		if (!sizes->cs_cachep) {
 			sizes->cs_cachep = kmem_cache_create(names->name,
 					sizes->cs_size,
-					ARCH_KMALLOC_MINALIGN,
+					fract_cache_align(sizes->cs_size),
 					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
 					NULL, NULL);
 		}
@@ -1516,7 +1507,7 @@ void __init kmem_cache_init(void)
 		sizes->cs_dmacachep = kmem_cache_create(
 					names->name_dma,
 					sizes->cs_size,
-					ARCH_KMALLOC_MINALIGN,
+					fract_cache_align(sizes->cs_size),
 					ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
 						SLAB_PANIC,
 					NULL, NULL);
@@ -2127,10 +2118,6 @@ static int setup_cpu_cache(struct kmem_c
  *
  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
  * for buffer overruns.
- *
- * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
- * cacheline.  This can be beneficial if you're counting cycles as closely
- * as davem.
  */
 struct kmem_cache *
 kmem_cache_create (const char *name, size_t size, size_t align,
@@ -2223,20 +2210,7 @@ kmem_cache_create (const char *name, siz
 	}
 
 	/* calculate the final buffer alignment: */
-
-	/* 1) arch recommendation: can be overridden for debug */
-	if (flags & SLAB_HWCACHE_ALIGN) {
-		/*
-		 * Default alignment: as specified by the arch code.  Except if
-		 * an object is really small, then squeeze multiple objects into
-		 * one cacheline.
-		 */
-		ralign = cache_line_size();
-		while (size <= ralign / 2)
-			ralign /= 2;
-	} else {
-		ralign = BYTES_PER_WORD;
-	}
+	ralign = BYTES_PER_WORD;
 
 	/*
 	 * Redzoning and user store require word alignment. Note this will be
@@ -2287,7 +2261,7 @@ kmem_cache_create (const char *name, siz
 	}
 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
 	if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
-	    && cachep->obj_size > cache_line_size() && size < PAGE_SIZE) {
+	    && cachep->obj_size > L1_CACHE_BYTES && size < PAGE_SIZE) {
 		cachep->obj_offset += PAGE_SIZE - size;
 		size = PAGE_SIZE;
 	}
@@ -2334,7 +2308,7 @@ kmem_cache_create (const char *name, siz
 		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
 	}
 
-	cachep->colour_off = cache_line_size();
+	cachep->colour_off = L1_CACHE_BYTES;
 	/* Offset must be a multiple of the alignment. */
 	if (cachep->colour_off < align)
 		cachep->colour_off = align;
Index: linux-2.6.21-rc6/include/linux/i2o.h
===================================================================
--- linux-2.6.21-rc6.orig/include/linux/i2o.h	2007-04-16 22:24:24.000000000 -0700
+++ linux-2.6.21-rc6/include/linux/i2o.h	2007-04-16 23:06:57.000000000 -0700
@@ -945,7 +945,7 @@ static inline int i2o_pool_alloc(struct
 	strcpy(pool->name, name);
 
 	pool->slab =
-	    kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL,
+	    kmem_cache_create(pool->name, size, fract_cache_align(size), 0, NULL,
 			      NULL);
 	if (!pool->slab)
 		goto free_name;
Index: linux-2.6.21-rc6/kernel/fork.c
===================================================================
--- linux-2.6.21-rc6.orig/kernel/fork.c	2007-04-16 22:24:24.000000000 -0700
+++ linux-2.6.21-rc6/kernel/fork.c	2007-04-16 23:06:57.000000000 -0700
@@ -1434,24 +1434,24 @@ static void sighand_ctor(void *data, str
 
 void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
-			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
+			sizeof(struct sighand_struct), fract_cache_align(sizeof(struct sighand_struct)),
+			SLAB_PANIC|SLAB_DESTROY_BY_RCU,
 			sighand_ctor, NULL);
 	signal_cachep = kmem_cache_create("signal_cache",
-			sizeof(struct signal_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			sizeof(struct signal_struct), L1_CACHE_BYTES,
+			SLAB_PANIC, NULL, NULL);
 	files_cachep = kmem_cache_create("files_cache",
-			sizeof(struct files_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			sizeof(struct files_struct), L1_CACHE_BYTES,
+			SLAB_PANIC, NULL, NULL);
 	fs_cachep = kmem_cache_create("fs_cache",
-			sizeof(struct fs_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			sizeof(struct fs_struct), L1_CACHE_BYTES,
+			SLAB_PANIC, NULL, NULL);
 	vm_area_cachep = kmem_cache_create("vm_area_struct",
 			sizeof(struct vm_area_struct), 0,
 			SLAB_PANIC, NULL, NULL);
 	mm_cachep = kmem_cache_create("mm_struct",
-			sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			sizeof(struct mm_struct), max(ARCH_MIN_MMSTRUCT_ALIGN, L1_CACHE_BYTES),
+			SLAB_PANIC, NULL, NULL);
 }
 
Index: linux-2.6.21-rc6/mm/slob.c
===================================================================
--- linux-2.6.21-rc6.orig/mm/slob.c	2007-04-16 22:24:24.000000000 -0700
+++ linux-2.6.21-rc6/mm/slob.c	2007-04-16 23:06:57.000000000 -0700
@@ -294,10 +294,7 @@ struct kmem_cache *kmem_cache_create(con
 		c->size = size;
 		c->ctor = ctor;
 		c->dtor = dtor;
-		/* ignore alignment unless it's forced */
-		c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
-		if (c->align < align)
-			c->align = align;
+		c->align = align;
 	}
 
 	return c;
Index: linux-2.6.21-rc6/mm/slub.c
===================================================================
--- linux-2.6.21-rc6.orig/mm/slub.c	2007-04-16 22:24:24.000000000 -0700
+++ linux-2.6.21-rc6/mm/slub.c	2007-04-16 23:06:57.000000000 -0700
@@ -1500,9 +1500,6 @@ static int calculate_order(int size)
 static unsigned long calculate_alignment(unsigned long flags,
 		unsigned long align)
 {
-	if (flags & SLAB_HWCACHE_ALIGN)
-		return max_t(unsigned long, align, L1_CACHE_BYTES);
-
 	if (align < ARCH_SLAB_MINALIGN)
 		return ARCH_SLAB_MINALIGN;
 
@@ -3081,12 +3078,6 @@ static ssize_t reclaim_account_store(str
 }
 SLAB_ATTR(reclaim_account);
 
-static ssize_t hwcache_align_show(struct kmem_cache *s, char *buf)
-{
-	return sprintf(buf, "%d\n", !!(s->flags & SLAB_HWCACHE_ALIGN));
-}
-SLAB_ATTR_RO(hwcache_align);
-
 #ifdef CONFIG_ZONE_DMA
 static ssize_t cache_dma_show(struct kmem_cache *s, char *buf)
 {
@@ -3223,7 +3214,6 @@ static struct attribute * slab_attrs[] =
 	&align_attr.attr,
 	&sanity_checks_attr.attr,
 	&trace_attr.attr,
-	&hwcache_align_attr.attr,
 	&reclaim_account_attr.attr,
 	&destroy_by_rcu_attr.attr,
 	&red_zone_attr.attr,
Index: linux-2.6.21-rc6/security/keys/key.c
===================================================================
--- linux-2.6.21-rc6.orig/security/keys/key.c	2007-04-16 22:24:24.000000000 -0700
+++ linux-2.6.21-rc6/security/keys/key.c	2007-04-16 23:06:57.000000000 -0700
@@ -1001,7 +1001,7 @@ void __init key_init(void)
 {
 	/* allocate a slab in which we can store keys */
 	key_jar = kmem_cache_create("key_jar", sizeof(struct key),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			fract_cache_align(sizeof(struct key)), SLAB_PANIC, NULL, NULL);
 
 	/* add the special key types */
 	list_add_tail(&key_type_keyring.link, &key_types_list);
Index: linux-2.6.21-rc6/net/bridge/br_fdb.c
===================================================================
--- linux-2.6.21-rc6.orig/net/bridge/br_fdb.c	2007-04-16 23:10:37.000000000 -0700
+++ linux-2.6.21-rc6/net/bridge/br_fdb.c	2007-04-16 23:11:08.000000000 -0700
@@ -31,8 +31,7 @@ void __init br_fdb_init(void)
 {
 	br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
 					 sizeof(struct net_bridge_fdb_entry),
-					 0,
-					 SLAB_HWCACHE_ALIGN, NULL, NULL);
+					 L1_CACHE_BYTES, 0, NULL, NULL);
 }
 
 void __exit br_fdb_fini(void)
Index: linux-2.6.21-rc6/net/core/skbuff.c
===================================================================
--- linux-2.6.21-rc6.orig/net/core/skbuff.c	2007-04-16 23:11:41.000000000 -0700
+++ linux-2.6.21-rc6/net/core/skbuff.c	2007-04-16 23:15:11.000000000 -0700
@@ -2052,14 +2052,14 @@ void __init skb_init(void)
 {
 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 					      sizeof(struct sk_buff),
-					      0,
-					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					      L1_CACHE_BYTES,
+					      SLAB_PANIC,
 					      NULL, NULL);
 	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
						(2*sizeof(struct sk_buff)) +
						sizeof(atomic_t),
-						0,
-						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+						L1_CACHE_BYTES,
+						SLAB_PANIC,
						NULL, NULL);
 }
 
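
Not part of the patch, just for illustration: a quick user-space sketch of what
fract_cache_align() picks for a few object sizes. L1_CACHE_BYTES is hard-coded
to 64 here purely as an assumption for the example; the kernel uses the
per-arch value.

/* Illustration only: mirrors fract_cache_align() with a 64-byte cacheline. */
#include <stdio.h>

#define L1_CACHE_BYTES 64	/* assumed cacheline size for this example */

static unsigned long fract_cache_align(unsigned long size)
{
	unsigned long align = L1_CACHE_BYTES;

	/* Halve the alignment while the object still fits in half of it. */
	while (size <= align / 2)
		align /= 2;

	return align;
}

int main(void)
{
	unsigned long sizes[] = { 8, 24, 33, 64, 100 };
	int i;

	for (i = 0; i < 5; i++)
		printf("size %3lu -> align %2lu\n",
		       sizes[i], fract_cache_align(sizes[i]));
	return 0;
}

Objects larger than half a cacheline keep full cacheline alignment (33, 64 and
100 all map to 64); smaller objects get the power-of-two fraction that still
holds them (8 -> 8, 24 -> 32), which is the policy SLAB_HWCACHE_ALIGN used to
apply implicitly.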