---
 include/asm-generic/local.h |   25 +++++++++++++++++++++++--
 include/asm-x86_64/local.h  |   16 ++++++++++++++++
 mm/slub.c                   |   14 ++++++++------
 mm/vmstat.c                 |   29 +++++++++++++++++++----------
 4 files changed, 66 insertions(+), 18 deletions(-)

Index: linux-2.6.22-rc6-mm1/include/asm-generic/local.h
===================================================================
--- linux-2.6.22-rc6-mm1.orig/include/asm-generic/local.h	2007-07-12 19:10:00.000000000 -0700
+++ linux-2.6.22-rc6-mm1/include/asm-generic/local.h	2007-07-12 19:36:40.000000000 -0700
@@ -46,13 +46,34 @@ typedef struct
 #define local_add_unless(l, a, u) atomic_long_add_unless((&(l)->a), (a), (u))
 #define local_inc_not_zero(l) atomic_long_inc_not_zero(&(l)->a)
 
-/* Non-atomic variants, ie. preemption disabled and won't be touched
- * in interrupt, etc. Some archs can optimize this case well. */
+/*
+ * Establish the state necessary for the __local_xx operations to work.
+ */
+#define __local_begin(flags)	local_irq_save(flags)
+
+static inline void __local_end(unsigned long flags)
+{
+	local_irq_restore(flags);
+}
+
+/*
+ * Non-atomic variants, i.e. within __local_begin() / __local_end() or
+ * preempt_disable() / preempt_enable() and won't be touched in
+ * interrupt, etc.  Some archs can optimize this case well.
+ */
 #define __local_inc(l)		local_set((l), local_read(l) + 1)
 #define __local_dec(l)		local_set((l), local_read(l) - 1)
 #define __local_add(i,l)	local_set((l), local_read(l) + (i))
 #define __local_sub(i,l)	local_set((l), local_read(l) - (i))
 
+#define __local_cmpxchg(v, o, n)	(*(v) = (n), (o))
+#define __local_xchg(v, n)			\
+({						\
+	__typeof__(*(v)) __x = *(v);		\
+	*(v) = (n);				\
+	__x;					\
+})
+
 /* Use these for per-cpu local_t variables: on some archs they are
  * much more efficient than these naive implementations. Note they take
  * a variable (eg. mystruct.foo), not an address.
Index: linux-2.6.22-rc6-mm1/include/asm-x86_64/local.h
===================================================================
--- linux-2.6.22-rc6-mm1.orig/include/asm-x86_64/local.h	2007-07-12 19:05:39.000000000 -0700
+++ linux-2.6.22-rc6-mm1/include/asm-x86_64/local.h	2007-07-12 19:43:24.000000000 -0700
@@ -9,6 +9,7 @@ typedef struct
 	atomic_long_t a;
 } local_t;
 
+
 #define LOCAL_INIT(i)	{ ATOMIC_LONG_INIT(i) }
 
 #define local_read(l)	atomic_long_read(&(l)->a)
@@ -181,11 +182,26 @@ static __inline__ long local_sub_return(
 
 /* On x86-64 these are better than the atomic variants on SMP kernels
    because they dont use a lock prefix. */
+
+#define __local_begin(__flags)		\
+{					\
+	(__flags) = 0;			\
+	preempt_disable();		\
+}
+
+static inline void __local_end(unsigned long flags) {
+	preempt_enable();
+}
+
 #define __local_inc(l)		local_inc(l)
 #define __local_dec(l)		local_dec(l)
 #define __local_add(i,l)	local_add((i),(l))
 #define __local_sub(i,l)	local_sub((i),(l))
 
+#define __local_cmpxchg		cmpxchg_local
+#define __local_xchg		xchg
+
+
 /* Use these for per-cpu local_t variables: on some archs they are
  * much more efficient than these naive implementations. Note they take
  * a variable, not an address.
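
For reference, here is a minimal userspace sketch (illustrative only, not part of the
patch) of how the generic fallback primitives above are meant to be used: the caller
reads a value, then commits the update with __local_cmpxchg() inside a
__local_begin()/__local_end() section.  The __local_begin()/__local_end() stand-ins
below do nothing, since a userspace test program has no interrupts to disable; only
the cmpxchg/xchg fallbacks mirror the definitions above.  Note that the generic
__local_cmpxchg() simply stores the new value and reports the expected old one, so
the caller must pass in the value it actually read, exactly as slab_alloc() and
slab_free() do further down.

	#include <stdio.h>

	/* Stand-ins: userspace has no interrupts to disable. */
	#define __local_begin(flags)	((flags) = 0)
	#define __local_end(flags)	((void)(flags))

	/* Generic fallbacks as above: exclusivity comes from the section. */
	#define __local_cmpxchg(v, o, n)	(*(v) = (n), (o))
	#define __local_xchg(v, n)			\
	({						\
		__typeof__(*(v)) __x = *(v);		\
		*(v) = (n);				\
		__x;					\
	})

	int main(void)
	{
		unsigned long flags;
		long counter = 5;
		long old, seen;

		__local_begin(flags);
		seen = counter;					/* read */
		old = __local_cmpxchg(&counter, seen, seen + 1);	/* commit */
		__local_end(flags);
		printf("cmpxchg: old=%ld new=%ld\n", old, counter);

		__local_begin(flags);
		old = __local_xchg(&counter, 0);		/* drain */
		__local_end(flags);
		printf("xchg:    old=%ld new=%ld\n", old, counter);
		return 0;
	}
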
Index: linux-2.6.22-rc6-mm1/mm/slub.c
===================================================================
--- linux-2.6.22-rc6-mm1.orig/mm/slub.c	2007-07-12 19:08:11.000000000 -0700
+++ linux-2.6.22-rc6-mm1/mm/slub.c	2007-07-12 19:29:00.000000000 -0700
@@ -1565,18 +1565,19 @@ static void __always_inline *slab_alloc(
 {
 	void **object;
 	struct kmem_cache_cpu *c;
+	unsigned long flags;
 
-	preempt_disable();
+	__local_begin(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 redo:
 	object = c->freelist;
 	if (unlikely(!object || !node_match(c, node)))
 		return __slab_alloc(s, gfpflags, node, addr, c);
 
-	if (cmpxchg_local(&c->freelist, object, object[c->offset]) != object)
+	if (__local_cmpxchg(&c->freelist, object, object[c->offset]) != object)
 		goto redo;
 
-	preempt_enable();
+	__local_end(flags);
 
 	if (unlikely((gfpflags & __GFP_ZERO)))
 		memset(object, 0, c->objsize);
@@ -1678,8 +1679,9 @@ static void __always_inline slab_free(st
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
 	void **freelist;
+	unsigned long flags;
 
-	preempt_disable();
+	__local_begin(flags);
 	c = get_cpu_slab(s, smp_processor_id());
 redo:
 	freelist = c->freelist;
@@ -1687,10 +1689,10 @@ redo:
 		return __slab_free(s, page, x, addr, c->offset);
 
 	object[c->offset] = freelist;
-	if (cmpxchg_local(&c->freelist, freelist, object) != freelist)
+	if (__local_cmpxchg(&c->freelist, freelist, object) != freelist)
 		goto redo;
 
-	preempt_enable();
+	__local_end(flags);
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
Index: linux-2.6.22-rc6-mm1/mm/vmstat.c
===================================================================
--- linux-2.6.22-rc6-mm1.orig/mm/vmstat.c	2007-07-12 19:06:58.000000000 -0700
+++ linux-2.6.22-rc6-mm1/mm/vmstat.c	2007-07-12 19:32:32.000000000 -0700
@@ -187,9 +187,11 @@ EXPORT_SYMBOL(__mod_zone_page_state);
 void mod_zone_page_state(struct zone *zone, enum zone_stat_item item,
 					int delta)
 {
-	preempt_disable();
+	unsigned long flags;
+
+	__local_begin(flags);
 	__mod_zone_page_state(zone, item, delta);
-	preempt_enable();
+	__local_end(flags);
 }
 EXPORT_SYMBOL(mod_zone_page_state);
 
@@ -280,27 +282,32 @@ EXPORT_SYMBOL(__dec_zone_page_state);
 
 void inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	preempt_disable();
+	unsigned long flags;
+
+	__local_begin(flags);
 	__inc_zone_state(zone, item);
-	preempt_enable();
+	__local_end(flags);
 }
 
 void inc_zone_page_state(struct page *page, enum zone_stat_item item)
 {
 	struct zone *zone;
+	unsigned long flags;
 
 	zone = page_zone(page);
-	preempt_disable();
+	__local_begin(flags);
 	__inc_zone_state(zone, item);
-	preempt_enable();
+	__local_end(flags);
 }
 EXPORT_SYMBOL(inc_zone_page_state);
 
 void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 {
-	preempt_disable();
+	unsigned long flags;
+
+	__local_begin(flags);
 	__dec_zone_page_state(page, item);
-	preempt_enable();
+	__local_end(flags);
 }
 EXPORT_SYMBOL(dec_zone_page_state);
 
@@ -325,15 +332,16 @@ void refresh_cpu_vm_stats(int cpu)
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *p;
+		unsigned long flags;
 
 		if (!populated_zone(zone))
 			continue;
 
 		p = zone_pcp(zone, cpu);
-
+		__local_begin(flags);
 		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
 			if (p->vm_stat_diff[i]) {
-				int diff = xchg(&p->vm_stat_diff[i], 0);
+				int diff = __local_xchg(&p->vm_stat_diff[i], 0);
 
 				zone_page_state_add(diff, zone, i);
 #ifdef CONFIG_NUMA
@@ -342,6 +350,7 @@ void refresh_cpu_vm_stats(int cpu)
 #endif
 			}
 #ifdef CONFIG_NUMA
+		__local_end(flags);
 		/*
 		 * Deal with draining the remote pageset of this
 		 * processor
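
Similarly, a small userspace model of the refresh_cpu_vm_stats() drain idiom above
(again illustrative and not part of the patch; ITEMS, the arrays and fold_cpu_stats()
are made up for the example): each pending per-cpu delta is taken and zeroed with
__local_xchg() inside a single __local_begin()/__local_end() section, then folded
into the global counter.

	#include <stdio.h>

	#define ITEMS 3	/* made-up item count for the example */

	/* Stand-ins for the primitives; see the sketch after the local.h hunks. */
	#define __local_begin(flags)	((flags) = 0)
	#define __local_end(flags)	((void)(flags))
	#define __local_xchg(v, n)			\
	({						\
		__typeof__(*(v)) __x = *(v);		\
		*(v) = (n);				\
		__x;					\
	})

	static long global_stat[ITEMS];
	static long cpu_stat_diff[ITEMS] = { 4, 0, -2 };	/* pending per-cpu deltas */

	static void fold_cpu_stats(void)
	{
		unsigned long flags;
		int i;

		__local_begin(flags);
		for (i = 0; i < ITEMS; i++)
			if (cpu_stat_diff[i]) {
				/* Take the delta and zero it in one local operation. */
				long diff = __local_xchg(&cpu_stat_diff[i], 0);

				global_stat[i] += diff;
			}
		__local_end(flags);
	}

	int main(void)
	{
		int i;

		fold_cpu_stats();
		for (i = 0; i < ITEMS; i++)
			printf("item %d: global=%ld pending=%ld\n",
			       i, global_stat[i], cpu_stat_diff[i]);
		return 0;
	}
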