Index: linux-2.6.17-rc6-mm2/include/asm-x86_64/local.h =================================================================== --- linux-2.6.17-rc6-mm2.orig/include/asm-x86_64/local.h 2006-06-12 09:08:24.284596533 -0700 +++ linux-2.6.17-rc6-mm2/include/asm-x86_64/local.h 2006-06-12 09:14:52.730280444 -0700 @@ -45,29 +45,34 @@ static inline void local_sub(long i, loc :"ir" (i), "m" (v->counter)); } -/* On x86-64 these are better than the atomic variants on SMP kernels - because they dont use a lock prefix. */ +/* + * Use these for per-cpu local_t variables: on some archs they are + * much more efficient than these naive implementations. Note they take + * a variable (eg. mystruct.foo), not an address. + * + * This could be done better if we moved the per cpu data directly + * after GS. + */ +#define cpu_local_read(v) local_read(&per_cpu(v, raw_smp_processor_id())) +#define cpu_local_set(v, i) local_set(&per_cpu(v, raw_smp_processor_id()), (i)) +#define cpu_local_inc(v) local_inc(&per_cpu(v, raw_smp_processor_id())) +#define cpu_local_dec(v) local_dec(&per_cpu(v, raw_smp_processor_id())) +#define cpu_local_add(i, v) local_add((i), &per_cpu(v, raw_smp_processor_id())) +#define cpu_local_sub(i, v) local_sub((i), &per_cpu(v, raw_smp_processor_id())) + +/* + * Non-atomic increments, ie. preemption disabled and won't be touched + * in interrupt, etc. Some archs can optimize this case well. + */ #define __local_inc(l) local_inc(l) #define __local_dec(l) local_dec(l) #define __local_add(i,l) local_add((i),(l)) #define __local_sub(i,l) local_sub((i),(l)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - #define __cpu_local_inc(v) cpu_local_inc(v) #define __cpu_local_dec(v) cpu_local_dec(v) -#define __cpu_local_add(i, v) cpu_local_add((i), (v)) -#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) - -#endif /* _ARCH_I386_LOCAL_H */ +#define __cpu_local_add(i, v) cpu_local_add(i, v) +#define __cpu_local_sub(i, v) cpu_local_sub(i, v) +#endif /* _ARCH_X8664_LOCAL_H */ Index: linux-2.6.17-rc6-mm2/include/asm-i386/local.h =================================================================== --- linux-2.6.17-rc6-mm2.orig/include/asm-i386/local.h 2006-06-05 17:57:02.000000000 -0700 +++ linux-2.6.17-rc6-mm2/include/asm-i386/local.h 2006-06-12 09:13:57.534479835 -0700 @@ -45,26 +45,26 @@ static __inline__ void local_sub(long i, :"ir" (i), "m" (v->counter)); } -/* On x86, these are no better than the atomic variants. */ +#define cpu_local_read(v) local_read(&per_cpu(v, raw_smp_processor_id())) +#define cpu_local_set(v, i) local_set(&per_cpu(v, raw_smp_processor_id()), (i)) +#define cpu_local_inc(v) local_inc(&per_cpu(v, raw_smp_processor_id())) +#define cpu_local_dec(v) local_dec(&per_cpu(v, raw_smp_processor_id())) +#define cpu_local_add(i, v) local_add((i), &per_cpu(v, raw_smp_processor_id())) +#define cpu_local_sub(i, v) local_sub((i), &per_cpu(v, raw_smp_processor_id())) + +/* + * Non-atomic increments, ie. preemption disabled and won't be touched + * in interrupt, etc. + */ #define __local_inc(l) local_inc(l) #define __local_dec(l) local_dec(l) #define __local_add(i,l) local_add((i),(l)) #define __local_sub(i,l) local_sub((i),(l)) -/* Use these for per-cpu local_t variables: on some archs they are - * much more efficient than these naive implementations. Note they take - * a variable, not an address. - */ -#define cpu_local_read(v) local_read(&__get_cpu_var(v)) -#define cpu_local_set(v, i) local_set(&__get_cpu_var(v), (i)) -#define cpu_local_inc(v) local_inc(&__get_cpu_var(v)) -#define cpu_local_dec(v) local_dec(&__get_cpu_var(v)) -#define cpu_local_add(i, v) local_add((i), &__get_cpu_var(v)) -#define cpu_local_sub(i, v) local_sub((i), &__get_cpu_var(v)) - #define __cpu_local_inc(v) cpu_local_inc(v) #define __cpu_local_dec(v) cpu_local_dec(v) -#define __cpu_local_add(i, v) cpu_local_add((i), (v)) -#define __cpu_local_sub(i, v) cpu_local_sub((i), (v)) +#define __cpu_local_add(i, v) cpu_local_add(i, v) +#define __cpu_local_sub(i, v) cpu_local_sub(i, v) #endif /* _ARCH_I386_LOCAL_H */ +