---
 include/asm-generic/percpu.h |   39 +++--------
 include/asm-x86/percpu_32.h  |  130 ++-----------------------------------------
 init/main.c                  |   16 +----
 3 files changed, 23 insertions(+), 162 deletions(-)

Index: linux-2.6/include/asm-generic/percpu.h
===================================================================
--- linux-2.6.orig/include/asm-generic/percpu.h	2007-11-20 22:08:20.293037780 -0800
+++ linux-2.6/include/asm-generic/percpu.h	2007-11-21 09:42:14.869538121 -0800
@@ -4,53 +4,38 @@
 #include <linux/threads.h>

 #define __GENERIC_PER_CPU
-#ifdef CONFIG_SMP
-extern unsigned long __per_cpu_offset[NR_CPUS];
+#define per_cpu_offset(x) cpu_offset(x)

-#define per_cpu_offset(x) (__per_cpu_offset[x])
+#define per_cpu_var(var) per_cpu__##var

 /* Separate out the type, so (int[3], foo) works. */
 #define DEFINE_PER_CPU(type, name) \
-	__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
+	__attribute__((__section__(".data.percpu"))) __typeof__(type) \
+	per_cpu_var(name)

 #define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
 	__attribute__((__section__(".data.percpu.shared_aligned"))) \
-	__typeof__(type) per_cpu__##name \
+	__typeof__(type) per_cpu_var(name) \
 	____cacheline_aligned_in_smp

 /* var is in discarded region: offset to particular copy we want */
 #define per_cpu(var, cpu) (*({				\
 	extern int simple_identifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
-#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id())
+	CPU_PTR(&per_cpu_var(var), (cpu)); }))
+#define __get_cpu_var(var) (*THIS_CPU(&per_cpu_var(var)))
+#define __raw_get_cpu_var(var) (*__THIS_CPU(&per_cpu_var(var)))

 /* A macro to avoid #include hell... */
 #define percpu_modcopy(pcpudst, src, size)		\
 do {							\
 	unsigned int __i;				\
 	for_each_possible_cpu(__i)			\
-		memcpy((pcpudst)+__per_cpu_offset[__i],	\
-		       (src), (size));			\
+		memcpy(CPU_PTR((pcpudst), __i), (src), (size));	\
 } while (0)

-#else /* ! SMP */
-#define DEFINE_PER_CPU(type, name) \
-	__typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
-	DEFINE_PER_CPU(type, name)
-
-#define per_cpu(var, cpu)		(*((void)(cpu), &per_cpu__##var))
-#define __get_cpu_var(var)		per_cpu__##var
-#define __raw_get_cpu_var(var)		per_cpu__##var
-
-#endif	/* SMP */
-
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name)
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu_var(var))
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu_var(var))

 #endif /* _ASM_GENERIC_PERCPU_H_ */

Index: linux-2.6/init/main.c
===================================================================
--- linux-2.6.orig/init/main.c	2007-11-20 22:05:58.522787909 -0800
+++ linux-2.6/init/main.c	2007-11-20 22:35:10.554037637 -0800
@@ -364,25 +364,17 @@ static inline void smp_prepare_cpus(unsi
 #else

 #ifdef __GENERIC_PER_CPU
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_offset);
-
 static void __init setup_per_cpu_areas(void)
 {
 	unsigned long size, i;
 	char *ptr;
-	unsigned long nr_possible_cpus = num_possible_cpus();

 	/* Copy section for each CPU (we discard the original) */
-	size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
-	ptr = alloc_bootmem_pages(size * nr_possible_cpus);
+	size = __per_cpu_end - __per_cpu_start;
+	ptr = boot_cpu_alloc(size);

-	for_each_possible_cpu(i) {
-		__per_cpu_offset[i] = ptr - __per_cpu_start;
-		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-		ptr += size;
-	}
+	for_each_possible_cpu(i)
+		memcpy(CPU_PTR(ptr, i), __load_per_cpu_start, size);
 }
 #endif /* !__GENERIC_PER_CPU */

Index: linux-2.6/include/asm-x86/percpu_32.h
===================================================================
--- linux-2.6.orig/include/asm-x86/percpu_32.h	2007-11-20 22:13:35.181288030 -0800
+++ linux-2.6/include/asm-x86/percpu_32.h	2007-11-20 22:18:42.049287313 -0800
@@ -15,142 +15,26 @@
  * Example:
  *	PER_CPU(cpu_gdt_descr, %ebx)
  */
-#ifdef CONFIG_SMP
 #define PER_CPU(var, reg)				\
 	movl %fs:per_cpu__##this_cpu_off, reg;		\
 	lea per_cpu__##var(reg), reg
 #define PER_CPU_VAR(var)	%fs:per_cpu__##var
-#else /* ! SMP */
-#define PER_CPU(var, reg)			\
-	movl $per_cpu__##var, reg
-#define PER_CPU_VAR(var)	per_cpu__##var
-#endif	/* SMP */
-
 #else /* ...!ASSEMBLY */

 char cpu_area[];

-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- *	var - variable name
- *	cpu - 32bit register containing the current CPU number
- *
- * The resulting address is stored in the "cpu" argument.
- *
- * Example:
- *	PER_CPU(cpu_gdt_descr, %ebx)
- */
-#ifdef CONFIG_SMP
-/* Same as generic implementation except for optimized local access. */
-#define __GENERIC_PER_CPU
-
-/* This is used for other cpus to find our section. */
-extern unsigned long __per_cpu_offset[];
-
-#define per_cpu_offset(x) (__per_cpu_offset[x])
-
-/* Separate out the type, so (int[3], foo) works. */
-#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
-#define DEFINE_PER_CPU(type, name) \
-	__attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-
-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
-	__attribute__((__section__(".data.percpu.shared_aligned"))) \
-	__typeof__(type) per_cpu__##name		\
-	____cacheline_aligned_in_smp
+#include <asm-generic/percpu.h>

 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);

-/* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*({				\
-	extern int simple_indentifier_##var(void);	\
-	RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
-
-#define __raw_get_cpu_var(var) (*({					\
-	extern int simple_indentifier_##var(void);			\
-	RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off));	\
-}))
-
-#define __get_cpu_var(var) __raw_get_cpu_var(var)
-
-/* A macro to avoid #include hell... */
-#define percpu_modcopy(pcpudst, src, size)		\
-do {							\
-	unsigned int __i;				\
-	for_each_possible_cpu(__i)			\
-		memcpy((pcpudst)+__per_cpu_offset[__i],	\
-		       (src), (size));			\
-} while (0)
-
-#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
-#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
-
-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
-#define __percpu_seg "%%fs:"
-#else  /* !SMP */
-#include <asm-generic/percpu.h>
-#define __percpu_seg ""
-#endif	/* SMP */
+#define __THIS_CPU_OFFSET CPU_READ(this_cpu_off)

-/* For arch-specific code, we can use direct single-insn ops (they
- * don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(op,var,val)			\
-	do {						\
-		typedef typeof(var) T__;		\
-		if (0) { T__ tmp__; tmp__ = (val); }	\
-		switch (sizeof(var)) {			\
-		case 1:					\
-			asm(op "b %1,"__percpu_seg"%0"	\
-			    : "+m" (var)		\
-			    :"ri" ((T__)val));		\
-			break;				\
-		case 2:					\
-			asm(op "w %1,"__percpu_seg"%0"	\
-			    : "+m" (var)		\
-			    :"ri" ((T__)val));		\
-			break;				\
-		case 4:					\
-			asm(op "l %1,"__percpu_seg"%0"	\
-			    : "+m" (var)		\
-			    :"ri" ((T__)val));		\
-			break;				\
-		default: __bad_percpu_size();		\
-		}					\
-	} while (0)
-
-#define percpu_from_op(op,var)				\
-	({						\
-		typeof(var) ret__;			\
-		switch (sizeof(var)) {			\
-		case 1:					\
-			asm(op "b "__percpu_seg"%1,%0"	\
-			    : "=r" (ret__)		\
-			    : "m" (var));		\
-			break;				\
-		case 2:					\
-			asm(op "w "__percpu_seg"%1,%0"	\
-			    : "=r" (ret__)		\
-			    : "m" (var));		\
-			break;				\
-		case 4:					\
-			asm(op "l "__percpu_seg"%1,%0"	\
-			    : "=r" (ret__)		\
-			    : "m" (var));		\
-			break;				\
-		default: __bad_percpu_size();		\
-		}					\
-		ret__; })
-
-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var)
-#define x86_write_percpu(var,val) percpu_to_op("mov", per_cpu__##var, val)
-#define x86_add_percpu(var,val) percpu_to_op("add", per_cpu__##var, val)
-#define x86_sub_percpu(var,val) percpu_to_op("sub", per_cpu__##var, val)
-#define x86_or_percpu(var,val) percpu_to_op("or", per_cpu__##var, val)
+#define x86_read_percpu(var) CPU_READ(per_cpu_var(var))
+#define x86_write_percpu(var,val) CPU_WRITE(per_cpu_var(var), (val))
+#define x86_add_percpu(var,val) CPU_ADD(per_cpu_var(var), (val))
+#define x86_sub_percpu(var,val) CPU_SUB(per_cpu_var(var), (val))
+#define x86_or_percpu(var,val) CPU_OR(per_cpu_var(var), (val))

 #endif /* !__ASSEMBLY__ */
 #endif /* __ARCH_I386_PERCPU__ */
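
For reference, a minimal usage sketch of the converted interface, not part of the patch: it assumes the CPU_PTR()/THIS_CPU()/CPU_READ() operations provided by the cpu_alloc work this patch builds on, and the per-cpu variable and helper names below are made up purely for illustration.

	#include <linux/percpu.h>
	#include <linux/smp.h>

	/* Hypothetical per-cpu counter, for illustration only. */
	DEFINE_PER_CPU(unsigned long, demo_count);

	static void demo_inc_this_cpu(void)
	{
		/*
		 * After this patch __get_cpu_var() expands to
		 * *THIS_CPU(&per_cpu_var(demo_count)) instead of an
		 * __per_cpu_offset[] lookup; as before, callers are
		 * expected to have preemption disabled here.
		 */
		__get_cpu_var(demo_count)++;
	}

	static unsigned long demo_read_cpu(int cpu)
	{
		/* per_cpu() now resolves through CPU_PTR(). */
		return per_cpu(demo_count, cpu);
	}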