--- arch/powerpc/kernel/setup_64.c | 22 ++++------------------ arch/sparc64/kernel/smp.c | 3 +++ arch/x86/kernel/setup64.c | 27 +++++---------------------- include/asm-generic/percpu.h | 17 ++++++++++++----- include/asm-ia64/percpu.h | 33 +++++++++------------------------ include/asm-powerpc/percpu.h | 18 ------------------ include/asm-s390/percpu.h | 22 ++++------------------ include/asm-sparc64/percpu.h | 22 ++-------------------- include/asm-x86/percpu_32.h | 21 +-------------------- include/asm-x86/percpu_64.h | 32 +++----------------------------- include/linux/percpu.h | 18 ++++++++++++++++++ init/main.c | 22 +++++++++++----------- 12 files changed, 72 insertions(+), 185 deletions(-) Index: linux-2.6/arch/powerpc/kernel/setup_64.c =================================================================== --- linux-2.6.orig/arch/powerpc/kernel/setup_64.c 2007-11-22 15:00:31.329204985 -0800 +++ linux-2.6/arch/powerpc/kernel/setup_64.c 2007-11-22 15:01:24.305705141 -0800 @@ -576,25 +576,11 @@ void cpu_die(void) #ifdef CONFIG_SMP void __init setup_per_cpu_areas(void) { - int i; - unsigned long size; - char *ptr; + int cpu; - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); -#ifdef CONFIG_MODULES - if (size < PERCPU_ENOUGH_ROOM) - size = PERCPU_ENOUGH_ROOM; -#endif - - for_each_possible_cpu(i) { - ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); - if (!ptr) - panic("Cannot allocate cpu data for CPU %d\n", i); - - paca[i].data_offset = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - } + __setup_per_cpu_areas(); + for_each_possible_cpu(cpu) + paca[cpu].data_offset = __per_cpu_offset[cpu]; /* Now that per_cpu is setup, initialize cpu_sibling_map */ smp_setup_cpu_sibling_map(); Index: linux-2.6/arch/sparc64/kernel/smp.c =================================================================== --- linux-2.6.orig/arch/sparc64/kernel/smp.c 2007-11-22 
15:00:31.337205044 -0800 +++ linux-2.6/arch/sparc64/kernel/smp.c 2007-11-22 15:01:24.309705150 -0800 @@ -1421,6 +1421,8 @@ unsigned long __per_cpu_shift __read_mos EXPORT_SYMBOL(__per_cpu_base); EXPORT_SYMBOL(__per_cpu_shift); +void __init setup_per_cpu_areas(void) {} + void __init real_setup_per_cpu_areas(void) { unsigned long goal, size, i; @@ -1433,6 +1435,7 @@ void __init real_setup_per_cpu_areas(voi for (size = PAGE_SIZE; size < goal; size <<= 1UL) __per_cpu_shift++; + __setup_per_cpu_areas(); ptr = alloc_bootmem_pages(size * NR_CPUS); __per_cpu_base = ptr - __per_cpu_start; Index: linux-2.6/arch/x86/kernel/setup64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup64.c 2007-11-22 15:00:31.349205290 -0800 +++ linux-2.6/arch/x86/kernel/setup64.c 2007-11-22 15:01:24.309705150 -0800 @@ -86,32 +86,15 @@ __setup("noexec32=", nonx32_setup); */ void __init setup_per_cpu_areas(void) { - int i; - unsigned long size; + int cpu; + extern unsigned long __per_cpu_offset[]; #ifdef CONFIG_HOTPLUG_CPU prefill_possible_map(); #endif - - /* Copy section for each CPU (we discard the original) */ - size = PERCPU_ENOUGH_ROOM; - - printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); - for_each_cpu_mask (i, cpu_possible_map) { - char *ptr; - - if (!NODE_DATA(cpu_to_node(i))) { - printk("cpu with no node %d, num_online_nodes %d\n", - i, num_online_nodes()); - ptr = alloc_bootmem_pages(size); - } else { - ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); - } - if (!ptr) - panic("Cannot allocate cpu data for CPU %d\n", i); - cpu_pda(i)->data_offset = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - } + __setup_per_cpu_areas(); + for_each_cpu_mask (cpu, cpu_possible_map) + cpu_pda(cpu)->data_offset = __per_cpu_offset[cpu]; } void pda_init(int cpu) Index: linux-2.6/include/asm-ia64/percpu.h 
=================================================================== --- linux-2.6.orig/include/asm-ia64/percpu.h 2007-11-22 15:00:31.357204778 -0800 +++ linux-2.6/include/asm-ia64/percpu.h 2007-11-22 15:01:24.325704706 -0800 @@ -16,30 +16,6 @@ #include #include -#ifdef CONFIG_SMP - -extern unsigned long __per_cpu_offset[NR_CPUS]; -#define per_cpu_offset(x) (__per_cpu_offset[x]) - -/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ -DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); - -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu])) -#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset))) -#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset))) - -extern void setup_per_cpu_areas (void); -extern void *per_cpu_init(void); - -#else /* ! SMP */ - -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) -#define __get_cpu_var(var) per_cpu__##var -#define __raw_get_cpu_var(var) per_cpu__##var -#define per_cpu_init() (__phys_per_cpu_start) - -#endif /* SMP */ - /* * Be extremely careful when taking the address of this variable! Due to virtual * remapping, it is different from the canonical address returned by __get_cpu_var(var)! 
@@ -48,6 +24,15 @@ extern void *per_cpu_init(void); */ #define __ia64_per_cpu_var(var) (per_cpu__##var) +/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ +DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); + +#define __my_cpu_offset __ia64_per_cpu_var(local_per_cpu_offset) + +extern void *per_cpu_init(void); + +#include <asm-generic/percpu.h> + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_IA64_PERCPU_H */ Index: linux-2.6/include/linux/percpu.h =================================================================== --- linux-2.6.orig/include/linux/percpu.h 2007-11-22 15:00:31.361204931 -0800 +++ linux-2.6/include/linux/percpu.h 2007-11-22 15:01:24.325704706 -0800 @@ -27,6 +27,24 @@ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) +#ifdef CONFIG_SMP +/* + * setup_per_cpu_areas() is called before sched_init() to setup the + * per cpu areas. If it is not defined then a dummy is provided that + * calls __setup_per_cpu_areas(). + * + * setup_per_cpu_areas should do preparatory things and then call + * __setup_per_cpu_areas() which will setup the areas. + * After __setup_per_cpu_areas() returns further setups may be done + * by an arch. + */ +extern void setup_per_cpu_areas(void); +extern void __setup_per_cpu_areas (void); +#else +static inline void setup_per_cpu_areas(void) {} +static inline void __setup_per_cpu_areas(void) {} +#endif + /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. 
*/ #ifndef PERCPU_ENOUGH_ROOM #ifdef CONFIG_MODULES Index: linux-2.6/init/main.c =================================================================== --- linux-2.6.orig/init/main.c 2007-11-22 15:00:31.429205161 -0800 +++ linux-2.6/init/main.c 2007-11-22 15:01:24.329704835 -0800 @@ -358,33 +358,33 @@ static void __init smp_init(void) #define smp_init() do { } while (0) #endif -static inline void setup_per_cpu_areas(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } #else -#ifdef __GENERIC_PER_CPU unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; - EXPORT_SYMBOL(__per_cpu_offset); -static void __init setup_per_cpu_areas(void) +void __init __setup_per_cpu_areas(void) { - unsigned long size, i; + unsigned long size, cpu; char *ptr; - unsigned long nr_possible_cpus = num_possible_cpus(); /* Copy section for each CPU (we discard the original) */ size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); - ptr = alloc_bootmem_pages(size * nr_possible_cpus); - for_each_possible_cpu(i) { - __per_cpu_offset[i] = ptr - __per_cpu_start; + for_each_possible_cpu(cpu) { + ptr = __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), + size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + __per_cpu_offset[cpu] = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - ptr += size; } } -#endif /* !__GENERIC_PER_CPU */ + +void __attribute__((weak)) __init setup_per_cpu_areas(void) +{ + __setup_per_cpu_areas(); +} /* Called by boot processor to activate the rest. 
*/ static void __init smp_init(void) Index: linux-2.6/include/asm-generic/percpu.h =================================================================== --- linux-2.6.orig/include/asm-generic/percpu.h 2007-11-22 15:01:17.922409112 -0800 +++ linux-2.6/include/asm-generic/percpu.h 2007-11-22 15:11:13.293169133 -0800 @@ -3,17 +3,24 @@ #include <linux/compiler.h> #include <linux/threads.h> -#define __GENERIC_PER_CPU #ifdef CONFIG_SMP +#ifndef __per_cpu_offset extern unsigned long __per_cpu_offset[NR_CPUS]; - #define per_cpu_offset(x) (__per_cpu_offset[x]) +#endif + +#ifndef __my_cpu_offset +#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id()) +#define my_cpu_offset per_cpu_offset(smp_processor_id()) +#else +#define my_cpu_offset __my_cpu_offset +#endif /* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) *RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]) -#define __get_cpu_var(var) per_cpu(var, smp_processor_id()) -#define __raw_get_cpu_var(var) per_cpu(var, raw_smp_processor_id()) +#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, per_cpu_offset(cpu))) +#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, my_cpu_offset)) +#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset)) #else /* ! SMP */ Index: linux-2.6/include/asm-powerpc/percpu.h =================================================================== --- linux-2.6.orig/include/asm-powerpc/percpu.h 2007-11-22 15:00:31.377205010 -0800 +++ linux-2.6/include/asm-powerpc/percpu.h 2007-11-22 15:01:24.329704835 -0800 @@ -7,30 +7,12 @@ * Same as asm-generic/percpu.h, except that we store the per cpu offset * in the paca. Based on the x86-64 implementation. 
*/ - -#ifdef CONFIG_SMP - #include #define __per_cpu_offset(cpu) (paca[cpu].data_offset) #define __my_cpu_offset() get_paca()->data_offset #define per_cpu_offset(x) (__per_cpu_offset(x)) -/* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) -#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __my_cpu_offset())) -#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, local_paca->data_offset)) - -extern void setup_per_cpu_areas(void); - -#else /* ! SMP */ - -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) -#define __get_cpu_var(var) per_cpu__##var -#define __raw_get_cpu_var(var) per_cpu__##var - -#endif /* SMP */ -#else #include #endif #endif /* _ASM_POWERPC_PERCPU_H_ */ Index: linux-2.6/include/asm-sparc64/percpu.h =================================================================== --- linux-2.6.orig/include/asm-sparc64/percpu.h 2007-11-22 15:00:31.401204930 -0800 +++ linux-2.6/include/asm-sparc64/percpu.h 2007-11-22 15:01:24.329704835 -0800 @@ -5,29 +5,11 @@ register unsigned long __local_per_cpu_offset asm("g5"); -#ifdef CONFIG_SMP - -#define setup_per_cpu_areas() do { } while (0) -extern void real_setup_per_cpu_areas(void); - -extern unsigned long __per_cpu_base; -extern unsigned long __per_cpu_shift; #define __per_cpu_offset(__cpu) \ (__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift)) #define per_cpu_offset(x) (__per_cpu_offset(x)) -/* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu))) -#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset)) -#define __raw_get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __local_per_cpu_offset)) - -#else /* ! 
SMP */ - -#define real_setup_per_cpu_areas() do { } while (0) - -#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) -#define __get_cpu_var(var) per_cpu__##var -#define __raw_get_cpu_var(var) per_cpu__##var +#define __my_cpu_offset __local_per_cpu_offset -#endif /* SMP */ +#include <asm-generic/percpu.h> #endif /* __ARCH_SPARC64_PERCPU__ */ Index: linux-2.6/include/asm-x86/percpu_64.h =================================================================== --- linux-2.6.orig/include/asm-x86/percpu_64.h 2007-11-22 15:00:31.405204788 -0800 +++ linux-2.6/include/asm-x86/percpu_64.h 2007-11-22 15:08:25.851823509 -0800 @@ -7,38 +7,12 @@ should be just put into a single section and referenced directly from %gs */ -#ifdef CONFIG_SMP - #include <asm/pda.h> #define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) -#define __my_cpu_offset() read_pda(data_offset) - -#define per_cpu_offset(x) (__per_cpu_offset(x)) - -/* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*({ \ - extern int simple_identifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __per_cpu_offset(cpu)); })) -#define __get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) -#define __raw_get_cpu_var(var) (*({ \ - extern int simple_identifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __my_cpu_offset()); })) - -extern void setup_per_cpu_areas(void); - -#else /* ! 
SMP */ - -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) +#define per_cpu_offset(cpu) __per_cpu_offset(cpu) +#define __my_cpu_offset read_pda(data_offset) -#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) -#define __get_cpu_var(var) per_cpu__##var -#define __raw_get_cpu_var(var) per_cpu__##var +#include -#endif /* SMP */ #endif /* _ASM_X8664_PERCPU_H_ */ Index: linux-2.6/include/asm-s390/percpu.h =================================================================== --- linux-2.6.orig/include/asm-s390/percpu.h 2007-11-22 15:00:31.421205270 -0800 +++ linux-2.6/include/asm-s390/percpu.h 2007-11-22 15:01:24.329704835 -0800 @@ -4,8 +4,6 @@ #include #include -#define __GENERIC_PER_CPU - /* * s390 uses its own implementation for per cpu data, the offset of * the cpu local data area is cached in the cpu's lowcore memory. @@ -15,7 +13,7 @@ */ #if defined(__s390x__) && defined(MODULE) -#define __reloc_hide(var,offset) (*({ \ +#define RELOC_HIDE(var,offset) (*({ \ extern int simple_identifier_##var(void); \ unsigned long *__ptr; \ asm ( "larl %0,per_cpu__"#var"@GOTENT" \ @@ -24,7 +22,7 @@ #else -#define __reloc_hide(var, offset) (*({ \ +#define RELOC_HIDE(var, offset) (*({ \ extern int simple_identifier_##var(void); \ unsigned long __ptr; \ asm ( "" : "=a" (__ptr) : "0" (&per_cpu__##var) ); \ @@ -32,20 +30,8 @@ #endif -#ifdef CONFIG_SMP - -extern unsigned long __per_cpu_offset[NR_CPUS]; - -#define __get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) -#define __raw_get_cpu_var(var) __reloc_hide(var,S390_lowcore.percpu_offset) -#define per_cpu(var,cpu) __reloc_hide(var,__per_cpu_offset[cpu]) -#define per_cpu_offset(x) (__per_cpu_offset[x]) - -#else /* ! 
SMP */ +#define __my_cpu_offset S390_lowcore.percpu_offset -#define __get_cpu_var(var) __reloc_hide(var,0) -#define __raw_get_cpu_var(var) __reloc_hide(var,0) -#define per_cpu(var,cpu) __reloc_hide(var,0) +#include -#endif /* SMP */ #endif /* __ARCH_S390_PERCPU__ */ Index: linux-2.6/include/asm-x86/percpu_32.h =================================================================== --- linux-2.6.orig/include/asm-x86/percpu_32.h 2007-11-22 15:00:31.413205410 -0800 +++ linux-2.6/include/asm-x86/percpu_32.h 2007-11-22 15:01:24.329704835 -0800 @@ -40,29 +40,10 @@ * Example: * PER_CPU(cpu_gdt_descr, %ebx) */ -#ifdef CONFIG_SMP -/* Same as generic implementation except for optimized local access. */ -#define __GENERIC_PER_CPU - -/* This is used for other cpus to find our section. */ -extern unsigned long __per_cpu_offset[]; - -#define per_cpu_offset(x) (__per_cpu_offset[x]) /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); - -/* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*({ \ - extern int simple_indentifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); })) - -#define __raw_get_cpu_var(var) (*({ \ - extern int simple_indentifier_##var(void); \ - RELOC_HIDE(&per_cpu__##var, x86_read_percpu(this_cpu_off)); \ -})) - -#define __get_cpu_var(var) __raw_get_cpu_var(var) +#define __my_cpu_offset x86_read_percpu(this_cpu_off) /* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ #define __percpu_seg "%%fs:"