percpu: Base per cpu data at ZERO Move percpu data to start at 0. This means that the offsets to segment or other registers or per cpu variables become very small. A per cpu variable address can be cast to unsigned long to get the offset into the per cpu area. The pointers to the per cpu areas can point directly to beginning of the per cpu data. Currently we offset them by __per_cpu_start. - x86_64 gs: is pointing to the start of the per cpu area for the currently executing processor. - x86_32 fs: has the same role - sparc g5 is pointing to the per cpu area - IA64: PERCPU_DATA is a fixed address that can be used as an offset to read per cpu data for the current processor. The core kernel will be able to use these registers/addresses to avoid adding offsets to per cpu data of any kind in the future. We get rid of the current loader variables for per cpu boundaries. Instead we use __per_cpu_load Address where the loader placed the per cpu data (Source address for copying into the per cpu areas) __per_cpu_size The size of the per cpu area at __per_cpu_load (This is not the size of the total per cpu area which may be dynamically extended) Signed-off-by: Christoph Lameter --- arch/arm/kernel/vmlinux.lds.S | 7 ++----- arch/ia64/kernel/setup.c | 5 ++--- arch/ia64/kernel/vmlinux.lds.S | 15 ++------------- arch/ia64/mm/contig.c | 6 +++--- arch/ia64/mm/discontig.c | 14 ++++++-------- arch/powerpc/kernel/setup_64.c | 6 +++--- arch/powerpc/kernel/vmlinux.lds.S | 8 +------- arch/sparc64/kernel/smp.c | 4 ++-- arch/x86/kernel/setup64.c | 4 ++-- arch/x86/kernel/vmlinux_32.lds.S | 9 ++------- arch/x86/kernel/vmlinux_64.lds.S | 1 + include/asm-generic/percpu.h | 3 ++- include/asm-generic/sections.h | 4 ++-- include/asm-generic/vmlinux.lds.h | 24 +++++++++++++++++++++--- include/asm-ia64/sections.h | 3 ++- include/asm-um/common.lds.S | 9 ++------- include/linux/percpu.h | 2 +- init/main.c | 4 ++-- kernel/lockdep.c | 5 ++--- kernel/module.c | 6 +++--- 20 files changed, 63 
insertions(+), 76 deletions(-) Index: linux-2.6/arch/arm/kernel/vmlinux.lds.S =================================================================== --- linux-2.6.orig/arch/arm/kernel/vmlinux.lds.S 2007-11-27 14:43:28.140463422 -0800 +++ linux-2.6/arch/arm/kernel/vmlinux.lds.S 2007-11-27 14:49:40.900040699 -0800 @@ -63,11 +63,8 @@ SECTIONS usr/built-in.o(.init.ramfs) __initramfs_end = .; #endif - . = ALIGN(4096); - __per_cpu_start = .; - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; + PERCPU(4096) + #ifndef CONFIG_XIP_KERNEL __init_begin = _stext; *(.init.data) Index: linux-2.6/arch/ia64/kernel/setup.c =================================================================== --- linux-2.6.orig/arch/ia64/kernel/setup.c 2007-11-27 14:43:28.164463451 -0800 +++ linux-2.6/arch/ia64/kernel/setup.c 2007-11-27 15:07:19.659962367 -0800 @@ -864,8 +864,7 @@ cpu_init (void) * physical addresses of per cpu variables with a simple: * phys = ar.k3 + &per_cpu_var */ - ia64_set_kr(IA64_KR_PER_CPU_DATA, - ia64_tpa(cpu_data) - (long) __per_cpu_start); + ia64_set_kr(IA64_KR_PER_CPU_DATA, ia64_tpa(cpu_data)); get_max_cacheline_size(); @@ -875,7 +874,7 @@ cpu_init (void) * depends on the data returned by identify_cpu(). We break the dependency by * accessing cpu_data() through the canonical per-CPU address. */ - cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); + cpu_info = cpu_data + ((unsigned long)&__ia64_per_cpu_var(cpu_info)); identify_cpu(cpu_info); #ifdef CONFIG_MCKINLEY Index: linux-2.6/arch/ia64/kernel/vmlinux.lds.S =================================================================== --- linux-2.6.orig/arch/ia64/kernel/vmlinux.lds.S 2007-11-27 14:43:28.176463219 -0800 +++ linux-2.6/arch/ia64/kernel/vmlinux.lds.S 2007-11-27 14:43:29.528463593 -0800 @@ -206,22 +206,11 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } - /* Per-cpu data: */ - percpu : { } :percpu - . 
= ALIGN(PERCPU_PAGE_SIZE); - __phys_per_cpu_start = .; - .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) - { - __per_cpu_start = .; - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } - . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits + PERCPU(PAGE_SIZE) + . = __per_cpu_load + PERCPU_PAGE_SIZE; /* ensure percpu data fits * into percpu page size */ - data : { } :data .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA Index: linux-2.6/arch/ia64/mm/contig.c =================================================================== --- linux-2.6.orig/arch/ia64/mm/contig.c 2007-11-27 14:43:28.188463196 -0800 +++ linux-2.6/arch/ia64/mm/contig.c 2007-11-27 14:43:29.528463593 -0800 @@ -167,13 +167,13 @@ per_cpu_init (void) if (first_time) { first_time=0; for (cpu = 0; cpu < NR_CPUS; cpu++) { - memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start; + memcpy(cpu_data, __per_cpu_load, (unsigned long)&__per_cpu_size); + __per_cpu_offset[cpu] = (unsigned long)cpu_data; cpu_data += PERCPU_PAGE_SIZE; per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; } } - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; + return (void *)__per_cpu_offset[smp_processor_id()]; } static inline void Index: linux-2.6/arch/ia64/mm/discontig.c =================================================================== --- linux-2.6.orig/arch/ia64/mm/discontig.c 2007-11-27 14:43:28.208463536 -0800 +++ linux-2.6/arch/ia64/mm/discontig.c 2007-11-27 15:05:56.719980878 -0800 @@ -144,10 +144,8 @@ static void *per_cpu_node_setup(void *cp for (cpu = 0; cpu < NR_CPUS; cpu++) { if (node == node_cpuid[cpu].nid) { - memcpy(__va(cpu_data), __phys_per_cpu_start, - __per_cpu_end - __per_cpu_start); - __per_cpu_offset[cpu] = (char*)__va(cpu_data) - - __per_cpu_start; + memcpy(__va(cpu_data), __per_cpu_load, (unsigned long)&__per_cpu_size); + __per_cpu_offset[cpu] = (unsigned long)__va(cpu_data); cpu_data +=
PERCPU_PAGE_SIZE; } } @@ -354,8 +352,8 @@ static void __init initialize_pernode_da struct cpuinfo_ia64 *cpu0_cpu_info; cpu = 0; node = node_cpuid[cpu].nid; - cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + - ((char *)&per_cpu__cpu_info - __per_cpu_start)); + cpu0_cpu_info = (struct cpuinfo_ia64 *)(__per_cpu_load + + ((unsigned long)&per_cpu__cpu_info)); cpu0_cpu_info->node_data = mem_data[node].node_data; } #endif /* CONFIG_SMP */ @@ -495,7 +493,7 @@ void __cpuinit *per_cpu_init(void) if (smp_processor_id() != 0) - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; + return (void *)__per_cpu_offset[smp_processor_id()]; if (first_time) { first_time = 0; @@ -503,7 +501,7 @@ void __cpuinit *per_cpu_init(void) per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; } - return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; + return (void *)__per_cpu_offset[smp_processor_id()]; } #endif /* CONFIG_SMP */ Index: linux-2.6/arch/powerpc/kernel/setup_64.c =================================================================== --- linux-2.6.orig/arch/powerpc/kernel/setup_64.c 2007-11-27 14:43:28.260463343 -0800 +++ linux-2.6/arch/powerpc/kernel/setup_64.c 2007-11-27 14:43:29.555962769 -0800 @@ -581,7 +581,7 @@ void __init setup_per_cpu_areas(void) char *ptr; /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); + size = ALIGN((unsigned long)&__per_cpu_size, PAGE_SIZE); #ifdef CONFIG_MODULES if (size < PERCPU_ENOUGH_ROOM) size = PERCPU_ENOUGH_ROOM; @@ -592,8 +592,8 @@ void __init setup_per_cpu_areas(void) if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); - paca[i].data_offset = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + paca[i].data_offset = (unsigned long)ptr; + memcpy(ptr, __per_cpu_load, (unsigned long)&__per_cpu_size); } /* Now that per_cpu is setup, initialize cpu_sibling_map */ Index: linux-2.6/arch/powerpc/kernel/vmlinux.lds.S
=================================================================== --- linux-2.6.orig/arch/powerpc/kernel/vmlinux.lds.S 2007-11-27 14:43:28.272462950 -0800 +++ linux-2.6/arch/powerpc/kernel/vmlinux.lds.S 2007-11-27 14:43:29.555962769 -0800 @@ -143,13 +143,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(PAGE_SIZE); - .data.percpu : { - __per_cpu_start = .; - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(PAGE_SIZE) . = ALIGN(8); .machine.desc : { Index: linux-2.6/arch/sparc64/kernel/smp.c =================================================================== --- linux-2.6.orig/arch/sparc64/kernel/smp.c 2007-11-27 14:43:28.352463262 -0800 +++ linux-2.6/arch/sparc64/kernel/smp.c 2007-11-27 14:43:29.555962769 -0800 @@ -1435,10 +1435,10 @@ void __init real_setup_per_cpu_areas(voi ptr = alloc_bootmem_pages(size * NR_CPUS); - __per_cpu_base = ptr - __per_cpu_start; + __per_cpu_base = (unsigned long)ptr; for (i = 0; i < NR_CPUS; i++, ptr += size) - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + memcpy(ptr, __per_cpu_load, (unsigned long)&__per_cpu_size); /* Setup %g5 for the boot cpu.
*/ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); Index: linux-2.6/arch/x86/kernel/setup64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup64.c 2007-11-27 14:43:28.372463093 -0800 +++ linux-2.6/arch/x86/kernel/setup64.c 2007-11-27 14:43:29.555962769 -0800 @@ -111,8 +111,8 @@ void __init setup_per_cpu_areas(void) } if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); - cpu_pda(i)->data_offset = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + cpu_pda(i)->data_offset = (unsigned long)ptr; + memcpy(ptr, __per_cpu_load, (unsigned long)&__per_cpu_size); /* Relocate the pda */ memcpy(&per_cpu(pda, i), cpu_pda(i), sizeof(struct x8664_pda)); cpu_pda(i) = &per_cpu(pda, i); Index: linux-2.6/arch/x86/kernel/vmlinux_32.lds.S =================================================================== --- linux-2.6.orig/arch/x86/kernel/vmlinux_32.lds.S 2007-11-27 14:43:28.384463539 -0800 +++ linux-2.6/arch/x86/kernel/vmlinux_32.lds.S 2007-11-27 14:43:29.555962769 -0800 @@ -29,6 +29,7 @@ jiffies = jiffies_64; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ + percpu PT_LOAD FLAGS(4); /* R__ */ data PT_LOAD FLAGS(7); /* RWE */ note PT_NOTE FLAGS(0); /* ___ */ } @@ -179,13 +180,7 @@ SECTIONS __initramfs_end = .; } #endif - . = ALIGN(4096); - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { - __per_cpu_start = .; - *(.data.percpu) - *(.data.percpu.shared_aligned) - __per_cpu_end = .; - } + PERCPU(4096) .
= ALIGN(4096); /* freed after init ends here */ Index: linux-2.6/arch/x86/kernel/vmlinux_64.lds.S =================================================================== --- linux-2.6.orig/arch/x86/kernel/vmlinux_64.lds.S 2007-11-27 14:43:28.396463624 -0800 +++ linux-2.6/arch/x86/kernel/vmlinux_64.lds.S 2007-11-27 14:43:29.555962769 -0800 @@ -16,6 +16,7 @@ jiffies_64 = jiffies; _proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ + percpu PT_LOAD FLAGS(4); /* R__ */ data PT_LOAD FLAGS(7); /* RWE */ user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ Index: linux-2.6/include/asm-generic/sections.h =================================================================== --- linux-2.6.orig/include/asm-generic/sections.h 2007-11-27 14:43:28.424463524 -0800 +++ linux-2.6/include/asm-generic/sections.h 2007-11-27 14:43:29.561213368 -0800 @@ -10,8 +10,8 @@ extern char __init_begin[], __init_end[] extern char _sinittext[], _einittext[]; extern char _sextratext[] __attribute__((weak)); extern char _eextratext[] __attribute__((weak)); -extern char _end[]; -extern char __per_cpu_start[], __per_cpu_end[]; +extern char _end[], __per_cpu_load[]; +extern unsigned long __per_cpu_size; extern char __kprobes_text_start[], __kprobes_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __start_rodata[], __end_rodata[]; Index: linux-2.6/include/asm-generic/vmlinux.lds.h =================================================================== --- linux-2.6.orig/include/asm-generic/vmlinux.lds.h 2007-11-27 14:43:28.432463406 -0800 +++ linux-2.6/include/asm-generic/vmlinux.lds.h 2007-11-27 15:18:11.804923121 -0800 @@ -255,12 +255,30 @@ *(.initcall7.init) \ *(.initcall7s.init) +#ifdef CONFIG_SMP #define PERCPU(align) \ . 
= ALIGN(align); \ - __per_cpu_start = .; \ - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ + percpu : { } :percpu \ + __per_cpu_load = .; \ + .data.percpu 0 : AT(__per_cpu_load - LOAD_OFFSET) { \ *(.data.percpu.first) \ *(.data.percpu) \ *(.data.percpu.shared_aligned) \ + __per_cpu_size = .; \ } \ - __per_cpu_end = .; + . = __per_cpu_load + __per_cpu_size; \ + data : { } :data + +#else +#define PERCPU(align) \ + . = ALIGN(align); \ + __per_cpu_load = .; \ + .data.percpu : AT(__per_cpu_load - LOAD_OFFSET) { \ + *(.data.percpu.first) \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ + __per_cpu_size = . - __per_cpu_load; \ + } + +#endif Index: linux-2.6/include/asm-ia64/sections.h =================================================================== --- linux-2.6.orig/include/asm-ia64/sections.h 2007-11-27 14:43:28.456463356 -0800 +++ linux-2.6/include/asm-ia64/sections.h 2007-11-27 14:43:29.561213368 -0800 @@ -8,7 +8,8 @@ #include -extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[]; +extern char __per_cpu_load[]; +extern unsigned long __per_cpu_size; extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[]; Index: linux-2.6/include/asm-um/common.lds.S =================================================================== --- linux-2.6.orig/include/asm-um/common.lds.S 2007-11-27 14:43:28.468463564 -0800 +++ linux-2.6/include/asm-um/common.lds.S 2007-11-27 14:47:22.736459040 -0800 @@ -48,13 +48,8 @@ __setup_end = .; } - . = ALIGN(32); - .data.percpu : { - __per_cpu_start = . ; - *(.data.percpu) - __per_cpu_end = .
; - } - + PERCPU(32) + .initcall.init : { __initcall_start = .; INITCALLS Index: linux-2.6/include/linux/percpu.h =================================================================== --- linux-2.6.orig/include/linux/percpu.h 2007-11-27 14:43:28.480463288 -0800 +++ linux-2.6/include/linux/percpu.h 2007-11-27 14:43:29.561213368 -0800 @@ -39,7 +39,7 @@ #endif #define PERCPU_ENOUGH_ROOM \ - (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) + ((unsigned long)&__per_cpu_size + PERCPU_MODULE_RESERVE) #endif /* PERCPU_ENOUGH_ROOM */ /* Index: linux-2.6/init/main.c =================================================================== --- linux-2.6.orig/init/main.c 2007-11-27 14:43:28.492463330 -0800 +++ linux-2.6/init/main.c 2007-11-27 14:43:29.561213368 -0800 @@ -379,8 +379,8 @@ static void __init setup_per_cpu_areas(v ptr = alloc_bootmem_pages(size * nr_possible_cpus); for_each_possible_cpu(i) { - __per_cpu_offset[i] = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[i] = (unsigned long)ptr; + memcpy(ptr, __per_cpu_load, (unsigned long)&__per_cpu_size); ptr += size; } } Index: linux-2.6/kernel/lockdep.c =================================================================== --- linux-2.6.orig/kernel/lockdep.c 2007-11-27 14:43:28.504463364 -0800 +++ linux-2.6/kernel/lockdep.c 2007-11-27 14:43:29.561213368 -0800 @@ -609,9 +609,8 @@ static int static_obj(void *obj) * percpu var?
*/ for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); + start = per_cpu_offset(i); + end = start + PERCPU_ENOUGH_ROOM; if ((addr >= start) && (addr < end)) return 1; Index: linux-2.6/kernel/module.c =================================================================== --- linux-2.6.orig/kernel/module.c 2007-11-27 14:43:28.516463551 -0800 +++ linux-2.6/kernel/module.c 2007-11-27 15:06:15.152119803 -0800 @@ -353,7 +353,7 @@ static void *percpu_modalloc(unsigned lo align = PAGE_SIZE; } - ptr = __per_cpu_start; + ptr = (void *)0; for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { /* Extra for alignment requirement. */ extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; @@ -388,7 +388,7 @@ static void *percpu_modalloc(unsigned lo static void percpu_modfree(void *freeme) { unsigned int i; - void *ptr = __per_cpu_start + block_size(pcpu_size[0]); + void *ptr = (void *)0 + block_size(pcpu_size[0]); /* First entry is core kernel percpu data. */ for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { @@ -439,7 +439,7 @@ static int percpu_modinit(void) pcpu_size = kmalloc(sizeof(pcpu_size[0]) * pcpu_num_allocated, GFP_KERNEL); /* Static in-kernel percpu data (used). */ - pcpu_size[0] = -(__per_cpu_end-__per_cpu_start); + pcpu_size[0] = -(long)&__per_cpu_size; /* Free room. */ pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0]; if (pcpu_size[1] < 0) { Index: linux-2.6/include/asm-generic/percpu.h =================================================================== --- linux-2.6.orig/include/asm-generic/percpu.h 2007-11-27 14:43:28.444463581 -0800 +++ linux-2.6/include/asm-generic/percpu.h 2007-11-27 14:43:29.565213026 -0800 @@ -42,7 +42,8 @@ extern unsigned long __per_cpu_offset[NR * Only S390 provides its own means of moving the pointer.
*/ #ifndef SHIFT_PTR -#define SHIFT_PTR(__p, __offset) RELOC_HIDE((__p), (__offset)) +#define SHIFT_PTR(__p, __offset) \ + ((__typeof(__p))((void *)(__p) + (__offset))) #endif /*