X86_64: Fixed Base cpu area

Put the base cpu area at a fixed location that is reachable from the
kernel text segment. This will avoid a lot of offset calculation and
will allow the removal of the arrays of pointers to the per cpu areas.

The choice of location is a bit awkward right now: I stuffed the area
between the kernel and the modules. The optimal layout would be to
change the arrangement of kernel text and modules so that the cpu area
can be placed behind the modules area, but that area is now at the end
of the address space. Something like this:

1. Kernel text
2. Modules
3. cpu area for processor 0 (canonical per cpu pointers) (CPU_AREA_BASE)
4. cpu areas for the other processors

Areas 1-3 must be within 2 GB so that 32 bit offsets can reach all
kernel variables. The offsets of per cpu variables can then be
calculated at link time by ld instead of with the current runtime
calculations.

Area 4 needs to be pretty large: supporting 16k cpus at 16M per cpu
each requires 256GB. Maybe reserve a terabyte for this area just to be
safe? Then we would need to shift the kernel and the modules area down.

Signed-off-by: Christoph Lameter

---
 arch/x86/Kconfig                 |    3 +--
 arch/x86/kernel/setup64.c        |    2 +-
 arch/x86/kernel/vmlinux_64.lds.S |    4 +++-
 include/asm-x86/pgtable_64.h     |    2 +-
 4 files changed, 6 insertions(+), 5 deletions(-)

Index: linux-2.6/arch/x86/kernel/vmlinux_64.lds.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmlinux_64.lds.S	2007-11-17 22:10:08.584850535 -0800
+++ linux-2.6/arch/x86/kernel/vmlinux_64.lds.S	2007-11-17 22:11:06.035850454 -0800
@@ -6,6 +6,7 @@
 
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 
 #undef i386	/* in case the preprocessor is a 32bit one */
 
@@ -16,6 +17,7 @@ jiffies_64 = jiffies;
 _proxy_pda = 1;
 PHDRS {
 	text PT_LOAD FLAGS(5);	/* R_E */
+	percpu PT_LOAD FLAGS(4);
 	data PT_LOAD FLAGS(7);	/* RWE */
 	user PT_LOAD FLAGS(7);	/* RWE */
 	data.init PT_LOAD FLAGS(7);	/* RWE */
@@ -203,7 +205,7 @@ SECTIONS
   __initramfs_end = .;
 #endif
 
-  PERCPU(4096)
+  FIXED_ADDR_PERCPU(CPU_AREA_BASE, 4096)
 
   . = ALIGN(4096);
   __init_end = .;

Index: linux-2.6/include/asm-x86/pgtable_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/pgtable_64.h	2007-11-17 22:10:54.185350342 -0800
+++ linux-2.6/include/asm-x86/pgtable_64.h	2007-11-17 22:11:06.035850454 -0800
@@ -138,7 +138,7 @@ static inline pte_t ptep_get_and_clear_f
 #define VMALLOC_START	_AC(0xffffc20000000000, UL)
 #define VMALLOC_END	_AC(0xffffe1ffffffffff, UL)
 #define VMEMMAP_START	_AC(0xffffe20000000000, UL)
-#define CPU_AREA_BASE	_AC(0xfffff20000000000, UL)
+#define CPU_AREA_BASE	_AC(0xffffffff84000000, UL)
 #define MODULES_VADDR	_AC(0xffffffff88000000, UL)
 #define MODULES_END	_AC(0xfffffffffff00000, UL)
 #define MODULES_LEN	(MODULES_END - MODULES_VADDR)

Index: linux-2.6/arch/x86/kernel/setup64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup64.c	2007-11-17 22:10:58.303600242 -0800
+++ linux-2.6/arch/x86/kernel/setup64.c	2007-11-17 22:11:06.039850319 -0800
@@ -111,7 +111,7 @@ void __init setup_per_cpu_areas(void)
 	 */
 	cpu_pda(i)->data_offset = base_for_cpu - __per_cpu_start;
-	memcpy(base_for_cpu, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+	memcpy(base_for_cpu, __load_per_cpu_start, __per_cpu_end - __per_cpu_start);
 
 	pda_for_cpu = &per_cpu(pda, i);
 
 	/* Relocate the pda */

Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig	2007-11-17 22:10:54.185350342 -0800
+++ linux-2.6/arch/x86/Kconfig	2007-11-17 22:11:08.659850282 -0800
@@ -170,8 +170,7 @@ config CPU_AREA_VIRTUAL
 
 config CPU_AREA_ORDER
 	int
-	default "16" if X86_64
-	default "6" if X86_32
+	default "6"
 
 config CPU_AREA_ALLOC_ORDER
 	int
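
As an aside, here is a minimal standalone C sketch (not part of the
patch) of the arithmetic behind the layout described in the changelog.
CPU_AREA_BASE, MODULES_VADDR and MODULES_END are the values from the
diff above; __START_KERNEL_map is the usual x86_64 kernel text base;
the 16k cpus at 16M per cpu sizing is taken from the description.

/*
 * Standalone sketch, not part of the patch: the arithmetic behind the
 * layout above.  CPU_AREA_BASE, MODULES_VADDR and MODULES_END are the
 * values from the diff; __START_KERNEL_map is the usual x86_64 kernel
 * text base; 16k cpus at 16MB each is the sizing from the changelog.
 */
#include <stdio.h>
#include <inttypes.h>

#define __START_KERNEL_map	0xffffffff80000000ULL
#define CPU_AREA_BASE		0xffffffff84000000ULL
#define MODULES_VADDR		0xffffffff88000000ULL
#define MODULES_END		0xfffffffffff00000ULL

int main(void)
{
	uint64_t per_cpu_size = 16ULL << 20;	/* 16MB per cpu */
	uint64_t nr_cpus = 16 * 1024;		/* 16k cpus */

	/* Area 4 (cpu areas for all processors): 16k * 16MB = 256GB */
	printf("area 4 total:        %" PRIu64 " GB\n",
	       (nr_cpus * per_cpu_size) >> 30);

	/* Window for the cpu 0 area between kernel text and modules */
	printf("cpu 0 area window:   %" PRIu64 " MB\n",
	       (MODULES_VADDR - CPU_AREA_BASE) >> 20);

	/*
	 * Areas 1-3 must stay within 2GB of the text base so that
	 * sign extended 32 bit offsets reach every kernel variable.
	 */
	printf("text .. modules end: %" PRIu64 " MB (limit 2048 MB)\n",
	       (MODULES_END - __START_KERNEL_map) >> 20);

	return 0;
}

Keeping areas 1-3 inside that 2GB window is what makes the cpu 0
address of a per cpu variable a link time constant reachable with a
32 bit offset, so ld can resolve it directly instead of going through
the runtime data_offset calculation shown in the setup64.c hunk.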