X86_64: Fixed Base cpu area

Put the base cpu area at a fixed location that is reachable from the
kernel text segment. This avoids a lot of offset calculation and allows
the removal of the arrays of pointers to the per cpu areas.

The choice of location is a bit awkward right now: I stuffed the area
between the kernel and the modules. The optimal layout would be to
rearrange kernel text and modules so that the cpu area could be placed
behind the modules area, but that area is currently at the end of the
address space. Something like this:

1. Kernel text
2. Modules
3. cpu area for processor 0 (canonical per cpu pointers) (CPU_AREA_BASE)
4. cpu areas for the other processors

Areas 1-3 must be within 2GB so that 32 bit offsets can reach all
kernel variables. The per cpu offsets of per cpu variables can then be
calculated at link time by ld instead of the current runtime
calculations.

Area 4 needs to be pretty large to support 16k cpus at 16MB per cpu
each: 256GB is needed. Maybe reserve a terabyte for this area just to
be safe? Then we would need to shift the kernel and the modules area
down.

Signed-off-by: Christoph Lameter

---
 include/linux/percpu.h |   21 ++++++++++++++++++---
 mm/cpu_alloc.c         |   12 ++++++------
 2 files changed, 24 insertions(+), 9 deletions(-)
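As an illustration of the addressing scheme (not part of the patch),
here is a minimal userspace sketch of the arithmetic that a fixed base
enables. The PAGE_SHIFT, CPU_AREA_ORDER and CPU_AREA_BASE values and
the counter_offset variable are made up for the example; the real base
would be chosen when laying out the x86_64 address space:

/*
 * Userspace model of fixed-base per cpu addressing. All constants are
 * placeholders for illustration only.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define CPU_AREA_ORDER	2		/* 4 pages per cpu in this model */
#define CPU_AREA_BASE	0x400000UL	/* stand-in for the fixed base */

/* Start of a cpu's area: base + cpu * (1 << (order + page shift)) */
static unsigned long cpu_offset(unsigned long cpu)
{
	return CPU_AREA_BASE + (cpu << (CPU_AREA_ORDER + PAGE_SHIFT));
}

int main(void)
{
	/* With a fixed base, ld could emit this constant at link time */
	unsigned long counter_offset = 0x40;
	unsigned long cpu;

	for (cpu = 0; cpu < 4; cpu++)
		printf("cpu %lu: counter at %#lx\n",
			cpu, cpu_offset(cpu) + counter_offset);
	return 0;
}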
Index: linux-2.6/mm/cpu_alloc.c
===================================================================
--- linux-2.6.orig/mm/cpu_alloc.c	2007-11-18 14:38:29.665758187 -0800
+++ linux-2.6/mm/cpu_alloc.c	2007-11-18 15:11:24.936305899 -0800
@@ -50,7 +50,7 @@ static unsigned long units_reserved;	/*
  */
 #define ALLOC_SIZE	(1UL << (CONFIG_CPU_AREA_ORDER + PAGE_SHIFT))
-static u8 cpu_area[NR_CPUS * ALLOC_SIZE];
+u8 cpu_area[NR_CPUS * ALLOC_SIZE];
 
 static DECLARE_BITMAP(cpu_alloc_map, UNITS);
 
 void * __init boot_cpu_alloc(unsigned long size)
@@ -59,7 +59,7 @@ void * __init boot_cpu_alloc(unsigned lo
 	units_reserved += size_to_units(size);
 	BUG_ON(units_reserved > UNITS);
 
-	return cpu_area + x * UNIT_SIZE;
+	return (void *)(x * UNIT_SIZE);
 }
 
 static int first_free;		/* First known free unit */
@@ -139,7 +139,7 @@ void *cpu_alloc(unsigned long size, gfp_
 
 	spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
 
-	ptr = cpu_area + (start + units_reserved) * UNIT_SIZE;
+	ptr = (void *)((start + units_reserved) * UNIT_SIZE);
 
 	if (gfpflags & __GFP_ZERO) {
 		int cpu;
@@ -164,11 +164,11 @@ void cpu_free(void *start, unsigned long
 {
	int units = size_to_units(size);
 	int index;
-	u8 *p = start;
+	unsigned long p = (unsigned long)start;
 	unsigned long flags;
 
-	BUG_ON(p < (cpu_area + units_reserved * UNIT_SIZE));
-	index = (p - cpu_area) / UNIT_SIZE - units_reserved;
+	BUG_ON(p < units_reserved * UNIT_SIZE);
+	index = p / UNIT_SIZE - units_reserved;
 
 	BUG_ON(!test_bit(index, cpu_alloc_map)
 		|| index >= UNITS - units_reserved);
Index: linux-2.6/include/linux/percpu.h
===================================================================
--- linux-2.6.orig/include/linux/percpu.h	2007-11-18 14:38:29.670533597 -0800
+++ linux-2.6/include/linux/percpu.h	2007-11-18 15:10:45.161306181 -0800
@@ -144,11 +144,26 @@ static inline void percpu_free(void *__p
  * handled from an interrupt context).
  */
-#define CPU_OFFSET(__cpu) \
-	((unsigned long)(__cpu) << (CONFIG_CPU_AREA_ORDER + PAGE_SHIFT))
+static inline unsigned long cpu_offset(unsigned long cpu)
+{
+	extern u8 cpu_area[];
+
+	int shift = CONFIG_CPU_AREA_ORDER + PAGE_SHIFT;
+
+#ifdef CONFIG_DEBUG_VM
+	BUG_ON(!cpu_isset(cpu, cpu_possible_map) && system_state == SYSTEM_RUNNING);
+	WARN_ON(!cpu_isset(cpu, cpu_online_map));
+#endif
+#ifndef CPU_AREA_BASE
+	return (unsigned long)cpu_area + (cpu << shift);
+#else
+	return CPU_AREA_BASE + (cpu << shift);
+#endif
+}
+
 
 #define CPU_PTR(__p, __cpu)	((__typeof__(__p))((void *)(__p) + \
-					CPU_OFFSET(__cpu)))
+					cpu_offset(__cpu)))
 
 #define CPU_ALLOC(type, flags)	cpu_alloc(sizeof(type), flags, \
 						__alignof__(type))
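For completeness, a hypothetical caller of the reworked interface. The
stats structure and both functions are invented for the example, and it
assumes cpu_alloc() returns NULL on failure:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/percpu.h>

struct stats {
	unsigned long events;
};

static struct stats *stats;	/* holds a cpu area offset, not a real pointer */

static int __init stats_init(void)
{
	/* __GFP_ZERO zeroes the object on each cpu (see cpu_alloc()) */
	stats = CPU_ALLOC(struct stats, GFP_KERNEL | __GFP_ZERO);
	if (!stats)
		return -ENOMEM;
	return 0;
}

/* Every access resolves to cpu_offset(cpu) plus the object's offset */
static void stats_count(int cpu)
{
	CPU_PTR(stats, cpu)->events++;
}

Since cpu_offset() is an inline function instead of the old
CPU_OFFSET() macro, the CONFIG_DEBUG_VM checks can now catch references
to impossible or offline cpus at runtime.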