cpu_alloc: Allow cpu allocations early in boot.

Add a new function

	boot_cpu_alloc(size)

that allows allocations of larger areas during early boot. The call uses
the bootmem allocator to provide the backing storage and reserves the
units at the beginning of the cpu area. Boot allocations are not tracked
in the allocation bitmap and cannot be freed.

Signed-off-by: Christoph Lameter

---
 include/linux/cpu_alloc.h |    6 +++
 mm/cpu_alloc.c            |   87 ++++++++++++++++++++++++++++++++--------------
 2 files changed, 67 insertions(+), 26 deletions(-)

Index: linux-2.6/include/linux/cpu_alloc.h
===================================================================
--- linux-2.6.orig/include/linux/cpu_alloc.h	2007-11-13 14:08:01.234749949 -0800
+++ linux-2.6/include/linux/cpu_alloc.h	2007-11-13 14:21:40.523500204 -0800
@@ -52,4 +52,10 @@
 void *cpu_alloc(unsigned long size, gfp_t gfp, unsigned long align);
 void cpu_free(void *cpu_pointer, unsigned long size);
 
+/*
+ * Early boot allocator for per_cpu variables and special per cpu areas.
+ * Allocations are not tracked and cannot be freed.
+ */
+void *boot_cpu_alloc(unsigned long size);
+
 #endif	/* _LINUX_CPU_ALLOC_H_ */

Index: linux-2.6/mm/cpu_alloc.c
===================================================================
--- linux-2.6.orig/mm/cpu_alloc.c	2007-11-13 14:08:01.547579000 -0800
+++ linux-2.6/mm/cpu_alloc.c	2007-11-13 16:42:48.202519672 -0800
@@ -33,10 +33,20 @@
 #define UNITS_PER_BLOCK (ALLOC_SIZE / UNIT_SIZE)
 
 /*
+ * How many units are needed for an object of a given size
+ */
+static int size_to_units(unsigned long size)
+{
+	return DIV_ROUND_UP(size, UNIT_SIZE);
+}
+
+/*
  * Lock to protect the bitmap and the meta data for the cpu allocator.
  */
 static DEFINE_SPINLOCK(cpu_alloc_map_lock);
 
+#define BOOT_ALLOC	(1 << __GFP_BITS_SHIFT)
+
 #ifdef CONFIG_CPU_AREA_VIRTUAL
 
 /*
@@ -58,20 +68,23 @@ static DEFINE_SPINLOCK(cpu_alloc_map_loc
 static unsigned long *cpu_alloc_map = NULL;
 static int cpu_alloc_map_order = -1;	/* Size of the bitmap in page order */
 static unsigned long active_blocks;	/* Number of block allocated on each cpu */
-static unsigned long units_free;	/* Number of available units */
 static unsigned long units_total;	/* Total units that are managed */
-
+static unsigned long units_reserved;	/* Units reserved by boot allocations */
 /*
  * Allocate a block of memory to be used to provide cpu area memory
  * or to extend the bitmap for the cpu map.
  */
 void *cpu_area_alloc_block(unsigned long size, gfp_t flags, int node)
 {
-	struct page *page = alloc_pages_node(node,
+	if (!(flags & BOOT_ALLOC)) {
+		struct page *page = alloc_pages_node(node,
 			flags, get_order(size));
-	if (page)
-		return page_address(page);
-	return NULL;
+		if (page)
+			return page_address(page);
+		return NULL;
+	} else
+		return __alloc_bootmem_node(NODE_DATA(node), size, size,
+			__pa(MAX_DMA_ADDRESS));
 }
 
 pte_t *cpu_area_pte_populate(pmd_t *pmd, unsigned long addr,
@@ -192,8 +205,10 @@ static int expand_cpu_area(gfp_t flags)
 	/*
 	 * Determine the size of the bit map needed
 	 */
-	bits = (blocks + 1) * UNITS_PER_BLOCK;
+	bits = (blocks + 1) * UNITS_PER_BLOCK - units_reserved;
+
 	map_order = get_order(DIV_ROUND_UP(bits, 8));
+	BUG_ON(map_order >= MAX_ORDER);
 	start = cpu_area +
 		(blocks << (PAGE_SHIFT + CONFIG_CPU_AREA_ALLOC_ORDER));
 
@@ -243,12 +258,34 @@ static int expand_cpu_area(gfp_t flags)
 
 	active_blocks++;
 	units_total += UNITS_PER_BLOCK;
-	units_free += UNITS_PER_BLOCK;
 	err = 0;
 out:
 	return err;
 }
 
+void * __init boot_cpu_alloc(unsigned long size)
+{
+	unsigned long flags;
+	unsigned long x = units_reserved;
+	unsigned long units = size_to_units(size);
+
+	/*
+	 * Locking is really not necessary during boot
+	 * but expand_cpu_area() unlocks and relocks.
+	 * If we do not perform locking here then
+	 *
+	 * 1. The cpu_alloc_map_lock is locked when
+	 *    we exit boot causing a hang on the next cpu_alloc().
+	 * 2. lockdep will get upset if we do not consistently
+	 *    handle things.
+	 */
+	spin_lock_irqsave(&cpu_alloc_map_lock, flags);
+	while (units_reserved + units > units_total)
+		expand_cpu_area(BOOT_ALLOC);
+	spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
+	units_reserved += units;
+	return cpu_area + x * UNIT_SIZE;
+}
 #else
 
 /*
@@ -261,7 +298,6 @@ out:
 static u8 cpu_area[NR_CPUS * ALLOC_SIZE];
 
 static DECLARE_BITMAP(cpu_alloc_map, UNITS_PER_BLOCK);
-static int units_free = UNITS_PER_BLOCK;
 #define cpu_alloc_map_order CONFIG_CPU_AREA_ORDER
 #define units_total UNITS_PER_BLOCK
 
@@ -269,17 +305,18 @@ static inline int expand_cpu_area(gfp_t
 {
 	return -ENOSYS;
 }
-#endif
-
-static int first_free;	/* First known free unit */
 
-/*
- * How many units are needed for an object of a given size
- */
-static int size_to_units(unsigned long size)
+void * __init boot_cpu_alloc(unsigned long size)
 {
-	return DIV_ROUND_UP(size, UNIT_SIZE);
+	unsigned long x = units_reserved;
+
+	units_reserved += size_to_units(size);
+	BUG_ON(units_reserved > units_total);
+	return cpu_area + x * UNIT_SIZE;
 }
+#endif
+
+static int first_free;	/* First known free unit */
 
 /*
  * Mark an object as used in the cpu_alloc_map
@@ -333,7 +370,7 @@ restart:
 		if (first)
 			first_free = start;
 
-		if (start >= units_total) {
+		if (start >= units_total - units_reserved) {
 			if (expand_cpu_area(gfpflags))
 				goto out_of_memory;
 			goto restart;
@@ -343,7 +380,7 @@ restart:
 		 * Check alignment and that there is enough space after
 		 * the starting unit.
 		 */
-		if (start % (align / UNIT_SIZE) == 0 &&
+		if ((start + units_reserved) % (align / UNIT_SIZE) == 0 &&
 			find_next_bit(cpu_alloc_map, map_size, start + 1)
 							>= start + units)
 				break;
@@ -354,18 +391,17 @@ restart:
 	if (first)
 		first_free = start + units;
 
-	while (start + units > units_total) {
+	while (start + units > units_total - units_reserved) {
 		if (expand_cpu_area(gfpflags))
 			goto out_of_memory;
 	}
 
 	set_map(start, units);
-	units_free -= units;
 	__count_vm_events(CPU_BYTES, units * UNIT_SIZE);
 	spin_unlock_irqrestore(&cpu_alloc_map_lock, flags);
 
-	ptr = cpu_area + start * UNIT_SIZE;
+	ptr = cpu_area + (start + units_reserved) * UNIT_SIZE;
 
 	if (gfpflags & __GFP_ZERO) {
 		int cpu;
 
@@ -393,15 +429,14 @@ void cpu_free(void *start, unsigned long
 	u8 *p = start;
 	unsigned long flags;
 
-	BUG_ON(p < cpu_area);
-	index = (p - cpu_area) / UNIT_SIZE;
+	BUG_ON(p < (cpu_area + units_reserved * UNIT_SIZE));
+	index = (p - cpu_area) / UNIT_SIZE - units_reserved;
 	BUG_ON(!test_bit(index, cpu_alloc_map) ||
-			index >= units_total);
+			index >= units_total - units_reserved);
 
 	spin_lock_irqsave(&cpu_alloc_map_lock, flags);
 	clear_map(index, units);
-	units_free += units;
 	__count_vm_events(CPU_BYTES, -units * UNIT_SIZE);
 
 	if (index < first_free)
 		first_free = index;
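
A minimal usage sketch for illustration (not part of the patch; the
function and variable names below are hypothetical). An early boot user
reserves its per cpu space once, before the page allocator is usable,
and must never pass the result to cpu_free():

	#include <linux/init.h>
	#include <linux/cpu_alloc.h>

	/* Hypothetical per cpu scratch area reserved during early boot */
	static void *boot_scratch;

	void __init setup_boot_scratch(void)
	{
		/*
		 * The units come from the front of the cpu area and are
		 * accounted only in units_reserved, never in cpu_alloc_map,
		 * so the reservation is permanent.
		 */
		boot_scratch = boot_cpu_alloc(4 * PAGE_SIZE);
	}

Note the design choice behind BOOT_ALLOC: passing a flag bit above
__GFP_BITS_SHIFT through the gfp_t argument lets expand_cpu_area() and
cpu_area_alloc_block() keep a single code path while switching the
backing allocation from alloc_pages_node() to __alloc_bootmem_node()
during boot.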