Try to allocate node memmap near the end of node This fixes problems with very large nodes (over 128GB) filling up all of the first 4GB with their mem_map and not leaving enough space for the swiotlb. Signed-off-by: Andi Kleen --- arch/x86_64/mm/numa.c | 12 +++++++++++- include/linux/bootmem.h | 3 +++ mm/bootmem.c | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) Index: linux/arch/x86_64/mm/numa.c =================================================================== --- linux.orig/arch/x86_64/mm/numa.c +++ linux/arch/x86_64/mm/numa.c @@ -172,7 +172,7 @@ void __init setup_node_bootmem(int nodei /* Initialize final allocator for a zone */ void __init setup_node_zones(int nodeid) { - unsigned long start_pfn, end_pfn; + unsigned long start_pfn, end_pfn, memmapsize, limit; unsigned long zones[MAX_NR_ZONES]; unsigned long holes[MAX_NR_ZONES]; @@ -182,6 +182,16 @@ void __init setup_node_zones(int nodeid) Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); + /* Try to allocate mem_map at end to not fill up precious <4GB + memory. */ + memmapsize = sizeof(struct page) * (end_pfn-start_pfn); + limit = end_pfn << PAGE_SHIFT; + NODE_DATA(nodeid)->node_mem_map = + __alloc_bootmem_core(NODE_DATA(nodeid)->bdata, + memmapsize, SMP_CACHE_BYTES, + round_down(limit - memmapsize, PAGE_SIZE), + limit); + size_zones(zones, holes, start_pfn, end_pfn); free_area_init_node(nodeid, NODE_DATA(nodeid), zones, start_pfn, holes); Index: linux/include/linux/bootmem.h =================================================================== --- linux.orig/include/linux/bootmem.h +++ linux/include/linux/bootmem.h @@ -52,6 +52,9 @@ extern void * __init __alloc_bootmem_low unsigned long size, unsigned long align, unsigned long goal); +extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, unsigned long align, unsigned long goal, + unsigned long limit); #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE extern void __init reserve_bootmem (unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ Index: linux/mm/bootmem.c =================================================================== --- linux.orig/mm/bootmem.c +++ linux/mm/bootmem.c @@ -152,7 +152,7 @@ static void __init free_bootmem_core(boo * * NOTE: This function is _not_ reentrant. */ -static void * __init +void * __init __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) {