From: Mel Gorman

Arch-independent zone-sizing uses account_memmap() in an attempt to
accurately account for how much memory is used in a zone by the memmap.
Watermark and per-cpu size initialisations then take the memmap into
account.  However, the memmap may span multiple zones and in one case,
there was an underflow causing boot failures.

A fix that perfectly accounts for the memory consumed by the memmap is
complicated, with no clear benefit.  The architecture-specific code in
x86_64 was simpler because it approximated how much memory was consumed
by the memmap backing a zone, regardless of where the memmap was really
stored.  This patch ditches the account_memmap() complexity and replaces
it with the simple approximation used by x86_64, while ensuring no
underflow occurs.

Signed-off-by: Mel Gorman
Acked-by: Paul Jackson
Signed-off-by: Andrew Morton
---

 mm/page_alloc.c |   73 +++++++++++-----------------------------------
 1 files changed, 18 insertions(+), 55 deletions(-)

diff -puN mm/page_alloc.c~account-for-memmap-and-optionally-the-kernel-image-as-holes-fix mm/page_alloc.c
--- a/mm/page_alloc.c~account-for-memmap-and-optionally-the-kernel-image-as-holes-fix
+++ a/mm/page_alloc.c
@@ -2262,58 +2262,6 @@ static void __init calculate_node_totalp
 			realtotalpages);
 }
 
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
-/* Account for mem_map for CONFIG_FLAT_NODE_MEM_MAP */
-unsigned long __meminit account_memmap(struct pglist_data *pgdat,
-							int zone_index)
-{
-	unsigned long pages = 0;
-	if (zone_index == memmap_zone_idx(pgdat->node_mem_map)) {
-		pages = pgdat->node_spanned_pages;
-		pages = (pages * sizeof(struct page)) >> PAGE_SHIFT;
-		printk(KERN_DEBUG "%lu pages used for memmap\n", pages);
-	}
-	return pages;
-}
-#else
-/* Account for mem_map for CONFIG_SPARSEMEM */
-unsigned long account_memmap(struct pglist_data *pgdat, int zone_index)
-{
-	unsigned long pages = 0;
-	unsigned long memmap_pfn;
-	struct page *memmap_addr;
-	int pnum;
-	unsigned long pgdat_startpfn, pgdat_endpfn;
-	struct mem_section *section;
-
-	pgdat_startpfn = pgdat->node_start_pfn;
-	pgdat_endpfn = pgdat_startpfn + pgdat->node_spanned_pages;
-
-	/* Go through valid sections looking for memmap */
-	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-		if (!valid_section_nr(pnum))
-			continue;
-
-		section = __nr_to_section(pnum);
-		if (!section_has_mem_map(section))
-			continue;
-
-		memmap_addr = __section_mem_map_addr(section);
-		memmap_pfn = (unsigned long)memmap_addr >> PAGE_SHIFT;
-
-		if (memmap_pfn < pgdat_startpfn || memmap_pfn >= pgdat_endpfn)
-			continue;
-
-		if (zone_index == memmap_zone_idx(memmap_addr))
-			pages += (PAGES_PER_SECTION * sizeof(struct page));
-	}
-
-	pages >>= PAGE_SHIFT;
-	printk(KERN_DEBUG "%lu pages used for SPARSE memmap\n", pages);
-	return pages;
-}
-#endif
-
 /*
  * Set up the zone data structures:
  *  - mark all pages reserved
@@ -2335,17 +2283,32 @@ static void __meminit free_area_init_cor
 
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize;
+		unsigned long size, realsize, memmap_pages;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
 								zholes_size);
 
-		realsize -= account_memmap(pgdat, j);
+		/*
+		 * Adjust realsize so that it accounts for how much memory
+		 * is used by this zone for memmap. This affects the watermark
+		 * and per-cpu initialisations
+		 */
+		memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
+		if (realsize >= memmap_pages) {
+			realsize -= memmap_pages;
+			printk(KERN_DEBUG
+				"  %s zone: %lu pages used for memmap\n",
+				zone_names[j], memmap_pages);
+		} else
+			printk(KERN_WARNING
+				"  %s zone: %lu pages exceeds realsize %lu\n",
+				zone_names[j], memmap_pages, realsize);
+
+		/* Account for reserved DMA pages */
 		if (j == ZONE_DMA && realsize > dma_reserve) {
 			realsize -= dma_reserve;
-			printk(KERN_DEBUG "%lu pages DMA reserved\n",
+			printk(KERN_DEBUG "  DMA zone: %lu pages reserved\n",
 				dma_reserve);
 		}
_
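
For reference, the approximation the patch adopts is simple arithmetic:
one struct page per spanned page, charged to the zone whether or not the
memmap physically lives there.  The standalone userspace sketch below is
not part of the patch; the 4K page size, 64-byte struct page and 1GB zone
are illustrative assumptions (both sizeof(struct page) and PAGE_SHIFT
vary with architecture and config):

	#include <stdio.h>

	#define PAGE_SHIFT	12	/* assumed: 4K pages */
	#define STRUCT_PAGE_SZ	64	/* assumed: sizeof(struct page) */

	int main(void)
	{
		/* A zone spanning 1GB: 262144 4K pages, none of them holes */
		unsigned long size = 262144;	/* spanned pages in the zone */
		unsigned long realsize = size;	/* present (non-hole) pages */

		/*
		 * Approximate the pages consumed by the memmap backing this
		 * zone, regardless of where the memmap is really stored.
		 */
		unsigned long memmap_pages =
				(size * STRUCT_PAGE_SZ) >> PAGE_SHIFT;

		if (realsize >= memmap_pages) {
			realsize -= memmap_pages; /* 262144 - 4096 = 258048 */
			printf("%lu pages used for memmap\n", memmap_pages);
		} else {
			/* the guard that avoids the boot-failure underflow */
			printf("%lu pages exceeds realsize %lu\n",
					memmap_pages, realsize);
		}
		return 0;
	}

With these numbers the memmap overhead is size/64 pages, i.e. 4096 pages
(16MB) for the 1GB zone, or about 1.56%, which is then excluded from the
watermark and per-cpu calculations.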