If a zone is empty at boot-time and then hot-added to later, it needs to run the same init code that would have been run on it at boot. This patch breaks out zone table and per-cpu-pages functions for use by the hotplug code. You can almost see all of the free_area_init_core() function on one page now. :) It also adds an if() to the wait-table init code to allow runtime allocations to call kmalloc() instead of using the bootmem allocator Signed-off-by: Dave Hansen DESC C1.1 EDESC Signed-off-by: Dave Hansen DESC C1-fix EDESC Signed-off-by: Dave Hansen Index: linux-2.6.13/mm/page_alloc.c =================================================================== --- linux-2.6.13.orig/mm/page_alloc.c 2005-08-28 16:41:01.000000000 -0700 +++ linux-2.6.13/mm/page_alloc.c 2005-08-30 12:09:37.000000000 -0700 @@ -1850,6 +1850,66 @@ void __init setup_per_cpu_pageset() #endif +static __devinit +void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) +{ + struct pglist_data *pgdat = zone->zone_pgdat; + int i; + + /* + * The per-page waitqueue mechanism uses hashed waitqueues + * per zone. + */ + zone->wait_table_size = wait_table_size(zone_size_pages); + zone->wait_table_bits = wait_table_bits(zone->wait_table_size); + zone->wait_table = (wait_queue_head_t *) + alloc_bootmem_node(pgdat, zone->wait_table_size + * sizeof(wait_queue_head_t)); + + for(i = 0; i < zone->wait_table_size; ++i) + init_waitqueue_head(zone->wait_table + i); +} + +static __devinit void zone_pcp_init(struct zone *zone) +{ + int cpu; + unsigned long batch = zone_batchsize(zone); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { +#ifdef CONFIG_NUMA + /* Early boot. Slab allocator not functional yet */ + zone->pageset[cpu] = &boot_pageset[cpu]; + setup_pageset(&boot_pageset[cpu],0); +#else + setup_pageset(zone_pcp(zone,cpu), batch); +#endif + } + printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", + zone->name, zone->present_pages, batch); +} + +static void init_currently_empty_zone(struct zone *zone, + unsigned long zone_start_pfn, unsigned long size) +{ + const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); + struct pglist_data *pgdat = zone->zone_pgdat; + int nid = pgdat->node_id; + + zone_wait_table_init(zone, size); + pgdat->nr_zones = zone_idx(zone) + 1; + + zone->zone_mem_map = pfn_to_page(zone_start_pfn); + zone->zone_start_pfn = zone_start_pfn; + + if ((zone_start_pfn) & (zone_required_alignment-1)) + printk(KERN_CRIT "BUG: wrong zone alignment, it will crash\n"); + + memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn); + + zone_init_free_lists(pgdat, zone, zone->spanned_pages); +} + + /* * Set up the zone data structures: * - mark all pages reserved @@ -1890,19 +1950,7 @@ static void __init free_area_init_core(s zone->temp_priority = zone->prev_priority = DEF_PRIORITY; - batch = zone_batchsize(zone); - - for (cpu = 0; cpu < NR_CPUS; cpu++) { -#ifdef CONFIG_NUMA - /* Early boot. Slab allocator not functional yet */ - zone->pageset[cpu] = &boot_pageset[cpu]; - setup_pageset(&boot_pageset[cpu],0); -#else - setup_pageset(zone_pcp(zone,cpu), batch); -#endif - } - printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", - zone_names[j], realsize, batch); + zone_pcp_init(zone); INIT_LIST_HEAD(&zone->active_list); INIT_LIST_HEAD(&zone->inactive_list); zone->nr_scan_active = 0; @@ -1913,28 +1961,10 @@ static void __init free_area_init_core(s if (!size) continue; - /* - * The per-page waitqueue mechanism uses hashed waitqueues - * per zone. - */ - zone->wait_table_size = wait_table_size(size); - zone->wait_table_bits = - wait_table_bits(zone->wait_table_size); - zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, zone->wait_table_size - * sizeof(wait_queue_head_t)); - - for(i = 0; i < zone->wait_table_size; ++i) - init_waitqueue_head(zone->wait_table + i); - - pgdat->nr_zones = j+1; - - zone->zone_mem_map = pfn_to_page(zone_start_pfn); - zone->zone_start_pfn = zone_start_pfn; - memmap_init(size, nid, j, zone_start_pfn); zonetable_add(zone, nid, j, zone_start_pfn, size); + init_currently_empty_zone(zone, zone_start_pfn, size); zone_start_pfn += size;