I'm not sure that this is a viable general case that we'll run into all that often, at least until we start doing NUMA node hotplug. So, leave this in the testing code for now, and keep it available in case we ever need it.

It's handy for testing on an x86 machine that has <896MB of RAM, because you can boot with mem=512MB, and then add to (what was) an empty HIGHMEM zone.

Signed-off-by: Dave Hansen

Index: linux-2.6.13/include/linux/memory.h
===================================================================
--- linux-2.6.13.orig/include/linux/memory.h	2005-08-30 12:10:12.000000000 -0700
+++ linux-2.6.13/include/linux/memory.h	2005-08-30 12:13:36.000000000 -0700
@@ -77,6 +77,7 @@ extern void unregister_memory_notifier(s
 
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
+extern int hot_add_zone_init(struct zone *zone, unsigned long phys_start_pfn, unsigned long size_pages);
 
 #endif /* _LINUX_MEMORY_H_ */
Index: linux-2.6.13/mm/page_alloc.c
===================================================================
--- linux-2.6.13.orig/mm/page_alloc.c
+++ linux-2.6.13/mm/page_alloc.c
@@ -1406,7 +1406,12 @@ static int __init build_zonelists_node(p
 	case ZONE_HIGHMEM:
 		zone = pgdat->node_zones + ZONE_HIGHMEM;
-		if (zone->present_pages) {
+		/*
+		 * with mem hotplug we don't increment present_pages
+		 * until the pages are actually freed into the zone,
+		 * but we increment spanned pages much earlier
+		 */
+		if (zone->spanned_pages) {
 #ifndef CONFIG_HIGHMEM
 			BUG();
 #endif
@@ -1421,11 +1426,11 @@ static int __init build_zonelists_node(p
 		}
 	case ZONE_NORMAL:
 		zone = pgdat->node_zones + ZONE_NORMAL;
-		if (zone->present_pages)
+		if (zone->spanned_pages)
 			zonelist->zones[j++] = zone;
 	case ZONE_DMA:
 		zone = pgdat->node_zones + ZONE_DMA;
-		if (zone->present_pages)
+		if (zone->spanned_pages)
 			zonelist->zones[j++] = zone;
 	}
 
@@ -1495,7 +1500,7 @@ static int __init find_next_best_node(in
 	return best_node;
 }
 
-static void __init build_zonelists(pg_data_t *pgdat)
+void __devinit build_zonelists(pg_data_t *pgdat)
 {
 	int i, j, k, node, local_node;
 	int prev_node, load;
@@ -1542,7 +1547,7 @@ static void __init build_zonelists(pg_da
 
 #else	/* CONFIG_NUMA */
 
-static void __init build_zonelists(pg_data_t *pgdat)
+void __devinit build_zonelists(pg_data_t *pgdat)
 {
 	int i, j, k, node, local_node;
 
@@ -1874,7 +1879,6 @@ void __init setup_per_cpu_pageset()
 
 #endif
 
-static __devinit
 void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 {
 	int i;
@@ -1897,7 +1901,7 @@ void zone_wait_table_init(struct zone *z
 		init_waitqueue_head(zone->wait_table + i);
 }
 
-static __devinit void zone_pcp_init(struct zone *zone)
+void zone_pcp_init(struct zone *zone)
 {
 	int cpu;
 	unsigned long batch = zone_batchsize(zone);
@@ -1920,7 +1924,6 @@ static void init_currently_empty_zone(st
 {
 	const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
 	struct pglist_data *pgdat = zone->zone_pgdat;
-	int nid = pgdat->node_id;
 
 	zone_wait_table_init(zone, size);
 	pgdat->nr_zones = zone_idx(zone) + 1;
@@ -1934,6 +1937,7 @@ static void init_currently_empty_zone(st
 	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
 
 	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+	zone->spanned_pages = size;
 }
 
 
@@ -1946,7 +1950,7 @@ static void init_currently_empty_zone(st
 static void __init free_area_init_core(struct pglist_data *pgdat,
 		unsigned long *zones_size, unsigned long *zholes_size)
 {
-	unsigned long i, j;
+	unsigned long j;
 	int cpu, nid = pgdat->node_id;
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
@@ -1958,7 +1962,6 @@ static void __init free_area_init_core(s
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize;
-		unsigned long batch;
 
 		realsize = size = zones_size[j];
 		if (zholes_size)
@@ -1968,7 +1971,6 @@ static void __init free_area_init_core(s
 			nr_kernel_pages += realsize;
 		nr_all_pages += realsize;
 
-		zone->spanned_pages = size;
 		zone->present_pages = realsize;
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -1990,10 +1992,10 @@ static void __init free_area_init_core(s
 
 		if (!size)
 			continue;
 
-		memmap_init(size, nid, j, zone_start_pfn);
 		zonetable_add(zone, nid, j, zone_start_pfn, size);
 		init_currently_empty_zone(zone, zone_start_pfn, size);
+		//memmap_init(size, nid, j, zone_start_pfn);
 
 		zone_start_pfn += size;
 
@@ -2606,3 +2608,32 @@ void *__init alloc_large_system_hash(con
 
 	return table;
 }
+
+static inline int zone_previously_initialized(struct zone *zone)
+{
+	if (zone->wait_table_size)
+		return 1;
+
+	return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int hot_add_zone_init(struct zone *zone, unsigned long phys_start_pfn, unsigned long size_pages)
+{
+	if (zone_previously_initialized(zone))
+		return -EEXIST;
+
+	zone_wait_table_init(zone, PAGES_PER_SECTION);
+	init_currently_empty_zone(zone, phys_start_pfn, PAGES_PER_SECTION);
+	zone_pcp_init(zone);
+
+	/*
+	 * FIXME: there is no locking at all for the zonelists.
+	 * Least impactful (codewise) way to do this is probably
+	 * to freeze all the CPUs for a sec while this is done.
+	 */
+	build_zonelists(zone->zone_pgdat);
+
+	return 0;
+}
+#endif
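
The zonelist hunks above hinge on one ordering fact: with memory hotplug, spanned_pages is set as soon as the zone is initialized, while present_pages only grows later, as pages are actually freed into the zone. Here is a minimal userspace sketch of that ordering; the names (toy_zone, toy_zone_init, toy_online_pages) are invented stand-ins for struct zone, init_currently_empty_zone() and the onlining path, so treat it as a model of the reasoning, not kernel code:

#include <stdio.h>

struct toy_zone {
	unsigned long spanned_pages;	/* covers the range as soon as the zone is initialized */
	unsigned long present_pages;	/* only grows as pages are freed into the zone */
};

/* stands in for init_currently_empty_zone(): the span is known up front */
static void toy_zone_init(struct toy_zone *z, unsigned long size)
{
	z->spanned_pages = size;
	z->present_pages = 0;
}

/* stands in for the onlining path: present_pages catches up later */
static void toy_online_pages(struct toy_zone *z, unsigned long nr)
{
	z->present_pages += nr;
}

/* stands in for the per-zone test in build_zonelists_node() */
static int toy_zone_in_zonelist(struct toy_zone *z)
{
	/*
	 * Testing present_pages would skip a freshly hot-added zone
	 * whose pages have not been freed into it yet; testing
	 * spanned_pages includes it as soon as it is initialized.
	 */
	return z->spanned_pages != 0;
}

int main(void)
{
	struct toy_zone highmem;

	toy_zone_init(&highmem, 131072);	/* 512MB of 4k pages hot-added */
	printf("in zonelist before onlining: %d\n", toy_zone_in_zonelist(&highmem));
	toy_online_pages(&highmem, 131072);
	printf("present_pages after onlining: %lu\n", highmem.present_pages);
	return 0;
}

The patch itself only swaps which of the two fields the three checks in build_zonelists_node() read.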
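
The guard at the bottom of the patch keys off the wait table: zone_previously_initialized() treats a nonzero wait_table_size as proof that the zone has already been set up, so a second hot_add_zone_init() on the same zone returns -EEXIST. A hedged toy model of just that control flow, again with invented toy_* names in place of the kernel functions:

#include <stdio.h>
#include <errno.h>

#define TOY_PAGES_PER_SECTION 4096UL

struct toy_zone {
	unsigned long wait_table_size;	/* nonzero once the zone has a wait table */
	unsigned long spanned_pages;
};

/* mirrors zone_previously_initialized(): the wait table doubles as a flag */
static int toy_zone_previously_initialized(struct toy_zone *z)
{
	return z->wait_table_size != 0;
}

/* mirrors the hot_add_zone_init() control flow: init once, -EEXIST after */
static int toy_hot_add_zone_init(struct toy_zone *z, unsigned long start_pfn)
{
	(void)start_pfn;	/* the real code passes this to init_currently_empty_zone() */

	if (toy_zone_previously_initialized(z))
		return -EEXIST;

	z->wait_table_size = 256;			/* stands in for zone_wait_table_init() */
	z->spanned_pages = TOY_PAGES_PER_SECTION;	/* and for init_currently_empty_zone() */

	/* the real code then rebuilds the zonelists (see the FIXME above) */
	return 0;
}

int main(void)
{
	struct toy_zone highmem = { 0, 0 };

	printf("first add:  %d\n", toy_hot_add_zone_init(&highmem, 0x20000));	/* 0 */
	printf("second add: %d\n", toy_hot_add_zone_init(&highmem, 0x20000));	/* -EEXIST */
	return 0;
}

Note that hot_add_zone_init() takes a size_pages argument but currently initializes exactly one section's worth (PAGES_PER_SECTION) regardless, which fits its status as testing code.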