Add a NUMA-only "node_swap" sysctl (VM_NODE_SWAP, default 0).

After a successful zone allocation, kswapd is woken for that zone when
free_pages drops below (present_pages * sysctl_node_swap) / 1024, i.e.
roughly (sysctl_node_swap / 10) percent of the zone.  wakeup_kswapd()
applies the same threshold in addition to its pages_low check.  With the
default value of 0 the threshold is 0 and the extra wakeup never triggers.

Review notes:
 - The threshold is computed with ">> 10" (divide by 1024), matching the
   comment in mm/page_alloc.c; the earlier "<< 10" multiplied instead.
 - The new test in mm/vmscan.c is wrapped in CONFIG_NUMA because
   sysctl_node_swap is only defined under CONFIG_NUMA.
 - NOTE(review): indentation and blank context lines were reconstructed
   from a whitespace-collapsed copy; verify against the target tree (or
   apply with "patch -l") before use.

Index: linux/mm/page_alloc.c
===================================================================
--- linux.orig/mm/page_alloc.c	2004-10-04 15:11:58.000000000 -0700
+++ linux/mm/page_alloc.c	2004-10-12 07:48:39.000000000 -0700
@@ -43,6 +43,9 @@
 int numnodes = 1;
 EXPORT_SYMBOL(numnodes);
 int sysctl_lower_zone_protection = 0;
+#ifdef CONFIG_NUMA
+int sysctl_node_swap = 0;
+#endif
 
 EXPORT_SYMBOL(totalram_pages);
 EXPORT_SYMBOL(nr_swap_pages);
@@ -639,7 +642,16 @@
 			page = buffered_rmqueue(z, order, cold);
 			if (page) {
 				zone_statistics(zonelist, z);
-				goto got_pg;
+#ifdef CONFIG_NUMA
+				/*
+				 * If this allocation leaves less than (sysctl_node_swap / 10) %
+				 * of the zone free then wake kswapd.
+				 * (to keep it cheap we compute (pages * sysctl_node_swap) >> 10)
+				 */
+				if (z->free_pages < ((z->present_pages * sysctl_node_swap) >> 10))
+					wakeup_kswapd(z);
+#endif
+				goto got_pg;
 			}
 		}
 		min += z->pages_low * sysctl_lower_zone_protection;
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c	2004-10-04 15:12:01.000000000 -0700
+++ linux/kernel/sysctl.c	2004-10-12 07:45:41.000000000 -0700
@@ -71,6 +71,9 @@
 extern int printk_ratelimit_burst;
 extern int shm_use_hugepages;
 extern int mmap_use_hugepages, mmap_hugepages_map_sz;
+#ifdef CONFIG_NUMA
+extern int sysctl_node_swap;
+#endif
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
@@ -890,6 +893,16 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#ifdef CONFIG_NUMA
+	{
+		.ctl_name	= VM_NODE_SWAP,
+		.procname	= "node_swap",
+		.data		= &sysctl_node_swap,
+		.maxlen		= sizeof(sysctl_node_swap),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
Index: linux/include/linux/sysctl.h
===================================================================
--- linux.orig/include/linux/sysctl.h	2004-10-04 15:12:01.000000000 -0700
+++ linux/include/linux/sysctl.h	2004-10-12 07:36:40.000000000 -0700
@@ -175,6 +175,7 @@
 	VM_BLOCK_DUMP=24,	/* block dump mode */
 	VM_DISABLE_CAP_MLOCK=25,/* disable CAP_IPC_LOCK checking */
 	VM_HEAP_STACK_GAP=26,	/* int: page gap between heap and stack */
+	VM_NODE_SWAP=27,	/* Swap local node memory limit (in % *10) */
 };
 
 
Index: linux/mm/vmscan.c
===================================================================
--- linux.orig/mm/vmscan.c	2004-10-11 16:36:17.000000000 -0700
+++ linux/mm/vmscan.c	2004-10-12 07:35:30.000000000 -0700
@@ -1122,7 +1122,14 @@
  */
 void wakeup_kswapd(struct zone *zone)
 {
-	if (zone->free_pages > zone->pages_low)
+#ifdef CONFIG_NUMA
+	extern int sysctl_node_swap;
+
+	if (zone->free_pages > ((zone->present_pages * sysctl_node_swap) >> 10) &&
+	    zone->free_pages > zone->pages_low)
+#else
+	if (zone->free_pages > zone->pages_low)
+#endif
 		return;
 	if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
 		return;