Optimize off node performance of zone reclaim This patch insures that the performance of off node pages stays the same as before. Off node pagefault tests showed an 18% drop in performance without this patch. - Increase the timeout to 30 seconds to reduce the overhead. - Move all code possible out of the off node hot path for zone reclaim (Sorry Andrew, the struct initialization had to be sacrificed). The read_page_state() bit us there. - Check first for the timeout before any other checks. Signed-off-by: Christoph Lameter Index: linux-2.6.16-rc1-mm1/mm/vmscan.c =================================================================== --- linux-2.6.16-rc1-mm1.orig/mm/vmscan.c 2006-01-18 13:42:05.000000000 -0800 +++ linux-2.6.16-rc1-mm1/mm/vmscan.c 2006-01-19 11:12:31.000000000 -0800 @@ -1826,24 +1826,20 @@ int zone_reclaim_mode __read_mostly; /* * Mininum time between zone reclaim scans */ -#define ZONE_RECLAIM_INTERVAL HZ/2 +#define ZONE_RECLAIM_INTERVAL 30*HZ /* * Try to free up some pages from this zone through reclaim. */ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) { - int nr_pages = 1 << order; + int nr_pages; struct task_struct *p = current; struct reclaim_state reclaim_state; - struct scan_control sc = { - .gfp_mask = gfp_mask, - .may_writepage = 0, - .may_swap = 0, - .nr_mapped = read_page_state(nr_mapped), - .nr_scanned = 0, - .nr_reclaimed = 0, - .priority = 0 - }; + struct scan_control sc; + + if (time_before(jiffies, + zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL)) + return 0; if (!(gfp_mask & __GFP_WAIT) || zone->zone_pgdat->node_id != numa_node_id() || @@ -1851,12 +1847,17 @@ int zone_reclaim(struct zone *zone, gfp_ atomic_read(&zone->reclaim_in_progress) > 0) return 0; - if (time_before(jiffies, - zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL)) - return 0; + sc.may_writepage = 0; + sc.may_swap = 0; + sc.nr_scanned = 0; + sc.nr_reclaimed = 0; + sc.priority = 0; + sc.nr_mapped = read_page_state(nr_mapped); + sc.gfp_mask = gfp_mask; disable_swap_token(); + nr_pages = 1 << order; if (nr_pages > SWAP_CLUSTER_MAX) sc.swap_cluster_max = nr_pages; else