Atomic reclaim: Core piece Recognize if __GFP_WAIT is not set when try_to_free_pages is called and then disable swapping and writing for the reclaim run. Also disable slab shrinking for now. The paths are certainly not able to handle being called in interrupt disabled mode. Print some information when an atomic reclaim run is complete so that we can see what is going on. Signed-off-by: Christoph Lameter --- mm/vmscan.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) Index: linux-2.6/mm/vmscan.c =================================================================== --- linux-2.6.orig/mm/vmscan.c 2007-08-13 22:48:38.000000000 -0700 +++ linux-2.6/mm/vmscan.c 2007-08-13 22:49:01.000000000 -0700 @@ -161,6 +161,9 @@ unsigned long shrink_slab(unsigned long if (scanned == 0) scanned = SWAP_CLUSTER_MAX; + if (!(gfp_mask & __GFP_WAIT)) + return 1; + if (!down_read_trylock(&shrinker_rwsem)) return 1; /* Assume we'll be able to shrink next time */ @@ -1062,7 +1065,8 @@ static unsigned long shrink_zone(int pri } } - throttle_vm_writeout(sc->gfp_mask); + if (sc->gfp_mask & __GFP_WAIT) + throttle_vm_writeout(sc->gfp_mask); atomic_dec(&zone->reclaim_in_progress); return nr_reclaimed; @@ -1124,6 +1128,9 @@ static unsigned long shrink_zones(int pr * hope that some of these pages can be written. But if the allocating task * holds filesystem locks which prevent writeout this might not work, and the * allocation attempt will fail. + * + * The __GFP_WAIT flag has a special role. If it is cleared then we will not + * sleep and will not perform any allocations. 
*/ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) { @@ -1136,15 +1143,18 @@ unsigned long try_to_free_pages(struct z int i; struct scan_control sc = { .gfp_mask = gfp_mask, - .may_writepage = !laptop_mode, .swap_cluster_max = SWAP_CLUSTER_MAX, - .may_swap = 1, .swappiness = vm_swappiness, .order = order, }; count_vm_event(ALLOCSTALL); + if (gfp_mask & __GFP_WAIT) { + sc.may_writepage = !laptop_mode; + sc.may_swap = 1; + } + for (i = 0; zones[i] != NULL; i++) { struct zone *zone = zones[i]; @@ -1171,6 +1181,9 @@ unsigned long try_to_free_pages(struct z goto out; } + if (!(gfp_mask & __GFP_WAIT)) + continue; + /* * Try to write back as many pages as we just scanned. This * tends to cause slow streaming writers to write data to the @@ -1209,6 +1222,9 @@ out: zone->prev_priority = priority; } + if (!(gfp_mask & __GFP_WAIT)) + printk(KERN_WARNING "Atomic reclaim. " + "Reclaimed %lu pages!\n", nr_reclaimed); return ret; }