From: Christoph Lameter <clameter@engr.sgi.com>

Add some comments to explain how zone reclaim works, and fix the
following issues:

- PF_SWAPWRITE needs to be set for RECLAIM_SWAP to be able to write
  out pages to swap. Currently RECLAIM_SWAP may not do that.

- Remove the setting of nr_reclaimed after slab reclaim, since the slab
  shrinking code does not use it and the existing nr_reclaimed value is just
  right for the intended follow-up action.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 mm/vmscan.c |   18 ++++++++++++++----
 1 files changed, 14 insertions(+), 4 deletions(-)

diff -puN mm/vmscan.c~zone_reclaim-additional-comments-and-cleanup mm/vmscan.c
--- devel/mm/vmscan.c~zone_reclaim-additional-comments-and-cleanup	2006-03-08 21:35:05.000000000 -0800
+++ devel-akpm/mm/vmscan.c	2006-03-08 21:35:05.000000000 -0800
@@ -1881,6 +1881,7 @@ int zone_reclaim_interval __read_mostly 
  */
 static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
+	/* Minimum pages needed in order to stay on node */
 	const unsigned long nr_pages = 1 << order;
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
@@ -1918,9 +1919,12 @@ static int __zone_reclaim(struct zone *z
 
 	if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
 		/*
-		 * shrink_slab does not currently allow us to determine
-		 * how many pages were freed in the zone. So we just
-		 * shake the slab and then go offnode for a single allocation.
+		 * shrink_slab() does not currently allow us to determine how
+		 * many pages were freed in this zone. So we just shake the slab
+		 * a bit and then go off node for this particular allocation
+		 * despite possibly having freed enough memory to allocate in
+		 * this zone.  If we freed local memory then the next
+		 * allocations will be local again.
 		 *
 		 * shrink_slab will free memory on all zones and may take
 		 * a long time.
@@ -1931,8 +1935,14 @@ static int __zone_reclaim(struct zone *z
 	p->reclaim_state = NULL;
 	current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
 
-	if (nr_reclaimed == 0)
+	if (nr_reclaimed == 0) {
+		/*
+		 * We were unable to reclaim any pages to stay on node.  We
+		 * now allow off node accesses for a certain time period before
+		 * trying again to reclaim pages from the local zone.
+		 */
 		zone->last_unsuccessful_zone_reclaim = jiffies;
+	}
 
 	return nr_reclaimed >= nr_pages;
 }
_