From: Wu Fengguang

Collect info about the global available memory and its consumption speed.
The data are used by the stateful method to estimate the thrashing
threshold.  They are the decisive factor of the correctness/accuracy of
the resulting read-ahead size.

The accountings are done on a per-node basis.  On NUMA systems, it works
for the two common real-world schemes:

- the reader process allocates caches in a node affined manner;
- the reader process allocates caches _balancely_ from a set of nodes.

[clameter@sgi.com: Apply type enum zone_type]
[clameter@sgi.com: ZVC writeback: Fix mm and other issues]
Signed-off-by: Wu Fengguang
Signed-off-by: Christoph Lameter
Signed-off-by: Andrew Morton
---

 include/linux/mmzone.h |    3 +++
 mm/page_alloc.c        |   31 +++++++++++++++++++++++++++++++
 mm/vmscan.c            |    1 +
 3 files changed, 35 insertions(+)

diff -puN include/linux/mmzone.h~readahead-state-based-method-aging-accounting include/linux/mmzone.h
--- a/include/linux/mmzone.h~readahead-state-based-method-aging-accounting
+++ a/include/linux/mmzone.h
@@ -251,6 +251,7 @@ struct zone {
 	unsigned long		nr_scan_active;
 	unsigned long		nr_scan_inactive;
 	unsigned long		pages_scanned;		/* since last reclaim */
+	unsigned long		total_scanned;		/* accumulated, may overflow */
 	int			all_unreclaimable;	/* All pages pinned */
 
 	/* A count of how many reclaimers are scanning this zone */
@@ -495,6 +496,8 @@ typedef struct pglist_data {
 
 void get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free);
+unsigned long nr_free_inactive_pages_node(int nid);
+unsigned long nr_scanned_pages_node(int nid);
 void build_all_zonelists(void);
 void wakeup_kswapd(struct zone *zone, int order);
 int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
diff -puN mm/page_alloc.c~readahead-state-based-method-aging-accounting mm/page_alloc.c
--- a/mm/page_alloc.c~readahead-state-based-method-aging-accounting
+++ a/mm/page_alloc.c
@@ -1818,6 +1818,37 @@ unsigned int nr_free_pagecache_pages(voi
 	return nr_free_zone_pages(gfp_zone(GFP_HIGH_MOVABLE));
 }
 
+/*
+ * Amount of free+inactive RAM in a node.
+ */
+unsigned long nr_free_inactive_pages_node(int nid)
+{
+	enum zone_type i;
+	unsigned long sum = node_page_state(nid, NR_FREE_PAGES) +
+				node_page_state(nid, NR_INACTIVE);
+	struct zone *zones = NODE_DATA(nid)->node_zones;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		sum -= zones[i].pages_low;
+
+	return sum;
+}
+
+/*
+ * Accumulated scanned pages in a node.
+ */
+unsigned long nr_scanned_pages_node(int nid)
+{
+	enum zone_type i;
+	unsigned long sum = 0;
+	struct zone *zones = NODE_DATA(nid)->node_zones;
+
+	for (i = 0; i < MAX_NR_ZONES; i++)
+		sum += zones[i].total_scanned;
+
+	return sum;
+}
+
 static inline void show_node(struct zone *zone)
 {
 	if (NUMA_BUILD)
diff -puN mm/vmscan.c~readahead-state-based-method-aging-accounting mm/vmscan.c
--- a/mm/vmscan.c~readahead-state-based-method-aging-accounting
+++ a/mm/vmscan.c
@@ -772,6 +772,7 @@ static unsigned long shrink_inactive_lis
 					     &page_list, &nr_scan, sc->order, 0);
 		__mod_zone_page_state(zone, NR_INACTIVE, -nr_taken);
 		zone->pages_scanned += nr_scan;
+		zone->total_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
_