Index: linux-2.6.8.1-ck/include/linux/mmzone.h
===================================================================
--- linux-2.6.8.1-ck.orig/include/linux/mmzone.h	2004-08-15 14:08:19.000000000 +1000
+++ linux-2.6.8.1-ck/include/linux/mmzone.h	2004-08-22 14:32:23.000000000 +1000
@@ -125,7 +125,7 @@ struct zone {
 	 */
 	spinlock_t		lock;
 	unsigned long		free_pages;
-	unsigned long		pages_min, pages_low, pages_high;
+	unsigned long		pages_min, pages_low, pages_high, pages_unmapped;
 	/*
 	 * protection[] is a pre-calculated number of extra pages that must be
 	 * available in a zone in order for __alloc_pages() to allocate memory
@@ -276,6 +276,7 @@ typedef struct pglist_data {
 	struct pglist_data *pgdat_next;
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
+	int maplimit;
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
Index: linux-2.6.8.1-ck/include/linux/swap.h
===================================================================
--- linux-2.6.8.1-ck.orig/include/linux/swap.h	2004-08-22 10:58:05.000000000 +1000
+++ linux-2.6.8.1-ck/include/linux/swap.h	2004-08-22 15:26:03.000000000 +1000
@@ -174,7 +174,7 @@ extern void swap_setup(void);
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
-extern int vm_swappiness;
+extern int vm_mapped;
 
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
Index: linux-2.6.8.1-ck/include/linux/sysctl.h
===================================================================
--- linux-2.6.8.1-ck.orig/include/linux/sysctl.h	2004-08-22 10:58:05.000000000 +1000
+++ linux-2.6.8.1-ck/include/linux/sysctl.h	2004-08-22 17:46:08.687087009 +1000
@@ -159,7 +159,7 @@ enum
 	VM_OVERCOMMIT_RATIO=16,	/* percent of RAM to allow overcommit in */
 	VM_PAGEBUF=17,		/* struct: Control pagebuf parameters */
 	VM_HUGETLB_PAGES=18,	/* int: Number of available Huge Pages */
-	VM_SWAPPINESS=19,	/* Tendency to steal mapped memory */
+	VM_MAPPED=19,		/* percent mapped min while evicting cache */
 	VM_LOWER_ZONE_PROTECTION=20,/* Amount of protection of lower zones */
 	VM_MIN_FREE_KBYTES=21,	/* Minimum free kilobytes to maintain */
 	VM_MAX_MAP_COUNT=22,	/* int: Maximum number of mmaps/address-space */
Index: linux-2.6.8.1-ck/kernel/sysctl.c
===================================================================
--- linux-2.6.8.1-ck.orig/kernel/sysctl.c	2004-08-22 10:58:05.000000000 +1000
+++ linux-2.6.8.1-ck/kernel/sysctl.c	2004-08-22 17:46:08.641094173 +1000
@@ -717,10 +717,10 @@ static ctl_table vm_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= VM_SWAPPINESS,
-		.procname	= "swappiness",
-		.data		= &vm_swappiness,
-		.maxlen		= sizeof(vm_swappiness),
+		.ctl_name	= VM_MAPPED,
+		.procname	= "mapped",
+		.data		= &vm_mapped,
+		.maxlen		= sizeof(vm_mapped),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
 		.strategy	= &sysctl_intvec,
Index: linux-2.6.8.1-ck/mm/page_alloc.c
===================================================================
--- linux-2.6.8.1-ck.orig/mm/page_alloc.c	2004-08-22 10:58:00.000000000 +1000
+++ linux-2.6.8.1-ck/mm/page_alloc.c	2004-08-22 17:46:06.263464507 +1000
@@ -628,6 +628,9 @@ __alloc_pages(unsigned int gfp_mask, uns
 		 */
 		if (rt_task(p))
 			min -= z->pages_low >> 1;
+		else
+			if (wait && z->free_pages < z->pages_unmapped)
+				wakeup_kswapd(z);
 
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
@@ -1905,6 +1908,7 @@ static void setup_per_zone_pages_min(voi
 
 		zone->pages_low = zone->pages_min * 2;
 		zone->pages_high = zone->pages_min * 3;
+		zone->pages_unmapped = zone->present_pages / 3;
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
Index: linux-2.6.8.1-ck/mm/vmscan.c
===================================================================
--- linux-2.6.8.1-ck.orig/mm/vmscan.c	2004-08-22 10:58:07.000000000 +1000
+++ linux-2.6.8.1-ck/mm/vmscan.c	2004-08-22 18:28:29.565723150 +1000
@@ -115,10 +115,7 @@ struct shrinker {
 #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
 #endif
 
-/*
- * From 0 .. 100.  Higher means more swappy.
- */
-int vm_swappiness = 60;
+int vm_mapped = 66;
 static long total_memory;
 
 static LIST_HEAD(shrinker_list);
@@ -338,7 +335,8 @@ static pageout_t pageout(struct page *pa
 /*
  * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
  */
-static int shrink_list(struct list_head *page_list, struct scan_control *sc)
+static int shrink_list(struct list_head *page_list, struct scan_control *sc,
+	int maplimit)
 {
 	LIST_HEAD(ret_pages);
 	struct pagevec freed_pvec;
@@ -366,6 +364,8 @@ static int shrink_list(struct list_head
 			goto keep_locked;
 
 		sc->nr_scanned++;
+		if (maplimit && page_mapped(page))
+			goto keep_locked;
 		/* Double the slab pressure for mapped and swapcache pages */
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
@@ -543,6 +543,7 @@ static void shrink_cache(struct zone *zo
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
 	int max_scan = sc->nr_to_scan;
+	int maplimit = 0;
 
 	pagevec_init(&pvec, 1);
 
@@ -584,11 +585,12 @@ static void shrink_cache(struct zone *zo
 			goto done;
 
 		max_scan -= nr_scan;
-		if (current_is_kswapd())
+		if (current_is_kswapd()) {
 			mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-		else
+			maplimit = zone->zone_pgdat->maplimit;
+		} else
 			mod_page_state_zone(zone, pgscan_direct, nr_scan);
-		nr_freed = shrink_list(&page_list, sc);
+		nr_freed = shrink_list(&page_list, sc, maplimit);
 		if (current_is_kswapd())
 			mod_page_state(kswapd_steal, nr_freed);
 		mod_page_state_zone(zone, pgsteal, nr_freed);
@@ -648,10 +650,6 @@ refill_inactive_zone(struct zone *zone, 
 	LIST_HEAD(l_active);	/* Pages to go onto the active_list */
 	struct page *page;
 	struct pagevec pvec;
-	int reclaim_mapped = 0;
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
 
 	lru_add_drain();
 	pgmoved = 0;
@@ -681,42 +679,11 @@ refill_inactive_zone(struct zone *zone, 
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
-	/*
-	 * `distress' is a measure of how much trouble we're having reclaiming
-	 * pages.  0 -> no problems.  100 -> great trouble.
-	 */
-	distress = 100 >> zone->prev_priority;
-
-	/*
-	 * The point of this algorithm is to decide when to start reclaiming
-	 * mapped memory instead of just pagecache.  Work out how much memory
-	 * is mapped.
-	 */
-	mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
-	/*
-	 * Now decide how much we really want to unmap some pages.  The mapped
-	 * ratio is downgraded - just because there's a lot of mapped memory
-	 * doesn't necessarily mean that page reclaim isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped memory
-	 * onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
 	while (!list_empty(&l_hold)) {
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 		if (page_mapped(page)) {
-			if (!reclaim_mapped) {
+			if (zone->zone_pgdat->maplimit) {
 				list_add(&page->lru, &l_active);
 				continue;
 			}
@@ -981,7 +948,7 @@ out:
  * the page allocator fallback scheme to ensure that aging of pages is balanced
  * across the zones.
  */
-static int balance_pgdat(pg_data_t *pgdat, int nr_pages)
+static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int maplimit)
 {
 	int to_free = nr_pages;
 	int priority;
@@ -1006,7 +973,15 @@ static int balance_pgdat(pg_data_t *pgda
 		int all_zones_ok = 1;
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
+		int mapped_ratio = sc.nr_mapped * 100 / total_memory;
 
+		if (maplimit && (mapped_ratio > vm_mapped ||
+			priority < DEF_PRIORITY))
+			/*
+			 * Evict pages only if mapped_ratio < vm_mapped and
+			 * only do low priority scanning.
+			 */
+			goto out;
 		if (nr_pages == 0) {
 			/*
 			 * Scan in the highmem->dma direction for the highest
@@ -1019,10 +994,13 @@ static int balance_pgdat(pg_data_t *pgda
 						priority != DEF_PRIORITY)
 					continue;
 
-				if (zone->free_pages <= zone->pages_high) {
-					end_zone = i;
-					goto scan;
+				if (zone->free_pages <= zone->pages_high ||
+					(maplimit && zone->free_pages <=
+					zone->pages_unmapped)) {
+					end_zone = i;
+					goto scan;
 				}
+
 			}
 			goto out;
 		} else {
@@ -1148,7 +1126,7 @@ static int kswapd(void *p)
 		schedule();
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0);
+		balance_pgdat(pgdat, 0, pgdat->maplimit);
 	}
 	return 0;
 }
@@ -1158,11 +1136,14 @@ static int kswapd(void *p)
  */
 void wakeup_kswapd(struct zone *zone)
 {
-	if (zone->free_pages > zone->pages_low)
+	if (zone->free_pages > zone->pages_unmapped)
 		return;
+	if (zone->free_pages > zone->pages_low)
+		zone->zone_pgdat->maplimit = 1;
 	if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
 		return;
 	wake_up_interruptible(&zone->zone_pgdat->kswapd_wait);
+	zone->zone_pgdat->maplimit = 0;
 }
 
 #ifdef CONFIG_PM
@@ -1182,7 +1163,7 @@ int shrink_all_memory(int nr_pages)
 	current->reclaim_state = &reclaim_state;
 	for_each_pgdat(pgdat) {
 		int freed;
-		freed = balance_pgdat(pgdat, nr_to_free);
+		freed = balance_pgdat(pgdat, nr_to_free, 0);
 		ret += freed;
 		nr_to_free -= freed;
 		if (nr_to_free <= 0)
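
For readers following the policy rather than the hunks, here is a minimal, compile-only C sketch of what the diff implements. It is not part of the patch: struct zone_state and the two helper functions are illustrative assumptions, while vm_mapped, DEF_PRIORITY, pages_low and pages_unmapped mirror the identifiers the patch touches.

#include <stdbool.h>

#define DEF_PRIORITY 12		/* gentlest scan pass, as in 2.6.8 vmscan.c */

static int vm_mapped = 66;	/* /proc/sys/vm/mapped default from the patch */

/* Illustrative stand-in for the handful of struct zone fields involved. */
struct zone_state {
	unsigned long free_pages;
	unsigned long pages_low;	/* existing low watermark */
	unsigned long pages_unmapped;	/* new watermark: present_pages / 3 */
};

/*
 * Mirrors the new wakeup_kswapd(): with free memory between pages_low and
 * pages_unmapped, kswapd is woken with pgdat->maplimit set, so it reclaims
 * unmapped pagecache only; at or below pages_low reclaim is unrestricted.
 */
static bool wake_in_maplimit_mode(const struct zone_state *z)
{
	return z->free_pages <= z->pages_unmapped &&
		z->free_pages > z->pages_low;
}

/*
 * Mirrors the balance_pgdat() bail-out: a maplimit pass gives up once
 * mapped pages exceed vm_mapped percent of memory, and never escalates
 * past the lowest-intensity scan (priority stays at DEF_PRIORITY).
 */
static bool maplimit_pass_should_stop(int mapped_ratio, int priority)
{
	return mapped_ratio > vm_mapped || priority < DEF_PRIORITY;
}

The net effect: with the default vm_mapped = 66, a kswapd wakeup that finds a zone between its pages_low and pages_unmapped watermarks evicts only unmapped pagecache and stops once mapped pages exceed 66 percent of memory, replacing the removed swap_tendency heuristic (mapped_ratio / 2 + distress + vm_swappiness >= 100) as the trigger for unmapping.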