Index: linux-2.6.8.1-ck/include/linux/sched.h
===================================================================
--- linux-2.6.8.1-ck.orig/include/linux/sched.h	2004-08-20 22:49:11.690735211 +1000
+++ linux-2.6.8.1-ck/include/linux/sched.h	2004-08-20 23:14:24.066307649 +1000
@@ -575,6 +575,7 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define PF_FORKED	0x00400000	/* I have just forked */
 #define PF_YIELDED	0x00800000	/* I have just yielded */
 #define PF_UISLEEP	0x01000000	/* Uninterruptible sleep */
+#define PF_KIFLUSHD	0x02000000	/* I am kiflushd */
 
 #ifdef CONFIG_SMP
 #define SCHED_LOAD_SCALE	128UL	/* increase resolution of load */
Index: linux-2.6.8.1-ck/include/linux/swap.h
===================================================================
--- linux-2.6.8.1-ck.orig/include/linux/swap.h	2004-08-20 22:49:11.691735051 +1000
+++ linux-2.6.8.1-ck/include/linux/swap.h	2004-08-20 23:14:24.066307649 +1000
@@ -19,6 +19,12 @@ static inline int current_is_kswapd(void
 	return current->flags & PF_KSWAPD;
 }
 
+/* Non-zero when the current task carries PF_KIFLUSHD. */
+static inline int current_is_kiflushd(void)
+{
+	return current->flags & PF_KIFLUSHD;
+}
+
 /*
  * MAX_SWAPFILES defines the maximum number of swaptypes: things which can
  * be swapped to.  The swap type and the offset into that swap type are
@@ -175,6 +181,7 @@ extern void swap_setup(void);
 extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
+extern int vm_flush_interval;
 
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
Index: linux-2.6.8.1-ck/include/linux/sysctl.h
===================================================================
--- linux-2.6.8.1-ck.orig/include/linux/sysctl.h	2004-08-20 23:02:10.539588108 +1000
+++ linux-2.6.8.1-ck/include/linux/sysctl.h	2004-08-20 23:14:24.067307490 +1000
@@ -167,6 +167,7 @@ enum
 	VM_BLOCK_DUMP=24,	/* block dump mode */
 	VM_HUGETLB_GROUP=25,	/* permitted hugetlb group */
 	VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
+	VM_FLUSH_INTERVAL=27,	/* Seconds between ram flushing */
 };
 
 
Index: linux-2.6.8.1-ck/kernel/sysctl.c
===================================================================
--- linux-2.6.8.1-ck.orig/kernel/sysctl.c	2004-08-20 23:02:10.538588268 +1000
+++ linux-2.6.8.1-ck/kernel/sysctl.c	2004-08-20 23:14:24.068307330 +1000
@@ -727,6 +727,17 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_FLUSH_INTERVAL,
+		.procname	= "flush_interval",
+		.data		= &vm_flush_interval,
+		.maxlen		= sizeof(vm_flush_interval),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	 {
 		.ctl_name	= VM_HUGETLB_PAGES,
Index: linux-2.6.8.1-ck/mm/vmscan.c
===================================================================
--- linux-2.6.8.1-ck.orig/mm/vmscan.c	2004-08-20 23:02:10.537588428 +1000
+++ linux-2.6.8.1-ck/mm/vmscan.c	2004-08-20 23:27:04.090495329 +1000
@@ -119,7 +119,12 @@ struct shrinker {
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 33;
+/* Seconds between kiflushd passes; 0 disables flushing. */
+int vm_flush_interval = 1;
 static long total_memory;
+static int iflush_idle;
+/* Number of kswapd threads currently inside balance_pgdat(). */
+static atomic_t kswapd_active = ATOMIC_INIT(0);
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_MUTEX(shrinker_sem);
@@ -366,6 +371,9 @@ static int shrink_list(struct list_head 
 			goto keep_locked;
 
 		sc->nr_scanned++;
+
+		if (unlikely(current_is_kiflushd() && page_mapped(page)))
+			goto keep_locked;
 		/* Double the slab pressure for mapped and swapcache pages */
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
@@ -709,7 +717,7 @@ refill_inactive_zone(struct zone *zone, 
 	 * Now use this metric to decide whether to start moving mapped memory
 	 * onto the inactive list.
 	 */
-	if (swap_tendency >= 100)
+	if (swap_tendency >= 100 && !current_is_kiflushd())
 		reclaim_mapped = 1;
 
 	while (!list_empty(&l_hold)) {
@@ -1006,6 +1014,9 @@ static int balance_pgdat(pg_data_t *pgda
 		int all_zones_ok = 1;
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
+
+		if (unlikely(current_is_kiflushd() && priority < DEF_PRIORITY))
+			goto out;
 
 		if (nr_pages == 0) {
 			/*
@@ -1142,13 +1153,22 @@ static int kswapd(void *p)
 	tsk->flags |= PF_MEMALLOC|PF_KSWAPD;
 
 	for ( ; ; ) {
 		if (current->flags & PF_FREEZE)
 			refrigerator(PF_FREEZE);
 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
 		schedule();
 		finish_wait(&pgdat->kswapd_wait, &wait);
-
+		/*
+		 * kswapd active implies memory pressure; keep kiflushd
+		 * asleep while we reclaim.  A counter is used instead of
+		 * zeroing and restoring vm_flush_interval so concurrent
+		 * kswapd threads and sysctl writes cannot lose the
+		 * administrator's setting.
+		 */
+		atomic_inc(&kswapd_active);
+		iflush_idle = 1;
 		balance_pgdat(pgdat, 0);
+		atomic_dec(&kswapd_active);
 	}
 	return 0;
 }
@@ -1217,6 +1237,78 @@ static int __devinit cpu_callback(struct
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+/*
+ * Call balance_pgdat() for SWAP_CLUSTER_MAX pages on every node, with a
+ * reclaim_state installed on the current task for the duration.
+ */
+static void trickle_out_memory(void)
+{
+	pg_data_t *pgdat;
+	struct reclaim_state reclaim_state = {
+		.reclaimed_slab = 0,
+	};
+
+	current->reclaim_state = &reclaim_state;
+	for_each_pgdat(pgdat)
+		balance_pgdat(pgdat, SWAP_CLUSTER_MAX);
+	current->reclaim_state = NULL;
+}
+
+/*
+ * kiflushd: nice 19 kernel thread that periodically reclaims pages that
+ * are not mapped (shrink_list() and refill_inactive_zone() leave mapped
+ * pages alone for this thread) while more than 2/3 of ram is in use.
+ *
+ * It sleeps vm_flush_interval seconds between passes while it is finding
+ * work, and one minute at a time when vm_flush_interval is 0, when a
+ * kswapd thread is reclaiming, or when the last check found nothing to do.
+ */
+static int kiflushd(void *p)
+{
+	struct task_struct *tsk = current;
+
+	daemonize("kiflushd");
+	set_user_nice(current, 19);
+	tsk->flags |= PF_MEMALLOC | PF_KIFLUSHD;
+
+	for ( ; ; ) {
+		int used_ratio, interval = HZ * 60;
+		unsigned long total_centile, used_ram;
+
+		if (current->flags & PF_FREEZE)
+			refrigerator(PF_FREEZE);
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!vm_flush_interval || atomic_read(&kswapd_active)) {
+			schedule_timeout(interval);
+			continue;
+		}
+		if (!iflush_idle)
+			interval = HZ * vm_flush_interval;
+		schedule_timeout(interval);
+		/* The +1 keeps the divisor non-zero on tiny machines. */
+		total_centile = totalram_pages / 100 + 1;
+		used_ram = totalram_pages - nr_free_pages();
+		used_ratio = used_ram / total_centile;
+		/*
+		 * Trickle out memory till there is less than 2/3 ram used
+		 * or 2/3 of that ram is mapped - whichever comes first.
+		 * The mapped share is scaled to a percentage before the
+		 * division; used_ram is non-zero here as used_ratio > 66.
+		 */
+		if (used_ratio > 66 &&
+		    read_page_state(nr_mapped) * 100 / used_ram < 67) {
+			iflush_idle = 0;
+			trickle_out_memory();
+			continue;
+		}
+		/*
+		 * We didn't do anything so wait one minute before waking
+		 * up again.
+		 */
+		iflush_idle = 1;
+	}
+}
+
 static int __init kswapd_init(void)
 {
 	pg_data_t *pgdat;
@@ -1229,4 +1321,12 @@ static int __init kswapd_init(void)
 	return 0;
 }
 
+/* Spawn the kiflushd kernel thread. */
+static int __init kiflushd_init(void)
+{
+	kernel_thread(kiflushd, NULL, CLONE_KERNEL);
+	return 0;
+}
+
 module_init(kswapd_init)
+module_init(kiflushd_init)