diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/fs/fs-writeback.c linux-2.6.8.1-ck9/fs/fs-writeback.c
--- linux-2.6.8.1-ck8/fs/fs-writeback.c	2004-10-02 12:28:04.222227851 +1000
+++ linux-2.6.8.1-ck9/fs/fs-writeback.c	2004-08-15 14:08:15.000000000 +1000
@@ -361,7 +361,6 @@ sync_sb_inodes(struct super_block *sb, s
 		}
 		spin_unlock(&inode_lock);
 		iput(inode);
-		cond_resched();
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0)
 			break;
@@ -421,7 +420,6 @@ restart:
 	}
 	spin_unlock(&sb_lock);
 	spin_unlock(&inode_lock);
-	cond_resched();
 }
 
 /*
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/fs/super.c linux-2.6.8.1-ck9/fs/super.c
--- linux-2.6.8.1-ck8/fs/super.c	2004-10-02 12:28:04.911119814 +1000
+++ linux-2.6.8.1-ck9/fs/super.c	2004-08-15 14:08:18.000000000 +1000
@@ -317,7 +317,6 @@ void sync_supers(void)
 {
 	struct super_block * sb;
 restart:
-	cond_resched();
 	spin_lock(&sb_lock);
 	sb = sb_entry(super_blocks.next);
 	while (sb != sb_entry(&super_blocks))
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/include/linux/sched.h linux-2.6.8.1-ck9/include/linux/sched.h
--- linux-2.6.8.1-ck8/include/linux/sched.h	2004-10-02 12:28:04.925117619 +1000
+++ linux-2.6.8.1-ck9/include/linux/sched.h	2004-10-02 12:28:15.375478716 +1000
@@ -416,7 +416,7 @@ struct task_struct {
 
 	unsigned long policy;
 	cpumask_t cpus_allowed;
-	unsigned long slice, time_slice;
+	unsigned int slice, time_slice;
 
 	struct list_head tasks;
 	/*
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/include/linux/swap.h linux-2.6.8.1-ck9/include/linux/swap.h
--- linux-2.6.8.1-ck8/include/linux/swap.h	2004-10-02 12:28:04.925117619 +1000
+++ linux-2.6.8.1-ck9/include/linux/swap.h	2004-10-02 12:28:15.375478716 +1000
@@ -175,6 +175,7 @@ extern void swap_setup(void);
 extern int try_to_free_pages(struct zone **, unsigned int, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_mapped;
+extern int vm_hardmaplimit;
 
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/include/linux/sysctl.h linux-2.6.8.1-ck9/include/linux/sysctl.h
--- linux-2.6.8.1-ck8/include/linux/sysctl.h	2004-10-02 12:28:04.926117462 +1000
+++ linux-2.6.8.1-ck9/include/linux/sysctl.h	2004-10-02 12:28:15.376478560 +1000
@@ -167,6 +167,7 @@ enum
 	VM_BLOCK_DUMP=24,	/* block dump mode */
 	VM_HUGETLB_GROUP=25,	/* permitted hugetlb group */
 	VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
+	VM_HARDMAPLIMIT=27,	/* Make mapped a hard limit */
 };
 
 
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/kernel/sched.c linux-2.6.8.1-ck9/kernel/sched.c
--- linux-2.6.8.1-ck8/kernel/sched.c	2004-10-02 12:28:04.932116521 +1000
+++ linux-2.6.8.1-ck9/kernel/sched.c	2004-10-02 12:28:15.382477619 +1000
@@ -16,7 +16,7 @@
  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  *  2004-04-02	Scheduler domains code by Nick Piggin
- *  2004-09-13	New staircase scheduling policy by Con Kolivas with help
+ *  2004-07-07	New staircase scheduling policy by Con Kolivas with help
  *		from William Lee Irwin III, Zwane Mwaikambo & Peter Williams.
  */
 
@@ -65,12 +65,9 @@
 #define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
 
 /*
- * Some helpers for time to/from microsecond. (>> 10) approximates (/ 1000)
- * to avoid 64 bit division.
+ * Some helpers for converting nanosecond timing to jiffy resolution
  */
-#define NS_TO_US(TIME) ((TIME) >> 10)
-#define JIFFIES_TO_US(TIME) ((TIME) * (1000000 / HZ))
-#define US_TO_JIFFIES(TIME) ((TIME) / (1000000 / HZ))
+#define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ))
 
 int sched_compute = 0;
 /*
@@ -78,7 +75,7 @@ int sched_compute = 0;
  *compute setting is reserved for dedicated computational scheduling
  *and has ten times larger intervals.
  */
-#define _RR_INTERVAL		(10000)	/* microseconds */
+#define _RR_INTERVAL		((10 * HZ / 1000) ? : 1)
 #define RR_INTERVAL()		(_RR_INTERVAL * (1 + 9 * sched_compute))
 
 #define task_hot(p, now, sd) ((now) - (p)->timestamp < (sd)->cache_hot_time)
@@ -275,9 +272,9 @@ static void dec_burst(task_t *p)
  * slice - the duration a task runs before getting requeued at it's best
  * priority and has it's burst decremented.
  */
-static unsigned long slice(task_t *p)
+static unsigned int slice(task_t *p)
 {
-	unsigned long slice = RR_INTERVAL();
+	unsigned int slice = RR_INTERVAL();
 	if (likely(!rt_task(p) && !batch_task(p)))
 		slice += burst(p) * RR_INTERVAL();
 	else if (batch_task(p))
@@ -290,9 +287,9 @@ static unsigned long slice(task_t *p)
  */
 int sched_interactive = 1;
 
-static unsigned long rr_interval(task_t * p)
+static int rr_interval(task_t * p)
 {
-	unsigned long rr_interval = RR_INTERVAL();
+	int rr_interval = RR_INTERVAL();
 	if (batch_task(p))
 		rr_interval *= 10;
 	else if (iso_task(p))
@@ -310,8 +307,8 @@ static unsigned long rr_interval(task_t
  */
 static int effective_prio(task_t *p)
 {
-	int prio;
-	unsigned long rr, full_slice, used_slice, first_slice;
+	int prio, rr;
+	unsigned int full_slice, used_slice, first_slice;
 	unsigned int best_burst;
 	if (rt_task(p))
 		return p->prio;
@@ -353,25 +350,24 @@ static int effective_prio(task_t *p)
  */
 static void recalc_task_prio(task_t *p, unsigned long long now)
 {
-	unsigned long long _sleep_time = now - p->timestamp;
-	unsigned long sleep_time = NS_TO_US(_sleep_time);
-	unsigned long rr = rr_interval(p);
+	unsigned long sleep_time = now - p->timestamp;
+	unsigned int rr = rr_interval(p);
 	unsigned int best_burst = burst(p);
-	unsigned long minrun = rr * (p->burst + 1) / (best_burst + 1) ? : 1;
-
+	unsigned int minrun = rr * (p->burst + 1) / (best_burst + 1) ? : 1;
 	if (p->flags & PF_FORKED || (p->mm &&
-		(p->runtime + sleep_time < minrun ||
+		(NS_TO_JIFFIES(p->runtime + sleep_time) < minrun ||
 		((!sched_interactive || sched_compute) &&
-		p->runtime + sleep_time < rr)))) {
-		unsigned long total_run = p->totalrun + p->runtime;
+		NS_TO_JIFFIES(p->runtime + sleep_time) < rr)))) {
+		unsigned long ns_totalrun = p->totalrun + p->runtime;
+		unsigned long total_run = NS_TO_JIFFIES(ns_totalrun);
 		p->flags &= ~PF_FORKED;
 		if (p->slice - total_run < 1) {
 			p->totalrun = 0;
 			dec_burst(p);
 		} else {
 			unsigned int intervals = total_run / rr;
-			unsigned long remainder;
-			p->totalrun = total_run;
+			unsigned int remainder;
+			p->totalrun = ns_totalrun;
 			p->slice -= intervals * rr;
 			if (p->slice <= rr) {
 				p->totalrun = 0;
@@ -383,7 +379,7 @@ static void recalc_task_prio(task_t *p,
 			}
 		}
 	} else {
-		if (p->totalrun > (best_burst - p->burst) * rr)
+		if (NS_TO_JIFFIES(p->totalrun) > (best_burst - p->burst) * rr)
 			dec_burst(p);
 		else if (!(p->flags & PF_UISLEEP || p->totalrun))
 			inc_burst(p);
@@ -1772,29 +1768,6 @@ DEFINE_PER_CPU(struct kernel_stat, kstat
 EXPORT_PER_CPU_SYMBOL(kstat);
 
 /*
- * Tasks that run out of time_slice but still have slice left get
- * requeued with a lower priority && rr_interval time_slice.
- */
-static void time_slice_expired(task_t *p, runqueue_t *rq)
-{
-	set_tsk_need_resched(p);
-	dequeue_task(p, rq);
-	p->prio = effective_prio(p);
-	p->time_slice = rr_interval(p);
-	enqueue_task(p, rq);
-}
-
-/*
- * Tasks lose burst each time they use up a full slice().
- */
-static void slice_expired(task_t *p, runqueue_t *rq)
-{
-	dec_burst(p);
-	p->slice = slice(p);
-	time_slice_expired(p, rq);
-}
-
-/*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  *
@@ -1807,8 +1780,6 @@ void scheduler_tick(int user_ticks, int
 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
-	unsigned long long _decrement;
-	long decrement;
 
 	rq->timestamp_last_tick = sched_clock();
 
@@ -1849,23 +1820,29 @@ void scheduler_tick(int user_ticks, int
 
 	spin_lock(&rq->lock);
 	rq->cache_ticks++;
-
-	decrement = JIFFIES_TO_US(1);
-	_decrement = rq->timestamp_last_tick - p->timestamp;
-	_decrement = NS_TO_US(_decrement);
-	if (_decrement > 0 && _decrement < decrement)
-		decrement = _decrement;
-	if (p->slice > decrement && US_TO_JIFFIES(p->slice - decrement))
-		p->slice -= decrement;
-	else {
-		slice_expired(p, rq);
+	/*
+	 * Tasks lose burst each time they use up a full slice().
+	 */
+	if (!--p->slice) {
+		set_tsk_need_resched(p);
+		dequeue_task(p, rq);
+		dec_burst(p);
+		p->slice = slice(p);
+		p->prio = effective_prio(p);
+		p->time_slice = rr_interval(p);
+		enqueue_task(p, rq);
 		goto out_unlock;
 	}
-	if (p->time_slice > decrement &&
-		US_TO_JIFFIES(p->time_slice - decrement))
-		p->time_slice -= decrement;
-	else {
-		time_slice_expired(p, rq);
+	/*
+	 * Tasks that run out of time_slice but still have slice left get
+	 * requeued with a lower priority && rr_interval time_slice.
+	 */
+	if (!--p->time_slice) {
+		set_tsk_need_resched(p);
+		dequeue_task(p, rq);
+		p->prio = effective_prio(p);
+		p->time_slice = rr_interval(p);
+		enqueue_task(p, rq);
 		goto out_unlock;
 	}
 	if (rq->preempted && rq->cache_ticks >= cache_delay)
@@ -2028,26 +2005,8 @@ need_resched:
 
 	release_kernel_lock(prev);
 	now = sched_clock();
-	prev->runtime = NS_TO_US(now - prev->timestamp) ? : 1;
-	if (prev->mm && prev->policy != SCHED_FIFO &&
-		prev->state == TASK_RUNNING &&
-		prev->timestamp > rq->timestamp_last_tick) {
-		/*
-		 * We have not run through a scheduler_tick and are
-		 * still running so charge us with the runtime.
-		 */
-		if (unlikely(US_TO_JIFFIES(prev->slice -
-			prev->runtime) < 1))
-			slice_expired(prev, rq);
-		else if (unlikely(US_TO_JIFFIES(prev->time_slice -
-			prev->runtime) < 1))
-			time_slice_expired(prev, rq);
-		else {
-			prev->slice -= prev->runtime;
-			prev->time_slice -= prev->runtime;
-		}
-	}
-	prev->timestamp = now;
+
+	prev->runtime = now - prev->timestamp;
 
 	spin_lock_irq(&rq->lock);
 	/*
@@ -2101,6 +2060,7 @@ switch_tasks:
 	prefetch(next);
 	clear_tsk_need_resched(prev);
 	RCU_qsctr(task_cpu(prev))++;
+	prev->timestamp = now;
 	if (next->flags & PF_YIELDED) {
 		next->flags &= ~PF_YIELDED;
 		dequeue_task(next, rq);
@@ -2824,7 +2784,7 @@ asmlinkage long sys_sched_yield(void)
 
 	dequeue_task(current, rq);
 	current->slice = slice(current);
-	current->time_slice = rr_interval(current);
+	current->time_slice = RR_INTERVAL();
 	if (likely(!rt_task(current) && !batch_task(current))) {
 		current->flags |= PF_YIELDED;
 		current->prio = MAX_PRIO - 2;
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/kernel/sysctl.c linux-2.6.8.1-ck9/kernel/sysctl.c
--- linux-2.6.8.1-ck8/kernel/sysctl.c	2004-10-02 12:28:04.933116364 +1000
+++ linux-2.6.8.1-ck9/kernel/sysctl.c	2004-10-02 12:28:15.383477462 +1000
@@ -727,6 +727,14 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_HARDMAPLIMIT,
+		.procname	= "hardmaplimit",
+		.data		= &vm_hardmaplimit,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #ifdef CONFIG_HUGETLB_PAGE
 	{
 		.ctl_name	= VM_HUGETLB_PAGES,
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/Makefile linux-2.6.8.1-ck9/Makefile
--- linux-2.6.8.1-ck8/Makefile	2004-10-02 12:28:04.934116207 +1000
+++ linux-2.6.8.1-ck9/Makefile	2004-10-02 12:28:15.384477305 +1000
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 8
-EXTRAVERSION = .1-ck8
+EXTRAVERSION = .1-ck9
 NAME=Zonked Quokka
 
 # *DOCUMENTATION*
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.8.1-ck8/mm/vmscan.c linux-2.6.8.1-ck9/mm/vmscan.c
--- linux-2.6.8.1-ck8/mm/vmscan.c	2004-10-02 12:28:04.941115110 +1000
+++ linux-2.6.8.1-ck9/mm/vmscan.c	2004-10-02 12:28:15.391476208 +1000
@@ -116,6 +116,7 @@ struct shrinker {
 #endif
 
 int vm_mapped = 66;
+int vm_hardmaplimit = 1;
 static long total_memory;
 
 static LIST_HEAD(shrinker_list);
@@ -645,6 +646,7 @@ refill_inactive_zone(struct zone *zone,
 	int pgdeactivate = 0;
 	int pgscanned = 0;
 	int nr_pages = sc->nr_to_scan;
+	unsigned int mapped_ratio;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
 	LIST_HEAD(l_inactive);	/* Pages to go onto the inactive_list */
 	LIST_HEAD(l_active);	/* Pages to go onto the active_list */
@@ -679,13 +681,16 @@ refill_inactive_zone(struct zone *zone,
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
+	mapped_ratio = (sc->nr_mapped * 100) / total_memory;
+
 	while (!list_empty(&l_hold)) {
 		page = lru_to_page(&l_hold);
 		list_del(&page->lru);
 		if (page_mapped(page)) {
-			if (zone->zone_pgdat->mapped_nrpages) {
-				list_add(&page->lru, &l_active);
-				continue;
+			if (zone->zone_pgdat->mapped_nrpages ||
+				(vm_hardmaplimit && mapped_ratio < vm_mapped)) {
+					list_add(&page->lru, &l_active);
+					continue;
 			}
 			page_map_lock(page);
 			if (page_referenced(page)) {