From: Ashwin Chaugule Instead of using TIMEOUT as a parameter to transfer the token, I think a better solution is to hand it over to a process that proves its eligibility. What my scheme does is find out how frequently a process is calling the swap-in and no-page functions. The processes that call these more frequently get a higher priority. The idea is to guarantee that a high priority process gets the token. The priority of a process is determined by the number of consecutive calls to swap-in and no-page. I mean "consecutive" not from the scheduler point of view, but from the process point of view. In other words, if the task called these functions every time it was scheduled, it means it is not getting any further with its execution. This way, it's a matter of a simple comparison of task priorities to decide whether to transfer the token or not. I did some testing with the two patches combined and the results are as follows: Current Upstream implementation: =============================== root@ashbert:~/crap# time ./qsbench -n 9000000 -p 3 -s 1420300 seed = 1420300 seed = 1420300 seed = 1420300 real 3m40.124s user 0m12.060s sys 0m0.940s -------------reboot----------------- With my implementation : ======================== root@ashbert:~/crap# time ./qsbench -n 9000000 -p 3 -s 1420300 seed = 1420300 seed = 1420300 seed = 1420300 real 2m58.708s user 0m11.880s sys 0m1.070s Kernel build ------------ mem=64M Upstream: 2.6.18 make -j 4 vmlinux real 31m26.021s user 4m32.140s sys 0m23.340s ------------------ My patch: real 27m42.984s user 4m33.800s sys 0m22.080s Signed-off-by: Ashwin Chaugule Cc: Rik van Riel Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- include/linux/sched.h | 17 ++++- include/linux/swap.h | 1 kernel/sysctl.c | 11 --- mm/thrash.c | 128 ++++++++++++++++++++-------------------- 4 files changed, 79 insertions(+), 78 deletions(-) diff -puN include/linux/sched.h~swap-token-new-scheme-to-preempt-token include/linux/sched.h --- 
a/include/linux/sched.h~swap-token-new-scheme-to-preempt-token +++ a/include/linux/sched.h @@ -344,9 +344,20 @@ struct mm_struct { /* Architecture-specific MM context */ mm_context_t context; - /* Token based thrashing protection. */ - unsigned long swap_token_time; - char recent_pagein; + /* Swap token stuff */ + /* + * Last value of global fault stamp as seen by this process. + * In other words, this value gives an indication of how long + * it has been since this task got the token + */ + unsigned int faultstamp; + + /* + * Deciding factor ! + * Increment if (global_faults - faultstamp < FAULTSTAMP_DIFF ) + * else decrement. High priority wins the token. + */ + int token_priority; /* coredumping support */ int core_waiters; diff -puN include/linux/swap.h~swap-token-new-scheme-to-preempt-token include/linux/swap.h --- a/include/linux/swap.h~swap-token-new-scheme-to-preempt-token +++ a/include/linux/swap.h @@ -259,7 +259,6 @@ extern spinlock_t swap_lock; /* linux/mm/thrash.c */ extern struct mm_struct * swap_token_mm; -extern unsigned long swap_token_default_timeout; extern void grab_swap_token(void); extern void __put_swap_token(struct mm_struct *); diff -puN kernel/sysctl.c~swap-token-new-scheme-to-preempt-token kernel/sysctl.c --- a/kernel/sysctl.c~swap-token-new-scheme-to-preempt-token +++ a/kernel/sysctl.c @@ -973,17 +973,6 @@ static ctl_table vm_table[] = { .extra1 = &zero, }, #endif -#ifdef CONFIG_SWAP - { - .ctl_name = VM_SWAP_TOKEN_TIMEOUT, - .procname = "swap_token_timeout", - .data = &swap_token_default_timeout, - .maxlen = sizeof(swap_token_default_timeout), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - .strategy = &sysctl_jiffies, - }, -#endif #ifdef CONFIG_NUMA { .ctl_name = VM_ZONE_RECLAIM_MODE, diff -puN mm/thrash.c~swap-token-new-scheme-to-preempt-token mm/thrash.c --- a/mm/thrash.c~swap-token-new-scheme-to-preempt-token +++ a/mm/thrash.c @@ -14,83 +14,87 @@ #include static DEFINE_SPINLOCK(swap_token_lock); -static unsigned long 
swap_token_timeout; -static unsigned long swap_token_check; -struct mm_struct * swap_token_mm = &init_mm; - -#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2) -#define SWAP_TOKEN_TIMEOUT (300 * HZ) -/* - * Currently disabled; Needs further code to work at HZ * 300. - */ -unsigned long swap_token_default_timeout = SWAP_TOKEN_TIMEOUT; - -/* - * Take the token away if the process had no page faults - * in the last interval, or if it has held the token for - * too long. - */ -#define SWAP_TOKEN_ENOUGH_RSS 1 -#define SWAP_TOKEN_TIMED_OUT 2 +struct mm_struct * swap_token_mm = NULL; +unsigned long global_faults = 0; + +#define SWAP_TOKEN_PREEMPT 1 +#define FAULTSTAMP_DIFF 5 + static int should_release_swap_token(struct mm_struct *mm) { int ret = 0; - if (!mm->recent_pagein) - ret = SWAP_TOKEN_ENOUGH_RSS; - else if (time_after(jiffies, swap_token_timeout)) - ret = SWAP_TOKEN_TIMED_OUT; - mm->recent_pagein = 0; + if ( current->mm->token_priority > mm->token_priority ) + ret = SWAP_TOKEN_PREEMPT; + return ret; } -/* - * Try to grab the swapout protection token. We only try to - * grab it once every TOKEN_CHECK_INTERVAL, both to prevent - * SMP lock contention and to check that the process that held - * the token before is no longer thrashing. - */ void grab_swap_token(void) { - struct mm_struct *mm; + struct mm_struct *mm_temp; int reason; - /* We have the token. Let others know we still need it. */ - if (has_swap_token(current->mm)) { - current->mm->recent_pagein = 1; - if (unlikely(!swap_token_default_timeout)) - disable_swap_token(); + /* + * This gives an indication of the number of processes + * contending for the token. + */ + + global_faults++; + + if (!spin_trylock(&swap_token_lock)) return; + + /* + * First come first served. 
If a process holding the + token exits, it's up for grabs immediately + */ + + if ( swap_token_mm == NULL ) { + swap_token_mm = current->mm; + swap_token_mm->faultstamp = global_faults; + goto out; } - if (time_after(jiffies, swap_token_check)) { + if ((global_faults - current->mm->faultstamp) < FAULTSTAMP_DIFF ) { - if (!swap_token_default_timeout) { - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - return; - } + /* + * This would mean that too many of the current task's pages + * have been evicted and therefore it's calling swap-in or no-page + * too frequently. + */ + + current->mm->faultstamp = global_faults; + current->mm->token_priority++; + mm_temp = swap_token_mm; + } + else { + /* + * Decrement priority to ensure that the token holder doesn't + * hold on to it for too long. + */ + + if (current->mm->token_priority > 0) + current->mm->token_priority--; + else { + /* + * After this, the process will be able to contend for the token + * again. + */ - /* ... or if we recently held the token. */ - if (time_before(jiffies, current->mm->swap_token_time)) - return; - - if (!spin_trylock(&swap_token_lock)) - return; - - swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - - mm = swap_token_mm; - if ((reason = should_release_swap_token(mm))) { - unsigned long eligible = jiffies; - if (reason == SWAP_TOKEN_TIMED_OUT) { - eligible += swap_token_default_timeout; - } - mm->swap_token_time = eligible; - swap_token_timeout = jiffies + swap_token_default_timeout; - swap_token_mm = current->mm; + current->mm->token_priority = 0; + current->mm->faultstamp = global_faults; } - spin_unlock(&swap_token_lock); + goto out; + } + + if ((reason = should_release_swap_token(mm_temp))) { + current->mm->faultstamp = global_faults; + swap_token_mm = current->mm; } - return; + +out: + spin_unlock(&swap_token_lock); +return; } /* Called on process exit. 
*/ @@ -98,9 +102,7 @@ void __put_swap_token(struct mm_struct * { spin_lock(&swap_token_lock); if (likely(mm == swap_token_mm)) { - mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL; - swap_token_mm = &init_mm; - swap_token_check = jiffies; + swap_token_mm = NULL; } spin_unlock(&swap_token_lock); } _