From: Ashwin Chaugule <ashwin.chaugule@celunite.com>

Instead of using TIMEOUT as a parameter to transfer the token, I think a
better solution is to hand it over to a process that proves its eligibility.

My scheme measures how frequently a process calls the swap-in and no-page
fault paths.  The processes that call these more frequently get a higher
priority.

The idea is to guarantee that a high priority process gets the token.  The
priority of a process is determined by the number of consecutive calls to
swap-in and no-page.  I mean "consecutive" not from the scheduler point of
view, but from the process point of view.  In other words, if the task called
these functions every time it was scheduled, it means it is not getting any
further with its execution.  

This way, it's a matter of a simple comparison of task priorities to
decide whether to transfer the token or not.

I did some testing with the two patches combined and the results are as
follows:

Current Upstream implementation: 
=============================== 

root@ashbert:~/crap# time ./qsbench -n 9000000 -p 3 -s 1420300 
seed = 1420300 
seed = 1420300 
seed = 1420300 

real    3m40.124s 
user    0m12.060s 
sys     0m0.940s 


-------------reboot----------------- 

With my implementation : 
======================== 

root@ashbert:~/crap# time ./qsbench -n 9000000 -p 3 -s 1420300 
seed = 1420300 
seed = 1420300 
seed = 1420300 

real    2m58.708s 
user    0m11.880s 
sys     0m1.070s 


Kernel build
------------
mem=64M

Upstream:
2.6.18
make -j 4 vmlinux


real    31m26.021s
user    4m32.140s
sys     0m23.340s

------------------

My patch:

real    27m42.984s
user    4m33.800s
sys     0m22.080s


Signed-off-by: Ashwin Chaugule <ashwin.chaugule@celunite.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 include/linux/sched.h |   17 ++++-
 include/linux/swap.h  |    1 
 kernel/sysctl.c       |   11 ---
 mm/thrash.c           |  128 ++++++++++++++++++++--------------------
 4 files changed, 79 insertions(+), 78 deletions(-)

diff -puN include/linux/sched.h~swap-token-new-scheme-to-preempt-token include/linux/sched.h
--- a/include/linux/sched.h~swap-token-new-scheme-to-preempt-token
+++ a/include/linux/sched.h
@@ -344,9 +344,20 @@ struct mm_struct {
 	/* Architecture-specific MM context */
 	mm_context_t context;
 
-	/* Token based thrashing protection. */
-	unsigned long swap_token_time;
-	char recent_pagein;
+	/* Swap token stuff */
+	/*
+	 * Last value of global fault stamp as seen by this process.
+	 * In other words, this value gives an indication of how long
+	 * it has been since this task got the token
+	 */
+	unsigned int faultstamp;
+
+       /*
+	* Deciding factor !
+	* Increment if (global_faults - faultstamp < FAULTSTAMP_DIFF )
+        * else decrement. High priority wins the token.
+	*/
+	int token_priority;
 
 	/* coredumping support */
 	int core_waiters;
diff -puN include/linux/swap.h~swap-token-new-scheme-to-preempt-token include/linux/swap.h
--- a/include/linux/swap.h~swap-token-new-scheme-to-preempt-token
+++ a/include/linux/swap.h
@@ -259,7 +259,6 @@ extern spinlock_t swap_lock;
 
 /* linux/mm/thrash.c */
 extern struct mm_struct * swap_token_mm;
-extern unsigned long swap_token_default_timeout;
 extern void grab_swap_token(void);
 extern void __put_swap_token(struct mm_struct *);
 
diff -puN kernel/sysctl.c~swap-token-new-scheme-to-preempt-token kernel/sysctl.c
--- a/kernel/sysctl.c~swap-token-new-scheme-to-preempt-token
+++ a/kernel/sysctl.c
@@ -973,17 +973,6 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 	},
 #endif
-#ifdef CONFIG_SWAP
-	{
-		.ctl_name	= VM_SWAP_TOKEN_TIMEOUT,
-		.procname	= "swap_token_timeout",
-		.data		= &swap_token_default_timeout,
-		.maxlen		= sizeof(swap_token_default_timeout),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-		.strategy	= &sysctl_jiffies,
-	},
-#endif
 #ifdef CONFIG_NUMA
 	{
 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
diff -puN mm/thrash.c~swap-token-new-scheme-to-preempt-token mm/thrash.c
--- a/mm/thrash.c~swap-token-new-scheme-to-preempt-token
+++ a/mm/thrash.c
@@ -14,83 +14,87 @@
 #include <linux/swap.h>
 
 static DEFINE_SPINLOCK(swap_token_lock);
-static unsigned long swap_token_timeout;
-static unsigned long swap_token_check;
-struct mm_struct * swap_token_mm = &init_mm;
-
-#define SWAP_TOKEN_CHECK_INTERVAL (HZ * 2)
-#define SWAP_TOKEN_TIMEOUT	(300 * HZ)
-/*
- * Currently disabled; Needs further code to work at HZ * 300.
- */
-unsigned long swap_token_default_timeout = SWAP_TOKEN_TIMEOUT;
-
-/*
- * Take the token away if the process had no page faults
- * in the last interval, or if it has held the token for
- * too long.
- */
-#define SWAP_TOKEN_ENOUGH_RSS 1
-#define SWAP_TOKEN_TIMED_OUT 2
+struct mm_struct * swap_token_mm = NULL;
+unsigned long global_faults = 0;
+
+#define SWAP_TOKEN_PREEMPT 1
+#define FAULTSTAMP_DIFF 5
+
 static int should_release_swap_token(struct mm_struct *mm)
 {
 	int ret = 0;
-	if (!mm->recent_pagein)
-		ret = SWAP_TOKEN_ENOUGH_RSS;
-	else if (time_after(jiffies, swap_token_timeout))
-		ret = SWAP_TOKEN_TIMED_OUT;
-	mm->recent_pagein = 0;
+	if ( current->mm->token_priority > mm->token_priority )
+		ret = SWAP_TOKEN_PREEMPT;
+
 	return ret;
 }
 
-/*
- * Try to grab the swapout protection token.  We only try to
- * grab it once every TOKEN_CHECK_INTERVAL, both to prevent
- * SMP lock contention and to check that the process that held
- * the token before is no longer thrashing.
- */
 void grab_swap_token(void)
 {
-	struct mm_struct *mm;
+	struct mm_struct *mm_temp;
 	int reason;
 
-	/* We have the token. Let others know we still need it. */
-	if (has_swap_token(current->mm)) {
-		current->mm->recent_pagein = 1;
-		if (unlikely(!swap_token_default_timeout))
-			disable_swap_token();
+	/*
+	 * This gives an indication of the number of processes
+	 * contending for the token.
+	 */
+
+	global_faults++;
+
+	if (!spin_trylock(&swap_token_lock))
 		return;
+
+	/*
+	 * First come first served. If a process holding the
+	 * token exits, it's up for grabs immediately
+	 */
+
+	if ( swap_token_mm == NULL ) {
+		swap_token_mm = current->mm;
+		swap_token_mm->faultstamp = global_faults;
+		goto out;
 	}
 
-	if (time_after(jiffies, swap_token_check)) {
+	if ((global_faults - current->mm->faultstamp) < FAULTSTAMP_DIFF )  {
 
-		if (!swap_token_default_timeout) {
-			swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
-			return;
-		}
+	/*
+	 * This would mean that too many of the current task's pages
+	 * have been evicted and therefore it's calling swap-in or no-page
+	 * too frequently.
+	 */
+
+		current->mm->faultstamp = global_faults;
+		current->mm->token_priority++;
+		mm_temp = swap_token_mm;
+	}
+	else {
+	/*
+	 * Decrement priority to ensure that the token holder doesn't
+	 * hold on to it for too long.
+	 */
+
+		if (current->mm->token_priority > 0)
+			current->mm->token_priority--;
+		else {
+	/*
+	 * After this, the process will be able to contend for the token
+	 * again.
+	 */
 
-		/* ... or if we recently held the token. */
-		if (time_before(jiffies, current->mm->swap_token_time))
-			return;
-
-		if (!spin_trylock(&swap_token_lock))
-			return;
-
-		swap_token_check = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
-
-		mm = swap_token_mm;
-		if ((reason = should_release_swap_token(mm))) {
-			unsigned long eligible = jiffies;
-			if (reason == SWAP_TOKEN_TIMED_OUT) {
-				eligible += swap_token_default_timeout;
-			}
-			mm->swap_token_time = eligible;
-			swap_token_timeout = jiffies + swap_token_default_timeout;
-			swap_token_mm = current->mm;
+			current->mm->token_priority = 0;
+			current->mm->faultstamp = global_faults;
 		}
-		spin_unlock(&swap_token_lock);
+		goto out;
+	}
+
+	if ((reason = should_release_swap_token(mm_temp))) {
+		current->mm->faultstamp = global_faults;
+		swap_token_mm = current->mm;
 	}
-	return;
+
+out:
+	spin_unlock(&swap_token_lock);
+return;
 }
 
 /* Called on process exit. */
@@ -98,9 +102,7 @@ void __put_swap_token(struct mm_struct *
 {
 	spin_lock(&swap_token_lock);
 	if (likely(mm == swap_token_mm)) {
-		mm->swap_token_time = jiffies + SWAP_TOKEN_CHECK_INTERVAL;
-		swap_token_mm = &init_mm;
-		swap_token_check = jiffies;
+		swap_token_mm = NULL;
 	}
 	spin_unlock(&swap_token_lock);
 }
_