diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/arch/s390/appldata/appldata_base.c linux-2.6.14-ck4/arch/s390/appldata/appldata_base.c
--- linux-2.6.14-ck3/arch/s390/appldata/appldata_base.c	2005-08-29 13:31:20.000000000 +1000
+++ linux-2.6.14-ck4/arch/s390/appldata/appldata_base.c	2005-11-12 12:23:13.000000000 +1100
@@ -592,12 +592,15 @@ int appldata_register_ops(struct appldat
  */
 void appldata_unregister_ops(struct appldata_ops *ops)
 {
+	void *table;
 	spin_lock(&appldata_ops_lock);
-	unregister_sysctl_table(ops->sysctl_header);
 	list_del(&ops->list);
-	kfree(ops->ctl_table);
+	/* at that point any incoming access will fail */
+	table = ops->ctl_table;
 	ops->ctl_table = NULL;
 	spin_unlock(&appldata_ops_lock);
+	unregister_sysctl_table(ops->sysctl_header);
+	kfree(table);
 	P_INFO("%s-ops unregistered!\n", ops->name);
 }
 /********************** module-ops management **************************/
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/fs/proc/array.c linux-2.6.14-ck4/fs/proc/array.c
--- linux-2.6.14-ck3/fs/proc/array.c	2005-11-12 12:23:04.000000000 +1100
+++ linux-2.6.14-ck4/fs/proc/array.c	2005-11-12 12:23:13.000000000 +1100
@@ -165,7 +165,7 @@ static inline char * task_state(struct t
 	read_lock(&tasklist_lock);
 	buffer += sprintf(buffer,
 		"State:\t%s\n"
-		"Burst:\t%d\n"
+		"Bonus:\t%d\n"
 		"Tgid:\t%d\n"
 		"Pid:\t%d\n"
 		"PPid:\t%d\n"
@@ -173,7 +173,7 @@ static inline char * task_state(struct t
 		"Uid:\t%d\t%d\t%d\t%d\n"
 		"Gid:\t%d\t%d\t%d\t%d\n",
 		get_task_state(p),
-		p->burst,
+		p->bonus,
 		p->tgid,
 		p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
 		pid_alive(p) && p->ptrace ? p->parent->pid : 0,
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/include/linux/proc_fs.h linux-2.6.14-ck4/include/linux/proc_fs.h
--- linux-2.6.14-ck3/include/linux/proc_fs.h	2005-08-29 13:31:26.000000000 +1000
+++ linux-2.6.14-ck4/include/linux/proc_fs.h	2005-11-12 12:23:13.000000000 +1100
@@ -66,6 +66,7 @@ struct proc_dir_entry {
 	write_proc_t *write_proc;
 	atomic_t count;		/* use count */
 	int deleted;		/* delete flag */
+	void *set;
 };
 
 struct kcore_list {
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/include/linux/sched.h linux-2.6.14-ck4/include/linux/sched.h
--- linux-2.6.14-ck3/include/linux/sched.h	2005-11-12 12:23:04.000000000 +1100
+++ linux-2.6.14-ck4/include/linux/sched.h	2005-11-12 12:23:13.000000000 +1100
@@ -670,7 +670,7 @@ struct task_struct {
 
 	unsigned long long timestamp;
 	unsigned long runtime, totalrun, ns_debit;
-	unsigned int burst;
+	unsigned int bonus;
 	unsigned int slice, time_slice;
 	unsigned long long sched_time; /* sched_clock time spent running */
 
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/include/linux/sysctl.h linux-2.6.14-ck4/include/linux/sysctl.h
--- linux-2.6.14-ck3/include/linux/sysctl.h	2005-11-12 12:23:04.000000000 +1100
+++ linux-2.6.14-ck4/include/linux/sysctl.h	2005-11-12 12:23:13.000000000 +1100
@@ -24,6 +24,7 @@
 #include <linux/compiler.h>
 
 struct file;
+struct completion;
 
 #define CTL_MAXNAME 10		/* how many path components do we allow in a
 				   call to sysctl?   In other words, what is
@@ -930,6 +931,8 @@ struct ctl_table_header
 {
 	ctl_table *ctl_table;
 	struct list_head ctl_entry;
+	int used;
+	struct completion *unregistering;
 };
 
 struct ctl_table_header * register_sysctl_table(ctl_table * table, 
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/kernel/Kconfig.hz linux-2.6.14-ck4/kernel/Kconfig.hz
--- linux-2.6.14-ck3/kernel/Kconfig.hz	2005-11-12 12:23:04.000000000 +1100
+++ linux-2.6.14-ck4/kernel/Kconfig.hz	2005-11-12 12:23:13.000000000 +1100
@@ -21,8 +21,17 @@ choice
 	help
 	  100 HZ is a typical choice for servers, SMP and NUMA systems
 	  with lots of processors that may show reduced performance if
-	  too many timer interrupts are occurring. Laptops should have
-	  better battery life also.
+	  too many timer interrupts are occurring. Laptops may also show
+	  improved battery life.
+
+	config HZ_250_NODEFAULT
+		bool "250 HZ"
+	help
+	 250 HZ is a lousy compromise choice allowing server interactivity
+	 while also showing desktop throughput and no extra power saving on
+	 laptops. Good for when you can't make up your mind.
+
+	 Recommend 100 or 1000 instead.
 
 	config HZ_1000
 		bool "1000 HZ"
@@ -35,5 +44,6 @@ endchoice
 
 config HZ
 	int
 	default 100 if HZ_100
+	default 250 if HZ_250_NODEFAULT
 	default 1000 if HZ_1000
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/kernel/sched.c linux-2.6.14-ck4/kernel/sched.c
--- linux-2.6.14-ck3/kernel/sched.c	2005-11-12 12:23:04.000000000 +1100
+++ linux-2.6.14-ck4/kernel/sched.c	2005-11-12 12:23:14.000000000 +1100
@@ -16,9 +16,9 @@
  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  *  2004-04-02	Scheduler domains code by Nick Piggin
- *  2005-11-02	New staircase scheduling policy by Con Kolivas with help
+ *  2005-11-08	New staircase scheduling policy by Con Kolivas with help
  *		from William Lee Irwin III, Zwane Mwaikambo & Peter Williams.
- *  Staircase v12.2
+ *  Staircase v13
  */
 
 #include <linux/mm.h>
@@ -633,30 +633,11 @@ static inline void __activate_idle_task(
 }
 
 /*
- * burst - extra intervals an interactive task can run for at best priority
- * instead of descending priorities.
+ * Bonus - How much higher than its base priority an interactive task can run.
  */
-static inline unsigned int burst(task_t *p)
+static inline unsigned int bonus(task_t *p)
 {
-	if (likely(!rt_task(p))) {
-		unsigned int task_user_prio = TASK_USER_PRIO(p);
-		return 39 - task_user_prio;
-	} else
-		return p->burst;
-}
-
-static void inc_burst(task_t *p)
-{
-	unsigned int best_burst;
-	best_burst = burst(p);
-	if (p->burst < best_burst)
-		p->burst++;
-}
-
-static void dec_burst(task_t *p)
-{
-	if (p->burst)
-		p->burst--;
+	return TASK_USER_PRIO(p);
 }
 
 static inline unsigned int rr_interval(task_t * p)
@@ -671,33 +652,61 @@ static inline unsigned int rr_interval(t
 
 /*
  * slice - the duration a task runs before getting requeued at its best
- * priority and has its burst decremented.
+ * priority and has its bonus decremented.
  */
 static inline unsigned int slice(task_t *p)
 {
 	unsigned int slice, rr;
+
 	slice = rr = rr_interval(p);
 	if (likely(!rt_task(p)))
-		slice += burst(p) * rr;
+		slice += (39 - TASK_USER_PRIO(p)) * rr;
 	return slice;
 }
 
 /*
- * sched_interactive - sysctl which allows interactive tasks to have bursts
+ * We increase our bonus by sleeping more than the time we ran.
+ * The ratio of sleep to run gives us the cpu% that we last ran and determines
+ * the maximum bonus we can acquire.
+ */
+static void inc_bonus(task_t *p, unsigned long totalrun, unsigned long sleep)
+{
+	unsigned int best_bonus;
+
+	best_bonus = sleep / (totalrun + 1);
+	if (p->bonus >= best_bonus)
+		return;
+
+	p->bonus++;
+	best_bonus = bonus(p);
+	if (p->bonus > best_bonus)
+		p->bonus = best_bonus;
+}
+
+static void dec_bonus(task_t *p)
+{
+	if (p->bonus)
+		p->bonus--;
+}
+
+/*
+ * sched_interactive - sysctl which allows an interactive task's bonus to
+ * raise its priority.
 */
 int sched_interactive = 1;
 
 /*
- * effective_prio - dynamic priority dependent on burst.
+ * effective_prio - dynamic priority dependent on bonus.
  * The priority normally decreases by one each RR_INTERVAL.
- * As the burst increases the priority stays at the top "stair" or
+ * As the bonus increases the initial priority starts at a higher "stair" or
  * priority for longer.
  */
 static int effective_prio(task_t *p)
 {
 	int prio;
-	unsigned int full_slice, used_slice, first_slice;
-	unsigned int best_burst, rr;
+	unsigned int full_slice, used_slice = 0;
+	unsigned int best_bonus, rr;
+
 	if (rt_task(p))
 		return p->prio;
 	if (batch_task(p)) {
@@ -722,20 +731,17 @@ static int effective_prio(task_t *p)
 		return MAX_RT_PRIO;
 	}
 
-	best_burst = burst(p);
 	full_slice = slice(p);
+	if (full_slice > p->slice)
+		used_slice = full_slice - p->slice;
+
+	best_bonus = bonus(p);
+	prio = MAX_RT_PRIO + best_bonus;
+	if (sched_interactive && !sched_compute)
+		prio -= p->bonus;
+
 	rr = rr_interval(p);
-	used_slice = full_slice - p->slice;
-	if (p->burst > best_burst)
-		p->burst = best_burst;
-	first_slice = rr;
-	if (sched_interactive && !sched_compute && p->mm)
-		first_slice *= (p->burst + 1);
-	prio = MAX_PRIO - 2 - best_burst;
-
-	if (used_slice < first_slice)
-		return prio;
-	prio += 1 + (used_slice - first_slice) / rr;
+	prio += used_slice / rr;
 	if (prio >= MAX_PRIO - 2)
 		prio = MAX_PRIO - 2;
 	return prio;
@@ -747,7 +753,7 @@ static void continue_slice(task_t *p)
 
 	if (total_run >= p->slice) {
 		p->totalrun -= JIFFIES_TO_NS(p->slice);
-		dec_burst(p);
+		dec_bonus(p);
 	} else {
 		unsigned int remainder;
 		p->slice -= total_run;
@@ -769,16 +775,13 @@ static inline void recalc_task_prio(task
 
 	/*
 	 * Priority is elevated back to best by amount of sleep_time.
-	 * sleep_time is scaled down by number of tasks currently running.
 	 */
-	if (rq_running > 1)
-		sleep_time /= rq_running;
 
 	p->totalrun += p->runtime;
 	if (NS_TO_JIFFIES(p->totalrun) >= p->slice &&
 	    NS_TO_JIFFIES(sleep_time) < p->slice) {
 		p->flags &= ~PF_NONSLEEP;
-		dec_burst(p);
+		dec_bonus(p);
 		p->totalrun -= JIFFIES_TO_NS(p->slice);
 		if (sleep_time > p->totalrun)
 			p->totalrun = 0;
@@ -800,7 +803,7 @@ static inline void recalc_task_prio(task
 
 	if (sleep_time >= p->totalrun) {
 		if (!(p->flags & PF_NONSLEEP))
-			inc_burst(p);
+			inc_bonus(p, p->totalrun, sleep_time);
 		p->totalrun = 0;
 		goto out;
 	}
@@ -820,6 +823,8 @@ out:
 static void activate_task(task_t *p, runqueue_t *rq, int local)
 {
 	unsigned long long now = sched_clock();
+	unsigned long rr = rr_interval(p);
+
 #ifdef CONFIG_SMP
 	if (!local) {
 		/* Compensate for drifting sched_clock */
@@ -829,7 +834,7 @@ static void activate_task(task_t *p, run
 	}
 #endif
 	p->slice = slice(p);
-	p->time_slice = rr_interval(p);
+	p->time_slice = p->slice % rr ? : rr;
 	recalc_task_prio(p, now, rq->nr_running);
 	p->flags &= ~PF_NONSLEEP;
 	p->prio = effective_prio(p);
@@ -1453,10 +1458,10 @@ void fastcall wake_up_new_task(task_t *p
 	this_cpu = smp_processor_id();
 	cpu = task_cpu(p);
 
-	/*
-	 * Forked process gets no burst to prevent fork bombs.
+	/*
+	 * Forked process gets no bonus to prevent fork bombs.
 	 */
-	p->burst = 0;
+	p->bonus = 0;
 
 	if (likely(cpu == this_cpu)) {
 		current->flags |= PF_NONSLEEP;
@@ -2599,10 +2604,10 @@ void scheduler_tick(void)
 		goto out_unlock;
 	p->ns_debit %= NSJIFFY;
 	/*
-	 * Tasks lose burst each time they use up a full slice().
+	 * Tasks lose bonus each time they use up a full slice().
 	 */
 	if (!--p->slice) {
-		dec_burst(p);
+		dec_bonus(p);
 		p->slice = slice(p);
 		time_slice_expired(p, rq);
 		p->totalrun = 0;
@@ -3435,8 +3440,8 @@ void set_user_nice(task_t *p, long nice)
 	delta = new_prio - old_prio;
 	p->static_prio = NICE_TO_PRIO(nice);
 	p->prio += delta;
-	if (p->burst > burst(p))
-		p->burst = burst(p);
+	if (p->bonus > bonus(p))
+		p->bonus = bonus(p);
 
 	if (queued) {
 		enqueue_task(p, rq);
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/kernel/sysctl.c linux-2.6.14-ck4/kernel/sysctl.c
--- linux-2.6.14-ck3/kernel/sysctl.c	2005-11-12 12:23:04.000000000 +1100
+++ linux-2.6.14-ck4/kernel/sysctl.c	2005-11-12 12:23:14.000000000 +1100
@@ -169,7 +169,7 @@ struct file_operations proc_sys_file_ope
 
 extern struct proc_dir_entry *proc_sys_root;
 
-static void register_proc_table(ctl_table *, struct proc_dir_entry *);
+static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
 #endif
 
@@ -1038,10 +1038,51 @@ static ctl_table dev_table[] = {
 
 extern void init_irq_proc (void);
 
+static DEFINE_SPINLOCK(sysctl_lock);
+
+/* called under sysctl_lock */
+static int use_table(struct ctl_table_header *p)
+{
+	if (unlikely(p->unregistering))
+		return 0;
+	p->used++;
+	return 1;
+}
+
+/* called under sysctl_lock */
+static void unuse_table(struct ctl_table_header *p)
+{
+	if (!--p->used)
+		if (unlikely(p->unregistering))
+			complete(p->unregistering);
+}
+
+/* called under sysctl_lock, will reacquire if has to wait */
+static void start_unregistering(struct ctl_table_header *p)
+{
+	/*
+	 * if p->used is 0, nobody will ever touch that entry again;
+	 * we'll eliminate all paths to it before dropping sysctl_lock
+	 */
+	if (unlikely(p->used)) {
+		struct completion wait;
+		init_completion(&wait);
+		p->unregistering = &wait;
+		spin_unlock(&sysctl_lock);
+		wait_for_completion(&wait);
+		spin_lock(&sysctl_lock);
+	}
+	/*
+	 * do not remove from the list until nobody holds it; walking the
+	 * list in do_sysctl() relies on that.
+	 */
+	list_del_init(&p->ctl_entry);
+}
+
 void __init sysctl_init(void)
 {
 #ifdef CONFIG_PROC_FS
-	register_proc_table(root_table, proc_sys_root);
+	register_proc_table(root_table, proc_sys_root, &root_table_header);
 	init_irq_proc();
 #endif
 }
 
@@ -1050,6 +1091,7 @@ int do_sysctl(int __user *name, int nlen
 	       void __user *newval, size_t newlen)
 {
 	struct list_head *tmp;
+	int error = -ENOTDIR;
 
 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
 		return -ENOTDIR;
@@ -1058,20 +1100,30 @@ int do_sysctl(int __user *name, int nlen
 		if (!oldlenp || get_user(old_len, oldlenp))
 			return -EFAULT;
 	}
+	spin_lock(&sysctl_lock);
 	tmp = &root_table_header.ctl_entry;
 	do {
 		struct ctl_table_header *head =
 			list_entry(tmp, struct ctl_table_header, ctl_entry);
 		void *context = NULL;
-		int error = parse_table(name, nlen, oldval, oldlenp,
+
+		if (!use_table(head))
+			continue;
+
+		spin_unlock(&sysctl_lock);
+
+		error = parse_table(name, nlen, oldval, oldlenp,
 				newval, newlen, head->ctl_table,
 				&context);
 		kfree(context);
+
+		spin_lock(&sysctl_lock);
+		unuse_table(head);
 		if (error != -ENOTDIR)
-			return error;
-		tmp = tmp->next;
-	} while (tmp != &root_table_header.ctl_entry);
-	return -ENOTDIR;
+			break;
+	} while ((tmp = tmp->next) != &root_table_header.ctl_entry);
+	spin_unlock(&sysctl_lock);
+	return error;
 }
 
 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
@@ -1282,12 +1334,16 @@ struct ctl_table_header *register_sysctl
 		return NULL;
 	tmp->ctl_table = table;
 	INIT_LIST_HEAD(&tmp->ctl_entry);
+	tmp->used = 0;
+	tmp->unregistering = NULL;
+	spin_lock(&sysctl_lock);
 	if (insert_at_head)
 		list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
 	else
 		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+	spin_unlock(&sysctl_lock);
 #ifdef CONFIG_PROC_FS
-	register_proc_table(table, proc_sys_root);
+	register_proc_table(table, proc_sys_root, tmp);
 #endif
 	return tmp;
 }
@@ -1301,10 +1357,13 @@ struct ctl_table_header *register_sysctl
  */
 void unregister_sysctl_table(struct ctl_table_header * header)
 {
-	list_del(&header->ctl_entry);
+	might_sleep();
+	spin_lock(&sysctl_lock);
+	start_unregistering(header);
 #ifdef CONFIG_PROC_FS
 	unregister_proc_table(header->ctl_table, proc_sys_root);
 #endif
+	spin_unlock(&sysctl_lock);
 	kfree(header);
 }
 
@@ -1315,7 +1374,7 @@ void unregister_sysctl_table(struct ctl_
 #ifdef CONFIG_PROC_FS
 
 /* Scan the sysctl entries in table and add them all into /proc */
-static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
+static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
 {
 	struct proc_dir_entry *de;
 	int len;
@@ -1351,13 +1410,14 @@ static void register_proc_table(ctl_tabl
 			de = create_proc_entry(table->procname, mode, root);
 			if (!de)
 				continue;
+			de->set = set;
 			de->data = (void *) table;
 			if (table->proc_handler)
 				de->proc_fops = &proc_sys_file_operations;
 		}
 		table->de = de;
 		if (de->mode & S_IFDIR)
-			register_proc_table(table->child, de);
+			register_proc_table(table->child, de, set);
 	}
 }
 
@@ -1382,6 +1442,13 @@ static void unregister_proc_table(ctl_ta
 			continue;
 		}
 
+		/*
+		 * In any case, mark the entry as goner; we'll keep it
+		 * around if it's busy, but we'll know to do nothing with
+		 * its fields. We are under sysctl_lock here.
+		 */
+		de->data = NULL;
+
 		/* Don't unregister proc entries that are still being used.. */
 		if (atomic_read(&de->count))
 			continue;
@@ -1395,27 +1462,38 @@ static ssize_t do_rw_proc(int write, str
 		       size_t count, loff_t *ppos)
 {
 	int op;
-	struct proc_dir_entry *de;
+	struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
 	struct ctl_table *table;
 	size_t res;
-	ssize_t error;
-
-	de = PDE(file->f_dentry->d_inode);
-	if (!de || !de->data)
-		return -ENOTDIR;
-	table = (struct ctl_table *) de->data;
-	if (!table || !table->proc_handler)
-		return -ENOTDIR;
-	op = (write ? 002 : 004);
-	if (ctl_perm(table, op))
-		return -EPERM;
+	ssize_t error = -ENOTDIR;
 
-	res = count;
-
-	error = (*table->proc_handler) (table, write, file, buf, &res, ppos);
-	if (error)
-		return error;
-	return res;
+	spin_lock(&sysctl_lock);
+	if (de && de->data && use_table(de->set)) {
+		/*
+		 * at that point we know that sysctl was not unregistered
+		 * and won't be until we finish
+		 */
+		spin_unlock(&sysctl_lock);
+		table = (struct ctl_table *) de->data;
+		if (!table || !table->proc_handler)
+			goto out;
+		error = -EPERM;
+		op = (write ? 002 : 004);
+		if (ctl_perm(table, op))
+			goto out;
+
+		/* careful: calling conventions are nasty here */
+		res = count;
+		error = (*table->proc_handler)(table, write, file,
+						buf, &res, ppos);
+		if (!error)
+			error = res;
+	out:
+		spin_lock(&sysctl_lock);
+		unuse_table(de->set);
+	}
+	spin_unlock(&sysctl_lock);
+	return error;
 }
 
 static int proc_opensys(struct inode *inode, struct file *file)
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/Makefile linux-2.6.14-ck4/Makefile
--- linux-2.6.14-ck3/Makefile	2005-11-12 12:23:04.000000000 +1100
+++ linux-2.6.14-ck4/Makefile	2005-11-12 12:23:14.000000000 +1100
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 14
-EXTRAVERSION = -ck3
+EXTRAVERSION = -ck4
 NAME=Cognac Affected Albatross
 
 # *DOCUMENTATION*
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.14-ck3/net/core/datagram.c linux-2.6.14-ck4/net/core/datagram.c
--- linux-2.6.14-ck3/net/core/datagram.c	2005-10-28 20:22:03.000000000 +1000
+++ linux-2.6.14-ck4/net/core/datagram.c	2005-11-12 12:23:14.000000000 +1100
@@ -213,6 +213,10 @@ int skb_copy_datagram_iovec(const struct
 {
 	int i, err, fraglen, end = 0;
 	struct sk_buff *next = skb_shinfo(skb)->frag_list;
+
+	if (!len)
+		return 0;
+
 next_skb:
 	fraglen = skb_headlen(skb);
 	i = -1;
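
---

For reference, the sysctl.c changes above amount to a small quiesce protocol: readers take a reference on a table under sysctl_lock, and unregistration first marks the table as dying, then waits for the reference count to drain before freeing anything. The following is a minimal user-space model of that protocol - an illustrative sketch only, not kernel code and not part of the patch. A pthread mutex stands in for sysctl_lock and a condition variable for the kernel's struct completion; the helper names merely mirror the patch's use_table()/unuse_table()/start_unregistering(). Build with: cc -pthread model.c

#include <pthread.h>
#include <stdio.h>

struct table_header {
	int used;		/* readers currently inside the table */
	int unregistering;	/* set once teardown has begun */
	pthread_cond_t drained;	/* signalled when the last reader leaves */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* called with `lock` held: refuse new readers once teardown has started */
static int use_table(struct table_header *p)
{
	if (p->unregistering)
		return 0;
	p->used++;
	return 1;
}

/* called with `lock` held: the last reader out wakes the unregisterer */
static void unuse_table(struct table_header *p)
{
	if (!--p->used && p->unregistering)
		pthread_cond_signal(&p->drained);
}

/* called with `lock` held: blocks until every earlier reader has left */
static void start_unregistering(struct table_header *p)
{
	p->unregistering = 1;
	while (p->used)
		pthread_cond_wait(&p->drained, &lock);
}

/* a reader: the shape do_rw_proc() takes after the patch */
static void reader(struct table_header *p)
{
	pthread_mutex_lock(&lock);
	if (use_table(p)) {
		pthread_mutex_unlock(&lock);
		/* safe to touch the table here: it cannot be freed */
		pthread_mutex_lock(&lock);
		unuse_table(p);
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	struct table_header t = { 0, 0, PTHREAD_COND_INITIALIZER };

	reader(&t);			/* in the kernel, readers run concurrently */
	pthread_mutex_lock(&lock);
	start_unregistering(&t);	/* returns only once t.used == 0 */
	pthread_mutex_unlock(&lock);
	puts("table quiesced; safe to free");
	return 0;
}

This is why the appldata hunk can move unregister_sysctl_table() outside its own spinlock: once start_unregistering() returns, no /proc reader can still be inside the table, so the kfree() that follows is safe.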
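Likewise, the staircase v13 priority arithmetic can be tried out in isolation. The sketch below distils only the effective_prio() hunk above; it assumes sched_interactive is 1 and sched_compute is 0, and hard-codes mainline's priority constants (MAX_RT_PRIO 100, MAX_PRIO 140). It is an illustration of the stair-step arithmetic, not scheduler code.

#include <stdio.h>

#define MAX_RT_PRIO	100
#define MAX_PRIO	(MAX_RT_PRIO + 40)

/*
 * task_user_prio mirrors TASK_USER_PRIO(p): 20 for a nice-0 task, so a
 * nice-0 task's bonus may reach 20 (bonus() simply returns it after the
 * patch).  used_slice and rr are in jiffies.
 */
static unsigned int effective_prio(unsigned int task_user_prio,
				   unsigned int cur_bonus,
				   unsigned int used_slice,
				   unsigned int rr)
{
	unsigned int prio = MAX_RT_PRIO + task_user_prio;	/* best_bonus */

	prio -= cur_bonus;		/* interactive bonus raises priority */
	prio += used_slice / rr;	/* descend one stair per RR interval */
	if (prio >= MAX_PRIO - 2)
		prio = MAX_PRIO - 2;	/* MAX_PRIO-1 is reserved for batch */
	return prio;
}

int main(void)
{
	/* nice-0 task, 6-jiffy RR interval: fully rested vs. fully drained */
	printf("full bonus, slice untouched: %u\n",
	       effective_prio(20, 20, 0, 6));	/* 100: best dynamic prio */
	printf("no bonus, 60 jiffies used:   %u\n",
	       effective_prio(20, 0, 60, 6));	/* 130: ten stairs down   */
	return 0;
}

The shape of the change versus v12.2 is visible here: instead of granting extra first-slice intervals at the top stair (the old burst logic), the bonus now simply lifts the starting stair, and the task walks down one priority per RR interval of slice it consumes.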