diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.11-ck4/drivers/block/ll_rw_blk.c linux-2.6.11-ck5/drivers/block/ll_rw_blk.c
--- linux-2.6.11-ck4/drivers/block/ll_rw_blk.c 2005-04-09 11:17:30.000000000 +1000
+++ linux-2.6.11-ck5/drivers/block/ll_rw_blk.c 2005-04-25 21:12:41.000000000 +1000
@@ -1509,6 +1509,15 @@ request_queue_t *blk_init_queue(request_
 	if (blk_init_free_list(q))
 		goto out_init;
 
+	/*
+	 * if caller didn't supply a lock, they get per-queue locking with
+	 * our embedded lock
+	 */
+	if (!lock) {
+		spin_lock_init(&q->__queue_lock);
+		lock = &q->__queue_lock;
+	}
+
 	q->request_fn = rfn;
 	q->back_merge_fn = ll_back_merge_fn;
 	q->front_merge_fn = ll_front_merge_fn;
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.11-ck4/drivers/scsi/scsi_lib.c linux-2.6.11-ck5/drivers/scsi/scsi_lib.c
--- linux-2.6.11-ck4/drivers/scsi/scsi_lib.c 2005-03-02 19:30:27.000000000 +1100
+++ linux-2.6.11-ck5/drivers/scsi/scsi_lib.c 2005-04-25 21:12:41.000000000 +1000
@@ -349,9 +349,9 @@ void scsi_device_unbusy(struct scsi_devi
 		     shost->host_failed))
 		scsi_eh_wakeup(shost);
 	spin_unlock(shost->host_lock);
-	spin_lock(&sdev->sdev_lock);
+	spin_lock(sdev->request_queue->queue_lock);
 	sdev->device_busy--;
-	spin_unlock_irqrestore(&sdev->sdev_lock, flags);
+	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
 }
 
 /*
@@ -1353,7 +1353,7 @@ struct request_queue *scsi_alloc_queue(s
 	struct Scsi_Host *shost = sdev->host;
 	struct request_queue *q;
 
-	q = blk_init_queue(scsi_request_fn, &sdev->sdev_lock);
+	q = blk_init_queue(scsi_request_fn, NULL);
 	if (!q)
 		return NULL;
 
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.11-ck4/drivers/scsi/scsi_scan.c linux-2.6.11-ck5/drivers/scsi/scsi_scan.c
--- linux-2.6.11-ck4/drivers/scsi/scsi_scan.c 2005-03-02 19:30:27.000000000 +1100
+++ linux-2.6.11-ck5/drivers/scsi/scsi_scan.c 2005-04-25 21:12:41.000000000 +1000
@@ -246,7 +246,6 @@ static struct scsi_device *scsi_alloc_sd
 	 */
 	sdev->borken = 1;
 
-	spin_lock_init(&sdev->sdev_lock);
 	sdev->request_queue = scsi_alloc_queue(sdev);
 	if (!sdev->request_queue)
 		goto out_free_dev;
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.11-ck4/include/linux/blkdev.h linux-2.6.11-ck5/include/linux/blkdev.h
--- linux-2.6.11-ck4/include/linux/blkdev.h 2005-04-09 11:17:30.000000000 +1000
+++ linux-2.6.11-ck5/include/linux/blkdev.h 2005-04-25 21:12:41.000000000 +1000
@@ -352,8 +352,11 @@ struct request_queue
 	unsigned long queue_flags;
 
 	/*
-	 * protects queue structures from reentrancy
+	 * protects queue structures from reentrancy. ->__queue_lock should
+	 * _never_ be used directly, it is queue private. always use
+	 * ->queue_lock.
 	 */
+	spinlock_t __queue_lock;
 	spinlock_t *queue_lock;
 
 	/*
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.11-ck4/include/scsi/scsi_device.h linux-2.6.11-ck5/include/scsi/scsi_device.h
--- linux-2.6.11-ck4/include/scsi/scsi_device.h 2005-03-02 19:30:30.000000000 +1100
+++ linux-2.6.11-ck5/include/scsi/scsi_device.h 2005-04-25 21:12:41.000000000 +1000
@@ -44,7 +44,6 @@ struct scsi_device {
 	struct list_head    same_target_siblings; /* just the devices sharing same target id */
 
 	volatile unsigned short device_busy;	/* commands actually active on low-level */
-	spinlock_t sdev_lock;           /* also the request queue_lock */
 	spinlock_t list_lock;
 	struct list_head cmd_list;	/* queue of in use SCSI Command structures */
 	struct list_head starved_entry;
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.11-ck4/kernel/sched.c linux-2.6.11-ck5/kernel/sched.c
--- linux-2.6.11-ck4/kernel/sched.c 2005-04-09 11:17:30.000000000 +1000
+++ linux-2.6.11-ck5/kernel/sched.c 2005-04-25 21:12:41.000000000 +1000
@@ -16,9 +16,9 @@
  *		by Davide Libenzi, preemptible kernel bits by Robert Love.
  *  2003-09-03	Interactivity tuning by Con Kolivas.
  *  2004-04-02	Scheduler domains code by Nick Piggin
- *  2005-03-09	New staircase scheduling policy by Con Kolivas with help
+ *  2005-04-20	New staircase scheduling policy by Con Kolivas with help
  *		from William Lee Irwin III, Zwane Mwaikambo & Peter Williams.
- *		Staircase v10.6
+ *		Staircase v11
  */
 
 #include <linux/mm.h>
@@ -122,6 +122,7 @@ struct runqueue {
 	 * it on another CPU. Always updated under the runqueue lock:
 	 */
 	unsigned long nr_uninterruptible;
+	unsigned long systime_centile;
 	unsigned long long timestamp_last_tick;
 	unsigned int cache_ticks, preempted;
 
@@ -333,7 +334,7 @@ struct file_operations proc_schedstat_op
 /*
  * rq_lock - lock a given runqueue and disable interrupts.
  */
-static runqueue_t *this_rq_lock(void)
+static inline runqueue_t *this_rq_lock(void)
 	__acquires(rq->lock)
 {
 	runqueue_t *rq;
@@ -473,16 +474,16 @@ static inline void sched_info_switch(tas
 static inline unsigned long ns_diff(unsigned long long v1, unsigned long long v2)
 {
 	unsigned long long vdiff;
-	if (unlikely(v1 < v2))
+	if (likely(v1 > v2)) {
+		vdiff = v1 - v2;
+		if (vdiff > (1 << 31))
+			vdiff = 1 << 31;
+	} else
 		/*
-		 * Rarely the clock goes backwards. There should always be
-		 * a positive difference so return 1.
+		 * Rarely the clock appears to go backwards. There should
+		 * always be a positive difference so return 1.
 		 */
 		vdiff = 1;
-	else
-		vdiff = v1 - v2;
-	if (vdiff > (1 << 31))
-		vdiff = 1 << 31;
 
 	return (unsigned long)vdiff;
 }
@@ -512,12 +513,12 @@ static inline void enqueue_task(struct t
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static void requeue_task(struct task_struct *p, runqueue_t *rq)
+static inline void requeue_task(struct task_struct *p, runqueue_t *rq)
 {
 	list_move_tail(&p->run_list, rq->queue + p->prio);
 }
 
-static void enqueue_task_head(struct task_struct *p, runqueue_t *rq)
+static inline void enqueue_task_head(struct task_struct *p, runqueue_t *rq)
 {
 	list_add(&p->run_list, rq->queue + p->prio);
 	__set_bit(p->prio, rq->bitmap);
@@ -545,7 +546,7 @@ static inline void __activate_idle_task(
  * burst - extra intervals an interactive task can run for at best priority
  * instead of descending priorities.
  */
-static unsigned int burst(task_t *p)
+static inline unsigned int burst(task_t *p)
 {
 	unsigned int burst = p->burst;
 
@@ -575,7 +576,7 @@ static void dec_burst(task_t *p)
 		p->burst--;
 }
 
-static unsigned int rr_interval(task_t * p)
+static inline unsigned int rr_interval(task_t * p)
 {
 	unsigned int rr_interval = RR_INTERVAL();
 	int nice = TASK_NICE(p);
@@ -674,13 +675,16 @@ static void continue_slice(task_t *p)
  * slice instead of starting a new one at high priority.
  */
 static inline void recalc_task_prio(task_t *p, unsigned long long now,
-				unsigned long rq_load)
+				unsigned long rq_systime, unsigned long rq_running)
 {
-	unsigned long sleep_time;
-	if (rq_load > 31)
-		rq_load = 31;
-	sleep_time = ns_diff(now, p->timestamp) /
-		(1 << rq_load);
+	unsigned long sleep_time = ns_diff(now, p->timestamp);
+
+	/*
+	 * Priority is elevated back to best by amount of sleep_time.
+	 * sleep_time is scaled down by in-kernel system time and by
+	 * number of tasks currently running.
+	 */
+	sleep_time = sleep_time * (100 - rq_systime) / 200 / (rq_running + 1);
 
 	p->totalrun += p->runtime;
 	if (NS_TO_JIFFIES(p->totalrun) >= p->slice &&
@@ -735,7 +739,7 @@ static void activate_task(task_t *p, run
 #endif
 	p->slice = slice(p);
 	p->time_slice = rr_interval(p);
-	recalc_task_prio(p, now, rq->nr_running);
+	recalc_task_prio(p, now, rq->systime_centile / 100, rq->nr_running);
 	p->flags &= ~PF_UISLEEP;
 	p->prio = effective_prio(p);
 	p->timestamp = now;
@@ -924,7 +928,7 @@ static inline unsigned long target_load(
  * Returns the CPU we should wake onto.
  */
 #if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, task_t *p)
+static inline int wake_idle(int cpu, task_t *p)
 {
 	cpumask_t tmp;
 	struct sched_domain *sd;
@@ -965,12 +969,8 @@ static int cache_delay = 10 * HZ / 1000;
  */
 static void preempt(task_t *p, runqueue_t *rq)
 {
-	if (p->prio > rq->curr->prio)
+	if (p->prio >= rq->curr->prio)
 		return;
-	if (p->prio == rq->curr->prio &&
-	    ((p->totalrun || p->slice != slice(p)) ||
-	    rt_task(rq->curr)))
-		return;
 	if (!sched_compute || rq->cache_ticks >= cache_delay ||
 	    !p->mm || rt_task(p))
 		resched_task(rq->curr);
@@ -1477,7 +1477,7 @@ static int find_idlest_cpu(struct task_s
  * allow dest_cpu, which will force the cpu onto dest_cpu. Then
  * the cpu_allowed mask is restored.
  */
-static void sched_migrate_task(task_t *p, int dest_cpu)
+static inline void sched_migrate_task(task_t *p, int dest_cpu)
 {
 	migration_req_t req;
 	runqueue_t *rq;
@@ -1543,7 +1543,7 @@ out:
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
-static void pull_task(runqueue_t *src_rq, task_t *p,
+static inline void pull_task(runqueue_t *src_rq, task_t *p,
 			       runqueue_t *this_rq, int this_cpu)
 {
 	dequeue_task(p, src_rq);
@@ -1563,7 +1563,7 @@ static void pull_task(runqueue_t *src_rq
 /*
  * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
  */
-static int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
+static inline int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
 		     struct sched_domain *sd, enum idle_type idle)
 {
 	/*
@@ -1821,7 +1821,7 @@ static runqueue_t *find_busiest_queue(st
  *
  * Called with this_rq unlocked.
  */
-static int load_balance(int this_cpu, runqueue_t *this_rq,
+static inline int load_balance(int this_cpu, runqueue_t *this_rq,
 			struct sched_domain *sd, enum idle_type idle)
 {
 	struct sched_group *group;
@@ -1927,7 +1927,7 @@ out_balanced:
  * Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
  * this_rq is locked.
  */
-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
+static inline int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 				struct sched_domain *sd)
 {
 	struct sched_group *group;
@@ -1967,7 +1967,7 @@ out:
  * idle_balance is called by schedule() if this_cpu is about to become
 * idle. Attempts to pull tasks from other CPUs.
 */
-static void idle_balance(int this_cpu, runqueue_t *this_rq)
+static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *sd;
 
@@ -1989,7 +1989,7 @@ static void idle_balance(int this_cpu, r
  *
 * Called with busiest_rq locked.
 */
-static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
+static inline void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 {
 	struct sched_domain *sd;
 	struct sched_group *cpu_group;
@@ -2111,7 +2111,7 @@ static inline void idle_balance(int cpu,
 }
 #endif
 
-static int wake_priority_sleeper(runqueue_t *rq)
+static inline int wake_priority_sleeper(runqueue_t *rq)
 {
 	int ret = 0;
 #ifdef CONFIG_SCHED_SMT
@@ -2256,6 +2256,9 @@ void account_system_time(struct task_str
 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
 	else
 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
+
+	/* For calculating rolling percentage of sys time per runqueue */
+	rq->systime_centile += cputime * 100;
 }
 
 /*
@@ -2301,6 +2304,9 @@ void scheduler_tick(void)
 
 	rq->timestamp_last_tick = sched_clock();
 
+	/* Rolling percentage systime per runqueue */
+	rq->systime_centile = rq->systime_centile * 99 / 100;
+
 	if (p == rq->idle) {
 		if (wake_priority_sleeper(rq))
 			goto out;
@@ -3449,7 +3455,7 @@ out_unlock:
 	return retval;
 }
 
-static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
+static inline int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
 			     cpumask_t *new_mask)
 {
 	if (len < sizeof(cpumask_t)) {
@@ -3798,7 +3804,7 @@ static inline struct task_struct *younge
 	return list_entry(p->sibling.next,struct task_struct,sibling);
 }
 
-static void show_task(task_t * p)
+static inline void show_task(task_t * p)
 {
 	task_t *relative;
 	unsigned state;
diff -Naurp --exclude-from=/home/con/kernel/dontdiff linux-2.6.11-ck4/Makefile linux-2.6.11-ck5/Makefile
--- linux-2.6.11-ck4/Makefile 2005-04-09 11:17:30.000000000 +1000
+++ linux-2.6.11-ck5/Makefile 2005-04-25 21:12:41.000000000 +1000
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 11
-EXTRAVERSION = -ck4
+EXTRAVERSION = -ck5
 NAME=Cognac Woozy Numbat
 
 # *DOCUMENTATION*
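
Note (illustration only, not part of the patch): with the embedded __queue_lock added above, a block driver that has no lock of its own can pass NULL to blk_init_queue() and get per-queue locking, which is exactly what the scsi_lib.c change relies on. The names my_request_fn and my_driver_queue_init below are invented for the example; only blk_init_queue() and the NULL-lock convention come from the patch.

#include <linux/blkdev.h>

/* Hypothetical driver snippet, assuming 2.6.11-ck5 with this patch applied. */
static void my_request_fn(request_queue_t *q)
{
	/* pick requests off the queue; called with q->queue_lock held,
	 * which now points at the queue's embedded __queue_lock */
}

static request_queue_t *my_driver_queue_init(void)
{
	/* Passing NULL instead of a driver-supplied spinlock makes
	 * blk_init_queue() fall back to &q->__queue_lock. */
	return blk_init_queue(my_request_fn, NULL);
}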
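
Note (illustration only, not part of the patch): the staircase v11 change keeps a rolling per-runqueue estimate of system time. scheduler_tick() decays systime_centile by 1% each tick, account_system_time() adds cputime * 100, and recalc_task_prio() scales a woken task's sleep_time by (100 - systime%) / 200 and by the number of running tasks + 1. Below is a minimal userspace sketch of that arithmetic, not kernel code; every name other than systime_centile is invented for the example.

#include <stdio.h>

static unsigned long systime_centile;	/* mirrors rq->systime_centile */

/* one scheduler tick: decay the estimate, then add this tick's system time */
static void tick(unsigned long sys_time)
{
	systime_centile = systime_centile * 99 / 100;	/* as in scheduler_tick() */
	systime_centile += sys_time * 100;		/* as in account_system_time() */
}

/* how recalc_task_prio() scales the sleep credited to a woken task */
static unsigned long scale_sleep(unsigned long sleep_ns, unsigned long nr_running)
{
	unsigned long systime = systime_centile / 100;	/* rolling % system time */

	return sleep_ns * (100 - systime) / 200 / (nr_running + 1);
}

int main(void)
{
	int i;

	/* roughly half the ticks spent in the kernel */
	for (i = 0; i < 1000; i++)
		tick(i % 2);

	printf("systime ~%lu%%, 1ms sleep credited as %luns with 3 tasks running\n",
	       systime_centile / 100, scale_sleep(1000000, 3));
	return 0;
}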