Index: linux-2.6.14-ck1/kernel/sched.c
===================================================================
--- linux-2.6.14-ck1.orig/kernel/sched.c	2005-10-28 20:51:30.000000000 +1000
+++ linux-2.6.14-ck1/kernel/sched.c	2005-10-28 20:51:30.000000000 +1000
@@ -122,6 +122,7 @@ struct runqueue {
 	unsigned long iso_ticks;
 	unsigned int iso_refractory;
 #ifdef CONFIG_SMP
+	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -563,13 +564,63 @@ static inline void enqueue_task_head(str
 	__set_bit(p->prio, rq->bitmap);
 }
 
+#ifdef CONFIG_SMP
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+	rq->prio_bias += MAX_PRIO - prio;
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+	rq->prio_bias -= MAX_PRIO - prio;
+}
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running++;
+	if (rt_task(p)) {
+		if (p != rq->migration_thread)
+			inc_prio_bias(rq, p->prio);
+	} else
+		inc_prio_bias(rq, p->static_prio);
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running--;
+	if (rt_task(p)) {
+		if (p != rq->migration_thread)
+			dec_prio_bias(rq, p->prio);
+	} else
+		dec_prio_bias(rq, p->static_prio);
+}
+#else
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running++;
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running--;
+}
+
+static inline void inc_prio_bias(runqueue_t *rq, int prio)
+{
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int prio)
+{
+}
+#endif
+
 /*
  * __activate_task - move a task to the runqueue.
  */
 static void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -578,7 +629,7 @@ static void __activate_task(task_t *p, r
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -787,7 +838,7 @@ static void activate_task(task_t *p, run
  */
 static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	rq->nr_running--;
+	dec_nr_running(p, rq);
 	dequeue_task(p, rq);
 }
 
@@ -923,27 +974,61 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long running = rq->nr_running;
+	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
+		load_now = running * SCHED_LOAD_SCALE;
+
 	if (type == 0)
-		return load_now;
+		source_load = load_now;
+	else
+		source_load = min(cpu_load, load_now);
+
+	if (running > 1 || (idle == NOT_IDLE && running))
+		/*
+		 * If we are busy rebalancing the load is biased by
+		 * priority to create 'nice' support across cpus. When
+		 * idle rebalancing we should only bias the source_load if
+		 * there is more than one task running on that queue to
+		 * prevent idle rebalance from trying to pull tasks from a
+		 * queue with only one running task.
+		 */
+		source_load = source_load * rq->prio_bias / running;
+
+	return source_load;
+}
 
-	return min(rq->cpu_load[type-1], load_now);
+static inline unsigned long source_load(int cpu, int type)
+{
+	return __source_load(cpu, type, NOT_IDLE);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long running = rq->nr_running;
+	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
+		load_now = running * SCHED_LOAD_SCALE;
+
 	if (type == 0)
-		return load_now;
+		target_load = load_now;
+	else
+		target_load = max(cpu_load, load_now);
+
+	if (running > 1 || (idle == NOT_IDLE && running))
+		target_load = target_load * rq->prio_bias / running;
 
-	return max(rq->cpu_load[type-1], load_now);
+	return target_load;
+}
+
+static inline unsigned long target_load(int cpu, int type)
+{
+	return __target_load(cpu, type, NOT_IDLE);
 }
 
 /*
@@ -1684,9 +1769,9 @@ static inline void pull_task(runqueue_t
 	       runqueue_t *this_rq, int this_cpu)
 {
 	dequeue_task(p, src_rq);
-	src_rq->nr_running--;
+	dec_nr_running(p, src_rq);
 	set_task_cpu(p, this_cpu);
-	this_rq->nr_running++;
+	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_rq);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -1840,9 +1925,9 @@ find_busiest_group(struct sched_domain *
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = target_load(i, load_idx);
+				load = __target_load(i, load_idx, idle);
 			else
-				load = source_load(i, load_idx);
+				load = __source_load(i, load_idx, idle);
 
 			avg_load += load;
 		}
@@ -1947,14 +2032,15 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group)
+static runqueue_t *find_busiest_queue(struct sched_group *group,
+	enum idle_type idle)
 {
 	unsigned long load, max_load = 0;
 	runqueue_t *busiest = NULL;
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = source_load(i, 0);
+		load = __source_load(i, 0, idle);
 
 		if (load > max_load) {
 			max_load = load;
@@ -1998,7 +2084,7 @@ static inline int load_balance(int this_
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, idle);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -2121,7 +2207,7 @@ static inline int load_balance_newidle(i
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, NEWLY_IDLE);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
@@ -3335,8 +3421,10 @@ void set_user_nice(task_t *p, long nice)
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
-	if ((queued = task_queued(p)))
+	if ((queued = task_queued(p))) {
 		dequeue_task(p, rq);
+		dec_prio_bias(rq, p->static_prio);
+	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3346,6 +3434,7 @@ void set_user_nice(task_t *p, long nice)
 
 	if (queued) {
 		enqueue_task(p, rq);
+		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
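For illustration only, not part of the patch: a standalone sketch of what the
prio_bias weighting above works out to.  The constants are hardcoded from the
2.6 scheduler (MAX_PRIO = 140, NICE_TO_PRIO(nice) = 120 + nice,
SCHED_LOAD_SCALE = 128); the two example runqueues and the biased_load()
helper are made up for the demonstration.

/*
 * Standalone sketch, not part of the patch.  Mirrors the inc_prio_bias()
 * accounting (MAX_PRIO - static_prio per queued task) and the
 * "load * rq->prio_bias / running" scaling that __source_load() and
 * __target_load() apply when busy rebalancing.
 */
#include <stdio.h>

#define MAX_PRIO		140
#define NICE_TO_PRIO(nice)	(120 + (nice))
#define SCHED_LOAD_SCALE	128UL

static unsigned long biased_load(const int *nice, unsigned long running)
{
	unsigned long prio_bias = 0, load = running * SCHED_LOAD_SCALE;
	unsigned long i;

	/* each task contributes MAX_PRIO - static_prio, as in inc_prio_bias() */
	for (i = 0; i < running; i++)
		prio_bias += MAX_PRIO - NICE_TO_PRIO(nice[i]);

	/* same scaling as __source_load()/__target_load() when busy */
	return load * prio_bias / running;
}

int main(void)
{
	int nice0[] = { 0, 0 };		/* two nice 0 tasks   */
	int nice20[] = { -20, -20 };	/* two nice -20 tasks */

	printf("nice 0 queue:   biased load %lu\n", biased_load(nice0, 2));
	printf("nice -20 queue: biased load %lu\n", biased_load(nice20, 2));
	return 0;
}

With both queues holding two runnable tasks, the nice -20 queue reports twice
the biased load of the nice 0 queue (10240 vs 5120), so find_busiest_group()
and find_busiest_queue() prefer to pull from it.  That is the mechanism the
patch uses to create 'nice' support across cpus.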