--- linux-2.6.1/include/linux/sched.h	2004-01-27 17:02:14.440238704 +1100
+++ linux-2.6.1-ck1/include/linux/sched.h	2004-01-27 17:01:45.778595936 +1100
@@ -126,7 +126,16 @@ extern unsigned long nr_iowait(void);
 #define SCHED_NORMAL		0
 #define SCHED_FIFO		1
 #define SCHED_RR		2
+#define SCHED_BATCH		3
+#define SCHED_MIN		0
+#define SCHED_MAX		3
+
+#define SCHED_RANGE(policy)	((policy) >= SCHED_MIN && \
+				(policy) <= SCHED_MAX)
+#define SCHED_RT(policy)	((policy) == SCHED_FIFO || \
+				(policy) == SCHED_RR)
+
 
 struct sched_param {
 	int sched_priority;
 };
@@ -284,6 +293,7 @@ struct signal_struct {
 #define MAX_PRIO		(MAX_RT_PRIO + 40)
 
 #define rt_task(p)		((p)->prio < MAX_RT_PRIO)
+#define batch_task(p)		((p)->policy == SCHED_BATCH)
 
 /*
  * Some day this will be a full-fledged user tracking system..
--- linux-2.6.1/kernel/sched.c	2004-01-27 17:02:14.452236880 +1100
+++ linux-2.6.1-ck1/kernel/sched.c	2004-01-27 17:03:20.418208528 +1100
@@ -141,7 +141,7 @@
 			INTERACTIVE_DELTA)
 
 #define TASK_INTERACTIVE(p) \
-	((p)->prio <= (p)->static_prio - DELTA(p))
+	((p)->prio <= (p)->static_prio - DELTA(p) && !batch_task(p))
 
 #define JUST_INTERACTIVE_SLEEP(p) \
 	(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
@@ -153,8 +153,13 @@
 #define LOW_CREDIT(p) \
 	((p)->interactive_credit < -CREDIT_LIMIT)
 
+/*
+ * Batch tasks are preempted by normal tasks of any priority and never
+ * preempt other tasks.
+ */
 #define TASK_PREEMPTS_CURR(p, rq) \
-	((p)->prio < (rq)->curr->prio)
+	((((p)->prio < (rq)->curr->prio) || (batch_task((rq)->curr))) && \
+	(!batch_task(p)))
 
 /*
  * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ]
@@ -172,6 +177,13 @@
 
 static inline unsigned int task_timeslice(task_t *p)
 {
+	/*
+	 * Batch tasks get much longer timeslices to optimise cpu throughput.
+	 * Since they yield to all other tasks this is not a problem.
+	 */
+	if (unlikely(batch_task(p)))
+		return BASE_TIMESLICE(p) * 10;
+
 	return BASE_TIMESLICE(p);
 }
 
@@ -199,10 +211,10 @@ struct prio_array {
 struct runqueue {
 	spinlock_t lock;
 	unsigned long nr_running, nr_switches, expired_timestamp,
-			nr_uninterruptible;
+			nr_uninterruptible, nr_batch;
 	task_t *curr, *idle;
 	struct mm_struct *prev_mm;
-	prio_array_t *active, *expired, arrays[2];
+	prio_array_t *active, *expired, *batch, arrays[3];
 	int best_expired_prio, prev_cpu_load[NR_CPUS];
 #ifdef CONFIG_NUMA
 	atomic_t *node_nr_running;
@@ -274,9 +286,11 @@ __init void node_nr_running_init(void)
 
 #else /* !CONFIG_NUMA */
 
-# define nr_running_init(rq)	do { } while (0)
-# define nr_running_inc(rq)	do { (rq)->nr_running++; } while (0)
-# define nr_running_dec(rq)	do { (rq)->nr_running--; } while (0)
+# define nr_running_init(rq)	do { } while (0)
+# define nr_running_inc(rq)	do { (rq)->nr_running++; } while (0)
+# define nr_running_dec(rq)	do { (rq)->nr_running--; } while (0)
+# define nr_batch_inc(rq)	do { (rq)->nr_batch++; } while (0)
+# define nr_batch_dec(rq)	do { (rq)->nr_batch--; } while (0)
 
 #endif /* CONFIG_NUMA */
 
@@ -369,7 +383,10 @@ static int effective_prio(task_t *p)
 	prio = p->static_prio - bonus;
 	if (prio < MAX_RT_PRIO)
 		prio = MAX_RT_PRIO;
-	if (prio > MAX_PRIO-1)
+	if (prio > MAX_PRIO-1 || batch_task(p))
+		/*
+		 * Batch tasks are seen as the lowest priority.
+		 */
 		prio = MAX_PRIO-1;
 	return prio;
 }
@@ -379,7 +396,12 @@ static int effective_prio(task_t *p)
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
-	enqueue_task(p, rq->active);
+	if (unlikely(batch_task(p))) {
+		enqueue_task(p, rq->batch);
+		nr_batch_inc(rq);
+	} else
+		enqueue_task(p, rq->active);
+
 	nr_running_inc(rq);
 }
 
@@ -503,6 +525,8 @@ static inline void activate_task(task_t
 static inline void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
 	nr_running_dec(rq);
+	if (unlikely(batch_task(p)))
+		nr_batch_dec(rq);
 	if (p->state == TASK_UNINTERRUPTIBLE)
 		rq->nr_uninterruptible++;
 	dequeue_task(p, p->array);
@@ -712,8 +736,14 @@ void wake_up_forked_process(task_t * p)
 	else {
 		p->prio = current->prio;
 		list_add_tail(&p->run_list, &current->run_list);
-		p->array = current->array;
-		p->array->nr_active++;
+		if (unlikely(batch_task(p))) {
+			p->array = task_rq(current)->batch;
+			p->array->nr_active++;
+			nr_batch_inc(rq);
+		} else {
+			p->array = current->array;
+			p->array->nr_active++;
+		}
 		nr_running_inc(rq);
 	}
 	task_rq_unlock(rq, &flags);
@@ -1141,7 +1171,12 @@ static inline void pull_task(runqueue_t
 	nr_running_dec(src_rq);
 	set_task_cpu(p, this_cpu);
 	nr_running_inc(this_rq);
-	enqueue_task(p, this_rq->active);
+	if (unlikely(batch_task(p))) {
+		nr_batch_dec(src_rq);
+		nr_batch_inc(this_rq);
+		enqueue_task(p, this_rq->batch);
+	} else
+		enqueue_task(p, this_rq->active);
 	/*
 	 * Note that idle threads have a prio of MAX_PRIO, for this test
 	 * to be always true for them.
@@ -1403,10 +1438,16 @@ void scheduler_tick(int user_ticks, int
 			cpustat->iowait += sys_ticks;
 		else
 			cpustat->idle += sys_ticks;
+
+		if (rq->nr_running) {
+			resched_task(p);
+			goto out;
+		}
 		rebalance_tick(rq, 1);
 		return;
 	}
-	if (TASK_NICE(p) > 0)
+
+	if (TASK_NICE(p) > 0 || batch_task(p))
 		cpustat->nice += user_ticks;
 	else
 		cpustat->user += user_ticks;
@@ -1448,14 +1489,21 @@
 		p->time_slice = task_timeslice(p);
 		p->first_time_slice = 0;
 
-		if (!rq->expired_timestamp)
+		if (!rq->expired_timestamp && !batch_task(p))
 			rq->expired_timestamp = jiffies;
-		if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
-			enqueue_task(p, rq->expired);
-			if (p->static_prio < rq->best_expired_prio)
-				rq->best_expired_prio = p->static_prio;
-		} else
-			enqueue_task(p, rq->active);
+		if (unlikely(batch_task(p)))
+			/*
+			 * Batch tasks expire to the batch array.
+			 */
+			enqueue_task(p, rq->batch);
+		else {
+			if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+				enqueue_task(p, rq->expired);
+				if (p->static_prio < rq->best_expired_prio)
+					rq->best_expired_prio = p->static_prio;
+			} else
+				enqueue_task(p, rq->active);
+		}
 	} else {
 		/*
 		 * Prevent a too long timeslice allowing a task to monopolize
@@ -1553,7 +1601,6 @@ need_resched:
 		else
 			deactivate_task(prev, rq);
 	}
-
 	if (unlikely(!rq->nr_running)) {
 #ifdef CONFIG_SMP
 		load_balance(rq, 1, cpu_to_node_mask(smp_processor_id()));
@@ -1584,10 +1631,20 @@
 		/*
 		 * Switch the active and expired arrays.
 		 */
-		rq->active = rq->expired;
-		rq->expired = array;
+		if (likely(rq->expired->nr_active)) {
+			rq->active = rq->expired;
+			rq->expired = array;
+			rq->expired_timestamp = 0;
+		} else {
+			/*
+			 * Switch to the batch array if there are no
+			 * normal tasks left waiting.
+			 */
+			rq->active = rq->batch;
+			rq->batch = array;
+		}
+
 		array = rq->active;
-		rq->expired_timestamp = 0;
 		rq->best_expired_prio = MAX_PRIO;
 	}
 
@@ -1958,10 +2015,11 @@ void set_user_nice(task_t *p, long nice)
 		enqueue_task(p, array);
 		/*
 		 * If the task increased its priority or is running and
-		 * lowered its priority, then reschedule its CPU:
+		 * lowered its priority or is a batch task, then reschedule its CPU:
 		 */
-		if (delta < 0 || (delta > 0 && task_running(rq, p)))
-			resched_task(rq->curr);
+		if (delta < 0 || ((delta > 0 || batch_task(p)) &&
+			task_running(rq, p)))
+				resched_task(rq->curr);
 	}
 out_unlock:
 	task_rq_unlock(rq, &flags);
@@ -2098,9 +2156,8 @@ static int setscheduler(pid_t pid, int p
 		policy = p->policy;
 	else {
 		retval = -EINVAL;
-		if (policy != SCHED_FIFO && policy != SCHED_RR &&
-				policy != SCHED_NORMAL)
-			goto out_unlock;
+		if (!SCHED_RANGE(policy))
+			goto out_unlock;
 	}
 
 	/*
@@ -2110,12 +2167,11 @@ static int setscheduler(pid_t pid, int p
 	retval = -EINVAL;
 	if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
 		goto out_unlock;
-	if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
-		goto out_unlock;
+	if (!SCHED_RT(policy) != (lp.sched_priority == 0))
+		goto out_unlock;
 	retval = -EPERM;
-	if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
-			!capable(CAP_SYS_NICE))
+	if (SCHED_RT(policy) && !capable(CAP_SYS_NICE))
 		goto out_unlock;
 	if ((current->euid != p->euid) && (current->euid != p->uid) &&
 			!capable(CAP_SYS_NICE))
 		goto out_unlock;
@@ -2132,7 +2188,7 @@ static int setscheduler(pid_t pid, int p
 	p->policy = policy;
 	p->rt_priority = lp.sched_priority;
 	oldprio = p->prio;
-	if (policy != SCHED_NORMAL)
+	if (SCHED_RT(policy))
 		p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
 	else
 		p->prio = p->static_prio;
@@ -2146,7 +2202,7 @@ static int setscheduler(pid_t pid, int p
 	if (rq->curr == p) {
 		if (p->prio > oldprio)
 			resched_task(rq->curr);
-	} else if (p->prio < rq->curr->prio)
+	} else if (TASK_PREEMPTS_CURR(p, rq))
 		resched_task(rq->curr);
 	}
 
@@ -2352,7 +2408,10 @@ asmlinkage long sys_sched_yield(void)
 	 */
 	if (likely(!rt_task(current))) {
 		dequeue_task(current, array);
-		enqueue_task(current, rq->expired);
+		if (unlikely(batch_task(current)))
+			enqueue_task(current, rq->batch);
+		else
+			enqueue_task(current, rq->expired);
 	} else {
 		list_del(&current->run_list);
 		list_add_tail(&current->run_list, array->queue + current->prio);
@@ -2437,6 +2496,7 @@ asmlinkage long sys_sched_get_priority_m
 		ret = MAX_USER_RT_PRIO-1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 		break;
 	}
@@ -2460,6 +2520,7 @@ asmlinkage long sys_sched_get_priority_m
 		ret = 1;
 		break;
 	case SCHED_NORMAL:
+	case SCHED_BATCH:
 		ret = 0;
 	}
 	return ret;
@@ -2893,6 +2954,7 @@ void __init sched_init(void)
 		rq->cpu = (unsigned long)(i);
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
+		rq->batch = rq->arrays + 2;
 		rq->best_expired_prio = MAX_PRIO;
 
 		spin_lock_init(&rq->lock);
@@ -2900,7 +2962,7 @@ void __init sched_init(void)
 		atomic_set(&rq->nr_iowait, 0);
 		nr_running_init(rq);
 
-		for (j = 0; j < 2; j++) {
+		for (j = 0; j < 3; j++) {
 			array = rq->arrays + j;
 			for (k = 0; k < MAX_PRIO; k++) {
 				INIT_LIST_HEAD(array->queue + k);
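
For reference, the following is a minimal userspace sketch (not part of the patch) of how a process would opt into the new policy on a kernel carrying this change. The fallback define mirrors the SCHED_BATCH value 3 added to <linux/sched.h> above, since the C library's <sched.h> of this era does not expose it, and sched_priority must be 0 because setscheduler() only accepts non-zero priorities for the realtime policies.

#include <sched.h>
#include <stdio.h>

#ifndef SCHED_BATCH
#define SCHED_BATCH 3	/* value added to <linux/sched.h> by this patch */
#endif

int main(void)
{
	struct sched_param param = { .sched_priority = 0 };

	/* pid 0 means the calling process */
	if (sched_setscheduler(0, SCHED_BATCH, &param) == -1) {
		perror("sched_setscheduler(SCHED_BATCH)");
		return 1;
	}
	printf("policy is now %d\n", sched_getscheduler(0));
	return 0;
}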