Index: linux-2.6.7-ck/include/linux/init_task.h =================================================================== --- linux-2.6.7-ck.orig/include/linux/init_task.h 2004-05-23 12:54:57.000000000 +1000 +++ linux-2.6.7-ck/include/linux/init_task.h 2004-07-30 10:33:09.322072493 +1000 @@ -71,8 +71,8 @@ .usage = ATOMIC_INIT(2), \ .flags = 0, \ .lock_depth = -1, \ - .prio = MAX_PRIO-20, \ - .static_prio = MAX_PRIO-20, \ + .prio = MAX_PRIO-21, \ + .static_prio = MAX_PRIO-21, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .mm = NULL, \ Index: linux-2.6.7-ck/include/linux/sched.h =================================================================== --- linux-2.6.7-ck.orig/include/linux/sched.h 2004-07-30 10:33:05.709638614 +1000 +++ linux-2.6.7-ck/include/linux/sched.h 2004-07-30 10:33:09.323072336 +1000 @@ -126,9 +126,10 @@ #define SCHED_NORMAL 0 #define SCHED_FIFO 1 #define SCHED_RR 2 +#define SCHED_BATCH 3 #define SCHED_MIN 0 -#define SCHED_MAX 2 +#define SCHED_MAX 3 #define SCHED_RANGE(policy) ((policy) >= SCHED_MIN && \ (policy) <= SCHED_MAX) @@ -312,9 +313,10 @@ #define MAX_USER_RT_PRIO 100 #define MAX_RT_PRIO MAX_USER_RT_PRIO -#define MAX_PRIO (MAX_RT_PRIO + 40) +#define MAX_PRIO (MAX_RT_PRIO + 41) #define rt_task(p) ((p)->prio < MAX_RT_PRIO) +#define batch_task(p) ((p)->policy == SCHED_BATCH) /* * Some day this will be a full-fledged user tracking system.. Index: linux-2.6.7-ck/kernel/sched.c =================================================================== --- linux-2.6.7-ck.orig/kernel/sched.c 2004-07-30 10:33:05.711638300 +1000 +++ linux-2.6.7-ck/kernel/sched.c 2004-07-30 10:33:09.326071866 +1000 @@ -47,7 +47,7 @@ /* * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], + * to static priority [ MAX_RT_PRIO..MAX_PRIO-2 ], * and back. */ #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) @@ -57,7 +57,7 @@ /* * 'User priority' is the nice value converted to something we * can work with better when scaling various scheduler parameters, - * it's a [ 0 ... 39 ] range. + * it's a [ 0 ... 40 ] range. */ #define USER_PRIO(p) ((p)-MAX_RT_PRIO) #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) @@ -272,8 +272,10 @@ static unsigned int slice(task_t *p) { unsigned int slice = RR_INTERVAL(); - if (likely(!rt_task(p))) + if (likely(!rt_task(p) && !batch_task(p))) slice += burst(p) * RR_INTERVAL(); + else if (batch_task(p)) + slice *= 40 - TASK_USER_PRIO(p); return slice; } @@ -295,6 +297,17 @@ unsigned int best_burst; if (rt_task(p)) return p->prio; + if (batch_task(p)) { + if (unlikely(p->flags & PF_UISLEEP)) { + /* + * If batch is waking up from uninterruptible sleep + * reschedule at a normal priority to begin with. + */ + p->flags |= PF_YIELDED; + return MAX_PRIO - 2; + } + return MAX_PRIO - 1; + } best_burst = burst(p); full_slice = slice(p); @@ -304,13 +317,13 @@ first_slice = RR_INTERVAL(); if (sched_interactive && !sched_compute) first_slice *= (p->burst + 1); - prio = MAX_PRIO - 1 - best_burst; + prio = MAX_PRIO - 2 - best_burst; if (used_slice < first_slice) return prio; prio += 1 + (used_slice - first_slice) / RR_INTERVAL(); - if (prio > MAX_PRIO - 1) - prio = MAX_PRIO - 1; + if (prio > MAX_PRIO - 2) + prio = MAX_PRIO - 2; return prio; } @@ -359,9 +372,11 @@ #endif p->slice = slice(p); recalc_task_prio(p, now); - p->flags &= ~PF_UISLEEP; p->prio = effective_prio(p); + p->flags &= ~PF_UISLEEP; p->time_slice = RR_INTERVAL(); + if (batch_task(p)) + p->time_slice = p->slice; p->timestamp = now; __activate_task(p, rq); } @@ -1759,7 +1774,7 @@ rebalance_tick(cpu, rq, IDLE); return; } - if (TASK_NICE(p) > 0) + if (TASK_NICE(p) > 0 || batch_task(p)) cpustat->nice += user_ticks; else cpustat->user += user_ticks; @@ -1862,8 +1877,9 @@ * physical cpu's resources. -ck */ if (((smt_curr->slice * (100 - sd->per_cpu_gain) / 100) > - slice(p) || rt_task(smt_curr)) && - p->mm && smt_curr->mm && !rt_task(p)) + slice(p) || rt_task(smt_curr) || batch_task(p)) && + p->mm && smt_curr->mm && !rt_task(p) && + !batch_task(smt_curr)) ret = 1; /* @@ -1872,8 +1888,9 @@ * reasons. */ if ((((p->slice * (100 - sd->per_cpu_gain) / 100) > - slice(smt_curr) || rt_task(p)) && - smt_curr->mm && p->mm && !rt_task(smt_curr)) || + slice(smt_curr) || rt_task(p) || batch_task(smt_curr)) && + smt_curr->mm && p->mm && !rt_task(smt_curr) && + !batch_task(p)) || (smt_curr == smt_rq->idle && smt_rq->nr_running)) resched_task(smt_curr); } @@ -2267,8 +2284,9 @@ * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: */ - if (delta < 0 || (delta > 0 && task_running(rq, p))) - resched_task(rq->curr); + if (delta < 0 || ((delta > 0 || batch_task(p)) && + task_running(rq, p))) + resched_task(rq->curr); } out_unlock: task_rq_unlock(rq, &flags); @@ -2437,6 +2455,12 @@ !capable(CAP_SYS_NICE)) goto out_unlock; + if (!(p->mm) && policy == SCHED_BATCH) + /* + * Don't allow kernel threads to be SCHED_BATCH. + */ + goto out_unlock; + retval = security_task_setscheduler(p, policy, &lp); if (retval) goto out_unlock; @@ -2658,9 +2682,9 @@ dequeue_task(current, rq); current->slice = slice(current); current->time_slice = RR_INTERVAL(); - if (likely(!rt_task(current))) { + if (likely(!rt_task(current) && !batch_task(current))) { current->flags |= PF_YIELDED; - current->prio = MAX_PRIO - 1; + current->prio = MAX_PRIO - 2; } current->burst = 0; enqueue_task(current, rq); @@ -2745,6 +2769,7 @@ ret = MAX_USER_RT_PRIO-1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; break; } @@ -2768,6 +2793,7 @@ ret = 1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; } return ret;