Index: linux-2.6.7-ck/include/linux/init_task.h =================================================================== --- linux-2.6.7-ck.orig/include/linux/init_task.h 2004-07-07 20:30:15.860085647 +1000 +++ linux-2.6.7-ck/include/linux/init_task.h 2004-07-07 20:30:38.075612930 +1000 @@ -71,8 +71,8 @@ .usage = ATOMIC_INIT(2), \ .flags = 0, \ .lock_depth = -1, \ - .prio = MAX_PRIO-20, \ - .static_prio = MAX_PRIO-20, \ + .prio = MAX_PRIO-21, \ + .static_prio = MAX_PRIO-21, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ .mm = NULL, \ Index: linux-2.6.7-ck/include/linux/sched.h =================================================================== --- linux-2.6.7-ck.orig/include/linux/sched.h 2004-07-07 20:30:15.861085491 +1000 +++ linux-2.6.7-ck/include/linux/sched.h 2004-07-07 20:30:38.077612617 +1000 @@ -126,9 +126,10 @@ #define SCHED_NORMAL 0 #define SCHED_FIFO 1 #define SCHED_RR 2 +#define SCHED_BATCH 3 #define SCHED_MIN 0 -#define SCHED_MAX 2 +#define SCHED_MAX 3 #define SCHED_RANGE(policy) ((policy) >= SCHED_MIN && \ (policy) <= SCHED_MAX) @@ -312,9 +313,10 @@ #define MAX_USER_RT_PRIO 100 #define MAX_RT_PRIO MAX_USER_RT_PRIO -#define MAX_PRIO (MAX_RT_PRIO + 40) +#define MAX_PRIO (MAX_RT_PRIO + 41) #define rt_task(p) ((p)->prio < MAX_RT_PRIO) +#define batch_task(p) ((p)->policy == SCHED_BATCH) /* * Some day this will be a full-fledged user tracking system.. Index: linux-2.6.7-ck/kernel/sched.c =================================================================== --- linux-2.6.7-ck.orig/kernel/sched.c 2004-07-07 20:30:15.859085803 +1000 +++ linux-2.6.7-ck/kernel/sched.c 2004-07-07 20:30:43.155818792 +1000 @@ -47,7 +47,7 @@ /* * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], + * to static priority [ MAX_RT_PRIO..MAX_PRIO-2 ], * and back. */ #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) @@ -57,7 +57,7 @@ /* * 'User priority' is the nice value converted to something we * can work with better when scaling various scheduler parameters, - * it's a [ 0 ... 39 ] range. + * it's a [ 0 ... 40 ] range. */ #define USER_PRIO(p) ((p)-MAX_RT_PRIO) #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) @@ -193,7 +193,8 @@ if (p->prio >= rq->curr->prio) return 0; if (!sched_compute || rq->cache_ticks >= cache_decay_ticks || - rt_task(p) || !p->mm || rq->curr == rq->idle) + rt_task(p) || !p->mm || rq->curr == rq->idle || + (batch_task(rq->curr) && !batch_task(p))) return 1; rq->preempted = 1; return 0; @@ -288,6 +289,8 @@ unsigned int slice = RR_INTERVAL(); if (!rt_task(p)) slice += burst(p) * RR_INTERVAL(); + if (batch_task(p)) + slice *= 10; return slice; } @@ -309,6 +312,17 @@ unsigned int best_burst; if (rt_task(p)) return p->prio; + if (batch_task(p)) { + if (unlikely(p->flags & PF_UISLEEP)) { + /* + * If batch is waking up from uninterruptible sleep + * reschedule at a normal priority to begin with. + */ + p->flags |= PF_YIELDED; + return MAX_PRIO - 2; + } + return MAX_PRIO - 1; + } best_burst = burst(p); full_slice = slice(p); @@ -318,13 +332,13 @@ first_slice = RR_INTERVAL(); if (sched_interactive && !sched_compute) first_slice *= (p->burst + 1); - prio = MAX_PRIO - 1 - best_burst; + prio = MAX_PRIO - 2 - best_burst; if (used_slice < first_slice) return prio; prio += 1 + (used_slice - first_slice) / RR_INTERVAL(); - if (prio > MAX_PRIO - 1) - prio = MAX_PRIO - 1; + if (prio > MAX_PRIO - 2) + prio = MAX_PRIO - 2; return prio; } @@ -373,9 +387,11 @@ #endif p->slice = slice(p); recalc_task_prio(p, now); - p->flags &= ~PF_UISLEEP; p->prio = effective_prio(p); + p->flags &= ~PF_UISLEEP; p->time_slice = RR_INTERVAL(); + if (batch_task(p)) + p->time_slice *= 10; p->timestamp = now; __activate_task(p, rq); } @@ -1753,7 +1769,7 @@ rebalance_tick(cpu, rq, IDLE); return; } - if (TASK_NICE(p) > 0) + if (TASK_NICE(p) > 0 || batch_task(p)) cpustat->nice += user_ticks; else cpustat->user += user_ticks; @@ -1856,8 +1872,9 @@ * physical cpu's resources. -ck */ if (((smt_curr->slice * (100 - sd->per_cpu_gain) / 100) > - slice(p) || rt_task(smt_curr)) && - p->mm && smt_curr->mm && !rt_task(p)) + slice(p) || rt_task(smt_curr) || batch_task(p)) && + p->mm && smt_curr->mm && !rt_task(p) && + !batch_task(smt_curr)) ret = 1; /* @@ -1866,8 +1883,9 @@ * reasons. */ if ((((p->slice * (100 - sd->per_cpu_gain) / 100) > - slice(smt_curr) || rt_task(p)) && - smt_curr->mm && p->mm && !rt_task(smt_curr)) || + slice(smt_curr) || rt_task(p) || batch_task(smt_curr)) && + smt_curr->mm && p->mm && !rt_task(smt_curr) && + !batch_task(p)) || (smt_curr == smt_rq->idle && smt_rq->nr_running)) resched_task(smt_curr); } @@ -2261,8 +2279,9 @@ * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: */ - if (delta < 0 || (delta > 0 && task_running(rq, p))) - resched_task(rq->curr); + if (delta < 0 || ((delta > 0 || batch_task(p)) && + task_running(rq, p))) + resched_task(rq->curr); } out_unlock: task_rq_unlock(rq, &flags); @@ -2431,6 +2450,12 @@ !capable(CAP_SYS_NICE)) goto out_unlock; + if (!(p->mm) && policy == SCHED_BATCH) + /* + * Don't allow kernel threads to be SCHED_BATCH. + */ + goto out_unlock; + retval = security_task_setscheduler(p, policy, &lp); if (retval) goto out_unlock; @@ -2652,9 +2677,9 @@ dequeue_task(current, rq); current->slice = slice(current); current->time_slice = RR_INTERVAL(); - if (!rt_task(current)) { + if (!rt_task(current) && !batch_task(current)) { current->flags |= PF_YIELDED; - current->prio = MAX_PRIO - 1; + current->prio = MAX_PRIO - 2; } current->burst = 0; enqueue_task(current, rq); @@ -2739,6 +2764,7 @@ ret = MAX_USER_RT_PRIO-1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; break; } @@ -2762,6 +2788,7 @@ ret = 1; break; case SCHED_NORMAL: + case SCHED_BATCH: ret = 0; } return ret;