Only call rebalance_domains from scheduler_tick when needed. Call rebalance_domains from a tasklet with interrupts enabled, and only schedule that tasklet when one of the sched domains is due to be rebalanced. The jiffies value at which the next balancing action is to take place is kept in next_balance, a field of the per cpu runqueue (struct rq). Signed-off-by: Christoph Lameter Index: linux-2.6.19-rc3/kernel/sched.c =================================================================== --- linux-2.6.19-rc3.orig/kernel/sched.c 2006-10-27 13:47:07.827921449 -0500 +++ linux-2.6.19-rc3/kernel/sched.c 2006-10-27 13:52:29.149518448 -0500 @@ -227,6 +227,7 @@ struct rq { unsigned long expired_timestamp; unsigned long long timestamp_last_tick; struct task_struct *curr, *idle; + unsigned long next_balance; struct mm_struct *prev_mm; struct prio_array *active, *expired, arrays[2]; int best_expired_prio; @@ -239,7 +240,6 @@ struct rq { int active_balance; int push_cpu; int cpu; /* cpu of this runqueue */ - struct task_struct *migration_thread; struct list_head migration_queue; #endif @@ -2841,7 +2841,8 @@ static void update_load(struct rq *this_ } /* - * rebalance_domains is called from the scheduler_tick. + * rebalance_domains is triggered when needed via a tasklet from the + * scheduler_tick. * * It checks each scheduling domain to see if it is due to be balanced, * and initiates a balancing operation if so. @@ -2859,6 +2860,8 @@ static void rebalance_domains(unsigned l */ enum idle_type idle = (this_rq->idle && !this_rq->nr_running) ? 
SCHED_IDLE : NOT_IDLE; + /* Maximum time between calls to rebalance_domains */ + unsigned long next_balance = jiffies + 60*HZ; for_each_domain(this_cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -2887,8 +2890,12 @@ static void rebalance_domains(unsigned l sd->next_balance += interval; } + next_balance = min(next_balance, sd->next_balance); } + this_rq->next_balance = next_balance; } + +DECLARE_TASKLET(rebalance, &rebalance_domains, 0L); #else /* * on UP we do not need to balance between CPUs: @@ -3140,7 +3147,8 @@ void scheduler_tick(void) task_running_tick(rq, p); #ifdef CONFIG_SMP update_load(rq); - rebalance_domains(0); + if (jiffies >= rq->next_balance) + tasklet_schedule(&rebalance); #endif }