Only call rebalance_domains from scheduler_tick when needed. Call rebalance_domains from a tasklet with interrupts enabled. Only call it when one of the sched domains is due to be rebalanced. The jiffies value at which the next balancing action is to take place is kept in a per-cpu variable, next_balance. Signed-off-by: Christoph Lameter Index: linux-2.6.19-rc2-mm2/kernel/sched.c =================================================================== --- linux-2.6.19-rc2-mm2.orig/kernel/sched.c 2006-10-24 10:40:32.000000000 -0500 +++ linux-2.6.19-rc2-mm2/kernel/sched.c 2006-10-24 10:42:02.135978934 -0500 @@ -2841,8 +2841,11 @@ static void update_load(struct rq *this_ } } +static DEFINE_PER_CPU(unsigned long, next_balance); + /* - * rebalance_domains is called from the scheduler_tick. + * rebalance_domains is triggered when needed via a tasklet from the + * scheduler_tick. * * It checks each scheduling domain to see if it is due to be balanced, * and initiates a balancing operation if so. @@ -2858,6 +2861,8 @@ static void rebalance_domains(unsigned l /* Idle means on the idle queue without a runnable task */ enum idle_type idle = (this_rq->idle && !this_rq->nr_running) ? SCHED_IDLE : NOT_IDLE; + /* Maximum time between calls to rebalance_domains */ + unsigned long next_balance = jiffies + 60*HZ; for_each_domain(this_cpu, sd) { if (!(sd->flags & SD_LOAD_BALANCE)) @@ -2883,8 +2888,12 @@ static void rebalance_domains(unsigned l } sd->next_balance += interval; } + next_balance = min(next_balance, sd->next_balance); } + __get_cpu_var(next_balance) = next_balance; } + +DECLARE_TASKLET(rebalance, &rebalance_domains, 0L); #else /* * on UP we do not need to balance between CPUs: @@ -3137,7 +3146,8 @@ void scheduler_tick(void) } #ifdef CONFIG_SMP update_load(rq); - rebalance_domains(0L); + if (jiffies >= __get_cpu_var(next_balance)) + tasklet_schedule(&rebalance); #endif }