Index: linux-2.6.19-rc2-mm2/include/linux/sched.h =================================================================== --- linux-2.6.19-rc2-mm2.orig/include/linux/sched.h 2006-10-23 19:35:19.651161879 -0500 +++ linux-2.6.19-rc2-mm2/include/linux/sched.h 2006-10-23 19:45:41.735899652 -0500 @@ -692,7 +692,7 @@ struct sched_domain { int flags; /* See SD_* */ /* Runtime fields. */ - unsigned long last_balance; /* init to jiffies. units in jiffies */ + unsigned long next_balance; /* init to jiffies. units in jiffies */ unsigned int balance_interval; /* initialise to 1. units in ms. */ unsigned int nr_balance_failed; /* initialise to 0 */ Index: linux-2.6.19-rc2-mm2/kernel/sched.c =================================================================== --- linux-2.6.19-rc2-mm2.orig/kernel/sched.c 2006-10-23 19:36:26.208865512 -0500 +++ linux-2.6.19-rc2-mm2/kernel/sched.c 2006-10-23 19:48:30.060477114 -0500 @@ -2818,7 +2818,7 @@ static void active_load_balance(struct r } /* - * rebalance_tick will get called every timer tick, on every CPU. + * rebalance_domains is called when needed from the migration thread. * * It checks each scheduling domain to see if it is due to be balanced, * and initiates a balancing operation if so. @@ -2826,19 +2826,26 @@ static void active_load_balance(struct r * Balancing parameters are set up in arch_init_sched_domains. */ -/* Don't have all balancing operations going off at once: */ -static inline unsigned long cpu_offset(int cpu) -{ - return jiffies + cpu * HZ / NR_CPUS; -} - -static void -rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle) +/* + * Returns a time in jiffies that specifies when we need to call this + * function next. 
+ */ +static unsigned long rebalance_domains(void) { - unsigned long this_load, interval, j = cpu_offset(this_cpu); + int this_cpu = smp_processor_id(); + struct rq *this_rq = cpu_rq(this_cpu); + enum idle_type idle; + unsigned long this_load, interval; struct sched_domain *sd; int i, scale; + /* Maximum interval between calls to rebalance_domains */ + unsigned long next_balance = jiffies + HZ; + /* + * A queue is only truly idle if it is the idle queue and no task is runnable + */ + idle = (current == this_rq->idle && !this_rq->nr_running) ? + SCHED_IDLE : NOT_IDLE; this_load = this_rq->raw_weighted_load; /* Update our load: */ @@ -2870,7 +2877,7 @@ rebalance_tick(int this_cpu, struct rq * if (unlikely(!interval)) interval = 1; - if (j - sd->last_balance >= interval) { + if (jiffies >= sd->next_balance) { if (load_balance(this_cpu, this_rq, sd, idle)) { /* * We've pulled tasks over so either we're no @@ -2879,39 +2886,33 @@ rebalance_tick(int this_cpu, struct rq * */ idle = NOT_IDLE; } - sd->last_balance += interval; + sd->next_balance += interval; } + next_balance = min(next_balance, sd->next_balance); } + return next_balance; } #else /* * on UP we do not need to balance between CPUs: */ -static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle) -{ -} static inline void idle_balance(int cpu, struct rq *rq) { } #endif -static inline int wake_priority_sleeper(struct rq *rq) +static inline void wake_priority_sleeper(struct rq *rq) { - int ret = 0; - #ifdef CONFIG_SCHED_SMT spin_lock(&rq->lock); /* * If an SMT sibling task has been put to sleep for priority * reasons reschedule the idle task to see if it can now run. 
*/ - if (rq->nr_running) { + if (rq->nr_running) resched_task(rq->idle); - ret = 1; - } spin_unlock(&rq->lock); #endif - return ret; } DEFINE_PER_CPU(struct kernel_stat, kstat); @@ -3058,10 +3059,8 @@ void scheduler_tick(void) rq->timestamp_last_tick = now; if (p == rq->idle) { - if (wake_priority_sleeper(rq)) - goto out; - rebalance_tick(cpu, rq, SCHED_IDLE); - return; + wake_priority_sleeper(rq); + goto out; } /* Task might have expired already, but not scheduled off yet */ @@ -3136,7 +3135,7 @@ void scheduler_tick(void) out_unlock: spin_unlock(&rq->lock); out: - rebalance_tick(cpu, rq, NOT_IDLE); + rebalance_domains(); } #ifdef CONFIG_SCHED_SMT @@ -6431,6 +6430,12 @@ static void init_sched_groups_power(int } while (group != child->groups); } +/* Don't have all balancing operations going off at once: */ +static inline unsigned long cpu_offset(int cpu) +{ + return cpu * HZ / NR_CPUS; +} + /* * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus @@ -6487,6 +6492,7 @@ static int build_sched_domains(const cpu sd->span = *cpu_map; group = cpu_to_allnodes_group(i, cpu_map); sd->groups = &sched_group_allnodes[group]; + sd->next_balance = jiffies + cpu_offset(i); p = sd; } else p = NULL; @@ -6495,6 +6501,7 @@ static int build_sched_domains(const cpu *sd = SD_NODE_INIT; sd->span = sched_domain_node_span(cpu_to_node(i)); sd->parent = p; + sd->next_balance = jiffies + cpu_offset(i); if (p) p->child = sd; cpus_and(sd->span, sd->span, *cpu_map); @@ -6506,6 +6513,7 @@ static int build_sched_domains(const cpu *sd = SD_CPU_INIT; sd->span = nodemask; sd->parent = p; + sd->next_balance = jiffies + cpu_offset(i); if (p) p->child = sd; sd->groups = &sched_group_phys[group]; @@ -6518,6 +6526,7 @@ static int build_sched_domains(const cpu sd->span = cpu_coregroup_map(i); cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; + sd->next_balance = jiffies + cpu_offset(i); p->child = sd; sd->groups = &sched_group_core[group]; #endif @@ 
-6530,6 +6539,7 @@ static int build_sched_domains(const cpu sd->span = cpu_sibling_map[i]; cpus_and(sd->span, sd->span, *cpu_map); sd->parent = p; + sd->next_balance = jiffies + cpu_offset(i); p->child = sd; sd->groups = &sched_group_cpus[group]; #endif Index: linux-2.6.19-rc2-mm2/include/asm-ia64/topology.h =================================================================== --- linux-2.6.19-rc2-mm2.orig/include/asm-ia64/topology.h 2006-10-23 19:35:19.000000000 -0500 +++ linux-2.6.19-rc2-mm2/include/asm-ia64/topology.h 2006-10-23 19:49:37.820421611 -0500 @@ -76,7 +76,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_NEWIDLE \ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE, \ - .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ } @@ -102,7 +101,6 @@ void build_cpu_to_node_map(void); | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ } Index: linux-2.6.19-rc2-mm2/include/linux/topology.h =================================================================== --- linux-2.6.19-rc2-mm2.orig/include/linux/topology.h 2006-10-23 19:35:19.000000000 -0500 +++ linux-2.6.19-rc2-mm2/include/linux/topology.h 2006-10-23 19:49:17.257066081 -0500 @@ -108,7 +108,6 @@ | SD_WAKE_AFFINE \ | SD_WAKE_IDLE \ | SD_SHARE_CPUPOWER, \ - .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ } @@ -140,7 +139,6 @@ | SD_WAKE_AFFINE \ | SD_SHARE_PKG_RESOURCES\ | BALANCE_FOR_MC_POWER, \ - .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ } @@ -170,7 +168,6 @@ | SD_BALANCE_EXEC \ | SD_WAKE_AFFINE \ | BALANCE_FOR_PKG_POWER,\ - .last_balance = jiffies, \ .balance_interval = 1, \ .nr_balance_failed = 0, \ } @@ -195,7 +192,6 @@ .forkexec_idx = 0, /* unused */ \ .per_cpu_gain = 100, \ .flags = SD_LOAD_BALANCE, \ - .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ }