Index: linux-2.6.19-rc3/kernel/sched.c
===================================================================
--- linux-2.6.19-rc3.orig/kernel/sched.c	2006-10-27 15:25:08.730814397 -0500
+++ linux-2.6.19-rc3/kernel/sched.c	2006-10-27 15:35:18.318746984 -0500
@@ -2849,17 +2849,17 @@ static void update_load(struct rq *this_
  * Balancing parameters are set up in arch_init_sched_domains.
  */

-/* Don't have all balancing operations going off at once: */
-static inline unsigned long cpu_offset(int cpu)
-{
-        return jiffies + cpu * HZ / NR_CPUS;
-}
-
 static void
-rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
+rebalance_tick(int this_cpu, struct rq *this_rq)
 {
-        unsigned long interval, j = cpu_offset(this_cpu);
+        unsigned long interval;
         struct sched_domain *sd;
+        /*
+         * The cpu is idle if the idle task is present
+         * and there are no runnable tasks.
+         */
+        enum idle_type idle = (this_rq->idle && !this_rq->nr_running) ?
+                                SCHED_IDLE : NOT_IDLE;

         for_each_domain(this_cpu, sd) {
                 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -2874,7 +2874,7 @@ rebalance_tick(int this_cpu, struct rq *
                 if (unlikely(!interval))
                         interval = 1;

-                if (j - sd->last_balance >= interval) {
+                if (jiffies - sd->last_balance >= interval) {
                         if (load_balance(this_cpu, this_rq, sd, idle)) {
                                 /*
                                  * We've pulled tasks over so either we're no
@@ -3137,20 +3137,18 @@ void scheduler_tick(void)
         struct task_struct *p = current;
         int cpu = smp_processor_id();
         struct rq *rq = cpu_rq(cpu);
-        enum idle_type idle = NOT_IDLE;

         update_cpu_clock(p, rq, now);

         rq->timestamp_last_tick = now;

-        if (p == rq->idle) {
+        if (p == rq->idle)
                 /* Task on the idle queue */
-                if (!wake_priority_sleeper(rq))
-                        idle = SCHED_IDLE;
-        } else
+                wake_priority_sleeper(rq);
+        else
                 task_running_tick(rq, p);
         update_load(rq);
-        rebalance_tick(cpu, rq, idle);
+        rebalance_tick(cpu, rq);
 }

 #ifdef CONFIG_SCHED_SMT
@@ -6327,6 +6325,16 @@ static void init_sched_groups_power(int
 }

 /*
+ * Calculate the starting jiffies value to use for each cpu. On sched domain
+ * initialization this jiffy value is used to stagger the load balancing
+ * of the cpus so that they do not all load balance at the same time.
+ */
+static inline unsigned long cpu_offset(int cpu)
+{
+        return jiffies + cpu * HZ / NR_CPUS;
+}
+
+/*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
@@ -6382,6 +6390,7 @@ static int build_sched_domains(const cpu
                         sd->span = *cpu_map;
                         group = cpu_to_allnodes_group(i, cpu_map);
                         sd->groups = &sched_group_allnodes[group];
+                        sd->last_balance = cpu_offset(i);
                         p = sd;
                 } else
                         p = NULL;
@@ -6390,6 +6399,7 @@ static int build_sched_domains(const cpu
                 *sd = SD_NODE_INIT;
                 sd->span = sched_domain_node_span(cpu_to_node(i));
                 sd->parent = p;
+                sd->last_balance = cpu_offset(i);
                 if (p)
                         p->child = sd;
                 cpus_and(sd->span, sd->span, *cpu_map);
@@ -6401,6 +6411,7 @@ static int build_sched_domains(const cpu
                 *sd = SD_CPU_INIT;
                 sd->span = nodemask;
                 sd->parent = p;
+                sd->last_balance = cpu_offset(i);
                 if (p)
                         p->child = sd;
                 sd->groups = &sched_group_phys[group];
@@ -6413,6 +6424,7 @@ static int build_sched_domains(const cpu
                 sd->span = cpu_coregroup_map(i);
                 cpus_and(sd->span, sd->span, *cpu_map);
                 sd->parent = p;
+                sd->last_balance = cpu_offset(i);
                 p->child = sd;
                 sd->groups = &sched_group_core[group];
 #endif
@@ -6425,6 +6437,7 @@ static int build_sched_domains(const cpu
                 sd->span = cpu_sibling_map[i];
                 cpus_and(sd->span, sd->span, *cpu_map);
                 sd->parent = p;
+                sd->last_balance = cpu_offset(i);
                 p->child = sd;
                 sd->groups = &sched_group_cpus[group];
 #endif
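
For reference, the staggering that cpu_offset() sets up can be illustrated with a small standalone sketch. This is plain userspace C, not kernel code; the HZ and NR_CPUS values, the starting jiffies value, and the main() driver are assumptions chosen for illustration, and jiffies is passed as a parameter here instead of being read from the kernel's global counter:

#include <stdio.h>

#define HZ      250     /* assumed timer tick rate */
#define NR_CPUS 8       /* assumed number of cpus */

/* Same arithmetic as the kernel helper, with jiffies passed in explicitly. */
static unsigned long cpu_offset(unsigned long jiffies, int cpu)
{
        return jiffies + cpu * HZ / NR_CPUS;
}

int main(void)
{
        unsigned long jiffies = 100000; /* arbitrary jiffy at domain build time */
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu %d: initial last_balance = %lu (+%d jiffies)\n",
                       cpu, cpu_offset(jiffies, cpu), cpu * HZ / NR_CPUS);
        return 0;
}

With these assumed values the per-cpu offsets land about 31 jiffies apart (250/8, truncated), so the cpus' balance points are spread across one HZ window instead of all falling on the same tick.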