Add option to allow single-threading of sched domains. Large sched domains can be very expensive to scan. Add an option SD_SERIALIZE to the sched domain flags. If that flag is set, then we make sure that no other such domain is being balanced at the same time. Signed-off-by: Christoph Lameter Index: linux-2.6.19-rc5-mm1/include/asm-ia64/topology.h =================================================================== --- linux-2.6.19-rc5-mm1.orig/include/asm-ia64/topology.h 2006-11-14 14:15:37.293272723 -0600 +++ linux-2.6.19-rc5-mm1/include/asm-ia64/topology.h 2006-11-14 14:15:59.412276984 -0600 @@ -101,6 +101,7 @@ void build_cpu_to_node_map(void); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 64, \ Index: linux-2.6.19-rc5-mm1/include/linux/sched.h =================================================================== --- linux-2.6.19-rc5-mm1.orig/include/linux/sched.h 2006-11-14 14:15:37.302062505 -0600 +++ linux-2.6.19-rc5-mm1/include/linux/sched.h 2006-11-14 14:15:59.428879915 -0600 @@ -647,6 +647,7 @@ enum idle_type #define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */ #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ +#define SD_SERIALIZE 1024 /* Only a single load balancing instance */ #define BALANCE_FOR_MC_POWER \ (sched_smt_power_savings ? 
SD_POWERSAVINGS_BALANCE : 0) Index: linux-2.6.19-rc5-mm1/include/linux/topology.h =================================================================== --- linux-2.6.19-rc5-mm1.orig/include/linux/topology.h 2006-11-14 14:15:37.312805572 -0600 +++ linux-2.6.19-rc5-mm1/include/linux/topology.h 2006-11-14 14:15:59.439622988 -0600 @@ -194,7 +194,8 @@ .wake_idx = 0, /* unused */ \ .forkexec_idx = 0, /* unused */ \ .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE, \ + .flags = SD_LOAD_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 64, \ .nr_balance_failed = 0, \ Index: linux-2.6.19-rc5-mm1/kernel/sched.c =================================================================== --- linux-2.6.19-rc5-mm1.orig/kernel/sched.c 2006-11-14 14:15:58.397544937 -0600 +++ linux-2.6.19-rc5-mm1/kernel/sched.c 2006-11-14 14:15:59.485525208 -0600 @@ -2883,6 +2883,7 @@ static void update_load(struct rq *this_ * * Balancing parameters are set up in arch_init_sched_domains. */ +static spinlock_t balancing; static void run_rebalance_domains(struct softirq_action *h) { @@ -2912,6 +2913,11 @@ static void run_rebalance_domains(struct if (unlikely(!interval)) interval = 1; + if (sd->flags & SD_SERIALIZE) { + if (!spin_trylock(&balancing)) + goto out; + } + if (time_after_eq(jiffies, sd->last_balance + interval)) { if (load_balance(this_cpu, this_rq, sd, idle)) { /* @@ -2923,6 +2929,9 @@ static void run_rebalance_domains(struct } sd->last_balance = jiffies; } + if (sd->flags & SD_SERIALIZE) + spin_unlock(&balancing); +out: if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; } @@ -6080,6 +6089,7 @@ static unsigned long long measure_migrat size_found = size; } } + /* * Calculate average fluctuation, we use this to prevent * noise from triggering an early break out of the loop: @@ -6987,6 +6997,7 @@ void __init sched_init(void) #ifdef CONFIG_RT_MUTEXES plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); + 
spin_lock_init(&balancing); #endif /* Index: linux-2.6.19-rc5-mm1/include/asm-i386/topology.h =================================================================== --- linux-2.6.19-rc5-mm1.orig/include/asm-i386/topology.h 2006-11-14 14:15:37.322571997 -0600 +++ linux-2.6.19-rc5-mm1/include/asm-i386/topology.h 2006-11-14 14:15:59.524590928 -0600 @@ -89,6 +89,7 @@ static inline int node_to_first_cpu(int .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ Index: linux-2.6.19-rc5-mm1/include/asm-powerpc/topology.h =================================================================== --- linux-2.6.19-rc5-mm1.orig/include/asm-powerpc/topology.h 2006-11-14 14:15:37.332338421 -0600 +++ linux-2.6.19-rc5-mm1/include/asm-powerpc/topology.h 2006-11-14 14:15:59.537287287 -0600 @@ -59,6 +59,7 @@ extern int pcibus_to_node(struct pci_bus | SD_BALANCE_EXEC \ | SD_BALANCE_NEWIDLE \ | SD_WAKE_IDLE \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \ Index: linux-2.6.19-rc5-mm1/include/asm-x86_64/topology.h =================================================================== --- linux-2.6.19-rc5-mm1.orig/include/asm-x86_64/topology.h 2006-11-14 14:15:37.343081488 -0600 +++ linux-2.6.19-rc5-mm1/include/asm-x86_64/topology.h 2006-11-14 14:15:59.549007002 -0600 @@ -47,6 +47,7 @@ extern int __node_distance(int, int); .flags = SD_LOAD_BALANCE \ | SD_BALANCE_FORK \ | SD_BALANCE_EXEC \ + | SD_SERIALIZE \ | SD_WAKE_BALANCE, \ .last_balance = jiffies, \ .balance_interval = 1, \