===== kernel/sched.c 1.224 vs edited =====
--- 1.224/kernel/sched.c	2004-07-12 18:32:52 -04:00
+++ edited/kernel/sched.c	2004-07-23 15:31:40 -04:00
@@ -3699,26 +3699,97 @@
 #ifdef CONFIG_NUMA
 static struct sched_group sched_group_nodes[MAX_NUMNODES];
 static DEFINE_PER_CPU(struct sched_domain, node_domains);
+
+/* Maximum number of nodes a node-level sched_domain will span */
+#define SD_MAX_NODES 64
+
+/**
+ * find_next_best_node - find the next node to include in a sched_domain
+ * @node: node whose sched_domain we're building
+ * @used_nodes: nodes already in the sched_domain
+ *
+ * Find the next node to include in a given scheduling domain.  Simply
+ * finds the closest node not already in the @used_nodes map.
+ *
+ * Should use nodemask_t.
+ */
+static int __init find_next_best_node(int node, unsigned long *used_nodes)
+{
+	int i, n, val, min_val, best_node = 0;
+
+	min_val = INT_MAX;
+
+	for (i = 0; i < numnodes; i++) {
+		/* Start at @node */
+		n = (node + i) % numnodes;
+
+		/* Skip already used nodes */
+		if (test_bit(n, used_nodes))
+			continue;
+
+		/* Simple min distance search */
+		val = node_distance(node, n);
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	set_bit(best_node, used_nodes);
+	return best_node;
+}
+
+/**
+ * sched_domain_node_span - get a cpumask for a node's sched_domain
+ * @node: node whose cpumask we're constructing
+ *
+ * Given a node, construct a good cpumask for its sched_domain to span.  It
+ * should be one that prevents unnecessary balancing, but also spreads tasks
+ * out optimally.
+ *
+ * Note that this *should* be hierarchical rather than flat, i.e. the
+ * domain above single CPUs should only span nodes or physical chassis, and
+ * a domain above that should contain a larger number of CPUs, though
+ * probably not all of the available ones.
+ */
+static cpumask_t __init sched_domain_node_span(int node)
+{
+	int i;
+	cpumask_t span, nodemask;
+	DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+
+	cpus_clear(span);
+	bitmap_zero(used_nodes, MAX_NUMNODES);
+
+	for (i = 0; i < SD_MAX_NODES && i < numnodes; i++) {
+		int next_node = find_next_best_node(node, used_nodes);
+
+		nodemask = node_to_cpumask(next_node);
+		cpus_or(span, span, nodemask);
+	}
+	return span;
+}
+
 static void __init arch_init_sched_domains(void)
 {
 	int i;
 	struct sched_group *first_node = NULL, *last_node = NULL;
 
+	/* Set up node-level domains */
+	for_each_cpu(i) {
+		int node = cpu_to_node(i);
+		struct sched_domain *node_sd = &per_cpu(node_domains, i);
+
+		*node_sd = SD_NODE_INIT;
+		node_sd->span = sched_domain_node_span(node);
+		node_sd->groups = &sched_group_nodes[node];
+	}
+
 	/* Set up domains */
 	for_each_cpu(i) {
 		int node = cpu_to_node(i);
 		cpumask_t nodemask = node_to_cpumask(node);
-		struct sched_domain *node_sd = &per_cpu(node_domains, i);
 		struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
 
-		*node_sd = SD_NODE_INIT;
-		node_sd->span = cpu_possible_map;
-		node_sd->groups = &sched_group_nodes[cpu_to_node(i)];
-
 		*cpu_sd = SD_CPU_INIT;
 		cpus_and(cpu_sd->span, nodemask, cpu_possible_map);
 		cpu_sd->groups = &sched_group_cpus[i];
-		cpu_sd->parent = node_sd;
+		cpu_sd->parent = &per_cpu(node_domains, i);
 	}
 
 	/* Set up groups */
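
For reference, the greedy node-selection walk above can be sanity-checked entirely in userspace.  The sketch below is not kernel code: NR_NODES, the dist[][] table and next_best_node() are invented stand-ins for numnodes, node_distance() and find_next_best_node(), chosen only to show the order in which nodes would be pulled into a domain's span.

/*
 * Userspace sketch only -- NOT kernel code.  NR_NODES, dist[][] and
 * next_best_node() are made-up stand-ins for numnodes, node_distance()
 * and find_next_best_node(); they just illustrate the greedy walk.
 */
#include <limits.h>
#include <stdio.h>

#define NR_NODES 4

/* Hypothetical SLIT-style distances: 10 = local, bigger = farther away. */
static const int dist[NR_NODES][NR_NODES] = {
	{ 10, 20, 40, 40 },
	{ 20, 10, 40, 40 },
	{ 40, 40, 10, 20 },
	{ 40, 40, 20, 10 },
};

/* Pick the closest not-yet-used node, mirroring find_next_best_node(). */
static int next_best_node(int node, int *used)
{
	int i, best = -1, min_val = INT_MAX;

	for (i = 0; i < NR_NODES; i++) {
		int n = (node + i) % NR_NODES;	/* start the scan at @node */

		if (used[n])
			continue;
		if (dist[node][n] < min_val) {
			min_val = dist[node][n];
			best = n;
		}
	}
	if (best >= 0)
		used[best] = 1;
	return best;
}

int main(void)
{
	int node, i;

	for (node = 0; node < NR_NODES; node++) {
		int used[NR_NODES] = { 0 };

		printf("node %d span order:", node);
		for (i = 0; i < NR_NODES; i++)
			printf(" %d", next_best_node(node, used));
		printf("\n");
	}
	return 0;
}

On this made-up table, node 1's span grows as 1, 0, 2, 3: nearest node first, ties broken by scan order.  That is the behaviour the patch relies on when SD_MAX_NODES is smaller than numnodes, so each node-level domain covers its closest neighbours rather than the whole machine.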