From: Zhang Yanmin

On i386, kernel irq balancing doesn't work:

1) In do_irq_balance(), after the kernel finds the min_loaded cpu, but
before it calls set_pending_irq() to actually pin selected_irq to the
target cpu, it does a cpus_and() with irq_affinity[selected_irq].  Later,
when the irq is acked, the kernel calls
move_native_irq() => desc->handler->set_affinity() to change the irq's
affinity.  However, every function behind
hw_interrupt_type->set_affinity(unsigned int irq, cpumask_t cpumask) also
sets irq_affinity[irq] to cpumask.  So the next time do_irq_balance() runs
and does the cpus_and() with irq_affinity[selected_irq] again, that mask
has already been narrowed to the single cpu chosen by the previous
balancing pass.

2) balance_irq() in arch/i386/kernel/io_apic.c has the same problem.

Fix this by keeping the user-requested affinity in a separate
balance_irq_affinity[] array, updated only from proc_set_irq_affinity(),
and using it instead of irq_affinity[] when computing the allowed mask
(a standalone sketch of the feedback loop follows the patch).

Signed-off-by: Zhang Yanmin
Signed-off-by: Andrew Morton
---

 arch/i386/kernel/io_apic.c |   15 +++++++++++++--
 include/linux/irq.h        |    6 ++++++
 kernel/irq/proc.c          |    3 +++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff -puN arch/i386/kernel/io_apic.c~x86-kernel-irq-balancer-fix arch/i386/kernel/io_apic.c
--- devel/arch/i386/kernel/io_apic.c~x86-kernel-irq-balancer-fix	2006-05-18 00:29:04.000000000 -0700
+++ devel-akpm/arch/i386/kernel/io_apic.c	2006-05-18 00:33:57.000000000 -0700
@@ -307,6 +307,15 @@ static struct irq_cpu_info {
 
 static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
 
+static cpumask_t balance_irq_affinity[NR_IRQS] = {
+	[0 ... NR_IRQS-1] = CPU_MASK_ALL
+};
+
+void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	balance_irq_affinity[irq] = mask;
+}
+
 static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
 			unsigned long now, int direction)
 {
@@ -343,7 +352,7 @@ static inline void balance_irq(int cpu,
 	if (irqbalance_disabled)
 		return;
 
-	cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
+	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
 	new_cpu = move(cpu, allowed_mask, now, 1);
 	if (cpu != new_cpu) {
 		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
@@ -532,7 +541,9 @@ tryanotherirq:
 		}
 	}
 
-	cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]);
+	cpus_and(allowed_mask,
+		cpu_online_map,
+		balance_irq_affinity[selected_irq]);
 	target_cpu_mask = cpumask_of_cpu(min_loaded);
 	cpus_and(tmp, target_cpu_mask, allowed_mask);
 
diff -puN include/linux/irq.h~x86-kernel-irq-balancer-fix include/linux/irq.h
--- devel/include/linux/irq.h~x86-kernel-irq-balancer-fix	2006-05-18 00:29:04.000000000 -0700
+++ devel-akpm/include/linux/irq.h	2006-05-18 00:33:57.000000000 -0700
@@ -164,6 +164,12 @@ static inline void set_irq_info(int irq,
 
 #endif // CONFIG_SMP
 
+#ifdef CONFIG_IRQBALANCE
+extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
+#else
+#define set_balance_irq_affinity(irq, mask)
+#endif
+
 extern int no_irq_affinity;
 extern int noirqdebug_setup(char *str);
 
diff -puN kernel/irq/proc.c~x86-kernel-irq-balancer-fix kernel/irq/proc.c
--- devel/kernel/irq/proc.c~x86-kernel-irq-balancer-fix	2006-05-18 00:29:04.000000000 -0700
+++ devel-akpm/kernel/irq/proc.c	2006-05-18 00:29:04.000000000 -0700
@@ -24,6 +24,8 @@ static struct proc_dir_entry *smp_affini
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 {
+	set_balance_irq_affinity(irq, mask_val);
+
 	/*
 	 * Save these away for later use. Re-progam when the
 	 * interrupt is pending
@@ -33,6 +35,7 @@ void proc_set_irq_affinity(unsigned int
 #else
 void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
 {
+	set_balance_irq_affinity(irq, mask_val);
 	irq_affinity[irq] = mask_val;
 	irq_desc[irq].handler->set_affinity(irq, mask_val);
 }
_
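
For anyone who wants to see the narrowing effect in isolation, here is a
minimal userspace sketch (my own illustration, not kernel code;
toy_set_affinity(), pick_cpu() and balance_once() are invented names, and
the masks are plain unsigned ints).  The "broken" variant feeds the
balancer's own result back into the mask it later ANDs against, so after
the first pass the irq can never move again; the "fixed" variant keeps the
user-requested mask in a separate balance_irq_affinity variable, which is
what the patch does with its per-irq array.

/*
 * Standalone userspace sketch of the failure mode, not kernel code.
 * toy_set_affinity(), pick_cpu() and balance_once() are invented for
 * illustration; only balance_irq_affinity mirrors what the patch adds.
 */
#include <stdio.h>

#define NR_CPUS 4

typedef unsigned int cpumask_t;			/* toy mask: one bit per cpu */

static cpumask_t irq_affinity = 0xf;		/* rewritten by set_affinity() */
static cpumask_t balance_irq_affinity = 0xf;	/* what the user asked for */

/* Stands in for hw_interrupt_type->set_affinity(): overwrites irq_affinity. */
static void toy_set_affinity(cpumask_t mask)
{
	irq_affinity = mask;
}

/* Use min_loaded if the allowed mask permits it, else fall back. */
static int pick_cpu(cpumask_t allowed, int min_loaded)
{
	int cpu;

	if (allowed & (1u << min_loaded))
		return min_loaded;
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (allowed & (1u << cpu))
			return cpu;
	return -1;
}

/*
 * One balancing pass: the broken variant ANDs against the mask that
 * set_affinity() keeps narrowing, the fixed one uses the stored user mask.
 */
static void balance_once(int broken, int min_loaded)
{
	cpumask_t allowed = broken ? irq_affinity : balance_irq_affinity;
	int cpu = pick_cpu(allowed, min_loaded);

	printf("  want cpu %d, allowed=%#x -> moved to cpu %d\n",
	       min_loaded, allowed, cpu);
	toy_set_affinity(1u << cpu);	/* what move_native_irq() ends up doing */
}

int main(void)
{
	printf("broken: second pass cannot leave the cpu picked first:\n");
	balance_once(1, 2);
	balance_once(1, 3);

	irq_affinity = 0xf;
	printf("fixed: allowed mask stays the user's 0xf:\n");
	balance_once(0, 2);
	balance_once(0, 3);
	return 0;
}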