Fix idle notifiers Previously exit_idle would be called more often than enter_idle Now instead of using complicated tests just keep track of it using the per CPU variable as a flip flop. I moved the idle state into the PDA to make the access more efficient. Original bug report and an initial patch from Stephane Eranian, but rewritten by AK. Signed-off-by: Andi Kleen --- arch/x86_64/kernel/process.c | 15 +++++++++------ include/asm-x86_64/pda.h | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) Index: linux/arch/x86_64/kernel/process.c =================================================================== --- linux.orig/arch/x86_64/kernel/process.c +++ linux/arch/x86_64/kernel/process.c @@ -80,25 +80,25 @@ void idle_notifier_unregister(struct not } EXPORT_SYMBOL(idle_notifier_unregister); -enum idle_state { CPU_IDLE, CPU_NOT_IDLE }; -static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; - void enter_idle(void) { - __get_cpu_var(idle_state) = CPU_IDLE; + write_pda(isidle, 1); atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { - __get_cpu_var(idle_state) = CPU_NOT_IDLE; + if (read_pda(isidle) == 0) + return; + write_pda(isidle, 0); atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } /* Called from interrupts to signify idle end */ void exit_idle(void) { - if (current->pid | read_pda(irqcount)) + /* idle loop has pid 0 */ + if (current->pid) return; __exit_idle(); } @@ -220,6 +220,9 @@ void cpu_idle (void) play_dead(); enter_idle(); idle(); + /* In many cases the interrupt that ended idle + has already called exit_idle. But some idle + loops can be woken up without interrupt. */ __exit_idle(); } Index: linux/include/asm-x86_64/pda.h =================================================================== --- linux.orig/include/asm-x86_64/pda.h +++ linux/include/asm-x86_64/pda.h @@ -25,7 +25,8 @@ struct x8664_pda { int nodenumber; /* number of current node */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ - int mmu_state; + short mmu_state; + short isidle; struct mm_struct *active_mm; unsigned apic_timer_irqs; } ____cacheline_aligned_in_smp;