From: Ingo Molnar On systems that enter C3 and have to turn off the APIC, we fall back to the PIT as the clock events source, which emulates a local events source. Dynticks exposed a bug in the broadcast/local-events emulation code: if the PIT IRQ came earlier than the next high-res timer on an idle CPU needed it, then the PIT was not reprogrammed for follow-up IRQs. (Also, clean things up a bit by splitting out the broadcast reprogramming logic into clockevents_reprogram_broadcast().) This bug can explain certain rare boot-time hangs on C3-capable laptops that run with HIGH_RES_TIMERS and NO_HZ enabled. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Cc: Roman Zippel Cc: john stultz Cc: Andi Kleen Signed-off-by: Andrew Morton --- kernel/time/clockevents.c | 60 +++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 18 deletions(-) diff -puN kernel/time/clockevents.c~updated-add-a-framework-to-manage-clock-event-devices-pit-broadcasting-fix kernel/time/clockevents.c --- a/kernel/time/clockevents.c~updated-add-a-framework-to-manage-clock-event-devices-pit-broadcasting-fix +++ a/kernel/time/clockevents.c @@ -527,6 +527,32 @@ static cpumask_t local_event_broadcast; static void (*broadcast_function)(cpumask_t *mask); static void (*global_event_handler)(struct pt_regs *regs); +/* + * Reprogram the broadcast device: + * + * Called with events_lock held and interrupts disabled. 
+ */ +static void clockevents_reprogram_broadcast(void) +{ + struct clock_event_device *glblevt = global_eventdevice.event; + struct local_events *dev; + ktime_t expires = { .tv64 = KTIME_MAX }; + int64_t delta; + int cpu; + + for (cpu = first_cpu(local_event_broadcast); cpu != NR_CPUS; + cpu = next_cpu(cpu, local_event_broadcast)) { + dev = &per_cpu(local_eventdevices, cpu); + if (dev->expires_next.tv64 < expires.tv64) + expires = dev->expires_next; + } + + if (expires.tv64 != KTIME_MAX) { + delta = ktime_to_ns(ktime_sub(expires, ktime_get())); + do_clockevents_set_next_event(glblevt, delta); + } +} + /** * clockevents_set_broadcast - switch next event device from/to broadcast mode * @@ -536,10 +562,7 @@ static void (*global_event_handler)(stru void clockevents_set_broadcast(struct clock_event_device *evt, int broadcast) { struct local_events *devices = &__get_cpu_var(local_eventdevices); - struct clock_event_device *glblevt = global_eventdevice.event; int cpu = smp_processor_id(); - ktime_t expires = { .tv64 = KTIME_MAX }; - int64_t delta; unsigned long flags; if (devices->nextevt != evt) @@ -556,19 +579,7 @@ void clockevents_set_broadcast(struct cl if (devices->expires_next.tv64 != KTIME_MAX) clockevents_set_next_event(devices->expires_next, 1); } - - /* Reprogram the broadcast device */ - for (cpu = first_cpu(local_event_broadcast); cpu != NR_CPUS; - cpu = next_cpu(cpu, local_event_broadcast)) { - devices = &per_cpu(local_eventdevices, cpu); - if (devices->expires_next.tv64 < expires.tv64) - expires = devices->expires_next; - } - - if (expires.tv64 != KTIME_MAX) { - delta = ktime_to_ns(ktime_sub(expires, ktime_get())); - do_clockevents_set_next_event(glblevt, delta); - } + clockevents_reprogram_broadcast(); spin_unlock_irqrestore(&events_lock, flags); } @@ -635,9 +646,22 @@ static void handle_nextevt_broadcast(str cpu_set(cpu, mask); } } + if (!cpus_empty(mask)) { + /* + * Wakeup the cpus which have an expired event. 
The + * global event is reprogrammed in the return from + * idle code. + */ + broadcast_function(&mask); + } else { + /* + * The global event did not expire any CPU local + * events. This happens in dyntick mode, as the + * maximum PIT delta is quite small. + */ + clockevents_reprogram_broadcast(); + } spin_unlock(&events_lock); - /* Wakeup the cpus which have an expired event */ - broadcast_function(&mask); } /* _