Some BIOS implementations put the CPU into C3 while advertising the state as C2. This stops the local apic and breaks highres/dyntick. Verify that the apic timer works instead of trusting the BIOS. This should cure akpm's VAIO. Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/i386/kernel/apic.c | 115 ++++++++++++++++++++++++++++++-- drivers/acpi/processor_idle.c | 75 ++++++++++++++++++-- include/acpi/processor.h | 3 include/asm-i386/apic.h | 1 include/asm-x86_64/apic.h | 2 5 files changed, 182 insertions(+), 14 deletions(-) diff -puN arch/i386/kernel/apic.c~acpi-verify-lapic-timer arch/i386/kernel/apic.c --- a/arch/i386/kernel/apic.c~acpi-verify-lapic-timer +++ a/arch/i386/kernel/apic.c @@ -92,7 +92,21 @@ static struct clock_event_device lapic_c .set_mode = lapic_timer_setup, .set_next_event = lapic_next_event, }; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); + +/* + * Per CPU local APIC data structure: + * - clock event device + * - variables to hold timer verification data + */ +struct lapic_event_device { + struct clock_event_device evdev; + unsigned long last_delta; + unsigned long counter; +}; +static DEFINE_PER_CPU(struct lapic_event_device, lapic_events); + +/* Scaled math multiplication factor for ACPI lapic verification */ +static unsigned long acpi_verify_mult; /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase; @@ -207,6 +221,11 @@ static void __setup_APIC_LVTT(unsigned i static void lapic_next_event(unsigned long delta, struct clock_event_device *evt) { + struct lapic_event_device *ldev; + + ldev = container_of(evt, struct lapic_event_device, evdev); + ldev->last_delta = delta; + apic_write_around(APIC_TMICT, delta); } @@ -216,18 +235,23 @@ static void lapic_next_event(unsigned lo static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt) { + struct lapic_event_device *ldev; unsigned long flags; unsigned int v; + ldev = container_of(evt, 
struct lapic_event_device, evdev); + local_irq_save(flags); switch (mode) { case CLOCK_EVT_PERIODIC: + ldev->last_delta = calibration_result / APIC_DIVISOR; case CLOCK_EVT_ONESHOT: __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC, 1); break; case CLOCK_EVT_SHUTDOWN: + ldev->last_delta = 0; v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write_around(APIC_LVTT, v); @@ -243,7 +267,7 @@ static void lapic_timer_setup(enum clock */ static void __devinit setup_APIC_timer(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; memcpy(levt, &lapic_clockevent, sizeof(*levt)); @@ -317,7 +341,7 @@ static void __init lapic_cal_handler(str */ void __init setup_boot_APIC_clock(void) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; const long pm_100ms = PMTMR_TICKS_PER_SEC/10; const long pm_thresh = pm_100ms/100; void (*real_handler)(struct pt_regs *regs); @@ -383,6 +407,13 @@ void __init setup_boot_APIC_clock(void) "%lu (%ld)\n", (unsigned long) res, delta); delta = (long) res; } + /* + * Calculate the pmtimer -> lapic conversion factor to + * verify the lapic stability in the power states. + */ + acpi_verify_mult = div_sc(delta, deltapm, 22); + apic_printk(APIC_VERBOSE, "... 
acpi_verify_mult = %lu\n", + acpi_verify_mult); } /* Calculate the scaled math multiplication factor */ @@ -491,7 +522,7 @@ void __devinit setup_secondary_APIC_cloc void switch_APIC_timer_to_ipi(void *cpumask) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; cpumask_t mask = *(cpumask_t *)cpumask; int cpu = smp_processor_id(); @@ -502,7 +533,7 @@ EXPORT_SYMBOL(switch_APIC_timer_to_ipi); void switch_ipi_to_APIC_timer(void *cpumask) { - struct clock_event_device *levt = &__get_cpu_var(lapic_events); + struct clock_event_device *levt = &__get_cpu_var(lapic_events).evdev; cpumask_t mask = *(cpumask_t *)cpumask; int cpu = smp_processor_id(); @@ -517,7 +548,7 @@ EXPORT_SYMBOL(switch_ipi_to_APIC_timer); fastcall void local_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu).evdev; per_cpu(irq_stat, cpu).apic_timer_irqs++; @@ -578,9 +609,79 @@ static void lapic_timer_broadcast(cpumas void lapic_timer_idle_broadcast(int broadcast) { int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu).evdev; + unsigned long flags; + local_irq_save(flags); clockevents_set_broadcast(evt, broadcast); + local_irq_restore(flags); +} + +/* + * Local APIC verify that timer is stable during this power state + * + * Called with interrupts disabled. 
+ */ +int lapic_timer_idle_verify(unsigned long ticks) +{ + struct lapic_event_device *dev = &__get_cpu_var(lapic_events); + long delta_apic, delta_pm, delta, counter = apic_read(APIC_TMCCT); + const uint32_t pm_500us = PMTMR_TICKS_PER_SEC/2000; + const long pm_250us = PMTMR_TICKS_PER_SEC/4000; + const long pm_100us = PMTMR_TICKS_PER_SEC/10000; + uint64_t delta_ticks; + + /* + * Start the verification: Store current time and the apic counter + */ + if (!ticks) { + dev->counter = counter; + return 0; + } + + /* + * End of verification: + * + * Convert pm timer ticks (from ACPI) to lapic ticks and + * compare with the lapic delta. + * + * We do not make decisions on short sleeps (< 500us) and + * we back out, when the lapic is switched off already + * (last_delta = 0) + */ + if (ticks < pm_500us || !dev->last_delta) + return 0; + delta_ticks = (((u64) ticks) * acpi_verify_mult) >> 22; + delta_pm = (long) delta_ticks; + + delta_apic = dev->counter - counter; + /* Take wraps in periodic mode into account */ + if (delta_apic <= 0) + delta_apic += dev->last_delta; + + /* Calculate the delta between lapic and pm timer */ + delta = delta_pm - delta_apic; + /* + * The delta between pmtimer and lapic is less than 100us: + * lapic is stable. This catches also delta_pm < delta_apic, + * which happens due to clock skew and rounding errors. + */ + if (delta < pm_100us) + return 1; + + /* + * The delta between pmtimer and lapic is greater than 250us: + * lapic is unstable. 
+ */ + if (delta > pm_250us) { + apic_printk(APIC_VERBOSE, "lapic timer verify: delta %ld " + "pmtimer %ld (%ld) lapic %ld(%ld %ld %ld) " + "on cpu %d\n", delta, delta_pm, ticks, delta_apic, + counter, dev->counter, dev->last_delta, + smp_processor_id()); + return -1; + } + return 0; } int setup_profiling_timer(unsigned int multiplier) diff -puN drivers/acpi/processor_idle.c~acpi-verify-lapic-timer drivers/acpi/processor_idle.c --- a/drivers/acpi/processor_idle.c~acpi-verify-lapic-timer +++ a/drivers/acpi/processor_idle.c @@ -249,17 +249,32 @@ static void acpi_timer_check_state(int s struct acpi_processor_power *pwr = &pr->power; /* + * FIXME: Initialize this when the data structure is created ! + */ + if (!pr->power.timer_state_unstable) + pr->power.timer_state_unstable = INT_MAX; + + /* * Check, if one of the previous states already marked the lapic * unstable */ if (pwr->timer_broadcast_on_state < state) return; +#ifdef CONFIG_X86_64 + /* + * This can go away, when x86_64 has the detection support + */ if(cx->type == ACPI_STATE_C3 || - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + boot_cpu_data.x86_vendor == X86_VENDOR_AMD) +#else + /* + * We could autodetect that too + */ + if(cx->type == ACPI_STATE_C3) +#endif pr->power.timer_broadcast_on_state = state; - return; - } + } static void acpi_propagate_timer_broadcast(struct acpi_processor *pr) @@ -279,8 +294,36 @@ static void acpi_state_timer_broadcast(s { int state = cx - pr->power.states; - if (state >= pr->power.timer_broadcast_on_state) + if (state >= pr->power.timer_broadcast_on_state) { lapic_timer_idle_broadcast(broadcast); + return; + } + + /* + * On cstate entry we save the lapic timer value + */ + lapic_timer_idle_verify(0); +} + +/* C-State timer verification */ +static void acpi_state_timer_verify(struct acpi_processor *pr, + struct acpi_processor_cx *cx, + uint32_t ticks) +{ + struct acpi_processor_power *pwr = &pr->power; + int state = cx - pr->power.states; + + if (pwr->timer_state_unstable <= 
state) + return; + + if (lapic_timer_idle_verify(ticks) < 0) { + if (cx->timer_verify++ == 10) { + pwr->timer_state_unstable = state; + printk(KERN_WARNING + "ACPI: lapic on CPU %d stops in C%d[C%d]\n", + smp_processor_id(), state, cx->type); + } + } } #else @@ -293,6 +336,11 @@ static void acpi_state_timer_broadcast(s int broadcast) { } +static void acpi_state_timer_verify(struct acpi_processor *pr, + struct acpi_processor_cx *cx, + uint32_t ticks) +{ +} #endif @@ -442,6 +490,10 @@ static void acpi_processor_idle(void) acpi_cstate_enter(cx); /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); + /* Compute time (ticks) that we were actually asleep */ + sleep_ticks = + ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; + acpi_state_timer_verify(pr, cx, sleep_ticks); #ifdef CONFIG_GENERIC_TIME /* TSC halts in C2, so notify users */ @@ -450,9 +502,6 @@ static void acpi_processor_idle(void) /* Re-enable interrupts */ local_irq_enable(); current_thread_info()->status |= TS_POLLING; - /* Compute time (ticks) that we were actually asleep */ - sleep_ticks = - ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD; acpi_state_timer_broadcast(pr, cx, 0); break; @@ -520,6 +569,18 @@ static void acpi_processor_idle(void) #endif /* + * If the lapic verification found a stopped lapic, we have + * to propagate the result. We can not do it from the verify + * code as smp calls must have interrupts enabled. + */ + if (pr->power.timer_state_unstable < + pr->power.timer_broadcast_on_state) { + pr->power.timer_state_unstable = + pr->power.timer_broadcast_on_state; + acpi_propagate_timer_broadcast(pr); + } + + /* * Promotion? 
* ---------- * Track the number of longs (time asleep is greater than threshold) diff -puN include/acpi/processor.h~acpi-verify-lapic-timer include/acpi/processor.h --- a/include/acpi/processor.h~acpi-verify-lapic-timer +++ a/include/acpi/processor.h @@ -67,6 +67,7 @@ struct acpi_processor_cx { u32 latency_ticks; u32 power; u32 usage; + s32 timer_verify; u64 time; struct acpi_processor_cx_policy promotion; struct acpi_processor_cx_policy demotion; @@ -80,6 +81,8 @@ struct acpi_processor_power { int count; struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER]; int timer_broadcast_on_state; + int timer_state_verified; + int timer_state_unstable; }; /* Performance Management */ diff -puN include/asm-i386/apic.h~acpi-verify-lapic-timer include/asm-i386/apic.h --- a/include/asm-i386/apic.h~acpi-verify-lapic-timer +++ a/include/asm-i386/apic.h @@ -101,6 +101,7 @@ extern void setup_boot_APIC_clock (void) extern void setup_secondary_APIC_clock (void); extern int APIC_init_uniprocessor (void); extern void lapic_timer_idle_broadcast(int broadcast); +extern int lapic_timer_idle_verify(unsigned long ticks); extern void enable_NMI_through_LVT0 (void * dummy); void switch_APIC_timer_to_ipi(void *cpumask); diff -puN include/asm-x86_64/apic.h~acpi-verify-lapic-timer include/asm-x86_64/apic.h --- a/include/asm-x86_64/apic.h~acpi-verify-lapic-timer +++ a/include/asm-x86_64/apic.h @@ -87,6 +87,8 @@ extern void clustered_apic_check(void); extern void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector, unsigned char msg_type, unsigned char mask); +static inline void lapic_timer_idle_broadcast(int broadcast) { } +static inline int lapic_timer_idle_verify(unsigned long ticks) { return 0;} #define K8_APIC_EXT_LVT_BASE 0x500 #define K8_APIC_EXT_INT_MSG_FIX 0x0 _