From: john stultz This patchset provides a generic timekeeping subsystem that is independent of the timer interrupt. This allows for robust and correct behavior in cases of late or lost ticks, avoids interpolation errors, reduces duplication in arch-specific code, and allows or assists future changes such as high-res timers, dynamic ticks, or realtime preemption. Additionally, it provides finer nanosecond resolution values to the clock_gettime functions. The patchset provides the minimal NTP changes, the clocksource abstraction, the core timekeeping code as well as the code to convert i386. I have started on converting more arches, but for now I'm only submitting code for i386. I'd like to thank the following people who have contributed ideas, criticism, testing and code that has helped shape this work: George Anzinger, Nish Aravamudan, Max Asbock, Serge Belyshev, Dominik Brodowski, Thomas Gleixner, Darren Hart, Christoph Lameter, Matt Mackal, Keith Mannthey, Ingo Molnar, Martin Schwidefsky, Frank Sorenson, Ulrich Windl, Jonathan Woithe, Darrick Wong, Roman Zippel and any others whom I've accidentally left off this list. This patch: Rework some of the interrupt time NTP adjustments so that they can be re-used by the generic timekeeping infrastructure. This is done by logically separating the code which adjusts xtime from the code that decides, based on the NTP state variables, how much to adjust time each tick. This should not affect the existing behavior, but just separate the logical functionality so it can be re-used. 
Signed-off-by: John Stultz Acked-by: Ingo Molnar Signed-off-by: Andrew Morton --- kernel/timer.c | 123 ++++++++++++++++++++++++++++++++--------------- 1 files changed, 85 insertions(+), 38 deletions(-) diff -puN kernel/timer.c~time-reduced-ntp-rework-part-1 kernel/timer.c --- devel/kernel/timer.c~time-reduced-ntp-rework-part-1 2006-01-05 20:43:49.000000000 -0800 +++ devel-akpm/kernel/timer.c 2006-01-05 20:43:49.000000000 -0800 @@ -590,6 +590,7 @@ static long time_adj; /* tick adjust ( long time_reftime; /* time at last adjustment (s) */ long time_adjust; long time_next_adjust; +long time_adjust_step; /* per tick time_adjust step */ /* * this routine handles the overflow of the microsecond field @@ -717,45 +718,86 @@ static void second_overflow(void) #endif } -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) +/** + * ntp_advance - increments the NTP state machine + * @interval_ns: interval, in nanoseconds + * + * Must be holding the xtime writelock when calling. + */ +static void ntp_advance(unsigned long interval_ns) { - long time_adjust_step, delta_nsec; + static unsigned long interval_sum; - if ((time_adjust_step = time_adjust) != 0 ) { - /* - * We are doing an adjtime thing. Prepare time_adjust_step to - * be within bounds. Note that a positive time_adjust means we - * want the clock to run faster. - * - * Limit the amount of the step to be in the range - * -tickadj .. +tickadj - */ - time_adjust_step = min(time_adjust_step, (long)tickadj); - time_adjust_step = max(time_adjust_step, (long)-tickadj); + /* increment the interval sum: */ + interval_sum += interval_ns; + + /* calculate the per tick singleshot adjtime adjustment step: */ + while (interval_ns >= tick_nsec) { + time_adjust_step = time_adjust; + if (time_adjust_step) { + /* + * We are doing an adjtime thing. + * + * Prepare time_adjust_step to be within bounds. + * Note that a positive time_adjust means we want + * the clock to run faster. 
+ * + * Limit the amount of the step to be in the range + * -tickadj .. +tickadj: + */ + time_adjust_step = min(time_adjust_step, (long)tickadj); + time_adjust_step = max(time_adjust_step, + (long)-tickadj); - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; - } - delta_nsec = tick_nsec + time_adjust_step * 1000; - /* - * Advance the phase, once it gets to one microsecond, then - * advance the tick more. - */ - time_phase += time_adj; - if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) { - long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10)); - time_phase -= ltemp << (SHIFT_SCALE - 10); - delta_nsec += ltemp; + /* Reduce by this step the amount of time left: */ + time_adjust -= time_adjust_step; + } + interval_ns -= tick_nsec; } - xtime.tv_nsec += delta_nsec; - time_interpolator_update(delta_nsec); /* Changes by adjtime() do not take effect till next tick. */ if (time_next_adjust != 0) { time_adjust = time_next_adjust; time_next_adjust = 0; } + + while (interval_sum >= NSEC_PER_SEC) { + interval_sum -= NSEC_PER_SEC; + second_overflow(); + } +} + +/** + * phase_advance - advance the phase + * + * advance the phase, once it gets to one nanosecond advance the tick more. 
+ */ +static inline long phase_advance(void) +{ + long delta = 0; + + time_phase += time_adj; + + if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) { + delta = shift_right(time_phase, (SHIFT_SCALE - 10)); + time_phase -= delta << (SHIFT_SCALE - 10); + } + + return delta; +} + +/** + * xtime_advance - advance xtime + * @delta_nsec: adjustment in nsecs + */ +static inline void xtime_advance(long delta_nsec) +{ + xtime.tv_nsec += delta_nsec; + if (likely(xtime.tv_nsec < NSEC_PER_SEC)) + return; + + xtime.tv_nsec -= NSEC_PER_SEC; + xtime.tv_sec++; } /* @@ -763,19 +805,24 @@ static void update_wall_time_one_tick(vo * usually just one (we shouldn't be losing ticks, * we're doing this this way mainly for interrupt * latency reasons, not because we think we'll - * have lots of lost timer ticks + * have lots of lost timer ticks) */ static void update_wall_time(unsigned long ticks) { do { - ticks--; - update_wall_time_one_tick(); - if (xtime.tv_nsec >= 1000000000) { - xtime.tv_nsec -= 1000000000; - xtime.tv_sec++; - second_overflow(); - } - } while (ticks); + /* + * Calculate the nsec delta using the precomputed NTP + * adjustments: + * tick_nsec, time_adjust_step, time_adj + */ + long delta_nsec = tick_nsec + time_adjust_step * 1000; + delta_nsec += phase_advance(); + + xtime_advance(delta_nsec); + ntp_advance(tick_nsec); + time_interpolator_update(delta_nsec); + + } while (--ticks); } /* _