Index: linux-2.6.14/arch/alpha/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/alpha/kernel/time.c +++ linux-2.6.14/arch/alpha/kernel/time.c @@ -55,10 +55,6 @@ #include "proto.h" #include "irq_impl.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long wall_jiffies; /* kernel/timer.c */ static int set_rtc_mmss(unsigned long); Index: linux-2.6.14/arch/arm/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/arm/kernel/time.c +++ linux-2.6.14/arch/arm/kernel/time.c @@ -36,10 +36,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* * Our system timer. */ Index: linux-2.6.14/arch/arm26/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/arm26/kernel/time.c +++ linux-2.6.14/arch/arm26/kernel/time.c @@ -34,10 +34,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long wall_jiffies; /* this needs a better home */ Index: linux-2.6.14/arch/cris/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/cris/kernel/time.c +++ linux-2.6.14/arch/cris/kernel/time.c @@ -32,10 +32,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - int have_rtc; /* used to remember if we have an RTC or not */; #define TICK_SIZE tick Index: linux-2.6.14/arch/frv/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/frv/kernel/time.c +++ linux-2.6.14/arch/frv/kernel/time.c @@ -34,9 +34,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; -EXPORT_SYMBOL(jiffies_64); - unsigned long __nongprelbss __clkin_clock_speed_HZ; unsigned long __nongprelbss __ext_bus_clock_speed_HZ; unsigned long __nongprelbss __res_bus_clock_speed_HZ; Index: linux-2.6.14/arch/h8300/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/h8300/kernel/time.c +++ linux-2.6.14/arch/h8300/kernel/time.c @@ -32,10 +32,6 @@ #define TICK_SIZE (tick_nsec / 1000) -u64 jiffies_64; - -EXPORT_SYMBOL(jiffies_64); - /* * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick Index: linux-2.6.14/arch/i386/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/time.c +++ linux-2.6.14/arch/i386/kernel/time.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include #include "mach_time.h" @@ -74,10 +76,6 @@ int pit_latch_buggy; /* ext #include "do_timer.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); @@ -86,13 +84,6 @@ extern unsigned long wall_jiffies; DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -#include - -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); - -struct timer_opts *cur_timer __read_mostly = &timer_none; - /* * This is a special lock that is owned by the CPU and holds the index * register we are working with. 
It is required for NMI access to the @@ -122,116 +113,23 @@ void rtc_cmos_write(unsigned char val, u } EXPORT_SYMBOL(rtc_cmos_write); -/* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq; - unsigned long usec, sec; - unsigned long max_ntp_tick; - - do { - unsigned long lost; - - seq = read_seqbegin(&xtime_lock); - - usec = cur_timer->get_offset(); - lost = jiffies - wall_jiffies; - - /* - * If time_adjust is negative then NTP is slowing the clock - * so make sure not to go into next possible interval. - * Better to lose some accuracy than have time go backwards.. - */ - if (unlikely(time_adjust < 0)) { - max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; - usec = min(usec, max_ntp_tick); - - if (lost) - usec += lost * max_ntp_tick; - } - else if (unlikely(lost)) - usec += lost * (USEC_PER_SEC / HZ); - - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / 1000); - } while (read_seqretry(&xtime_lock, seq)); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - /* - * This is revolting. We need to set "xtime" correctly. However, the - * value in this location is the value at the most recent update of - * wall time. Discover what correction gettimeofday() would have - * made, and then undo it! - */ - nsec -= cur_timer->get_offset() * NSEC_PER_USEC; - nsec -= (jiffies - wall_jiffies) * TICK_NSEC; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - static int set_rtc_mmss(unsigned long nowtime) { int retval; - - WARN_ON(irqs_disabled()); + unsigned long flags; /* gets recalled with irq locally disabled */ - spin_lock_irq(&rtc_lock); + /* XXX - does irqsave resolve this? -johnstul */ + spin_lock_irqsave(&rtc_lock, flags); if (efi_enabled) retval = efi_set_rtc_mmss(nowtime); else retval = mach_set_rtc_mmss(nowtime); - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return retval; } - -int timer_ack; - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. - */ -unsigned long long monotonic_clock(void) -{ - return cur_timer->monotonic_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) unsigned long profile_pc(struct pt_regs *regs) { @@ -245,70 +143,6 @@ unsigned long profile_pc(struct pt_regs EXPORT_SYMBOL(profile_pc); #endif -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick - */ -static inline void do_timer_interrupt(int irq, struct pt_regs *regs) -{ -#ifdef CONFIG_X86_IO_APIC - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to reset the IRR bit for do_slow_gettimeoffset(). 
- * This will also deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. */ - inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); - } -#endif - - do_timer_interrupt_hook(regs); - - - if (MCA_bus) { - /* The PS/2 uses level-triggered interrupts. You can't - turn them off, nor would you want to (any attempt to - enable edge-triggered interrupts usually gets intercepted by a - special hardware circuit). Hence we have to acknowledge - the timer interrupt. Through some incredibly stupid - design idea, the reset for IRQ 0 is done by setting the - high bit of the PPI port B (0x61). Note that some PS/2s, - notably the 55SX, work fine if this is removed. */ - - irq = inb_p( 0x61 ); /* read the current state */ - outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ - } -} - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. -arca - */ - write_seqlock(&xtime_lock); - - cur_timer->mark_offset(); - - do_timer_interrupt(irq, regs); - - write_sequnlock(&xtime_lock); - return IRQ_HANDLED; -} - /* not static: needed by APM */ unsigned long get_cmos_time(void) { @@ -327,139 +161,42 @@ unsigned long get_cmos_time(void) } EXPORT_SYMBOL(get_cmos_time); -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); - -static void sync_cmos_clock(unsigned long dummy) +/* arch specific timeofday hooks */ +nsec_t read_persistent_clock(void) { - struct timeval now, next; - int fail = 1; + return (nsec_t)get_cmos_time() * NSEC_PER_SEC; +} +void sync_persistent_clock(struct timespec ts) +{ + static unsigned long last_rtc_update; /* * If we have an externally synchronized Linux clock, then update * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). 
- */ + if (ts.tv_sec <= last_rtc_update + 660) return; - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; + if((ts.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && + (ts.tv_nsec / 1000) <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) { + /* horrible...FIXME */ + if (set_rtc_mmss(ts.tv_sec) == 0) + last_rtc_update = ts.tv_sec; + else + last_rtc_update = ts.tv_sec - 600; /* do it again in 60 s */ } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) -{ - mod_timer(&sync_cmos_timer, jiffies + 1); } -static long clock_cmos_diff, sleep_start; - -static struct timer_opts *last_timer; -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); - last_timer = cur_timer; - cur_timer = &timer_none; - if (last_timer->suspend) - last_timer->suspend(state); - return 0; -} - -static int timer_resume(struct sys_device *dev) -{ - unsigned long flags; - unsigned long sec; - unsigned long sleep_length; - -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) - hpet_reenable(); -#endif - setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; - write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length; - wall_jiffies += sleep_length; - if (last_timer->resume) - last_timer->resume(); - cur_timer = last_timer; - last_timer = NULL; - touch_softlockup_watchdog(); - return 0; -} - -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - - -/* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = &timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); - #ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); } - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); } @@ -467,6 +204,9 @@ static void __init hpet_time_init(void) void __init time_init(void) { + /* Set the clock to HZ Hz: */ + setup_pit_timer(); + #ifdef CONFIG_HPET_TIMER if (is_hpet_capable()) { /* @@ -477,13 +217,5 @@ void __init time_init(void) return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - 
-xtime.tv_sec, -xtime.tv_nsec); - - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); - time_init_hook(); } Index: linux-2.6.14/arch/ia64/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/ia64/kernel/time.c +++ linux-2.6.14/arch/ia64/kernel/time.c @@ -32,10 +32,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ Index: linux-2.6.14/arch/m32r/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/m32r/kernel/time.c +++ linux-2.6.14/arch/m32r/kernel/time.c @@ -39,10 +39,6 @@ extern void send_IPI_allbutself(int, int extern void smp_local_timer_interrupt(struct pt_regs *); #endif -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long wall_jiffies; #define TICK_SIZE (tick_nsec / 1000) Index: linux-2.6.14/arch/m68k/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/m68k/kernel/time.c +++ linux-2.6.14/arch/m68k/kernel/time.c @@ -27,10 +27,6 @@ #include #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - static inline int set_rtc_mmss(unsigned long nowtime) { if (mach_set_clock_mmss) Index: linux-2.6.14/arch/m68knommu/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/m68knommu/kernel/time.c +++ linux-2.6.14/arch/m68knommu/kernel/time.c @@ -27,10 +27,6 @@ #define TICK_SIZE (tick_nsec / 1000) -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - extern unsigned long wall_jiffies; Index: linux-2.6.14/arch/mips/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/mips/kernel/time.c +++ linux-2.6.14/arch/mips/kernel/time.c @@ -43,10 +43,6 @@ #define TICK_SIZE (tick_nsec / 1000) -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* * forward reference */ Index: linux-2.6.14/arch/parisc/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/parisc/kernel/time.c +++ linux-2.6.14/arch/parisc/kernel/time.c @@ -33,10 +33,6 @@ #include -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* xtime and wall_jiffies keep wall-clock time */ extern unsigned long wall_jiffies; Index: linux-2.6.14/arch/ppc/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/ppc/kernel/time.c +++ linux-2.6.14/arch/ppc/kernel/time.c @@ -66,11 +66,6 @@ #include -/* XXX false sharing with below? 
*/ -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - unsigned long disarm_decr[NR_CPUS]; extern struct timezone sys_tz; Index: linux-2.6.14/arch/ppc64/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/ppc64/kernel/time.c +++ linux-2.6.14/arch/ppc64/kernel/time.c @@ -68,10 +68,6 @@ #include #include -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* keep track of when we need to update the rtc */ time_t last_rtc_update; extern int piranha_simulator; Index: linux-2.6.14/arch/s390/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/s390/kernel/time.c +++ linux-2.6.14/arch/s390/kernel/time.c @@ -49,10 +49,6 @@ #define TICK_SIZE tick -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - static ext_int_info_t ext_int_info_cc; static u64 init_timer_cc; static u64 jiffies_timer_cc; Index: linux-2.6.14/arch/sh/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/sh/kernel/time.c +++ linux-2.6.14/arch/sh/kernel/time.c @@ -56,10 +56,6 @@ extern unsigned long wall_jiffies; #define TICK_SIZE (tick_nsec / 1000) DEFINE_SPINLOCK(tmu0_lock); -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - /* XXX: Can we initialize this in a routine somewhere? Dreamcast doesn't want * these routines anywhere... */ #ifdef CONFIG_SH_RTC Index: linux-2.6.14/arch/sh64/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/sh64/kernel/time.c +++ linux-2.6.14/arch/sh64/kernel/time.c @@ -116,8 +116,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; - static unsigned long tmu_base, rtc_base; unsigned long cprc_base; Index: linux-2.6.14/arch/sparc/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/sparc/kernel/time.c +++ linux-2.6.14/arch/sparc/kernel/time.c @@ -45,10 +45,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - DEFINE_SPINLOCK(rtc_lock); enum sparc_clock_type sp_clock_typ; DEFINE_SPINLOCK(mostek_lock); Index: linux-2.6.14/arch/sparc64/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/sparc64/kernel/time.c +++ linux-2.6.14/arch/sparc64/kernel/time.c @@ -55,10 +55,6 @@ unsigned long ds1287_regs = 0UL; extern unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - static void __iomem *mstk48t08_regs; static void __iomem *mstk48t59_regs; Index: linux-2.6.14/arch/um/kernel/time_kern.c =================================================================== --- linux-2.6.14.orig/arch/um/kernel/time_kern.c +++ linux-2.6.14/arch/um/kernel/time_kern.c @@ -22,10 +22,6 @@ #include "mode.h" #include "os.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - int hz(void) { return(HZ); Index: linux-2.6.14/arch/v850/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/v850/kernel/time.c +++ linux-2.6.14/arch/v850/kernel/time.c @@ -26,10 +26,6 @@ #include "mach.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - #define TICK_SIZE (tick_nsec / 1000) /* Index: linux-2.6.14/arch/x86_64/kernel/time.c =================================================================== --- 
linux-2.6.14.orig/arch/x86_64/kernel/time.c +++ linux-2.6.14/arch/x86_64/kernel/time.c @@ -38,17 +38,11 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - -#ifdef CONFIG_CPU_FREQ -static void cpufreq_delayed_get(void); -#endif extern void i8254_timer_resume(void); extern int using_apic_timer; @@ -60,7 +54,9 @@ static int notsc __initdata = 0; #undef HPET_HACK_ENABLE_DANGEROUS -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +unsigned int cpu_khz; /* CPU clocks / usec, not used here */ +unsigned int tsc_khz; /* TSC clocks / usec, not used here */ +unsigned long hpet_address; static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ static int hpet_use_timer; @@ -83,107 +79,6 @@ static inline void rdtscll_sync(unsigned rdtscll(*tsc); } -/* - * do_gettimeoffset() returns microseconds since last timer interrupt was - * triggered by hardware. A memory read of HPET is slower than a register read - * of TSC, but much more reliable. It's also synchronized to the timer - * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a - * timer interrupt has happened already, but vxtime.trigger wasn't updated yet. - * This is not a problem, because jiffies hasn't updated either. They are bound - * together by xtime_lock. - */ - -static inline unsigned int do_gettimeoffset_tsc(void) -{ - unsigned long t; - unsigned long x; - rdtscll_sync(&t); - if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ - x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; - return x; -} - -static inline unsigned int do_gettimeoffset_hpet(void) -{ - /* cap counter read to one tick to avoid inconsistencies */ - unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; - return (min(counter,hpet_tick) * vxtime.quot) >> 32; -} - -unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; - -/* - * This version of gettimeofday() has microsecond resolution and better than - * microsecond precision, as we're using at least a 10 MHz (usually 14.31818 - * MHz) HPET timer. - */ - -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq, t; - unsigned int sec, usec; - - do { - seq = read_seqbegin(&xtime_lock); - - sec = xtime.tv_sec; - usec = xtime.tv_nsec / 1000; - - /* i386 does some correction here to keep the clock - monotonous even when ntpd is fixing drift. - But they didn't work for me, there is a non monotonic - clock anyways with ntp. - I dropped all corrections now until a real solution can - be found. Note when you fix it here you need to do the same - in arch/x86_64/kernel/vsyscall.c and export all needed - variables in vmlinux.lds. -AK */ - - t = (jiffies - wall_jiffies) * (1000000L / HZ) + - do_gettimeoffset(); - usec += t; - - } while (read_seqretry(&xtime_lock, seq)); - - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; -} - -EXPORT_SYMBOL(do_gettimeofday); - -/* - * settimeofday() first undoes the correction that gettimeofday would do - * on the time, and then saves it. This is ugly, but has been like this for - * ages already. 
- */ - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - - nsec -= do_gettimeoffset() * 1000 + - (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -283,90 +178,8 @@ static void set_rtc_mmss(unsigned long n spin_unlock(&rtc_lock); } - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. - */ -unsigned long long monotonic_clock(void) -{ - unsigned long seq; - u32 last_offset, this_offset, offset; - unsigned long long base; - - if (vxtime.mode == VXTIME_HPET) { - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last; - base = monotonic_base; - this_offset = hpet_readl(HPET_COUNTER); - - } while (read_seqretry(&xtime_lock, seq)); - offset = (this_offset - last_offset); - offset *=(NSEC_PER_SEC/HZ)/hpet_tick; - return base + offset; - }else{ - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last_tsc; - base = monotonic_base; - } while (read_seqretry(&xtime_lock, seq)); - sync_core(); - rdtscll(this_offset); - offset = (this_offset - last_offset)*1000/cpu_khz; - return base + offset; - } - - -} -EXPORT_SYMBOL(monotonic_clock); - -static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) -{ - static long lost_count; - static int warned; - - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING - "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " - "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. */ - warned = 1; - } else - lost_count++; - -#ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - (like going into thermal throttle) - Give cpufreq a change to catch up. */ - if ((lost_count+1) % 25 == 0) { - cpufreq_delayed_get(); - } -#endif -} - static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - static unsigned long rtc_update = 0; - unsigned long tsc; - int delay, offset = 0, lost = 0; - /* * Here we are in the timer irq handler. 
We have irqs locally disabled (so we * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running @@ -376,67 +189,6 @@ static irqreturn_t timer_interrupt(int i write_seqlock(&xtime_lock); - if (vxtime.hpet_address) - offset = hpet_readl(HPET_COUNTER); - - if (hpet_use_timer) { - /* if we're using the hpet timer functionality, - * we can more accurately know the counter value - * when the timer interrupt occured. - */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - delay = hpet_readl(HPET_COUNTER) - offset; - } else { - spin_lock(&i8253_lock); - outb_p(0x00, 0x43); - delay = inb_p(0x40); - delay |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - delay = LATCH - 1 - delay; - } - - rdtscll_sync(&tsc); - - if (vxtime.mode == VXTIME_HPET) { - if (offset - vxtime.last > hpet_tick) { - lost = (offset - vxtime.last) / hpet_tick - 1; - } - - monotonic_base += - (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; - - vxtime.last = offset; -#ifdef CONFIG_X86_PM_TIMER - } else if (vxtime.mode == VXTIME_PMTMR) { - lost = pmtimer_mark_offset(); -#endif - } else { - offset = (((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); - - if (offset < 0) - offset = 0; - - if (offset > (USEC_PER_SEC / HZ)) { - lost = offset / (USEC_PER_SEC / HZ); - offset %= (USEC_PER_SEC / HZ); - } - - monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; - - vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; - - if ((((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) < offset) - vxtime.last_tsc = tsc - - (((long) offset << 32) / vxtime.tsc_quot) - 1; - } - - if (lost > 0) { - handle_lost_ticks(lost, regs); - jiffies += lost; - } - /* * Do the timer stuff. */ @@ -459,20 +211,6 @@ static irqreturn_t timer_interrupt(int i smp_local_timer_interrupt(regs); #endif -/* - * If we have an externally synchronized Linux clock, then update CMOS clock - * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy - * closest to exactly 500 ms before the next second. If the update fails, we - * don't care, as it'll be updated on the next turn, and the problem (time way - * off) isn't likely to go away much sooner anyway. - */ - - if (ntp_synced() && xtime.tv_sec > rtc_update && - abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) { - set_rtc_mmss(xtime.tv_sec); - rtc_update = xtime.tv_sec + 660; - } - write_sequnlock(&xtime_lock); return IRQ_HANDLED; @@ -573,6 +311,30 @@ unsigned long get_cmos_time(void) return mktime(year, mon, day, hour, min, sec); } +/* arch specific timeofday hooks */ +u64 read_persistent_clock(void) +{ + return (u64)get_cmos_time() * NSEC_PER_SEC; +} + +void sync_persistent_clock(struct timespec ts) +{ + static unsigned long rtc_update = 0; + /* + * If we have an externally synchronized Linux clock, then update + * CMOS clock accordingly every ~11 minutes. set_rtc_mmss() will + * be called in the jiffy closest to exactly 500 ms before the + * next second. If the update fails, we don't care, as it'll be + * updated on the next turn, and the problem (time way off) isn't + * likely to go away much sooner anyway. + */ + if (ts.tv_sec > rtc_update && + abs(ts.tv_nsec - 500000000) <= tick_nsec / 2) { + set_rtc_mmss(xtime.tv_sec); + rtc_update = xtime.tv_sec + 660; + } +} + #ifdef CONFIG_CPU_FREQ /* Frequency scaling support. 
Adjust the TSC based timer when the cpu frequency @@ -600,23 +362,6 @@ static void handle_cpufreq_delayed_get(v cpufreq_delayed_issched = 0; } -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static void cpufreq_delayed_get(void) -{ - static int warned; - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - if (!warned) { - warned = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); - } - schedule_work(&cpufreq_delayed_get_work); - } -} - static unsigned int ref_freq = 0; static unsigned long loops_per_jiffy_ref = 0; @@ -651,8 +396,11 @@ static int time_cpufreq_notifier(struct cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { vxtime.tsc_quot = (1000L << 32) / cpu_khz; + tsc_khz = cpu_khz; + } + } set_cyc2ns_scale(cpu_khz_ref / 1000); @@ -916,18 +664,12 @@ void __init time_init(void) if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; -#ifdef CONFIG_X86_PM_TIMER - } else if (pmtmr_ioport) { - vxtime_hz = PM_TIMER_FREQUENCY; - timename = "PM"; - pit_init(); - cpu_khz = pit_calibrate_tsc(); -#endif } else { pit_init(); cpu_khz = pit_calibrate_tsc(); timename = "PIT"; } + tsc_khz = cpu_khz; printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n", vxtime_hz / 1000000, vxtime_hz % 1000000, timename); @@ -969,31 +711,8 @@ static __init int unsynchronized_tsc(voi */ void __init time_init_gtod(void) { - char *timetype; - if (unsynchronized_tsc()) notsc = 1; - if (vxtime.hpet_address && notsc) { - timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; -#ifdef CONFIG_X86_PM_TIMER - /* Using PM for gettimeofday is quite slow, but we have no other - choice because the TSC is too unreliable on some systems. */ - } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { - timetype = "PM"; - do_gettimeoffset = do_gettimeoffset_pm; - vxtime.mode = VXTIME_PMTMR; - sysctl_vsyscall = 0; - printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); -#endif - } else { - timetype = hpet_use_timer ? 
"HPET/TSC" : "PIT/TSC"; - vxtime.mode = VXTIME_TSC; - } - - printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); } __setup("report_lost_ticks", time_setup); @@ -1016,7 +735,6 @@ static int timer_suspend(struct sys_devi static int timer_resume(struct sys_device *dev) { - unsigned long flags; unsigned long sec; unsigned long ctime = get_cmos_time(); unsigned long sleep_length = (ctime - sleep_start) * HZ; @@ -1027,10 +745,6 @@ static int timer_resume(struct sys_devic i8254_timer_resume(); sec = ctime + clock_cmos_diff; - write_seqlock_irqsave(&xtime_lock,flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; touch_softlockup_watchdog(); @@ -1305,3 +1019,93 @@ static int __init notsc_setup(char *s) __setup("notsc", notsc_setup); +/* Code to compensate for TSC C3 stalls */ +static u64 tsc_c3_offset; +static int tsc_unstable; +static inline int check_tsc_unstable(void) +{ + return tsc_unstable; +} +static inline void mark_tsc_unstable(void) +{ + tsc_unstable = 1;; +} + +void tsc_c3_compensate(unsigned long nsecs) +{ + u64 cycles = ((u64)nsecs * tsc_khz)/1000000; + tsc_c3_offset += cycles; +} + +static inline u64 tsc_read_c3_time(void) +{ + return tsc_c3_offset; +} + + +/* Clock source code */ +#include + +static unsigned long current_tsc_khz = 0; +static cycle_t read_tsc_c3(void); +static int tsc_update_callback(void); + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .type = CLOCKSOURCE_CYCLES, + .mask = (cycle_t)-1, + .mult = 0, /* to be set */ + .shift = 22, + .update_callback = tsc_update_callback, + .is_continuous = 1, +}; + +static cycle_t read_tsc_c3(void) +{ + cycle_t ret; + rdtscll(ret); + return ret + tsc_read_c3_time(); +} + +static int tsc_update_callback(void) +{ + int change = 0; + /* check to see if we should switch to the safe clocksource */ + if (tsc_read_c3_time() && + strncmp(clocksource_tsc.name, "c3tsc", 5)) { + printk("Falling back to C3 safe TSC\n"); + clocksource_tsc.read_fnct = read_tsc_c3; + clocksource_tsc.type = CLOCKSOURCE_FUNCTION; + clocksource_tsc.name = "c3tsc"; + change = 1; + } + + if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { + clocksource_tsc.rating = 50; + reselect_clocksource(); + change = 1; + } + /* only update if tsc_khz has changed */ + if (current_tsc_khz != tsc_khz){ + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + change = 1; + } + return change; +} + +static int __init init_tsc_clocksource(void) +{ + if (!notsc && tsc_khz) { + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + register_clocksource(&clocksource_tsc); + } + return 0; +} + +module_init(init_tsc_clocksource); + Index: linux-2.6.14/arch/xtensa/kernel/time.c =================================================================== --- linux-2.6.14.orig/arch/xtensa/kernel/time.c +++ linux-2.6.14/arch/xtensa/kernel/time.c @@ -29,9 +29,6 @@ extern volatile unsigned long wall_jiffies; -u64 jiffies_64 = INITIAL_JIFFIES; -EXPORT_SYMBOL(jiffies_64); - spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL(rtc_lock); Index: linux-2.6.14/kernel/timer.c =================================================================== --- linux-2.6.14.orig/kernel/timer.c +++ linux-2.6.14/kernel/timer.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -46,6 +46,10 @@ static 
void time_interpolator_update(lon #define time_interpolator_update(x) #endif +u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; + +EXPORT_SYMBOL(jiffies_64); + /* * per-CPU timer vector definitions: */ @@ -613,13 +617,94 @@ long time_tolerance = MAXFREQ; /* frequ long time_precision = 1; /* clock precision (us) */ long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ -static long time_phase; /* phase offset (scaled us) */ long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC; /* frequency offset (scaled ppm)*/ static long time_adj; /* tick adjust (scaled 1 / HZ) */ long time_reftime; /* time at last adjustment (s) */ long time_adjust; long time_next_adjust; +long time_adjust_step; /* per tick time_adjust step */ + +long total_sppm; /* shifted ppm sum of all NTP adjustments */ +long offset_adj_ppm; +long tick_adj_ppm; +long singleshot_adj_ppm; + +#define MAX_SINGLESHOT_ADJ 500 /* (ppm) */ +#define SEC_PER_DAY 86400 +#define END_OF_DAY(x) (x + SEC_PER_DAY - (x % SEC_PER_DAY) - 1) + +/* NTP lock, protects NTP state machine */ +seqlock_t ntp_lock = SEQLOCK_UNLOCKED; + +/** + * ntp_leapsecond - NTP leapsecond processing code. + * now: the current time + * + * Returns the number of seconds (-1, 0, or 1) that + * should be added to the current time to properly + * adjust for leapseconds. + */ + +int ntp_leapsecond(struct timespec now) +{ + unsigned long flags; + /* + * Leap second processing. If in leap-insert state at + * the end of the day, the system clock is set back one + * second; if in leap-delete state, the system clock is + * set ahead one second. + */ + static time_t leaptime = 0; + int ret = 0; + + write_seqlock_irqsave(&ntp_lock, flags); + switch (time_state) { + + case TIME_OK: + if (time_status & STA_INS) { + time_state = TIME_INS; + leaptime = END_OF_DAY(now.tv_sec); + } else if (time_status & STA_DEL) { + time_state = TIME_DEL; + leaptime = END_OF_DAY(now.tv_sec); + } + break; + + case TIME_INS: + /* Once we are at (or past) leaptime, insert the second */ + if (now.tv_sec >= leaptime) { + time_state = TIME_OOP; + printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); + ret = -1; + } + break; + + case TIME_DEL: + /* Once we are at (or past) leaptime, delete the second */ + if (now.tv_sec >= leaptime) { + time_state = TIME_WAIT; + printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); + ret = 1; + } + break; + + case TIME_OOP: + /* Wait for the end of the leap second*/ + if (now.tv_sec > (leaptime + 1)) + time_state = TIME_WAIT; + time_state = TIME_WAIT; + break; + + case TIME_WAIT: + if (!(time_status & (STA_INS | STA_DEL))) + time_state = TIME_OK; + break; + } + + write_sequnlock_irqrestore(&ntp_lock, flags); + return 0; +} /* * this routine handles the overflow of the microsecond field @@ -642,59 +727,6 @@ static void second_overflow(void) } /* - * Leap second processing. If in leap-insert state at - * the end of the day, the system clock is set back one - * second; if in leap-delete state, the system clock is - * set ahead one second. The microtime() routine or - * external clock driver will insure that reported time - * is always monotonic. The ugly divides should be - * replaced. 
- */ - switch (time_state) { - - case TIME_OK: - if (time_status & STA_INS) - time_state = TIME_INS; - else if (time_status & STA_DEL) - time_state = TIME_DEL; - break; - - case TIME_INS: - if (xtime.tv_sec % 86400 == 0) { - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; - /* The timer interpolator will make time change gradually instead - * of an immediate jump by one second. - */ - time_interpolator_update(-NSEC_PER_SEC); - time_state = TIME_OOP; - clock_was_set(); - printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); - } - break; - - case TIME_DEL: - if ((xtime.tv_sec + 1) % 86400 == 0) { - xtime.tv_sec++; - wall_to_monotonic.tv_sec--; - /* Use of time interpolator for a gradual change of time */ - time_interpolator_update(NSEC_PER_SEC); - time_state = TIME_WAIT; - clock_was_set(); - printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); - } - break; - - case TIME_OOP: - time_state = TIME_WAIT; - break; - - case TIME_WAIT: - if (!(time_status & (STA_INS | STA_DEL))) - time_state = TIME_OK; - } - - /* * Compute the phase adjustment for the next second. In * PLL mode, the offset is reduced by a fixed factor * times the time constant. In FLL mode the offset is @@ -703,23 +735,20 @@ static void second_overflow(void) * the adjustment over not more than the number of * seconds between updates. */ - if (time_offset < 0) { - ltemp = -time_offset; - if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; - time_offset += ltemp; - time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } else { ltemp = time_offset; if (!(time_status & STA_FLL)) - ltemp >>= SHIFT_KG + time_constant; - if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE) - ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE; + ltemp = shift_right(ltemp, SHIFT_KG + time_constant); + ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE); + ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE); time_offset -= ltemp; time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE); - } + + offset_adj_ppm = shift_right(ltemp, SHIFT_UPDATE); /* ppm */ + + /* first calculate usec/user_tick offset */ + tick_adj_ppm = ((USEC_PER_SEC + USER_HZ/2)/USER_HZ) - tick_usec; + /* multiply by user_hz to get usec/sec => ppm */ + tick_adj_ppm *= USER_HZ; /* * Compute the frequency estimate and additional phase @@ -736,82 +765,90 @@ static void second_overflow(void) STA_PPSWANDER | STA_PPSERROR); } ltemp = time_freq + pps_freq; - if (ltemp < 0) - time_adj -= -ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); - else - time_adj += ltemp >> - (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE); + time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE)); #if HZ == 100 /* Compensate for (HZ==100) != (1 << SHIFT_HZ). * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14) */ - if (time_adj < 0) - time_adj -= (-time_adj >> 2) + (-time_adj >> 5); - else - time_adj += (time_adj >> 2) + (time_adj >> 5); + time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5); #endif #if HZ == 1000 /* Compensate for (HZ==1000) != (1 << SHIFT_HZ). * Add 1.5625% and 0.78125% to get 1023.4375; => only 0.05% error (p. 
14) */ - if (time_adj < 0) - time_adj -= (-time_adj >> 6) + (-time_adj >> 7); - else - time_adj += (time_adj >> 6) + (time_adj >> 7); + time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7); #endif } -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) + +/** + * ntp_get_ppm_adjustment - Returns Shifted PPM adjustment + * + */ +long ntp_get_ppm_adjustment(void) +{ + return total_sppm; +} + +/** + * ntp_advance() - increments the NTP state machine + * + */ +void ntp_advance(unsigned long interval_ns) { - long time_adjust_step, delta_nsec; + static unsigned long interval_sum; + unsigned long flags; + write_seqlock_irqsave(&ntp_lock, flags); - if ( (time_adjust_step = time_adjust) != 0 ) { - /* We are doing an adjtime thing. - * - * Prepare time_adjust_step to be within bounds. - * Note that a positive time_adjust means we want the clock - * to run faster. - * - * Limit the amount of the step to be in the range - * -tickadj .. +tickadj - */ - if (time_adjust > tickadj) - time_adjust_step = tickadj; - else if (time_adjust < -tickadj) - time_adjust_step = -tickadj; + /* increment the interval sum */ + interval_sum += interval_ns; - /* Reduce by this step the amount of time left */ - time_adjust -= time_adjust_step; - } - delta_nsec = tick_nsec + time_adjust_step * 1000; - /* - * Advance the phase, once it gets to one microsecond, then - * advance the tick more. - */ - time_phase += time_adj; - if (time_phase <= -FINENSEC) { - long ltemp = -time_phase >> (SHIFT_SCALE - 10); - time_phase += ltemp << (SHIFT_SCALE - 10); - delta_nsec -= ltemp; - } - else if (time_phase >= FINENSEC) { - long ltemp = time_phase >> (SHIFT_SCALE - 10); - time_phase -= ltemp << (SHIFT_SCALE - 10); - delta_nsec += ltemp; + /* calculate the per tick singleshot adjtime adjustment step */ + while (interval_ns >= tick_nsec) { + time_adjust_step = time_adjust; + if (time_adjust_step) { + /* We are doing an adjtime thing. + * + * Prepare time_adjust_step to be within bounds. + * Note that a positive time_adjust means we want the clock + * to run faster. + * + * Limit the amount of the step to be in the range + * -tickadj .. +tickadj + */ + time_adjust_step = min(time_adjust_step, (long)tickadj); + time_adjust_step = max(time_adjust_step, (long)-tickadj); + + /* Reduce by this step the amount of time left */ + time_adjust -= time_adjust_step; + } + interval_ns -= tick_nsec; } - xtime.tv_nsec += delta_nsec; - time_interpolator_update(delta_nsec); + singleshot_adj_ppm = time_adjust_step*(1000000/HZ); /* usec/tick => ppm */ /* Changes by adjtime() do not take effect till next tick. 
*/ if (time_next_adjust != 0) { time_adjust = time_next_adjust; time_next_adjust = 0; } + + while (interval_sum >= NSEC_PER_SEC) { + interval_sum -= NSEC_PER_SEC; + second_overflow(); + } + + /* calculate the total continuous ppm adjustment */ + total_sppm = time_freq; /* already shifted by SHIFT_USEC */ + total_sppm += offset_adj_ppm << SHIFT_USEC; + total_sppm += tick_adj_ppm << SHIFT_USEC; + total_sppm += singleshot_adj_ppm << SHIFT_USEC; + + write_sequnlock_irqrestore(&ntp_lock, flags); + } +#ifndef CONFIG_GENERIC_TIME /* * Using a loop looks inefficient, but "ticks" is * usually just one (we shouldn't be losing ticks, @@ -821,16 +858,51 @@ static void update_wall_time_one_tick(vo */ static void update_wall_time(unsigned long ticks) { + long delta_nsec; + static long time_phase; /* phase offset (scaled us) */ + do { ticks--; - update_wall_time_one_tick(); - if (xtime.tv_nsec >= 1000000000) { - xtime.tv_nsec -= 1000000000; + + /* Calculate the nsec delta using the + * precomputed NTP adjustments: + * tick_nsec, time_adjust_step, time_adj + */ + delta_nsec = tick_nsec + time_adjust_step * 1000; + /* + * Advance the phase, once it gets to one microsecond, then + * advance the tick more. + */ + time_phase += time_adj; + if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) { + long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10)); + time_phase -= ltemp << (SHIFT_SCALE - 10); + delta_nsec += ltemp; + } + + xtime.tv_nsec += delta_nsec; + if (xtime.tv_nsec >= NSEC_PER_SEC) { + int leapsecond; + xtime.tv_nsec -= NSEC_PER_SEC; xtime.tv_sec++; - second_overflow(); + /* process leapsecond */ + leapsecond = ntp_leapsecond(xtime); + if (leapsecond) { + xtime.tv_sec += leapsecond; + wall_to_monotonic.tv_sec -= leapsecond; + /* Use of time interpolator for a gradual change of time */ + time_interpolator_update(leapsecond*NSEC_PER_SEC); + clock_was_set(); + } } + ntp_advance(tick_nsec); + time_interpolator_update(delta_nsec); + } while (ticks); } +#else /* !CONFIG_GENERIC_TIME */ +#define update_wall_time(x) +#endif /* !CONFIG_GENERIC_TIME */ /* * Called from the timer interrupt handler to charge one tick to the current @@ -912,6 +984,7 @@ static void run_timer_softirq(struct sof { tvec_base_t *base = &__get_cpu_var(tvec_bases); + ktimer_run_queues(); if (time_after_eq(jiffies, base->timer_jiffies)) __run_timers(base); } @@ -1177,62 +1250,6 @@ asmlinkage long sys_gettid(void) return current->pid; } -static long __sched nanosleep_restart(struct restart_block *restart) -{ - unsigned long expire = restart->arg0, now = jiffies; - struct timespec __user *rmtp = (struct timespec __user *) restart->arg1; - long ret; - - /* Did it expire while we handled signals? 
*/ - if (!time_after(expire, now)) - return 0; - - expire = schedule_timeout_interruptible(expire - now); - - ret = 0; - if (expire) { - struct timespec t; - jiffies_to_timespec(expire, &t); - - ret = -ERESTART_RESTARTBLOCK; - if (rmtp && copy_to_user(rmtp, &t, sizeof(t))) - ret = -EFAULT; - /* The 'restart' block is already filled in */ - } - return ret; -} - -asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) -{ - struct timespec t; - unsigned long expire; - long ret; - - if (copy_from_user(&t, rqtp, sizeof(t))) - return -EFAULT; - - if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0)) - return -EINVAL; - - expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); - expire = schedule_timeout_interruptible(expire); - - ret = 0; - if (expire) { - struct restart_block *restart; - jiffies_to_timespec(expire, &t); - if (rmtp && copy_to_user(rmtp, &t, sizeof(t))) - return -EFAULT; - - restart = ¤t_thread_info()->restart_block; - restart->fn = nanosleep_restart; - restart->arg0 = jiffies + expire; - restart->arg1 = (unsigned long) rmtp; - ret = -ERESTART_RESTARTBLOCK; - } - return ret; -} - /* * sys_sysinfo - fill in sysinfo struct */ Index: linux-2.6.14/fs/exec.c =================================================================== --- linux-2.6.14.orig/fs/exec.c +++ linux-2.6.14/fs/exec.c @@ -645,9 +645,12 @@ static inline int de_thread(struct task_ * synchronize with any firing (by calling del_timer_sync) * before we can safely let the old group leader die. */ - sig->real_timer.data = (unsigned long)current; - if (del_timer_sync(&sig->real_timer)) - add_timer(&sig->real_timer); + sig->real_timer.data = current; + spin_unlock_irq(lock); + if (ktimer_cancel(&sig->real_timer)) + ktimer_start(&sig->real_timer, NULL, + KTIMER_RESTART|KTIMER_NOCHECK); + spin_lock_irq(lock); } while (atomic_read(&sig->count) > count) { sig->group_exit_task = current; @@ -659,7 +662,6 @@ static inline int de_thread(struct task_ } sig->group_exit_task = NULL; sig->notify_count = 0; - sig->real_timer.data = (unsigned long)current; spin_unlock_irq(lock); /* Index: linux-2.6.14/kernel/posix-timers.c =================================================================== --- linux-2.6.14.orig/kernel/posix-timers.c +++ linux-2.6.14/kernel/posix-timers.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include @@ -48,21 +48,6 @@ #include #include -#ifndef div_long_long_rem -#include - -#define div_long_long_rem(dividend,divisor,remainder) ({ \ - u64 result = dividend; \ - *remainder = do_div(result,divisor); \ - result; }) - -#endif -#define CLOCK_REALTIME_RES TICK_NSEC /* In nano seconds. */ - -static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2) -{ - return (u64)mpy1 * mpy2; -} /* * Management arrays for POSIX timers. Timers are kept in slab memory * Timer ids are allocated by an external routine that keeps track of the @@ -148,18 +133,18 @@ static DEFINE_SPINLOCK(idr_lock); */ static struct k_clock posix_clocks[MAX_CLOCKS]; + /* - * We only have one real clock that can be set so we need only one abs list, - * even if we should want to have several clocks with differing resolutions. + * These ones are defined below. 
*/ -static struct k_clock_abs abs_list = {.list = LIST_HEAD_INIT(abs_list.list), - .lock = SPIN_LOCK_UNLOCKED}; +static int common_nsleep(clockid_t, int flags, struct timespec *t, + struct timespec __user *rmtp); +static void common_timer_get(struct k_itimer *, struct itimerspec *); +static int common_timer_set(struct k_itimer *, int, + struct itimerspec *, struct itimerspec *); +static int common_timer_del(struct k_itimer *timer); -static void posix_timer_fn(unsigned long); -static u64 do_posix_clock_monotonic_gettime_parts( - struct timespec *tp, struct timespec *mo); -int do_posix_clock_monotonic_gettime(struct timespec *tp); -static int do_posix_clock_monotonic_get(clockid_t, struct timespec *tp); +static void posix_timer_fn(void *data); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -205,21 +190,25 @@ static inline int common_clock_set(clock static inline int common_timer_create(struct k_itimer *new_timer) { - INIT_LIST_HEAD(&new_timer->it.real.abs_timer_entry); - init_timer(&new_timer->it.real.timer); - new_timer->it.real.timer.data = (unsigned long) new_timer; + return -EINVAL; +} + +static int timer_create_mono(struct k_itimer *new_timer) +{ + ktimer_init(&new_timer->it.real.timer); + new_timer->it.real.timer.data = new_timer; + new_timer->it.real.timer.function = posix_timer_fn; + return 0; +} + +static int timer_create_real(struct k_itimer *new_timer) +{ + ktimer_init_real(&new_timer->it.real.timer); + new_timer->it.real.timer.data = new_timer; new_timer->it.real.timer.function = posix_timer_fn; return 0; } -/* - * These ones are defined below. - */ -static int common_nsleep(clockid_t, int flags, struct timespec *t); -static void common_timer_get(struct k_itimer *, struct itimerspec *); -static int common_timer_set(struct k_itimer *, int, - struct itimerspec *, struct itimerspec *); -static int common_timer_del(struct k_itimer *timer); /* * Return nonzero iff we know a priori this clockid_t value is bogus. 
@@ -239,19 +228,44 @@ static inline int invalid_clockid(clocki return 1; } +/* + * Get real time for posix timers + */ +static int posix_ktime_get_real_ts(clockid_t which_clock, struct timespec *tp) +{ + ktime_get_real_ts(tp); + return 0; +} + +/* + * Get monotonic time for posix timers + */ +static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) +{ + ktime_get_ts(tp); + return 0; +} + +void do_posix_clock_monotonic_gettime(struct timespec *ts) +{ + ktime_get_ts(ts); +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ static __init int init_posix_timers(void) { - struct k_clock clock_realtime = {.res = CLOCK_REALTIME_RES, - .abs_struct = &abs_list + struct k_clock clock_realtime = { + .clock_getres = ktimer_get_res_real, + .clock_get = posix_ktime_get_real_ts, + .timer_create = timer_create_real, }; - struct k_clock clock_monotonic = {.res = CLOCK_REALTIME_RES, - .abs_struct = NULL, - .clock_get = do_posix_clock_monotonic_get, - .clock_set = do_posix_clock_nosettime + struct k_clock clock_monotonic = { + .clock_getres = ktimer_get_res, + .clock_get = posix_ktime_get_ts, + .clock_set = do_posix_clock_nosettime, + .timer_create = timer_create_mono, }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); @@ -265,117 +279,17 @@ static __init int init_posix_timers(void __initcall(init_posix_timers); -static void tstojiffie(struct timespec *tp, int res, u64 *jiff) -{ - long sec = tp->tv_sec; - long nsec = tp->tv_nsec + res - 1; - - if (nsec > NSEC_PER_SEC) { - sec++; - nsec -= NSEC_PER_SEC; - } - - /* - * The scaling constants are defined in - * The difference between there and here is that we do the - * res rounding and compute a 64-bit result (well so does that - * but it then throws away the high bits). - */ - *jiff = (mpy_l_X_l_ll(sec, SEC_CONVERSION) + - (mpy_l_X_l_ll(nsec, NSEC_CONVERSION) >> - (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC; -} - -/* - * This function adjusts the timer as needed as a result of the clock - * being set. It should only be called for absolute timers, and then - * under the abs_list lock. It computes the time difference and sets - * the new jiffies value in the timer. It also updates the timers - * reference wall_to_monotonic value. It is complicated by the fact - * that tstojiffies() only handles positive times and it needs to work - * with both positive and negative times. Also, for negative offsets, - * we need to defeat the res round up. - * - * Return is true if there is a new time, else false. - */ -static long add_clockset_delta(struct k_itimer *timr, - struct timespec *new_wall_to) -{ - struct timespec delta; - int sign = 0; - u64 exp; - - set_normalized_timespec(&delta, - new_wall_to->tv_sec - - timr->it.real.wall_to_prev.tv_sec, - new_wall_to->tv_nsec - - timr->it.real.wall_to_prev.tv_nsec); - if (likely(!(delta.tv_sec | delta.tv_nsec))) - return 0; - if (delta.tv_sec < 0) { - set_normalized_timespec(&delta, - -delta.tv_sec, - 1 - delta.tv_nsec - - posix_clocks[timr->it_clock].res); - sign++; - } - tstojiffie(&delta, posix_clocks[timr->it_clock].res, &exp); - timr->it.real.wall_to_prev = *new_wall_to; - timr->it.real.timer.expires += (sign ? 
-exp : exp); - return 1; -} - -static void remove_from_abslist(struct k_itimer *timr) -{ - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - list_del_init(&timr->it.real.abs_timer_entry); - spin_unlock(&abs_list.lock); - } -} static void schedule_next_timer(struct k_itimer *timr) { - struct timespec new_wall_to; - struct now_struct now; - unsigned long seq; - - /* - * Set up the timer for the next interval (if there is one). - * Note: this code uses the abs_timer_lock to protect - * it.real.wall_to_prev and must hold it until exp is set, not exactly - * obvious... - - * This function is used for CLOCK_REALTIME* and - * CLOCK_MONOTONIC* timers. If we ever want to handle other - * CLOCKs, the calling code (do_schedule_next_timer) would need - * to pull the "clock" info from the timer and dispatch the - * "other" CLOCKs "next timer" code (which, I suppose should - * also be added to the k_clock structure). - */ - if (!timr->it.real.incr) + if (ktime_cmp_val(timr->it.real.incr, ==, KTIME_ZERO)) return; - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - posix_get_now(&now); - } while (read_seqretry(&xtime_lock, seq)); - - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - add_clockset_delta(timr, &new_wall_to); - - posix_bump_timer(timr, now); - - spin_unlock(&abs_list.lock); - } else { - posix_bump_timer(timr, now); - } - timr->it_overrun_last = timr->it_overrun; - timr->it_overrun = -1; + timr->it_overrun_last = timr->it.real.overrun; + timr->it.real.overrun = timr->it.real.timer.overrun = -1; ++timr->it_requeue_pending; - add_timer(&timr->it.real.timer); + ktimer_start(&timr->it.real.timer, &timr->it.real.incr, KTIMER_FORWARD); + timr->it.real.overrun = timr->it.real.timer.overrun; } /* @@ -413,14 +327,7 @@ int posix_timer_event(struct k_itimer *t { memset(&timr->sigq->info, 0, sizeof(siginfo_t)); timr->sigq->info.si_sys_private = si_private; - /* - * Send signal to the process that owns this timer. - - * This code assumes that all the possible abs_lists share the - * same lock (there is only one list at this time). If this is - * not the case, the CLOCK info would need to be used to find - * the proper abs list lock. - */ + /* Send signal to the process that owns this timer.*/ timr->sigq->info.si_signo = timr->it_sigev_signo; timr->sigq->info.si_errno = 0; @@ -454,65 +361,28 @@ EXPORT_SYMBOL_GPL(posix_timer_event); * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. 
*/ -static void posix_timer_fn(unsigned long __data) +static void posix_timer_fn(void *data) { - struct k_itimer *timr = (struct k_itimer *) __data; + struct k_itimer *timr = data; unsigned long flags; - unsigned long seq; - struct timespec delta, new_wall_to; - u64 exp = 0; - int do_notify = 1; + int si_private = 0; spin_lock_irqsave(&timr->it_lock, flags); - if (!list_empty(&timr->it.real.abs_timer_entry)) { - spin_lock(&abs_list.lock); - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - } while (read_seqretry(&xtime_lock, seq)); - set_normalized_timespec(&delta, - new_wall_to.tv_sec - - timr->it.real.wall_to_prev.tv_sec, - new_wall_to.tv_nsec - - timr->it.real.wall_to_prev.tv_nsec); - if (likely((delta.tv_sec | delta.tv_nsec ) == 0)) { - /* do nothing, timer is on time */ - } else if (delta.tv_sec < 0) { - /* do nothing, timer is already late */ - } else { - /* timer is early due to a clock set */ - tstojiffie(&delta, - posix_clocks[timr->it_clock].res, - &exp); - timr->it.real.wall_to_prev = new_wall_to; - timr->it.real.timer.expires += exp; - add_timer(&timr->it.real.timer); - do_notify = 0; - } - spin_unlock(&abs_list.lock); - } - if (do_notify) { - int si_private=0; + if (ktime_cmp_val(timr->it.real.incr, !=, KTIME_ZERO)) + si_private = ++timr->it_requeue_pending; - if (timr->it.real.incr) - si_private = ++timr->it_requeue_pending; - else { - remove_from_abslist(timr); - } + if (posix_timer_event(timr, si_private)) + /* + * signal was not sent because of sig_ignor + * we will not get a call back to restart it AND + * it should be restarted. + */ + schedule_next_timer(timr); - if (posix_timer_event(timr, si_private)) - /* - * signal was not sent because of sig_ignor - * we will not get a call back to restart it AND - * it should be restarted. - */ - schedule_next_timer(timr); - } unlock_timer(timr, flags); /* hold thru abs lock to keep irq off */ } - static inline struct task_struct * good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; @@ -776,39 +646,41 @@ static struct k_itimer * lock_timer(time static void common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { - unsigned long expires; - struct now_struct now; + ktime_t expires, now, remaining; + struct ktimer *timer = &timr->it.real.timer; - do - expires = timr->it.real.timer.expires; - while ((volatile long) (timr->it.real.timer.expires) != expires); - - posix_get_now(&now); - - if (expires && - ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) && - !timr->it.real.incr && - posix_time_before(&timr->it.real.timer, &now)) - timr->it.real.timer.expires = expires = 0; - if (expires) { - if (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { - posix_bump_timer(timr, now); - expires = timr->it.real.timer.expires; - } - else - if (!timer_pending(&timr->it.real.timer)) - expires = 0; - if (expires) - expires -= now.jiffies; - } - jiffies_to_timespec(expires, &cur_setting->it_value); - jiffies_to_timespec(timr->it.real.incr, &cur_setting->it_interval); - - if (cur_setting->it_value.tv_sec < 0) { + memset(cur_setting, 0, sizeof(struct itimerspec)); + expires = ktimer_get_expiry(timer, &now); + remaining = ktime_sub(expires, now); + + /* Time left ? or timer pending */ + if (ktime_cmp_val(remaining, >, KTIME_ZERO) || ktimer_active(timer)) + goto calci; + /* interval timer ? 
*/ + if (ktime_cmp_val(timr->it.real.incr, ==, 0)) + return; + /* + * When a requeue is pending or this is a SIGEV_NONE timer + * move the expiry time forward by intervals, so expiry is > + * now. + * The active (non SIGEV_NONE) rearm should be done + * automatically by the ktimer REARM mode. Thats the next + * iteration. The REQUEUE_PENDING part will go away ! + */ + if (timr->it_requeue_pending & REQUEUE_PENDING || + (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { + remaining = forward_posix_timer(timr, now); + } + calci: + /* interval timer ? */ + if (ktime_cmp_val(timr->it.real.incr, !=, KTIME_ZERO)) + ktime_to_timespec(&cur_setting->it_interval, + timr->it.real.incr); + /* Return 0 only, when the timer is expired and not pending */ + if (ktime_cmp_val(remaining, <=, KTIME_ZERO)) cur_setting->it_value.tv_nsec = 1; - cur_setting->it_value.tv_sec = 0; - } + else + ktime_to_timespec(&cur_setting->it_value, remaining); } /* Get the time remaining on a POSIX.1b interval timer. */ @@ -832,6 +704,7 @@ sys_timer_gettime(timer_t timer_id, stru return 0; } + /* * Get the number of overruns of a POSIX.1b interval timer. This is to * be the overrun of the timer last delivered. At the same time we are @@ -858,84 +731,6 @@ sys_timer_getoverrun(timer_t timer_id) return overrun; } -/* - * Adjust for absolute time - * - * If absolute time is given and it is not CLOCK_MONOTONIC, we need to - * adjust for the offset between the timer clock (CLOCK_MONOTONIC) and - * what ever clock he is using. - * - * If it is relative time, we need to add the current (CLOCK_MONOTONIC) - * time to it to get the proper time for the timer. - */ -static int adjust_abs_time(struct k_clock *clock, struct timespec *tp, - int abs, u64 *exp, struct timespec *wall_to) -{ - struct timespec now; - struct timespec oc = *tp; - u64 jiffies_64_f; - int rtn =0; - - if (abs) { - /* - * The mask pick up the 4 basic clocks - */ - if (!((clock - &posix_clocks[0]) & ~CLOCKS_MASK)) { - jiffies_64_f = do_posix_clock_monotonic_gettime_parts( - &now, wall_to); - /* - * If we are doing a MONOTONIC clock - */ - if((clock - &posix_clocks[0]) & CLOCKS_MONO){ - now.tv_sec += wall_to->tv_sec; - now.tv_nsec += wall_to->tv_nsec; - } - } else { - /* - * Not one of the basic clocks - */ - clock->clock_get(clock - posix_clocks, &now); - jiffies_64_f = get_jiffies_64(); - } - /* - * Take away now to get delta and normalize - */ - set_normalized_timespec(&oc, oc.tv_sec - now.tv_sec, - oc.tv_nsec - now.tv_nsec); - }else{ - jiffies_64_f = get_jiffies_64(); - } - /* - * Check if the requested time is prior to now (if so set now) - */ - if (oc.tv_sec < 0) - oc.tv_sec = oc.tv_nsec = 0; - - if (oc.tv_sec | oc.tv_nsec) - set_normalized_timespec(&oc, oc.tv_sec, - oc.tv_nsec + clock->res); - tstojiffie(&oc, clock->res, exp); - - /* - * Check if the requested time is more than the timer code - * can handle (if so we error out but return the value too). - */ - if (*exp > ((u64)MAX_JIFFY_OFFSET)) - /* - * This is a considered response, not exactly in - * line with the standard (in fact it is silent on - * possible overflows). We assume such a large - * value is ALMOST always a programming error and - * try not to compound it by setting a really dumb - * value. - */ - rtn = -EINVAL; - /* - * return the actual jiffies expire time, full 64 bits - */ - *exp += jiffies_64_f; - return rtn; -} /* Set a POSIX.1b interval timer. */ /* timr->it_lock is taken. 
*/ @@ -943,68 +738,52 @@ static inline int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { - struct k_clock *clock = &posix_clocks[timr->it_clock]; - u64 expire_64; + ktime_t expires; + int mode; if (old_setting) common_timer_get(timr, old_setting); /* disable the timer */ - timr->it.real.incr = 0; + ktime_set_scalar(timr->it.real.incr, KTIME_ZERO); /* * careful here. If smp we could be in the "fire" routine which will * be spinning as we hold the lock. But this is ONLY an SMP issue. */ - if (try_to_del_timer_sync(&timr->it.real.timer) < 0) { -#ifdef CONFIG_SMP - /* - * It can only be active if on an other cpu. Since - * we have cleared the interval stuff above, it should - * clear once we release the spin lock. Of course once - * we do that anything could happen, including the - * complete melt down of the timer. So return with - * a "retry" exit status. - */ + if (ktimer_try_to_cancel(&timr->it.real.timer) < 0) return TIMER_RETRY; -#endif - } - - remove_from_abslist(timr); timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0; timr->it_overrun = -1; - /* - *switch off the timer when it_value is zero - */ - if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) { - timr->it.real.timer.expires = 0; + + /* switch off the timer when it_value is zero */ + if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) return 0; - } - if (adjust_abs_time(clock, - &new_setting->it_value, flags & TIMER_ABSTIME, - &expire_64, &(timr->it.real.wall_to_prev))) { - return -EINVAL; - } - timr->it.real.timer.expires = (unsigned long)expire_64; - tstojiffie(&new_setting->it_interval, clock->res, &expire_64); - timr->it.real.incr = (unsigned long)expire_64; + mode = flags & TIMER_ABSTIME ? KTIMER_ABS : KTIMER_REL; - /* - * We do not even queue SIGEV_NONE timers! But we do put them - * in the abs list so we can do that right. + /* Posix madness. Only absolute CLOCK_REALTIME timers + * are affected by clock sets. So we must reiniatilize + * the timer. */ + if (timr->it_clock == CLOCK_REALTIME && mode == KTIMER_ABS) + timer_create_real(timr); + else + timer_create_mono(timr); + + expires = timespec_to_ktime(new_setting->it_value); + + /* Convert and round the interval */ + timr->it.real.incr = ktimer_round_timespec(&timr->it.real.timer, + &new_setting->it_interval); + + /* SIGEV_NONE timers are not queued ! See common_timer_get */ if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE)) - add_timer(&timr->it.real.timer); + ktimer_start(&timr->it.real.timer, &expires, + mode | KTIMER_NOCHECK | KTIMER_ROUND); - if (flags & TIMER_ABSTIME && clock->abs_struct) { - spin_lock(&clock->abs_struct->lock); - list_add_tail(&(timr->it.real.abs_timer_entry), - &(clock->abs_struct->list)); - spin_unlock(&clock->abs_struct->lock); - } return 0; } @@ -1039,6 +818,7 @@ retry: unlock_timer(timr, flag); if (error == TIMER_RETRY) { + wait_for_ktimer(&timr->it.real.timer); rtn = NULL; // We already got the old time... goto retry; } @@ -1052,24 +832,10 @@ retry: static inline int common_timer_del(struct k_itimer *timer) { - timer->it.real.incr = 0; + ktime_set_scalar(timer->it.real.incr, KTIME_ZERO); - if (try_to_del_timer_sync(&timer->it.real.timer) < 0) { -#ifdef CONFIG_SMP - /* - * It can only be active if on an other cpu. Since - * we have cleared the interval stuff above, it should - * clear once we release the spin lock. 
Of course once - * we do that anything could happen, including the - * complete melt down of the timer. So return with - * a "retry" exit status. - */ + if (ktimer_try_to_cancel(&timer->it.real.timer) < 0) return TIMER_RETRY; -#endif - } - - remove_from_abslist(timer); - return 0; } @@ -1085,24 +851,17 @@ sys_timer_delete(timer_t timer_id) struct k_itimer *timer; long flags; -#ifdef CONFIG_SMP - int error; retry_delete: -#endif timer = lock_timer(timer_id, &flags); if (!timer) return -EINVAL; -#ifdef CONFIG_SMP - error = timer_delete_hook(timer); - - if (error == TIMER_RETRY) { + if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); + wait_for_ktimer(&timer->it.real.timer); goto retry_delete; } -#else - timer_delete_hook(timer); -#endif + spin_lock(¤t->sighand->siglock); list_del(&timer->list); spin_unlock(¤t->sighand->siglock); @@ -1119,6 +878,7 @@ retry_delete: release_posix_timer(timer, IT_ID_SET); return 0; } + /* * return timer owned by the process, used by exit_itimers */ @@ -1126,22 +886,14 @@ static inline void itimer_delete(struct { unsigned long flags; -#ifdef CONFIG_SMP - int error; retry_delete: -#endif spin_lock_irqsave(&timer->it_lock, flags); -#ifdef CONFIG_SMP - error = timer_delete_hook(timer); - - if (error == TIMER_RETRY) { + if (timer_delete_hook(timer) == TIMER_RETRY) { unlock_timer(timer, flags); + wait_for_ktimer(&timer->it.real.timer); goto retry_delete; } -#else - timer_delete_hook(timer); -#endif list_del(&timer->list); /* * This keeps any tasks waiting on the spin lock from thinking @@ -1170,60 +922,7 @@ void exit_itimers(struct signal_struct * } } -/* - * And now for the "clock" calls - * - * These functions are called both from timer functions (with the timer - * spin_lock_irq() held and from clock calls with no locking. They must - * use the save flags versions of locks. - */ - -/* - * We do ticks here to avoid the irq lock ( they take sooo long). - * The seqlock is great here. Since we a reader, we don't really care - * if we are interrupted since we don't take lock that will stall us or - * any other cpu. Voila, no irq lock is needed. - * - */ - -static u64 do_posix_clock_monotonic_gettime_parts( - struct timespec *tp, struct timespec *mo) -{ - u64 jiff; - unsigned int seq; - - do { - seq = read_seqbegin(&xtime_lock); - getnstimeofday(tp); - *mo = wall_to_monotonic; - jiff = jiffies_64; - - } while(read_seqretry(&xtime_lock, seq)); - - return jiff; -} - -static int do_posix_clock_monotonic_get(clockid_t clock, struct timespec *tp) -{ - struct timespec wall_to_mono; - - do_posix_clock_monotonic_gettime_parts(tp, &wall_to_mono); - - tp->tv_sec += wall_to_mono.tv_sec; - tp->tv_nsec += wall_to_mono.tv_nsec; - - if ((tp->tv_nsec - NSEC_PER_SEC) > 0) { - tp->tv_nsec -= NSEC_PER_SEC; - tp->tv_sec++; - } - return 0; -} - -int do_posix_clock_monotonic_gettime(struct timespec *tp) -{ - return do_posix_clock_monotonic_get(CLOCK_MONOTONIC, tp); -} - +/* Not available / possible... 
functions */ int do_posix_clock_nosettime(clockid_t clockid, struct timespec *tp) { return -EINVAL; @@ -1236,7 +935,8 @@ int do_posix_clock_notimer_create(struct } EXPORT_SYMBOL_GPL(do_posix_clock_notimer_create); -int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t) +int do_posix_clock_nonanosleep(clockid_t clock, int flags, struct timespec *t, + struct timespec __user *r) { #ifndef ENOTSUP return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */ @@ -1295,125 +995,34 @@ sys_clock_getres(clockid_t which_clock, return error; } -static void nanosleep_wake_up(unsigned long __data) -{ - struct task_struct *p = (struct task_struct *) __data; - - wake_up_process(p); -} - /* - * The standard says that an absolute nanosleep call MUST wake up at - * the requested time in spite of clock settings. Here is what we do: - * For each nanosleep call that needs it (only absolute and not on - * CLOCK_MONOTONIC* (as it can not be set)) we thread a little structure - * into the "nanosleep_abs_list". All we need is the task_struct pointer. - * When ever the clock is set we just wake up all those tasks. The rest - * is done by the while loop in clock_nanosleep(). - * - * On locking, clock_was_set() is called from update_wall_clock which - * holds (or has held for it) a write_lock_irq( xtime_lock) and is - * called from the timer bh code. Thus we need the irq save locks. - * - * Also, on the call from update_wall_clock, that is done as part of a - * softirq thing. We don't want to delay the system that much (possibly - * long list of timers to fix), so we defer that work to keventd. + * nanosleep for monotonic and realtime clocks */ - -static DECLARE_WAIT_QUEUE_HEAD(nanosleep_abs_wqueue); -static DECLARE_WORK(clock_was_set_work, (void(*)(void*))clock_was_set, NULL); - -static DECLARE_MUTEX(clock_was_set_lock); - -void clock_was_set(void) +static int common_nsleep(clockid_t which_clock, int flags, + struct timespec *tsave, struct timespec __user *rmtp) { - struct k_itimer *timr; - struct timespec new_wall_to; - LIST_HEAD(cws_list); - unsigned long seq; - + int mode = flags & TIMER_ABSTIME ? KTIMER_ABS : KTIMER_REL; - if (unlikely(in_interrupt())) { - schedule_work(&clock_was_set_work); - return; + switch (which_clock) { + case CLOCK_REALTIME: + /* Posix madness. Only absolute timers on clock realtime + are affected by clock set. */ + if (mode == KTIMER_ABS) + return ktimer_nanosleep_real(tsave, rmtp, mode); + case CLOCK_MONOTONIC: + return ktimer_nanosleep(tsave, rmtp, mode); + default: + break; } - wake_up_all(&nanosleep_abs_wqueue); - - /* - * Check if there exist TIMER_ABSTIME timers to correct. - * - * Notes on locking: This code is run in task context with irq - * on. We CAN be interrupted! All other usage of the abs list - * lock is under the timer lock which holds the irq lock as - * well. We REALLY don't want to scan the whole list with the - * interrupt system off, AND we would like a sequence lock on - * this code as well. Since we assume that the clock will not - * be set often, it seems ok to take and release the irq lock - * for each timer. In fact add_timer will do this, so this is - * not an issue. So we know when we are done, we will move the - * whole list to a new location. Then as we process each entry, - * we will move it to the actual list again. This way, when our - * copy is empty, we are done. We are not all that concerned - * about preemption so we will use a semaphore lock to protect - * aginst reentry. This way we will not stall another - * processor. 
It is possible that this may delay some timers - * that should have expired, given the new clock, but even this - * will be minimal as we will always update to the current time, - * even if it was set by a task that is waiting for entry to - * this code. Timers that expire too early will be caught by - * the expire code and restarted. - - * Absolute timers that repeat are left in the abs list while - * waiting for the task to pick up the signal. This means we - * may find timers that are not in the "add_timer" list, but are - * in the abs list. We do the same thing for these, save - * putting them back in the "add_timer" list. (Note, these are - * left in the abs list mainly to indicate that they are - * ABSOLUTE timers, a fact that is used by the re-arm code, and - * for which we have no other flag.) - - */ - - down(&clock_was_set_lock); - spin_lock_irq(&abs_list.lock); - list_splice_init(&abs_list.list, &cws_list); - spin_unlock_irq(&abs_list.lock); - do { - do { - seq = read_seqbegin(&xtime_lock); - new_wall_to = wall_to_monotonic; - } while (read_seqretry(&xtime_lock, seq)); - - spin_lock_irq(&abs_list.lock); - if (list_empty(&cws_list)) { - spin_unlock_irq(&abs_list.lock); - break; - } - timr = list_entry(cws_list.next, struct k_itimer, - it.real.abs_timer_entry); - - list_del_init(&timr->it.real.abs_timer_entry); - if (add_clockset_delta(timr, &new_wall_to) && - del_timer(&timr->it.real.timer)) /* timer run yet? */ - add_timer(&timr->it.real.timer); - list_add(&timr->it.real.abs_timer_entry, &abs_list.list); - spin_unlock_irq(&abs_list.lock); - } while (1); - - up(&clock_was_set_lock); + return -EINVAL; } -long clock_nanosleep_restart(struct restart_block *restart_block); - asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags, const struct timespec __user *rqtp, struct timespec __user *rmtp) { struct timespec t; - struct restart_block *restart_block = - &(current_thread_info()->restart_block); - int ret; if (invalid_clockid(which_clock)) return -EINVAL; @@ -1421,135 +1030,8 @@ sys_clock_nanosleep(clockid_t which_cloc if (copy_from_user(&t, rqtp, sizeof (struct timespec))) return -EFAULT; - if ((unsigned) t.tv_nsec >= NSEC_PER_SEC || t.tv_sec < 0) + if (!timespec_valid(&t)) return -EINVAL; - /* - * Do this here as nsleep function does not have the real address. - */ - restart_block->arg1 = (unsigned long)rmtp; - - ret = CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t)); - - if ((ret == -ERESTART_RESTARTBLOCK) && rmtp && - copy_to_user(rmtp, &t, sizeof (t))) - return -EFAULT; - return ret; -} - - -static int common_nsleep(clockid_t which_clock, - int flags, struct timespec *tsave) -{ - struct timespec t, dum; - struct timer_list new_timer; - DECLARE_WAITQUEUE(abs_wqueue, current); - u64 rq_time = (u64)0; - s64 left; - int abs; - struct restart_block *restart_block = - ¤t_thread_info()->restart_block; - - abs_wqueue.flags = 0; - init_timer(&new_timer); - new_timer.expires = 0; - new_timer.data = (unsigned long) current; - new_timer.function = nanosleep_wake_up; - abs = flags & TIMER_ABSTIME; - - if (restart_block->fn == clock_nanosleep_restart) { - /* - * Interrupted by a non-delivered signal, pick up remaining - * time and continue. Remaining time is in arg2 & 3. 
- */ - restart_block->fn = do_no_restart_syscall; - - rq_time = restart_block->arg3; - rq_time = (rq_time << 32) + restart_block->arg2; - if (!rq_time) - return -EINTR; - left = rq_time - get_jiffies_64(); - if (left <= (s64)0) - return 0; /* Already passed */ - } - - if (abs && (posix_clocks[which_clock].clock_get != - posix_clocks[CLOCK_MONOTONIC].clock_get)) - add_wait_queue(&nanosleep_abs_wqueue, &abs_wqueue); - - do { - t = *tsave; - if (abs || !rq_time) { - adjust_abs_time(&posix_clocks[which_clock], &t, abs, - &rq_time, &dum); - } - - left = rq_time - get_jiffies_64(); - if (left >= (s64)MAX_JIFFY_OFFSET) - left = (s64)MAX_JIFFY_OFFSET; - if (left < (s64)0) - break; - - new_timer.expires = jiffies + left; - __set_current_state(TASK_INTERRUPTIBLE); - add_timer(&new_timer); - - schedule(); - - del_timer_sync(&new_timer); - left = rq_time - get_jiffies_64(); - } while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING)); - - if (abs_wqueue.task_list.next) - finish_wait(&nanosleep_abs_wqueue, &abs_wqueue); - - if (left > (s64)0) { - - /* - * Always restart abs calls from scratch to pick up any - * clock shifting that happened while we are away. - */ - if (abs) - return -ERESTARTNOHAND; - - left *= TICK_NSEC; - tsave->tv_sec = div_long_long_rem(left, - NSEC_PER_SEC, - &tsave->tv_nsec); - /* - * Restart works by saving the time remaing in - * arg2 & 3 (it is 64-bits of jiffies). The other - * info we need is the clock_id (saved in arg0). - * The sys_call interface needs the users - * timespec return address which _it_ saves in arg1. - * Since we have cast the nanosleep call to a clock_nanosleep - * both can be restarted with the same code. - */ - restart_block->fn = clock_nanosleep_restart; - restart_block->arg0 = which_clock; - /* - * Caller sets arg1 - */ - restart_block->arg2 = rq_time & 0xffffffffLL; - restart_block->arg3 = rq_time >> 32; - - return -ERESTART_RESTARTBLOCK; - } - - return 0; -} -/* - * This will restart clock_nanosleep. - */ -long -clock_nanosleep_restart(struct restart_block *restart_block) -{ - struct timespec t; - int ret = common_nsleep(restart_block->arg0, 0, &t); - - if ((ret == -ERESTART_RESTARTBLOCK) && restart_block->arg1 && - copy_to_user((struct timespec __user *)(restart_block->arg1), &t, - sizeof (t))) - return -EFAULT; - return ret; + return CLOCK_DISPATCH(which_clock, nsleep, (which_clock, flags, &t, rmtp)); } Index: linux-2.6.14/Documentation/DocBook/kernel-api.tmpl =================================================================== --- linux-2.6.14.orig/Documentation/DocBook/kernel-api.tmpl +++ linux-2.6.14/Documentation/DocBook/kernel-api.tmpl @@ -54,6 +54,11 @@ !Ekernel/sched.c !Ekernel/timer.c + High-precision timers +!Iinclude/linux/ktime.h +!Iinclude/linux/ktimer.h +!Ekernel/ktimers.c + Internal Functions !Ikernel/exit.c !Ikernel/signal.c Index: linux-2.6.14/Documentation/ktimers.txt =================================================================== --- /dev/null +++ linux-2.6.14/Documentation/ktimers.txt @@ -0,0 +1,239 @@ + +ktimers - subsystem for high-precision kernel timers +---------------------------------------------------- + +This patch introduces a new subsystem for high-precision kernel timers. + +Why two timer subsystems? After a lot of back and forth trying to +integrate high-precision and high-resolution features into the existing +timer framework, and after testing various such high-resolution timer +implementations in practice, we came to the conclusion that the timer +wheel code is fundamentally not suitable for such an approach. 
We
+initially didn't believe this ('there must be a way to solve this'), and
+we spent considerable effort trying to integrate things into the timer
+wheel, but we failed. There are several reasons why such integration is
+impossible:
+
+- the forced handling of low-resolution and high-resolution timers in
+  the same way leads to a lot of compromises, macro magic and #ifdef
+  mess. The timers.c code is very "tightly coded" around jiffies and
+  32-bitness assumptions, and has been honed and micro-optimized for a
+  narrow use case for many years - and thus even small extensions to it
+  frequently break the wheel concept, leading to even worse
+  compromises.
+
+- the unpredictable [O(N)] overhead of cascading leads to delays which
+  necessitate a more complex handling of high resolution timers, which
+  decreases robustness. Such a design still led to rather large timing
+  inaccuracies. Cascading is a fundamental property of the timer wheel
+  concept; it cannot be 'designed out' without inevitably degrading
+  other portions of the timers.c code in an unacceptable way.
+
+- the implementation of the current posix-timer subsystem on top of
+  the timer wheel has already introduced a quite complex handling of
+  the required readjusting of absolute CLOCK_REALTIME timers at
+  settimeofday or NTP time - showing the rigidity of the timer wheel
+  data structure.
+
+- the timer wheel code is optimal for use cases which can be
+  identified as "timeouts". Such timeouts are usually set up to cover
+  error conditions in various I/O paths, such as networking and block
+  I/O. The vast majority of those timers never expire and are rarely
+  recascaded because the expected correct event arrives in time so they
+  can be removed from the timer wheel before any further processing of
+  them becomes necessary. Thus the users of these timeouts can accept
+  the granularity and precision tradeoffs of the timer wheel, and
+  largely expect the timer subsystem to have near-zero overhead. Timing
+  for them is not a core purpose; it's mostly a necessary evil to
+  guarantee the processing of requests, which should be as cheap and
+  unintrusive as possible.
+
+The primary users of precision timers are user-space applications that
+utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
+users like drivers and subsystems with a requirement for precise timed
+events can benefit from the availability of a separate high-precision
+timer subsystem as well.
+
+The ktimer subsystem is easily extended with high-resolution
+capabilities, and patches for that exist and are maturing quickly. The
+increasing demand for realtime and multimedia applications along with
+other potential users for precise timers gives another reason to
+separate the "timeout" and "precise timer" subsystems.
+
+Another potential benefit is that such a separation allows for future
+optimizations of the existing timer wheel implementation for the low
+resolution and low precision use cases - once the precision-sensitive
+APIs are separated from the timer wheel and are migrated over to
+ktimers. E.g. we could decrease the frequency of the timeout subsystem
+from 250 Hz to 100 Hz (or even lower).
+
+ktimer subsystem implementation details
+---------------------------------------
+
+The basic design considerations were:
+
+- simplicity
+- robust, extensible abstractions
+- a data structure not bound to jiffies or any other granularity
+- simplification of existing, timing-related kernel code
+
+From our previous experience with various approaches to high-resolution
+timers, another basic requirement was the immediate enqueueing and
+ordering of timers at activation time. After looking at several possible
+solutions such as radix trees and hashes, the red-black tree was chosen
+as the basic data structure. Rbtrees are available as a library in the
+kernel and are used in various performance-critical areas of e.g. memory
+management and file systems. The rbtree is solely used for the
+time-sorted ordering, while a separate list is used to give the expiry
+code fast access to the queued timers, without having to walk the
+rbtree. (This separate list is also useful for high-resolution timers,
+where we need separate pending and expired queues while keeping the
+time-order intact.)
+
+The time-ordered enqueueing is not purely for the purposes of the
+high-resolution timers extension though; it also simplifies the handling
+of absolute timers based on CLOCK_REALTIME. The existing implementation
+needed to keep an extra list of all armed absolute CLOCK_REALTIME timers
+along with complex locking. In case of settimeofday and NTP, all the
+timers (!) had to be dequeued, the time-changing code had to fix them up
+one by one, and all of them had to be enqueued again. The time-ordered
+enqueueing and the storage of the expiry time in absolute time units
+removes all this complex and poorly scaling code from the posix-timer
+implementation - the clock can simply be set without having to touch the
+rbtree. This also makes the handling of posix-timers simpler in general.
+
+The locking and per-CPU behavior of ktimers was mostly taken from the
+existing timer wheel code, as it is mature and well suited. Sharing code
+was not really a win, due to the different data structures. Also, the
+ktimer functions now have clearer behavior and clearer names - such as
+ktimer_try_to_cancel() and ktimer_cancel() [which are roughly equivalent
+to del_timer() and del_timer_sync()] - and there's no direct 1:1 mapping
+between them on the algorithmic level.
+
+The internal representation of time values (ktime_t) is implemented via
+macros and inline functions, and can be switched between a "hybrid
+union" type and a plain "scalar" 64-bit nanoseconds representation (at
+compile time). The hybrid union type exists to optimize time conversions
+on 32-bit CPUs. This build-time-selectable ktime_t storage format was
+implemented to avoid the performance impact of 64-bit multiplications
+and divisions on 32-bit CPUs. Such operations are frequently necessary
+to convert between the storage formats provided by kernel and userspace
+interfaces and the internal time format. (See include/linux/ktime.h for
+further details.)
+
+ktimers - rounding of timer values
+----------------------------------
+
+Why do we need rounding at all?
+
+Firstly, the POSIX specification requires rounding to the resolution -
+whatever that means. The POSIX specification is quite imprecise on the
+details of rounding though, so a practical interpretation had to be
+found.
+
+The first question is which resolution value should be returned to the
+user by the clock_getres() interface.
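+
+For reference - and purely as an illustration, this snippet is not part
+of the patch - a user-space application queries this value through the
+standard clock_getres() call (link with -lrt on glibc):
+
+    #include <stdio.h>
+    #include <time.h>
+
+    int main(void)
+    {
+        struct timespec res;
+
+        /* ask the kernel for the effective CLOCK_MONOTONIC resolution */
+        if (clock_getres(CLOCK_MONOTONIC, &res))
+            return 1;
+        printf("effective resolution: %ld.%09ld sec\n",
+               (long) res.tv_sec, res.tv_nsec);
+        return 0;
+    }
+
+The value printed here is the 'effective' resolution discussed in the
+cases below.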
+
+The simplest case is when the hardware is capable of 1 nsec resolution:
+in that case we can fulfill all wishes and there is no rounding :-)
+
+Another simple case is when the clock hardware has a limited resolution
+that the kernel wants to fully offer to user-space: in this case that
+limited resolution is returned to userspace.
+
+The hairy case is when the underlying hardware is capable of
+finer-grained resolution, but the kernel is not willing to offer that
+resolution. Why would the kernel want to do that? Because e.g. the
+system could easily be DoS-ed with high-frequency timer interrupts. Or
+the kernel might want to cluster high-res timer interrupts into groups
+for performance reasons, so that extremely high interrupt rates are
+avoided. So the kernel needs some leeway in deciding the 'effective'
+resolution that it is willing to expose to userspace.
+
+In this case, the clock_getres() decision is easy: we want to return the
+'effective' resolution, not the 'theoretical' resolution. Thus an
+application programmer gets correct information about what granularity
+and accuracy to expect from the system.
+
+What is much less obvious in both the 'hardware is low-res' and 'kernel
+wants to offer low-res' cases is the actual behavior of timers, and
+where and how to round time values to the 'effective' resolution of the
+clock.
+
+For this we first need to see what types of expiries exist for ktimers,
+and how rounding affects them. Ktimers have the following variants:
+
+- relative one-shot timers
+- absolute one-shot timers
+- relative interval timers
+- absolute interval timers
+
+Interval timers can be reduced to one-shot timers: they are a series of
+one-shot timers with the same interval. Relative one-shot timers can be
+handled identically to absolute one-shot timers after adding the
+relative expiry time to the current time of the respective clock.
+
+We chose to handle two cases of rounding:
+
+- the rounding of the absolute value of the first expiry time
+- the rounding of the timer interval
+
+An alternative implementation would be to not round the interval and to
+implicitly round at every timer event, but it's not clear what the
+advantages of doing that would be. There are a couple of disadvantages:
+
+- the technique seems to contradict the standard's requirement that
+  'time values ... be rounded' (and the interval clearly is a time
+  value).
+
+- other OSs implement the rounding in the way we implemented it.
+
+- also, there is an application surprise factor: the 'do not round
+  intervals' technique can lead to the following sample sequence of
+  events:
+
+      Interval:   1.7ms
+      Resolution: 1ms
+
+      Event timeline:
+
+      2ms - 4ms - 6ms - 7ms - 9ms - 11ms - 12ms - 14ms - 16ms - 17ms ...
+
+  This 2,2,1,2,2,1... msec 'unpredictable and uneven' relative distance
+  of events could surprise applications.
+
+(As a sidenote, the current POSIX APIs could be extended with a way for
+periodic timers to have an 'average' frequency, where there is no
+rounding of the interval. No such API exists at the moment.)
+
+ktimers - testing and verification
+----------------------------------
+
+We used the high-resolution timer subsystem on top of ktimers to verify
+the ktimer implementation details in practice, and we also ran the posix
+timer tests in order to ensure specification compliance.
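+
+As a user-visible illustration of the interfaces this exercises - again
+a sketch, not part of the patch or of the test suite - the following
+program arms a periodic CLOCK_MONOTONIC posix-timer with the 1.7 msec
+interval from the rounding example above; assuming a 1 msec effective
+resolution, the interval is rounded up to 2 msec, so the expiries arrive
+with an even 2 msec spacing (link with -lrt on glibc):
+
+    #include <signal.h>
+    #include <stdio.h>
+    #include <time.h>
+    #include <unistd.h>
+
+    static volatile sig_atomic_t expiries;
+
+    static void on_timer(int sig)
+    {
+        expiries++;
+    }
+
+    int main(void)
+    {
+        struct sigevent sev = {
+            .sigev_notify = SIGEV_SIGNAL,
+            .sigev_signo  = SIGALRM,
+        };
+        struct itimerspec its = {
+            .it_value    = { .tv_sec = 0, .tv_nsec = 1700000 },
+            .it_interval = { .tv_sec = 0, .tv_nsec = 1700000 },
+        };
+        timer_t tid;
+
+        signal(SIGALRM, on_timer);
+
+        /* periodic 1.7 msec timer on the monotonic clock */
+        if (timer_create(CLOCK_MONOTONIC, &sev, &tid))
+            return 1;
+        if (timer_settime(tid, 0, &its, NULL))
+            return 1;
+
+        while (expiries < 10)
+            pause();
+
+        printf("overrun count at last expiry: %d\n",
+               timer_getoverrun(tid));
+        timer_delete(tid);
+        return 0;
+    }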
+ +The ktimer patch converts the following kernel functionality to use +ktimers: + + - nanosleep + - itimers + - posix-timers + +The conversion of nanosleep and posix-timers enabled the unification of +nanosleep and clock_nanosleep. + +The code was successfully compiled for the following platforms: + + i386, x86_64, ARM, PPC, PPC64, IA64 + +The code was run-tested on the following platforms: + + i386(UP/SMP), x86_64(UP/SMP), ARM, PPC + +ktimers were also integrated into the -rt tree, along with a +ktimers-based high-resolution timer implementation, so the ktimers code +got a healthy amount of testing and use in practice. + + Thomas Gleixner, Ingo Molnar Index: linux-2.6.14/fs/proc/array.c =================================================================== --- linux-2.6.14.orig/fs/proc/array.c +++ linux-2.6.14/fs/proc/array.c @@ -330,7 +330,7 @@ static int do_task_stat(struct task_stru unsigned long min_flt = 0, maj_flt = 0; cputime_t cutime, cstime, utime, stime; unsigned long rsslim = 0; - unsigned long it_real_value = 0; + DEFINE_KTIME(it_real_value); struct task_struct *t; char tcomm[sizeof(task->comm)]; @@ -386,7 +386,7 @@ static int do_task_stat(struct task_stru utime = cputime_add(utime, task->signal->utime); stime = cputime_add(stime, task->signal->stime); } - it_real_value = task->signal->it_real_value; + it_real_value = task->signal->real_timer.expires; } ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; read_unlock(&tasklist_lock); @@ -435,7 +435,7 @@ static int do_task_stat(struct task_stru priority, nice, num_threads, - jiffies_to_clock_t(it_real_value), + (long) ktime_to_clock_t(it_real_value), start_time, vsize, mm ? get_mm_counter(mm, rss) : 0, /* you might want to shift this left 3 */ Index: linux-2.6.14/include/linux/calc64.h =================================================================== --- /dev/null +++ linux-2.6.14/include/linux/calc64.h @@ -0,0 +1,49 @@ +#ifndef _linux_CALC64_H +#define _linux_CALC64_H + +#include +#include + +/* + * div_long_long_rem was moved out of jiffies.h as it is + * a general math function useful for other things than + * jiffy code. + * + * This is a generic macro which is used when the architecture + * specific div64.h does not provide a optimized one. + * + * The 64bit dividend is divided by the divisor (data type long), the + * result is returned and the remainder stored in the variable + * referenced by remainder (data type long *). In contrast to the + * do_div macro the dividend is kept intact. + */ +#ifndef div_long_long_rem +#define div_long_long_rem(dividend,divisor,remainder) \ +({ \ + u64 result = dividend; \ + *remainder = do_div(result,divisor); \ + result; \ +}) +#endif + +/* + * Sign aware variation of the above. On some architectures a + * negative dividend leads to an divide overflow exception, which + * is avoided by the sign check. 
+ */ +static inline long div_long_long_rem_signed(long long dividend, + long divisor, + long *remainder) +{ + long res; + + if (unlikely(dividend < 0)) { + res = -div_long_long_rem(-dividend, divisor, remainder); + *remainder = -(*remainder); + } else { + res = div_long_long_rem(dividend, divisor, remainder); + } + return res; +} + +#endif Index: linux-2.6.14/include/linux/jiffies.h =================================================================== --- linux-2.6.14.orig/include/linux/jiffies.h +++ linux-2.6.14/include/linux/jiffies.h @@ -1,21 +1,12 @@ #ifndef _LINUX_JIFFIES_H #define _LINUX_JIFFIES_H +#include #include #include #include #include #include /* for HZ */ -#include - -#ifndef div_long_long_rem -#define div_long_long_rem(dividend,divisor,remainder) \ -({ \ - u64 result = dividend; \ - *remainder = do_div(result,divisor); \ - result; \ -}) -#endif /* * The following defines establish the engineering parameters of the PLL Index: linux-2.6.14/include/linux/ktime.h =================================================================== --- /dev/null +++ linux-2.6.14/include/linux/ktime.h @@ -0,0 +1,390 @@ +/* + * include/linux/ktime.h + * + * ktime_t - nanosecond-resolution time format. + * + * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar + * + * data type definitions, declarations, prototypes and macros. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_KTIME_H +#define _LINUX_KTIME_H + +#include + +/* + * ktime_t: + * + * On 64-bit CPUs a single 64-bit variable is used to store the ktimers + * internal representation of time values in scalar nanoseconds. The + * design plays out best on 64-bit CPUs, where most conversions are + * NOPs and most arithmetic ktime_t operations are plain arithmetic + * operations. + * + * On 32-bit CPUs an optimized representation of the timespec structure + * is used to avoid expensive conversions from and to timespecs. The + * endian-aware order of the tv struct members is choosen to allow + * mathematical operations on the tv64 member of the union too, which + * for certain operations produces better code. + * + * For architectures with efficient support for 64/32-bit conversions the + * plain scalar nanosecond based representation can be selected by the + * config switch CONFIG_KTIME_SCALAR. + */ + +#define KTIME_ZERO 0 +#define KTIME_MAX (~((u64)1 << 63)) + +/* + * ktime_t definitions when using the 64-bit scalar representation: + */ + +#if (BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR) + +typedef s64 ktime_t; + +/* Define a ktime_t variable and initialize it to zero: */ +#define DEFINE_KTIME(kt) ktime_t kt = 0 + +/* + * Compare two ktime_t variables. The comparison operand is + * given as a literal in the macro call (e.g. <, >, ==): + * + * ( E.g. "ktime_cmp(t1, <, t2) is still more readable to programmers + * than ktime_before()/ktime_after() would be. ) + */ +#define ktime_cmp(a, op, b) ((a) op (b)) + +/* + * Compare a ktime_t variable and a constant. The comparison operand is + * given as a literal in the macro call (e.g. 
<, >, ==): + */ +#define ktime_cmp_val(a, op, b) ((a) op (b)) + +/** + * ktime_set - Set a ktime_t variable from a seconds/nanoseconds value + * + * @secs: seconds to set + * @nsecs: nanoseconds to set + * + * Return the ktime_t representation of the value + */ +#define ktime_set(sec, nsec) (((s64)(sec) * NSEC_PER_SEC) + (s64)(nsec)) + +/* + * Set a ktime_t variable to a value in a scalar nanosecond representation + * + * NOTE: use only with KTIME_ZERO or KTIME_MAX to maintain compability + * with the union type version. + */ +#define ktime_set_scalar(kt, s) (kt) = (s) + +/* + * The following 3 macros are used for the nanosleep restart handling + * to store the "low" and "high" part of a 64-bit ktime variable. + * (on 32-bit CPUs the restart block has 32-bit fields, so we have to + * split the 64-bit value up into two pieces) + * + * In the scalar representation we have to split up the 64-bit scalar: + */ + +/* Set the "low" and "high" part of a ktime_t variable: */ +#define ktime_set_low_high(l, h) ((s64)((u64)(l)) | (((s64)(h)) << 32)) + +/* Get the "low" part of a ktime_t variable: */ +#define ktime_get_low(kt) ((kt) & 0xFFFFFFFF) + +/* Get the "high" part of a ktime_t variable: */ +#define ktime_get_high(kt) ((kt) >> 32) + +/* Subtract two ktime_t variables. rem = lhs -rhs: */ +#define ktime_sub(lhs, rhs) ((lhs) - (rhs)) + +/* Add two ktime_t variables. res = lhs + rhs: */ +#define ktime_add(lhs, rhs) ((lhs) + (rhs)) + +/* + * Add a ktime_t variable and a scalar nanosecond value. + * res = kt + nsval: + */ +#define ktime_add_ns(kt, nsval) ((kt) + (nsval)) + +/* convert a timespec to ktime_t format: */ +#define timespec_to_ktime(ts) ktime_set((ts).tv_sec, (ts).tv_nsec) + +/* convert a timeval to ktime_t format: */ +#define timeval_to_ktime(tv) ktime_set((tv).tv_sec, (tv).tv_usec * 1000) + +/* Map the ktime_t to timespec conversion to ns_to_timespec function */ +#define ktime_to_timespec(ts, kt) ns_to_timespec(ts, kt) + +/* Map the ktime_t to timeval conversion to ns_to_timeval function */ +#define ktime_to_timeval(tv, kt) ns_to_timeval(tv, kt) + +/* Map the ktime_t to clock_t conversion to the inline in jiffies.h: */ +#define ktime_to_clock_t(kt) nsec_to_clock_t(kt) + +/* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */ +#define ktime_to_ns(kt) (kt) + +#if (BITS_PER_LONG == 64) +/* + * Calc ktime_t modulo div. + * div is less than NSEC_PER_SEC and (NSEC_PER_SEC % div) = 0 ! + */ +#define ktime_modulo(kt, div) (unsigned long)(kt % div) +#endif + +#else + +/* + * Helper macros/inlines to get the ktime_t math right in the timespec + * representation. The macros are sometimes ugly - their actual use is + * pretty okay-ish, given the circumstances. We do all this for + * performance reasons. The pure scalar nsec_t based code was nice and + * simple, but created too many 64-bit / 32-bit conversions and divisions. + * + * Be especially aware that negative values are represented in a way + * that the tv.sec field is negative and the tv.nsec field is greater + * or equal to zero but less than nanoseconds per second. This is the + * same representation which is used by timespecs. + * + * tv.sec < 0 and 0 >= tv.nsec < NSEC_PER_SEC + */ + +typedef union { + s64 tv64; + struct { +#ifdef __BIG_ENDIAN + s32 sec, nsec; +#else + s32 nsec, sec; +#endif + } tv; +} ktime_t; + +/* Define a ktime_t variable and initialize it to zero: */ +#define DEFINE_KTIME(kt) ktime_t kt = { .tv64 = 0 } + +/* + * Compare two ktime_t variables. 
The comparison operand is + * given as a literal in the macro call (e.g. <, >, ==): + */ +#define ktime_cmp(a, op, b) ((a).tv64 op (b).tv64) + +/* + * Compare a ktime_t variable and a constant. The comparison operand is + * given as a literal in the macro call (e.g. <, >, ==): + */ +#define ktime_cmp_val(a, op, b) ((a).tv64 op (b)) + +/* Set a ktime_t variable to a value in sec/nsec representation: */ +static inline ktime_t ktime_set(long secs, unsigned long nsecs) +{ + return (ktime_t) { .tv = { .sec = secs, .nsec = nsecs } }; +} + +/* + * Set the scalar value of a ktime variable (union type) + * NOTE: use only with KTIME_ZERO or KTIME_MAX! + */ +#define ktime_set_scalar(kt, s) (kt).tv64 = (s) + +/* + * The following 3 macros are used for the nanosleep restart handling + * to store the "low" and "high" part of a 64-bit ktime variable. + * (on 32-bit CPUs the restart block has 32-bit fields, so we have to + * split the 64-bit value up into two pieces) + * + * In the union type representation this is just storing and restoring + * the sec and nsec members of the tv structure: + */ + +/* Set the "low" and "high" part of a ktime_t variable: */ +#define ktime_set_low_high(l, h) ktime_set(h, l) + +/* Get the "low" part of a ktime_t variable: */ +#define ktime_get_low(kt) (kt).tv.nsec + +/* Get the "high" part of a ktime_t variable: */ +#define ktime_get_high(kt) (kt).tv.sec + +/** + * ktime_sub - subtract two ktime_t variables + * + * @lhs: minuend + * @rhs: subtrahend + * + * Returns the remainder of the substraction + */ +static inline ktime_t ktime_sub(ktime_t lhs, ktime_t rhs) +{ + ktime_t res; + + res.tv64 = lhs.tv64 - rhs.tv64; + if (res.tv.nsec < 0) + res.tv.nsec += NSEC_PER_SEC; + + return res; +} + +/** + * ktime_add - add two ktime_t variables + * + * @add1: addend1 + * @add2: addend2 + * + * Returns the sum of addend1 and addend2 + */ +static inline ktime_t ktime_add(ktime_t add1, ktime_t add2) +{ + ktime_t res; + + res.tv64 = add1.tv64 + add2.tv64; + /* + * performance trick: the (u32) -NSEC gives 0x00000000Fxxxxxxx + * so we subtract NSEC_PER_SEC and add 1 to the upper 32 bit. 
+ * + * it's equivalent to: + * tv.nsec -= NSEC_PER_SEC + * tv.sec ++; + */ + if (res.tv.nsec >= NSEC_PER_SEC) + res.tv64 += (u32)-NSEC_PER_SEC; + + return res; +} + +/** + * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable + * + * @kt: addend + * @nsec: the scalar nsec value to add + * + * Returns the sum of kt and nsec in ktime_t format + */ +extern ktime_t ktime_add_ns(ktime_t kt, u64 nsec); + +/** + * timespec_to_ktime - convert a timespec to ktime_t format + * + * @ts: the timespec variable to convert + * + * Returns a ktime_t variable with the converted timespec value + */ +static inline ktime_t timespec_to_ktime(struct timespec ts) +{ + return (ktime_t) { .tv = { .sec = (s32)ts.tv_sec, + .nsec = (s32)ts.tv_nsec } }; +} + +/** + * timeval_to_ktime - convert a timeval to ktime_t format + * + * @tv: the timeval variable to convert + * + * Returns a ktime_t variable with the converted timeval value + */ +static inline ktime_t timeval_to_ktime(struct timeval tv) +{ + return (ktime_t) { .tv = { .sec = (s32)tv.tv_sec, + .nsec = (s32)tv.tv_usec * 1000 } }; +} + +/** + * ktime_to_timespec - convert a ktime_t variable to timespec format + * + * @ts: pointer to timespec variable to store result + * @kt: the ktime_t variable to convert + * + * Stores the timespec representation of the ktime value in + * the timespec variable pointed to by @ts + */ +static inline void ktime_to_timespec(struct timespec *ts, ktime_t kt) +{ + ts->tv_sec = (time_t) kt.tv.sec; + ts->tv_nsec = (long) kt.tv.nsec; +} + +/** + * ktime_to_timeval - convert a ktime_t variable to timeval format + * + * @tv: pointer to timeval variable to store result + * @kt: the ktime_t variable to convert + * + * Stores the timeval representation of the ktime value in + * the timeval variable pointed to by @tv + */ +static inline void ktime_to_timeval(struct timeval *tv, ktime_t kt) +{ + tv->tv_sec = (time_t) kt.tv.sec; + tv->tv_usec = (suseconds_t) (kt.tv.nsec / NSEC_PER_USEC); +} + +/** + * ktime_to_clock_t - convert a ktime_t variable to clock_t format + * @kt: the ktime_t variable to convert + * + * Returns a clock_t variable with the converted value + */ +static inline clock_t ktime_to_clock_t(ktime_t kt) +{ + return nsec_to_clock_t( (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec); +} + +/** + * ktime_to_ns - convert a ktime_t variable to scalar nanoseconds + * @kt: the ktime_t variable to convert + * + * Returns the scalar nanoseconds representation of kt + */ +static inline u64 ktime_to_ns(ktime_t kt) +{ + return (u64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec; +} + +/* + * Calc ktime_t modulo div. + * div is less than NSEC_PER_SEC and (NSEC_PER_SEC % div) = 0 ! + */ +#define ktime_modulo(kt, div) ((unsigned long)kt.tv.nsec % div) + +#endif + +/* + * The resolution of the clocks. The resolution value is returned in + * the clock_getres() system call to give application programmers an + * idea of the (in)accuracy of timers. Timer values are rounded up to + * this resolution values. 
+ */ +#define KTIME_LOW_RES (NSEC_PER_SEC/HZ) + +#ifdef CONFIG_GENERIC_TIME + +#define ktime_get get_monotonic_clock +#define ktime_get_real get_realtime_clock +#define ktime_get_ts(ts) get_monotonic_clock_ts(ts) +#define ktime_get_real_ts(ts) get_realtime_clock_ts(ts) + +#else /* CONFIG_GENERIC_TIME */ + +/* Get the monotonic time in ktime_t format: */ +extern ktime_t ktime_get(void); + +/* Get the real (wall-) time in ktime_t format: */ +extern ktime_t ktime_get_real(void); + +/* Get the monotonic time in timespec format: */ +extern void ktime_get_ts(struct timespec *ts); + +/* Get the real (wall-) time in timespec format: */ +#define ktime_get_real_ts(ts) getnstimeofday(ts) + +#endif /* !CONFIG_GENERIC_TIME */ + +#endif Index: linux-2.6.14/include/linux/ktimer.h =================================================================== --- /dev/null +++ linux-2.6.14/include/linux/ktimer.h @@ -0,0 +1,213 @@ +/* + * include/linux/ktimer.h + * + * ktimers - high-precision kernel timers + * + * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar + * + * data type definitions, declarations, prototypes + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_KTIMER_H +#define _LINUX_KTIMER_H + +#include +#include +#include +#include +#include + +/* + * Mode arguments of xxx_ktimer functions: + */ +enum ktimer_rearm { + KTIMER_ABS = 1, /* Time value is absolute */ + KTIMER_REL, /* Time value is relative to now */ + KTIMER_INCR, /* Time value is relative to previous expiry time */ + KTIMER_FORWARD, /* Timer is rearmed with value. Overruns accounted */ + KTIMER_REARM, /* Timer is rearmed with interval. Overruns accounted */ + KTIMER_RESTART, /* Timer is restarted with the stored expiry value */ + + /* + * Expiry must not be checked when the timer is started: + * (can be OR-ed with another above mode flag) + */ + KTIMER_NOCHECK = 0x10000, + /* + * Rounding is required when the time is set up. Thats an + * optimization for relative timers as we read current time + * in the enqueing code so we do not need to read is twice. + */ + KTIMER_ROUND = 0x20000, + + /* (used internally: no rearming) */ + KTIMER_NOREARM = 0 +}; + +/* + * Timer states: + */ +enum ktimer_state { + KTIMER_INACTIVE, /* Timer is inactive */ + KTIMER_PENDING, /* Timer is pending */ + KTIMER_EXPIRED, /* Timer is expired and queued in the rbtree */ + KTIMER_EXPIRED_NOQUEUE, /* Timer is expired and not queued in the rbtree */ +}; + +struct ktimer_base; + +/** + * struct ktimer - the basic ktimer structure + * + * @node: red black tree node for time ordered insertion + * @list: list head for easier access to the time ordered list, + * without walking the red black tree. + * @expires: the absolute expiry time in the ktimers internal + * representation. The time is related to the clock on + * which the timer is based. + * @expired: the absolute time when the timer expired. Used for + * simplifying return path calculations and for debugging + * purposes. 
+ * @interval: the timer interval for automatic rearming + * @overrun: the number of intervals missed when rearming a timer + * @state: state of the timer + * @function: timer expiry callback function + * @data: argument for the callback function + * @base: pointer to the timer base (per cpu and per clock) + * + * The ktimer structure must be initialized by init_ktimer_#CLOCKTYPE() + */ +struct ktimer { + struct rb_node node; + struct list_head list; + ktime_t expires; + ktime_t expired; + ktime_t interval; + int overrun; + enum ktimer_state state; + void (*function)(void *); + void *data; + struct ktimer_base *base; +}; + +/** + * struct ktimer_base - the timer base for a specific clock + * + * @index: clock type index for per_cpu support when moving a timer + * to a base on another cpu. + * @lock: lock protecting the base and associated timers + * @active: red black tree root node for the active timers + * @pending: list of pending timers for simple time ordered access + * @count: the number of active timers + * @resolution: the resolution of the clock, in nanoseconds + * @get_time: function to retrieve the current time of the clock + * @curr_timer: the timer which is executing a callback right now + * @wait: waitqueue to wait for a currently running timer + * @name: string identifier of the clock + */ +struct ktimer_base { + int index; + spinlock_t lock; + struct rb_root active; + struct list_head pending; + int count; + unsigned long resolution; + ktime_t (*get_time)(void); + struct ktimer *curr_timer; + wait_queue_head_t wait; +#ifdef CONFIG_HIGH_RES_TIMERS + struct list_head expired; + ktime_t (*getoffset)(void); + int (*reprogram)(struct ktimer *t, + struct ktimer_base *b, ktime_t n); +#endif + char *name; +}; + +#define KTIMER_POISON ((void *) 0x00100101) + +#ifdef CONFIG_HIGH_RES_TIMERS + +extern void ktimer_clock_notify(void); +extern void clock_was_set(void); +extern int ktimer_interrupt(void); + +/* + * The resolution of the clocks. The resolution value is returned in + * the clock_getres() system call to give application programmers an + * idea of the (in)accuracy of timers. Timer values are rounded up to + * this resolution values. + */ +#define KTIME_REALTIME_RES CONFIG_HIGH_RES_RESOLUTION +#define KTIME_MONOTONIC_RES CONFIG_HIGH_RES_RESOLUTION + +#else + +#define KTIME_REALTIME_RES KTIME_LOW_RES +#define KTIME_MONOTONIC_RES KTIME_LOW_RES + +/* + * clock_was_set() is a NOP for non- high-resolution systems. The + * time-sorted order guarantees that a timer does not expire early and + * is expired in the next softirq when the clock was advanced. 
+ */ +#define clock_was_set() do { } while (0) +#define ktimer_clock_notify() do { } while (0) + +static inline int ktimer_interrupt(void) +{ + return 0; +} + +#endif + +/* Exported timer functions: */ + +/* Initialize timers: */ +extern void ktimer_init(struct ktimer *timer); +extern void ktimer_init_real(struct ktimer *timer); + +/* Basic timer operations: */ +extern int ktimer_start(struct ktimer *timer, ktime_t *tim, int mode); +extern int ktimer_restart(struct ktimer *timer, ktime_t *tim, int mode); +extern int ktimer_cancel(struct ktimer *timer); +extern int ktimer_try_to_cancel(struct ktimer *timer); + +/* Query timers: */ +extern ktime_t ktimer_get_remtime(struct ktimer *timer); +extern ktime_t ktimer_get_expiry(struct ktimer *timer, ktime_t *now); +extern int ktimer_get_res(clockid_t which_clock, struct timespec *tp); +extern int ktimer_get_res_real(clockid_t which_clock, struct timespec *tp); + +static inline int ktimer_active(struct ktimer *timer) +{ + return timer->state != KTIMER_INACTIVE; +} + +/* Convert with rounding based on resolution of timer's clock: */ +extern ktime_t ktimer_round_timeval(struct ktimer *timer, struct timeval *tv); +extern ktime_t ktimer_round_timespec(struct ktimer *timer, struct timespec *ts); + +/* Precise sleep: */ +extern long ktimer_nanosleep(struct timespec *rqtp, + struct timespec __user *rmtp, int mode); +extern long ktimer_nanosleep_real(struct timespec *rqtp, + struct timespec __user *rmtp, int mode); + +#ifdef CONFIG_SMP +extern void wait_for_ktimer(struct ktimer *timer); +#else +# define wait_for_ktimer(t) do { } while (0) +#endif + +/* Soft interrupt function to run the ktimer queues: */ +extern void ktimer_run_queues(void); + +/* Bootup initialization: */ +extern void __init ktimers_init(void); + +#endif Index: linux-2.6.14/include/linux/posix-timers.h =================================================================== --- linux-2.6.14.orig/include/linux/posix-timers.h +++ linux-2.6.14/include/linux/posix-timers.h @@ -51,10 +51,9 @@ struct k_itimer { struct sigqueue *sigq; /* signal queue entry. 
*/ union { struct { - struct timer_list timer; - struct list_head abs_timer_entry; /* clock abs_timer_list */ - struct timespec wall_to_prev; /* wall_to_monotonic used when set */ - unsigned long incr; /* interval in jiffies */ + struct ktimer timer; + ktime_t incr; + int overrun; } real; struct cpu_timer_list cpu; struct { @@ -66,10 +65,6 @@ struct k_itimer { } it; }; -struct k_clock_abs { - struct list_head list; - spinlock_t lock; -}; struct k_clock { int res; /* in nano seconds */ int (*clock_getres) (clockid_t which_clock, struct timespec *tp); @@ -77,7 +72,7 @@ struct k_clock { int (*clock_set) (clockid_t which_clock, struct timespec * tp); int (*clock_get) (clockid_t which_clock, struct timespec * tp); int (*timer_create) (struct k_itimer *timer); - int (*nsleep) (clockid_t which_clock, int flags, struct timespec *); + int (*nsleep) (clockid_t which_clock, int flags, struct timespec *, struct timespec __user *); int (*timer_set) (struct k_itimer * timr, int flags, struct itimerspec * new_setting, struct itimerspec * old_setting); @@ -91,37 +86,104 @@ void register_posix_clock(clockid_t cloc /* Error handlers for timer_create, nanosleep and settime */ int do_posix_clock_notimer_create(struct k_itimer *timer); -int do_posix_clock_nonanosleep(clockid_t, int flags, struct timespec *); +int do_posix_clock_nonanosleep(clockid_t, int flags, struct timespec *, struct timespec __user *); int do_posix_clock_nosettime(clockid_t, struct timespec *tp); /* function to call to trigger timer event */ int posix_timer_event(struct k_itimer *timr, int si_private); -struct now_struct { - unsigned long jiffies; -}; - -#define posix_get_now(now) (now)->jiffies = jiffies; -#define posix_time_before(timer, now) \ - time_before((timer)->expires, (now)->jiffies) - -#define posix_bump_timer(timr, now) \ - do { \ - long delta, orun; \ - delta = now.jiffies - (timr)->it.real.timer.expires; \ - if (delta >= 0) { \ - orun = 1 + (delta / (timr)->it.real.incr); \ - (timr)->it.real.timer.expires += \ - orun * (timr)->it.real.incr; \ - (timr)->it_overrun += orun; \ - } \ - }while (0) +#if (BITS_PER_LONG < 64) +static inline ktime_t forward_posix_timer(struct k_itimer *t, ktime_t now) +{ + ktime_t delta = ktime_sub(now, t->it.real.timer.expires); + unsigned long orun = 1; + + if (ktime_cmp_val(delta, <, KTIME_ZERO)) + goto out; + + if (unlikely(ktime_cmp(delta, >, t->it.real.incr))) { + + int sft = 0; + u64 div, dclc, inc, dns; + + dclc = dns = ktime_to_ns(delta); + div = inc = ktime_to_ns(t->it.real.incr); + /* Make sure the divisor is less than 2^32 */ + while(div >> 32) { + sft++; + div >>= 1; + } + dclc >>= sft; + do_div(dclc, (unsigned long) div); + orun = (unsigned long) dclc; + if (likely(!(inc >> 32))) + dclc *= (unsigned long) inc; + else + dclc *= inc; + t->it.real.timer.expires = ktime_add_ns(t->it.real.timer.expires, + dclc); + } else { + t->it.real.timer.expires = ktime_add(t->it.real.timer.expires, + t->it.real.incr); + } + /* + * Here is the correction for exact. Also covers delta == incr + * which is the else clause above. 
+ */ + if (ktime_cmp(t->it.real.timer.expires, <=, now)) { + t->it.real.timer.expires = ktime_add(t->it.real.timer.expires, + t->it.real.incr); + orun++; + } + t->it_overrun += orun; + + out: + return ktime_sub(t->it.real.timer.expires, now); +} +#else +static inline ktime_t forward_posix_timer(struct k_itimer *t, ktime_t now) +{ + ktime_t delta = ktime_sub(now, t->it.real.timer.expires); + unsigned long orun = 1; + + if (ktime_cmp_val(delta, <, KTIME_ZERO)) + goto out; + + if (unlikely(ktime_cmp(delta, >, t->it.real.incr))) { + + u64 dns, inc; + + dns = ktime_to_ns(delta); + inc = ktime_to_ns(t->it.real.incr); + + orun = dns / inc; + t->it.real.timer.expires = ktime_add_ns(t->it.real.timer.expires, + orun * inc); + } else { + t->it.real.timer.expires = ktime_add(t->it.real.timer.expires, + t->it.real.incr); + } + /* + * Here is the correction for exact. Also covers delta == incr + * which is the else clause above. + */ + if (ktime_cmp(t->it.real.timer.expires, <=, now)) { + t->it.real.timer.expires = ktime_add(t->it.real.timer.expires, + t->it.real.incr); + orun++; + } + t->it_overrun += orun; + out: + return ktime_sub(t->it.real.timer.expires, now); +} +#endif int posix_cpu_clock_getres(clockid_t which_clock, struct timespec *); int posix_cpu_clock_get(clockid_t which_clock, struct timespec *); int posix_cpu_clock_set(clockid_t which_clock, const struct timespec *tp); int posix_cpu_timer_create(struct k_itimer *); -int posix_cpu_nsleep(clockid_t, int, struct timespec *); +int posix_cpu_nsleep(clockid_t, int, struct timespec *, + struct timespec __user *); int posix_cpu_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); int posix_cpu_timer_del(struct k_itimer *); Index: linux-2.6.14/include/linux/sched.h =================================================================== --- linux-2.6.14.orig/include/linux/sched.h +++ linux-2.6.14/include/linux/sched.h @@ -104,6 +104,7 @@ extern unsigned long nr_iowait(void); #include #include #include +#include #include @@ -358,8 +359,7 @@ struct signal_struct { struct list_head posix_timers; /* ITIMER_REAL timer for the process */ - struct timer_list real_timer; - unsigned long it_real_value, it_real_incr; + struct ktimer real_timer; /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ cputime_t it_prof_expires, it_virt_expires; Index: linux-2.6.14/include/linux/time.h =================================================================== --- linux-2.6.14.orig/include/linux/time.h +++ linux-2.6.14/include/linux/time.h @@ -4,6 +4,7 @@ #include #ifdef __KERNEL__ +#include #include #endif @@ -27,6 +28,10 @@ struct timezone { #ifdef __KERNEL__ +/* timeofday base types */ +typedef s64 nsec_t; +typedef u64 cycle_t; + /* Parameters used to convert the timespec values */ #define MSEC_PER_SEC (1000L) #define USEC_PER_SEC (1000000L) @@ -38,38 +43,13 @@ static __inline__ int timespec_equal(str return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); } -/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. - * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 - * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. - * - * [For the Julian calendar (which was used in Russia before 1917, - * Britain & colonies before 1752, anywhere else before 1582, - * and is still in use by some communities) leave out the - * -year/100+year/400 terms, and add 10.] - * - * This algorithm was first published by Gauss (I think). 
- * - * WARNING: this function will overflow on 2106-02-07 06:28:16 on - * machines were long is 32-bit! (However, as time_t is signed, we - * will already get problems at other places on 2038-01-19 03:14:08) - */ -static inline unsigned long +#define timespec_valid(ts) \ +(((ts)->tv_sec >= 0) && (((unsigned) (ts)->tv_nsec) < NSEC_PER_SEC)) + +extern unsigned long mktime (unsigned int year, unsigned int mon, unsigned int day, unsigned int hour, - unsigned int min, unsigned int sec) -{ - if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ - mon += 12; /* Puts Feb last since it has leap day */ - year -= 1; - } - - return ((( - (unsigned long) (year/4 - year/100 + year/400 + 367*mon/12 + day) + - year*365 - 719499 - )*24 + hour /* now have hours */ - )*60 + min /* now have minutes */ - )*60 + sec; /* finally seconds */ -} + unsigned int min, unsigned int sec); extern struct timespec xtime; extern struct timespec wall_to_monotonic; @@ -80,6 +60,8 @@ static inline unsigned long get_seconds( return xtime.tv_sec; } +extern void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec); + struct timespec current_kernel_time(void); #define CURRENT_TIME (current_kernel_time()) @@ -88,29 +70,64 @@ struct timespec current_kernel_time(void extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); -extern void clock_was_set(void); // call when ever the clock is set -extern int do_posix_clock_monotonic_gettime(struct timespec *tp); +extern void do_posix_clock_monotonic_gettime(struct timespec *ts); extern long do_utimes(char __user * filename, struct timeval * times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); extern int do_getitimer(int which, struct itimerval *value); -extern void getnstimeofday (struct timespec *tv); extern struct timespec timespec_trunc(struct timespec t, unsigned gran); -static inline void -set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) + +/** + * timespec_to_ns - Convert timespec to nanoseconds + * @ts: pointer to the timespec variable to be converted + * + * Returns the scalar nanosecond representation of the timespec + * variable + */ +static inline nsec_t timespec_to_ns(struct timespec *ts) { - while (nsec > NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - ++sec; + nsec_t res = (nsec_t) ts->tv_sec * NSEC_PER_SEC; + + return res + (nsec_t) ts->tv_nsec; +} + +/** + * timeval_to_ns - Convert timeval to nanoseconds + * @ts: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * variable + */ +static inline nsec_t timeval_to_ns(struct timeval *tv) +{ + nsec_t res = (nsec_t) tv->tv_sec * NSEC_PER_SEC; + + return res + (nsec_t) tv->tv_usec * NSEC_PER_USEC; +} + +extern void ns_to_timespec(struct timespec *ts, nsec_t nsec); +extern void ns_to_timeval(struct timeval *tv, nsec_t nsec); + +static inline void normalize_timespec(struct timespec *ts) +{ + while ((unsigned long)ts->tv_nsec > NSEC_PER_SEC) { + ts->tv_nsec -= NSEC_PER_SEC; + ts->tv_sec++; } - while (nsec < 0) { - nsec += NSEC_PER_SEC; - --sec; +} + +static inline struct timespec timespec_add_ns(struct timespec a, nsec_t ns) +{ + while(ns > NSEC_PER_SEC) { + ns -= NSEC_PER_SEC; + a.tv_sec++; } - ts->tv_sec = sec; - ts->tv_nsec = nsec; + a.tv_nsec += ns; + normalize_timespec(&a); + + return a; } #endif /* __KERNEL__ */ @@ -145,23 +162,18 @@ struct itimerval { /* * 
The IDs of the various system clocks (for POSIX.1b interval timers). */ -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_REALTIME_HR 4 -#define CLOCK_MONOTONIC_HR 5 /* * The IDs of various hardware clocks */ - - #define CLOCK_SGI_CYCLE 10 #define MAX_CLOCKS 16 -#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC | \ - CLOCK_REALTIME_HR | CLOCK_MONOTONIC_HR) -#define CLOCKS_MONO (CLOCK_MONOTONIC & CLOCK_MONOTONIC_HR) +#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) +#define CLOCKS_MONO (CLOCK_MONOTONIC) /* * The various flags for setting POSIX.1b interval timers. Index: linux-2.6.14/include/linux/timer.h =================================================================== --- linux-2.6.14.orig/include/linux/timer.h +++ linux-2.6.14/include/linux/timer.h @@ -91,6 +91,6 @@ static inline void add_timer(struct time extern void init_timers(void); extern void run_local_timers(void); -extern void it_real_fn(unsigned long); +extern void it_real_fn(void *); #endif Index: linux-2.6.14/init/main.c =================================================================== --- linux-2.6.14.orig/init/main.c +++ linux-2.6.14/init/main.c @@ -47,6 +47,8 @@ #include #include #include +#include +#include #include #include @@ -484,8 +486,11 @@ asmlinkage void __init start_kernel(void rcu_init(); init_IRQ(); pidhash_init(); + init_clockevents(); init_timers(); + ktimers_init(); softirq_init(); + timeofday_init(); time_init(); /* Index: linux-2.6.14/kernel/Makefile =================================================================== --- linux-2.6.14.orig/kernel/Makefile +++ linux-2.6.14/kernel/Makefile @@ -7,8 +7,10 @@ obj-y = sched.o fork.o exec_domain.o sysctl.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o \ - kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o + kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o \ + ktimers.o +obj-$(CONFIG_GENERIC_TIME) += time/ obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o spinlock.o Index: linux-2.6.14/kernel/exit.c =================================================================== --- linux-2.6.14.orig/kernel/exit.c +++ linux-2.6.14/kernel/exit.c @@ -842,7 +842,7 @@ fastcall NORET_TYPE void do_exit(long co update_mem_hiwater(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { - del_timer_sync(&tsk->signal->real_timer); + ktimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); acct_process(code); } Index: linux-2.6.14/kernel/fork.c =================================================================== --- linux-2.6.14.orig/kernel/fork.c +++ linux-2.6.14/kernel/fork.c @@ -804,10 +804,9 @@ static inline int copy_signal(unsigned l init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); - sig->it_real_value = sig->it_real_incr = 0; + ktimer_init(&sig->real_timer); sig->real_timer.function = it_real_fn; - sig->real_timer.data = (unsigned long) tsk; - init_timer(&sig->real_timer); + sig->real_timer.data = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; Index: linux-2.6.14/kernel/itimer.c =================================================================== --- linux-2.6.14.orig/kernel/itimer.c +++ linux-2.6.14/kernel/itimer.c @@ -12,36 +12,49 @@ #include #include #include +#include #include -static unsigned long 
it_real_value(struct signal_struct *sig) +/** + * itimer_get_remtime - get remaining time for the timer + * + * @timer: the timer to read + * @fake: a pending, but expired timer returns fake (itimers kludge) + * + * Returns the delta between the expiry time and now, which can be + * less than zero or the fake value described above. + */ +static ktime_t itimer_get_remtime(struct ktimer *timer, long fake) { - unsigned long val = 0; - if (timer_pending(&sig->real_timer)) { - val = sig->real_timer.expires - jiffies; - - /* look out for negative/zero itimer.. */ - if ((long) val <= 0) - val = 1; - } - return val; + ktime_t rem = ktimer_get_remtime(timer); + + /* + * Racy but safe: if the itimer expires after the above + * ktimer_get_remtime() call but before this condition + * then we return KTIMER_ZERO - which is correct. + */ + if (ktimer_active(timer)) { + if (ktime_cmp_val(rem, <=, KTIME_ZERO)) + rem = ktime_set(0, fake); + } else + ktime_set_scalar(rem, KTIME_ZERO); + + return rem; } int do_getitimer(int which, struct itimerval *value) { struct task_struct *tsk = current; - unsigned long interval, val; + ktime_t interval, val; cputime_t cinterval, cval; switch (which) { case ITIMER_REAL: - spin_lock_irq(&tsk->sighand->siglock); - interval = tsk->signal->it_real_incr; - val = it_real_value(tsk->signal); - spin_unlock_irq(&tsk->sighand->siglock); - jiffies_to_timeval(val, &value->it_value); - jiffies_to_timeval(interval, &value->it_interval); + interval = tsk->signal->real_timer.interval; + val = itimer_get_remtime(&tsk->signal->real_timer, NSEC_PER_USEC); + ktime_to_timeval(&value->it_value, val); + ktime_to_timeval(&value->it_interval, interval); break; case ITIMER_VIRTUAL: read_lock(&tasklist_lock); @@ -113,59 +126,36 @@ asmlinkage long sys_getitimer(int which, } -void it_real_fn(unsigned long __data) +/* + * The timer is automagically restarted, when interval != 0 + */ +void it_real_fn(void *data) { - struct task_struct * p = (struct task_struct *) __data; - unsigned long inc = p->signal->it_real_incr; - - send_group_sig_info(SIGALRM, SEND_SIG_PRIV, p); - - /* - * Now restart the timer if necessary. We don't need any locking - * here because do_setitimer makes sure we have finished running - * before it touches anything. - * Note, we KNOW we are (or should be) at a jiffie edge here so - * we don't need the +1 stuff. Also, we want to use the prior - * expire value so as to not "slip" a jiffie if we are late. - * Deal with requesting a time prior to "now" here rather than - * in add_timer. 
- */ - if (!inc) - return; - while (time_before_eq(p->signal->real_timer.expires, jiffies)) - p->signal->real_timer.expires += inc; - add_timer(&p->signal->real_timer); + send_group_sig_info(SIGALRM, SEND_SIG_PRIV, data); } int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) { struct task_struct *tsk = current; - unsigned long val, interval, expires; + struct ktimer *timer; + ktime_t expires; cputime_t cval, cinterval, nval, ninterval; switch (which) { case ITIMER_REAL: -again: - spin_lock_irq(&tsk->sighand->siglock); - interval = tsk->signal->it_real_incr; - val = it_real_value(tsk->signal); - /* We are sharing ->siglock with it_real_fn() */ - if (try_to_del_timer_sync(&tsk->signal->real_timer) < 0) { - spin_unlock_irq(&tsk->sighand->siglock); - goto again; - } - tsk->signal->it_real_incr = - timeval_to_jiffies(&value->it_interval); - expires = timeval_to_jiffies(&value->it_value); - if (expires) - mod_timer(&tsk->signal->real_timer, - jiffies + 1 + expires); - spin_unlock_irq(&tsk->sighand->siglock); + timer = &tsk->signal->real_timer; + ktimer_cancel(timer); if (ovalue) { - jiffies_to_timeval(val, &ovalue->it_value); - jiffies_to_timeval(interval, - &ovalue->it_interval); - } + ktime_to_timeval(&ovalue->it_value, + itimer_get_remtime(timer, NSEC_PER_USEC)); + ktime_to_timeval(&ovalue->it_interval, timer->interval); + } + timer->interval = ktimer_round_timeval(timer, + &value->it_interval); + expires = timeval_to_ktime(value->it_value); + if (ktime_cmp_val(expires, != , KTIME_ZERO)) + ktimer_restart(timer, &expires, + KTIMER_REL | KTIMER_NOCHECK | KTIMER_ROUND); break; case ITIMER_VIRTUAL: nval = timeval_to_cputime(&value->it_value); Index: linux-2.6.14/kernel/ktimers.c =================================================================== --- /dev/null +++ linux-2.6.14/kernel/ktimers.c @@ -0,0 +1,1477 @@ +/* + * linux/kernel/ktimers.c + * + * Copyright(C) 2005, Thomas Gleixner + * Copyright(C) 2005, Red Hat, Inc., Ingo Molnar + * + * High-precision kernel timers + * + * In contrast to the low-resolution timeout API implemented in + * kernel/timer.c, ktimers provide finer resolution and accuracy + * depending on system configuration and capabilities. 
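The itimer conversion just above already shows the intended calling pattern; as a compact illustration (not code from the patch: the helper names and the 100 ms value are invented here, and it assumes the ktimer interfaces declared earlier), a kernel-internal user would drive the API roughly like this:

	/* Illustrative sketch only: arm a relative ktimer on the monotonic clock and block until it fires. */
	static void example_wakeup(void *data)
	{
		complete(data);			/* runs from the ktimer expiry path */
	}

	static void example_delay_100ms(void)
	{
		DECLARE_COMPLETION(done);
		struct ktimer t;
		ktime_t expires = ktime_set(0, 100 * 1000 * 1000);	/* 100 ms */

		ktimer_init(&t);		/* binds the timer to the monotonic base */
		t.function = example_wakeup;
		t.data = &done;

		/* relative expiry, rounded up to the clock base resolution */
		ktimer_start(&t, &expires, KTIMER_REL | KTIMER_ROUND);
		wait_for_completion(&done);
		ktimer_cancel(&t);		/* safe even after the callback has run */
	}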
+ * + * These timers are currently used for: + * - itimers + * - POSIX timers + * - nanosleep + * - precise in-kernel timing + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * Credits: + * based on kernel/timer.c + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef CONFIG_HIGH_RES_TIMERS +static int hrtimer_common_reprogram(struct ktimer *timer, + struct ktimer_base *base, ktime_t now); +#endif + +/* + * The timer bases: + */ + +#define MAX_KTIMER_BASES 2 + +static DEFINE_PER_CPU(struct ktimer_base, ktimer_bases[MAX_KTIMER_BASES]) = +{ + { + .index = CLOCK_REALTIME, + .name = "Realtime", + .get_time = &ktime_get_real, + .resolution = KTIME_REALTIME_RES, +#ifdef CONFIG_HIGH_RES_TIMERS + .reprogram = &hrtimer_common_reprogram, + .getoffset = &get_realtime_offset, +#endif + }, + { + .index = CLOCK_MONOTONIC, + .name = "Monotonic", + .get_time = &ktime_get, + .resolution = KTIME_MONOTONIC_RES, +#ifdef CONFIG_HIGH_RES_TIMERS + .reprogram = &hrtimer_common_reprogram, +#endif + }, +}; + +#ifndef CONFIG_GENERIC_TIME + +/** + * ktime_get - get the monotonic time in ktime_t format + * + * returns the time in ktime_t format + */ +ktime_t ktime_get(void) +{ + struct timespec now; + + ktime_get_ts(&now); + + return timespec_to_ktime(now); +} + +EXPORT_SYMBOL_GPL(ktime_get); + +/** + * ktime_get_real - get the real (wall-) time in ktime_t format + * + * returns the time in ktime_t format + */ +ktime_t ktime_get_real(void) +{ + struct timespec now; + + getnstimeofday(&now); + + return timespec_to_ktime(now); +} + +EXPORT_SYMBOL_GPL(ktime_get_real); + +/** + * ktime_get_ts - get the monotonic clock in timespec format + * + * @ts: pointer to timespec variable + * + * The function calculates the monotonic clock from the realtime + * clock and the wall_to_monotonic offset and stores the result + * in normalized timespec format in the variable pointed to by ts. + */ +void ktime_get_ts(struct timespec *ts) +{ + struct timespec tomono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + getnstimeofday(ts); + tomono = wall_to_monotonic; + + } while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec, + ts->tv_nsec + tomono.tv_nsec); +} +#endif + +/* + * Functions and macros which are different for UP/SMP systems are kept in a + * single place + */ +#ifdef CONFIG_SMP + +#define set_curr_timer(b, t) (b)->curr_timer = (t) +#define wake_up_timer_waiters(b) wake_up(&(b)->wait) + +/** + * wait_for_ktimer - Wait for a running ktimer + * + * @timer: timer to wait for + * + * The function waits in case the timers callback function is + * currently executed on the waitqueue of the timer base. The + * waitqueue is woken up after the timer callback function has + * finished execution. + */ +void wait_for_ktimer(struct ktimer *timer) +{ + struct ktimer_base *base = timer->base; + + if (base) + wait_event(base->wait, + base->curr_timer != timer); +} + +/* + * We are using hashed locking: holding per_cpu(ktimer_bases)[n].lock + * means that all timers which are tied to this base via timer->base are + * locked, and the base itself is locked too. + * + * So __run_timers/migrate_timers can safely modify all timers which could + * be found on the lists/queues. + * + * When the timer's base is locked, and the timer removed from list, it is + * possible to set timer->base = NULL and drop the lock: the timer remains + * locked. 
+ */ +static struct ktimer_base *lock_ktimer_base(struct ktimer *timer, + unsigned long *flags) +{ + struct ktimer_base *base; + + for (;;) { + base = timer->base; + if (likely(base != NULL)) { + spin_lock_irqsave(&base->lock, *flags); + if (likely(base == timer->base)) + return base; + /* The timer has migrated to another CPU */ + spin_unlock_irqrestore(&base->lock, *flags); + } + cpu_relax(); + } +} + +/* + * Switch the timer base to the current CPU when possible. + */ +static inline struct ktimer_base * +switch_ktimer_base(struct ktimer *timer, struct ktimer_base *base) +{ + struct ktimer_base *new_base; + + new_base = &__get_cpu_var(ktimer_bases[base->index]); + + if (base != new_base) { + /* + * We are trying to schedule the timer on the local CPU. + * However we can't change timer's base while it is running, + * so we keep it on the same CPU. No hassle vs. reprogramming + * the event source in the high resolution case. The softirq + * code will take care of this when the timer function has + * completed. There is no conflict as we hold the lock until + * the timer is enqueued. + */ + if (unlikely(base->curr_timer == timer)) + return base; + + /* See the comment in lock_timer_base() */ + timer->base = NULL; + spin_unlock(&base->lock); + spin_lock(&new_base->lock); + timer->base = new_base; + } + return new_base; +} + +/* + * Get the timer base unlocked + * + * Take care of timer->base = NULL in switch_ktimer_base ! + */ +static inline struct ktimer_base * +get_ktimer_base_unlocked(struct ktimer *timer) +{ + struct ktimer_base *base; + + while (!(base = timer->base)) + cpu_relax(); + + return base; +} + +#else /* CONFIG_SMP */ + +#define set_curr_timer(b, t) do { } while (0) +#define wake_up_timer_waiters(b) do { } while (0) + +static inline struct ktimer_base * +lock_ktimer_base(struct ktimer *timer, unsigned long *flags) +{ + struct ktimer_base *base = timer->base; + + spin_lock_irqsave(&base->lock, *flags); + + return base; +} + +#define switch_ktimer_base(t, b) (b) +#define get_ktimer_base_unlocked(t) (t)->base + +#endif /* !CONFIG_SMP */ + +/* High resolution timer related functions */ +#ifdef CONFIG_HIGH_RES_TIMERS + +#define ktimer_hres_active (__get_cpu_var(ktimer_hres).active) + +struct ktimer_hres { + ktime_t expires_next; + ktime_t next_tick; + ktime_t tick_incr; + int active; + int dotick; + unsigned long check_clocks; +}; + +DEFINE_PER_CPU(struct ktimer_hres, ktimer_hres); + +/* + * Shared reprogramming for clock_realtime and clock_monotonic + * + * When a new expires first timer is enqueued, we have + * to check, whether it expires earlier than the timer + * for which the hrt time source was armed. 
+ * + * Called with interrupts disabled and base lock held + */ +static int hrtimer_common_reprogram(struct ktimer *timer, + struct ktimer_base *base, ktime_t now) +{ + ktime_t *expires_next = &__get_cpu_var(ktimer_hres).expires_next; + ktime_t expires = timer->expires; + int res; + + if (base->getoffset) + expires = ktime_sub(expires, base->getoffset()); + + if (ktime_cmp(expires, >= ,*expires_next)) + return 0; + + res = clockevents_set_next_event(expires, now); + if (!res) + *expires_next = expires; + return res; +} + +/* + * High resolution timer interrupt + * Called with interrupts disabled + */ +int ktimer_interrupt(void) +{ + struct ktimer_base *base; + ktime_t expires_next, now; + int i, raise = 0, ret = 0; + int cpu = smp_processor_id(); + struct ktimer_hres *hres = &per_cpu(ktimer_hres, cpu); + + /* As long as we did not switch over to high resolution mode + * we expect, that the event source is running in periodic + * mode when it is a source serving other (tick based) + * functionality than next event + * + */ + if (!hres->active) + return 1; + + now = ktime_get(); + + if (hres->dotick) { + while (ktime_cmp(now, >= , hres->next_tick)) { + hres->next_tick = ktime_add(hres->next_tick, + hres->tick_incr); + ret++; + } + expires_next = hres->next_tick; + } else + ktime_set_scalar(expires_next, KTIME_MAX); + + base = per_cpu(ktimer_bases, cpu); + + for (i = 0; i < MAX_KTIMER_BASES; i++) { + ktime_t basenow; + DEFINE_KTIME(offset); + + spin_lock(&base->lock); + + if (list_empty(&base->pending)) { + spin_unlock(&base->lock); + base++; + continue; + } + + if (base->getoffset) { + offset = base->getoffset(); + basenow = ktime_add(now, offset); + } else { + basenow = now; + } + + while (!list_empty(&base->pending)) { + struct ktimer *timer = list_entry(base->pending.next, + struct ktimer, list); + + if (ktime_cmp(basenow, < , timer->expires)) { + ktime_t expires; + + expires = ktime_sub(timer->expires, offset); + if (ktime_cmp(expires, < , expires_next)) + expires_next = expires; + break; + } + timer->expired = basenow; + list_del(&timer->list); + list_add_tail(&timer->list, &base->expired); + timer->state = KTIMER_EXPIRED; + raise = 1; + } + spin_unlock(&base->lock); + base++; + } + + hres->expires_next = expires_next; + + /* Reprogramming necessary ? */ + if (ktime_cmp_val(expires_next, !=, KTIME_MAX)) + clockevents_set_next_event(expires_next, now); + + /* Raise softirq ? 
*/ + if (raise) + raise_softirq(KTIMER_SOFTIRQ); + + return ret; +} + +/* + * Retrigger next event is called after clock was set + */ +void retrigger_next_event(void *arg) +{ + ktime_t expires_next, now; + int i, cpu = smp_processor_id(); + struct ktimer_base *base = per_cpu(ktimer_bases, cpu); + struct ktimer_hres *hres = &per_cpu(ktimer_hres, cpu); + + now = ktime_get(); + + if (hres->dotick) + expires_next = hres->next_tick; + else + ktime_set_scalar(expires_next, KTIME_MAX); + + for (i = 0; i < MAX_KTIMER_BASES; i++) { + ktime_t basenow; + DEFINE_KTIME(offset); + struct ktimer *timer; + + spin_lock(&base->lock); + + if (list_empty(&base->pending)) { + spin_unlock(&base->lock); + base++; + continue; + } + + if (base->getoffset) { + offset = base->getoffset(); + basenow = ktime_add(now, offset); + } else { + basenow = now; + } + timer = list_entry(base->pending.next, struct ktimer, list); + + if (ktime_cmp(basenow, < , timer->expires)) { + ktime_t expires; + + expires = ktime_sub(timer->expires, offset); + if (ktime_cmp(expires, < , expires_next)) + expires_next = expires; + } + spin_unlock(&base->lock); + base++; + } + + hres->expires_next = expires_next; + + /* Reprogramming necessary ? */ + if (ktime_cmp_val(expires_next, !=, KTIME_MAX)) + clockevents_set_next_event(expires_next, now); +} + +/* + * Clock realtime was set + * + * Change the offset of the realtime clock vs. the monotonic + * clock. Called with xtime lock held ! + * + * We might have to reprogram the high resolution timer interrupt. On + * SMP we call the architecture specific code to retrigger _all_ high + * resolution timer interrupts. On UP we just disable interrupts and + * call the high resolution interrupt code. + */ +void clock_was_set(void) +{ + preempt_disable(); + local_irq_disable(); + + if (ktimer_hres_active) { + retrigger_next_event(NULL); + local_irq_enable(); + + if (smp_call_function(retrigger_next_event, NULL, 1, 1)) + BUG(); + } else + local_irq_enable(); + preempt_enable(); +} + +/*** + * ktimer_clock_notify - A clock source or a clock event has been installed + * + * Notify the per cpu softirqs to recheck the clock sources and events + */ +void ktimer_clock_notify(void) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) + set_bit(0, &per_cpu(ktimer_hres, i).check_clocks); +} + +/* + * A change in the clock source or clock events was detected. + * Check the clock source and the events, whether we can switch to + * high resolution mode or not. 
+ * + * TODO: Handle the removal of clock sources / events + */ +static void ktimer_check_clocks(void) +{ + struct ktimer_hres *hres = &__get_cpu_var(ktimer_hres); + unsigned long flags; + int dotick; + + if (!test_and_clear_bit(0, &hres->check_clocks)) + return; + + if (!timeofday_is_continuous()) + return; + + if (!(dotick = clockevents_next_event_available())) + return; + + local_irq_save(flags); + clockevents_init_next_event(); + hres->active = 1; + if (dotick == CLOCK_EVT_SCHEDTICK) { + struct ktimer helper; + struct timespec tsnow; + ktime_t now; + + /* Adjust to resolution */ + ktimer_init(&helper); + ktime_get_ts(&tsnow); + now = ktimer_round_timespec(&helper, &tsnow); + hres->tick_incr = ktime_set(0, NSEC_PER_SEC/HZ); + hres->next_tick = ktime_add(now, hres->tick_incr); + hres->dotick = 1; + } else + hres->dotick = 0; + + /* "Retrigger" the interrupt to get things going */ + retrigger_next_event(NULL); + local_irq_restore(flags); + printk(KERN_INFO "Ktimers: Switched to high resolution mode CPU %d\n", + smp_processor_id()); +} + +/* + * For HRT we move expired timers directly to the expired list and set + * the status to KTIMER_EXPIRED_NOQUEUE + */ +static inline int hres_enqueue_expired(struct ktimer *timer, + struct ktimer_base *base, ktime_t now) +{ + timer->expired = now; + list_add_tail(&timer->list, &base->expired); + timer->state = KTIMER_EXPIRED_NOQUEUE; + base->count++; + raise_softirq(KTIMER_SOFTIRQ); + return 1; +} + +static inline void +hres_requeue_expired(struct ktimer *timer, struct ktimer_base *base) +{ + timer->expired = timer->expires; + list_del(&timer->list); + list_add_tail(&timer->list, &base->expired); + timer->state = KTIMER_EXPIRED; + raise_softirq(KTIMER_SOFTIRQ); +} + +#else +# define ktimer_hres_active 0 +# define hres_enqueue_expired(t,b,n) 0 +# define ktimer_check_clocks() do { } while (0) +#endif + +/* + * Functions for the union type storage format of ktime_t which are + * too large for inlining: + */ +#if (BITS_PER_LONG < 64) + +#ifndef CONFIG_KTIME_SCALAR +/** + * ktime_add_ns - Add a scalar nanoseconds value to a ktime_t variable + * + * @kt: addend + * @nsec: the scalar nsec value to add + * + * Returns the sum of kt and nsec in ktime_t format + */ +ktime_t ktime_add_ns(ktime_t kt, u64 nsec) +{ + ktime_t tmp; + + if (likely(nsec < NSEC_PER_SEC)) { + tmp.tv64 = nsec; + } else { + unsigned long rem = do_div(nsec, NSEC_PER_SEC); + + tmp = ktime_set((long)nsec, rem); + } + + return ktime_add(kt, tmp); +} + +#else + +/** + * ktime_modulo - Calc ktime_t modulo div + * + * @kt: dividend + * @div. divisor + * + * Return ktime_t modulo div. + * + * div is less than NSEC_PER_SEC and (NSEC_PER_SEC % div) = 0 ! + */ +static unsigned long ktime_modulo(ktime_t kt, unsigned long div) +{ + return do_div(kt, div); +} + +#endif +#endif + +/* + * Counterpart to lock_timer_base above. + */ +static inline +void unlock_ktimer_base(struct ktimer *timer, unsigned long *flags) +{ + spin_unlock_irqrestore(&timer->base->lock, *flags); +} + +/** + * ktimer_round_timespec - convert timespec to ktime_t with resolution + * adjustment + * + * @timer: ktimer to retrieve the base + * @ts: pointer to the timespec value to be converted + * + * Returns the resolution adjusted ktime_t representation of the + * timespec. + * + * Note: We can access base without locking here, as ktimers can + * migrate between CPUs but can not be moved from one clock source to + * another. The clock source binding is set at init_ktimer_XXX time. 
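To make the rounding done by ktimer_round_timespec() below concrete, a worked example with made-up numbers (the real resolution comes from the timer's clock base):

	/*
	 * Illustration only: assume base->resolution = 1000000 (1 ms) and a
	 * requested sleep of 2.3 ms.
	 *
	 *	ts  = { .tv_sec = 0, .tv_nsec = 2300000 }
	 *	rem = 2300000 % 1000000 = 300000
	 *	t   = 2300000 + (1000000 - 300000) = 3000000 ns
	 *
	 * The value is rounded up to the next resolution boundary; a value
	 * already on a boundary (rem == 0) is returned unchanged.
	 */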
+ */ +ktime_t ktimer_round_timespec(struct ktimer *timer, struct timespec *ts) +{ + struct ktimer_base *base = get_ktimer_base_unlocked(timer); + long rem = ts->tv_nsec % base->resolution; + ktime_t t; + + t = ktime_set(ts->tv_sec, ts->tv_nsec); + + /* Check, if the value has to be rounded */ + if (rem) + t = ktime_add_ns(t, base->resolution - rem); + + return t; +} + +/** + * ktimer_round_timeval - convert timeval to ktime_t with resolution + * adjustment + * + * @timer: ktimer to retrieve the base + * @tv: pointer to the timeval value to be converted + * + * Returns the resolution adjusted ktime_t representation of the + * timeval. + */ +ktime_t ktimer_round_timeval(struct ktimer *timer, struct timeval *tv) +{ + struct timespec ts; + + ts.tv_sec = tv->tv_sec; + ts.tv_nsec = tv->tv_usec * NSEC_PER_USEC; + + return ktimer_round_timespec(timer, &ts); +} + +/* + * enqueue_ktimer - internal function to (re)start a timer + * + * The timer is inserted in expiry order. Insertion into the + * red black tree is O(log(n)). Must hold the base lock. + */ +static int enqueue_ktimer(struct ktimer *timer, struct ktimer_base *base, + ktime_t *tim, int mode) +{ + struct rb_node **link = &base->active.rb_node; + struct list_head *prev = &base->pending; + struct rb_node *parent = NULL; + struct ktimer *entry; + ktime_t now; + + /* Get current time */ + now = base->get_time(); + + /* + * Calculate the absolute expiry time based on the + * timer expiry mode: + */ + switch (mode & ~(KTIMER_NOCHECK | KTIMER_ROUND)) { + + case KTIMER_ABS: + timer->expires = *tim; + break; + + case KTIMER_REL: + timer->expires = ktime_add(now, *tim); + break; + + case KTIMER_INCR: + timer->expires = ktime_add(timer->expires, *tim); + break; + + case KTIMER_FORWARD: + while ktime_cmp(timer->expires, <= , now) { + timer->expires = ktime_add(timer->expires, *tim); + timer->overrun++; + } + goto nocheck; + + case KTIMER_REARM: + while ktime_cmp(timer->expires, <= , now) { + timer->expires = ktime_add(timer->expires, + timer->interval); + timer->overrun++; + } + goto nocheck; + + case KTIMER_RESTART: + break; + + default: + /* illegal mode */ + BUG(); + } + + /* + * Rounding is requested for one shot timers and the first + * event of interval timers. It's done here, so we don't + * have to read the current time twice for relative timers. + */ + if (mode & KTIMER_ROUND) { + unsigned long rem; + + rem = ktime_modulo(timer->expires, base->resolution); + if (rem) + timer->expires = ktime_add_ns(timer->expires, + base->resolution - rem); + } + + /* Expiry time in the past: */ + if (unlikely(ktime_cmp(timer->expires, <=, now))) { + timer->expired = now; + /* The caller takes care of expiry */ + if (!(mode & KTIMER_NOCHECK)) + return -1; + if (hres_enqueue_expired(timer, base, now)) + return 0; + } + nocheck: + + /* + * Find the right place in the rbtree: + */ + while (*link) { + parent = *link; + entry = rb_entry(parent, struct ktimer, node); + /* + * We dont care about collisions. Nodes with + * the same expiry time stay together. 
+ */ + if (ktime_cmp(timer->expires, <, entry->expires)) + link = &(*link)->rb_left; + else { + link = &(*link)->rb_right; + prev = &entry->list; + } + } + + /* + * Insert the timer to the rbtree and to the sorted list: + */ + rb_link_node(&timer->node, parent, link); + rb_insert_color(&timer->node, &base->active); + if (ktimer_hres_active && prev != &base->pending) { + entry = list_entry(prev, struct ktimer, list); + if (entry->state != KTIMER_PENDING) + prev = &base->pending; + } + list_add(&timer->list, prev); + + timer->state = KTIMER_PENDING; + base->count++; + +#ifdef CONFIG_HIGH_RES_TIMERS + if (ktimer_hres_active && + base->pending.next == &timer->list && + base->reprogram && + base->reprogram(timer, base, now)) + hres_requeue_expired(timer, base); +#endif + return 0; +} + +/* + * __remove_ktimer - internal function to remove a timer + * + * The function also allows automatic rearming for interval timers. + * Must hold the base lock. + */ +static void +__remove_ktimer(struct ktimer *timer, struct ktimer_base *base, + enum ktimer_rearm rearm) +{ + /* + * Remove the timer from the sorted list and from the rbtree: + */ + list_del(&timer->list); + if (timer->state != KTIMER_EXPIRED_NOQUEUE) + rb_erase(&timer->node, &base->active); + timer->node.rb_parent = KTIMER_POISON; + + timer->state = KTIMER_INACTIVE; + base->count--; + BUG_ON(base->count < 0); + + /* Auto rearm the timer ? */ + if (rearm && ktime_cmp_val(timer->interval, !=, KTIME_ZERO)) + enqueue_ktimer(timer, base, NULL, KTIMER_REARM); +} + +/* + * remove ktimer, called with base lock held + */ +static inline int remove_ktimer(struct ktimer *timer, struct ktimer_base *base) +{ + if (ktimer_active(timer)) { + __remove_ktimer(timer, base, KTIMER_NOREARM); + return 1; + } + return 0; +} + +/* + * Internal function to (re)start a timer. + */ +static int +internal_restart_ktimer(struct ktimer *timer, ktime_t *tim, int mode) +{ + struct ktimer_base *base, *new_base; + unsigned long flags; + int ret; + + BUG_ON(!timer->function); + + base = lock_ktimer_base(timer, &flags); + + /* Remove an active timer from the queue */ + ret = remove_ktimer(timer, base); + + /* Switch the timer base, if necessary */ + new_base = switch_ktimer_base(timer, base); + + /* + * When the new timer setting is already expired, + * let the calling code deal with it. 
+ */ + if (enqueue_ktimer(timer, new_base, tim, mode)) + ret = -1; + + unlock_ktimer_base(timer, &flags); + + return ret; +} + +/** + * ktimer_start - start a timer on the current CPU + * + * @timer: the timer to be added + * @tim: expiry time (optional, if not set in the timer) + * @mode: timer setup mode + * + * Returns: + * 0 on success + * -1 when the new time setting is already expired + */ +int ktimer_start(struct ktimer *timer, ktime_t *tim, int mode) +{ + BUG_ON(ktimer_active(timer)); + + return internal_restart_ktimer(timer, tim, mode); +} + +EXPORT_SYMBOL_GPL(ktimer_start); + +/** + * ktimer_restart - modify a running timer + * + * @timer: the timer to be modified + * @tim: expiry time (required) + * @mode: timer setup mode + * + * Returns: + * 0 when the timer was not active + * 1 when the timer was active + * -1 when the new time setting is already expired + */ +int ktimer_restart(struct ktimer *timer, ktime_t *tim, int mode) +{ + BUG_ON(!tim); + + return internal_restart_ktimer(timer, tim, mode); +} + +EXPORT_SYMBOL_GPL(ktimer_restart); + +/** + * ktimer_try_to_cancel - try to deactivate a timer + * + * @timer: ktimer to stop + * + * Returns: + * 0 when the timer was not active + * 1 when the timer was active + * -1 when the timer is currently excuting the callback function and + * can not be stopped + */ +int ktimer_try_to_cancel(struct ktimer *timer) +{ + struct ktimer_base *base; + unsigned long flags; + int ret = -1; + + base = lock_ktimer_base(timer, &flags); + + if (base->curr_timer != timer) { + ret = remove_ktimer(timer, base); + if (ret) + timer->expired = base->get_time(); + } + + unlock_ktimer_base(timer, &flags); + + return ret; + +} + +EXPORT_SYMBOL_GPL(ktimer_try_to_cancel); + +/** + * ktimer_cancel - cancel a timer and wait for the handler to finish. + * + * @timer: the timer to be cancelled + * + * Returns: + * 0 when the timer was not active + * 1 when the timer was active + */ +int ktimer_cancel(struct ktimer *timer) +{ + for (;;) { + int ret = ktimer_try_to_cancel(timer); + + if (ret >= 0) + return ret; + wait_for_ktimer(timer); + } +} + +EXPORT_SYMBOL_GPL(ktimer_cancel); + +/** + * ktimer_get_remtime - get remaining time for the timer + * + * @timer: the timer to read + * + * Returns the delta between the expiry time and now, which can be + * less than zero. 
+ */ +ktime_t ktimer_get_remtime(struct ktimer *timer) +{ + struct ktimer_base *base; + unsigned long flags; + ktime_t rem; + + base = lock_ktimer_base(timer, &flags); + rem = ktime_sub(timer->expires, base->get_time()); + unlock_ktimer_base(timer, &flags); + + return rem; +} + +/** + * ktimer_get_expiry - get expiry time for the timer + * + * @timer: the timer to read + * @now: if != NULL then store current base->time into it + */ +ktime_t ktimer_get_expiry(struct ktimer *timer, ktime_t *now) +{ + struct ktimer_base *base; + unsigned long flags; + ktime_t expiry; + + base = lock_ktimer_base(timer, &flags); + expiry = timer->expires; + if (now) + *now = base->get_time(); + unlock_ktimer_base(timer, &flags); + + return expiry; +} + +/* + * Functions related to clock sources + */ + +static inline void ktimer_common_init(struct ktimer *timer) +{ + memset(timer, 0, sizeof(struct ktimer)); + timer->node.rb_parent = KTIMER_POISON; +} + +/** + * ktimer_init - initialize a timer to the monotonic clock + * + * @timer: the timer to be initialized + */ +void ktimer_init(struct ktimer *timer) +{ + struct ktimer_base *bases; + + ktimer_common_init(timer); + bases = per_cpu(ktimer_bases, raw_smp_processor_id()); + timer->base = &bases[CLOCK_MONOTONIC]; +} + +EXPORT_SYMBOL_GPL(ktimer_init); + +/** + * ktimer_init_real - initialize a timer to the real (wall-) clock + * + * @timer: the timer to be initialized + */ +void ktimer_init_real(struct ktimer *timer) +{ + struct ktimer_base *bases; + + ktimer_common_init(timer); + bases = per_cpu(ktimer_bases, raw_smp_processor_id()); + timer->base = &bases[CLOCK_REALTIME]; +} + +EXPORT_SYMBOL_GPL(ktimer_init_real); + +/** + * ktimer_get_res - get the monotonic timer resolution + * + * @which_clock: unused parameter for compability with the posix timer code + * @tp: pointer to timespec variable to store the resolution + * + * Store the resolution of clock monotonic in the variable pointed to + * by tp. + */ +int ktimer_get_res(clockid_t which_clock, struct timespec *tp) +{ + struct ktimer_base *bases; + + tp->tv_sec = 0; + bases = per_cpu(ktimer_bases, raw_smp_processor_id()); + tp->tv_nsec = bases[CLOCK_MONOTONIC].resolution; + + return 0; +} + +/** + * ktimer_get_res_real - get the real timer resolution + * + * @which_clock: unused parameter for compability with the posix timer code + * @tp: pointer to timespec variable to store the resolution + * + * Store the resolution of clock realtime in the variable pointed to + * by tp. 
+ */ +int ktimer_get_res_real(clockid_t which_clock, struct timespec *tp) +{ + struct ktimer_base *bases; + + tp->tv_sec = 0; + bases = per_cpu(ktimer_bases, raw_smp_processor_id()); + tp->tv_nsec = bases[CLOCK_REALTIME].resolution; + + return 0; +} + +#ifdef CONFIG_HIGH_RES_TIMERS +/* + * Expire the per base ktimer-queue in high resolution mode: + */ +static inline void run_ktimer_hres_queue(struct ktimer_base *base) +{ + spin_lock_irq(&base->lock); + + while (!list_empty(&base->expired)) { + struct ktimer *timer; + void (*fn)(void *); + void *data; + + timer = list_entry(base->expired.next, struct ktimer, list); + fn = timer->function; + data = timer->data; + __remove_ktimer(timer, base, KTIMER_REARM); + set_curr_timer(base, timer); + spin_unlock_irq(&base->lock); + + fn(data); + + spin_lock_irq(&base->lock); + set_curr_timer(base, NULL); + } + spin_unlock_irq(&base->lock); + + wake_up_timer_waiters(base); +} + +static void run_ktimer_softirq(struct softirq_action *h) +{ + struct ktimer_base *base = per_cpu(ktimer_bases, smp_processor_id()); + int i; + + for (i = 0; i < MAX_KTIMER_BASES; i++) + run_ktimer_hres_queue(&base[i]); +} + +#endif /* CONFIG_HIGH_RES_TIMERS */ + +/* + * Expire the per base ktimer-queue. Used for non HRT mode and + * as a fallback when HRT init failed: + */ +static inline void run_ktimer_queue(struct ktimer_base *base) +{ + ktime_t now = base->get_time(); + + spin_lock_irq(&base->lock); + + while (!list_empty(&base->pending)) { + struct ktimer *timer; + void (*fn)(void *); + void *data; + + timer = list_entry(base->pending.next, struct ktimer, list); + if (ktime_cmp(now, <=, timer->expires)) + break; + + timer->expired = now; + fn = timer->function; + data = timer->data; + set_curr_timer(base, timer); + __remove_ktimer(timer, base, KTIMER_REARM); + spin_unlock_irq(&base->lock); + + fn(data); + + spin_lock_irq(&base->lock); + set_curr_timer(base, NULL); + } + spin_unlock_irq(&base->lock); + + wake_up_timer_waiters(base); +} + +/* + * Called from timer softirq every jiffy, to expire ktimers. + * + * For HRT its the fall back code to run the softirq in the timer + * softirq context in case the hrtimer initialization failed or has + * not been done yet. + */ +void ktimer_run_queues(void) +{ + struct ktimer_base *base = __get_cpu_var(ktimer_bases); + int i; + + ktimer_check_clocks(); + + if (ktimer_hres_active) + return; + + for (i = 0; i < MAX_KTIMER_BASES; i++) + run_ktimer_queue(&base[i]); +} + +/* + * Sleep related functions: + */ + +/* + * Process-wakeup callback: + */ +static void ktimer_wake_up(void *data) +{ + wake_up_process(data); +} + +/** + * schedule_ktimer - sleep until timeout + * + * @timer: ktimer variable initialized with the correct clock base + * @t: timeout value + * @mode: timeout value is abs/rel + * + * Make the current task sleep until @timeout is + * elapsed. + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout is guaranteed to + * pass before the routine returns. The routine will return 0 + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. In this case the remaining time + * will be returned + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. 
+ */ +static ktime_t __sched +schedule_ktimer(struct ktimer *timer, ktime_t *t, int mode) +{ + timer->data = current; + timer->function = ktimer_wake_up; + + if (unlikely(ktimer_start(timer, t, mode) < 0)) { + __set_current_state(TASK_RUNNING); + } else { + if (current->state != TASK_RUNNING) + schedule(); + ktimer_cancel(timer); + } + + /* Store the absolute expiry time */ + *t = timer->expires; + + /* Return the remaining time */ + return ktime_sub(timer->expires, timer->expired); +} + +static ktime_t __sched +schedule_ktimer_interruptible(struct ktimer *timer, ktime_t *t, int mode) +{ + set_current_state(TASK_INTERRUPTIBLE); + + return schedule_ktimer(timer, t, mode); +} + +static long __sched +nanosleep_restart(struct ktimer *timer, struct restart_block *restart) +{ + void *rfn_save = restart->fn; + struct timespec __user *rmtp; + struct timespec tu; + ktime_t t, rem; + + restart->fn = do_no_restart_syscall; + + t = ktime_set_low_high(restart->arg0, restart->arg1); + + rem = schedule_ktimer_interruptible(timer, &t, KTIMER_ABS); + + if (ktime_cmp_val(rem, <=, KTIME_ZERO)) + return 0; + + rmtp = (struct timespec __user *) restart->arg2; + ktime_to_timespec(&tu, rem); + if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + + restart->fn = rfn_save; + + /* The other values in restart are already filled in */ + return -ERESTART_RESTARTBLOCK; +} + +static long __sched nanosleep_restart_mono(struct restart_block *restart) +{ + struct ktimer timer; + + ktimer_init(&timer); + + return nanosleep_restart(&timer, restart); +} + +static long __sched nanosleep_restart_real(struct restart_block *restart) +{ + struct ktimer timer; + + ktimer_init_real(&timer); + + return nanosleep_restart(&timer, restart); +} + +static long __ktimer_nanosleep(struct ktimer *timer, struct timespec *rqtp, + struct timespec __user *rmtp, int mode, + long (*rfn)(struct restart_block *)) +{ + struct timespec tu; + ktime_t rem, t; + struct restart_block *restart; + + t = timespec_to_ktime(*rqtp); + + /* t is updated to absolute expiry time ! 
*/ + rem = schedule_ktimer_interruptible(timer, &t, mode | KTIMER_ROUND); + + if (ktime_cmp_val(rem, <=, KTIME_ZERO)) + return 0; + + ktime_to_timespec(&tu, rem); + + if (rmtp && copy_to_user(rmtp, &tu, sizeof(tu))) + return -EFAULT; + + restart = &current_thread_info()->restart_block; + restart->fn = rfn; + restart->arg0 = ktime_get_low(t); + restart->arg1 = ktime_get_high(t); + restart->arg2 = (unsigned long) rmtp; + + return -ERESTART_RESTARTBLOCK; +} + +long ktimer_nanosleep(struct timespec *rqtp, + struct timespec __user *rmtp, int mode) +{ + struct ktimer timer; + + ktimer_init(&timer); + + return __ktimer_nanosleep(&timer, rqtp, rmtp, mode, + nanosleep_restart_mono); +} + +long ktimer_nanosleep_real(struct timespec *rqtp, + struct timespec __user *rmtp, int mode) +{ + struct ktimer timer; + + ktimer_init_real(&timer); + return __ktimer_nanosleep(&timer, rqtp, rmtp, mode, + nanosleep_restart_real); +} + +asmlinkage long +sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) +{ + struct timespec tu; + + if (copy_from_user(&tu, rqtp, sizeof(tu))) + return -EFAULT; + + if (!timespec_valid(&tu)) + return -EINVAL; + + return ktimer_nanosleep(&tu, rmtp, KTIMER_REL); +} + +/* + * Functions related to boot-time initialization: + */ +static void __devinit init_ktimers_cpu(int cpu) +{ + struct ktimer_base *base = per_cpu(ktimer_bases, cpu); + int i; + + for (i = 0; i < MAX_KTIMER_BASES; i++) { + spin_lock_init(&base->lock); + INIT_LIST_HEAD(&base->pending); +#ifdef CONFIG_HIGH_RES_TIMERS + INIT_LIST_HEAD(&base->expired); +#endif + init_waitqueue_head(&base->wait); + base++; + } +#ifdef CONFIG_HIGH_RES_TIMERS + { + ktime_t max; + + ktime_set_scalar(max, KTIME_MAX); + per_cpu(ktimer_hres, cpu).expires_next = max; + set_bit(0, &per_cpu(ktimer_hres, cpu).check_clocks); + per_cpu(ktimer_hres, cpu).active = 0; + } +#endif +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void migrate_ktimer_list(struct ktimer_base *old_base, + struct ktimer_base *new_base) +{ + struct ktimer *timer; + struct rb_node *node; + + while ((node = rb_first(&old_base->active))) { + timer = rb_entry(node, struct ktimer, node); + remove_ktimer(timer, old_base); + timer->base = new_base; + enqueue_ktimer(timer, new_base, NULL, KTIMER_RESTART); + } +} + +static void migrate_ktimers(int cpu) +{ + struct ktimer_base *old_base, *new_base; + int i; + + BUG_ON(cpu_online(cpu)); + old_base = per_cpu(ktimer_bases, cpu); + new_base = get_cpu_var(ktimer_bases); + + local_irq_disable(); + + for (i = 0; i < MAX_KTIMER_BASES; i++) { + + spin_lock(&new_base->lock); + spin_lock(&old_base->lock); + + BUG_ON(old_base->curr_timer); + + migrate_ktimer_list(old_base, new_base); + + spin_unlock(&old_base->lock); + spin_unlock(&new_base->lock); + old_base++; + new_base++; + } + + local_irq_enable(); + put_cpu_var(ktimer_bases); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static int __devinit ktimer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch(action) { + + case CPU_UP_PREPARE: + init_ktimers_cpu(cpu); + break; + +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + migrate_ktimers(cpu); + break; +#endif + + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata ktimers_nb = { + .notifier_call = ktimer_cpu_notify, +}; + +void __init ktimers_init(void) +{ + ktimer_cpu_notify(&ktimers_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + register_cpu_notifier(&ktimers_nb); +#ifdef CONFIG_HIGH_RES_TIMERS + 
open_softirq(KTIMER_SOFTIRQ, run_ktimer_softirq, NULL); +#endif +} + Index: linux-2.6.14/kernel/posix-cpu-timers.c =================================================================== --- linux-2.6.14.orig/kernel/posix-cpu-timers.c +++ linux-2.6.14/kernel/posix-cpu-timers.c @@ -1407,7 +1407,7 @@ void set_process_cpu_timer(struct task_s static long posix_cpu_clock_nanosleep_restart(struct restart_block *); int posix_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) + struct timespec *rqtp, struct timespec __user *rmtp) { struct restart_block *restart_block = &current_thread_info()->restart_block; @@ -1432,7 +1432,6 @@ int posix_cpu_nsleep(clockid_t which_clo error = posix_cpu_timer_create(&timer); timer.it_process = current; if (!error) { - struct timespec __user *rmtp; static struct itimerspec zero_it; struct itimerspec it = { .it_value = *rqtp, .it_interval = {} }; @@ -1479,7 +1478,6 @@ int posix_cpu_nsleep(clockid_t which_clo /* * Report back to the user the time still remaining. */ - rmtp = (struct timespec __user *) restart_block->arg1; if (rmtp != NULL && !(flags & TIMER_ABSTIME) && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) return -EFAULT; @@ -1487,6 +1485,7 @@ int posix_cpu_nsleep(clockid_t which_clo restart_block->fn = posix_cpu_clock_nanosleep_restart; /* Caller already set restart_block->arg1 */ restart_block->arg0 = which_clock; + restart_block->arg1 = (unsigned long) rmtp; restart_block->arg2 = rqtp->tv_sec; restart_block->arg3 = rqtp->tv_nsec; @@ -1500,10 +1499,15 @@ static long posix_cpu_clock_nanosleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->arg0; - struct timespec t = { .tv_sec = restart_block->arg2, - .tv_nsec = restart_block->arg3 }; + struct timespec __user *rmtp; + struct timespec t; + + rmtp = (struct timespec __user *) restart_block->arg1; + t.tv_sec = restart_block->arg2; + t.tv_nsec = restart_block->arg3; + restart_block->fn = do_no_restart_syscall; - return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t); + return posix_cpu_nsleep(which_clock, TIMER_ABSTIME, &t, rmtp); } @@ -1524,9 +1528,10 @@ static int process_cpu_timer_create(stru return posix_cpu_timer_create(timer); } static int process_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) + struct timespec *rqtp, + struct timespec __user *rmtp) { - return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); + return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); } static int thread_cpu_clock_getres(clockid_t which_clock, struct timespec *tp) { @@ -1542,7 +1547,7 @@ static int thread_cpu_timer_create(struc return posix_cpu_timer_create(timer); } static int thread_cpu_nsleep(clockid_t which_clock, int flags, - struct timespec *rqtp) + struct timespec *rqtp, struct timespec __user *rmtp) { return -EINVAL; } Index: linux-2.6.14/kernel/time.c =================================================================== --- linux-2.6.14.orig/kernel/time.c +++ linux-2.6.14/kernel/time.c @@ -38,6 +38,7 @@ #include #include +#include /* * The timezone where the local system is located. Used as a default by some @@ -128,6 +129,7 @@ asmlinkage long sys_gettimeofday(struct * as real UNIX machines always do it. This avoids all headaches about * daylight saving times and warping kernel clocks.
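A worked illustration of the CONFIG_GENERIC_TIME variant added just below, with a made-up timezone value: for sys_tz.tz_minuteswest = 300, the new warp_clock() reads the current time with getnstimeofday(), adds 300 * 60 = 18000 seconds to tv_sec, and feeds the result back through do_settimeofday(), so the clock is warped forward by five hours without manipulating xtime under the seqlock directly.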
*/ +#ifndef CONFIG_GENERIC_TIME static inline void warp_clock(void) { write_seqlock_irq(&xtime_lock); @@ -137,6 +139,18 @@ static inline void warp_clock(void) write_sequnlock_irq(&xtime_lock); clock_was_set(); } +#else /* !CONFIG_GENERIC_TIME */ +/* XXX - this is somewhat cracked out and should + be checked -johnstul@us.ibm.com +*/ +static inline void warp_clock(void) +{ + struct timespec ts; + getnstimeofday(&ts); + ts.tv_sec += sys_tz.tz_minuteswest * 60; + do_settimeofday(&ts); +} +#endif /* !CONFIG_GENERIC_TIME */ /* * In case for some reason the CMOS clock has not already been running @@ -154,6 +168,9 @@ int do_sys_settimeofday(struct timespec static int firsttime = 1; int error = 0; + if (!timespec_valid(tv)) + return -EINVAL; + error = security_settime(tv, tz); if (error) return error; @@ -231,7 +248,9 @@ int do_adjtimex(struct timex *txc) { long ltemp, mtemp, save_adjust; int result; - + unsigned long flags; + struct timespec now_ts; + unsigned long seq; /* In order to modify anything, you gotta be super-user! */ if (txc->modes && !capable(CAP_SYS_TIME)) return -EPERM; @@ -254,7 +273,13 @@ int do_adjtimex(struct timex *txc) txc->tick > 1100000/USER_HZ) return -EINVAL; - write_seqlock_irq(&xtime_lock); + do { /* save off current xtime */ + seq = read_seqbegin(&xtime_lock); + now_ts = xtime; + } while (read_seqretry(&xtime_lock, seq)); + + write_seqlock_irqsave(&ntp_lock, flags); + result = time_state; /* mostly `TIME_OK' */ /* Save for later - semantics of adjtime is to return old value */ @@ -331,37 +356,27 @@ int do_adjtimex(struct timex *txc) */ if (time_status & STA_FREQHOLD || time_reftime == 0) - time_reftime = xtime.tv_sec; - mtemp = xtime.tv_sec - time_reftime; - time_reftime = xtime.tv_sec; + time_reftime = now_ts.tv_sec; + mtemp = now_ts.tv_sec - time_reftime; + time_reftime = now_ts.tv_sec; if (time_status & STA_FLL) { if (mtemp >= MINSEC) { ltemp = (time_offset / mtemp) << (SHIFT_USEC - SHIFT_UPDATE); - if (ltemp < 0) - time_freq -= -ltemp >> SHIFT_KH; - else - time_freq += ltemp >> SHIFT_KH; + time_freq += shift_right(ltemp, SHIFT_KH); } else /* calibration interval too short (p. 12) */ result = TIME_ERROR; } else { /* PLL mode */ if (mtemp < MAXSEC) { ltemp *= mtemp; - if (ltemp < 0) - time_freq -= -ltemp >> (time_constant + - time_constant + - SHIFT_KF - SHIFT_USEC); - else - time_freq += ltemp >> (time_constant + + time_freq += shift_right(ltemp,(time_constant + time_constant + - SHIFT_KF - SHIFT_USEC); + SHIFT_KF - SHIFT_USEC)); } else /* calibration interval too long (p. 
12) */ result = TIME_ERROR; } - if (time_freq > time_tolerance) - time_freq = time_tolerance; - else if (time_freq < -time_tolerance) - time_freq = -time_tolerance; + time_freq = min(time_freq, time_tolerance); + time_freq = max(time_freq, -time_tolerance); } /* STA_PLL || STA_PPSTIME */ } /* txc->modes & ADJ_OFFSET */ if (txc->modes & ADJ_TICK) { @@ -384,10 +399,7 @@ leave: if ((time_status & (STA_UNSYNC|ST if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) txc->offset = save_adjust; else { - if (time_offset < 0) - txc->offset = -(-time_offset >> SHIFT_UPDATE); - else - txc->offset = time_offset >> SHIFT_UPDATE; + txc->offset = shift_right(time_offset, SHIFT_UPDATE); } txc->freq = time_freq + pps_freq; txc->maxerror = time_maxerror; @@ -405,7 +417,7 @@ leave: if ((time_status & (STA_UNSYNC|ST txc->calcnt = pps_calcnt; txc->errcnt = pps_errcnt; txc->stbcnt = pps_stbcnt; - write_sequnlock_irq(&xtime_lock); + write_sequnlock_irqrestore(&ntp_lock, flags); do_gettimeofday(&txc->time); notify_arch_cmos_timer(); return(result); @@ -486,6 +498,7 @@ struct timespec timespec_trunc(struct ti } EXPORT_SYMBOL(timespec_trunc); +#ifndef CONFIG_GENERIC_TIME #ifdef CONFIG_TIME_INTERPOLATION void getnstimeofday (struct timespec *tv) { @@ -522,10 +535,7 @@ int do_settimeofday (struct timespec *tv set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); time_interpolator_reset(); } write_sequnlock_irq(&xtime_lock); @@ -573,6 +583,131 @@ void getnstimeofday(struct timespec *tv) EXPORT_SYMBOL_GPL(getnstimeofday); #endif +/** + * get_monotonic_clock_ns - Returns monotonically increasing nanoseconds + * + * Returns the monotonically increasing number of nanoseconds + * since the system booted. + */ +nsec_t get_monotonic_clock_ns(void) +{ + struct timespec ts, mo; + unsigned int seq; + + do { + seq = read_seqbegin(&xtime_lock); + getnstimeofday(&ts); + mo = wall_to_monotonic; + } while(read_seqretry(&xtime_lock, seq)); + + ts.tv_sec += mo.tv_sec; + ts.tv_nsec += mo.tv_nsec; + + return ((u64)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} +EXPORT_SYMBOL_GPL(get_monotonic_clock_ns); + + + +#endif /* !CONFIG_GENERIC_TIME */ + +/* Converts Gregorian date to seconds since 1970-01-01 00:00:00. + * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 + * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. + * + * [For the Julian calendar (which was used in Russia before 1917, + * Britain & colonies before 1752, anywhere else before 1582, + * and is still in use by some communities) leave out the + * -year/100+year/400 terms, and add 10.] + * + * This algorithm was first published by Gauss (I think). + * + * WARNING: this function will overflow on 2106-02-07 06:28:16 on + * machines were long is 32-bit! 
(However, as time_t is signed, we + * will already get problems at other places on 2038-01-19 03:14:08) + */ +unsigned long +mktime (unsigned int year, unsigned int mon, + unsigned int day, unsigned int hour, + unsigned int min, unsigned int sec) +{ + if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */ + mon += 12; /* Puts Feb last since it has leap day */ + year -= 1; + } + + return ((((unsigned long) + (year/4 - year/100 + year/400 + 367*mon/12 + day) + + year*365 - 719499 + )*24 + hour /* now have hours */ + )*60 + min /* now have minutes */ + )*60 + sec; /* finally seconds */ +} + +/** + * set_normalized_timespec - set timespec sec and nsec parts and normalize + * + * @ts: pointer to timespec variable to be set + * @sec: seconds to set + * @nsec: nanoseconds to set + * + * Set seconds and nanoseconds field of a timespec variable and + * normalize to the timespec storage format + * + * Note: The tv_nsec part is always in the range of + * 0 <= tv_nsec < NSEC_PER_SEC + * For negative values only the tv_sec field is negative ! + */ +void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec) +{ + while (nsec > NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +/** + * ns_to_timespec - Convert nanoseconds to timespec + * + * @ts: pointer to timespec variable to store result + * @nsec: nanoseconds value to be converted + * + * Stores the timespec representation of the nanoseconds value in + * the timespec variable pointed to by @ts + */ +void ns_to_timespec(struct timespec *ts, nsec_t nsec) +{ + if (nsec) + ts->tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, + &ts->tv_nsec); + else + ts->tv_sec = ts->tv_nsec = 0; +} + +/** + * ns_to_timeval - Convert nanoseconds to timeval + * + * @tv: pointer to timeval variable to store result + * @nsec: nanoseconds value to be converted + * + * Stores the timeval representation of the nanoseconds value in + * the timeval variable pointed to by @tv + */ +void ns_to_timeval(struct timeval *tv, nsec_t nsec) +{ + struct timespec ts; + + ns_to_timespec(&ts, nsec); + tv->tv_sec = ts.tv_sec; + tv->tv_usec = (suseconds_t) ts.tv_nsec / 1000; +} + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { Index: linux-2.6.14/drivers/scsi/ncr53c8xx.c =================================================================== --- linux-2.6.14.orig/drivers/scsi/ncr53c8xx.c +++ linux-2.6.14/drivers/scsi/ncr53c8xx.c @@ -3481,8 +3481,8 @@ static int ncr_queue_command (struct ncb **---------------------------------------------------- */ if (np->settle_time && cmd->timeout_per_command >= HZ) { - u_long tlimit = ktime_get(cmd->timeout_per_command - HZ); - if (ktime_dif(np->settle_time, tlimit) > 0) + u_long tlimit = Ktime_get(cmd->timeout_per_command - HZ); + if (Ktime_dif(np->settle_time, tlimit) > 0) np->settle_time = tlimit; } @@ -3516,7 +3516,7 @@ static int ncr_queue_command (struct ncb ** Force ordered tag if necessary to avoid timeouts ** and to preserve interactivity. 
*/ - if (lp && ktime_exp(lp->tags_stime)) { + if (lp && Ktime_exp(lp->tags_stime)) { if (lp->tags_smap) { order = M_ORDERED_TAG; if ((DEBUG_FLAGS & DEBUG_TAGS)||bootverbose>2){ @@ -3524,7 +3524,7 @@ static int ncr_queue_command (struct ncb "ordered tag forced.\n"); } } - lp->tags_stime = ktime_get(3*HZ); + lp->tags_stime = Ktime_get(3*HZ); lp->tags_smap = lp->tags_umap; } @@ -3792,7 +3792,7 @@ static int ncr_reset_scsi_bus(struct ncb u32 term; int retv = 0; - np->settle_time = ktime_get(settle_delay * HZ); + np->settle_time = Ktime_get(settle_delay * HZ); if (bootverbose > 1) printk("%s: resetting, " @@ -5044,7 +5044,7 @@ static void ncr_setup_tags (struct ncb * static void ncr_timeout (struct ncb *np) { - u_long thistime = ktime_get(0); + u_long thistime = Ktime_get(0); /* ** If release process in progress, let's go @@ -5057,7 +5057,7 @@ static void ncr_timeout (struct ncb *np) return; } - np->timer.expires = ktime_get(SCSI_NCR_TIMER_INTERVAL); + np->timer.expires = Ktime_get(SCSI_NCR_TIMER_INTERVAL); add_timer(&np->timer); /* @@ -5336,8 +5336,8 @@ void ncr_exception (struct ncb *np) **========================================================= */ - if (ktime_exp(np->regtime)) { - np->regtime = ktime_get(10*HZ); + if (Ktime_exp(np->regtime)) { + np->regtime = Ktime_get(10*HZ); for (i = 0; i<sizeof(np->regdump); i++) ((char*)&np->regdump)[i] = INB_OFF(i); np->regdump.nc_dstat = dstat; @@ -5453,7 +5453,7 @@ static int ncr_int_sbmc (struct ncb *np) ** Suspend command processing for 1 second and ** reinitialize all except the chip. */ - np->settle_time = ktime_get(1*HZ); + np->settle_time = Ktime_get(1*HZ); ncr_init (np, 0, bootverbose ? "scsi mode change" : NULL, HS_RESET); return 1; } @@ -6923,7 +6923,7 @@ static struct lcb *ncr_setup_lcb (struct for (i = 0 ; i < MAX_TAGS ; i++) lp->cb_tags[i] = i; lp->maxnxs = MAX_TAGS; - lp->tags_stime = ktime_get(3*HZ); + lp->tags_stime = Ktime_get(3*HZ); ncr_setup_tags (np, sdev); } Index: linux-2.6.14/drivers/scsi/sym53c8xx_defs.h =================================================================== --- linux-2.6.14.orig/drivers/scsi/sym53c8xx_defs.h +++ linux-2.6.14/drivers/scsi/sym53c8xx_defs.h @@ -285,12 +285,12 @@ ** kernel time values (jiffies) to some that have ** chance not to be too much incorrect.
:-) */ -#define ktime_get(o) (jiffies + (u_long) o) -#define ktime_exp(b) ((long)(jiffies) - (long)(b) >= 0) -#define ktime_dif(a, b) ((long)(a) - (long)(b)) +#define Ktime_get(o) (jiffies + (u_long) o) +#define Ktime_exp(b) ((long)(jiffies) - (long)(b) >= 0) +#define Ktime_dif(a, b) ((long)(a) - (long)(b)) /* These ones are not used in this driver */ -#define ktime_add(a, o) ((a) + (u_long)(o)) -#define ktime_sub(a, o) ((a) - (u_long)(o)) +#define Ktime_add(a, o) ((a) + (u_long)(o)) +#define Ktime_sub(a, o) ((a) - (u_long)(o)) /* Index: linux-2.6.14/include/linux/timex.h =================================================================== --- linux-2.6.14.orig/include/linux/timex.h +++ linux-2.6.14/include/linux/timex.h @@ -260,6 +260,7 @@ extern long pps_calcnt; /* calibration extern long pps_errcnt; /* calibration errors */ extern long pps_stbcnt; /* stability limit exceeded */ +extern seqlock_t ntp_lock; /** * ntp_clear - Clears the NTP state variables * @@ -267,10 +268,14 @@ extern long pps_stbcnt; /* stability li */ static inline void ntp_clear(void) { + unsigned long flags; + write_seqlock_irqsave(&ntp_lock, flags); time_adjust = 0; /* stop active adjtime() */ time_status |= STA_UNSYNC; time_maxerror = NTP_PHASE_LIMIT; time_esterror = NTP_PHASE_LIMIT; + write_sequnlock_irqrestore(&ntp_lock, flags); + } /** @@ -282,6 +287,33 @@ static inline int ntp_synced(void) return !(time_status & STA_UNSYNC); } +/** + * ntp_get_ppm_adjustment - Returns Shifted PPM adjustment + * + */ +long ntp_get_ppm_adjustment(void); + +/** + * ntp_advance - Advances the NTP state machine by interval_ns + * + */ +void ntp_advance(unsigned long interval_ns); + +/** + * ntp_leapsecond - NTP leapsecond processing code. + * + */ +int ntp_leapsecond(struct timespec now); + + +/* Required to safely shift negative values */ +#define shift_right(x, s) ({ \ + __typeof__(x) __x = (x); \ + __typeof__(s) __s = (s); \ + __x < 0 ? -(-__x >> __s) : __x >> __s; \ +}) + +#ifndef CONFIG_GENERIC_TIME #ifdef CONFIG_TIME_INTERPOLATION @@ -337,6 +369,7 @@ time_interpolator_reset(void) } #endif /* !CONFIG_TIME_INTERPOLATION */ +#endif /* !CONFIG_GENERIC_TIME */ #endif /* KERNEL */ Index: linux-2.6.14/Documentation/kernel-parameters.txt =================================================================== --- linux-2.6.14.orig/Documentation/kernel-parameters.txt +++ linux-2.6.14/Documentation/kernel-parameters.txt @@ -52,6 +52,7 @@ restrictions referred to are that the re MTD MTD support is enabled. NET Appropriate network support is enabled. NUMA NUMA support is enabled. + GENERIC_TIME The generic timeofday code is enabled. NFS Appropriate NFS support is enabled. OSS OSS sound support is enabled. PARIDE The ParIDE subsystem is enabled. @@ -330,6 +331,7 @@ running once the system is up. /selinux/checkreqprot. clock= [BUGS=IA-32,HW] gettimeofday timesource override. + [Deprecated] Forces specified timesource (if avaliable) to be used when calculating gettimeofday(). If specicified timesource is not avalible, it defaults to PIT. @@ -1479,6 +1481,10 @@ running once the system is up. time Show timing data prefixed to each printk message line + clocksource= [GENERIC_TIME] Override the default clocksource + Override the default clocksource and use the clocksource + with the name specified. + tipar.timeout= [HW,PPT] Set communications timeout in tenths of a second (default 15). 
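The shift_right() helper added to timex.h above exists because right-shifting a negative signed integer is implementation-defined in C; the kernel/time.c hunks earlier in the patch use it to replace the old if/else branches on the sign of the value. A minimal userspace sketch of the difference against a plain shift (it assumes GCC's typeof and statement-expression extensions; it is an editorial illustration, not kernel code):

#include <stdio.h>

/* Same helper as the timex.h hunk: shift toward zero even for negatives. */
#define shift_right(x, s) ({			\
	__typeof__(x) __x = (x);		\
	__typeof__(s) __s = (s);		\
	__x < 0 ? -(-__x >> __s) : __x >> __s;	\
})

int main(void)
{
	long v = -1000;

	/* On common two's-complement compilers a plain shift rounds toward
	 * negative infinity (-1000 >> 4 == -63), while the NTP code wants
	 * truncation toward zero (shift_right(-1000, 4) == -62). */
	printf("plain shift : %ld\n", v >> 4);
	printf("shift_right : %ld\n", shift_right(v, 4));
	return 0;
}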
Index: linux-2.6.14/include/linux/clocksource.h =================================================================== --- /dev/null +++ linux-2.6.14/include/linux/clocksource.h @@ -0,0 +1,326 @@ +/* linux/include/linux/clocksource.h + * + * This file contains the structure definitions for clocksources. + * + * If you are not a clocksource, or the time of day code, you should + * not be including this file! + */ +#ifndef _LINUX_CLOCKSOURCE_H +#define _LINUX_CLOCKSOURCE_H + +#include +#include +#include +#include +#include +#include + +/** + * struct clocksource - hardware abstraction for a free running counter + * Provides mostly state-free accessors to the underlying hardware. + * + * @name: ptr to clocksource name + * @list: list head for registration + * @rating: rating value for selection (higher is better) + * To avoid rating inflation the following + * list should give you a guide as to how + * to assign your clocksource a rating + * 1-99: Unfit for real use + * Only available for bootup and testing purposes. + * 100-199: Base level usability. + * Functional for real use, but not desired. + * 200-299: Good. + * A correct and usable clocksource. + * 300-399: Desired. + * A reasonably fast and accurate clocksource. + * 400-499: Perfect + * The ideal clocksource. A must-use where + * available. + * @type: defines clocksource type + * @read_fnct: returns a cycle value + * @ptr: ptr to MMIO'ed counter + * @mask: bitmask for two's complement + * subtraction of non 64 bit counters + * @mult: cycle to nanosecond multiplier + * @shift: cycle to nanosecond divisor (power of two) + * @update_callback: called when safe to alter clocksource values + * @is_continuous: defines if clocksource is free-running. + */ +struct clocksource { + char* name; + struct list_head list; + int rating; + enum { + CLOCKSOURCE_FUNCTION, + CLOCKSOURCE_CYCLES, + CLOCKSOURCE_MMIO_32, + CLOCKSOURCE_MMIO_64 + } type; + cycle_t (*read_fnct)(void); + void __iomem *mmio_ptr; + cycle_t mask; + u32 mult; + u32 shift; + int (*update_callback)(void); + int is_continuous; +}; + + +/** + * clocksource_khz2mult - calculates mult from khz and shift + * @khz: Clocksource frequency in KHz + * @shift_constant: Clocksource shift factor + * + * Helper functions that converts a khz counter frequency to a timsource + * multiplier, given the clocksource shift value + */ +static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) +{ + /* khz = cyc/(Million ns) + * mult/2^shift = ns/cyc + * mult = ns/cyc * 2^shift + * mult = 1Million/khz * 2^shift + * mult = 1000000 * 2^shift / khz + * mult = (1000000<type) { + case CLOCKSOURCE_MMIO_32: + return (cycle_t)readl(cs->mmio_ptr); + case CLOCKSOURCE_MMIO_64: + return (cycle_t)clocksource_readq(cs->mmio_ptr); + case CLOCKSOURCE_CYCLES: + return (cycle_t)get_cycles(); + default:/* case: CLOCKSOURCE_FUNCTION */ + return cs->read_fnct(); + } +} + +/** + * ppm_to_mult_adj - Converts shifted ppm values to mult adjustment + * @cs: Pointer to clocksource + * @ppm: Shifted PPM value + * + * Helper which converts a shifted ppm value to clocksource mult_adj value. + * + * XXX - this could use some optimization + */ +static inline int ppm_to_mult_adj(struct clocksource *cs, int ppm) +{ + u64 mult_adj; + int ret_adj; + + /* The basic math is as follows: + * cyc * mult/2^shift * (1 + ppm/MILL) = scaled ns + * We want to precalculate the ppm factor so it can be added + * to the multiplyer saving the extra multiplication step. 
+ * cyc * (mult/2^shift + (mult/2^shift) * (ppm/MILL)) = + * cyc * (mult/2^shift + (mult*ppm/MILL)/2^shift) = + * cyc * (mult + (mult*ppm/MILL))/2^shift = + * Thus we want to calculate the value of: + * mult*ppm/MILL + */ + mult_adj = abs(ppm); + mult_adj = (mult_adj * cs->mult)>>SHIFT_USEC; + mult_adj += 1000000/2; /* round for div*/ + do_div(mult_adj, 1000000); + if (ppm < 0) + ret_adj = -(int)mult_adj; + else + ret_adj = (int)mult_adj; + return ret_adj; +} + +/** + * cyc2ns - converts clocksource cycles to nanoseconds + * @cs: Pointer to clocksource + * @ntp_adj: Multiplier adjustment value + * @cycles: Cycles + * + * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. + * + * XXX - This could use some mult_lxl_ll() asm optimization + */ +static inline nsec_t cyc2ns(struct clocksource *cs, int ntp_adj, cycle_t cycles) +{ + u64 ret; + ret = (u64)cycles; + ret *= (cs->mult + ntp_adj); + ret >>= cs->shift; + return (nsec_t)ret; +} + +/** + * cyc2ns_rem - converts clocksource cycles to nanoseconds w/ remainder + * @cs: Pointer to clocksource + * @ntp_adj: Multiplier adjustment value + * @cycles: Cycles + * @rem: Remainder + * + * Uses the clocksource and ntp ajdustment interval to convert cycle_t to + * nanoseconds. Add in remainder portion which is stored in (ns<shift) + * units and save the new remainder off. + * + * XXX - This could use some mult_lxl_ll() asm optimization. + */ +static inline nsec_t cyc2ns_rem(struct clocksource *cs, int ntp_adj, cycle_t cycles, u64* rem) +{ + u64 ret; + ret = (u64)cycles; + ret *= (cs->mult + ntp_adj); + if (rem) { + ret += *rem; + *rem = ret & ((1<shift)-1); + } + ret >>= cs->shift; + return (nsec_t)ret; +} + + +/** + * struct clocksource_interval - Fixed interval conversion structure + * + * @cycles: A specified number of cycles + * @nsecs: The number of nanoseconds equivalent to the cycles value + * @remainder: Non-integer nanosecond remainder stored in (ns<shift) units + * @remainder_ns_overflow: Value at which the remainder is equal to + * one second + * + * This is a optimization structure used by cyc2ns_fixed_rem() to avoid the + * multiply in cyc2ns(). + * + * Unless you're the timeofday_periodic_hook, you should not be using this! + */ +struct clocksource_interval { + cycle_t cycles; + nsec_t nsecs; + u64 remainder; + u64 remainder_ns_overflow; +}; + +/** + * calculate_clocksource_interval - Calculates a clocksource interval struct + * + * @c: Pointer to clocksource. + * @adj: Multiplyer adjustment. + * @length_nsec: Desired interval length in nanoseconds. + * + * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment + * pair and interval request. + * + * Unless you're the timeofday_periodic_hook, you should not be using this! 
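All of the cycle-to-nanosecond conversions above boil down to the same fixed-point idea: mult/2^shift approximates nanoseconds per cycle. A small standalone sketch of that arithmetic, where khz2mult() and cyc2ns() are local stand-ins for the patch's clocksource_khz2mult() and cyc2ns(), and the 1 MHz counter and shift of 20 are made-up example values:

#include <stdio.h>
#include <stdint.h>

static uint32_t khz2mult(uint32_t khz, uint32_t shift)
{
	/* mult = (1000000 << shift) / khz, i.e. ns-per-cycle scaled by 2^shift */
	uint64_t tmp = (uint64_t)1000000 << shift;

	tmp += khz / 2;		/* round the division */
	return (uint32_t)(tmp / khz);
}

static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

int main(void)
{
	uint32_t shift = 20;
	uint32_t mult = khz2mult(1000, shift);	/* hypothetical 1 MHz counter */

	/* 5 cycles of a 1 MHz counter should convert to about 5000 ns */
	printf("mult=%u, 5 cycles -> %llu ns\n", mult,
	       (unsigned long long)cyc2ns(5, mult, shift));
	return 0;
}

With those numbers mult comes out to 1048576000, and five counter cycles convert to exactly 5000 ns.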
+ */ +static inline struct clocksource_interval +calculate_clocksource_interval(struct clocksource *c, long adj, + unsigned long length_nsec) +{ + struct clocksource_interval ret; + u64 tmp; + + /* XXX - All of this could use a whole lot of optimization */ + tmp = length_nsec; + tmp <<= c->shift; + do_div(tmp, c->mult+adj); + + ret.cycles = (cycle_t)tmp; + if(ret.cycles == 0) + ret.cycles = 1; + + ret.remainder = 0; + ret.remainder_ns_overflow = 1 << c->shift; + ret.nsecs = cyc2ns_rem(c, adj, ret.cycles, &ret.remainder); + + return ret; +} + +/** + * cyc2ns_fixed_rem - + * converts clocksource cycles to nanoseconds using fixed intervals + * + * @interval: precalculated clocksource_interval structure + * @cycles: Number of clocksource cycles + * @rem: Remainder + * + * Uses a precalculated fixed cycle/nsec interval to convert cycles to + * nanoseconds. Returns the unaccumulated cycles in the cycles pointer as + * well as uses and updates the value at the remainder pointer + * + * Unless you're the timeofday_periodic_hook, you should not be using this! + */ +static inline nsec_t cyc2ns_fixed_rem(struct clocksource_interval interval, cycle_t *cycles, u64* rem) +{ + nsec_t delta_nsec = 0; + while(*cycles > interval.cycles) { + delta_nsec += interval.nsecs; + *cycles -= interval.cycles; + *rem += interval.remainder; + while(*rem > interval.remainder_ns_overflow) { + *rem -= interval.remainder_ns_overflow; + delta_nsec += 1; + } + } + return delta_nsec; +} + + +/* used to install a new clocksource */ +void register_clocksource(struct clocksource*); +void reselect_clocksource(void); +struct clocksource* get_next_clocksource(void); +#endif Index: linux-2.6.14/kernel/time/Makefile =================================================================== --- /dev/null +++ linux-2.6.14/kernel/time/Makefile @@ -0,0 +1 @@ +obj-y = clocksource.o jiffies.o clockevents.o timeofday.o Index: linux-2.6.14/kernel/time/clocksource.c =================================================================== --- /dev/null +++ linux-2.6.14/kernel/time/clocksource.c @@ -0,0 +1,286 @@ +/********************************************************************* +* linux/kernel/time/clocksource.c +* +* This file contains the functions which manage clocksource drivers. +* +* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +* +* TODO WishList: +* o Allow clocksource drivers to be unregistered +* o get rid of clocksource_jiffies extern +**********************************************************************/ + +#include +#include +#include +#include + +/* XXX - Would like a better way for initializing curr_clocksource */ +extern struct clocksource clocksource_jiffies; + +/*[Clocksource internal variables]--------- + * curr_clocksource: + * currently selected clocksource. Initialized to clocksource_jiffies. 
+ * next_clocksource: + * pending next selected clocksource. + * clocksource_list: + * linked list with the registered clocksources + * clocksource_lock: + * protects manipulations to curr_clocksource and next_clocksource + * and the clocksource_list + * override_name: + * Name of the user-specified clocksource. + */ +static struct clocksource *curr_clocksource = &clocksource_jiffies; +static struct clocksource *next_clocksource; +static LIST_HEAD(clocksource_list); +static seqlock_t clocksource_lock = SEQLOCK_UNLOCKED; + +static char override_name[32]; + + +/** + * get_next_clocksource - Returns the selected clocksource + * + */ +struct clocksource *get_next_clocksource(void) +{ + write_seqlock(&clocksource_lock); + if (next_clocksource) { + curr_clocksource = next_clocksource; + next_clocksource = NULL; + } + write_sequnlock(&clocksource_lock); + + return curr_clocksource; +} + + +/** + * select_clocksource - Finds the best registered clocksource. + * + * Private function. Must have a writelock on clocksource_lock + * when called. + */ +static struct clocksource *select_clocksource(void) +{ + struct clocksource *best = NULL; + struct list_head *tmp; + + list_for_each(tmp, &clocksource_list) { + struct clocksource *src; + + src = list_entry(tmp, struct clocksource, list); + if (!best) + best = src; + + /* Check for override */ + if (strlen(src->name) == strlen(override_name) && + !strcmp(src->name, override_name)) { + best = src; + break; + } + /* Pick the highest rating */ + if (src->rating > best->rating) + best = src; + } + return best; +} + + +/** + * is_registered_source - Checks if clocksource is registered + * @c: pointer to a clocksource + * + * Private helper function, should not be used externally. + * + * Returns one if the clocksource is already registered, zero otherwise. + */ +static inline int is_registered_source(struct clocksource *c) +{ + struct list_head *tmp; + int len = strlen(c->name); + + list_for_each(tmp, &clocksource_list) { + struct clocksource *src; + + src = list_entry(tmp, struct clocksource, list); + if (strlen(src->name) == len && !strcmp(src->name, c->name)) + return 1; + } + return 0; +} + + +/** + * register_clocksource - Used to install new clocksources + * @t: clocksource to be registered + * + */ +void register_clocksource(struct clocksource *c) +{ + write_seqlock(&clocksource_lock); + + /* check if clocksource is already registered */ + if (is_registered_source(c)) { + printk("register_clocksource: Cannot register %s. Already registered!", + c->name); + } else { + list_add(&c->list, &clocksource_list); + /* select next clocksource */ + next_clocksource = select_clocksource(); + } + write_sequnlock(&clocksource_lock); +} +EXPORT_SYMBOL(register_clocksource); + + +/** + * reselect_clocksource - Rescan list for next clocksource + * + * A quick helper function to be used if a clocksource changes its + * rating. Forces the clocksource list to be re-scaned for the best + * clocksource. 
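The selection policy implemented by select_clocksource() above is simple: a user-supplied override name wins outright, otherwise the highest rating does. A standalone sketch of just that rule (struct cs and pick() are illustrative stand-ins, not the patch's types):

#include <stdio.h>
#include <string.h>

struct cs { const char *name; int rating; };

static struct cs *pick(struct cs *list, int n, const char *override)
{
	struct cs *best = NULL;
	int i;

	for (i = 0; i < n; i++) {
		if (!best)
			best = &list[i];
		if (override && !strcmp(list[i].name, override))
			return &list[i];	/* user override beats rating */
		if (list[i].rating > best->rating)
			best = &list[i];
	}
	return best;
}

int main(void)
{
	struct cs list[] = { {"jiffies", 0}, {"pit", 110}, {"tsc", 300} };

	printf("default : %s\n", pick(list, 3, NULL)->name);	/* tsc */
	printf("override: %s\n", pick(list, 3, "pit")->name);	/* pit */
	return 0;
}

In the patch the override name is filled in either from the clocksource= boot parameter or by writing the sysfs attribute registered further down in clocksource.c, which should surface somewhere like /sys/devices/system/clocksource/clocksource0/clocksource (the exact path depends on how the sysdev class is laid out).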
+ */ +void reselect_clocksource(void) +{ + write_seqlock(&clocksource_lock); + next_clocksource = select_clocksource(); + write_sequnlock(&clocksource_lock); +} + + +/** + * sysfs_show_clocksources - sysfs interface for listing clocksource + * @dev: unused + * @buf: char buffer to be filled with clocksource list + * + * Provides sysfs interface for listing registered clocksources + */ +static ssize_t sysfs_show_clocksources(struct sys_device *dev, char *buf) +{ + char* curr = buf; + struct list_head *tmp; + + write_seqlock(&clocksource_lock); + + list_for_each(tmp, &clocksource_list) { + struct clocksource *src; + + src = list_entry(tmp, struct clocksource, list); + /* Mark current clocksource w/ a star */ + if (src == curr_clocksource) + curr += sprintf(curr, "*"); + curr += sprintf(curr, "%s ", src->name); + } + write_sequnlock(&clocksource_lock); + + curr += sprintf(curr, "\n"); + return curr - buf; +} + + +/** + * sysfs_override_clocksource - interface for manually overriding clocksource + * @dev: unused + * @buf: name of override clocksource + * @count: length of buffer + * + * Takes input from sysfs interface for manually overriding the default + * clocksource selction + */ +static ssize_t sysfs_override_clocksource(struct sys_device *dev, + const char *buf, size_t count) +{ + /* Strings from sysfs write are not 0 terminated ! */ + if (count >= sizeof(override_name)) + return -EINVAL; + /* Strip of \n */ + if (buf[count-1] == '\n') + count--; + if (count < 1) + return -EINVAL; + + write_seqlock(&clocksource_lock); + + /* copy the name given */ + memcpy(override_name, buf, count); + override_name[count] = 0; + + /* try to select it */ + next_clocksource = select_clocksource(); + + write_sequnlock(&clocksource_lock); + return count; +} + + +/* Sysfs setup bits: + */ +static SYSDEV_ATTR(clocksource, 0600, sysfs_show_clocksources, sysfs_override_clocksource); + +static struct sysdev_class clocksource_sysclass = { + set_kset_name("clocksource"), +}; + +static struct sys_device device_clocksource = { + .id = 0, + .cls = &clocksource_sysclass, +}; + +static int init_clocksource_sysfs(void) +{ + int error = sysdev_class_register(&clocksource_sysclass); + if (!error) { + error = sysdev_register(&device_clocksource); + if (!error) + error = sysdev_create_file(&device_clocksource, &attr_clocksource); + } + return error; +} +device_initcall(init_clocksource_sysfs); + + +/** + * boot_override_clocksource - boot clock override + * @str: override name + * + * Takes a clocksource= boot argument and uses it + * as the clocksource override name + */ +static int __init boot_override_clocksource(char* str) +{ + if (str) + strlcpy(override_name, str, sizeof(override_name)); + return 1; +} +__setup("clocksource=", boot_override_clocksource); + + +/** + * boot_override_clock - Compatibility layer for deprecated boot option + * @str: override name + * + * DEPRECATED! Takes a clock= boot argument and uses it + * as the clocksource override name + */ +static int __init boot_override_clock(char* str) +{ + printk("Warning! clock= boot option is deprecated.\n"); + return boot_override_clocksource(str); +} +__setup("clock=", boot_override_clock); Index: linux-2.6.14/kernel/time/jiffies.c =================================================================== --- /dev/null +++ linux-2.6.14/kernel/time/jiffies.c @@ -0,0 +1,75 @@ +/*********************************************************************** +* linux/kernel/time/jiffies.c +* +* This file contains the jiffies based clocksource. 
+* +* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +* +************************************************************************/ +#include +#include +#include + +/* The Jiffies based clocksource is the lowest common + * denominator clock source which should function on + * all systems. It has the same coarse resolution as + * the timer interrupt frequency HZ and it suffers + * inaccuracies caused by missed or lost timer + * interrupts and the inability for the timer + * interrupt hardware to accurately tick at the + * requested HZ value. It is also not recommended + * for "tick-less" systems. + */ +#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ)) + +/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier + * conversion, the .shift value could be zero. However + * this would make NTP adjustments impossible as they are + * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to + * shift both the numerator and denominator the same + * amount, and give ntp adjustments in units of 1/2^8 + * + * The value 8 is somewhat carefully chosen, as anything + * larger can result in overflows. NSEC_PER_JIFFY grows as + * HZ shrinks, so values greater than 8 overflow 32bits when + * HZ=100.
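A quick back-of-the-envelope check of that overflow claim, assuming HZ=100 so one jiffy is roughly 10,000,000 ns (standalone arithmetic, not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t nsec_per_jiffy = 10000000;	/* ~10 ms per tick at HZ=100 */
	int shift;

	for (shift = 8; shift <= 9; shift++) {
		uint64_t mult = nsec_per_jiffy << shift;

		printf("shift %d: mult=%llu %s\n", shift,
		       (unsigned long long)mult,
		       mult > UINT32_MAX ? "(overflows u32)" : "(fits in u32)");
	}
	return 0;
}

mult stays at 2,560,000,000 for a shift of 8 but would need 5,120,000,000 for a shift of 9, which no longer fits in the 32-bit mult field.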
+ */ +#define JIFFIES_SHIFT 8 + +static cycle_t jiffies_read(void) +{ + cycle_t ret = get_jiffies_64(); + return ret; +} + +struct clocksource clocksource_jiffies = { + .name = "jiffies", + .rating = 0, /* lowest rating*/ + .type = CLOCKSOURCE_FUNCTION, + .read_fnct = jiffies_read, + .mask = (cycle_t)-1, + .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* See above for details */ + .shift = JIFFIES_SHIFT, + .is_continuous = 0, /* tick based, not free running */ +}; + +static int __init init_jiffies_clocksource(void) +{ + register_clocksource(&clocksource_jiffies); + return 0; +} +module_init(init_jiffies_clocksource); Index: linux-2.6.14/drivers/char/hangcheck-timer.c =================================================================== --- linux-2.6.14.orig/drivers/char/hangcheck-timer.c +++ linux-2.6.14/drivers/char/hangcheck-timer.c @@ -49,6 +49,7 @@ #include #include #include +#include #define VERSION_STR "0.9.0" @@ -130,8 +131,12 @@ __setup("hcheck_dump_tasks", hangcheck_p #endif #ifdef HAVE_MONOTONIC +#ifndef CONFIG_GENERIC_TIME extern unsigned long long monotonic_clock(void); #else +#define monotonic_clock() get_monotonic_clock_ns() +#endif +#else static inline unsigned long long monotonic_clock(void) { # ifdef __s390__ Index: linux-2.6.14/include/asm-generic/timeofday.h =================================================================== --- /dev/null +++ linux-2.6.14/include/asm-generic/timeofday.h @@ -0,0 +1,29 @@ +/* linux/include/asm-generic/timeofday.h + * + * This file contains the asm-generic interface + * to the arch specific calls used by the time of day subsystem + */ +#ifndef _ASM_GENERIC_TIMEOFDAY_H +#define _ASM_GENERIC_TIMEOFDAY_H +#include +#include +#include +#include +#include + +#include +#ifdef CONFIG_GENERIC_TIME +/* Required externs */ +extern nsec_t read_persistent_clock(void); +extern void sync_persistent_clock(struct timespec ts); + +#ifdef CONFIG_GENERIC_TIME_VSYSCALL +extern void arch_update_vsyscall_gtod(struct timespec wall_time, + cycle_t offset_base, struct clocksource* clock, + int ntp_adj); +#else +#define arch_update_vsyscall_gtod(x,y,z,w) {} +#endif /* CONFIG_GENERIC_TIME_VSYSCALL */ + +#endif /* CONFIG_GENERIC_TIME */ +#endif Index: linux-2.6.14/include/linux/timeofday.h =================================================================== --- /dev/null +++ linux-2.6.14/include/linux/timeofday.h @@ -0,0 +1,47 @@ +/* linux/include/linux/timeofday.h + * + * This file contains the interface to the time of day subsystem + */ +#ifndef _LINUX_TIMEOFDAY_H +#define _LINUX_TIMEOFDAY_H +#include +#include +#include +#include +#include + +#ifdef CONFIG_GENERIC_TIME + +/* Kernel internal interfaces */ +extern ktime_t get_realtime_offset(void); +extern ktime_t get_realtime_clock(void); +extern ktime_t get_monotonic_clock(void); + +/* Timespec based interfaces for user space functionality */ +extern void get_realtime_clock_ts(struct timespec *ts); +extern void get_monotonic_clock_ts(struct timespec *ts); + +/* legacy timeofday interfaces*/ +#define getnstimeofday(ts) get_realtime_clock_ts(ts) +extern void do_gettimeofday(struct timeval *tv); +extern int do_settimeofday(struct timespec *ts); + +/* get_monotonic_clock_ns wrapper */ +#define get_monotonic_clock_ns() ktime_to_ns(get_monotonic_clock()) + +/* Internal functions */ +extern int timeofday_is_continuous(void); +extern void timeofday_init(void); + +#ifndef CONFIG_IS_TICK_BASED +#define arch_getoffset() (0) +#else +extern unsigned long arch_getoffset(void); +#endif + +#else /* CONFIG_GENERIC_TIME */ + +#define 
timeofday_init() + +#endif /* CONFIG_GENERIC_TIME */ +#endif /* _LINUX_TIMEOFDAY_H */ Index: linux-2.6.14/kernel/time/timeofday.c =================================================================== --- /dev/null +++ linux-2.6.14/kernel/time/timeofday.c @@ -0,0 +1,681 @@ +/* + * linux/kernel/time/timeofday.c + * + * This file contains the functions which access and manage + * the system's time of day functionality. + * + * Copyright (C) 2003, 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * TODO WishList: + * o See XXX's below. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Periodic hook interval */ +#define PERIODIC_INTERVAL_MS 50 + +/* [ktime_t based variables] + * system_time: + * Monotonically increasing counter of the number of nanoseconds + * since boot. + * wall_time_offset: + * Offset added to system_time to provide accurate time-of-day + */ +static ktime_t system_time; +static ktime_t wall_time_offset; + +/* [timespec based variables] + * These variable are mirroring the ktime_t based variabled to avoid + * performance problems in the userspace interface paths + * + * wall_time_ts: + * timespec holding the current wall time. + * mono_time_ts: + * timespec holding the current monotonic time. + * monotonic_time_offset_ts: + * timespec holding the difference between wall and monotonic time. + */ +static struct timespec wall_time_ts; +static struct timespec mono_time_ts; +static struct timespec monotonic_time_offset_ts; + +/* [Cycle based variables] + * cycle_last: + * Value of the clocksource at the last timeofday_periodic_hook() + * (adjusted only minorly to account for rounded off cycles) + */ +static cycle_t cycle_last; + +/* [Clocksource_interval variables] + * ts_interval: This clocksource_interval is used in the + * fixed interval cycles to nanosecond calculation. + * + * INTERVAL_LEN: This constant is the requested fixed + * interval period in nanoseconds. + */ +struct clocksource_interval ts_interval; +#define INTERVAL_LEN ((PERIODIC_INTERVAL_MS-1)*1000000) + +/* [Time source data] + * clocks: + * current clocksource pointer + */ +static struct clocksource *clock; + +/* [NTP adjustment] + * ntp_adj: + * value of the current ntp adjustment, + * stored in clocksource multiplier units. 
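The variables documented above split timekeeping into a monotonic count plus a realtime offset, so that setting the clock never has to touch the monotonic side. A toy model using plain 64-bit nanosecond counters in place of ktime_t (the names here are illustrative, not the patch's):

#include <stdio.h>
#include <stdint.h>

static int64_t system_time_ns;		/* monotonic ns since boot */
static int64_t wall_offset_ns;		/* realtime = monotonic + offset */

static int64_t get_monotonic(void) { return system_time_ns; }
static int64_t get_realtime(void)  { return system_time_ns + wall_offset_ns; }

static void settimeofday_ns(int64_t new_wall)
{
	/* same idea as the patch's do_settimeofday(): only move the offset */
	wall_offset_ns = new_wall - get_monotonic();
}

int main(void)
{
	system_time_ns = 5000;			/* pretend 5 us of uptime */
	settimeofday_ns(1000000000LL);		/* set wall clock to 1 s */

	system_time_ns += 2000;			/* time advances 2 us */
	printf("monotonic %lld ns, wall %lld ns\n",
	       (long long)get_monotonic(), (long long)get_realtime());
	return 0;
}

After setting the wall clock and letting 2 us elapse, the monotonic value reads 7000 ns while the wall value reads 1,000,002,000 ns; only wall_offset_ns ever moved.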
+ */ +int ntp_adj; + +/* [Locks] + * system_time_lock: + * generic lock for all locally scoped time values + */ +static seqlock_t system_time_lock = SEQLOCK_UNLOCKED; + +/* [Suspend/Resume info] + * time_suspend_state: + * variable that keeps track of suspend state + * suspend_start: + * start time of the suspend call + */ +static enum { + TIME_RUNNING, + TIME_SUSPENDED +} time_suspend_state = TIME_RUNNING; + +static nsec_t suspend_start; + +/* [Soft-Timers] + * timeofday_timer: + * soft-timer used to call timeofday_periodic_hook() + */ +struct timer_list timeofday_timer; + + +/** + * update_legacy_time_values - sync legacy time values + * + * The function is necessary for a smooth transition to + * the new time keeping. When all the xtime/wall_to_monotonic + * users are converted this function can be removed. + * + * system_time_lock must be held by the caller + */ +static void update_legacy_time_values(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&xtime_lock, flags); + + xtime = wall_time_ts; + set_normalized_timespec(&wall_to_monotonic, + -monotonic_time_offset_ts.tv_sec, + -monotonic_time_offset_ts.tv_nsec); + + write_sequnlock_irqrestore(&xtime_lock, flags); + + /* since time state has changed, notify vsyscall code */ + arch_update_vsyscall_gtod(wall_time_ts, cycle_last, clock, ntp_adj); +} + +/** + * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook + * + * private function, must hold system_time_lock lock when being + * called. Returns the number of nanoseconds since the + * last call to timeofday_periodic_hook() (adjusted by NTP scaling) + */ +static inline nsec_t __get_nsec_offset(void) +{ + cycle_t cycle_now, cycle_delta; + nsec_t ns_offset; + + /* read clocksource */ + cycle_now = read_clocksource(clock); + + /* calculate the delta since the last timeofday_periodic_hook */ + cycle_delta = (cycle_now - cycle_last) & clock->mask; + + /* convert to nanoseconds */ + ns_offset = cyc2ns(clock, ntp_adj, cycle_delta); + + /* Special case for jiffies tick/offset based systems + * add arch specific offset + */ + ns_offset += arch_getoffset(); + + return ns_offset; +} + +/** + * __get_monotonic_clock - Returns monotonically increasing nanoseconds + * + * private function, must hold system_time_lock lock when being + * called. 
Returns the monotonically increasing number of + * nanoseconds since the system booted (adjusted by NTP scaling) + */ +static inline ktime_t __get_monotonic_clock(void) +{ + nsec_t offset = __get_nsec_offset(); + return ktime_add_ns(system_time, offset); +} + +/** + * get_monotonic_clock - Returns monotonically increasing nanoseconds + * + * Returns the monotonically increasing number of nanoseconds + * since the system booted via __monotonic_clock() + */ +ktime_t get_monotonic_clock(void) +{ + unsigned long seq; + ktime_t ret; + + /* atomically read __get_monotonic_clock_kt() */ + do { + seq = read_seqbegin(&system_time_lock); + + ret = __get_monotonic_clock(); + + } while (read_seqretry(&system_time_lock, seq)); + + return ret; +} + +EXPORT_SYMBOL_GPL(get_monotonic_clock); + +/** + * get_monotonic_clock_ts - Read monotonic time in timespec format + * + * @ts: pointer to the timespec to be set + * + * Read monotonic time in nanoseconds since the system booted and + * store the result in the timespec variable pointed to by @ts + */ +void get_monotonic_clock_ts(struct timespec *ts) +{ + unsigned long seq; + struct timespec mono_ts; + nsec_t offset; + + do { + seq = read_seqbegin(&system_time_lock); + + mono_ts = mono_time_ts; + offset = __get_nsec_offset(); + } while (read_seqretry(&system_time_lock, seq)); + + *ts = timespec_add_ns(mono_ts, offset); +} + +/** + * get_realtime_offset - Returns the offset of realtime clock + * + * Returns the number of nanoseconds in ktime_t storage format which + * represent the offset of the realtime clock to the monotonic clock + */ +ktime_t get_realtime_offset(void) +{ + unsigned long seq; + ktime_t ret; + + /* atomically read wall_time_offset */ + do { + seq = read_seqbegin(&system_time_lock); + + ret = wall_time_offset; + + } while (read_seqretry(&system_time_lock, seq)); + + return ret; +} + +/** + * get_realtime_clock - Read realtime clock in ktime_t format + * + * Returns the wall time in ktime_t format. The resolution is + * nanoseconds. 
+ */ +ktime_t get_realtime_clock(void) +{ + unsigned long seq; + ktime_t ret; + + /* atomically read __get_monotonic_clock_kt() */ + do { + seq = read_seqbegin(&system_time_lock); + + ret = __get_monotonic_clock(); + ret = ktime_add(ret, wall_time_offset); + + } while (read_seqretry(&system_time_lock, seq)); + + return ret; +} + +/** + * get_realtime_clock_ts - Read the time of day into a timespec variable + * + * @ts: pointer to timespec variable to store current time + * + * Read time of day and store the result in the timespec variable + * pointed to by @ts + */ +void get_realtime_clock_ts(struct timespec *ts) +{ + struct timespec now_ts; + unsigned long seq; + nsec_t nsecs; + + do { + seq = read_seqbegin(&system_time_lock); + + now_ts = wall_time_ts; + nsecs = __get_nsec_offset(); + + } while (read_seqretry(&system_time_lock, seq)); + + *ts = timespec_add_ns(now_ts, nsecs); +} + +EXPORT_SYMBOL(get_realtime_clock_ts); + +/** + * do_gettimeofday - Read the time of day into a timeval variable + * + * @tv: pointer to timeval variable to store current time + * + * Read time of day and store the result in the timeval variable + * pointed to by @tv + * + * NOTE: The users should be converted to use get_realtime_clock_ts() + */ +void do_gettimeofday(struct timeval *tv) +{ + struct timespec now_ts; + + get_realtime_clock_ts(&now_ts); + tv->tv_sec = now_ts.tv_sec; + tv->tv_usec = now_ts.tv_nsec/1000; +} + +EXPORT_SYMBOL(do_gettimeofday); + +/** + * do_settimeofday - Sets the time of day + * + * @tv: pointer to the timespec variable containing the new time + * + * Set time of day and adjust the internal offsets, update NTP and + * the legacy time interfaces. + */ +int do_settimeofday(struct timespec *tv) +{ + unsigned long flags; + ktime_t newtime; + + newtime = timespec_to_ktime(*tv); + + write_seqlock_irqsave(&system_time_lock, flags); + + /* Calculate the new offset to the monotonic clock */ + wall_time_offset = ktime_sub(newtime, __get_monotonic_clock()); + /* Update the internal timespec variables */ + ktime_to_timespec(&wall_time_ts, + ktime_add(system_time, wall_time_offset)); + + ktime_to_timespec(&monotonic_time_offset_ts, wall_time_offset); + + ntp_clear(); + update_legacy_time_values(); + + write_sequnlock_irqrestore(&system_time_lock, flags); + + /* inform ktimers about time change */ + clock_was_set(); + + return 0; +} + +EXPORT_SYMBOL(do_settimeofday); + +/** + * __increment_system_time - Increments system time + * + * @delta: nanosecond delta to add to the time variables + * + * Private helper that increments system_time and related + * timekeeping variables. + */ +static inline void __increment_system_time(nsec_t delta) +{ + system_time = ktime_add_ns(system_time, delta); + wall_time_ts = timespec_add_ns(wall_time_ts, delta); + mono_time_ts = timespec_add_ns(mono_time_ts, delta); +} + +/** + * timeofday_suspend_hook - allows the timeofday subsystem to be shutdown + * + * @dev: unused + * @state: unused + * + * This function allows the timeofday subsystem to be shutdown for a period + * of time. Called before going into suspend/hibernate mode. + */ +static int timeofday_suspend_hook(struct sys_device *dev, pm_message_t state) +{ + unsigned long flags; + + write_seqlock_irqsave(&system_time_lock, flags); + + BUG_ON(time_suspend_state != TIME_RUNNING); + + /* First off, save suspend start time + * then quickly accumulate the current nsec offset. + * These two calls hopefully occur quickly + * because the difference between reads will + * accumulate as time drift on resume. 
+ */ + suspend_start = read_persistent_clock(); + __increment_system_time(__get_nsec_offset()); + + time_suspend_state = TIME_SUSPENDED; + + write_sequnlock_irqrestore(&system_time_lock, flags); + + return 0; +} + +/** + * timeofday_resume_hook - Resumes the timeofday subsystem. + * + * @dev: unused + * + * This function resumes the timeofday subsystem from a previous call + * to timeofday_suspend_hook. + */ +static int timeofday_resume_hook(struct sys_device *dev) +{ + nsec_t suspend_end, suspend_time; + unsigned long flags; + + write_seqlock_irqsave(&system_time_lock, flags); + + BUG_ON(time_suspend_state != TIME_SUSPENDED); + + /* Read persistent clock to mark the end of + * the suspend interval then rebase the + * cycle_last to current clocksource value. + * Again, time between these two calls will + * not be accounted for and will show up as + * time drift. + */ + suspend_end = read_persistent_clock(); + cycle_last = read_clocksource(clock); + + /* calculate suspend time and add it to system time */ + suspend_time = suspend_end - suspend_start; + __increment_system_time(suspend_time); + + ntp_clear(); + + time_suspend_state = TIME_RUNNING; + + update_legacy_time_values(); + + write_sequnlock_irqrestore(&system_time_lock, flags); + + /* inform ktimers about time change */ + clock_was_set(); + + return 0; +} + +/* sysfs resume/suspend bits */ +static struct sysdev_class timeofday_sysclass = { + .resume = timeofday_resume_hook, + .suspend = timeofday_suspend_hook, + set_kset_name("timeofday"), +}; + +static struct sys_device device_timer = { + .id = 0, + .cls = &timeofday_sysclass, +}; + +static int timeofday_init_device(void) +{ + int error = sysdev_class_register(&timeofday_sysclass); + + if (!error) + error = sysdev_register(&device_timer); + + return error; +} + +device_initcall(timeofday_init_device); + + +/** + * timeofday_periodic_hook - Does periodic update of timekeeping values. + * + * @unused: unused value + * + * Calculates the delta since the last call, updates system time and + * clears the offset. + * + * Called via timeofday_timer. 
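Both __get_nsec_offset() above and the periodic hook below measure elapsed time as (cycle_now - cycle_last) & clock->mask. A standalone sketch with a hypothetical 16-bit counter shows why that expression keeps working across counter wrap-around:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t mask = 0xffff;			/* pretend 16-bit counter */
	uint64_t last = 0xfff0;			/* read just before wrap */
	uint64_t now  = 0x0010;			/* read just after wrap */

	/* unsigned subtraction plus the width mask recovers the true delta */
	uint64_t delta = (now - last) & mask;	/* 0x20 = 32 cycles */

	printf("delta = %llu cycles\n", (unsigned long long)delta);
	return 0;
}

Even though the counter wrapped from 0xfff0 to 0x0010, the masked two's-complement subtraction still yields the 32 cycles that actually elapsed.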
+ */ +static void timeofday_periodic_hook(unsigned long unused) +{ + unsigned long flags; + + cycle_t cycle_now, cycle_delta; + nsec_t delta_nsec; + static u64 remainder; + + long leapsecond; + struct clocksource* next; + + int ppm; + static int ppm_last; + + int something_changed = 0, clocksource_changed = 0; + struct clocksource old_clock; + static nsec_t second_check; + + write_seqlock_irqsave(&system_time_lock, flags); + + /* read time source & calc time since last call*/ + cycle_now = read_clocksource(clock); + cycle_delta = (cycle_now - cycle_last) & clock->mask; + + delta_nsec = cyc2ns_fixed_rem(ts_interval, &cycle_delta, &remainder); + cycle_last = (cycle_now - cycle_delta)&clock->mask; + + /* update system_time */ + __increment_system_time(delta_nsec); + + /* advance the ntp state machine by ns interval*/ + ntp_advance(delta_nsec); + + /* Only call ntp_leapsecond and ntp_sync once a sec */ + second_check += delta_nsec; + if (second_check > NSEC_PER_SEC) { + /* do ntp leap second processing*/ + leapsecond = ntp_leapsecond(wall_time_ts); + if (leapsecond) { + wall_time_offset = ktime_add_ns(wall_time_offset, + leapsecond * NSEC_PER_SEC); + wall_time_ts.tv_sec += leapsecond; + monotonic_time_offset_ts.tv_sec += leapsecond; + } + /* sync the persistent clock */ + if (ntp_synced()) + sync_persistent_clock(wall_time_ts); + second_check -= NSEC_PER_SEC; + } + + /* if necessary, switch clocksources */ + next = get_next_clocksource(); + if (next != clock) { + /* immediately set new cycle_last */ + cycle_last = read_clocksource(next); + /* update cycle_now to avoid problems in accumulation later */ + cycle_now = cycle_last; + /* swap clocksources */ + old_clock = *clock; + clock = next; + printk(KERN_INFO "Time: %s clocksource has been installed.\n", + clock->name); + ntp_clear(); + ntp_adj = 0; + remainder = 0; + something_changed = 1; + clocksource_changed = 1; + } + + /* now is a safe time, so allow clocksource to adjust + * itself (for example: to make cpufreq changes). 
+ */ + if (clock->update_callback) { + /* since clocksource state might change, + * keep a copy, but only if we've not + * already changed timesources + */ + if (!something_changed) + old_clock = *clock; + if (clock->update_callback()) { + remainder = 0; + something_changed = 1; + } + } + + /* check for new PPM adjustment */ + ppm = ntp_get_ppm_adjustment(); + if (ppm_last != ppm) { + /* make sure old_clock is set */ + if (!something_changed) + old_clock = *clock; + something_changed = 1; + } + + /* if something changed, recalculate the ntp adjustment value */ + if (something_changed) { + /* accumulate current leftover cycles using old_clock */ + if (cycle_delta) { + delta_nsec = cyc2ns_rem(&old_clock, ntp_adj, + cycle_delta, &remainder); + cycle_last = cycle_now; + __increment_system_time(delta_nsec); + ntp_advance(delta_nsec); + } + + /* recalculate the ntp adjustment and fixed interval values */ + ppm_last = ppm; + ntp_adj = ppm_to_mult_adj(clock, ppm); + ts_interval = calculate_clocksource_interval(clock, ntp_adj, + INTERVAL_LEN); + } + + update_legacy_time_values(); + + write_sequnlock_irqrestore(&system_time_lock, flags); + + if (clocksource_changed) + ktimer_clock_notify(); + + /* Set us up to go off on the next interval */ + mod_timer(&timeofday_timer, + jiffies + msecs_to_jiffies(PERIODIC_INTERVAL_MS)); +} + +/** + * timeofday_is_continuous - check to see if timekeeping is free running + * + */ +int timeofday_is_continuous(void) +{ + unsigned long seq; + int ret; + do { + seq = read_seqbegin(&system_time_lock); + + ret = clock->is_continuous; + + } while (read_seqretry(&system_time_lock, seq)); + + return ret; +} + +/** + * timeofday_init - Initializes time variables + */ +void __init timeofday_init(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&system_time_lock, flags); + + /* initialize the clock variable */ + clock = get_next_clocksource(); + + /* initialize cycle_last offset base */ + cycle_last = read_clocksource(clock); + + /* initialize wall_time_offset to now*/ + /* XXX - this should be something like ns_to_ktime() */ + wall_time_offset = ktime_add_ns(wall_time_offset, + read_persistent_clock()); + + /* initialize timespec values */ + ktime_to_timespec(&wall_time_ts, + ktime_add(system_time, wall_time_offset)); + ktime_to_timespec(&monotonic_time_offset_ts, wall_time_offset); + + + /* clear NTP scaling factor & state machine */ + ntp_adj = 0; + ntp_clear(); + ts_interval = calculate_clocksource_interval(clock, ntp_adj, + INTERVAL_LEN); + + /* initialize legacy time values */ + update_legacy_time_values(); + + write_sequnlock_irqrestore(&system_time_lock, flags); + + /* Install timeofday_periodic_hook timer */ + init_timer(&timeofday_timer); + timeofday_timer.function = timeofday_periodic_hook; + timeofday_timer.expires = jiffies + 1; + add_timer(&timeofday_timer); +} Index: linux-2.6.14/arch/i386/kernel/Makefile =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/Makefile +++ linux-2.6.14/arch/i386/kernel/Makefile @@ -7,10 +7,9 @@ extra-y := head.o init_task.o vmlinux.ld obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o i8237.o + doublefault.o quirks.o i8237.o i8253.o tsc.o obj-y += cpu/ -obj-y += timers/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o Index: linux-2.6.14/arch/i386/kernel/i8253.c 
=================================================================== --- /dev/null +++ linux-2.6.14/arch/i386/kernel/i8253.c @@ -0,0 +1,133 @@ +/* + * i8253.c 8253/PIT functions + * + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "io_ports.h" + +DEFINE_SPINLOCK(i8253_lock); +EXPORT_SYMBOL(i8253_lock); + +static void init_pit_timer(int mode) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + + if (mode != CLOCK_EVT_ONESHOT) { + /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(0x34, PIT_MODE); + udelay(10); + outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ + outb(LATCH >> 8 , PIT_CH0); /* MSB */ + } else { + /* One shot setup */ + outb_p(0x38, PIT_MODE); + udelay(10); + } + + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static void pit_next_event(unsigned long evt) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(evt & 0xff , PIT_CH0); /* LSB */ + outb(evt >> 8 , PIT_CH0); /* MSB */ + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static struct clock_event pit_clockevent = { + .name = "pit", + .capabilities = CLOCK_CAP_TICK +#ifndef CONFIG_SMP + | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_CAP_UPDATE +#endif + , + .set_mode = init_pit_timer, + .set_next_event = pit_next_event, + .start_event = io_apic_timer_ack, + .end_event = mca_timer_ack, + .shift = 32, + .irq = 0, +}; + +void setup_pit_timer(void) +{ + pit_clockevent.mult = div_sc32(CLOCK_TICK_RATE, NSEC_PER_SEC); + pit_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFF, &pit_clockevent); + pit_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &pit_clockevent); + setup_global_clockevent(&pit_clockevent, CPU_MASK_NONE); +} + +/* Since the PIT overflows every tick, its not very useful + * to just read by itself. So use jiffies to emulate a free + * running counter. + */ + +static cycle_t pit_read(void) +{ + unsigned long flags, seq; + int count; + u64 jifs; + + do { + seq = read_seqbegin(&xtime_lock); + + spin_lock_irqsave(&i8253_lock, flags); + + outb_p(0x00, PIT_MODE); /* latch the count ASAP */ + count = inb_p(PIT_CH0); /* read the latched count */ + count |= inb_p(PIT_CH0) << 8; + + /* VIA686a test code... reset the latch if count > max + 1 */ + if (count > LATCH) { + outb_p(0x34, PIT_MODE); + outb_p(LATCH & 0xff, PIT_CH0); + outb(LATCH >> 8, PIT_CH0); + count = LATCH - 1; + } + spin_unlock_irqrestore(&i8253_lock, flags); + jifs = get_jiffies_64() - INITIAL_JIFFIES; + } while (read_seqretry(&xtime_lock, seq)); + + count = (LATCH-1) - count; + + return (cycle_t)(jifs * LATCH) + count; +} + +static struct clocksource clocksource_pit = { + .name = "pit", + .rating = 110, + .type = CLOCKSOURCE_FUNCTION, + .read_fnct = pit_read, + .mask = (cycle_t)-1, + .mult = 0, + .shift = 20, +}; + +static int __init init_pit_clocksource(void) +{ + clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); + register_clocksource(&clocksource_pit); + return 0; +} +module_init(init_pit_clocksource); Index: linux-2.6.14/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/timer_pit.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. 
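The pit_read() function in the new i8253.c builds a free-running cycle count from a timer that overflows every tick: whole jiffies contribute LATCH cycles each, and the latched down-counter supplies the fractional tick. A standalone sketch of that arithmetic, assuming the usual LATCH of 11932 for a 1193182 Hz PIT at HZ=100 (the specific values are illustrative):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint32_t latch = 11932;		/* PIT reload value at HZ=100 */
	uint64_t jiffies_since_boot = 1000;	/* whole ticks so far */
	uint32_t count = 5000;			/* latched PIT value (counts down) */

	/* whole ticks * LATCH, plus how far the current tick has advanced */
	uint64_t cycles = jiffies_since_boot * latch + ((latch - 1) - count);

	printf("free-running PIT cycles: %llu\n", (unsigned long long)cycles);
	return 0;
}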
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "do_timer.h" -#include "io_ports.h" - -static int count_p; /* counter in get_offset_pit() */ - -static int __init init_pit(char* override) -{ - /* check clock override */ - if (override[0] && strncmp(override,"pit",3)) - printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n"); - - count_p = LATCH; - return 0; -} - -static void mark_offset_pit(void) -{ - /* nothing needed */ -} - -static unsigned long long monotonic_clock_pit(void) -{ - return 0; -} - -static void delay_pit(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - - -/* This function must be called with xtime_lock held. - * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs - * - * However, the pc-audio speaker driver changes the divisor so that - * it gets interrupted rather more often - it loads 64 into the - * counter rather than 11932! This has an adverse impact on - * do_gettimeoffset() -- it stops working! What is also not - * good is that the interval that our timer function gets called - * is no longer 10.0002 ms, but 9.9767 ms. To get around this - * would require using a different timing source. Maybe someone - * could use the RTC - I know that this can interrupt at frequencies - * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix - * it so that at startup, the timer code in sched.c would select - * using either the RTC or the 8253 timer. The decision would be - * based on whether there was any other device around that needed - * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz, - * and then do some jiggery to have a version of do_timer that - * advanced the clock by 1/1024 s. Every time that reached over 1/100 - * of a second, then do all the old code. If the time was kept correct - * then do_gettimeoffset could just return 0 - there is no low order - * divider that can be accessed. - * - * Ideally, you would be able to use the RTC for the speaker driver, - * but it appears that the speaker driver really needs interrupt more - * often than every 120 us or so. - * - * Anyway, this needs more thought.... pjsg (1993-08-28) - * - * If you are really that interested, you should be reading - * comp.protocols.time.ntp! - */ - -static unsigned long get_offset_pit(void) -{ - int count; - unsigned long flags; - static unsigned long jiffies_p = 0; - - /* - * cache volatile jiffies temporarily; we have xtime_lock. - */ - unsigned long jiffies_t; - - spin_lock_irqsave(&i8253_lock, flags); - /* timer count may underflow right here */ - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - - /* - * We do this guaranteed double memory access instead of a _p - * postfix in the previous port access. Wheee, hackady hack - */ - jiffies_t = jiffies; - - count |= inb_p(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * avoiding timer inconsistencies (they are rare, but they happen)... - * there are two kinds of problems that must be avoided here: - * 1. the timer counter underflows - * 2. 
hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) - */ - - if( jiffies_t == jiffies_p ) { - if( count > count_p ) { - /* the nutcase */ - count = do_timer_overflow(count); - } - } else - jiffies_p = jiffies_t; - - count_p = count; - - spin_unlock_irqrestore(&i8253_lock, flags); - - count = ((LATCH-1) - count) * TICK_SIZE; - count = (count + LATCH/2) / LATCH; - - return count; -} - - -/* tsc timer_opts struct */ -struct timer_opts timer_pit = { - .name = "pit", - .mark_offset = mark_offset_pit, - .get_offset = get_offset_pit, - .monotonic_clock = monotonic_clock_pit, - .delay = delay_pit, -}; - -struct init_timer_opts __initdata timer_pit_init = { - .init = init_pit, - .opts = &timer_pit, -}; - -void setup_pit_timer(void) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - udelay(10); - outb(LATCH >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); -} Index: linux-2.6.14/arch/i386/kernel/timers/common.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/common.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Common functions used across the timers go here - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mach_timer.h" - -/* ------ Calibrate the TSC ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). - * Too much 64-bit arithmetic here to do this cleanly in C, and for - * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) - * output busy loop as low as possible. We avoid reading the CTC registers - * directly because of the awkward 8-bit access mechanism of the 82C54 - * device. - */ - -#define CALIBRATE_TIME (5 * 1000020/HZ) - -unsigned long calibrate_tsc(void) -{ - mach_prepare_counter(); - - { - unsigned long startlow, starthigh; - unsigned long endlow, endhigh; - unsigned long count; - - rdtsc(startlow,starthigh); - mach_countup(&count); - rdtsc(endlow,endhigh); - - - /* Error: ECTCNEVERSET */ - if (count <= 1) - goto bad_ctc; - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (endlow), "=d" (endhigh) - :"g" (startlow), "g" (starthigh), - "0" (endlow), "1" (endhigh)); - - /* Error: ECPUTOOFAST */ - if (endhigh) - goto bad_ctc; - - /* Error: ECPUTOOSLOW */ - if (endlow <= CALIBRATE_TIME) - goto bad_ctc; - - __asm__("divl %2" - :"=a" (endlow), "=d" (endhigh) - :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); - - return endlow; - } - - /* - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ -bad_ctc: - return 0; -} - -#ifdef CONFIG_HPET_TIMER -/* ------ Calibrate the TSC using HPET ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq. - * Second output is parameter 1 (when non NULL) - * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet(). - * calibrate_tsc() calibrates the processor TSC by comparing - * it to the HPET timer of known frequency. 
- * Too much 64-bit arithmetic here to do this cleanly in C - */ -#define CALIBRATE_CNT_HPET (5 * hpet_tick) -#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) - -unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) -{ - unsigned long tsc_startlow, tsc_starthigh; - unsigned long tsc_endlow, tsc_endhigh; - unsigned long hpet_start, hpet_end; - unsigned long result, remain; - - hpet_start = hpet_readl(HPET_COUNTER); - rdtsc(tsc_startlow, tsc_starthigh); - do { - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET); - rdtsc(tsc_endlow, tsc_endhigh); - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (tsc_endlow), "=d" (tsc_endhigh) - :"g" (tsc_startlow), "g" (tsc_starthigh), - "0" (tsc_endlow), "1" (tsc_endhigh)); - - /* Error: ECPUTOOFAST */ - if (tsc_endhigh) - goto bad_calibration; - - /* Error: ECPUTOOSLOW */ - if (tsc_endlow <= CALIBRATE_TIME_HPET) - goto bad_calibration; - - ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET); - if (remain > (tsc_endlow >> 1)) - result++; /* rounding the result */ - - if (tsc_hpet_quotient_ptr) { - unsigned long tsc_hpet_quotient; - - ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0, - CALIBRATE_CNT_HPET); - if (remain > (tsc_endlow >> 1)) - tsc_hpet_quotient++; /* rounding the result */ - *tsc_hpet_quotient_ptr = tsc_hpet_quotient; - } - - return result; -bad_calibration: - /* - * the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ - return 0; -} -#endif - - -unsigned long read_timer_tsc(void) -{ - unsigned long retval; - rdtscl(retval); - return retval; -} - - -/* calculate cpu_khz */ -void init_cpu_khz(void) -{ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - } - } -} - Index: linux-2.6.14/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/timer_tsc.c +++ /dev/null @@ -1,595 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - * - * 2004-06-25 Jesper Juhl - * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4 - * failing to inline. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -/* processor.h for distable_tsc flag */ -#include - -#include "io_ports.h" -#include "mach_timer.h" - -#include -#include - -#ifdef CONFIG_HPET_TIMER -static unsigned long hpet_usec_quotient; -static unsigned long hpet_last; -static struct timer_opts timer_tsc; -#endif - -static inline void cpufreq_delayed_get(void); - -int tsc_disable __devinitdata = 0; - -static int use_tsc; -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) -{ - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static int count2; /* counter for mark_offset_tsc() */ - -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. - */ -static unsigned long fast_gettimeoffset_quotient; - -static unsigned long get_offset_tsc(void) -{ - register unsigned long eax, edx; - - /* Read the Time Stamp Counter */ - - rdtsc(eax,edx); - - /* .. relative to previous jiffy (32 bits is enough) */ - eax -= last_tsc_low; /* tsc_low delta */ - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. - */ - - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + edx; -} - -static unsigned long long monotonic_clock_tsc(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -/* - * Scheduler clock - returns current time in nanosec units. - */ -unsigned long long sched_clock(void) -{ - unsigned long long this_offset; - - /* - * In the NUMA case we dont use the TSC as they are not - * synchronized across all CPUs. 
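/*
 * Editor's aside (illustrative, not part of the patch): the "mull" in
 * get_offset_tsc() above converts a TSC delta to microseconds by taking
 * the high 32 bits of delta * quotient.  With the hypothetical 2 GHz CPU
 * from the calibration example (quotient ~= 2^32 / 2000), a delta of
 * 2,000,000 cycles gives
 *
 *     edx = (2,000,000 * (2^32 / 2000)) >> 32 ~= 1000 us
 *
 * i.e. exactly the 1 ms that elapsed, without a divide in the hot path.
 */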
- */ -#ifndef CONFIG_NUMA - if (!use_tsc) -#endif - /* no locking but a rare wrong value is not a big deal */ - return jiffies_64 * (1000000000 / HZ); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return cycles_2_ns(this_offset); -} - -static void delay_tsc(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -#ifdef CONFIG_HPET_TIMER -static void mark_offset_tsc_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset, temp, hpet_current; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. -arca - */ - /* read Pentium cycle counter */ - - hpet_current = hpet_readl(HPET_COUNTER); - rdtsc(last_tsc_low, last_tsc_high); - - /* lost tick compensation */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { - int lost_ticks = (offset - hpet_last) / hpet_tick; - jiffies_64 += lost_ticks; - } - hpet_last = hpet_current; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - */ - delay_at_last_interrupt = hpet_current - offset; - ASM_MUL64_REG(temp, delay_at_last_interrupt, - hpet_usec_quotient, delay_at_last_interrupt); -} -#endif - - -#ifdef CONFIG_CPU_FREQ -#include - -static unsigned int cpufreq_delayed_issched = 0; -static unsigned int cpufreq_init = 0; -static struct work_struct cpufreq_delayed_get_work; - -static void handle_cpufreq_delayed_get(void *v) -{ - unsigned int cpu; - for_each_online_cpu(cpu) { - cpufreq_get(cpu); - } - cpufreq_delayed_issched = 0; -} - -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static inline void cpufreq_delayed_get(void) -{ - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); - schedule_work(&cpufreq_delayed_get_work); - } -} - -/* If the CPU frequency is scaled, TSC-based delays will need a different - * loops_per_jiffy value to function properly. 
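/*
 * Editor's aside (not part of the patch): in the jiffies fallback path of
 * sched_clock() above, resolution is one tick -- with HZ=250 the clock
 * advances in 1,000,000,000 / 250 = 4,000,000 ns steps.  Note that the
 * replacement in the new arch/i386/kernel/tsc.c later in this patch
 * subtracts INITIAL_JIFFIES first, so the fallback clock starts near zero
 * at boot instead of at the large jiffies wrap-test offset.
 */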
- */ - -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; - -#ifndef CONFIG_SMP -static unsigned long fast_gettimeoffset_ref = 0; -static unsigned int cpu_khz_ref = 0; -#endif - -static int -time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - - if (val != CPUFREQ_RESUMECHANGE) - write_seqlock_irq(&xtime_lock); - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; -#ifndef CONFIG_SMP - fast_gettimeoffset_ref = fast_gettimeoffset_quotient; - cpu_khz_ref = cpu_khz; -#endif - } - - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); -#ifndef CONFIG_SMP - if (cpu_khz) - cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (use_tsc) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { - fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz/1000); - } - } -#endif - } - - if (val != CPUFREQ_RESUMECHANGE) - write_sequnlock_irq(&xtime_lock); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - - -static int __init cpufreq_tsc(void) -{ - int ret; - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); - ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (!ret) - cpufreq_init = 1; - return ret; -} -core_initcall(cpufreq_tsc); - -#else /* CONFIG_CPU_FREQ */ -static inline void cpufreq_delayed_get(void) { return; } -#endif - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned int cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - init_cpu_khz(); - cpu_data[0].loops_per_jiffy = - cpufreq_scale(cpu_data[0].loops_per_jiffy, - cpu_khz_old, - cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} -EXPORT_SYMBOL(recalibrate_cpu_khz); - -static void mark_offset_tsc(void) -{ - unsigned long lost,delay; - unsigned long delta = last_tsc_low; - int count; - int countmp; - static int count1 = 0; - unsigned long long this_offset, last_offset; - static int lost_count = 0; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. -arca - */ - - /* read Pentium cycle counter */ - - rdtsc(last_tsc_low, last_tsc_high); - - spin_lock(&i8253_lock); - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb(PIT_CH0) << 8; - - /* - * VIA686a test code... 
reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - spin_unlock(&i8253_lock); - - if (pit_latch_buggy) { - /* get center value of last 3 time lutch */ - if ((count2 >= count && count >= count1) - || (count1 >= count && count >= count2)) { - count2 = count1; count1 = count; - } else if ((count1 >= count2 && count2 >= count) - || (count >= count2 && count2 >= count1)) { - countmp = count;count = count2; - count2 = count1;count1 = countmp; - } else { - count2 = count1; count1 = count; count = count1; - } - } - - /* lost tick compensation */ - delta = last_tsc_low - delta; - { - register unsigned long eax, edx; - eax = delta; - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - delta = edx; - } - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) { - jiffies_64 += lost-1; - - /* sanity check to ensure we're not always losing ticks */ - if (lost_count++ > 100) { - printk(KERN_WARNING "Losing too many ticks!\n"); - printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); - printk(KERN_WARNING "Possible reasons for this are:\n"); - printk(KERN_WARNING " You're running with Speedstep,\n"); - printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); - printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); - printk(KERN_WARNING "Falling back to a sane timesource now.\n"); - - clock_fallback(); - } - /* ... but give the TSC a fair chance */ - if (lost_count > 25) - cpufreq_delayed_get(); - } else - lost_count = 0; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - /* catch corner case where tick rollover occured - * between tsc and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static int __init init_tsc(char* override) -{ - - /* check clock override */ - if (override[0] && strncmp(override,"tsc",3)) { -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) { - printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n"); - } else -#endif - { - return -ENODEV; - } - } - - /* - * If we have APM enabled or the CPU clock speed is variable - * (CPU stops clock on HLT or slows clock to save power) - * then the TSC timestamps may diverge by up to 1 jiffy from - * 'real time' but nothing will break. - * The most frequent case is that the CPU is "woken" from a halt - * state by the timer interrupt itself, so we get 0 error. In the - * rare cases where a driver would "wake" the CPU and request a - * timestamp, the maximum error is < 1 jiffy. But timestamps are - * still perfectly ordered. - * Note that the TSC counter will be reset if APM suspends - * to disk; this won't break the kernel, though, 'cuz we're - * smart. See arch/i386/kernel/apm.c. - */ - /* - * Firstly we have to do a CPU check for chips with - * a potentially buggy TSC. At this point we haven't run - * the ident/bugs checks so we must run this hook as it - * may turn off the TSC flag. 
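/*
 * Editor's aside (hypothetical restatement, not part of the patch): the
 * pit_latch_buggy branch in mark_offset_tsc() above is an obfuscated
 * "take the middle of the last three PIT latch reads" filter.  Its intent
 * could be written as:
 */
static inline int median3(int a, int b, int c)
{
	if ((a >= b && b >= c) || (c >= b && b >= a))
		return b;	/* b is the middle value */
	if ((b >= a && a >= c) || (c >= a && a >= b))
		return a;
	return c;
}
/* ...with the filtered count being median3(count, count1, count2). */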
- * - * NOTE: this doesn't yet handle SMP 486 machines where only - * some CPU's have a TSC. Thats never worked and nobody has - * moaned if you have the only one in the world - you fix it! - */ - - count2 = LATCH; /* initialize counter for mark_offset_tsc() */ - - if (cpu_has_tsc) { - unsigned long tsc_quotient; -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) { - unsigned long result, remain; - printk("Using TSC for gettimeofday\n"); - tsc_quotient = calibrate_tsc_hpet(NULL); - timer_tsc.mark_offset = &mark_offset_tsc_hpet; - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_tsc_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, - 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - - hpet_usec_quotient = result; - } else -#endif - { - tsc_quotient = calibrate_tsc(); - } - - if (tsc_quotient) { - fast_gettimeoffset_quotient = tsc_quotient; - use_tsc = 1; - /* - * We could be more selective here I suspect - * and just enable this for the next intel chips ? - */ - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz/1000); - return 0; - } - } - return -ENODEV; -} - -static int tsc_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) - hpet_last = hpet_readl(HPET_COUNTER); -#endif - write_sequnlock(&monotonic_lock); - return 0; -} - -#ifndef CONFIG_X86_TSC -/* disable flag for tsc. Takes effect by clearing the TSC cpu flag - * in cpu/common.c */ -static int __init tsc_setup(char *str) -{ - tsc_disable = 1; - return 1; -} -#else -static int __init tsc_setup(char *str) -{ - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC.\n"); - return 1; -} -#endif -__setup("notsc", tsc_setup); - - - -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_tsc = { - .name = "tsc", - .mark_offset = mark_offset_tsc, - .get_offset = get_offset_tsc, - .monotonic_clock = monotonic_clock_tsc, - .delay = delay_tsc, - .read_timer = read_timer_tsc, - .resume = tsc_resume, -}; - -struct init_timer_opts __initdata timer_tsc_init = { - .init = init_tsc, - .opts = &timer_tsc, -}; Index: linux-2.6.14/arch/i386/kernel/tsc.c =================================================================== --- /dev/null +++ linux-2.6.14/arch/i386/kernel/tsc.c @@ -0,0 +1,379 @@ +/* + * This code largely moved from arch/i386/kernel/timer/timer_tsc.c + * which was originally moved from arch/i386/kernel/time.c. + * See comments there for proper credits. + */ + +#include +#include +#include +#include +#include "mach_timer.h" + +/* On some systems the TSC frequency does not + * change with the cpu frequency. So we need + * an extra value to store the TSC freq + */ +unsigned int tsc_khz; + +int tsc_disable __initdata = 0; +#ifndef CONFIG_X86_TSC +/* disable flag for tsc. 
Takes effect by clearing the TSC cpu flag + * in cpu/common.c */ +static int __init tsc_setup(char *str) +{ + tsc_disable = 1; + return 1; +} +#else +static int __init tsc_setup(char *str) +{ + printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " + "cannot disable TSC.\n"); + return 1; +} +#endif +__setup("notsc", tsc_setup); + + +int read_current_timer(unsigned long *timer_val) +{ + if (!tsc_disable && cpu_khz) { + rdtscl(*timer_val); + return 0; + } + return -1; +} + +/* Code to mark and check if the TSC is unstable + * due to cpufreq or due to unsynced TSCs + */ +static int tsc_unstable; +static inline int check_tsc_unstable(void) +{ + return tsc_unstable; +} + +void mark_tsc_unstable(void) +{ + tsc_unstable = 1; +} + +/* Code to compensate for C3 stalls */ +static u64 tsc_c3_offset; +void tsc_c3_compensate(unsigned long nsecs) +{ + /* this could def be optimized */ + u64 cycles = ((u64)nsecs * tsc_khz); + do_div(cycles, 1000000); + tsc_c3_offset += cycles; +} + +EXPORT_SYMBOL_GPL(tsc_c3_compensate); + +static inline u64 tsc_read_c3_time(void) +{ + return tsc_c3_offset; +} + +/* Accellerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_mhz * 10^6)) + * ns = cycles * (10^3 / cpu_mhz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * -johnstul@us.ibm.com "math is hard, lets go shopping!" + */ +static unsigned long cyc2ns_scale; +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ + +static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +{ + cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; +} + +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long sched_clock(void) +{ + unsigned long long this_offset; + + /* + * In the NUMA case we dont use the TSC as they are not + * synchronized across all CPUs. + */ +#ifndef CONFIG_NUMA + if (!cpu_khz || check_tsc_unstable()) +#endif + /* no locking but a rare wrong value is not a big deal */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); + + /* Read the Time Stamp Counter */ + rdtscll(this_offset); + this_offset += tsc_read_c3_time(); + + /* return the value in ns */ + return cycles_2_ns(this_offset); +} + + +static unsigned long calculate_cpu_khz(void) +{ + unsigned long long start, end; + unsigned long count; + u64 delta64; + int i; + /* run 3 times to ensure the cache is warm */ + for(i=0; i<3; i++) { + mach_prepare_counter(); + rdtscll(start); + mach_countup(&count); + rdtscll(end); + } + /* Error: ECTCNEVERSET + * The CTC wasn't reliable: we got a hit on the very first read, + * or the CPU was so fast/slow that the quotient wouldn't fit in + * 32 bits.. 
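/*
 * Editor's aside (illustrative numbers, not part of the patch): with
 * CYC2NS_SCALE_FACTOR = 10 as above, a 2000 MHz CPU gets
 *
 *     cyc2ns_scale = (1000 << 10) / 2000 = 512
 *
 * so cycles_2_ns(cyc) = (cyc * 512) >> 10 = cyc / 2, i.e. 0.5 ns per
 * cycle, as expected for a 2 GHz clock; sched_clock() stays one multiply
 * and one shift per call.
 */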
+ */ + if (count <= 1) + return 0; + + delta64 = end - start; + + /* cpu freq too fast */ + if(delta64 > (1ULL<<32)) + return 0; + /* cpu freq too slow */ + if (delta64 <= CALIBRATE_TIME_MSEC) + return 0; + + delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ + do_div(delta64,CALIBRATE_TIME_MSEC); + + return (unsigned long)delta64; +} + +int recalibrate_cpu_khz(void) +{ +#ifndef CONFIG_SMP + unsigned long cpu_khz_old = cpu_khz; + + if (cpu_has_tsc) { + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + cpu_data[0].loops_per_jiffy = + cpufreq_scale(cpu_data[0].loops_per_jiffy, + cpu_khz_old, cpu_khz); + return 0; + } else + return -ENODEV; +#else + return -ENODEV; +#endif +} +EXPORT_SYMBOL(recalibrate_cpu_khz); + + +void tsc_init(void) +{ + if(!cpu_has_tsc || tsc_disable) + return; + + cpu_khz = calculate_cpu_khz(); + tsc_khz = cpu_khz; + + if (!cpu_khz) + return; + + printk("Detected %lu.%03lu MHz processor.\n", + (unsigned long)cpu_khz / 1000, + (unsigned long)cpu_khz % 1000); + + set_cyc2ns_scale(cpu_khz/1000); +} + + +#ifdef CONFIG_CPU_FREQ +#include + +static unsigned int cpufreq_delayed_issched = 0; +static unsigned int cpufreq_init = 0; +static struct work_struct cpufreq_delayed_get_work; + +static void handle_cpufreq_delayed_get(void *v) +{ + unsigned int cpu; + for_each_online_cpu(cpu) { + cpufreq_get(cpu); + } + cpufreq_delayed_issched = 0; +} + +/* if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries + * to verify the CPU frequency the timing core thinks the CPU is running + * at is still correct. + */ +static inline void cpufreq_delayed_get(void) +{ + if (cpufreq_init && !cpufreq_delayed_issched) { + cpufreq_delayed_issched = 1; + printk(KERN_DEBUG "Checking if CPU frequency changed.\n"); + schedule_work(&cpufreq_delayed_get_work); + } +} + +/* If the CPU frequency is scaled, TSC-based delays will need a different + * loops_per_jiffy value to function properly. 
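/*
 * Editor's aside (illustrative numbers, not part of the patch): the
 * cpufreq notifier below rescales the delay loop with cpufreq_scale(),
 * which effectively computes old * new / ref.  If loops_per_jiffy was
 * calibrated as 4,000,000 at ref_freq = 2,000,000 kHz and the governor
 * drops the CPU to 1,000,000 kHz, the loop becomes
 *
 *     4,000,000 * 1,000,000 / 2,000,000 = 2,000,000
 *
 * cpu_khz and tsc_khz are rescaled the same way, cyc2ns_scale is
 * recomputed, and the TSC is marked unstable for timekeeping.
 */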
+ */ + +static unsigned int ref_freq = 0; +static unsigned long loops_per_jiffy_ref = 0; + +#ifndef CONFIG_SMP +static unsigned long cpu_khz_ref = 0; +#endif + +static int time_cpufreq_notifier(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + + if (val != CPUFREQ_RESUMECHANGE) + write_seqlock_irq(&xtime_lock); + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; +#ifndef CONFIG_SMP + cpu_khz_ref = cpu_khz; +#endif + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + + if (cpu_khz) { +#ifndef CONFIG_SMP + cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); +#endif + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { + tsc_khz = cpu_khz; + set_cyc2ns_scale(cpu_khz/1000); + /* TSC based sched_clock turns + * to junk w/ cpufreq + */ + mark_tsc_unstable(); + } + } + } + + if (val != CPUFREQ_RESUMECHANGE) + write_sequnlock_irq(&xtime_lock); + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + .notifier_call = time_cpufreq_notifier +}; + + +static int __init cpufreq_tsc(void) +{ + int ret; + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + if (!ret) + cpufreq_init = 1; + + return ret; +} +core_initcall(cpufreq_tsc); + +#endif + +/* Clock source code */ +#include + +static unsigned long current_tsc_khz = 0; +static cycle_t read_tsc_c3(void); +static int tsc_update_callback(void); + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .type = CLOCKSOURCE_CYCLES, + .mask = (cycle_t)-1, + .mult = 0, /* to be set */ + .shift = 22, + .update_callback = tsc_update_callback, + .is_continuous = 1, +}; + +static cycle_t read_tsc_c3(void) +{ + cycle_t ret; + rdtscll(ret); + return ret + tsc_read_c3_time(); +} + +static int tsc_update_callback(void) +{ + int change = 0; + /* check to see if we should switch to the safe clocksource */ + if (tsc_read_c3_time() && + strncmp(clocksource_tsc.name, "c3tsc", 5)) { + printk("Falling back to C3 safe TSC\n"); + clocksource_tsc.read_fnct = read_tsc_c3; + clocksource_tsc.type = CLOCKSOURCE_FUNCTION; + clocksource_tsc.name = "c3tsc"; + change = 1; + } + + if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { + clocksource_tsc.rating = 50; + reselect_clocksource(); + change = 1; + } + /* only update if tsc_khz has changed */ + if (current_tsc_khz != tsc_khz){ + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + change = 1; + } + return change; +} + +static int __init init_tsc_clocksource(void) +{ + + /* TSC initialization is done in arch/i386/kernel/tsc.c */ + if (cpu_has_tsc && tsc_khz) { + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + register_clocksource(&clocksource_tsc); + } + return 0; +} + +module_init(init_tsc_clocksource); + Index: linux-2.6.14/include/asm-i386/timex.h =================================================================== --- linux-2.6.14.orig/include/asm-i386/timex.h +++ linux-2.6.14/include/asm-i386/timex.h @@ -8,6 +8,7 @@ #include #include +#include #ifdef CONFIG_X86_ELAN # 
define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */ @@ -16,39 +17,6 @@ #endif -/* - * Standard way to access the cycle counter on i586+ CPUs. - * Currently only used on SMP. - * - * If you really have a SMP machine with i486 chips or older, - * compile for that, and this will just always return zero. - * That's ok, it just means that the nicer scheduling heuristics - * won't work for you. - * - * We only use the low 32 bits, and we'd simply better make sure - * that we reschedule before that wraps. Scheduling at least every - * four billion cycles just basically sounds like a good idea, - * regardless of how fast the machine is. - */ -typedef unsigned long long cycles_t; - -static inline cycles_t get_cycles (void) -{ - unsigned long long ret=0; - -#ifndef CONFIG_X86_TSC - if (!cpu_has_tsc) - return 0; -#endif - -#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) - rdtscll(ret); -#endif - return ret; -} - -extern unsigned int cpu_khz; - extern int read_current_timer(unsigned long *timer_value); #define ARCH_HAS_READ_CURRENT_TIMER 1 Index: linux-2.6.14/include/asm-i386/tsc.h =================================================================== --- /dev/null +++ linux-2.6.14/include/asm-i386/tsc.h @@ -0,0 +1,48 @@ +/* + * linux/include/asm-i386/tsc.h + * + * i386 TSC related functions + */ +#ifndef _ASM_i386_TSC_H +#define _ASM_i386_TSC_H + +#include +#include + +/* + * Standard way to access the cycle counter on i586+ CPUs. + * Currently only used on SMP. + * + * If you really have a SMP machine with i486 chips or older, + * compile for that, and this will just always return zero. + * That's ok, it just means that the nicer scheduling heuristics + * won't work for you. + * + * We only use the low 32 bits, and we'd simply better make sure + * that we reschedule before that wraps. Scheduling at least every + * four billion cycles just basically sounds like a good idea, + * regardless of how fast the machine is. 
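/*
 * Editor's sketch (not part of the patch): how another clock would plug
 * into the framework this series introduces, modeled on the
 * clocksource_tsc hunk earlier in the new arch/i386/kernel/tsc.c.  Field
 * and function names are taken from that hunk; the header name and the
 * hardware counter accessor are assumptions, and this early GENERIC_TIME
 * prototype differs from the clocksource API that was later merged
 * upstream.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/clocksource.h>		/* assumed header name for this series */

extern u32 read_example_counter(void);	/* hypothetical hardware accessor */

static cycle_t example_read(void)
{
	/* read a hypothetical free-running hardware counter */
	return (cycle_t)read_example_counter();
}

static struct clocksource clocksource_example = {
	.name		= "example",
	.rating		= 100,			/* below the TSC's 300 */
	.type		= CLOCKSOURCE_FUNCTION,
	.read_fnct	= example_read,
	.mask		= (cycle_t)((1ULL << 24) - 1),	/* 24-bit counter */
	.shift		= 22,
};

static int __init example_clocksource_init(void)
{
	/* counter runs at a known, fixed rate, say 3580 kHz */
	clocksource_example.mult =
		clocksource_khz2mult(3580, clocksource_example.shift);
	register_clocksource(&clocksource_example);
	return 0;
}
module_init(example_clocksource_init);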
+ */ +typedef unsigned long long cycles_t; + +static inline cycles_t get_cycles (void) +{ + unsigned long long ret=0; + +#ifndef CONFIG_X86_TSC + if (!cpu_has_tsc) + return 0; +#endif + +#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) + rdtscll(ret); +#endif + return ret; +} + +extern unsigned int cpu_khz; +extern unsigned int tsc_khz; +extern void tsc_init(void); +void tsc_c3_compensate(unsigned long usecs); +extern void mark_tsc_unstable(void); +#endif Index: linux-2.6.14/arch/i386/kernel/setup.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/setup.c +++ linux-2.6.14/arch/i386/kernel/setup.c @@ -1612,6 +1612,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + tsc_init(); } #include "setup_arch_post.h" Index: linux-2.6.14/drivers/acpi/processor_idle.c =================================================================== --- linux-2.6.14.orig/drivers/acpi/processor_idle.c +++ linux-2.6.14/drivers/acpi/processor_idle.c @@ -166,6 +166,7 @@ acpi_processor_power_activate(struct acp return; } +extern void tsc_c3_compensate(unsigned long nsecs); static atomic_t c3_cpu_count; static void acpi_processor_idle(void) @@ -334,6 +335,11 @@ static void acpi_processor_idle(void) ACPI_MTX_DO_NOT_LOCK); } +#ifdef CONFIG_GENERIC_TIME + /* compensate for TSC pause */ + tsc_c3_compensate((u32)(((u64)((t2-t1)&0xFFFFFF)*286070)>>10)); +#endif + /* Re-enable interrupts */ local_irq_enable(); /* Compute time (ticks) that we were actually asleep */ Index: linux-2.6.14/include/asm-i386/mach-default/mach_timer.h =================================================================== --- linux-2.6.14.orig/include/asm-i386/mach-default/mach_timer.h +++ linux-2.6.14/include/asm-i386/mach-default/mach_timer.h @@ -15,7 +15,9 @@ #ifndef _MACH_TIMER_H #define _MACH_TIMER_H -#define CALIBRATE_LATCH (5 * LATCH) +#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ +#define CALIBRATE_LATCH \ + ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) static inline void mach_prepare_counter(void) { Index: linux-2.6.14/include/asm-i386/mach-summit/mach_mpparse.h =================================================================== --- linux-2.6.14.orig/include/asm-i386/mach-summit/mach_mpparse.h +++ linux-2.6.14/include/asm-i386/mach-summit/mach_mpparse.h @@ -30,6 +30,7 @@ static inline int mps_oem_check(struct m (!strncmp(productid, "VIGIL SMP", 9) || !strncmp(productid, "EXA", 3) || !strncmp(productid, "RUTHLESS SMP", 12))){ + mark_tsc_unstable(); use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); usb_early_handoff = 1; @@ -44,6 +45,7 @@ static inline int acpi_madt_oem_check(ch if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERVIGIL", 8) || !strncmp(oem_table_id, "EXA", 3))){ + mark_tsc_unstable(); use_cyclone = 1; /*enable cyclone-timer*/ setup_summit(); usb_early_handoff = 1; Index: linux-2.6.14/arch/i386/kernel/acpi/boot.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/acpi/boot.c +++ linux-2.6.14/arch/i386/kernel/acpi/boot.c @@ -570,7 +570,7 @@ static int __init acpi_parse_sbf(unsigne } #ifdef CONFIG_HPET_TIMER - +#include static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) { struct acpi_table_hpet *hpet_tbl; @@ -592,6 +592,7 @@ static int __init acpi_parse_hpet(unsign #ifdef CONFIG_X86_64 vxtime.hpet_address = hpet_tbl->addr.addrl | ((long)hpet_tbl->addr.addrh << 32); + hpet_address = vxtime.hpet_address; printk(KERN_INFO PREFIX "HPET 
id: %#x base: %#lx\n", hpet_tbl->id, vxtime.hpet_address); @@ -600,10 +601,10 @@ static int __init acpi_parse_hpet(unsign extern unsigned long hpet_address; hpet_address = hpet_tbl->addr.addrl; - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); } -#endif /* X86 */ +#endif /* X86 */ + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, hpet_address); return 0; } @@ -612,7 +613,8 @@ static int __init acpi_parse_hpet(unsign #endif #ifdef CONFIG_X86_PM_TIMER -extern u32 pmtmr_ioport; +u32 acpi_pmtmr_ioport; +int acpi_pmtmr_buggy; #endif static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) @@ -640,14 +642,15 @@ static int __init acpi_parse_fadt(unsign ACPI_ADR_SPACE_SYSTEM_IO) return 0; - pmtmr_ioport = fadt->xpm_tmr_blk.address; + acpi_pmtmr_ioport = fadt->xpm_tmr_blk.address; } else { /* FADT rev. 1 */ - pmtmr_ioport = fadt->V1_pm_tmr_blk; + acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk; } - if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", - pmtmr_ioport); + + if (acpi_pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", acpi_pmtmr_ioport); + #endif return 0; } Index: linux-2.6.14/arch/i386/Kconfig =================================================================== --- linux-2.6.14.orig/arch/i386/Kconfig +++ linux-2.6.14/arch/i386/Kconfig @@ -14,6 +14,10 @@ config X86 486, 586, Pentiums, and various instruction-set-compatible chips by AMD, Cyrix, and others. +config GENERIC_TIME + bool + default y + config SEMAPHORE_SLEEPERS bool default y @@ -466,6 +470,8 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && RTC=y default y +source "kernel/time/Kconfig" + config SMP bool "Symmetric multi-processing support" ---help--- Index: linux-2.6.14/arch/i386/lib/delay.c =================================================================== --- linux-2.6.14.orig/arch/i386/lib/delay.c +++ linux-2.6.14/arch/i386/lib/delay.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -22,11 +23,20 @@ #include #endif -extern struct timer_opts* timer; - +/* XXX - For now just use a simple loop delay + * This has cpufreq issues, but so did the old method. + */ void __delay(unsigned long loops) { - cur_timer->delay(loops); + int d0; + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); } inline void __const_udelay(unsigned long xloops) Index: linux-2.6.14/include/asm-i386/timeofday.h =================================================================== --- /dev/null +++ linux-2.6.14/include/asm-i386/timeofday.h @@ -0,0 +1,4 @@ +#ifndef _ASM_I386_TIMEOFDAY_H +#define _ASM_I386_TIMEOFDAY_H +#include +#endif Index: linux-2.6.14/include/asm-i386/timer.h =================================================================== --- linux-2.6.14.orig/include/asm-i386/timer.h +++ linux-2.6.14/include/asm-i386/timer.h @@ -3,68 +3,10 @@ #include #include -/** - * struct timer_ops - used to define a timer source - * - * @name: name of the timer. - * @init: Probes and initializes the timer. Takes clock= override - * string as an argument. Returns 0 on success, anything else - * on failure. - * @mark_offset: called by the timer interrupt. - * @get_offset: called by gettimeofday(). Returns the number of microseconds - * since the last timer interupt. - * @monotonic_clock: returns the number of nanoseconds since the init of the - * timer. - * @delay: delays this many clock cycles. 
- */ -struct timer_opts { - char* name; - void (*mark_offset)(void); - unsigned long (*get_offset)(void); - unsigned long long (*monotonic_clock)(void); - void (*delay)(unsigned long); - unsigned long (*read_timer)(void); - int (*suspend)(pm_message_t state); - int (*resume)(void); -}; - -struct init_timer_opts { - int (*init)(char *override); - struct timer_opts *opts; -}; - #define TICK_SIZE (tick_nsec / 1000) - -extern struct timer_opts* __init select_timer(void); -extern void clock_fallback(void); void setup_pit_timer(void); - /* Modifiers for buggy PIT handling */ - extern int pit_latch_buggy; - -extern struct timer_opts *cur_timer; -extern int timer_ack; - -/* list of externed timers */ -extern struct timer_opts timer_none; -extern struct timer_opts timer_pit; -extern struct init_timer_opts timer_pit_init; -extern struct init_timer_opts timer_tsc_init; -#ifdef CONFIG_X86_CYCLONE_TIMER -extern struct init_timer_opts timer_cyclone_init; -#endif - -extern unsigned long calibrate_tsc(void); -extern unsigned long read_timer_tsc(void); -extern void init_cpu_khz(void); extern int recalibrate_cpu_khz(void); -#ifdef CONFIG_HPET_TIMER -extern struct init_timer_opts timer_hpet_init; -extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr); -#endif -#ifdef CONFIG_X86_PM_TIMER -extern struct init_timer_opts timer_pmtmr_init; -#endif #endif Index: linux-2.6.14/arch/i386/kernel/timers/Makefile =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for x86 timers -# - -obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o - -obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o -obj-$(CONFIG_HPET_TIMER) += timer_hpet.o -obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o Index: linux-2.6.14/arch/i386/kernel/timers/timer.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/timer.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include - -#ifdef CONFIG_HPET_TIMER -/* - * HPET memory read is slower than tsc reads, but is more dependable as it - * always runs at constant frequency and reduces complexity due to - * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use - * timer_pit when HPET is active. So, we default to timer_tsc. - */ -#endif -/* list of timers, ordered by preference, NULL terminated */ -static struct init_timer_opts* __initdata timers[] = { -#ifdef CONFIG_X86_CYCLONE_TIMER - &timer_cyclone_init, -#endif -#ifdef CONFIG_HPET_TIMER - &timer_hpet_init, -#endif -#ifdef CONFIG_X86_PM_TIMER - &timer_pmtmr_init, -#endif - &timer_tsc_init, - &timer_pit_init, - NULL, -}; - -static char clock_override[10] __initdata; - -static int __init clock_setup(char* str) -{ - if (str) - strlcpy(clock_override, str, sizeof(clock_override)); - return 1; -} -__setup("clock=", clock_setup); - - -/* The chosen timesource has been found to be bad. - * Fall back to a known good timesource (the PIT) - */ -void clock_fallback(void) -{ - cur_timer = &timer_pit; -} - -/* iterates through the list of timers, returning the first - * one that initializes successfully. 
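/*
 * Editor's aside (not part of the patch): the clock_override string above
 * came from the "clock=" boot parameter; each init_*() bails out with
 * -ENODEV when the override names a different timer, so e.g. booting with
 *
 *     clock=hpet
 *
 * made select_timer() settle on the HPET timer_opts (the PIT init only
 * warns and carries on).  This whole selection mechanism is removed by
 * the patch in favor of clocksource registration and rating.
 */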
- */ -struct timer_opts* __init select_timer(void) -{ - int i = 0; - - /* find most preferred working timer */ - while (timers[i]) { - if (timers[i]->init) - if (timers[i]->init(clock_override) == 0) - return timers[i]->opts; - ++i; - } - - panic("select_timer: Cannot find a suitable timer\n"); - return NULL; -} - -int read_current_timer(unsigned long *timer_val) -{ - if (cur_timer->read_timer) { - *timer_val = cur_timer->read_timer(); - return 0; - } - return -1; -} Index: linux-2.6.14/arch/i386/kernel/timers/timer_cyclone.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/timer_cyclone.c +++ /dev/null @@ -1,259 +0,0 @@ -/* Cyclone-timer: - * This code implements timer_ops for the cyclone counter found - * on IBM x440, x360, and other Summit based systems. - * - * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com) - */ - - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "io_ports.h" - -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -#define CYCLONE_CBAR_ADDR 0xFEB00CD0 -#define CYCLONE_PMCC_OFFSET 0x51A0 -#define CYCLONE_MPMC_OFFSET 0x51D0 -#define CYCLONE_MPCS_OFFSET 0x51A8 -#define CYCLONE_TIMER_FREQ 100000000 -#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */ -int use_cyclone = 0; - -static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ -static u32 last_cyclone_low; -static u32 last_cyclone_high; -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* helper macro to atomically read both cyclone counter registers */ -#define read_cyclone_counter(low,high) \ - do{ \ - high = cyclone_timer[1]; low = cyclone_timer[0]; \ - } while (high != cyclone_timer[1]); - - -static void mark_offset_cyclone(void) -{ - unsigned long lost, delay; - unsigned long delta = last_cyclone_low; - int count; - unsigned long long this_offset, last_offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - - spin_lock(&i8253_lock); - read_cyclone_counter(last_cyclone_low,last_cyclone_high); - - /* read values for delay_at_last_interrupt */ - outb_p(0x00, 0x43); /* latch the count ASAP */ - - count = inb_p(0x40); /* read the latched count */ - count |= inb(0x40) << 8; - - /* - * VIA686a test code... 
reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - spin_unlock(&i8253_lock); - - /* lost tick compensation */ - delta = last_cyclone_low - delta; - delta /= (CYCLONE_TIMER_FREQ/1000000); - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) - jiffies_64 += lost-1; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - - /* catch corner case where tick rollover occured - * between cyclone and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static unsigned long get_offset_cyclone(void) -{ - u32 offset; - - if(!cyclone_timer) - return delay_at_last_interrupt; - - /* Read the cyclone timer */ - offset = cyclone_timer[0]; - - /* .. relative to previous jiffy */ - offset = offset - last_cyclone_low; - - /* convert cyclone ticks to microseconds */ - /* XXX slow, can we speed this up? */ - offset = offset/(CYCLONE_TIMER_FREQ/1000000); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + offset; -} - -static unsigned long long monotonic_clock_cyclone(void) -{ - u32 now_low, now_high; - unsigned long long last_offset, this_offset, base; - unsigned long long ret; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - - /* Read the cyclone counter */ - read_cyclone_counter(now_low,now_high); - this_offset = ((unsigned long long)now_high<<32)|now_low; - - /* convert to nanoseconds */ - ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK); - return ret * (1000000000 / CYCLONE_TIMER_FREQ); -} - -static int __init init_cyclone(char* override) -{ - u32* reg; - u32 base; /* saved cyclone base address */ - u32 pageaddr; /* page that contains cyclone_timer register */ - u32 offset; /* offset from pageaddr to cyclone_timer register */ - int i; - - /* check clock override */ - if (override[0] && strncmp(override,"cyclone",7)) - return -ENODEV; - - /*make sure we're on a summit box*/ - if(!use_cyclone) return -ENODEV; - - printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); - - /* find base address */ - pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK; - offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); - return -ENODEV; - } - base = *reg; - if(!base){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); - return -ENODEV; - } - - /* setup PMCC */ - pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find 
valid PMCC register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* setup MPCS */ - pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* map in cyclone_timer */ - pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!cyclone_timer){ - printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); - return -ENODEV; - } - - /*quick test to make sure its ticking*/ - for(i=0; i<3; i++){ - u32 old = cyclone_timer[0]; - int stall = 100; - while(stall--) barrier(); - if(cyclone_timer[0] == old){ - printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); - cyclone_timer = 0; - return -ENODEV; - } - } - - init_cpu_khz(); - - /* Everything looks good! */ - return 0; -} - - -static void delay_cyclone(unsigned long loops) -{ - unsigned long bclock, now; - if(!cyclone_timer) - return; - bclock = cyclone_timer[0]; - do { - rep_nop(); - now = cyclone_timer[0]; - } while ((now-bclock) < loops); -} -/************************************************************/ - -/* cyclone timer_opts struct */ -static struct timer_opts timer_cyclone = { - .name = "cyclone", - .mark_offset = mark_offset_cyclone, - .get_offset = get_offset_cyclone, - .monotonic_clock = monotonic_clock_cyclone, - .delay = delay_cyclone, -}; - -struct init_timer_opts __initdata timer_cyclone_init = { - .init = init_cyclone, - .opts = &timer_cyclone, -}; Index: linux-2.6.14/arch/i386/kernel/timers/timer_hpet.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/timer_hpet.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "io_ports.h" -#include "mach_timer.h" -#include - -static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ -static unsigned long hpet_last; /* hpet counter value at last tick*/ -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
- */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) -{ - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static unsigned long long monotonic_clock_hpet(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -static unsigned long get_offset_hpet(void) -{ - register unsigned long eax, edx; - - eax = hpet_readl(HPET_COUNTER); - eax -= hpet_last; /* hpet delta */ - eax = min(hpet_tick, eax); - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - * - * Using a mull instead of a divl saves some cycles in critical path. - */ - ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax); - - /* our adjusted time offset in microseconds */ - return edx; -} - -static void mark_offset_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - offset = hpet_readl(HPET_COUNTER); - if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { - int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; - jiffies_64 += lost_ticks; - } - hpet_last = offset; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); -} - -static void delay_hpet(unsigned long loops) -{ - unsigned long hpet_start, hpet_end; - unsigned long eax; - - /* loops is the number of cpu cycles. Convert it to hpet clocks */ - ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops); - - hpet_start = hpet_readl(HPET_COUNTER); - do { - rep_nop(); - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < (loops)); -} - -static struct timer_opts timer_hpet; - -static int __init init_hpet(char* override) -{ - unsigned long result, remain; - - /* check clock override */ - if (override[0] && strncmp(override,"hpet",4)) - return -ENODEV; - - if (!is_hpet_enabled()) - return -ENODEV; - - printk("Using HPET for gettimeofday\n"); - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. 
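/*
 * Editor's aside (illustrative numbers, not part of the patch): for the
 * hpet_usec_quotient used by get_offset_hpet() above, assume a 14.318 MHz
 * HPET and HZ=1000, so one tick is 1000 us and hpet_tick ~= 14318 HPET
 * clocks.  Then
 *
 *     hpet_usec_quotient = 2^32 * 1000 / 14318 ~= 3.0 * 10^8
 *
 * and a delta of 7159 HPET clocks (half a tick) maps to
 * (7159 * hpet_usec_quotient) >> 32 ~= 500 us -- again a multiply and a
 * shift instead of a divide.
 */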
- */ - { unsigned long eax=0, edx=1000; - ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, - eax, edx); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz/1000); - } - /* set this only when cpu_has_tsc */ - timer_hpet.read_timer = read_timer_tsc; - } - - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - hpet_usec_quotient = result; - - return 0; -} - -static int hpet_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - hpet_last = hpet_readl(HPET_COUNTER); - write_sequnlock(&monotonic_lock); - return 0; -} -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_hpet __read_mostly = { - .name = "hpet", - .mark_offset = mark_offset_hpet, - .get_offset = get_offset_hpet, - .monotonic_clock = monotonic_clock_hpet, - .delay = delay_hpet, - .resume = hpet_resume, -}; - -struct init_timer_opts __initdata timer_hpet_init = { - .init = init_hpet, - .opts = &timer_hpet, -}; Index: linux-2.6.14/arch/i386/kernel/timers/timer_none.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/timer_none.c +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include - -static void mark_offset_none(void) -{ - /* nothing needed */ -} - -static unsigned long get_offset_none(void) -{ - return 0; -} - -static unsigned long long monotonic_clock_none(void) -{ - return 0; -} - -static void delay_none(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - -/* none timer_opts struct */ -struct timer_opts timer_none = { - .name = "none", - .mark_offset = mark_offset_none, - .get_offset = get_offset_none, - .monotonic_clock = monotonic_clock_none, - .delay = delay_none, -}; Index: linux-2.6.14/arch/i386/kernel/timers/timer_pm.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/timers/timer_pm.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * southbridges as primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "mach_timer.h" - -/* Number of PMTMR ticks expected during calibration run */ -#define PMTMR_TICKS_PER_SEC 3579545 -#define PMTMR_EXPECTED_RATE \ - ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) - - -/* The I/O port the PMTMR resides at. 
- * The location is detected during setup_arch(), - * in arch/i386/acpi/boot.c */ -u32 pmtmr_ioport = 0; - - -/* value of the Power timer at last timer interrupt */ -static u32 offset_tick; -static u32 offset_delay; - -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -/*helper function to safely read acpi pm timesource*/ -static inline u32 read_pmtmr(void) -{ - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; -} - - -/* - * Some boards have the PMTMR running way too fast. We check - * the PMTMR rate against PIT channel 2 to catch these cases. - */ -static int verify_pmtmr_rate(void) -{ - u32 value1, value2; - unsigned long count, delta; - - mach_prepare_counter(); - value1 = read_pmtmr(); - mach_countup(&count); - value2 = read_pmtmr(); - delta = (value2 - value1) & ACPI_PM_MASK; - - /* Check that the PMTMR delta is within 5% of what we expect */ - if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || - delta > (PMTMR_EXPECTED_RATE * 21) / 20) { - printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); - return -1; - } - - return 0; -} - - -static int init_pmtmr(char* override) -{ - u32 value1, value2; - unsigned int i; - - if (override[0] && strncmp(override,"pmtmr",5)) - return -ENODEV; - - if (!pmtmr_ioport) - return -ENODEV; - - /* we use the TSC for delay_pmtmr, so make sure it exists */ - if (!cpu_has_tsc) - return -ENODEV; - - /* "verify" this timing source */ - value1 = read_pmtmr(); - for (i = 0; i < 10000; i++) { - value2 = read_pmtmr(); - if (value2 == value1) - continue; - if (value2 > value1) - goto pm_good; - if ((value2 < value1) && ((value2) < 0xFFF)) - goto pm_good; - printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); - return -EINVAL; - } - printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); - return -ENODEV; - -pm_good: - if (verify_pmtmr_rate() != 0) - return -ENODEV; - - init_cpu_khz(); - return 0; -} - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. 
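/*
 * Editor's aside (worked numbers, not part of the patch): the PM timer
 * ticks at 3.579545 MHz, so one tick is 1 / 3.579545 ~= 0.279365 us, and
 * 286 / 1024 = 0.279297 matches that to within ~0.024%.  For one HZ=100
 * jiffy (35796 PM-timer clocks) cyc2us() gives
 *
 *     (35796 * 286) >> 10 = 9997 us   (exact value: ~10000 us)
 *
 * The constant 286070 >> 10 ~= 279.365 used for the C3 compensation in
 * drivers/acpi/processor_idle.c earlier in this patch is the same
 * approximation expressed in nanoseconds per PM-timer clock.
 */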
- */ - cycles *= 286; - return (cycles >> 10); -} - -/* - * this gets called during each timer interrupt - * - Called while holding the writer xtime_lock - */ -static void mark_offset_pmtmr(void) -{ - u32 lost, delta, last_offset; - static int first_run = 1; - last_offset = offset_tick; - - write_seqlock(&monotonic_lock); - - offset_tick = read_pmtmr(); - - /* calculate tick interval */ - delta = (offset_tick - last_offset) & ACPI_PM_MASK; - - /* convert to usecs */ - delta = cyc2us(delta); - - /* update the monotonic base value */ - monotonic_base += delta * NSEC_PER_USEC; - write_sequnlock(&monotonic_lock); - - /* convert to ticks */ - delta += offset_delay; - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - - /* compensate for lost ticks */ - if (lost >= 2) - jiffies_64 += lost - 1; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } -} - -static int pmtmr_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - offset_tick = read_pmtmr(); - write_sequnlock(&monotonic_lock); - return 0; -} - -static unsigned long long monotonic_clock_pmtmr(void) -{ - u32 last_offset, this_offset; - unsigned long long base, ret; - unsigned seq; - - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = offset_tick; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the pmtmr */ - this_offset = read_pmtmr(); - - /* convert to nanoseconds */ - ret = (this_offset - last_offset) & ACPI_PM_MASK; - ret = base + (cyc2us(ret) * NSEC_PER_USEC); - return ret; -} - -static void delay_pmtmr(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - - -/* - * get the offset (in microseconds) from the last call to mark_offset() - * - Called holding a reader xtime_lock - */ -static unsigned long get_offset_pmtmr(void) -{ - u32 now, offset, delta = 0; - - offset = offset_tick; - now = read_pmtmr(); - delta = (now - offset)&ACPI_PM_MASK; - - return (unsigned long) offset_delay + cyc2us(delta); -} - - -/* acpi timer_opts struct */ -static struct timer_opts timer_pmtmr = { - .name = "pmtmr", - .mark_offset = mark_offset_pmtmr, - .get_offset = get_offset_pmtmr, - .monotonic_clock = monotonic_clock_pmtmr, - .delay = delay_pmtmr, - .read_timer = read_timer_tsc, - .resume = pmtmr_resume, -}; - -struct init_timer_opts __initdata timer_pmtmr_init = { - .init = init_pmtmr, - .opts = &timer_pmtmr, -}; - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Dominik Brodowski "); -MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); Index: linux-2.6.14/arch/x86_64/Kconfig =================================================================== --- linux-2.6.14.orig/arch/x86_64/Kconfig +++ linux-2.6.14/arch/x86_64/Kconfig @@ -24,6 +24,14 @@ config X86 bool default y +config GENERIC_TIME + bool + default y + +config GENERIC_TIME_VSYSCALL + bool + default y + config SEMAPHORE_SLEEPERS bool default y Index: linux-2.6.14/arch/x86_64/kernel/Makefile =================================================================== --- linux-2.6.14.orig/arch/x86_64/kernel/Makefile +++ linux-2.6.14/arch/x86_64/kernel/Makefile @@ -29,7 +29,6 @@ obj-$(CONFIG_GART_IOMMU) += pci-gart.o a obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_SWIOTLB) += swiotlb.o 
obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o obj-$(CONFIG_MODULES) += module.o Index: linux-2.6.14/arch/x86_64/kernel/pmtimer.c =================================================================== --- linux-2.6.14.orig/arch/x86_64/kernel/pmtimer.c +++ /dev/null @@ -1,101 +0,0 @@ -/* Ported over from i386 by AK, original copyright was: - * - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * southbridges as primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. - * - * Dropped all the hardware bug workarounds for now. Hopefully they - * are not needed on 64bit chipsets. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The I/O port the PMTMR resides at. - * The location is detected during setup_arch(), - * in arch/i386/kernel/acpi/boot.c */ -u32 pmtmr_ioport; - -/* value of the Power timer at last timer interrupt */ -static u32 offset_delay; -static u32 last_pmtmr_tick; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. - */ - cycles *= 286; - return (cycles >> 10); -} - -int pmtimer_mark_offset(void) -{ - static int first_run = 1; - unsigned long tsc; - u32 lost; - - u32 tick = inl(pmtmr_ioport); - u32 delta; - - delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); - - last_pmtmr_tick = tick; - monotonic_base += delta * NSEC_PER_USEC; - - delta += offset_delay; - - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - rdtscll(tsc); - vxtime.last_tsc = tsc - offset_delay * cpu_khz; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } - - return lost - 1; -} - -unsigned int do_gettimeoffset_pm(void) -{ - u32 now, offset, delta = 0; - - offset = last_pmtmr_tick; - now = inl(pmtmr_ioport); - delta = (now - offset) & ACPI_PM_MASK; - - return offset_delay + cyc2us(delta); -} - - -static int __init nopmtimer_setup(char *s) -{ - pmtmr_ioport = 0; - return 0; -} - -__setup("nopmtimer", nopmtimer_setup); Index: linux-2.6.14/arch/x86_64/kernel/vmlinux.lds.S =================================================================== --- linux-2.6.14.orig/arch/x86_64/kernel/vmlinux.lds.S +++ linux-2.6.14/arch/x86_64/kernel/vmlinux.lds.S @@ -99,6 +99,13 @@ SECTIONS .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } jiffies = VVIRT(.jiffies); + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) } + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + + .vsyscall_gtod_lock : AT(VLOAD(.vsyscall_gtod_lock)) { *(.vsyscall_gtod_lock) } + vsyscall_gtod_lock = VVIRT(.vsyscall_gtod_lock); + + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) } .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) } .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } Index: linux-2.6.14/arch/x86_64/kernel/vsyscall.c 
=================================================================== --- linux-2.6.14.orig/arch/x86_64/kernel/vsyscall.c +++ linux-2.6.14/arch/x86_64/kernel/vsyscall.c @@ -19,6 +19,8 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ +#include +#include #include #include #include @@ -40,6 +42,21 @@ int __sysctl_vsyscall __section_sysctl_vsyscall = 1; seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; + +struct vsyscall_gtod_data_t { + struct timeval wall_time_tv; + struct timezone sys_tz; + cycle_t offset_base; + struct clocksource clock; +}; + +extern struct vsyscall_gtod_data_t vsyscall_gtod_data; +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data; + +extern seqlock_t vsyscall_gtod_lock; +seqlock_t __vsyscall_gtod_lock __section_vsyscall_gtod_lock = SEQLOCK_UNLOCKED; + + #include static force_inline void timeval_normalize(struct timeval * tv) @@ -53,40 +70,54 @@ static force_inline void timeval_normali } } -static force_inline void do_vgettimeofday(struct timeval * tv) +/* XXX - this is ugly. gettimeofday() has a label in it so we can't + call it twice. + */ +static force_inline int syscall_gtod(struct timeval *tv, struct timezone *tz) { - long sequence, t; - unsigned long sec, usec; - + int ret; + asm volatile("syscall" + : "=a" (ret) + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber ); + return ret; +} +static force_inline void do_vgettimeofday(struct timeval* tv) +{ + cycle_t now, cycle_delta; + nsec_t nsec_delta; + unsigned long seq; do { - sequence = read_seqbegin(&__xtime_lock); - - sec = __xtime.tv_sec; - usec = (__xtime.tv_nsec / 1000) + - (__jiffies - __wall_jiffies) * (1000000 / HZ); - - if (__vxtime.mode != VXTIME_HPET) { - sync_core(); - rdtscll(t); - if (t < __vxtime.last_tsc) - t = __vxtime.last_tsc; - usec += ((t - __vxtime.last_tsc) * - __vxtime.tsc_quot) >> 32; - /* See comment in x86_64 do_gettimeofday. */ - } else { - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - - __vxtime.last) * __vxtime.quot) >> 32; + seq = read_seqbegin(&__vsyscall_gtod_lock); + + if (__vsyscall_gtod_data.clock.type == CLOCKSOURCE_FUNCTION) { + syscall_gtod(tv, NULL); + return; } - } while (read_seqretry(&__xtime_lock, sequence)); - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; + /* read the clocksource and calc cycle_delta */ + now = read_clocksource(&__vsyscall_gtod_data.clock); + cycle_delta = (now - __vsyscall_gtod_data.offset_base) + & __vsyscall_gtod_data.clock.mask; + + /* convert cycles to nsecs */ + nsec_delta = cycle_delta * __vsyscall_gtod_data.clock.mult; + nsec_delta = nsec_delta >> __vsyscall_gtod_data.clock.shift; + + /* add nsec offset to wall_time_tv */ + *tv = __vsyscall_gtod_data.wall_time_tv; + do_div(nsec_delta, NSEC_PER_USEC); + tv->tv_usec += (unsigned long) nsec_delta; + while (tv->tv_usec >= USEC_PER_SEC) { + tv->tv_sec += 1; + tv->tv_usec -= USEC_PER_SEC; + } + } while (read_seqretry(&__vsyscall_gtod_lock, seq)); } /* RED-PEN may want to readd seq locking, but then the variable should be write-once.
*/ static force_inline void do_get_tz(struct timezone * tz) { - *tz = __sys_tz; + *tz = __vsyscall_gtod_data.sys_tz; } static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz) @@ -122,11 +153,13 @@ int __vsyscall(0) vgettimeofday(struct t * unlikely */ time_t __vsyscall(1) vtime(time_t *t) { + struct timeval tv; if (unlikely(!__sysctl_vsyscall)) return time_syscall(t); - else if (t) - *t = __xtime.tv_sec; - return __xtime.tv_sec; + vgettimeofday(&tv, 0); + if (t) + *t = tv.tv_sec; + return tv.tv_sec; } long __vsyscall(2) venosys_0(void) @@ -139,6 +172,49 @@ long __vsyscall(3) venosys_1(void) return -ENOSYS; } +struct clocksource* curr_clock; + +void arch_update_vsyscall_gtod(struct timespec wall_time, cycle_t offset_base, + struct clocksource *clock, int ntp_adj) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_lock, flags); + + /* XXX - hackitty hack hack. this is terrible! */ + if (curr_clock != clock) { + if ((clock->type == CLOCKSOURCE_MMIO_32) + || (clock->type == CLOCKSOURCE_MMIO_64)) { + unsigned long vaddr = (unsigned long)clock->mmio_ptr; + pgd_t *pgd = pgd_offset_k(vaddr); + pud_t *pud = pud_offset(pgd, vaddr); + pmd_t *pmd = pmd_offset(pud,vaddr); + pte_t *pte = pte_offset_kernel(pmd, vaddr); + *pte = pte_mkread(*pte); + } + curr_clock = clock; + } + + /* save off wall time as timeval */ + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time.tv_sec; + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time.tv_nsec/1000; + + /* save offset_base */ + vsyscall_gtod_data.offset_base = offset_base; + + /* copy current clocksource */ + vsyscall_gtod_data.clock = *clock; + + /* apply ntp adjustment to clocksource mult */ + vsyscall_gtod_data.clock.mult += ntp_adj; + + /* save off current timezone */ + vsyscall_gtod_data.sys_tz = sys_tz; + + write_sequnlock_irqrestore(&vsyscall_gtod_lock, flags); + +} + #ifdef CONFIG_SYSCTL #define SYSCALL 0x050f @@ -217,6 +293,7 @@ static int __init vsyscall_init(void) BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); map_vsyscall(); + sysctl_vsyscall = 1; #ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2, 0); #endif Index: linux-2.6.14/include/asm-generic/div64.h =================================================================== --- linux-2.6.14.orig/include/asm-generic/div64.h +++ linux-2.6.14/include/asm-generic/div64.h @@ -55,4 +55,13 @@ extern uint32_t __div64_32(uint64_t *div #endif /* BITS_PER_LONG */ +#ifndef div_long_long_rem +#define div_long_long_rem(dividend,divisor,remainder) \ +({ \ + u64 result = dividend; \ + *remainder = do_div(result,divisor); \ + result; \ +}) +#endif + #endif /* _ASM_GENERIC_DIV64_H */ Index: linux-2.6.14/include/asm-x86_64/hpet.h =================================================================== --- linux-2.6.14.orig/include/asm-x86_64/hpet.h +++ linux-2.6.14/include/asm-x86_64/hpet.h @@ -1,6 +1,6 @@ #ifndef _ASM_X8664_HPET_H #define _ASM_X8664_HPET_H 1 - +#include /* * Documentation on HPET can be found at: * http://www.intel.com/ial/home/sp/pcmmspec.htm @@ -44,6 +44,7 @@ #define HPET_TN_SETVAL 0x040 #define HPET_TN_32BIT 0x100 +extern unsigned long hpet_address; /* hpet memory map physical address */ extern int is_hpet_enabled(void); extern int hpet_rtc_timer_init(void); extern int oem_force_hpet_timer(void); Index: linux-2.6.14/include/asm-x86_64/timeofday.h =================================================================== --- /dev/null +++ linux-2.6.14/include/asm-x86_64/timeofday.h @@ 
-0,0 +1,4 @@ +#ifndef _ASM_X86_64_TIMEOFDAY_H +#define _ASM_X86_64_TIMEOFDAY_H +#include +#endif Index: linux-2.6.14/include/asm-x86_64/timex.h =================================================================== --- linux-2.6.14.orig/include/asm-x86_64/timex.h +++ linux-2.6.14/include/asm-x86_64/timex.h @@ -24,6 +24,8 @@ static inline cycles_t get_cycles (void) } extern unsigned int cpu_khz; +extern unsigned int tsc_khz; +extern void tsc_c3_compensate(unsigned long usecs); extern int read_current_timer(unsigned long *timer_value); #define ARCH_HAS_READ_CURRENT_TIMER 1 Index: linux-2.6.14/include/asm-x86_64/vsyscall.h =================================================================== --- linux-2.6.14.orig/include/asm-x86_64/vsyscall.h +++ linux-2.6.14/include/asm-x86_64/vsyscall.h @@ -22,6 +22,8 @@ enum vsyscall_num { #define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16))) #define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16))) #define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(16))) +#define __section_vsyscall_gtod_data __attribute__ ((unused, __section__ (".vsyscall_gtod_data"),aligned(16))) +#define __section_vsyscall_gtod_lock __attribute__ ((unused, __section__ (".vsyscall_gtod_lock"),aligned(16))) #define VXTIME_TSC 1 #define VXTIME_HPET 2 Index: linux-2.6.14/drivers/Makefile =================================================================== --- linux-2.6.14.orig/drivers/Makefile +++ linux-2.6.14/drivers/Makefile @@ -67,3 +67,4 @@ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-$(CONFIG_SGI_IOC4) += sn/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ +obj-$(CONFIG_GENERIC_TIME) += clocksource/ Index: linux-2.6.14/drivers/clocksource/Makefile =================================================================== --- /dev/null +++ linux-2.6.14/drivers/clocksource/Makefile @@ -0,0 +1,4 @@ +#XXX doesn't boot! obj-$(CONFIG_X86) += tsc-interp.o +obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o +obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o +obj-$(CONFIG_HPET_TIMER) += hpet.o Index: linux-2.6.14/drivers/clocksource/acpi_pm.c =================================================================== --- /dev/null +++ linux-2.6.14/drivers/clocksource/acpi_pm.c @@ -0,0 +1,153 @@ +/* + * linux/drivers/clocksource/acpi_pm.c + * + * This file contains the ACPI PM based clocksource. + * + * This code was largely moved from the i386 timer_pm.c file + * which was (C) Dominik Brodowski 2003 + * and contained the following comments: + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + */ + + +#include +#include +#include +#include + +/* Number of PMTMR ticks expected during calibration run */ +#define PMTMR_TICKS_PER_SEC 3579545 + +#if (defined(CONFIG_X86) && (!defined(CONFIG_X86_64))) +#include "mach_timer.h" +#define PMTMR_EXPECTED_RATE ((PMTMR_TICKS_PER_SEC*CALIBRATE_TIME_MSEC)/1000) +#endif + +/* The I/O port the PMTMR resides at. 
+ * The location is detected during setup_arch(), + * in arch/i386/acpi/boot.c */ +extern u32 acpi_pmtmr_ioport; +extern int acpi_pmtmr_buggy; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + + +static inline u32 read_pmtmr(void) +{ + /* mask the output to 24 bits */ + return inl(acpi_pmtmr_ioport) & ACPI_PM_MASK; +} + +static cycle_t acpi_pm_read_verified(void) +{ + u32 v1=0,v2=0,v3=0; + /* It has been reported that on various broken + * chipsets (ICH4, PIIX4 and PIIX4E) the ACPI PM clock + * source is not latched, so you must read it multiple + * times to ensure a safe value is read. + */ + do { + v1 = read_pmtmr(); + v2 = read_pmtmr(); + v3 = read_pmtmr(); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + return (cycle_t)v2; +} + + +static cycle_t acpi_pm_read(void) +{ + return (cycle_t)read_pmtmr(); +} + +struct clocksource clocksource_acpi_pm = { + .name = "acpi_pm", + .rating = 200, + .type = CLOCKSOURCE_FUNCTION, + .read_fnct = acpi_pm_read, + .mask = (cycle_t)ACPI_PM_MASK, + .mult = 0, /*to be calculated*/ + .shift = 22, + .is_continuous = 1, +}; + +#if (defined(CONFIG_X86) && (!defined(CONFIG_X86_64))) +/* + * Some boards have the PMTMR running way too fast. We check + * the PMTMR rate against PIT channel 2 to catch these cases. + */ +static int __init verify_pmtmr_rate(void) +{ + u32 value1, value2; + unsigned long count, delta; + + mach_prepare_counter(); + value1 = read_pmtmr(); + mach_countup(&count); + value2 = read_pmtmr(); + delta = (value2 - value1) & ACPI_PM_MASK; + + /* Check that the PMTMR delta is within 5% of what we expect */ + if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || + delta > (PMTMR_EXPECTED_RATE * 21) / 20) { + printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); + return -1; + } + + return 0; +} +#else +#define verify_pmtmr_rate() (0) +#endif + +static int __init init_acpi_pm_clocksource(void) +{ + u32 value1, value2; + unsigned int i; + + if (!acpi_pmtmr_ioport) + return -ENODEV; + + clocksource_acpi_pm.mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, + clocksource_acpi_pm.shift); + + /* "verify" this timing source */ + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); + if (value2 == value1) + continue; + if (value2 > value1) + goto pm_good; + if ((value2 < value1) && ((value2) < 0xFFF)) + goto pm_good; + printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); + return -EINVAL; + } + printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); + return -ENODEV; + +pm_good: + if (verify_pmtmr_rate() != 0) + return -ENODEV; + + /* check to see if pmtmr is known buggy */ + if (acpi_pmtmr_buggy) { + clocksource_acpi_pm.read_fnct = acpi_pm_read_verified; + clocksource_acpi_pm.rating = 110; + } + + register_clocksource(&clocksource_acpi_pm); + return 0; +} + +module_init(init_acpi_pm_clocksource); Index: linux-2.6.14/drivers/clocksource/cyclone.c =================================================================== --- /dev/null +++ linux-2.6.14/drivers/clocksource/cyclone.c @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include + +#include +#include +#include "mach_timer.h" + +#define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr*/ +#define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */ +#define CYCLONE_MPCS_OFFSET 0x51A8 /* offset to select register */ +#define CYCLONE_MPMC_OFFSET 0x51D0 /* offset to count
register */ +#define CYCLONE_TIMER_FREQ 100000000 +#define CYCLONE_TIMER_MASK (0xFFFFFFFF) /* 32 bit mask */ + +int use_cyclone = 0; + +struct clocksource clocksource_cyclone = { + .name = "cyclone", + .rating = 250, + .type = CLOCKSOURCE_MMIO_32, + .mmio_ptr = NULL, /* to be set */ + .mask = (cycle_t)CYCLONE_TIMER_MASK, + .mult = 10, + .shift = 0, + .is_continuous = 1, +}; + +static unsigned long __init calibrate_cyclone(void) +{ + u64 delta64; + unsigned long start, end; + unsigned long i, count; + unsigned long cyclone_freq_khz; + + /* repeat 3 times to make sure the cache is warm */ + for(i=0; i < 3; i++) { + mach_prepare_counter(); + start = readl(clocksource_cyclone.mmio_ptr); + mach_countup(&count); + end = readl(clocksource_cyclone.mmio_ptr); + } + + delta64 = end - start; + + delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ + do_div(delta64,CALIBRATE_TIME_MSEC); + + cyclone_freq_khz = (unsigned long)delta64; + + printk("calculated cyclone_freq: %lu khz\n", cyclone_freq_khz); + return cyclone_freq_khz; +} + +static int __init init_cyclone_clocksource(void) +{ + unsigned long base; /* saved value from CBAR */ + unsigned long offset; + u32 __iomem* reg; + u32 __iomem* volatile cyclone_timer; /* Cyclone MPMC0 register */ + unsigned long khz; + int i; + + /*make sure we're on a summit box*/ + if (!use_cyclone) return -ENODEV; + + printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); + + /* find base address */ + offset = CYCLONE_CBAR_ADDR; + reg = ioremap_nocache(offset, sizeof(reg)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); + return -ENODEV; + } + /* even on 64bit systems, this is only 32bits */ + base = readl(reg); + if(!base){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); + return -ENODEV; + } + iounmap(reg); + + /* setup PMCC */ + offset = base + CYCLONE_PMCC_OFFSET; + reg = ioremap_nocache(offset, sizeof(reg)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* setup MPCS */ + offset = base + CYCLONE_MPCS_OFFSET; + reg = ioremap_nocache(offset, sizeof(reg)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* map in cyclone_timer */ + offset = base + CYCLONE_MPMC_OFFSET; + cyclone_timer = ioremap_nocache(offset, sizeof(u64)); + if(!cyclone_timer){ + printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); + return -ENODEV; + } + + /*quick test to make sure its ticking*/ + for(i=0; i<3; i++){ + u32 old = readl(cyclone_timer); + int stall = 100; + while(stall--) barrier(); + if(readl(cyclone_timer) == old){ + printk(KERN_ERR "Summit chipset: Counter not counting! 
DISABLED\n"); + iounmap(cyclone_timer); + cyclone_timer = NULL; + return -ENODEV; + } + } + clocksource_cyclone.mmio_ptr = cyclone_timer; + + /* sort out mult/shift values */ + khz = calibrate_cyclone(); + clocksource_cyclone.shift = 22; + clocksource_cyclone.mult = clocksource_khz2mult(khz, + clocksource_cyclone.shift); + + register_clocksource(&clocksource_cyclone); + + return 0; +} + +module_init(init_cyclone_clocksource); Index: linux-2.6.14/drivers/clocksource/hpet.c =================================================================== --- /dev/null +++ linux-2.6.14/drivers/clocksource/hpet.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include + +#define HPET_MASK (0xFFFFFFFF) +#define HPET_SHIFT 22 + +/* FSEC = 10^-15 NSEC = 10^-9 */ +#define FSEC_PER_NSEC 1000000 + +struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .type = CLOCKSOURCE_MMIO_32, + .mmio_ptr = NULL, + .mask = (cycle_t)HPET_MASK, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, + .is_continuous = 1, +}; + +static int __init init_hpet_clocksource(void) +{ + unsigned long hpet_period; + void __iomem* hpet_base; + u64 tmp; + + if (!hpet_address) + return -ENODEV; + + /* calculate the hpet address */ + hpet_base = + (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + clocksource_hpet.mmio_ptr = hpet_base + HPET_COUNTER; + + /* calculate the frequency */ + hpet_period = readl(hpet_base + HPET_PERIOD); + + + /* hpet period is in femtoseconds per cycle + * so we need to convert this to ns/cyc units + * approximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + register_clocksource(&clocksource_hpet); + return 0; +} +module_init(init_hpet_clocksource); Index: linux-2.6.14/drivers/clocksource/tsc-interp.c =================================================================== --- /dev/null +++ linux-2.6.14/drivers/clocksource/tsc-interp.c @@ -0,0 +1,112 @@ +/* TSC-Jiffies Interpolation clocksource + Example interpolation clocksource.
+TODO: + o per-cpu TSC offsets +*/ +#include +#include +#include +#include +#include +#include +#include + +static unsigned long current_tsc_khz = 0; + +static seqlock_t tsc_interp_lock = SEQLOCK_UNLOCKED; +static unsigned long tsc_then; +static unsigned long jiffies_then; +struct timer_list tsc_interp_timer; + +static unsigned long mult, shift; + +#define NSEC_PER_JIFFY ((((unsigned long long)NSEC_PER_SEC)<<8)/ACTHZ) +#define SHIFT_VAL 22 + +static cycle_t read_tsc_interp(void); +static void tsc_interp_update_callback(void); + +static struct clocksource clocksource_tsc_interp = { + .name = "tsc-interp", + .rating = 150, + .type = CLOCKSOURCE_FUNCTION, + .read_fnct = read_tsc_interp, + .mask = (cycle_t)((1ULL<<32)-1), + .mult = 1<> shift); + else + ret += (cycle_t)(jiffs_now - jiffs_then)*NSEC_PER_JIFFY; + + return ret; +} + +static void tsc_interp_update_callback(void) +{ + /* only update if tsc_khz has changed */ + if (current_tsc_khz != tsc_khz){ + current_tsc_khz = tsc_khz; + mult = clocksource_khz2mult(current_tsc_khz, shift); + } +} + + +static int __init init_tsc_interp_clocksource(void) +{ + /* TSC initialization is done in arch/i386/kernel/tsc.c */ + if (cpu_has_tsc && tsc_khz) { + current_tsc_khz = tsc_khz; + shift = SHIFT_VAL; + mult = clocksource_khz2mult(current_tsc_khz, shift); + /* setup periodic soft-timer */ + init_timer(&tsc_interp_timer); + tsc_interp_timer.function = tsc_interp_sync; + tsc_interp_timer.expires = jiffies; + add_timer(&tsc_interp_timer); + + register_clocksource(&clocksource_tsc_interp); + } + return 0; +} +module_init(init_tsc_interp_clocksource); Index: linux-2.6.14/kernel/time/Kconfig =================================================================== --- /dev/null +++ linux-2.6.14/kernel/time/Kconfig @@ -0,0 +1,36 @@ +# +# Timer subsystem related configuration options +# + +config KTIME_SCALAR + bool "Ktimers 64bit scalar representation" + depends on !64BIT + default n + help + (You dont want to change this unless you want to hack the + timer code. Just keep it disabled.) + + This option enables the 64bit based scalar representation + of the ktimer internal variables on 32bit systems. On i386 + this results in denser code and slightly better overall + performance. + +config HIGH_RES_TIMERS + bool "High Resolution Timer Support" + depends on GENERIC_TIME + help + This option enables high resolution timer support. If your + hardware is not capable then this option only increases + the size of the kernel image. + +config HIGH_RES_RESOLUTION + int "High Resolution Timer resolution (nanoseconds)" + depends on HIGH_RES_TIMERS + default 1000 + help + This sets the resolution in nanoseconds of the high resolution + timers. Too fine a resolution (small a number) will usually + not be observable due to normal system latencies. For an + 800 MHz processor about 10,000 (10 microseconds) is recommended as a + finest resolution. If you don't need that sort of resolution, + larger values may generate less overhead. Index: linux-2.6.14/include/linux/clockchips.h =================================================================== --- /dev/null +++ linux-2.6.14/include/linux/clockchips.h @@ -0,0 +1,127 @@ +/* linux/include/linux/clockchips.h + * + * This file contains the structure definitions for clockchips. + * + * If you are not a clockchip, or the time of day code, you should + * not be including this file! 
+ */ +#ifndef _LINUX_CLOCKCHIPS_H +#define _LINUX_CLOCKCHIPS_H + +#include + +#ifdef CONFIG_GENERIC_TIME + +#include +#include + +/* Clock event modes and commands */ +enum { + CLOCK_EVT_NONE, + CLOCK_EVT_STARTUP, + CLOCK_EVT_PERIODIC, + CLOCK_EVT_ONESHOT, + CLOCK_EVT_IPI, + CLOCK_EVT_STOP, + CLOCK_EVT_SHUTDOWN, + CLOCK_EVT_RUN_CYCLIC, + CLOCK_EVT_SCHEDTICK, + CLOCK_EVT_NOTICK, +}; + +/* Clock event capability flags */ +#define CLOCK_CAP_TICK 0x000001 + +#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_DYNTICK) +#define CLOCK_CAP_NEXTEVT 0x000002 +#else +#define CLOCK_CAP_NEXTEVT 0x000000 +#endif + +#define CLOCK_CAP_UPDATE 0x000004 + +#ifndef CONFIG_PROFILE_NMI +#define CLOCK_CAP_PROFILE 0x000008 +#else +#define CLOCK_CAP_PROFILE 0x000000 +#endif + +#define CLOCK_CAP_MASK (CLOCK_CAP_TICK | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | CLOCK_CAP_UPDATE) + +/* The device has its own interrupt handler */ +#define CLOCK_HAS_IRQHANDLER 0x010000 + +struct clock_event; + +/** + * struct clock_event - clock event descriptor + * + * @name: ptr to clock event name + * @capabilities: capabilities of the event chip + * @max_delta_ns: maximum delta value in ns + * @min_delta_ns: minimum delta value in ns + * @mult: nanosecond to cycles multiplier + * @shift: nanoseconds to cycles divisor (power of two) + * @set_next_event: set next event + * @set_mode: set mode function + * @suspend: suspend function (optional) + * @resume: resume function (optional) + * @evthandler: Assigned by the framework to be called by the low + * level handler of the event source + * @start_event: called on entry (optional for chip handling...) + * @end_event: called on exit (optional for chip handling...) + * @priv: private device data + */ +struct clock_event { + const char* name; + unsigned int capabilities; + unsigned long max_delta_ns; + unsigned long min_delta_ns; + u32 mult; + u32 shift; + void (*set_next_event)(unsigned long evt); + void (*set_mode)(int mode); + int (*suspend)(void); + int (*resume)(void); + void (*event_handler)(struct pt_regs *regs); + void (*start_event)(void *priv); + void (*end_event)(void *priv); + unsigned int irq; + void *priv; +}; + + + +/* + * Calculate a multiplication factor with shift=32 + */ +static inline unsigned long div_sc32(unsigned long a, unsigned long b) +{ + u64 tmp = ((u64)a) << 32; + do_div(tmp, b); + return (unsigned long) tmp; +} + +static inline unsigned long mpy_sc32(unsigned long a, unsigned long b) +{ + u64 res = (u64) a * b; + + return (unsigned long) (res >> 32); +} + +/* Clock event layer functions */ +extern int setup_local_clockevent(struct clock_event *, cpumask_t cpumask); +extern int setup_global_clockevent(struct clock_event *, cpumask_t cpumask); +extern unsigned long clockevent_delta2ns(unsigned long latch, struct clock_event *evt); +extern void init_clockevents(void); + +extern int clockevents_init_next_event(void); +extern int clockevents_set_next_event(ktime_t expires, ktime_t now); +extern void clockevents_trigger_next_event(void); +extern int clockevents_next_event_available(void); + +#else +# define init_clockevents() do { } while(0) +#endif + +#endif Index: linux-2.6.14/kernel/time/clockevents.c =================================================================== --- /dev/null +++ linux-2.6.14/kernel/time/clockevents.c @@ -0,0 +1,605 @@ +/* + * linux/kernel/time/clockevents.c + * + * This file contains functions which manage clock event drivers. 
+ * + * Copyright(C) 2005 Thomas Gleixner + * + * Kudos to Ingo Molnar for review, criticism, ideas + * + * We have two types of clock event devices: + * - global events (one device per system) + * - local events (one device per cpu) + * + * We assign the various time(r) related interrupts to those devices + * + * - global tick + * - profiling (per cpu) + * - next timer events (per cpu) + * + * TODO: + * - implement variable frequency profiling + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_CLOCK_EVENTS 4 + +struct event_descr { + struct clock_event *event; + unsigned int mode; + unsigned int real_caps; + struct irqaction action; +}; + +struct local_events { + int installed; + struct event_descr events[MAX_CLOCK_EVENTS]; + struct clock_event *nextevt; +}; + +/* Variables related to the global event source */ +static struct event_descr global_eventsource; + +/* Variables related to the per cpu local event sources */ +static DEFINE_PER_CPU(struct local_events, local_eventsources); + +#ifdef CONFIG_SMP +# define recalc_global_event(e) do { } while(0) +#else +# define recalc_global_event(c) recalc_active_event(&global_eventsource, c) +#endif + +/* + * Math helper. Convert a latch value to ns + */ +unsigned long clockevent_delta2ns(unsigned long latch, struct clock_event *evt) +{ + u64 clc = ((u64) latch << evt->shift); + + do_div(clc, evt->mult); + if (clc < KTIME_MONOTONIC_RES) + clc = KTIME_MONOTONIC_RES; + if (clc > 0x7FFFFFFF) + clc = 0x7FFFFFFF; + + return (unsigned long) clc; +} + +/* + * Generic timer interrupt handler usable for all kinds of events + */ +static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct clock_event *evt = dev_id; + + if (evt->start_event) + evt->start_event(evt->priv); + + evt->event_handler(regs); + + if (evt->end_event) + evt->end_event(evt->priv); + + return IRQ_HANDLED; +} + +/* + * Handle tick + */ +static void handle_tick(struct pt_regs *regs) +{ + write_seqlock(&xtime_lock); + do_timer(regs); + write_sequnlock(&xtime_lock); +} + +/* + * Handle tick and update + */ +static void handle_tick_update(struct pt_regs *regs) +{ + write_seqlock(&xtime_lock); + do_timer(regs); + write_sequnlock(&xtime_lock); + + update_process_times(user_mode(regs)); +} + +/* + * Handle tick, update and profiling + */ +static void handle_tick_update_profile(struct pt_regs *regs) +{ + write_seqlock(&xtime_lock); + do_timer(regs); + write_sequnlock(&xtime_lock); + + update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING, regs); +} + +/* + * Handle update + */ +static void handle_update(struct pt_regs *regs) +{ + update_process_times(user_mode(regs)); +} + +/* + * Handle update and profile + */ +static void handle_update_profile(struct pt_regs *regs) +{ + update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING, regs); +} + +/* + * Handle profile + */ +static void handle_profile(struct pt_regs *regs) +{ + profile_tick(CPU_PROFILING, regs); +} + +/* + * Handle next event + */ +static void handle_nextevent(struct pt_regs *regs) +{ + ktimer_interrupt(); +} + +/* + * Handle next event, tick + */ +static void handle_nextevent_tick(struct pt_regs *regs) +{ + int res; + + res = ktimer_interrupt(); + for (; res > 0; res--) + handle_tick(regs); +} + +/* + * Handle next event, update + */ +static void handle_nextevent_update(struct pt_regs *regs) +{ + if (ktimer_interrupt() > 0) + handle_update(regs); +} + +/* + * Handle next event, tick, update + */ +static void 
handle_nextevent_tick_update(struct pt_regs *regs) +{ + int res; + + if ((res = ktimer_interrupt()) == 0) + return; + + for (; res > 0; res--) + handle_tick(regs); + + handle_update(regs); +} + +/* + * Handle next event, profile + */ +static void handle_nextevent_profile(struct pt_regs *regs) +{ + if (ktimer_interrupt() > 0) + handle_profile(regs); +} + +/* + * Handle next event, update, profile + */ +static void handle_nextevent_update_profile(struct pt_regs *regs) +{ + if (ktimer_interrupt() > 0) + handle_update_profile(regs); +} + +/* + * Handle next event, tick, update, profile + */ +static void handle_nextevent_all(struct pt_regs *regs) +{ + int res; + + if ((res = ktimer_interrupt()) == 0) + return; + + for (; res > 0; res--) + handle_tick(regs); + + handle_update_profile(regs); } + +/* + * Lookup table for event assignment + */ +static void *event_handlers[] = { + NULL, /* 0: No capability selected */ + handle_tick, /* 1: Tick only */ + handle_nextevent, /* 2: Next event only */ + handle_nextevent_tick, /* 3: Next event + tick */ + handle_update, /* 4: Update process times */ + handle_tick_update, /* 5: Tick + update process times */ + handle_nextevent_update, /* 6: Next event + + update process times */ + handle_nextevent_tick_update, /* 7: Next event + tick + + update process times */ + handle_profile, /* 8: Profiling int */ + NULL, /* 9: Tick + profiling */ + handle_nextevent_profile, /* A: Next event + profiling */ + NULL, /* B: Next event + tick + profiling */ + handle_update_profile, /* C: Update process times + + profiling */ + handle_tick_update_profile, /* D: Tick + update process times + + profiling */ + handle_nextevent_update_profile,/* E: Next event + + update process times + + profiling */ + handle_nextevent_all, /* F: Next event + tick + + update process times + + profiling */ +}; + +/* + * The selection model makes the following assumptions: + * + * There is only one global event source set up. Global event sources + * are unique devices in a system (UP/SMP). Usually they are set up + * early in the bootup phase to provide the basic tick environment to + * bring up hardware. Such a device can be capable of providing all in + * one functionality including next event scheduling. + * + * When a system has dedicated event sources which can be used for + * particular purposes then we assume that there are no devices set up + * which provide "competing" functionality. i.e. the developer has to + * decide which device should be used for a particular functionality + * rather than letting the management code guess about the best + * fit. The code manages the cases where the number of event sources + * is unknown during compile time, but the functionality of the event + * source is assigned to the respective event source by a human best + * fit decision. + * + * The purpose of the management code is to provide handling code for + * the various possible combinations and the necessary infrastructure + * to handle next event (e.g. high resolution) scheduling with a + * single event source, which makes a periodic rescheduling of the + * tick interrupt necessary. This is done to avoid the #ifdef mess all + * over the architecture dependent timer and event interrupt code for + * the various possible use case combinations and allows clean non + * intrusive implementation of configurable extensions to the time + * related event system e.g. dynamic ticks, high resolution + * timers. + * + * Some architectures can use an NMI based profiling mechanism.
If this + * is used, then profiling is excluded from the event assignments. + * + * On SMP systems, CPUs which have no unique global event source should not + * set up a global event source. The correct way is setting up one + * event source (usually local to CPU0 or the bootcpu in hotplug + * systems) which has the CLOCK_CAP_TICK flag set, so the management + * code assigns exactly one tick source for the complete system. + * + * A special case is pseudo event sources (IPI mechanisms) on SMP + * systems. They can be used for populating tick events from one event + * source across multiple CPUs. + * + */ +static int setup_event(struct event_descr *descr, struct clock_event *evt, + unsigned int caps, cpumask_t cpumask) +{ + void *handler = event_handlers[caps]; + + if (!handler) { + printk(KERN_ERR "Unsupported event source %s\n", evt->name); + return -EINVAL; + } + + /* Store the event handler */ + evt->event_handler = handler; + + /* Save the event descriptor reference */ + descr->event = evt; + + if (!(evt->capabilities & CLOCK_HAS_IRQHANDLER)) { + descr->action.name = evt->name; + descr->action.handler = timer_interrupt; + descr->action.flags = SA_INTERRUPT; + descr->action.mask = cpumask; + descr->action.dev_id = evt; + setup_irq(evt->irq, &descr->action); + } + + descr->real_caps = caps; + descr->mode = CLOCK_EVT_STARTUP; + if (evt->set_mode) + evt->set_mode(CLOCK_EVT_STARTUP); + printk(KERN_INFO "Event source %s installed with caps set: %02x\n", + descr->event->name, descr->real_caps); + + return 0; +} + +/* + * Mask out the functionality which is covered by the new event source + * and assign a new event handler. + */ +static unsigned int recalc_active_event(struct event_descr *descr, + unsigned int caps) +{ + unsigned int gcaps; + + if (!descr->event) + return caps; + + /* Find out the overlapping bits */ + gcaps = descr->real_caps & caps; + + /* + * Be careful here. We don't know in which order the event + * sources are set up. So we might switch off a previously + * registered source completely. + * + * Might need more thoughts though.
+ */ + if (gcaps == descr->real_caps) { + int i; + + i = ffs(gcaps) - 1; + gcaps &= ~(1 << i); + caps &= ~(1 << i); + } + if (!gcaps) + return caps; + + /* Mask the bits which are now covered by the new event */ + descr->real_caps &= ~gcaps; + + /* Assign the new event handler */ + descr->event->event_handler = event_handlers[descr->real_caps]; + printk(KERN_INFO "Event source %s new caps set: %02x\n" , + descr->event->name, descr->real_caps); + + return caps; +} + +/* + * Recalc the events and reassign the handlers if necessary + */ +static int recalc_events(struct local_events *sources, struct clock_event *evt, + cpumask_t cpumask) +{ + unsigned int caps = evt->capabilities & CLOCK_CAP_MASK; + int i; + + if (sources->installed == MAX_CLOCK_EVENTS) + return -ENOSPC; + + if (!event_handlers[caps]) + return -EINVAL; + + recalc_global_event(caps); + + for (i = 0; i < sources->installed; i++) + caps = recalc_active_event(&sources->events[i], caps); + + setup_event(&sources->events[sources->installed], evt, caps, cpumask); + sources->installed++; + if (evt->capabilities & CLOCK_CAP_NEXTEVT) { + sources->nextevt = evt; + ktimer_clock_notify(); + } + + return 0; +} + +/** + * setup_local_clockevent - Set up a cpu local clock event device + * + * @evtdev: event device to be registered + * @cpumask: cpumask for the irq setup + */ +int setup_local_clockevent(struct clock_event *evtdev, cpumask_t cpumask) +{ + struct local_events *sources = &__get_cpu_var(local_eventsources); + unsigned long flags; + int res; + + /* Recalc event sources and maybe reassign interrupts */ + local_irq_save(flags); + res = recalc_events(sources, evtdev, cpumask); + local_irq_restore(flags); + + return res; +} +EXPORT_SYMBOL(setup_local_clockevent); + +/** + * setup_global_clockevent - Set the device which generates global clock events + * + * @evt: The device which generates global clock events (ticks) + */ +int __init setup_global_clockevent(struct clock_event *evt, cpumask_t cpumask) +{ + int res; + + res = setup_event(&global_eventsource, evt, + evt->capabilities & CLOCK_CAP_MASK, cpumask); +#ifndef CONFIG_SMP + /* + * The "global" event source on UP systems can serve as + * next event source! + */ + if (!res && (evt->capabilities & CLOCK_CAP_NEXTEVT)) + per_cpu(local_eventsources, 0).nextevt = evt; +#endif + return res; +} + +/** + * clockevents_next_event_available - Check for an installed next event source + */ +int clockevents_next_event_available(void) +{ + struct local_events *sources = &__get_cpu_var(local_eventsources); + int i; + + if (!sources->nextevt) + return 0; + +#ifndef CONFIG_SMP + if (sources->nextevt == global_eventsource.event) + return CLOCK_EVT_SCHEDTICK; +#endif + /* + * Check whether the next event source is solely for next events or + * whether it has to do some periodic tick functionality. + * We use the real_caps field here, as some other source might + * have switched off one of the capability flags.
+ */ + for (i = 0; i < sources->installed; i++) { + if (sources->nextevt != sources->events[i].event) + continue; + + if (sources->events[i].real_caps & ~CLOCK_CAP_NEXTEVT) + return CLOCK_EVT_SCHEDTICK; + return CLOCK_EVT_NOTICK; + } + return CLOCK_EVT_NOTICK; +} + +int clockevents_init_next_event(void) +{ + struct local_events *sources = &__get_cpu_var(local_eventsources); + + if (!sources->nextevt) + return 0; + + if (sources->nextevt->set_mode) + sources->nextevt->set_mode(CLOCK_EVT_ONESHOT); + + return 1; +} + +int clockevents_set_next_event(ktime_t expires, ktime_t now) +{ + struct local_events *sources = &__get_cpu_var(local_eventsources); + nsec_t delta = ktime_to_ns(ktime_sub(expires, now)); + unsigned long clc; + + if (delta <= 0) + return -ETIME; + if (delta > sources->nextevt->max_delta_ns) + delta = sources->nextevt->max_delta_ns; + if (delta < sources->nextevt->min_delta_ns) + delta = sources->nextevt->min_delta_ns; + + clc = mpy_sc32((unsigned long) delta, sources->nextevt->mult); + sources->nextevt->set_next_event(clc); + return 0; +} + +void clockevents_trigger_next_event(void) +{ +} + +#ifdef CONFIG_PM +static int +global_eventsource_suspend(struct sys_device *dev, pm_message_t state) +{ + /* Do generic stuff here */ + if (global_eventsource.event->suspend) + global_eventsource.event->suspend(); + return 0; +} + +static int global_eventsource_resume(struct sys_device *dev) +{ + /* Do generic stuff here */ + if (global_eventsource.event->resume) + global_eventsource.event->resume(); + return 0; +} +#else +# define global_eventsource_resume NULL +# define global_eventsource_suspend NULL +#endif + +static struct sysdev_class global_clock_event_sysclass = { + .resume = global_eventsource_resume, + .suspend = global_eventsource_suspend, + set_kset_name("global_clock_event"), +}; + +static struct sys_device device_global_clock_event = { + .id = 0, + .cls = &global_clock_event_sysclass, +}; + +static int __init global_clock_event_devinit(void) +{ + int error = sysdev_class_register(&global_clock_event_sysclass); + + if (!error) + error = sysdev_register(&device_global_clock_event); + + return error; +} + +device_initcall(global_clock_event_devinit); + +/* + * Functions related to initialization + */ +static void __devinit init_clockevents_cpu(int cpu) +{ +} + +static int __devinit clockevents_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch(action) { + case CPU_UP_PREPARE: + init_clockevents_cpu(cpu); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_DEAD: + break; +#endif + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata clockevents_nb = { + .notifier_call = clockevents_cpu_notify, +}; + +void __init init_clockevents(void) +{ + clockevents_cpu_notify(&clockevents_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + register_cpu_notifier(&clockevents_nb); +} Index: linux-2.6.14/arch/i386/kernel/apic.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/apic.c +++ linux-2.6.14/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,23 @@ int enable_local_apic __initdata = 0; /* */ int apic_verbosity; +static unsigned int calibration_result; + +static void lapic_next_event(unsigned long evt); +static void lapic_timer_setup(int mode); + +static struct clock_event lapic_clockevent = { + .name = "lapic", + .capabilities = CLOCK_CAP_NEXTEVT | 
CLOCK_CAP_PROFILE | + CLOCK_HAS_IRQHANDLER +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, +}; static void apic_pm_activate(void); @@ -92,10 +110,6 @@ void __init apic_intr_init(void) /* Using APIC to generate smp_local_timer_interrupt? */ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static int enabled_via_apicbase; void enable_NMI_through_LVT0 (void * dummy) @@ -869,6 +883,11 @@ fake_ioapic_page: */ /* + * FIXME: Move this to i8253.h. There is no need to keep the access to + * the PIT scattered all around the place -tglx + */ + +/* * The timer chip is already set up at HZ interrupts per second here, * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. @@ -926,12 +945,16 @@ void (*wait_timer_tick)(void) __devinitd #define APIC_DIVISOR 16 -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot) { unsigned int lvtt_value, tmp_value, ver; ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); @@ -944,31 +967,37 @@ static void __setup_APIC_LVTT(unsigned i & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void __devinit setup_APIC_timer(unsigned int clocks) +static void lapic_next_event(unsigned long evt) +{ + apic_write_around(APIC_TMICT, evt); +} + +static void lapic_timer_setup(int mode) { unsigned long flags; local_irq_save(flags); - - /* - * Wait for IRQ0's slice: - */ - wait_timer_tick(); - - __setup_APIC_LVTT(clocks); - + __setup_APIC_LVTT(calibration_result, mode == CLOCK_EVT_ONESHOT); local_irq_restore(flags); } +static void __devinit setup_APIC_timer(void) +{ + setup_local_clockevent(&lapic_clockevent, CPU_MASK_NONE); +} + /* * In this function we calibrate APIC bus clocks to the external * timer. Unfortunately we cannot use jiffies and the timer irq * to calibrate, since some later bootup code depends on getting * the first irq? Ugh. * + * TODO: Fix this rather than saying "Ugh" -tglx + * * We want to do the calibration only once since we * want to have local timer irqs syncron. CPUs connected * by the same APIC bus have the very same bus frequency. @@ -991,7 +1020,7 @@ static int __init calibrate_APIC_clock(v * value into the APIC clock, we just want to get the * counter running for calibration. */ - __setup_APIC_LVTT(1000000000); + __setup_APIC_LVTT(1000000000, 0); /* * The timer chip counts down to zero. Let's wait @@ -1028,6 +1057,13 @@ static int __init calibrate_APIC_clock(v result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc32(tt1-tt2, TICK_NSEC * LOOPS); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + if (cpu_has_tsc) apic_printk(APIC_VERBOSE, "..... 
CPU clock speed is " "%ld.%04ld MHz.\n", @@ -1042,8 +1078,6 @@ static int __init calibrate_APIC_clock(v return result; } -static unsigned int calibration_result; - void __init setup_boot_APIC_clock(void) { apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); @@ -1055,14 +1089,14 @@ void __init setup_boot_APIC_clock(void) /* * Now set up the timer for real. */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); local_irq_enable(); } void __devinit setup_secondary_APIC_clock(void) { - setup_APIC_timer(calibration_result); + setup_APIC_timer(); } void __devinit disable_APIC_timer(void) @@ -1085,6 +1119,8 @@ void enable_APIC_timer(void) } } +static DEFINE_PER_CPU(int, prof_multiplier) = 1; + /* * the frequency of the profiling timer can be changed * by writing a multiplier value into /proc/profile. @@ -1112,60 +1148,6 @@ int setup_profiling_timer(unsigned int m return 0; } - -#undef APIC_DIVISOR - -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ - -inline void smp_local_timer_interrupt(struct pt_regs * regs) -{ - int cpu = smp_processor_id(); - - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT( - calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - -#ifdef CONFIG_SMP - update_process_times(user_mode_vm(regs)); -#endif - } - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - /* * Local APIC timer interrupt. This is the most natural way for doing * local interrupts, but local timer interrupts can be emulated by @@ -1195,7 +1177,8 @@ fastcall void smp_apic_timer_interrupt(s * interrupt lock, which is the WrongThing (tm) to do. */ irq_enter(); - smp_local_timer_interrupt(regs); + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs); irq_exit(); } Index: linux-2.6.14/arch/i386/kernel/i8259.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/i8259.c +++ linux-2.6.14/arch/i386/kernel/i8259.c @@ -422,12 +422,6 @@ void __init init_IRQ(void) intr_init_hook(); /* - * Set the clock to HZ Hz, we already have a valid - * vector now: - */ - setup_pit_timer(); - - /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. 
*/ Index: linux-2.6.14/arch/i386/kernel/io_apic.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/io_apic.c +++ linux-2.6.14/arch/i386/kernel/io_apic.c @@ -87,6 +87,25 @@ int vector_irq[NR_VECTORS] __read_mostly #define vector_to_irq(vector) (vector) #endif +static int timer_ack; + +void io_apic_timer_ack(void *priv) +{ + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to reset the IRR bit for do_slow_gettimeoffset(). + * This will also deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + spin_lock(&i8259A_lock); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(PIC_MASTER_POLL); + spin_unlock(&i8259A_lock); + } +} + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super Index: linux-2.6.14/arch/i386/kernel/mca.c =================================================================== --- linux-2.6.14.orig/arch/i386/kernel/mca.c +++ linux-2.6.14/arch/i386/kernel/mca.c @@ -472,3 +472,22 @@ void mca_handle_nmi(void) mca_nmi_hook(); } /* mca_handle_nmi */ + +void mca_timer_ack(void *priv) +{ + int irq; + + if (MCA_bus) { + /* The PS/2 uses level-triggered interrupts. You can't + turn them off, nor would you want to (any attempt to + enable edge-triggered interrupts usually gets intercepted by a + special hardware circuit). Hence we have to acknowledge + the timer interrupt. Through some incredibly stupid + design idea, the reset for IRQ 0 is done by setting the + high bit of the PPI port B (0x61). Note that some PS/2s, + notably the 55SX, work fine if this is removed. */ + + irq = inb_p( 0x61 ); /* read the current state */ + outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ + } +} Index: linux-2.6.14/arch/i386/mach-default/setup.c =================================================================== --- linux-2.6.14.orig/arch/i386/mach-default/setup.c +++ linux-2.6.14/arch/i386/mach-default/setup.c @@ -78,8 +78,6 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; - /** * time_init_hook - do any specific initialisations for the system timer. 
 *
@@ -89,7 +87,6 @@ static struct irqaction irq0 = { timer_
 **/
 void __init time_init_hook(void)
 {
-	setup_irq(0, &irq0);
 }
 
 #ifdef CONFIG_MCA
Index: linux-2.6.14/include/asm-i386/arch_hooks.h
===================================================================
--- linux-2.6.14.orig/include/asm-i386/arch_hooks.h
+++ linux-2.6.14/include/asm-i386/arch_hooks.h
@@ -14,7 +14,6 @@
 extern void init_ISA_irqs(void);
 extern void apic_intr_init(void);
 extern void smp_intr_init(void);
-extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
 
 /* these are the defined hooks */
 extern void intr_init_hook(void);
Index: linux-2.6.14/include/asm-i386/io_apic.h
===================================================================
--- linux-2.6.14.orig/include/asm-i386/io_apic.h
+++ linux-2.6.14/include/asm-i386/io_apic.h
@@ -204,8 +204,11 @@ extern int io_apic_set_pci_routing (int
 
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 
+extern void io_apic_timer_ack(void *);
+
 #else /* !CONFIG_X86_IO_APIC */
 #define io_apic_assign_pci_irqs 0
+#define io_apic_timer_ack NULL
 #endif
 
 extern int assign_irq_vector(int irq);
Index: linux-2.6.14/include/asm-i386/mach-default/do_timer.h
===================================================================
--- linux-2.6.14.orig/include/asm-i386/mach-default/do_timer.h
+++ linux-2.6.14/include/asm-i386/mach-default/do_timer.h
@@ -1,86 +1,2 @@
 /* defines for inline arch setup functions */
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-/**
- * do_timer_interrupt_hook - hook into timer tick
- * @regs: standard registers from interrupt
- *
- * Description:
- * This hook is called immediately after the timer interrupt is ack'd.
- * It's primary purpose is to allow architectures that don't possess
- * individual per CPU clocks (like the CPU APICs supply) to broadcast the
- * timer interrupt as a means of triggering reschedules etc.
- **/
-
-static inline void do_timer_interrupt_hook(struct pt_regs *regs)
-{
-	do_timer(regs);
-#ifndef CONFIG_SMP
-	update_process_times(user_mode(regs));
-#endif
-/*
- * In the SMP case we use the local APIC timer interrupt to do the
- * profiling, except when we simulate SMP mode on a uniprocessor
- * system, in that case we have to call the local interrupt handler.
- */
-#ifndef CONFIG_X86_LOCAL_APIC
-	profile_tick(CPU_PROFILING, regs);
-#else
-	if (!using_apic_timer)
-		smp_local_timer_interrupt(regs);
-#endif
-}
-
-
-/* you can safely undefine this if you don't have the Neptune chipset */
-
-#define BUGGY_NEPTUN_TIMER
-
-/**
- * do_timer_overflow - process a detected timer overflow condition
- * @count: hardware timer interrupt count on overflow
- *
- * Description:
- * This call is invoked when the jiffies count has not incremented but
- * the hardware timer interrupt has. It means that a timer tick interrupt
- * came along while the previous one was pending, thus a tick was missed
- **/
-static inline int do_timer_overflow(int count)
-{
-	int i;
-
-	spin_lock(&i8259A_lock);
-	/*
-	 * This is tricky when I/O APICs are used;
-	 * see do_timer_interrupt().
-	 */
-	i = inb(0x20);
-	spin_unlock(&i8259A_lock);
-
-	/* assumption about timer being IRQ0 */
-	if (i & 0x01) {
-		/*
-		 * We cannot detect lost timer interrupts ...
-		 * well, that's why we call them lost, don't we? :)
-		 * [hmm, on the Pentium and Alpha we can ... sort of]
-		 */
-		count -= LATCH;
-	} else {
-#ifdef BUGGY_NEPTUN_TIMER
-		/*
-		 * for the Neptun bug we know that the 'latch'
-		 * command doesn't latch the high and low value
-		 * of the counter atomically. Thus we have to
-		 * substract 256 from the counter
-		 * ... funny, isnt it? :)
-		 */
-
-		count -= 256;
-#else
-		printk("do_slow_gettimeoffset(): hardware timer problem?\n");
-#endif
-	}
-	return count;
-}
 
Index: linux-2.6.14/include/linux/mca.h
===================================================================
--- linux-2.6.14.orig/include/linux/mca.h
+++ linux-2.6.14/include/linux/mca.h
@@ -12,8 +12,10 @@
 #include <asm/mca.h>
 
 extern int MCA_bus;
+extern void mca_timer_ack(void *);
 #else
 #define MCA_bus 0
+#define mca_timer_ack NULL
 #endif
 
 /* This sets up an information callback for /proc/mca/slot?. The
Index: linux-2.6.14/arch/x86_64/kernel/i8259.c
===================================================================
--- linux-2.6.14.orig/arch/x86_64/kernel/i8259.c
+++ linux-2.6.14/arch/x86_64/kernel/i8259.c
@@ -515,7 +515,7 @@ void i8254_timer_resume(void)
 }
 
 static struct sysdev_class timer_sysclass = {
-	set_kset_name("timer"),
+	set_kset_name("timer_pit"),
 	.resume = timer_resume,
 };
 
Index: linux-2.6.14/include/linux/interrupt.h
===================================================================
--- linux-2.6.14.orig/include/linux/interrupt.h
+++ linux-2.6.14/include/linux/interrupt.h
@@ -112,7 +112,10 @@ enum
 	NET_TX_SOFTIRQ,
 	NET_RX_SOFTIRQ,
 	SCSI_SOFTIRQ,
-	TASKLET_SOFTIRQ
+	TASKLET_SOFTIRQ,
+#ifdef CONFIG_HIGH_RES_TIMERS
+	KTIMER_SOFTIRQ,
+#endif
 };
 
 /* softirq mask and active fields moved to irq_cpustat_t in
Index: linux-2.6.14/Makefile
===================================================================
--- linux-2.6.14.orig/Makefile
+++ linux-2.6.14/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 14
-EXTRAVERSION =
+EXTRAVERSION =-kthrt1
 NAME=Affluent Albatross
 
 # *DOCUMENTATION*
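
The io_apic_timer_ack() and mca_timer_ack() hooks added above share one shape: a void (*)(void *) callback that degrades to a plain NULL #define when the corresponding hardware support is not configured, so generic tick code can hold a single ack pointer and call it only when set. The sketch below only illustrates that pattern; tick_ack_handler, tick_set_ack_handler and tick_do_ack are hypothetical names, not symbols introduced by this patch.

/*
 * Illustrative sketch only -- not part of the patch.  The generic tick
 * code is assumed to keep one platform-supplied ack callback and run it
 * on every timer interrupt before the tick event handler.
 */
typedef void (*timer_ack_fn)(void *priv);

static timer_ack_fn tick_ack_handler;	/* hypothetical holder */
static void *tick_ack_priv;

static void tick_set_ack_handler(timer_ack_fn fn, void *priv)
{
	tick_ack_handler = fn;
	tick_ack_priv = priv;
}

static void tick_do_ack(void)
{
	/*
	 * io_apic_timer_ack and mca_timer_ack collapse to NULL when the
	 * hardware is not configured, so a NULL check is all the caller
	 * needs.
	 */
	if (tick_ack_handler)
		tick_ack_handler(tick_ack_priv);
}

Setup code would then pick whichever routine the platform needs, e.g. tick_set_ack_handler(MCA_bus ? mca_timer_ack : io_apic_timer_ack, NULL); the actual selection logic presumably lives elsewhere in the series.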
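
The interrupt.h hunk reserves a new KTIMER_SOFTIRQ slot under CONFIG_HIGH_RES_TIMERS but does not show how it gets used. As a rough illustration of the standard 2.6.14 softirq API (open_softirq() and raise_softirq() are real kernel calls; run_ktimer_softirq and ktimer_softirq_init are hypothetical names):

#ifdef CONFIG_HIGH_RES_TIMERS
/*
 * Illustrative sketch only -- not part of the patch.  This is the usual
 * way a 2.6.14 softirq slot is hooked up.
 */
static void run_ktimer_softirq(struct softirq_action *h)
{
	/* expire pending high resolution timers for this CPU */
}

void __init ktimer_softirq_init(void)
{
	open_softirq(KTIMER_SOFTIRQ, run_ktimer_softirq, NULL);
}
#endif

The timer interrupt path would then presumably defer expiry work with raise_softirq(KTIMER_SOFTIRQ), which is why the enum slot has to exist even though no caller appears in this part of the series.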