Index: linux/Documentation/kernel-parameters.txt
===================================================================
--- linux.orig/Documentation/kernel-parameters.txt
+++ linux/Documentation/kernel-parameters.txt
@@ -1637,6 +1637,12 @@ running once the system is up.
time Show timing data prefixed to each printk message line
+ timeout_granularity=
+ [KNL]
+			Timeout granularity: process timer wheel timers every
+			timeout_granularity jiffies. Defaults to 1 (process
+			timers HZ times per second, i.e. most fine-grained).
+
clocksource= [GENERIC_TIME] Override the default clocksource
Override the default clocksource and use the clocksource
with the name specified.
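
The parameter goes on the kernel command line. As a purely illustrative
example, the following boots with the timer wheel processed only once
every 10 jiffies, trading timer precision for fewer wheel runs:

        timeout_granularity=10
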
Index: linux/Makefile
===================================================================
--- linux.orig/Makefile
+++ linux/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 18
-EXTRAVERSION =
+EXTRAVERSION = -rt2
NAME=Avast! A bilge rat!
# *DOCUMENTATION*
@@ -485,10 +485,14 @@ endif
include $(srctree)/arch/$(ARCH)/Makefile
-ifdef CONFIG_FRAME_POINTER
-CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ifdef CONFIG_MCOUNT
+CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
else
-CFLAGS += -fomit-frame-pointer
+ ifdef CONFIG_FRAME_POINTER
+ CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ else
+ CFLAGS += -fomit-frame-pointer
+ endif
endif
ifdef CONFIG_UNWIND_INFO
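
For background: building with -pg makes gcc emit a call to mcount() in
every function prologue, which is the hook the latency-tracing code
attaches to. A minimal C sketch of the effect (the call is generated by
the compiler, never written by hand, and the function name here is
hypothetical):

        extern void mcount(void);

        void some_kernel_function(void)
        {
                mcount();       /* inserted by gcc -pg */
                /* ... real function body ... */
        }

The kernel must therefore provide an mcount() implementation everywhere
-pg-compiled code can run, which is why the boot decompressor below
gains a dummy trampoline.
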
Index: linux/arch/arm/Kconfig
===================================================================
--- linux.orig/arch/arm/Kconfig
+++ linux/arch/arm/Kconfig
@@ -17,6 +17,10 @@ config ARM
Europe. There is an ARM Linux project with a web page at
.
+config GENERIC_TIME
+ bool
+ default y
+
config MMU
bool
default y
@@ -51,6 +55,18 @@ config GENERIC_HARDIRQS
bool
default y
+config STACKTRACE_SUPPORT
+ bool
+ default y
+
+config LOCKDEP_SUPPORT
+ bool
+ default y
+
+config TRACE_IRQFLAGS_SUPPORT
+ bool
+ default y
+
config HARDIRQS_SW_RESEND
bool
default y
@@ -344,6 +360,15 @@ source "arch/arm/mach-at91rm9200/Kconfig
source "arch/arm/mach-netx/Kconfig"
+config IS_TICK_BASED
+ bool
+ depends on GENERIC_TIME
+ default y
+ help
+ This is used on platforms that have not added a clocksource to
+ support GENERIC_TIME. Platforms which have a clocksource
+ should set this to 'n' in their mach-*/Kconfig.
+
# Definitions to make life easier
config ARCH_ACORN
bool
@@ -419,6 +444,8 @@ endmenu
menu "Kernel Features"
+source "kernel/time/Kconfig"
+
config SMP
bool "Symmetric Multi-Processing (EXPERIMENTAL)"
depends on EXPERIMENTAL && REALVIEW_MPCORE
@@ -463,38 +490,7 @@ config LOCAL_TIMERS
accounting to be spread across the timer interval, preventing a
"thundering herd" at every timer tick.
-config PREEMPT
- bool "Preemptible Kernel (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
- This allows applications to run more reliably even when the system is
- under load.
-
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
-
-config NO_IDLE_HZ
- bool "Dynamic tick timer"
- help
- Select this option if you want to disable continuous timer ticks
- and have them programmed to occur as required. This option saves
- power as the system can remain in idle state for longer.
-
- By default dynamic tick is disabled during the boot, and can be
- manually enabled with:
-
- echo 1 > /sys/devices/system/timer/timer0/dyn_tick
-
- Alternatively, if you want dynamic tick automatically enabled
- during boot, pass "dyntick=enable" via the kernel command string.
-
- Please note that dynamic tick may affect the accuracy of
- timekeeping on some platforms depending on the implementation.
- Currently at least OMAP, PXA2xx and SA11x0 platforms are known
- to have accurate timekeeping with dynamic tick.
+source kernel/Kconfig.preempt
config HZ
int
Index: linux/arch/arm/boot/compressed/head.S
===================================================================
--- linux.orig/arch/arm/boot/compressed/head.S
+++ linux/arch/arm/boot/compressed/head.S
@@ -822,6 +822,19 @@ memdump: mov r12, r0
mov pc, r10
#endif
+#ifdef CONFIG_MCOUNT
+/* CONFIG_MCOUNT causes the boot header to be built with -pg,
+ * which requires this mcount() trampoline.
+ */
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+mcount:
+ mov pc, lr @ just return
+#endif
+
+
reloc_end:
.align
Index: linux/arch/arm/common/time-acorn.c
===================================================================
--- linux.orig/arch/arm/common/time-acorn.c
+++ linux/arch/arm/common/time-acorn.c
@@ -77,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_i
static struct irqaction ioc_timer_irq = {
.name = "timer",
- .flags = IRQF_DISABLED,
+ .flags = IRQF_DISABLED | IRQF_NODELAY,
.handler = ioc_timer_interrupt
};
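
IRQF_NODELAY is an -rt-specific flag: under PREEMPT_RT most interrupt
handlers are pushed into threads, and this flag keeps a handler in
hard-irq context. The timer tick must not be threaded, since timekeeping
depends on it running immediately. A minimal sketch of requesting such a
handler directly (handler and device name are hypothetical):

        static int my_timer_setup(unsigned int irq)
        {
                /* IRQF_NODELAY: run in hard-irq context even on -rt */
                return request_irq(irq, my_timer_interrupt,
                                   IRQF_DISABLED | IRQF_NODELAY,
                                   "my-timer", NULL);
        }
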
Index: linux/arch/arm/kernel/dma.c
===================================================================
--- linux.orig/arch/arm/kernel/dma.c
+++ linux/arch/arm/kernel/dma.c
@@ -20,7 +20,7 @@
#include
-DEFINE_SPINLOCK(dma_spin_lock);
+DEFINE_RAW_SPINLOCK(dma_spin_lock);
EXPORT_SYMBOL(dma_spin_lock);
static dma_t dma_chan[MAX_DMA_CHANNELS];
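
Under PREEMPT_RT a plain spinlock_t becomes a sleeping,
priority-inheriting rt_mutex, so any lock taken from hard-irq or other
non-sleepable context must be declared raw_spinlock_t to keep true
spinning behaviour; the spin_lock_*() calls are type-switched onto the
raw operations. This is the pattern behind the many DEFINE_RAW_SPINLOCK
conversions in this patch. A sketch under those assumed -rt semantics
(lock and handler names are illustrative):

        static DEFINE_RAW_SPINLOCK(hw_lock);

        static irqreturn_t hw_irq(int irq, void *dev_id, struct pt_regs *regs)
        {
                unsigned long flags;

                /* resolves to the raw (spinning) lock on -rt */
                spin_lock_irqsave(&hw_lock, flags);
                /* ... touch hardware registers ... */
                spin_unlock_irqrestore(&hw_lock, flags);
                return IRQ_HANDLED;
        }
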
Index: linux/arch/arm/kernel/entry-armv.S
===================================================================
--- linux.orig/arch/arm/kernel/entry-armv.S
+++ linux/arch/arm/kernel/entry-armv.S
@@ -191,6 +191,9 @@ __dabt_svc:
__irq_svc:
svc_entry
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_off
+#endif
#ifdef CONFIG_PREEMPT
get_thread_info tsk
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
@@ -201,7 +204,7 @@ __irq_svc:
irq_handler
#ifdef CONFIG_PREEMPT
ldr r0, [tsk, #TI_FLAGS] @ get flags
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
blne svc_preempt
preempt_return:
ldr r0, [tsk, #TI_PREEMPT] @ read preempt value
@@ -211,6 +214,10 @@ preempt_return:
#endif
ldr r0, [sp, #S_PSR] @ irqs are already disabled
msr spsr_cxsf, r0
+#ifdef CONFIG_TRACE_IRQFLAGS
+ tst r0, #PSR_I_BIT
+ bleq trace_hardirqs_on
+#endif
ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr
.ltorg
@@ -228,7 +235,7 @@ svc_preempt:
str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
beq preempt_return @ go again
b 1b
#endif
@@ -398,6 +405,9 @@ __dabt_usr:
__irq_usr:
usr_entry
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_off
+#endif
get_thread_info tsk
#ifdef CONFIG_PREEMPT
ldr r8, [tsk, #TI_PREEMPT] @ get preempt count
@@ -412,6 +422,9 @@ __irq_usr:
teq r0, r7
strne r0, [r0, -r0]
#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_on
+#endif
mov why, #0
b ret_to_user
Index: linux/arch/arm/kernel/entry-common.S
===================================================================
--- linux.orig/arch/arm/kernel/entry-common.S
+++ linux/arch/arm/kernel/entry-common.S
@@ -3,6 +3,8 @@
*
* Copyright (C) 2000 Russell King
*
+ * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -40,7 +42,7 @@ ret_fast_syscall:
fast_work_pending:
str r0, [sp, #S_R0+S_OFF]! @ returned r0
work_pending:
- tst r1, #_TIF_NEED_RESCHED
+ tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
bne work_resched
tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING
beq no_work_pending
@@ -50,7 +52,8 @@ work_pending:
b ret_slow_syscall @ Check work again
work_resched:
- bl schedule
+ bl __schedule
+
/*
* "slow" syscall return path. "why" tells us if this was a real syscall.
*/
@@ -387,6 +390,112 @@ ENTRY(sys_oabi_call_table)
#include "calls.S"
#undef ABI
#undef OBSOLETE
+#endif
+
+#ifdef CONFIG_FRAME_POINTER
+
+#ifdef CONFIG_MCOUNT
+/*
+ * At the point where we are in mcount() we maintain the
+ * frame of the prologue code and keep the call to mcount()
+ * out of the stack frame list:
+
+ saved pc <---\ caller of instrumented routine
+ saved lr |
+ ip/prev_sp |
+ fp -----^ |
+ : |
+ |
+ -> saved pc | instrumented routine
+ | saved lr |
+ | ip/prev_sp |
+ | fp ---------/
+ | :
+ |
+ | mcount
+ | saved pc
+ | saved lr
+ | ip/prev sp
+ -- fp
+ r3
+ r2
+ r1
+ sp-> r0
+ :
+ */
+
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+
+/* gcc -pg generated FUNCTION_PROLOGUE references mcount()
+ * and has already created the stack frame invocation for
+ * the routine we have been called to instrument. We create
+ * a complete frame nevertheless, as we want to use the same
+ * call to mcount() from C code.
+ */
+mcount:
+
+ ldr ip, =mcount_enabled @ leave early, if disabled
+ ldr ip, [ip]
+ cmp ip, #0
+ moveq pc, lr
+
+ mov ip, sp
+ stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame
+
+ ldr r1, [fp, #-4] @ get lr (the return address
+ @ of the caller of the
+ @ instrumented function)
+ mov r0, lr @ get lr - (the return address
+ @ of the instrumented function)
+
+ sub fp, ip, #4 @ point fp at this frame
+
+ bl __trace
+1:
+ ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return
+
+#endif
+
+/* ARM replacement for gcc's __builtin_return_address(n), which is
+ * not supported on ARM for n > 0. n == 0 is handled here as well.
+ *
+ * Walk up the stack frame until the desired frame is found or a NULL
+ * fp is encountered, return NULL in the latter case.
+ *
+ * Note: it is possible under code optimization for the stack invocation
+ * of an ancestor function (level N) to be removed before calling a
+ * descendant function (level N+1). No easy means is available to deduce
+ * this scenario with the result being [for example] caller_addr(0) when
+ * called from level N+1 returning level N-1 rather than the expected
+ * level N. This optimization issue appears isolated to the case of
+ * a call to a level N+1 routine made at the tail end of a level N
+ * routine -- the level N frame is deleted and a simple branch is made
+ * to the level N+1 routine.
+ */
+
+ .text
+ .align 0
+ .type arm_return_addr %function
+ .global arm_return_addr
+
+arm_return_addr:
+ mov ip, r0
+ mov r0, fp
+3:
+ cmp r0, #0
+ beq 1f @ frame list hit end, bail
+ cmp ip, #0
+ beq 2f @ reached desired frame
+ ldr r0, [r0, #-12] @ else continue, get next fp
+ sub ip, ip, #1
+ b 3b
+2:
+ ldr r0, [r0, #-4] @ get target return address
+1:
+ mov pc, lr
#endif
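
From C the helper above is reached through a plain prototype. A hedged
sketch of how it can stand in for __builtin_return_address(n) with
n > 0, which gcc does not support on ARM (the wrapper macro names are
illustrative, not part of this patch):

        extern unsigned long arm_return_addr(int level);

        #define CALLER_ADDR0    arm_return_addr(0) /* immediate caller */
        #define CALLER_ADDR1    arm_return_addr(1) /* one frame higher */

As the comment in the assembly notes, tail-call optimization can remove
a frame and skew the result by one level.
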
Index: linux/arch/arm/kernel/fiq.c
===================================================================
--- linux.orig/arch/arm/kernel/fiq.c
+++ linux/arch/arm/kernel/fiq.c
@@ -89,7 +89,7 @@ void set_fiq_handler(void *start, unsign
* disable irqs for the duration. Note - these functions are almost
* entirely coded in assembly.
*/
-void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
@@ -107,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs
: "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE));
}
-void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
Index: linux/arch/arm/kernel/irq.c
===================================================================
--- linux.orig/arch/arm/kernel/irq.c
+++ linux/arch/arm/kernel/irq.c
@@ -101,7 +101,7 @@ unlock:
/* Handle bad interrupts */
static struct irq_desc bad_irq_desc = {
.handle_irq = handle_bad_irq,
- .lock = SPIN_LOCK_UNLOCKED
+ .lock = RAW_SPIN_LOCK_UNLOCKED(bad_irq_desc.lock)
};
/*
@@ -109,10 +109,12 @@ static struct irq_desc bad_irq_desc = {
* come via this function. Instead, they should provide their
* own 'handler'
*/
-asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
struct irqdesc *desc = irq_desc + irq;
+ trace_special(instruction_pointer(regs), irq, 0);
+
/*
* Some hardware gives randomly wrong interrupts. Rather
* than crashing, do something sensible.
Index: linux/arch/arm/kernel/process.c
===================================================================
--- linux.orig/arch/arm/kernel/process.c
+++ linux/arch/arm/kernel/process.c
@@ -123,7 +123,7 @@ static void default_idle(void)
cpu_relax();
else {
local_irq_disable();
- if (!need_resched()) {
+ if (!need_resched() && !need_resched_delayed()) {
timer_dyn_reprogram();
arch_idle();
}
@@ -154,8 +154,14 @@ void cpu_idle(void)
if (!idle)
idle = default_idle;
leds_event(led_idle_start);
- while (!need_resched())
- idle();
+
+ if (!need_resched() && !need_resched_delayed() &&
+ !hrtimer_stop_sched_tick()) {
+ while (!need_resched() && !need_resched_delayed())
+ idle();
+ }
+ hrtimer_restart_sched_tick();
+
leds_event(led_idle_end);
preempt_enable_no_resched();
schedule();
Index: linux/arch/arm/kernel/semaphore.c
===================================================================
--- linux.orig/arch/arm/kernel/semaphore.c
+++ linux/arch/arm/kernel/semaphore.c
@@ -49,14 +49,16 @@
* we cannot lose wakeup events.
*/
-void __up(struct semaphore *sem)
+fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem)
{
wake_up(&sem->wait);
}
+EXPORT_SYMBOL(__compat_up);
+
static DEFINE_SPINLOCK(semaphore_lock);
-void __sched __down(struct semaphore * sem)
+fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -89,7 +91,9 @@ void __sched __down(struct semaphore * s
wake_up(&sem->wait);
}
-int __sched __down_interruptible(struct semaphore * sem)
+EXPORT_SYMBOL(__compat_down);
+
+fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -140,6 +144,8 @@ int __sched __down_interruptible(struct
return retval;
}
+EXPORT_SYMBOL(__compat_down_interruptible);
+
/*
* Trylock failed - make sure we correct for
* having decremented the count.
@@ -148,7 +154,7 @@ int __sched __down_interruptible(struct
* single "cmpxchg" without failure cases,
* but then it wouldn't work on a 386.
*/
-int __down_trylock(struct semaphore * sem)
+fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem)
{
int sleepers;
unsigned long flags;
@@ -168,6 +174,15 @@ int __down_trylock(struct semaphore * se
return 1;
}
+EXPORT_SYMBOL(__compat_down_trylock);
+
+fastcall int compat_sem_is_locked(struct compat_semaphore *sem)
+{
+ return (int) atomic_read(&sem->count) < 0;
+}
+
+EXPORT_SYMBOL(compat_sem_is_locked);
+
/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
@@ -185,7 +200,7 @@ asm(" .section .sched.text,\"ax\",%progb
__down_failed: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __down \n\
+ bl __compat_down \n\
ldmfd sp!, {r0 - r4, pc} \n\
\n\
.align 5 \n\
@@ -193,7 +208,7 @@ __down_failed: \n\
__down_interruptible_failed: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __down_interruptible \n\
+ bl __compat_down_interruptible \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r4, pc} \n\
\n\
@@ -202,7 +217,7 @@ __down_interruptible_failed: \n\
__down_trylock_failed: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __down_trylock \n\
+ bl __compat_down_trylock \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r4, pc} \n\
\n\
@@ -211,7 +226,7 @@ __down_trylock_failed: \n\
__up_wakeup: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __up \n\
+ bl __compat_up \n\
ldmfd sp!, {r0 - r4, pc} \n\
");
Index: linux/arch/arm/kernel/signal.c
===================================================================
--- linux.orig/arch/arm/kernel/signal.c
+++ linux/arch/arm/kernel/signal.c
@@ -630,6 +630,14 @@ static int do_signal(sigset_t *oldset, s
siginfo_t info;
int signr;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
+
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux/arch/arm/kernel/smp.c
===================================================================
--- linux.orig/arch/arm/kernel/smp.c
+++ linux/arch/arm/kernel/smp.c
@@ -515,7 +515,7 @@ static void ipi_call_function(unsigned i
cpu_clear(cpu, data->unfinished);
}
-static DEFINE_SPINLOCK(stop_lock);
+static DEFINE_RAW_SPINLOCK(stop_lock);
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
Index: linux/arch/arm/kernel/time.c
===================================================================
--- linux.orig/arch/arm/kernel/time.c
+++ linux/arch/arm/kernel/time.c
@@ -69,10 +69,12 @@ EXPORT_SYMBOL(profile_pc);
*/
int (*set_rtc)(void);
+#ifdef CONFIG_IS_TICK_BASED
static unsigned long dummy_gettimeoffset(void)
{
return 0;
}
+#endif
/*
* Scheduler clock - returns current time in nanosec units.
@@ -84,34 +86,10 @@ unsigned long long __attribute__((weak))
return (unsigned long long)jiffies * (1000000000 / HZ);
}
-static unsigned long next_rtc_update;
-
-/*
- * If we have an externally synchronized linux clock, then update
- * CMOS clock accordingly every ~11 minutes. set_rtc() has to be
- * called as close as possible to 500 ms before the new second
- * starts.
- */
-static inline void do_set_rtc(void)
+void sync_persistent_clock(struct timespec ts)
{
- if (!ntp_synced() || set_rtc == NULL)
- return;
-
- if (next_rtc_update &&
- time_before((unsigned long)xtime.tv_sec, next_rtc_update))
- return;
-
- if (xtime.tv_nsec < 500000000 - ((unsigned) tick_nsec >> 1) &&
- xtime.tv_nsec >= 500000000 + ((unsigned) tick_nsec >> 1))
- return;
-
- if (set_rtc())
- /*
- * rtc update failed. Try again in 60s
- */
- next_rtc_update = xtime.tv_sec + 60;
- else
- next_rtc_update = xtime.tv_sec + 660;
+ if (set_rtc)
+ set_rtc();
}
#ifdef CONFIG_LEDS
@@ -230,68 +208,6 @@ static inline void do_leds(void)
#define do_leds()
#endif
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long flags;
- unsigned long seq;
- unsigned long usec, sec, lost;
-
- do {
- seq = read_seqbegin_irqsave(&xtime_lock, flags);
- usec = system_timer->offset();
-
- lost = jiffies - wall_jiffies;
- if (lost)
- usec += lost * USECS_PER_JIFFY;
-
- sec = xtime.tv_sec;
- usec += xtime.tv_nsec / 1000;
- } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
- /* usec may have gone up a lot: be safe */
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * done, and then undo it!
- */
- nsec -= system_timer->offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
/**
* save_time_delta - Save the offset between system time and RTC time
* @delta: pointer to timespec to store delta
@@ -332,7 +248,6 @@ void timer_tick(struct pt_regs *regs)
{
profile_tick(CPU_PROFILING, regs);
do_leds();
- do_set_rtc();
do_timer(regs);
#ifndef CONFIG_SMP
update_process_times(user_mode(regs));
@@ -500,8 +415,10 @@ device_initcall(timer_init_sysfs);
void __init time_init(void)
{
+#ifdef CONFIG_IS_TICK_BASED
if (system_timer->offset == NULL)
system_timer->offset = dummy_gettimeoffset;
+#endif
system_timer->init();
#ifdef CONFIG_NO_IDLE_HZ
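
With GENERIC_TIME the architecture no longer implements
do_gettimeofday()/do_settimeofday(); the core timekeeping code derives
wall time from whatever clocksource the platform registers. Conceptually
it computes (a sketch of the idea, not the core's exact code):

        cycle_t now   = clock->read();
        cycle_t delta = (now - clock->cycle_last) & clock->mask;
        u64 ns        = ((u64)delta * clock->mult) >> clock->shift;
        /* wall time = xtime + ns elapsed since the last update */
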
Index: linux/arch/arm/kernel/traps.c
===================================================================
--- linux.orig/arch/arm/kernel/traps.c
+++ linux/arch/arm/kernel/traps.c
@@ -176,6 +176,7 @@ void dump_stack(void)
{
#ifdef CONFIG_DEBUG_ERRORS
__backtrace();
+ print_traces(current);
#endif
}
@@ -191,7 +192,7 @@ void show_stack(struct task_struct *tsk,
if (tsk != current)
fp = thread_saved_fp(tsk);
else
- asm("mov%? %0, fp" : "=r" (fp));
+ asm("mov %0, fp" : "=r" (fp) : : "cc");
c_backtrace(fp, 0x10);
barrier();
@@ -216,7 +217,7 @@ static void __die(const char *str, int e
}
}
-DEFINE_SPINLOCK(die_lock);
+DEFINE_RAW_SPINLOCK(die_lock);
/*
* This function is protected against re-entrancy.
@@ -252,7 +253,7 @@ void notify_die(const char *str, struct
}
static LIST_HEAD(undef_hook);
-static DEFINE_SPINLOCK(undef_lock);
+static DEFINE_RAW_SPINLOCK(undef_lock);
void register_undef_hook(struct undef_hook *hook)
{
Index: linux/arch/arm/lib/Makefile
===================================================================
--- linux.orig/arch/arm/lib/Makefile
+++ linux/arch/arm/lib/Makefile
@@ -41,6 +41,7 @@ lib-$(CONFIG_ARCH_RPC) += ecard.o io-ac
lib-$(CONFIG_ARCH_CLPS7500) += io-acorn.o
lib-$(CONFIG_ARCH_L7200) += io-acorn.o
lib-$(CONFIG_ARCH_SHARK) += io-shark.o
+lib-$(CONFIG_STACKTRACE) += stacktrace.o
$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
Index: linux/arch/arm/lib/stacktrace.c
===================================================================
--- /dev/null
+++ linux/arch/arm/lib/stacktrace.c
@@ -0,0 +1,77 @@
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+struct stackframe {
+ unsigned long fp;
+ unsigned long sp;
+ unsigned long lr;
+ unsigned long pc;
+};
+
+int walk_stackframe(unsigned long fp, unsigned long low, unsigned long high,
+ int (*fn)(struct stackframe *, void *), void *data)
+{
+ struct stackframe *frame;
+
+ do {
+ /*
+ * Check current frame pointer is within bounds
+ */
+ if ((fp - 12) < low || fp + 4 >= high)
+ break;
+
+ frame = (struct stackframe *)(fp - 12);
+
+ if (fn(frame, data))
+ break;
+
+ /*
+ * Update the low bound - the next frame must always
+ * be at a higher address than the current frame.
+ */
+ low = fp + 4;
+ fp = frame->fp;
+ } while (fp);
+
+ return 0;
+}
+
+struct stack_trace_data {
+ struct stack_trace *trace;
+ unsigned int skip;
+};
+
+static int save_trace(struct stackframe *frame, void *d)
+{
+ struct stack_trace_data *data = d;
+ struct stack_trace *trace = data->trace;
+
+ if (data->skip) {
+ data->skip--;
+ return 0;
+ }
+
+ trace->entries[trace->nr_entries++] = frame->lr;
+
+ return trace->nr_entries >= trace->max_entries;
+}
+
+void save_stack_trace(struct stack_trace *trace, struct task_struct *task,
+ int all_contexts, unsigned int skip)
+{
+ struct stack_trace_data data;
+ unsigned long fp, base;
+
+ data.trace = trace;
+ data.skip = skip;
+
+ if (task) {
+ base = (unsigned long)task_stack_page(task);
+ fp = 0;
+ } else {
+ base = (unsigned long)task_stack_page(current);
+ asm("mov %0, fp" : "=r" (fp));
+ }
+
+ walk_stackframe(fp, base, base + THREAD_SIZE, save_trace, &data);
+}
Index: linux/arch/arm/mach-footbridge/netwinder-hw.c
===================================================================
--- linux.orig/arch/arm/mach-footbridge/netwinder-hw.c
+++ linux/arch/arm/mach-footbridge/netwinder-hw.c
@@ -67,7 +67,7 @@ static inline void wb977_ww(int reg, int
/*
* This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE
*/
-DEFINE_SPINLOCK(gpio_lock);
+DEFINE_RAW_SPINLOCK(gpio_lock);
static unsigned int current_gpio_op;
static unsigned int current_gpio_io;
Index: linux/arch/arm/mach-footbridge/netwinder-leds.c
===================================================================
--- linux.orig/arch/arm/mach-footbridge/netwinder-leds.c
+++ linux/arch/arm/mach-footbridge/netwinder-leds.c
@@ -32,7 +32,7 @@ static char led_state;
static char hw_led_state;
static DEFINE_SPINLOCK(leds_lock);
-extern spinlock_t gpio_lock;
+extern raw_spinlock_t gpio_lock;
static void netwinder_leds_event(led_event_t evt)
{
Index: linux/arch/arm/mach-integrator/core.c
===================================================================
--- linux.orig/arch/arm/mach-integrator/core.c
+++ linux/arch/arm/mach-integrator/core.c
@@ -164,7 +164,7 @@ static struct amba_pl010_data integrator
#define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET
-static DEFINE_SPINLOCK(cm_lock);
+static DEFINE_RAW_SPINLOCK(cm_lock);
/**
* cm_control - update the CM_CTRL register.
Index: linux/arch/arm/mach-integrator/pci_v3.c
===================================================================
--- linux.orig/arch/arm/mach-integrator/pci_v3.c
+++ linux/arch/arm/mach-integrator/pci_v3.c
@@ -162,7 +162,7 @@
* 7:2 register number
*
*/
-static DEFINE_SPINLOCK(v3_lock);
+static DEFINE_RAW_SPINLOCK(v3_lock);
#define PCI_BUS_NONMEM_START 0x00000000
#define PCI_BUS_NONMEM_SIZE SZ_256M
Index: linux/arch/arm/mach-integrator/platsmp.c
===================================================================
--- linux.orig/arch/arm/mach-integrator/platsmp.c
+++ linux/arch/arm/mach-integrator/platsmp.c
@@ -31,7 +31,7 @@ extern void integrator_secondary_startup
volatile int __cpuinitdata pen_release = -1;
unsigned long __cpuinitdata phys_pen_release = 0;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
Index: linux/arch/arm/mach-ixp4xx/Kconfig
===================================================================
--- linux.orig/arch/arm/mach-ixp4xx/Kconfig
+++ linux/arch/arm/mach-ixp4xx/Kconfig
@@ -1,5 +1,9 @@
if ARCH_IXP4XX
+config IS_TICK_BASED
+ bool
+ default n
+
config ARCH_SUPPORTS_BIG_ENDIAN
bool
default y
Index: linux/arch/arm/mach-ixp4xx/common-pci.c
===================================================================
--- linux.orig/arch/arm/mach-ixp4xx/common-pci.c
+++ linux/arch/arm/mach-ixp4xx/common-pci.c
@@ -53,7 +53,7 @@ unsigned long ixp4xx_pci_reg_base = 0;
* these transactions are atomic or we will end up
* with corrupt data on the bus or in a driver.
*/
-static DEFINE_SPINLOCK(ixp4xx_pci_lock);
+static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock);
/*
* Read from PCI config space
Index: linux/arch/arm/mach-ixp4xx/common.c
===================================================================
--- linux.orig/arch/arm/mach-ixp4xx/common.c
+++ linux/arch/arm/mach-ixp4xx/common.c
@@ -26,6 +26,8 @@
#include
#include
#include
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
#include
#include
@@ -38,6 +40,11 @@
#include
#include
+#ifdef CONFIG_HIGH_RES_TIMERS
+static int __init ixp4xx_clockevent_init(void);
+static struct clock_event clockevent_ixp4xx;
+#endif
+
/*************************************************************************
* IXP4xx chipset I/O mapping
*************************************************************************/
@@ -253,25 +260,17 @@ void __init ixp4xx_init_irq(void)
static unsigned volatile last_jiffy_time;
-#define CLOCK_TICKS_PER_USEC ((CLOCK_TICK_RATE + USEC_PER_SEC/2) / USEC_PER_SEC)
-
-/* IRQs are disabled before entering here from do_gettimeofday() */
-static unsigned long ixp4xx_gettimeoffset(void)
-{
- u32 elapsed;
-
- elapsed = *IXP4XX_OSTS - last_jiffy_time;
-
- return elapsed / CLOCK_TICKS_PER_USEC;
-}
-
static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- write_seqlock(&xtime_lock);
-
/* Clear Pending Interrupt by writing '1' to it */
*IXP4XX_OSST = IXP4XX_OSST_TIMER_1_PEND;
+#ifdef CONFIG_HIGH_RES_TIMERS
+ if (clockevent_ixp4xx.event_handler)
+ clockevent_ixp4xx.event_handler(regs);
+#else
+ write_seqlock(&xtime_lock);
+
/*
* Catch up with the real idea of time
*/
@@ -281,6 +280,7 @@ static irqreturn_t ixp4xx_timer_interrup
}
write_sequnlock(&xtime_lock);
+#endif
return IRQ_HANDLED;
}
@@ -299,17 +299,18 @@ static void __init ixp4xx_timer_init(voi
/* Setup the Timer counter value */
*IXP4XX_OSRT1 = (LATCH & ~IXP4XX_OST_RELOAD_MASK) | IXP4XX_OST_ENABLE;
- /* Reset time-stamp counter */
- *IXP4XX_OSTS = 0;
last_jiffy_time = 0;
/* Connect the interrupt handler and enable the interrupt */
setup_irq(IRQ_IXP4XX_TIMER1, &ixp4xx_timer_irq);
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ixp4xx_clockevent_init();
+#endif
}
struct sys_timer ixp4xx_timer = {
.init = ixp4xx_timer_init,
- .offset = ixp4xx_gettimeoffset,
};
static struct resource ixp46x_i2c_resources[] = {
@@ -365,3 +366,70 @@ void __init ixp4xx_sys_init(void)
ixp4xx_exp_bus_size >> 20);
}
+cycle_t ixp4xx_get_cycles(void)
+{
+ return *IXP4XX_OSTS;
+}
+
+static struct clocksource clocksource_ixp4xx = {
+ .name = "OSTS",
+ .rating = 200,
+ .read = ixp4xx_get_cycles,
+ .mask = 0xFFFFFFFF,
+ .shift = 20,
+ .is_continuous = 1,
+};
+
+static int __init ixp4xx_clocksource_init(void)
+{
+ /* Reset time-stamp counter */
+ *IXP4XX_OSTS = 0;
+
+ clocksource_ixp4xx.mult =
+ clocksource_khz2mult(66660, clocksource_ixp4xx.shift);
+ clocksource_register(&clocksource_ixp4xx);
+
+ return 0;
+}
+device_initcall(ixp4xx_clocksource_init);
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+static u32 clockevent_mode = 0;
+
+static void ixp4xx_set_next_event(unsigned long evt,
+ struct clock_event *unused)
+{
+ u32 oneshot = (clockevent_mode == CLOCK_EVT_ONESHOT) ?
+ IXP4XX_OST_ONE_SHOT : 0;
+
+ *IXP4XX_OSRT1 = (evt & ~IXP4XX_OST_RELOAD_MASK) | IXP4XX_OST_ENABLE |
+ oneshot;
+}
+
+static void ixp4xx_set_mode(int mode, struct clock_event *evt)
+{
+ clockevent_mode = mode;
+}
+
+static struct clock_event clockevent_ixp4xx = {
+ .name = "ixp4xx timer1",
+ .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_TICK |
+ CLOCK_CAP_UPDATE | CLOCK_CAP_PROFILE,
+ .shift = 32,
+ .set_mode = ixp4xx_set_mode,
+ .set_next_event = ixp4xx_set_next_event,
+};
+
+static int __init ixp4xx_clockevent_init(void)
+{
+ clockevent_ixp4xx.mult = div_sc(FREQ, NSEC_PER_SEC,
+ clockevent_ixp4xx.shift);
+ clockevent_ixp4xx.max_delta_ns =
+ clockevent_delta2ns(0xfffffffe, &clockevent_ixp4xx);
+ clockevent_ixp4xx.min_delta_ns =
+ clockevent_delta2ns(0xf, &clockevent_ixp4xx);
+ register_local_clockevent(&clockevent_ixp4xx);
+
+ return 0;
+}
+#endif
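
The clocksource mult/shift pair turns raw OSTS cycles into nanoseconds
with one multiply and one shift. For the 66.66 MHz IXP4xx timer and
shift = 20 the numbers work out roughly as follows:

        /*
         * mult = clocksource_khz2mult(66660, 20)
         *      ~= 15.0 ns/cycle * 2^20 ~= 15730100
         *
         * ns = (cycles * mult) >> 20
         *
         * One cycle thus scales to 15730100 >> 20 ~= 15.0 ns,
         * matching the 1 / 66.66 MHz period.
         */
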
Index: linux/arch/arm/mach-omap1/pm.c
===================================================================
--- linux.orig/arch/arm/mach-omap1/pm.c
+++ linux/arch/arm/mach-omap1/pm.c
@@ -120,7 +120,7 @@ void omap_pm_idle(void)
local_irq_disable();
local_fiq_disable();
- if (need_resched()) {
+ if (need_resched() || need_resched_delayed()) {
local_fiq_enable();
local_irq_enable();
return;
Index: linux/arch/arm/mach-omap2/pm.c
===================================================================
--- linux.orig/arch/arm/mach-omap2/pm.c
+++ linux/arch/arm/mach-omap2/pm.c
@@ -53,7 +53,7 @@ void omap2_pm_idle(void)
{
local_irq_disable();
local_fiq_disable();
- if (need_resched()) {
+ if (need_resched() || need_resched_delayed()) {
local_fiq_enable();
local_irq_enable();
return;
Index: linux/arch/arm/mach-sa1100/badge4.c
===================================================================
--- linux.orig/arch/arm/mach-sa1100/badge4.c
+++ linux/arch/arm/mach-sa1100/badge4.c
@@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, i
/* detect on->off and off->on transitions */
if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
/* was off, now on */
- printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
GPSR = BADGE4_GPIO_PCMEN5V;
} else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
/* was on, now off */
- printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
GPCR = BADGE4_GPIO_PCMEN5V;
}
local_irq_restore(flags);
+
+ /* detect on->off and off->on transitions */
+ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
+ /* was off, now on */
+ printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
+ } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
+ /* was on, now off */
+ printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
+ }
}
EXPORT_SYMBOL(badge4_set_5V);
Index: linux/arch/arm/mach-shark/leds.c
===================================================================
--- linux.orig/arch/arm/mach-shark/leds.c
+++ linux/arch/arm/mach-shark/leds.c
@@ -32,7 +32,7 @@ static char led_state;
static short hw_led_state;
static short saved_state;
-static DEFINE_SPINLOCK(leds_lock);
+static DEFINE_RAW_SPINLOCK(leds_lock);
short sequoia_read(int addr) {
outw(addr,0x24);
Index: linux/arch/arm/mach-versatile/Kconfig
===================================================================
--- linux.orig/arch/arm/mach-versatile/Kconfig
+++ linux/arch/arm/mach-versatile/Kconfig
@@ -1,6 +1,10 @@
menu "Versatile platform type"
depends on ARCH_VERSATILE
+config IS_TICK_BASED
+ bool
+ default n
+
config ARCH_VERSATILE_PB
bool "Support Versatile/PB platform"
default y
Index: linux/arch/arm/mach-versatile/core.c
===================================================================
--- linux.orig/arch/arm/mach-versatile/core.c
+++ linux/arch/arm/mach-versatile/core.c
@@ -26,6 +26,8 @@
#include
#include
#include
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
#include
#include
@@ -808,59 +810,50 @@ void __init versatile_init(void)
#define TICKS2USECS(x) ((x) / TICKS_PER_uSEC)
#endif
-/*
- * Returns number of ms since last clock interrupt. Note that interrupts
- * will have been disabled by do_gettimeoffset()
- */
-static unsigned long versatile_gettimeoffset(void)
+#ifdef CONFIG_HIGH_RES_TIMERS
+static void timer_set_mode(int mode, struct clock_event *clk)
{
- unsigned long ticks1, ticks2, status;
-
- /*
- * Get the current number of ticks. Note that there is a race
- * condition between us reading the timer and checking for
- * an interrupt. We get around this by ensuring that the
- * counter has not reloaded between our two reads.
- */
- ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff;
- do {
- ticks1 = ticks2;
- status = __raw_readl(VA_IC_BASE + VIC_RAW_STATUS);
- ticks2 = readl(TIMER0_VA_BASE + TIMER_VALUE) & 0xffff;
- } while (ticks2 > ticks1);
-
- /*
- * Number of ticks since last interrupt.
- */
- ticks1 = TIMER_RELOAD - ticks2;
-
- /*
- * Interrupt pending? If so, we've reloaded once already.
- *
- * FIXME: Need to check this is effectively timer 0 that expires
- */
- if (status & IRQMASK_TIMERINT0_1)
- ticks1 += TIMER_RELOAD;
+ if (mode == CLOCK_EVT_PERIODIC) {
+ writel(TIMER_CTRL_PERIODIC | TIMER_CTRL_32BIT | TIMER_CTRL_IE |
+ TIMER_CTRL_ENABLE, TIMER0_VA_BASE + TIMER_CTRL);
+ } else {
+ writel(TIMER_CTRL_ONESHOT | TIMER_CTRL_32BIT | TIMER_CTRL_IE |
+ TIMER_CTRL_ENABLE, TIMER0_VA_BASE + TIMER_CTRL);
+ }
+}
- /*
- * Convert the ticks to usecs
- */
- return TICKS2USECS(ticks1);
+static void timer_set_next_event(unsigned long evt, struct clock_event *unused)
+{
+ BUG_ON(!evt);
+ writel(evt, TIMER0_VA_BASE + TIMER_LOAD);
}
+static struct clock_event timer0_clock = {
+ .name = "timer0",
+ .shift = 32,
+ .capabilities = CLOCK_CAP_TICK | CLOCK_CAP_UPDATE |
+ CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE,
+ .set_mode = timer_set_mode,
+ .set_next_event = timer_set_next_event,
+};
+#endif
+
/*
* IRQ handler for the timer
*/
static irqreturn_t versatile_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- write_seqlock(&xtime_lock);
-
// ...clear the interrupt
writel(1, TIMER0_VA_BASE + TIMER_INTCLR);
+#ifdef CONFIG_HIGH_RES_TIMERS
+ if (timer0_clock.event_handler)
+ timer0_clock.event_handler(regs);
+#else
+ write_seqlock(&xtime_lock);
timer_tick(regs);
-
write_sequnlock(&xtime_lock);
+#endif
return IRQ_HANDLED;
}
@@ -893,11 +886,20 @@ static void __init versatile_timer_init(
/*
* Initialise to a known state (all timers off)
*/
- writel(0, TIMER0_VA_BASE + TIMER_CTRL);
+ writel(0, TIMER0_VA_BASE + TIMER_CTRL);
writel(0, TIMER1_VA_BASE + TIMER_CTRL);
writel(0, TIMER2_VA_BASE + TIMER_CTRL);
writel(0, TIMER3_VA_BASE + TIMER_CTRL);
+#ifdef CONFIG_HIGH_RES_TIMERS
+ timer0_clock.mult = div_sc(1000000, NSEC_PER_SEC, timer0_clock.shift);
+ timer0_clock.max_delta_ns =
+ clockevent_delta2ns(0xffffffff, &timer0_clock);
+ timer0_clock.min_delta_ns =
+ clockevent_delta2ns(0xf, &timer0_clock);
+ register_global_clockevent(&timer0_clock);
+#endif
+
writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_LOAD);
writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_VALUE);
writel(TIMER_DIVISOR | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC |
@@ -911,5 +913,36 @@ static void __init versatile_timer_init(
struct sys_timer versatile_timer = {
.init = versatile_timer_init,
- .offset = versatile_gettimeoffset,
};
+
+cycle_t versatile_get_cycles(void)
+{
+ return ~readl(TIMER3_VA_BASE + TIMER_VALUE);
+}
+
+static struct clocksource clocksource_versatile = {
+ .name = "timer3",
+ .rating = 200,
+ .read = versatile_get_cycles,
+ .mask = 0xFFFFFFFF,
+ .shift = 20,
+ .is_continuous = 1,
+};
+
+static int __init versatile_clocksource_init(void)
+{
+ writel(0, TIMER3_VA_BASE + TIMER_CTRL);
+ writel(0xffffffff, TIMER3_VA_BASE + TIMER_LOAD);
+ writel(0xffffffff, TIMER3_VA_BASE + TIMER_VALUE);
+ writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
+ TIMER3_VA_BASE + TIMER_CTRL);
+
+ clocksource_versatile.mult =
+ clocksource_khz2mult(1000, clocksource_versatile.shift);
+ clocksource_register(&clocksource_versatile);
+
+ return 0;
+}
+
+device_initcall(versatile_clocksource_init);
+
Index: linux/arch/arm/mm/consistent.c
===================================================================
--- linux.orig/arch/arm/mm/consistent.c
+++ linux/arch/arm/mm/consistent.c
@@ -40,7 +40,7 @@
* These are the page tables (2MB each) covering uncached, DMA consistent allocations
*/
static pte_t *consistent_pte[NUM_CONSISTENT_PTES];
-static DEFINE_SPINLOCK(consistent_lock);
+static DEFINE_RAW_SPINLOCK(consistent_lock);
/*
* VM region handling support.
Index: linux/arch/arm/mm/copypage-v4mc.c
===================================================================
--- linux.orig/arch/arm/mm/copypage-v4mc.c
+++ linux/arch/arm/mm/copypage-v4mc.c
@@ -29,7 +29,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* ARMv4 mini-dcache optimised copy_user_page
@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* instruction. If your processor does not supply this, you have to write your
* own copy_user_page that does the right thing.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
asm volatile(
@@ -82,7 +82,7 @@ void v4_mc_copy_user_page(void *kto, con
/*
* ARMv4 optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux/arch/arm/mm/copypage-v6.c
===================================================================
--- linux.orig/arch/arm/mm/copypage-v6.c
+++ linux/arch/arm/mm/copypage-v6.c
@@ -26,7 +26,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(v6_lock);
+static DEFINE_RAW_SPINLOCK(v6_lock);
/*
* Copy the user page. No aliasing to deal with so we can just
Index: linux/arch/arm/mm/copypage-xscale.c
===================================================================
--- linux.orig/arch/arm/mm/copypage-xscale.c
+++ linux/arch/arm/mm/copypage-xscale.c
@@ -31,7 +31,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* XScale mini-dcache optimised copy_user_page
@@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
/*
@@ -104,7 +104,7 @@ void xscale_mc_copy_user_page(void *kto,
/*
* XScale optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux/arch/arm/mm/fault.c
===================================================================
--- linux.orig/arch/arm/mm/fault.c
+++ linux/arch/arm/mm/fault.c
@@ -215,7 +215,7 @@ out:
return fault;
}
-static int
+static notrace int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk;
@@ -315,7 +315,7 @@ no_context:
* interrupt or a critical region, and should only copy the information
* from the master page table, nothing more.
*/
-static int
+static notrace int
do_translation_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
@@ -361,7 +361,7 @@ bad_area:
* Some section permission faults need to be handled gracefully.
* They can happen due to a __{get,put}_user during an oops.
*/
-static int
+static notrace int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
@@ -372,7 +372,7 @@ do_sect_fault(unsigned long addr, unsign
/*
* This abort handler always returns "fault".
*/
-static int
+static notrace int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
return 1;
@@ -427,7 +427,7 @@ static struct fsr_info {
{ do_bad, SIGBUS, 0, "unknown 31" }
};
-void __init
+void __init notrace
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
int sig, const char *name)
{
@@ -441,7 +441,7 @@ hook_fault_code(int nr, int (*fn)(unsign
/*
* Dispatch a data abort to the relevant handler.
*/
-asmlinkage void
+asmlinkage notrace void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
@@ -460,7 +460,7 @@ do_DataAbort(unsigned long addr, unsigne
notify_die("", regs, &info, fsr, 0);
}
-asmlinkage void
+asmlinkage notrace void
do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
{
do_translation_fault(addr, 0, regs);
Index: linux/arch/arm/mm/init.c
===================================================================
--- linux.orig/arch/arm/mm/init.c
+++ linux/arch/arm/mm/init.c
@@ -25,7 +25,7 @@
#include
#include
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
Index: linux/arch/arm/plat-omap/clock.c
===================================================================
--- linux.orig/arch/arm/plat-omap/clock.c
+++ linux/arch/arm/plat-omap/clock.c
@@ -29,7 +29,7 @@
static LIST_HEAD(clocks);
static DEFINE_MUTEX(clocks_mutex);
-static DEFINE_SPINLOCK(clockfw_lock);
+static DEFINE_RAW_SPINLOCK(clockfw_lock);
static struct clk_functions *arch_clock;
Index: linux/arch/arm/plat-omap/dma.c
===================================================================
--- linux.orig/arch/arm/plat-omap/dma.c
+++ linux/arch/arm/plat-omap/dma.c
@@ -949,7 +949,7 @@ static struct irqaction omap24xx_dma_irq
/*----------------------------------------------------------------------------*/
static struct lcd_dma_info {
- spinlock_t lock;
+ raw_spinlock_t lock;
int reserved;
void (* callback)(u16 status, void *data);
void *cb_data;
Index: linux/arch/arm/plat-omap/gpio.c
===================================================================
--- linux.orig/arch/arm/plat-omap/gpio.c
+++ linux/arch/arm/plat-omap/gpio.c
@@ -120,7 +120,7 @@ struct gpio_bank {
u32 reserved_map;
u32 suspend_wakeup;
u32 saved_wakeup;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
#define METHOD_MPUIO 0
Index: linux/arch/arm/plat-omap/mux.c
===================================================================
--- linux.orig/arch/arm/plat-omap/mux.c
+++ linux/arch/arm/plat-omap/mux.c
@@ -56,7 +56,7 @@ int __init omap_mux_register(struct pin_
*/
int __init_or_module omap_cfg_reg(const unsigned long index)
{
- static DEFINE_SPINLOCK(mux_spin_lock);
+ static DEFINE_RAW_SPINLOCK(mux_spin_lock);
unsigned long flags;
struct pin_config *cfg;
Index: linux/arch/arm/plat-omap/pm.c
===================================================================
--- linux.orig/arch/arm/plat-omap/pm.c
+++ linux/arch/arm/plat-omap/pm.c
@@ -84,7 +84,7 @@ void omap_pm_idle(void)
local_irq_disable();
local_fiq_disable();
- if (need_resched()) {
+ if (need_resched() || need_resched_delayed()) {
local_fiq_enable();
local_irq_enable();
return;
Index: linux/arch/h8300/Kconfig
===================================================================
--- linux.orig/arch/h8300/Kconfig
+++ linux/arch/h8300/Kconfig
@@ -41,6 +41,10 @@ config GENERIC_CALIBRATE_DELAY
bool
default y
+config GENERIC_TIME
+ bool
+ default y
+
config TIME_LOW_RES
bool
default y
Index: linux/arch/h8300/kernel/time.c
===================================================================
--- linux.orig/arch/h8300/kernel/time.c
+++ linux/arch/h8300/kernel/time.c
@@ -68,58 +68,6 @@ void time_init(void)
platform_timer_setup(timer_interrupt);
}
-/*
- * This version of gettimeofday has near microsecond resolution.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long flags;
- unsigned long usec, sec;
-
- read_lock_irqsave(&xtime_lock, flags);
- usec = 0;
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- read_unlock_irqrestore(&xtime_lock, flags);
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_lock_irq(&xtime_lock);
- /* This is revolting. We need to set the xtime.tv_usec
- * correctly. However, the value in this location is
- * is value at the last tick.
- * Discover what correction gettimeofday
- * would have done, and then undo it!
- */
- while (tv->tv_nsec < 0) {
- tv->tv_nsec += NSEC_PER_SEC;
- tv->tv_sec--;
- }
-
- xtime.tv_sec = tv->tv_sec;
- xtime.tv_nsec = tv->tv_nsec;
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
unsigned long long sched_clock(void)
{
return (unsigned long long)jiffies * (1000000000 / HZ);
Index: linux/arch/i386/Kconfig
===================================================================
--- linux.orig/arch/i386/Kconfig
+++ linux/arch/i386/Kconfig
@@ -65,6 +65,8 @@ source "init/Kconfig"
menu "Processor type and features"
+source "kernel/time/Kconfig"
+
config SMP
bool "Symmetric multi-processing support"
---help---
@@ -261,6 +263,19 @@ config SCHED_MC
source "kernel/Kconfig.preempt"
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ depends on M386 || PREEMPT_RT
+ default y
+
+config ASM_SEMAPHORES
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+ default y if !RWSEM_GENERIC_SPINLOCK
+
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
depends on !SMP && !(X86_VISWS || X86_VOYAGER)
@@ -708,6 +723,7 @@ config BOOT_IOREMAP
config REGPARM
bool "Use register arguments"
+ depends on !MCOUNT
default y
help
Compile the kernel with -mregparm=3. This instructs gcc to use
@@ -791,6 +807,10 @@ config HOTPLUG_CPU
enable suspend on SMP systems. CPUs can be controlled through
/sys/devices/system/cpu.
+config GENERIC_TIME_VSYSCALL
+ depends on EXPERIMENTAL
+ bool "VSYSCALL gettimeofday() interface"
+
config COMPAT_VDSO
bool "Compat VDSO support"
default y
Index: linux/arch/i386/Kconfig.cpu
===================================================================
--- linux.orig/arch/i386/Kconfig.cpu
+++ linux/arch/i386/Kconfig.cpu
@@ -235,11 +235,6 @@ config RWSEM_GENERIC_SPINLOCK
depends on M386
default y
-config RWSEM_XCHGADD_ALGORITHM
- bool
- depends on !M386
- default y
-
config GENERIC_CALIBRATE_DELAY
bool
default y
Index: linux/arch/i386/Kconfig.debug
===================================================================
--- linux.orig/arch/i386/Kconfig.debug
+++ linux/arch/i386/Kconfig.debug
@@ -22,6 +22,7 @@ config EARLY_PRINTK
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
+ default y
help
This option will cause messages to be printed if free stack space
drops below a certain limit.
@@ -29,6 +30,7 @@ config DEBUG_STACKOVERFLOW
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
+ default y
help
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -49,6 +51,7 @@ config DEBUG_PAGEALLOC
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
depends on DEBUG_KERNEL
+ default y
help
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
@@ -59,6 +62,7 @@ config DEBUG_RODATA
config 4KSTACKS
bool "Use 4Kb for kernel stacks instead of 8Kb"
depends on DEBUG_KERNEL
+ default y
help
If you say Y here the kernel will use a 4Kb stacksize for the
kernel stack attached to each process/thread. This facilitates
Index: linux/arch/i386/boot/compressed/misc.c
===================================================================
--- linux.orig/arch/i386/boot/compressed/misc.c
+++ linux/arch/i386/boot/compressed/misc.c
@@ -15,6 +15,12 @@
#include
#include
+#ifdef CONFIG_MCOUNT
+void notrace mcount(void)
+{
+}
+#endif
+
/*
* gzip declarations
*/
@@ -107,7 +113,7 @@ static long free_mem_end_ptr;
#define INPLACE_MOVE_ROUTINE 0x1000
#define LOW_BUFFER_START 0x2000
#define LOW_BUFFER_MAX 0x90000
-#define HEAP_SIZE 0x3000
+#define HEAP_SIZE 0x4000
static unsigned int low_buffer_end, low_buffer_size;
static int high_loaded =0;
static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
Index: linux/arch/i386/kernel/Makefile
===================================================================
--- linux.orig/arch/i386/kernel/Makefile
+++ linux/arch/i386/kernel/Makefile
@@ -4,7 +4,7 @@
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
+obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o bootflag.o \
quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
@@ -12,6 +12,7 @@ obj-y := process.o semaphore.o signal.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
+obj-$(CONFIG_GENERIC_TIME_VSYSCALL) += vsyscall-gtod.o
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
obj-$(CONFIG_X86_MSR) += msr.o
@@ -20,6 +21,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
+obj-$(CONFIG_MCOUNT) += mcount-wrapper.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
@@ -30,6 +32,7 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o
obj-y += sysenter.o vsyscall.o
obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
Index: linux/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.orig/arch/i386/kernel/acpi/boot.c
+++ linux/arch/i386/kernel/acpi/boot.c
@@ -53,8 +53,6 @@ static inline int acpi_madt_oem_check(ch
#include
#endif /* CONFIG_X86_LOCAL_APIC */
-static inline int gsi_irq_sharing(int gsi) { return gsi; }
-
#endif /* X86 */
#define BAD_MADT_ENTRY(entry, end) ( \
@@ -459,12 +457,7 @@ void __init acpi_pic_sci_set_trigger(uns
int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
{
-#ifdef CONFIG_X86_IO_APIC
- if (use_pci_vector() && !platform_legacy_irq(gsi))
- *irq = IO_APIC_VECTOR(gsi);
- else
-#endif
- *irq = gsi_irq_sharing(gsi);
+ *irq = gsi;
return 0;
}
@@ -575,6 +568,7 @@ static int __init acpi_parse_sbf(unsigne
}
#ifdef CONFIG_HPET_TIMER
+#include <asm/hpet.h>
static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
{
@@ -595,21 +589,13 @@ static int __init acpi_parse_hpet(unsign
return -1;
}
#ifdef CONFIG_X86_64
- vxtime.hpet_address = hpet_tbl->addr.addrl |
+ hpet_address = hpet_tbl->addr.addrl |
((long)hpet_tbl->addr.addrh << 32);
-
+#else
+ hpet_address = hpet_tbl->addr.addrl;
+#endif
printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
- hpet_tbl->id, vxtime.hpet_address);
-#else /* X86 */
- {
- extern unsigned long hpet_address;
-
- hpet_address = hpet_tbl->addr.addrl;
- printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
- hpet_tbl->id, hpet_address);
- }
-#endif /* X86 */
-
+ hpet_tbl->id, hpet_address);
return 0;
}
#else
Index: linux/arch/i386/kernel/apic.c
===================================================================
--- linux.orig/arch/i386/kernel/apic.c
+++ linux/arch/i386/kernel/apic.c
@@ -25,6 +25,7 @@
#include
#include
#include
+#include <linux/clockchips.h>
#include
#include
@@ -59,6 +60,23 @@ int enable_local_apic __initdata = 0; /*
*/
int apic_verbosity;
+static unsigned int calibration_result;
+
+static void lapic_next_event(unsigned long delta, struct clock_event *evt);
+static void lapic_timer_setup(int mode, struct clock_event *evt);
+
+static struct clock_event lapic_clockevent = {
+ .name = "lapic",
+ .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE
+#ifdef CONFIG_SMP
+ | CLOCK_CAP_UPDATE
+#endif
+ ,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+};
+static DEFINE_PER_CPU(struct clock_event, lapic_events);
static void apic_pm_activate(void);
@@ -909,6 +927,11 @@ fake_ioapic_page:
*/
/*
+ * FIXME: Move this to i8253.h. There is no need to keep the access to
+ * the PIT scattered all around the place -tglx
+ */
+
+/*
* The timer chip is already set up at HZ interrupts per second here,
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
@@ -966,13 +989,15 @@ void (*wait_timer_tick)(void) __devinitd
#define APIC_DIVISOR 16
-static void __setup_APIC_LVTT(unsigned int clocks)
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot)
{
unsigned int lvtt_value, tmp_value, ver;
int cpu = smp_processor_id();
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+ lvtt_value = LOCAL_TIMER_VECTOR;
+ if (!oneshot)
+ lvtt_value |= APIC_LVT_TIMER_PERIODIC;
if (!APIC_INTEGRATED(ver))
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
@@ -989,23 +1014,31 @@ static void __setup_APIC_LVTT(unsigned i
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);
- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+ if (!oneshot)
+ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+static void lapic_next_event(unsigned long delta, struct clock_event *evt)
+{
+ apic_write_around(APIC_TMICT, delta);
}
-static void __devinit setup_APIC_timer(unsigned int clocks)
+static void lapic_timer_setup(int mode, struct clock_event *evt)
{
unsigned long flags;
local_irq_save(flags);
+ __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC);
+ local_irq_restore(flags);
+}
- /*
- * Wait for IRQ0's slice:
- */
- wait_timer_tick();
+static void __devinit setup_APIC_timer(void)
+{
+ struct clock_event *levt = &__get_cpu_var(lapic_events);
- __setup_APIC_LVTT(clocks);
+ memcpy(levt, &lapic_clockevent, sizeof(*levt));
- local_irq_restore(flags);
+ register_local_clockevent(levt);
}
/*
@@ -1014,6 +1047,8 @@ static void __devinit setup_APIC_timer(u
* to calibrate, since some later bootup code depends on getting
* the first irq? Ugh.
*
+ * TODO: Fix this rather than saying "Ugh" -tglx
+ *
* We want to do the calibration only once since we
* want to have local timer irqs syncron. CPUs connected
* by the same APIC bus have the very same bus frequency.
@@ -1036,7 +1071,7 @@ static int __init calibrate_APIC_clock(v
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
- __setup_APIC_LVTT(1000000000);
+ __setup_APIC_LVTT(1000000000, 0);
/*
* The timer chip counts down to zero. Let's wait
@@ -1073,6 +1108,14 @@ static int __init calibrate_APIC_clock(v
result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc(tt1-tt2, TICK_NSEC * LOOPS, 32);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ printk("lapic max_delta_ns: %ld\n", lapic_clockevent.max_delta_ns);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+
if (cpu_has_tsc)
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
@@ -1087,8 +1130,6 @@ static int __init calibrate_APIC_clock(v
return result;
}
-static unsigned int calibration_result;
-
void __init setup_boot_APIC_clock(void)
{
unsigned long flags;
@@ -1101,14 +1142,14 @@ void __init setup_boot_APIC_clock(void)
/*
* Now set up the timer for real.
*/
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
local_irq_restore(flags);
}
void __devinit setup_secondary_APIC_clock(void)
{
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
}
void disable_APIC_timer(void)
@@ -1154,6 +1195,13 @@ void switch_APIC_timer_to_ipi(void *cpum
!cpu_isset(cpu, timer_bcast_ipi)) {
disable_APIC_timer();
cpu_set(cpu, timer_bcast_ipi);
+#ifdef CONFIG_HIGH_RES_TIMERS
+ printk("Disabling NO_HZ and high resolution timers "
+ "due to timer broadcasting\n");
+ for_each_possible_cpu(cpu)
+ per_cpu(lapic_events, cpu).capabilities &=
+ ~CLOCK_CAP_NEXTEVT;
+#endif
}
}
EXPORT_SYMBOL(switch_APIC_timer_to_ipi);
@@ -1190,6 +1238,8 @@ inline void smp_local_timer_interrupt(st
update_process_times(user_mode_vm(regs));
#endif
+ trace_special(regs->eip, 0, 0);
+
/*
* We take the 'long' return path, and there every subsystem
 * grabs the appropriate locks (kernel lock/irq lock).
@@ -1211,15 +1261,18 @@ inline void smp_local_timer_interrupt(st
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs)
{
int cpu = smp_processor_id();
+ struct clock_event *evt = &per_cpu(lapic_events, cpu);
/*
* the NMI deadlock-detector uses this.
*/
per_cpu(irq_stat, cpu).apic_timer_irqs++;
+ trace_special(regs->eip, 0, 0);
+
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
@@ -1231,7 +1284,15 @@ fastcall void smp_apic_timer_interrupt(s
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
- smp_local_timer_interrupt(regs);
+ /*
+	 * If the task is currently running in user mode, touch the
+	 * softlockup watchdog so that no soft lockup is reported for
+	 * this period. If CONFIG_DETECT_SOFTLOCKUP is not configured,
+	 * this should be optimized out.
+ */
+ if (user_mode(regs))
+ touch_softlockup_watchdog();
+
+ evt->event_handler(regs);
irq_exit();
}
@@ -1240,6 +1301,8 @@ static void up_apic_timer_interrupt_call
{
int cpu = smp_processor_id();
+ trace_special(regs->eip, 1, 0);
+
/*
* the NMI deadlock-detector uses this.
*/
@@ -1323,6 +1386,7 @@ fastcall void smp_error_interrupt(struct
*/
printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
smp_processor_id(), v , v1);
+ dump_stack();
irq_exit();
}
Index: linux/arch/i386/kernel/apm.c
===================================================================
--- linux.orig/arch/i386/kernel/apm.c
+++ linux/arch/i386/kernel/apm.c
@@ -233,7 +233,6 @@
#include "io_ports.h"
-extern unsigned long get_cmos_time(void);
extern void machine_real_restart(unsigned char *, int);
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
@@ -1152,26 +1151,6 @@ out:
spin_unlock(&user_list_lock);
}
-static void set_time(void)
-{
- if (got_clock_diff) { /* Must know time zone in order to set clock */
- xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
- xtime.tv_nsec = 0;
- }
-}
-
-static void get_time_diff(void)
-{
-#ifndef CONFIG_APM_RTC_IS_GMT
- /*
- * Estimate time zone so that set_time can update the clock
- */
- clock_cmos_diff = -get_cmos_time();
- clock_cmos_diff += get_seconds();
- got_clock_diff = 1;
-#endif
-}
-
static void reinit_timer(void)
{
#ifdef INIT_TIMER_AFTER_SUSPEND
@@ -1211,19 +1190,6 @@ static int suspend(int vetoable)
local_irq_disable();
device_power_down(PMSG_SUSPEND);
- /* serialize with the timer interrupt */
- write_seqlock(&xtime_lock);
-
- /* protect against access to timer chip registers */
- spin_lock(&i8253_lock);
-
- get_time_diff();
- /*
- * Irq spinlock must be dropped around set_system_power_state.
- * We'll undo any timer changes due to interrupts below.
- */
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
local_irq_enable();
save_processor_state();
@@ -1232,13 +1198,7 @@ static int suspend(int vetoable)
restore_processor_state();
local_irq_disable();
- write_seqlock(&xtime_lock);
- spin_lock(&i8253_lock);
reinit_timer();
- set_time();
-
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
if (err == APM_NO_ERROR)
err = APM_SUCCESS;
@@ -1267,11 +1227,6 @@ static void standby(void)
local_irq_disable();
device_power_down(PMSG_SUSPEND);
- /* serialize with the timer interrupt */
- write_seqlock(&xtime_lock);
- /* If needed, notify drivers here */
- get_time_diff();
- write_sequnlock(&xtime_lock);
local_irq_enable();
err = set_system_power_state(APM_STATE_STANDBY);
@@ -1365,9 +1320,6 @@ static void check_events(void)
ignore_bounce = 1;
if ((event != APM_NORMAL_RESUME)
|| (ignore_normal_resume == 0)) {
- write_seqlock_irq(&xtime_lock);
- set_time();
- write_sequnlock_irq(&xtime_lock);
device_resume();
pm_send_all(PM_RESUME, (void *)0);
queue_event(event, NULL);
@@ -1383,9 +1335,6 @@ static void check_events(void)
break;
case APM_UPDATE_TIME:
- write_seqlock_irq(&xtime_lock);
- set_time();
- write_sequnlock_irq(&xtime_lock);
break;
case APM_CRITICAL_SUSPEND:
Index: linux/arch/i386/kernel/cpu/mtrr/generic.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/mtrr/generic.c
+++ linux/arch/i386/kernel/cpu/mtrr/generic.c
@@ -234,7 +234,7 @@ static unsigned long set_mtrr_state(u32
static unsigned long cr4 = 0;
static u32 deftype_lo, deftype_hi;
-static DEFINE_SPINLOCK(set_atomicity_lock);
+static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
/*
* Since we are disabling the cache don't allow any interrupts - they
Index: linux/arch/i386/kernel/cpu/mtrr/main.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/mtrr/main.c
+++ linux/arch/i386/kernel/cpu/mtrr/main.c
@@ -135,8 +135,6 @@ struct set_mtrr_data {
mtrr_type smp_type;
};
-#ifdef CONFIG_SMP
-
static void ipi_handler(void *info)
/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
[RETURNS] Nothing.
@@ -166,8 +164,6 @@ static void ipi_handler(void *info)
local_irq_restore(flags);
}
-#endif
-
/**
* set_mtrr - update mtrrs on all processors
* @reg: mtrr in question
Index: linux/arch/i386/kernel/cpu/transmeta.c
===================================================================
--- linux.orig/arch/i386/kernel/cpu/transmeta.c
+++ linux/arch/i386/kernel/cpu/transmeta.c
@@ -9,7 +9,8 @@ static void __init init_transmeta(struct
{
unsigned int cap_mask, uk, max, dummy;
unsigned int cms_rev1, cms_rev2;
- unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev;
+ unsigned int cpu_rev, cpu_freq = 0 /* shut up gcc warning */,
+ cpu_flags, new_cpu_rev;
char cpu_info[65];
get_model_name(c); /* Same as AMD/Cyrix */
Index: linux/arch/i386/kernel/entry.S
===================================================================
--- linux.orig/arch/i386/kernel/entry.S
+++ linux/arch/i386/kernel/entry.S
@@ -248,14 +248,18 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
cli
+ cmpl $0, kernel_preemption
+ jz restore_nocheck
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
- jz restore_all
+ jz restore_nocheck
testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all
+ jz restore_nocheck
+ cli
+ TRACE_IRQS_OFF
call preempt_schedule_irq
jmp need_resched
#endif
@@ -311,6 +315,11 @@ sysenter_past_esp:
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -325,6 +334,11 @@ sysenter_past_esp:
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
jne syscall_exit_work
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %eax
+ call sys_ret
+ popl %eax
+#endif
/* if something modifies registers it must also disable sysexit */
movl EIP(%esp), %edx
movl OLDESP(%esp), %ecx
@@ -341,6 +355,11 @@ ENTRY(system_call)
pushl %eax # save orig_eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
testl $TF_MASK,EFLAGS(%esp)
jz no_singlestep
@@ -430,19 +449,20 @@ ldt_ss:
ALIGN
RING0_PTREGS_FRAME # can't unwind into user space anyway
work_pending:
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jz work_notifysig
work_resched:
- call schedule
- cli # make sure we don't miss an interrupt
+ cli
+ TRACE_IRQS_OFF
+ call __schedule
+ # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
- TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
# than syscall tracing?
jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jnz work_resched
work_notifysig: # deal with pending signals and
Index: linux/arch/i386/kernel/head.S
===================================================================
--- linux.orig/arch/i386/kernel/head.S
+++ linux/arch/i386/kernel/head.S
@@ -397,6 +397,7 @@ ignore_int:
call printk
#endif
addl $(5*4),%esp
+ call dump_stack
popl %ds
popl %es
popl %edx
Index: linux/arch/i386/kernel/i386_ksyms.c
===================================================================
--- linux.orig/arch/i386/kernel/i386_ksyms.c
+++ linux/arch/i386/kernel/i386_ksyms.c
@@ -2,10 +2,12 @@
#include
#include
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#ifdef CONFIG_ASM_SEMAPHORES
+EXPORT_SYMBOL(__compat_down_failed);
+EXPORT_SYMBOL(__compat_down_failed_interruptible);
+EXPORT_SYMBOL(__compat_down_failed_trylock);
+EXPORT_SYMBOL(__compat_up_wakeup);
+#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
@@ -20,7 +22,7 @@ EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(strstr);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES)
extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
EXPORT_SYMBOL(__write_lock_failed);
Index: linux/arch/i386/kernel/i8253.c
===================================================================
--- linux.orig/arch/i386/kernel/i8253.c
+++ linux/arch/i386/kernel/i8253.c
@@ -2,7 +2,7 @@
* i8253.c 8253/PIT functions
*
*/
-#include
+#include
#include
#include
#include
@@ -16,22 +16,66 @@
#include "io_ports.h"
-DEFINE_SPINLOCK(i8253_lock);
+DEFINE_RAW_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
-void setup_pit_timer(void)
+static void init_pit_timer(int mode, struct clock_event *evt)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+
+ switch(mode) {
+ case CLOCK_EVT_PERIODIC:
+ /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(0x34, PIT_MODE);
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ break;
+
+ case CLOCK_EVT_ONESHOT:
+ case CLOCK_EVT_SHUTDOWN:
+ /* One shot setup */
+ outb_p(0x38, PIT_MODE);
+ udelay(10);
+ break;
+ }
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+static void pit_next_event(unsigned long delta, struct clock_event *evt)
{
unsigned long flags;
spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ outb_p(delta & 0xff , PIT_CH0); /* LSB */
+ outb(delta >> 8 , PIT_CH0); /* MSB */
spin_unlock_irqrestore(&i8253_lock, flags);
}
+struct clock_event pit_clockevent = {
+ .name = "pit",
+ .capabilities = CLOCK_CAP_TICK | CLOCK_CAP_PROFILE | CLOCK_CAP_UPDATE
+#ifndef CONFIG_SMP
+ | CLOCK_CAP_NEXTEVT
+#endif
+ ,
+ .set_mode = init_pit_timer,
+ .set_next_event = pit_next_event,
+ .shift = 32,
+};
+
+void setup_pit_timer(void)
+{
+ pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
+ pit_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFF, &pit_clockevent);
+ pit_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &pit_clockevent);
+ register_global_clockevent(&pit_clockevent);
+}
+
/*
 * Since the PIT overflows every tick, it's not very useful
* to just read by itself. So use jiffies to emulate a free
@@ -46,7 +90,7 @@ static cycle_t pit_read(void)
static u32 old_jifs;
spin_lock_irqsave(&i8253_lock, flags);
- /*
+ /*
* Although our caller may have the read side of xtime_lock,
* this is now a seqlock, and we are cheating in this routine
* by having side effects on state that we cannot undo if
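
For scale: with the usual 1193182 Hz PIT input clock, the 0x7FFF/0xF
bounds registered above give a next-event window from roughly 12.5
microseconds up to roughly 27 milliseconds. A throwaway check, assuming
clockevent_delta2ns() is the exact inverse of the div_sc() scaling:

	#include <stdio.h>
	#include <stdint.h>

	#define CLOCK_TICK_RATE	1193182ULL	/* PIT input clock, Hz */
	#define NSEC_PER_SEC	1000000000ULL

	int main(void)
	{
		/* pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32) */
		uint32_t mult = (uint32_t)((CLOCK_TICK_RATE << 32) / NSEC_PER_SEC);
		uint64_t max_ns = ((uint64_t)0x7FFF << 32) / mult;
		uint64_t min_ns = ((uint64_t)0xF << 32) / mult;

		printf("PIT next-event window: %llu..%llu ns\n",
		       (unsigned long long)min_ns, (unsigned long long)max_ns);
		return 0;
	}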
Index: linux/arch/i386/kernel/i8259.c
===================================================================
--- linux.orig/arch/i386/kernel/i8259.c
+++ linux/arch/i386/kernel/i8259.c
@@ -34,39 +34,21 @@
* moves to arch independent land
*/
-DEFINE_SPINLOCK(i8259A_lock);
-
-static void end_8259A_irq (unsigned int irq)
-{
- if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
- irq_desc[irq].action)
- enable_8259A_irq(irq);
-}
-
-#define shutdown_8259A_irq disable_8259A_irq
-
static void mask_and_ack_8259A(unsigned int);
-unsigned int startup_8259A_irq(unsigned int irq)
-{
- enable_8259A_irq(irq);
- return 0; /* never anything pending */
-}
-
-static struct hw_interrupt_type i8259A_irq_type = {
- .typename = "XT-PIC",
- .startup = startup_8259A_irq,
- .shutdown = shutdown_8259A_irq,
- .enable = enable_8259A_irq,
- .disable = disable_8259A_irq,
- .ack = mask_and_ack_8259A,
- .end = end_8259A_irq,
+static struct irq_chip i8259A_chip = {
+ .name = "XT-PIC",
+ .mask = disable_8259A_irq,
+ .unmask = enable_8259A_irq,
+ .mask_ack = mask_and_ack_8259A,
};
/*
* 8259A PIC functions to handle ISA devices:
*/
+DEFINE_RAW_SPINLOCK(i8259A_lock);
+
/*
* This contains the irq mask for both 8259A irq controllers,
*/
@@ -131,7 +113,7 @@ void make_8259A_irq(unsigned int irq)
{
disable_irq_nosync(irq);
 	io_apic_irqs &= ~(1<<irq);
Index: linux/arch/i386/kernel/io_apic.c
===================================================================
--- linux.orig/arch/i386/kernel/io_apic.c
+++ linux/arch/i386/kernel/io_apic.c
#include
#include
+#include
#include
#include
@@ -38,6 +39,7 @@
#include
#include
#include
+#include
#include
@@ -49,8 +51,8 @@ atomic_t irq_mis_count;
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(vector_lock);
int timer_over_8254 __initdata = 1;
@@ -85,14 +87,6 @@ static struct irq_pin_list {
int apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector) \
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
-#else
-#define vector_to_irq(vector) (vector)
-#endif
-
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
@@ -136,6 +130,105 @@ static void __init replace_pin_at_irq(un
}
}
+//#define IOAPIC_CACHE
+
+#ifdef IOAPIC_CACHE
+# define MAX_IOAPIC_CACHE 512
+
+/*
+ * Cache register values:
+ */
+static unsigned int io_apic_cache[MAX_IO_APICS][MAX_IOAPIC_CACHE]
+ ____cacheline_aligned_in_smp;
+#endif
+
+inline unsigned int __raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ *IO_APIC_BASE(apic) = reg;
+ return *(IO_APIC_BASE(apic)+4);
+}
+
+unsigned int raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ unsigned int val = __raw_io_apic_read(apic, reg);
+
+#ifdef IOAPIC_CACHE
+ io_apic_cache[apic][reg] = val;
+#endif
+ return val;
+}
+
+unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+#ifdef IOAPIC_CACHE
+ if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+ static int once = 1;
+
+ if (once) {
+ once = 0;
+ printk("WARNING: ioapic register cache overflow: %d.\n",
+ reg);
+ dump_stack();
+ }
+ return __raw_io_apic_read(apic, reg);
+ }
+ if (io_apic_cache[apic][reg] && !sis_apic_bug)
+ return io_apic_cache[apic][reg];
+#endif
+ return raw_io_apic_read(apic, reg);
+}
+
+void io_apic_write(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+ if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+ static int once = 1;
+
+ if (once) {
+ once = 0;
+ printk("WARNING: ioapic register cache overflow: %d.\n",
+ reg);
+ dump_stack();
+ }
+ } else
+ io_apic_cache[apic][reg] = val;
+#endif
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = val;
+}
+
+/*
+ * Some systems need a POST flush or else level-triggered interrupts
+ * generate lots of spurious interrupts due to the POST-ed write not
+ * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC.
+ */
+#ifdef CONFIG_SMP
+# define IOAPIC_POSTFLUSH
+#endif
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APICs require that we rewrite the index register
+ */
+void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+ io_apic_cache[apic][reg] = val;
+#endif
+ if (unlikely(sis_apic_bug))
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = val;
+#ifndef IOAPIC_POSTFLUSH
+ if (unlikely(sis_apic_bug))
+#endif
+ /*
+ * Force POST flush by reading:
+ */
+ val = *(IO_APIC_BASE(apic)+4);
+}
+
static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
{
struct irq_pin_list *entry = irq_2_pin + irq;
@@ -167,18 +260,6 @@ static void __unmask_IO_APIC_irq (unsign
__modify_IO_APIC_irq(irq, 0, 0x00010000);
}
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
-}
-
static void mask_IO_APIC_irq (unsigned int irq)
{
unsigned long flags;
@@ -258,7 +339,7 @@ static void set_ioapic_affinity_irq(unsi
break;
entry = irq_2_pin + entry->next;
}
- set_irq_info(irq, cpumask);
+ set_native_irq_info(irq, cpumask);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -1159,46 +1240,45 @@ static inline int IO_APIC_irq_trigger(in
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
-int assign_irq_vector(int irq)
+static int __assign_irq_vector(int irq)
{
static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
- unsigned long flags;
int vector;
- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
- spin_lock_irqsave(&vector_lock, flags);
-
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
- spin_unlock_irqrestore(&vector_lock, flags);
+ if (IO_APIC_VECTOR(irq) > 0)
return IO_APIC_VECTOR(irq);
- }
-next:
+
current_vector += 8;
if (current_vector == SYSCALL_VECTOR)
- goto next;
+ current_vector += 8;
if (current_vector >= FIRST_SYSTEM_VECTOR) {
offset++;
- if (!(offset%8)) {
- spin_unlock_irqrestore(&vector_lock, flags);
+ if (!(offset % 8))
return -ENOSPC;
- }
current_vector = FIRST_DEVICE_VECTOR + offset;
}
vector = current_vector;
- vector_irq[vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = vector;
+ IO_APIC_VECTOR(irq) = vector;
+ return vector;
+}
+
+static int assign_irq_vector(int irq)
+{
+ unsigned long flags;
+ int vector;
+
+ spin_lock_irqsave(&vector_lock, flags);
+ vector = __assign_irq_vector(irq);
spin_unlock_irqrestore(&vector_lock, flags);
return vector;
}
-
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
+static struct irq_chip ioapic_chip;
#define IOAPIC_AUTO -1
#define IOAPIC_EDGE 0
@@ -1206,16 +1286,17 @@ static struct hw_interrupt_type ioapic_e
static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
{
- unsigned idx;
-
- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
-
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[idx].chip = &ioapic_level_type;
- else
- irq_desc[idx].chip = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[idx]);
+ trigger == IOAPIC_LEVEL) {
+#ifdef CONFIG_PREEMPT_HARDIRQS
+ set_irq_chip_and_handler(irq, &ioapic_chip, handle_level_irq);
+#else
+ set_irq_chip_and_handler(irq, &ioapic_chip, handle_fasteoi_irq);
+#endif
+ } else {
+ set_irq_chip_and_handler(irq, &ioapic_chip, handle_edge_irq);
+ }
+ set_intr_gate(vector, interrupt[irq]);
}
static void __init setup_IO_APIC_irqs(void)
@@ -1326,7 +1407,8 @@ static void __init setup_ExtINT_IRQ0_pin
* The timer IRQ doesn't have to know that behind the
* scene we have a 8259A-master in AEOI mode ...
*/
- irq_desc[0].chip = &ioapic_edge_type;
+ irq_desc[0].chip = &ioapic_chip;
+ set_irq_handler(0, handle_edge_irq);
/*
* Add it to the IO-APIC irq-routing table:
@@ -1445,8 +1527,8 @@ void __init print_IO_APIC(void)
struct IO_APIC_route_entry entry;
spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ *(((int *)&entry)+0) = raw_io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = raw_io_apic_read(apic, 0x11+i*2);
spin_unlock_irqrestore(&ioapic_lock, flags);
printk(KERN_DEBUG " %02x %03X %02X ",
@@ -1467,17 +1549,12 @@ void __init print_IO_APIC(void)
);
}
}
- if (use_pci_vector())
- printk(KERN_INFO "Using vector-based indexing\n");
printk(KERN_DEBUG "IRQ to pin mappings:\n");
for (i = 0; i < NR_IRQS; i++) {
struct irq_pin_list *entry = irq_2_pin + i;
if (entry->pin < 0)
continue;
- if (use_pci_vector() && !platform_legacy_irq(i))
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
- else
- printk(KERN_DEBUG "IRQ%d ", i);
+ printk(KERN_DEBUG "IRQ%d ", i);
for (;;) {
printk("-> %d:%d", entry->apic, entry->pin);
if (!entry->next)
@@ -1492,7 +1569,7 @@ void __init print_IO_APIC(void)
return;
}
-#if 0
+#if 1
static void print_APIC_bitfield (int base)
{
@@ -1893,7 +1970,7 @@ static int __init timer_irq_works(void)
* might have cached one ExtINT interrupt. Finally, at
* least one tick may be lost due to delays.
*/
- if (jiffies - t1 > 4)
+ if (jiffies - t1 > 4 && jiffies - t1 < 16)
return 1;
return 0;
@@ -1913,6 +1990,8 @@ static int __init timer_irq_works(void)
*/
/*
+ * Startup quirk:
+ *
 * Starting up an edge-triggered IO-APIC interrupt is
* nasty - we need to make sure that we get the edge.
* If it is already asserted for some reason, we need
@@ -1920,8 +1999,10 @@ static int __init timer_irq_works(void)
*
* This is not complete - we should be able to fake
* an edge even if it isn't on the 8259A...
+ *
+ * (We do this for level-triggered IRQs too - it cannot hurt.)
*/
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(unsigned int irq)
{
int was_pending = 0;
unsigned long flags;
@@ -1938,47 +2019,18 @@ static unsigned int startup_edge_ioapic_
return was_pending;
}
-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
-{
- move_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
-}
-
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_irq(unsigned int irq)
{
- unmask_IO_APIC_irq(irq);
-
- return 0; /* don't check for pending */
+ move_native_irq(irq);
+ ack_APIC_irq();
}
-static void end_level_ioapic_irq (unsigned int irq)
+static void ack_ioapic_quirk_irq(unsigned int irq)
{
unsigned long v;
int i;
- move_irq(irq);
+ move_native_irq(irq);
/*
* It appears there is an erratum which affects at least version 0x11
* of I/O APIC (that's the 82093AA and cores integrated into various
@@ -2007,111 +2059,34 @@ static void end_level_ioapic_irq (unsign
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
+ /* mask = 1, trigger = 0 */
+ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+ /* mask = 0, trigger = 1 */
+ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
spin_unlock(&ioapic_lock);
}
}
-#ifdef CONFIG_PCI_MSI
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_edge_ioapic_irq(irq);
-}
-
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_level_ioapic_irq (irq);
-}
-
-static void end_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- unmask_IO_APIC_irq(irq);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_vector (unsigned int vector,
- cpumask_t cpu_mask)
-{
- int irq = vector_to_irq(vector);
-
- set_native_irq_info(vector, cpu_mask);
- set_ioapic_affinity_irq(irq, cpu_mask);
-}
-#endif
-#endif
-
-static int ioapic_retrigger(unsigned int irq)
+static int ioapic_retrigger_irq(unsigned int irq)
{
send_IPI_self(IO_APIC_VECTOR(irq));
return 1;
}
-/*
- * Level and edge triggered IO-APIC interrupts need different handling,
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
- * handled with the level-triggered descriptor, but that one has slightly
- * more overhead. Level-triggered interrupts cannot be handled with the
- * edge-triggered handler, without risking IRQ storms and other ugly
- * races.
- */
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
- .typename = "IO-APIC-edge",
- .startup = startup_edge_ioapic,
- .shutdown = shutdown_edge_ioapic,
- .enable = enable_edge_ioapic,
- .disable = disable_edge_ioapic,
- .ack = ack_edge_ioapic,
- .end = end_edge_ioapic,
+static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_ioapic_irq,
+ .eoi = ack_ioapic_quirk_irq,
#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
+ .set_affinity = set_ioapic_affinity_irq,
#endif
- .retrigger = ioapic_retrigger,
+ .retrigger = ioapic_retrigger_irq,
};
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
- .typename = "IO-APIC-level",
- .startup = startup_level_ioapic,
- .shutdown = shutdown_level_ioapic,
- .enable = enable_level_ioapic,
- .disable = disable_level_ioapic,
- .ack = mask_and_ack_level_ioapic,
- .end = end_level_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
-#endif
- .retrigger = ioapic_retrigger,
-};
static inline void init_IO_APIC_traps(void)
{
@@ -2130,11 +2105,6 @@ static inline void init_IO_APIC_traps(vo
*/
for (irq = 0; irq < NR_IRQS ; irq++) {
int tmp = irq;
- if (use_pci_vector()) {
- if (!platform_legacy_irq(tmp))
- if ((tmp = vector_to_irq(tmp)) == -1)
- continue;
- }
if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
/*
* Hmm.. We don't have an entry for this,
@@ -2145,20 +2115,21 @@ static inline void init_IO_APIC_traps(vo
make_8259A_irq(irq);
else
/* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_type;
+ irq_desc[irq].chip = &no_irq_chip;
}
}
}
-static void enable_lapic_irq (unsigned int irq)
-{
- unsigned long v;
+/*
+ * The local APIC irq-chip implementation:
+ */
- v = apic_read(APIC_LVT0);
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+static void ack_apic(unsigned int irq)
+{
+ ack_APIC_irq();
}
-static void disable_lapic_irq (unsigned int irq)
+static void mask_lapic_irq (unsigned int irq)
{
unsigned long v;
@@ -2166,21 +2137,19 @@ static void disable_lapic_irq (unsigned
apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
}
-static void ack_lapic_irq (unsigned int irq)
+static void unmask_lapic_irq (unsigned int irq)
{
- ack_APIC_irq();
-}
+ unsigned long v;
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
+ v = apic_read(APIC_LVT0);
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
- .typename = "local-APIC-edge",
- .startup = NULL, /* startup_irq() not used for IRQ0 */
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
- .enable = enable_lapic_irq,
- .disable = disable_lapic_irq,
- .ack = ack_lapic_irq,
- .end = end_lapic_irq
+static struct irq_chip lapic_chip __read_mostly = {
+ .name = "local-APIC-edge",
+ .mask = mask_lapic_irq,
+ .unmask = unmask_lapic_irq,
+ .eoi = ack_apic,
};
static void setup_nmi (void)
@@ -2361,7 +2330,7 @@ static inline void check_timer(void)
printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
disable_8259A_irq(0);
- irq_desc[0].chip = &lapic_irq_type;
+ set_irq_chip_and_handler(0, &lapic_chip, handle_fasteoi_irq);
apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
enable_8259A_irq(0);
@@ -2543,6 +2512,117 @@ static int __init ioapic_init_sysfs(void
device_initcall(ioapic_init_sysfs);
+#ifdef CONFIG_PCI_MSI
+/*
+ * Dynamic irq allocate and deallocation for MSI
+ */
+int create_irq(void)
+{
+ /* Allocate an unused irq */
+ int irq, new, vector;
+ unsigned long flags;
+
+ irq = -ENOSPC;
+ spin_lock_irqsave(&vector_lock, flags);
+ for (new = (NR_IRQS - 1); new >= 0; new--) {
+ if (platform_legacy_irq(new))
+ continue;
+ if (irq_vector[new] != 0)
+ continue;
+ vector = __assign_irq_vector(new);
+ if (likely(vector > 0))
+ irq = new;
+ break;
+ }
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (irq >= 0) {
+ set_intr_gate(vector, interrupt[irq]);
+ dynamic_irq_init(irq);
+ }
+ return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ dynamic_irq_cleanup(irq);
+
+ spin_lock_irqsave(&vector_lock, flags);
+ irq_vector[irq] = 0;
+ spin_unlock_irqrestore(&vector_lock, flags);
+}
+#endif /* CONFIG_PCI_MSI */
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_msg_setup(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+	/*
+	 * For now this code always uses physical delivery mode.
+	 */
+ int vector;
+ unsigned dest;
+
+ vector = assign_irq_vector(irq);
+ if (vector >= 0) {
+ dest = cpu_mask_to_apicid(TARGET_CPUS);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((INT_DEST_MODE == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(vector);
+ }
+ return vector;
+}
+
+static void msi_msg_teardown(unsigned int irq)
+{
+ return;
+}
+
+static void msi_msg_set_affinity(unsigned int irq, cpumask_t mask, struct msi_msg *msg)
+{
+ int vector;
+ unsigned dest;
+
+ vector = assign_irq_vector(irq);
+ if (vector > 0) {
+ dest = cpu_mask_to_apicid(mask);
+
+ msg->data &= ~MSI_DATA_VECTOR_MASK;
+ msg->data |= MSI_DATA_VECTOR(vector);
+ msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg->address_lo |= MSI_ADDR_DEST_ID(dest);
+ }
+}
+
+struct msi_ops arch_msi_ops = {
+ .needs_64bit_address = 0,
+ .setup = msi_msg_setup,
+ .teardown = msi_msg_teardown,
+ .target = msi_msg_set_affinity,
+};
+
+#endif /* CONFIG_PCI_MSI */
+
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
@@ -2697,7 +2777,7 @@ int io_apic_set_pci_routing (int ioapic,
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+ set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
return 0;
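
The stride-of-8 walk in __assign_irq_vector() above spreads vectors
across interrupt priority classes and refuses to hand out the
system-call gate. A standalone model of just that walk; the vector
constants are the customary i386 values and are this sketch's
assumption, not part of the patch:

	#include <stdio.h>

	#define FIRST_DEVICE_VECTOR	0x31
	#define FIRST_SYSTEM_VECTOR	0xef
	#define SYSCALL_VECTOR		0x80

	int main(void)
	{
		int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
		int i;

		for (i = 0; i < 40; i++) {
			current_vector += 8;
			if (current_vector == SYSCALL_VECTOR)
				current_vector += 8;	/* never hand out int 0x80 */
			if (current_vector >= FIRST_SYSTEM_VECTOR) {
				offset++;
				if (!(offset % 8))
					break;		/* table full: -ENOSPC */
				current_vector = FIRST_DEVICE_VECTOR + offset;
			}
			printf("allocation %2d -> vector 0x%02x\n",
			       i, current_vector);
		}
		return 0;
	}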
Index: linux/arch/i386/kernel/irq.c
===================================================================
--- linux.orig/arch/i386/kernel/irq.c
+++ linux/arch/i386/kernel/irq.c
@@ -51,10 +51,11 @@ static union irq_ctx *softirq_ctx[NR_CPU
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+fastcall notrace unsigned int do_IRQ(struct pt_regs *regs)
{
/* high bit used in ret_from_ code */
int irq = ~regs->orig_eax;
+ struct irq_desc *desc = irq_desc + irq;
#ifdef CONFIG_4KSTACKS
union irq_ctx *curctx, *irqctx;
u32 *isp;
@@ -67,6 +68,11 @@ fastcall unsigned int do_IRQ(struct pt_r
}
irq_enter();
+#ifdef CONFIG_LATENCY_TRACE
+ if (irq == trace_user_trigger_irq)
+ user_trace_start();
+#endif
+ trace_special(regs->eip, irq, 0);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
@@ -75,12 +81,25 @@ fastcall unsigned int do_IRQ(struct pt_r
__asm__ __volatile__("andl %%esp,%0" :
"=r" (esp) : "0" (THREAD_SIZE - 1));
if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
- printk("do_IRQ: stack overflow: %ld\n",
+ printk("BUG: do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct thread_info));
dump_stack();
}
}
#endif
+#ifdef CONFIG_NO_HZ
+ if (idle_cpu(smp_processor_id())) {
+ update_jiffies();
+ /*
+ * Force polling-idle loops to break out into
+ * the sched-timer setting code, to make sure
+ * that timer interval changes due to __mod_timer()
+ * in IRQ context get properly propagated:
+ */
+ if (tsk_is_polling(current))
+ set_need_resched();
+ }
+#endif
#ifdef CONFIG_4KSTACKS
@@ -94,7 +113,7 @@ fastcall unsigned int do_IRQ(struct pt_r
* current stack (which is the irq stack already after all)
*/
if (curctx != irqctx) {
- int arg1, arg2, ebx;
+ int arg1, arg2, arg3, ebx;
/* build the stack frame on the IRQ stack */
isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
@@ -110,16 +129,17 @@ fastcall unsigned int do_IRQ(struct pt_r
(curctx->tinfo.preempt_count & SOFTIRQ_MASK);
asm volatile(
- " xchgl %%ebx,%%esp \n"
- " call __do_IRQ \n"
+ " xchgl %%ebx,%%esp \n"
+ " call *%%edi \n"
" movl %%ebx,%%esp \n"
- : "=a" (arg1), "=d" (arg2), "=b" (ebx)
- : "0" (irq), "1" (regs), "2" (isp)
- : "memory", "cc", "ecx"
+ : "=a" (arg1), "=d" (arg2), "=c" (arg3), "=b" (ebx)
+ : "0" (irq), "1" (desc), "2" (regs), "3" (isp),
+ "D" (desc->handle_irq)
+ : "memory", "cc"
);
} else
#endif
- __do_IRQ(irq, regs);
+ desc->handle_irq(irq, desc, regs);
irq_exit();
@@ -242,8 +262,10 @@ int show_interrupts(struct seq_file *p,
}
if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ irq_desc_t *desc = irq_desc + i;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -253,7 +275,22 @@ int show_interrupts(struct seq_file *p,
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
- seq_printf(p, " %14s", irq_desc[i].chip->typename);
+ seq_printf(p, " %-14s", irq_desc[i].chip->name);
+#define F(x,c) ((desc->status & x) ? c : '.')
+ seq_printf(p, " [%c%c%c%c%c%c%c%c%c/",
+ F(IRQ_INPROGRESS, 'I'),
+ F(IRQ_DISABLED, 'D'),
+ F(IRQ_PENDING, 'P'),
+ F(IRQ_REPLAY, 'R'),
+ F(IRQ_AUTODETECT, 'A'),
+ F(IRQ_WAITING, 'W'),
+ F(IRQ_LEVEL, 'L'),
+ F(IRQ_MASKED, 'M'),
+ F(IRQ_NODELAY, 'N'));
+#undef F
+ seq_printf(p, "%3d]", desc->irqs_unhandled);
+
+ seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq));
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
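
The F() macro added above is just per-bit flag rendering for the
/proc/interrupts status column. A miniature of the same idiom; the bit
values here are illustrative, not the kernel's irq_desc ones:

	#include <stdio.h>

	#define IRQ_INPROGRESS	0x001
	#define IRQ_DISABLED	0x002
	#define IRQ_PENDING	0x004

	static void show_flags(unsigned int status)
	{
	#define F(x, c) ((status & (x)) ? (c) : '.')
		printf("[%c%c%c]\n", F(IRQ_INPROGRESS, 'I'),
		       F(IRQ_DISABLED, 'D'), F(IRQ_PENDING, 'P'));
	#undef F
	}

	int main(void)
	{
		show_flags(IRQ_DISABLED | IRQ_PENDING);	/* prints [.DP] */
		return 0;
	}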
Index: linux/arch/i386/kernel/kprobes.c
===================================================================
--- linux.orig/arch/i386/kernel/kprobes.c
+++ linux/arch/i386/kernel/kprobes.c
@@ -338,7 +338,7 @@ ss_probe:
/* Boost up -- we can execute copied instructions directly */
reset_current_kprobe();
regs->eip = (unsigned long)p->ainsn.insn;
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
}
#endif
@@ -347,7 +347,7 @@ ss_probe:
return 1;
no_kprobe:
- preempt_enable_no_resched();
+ preempt_enable();
return ret;
}
@@ -566,7 +566,7 @@ static int __kprobes post_kprobe_handler
}
reset_current_kprobe();
out:
- preempt_enable_no_resched();
+ preempt_enable();
/*
* if somebody else is singlestepping across a probe point, eflags
@@ -600,7 +600,7 @@ static int __kprobes kprobe_fault_handle
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
break;
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
@@ -734,7 +734,7 @@ int __kprobes longjmp_break_handler(stru
*regs = kcb->jprobe_saved_regs;
memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
MIN_STACK_SIZE(stack_addr));
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
}
return 0;
Index: linux/arch/i386/kernel/mcount-wrapper.S
===================================================================
--- /dev/null
+++ linux/arch/i386/kernel/mcount-wrapper.S
@@ -0,0 +1,27 @@
+/*
+ * linux/arch/i386/mcount-wrapper.S
+ *
+ * Copyright (C) 2004 Ingo Molnar
+ */
+
+.globl mcount
+mcount:
+
+ cmpl $0, mcount_enabled
+ jz out
+
+ push %ebp
+ mov %esp, %ebp
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ call __mcount
+
+ popl %edx
+ popl %ecx
+ popl %eax
+ popl %ebp
+out:
+ ret
+
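
For context, a userspace model of what this wrapper feeds: gcc's -pg
emits a call to mcount at every function entry, the stub above tests
mcount_enabled and preserves the scratch registers, and the C-level
__mcount() can then resolve the instrumented caller. The direct call in
traced_function() stands in for the compiler-emitted one; the names
mirror the patch but the body is an assumption (and gcc-only, because
of __builtin_return_address):

	#include <stdio.h>

	int mcount_enabled = 1;
	static unsigned long mcount_hits;

	static void __mcount(void)
	{
		/* 0 = __mcount's caller, i.e. the instrumented function */
		void *instrumented = __builtin_return_address(0);

		mcount_hits++;
		printf("mcount hit #%lu from %p\n", mcount_hits, instrumented);
	}

	static void __attribute__((noinline)) traced_function(void)
	{
		if (mcount_enabled)	/* the asm stub does this test first */
			__mcount();
	}

	int main(void)
	{
		traced_function();
		traced_function();
		return 0;
	}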
Index: linux/arch/i386/kernel/microcode.c
===================================================================
--- linux.orig/arch/i386/kernel/microcode.c
+++ linux/arch/i386/kernel/microcode.c
@@ -115,7 +115,7 @@ module_param(verbose, int, 0644);
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
+static DEFINE_RAW_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
Index: linux/arch/i386/kernel/mpparse.c
===================================================================
--- linux.orig/arch/i386/kernel/mpparse.c
+++ linux/arch/i386/kernel/mpparse.c
@@ -228,12 +228,17 @@ static void __init MP_bus_info (struct m
mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+ /*
+ * mpc_busid is char:
+ */
+#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
" is too large, max. supported is %d\n",
m->mpc_busid, str, MAX_MP_BUSSES - 1);
return;
}
+#endif
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
Index: linux/arch/i386/kernel/nmi.c
===================================================================
--- linux.orig/arch/i386/kernel/nmi.c
+++ linux/arch/i386/kernel/nmi.c
@@ -21,6 +21,7 @@
#include
#include
#include
+#include
#include
#include
@@ -30,7 +31,7 @@
unsigned int nmi_watchdog = NMI_NONE;
extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
+static unsigned int nmi_hz = 1000;
static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
@@ -99,7 +100,6 @@ int nmi_active;
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
* CPUs during the test make them busy.
@@ -107,7 +107,12 @@ int nmi_active;
static __init void nmi_cpu_busy(void *data)
{
volatile int *endflag = data;
+ /*
+	 * avoid a warning; on PREEMPT_RT this won't run in hardirq context:
+ */
+#ifndef CONFIG_PREEMPT_RT
local_irq_enable_in_hardirq();
+#endif
/* Intentionally don't use cpu_relax here. This is
to make sure that the performance counter really ticks,
even if there is a simulator or similar that catches the
@@ -117,7 +122,6 @@ static __init void nmi_cpu_busy(void *da
while (*endflag == 0)
barrier();
}
-#endif
static int __init check_nmi_watchdog(void)
{
@@ -140,7 +144,7 @@ static int __init check_nmi_watchdog(voi
for_each_possible_cpu(cpu)
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
local_irq_enable();
- mdelay((10*1000)/nmi_hz); // wait 10 ticks
+ mdelay((100*1000)/nmi_hz); // wait 100 ticks
for_each_possible_cpu(cpu) {
#ifdef CONFIG_SMP
@@ -167,7 +171,7 @@ static int __init check_nmi_watchdog(voi
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = 1;
+ nmi_hz = 10000;
kfree(prev_nmi_count);
return 0;
@@ -579,9 +583,34 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
extern void die_nmi(struct pt_regs *, const char *msg);
-void nmi_watchdog_tick (struct pt_regs * regs)
+int nmi_show_regs[NR_CPUS];
+
+void nmi_show_all_regs(void)
{
+ int i;
+
+ if (nmi_watchdog == NMI_NONE)
+ return;
+ if (system_state != SYSTEM_RUNNING) {
+ printk("nmi_show_all_regs(): system state %d, not doing.\n",
+ system_state);
+ return;
+ }
+ printk("nmi_show_all_regs(): start on CPU#%d.\n",
+ raw_smp_processor_id());
+ dump_stack();
+ for_each_online_cpu(i)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+}
+
+static DEFINE_RAW_SPINLOCK(nmi_print_lock);
+
+void notrace nmi_watchdog_tick (struct pt_regs * regs)
+{
/*
* Since current_thread_info()-> is always on the stack, and we
* always switch the stack NMI-atomically, it's safe to use
@@ -590,7 +619,16 @@ void nmi_watchdog_tick (struct pt_regs *
unsigned int sum;
int cpu = smp_processor_id();
- sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+ sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
+
+ profile_tick(CPU_PROFILING, regs);
+ if (nmi_show_regs[cpu]) {
+ nmi_show_regs[cpu] = 0;
+ spin_lock(&nmi_print_lock);
+ printk("NMI show regs on CPU#%d:\n", cpu);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+ }
if (last_irq_sums[cpu] == sum) {
/*
@@ -598,11 +636,26 @@ void nmi_watchdog_tick (struct pt_regs *
* wait a few IRQs (5 seconds) before doing the oops ...
*/
alert_counter[cpu]++;
- if (alert_counter[cpu] == 5*nmi_hz)
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP");
+ if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) {
+ int i;
+
+ bust_spinlocks(1);
+ spin_lock(&nmi_print_lock);
+ printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n",
+ cpu, alert_counter[cpu], 5*nmi_hz);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+
+ for_each_online_cpu(i)
+ if (i != cpu)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+
+ die_nmi(regs, "NMI Watchdog detected LOCKUP");
+ }
+
} else {
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
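
The detection logic above boils down to: if the per-CPU interrupt sum
stops moving for 5*nmi_hz consecutive NMI ticks, report a lockup, and
with the modulo test keep re-reporting every further 5 seconds. A toy
model of that counter dance (all values invented):

	#include <stdio.h>

	int main(void)
	{
		unsigned int nmi_hz = 10000, sum = 0, last_sum = 0;
		unsigned int alert = 0, tick;

		for (tick = 0; tick < 12 * nmi_hz; tick++) {
			if (tick < nmi_hz)
				sum++;		/* timer irqs still flowing */

			if (sum == last_sum) {
				alert++;
				if (alert && !(alert % (5 * nmi_hz)))
					printf("lockup at tick %u (%u stuck)\n",
					       tick, alert);
			} else {
				last_sum = sum;
				alert = 0;
			}
		}
		return 0;
	}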
Index: linux/arch/i386/kernel/process.c
===================================================================
--- linux.orig/arch/i386/kernel/process.c
+++ linux/arch/i386/kernel/process.c
@@ -103,16 +103,20 @@ void default_idle(void)
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
local_irq_disable();
- if (!need_resched())
- safe_halt();
- else
+ if (!need_resched() && !need_resched_delayed()) {
+ if (!hrtimer_stop_sched_tick())
+ safe_halt();
+ else
+ local_irq_enable();
+ hrtimer_restart_sched_tick();
+ } else
local_irq_enable();
}
current_thread_info()->status |= TS_POLLING;
} else {
- while (!need_resched())
+ while (!need_resched() && !need_resched_delayed())
cpu_relax();
}
}
@@ -125,16 +129,18 @@ EXPORT_SYMBOL(default_idle);
* to poll the ->work.need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
-static void poll_idle (void)
+static void poll_idle(void)
{
local_irq_enable();
- asm volatile(
- "2:"
- "testl %0, %1;"
- "rep; nop;"
- "je 2b;"
- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
+ while (!need_resched() && !need_resched_delayed()) {
+ hrtimer_stop_sched_tick();
+ local_irq_enable();
+		while (!need_resched() && !need_resched_delayed() &&
+		       !rcu_pending(smp_processor_id()) &&
+		       !local_softirq_pending())
+ rep_nop();
+ hrtimer_restart_sched_tick();
+ local_irq_enable();
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -177,7 +183,9 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- while (!need_resched()) {
+ BUG_ON(irqs_disabled());
+
+ while (!need_resched() && !need_resched_delayed()) {
void (*idle)(void);
if (__get_cpu_var(cpu_idle_state))
@@ -195,9 +203,11 @@ void cpu_idle(void)
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
- preempt_enable_no_resched();
- schedule();
+ local_irq_disable();
+ __preempt_enable_no_resched();
+ __schedule();
preempt_disable();
+ local_irq_enable();
}
}
@@ -240,13 +250,16 @@ static void mwait_idle(void)
{
local_irq_enable();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
+ if (hrtimer_stop_sched_tick())
+ break;
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
- if (need_resched())
+ if (need_resched() || need_resched_delayed())
break;
__mwait(0, 0);
}
+ hrtimer_restart_sched_tick();
}
void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
@@ -363,15 +376,23 @@ void exit_thread(void)
if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ void *io_bitmap_ptr = t->io_bitmap_ptr;
+ int cpu;
+ struct tss_struct *tss;
- kfree(t->io_bitmap_ptr);
+ /*
+ * On PREEMPT_RT we must not call kfree() with
+ * preemption disabled, so we first zap the pointer:
+ */
t->io_bitmap_ptr = NULL;
+ kfree(io_bitmap_ptr);
+
clear_thread_flag(TIF_IO_BITMAP);
/*
* Careful, clear this in the TSS too:
*/
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
t->io_bitmap_max = 0;
tss->io_bitmap_owner = NULL;
Index: linux/arch/i386/kernel/semaphore.c
===================================================================
--- linux.orig/arch/i386/kernel/semaphore.c
+++ linux/arch/i386/kernel/semaphore.c
@@ -12,6 +12,7 @@
*
* rw semaphores implemented November 1999 by Benjamin LaHaise
*/
+#include
#include
/*
@@ -27,15 +28,15 @@
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed\n"
-"__down_failed:\n\t"
+".globl __compat_down_failed\n"
+"__compat_down_failed:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down\n\t"
+ "call __compat_down\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -48,15 +49,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_interruptible\n"
-"__down_failed_interruptible:\n\t"
+".globl __compat_down_failed_interruptible\n"
+"__compat_down_failed_interruptible:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_interruptible\n\t"
+ "call __compat_down_interruptible\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -69,15 +70,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_trylock\n"
-"__down_failed_trylock:\n\t"
+".globl __compat_down_failed_trylock\n"
+"__compat_down_failed_trylock:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_trylock\n\t"
+ "call __compat_down_trylock\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -90,45 +91,13 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __up_wakeup\n"
-"__up_wakeup:\n\t"
+".globl __compat_up_wakeup\n"
+"__compat_up_wakeup:\n\t"
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __up\n\t"
+ "call __compat_up\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
"ret"
);
-/*
- * rw spinlock fallbacks
- */
-#if defined(CONFIG_SMP)
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __write_lock_failed\n"
-"__write_lock_failed:\n\t"
- LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jne 1b\n\t"
- LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jnz __write_lock_failed\n\t"
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __read_lock_failed\n"
-"__read_lock_failed:\n\t"
- LOCK_PREFIX "incl (%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $1,(%eax)\n\t"
- "js 1b\n\t"
- LOCK_PREFIX "decl (%eax)\n\t"
- "js __read_lock_failed\n\t"
- "ret"
-);
-#endif
Index: linux/arch/i386/kernel/setup.c
===================================================================
--- linux.orig/arch/i386/kernel/setup.c
+++ linux/arch/i386/kernel/setup.c
@@ -61,7 +61,7 @@
#include
#include
#include
-
+#include
/* Forward Declaration. */
void __init find_max_pfn(void);
@@ -1580,6 +1580,7 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
tsc_init();
+ vsyscall_init();
}
static __init int add_pcspkr(void)
Index: linux/arch/i386/kernel/signal.c
===================================================================
--- linux.orig/arch/i386/kernel/signal.c
+++ linux/arch/i386/kernel/signal.c
@@ -532,6 +532,13 @@ handle_signal(unsigned long sig, siginfo
}
}
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
* that register information in the sigcontext is correct.
@@ -572,6 +579,13 @@ static void fastcall do_signal(struct pt
struct k_sigaction ka;
sigset_t *oldset;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux/arch/i386/kernel/smp.c
===================================================================
--- linux.orig/arch/i386/kernel/smp.c
+++ linux/arch/i386/kernel/smp.c
@@ -255,7 +255,7 @@ void send_IPI_mask_sequence(cpumask_t ma
static cpumask_t flush_cpumask;
static struct mm_struct * flush_mm;
static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+static DEFINE_RAW_SPINLOCK(tlbstate_lock);
#define FLUSH_ALL 0xffffffff
/*
@@ -400,7 +400,7 @@ static void flush_tlb_others(cpumask_t c
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
- mb();
+ cpu_relax();
flush_mm = NULL;
flush_va = 0;
@@ -491,10 +491,20 @@ void smp_send_reschedule(int cpu)
}
/*
+ * this function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ send_IPI_allbutself(RESCHEDULE_VECTOR);
+}
+
+/*
* Structure and data for smp_call_function(). This is designed to minimise
* static memory requirements. It also looks cleaner.
*/
-static DEFINE_SPINLOCK(call_lock);
+static DEFINE_RAW_SPINLOCK(call_lock);
struct call_data_struct {
void (*func) (void *info);
@@ -599,13 +609,14 @@ void smp_send_stop(void)
}
/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule callback. Trigger a reschedule pass so that
+ * RT-overload balancing can pass tasks around.
*/
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs)
{
+ trace_special(regs->eip, 0, 0);
ack_APIC_irq();
+ set_tsk_need_resched(current);
}
fastcall void smp_call_function_interrupt(struct pt_regs *regs)
Index: linux/arch/i386/kernel/time.c
===================================================================
--- linux.orig/arch/i386/kernel/time.c
+++ linux/arch/i386/kernel/time.c
@@ -131,7 +131,7 @@ static int set_rtc_mmss(unsigned long no
int timer_ack;
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
-unsigned long profile_pc(struct pt_regs *regs)
+unsigned long notrace profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
@@ -150,15 +150,6 @@ EXPORT_SYMBOL(profile_pc);
*/
irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
/*
@@ -177,7 +168,6 @@ irqreturn_t timer_interrupt(int irq, voi
do_timer_interrupt_hook(regs);
-
if (MCA_bus) {
/* The PS/2 uses level-triggered interrupts. You can't
turn them off, nor would you want to (any attempt to
@@ -192,8 +182,6 @@ irqreturn_t timer_interrupt(int irq, voi
outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
}
- write_sequnlock(&xtime_lock);
-
#ifdef CONFIG_X86_LOCAL_APIC
if (using_apic_timer)
smp_send_timer_broadcast_ipi(regs);
@@ -203,7 +191,7 @@ irqreturn_t timer_interrupt(int irq, voi
}
/* not static: needed by APM */
-unsigned long get_cmos_time(void)
+unsigned long read_persistent_clock(void)
{
unsigned long retval;
unsigned long flags;
@@ -219,7 +207,7 @@ unsigned long get_cmos_time(void)
return retval;
}
-EXPORT_SYMBOL(get_cmos_time);
+EXPORT_SYMBOL(read_persistent_clock);
static void sync_cmos_clock(unsigned long dummy);
@@ -270,75 +258,11 @@ void notify_arch_cmos_timer(void)
mod_timer(&sync_cmos_timer, jiffies + 1);
}
-static long clock_cmos_diff, sleep_start;
-
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
- /*
- * Estimate time zone so that set_time can update the clock
- */
- clock_cmos_diff = -get_cmos_time();
- clock_cmos_diff += get_seconds();
- sleep_start = get_cmos_time();
- return 0;
-}
-
-static int timer_resume(struct sys_device *dev)
-{
- unsigned long flags;
- unsigned long sec;
- unsigned long sleep_length;
-
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled())
- hpet_reenable();
-#endif
- setup_pit_timer();
- sec = get_cmos_time() + clock_cmos_diff;
- sleep_length = (get_cmos_time() - sleep_start) * HZ;
- write_seqlock_irqsave(&xtime_lock, flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
- jiffies_64 += sleep_length;
- wall_jiffies += sleep_length;
- write_sequnlock_irqrestore(&xtime_lock, flags);
- touch_softlockup_watchdog();
- return 0;
-}
-
-static struct sysdev_class timer_sysclass = {
- .resume = timer_resume,
- .suspend = timer_suspend,
- set_kset_name("timer"),
-};
-
-
-/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int time_init_device(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(time_init_device);
-
#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
}
@@ -359,10 +283,6 @@ void __init time_init(void)
return;
}
#endif
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
time_init_hook();
}
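
The get_cmos_time() -> read_persistent_clock() rename matters because
generic timekeeping code is expected to seed the wall clock itself,
which is why the xtime assignments above drop out of the arch path. A
userspace stand-in for the intended call shape (time(NULL) substitutes
for the CMOS read; the main() body models what generic init is assumed
to do):

	#include <stdio.h>
	#include <time.h>

	/* stand-in for the CMOS reader; the real one reads the RTC */
	static unsigned long read_persistent_clock(void)
	{
		return (unsigned long)time(NULL);
	}

	static struct timespec xtime;

	int main(void)
	{
		xtime.tv_sec  = (time_t)read_persistent_clock();
		xtime.tv_nsec = 0;
		printf("boot wall time: %ld\n", (long)xtime.tv_sec);
		return 0;
	}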
Index: linux/arch/i386/kernel/traps.c
===================================================================
--- linux.orig/arch/i386/kernel/traps.c
+++ linux/arch/i386/kernel/traps.c
@@ -226,6 +226,7 @@ static void show_trace_log_lvl(struct ta
break;
printk("%s =======================\n", log_lvl);
}
+ print_traces(task);
}
void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
@@ -276,6 +277,12 @@ void dump_stack(void)
EXPORT_SYMBOL(dump_stack);
+#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_LATENCY_TRACE)
+extern unsigned long worst_stack_left;
+#else
+# define worst_stack_left -1L
+#endif
+
void show_registers(struct pt_regs *regs)
{
int i;
@@ -302,8 +309,8 @@ void show_registers(struct pt_regs *regs
regs->eax, regs->ebx, regs->ecx, regs->edx);
printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
regs->esi, regs->edi, regs->ebp, esp);
- printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
+ printk(KERN_EMERG "ds: %04x es: %04x ss: %04x preempt: %08x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss, preempt_count());
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
TASK_COMM_LEN, current->comm, current->pid,
current_thread_info(), current, current->thread_info);
@@ -375,11 +382,11 @@ static void handle_BUG(struct pt_regs *r
void die(const char * str, struct pt_regs * regs, long err)
{
static struct {
- spinlock_t lock;
+ raw_spinlock_t lock;
u32 lock_owner;
int lock_owner_depth;
} die = {
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = RAW_SPIN_LOCK_UNLOCKED(die.lock),
.lock_owner = -1,
.lock_owner_depth = 0
};
@@ -486,6 +493,11 @@ static void __kprobes do_trap(int trapnr
if (!user_mode(regs))
goto kernel_trap;
+#ifdef CONFIG_PREEMPT_RT
+ local_irq_enable();
+ preempt_check_resched();
+#endif
+
trap_signal: {
if (info)
force_sig_info(signr, info, tsk);
@@ -505,6 +517,7 @@ static void __kprobes do_trap(int trapnr
if (ret) goto trap_signal;
return;
}
+ print_traces(tsk);
}
#define DO_ERROR(trapnr, signr, str, name) \
@@ -703,10 +716,11 @@ void die_nmi (struct pt_regs *regs, cons
crash_kexec(regs);
}
+ nmi_exit();
do_exit(SIGSEGV);
}
-static void default_do_nmi(struct pt_regs * regs)
+static void notrace default_do_nmi(struct pt_regs * regs)
{
unsigned char reason = 0;
@@ -715,9 +729,6 @@ static void default_do_nmi(struct pt_reg
reason = get_nmi_reason();
if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Ok, so this is none of the documented NMI sources,
@@ -725,9 +736,13 @@ static void default_do_nmi(struct pt_reg
*/
if (nmi_watchdog) {
nmi_watchdog_tick(regs);
+// trace_special(6, 1, 0);
return;
}
#endif
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
+ == NOTIFY_STOP)
+ return;
unknown_nmi_error(reason, regs);
return;
}
@@ -744,18 +759,19 @@ static void default_do_nmi(struct pt_reg
reassert_nmi();
}
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+static notrace int dummy_nmi_callback(struct pt_regs * regs, int cpu)
{
return 0;
}
static nmi_callback_t nmi_callback = dummy_nmi_callback;
-fastcall void do_nmi(struct pt_regs * regs, long error_code)
+fastcall notrace void do_nmi(struct pt_regs * regs, long error_code)
{
int cpu;
nmi_enter();
+ nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags);
cpu = smp_processor_id();
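
The die.lock conversion follows the general -rt rule applied throughout this patch: locks taken in exception, NMI, or otherwise atomic context must keep spinning, so they become raw_spinlock_t while everything else turns into a sleeping lock under PREEMPT_RT. A minimal sketch of the pattern, assuming the -rt type-switching lock API ("my_lock" and "my_atomic_path" are illustrative names):

	static DEFINE_RAW_SPINLOCK(my_lock);

	static void my_atomic_path(void)
	{
		unsigned long flags;

		/* a raw lock keeps spinning even under PREEMPT_RT, so the
		 * critical section must stay short and must never sleep */
		spin_lock_irqsave(&my_lock, flags);
		/* ... minimal critical section ... */
		spin_unlock_irqrestore(&my_lock, flags);
	}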
Index: linux/arch/i386/kernel/tsc.c
===================================================================
--- linux.orig/arch/i386/kernel/tsc.c
+++ linux/arch/i386/kernel/tsc.c
@@ -11,6 +11,7 @@
#include
#include
+#include
#include
#include
#include
@@ -333,6 +334,16 @@ static cycle_t read_tsc(void)
return ret;
}
+
+static cycle_t __vsyscall_fn vread_tsc(void)
+{
+ cycle_t ret;
+
+ rdtscll(ret);
+
+ return ret;
+}
+
static struct clocksource clocksource_tsc = {
.name = "tsc",
.rating = 300,
@@ -342,6 +353,7 @@ static struct clocksource clocksource_ts
.shift = 22,
.update_callback = tsc_update_callback,
.is_continuous = 1,
+ .vread = vread_tsc,
};
static int tsc_update_callback(void)
Index: linux/arch/i386/kernel/vm86.c
===================================================================
--- linux.orig/arch/i386/kernel/vm86.c
+++ linux/arch/i386/kernel/vm86.c
@@ -109,6 +109,7 @@ struct pt_regs * fastcall save_v86_state
local_irq_enable();
if (!current->thread.vm86_info) {
+ local_irq_disable();
printk("no vm86_info: BAD\n");
do_exit(SIGSEGV);
}
Index: linux/arch/i386/kernel/vmlinux.lds.S
===================================================================
--- linux.orig/arch/i386/kernel/vmlinux.lds.S
+++ linux/arch/i386/kernel/vmlinux.lds.S
@@ -8,6 +8,8 @@
#include
#include
#include
+#include
+#include
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
@@ -71,6 +73,51 @@ SECTIONS
.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
_edata = .; /* End of data section */
+/* VSYSCALL_GTOD data */
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
+#undef VSYSCALL_ADDR
+#define VSYSCALL_ADDR VSYSCALL_GTOD_START
+#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+
+#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
+#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
+
+#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
+#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+
+ . = VSYSCALL_ADDR;
+ .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) }
+ __vsyscall_0 = VSYSCALL_VIRT_ADDR;
+
+ .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
+ .vsyscall_data : AT(VLOAD(.vsyscall_data)) { *(.vsyscall_data) }
+
+ . = ALIGN(32);
+ .vsyscall_gtod_data : AT (VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) }
+ vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+
+ . = ALIGN(32);
+ .vsyscall_gtod_lock : AT (VLOAD(.vsyscall_gtod_lock)) { *(.vsyscall_gtod_lock) }
+ vsyscall_gtod_lock = VVIRT(.vsyscall_gtod_lock);
+
+ .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) }
+ .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) }
+ .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) }
+
+ . = VSYSCALL_VIRT_ADDR + 4096;
+
+#undef VSYSCALL_ADDR
+#undef VSYSCALL_PHYS_ADDR
+#undef VSYSCALL_VIRT_ADDR
+#undef VLOAD_OFFSET
+#undef VLOAD
+#undef VVIRT_OFFSET
+#undef VVIRT
+
+#endif
+/* END of VSYSCALL_GTOD data*/
+
#ifdef CONFIG_STACK_UNWIND
 . = ALIGN(4);
 .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {

Index: linux/arch/i386/kernel/vsyscall-gtod.c
===================================================================
--- /dev/null
+++ linux/arch/i386/kernel/vsyscall-gtod.c
@@ -0,0 +1,179 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+struct vsyscall_gtod_data_t {
+ struct timeval wall_time_tv;
+ struct timezone sys_tz;
+ struct clocksource clock;
+};
+
+struct vsyscall_gtod_data_t vsyscall_gtod_data;
+struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data;
+
+seqlock_t vsyscall_gtod_lock = SEQLOCK_UNLOCKED;
+seqlock_t __vsyscall_gtod_lock __section_vsyscall_gtod_lock = SEQLOCK_UNLOCKED;
+
+int errno;
+static inline _syscall2(int,gettimeofday,struct timeval *,tv,struct timezone *,tz);
+
+static int vsyscall_mapped = 0; /* flag variable for remap_vsyscall() */
+extern struct timezone sys_tz;
+
+static inline void do_vgettimeofday(struct timeval* tv)
+{
+ cycle_t now, cycle_delta;
+ s64 nsec_delta;
+
+ if (!__vsyscall_gtod_data.clock.vread) {
+ gettimeofday(tv, NULL);
+ return;
+ }
+
+ /* read the clocksource and calc cycle_delta */
+ now = __vsyscall_gtod_data.clock.vread();
+ cycle_delta = (now - __vsyscall_gtod_data.clock.cycle_last)
+ & __vsyscall_gtod_data.clock.mask;
+
+ /* convert cycles to nsecs */
+ nsec_delta = cycle_delta * __vsyscall_gtod_data.clock.mult;
+ nsec_delta = nsec_delta >> __vsyscall_gtod_data.clock.shift;
+
+ /* add nsec offset to wall_time_tv */
+ *tv = __vsyscall_gtod_data.wall_time_tv;
+ do_div(nsec_delta, NSEC_PER_USEC); /* nsec_delta is now in usecs */
+ while (nsec_delta > USEC_PER_SEC) {
+ tv->tv_sec += 1;
+ nsec_delta -= USEC_PER_SEC;
+ }
+ tv->tv_usec += (unsigned long)nsec_delta;
+}
+
+static inline void do_get_tz(struct timezone *tz)
+{
+ *tz = __vsyscall_gtod_data.sys_tz;
+}
+
+static int __vsyscall(0) asmlinkage vgettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ unsigned long seq;
+ do {
+ seq = read_seqbegin(&__vsyscall_gtod_lock);
+
+ if (tv)
+ do_vgettimeofday(tv);
+ if (tz)
+ do_get_tz(tz);
+
+ } while (read_seqretry(&__vsyscall_gtod_lock, seq));
+
+ return 0;
+}
+
+static time_t __vsyscall(1) asmlinkage vtime(time_t * t)
+{
+ struct timeval tv;
+ vgettimeofday(&tv,NULL);
+ if (t)
+ *t = tv.tv_sec;
+ return tv.tv_sec;
+}
+
+struct clocksource* curr_clock;
+
+void update_vsyscall(struct timespec *wall_time,
+ struct clocksource* clock)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&vsyscall_gtod_lock, flags);
+
+ /* XXX - hackitty hack hack. this is terrible! */
+ if (curr_clock != clock) {
+ curr_clock = clock;
+ }
+
+ /* save off wall time as timeval */
+ vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
+ vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
+
+ /* copy current clocksource */
+ vsyscall_gtod_data.clock = *clock;
+
+ /* save off current timezone */
+ vsyscall_gtod_data.sys_tz = sys_tz;
+
+ write_sequnlock_irqrestore(&vsyscall_gtod_lock, flags);
+
+}
+extern char __vsyscall_0;
+
+static void __init map_vsyscall(void)
+{
+ unsigned long physaddr_page0 = (unsigned long) &__vsyscall_0 - PAGE_OFFSET;
+
+ /* Initially we map the VSYSCALL page w/ PAGE_KERNEL permissions to
+ * keep the alternate_instruction code from bombing out when it
+ * changes the seq_lock memory barriers in vgettimeofday()
+ */
+ __set_fixmap(FIX_VSYSCALL_GTOD_FIRST_PAGE, physaddr_page0, PAGE_KERNEL);
+}
+
+static int __init remap_vsyscall(void)
+{
+ unsigned long physaddr_page0 = (unsigned long) &__vsyscall_0 - PAGE_OFFSET;
+
+ if (!vsyscall_mapped)
+ return 0;
+
+ /* Remap the VSYSCALL page w/ PAGE_KERNEL_VSYSCALL permissions
+ * after the alternate_instruction code has run
+ */
+ clear_fixmap(FIX_VSYSCALL_GTOD_FIRST_PAGE);
+ __set_fixmap(FIX_VSYSCALL_GTOD_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+
+ return 0;
+}
+
+int __init vsyscall_init(void)
+{
+ printk("VSYSCALL: consistency checks...");
+ if ((unsigned long) &vgettimeofday != VSYSCALL_ADDR(__NR_vgettimeofday)) {
+ printk("vgettimeofday link addr broken\n");
+ printk("VSYSCALL: vsyscall_init failed!\n");
+ return -EFAULT;
+ }
+ if ((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)) {
+ printk("vtime link addr broken\n");
+ printk("VSYSCALL: vsyscall_init failed!\n");
+ return -EFAULT;
+ }
+ if (VSYSCALL_ADDR(0) != __fix_to_virt(FIX_VSYSCALL_GTOD_FIRST_PAGE)) {
+ printk("fixmap first vsyscall 0x%lx should be 0x%x\n",
+ __fix_to_virt(FIX_VSYSCALL_GTOD_FIRST_PAGE),
+ VSYSCALL_ADDR(0));
+ printk("VSYSCALL: vsyscall_init failed!\n");
+ return -EFAULT;
+ }
+
+
+ printk("passed...mapping...");
+ map_vsyscall();
+ printk("done.\n");
+ vsyscall_mapped = 1;
+ printk("VSYSCALL: fixmap virt addr: 0x%lx\n",
+ __fix_to_virt(FIX_VSYSCALL_GTOD_FIRST_PAGE));
+
+ return 0;
+}
+__initcall(remap_vsyscall);
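
The cycles-to-nanoseconds step in do_vgettimeofday() is plain fixed-point arithmetic: mult is precomputed as (NSEC_PER_SEC << shift) / freq, which is exactly what clocksource_hz2mult() does, so the hot path needs only a multiply and a shift. A standalone sketch of the math with made-up numbers (700 MHz is an assumed frequency, not from the patch):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t hz = 700000000ULL;	/* hypothetical 700 MHz counter */
		uint32_t shift = 22;		/* as in clocksource_tsc */
		/* mult = (10^9 << shift) / hz, a la clocksource_hz2mult() */
		uint32_t mult = (uint32_t)((1000000000ULL << shift) / hz);

		uint64_t cycle_delta = 1400000;	/* cycles since cycle_last */
		uint64_t ns = (cycle_delta * mult) >> shift;

		/* 1.4e6 cycles at 700 MHz is 2 ms: prints ~2000000 ns */
		printf("mult=%u -> %llu ns\n", (unsigned)mult,
		       (unsigned long long)ns);
		return 0;
	}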
Index: linux/arch/i386/lib/bitops.c
===================================================================
--- linux.orig/arch/i386/lib/bitops.c
+++ linux/arch/i386/lib/bitops.c
@@ -68,3 +68,35 @@ int find_next_zero_bit(const unsigned lo
return (offset + set + res);
}
EXPORT_SYMBOL(find_next_zero_bit);
+
+
+/*
+ * rw spinlock fallbacks
+ */
+#ifdef CONFIG_SMP
+asm(
+".align 4\n"
+".globl __write_lock_failed\n"
+"__write_lock_failed:\n\t"
+ LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
+"1: rep; nop\n\t"
+ "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+ "jne 1b\n\t"
+ LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
+ "jnz __write_lock_failed\n\t"
+ "ret"
+);
+
+asm(
+".align 4\n"
+".globl __read_lock_failed\n"
+"__read_lock_failed:\n\t"
+ LOCK_PREFIX "incl (%eax)\n"
+"1: rep; nop\n\t"
+ "cmpl $1,(%eax)\n\t"
+ "js 1b\n\t"
+ LOCK_PREFIX "decl (%eax)\n\t"
+ "js __read_lock_failed\n\t"
+ "ret"
+);
+#endif
Index: linux/arch/i386/mach-default/setup.c
===================================================================
--- linux.orig/arch/i386/mach-default/setup.c
+++ linux/arch/i386/mach-default/setup.c
@@ -35,7 +35,7 @@ void __init pre_intr_init_hook(void)
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
-static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
+static struct irqaction irq2 = { no_action, IRQF_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL};
/**
* intr_init_hook - post gate setup interrupt initialisation
@@ -79,7 +79,7 @@ void __init trap_init_hook(void)
{
}
-static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL};
+static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED | IRQF_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL};
/**
* time_init_hook - do any specific initialisations for the system timer.
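
IRQF_NODELAY is the -rt escape hatch from forced interrupt threading: handlers carrying the flag keep running in hard interrupt context, which is why the timer and the PIC cascade are marked here. A sketch of how a handler would be flagged, in the 2.6.18 handler signature (all "my_" names are illustrative):

	static irqreturn_t my_hard_handler(int irq, void *dev_id,
					   struct pt_regs *regs)
	{
		/* runs in hard-irq context even under PREEMPT_RT, so it
		 * must not sleep and may only take raw locks */
		return IRQ_HANDLED;
	}

	static struct irqaction my_action = {
		.handler = my_hard_handler,
		.flags	 = IRQF_DISABLED | IRQF_NODELAY,
		.mask	 = CPU_MASK_NONE,
		.name	 = "my-timer",
	};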
Index: linux/arch/i386/mach-visws/setup.c
===================================================================
--- linux.orig/arch/i386/mach-visws/setup.c
+++ linux/arch/i386/mach-visws/setup.c
@@ -115,7 +115,7 @@ void __init pre_setup_arch_hook()
static struct irqaction irq0 = {
.handler = timer_interrupt,
- .flags = IRQF_DISABLED,
+ .flags = IRQF_DISABLED | IRQF_NODELAY,
.name = "timer",
};
Index: linux/arch/i386/mach-visws/visws_apic.c
===================================================================
--- linux.orig/arch/i386/mach-visws/visws_apic.c
+++ linux/arch/i386/mach-visws/visws_apic.c
@@ -259,11 +259,13 @@ out_unlock:
static struct irqaction master_action = {
.handler = piix4_master_intr,
.name = "PIIX4-8259",
+ .flags = IRQF_NODELAY,
};
static struct irqaction cascade_action = {
.handler = no_action,
.name = "cascade",
+ .flags = IRQF_NODELAY,
};
Index: linux/arch/i386/mach-voyager/setup.c
===================================================================
--- linux.orig/arch/i386/mach-voyager/setup.c
+++ linux/arch/i386/mach-voyager/setup.c
@@ -18,7 +18,7 @@ void __init pre_intr_init_hook(void)
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
-static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
+static struct irqaction irq2 = { no_action, IRQF_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL};
void __init intr_init_hook(void)
{
@@ -40,7 +40,7 @@ void __init trap_init_hook(void)
{
}
-static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL};
+static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED | IRQF_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL};
void __init time_init_hook(void)
{
Index: linux/arch/i386/mm/fault.c
===================================================================
--- linux.orig/arch/i386/mm/fault.c
+++ linux/arch/i386/mm/fault.c
@@ -73,6 +73,9 @@ void bust_spinlocks(int yes)
int loglevel_save = console_loglevel;
if (yes) {
+ stop_trace();
+ user_trace_stop();
+ zap_rt_locks();
oops_in_progress = 1;
return;
}
@@ -325,8 +328,8 @@ static inline int vmalloc_fault(unsigned
* bit 3 == 1 means use of reserved bit detected
* bit 4 == 1 means fault was an instruction fetch
*/
-fastcall void __kprobes do_page_fault(struct pt_regs *regs,
- unsigned long error_code)
+fastcall notrace void __kprobes do_page_fault(struct pt_regs *regs,
+ unsigned long error_code)
{
struct task_struct *tsk;
struct mm_struct *mm;
@@ -337,6 +340,7 @@ fastcall void __kprobes do_page_fault(st
/* get the address */
address = read_cr2();
+ trace_special(regs->eip, error_code, address);
tsk = current;
Index: linux/arch/i386/mm/highmem.c
===================================================================
--- linux.orig/arch/i386/mm/highmem.c
+++ linux/arch/i386/mm/highmem.c
@@ -18,6 +18,26 @@ void kunmap(struct page *page)
kunmap_high(page);
}
+void kunmap_virt(void *ptr)
+{
+ struct page *page;
+
+ if ((unsigned long)ptr < PKMAP_ADDR(0))
+ return;
+ page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]);
+ kunmap(page);
+}
+
+struct page *kmap_to_page(void *ptr)
+{
+ struct page *page;
+
+ if ((unsigned long)ptr < PKMAP_ADDR(0))
+ return virt_to_page(ptr);
+ page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]);
+ return page;
+}
+
/*
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
* no global lock is needed and because the kmap code must perform a global TLB
@@ -26,7 +46,7 @@ void kunmap(struct page *page)
* However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
-void *kmap_atomic(struct page *page, enum km_type type)
+void *__kmap_atomic(struct page *page, enum km_type type)
{
enum fixed_addresses idx;
unsigned long vaddr;
@@ -48,7 +68,7 @@ void *kmap_atomic(struct page *page, enu
return (void*) vaddr;
}
-void kunmap_atomic(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr, enum km_type type)
{
#ifdef CONFIG_DEBUG_HIGHMEM
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
@@ -78,7 +98,7 @@ void kunmap_atomic(void *kvaddr, enum km
/* This is the same as kmap_atomic() but can map memory that doesn't
* have a struct page associated with it.
*/
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type)
{
enum fixed_addresses idx;
unsigned long vaddr;
@@ -93,7 +113,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
return (void*) vaddr;
}
-struct page *kmap_atomic_to_page(void *ptr)
+struct page *__kmap_atomic_to_page(void *ptr)
{
unsigned long idx, vaddr = (unsigned long)ptr;
pte_t *pte;
@@ -108,6 +128,7 @@ struct page *kmap_atomic_to_page(void *p
EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic);
-EXPORT_SYMBOL(kmap_atomic_to_page);
+EXPORT_SYMBOL(kunmap_virt);
+EXPORT_SYMBOL(__kmap_atomic);
+EXPORT_SYMBOL(__kunmap_atomic);
+EXPORT_SYMBOL(__kmap_atomic_to_page);
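
kunmap_virt() and kmap_to_page() let code that kept only the mapped address, not the struct page, undo or inspect a permanent kmap(); addresses below the PKMAP area fall through to virt_to_page() semantics. A short usage sketch ("my_use_page" is an illustrative name):

	static void my_use_page(struct page *page)
	{
		char *addr = kmap(page);	/* may land in the PKMAP area */

		/* ... touch addr[0 .. PAGE_SIZE-1] ... */

		kunmap_virt(addr);	/* finds the page and kunmap()s it */
	}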
Index: linux/arch/i386/mm/init.c
===================================================================
--- linux.orig/arch/i386/mm/init.c
+++ linux/arch/i386/mm/init.c
@@ -45,7 +45,7 @@
unsigned int __VMALLOC_RESERVE = 128 << 20;
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;
static int noinline do_test_wp_bit(void);
@@ -194,7 +194,7 @@ static inline int page_kills_ppro(unsign
extern int is_available_memory(efi_memory_desc_t *);
-int page_is_ram(unsigned long pagenr)
+int notrace page_is_ram(unsigned long pagenr)
{
int i;
unsigned long addr, end;
Index: linux/arch/i386/mm/pgtable.c
===================================================================
--- linux.orig/arch/i386/mm/pgtable.c
+++ linux/arch/i386/mm/pgtable.c
@@ -182,7 +182,7 @@ void pmd_ctor(void *pmd, kmem_cache_t *c
* recommendations and having no core impact whatsoever.
* -- wli
*/
-DEFINE_SPINLOCK(pgd_lock);
+DEFINE_RAW_SPINLOCK(pgd_lock);
struct page *pgd_list;
static inline void pgd_list_add(pgd_t *pgd)
Index: linux/arch/i386/oprofile/Kconfig
===================================================================
--- linux.orig/arch/i386/oprofile/Kconfig
+++ linux/arch/i386/oprofile/Kconfig
@@ -15,3 +15,6 @@ config OPROFILE
If unsure, say N.
+config PROFILE_NMI
+ bool
+ default y
Index: linux/arch/i386/pci/Makefile
===================================================================
--- linux.orig/arch/i386/pci/Makefile
+++ linux/arch/i386/pci/Makefile
@@ -4,8 +4,9 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
+obj-$(CONFIG_ACPI) += acpi.o
+
pci-y := fixup.o
-pci-$(CONFIG_ACPI) += acpi.o
pci-y += legacy.o irq.o
pci-$(CONFIG_X86_VISWS) := visws.o fixup.o
Index: linux/arch/i386/pci/direct.c
===================================================================
--- linux.orig/arch/i386/pci/direct.c
+++ linux/arch/i386/pci/direct.c
@@ -220,16 +220,23 @@ static int __init pci_check_type1(void)
unsigned int tmp;
int works = 0;
- local_irq_save(flags);
+ spin_lock_irqsave(&pci_config_lock, flags);
outb(0x01, 0xCFB);
tmp = inl(0xCF8);
outl(0x80000000, 0xCF8);
- if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) {
- works = 1;
+
+ if (inl(0xCF8) == 0x80000000) {
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ if (pci_sanity_check(&pci_direct_conf1))
+ works = 1;
+
+ spin_lock_irqsave(&pci_config_lock, flags);
}
outl(tmp, 0xCF8);
- local_irq_restore(flags);
+
+ spin_unlock_irqrestore(&pci_config_lock, flags);
return works;
}
@@ -239,17 +246,19 @@ static int __init pci_check_type2(void)
unsigned long flags;
int works = 0;
- local_irq_save(flags);
+ spin_lock_irqsave(&pci_config_lock, flags);
outb(0x00, 0xCFB);
outb(0x00, 0xCF8);
outb(0x00, 0xCFA);
- if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 &&
- pci_sanity_check(&pci_direct_conf2)) {
- works = 1;
- }
- local_irq_restore(flags);
+ if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) {
+ spin_unlock_irqrestore(&pci_config_lock, flags);
+
+ if (pci_sanity_check(&pci_direct_conf2))
+ works = 1;
+ } else
+ spin_unlock_irqrestore(&pci_config_lock, flags);
return works;
}
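
The probe paths above stop relying on local_irq_save() and instead take pci_config_lock, so the shared 0xCF8/0xCFC window is serialized against other CPUs, and the lock is dropped around pci_sanity_check(), which can reach code that must not run with the raw lock held under -rt. The resulting shape, sketched ("sketch_probe" is illustrative and omits the register save/restore the real code does):

	static int sketch_probe(void)
	{
		unsigned long flags;
		int works = 0;

		spin_lock_irqsave(&pci_config_lock, flags);
		/* ... poke the config ports ... */
		spin_unlock_irqrestore(&pci_config_lock, flags);

		/* heavyweight sanity checks run outside the lock */
		if (pci_sanity_check(&pci_direct_conf1))
			works = 1;

		return works;
	}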
Index: linux/arch/i386/pci/irq.c
===================================================================
--- linux.orig/arch/i386/pci/irq.c
+++ linux/arch/i386/pci/irq.c
@@ -981,10 +981,6 @@ static void __init pcibios_fixup_irqs(vo
pci_name(bridge), 'A' + pin, irq);
}
if (irq >= 0) {
- if (use_pci_vector() &&
- !platform_legacy_irq(irq))
- irq = IO_APIC_VECTOR(irq);
-
printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
pci_name(dev), 'A' + pin, irq);
dev->irq = irq;
@@ -1169,33 +1165,3 @@ static int pirq_enable_irq(struct pci_de
}
return 0;
}
-
-int pci_vector_resources(int last, int nr_released)
-{
- int count = nr_released;
-
- int next = last;
- int offset = (last % 8);
-
- while (next < FIRST_SYSTEM_VECTOR) {
- next += 8;
-#ifdef CONFIG_X86_64
- if (next == IA32_SYSCALL_VECTOR)
- continue;
-#else
- if (next == SYSCALL_VECTOR)
- continue;
-#endif
- count++;
- if (next >= FIRST_SYSTEM_VECTOR) {
- if (offset%8) {
- next = FIRST_DEVICE_VECTOR + offset;
- offset++;
- continue;
- }
- count--;
- }
- }
-
- return count;
-}
Index: linux/arch/ia64/Kconfig
===================================================================
--- linux.orig/arch/ia64/Kconfig
+++ linux/arch/ia64/Kconfig
@@ -32,6 +32,7 @@ config SWIOTLB
config RWSEM_XCHGADD_ALGORITHM
bool
+ depends on !PREEMPT_RT
default y
config GENERIC_FIND_NEXT_BIT
@@ -42,7 +43,11 @@ config GENERIC_CALIBRATE_DELAY
bool
default y
-config TIME_INTERPOLATION
+config GENERIC_TIME
+ bool
+ default y
+
+config GENERIC_TIME_VSYSCALL
bool
default y
@@ -258,6 +263,69 @@ config SMP
If you don't know what to do here, say N.
+
+config GENERIC_TIME
+ bool
+ default y
+
+config HIGH_RES_TIMERS
+ bool "High-Resolution Timers"
+ help
+
+ POSIX timers are available by default. This option enables
+ high-resolution POSIX timers. With this option the resolution
+ is at least 1 microsecond. High resolution is not free: if
+ enabled, this option adds a small overhead each time a timer
+ expires that is not on a 1/HZ tick boundary. If no such
+ timers are used, the overhead is nil.
+
+ This option enables two additional POSIX clocks,
+ CLOCK_REALTIME_HR and CLOCK_MONOTONIC_HR. Note that this
+ option does not change the resolution of CLOCK_REALTIME or
+ CLOCK_MONOTONIC, which remain at 1/HZ resolution.
+
+config HIGH_RES_RESOLUTION
+ int "High-Resolution-Timer resolution (nanoseconds)"
+ depends on HIGH_RES_TIMERS
+ default 1000
+ help
+
+ This sets the resolution of timers accessed with
+ CLOCK_REALTIME_HR and CLOCK_MONOTONIC_HR. Too fine a
+ resolution (too small a number) will usually not be
+ observable due to normal system latencies. For an
+ 800 MHz processor, about 10,000 is the recommended maximum
+ (smallest number). If you don't need that sort of resolution,
+ higher numbers may generate less overhead.
+
+choice
+ prompt "Clock source"
+ depends on HIGH_RES_TIMERS
+ default HIGH_RES_TIMER_ITC
+ help
+ This option allows you to choose the hardware source in charge
+ of generating high precision interrupts on your system.
+ On IA-64 these are:
+
+ ITC Interval Time Counter 1/CPU clock
+ HPET High Precision Event Timer ~ (XXX: have to check the spec)
+
+ The ITC timer is available on all ia64 computers because
+ it is integrated directly into the processor. However, it may
+ not give correct results on MP machines with processors running
+ at different clock rates. In this case you may want to use
+ the HPET, if available on your machine.
+
+
+config HIGH_RES_TIMER_ITC
+ bool "Interval Time Counter/ITC"
+
+config HIGH_RES_TIMER_HPET
+ bool "High Precision Event Timer/HPET"
+
+endchoice
+
config NR_CPUS
int "Maximum number of CPUs (2-1024)"
range 2 1024
@@ -310,17 +378,15 @@ config FORCE_CPEI_RETARGET
This option is useful to enable this feature on older BIOSes as well.
You can also enable this by using boot command line option force_cpei=1.
-config PREEMPT
- bool "Preemptible Kernel"
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
- This allows applications to run more reliably even when the system is
- under load.
+source "kernel/Kconfig.preempt"
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ depends on PREEMPT_RT
+ default y
+
+config PREEMPT
+ def_bool y if (PREEMPT_RT || PREEMPT_SOFTIRQS || PREEMPT_HARDIRQS || PREEMPT_VOLUNTARY || PREEMPT_DESKTOP)
source "mm/Kconfig"
Index: linux/arch/ia64/configs/bigsur_defconfig
===================================================================
--- linux.orig/arch/ia64/configs/bigsur_defconfig
+++ linux/arch/ia64/configs/bigsur_defconfig
@@ -85,7 +85,7 @@ CONFIG_MMU=y
CONFIG_SWIOTLB=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_EFI=y
CONFIG_GENERIC_IOMAP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
Index: linux/arch/ia64/configs/gensparse_defconfig
===================================================================
--- linux.orig/arch/ia64/configs/gensparse_defconfig
+++ linux/arch/ia64/configs/gensparse_defconfig
@@ -86,7 +86,7 @@ CONFIG_MMU=y
CONFIG_SWIOTLB=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_EFI=y
CONFIG_GENERIC_IOMAP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
Index: linux/arch/ia64/configs/sim_defconfig
===================================================================
--- linux.orig/arch/ia64/configs/sim_defconfig
+++ linux/arch/ia64/configs/sim_defconfig
@@ -86,7 +86,7 @@ CONFIG_MMU=y
CONFIG_SWIOTLB=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_EFI=y
CONFIG_GENERIC_IOMAP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
Index: linux/arch/ia64/configs/sn2_defconfig
===================================================================
--- linux.orig/arch/ia64/configs/sn2_defconfig
+++ linux/arch/ia64/configs/sn2_defconfig
@@ -83,7 +83,7 @@ CONFIG_SWIOTLB=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_FIND_NEXT_BIT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_DMI=y
CONFIG_EFI=y
CONFIG_GENERIC_IOMAP=y
Index: linux/arch/ia64/configs/tiger_defconfig
===================================================================
--- linux.orig/arch/ia64/configs/tiger_defconfig
+++ linux/arch/ia64/configs/tiger_defconfig
@@ -86,7 +86,7 @@ CONFIG_MMU=y
CONFIG_SWIOTLB=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_EFI=y
CONFIG_GENERIC_IOMAP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
Index: linux/arch/ia64/configs/zx1_defconfig
===================================================================
--- linux.orig/arch/ia64/configs/zx1_defconfig
+++ linux/arch/ia64/configs/zx1_defconfig
@@ -84,7 +84,7 @@ CONFIG_MMU=y
CONFIG_SWIOTLB=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_EFI=y
CONFIG_GENERIC_IOMAP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
Index: linux/arch/ia64/defconfig
===================================================================
--- linux.orig/arch/ia64/defconfig
+++ linux/arch/ia64/defconfig
@@ -86,7 +86,7 @@ CONFIG_MMU=y
CONFIG_SWIOTLB=y
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_EFI=y
CONFIG_GENERIC_IOMAP=y
CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
Index: linux/arch/ia64/kernel/asm-offsets.c
===================================================================
--- linux.orig/arch/ia64/kernel/asm-offsets.c
+++ linux/arch/ia64/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
#define ASM_OFFSETS_C 1
#include
+#include
#include
#include
@@ -254,18 +255,13 @@ void foo(void)
offsetof (struct pal_min_state_area_s, pmsa_xip));
BLANK();
+#ifdef CONFIG_TIME_INTERPOLATION
/* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
- DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
- DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
- DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
- DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
- DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
- DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
- DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
- DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
- DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
- DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
- DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
- DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
+ DEFINE(IA64_CLOCKSOURCE_MASK_OFFSET, offsetof (struct clocksource, mask));
+ DEFINE(IA64_CLOCKSOURCE_MULT_OFFSET, offsetof (struct clocksource, mult));
+ DEFINE(IA64_CLOCKSOURCE_SHIFT_OFFSET, offsetof (struct clocksource, shift));
+ DEFINE(IA64_CLOCKSOURCE_MMIO_PTR_OFFSET, offsetof (struct clocksource, fsys_mmio_ptr));
+ DEFINE(IA64_CLOCKSOURCE_CYCLE_LAST_OFFSET, offsetof (struct clocksource, cycle_last));
+#endif
}
Index: linux/arch/ia64/kernel/cyclone.c
===================================================================
--- linux.orig/arch/ia64/kernel/cyclone.c
+++ linux/arch/ia64/kernel/cyclone.c
@@ -3,6 +3,7 @@
#include
#include
#include
+#include
#include
/* IBM Summit (EXA) Cyclone counter code*/
@@ -18,13 +19,21 @@ void __init cyclone_setup(void)
use_cyclone = 1;
}
+static void __iomem *cyclone_mc_ptr;
-struct time_interpolator cyclone_interpolator = {
- .source = TIME_SOURCE_MMIO64,
- .shift = 16,
- .frequency = CYCLONE_TIMER_FREQ,
- .drift = -100,
- .mask = (1LL << 40) - 1
+static cycle_t read_cyclone(void)
+{
+ return (cycle_t)readq((void __iomem *)cyclone_mc_ptr);
+}
+
+static struct clocksource clocksource_cyclone = {
+ .name = "cyclone",
+ .rating = 300,
+ .read = read_cyclone,
+ .mask = (1LL << 40) - 1,
+ .mult = 0, /* to be calculated */
+ .shift = 16,
+ .is_continuous = 1,
};
int __init init_cyclone_clock(void)
@@ -101,8 +110,10 @@ int __init init_cyclone_clock(void)
}
}
/* initialize last tick */
- cyclone_interpolator.addr = cyclone_timer;
- register_time_interpolator(&cyclone_interpolator);
+ clocksource_cyclone.fsys_mmio_ptr = cyclone_mc_ptr = cyclone_timer;
+ clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ,
+ clocksource_cyclone.shift);
+ clocksource_register(&clocksource_cyclone);
return 0;
}
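
The interpolator-to-clocksource conversion above is mechanical: provide read(), mask, and shift, then compute mult from the counter frequency at init time. The same skeleton for a hypothetical memory-mapped counter (all "mycounter" names and MYCOUNTER_FREQ are assumptions for illustration):

	static void __iomem *mycounter_mmio;

	static cycle_t read_mycounter(void)
	{
		return (cycle_t)readq(mycounter_mmio);
	}

	static struct clocksource clocksource_mycounter = {
		.name		= "mycounter",
		.rating		= 250,
		.read		= read_mycounter,
		.mask		= (1LL << 40) - 1,
		.mult		= 0,	/* computed below */
		.shift		= 16,
		.is_continuous	= 1,
	};

	static int __init mycounter_init(void)
	{
		clocksource_mycounter.mult =
			clocksource_hz2mult(MYCOUNTER_FREQ,	/* assumed fixed rate */
					    clocksource_mycounter.shift);
		return clocksource_register(&clocksource_mycounter);
	}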
Index: linux/arch/ia64/kernel/entry.S
===================================================================
--- linux.orig/arch/ia64/kernel/entry.S
+++ linux/arch/ia64/kernel/entry.S
@@ -1101,23 +1101,24 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
+ tbit.nz p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
+(p6) br.cond.sptk.few .needresched
+ tbit.z p6,p0=r31,TIF_NEED_RESCHED_DELAYED // current_thread_info()->need_resched_delayed==0?
(p6) br.cond.sptk.few .notify
-#ifdef CONFIG_PREEMPT
-(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
+
+.needresched:
+
+(pKStk) br.cond.sptk.many .fromkernel
;;
-(pKStk) st4 [r20]=r21
ssm psr.i // enable interrupts
-#endif
br.call.spnt.many rp=schedule
-.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
- rsm psr.i // disable interrupts
- ;;
-#ifdef CONFIG_PREEMPT
-(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
- ;;
-(pKStk) st4 [r20]=r0 // preempt_count() <- 0
-#endif
+.ret9a: rsm psr.i // disable interrupts
+ ;;
+ br.cond.sptk.many .endpreemptdep
+.fromkernel:
+ br.call.spnt.many rp=preempt_schedule_irq
+.ret9b: rsm psr.i // disable interrupts
+.endpreemptdep:
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // re-check
Index: linux/arch/ia64/kernel/fsys.S
===================================================================
--- linux.orig/arch/ia64/kernel/fsys.S
+++ linux/arch/ia64/kernel/fsys.S
@@ -24,6 +24,7 @@
#include "entry.h"
+#ifdef CONFIG_TIME_INTERPOLATION
/*
* See Documentation/ia64/fsys.txt for details on fsyscalls.
*
@@ -145,13 +146,6 @@ ENTRY(fsys_set_tid_address)
FSYS_RETURN
END(fsys_set_tid_address)
-/*
- * Ensure that the time interpolator structure is compatible with the asm code
- */
-#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
- || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
-#error fsys_gettimeofday incompatible with changes to struct time_interpolator
-#endif
#define CLOCK_REALTIME 0
#define CLOCK_MONOTONIC 1
#define CLOCK_DIVIDE_BY_1000 0x4000
@@ -177,19 +171,18 @@ ENTRY(fsys_gettimeofday)
// r11 = preserved: saved ar.pfs
// r12 = preserved: memory stack
// r13 = preserved: thread pointer
- // r14 = address of mask / mask
+ // r14 = address of mask / mask value
// r15 = preserved: system call number
// r16 = preserved: current task pointer
// r17 = wall to monotonic use
- // r18 = time_interpolator->offset
- // r19 = address of wall_to_monotonic
- // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
- // r21 = shift factor
- // r22 = address of time interpolator->last_counter
- // r23 = address of time_interpolator->last_cycle
- // r24 = adress of time_interpolator->offset
- // r25 = last_cycle value
- // r26 = last_counter value
+ // r19 = address of itc_lastcycle
+ // r20 = struct clocksource / address of first element
+ // r21 = shift value
+ // r22 = address of itc_jitter/ wall_to_monotonic
+ // r23 = address of shift
+ // r24 = address mult factor / cycle_last value
+ // r25 = itc_lastcycle value
+ // r26 = address clocksource cycle_last
// r27 = pointer to xtime
// r28 = sequence number at the beginning of critical section
// r29 = address of seqlock
@@ -199,9 +192,9 @@ ENTRY(fsys_gettimeofday)
// p6,p7 short term use
// p8 = timesource ar.itc
// p9 = timesource mmio64
- // p10 = timesource mmio32
+ // p10 = timesource mmio32 - not used
// p11 = timesource not to be handled by asm code
- // p12 = memory time source ( = p9 | p10)
+ // p12 = memory time source ( = p9 | p10) - not used
// p13 = do cmpxchg with time_interpolator_last_cycle
// p14 = Divide by 1000
// p15 = Add monotonic
@@ -212,61 +205,55 @@ ENTRY(fsys_gettimeofday)
tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
mov pr = r30,0xc000 // Set predicates according to function
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
- movl r20 = time_interpolator
+ movl r20 = fsyscall_clock // load fsyscall clocksource address
;;
- ld8 r20 = [r20] // get pointer to time_interpolator structure
+ add r10 = IA64_CLOCKSOURCE_MMIO_PTR_OFFSET,r20
movl r29 = xtime_lock
ld4 r2 = [r2] // process work pending flags
movl r27 = xtime
;; // only one bundle here
- ld8 r21 = [r20] // first quad with control information
+ add r14 = IA64_CLOCKSOURCE_MASK_OFFSET,r20
+ movl r22 = itc_jitter
+ add r24 = IA64_CLOCKSOURCE_MULT_OFFSET,r20
and r2 = TIF_ALLWORK_MASK,r2
(p6) br.cond.spnt.few .fail_einval // deferred branch
;;
- add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
- extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
- extr r8 = r21,0,16 // time_interpolator->source
+ ld8 r30 = [r10] // clocksource->mmio_ptr
+ movl r19 = itc_lastcycle
+ add r23 = IA64_CLOCKSOURCE_SHIFT_OFFSET,r20
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
(p6) br.cond.spnt.many fsys_fallback_syscall
;;
- cmp.eq p8,p12 = 0,r8 // Check for cpu timer
- cmp.eq p9,p0 = 1,r8 // MMIO64 ?
- extr r2 = r21,24,8 // time_interpolator->jitter
- cmp.eq p10,p0 = 2,r8 // MMIO32 ?
- cmp.ltu p11,p0 = 2,r8 // function or other clock
-(p11) br.cond.spnt.many fsys_fallback_syscall
- ;;
- setf.sig f7 = r3 // Setup for scaling of counter
-(p15) movl r19 = wall_to_monotonic
-(p12) ld8 r30 = [r10]
- cmp.ne p13,p0 = r2,r0 // need jitter compensation?
- extr r21 = r21,16,8 // shift factor
+ ld8 r14 = [r14] // clocksource mask value
+ ld4 r2 = [r22] // itc_jitter value
+ add r26 = IA64_CLOCKSOURCE_CYCLE_LAST_OFFSET,r20 // clock fsyscall_cycle_last
+ ld4 r3 = [r24] // clocksource->mult value
+ cmp.eq p8,p9 = 0,r30 // Check for cpu timer, no mmio_ptr, set p8, clear p9
+ ;;
+ setf.sig f7 = r3 // Setup for mult scaling of counter
+(p15) movl r22 = wall_to_monotonic
+ ld4 r21 = [r23] // shift value
+(p8) cmp.ne p13,p0 = r2,r0 // need jitter compensation, set p13
+(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control
;;
.time_redo:
.pred.rel.mutex p8,p9,p10
ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
- add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
-(p10) ld4 r2 = [r30] // readw(ti->address)
-(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
+(p13) ld8 r25 = [r19] // get itc_lastcycle value
;; // could be removed by moving the last add upward
- ld8 r26 = [r22] // time_interpolator->last_counter
-(p13) ld8 r25 = [r23] // time interpolator->last_cycle
- add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
-(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
- add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
+ ld8 r24 = [r26] // get fsyscall_cycle_last value
+(p15) ld8 r17 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET
;;
- ld8 r18 = [r24] // time_interpolator->offset
ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
-(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
+(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
;;
- ld8 r14 = [r14] // time_interpolator->mask
-(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
- sub r10 = r2,r26 // current_counter - last_counter
+(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
+ sub r10 = r2,r24 // current_counter - last_counter
;;
-(p6) sub r10 = r25,r26 // time we got was less than last_cycle
+(p6) sub r10 = r25,r24 // time we got was less than last_cycle
(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
;;
and r10 = r10,r14 // Apply mask
@@ -274,22 +261,21 @@ ENTRY(fsys_gettimeofday)
setf.sig f8 = r10
nop.i 123
;;
-(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
+(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv
EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
;;
-(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
+(p15) ld8 r17 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET
(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
// simulate tbit.nz.or p7,p0 = r28,0
and r28 = ~1,r28 // Make sequence even to force retry if odd
getf.sig r2 = f8
mf
- add r8 = r8,r18 // Add time interpolator offset
;;
ld4 r10 = [r29] // xtime_lock.sequence
(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs
- shr.u r2 = r2,r21
+ shr.u r2 = r2,r21 // shift by factor
;; // overloaded 3 bundles!
// End critical section.
add r8 = r8,r2 // Add xtime.nsecs
@@ -348,6 +334,26 @@ ENTRY(fsys_clock_gettime)
br.many .gettime
END(fsys_clock_gettime)
+
+#else // !CONFIG_TIME_INTERPOLATION
+
+# define fsys_gettimeofday 0
+# define fsys_clock_gettime 0
+
+.fail_einval:
+ mov r8 = EINVAL
+ mov r10 = -1
+ FSYS_RETURN
+
+.fail_efault:
+ mov r8 = EFAULT
+ mov r10 = -1
+ FSYS_RETURN
+
+#endif
+
+
+
/*
* long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
*/
Index: linux/arch/ia64/kernel/iosapic.c
===================================================================
--- linux.orig/arch/ia64/kernel/iosapic.c
+++ linux/arch/ia64/kernel/iosapic.c
@@ -112,7 +112,7 @@
(PAGE_SIZE / sizeof(struct iosapic_rte_info))
#define RTE_PREALLOCATED (1)
-static DEFINE_SPINLOCK(iosapic_lock);
+static DEFINE_RAW_SPINLOCK(iosapic_lock);
/*
* These tables map IA-64 vectors to the IOSAPIC pin that generates this
@@ -409,6 +409,34 @@ iosapic_startup_level_irq (unsigned int
return 0;
}
+/*
+ * In the preemptible case mask the IRQ first then handle it and ack it.
+ */
+#ifdef CONFIG_PREEMPT_HARDIRQS
+
+static void
+iosapic_ack_level_irq (unsigned int irq)
+{
+ ia64_vector vec = irq_to_vector(irq);
+ struct iosapic_rte_info *rte;
+
+ move_irq(irq);
+ mask_irq(irq);
+ list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
+ iosapic_eoi(rte->addr, vec);
+}
+
+static void
+iosapic_end_level_irq (unsigned int irq)
+{
+ if (!(irq_desc[irq].status & IRQ_INPROGRESS))
+ unmask_irq(irq);
+}
+
+#else /* !CONFIG_PREEMPT_HARDIRQS */
+
+#define iosapic_ack_level_irq nop
+
static void
iosapic_end_level_irq (unsigned int irq)
{
@@ -420,10 +448,12 @@ iosapic_end_level_irq (unsigned int irq)
iosapic_eoi(rte->addr, vec);
}
+
+#endif
+
#define iosapic_shutdown_level_irq mask_irq
#define iosapic_enable_level_irq unmask_irq
#define iosapic_disable_level_irq mask_irq
-#define iosapic_ack_level_irq nop
struct hw_interrupt_type irq_type_iosapic_level = {
.typename = "IO-SAPIC-level",
Index: linux/arch/ia64/kernel/irq_ia64.c
===================================================================
--- linux.orig/arch/ia64/kernel/irq_ia64.c
+++ linux/arch/ia64/kernel/irq_ia64.c
@@ -30,6 +30,7 @@
#include
#include
#include
+#include
#include
#include
@@ -105,6 +106,25 @@ reserve_irq_vector (int vector)
return test_and_set_bit(pos, ia64_vector_mask);
}
+/*
+ * Dynamic irq allocate and deallocation for MSI
+ */
+int create_irq(void)
+{
+ int vector = assign_irq_vector(AUTO_ASSIGN);
+
+ if (vector >= 0)
+ dynamic_irq_init(vector);
+
+ return vector;
+}
+
+void destroy_irq(unsigned int irq)
+{
+ dynamic_irq_cleanup(irq);
+ free_irq_vector(irq);
+}
+
#ifdef CONFIG_SMP
# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE)
#else
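
create_irq()/destroy_irq() give the MSI layer a dynamic vector allocator on ia64. A sketch of the expected call pattern, with error handling trimmed ("my_" names are illustrative):

	static int my_setup_msi(struct pci_dev *dev)
	{
		int irq = create_irq();	/* assign vector + dynamic_irq_init() */

		if (irq < 0)
			return irq;
		/* ... compose the MSI message for 'irq', request_irq() ... */
		return irq;
	}

	static void my_teardown_msi(unsigned int irq)
	{
		destroy_irq(irq);	/* dynamic_irq_cleanup() + free vector */
	}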
Index: linux/arch/ia64/kernel/mca.c
===================================================================
--- linux.orig/arch/ia64/kernel/mca.c
+++ linux/arch/ia64/kernel/mca.c
@@ -152,7 +152,7 @@ ia64_mca_spin(const char *func)
typedef struct ia64_state_log_s
{
- spinlock_t isl_lock;
+ raw_spinlock_t isl_lock;
int isl_index;
unsigned long isl_count;
ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
Index: linux/arch/ia64/kernel/perfmon.c
===================================================================
--- linux.orig/arch/ia64/kernel/perfmon.c
+++ linux/arch/ia64/kernel/perfmon.c
@@ -277,7 +277,7 @@ typedef struct {
*/
typedef struct pfm_context {
- spinlock_t ctx_lock; /* context protection */
+ raw_spinlock_t ctx_lock; /* context protection */
pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */
unsigned int ctx_state; /* state: active/inactive (no bitfield) */
@@ -363,7 +363,7 @@ typedef struct pfm_context {
* mostly used to synchronize between system wide and per-process
*/
typedef struct {
- spinlock_t pfs_lock; /* lock the structure */
+ raw_spinlock_t pfs_lock; /* lock the structure */
unsigned int pfs_task_sessions; /* number of per task sessions */
unsigned int pfs_sys_sessions; /* number of per system wide sessions */
@@ -504,7 +504,7 @@ static pfm_intr_handler_desc_t *pfm_alt
static struct proc_dir_entry *perfmon_dir;
static pfm_uuid_t pfm_null_uuid = {0,};
-static spinlock_t pfm_buffer_fmt_lock;
+static raw_spinlock_t pfm_buffer_fmt_lock;
static LIST_HEAD(pfm_buffer_fmt_list);
static pmu_config_t *pmu_conf;
Index: linux/arch/ia64/kernel/process.c
===================================================================
--- linux.orig/arch/ia64/kernel/process.c
+++ linux/arch/ia64/kernel/process.c
@@ -96,6 +96,9 @@ show_stack (struct task_struct *task, un
void
dump_stack (void)
{
+ if (irqs_disabled()) {
+ printk("Uh oh.. entering dump_stack() with irqs disabled.\n");
+ }
show_stack(NULL, NULL);
}
@@ -199,7 +202,7 @@ void
default_idle (void)
{
local_irq_enable();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
if (can_do_pal_halt)
safe_halt();
else
@@ -275,7 +278,7 @@ cpu_idle (void)
else
current_thread_info()->status |= TS_POLLING;
- if (!need_resched()) {
+ if (!need_resched() && !need_resched_delayed()) {
void (*idle)(void);
#ifdef CONFIG_SMP
min_xtp();
@@ -297,10 +300,11 @@ cpu_idle (void)
normal_xtp();
#endif
}
- preempt_enable_no_resched();
- schedule();
+ __preempt_enable_no_resched();
+ __schedule();
+
preempt_disable();
- check_pgt_cache();
+
if (cpu_is_offline(cpu))
play_dead();
}
Index: linux/arch/ia64/kernel/sal.c
===================================================================
--- linux.orig/arch/ia64/kernel/sal.c
+++ linux/arch/ia64/kernel/sal.c
@@ -18,7 +18,7 @@
#include
#include
- __cacheline_aligned DEFINE_SPINLOCK(sal_lock);
+ __cacheline_aligned DEFINE_RAW_SPINLOCK(sal_lock);
unsigned long sal_platform_features;
unsigned short sal_revision;
Index: linux/arch/ia64/kernel/salinfo.c
===================================================================
--- linux.orig/arch/ia64/kernel/salinfo.c
+++ linux/arch/ia64/kernel/salinfo.c
@@ -141,7 +141,7 @@ enum salinfo_state {
struct salinfo_data {
cpumask_t cpu_event; /* which cpus have outstanding events */
- struct semaphore mutex;
+ struct compat_semaphore mutex;
u8 *log_buffer;
u64 log_size;
u8 *oemdata; /* decoded oem data */
@@ -157,8 +157,8 @@ struct salinfo_data {
static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
-static DEFINE_SPINLOCK(data_lock);
-static DEFINE_SPINLOCK(data_saved_lock);
+static DEFINE_RAW_SPINLOCK(data_lock);
+static DEFINE_RAW_SPINLOCK(data_saved_lock);
/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
* record.
Index: linux/arch/ia64/kernel/semaphore.c
===================================================================
--- linux.orig/arch/ia64/kernel/semaphore.c
+++ linux/arch/ia64/kernel/semaphore.c
@@ -40,12 +40,12 @@
*/
void
-__up (struct semaphore *sem)
+__up (struct compat_semaphore *sem)
{
wake_up(&sem->wait);
}
-void __sched __down (struct semaphore *sem)
+void __sched __down (struct compat_semaphore *sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -82,7 +82,7 @@ void __sched __down (struct semaphore *s
tsk->state = TASK_RUNNING;
}
-int __sched __down_interruptible (struct semaphore * sem)
+int __sched __down_interruptible (struct compat_semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -142,7 +142,7 @@ int __sched __down_interruptible (struct
* count.
*/
int
-__down_trylock (struct semaphore *sem)
+__down_trylock (struct compat_semaphore *sem)
{
unsigned long flags;
int sleepers;
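
Under PREEMPT_RT a plain struct semaphore behaves like a mutex, so users that depend on classic counting/wakeup semantics (salinfo's event mutex above, for instance) are switched to compat_semaphore, which keeps the original behaviour; in the -rt tree the down()/up() family dispatches on the lock type. A sketch under that assumption ("my_sem" is illustrative):

	/* must count events, so it cannot become an -rt mutex */
	static struct compat_semaphore my_sem;

	static int my_wait_for_event(void)
	{
		if (down_interruptible(&my_sem))
			return -EINTR;
		/* ... consume one queued event ... */
		return 0;
	}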
Index: linux/arch/ia64/kernel/signal.c
===================================================================
--- linux.orig/arch/ia64/kernel/signal.c
+++ linux/arch/ia64/kernel/signal.c
@@ -487,6 +487,14 @@ ia64_do_signal (sigset_t *oldset, struct
long errno = scr->pt.r8;
# define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c))
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
+
/*
* In the ia64_leave_kernel code path, we want the common case to go fast, which
* is why we may in certain cases get here from kernel mode. Just return without
Index: linux/arch/ia64/kernel/smp.c
===================================================================
--- linux.orig/arch/ia64/kernel/smp.c
+++ linux/arch/ia64/kernel/smp.c
@@ -222,6 +222,22 @@ smp_send_reschedule (int cpu)
platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}
+/*
+ * this function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu) {
+ if (cpu != smp_processor_id())
+ platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
+ }
+}
+
+
void
smp_flush_tlb_all (void)
{
Index: linux/arch/ia64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/ia64/kernel/smpboot.c
+++ linux/arch/ia64/kernel/smpboot.c
@@ -371,6 +371,8 @@ smp_setup_percpu_timer (void)
{
}
+extern void register_itc_clockevent(void);
+
static void __devinit
smp_callin (void)
{
@@ -430,6 +432,7 @@ smp_callin (void)
#ifdef CONFIG_IA32_SUPPORT
ia32_gdt_init();
#endif
+ register_itc_clockevent();
/*
* Allow the master to continue.
Index: linux/arch/ia64/kernel/time.c
===================================================================
--- linux.orig/arch/ia64/kernel/time.c
+++ linux/arch/ia64/kernel/time.c
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
@@ -31,6 +32,10 @@
extern unsigned long wall_jiffies;
+static cycle_t itc_get_cycles(void);
+cycle_t itc_lastcycle __attribute__((aligned(L1_CACHE_BYTES)));
+int itc_jitter __attribute__((aligned(L1_CACHE_BYTES)));
+
volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
@@ -40,11 +45,16 @@ EXPORT_SYMBOL(last_cli_ip);
#endif
-static struct time_interpolator itc_interpolator = {
- .shift = 16,
- .mask = 0xffffffffffffffffLL,
- .source = TIME_SOURCE_CPU
+static struct clocksource clocksource_itc = {
+ .name = "itc",
+ .rating = 350,
+ .read = itc_get_cycles,
+ .mask = 0xffffffffffffffffLL,
+ .mult = 0, /* to be calculated */
+ .shift = 16,
+ .is_continuous = 1,
};
+static struct clocksource *clocksource_itc_p;
static irqreturn_t
timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
@@ -57,38 +67,57 @@ timer_interrupt (int irq, void *dev_id,
platform_timer_interrupt(irq, dev_id, regs);
+#if 0
new_itm = local_cpu_data->itm_next;
if (!time_after(ia64_get_itc(), new_itm))
printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
ia64_get_itc(), new_itm);
-
profile_tick(CPU_PROFILING, regs);
+#endif
- while (1) {
- update_process_times(user_mode(regs));
+ if (time_after(ia64_get_itc(), local_cpu_data->itm_tick_next)) {
- new_itm += local_cpu_data->itm_delta;
+ unsigned long new_tick_itm;
+ new_tick_itm = local_cpu_data->itm_tick_next;
- if (smp_processor_id() == time_keeper_id) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_seqlock(&xtime_lock);
- do_timer(regs);
- local_cpu_data->itm_next = new_itm;
- write_sequnlock(&xtime_lock);
- } else
- local_cpu_data->itm_next = new_itm;
+ profile_tick(CPU_PROFILING, regs);
+
+ while (1) {
+ update_process_times(user_mode(regs));
+
+ new_tick_itm += local_cpu_data->itm_tick_delta;
+
+ if (smp_processor_id() == time_keeper_id) {
+ /*
+ * Here we are in the timer irq handler. We have irqs locally
+ * disabled, but we don't know if the timer_bh is running on
+ * another CPU. We need to avoid an SMP race by acquiring the
+ * xtime_lock.
+ */
+ write_seqlock(&xtime_lock);
+ do_timer(regs);
+ local_cpu_data->itm_tick_next = new_tick_itm;
+ write_sequnlock(&xtime_lock);
+ } else
+ local_cpu_data->itm_tick_next = new_tick_itm;
+
+ if (time_after(new_tick_itm, ia64_get_itc()))
+ break;
+ }
+ }
- if (time_after(new_itm, ia64_get_itc()))
- break;
+ if (time_after(ia64_get_itc(), local_cpu_data->itm_timer_next)) {
+ if (itc_clockevent.event_handler)
+ itc_clockevent.event_handler(regs);
}
do {
+ // FIXME, really, please
+ new_itm = local_cpu_data->itm_tick_next;
+
+ if (time_after(new_itm, local_cpu_data->itm_timer_next))
+ new_itm = local_cpu_data->itm_timer_next;
/*
* If we're too close to the next clock tick for
* comfort, we increase the safety margin by
@@ -98,8 +127,8 @@ timer_interrupt (int irq, void *dev_id,
* too fast (with the potentially devastating effect
* of losing monotony of time).
*/
- while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
- new_itm += local_cpu_data->itm_delta;
+ while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_tick_delta/2))
+ new_itm += local_cpu_data->itm_tick_delta;
ia64_set_itm(new_itm);
/* double check, in case we got hit by a (slow) PMI: */
} while (time_after_eq(ia64_get_itc(), new_itm));
@@ -118,7 +147,7 @@ ia64_cpu_local_tick (void)
/* arrange for the cycle counter to generate a timer interrupt: */
ia64_set_itv(IA64_TIMER_VECTOR);
- delta = local_cpu_data->itm_delta;
+ delta = local_cpu_data->itm_tick_delta;
/*
* Stagger the timer tick for each CPU so they don't occur all at (almost) the
* same time:
@@ -127,8 +156,8 @@ ia64_cpu_local_tick (void)
unsigned long hi = 1UL << ia64_fls(cpu);
shift = (2*(cpu - hi) + 1) * delta/hi/2;
}
- local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
- ia64_set_itm(local_cpu_data->itm_next);
+ local_cpu_data->itm_tick_next = ia64_get_itc() + delta + shift;
+ ia64_set_itm(local_cpu_data->itm_tick_next);
}
static int nojitter;
@@ -186,7 +215,7 @@ ia64_init_itm (void)
itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
- local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
+ local_cpu_data->itm_tick_delta = (itc_freq + HZ/2) / HZ;
printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, "
"ITC freq=%lu.%03luMHz", smp_processor_id(),
platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
@@ -206,9 +235,8 @@ ia64_init_itm (void)
 local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT) + itc_freq/2)/itc_freq;
- itc_interpolator.frequency = local_cpu_data->itc_freq;
- itc_interpolator.drift = itc_drift;
#ifdef CONFIG_SMP
/* On IA64 in an SMP configuration ITCs are never accurately synchronized.
* Jitter compensation requires a cmpxchg which may limit
@@ -220,18 +248,57 @@ ia64_init_itm (void)
* even going backward) if the ITC offsets between the individual CPUs
* are too large.
*/
- if (!nojitter) itc_interpolator.jitter = 1;
+ if (!nojitter) itc_jitter = 1;
#endif
- register_time_interpolator(&itc_interpolator);
}
+#endif
/* Setup the CPU local timer tick */
ia64_cpu_local_tick();
+
+ if (!clocksource_itc_p) {
+ /* Sort out mult/shift values: */
+ clocksource_itc.mult = clocksource_hz2mult(local_cpu_data->itc_freq,
+ clocksource_itc.shift);
+ clocksource_register(&clocksource_itc);
+ clocksource_itc_p = &clocksource_itc;
+ }
}
+
+static cycle_t itc_get_cycles(void)
+{
+ if (itc_jitter) {
+ u64 lcycle;
+ u64 now;
+
+ do {
+ lcycle = itc_lastcycle;
+ now = get_cycles();
+ if (lcycle && time_after(lcycle, now))
+ return lcycle;
+
+ /* When holding the xtime write lock, there's no need
+ * to add the overhead of the cmpxchg. Readers are
+ * forced to retry until the write lock is released.
+ */
+ if (spin_is_locked(&xtime_lock.lock)) {
+ itc_lastcycle = now;
+ return now;
+ }
+ /* Keep track of the last timer value returned. The use of cmpxchg here
+ * will cause contention in an SMP environment.
+ */
+ } while (unlikely(cmpxchg(&itc_lastcycle, lcycle, now) != lcycle));
+ return now;
+ } else
+ return get_cycles();
+}
+
+
static struct irqaction timer_irqaction = {
.handler = timer_interrupt,
- .flags = IRQF_DISABLED,
+ .flags = IRQF_DISABLED | IRQF_NODELAY,
.name = "timer"
};
@@ -252,6 +319,8 @@ time_init (void)
* tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
*/
set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
+ register_itc_clocksource();
+ register_itc_clockevent();
}
/*
@@ -304,3 +373,10 @@ ia64_setup_printk_clock(void)
if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT))
ia64_printk_clock = ia64_itc_printk_clock;
}
+
+struct clocksource fsyscall_clock __attribute__((aligned(L1_CACHE_BYTES)));
+
+void update_vsyscall(struct timespec *wall, struct clocksource *c)
+{
+ fsyscall_clock = *c;
+}
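
A note on the scheme above: itc_get_cycles() keeps the clock monotonic on SMP boxes whose ITCs drift apart by publishing the last value handed out and letting readers race with cmpxchg. The sketch below is a user-space illustration of that pattern, assuming C11 atomics; read_raw_counter() is a stand-in for get_cycles(), and the kernel additionally uses time_after() so counter wraparound is handled, which this sketch omits.

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t last_cycle;	/* last value handed to any reader */

/* Stand-in for get_cycles(); real per-CPU counters may disagree slightly,
 * which is exactly the race the loop below papers over. */
static uint64_t read_raw_counter(void)
{
	static _Atomic uint64_t fake;
	return atomic_fetch_add(&fake, 1);
}

static uint64_t read_monotonic(void)
{
	uint64_t last, now;

	do {
		last = atomic_load(&last_cycle);
		now = read_raw_counter();
		/* If our raw read is behind what was already handed out,
		 * return the newer published value instead of going back. */
		if (last && last > now)
			return last;
		/* Publish 'now'; retry if another reader won the race. */
	} while (!atomic_compare_exchange_weak(&last_cycle, &last, now));

	return now;
}
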
Index: linux/arch/ia64/kernel/traps.c
===================================================================
--- linux.orig/arch/ia64/kernel/traps.c
+++ linux/arch/ia64/kernel/traps.c
@@ -24,7 +24,7 @@
#include
#include
-extern spinlock_t timerlist_lock;
+extern raw_spinlock_t timerlist_lock;
fpswa_interface_t *fpswa_interface;
EXPORT_SYMBOL(fpswa_interface);
@@ -85,11 +85,11 @@ void
die (const char *str, struct pt_regs *regs, long err)
{
static struct {
- spinlock_t lock;
+ raw_spinlock_t lock;
u32 lock_owner;
int lock_owner_depth;
} die = {
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = RAW_SPIN_LOCK_UNLOCKED,
.lock_owner = -1,
.lock_owner_depth = 0
};
@@ -226,7 +226,7 @@ __kprobes ia64_bad_break (unsigned long
* access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes
* care of clearing psr.dfh.
*/
-static inline void
+void
disabled_fph_fault (struct pt_regs *regs)
{
struct ia64_psr *psr = ia64_psr(regs);
@@ -245,7 +245,7 @@ disabled_fph_fault (struct pt_regs *regs
= (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
if (ia64_is_local_fpu_owner(current)) {
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
return;
}
@@ -265,7 +265,7 @@ disabled_fph_fault (struct pt_regs *regs
*/
psr->mfh = 1;
}
- preempt_enable_no_resched();
+ __preempt_enable_no_resched();
}
static inline int
Index: linux/arch/ia64/kernel/unwind.c
===================================================================
--- linux.orig/arch/ia64/kernel/unwind.c
+++ linux/arch/ia64/kernel/unwind.c
@@ -81,7 +81,7 @@ typedef unsigned long unw_word;
typedef unsigned char unw_hash_index_t;
static struct {
- spinlock_t lock; /* spinlock for unwind data */
+ raw_spinlock_t lock; /* spinlock for unwind data */
/* list of unwind tables (one per load-module) */
struct unw_table *tables;
@@ -145,7 +145,7 @@ static struct {
# endif
} unw = {
.tables = &unw.kernel_table,
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = RAW_SPIN_LOCK_UNLOCKED,
.save_order = {
UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
Index: linux/arch/ia64/kernel/unwind_i.h
===================================================================
--- linux.orig/arch/ia64/kernel/unwind_i.h
+++ linux/arch/ia64/kernel/unwind_i.h
@@ -154,7 +154,7 @@ struct unw_script {
unsigned long ip; /* ip this script is for */
unsigned long pr_mask; /* mask of predicates script depends on */
unsigned long pr_val; /* predicate values this script is for */
- rwlock_t lock;
+ raw_rwlock_t lock;
unsigned int flags; /* see UNW_FLAG_* in unwind.h */
unsigned short lru_chain; /* used for least-recently-used chain */
unsigned short coll_chain; /* used for hash collisions */
Index: linux/arch/ia64/mm/init.c
===================================================================
--- linux.orig/arch/ia64/mm/init.c
+++ linux/arch/ia64/mm/init.c
@@ -36,7 +36,7 @@
#include
#include
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(unsigned long *, __pgtable_quicklist);
DEFINE_PER_CPU(long, __pgtable_quicklist_size);
@@ -92,15 +92,11 @@ check_pgt_cache(void)
if (unlikely(pgtable_quicklist_size <= MIN_PGT_PAGES))
return;
- preempt_disable();
while (unlikely((pages_to_free = min_pages_to_free()) > 0)) {
while (pages_to_free--) {
free_page((unsigned long)pgtable_quicklist_alloc());
}
- preempt_enable();
- preempt_disable();
}
- preempt_enable();
}
void
Index: linux/arch/ia64/mm/tlb.c
===================================================================
--- linux.orig/arch/ia64/mm/tlb.c
+++ linux/arch/ia64/mm/tlb.c
@@ -32,7 +32,7 @@ static struct {
} purge;
struct ia64_ctx ia64_ctx = {
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = RAW_SPIN_LOCK_UNLOCKED,
.next = 1,
.max_ctx = ~0U
};
Index: linux/arch/ia64/pci/pci.c
===================================================================
--- linux.orig/arch/ia64/pci/pci.c
+++ linux/arch/ia64/pci/pci.c
@@ -809,12 +809,3 @@ pcibios_prep_mwi (struct pci_dev *dev)
}
return rc;
}
-
-int pci_vector_resources(int last, int nr_released)
-{
- int count = nr_released;
-
- count += (IA64_LAST_DEVICE_VECTOR - last);
-
- return count;
-}
Index: linux/arch/ia64/sn/kernel/sn2/timer.c
===================================================================
--- linux.orig/arch/ia64/sn/kernel/sn2/timer.c
+++ linux/arch/ia64/sn/kernel/sn2/timer.c
@@ -11,6 +11,7 @@
#include
#include
#include
+#include <linux/clocksource.h>
#include
#include
@@ -22,11 +23,21 @@
extern unsigned long sn_rtc_cycles_per_second;
-static struct time_interpolator sn2_interpolator = {
- .drift = -1,
- .shift = 10,
- .mask = (1LL << 55) - 1,
- .source = TIME_SOURCE_MMIO64
+static void __iomem *sn2_mc_ptr;
+
+static cycle_t read_sn2(void)
+{
+ return (cycle_t)readq(sn2_mc_ptr);
+}
+
+static struct clocksource clocksource_sn2 = {
+ .name = "sn2_rtc",
+ .rating = 300,
+ .read = read_sn2,
+ .mask = (1LL << 55) - 1,
+ .mult = 0,
+ .shift = 10,
+ .is_continuous = 1,
};
/*
@@ -47,9 +58,10 @@ ia64_sn_udelay (unsigned long usecs)
void __init sn_timer_init(void)
{
- sn2_interpolator.frequency = sn_rtc_cycles_per_second;
- sn2_interpolator.addr = RTC_COUNTER_ADDR;
- register_time_interpolator(&sn2_interpolator);
+ clocksource_sn2.fsys_mmio_ptr = sn2_mc_ptr = RTC_COUNTER_ADDR;
+ clocksource_sn2.mult = clocksource_hz2mult(sn_rtc_cycles_per_second,
+ clocksource_sn2.shift);
+ clocksource_register(&clocksource_sn2);
ia64_udelay = &ia64_sn_udelay;
}
Index: linux/arch/mips/Kconfig
===================================================================
--- linux.orig/arch/mips/Kconfig
+++ linux/arch/mips/Kconfig
@@ -417,6 +417,7 @@ config MOMENCO_JAGUAR_ATX
config MOMENCO_OCELOT
bool "Momentum Ocelot board"
select DMA_NONCOHERENT
+ select NO_SPINLOCK
select HW_HAS_PCI
select IRQ_CPU
select IRQ_CPU_RM7K
@@ -837,6 +838,7 @@ source "arch/mips/cobalt/Kconfig"
endmenu
+
config RWSEM_GENERIC_SPINLOCK
bool
default y
@@ -844,6 +846,10 @@ config RWSEM_GENERIC_SPINLOCK
config RWSEM_XCHGADD_ALGORITHM
bool
+config ASM_SEMAPHORES
+ bool
+ default y
+
config GENERIC_FIND_NEXT_BIT
bool
default y
@@ -889,6 +895,9 @@ config DMA_NEED_PCI_MAP_STATE
config OWN_DMA
bool
+config NO_SPINLOCK
+ bool
+
config EARLY_PRINTK
bool
@@ -1843,12 +1852,17 @@ config MIPS_INSANE_LARGE
This will result in additional memory usage, so it is not
recommended for normal users.
-endmenu
-
-config RWSEM_GENERIC_SPINLOCK
+config GENERIC_TIME
bool
default y
+source "kernel/time/Kconfig"
+
+config CPU_SPEED
+ int "CPU speed used for clocksource/clockevent calculations"
+ default 600
+endmenu
+
source "init/Kconfig"
menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
Index: linux/arch/mips/kernel/Makefile
===================================================================
--- linux.orig/arch/mips/kernel/Makefile
+++ linux/arch/mips/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
- ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \
+ ptrace.o reset.o setup.o signal.o syscall.o \
time.o traps.o unaligned.o
binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \
@@ -15,6 +15,8 @@ obj-$(CONFIG_MODULES) += mips_ksyms.o m
obj-$(CONFIG_APM) += apm.o
+obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o
+
obj-$(CONFIG_CPU_R3000) += r2300_fpu.o r2300_switch.o
obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o
obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o
Index: linux/arch/mips/kernel/asm-offsets.c
===================================================================
--- linux.orig/arch/mips/kernel/asm-offsets.c
+++ linux/arch/mips/kernel/asm-offsets.c
@@ -10,6 +10,9 @@
*/
#include
#include
+#include
+#include
+#include
#include
#include
#include
Index: linux/arch/mips/kernel/entry.S
===================================================================
--- linux.orig/arch/mips/kernel/entry.S
+++ linux/arch/mips/kernel/entry.S
@@ -25,7 +25,7 @@
.endm
#else
.macro preempt_stop
- local_irq_disable
+ raw_local_irq_disable
.endm
#define resume_kernel restore_all
#endif
@@ -40,7 +40,7 @@ FEXPORT(ret_from_irq)
beqz t0, resume_kernel
resume_userspace:
- local_irq_disable # make sure we dont miss an
+ raw_local_irq_disable # make sure we dont miss an
# interrupt setting need_resched
# between sampling and return
LONG_L a2, TI_FLAGS($28) # current->work
@@ -50,7 +50,9 @@ resume_userspace:
#ifdef CONFIG_PREEMPT
resume_kernel:
- local_irq_disable
+ raw_local_irq_disable
+ lw t0, kernel_preemption
+ beqz t0, restore_all
lw t0, TI_PRE_COUNT($28)
bnez t0, restore_all
need_resched:
@@ -60,7 +62,9 @@ need_resched:
LONG_L t0, PT_STATUS(sp) # Interrupts off?
andi t0, 1
beqz t0, restore_all
+ raw_local_irq_disable
jal preempt_schedule_irq
+ sw zero, TI_PRE_COUNT($28)
b need_resched
#endif
@@ -68,7 +72,7 @@ FEXPORT(ret_from_fork)
jal schedule_tail # a0 = struct task_struct *prev
FEXPORT(syscall_exit)
- local_irq_disable # make sure need_resched and
+ raw_local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
LONG_L a2, TI_FLAGS($28) # current->work
@@ -133,19 +137,21 @@ FEXPORT(restore_partial) # restore part
.set at
work_pending:
- andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS
+ # a2 is preloaded with TI_FLAGS
+ andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
beqz t0, work_notifysig
work_resched:
+ raw_local_irq_enable t0
jal schedule
- local_irq_disable # make sure need_resched and
+ raw_local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
LONG_L a2, TI_FLAGS($28)
andi t0, a2, _TIF_WORK_MASK # is there any work to be done
# other than syscall tracing?
beqz t0, restore_all
- andi t0, a2, _TIF_NEED_RESCHED
+ andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
bnez t0, work_resched
work_notifysig: # deal with pending signals and
@@ -161,7 +167,7 @@ syscall_exit_work:
li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
and t0, a2 # a2 is preloaded with TI_FLAGS
beqz t0, work_pending # trace bit set?
- local_irq_enable # could let do_syscall_trace()
+ raw_local_irq_enable # could let do_syscall_trace()
# call schedule() instead
move a0, sp
li a1, 1
Index: linux/arch/mips/kernel/i8259.c
===================================================================
--- linux.orig/arch/mips/kernel/i8259.c
+++ linux/arch/mips/kernel/i8259.c
@@ -31,7 +31,7 @@ void disable_8259A_irq(unsigned int irq)
* moves to arch independent land
*/
-DEFINE_SPINLOCK(i8259A_lock);
+DEFINE_RAW_SPINLOCK(i8259A_lock);
static void end_8259A_irq (unsigned int irq)
{
Index: linux/arch/mips/kernel/irq.c
===================================================================
--- linux.orig/arch/mips/kernel/irq.c
+++ linux/arch/mips/kernel/irq.c
@@ -137,7 +137,10 @@ void __init init_IRQ(void)
irq_desc[i].action = NULL;
irq_desc[i].depth = 1;
irq_desc[i].chip = &no_irq_chip;
- spin_lock_init(&irq_desc[i].lock);
+ __raw_spin_lock_init(&irq_desc[i].lock);
+#ifdef CONFIG_PREEMPT_HARDIRQS
+ irq_desc[i].thread = NULL;
+#endif
#ifdef CONFIG_MIPS_MT_SMTC
irq_hwmask[i] = 0;
#endif /* CONFIG_MIPS_MT_SMTC */
Index: linux/arch/mips/kernel/module.c
===================================================================
--- linux.orig/arch/mips/kernel/module.c
+++ linux/arch/mips/kernel/module.c
@@ -39,7 +39,7 @@ struct mips_hi16 {
static struct mips_hi16 *mips_hi16_list;
static LIST_HEAD(dbe_list);
-static DEFINE_SPINLOCK(dbe_lock);
+static DEFINE_RAW_SPINLOCK(dbe_lock);
void *module_alloc(unsigned long size)
{
Index: linux/arch/mips/kernel/process.c
===================================================================
--- linux.orig/arch/mips/kernel/process.c
+++ linux/arch/mips/kernel/process.c
@@ -54,16 +54,18 @@ ATTRIB_NORET void cpu_idle(void)
{
/* endless idle loop with no priority at all */
while (1) {
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
#ifdef CONFIG_MIPS_MT_SMTC
smtc_idle_loop_hook();
#endif /* CONFIG_MIPS_MT_SMTC */
if (cpu_wait)
(*cpu_wait)();
}
- preempt_enable_no_resched();
- schedule();
+ local_irq_disable();
+ __preempt_enable_no_resched();
+ __schedule();
preempt_disable();
+ local_irq_enable();
}
}
Index: linux/arch/mips/kernel/scall32-o32.S
===================================================================
--- linux.orig/arch/mips/kernel/scall32-o32.S
+++ linux/arch/mips/kernel/scall32-o32.S
@@ -84,7 +84,7 @@ stack_done:
1: sw v0, PT_R2(sp) # result
o32_syscall_exit:
- local_irq_disable # make sure need_resched and
+ raw_local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
lw a2, TI_FLAGS($28) # current->work
Index: linux/arch/mips/kernel/scall64-64.S
===================================================================
--- linux.orig/arch/mips/kernel/scall64-64.S
+++ linux/arch/mips/kernel/scall64-64.S
@@ -72,7 +72,7 @@ NESTED(handle_sys64, PT_SIZE, sp)
1: sd v0, PT_R2(sp) # result
n64_syscall_exit:
- local_irq_disable # make sure need_resched and
+ raw_local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
LONG_L a2, TI_FLAGS($28) # current->work
Index: linux/arch/mips/kernel/scall64-n32.S
===================================================================
--- linux.orig/arch/mips/kernel/scall64-n32.S
+++ linux/arch/mips/kernel/scall64-n32.S
@@ -69,7 +69,7 @@ NESTED(handle_sysn32, PT_SIZE, sp)
sd v0, PT_R0(sp) # set flag for syscall restarting
1: sd v0, PT_R2(sp) # result
- local_irq_disable # make sure need_resched and
+ raw_local_irq_disable # make sure need_resched and
# signals dont change between
# sampling and return
LONG_L a2, TI_FLAGS($28) # current->work
Index: linux/arch/mips/kernel/scall64-o32.S
===================================================================
--- linux.orig/arch/mips/kernel/scall64-o32.S
+++ linux/arch/mips/kernel/scall64-o32.S
@@ -98,7 +98,7 @@ NESTED(handle_sys, PT_SIZE, sp)
1: sd v0, PT_R2(sp) # result
o32_syscall_exit:
- local_irq_disable # make need_resched and
+ raw_local_irq_disable # make need_resched and
# signals dont change between
# sampling and return
LONG_L a2, TI_FLAGS($28)
Index: linux/arch/mips/kernel/semaphore.c
===================================================================
--- linux.orig/arch/mips/kernel/semaphore.c
+++ linux/arch/mips/kernel/semaphore.c
@@ -36,7 +36,7 @@
* sem->count and sem->waking atomic. Scalability isn't an issue because
* this lock is used on UP only so it's just an empty variable.
*/
-static inline int __sem_update_count(struct semaphore *sem, int incr)
+static inline int __sem_update_count(struct compat_semaphore *sem, int incr)
{
int old_count, tmp;
@@ -67,7 +67,7 @@ static inline int __sem_update_count(str
: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
: "r" (incr), "m" (sem->count));
} else {
- static DEFINE_SPINLOCK(semaphore_lock);
+ static DEFINE_RAW_SPINLOCK(semaphore_lock);
unsigned long flags;
spin_lock_irqsave(&semaphore_lock, flags);
@@ -80,7 +80,7 @@ static inline int __sem_update_count(str
return old_count;
}
-void __up(struct semaphore *sem)
+void __compat_up(struct compat_semaphore *sem)
{
/*
* Note that we incremented count in up() before we came here,
@@ -94,7 +94,7 @@ void __up(struct semaphore *sem)
wake_up(&sem->wait);
}
-EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__compat_up);
/*
* Note that when we come in to __down or __down_interruptible,
@@ -104,7 +104,7 @@ EXPORT_SYMBOL(__up);
* Thus it is only when we decrement count from some value > 0
* that we have actually got the semaphore.
*/
-void __sched __down(struct semaphore *sem)
+void __sched __compat_down(struct compat_semaphore *sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -133,9 +133,9 @@ void __sched __down(struct semaphore *se
wake_up(&sem->wait);
}
-EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__compat_down);
-int __sched __down_interruptible(struct semaphore * sem)
+int __sched __compat_down_interruptible(struct compat_semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -165,4 +165,10 @@ int __sched __down_interruptible(struct
return retval;
}
-EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__compat_down_interruptible);
+
+int fastcall compat_sem_is_locked(struct compat_semaphore *sem)
+{
+ return (int) atomic_read(&sem->count) < 0;
+}
+EXPORT_SYMBOL(compat_sem_is_locked);
Index: linux/arch/mips/kernel/signal.c
===================================================================
--- linux.orig/arch/mips/kernel/signal.c
+++ linux/arch/mips/kernel/signal.c
@@ -416,6 +416,10 @@ void do_signal(struct pt_regs *regs)
siginfo_t info;
int signr;
+#ifdef CONFIG_PREEMPT_RT
+ local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* We want the common case to go fast, which is why we may in certain
* cases get here from kernel mode. Just return without doing anything
Index: linux/arch/mips/kernel/signal32.c
===================================================================
--- linux.orig/arch/mips/kernel/signal32.c
+++ linux/arch/mips/kernel/signal32.c
@@ -807,6 +807,10 @@ void do_signal32(struct pt_regs *regs)
siginfo_t info;
int signr;
+#ifdef CONFIG_PREEMPT_RT
+ local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* We want the common case to go fast, which is why we may in certain
* cases get here from kernel mode. Just return without doing anything
Index: linux/arch/mips/kernel/smp.c
===================================================================
--- linux.orig/arch/mips/kernel/smp.c
+++ linux/arch/mips/kernel/smp.c
@@ -115,7 +115,22 @@ asmlinkage void start_secondary(void)
cpu_idle();
}
-DEFINE_SPINLOCK(smp_call_lock);
+DEFINE_RAW_SPINLOCK(smp_call_lock);
+
+/*
+ * This function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them.
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ int cpu = smp_processor_id();
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++)
+ if (cpu_online(i) && i != cpu)
+ core_send_ipi(i, SMP_RESCHEDULE_YOURSELF);
+}
struct call_data_struct *call_data;
@@ -303,6 +318,8 @@ int setup_profiling_timer(unsigned int m
return 0;
}
+static DEFINE_RAW_SPINLOCK(tlbstate_lock);
+
static void flush_tlb_all_ipi(void *info)
{
local_flush_tlb_all();
@@ -360,6 +377,7 @@ static inline void smp_on_each_tlb(void
void flush_tlb_mm(struct mm_struct *mm)
{
preempt_disable();
+ spin_lock(&tlbstate_lock);
if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
smp_on_other_tlbs(flush_tlb_mm_ipi, (void *)mm);
@@ -369,6 +387,7 @@ void flush_tlb_mm(struct mm_struct *mm)
if (smp_processor_id() != i)
cpu_context(i, mm) = 0;
}
+ spin_unlock(&tlbstate_lock);
local_flush_tlb_mm(mm);
preempt_enable();
@@ -392,6 +411,8 @@ void flush_tlb_range(struct vm_area_stru
struct mm_struct *mm = vma->vm_mm;
preempt_disable();
+ spin_lock(&tlbstate_lock);
+
if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) {
struct flush_tlb_data fd;
@@ -405,6 +426,7 @@ void flush_tlb_range(struct vm_area_stru
if (smp_processor_id() != i)
cpu_context(i, mm) = 0;
}
+ spin_unlock(&tlbstate_lock);
local_flush_tlb_range(vma, start, end);
preempt_enable();
}
@@ -435,6 +457,8 @@ static void flush_tlb_page_ipi(void *inf
void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
preempt_disable();
+ spin_lock(&tlbstate_lock);
+
if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) {
struct flush_tlb_data fd;
@@ -447,6 +471,7 @@ void flush_tlb_page(struct vm_area_struc
if (smp_processor_id() != i)
cpu_context(i, vma->vm_mm) = 0;
}
+ spin_unlock(&tlbstate_lock);
local_flush_tlb_page(vma, page);
preempt_enable();
}
Index: linux/arch/mips/kernel/time.c
===================================================================
--- linux.orig/arch/mips/kernel/time.c
+++ linux/arch/mips/kernel/time.c
@@ -10,6 +10,11 @@
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
+ *
+ * This implementation of High Res Timers uses two timers. One is the system
+ * timer. The second is used for the high res timers. The high res timers
+ * require the CPU to have count/compare registers. The mips_set_next_event()
+ * function schedules the next high res timer interrupt.
*/
#include
#include
@@ -23,6 +28,7 @@
#include
#include
#include
+#include
#include
#include
@@ -49,8 +55,28 @@
*/
extern volatile unsigned long wall_jiffies;
+/* any missed timer interrupts */
+int missed_timer_count;
+
DEFINE_SPINLOCK(rtc_lock);
+#ifdef CONFIG_HIGH_RES_TIMERS
+static void mips_set_next_event(unsigned long evt);
+static void mips_set_mode(int mode, void *priv);
+
+static struct clock_event lapic_clockevent = {
+ .name = "mips clockevent interface",
+ .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE |
+ CLOCK_HAS_IRQHANDLER
+#ifdef CONFIG_SMP
+ | CLOCK_CAP_UPDATE
+#endif
+ ,
+ .shift = 32,
+ .set_next_event = mips_set_next_event,
+};
+#endif
+
/*
* By default we provide the null RTC ops
*/
@@ -68,6 +94,12 @@ unsigned long (*rtc_mips_get_time)(void)
int (*rtc_mips_set_time)(unsigned long) = null_rtc_set_time;
int (*rtc_mips_set_mmss)(unsigned long);
+u64 read_persistent_clock(void)
+{
+ unsigned long sec;
+ sec = rtc_mips_get_time();
+ return (u64)sec * NSEC_PER_SEC;
+}
/* usecs per counter cycle, shifted to left by 32 bits */
static unsigned int sll32_usecs_per_cycle;
@@ -75,18 +107,30 @@ static unsigned int sll32_usecs_per_cycl
/* how many counter cycles in a jiffy */
static unsigned long cycles_per_jiffy __read_mostly;
+static unsigned long hrt_cycles_per_jiffy __read_mostly;
+
+
/* Cycle counter value at the previous timer interrupt.. */
static unsigned int timerhi, timerlo;
/* expirelo is the count value for next CPU timer interrupt */
static unsigned int expirelo;
-
/*
* Null timer ack for systems not needing one (e.g. i8254).
*/
static void null_timer_ack(void) { /* nothing */ }
+#ifdef CONFIG_HIGH_RES_TIMERS
+/*
+ * Set the next event
+ */
+static void mips_set_next_event(unsigned long evt)
+{
+ write_c0_compare(read_c0_count() + evt);
+}
+#endif
+
/*
* Null high precision timer functions for systems lacking one.
*/
@@ -100,7 +144,6 @@ static void null_hpt_init(unsigned int c
/* nothing */
}
-
/*
* Timer ack for an R4k-compatible timer of a known frequency.
*/
@@ -110,14 +153,15 @@ static void c0_timer_ack(void)
#ifndef CONFIG_SOC_PNX8550 /* pnx8550 resets to zero */
/* Ack this timer interrupt and set the next one. */
- expirelo += cycles_per_jiffy;
+ expirelo += hrt_cycles_per_jiffy;
#endif
write_c0_compare(expirelo);
/* Check to see if we have missed any timer interrupts. */
- while (((count = read_c0_count()) - expirelo) < 0x7fffffff) {
- /* missed_timer_count++; */
- expirelo = count + cycles_per_jiffy;
+ count = read_c0_count();
+ if ((count - expirelo) < 0x7fffffff) {
+ /* missed_timer_count++; */
+ expirelo = count + hrt_cycles_per_jiffy;
write_c0_compare(expirelo);
}
}
@@ -250,11 +294,9 @@ static unsigned long null_gettimeoffset(
return 0;
}
-
/* The function pointer to one of the gettimeoffset funcs. */
unsigned long (*do_gettimeoffset)(void) = null_gettimeoffset;
-
static unsigned long fixed_rate_gettimeoffset(void)
{
u32 count;
@@ -410,6 +452,7 @@ void local_timer_interrupt(int irq, void
{
if (current->pid)
profile_tick(CPU_PROFILING, regs);
+
update_process_times(user_mode(regs));
}
@@ -438,7 +481,7 @@ irqreturn_t timer_interrupt(int irq, voi
/*
* If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. rtc_mips_set_time() has to be
+ * CMOS clock accordingly every ~11 minutes. rtc_set_time() has to be
* called as close as possible to 500 ms before the new second starts.
*/
if (ntp_synced() &&
@@ -518,6 +561,15 @@ int (*perf_irq)(struct pt_regs *regs) =
EXPORT_SYMBOL(null_perf_irq);
EXPORT_SYMBOL(perf_irq);
+#ifdef CONFIG_HIGH_RES_TIMERS
+void event_timer_handler(struct pt_regs *regs)
+{
+ c0_timer_ack();
+ if (lapic_clockevent.event_handler)
+ lapic_clockevent.event_handler(regs, NULL);
+}
+#endif
+
asmlinkage void ll_timer_interrupt(int irq, struct pt_regs *regs)
{
int r2 = cpu_has_mips_r2;
@@ -531,6 +583,15 @@ asmlinkage void ll_timer_interrupt(int i
* performance counter interrupt was pending, so we have to run the
* performance counter interrupt handler anyway.
*/
+#ifdef CONFIG_HIGH_RES_TIMERS
+ /*
+ * Run the event handler
+ */
+ if (!r2 || (read_c0_cause() & (1 << 26)))
+ if (lapic_clockevent.event_handler)
+ lapic_clockevent.event_handler(regs, NULL);
+#endif
+
if (!r2 || (read_c0_cause() & (1 << 26)))
if (perf_irq(regs))
goto out;
@@ -563,7 +624,7 @@ asmlinkage void ll_local_timer_interrupt
* b) (optional) calibrate and set the mips_hpt_frequency
* (only needed if you intended to use fixed_rate_gettimeoffset
* or use cpu counter as timer interrupt source)
- * 2) setup xtime based on rtc_mips_get_time().
+ * 2) setup xtime based on rtc_get_time().
* 3) choose a appropriate gettimeoffset routine.
* 4) calculate a couple of cached variables for later usage
* 5) plat_timer_setup() -
@@ -578,7 +639,7 @@ unsigned int mips_hpt_frequency;
static struct irqaction timer_irqaction = {
.handler = timer_interrupt,
- .flags = IRQF_DISABLED,
+ .flags = IRQF_NODELAY | IRQF_DISABLED,
.name = "timer",
};
@@ -627,6 +688,9 @@ static unsigned int __init calibrate_hpt
void __init time_init(void)
{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ u64 temp;
+#endif
if (board_time_init)
board_time_init();
@@ -688,6 +752,12 @@ void __init time_init(void)
/* Calculate cache parameters. */
cycles_per_jiffy = (mips_hpt_frequency + HZ / 2) / HZ;
+#ifdef CONFIG_HIGH_RES_TIMERS
+ hrt_cycles_per_jiffy = ((CONFIG_CPU_SPEED * 1000000) + HZ / 2) / HZ;
+#else
+ hrt_cycles_per_jiffy = cycles_per_jiffy;
+#endif
+
/* sll32_usecs_per_cycle = 10^6 * 2^32 / mips_counter_freq */
do_div64_32(sll32_usecs_per_cycle,
1000000, mips_hpt_frequency / 2,
@@ -776,3 +846,128 @@ unsigned long long sched_clock(void)
{
return (unsigned long long)jiffies*(1000000000/HZ);
}
+
+
+#ifdef CONFIG_SMP
+/*
+ * We have to synchronize the master CPU with all the slave CPUs
+ */
+static atomic_t cpus_started;
+static atomic_t cpus_ready;
+static atomic_t cpus_count;
+/*
+ * Master processor inits
+ */
+static void sync_cpus_init(int v)
+{
+ atomic_set(&cpus_count, 0);
+ mb();
+ atomic_set(&cpus_started, v);
+ mb();
+ atomic_set(&cpus_ready, v);
+ mb();
+}
+
+/*
+ * Called by the master processor
+ */
+static void sync_cpus_master(int v)
+{
+ atomic_set(&cpus_count, 0);
+ mb();
+ atomic_set(&cpus_started, v);
+ mb();
+ /* Wait here till all other CPUs are now ready */
+ while (atomic_read(&cpus_count) != (num_online_cpus() - 1))
+ mb();
+ atomic_set(&cpus_ready, v);
+ mb();
+}
+/*
+ * Called by the slave processors
+ */
+static void sync_cpus_slave(int v)
+{
+ /* Check if the master has been through this */
+ while (atomic_read(&cpus_started) != v)
+ mb();
+ atomic_inc(&cpus_count);
+ mb();
+ while (atomic_read(&cpus_ready) != v)
+ mb();
+}
+/*
+ * Called by the slave CPUs when done syncing the count register
+ * with the master processor
+ */
+static void sync_cpus_slave_exit(int v)
+{
+ while (atomic_read(&cpus_started) != v)
+ mb();
+ atomic_inc(&cpus_count);
+ mb();
+}
+
+#define LOOPS 100
+static u32 c0_count[NR_CPUS]; /* Count register per CPU */
+static u32 c[NR_CPUS][LOOPS + 1]; /* Count register per CPU per loop for syncing */
+
+/*
+ * Slave processors execute this via IPI
+ */
+static void sync_c0_count_slave(void *info)
+{
+ int cpus = 1, loop, prev_count = 0, cpu = smp_processor_id();
+ unsigned long flags;
+ u32 diff_count; /* CPU count registers are 32-bit */
+ local_irq_save(flags);
+
+ for (loop = 0; loop <= LOOPS; loop++) {
+ /* Sync with the Master processor */
+ sync_cpus_slave(cpus++);
+ c[cpu][loop] = c0_count[cpu] = read_c0_count();
+ mb();
+ sync_cpus_slave(cpus++);
+ diff_count = c0_count[0] - c0_count[cpu];
+ diff_count += prev_count;
+ diff_count += read_c0_count();
+ write_c0_count(diff_count);
+ prev_count = (prev_count >> 1) +
+ ((int)(c0_count[0] - c0_count[cpu]) >> 1);
+ }
+
+ /* Slave processor is done syncing count register with Master */
+ sync_cpus_slave_exit(cpus++);
+ printk("SMP: Slave processor %d done syncing count \n", cpu);
+ local_irq_restore(flags);
+}
+
+/*
+ * Master kicks off the syncing process
+ */
+void sync_c0_count_master(void)
+{
+ int cpus = 0, loop, cpu = smp_processor_id();
+ unsigned long flags;
+
+ printk("SMP: Starting to sync the c0 count register ... \n");
+ sync_cpus_init(cpus++);
+
+ /* Kick off the slave processors to also start the syncing process */
+ smp_call_function(sync_c0_count_slave, NULL, 0, 0);
+ local_irq_save(flags);
+
+ for (loop = 0; loop <= LOOPS; loop++) {
+ /* Wait for all the CPUs here */
+ sync_cpus_master(cpus++);
+ c[cpu][loop] = c0_count[cpu] = read_c0_count();
+ mb();
+ /* Do syncing once more */
+ sync_cpus_master(cpus++);
+ }
+ sync_cpus_master(cpus++);
+ local_irq_restore(flags);
+
+ printk("SMP: Syncing process completed accross CPUs ... \n");
+}
+#endif /* CONFIG_SMP */
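
The sync_cpus_*() helpers above implement a simple two-phase rendezvous: the master opens a numbered phase, waits for every slave to check in, then releases them, and both sides bump the phase number each round. A compact user-space sketch of the same protocol with C11 atomics (NCPUS stands in for num_online_cpus(); the kernel version uses atomic_t plus explicit mb() barriers):

#include <stdatomic.h>

#define NCPUS 4

static atomic_int phase_started, phase_ready, checked_in;

/* master side: mirrors sync_cpus_master() */
static void rendezvous_master(int v)
{
	atomic_store(&checked_in, 0);
	atomic_store(&phase_started, v);	/* open phase v */
	while (atomic_load(&checked_in) != NCPUS - 1)
		;				/* wait for all slaves */
	atomic_store(&phase_ready, v);		/* release them */
}

/* slave side: mirrors sync_cpus_slave() */
static void rendezvous_slave(int v)
{
	while (atomic_load(&phase_started) != v)
		;				/* wait for the master to open phase v */
	atomic_fetch_add(&checked_in, 1);	/* check in */
	while (atomic_load(&phase_ready) != v)
		;				/* wait for the release */
}

Each loop iteration of the count-synchronization code runs this rendezvous twice, so the master's read of c0_count and the slaves' compensating writes happen back to back.
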
Index: linux/arch/mips/kernel/traps.c
===================================================================
--- linux.orig/arch/mips/kernel/traps.c
+++ linux/arch/mips/kernel/traps.c
@@ -274,7 +274,7 @@ void show_registers(struct pt_regs *regs
printk("\n");
}
-static DEFINE_SPINLOCK(die_lock);
+static DEFINE_RAW_SPINLOCK(die_lock);
NORET_TYPE void ATTRIB_NORET die(const char * str, struct pt_regs * regs)
{
Index: linux/arch/mips/mm/init.c
===================================================================
--- linux.orig/arch/mips/mm/init.c
+++ linux/arch/mips/mm/init.c
@@ -36,7 +36,7 @@
#include
#include
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;
Index: linux/arch/mips/sibyte/cfe/smp.c
===================================================================
--- linux.orig/arch/mips/sibyte/cfe/smp.c
+++ linux/arch/mips/sibyte/cfe/smp.c
@@ -107,4 +107,8 @@ void prom_smp_finish(void)
*/
void prom_cpus_done(void)
{
+#ifdef CONFIG_HIGH_RES_TIMERS
+ extern void sync_c0_count_master(void);
+ sync_c0_count_master();
+#endif
}
Index: linux/arch/mips/sibyte/sb1250/irq.c
===================================================================
--- linux.orig/arch/mips/sibyte/sb1250/irq.c
+++ linux/arch/mips/sibyte/sb1250/irq.c
@@ -85,7 +85,7 @@ static struct irq_chip sb1250_irq_type =
/* Store the CPU id (not the logical number) */
int sb1250_irq_owner[SB1250_NR_IRQS];
-DEFINE_SPINLOCK(sb1250_imr_lock);
+DEFINE_RAW_SPINLOCK(sb1250_imr_lock);
void sb1250_mask_irq(int cpu, int irq)
{
@@ -262,7 +262,7 @@ static irqreturn_t sb1250_dummy_handler
static struct irqaction sb1250_dummy_action = {
.handler = sb1250_dummy_handler,
- .flags = 0,
+ .flags = IRQF_NODELAY,
.mask = CPU_MASK_NONE,
.name = "sb1250-private",
.next = NULL,
@@ -372,6 +372,10 @@ void __init arch_init_irq(void)
#ifdef CONFIG_KGDB
imask |= STATUSF_IP6;
#endif
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+ imask |= STATUSF_IP7;
+#endif
/* Enable necessary IPs, disable the rest */
change_c0_status(ST0_IM, imask);
@@ -465,6 +469,9 @@ asmlinkage void plat_irq_dispatch(struct
else
#endif
+ if (pending & CAUSEF_IP7)
+ event_timer_handler(regs);
+
if (pending & CAUSEF_IP4)
sb1250_timer_interrupt(regs);
Index: linux/arch/mips/sibyte/sb1250/smp.c
===================================================================
--- linux.orig/arch/mips/sibyte/sb1250/smp.c
+++ linux/arch/mips/sibyte/sb1250/smp.c
@@ -59,7 +59,7 @@ void sb1250_smp_finish(void)
{
extern void sb1250_time_init(void);
sb1250_time_init();
- local_irq_enable();
+ raw_local_irq_enable();
}
/*
Index: linux/arch/mips/sibyte/swarm/setup.c
===================================================================
--- linux.orig/arch/mips/sibyte/swarm/setup.c
+++ linux/arch/mips/sibyte/swarm/setup.c
@@ -131,6 +131,12 @@ void __init plat_mem_setup(void)
rtc_mips_set_time = m41t81_set_time;
}
+#ifdef CONFIG_HIGH_RES_TIMERS
+ /*
+ * set the mips_hpt_frequency here
+ */
+ mips_hpt_frequency = CONFIG_CPU_SPEED * 1000000;
+#endif
printk("This kernel optimized for "
#ifdef CONFIG_SIMULATION
"simulation"
Index: linux/arch/powerpc/Kconfig
===================================================================
--- linux.orig/arch/powerpc/Kconfig
+++ linux/arch/powerpc/Kconfig
@@ -26,18 +26,15 @@ config MMU
bool
default y
-config GENERIC_HARDIRQS
+config GENERIC_TIME
bool
default y
-config IRQ_PER_CPU
+config GENERIC_HARDIRQS
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
+config IRQ_PER_CPU
bool
default y
@@ -596,6 +593,18 @@ config HIGHMEM
source kernel/Kconfig.hz
source kernel/Kconfig.preempt
+
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ default y
+
+config ASM_SEMAPHORES
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+
source "fs/Kconfig.binfmt"
# We optimistically allocate largepages from the VM, so make the limit
Index: linux/arch/powerpc/boot/Makefile
===================================================================
--- linux.orig/arch/powerpc/boot/Makefile
+++ linux/arch/powerpc/boot/Makefile
@@ -29,6 +29,14 @@ OBJCOPYFLAGS := contents,alloc,load,r
OBJCOPY_COFF_ARGS := -O aixcoff-rs6000 --set-start 0x500000
OBJCOPY_MIB_ARGS := -O aixcoff-rs6000 -R .stab -R .stabstr -R .comment
+ifdef CONFIG_MCOUNT
+# do not trace the boot loader
+nullstring :=
+space := $(nullstring) # end of the line
+pg_flag = $(nullstring) -pg # end of the line
+CFLAGS := $(subst ${pg_flag},${space},${CFLAGS})
+endif
+
zlib := inffast.c inflate.c inftrees.c
zlibheader := inffast.h inffixed.h inflate.h inftrees.h infutil.h
zliblinuxheader := zlib.h zconf.h zutil.h
@@ -44,7 +52,7 @@ obj-boot := $(addsuffix .o, $(basename $
BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
quiet_cmd_copy_zlib = COPY $@
- cmd_copy_zlib = sed "s@__attribute_used__@@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@
+ cmd_copy_zlib = sed "s@__attribute_used__@@;s@.include.@@;s@.include.@@;s@.*spin.*lock.*@@;s@.*SPINLOCK.*@@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@
quiet_cmd_copy_zlibheader = COPY $@
cmd_copy_zlibheader = sed "s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@
Index: linux/arch/powerpc/kernel/Makefile
===================================================================
--- linux.orig/arch/powerpc/kernel/Makefile
+++ linux/arch/powerpc/kernel/Makefile
@@ -10,10 +10,11 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
-obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
+obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o
obj-y += vdso32/
+obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o cpu_setup_power4.o \
Index: linux/arch/powerpc/kernel/entry_32.S
===================================================================
--- linux.orig/arch/powerpc/kernel/entry_32.S
+++ linux/arch/powerpc/kernel/entry_32.S
@@ -638,7 +638,7 @@ user_exc_return: /* r10 contains MSR_KE
/* Check current_thread_info()->flags */
rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
lwz r9,TI_FLAGS(r9)
- andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED)
+ andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
bne do_work
restore_user:
@@ -856,7 +856,7 @@ load_dbcr0:
#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
do_work: /* r10 contains MSR_KERNEL here */
- andi. r0,r9,_TIF_NEED_RESCHED
+ andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
@@ -870,7 +870,7 @@ recheck:
MTMSRD(r10) /* disable interrupts */
rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
lwz r9,TI_FLAGS(r9)
- andi. r0,r9,_TIF_NEED_RESCHED
+ andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
bne- do_resched
andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK
beq restore_user
@@ -978,3 +978,85 @@ machine_check_in_rtas:
/* XXX load up BATs and panic */
#endif /* CONFIG_PPC_RTAS */
+
+#ifdef CONFIG_MCOUNT
+/*
+ * mcount() is not the same as _mcount(). The callers of mcount() have a
+ * normal context. The callers of _mcount() do not have a stack frame and
+ * have not saved the "caller saves" registers.
+ */
+_GLOBAL(mcount)
+ stwu r1,-16(r1)
+ mflr r3
+ lis r5,mcount_enabled@ha
+ lwz r5,mcount_enabled@l(r5)
+ stw r3,20(r1)
+ cmpwi r5,0
+ beq 1f
+ /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */
+ lwz r4,16(r1)
+ lwz r4,4(r4)
+ bl __trace
+1:
+ lwz r0,20(r1)
+ mtlr r0
+ addi r1,r1,16
+ blr
+
+/*
+ * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the
+ * C compiler to add a call to _mcount() at the start of each function
+ * preamble, before the stack frame is created. An example of this preamble
+ * code is:
+ *
+ * mflr r0
+ * lis r12,-16354
+ * stw r0,4(r1)
+ * addi r0,r12,-19652
+ * bl 0xc00034c8 <_mcount>
+ * mflr r0
+ * stwu r1,-16(r1)
+ */
+_GLOBAL(_mcount)
+#define M_STK_SIZE 48
+ /* Would not expect to need to save cr, but glibc version of */
+ /* _mcount() does, so cautiously saving it here too. */
+ stwu r1,-M_STK_SIZE(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ mflr r3 /* will use as first arg to __trace() */
+ mfcr r4
+ lis r5,mcount_enabled@ha
+ lwz r5,mcount_enabled@l(r5)
+ cmpwi r5,0
+ stw r3, 44(r1) /* lr */
+ stw r4, 8(r1) /* cr */
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+ beq 1f
+ /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */
+ lwz r4,M_STK_SIZE+4(r1)
+ bl __trace
+1:
+ lwz r8, 8(r1) /* cr */
+ lwz r9, 44(r1) /* lr */
+ lwz r3, 12(r1)
+ lwz r4, 16(r1)
+ lwz r5, 20(r1)
+ mtcrf 0xff,r8
+ mtctr r9
+ lwz r0, 52(r1)
+ lwz r6, 24(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+ addi r1,r1,M_STK_SIZE
+ mtlr r0
+ bctr
+
+#endif /* CONFIG_MCOUNT */
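
All of the instrumented calls funnel into __trace(ip, parent_ip), gated by the mcount_enabled global that both stubs test. A user-space sketch of such a hook (the body here is only an assumption for illustration; the latency tracer behind this patch records considerably more state):

#include <stdio.h>

int mcount_enabled = 1;		/* the same on/off switch the stubs test */

/* ip is the instrumented function, parent_ip its caller */
void __trace(unsigned long ip, unsigned long parent_ip)
{
	if (!mcount_enabled)
		return;
	fprintf(stderr, "call %#lx <- %#lx\n", ip, parent_ip);
}

Building with gcc -pg is what makes the compiler emit the _mcount call in every function preamble, as the quoted disassembly in the comment shows.
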
Index: linux/arch/powerpc/kernel/irq.c
===================================================================
--- linux.orig/arch/powerpc/kernel/irq.c
+++ linux/arch/powerpc/kernel/irq.c
@@ -91,8 +91,6 @@ extern atomic_t ipi_sent;
#endif
#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(irq_desc);
-
int distribute_irqs = 1;
#endif /* CONFIG_PPC64 */
Index: linux/arch/powerpc/kernel/ppc_ksyms.c
===================================================================
--- linux.orig/arch/powerpc/kernel/ppc_ksyms.c
+++ linux/arch/powerpc/kernel/ppc_ksyms.c
@@ -16,7 +16,6 @@
#include
#include
-#include
#include
#include
#include
@@ -189,7 +188,6 @@ EXPORT_SYMBOL(screen_info);
#ifdef CONFIG_PPC32
EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(irq_desc);
EXPORT_SYMBOL(tb_ticks_per_jiffy);
EXPORT_SYMBOL(console_drivers);
EXPORT_SYMBOL(cacheable_memcpy);
Index: linux/arch/powerpc/kernel/semaphore.c
===================================================================
--- linux.orig/arch/powerpc/kernel/semaphore.c
+++ linux/arch/powerpc/kernel/semaphore.c
@@ -31,7 +31,7 @@
* sem->count = tmp;
* return old_count;
*/
-static inline int __sem_update_count(struct semaphore *sem, int incr)
+static inline int __sem_update_count(struct compat_semaphore *sem, int incr)
{
int old_count, tmp;
@@ -50,7 +50,7 @@ static inline int __sem_update_count(str
return old_count;
}
-void __up(struct semaphore *sem)
+void __compat_up(struct compat_semaphore *sem)
{
/*
* Note that we incremented count in up() before we came here,
@@ -63,7 +63,7 @@ void __up(struct semaphore *sem)
__sem_update_count(sem, 1);
wake_up(&sem->wait);
}
-EXPORT_SYMBOL(__up);
+EXPORT_SYMBOL(__compat_up);
/*
* Note that when we come in to __down or __down_interruptible,
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(__up);
* Thus it is only when we decrement count from some value > 0
* that we have actually got the semaphore.
*/
-void __sched __down(struct semaphore *sem)
+void __sched __compat_down(struct compat_semaphore *sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -101,9 +101,9 @@ void __sched __down(struct semaphore *se
*/
wake_up(&sem->wait);
}
-EXPORT_SYMBOL(__down);
+EXPORT_SYMBOL(__compat_down);
-int __sched __down_interruptible(struct semaphore * sem)
+int __sched __compat_down_interruptible(struct compat_semaphore *sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -132,4 +132,10 @@ int __sched __down_interruptible(struct
wake_up(&sem->wait);
return retval;
}
-EXPORT_SYMBOL(__down_interruptible);
+EXPORT_SYMBOL(__compat_down_interruptible);
+
+int compat_sem_is_locked(struct compat_semaphore *sem)
+{
+ return (int) atomic_read(&sem->count) < 0;
+}
+EXPORT_SYMBOL(compat_sem_is_locked);
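
Both semaphore conversions (this file and the mips one earlier) keep the same core primitive: __sem_update_count() atomically clamps a negative count — negative values only record that sleepers exist — to zero, then adds incr, returning the old value. A C11 sketch of that read-modify-write, standing in for the lwarx/stwcx. (or ll/sc) loop:

#include <stdatomic.h>

static int sem_update_count(atomic_int *count, int incr)
{
	int old, new_val;

	do {
		old = atomic_load(count);
		new_val = (old < 0 ? 0 : old) + incr;	/* clamp, then add */
	} while (!atomic_compare_exchange_weak(count, &old, new_val));

	return old;	/* callers key off the pre-update value */
}

__compat_up() passes incr = 1; __compat_down() passes -1 and uses the returned pre-update value to decide whether the caller actually acquired the semaphore or must sleep.
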
Index: linux/arch/powerpc/kernel/smp.c
===================================================================
--- linux.orig/arch/powerpc/kernel/smp.c
+++ linux/arch/powerpc/kernel/smp.c
@@ -148,6 +148,16 @@ void smp_send_reschedule(int cpu)
smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
}
+/*
+ * This function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE);
+}
+
#ifdef CONFIG_DEBUGGER
void smp_send_debugger_break(int cpu)
{
@@ -184,7 +194,7 @@ void smp_send_stop(void)
* static memory requirements. It also looks cleaner.
* Stolen from the i386 version.
*/
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
+static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(call_lock);
static struct call_data_struct {
void (*func) (void *info);
Index: linux/arch/powerpc/kernel/time.c
===================================================================
--- linux.orig/arch/powerpc/kernel/time.c
+++ linux/arch/powerpc/kernel/time.c
@@ -73,6 +73,9 @@
#endif
#include
+unsigned long cpu_khz; /* Detected as we calibrate the timebase */
+EXPORT_SYMBOL(cpu_khz);
+
/* keep track of when we need to update the rtc */
time_t last_rtc_update;
#ifdef CONFIG_PPC_ISERIES
@@ -115,8 +118,6 @@ EXPORT_SYMBOL_GPL(rtc_lock);
u64 tb_to_ns_scale;
unsigned tb_to_ns_shift;
-struct gettimeofday_struct do_gtod;
-
extern unsigned long wall_jiffies;
extern struct timezone sys_tz;
@@ -407,162 +408,8 @@ static __inline__ void timer_check_rtc(v
}
}
-/*
- * This version of gettimeofday has microsecond resolution.
- */
-static inline void __do_gettimeofday(struct timeval *tv)
-{
- unsigned long sec, usec;
- u64 tb_ticks, xsec;
- struct gettimeofday_vars *temp_varp;
- u64 temp_tb_to_xs, temp_stamp_xsec;
-
- /*
- * These calculations are faster (gets rid of divides)
- * if done in units of 1/2^20 rather than microseconds.
- * The conversion to microseconds at the end is done
- * without a divide (and in fact, without a multiply)
- */
- temp_varp = do_gtod.varp;
-
- /* Sampling the time base must be done after loading
- * do_gtod.varp in order to avoid racing with update_gtod.
- */
- data_barrier(temp_varp);
- tb_ticks = get_tb() - temp_varp->tb_orig_stamp;
- temp_tb_to_xs = temp_varp->tb_to_xs;
- temp_stamp_xsec = temp_varp->stamp_xsec;
- xsec = temp_stamp_xsec + mulhdu(tb_ticks, temp_tb_to_xs);
- sec = xsec / XSEC_PER_SEC;
- usec = (unsigned long)xsec & (XSEC_PER_SEC - 1);
- usec = SCALE_XSEC(usec, 1000000);
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-void do_gettimeofday(struct timeval *tv)
-{
- if (__USE_RTC()) {
- /* do this the old way */
- unsigned long flags, seq;
- unsigned int sec, nsec, usec;
-
- do {
- seq = read_seqbegin_irqsave(&xtime_lock, flags);
- sec = xtime.tv_sec;
- nsec = xtime.tv_nsec + tb_ticks_since(tb_last_jiffy);
- } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
- usec = nsec / 1000;
- while (usec >= 1000000) {
- usec -= 1000000;
- ++sec;
- }
- tv->tv_sec = sec;
- tv->tv_usec = usec;
- return;
- }
- __do_gettimeofday(tv);
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-/*
- * There are two copies of tb_to_xs and stamp_xsec so that no
- * lock is needed to access and use these values in
- * do_gettimeofday. We alternate the copies and as long as a
- * reasonable time elapses between changes, there will never
- * be inconsistent values. ntpd has a minimum of one minute
- * between updates.
- */
-static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
- u64 new_tb_to_xs)
-{
- unsigned temp_idx;
- struct gettimeofday_vars *temp_varp;
-
- temp_idx = (do_gtod.var_idx == 0);
- temp_varp = &do_gtod.vars[temp_idx];
-
- temp_varp->tb_to_xs = new_tb_to_xs;
- temp_varp->tb_orig_stamp = new_tb_stamp;
- temp_varp->stamp_xsec = new_stamp_xsec;
- smp_mb();
- do_gtod.varp = temp_varp;
- do_gtod.var_idx = temp_idx;
-
- /*
- * tb_update_count is used to allow the userspace gettimeofday code
- * to assure itself that it sees a consistent view of the tb_to_xs and
- * stamp_xsec variables. It reads the tb_update_count, then reads
- * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
- * the two values of tb_update_count match and are even then the
- * tb_to_xs and stamp_xsec values are consistent. If not, then it
- * loops back and reads them again until this criteria is met.
- * We expect the caller to have done the first increment of
- * vdso_data->tb_update_count already.
- */
- vdso_data->tb_orig_stamp = new_tb_stamp;
- vdso_data->stamp_xsec = new_stamp_xsec;
- vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
- smp_wmb();
- ++(vdso_data->tb_update_count);
-}
-
-/*
- * When the timebase - tb_orig_stamp gets too big, we do a manipulation
- * between tb_orig_stamp and stamp_xsec. The goal here is to keep the
- * difference tb - tb_orig_stamp small enough to always fit inside a
- * 32 bits number. This is a requirement of our fast 32 bits userland
- * implementation in the vdso. If we "miss" a call to this function
- * (interrupt latency, CPU locked in a spinlock, ...) and we end up
- * with a too big difference, then the vdso will fallback to calling
- * the syscall
- */
-static __inline__ void timer_recalc_offset(u64 cur_tb)
-{
- unsigned long offset;
- u64 new_stamp_xsec;
- u64 tlen, t2x;
- u64 tb, xsec_old, xsec_new;
- struct gettimeofday_vars *varp;
-
- if (__USE_RTC())
- return;
- tlen = current_tick_length();
- offset = cur_tb - do_gtod.varp->tb_orig_stamp;
- if (tlen == last_tick_len && offset < 0x80000000u)
- return;
- if (tlen != last_tick_len) {
- t2x = mulhdu(tlen << TICKLEN_SHIFT, ticklen_to_xs);
- last_tick_len = tlen;
- } else
- t2x = do_gtod.varp->tb_to_xs;
- new_stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
- do_div(new_stamp_xsec, 1000000000);
- new_stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
-
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * Make sure time doesn't go backwards for userspace gettimeofday.
- */
- tb = get_tb();
- varp = do_gtod.varp;
- xsec_old = mulhdu(tb - varp->tb_orig_stamp, varp->tb_to_xs)
- + varp->stamp_xsec;
- xsec_new = mulhdu(tb - cur_tb, t2x) + new_stamp_xsec;
- if (xsec_new < xsec_old)
- new_stamp_xsec += xsec_old - xsec_new;
-
- update_gtod(cur_tb, new_stamp_xsec, t2x);
-}
-
#ifdef CONFIG_SMP
-unsigned long profile_pc(struct pt_regs *regs)
+unsigned long notrace profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
@@ -610,11 +457,7 @@ static void iSeries_tb_recal(void)
tb_ticks_per_sec = new_tb_ticks_per_sec;
calc_cputime_factors();
div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
- do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
- do_gtod.varp->tb_to_xs = tb_to_xs;
- vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
- vdso_data->tb_to_xs = tb_to_xs;
}
else {
printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
@@ -781,81 +624,6 @@ unsigned long long sched_clock(void)
return mulhdu(get_tb(), tb_to_ns_scale) << tb_to_ns_shift;
}
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, new_sec = tv->tv_sec;
- long wtm_nsec, new_nsec = tv->tv_nsec;
- unsigned long flags;
- u64 new_xsec;
- unsigned long tb_delta;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irqsave(&xtime_lock, flags);
-
- /*
- * Updating the RTC is not the job of this code. If the time is
- * stepped under NTP, the RTC will be updated after STA_UNSYNC
- * is cleared. Tools like clock/hwclock either copy the RTC
- * to the system time, in which case there is no point in writing
- * to the RTC again, or write to the RTC but then they don't call
- * settimeofday to perform this operation.
- */
-#ifdef CONFIG_PPC_ISERIES
- if (first_settimeofday) {
- iSeries_tb_recal();
- first_settimeofday = 0;
- }
-#endif
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /*
- * Subtract off the number of nanoseconds since the
- * beginning of the last tick.
- * Note that since we don't increment jiffies_64 anywhere other
- * than in do_timer (since we don't have a lost tick problem),
- * wall_jiffies will always be the same as jiffies,
- * and therefore the (jiffies - wall_jiffies) computation
- * has been removed.
- */
- tb_delta = tb_ticks_since(tb_last_jiffy);
- tb_delta = mulhdu(tb_delta, do_gtod.varp->tb_to_xs); /* in xsec */
- new_nsec -= SCALE_XSEC(tb_delta, 1000000000);
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - new_sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - new_nsec);
-
- set_normalized_timespec(&xtime, new_sec, new_nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- /* In case of a large backwards jump in time with NTP, we want the
- * clock to be updated as soon as the PLL is again in lock.
- */
- last_rtc_update = new_sec - 658;
-
- ntp_clear();
-
- new_xsec = xtime.tv_nsec;
- if (new_xsec != 0) {
- new_xsec *= XSEC_PER_SEC;
- do_div(new_xsec, NSEC_PER_SEC);
- }
- new_xsec += (u64)xtime.tv_sec * XSEC_PER_SEC;
- update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
-
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-
- write_sequnlock_irqrestore(&xtime_lock, flags);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
static int __init get_freq(char *name, int cells, unsigned long *val)
{
@@ -1024,20 +792,6 @@ void __init time_init(void)
xtime.tv_sec = tm;
xtime.tv_nsec = 0;
- do_gtod.varp = &do_gtod.vars[0];
- do_gtod.var_idx = 0;
- do_gtod.varp->tb_orig_stamp = tb_last_jiffy;
- __get_cpu_var(last_jiffy) = tb_last_jiffy;
- do_gtod.varp->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
- do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
- do_gtod.varp->tb_to_xs = tb_to_xs;
- do_gtod.tb_to_us = tb_to_us;
-
- vdso_data->tb_orig_stamp = tb_last_jiffy;
- vdso_data->tb_update_count = 0;
- vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
- vdso_data->stamp_xsec = (u64) xtime.tv_sec * XSEC_PER_SEC;
- vdso_data->tb_to_xs = tb_to_xs;
time_freq = 0;
@@ -1050,7 +804,6 @@ void __init time_init(void)
set_dec(tb_ticks_per_jiffy);
}
-
#define FEBRUARY 2
#define STARTOFTIME 1970
#define SECDAY 86400L
@@ -1195,3 +948,36 @@ void div128_by_32(u64 dividend_high, u64
dr->result_low = ((u64)y << 32) + z;
}
+
+
+/* powerpc clocksource code */
+
+#include <linux/clocksource.h>
+static cycle_t timebase_read(void)
+{
+ return (cycle_t)get_tb();
+}
+
+struct clocksource clocksource_timebase = {
+ .name = "timebase",
+ .rating = 200,
+ .read = timebase_read,
+ .mask = (cycle_t)-1,
+ .mult = 0,
+ .shift = 22,
+};
+
+
+/* XXX - this should be calculated or properly externed! */
+static int __init init_timebase_clocksource(void)
+{
+ if (__USE_RTC())
+ return -ENODEV;
+
+ clocksource_timebase.mult = clocksource_hz2mult(tb_ticks_per_sec,
+ clocksource_timebase.shift);
+ return clocksource_register(&clocksource_timebase);
+}
+
+module_init(init_timebase_clocksource);
+
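For reference, clocksource_hz2mult() just precomputes the fixed-point factor this clocksource needs: mult = (NSEC_PER_SEC << shift) / hz, after which a timebase delta converts to nanoseconds as (delta * mult) >> shift. A self-contained illustration of the arithmetic — the 512 MHz figure is an arbitrary example, not a claim about real timebase frequencies:

#include <stdint.h>
#include <stdio.h>

static uint32_t hz2mult(uint32_t hz, uint32_t shift)
{
	uint64_t tmp = (uint64_t)1000000000 << shift;

	tmp += hz / 2;			/* round to nearest */
	return (uint32_t)(tmp / hz);
}

int main(void)
{
	uint32_t shift = 22;		/* clocksource_timebase.shift above */
	uint32_t mult = hz2mult(512000000, shift);
	uint64_t delta = 512;		/* cycles: exactly 1 us at 512 MHz */

	/* prints: mult=8192000, 512 cycles -> 1000 ns */
	printf("mult=%u, %llu cycles -> %llu ns\n", (unsigned)mult,
	       (unsigned long long)delta,
	       (unsigned long long)((delta * mult) >> shift));
	return 0;
}

A larger shift carries more precision in mult but limits how big a delta fits in the 64-bit multiply before overflow — which is why the timebase here uses 22 while the sn2 RTC earlier uses 10.
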
Index: linux/arch/powerpc/kernel/traps.c
===================================================================
--- linux.orig/arch/powerpc/kernel/traps.c
+++ linux/arch/powerpc/kernel/traps.c
@@ -93,7 +93,7 @@ EXPORT_SYMBOL(unregister_die_notifier);
* Trap & Exception support
*/
-static DEFINE_SPINLOCK(die_lock);
+static DEFINE_RAW_SPINLOCK(die_lock);
int die(const char *str, struct pt_regs *regs, long err)
{
@@ -164,6 +164,11 @@ void _exception(int signr, struct pt_reg
return;
}
+#ifdef CONFIG_PREEMPT_RT
+ local_irq_enable();
+ preempt_check_resched();
+#endif
+
memset(&info, 0, sizeof(info));
info.si_signo = signr;
info.si_code = code;
Index: linux/arch/powerpc/lib/locks.c
===================================================================
--- linux.orig/arch/powerpc/lib/locks.c
+++ linux/arch/powerpc/lib/locks.c
@@ -24,7 +24,7 @@
#include
#include
-void __spin_yield(raw_spinlock_t *lock)
+void __spin_yield(__raw_spinlock_t *lock)
{
unsigned int lock_value, holder_cpu, yield_count;
@@ -79,7 +79,7 @@ void __rw_yield(raw_rwlock_t *rw)
}
#endif
-void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+void __raw_spin_unlock_wait(__raw_spinlock_t *lock)
{
while (lock->slock) {
HMT_low();
Index: linux/arch/powerpc/mm/fault.c
===================================================================
--- linux.orig/arch/powerpc/mm/fault.c
+++ linux/arch/powerpc/mm/fault.c
@@ -149,8 +149,8 @@ static void do_dabr(struct pt_regs *regs
* The return value is 0 if the fault was handled, or the signal
* number if this is a kernel fault that can't be handled here.
*/
-int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+int __kprobes notrace do_page_fault(struct pt_regs *regs,
+ unsigned long address, unsigned long error_code)
{
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
Index: linux/arch/powerpc/mm/init_32.c
===================================================================
--- linux.orig/arch/powerpc/mm/init_32.c
+++ linux/arch/powerpc/mm/init_32.c
@@ -56,7 +56,7 @@
#endif
#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
unsigned long total_memory;
unsigned long total_lowmem;
Index: linux/arch/powerpc/mm/tlb_64.c
===================================================================
--- linux.orig/arch/powerpc/mm/tlb_64.c
+++ linux/arch/powerpc/mm/tlb_64.c
@@ -37,7 +37,7 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, p
/* This is declared as we are using the more or less generic
* include/asm-powerpc/tlb.h file -- tgall
*/
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
unsigned long pte_freelist_forced_free;
Index: linux/arch/powerpc/platforms/cell/smp.c
===================================================================
--- linux.orig/arch/powerpc/platforms/cell/smp.c
+++ linux/arch/powerpc/platforms/cell/smp.c
@@ -133,7 +133,7 @@ static void __devinit smp_iic_setup_cpu(
iic_setup_cpu();
}
-static DEFINE_SPINLOCK(timebase_lock);
+static DEFINE_RAW_SPINLOCK(timebase_lock);
static unsigned long timebase = 0;
static void __devinit cell_give_timebase(void)
Index: linux/arch/powerpc/platforms/chrp/smp.c
===================================================================
--- linux.orig/arch/powerpc/platforms/chrp/smp.c
+++ linux/arch/powerpc/platforms/chrp/smp.c
@@ -45,7 +45,7 @@ static void __devinit smp_chrp_setup_cpu
mpic_setup_this_cpu();
}
-static DEFINE_SPINLOCK(timebase_lock);
+static DEFINE_RAW_SPINLOCK(timebase_lock);
static unsigned int timebase_upper = 0, timebase_lower = 0;
void __devinit smp_chrp_give_timebase(void)
Index: linux/arch/powerpc/platforms/chrp/time.c
===================================================================
--- linux.orig/arch/powerpc/platforms/chrp/time.c
+++ linux/arch/powerpc/platforms/chrp/time.c
@@ -27,7 +27,7 @@
#include
#include
-extern spinlock_t rtc_lock;
+extern raw_spinlock_t rtc_lock;
static int nvram_as1 = NVRAM_AS1;
static int nvram_as0 = NVRAM_AS0;
Index: linux/arch/powerpc/platforms/iseries/setup.c
===================================================================
--- linux.orig/arch/powerpc/platforms/iseries/setup.c
+++ linux/arch/powerpc/platforms/iseries/setup.c
@@ -594,12 +594,14 @@ static void yield_shared_processor(void)
static void iseries_shared_idle(void)
{
while (1) {
- while (!need_resched() && !hvlpevent_is_pending()) {
+ while (!need_resched() && !need_resched_delayed()
+ && !hvlpevent_is_pending()) {
local_irq_disable();
ppc64_runlatch_off();
/* Recheck with irqs off */
- if (!need_resched() && !hvlpevent_is_pending())
+ if (!need_resched() && !need_resched_delayed()
+ && !hvlpevent_is_pending())
yield_shared_processor();
HMT_medium();
Index: linux/arch/powerpc/platforms/powermac/feature.c
===================================================================
--- linux.orig/arch/powerpc/platforms/powermac/feature.c
+++ linux/arch/powerpc/platforms/powermac/feature.c
@@ -59,7 +59,7 @@ extern struct device_node *k2_skiplist[2
* We use a single global lock to protect accesses. Each driver has
* to take care of its own locking
*/
-DEFINE_SPINLOCK(feature_lock);
+DEFINE_RAW_SPINLOCK(feature_lock);
#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags);
#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags);
Index: linux/arch/powerpc/platforms/powermac/nvram.c
===================================================================
--- linux.orig/arch/powerpc/platforms/powermac/nvram.c
+++ linux/arch/powerpc/platforms/powermac/nvram.c
@@ -80,7 +80,7 @@ static int is_core_99;
static int core99_bank = 0;
static int nvram_partitions[3];
// XXX Turn that into a sem
-static DEFINE_SPINLOCK(nv_lock);
+static DEFINE_RAW_SPINLOCK(nv_lock);
static int (*core99_write_bank)(int bank, u8* datas);
static int (*core99_erase_bank)(int bank);
Index: linux/arch/powerpc/platforms/powermac/pic.c
===================================================================
--- linux.orig/arch/powerpc/platforms/powermac/pic.c
+++ linux/arch/powerpc/platforms/powermac/pic.c
@@ -63,7 +63,7 @@ static int max_irqs;
static int max_real_irqs;
static u32 level_mask[4];
-static DEFINE_SPINLOCK(pmac_pic_lock);
+static DEFINE_RAW_SPINLOCK(pmac_pic_lock);
#define NR_MASK_WORDS ((NR_IRQS + 31) / 32)
static unsigned long ppc_lost_interrupts[NR_MASK_WORDS];
Index: linux/arch/powerpc/platforms/pseries/setup.c
===================================================================
--- linux.orig/arch/powerpc/platforms/pseries/setup.c
+++ linux/arch/powerpc/platforms/pseries/setup.c
@@ -483,7 +483,8 @@ static void pseries_dedicated_idle_sleep
set_thread_flag(TIF_POLLING_NRFLAG);
while (get_tb() < start_snooze) {
- if (need_resched() || cpu_is_offline(cpu))
+ if (need_resched() || need_resched_delayed() ||
+ cpu_is_offline(cpu))
goto out;
ppc64_runlatch_off();
HMT_low();
@@ -494,7 +495,8 @@ static void pseries_dedicated_idle_sleep
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb();
local_irq_disable();
- if (need_resched() || cpu_is_offline(cpu))
+ if (need_resched() || need_resched_delayed() ||
+ cpu_is_offline(cpu))
goto out;
}
Index: linux/arch/powerpc/platforms/pseries/smp.c
===================================================================
--- linux.orig/arch/powerpc/platforms/pseries/smp.c
+++ linux/arch/powerpc/platforms/pseries/smp.c
@@ -344,7 +344,7 @@ static void __devinit smp_xics_setup_cpu
}
#endif /* CONFIG_XICS */
-static DEFINE_SPINLOCK(timebase_lock);
+static DEFINE_RAW_SPINLOCK(timebase_lock);
static unsigned long timebase = 0;
static void __devinit pSeries_give_timebase(void)
Index: linux/arch/ppc/8260_io/enet.c
===================================================================
--- linux.orig/arch/ppc/8260_io/enet.c
+++ linux/arch/ppc/8260_io/enet.c
@@ -116,7 +116,7 @@ struct scc_enet_private {
scc_t *sccp;
struct net_device_stats stats;
uint tx_full;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
static int scc_enet_open(struct net_device *dev);
Index: linux/arch/ppc/8260_io/fcc_enet.c
===================================================================
--- linux.orig/arch/ppc/8260_io/fcc_enet.c
+++ linux/arch/ppc/8260_io/fcc_enet.c
@@ -376,7 +376,7 @@ struct fcc_enet_private {
volatile fcc_enet_t *ep;
struct net_device_stats stats;
uint tx_free;
- spinlock_t lock;
+ raw_spinlock_t lock;
#ifdef CONFIG_USE_MDIO
uint phy_id;
Index: linux/arch/ppc/8xx_io/commproc.c
===================================================================
--- linux.orig/arch/ppc/8xx_io/commproc.c
+++ linux/arch/ppc/8xx_io/commproc.c
@@ -356,7 +356,7 @@ cpm_setbrg(uint brg, uint rate)
/*
* dpalloc / dpfree bits.
*/
-static spinlock_t cpm_dpmem_lock;
+static raw_spinlock_t cpm_dpmem_lock;
/*
* 16 blocks should be enough to satisfy all requests
* until the memory subsystem goes up...
Index: linux/arch/ppc/8xx_io/enet.c
===================================================================
--- linux.orig/arch/ppc/8xx_io/enet.c
+++ linux/arch/ppc/8xx_io/enet.c
@@ -143,7 +143,7 @@ struct scc_enet_private {
unsigned char *rx_vaddr[RX_RING_SIZE];
struct net_device_stats stats;
uint tx_full;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
static int scc_enet_open(struct net_device *dev);
Index: linux/arch/ppc/8xx_io/fec.c
===================================================================
--- linux.orig/arch/ppc/8xx_io/fec.c
+++ linux/arch/ppc/8xx_io/fec.c
@@ -164,7 +164,7 @@ struct fec_enet_private {
struct net_device_stats stats;
uint tx_full;
- spinlock_t lock;
+ raw_spinlock_t lock;
#ifdef CONFIG_USE_MDIO
uint phy_id;
Index: linux/arch/ppc/Kconfig
===================================================================
--- linux.orig/arch/ppc/Kconfig
+++ linux/arch/ppc/Kconfig
@@ -12,13 +12,6 @@ config GENERIC_HARDIRQS
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config GENERIC_HWEIGHT
bool
default y
@@ -955,6 +948,18 @@ config HIGHMEM
source kernel/Kconfig.hz
source kernel/Kconfig.preempt
+
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ default y
+
+config ASM_SEMAPHORES
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+
source "mm/Kconfig"
source "fs/Kconfig.binfmt"
Index: linux/arch/ppc/boot/Makefile
===================================================================
--- linux.orig/arch/ppc/boot/Makefile
+++ linux/arch/ppc/boot/Makefile
@@ -14,6 +14,15 @@
#
CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include
+
+ifdef CONFIG_MCOUNT
+# do not trace the boot loader
+nullstring :=
+space := $(nullstring) # end of the line
+pg_flag = $(nullstring) -pg # end of the line
+CFLAGS := $(subst ${pg_flag},${space},${CFLAGS})
+endif
+
HOSTCFLAGS += -Iarch/$(ARCH)/boot/include
BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd
Index: linux/arch/ppc/kernel/dma-mapping.c
===================================================================
--- linux.orig/arch/ppc/kernel/dma-mapping.c
+++ linux/arch/ppc/kernel/dma-mapping.c
@@ -70,7 +70,7 @@ int map_page(unsigned long va, phys_addr
* This is the page table (2MB) covering uncached, DMA consistent allocations
*/
static pte_t *consistent_pte;
-static DEFINE_SPINLOCK(consistent_lock);
+static DEFINE_RAW_SPINLOCK(consistent_lock);
/*
* VM region handling support.
Index: linux/arch/ppc/kernel/entry.S
===================================================================
--- linux.orig/arch/ppc/kernel/entry.S
+++ linux/arch/ppc/kernel/entry.S
@@ -856,7 +856,7 @@ load_dbcr0:
#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
do_work: /* r10 contains MSR_KERNEL here */
- andi. r0,r9,_TIF_NEED_RESCHED
+ andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
@@ -870,7 +870,7 @@ recheck:
MTMSRD(r10) /* disable interrupts */
rlwinm r9,r1,0,0,18
lwz r9,TI_FLAGS(r9)
- andi. r0,r9,_TIF_NEED_RESCHED
+ andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED)
bne- do_resched
andi. r0,r9,_TIF_SIGPENDING
beq restore_user
Index: linux/arch/ppc/kernel/semaphore.c
===================================================================
--- linux.orig/arch/ppc/kernel/semaphore.c
+++ linux/arch/ppc/kernel/semaphore.c
@@ -29,7 +29,7 @@
* sem->count = tmp;
* return old_count;
*/
-static inline int __sem_update_count(struct semaphore *sem, int incr)
+static inline int __sem_update_count(struct compat_semaphore *sem, int incr)
{
int old_count, tmp;
@@ -48,7 +48,7 @@ static inline int __sem_update_count(str
return old_count;
}
-void __up(struct semaphore *sem)
+void __compat_up(struct compat_semaphore *sem)
{
/*
* Note that we incremented count in up() before we came here,
@@ -70,7 +70,7 @@ void __up(struct semaphore *sem)
* Thus it is only when we decrement count from some value > 0
* that we have actually got the semaphore.
*/
-void __sched __down(struct semaphore *sem)
+void __sched __compat_down(struct compat_semaphore *sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -100,7 +100,7 @@ void __sched __down(struct semaphore *se
wake_up(&sem->wait);
}
-int __sched __down_interruptible(struct semaphore * sem)
+int __sched __compat_down_interruptible(struct compat_semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -129,3 +129,8 @@ int __sched __down_interruptible(struct
wake_up(&sem->wait);
return retval;
}
+
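+/* Non-blocking query used by PREEMPT_RT's semaphore compatibility layer: */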
+int compat_sem_is_locked(struct compat_semaphore *sem)
+{
+ return (int) atomic_read(&sem->count) < 0;
+}
Index: linux/arch/ppc/kernel/smp.c
===================================================================
--- linux.orig/arch/ppc/kernel/smp.c
+++ linux/arch/ppc/kernel/smp.c
@@ -137,6 +137,16 @@ void smp_send_reschedule(int cpu)
smp_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
+/*
+ * This function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE);
+}
+
#ifdef CONFIG_XMON
void smp_send_xmon_break(int cpu)
{
@@ -161,7 +171,7 @@ void smp_send_stop(void)
* static memory requirements. It also looks cleaner.
* Stolen from the i386 version.
*/
-static DEFINE_SPINLOCK(call_lock);
+static DEFINE_RAW_SPINLOCK(call_lock);
static struct call_data_struct {
void (*func) (void *info);
Index: linux/arch/ppc/kernel/time.c
===================================================================
--- linux.orig/arch/ppc/kernel/time.c
+++ linux/arch/ppc/kernel/time.c
@@ -65,6 +65,9 @@
#include
+unsigned long cpu_khz; /* Detected as we calibrate the decrementer */
+EXPORT_SYMBOL(cpu_khz);
+
unsigned long disarm_decr[NR_CPUS];
extern struct timezone sys_tz;
@@ -103,7 +106,7 @@ static inline int tb_delta(unsigned *jif
}
#ifdef CONFIG_SMP
-unsigned long profile_pc(struct pt_regs *regs)
+unsigned long notrace profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
Index: linux/arch/ppc/kernel/traps.c
===================================================================
--- linux.orig/arch/ppc/kernel/traps.c
+++ linux/arch/ppc/kernel/traps.c
@@ -71,7 +71,7 @@ void (*debugger_fault_handler)(struct pt
* Trap & Exception support
*/
-DEFINE_SPINLOCK(die_lock);
+DEFINE_RAW_SPINLOCK(die_lock);
int die(const char * str, struct pt_regs * fp, long err)
{
@@ -106,6 +106,10 @@ void _exception(int signr, struct pt_reg
debugger(regs);
die("Exception in kernel mode", regs, signr);
}
+#ifdef CONFIG_PREEMPT_RT
+ local_irq_enable();
+ preempt_check_resched();
+#endif
info.si_signo = signr;
info.si_errno = 0;
info.si_code = code;
Index: linux/arch/ppc/lib/locks.c
===================================================================
--- linux.orig/arch/ppc/lib/locks.c
+++ linux/arch/ppc/lib/locks.c
@@ -42,7 +42,7 @@ static inline unsigned long __spin_trylo
return ret;
}
-void _raw_spin_lock(spinlock_t *lock)
+void __raw_spin_lock(raw_spinlock_t *lock)
{
int cpu = smp_processor_id();
unsigned int stuck = INIT_STUCK;
@@ -62,9 +62,9 @@ void _raw_spin_lock(spinlock_t *lock)
lock->owner_pc = (unsigned long)__builtin_return_address(0);
lock->owner_cpu = cpu;
}
-EXPORT_SYMBOL(_raw_spin_lock);
+EXPORT_SYMBOL(__raw_spin_lock);
-int _raw_spin_trylock(spinlock_t *lock)
+int __raw_spin_trylock(raw_spinlock_t *lock)
{
if (__spin_trylock(&lock->lock))
return 0;
@@ -72,9 +72,9 @@ int _raw_spin_trylock(spinlock_t *lock)
lock->owner_pc = (unsigned long)__builtin_return_address(0);
return 1;
}
-EXPORT_SYMBOL(_raw_spin_trylock);
+EXPORT_SYMBOL(__raw_spin_trylock);
-void _raw_spin_unlock(spinlock_t *lp)
+void __raw_spin_unlock(raw_spinlock_t *lp)
{
if ( !lp->lock )
printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n",
@@ -88,13 +88,13 @@ void _raw_spin_unlock(spinlock_t *lp)
wmb();
lp->lock = 0;
}
-EXPORT_SYMBOL(_raw_spin_unlock);
+EXPORT_SYMBOL(__raw_spin_unlock);
/*
* For rwlocks, zero is unlocked, -1 is write-locked,
* positive is read-locked.
*/
-static __inline__ int __read_trylock(rwlock_t *rw)
+static __inline__ int __read_trylock(raw_rwlock_t *rw)
{
signed int tmp;
@@ -114,13 +114,13 @@ static __inline__ int __read_trylock(rwl
return tmp;
}
-int _raw_read_trylock(rwlock_t *rw)
+int __raw_read_trylock(raw_rwlock_t *rw)
{
return __read_trylock(rw) > 0;
}
-EXPORT_SYMBOL(_raw_read_trylock);
+EXPORT_SYMBOL(__raw_read_trylock);
-void _raw_read_lock(rwlock_t *rw)
+void __raw_read_lock(raw_rwlock_t *rw)
{
unsigned int stuck;
@@ -135,9 +135,9 @@ void _raw_read_lock(rwlock_t *rw)
}
}
}
-EXPORT_SYMBOL(_raw_read_lock);
+EXPORT_SYMBOL(__raw_read_lock);
-void _raw_read_unlock(rwlock_t *rw)
+void __raw_read_unlock(raw_rwlock_t *rw)
{
if ( rw->lock == 0 )
printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n",
@@ -146,9 +146,9 @@ void _raw_read_unlock(rwlock_t *rw)
wmb();
atomic_dec((atomic_t *) &(rw)->lock);
}
-EXPORT_SYMBOL(_raw_read_unlock);
+EXPORT_SYMBOL(__raw_read_unlock);
-void _raw_write_lock(rwlock_t *rw)
+void __raw_write_lock(raw_rwlock_t *rw)
{
unsigned int stuck;
@@ -164,18 +164,18 @@ void _raw_write_lock(rwlock_t *rw)
}
wmb();
}
-EXPORT_SYMBOL(_raw_write_lock);
+EXPORT_SYMBOL(__raw_write_lock);
-int _raw_write_trylock(rwlock_t *rw)
+int __raw_write_trylock(raw_rwlock_t *rw)
{
if (cmpxchg(&rw->lock, 0, -1) != 0)
return 0;
wmb();
return 1;
}
-EXPORT_SYMBOL(_raw_write_trylock);
+EXPORT_SYMBOL(__raw_write_trylock);
-void _raw_write_unlock(rwlock_t *rw)
+void __raw_write_unlock(raw_rwlock_t *rw)
{
if (rw->lock >= 0)
printk("_write_lock(): %s/%d (nip %08lX) lock %d\n",
@@ -184,6 +184,6 @@ void _raw_write_unlock(rwlock_t *rw)
wmb();
rw->lock = 0;
}
-EXPORT_SYMBOL(_raw_write_unlock);
+EXPORT_SYMBOL(__raw_write_unlock);
#endif
Index: linux/arch/ppc/mm/fault.c
===================================================================
--- linux.orig/arch/ppc/mm/fault.c
+++ linux/arch/ppc/mm/fault.c
@@ -89,7 +89,7 @@ static int store_updates_sp(struct pt_re
* the error_code parameter is ESR for a data fault, 0 for an instruction
* fault.
*/
-int do_page_fault(struct pt_regs *regs, unsigned long address,
+int notrace do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
struct vm_area_struct * vma;
Index: linux/arch/ppc/mm/init.c
===================================================================
--- linux.orig/arch/ppc/mm/init.c
+++ linux/arch/ppc/mm/init.c
@@ -55,7 +55,7 @@
#endif
#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
unsigned long total_memory;
unsigned long total_lowmem;
Index: linux/arch/ppc/platforms/apus_setup.c
===================================================================
--- linux.orig/arch/ppc/platforms/apus_setup.c
+++ linux/arch/ppc/platforms/apus_setup.c
@@ -275,6 +275,7 @@ void apus_calibrate_decr(void)
freq/1000000, freq%1000000);
tb_ticks_per_jiffy = freq / HZ;
tb_to_us = mulhwu_scale_factor(freq, 1000000);
+ cpu_khz = freq / 1000;
__bus_speed = bus_speed;
__speed_test_failed = speed_test_failed;
Index: linux/arch/ppc/platforms/ev64260.c
===================================================================
--- linux.orig/arch/ppc/platforms/ev64260.c
+++ linux/arch/ppc/platforms/ev64260.c
@@ -550,6 +550,7 @@ ev64260_calibrate_decr(void)
tb_ticks_per_jiffy = freq / HZ;
tb_to_us = mulhwu_scale_factor(freq, 1000000);
+ cpu_khz = freq / 1000;
return;
}
Index: linux/arch/ppc/platforms/gemini_setup.c
===================================================================
--- linux.orig/arch/ppc/platforms/gemini_setup.c
+++ linux/arch/ppc/platforms/gemini_setup.c
@@ -459,6 +459,7 @@ void __init gemini_calibrate_decr(void)
divisor = 4;
tb_ticks_per_jiffy = freq / HZ / divisor;
tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
}
unsigned long __init gemini_find_end_of_memory(void)
Index: linux/arch/ppc/platforms/hdpu.c
===================================================================
--- linux.orig/arch/ppc/platforms/hdpu.c
+++ linux/arch/ppc/platforms/hdpu.c
@@ -55,7 +55,7 @@ static void parse_bootinfo(unsigned long
static void hdpu_set_l1pe(void);
static void hdpu_cpustate_set(unsigned char new_state);
#ifdef CONFIG_SMP
-static DEFINE_SPINLOCK(timebase_lock);
+static DEFINE_RAW_SPINLOCK(timebase_lock);
static unsigned int timebase_upper = 0, timebase_lower = 0;
extern int smp_tb_synchronized;
Index: linux/arch/ppc/platforms/powerpmc250.c
===================================================================
--- linux.orig/arch/ppc/platforms/powerpmc250.c
+++ linux/arch/ppc/platforms/powerpmc250.c
@@ -163,6 +163,7 @@ powerpmc250_calibrate_decr(void)
tb_ticks_per_jiffy = freq / (HZ * divisor);
tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
}
static void
Index: linux/arch/ppc/platforms/prep_setup.c
===================================================================
--- linux.orig/arch/ppc/platforms/prep_setup.c
+++ linux/arch/ppc/platforms/prep_setup.c
@@ -940,6 +940,7 @@ prep_calibrate_decr(void)
(freq/divisor)/1000000,
(freq/divisor)%1000000);
tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
tb_ticks_per_jiffy = freq / HZ / divisor;
}
}
Index: linux/arch/ppc/platforms/prpmc750.c
===================================================================
--- linux.orig/arch/ppc/platforms/prpmc750.c
+++ linux/arch/ppc/platforms/prpmc750.c
@@ -268,6 +268,7 @@ static void __init prpmc750_calibrate_de
tb_ticks_per_jiffy = freq / (HZ * divisor);
tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
}
static void prpmc750_restart(char *cmd)
Index: linux/arch/ppc/platforms/prpmc800.c
===================================================================
--- linux.orig/arch/ppc/platforms/prpmc800.c
+++ linux/arch/ppc/platforms/prpmc800.c
@@ -327,6 +327,7 @@ static void __init prpmc800_calibrate_de
tb_ticks_per_second = 100000000 / 4;
tb_ticks_per_jiffy = tb_ticks_per_second / HZ;
tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000);
+ cpu_khz = tb_ticks_per_second / 1000;
return;
}
@@ -367,6 +368,7 @@ static void __init prpmc800_calibrate_de
tb_ticks_per_second = (tbl_end - tbl_start) * 2;
tb_ticks_per_jiffy = tb_ticks_per_second / HZ;
tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000);
+ cpu_khz = tb_ticks_per_second / 1000;
}
static void prpmc800_restart(char *cmd)
Index: linux/arch/ppc/platforms/sbc82xx.c
===================================================================
--- linux.orig/arch/ppc/platforms/sbc82xx.c
+++ linux/arch/ppc/platforms/sbc82xx.c
@@ -65,7 +65,7 @@ static void sbc82xx_time_init(void)
static volatile char *sbc82xx_i8259_map;
static char sbc82xx_i8259_mask = 0xff;
-static DEFINE_SPINLOCK(sbc82xx_i8259_lock);
+static DEFINE_RAW_SPINLOCK(sbc82xx_i8259_lock);
static void sbc82xx_i8259_mask_and_ack_irq(unsigned int irq_nr)
{
Index: linux/arch/ppc/platforms/spruce.c
===================================================================
--- linux.orig/arch/ppc/platforms/spruce.c
+++ linux/arch/ppc/platforms/spruce.c
@@ -147,6 +147,7 @@ spruce_calibrate_decr(void)
freq = SPRUCE_BUS_SPEED;
tb_ticks_per_jiffy = freq / HZ / divisor;
tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
}
static int
Index: linux/arch/ppc/syslib/cpm2_common.c
===================================================================
--- linux.orig/arch/ppc/syslib/cpm2_common.c
+++ linux/arch/ppc/syslib/cpm2_common.c
@@ -114,7 +114,7 @@ cpm2_fastbrg(uint brg, uint rate, int di
/*
* dpalloc / dpfree bits.
*/
-static spinlock_t cpm_dpmem_lock;
+static raw_spinlock_t cpm_dpmem_lock;
/* 16 blocks should be enough to satisfy all requests
* until the memory subsystem goes up... */
static rh_block_t cpm_boot_dpmem_rh_block[16];
Index: linux/arch/ppc/syslib/ibm44x_common.c
===================================================================
--- linux.orig/arch/ppc/syslib/ibm44x_common.c
+++ linux/arch/ppc/syslib/ibm44x_common.c
@@ -63,6 +63,7 @@ void __init ibm44x_calibrate_decr(unsign
{
tb_ticks_per_jiffy = freq / HZ;
tb_to_us = mulhwu_scale_factor(freq, 1000000);
+ cpu_khz = freq / 1000;
/* Set the time base to zero */
mtspr(SPRN_TBWL, 0);
Index: linux/arch/ppc/syslib/m8260_setup.c
===================================================================
--- linux.orig/arch/ppc/syslib/m8260_setup.c
+++ linux/arch/ppc/syslib/m8260_setup.c
@@ -79,6 +79,7 @@ m8260_calibrate_decr(void)
divisor = 4;
tb_ticks_per_jiffy = freq / HZ / divisor;
tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
}
/* The 8260 has an internal 1-second timer update register that
Index: linux/arch/ppc/syslib/m8xx_setup.c
===================================================================
--- linux.orig/arch/ppc/syslib/m8xx_setup.c
+++ linux/arch/ppc/syslib/m8xx_setup.c
@@ -218,6 +218,7 @@ void __init m8xx_calibrate_decr(void)
printk("Decrementer Frequency = %d/%d\n", freq, divisor);
tb_ticks_per_jiffy = freq / HZ / divisor;
tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
/* Perform some more timer/timebase initialization. This used
* to be done elsewhere, but other changes caused it to get
Index: linux/arch/ppc/syslib/mpc52xx_setup.c
===================================================================
--- linux.orig/arch/ppc/syslib/mpc52xx_setup.c
+++ linux/arch/ppc/syslib/mpc52xx_setup.c
@@ -215,6 +215,7 @@ mpc52xx_calibrate_decr(void)
tb_ticks_per_jiffy = xlbfreq / HZ / divisor;
tb_to_us = mulhwu_scale_factor(xlbfreq / divisor, 1000000);
+ cpu_khz = (xlbfreq / divisor) / 1000;
}
Index: linux/arch/ppc/syslib/ocp.c
===================================================================
--- linux.orig/arch/ppc/syslib/ocp.c
+++ linux/arch/ppc/syslib/ocp.c
@@ -44,11 +44,11 @@
#include
#include
#include
+#include
#include
#include
#include
-#include
#include
//#define DBG(x) printk x
Index: linux/arch/ppc/syslib/open_pic.c
===================================================================
--- linux.orig/arch/ppc/syslib/open_pic.c
+++ linux/arch/ppc/syslib/open_pic.c
@@ -526,7 +526,7 @@ void openpic_reset_processor_phys(u_int
}
#if defined(CONFIG_SMP) || defined(CONFIG_PM)
-static DEFINE_SPINLOCK(openpic_setup_lock);
+static DEFINE_RAW_SPINLOCK(openpic_setup_lock);
#endif
#ifdef CONFIG_SMP
Index: linux/arch/ppc/syslib/open_pic2.c
===================================================================
--- linux.orig/arch/ppc/syslib/open_pic2.c
+++ linux/arch/ppc/syslib/open_pic2.c
@@ -380,7 +380,7 @@ static void openpic2_set_spurious(u_int
vec);
}
-static DEFINE_SPINLOCK(openpic2_setup_lock);
+static DEFINE_RAW_SPINLOCK(openpic2_setup_lock);
/*
* Initialize a timer interrupt (and disable it)
Index: linux/arch/ppc/syslib/ppc4xx_setup.c
===================================================================
--- linux.orig/arch/ppc/syslib/ppc4xx_setup.c
+++ linux/arch/ppc/syslib/ppc4xx_setup.c
@@ -172,6 +172,7 @@ ppc4xx_calibrate_decr(void)
freq = bip->bi_tbfreq;
tb_ticks_per_jiffy = freq / HZ;
tb_to_us = mulhwu_scale_factor(freq, 1000000);
+ cpu_khz = freq / 1000;
/* Set the time base to zero.
** At 200 Mhz, time base will rollover in ~2925 years.
Index: linux/arch/ppc/syslib/ppc85xx_setup.c
===================================================================
--- linux.orig/arch/ppc/syslib/ppc85xx_setup.c
+++ linux/arch/ppc/syslib/ppc85xx_setup.c
@@ -57,6 +57,7 @@ mpc85xx_calibrate_decr(void)
divisor = 8;
tb_ticks_per_jiffy = freq / divisor / HZ;
tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+ cpu_khz = (freq / divisor) / 1000;
/* Set the time base to zero */
mtspr(SPRN_TBWL, 0);
Index: linux/arch/ppc/syslib/todc_time.c
===================================================================
--- linux.orig/arch/ppc/syslib/todc_time.c
+++ linux/arch/ppc/syslib/todc_time.c
@@ -506,6 +506,7 @@ todc_calibrate_decr(void)
tb_ticks_per_jiffy = freq / HZ;
tb_to_us = mulhwu_scale_factor(freq, 1000000);
+ cpu_khz = freq / 1000;
return;
}
Index: linux/arch/sparc64/Kconfig
===================================================================
--- linux.orig/arch/sparc64/Kconfig
+++ linux/arch/sparc64/Kconfig
@@ -26,7 +26,7 @@ config MMU
bool
default y
-config TIME_INTERPOLATION
+config GENERIC_TIME
bool
default y
Index: linux/arch/sparc64/defconfig
===================================================================
--- linux.orig/arch/sparc64/defconfig
+++ linux/arch/sparc64/defconfig
@@ -7,7 +7,7 @@ CONFIG_SPARC=y
CONFIG_SPARC64=y
CONFIG_64BIT=y
CONFIG_MMU=y
-CONFIG_TIME_INTERPOLATION=y
+CONFIG_GENERIC_TIME=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_SPARC64_PAGE_SIZE_8KB=y
# CONFIG_SPARC64_PAGE_SIZE_64KB is not set
Index: linux/arch/sparc64/kernel/time.c
===================================================================
--- linux.orig/arch/sparc64/kernel/time.c
+++ linux/arch/sparc64/kernel/time.c
@@ -31,6 +31,7 @@
#include
#include
#include
+#include
#include
#include
@@ -621,7 +622,7 @@ static void __init set_system_time(void)
if (!mregs && !dregs) {
prom_printf("Something wrong, clock regs not mapped yet.\n");
prom_halt();
- }
+ }
if (mregs) {
spin_lock_irq(&mostek_lock);
@@ -821,7 +822,7 @@ static int __devinit clock_probe(struct
}
set_system_time();
-
+
local_irq_restore(flags);
return 0;
@@ -976,22 +977,33 @@ static struct notifier_block sparc64_cpu
#endif /* CONFIG_CPU_FREQ */
-static struct time_interpolator sparc64_cpu_interpolator = {
- .source = TIME_SOURCE_CPU,
- .shift = 16,
- .mask = 0xffffffffffffffffLL
+static cycle_t read_itc(void)
+{
+ return (cycle_t)get_cycles();
+}
+
+static struct clocksource clocksource_sparc64_itc = {
+ .name = "sparc64_itc",
+ .rating = 300,
+ .read = read_itc,
+ .mask = 0xffffffffffffffffLL,
+ .mult = 0, /* to be calculated */
+ .shift = 16,
+ .is_continuous = 1,
};
+
/* The quotient formula is taken from the IA64 port. */
#define SPARC64_NSEC_PER_CYC_SHIFT 30UL
void __init time_init(void)
{
unsigned long clock = sparc64_init_timers();
- sparc64_cpu_interpolator.frequency = clock;
- register_time_interpolator(&sparc64_cpu_interpolator);
+ clocksource_sparc64_itc.mult = clocksource_hz2mult(clock,
+ clocksource_sparc64_itc.shift);
+ clocksource_register(&clocksource_sparc64_itc);
- /* Now that the interpolator is registered, it is
+ /* Now that the clocksource is registered, it is
* safe to start the timer ticking.
*/
sparc64_start_timers();
@@ -1026,11 +1038,11 @@ static int set_rtc_mmss(unsigned long no
unsigned long flags;
u8 tmp;
- /*
+ /*
* Not having a register set can lead to trouble.
* Also starfire doesn't have a tod clock.
*/
- if (!mregs && !dregs)
+ if (!mregs && !dregs)
return -1;
if (mregs) {
Index: linux/arch/v850/Kconfig
===================================================================
--- linux.orig/arch/v850/Kconfig
+++ linux/arch/v850/Kconfig
@@ -34,6 +34,10 @@ config GENERIC_IRQ_PROBE
bool
default y
+config GENERIC_TIME
+ bool
+ default y
+
config TIME_LOW_RES
bool
default y
Index: linux/arch/v850/kernel/time.c
===================================================================
--- linux.orig/arch/v850/kernel/time.c
+++ linux/arch/v850/kernel/time.c
@@ -99,81 +99,6 @@ static irqreturn_t timer_interrupt (int
return IRQ_HANDLED;
}
-/*
- * This version of gettimeofday has near microsecond resolution.
- */
-void do_gettimeofday (struct timeval *tv)
-{
-#if 0 /* DAVIDM later if possible */
- extern volatile unsigned long lost_ticks;
- unsigned long lost;
-#endif
- unsigned long flags;
- unsigned long usec, sec;
- unsigned long seq;
-
- do {
- seq = read_seqbegin_irqsave(&xtime_lock, flags);
-
-#if 0
- usec = mach_gettimeoffset ? mach_gettimeoffset () : 0;
-#else
- usec = 0;
-#endif
-#if 0 /* DAVIDM later if possible */
- lost = lost_ticks;
- if (lost)
- usec += lost * (1000000/HZ);
-#endif
- sec = xtime.tv_sec;
- usec += xtime.tv_nsec / 1000;
- } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq (&xtime_lock);
-
- /* This is revolting. We need to set the xtime.tv_nsec
- * correctly. However, the value in this location is
- * is value at the last tick.
- * Discover what correction gettimeofday
- * would have done, and then undo it!
- */
-#if 0
- tv->tv_nsec -= mach_gettimeoffset() * 1000;
-#endif
-
- while (tv->tv_nsec < 0) {
- tv->tv_nsec += NSEC_PER_SEC;
- tv->tv_sec--;
- }
-
- xtime.tv_sec = tv->tv_sec;
- xtime.tv_nsec = tv->tv_nsec;
-
- ntp_clear();
-
- write_sequnlock_irq (&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int timer_dev_id;
static struct irqaction timer_irqaction = {
timer_interrupt,
Index: linux/arch/x86_64/Kconfig
===================================================================
--- linux.orig/arch/x86_64/Kconfig
+++ linux/arch/x86_64/Kconfig
@@ -24,6 +24,14 @@ config X86
bool
default y
+config GENERIC_TIME
+ bool
+ default y
+
+config GENERIC_TIME_VSYSCALL
+ bool
+ default y
+
config LOCKDEP_SUPPORT
bool
default y
@@ -46,13 +54,6 @@ config ISA
config SBUS
bool
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config GENERIC_HWEIGHT
bool
default y
@@ -289,6 +290,14 @@ config NUMA
If the system is EM64T, you should say N unless your system is EM64T
NUMA.
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT
+ bool
+
config K8_NUMA
bool "Old style AMD Opteron NUMA detection"
depends on NUMA
@@ -659,3 +668,6 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "kernel/time/Kconfig"
+
Index: linux/arch/x86_64/ia32/ia32entry.S
===================================================================
--- linux.orig/arch/x86_64/ia32/ia32entry.S
+++ linux/arch/x86_64/ia32/ia32entry.S
@@ -119,7 +119,9 @@ sysenter_do_call:
cmpl $(IA32_NR_syscalls-1),%eax
ja ia32_badsys
IA32_ARG_FIXUP 1
+ TRACE_SYS_IA32_CALL
call *ia32_sys_call_table(,%rax,8)
+ TRACE_SYS_RET
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
@@ -227,7 +229,9 @@ cstar_do_call:
cmpl $IA32_NR_syscalls-1,%eax
ja ia32_badsys
IA32_ARG_FIXUP 1
+ TRACE_SYS_IA32_CALL
call *ia32_sys_call_table(,%rax,8)
+ TRACE_SYS_RET
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
@@ -320,8 +324,10 @@ ia32_do_syscall:
cmpl $(IA32_NR_syscalls-1),%eax
ja ia32_badsys
IA32_ARG_FIXUP
+ TRACE_SYS_IA32_CALL
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
+ TRACE_SYS_RET
movq %rax,RAX-ARGOFFSET(%rsp)
jmp int_ret_from_sys_call
@@ -390,7 +396,7 @@ END(ia32_ptregs_common)
.section .rodata,"a"
.align 8
-ia32_sys_call_table:
+ENTRY(ia32_sys_call_table)
.quad sys_restart_syscall
.quad sys_exit
.quad stub32_fork
@@ -713,4 +719,7 @@ ia32_sys_call_table:
.quad sys_tee
.quad compat_sys_vmsplice
.quad compat_sys_move_pages
+#ifdef CONFIG_LATENCY_TRACE
+.globl ia32_syscall_end
+#endif
ia32_syscall_end:
Index: linux/arch/x86_64/kernel/Makefile
===================================================================
--- linux.orig/arch/x86_64/kernel/Makefile
+++ linux/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o trap
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- pci-dma.o pci-nommu.o alternative.o
+ pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_X86_MCE) += mce.o
Index: linux/arch/x86_64/kernel/apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/apic.c
+++ linux/arch/x86_64/kernel/apic.c
@@ -25,6 +25,7 @@
#include
#include
#include
+#include
#include
#include
@@ -36,9 +37,9 @@
#include
#include
#include
+#include
int apic_verbosity;
-int apic_runs_main_timer;
int apic_calibrate_pmtmr __initdata;
int disable_apic_timer __initdata;
@@ -52,6 +53,25 @@ static cpumask_t timer_interrupt_broadca
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer __read_mostly = 0;
+
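+/* APIC timer reload value, established once by calibrate_APIC_clock(): */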
+static unsigned int calibration_result;
+
+static void lapic_next_event(unsigned long delta, struct clock_event *evt);
+static void lapic_timer_setup(int mode, struct clock_event *evt);
+
+static struct clock_event lapic_clockevent = {
+ .name = "lapic",
+ .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE
+#ifdef CONFIG_SMP
+ | CLOCK_CAP_UPDATE
+#endif
+ ,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+};
+static DEFINE_PER_CPU(struct clock_event, lapic_events);
+
static void apic_pm_activate(void);
void enable_NMI_through_LVT0 (void * dummy)
@@ -527,8 +547,7 @@ static int lapic_suspend(struct sys_devi
apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
- local_save_flags(flags);
- local_irq_disable();
+ local_irq_save(flags);
disable_local_APIC();
local_irq_restore(flags);
return 0;
@@ -696,13 +715,16 @@ void __init init_apic_mappings(void)
#define APIC_DIVISOR 16
-static void __setup_APIC_LVTT(unsigned int clocks)
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot)
{
unsigned int lvtt_value, tmp_value, ver;
int cpu = smp_processor_id();
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+ lvtt_value = LOCAL_TIMER_VECTOR;
+ if (!oneshot)
+ lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+
if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask))
lvtt_value |= APIC_LVT_MASKED;
@@ -717,48 +739,34 @@ static void __setup_APIC_LVTT(unsigned i
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);
- apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
+ if (!oneshot)
+ apic_write(APIC_TMICT, clocks/APIC_DIVISOR);
}
-static void setup_APIC_timer(unsigned int clocks)
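+/*
+ * Clockevent 'next event' hook: the local APIC timer counts TMICT down
+ * to zero and fires, so writing the delta arms a one-shot interrupt:
+ */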
+static void lapic_next_event(unsigned long delta, struct clock_event *evt)
+{
+ apic_write(APIC_TMICT, delta);
+}
+
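+/*
+ * Clockevent mode hook: reprogram the LVTT for periodic or one-shot
+ * operation, reusing the boot-time calibration result:
+ */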
+static void lapic_timer_setup(int mode, struct clock_event *evt)
{
unsigned long flags;
local_irq_save(flags);
-
- /* wait for irq slice */
- if (vxtime.hpet_address && hpet_use_timer) {
- int trigger = hpet_readl(HPET_T0_CMP);
- while (hpet_readl(HPET_COUNTER) >= trigger)
- /* do nothing */ ;
- while (hpet_readl(HPET_COUNTER) < trigger)
- /* do nothing */ ;
- } else {
- int c1, c2;
- outb_p(0x00, 0x43);
- c2 = inb_p(0x40);
- c2 |= inb_p(0x40) << 8;
- do {
- c1 = c2;
- outb_p(0x00, 0x43);
- c2 = inb_p(0x40);
- c2 |= inb_p(0x40) << 8;
- } while (c2 - c1 < 300);
- }
- __setup_APIC_LVTT(clocks);
- /* Turn off PIT interrupt if we use APIC timer as main timer.
- Only works with the PM timer right now
- TBD fix it for HPET too. */
- if (vxtime.mode == VXTIME_PMTMR &&
- smp_processor_id() == boot_cpu_id &&
- apic_runs_main_timer == 1 &&
- !cpu_isset(boot_cpu_id, timer_interrupt_broadcast_ipi_mask)) {
- stop_timer_interrupt();
- apic_runs_main_timer++;
- }
+ __setup_APIC_LVTT(calibration_result, mode != CLOCK_EVT_PERIODIC);
local_irq_restore(flags);
}
+
+static void __devinit setup_APIC_timer(void)
+{
+ struct clock_event *levt = &__get_cpu_var(lapic_events);
+
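+ /* Each CPU registers its own writable copy of the clockevent template: */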
+ memcpy(levt, &lapic_clockevent, sizeof(*levt));
+
+ register_local_clockevent(levt);
+}
+
/*
* In this function we calibrate APIC bus clocks to the external
* timer. Unfortunately we cannot use jiffies and the timer irq
@@ -778,12 +786,13 @@ static int __init calibrate_APIC_clock(v
{
int apic, apic_start, tsc, tsc_start;
int result;
+ u64 wallclock_nsecs;
/*
* Put whatever arbitrary (but long enough) timeout
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
- __setup_APIC_LVTT(1000000000);
+ __setup_APIC_LVTT(1000000000, 0);
apic_start = apic_read(APIC_TMCCT);
#ifdef CONFIG_X86_PM_TIMER
@@ -791,6 +800,8 @@ static int __init calibrate_APIC_clock(v
pmtimer_wait(5000); /* 5ms wait */
apic = apic_read(APIC_TMCCT);
result = (apic_start - apic) * 1000L / 5;
+ printk("using pmtimer for lapic calibration\n");
+ wallclock_nsecs = 5000000;
} else
#endif
{
@@ -804,6 +815,8 @@ static int __init calibrate_APIC_clock(v
result = (apic_start - apic) * 1000L * cpu_khz /
(tsc - tsc_start);
+ wallclock_nsecs = ((u64)tsc - (u64)tsc_start) * 1000000 / (u64)cpu_khz;
+
}
printk("result %d\n", result);
@@ -811,11 +824,22 @@ static int __init calibrate_APIC_clock(v
printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
result / 1000 / 1000, result / 1000 % 1000);
+
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc(apic_start - apic, wallclock_nsecs, 32);
+
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ printk("lapic max_delta_ns: %ld\n", lapic_clockevent.max_delta_ns);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+
return result * APIC_DIVISOR / HZ;
}
-static unsigned int calibration_result;
-
void __init setup_boot_APIC_clock (void)
{
if (disable_apic_timer) {
@@ -832,7 +856,7 @@ void __init setup_boot_APIC_clock (void)
/*
* Now set up the timer for real.
*/
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
local_irq_enable();
}
@@ -840,7 +864,7 @@ void __init setup_boot_APIC_clock (void)
void __cpuinit setup_secondary_APIC_clock(void)
{
local_irq_disable(); /* FIXME: Do we need this? --RR */
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
local_irq_enable();
}
@@ -887,6 +911,13 @@ void switch_APIC_timer_to_ipi(void *cpum
!cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) {
disable_APIC_timer();
cpu_set(cpu, timer_interrupt_broadcast_ipi_mask);
+#ifdef CONFIG_HIGH_RES_TIMERS
+ printk("Disabling NO_HZ and high resolution timers "
+ "due to timer broadcasting\n");
+ for_each_possible_cpu(cpu)
+ per_cpu(lapic_events, cpu).capabilities &=
+ ~CLOCK_CAP_NEXTEVT;
+#endif
}
}
EXPORT_SYMBOL(switch_APIC_timer_to_ipi);
@@ -945,8 +976,6 @@ void smp_local_timer_interrupt(struct pt
#ifdef CONFIG_SMP
update_process_times(user_mode(regs));
#endif
- if (apic_runs_main_timer > 1 && smp_processor_id() == boot_cpu_id)
- main_timer_handler(regs);
/*
* We take the 'long' return path, and there every subsystem
* grabs the appropriate locks (kernel lock/ irq lock).
@@ -969,6 +998,8 @@ void smp_local_timer_interrupt(struct pt
*/
void smp_apic_timer_interrupt(struct pt_regs *regs)
{
+ int cpu = smp_processor_id();
+ struct clock_event *evt = &per_cpu(lapic_events, cpu);
/*
* the NMI deadlock-detector uses this.
*/
@@ -986,7 +1017,7 @@ void smp_apic_timer_interrupt(struct pt_
*/
exit_idle();
irq_enter();
- smp_local_timer_interrupt(regs);
+ evt->event_handler(regs);
irq_exit();
}
@@ -1161,26 +1192,11 @@ static __init int setup_noapictimer(char
return 1;
}
-static __init int setup_apicmaintimer(char *str)
-{
- apic_runs_main_timer = 1;
- nohpet = 1;
- return 1;
-}
-__setup("apicmaintimer", setup_apicmaintimer);
-
-static __init int setup_noapicmaintimer(char *str)
-{
- apic_runs_main_timer = -1;
- return 1;
-}
-__setup("noapicmaintimer", setup_noapicmaintimer);
-
static __init int setup_apicpmtimer(char *s)
{
apic_calibrate_pmtmr = 1;
notsc_setup(NULL);
- return setup_apicmaintimer(NULL);
+ return 1;
}
__setup("apicpmtimer", setup_apicpmtimer);
Index: linux/arch/x86_64/kernel/early_printk.c
===================================================================
--- linux.orig/arch/x86_64/kernel/early_printk.c
+++ linux/arch/x86_64/kernel/early_printk.c
@@ -203,7 +203,7 @@ static int early_console_initialized = 0
void early_printk(const char *fmt, ...)
{
- char buf[512];
+ static char buf[512];
int n;
va_list ap;
Index: linux/arch/x86_64/kernel/entry.S
===================================================================
--- linux.orig/arch/x86_64/kernel/entry.S
+++ linux/arch/x86_64/kernel/entry.S
@@ -45,6 +45,47 @@
.code64
+#ifdef CONFIG_LATENCY_TRACE
+
+ENTRY(mcount)
+ cmpq $0, mcount_enabled
+ jz out
+
+ push %rbp
+ mov %rsp,%rbp
+
+ push %r11
+ push %r10
+ push %r9
+ push %r8
+ push %rdi
+ push %rsi
+ push %rdx
+ push %rcx
+ push %rax
+
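+ /*
+ * 8(%rbp) is mcount's return address, i.e. an ip inside the traced
+ * function; via the traced function's saved frame pointer at 0(%rbp),
+ * 8(%rax) is its caller's return address. Hand both to
+ * __trace(ip, parent_ip):
+ */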
+ mov 0x0(%rbp),%rax
+ mov 0x8(%rbp),%rdi
+ mov 0x8(%rax),%rsi
+
+ call __trace
+
+ pop %rax
+ pop %rcx
+ pop %rdx
+ pop %rsi
+ pop %rdi
+ pop %r8
+ pop %r9
+ pop %r10
+ pop %r11
+
+ pop %rbp
+out:
+ ret
+
+#endif
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -221,7 +262,9 @@ ENTRY(system_call)
cmpq $__NR_syscall_max,%rax
ja badsys
movq %r10,%rcx
+ TRACE_SYS_CALL
call *sys_call_table(,%rax,8) # XXX: rip relative
+ TRACE_SYS_RET
movq %rax,RAX-ARGOFFSET(%rsp)
/*
* Syscall return path ending with SYSRET (fast path)
@@ -255,8 +298,8 @@ sysret_check:
/* edx: work, edi: workmask */
sysret_careful:
CFI_RESTORE_STATE
- bt $TIF_NEED_RESCHED,%edx
- jnc sysret_signal
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx
+ jz sysret_signal
TRACE_IRQS_ON
sti
pushq %rdi
@@ -279,7 +322,7 @@ sysret_signal:
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
-1: movl $_TIF_NEED_RESCHED,%edi
+1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi
/* Use IRET because user could have changed frame. This
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
cli
@@ -303,7 +346,9 @@ tracesys:
cmpq $__NR_syscall_max,%rax
ja 1f
movq %r10,%rcx /* fixup for C */
+ TRACE_SYS_CALL
call *sys_call_table(,%rax,8)
+ TRACE_SYS_RET
1: movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
jmp int_ret_from_sys_call
@@ -349,8 +394,8 @@ int_with_check:
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
- bt $TIF_NEED_RESCHED,%edx
- jnc int_very_careful
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx
+ jz int_very_careful
TRACE_IRQS_ON
sti
pushq %rdi
@@ -387,7 +432,7 @@ int_signal:
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
call do_notify_resume
-1: movl $_TIF_NEED_RESCHED,%edi
+1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi
int_restore_rest:
RESTORE_REST
cli
@@ -585,8 +630,8 @@ bad_iret:
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
- bt $TIF_NEED_RESCHED,%edx
- jnc retint_signal
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx
+ jz retint_signal
TRACE_IRQS_ON
sti
pushq %rdi
@@ -612,7 +657,7 @@ retint_signal:
RESTORE_REST
cli
TRACE_IRQS_OFF
- movl $_TIF_NEED_RESCHED,%edi
+ movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi
GET_THREAD_INFO(%rcx)
jmp retint_check
Index: linux/arch/x86_64/kernel/head64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/head64.c
+++ linux/arch/x86_64/kernel/head64.c
@@ -10,6 +10,7 @@
#include
#include
#include
+#include
#include
#include
@@ -74,7 +75,7 @@ static void __init setup_boot_cpu_data(v
boot_cpu_data.x86_mask = eax & 0xf;
}
-void __init x86_64_start_kernel(char * real_mode_data)
+void __init notrace x86_64_start_kernel(char * real_mode_data)
{
char *s;
int i;
@@ -99,6 +100,7 @@ void __init x86_64_start_kernel(char * r
cpu_pda(i) = &boot_cpu_pda[i];
pda_init(0);
+
copy_bootdata(real_mode_data);
#ifdef CONFIG_SMP
cpu_set(0, cpu_online_map);
@@ -120,5 +122,6 @@ void __init x86_64_start_kernel(char * r
panic("Kernel too big for kernel mapping\n");
setup_boot_cpu_data();
+
start_kernel();
}
Index: linux/arch/x86_64/kernel/hpet.c
===================================================================
--- /dev/null
+++ linux/arch/x86_64/kernel/hpet.c
@@ -0,0 +1,475 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+int nohpet __initdata = 0;
+
+unsigned long hpet_address;
+static unsigned long hpet_period; /* fsecs / HPET clock */
+unsigned long hpet_tick; /* HPET clocks / interrupt */
+int hpet_use_timer; /* Use the HPET counter for timekeeping, otherwise the PIT */
+
+#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
+
+/*
+ * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
+ * it to the HPET timer of known frequency.
+ */
+
+#define TICK_COUNT 100000000
+
+unsigned int __init hpet_calibrate_tsc(void)
+{
+ int tsc_start, hpet_start;
+ int tsc_now, hpet_now;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ local_irq_disable();
+
+ hpet_start = hpet_readl(HPET_COUNTER);
+ rdtscl(tsc_start);
+
+ do {
+ local_irq_disable();
+ hpet_now = hpet_readl(HPET_COUNTER);
+ tsc_now = get_cycles_sync();
+ local_irq_restore(flags);
+ } while ((tsc_now - tsc_start) < TICK_COUNT &&
+ (hpet_now - hpet_start) < TICK_COUNT);
+
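+ /*
+ * TSC cycles elapsed over the wall time measured by the HPET
+ * (ticks * hpet_period is femtoseconds, /1000 makes it picoseconds),
+ * which yields the TSC frequency in kHz:
+ */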
+ return (tsc_now - tsc_start) * 1000000000L
+ / ((hpet_now - hpet_start) * hpet_period / 1000);
+}
+
+
+
+#ifdef CONFIG_HPET
+static __init int late_hpet_init(void)
+{
+ struct hpet_data hd;
+ unsigned int ntimer;
+
+ if (!hpet_address)
+ return 0;
+
+ memset(&hd, 0, sizeof (hd));
+
+ ntimer = hpet_readl(HPET_ID);
+ ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+ ntimer++;
+
+ /*
+ * Register with driver.
+ * Timer0 and Timer1 is used by platform.
+ */
+ hd.hd_phys_address = hpet_address;
+ hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
+ hd.hd_nirqs = ntimer;
+ hd.hd_flags = HPET_DATA_PLATFORM;
+ hpet_reserve_timer(&hd, 0);
+#ifdef CONFIG_HPET_EMULATE_RTC
+ hpet_reserve_timer(&hd, 1);
+#endif
+ hd.hd_irq[0] = HPET_LEGACY_8254;
+ hd.hd_irq[1] = HPET_LEGACY_RTC;
+ if (ntimer > 2) {
+ struct hpet *hpet;
+ struct hpet_timer *timer;
+ int i;
+
+ hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
+ timer = &hpet->hpet_timers[2];
+ for (i = 2; i < ntimer; timer++, i++)
+ hd.hd_irq[i] = (timer->hpet_config &
+ Tn_INT_ROUTE_CNF_MASK) >>
+ Tn_INT_ROUTE_CNF_SHIFT;
+
+ }
+
+ hpet_alloc(&hd);
+ return 0;
+}
+fs_initcall(late_hpet_init);
+#endif
+
+static int hpet_timer_stop_set_go(unsigned long tick)
+{
+ unsigned int cfg;
+
+/*
+ * Stop the timers and reset the main counter.
+ */
+
+ cfg = hpet_readl(HPET_CFG);
+ cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+ hpet_writel(cfg, HPET_CFG);
+ hpet_writel(0, HPET_COUNTER);
+ hpet_writel(0, HPET_COUNTER + 4);
+
+/*
+ * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
+ * and period also hpet_tick.
+ */
+ if (hpet_use_timer) {
+ hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+ HPET_TN_32BIT, HPET_T0_CFG);
+ hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
+ hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
+ cfg |= HPET_CFG_LEGACY;
+ }
+/*
+ * Go!
+ */
+
+ cfg |= HPET_CFG_ENABLE;
+ hpet_writel(cfg, HPET_CFG);
+
+ return 0;
+}
+
+int hpet_arch_init(void)
+{
+ unsigned int id;
+
+ if (!hpet_address)
+ return -1;
+ set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+ __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+
+/*
+ * Read the period, compute tick and quotient.
+ */
+
+ id = hpet_readl(HPET_ID);
+
+ if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
+ return -1;
+
+ hpet_period = hpet_readl(HPET_PERIOD);
+ if (hpet_period < 100000 || hpet_period > 100000000)
+ return -1;
+
+ hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
+
+ hpet_use_timer = (id & HPET_ID_LEGSUP);
+
+ return hpet_timer_stop_set_go(hpet_tick);
+}
+
+int hpet_reenable(void)
+{
+ return hpet_timer_stop_set_go(hpet_tick);
+}
+
+int hpet_stop(void)
+{
+ return hpet_timer_stop_set_go(0);
+}
+
+#ifdef CONFIG_HPET_EMULATE_RTC
+/* HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET
+ * is enabled, we support RTC interrupt functionality in software.
+ * RTC has 3 kinds of interrupts:
+ * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
+ * is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
+ * (1) and (2) above are implemented using polling at a frequency of
+ * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
+ * overhead. (DEFAULT_RTC_INT_FREQ)
+ * For (3), we use interrupts at 64Hz or user specified periodic
+ * frequency, whichever is higher.
+ */
+#include
+
+#define DEFAULT_RTC_INT_FREQ 64
+#define RTC_NUM_INTS 1
+
+static unsigned long UIE_on;
+static unsigned long prev_update_sec;
+
+static unsigned long AIE_on;
+static struct rtc_time alarm_time;
+
+static unsigned long PIE_on;
+static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
+static unsigned long PIE_count;
+
+static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
+static unsigned int hpet_t1_cmp; /* cached comparator register */
+
+int is_hpet_enabled(void)
+{
+ return hpet_address != 0;
+}
+
+/*
+ * Timer 1 is used for the RTC. We do not use the HPET's periodic interrupt
+ * feature, even though Timer 1 supports it: setting up a periodic HPET
+ * interrupt requires stopping the main counter, and doing that every time
+ * someone disables/enables the RTC would disturb the main kernel timer
+ * running on Timer 0. So, for the time being, simulate the periodic
+ * interrupt in software.
+ *
+ * hpet_rtc_timer_init() is called for the first set-up; on subsequent
+ * interrupts, reinitialization happens through hpet_rtc_timer_reinit().
+ */
+int hpet_rtc_timer_init(void)
+{
+ unsigned int cfg, cnt;
+ unsigned long flags;
+
+ if (!is_hpet_enabled())
+ return 0;
+ /*
+ * Set the counter 1 and enable the interrupts.
+ */
+ if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+ hpet_rtc_int_freq = PIE_freq;
+ else
+ hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+ local_irq_save(flags);
+ cnt = hpet_readl(HPET_COUNTER);
+ cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
+ hpet_writel(cnt, HPET_T1_CMP);
+ hpet_t1_cmp = cnt;
+ local_irq_restore(flags);
+
+ cfg = hpet_readl(HPET_T1_CFG);
+ cfg &= ~HPET_TN_PERIODIC;
+ cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ hpet_writel(cfg, HPET_T1_CFG);
+
+ return 1;
+}
+
+static void hpet_rtc_timer_reinit(void)
+{
+ unsigned int cfg, cnt;
+
+ if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
+ cfg = hpet_readl(HPET_T1_CFG);
+ cfg &= ~HPET_TN_ENABLE;
+ hpet_writel(cfg, HPET_T1_CFG);
+ return;
+ }
+
+ if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+ hpet_rtc_int_freq = PIE_freq;
+ else
+ hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+ /* It is more accurate to use the comparator value than the current count. */
+ cnt = hpet_t1_cmp;
+ cnt += hpet_tick*HZ/hpet_rtc_int_freq;
+ hpet_writel(cnt, HPET_T1_CMP);
+ hpet_t1_cmp = cnt;
+}
+
+/*
+ * The functions below are called from rtc driver.
+ * Return 0 if HPET is not being used.
+ * Otherwise do the necessary changes and return 1.
+ */
+int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ if (bit_mask & RTC_UIE)
+ UIE_on = 0;
+ if (bit_mask & RTC_PIE)
+ PIE_on = 0;
+ if (bit_mask & RTC_AIE)
+ AIE_on = 0;
+
+ return 1;
+}
+
+int hpet_set_rtc_irq_bit(unsigned long bit_mask)
+{
+ int timer_init_reqd = 0;
+
+ if (!is_hpet_enabled())
+ return 0;
+
+ if (!(PIE_on | AIE_on | UIE_on))
+ timer_init_reqd = 1;
+
+ if (bit_mask & RTC_UIE) {
+ UIE_on = 1;
+ }
+ if (bit_mask & RTC_PIE) {
+ PIE_on = 1;
+ PIE_count = 0;
+ }
+ if (bit_mask & RTC_AIE) {
+ AIE_on = 1;
+ }
+
+ if (timer_init_reqd)
+ hpet_rtc_timer_init();
+
+ return 1;
+}
+
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ alarm_time.tm_hour = hrs;
+ alarm_time.tm_min = min;
+ alarm_time.tm_sec = sec;
+
+ return 1;
+}
+
+int hpet_set_periodic_freq(unsigned long freq)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ PIE_freq = freq;
+ PIE_count = 0;
+
+ return 1;
+}
+
+int hpet_rtc_dropped_irq(void)
+{
+ if (!is_hpet_enabled())
+ return 0;
+
+ return 1;
+}
+
+irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct rtc_time curr_time;
+ unsigned long rtc_int_flag = 0;
+ int call_rtc_interrupt = 0;
+
+ hpet_rtc_timer_reinit();
+
+ if (UIE_on | AIE_on) {
+ rtc_get_rtc_time(&curr_time);
+ }
+ if (UIE_on) {
+ if (curr_time.tm_sec != prev_update_sec) {
+ /* Set update int info, call real rtc int routine */
+ call_rtc_interrupt = 1;
+ rtc_int_flag = RTC_UF;
+ prev_update_sec = curr_time.tm_sec;
+ }
+ }
+ if (PIE_on) {
+ PIE_count++;
+ if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
+ /* Set periodic int info, call real rtc int routine */
+ call_rtc_interrupt = 1;
+ rtc_int_flag |= RTC_PF;
+ PIE_count = 0;
+ }
+ }
+ if (AIE_on) {
+ if ((curr_time.tm_sec == alarm_time.tm_sec) &&
+ (curr_time.tm_min == alarm_time.tm_min) &&
+ (curr_time.tm_hour == alarm_time.tm_hour)) {
+ /* Set alarm int info, call real rtc int routine */
+ call_rtc_interrupt = 1;
+ rtc_int_flag |= RTC_AF;
+ }
+ }
+ if (call_rtc_interrupt) {
+ rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
+ rtc_interrupt(rtc_int_flag, dev_id, regs);
+ }
+ return IRQ_HANDLED;
+}
+#endif
+
+static int __init nohpet_setup(char *s)
+{
+ nohpet = 1;
+ return 1;
+}
+
+__setup("nohpet", nohpet_setup);
+
+#define HPET_MASK 0xFFFFFFFF
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+static void *hpet_ptr;
+
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)readl(hpet_ptr);
+}
+
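+/* Vsyscall-side read of the HPET main counter (0xf0 == HPET_COUNTER): */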
+static cycle_t __vsyscall_fn vread_hpet(void)
+{
+ return (cycle_t)readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
+}
+
+struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = (cycle_t)HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+ .is_continuous = 1,
+ .vread = vread_hpet,
+};
+
+static int __init init_hpet_clocksource(void)
+{
+ unsigned long hpet_period;
+ void __iomem *hpet_base;
+ u64 tmp;
+
+ if (!hpet_address)
+ return -ENODEV;
+
+ /* calculate the hpet address: */
+ hpet_base =
+ (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+ hpet_ptr = hpet_base + HPET_COUNTER;
+
+ /* calculate the frequency: */
+ hpet_period = readl(hpet_base + HPET_PERIOD);
+
+ /*
+ * hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * approximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ return clocksource_register(&clocksource_hpet);
+}
+
+module_init(init_hpet_clocksource);
Index: linux/arch/x86_64/kernel/i8259.c
===================================================================
--- linux.orig/arch/x86_64/kernel/i8259.c
+++ linux/arch/x86_64/kernel/i8259.c
@@ -43,17 +43,10 @@
BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
BI(x,c) BI(x,d) BI(x,e) BI(x,f)
-#define BUILD_15_IRQS(x) \
- BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
- BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
- BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
- BI(x,c) BI(x,d) BI(x,e)
-
/*
* ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
* (these are usually mapped to vectors 0x20-0x2f)
*/
-BUILD_16_IRQS(0x0)
#ifdef CONFIG_X86_LOCAL_APIC
/*
@@ -66,19 +59,14 @@ BUILD_16_IRQS(0x0)
*
* (these are usually mapped into the 0x30-0xff vector range)
*/
- BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+ BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
-BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
-
-#ifdef CONFIG_PCI_MSI
- BUILD_15_IRQS(0xe)
-#endif
+BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf)
#endif
#undef BUILD_16_IRQS
-#undef BUILD_15_IRQS
#undef BI
@@ -91,26 +79,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
-#define IRQLIST_15(x) \
- IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
- IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
- IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
- IRQ(x,c), IRQ(x,d), IRQ(x,e)
-
void (*interrupt[NR_IRQS])(void) = {
- IRQLIST_16(0x0),
-
-#ifdef CONFIG_X86_IO_APIC
- IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+ IRQLIST_16(0x2), IRQLIST_16(0x3),
IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
- IRQLIST_16(0xc), IRQLIST_16(0xd)
-
-#ifdef CONFIG_PCI_MSI
- , IRQLIST_15(0xe)
-#endif
-
-#endif
+ IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf)
};
#undef IRQ
@@ -126,46 +99,21 @@ void (*interrupt[NR_IRQS])(void) = {
* moves to arch independent land
*/
-DEFINE_SPINLOCK(i8259A_lock);
-
-static void end_8259A_irq (unsigned int irq)
-{
- if (irq > 256) {
- char var;
- printk("return %p stack %p ti %p\n", __builtin_return_address(0), &var, task_thread_info(current));
-
- BUG();
- }
-
- if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
- irq_desc[irq].action)
- enable_8259A_irq(irq);
-}
-
-#define shutdown_8259A_irq disable_8259A_irq
-
static void mask_and_ack_8259A(unsigned int);
-static unsigned int startup_8259A_irq(unsigned int irq)
-{
- enable_8259A_irq(irq);
- return 0; /* never anything pending */
-}
-
-static struct hw_interrupt_type i8259A_irq_type = {
- .typename = "XT-PIC",
- .startup = startup_8259A_irq,
- .shutdown = shutdown_8259A_irq,
- .enable = enable_8259A_irq,
- .disable = disable_8259A_irq,
- .ack = mask_and_ack_8259A,
- .end = end_8259A_irq,
+static struct irq_chip i8259A_chip = {
+ .name = "XT-PIC",
+ .mask = disable_8259A_irq,
+ .unmask = enable_8259A_irq,
+ .mask_ack = mask_and_ack_8259A,
};
/*
* 8259A PIC functions to handle ISA devices:
*/
+DEFINE_RAW_SPINLOCK(i8259A_lock);
+
/*
* This contains the irq mask for both 8259A irq controllers,
*/
@@ -234,7 +182,7 @@ void make_8259A_irq(unsigned int irq)
{
disable_irq_nosync(irq);
io_apic_irqs &= ~(1<<irq);
- irq_desc[irq].chip = &i8259A_irq_type;
+ set_irq_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
enable_irq(irq);
}
@@ ... @@ void __init init_IRQ(void)
for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
int vector = FIRST_EXTERNAL_VECTOR + i;
- if (i >= NR_IRQS)
- break;
if (vector != IA32_SYSCALL_VECTOR)
set_intr_gate(vector, interrupt[i]);
}
@@ -557,7 +525,7 @@ void __init init_IRQ(void)
* IRQ0 must be given a fixed assignment and initialized,
* because it's used before the IO-APIC is set up.
*/
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+ __get_cpu_var(vector_irq)[FIRST_DEVICE_VECTOR] = 0;
/*
* The reschedule interrupt is a CPU-to-CPU reschedule-helper
Index: linux/arch/x86_64/kernel/io_apic.c
===================================================================
--- linux.orig/arch/x86_64/kernel/io_apic.c
+++ linux/arch/x86_64/kernel/io_apic.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -41,6 +42,9 @@
#include
#include
#include
+#include
+
+static int assign_irq_vector(int irq, cpumask_t mask);
#define __apicdebuginit __init
@@ -55,8 +59,8 @@ int timer_over_8254 __initdata = 0;
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(vector_lock);
/*
* # of IRQ routing registers
@@ -81,14 +85,6 @@ static struct irq_pin_list {
short apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
-#ifdef CONFIG_PCI_MSI
-#define vector_to_irq(vector) \
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
-#else
-#define vector_to_irq(vector) (vector)
-#endif
-
#define __DO_ACTION(R, ACTION, FINAL) \
\
{ \
@@ -104,6 +100,9 @@ int vector_irq[NR_VECTORS] __read_mostly
reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
reg ACTION; \
io_apic_modify(entry->apic, reg); \
+ /* Force posted-write flush by reading back: */ \
+ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
+ \
if (!entry->next) \
break; \
entry = irq_2_pin + entry->next; \
@@ -112,11 +111,35 @@ int vector_irq[NR_VECTORS] __read_mostly
}
#ifdef CONFIG_SMP
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+ int apic, pin;
+ struct irq_pin_list *entry = irq_2_pin + irq;
+
+ BUG_ON(irq >= NR_IRQS);
+ for (;;) {
+ unsigned int reg;
+ apic = entry->apic;
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~0x000000ff;
+ reg |= vector;
+ io_apic_modify(apic, reg);
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+}
+
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
{
unsigned long flags;
unsigned int dest;
cpumask_t tmp;
+ int vector;
cpus_and(tmp, mask, cpu_online_map);
if (cpus_empty(tmp))
@@ -124,7 +147,13 @@ static void set_ioapic_affinity_irq(unsi
cpus_and(mask, tmp, CPU_MASK_ALL);
- dest = cpu_mask_to_apicid(mask);
+ vector = assign_irq_vector(irq, mask);
+ if (vector < 0)
+ return;
+
+ cpus_clear(tmp);
+ cpu_set(vector >> 8, tmp);
+ dest = cpu_mask_to_apicid(tmp);
/*
* Only the high 8 bits are valid.
@@ -132,14 +161,12 @@ static void set_ioapic_affinity_irq(unsi
dest = SET_APIC_LOGICAL_ID(dest);
spin_lock_irqsave(&ioapic_lock, flags);
- __DO_ACTION(1, = dest, )
- set_irq_info(irq, mask);
+ __target_IO_APIC_irq(irq, dest, vector & 0xff);
+ set_native_irq_info(irq, mask);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
#endif
-static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
-
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
@@ -170,10 +197,8 @@ static void add_pin_to_irq(unsigned int
static void name##_IO_APIC_irq (unsigned int irq) \
__DO_ACTION(R, ACTION, FINAL)
-DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
- /* mask = 1 */
-DO_ACTION( __unmask, 0, &= 0xfffeffff, )
- /* mask = 0 */
+DO_ACTION( __mask, 0, |= 0x00010000, ) /* mask = 1 */
+DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */
static void mask_IO_APIC_irq (unsigned int irq)
{
@@ -695,64 +720,6 @@ static inline int irq_trigger(int idx)
return MPBIOS_trigger(idx);
}
-static int next_irq = 16;
-
-/*
- * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
- * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
- * from ACPI, which can reach 800 in large boxen.
- *
- * Compact the sparse GSI space into a sequential IRQ series and reuse
- * vectors if possible.
- */
-int gsi_irq_sharing(int gsi)
-{
- int i, tries, vector;
-
- BUG_ON(gsi >= NR_IRQ_VECTORS);
-
- if (platform_legacy_irq(gsi))
- return gsi;
-
- if (gsi_2_irq[gsi] != 0xFF)
- return (int)gsi_2_irq[gsi];
-
- tries = NR_IRQS;
- try_again:
- vector = assign_irq_vector(gsi);
-
- /*
- * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
- * use of vector and if found, return that IRQ. However, we never want
- * to share legacy IRQs, which usually have a different trigger mode
- * than PCI.
- */
- for (i = 0; i < NR_IRQS; i++)
- if (IO_APIC_VECTOR(i) == vector)
- break;
- if (platform_legacy_irq(i)) {
- if (--tries >= 0) {
- IO_APIC_VECTOR(i) = 0;
- goto try_again;
- }
- panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
- }
- if (i < NR_IRQS) {
- gsi_2_irq[gsi] = i;
- printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
- gsi, vector, i);
- return i;
- }
-
- i = next_irq++;
- BUG_ON(i >= NR_IRQS);
- gsi_2_irq[gsi] = i;
- IO_APIC_VECTOR(i) = vector;
- printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
- gsi, vector, i);
- return i;
-}
-
static int pin_2_irq(int idx, int apic, int pin)
{
int irq, i;
@@ -782,7 +749,6 @@ static int pin_2_irq(int idx, int apic,
while (i < apic)
irq += nr_ioapic_registers[i++];
irq += pin;
- irq = gsi_irq_sharing(irq);
break;
}
default:
@@ -830,46 +796,83 @@ static inline int IO_APIC_irq_trigger(in
}
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
+unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 };
-int assign_irq_vector(int irq)
+static int __assign_irq_vector(int irq, cpumask_t mask)
{
- static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
- unsigned long flags;
- int vector;
-
- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
-
- spin_lock_irqsave(&vector_lock, flags);
-
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
- spin_unlock_irqrestore(&vector_lock, flags);
- return IO_APIC_VECTOR(irq);
- }
+ /*
+ * NOTE! The local APIC isn't very good at handling
+ * multiple interrupts at the same interrupt level.
+ * As the interrupt level is determined by taking the
+ * vector number and shifting that right by 4, we
+ * want to spread these out a bit so that they don't
+ * all fall in the same interrupt level.
+ *
+ * Also, we've got to be careful not to trash gate
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
+ */
+ static struct {
+ int vector;
+ int offset;
+ } pos[NR_CPUS] = { [ 0 ... NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} };
+ int old_vector = -1;
+ int cpu;
+
+ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+
+ if (IO_APIC_VECTOR(irq) > 0)
+ old_vector = IO_APIC_VECTOR(irq);
+ if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) {
+ return old_vector;
+ }
+
+ for_each_cpu_mask(cpu, mask) {
+ int vector, offset;
+ vector = pos[cpu].vector;
+ offset = pos[cpu].offset;
next:
- current_vector += 8;
- if (current_vector == IA32_SYSCALL_VECTOR)
- goto next;
-
- if (current_vector >= FIRST_SYSTEM_VECTOR) {
- /* If we run out of vectors on large boxen, must share them. */
- offset = (offset + 1) % 8;
- current_vector = FIRST_DEVICE_VECTOR + offset;
+ vector += 8;
+ if (vector >= FIRST_SYSTEM_VECTOR) {
+ /* If we run out of vectors on large boxen, must share them. */
+ offset = (offset + 1) % 8;
+ vector = FIRST_DEVICE_VECTOR + offset;
+ }
+ if (unlikely(pos[cpu].vector == vector))
+ continue;
+ if (vector == IA32_SYSCALL_VECTOR)
+ goto next;
+ if (per_cpu(vector_irq, cpu)[vector] != -1)
+ goto next;
+ /* Found one! */
+ pos[cpu].vector = vector;
+ pos[cpu].offset = offset;
+ if (old_vector >= 0) {
+ int old_cpu = old_vector >> 8;
+ old_vector &= 0xff;
+ per_cpu(vector_irq, old_cpu)[old_vector] = -1;
+ }
+ per_cpu(vector_irq, cpu)[vector] = irq;
+ vector |= cpu << 8;
+ IO_APIC_VECTOR(irq) = vector;
+ return vector;
}
+ return -ENOSPC;
+}
- vector = current_vector;
- vector_irq[vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = vector;
+static int assign_irq_vector(int irq, cpumask_t mask)
+{
+ int vector;
+ unsigned long flags;
+ spin_lock_irqsave(&vector_lock, flags);
+ vector = __assign_irq_vector(irq, mask);
spin_unlock_irqrestore(&vector_lock, flags);
-
return vector;
}
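
Two things above are easy to miss: the walk steps by 8 so that consecutive allocations land in different interrupt levels (vector >> 4), and the returned value is not a bare vector but has the owning CPU packed into bits 8 and up, which is why callers use vector >> 8 as a CPU number and vector & 0xff as the hardware vector. A userspace sketch of the walk and packing (the three vector constants are assumptions matching typical x86_64 values):

#include <stdio.h>

#define FIRST_DEVICE_VECTOR	0x31	/* assumed values, see lead-in */
#define FIRST_SYSTEM_VECTOR	0xef
#define IA32_SYSCALL_VECTOR	0x80

int main(void)
{
	int vector = FIRST_DEVICE_VECTOR, offset = 0, i;

	for (i = 0; i < 8; i++) {
next:
		vector += 8;
		if (vector >= FIRST_SYSTEM_VECTOR) {
			/* out of vectors: rotate to the next offset */
			offset = (offset + 1) % 8;
			vector = FIRST_DEVICE_VECTOR + offset;
		}
		if (vector == IA32_SYSCALL_VECTOR)
			goto next;	/* never hand out int 0x80 */
		/* pack the owning cpu into bits 8+, as the code above does */
		printf("vector 0x%02x, packed for cpu 1: 0x%03x\n",
		       vector, (1 << 8) | vector);
	}
	return 0;
}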
extern void (*interrupt[NR_IRQS])(void);
-static struct hw_interrupt_type ioapic_level_type;
-static struct hw_interrupt_type ioapic_edge_type;
+
+static struct irq_chip ioapic_chip;
#define IOAPIC_AUTO -1
#define IOAPIC_EDGE 0
@@ -877,16 +880,16 @@ static struct hw_interrupt_type ioapic_e
static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
{
- unsigned idx;
-
- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
-
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
- trigger == IOAPIC_LEVEL)
- irq_desc[idx].chip = &ioapic_level_type;
- else
- irq_desc[idx].chip = &ioapic_edge_type;
- set_intr_gate(vector, interrupt[idx]);
+ trigger == IOAPIC_LEVEL) {
+#ifdef CONFIG_PREEMPT_HARDIRQS
+ set_irq_chip_and_handler(irq, &ioapic_chip, handle_level_irq);
+#else
+ set_irq_chip_and_handler(irq, &ioapic_chip, handle_fasteoi_irq);
+#endif
+ } else {
+ set_irq_chip_and_handler(irq, &ioapic_chip, handle_edge_irq);
+ }
}
static void __init setup_IO_APIC_irqs(void)
@@ -936,8 +939,15 @@ static void __init setup_IO_APIC_irqs(vo
continue;
if (IO_APIC_IRQ(irq)) {
- vector = assign_irq_vector(irq);
- entry.vector = vector;
+ cpumask_t mask;
+ vector = assign_irq_vector(irq, TARGET_CPUS);
+ if (vector < 0)
+ continue;
+
+ cpus_clear(mask);
+ cpu_set(vector >> 8, mask);
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+ entry.vector = vector & 0xff;
ioapic_register_intr(irq, vector, IOAPIC_AUTO);
if (!apic && (irq < 16))
@@ -987,7 +997,7 @@ static void __init setup_ExtINT_IRQ0_pin
* The timer IRQ doesn't have to know that behind the
* scene we have a 8259A-master in AEOI mode ...
*/
- irq_desc[0].chip = &ioapic_edge_type;
+ set_irq_chip_and_handler(0, &ioapic_chip, handle_edge_irq);
/*
* Add it to the IO-APIC irq-routing table:
@@ -1106,17 +1116,12 @@ void __apicdebuginit print_IO_APIC(void)
);
}
}
- if (use_pci_vector())
- printk(KERN_INFO "Using vector-based indexing\n");
printk(KERN_DEBUG "IRQ to pin mappings:\n");
for (i = 0; i < NR_IRQS; i++) {
struct irq_pin_list *entry = irq_2_pin + i;
if (entry->pin < 0)
continue;
- if (use_pci_vector() && !platform_legacy_irq(i))
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
- else
- printk(KERN_DEBUG "IRQ%d ", i);
+ printk(KERN_DEBUG "IRQ%d ", i);
for (;;) {
printk("-> %d:%d", entry->apic, entry->pin);
if (!entry->next)
@@ -1502,7 +1507,7 @@ static int __init timer_irq_works(void)
* an edge even if it isn't on the 8259A...
*/
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(unsigned int irq)
{
int was_pending = 0;
unsigned long flags;
@@ -1519,107 +1524,16 @@ static unsigned int startup_edge_ioapic_
return was_pending;
}
-/*
- * Once we have recorded IRQ_PENDING already, we can mask the
- * interrupt for real. This prevents IRQ storms from unhandled
- * devices.
- */
-static void ack_edge_ioapic_irq(unsigned int irq)
-{
- move_irq(irq);
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
- == (IRQ_PENDING | IRQ_DISABLED))
- mask_IO_APIC_irq(irq);
- ack_APIC_irq();
-}
-
-/*
- * Level triggered interrupts can just be masked,
- * and shutting down and starting up the interrupt
- * is the same as enabling and disabling them -- except
- * with a startup need to return a "was pending" value.
- *
- * Level triggered interrupts are special because we
- * do not touch any IO-APIC register while handling
- * them. We ack the APIC in the end-IRQ handler, not
- * in the start-IRQ-handler. Protection against reentrance
- * from the same interrupt is still provided, both by the
- * generic IRQ layer and by the fact that an unacked local
- * APIC does not accept IRQs.
- */
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
-{
- unmask_IO_APIC_irq(irq);
-
- return 0; /* don't check for pending */
-}
-
-static void end_level_ioapic_irq (unsigned int irq)
-{
- move_irq(irq);
- ack_APIC_irq();
-}
-
-#ifdef CONFIG_PCI_MSI
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+static int ioapic_retrigger_irq(unsigned int irq)
{
- int irq = vector_to_irq(vector);
+ cpumask_t mask;
+ unsigned vector;
- return startup_edge_ioapic_irq(irq);
-}
+ vector = irq_vector[irq];
+ cpus_clear(mask);
+ cpu_set(vector >> 8, mask);
-static void ack_edge_ioapic_vector(unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- ack_edge_ioapic_irq(irq);
-}
-
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- return startup_level_ioapic_irq (irq);
-}
-
-static void end_level_ioapic_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- move_native_irq(vector);
- end_level_ioapic_irq(irq);
-}
-
-static void mask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- mask_IO_APIC_irq(irq);
-}
-
-static void unmask_IO_APIC_vector (unsigned int vector)
-{
- int irq = vector_to_irq(vector);
-
- unmask_IO_APIC_irq(irq);
-}
-
-#ifdef CONFIG_SMP
-static void set_ioapic_affinity_vector (unsigned int vector,
- cpumask_t cpu_mask)
-{
- int irq = vector_to_irq(vector);
-
- set_native_irq_info(vector, cpu_mask);
- set_ioapic_affinity_irq(irq, cpu_mask);
-}
-#endif // CONFIG_SMP
-#endif // CONFIG_PCI_MSI
-
-static int ioapic_retrigger(unsigned int irq)
-{
- send_IPI_self(IO_APIC_VECTOR(irq));
+ send_IPI_mask(mask, vector & 0xff);
return 1;
}
@@ -1633,32 +1547,47 @@ static int ioapic_retrigger(unsigned int
* races.
*/
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
- .typename = "IO-APIC-edge",
- .startup = startup_edge_ioapic,
- .shutdown = shutdown_edge_ioapic,
- .enable = enable_edge_ioapic,
- .disable = disable_edge_ioapic,
- .ack = ack_edge_ioapic,
- .end = end_edge_ioapic,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
+static void ack_apic_edge(unsigned int irq)
+{
+ move_native_irq(irq);
+ ack_APIC_irq();
+}
+
+static void ack_apic_level(unsigned int irq)
+{
+ int do_unmask_irq = 0;
+
+#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
+ /* If we are moving the irq we need to mask it */
+ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+ do_unmask_irq = 1;
+ mask_IO_APIC_irq(irq);
+ }
#endif
- .retrigger = ioapic_retrigger,
-};
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
- .typename = "IO-APIC-level",
- .startup = startup_level_ioapic,
- .shutdown = shutdown_level_ioapic,
- .enable = enable_level_ioapic,
- .disable = disable_level_ioapic,
- .ack = mask_and_ack_level_ioapic,
- .end = end_level_ioapic,
+ /*
+ * We must acknowledge the irq before we move it or the acknowledge will
+ * not propagate properly.
+ */
+ ack_APIC_irq();
+
+ /* Now we can move and re-enable the irq */
+ move_masked_irq(irq);
+ if (unlikely(do_unmask_irq))
+ unmask_IO_APIC_irq(irq);
+}
+
+static struct irq_chip ioapic_chip __read_mostly = {
+ .name = "IO-APIC",
+ .startup = startup_ioapic_irq,
+ .mask = mask_IO_APIC_irq,
+ .unmask = unmask_IO_APIC_irq,
+ .ack = ack_apic_edge,
+ .eoi = ack_apic_level,
#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity,
+ .set_affinity = set_ioapic_affinity_irq,
#endif
- .retrigger = ioapic_retrigger,
+ .retrigger = ioapic_retrigger_irq,
};
static inline void init_IO_APIC_traps(void)
@@ -1678,11 +1607,6 @@ static inline void init_IO_APIC_traps(vo
*/
for (irq = 0; irq < NR_IRQS ; irq++) {
int tmp = irq;
- if (use_pci_vector()) {
- if (!platform_legacy_irq(tmp))
- if ((tmp = vector_to_irq(tmp)) == -1)
- continue;
- }
if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
/*
* Hmm.. We don't have an entry for this,
@@ -1693,7 +1617,7 @@ static inline void init_IO_APIC_traps(vo
make_8259A_irq(irq);
else
/* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_type;
+ irq_desc[irq].chip = &no_irq_chip;
}
}
}
@@ -1812,8 +1736,6 @@ static inline void unlock_ExtINT_logic(v
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-int timer_uses_ioapic_pin_0;
-
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -1831,8 +1753,7 @@ static inline void check_timer(void)
* get/set the timer IRQ vector:
*/
disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
+ vector = assign_irq_vector(0, TARGET_CPUS);
/*
* Subtle, code in do_timer_interrupt() expects an AEOI
@@ -1851,9 +1772,6 @@ static inline void check_timer(void)
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
- if (pin1 == 0)
- timer_uses_ioapic_pin_0 = 1;
-
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
@@ -2069,6 +1987,124 @@ static int __init ioapic_init_sysfs(void
device_initcall(ioapic_init_sysfs);
+/*
+ * Dynamic irq allocate and deallocation
+ */
+int create_irq(void)
+{
+ /* Allocate an unused irq */
+ int irq;
+ int new;
+ int vector = 0;
+ unsigned long flags;
+
+ irq = -ENOSPC;
+ spin_lock_irqsave(&vector_lock, flags);
+ for (new = (NR_IRQS - 1); new >= 0; new--) {
+ if (platform_legacy_irq(new))
+ continue;
+ if (irq_vector[new] != 0)
+ continue;
+ vector = __assign_irq_vector(new, TARGET_CPUS);
+ if (likely(vector > 0))
+ irq = new;
+ break;
+ }
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (irq >= 0) {
+ dynamic_irq_init(irq);
+ }
+ return irq;
+}
+
+void destroy_irq(unsigned int irq)
+{
+ unsigned long flags;
+
+ dynamic_irq_cleanup(irq);
+
+ spin_lock_irqsave(&vector_lock, flags);
+ irq_vector[irq] = 0;
+ spin_unlock_irqrestore(&vector_lock, flags);
+}
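
A hypothetical consumer of this pair, to illustrate the intended lifecycle (my_handler, my_attach and my_detach are invented names for illustration; the MSI code below is the real in-tree user, and it also binds an irq_chip before the irq is requested):

#include <linux/interrupt.h>

static irqreturn_t my_handler(int irq, void *dev_id, struct pt_regs *regs)
{
	/* acknowledge and service the device here */
	return IRQ_HANDLED;
}

static int my_attach(void *my_dev)
{
	int irq = create_irq();		/* reserves an irq and a vector */

	if (irq < 0)
		return irq;
	if (request_irq(irq, my_handler, 0, "my_dev", my_dev)) {
		destroy_irq(irq);	/* give the vector back on failure */
		return -EBUSY;
	}
	return irq;
}

static void my_detach(int irq, void *my_dev)
{
	free_irq(irq, my_dev);
	destroy_irq(irq);
}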
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_PCI_MSI
+static int msi_msg_setup(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+{
+ /* For now this code always uses physical delivery
+ * mode.
+ */
+ int vector;
+ unsigned dest;
+
+ vector = assign_irq_vector(irq, TARGET_CPUS);
+ if (vector >= 0) {
+ cpumask_t tmp;
+
+ cpus_clear(tmp);
+ cpu_set(vector >> 8, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo =
+ MSI_ADDR_BASE_LO |
+ ((INT_DEST_MODE == 0) ?
+ MSI_ADDR_DEST_MODE_PHYSICAL:
+ MSI_ADDR_DEST_MODE_LOGICAL) |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_DEST_ID(dest);
+
+ msg->data =
+ MSI_DATA_TRIGGER_EDGE |
+ MSI_DATA_LEVEL_ASSERT |
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_VECTOR(vector);
+ }
+ return vector;
+}
+
+static void msi_msg_teardown(unsigned int irq)
+{
+ return;
+}
+
+static void msi_msg_set_affinity(unsigned int irq, cpumask_t mask, struct msi_msg *msg)
+{
+ int vector;
+ unsigned dest;
+
+ vector = assign_irq_vector(irq, mask);
+ if (vector > 0) {
+ cpumask_t tmp;
+
+ cpus_clear(tmp);
+ cpu_set(vector >> 8, tmp);
+ dest = cpu_mask_to_apicid(tmp);
+
+ msg->data &= ~MSI_DATA_VECTOR_MASK;
+ msg->data |= MSI_DATA_VECTOR(vector);
+ msg->address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg->address_lo |= MSI_ADDR_DEST_ID(dest);
+ }
+}
+
+struct msi_ops arch_msi_ops = {
+ .needs_64bit_address = 0,
+ .setup = msi_msg_setup,
+ .teardown = msi_msg_teardown,
+ .target = msi_msg_set_affinity,
+};
+
+#endif
+
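
The field packing msi_msg_setup() performs can be illustrated standalone, assuming the architectural x86 MSI layout (destination APIC ID in address bits 19:12 above the fixed 0xFEE00000 base, vector in data bits 7:0; the real macro values live in asm/msidef.h):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned dest = 3;	/* target APIC id (assumed example value) */
	unsigned vector = 0x31;	/* low 8 bits of an assign_irq_vector() result */

	uint32_t address_lo = 0xfee00000u | (dest << 12);
	uint32_t data = vector & 0xff;	/* mode bits omitted in this sketch */

	printf("address_lo=0x%08x data=0x%08x\n", address_lo, data);
	return 0;
}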
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
@@ -2107,6 +2143,8 @@ int io_apic_set_pci_routing (int ioapic,
{
struct IO_APIC_route_entry entry;
unsigned long flags;
+ int vector;
+ cpumask_t mask;
if (!IO_APIC_IRQ(irq)) {
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -2115,6 +2153,20 @@ int io_apic_set_pci_routing (int ioapic,
}
/*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= 16)
+ add_pin_to_irq(irq, ioapic, pin);
+
+
+ vector = assign_irq_vector(irq, TARGET_CPUS);
+ if (vector < 0)
+ return vector;
+
+ cpus_clear(mask);
+ cpu_set(vector >> 8, mask);
+
+ /*
* Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
* Note that we mask (disable) IRQs now -- these get enabled when the
* corresponding device driver registers for this IRQ.
@@ -2124,19 +2176,11 @@ int io_apic_set_pci_routing (int ioapic,
entry.delivery_mode = INT_DELIVERY_MODE;
entry.dest_mode = INT_DEST_MODE;
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
entry.trigger = triggering;
entry.polarity = polarity;
entry.mask = 1; /* Disabled (masked) */
-
- irq = gsi_irq_sharing(irq);
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= 16)
- add_pin_to_irq(irq, ioapic, pin);
-
- entry.vector = assign_irq_vector(irq);
+ entry.vector = vector & 0xff;
apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
"IRQ %d Mode:%i Active:%i)\n", ioapic,
@@ -2151,7 +2195,7 @@ int io_apic_set_pci_routing (int ioapic,
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
+ set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
return 0;
Index: linux/arch/x86_64/kernel/irq.c
===================================================================
--- linux.orig/arch/x86_64/kernel/irq.c
+++ linux/arch/x86_64/kernel/irq.c
@@ -79,7 +79,8 @@ int show_interrupts(struct seq_file *p,
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
- seq_printf(p, " %14s", irq_desc[i].chip->typename);
+ seq_printf(p, " %8s", irq_desc[i].chip->name);
+ seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq));
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
@@ -116,7 +117,18 @@ skip:
asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
{
/* high bit used in ret_from_ code */
- unsigned irq = ~regs->orig_rax;
+ unsigned vector = ~regs->orig_rax;
+ unsigned irq;
+
+ exit_idle();
+ irq_enter();
+ irq = __get_cpu_var(vector_irq)[vector];
+
+#ifdef CONFIG_LATENCY_TRACE
+ if (irq == trace_user_trigger_irq)
+ user_trace_start();
+#endif
+ trace_special(regs->rip, irq, 0);
if (unlikely(irq >= NR_IRQS)) {
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
@@ -124,12 +136,24 @@ asmlinkage unsigned int do_IRQ(struct pt
BUG();
}
- exit_idle();
- irq_enter();
#ifdef CONFIG_DEBUG_STACKOVERFLOW
stack_overflow_check(regs);
#endif
- __do_IRQ(irq, regs);
+#ifdef CONFIG_NO_HZ
+ if (idle_cpu(smp_processor_id())) {
+ update_jiffies();
+ /*
+ * Force polling-idle loops to break out into
+ * the sched-timer setting code, to make sure
+ * that timer interval changes due to __mod_timer()
+ * in IRQ context get properly propagated:
+ */
+ if (tsk_is_polling(current))
+ set_need_resched();
+ }
+#endif
+
+ generic_handle_irq(irq, regs);
irq_exit();
return 1;
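
A sketch of the per-CPU translation do_IRQ() now performs: the APIC hands the CPU a vector, and each CPU owns a private vector_irq[] table mapping that vector back to a Linux irq number, so the same vector can serve different irqs on different CPUs (table sizes below are assumed for illustration):

#include <stdio.h>

#define NR_VECTORS	256
#define SKETCH_CPUS	4

static int vector_irq[SKETCH_CPUS][NR_VECTORS];

int main(void)
{
	int cpu, v;

	for (cpu = 0; cpu < SKETCH_CPUS; cpu++)
		for (v = 0; v < NR_VECTORS; v++)
			vector_irq[cpu][v] = -1;	/* -1 = unassigned */

	vector_irq[1][0x31] = 9;	/* irq 9 bound to vector 0x31 on cpu 1 */

	printf("cpu1 vector 0x31 -> irq %d\n", vector_irq[1][0x31]);
	printf("cpu0 vector 0x31 -> irq %d\n", vector_irq[0][0x31]);
	return 0;
}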
Index: linux/arch/x86_64/kernel/mpparse.c
===================================================================
--- linux.orig/arch/x86_64/kernel/mpparse.c
+++ linux/arch/x86_64/kernel/mpparse.c
@@ -909,20 +909,11 @@ void __init mp_config_acpi_legacy_irqs (
return;
}
-#define MAX_GSI_NUM 4096
-
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
int ioapic = -1;
int ioapic_pin = 0;
int idx, bit = 0;
- static int pci_irq = 16;
- /*
- * Mapping between Global System Interrupts, which
- * represent all possible interrupts, to the IRQs
- * assigned to actual devices.
- */
- static int gsi_to_irq[MAX_GSI_NUM];
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
@@ -955,42 +946,11 @@ int mp_register_gsi(u32 gsi, int trigger
if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- return gsi_to_irq[gsi];
+ return gsi;
}
mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
- if (triggering == ACPI_LEVEL_SENSITIVE) {
- /*
- * For PCI devices assign IRQs in order, avoiding gaps
- * due to unused I/O APIC pins.
- */
- int irq = gsi;
- if (gsi < MAX_GSI_NUM) {
- /*
- * Retain the VIA chipset work-around (gsi > 15), but
- * avoid a problem where the 8254 timer (IRQ0) is setup
- * via an override (so it's not on pin 0 of the ioapic),
- * and at the same time, the pin 0 interrupt is a PCI
- * type. The gsi > 15 test could cause these two pins
- * to be shared as IRQ0, and they are not shareable.
- * So test for this condition, and if necessary, avoid
- * the pin collision.
- */
- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
- gsi = pci_irq++;
- /*
- * Don't assign IRQ used by ACPI SCI
- */
- if (gsi == acpi_fadt.sci_int)
- gsi = pci_irq++;
- gsi_to_irq[irq] = gsi;
- } else {
- printk(KERN_ERR "GSI %u is too high\n", gsi);
- return gsi;
- }
- }
-
io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
Index: linux/arch/x86_64/kernel/nmi.c
===================================================================
--- linux.orig/arch/x86_64/kernel/nmi.c
+++ linux/arch/x86_64/kernel/nmi.c
@@ -37,7 +37,7 @@
* This is maintained separately from nmi_active because the NMI
* watchdog may also be driven from the I/O APIC timer.
*/
-static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
+static DEFINE_RAW_SPINLOCK(lapic_nmi_owner_lock);
static unsigned int lapic_nmi_owner;
#define LAPIC_NMI_WATCHDOG (1<<0)
#define LAPIC_NMI_RESERVED (1<<1)
@@ -127,7 +127,9 @@ void __cpuinit nmi_watchdog_default(void
static __init void nmi_cpu_busy(void *data)
{
volatile int *endflag = data;
+#ifndef CONFIG_PREEMPT_RT
local_irq_enable_in_hardirq();
+#endif
/* Intentionally don't use cpu_relax here. This is
to make sure that the performance counter really ticks,
even if there is a simulator or similar that catches the
@@ -526,12 +528,42 @@ void touch_nmi_watchdog (void)
touch_softlockup_watchdog();
}
+int nmi_show_regs[NR_CPUS];
+
+void nmi_show_all_regs(void)
+{
+ int i;
+
+ if (nmi_watchdog == NMI_NONE)
+ return;
+ if (system_state != SYSTEM_RUNNING) {
+ printk("nmi_show_all_regs(): system state %d, not doing.\n",
+ system_state);
+ return;
+ }
+
+ for_each_online_cpu(i)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+}
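
The flag handshake above, sketched with threads standing in for CPUs (volatile plays the role the kernel's barrier() plays in the busy-wait; the flag protocol, not NMI delivery, is the point):

#include <pthread.h>
#include <stdio.h>

#define SKETCH_CPUS 2

static volatile int show_regs_flag[SKETCH_CPUS];

static void *nmi_handler(void *arg)
{
	int cpu = (int)(long)arg;

	while (!show_regs_flag[cpu])
		;			/* wait for a dump request */
	printf("cpu%d: dumping regs\n", cpu);
	show_regs_flag[cpu] = 0;	/* ack: requester may proceed */
	return NULL;
}

int main(void)
{
	pthread_t t[SKETCH_CPUS];
	long i;

	for (i = 0; i < SKETCH_CPUS; i++)
		pthread_create(&t[i], NULL, nmi_handler, (void *)i);
	for (i = 0; i < SKETCH_CPUS; i++)
		show_regs_flag[i] = 1;		/* request a dump everywhere */
	for (i = 0; i < SKETCH_CPUS; i++)
		while (show_regs_flag[i] == 1)
			;			/* spin until that CPU acked */
	for (i = 0; i < SKETCH_CPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}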
+
+static DEFINE_RAW_SPINLOCK(nmi_print_lock);
+
void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
{
int sum;
int touched = 0;
+ int cpu = safe_smp_processor_id();
sum = read_pda(apic_timer_irqs);
+ if (nmi_show_regs[cpu]) {
+ nmi_show_regs[cpu] = 0;
+ spin_lock(&nmi_print_lock);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+ }
if (__get_cpu_var(nmi_touch)) {
__get_cpu_var(nmi_touch) = 0;
touched = 1;
@@ -549,6 +581,11 @@ void __kprobes nmi_watchdog_tick(struct
*/
local_inc(&__get_cpu_var(alert_counter));
if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) {
+ int i;
+
+ for (i = 0; i < NR_CPUS; i++)
+ nmi_show_regs[i] = 1;
+
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
== NOTIFY_STOP) {
local_set(&__get_cpu_var(alert_counter), 0);
Index: linux/arch/x86_64/kernel/pmtimer.c
===================================================================
--- linux.orig/arch/x86_64/kernel/pmtimer.c
+++ linux/arch/x86_64/kernel/pmtimer.c
@@ -24,15 +24,6 @@
#include
#include
-/* The I/O port the PMTMR resides at.
- * The location is detected during setup_arch(),
- * in arch/i386/kernel/acpi/boot.c */
-u32 pmtmr_ioport __read_mostly;
-
-/* value of the Power timer at last timer interrupt */
-static u32 offset_delay;
-static u32 last_pmtmr_tick;
-
#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
static inline u32 cyc2us(u32 cycles)
@@ -48,38 +39,6 @@ static inline u32 cyc2us(u32 cycles)
return (cycles >> 10);
}
-int pmtimer_mark_offset(void)
-{
- static int first_run = 1;
- unsigned long tsc;
- u32 lost;
-
- u32 tick = inl(pmtmr_ioport);
- u32 delta;
-
- delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK);
-
- last_pmtmr_tick = tick;
- monotonic_base += delta * NSEC_PER_USEC;
-
- delta += offset_delay;
-
- lost = delta / (USEC_PER_SEC / HZ);
- offset_delay = delta % (USEC_PER_SEC / HZ);
-
- rdtscll(tsc);
- vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000;
-
- /* don't calculate delay for first run,
- or if we've got less then a tick */
- if (first_run || (lost < 1)) {
- first_run = 0;
- offset_delay = 0;
- }
-
- return lost - 1;
-}
-
static unsigned pmtimer_wait_tick(void)
{
u32 a, b;
@@ -101,23 +60,6 @@ void pmtimer_wait(unsigned us)
} while (cyc2us(b - a) < us);
}
-void pmtimer_resume(void)
-{
- last_pmtmr_tick = inl(pmtmr_ioport);
-}
-
-unsigned int do_gettimeoffset_pm(void)
-{
- u32 now, offset, delta = 0;
-
- offset = last_pmtmr_tick;
- now = inl(pmtmr_ioport);
- delta = (now - offset) & ACPI_PM_MASK;
-
- return offset_delay + cyc2us(delta);
-}
-
-
static int __init nopmtimer_setup(char *s)
{
pmtmr_ioport = 0;
Index: linux/arch/x86_64/kernel/process.c
===================================================================
--- linux.orig/arch/x86_64/kernel/process.c
+++ linux/arch/x86_64/kernel/process.c
@@ -113,11 +113,15 @@ static void default_idle(void)
current_thread_info()->status &= ~TS_POLLING;
smp_mb__after_clear_bit();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
local_irq_disable();
- if (!need_resched())
- safe_halt();
- else
+ if (!need_resched() && !need_resched_delayed()) {
+ if (!hrtimer_stop_sched_tick())
+ safe_halt();
+ else
+ local_irq_enable();
+ hrtimer_restart_sched_tick();
+ } else
local_irq_enable();
}
current_thread_info()->status |= TS_POLLING;
@@ -131,6 +135,14 @@ static void default_idle(void)
static void poll_idle (void)
{
local_irq_enable();
+ while (!need_resched() && !need_resched_delayed()) {
+ hrtimer_stop_sched_tick();
+ local_irq_enable();
+ while (!need_resched() && !need_resched_delayed() && !rcu_pending(smp_processor_id()) && !local_softirq_pending())
+ rep_nop();
+ hrtimer_restart_sched_tick();
+ local_irq_enable();
+ }
asm volatile(
"2:"
@@ -206,7 +218,9 @@ void cpu_idle (void)
current_thread_info()->status |= TS_POLLING;
/* endless idle loop with no priority at all */
while (1) {
- while (!need_resched()) {
+ BUG_ON(irqs_disabled());
+
+ while (!need_resched() && !need_resched_delayed()) {
void (*idle)(void);
if (__get_cpu_var(cpu_idle_state))
@@ -218,14 +232,16 @@ void cpu_idle (void)
idle = default_idle;
if (cpu_is_offline(smp_processor_id()))
play_dead();
+ stop_critical_timing();
enter_idle();
idle();
__exit_idle();
}
-
- preempt_enable_no_resched();
- schedule();
+ local_irq_disable();
+ __preempt_enable_no_resched();
+ __schedule();
preempt_disable();
+ local_irq_enable();
}
}
@@ -240,13 +256,16 @@ static void mwait_idle(void)
{
local_irq_enable();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
+ if (hrtimer_stop_sched_tick())
+ break;
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
- if (need_resched())
+ if (need_resched() && !need_resched_delayed())
break;
__mwait(0, 0);
}
+ hrtimer_restart_sched_tick();
}
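
All three idle routines now share the same dynamic-tick shape; a stub sketch of that control flow (every function below is a stand-in, and a nonzero hrtimer_stop_sched_tick() return is read as "an event is imminent, skip the halt", mirroring mwait_idle() above):

#include <stdio.h>

static int need_work;

static int need_resched(void)                { return need_work; }
static int need_resched_delayed(void)        { return 0; }
static int hrtimer_stop_sched_tick(void)     { return 0; }
static void hrtimer_restart_sched_tick(void) { }
static void wait_for_interrupt(void)         { need_work = 1; }

int main(void)
{
	while (!need_resched() && !need_resched_delayed()) {
		if (hrtimer_stop_sched_tick())
			break;			/* event imminent: don't halt */
		wait_for_interrupt();		/* hlt/mwait/poll, per routine */
	}
	hrtimer_restart_sched_tick();		/* re-arm the periodic tick */
	printf("idle loop exited\n");
	return 0;
}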
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
@@ -346,13 +365,14 @@ void exit_thread(void)
struct thread_struct *t = &me->thread;
if (me->thread.io_bitmap_ptr) {
- struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+ struct tss_struct *tss;
kfree(t->io_bitmap_ptr);
t->io_bitmap_ptr = NULL;
/*
* Careful, clear this in the TSS too:
*/
+ tss = &per_cpu(init_tss, get_cpu());
memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
t->io_bitmap_max = 0;
put_cpu();
Index: linux/arch/x86_64/kernel/setup64.c
===================================================================
--- linux.orig/arch/x86_64/kernel/setup64.c
+++ linux/arch/x86_64/kernel/setup64.c
@@ -116,7 +116,7 @@ void __init setup_per_cpu_areas(void)
}
}
-void pda_init(int cpu)
+void notrace pda_init(int cpu)
{
struct x8664_pda *pda = cpu_pda(cpu);
@@ -185,7 +185,7 @@ void __cpuinit check_efer(void)
* 'CPU state barrier', nothing should get across.
* A lot of state is already set up in PDA init.
*/
-void __cpuinit cpu_init (void)
+void __cpuinit notrace cpu_init (void)
{
int cpu = stack_smp_processor_id();
struct tss_struct *t = &per_cpu(init_tss, cpu);
Index: linux/arch/x86_64/kernel/signal.c
===================================================================
--- linux.orig/arch/x86_64/kernel/signal.c
+++ linux/arch/x86_64/kernel/signal.c
@@ -431,6 +431,13 @@ int do_signal(struct pt_regs *regs, sigs
siginfo_t info;
int signr;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux/arch/x86_64/kernel/smp.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smp.c
+++ linux/arch/x86_64/kernel/smp.c
@@ -57,7 +57,7 @@ union smp_flush_state {
struct mm_struct *flush_mm;
unsigned long flush_va;
#define FLUSH_ALL -1ULL
- spinlock_t tlbstate_lock;
+ raw_spinlock_t tlbstate_lock;
};
char pad[SMP_CACHE_BYTES];
} ____cacheline_aligned;
@@ -296,10 +296,20 @@ void smp_send_reschedule(int cpu)
}
/*
+ * this function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ send_IPI_allbutself(RESCHEDULE_VECTOR);
+}
+
+/*
* Structure and data for smp_call_function(). This is designed to minimise
* static memory requirements. It also looks cleaner.
*/
-static DEFINE_SPINLOCK(call_lock);
+static DEFINE_RAW_SPINLOCK(call_lock);
struct call_data_struct {
void (*func) (void *info);
Index: linux/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux.orig/arch/x86_64/kernel/smpboot.c
+++ linux/arch/x86_64/kernel/smpboot.c
@@ -204,7 +204,7 @@ static void __cpuinit smp_store_cpu_info
latency and low latency is the primary objective here. -AK */
#define no_cpu_relax() barrier()
-static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
+static __cpuinitdata __DEFINE_RAW_SPINLOCK(tsc_sync_lock);
static volatile __cpuinitdata unsigned long go[SLAVE + 1];
static int notscsync __cpuinitdata;
@@ -530,7 +530,7 @@ static inline void set_cpu_sibling_map(i
/*
* Setup code on secondary processor (after comming out of the trampoline)
*/
-void __cpuinit start_secondary(void)
+void __cpuinit notrace start_secondary(void)
{
/*
* Dont put anything before smp_callin(), SMP
Index: linux/arch/x86_64/kernel/time.c
===================================================================
--- linux.orig/arch/x86_64/kernel/time.c
+++ linux/arch/x86_64/kernel/time.c
@@ -39,149 +39,29 @@
#include
#include
#include
+#include
+#include
#ifdef CONFIG_X86_LOCAL_APIC
#include
#endif
+#include
-#ifdef CONFIG_CPU_FREQ
-static void cpufreq_delayed_get(void);
-#endif
extern void i8254_timer_resume(void);
extern int using_apic_timer;
+extern struct clock_event pit_clockevent;
-static char *time_init_gtod(void);
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-DEFINE_SPINLOCK(i8253_lock);
-
-int nohpet __initdata = 0;
-static int notsc __initdata = 0;
+DEFINE_RAW_SPINLOCK(i8253_lock);
#define USEC_PER_TICK (USEC_PER_SEC / HZ)
#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
-#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
-#define NS_SCALE 10 /* 2^10, carefully chosen */
-#define US_SCALE 32 /* 2^32, arbitralrily chosen */
-unsigned int cpu_khz; /* TSC clocks / usec, not used here */
-EXPORT_SYMBOL(cpu_khz);
-static unsigned long hpet_period; /* fsecs / HPET clock */
-unsigned long hpet_tick; /* HPET clocks / interrupt */
-int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
-unsigned long vxtime_hz = PIT_TICK_RATE;
int report_lost_ticks; /* command line option */
-unsigned long long monotonic_base;
-
-struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
-
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
-struct timespec __xtime __section_xtime;
-struct timezone __sys_tz __section_sys_tz;
-
-/*
- * do_gettimeoffset() returns microseconds since last timer interrupt was
- * triggered by hardware. A memory read of HPET is slower than a register read
- * of TSC, but much more reliable. It's also synchronized to the timer
- * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
- * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
- * This is not a problem, because jiffies hasn't updated either. They are bound
- * together by xtime_lock.
- */
-
-static inline unsigned int do_gettimeoffset_tsc(void)
-{
- unsigned long t;
- unsigned long x;
- t = get_cycles_sync();
- if (t < vxtime.last_tsc)
- t = vxtime.last_tsc; /* hack */
- x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
- return x;
-}
-
-static inline unsigned int do_gettimeoffset_hpet(void)
-{
- /* cap counter read to one tick to avoid inconsistencies */
- unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
- return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
-}
-
-unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
-
-/*
- * This version of gettimeofday() has microsecond resolution and better than
- * microsecond precision, as we're using at least a 10 MHz (usually 14.31818
- * MHz) HPET timer.
- */
-
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq, t;
- unsigned int sec, usec;
-
- do {
- seq = read_seqbegin(&xtime_lock);
-
- sec = xtime.tv_sec;
- usec = xtime.tv_nsec / NSEC_PER_USEC;
-
- /* i386 does some correction here to keep the clock
- monotonous even when ntpd is fixing drift.
- But they didn't work for me, there is a non monotonic
- clock anyways with ntp.
- I dropped all corrections now until a real solution can
- be found. Note when you fix it here you need to do the same
- in arch/x86_64/kernel/vsyscall.c and export all needed
- variables in vmlinux.lds. -AK */
-
- t = (jiffies - wall_jiffies) * USEC_PER_TICK +
- do_gettimeoffset();
- usec += t;
-
- } while (read_seqretry(&xtime_lock, seq));
-
- tv->tv_sec = sec + usec / USEC_PER_SEC;
- tv->tv_usec = usec % USEC_PER_SEC;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-/*
- * settimeofday() first undoes the correction that gettimeofday would do
- * on the time, and then saves it. This is ugly, but has been like this for
- * ages already.
- */
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
-
- nsec -= do_gettimeoffset() * NSEC_PER_USEC +
- (jiffies - wall_jiffies) * NSEC_PER_TICK;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
-
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-EXPORT_SYMBOL(do_settimeofday);
+volatile unsigned long jiffies = INITIAL_JIFFIES;
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -277,84 +157,9 @@ static void set_rtc_mmss(unsigned long n
}
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- unsigned long seq;
- u32 last_offset, this_offset, offset;
- unsigned long long base;
-
- if (vxtime.mode == VXTIME_HPET) {
- do {
- seq = read_seqbegin(&xtime_lock);
-
- last_offset = vxtime.last;
- base = monotonic_base;
- this_offset = hpet_readl(HPET_COUNTER);
- } while (read_seqretry(&xtime_lock, seq));
- offset = (this_offset - last_offset);
- offset *= NSEC_PER_TICK / hpet_tick;
- } else {
- do {
- seq = read_seqbegin(&xtime_lock);
-
- last_offset = vxtime.last_tsc;
- base = monotonic_base;
- } while (read_seqretry(&xtime_lock, seq));
- this_offset = get_cycles_sync();
- /* FIXME: 1000 or 1000000? */
- offset = (this_offset - last_offset)*1000 / cpu_khz;
- }
- return base + offset;
-}
-EXPORT_SYMBOL(monotonic_clock);
-
-static noinline void handle_lost_ticks(int lost, struct pt_regs *regs)
-{
- static long lost_count;
- static int warned;
- if (report_lost_ticks) {
- printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost);
- print_symbol("rip %s)\n", regs->rip);
- }
-
- if (lost_count == 1000 && !warned) {
- printk(KERN_WARNING "warning: many lost ticks.\n"
- KERN_WARNING "Your time source seems to be instable or "
- "some driver is hogging interupts\n");
- print_symbol("rip %s\n", regs->rip);
- if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) {
- printk(KERN_WARNING "Falling back to HPET\n");
- if (hpet_use_timer)
- vxtime.last = hpet_readl(HPET_T0_CMP) -
- hpet_tick;
- else
- vxtime.last = hpet_readl(HPET_COUNTER);
- vxtime.mode = VXTIME_HPET;
- do_gettimeoffset = do_gettimeoffset_hpet;
- }
- /* else should fall back to PIT, but code missing. */
- warned = 1;
- } else
- lost_count++;
-
-#ifdef CONFIG_CPU_FREQ
- /* In some cases the CPU can change frequency without us noticing
- Give cpufreq a change to catch up. */
- if ((lost_count+1) % 25 == 0)
- cpufreq_delayed_get();
-#endif
-}
-
void main_timer_handler(struct pt_regs *regs)
{
static unsigned long rtc_update = 0;
- unsigned long tsc;
- int delay = 0, offset = 0, lost = 0;
-
/*
* Here we are in the timer irq handler. We have irqs locally disabled (so we
* don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
@@ -362,92 +167,11 @@ void main_timer_handler(struct pt_regs *
* variables, because both do_timer() and us change them -arca+vojtech
*/
- write_seqlock(&xtime_lock);
-
- if (vxtime.hpet_address)
- offset = hpet_readl(HPET_COUNTER);
-
- if (hpet_use_timer) {
- /* if we're using the hpet timer functionality,
- * we can more accurately know the counter value
- * when the timer interrupt occured.
- */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- delay = hpet_readl(HPET_COUNTER) - offset;
- } else if (!pmtmr_ioport) {
- spin_lock(&i8253_lock);
- outb_p(0x00, 0x43);
- delay = inb_p(0x40);
- delay |= inb(0x40) << 8;
- spin_unlock(&i8253_lock);
- delay = LATCH - 1 - delay;
- }
-
- tsc = get_cycles_sync();
-
- if (vxtime.mode == VXTIME_HPET) {
- if (offset - vxtime.last > hpet_tick) {
- lost = (offset - vxtime.last) / hpet_tick - 1;
- }
-
- monotonic_base +=
- (offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
-
- vxtime.last = offset;
-#ifdef CONFIG_X86_PM_TIMER
- } else if (vxtime.mode == VXTIME_PMTMR) {
- lost = pmtimer_mark_offset();
-#endif
- } else {
- offset = (((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
-
- if (offset < 0)
- offset = 0;
-
- if (offset > USEC_PER_TICK) {
- lost = offset / USEC_PER_TICK;
- offset %= USEC_PER_TICK;
- }
-
- /* FIXME: 1000 or 1000000? */
- monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
-
- vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
-
- if ((((tsc - vxtime.last_tsc) *
- vxtime.tsc_quot) >> US_SCALE) < offset)
- vxtime.last_tsc = tsc -
- (((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
- }
-
- if (lost > 0) {
- handle_lost_ticks(lost, regs);
- jiffies += lost;
- }
-
/*
* Do the timer stuff.
*/
- do_timer(regs);
-#ifndef CONFIG_SMP
- update_process_times(user_mode(regs));
-#endif
-
-/*
- * In the SMP case we use the local APIC timer interrupt to do the profiling,
- * except when we simulate SMP mode on a uniprocessor system, in that case we
- * have to call the local interrupt handler.
- */
-
-#ifndef CONFIG_X86_LOCAL_APIC
- profile_tick(CPU_PROFILING, regs);
-#else
- if (!using_apic_timer)
- smp_local_timer_interrupt(regs);
-#endif
-
+ pit_clockevent.event_handler(regs);
/*
* If we have an externally synchronized Linux clock, then update CMOS clock
* accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
@@ -462,13 +186,10 @@ void main_timer_handler(struct pt_regs *
rtc_update = xtime.tv_sec + 660;
}
- write_sequnlock(&xtime_lock);
}
static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- if (apic_runs_main_timer > 1)
- return IRQ_HANDLED;
main_timer_handler(regs);
#ifdef CONFIG_X86_LOCAL_APIC
if (using_apic_timer)
@@ -477,39 +198,6 @@ static irqreturn_t timer_interrupt(int i
return IRQ_HANDLED;
}
-static unsigned int cyc2ns_scale __read_mostly;
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> NS_SCALE;
-}
-
-unsigned long long sched_clock(void)
-{
- unsigned long a = 0;
-
-#if 0
- /* Don't do a HPET read here. Using TSC always is much faster
- and HPET may not be mapped yet when the scheduler first runs.
- Disadvantage is a small drift between CPUs in some configurations,
- but that should be tolerable. */
- if (__vxtime.mode == VXTIME_HPET)
- return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
-#endif
-
- /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
- which means it is not completely exact and may not be monotonous between
- CPUs. But the errors should be too small to matter for scheduling
- purposes. */
-
- rdtscll(a);
- return cycles_2_ns(a);
-}
static unsigned long get_cmos_time(void)
{
@@ -562,142 +250,6 @@ static unsigned long get_cmos_time(void)
return mktime(year, mon, day, hour, min, sec);
}
-#ifdef CONFIG_CPU_FREQ
-
-/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
- changes.
-
- RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
- not that important because current Opteron setups do not support
- scaling on SMP anyroads.
-
- Should fix up last_tsc too. Currently gettimeofday in the
- first tick after the change will be slightly wrong. */
-
-#include
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static void cpufreq_delayed_get(void)
-{
- static int warned;
- if (cpufreq_init && !cpufreq_delayed_issched) {
- cpufreq_delayed_issched = 1;
- if (!warned) {
- warned = 1;
- printk(KERN_DEBUG
- "Losing some ticks... checking if CPU frequency changed.\n");
- }
- schedule_work(&cpufreq_delayed_get_work);
- }
-}
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-static unsigned long cpu_khz_ref = 0;
-
-static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
- unsigned long *lpj, dummy;
-
- if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- lpj = &dummy;
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-#ifdef CONFIG_SMP
- lpj = &cpu_data[freq->cpu].loops_per_jiffy;
-#else
- lpj = &boot_cpu_data.loops_per_jiffy;
-#endif
-
- if (!ref_freq) {
- ref_freq = freq->old;
- loops_per_jiffy_ref = *lpj;
- cpu_khz_ref = cpu_khz;
- }
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- *lpj =
- cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
- }
-
- set_cyc2ns_scale(cpu_khz_ref);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-static int __init cpufreq_tsc(void)
-{
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
- if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER))
- cpufreq_init = 1;
- return 0;
-}
-
-core_initcall(cpufreq_tsc);
-
-#endif
-
-/*
- * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
- * it to the HPET timer of known frequency.
- */
-
-#define TICK_COUNT 100000000
-
-static unsigned int __init hpet_calibrate_tsc(void)
-{
- int tsc_start, hpet_start;
- int tsc_now, hpet_now;
- unsigned long flags;
-
- local_irq_save(flags);
- local_irq_disable();
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtscl(tsc_start);
-
- do {
- local_irq_disable();
- hpet_now = hpet_readl(HPET_COUNTER);
- tsc_now = get_cycles_sync();
- local_irq_restore(flags);
- } while ((tsc_now - tsc_start) < TICK_COUNT &&
- (hpet_now - hpet_start) < TICK_COUNT);
-
- return (tsc_now - tsc_start) * 1000000000L
- / ((hpet_now - hpet_start) * hpet_period / 1000);
-}
-
/*
* pit_calibrate_tsc() uses the speaker output (channel 2) of
@@ -728,137 +280,84 @@ static unsigned int __init pit_calibrate
return (end - start) / 50;
}
-#ifdef CONFIG_HPET
-static __init int late_hpet_init(void)
-{
- struct hpet_data hd;
- unsigned int ntimer;
-
- if (!vxtime.hpet_address)
- return 0;
-
- memset(&hd, 0, sizeof (hd));
-
- ntimer = hpet_readl(HPET_ID);
- ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
- ntimer++;
-
- /*
- * Register with driver.
- * Timer0 and Timer1 is used by platform.
- */
- hd.hd_phys_address = vxtime.hpet_address;
- hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
- hd.hd_nirqs = ntimer;
- hd.hd_flags = HPET_DATA_PLATFORM;
- hpet_reserve_timer(&hd, 0);
-#ifdef CONFIG_HPET_EMULATE_RTC
- hpet_reserve_timer(&hd, 1);
-#endif
- hd.hd_irq[0] = HPET_LEGACY_8254;
- hd.hd_irq[1] = HPET_LEGACY_RTC;
- if (ntimer > 2) {
- struct hpet *hpet;
- struct hpet_timer *timer;
- int i;
-
- hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
- timer = &hpet->hpet_timers[2];
- for (i = 2; i < ntimer; timer++, i++)
- hd.hd_irq[i] = (timer->hpet_config &
- Tn_INT_ROUTE_CNF_MASK) >>
- Tn_INT_ROUTE_CNF_SHIFT;
+#define PIT_MODE 0x43
+#define PIT_CH0 0x40
- }
+static void __init __pit_init(int val, u8 mode)
+{
+ unsigned long flags;
- hpet_alloc(&hd);
- return 0;
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(mode, PIT_MODE);
+ outb_p(val & 0xff, PIT_CH0); /* LSB */
+ outb_p(val >> 8, PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
}
-fs_initcall(late_hpet_init);
-#endif
-static int hpet_timer_stop_set_go(unsigned long tick)
+static void init_pit_timer(int mode, struct clock_event *evt)
{
- unsigned int cfg;
-
-/*
- * Stop the timers and reset the main counter.
- */
+ unsigned long flags;
- cfg = hpet_readl(HPET_CFG);
- cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
- hpet_writel(cfg, HPET_CFG);
- hpet_writel(0, HPET_COUNTER);
- hpet_writel(0, HPET_COUNTER + 4);
+ spin_lock_irqsave(&i8253_lock, flags);
-/*
- * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
- * and period also hpet_tick.
- */
- if (hpet_use_timer) {
- hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
- HPET_TN_32BIT, HPET_T0_CFG);
- hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
- hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
- cfg |= HPET_CFG_LEGACY;
+ switch(mode) {
+ case CLOCK_EVT_PERIODIC:
+ /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(0x34, PIT_MODE);
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ break;
+
+ case CLOCK_EVT_ONESHOT:
+ /* One shot setup */
+ outb_p(0x38, PIT_MODE);
+ udelay(10);
+ break;
+ case CLOCK_EVT_SHUTDOWN:
+ outb_p(0x30, PIT_MODE);
+ outb_p(0, PIT_CH0); /* LSB */
+ outb_p(0, PIT_CH0); /* MSB */
+ disable_irq(0);
+ break;
}
-/*
- * Go!
- */
-
- cfg |= HPET_CFG_ENABLE;
- hpet_writel(cfg, HPET_CFG);
-
- return 0;
+ spin_unlock_irqrestore(&i8253_lock, flags);
}
-static int hpet_init(void)
+static void pit_next_event(unsigned long delta, struct clock_event *evt)
{
- unsigned int id;
-
- if (!vxtime.hpet_address)
- return -1;
- set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
- __set_fixmap(VSYSCALL_HPET, vxtime.hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-
-/*
- * Read the period, compute tick and quotient.
- */
-
- id = hpet_readl(HPET_ID);
-
- if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
- return -1;
-
- hpet_period = hpet_readl(HPET_PERIOD);
- if (hpet_period < 100000 || hpet_period > 100000000)
- return -1;
-
- hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
-
- hpet_use_timer = (id & HPET_ID_LEGSUP);
+ unsigned long flags;
- return hpet_timer_stop_set_go(hpet_tick);
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(delta & 0xff , PIT_CH0); /* LSB */
+ outb(delta >> 8 , PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
}
-static int hpet_reenable(void)
+struct clock_event pit_clockevent = {
+ .name = "pit",
+ .capabilities = CLOCK_CAP_TICK | CLOCK_CAP_PROFILE | CLOCK_CAP_UPDATE
+#ifndef CONFIG_SMP
+ | CLOCK_CAP_NEXTEVT
+#endif
+ ,
+ .set_mode = init_pit_timer,
+ .set_next_event = pit_next_event,
+ .shift = 32,
+};
+
+void setup_pit_timer(void)
{
- return hpet_timer_stop_set_go(hpet_tick);
+ pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
+ pit_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFF, &pit_clockevent);
+ pit_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &pit_clockevent);
+ register_global_clockevent(&pit_clockevent);
}
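
The arithmetic here can be checked in userspace, assuming div_sc() computes (rate << shift) / NSEC_PER_SEC and clockevent_delta2ns() is essentially (delta << shift) / mult:

#include <stdio.h>
#include <stdint.h>

#define CLOCK_TICK_RATE	1193182ULL	/* PIT input clock in Hz */
#define NSEC_PER_SEC	1000000000ULL
#define SHIFT		32

int main(void)
{
	uint64_t mult = (CLOCK_TICK_RATE << SHIFT) / NSEC_PER_SEC;
	uint64_t max_ns = ((uint64_t)0x7FFF << SHIFT) / mult;
	uint64_t min_ns = ((uint64_t)0xF << SHIFT) / mult;

	printf("mult=%llu max=%lluns min=%lluns\n",
	       (unsigned long long)mult,
	       (unsigned long long)max_ns,
	       (unsigned long long)min_ns);
	return 0;
}

With the 1193182 Hz input clock this yields a mult of roughly 5.1 million (one PIT tick is ~838 ns), a maximum programmable delta of ~27 ms and a minimum of ~13 us.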
-#define PIT_MODE 0x43
-#define PIT_CH0 0x40
-static void __init __pit_init(int val, u8 mode)
-{
- unsigned long flags;
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(mode, PIT_MODE);
- outb_p(val & 0xff, PIT_CH0); /* LSB */
- outb_p(val >> 8, PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
-}
void __init pit_init(void)
{
@@ -873,9 +372,9 @@ void __init pit_stop_interrupt(void)
void __init stop_timer_interrupt(void)
{
char *name;
- if (vxtime.hpet_address) {
+ if (hpet_address) {
name = "HPET";
- hpet_timer_stop_set_go(0);
+ hpet_stop();
} else {
name = "PIT";
pit_stop_interrupt();
@@ -890,119 +389,47 @@ int __init time_setup(char *str)
}
static struct irqaction irq0 = {
- timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
+ timer_interrupt, IRQF_DISABLED | IRQF_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL
};
void __init time_init(void)
{
char *timename;
- char *gtod;
if (nohpet)
- vxtime.hpet_address = 0;
-
+ hpet_address = 0;
xtime.tv_sec = get_cmos_time();
xtime.tv_nsec = 0;
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);
- if (!hpet_init())
- vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
- else
- vxtime.hpet_address = 0;
+ if (hpet_arch_init())
+ hpet_address = 0;
+
+ setup_pit_timer();
if (hpet_use_timer) {
/* set tick_nsec to use the proper rate for HPET */
tick_nsec = TICK_NSEC_HPET;
cpu_khz = hpet_calibrate_tsc();
timename = "HPET";
-#ifdef CONFIG_X86_PM_TIMER
- } else if (pmtmr_ioport && !vxtime.hpet_address) {
- vxtime_hz = PM_TIMER_FREQUENCY;
- timename = "PM";
- pit_init();
- cpu_khz = pit_calibrate_tsc();
-#endif
} else {
pit_init();
cpu_khz = pit_calibrate_tsc();
timename = "PIT";
}
- vxtime.mode = VXTIME_TSC;
- gtod = time_init_gtod();
+ if (unsynchronized_tsc())
+ mark_tsc_unstable();
- printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
- vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
- vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
- vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
- vxtime.last_tsc = get_cycles_sync();
setup_irq(0, &irq0);
set_cyc2ns_scale(cpu_khz);
}
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
-#ifdef CONFIG_SMP
- if (apic_is_clustered_box())
- return 1;
-#endif
- /* Most intel systems have synchronized TSCs except for
- multi node systems */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
-#ifdef CONFIG_ACPI
- /* But TSC doesn't tick in C3 so don't use it there */
- if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 100)
- return 1;
-#endif
- return 0;
- }
-
- /* Assume multi socket systems are not synchronized */
- return num_present_cpus() > 1;
-}
-
-/*
- * Decide what mode gettimeofday should use.
- */
-__init static char *time_init_gtod(void)
-{
- char *timetype;
-
- if (unsynchronized_tsc())
- notsc = 1;
- if (vxtime.hpet_address && notsc) {
- timetype = hpet_use_timer ? "HPET" : "PIT/HPET";
- if (hpet_use_timer)
- vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- vxtime.last = hpet_readl(HPET_COUNTER);
- vxtime.mode = VXTIME_HPET;
- do_gettimeoffset = do_gettimeoffset_hpet;
-#ifdef CONFIG_X86_PM_TIMER
- /* Using PM for gettimeofday is quite slow, but we have no other
- choice because the TSC is too unreliable on some systems. */
- } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) {
- timetype = "PM";
- do_gettimeoffset = do_gettimeoffset_pm;
- vxtime.mode = VXTIME_PMTMR;
- sysctl_vsyscall = 0;
- printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n");
-#endif
- } else {
- timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
- vxtime.mode = VXTIME_TSC;
- }
- return timetype;
-}
__setup("report_lost_ticks", time_setup);
@@ -1033,7 +460,7 @@ static int timer_resume(struct sys_devic
unsigned long ctime = get_cmos_time();
unsigned long sleep_length = (ctime - sleep_start) * HZ;
- if (vxtime.hpet_address)
+ if (hpet_address)
hpet_reenable();
else
i8254_timer_resume();
@@ -1042,21 +469,9 @@ static int timer_resume(struct sys_devic
write_seqlock_irqsave(&xtime_lock,flags);
xtime.tv_sec = sec;
xtime.tv_nsec = 0;
- if (vxtime.mode == VXTIME_HPET) {
- if (hpet_use_timer)
- vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- vxtime.last = hpet_readl(HPET_COUNTER);
-#ifdef CONFIG_X86_PM_TIMER
- } else if (vxtime.mode == VXTIME_PMTMR) {
- pmtimer_resume();
-#endif
- } else
- vxtime.last_tsc = get_cycles_sync();
- write_sequnlock_irqrestore(&xtime_lock,flags);
jiffies += sleep_length;
wall_jiffies += sleep_length;
- monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
+ write_sequnlock_irqrestore(&xtime_lock,flags);
touch_softlockup_watchdog();
return 0;
}
@@ -1083,243 +498,3 @@ static int time_init_device(void)
device_initcall(time_init_device);
-#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
- * is enabled, we support RTC interrupt functionality in software.
- * RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- * is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
- */
-#include <linux/mc146818rtc.h>
-
-#define DEFAULT_RTC_INT_FREQ 64
-#define RTC_NUM_INTS 1
-
-static unsigned long UIE_on;
-static unsigned long prev_update_sec;
-
-static unsigned long AIE_on;
-static struct rtc_time alarm_time;
-
-static unsigned long PIE_on;
-static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
-static unsigned long PIE_count;
-
-static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
-static unsigned int hpet_t1_cmp; /* cached comparator register */
-
-int is_hpet_enabled(void)
-{
- return vxtime.hpet_address != 0;
-}
-
-/*
- * Timer 1 for RTC, we do not use periodic interrupt feature,
- * even if HPET supports periodic interrupts on Timer 1.
- * The reason being, to set up a periodic interrupt in HPET, we need to
- * stop the main counter. And if we do that everytime someone diables/enables
- * RTC, we will have adverse effect on main kernel timer running on Timer 0.
- * So, for the time being, simulate the periodic interrupt in software.
- *
- * hpet_rtc_timer_init() is called for the first time and during subsequent
- * interuppts reinit happens through hpet_rtc_timer_reinit().
- */
-int hpet_rtc_timer_init(void)
-{
- unsigned int cfg, cnt;
- unsigned long flags;
-
- if (!is_hpet_enabled())
- return 0;
- /*
- * Set the counter 1 and enable the interrupts.
- */
- if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
- hpet_rtc_int_freq = PIE_freq;
- else
- hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
- local_irq_save(flags);
- cnt = hpet_readl(HPET_COUNTER);
- cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
- local_irq_restore(flags);
-
- cfg = hpet_readl(HPET_T1_CFG);
- cfg &= ~HPET_TN_PERIODIC;
- cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
- hpet_writel(cfg, HPET_T1_CFG);
-
- return 1;
-}
-
-static void hpet_rtc_timer_reinit(void)
-{
- unsigned int cfg, cnt;
-
- if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
- cfg = hpet_readl(HPET_T1_CFG);
- cfg &= ~HPET_TN_ENABLE;
- hpet_writel(cfg, HPET_T1_CFG);
- return;
- }
-
- if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
- hpet_rtc_int_freq = PIE_freq;
- else
- hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
- /* It is more accurate to use the comparator value than current count.*/
- cnt = hpet_t1_cmp;
- cnt += hpet_tick*HZ/hpet_rtc_int_freq;
- hpet_writel(cnt, HPET_T1_CMP);
- hpet_t1_cmp = cnt;
-}
-
-/*
- * The functions below are called from rtc driver.
- * Return 0 if HPET is not being used.
- * Otherwise do the necessary changes and return 1.
- */
-int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
-{
- if (!is_hpet_enabled())
- return 0;
-
- if (bit_mask & RTC_UIE)
- UIE_on = 0;
- if (bit_mask & RTC_PIE)
- PIE_on = 0;
- if (bit_mask & RTC_AIE)
- AIE_on = 0;
-
- return 1;
-}
-
-int hpet_set_rtc_irq_bit(unsigned long bit_mask)
-{
- int timer_init_reqd = 0;
-
- if (!is_hpet_enabled())
- return 0;
-
- if (!(PIE_on | AIE_on | UIE_on))
- timer_init_reqd = 1;
-
- if (bit_mask & RTC_UIE) {
- UIE_on = 1;
- }
- if (bit_mask & RTC_PIE) {
- PIE_on = 1;
- PIE_count = 0;
- }
- if (bit_mask & RTC_AIE) {
- AIE_on = 1;
- }
-
- if (timer_init_reqd)
- hpet_rtc_timer_init();
-
- return 1;
-}
-
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
-{
- if (!is_hpet_enabled())
- return 0;
-
- alarm_time.tm_hour = hrs;
- alarm_time.tm_min = min;
- alarm_time.tm_sec = sec;
-
- return 1;
-}
-
-int hpet_set_periodic_freq(unsigned long freq)
-{
- if (!is_hpet_enabled())
- return 0;
-
- PIE_freq = freq;
- PIE_count = 0;
-
- return 1;
-}
-
-int hpet_rtc_dropped_irq(void)
-{
- if (!is_hpet_enabled())
- return 0;
-
- return 1;
-}
-
-irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- struct rtc_time curr_time;
- unsigned long rtc_int_flag = 0;
- int call_rtc_interrupt = 0;
-
- hpet_rtc_timer_reinit();
-
- if (UIE_on | AIE_on) {
- rtc_get_rtc_time(&curr_time);
- }
- if (UIE_on) {
- if (curr_time.tm_sec != prev_update_sec) {
- /* Set update int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag = RTC_UF;
- prev_update_sec = curr_time.tm_sec;
- }
- }
- if (PIE_on) {
- PIE_count++;
- if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
- /* Set periodic int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag |= RTC_PF;
- PIE_count = 0;
- }
- }
- if (AIE_on) {
- if ((curr_time.tm_sec == alarm_time.tm_sec) &&
- (curr_time.tm_min == alarm_time.tm_min) &&
- (curr_time.tm_hour == alarm_time.tm_hour)) {
- /* Set alarm int info, call real rtc int routine */
- call_rtc_interrupt = 1;
- rtc_int_flag |= RTC_AF;
- }
- }
- if (call_rtc_interrupt) {
- rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
- rtc_interrupt(rtc_int_flag, dev_id, regs);
- }
- return IRQ_HANDLED;
-}
-#endif
-
-static int __init nohpet_setup(char *s)
-{
- nohpet = 1;
- return 1;
-}
-
-__setup("nohpet", nohpet_setup);
-
-int __init notsc_setup(char *s)
-{
- notsc = 1;
- return 1;
-}
-
-__setup("notsc", notsc_setup);
Index: linux/arch/x86_64/kernel/traps.c
===================================================================
--- linux.orig/arch/x86_64/kernel/traps.c
+++ linux/arch/x86_64/kernel/traps.c
@@ -368,6 +368,7 @@ void show_trace(struct task_struct *tsk,
#undef HANDLE_STACK
printk("\n");
+ print_traces(tsk);
}
static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
@@ -497,7 +498,7 @@ void out_of_line_bug(void)
EXPORT_SYMBOL(out_of_line_bug);
#endif
-static DEFINE_SPINLOCK(die_lock);
+static DEFINE_RAW_SPINLOCK(die_lock);
static int die_owner = -1;
static unsigned int die_nest_count;
Index: linux/arch/x86_64/kernel/tsc.c
===================================================================
--- /dev/null
+++ linux/arch/x86_64/kernel/tsc.c
@@ -0,0 +1,229 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/clocksource.h>
+#include <linux/time.h>
+#include <linux/acpi.h>
+#include <linux/cpufreq.h>
+
+#include <asm/timex.h>
+
+#define NS_SCALE 10 /* 2^10, carefully chosen */
+#define US_SCALE 32 /* 2^32, arbitrarily chosen */
+
+static int notsc __initdata = 0;
+
+unsigned int cpu_khz; /* CPU clock frequency, in kHz */
+EXPORT_SYMBOL(cpu_khz);
+
+static unsigned int cyc2ns_scale __read_mostly;
+
+void set_cyc2ns_scale(unsigned long khz)
+{
+ cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> NS_SCALE;
+}
+
+unsigned long long sched_clock(void)
+{
+ unsigned long a = 0;
+
+ /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
+ which means it is not completely exact and may not be monotonic between
+ CPUs. But the errors should be too small to matter for scheduling
+ purposes. */
+
+ rdtscll(a);
+ return cycles_2_ns(a);
+}
+
+static int tsc_unstable;
+
+static inline int check_tsc_unstable(void)
+{
+ return tsc_unstable;
+}
+
+void mark_tsc_unstable(void)
+{
+ tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+#ifdef CONFIG_CPU_FREQ
+
+/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+ changes.
+
+ RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
+ not that important because current Opteron setups do not support
+ scaling on SMP anyway.
+
+ Should fix up last_tsc too. Currently gettimeofday in the
+ first tick after the change will be slightly wrong. */
+
+#include <linux/workqueue.h>
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+ unsigned int cpu;
+ for_each_online_cpu(cpu) {
+ cpufreq_get(cpu);
+ }
+ cpufreq_delayed_issched = 0;
+}
+
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+
+static unsigned long cpu_khz_ref = 0;
+
+static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ unsigned long *lpj, dummy;
+
+ if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+ return 0;
+
+ lpj = &dummy;
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+#ifdef CONFIG_SMP
+ lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+#else
+ lpj = &boot_cpu_data.loops_per_jiffy;
+#endif
+
+ if (!ref_freq) {
+ ref_freq = freq->old;
+ loops_per_jiffy_ref = *lpj;
+ cpu_khz_ref = cpu_khz;
+ }
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ *lpj =
+ cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+
+ cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ mark_tsc_unstable();
+ }
+
+ set_cyc2ns_scale(cpu_khz_ref);
+
+ return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+ .notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER))
+ cpufreq_init = 1;
+ return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif
+/*
+ * Make an educated guess if the TSC is trustworthy and synchronized
+ * over all CPUs.
+ */
+__cpuinit int unsynchronized_tsc(void)
+{
+#ifdef CONFIG_SMP
+ if (apic_is_clustered_box())
+ return 1;
+#endif
+ /* Most intel systems have synchronized TSCs except for
+ multi node systems */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+#ifdef CONFIG_ACPI
+ /* But TSC doesn't tick in C3 so don't use it there */
+ if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 100)
+ return 1;
+#endif
+ return 0;
+ }
+
+ /* Assume multi socket systems are not synchronized */
+ return num_present_cpus() > 1;
+}
+
+int __init notsc_setup(char *s)
+{
+ notsc = 1;
+ return 1;
+}
+
+__setup("notsc", notsc_setup);
+
+
+/* clock source code: */
+
+static int tsc_update_callback(void);
+
+static cycle_t read_tsc(void)
+{
+ cycle_t ret = (cycle_t)get_cycles_sync();
+ return ret;
+}
+
+static cycle_t __vsyscall_fn vread_tsc(void)
+{
+ cycle_t ret = (cycle_t)get_cycles_sync();
+ return ret;
+}
+
+static struct clocksource clocksource_tsc = {
+ .name = "tsc",
+ .rating = 300,
+ .read = read_tsc,
+ .mask = (cycle_t)-1,
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .update_callback = tsc_update_callback,
+ .is_continuous = 1,
+ .vread = vread_tsc,
+};
+
+static int tsc_update_callback(void)
+{
+ int change = 0;
+
+ /* check to see if we should switch to the safe clocksource: */
+ if (clocksource_tsc.rating != 50 && check_tsc_unstable()) {
+ clocksource_tsc.rating = 50;
+ clocksource_reselect();
+ change = 1;
+ }
+ return change;
+}
+
+static int __init init_tsc_clocksource(void)
+{
+ if (!notsc) {
+ clocksource_tsc.mult = clocksource_khz2mult(cpu_khz,
+ clocksource_tsc.shift);
+ return clocksource_register(&clocksource_tsc);
+ }
+ return 0;
+}
+
+module_init(init_tsc_clocksource);
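
The clocksource registered above uses the same scaled-math idiom as the rest of the generic timekeeping code: nanoseconds = (cycles * mult) >> shift, with mult derived from cpu_khz at registration time. A freestanding sketch of the conversion, approximating clocksource_khz2mult() and assuming a 2 GHz TSC:

#include <stdint.h>
#include <stdio.h>

/* approximation of the kernel's clocksource_khz2mult() */
static uint32_t khz2mult(uint32_t khz, uint32_t shift)
{
	uint64_t tmp = 1000000ULL << shift;	/* ns per ms, scaled up */

	tmp += khz / 2;				/* round to nearest */
	return (uint32_t)(tmp / khz);
}

int main(void)
{
	uint32_t mult = khz2mult(2000000, 22);	/* cpu_khz for a 2 GHz TSC */
	uint64_t cycles = 2000000000ULL;	/* one second of TSC cycles */

	/* the read path: cycle delta -> nanoseconds */
	printf("mult = %u, ns = %llu\n", mult,
	       (unsigned long long)((cycles * mult) >> 22));
	return 0;
}

When mark_tsc_unstable() fires, tsc_update_callback() drops the rating from 300 to 50 so that clocksource_reselect() can fall back to a stabler source such as the HPET or PIT.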
Index: linux/arch/x86_64/kernel/vmlinux.lds.S
===================================================================
--- linux.orig/arch/x86_64/kernel/vmlinux.lds.S
+++ linux/arch/x86_64/kernel/vmlinux.lds.S
@@ -93,27 +93,11 @@ SECTIONS
__vsyscall_0 = VSYSCALL_VIRT_ADDR;
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .xtime_lock : AT(VLOAD(.xtime_lock)) { *(.xtime_lock) }
- xtime_lock = VVIRT(.xtime_lock);
-
- .vxtime : AT(VLOAD(.vxtime)) { *(.vxtime) }
- vxtime = VVIRT(.vxtime);
-
- .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
- wall_jiffies = VVIRT(.wall_jiffies);
-
- .sys_tz : AT(VLOAD(.sys_tz)) { *(.sys_tz) }
- sys_tz = VVIRT(.sys_tz);
-
- .sysctl_vsyscall : AT(VLOAD(.sysctl_vsyscall)) { *(.sysctl_vsyscall) }
- sysctl_vsyscall = VVIRT(.sysctl_vsyscall);
-
- .xtime : AT(VLOAD(.xtime)) { *(.xtime) }
- xtime = VVIRT(.xtime);
-
+ .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
- .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
- jiffies = VVIRT(.jiffies);
+ .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) }
+ vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+
.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) }
.vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) }
Index: linux/arch/x86_64/kernel/vsyscall.c
===================================================================
--- linux.orig/arch/x86_64/kernel/vsyscall.c
+++ linux/arch/x86_64/kernel/vsyscall.c
@@ -26,65 +26,50 @@
#include
#include
#include
+#include <linux/clocksource.h>
#include
#include
#include
+#include
#include
#include
#include
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-
-int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
-seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
+#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) notrace
-#include
-
-static __always_inline void timeval_normalize(struct timeval * tv)
-{
- time_t __sec;
+struct vsyscall_gtod_data_t {
+ raw_seqlock_t lock;
+ int sysctl_enabled;
+ struct timeval wall_time_tv;
+ struct timezone sys_tz;
+ cycle_t offset_base;
+ struct clocksource clock;
+};
- __sec = tv->tv_usec / 1000000;
- if (__sec) {
- tv->tv_usec %= 1000000;
- tv->tv_sec += __sec;
- }
-}
+struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data = {
+ .lock = __RAW_SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
+ .sysctl_enabled = 1,
+};
-static __always_inline void do_vgettimeofday(struct timeval * tv)
+void update_vsyscall(struct timespec* wall_time, struct clocksource* clock)
{
- long sequence, t;
- unsigned long sec, usec;
+ unsigned long flags;
- do {
- sequence = read_seqbegin(&__xtime_lock);
-
- sec = __xtime.tv_sec;
- usec = (__xtime.tv_nsec / 1000) +
- (__jiffies - __wall_jiffies) * (1000000 / HZ);
-
- if (__vxtime.mode != VXTIME_HPET) {
- t = get_cycles_sync();
- if (t < __vxtime.last_tsc)
- t = __vxtime.last_tsc;
- usec += ((t - __vxtime.last_tsc) *
- __vxtime.tsc_quot) >> 32;
- /* See comment in x86_64 do_gettimeofday. */
- } else {
- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
- __vxtime.last) * __vxtime.quot) >> 32;
- }
- } while (read_seqretry(&__xtime_lock, sequence));
+ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ /* copy vsyscall data */
+ vsyscall_gtod_data.clock = *clock;
+ vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
+ vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
+ vsyscall_gtod_data.sys_tz = sys_tz;
- tv->tv_sec = sec + usec / 1000000;
- tv->tv_usec = usec % 1000000;
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
static __always_inline void do_get_tz(struct timezone * tz)
{
- *tz = __sys_tz;
+ *tz = __vsyscall_gtod_data.sys_tz;
}
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
@@ -105,10 +90,44 @@ static __always_inline long time_syscall
return secs;
}
+static __always_inline void do_vgettimeofday(struct timeval * tv)
+{
+ cycle_t now, base, mask, cycle_delta;
+ unsigned long seq, mult, shift, nsec_delta;
+ cycle_t (*vread)(void);
+ do {
+ seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+
+ vread = __vsyscall_gtod_data.clock.vread;
+ if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
+ gettimeofday(tv,0);
+ return;
+ }
+ now = vread();
+ base = __vsyscall_gtod_data.clock.cycle_last;
+ mask = __vsyscall_gtod_data.clock.mask;
+ mult = __vsyscall_gtod_data.clock.mult;
+ shift = __vsyscall_gtod_data.clock.shift;
+
+ *tv = __vsyscall_gtod_data.wall_time_tv;
+
+ } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+
+ /* calculate interval: */
+ cycle_delta = (now - base) & mask;
+ /* convert to nsecs: */
+ nsec_delta = (cycle_delta * mult) >> shift;
+
+ /* convert to usecs and add to timespec: */
+ tv->tv_usec += nsec_delta / NSEC_PER_USEC;
+ while (tv->tv_usec >= USEC_PER_SEC) {
+ tv->tv_sec += 1;
+ tv->tv_usec -= USEC_PER_SEC;
+ }
+}
+
int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
- if (!__sysctl_vsyscall)
- return gettimeofday(tv,tz);
if (tv)
do_vgettimeofday(tv);
if (tz)
@@ -120,11 +139,11 @@ int __vsyscall(0) vgettimeofday(struct t
* unlikely */
time_t __vsyscall(1) vtime(time_t *t)
{
- if (!__sysctl_vsyscall)
+ if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
return time_syscall(t);
else if (t)
- *t = __xtime.tv_sec;
- return __xtime.tv_sec;
+ *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
+ return __vsyscall_gtod_data.wall_time_tv.tv_sec;
}
long __vsyscall(2) venosys_0(void)
@@ -163,7 +182,7 @@ static int vsyscall_sysctl_change(ctl_ta
ret = -ENOMEM;
goto out;
}
- if (!sysctl_vsyscall) {
+ if (!vsyscall_gtod_data.sysctl_enabled) {
*map1 = SYSCALL;
*map2 = SYSCALL;
} else {
@@ -186,7 +205,7 @@ static int vsyscall_sysctl_nostrat(ctl_t
static ctl_table kernel_table2[] = {
{ .ctl_name = 99, .procname = "vsyscall64",
- .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
+ .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644,
.strategy = vsyscall_sysctl_nostrat,
.proc_handler = vsyscall_sysctl_change },
{ 0, }
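
The do_vgettimeofday() loop above is the reader half of a seqlock: the writer (update_vsyscall()) bumps the sequence count to an odd value before touching vsyscall_gtod_data and to an even one afterwards, and readers retry until they observe a stable snapshot. A minimal user-space analogue of the protocol, a sketch only since the kernel version also needs the proper memory barriers:

#include <stdint.h>

struct gtod_snapshot {
	volatile unsigned int seq;	/* odd while a write is in flight */
	uint64_t cycle_last;
	uint64_t wall_sec;
};

static unsigned int read_begin(const struct gtod_snapshot *d)
{
	unsigned int s;

	while ((s = d->seq) & 1)	/* writer active: wait */
		;
	return s;
}

static int read_retry(const struct gtod_snapshot *d, unsigned int s)
{
	return d->seq != s;		/* changed underneath us */
}

uint64_t sample_wall_sec(const struct gtod_snapshot *d)
{
	unsigned int seq;
	uint64_t sec;

	do {
		seq = read_begin(d);
		sec = d->wall_sec;	/* copy everything inside the loop */
	} while (read_retry(d, seq));

	return sec;
}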
Index: linux/arch/x86_64/kernel/x8664_ksyms.c
===================================================================
--- linux.orig/arch/x86_64/kernel/x8664_ksyms.c
+++ linux/arch/x86_64/kernel/x8664_ksyms.c
@@ -12,10 +12,12 @@
EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+EXPORT_SYMBOL(__compat_down_failed);
+EXPORT_SYMBOL(__compat_down_failed_interruptible);
+EXPORT_SYMBOL(__compat_down_failed_trylock);
+EXPORT_SYMBOL(__compat_up_wakeup);
+#endif
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
Index: linux/arch/x86_64/lib/thunk.S
===================================================================
--- linux.orig/arch/x86_64/lib/thunk.S
+++ linux/arch/x86_64/lib/thunk.S
@@ -42,11 +42,13 @@
thunk rwsem_wake_thunk,rwsem_wake
thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
#endif
-
- thunk __down_failed,__down
- thunk_retrax __down_failed_interruptible,__down_interruptible
- thunk_retrax __down_failed_trylock,__down_trylock
- thunk __up_wakeup,__up
+
+#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+ thunk __compat_down_failed,__compat_down
+ thunk_retrax __compat_down_failed_interruptible,__compat_down_interruptible
+ thunk_retrax __compat_down_failed_trylock,__compat_down_trylock
+ thunk __compat_up_wakeup,__compat_up
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
thunk trace_hardirqs_on_thunk,trace_hardirqs_on
Index: linux/arch/x86_64/mm/fault.c
===================================================================
--- linux.orig/arch/x86_64/mm/fault.c
+++ linux/arch/x86_64/mm/fault.c
@@ -79,6 +79,7 @@ void bust_spinlocks(int yes)
{
int loglevel_save = console_loglevel;
if (yes) {
+ stop_trace();
oops_in_progress = 1;
} else {
#ifdef CONFIG_VT
Index: linux/arch/x86_64/mm/init.c
===================================================================
--- linux.orig/arch/x86_64/mm/init.c
+++ linux/arch/x86_64/mm/init.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(dma_ops);
static unsigned long dma_reserve __initdata;
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
Index: linux/block/cfq-iosched.c
===================================================================
--- linux.orig/block/cfq-iosched.c
+++ linux/block/cfq-iosched.c
@@ -1283,7 +1283,7 @@ static void cfq_exit_single_io_context(s
q = cfqd->queue;
- WARN_ON(!irqs_disabled());
+ WARN_ON_NONRT(!irqs_disabled());
spin_lock(q->queue_lock);
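
WARN_ON_NONRT() exists because PREEMPT_RT converts the queue lock into a sleeping lock, so this path legitimately runs with interrupts enabled there; the assertion only holds for mainline builds. A plausible definition along the lines of the -rt convention (the exact macro in the tree may differ):

/* warn only when PREEMPT_RT is not changing the locking rules */
#ifdef CONFIG_PREEMPT_RT
# define WARN_ON_NONRT(condition)	do { } while (0)
#else
# define WARN_ON_NONRT(condition)	WARN_ON(condition)
#endif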
Index: linux/block/ll_rw_blk.c
===================================================================
--- linux.orig/block/ll_rw_blk.c
+++ linux/block/ll_rw_blk.c
@@ -1547,7 +1547,7 @@ static int ll_merge_requests_fn(request_
*/
void blk_plug_device(request_queue_t *q)
{
- WARN_ON(!irqs_disabled());
+ WARN_ON_NONRT(!irqs_disabled());
/*
* don't plug a stopped queue, it must be paired with blk_start_queue()
@@ -1570,7 +1570,7 @@ EXPORT_SYMBOL(blk_plug_device);
*/
int blk_remove_plug(request_queue_t *q)
{
- WARN_ON(!irqs_disabled());
+ WARN_ON_NONRT(!irqs_disabled());
if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
return 0;
@@ -3584,13 +3584,15 @@ void exit_io_context(void)
struct io_context *ioc;
struct cfq_io_context *cic;
- local_irq_save(flags);
+ // FIXME: unsafe upstream too?
+
+ local_irq_save_nort(flags);
task_lock(current);
ioc = current->io_context;
current->io_context = NULL;
ioc->task = NULL;
task_unlock(current);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
if (ioc->aic && ioc->aic->exit)
ioc->aic->exit(ioc->aic);
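
The _nort ("not on RT") helpers used in exit_io_context() follow the same pattern: ordinary local_irq_save()/local_irq_restore() on mainline, but no hard-irq disabling under PREEMPT_RT, where task_lock() is a sleeping lock and the section must stay preemptible. Approximate definitions in the -rt style, not necessarily the tree's exact macros:

#ifdef CONFIG_PREEMPT_RT
/* keep 'flags' initialized, but leave interrupts enabled */
# define local_irq_save_nort(flags)	do { local_save_flags(flags); } while (0)
# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
#else
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif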
Index: linux/drivers/acpi/executer/exmutex.c
===================================================================
--- linux.orig/drivers/acpi/executer/exmutex.c
+++ linux/drivers/acpi/executer/exmutex.c
@@ -267,9 +267,9 @@ acpi_ex_release_mutex(union acpi_operand
&& (obj_desc->mutex.os_mutex != ACPI_GLOBAL_LOCK)) {
ACPI_ERROR((AE_INFO,
"Thread %X cannot release Mutex [%4.4s] acquired by thread %X",
- (u32) walk_state->thread->thread_id,
+ (u32)(long) walk_state->thread->thread_id,
acpi_ut_get_node_name(obj_desc->mutex.node),
- (u32) obj_desc->mutex.owner_thread->thread_id));
+ (u32)(long) obj_desc->mutex.owner_thread->thread_id));
return_ACPI_STATUS(AE_AML_NOT_OWNER);
}
Index: linux/drivers/acpi/osl.c
===================================================================
--- linux.orig/drivers/acpi/osl.c
+++ linux/drivers/acpi/osl.c
@@ -676,13 +676,13 @@ void acpi_os_delete_lock(acpi_spinlock h
acpi_status
acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle)
{
- struct semaphore *sem = NULL;
+ struct compat_semaphore *sem = NULL;
- sem = acpi_os_allocate(sizeof(struct semaphore));
+ sem = acpi_os_allocate(sizeof(struct compat_semaphore));
if (!sem)
return AE_NO_MEMORY;
- memset(sem, 0, sizeof(struct semaphore));
+ memset(sem, 0, sizeof(struct compat_semaphore));
sema_init(sem, initial_units);
@@ -705,7 +705,7 @@ EXPORT_SYMBOL(acpi_os_create_semaphore);
acpi_status acpi_os_delete_semaphore(acpi_handle handle)
{
- struct semaphore *sem = (struct semaphore *)handle;
+ struct compat_semaphore *sem = (struct compat_semaphore *)handle;
if (!sem)
@@ -733,7 +733,7 @@ EXPORT_SYMBOL(acpi_os_delete_semaphore);
acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout)
{
acpi_status status = AE_OK;
- struct semaphore *sem = (struct semaphore *)handle;
+ struct compat_semaphore *sem = (struct compat_semaphore *)handle;
int ret = 0;
@@ -820,7 +820,7 @@ EXPORT_SYMBOL(acpi_os_wait_semaphore);
*/
acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units)
{
- struct semaphore *sem = (struct semaphore *)handle;
+ struct compat_semaphore *sem = (struct compat_semaphore *)handle;
if (!sem || (units < 1))
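
The switch to compat_semaphore matters because -rt rebuilds struct semaphore on top of rtmutexes, which assume mutex-style, single-owner usage; compat_semaphore keeps the classic counting semantics the ACPI interpreter relies on (initial counts above one, up() from a different context than down()). A hedged usage sketch, assuming the -rt type-switching macros let the compat type accept the stock semaphore calls:

static struct compat_semaphore acpi_example_sem;

static void acpi_example_init(void)
{
	sema_init(&acpi_example_sem, 4);	/* counting: four units */
}

static int acpi_example_acquire(void)
{
	if (down_interruptible(&acpi_example_sem))
		return -EINTR;			/* interrupted while waiting */
	/* ... use one unit of the resource ... */
	up(&acpi_example_sem);
	return 0;
}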
Index: linux/drivers/acpi/processor_idle.c
===================================================================
--- linux.orig/drivers/acpi/processor_idle.c
+++ linux/drivers/acpi/processor_idle.c
@@ -38,9 +38,11 @@
#include
#include
#include /* need_resched() */
+#include
#include
#include
+#include
#include