Index: linux.prev/Documentation/DocBook/Makefile
===================================================================
--- linux.prev.orig/Documentation/DocBook/Makefile
+++ linux.prev/Documentation/DocBook/Makefile
@@ -10,7 +10,8 @@ DOCBOOKS := wanbook.xml z8530book.xml mc
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml \
sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
- gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml
+ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
+ genericirq.xml
###
# The build process is as follows (targets):
Index: linux.prev/Documentation/DocBook/genericirq.tmpl
===================================================================
--- /dev/null
+++ linux.prev/Documentation/DocBook/genericirq.tmpl
@@ -0,0 +1,560 @@
+
+
+
+
+
+ Linux generic IRQ handling
+
+
+
+ Thomas
+ Gleixner
+
+
+ tglx@linutronix.de
+
+
+
+
+ Ingo
+ Molnar
+
+
+ mingo@elte.hu
+
+
+
+
+
+
+ 2005
+ Thomas Gleixner
+
+
+ 2005
+ Ingo Molnar
+
+
+
+
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+
+
+
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+
+
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+
+
+
+ For more details see the file COPYING in the source
+ distribution of Linux.
+
+
+
+
+
+
+
+ Introduction
+
+ The generic interrupt handling layer is designed to provide a
+ complete abstraction of interrupt handling for device drivers
+ and is able to handle all different types of interrupt controller
+ hardware. Device drivers use generic API functions to request, enable,
+ disable and free interrupts. The drivers do not have to know anything
+ about interrupt hardware, so they can be used on different hardware
+ platforms without code changes.
+
+
+ This documentation is provided for developers who want to implement
+ architecture interrupt support based on the Generic IRQ handling layer.
+
+
+
+
+ Rationale
+
+ The original implementation of interrupt handling in Linux uses
+ the __do_IRQ() super-handler, which must be able to deal with every
+ type of interrupt logic. This is achieved by an 'interrupt type'
+ structure and runtime flags to handle special cases.
+ Furthermore, the super-handler assumed a certain type of interrupt
+ handling hardware and turned out to be incapable of handling all
+ kinds of interrupt controller hardware found across the
+ architectures. The all-in-one approach also adds unnecessary
+ complexity for every user.
+
+
+ Originally, Russell King identified different types of handlers to
+ build a quite universal set for the ARM interrupt handler
+ implementation in Linux 2.5/2.6. He distinguished between:
+
+ Level type
+ Edge type
+ Simple type
+
+ In the SMP world of the __do_IRQ() super-handler another type
+ was identified:
+
+ Per CPU type
+
+
+
+ This split implementation of handlers makes it possible to optimize
+ the flow of the interrupt handling for each specific interrupt type.
+ This reduces complexity in that particular code path and allows
+ optimized handling of a given type.
+
+
+ The original general implementation uses interrupt_type structures
+ to differentiate the flow control in the super-handler. This
+ leads to a mix of flow logic and code related to hardware details.
+ Russell King's ARM implementation, which replaced the type with a
+ chip abstraction, mixed them the other way around.
+
+
+ The natural conclusion was a clean separation of the 'type flow'
+ and the 'chip'. Analysing a couple of architecture implementations
+ reveals that many of them can use a generic set of 'type flow'
+ implementations and only need to add the chip level specific code.
+ The separation is also valuable for (sub)architectures,
+ which need specific quirks in the type flow itself, because it
+ provides a more transparent design.
+
+
+ Each interrupt type implementation has its own flow handler
+ assigned, which should normally be one of the generic
+ implementations. The flow handler implementation makes it
+ simple to provide the demultiplexing handlers which can be found
+ on embedded platforms of various architectures.
+
+
+ The separation makes the generic interrupt handling layer more
+ flexible and extensible. A (sub)architecture can use a generic type
+ flow implementation for e.g. 'level type' interrupts and add a
+ (sub)architecture-specific 'edge type' implementation.
+
+
+ To make the transition to the new model easier and prevent the
+ breakage of existing implementations the __do_IRQ() super-handler
+ is still available. This leads to a kind of duality for the time
+ being. Over time the new model should achieve a homogeneous
+ implementation scheme over all architectures with enhanced
+ maintainability and cleanliness.
+
+
+
+ Known Bugs And Assumptions
+
+ None (hopefully).
+
+
+
+
+ Abstraction layers
+
+ There are three main levels of abstraction in the interrupt code:
+
+ Highlevel driver API
+ Abstract interrupt type
+ Chiplevel hardware encapsulation
+
+
+
+ The separation of interrupt type and chip-level functionality
+ provides the most flexible design. This implementation can handle
+ all kinds of interrupt hardware and the necessary workarounds for
+ the interrupt types without the need for redundant implementations.
+ The separation also allows edge and level type interrupts to be
+ handled on the same hardware chip.
+
+
+ Interrupt control flow
+
+ Each interrupt is described by an interrupt description structure
+ irq_desc. The interrupt is referenced by an 'unsigned int' numeric
+ value which selects the corresponding interrupt description structure
+ in the description structures array.
+ The description structure contains status information and pointers
+ to the interrupt type structure and the interrupt chip structure
+ which are assigned to this interrupt.
+
+
+ Whenever an interrupt triggers, the lowlevel arch code calls into
+ the generic interrupt code by calling desc->handler->handle_irq().
+ This highlevel IRQ handling function only uses other
+ desc->handler primitives which describe the control flow operation
+ necessary for the interrupt type. These operations call
+ the chip primitives referenced by the assigned chip description
+ structure.
+
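+ As a rough sketch (the names below approximate the ARM implementation
+ and are for illustration only; details differ per architecture), the
+ lowlevel entry code does little more than:
+
+asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+	struct irq_desc *desc = irq_desc + irq;
+
+	/* invoke the flow handler assigned to this interrupt */
+	desc->handler->handle_irq(irq, desc, regs);
+}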
+
+
+ Highlevel Driver API
+
+ The highlevel Driver API consists of the following functions:
+
+ request_irq()
+ free_irq()
+ disable_irq()
+ enable_irq()
+ disable_irq_nosync() (SMP only)
+ synchronize_irq() (SMP only)
+ set_irq_type()
+ set_irq_wake()
+ set_irq_data()
+ set_irq_chip()
+ set_irq_chip_data()
+
+ See the autogenerated function documentation for details.
+
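+ A minimal sketch of typical driver usage follows (struct cool_dev and
+ the cool_* names are hypothetical; the handler prototype shown is the
+ 2.6-era one, which still receives a struct pt_regs pointer):
+
+static irqreturn_t cool_dev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct cool_dev *dev = dev_id;
+
+	/* ... inspect and handle the hardware event ... */
+	return IRQ_HANDLED;
+}
+
+static int cool_dev_setup(struct cool_dev *dev)
+{
+	/* shared, named interrupt; dev is passed back to the handler */
+	if (request_irq(dev->irq, cool_dev_interrupt, SA_SHIRQ,
+			"cool_dev", dev))
+		return -EBUSY;
+	return 0;
+}
+
+static void cool_dev_teardown(struct cool_dev *dev)
+{
+	free_irq(dev->irq, dev);
+}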
+
+
+ Abstract interrupt type
+
+ The 'interrupt type' (struct irq_type) abstraction mainly consists of
+ methods which implement the 'interrupt handling flow'. The generic
+ layer provides a set of pre-defined types:
+
+ default_level_type
+ default_edge_type
+ default_simple_type
+ default_percpu_type
+
+ The default type implementations use the generic type handlers.
+
+ handle_level_type
+ handle_edge_type
+ handle_simple_type
+ handle_percpu_type
+
+ The interrupt types (either predefined or architecture specific) are
+ assigned to specific interrupts by the architecture either during
+ bootup or during device initialization.
+
+
+ Default type implementations
+
+ Helper functions
+
+ The helper functions call the chip primitives and
+ are used by the default type implementations.
+ The following helper functions are implemented (simplified excerpt):
+
+default_enable(irq)
+{
+ desc->chip->unmask(irq);
+}
+
+default_disable(irq)
+{
+ desc->chip->mask(irq);
+}
+
+default_ack(irq)
+{
+ chip->ack(irq);
+}
+
+default_mask_ack(irq)
+{
+ if (chip->mask_ack) {
+ chip->mask_ack(irq);
+ } else {
+ chip->mask(irq);
+ chip->ack(irq);
+ }
+}
+
+noop(irq)
+{
+}
+
+default_set_type(irq, type)
+{
+ if (desc->chip->set_type) {
+ if (desc->chip->set_type(irq, type))
+ return NULL;
+ }
+
+ return default_handler for type;
+}
+
+
+
+
+ Default Level IRQ type
+
+ The default Level IRQ type implements the functions
+
+ enable:     default_enable
+ disable:    default_disable
+ start:      default_mask_ack
+ end:        default_enable
+ handle_irq: handle_level_irq
+ set_type:   default_set_type
+
+
+
+
+ Default Edge IRQ type
+
+ The default Edge IRQ type implements the functions
+
+ enable:     default_enable
+ disable:    default_disable
+ start:      default_ack
+ hold:       default_mask_ack
+ end:        noop
+ handle_irq: handle_edge_irq
+ set_type:   default_set_type
+
+
+
+
+ Default simple IRQ type
+
+ The default simple IRQ type implements the functions
+
+ enable:     noop
+ disable:    noop
+ handle_irq: handle_simple_irq
+
+
+
+
+ Default per CPU IRQ type
+
+ The default per CPU IRQ type implements the functions
+
+ enable:     default_enable
+ disable:    default_disable
+ start:      default_ack
+ end:        default_enable
+ handle_irq: handle_percpu_irq
+
+
+
+
+
+ Default type handler implementations
+
+ Default Level IRQ type handler
+
+ handle_level_type provides a generic implementation
+ for level type interrupts.
+
+
+ The following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
+
+
+
+
+ Default Edge IRQ type handler
+
+ handle_edge_type provides a generic implementation
+ for edge type interrupts.
+
+
+ The following control flow is implemented (simplified excerpt):
+
+if (desc->status & running) {
+ desc->handler->hold();
+ desc->status |= pending | masked;
+ return;
+}
+desc->handler->start();
+desc->status |= running;
+do {
+ if (desc->status & masked)
+ desc->handler->enable();
+	desc->status &= ~pending;
+	handle_IRQ_event(desc->action);
+} while (desc->status & pending);
+desc->status &= ~running;
+desc->handler->end();
+
+
+
+
+ Default simple IRQ type handler
+
+ handle_simple_type provides a generic implementation
+ for simple type interrupts.
+
+
+ Note: The simple type handler does not call any
+ handler/chip primitives.
+
+
+ The following control flow is implemented (simplified excerpt):
+
+handle_IRQ_event(desc->action);
+
+
+
+
+ Default per CPU type handler
+
+ handle_percpu_type provides a generic implementation
+ for per CPU type interrupts.
+
+
+ Per CPU interrupts are only available on SMP, and
+ the handler provides a simplified version without
+ locking.
+
+
+ The following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
+
+
+
+
+
+ Architecture specific type implementation
+
+ If an architecture needs to implement its own type structures, then
+ the following primitives have to be implemented:
+
+ handle_irq() - The handle_irq function pointer should preferably point to
+ one of the generic type handler functions
+ startup() - Optional
+ shutdown() - Optional
+ enable()
+ disable()
+ start()
+ hold() - For edge type interrupts only
+ end()
+ set_type - Optional
+ set_affinity - SMP only
+
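+ As an illustrative sketch only (the my_edge_* helpers are hypothetical
+ placeholders for (sub)architecture specific code; see include/linux/irq.h
+ for the authoritative structure definition), such an implementation could
+ look like:
+
+static struct irq_type my_edge_type = {
+	.handle_irq	= handle_edge_irq,	/* generic edge flow handler */
+	.enable		= my_edge_enable,
+	.disable	= my_edge_disable,
+	.start		= my_edge_ack,
+	.hold		= my_edge_mask_ack,
+	.end		= my_edge_noop,
+	.set_type	= my_edge_set_type,
+};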
+
+
+
+ Quirks and optimizations
+
+ The generic functions are intended for 'clean' architectures and chips,
+ which have no platform-specific IRQ handling quirks. If an architecture
+ needs to implement quirks on the 'flow' level then it can do so by
+ overriding the irqtype. This is also done for compatibility reasons, as
+ most architectures currently use only irqtypes.
+
+
+ An architecture could implement all of its IRQ logic by pushing
+ chip handling details into the irqtype's ->start()/->end()/->hold()
+ functions. This is only recommended when the underlying primitives
+ are pure chip primitives without additional quirks. The direct pointer
+ to the chip functions reduces the indirection level by one.
+
+
+
+
+ Chiplevel hardware encapsulation
+
+ The chip level hardware description structure irq_chip
+ contains all the direct chip relevant functions, which
+ can be utilized by the irq_type implementations.
+
+ ack()
+ mask_ack() - Optional, recommended for performance
+ mask()
+ unmask()
+ retrigger() - Optional
+ set_type() - Optional
+ set_wake() - Optional
+
+ These primitives are strictly intended to mean what they say: ack means
+ ACK, masking means masking of an IRQ line, etc. It is up to the flow
+ handler(s) to use these basic units of lowlevel functionality.
+
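+ For illustration, a sketch of a chip implementation for a hypothetical
+ memory-mapped controller (the register layout, offsets and cool_pic
+ names are made up):
+
+static void __iomem *pic_base;	/* mapped controller registers */
+
+static void cool_pic_ack(unsigned int irq)
+{
+	writel(1 << irq, pic_base + 0x04);	/* clear the pending bit */
+}
+
+static void cool_pic_mask(unsigned int irq)
+{
+	writel(readl(pic_base + 0x08) | (1 << irq), pic_base + 0x08);
+}
+
+static void cool_pic_unmask(unsigned int irq)
+{
+	writel(readl(pic_base + 0x08) & ~(1 << irq), pic_base + 0x08);
+}
+
+static struct irq_chip cool_pic = {
+	.ack	= cool_pic_ack,
+	.mask	= cool_pic_mask,
+	.unmask	= cool_pic_unmask,
+};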
+
+
+
+
+ __do_IRQ entry point
+
+ The original implementation __do_IRQ() is an alternative entry
+ point for all types of interrupts.
+
+
+ This handler turned out not to be suitable for all
+ interrupt hardware and was therefore reimplemented with split
+ functionality for edge/level/simple/percpu interrupts. This is not
+ only a functional optimization; it also shortens code paths for
+ interrupts.
+
+
+ To make use of the split implementation, replace the call to
+ __do_IRQ() by a call to desc->handler->handle_irq() and associate
+ the appropriate handler function with desc->handler->handle_irq().
+ In most cases the generic type and handler implementations should
+ be sufficient.
+
+
+
+
+ Locking on SMP
+
+ The locking of chip registers is up to the architecture that
+ defines the chip primitives. There is a chip->lock field that can be used
+ for serialization, but the generic layer does not touch it. The per-irq
+ structure is protected via desc->lock, by the generic layer.
+
+
+
+ Structures
+
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the generic IRQ layer.
+
+!Iinclude/linux/irq.h
+
+
+
+ Public Functions Provided
+
+ This chapter contains the autogenerated documentation of the kernel API functions
+ which are exported.
+
+!Ekernel/irq/manage.c
+
+
+
+ Internal Functions Provided
+
+ This chapter contains the autogenerated documentation of the internal functions.
+
+!Ikernel/irq/handle.c
+
+
+
+ Credits
+
+ The following people have contributed to this document:
+
+ Thomas Gleixner <tglx@linutronix.de>
+ Ingo Molnar <mingo@elte.hu>
+
+
+
+
Index: linux.prev/Documentation/DocBook/kernel-api.tmpl
===================================================================
--- linux.prev.orig/Documentation/DocBook/kernel-api.tmpl
+++ linux.prev/Documentation/DocBook/kernel-api.tmpl
@@ -54,6 +54,11 @@
!Ekernel/sched.c
!Ekernel/timer.c
+ High-resolution timers
+!Iinclude/linux/ktime.h
+!Iinclude/linux/hrtimer.h
+!Ekernel/hrtimer.c
+
Internal Functions
!Ikernel/exit.c
!Ikernel/signal.c
Index: linux.prev/Documentation/RCU/proc.txt
===================================================================
--- /dev/null
+++ linux.prev/Documentation/RCU/proc.txt
@@ -0,0 +1,207 @@
+/proc Filesystem Entries for RCU
+
+
+CONFIG_RCU_STATS
+
+The CONFIG_RCU_STATS config option is available only in conjunction with
+CONFIG_PREEMPT_RCU. It makes four /proc entries available, namely: rcuctrs,
+rcuptrs, rcugp, and rcustats.
+
+/proc/rcuctrs
+
+ CPU last cur
+ 0 1 1
+ 1 1 1
+ 2 1 1
+ 3 0 2
+ ggp = 230725
+
+This displays the number of processes that started RCU read-side critical
+sections on each CPU. In the absence of preemption, the "last" and "cur"
+counts for a given CPU will always sum to one. Therefore, in the example
+output above, each CPU has started one RCU read-side critical section
+that was later preempted. The "last" column counts RCU read-side critical
+sections that started prior to the last counter flip, while the "cur"
+column counts critical sections that started after the last counter flip.
+
+The "ggp" count is a count of the number of counter flips since boot.
+Since this is shown as an odd number, the "cur" counts are stored in
+the zero-th element of each of the per-CPU arrays, and the "last" counts
+are stored in the first element of each of the per-CPU arrays.
+
+
+/proc/rcuptrs
+
+ nl=c04c7160/c04c7960 nt=c04c72d0
+ wl=c04c7168/c04c794c wt=c04c72bc dl=c04c7170/00000000 dt=c04c7170
+
+This displays the head and tail of each of CONFIG_PREEMPT_RCU's three
+callback lists. This will soon change to display this on a per-CPU
+basis, since each CPU will soon have its own set of callback lists.
+In the example above, the "next" list header is located at hex address
+0xc04c7160, the first element on the list at hex address 0xc04c7960,
+and the last element on the list at hex address 0xc04c72d0. The "wl="
+and "wt=" output is similar for the "wait" list, and the "dl=" and "dt="
+output for the "done" list. The "done" list is normally emptied very
+quickly after being filled, so will usually be empty as shown above.
+Note that the tail pointer points into the list header in this case.
+
+Callbacks are placed in the "next" list by call_rcu(), moved to the
+"wait" list after the next counter flip, and moved to the "done" list
+on the counter flip after that. Once on the "done" list, the callbacks
+are invoked.
+
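+For reference, a minimal sketch of how a callback is registered via call_rcu()
+(the struct foo example is purely hypothetical):
+
+	struct foo {
+		struct rcu_head rcu;
+		int data;
+	};
+
+	static void foo_reclaim(struct rcu_head *head)
+	{
+		struct foo *fp = container_of(head, struct foo, rcu);
+		kfree(fp);
+	}
+
+	static void foo_release(struct foo *fp)
+	{
+		/* fp must already be unlinked from reader-visible structures */
+		call_rcu(&fp->rcu, foo_reclaim);
+	}
+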
+
+/proc/rcugp
+
+ oldggp=241419 newggp=241421
+
+This entry invokes synchronize_rcu() and prints out the number of counter
+flips since boot before and after the synchronize_rcu(). These two
+numbers will always differ by at least two. Unless RCU is broken. ;-)
+
+
+/proc/rcustats
+
+ ggp=242416 lgp=242416 sr=0 rcc=396233
+ na=2090938 nl=9 wa=2090929 wl=9 dl=0 dr=2090920 di=2090920
+ rtf1=22230730 rtf2=20139162 rtf3=242416 rtfe1=2085911 rtfe2=5657 rtfe3=19896746
+
+The quantities printed are as follows:
+
+o "ggp=": The number of flips since boot.
+
+o "lgp=": The number of flips sensed by the local structure since
+ boot. This will soon be per-CPU.
+
+o	"sr=": The number of explicit calls to synchronize_rcu().
+ Except that this is currently broken, so always reads as zero.
+ It is likely to be removed...
+
+o "rcc=": The number of calls to rcu_check_callbacks().
+
+o "na=": The number of callbacks that call_rcu() has registered
+ since boot.
+
+o "nl=": The number of callbacks currently on the "next" list.
+
+o "wa=": The number of callbacks that have moved to the "wait"
+ list since boot.
+
+o "wl=": The number of callbacks currently on the "wait" list.
+
+o "da=": The number of callbacks that have been moved to the
+ "done" list since boot.
+
+o "dl=": The number of callbacks currently on the "done" list.
+
+o "dr=": The number of callbacks that have been removed from the
+ "done" list since boot.
+
+o "di=": The number of callbacks that have been invoked after being
+ removed from the "done" list.
+
+o "rtf1=": The number of attempts to flip the counters.
+
+o "rtf2=": The number of attempts to flip the counters that successfully
+ acquired the fliplock.
+
+o "rtf3=": The number of successful counter flips.
+
+o "rtfe1=": The number of attempts to flip the counters that failed
+ due to the lock being held by someone else.
+
+o "rtfe2=": The number of attempts to flip the counters that were
+ abandoned due to someone else doing the job for us.
+
+o "rtfe3=": The number of attempts to flip the counters that failed
+ due to some task still being in an RCU read-side critical section
+ starting from before the last successful counter flip.
+
+
+CONFIG_RCU_TORTURE_TEST
+
+The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
+implementations. It makes three /proc entries available, namely: rcutw,
+rcutr, and rcuts.
+
+
+/proc/rcutw
+
+Reading this entry starts a new torture test, or ends an earlier one
+if one is already in progress (in other words, there can be only one
+writer at a time). This sleeps uninterruptibly, so be sure to run
+it in the background. One could argue that it would be good to have
+multiple writers, but Linux uses RCU heavily enough that you will get
+write-side contention whether you want it or not. If you want additional
+write-side contention, repeatedly create and destroy several large file
+trees in parallel. Or use some other RCU-protected update.
+
+
+/proc/rcutr
+
+Reading this entry starts a new torture reader, which runs until sent
+a signal (e.g., control-C). If testing an RCU implementation with
+preemptible read-side critical sections, make sure to spawn at least
+two /proc/rcutr instances for each CPU.
+
+
+/proc/rcuts
+
+Displays the current state of the torture test:
+
+ ggp = 20961
+ rtc: c04496f4 ver: 8734 tfle: 0 rta: 8734 rtaf: 0 rtf: 8715
+ Reader Pipe: 88024120 63914 0 0 0 0 0 0 0 0 0
+ Reader Batch: 88024097 63937 0 0 0 0 0 0 0 0
+ Free-Block Circulation: 8733 8731 8729 8727 8725 8723 8721 8719 8717 8715 0
+
+The entries are as follows:
+
+o "ggp": The number of counter flips (or batches) since boot.
+
+o "rtc": The hexadecimal address of the structure currently visible
+ to readers.
+
+o "ver": The number of times since boot that the rcutw writer task
+ has changed the structure visible to readers.
+
+o "tfle": If non-zero, indicates that the "torture freelist"
+	containing structures to be placed into the "rtc" area is empty.
+ This condition is important, since it can fool you into thinking
+ that RCU is working when it is not. :-/
+
+o "rta": Number of structures allocated from the torture freelist.
+
+o "rtaf": Number of allocations from the torture freelist that have
+ failed due to the list being empty.
+
+o "rtf": Number of frees into the torture freelist.
+
+o "Reader Pipe": Histogram of "ages" of structures seen by readers.
+ If any entries past the first two are non-zero, RCU is broken.
+ And /proc/rcuts prints "!!!" to make sure you notice. The age
+ of a newly allocated structure is zero, it becomes one when
+ removed from reader visibility, and is incremented once per
+ grace period subsequently -- and is freed after passing through
+ (RCU_TORTURE_PIPE_LEN-2) grace periods.
+
+ The output displayed above was taken from a correctly working
+ RCU. If you want to see what it looks like when broken, break
+ it yourself. ;-)
+
+o "Reader Batch": Another histogram of "ages" of structures seen
+ by readers, but in terms of counter flips (or batches) rather
+ than in terms of grace periods. The legal number of non-zero
+ entries is again two. The reason for this separate view is
+ that it is easier to get the third entry to show up in the
+ "Reader Batch" list than in the "Reader Pipe" list.
+
+o "Free-Block Circulation": Shows the number of torture structures
+ that have reached a given point in the pipeline. The first element
+ should closely correspond to the number of structures allocated,
+ the second to the number that have been removed from reader view,
+ and all but the last remaining to the corresponding number of
+ passes through a grace period. The last entry should be zero,
+ as it is only incremented if a torture structure's counter
+ somehow gets incremented farther than it should.
Index: linux.prev/Documentation/hrtimers.txt
===================================================================
--- /dev/null
+++ linux.prev/Documentation/hrtimers.txt
@@ -0,0 +1,178 @@
+
+hrtimers - subsystem for high-resolution kernel timers
+----------------------------------------------------
+
+This patch introduces a new subsystem for high-resolution kernel timers.
+
+One might ask the question: we already have a timer subsystem
+(kernel/timers.c), why do we need two timer subsystems? After a lot of
+back and forth trying to integrate high-resolution and high-precision
+features into the existing timer framework, and after testing various
+such high-resolution timer implementations in practice, we came to the
+conclusion that the timer wheel code is fundamentally not suitable for
+such an approach. We initially didn't believe this ('there must be a way
+to solve this'), and spent a considerable effort trying to integrate
+things into the timer wheel, but we failed. In hindsight, there are
+several reasons why such integration is hard/impossible:
+
+- the forced handling of low-resolution and high-resolution timers in
+ the same way leads to a lot of compromises, macro magic and #ifdef
+ mess. The timers.c code is very "tightly coded" around jiffies and
+ 32-bitness assumptions, and has been honed and micro-optimized for a
+ relatively narrow use case (jiffies in a relatively narrow HZ range)
+ for many years - and thus even small extensions to it easily break
+ the wheel concept, leading to even worse compromises. The timer wheel
+ code is very good and tight code, there's zero problems with it in its
+ current usage - but it is simply not suitable to be extended for
+ high-res timers.
+
+- the unpredictable [O(N)] overhead of cascading leads to delays which
+  necessitate a more complex handling of high resolution timers, which
+ in turn decreases robustness. Such a design still led to rather large
+ timing inaccuracies. Cascading is a fundamental property of the timer
+  wheel concept, it cannot be 'designed out' without inevitably
+ degrading other portions of the timers.c code in an unacceptable way.
+
+- the implementation of the current posix-timer subsystem on top of
+ the timer wheel has already introduced a quite complex handling of
+ the required readjusting of absolute CLOCK_REALTIME timers at
+  settimeofday or NTP time - further underlining our experience by
+ example: that the timer wheel data structure is too rigid for high-res
+ timers.
+
+- the timer wheel code is most optimal for use cases which can be
+ identified as "timeouts". Such timeouts are usually set up to cover
+ error conditions in various I/O paths, such as networking and block
+ I/O. The vast majority of those timers never expire and are rarely
+ recascaded because the expected correct event arrives in time so they
+ can be removed from the timer wheel before any further processing of
+ them becomes necessary. Thus the users of these timeouts can accept
+ the granularity and precision tradeoffs of the timer wheel, and
+ largely expect the timer subsystem to have near-zero overhead.
+ Accurate timing for them is not a core purpose - in fact most of the
+ timeout values used are ad-hoc. For them it is at most a necessary
+ evil to guarantee the processing of actual timeout completions
+ (because most of the timeouts are deleted before completion), which
+ should thus be as cheap and unintrusive as possible.
+
+The primary users of precision timers are user-space applications that
+utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
+users like drivers and subsystems which require precise timed events
+(e.g. multimedia) can benefit from the availability of a separate
+high-resolution timer subsystem as well.
+
+While this subsystem does not offer high-resolution clock sources just
+yet, the hrtimer subsystem can be easily extended with high-resolution
+clock capabilities, and patches for that exist and are maturing quickly.
+The increasing demand for realtime and multimedia applications along
+with other potential users for precise timers gives another reason to
+separate the "timeout" and "precise timer" subsystems.
+
+Another potential benefit is that such a separation allows even more
+special-purpose optimization of the existing timer wheel for the low
+resolution and low precision use cases - once the precision-sensitive
+APIs are separated from the timer wheel and are migrated over to
+hrtimers. E.g. we could decrease the frequency of the timeout subsystem
+from 250 Hz to 100 Hz (or even lower).
+
+hrtimer subsystem implementation details
+----------------------------------------
+
+the basic design considerations were:
+
+- simplicity
+
+- data structure not bound to jiffies or any other granularity. All the
+ kernel logic works at 64-bit nanoseconds resolution - no compromises.
+
+- simplification of existing, timing related kernel code
+
+another basic requirement was the immediate enqueueing and ordering of
+timers at activation time. After looking at several possible solutions
+such as radix trees and hashes, we chose the red black tree as the basic
+data structure. Rbtrees are available as a library in the kernel and are
+used in various performance-critical areas of e.g. memory management and
+file systems. The rbtree is solely used for time sorted ordering, while
+a separate list is used to give the expiry code fast access to the
+queued timers, without having to walk the rbtree.
+
+(This separate list is also useful for later, when we'll introduce
+high-resolution clocks, where we need separate pending and expired
+queues while keeping the time-order intact.)
+
+Time-ordered enqueueing is not purely for the purposes of
+high-resolution clocks though, it also simplifies the handling of
+absolute timers based on a low-resolution CLOCK_REALTIME. The existing
+implementation needed to keep an extra list of all armed absolute
+CLOCK_REALTIME timers along with complex locking. In case of
+settimeofday and NTP, all the timers (!) had to be dequeued, the
+time-changing code had to fix them up one by one, and all of them had to
+be enqueued again. The time-ordered enqueueing and the storage of the
+expiry time in absolute time units removes all this complex and poorly
+scaling code from the posix-timer implementation - the clock can simply
+be set without having to touch the rbtree. This also makes the handling
+of posix-timers simpler in general.
+
+The locking and per-CPU behavior of hrtimers was mostly taken from the
+existing timer wheel code, as it is mature and well suited. Sharing code
+was not really a win, due to the different data structures. Also, the
+hrtimer functions now have clearer behavior and clearer names - such as
+hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly
+equivalent to del_timer() and del_timer_sync()] - so there's no direct
+1:1 mapping between them on the algorithmical level, and thus no real
+potential for code sharing either.
+
+Basic data types: every time value, absolute or relative, is in a
+special nanosecond-resolution type: ktime_t. The kernel-internal
+representation of ktime_t values and operations is implemented via
+macros and inline functions, and can be switched between a "hybrid
+union" type and a plain "scalar" 64bit nanoseconds representation (at
+compile time). The hybrid union type optimizes time conversions on 32bit
+CPUs. This build-time-selectable ktime_t storage format was implemented
+to avoid the performance impact of 64-bit multiplications and divisions
+on 32bit CPUs. Such operations are frequently necessary to convert
+between the storage formats provided by kernel and userspace interfaces
+and the internal time format. (See include/linux/ktime.h for further
+details.)
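+
+As a rough, simplified sketch (the authoritative definition lives in
+include/linux/ktime.h and may differ in detail), the hybrid union looks
+roughly like this:
+
+typedef union {
+	s64	tv64;			/* scalar 64-bit nanoseconds */
+#if BITS_PER_LONG != 64
+	struct {
+# ifdef __BIG_ENDIAN
+		s32	sec, nsec;
+# else
+		s32	nsec, sec;
+# endif
+	} tv;				/* split sec/nsec for cheap 32-bit ops */
+#endif
+} ktime_t;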
+
+hrtimers - rounding of timer values
+-----------------------------------
+
+the hrtimer code will round timer events to lower-resolution clocks
+because it has to. Otherwise it will do no artificial rounding at all.
+
+one question is, what resolution value should be returned to the user by
+the clock_getres() interface. This will return whatever real resolution
+a given clock has - be it low-res, high-res, or artificially-low-res.
+
+hrtimers - testing and verification
+----------------------------------
+
+We used the high-resolution clock subsystem on top of hrtimers to verify
+the hrtimer implementation details in practice, and we also ran the posix
+timer tests in order to ensure specification compliance. We also ran
+tests on low-resolution clocks.
+
+The hrtimer patch converts the following kernel functionality to use
+hrtimers:
+
+ - nanosleep
+ - itimers
+ - posix-timers
+
+The conversion of nanosleep and posix-timers enabled the unification of
+nanosleep and clock_nanosleep.
+
+The code was successfully compiled for the following platforms:
+
+ i386, x86_64, ARM, PPC, PPC64, IA64
+
+The code was run-tested on the following platforms:
+
+ i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
+
+hrtimers were also integrated into the -rt tree, along with a
+hrtimers-based high-resolution clock implementation, so the hrtimers
+code got a healthy amount of testing and use in practice.
+
+ Thomas Gleixner, Ingo Molnar
Index: linux.prev/Documentation/kernel-parameters.txt
===================================================================
--- linux.prev.orig/Documentation/kernel-parameters.txt
+++ linux.prev/Documentation/kernel-parameters.txt
@@ -52,6 +52,7 @@ restrictions referred to are that the re
MTD MTD support is enabled.
NET Appropriate network support is enabled.
NUMA NUMA support is enabled.
+ GENERIC_TIME The generic timeofday code is enabled.
NFS Appropriate NFS support is enabled.
OSS OSS sound support is enabled.
PARIDE The ParIDE subsystem is enabled.
@@ -329,10 +330,11 @@ running once the system is up.
Value can be changed at runtime via
/selinux/checkreqprot.
- clock= [BUGS=IA-32,HW] gettimeofday timesource override.
- Forces specified timesource (if avaliable) to be used
- when calculating gettimeofday(). If specicified
- timesource is not avalible, it defaults to PIT.
+ clock= [BUGS=IA-32, HW] gettimeofday clocksource override.
+ [Deprecated]
+			Forces specified clocksource (if available) to be used
+			when calculating gettimeofday(). If specified
+			clocksource is not available, it defaults to PIT.
Format: { pit | tsc | cyclone | pmtmr }
hpet= [IA-32,HPET] option to disable HPET and use PIT.
@@ -1477,6 +1479,10 @@ running once the system is up.
time Show timing data prefixed to each printk message line
+	clocksource=	[GENERIC_TIME] Override the default clocksource
+			selection and use the clocksource with the
+			name specified.
+
tipar.timeout= [HW,PPT]
Set communications timeout in tenths of a second
(default 15).
Index: linux.prev/Documentation/timekeeping.txt
===================================================================
--- /dev/null
+++ linux.prev/Documentation/timekeeping.txt
@@ -0,0 +1,350 @@
+How timekeeping works with CONFIG_GENERIC_TIME
+========================================================================
+
+The generic timekeeping code maintains and allows access to the system's
+understanding of how much time has passed from a certain point. However, in
+order to measure the passing of time, the generic timekeeping code relies on
+the clocksource abstraction. A clocksource abstracts a free running counter
+whose value increases at a known frequency.
+
+In the generic timekeeping code, we use a pointer to a selected clocksource to
+measure the passing of time.
+
+struct clocksource *clock
+
+The clocksource has some limitations, however. Since it is likely of fixed
+width, it will not increment forever and will overflow. In order to still
+properly keep time, we must occasionally accumulate an interval of time. In the
+generic timekeeping code, we accumulate the amount of time since the system
+booted into the value system_time, which keeps nanosecond resolution in a
+ktime_t storage.
+
+ktime_t system_time
+
+Since it is likely your system has not been running continually since midnight
+on the 1st of January 1970, we must provide an offset from that time in
+accordance with convention. This offset, which changes only occasionally (via
+settimeofday()), is the wall_time_offset value, which is also stored as a
+ktime_t.
+
+ktime_t wall_time_offset
+
+
+Since we accumulate time in intervals, we need a base cycle value that we can
+use to generate an offset from the time value kept in system_time. We store
+this value in cycle_last.
+
+cycle_t cycle_last;
+
+
+Further, since all clocks drift somewhat from each other, we use the adjustment
+values provided via adjtimex() to correct our clocksource frequency for each
+interval. This frequency adjustment value is stored in ntp_adj.
+
+long ntp_adj;
+
+Now that we've covered the core global variables for timekeeping, let's look at
+how we maintain these values.
+
+As stated above, we want to keep the clocksource from overflowing on us, so we
+accumulate a time interval periodically. This periodic accumulation function is
+called timeofday_periodic_hook(). In simplified pseudo code, it logically is
+presented as:
+
+timeofday_periodic_hook():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ system_time += nsec
+ cycle_last = cycle_now
+
+ /* do other stuff */
+
+You can see we read the cycle value from the clocksource, calculate a cycle
+delta for the interval since we last called timeofday_periodic_hook(), convert
+that cycle delta to a nanosecond interval (for now ignore ntp_adj), add it to
+the system time and finally set our cycle_last value to cycle_now for the next
+interval. Using this simple algorithm we can correctly measure and record the
+passing of time.
+
+But just storing this info isn't very useful; we also want to make it available
+to be used elsewhere. So how do we provide a notion of how much time has passed
+in between calls to timeofday_periodic_hook()?
+
+First, let's create a function that calculates the time since the last call to
+timeofday_periodic_hook().
+
+get_nsec_offset():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ return nsec
+
+Here you can see that we read the clocksource, calculate a cycle interval, and
+convert that to a nanosecond interval - just like it is done in
+timeofday_periodic_hook()!
+
+Now let's use this function to provide the number of nanoseconds that the system
+has been running:
+
+do_monotonic_clock():
+ return system_time + get_nsec_offset()
+
+Here we trivially add the nanosecond offset since the last
+timeofday_periodic_hook() to the value of system_time which was stored at the
+last timeofday_periodic_hook().
+
+Note that since we use the same method to calculate time intervals, assuming
+each function is atomic and the clocksource functions as it should, time cannot
+go backward!
+
+Now to get the time of day using the standard convention:
+
+do_gettimeofday():
+ return do_monotonic_clock() + wall_time_offset
+
+We simply add the wall_time_offset, and we have the number of nanoseconds since
+1970 began!
+
+
+Of course, in real life, things are not so static. We have to handle a number
+of dynamic values that may change and affect timekeeping. In order to do these
+safely, we must only change values in-between intervals. This means the
+periodic_hook call must handle these changes.
+
+Since clocksources can be changed while the system is running, we need to check
+for and possibly switch to using new clocksources in the periodic_hook call.
+Further, clocksources may change their frequency. Since this must be done only
+at a safe point, we use the update_callback function pointer (for more details,
+see "How to write a clocksource driver" below); this too must be done
+in between intervals in the periodic_hook call. Finally, since the ntp
+adjustment made in the cyc2ns conversion is not static, we need to update the
+ntp state machine and calculate a new adjustment value.
+
+This adds some extra pseudo code to the timeofday_periodic_hook function:
+
+timeofday_periodic_hook():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ system_time += nsec
+ cycle_last = cycle_now
+
+ next = get_next_clocksource()
+ if(next != clock):
+ cycle_last = read_clocksource(next)
+ clock = next
+
+ if(clock->update_callback):
+ clock->update_callback()
+
+ ntp_advance(nsec)
+ ppm = ntp_get_ppm_adjustment()
+ ntp_adj = ppm_to_mult_adj(clock, ppm)
+
+
+Unfortunately, the actual timeofday_periodic_hook code is not as simple as this
+pseudo code. For performance reasons, much has been done to pre-calculate
+values and use them repeatedly. Thus be aware that the code in timeofday.c is
+more complex; however, the functional logic is the same.
+
+
+How to port an architecture to GENERIC_TIME
+========================================================================
+Porting an architecture to the GENERIC_TIME timekeeping code consists of moving
+a little bit of code around and then deleting a fair amount. It is my hope that
+this will reduce the arch-specific maintenance work around timekeeping.
+
+Porting an arch usually requires the following steps.
+
+1. Define CONFIG_GENERIC_TIME in the arch's Kconfig
+2. Implement the following functions
+	nsec_t read_persistent_clock(void)
+	void sync_persistent_clock(struct timespec ts)
+3. Remove all of the arch-specific timekeeping code
+	do_gettimeofday()
+	do_settimeofday()
+	etc
+4. Implement clocksource drivers
+ See "How to write a clocksource driver" for more details
+
+The exceptions to the above are:
+
+5. If the arch has no continuous clocksource
+	A) Implement 1-3 in the above list.
+	B) Define CONFIG_IS_TICK_BASED in the arch's Kconfig
+ C) Implement the "long arch_getoffset(void)" function
+
+6. If the arch supports vsyscall gettimeofday (see x86_64 for reference)
+ A) Implement 1-4 in the above list
+ B) Define GENERIC_TIME_VSYSCALL
+ C) Implement arch_update_vsyscall_gtod()
+ D) Implement vsyscall gettimeofday (similar to __get_realtime_clock_ts)
+ E) Implement vread functions for supported clocksources
+
+
+
+How to write a clocksource driver.
+========================================================================
+First, a quick summary of what a clocksource driver provides.
+
+Simply put, a clocksource is an abstraction of a free running increasing
+counter. The abstraction provides the minimal amount of info for that counter
+to be usable for timekeeping. Those required values are:
+	1. Its name
+ 2. A rating value for selection priority
+ 3. A read function pointer
+ 4. A mask value for correct twos-complement subtraction
+ 5. A mult and shift pair that approximate the counter frequency
+ mult/(2^shift) ~= nanoseconds per cycle
+
+Additionally, there are other optionally set values that allow for advanced
+functionality. Those values are:
+ 6. The update_callback function.
+ 7. The is_continuous flag.
+ 8. The vread function pointer
+ 9. The vdata pointer value
+
+
+Now lets go over these values in detail.
+
+1. Name.
+	The clocksource's name should be unique since it is used both for
+identification and for manually overriding the default clocksource
+selection. The name must be shorter than 32 characters in order for it
+to be properly overridden.
+
+2. Rating value
+ This rating value is used as a priority value for clocksource
+selection. It has no direct connection to quality or physical properties of the
+clocksource, but is to be set and manipulated to guarantee that the best (by no
+specific metric) clocksource that will provide correct timekeeping is
+automatically selected. Rating suggestions can be found in
+include/linux/clocksource.h
+
+3. Read function pointer
+ This pointer should point to a function that returns an unsigned
+increasing cycle value from the clocksource. The value should have a coverage
+from zero to the maximum cycle value the clocksource can provide. This does not
+have to be direct hardware value and can also be a software counter. An example
+of a software counter is the jiffies clocksource.
+
+4. The mask value
+	This value should have all bits of the counter width set, i.e. it is
+the maximum cycle value (one less than a power of two). This allows two's
+complement subtraction to work on overflow boundary conditions if the max value
+is less than (cycle_t)-1. So for example, if we have a 16 bit counter (i.e. one
+that loops to zero after 0x0000FFFF), the mask would be 0xFFFF. So then, when
+finding the cycle difference around an overflow, where now = 0x0013 and
+then = 0xFFEE, we can compute the cycle delta properly using the equation:
+ delta = (now - then)&mask
+ delta = (0x0013 - 0xFFEE) & 0xFFFF
+ delta = 0xFFFF0025 & 0xFFFF /* note the unmasked negative value */
+ delta = 0x25
+
+5. The mult and shift pair
+	These 32bit values approximate the number of nanoseconds per cycle of
+the clocksource using the equation: mult/(2^shift). If you have a kHz or Hz
+frequency value, the mult value for a given shift value can be easily
+calculated using the clocksource_hz2mult() and clocksource_khz2mult() helper
+functions. When selecting a shift value, it is important to be careful. Larger
+shift values give a finer precision in the cycle to nanosecond conversion and
+allow for more exact NTP adjustments. However, if you select too large a shift
+value, the resulting mult value might overflow a cycle_t * mult computation.
+
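+For example, for a hypothetical 100MHz counter each cycle lasts 10 nanoseconds,
+so with shift = 10 the mult value and the resulting cycle-to-nanosecond
+conversion work out to:
+
+	ns_per_cycle = 10^9 / 100,000,000 = 10
+	mult         = ns_per_cycle * 2^shift = 10 * 1024 = 10240
+	nsec         = (cycle_delta * mult) >> shift
+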
+
+So if you have a simple hardware counter that does not change frequency,
+filling in the above should be sufficient for a functional clocksource. But
+read on for details on implementing a more complex clocksource.
+
+6. The update_callback function pointer.
+ If this function pointer is non-NULL, it will be called every periodic
+hook when it is safe for the clocksource to change its state. This would be
+necessary in the case where the counter frequency changes, for example. One
+user of this function pointer is the TSC clocksource. When the TSC frequency
+changes (which may occur if the cpu changes frequency) we need to notify the
+clocksource at a safe point where that state may change. Thus, if the TSC has
+changed frequency we set the new mult/shift values in the update_callback
+function.
+
+7. The is_continuous flag.
+ This flag variable (0 if false, 1 if true) denotes that the clocksource
+is continuous. This means that it is a purely hardware driven clocksource and
+is not dependent on any software code to run for it to increment properly. This
+denotation will be useful in the future when timer ticks may be disabled for
+long periods of time. Doing so using software clocksources, like the jiffies
+clocksource, would cause timekeeping problems.
+
+8. The vread function pointer.
+ This function pointer points to a user-space accessible function that
+reads the clocksource. This is used in userspace gettimeofday implementations
+to improve performance. See the x86-64 TSC clocksource implementation for an
+example.
+
+9. The vdata pointer.
+ This pointer is passed to the vread function pointer in a userspace
+gettimeofday implementation. Its usage is dependent on the vread
+implementation, but if the pointer points to data, that data must be readable
+from userspace.
+
+
+Now let's write a quick clocksource for an imaginary bit of hardware. Here are
+the specs:
+
+	A 32bit counter can be found at the MMIO address 0xFEEDF000. It runs at
+100MHz. To enable it, the low bit of the address 0xFEEDF0F0 must be set to
+one.
+
+So let's start out with an empty cool-counter.c file, and define the clocksource.
+
+#include <linux/clocksource.h>
+#include <linux/module.h>
+#include <asm/io.h>
+
+#define COOL_READ_PTR 0xFEEDF000
+#define COOL_START_PTR 0xFEEDF0F0
+
+static void __iomem *cool_ptr = (void __iomem *) COOL_READ_PTR;
+
+struct clocksource clocksource_cool = {
+	.name	= "cool",
+	.rating	= 200,		/* it's a pretty decent clock */
+	.mask	= 0xFFFFFFFF,	/* 32 bits */
+	.mult	= 0,		/* to be computed */
+	.shift	= 10,
+};
+
+
+Now let's write the read function:
+
+cycle_t cool_counter_read(void)
+{
+ cycle_t ret = readl(cool_ptr);
+ return ret;
+}
+
+Finally, lets write the init function:
+
+void cool_counter_init(void)
+{
+	void __iomem *ptr = (void __iomem *) COOL_START_PTR;
+ u32 val;
+
+ /* start the counter */
+ val = readl(ptr);
+ val |= 0x1;
+ writel(val, ptr);
+
+ /* finish initializing the clocksource */
+ clocksource_cool.read = cool_counter_read;
+ clocksource_cool.mult = clocksource_khz2mult(100000,
+ clocksource_cool.shift);
+
+ /* register the clocksource */
+ register_clocksource(&clocksource_cool);
+}
+module_init(cool_counter_init);
+
+
+Now wasn't that easy!
Index: linux.prev/Makefile
===================================================================
--- linux.prev.orig/Makefile
+++ linux.prev/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 15
-EXTRAVERSION =
+EXTRAVERSION =-rt21
NAME=Sliding Snow Leopard
# *DOCUMENTATION*
@@ -519,10 +519,14 @@ CFLAGS += $(call add-align,CONFIG_CC_AL
CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops)
CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps)
-ifdef CONFIG_FRAME_POINTER
-CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ifdef CONFIG_MCOUNT
+CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
else
-CFLAGS += -fomit-frame-pointer
+ ifdef CONFIG_FRAME_POINTER
+ CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ else
+ CFLAGS += -fomit-frame-pointer
+ endif
endif
ifdef CONFIG_DEBUG_INFO
Index: linux.prev/arch/arm/Kconfig
===================================================================
--- linux.prev.orig/arch/arm/Kconfig
+++ linux.prev/arch/arm/Kconfig
@@ -50,6 +50,10 @@ config UID16
bool
default y
+config GENERIC_HARDIRQS
+ bool
+ default y
+
config RWSEM_GENERIC_SPINLOCK
bool
default y
@@ -368,18 +372,7 @@ config LOCAL_TIMERS
accounting to be spread across the timer interval, preventing a
"thundering herd" at every timer tick.
-config PREEMPT
- bool "Preemptible Kernel (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
- This allows applications to run more reliably even when the system is
- under load.
-
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
+source kernel/Kconfig.preempt
config NO_IDLE_HZ
bool "Dynamic tick timer"
Index: linux.prev/arch/arm/boot/compressed/head.S
===================================================================
--- linux.prev.orig/arch/arm/boot/compressed/head.S
+++ linux.prev/arch/arm/boot/compressed/head.S
@@ -710,6 +710,19 @@ memdump: mov r12, r0
mov pc, r10
#endif
+#ifdef CONFIG_MCOUNT
+/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this
+ * trampoline
+ */
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+mcount:
+ mov pc, lr @ just return
+#endif
+
+
reloc_end:
.align
Index: linux.prev/arch/arm/boot/compressed/misc.c
===================================================================
--- linux.prev.orig/arch/arm/boot/compressed/misc.c
+++ linux.prev/arch/arm/boot/compressed/misc.c
@@ -199,6 +199,7 @@ static ulg free_mem_ptr_end;
#define HEAP_SIZE 0x2000
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
#ifndef STANDALONE_DEBUG
Index: linux.prev/arch/arm/common/dmabounce.c
===================================================================
--- linux.prev.orig/arch/arm/common/dmabounce.c
+++ linux.prev/arch/arm/common/dmabounce.c
@@ -404,11 +404,11 @@ dma_map_single(struct device *dev, void
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
dma_addr = map_single(dev, ptr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return dma_addr;
}
@@ -431,11 +431,11 @@ dma_unmap_single(struct device *dev, dma
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
unmap_single(dev, dma_addr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
int
@@ -450,7 +450,7 @@ dma_map_sg(struct device *dev, struct sc
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
struct page *page = sg->page;
@@ -462,7 +462,7 @@ dma_map_sg(struct device *dev, struct sc
map_single(dev, ptr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return nents;
}
@@ -479,7 +479,7 @@ dma_unmap_sg(struct device *dev, struct
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
dma_addr_t dma_addr = sg->dma_address;
@@ -488,7 +488,7 @@ dma_unmap_sg(struct device *dev, struct
unmap_single(dev, dma_addr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -500,11 +500,11 @@ dma_sync_single_for_cpu(struct device *d
dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
__func__, (void *) dma_addr, size, dir);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
sync_single(dev, dma_addr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -516,11 +516,11 @@ dma_sync_single_for_device(struct device
dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n",
__func__, (void *) dma_addr, size, dir);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
sync_single(dev, dma_addr, size, dir);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -535,7 +535,7 @@ dma_sync_sg_for_cpu(struct device *dev,
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
dma_addr_t dma_addr = sg->dma_address;
@@ -544,7 +544,7 @@ dma_sync_sg_for_cpu(struct device *dev,
sync_single(dev, dma_addr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void
@@ -559,7 +559,7 @@ dma_sync_sg_for_device(struct device *de
BUG_ON(dir == DMA_NONE);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (i = 0; i < nents; i++, sg++) {
dma_addr_t dma_addr = sg->dma_address;
@@ -568,7 +568,7 @@ dma_sync_sg_for_device(struct device *de
sync_single(dev, dma_addr, length, dir);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int
Index: linux.prev/arch/arm/common/locomo.c
===================================================================
--- linux.prev.orig/arch/arm/common/locomo.c
+++ linux.prev/arch/arm/common/locomo.c
@@ -425,6 +425,12 @@ static struct irqchip locomo_spi_chip =
.unmask = locomo_spi_unmask_irq,
};
+static DEFINE_IRQ_CHAINED_TYPE(locomo_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_key_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_gpio_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_lt_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_spi_handler);
+
static void locomo_setup_irq(struct locomo *lchip)
{
int irq;
Index: linux.prev/arch/arm/common/sa1111.c
===================================================================
--- linux.prev.orig/arch/arm/common/sa1111.c
+++ linux.prev/arch/arm/common/sa1111.c
@@ -171,11 +171,11 @@ sa1111_irq_handler(unsigned int irq, str
for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1)
if (stat0 & 1)
- do_edge_IRQ(i, irq_desc + i, regs);
+ handle_edge_irq(i, irq_desc + i, regs);
for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1)
if (stat1 & 1)
- do_edge_IRQ(i, irq_desc + i, regs);
+ handle_edge_irq(i, irq_desc + i, regs);
/* For level-based interrupts */
desc->chip->unmask(irq);
@@ -380,6 +380,8 @@ static struct irqchip sa1111_high_chip =
.set_wake = sa1111_wake_highirq,
};
+static DEFINE_IRQ_CHAINED_TYPE(sa1111_irq_handler);
+
static void sa1111_setup_irq(struct sa1111 *sachip)
{
void __iomem *irqbase = sachip->base + SA1111_INTC;
Index: linux.prev/arch/arm/common/time-acorn.c
===================================================================
--- linux.prev.orig/arch/arm/common/time-acorn.c
+++ linux.prev/arch/arm/common/time-acorn.c
@@ -16,6 +16,7 @@
#include
#include
#include
+#include
#include
#include
@@ -76,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_i
static struct irqaction ioc_timer_irq = {
.name = "timer",
- .flags = SA_INTERRUPT,
+ .flags = SA_INTERRUPT | SA_NODELAY,
.handler = ioc_timer_interrupt
};
Index: linux.prev/arch/arm/kernel/calls.S
===================================================================
--- linux.prev.orig/arch/arm/kernel/calls.S
+++ linux.prev/arch/arm/kernel/calls.S
@@ -7,11 +7,8 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * This file is included twice in entry-common.S
+ * NR_syscalls now defined in include/asm-arm/unistd.h - tglx
*/
-#ifndef NR_syscalls
-#define NR_syscalls 328
-#else
__syscall_start:
/* 0 */ .long sys_restart_syscall
@@ -341,4 +338,3 @@ __syscall_end:
.rept NR_syscalls - (__syscall_end - __syscall_start) / 4
.long sys_ni_syscall
.endr
-#endif
Index: linux.prev/arch/arm/kernel/dma.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/dma.c
+++ linux.prev/arch/arm/kernel/dma.c
@@ -22,7 +22,7 @@
#include
-DEFINE_SPINLOCK(dma_spin_lock);
+DEFINE_RAW_SPINLOCK(dma_spin_lock);
#if MAX_DMA_CHANNELS > 0
Index: linux.prev/arch/arm/kernel/ecard.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/ecard.c
+++ linux.prev/arch/arm/kernel/ecard.c
@@ -619,7 +619,7 @@ ecard_irqexp_handler(unsigned int irq, s
ecard_t *ec = slot_to_ecard(slot);
if (ec->claimed) {
- struct irqdesc *d = irqdesc + ec->irq;
+ struct irqdesc *d = irq_desc + ec->irq;
/*
* this ugly code is so that we can operate a
* prioritorising system:
@@ -1052,6 +1052,9 @@ ecard_probe(int slot, card_type_t type)
return rc;
}
+static DEFINE_IRQ_CHAINED_TYPE(ecard_irqexp_handler);
+static DEFINE_IRQ_CHAINED_TYPE(ecard_irq_handler);
+
/*
* Initialise the expansion card system.
* Locate all hardware - interrupt management and
@@ -1081,8 +1084,10 @@ static int __init ecard_init(void)
irqhw = ecard_probeirqhw();
- set_irq_chained_handler(IRQ_EXPANSIONCARD,
- irqhw ? ecard_irqexp_handler : ecard_irq_handler);
+ if (irqhw)
+ set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irqexp_handler);
+ else
+ set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irq_handler);
ecard_proc_init();
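
The pattern in the hunk above recurs throughout this series: a demultiplexing flow
handler is declared as a chained irq type with DEFINE_IRQ_CHAINED_TYPE() and then
hooked to its parent line with set_irq_chained_handler(). A minimal sketch of that
idiom follows; it is illustration only, not part of the patch, and my_demux_handler,
my_read_pending(), MY_FIRST_CHILD_IRQ and MY_PARENT_IRQ are hypothetical names. The
per-bit dispatch loop mirrors the sa1111_irq_handler() hunk earlier in this series.

#include <linux/irq.h>
#include <linux/init.h>

/* Sketch only: the chained-handler registration idiom used in this patch. */
static void my_demux_handler(unsigned int irq, struct irqdesc *desc,
			     struct pt_regs *regs)
{
	unsigned int i, stat = my_read_pending();	/* hypothetical status read */

	/* dispatch every pending child interrupt to the generic layer */
	for (i = MY_FIRST_CHILD_IRQ; stat; i++, stat >>= 1)
		if (stat & 1)
			handle_edge_irq(i, irq_desc + i, regs);
}

static DEFINE_IRQ_CHAINED_TYPE(my_demux_handler);

static void __init my_board_init_irq(void)
{
	/* bind the demux handler to the parent line */
	set_irq_chained_handler(MY_PARENT_IRQ, my_demux_handler);
}
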
Index: linux.prev/arch/arm/kernel/entry-armv.S
===================================================================
--- linux.prev.orig/arch/arm/kernel/entry-armv.S
+++ linux.prev/arch/arm/kernel/entry-armv.S
@@ -192,7 +192,7 @@ __irq_svc:
irq_handler
#ifdef CONFIG_PREEMPT
ldr r0, [tsk, #TI_FLAGS] @ get flags
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
blne svc_preempt
preempt_return:
ldr r0, [tsk, #TI_PREEMPT] @ read preempt value
@@ -219,7 +219,7 @@ svc_preempt:
str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
beq preempt_return @ go again
b 1b
#endif
Index: linux.prev/arch/arm/kernel/entry-common.S
===================================================================
--- linux.prev.orig/arch/arm/kernel/entry-common.S
+++ linux.prev/arch/arm/kernel/entry-common.S
@@ -3,6 +3,8 @@
*
* Copyright (C) 2000 Russell King
*
+ * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -41,7 +43,7 @@ ret_fast_syscall:
fast_work_pending:
str r0, [sp, #S_R0+S_OFF]! @ returned r0
work_pending:
- tst r1, #_TIF_NEED_RESCHED
+ tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
bne work_resched
tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING
beq no_work_pending
@@ -51,7 +53,8 @@ work_pending:
b ret_slow_syscall @ Check work again
work_resched:
- bl schedule
+ bl __schedule
+
/*
* "slow" syscall return path. "why" tells us if this was a real syscall.
*/
@@ -87,8 +90,6 @@ ENTRY(ret_from_fork)
b ret_slow_syscall
-#include "calls.S"
-
/*=============================================================================
* SWI handler
*-----------------------------------------------------------------------------
@@ -271,3 +272,110 @@ sys_mmap2:
str r5, [sp, #4]
b do_mmap2
#endif
+
+#ifdef CONFIG_FRAME_POINTER
+
+#ifdef CONFIG_MCOUNT
+/*
+ * At the point where we are in mcount() we maintain the
+ * frame of the prologue code and keep the call to mcount()
+ * out of the stack frame list:
+
+ saved pc <---\ caller of instrumented routine
+ saved lr |
+ ip/prev_sp |
+ fp -----^ |
+ : |
+ |
+ -> saved pc | instrumented routine
+ | saved lr |
+ | ip/prev_sp |
+ | fp ---------/
+ | :
+ |
+ | mcount
+ | saved pc
+ | saved lr
+ | ip/prev sp
+ -- fp
+ r3
+ r2
+ r1
+ sp-> r0
+ :
+ */
+
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+
+/* gcc -pg generated FUNCTION_PROLOGUE references mcount()
+ * and has already created the stack frame invocation for
+ * the routine we have been called to instrument. We create
+ * a complete frame nevertheless, as we want to use the same
+ * call to mcount() from c code.
+ */
+mcount:
+
+ ldr ip, =mcount_enabled @ leave early, if disabled
+ ldr ip, [ip]
+ cmp ip, #0
+ moveq pc,lr
+
+ mov ip, sp
+ stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame
+
+ ldr r1, [fp, #-4] @ get lr (the return address
+ @ of the caller of the
+ @ instrumented function)
+ mov r0, lr @ get lr - (the return address
+ @ of the instrumented function)
+
+ sub fp, ip, #4 @ point fp at this frame
+
+ bl __trace
+1:
+ ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return
+
+#endif
+
+/* ARM replacement for unsupported gcc __builtin_return_address(n)
+ * where 0 < n. n == 0 is supported here as well.
+ *
+ * Walk up the stack frame until the desired frame is found or a NULL
+ * fp is encountered, return NULL in the latter case.
+ *
+ * Note: it is possible under code optimization for the stack invocation
+ * of an ancestor function (level N) to be removed before calling a
+ * descendant function (level N+1). No easy means is available to deduce
+ * this scenario with the result being [for example] caller_addr(0) when
+ * called from level N+1 returning level N-1 rather than the expected
+ * level N. This optimization issue appears isolated to the case of
+ * a call to a level N+1 routine made at the tail end of a level N
+ * routine -- the level N frame is deleted and a simple branch is made
+ * to the level N+1 routine.
+ */
+
+ .text
+ .align 0
+ .type arm_return_addr %function
+ .global arm_return_addr
+
+arm_return_addr:
+ mov ip, r0
+ mov r0, fp
+3:
+ cmp r0, #0
+ beq 1f @ frame list hit end, bail
+ cmp ip, #0
+ beq 2f @ reached desired frame
+ ldr r0, [r0, #-12] @ else continue, get next fp
+ sub ip, ip, #1
+ b 3b
+2:
+ ldr r0, [r0, #-4] @ get target return address
+1:
+ mov pc, lr
+
+#endif
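
For readers who prefer C, the frame walk performed by arm_return_addr above can be
sketched as the loop below. This is illustration only, not part of the patch; the
offsets follow the APCS frame layout described in the comment and assembly above,
where fp[-1] holds the saved return address and fp[-3] the previous frame pointer.

/* Sketch only: C equivalent of the arm_return_addr frame walk above. */
static void *arm_return_addr_sketch(int level, unsigned long *fp)
{
	while (fp) {
		if (level-- == 0)
			return (void *)fp[-1];		/* saved return address */
		fp = (unsigned long *)fp[-3];		/* previous frame pointer */
	}
	return NULL;	/* frame list ended before the requested level was reached */
}
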
Index: linux.prev/arch/arm/kernel/fiq.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/fiq.c
+++ linux.prev/arch/arm/kernel/fiq.c
@@ -38,6 +38,7 @@
#include
#include
#include
+#include
#include
#include
@@ -88,7 +89,7 @@ void set_fiq_handler(void *start, unsign
* disable irqs for the duration. Note - these functions are almost
* entirely coded in assembly.
*/
-void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
@@ -106,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs
: "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE));
}
-void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
Index: linux.prev/arch/arm/kernel/init_task.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/init_task.c
+++ linux.prev/arch/arm/kernel/init_task.c
@@ -12,8 +12,8 @@
#include
#include
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
+static struct fs_struct init_fs = INIT_FS(init_fs);
+static struct files_struct init_files = INIT_FILES(init_files);
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
Index: linux.prev/arch/arm/kernel/irq.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/irq.c
+++ linux.prev/arch/arm/kernel/irq.c
@@ -27,6 +27,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -38,193 +39,11 @@
#include
#include
-#include
#include
-#include
#include
-/*
- * Maximum IRQ count. Currently, this is arbitary. However, it should
- * not be set too low to prevent false triggering. Conversely, if it
- * is set too high, then you could miss a stuck IRQ.
- *
- * Maybe we ought to set a timer and re-enable the IRQ at a later time?
- */
-#define MAX_IRQ_CNT 100000
-
-static int noirqdebug;
-static volatile unsigned long irq_err_count;
-static DEFINE_SPINLOCK(irq_controller_lock);
-static LIST_HEAD(irq_pending);
-
-struct irqdesc irq_desc[NR_IRQS];
void (*init_arch_irq)(void) __initdata = NULL;
-/*
- * No architecture-specific irq_finish function defined in arm/arch/irqs.h.
- */
-#ifndef irq_finish
-#define irq_finish(irq) do { } while (0)
-#endif
-
-/*
- * Dummy mask/unmask handler
- */
-void dummy_mask_unmask_irq(unsigned int irq)
-{
-}
-
-irqreturn_t no_action(int irq, void *dev_id, struct pt_regs *regs)
-{
- return IRQ_NONE;
-}
-
-void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- irq_err_count += 1;
- printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
-}
-
-static struct irqchip bad_chip = {
- .ack = dummy_mask_unmask_irq,
- .mask = dummy_mask_unmask_irq,
- .unmask = dummy_mask_unmask_irq,
-};
-
-static struct irqdesc bad_irq_desc = {
- .chip = &bad_chip,
- .handle = do_bad_IRQ,
- .pend = LIST_HEAD_INIT(bad_irq_desc.pend),
- .disable_depth = 1,
-};
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
-
- while (desc->running)
- barrier();
-}
-EXPORT_SYMBOL(synchronize_irq);
-
-#define smp_set_running(desc) do { desc->running = 1; } while (0)
-#define smp_clear_running(desc) do { desc->running = 0; } while (0)
-#else
-#define smp_set_running(desc) do { } while (0)
-#define smp_clear_running(desc) do { } while (0)
-#endif
-
-/**
- * disable_irq_nosync - disable an irq without waiting
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and disables
- * are nested. We do this lazily.
- *
- * This function may be called from IRQ context.
- */
-void disable_irq_nosync(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->disable_depth++;
- list_del_init(&desc->pend);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- * disable_irq - disable an irq and wait for completion
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and disables
- * are nested. This functions waits for any pending IRQ
- * handlers for this interrupt to complete before returning.
- * If you use this function while holding a resource the IRQ
- * handler may need you will deadlock.
- *
- * This function may be called - with care - from IRQ context.
- */
-void disable_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
-
- disable_irq_nosync(irq);
- if (desc->action)
- synchronize_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-/**
- * enable_irq - enable interrupt handling on an irq
- * @irq: Interrupt to enable
- *
- * Re-enables the processing of interrupts on this IRQ line.
- * Note that this may call the interrupt handler, so you may
- * get unexpected results if you hold IRQs disabled.
- *
- * This function may be called from IRQ context.
- */
-void enable_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (unlikely(!desc->disable_depth)) {
- printk("enable_irq(%u) unbalanced from %p\n", irq,
- __builtin_return_address(0));
- } else if (!--desc->disable_depth) {
- desc->probing = 0;
- desc->chip->unmask(irq);
-
- /*
- * If the interrupt is waiting to be processed,
- * try to re-run it. We can't directly run it
- * from here since the caller might be in an
- * interrupt-protected region.
- */
- if (desc->pending && list_empty(&desc->pend)) {
- desc->pending = 0;
- if (!desc->chip->retrigger ||
- desc->chip->retrigger(irq))
- list_add(&desc->pend, &irq_pending);
- }
- }
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(enable_irq);
-
-/*
- * Enable wake on selected irq
- */
-void enable_irq_wake(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (desc->chip->set_wake)
- desc->chip->set_wake(irq, 1);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(enable_irq_wake);
-
-void disable_irq_wake(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (desc->chip->set_wake)
- desc->chip->set_wake(irq, 0);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(disable_irq_wake);
-
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, cpu;
@@ -243,7 +62,7 @@ int show_interrupts(struct seq_file *p,
}
if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_controller_lock, flags);
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
action = irq_desc[i].action;
if (!action)
goto unlock;
@@ -257,7 +76,7 @@ int show_interrupts(struct seq_file *p,
seq_putc(p, '\n');
unlock:
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
#ifdef CONFIG_ARCH_ACORN
show_fiq_list(p, v);
@@ -266,374 +85,83 @@ unlock:
show_ipi_list(p);
show_local_irqs(p);
#endif
+#ifdef FIXME_TGLX
seq_printf(p, "Err: %10lu\n", irq_err_count);
- }
- return 0;
-}
-
-/*
- * IRQ lock detection.
- *
- * Hopefully, this should get us out of a few locked situations.
- * However, it may take a while for this to happen, since we need
- * a large number if IRQs to appear in the same jiffie with the
- * same instruction pointer (or within 2 instructions).
- */
-static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs)
-{
- unsigned long instr_ptr = instruction_pointer(regs);
-
- if (desc->lck_jif == jiffies &&
- desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) {
- desc->lck_cnt += 1;
-
- if (desc->lck_cnt > MAX_IRQ_CNT) {
- printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq);
- return 1;
- }
- } else {
- desc->lck_cnt = 0;
- desc->lck_pc = instruction_pointer(regs);
- desc->lck_jif = jiffies;
- }
- return 0;
-}
-
-static void
-report_bad_irq(unsigned int irq, struct pt_regs *regs, struct irqdesc *desc, int ret)
-{
- static int count = 100;
- struct irqaction *action;
-
- if (!count || noirqdebug)
- return;
-
- count--;
-
- if (ret != IRQ_HANDLED && ret != IRQ_NONE) {
- printk("irq%u: bogus retval mask %x\n", irq, ret);
- } else {
- printk("irq%u: nobody cared\n", irq);
- }
- show_regs(regs);
- dump_stack();
- printk(KERN_ERR "handlers:");
- action = desc->action;
- do {
- printk("\n" KERN_ERR "[<%p>]", action->handler);
- print_symbol(" (%s)", (unsigned long)action->handler);
- action = action->next;
- } while (action);
- printk("\n");
-}
-
-static int
-__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs)
-{
- unsigned int status;
- int ret, retval = 0;
-
- spin_unlock(&irq_controller_lock);
-
-#ifdef CONFIG_NO_IDLE_HZ
- if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) {
- write_seqlock(&xtime_lock);
- if (system_timer->dyn_tick->state & DYN_TICK_ENABLED)
- system_timer->dyn_tick->handler(irq, 0, regs);
- write_sequnlock(&xtime_lock);
- }
#endif
-
- if (!(action->flags & SA_INTERRUPT))
- local_irq_enable();
-
- status = 0;
- do {
- ret = action->handler(irq, action->dev_id, regs);
- if (ret == IRQ_HANDLED)
- status |= action->flags;
- retval |= ret;
- action = action->next;
- } while (action);
-
- if (status & SA_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
-
- spin_lock_irq(&irq_controller_lock);
-
- return retval;
-}
-
-/*
- * This is for software-decoded IRQs. The caller is expected to
- * handle the ack, clear, mask and unmask issues.
- */
-void
-do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- struct irqaction *action;
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- kstat_cpu(cpu).irqs[irq]++;
-
- smp_set_running(desc);
-
- action = desc->action;
- if (action) {
- int ret = __do_irq(irq, action, regs);
- if (ret != IRQ_HANDLED)
- report_bad_irq(irq, regs, desc, ret);
- }
-
- smp_clear_running(desc);
-}
-
-/*
- * Most edge-triggered IRQ implementations seem to take a broken
- * approach to this. Hence the complexity.
- */
-void
-do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- /*
- * If we're currently running this IRQ, or its disabled,
- * we shouldn't process the IRQ. Instead, turn on the
- * hardware masks.
- */
- if (unlikely(desc->running || desc->disable_depth))
- goto running;
-
- /*
- * Acknowledge and clear the IRQ, but don't mask it.
- */
- desc->chip->ack(irq);
-
- /*
- * Mark the IRQ currently in progress.
- */
- desc->running = 1;
-
- kstat_cpu(cpu).irqs[irq]++;
-
- do {
- struct irqaction *action;
-
- action = desc->action;
- if (!action)
- break;
-
- if (desc->pending && !desc->disable_depth) {
- desc->pending = 0;
- desc->chip->unmask(irq);
- }
-
- __do_irq(irq, action, regs);
- } while (desc->pending && !desc->disable_depth);
-
- desc->running = 0;
-
- /*
- * If we were disabled or freed, shut down the handler.
- */
- if (likely(desc->action && !check_irq_lock(desc, irq, regs)))
- return;
-
- running:
- /*
- * We got another IRQ while this one was masked or
- * currently running. Delay it.
- */
- desc->pending = 1;
- desc->chip->mask(irq);
- desc->chip->ack(irq);
-}
-
-/*
- * Level-based IRQ handler. Nice and simple.
- */
-void
-do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- struct irqaction *action;
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- /*
- * Acknowledge, clear _AND_ disable the interrupt.
- */
- desc->chip->ack(irq);
-
- if (likely(!desc->disable_depth)) {
- kstat_cpu(cpu).irqs[irq]++;
-
- smp_set_running(desc);
-
- /*
- * Return with this interrupt masked if no action
- */
- action = desc->action;
- if (action) {
- int ret = __do_irq(irq, desc->action, regs);
-
- if (ret != IRQ_HANDLED)
- report_bad_irq(irq, regs, desc, ret);
-
- if (likely(!desc->disable_depth &&
- !check_irq_lock(desc, irq, regs)))
- desc->chip->unmask(irq);
- }
-
- smp_clear_running(desc);
}
+ return 0;
}
-static void do_pending_irqs(struct pt_regs *regs)
-{
- struct list_head head, *l, *n;
-
- do {
- struct irqdesc *desc;
-
- /*
- * First, take the pending interrupts off the list.
- * The act of calling the handlers may add some IRQs
- * back onto the list.
- */
- head = irq_pending;
- INIT_LIST_HEAD(&irq_pending);
- head.next->prev = &head;
- head.prev->next = &head;
-
- /*
- * Now run each entry. We must delete it from our
- * list before calling the handler.
- */
- list_for_each_safe(l, n, &head) {
- desc = list_entry(l, struct irqdesc, pend);
- list_del_init(&desc->pend);
- desc_handle_irq(desc - irq_desc, desc, regs);
- }
-
- /*
- * The list must be empty.
- */
- BUG_ON(!list_empty(&head));
- } while (!list_empty(&irq_pending));
-}
+/* Handle bad interrupts */
+static struct irq_desc bad_irq = {
+ .handler = &no_irq_type,
+ .lock = RAW_SPIN_LOCK_UNLOCKED
+};
/*
- * do_IRQ handles all hardware IRQ's. Decoded IRQs should not
+ * asm_do_IRQ handles all hardware IRQ's. Decoded IRQs should not
* come via this function. Instead, they should provide their
* own 'handler'
*/
-asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
struct irqdesc *desc = irq_desc + irq;
+ trace_special(instruction_pointer(regs), irq, 0);
+
/*
* Some hardware gives randomly wrong interrupts. Rather
* than crashing, do something sensible.
*/
if (irq >= NR_IRQS)
- desc = &bad_irq_desc;
+ desc = &bad_irq;
irq_enter();
- spin_lock(&irq_controller_lock);
- desc_handle_irq(irq, desc, regs);
- /*
- * Now re-run any pending interrupts.
- */
- if (!list_empty(&irq_pending))
- do_pending_irqs(regs);
-
- irq_finish(irq);
+ desc_handle_irq(irq, desc, regs);
- spin_unlock(&irq_controller_lock);
irq_exit();
}
-void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained)
+void __set_irq_handler(unsigned int irq, struct irq_type *type, int is_chained)
{
struct irqdesc *desc;
unsigned long flags;
if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq);
+ printk(KERN_ERR "Trying to install type control for IRQ%d\n", irq);
return;
}
- if (handle == NULL)
- handle = do_bad_IRQ;
-
desc = irq_desc + irq;
- if (is_chained && desc->chip == &bad_chip)
- printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq);
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (handle == do_bad_IRQ) {
- desc->chip->mask(irq);
- desc->chip->ack(irq);
- desc->disable_depth = 1;
- }
- desc->handle = handle;
- if (handle != do_bad_IRQ && is_chained) {
- desc->valid = 0;
- desc->probe_ok = 0;
- desc->disable_depth = 0;
- desc->chip->unmask(irq);
+ /* Uninstall ? */
+ if (type == NULL || type == &no_irq_type) {
+ spin_lock_irqsave(&desc->lock, flags);
+ if (desc->chip) {
+ desc->chip->mask(irq);
+ desc->chip->ack(irq);
+ }
+ desc->depth = 1;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-void set_irq_chip(unsigned int irq, struct irqchip *chip)
-{
- struct irqdesc *desc;
- unsigned long flags;
-
- if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+ /* Install the irq_type */
+ if (generic_set_irq_type(irq, type))
return;
- }
-
- if (chip == NULL)
- chip = &bad_chip;
-
- desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->chip = chip;
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-int set_irq_type(unsigned int irq, unsigned int type)
-{
- struct irqdesc *desc;
- unsigned long flags;
- int ret = -ENXIO;
+ spin_lock_irqsave(&desc->lock, flags);
+ if (is_chained && (desc->handler == &no_irq_type || !desc->chip))
+ printk(KERN_WARNING "Trying to install chained interrupt type for IRQ%d\n", irq);
- if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
- return -ENODEV;
- }
-
- desc = irq_desc + irq;
- if (desc->chip->set_type) {
- spin_lock_irqsave(&irq_controller_lock, flags);
- ret = desc->chip->set_type(irq, type);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ if (type != NULL && is_chained) {
+ desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
+ desc->depth = 0;
+ if (desc->chip)
+ desc->chip->unmask(irq);
}
-
- return ret;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
-EXPORT_SYMBOL(set_irq_type);
void set_irq_flags(unsigned int irq, unsigned int iflags)
{
@@ -646,400 +174,28 @@ void set_irq_flags(unsigned int irq, uns
}
desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->valid = (iflags & IRQF_VALID) != 0;
- desc->probe_ok = (iflags & IRQF_PROBE) != 0;
- desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0;
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-int setup_irq(unsigned int irq, struct irqaction *new)
-{
- int shared = 0;
- struct irqaction *old, **p;
- unsigned long flags;
- struct irqdesc *desc;
-
- /*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
- */
- if (new->flags & SA_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
-
- /*
- * The following block of code has to be executed atomically
- */
- desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- p = &desc->action;
- if ((old = *p) != NULL) {
- /* Can't share interrupts unless both agree to */
- if (!(old->flags & new->flags & SA_SHIRQ)) {
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return -EBUSY;
- }
-
- /* add new interrupt at end of irq queue */
- do {
- p = &old->next;
- old = *p;
- } while (old);
- shared = 1;
- }
-
- *p = new;
-
- if (!shared) {
- desc->probing = 0;
- desc->running = 0;
- desc->pending = 0;
- desc->disable_depth = 1;
- if (!desc->noautoenable) {
- desc->disable_depth = 0;
- desc->chip->unmask(irq);
- }
- }
-
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return 0;
-}
-
-/**
- * request_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. From the point this
- * call is made your handler function may be invoked. Since
- * your handler function must clear any interrupt the board
- * raises, you must take care both to initialise your hardware
- * and to set up the interrupt handler in the right order.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If your interrupt is shared you must pass a non NULL dev_id
- * as this is required when freeing the interrupt.
- *
- * Flags:
- *
- * SA_SHIRQ Interrupt is shared
- *
- * SA_INTERRUPT Disable local interrupts while processing
- *
- * SA_SAMPLE_RANDOM The interrupt can be used for entropy
- *
- */
-int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long irq_flags, const char * devname, void *dev_id)
-{
- unsigned long retval;
- struct irqaction *action;
-
- if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler ||
- (irq_flags & SA_SHIRQ && !dev_id))
- return -EINVAL;
-
- action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL);
- if (!action)
- return -ENOMEM;
-
- action->handler = handler;
- action->flags = irq_flags;
- cpus_clear(action->mask);
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- retval = setup_irq(irq, action);
-
- if (retval)
- kfree(action);
- return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/**
- * free_irq - free an interrupt
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function.
- *
- * This function must not be called from interrupt context.
- */
-void free_irq(unsigned int irq, void *dev_id)
-{
- struct irqaction * action, **p;
- unsigned long flags;
-
- if (irq >= NR_IRQS || !irq_desc[irq].valid) {
- printk(KERN_ERR "Trying to free IRQ%d\n",irq);
- dump_stack();
- return;
- }
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
- if (action->dev_id != dev_id)
- continue;
-
- /* Found it - now free it */
- *p = action->next;
- break;
- }
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-
- if (!action) {
- printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
- dump_stack();
- } else {
- synchronize_irq(irq);
- kfree(action);
- }
-}
-
-EXPORT_SYMBOL(free_irq);
-
-static DECLARE_MUTEX(probe_sem);
-
-/* Start the interrupt probing. Unlike other architectures,
- * we don't return a mask of interrupts from probe_irq_on,
- * but return the number of interrupts enabled for the probe.
- * The interrupts which have been enabled for probing is
- * instead recorded in the irq_desc structure.
- */
-unsigned long probe_irq_on(void)
-{
- unsigned int i, irqs = 0;
- unsigned long delay;
-
- down(&probe_sem);
-
- /*
- * first snaffle up any unassigned but
- * probe-able interrupts
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (!irq_desc[i].probe_ok || irq_desc[i].action)
- continue;
-
- irq_desc[i].probing = 1;
- irq_desc[i].triggered = 0;
- if (irq_desc[i].chip->set_type)
- irq_desc[i].chip->set_type(i, IRQT_PROBE);
- irq_desc[i].chip->unmask(i);
- irqs += 1;
- }
- spin_unlock_irq(&irq_controller_lock);
-
- /*
- * wait for spurious interrupts to mask themselves out again
- */
- for (delay = jiffies + HZ/10; time_before(jiffies, delay); )
- /* min 100ms delay */;
-
- /*
- * now filter out any obviously spurious interrupts
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (irq_desc[i].probing && irq_desc[i].triggered) {
- irq_desc[i].probing = 0;
- irqs -= 1;
- }
- }
- spin_unlock_irq(&irq_controller_lock);
-
- return irqs;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-unsigned int probe_irq_mask(unsigned long irqs)
-{
- unsigned int mask = 0, i;
-
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < 16 && i < NR_IRQS; i++)
- if (irq_desc[i].probing && irq_desc[i].triggered)
- mask |= 1 << i;
- spin_unlock_irq(&irq_controller_lock);
-
- up(&probe_sem);
-
- return mask;
-}
-EXPORT_SYMBOL(probe_irq_mask);
-
-/*
- * Possible return values:
- * >= 0 - interrupt number
- * -1 - no interrupt/many interrupts
- */
-int probe_irq_off(unsigned long irqs)
-{
- unsigned int i;
- int irq_found = NO_IRQ;
-
- /*
- * look at the interrupts, and find exactly one
- * that we were probing has been triggered
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (irq_desc[i].probing &&
- irq_desc[i].triggered) {
- if (irq_found != NO_IRQ) {
- irq_found = NO_IRQ;
- goto out;
- }
- irq_found = i;
- }
- }
-
- if (irq_found == -1)
- irq_found = NO_IRQ;
-out:
- spin_unlock_irq(&irq_controller_lock);
-
- up(&probe_sem);
-
- return irq_found;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-#ifdef CONFIG_SMP
-static void route_irq(struct irqdesc *desc, unsigned int irq, unsigned int cpu)
-{
- pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
-
- spin_lock_irq(&irq_controller_lock);
- desc->cpu = cpu;
- desc->chip->set_cpu(desc, irq, cpu);
- spin_unlock_irq(&irq_controller_lock);
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-irq_affinity_read_proc(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct irqdesc *desc = irq_desc + ((int)data);
- int len = cpumask_scnprintf(page, count, desc->affinity);
-
- if (count - len < 2)
- return -EINVAL;
- page[len++] = '\n';
- page[len] = '\0';
-
- return len;
-}
-
-static int
-irq_affinity_write_proc(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
-{
- unsigned int irq = (unsigned int)data;
- struct irqdesc *desc = irq_desc + irq;
- cpumask_t affinity, tmp;
- int ret = -EIO;
-
- if (!desc->chip->set_cpu)
- goto out;
-
- ret = cpumask_parse(buffer, count, affinity);
- if (ret)
- goto out;
-
- cpus_and(tmp, affinity, cpu_online_map);
- if (cpus_empty(tmp)) {
- ret = -EINVAL;
- goto out;
- }
-
- desc->affinity = affinity;
- route_irq(desc, irq, first_cpu(tmp));
- ret = count;
-
- out:
- return ret;
-}
-#endif
-#endif
-
-void __init init_irq_proc(void)
-{
-#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
- struct proc_dir_entry *dir;
- int irq;
-
- dir = proc_mkdir("irq", NULL);
- if (!dir)
- return;
-
- for (irq = 0; irq < NR_IRQS; irq++) {
- struct proc_dir_entry *entry;
- struct irqdesc *desc;
- char name[16];
-
- desc = irq_desc + irq;
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name) - 1, "%u", irq);
-
- desc->procdir = proc_mkdir(name, dir);
- if (!desc->procdir)
- continue;
-
- entry = create_proc_entry("smp_affinity", 0600, desc->procdir);
- if (entry) {
- entry->nlink = 1;
- entry->data = (void *)irq;
- entry->read_proc = irq_affinity_read_proc;
- entry->write_proc = irq_affinity_write_proc;
- }
- }
-#endif
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
+ if (iflags & IRQF_VALID)
+ desc->status &= ~IRQ_NOREQUEST;
+ if (iflags & IRQF_PROBE)
+ desc->status &= ~IRQ_NOPROBE;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
void __init init_IRQ(void)
{
- struct irqdesc *desc;
extern void init_dma(void);
int irq;
+ for (irq = 0; irq < NR_IRQS; irq++)
+ irq_desc[irq].status |= IRQ_NOREQUEST;
+
#ifdef CONFIG_SMP
bad_irq_desc.affinity = CPU_MASK_ALL;
bad_irq_desc.cpu = smp_processor_id();
#endif
- for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) {
- *desc = bad_irq_desc;
- INIT_LIST_HEAD(&desc->pend);
- }
-
init_arch_irq();
init_dma();
}
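
With the ARM-private flow handlers and the request/free/probe code deleted in the
hunk above, a platform's interrupt setup reduces to describing each line to the
generic layer. A rough sketch of that post-conversion init path follows; it is not
part of the patch, my_chip, my_irq_type and NR_BOARD_IRQS are hypothetical, and the
calls mirror set_irq_chip(), set_irq_handler() and set_irq_flags() as they appear
elsewhere in this series.

#include <linux/irq.h>
#include <linux/init.h>

static struct irqchip my_chip;		/* hypothetical: filled with ack/mask/unmask ops */
static struct irq_type my_irq_type;	/* hypothetical: the flow type for these lines */

/* Sketch only: typical machine init_irq() after the generic-IRQ conversion. */
static void __init my_init_irq(void)
{
	unsigned int irq;

	for (irq = 0; irq < NR_BOARD_IRQS; irq++) {
		set_irq_chip(irq, &my_chip);		/* low-level controller ops */
		set_irq_handler(irq, &my_irq_type);	/* flow type: level, edge, ... */
		set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
	}
}
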
Index: linux.prev/arch/arm/kernel/process.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/process.c
+++ linux.prev/arch/arm/kernel/process.c
@@ -89,12 +89,12 @@ void default_idle(void)
if (hlt_counter)
cpu_relax();
else {
- local_irq_disable();
+ raw_local_irq_disable();
if (!need_resched()) {
timer_dyn_reprogram();
arch_idle();
}
- local_irq_enable();
+ raw_local_irq_enable();
}
}
@@ -124,8 +124,8 @@ void cpu_idle(void)
while (!need_resched())
idle();
leds_event(led_idle_end);
- preempt_enable_no_resched();
- schedule();
+ __preempt_enable_no_resched();
+ __schedule();
preempt_disable();
}
}
Index: linux.prev/arch/arm/kernel/semaphore.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/semaphore.c
+++ linux.prev/arch/arm/kernel/semaphore.c
@@ -49,14 +49,16 @@
* we cannot lose wakeup events.
*/
-void __up(struct semaphore *sem)
+fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem)
{
wake_up(&sem->wait);
}
+EXPORT_SYMBOL(__compat_up);
+
static DEFINE_SPINLOCK(semaphore_lock);
-void __sched __down(struct semaphore * sem)
+fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -89,7 +91,9 @@ void __sched __down(struct semaphore * s
wake_up(&sem->wait);
}
-int __sched __down_interruptible(struct semaphore * sem)
+EXPORT_SYMBOL(__compat_down);
+
+fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -140,6 +144,8 @@ int __sched __down_interruptible(struct
return retval;
}
+EXPORT_SYMBOL(__compat_down_interruptible);
+
/*
* Trylock failed - make sure we correct for
* having decremented the count.
@@ -148,7 +154,7 @@ int __sched __down_interruptible(struct
* single "cmpxchg" without failure cases,
* but then it wouldn't work on a 386.
*/
-int __down_trylock(struct semaphore * sem)
+fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem)
{
int sleepers;
unsigned long flags;
@@ -168,6 +174,15 @@ int __down_trylock(struct semaphore * se
return 1;
}
+EXPORT_SYMBOL(__compat_down_trylock);
+
+fastcall int compat_sem_is_locked(struct compat_semaphore *sem)
+{
+ return (int) atomic_read(&sem->count) < 0;
+}
+
+EXPORT_SYMBOL(compat_sem_is_locked);
+
/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
@@ -184,7 +199,7 @@ asm(" .section .sched.text,\"ax\",%progb
__down_failed: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __down \n\
+ bl __compat_down \n\
ldmfd sp!, {r0 - r3, pc} \n\
\n\
.align 5 \n\
@@ -192,7 +207,7 @@ __down_failed: \n\
__down_interruptible_failed: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __down_interruptible \n\
+ bl __compat_down_interruptible \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r3, pc} \n\
\n\
@@ -201,7 +216,7 @@ __down_interruptible_failed: \n\
__down_trylock_failed: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __down_trylock \n\
+ bl __compat_down_trylock \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r3, pc} \n\
\n\
@@ -210,7 +225,7 @@ __down_trylock_failed: \n\
__up_wakeup: \n\
stmfd sp!, {r0 - r3, lr} \n\
mov r0, ip \n\
- bl __up \n\
+ bl __compat_up \n\
ldmfd sp!, {r0 - r3, pc} \n\
");
Index: linux.prev/arch/arm/kernel/signal.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/signal.c
+++ linux.prev/arch/arm/kernel/signal.c
@@ -628,6 +628,14 @@ static int do_signal(sigset_t *oldset, s
siginfo_t info;
int signr;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ raw_local_irq_enable();
+ preempt_check_resched();
+#endif
+
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux.prev/arch/arm/kernel/smp.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/smp.c
+++ linux.prev/arch/arm/kernel/smp.c
@@ -56,6 +56,7 @@ struct ipi_data {
unsigned long bits;
};
+/* FIXME */
static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
.lock = SPIN_LOCK_UNLOCKED,
};
@@ -348,7 +349,7 @@ static void send_ipi_message(cpumask_t c
unsigned long flags;
unsigned int cpu;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for_each_cpu_mask(cpu, callmap) {
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
@@ -363,7 +364,7 @@ static void send_ipi_message(cpumask_t c
*/
smp_cross_call(callmap);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/*
@@ -520,7 +521,7 @@ static void ipi_call_function(unsigned i
cpu_clear(cpu, data->unfinished);
}
-static DEFINE_SPINLOCK(stop_lock);
+static DEFINE_RAW_SPINLOCK(stop_lock);
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
@@ -535,7 +536,7 @@ static void ipi_cpu_stop(unsigned int cp
cpu_clear(cpu, cpu_online_map);
local_fiq_disable();
- local_irq_disable();
+ raw_local_irq_disable();
while (1)
cpu_relax();
Index: linux.prev/arch/arm/kernel/traps.c
===================================================================
--- linux.prev.orig/arch/arm/kernel/traps.c
+++ linux.prev/arch/arm/kernel/traps.c
@@ -177,6 +177,8 @@ void dump_stack(void)
{
#ifdef CONFIG_DEBUG_ERRORS
__backtrace();
+ print_traces(current);
+ show_held_locks(current);
#endif
}
@@ -217,7 +219,7 @@ static void __die(const char *str, int e
}
}
-DEFINE_SPINLOCK(die_lock);
+DEFINE_RAW_SPINLOCK(die_lock);
/*
* This function is protected against re-entrancy.
@@ -249,7 +251,7 @@ void notify_die(const char *str, struct
}
static LIST_HEAD(undef_hook);
-static DEFINE_SPINLOCK(undef_lock);
+static DEFINE_RAW_SPINLOCK(undef_lock);
void register_undef_hook(struct undef_hook *hook)
{
@@ -341,7 +343,7 @@ asmlinkage void bad_mode(struct pt_regs
handler[reason], processor_modes[proc_mode]);
die("Oops - bad mode", regs, 0);
- local_irq_disable();
+ raw_local_irq_disable();
panic("bad mode");
}
Index: linux.prev/arch/arm/mach-clps711x/p720t-leds.c
===================================================================
--- linux.prev.orig/arch/arm/mach-clps711x/p720t-leds.c
+++ linux.prev/arch/arm/mach-clps711x/p720t-leds.c
@@ -36,7 +36,7 @@ static void p720t_leds_event(led_event_t
unsigned long flags;
u32 pddr;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch(ledevt) {
case led_idle_start:
break;
@@ -53,7 +53,7 @@ static void p720t_leds_event(led_event_t
break;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int __init leds_init(void)
Index: linux.prev/arch/arm/mach-clps711x/time.c
===================================================================
--- linux.prev.orig/arch/arm/mach-clps711x/time.c
+++ linux.prev/arch/arm/mach-clps711x/time.c
@@ -19,6 +19,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-clps7500/core.c
===================================================================
--- linux.prev.orig/arch/arm/mach-clps7500/core.c
+++ linux.prev/arch/arm/mach-clps7500/core.c
@@ -9,6 +9,7 @@
#include
#include
#include
+#include
#include
#include
#include
Index: linux.prev/arch/arm/mach-ebsa110/core.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ebsa110/core.c
+++ linux.prev/arch/arm/mach-ebsa110/core.c
@@ -56,14 +56,14 @@ static void __init ebsa110_init_irq(void
unsigned long flags;
unsigned int irq;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
__raw_writeb(0xff, IRQ_MCLR);
__raw_writeb(0x55, IRQ_MSET);
__raw_writeb(0x00, IRQ_MSET);
if (__raw_readb(IRQ_MASK) != 0x55)
while (1);
__raw_writeb(0xff, IRQ_MCLR); /* clear all interrupt enables */
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
for (irq = 0; irq < NR_IRQS; irq++) {
set_irq_chip(irq, &ebsa110_irq_chip);
Index: linux.prev/arch/arm/mach-footbridge/dc21285-timer.c
===================================================================
--- linux.prev.orig/arch/arm/mach-footbridge/dc21285-timer.c
+++ linux.prev/arch/arm/mach-footbridge/dc21285-timer.c
@@ -6,6 +6,7 @@
*/
#include
#include
+#include
#include
Index: linux.prev/arch/arm/mach-footbridge/isa-irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-footbridge/isa-irq.c
+++ linux.prev/arch/arm/mach-footbridge/isa-irq.c
@@ -102,6 +102,17 @@ static struct irqaction irq_cascade = {
static struct resource pic1_resource = { "pic1", 0x20, 0x3f };
static struct resource pic2_resource = { "pic2", 0xa0, 0xbf };
+static DEFINE_IRQ_CHAINED_TYPE(isa_irq_handler);
+
+static unsigned int startup_irq_disabled(unsigned int irq)
+{
+ return 0;
+}
+
+/* Interrupt type for IRQs which must not be
+ * automatically enabled in request_irq */
+static struct irq_type level_type_nostart;
+
void __init isa_init_irq(unsigned int host_irq)
{
unsigned int irq;
@@ -159,9 +170,11 @@ void __init isa_init_irq(unsigned int ho
* There appears to be a missing pull-up
* resistor on this line.
*/
- if (machine_is_netwinder())
- set_irq_flags(_ISA_IRQ(11), IRQF_VALID |
- IRQF_PROBE | IRQF_NOAUTOEN);
+ if (machine_is_netwinder()) {
+ level_type_nostart = default_level_type;
+ level_type_nostart.startup = startup_irq_disabled;
+ set_irq_handler(_ISA_IRQ(11), &level_type_nostart);
+ }
}
}
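
The NetWinder hunk above replaces the old IRQF_NOAUTOEN flag with an irq_type whose
startup() refuses to unmask the line. The same idiom can be sketched generically as
below; this is illustration only, my_nostart_type and my_wire_quiet_irq() are
hypothetical names, while default_level_type and startup_irq_disabled come from the
hunk above.

/* Sketch only: keep a line masked across request_irq() by overriding
 * startup() in a private copy of the stock level type. */
static struct irq_type my_nostart_type;			/* hypothetical */

static void __init my_wire_quiet_irq(unsigned int irq)
{
	my_nostart_type = default_level_type;		/* copy the stock flow type */
	my_nostart_type.startup = startup_irq_disabled;	/* do not auto-unmask */
	set_irq_handler(irq, &my_nostart_type);		/* driver enables it later */
}
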
Index: linux.prev/arch/arm/mach-footbridge/isa-timer.c
===================================================================
--- linux.prev.orig/arch/arm/mach-footbridge/isa-timer.c
+++ linux.prev/arch/arm/mach-footbridge/isa-timer.c
@@ -6,6 +6,7 @@
*/
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-footbridge/netwinder-hw.c
===================================================================
--- linux.prev.orig/arch/arm/mach-footbridge/netwinder-hw.c
+++ linux.prev/arch/arm/mach-footbridge/netwinder-hw.c
@@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int
/*
* This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE
*/
-DEFINE_SPINLOCK(gpio_lock);
+DEFINE_RAW_SPINLOCK(gpio_lock);
static unsigned int current_gpio_op;
static unsigned int current_gpio_io;
Index: linux.prev/arch/arm/mach-footbridge/netwinder-leds.c
===================================================================
--- linux.prev.orig/arch/arm/mach-footbridge/netwinder-leds.c
+++ linux.prev/arch/arm/mach-footbridge/netwinder-leds.c
@@ -33,7 +33,7 @@ static char led_state;
static char hw_led_state;
static DEFINE_SPINLOCK(leds_lock);
-extern spinlock_t gpio_lock;
+extern raw_spinlock_t gpio_lock;
static void netwinder_leds_event(led_event_t evt)
{
Index: linux.prev/arch/arm/mach-h720x/common.c
===================================================================
--- linux.prev.orig/arch/arm/mach-h720x/common.c
+++ linux.prev/arch/arm/mach-h720x/common.c
@@ -163,6 +163,11 @@ h720x_gpiod_demux_handler(unsigned int i
h720x_gpio_handler(mask, irq, desc, regs);
}
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioa_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiob_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioc_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiod_demux_handler);
+
#ifdef CONFIG_CPU_H7202
static void
h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc,
@@ -175,6 +180,7 @@ h720x_gpioe_demux_handler(unsigned int i
IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq);
h720x_gpio_handler(mask, irq, desc, regs);
}
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioe_demux_handler);
#endif
static struct irqchip h720x_global_chip = {
Index: linux.prev/arch/arm/mach-h720x/cpu-h7202.c
===================================================================
--- linux.prev.orig/arch/arm/mach-h720x/cpu-h7202.c
+++ linux.prev/arch/arm/mach-h720x/cpu-h7202.c
@@ -175,6 +175,8 @@ static struct irqaction h7202_timer_irq
.handler = h7202_timer_interrupt,
};
+static DEFINE_IRQ_CHAINED_TYPE(h7202_timerx_demux_handler);
+
/*
* Setup TIMER0 as system timer
*/
Index: linux.prev/arch/arm/mach-imx/dma.c
===================================================================
--- linux.prev.orig/arch/arm/mach-imx/dma.c
+++ linux.prev/arch/arm/mach-imx/dma.c
@@ -43,7 +43,7 @@ imx_request_dma(char *name, imx_dma_prio
if (!name || !irq_handler)
return -EINVAL;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/* try grabbing a DMA channel with the requested priority */
for (i = prio; i < prio + (prio == DMA_PRIO_LOW) ? 8 : 4; i++) {
@@ -75,7 +75,7 @@ imx_request_dma(char *name, imx_dma_prio
i = -ENODEV;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return i;
}
@@ -91,10 +91,10 @@ imx_free_dma(int dma_ch)
return;
}
- local_irq_save(flags);
+ raw_local_irq_save(flags);
DIMR &= ~(1 << dma_ch);
dma_channels[dma_ch].name = NULL;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static irqreturn_t
Index: linux.prev/arch/arm/mach-imx/irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-imx/irq.c
+++ linux.prev/arch/arm/mach-imx/irq.c
@@ -217,6 +217,11 @@ static struct irqchip imx_gpio_chip = {
.set_type = imx_gpio_irq_type,
};
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpioa_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpiob_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpioc_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpiod_demux_handler);
+
void __init
imx_init_irq(void)
{
Index: linux.prev/arch/arm/mach-imx/leds-mx1ads.c
===================================================================
--- linux.prev.orig/arch/arm/mach-imx/leds-mx1ads.c
+++ linux.prev/arch/arm/mach-imx/leds-mx1ads.c
@@ -29,7 +29,7 @@ mx1ads_leds_event(led_event_t ledevt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (ledevt) {
#ifdef CONFIG_LEDS_CPU
@@ -49,5 +49,5 @@ mx1ads_leds_event(led_event_t ledevt)
default:
break;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-imx/time.c
===================================================================
--- linux.prev.orig/arch/arm/mach-imx/time.c
+++ linux.prev/arch/arm/mach-imx/time.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-integrator/core.c
===================================================================
--- linux.prev.orig/arch/arm/mach-integrator/core.c
+++ linux.prev/arch/arm/mach-integrator/core.c
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
@@ -117,7 +118,7 @@ arch_initcall(integrator_init);
#define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET
-static DEFINE_SPINLOCK(cm_lock);
+static DEFINE_RAW_SPINLOCK(cm_lock);
/**
* cm_control - update the CM_CTRL register.
Index: linux.prev/arch/arm/mach-integrator/leds.c
===================================================================
--- linux.prev.orig/arch/arm/mach-integrator/leds.c
+++ linux.prev/arch/arm/mach-integrator/leds.c
@@ -41,7 +41,7 @@ static void integrator_leds_event(led_ev
unsigned int update_alpha_leds;
// yup, change the LEDs
- local_irq_save(flags);
+ raw_local_irq_save(flags);
update_alpha_leds = 0;
switch(ledevt) {
@@ -76,7 +76,7 @@ static void integrator_leds_event(led_ev
while (__raw_readl(dbg_base + INTEGRATOR_DBG_ALPHA_OFFSET) & 1);
__raw_writel(saved_leds, dbg_base + INTEGRATOR_DBG_LEDS_OFFSET);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int __init leds_init(void)
Index: linux.prev/arch/arm/mach-integrator/pci_v3.c
===================================================================
--- linux.prev.orig/arch/arm/mach-integrator/pci_v3.c
+++ linux.prev/arch/arm/mach-integrator/pci_v3.c
@@ -163,7 +163,7 @@
* 7:2 register number
*
*/
-static DEFINE_SPINLOCK(v3_lock);
+static DEFINE_RAW_SPINLOCK(v3_lock);
#define PCI_BUS_NONMEM_START 0x00000000
#define PCI_BUS_NONMEM_SIZE SZ_256M
Index: linux.prev/arch/arm/mach-integrator/platsmp.c
===================================================================
--- linux.prev.orig/arch/arm/mach-integrator/platsmp.c
+++ linux.prev/arch/arm/mach-integrator/platsmp.c
@@ -31,7 +31,7 @@ extern void integrator_secondary_startup
volatile int __cpuinitdata pen_release = -1;
unsigned long __cpuinitdata phys_pen_release = 0;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
Index: linux.prev/arch/arm/mach-integrator/time.c
===================================================================
--- linux.prev.orig/arch/arm/mach-integrator/time.c
+++ linux.prev/arch/arm/mach-integrator/time.c
@@ -96,7 +96,8 @@ static struct rtc_ops rtc_ops = {
.set_alarm = rtc_set_alarm,
};
-static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
{
writel(0, rtc_base + RTC_EOI);
return IRQ_HANDLED;
@@ -124,7 +125,7 @@ static int rtc_probe(struct amba_device
xtime.tv_sec = __raw_readl(rtc_base + RTC_DR);
- ret = request_irq(dev->irq[0], rtc_interrupt, SA_INTERRUPT,
+ ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT,
"rtc-pl030", dev);
if (ret)
goto map_out;
Index: linux.prev/arch/arm/mach-ixp2000/core.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp2000/core.c
+++ linux.prev/arch/arm/mach-ixp2000/core.c
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -276,9 +277,9 @@ void gpio_line_config(int line, int dire
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (direction == GPIO_OUT) {
- irq_desc[line + IRQ_IXP2000_GPIO0].valid = 0;
+ set_irq_flags(line + IRQ_IXP2000_GPIO0, 0);
/* if it's an output, it ain't an interrupt anymore */
GPIO_IRQ_falling_edge &= ~(1 << line);
@@ -291,7 +292,7 @@ void gpio_line_config(int line, int dire
} else if (direction == GPIO_IN) {
ixp2000_reg_wrb(IXP2000_GPIO_PDCR, 1 << line);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
@@ -344,8 +345,7 @@ static int ixp2000_GPIO_irq_type(unsigne
/*
* Finally, mark the corresponding IRQ as valid.
*/
- irq_desc[irq].valid = 1;
-
+ set_irq_flags(irq, IRQF_VALID);
return 0;
}
@@ -449,6 +449,8 @@ static struct irqchip ixp2000_irq_chip =
.unmask = ixp2000_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixp2000_GPIO_irq_handler);
+
void __init ixp2000_init_irq(void)
{
int irq;
Index: linux.prev/arch/arm/mach-ixp2000/ixdp2x00.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp2000/ixdp2x00.c
+++ linux.prev/arch/arm/mach-ixp2000/ixdp2x00.c
@@ -146,6 +146,8 @@ static struct irqchip ixdp2x00_cpld_irq_
.unmask = ixdp2x00_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixdp2x00_irq_handler);
+
void ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
{
unsigned int irq;
@@ -168,7 +170,7 @@ void ixdp2x00_init_irq(volatile unsigned
}
/* Hook into PCI interrupt */
- set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x00_irq_handler);
+ set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x00_irq_handler);
}
/*************************************************************************
Index: linux.prev/arch/arm/mach-ixp2000/ixdp2x01.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp2000/ixdp2x01.c
+++ linux.prev/arch/arm/mach-ixp2000/ixdp2x01.c
@@ -95,6 +95,8 @@ static struct irqchip ixdp2x01_irq_chip
.unmask = ixdp2x01_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixdp2x01_irq_handler);
+
/*
* We only do anything if we are the master NPU on the board.
* The slave NPU only has the ethernet chip going directly to
@@ -127,7 +129,7 @@ void __init ixdp2x01_init_irq(void)
}
/* Hook into PCI interrupts */
- set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x01_irq_handler);
+ set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x01_irq_handler);
}
Index: linux.prev/arch/arm/mach-ixp2000/pci.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp2000/pci.c
+++ linux.prev/arch/arm/mach-ixp2000/pci.c
@@ -145,7 +145,7 @@ int ixp2000_pci_abort_handler(unsigned l
pci_master_aborts = 1;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
temp = *(IXP2000_PCI_CONTROL);
if (temp & ((1 << 8) | (1 << 5))) {
ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp);
@@ -158,7 +158,7 @@ int ixp2000_pci_abort_handler(unsigned l
temp = *(IXP2000_PCI_CMDSTAT);
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/*
* If it was an imprecise abort, then we need to correct the
@@ -176,7 +176,7 @@ clear_master_aborts(void)
volatile u32 temp;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
temp = *(IXP2000_PCI_CONTROL);
if (temp & ((1 << 8) | (1 << 5))) {
ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp);
@@ -189,7 +189,7 @@ clear_master_aborts(void)
temp = *(IXP2000_PCI_CMDSTAT);
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux.prev/arch/arm/mach-ixp4xx/common-pci.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp4xx/common-pci.c
+++ linux.prev/arch/arm/mach-ixp4xx/common-pci.c
@@ -53,7 +53,7 @@ unsigned long ixp4xx_pci_reg_base = 0;
* these transactions are atomic or we will end up
* with corrupt data on the bus or in a driver.
*/
-static DEFINE_SPINLOCK(ixp4xx_pci_lock);
+static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock);
/*
* Read from PCI config space
Index: linux.prev/arch/arm/mach-ixp4xx/coyote-pci.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp4xx/coyote-pci.c
+++ linux.prev/arch/arm/mach-ixp4xx/coyote-pci.c
@@ -17,6 +17,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-ixp4xx/ixdp425-pci.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp4xx/ixdp425-pci.c
+++ linux.prev/arch/arm/mach-ixp4xx/ixdp425-pci.c
@@ -16,6 +16,7 @@
#include
#include
+#include
#include
#include
#include
Index: linux.prev/arch/arm/mach-ixp4xx/ixdpg425-pci.c
===================================================================
--- linux.prev.orig/arch/arm/mach-ixp4xx/ixdpg425-pci.c
+++ linux.prev/arch/arm/mach-ixp4xx/ixdpg425-pci.c
@@ -16,10 +16,10 @@
#include
#include
#include
+#include
#include
#include
-#include
#include
Index: linux.prev/arch/arm/mach-l7200/core.c
===================================================================
--- linux.prev.orig/arch/arm/mach-l7200/core.c
+++ linux.prev/arch/arm/mach-l7200/core.c
@@ -7,6 +7,7 @@
*/
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-lh7a40x/arch-kev7a400.c
===================================================================
--- linux.prev.orig/arch/arm/mach-lh7a40x/arch-kev7a400.c
+++ linux.prev/arch/arm/mach-lh7a40x/arch-kev7a400.c
@@ -81,6 +81,8 @@ static void kev7a400_cpld_handler (unsig
}
}
+static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler);
+
void __init lh7a40x_init_board_irq (void)
{
int irq;
Index: linux.prev/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
===================================================================
--- linux.prev.orig/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
+++ linux.prev/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
@@ -173,6 +174,7 @@ static void lpd7a40x_cpld_handler (unsig
desc->chip->unmask (irq); /* Level-triggered need this */
}
+static DEFINE_IRQ_CHAINED_TYPE(lpd7a40x_cpld_handler);
void __init lh7a40x_init_board_irq (void)
{
Index: linux.prev/arch/arm/mach-lh7a40x/irq-kev7a400.c
===================================================================
--- linux.prev.orig/arch/arm/mach-lh7a40x/irq-kev7a400.c
+++ linux.prev/arch/arm/mach-lh7a40x/irq-kev7a400.c
@@ -60,6 +60,8 @@ lh7a400_cpld_handler (unsigned int irq,
}
}
+static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler);
+
/* IRQ initialization */
void __init
Index: linux.prev/arch/arm/mach-lh7a40x/irq-lpd7a40x.c
===================================================================
--- linux.prev.orig/arch/arm/mach-lh7a40x/irq-lpd7a40x.c
+++ linux.prev/arch/arm/mach-lh7a40x/irq-lpd7a40x.c
@@ -71,6 +71,7 @@ static void lh7a40x_cpld_handler (unsign
desc->chip->unmask (irq); /* Level-triggered need this */
}
+static DEFINE_IRQ_CHAINED_TYPE(lh7a40x_cpld_handler);
/* IRQ initialization */
Index: linux.prev/arch/arm/mach-lh7a40x/time.c
===================================================================
--- linux.prev.orig/arch/arm/mach-lh7a40x/time.c
+++ linux.prev/arch/arm/mach-lh7a40x/time.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-omap1/board-osk.c
===================================================================
--- linux.prev.orig/arch/arm/mach-omap1/board-osk.c
+++ linux.prev/arch/arm/mach-omap1/board-osk.c
@@ -29,7 +29,7 @@
#include
#include
#include
-#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-omap1/fpga.c
===================================================================
--- linux.prev.orig/arch/arm/mach-omap1/fpga.c
+++ linux.prev/arch/arm/mach-omap1/fpga.c
@@ -120,6 +120,8 @@ static struct irqchip omap_fpga_irq = {
.unmask = fpga_unmask_irq,
};
+static DEFINE_IRQ_CHAINED_TYPE(innovator_fpga_IRQ_demux);
+
/*
* All of the FPGA interrupt request inputs except for the touchscreen are
* edge-sensitive; the touchscreen is level-sensitive. The edge-sensitive
Index: linux.prev/arch/arm/mach-omap1/leds-h2p2-debug.c
===================================================================
--- linux.prev.orig/arch/arm/mach-omap1/leds-h2p2-debug.c
+++ linux.prev/arch/arm/mach-omap1/leds-h2p2-debug.c
@@ -45,7 +45,7 @@ void h2p2_dbg_leds_event(led_event_t evt
static struct h2p2_dbg_fpga __iomem *fpga;
static u16 led_state, hw_led_state;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (!(led_state & LED_STATE_ENABLED) && evt != led_start)
goto done;
@@ -164,5 +164,5 @@ void h2p2_dbg_leds_event(led_event_t evt
__raw_writew(~hw_led_state, &fpga->leds);
done:
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-omap1/serial.c
===================================================================
--- linux.prev.orig/arch/arm/mach-omap1/serial.c
+++ linux.prev/arch/arm/mach-omap1/serial.c
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
#include
Index: linux.prev/arch/arm/mach-pxa/dma.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/dma.c
+++ linux.prev/arch/arm/mach-pxa/dma.c
@@ -43,7 +43,7 @@ int pxa_request_dma (char *name, pxa_dma
if (!name || !irq_handler)
return -EINVAL;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/* try grabbing a DMA channel with the requested priority */
for (i = prio; i < prio + PXA_DMA_NBCH(prio); i++) {
@@ -73,7 +73,7 @@ int pxa_request_dma (char *name, pxa_dma
i = -ENODEV;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return i;
}
@@ -88,10 +88,10 @@ void pxa_free_dma (int dma_ch)
return;
}
- local_irq_save(flags);
+ raw_local_irq_save(flags);
DCSR(dma_ch) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR;
dma_channels[dma_ch].name = NULL;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
Index: linux.prev/arch/arm/mach-pxa/generic.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/generic.c
+++ linux.prev/arch/arm/mach-pxa/generic.c
@@ -51,7 +51,7 @@ void pxa_gpio_mode(int gpio_mode)
int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8;
int gafr;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (gpio_mode & GPIO_DFLT_LOW)
GPCR(gpio) = GPIO_bit(gpio);
else if (gpio_mode & GPIO_DFLT_HIGH)
@@ -62,7 +62,7 @@ void pxa_gpio_mode(int gpio_mode)
GPDR(gpio) &= ~GPIO_bit(gpio);
gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2));
GAFR(gpio) = gafr | (fn << (((gpio) & 0xf)*2));
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(pxa_gpio_mode);
@@ -73,14 +73,14 @@ EXPORT_SYMBOL(pxa_gpio_mode);
void pxa_set_cken(int clock, int enable)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (enable)
CKEN |= clock;
else
CKEN &= ~clock;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(pxa_set_cken);
Index: linux.prev/arch/arm/mach-pxa/idp.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/idp.c
+++ linux.prev/arch/arm/mach-pxa/idp.c
@@ -18,6 +18,7 @@
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-pxa/irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/irq.c
+++ linux.prev/arch/arm/mach-pxa/irq.c
@@ -244,6 +244,7 @@ static struct irqchip pxa_muxed_gpio_chi
.set_type = pxa_gpio_irq_type,
};
+static DEFINE_IRQ_CHAINED_TYPE(pxa_gpio_demux_handler);
void __init pxa_init_irq(void)
{
Index: linux.prev/arch/arm/mach-pxa/leds-idp.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/leds-idp.c
+++ linux.prev/arch/arm/mach-pxa/leds-idp.c
@@ -34,7 +34,7 @@ void idp_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -113,5 +113,5 @@ void idp_leds_event(led_event_t evt)
else
IDP_CPLD_LED_CONTROL |= IDP_LEDS_MASK;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-pxa/leds-lubbock.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/leds-lubbock.c
+++ linux.prev/arch/arm/mach-pxa/leds-lubbock.c
@@ -48,7 +48,7 @@ void lubbock_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -122,5 +122,5 @@ void lubbock_leds_event(led_event_t evt)
else
LUB_DISC_BLNK_LED |= 0xff;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-pxa/leds-mainstone.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/leds-mainstone.c
+++ linux.prev/arch/arm/mach-pxa/leds-mainstone.c
@@ -43,7 +43,7 @@ void mainstone_leds_event(led_event_t ev
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -117,5 +117,5 @@ void mainstone_leds_event(led_event_t ev
else
MST_LEDCTRL |= 0xff;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-pxa/lubbock.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/lubbock.c
+++ linux.prev/arch/arm/mach-pxa/lubbock.c
@@ -52,9 +52,9 @@ void lubbock_set_misc_wr(unsigned int ma
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
LUB_MISC_WR = (LUB_MISC_WR & ~mask) | (set & mask);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(lubbock_set_misc_wr);
@@ -95,6 +95,8 @@ static void lubbock_irq_handler(unsigned
} while (pending);
}
+static DEFINE_IRQ_CHAINED_TYPE(lubbock_irq_handler);
+
static void __init lubbock_init_irq(void)
{
int irq;
Index: linux.prev/arch/arm/mach-pxa/mainstone.c
===================================================================
--- linux.prev.orig/arch/arm/mach-pxa/mainstone.c
+++ linux.prev/arch/arm/mach-pxa/mainstone.c
@@ -84,6 +84,8 @@ static void mainstone_irq_handler(unsign
} while (pending);
}
+static DEFINE_IRQ_CHAINED_TYPE(mainstone_irq_handler);
+
static void __init mainstone_init_irq(void)
{
int irq;
Index: linux.prev/arch/arm/mach-rpc/dma.c
===================================================================
--- linux.prev.orig/arch/arm/mach-rpc/dma.c
+++ linux.prev/arch/arm/mach-rpc/dma.c
@@ -171,11 +171,11 @@ static void iomd_disable_dma(dmach_t cha
unsigned long dma_base = dma->dma_base;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (dma->state != ~DMA_ST_AB)
disable_irq(dma->dma_irq);
iomd_writeb(0, dma_base + CR);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int iomd_set_dma_speed(dmach_t channel, dma_t *dma, int cycle)
Index: linux.prev/arch/arm/mach-rpc/irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-rpc/irq.c
+++ linux.prev/arch/arm/mach-rpc/irq.c
@@ -112,6 +112,15 @@ static struct irqchip iomd_fiq_chip = {
.unmask = iomd_unmask_irq_fiq,
};
+static unsigned int startup_irq_disabled(unsigned int irq)
+{
+ return 0;
+}
+
+/* Interrupt type for irqs which must not be
+ * automatically enabled in request_irq */
+static struct irq_type level_type_nostart;
+
void __init rpc_init_irq(void)
{
unsigned int irq, flags;
@@ -121,16 +130,15 @@ void __init rpc_init_irq(void)
iomd_writeb(0, IOMD_FIQMASK);
iomd_writeb(0, IOMD_DMAMASK);
+ level_type_nostart = default_level_type;
+ level_type_nostart.startup = startup_irq_disabled;
+
for (irq = 0; irq < NR_IRQS; irq++) {
flags = IRQF_VALID;
if (irq <= 6 || (irq >= 9 && irq <= 15))
flags |= IRQF_PROBE;
- if (irq == 21 || (irq >= 16 && irq <= 19) ||
- irq == IRQ_KEYBOARDTX)
- flags |= IRQF_NOAUTOEN;
-
switch (irq) {
case 0 ... 7:
set_irq_chip(irq, &iomd_a_chip);
@@ -155,6 +163,10 @@ void __init rpc_init_irq(void)
set_irq_flags(irq, IRQF_VALID);
break;
}
+
+ if (irq == 21 || (irq >= 16 && irq <= 19) ||
+ irq == IRQ_KEYBOARDTX)
+ set_irq_handler(irq, &level_type_nostart);
}
init_FIQ();
Index: linux.prev/arch/arm/mach-s3c2410/bast-irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/bast-irq.c
+++ linux.prev/arch/arm/mach-s3c2410/bast-irq.c
@@ -136,13 +136,15 @@ bast_irq_pc104_demux(unsigned int irq,
for (i = 0; stat != 0; i++, stat >>= 1) {
if (stat & 1) {
irqno = bast_pc104_irqs[i];
-
- desc_handle_irq(irqno, irq_desc + irqno, regs);
+ desc = irq_desc + irqno;
+ desc_handle_irq(irqno, desc, regs);
}
}
}
}
+DEFINE_IRQ_CHAINED_TYPE(bast_irq_pc104_demux);
+
static __init int bast_irq_init(void)
{
unsigned int i;
@@ -156,7 +158,7 @@ static __init int bast_irq_init(void)
set_irq_chained_handler(IRQ_ISA, bast_irq_pc104_demux);
- /* reigster our IRQs */
+ /* register our IRQs */
for (i = 0; i < 4; i++) {
unsigned int irqno = bast_pc104_irqs[i];
Index: linux.prev/arch/arm/mach-s3c2410/clock.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/clock.c
+++ linux.prev/arch/arm/mach-s3c2410/clock.c
@@ -61,7 +61,7 @@ void inline s3c24xx_clk_enable(unsigned
unsigned long clkcon;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
clkcon = __raw_readl(S3C2410_CLKCON);
clkcon &= ~clocks;
@@ -74,7 +74,7 @@ void inline s3c24xx_clk_enable(unsigned
__raw_writel(clkcon, S3C2410_CLKCON);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/* enable and disable calls for use with the clk struct */
Index: linux.prev/arch/arm/mach-s3c2410/dma.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/dma.c
+++ linux.prev/arch/arm/mach-s3c2410/dma.c
@@ -329,11 +329,11 @@ static int s3c2410_dma_start(s3c2410_dma
pr_debug("s3c2410_start_dma: channel=%d\n", chan->number);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->state == S3C2410_DMA_RUNNING) {
pr_debug("s3c2410_start_dma: already running (%d)\n", chan->state);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -348,7 +348,7 @@ static int s3c2410_dma_start(s3c2410_dma
printk(KERN_ERR "dma%d: channel has nothing loaded\n",
chan->number);
chan->state = S3C2410_DMA_IDLE;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return -EINVAL;
}
@@ -385,7 +385,7 @@ static int s3c2410_dma_start(s3c2410_dma
dbg_showchan(chan);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -451,7 +451,7 @@ int s3c2410_dma_enqueue(unsigned int cha
buf->id = id;
buf->magic = BUF_MAGIC;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->curr == NULL) {
/* we've got nothing loaded... */
@@ -485,7 +485,7 @@ int s3c2410_dma_enqueue(unsigned int cha
"timeout loading buffer\n",
chan->number);
dbg_showchan(chan);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return -EINVAL;
}
}
@@ -499,7 +499,7 @@ int s3c2410_dma_enqueue(unsigned int cha
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -661,9 +661,9 @@ s3c2410_dma_irq(int irq, void *devpw, st
return IRQ_HANDLED;
}
- local_irq_save(flags);
+ raw_local_irq_save(flags);
s3c2410_dma_loadbuffer(chan, chan->next);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
} else {
s3c2410_dma_lastxfer(chan);
@@ -698,14 +698,14 @@ int s3c2410_dma_request(unsigned int cha
check_channel(channel);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
dbg_showchan(chan);
if (chan->in_use) {
if (client != chan->client) {
printk(KERN_ERR "dma%d: already in use\n", channel);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return -EBUSY;
} else {
printk(KERN_ERR "dma%d: client already has channel\n", channel);
@@ -724,7 +724,7 @@ int s3c2410_dma_request(unsigned int cha
if (err) {
chan->in_use = 0;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
printk(KERN_ERR "%s: cannot get IRQ %d for DMA %d\n",
client->name, chan->irq, chan->number);
@@ -735,7 +735,7 @@ int s3c2410_dma_request(unsigned int cha
chan->irq_enabled = 1;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/* need to setup */
@@ -764,7 +764,7 @@ int s3c2410_dma_free(dmach_t channel, s3
check_channel(channel);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->client != client) {
@@ -789,7 +789,7 @@ int s3c2410_dma_free(dmach_t channel, s3
free_irq(chan->irq, (void *)chan);
chan->irq_claimed = 0;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -805,7 +805,7 @@ static int s3c2410_dma_dostop(s3c2410_dm
dbg_showchan(chan);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
s3c2410_dma_call_op(chan, S3C2410_DMAOP_STOP);
@@ -823,7 +823,7 @@ static int s3c2410_dma_dostop(s3c2410_dm
chan->state = S3C2410_DMA_IDLE;
chan->load_state = S3C2410_DMALOAD_NONE;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -840,7 +840,7 @@ static int s3c2410_dma_flush(s3c2410_dma
pr_debug("%s:\n", __FUNCTION__);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (chan->state != S3C2410_DMA_IDLE) {
pr_debug("%s: stopping channel...\n", __FUNCTION__ );
@@ -865,7 +865,7 @@ static int s3c2410_dma_flush(s3c2410_dma
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux.prev/arch/arm/mach-s3c2410/gpio.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/gpio.c
+++ linux.prev/arch/arm/mach-s3c2410/gpio.c
@@ -80,7 +80,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi
/* modify the specified register wwith IRQs off */
- local_irq_save(flags);
+ raw_local_irq_save(flags);
con = __raw_readl(base + 0x00);
con &= ~mask;
@@ -88,7 +88,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi
__raw_writel(con, base + 0x00);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(s3c2410_gpio_cfgpin);
@@ -119,14 +119,14 @@ void s3c2410_gpio_pullup(unsigned int pi
if (pin < S3C2410_GPIO_BANKB)
return;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
up = __raw_readl(base + 0x08);
up &= ~(1L << offs);
up |= to << offs;
__raw_writel(up, base + 0x08);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(s3c2410_gpio_pullup);
@@ -138,14 +138,14 @@ void s3c2410_gpio_setpin(unsigned int pi
unsigned long flags;
unsigned long dat;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
dat = __raw_readl(base + 0x04);
dat &= ~(1 << offs);
dat |= to << offs;
__raw_writel(dat, base + 0x04);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(s3c2410_gpio_setpin);
@@ -165,12 +165,12 @@ unsigned int s3c2410_modify_misccr(unsig
unsigned long flags;
unsigned long misccr;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
misccr = __raw_readl(S3C2410_MISCCR);
misccr &= ~clear;
misccr ^= change;
__raw_writel(misccr, S3C2410_MISCCR);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return misccr;
}
@@ -211,7 +211,7 @@ int s3c2410_gpio_irqfilter(unsigned int
pin -= S3C2410_GPG8_EINT16;
reg += pin & ~3;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/* update filter width and clock source */
@@ -227,7 +227,7 @@ int s3c2410_gpio_irqfilter(unsigned int
val |= on << ((pin * 4) + 3);
__raw_writel(val, S3C2410_EXTINT2);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux.prev/arch/arm/mach-s3c2410/irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/irq.c
+++ linux.prev/arch/arm/mach-s3c2410/irq.c
@@ -573,6 +573,11 @@ s3c_irq_demux_uart2(unsigned int irq,
}
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart0);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart1);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart2);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_adc);
+
/* s3c24xx_init_irq
*
* Initialise S3C2410 IRQ system
Index: linux.prev/arch/arm/mach-s3c2410/s3c2440-dsc.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/s3c2440-dsc.c
+++ linux.prev/arch/arm/mach-s3c2410/s3c2440-dsc.c
@@ -45,14 +45,14 @@ int s3c2440_set_dsc(unsigned int pin, un
base = (pin & S3C2440_SELECT_DSC1) ? S3C2440_DSC1 : S3C2440_DSC0;
mask = 3 << S3C2440_DSC_GETSHIFT(pin);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
val = __raw_readl(base);
val &= ~mask;
val |= value & mask;
__raw_writel(val, base);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
Index: linux.prev/arch/arm/mach-s3c2410/s3c2440-irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/s3c2440-irq.c
+++ linux.prev/arch/arm/mach-s3c2410/s3c2440-irq.c
@@ -157,6 +157,9 @@ static struct irqchip s3c_irq_cam = {
.ack = s3c_irq_cam_ack,
};
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_wdtac97);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_cam);
+
static int s3c2440_irq_add(struct sys_device *sysdev)
{
unsigned int irqno;
Index: linux.prev/arch/arm/mach-s3c2410/time.c
===================================================================
--- linux.prev.orig/arch/arm/mach-s3c2410/time.c
+++ linux.prev/arch/arm/mach-s3c2410/time.c
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-sa1100/assabet.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/assabet.c
+++ linux.prev/arch/arm/mach-sa1100/assabet.c
@@ -61,10 +61,10 @@ void ASSABET_BCR_frob(unsigned int mask,
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
BCR_value = (BCR_value & ~mask) | val;
ASSABET_BCR = BCR_value;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(ASSABET_BCR_frob);
Index: linux.prev/arch/arm/mach-sa1100/badge4.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/badge4.c
+++ linux.prev/arch/arm/mach-sa1100/badge4.c
@@ -227,7 +227,7 @@ void badge4_set_5V(unsigned subsystem, i
unsigned long flags;
unsigned old_5V_bitmap;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
old_5V_bitmap = badge4_5V_bitmap;
@@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, i
/* detect on->off and off->on transitions */
if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
/* was off, now on */
- printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
GPSR = BADGE4_GPIO_PCMEN5V;
} else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
/* was on, now off */
- printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
GPCR = BADGE4_GPIO_PCMEN5V;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
+
+ /* detect on->off and off->on transitions */
+ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
+ /* was off, now on */
+ printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
+ } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
+ /* was on, now off */
+ printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
+ }
}
EXPORT_SYMBOL(badge4_set_5V);
Index: linux.prev/arch/arm/mach-sa1100/cerf.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/cerf.c
+++ linux.prev/arch/arm/mach-sa1100/cerf.c
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-sa1100/cpu-sa1110.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/cpu-sa1110.c
+++ linux.prev/arch/arm/mach-sa1100/cpu-sa1110.c
@@ -282,7 +282,7 @@ static int sa1110_target(struct cpufreq_
* This means that we won't access SDRAM for the duration of
* the programming.
*/
- local_irq_save(flags);
+ raw_local_irq_save(flags);
asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0));
udelay(10);
__asm__ __volatile__(" \n\
@@ -303,7 +303,7 @@ static int sa1110_target(struct cpufreq_
: "r" (&MDCNFG), "r" (&PPCR), "0" (sd.mdcnfg),
"r" (sd.mdrefr), "r" (sd.mdcas[0]),
"r" (sd.mdcas[1]), "r" (sd.mdcas[2]), "r" (ppcr));
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/*
* Now, return the SDRAM refresh back to normal.
Index: linux.prev/arch/arm/mach-sa1100/dma.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/dma.c
+++ linux.prev/arch/arm/mach-sa1100/dma.c
@@ -227,7 +227,7 @@ int sa1100_start_dma(dma_regs_t *regs, d
if (size > MAX_DMA_SIZE)
return -EOVERFLOW;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
status = regs->RdDCSR;
/* If both DMA buffers are started, there's nothing else we can do. */
@@ -262,7 +262,7 @@ int sa1100_start_dma(dma_regs_t *regs, d
ret = 0;
out:
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return ret;
}
Index: linux.prev/arch/arm/mach-sa1100/generic.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/generic.c
+++ linux.prev/arch/arm/mach-sa1100/generic.c
@@ -138,7 +138,7 @@ unsigned long long sched_clock(void)
static void sa1100_power_off(void)
{
mdelay(100);
- local_irq_disable();
+ raw_local_irq_disable();
/* disable internal oscillator, float CS lines */
PCFR = (PCFR_OPDE | PCFR_FP | PCFR_FS);
/* enable wake-up on GPIO0 (Assabet...) */
@@ -411,7 +411,7 @@ void __init sa1110_mb_disable(void)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
PGSR &= ~GPIO_MBGNT;
GPCR = GPIO_MBGNT;
@@ -419,7 +419,7 @@ void __init sa1110_mb_disable(void)
GAFR &= ~(GPIO_MBGNT | GPIO_MBREQ);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/*
@@ -430,7 +430,7 @@ void __init sa1110_mb_enable(void)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
PGSR &= ~GPIO_MBGNT;
GPCR = GPIO_MBGNT;
@@ -439,6 +439,6 @@ void __init sa1110_mb_enable(void)
GAFR |= (GPIO_MBGNT | GPIO_MBREQ);
TUCR |= TUCR_MR;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-sa1100/h3600.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/h3600.c
+++ linux.prev/arch/arm/mach-sa1100/h3600.c
@@ -331,7 +331,7 @@ static void h3100_control_egpio(enum ipa
}
if (egpio || gpio) {
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (setp) {
h3100_egpio |= egpio;
GPSR = gpio;
@@ -340,7 +340,7 @@ static void h3100_control_egpio(enum ipa
GPCR = gpio;
}
H3100_EGPIO = h3100_egpio;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
}
@@ -463,13 +463,13 @@ static void h3600_control_egpio(enum ipa
}
if (egpio) {
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (setp)
h3600_egpio |= egpio;
else
h3600_egpio &= ~egpio;
H3600_EGPIO = h3600_egpio;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
}
@@ -800,6 +800,8 @@ static void h3800_unmask_gpio_irq(unsign
H3800_ASIC2_GPIINTSTAT |= mask;
}
+static DEFINE_IRQ_CHAINED_TYPE(h3800_IRQ_demux);
+
static void __init h3800_init_irq(void)
{
int i;
@@ -838,7 +840,7 @@ static void __init h3800_init_irq(void)
}
#endif
set_irq_type(IRQ_GPIO_H3800_ASIC, IRQT_RISING);
- set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, &h3800_IRQ_demux);
+ set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, h3800_IRQ_demux);
}
Index: linux.prev/arch/arm/mach-sa1100/irq.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/irq.c
+++ linux.prev/arch/arm/mach-sa1100/irq.c
@@ -11,12 +11,13 @@
*/
#include
#include
+#include
+#include
#include
#include
#include
#include
-#include
#include
#include "generic.h"
@@ -281,6 +282,8 @@ static int __init sa1100irq_init_devicef
return sysdev_register(&sa1100irq_device);
}
+static DEFINE_IRQ_CHAINED_TYPE(sa1100_high_gpio_handler);
+
device_initcall(sa1100irq_init_devicefs);
void __init sa1100_init_irq(void)
Index: linux.prev/arch/arm/mach-sa1100/leds-assabet.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/leds-assabet.c
+++ linux.prev/arch/arm/mach-sa1100/leds-assabet.c
@@ -32,7 +32,7 @@ void assabet_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -111,5 +111,5 @@ void assabet_leds_event(led_event_t evt)
if (led_state & LED_STATE_ENABLED)
ASSABET_BCR_frob(ASSABET_BCR_LED_MASK, hw_led_state);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-sa1100/leds-badge4.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/leds-badge4.c
+++ linux.prev/arch/arm/mach-sa1100/leds-badge4.c
@@ -36,7 +36,7 @@ void badge4_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -108,5 +108,5 @@ void badge4_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-sa1100/leds-cerf.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/leds-cerf.c
+++ linux.prev/arch/arm/mach-sa1100/leds-cerf.c
@@ -29,7 +29,7 @@ void cerf_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (evt) {
case led_start:
@@ -107,5 +107,5 @@ void cerf_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-sa1100/leds-hackkit.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/leds-hackkit.c
+++ linux.prev/arch/arm/mach-sa1100/leds-hackkit.c
@@ -33,7 +33,7 @@ void hackkit_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch(evt) {
case led_start:
@@ -109,5 +109,5 @@ void hackkit_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-sa1100/leds-lart.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/leds-lart.c
+++ linux.prev/arch/arm/mach-sa1100/leds-lart.c
@@ -32,7 +32,7 @@ void lart_leds_event(led_event_t evt)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch(evt) {
case led_start:
@@ -98,5 +98,5 @@ void lart_leds_event(led_event_t evt)
GPCR = hw_led_state ^ LED_MASK;
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
Index: linux.prev/arch/arm/mach-sa1100/neponset.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/neponset.c
+++ linux.prev/arch/arm/mach-sa1100/neponset.c
@@ -137,6 +137,8 @@ static struct sa1100_port_fns neponset_p
.get_mctrl = neponset_get_mctrl,
};
+static DEFINE_IRQ_CHAINED_TYPE(neponset_irq_handler);
+
static int neponset_probe(struct platform_device *dev)
{
sa1100_register_uart_fns(&neponset_port_fns);
Index: linux.prev/arch/arm/mach-sa1100/pleb.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/pleb.c
+++ linux.prev/arch/arm/mach-sa1100/pleb.c
@@ -7,6 +7,7 @@
#include
#include
#include
+#include
#include
Index: linux.prev/arch/arm/mach-sa1100/simpad.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/simpad.c
+++ linux.prev/arch/arm/mach-sa1100/simpad.c
@@ -174,7 +174,7 @@ static void __init simpad_map_io(void)
static void simpad_power_off(void)
{
- local_irq_disable(); // was cli
+ raw_local_irq_disable(); // was cli
set_cs3(0x800); /* only SD_MEDIAQ */
/* disable internal oscillator, float CS lines */
@@ -191,7 +191,7 @@ static void simpad_power_off(void)
PMCR = PMCR_SF;
while(1);
- local_irq_enable(); /* we won't ever call it */
+ raw_local_irq_enable(); /* we won't ever call it */
}
Index: linux.prev/arch/arm/mach-sa1100/time.c
===================================================================
--- linux.prev.orig/arch/arm/mach-sa1100/time.c
+++ linux.prev/arch/arm/mach-sa1100/time.c
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-shark/core.c
===================================================================
--- linux.prev.orig/arch/arm/mach-shark/core.c
+++ linux.prev/arch/arm/mach-shark/core.c
@@ -6,6 +6,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux.prev/arch/arm/mach-shark/leds.c
===================================================================
--- linux.prev.orig/arch/arm/mach-shark/leds.c
+++ linux.prev/arch/arm/mach-shark/leds.c
@@ -33,7 +33,7 @@ static char led_state;
static short hw_led_state;
static short saved_state;
-static DEFINE_SPINLOCK(leds_lock);
+static DEFINE_RAW_SPINLOCK(leds_lock);
short sequoia_read(int addr) {
outw(addr,0x24);
Index: linux.prev/arch/arm/mach-versatile/core.c
===================================================================
--- linux.prev.orig/arch/arm/mach-versatile/core.c
+++ linux.prev/arch/arm/mach-versatile/core.c
@@ -113,6 +113,8 @@ sic_handle_irq(unsigned int irq, struct
} while (status);
}
+static DEFINE_IRQ_CHAINED_TYPE(sic_handle_irq);
+
#if 1
#define IRQ_MMCI0A IRQ_VICSOURCE22
#define IRQ_AACI IRQ_VICSOURCE24
@@ -162,7 +164,7 @@ void __init versatile_init_irq(void)
}
}
- set_irq_handler(IRQ_VICSOURCE31, sic_handle_irq);
+ set_irq_chained_handler(IRQ_VICSOURCE31, sic_handle_irq);
vic_unmask_irq(IRQ_VICSOURCE31);
/* Do second interrupt controller */
@@ -785,7 +787,7 @@ static void versatile_leds_event(led_eve
unsigned long flags;
u32 val;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
val = readl(VA_LEDS_BASE);
switch (ledevt) {
@@ -810,7 +812,7 @@ static void versatile_leds_event(led_eve
}
writel(val, VA_LEDS_BASE);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
#endif /* CONFIG_LEDS */
Index: linux.prev/arch/arm/mm/consistent.c
===================================================================
--- linux.prev.orig/arch/arm/mm/consistent.c
+++ linux.prev/arch/arm/mm/consistent.c
@@ -30,7 +30,7 @@
* This is the page table (2MB) covering uncached, DMA consistent allocations
*/
static pte_t *consistent_pte;
-static DEFINE_SPINLOCK(consistent_lock);
+static DEFINE_RAW_SPINLOCK(consistent_lock);
/*
* VM region handling support.
Index: linux.prev/arch/arm/mm/copypage-v4mc.c
===================================================================
--- linux.prev.orig/arch/arm/mm/copypage-v4mc.c
+++ linux.prev/arch/arm/mm/copypage-v4mc.c
@@ -29,7 +29,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* ARMv4 mini-dcache optimised copy_user_page
@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* instruction. If your processor does not supply this, you have to write your
* own copy_user_page that does the right thing.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
asm volatile(
@@ -82,7 +82,7 @@ void v4_mc_copy_user_page(void *kto, con
/*
* ARMv4 optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux.prev/arch/arm/mm/copypage-v6.c
===================================================================
--- linux.prev.orig/arch/arm/mm/copypage-v6.c
+++ linux.prev/arch/arm/mm/copypage-v6.c
@@ -26,7 +26,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(v6_lock);
+static DEFINE_RAW_SPINLOCK(v6_lock);
/*
* Copy the user page. No aliasing to deal with so we can just
Index: linux.prev/arch/arm/mm/copypage-xscale.c
===================================================================
--- linux.prev.orig/arch/arm/mm/copypage-xscale.c
+++ linux.prev/arch/arm/mm/copypage-xscale.c
@@ -31,7 +31,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* XScale mini-dcache optimised copy_user_page
@@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
/*
@@ -104,7 +104,7 @@ void xscale_mc_copy_user_page(void *kto,
/*
* XScale optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux.prev/arch/arm/mm/fault-armv.c
===================================================================
--- linux.prev.orig/arch/arm/mm/fault-armv.c
+++ linux.prev/arch/arm/mm/fault-armv.c
@@ -166,7 +166,7 @@ static int __init check_writebuffer(unsi
{
register unsigned long zero = 0, one = 1, val;
- local_irq_disable();
+ raw_local_irq_disable();
mb();
*p1 = one;
mb();
@@ -174,7 +174,7 @@ static int __init check_writebuffer(unsi
mb();
val = *p1;
mb();
- local_irq_enable();
+ raw_local_irq_enable();
return val != zero;
}
Index: linux.prev/arch/arm/mm/fault.c
===================================================================
--- linux.prev.orig/arch/arm/mm/fault.c
+++ linux.prev/arch/arm/mm/fault.c
@@ -216,7 +216,7 @@ out:
return fault;
}
-static int
+static notrace int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk;
@@ -316,7 +316,7 @@ no_context:
* interrupt or a critical region, and should only copy the information
* from the master page table, nothing more.
*/
-static int
+static notrace int
do_translation_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
@@ -362,7 +362,7 @@ bad_area:
* Some section permission faults need to be handled gracefully.
* They can happen due to a __{get,put}_user during an oops.
*/
-static int
+static notrace int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
@@ -373,7 +373,7 @@ do_sect_fault(unsigned long addr, unsign
/*
* This abort handler always returns "fault".
*/
-static int
+static notrace int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
return 1;
@@ -428,7 +428,7 @@ static struct fsr_info {
{ do_bad, SIGBUS, 0, "unknown 31" }
};
-void __init
+void __init notrace
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
int sig, const char *name)
{
@@ -442,7 +442,7 @@ hook_fault_code(int nr, int (*fn)(unsign
/*
* Dispatch a data abort to the relevant handler.
*/
-asmlinkage void
+asmlinkage notrace void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
@@ -461,7 +461,7 @@ do_DataAbort(unsigned long addr, unsigne
notify_die("", regs, &info, fsr, 0);
}
-asmlinkage void
+asmlinkage notrace void
do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
{
do_translation_fault(addr, 0, regs);
Index: linux.prev/arch/arm/mm/init.c
===================================================================
--- linux.prev.orig/arch/arm/mm/init.c
+++ linux.prev/arch/arm/mm/init.c
@@ -28,7 +28,7 @@
#define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
Index: linux.prev/arch/arm/plat-omap/clock.c
===================================================================
--- linux.prev.orig/arch/arm/plat-omap/clock.c
+++ linux.prev/arch/arm/plat-omap/clock.c
@@ -28,7 +28,7 @@
LIST_HEAD(clocks);
static DECLARE_MUTEX(clocks_sem);
-DEFINE_SPINLOCK(clockfw_lock);
+DEFINE_RAW_SPINLOCK(clockfw_lock);
static struct clk_functions *arch_clock;
Index: linux.prev/arch/arm/plat-omap/dma.c
===================================================================
--- linux.prev.orig/arch/arm/plat-omap/dma.c
+++ linux.prev/arch/arm/plat-omap/dma.c
@@ -557,7 +557,7 @@ void omap_clear_dma(int lch)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (cpu_class_is_omap1()) {
int status;
@@ -574,7 +574,7 @@ void omap_clear_dma(int lch)
omap_writel(0, lch_base + i);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void omap_start_dma(int lch)
@@ -903,7 +903,7 @@ static struct irqaction omap24xx_dma_irq
/*----------------------------------------------------------------------------*/
static struct lcd_dma_info {
- spinlock_t lock;
+ raw_spinlock_t lock;
int reserved;
void (* callback)(u16 status, void *data);
void *cb_data;
Index: linux.prev/arch/arm/plat-omap/gpio.c
===================================================================
--- linux.prev.orig/arch/arm/plat-omap/gpio.c
+++ linux.prev/arch/arm/plat-omap/gpio.c
@@ -121,7 +121,7 @@ struct gpio_bank {
u32 reserved_map;
u32 suspend_wakeup;
u32 saved_wakeup;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
#define METHOD_MPUIO 0
@@ -736,7 +736,7 @@ static void gpio_irq_handler(unsigned in
desc->chip->ack(irq);
- bank = (struct gpio_bank *) desc->data;
+ bank = (struct gpio_bank *) desc->handler_data;
if (bank->method == METHOD_MPUIO)
isr_reg = bank->base + OMAP_MPUIO_GPIO_INT;
#ifdef CONFIG_ARCH_OMAP15XX
@@ -837,6 +837,8 @@ static struct irqchip mpuio_irq_chip = {
.unmask = mpuio_unmask_irq
};
+static DEFINE_IRQ_CHAINED_TYPE(gpio_irq_handler);
+
static int initialized;
static struct clk * gpio_ick;
static struct clk * gpio_fck;
Index: linux.prev/arch/arm/plat-omap/mux.c
===================================================================
--- linux.prev.orig/arch/arm/plat-omap/mux.c
+++ linux.prev/arch/arm/plat-omap/mux.c
@@ -57,7 +57,7 @@ int __init omap_mux_register(struct pin_
*/
int __init_or_module omap_cfg_reg(const unsigned long index)
{
- static DEFINE_SPINLOCK(mux_spin_lock);
+ static DEFINE_RAW_SPINLOCK(mux_spin_lock);
unsigned long flags;
struct pin_config *cfg;
Index: linux.prev/arch/arm/plat-omap/pm.c
===================================================================
--- linux.prev.orig/arch/arm/plat-omap/pm.c
+++ linux.prev/arch/arm/plat-omap/pm.c
@@ -82,11 +82,11 @@ void omap_pm_idle(void)
* seconds for wait for interrupt.
*/
- local_irq_disable();
+ raw_local_irq_disable();
local_fiq_disable();
if (need_resched()) {
local_fiq_enable();
- local_irq_enable();
+ raw_local_irq_enable();
return;
}
mask32 = omap_readl(ARM_SYSST);
@@ -111,7 +111,7 @@ void omap_pm_idle(void)
omap_sram_idle();
local_fiq_enable();
- local_irq_enable();
+ raw_local_irq_enable();
}
/*
@@ -182,7 +182,7 @@ void omap_pm_suspend(void)
* Step 1: turn off interrupts (FIXME: NOTE: already disabled)
*/
- local_irq_disable();
+ raw_local_irq_disable();
local_fiq_disable();
/*
@@ -335,7 +335,7 @@ void omap_pm_suspend(void)
* Reenable interrupts
*/
- local_irq_enable();
+ raw_local_irq_enable();
local_fiq_enable();
omap_serial_wake_trigger(0);
Index: linux.prev/arch/arm26/boot/compressed/misc.c
===================================================================
--- linux.prev.orig/arch/arm26/boot/compressed/misc.c
+++ linux.prev/arch/arm26/boot/compressed/misc.c
@@ -184,6 +184,7 @@ static ulg free_mem_ptr_end;
#define HEAP_SIZE 0x2000
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
#ifndef STANDALONE_DEBUG
Index: linux.prev/arch/i386/Kconfig
===================================================================
--- linux.prev.orig/arch/i386/Kconfig
+++ linux.prev/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86_32
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.
+config GENERIC_TIME
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
@@ -173,6 +177,8 @@ config HPET_EMULATE_RTC
depends on HPET_TIMER && RTC=y
default y
+source "kernel/time/Kconfig"
+
config SMP
bool "Symmetric multi-processing support"
---help---
@@ -228,6 +234,19 @@ config SCHED_SMT
source "kernel/Kconfig.preempt"
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ depends on M386 || PREEMPT_RT
+ default y
+
+config ASM_SEMAPHORES
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+ default y if !RWSEM_GENERIC_SPINLOCK
+
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
depends on !SMP && !(X86_VISWS || X86_VOYAGER)
@@ -619,7 +638,7 @@ config BOOT_IOREMAP
config REGPARM
bool "Use register arguments (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && !MCOUNT
default n
help
Compile the kernel with -mregparm=3. This uses a different ABI
@@ -1055,3 +1074,7 @@ config X86_TRAMPOLINE
bool
depends on X86_SMP || (X86_VOYAGER && SMP)
default y
+
+config KTIME_SCALAR
+ bool
+ default y
Index: linux.prev/arch/i386/Kconfig.cpu
===================================================================
--- linux.prev.orig/arch/i386/Kconfig.cpu
+++ linux.prev/arch/i386/Kconfig.cpu
@@ -229,11 +229,6 @@ config RWSEM_GENERIC_SPINLOCK
depends on M386
default y
-config RWSEM_XCHGADD_ALGORITHM
- bool
- depends on !M386
- default y
-
config GENERIC_CALIBRATE_DELAY
bool
default y
Index: linux.prev/arch/i386/Kconfig.debug
===================================================================
--- linux.prev.orig/arch/i386/Kconfig.debug
+++ linux.prev/arch/i386/Kconfig.debug
@@ -18,6 +18,7 @@ config EARLY_PRINTK
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
+ default y
help
This option will cause messages to be printed if free stack space
drops below a certain limit.
@@ -25,6 +26,7 @@ config DEBUG_STACKOVERFLOW
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
+ default y
help
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T and sysrq-P debug output.
Index: linux.prev/arch/i386/boot/compressed/misc.c
===================================================================
--- linux.prev.orig/arch/i386/boot/compressed/misc.c
+++ linux.prev/arch/i386/boot/compressed/misc.c
@@ -15,6 +15,12 @@
#include
#include
+#ifdef CONFIG_MCOUNT
+void notrace mcount(void)
+{
+}
+#endif
+
/*
* gzip declarations
*/
@@ -112,7 +118,7 @@ static long free_mem_end_ptr;
#define INPLACE_MOVE_ROUTINE 0x1000
#define LOW_BUFFER_START 0x2000
#define LOW_BUFFER_MAX 0x90000
-#define HEAP_SIZE 0x3000
+#define HEAP_SIZE 0x4000
static unsigned int low_buffer_end, low_buffer_size;
static int high_loaded =0;
static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
@@ -125,6 +131,7 @@ static int lines, cols;
static void * xquad_portio = NULL;
#endif
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
static void *malloc(int size)
Index: linux.prev/arch/i386/kernel/Makefile
===================================================================
--- linux.prev.orig/arch/i386/kernel/Makefile
+++ linux.prev/arch/i386/kernel/Makefile
@@ -4,13 +4,13 @@
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+obj-y := process.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o quirks.o i8237.o
+ doublefault.o quirks.o i8237.o i8253.o tsc.o
+obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o
obj-y += cpu/
-obj-y += timers/
obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
@@ -20,6 +20,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
+obj-$(CONFIG_MCOUNT) += mcount-wrapper.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
@@ -34,6 +35,8 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_SYSFS) += switch2poll.o
+obj-$(CONFIG_HPET_TIMER) += hpet.o
EXTRA_AFLAGS := -traditional
Index: linux.prev/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/acpi/boot.c
+++ linux.prev/arch/i386/kernel/acpi/boot.c
@@ -567,7 +567,7 @@ static int __init acpi_parse_sbf(unsigne
}
#ifdef CONFIG_HPET_TIMER
-
+#include
static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
{
struct acpi_table_hpet *hpet_tbl;
@@ -589,6 +589,7 @@ static int __init acpi_parse_hpet(unsign
#ifdef CONFIG_X86_64
vxtime.hpet_address = hpet_tbl->addr.addrl |
((long)hpet_tbl->addr.addrh << 32);
+ hpet_address = vxtime.hpet_address;
printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
hpet_tbl->id, vxtime.hpet_address);
@@ -597,10 +598,10 @@ static int __init acpi_parse_hpet(unsign
extern unsigned long hpet_address;
hpet_address = hpet_tbl->addr.addrl;
- printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
- hpet_tbl->id, hpet_address);
}
-#endif /* X86 */
+#endif /* X86 */
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
return 0;
}
@@ -608,9 +609,8 @@ static int __init acpi_parse_hpet(unsign
#define acpi_parse_hpet NULL
#endif
-#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
-#endif
+u32 acpi_pmtmr_ioport;
+int acpi_pmtmr_buggy;
static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
{
@@ -629,7 +629,6 @@ static int __init acpi_parse_fadt(unsign
acpi_fadt.force_apic_physical_destination_mode =
fadt->force_apic_physical_destination_mode;
-#ifdef CONFIG_X86_PM_TIMER
/* detect the location of the ACPI PM Timer */
if (fadt->revision >= FADT2_REVISION_ID) {
/* FADT rev. 2 */
@@ -637,22 +636,22 @@ static int __init acpi_parse_fadt(unsign
ACPI_ADR_SPACE_SYSTEM_IO)
return 0;
- pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ acpi_pmtmr_ioport = fadt->xpm_tmr_blk.address;
/*
* "X" fields are optional extensions to the original V1.0
* fields, so we must selectively expand V1.0 fields if the
* corresponding X field is zero.
*/
- if (!pmtmr_ioport)
- pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ if (!acpi_pmtmr_ioport)
+ acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk;
} else {
/* FADT rev. 1 */
- pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk;
}
- if (pmtmr_ioport)
- printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
- pmtmr_ioport);
-#endif
+
+ if (acpi_pmtmr_ioport)
+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", acpi_pmtmr_ioport);
+
return 0;
}
Index: linux.prev/arch/i386/kernel/apic.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/apic.c
+++ linux.prev/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
@@ -50,6 +51,23 @@ int enable_local_apic __initdata = 0; /*
*/
int apic_verbosity;
+static unsigned int calibration_result;
+
+static void lapic_next_event(unsigned long evt);
+static void lapic_timer_setup(int mode);
+
+static struct clock_event lapic_clockevent = {
+ .name = "lapic",
+ .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE |
+ CLOCK_HAS_IRQHANDLER
+#ifdef CONFIG_SMP
+ | CLOCK_CAP_UPDATE
+#endif
+ ,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+};
static void apic_pm_activate(void);
@@ -92,10 +110,6 @@ void __init apic_intr_init(void)
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
-static DEFINE_PER_CPU(int, prof_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
-static DEFINE_PER_CPU(int, prof_counter) = 1;
-
static int enabled_via_apicbase;
void enable_NMI_through_LVT0 (void * dummy)
@@ -567,13 +581,13 @@ void lapic_shutdown(void)
if (!cpu_has_apic)
return;
- local_irq_disable();
+ raw_local_irq_disable();
clear_local_APIC();
if (enabled_via_apicbase)
disable_local_APIC();
- local_irq_enable();
+ raw_local_irq_enable();
}
#ifdef CONFIG_PM
@@ -617,9 +631,9 @@ static int lapic_suspend(struct sys_devi
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
disable_local_APIC();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -631,7 +645,7 @@ static int lapic_resume(struct sys_devic
if (!apic_pm_state.active)
return 0;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/*
* Make sure the APICBASE points to the right address
@@ -662,7 +676,7 @@ static int lapic_resume(struct sys_devic
apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return 0;
}
@@ -875,6 +889,11 @@ fake_ioapic_page:
*/
/*
+ * FIXME: Move this to i8253.h. There is no need to keep the access to
+ * the PIT scattered all around the place -tglx
+ */
+
+/*
* The timer chip is already set up at HZ interrupts per second here,
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
@@ -932,12 +951,16 @@ void (*wait_timer_tick)(void) __devinitd
#define APIC_DIVISOR 16
-static void __setup_APIC_LVTT(unsigned int clocks)
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot)
{
unsigned int lvtt_value, tmp_value, ver;
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+
+ lvtt_value = LOCAL_TIMER_VECTOR;
+ if (!oneshot)
+ lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+
if (!APIC_INTEGRATED(ver))
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
apic_write_around(APIC_LVTT, lvtt_value);
@@ -950,23 +973,27 @@ static void __setup_APIC_LVTT(unsigned i
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);
- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+ if (!oneshot)
+ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
-static void __devinit setup_APIC_timer(unsigned int clocks)
+static void lapic_next_event(unsigned long evt)
{
- unsigned long flags;
-
- local_irq_save(flags);
+ apic_write_around(APIC_TMICT, evt);
+}
- /*
- * Wait for IRQ0's slice:
- */
- wait_timer_tick();
+static void lapic_timer_setup(int mode)
+{
+ unsigned long flags;
- __setup_APIC_LVTT(clocks);
+ raw_local_irq_save(flags);
+ __setup_APIC_LVTT(calibration_result, mode == CLOCK_EVT_ONESHOT);
+ raw_local_irq_restore(flags);
+}
- local_irq_restore(flags);
+static void __devinit setup_APIC_timer(void)
+{
+ setup_local_clockevent(&lapic_clockevent, CPU_MASK_NONE);
}
/*
@@ -975,6 +1002,8 @@ static void __devinit setup_APIC_timer(u
* to calibrate, since some later bootup code depends on getting
* the first irq? Ugh.
*
+ * TODO: Fix this rather than saying "Ugh" -tglx
+ *
* We want to do the calibration only once since we
* want to have local timer irqs syncron. CPUs connected
* by the same APIC bus have the very same bus frequency.
@@ -997,7 +1026,7 @@ static int __init calibrate_APIC_clock(v
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
- __setup_APIC_LVTT(1000000000);
+ __setup_APIC_LVTT(1000000000, 0);
/*
* The timer chip counts down to zero. Let's wait
@@ -1034,6 +1063,13 @@ static int __init calibrate_APIC_clock(v
result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc32(tt1-tt2, TICK_NSEC * LOOPS);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+
if (cpu_has_tsc)
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
@@ -1048,28 +1084,26 @@ static int __init calibrate_APIC_clock(v
return result;
}
-static unsigned int calibration_result;
-
void __init setup_boot_APIC_clock(void)
{
unsigned long flags;
apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
using_apic_timer = 1;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
calibration_result = calibrate_APIC_clock();
/*
* Now set up the timer for real.
*/
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void __devinit setup_secondary_APIC_clock(void)
{
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
}
void __devinit disable_APIC_timer(void)
@@ -1092,6 +1126,8 @@ void enable_APIC_timer(void)
}
}
+static DEFINE_PER_CPU(int, prof_multiplier) = 1;
+
/*
* the frequency of the profiling timer can be changed
* by writing a multiplier value into /proc/profile.
@@ -1119,60 +1155,6 @@ int setup_profiling_timer(unsigned int m
return 0;
}
-
-#undef APIC_DIVISOR
-
-/*
- * Local timer interrupt handler. It does both profiling and
- * process statistics/rescheduling.
- *
- * We do profiling in every local tick, statistics/rescheduling
- * happen only every 'profiling multiplier' ticks. The default
- * multiplier is 1 and it can be changed by writing the new multiplier
- * value into /proc/profile.
- */
-
-inline void smp_local_timer_interrupt(struct pt_regs * regs)
-{
- int cpu = smp_processor_id();
-
- profile_tick(CPU_PROFILING, regs);
- if (--per_cpu(prof_counter, cpu) <= 0) {
- /*
- * The multiplier may have changed since the last time we got
- * to this point as a result of the user writing to
- * /proc/profile. In this case we need to adjust the APIC
- * timer accordingly.
- *
- * Interrupts are already masked off at this point.
- */
- per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu);
- if (per_cpu(prof_counter, cpu) !=
- per_cpu(prof_old_multiplier, cpu)) {
- __setup_APIC_LVTT(
- calibration_result/
- per_cpu(prof_counter, cpu));
- per_cpu(prof_old_multiplier, cpu) =
- per_cpu(prof_counter, cpu);
- }
-
-#ifdef CONFIG_SMP
- update_process_times(user_mode_vm(regs));
-#endif
- }
-
- /*
- * We take the 'long' return path, and there every subsystem
- * grabs the apropriate locks (kernel lock/ irq lock).
- *
- * we might want to decouple profiling from the 'long path',
- * and do the profiling totally in assembly.
- *
- * Currently this isn't too much of an issue (performance wise),
- * we can take more than 100K local irqs per second on a 100 MHz P5.
- */
-}
-
/*
* Local APIC timer interrupt. This is the most natural way for doing
* local interrupts, but local timer interrupts can be emulated by
@@ -1182,7 +1164,7 @@ inline void smp_local_timer_interrupt(st
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs)
{
int cpu = smp_processor_id();
@@ -1191,6 +1173,8 @@ fastcall void smp_apic_timer_interrupt(s
*/
per_cpu(irq_stat, cpu).apic_timer_irqs++;
+ trace_special(regs->eip, 0, 0);
+
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
@@ -1202,7 +1186,17 @@ fastcall void smp_apic_timer_interrupt(s
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
- smp_local_timer_interrupt(regs);
+ /*
+ * If the task is currently running in user mode, don't
+ * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not
+ * configured, this should be optimized out.
+ */
+ if (user_mode(regs))
+ touch_softlockup_watchdog();
+
+ if (lapic_clockevent.event_handler)
+ lapic_clockevent.event_handler(regs);
+
irq_exit();
}
@@ -1257,6 +1251,7 @@ fastcall void smp_error_interrupt(struct
*/
printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
smp_processor_id(), v , v1);
+ dump_stack();
irq_exit();
}
Index: linux.prev/arch/i386/kernel/apm.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/apm.c
+++ linux.prev/arch/i386/kernel/apm.c
@@ -552,9 +552,9 @@ static inline void apm_restore_cpus(cpum
*/
#define APM_DO_CLI \
if (apm_info.allow_ints) \
- local_irq_enable(); \
+ raw_local_irq_enable(); \
else \
- local_irq_disable();
+ raw_local_irq_disable();
#ifdef APM_ZERO_SEGS
# define APM_DECL_SEGS \
@@ -606,12 +606,12 @@ static u8 apm_bios_call(u32 func, u32 eb
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
- local_save_flags(flags);
+ raw_local_save_flags(flags);
APM_DO_CLI;
APM_DO_SAVE_SEGS;
apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
apm_restore_cpus(cpus);
@@ -650,12 +650,12 @@ static u8 apm_bios_call_simple(u32 func,
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
- local_save_flags(flags);
+ raw_local_save_flags(flags);
APM_DO_CLI;
APM_DO_SAVE_SEGS;
error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
gdt[0x40 / 8] = save_desc_40;
put_cpu();
apm_restore_cpus(cpus);
@@ -1215,7 +1215,7 @@ static int suspend(int vetoable)
}
device_suspend(PMSG_SUSPEND);
- local_irq_disable();
+ raw_local_irq_disable();
device_power_down(PMSG_SUSPEND);
/* serialize with the timer interrupt */
@@ -1231,14 +1231,14 @@ static int suspend(int vetoable)
*/
spin_unlock(&i8253_lock);
write_sequnlock(&xtime_lock);
- local_irq_enable();
+ raw_local_irq_enable();
save_processor_state();
err = set_system_power_state(APM_STATE_SUSPEND);
ignore_normal_resume = 1;
restore_processor_state();
- local_irq_disable();
+ raw_local_irq_disable();
write_seqlock(&xtime_lock);
spin_lock(&i8253_lock);
reinit_timer();
@@ -1253,7 +1253,7 @@ static int suspend(int vetoable)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
device_power_up();
- local_irq_enable();
+ raw_local_irq_enable();
device_resume();
pm_send_all(PM_RESUME, (void *)0);
queue_event(APM_NORMAL_RESUME, NULL);
@@ -1272,22 +1272,22 @@ static void standby(void)
{
int err;
- local_irq_disable();
+ raw_local_irq_disable();
device_power_down(PMSG_SUSPEND);
/* serialize with the timer interrupt */
write_seqlock(&xtime_lock);
/* If needed, notify drivers here */
get_time_diff();
write_sequnlock(&xtime_lock);
- local_irq_enable();
+ raw_local_irq_enable();
err = set_system_power_state(APM_STATE_STANDBY);
if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
apm_error("standby", err);
- local_irq_disable();
+ raw_local_irq_disable();
device_power_up();
- local_irq_enable();
+ raw_local_irq_enable();
}
static apm_event_t get_event(void)
Index: linux.prev/arch/i386/kernel/cpu/cpufreq/longhaul.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ linux.prev/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -144,7 +144,7 @@ static void do_powersaver(union msr_long
longhaul->bits.RevisionKey = 0;
preempt_disable();
- local_irq_save(flags);
+ raw_local_irq_save(flags);
/*
* get current pci bus master state for all devices
@@ -166,11 +166,11 @@ static void do_powersaver(union msr_long
outb(0xFE,0x21); /* TMR0 only */
outb(0xFF,0x80); /* delay */
- safe_halt();
+ raw_safe_halt();
wrmsrl(MSR_VIA_LONGHAUL, longhaul->val);
halt();
- local_irq_disable();
+ raw_local_irq_disable();
outb(tmp_mask,0x21); /* restore mask */
@@ -184,7 +184,7 @@ static void do_powersaver(union msr_long
pci_write_config_byte(dev, PCI_COMMAND, pci_cmd);
}
} while (dev != NULL);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
preempt_enable();
/* disable bus ratio bit */
@@ -245,16 +245,16 @@ static void longhaul_setstate(unsigned i
/* Enable software clock multiplier */
bcr2.bits.ESOFTBF = 1;
bcr2.bits.CLOCKMUL = clock_ratio_index;
- local_irq_disable();
+ raw_local_irq_disable();
wrmsrl (MSR_VIA_BCR2, bcr2.val);
- safe_halt();
+ raw_safe_halt();
/* Disable software clock multiplier */
rdmsrl (MSR_VIA_BCR2, bcr2.val);
bcr2.bits.ESOFTBF = 0;
- local_irq_disable();
+ raw_local_irq_disable();
wrmsrl (MSR_VIA_BCR2, bcr2.val);
- local_irq_enable();
+ raw_local_irq_enable();
break;
/*
Index: linux.prev/arch/i386/kernel/cpu/mtrr/cyrix.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/cpu/mtrr/cyrix.c
+++ linux.prev/arch/i386/kernel/cpu/mtrr/cyrix.c
@@ -17,7 +17,7 @@ cyrix_get_arr(unsigned int reg, unsigned
arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
/* Save flags and disable interrupts */
- local_irq_save(flags);
+ raw_local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
@@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
/* Enable interrupts if it was enabled previously */
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
shift = ((unsigned char *) base)[1] & 0x0f;
*base >>= PAGE_SHIFT;
Index: linux.prev/arch/i386/kernel/cpu/mtrr/generic.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/cpu/mtrr/generic.c
+++ linux.prev/arch/i386/kernel/cpu/mtrr/generic.c
@@ -234,7 +234,7 @@ static unsigned long set_mtrr_state(u32
static unsigned long cr4 = 0;
static u32 deftype_lo, deftype_hi;
-static DEFINE_SPINLOCK(set_atomicity_lock);
+static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
/*
* Since we are disabling the cache don't allow any interrupts - they
@@ -296,14 +296,14 @@ static void generic_set_all(void)
unsigned long mask, count;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
prepare_set();
/* Actually set the state */
mask = set_mtrr_state(deftype_lo,deftype_hi);
post_set();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
/* Use the atomic bitops to update the global mask */
for (count = 0; count < sizeof mask * 8; ++count) {
@@ -331,7 +331,7 @@ static void generic_set_mtrr(unsigned in
vr = &mtrr_state.var_ranges[reg];
- local_irq_save(flags);
+ raw_local_irq_save(flags);
prepare_set();
if (size == 0) {
@@ -350,7 +350,7 @@ static void generic_set_mtrr(unsigned in
}
post_set();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
Index: linux.prev/arch/i386/kernel/cpu/mtrr/main.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/cpu/mtrr/main.c
+++ linux.prev/arch/i386/kernel/cpu/mtrr/main.c
@@ -146,7 +146,7 @@ static void ipi_handler(void *info)
struct set_mtrr_data *data = info;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
atomic_dec(&data->count);
while(!atomic_read(&data->gate))
@@ -164,7 +164,7 @@ static void ipi_handler(void *info)
cpu_relax();
atomic_dec(&data->count);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
#endif
@@ -225,7 +225,7 @@ static void set_mtrr(unsigned int reg, u
if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
panic("mtrr: timed out waiting for other CPUs\n");
- local_irq_save(flags);
+ raw_local_irq_save(flags);
while(atomic_read(&data.count))
cpu_relax();
@@ -259,7 +259,7 @@ static void set_mtrr(unsigned int reg, u
while(atomic_read(&data.count))
cpu_relax();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
/**
@@ -695,11 +695,11 @@ void mtrr_ap_init(void)
* 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to
* prevent mtrr entry changes
*/
- local_irq_save(flags);
+ raw_local_irq_save(flags);
mtrr_if->set_all();
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
static int __init mtrr_init_finialize(void)
Index: linux.prev/arch/i386/kernel/cpu/mtrr/state.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/cpu/mtrr/state.c
+++ linux.prev/arch/i386/kernel/cpu/mtrr/state.c
@@ -12,7 +12,7 @@ void set_mtrr_prepare_save(struct set_mt
unsigned int cr0;
/* Disable interrupts locally */
- local_irq_save(ctxt->flags);
+ raw_local_irq_save(ctxt->flags);
if (use_intel() || is_cpu(CYRIX)) {
@@ -73,6 +73,6 @@ void set_mtrr_done(struct set_mtrr_conte
write_cr4(ctxt->cr4val);
}
/* Re-enable interrupts locally (if enabled previously) */
- local_irq_restore(ctxt->flags);
+ raw_local_irq_restore(ctxt->flags);
}
Index: linux.prev/arch/i386/kernel/entry.S
===================================================================
--- linux.prev.orig/arch/i386/kernel/entry.S
+++ linux.prev/arch/i386/kernel/entry.S
@@ -76,10 +76,10 @@ NT_MASK = 0x00004000
VM_MASK = 0x00020000
#ifdef CONFIG_PREEMPT
-#define preempt_stop cli
+# define preempt_stop cli
#else
-#define preempt_stop
-#define resume_kernel restore_nocheck
+# define preempt_stop
+# define resume_kernel restore_nocheck
#endif
#define SAVE_ALL \
@@ -160,14 +160,17 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
cli
+ cmpl $0, kernel_preemption
+ jz restore_nocheck
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
- jz restore_all
+ jz restore_nocheck
testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all
+ jz restore_nocheck
+ cli
call preempt_schedule_irq
jmp need_resched
#endif
@@ -200,6 +203,11 @@ sysenter_past_esp:
pushl %eax
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -213,6 +221,11 @@ sysenter_past_esp:
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
jne syscall_exit_work
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %eax
+ call sys_ret
+ popl %eax
+#endif
/* if something modifies registers it must also disable sysexit */
movl EIP(%esp), %edx
movl OLDESP(%esp), %ecx
@@ -225,6 +238,11 @@ sysenter_past_esp:
ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -254,6 +272,17 @@ restore_all:
cmpl $((4 << 8) | 3), %eax
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
+#if defined(CONFIG_CRITICAL_IRQSOFF_TIMING) || defined(CONFIG_LATENCY_TRACE)
+ pushl %eax
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+ call trace_irqs_on
+#endif
+#ifdef CONFIG_LATENCY_TRACE
+ call sys_ret
+#endif
+ popl %eax
+#endif
+restore_nocheck_nmi:
RESTORE_REGS
addl $4, %esp
1: iret
@@ -297,18 +326,19 @@ ldt_ss:
# perform work that needs to be done immediately before resumption
ALIGN
work_pending:
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jz work_notifysig
work_resched:
- call schedule
- cli # make sure we don't miss an interrupt
+ cli
+ call __schedule
+ # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
# than syscall tracing?
jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jnz work_resched
work_notifysig: # deal with pending signals and
@@ -351,6 +381,11 @@ syscall_trace_entry:
syscall_exit_work:
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
jz work_pending
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+ pushl %eax
+ call trace_irqs_on
+ popl %eax
+#endif
sti # could let do_syscall_trace() call
# schedule() instead
movl %esp, %eax
@@ -412,9 +447,16 @@ ENTRY(irq_entries_start)
vector=vector+1
.endr
+#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING
+# define TRACE_IRQS_OFF call trace_irqs_off_lowlevel;
+#else
+# define TRACE_IRQS_OFF
+#endif
+
ALIGN
common_interrupt:
SAVE_ALL
+ TRACE_IRQS_OFF
movl %esp,%eax
call do_IRQ
jmp ret_from_intr
@@ -423,6 +465,7 @@ common_interrupt:
ENTRY(name) \
pushl $nr-256; \
SAVE_ALL \
+ TRACE_IRQS_OFF \
movl %esp,%eax; \
call smp_/**/name; \
jmp ret_from_intr;
@@ -552,7 +595,7 @@ nmi_stack_correct:
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
- jmp restore_all
+ jmp restore_nocheck_nmi
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
Index: linux.prev/arch/i386/kernel/hpet.c
===================================================================
--- /dev/null
+++ linux.prev/arch/i386/kernel/hpet.c
@@ -0,0 +1,69 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define HPET_MASK 0xFFFFFFFF
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+static void *hpet_ptr;
+
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)readl(hpet_ptr);
+}
+
+struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = (cycle_t)HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+ .is_continuous = 1,
+};
+
+static int __init init_hpet_clocksource(void)
+{
+ unsigned long hpet_period;
+ void __iomem* hpet_base;
+ u64 tmp;
+
+ if (!hpet_address)
+ return -ENODEV;
+
+ /* calculate the hpet address: */
+ hpet_base =
+ (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+ hpet_ptr = hpet_base + HPET_COUNTER;
+
+ /* calculate the frequency: */
+ hpet_period = readl(hpet_base + HPET_PERIOD);
+
+ /*
+ * hpet period is in femtoseconds per cycle
+ * so we need to convert this to ns/cyc units
+ * approximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ register_clocksource(&clocksource_hpet);
+
+ return 0;
+}
+
+module_init(init_hpet_clocksource);
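The mult/shift pair computed in init_hpet_clocksource() is the usual clocksource arithmetic: elapsed nanoseconds are (cycles * mult) >> shift. A minimal user-space sketch of the same conversion, assuming an illustrative HPET period of 69841279 femtoseconds (roughly a 14.3 MHz HPET) rather than a value read from hardware:

#include <stdint.h>
#include <stdio.h>

#define HPET_SHIFT 22
#define FSEC_PER_NSEC 1000000ULL

int main(void)
{
	/* assumed period in femtoseconds per cycle, for illustration only */
	uint64_t hpet_period = 69841279;
	uint32_t mult = (uint32_t)((hpet_period << HPET_SHIFT) / FSEC_PER_NSEC);
	uint64_t cycles = 1000000;	/* an arbitrary counter delta */

	/* nanoseconds elapsed = (cycles * mult) >> shift */
	uint64_t ns = (cycles * mult) >> HPET_SHIFT;

	printf("mult=%u, %llu cycles -> %llu ns\n",
	       mult, (unsigned long long)cycles, (unsigned long long)ns);
	return 0;
}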
Index: linux.prev/arch/i386/kernel/i386_ksyms.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/i386_ksyms.c
+++ linux.prev/arch/i386/kernel/i386_ksyms.c
@@ -6,10 +6,12 @@
/* This is definitely a GPL-only symbol */
EXPORT_SYMBOL_GPL(cpu_gdt_table);
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#ifdef CONFIG_ASM_SEMAPHORES
+EXPORT_SYMBOL(__compat_down_failed);
+EXPORT_SYMBOL(__compat_down_failed_interruptible);
+EXPORT_SYMBOL(__compat_down_failed_trylock);
+EXPORT_SYMBOL(__compat_up_wakeup);
+#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
@@ -25,7 +27,7 @@ EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES)
extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
EXPORT_SYMBOL(__write_lock_failed);
Index: linux.prev/arch/i386/kernel/i8253.c
===================================================================
--- /dev/null
+++ linux.prev/arch/i386/kernel/i8253.c
@@ -0,0 +1,137 @@
+/*
+ * i8253.c 8253/PIT functions
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "io_ports.h"
+
+DEFINE_RAW_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
+
+static void init_pit_timer(int mode)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+
+ if (mode != CLOCK_EVT_ONESHOT) {
+ /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(0x34, PIT_MODE);
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ } else {
+ /* One shot setup */
+ outb_p(0x38, PIT_MODE);
+ udelay(10);
+ }
+
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+static void pit_next_event(unsigned long evt)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(evt & 0xff , PIT_CH0); /* LSB */
+ outb(evt >> 8 , PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+static struct clock_event pit_clockevent = {
+ .name = "pit",
+ .capabilities = CLOCK_CAP_TICK
+#ifndef CONFIG_SMP
+ | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE |
+ CLOCK_CAP_UPDATE
+#endif
+ ,
+ .set_mode = init_pit_timer,
+ .set_next_event = pit_next_event,
+ .start_event = io_apic_timer_ack,
+ .end_event = mca_timer_ack,
+ .shift = 32,
+ .irq = 0,
+};
+
+void setup_pit_timer(void)
+{
+ pit_clockevent.mult = div_sc32(CLOCK_TICK_RATE, NSEC_PER_SEC);
+ pit_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFF, &pit_clockevent);
+ pit_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &pit_clockevent);
+ setup_global_clockevent(&pit_clockevent, CPU_MASK_NONE);
+}
+
+/*
+ * Since the PIT overflows every tick, it's not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static cycle_t pit_read(void)
+{
+ unsigned long flags, seq;
+ int count;
+ u64 jifs;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_p(PIT_CH0); /* read the latched count */
+ count |= inb_p(PIT_CH0) << 8;
+
+ /* VIA686a test code... reset the latch if count > max + 1 */
+ if (count > LATCH) {
+ outb_p(0x34, PIT_MODE);
+ outb_p(LATCH & 0xff, PIT_CH0);
+ outb(LATCH >> 8, PIT_CH0);
+ count = LATCH - 1;
+ }
+ spin_unlock_irqrestore(&i8253_lock, flags);
+
+ jifs = jiffies_64;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ jifs -= INITIAL_JIFFIES;
+ count = (LATCH-1) - count;
+
+ return (cycle_t)(jifs * LATCH) + count;
+}
+
+static struct clocksource clocksource_pit = {
+ .name = "pit",
+ .rating = 110,
+ .read = pit_read,
+ .mask = (cycle_t)-1,
+ .mult = 0,
+ .shift = 20,
+};
+
+static int __init init_pit_clocksource(void)
+{
+ if (num_possible_cpus() > 4) /* PIT does not scale! */
+ return 0;
+
+ clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+ register_clocksource(&clocksource_pit);
+
+ return 0;
+}
+module_init(init_pit_clocksource);
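pit_read() above builds a free-running cycle count out of two pieces: whole ticks taken from jiffies and the partially elapsed tick taken from the latched, down-counting PIT channel 0. A minimal stand-alone sketch of that composition, assuming LATCH is CLOCK_TICK_RATE/HZ (about 1193 for HZ=1000), a value not spelled out in the patch itself:

#include <stdint.h>
#include <stdio.h>

#define LATCH 1193	/* assumed CLOCK_TICK_RATE/HZ for HZ=1000 */

static uint64_t pit_cycles(uint64_t jiffies_since_boot, int latched_count)
{
	/* the PIT counts down from LATCH, so elapsed = (LATCH - 1) - count */
	int elapsed = (LATCH - 1) - latched_count;

	return jiffies_since_boot * LATCH + elapsed;
}

int main(void)
{
	/* e.g. five full ticks plus a partially elapsed sixth tick */
	printf("%llu cycles\n", (unsigned long long)pit_cycles(5, 600));
	return 0;
}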
Index: linux.prev/arch/i386/kernel/i8259.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/i8259.c
+++ linux.prev/arch/i386/kernel/i8259.c
@@ -35,7 +35,7 @@
* moves to arch independent land
*/
-DEFINE_SPINLOCK(i8259A_lock);
+DEFINE_RAW_SPINLOCK(i8259A_lock);
static void end_8259A_irq (unsigned int irq)
{
@@ -366,7 +366,7 @@ static irqreturn_t math_error_irq(int cp
* New motherboards sometimes make IRQ 13 be a PCI interrupt,
* so allow interrupt sharing.
*/
-static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL };
+static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL };
void __init init_ISA_irqs (void)
{
@@ -422,12 +422,6 @@ void __init init_IRQ(void)
intr_init_hook();
/*
- * Set the clock to HZ Hz, we already have a valid
- * vector now:
- */
- setup_pit_timer();
-
- /*
* External FPU? Set up irq13 if so, for
* original braindamaged IBM FERR coupling.
*/
Index: linux.prev/arch/i386/kernel/init_task.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/init_task.c
+++ linux.prev/arch/i386/kernel/init_task.c
@@ -10,8 +10,8 @@
#include
#include
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
+static struct fs_struct init_fs = INIT_FS(init_fs);
+static struct files_struct init_files = INIT_FILES(init_files);
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
Index: linux.prev/arch/i386/kernel/io_apic.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/io_apic.c
+++ linux.prev/arch/i386/kernel/io_apic.c
@@ -49,7 +49,7 @@ atomic_t irq_mis_count;
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
/*
* Is the SiS APIC rmw bug present ?
@@ -90,6 +90,27 @@ int vector_irq[NR_VECTORS] __read_mostly
#define vector_to_irq(vector) (vector)
#endif
+static int timer_ack;
+
+void io_apic_timer_ack(void *priv)
+{
+ unsigned long flags;
+
+ if (timer_ack) {
+ /*
+ * Subtle, when I/O APICs are used we have to ack timer IRQ
+ * manually to reset the IRR bit for do_slow_gettimeoffset().
+ * This will also deassert NMI lines for the watchdog if run
+ * on an 82489DX-based system.
+ */
+ spin_lock_irqsave(&i8259A_lock, flags);
+ outb(0x0c, PIC_MASTER_OCW3);
+ /* Ack the IRQ; AEOI will end it automatically. */
+ inb(PIC_MASTER_POLL);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ }
+}
+
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
@@ -133,6 +154,105 @@ static void __init replace_pin_at_irq(un
}
}
+//#define IOAPIC_CACHE
+
+#ifdef IOAPIC_CACHE
+# define MAX_IOAPIC_CACHE 512
+
+/*
+ * Cache register values:
+ */
+static unsigned int io_apic_cache[MAX_IO_APICS][MAX_IOAPIC_CACHE]
+ ____cacheline_aligned_in_smp;
+#endif
+
+inline unsigned int __raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ *IO_APIC_BASE(apic) = reg;
+ return *(IO_APIC_BASE(apic)+4);
+}
+
+unsigned int raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ unsigned int val = __raw_io_apic_read(apic, reg);
+
+#ifdef IOAPIC_CACHE
+ io_apic_cache[apic][reg] = val;
+#endif
+ return val;
+}
+
+unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+#ifdef IOAPIC_CACHE
+ if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+ static int once = 1;
+
+ if (once) {
+ once = 0;
+ printk("WARNING: ioapic register cache overflow: %d.\n",
+ reg);
+ dump_stack();
+ }
+ return __raw_io_apic_read(apic, reg);
+ }
+ if (io_apic_cache[apic][reg] && !sis_apic_bug)
+ return io_apic_cache[apic][reg];
+#endif
+ return raw_io_apic_read(apic, reg);
+}
+
+void io_apic_write(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+ if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+ static int once = 1;
+
+ if (once) {
+ once = 0;
+ printk("WARNING: ioapic register cache overflow: %d.\n",
+ reg);
+ dump_stack();
+ }
+ } else
+ io_apic_cache[apic][reg] = val;
+#endif
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = val;
+}
+
+/*
+ * Some systems need a POST flush or else level-triggered interrupts
+ * generate lots of spurious interrupts due to the POST-ed write not
+ * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC.
+ */
+#ifdef CONFIG_SMP
+# define IOAPIC_POSTFLUSH
+#endif
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APICs require that we rewrite the index register
+ */
+void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+ io_apic_cache[apic][reg] = val;
+#endif
+ if (unlikely(sis_apic_bug))
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = val;
+#ifndef IOAPIC_POSTFLUSH
+ if (unlikely(sis_apic_bug))
+#endif
+ /*
+ * Force POST flush by reading:
+ */
+ val = *(IO_APIC_BASE(apic)+4);
+}
+
static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
{
struct irq_pin_list *entry = irq_2_pin + irq;
@@ -164,18 +284,6 @@ static void __unmask_IO_APIC_irq (unsign
__modify_IO_APIC_irq(irq, 0, 0x00010000);
}
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
-}
-
static void mask_IO_APIC_irq (unsigned int irq)
{
unsigned long flags;
@@ -1430,8 +1538,8 @@ void __init print_IO_APIC(void)
struct IO_APIC_route_entry entry;
spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ *(((int *)&entry)+0) = raw_io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = raw_io_apic_read(apic, 0x11+i*2);
spin_unlock_irqrestore(&ioapic_lock, flags);
printk(KERN_DEBUG " %02x %03X %02X ",
@@ -1477,7 +1585,7 @@ void __init print_IO_APIC(void)
return;
}
-#if 0
+#if 1
static void print_APIC_bitfield (int base)
{
@@ -1866,7 +1974,7 @@ static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
- local_irq_enable();
+ raw_local_irq_enable();
/* Let ten ticks pass... */
mdelay((10 * 1000) / HZ);
@@ -1877,7 +1985,7 @@ static int __init timer_irq_works(void)
* might have cached one ExtINT interrupt. Finally, at
* least one tick may be lost due to delays.
*/
- if (jiffies - t1 > 4)
+ if (jiffies - t1 > 4 && jiffies - t1 < 16)
return 1;
return 0;
@@ -1930,9 +2038,11 @@ static unsigned int startup_edge_ioapic_
static void ack_edge_ioapic_irq(unsigned int irq)
{
move_irq(irq);
+#if 0
if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
== (IRQ_PENDING | IRQ_DISABLED))
mask_IO_APIC_irq(irq);
+#endif
ack_APIC_irq();
}
@@ -1957,6 +2067,30 @@ static unsigned int startup_level_ioapic
return 0; /* don't check for pending */
}
+#ifdef CONFIG_PREEMPT_HARDIRQS
+
+/*
+ * in the PREEMPT_HARDIRQS case we don't want to keep the local
+ * APIC unacked, because that prevents further interrupts from
+ * being handled - and with IRQ threads being delayed arbitrarily,
+ * that's unacceptable. So we first mask the IRQ, then ack it.
+ * The hardirq thread will then unmask it.
+ */
+static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+{
+ move_irq(irq);
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+#else
+
+static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+{
+}
+
+#endif
+
static void end_level_ioapic_irq (unsigned int irq)
{
unsigned long v;
@@ -1991,8 +2125,10 @@ static void end_level_ioapic_irq (unsign
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
+ /* mask = 1, trigger = 0 */
+ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+ /* mask = 0, trigger = 1 */
+ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
spin_unlock(&ioapic_lock);
}
}
@@ -2020,6 +2156,13 @@ static unsigned int startup_level_ioapic
return startup_level_ioapic_irq (irq);
}
+static void mask_and_ack_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ mask_and_ack_level_ioapic_irq(irq);
+}
+
static void end_level_ioapic_vector (unsigned int vector)
{
int irq = vector_to_irq(vector);
Index: linux.prev/arch/i386/kernel/irq.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/irq.c
+++ linux.prev/arch/i386/kernel/irq.c
@@ -51,7 +51,7 @@ static union irq_ctx *softirq_ctx[NR_CPU
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+fastcall notrace unsigned int do_IRQ(struct pt_regs *regs)
{
/* high bits used in ret_from_ code */
int irq = regs->orig_eax & 0xff;
@@ -59,8 +59,12 @@ fastcall unsigned int do_IRQ(struct pt_r
union irq_ctx *curctx, *irqctx;
u32 *isp;
#endif
-
irq_enter();
+#ifdef CONFIG_LATENCY_TRACE
+ if (irq == trace_user_trigger_irq)
+ user_trace_start();
+#endif
+ trace_special(regs->eip, irq, 0);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
@@ -69,7 +73,7 @@ fastcall unsigned int do_IRQ(struct pt_r
__asm__ __volatile__("andl %%esp,%0" :
"=r" (esp) : "0" (THREAD_SIZE - 1));
if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
- printk("do_IRQ: stack overflow: %ld\n",
+ printk("BUG: do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct thread_info));
dump_stack();
}
@@ -173,7 +177,7 @@ asmlinkage void do_softirq(void)
if (in_interrupt())
return;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
if (local_softirq_pending()) {
curctx = current_thread_info();
@@ -194,7 +198,7 @@ asmlinkage void do_softirq(void)
);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
EXPORT_SYMBOL(do_softirq);
@@ -224,8 +228,10 @@ int show_interrupts(struct seq_file *p,
}
if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ irq_desc_t *desc = irq_desc + i;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -235,15 +241,27 @@ int show_interrupts(struct seq_file *p,
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
- seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %-14s", desc->handler->typename);
+#define F(x,c) ((desc->status & x) ? c : '.')
+ seq_printf(p, " [%c%c%c%c%c%c%c%c%c/",
+ F(IRQ_INPROGRESS, 'I'),
+ F(IRQ_DISABLED, 'D'),
+ F(IRQ_PENDING, 'P'),
+ F(IRQ_REPLAY, 'R'),
+ F(IRQ_AUTODETECT, 'A'),
+ F(IRQ_WAITING, 'W'),
+ F(IRQ_LEVEL, 'L'),
+ F(IRQ_MASKED, 'M'),
+ F(IRQ_NODELAY, 'N'));
+#undef F
+ seq_printf(p, "%3d]", desc->irqs_unhandled);
seq_printf(p, " %s", action->name);
-
for (action=action->next; action; action = action->next)
seq_printf(p, ", %s", action->name);
seq_putc(p, '\n');
skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ spin_unlock_irqrestore(&desc->lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
for_each_online_cpu(j)
@@ -298,9 +316,9 @@ void fixup_irqs(cpumask_t map)
barrier();
#else
/* That doesn't seem sufficient. Give it 1ms. */
- local_irq_enable();
+ raw_local_irq_enable();
mdelay(1);
- local_irq_disable();
+ raw_local_irq_disable();
#endif
}
#endif
Index: linux.prev/arch/i386/kernel/mca.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/mca.c
+++ linux.prev/arch/i386/kernel/mca.c
@@ -472,3 +472,22 @@ void mca_handle_nmi(void)
mca_nmi_hook();
} /* mca_handle_nmi */
+
+void mca_timer_ack(void *priv)
+{
+ int irq;
+
+ if (MCA_bus) {
+ /* The PS/2 uses level-triggered interrupts. You can't
+ turn them off, nor would you want to (any attempt to
+ enable edge-triggered interrupts usually gets intercepted by a
+ special hardware circuit). Hence we have to acknowledge
+ the timer interrupt. Through some incredibly stupid
+ design idea, the reset for IRQ 0 is done by setting the
+ high bit of the PPI port B (0x61). Note that some PS/2s,
+ notably the 55SX, work fine if this is removed. */
+
+ irq = inb_p( 0x61 ); /* read the current state */
+ outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
+ }
+}
Index: linux.prev/arch/i386/kernel/mcount-wrapper.S
===================================================================
--- /dev/null
+++ linux.prev/arch/i386/kernel/mcount-wrapper.S
@@ -0,0 +1,27 @@
+/*
+ * linux/arch/i386/mcount-wrapper.S
+ *
+ * Copyright (C) 2004 Ingo Molnar
+ */
+
+.globl mcount
+mcount:
+
+ cmpl $0, mcount_enabled
+ jz out
+
+ push %ebp
+ mov %esp, %ebp
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ call __mcount
+
+ popl %edx
+ popl %ecx
+ popl %eax
+ popl %ebp
+out:
+ ret
+
Index: linux.prev/arch/i386/kernel/microcode.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/microcode.c
+++ linux.prev/arch/i386/kernel/microcode.c
@@ -109,7 +109,7 @@ MODULE_LICENSE("GPL");
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
+static DEFINE_RAW_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DECLARE_MUTEX(microcode_sem);
Index: linux.prev/arch/i386/kernel/nmi.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/nmi.c
+++ linux.prev/arch/i386/kernel/nmi.c
@@ -34,7 +34,7 @@
unsigned int nmi_watchdog = NMI_NONE;
extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
+static unsigned int nmi_hz = 1000;
static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
@@ -108,7 +108,7 @@ int nmi_active;
static __init void nmi_cpu_busy(void *data)
{
volatile int *endflag = data;
- local_irq_enable();
+ raw_local_irq_enable();
/* Intentionally don't use cpu_relax here. This is
to make sure that the performance counter really ticks,
even if there is a simulator or similar that catches the
@@ -140,8 +140,8 @@ static int __init check_nmi_watchdog(voi
for (cpu = 0; cpu < NR_CPUS; cpu++)
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
- local_irq_enable();
- mdelay((10*1000)/nmi_hz); // wait 10 ticks
+ raw_local_irq_enable();
+ mdelay((100*1000)/nmi_hz); // wait 100 ticks
for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_SMP
@@ -168,7 +168,7 @@ static int __init check_nmi_watchdog(voi
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = 1;
+ nmi_hz = 10000;
kfree(prev_nmi_count);
return 0;
@@ -521,9 +521,34 @@ void touch_nmi_watchdog (void)
extern void die_nmi(struct pt_regs *, const char *msg);
-void nmi_watchdog_tick (struct pt_regs * regs)
+int nmi_show_regs[NR_CPUS];
+
+void nmi_show_all_regs(void)
{
+ int i;
+
+ if (nmi_watchdog == NMI_NONE)
+ return;
+ if (system_state != SYSTEM_RUNNING) {
+ printk("nmi_show_all_regs(): system state %d, not doing.\n",
+ system_state);
+ return;
+ }
+ printk("nmi_show_all_regs(): start on CPU#%d.\n",
+ raw_smp_processor_id());
+ dump_stack();
+
+ for_each_online_cpu(i)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+}
+
+static DEFINE_RAW_SPINLOCK(nmi_print_lock);
+void notrace nmi_watchdog_tick (struct pt_regs * regs)
+{
/*
* Since current_thread_info()-> is always on the stack, and we
* always switch the stack NMI-atomically, it's safe to use
@@ -531,7 +556,16 @@ void nmi_watchdog_tick (struct pt_regs *
*/
int sum, cpu = smp_processor_id();
- sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+ sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
+
+ profile_tick(CPU_PROFILING, regs);
+ if (nmi_show_regs[cpu]) {
+ nmi_show_regs[cpu] = 0;
+ spin_lock(&nmi_print_lock);
+ printk("NMI show regs on CPU#%d:\n", cpu);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+ }
if (last_irq_sums[cpu] == sum) {
/*
@@ -539,12 +573,25 @@ void nmi_watchdog_tick (struct pt_regs *
* wait a few IRQs (5 seconds) before doing the oops ...
*/
alert_counter[cpu]++;
- if (alert_counter[cpu] == 5*nmi_hz)
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi(regs, "NMI Watchdog detected LOCKUP");
+ if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) {
+ int i;
+
+ bust_spinlocks(1);
+ spin_lock(&nmi_print_lock);
+ printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+
+ for_each_online_cpu(i)
+ if (i != cpu)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+ die_nmi(regs, "NMI Watchdog detected LOCKUP");
+ }
+ } else {
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
}
Index: linux.prev/arch/i386/kernel/process.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/process.c
+++ linux.prev/arch/i386/kernel/process.c
@@ -39,6 +39,7 @@
#include
#include
#include
+#include
#include
#include
@@ -64,6 +65,12 @@ static int hlt_counter;
unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
+DEFINE_SPINLOCK(pm_idle_switch_lock);
+EXPORT_SYMBOL_GPL(pm_idle_switch_lock);
+
+int pm_idle_locked = 0;
+EXPORT_SYMBOL_GPL(pm_idle_locked);
+
/*
* Return saved PC of a blocked thread.
*/
@@ -99,21 +106,21 @@ EXPORT_SYMBOL(enable_hlt);
*/
void default_idle(void)
{
- local_irq_enable();
+ raw_local_irq_enable();
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb__after_clear_bit();
- while (!need_resched()) {
- local_irq_disable();
- if (!need_resched())
- safe_halt();
+ while (!need_resched() && !need_resched_delayed()) {
+ raw_local_irq_disable();
+ if (!need_resched() && !need_resched_delayed())
+ raw_safe_halt();
else
- local_irq_enable();
+ raw_local_irq_enable();
}
set_thread_flag(TIF_POLLING_NRFLAG);
} else {
- while (!need_resched())
+ while (!need_resched() && !need_resched_delayed())
cpu_relax();
}
}
@@ -126,16 +133,17 @@ EXPORT_SYMBOL(default_idle);
* to poll the ->work.need_resched flag instead of waiting for the
* cross-CPU IPI to arrive. Use this option with caution.
*/
-static void poll_idle (void)
+void poll_idle (void)
{
- local_irq_enable();
+ raw_local_irq_enable();
asm volatile(
"2:"
"testl %0, %1;"
"rep; nop;"
"je 2b;"
- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
+ : : "i"(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED),
+ "m" (current_thread_info()->flags));
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -153,7 +161,7 @@ static inline void play_dead(void)
/*
* With physical CPU hotplug, we should halt the cpu
*/
- local_irq_disable();
+ raw_local_irq_disable();
while (1)
halt();
}
@@ -178,7 +186,9 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- while (!need_resched()) {
+ BUG_ON(raw_irqs_disabled());
+
+ while (!need_resched() && !need_resched_delayed()) {
void (*idle)(void);
if (__get_cpu_var(cpu_idle_state))
@@ -196,9 +206,11 @@ void cpu_idle(void)
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
- preempt_enable_no_resched();
- schedule();
+ raw_local_irq_disable();
+ __preempt_enable_no_resched();
+ __schedule();
preempt_disable();
+ raw_local_irq_enable();
}
}
@@ -239,12 +251,12 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
static void mwait_idle(void)
{
- local_irq_enable();
+ raw_local_irq_enable();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
- if (need_resched())
+ if (need_resched() || need_resched_delayed())
break;
__mwait(0, 0);
}
@@ -372,11 +384,16 @@ void exit_thread(void)
/* The process may have allocated an io port bitmap... nuke it. */
if (unlikely(NULL != t->io_bitmap_ptr)) {
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ int cpu;
+ struct tss_struct *tss;
+ void *io_bitmap_ptr = t->io_bitmap_ptr;
- kfree(t->io_bitmap_ptr);
t->io_bitmap_ptr = NULL;
+ mb();
+ kfree(io_bitmap_ptr);
+
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
/*
* Careful, clear this in the TSS too:
*/
Index: linux.prev/arch/i386/kernel/reboot.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/reboot.c
+++ linux.prev/arch/i386/kernel/reboot.c
@@ -202,7 +202,7 @@ void machine_real_restart(unsigned char
{
unsigned long flags;
- local_irq_disable();
+ raw_local_irq_disable();
/* Write zero to CMOS register number 0x0f, which the BIOS POST
routine will recognize as telling it to do a proper reboot. (Well
Index: linux.prev/arch/i386/kernel/semaphore.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/semaphore.c
+++ linux.prev/arch/i386/kernel/semaphore.c
@@ -13,6 +13,7 @@
* rw semaphores implemented November 1999 by Benjamin LaHaise
*/
#include
+#include
#include
/*
@@ -28,15 +29,15 @@
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed\n"
-"__down_failed:\n\t"
+".globl __compat_down_failed\n"
+"__compat_down_failed:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down\n\t"
+ "call __compat_down\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -49,15 +50,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_interruptible\n"
-"__down_failed_interruptible:\n\t"
+".globl __compat_down_failed_interruptible\n"
+"__compat_down_failed_interruptible:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_interruptible\n\t"
+ "call __compat_down_interruptible\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -70,15 +71,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_trylock\n"
-"__down_failed_trylock:\n\t"
+".globl __compat_down_failed_trylock\n"
+"__compat_down_failed_trylock:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_trylock\n\t"
+ "call __compat_down_trylock\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -91,45 +92,13 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __up_wakeup\n"
-"__up_wakeup:\n\t"
+".globl __compat_up_wakeup\n"
+"__compat_up_wakeup:\n\t"
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __up\n\t"
+ "call __compat_up\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
"ret"
);
-/*
- * rw spinlock fallbacks
- */
-#if defined(CONFIG_SMP)
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __write_lock_failed\n"
-"__write_lock_failed:\n\t"
- LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jne 1b\n\t"
- LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jnz __write_lock_failed\n\t"
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __read_lock_failed\n"
-"__read_lock_failed:\n\t"
- LOCK "incl (%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $1,(%eax)\n\t"
- "js 1b\n\t"
- LOCK "decl (%eax)\n\t"
- "js __read_lock_failed\n\t"
- "ret"
-);
-#endif
Index: linux.prev/arch/i386/kernel/setup.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/setup.c
+++ linux.prev/arch/i386/kernel/setup.c
@@ -1620,6 +1620,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ tsc_init();
}
#include "setup_arch_post.h"
Index: linux.prev/arch/i386/kernel/signal.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/signal.c
+++ linux.prev/arch/i386/kernel/signal.c
@@ -604,6 +604,13 @@ int fastcall do_signal(struct pt_regs *r
int signr;
struct k_sigaction ka;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ raw_local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux.prev/arch/i386/kernel/smp.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/smp.c
+++ linux.prev/arch/i386/kernel/smp.c
@@ -163,7 +163,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
unsigned long cfg;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
/*
* Wait for idle.
@@ -186,7 +186,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu
*/
apic_write_around(APIC_ICR, cfg);
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
void send_IPI_mask_sequence(cpumask_t mask, int vector)
@@ -200,7 +200,7 @@ void send_IPI_mask_sequence(cpumask_t ma
* should be modified to do 1 message per cluster ID - mbligh
*/
- local_irq_save(flags);
+ raw_local_irq_save(flags);
for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
if (cpu_isset(query_cpu, mask)) {
@@ -227,7 +227,7 @@ void send_IPI_mask_sequence(cpumask_t ma
apic_write_around(APIC_ICR, cfg);
}
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
}
#include /* must come after the send_IPI functions above for inlining */
@@ -245,7 +245,7 @@ void send_IPI_mask_sequence(cpumask_t ma
static cpumask_t flush_cpumask;
static struct mm_struct * flush_mm;
static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+static DEFINE_RAW_SPINLOCK(tlbstate_lock);
#define FLUSH_ALL 0xffffffff
/*
@@ -390,7 +390,7 @@ static void flush_tlb_others(cpumask_t c
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
- mb();
+ cpu_relax();
flush_mm = NULL;
flush_va = 0;
@@ -481,10 +481,20 @@ void smp_send_reschedule(int cpu)
}
/*
+ * this function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ send_IPI_allbutself(RESCHEDULE_VECTOR);
+}
+
+/*
* Structure and data for smp_call_function(). This is designed to minimise
* static memory requirements. It also looks cleaner.
*/
-static DEFINE_SPINLOCK(call_lock);
+static DEFINE_RAW_SPINLOCK(call_lock);
struct call_data_struct {
void (*func) (void *info);
@@ -538,7 +548,7 @@ int smp_call_function (void (*func) (voi
}
/* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
+ WARN_ON(raw_irqs_disabled());
data.func = func;
data.info = info;
@@ -572,7 +582,7 @@ static void stop_this_cpu (void * dummy)
* Remove this CPU:
*/
cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_disable();
+ raw_local_irq_disable();
disable_local_APIC();
if (cpu_data[smp_processor_id()].hlt_works_ok)
for(;;) halt();
@@ -587,19 +597,20 @@ void smp_send_stop(void)
{
smp_call_function(stop_this_cpu, NULL, 1, 0);
- local_irq_disable();
+ raw_local_irq_disable();
disable_local_APIC();
- local_irq_enable();
+ raw_local_irq_enable();
}
/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule callback. Trigger a reschedule pass so that
+ * RT-overload balancing can pass tasks around.
*/
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs)
{
+ trace_special(regs->eip, 0, 0);
ack_APIC_irq();
+ set_tsk_need_resched(current);
}
fastcall void smp_call_function_interrupt(struct pt_regs *regs)
Index: linux.prev/arch/i386/kernel/smpboot.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/smpboot.c
+++ linux.prev/arch/i386/kernel/smpboot.c
@@ -212,142 +212,299 @@ valid_k7:
;
}
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
+static atomic_t tsc_start_flag, tsc_check_start, tsc_check_stop;
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
+static int __init check_tsc_warp(void)
{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- unsigned int one_usec;
- int buggy = 0;
-
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
- /* convert from kcyc/sec to cyc/usec */
- one_usec = cpu_khz / 1000;
+ static DEFINE_RAW_SPINLOCK(warp_lock);
+ static long long prev;
+ static unsigned int error;
- atomic_set(&tsc_start_flag, 1);
- wmb();
+ int cpus = num_booting_cpus(), nr = 0;
+ long long start, now, end, delta;
+ atomic_inc(&tsc_check_start);
+ while (atomic_read(&tsc_check_start) != cpus)
+ cpu_relax();
/*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
+ * Run the check for 500 msecs:
*/
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
+ rdtscll(start);
+ end = start + cpu_khz*500;
- rdtscll(tsc_values[smp_processor_id()]);
+ for (;;) {
/*
- * We clear the TSC in the last loop:
+ * Check for the TSC going backwards (between CPUs):
*/
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ spin_lock(&warp_lock);
+ rdtscll(now);
+ delta = now - prev;
+ prev = now;
+ spin_unlock(&warp_lock);
+ if (unlikely(delta < 0))
+ error = 1;
+ if (now > end)
+ break;
/*
- * Wait for all APs to leave the synchronization point:
+ * Take it easy every couple of iterations,
+ * to not starve other CPUs:
*/
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
+ nr++;
+ if (!(nr % 31))
+ cpu_relax();
}
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc_values[i];
- sum += t0;
- }
- }
- avg = sum;
- do_div(avg, num_booting_cpus());
+ atomic_inc(&tsc_check_stop);
+ while (atomic_read(&tsc_check_stop) != cpus)
+ cpu_relax();
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta;
- do_div(realdelta, one_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
+ return error;
+}
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
- }
+/*
+ * TSC synchronization based on ia64 itc synchronization code. Synchronize
+ * pairs of processors rather than trying to synchronize all of the processors
+ * with a single event. When several processors are all waiting for an
+ * event they don't all see it at the same time. The write will cause
+ * an invalidate on each processor's cache and then they all scramble to
+ * re-read that cache line.
+ *
+ * Writing the TSC resets the upper 32-bits, so we need to be careful
+ * that all of the cpus can be synchronized before we overflow the
+ * 32-bit count.
+ */
- sum += delta;
+#define MASTER 0
+#define SLAVE (SMP_CACHE_BYTES/sizeof(long))
+
+#define NUM_ROUNDS 64 /* magic value */
+#define NUM_ITERS 5 /* likewise */
+
+static volatile unsigned long go[2*SLAVE] __cacheline_aligned;
+static volatile int current_slave = -1;
+static volatile int tsc_sync_complete = 0;
+static volatile int tsc_adj_latency = 0;
+static unsigned int max_rt = 0;
+static unsigned int max_delta = 0;
+
+#define DEBUG_TSC_SYNC 0
+#if DEBUG_TSC_SYNC
+struct tsc_sync_debug {
+ long rt; /* roundtrip time */
+ long master; /* master's timestamp */
+ long diff; /* difference between midpoint and master's timestamp */
+ long lat; /* estimate of tsc adjustment latency */
+} tsc_sync_debug[NUM_ROUNDS*NR_CPUS];
+#endif
+
+void
+sync_master(void)
+{
+ unsigned long n, tsc, last_go_master;
+
+ last_go_master = 0;
+ while (1) {
+ while ((n = go[MASTER]) == last_go_master)
+ rep_nop();
+ if (n == ~0)
+ break;
+ rdtscl(tsc);
+ if (unlikely(!tsc))
+ tsc = 1;
+ go[SLAVE] = tsc;
+ last_go_master = n;
}
- if (!buggy)
- printk("passed.\n");
}
-static void __init synchronize_tsc_ap (void)
+/*
+ * Return the number of cycles by which our TSC differs from the TSC on
+ * the master (time-keeper) CPU. A positive number indicates our TSC is
+ * ahead of the master, negative that it is behind.
+ */
+static inline long
+get_delta (long *rt, long *master)
{
- int i;
+ unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+ unsigned long tcenter, t0, t1, tm, last_go_slave;
+ long i;
+
+ last_go_slave = go[SLAVE];
+ for (i = 0; i < NUM_ITERS; ++i) {
+ rdtscl(t0);
+ go[MASTER] = i+1;
+ while ((tm = go[SLAVE]) == last_go_slave)
+ rep_nop();
+ rdtscl(t1);
+
+ if (t1 - t0 < best_t1 - best_t0)
+ best_t0 = t0, best_t1 = t1, best_tm = tm;
+ last_go_slave = tm;
+ }
+
+ *rt = best_t1 - best_t0;
+ *master = best_tm - best_t0;
+
+ /* average best_t0 and best_t1 without overflow: */
+ tcenter = (best_t0/2 + best_t1/2);
+ if (best_t0 % 2 + best_t1 % 2 == 2)
+ ++tcenter;
+ return tcenter - best_tm;
+}
+
+/*
+ * Synchronize TSC of the current (slave) CPU with the TSC of the MASTER CPU
+ * (normally the time-keeper CPU). We use a closed loop to eliminate the
+ * possibility of unaccounted-for errors (such as getting a machine check in
+ * the middle of a calibration step). The basic idea is for the slave to ask
+ * the master what TSC value it has and to read its own TSC before and after
+ * the master responds. Each iteration gives us three
+ * timestamps:
+ *
+ * slave master
+ *
+ * t0 ---\
+ * ---\
+ * --->
+ * tm
+ * /---
+ * /---
+ * t1 <---
+ *
+ *
+ * The goal is to adjust the slave's TSC such that tm falls exactly half-way
+ * between t0 and t1. If we achieve this, the clocks are synchronized provided
+ * the interconnect between the slave and the master is symmetric. Even if the
+ * interconnect were asymmetric, we would still know that the synchronization
+ * error is smaller than the roundtrip latency (t1 - t0).
+ *
+ * When the interconnect is quiet and symmetric, this lets us synchronize the
+ * TSC to within one or two cycles. However, we can only *guarantee* that the
+ * synchronization is accurate to within a round-trip time, which is typically
+ * in the range of several hundred cycles (e.g., ~500 cycles). In practice,
+ * this means that the TSC's are usually almost perfectly synchronized, but we
+ * shouldn't assume that the accuracy is much better than half a micro second
+ * or so.
+ */
+
+static void __init
+synchronize_tsc_ap (void)
+{
+ long i, delta, adj, adjust_latency, n_rounds;
+ unsigned long rt, master_time_stamp, tsc;
+#if DEBUG_TSC_SYNC
+ struct tsc_sync_debug *t =
+ &tsc_sync_debug[smp_processor_id() * NUM_ROUNDS];
+#endif
+
+ while (!atomic_read(&tsc_start_flag))
+ mb();
+
+ if (!check_tsc_warp())
+ return;
/*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
+ * Wait for our turn to synchronize with the boot processor.
*/
- while (!atomic_read(&tsc_start_flag)) mb();
+ while (current_slave != smp_processor_id())
+ rep_nop();
+ adjust_latency = tsc_adj_latency;
+
+ go[SLAVE] = 0;
+ go[MASTER] = 0;
+ write_tsc(0,0);
+ for (i = 0; i < NUM_ROUNDS; ++i) {
+ delta = get_delta(&rt, &master_time_stamp);
+ if (delta == 0)
+ break;
+
+ if (i > 0)
+ adjust_latency += -delta;
+ adj = -delta + adjust_latency/8;
+ rdtscl(tsc);
+ write_tsc(tsc + adj, 0);
+#if DEBUG_TSC_SYNC
+ t[i].rt = rt;
+ t[i].master = master_time_stamp;
+ t[i].diff = delta;
+ t[i].lat = adjust_latency/8;
+#endif
+ }
+ n_rounds = i;
+ go[MASTER] = ~0;
+
+#if (DEBUG_TSC_SYNC == 2)
+ for (i = 0; i < n_rounds; ++i)
+ printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+ t[i].rt, t[i].master, t[i].diff, t[i].lat);
+
+ printk("CPU %d: synchronized TSC (last diff %ld cycles, maxerr %lu cycles)\n",
+ smp_processor_id(), delta, rt);
+
+ printk("It took %ld rounds\n", n_rounds);
+#endif
+ if (rt > max_rt)
+ max_rt = rt;
+ if (delta < 0)
+ delta = -delta;
+ if (delta > max_delta)
+ max_delta = delta;
+ tsc_adj_latency = adjust_latency;
+ current_slave = -1;
+ while (!tsc_sync_complete)
+ rep_nop();
+}
+
+/*
+ * The boot processor set its own TSC to zero and then gives each
+ * slave processor the chance to synchronize itself.
+ */
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
+static void __init synchronize_tsc_bp (void)
+{
+ unsigned int tsc_low, tsc_high, error;
+ int cpu;
+
+ atomic_set(&tsc_start_flag, 1);
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",
+ num_booting_cpus());
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ if (!check_tsc_warp()) {
+ printk("passed.\n");
+ return;
+ }
+ printk("failed.\n");
+
+ printk(KERN_INFO "starting TSC synchronization\n");
+ write_tsc(0, 0);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+ if (cpu == smp_processor_id())
+ continue;
+ go[MASTER] = 0;
+ current_slave = cpu;
+ sync_master();
+ while (current_slave != -1)
+ rep_nop();
+ }
+ rdtsc(tsc_low, tsc_high);
+ if (tsc_high)
+ printk("TSC overflowed during synchronization\n");
+ else
+ printk("TSC synchronization complete max_delta=%d cycles\n",
+ max_delta);
+ if (max_rt < 4293) {
+ error = (max_rt * 1000000)/cpu_khz;
+ printk("TSC sync round-trip time %d.%03d microseconds\n",
+ error/1000, error%1000);
+ } else {
+ printk("TSC sync round-trip time %d cycles\n", max_rt);
}
+ tsc_sync_complete = 1;
}
-#undef NR_LOOPS
extern void calibrate_delay(void);
@@ -547,7 +704,7 @@ static void __devinit start_secondary(vo
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
/* We can take interrupts now: we're officially "up". */
- local_irq_enable();
+ raw_local_irq_enable();
wmb();
cpu_idle();
@@ -1340,9 +1497,9 @@ int __cpu_disable(void)
clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
- local_irq_enable();
+ raw_local_irq_enable();
mdelay(1);
- local_irq_disable();
+ raw_local_irq_disable();
remove_siblinginfo(cpu);
@@ -1386,11 +1543,11 @@ int __devinit __cpu_up(unsigned int cpu)
/* In case one didn't come up */
if (!cpu_isset(cpu, cpu_callin_map)) {
printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
- local_irq_enable();
+ raw_local_irq_enable();
return -EIO;
}
- local_irq_enable();
+ raw_local_irq_enable();
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
Index: linux.prev/arch/i386/kernel/switch2poll.c
===================================================================
--- /dev/null
+++ linux.prev/arch/i386/kernel/switch2poll.c
@@ -0,0 +1,5 @@
+/*
+ * Same type of hack as is used for early_printk: it keeps the code
+ * in one place.
+ */
+#include "../../x86_64/kernel/switch2poll.c"
Index: linux.prev/arch/i386/kernel/time.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/time.c
+++ linux.prev/arch/i386/kernel/time.c
@@ -46,6 +46,7 @@
#include
#include
#include
+#include
#include
#include
@@ -56,6 +57,7 @@
#include
#include
#include
+#include
#include "mach_time.h"
@@ -79,16 +81,9 @@ EXPORT_SYMBOL(cpu_khz);
extern unsigned long wall_jiffies;
-DEFINE_SPINLOCK(rtc_lock);
+DEFINE_RAW_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-#include
-
-DEFINE_SPINLOCK(i8253_lock);
-EXPORT_SYMBOL(i8253_lock);
-
-struct timer_opts *cur_timer __read_mostly = &timer_none;
-
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
@@ -118,118 +113,25 @@ void rtc_cmos_write(unsigned char val, u
}
EXPORT_SYMBOL(rtc_cmos_write);
-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq;
- unsigned long usec, sec;
- unsigned long max_ntp_tick;
-
- do {
- unsigned long lost;
-
- seq = read_seqbegin(&xtime_lock);
-
- usec = cur_timer->get_offset();
- lost = jiffies - wall_jiffies;
-
- /*
- * If time_adjust is negative then NTP is slowing the clock
- * so make sure not to go into next possible interval.
- * Better to lose some accuracy than have time go backwards..
- */
- if (unlikely(time_adjust < 0)) {
- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
- usec = min(usec, max_ntp_tick);
-
- if (lost)
- usec += lost * max_ntp_tick;
- }
- else if (unlikely(lost))
- usec += lost * (USEC_PER_SEC / HZ);
-
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry(&xtime_lock, seq));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
-
- WARN_ON(irqs_disabled());
+ unsigned long flags;
/* gets recalled with irq locally disabled */
- spin_lock_irq(&rtc_lock);
+ /* XXX - does irqsave resolve this? -johnstul */
+ spin_lock_irqsave(&rtc_lock, flags);
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
- spin_unlock_irq(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
return retval;
}
-
-int timer_ack;
-
-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- return cur_timer->monotonic_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
-unsigned long profile_pc(struct pt_regs *regs)
+unsigned long notrace profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
@@ -241,70 +143,6 @@ unsigned long profile_pc(struct pt_regs
EXPORT_SYMBOL(profile_pc);
#endif
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
- */
-static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_IO_APIC
- if (timer_ack) {
- /*
- * Subtle, when I/O APICs are used we have to ack timer IRQ
- * manually to reset the IRR bit for do_slow_gettimeoffset().
- * This will also deassert NMI lines for the watchdog if run
- * on an 82489DX-based system.
- */
- spin_lock(&i8259A_lock);
- outb(0x0c, PIC_MASTER_OCW3);
- /* Ack the IRQ; AEOI will end it automatically. */
- inb(PIC_MASTER_POLL);
- spin_unlock(&i8259A_lock);
- }
-#endif
-
- do_timer_interrupt_hook(regs);
-
-
- if (MCA_bus) {
- /* The PS/2 uses level-triggered interrupts. You can't
- turn them off, nor would you want to (any attempt to
- enable edge-triggered interrupts usually gets intercepted by a
- special hardware circuit). Hence we have to acknowledge
- the timer interrupt. Through some incredibly stupid
- design idea, the reset for IRQ 0 is done by setting the
- high bit of the PPI port B (0x61). Note that some PS/2s,
- notably the 55SX, work fine if this is removed. */
-
- irq = inb_p( 0x61 ); /* read the current state */
- outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
- }
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
- cur_timer->mark_offset();
-
- do_timer_interrupt(irq, regs);
-
- write_sequnlock(&xtime_lock);
- return IRQ_HANDLED;
-}
-
/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
@@ -323,139 +161,42 @@ unsigned long get_cmos_time(void)
}
EXPORT_SYMBOL(get_cmos_time);
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-
-static void sync_cmos_clock(unsigned long dummy)
+/* arch specific timeofday hooks */
+nsec_t read_persistent_clock(void)
{
- struct timeval now, next;
- int fail = 1;
+ return (nsec_t)get_cmos_time() * NSEC_PER_SEC;
+}
+void sync_persistent_clock(struct timespec ts)
+{
+ static unsigned long last_rtc_update;
/*
* If we have an externally synchronized Linux clock, then update
* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
* called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
*/
- if (!ntp_synced())
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
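+ /* Skip unless at least 660 s (the ~11 minutes above) have passed since the last update. */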
+ if (ts.tv_sec <= last_rtc_update + 660)
return;
- do_gettimeofday(&now);
- if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
- fail = set_rtc_mmss(now.tv_sec);
-
- next.tv_usec = USEC_AFTER - now.tv_usec;
- if (next.tv_usec <= 0)
- next.tv_usec += USEC_PER_SEC;
-
- if (!fail)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
-
- if (next.tv_usec >= USEC_PER_SEC) {
- next.tv_sec++;
- next.tv_usec -= USEC_PER_SEC;
+ if ((ts.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+ (ts.tv_nsec / 1000) <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
+ /* horrible...FIXME */
+ if (set_rtc_mmss(ts.tv_sec) == 0)
+ last_rtc_update = ts.tv_sec;
+ else
+ last_rtc_update = ts.tv_sec - 600; /* do it again in 60 s */
}
- mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
-{
- mod_timer(&sync_cmos_timer, jiffies + 1);
-}
-
-static long clock_cmos_diff, sleep_start;
-
-static struct timer_opts *last_timer;
-static int timer_suspend(struct sys_device *dev, pm_message_t state)
-{
- /*
- * Estimate time zone so that set_time can update the clock
- */
- clock_cmos_diff = -get_cmos_time();
- clock_cmos_diff += get_seconds();
- sleep_start = get_cmos_time();
- last_timer = cur_timer;
- cur_timer = &timer_none;
- if (last_timer->suspend)
- last_timer->suspend(state);
- return 0;
-}
-
-static int timer_resume(struct sys_device *dev)
-{
- unsigned long flags;
- unsigned long sec;
- unsigned long sleep_length;
-
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled())
- hpet_reenable();
-#endif
- setup_pit_timer();
- sec = get_cmos_time() + clock_cmos_diff;
- sleep_length = (get_cmos_time() - sleep_start) * HZ;
- write_seqlock_irqsave(&xtime_lock, flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
- write_sequnlock_irqrestore(&xtime_lock, flags);
- jiffies += sleep_length;
- wall_jiffies += sleep_length;
- if (last_timer->resume)
- last_timer->resume();
- cur_timer = last_timer;
- last_timer = NULL;
- touch_softlockup_watchdog();
- return 0;
}
-static struct sysdev_class timer_sysclass = {
- .resume = timer_resume,
- .suspend = timer_suspend,
- set_kset_name("timer"),
-};
-
-
-/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timer_sysclass,
-};
-
-static int time_init_device(void)
-{
- int error = sysdev_class_register(&timer_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
-}
-
-device_initcall(time_init_device);
-
#ifdef CONFIG_HPET_TIMER
extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
}
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
time_init_hook();
}
@@ -463,6 +204,9 @@ static void __init hpet_time_init(void)
void __init time_init(void)
{
+ /* Set the PIT to tick at HZ Hz: */
+ setup_pit_timer();
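+ /*
+ * The PIT is programmed for the periodic tick here; gettimeofday and
+ * monotonic time are presumably handled by the generic timeofday code
+ * now, via the read_persistent_clock()/sync_persistent_clock() hooks
+ * above, rather than by the removed timer_opts drivers.
+ */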
+
#ifdef CONFIG_HPET_TIMER
if (is_hpet_capable()) {
/*
@@ -473,13 +217,5 @@ void __init time_init(void)
return;
}
#endif
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
Index: linux.prev/arch/i386/kernel/time_hpet.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/time_hpet.c
+++ linux.prev/arch/i386/kernel/time_hpet.c
@@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup);
#include
#include
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-
#define DEFAULT_RTC_INT_FREQ 64
#define RTC_NUM_INTS 1
@@ -303,12 +301,12 @@ int hpet_rtc_timer_init(void)
else
hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
cnt = hpet_readl(HPET_COUNTER);
cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
hpet_writel(cnt, HPET_T1_CMP);
hpet_t1_cmp = cnt;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
cfg = hpet_readl(HPET_T1_CFG);
cfg &= ~HPET_TN_PERIODIC;
Index: linux.prev/arch/i386/kernel/timers/Makefile
===================================================================
--- linux.prev.orig/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
-
-obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
Index: linux.prev/arch/i386/kernel/timers/common.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Common functions used across the timers go here
- */
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#include "mach_timer.h"
-
-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)
-
-unsigned long calibrate_tsc(void)
-{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
- mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
- }
-
- /*
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
-bad_ctc:
- return 0;
-}
-
-#ifdef CONFIG_HPET_TIMER
-/* ------ Calibrate the TSC using HPET -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
- * Second output is parameter 1 (when non NULL)
- * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
- * calibrate_tsc() calibrates the processor TSC by comparing
- * it to the HPET timer of known frequency.
- * Too much 64-bit arithmetic here to do this cleanly in C
- */
-#define CALIBRATE_CNT_HPET (5 * hpet_tick)
-#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-
-unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
-{
- unsigned long tsc_startlow, tsc_starthigh;
- unsigned long tsc_endlow, tsc_endhigh;
- unsigned long hpet_start, hpet_end;
- unsigned long result, remain;
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtsc(tsc_startlow, tsc_starthigh);
- do {
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
- rdtsc(tsc_endlow, tsc_endhigh);
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (tsc_endlow), "=d" (tsc_endhigh)
- :"g" (tsc_startlow), "g" (tsc_starthigh),
- "0" (tsc_endlow), "1" (tsc_endhigh));
-
- /* Error: ECPUTOOFAST */
- if (tsc_endhigh)
- goto bad_calibration;
-
- /* Error: ECPUTOOSLOW */
- if (tsc_endlow <= CALIBRATE_TIME_HPET)
- goto bad_calibration;
-
- ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
- if (remain > (tsc_endlow >> 1))
- result++; /* rounding the result */
-
- if (tsc_hpet_quotient_ptr) {
- unsigned long tsc_hpet_quotient;
-
- ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
- CALIBRATE_CNT_HPET);
- if (remain > (tsc_endlow >> 1))
- tsc_hpet_quotient++; /* rounding the result */
- *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
- }
-
- return result;
-bad_calibration:
- /*
- * the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- return 0;
-}
-#endif
-
-
-unsigned long read_timer_tsc(void)
-{
- unsigned long retval;
- rdtscl(retval);
- return retval;
-}
-
-
-/* calculate cpu_khz */
-void init_cpu_khz(void)
-{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
-}
-
Index: linux.prev/arch/i386/kernel/timers/timer.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include
-#include
-#include
-#include
-
-#ifdef CONFIG_HPET_TIMER
-/*
- * HPET memory read is slower than tsc reads, but is more dependable as it
- * always runs at constant frequency and reduces complexity due to
- * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
- * timer_pit when HPET is active. So, we default to timer_tsc.
- */
-#endif
-/* list of timers, ordered by preference, NULL terminated */
-static struct init_timer_opts* __initdata timers[] = {
-#ifdef CONFIG_X86_CYCLONE_TIMER
- &timer_cyclone_init,
-#endif
-#ifdef CONFIG_HPET_TIMER
- &timer_hpet_init,
-#endif
-#ifdef CONFIG_X86_PM_TIMER
- &timer_pmtmr_init,
-#endif
- &timer_tsc_init,
- &timer_pit_init,
- NULL,
-};
-
-static char clock_override[10] __initdata;
-
-static int __init clock_setup(char* str)
-{
- if (str)
- strlcpy(clock_override, str, sizeof(clock_override));
- return 1;
-}
-__setup("clock=", clock_setup);
-
-
-/* The chosen timesource has been found to be bad.
- * Fall back to a known good timesource (the PIT)
- */
-void clock_fallback(void)
-{
- cur_timer = &timer_pit;
-}
-
-/* iterates through the list of timers, returning the first
- * one that initializes successfully.
- */
-struct timer_opts* __init select_timer(void)
-{
- int i = 0;
-
- /* find most preferred working timer */
- while (timers[i]) {
- if (timers[i]->init)
- if (timers[i]->init(clock_override) == 0)
- return timers[i]->opts;
- ++i;
- }
-
- panic("select_timer: Cannot find a suitable timer\n");
- return NULL;
-}
-
-int read_current_timer(unsigned long *timer_val)
-{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
- return 0;
- }
- return -1;
-}
Index: linux.prev/arch/i386/kernel/timers/timer_cyclone.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Cyclone-timer:
- * This code implements timer_ops for the cyclone counter found
- * on IBM x440, x360, and other Summit based systems.
- *
- * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com)
- */
-
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#include "io_ports.h"
-
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-#define CYCLONE_CBAR_ADDR 0xFEB00CD0
-#define CYCLONE_PMCC_OFFSET 0x51A0
-#define CYCLONE_MPMC_OFFSET 0x51D0
-#define CYCLONE_MPCS_OFFSET 0x51A8
-#define CYCLONE_TIMER_FREQ 100000000
-#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
-int use_cyclone = 0;
-
-static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_low;
-static u32 last_cyclone_high;
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* helper macro to atomically read both cyclone counter registers */
-#define read_cyclone_counter(low,high) \
- do{ \
- high = cyclone_timer[1]; low = cyclone_timer[0]; \
- } while (high != cyclone_timer[1]);
-
-
-static void mark_offset_cyclone(void)
-{
- unsigned long lost, delay;
- unsigned long delta = last_cyclone_low;
- int count;
- unsigned long long this_offset, last_offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
-
- spin_lock(&i8253_lock);
- read_cyclone_counter(last_cyclone_low,last_cyclone_high);
-
- /* read values for delay_at_last_interrupt */
- outb_p(0x00, 0x43); /* latch the count ASAP */
-
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
- spin_unlock(&i8253_lock);
-
- /* lost tick compensation */
- delta = last_cyclone_low - delta;
- delta /= (CYCLONE_TIMER_FREQ/1000000);
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2)
- jiffies_64 += lost-1;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-
- /* catch corner case where tick rollover occured
- * between cyclone and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static unsigned long get_offset_cyclone(void)
-{
- u32 offset;
-
- if(!cyclone_timer)
- return delay_at_last_interrupt;
-
- /* Read the cyclone timer */
- offset = cyclone_timer[0];
-
- /* .. relative to previous jiffy */
- offset = offset - last_cyclone_low;
-
- /* convert cyclone ticks to microseconds */
- /* XXX slow, can we speed this up? */
- offset = offset/(CYCLONE_TIMER_FREQ/1000000);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + offset;
-}
-
-static unsigned long long monotonic_clock_cyclone(void)
-{
- u32 now_low, now_high;
- unsigned long long last_offset, this_offset, base;
- unsigned long long ret;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
-
- /* Read the cyclone counter */
- read_cyclone_counter(now_low,now_high);
- this_offset = ((unsigned long long)now_high<<32)|now_low;
-
- /* convert to nanoseconds */
- ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
- return ret * (1000000000 / CYCLONE_TIMER_FREQ);
-}
-
-static int __init init_cyclone(char* override)
-{
- u32* reg;
- u32 base; /* saved cyclone base address */
- u32 pageaddr; /* page that contains cyclone_timer register */
- u32 offset; /* offset from pageaddr to cyclone_timer register */
- int i;
-
- /* check clock override */
- if (override[0] && strncmp(override,"cyclone",7))
- return -ENODEV;
-
- /*make sure we're on a summit box*/
- if(!use_cyclone) return -ENODEV;
-
- printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
-
- /* find base address */
- pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
- offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
- return -ENODEV;
- }
- base = *reg;
- if(!base){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
- return -ENODEV;
- }
-
- /* setup PMCC */
- pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* setup MPCS */
- pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* map in cyclone_timer */
- pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!cyclone_timer){
- printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
- return -ENODEV;
- }
-
- /*quick test to make sure its ticking*/
- for(i=0; i<3; i++){
- u32 old = cyclone_timer[0];
- int stall = 100;
- while(stall--) barrier();
- if(cyclone_timer[0] == old){
- printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
- cyclone_timer = 0;
- return -ENODEV;
- }
- }
-
- init_cpu_khz();
-
- /* Everything looks good! */
- return 0;
-}
-
-
-static void delay_cyclone(unsigned long loops)
-{
- unsigned long bclock, now;
- if(!cyclone_timer)
- return;
- bclock = cyclone_timer[0];
- do {
- rep_nop();
- now = cyclone_timer[0];
- } while ((now-bclock) < loops);
-}
-/************************************************************/
-
-/* cyclone timer_opts struct */
-static struct timer_opts timer_cyclone = {
- .name = "cyclone",
- .mark_offset = mark_offset_cyclone,
- .get_offset = get_offset_cyclone,
- .monotonic_clock = monotonic_clock_cyclone,
- .delay = delay_cyclone,
-};
-
-struct init_timer_opts __initdata timer_cyclone_init = {
- .init = init_cyclone,
- .opts = &timer_cyclone,
-};
Index: linux.prev/arch/i386/kernel/timers/timer_hpet.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#include "io_ports.h"
-#include "mach_timer.h"
-#include
-
-static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
-static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
-static unsigned long hpet_last; /* hpet counter value at last tick*/
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_khz * 10^3))
- * ns = cycles * (10^6 / cpu_khz)
- *
- * Then we use scaling math (suggested by george@mvista.com) to get:
- * ns = cycles * (10^6 * SC / cpu_khz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better percision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- * -johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static unsigned long long monotonic_clock_hpet(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-static unsigned long get_offset_hpet(void)
-{
- register unsigned long eax, edx;
-
- eax = hpet_readl(HPET_COUNTER);
- eax -= hpet_last; /* hpet delta */
- eax = min(hpet_tick, eax);
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- *
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- *
- * Using a mull instead of a divl saves some cycles in critical path.
- */
- ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-static void mark_offset_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- offset = hpet_readl(HPET_COUNTER);
- if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
- int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
- jiffies_64 += lost_ticks;
- }
- hpet_last = offset;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-}
-
-static void delay_hpet(unsigned long loops)
-{
- unsigned long hpet_start, hpet_end;
- unsigned long eax;
-
- /* loops is the number of cpu cycles. Convert it to hpet clocks */
- ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
-
- hpet_start = hpet_readl(HPET_COUNTER);
- do {
- rep_nop();
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < (loops));
-}
-
-static struct timer_opts timer_hpet;
-
-static int __init init_hpet(char* override)
-{
- unsigned long result, remain;
-
- /* check clock override */
- if (override[0] && strncmp(override,"hpet",4))
- return -ENODEV;
-
- if (!is_hpet_enabled())
- return -ENODEV;
-
- printk("Using HPET for gettimeofday\n");
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
- eax, edx);
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz);
- }
- /* set this only when cpu_has_tsc */
- timer_hpet.read_timer = read_timer_tsc;
- }
-
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
- hpet_usec_quotient = result;
-
- return 0;
-}
-
-static int hpet_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- hpet_last = hpet_readl(HPET_COUNTER);
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_hpet __read_mostly = {
- .name = "hpet",
- .mark_offset = mark_offset_hpet,
- .get_offset = get_offset_hpet,
- .monotonic_clock = monotonic_clock_hpet,
- .delay = delay_hpet,
- .resume = hpet_resume,
-};
-
-struct init_timer_opts __initdata timer_hpet_init = {
- .init = init_hpet,
- .opts = &timer_hpet,
-};
Index: linux.prev/arch/i386/kernel/timers/timer_none.c
===================================================================
--- linux.prev.orig/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include