Index: linux-2.6.16/Documentation/DocBook/Makefile
===================================================================
--- linux-2.6.16.orig/Documentation/DocBook/Makefile 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/Documentation/DocBook/Makefile 2006-10-19 16:52:29.000000000 +0200
@@ -10,7 +10,8 @@ DOCBOOKS := wanbook.xml z8530book.xml mc
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml \
sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
- gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml
+ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
+ genericirq.xml
###
# The build process is as follows (targets):
Index: linux-2.6.16/Documentation/DocBook/genericirq.tmpl
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/Documentation/DocBook/genericirq.tmpl 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,560 @@
+
+
+
+
+
+ Linux generic IRQ handling
+
+
+
+ Thomas
+ Gleixner
+
+
+ tglx@linutronix.de
+
+
+
+
+ Ingo
+ Molnar
+
+
+ mingo@elte.hu
+
+
+
+
+
+
+ 2005
+ Thomas Gleixner
+
+
+ 2005
+ Ingo Molnar
+
+
+
+
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+
+
+
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+
+
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+
+
+
+ For more details see the file COPYING in the source
+ distribution of Linux.
+
+
+
+
+
+
+
+ Introduction
+
+ The generic interrupt handling layer is designed to provide a
+ complete abstraction of interrupt handling for device drivers
+ and is able to handle all different types of interrupt controller
+ hardware. Device drivers use generic API function to request, enable,
+ disable and free interrupts. The drivers do not have to know anything
+ about interrupt hardware, so they can be used on different hardware
+ platforms without code changes.
+
+
+ This documentation is provided for developers who want to implement
+ architecture interrupt support based on the Generic IRQ handling layer.
+
+
+
+
+ Rationale
+
+ The original implementation of interrupt handling in Linux is using
+ the __do_IRQ() super-handler, which must be able to deal with every
+ type of interrupt logic. This is achieved by an 'interrupt type'
+ structure and runtime flags to handle special cases.
+ Furthermore the superhandler assumed a certain type of interrupt
+ handling hardware and turned out to be not capable of handling all
+ kind of interrupt controller hardware which can be found through
+ the architectures. The all in one approach also adds unnecessary
+ complexity for every user.
+
+
+ Originally, Russell King identified different types of handlers to
+ build a quite universal set for the ARM interrupt handler
+ implementation in Linux 2.5/2.6. He distinguished between:
+
+ Level type
+ Edge type
+ Simple type
+
+ In the SMP world of the __do_IRQ() super-handler another type
+ was identified:
+
+ Per CPU type
+
+
+
+ This split implementation of handlers allows to optimize the flow
+ of the interrupt handling for each specific interrupt type.
+ This reduces complexity in that particular code path and allows
+ the optimized handling of a given type.
+
+
+ The original general implementation uses interrupt_type structures
+ to differentiate the flow control in the super-handler. This
+ leads to a mix of flow logic and code related to hardware details.
+ Russell King's ARM implementation which replaced the type by a chip
+ abstraction did the mix the other way around.
+
+
+ The natural conclusion was a clean separation of the 'type flow'
+ and the 'chip'. Analysing a couple of architecture implementations
+ reveals that many of them can use a generic set of 'type flow'
+ implementations and only need to add the chip level specific code.
+ The separation is also valuable for the (sub)architectures,
+ which need specific quirks in the type flow itself, because it
+ provides a more transparent design.
+
+
+ Each interrupt type implementation has assigned its own flow
+ handler, which should be normally one of the generic
+ implementations. The flow handler implementation makes it
+ simple to provide demultiplexing handlers which can be found in
+ embedded platforms on various architectures.
+
+
+ The separation makes the generic interrupt handling more flexible
+ and extensible. A (sub)architecture can use a generic type flow
+ implementation for e.g. 'level type' interrupts and add a
+ (sub)architecture specific 'edge type' implementation.
+
+
+ To make the transition to the new model easier and prevent the
+ breakage of existing implementations the __do_IRQ() super-handler
+ is still available. This leads to a kind of duality for the time
+ being. Over time the new model should achieve a homogeneous
+ implementation scheme over all architectures with enhanced
+ maintainability and cleanliness.
+
+
+
+ Known Bugs And Assumptions
+
+ None (hopefully).
+
+
+
+
+ Abstraction layers
+
+ There are three main levels of abstraction in the interrupt code:
+
+ Highlevel driver API
+ Abstract interrupt type
+ Chiplevel hardware encapsulation
+
+
+
+ The separation of interrupt type and chip level functionality
+ provides the most flexible design. This implementation can handle
+ all kinds of interrupt hardware and the necessary workarounds for
+ the interrupt types without the need of redundant implementations.
+ The separation also handles edge and level type interrupts
+ on the same hardware chip.
+
+
+ Interrupt control flow
+
+ Each interrupt is described by an interrupt description structure
+ irq_desc. The interrupt is referenced by an 'unsigned int' numeric
+ value which selects the corresponding interrupt description structure
+ in the description structures array.
+ The description structure contains status information and pointers
+ to the interrupt type structure and the interrupt chip structure
+ which are assigned to this interrupt.
+
+
+ Whenever an interrupt triggers, the lowlevel arch code calls into
+ the generic interrupt code by calling desc->handler->handle_irq().
+ This highlevel IRQ handling function only uses other
+ desc->handler primitives which describe the control flow operation
+ necessary for the interrupt type. These operations are calling
+ the chip primitives referenced by the assigned chip description
+ structure.
+
+
+
+ Highlevel Driver API
+
+ The highlevel Driver API consists of following functions:
+
+ request_irq()
+ free_irq()
+ disable_irq()
+ enable_irq()
+ disable_irq_nosync() (SMP only)
+ synchronize_irq() (SMP only)
+ set_irq_type()
+ set_irq_wake()
+ set_irq_data()
+ set_irq_chip()
+ set_irq_chip_data()
+
+ See the autogenerated function documentation for details.
+
+
+
+ Abstract interrupt type
+
+ The 'interrupt type' (struct irq_type) abstraction mainly consists of
+ methods which implement the 'interrupt handling flow'. The generic
+ layer provides a set of pre-defined types:
+
+ default_level_type
+ default_edge_type
+ default_simple_type
+ default_percpu_type
+
+ The default type implementations use the generic type handlers.
+
+ handle_level_type
+ handle_edge_type
+ handle_simple_type
+ handle_percpu_type
+
+ The interrupt types (either predefined or architecture specific) are
+ assigned to specific interrupts by the architecture either during
+ bootup or during device initialization.
+
+
+ Default type implementations
+
+ Helper functions
+
+ The helper functions call the chip primitives and
+ are used by the default type implementations.
+ Following helper functions are implemented (simplified excerpt):
+
+default_enable(irq)
+{
+ desc->chip->unmask(irq);
+}
+
+default_disable(irq)
+{
+ desc->chip->mask(irq);
+}
+
+default_ack(irq)
+{
+ chip->ack(irq);
+}
+
+default_mask_ack(irq)
+{
+ if (chip->mask_ack) {
+ chip->mask_ack(irq);
+ } else {
+ chip->mask(irq);
+ chip->ack(irq);
+ }
+}
+
+noop(irq)
+{
+}
+
+default_set_type(irq, type)
+{
+ if (desc->chip->set_type) {
+ if (desc->chip->set_type(irq, type))
+ return NULL;
+ }
+
+ return default_handler for type;
+}
+
+
+
+
+ Default Level IRQ type
+
+ The default Level IRQ type implements the functions
+
+ enabledefault_enable
+ disabledefault_disable
+ startdefault_mask_ack
+ enddefault_enable
+ handle_irqhandle_level_irq
+ set_typedefault_set_type
+
+
+
+
+ Default Edge IRQ type
+
+ The default Edge IRQ type implements the functions
+
+ enabledefault_enable
+ disabledefault_disable
+ startdefault_ack
+ holddefault_mask_ack
+ endnoop
+ handle_irqhandle_edge_irq
+ set_typedefault_set_type
+
+
+
+
+ Default simple IRQ type
+
+ The default simple IRQ type implements the functions
+
+ enablenoop
+ disablenoop
+ handle_irqhandle_simple_irq
+
+
+
+
+ Default per CPU IRQ type
+
+ The default per CPU IRQ type implements the functions
+
+ enabledefault_enable
+ disabledefault_disable
+ startdefault_ack
+ enddefault_enable
+ handle_irqhandle_percpu_irq
+
+
+
+
+
+ Default type handler implementations
+
+ Default Level IRQ type handler
+
+ handle_level_type provides a generic implementation
+ for level type interrupts.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
+
+
+
+
+ Default Edge IRQ type handler
+
+ handle_edge_type provides a generic implementation
+ for edge type interrupts.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+if (desc->status & running) {
+ desc->handler->hold();
+ desc->status |= pending | masked;
+ return;
+}
+desc->handler->start();
+desc->status |= running;
+do {
+ if (desc->status & masked)
+ desc->handler->enable();
+ desc-status &= ~pending;
+ handle_IRQ_event(desc->action);
+} while (status & pending);
+desc-status &= ~running;
+desc->handler->end();
+
+
+
+
+ Default simple IRQ type handler
+
+ handle_simple_type provides a generic implementation
+ for simple type interrupts.
+
+
+ Note: The simple type handler does not call any
+ handler/chip primitives.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+handle_IRQ_event(desc->action);
+
+
+
+
+ Default per CPU type handler
+
+ handle_percpu_type provides a generic implementation
+ for per CPU type interrupts.
+
+
+ Per CPU interrupts are only available on SMP and
+ the handler provides a simplified version without
+ locking.
+
+
+ Following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
+
+
+
+
+
+ Architecture specific type implementation
+
+ If an architecture needs to implement its own type structures, then
+ the following primitives have to be implemented:
+
+ handle_irq() - The handle_irq function pointer should preferably point to
+ one of the generic type handler functions
+ startup() - Optional
+ shutdown() - Optional
+ enable()
+ disable()
+ start()
+ hold() - For edge type interrupts only
+ end()
+ set_type - Optional
+ set_affinity - SMP only
+
+
+
+
+ Quirks and optimizations
+
+ The generic functions are intended for 'clean' architectures and chips,
+ which have no platform-specific IRQ handling quirks. If an architecture
+ needs to implement quirks on the 'flow' level then it can do so by
+ overriding the irqtype. This is also done for compatibility reasons, as
+ most architectures use irqtypes only at the moment.
+
+
+ An architecture could implement all of its IRQ logic via pushing
+ chip handling details into the irqtype's ->start()/->end()/->hold()
+ functions. This is only recommended when the underlying primitives
+ are pure chip primitives without additional quirks. The direct pointer
+ to the chip functions reduces the indirection level by one.
+
+
+
+
+ Chiplevel hardware encapsulation
+
+ The chip level hardware description structure irq_chip
+ contains all the direct chip relevant functions, which
+ can be utilized by the irq_type implementations.
+
+ ack()
+ mask_ack() - Optional, recommended for performance
+ mask()
+ unmask()
+ retrigger() - Optional
+ set_type() - Optional
+ set_wake() - Optional
+
+ These primitives are strictly intended to mean what they say: ack means
+ ACK, masking means masking of an IRQ line, etc. It is up to the flow
+ handler(s) to use these basic units of lowlevel functionality.
+
+
+
+
+
+ __do_IRQ entry point
+
+ The original implementation __do_IRQ() is an alternative entry
+ point for all types of interrupts.
+
+
+ This handler turned out to be not suitable for all
+ interrupt hardware and was therefore reimplemented with split
+ functionality for edge/level/simple/percpu interrupts. This is not
+ only a functional optimization. It also shortens code paths for
+ interrupts.
+
+
+ To make use of the split implementation, replace the call to
+ __do_IRQ by a call to desc->handler->handle_irq() and associate
+ the appropriate handler function to desc->handler->handle_irq().
+ In most cases the generic type and handler implementations should
+ be sufficient.
+
+
+
+
+ Locking on SMP
+
+ The locking of chip registers is up to the architecture that
+ defines the chip primitives. There is a chip->lock field that can be used
+ for serialization, but the generic layer does not touch it. The per-irq
+ structure is protected via desc->lock, by the generic layer.
+
+
+
+ Structures
+
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the generic IRQ layer.
+
+!Iinclude/linux/irq.h
+
+
+
+ Public Functions Provided
+
+ This chapter contains the autogenerated documentation of the kernel API functions
+ which are exported.
+
+!Ekernel/irq/manage.c
+
+
+
+ Internal Functions Provided
+
+ This chapter contains the autogenerated documentation of the internal functions.
+
+!Ikernel/irq/handle.c
+
+
+
+ Credits
+
+ The following people have contributed to this document:
+
+ Thomas Gleixnertglx@linutronix.de
+ Ingo Molnarmingo@elte.hu
+
+
+
+
Index: linux-2.6.16/Documentation/RCU/proc.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/Documentation/RCU/proc.txt 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,207 @@
+/proc Filesystem Entries for RCU
+
+
+CONFIG_RCU_STATS
+
+The CONFIG_RCU_STATS config option is available only in conjunction with
+CONFIG_PREEMPT_RCU. It makes four /proc entries available, namely: rcuctrs,
+rcuptrs, rcugp, and rcustats.
+
+/proc/rcuctrs
+
+ CPU last cur
+ 0 1 1
+ 1 1 1
+ 2 1 1
+ 3 0 2
+ ggp = 230725
+
+This displays the number of processes that started RCU read-side critical
+sections on each CPU. In absence of preemption, the "last" and "cur"
+counts for a given CPU will always sum to one. Therefore, in the example
+output above, each CPU has started one RCU read-side critical section
+that was later preempted. The "last" column counts RCU read-side critical
+sections that started prior to the last counter flip, while the "cur"
+column counts critical sections that started after the last counter flip.
+
+The "ggp" count is a count of the number of counter flips since boot.
+Since this is shown as an odd number, the "cur" counts are stored in
+the zero-th element of each of the per-CPU arrays, and the "last" counts
+are stored in the first element of each of the per-CPU arrays.
+
+
+/proc/rcuptrs
+
+ nl=c04c7160/c04c7960 nt=c04c72d0
+ wl=c04c7168/c04c794c wt=c04c72bc dl=c04c7170/00000000 dt=c04c7170
+
+This displays the head and tail of each of CONFIG_PREEMPT_RCU's three
+callback lists. This will soon change to display this on a per-CPU
+basis, since each CPU will soon have its own set of callback lists.
+In the example above, the "next" list header is located at hex address
+0xc04c7160, the first element on the list at hex address 0xc04c7960,
+and the last element on the list at hex address 0xc04c72d0. The "wl="
+and "wt=" output is similar for the "wait" list, and the "dl=" and "dt="
+output for the "done" list. The "done" list is normally emptied very
+quickly after being filled, so will usually be empty as shown above.
+Note that the tail pointer points into the list header in this case.
+
+Callbacks are placed in the "next" list by call_rcu(), moved to the
+"wait" list after the next counter flip, and moved to the "done" list
+on the counter flip after that. Once on the "done" list, the callbacks
+are invoked.
+
+
+/proc/rcugp
+
+ oldggp=241419 newggp=241421
+
+This entry invokes synchronize_rcu() and prints out the number of counter
+flips since boot before and after the synchronize_rcu(). These two
+numbers will always differ by at least two. Unless RCU is broken. ;-)
+
+
+/proc/rcustats
+
+ ggp=242416 lgp=242416 sr=0 rcc=396233
+ na=2090938 nl=9 wa=2090929 wl=9 dl=0 dr=2090920 di=2090920
+ rtf1=22230730 rtf2=20139162 rtf3=242416 rtfe1=2085911 rtfe2=5657 rtfe3=19896746
+
+The quantities printed are as follows:
+
+o "ggp=": The number of flips since boot.
+
+o "lgp=": The number of flips sensed by the local structure since
+ boot. This will soon be per-CPU.
+
+o "sr=": The number of explicit calls to synchronize_rcu().
+ Except that this is currently broken, so always reads as zero.
+ It is likely to be removed...
+
+o "rcc=": The number of calls to rcu_check_callbacks().
+
+o "na=": The number of callbacks that call_rcu() has registered
+ since boot.
+
+o "nl=": The number of callbacks currently on the "next" list.
+
+o "wa=": The number of callbacks that have moved to the "wait"
+ list since boot.
+
+o "wl=": The number of callbacks currently on the "wait" list.
+
+o "da=": The number of callbacks that have been moved to the
+ "done" list since boot.
+
+o "dl=": The number of callbacks currently on the "done" list.
+
+o "dr=": The number of callbacks that have been removed from the
+ "done" list since boot.
+
+o "di=": The number of callbacks that have been invoked after being
+ removed from the "done" list.
+
+o "rtf1=": The number of attempts to flip the counters.
+
+o "rtf2=": The number of attempts to flip the counters that successfully
+ acquired the fliplock.
+
+o "rtf3=": The number of successful counter flips.
+
+o "rtfe1=": The number of attempts to flip the counters that failed
+ due to the lock being held by someone else.
+
+o "rtfe2=": The number of attempts to flip the counters that were
+ abandoned due to someone else doing the job for us.
+
+o "rtfe3=": The number of attempts to flip the counters that failed
+ due to some task still being in an RCU read-side critical section
+ starting from before the last successful counter flip.
+
+
+CONFIG_RCU_TORTURE_TEST
+
+The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
+implementations. It makes three /proc entries available, namely: rcutw,
+rcutr, and rcuts.
+
+
+/proc/rcutw
+
+Reading this entry starts a new torture test, or ends an earlier one
+if one is already in progress (in other words, there can be only one
+writer at a time). This sleeps uninterruptibly, so be sure to run
+it in the background. One could argue that it would be good to have
+multiple writers, but Linux uses RCU heavily enough that you will get
+write-side contention whether you want it or not. If you want additional
+write-side contention, repeatedly create and destroy several large file
+trees in parallel. Or use some other RCU-protected update.
+
+
+/proc/rcutr
+
+Reading this entry starts a new torture reader, which runs until sent
+a signal (e.g., control-C). If testing an RCU implementation with
+preemptible read-side critical sections, make sure to spawn at least
+two /proc/rcutr instances for each CPU.
+
+
+/proc/rcuts
+
+Displays the current state of the torture test:
+
+ ggp = 20961
+ rtc: c04496f4 ver: 8734 tfle: 0 rta: 8734 rtaf: 0 rtf: 8715
+ Reader Pipe: 88024120 63914 0 0 0 0 0 0 0 0 0
+ Reader Batch: 88024097 63937 0 0 0 0 0 0 0 0
+ Free-Block Circulation: 8733 8731 8729 8727 8725 8723 8721 8719 8717 8715 0
+
+The entries are as follows:
+
+o "ggp": The number of counter flips (or batches) since boot.
+
+o "rtc": The hexadecimal address of the structure currently visible
+ to readers.
+
+o "ver": The number of times since boot that the rcutw writer task
+ has changed the structure visible to readers.
+
+o "tfle": If non-zero, indicates that the "torture freelist"
+ containing structures to be placed into the "rtc" area is empty.
+ This condition is important, since it can fool you into thinking
+ that RCU is working when it is not. :-/
+
+o "rta": Number of structures allocated from the torture freelist.
+
+o "rtaf": Number of allocations from the torture freelist that have
+ failed due to the list being empty.
+
+o "rtf": Number of frees into the torture freelist.
+
+o "Reader Pipe": Histogram of "ages" of structures seen by readers.
+ If any entries past the first two are non-zero, RCU is broken.
+ And /proc/rcuts prints "!!!" to make sure you notice. The age
+ of a newly allocated structure is zero, it becomes one when
+ removed from reader visibility, and is incremented once per
+ grace period subsequently -- and is freed after passing through
+ (RCU_TORTURE_PIPE_LEN-2) grace periods.
+
+ The output displayed above was taken from a correctly working
+ RCU. If you want to see what it looks like when broken, break
+ it yourself. ;-)
+
+o "Reader Batch": Another histogram of "ages" of structures seen
+ by readers, but in terms of counter flips (or batches) rather
+ than in terms of grace periods. The legal number of non-zero
+ entries is again two. The reason for this separate view is
+ that it is easier to get the third entry to show up in the
+ "Reader Batch" list than in the "Reader Pipe" list.
+
+o "Free-Block Circulation": Shows the number of torture structures
+ that have reached a given point in the pipeline. The first element
+ should closely correspond to the number of structures allocated,
+ the second to the number that have been removed from reader view,
+ and all but the last remaining to the corresponding number of
+ passes through a grace period. The last entry should be zero,
+ as it is only incremented if a torture structure's counter
+ somehow gets incremented farther than it should.
Index: linux-2.6.16/Documentation/RCU/whatisRCU.txt
===================================================================
--- linux-2.6.16.orig/Documentation/RCU/whatisRCU.txt 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/Documentation/RCU/whatisRCU.txt 2006-10-19 16:52:29.000000000 +0200
@@ -790,7 +790,6 @@ RCU pointer update:
RCU grace period:
- synchronize_kernel (deprecated)
synchronize_net
synchronize_sched
synchronize_rcu
Index: linux-2.6.16/Documentation/feature-removal-schedule.txt
===================================================================
--- linux-2.6.16.orig/Documentation/feature-removal-schedule.txt 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/Documentation/feature-removal-schedule.txt 2006-10-19 16:52:29.000000000 +0200
@@ -32,21 +32,6 @@ Who: Adrian Bunk
---------------------------
-What: RCU API moves to EXPORT_SYMBOL_GPL
-When: April 2006
-Files: include/linux/rcupdate.h, kernel/rcupdate.c
-Why: Outside of Linux, the only implementations of anything even
- vaguely resembling RCU that I am aware of are in DYNIX/ptx,
- VM/XA, Tornado, and K42. I do not expect anyone to port binary
- drivers or kernel modules from any of these, since the first two
- are owned by IBM and the last two are open-source research OSes.
- So these will move to GPL after a grace period to allow
- people, who might be using implementations that I am not aware
- of, to adjust to this upcoming change.
-Who: Paul E. McKenney
-
----------------------------
-
What: raw1394: requests of type RAW1394_REQ_ISO_SEND, RAW1394_REQ_ISO_LISTEN
When: November 2005
Why: Deprecated in favour of the new ioctl-based rawiso interface, which is
Index: linux-2.6.16/Documentation/kernel-parameters.txt
===================================================================
--- linux-2.6.16.orig/Documentation/kernel-parameters.txt 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/Documentation/kernel-parameters.txt 2006-10-19 16:52:29.000000000 +0200
@@ -52,6 +52,7 @@ restrictions referred to are that the re
MTD MTD support is enabled.
NET Appropriate network support is enabled.
NUMA NUMA support is enabled.
+ GENERIC_TIME The generic timeofday code is enabled.
NFS Appropriate NFS support is enabled.
OSS OSS sound support is enabled.
PARIDE The ParIDE subsystem is enabled.
@@ -329,10 +330,11 @@ running once the system is up.
Value can be changed at runtime via
/selinux/checkreqprot.
- clock= [BUGS=IA-32,HW] gettimeofday timesource override.
- Forces specified timesource (if avaliable) to be used
- when calculating gettimeofday(). If specicified
- timesource is not avalible, it defaults to PIT.
+ clock= [BUGS=IA-32, HW] gettimeofday clocksource override.
+ [Deprecated]
+ Forces specified clocksource (if available) to be used
+ when calculating gettimeofday(). If specified
+ clocksource is not available, it defaults to PIT.
Format: { pit | tsc | cyclone | pmtmr }
disable_8254_timer
@@ -1579,6 +1581,10 @@ running once the system is up.
time Show timing data prefixed to each printk message line
+ clocksource= [GENERIC_TIME] Override the default clocksource
+ Override the default clocksource and use the clocksource
+ with the name specified.
+
tipar.timeout= [HW,PPT]
Set communications timeout in tenths of a second
(default 15).
Index: linux-2.6.16/Documentation/pi-futex.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/Documentation/pi-futex.txt 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,121 @@
+Lightweight PI-futexes
+----------------------
+
+We are calling them lightweight for 3 reasons:
+
+ - in the user-space fastpath a PI-enabled futex involves no kernel work
+ (or any other PI complexity) at all. No registration, no extra kernel
+ calls - just pure fast atomic ops in userspace.
+
+ - even in the slowpath, the system call and scheduling pattern is very
+ similar to normal futexes.
+
+ - the in-kernel PI implementation is streamlined around the mutex
+ abstraction, with strict rules that keep the implementation
+ relatively simple: only a single owner may own a lock (i.e. no
+ read-write lock support), only the owner may unlock a lock, no
+ recursive locking, etc.
+
+Priority Inheritance - why?
+---------------------------
+
+The short reply: user-space PI helps achieving/improving determinism for
+user-space applications. In the best-case, it can help achieve
+determinism and well-bound latencies. Even in the worst-case, PI will
+improve the statistical distribution of locking related application
+delays.
+
+The longer reply:
+-----------------
+
+Firstly, sharing locks between multiple tasks is a common programming
+technique that often cannot be replaced with lockless algorithms. As we
+can see it in the kernel [which is a quite complex program in itself],
+lockless structures are rather the exception than the norm - the current
+ratio of lockless vs. locky code for shared data structures is somewhere
+between 1:10 and 1:100. Lockless is hard, and the complexity of lockless
+algorithms often endangers the ability to do robust reviews of said code.
+I.e. critical RT apps often choose lock structures to protect critical
+data structures, instead of lockless algorithms. Furthermore, there are
+cases (like shared hardware, or other resource limits) where lockless
+access is mathematically impossible.
+
+Media players (such as Jack) are an example of reasonable application
+design with multiple tasks (with multiple priority levels) sharing
+short-held locks: for example, a highprio audio playback thread is
+combined with medium-prio construct-audio-data threads and low-prio
+display-colory-stuff threads. Add video and decoding to the mix and
+we've got even more priority levels.
+
+So once we accept that synchronization objects (locks) are an
+unavoidable fact of life, and once we accept that multi-task userspace
+apps have a very fair expectation of being able to use locks, we've got
+to think about how to offer the option of a deterministic locking
+implementation to user-space.
+
+Most of the technical counter-arguments against doing priority
+inheritance only apply to kernel-space locks. But user-space locks are
+different, there we cannot disable interrupts or make the task
+non-preemptible in a critical section, so the 'use spinlocks' argument
+does not apply (user-space spinlocks have the same priority inversion
+problems as other user-space locking constructs). Fact is, pretty much
+the only technique that currently enables good determinism for userspace
+locks (such as futex-based pthread mutexes) is priority inheritance:
+
+Currently (without PI), if a high-prio and a low-prio task shares a lock
+[this is a quite common scenario for most non-trivial RT applications],
+even if all critical sections are coded carefully to be deterministic
+(i.e. all critical sections are short in duration and only execute a
+limited number of instructions), the kernel cannot guarantee any
+deterministic execution of the high-prio task: any medium-priority task
+could preempt the low-prio task while it holds the shared lock and
+executes the critical section, and could delay it indefinitely.
+
+Implementation:
+---------------
+
+As mentioned before, the userspace fastpath of PI-enabled pthread
+mutexes involves no kernel work at all - they behave quite similarly to
+normal futex-based locks: a 0 value means unlocked, and a value==TID
+means locked. (This is the same method as used by list-based robust
+futexes.) Userspace uses atomic ops to lock/unlock these mutexes without
+entering the kernel.
+
+To handle the slowpath, we have added two new futex ops:
+
+ FUTEX_LOCK_PI
+ FUTEX_UNLOCK_PI
+
+If the lock-acquire fastpath fails, [i.e. an atomic transition from 0 to
+TID fails], then FUTEX_LOCK_PI is called. The kernel does all the
+remaining work: if there is no futex-queue attached to the futex address
+yet then the code looks up the task that owns the futex [it has put its
+own TID into the futex value], and attaches a 'PI state' structure to
+the futex-queue. The pi_state includes an rt-mutex, which is a PI-aware,
+kernel-based synchronization object. The 'other' task is made the owner
+of the rt-mutex, and the FUTEX_WAITERS bit is atomically set in the
+futex value. Then this task tries to lock the rt-mutex, on which it
+blocks. Once it returns, it has the mutex acquired, and it sets the
+futex value to its own TID and returns. Userspace has no other work to
+perform - it now owns the lock, and futex value contains
+FUTEX_WAITERS|TID.
+
+If the unlock side fastpath succeeds, [i.e. userspace manages to do a
+TID -> 0 atomic transition of the futex value], then no kernel work is
+triggered.
+
+If the unlock fastpath fails (because the FUTEX_WAITERS bit is set),
+then FUTEX_UNLOCK_PI is called, and the kernel unlocks the futex on the
+behalf of userspace - and it also unlocks the attached
+pi_state->rt_mutex and thus wakes up any potential waiters.
+
+Note that under this approach, contrary to previous PI-futex approaches,
+there is no prior 'registration' of a PI-futex. [which is not quite
+possible anyway, due to existing ABI properties of pthread mutexes.]
+
+Also, under this scheme, 'robustness' and 'PI' are two orthogonal
+properties of futexes, and all four combinations are possible: futex,
+robust-futex, PI-futex, robust+PI-futex.
+
+More details about priority inheritance can be found in
+Documentation/rtmutex.txt.
Index: linux-2.6.16/Documentation/robust-futex-ABI.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/Documentation/robust-futex-ABI.txt 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,182 @@
+Started by Paul Jackson
+
+The robust futex ABI
+--------------------
+
+Robust_futexes provide a mechanism that is used in addition to normal
+futexes, for kernel assist of cleanup of held locks on task exit.
+
+The interesting data as to what futexes a thread is holding is kept on a
+linked list in user space, where it can be updated efficiently as locks
+are taken and dropped, without kernel intervention. The only additional
+kernel intervention required for robust_futexes above and beyond what is
+required for futexes is:
+
+ 1) a one time call, per thread, to tell the kernel where its list of
+ held robust_futexes begins, and
+ 2) internal kernel code at exit, to handle any listed locks held
+ by the exiting thread.
+
+The existing normal futexes already provide a "Fast Userspace Locking"
+mechanism, which handles uncontested locking without needing a system
+call, and handles contested locking by maintaining a list of waiting
+threads in the kernel. Options on the sys_futex(2) system call support
+waiting on a particular futex, and waking up the next waiter on a
+particular futex.
+
+For robust_futexes to work, the user code (typically in a library such
+as glibc linked with the application) has to manage and place the
+necessary list elements exactly as the kernel expects them. If it fails
+to do so, then improperly listed locks will not be cleaned up on exit,
+probably causing deadlock or other such failure of the other threads
+waiting on the same locks.
+
+A thread that anticipates possibly using robust_futexes should first
+issue the system call:
+
+ asmlinkage long
+ sys_set_robust_list(struct robust_list_head __user *head, size_t len);
+
+The pointer 'head' points to a structure in the threads address space
+consisting of three words. Each word is 32 bits on 32 bit arch's, or 64
+bits on 64 bit arch's, and local byte order. Each thread should have
+its own thread private 'head'.
+
+If a thread is running in 32 bit compatibility mode on a 64 bit native arch
+kernel, then it can actually have two such structures - one using 32 bit
+words for 32 bit compatibility mode, and one using 64 bit words for 64
+bit native mode. The kernel, if it is a 64 bit kernel supporting 32 bit
+compatibility mode, will attempt to process both lists on each task
+exit, if the corresponding sys_set_robust_list() call has been made to
+setup that list.
+
+ The first word in the memory structure at 'head' contains a
+ pointer to a single linked list of 'lock entries', one per lock,
+ as described below. If the list is empty, the pointer will point
+ to itself, 'head'. The last 'lock entry' points back to the 'head'.
+
+ The second word, called 'offset', specifies the offset from the
+ address of the associated 'lock entry', plus or minus, of what will
+ be called the 'lock word', from that 'lock entry'. The 'lock word'
+ is always a 32 bit word, unlike the other words above. The 'lock
+ word' holds 3 flag bits in the upper 3 bits, and the thread id (TID)
+ of the thread holding the lock in the bottom 29 bits. See further
+ below for a description of the flag bits.
+
+ The third word, called 'list_op_pending', contains a transient copy of
+ the address of the 'lock entry', during list insertion and removal,
+ and is needed to correctly resolve races should a thread exit while
+ in the middle of a locking or unlocking operation.
+
+Each 'lock entry' on the single linked list starting at 'head' consists
+of just a single word, pointing to the next 'lock entry', or back to
+'head' if there are no more entries. In addition, nearby to each 'lock
+entry', at an offset from the 'lock entry' specified by the 'offset'
+word, is one 'lock word'.
+
+The 'lock word' is always 32 bits, and is intended to be the same 32 bit
+lock variable used by the futex mechanism, in conjunction with
+robust_futexes. The kernel will only be able to wakeup the next thread
+waiting for a lock on a threads exit if that next thread used the futex
+mechanism to register the address of that 'lock word' with the kernel.
+
+For each futex lock currently held by a thread, if it wants this
+robust_futex support for exit cleanup of that lock, it should have one
+'lock entry' on this list, with its associated 'lock word' at the
+specified 'offset'. Should a thread die while holding any such locks,
+the kernel will walk this list, mark any such locks with a bit
+indicating their holder died, and wakeup the next thread waiting for
+that lock using the futex mechanism.
+
+When a thread has invoked the above system call to indicate it
+anticipates using robust_futexes, the kernel stores the passed in 'head'
+pointer for that task. The task may retrieve that value later on by
+using the system call:
+
+ asmlinkage long
+ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+ size_t __user *len_ptr);
+
+It is anticipated that threads will use robust_futexes embedded in
+larger, user level locking structures, one per lock. The kernel
+robust_futex mechanism doesn't care what else is in that structure, so
+long as the 'offset' to the 'lock word' is the same for all
+robust_futexes used by that thread. The thread should link those locks
+it currently holds using the 'lock entry' pointers. It may also have
+other links between the locks, such as the reverse side of a double
+linked list, but that doesn't matter to the kernel.
+
+By keeping its locks linked this way, on a list starting with a 'head'
+pointer known to the kernel, the kernel can provide to a thread the
+essential service available for robust_futexes, which is to help clean
+up locks held at the time of (a perhaps unexpected) exit.
+
+Actual locking and unlocking, during normal operations, is handled
+entirely by user level code in the contending threads, and by the
+existing futex mechanism to wait for, and wakeup, locks. The kernels
+only essential involvement in robust_futexes is to remember where the
+list 'head' is, and to walk the list on thread exit, handling locks
+still held by the departing thread, as described below.
+
+There may exist thousands of futex lock structures in a threads shared
+memory, on various data structures, at a given point in time. Only those
+lock structures for locks currently held by that thread should be on
+that thread's robust_futex linked lock list at a given time.
+
+A given futex lock structure in a user shared memory region may be held
+at different times by any of the threads with access to that region. The
+thread currently holding such a lock, if any, is marked with the threads
+TID in the lower 29 bits of the 'lock word'.
+
+When adding or removing a lock from its list of held locks, in order for
+the kernel to correctly handle lock cleanup regardless of when the task
+exits (perhaps it gets an unexpected signal 9 in the middle of
+manipulating this list), the user code must observe the following
+protocol on 'lock entry' insertion and removal:
+
+On insertion:
+ 1) set the 'list_op_pending' word to the address of the 'lock word'
+ to be inserted,
+ 2) acquire the futex lock,
+ 3) add the lock entry, with its thread id (TID) in the bottom 29 bits
+ of the 'lock word', to the linked list starting at 'head', and
+ 4) clear the 'list_op_pending' word.
+
+On removal:
+ 1) set the 'list_op_pending' word to the address of the 'lock word'
+ to be removed,
+ 2) remove the lock entry for this lock from the 'head' list,
+ 3) release the futex lock, and
+ 4) clear the 'list_op_pending' word.
+
+On exit, the kernel will consider the address stored in
+'list_op_pending' and the address of each 'lock word' found by walking
+the list starting at 'head'. For each such address, if the bottom 29
+bits of the 'lock word' at offset 'offset' from that address equals the
+exiting threads TID, then the kernel will do two things:
+
+ 1) if bit 31 (0x80000000) is set in that word, then attempt a futex
+ wakeup on that address, which will waken the next thread that has
+ used the futex mechanism to wait on that address, and
+ 2) atomically set bit 30 (0x40000000) in the 'lock word'.
+
+In the above, bit 31 was set by futex waiters on that lock to indicate
+they were waiting, and bit 30 is set by the kernel to indicate that the
+lock owner died holding the lock.
+
+The kernel exit code will silently stop scanning the list further if at
+any point:
+
+ 1) the 'head' pointer or a subsequent linked list pointer
+ is not a valid address of a user space word
+ 2) the calculated location of the 'lock word' (address plus
+ 'offset') is not the valid address of a 32 bit user space
+ word
+ 3) if the list contains more than 1 million (subject to
+ future kernel configuration changes) elements.
+
+When the kernel sees a list entry whose 'lock word' doesn't have the
+current threads TID in the lower 29 bits, it does nothing with that
+entry, and goes on to the next entry.
+
+Bit 29 (0x20000000) of the 'lock word' is reserved for future use.
Index: linux-2.6.16/Documentation/robust-futexes.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/Documentation/robust-futexes.txt 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,218 @@
+Started by: Ingo Molnar
+
+Background
+----------
+
+what are robust futexes? To answer that, we first need to understand
+what futexes are: normal futexes are special types of locks that in the
+noncontended case can be acquired/released from userspace without having
+to enter the kernel.
+
+A futex is in essence a user-space address, e.g. a 32-bit lock variable
+field. If userspace notices contention (the lock is already owned and
+someone else wants to grab it too) then the lock is marked with a value
+that says "there's a waiter pending", and the sys_futex(FUTEX_WAIT)
+syscall is used to wait for the other guy to release it. The kernel
+creates a 'futex queue' internally, so that it can later on match up the
+waiter with the waker - without them having to know about each other.
+When the owner thread releases the futex, it notices (via the variable
+value) that there were waiter(s) pending, and does the
+sys_futex(FUTEX_WAKE) syscall to wake them up. Once all waiters have
+taken and released the lock, the futex is again back to 'uncontended'
+state, and there's no in-kernel state associated with it. The kernel
+completely forgets that there ever was a futex at that address. This
+method makes futexes very lightweight and scalable.
+
+"Robustness" is about dealing with crashes while holding a lock: if a
+process exits prematurely while holding a pthread_mutex_t lock that is
+also shared with some other process (e.g. yum segfaults while holding a
+pthread_mutex_t, or yum is kill -9-ed), then waiters for that lock need
+to be notified that the last owner of the lock exited in some irregular
+way.
+
+To solve such types of problems, "robust mutex" userspace APIs were
+created: pthread_mutex_lock() returns an error value if the owner exits
+prematurely - and the new owner can decide whether the data protected by
+the lock can be recovered safely.
+
+There is a big conceptual problem with futex based mutexes though: it is
+the kernel that destroys the owner task (e.g. due to a SEGFAULT), but
+the kernel cannot help with the cleanup: if there is no 'futex queue'
+(and in most cases there is none, futexes being fast lightweight locks)
+then the kernel has no information to clean up after the held lock!
+Userspace has no chance to clean up after the lock either - userspace is
+the one that crashes, so it has no opportunity to clean up. Catch-22.
+
+In practice, when e.g. yum is kill -9-ed (or segfaults), a system reboot
+is needed to release that futex based lock. This is one of the leading
+bugreports against yum.
+
+To solve this problem, the traditional approach was to extend the vma
+(virtual memory area descriptor) concept to have a notion of 'pending
+robust futexes attached to this area'. This approach requires 3 new
+syscall variants to sys_futex(): FUTEX_REGISTER, FUTEX_DEREGISTER and
+FUTEX_RECOVER. At do_exit() time, all vmas are searched to see whether
+they have a robust_head set. This approach has two fundamental problems
+left:
+
+ - it has quite complex locking and race scenarios. The vma-based
+ approach had been pending for years, but they are still not completely
+ reliable.
+
+ - they have to scan _every_ vma at sys_exit() time, per thread!
+
+The second disadvantage is a real killer: pthread_exit() takes around 1
+microsecond on Linux, but with thousands (or tens of thousands) of vmas
+every pthread_exit() takes a millisecond or more, also totally
+destroying the CPU's L1 and L2 caches!
+
+This is very much noticeable even for normal process sys_exit_group()
+calls: the kernel has to do the vma scanning unconditionally! (this is
+because the kernel has no knowledge about how many robust futexes there
+are to be cleaned up, because a robust futex might have been registered
+in another task, and the futex variable might have been simply mmap()-ed
+into this process's address space).
+
+This huge overhead forced the creation of CONFIG_FUTEX_ROBUST so that
+normal kernels can turn it off, but worse than that: the overhead makes
+robust futexes impractical for any type of generic Linux distribution.
+
+So something had to be done.
+
+New approach to robust futexes
+------------------------------
+
+At the heart of this new approach there is a per-thread private list of
+robust locks that userspace is holding (maintained by glibc) - which
+userspace list is registered with the kernel via a new syscall [this
+registration happens at most once per thread lifetime]. At do_exit()
+time, the kernel checks this user-space list: are there any robust futex
+locks to be cleaned up?
+
+In the common case, at do_exit() time, there is no list registered, so
+the cost of robust futexes is just a simple current->robust_list != NULL
+comparison. If the thread has registered a list, then normally the list
+is empty. If the thread/process crashed or terminated in some incorrect
+way then the list might be non-empty: in this case the kernel carefully
+walks the list [not trusting it], and marks all locks that are owned by
+this thread with the FUTEX_OWNER_DIED bit, and wakes up one waiter (if
+any).
+
+The list is guaranteed to be private and per-thread at do_exit() time,
+so it can be accessed by the kernel in a lockless way.
+
+There is one race possible though: since adding to and removing from the
+list is done after the futex is acquired by glibc, there is a few
+instructions window for the thread (or process) to die there, leaving
+the futex hung. To protect against this possibility, userspace (glibc)
+also maintains a simple per-thread 'list_op_pending' field, to allow the
+kernel to clean up if the thread dies after acquiring the lock, but just
+before it could have added itself to the list. Glibc sets this
+list_op_pending field before it tries to acquire the futex, and clears
+it after the list-add (or list-remove) has finished.
+
+That's all that is needed - all the rest of robust-futex cleanup is done
+in userspace [just like with the previous patches].
+
+Ulrich Drepper has implemented the necessary glibc support for this new
+mechanism, which fully enables robust mutexes.
+
+Key differences of this userspace-list based approach, compared to the
+vma based method:
+
+ - it's much, much faster: at thread exit time, there's no need to loop
+ over every vma (!), which the VM-based method has to do. Only a very
+ simple 'is the list empty' op is done.
+
+ - no VM changes are needed - 'struct address_space' is left alone.
+
+ - no registration of individual locks is needed: robust mutexes don't
+ need any extra per-lock syscalls. Robust mutexes thus become a very
+ lightweight primitive - so they don't force the application designer
+ to make a hard choice between performance and robustness - robust
+ mutexes are just as fast.
+
+ - no per-lock kernel allocation happens.
+
+ - no resource limits are needed.
+
+ - no kernel-space recovery call (FUTEX_RECOVER) is needed.
+
+ - the implementation and the locking is "obvious", and there are no
+ interactions with the VM.
+
+Performance
+-----------
+
+I have benchmarked the time needed for the kernel to process a list of 1
+million (!) held locks, using the new method [on a 2GHz CPU]:
+
+ - with FUTEX_WAIT set [contended mutex]: 130 msecs
+ - without FUTEX_WAIT set [uncontended mutex]: 30 msecs
+
+I have also measured an approach where glibc does the lock notification
+[which it currently does for !pshared robust mutexes], and that took 256
+msecs - clearly slower, due to the 1 million FUTEX_WAKE syscalls
+userspace had to do.
+
+(1 million held locks are unheard of - we expect at most a handful of
+locks to be held at a time. Nevertheless it's nice to know that this
+approach scales nicely.)
+
+Implementation details
+----------------------
+
+The patch adds two new syscalls: one to register the userspace list, and
+one to query the registered list pointer:
+
+ asmlinkage long
+ sys_set_robust_list(struct robust_list_head __user *head,
+ size_t len);
+
+ asmlinkage long
+ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+ size_t __user *len_ptr);
+
+List registration is very fast: the pointer is simply stored in
+current->robust_list. [Note that in the future, if robust futexes become
+widespread, we could extend sys_clone() to register a robust-list head
+for new threads, without the need of another syscall.]
+
+So there is virtually zero overhead for tasks not using robust futexes,
+and even for robust futex users, there is only one extra syscall per
+thread lifetime, and the cleanup operation, if it happens, is fast and
+straightforward. The kernel doesn't have any internal distinction between
+robust and normal futexes.
+
+If a futex is found to be held at exit time, the kernel sets the
+following bit of the futex word:
+
+ #define FUTEX_OWNER_DIED 0x40000000
+
+and wakes up the next futex waiter (if any). User-space does the rest of
+the cleanup.
+
+Otherwise, robust futexes are acquired by glibc by putting the TID into
+the futex field atomically. Waiters set the FUTEX_WAITERS bit:
+
+ #define FUTEX_WAITERS 0x80000000
+
+and the remaining bits are for the TID.
+
+Testing, architecture support
+-----------------------------
+
+i've tested the new syscalls on x86 and x86_64, and have made sure the
+parsing of the userspace list is robust [ ;-) ] even if the list is
+deliberately corrupted.
+
+i386 and x86_64 syscalls are wired up at the moment, and Ulrich has
+tested the new glibc code (on x86_64 and i386), and it works for his
+robust-mutex testcases.
+
+All other architectures should build just fine too - but they wont have
+the new syscalls yet.
+
+Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
+inline function before writing up the syscalls (that function returns
+-ENOSYS right now).
Index: linux-2.6.16/Documentation/rt-mutex.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/Documentation/rt-mutex.txt 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,74 @@
+RT-mutex subsystem with PI support
+----------------------------------
+
+RT-mutexes with priority inheritance are used to support PI-futexes,
+which enable pthread_mutex_t priority inheritance attributes
+(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
+about PI-futexes.]
+
+This technology was developed in the -rt tree and streamlined for
+pthread_mutex support.
+
+Basic principles:
+-----------------
+
+RT-mutexes extend the semantics of simple mutexes by the priority
+inheritance protocol.
+
+A low priority owner of a rt-mutex inherits the priority of a higher
+priority waiter until the rt-mutex is released. If the temporarily
+boosted owner blocks on a rt-mutex itself it propagates the priority
+boosting to the owner of the other rt_mutex it gets blocked on. The
+priority boosting is immediately removed once the rt_mutex has been
+unlocked.
+
+This approach allows us to shorten the block of high-prio tasks on
+mutexes which protect shared resources. Priority inheritance is not a
+magic bullet for poorly designed applications, but it allows
+well-designed applications to use userspace locks in critical parts of
+an high priority thread, without losing determinism.
+
+The enqueueing of the waiters into the rtmutex waiter list is done in
+priority order. For same priorities FIFO order is chosen. For each
+rtmutex, only the top priority waiter is enqueued into the owner's
+priority waiters list. This list too queues in priority order. Whenever
+the top priority waiter of a task changes (for example it timed out or
+got a signal), the priority of the owner task is readjusted. [The
+priority enqueueing is handled by "plists", see include/linux/plist.h
+for more details.]
+
+RT-mutexes are optimized for fastpath operations and have no internal
+locking overhead when locking an uncontended mutex or unlocking a mutex
+without waiters. The optimized fastpath operations require cmpxchg
+support. [If that is not available then the rt-mutex internal spinlock
+is used]
+
+The state of the rt-mutex is tracked via the owner field of the rt-mutex
+structure:
+
+rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1
+are used to keep track of the "owner is pending" and "rtmutex has
+waiters" state.
+
+ owner bit1 bit0
+ NULL 0 0 mutex is free (fast acquire possible)
+ NULL 0 1 invalid state
+ NULL 1 0 invalid state
+ NULL 1 1 invalid state
+ taskpointer 0 0 mutex is held (fast release possible)
+ taskpointer 0 1 task is pending owner
+ taskpointer 1 0 mutex is held and has waiters
+ taskpointer 1 1 task is pending owner and mutex has waiters
+
+Pending-ownership handling is a performance optimization:
+pending-ownership is assigned to the first (highest priority) waiter of
+the mutex, when the mutex is released. The thread is woken up and once
+it starts executing it can acquire the mutex. Until the mutex is taken
+by it (bit 0 is cleared) a competing higher priority thread can "steal"
+the mutex which puts the woken up thread back on the waiters list.
+
+The pending-ownership optimization is especially important for the
+uninterrupted workflow of high-prio tasks which repeatedly
+takes/releases locks that have lower-prio waiters. Without this
+optimization the higher-prio thread would ping-pong to the lower-prio
+task [because at unlock time we always assign a new owner].
Index: linux-2.6.16/Documentation/timekeeping.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/Documentation/timekeeping.txt 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,347 @@
+How timekeeping works with CONFIG_GENERIC_TIME
+========================================================================
+
+The generic timekeeping code maintains and allows access to the systems
+understanding of how much time has passed from a certain point. However, in
+order to measure the passing of time, the generic timekeeping code relies on
+the clocksource abstraction. A clocksource abstracts a free running counter
+whose value increases at a known frequency.
+
+In the generic timekeeping code, we use a pointer to a selected clocksource to
+measure the passing of time.
+
+struct clocksource *clock
+
+The clocksource has some limitations however. Since it is likely of fixed width,
+it will not increment forever and will overflow. In order to still properly
+keep time, we must occasionally accumulate an interval of time. In the generic
+timekeeping code, we accumulate the amount of time since the system booted
+into the value system_time, which keeps nanosecond resolution in a ktime_t
+storage.
+
+ktime_t system_time
+
+Since it is likely your system has not been running continually since midnight on
+the 1st of January in 1970, we must provide an offset from that time in
+accordance with conventions. This only occasionally changed (via
+settimeofday()) offset is the wall_time_offset value, which is also stored as a
+ktime_t.
+
+ktime_t wall_time_offset
+
+
+Since we accumulate time in intervals, we need a base cycle value that we can
+use to generate an offset from the time value kept in system_time. We store
+this value in cycle_last.
+
+cycle_t cycle_last;
+
+
+Further since all clocks drift somewhat from each other, we use the adjustment
+values provided via adjtimex() to correct our clocksource frequency for each
+interval. This frequency adjustment value is stored in ntp_adj.
+
+long ntp_adj;
+
+Now that we've covered the core global variables for timekeeping, lets look at
+how we maintain these values.
+
+As stated above, we want to avoid the clocksource from overflowing on us, so we
+accumulate a time interval periodically. This periodic accumulation function is
+called timeofday_periodic_hook(). In simplified pseudo code, it logically is
+presented as:
+
+timeofday_periodic_hook():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ system_time += nsec
+ cycle_last = cycle_now
+
+ /* do other stuff */
+
+You can see we read the cycle value from the clocksource, calculate a cycle
+delta for the interval since we last called timeofday_periodic_hook(), convert
+that cycle delta to a nanosecond interval (for now ignore ntp_adj), add it to
+the system time and finally set our cycle_last value to cycle_now for the next
+interval. Using this simple algorithm we can correctly measure and record the
+passing of time.
+
+But just storing this info isn't very useful, we also want to make it available
+to be used elsewhere. So how do we provide a notion of how much time has passed
+in between calls to timeofday_periodic_hook()?
+
+First, lets create a function that calculates the time since the last call to
+timeofday_periodic_hook().
+
+get_nsec_offset():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ return nsec
+
+Here you can see, we read the clocksource, calculate a cycle interval, and
+convert that to a nanosecond interval. Just like how it is done in
+timeofday_periodic_hook!
+
+Now lets use this function to provide the number of nanoseconds that the system
+has been running:
+
+do_monotonic_clock():
+ return system_time + get_nsec_offset()
+
+Here we trivially add the nanosecond offset since the last
+timeofday_periodic_hook() to the value of system_time which was stored at the
+last timeofday_periodic_hook().
+
+Note that since we use the same method to calculate time intervals, assuming
+each function is atomic and the clocksource functions as it should, time cannot
+go backward!
+
+Now to get the time of day using the standard convention:
+
+do_gettimeofday():
+ return do_monotonic_clock() + wall_time_offset
+
+We simply add the wall_time_offset, and we have the number of nanoseconds since
+1970 began!
+
+
+Of course, in real life, things are not so static. We have to handle a number
+of dynamic values that may change and affect timekeeping. In order to do these
+safely, we must only change values in-between intervals. This means the
+periodic_hook call must handle these changes.
+
+Since clocksources can be changed while the system is running, we need to check
+for and possibly switch to using new clocksources in the periodic_hook call.
+Further, clocksources may change their frequency. Since this must be done only
+at a safe point, we use the update_callback function pointer (for more details,
+see "How to write a clocksource driver" below), this too must be done
+in-between intervals in the periodic_hook call. Finally, since the ntp
+adjustment made in the cyc2ns conversion is not static, we need to update the
+ntp state machine and calculate a new adjustment value.
+
+This adds some extra pseudo code to the timeofday_periodic_hook function:
+
+timeofday_periodic_hook():
+ cycle_now = read_clocksource(clock)
+ cycle_delta = (cycle_now - cycle_last) & clock->mask
+ nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+ system_time += nsec
+ cycle_last = cycle_now
+
+ next = get_next_clocksource()
+ if(next != clock):
+ cycle_last = read_clocksource(next)
+ clock = next
+
+ if(clock->update_callback):
+ clock->update_callback()
+
+ ntp_advance(nsec)
+ ppm = ntp_get_ppm_adjustment()
+ ntp_adj = ppm_to_mult_adj(clock, ppm)
+
+
+Unfortunately, the actual timeofday_periodic_hook code is not as simple as this
+pseudo code. For performance concerns, much has been done to pre-calculate
+values and use them repeatedly. Thus be aware that the code in timeofday.c is
+more complex, however the functional logic is the same.
+
+
+How to port an architecture to GENERIC_TIME
+========================================================================
+Porting an architecture to the GENERIC_TIME timekeeping code consists of moving
+a little bit of code around then deleting a fair amount. It is my hope that
+this will reduce the arch specific maintenance work around timekeeping.
+
+Porting an arch usually requires the following steps.
+
+1. Define CONFIG_GENERIC_TIME in the arches Kconfig
+2. Implementing the following functions
+ s64 read_persistent_clock(void)
+ void sync_persistent_clock(struct timespec ts)
+3. Removing all of the arch specific timekeeping code
+ do_gettimeofday()
+ do_settimeofday()
+ etc
+4. Implementing clocksource drivers
+ See "How to write a clocksource driver" for more details
+
+The exceptions to the above are:
+
+5. If the arch has no continuous clocksource
+ A) Implement 1-3 in the above list.
+ B) Define CONFIG_IS_TICK_BASED in arches Kconfig
+ C) Implement the "long arch_getoffset(void)" function
+
+6. If the arch supports vsyscall gettimeofday (see x86_64 for reference)
+ A) Implement 1-4 in the above list
+ B) Define GENERIC_TIME_VSYSCALL
+ C) Implement arch_update_vsyscall_gtod()
+ D) Implement vsyscall gettimeofday (similar to __get_realtime_clock_ts)
+ E) Implement vread functions for supported clocksources
+
+
+
+How to write a clocksource driver.
+========================================================================
+First, a quick summary of what a clocksource driver provides.
+
+Simply put, a clocksource is an abstraction of a free running increasing
+counter. The abstraction provides the minimal amount of info for that counter
+to be usable for timekeeping. Those required values are:
+ 1. Its name
+ 2. A rating value for selection priority
+ 3. A read function pointer
+ 4. A mask value for correct twos-complement subtraction
+ 5. A mult and shift pair that approximate the counter frequency
+ mult/(2^shift) ~= nanoseconds per cycle
+
+Additionally, there are other optionally set values that allow for advanced
+functionality. Those values are:
+ 6. The update_callback function.
+ 7. The is_continuous flag.
+ 8. The vread function pointer
+ 9. The vdata pointer value
+
+
+Now lets go over these values in detail.
+
+1. Name.
+ The clocksource's name should be unique since it is used for both
+identification as well as for manually overriding the default clocksource
+selection. The name length must be shorter than 32 characters in order for it
+to be properly overridden.
+
+2. Rating value
+ This rating value is used as a priority value for clocksource
+selection. It has no direct connection to quality or physical properties of the
+clocksource, but is to be set and manipulated to guarantee that the best (by no
+specific metric) clocksource that will provide correct timekeeping is
+automatically selected. Rating suggestions can be found in
+include/linux/clocksource.h
+
+3. Read function pointer
+ This pointer should point to a function that returns an unsigned
+increasing cycle value from the clocksource. The value should have a coverage
+from zero to the maximum cycle value the clocksource can provide. This does not
+have to be direct hardware value and can also be a software counter. An example
+of a software counter is the jiffies clocksource.
+
+4. The mask value
+ This value should be the largest power of two, minus one, that is not
+larger than the maximum cycle value. This allows twos complement subtraction to
+work on overflow boundary conditions if the max value is less than (cycle_t)-1. So for
+example, if we have a 16 bit counter (ie: one that loops to zero after
+0x0000FFFF), the mask would be 0xFFFF. So then when finding the cycle
+difference around an overflow, where now = 0x0013 and then = 0xFFEE, we can
+compute the cycle delta properly using the equation:
+ delta = (now - then)&mask
+ delta = (0x0013 - 0xFFEE) & 0xFFFF
+ delta = 0xFFFF0025 & 0xFFFF /* note the unmasked negative value */
+ delta = 0x25
+
+5. The mult and shift pair
+ These 32bit values approximate the nanosecond per cycle frequency of
+the clocksource using the equation: mult/(2^shift). If you have a khz or hz
+frequency value, the mult value for a given shift value can be easily
+calculated using the clocksource_hz2mult() and clocksource_khz2mult() helper
+functions. When selecting a shift value, it is important to be careful. Larger
+shift values give a finer precision in the cycle to nanosecond conversion and
+allows for more exact NTP adjustments. However if you select too large a shift
+value, the resulting mult value might overflow a cycle_t * mult computation.
+
+
+So if you have a simple hardware counter that does not change frequency,
+filling in the above should be sufficient for a functional clocksource. But
+read on for details on implementing a more complex clocksource.
+
+6. The update_callback function pointer.
+ If this function pointer is non-NULL, it will be called every periodic
+hook when it is safe for the clocksource to change its state. This would be
+necessary in the case where the counter frequency changes, for example. One
+user of this function pointer is the TSC clocksource. When the TSC frequency
+changes (which may occur if the cpu changes frequency) we need to notify the
+clocksource at a safe point where that state may change. Thus, if the TSC has
+changed frequency we set the new mult/shift values in the update_callback
+function.
+
+7. The is_continuous flag.
+ This flag variable (0 if false, 1 if true) denotes that the clocksource
+is continuous. This means that it is a purely hardware driven clocksource and
+is not dependent on any software code to run for it to increment properly. This
+denotation will be useful in the future when timer ticks may be disabled for
+long periods of time. Doing so using software clocksources, like the jiffies
+clocksource, would cause timekeeping problems.
+
+8. The vread function pointer.
+ This function pointer points to a user-space accessible function that
+reads the clocksource. This is used in userspace gettimeofday implementations
+to improve performance. See the x86-64 TSC clocksource implementation for an
+example.
+
+9. The vdata pointer.
+ This pointer is passed to the vread function pointer in a userspace
+gettimeofday implementation. Its usage is dependent on the vread
+implementation, but if the pointer points to data, that data must be readable
+from userspace.
+
+
+Now let's write a quick clocksource for an imaginary bit of hardware. Here are
+the specs:
+
+ A 32bit counter can be found at the MMIO address 0xFEEDF000. It runs at
+100Mhz. To enable it, the low bit of the address 0xFEEDF0F0 must be set to
+one.
+
+So lets start out an empty cool-counter.c file, and define the clocksource.
+
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <asm/io.h>
+
+#define COOL_READ_PTR 0xFEEDF000
+#define COOL_START_PTR 0xFEEDF0F0
+
+static __iomem void *cool_ptr = (void*)COOL_READ_PTR;
+
+static struct clocksource clocksource_cool = {
+ .name = "cool",
+ .rating = 200, /* its a pretty decent clock */
+ .mask = 0xFFFFFFFF, /* 32 bits */
+ .mult = 0, /*to be computed */
+ .shift = 10,
+};
+
+/* Now let's create the read function: */
+
+static cycle_t cool_counter_read(void)
+{
+ return (cycle_t)readl(cool_ptr);
+}
+
+/* Finally, lets create the init function: */
+
+static int __init cool_counter_init(void)
+{
+ __iomem void *ptr = (void*)COOL_START_PTR;
+ u32 val;
+
+ /* start the counter */
+ val = readl(ptr);
+ val |= 0x1;
+ writel(val, ptr);
+
+ /* finish initializing the clocksource */
+ clocksource_cool.read = cool_counter_read;
+ clocksource_cool.mult = clocksource_khz2mult(100000,
+ clocksource_cool.shift);
+
+ /* register the clocksource */
+ return register_clocksource(&clocksource_cool);
+}
+module_init(cool_counter_init);
+
+
+Now wasn't that easy!
Index: linux-2.6.16/Makefile
===================================================================
--- linux-2.6.16.orig/Makefile 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/Makefile 2006-11-22 17:50:43.000000000 +0100
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 16
-EXTRAVERSION =
+EXTRAVERSION =-rt29-tglx4
NAME=Sliding Snow Leopard
# *DOCUMENTATION*
@@ -511,10 +511,14 @@ CFLAGS += $(call add-align,CONFIG_CC_AL
CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops)
CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps)
-ifdef CONFIG_FRAME_POINTER
-CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ifdef CONFIG_MCOUNT
+CFLAGS += -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
else
-CFLAGS += -fomit-frame-pointer
+ ifdef CONFIG_FRAME_POINTER
+ CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ else
+ CFLAGS += -fomit-frame-pointer
+ endif
endif
ifdef CONFIG_DEBUG_INFO
Index: linux-2.6.16/arch/arm/Kconfig
===================================================================
--- linux-2.6.16.orig/arch/arm/Kconfig 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/Kconfig 2006-10-19 16:52:29.000000000 +0200
@@ -46,6 +46,10 @@ config MCA
(and especially the web page given
there) before attempting to build an MCA bus kernel.
+config GENERIC_HARDIRQS
+ bool
+ default y
+
config RWSEM_GENERIC_SPINLOCK
bool
default y
@@ -401,18 +405,7 @@ config LOCAL_TIMERS
accounting to be spread across the timer interval, preventing a
"thundering herd" at every timer tick.
-config PREEMPT
- bool "Preemptible Kernel (EXPERIMENTAL)"
- depends on EXPERIMENTAL
- help
- This option reduces the latency of the kernel when reacting to
- real-time or interactive events by allowing a low priority process to
- be preempted even if it is in kernel mode executing a system call.
- This allows applications to run more reliably even when the system is
- under load.
-
- Say Y here if you are building a kernel for a desktop, embedded
- or real-time system. Say N if you are unsure.
+source kernel/Kconfig.preempt
config NO_IDLE_HZ
bool "Dynamic tick timer"
Index: linux-2.6.16/arch/arm/boot/compressed/head.S
===================================================================
--- linux-2.6.16.orig/arch/arm/boot/compressed/head.S 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/boot/compressed/head.S 2006-10-19 16:52:29.000000000 +0200
@@ -714,6 +714,19 @@ memdump: mov r12, r0
mov pc, r10
#endif
+#ifdef CONFIG_MCOUNT
+/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this
+ * trampoline
+ */
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+mcount:
+ mov pc, lr @ just return
+#endif
+
+
reloc_end:
.align
Index: linux-2.6.16/arch/arm/boot/compressed/misc.c
===================================================================
--- linux-2.6.16.orig/arch/arm/boot/compressed/misc.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/boot/compressed/misc.c 2006-10-19 16:52:29.000000000 +0200
@@ -199,6 +199,7 @@ static ulg free_mem_ptr_end;
#define HEAP_SIZE 0x2000
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
#ifndef STANDALONE_DEBUG
Index: linux-2.6.16/arch/arm/common/locomo.c
===================================================================
--- linux-2.6.16.orig/arch/arm/common/locomo.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/common/locomo.c 2006-10-19 16:52:29.000000000 +0200
@@ -425,6 +425,12 @@ static struct irqchip locomo_spi_chip =
.unmask = locomo_spi_unmask_irq,
};
+static DEFINE_IRQ_CHAINED_TYPE(locomo_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_key_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_gpio_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_lt_handler);
+static DEFINE_IRQ_CHAINED_TYPE(locomo_spi_handler);
+
static void locomo_setup_irq(struct locomo *lchip)
{
int irq;
Index: linux-2.6.16/arch/arm/common/sa1111.c
===================================================================
--- linux-2.6.16.orig/arch/arm/common/sa1111.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/common/sa1111.c 2006-10-19 16:52:29.000000000 +0200
@@ -153,7 +153,7 @@ static void
sa1111_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
{
unsigned int stat0, stat1, i;
- void __iomem *base = desc->data;
+ void __iomem *base = desc->handler_data;
stat0 = sa1111_readl(base + SA1111_INTSTATCLR0);
stat1 = sa1111_readl(base + SA1111_INTSTATCLR1);
@@ -171,11 +171,11 @@ sa1111_irq_handler(unsigned int irq, str
for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1)
if (stat0 & 1)
- do_edge_IRQ(i, irq_desc + i, regs);
+ handle_edge_irq(i, irq_desc + i, regs);
for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1)
if (stat1 & 1)
- do_edge_IRQ(i, irq_desc + i, regs);
+ handle_edge_irq(i, irq_desc + i, regs);
/* For level-based interrupts */
desc->chip->unmask(irq);
@@ -380,6 +380,8 @@ static struct irqchip sa1111_high_chip =
.set_wake = sa1111_wake_highirq,
};
+static DEFINE_IRQ_CHAINED_TYPE(sa1111_irq_handler);
+
static void sa1111_setup_irq(struct sa1111 *sachip)
{
void __iomem *irqbase = sachip->base + SA1111_INTC;
Index: linux-2.6.16/arch/arm/common/time-acorn.c
===================================================================
--- linux-2.6.16.orig/arch/arm/common/time-acorn.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/common/time-acorn.c 2006-10-19 16:52:29.000000000 +0200
@@ -16,6 +16,7 @@
#include
#include
#include
+#include
#include
#include
@@ -76,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_i
static struct irqaction ioc_timer_irq = {
.name = "timer",
- .flags = SA_INTERRUPT,
+ .flags = SA_INTERRUPT | SA_NODELAY,
.handler = ioc_timer_interrupt
};
Index: linux-2.6.16/arch/arm/kernel/calls.S
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/calls.S 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/calls.S 2006-10-19 16:52:29.000000000 +0200
@@ -332,7 +332,7 @@
/* 320 */ CALL(sys_get_mempolicy)
CALL(sys_set_mempolicy)
#ifndef syscalls_counted
-.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
+.equ syscalls_padding, ((__NR_syscalls + 3) & ~3) - __NR_syscalls
#define syscalls_counted
#endif
.rept syscalls_padding
Index: linux-2.6.16/arch/arm/kernel/dma.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/dma.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/dma.c 2006-10-19 16:52:29.000000000 +0200
@@ -20,7 +20,7 @@
#include
-DEFINE_SPINLOCK(dma_spin_lock);
+DEFINE_RAW_SPINLOCK(dma_spin_lock);
EXPORT_SYMBOL(dma_spin_lock);
static dma_t dma_chan[MAX_DMA_CHANNELS];
Index: linux-2.6.16/arch/arm/kernel/ecard.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/ecard.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/ecard.c 2006-10-19 16:52:29.000000000 +0200
@@ -620,7 +620,7 @@ ecard_irqexp_handler(unsigned int irq, s
ecard_t *ec = slot_to_ecard(slot);
if (ec->claimed) {
- struct irqdesc *d = irqdesc + ec->irq;
+ struct irqdesc *d = irq_desc + ec->irq;
/*
* this ugly code is so that we can operate a
* prioritorising system:
@@ -1053,6 +1053,9 @@ ecard_probe(int slot, card_type_t type)
return rc;
}
+static DEFINE_IRQ_CHAINED_TYPE(ecard_irqexp_handler);
+static DEFINE_IRQ_CHAINED_TYPE(ecard_irq_handler);
+
/*
* Initialise the expansion card system.
* Locate all hardware - interrupt management and
@@ -1082,8 +1085,10 @@ static int __init ecard_init(void)
irqhw = ecard_probeirqhw();
- set_irq_chained_handler(IRQ_EXPANSIONCARD,
- irqhw ? ecard_irqexp_handler : ecard_irq_handler);
+ if (irqhw)
+ set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irqexp_handler);
+ else
+ set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irq_handler);
ecard_proc_init();
Index: linux-2.6.16/arch/arm/kernel/entry-armv.S
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/entry-armv.S 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/entry-armv.S 2006-10-19 16:52:29.000000000 +0200
@@ -201,7 +201,7 @@ __irq_svc:
irq_handler
#ifdef CONFIG_PREEMPT
ldr r0, [tsk, #TI_FLAGS] @ get flags
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
blne svc_preempt
preempt_return:
ldr r0, [tsk, #TI_PREEMPT] @ read preempt value
@@ -228,7 +228,7 @@ svc_preempt:
str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
- tst r0, #_TIF_NEED_RESCHED
+ tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
beq preempt_return @ go again
b 1b
#endif
Index: linux-2.6.16/arch/arm/kernel/entry-common.S
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/entry-common.S 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/entry-common.S 2006-10-19 16:52:29.000000000 +0200
@@ -3,6 +3,8 @@
*
* Copyright (C) 2000 Russell King
*
+ * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
@@ -41,7 +43,7 @@ ret_fast_syscall:
fast_work_pending:
str r0, [sp, #S_R0+S_OFF]! @ returned r0
work_pending:
- tst r1, #_TIF_NEED_RESCHED
+ tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED
bne work_resched
tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING
beq no_work_pending
@@ -51,7 +53,8 @@ work_pending:
b ret_slow_syscall @ Check work again
work_resched:
- bl schedule
+ bl __schedule
+
/*
* "slow" syscall return path. "why" tells us if this was a real syscall.
*/
@@ -87,8 +90,8 @@ ENTRY(ret_from_fork)
b ret_slow_syscall
- .equ NR_syscalls,0
-#define CALL(x) .equ NR_syscalls,NR_syscalls+1
+ .equ __NR_syscalls,0 @ Used to determine syscall table padding.
+#define CALL(x) .equ __NR_syscalls,__NR_syscalls+1
#include "calls.S"
#undef CALL
#define CALL(x) .long x
@@ -202,7 +205,7 @@ ENTRY(vector_swi)
tst ip, #_TIF_SYSCALL_TRACE @ are we tracing syscalls?
bne __sys_trace
- cmp scno, #NR_syscalls @ check upper syscall limit
+ cmp scno, #__NR_syscalls @ check upper syscall limit
adr lr, ret_fast_syscall @ return address
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
@@ -226,7 +229,7 @@ __sys_trace:
adr lr, __sys_trace_return @ return address
mov scno, r0 @ syscall number (possibly new)
add r1, sp, #S_R0 + S_OFF @ pointer to regs
- cmp scno, #NR_syscalls @ check upper syscall limit
+ cmp scno, #__NR_syscalls @ check upper syscall limit
ldmccia r1, {r0 - r3} @ have to reload r0 - r3
ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine
b 2b
@@ -273,7 +276,7 @@ ENTRY(sys_call_table)
sys_syscall:
eor scno, r0, #__NR_OABI_SYSCALL_BASE
cmp scno, #__NR_syscall - __NR_SYSCALL_BASE
- cmpne scno, #NR_syscalls @ check range
+ cmpne scno, #__NR_syscalls @ check range
stmloia sp, {r5, r6} @ shuffle args
movlo r0, r1
movlo r1, r2
@@ -388,6 +391,112 @@ ENTRY(sys_oabi_call_table)
#include "calls.S"
#undef ABI
#undef OBSOLETE
+#endif
+
+#ifdef CONFIG_FRAME_POINTER
+
+#ifdef CONFIG_MCOUNT
+/*
+ * At the point where we are in mcount() we maintain the
+ * frame of the prologue code and keep the call to mcount()
+ * out of the stack frame list:
+
+ saved pc <---\ caller of instrumented routine
+ saved lr |
+ ip/prev_sp |
+ fp -----^ |
+ : |
+ |
+ -> saved pc | instrumented routine
+ | saved lr |
+ | ip/prev_sp |
+ | fp ---------/
+ | :
+ |
+ | mcount
+ | saved pc
+ | saved lr
+ | ip/prev sp
+ -- fp
+ r3
+ r2
+ r1
+ sp-> r0
+ :
+ */
+
+ .text
+ .align 0
+ .type mcount %function
+ .global mcount
+
+/* gcc -pg generated FUNCTION_PROLOGUE references mcount()
+ * and has already created the stack frame invocation for
+ * the routine we have been called to instrument. We create
+ * a complete frame nevertheless, as we want to use the same
+ * call to mcount() from c code.
+ */
+mcount:
+
+ ldr ip, =mcount_enabled @ leave early, if disabled
+ ldr ip, [ip]
+ cmp ip, #0
+ moveq pc, lr
+
+ mov ip, sp
+ stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame
+
+ ldr r1, [fp, #-4] @ get lr (the return address
+ @ of the caller of the
+ @ instrumented function)
+ mov r0, lr @ get lr - (the return address
+ @ of the instrumented function)
+
+ sub fp, ip, #4 @ point fp at this frame
+
+ bl __trace
+1:
+ ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return
+
+#endif
+
+/* ARM replacement for unsupported gcc __builtin_return_address(n)
+ * where 0 < n. n == 0 is supported here as well.
+ *
+ * Walk up the stack frame until the desired frame is found or a NULL
+ * fp is encountered, return NULL in the latter case.
+ *
+ * Note: it is possible under code optimization for the stack invocation
+ * of an ancestor function (level N) to be removed before calling a
+ * descendant function (level N+1). No easy means is available to deduce
+ * this scenario with the result being [for example] caller_addr(0) when
+ * called from level N+1 returning level N-1 rather than the expected
+ * level N. This optimization issue appears isolated to the case of
+ * a call to a level N+1 routine made at the tail end of a level N
+ * routine -- the level N frame is deleted and a simple branch is made
+ * to the level N+1 routine.
+ */
+
+ .text
+ .align 0
+ .type arm_return_addr %function
+ .global arm_return_addr
+
+arm_return_addr:
+ mov ip, r0
+ mov r0, fp
+3:
+ cmp r0, #0
+ beq 1f @ frame list hit end, bail
+ cmp ip, #0
+ beq 2f @ reached desired frame
+ ldr r0, [r0, #-12] @ else continue, get next fp
+ sub ip, ip, #1
+ b 3b
+2:
+ ldr r0, [r0, #-4] @ get target return address
+1:
+ mov pc, lr
#endif
Index: linux-2.6.16/arch/arm/kernel/fiq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/fiq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/fiq.c 2006-10-19 16:52:29.000000000 +0200
@@ -38,6 +38,7 @@
#include
#include
#include
+#include
#include
#include
@@ -88,7 +89,7 @@ void set_fiq_handler(void *start, unsign
* disable irqs for the duration. Note - these functions are almost
* entirely coded in assembly.
*/
-void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
@@ -106,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs
: "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE));
}
-void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
+void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs)
{
register unsigned long tmp;
asm volatile (
Index: linux-2.6.16/arch/arm/kernel/irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -27,6 +27,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -38,193 +39,11 @@
#include
#include
-#include
#include
-#include
#include
-/*
- * Maximum IRQ count. Currently, this is arbitary. However, it should
- * not be set too low to prevent false triggering. Conversely, if it
- * is set too high, then you could miss a stuck IRQ.
- *
- * Maybe we ought to set a timer and re-enable the IRQ at a later time?
- */
-#define MAX_IRQ_CNT 100000
-
-static int noirqdebug;
-static volatile unsigned long irq_err_count;
-static DEFINE_SPINLOCK(irq_controller_lock);
-static LIST_HEAD(irq_pending);
-
-struct irqdesc irq_desc[NR_IRQS];
void (*init_arch_irq)(void) __initdata = NULL;
-/*
- * No architecture-specific irq_finish function defined in arm/arch/irqs.h.
- */
-#ifndef irq_finish
-#define irq_finish(irq) do { } while (0)
-#endif
-
-/*
- * Dummy mask/unmask handler
- */
-void dummy_mask_unmask_irq(unsigned int irq)
-{
-}
-
-irqreturn_t no_action(int irq, void *dev_id, struct pt_regs *regs)
-{
- return IRQ_NONE;
-}
-
-void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- irq_err_count += 1;
- printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
-}
-
-static struct irqchip bad_chip = {
- .ack = dummy_mask_unmask_irq,
- .mask = dummy_mask_unmask_irq,
- .unmask = dummy_mask_unmask_irq,
-};
-
-static struct irqdesc bad_irq_desc = {
- .chip = &bad_chip,
- .handle = do_bad_IRQ,
- .pend = LIST_HEAD_INIT(bad_irq_desc.pend),
- .disable_depth = 1,
-};
-
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
-
- while (desc->running)
- barrier();
-}
-EXPORT_SYMBOL(synchronize_irq);
-
-#define smp_set_running(desc) do { desc->running = 1; } while (0)
-#define smp_clear_running(desc) do { desc->running = 0; } while (0)
-#else
-#define smp_set_running(desc) do { } while (0)
-#define smp_clear_running(desc) do { } while (0)
-#endif
-
-/**
- * disable_irq_nosync - disable an irq without waiting
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and disables
- * are nested. We do this lazily.
- *
- * This function may be called from IRQ context.
- */
-void disable_irq_nosync(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->disable_depth++;
- list_del_init(&desc->pend);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- * disable_irq - disable an irq and wait for completion
- * @irq: Interrupt to disable
- *
- * Disable the selected interrupt line. Enables and disables
- * are nested. This functions waits for any pending IRQ
- * handlers for this interrupt to complete before returning.
- * If you use this function while holding a resource the IRQ
- * handler may need you will deadlock.
- *
- * This function may be called - with care - from IRQ context.
- */
-void disable_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
-
- disable_irq_nosync(irq);
- if (desc->action)
- synchronize_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-/**
- * enable_irq - enable interrupt handling on an irq
- * @irq: Interrupt to enable
- *
- * Re-enables the processing of interrupts on this IRQ line.
- * Note that this may call the interrupt handler, so you may
- * get unexpected results if you hold IRQs disabled.
- *
- * This function may be called from IRQ context.
- */
-void enable_irq(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (unlikely(!desc->disable_depth)) {
- printk("enable_irq(%u) unbalanced from %p\n", irq,
- __builtin_return_address(0));
- } else if (!--desc->disable_depth) {
- desc->probing = 0;
- desc->chip->unmask(irq);
-
- /*
- * If the interrupt is waiting to be processed,
- * try to re-run it. We can't directly run it
- * from here since the caller might be in an
- * interrupt-protected region.
- */
- if (desc->pending && list_empty(&desc->pend)) {
- desc->pending = 0;
- if (!desc->chip->retrigger ||
- desc->chip->retrigger(irq))
- list_add(&desc->pend, &irq_pending);
- }
- }
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(enable_irq);
-
-/*
- * Enable wake on selected irq
- */
-void enable_irq_wake(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (desc->chip->set_wake)
- desc->chip->set_wake(irq, 1);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(enable_irq_wake);
-
-void disable_irq_wake(unsigned int irq)
-{
- struct irqdesc *desc = irq_desc + irq;
- unsigned long flags;
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (desc->chip->set_wake)
- desc->chip->set_wake(irq, 0);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-EXPORT_SYMBOL(disable_irq_wake);
-
int show_interrupts(struct seq_file *p, void *v)
{
int i = *(loff_t *) v, cpu;
@@ -243,7 +62,7 @@ int show_interrupts(struct seq_file *p,
}
if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_controller_lock, flags);
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
action = irq_desc[i].action;
if (!action)
goto unlock;
@@ -257,7 +76,7 @@ int show_interrupts(struct seq_file *p,
seq_putc(p, '\n');
unlock:
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
#ifdef CONFIG_ARCH_ACORN
show_fiq_list(p, v);
@@ -266,374 +85,83 @@ unlock:
show_ipi_list(p);
show_local_irqs(p);
#endif
+#ifdef FIXME_TGLX
seq_printf(p, "Err: %10lu\n", irq_err_count);
- }
- return 0;
-}
-
-/*
- * IRQ lock detection.
- *
- * Hopefully, this should get us out of a few locked situations.
- * However, it may take a while for this to happen, since we need
- * a large number if IRQs to appear in the same jiffie with the
- * same instruction pointer (or within 2 instructions).
- */
-static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs)
-{
- unsigned long instr_ptr = instruction_pointer(regs);
-
- if (desc->lck_jif == jiffies &&
- desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) {
- desc->lck_cnt += 1;
-
- if (desc->lck_cnt > MAX_IRQ_CNT) {
- printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq);
- return 1;
- }
- } else {
- desc->lck_cnt = 0;
- desc->lck_pc = instruction_pointer(regs);
- desc->lck_jif = jiffies;
- }
- return 0;
-}
-
-static void
-report_bad_irq(unsigned int irq, struct pt_regs *regs, struct irqdesc *desc, int ret)
-{
- static int count = 100;
- struct irqaction *action;
-
- if (!count || noirqdebug)
- return;
-
- count--;
-
- if (ret != IRQ_HANDLED && ret != IRQ_NONE) {
- printk("irq%u: bogus retval mask %x\n", irq, ret);
- } else {
- printk("irq%u: nobody cared\n", irq);
- }
- show_regs(regs);
- dump_stack();
- printk(KERN_ERR "handlers:");
- action = desc->action;
- do {
- printk("\n" KERN_ERR "[<%p>]", action->handler);
- print_symbol(" (%s)", (unsigned long)action->handler);
- action = action->next;
- } while (action);
- printk("\n");
-}
-
-static int
-__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs)
-{
- unsigned int status;
- int ret, retval = 0;
-
- spin_unlock(&irq_controller_lock);
-
-#ifdef CONFIG_NO_IDLE_HZ
- if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) {
- write_seqlock(&xtime_lock);
- if (system_timer->dyn_tick->state & DYN_TICK_ENABLED)
- system_timer->dyn_tick->handler(irq, 0, regs);
- write_sequnlock(&xtime_lock);
- }
#endif
-
- if (!(action->flags & SA_INTERRUPT))
- local_irq_enable();
-
- status = 0;
- do {
- ret = action->handler(irq, action->dev_id, regs);
- if (ret == IRQ_HANDLED)
- status |= action->flags;
- retval |= ret;
- action = action->next;
- } while (action);
-
- if (status & SA_SAMPLE_RANDOM)
- add_interrupt_randomness(irq);
-
- spin_lock_irq(&irq_controller_lock);
-
- return retval;
-}
-
-/*
- * This is for software-decoded IRQs. The caller is expected to
- * handle the ack, clear, mask and unmask issues.
- */
-void
-do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- struct irqaction *action;
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- kstat_cpu(cpu).irqs[irq]++;
-
- smp_set_running(desc);
-
- action = desc->action;
- if (action) {
- int ret = __do_irq(irq, action, regs);
- if (ret != IRQ_HANDLED)
- report_bad_irq(irq, regs, desc, ret);
- }
-
- smp_clear_running(desc);
-}
-
-/*
- * Most edge-triggered IRQ implementations seem to take a broken
- * approach to this. Hence the complexity.
- */
-void
-do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- /*
- * If we're currently running this IRQ, or its disabled,
- * we shouldn't process the IRQ. Instead, turn on the
- * hardware masks.
- */
- if (unlikely(desc->running || desc->disable_depth))
- goto running;
-
- /*
- * Acknowledge and clear the IRQ, but don't mask it.
- */
- desc->chip->ack(irq);
-
- /*
- * Mark the IRQ currently in progress.
- */
- desc->running = 1;
-
- kstat_cpu(cpu).irqs[irq]++;
-
- do {
- struct irqaction *action;
-
- action = desc->action;
- if (!action)
- break;
-
- if (desc->pending && !desc->disable_depth) {
- desc->pending = 0;
- desc->chip->unmask(irq);
- }
-
- __do_irq(irq, action, regs);
- } while (desc->pending && !desc->disable_depth);
-
- desc->running = 0;
-
- /*
- * If we were disabled or freed, shut down the handler.
- */
- if (likely(desc->action && !check_irq_lock(desc, irq, regs)))
- return;
-
- running:
- /*
- * We got another IRQ while this one was masked or
- * currently running. Delay it.
- */
- desc->pending = 1;
- desc->chip->mask(irq);
- desc->chip->ack(irq);
-}
-
-/*
- * Level-based IRQ handler. Nice and simple.
- */
-void
-do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
-{
- struct irqaction *action;
- const unsigned int cpu = smp_processor_id();
-
- desc->triggered = 1;
-
- /*
- * Acknowledge, clear _AND_ disable the interrupt.
- */
- desc->chip->ack(irq);
-
- if (likely(!desc->disable_depth)) {
- kstat_cpu(cpu).irqs[irq]++;
-
- smp_set_running(desc);
-
- /*
- * Return with this interrupt masked if no action
- */
- action = desc->action;
- if (action) {
- int ret = __do_irq(irq, desc->action, regs);
-
- if (ret != IRQ_HANDLED)
- report_bad_irq(irq, regs, desc, ret);
-
- if (likely(!desc->disable_depth &&
- !check_irq_lock(desc, irq, regs)))
- desc->chip->unmask(irq);
- }
-
- smp_clear_running(desc);
}
+ return 0;
}
-static void do_pending_irqs(struct pt_regs *regs)
-{
- struct list_head head, *l, *n;
-
- do {
- struct irqdesc *desc;
-
- /*
- * First, take the pending interrupts off the list.
- * The act of calling the handlers may add some IRQs
- * back onto the list.
- */
- head = irq_pending;
- INIT_LIST_HEAD(&irq_pending);
- head.next->prev = &head;
- head.prev->next = &head;
-
- /*
- * Now run each entry. We must delete it from our
- * list before calling the handler.
- */
- list_for_each_safe(l, n, &head) {
- desc = list_entry(l, struct irqdesc, pend);
- list_del_init(&desc->pend);
- desc_handle_irq(desc - irq_desc, desc, regs);
- }
-
- /*
- * The list must be empty.
- */
- BUG_ON(!list_empty(&head));
- } while (!list_empty(&irq_pending));
-}
+/* Handle bad interrupts */
+static struct irq_desc bad_irq = {
+ .handler = &no_irq_type,
+ .lock = RAW_SPIN_LOCK_UNLOCKED
+};
/*
- * do_IRQ handles all hardware IRQ's. Decoded IRQs should not
+ * asm_do_IRQ handles all hardware IRQ's. Decoded IRQs should not
* come via this function. Instead, they should provide their
* own 'handler'
*/
-asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
struct irqdesc *desc = irq_desc + irq;
+ trace_special(instruction_pointer(regs), irq, 0);
+
/*
* Some hardware gives randomly wrong interrupts. Rather
* than crashing, do something sensible.
*/
if (irq >= NR_IRQS)
- desc = &bad_irq_desc;
+ desc = &bad_irq;
irq_enter();
- spin_lock(&irq_controller_lock);
- desc_handle_irq(irq, desc, regs);
-
- /*
- * Now re-run any pending interrupts.
- */
- if (!list_empty(&irq_pending))
- do_pending_irqs(regs);
- irq_finish(irq);
+ desc_handle_irq(irq, desc, regs);
- spin_unlock(&irq_controller_lock);
irq_exit();
}
-void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained)
+void __set_irq_handler(unsigned int irq, struct irq_type *type, int is_chained)
{
struct irqdesc *desc;
unsigned long flags;
if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq);
+ printk(KERN_ERR "Trying to install type control for IRQ%d\n", irq);
return;
}
- if (handle == NULL)
- handle = do_bad_IRQ;
-
desc = irq_desc + irq;
- if (is_chained && desc->chip == &bad_chip)
- printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq);
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- if (handle == do_bad_IRQ) {
- desc->chip->mask(irq);
- desc->chip->ack(irq);
- desc->disable_depth = 1;
- }
- desc->handle = handle;
- if (handle != do_bad_IRQ && is_chained) {
- desc->valid = 0;
- desc->probe_ok = 0;
- desc->disable_depth = 0;
- desc->chip->unmask(irq);
+ /* Uninstall ? */
+ if (type == NULL || type == &no_irq_type) {
+ spin_lock_irqsave(&desc->lock, flags);
+ if (desc->chip) {
+ desc->chip->mask(irq);
+ desc->chip->ack(irq);
+ }
+ desc->depth = 1;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-void set_irq_chip(unsigned int irq, struct irqchip *chip)
-{
- struct irqdesc *desc;
- unsigned long flags;
- if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+ /* Install the irq_type */
+ if (generic_set_irq_type(irq, type))
return;
- }
-
- if (chip == NULL)
- chip = &bad_chip;
-
- desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->chip = chip;
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-int set_irq_type(unsigned int irq, unsigned int type)
-{
- struct irqdesc *desc;
- unsigned long flags;
- int ret = -ENXIO;
- if (irq >= NR_IRQS) {
- printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
- return -ENODEV;
- }
+ spin_lock_irqsave(&desc->lock, flags);
+ if (is_chained && (desc->handler == &no_irq_type || !desc->chip))
+ printk(KERN_WARNING "Trying to install chained interrupt type for IRQ%d\n", irq);
- desc = irq_desc + irq;
- if (desc->chip->set_type) {
- spin_lock_irqsave(&irq_controller_lock, flags);
- ret = desc->chip->set_type(irq, type);
- spin_unlock_irqrestore(&irq_controller_lock, flags);
+ if (type != NULL && is_chained) {
+ desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
+ desc->depth = 0;
+ if (desc->chip)
+ desc->chip->unmask(irq);
}
-
- return ret;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
-EXPORT_SYMBOL(set_irq_type);
void set_irq_flags(unsigned int irq, unsigned int iflags)
{
@@ -646,421 +174,30 @@ void set_irq_flags(unsigned int irq, uns
}
desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- desc->valid = (iflags & IRQF_VALID) != 0;
- desc->probe_ok = (iflags & IRQF_PROBE) != 0;
- desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0;
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-}
-
-int setup_irq(unsigned int irq, struct irqaction *new)
-{
- int shared = 0;
- struct irqaction *old, **p;
- unsigned long flags;
- struct irqdesc *desc;
-
- /*
- * Some drivers like serial.c use request_irq() heavily,
- * so we have to be careful not to interfere with a
- * running system.
- */
- if (new->flags & SA_SAMPLE_RANDOM) {
- /*
- * This function might sleep, we want to call it first,
- * outside of the atomic block.
- * Yes, this might clear the entropy pool if the wrong
- * driver is attempted to be loaded, without actually
- * installing a new handler, but is this really a problem,
- * only the sysadmin is able to do this.
- */
- rand_initialize_irq(irq);
- }
-
- /*
- * The following block of code has to be executed atomically
- */
- desc = irq_desc + irq;
- spin_lock_irqsave(&irq_controller_lock, flags);
- p = &desc->action;
- if ((old = *p) != NULL) {
- /*
- * Can't share interrupts unless both agree to and are
- * the same type.
- */
- if (!(old->flags & new->flags & SA_SHIRQ) ||
- (~old->flags & new->flags) & SA_TRIGGER_MASK) {
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return -EBUSY;
- }
-
- /* add new interrupt at end of irq queue */
- do {
- p = &old->next;
- old = *p;
- } while (old);
- shared = 1;
- }
-
- *p = new;
-
- if (!shared) {
- desc->probing = 0;
- desc->running = 0;
- desc->pending = 0;
- desc->disable_depth = 1;
-
- if (new->flags & SA_TRIGGER_MASK &&
- desc->chip->set_type) {
- unsigned int type = new->flags & SA_TRIGGER_MASK;
- desc->chip->set_type(irq, type);
- }
-
- if (!desc->noautoenable) {
- desc->disable_depth = 0;
- desc->chip->unmask(irq);
- }
- }
-
- spin_unlock_irqrestore(&irq_controller_lock, flags);
- return 0;
-}
-
-/**
- * request_irq - allocate an interrupt line
- * @irq: Interrupt line to allocate
- * @handler: Function to be called when the IRQ occurs
- * @irqflags: Interrupt type flags
- * @devname: An ascii name for the claiming device
- * @dev_id: A cookie passed back to the handler function
- *
- * This call allocates interrupt resources and enables the
- * interrupt line and IRQ handling. From the point this
- * call is made your handler function may be invoked. Since
- * your handler function must clear any interrupt the board
- * raises, you must take care both to initialise your hardware
- * and to set up the interrupt handler in the right order.
- *
- * Dev_id must be globally unique. Normally the address of the
- * device data structure is used as the cookie. Since the handler
- * receives this value it makes sense to use it.
- *
- * If your interrupt is shared you must pass a non NULL dev_id
- * as this is required when freeing the interrupt.
- *
- * Flags:
- *
- * SA_SHIRQ Interrupt is shared
- *
- * SA_INTERRUPT Disable local interrupts while processing
- *
- * SA_SAMPLE_RANDOM The interrupt can be used for entropy
- *
- */
-int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long irq_flags, const char * devname, void *dev_id)
-{
- unsigned long retval;
- struct irqaction *action;
-
- if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler ||
- (irq_flags & SA_SHIRQ && !dev_id))
- return -EINVAL;
-
- action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL);
- if (!action)
- return -ENOMEM;
-
- action->handler = handler;
- action->flags = irq_flags;
- cpus_clear(action->mask);
- action->name = devname;
- action->next = NULL;
- action->dev_id = dev_id;
-
- retval = setup_irq(irq, action);
-
- if (retval)
- kfree(action);
- return retval;
-}
-
-EXPORT_SYMBOL(request_irq);
-
-/**
- * free_irq - free an interrupt
- * @irq: Interrupt line to free
- * @dev_id: Device identity to free
- *
- * Remove an interrupt handler. The handler is removed and if the
- * interrupt line is no longer in use by any driver it is disabled.
- * On a shared IRQ the caller must ensure the interrupt is disabled
- * on the card it drives before calling this function.
- *
- * This function must not be called from interrupt context.
- */
-void free_irq(unsigned int irq, void *dev_id)
-{
- struct irqaction * action, **p;
- unsigned long flags;
-
- if (irq >= NR_IRQS || !irq_desc[irq].valid) {
- printk(KERN_ERR "Trying to free IRQ%d\n",irq);
- dump_stack();
- return;
- }
-
- spin_lock_irqsave(&irq_controller_lock, flags);
- for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
- if (action->dev_id != dev_id)
- continue;
-
- /* Found it - now free it */
- *p = action->next;
- break;
- }
- spin_unlock_irqrestore(&irq_controller_lock, flags);
-
- if (!action) {
- printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
- dump_stack();
- } else {
- synchronize_irq(irq);
- kfree(action);
- }
-}
-
-EXPORT_SYMBOL(free_irq);
-
-static DECLARE_MUTEX(probe_sem);
-
-/* Start the interrupt probing. Unlike other architectures,
- * we don't return a mask of interrupts from probe_irq_on,
- * but return the number of interrupts enabled for the probe.
- * The interrupts which have been enabled for probing is
- * instead recorded in the irq_desc structure.
- */
-unsigned long probe_irq_on(void)
-{
- unsigned int i, irqs = 0;
- unsigned long delay;
-
- down(&probe_sem);
-
- /*
- * first snaffle up any unassigned but
- * probe-able interrupts
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (!irq_desc[i].probe_ok || irq_desc[i].action)
- continue;
-
- irq_desc[i].probing = 1;
- irq_desc[i].triggered = 0;
- if (irq_desc[i].chip->set_type)
- irq_desc[i].chip->set_type(i, IRQT_PROBE);
- irq_desc[i].chip->unmask(i);
- irqs += 1;
- }
- spin_unlock_irq(&irq_controller_lock);
-
- /*
- * wait for spurious interrupts to mask themselves out again
- */
- for (delay = jiffies + HZ/10; time_before(jiffies, delay); )
- /* min 100ms delay */;
-
- /*
- * now filter out any obviously spurious interrupts
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (irq_desc[i].probing && irq_desc[i].triggered) {
- irq_desc[i].probing = 0;
- irqs -= 1;
- }
- }
- spin_unlock_irq(&irq_controller_lock);
-
- return irqs;
-}
-
-EXPORT_SYMBOL(probe_irq_on);
-
-unsigned int probe_irq_mask(unsigned long irqs)
-{
- unsigned int mask = 0, i;
-
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < 16 && i < NR_IRQS; i++)
- if (irq_desc[i].probing && irq_desc[i].triggered)
- mask |= 1 << i;
- spin_unlock_irq(&irq_controller_lock);
-
- up(&probe_sem);
-
- return mask;
-}
-EXPORT_SYMBOL(probe_irq_mask);
-
-/*
- * Possible return values:
- * >= 0 - interrupt number
- * -1 - no interrupt/many interrupts
- */
-int probe_irq_off(unsigned long irqs)
-{
- unsigned int i;
- int irq_found = NO_IRQ;
-
- /*
- * look at the interrupts, and find exactly one
- * that we were probing has been triggered
- */
- spin_lock_irq(&irq_controller_lock);
- for (i = 0; i < NR_IRQS; i++) {
- if (irq_desc[i].probing &&
- irq_desc[i].triggered) {
- if (irq_found != NO_IRQ) {
- irq_found = NO_IRQ;
- goto out;
- }
- irq_found = i;
- }
- }
-
- if (irq_found == -1)
- irq_found = NO_IRQ;
-out:
- spin_unlock_irq(&irq_controller_lock);
-
- up(&probe_sem);
-
- return irq_found;
-}
-
-EXPORT_SYMBOL(probe_irq_off);
-
-#ifdef CONFIG_SMP
-static void route_irq(struct irqdesc *desc, unsigned int irq, unsigned int cpu)
-{
- pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu);
-
- spin_lock_irq(&irq_controller_lock);
- desc->cpu = cpu;
- desc->chip->set_cpu(desc, irq, cpu);
- spin_unlock_irq(&irq_controller_lock);
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-irq_affinity_read_proc(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct irqdesc *desc = irq_desc + ((int)data);
- int len = cpumask_scnprintf(page, count, desc->affinity);
-
- if (count - len < 2)
- return -EINVAL;
- page[len++] = '\n';
- page[len] = '\0';
-
- return len;
-}
-
-static int
-irq_affinity_write_proc(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
-{
- unsigned int irq = (unsigned int)data;
- struct irqdesc *desc = irq_desc + irq;
- cpumask_t affinity, tmp;
- int ret = -EIO;
-
- if (!desc->chip->set_cpu)
- goto out;
-
- ret = cpumask_parse(buffer, count, affinity);
- if (ret)
- goto out;
-
- cpus_and(tmp, affinity, cpu_online_map);
- if (cpus_empty(tmp)) {
- ret = -EINVAL;
- goto out;
- }
-
- desc->affinity = affinity;
- route_irq(desc, irq, first_cpu(tmp));
- ret = count;
-
- out:
- return ret;
-}
-#endif
-#endif
-
-void __init init_irq_proc(void)
-{
-#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
- struct proc_dir_entry *dir;
- int irq;
-
- dir = proc_mkdir("irq", NULL);
- if (!dir)
- return;
-
- for (irq = 0; irq < NR_IRQS; irq++) {
- struct proc_dir_entry *entry;
- struct irqdesc *desc;
- char name[16];
-
- desc = irq_desc + irq;
- memset(name, 0, sizeof(name));
- snprintf(name, sizeof(name) - 1, "%u", irq);
-
- desc->procdir = proc_mkdir(name, dir);
- if (!desc->procdir)
- continue;
-
- entry = create_proc_entry("smp_affinity", 0600, desc->procdir);
- if (entry) {
- entry->nlink = 1;
- entry->data = (void *)irq;
- entry->read_proc = irq_affinity_read_proc;
- entry->write_proc = irq_affinity_write_proc;
- }
- }
-#endif
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
+ if (iflags & IRQF_VALID)
+ desc->status &= ~IRQ_NOREQUEST;
+ if (iflags & IRQF_PROBE)
+ desc->status &= ~IRQ_NOPROBE;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
void __init init_IRQ(void)
{
- struct irqdesc *desc;
+ extern void init_dma(void);
int irq;
+ for (irq = 0; irq < NR_IRQS; irq++)
+ irq_desc[irq].status |= IRQ_NOREQUEST;
+
#ifdef CONFIG_SMP
bad_irq_desc.affinity = CPU_MASK_ALL;
bad_irq_desc.cpu = smp_processor_id();
#endif
-
- for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) {
- *desc = bad_irq_desc;
- INIT_LIST_HEAD(&desc->pend);
- }
-
init_arch_irq();
}
-static int __init noirqdebug_setup(char *str)
-{
- noirqdebug = 1;
- return 1;
-}
-
-__setup("noirqdebug", noirqdebug_setup);
-
#ifdef CONFIG_HOTPLUG_CPU
/*
* The CPU has been marked offline. Migrate IRQs off this CPU. If
Index: linux-2.6.16/arch/arm/kernel/process.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/process.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/process.c 2006-10-19 16:52:29.000000000 +0200
@@ -124,8 +124,8 @@ void cpu_idle(void)
while (!need_resched())
idle();
leds_event(led_idle_end);
- preempt_enable_no_resched();
- schedule();
+ __preempt_enable_no_resched();
+ __schedule();
preempt_disable();
}
}
Index: linux-2.6.16/arch/arm/kernel/semaphore.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/semaphore.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/semaphore.c 2006-10-19 16:52:29.000000000 +0200
@@ -49,14 +49,16 @@
* we cannot lose wakeup events.
*/
-void __up(struct semaphore *sem)
+fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem)
{
wake_up(&sem->wait);
}
+EXPORT_SYMBOL(__compat_up);
+
static DEFINE_SPINLOCK(semaphore_lock);
-void __sched __down(struct semaphore * sem)
+fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
@@ -89,7 +91,9 @@ void __sched __down(struct semaphore * s
wake_up(&sem->wait);
}
-int __sched __down_interruptible(struct semaphore * sem)
+EXPORT_SYMBOL(__compat_down);
+
+fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem)
{
int retval = 0;
struct task_struct *tsk = current;
@@ -140,6 +144,8 @@ int __sched __down_interruptible(struct
return retval;
}
+EXPORT_SYMBOL(__compat_down_interruptible);
+
/*
* Trylock failed - make sure we correct for
* having decremented the count.
@@ -148,7 +154,7 @@ int __sched __down_interruptible(struct
* single "cmpxchg" without failure cases,
* but then it wouldn't work on a 386.
*/
-int __down_trylock(struct semaphore * sem)
+fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem)
{
int sleepers;
unsigned long flags;
@@ -168,6 +174,15 @@ int __down_trylock(struct semaphore * se
return 1;
}
+EXPORT_SYMBOL(__compat_down_trylock);
+
+fastcall int compat_sem_is_locked(struct compat_semaphore *sem)
+{
+ return (int) atomic_read(&sem->count) < 0;
+}
+
+EXPORT_SYMBOL(compat_sem_is_locked);
+
/*
* The semaphore operations have a special calling sequence that
* allow us to do a simpler in-line version of them. These routines
@@ -185,7 +200,7 @@ asm(" .section .sched.text,\"ax\",%progb
__down_failed: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __down \n\
+ bl __compat_down \n\
ldmfd sp!, {r0 - r4, pc} \n\
\n\
.align 5 \n\
@@ -193,7 +208,7 @@ __down_failed: \n\
__down_interruptible_failed: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __down_interruptible \n\
+ bl __compat_down_interruptible \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r4, pc} \n\
\n\
@@ -202,7 +217,7 @@ __down_interruptible_failed: \n\
__down_trylock_failed: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __down_trylock \n\
+ bl __compat_down_trylock \n\
mov ip, r0 \n\
ldmfd sp!, {r0 - r4, pc} \n\
\n\
@@ -211,7 +226,7 @@ __down_trylock_failed: \n\
__up_wakeup: \n\
stmfd sp!, {r0 - r4, lr} \n\
mov r0, ip \n\
- bl __up \n\
+ bl __compat_up \n\
ldmfd sp!, {r0 - r4, pc} \n\
");
Index: linux-2.6.16/arch/arm/kernel/signal.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/signal.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/signal.c 2006-10-19 16:52:29.000000000 +0200
@@ -635,6 +635,14 @@ static int do_signal(sigset_t *oldset, s
siginfo_t info;
int signr;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
+
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux-2.6.16/arch/arm/kernel/smp.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/smp.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/smp.c 2006-10-19 16:52:29.000000000 +0200
@@ -519,7 +519,7 @@ static void ipi_call_function(unsigned i
cpu_clear(cpu, data->unfinished);
}
-static DEFINE_SPINLOCK(stop_lock);
+static DEFINE_RAW_SPINLOCK(stop_lock);
/*
* ipi_cpu_stop - handle IPI from smp_send_stop()
Index: linux-2.6.16/arch/arm/kernel/traps.c
===================================================================
--- linux-2.6.16.orig/arch/arm/kernel/traps.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/kernel/traps.c 2006-10-19 16:52:29.000000000 +0200
@@ -177,6 +177,10 @@ void dump_stack(void)
{
#ifdef CONFIG_DEBUG_ERRORS
__backtrace();
+ print_traces(current);
+#ifdef CONFIG_DEBUG_MUTEXES
+ show_held_locks(current);
+#endif
#endif
}
@@ -217,7 +221,7 @@ static void __die(const char *str, int e
}
}
-DEFINE_SPINLOCK(die_lock);
+DEFINE_RAW_SPINLOCK(die_lock);
/*
* This function is protected against re-entrancy.
@@ -256,7 +260,7 @@ void notify_die(const char *str, struct
}
static LIST_HEAD(undef_hook);
-static DEFINE_SPINLOCK(undef_lock);
+static DEFINE_RAW_SPINLOCK(undef_lock);
void register_undef_hook(struct undef_hook *hook)
{
Index: linux-2.6.16/arch/arm/mach-at91rm9200/gpio.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-at91rm9200/gpio.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-at91rm9200/gpio.c 2006-10-19 16:52:29.000000000 +0200
@@ -261,7 +261,7 @@ static void gpio_irq_handler(unsigned ir
void __iomem *pio;
u32 isr;
- pio = (void __force __iomem *) desc->chipdata;
+ pio = (void __force __iomem *) desc->chip->chip_data;
/* temporarily mask (level sensitive) parent IRQ */
desc->chip->ack(irq);
@@ -270,12 +270,12 @@ static void gpio_irq_handler(unsigned ir
if (!isr)
break;
- pin = (unsigned) desc->data;
+ pin = (unsigned) desc->handler_data;
gpio = &irq_desc[pin];
while (isr) {
if (isr & 1) {
- if (unlikely(gpio->disable_depth)) {
+ if (unlikely(gpio->depth)) {
/*
* The core ARM interrupt handler lazily disables IRQs so
* another IRQ must be generated before it actually gets
@@ -284,7 +284,7 @@ static void gpio_irq_handler(unsigned ir
gpio_irq_mask(pin);
}
else
- gpio->handle(pin, gpio, regs);
+ desc_handle_irq(pin, gpio, regs);
}
pin++;
gpio++;
@@ -295,6 +295,8 @@ static void gpio_irq_handler(unsigned ir
/* now it may re-trigger */
}
+static DEFINE_IRQ_CHAINED_TYPE(gpio_irq_handler);
+
/* call this from board-specific init_irq */
void __init at91_gpio_irq_setup(unsigned banks)
{
Index: linux-2.6.16/arch/arm/mach-at91rm9200/time.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-at91rm9200/time.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-at91rm9200/time.c 2006-10-19 16:52:29.000000000 +0200
@@ -22,13 +22,13 @@
#include
#include
#include
+#include
#include
#include
#include
#include
#include
-#include
#include
/*
Index: linux-2.6.16/arch/arm/mach-clps711x/time.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-clps711x/time.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-clps711x/time.c 2006-10-19 16:52:29.000000000 +0200
@@ -19,6 +19,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-clps7500/core.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-clps7500/core.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-clps7500/core.c 2006-10-19 16:52:29.000000000 +0200
@@ -9,6 +9,7 @@
#include
#include
#include
+#include
#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-footbridge/dc21285-timer.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-footbridge/dc21285-timer.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-footbridge/dc21285-timer.c 2006-10-19 16:52:29.000000000 +0200
@@ -6,6 +6,7 @@
*/
#include
#include
+#include
#include
Index: linux-2.6.16/arch/arm/mach-footbridge/isa-irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-footbridge/isa-irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-footbridge/isa-irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -102,6 +102,17 @@ static struct irqaction irq_cascade = {
static struct resource pic1_resource = { "pic1", 0x20, 0x3f };
static struct resource pic2_resource = { "pic2", 0xa0, 0xbf };
+static DEFINE_IRQ_CHAINED_TYPE(isa_irq_handler);
+
+static unsigned int startup_irq_disabled(unsigned int irq)
+{
+ return 0;
+}
+
+/* Interrupt type for IRQs which must not be
+ * automatically enabled in request_irq */
+static struct irq_type level_type_nostart;
+
void __init isa_init_irq(unsigned int host_irq)
{
unsigned int irq;
@@ -159,9 +170,11 @@ void __init isa_init_irq(unsigned int ho
* There appears to be a missing pull-up
* resistor on this line.
*/
- if (machine_is_netwinder())
- set_irq_flags(_ISA_IRQ(11), IRQF_VALID |
- IRQF_PROBE | IRQF_NOAUTOEN);
+ if (machine_is_netwinder()) {
+ level_type_nostart = default_level_type;
+ level_type_nostart.startup = startup_irq_disabled;
+ set_irq_handler(_ISA_IRQ(11), &level_type_nostart);
+ }
}
}
Index: linux-2.6.16/arch/arm/mach-footbridge/isa-timer.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-footbridge/isa-timer.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-footbridge/isa-timer.c 2006-10-19 16:52:29.000000000 +0200
@@ -6,6 +6,7 @@
*/
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-footbridge/netwinder-hw.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-footbridge/netwinder-hw.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-footbridge/netwinder-hw.c 2006-10-19 16:52:29.000000000 +0200
@@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int
/*
* This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE
*/
-DEFINE_SPINLOCK(gpio_lock);
+DEFINE_RAW_SPINLOCK(gpio_lock);
static unsigned int current_gpio_op;
static unsigned int current_gpio_io;
Index: linux-2.6.16/arch/arm/mach-footbridge/netwinder-leds.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-footbridge/netwinder-leds.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-footbridge/netwinder-leds.c 2006-10-19 16:52:29.000000000 +0200
@@ -33,7 +33,7 @@ static char led_state;
static char hw_led_state;
static DEFINE_SPINLOCK(leds_lock);
-extern spinlock_t gpio_lock;
+extern raw_spinlock_t gpio_lock;
static void netwinder_leds_event(led_event_t evt)
{
Index: linux-2.6.16/arch/arm/mach-h720x/common.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-h720x/common.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-h720x/common.c 2006-10-19 16:52:29.000000000 +0200
@@ -163,6 +163,11 @@ h720x_gpiod_demux_handler(unsigned int i
h720x_gpio_handler(mask, irq, desc, regs);
}
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioa_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiob_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioc_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiod_demux_handler);
+
#ifdef CONFIG_CPU_H7202
static void
h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc,
@@ -175,6 +180,7 @@ h720x_gpioe_demux_handler(unsigned int i
IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq);
h720x_gpio_handler(mask, irq, desc, regs);
}
+static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioe_demux_handler);
#endif
static struct irqchip h720x_global_chip = {
Index: linux-2.6.16/arch/arm/mach-h720x/cpu-h7202.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-h720x/cpu-h7202.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-h720x/cpu-h7202.c 2006-10-19 16:52:29.000000000 +0200
@@ -175,6 +175,8 @@ static struct irqaction h7202_timer_irq
.handler = h7202_timer_interrupt,
};
+static DEFINE_IRQ_CHAINED_TYPE(h7202_timerx_demux_handler);
+
/*
* Setup TIMER0 as system timer
*/
Index: linux-2.6.16/arch/arm/mach-imx/irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-imx/irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-imx/irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -217,6 +217,11 @@ static struct irqchip imx_gpio_chip = {
.set_type = imx_gpio_irq_type,
};
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpioa_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpiob_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpioc_demux_handler);
+static DEFINE_IRQ_CHAINED_TYPE(imx_gpiod_demux_handler);
+
void __init
imx_init_irq(void)
{
Index: linux-2.6.16/arch/arm/mach-imx/time.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-imx/time.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-imx/time.c 2006-10-19 16:52:29.000000000 +0200
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-integrator/core.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-integrator/core.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-integrator/core.c 2006-10-19 16:52:29.000000000 +0200
@@ -13,6 +13,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -117,7 +118,7 @@ arch_initcall(integrator_init);
#define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET
-static DEFINE_SPINLOCK(cm_lock);
+static DEFINE_RAW_SPINLOCK(cm_lock);
/**
* cm_control - update the CM_CTRL register.
Index: linux-2.6.16/arch/arm/mach-integrator/pci_v3.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-integrator/pci_v3.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-integrator/pci_v3.c 2006-10-19 16:52:29.000000000 +0200
@@ -163,7 +163,7 @@
* 7:2 register number
*
*/
-static DEFINE_SPINLOCK(v3_lock);
+static DEFINE_RAW_SPINLOCK(v3_lock);
#define PCI_BUS_NONMEM_START 0x00000000
#define PCI_BUS_NONMEM_SIZE SZ_256M
Index: linux-2.6.16/arch/arm/mach-integrator/platsmp.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-integrator/platsmp.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-integrator/platsmp.c 2006-10-19 16:52:29.000000000 +0200
@@ -31,7 +31,7 @@ extern void integrator_secondary_startup
volatile int __cpuinitdata pen_release = -1;
unsigned long __cpuinitdata phys_pen_release = 0;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
Index: linux-2.6.16/arch/arm/mach-ixp2000/core.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp2000/core.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp2000/core.c 2006-10-19 16:52:29.000000000 +0200
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -288,7 +289,7 @@ void gpio_line_config(int line, int dire
local_irq_save(flags);
if (direction == GPIO_OUT) {
- irq_desc[line + IRQ_IXP2000_GPIO0].valid = 0;
+ set_irq_flags(line + IRQ_IXP2000_GPIO0, 0);
/* if it's an output, it ain't an interrupt anymore */
GPIO_IRQ_falling_edge &= ~(1 << line);
@@ -354,8 +355,7 @@ static int ixp2000_GPIO_irq_type(unsigne
/*
* Finally, mark the corresponding IRQ as valid.
*/
- irq_desc[irq].valid = 1;
-
+ set_irq_flags(irq, IRQF_VALID);
return 0;
}
@@ -414,7 +414,7 @@ static void ixp2000_err_irq_handler(unsi
for(i = 31; i >= 0; i--) {
if(status & (1 << i)) {
desc = irq_desc + IRQ_IXP2000_DRAM0_MIN_ERR + i;
- desc->handle(IRQ_IXP2000_DRAM0_MIN_ERR + i, desc, regs);
+ desc_handle_irq(IRQ_IXP2000_DRAM0_MIN_ERR + i, desc, regs);
}
}
}
@@ -459,6 +459,9 @@ static struct irqchip ixp2000_irq_chip =
.unmask = ixp2000_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixp2000_GPIO_irq_handler);
+static DEFINE_IRQ_CHAINED_TYPE(ixp2000_err_irq_handler);
+
void __init ixp2000_init_irq(void)
{
int irq;
Index: linux-2.6.16/arch/arm/mach-ixp2000/ixdp2x00.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp2000/ixdp2x00.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp2000/ixdp2x00.c 2006-10-19 16:52:29.000000000 +0200
@@ -146,6 +146,8 @@ static struct irqchip ixdp2x00_cpld_irq_
.unmask = ixdp2x00_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixdp2x00_irq_handler);
+
void ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
{
unsigned int irq;
@@ -168,7 +170,7 @@ void ixdp2x00_init_irq(volatile unsigned
}
/* Hook into PCI interrupt */
- set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x00_irq_handler);
+ set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x00_irq_handler);
}
/*************************************************************************
Index: linux-2.6.16/arch/arm/mach-ixp2000/ixdp2x01.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp2000/ixdp2x01.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp2000/ixdp2x01.c 2006-10-19 16:52:29.000000000 +0200
@@ -95,6 +95,8 @@ static struct irqchip ixdp2x01_irq_chip
.unmask = ixdp2x01_irq_unmask
};
+static DEFINE_IRQ_CHAINED_TYPE(ixdp2x01_irq_handler);
+
/*
* We only do anything if we are the master NPU on the board.
* The slave NPU only has the ethernet chip going directly to
@@ -127,7 +129,7 @@ void __init ixdp2x01_init_irq(void)
}
/* Hook into PCI interrupts */
- set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x01_irq_handler);
+ set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x01_irq_handler);
}
Index: linux-2.6.16/arch/arm/mach-ixp4xx/common-pci.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp4xx/common-pci.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp4xx/common-pci.c 2006-10-19 16:52:29.000000000 +0200
@@ -53,7 +53,7 @@ unsigned long ixp4xx_pci_reg_base = 0;
* these transactions are atomic or we will end up
* with corrupt data on the bus or in a driver.
*/
-static DEFINE_SPINLOCK(ixp4xx_pci_lock);
+static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock);
/*
* Read from PCI config space
Index: linux-2.6.16/arch/arm/mach-ixp4xx/coyote-pci.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp4xx/coyote-pci.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp4xx/coyote-pci.c 2006-10-19 16:52:29.000000000 +0200
@@ -17,6 +17,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-ixp4xx/ixdp425-pci.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp4xx/ixdp425-pci.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp4xx/ixdp425-pci.c 2006-10-19 16:52:29.000000000 +0200
@@ -16,6 +16,7 @@
#include
#include
+#include
#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-ixp4xx/ixdpg425-pci.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp4xx/ixdpg425-pci.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp4xx/ixdpg425-pci.c 2006-10-19 16:52:29.000000000 +0200
@@ -16,10 +16,10 @@
#include
#include
#include
+#include
#include
#include
-#include
#include
Index: linux-2.6.16/arch/arm/mach-ixp4xx/nas100d-pci.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp4xx/nas100d-pci.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp4xx/nas100d-pci.c 2006-10-19 16:52:29.000000000 +0200
@@ -18,6 +18,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-ixp4xx/nas100d-power.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-ixp4xx/nas100d-power.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-ixp4xx/nas100d-power.c 2006-10-19 16:52:29.000000000 +0200
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
Index: linux-2.6.16/arch/arm/mach-l7200/core.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-l7200/core.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-l7200/core.c 2006-10-19 16:52:29.000000000 +0200
@@ -7,6 +7,7 @@
*/
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-lh7a40x/arch-kev7a400.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-lh7a40x/arch-kev7a400.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-lh7a40x/arch-kev7a400.c 2006-10-19 16:52:29.000000000 +0200
@@ -81,6 +81,8 @@ static void kev7a400_cpld_handler (unsig
}
}
+static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler);
+
void __init lh7a40x_init_board_irq (void)
{
int irq;
Index: linux-2.6.16/arch/arm/mach-lh7a40x/arch-lpd7a40x.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-lh7a40x/arch-lpd7a40x.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-lh7a40x/arch-lpd7a40x.c 2006-10-19 16:52:29.000000000 +0200
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
@@ -173,6 +174,7 @@ static void lpd7a40x_cpld_handler (unsig
desc->chip->unmask (irq); /* Level-triggered need this */
}
+static DEFINE_IRQ_CHAINED_TYPE(lpd7a40x_cpld_handler);
void __init lh7a40x_init_board_irq (void)
{
Index: linux-2.6.16/arch/arm/mach-lh7a40x/irq-kev7a400.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-lh7a40x/irq-kev7a400.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-lh7a40x/irq-kev7a400.c 2006-10-19 16:52:29.000000000 +0200
@@ -60,6 +60,8 @@ lh7a400_cpld_handler (unsigned int irq,
}
}
+static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler);
+
/* IRQ initialization */
void __init
Index: linux-2.6.16/arch/arm/mach-lh7a40x/irq-lpd7a40x.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-lh7a40x/irq-lpd7a40x.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-lh7a40x/irq-lpd7a40x.c 2006-10-19 16:52:29.000000000 +0200
@@ -71,6 +71,7 @@ static void lh7a40x_cpld_handler (unsign
desc->chip->unmask (irq); /* Level-triggered need this */
}
+static DEFINE_IRQ_CHAINED_TYPE(lh7a40x_cpld_handler);
/* IRQ initialization */
Index: linux-2.6.16/arch/arm/mach-lh7a40x/time.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-lh7a40x/time.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-lh7a40x/time.c 2006-10-19 16:52:29.000000000 +0200
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-omap1/board-osk.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-omap1/board-osk.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-omap1/board-osk.c 2006-10-19 16:52:29.000000000 +0200
@@ -29,7 +29,7 @@
#include
#include
#include
-#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-omap1/fpga.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-omap1/fpga.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-omap1/fpga.c 2006-10-19 16:52:29.000000000 +0200
@@ -120,6 +120,8 @@ static struct irqchip omap_fpga_irq = {
.unmask = fpga_unmask_irq,
};
+static DEFINE_IRQ_CHAINED_TYPE(innovator_fpga_IRQ_demux);
+
/*
* All of the FPGA interrupt request inputs except for the touchscreen are
* edge-sensitive; the touchscreen is level-sensitive. The edge-sensitive
Index: linux-2.6.16/arch/arm/mach-omap1/serial.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-omap1/serial.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-omap1/serial.c 2006-10-19 16:52:29.000000000 +0200
@@ -12,6 +12,7 @@
#include
#include
#include
+#include
#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-pxa/idp.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-pxa/idp.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-pxa/idp.c 2006-10-19 16:52:29.000000000 +0200
@@ -18,6 +18,7 @@
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-pxa/irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-pxa/irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-pxa/irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -244,6 +244,7 @@ static struct irqchip pxa_muxed_gpio_chi
.set_type = pxa_gpio_irq_type,
};
+static DEFINE_IRQ_CHAINED_TYPE(pxa_gpio_demux_handler);
void __init pxa_init_irq(void)
{
Index: linux-2.6.16/arch/arm/mach-pxa/lubbock.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-pxa/lubbock.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-pxa/lubbock.c 2006-10-19 16:52:29.000000000 +0200
@@ -95,6 +95,8 @@ static void lubbock_irq_handler(unsigned
} while (pending);
}
+static DEFINE_IRQ_CHAINED_TYPE(lubbock_irq_handler);
+
static void __init lubbock_init_irq(void)
{
int irq;
Index: linux-2.6.16/arch/arm/mach-pxa/mainstone.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-pxa/mainstone.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-pxa/mainstone.c 2006-10-19 16:52:29.000000000 +0200
@@ -85,6 +85,8 @@ static void mainstone_irq_handler(unsign
} while (pending);
}
+static DEFINE_IRQ_CHAINED_TYPE(mainstone_irq_handler);
+
static void __init mainstone_init_irq(void)
{
int irq;
Index: linux-2.6.16/arch/arm/mach-pxa/sharpsl_pm.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-pxa/sharpsl_pm.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-pxa/sharpsl_pm.c 2006-10-19 16:52:29.000000000 +0200
@@ -18,11 +18,11 @@
#include
#include
#include
+#include
#include
#include
#include
-#include
#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-rpc/irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-rpc/irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-rpc/irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -112,6 +112,15 @@ static struct irqchip iomd_fiq_chip = {
.unmask = iomd_unmask_irq_fiq,
};
+static unsigned int startup_irq_disabled(unsigned int irq)
+{
+ return 0;
+}
+
+/* Interrupt type for irqs which must not be
+ * automatically enabled in request_irq */
+static struct irq_type level_type_nostart;
+
void __init rpc_init_irq(void)
{
unsigned int irq, flags;
@@ -121,16 +130,15 @@ void __init rpc_init_irq(void)
iomd_writeb(0, IOMD_FIQMASK);
iomd_writeb(0, IOMD_DMAMASK);
+ level_type_nostart = default_level_type;
+ level_type_nostart.startup = startup_irq_disabled;
+
for (irq = 0; irq < NR_IRQS; irq++) {
flags = IRQF_VALID;
if (irq <= 6 || (irq >= 9 && irq <= 15))
flags |= IRQF_PROBE;
- if (irq == 21 || (irq >= 16 && irq <= 19) ||
- irq == IRQ_KEYBOARDTX)
- flags |= IRQF_NOAUTOEN;
-
switch (irq) {
case 0 ... 7:
set_irq_chip(irq, &iomd_a_chip);
@@ -155,6 +163,10 @@ void __init rpc_init_irq(void)
set_irq_flags(irq, IRQF_VALID);
break;
}
+
+ if (irq == 21 || (irq >= 16 && irq <= 19) ||
+ irq == IRQ_KEYBOARDTX)
+ set_irq_handler(irq, &level_type_nostart);
}
init_FIQ();
Index: linux-2.6.16/arch/arm/mach-s3c2410/bast-irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-s3c2410/bast-irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-s3c2410/bast-irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -136,13 +136,15 @@ bast_irq_pc104_demux(unsigned int irq,
for (i = 0; stat != 0; i++, stat >>= 1) {
if (stat & 1) {
irqno = bast_pc104_irqs[i];
-
- desc_handle_irq(irqno, irq_desc + irqno, regs);
+ desc = irq_desc + irqno;
+ desc_handle_irq(irqno, desc, regs);
}
}
}
}
+DEFINE_IRQ_CHAINED_TYPE(bast_irq_pc104_demux);
+
static __init int bast_irq_init(void)
{
unsigned int i;
@@ -156,7 +158,7 @@ static __init int bast_irq_init(void)
set_irq_chained_handler(IRQ_ISA, bast_irq_pc104_demux);
- /* reigster our IRQs */
+ /* register our IRQs */
for (i = 0; i < 4; i++) {
unsigned int irqno = bast_pc104_irqs[i];
Index: linux-2.6.16/arch/arm/mach-s3c2410/irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-s3c2410/irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-s3c2410/irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -573,6 +573,11 @@ s3c_irq_demux_uart2(unsigned int irq,
}
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart0);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart1);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart2);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_adc);
+
/* s3c24xx_init_irq
*
* Initialise S3C2410 IRQ system
Index: linux-2.6.16/arch/arm/mach-s3c2410/s3c2440-irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-s3c2410/s3c2440-irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-s3c2410/s3c2440-irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -157,6 +157,9 @@ static struct irqchip s3c_irq_cam = {
.ack = s3c_irq_cam_ack,
};
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_wdtac97);
+static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_cam);
+
static int s3c2440_irq_add(struct sys_device *sysdev)
{
unsigned int irqno;
Index: linux-2.6.16/arch/arm/mach-s3c2410/time.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-s3c2410/time.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-s3c2410/time.c 2006-10-19 16:52:29.000000000 +0200
@@ -23,6 +23,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-sa1100/badge4.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-sa1100/badge4.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-sa1100/badge4.c 2006-10-19 16:52:29.000000000 +0200
@@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, i
/* detect on->off and off->on transitions */
if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
/* was off, now on */
- printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
GPSR = BADGE4_GPIO_PCMEN5V;
} else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
/* was on, now off */
- printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
GPCR = BADGE4_GPIO_PCMEN5V;
}
local_irq_restore(flags);
+
+ /* detect on->off and off->on transitions */
+ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) {
+ /* was off, now on */
+ printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__);
+ } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) {
+ /* was on, now off */
+ printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__);
+ }
}
EXPORT_SYMBOL(badge4_set_5V);
Index: linux-2.6.16/arch/arm/mach-sa1100/cerf.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-sa1100/cerf.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-sa1100/cerf.c 2006-10-19 16:52:29.000000000 +0200
@@ -15,6 +15,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-sa1100/h3600.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-sa1100/h3600.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-sa1100/h3600.c 2006-10-19 16:52:29.000000000 +0200
@@ -798,6 +798,8 @@ static void h3800_unmask_gpio_irq(unsign
H3800_ASIC2_GPIINTSTAT |= mask;
}
+static DEFINE_IRQ_CHAINED_TYPE(h3800_IRQ_demux);
+
static void __init h3800_init_irq(void)
{
int i;
@@ -836,7 +838,7 @@ static void __init h3800_init_irq(void)
}
#endif
set_irq_type(IRQ_GPIO_H3800_ASIC, IRQT_RISING);
- set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, &h3800_IRQ_demux);
+ set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, h3800_IRQ_demux);
}
Index: linux-2.6.16/arch/arm/mach-sa1100/irq.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-sa1100/irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-sa1100/irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -11,12 +11,13 @@
*/
#include
#include
+#include
+#include
#include
#include
#include
#include
-#include
#include
#include "generic.h"
@@ -281,6 +282,8 @@ static int __init sa1100irq_init_devicef
return sysdev_register(&sa1100irq_device);
}
+static DEFINE_IRQ_CHAINED_TYPE(sa1100_high_gpio_handler);
+
device_initcall(sa1100irq_init_devicefs);
void __init sa1100_init_irq(void)
Index: linux-2.6.16/arch/arm/mach-sa1100/neponset.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-sa1100/neponset.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-sa1100/neponset.c 2006-10-19 16:52:29.000000000 +0200
@@ -137,6 +137,8 @@ static struct sa1100_port_fns neponset_p
.get_mctrl = neponset_get_mctrl,
};
+static DEFINE_IRQ_CHAINED_TYPE(neponset_irq_handler);
+
static int neponset_probe(struct platform_device *dev)
{
sa1100_register_uart_fns(&neponset_port_fns);
Index: linux-2.6.16/arch/arm/mach-sa1100/pleb.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-sa1100/pleb.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-sa1100/pleb.c 2006-10-19 16:52:29.000000000 +0200
@@ -7,6 +7,7 @@
#include
#include
#include
+#include
#include
Index: linux-2.6.16/arch/arm/mach-sa1100/time.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-sa1100/time.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-sa1100/time.c 2006-10-19 16:52:29.000000000 +0200
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-shark/core.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-shark/core.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-shark/core.c 2006-10-19 16:52:29.000000000 +0200
@@ -6,6 +6,7 @@
#include
#include
#include
+#include
#include
#include
Index: linux-2.6.16/arch/arm/mach-shark/leds.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-shark/leds.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-shark/leds.c 2006-10-19 16:52:29.000000000 +0200
@@ -33,7 +33,7 @@ static char led_state;
static short hw_led_state;
static short saved_state;
-static DEFINE_SPINLOCK(leds_lock);
+static DEFINE_RAW_SPINLOCK(leds_lock);
short sequoia_read(int addr) {
outw(addr,0x24);
Index: linux-2.6.16/arch/arm/mach-versatile/core.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mach-versatile/core.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mach-versatile/core.c 2006-10-19 16:52:29.000000000 +0200
@@ -96,6 +96,8 @@ sic_handle_irq(unsigned int irq, struct
} while (status);
}
+static DEFINE_IRQ_CHAINED_TYPE(sic_handle_irq);
+
#if 1
#define IRQ_MMCI0A IRQ_VICSOURCE22
#define IRQ_AACI IRQ_VICSOURCE24
@@ -114,7 +116,7 @@ void __init versatile_init_irq(void)
vic_init(VA_VIC_BASE, ~(1 << 31));
- set_irq_handler(IRQ_VICSOURCE31, sic_handle_irq);
+ set_irq_chained_handler(IRQ_VICSOURCE31, sic_handle_irq);
enable_irq(IRQ_VICSOURCE31);
/* Do second interrupt controller */
Index: linux-2.6.16/arch/arm/mm/consistent.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mm/consistent.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mm/consistent.c 2006-10-19 16:52:29.000000000 +0200
@@ -39,7 +39,7 @@
* These are the page tables (2MB each) covering uncached, DMA consistent allocations
*/
static pte_t *consistent_pte[NUM_CONSISTENT_PTES];
-static DEFINE_SPINLOCK(consistent_lock);
+static DEFINE_RAW_SPINLOCK(consistent_lock);
/*
* VM region handling support.
Index: linux-2.6.16/arch/arm/mm/copypage-v4mc.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mm/copypage-v4mc.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mm/copypage-v4mc.c 2006-10-19 16:52:29.000000000 +0200
@@ -29,7 +29,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* ARMv4 mini-dcache optimised copy_user_page
@@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* instruction. If your processor does not supply this, you have to write your
* own copy_user_page that does the right thing.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
asm volatile(
@@ -82,7 +82,7 @@ void v4_mc_copy_user_page(void *kto, con
/*
* ARMv4 optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
v4_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux-2.6.16/arch/arm/mm/copypage-v6.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mm/copypage-v6.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mm/copypage-v6.c 2006-10-19 16:52:29.000000000 +0200
@@ -26,7 +26,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(v6_lock);
+static DEFINE_RAW_SPINLOCK(v6_lock);
/*
* Copy the user page. No aliasing to deal with so we can just
Index: linux-2.6.16/arch/arm/mm/copypage-xscale.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mm/copypage-xscale.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mm/copypage-xscale.c 2006-10-19 16:52:29.000000000 +0200
@@ -31,7 +31,7 @@
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
-static DEFINE_SPINLOCK(minicache_lock);
+static DEFINE_RAW_SPINLOCK(minicache_lock);
/*
* XScale mini-dcache optimised copy_user_page
@@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(minicache_lock);
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate.
*/
-static void __attribute__((naked))
+static void notrace __attribute__((naked))
mc_copy_user_page(void *from, void *to)
{
/*
@@ -104,7 +104,7 @@ void xscale_mc_copy_user_page(void *kto,
/*
* XScale optimised clear_user_page
*/
-void __attribute__((naked))
+void notrace __attribute__((naked))
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
{
asm volatile(
Index: linux-2.6.16/arch/arm/mm/fault.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mm/fault.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mm/fault.c 2006-10-19 16:52:29.000000000 +0200
@@ -216,7 +216,7 @@ out:
return fault;
}
-static int
+static notrace int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk;
@@ -316,7 +316,7 @@ no_context:
* interrupt or a critical region, and should only copy the information
* from the master page table, nothing more.
*/
-static int
+static notrace int
do_translation_fault(unsigned long addr, unsigned int fsr,
struct pt_regs *regs)
{
@@ -362,7 +362,7 @@ bad_area:
* Some section permission faults need to be handled gracefully.
* They can happen due to a __{get,put}_user during an oops.
*/
-static int
+static notrace int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
@@ -373,7 +373,7 @@ do_sect_fault(unsigned long addr, unsign
/*
* This abort handler always returns "fault".
*/
-static int
+static notrace int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
return 1;
@@ -428,7 +428,7 @@ static struct fsr_info {
{ do_bad, SIGBUS, 0, "unknown 31" }
};
-void __init
+void __init notrace
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
int sig, const char *name)
{
@@ -442,7 +442,7 @@ hook_fault_code(int nr, int (*fn)(unsign
/*
* Dispatch a data abort to the relevant handler.
*/
-asmlinkage void
+asmlinkage notrace void
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
@@ -461,7 +461,7 @@ do_DataAbort(unsigned long addr, unsigne
notify_die("", regs, &info, fsr, 0);
}
-asmlinkage void
+asmlinkage notrace void
do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
{
do_translation_fault(addr, 0, regs);
Index: linux-2.6.16/arch/arm/mm/init.c
===================================================================
--- linux-2.6.16.orig/arch/arm/mm/init.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/mm/init.c 2006-10-19 16:52:29.000000000 +0200
@@ -28,7 +28,7 @@
#define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
Index: linux-2.6.16/arch/arm/plat-omap/clock.c
===================================================================
--- linux-2.6.16.orig/arch/arm/plat-omap/clock.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/plat-omap/clock.c 2006-10-19 16:52:29.000000000 +0200
@@ -29,7 +29,7 @@
LIST_HEAD(clocks);
static DEFINE_MUTEX(clocks_mutex);
-DEFINE_SPINLOCK(clockfw_lock);
+DEFINE_RAW_SPINLOCK(clockfw_lock);
static struct clk_functions *arch_clock;
Index: linux-2.6.16/arch/arm/plat-omap/dma.c
===================================================================
--- linux-2.6.16.orig/arch/arm/plat-omap/dma.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/plat-omap/dma.c 2006-10-19 16:52:29.000000000 +0200
@@ -903,7 +903,7 @@ static struct irqaction omap24xx_dma_irq
/*----------------------------------------------------------------------------*/
static struct lcd_dma_info {
- spinlock_t lock;
+ raw_spinlock_t lock;
int reserved;
void (* callback)(u16 status, void *data);
void *cb_data;
Index: linux-2.6.16/arch/arm/plat-omap/gpio.c
===================================================================
--- linux-2.6.16.orig/arch/arm/plat-omap/gpio.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/plat-omap/gpio.c 2006-10-19 16:52:29.000000000 +0200
@@ -121,7 +121,7 @@ struct gpio_bank {
u32 reserved_map;
u32 suspend_wakeup;
u32 saved_wakeup;
- spinlock_t lock;
+ raw_spinlock_t lock;
};
#define METHOD_MPUIO 0
@@ -736,7 +736,7 @@ static void gpio_irq_handler(unsigned in
desc->chip->ack(irq);
- bank = (struct gpio_bank *) desc->data;
+ bank = (struct gpio_bank *) desc->handler_data;
if (bank->method == METHOD_MPUIO)
isr_reg = bank->base + OMAP_MPUIO_GPIO_INT;
#ifdef CONFIG_ARCH_OMAP15XX
@@ -837,6 +837,8 @@ static struct irqchip mpuio_irq_chip = {
.unmask = mpuio_unmask_irq
};
+static DEFINE_IRQ_CHAINED_TYPE(gpio_irq_handler);
+
static int initialized;
static struct clk * gpio_ick;
static struct clk * gpio_fck;
Index: linux-2.6.16/arch/arm/plat-omap/mux.c
===================================================================
--- linux-2.6.16.orig/arch/arm/plat-omap/mux.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm/plat-omap/mux.c 2006-10-19 16:52:29.000000000 +0200
@@ -57,7 +57,7 @@ int __init omap_mux_register(struct pin_
*/
int __init_or_module omap_cfg_reg(const unsigned long index)
{
- static DEFINE_SPINLOCK(mux_spin_lock);
+ static DEFINE_RAW_SPINLOCK(mux_spin_lock);
unsigned long flags;
struct pin_config *cfg;
Index: linux-2.6.16/arch/arm26/boot/compressed/misc.c
===================================================================
--- linux-2.6.16.orig/arch/arm26/boot/compressed/misc.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/arm26/boot/compressed/misc.c 2006-10-19 16:52:29.000000000 +0200
@@ -184,6 +184,7 @@ static ulg free_mem_ptr_end;
#define HEAP_SIZE 0x2000
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
#ifndef STANDALONE_DEBUG
Index: linux-2.6.16/arch/i386/Kconfig
===================================================================
--- linux-2.6.16.orig/arch/i386/Kconfig 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/Kconfig 2006-10-19 16:52:29.000000000 +0200
@@ -14,6 +14,10 @@ config X86_32
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.
+config GENERIC_TIME
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
@@ -173,6 +177,8 @@ config HPET_EMULATE_RTC
depends on HPET_TIMER && RTC=y
default y
+source "kernel/time/Kconfig"
+
config SMP
bool "Symmetric multi-processing support"
---help---
@@ -228,6 +234,19 @@ config SCHED_SMT
source "kernel/Kconfig.preempt"
+config RWSEM_GENERIC_SPINLOCK
+ bool
+ depends on M386 || PREEMPT_RT
+ default y
+
+config ASM_SEMAPHORES
+ bool
+ default y
+
+config RWSEM_XCHGADD_ALGORITHM
+ bool
+ default y if !RWSEM_GENERIC_SPINLOCK
+
config X86_UP_APIC
bool "Local APIC support on uniprocessors"
depends on !SMP && !(X86_VISWS || X86_VOYAGER)
@@ -661,7 +680,7 @@ config BOOT_IOREMAP
config REGPARM
bool "Use register arguments (EXPERIMENTAL)"
- depends on EXPERIMENTAL
+ depends on EXPERIMENTAL && !MCOUNT
default n
help
Compile the kernel with -mregparm=3. This uses a different ABI
Index: linux-2.6.16/arch/i386/Kconfig.cpu
===================================================================
--- linux-2.6.16.orig/arch/i386/Kconfig.cpu 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/Kconfig.cpu 2006-10-19 16:52:29.000000000 +0200
@@ -235,11 +235,6 @@ config RWSEM_GENERIC_SPINLOCK
depends on M386
default y
-config RWSEM_XCHGADD_ALGORITHM
- bool
- depends on !M386
- default y
-
config GENERIC_CALIBRATE_DELAY
bool
default y
Index: linux-2.6.16/arch/i386/Kconfig.debug
===================================================================
--- linux-2.6.16.orig/arch/i386/Kconfig.debug 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/Kconfig.debug 2006-10-19 16:52:29.000000000 +0200
@@ -18,6 +18,7 @@ config EARLY_PRINTK
config DEBUG_STACKOVERFLOW
bool "Check for stack overflows"
depends on DEBUG_KERNEL
+ default y
help
This option will cause messages to be printed if free stack space
drops below a certain limit.
@@ -25,6 +26,7 @@ config DEBUG_STACKOVERFLOW
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL
+ default y
help
Enables the display of the minimum amount of free stack which each
task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -45,6 +47,7 @@ config DEBUG_PAGEALLOC
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
depends on DEBUG_KERNEL
+ default y
help
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
@@ -55,6 +58,7 @@ config DEBUG_RODATA
config 4KSTACKS
bool "Use 4Kb for kernel stacks instead of 8Kb"
depends on DEBUG_KERNEL
+ default y
help
If you say Y here the kernel will use a 4Kb stacksize for the
kernel stack attached to each process/thread. This facilitates
Index: linux-2.6.16/arch/i386/boot/compressed/misc.c
===================================================================
--- linux-2.6.16.orig/arch/i386/boot/compressed/misc.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/boot/compressed/misc.c 2006-10-19 16:52:29.000000000 +0200
@@ -15,6 +15,12 @@
#include
#include
+#ifdef CONFIG_MCOUNT
+void notrace mcount(void)
+{
+}
+#endif
+
/*
* gzip declarations
*/
@@ -112,7 +118,7 @@ static long free_mem_end_ptr;
#define INPLACE_MOVE_ROUTINE 0x1000
#define LOW_BUFFER_START 0x2000
#define LOW_BUFFER_MAX 0x90000
-#define HEAP_SIZE 0x3000
+#define HEAP_SIZE 0x4000
static unsigned int low_buffer_end, low_buffer_size;
static int high_loaded =0;
static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
@@ -125,6 +131,7 @@ static int lines, cols;
static void * xquad_portio = NULL;
#endif
+#define ZLIB_INFLATE_NO_INFLATE_LOCK
#include "../../../../lib/inflate.c"
static void *malloc(int size)
Index: linux-2.6.16/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/Makefile 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/Makefile 2006-10-19 16:52:29.000000000 +0200
@@ -4,13 +4,13 @@
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
+obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- quirks.o i8237.o topology.o
+ quirks.o i8237.o i8253.o tsc.o topology.o
+obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o
obj-y += cpu/
-obj-y += timers/
obj-y += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
@@ -20,6 +20,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
+obj-$(CONFIG_MCOUNT) += mcount-wrapper.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
@@ -37,6 +38,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_HPET_TIMER) += hpet.o
EXTRA_AFLAGS := -traditional
Index: linux-2.6.16/arch/i386/kernel/acpi/boot.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/acpi/boot.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/acpi/boot.c 2006-10-19 16:52:29.000000000 +0200
@@ -574,7 +574,7 @@ static int __init acpi_parse_sbf(unsigne
}
#ifdef CONFIG_HPET_TIMER
-
+#include
static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
{
struct acpi_table_hpet *hpet_tbl;
@@ -596,6 +596,7 @@ static int __init acpi_parse_hpet(unsign
#ifdef CONFIG_X86_64
vxtime.hpet_address = hpet_tbl->addr.addrl |
((long)hpet_tbl->addr.addrh << 32);
+ hpet_address = vxtime.hpet_address;
printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
hpet_tbl->id, vxtime.hpet_address);
@@ -604,10 +605,10 @@ static int __init acpi_parse_hpet(unsign
extern unsigned long hpet_address;
hpet_address = hpet_tbl->addr.addrl;
- printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
- hpet_tbl->id, hpet_address);
}
-#endif /* X86 */
+#endif /* X86 */
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
return 0;
}
Index: linux-2.6.16/arch/i386/kernel/apic.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/apic.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/apic.c 2006-10-19 16:52:29.000000000 +0200
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
@@ -58,6 +59,23 @@ int enable_local_apic __initdata = 0; /*
*/
int apic_verbosity;
+static unsigned int calibration_result;
+
+static void lapic_next_event(unsigned long evt);
+static void lapic_timer_setup(int mode);
+
+static struct clock_event lapic_clockevent = {
+ .name = "lapic",
+ .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE |
+ CLOCK_HAS_IRQHANDLER
+#ifdef CONFIG_SMP
+ | CLOCK_CAP_UPDATE
+#endif
+ ,
+ .shift = 32,
+ .set_mode = lapic_timer_setup,
+ .set_next_event = lapic_next_event,
+};
static void apic_pm_activate(void);
@@ -883,6 +901,11 @@ fake_ioapic_page:
*/
/*
+ * FIXME: Move this to i8253.h. There is no need to keep the access to
+ * the PIT scattered all around the place -tglx
+ */
+
+/*
* The timer chip is already set up at HZ interrupts per second here,
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
@@ -940,13 +963,15 @@ void (*wait_timer_tick)(void) __devinitd
#define APIC_DIVISOR 16
-static void __setup_APIC_LVTT(unsigned int clocks)
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot)
{
unsigned int lvtt_value, tmp_value, ver;
int cpu = smp_processor_id();
ver = GET_APIC_VERSION(apic_read(APIC_LVR));
- lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+ lvtt_value = LOCAL_TIMER_VECTOR;
+ if (!oneshot)
+ lvtt_value |= APIC_LVT_TIMER_PERIODIC;
if (!APIC_INTEGRATED(ver))
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
@@ -963,31 +988,37 @@ static void __setup_APIC_LVTT(unsigned i
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);
- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+ if (!oneshot)
+ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+static void lapic_next_event(unsigned long evt)
+{
+ apic_write_around(APIC_TMICT, evt);
}
-static void __devinit setup_APIC_timer(unsigned int clocks)
+static void lapic_timer_setup(int mode)
{
unsigned long flags;
local_irq_save(flags);
-
- /*
- * Wait for IRQ0's slice:
- */
- wait_timer_tick();
-
- __setup_APIC_LVTT(clocks);
-
+ __setup_APIC_LVTT(calibration_result, mode == CLOCK_EVT_ONESHOT);
local_irq_restore(flags);
}
+static void __devinit setup_APIC_timer(void)
+{
+ setup_local_clockevent(&lapic_clockevent, CPU_MASK_NONE);
+}
+
/*
* In this function we calibrate APIC bus clocks to the external
* timer. Unfortunately we cannot use jiffies and the timer irq
* to calibrate, since some later bootup code depends on getting
* the first irq? Ugh.
*
+ * TODO: Fix this rather than saying "Ugh" -tglx
+ *
* We want to do the calibration only once since we
* want to have local timer irqs syncron. CPUs connected
* by the same APIC bus have the very same bus frequency.
@@ -1010,7 +1041,7 @@ static int __init calibrate_APIC_clock(v
* value into the APIC clock, we just want to get the
* counter running for calibration.
*/
- __setup_APIC_LVTT(1000000000);
+ __setup_APIC_LVTT(1000000000, 0);
/*
* The timer chip counts down to zero. Let's wait
@@ -1047,6 +1078,13 @@ static int __init calibrate_APIC_clock(v
result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc32(tt1-tt2, TICK_NSEC * LOOPS);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+
if (cpu_has_tsc)
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
@@ -1061,8 +1099,6 @@ static int __init calibrate_APIC_clock(v
return result;
}
-static unsigned int calibration_result;
-
void __init setup_boot_APIC_clock(void)
{
unsigned long flags;
@@ -1075,14 +1111,14 @@ void __init setup_boot_APIC_clock(void)
/*
* Now set up the timer for real.
*/
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
local_irq_restore(flags);
}
void __devinit setup_secondary_APIC_clock(void)
{
- setup_APIC_timer(calibration_result);
+ setup_APIC_timer();
}
void disable_APIC_timer(void)
@@ -1153,6 +1189,8 @@ inline void smp_local_timer_interrupt(st
update_process_times(user_mode_vm(regs));
#endif
+ trace_special(regs->eip, 0, 0);
+
/*
* We take the 'long' return path, and there every subsystem
* grabs the apropriate locks (kernel lock/ irq lock).
@@ -1174,7 +1212,7 @@ inline void smp_local_timer_interrupt(st
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs)
{
int cpu = smp_processor_id();
@@ -1183,6 +1221,8 @@ fastcall void smp_apic_timer_interrupt(s
*/
per_cpu(irq_stat, cpu).apic_timer_irqs++;
+ trace_special(regs->eip, 0, 0);
+
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
@@ -1194,7 +1234,17 @@ fastcall void smp_apic_timer_interrupt(s
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
- smp_local_timer_interrupt(regs);
+ /*
+ * If the task is currently running in user mode, don't
+ * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not
+ * configured, this should be optimized out.
+ */
+ if (user_mode(regs))
+ touch_softlockup_watchdog();
+
+ if (lapic_clockevent.event_handler)
+ lapic_clockevent.event_handler(regs);
+
irq_exit();
}
@@ -1286,6 +1336,7 @@ fastcall void smp_error_interrupt(struct
*/
printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
smp_processor_id(), v , v1);
+ dump_stack();
irq_exit();
}
Index: linux-2.6.16/arch/i386/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/cpu/mtrr/generic.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/cpu/mtrr/generic.c 2006-10-19 16:52:29.000000000 +0200
@@ -234,7 +234,7 @@ static unsigned long set_mtrr_state(u32
static unsigned long cr4 = 0;
static u32 deftype_lo, deftype_hi;
-static DEFINE_SPINLOCK(set_atomicity_lock);
+static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
/*
* Since we are disabling the cache don't allow any interrupts - they
Index: linux-2.6.16/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/entry.S 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/entry.S 2006-10-19 16:52:29.000000000 +0200
@@ -43,6 +43,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -76,7 +77,7 @@ NT_MASK = 0x00004000
VM_MASK = 0x00020000
#ifdef CONFIG_PREEMPT
-#define preempt_stop cli
+#define preempt_stop cli; TRACE_IRQS_OFF
#else
#define preempt_stop
#define resume_kernel restore_nocheck
@@ -160,14 +161,17 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
cli
+ cmpl $0, kernel_preemption
+ jz restore_nocheck
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
- jz restore_all
+ jz restore_nocheck
testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all
+ jz restore_nocheck
+ cli
call preempt_schedule_irq
jmp need_resched
#endif
@@ -179,6 +183,10 @@ need_resched:
ENTRY(sysenter_entry)
movl TSS_sysenter_esp0(%esp),%esp
sysenter_past_esp:
+ /*
+ * No need to trace this one: sysenter disabled irqs and
+ * we quickly enable it without doing anything else:
+ */
sti
pushl $(__USER_DS)
pushl %ebp
@@ -200,6 +208,11 @@ sysenter_past_esp:
pushl %eax
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -210,13 +223,20 @@ sysenter_past_esp:
call *sys_call_table(,%eax,4)
movl %eax,EAX(%esp)
cli
+ TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
jne syscall_exit_work
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %eax
+ call sys_ret
+ popl %eax
+#endif
/* if something modifies registers it must also disable sysexit */
movl EIP(%esp), %edx
movl OLDESP(%esp), %ecx
xorl %ebp,%ebp
+ TRACE_IRQS_ON
sti
sysexit
@@ -225,6 +245,11 @@ sysenter_past_esp:
ENTRY(system_call)
pushl %eax # save orig_eax
SAVE_ALL
+#ifdef CONFIG_LATENCY_TRACE
+ pushl %edx; pushl %ecx; pushl %ebx; pushl %eax
+ call sys_call
+ popl %eax; popl %ebx; popl %ecx; popl %edx
+#endif
GET_THREAD_INFO(%ebp)
# system call tracing in operation / emulation
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -239,6 +264,7 @@ syscall_exit:
cli # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
+ TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx # current->work
jne syscall_exit_work
@@ -254,11 +280,14 @@ restore_all:
cmpl $((4 << 8) | 3), %eax
je ldt_ss # returning to user-space with LDT SS
restore_nocheck:
+ TRACE_IRQS_ON
+restore_nocheck2:
RESTORE_REGS
addl $4, %esp
1: iret
.section .fixup,"ax"
iret_exc:
+ TRACE_IRQS_ON
sti
pushl $0 # no error code
pushl $do_iret_error
@@ -282,10 +311,12 @@ ldt_ss:
* dosemu and wine happy. */
subl $8, %esp # reserve space for switch16 pointer
cli
+ TRACE_IRQS_OFF
movl %esp, %eax
/* Set up the 16bit stack frame with switch32 pointer on top,
* and a switch16 pointer on top of the current frame. */
call setup_x86_bogus_stack
+ TRACE_IRQS_ON
RESTORE_REGS
lss 20+4(%esp), %esp # switch to 16bit stack
1: iret
@@ -297,18 +328,20 @@ ldt_ss:
# perform work that needs to be done immediately before resumption
ALIGN
work_pending:
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jz work_notifysig
work_resched:
- call schedule
- cli # make sure we don't miss an interrupt
+ cli
+ call __schedule
+ # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
+ TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
# than syscall tracing?
jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
+ testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx
jnz work_resched
work_notifysig: # deal with pending signals and
@@ -353,6 +386,7 @@ syscall_trace_entry:
syscall_exit_work:
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
jz work_pending
+ TRACE_IRQS_ON
sti # could let do_syscall_trace() call
# schedule() instead
movl %esp, %eax
@@ -414,9 +448,14 @@ ENTRY(irq_entries_start)
vector=vector+1
.endr
+/*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+ */
ALIGN
common_interrupt:
SAVE_ALL
+ TRACE_IRQS_OFF
movl %esp,%eax
call do_IRQ
jmp ret_from_intr
@@ -425,6 +464,7 @@ common_interrupt:
ENTRY(name) \
pushl $nr-256; \
SAVE_ALL \
+ TRACE_IRQS_OFF \
movl %esp,%eax; \
call smp_/**/name; \
jmp ret_from_intr;
@@ -554,7 +594,8 @@ nmi_stack_correct:
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
- jmp restore_all
+ jmp restore_nocheck2
+# jmp restore_all
nmi_stack_fixup:
FIX_STACK(12,nmi_stack_correct, 1)
Index: linux-2.6.16/arch/i386/kernel/head.S
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/head.S 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/head.S 2006-10-19 16:52:29.000000000 +0200
@@ -404,6 +404,7 @@ ignore_int:
call printk
#endif
addl $(5*4),%esp
+ call dump_stack
popl %ds
popl %es
popl %edx
Index: linux-2.6.16/arch/i386/kernel/hpet.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/arch/i386/kernel/hpet.c 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,67 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#define HPET_MASK 0xFFFFFFFF
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+static void *hpet_ptr;
+
+static cycle_t read_hpet(void)
+{
+ return (cycle_t)readl(hpet_ptr);
+}
+
+static struct clocksource clocksource_hpet = {
+ .name = "hpet",
+ .rating = 250,
+ .read = read_hpet,
+ .mask = (cycle_t)HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+ .is_continuous = 1,
+};
+
+static int __init init_hpet_clocksource(void)
+{
+ unsigned long hpet_period;
+ void __iomem* hpet_base;
+ u64 tmp;
+
+ if (!hpet_address)
+ return -ENODEV;
+
+ /* calculate the hpet address: */
+ hpet_base =
+ (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+ hpet_ptr = hpet_base + HPET_COUNTER;
+
+ /* calculate the frequency: */
+ hpet_period = readl(hpet_base + HPET_PERIOD);
+
+ /*
+ * hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * approximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ clocksource_hpet.mult = (u32)tmp;
+
+ return register_clocksource(&clocksource_hpet);
+}
+
+module_init(init_hpet_clocksource);
Index: linux-2.6.16/arch/i386/kernel/i386_ksyms.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/i386_ksyms.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/i386_ksyms.c 2006-10-19 16:52:29.000000000 +0200
@@ -3,10 +3,12 @@
#include
#include
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#ifdef CONFIG_ASM_SEMAPHORES
+EXPORT_SYMBOL(__compat_down_failed);
+EXPORT_SYMBOL(__compat_down_failed_interruptible);
+EXPORT_SYMBOL(__compat_down_failed_trylock);
+EXPORT_SYMBOL(__compat_up_wakeup);
+#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
@@ -22,7 +24,7 @@ EXPORT_SYMBOL(__put_user_8);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES)
extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
EXPORT_SYMBOL(__write_lock_failed);
Index: linux-2.6.16/arch/i386/kernel/i8253.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/arch/i386/kernel/i8253.c 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,135 @@
+/*
+ * i8253.c 8253/PIT functions
+ *
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "io_ports.h"
+
+DEFINE_RAW_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
+
+static void init_pit_timer(int mode)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+
+ if (mode != CLOCK_EVT_ONESHOT) {
+ /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(0x34, PIT_MODE);
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ } else {
+ /* One shot setup */
+ outb_p(0x38, PIT_MODE);
+ udelay(10);
+ }
+
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+static void pit_next_event(unsigned long evt)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(evt & 0xff , PIT_CH0); /* LSB */
+ outb(evt >> 8 , PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+static struct clock_event pit_clockevent = {
+ .name = "pit",
+ .capabilities = CLOCK_CAP_TICK
+#ifndef CONFIG_SMP
+ | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE |
+ CLOCK_CAP_UPDATE
+#endif
+ ,
+ .set_mode = init_pit_timer,
+ .set_next_event = pit_next_event,
+ .start_event = io_apic_timer_ack,
+ .end_event = mca_timer_ack,
+ .shift = 32,
+ .irq = 0,
+};
+
+void setup_pit_timer(void)
+{
+ pit_clockevent.mult = div_sc32(CLOCK_TICK_RATE, NSEC_PER_SEC);
+ pit_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFF, &pit_clockevent);
+ pit_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &pit_clockevent);
+ setup_global_clockevent(&pit_clockevent, CPU_MASK_NONE);
+}
+
+/*
+ * Since the PIT overflows every tick, it's not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter:
+ */
+static cycle_t pit_read(void)
+{
+ unsigned long flags, seq;
+ int count;
+ u64 jifs;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_p(PIT_CH0); /* read the latched count */
+ count |= inb_p(PIT_CH0) << 8;
+
+ /* VIA686a test code... reset the latch if count > max + 1 */
+ if (count > LATCH) {
+ outb_p(0x34, PIT_MODE);
+ outb_p(LATCH & 0xff, PIT_CH0);
+ outb(LATCH >> 8, PIT_CH0);
+ count = LATCH - 1;
+ }
+ spin_unlock_irqrestore(&i8253_lock, flags);
+
+ jifs = jiffies_64;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ jifs -= INITIAL_JIFFIES;
+ count = (LATCH-1) - count;
+
+ return (cycle_t)(jifs * LATCH) + count;
+}
+
+static struct clocksource clocksource_pit = {
+ .name = "pit",
+ .rating = 110,
+ .read = pit_read,
+ .mask = (cycle_t)-1,
+ .mult = 0,
+ .shift = 20,
+};
+
+static int __init init_pit_clocksource(void)
+{
+ if (num_possible_cpus() > 4) /* PIT does not scale! */
+ return 0;
+
+ clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+ return register_clocksource(&clocksource_pit);
+}
+module_init(init_pit_clocksource);
Index: linux-2.6.16/arch/i386/kernel/i8259.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/i8259.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/i8259.c 2006-10-19 16:52:29.000000000 +0200
@@ -35,7 +35,7 @@
* moves to arch independent land
*/
-DEFINE_SPINLOCK(i8259A_lock);
+DEFINE_RAW_SPINLOCK(i8259A_lock);
static void end_8259A_irq (unsigned int irq)
{
@@ -366,7 +366,7 @@ static irqreturn_t math_error_irq(int cp
* New motherboards sometimes make IRQ 13 be a PCI interrupt,
* so allow interrupt sharing.
*/
-static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL };
+static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL };
void __init init_ISA_irqs (void)
{
@@ -422,12 +422,6 @@ void __init init_IRQ(void)
intr_init_hook();
/*
- * Set the clock to HZ Hz, we already have a valid
- * vector now:
- */
- setup_pit_timer();
-
- /*
* External FPU? Set up irq13 if so, for
* original braindamaged IBM FERR coupling.
*/
Index: linux-2.6.16/arch/i386/kernel/io_apic.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/io_apic.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/io_apic.c 2006-10-19 16:52:29.000000000 +0200
@@ -49,7 +49,7 @@ atomic_t irq_mis_count;
/* Where if anywhere is the i8259 connect in external int mode */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
int timer_over_8254 __initdata = 1;
@@ -92,6 +92,27 @@ int vector_irq[NR_VECTORS] __read_mostly
#define vector_to_irq(vector) (vector)
#endif
+static int timer_ack;
+
+void io_apic_timer_ack(void *priv)
+{
+ unsigned long flags;
+
+ if (timer_ack) {
+ /*
+ * Subtle, when I/O APICs are used we have to ack timer IRQ
+ * manually to reset the IRR bit for do_slow_gettimeoffset().
+ * This will also deassert NMI lines for the watchdog if run
+ * on an 82489DX-based system.
+ */
+ spin_lock_irqsave(&i8259A_lock, flags);
+ outb(0x0c, PIC_MASTER_OCW3);
+ /* Ack the IRQ; AEOI will end it automatically. */
+ inb(PIC_MASTER_POLL);
+ spin_unlock_irqrestore(&i8259A_lock, flags);
+ }
+}
+
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
@@ -135,6 +156,105 @@ static void __init replace_pin_at_irq(un
}
}
+//#define IOAPIC_CACHE
+
+#ifdef IOAPIC_CACHE
+# define MAX_IOAPIC_CACHE 512
+
+/*
+ * Cache register values:
+ */
+static unsigned int io_apic_cache[MAX_IO_APICS][MAX_IOAPIC_CACHE]
+ ____cacheline_aligned_in_smp;
+#endif
+
+inline unsigned int __raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ *IO_APIC_BASE(apic) = reg;
+ return *(IO_APIC_BASE(apic)+4);
+}
+
+unsigned int raw_io_apic_read(unsigned int apic, unsigned int reg)
+{
+ unsigned int val = __raw_io_apic_read(apic, reg);
+
+#ifdef IOAPIC_CACHE
+ io_apic_cache[apic][reg] = val;
+#endif
+ return val;
+}
+
+unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+#ifdef IOAPIC_CACHE
+ if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+ static int once = 1;
+
+ if (once) {
+ once = 0;
+ printk("WARNING: ioapic register cache overflow: %d.\n",
+ reg);
+ dump_stack();
+ }
+ return __raw_io_apic_read(apic, reg);
+ }
+ if (io_apic_cache[apic][reg] && !sis_apic_bug)
+ return io_apic_cache[apic][reg];
+#endif
+ return raw_io_apic_read(apic, reg);
+}
+
+void io_apic_write(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+ if (unlikely(reg >= MAX_IOAPIC_CACHE)) {
+ static int once = 1;
+
+ if (once) {
+ once = 0;
+ printk("WARNING: ioapic register cache overflow: %d.\n",
+ reg);
+ dump_stack();
+ }
+ } else
+ io_apic_cache[apic][reg] = val;
+#endif
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = val;
+}
+
+/*
+ * Some systems need a POST flush or else level-triggered interrupts
+ * generate lots of spurious interrupts due to the POST-ed write not
+ * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC.
+ */
+#ifdef CONFIG_SMP
+# define IOAPIC_POSTFLUSH
+#endif
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val)
+{
+#ifdef IOAPIC_CACHE
+ io_apic_cache[apic][reg] = val;
+#endif
+ if (unlikely(sis_apic_bug))
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = val;
+#ifndef IOAPIC_POSTFLUSH
+ if (unlikely(sis_apic_bug))
+#endif
+ /*
+ * Force POST flush by reading:
+ */
+ val = *(IO_APIC_BASE(apic)+4);
+}
+
static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
{
struct irq_pin_list *entry = irq_2_pin + irq;
@@ -166,18 +286,6 @@ static void __unmask_IO_APIC_irq (unsign
__modify_IO_APIC_irq(irq, 0, 0x00010000);
}
-/* mask = 1, trigger = 0 */
-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
-}
-
-/* mask = 0, trigger = 1 */
-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
-{
- __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
-}
-
static void mask_IO_APIC_irq (unsigned int irq)
{
unsigned long flags;
@@ -1432,8 +1540,8 @@ void __init print_IO_APIC(void)
struct IO_APIC_route_entry entry;
spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+ *(((int *)&entry)+0) = raw_io_apic_read(apic, 0x10+i*2);
+ *(((int *)&entry)+1) = raw_io_apic_read(apic, 0x11+i*2);
spin_unlock_irqrestore(&ioapic_lock, flags);
printk(KERN_DEBUG " %02x %03X %02X ",
@@ -1479,7 +1587,7 @@ void __init print_IO_APIC(void)
return;
}
-#if 0
+#if 1
static void print_APIC_bitfield (int base)
{
@@ -1879,7 +1987,7 @@ static int __init timer_irq_works(void)
* might have cached one ExtINT interrupt. Finally, at
* least one tick may be lost due to delays.
*/
- if (jiffies - t1 > 4)
+ if (jiffies - t1 > 4 && jiffies - t1 < 16)
return 1;
return 0;
@@ -1932,9 +2040,11 @@ static unsigned int startup_edge_ioapic_
static void ack_edge_ioapic_irq(unsigned int irq)
{
move_irq(irq);
+#if 0
if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
== (IRQ_PENDING | IRQ_DISABLED))
mask_IO_APIC_irq(irq);
+#endif
ack_APIC_irq();
}
@@ -1959,6 +2069,30 @@ static unsigned int startup_level_ioapic
return 0; /* don't check for pending */
}
+#ifdef CONFIG_PREEMPT_HARDIRQS
+
+/*
+ * in the PREEMPT_HARDIRQS case we don't want to keep the local
+ * APIC unacked, because the prevents further interrupts from
+ * being handled - and with IRQ threads being delayed arbitrarily,
+ * that's unacceptable. So we first mask the IRQ, then ack it.
+ * The hardirq thread will then unmask it.
+ */
+static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+{
+ move_irq(irq);
+ mask_IO_APIC_irq(irq);
+ ack_APIC_irq();
+}
+
+#else
+
+static void mask_and_ack_level_ioapic_irq(unsigned int irq)
+{
+}
+
+#endif
+
static void end_level_ioapic_irq (unsigned int irq)
{
unsigned long v;
@@ -1993,8 +2127,10 @@ static void end_level_ioapic_irq (unsign
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(irq);
- __unmask_and_level_IO_APIC_irq(irq);
+ /* mask = 1, trigger = 0 */
+ __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
+ /* mask = 0, trigger = 1 */
+ __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
spin_unlock(&ioapic_lock);
}
}
@@ -2022,6 +2158,13 @@ static unsigned int startup_level_ioapic
return startup_level_ioapic_irq (irq);
}
+static void mask_and_ack_level_ioapic_vector (unsigned int vector)
+{
+ int irq = vector_to_irq(vector);
+
+ mask_and_ack_level_ioapic_irq(irq);
+}
+
static void end_level_ioapic_vector (unsigned int vector)
{
int irq = vector_to_irq(vector);
Index: linux-2.6.16/arch/i386/kernel/irq.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/irq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/irq.c 2006-10-19 16:52:29.000000000 +0200
@@ -51,7 +51,7 @@ static union irq_ctx *softirq_ctx[NR_CPU
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-fastcall unsigned int do_IRQ(struct pt_regs *regs)
+fastcall notrace unsigned int do_IRQ(struct pt_regs *regs)
{
/* high bits used in ret_from_ code */
int irq = regs->orig_eax & 0xff;
@@ -59,8 +59,12 @@ fastcall unsigned int do_IRQ(struct pt_r
union irq_ctx *curctx, *irqctx;
u32 *isp;
#endif
-
irq_enter();
+#ifdef CONFIG_LATENCY_TRACE
+ if (irq == trace_user_trigger_irq)
+ user_trace_start();
+#endif
+ trace_special(regs->eip, irq, 0);
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
{
@@ -69,7 +73,7 @@ fastcall unsigned int do_IRQ(struct pt_r
__asm__ __volatile__("andl %%esp,%0" :
"=r" (esp) : "0" (THREAD_SIZE - 1));
if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) {
- printk("do_IRQ: stack overflow: %ld\n",
+ printk("BUG: do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct thread_info));
dump_stack();
}
@@ -224,8 +228,10 @@ int show_interrupts(struct seq_file *p,
}
if (i < NR_IRQS) {
- spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ irq_desc_t *desc = irq_desc + i;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -235,15 +241,27 @@ int show_interrupts(struct seq_file *p,
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
- seq_printf(p, " %14s", irq_desc[i].handler->typename);
+ seq_printf(p, " %-14s", desc->handler->typename);
+#define F(x,c) ((desc->status & x) ? c : '.')
+ seq_printf(p, " [%c%c%c%c%c%c%c%c%c/",
+ F(IRQ_INPROGRESS, 'I'),
+ F(IRQ_DISABLED, 'D'),
+ F(IRQ_PENDING, 'P'),
+ F(IRQ_REPLAY, 'R'),
+ F(IRQ_AUTODETECT, 'A'),
+ F(IRQ_WAITING, 'W'),
+ F(IRQ_LEVEL, 'L'),
+ F(IRQ_MASKED, 'M'),
+ F(IRQ_NODELAY, 'N'));
+#undef F
+ seq_printf(p, "%3d]", desc->irqs_unhandled);
seq_printf(p, " %s", action->name);
-
for (action=action->next; action; action = action->next)
seq_printf(p, ", %s", action->name);
seq_putc(p, '\n');
skip:
- spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ spin_unlock_irqrestore(&desc->lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
for_each_online_cpu(j)
Index: linux-2.6.16/arch/i386/kernel/mca.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/mca.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/mca.c 2006-10-19 16:52:29.000000000 +0200
@@ -472,3 +472,22 @@ void mca_handle_nmi(void)
mca_nmi_hook();
} /* mca_handle_nmi */
+
+void mca_timer_ack(void *priv)
+{
+ int irq;
+
+ if (MCA_bus) {
+ /* The PS/2 uses level-triggered interrupts. You can't
+ turn them off, nor would you want to (any attempt to
+ enable edge-triggered interrupts usually gets intercepted by a
+ special hardware circuit). Hence we have to acknowledge
+ the timer interrupt. Through some incredibly stupid
+ design idea, the reset for IRQ 0 is done by setting the
+ high bit of the PPI port B (0x61). Note that some PS/2s,
+ notably the 55SX, work fine if this is removed. */
+
+ irq = inb_p( 0x61 ); /* read the current state */
+ outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
+ }
+}
Index: linux-2.6.16/arch/i386/kernel/mcount-wrapper.S
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.16/arch/i386/kernel/mcount-wrapper.S 2006-10-19 16:52:29.000000000 +0200
@@ -0,0 +1,27 @@
+/*
+ * linux/arch/i386/mcount-wrapper.S
+ *
+ * Copyright (C) 2004 Ingo Molnar
+ */
+
+.globl mcount
+mcount:
+
+ cmpl $0, mcount_enabled
+ jz out
+
+ push %ebp
+ mov %esp, %ebp
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+
+ call __mcount
+
+ popl %edx
+ popl %ecx
+ popl %eax
+ popl %ebp
+out:
+ ret
+
Index: linux-2.6.16/arch/i386/kernel/microcode.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/microcode.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/microcode.c 2006-10-19 16:52:29.000000000 +0200
@@ -111,7 +111,7 @@ MODULE_LICENSE("GPL");
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
+static DEFINE_RAW_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DECLARE_MUTEX(microcode_sem);
Index: linux-2.6.16/arch/i386/kernel/nmi.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/nmi.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/nmi.c 2006-10-19 16:52:29.000000000 +0200
@@ -34,7 +34,7 @@
unsigned int nmi_watchdog = NMI_NONE;
extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
+static unsigned int nmi_hz = 1000;
static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
static unsigned int nmi_p4_cccr_val;
extern void show_registers(struct pt_regs *regs);
@@ -141,7 +141,7 @@ static int __init check_nmi_watchdog(voi
for_each_cpu(cpu)
prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count;
local_irq_enable();
- mdelay((10*1000)/nmi_hz); // wait 10 ticks
+ mdelay((100*1000)/nmi_hz); // wait 100 ticks
for (cpu = 0; cpu < NR_CPUS; cpu++) {
#ifdef CONFIG_SMP
@@ -168,7 +168,7 @@ static int __init check_nmi_watchdog(voi
/* now that we know it works we can reduce NMI frequency to
something more reasonable; makes a difference in some configs */
if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = 1;
+ nmi_hz = 10000;
kfree(prev_nmi_count);
return 0;
@@ -521,9 +521,34 @@ void touch_nmi_watchdog (void)
extern void die_nmi(struct pt_regs *, const char *msg);
-void nmi_watchdog_tick (struct pt_regs * regs)
+int nmi_show_regs[NR_CPUS];
+
+void nmi_show_all_regs(void)
{
+ int i;
+
+ if (nmi_watchdog == NMI_NONE)
+ return;
+ if (system_state != SYSTEM_RUNNING) {
+ printk("nmi_show_all_regs(): system state %d, not doing.\n",
+ system_state);
+ return;
+ }
+ printk("nmi_show_all_regs(): start on CPU#%d.\n",
+ raw_smp_processor_id());
+ dump_stack();
+
+ for_each_online_cpu(i)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+}
+
+static DEFINE_RAW_SPINLOCK(nmi_print_lock);
+void notrace nmi_watchdog_tick (struct pt_regs * regs)
+{
/*
* Since current_thread_info()-> is always on the stack, and we
* always switch the stack NMI-atomically, it's safe to use
@@ -531,7 +556,16 @@ void nmi_watchdog_tick (struct pt_regs *
*/
int sum, cpu = smp_processor_id();
- sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
+ sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
+
+ profile_tick(CPU_PROFILING, regs);
+ if (nmi_show_regs[cpu]) {
+ nmi_show_regs[cpu] = 0;
+ spin_lock(&nmi_print_lock);
+ printk("NMI show regs on CPU#%d:\n", cpu);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+ }
if (last_irq_sums[cpu] == sum) {
/*
@@ -539,15 +573,29 @@ void nmi_watchdog_tick (struct pt_regs *
* wait a few IRQs (5 seconds) before doing the oops ...
*/
alert_counter[cpu]++;
- if (alert_counter[cpu] == 5*nmi_hz)
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
+ if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) {
+ int i;
+
+ bust_spinlocks(1);
+ spin_lock(&nmi_print_lock);
+ printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz);
+ show_regs(regs);
+ spin_unlock(&nmi_print_lock);
+
+ for_each_online_cpu(i)
+ if (i != cpu)
+ nmi_show_regs[i] = 1;
+ for_each_online_cpu(i)
+ while (nmi_show_regs[i] == 1)
+ barrier();
+
die_nmi(regs, "NMI Watchdog detected LOCKUP");
+ }
} else {
last_irq_sums[cpu] = sum;
alert_counter[cpu] = 0;
}
+
if (nmi_perfctr_msr) {
if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
/*
Index: linux-2.6.16/arch/i386/kernel/numaq.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/numaq.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/numaq.c 2006-10-19 16:52:29.000000000 +0200
@@ -79,10 +79,12 @@ int __init get_memcfg_numaq(void)
return 1;
}
-static int __init numaq_dsc_disable(void)
+static int __init numaq_tsc_disable(void)
{
- printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
- tsc_disable = 1;
+ if (num_online_nodes() > 1) {
+ printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
+ tsc_disable = 1;
+ }
return 0;
}
-core_initcall(numaq_dsc_disable);
+arch_initcall(numaq_tsc_disable);
Index: linux-2.6.16/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/process.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/process.c 2006-10-19 16:52:29.000000000 +0200
@@ -105,16 +105,16 @@ void default_idle(void)
if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb__after_clear_bit();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
local_irq_disable();
- if (!need_resched())
+ if (!need_resched() && !need_resched_delayed())
safe_halt();
else
local_irq_enable();
}
set_thread_flag(TIF_POLLING_NRFLAG);
} else {
- while (!need_resched())
+ while (!need_resched() && !need_resched_delayed())
cpu_relax();
}
}
@@ -179,7 +179,9 @@ void cpu_idle(void)
/* endless idle loop with no priority at all */
while (1) {
- while (!need_resched()) {
+ BUG_ON(irqs_disabled());
+
+ while (!need_resched() && !need_resched_delayed()) {
void (*idle)(void);
if (__get_cpu_var(cpu_idle_state))
@@ -197,9 +199,11 @@ void cpu_idle(void)
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
- preempt_enable_no_resched();
- schedule();
+ local_irq_disable();
+ __preempt_enable_no_resched();
+ __schedule();
preempt_disable();
+ local_irq_enable();
}
}
@@ -242,10 +246,10 @@ static void mwait_idle(void)
{
local_irq_enable();
- while (!need_resched()) {
+ while (!need_resched() && !need_resched_delayed()) {
__monitor((void *)¤t_thread_info()->flags, 0, 0);
smp_mb();
- if (need_resched())
+ if (need_resched() || need_resched_delayed())
break;
__mwait(0, 0);
}
@@ -373,11 +377,16 @@ void exit_thread(void)
/* The process may have allocated an io port bitmap... nuke it. */
if (unlikely(NULL != t->io_bitmap_ptr)) {
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ int cpu;
+ struct tss_struct *tss;
+ void *io_bitmap_ptr = t->io_bitmap_ptr;
- kfree(t->io_bitmap_ptr);
t->io_bitmap_ptr = NULL;
+ mb();
+ kfree(io_bitmap_ptr);
+
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
/*
* Careful, clear this in the TSS too:
*/
Index: linux-2.6.16/arch/i386/kernel/semaphore.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/semaphore.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/semaphore.c 2006-10-19 16:52:29.000000000 +0200
@@ -13,6 +13,7 @@
* rw semaphores implemented November 1999 by Benjamin LaHaise
*/
#include
+#include
#include
/*
@@ -28,15 +29,15 @@
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed\n"
-"__down_failed:\n\t"
+".globl __compat_down_failed\n"
+"__compat_down_failed:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down\n\t"
+ "call __compat_down\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -49,15 +50,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_interruptible\n"
-"__down_failed_interruptible:\n\t"
+".globl __compat_down_failed_interruptible\n"
+"__compat_down_failed_interruptible:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_interruptible\n\t"
+ "call __compat_down_interruptible\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -70,15 +71,15 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __down_failed_trylock\n"
-"__down_failed_trylock:\n\t"
+".globl __compat_down_failed_trylock\n"
+"__compat_down_failed_trylock:\n\t"
#if defined(CONFIG_FRAME_POINTER)
"pushl %ebp\n\t"
"movl %esp,%ebp\n\t"
#endif
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __down_trylock\n\t"
+ "call __compat_down_trylock\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
#if defined(CONFIG_FRAME_POINTER)
@@ -91,45 +92,13 @@ asm(
asm(
".section .sched.text\n"
".align 4\n"
-".globl __up_wakeup\n"
-"__up_wakeup:\n\t"
+".globl __compat_up_wakeup\n"
+"__compat_up_wakeup:\n\t"
"pushl %edx\n\t"
"pushl %ecx\n\t"
- "call __up\n\t"
+ "call __compat_up\n\t"
"popl %ecx\n\t"
"popl %edx\n\t"
"ret"
);
-/*
- * rw spinlock fallbacks
- */
-#if defined(CONFIG_SMP)
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __write_lock_failed\n"
-"__write_lock_failed:\n\t"
- LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jne 1b\n\t"
- LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
- "jnz __write_lock_failed\n\t"
- "ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __read_lock_failed\n"
-"__read_lock_failed:\n\t"
- LOCK "incl (%eax)\n"
-"1: rep; nop\n\t"
- "cmpl $1,(%eax)\n\t"
- "js 1b\n\t"
- LOCK "decl (%eax)\n\t"
- "js __read_lock_failed\n\t"
- "ret"
-);
-#endif
Index: linux-2.6.16/arch/i386/kernel/setup.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/setup.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/setup.c 2006-10-19 16:52:29.000000000 +0200
@@ -1632,6 +1632,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ tsc_init();
}
#include "setup_arch_post.h"
Index: linux-2.6.16/arch/i386/kernel/signal.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/signal.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/signal.c 2006-10-19 16:52:29.000000000 +0200
@@ -531,6 +531,13 @@ handle_signal(unsigned long sig, siginfo
}
}
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
* that register information in the sigcontext is correct.
@@ -571,6 +578,13 @@ static void fastcall do_signal(struct pt
struct k_sigaction ka;
sigset_t *oldset;
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Fully-preemptible kernel does not need interrupts disabled:
+ */
+ local_irq_enable();
+ preempt_check_resched();
+#endif
/*
* We want the common case to go fast, which
* is why we may in certain cases get here from
Index: linux-2.6.16/arch/i386/kernel/smp.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/smp.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/smp.c 2006-10-19 16:52:29.000000000 +0200
@@ -245,7 +245,7 @@ void send_IPI_mask_sequence(cpumask_t ma
static cpumask_t flush_cpumask;
static struct mm_struct * flush_mm;
static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
+static DEFINE_RAW_SPINLOCK(tlbstate_lock);
#define FLUSH_ALL 0xffffffff
/*
@@ -390,7 +390,7 @@ static void flush_tlb_others(cpumask_t c
while (!cpus_empty(flush_cpumask))
/* nothing. lockup detection does not belong here */
- mb();
+ cpu_relax();
flush_mm = NULL;
flush_va = 0;
@@ -481,10 +481,20 @@ void smp_send_reschedule(int cpu)
}
/*
+ * this function sends a 'reschedule' IPI to all other CPUs.
+ * This is used when RT tasks are starving and other CPUs
+ * might be able to run them:
+ */
+void smp_send_reschedule_allbutself(void)
+{
+ send_IPI_allbutself(RESCHEDULE_VECTOR);
+}
+
+/*
* Structure and data for smp_call_function(). This is designed to minimise
* static memory requirements. It also looks cleaner.
*/
-static DEFINE_SPINLOCK(call_lock);
+static DEFINE_RAW_SPINLOCK(call_lock);
struct call_data_struct {
void (*func) (void *info);
@@ -593,13 +603,14 @@ void smp_send_stop(void)
}
/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back. Trigger a reschedule pass so that
+ * RT-overload balancing can pass tasks around.
*/
-fastcall void smp_reschedule_interrupt(struct pt_regs *regs)
+fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs)
{
+ trace_special(regs->eip, 0, 0);
ack_APIC_irq();
+ set_tsk_need_resched(current);
}
fastcall void smp_call_function_interrupt(struct pt_regs *regs)
Index: linux-2.6.16/arch/i386/kernel/smpboot.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/smpboot.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/smpboot.c 2006-10-19 16:52:29.000000000 +0200
@@ -208,142 +208,299 @@ valid_k7:
;
}
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
+static atomic_t tsc_start_flag, tsc_check_start, tsc_check_stop;
-static atomic_t tsc_start_flag = ATOMIC_INIT(0);
-static atomic_t tsc_count_start = ATOMIC_INIT(0);
-static atomic_t tsc_count_stop = ATOMIC_INIT(0);
-static unsigned long long tsc_values[NR_CPUS];
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp (void)
+static int __init check_tsc_warp(void)
{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- unsigned int one_usec;
- int buggy = 0;
-
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
- /* convert from kcyc/sec to cyc/usec */
- one_usec = cpu_khz / 1000;
+ static DEFINE_RAW_SPINLOCK(warp_lock);
+ static long long prev;
+ static unsigned int error;
- atomic_set(&tsc_start_flag, 1);
- wmb();
+ int cpus = num_booting_cpus(), nr = 0;
+ long long start, now, end, delta;
+ atomic_inc(&tsc_check_start);
+ while (atomic_read(&tsc_check_start) != cpus)
+ cpu_relax();
/*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
+ * Run the check for 500 msecs:
*/
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc_count_start) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc_count_start);
+ rdtscll(start);
+ end = start + cpu_khz*500;
- rdtscll(tsc_values[smp_processor_id()]);
+ for (;;) {
/*
- * We clear the TSC in the last loop:
+ * Check for the TSC going backwards (between CPUs):
*/
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ spin_lock(&warp_lock);
+ rdtscll(now);
+ delta = now - prev;
+ prev = now;
+ spin_unlock(&warp_lock);
+ if (unlikely(delta < 0))
+ error = 1;
+ if (now > end)
+ break;
/*
- * Wait for all APs to leave the synchronization point:
+ * Take it easy every couple of iterations,
+ * to not starve other CPUs:
*/
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1)
- mb();
- atomic_set(&tsc_count_start, 0);
- wmb();
- atomic_inc(&tsc_count_stop);
+ nr++;
+ if (!(nr % 31))
+ cpu_relax();
}
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc_values[i];
- sum += t0;
- }
- }
- avg = sum;
- do_div(avg, num_booting_cpus());
+ atomic_inc(&tsc_check_stop);
+ while (atomic_read(&tsc_check_stop) != cpus)
+ cpu_relax();
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- delta = tsc_values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long realdelta;
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta;
- do_div(realdelta, one_usec);
- if (tsc_values[i] < avg)
- realdelta = -realdelta;
+ return error;
+}
- printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta);
- }
+/*
+ * TSC synchronization based on ia64 itc synchronization code. Synchronize
+ * pairs of processors rather than trying to synchronize all of the processors
+ * with a single event. When several processors are all waiting for an
+ * event they don't all see it at the same time. The write will cause
+ * an invalidate on each processors cache and then they all scramble to
+ * re-read that cache line.
+ *
+ * Writing the TSC resets the upper 32-bits, so we need to be careful
+ * that all of the cpus can be synchronized before we overflow the
+ * 32-bit count.
+ */
- sum += delta;
+#define MASTER 0
+#define SLAVE (SMP_CACHE_BYTES/sizeof(long))
+
+#define NUM_ROUNDS 64 /* magic value */
+#define NUM_ITERS 5 /* likewise */
+
+static volatile unsigned long go[2*SLAVE] __cacheline_aligned;
+static volatile int current_slave = -1;
+static volatile int tsc_sync_complete = 0;
+static volatile int tsc_adj_latency = 0;
+static unsigned int max_rt = 0;
+static unsigned int max_delta = 0;
+
+#define DEBUG_TSC_SYNC 0
+#if DEBUG_TSC_SYNC
+struct tsc_sync_debug {
+ long rt; /* roundtrip time */
+ long master; /* master's timestamp */
+ long diff; /* difference between midpoint and master's timestamp */
+ long lat; /* estimate of tsc adjustment latency */
+} tsc_sync_debug[NUM_ROUNDS*NR_CPUS];
+#endif
+
+void
+sync_master(void)
+{
+ unsigned long n, tsc, last_go_master;
+
+ last_go_master = 0;
+ while (1) {
+ while ((n = go[MASTER]) == last_go_master)
+ rep_nop();
+ if (n == ~0)
+ break;
+ rdtscl(tsc);
+ if (unlikely(!tsc))
+ tsc = 1;
+ go[SLAVE] = tsc;
+ last_go_master = n;
}
- if (!buggy)
- printk("passed.\n");
}
-static void __init synchronize_tsc_ap (void)
+/*
+ * Return the number of cycles by which our TSC differs from the TSC on
+ * the master (time-keeper) CPU. A positive number indicates our TSC is
+ * ahead of the master, negative that it is behind.
+ */
+static inline long
+get_delta (long *rt, long *master)
{
- int i;
+ unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+ unsigned long tcenter, t0, t1, tm, last_go_slave;
+ long i;
+
+ last_go_slave = go[SLAVE];
+ for (i = 0; i < NUM_ITERS; ++i) {
+ rdtscl(t0);
+ go[MASTER] = i+1;
+ while ((tm = go[SLAVE]) == last_go_slave)
+ rep_nop();
+ rdtscl(t1);
+
+ if (t1 - t0 < best_t1 - best_t0)
+ best_t0 = t0, best_t1 = t1, best_tm = tm;
+ last_go_slave = tm;
+ }
+
+ *rt = best_t1 - best_t0;
+ *master = best_tm - best_t0;
+
+ /* average best_t0 and best_t1 without overflow: */
+ tcenter = (best_t0/2 + best_t1/2);
+ if (best_t0 % 2 + best_t1 % 2 == 2)
+ ++tcenter;
+ return tcenter - best_tm;
+}
+
+/*
+ * Synchronize TSC of the current (slave) CPU with the TSC of the MASTER CPU
+ * (normally the time-keeper CPU). We use a closed loop to eliminate the
+ * possibility of unaccounted-for errors (such as getting a machine check in
+ * the middle of a calibration step). The basic idea is for the slave to ask
+ * the master what TSC value it has and to read its own TSC before and after
+ * the master responds. Each iteration gives us three
+ * timestamps:
+ *
+ * slave master
+ *
+ * t0 ---\
+ * ---\
+ * --->
+ * tm
+ * /---
+ * /---
+ * t1 <---
+ *
+ *
+ * The goal is to adjust the slave's TSC such that tm falls exactly half-way
+ * between t0 and t1. If we achieve this, the clocks are synchronized provided
+ * the interconnect between the slave and the master is symmetric. Even if the
+ * interconnect were asymmetric, we would still know that the synchronization
+ * error is smaller than the roundtrip latency (t0 - t1).
+ *
+ * When the interconnect is quiet and symmetric, this lets us synchronize the
+ * TSC to within one or two cycles. However, we can only *guarantee* that the
+ * synchronization is accurate to within a round-trip time, which is typically
+ * in the range of several hundred cycles (e.g., ~500 cycles). In practice,
+ * this means that the TSC's are usually almost perfectly synchronized, but we
+ * shouldn't assume that the accuracy is much better than half a micro second
+ * or so.
+ */
+
+static void __init
+synchronize_tsc_ap (void)
+{
+ long i, delta, adj, adjust_latency, n_rounds;
+ unsigned long rt, master_time_stamp, tsc;
+#if DEBUG_TSC_SYNC
+ struct tsc_sync_debug *t =
+ &tsc_sync_debug[smp_processor_id() * NUM_ROUNDS];
+#endif
+
+ while (!atomic_read(&tsc_start_flag))
+ mb();
+
+ if (!check_tsc_warp())
+ return;
/*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
+ * Wait for our turn to synchronize with the boot processor.
*/
- while (!atomic_read(&tsc_start_flag)) mb();
+ while (current_slave != smp_processor_id())
+ rep_nop();
+ adjust_latency = tsc_adj_latency;
+
+ go[SLAVE] = 0;
+ go[MASTER] = 0;
+ write_tsc(0,0);
+ for (i = 0; i < NUM_ROUNDS; ++i) {
+ delta = get_delta(&rt, &master_time_stamp);
+ if (delta == 0)
+ break;
+
+ if (i > 0)
+ adjust_latency += -delta;
+ adj = -delta + adjust_latency/8;
+ rdtscl(tsc);
+ write_tsc(tsc + adj, 0);
+#if DEBUG_TSC_SYNC
+ t[i].rt = rt;
+ t[i].master = master_time_stamp;
+ t[i].diff = delta;
+ t[i].lat = adjust_latency/8;
+#endif
+ }
+ n_rounds = i;
+ go[MASTER] = ~0;
+
+#if (DEBUG_TSC_SYNC == 2)
+ for (i = 0; i < n_rounds; ++i)
+ printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+ t[i].rt, t[i].master, t[i].diff, t[i].lat);
+
+ printk("CPU %d: synchronized TSC (last diff %ld cycles, maxerr %lu cycles)\n",
+ smp_processor_id(), delta, rt);
+
+ printk("It took %ld rounds\n", n_rounds);
+#endif
+ if (rt > max_rt)
+ max_rt = rt;
+ if (delta < 0)
+ delta = -delta;
+ if (delta > max_delta)
+ max_delta = delta;
+ tsc_adj_latency = adjust_latency;
+ current_slave = -1;
+ while (!tsc_sync_complete)
+ rep_nop();
+}
+
+/*
+ * The boot processor set its own TSC to zero and then gives each
+ * slave processor the chance to synchronize itself.
+ */
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc_count_start);
- while (atomic_read(&tsc_count_start) != num_booting_cpus())
- mb();
+static void __init synchronize_tsc_bp (void)
+{
+ unsigned int tsc_low, tsc_high, error;
+ int cpu;
+
+ atomic_set(&tsc_start_flag, 1);
- rdtscll(tsc_values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
+ printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",
+ num_booting_cpus());
- atomic_inc(&tsc_count_stop);
- while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
+ if (!check_tsc_warp()) {
+ printk("passed.\n");
+ return;
+ }
+ printk("failed.\n");
+
+ printk(KERN_INFO "starting TSC synchronization\n");
+ write_tsc(0, 0);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (!cpu_isset(cpu, cpu_callout_map))
+ continue;
+ if (cpu == smp_processor_id())
+ continue;
+ go[MASTER] = 0;
+ current_slave = cpu;
+ sync_master();
+ while (current_slave != -1)
+ rep_nop();
+ }
+ rdtsc(tsc_low, tsc_high);
+ if (tsc_high)
+ printk("TSC overflowed during synchronization\n");
+ else
+ printk("TSC synchronization complete max_delta=%d cycles\n",
+ max_delta);
+ if (max_rt < 4293) {
+ error = (max_rt * 1000000)/cpu_khz;
+ printk("TSC sync round-trip time %d.%03d microseconds\n",
+ error/1000, error%1000);
+ } else {
+ printk("TSC sync round-trip time %d cycles\n", max_rt);
}
+ tsc_sync_complete = 1;
}
-#undef NR_LOOPS
extern void calibrate_delay(void);
Index: linux-2.6.16/arch/i386/kernel/syscall_table.S
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/syscall_table.S 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/syscall_table.S 2006-10-19 16:52:29.000000000 +0200
@@ -310,3 +310,5 @@ ENTRY(sys_call_table)
.long sys_pselect6
.long sys_ppoll
.long sys_unshare /* 310 */
+ .long sys_set_robust_list
+ .long sys_get_robust_list
Index: linux-2.6.16/arch/i386/kernel/time.c
===================================================================
--- linux-2.6.16.orig/arch/i386/kernel/time.c 2006-09-20 10:56:46.000000000 +0200
+++ linux-2.6.16/arch/i386/kernel/time.c 2006-10-19 16:52:29.000000000 +0200
@@ -46,6 +46,7 @@
#include
#include
#include
+#include
#include
#include
@@ -56,6 +57,7 @@
#include
#include
#include
+#include
#include "mach_time.h"
@@ -79,16 +81,9 @@ EXPORT_SYMBOL(cpu_khz);
extern unsigned long wall_jiffies;
-DEFINE_SPINLOCK(rtc_lock);
+DEFINE_RAW_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
-#include