Index: linux.prev/Documentation/DocBook/Makefile =================================================================== --- linux.prev.orig/Documentation/DocBook/Makefile +++ linux.prev/Documentation/DocBook/Makefile @@ -10,7 +10,8 @@ DOCBOOKS := wanbook.xml z8530book.xml mc kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ procfs-guide.xml writing_usb_driver.xml \ sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \ - gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml + gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ + genericirq.xml ### # The build process is as follows (targets): Index: linux.prev/Documentation/DocBook/genericirq.tmpl =================================================================== --- /dev/null +++ linux.prev/Documentation/DocBook/genericirq.tmpl @@ -0,0 +1,560 @@ + + + + + + Linux generic IRQ handling + + + + Thomas + Gleixner + +
+ tglx@linutronix.de +
+
+
+ + Ingo + Molnar + +
+ mingo@elte.hu +
+
+
+
+ + + 2005 + Thomas Gleixner + + + 2005 + Ingo Molnar + + + + + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License version 2 as published by the Free Software Foundation. + + + + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + + + + For more details see the file COPYING in the source + distribution of Linux. + + +
+
+
+  Introduction
+
+  The generic interrupt handling layer is designed to provide a
+  complete abstraction of interrupt handling for device drivers
+  and is able to handle all different types of interrupt controller
+  hardware. Device drivers use the generic API functions to request,
+  enable, disable and free interrupts. The drivers do not have to
+  know anything about interrupt hardware, so they can be used on
+  different hardware platforms without code changes.
+
+  This documentation is provided for developers who want to implement
+  an architecture's interrupt support based on the generic IRQ
+  handling layer.
+
+  Rationale
+
+  The original implementation of interrupt handling in Linux uses
+  the __do_IRQ() super-handler, which must be able to deal with every
+  type of interrupt logic. This is achieved by an 'interrupt type'
+  structure and runtime flags to handle special cases.
+  Furthermore, the super-handler assumed a certain type of interrupt
+  handling hardware and turned out not to be capable of handling all
+  kinds of interrupt controller hardware found across the
+  architectures. The all-in-one approach also adds unnecessary
+  complexity for every user.
+
+  Originally, Russell King identified different types of handlers to
+  build a quite universal set for the ARM interrupt handler
+  implementation in Linux 2.5/2.6. He distinguished between:
+	- Level type
+	- Edge type
+	- Simple type
+  In the SMP world of the __do_IRQ() super-handler another type
+  was identified:
+	- Per CPU type
+
+  This split implementation of handlers makes it possible to optimize
+  the flow of interrupt handling for each specific interrupt type.
+  This reduces complexity in that particular code path and allows
+  the optimized handling of a given type.
+
+  The original general implementation uses interrupt_type structures
+  to differentiate the flow control in the super-handler. This
+  leads to a mix of flow logic and code related to hardware details.
+  Russell King's ARM implementation, which replaced the type by a
+  chip abstraction, mixed the two the other way around.
+
+  The natural conclusion was a clean separation of the 'type flow'
+  and the 'chip'. Analysing a couple of architecture implementations
+  reveals that many of them can use a generic set of 'type flow'
+  implementations and only need to add the chip-level specific code.
+  The separation is also valuable for the (sub)architectures
+  which need specific quirks in the type flow itself, because it
+  provides a more transparent design.
+
+  Each interrupt type implementation has its own flow handler
+  assigned, which should normally be one of the generic
+  implementations. The flow handler implementation makes it
+  simple to provide demultiplexing handlers, which can be found in
+  embedded platforms on various architectures.
+
+  The separation makes the generic interrupt handling more flexible
+  and extensible. A (sub)architecture can use a generic type flow
+  implementation for e.g. 'level type' interrupts and add a
+  (sub)architecture specific 'edge type' implementation.
+
+  To make the transition to the new model easier and to prevent the
+  breakage of existing implementations, the __do_IRQ() super-handler
+  is still available. This leads to a kind of duality for the time
+  being. Over time the new model should achieve a homogeneous
+  implementation scheme across all architectures, with enhanced
+  maintainability and cleanliness.
+
+  Known Bugs And Assumptions
+
+  None (hopefully).
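+
+  Before descending into the abstraction layers, a concrete
+  illustration of the driver-facing side may help. This is a minimal,
+  hedged sketch of the usage described in the introduction, written
+  against the 2.6.15-era prototypes this patch applies to; the device
+  name, IRQ number and handler body are made up:
+
+#include <linux/interrupt.h>
+
+#define EXAMPLE_IRQ	9	/* hypothetical interrupt line */
+
+static irqreturn_t example_interrupt(int irq, void *dev_id,
+				     struct pt_regs *regs)
+{
+	/* quiesce the device, then tell the core we handled it */
+	return IRQ_HANDLED;
+}
+
+static int example_open(void *dev)
+{
+	/* The driver neither knows nor cares whether the line is
+	 * edge or level triggered; the flow and chip layers described
+	 * below handle that. */
+	return request_irq(EXAMPLE_IRQ, example_interrupt, 0,
+			   "example", dev);
+}
+
+static void example_close(void *dev)
+{
+	free_irq(EXAMPLE_IRQ, dev);	/* dev_id must match request_irq() */
+}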
+
+
+  Abstraction layers
+
+  There are three main levels of abstraction in the interrupt code:
+	- Highlevel driver API
+	- Abstract interrupt type
+	- Chiplevel hardware encapsulation
+
+  The separation of interrupt type and chip-level functionality
+  provides the most flexible design. This implementation can handle
+  all kinds of interrupt hardware and the necessary workarounds for
+  the interrupt types without the need for redundant implementations.
+  The separation also handles edge and level type interrupts on the
+  same hardware chip.
+
+  Interrupt control flow
+
+  Each interrupt is described by an interrupt description structure
+  irq_desc. The interrupt is referenced by an 'unsigned int' numeric
+  value which selects the corresponding interrupt description
+  structure in the description structures array.
+  The description structure contains status information and pointers
+  to the interrupt type structure and the interrupt chip structure
+  which are assigned to this interrupt.
+
+  Whenever an interrupt triggers, the lowlevel arch code calls into
+  the generic interrupt code by calling desc->handler->handle_irq().
+  This highlevel IRQ handling function only uses other
+  desc->handler primitives which describe the control flow operations
+  necessary for the interrupt type. These operations call the chip
+  primitives referenced by the assigned chip description structure.
+
+  Highlevel Driver API
+
+  The highlevel driver API consists of the following functions:
+	- request_irq()
+	- free_irq()
+	- disable_irq()
+	- enable_irq()
+	- disable_irq_nosync() (SMP only)
+	- synchronize_irq() (SMP only)
+	- set_irq_type()
+	- set_irq_wake()
+	- set_irq_data()
+	- set_irq_chip()
+	- set_irq_chip_data()
+  See the autogenerated function documentation for details.
+
+  Abstract interrupt type
+
+  The 'interrupt type' (struct irq_type) abstraction mainly consists
+  of methods which implement the 'interrupt handling flow'. The
+  generic layer provides a set of pre-defined types:
+	- default_level_type
+	- default_edge_type
+	- default_simple_type
+	- default_percpu_type
+  The default type implementations use the generic type handlers:
+	- handle_level_type
+	- handle_edge_type
+	- handle_simple_type
+	- handle_percpu_type
+  The interrupt types (either predefined or architecture specific)
+  are assigned to specific interrupts by the architecture either
+  during bootup or during device initialization.
+
+  Default type implementations
+
+  Helper functions
+
+  The helper functions call the chip primitives and
+  are used by the default type implementations.
The following helper functions are implemented (simplified excerpt):
+
+default_enable(irq)
+{
+	desc->chip->unmask(irq);
+}
+
+default_disable(irq)
+{
+	desc->chip->mask(irq);
+}
+
+default_ack(irq)
+{
+	desc->chip->ack(irq);
+}
+
+default_mask_ack(irq)
+{
+	if (desc->chip->mask_ack) {
+		desc->chip->mask_ack(irq);
+	} else {
+		desc->chip->mask(irq);
+		desc->chip->ack(irq);
+	}
+}
+
+noop(irq)
+{
+}
+
+default_set_type(irq, type)
+{
+	if (desc->chip->set_type) {
+		if (desc->chip->set_type(irq, type))
+			return NULL;
+	}
+
+	return default_handler for type;
+}
+
+  Default Level IRQ type
+
+  The default Level IRQ type implements the functions:
+	enable		default_enable
+	disable		default_disable
+	start		default_mask_ack
+	end		default_enable
+	handle_irq	handle_level_irq
+	set_type	default_set_type
+
+  Default Edge IRQ type
+
+  The default Edge IRQ type implements the functions:
+	enable		default_enable
+	disable		default_disable
+	start		default_ack
+	hold		default_mask_ack
+	end		noop
+	handle_irq	handle_edge_irq
+	set_type	default_set_type
+
+  Default simple IRQ type
+
+  The default simple IRQ type implements the functions:
+	enable		noop
+	disable		noop
+	handle_irq	handle_simple_irq
+
+  Default per CPU IRQ type
+
+  The default per CPU IRQ type implements the functions:
+	enable		default_enable
+	disable		default_disable
+	start		default_ack
+	end		default_enable
+	handle_irq	handle_percpu_irq
+
+  Default type handler implementations
+
+  Default Level IRQ type handler
+
+  handle_level_type provides a generic implementation
+  for level type interrupts.
+
+  The following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
+
+  Default Edge IRQ type handler
+
+  handle_edge_type provides a generic implementation
+  for edge type interrupts.
+
+  The following control flow is implemented (simplified excerpt):
+
+if (desc->status & running) {
+	desc->handler->hold();
+	desc->status |= pending | masked;
+	return;
+}
+desc->handler->start();
+desc->status |= running;
+do {
+	if (desc->status & masked)
+		desc->handler->enable();
+	desc->status &= ~pending;
+	handle_IRQ_event(desc->action);
+} while (desc->status & pending);
+desc->status &= ~running;
+desc->handler->end();
+
+  Default simple IRQ type handler
+
+  handle_simple_type provides a generic implementation
+  for simple type interrupts.
+
+  Note: The simple type handler does not call any
+  handler/chip primitives.
+
+  The following control flow is implemented (simplified excerpt):
+
+handle_IRQ_event(desc->action);
+
+  Default per CPU type handler
+
+  handle_percpu_type provides a generic implementation
+  for per CPU type interrupts.
+
+  Per CPU interrupts are only available on SMP, and the handler
+  provides a simplified version without locking.
+
+  The following control flow is implemented (simplified excerpt):
+
+desc->handler->start();
+handle_IRQ_event(desc->action);
+desc->handler->end();
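+
+  The rationale chapter mentioned demultiplexing handlers for
+  cascaded interrupt controllers. As a hedged sketch (the secondary
+  controller, its status register and its IRQ numbering are all
+  invented here), such a chained handler reads the child controller's
+  pending status and hands each child interrupt to its flow handler,
+  mirroring the handle_edge_irq() calls visible in the SA1111 patch
+  later in this series:
+
+static void __iomem *sub_base;	/* hypothetical child controller base */
+#define SUB_STATUS	0x04	/* hypothetical pending-status register */
+#define SUB_IRQ_BASE	64	/* hypothetical base of child IRQ numbers */
+
+static void demux_cascade_irq(unsigned int irq, struct irqdesc *desc,
+			      struct pt_regs *regs)
+{
+	u32 pending = readl(sub_base + SUB_STATUS);
+
+	/* Ack the parent line, then hand each pending child
+	 * interrupt to its (here: level type) flow handler. */
+	desc->chip->ack(irq);
+	while (pending) {
+		unsigned int i = SUB_IRQ_BASE + __ffs(pending);
+
+		pending &= pending - 1;	/* clear the lowest set bit */
+		handle_level_irq(i, irq_desc + i, regs);
+	}
+	desc->chip->unmask(irq);
+}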
+
+  Architecture specific type implementation
+
+  If an architecture needs to implement its own type structures, then
+  the following primitives have to be implemented:
+	handle_irq()	- The handle_irq function pointer should
+			  preferably point to one of the generic type
+			  handler functions
+	startup()	- Optional
+	shutdown()	- Optional
+	enable()
+	disable()
+	start()
+	hold()		- For edge type interrupts only
+	end()
+	set_type()	- Optional
+	set_affinity()	- SMP only
+
+  Quirks and optimizations
+
+  The generic functions are intended for 'clean' architectures and
+  chips, which have no platform-specific IRQ handling quirks. If an
+  architecture needs to implement quirks on the 'flow' level then it
+  can do so by overriding the irqtype. This is also done for
+  compatibility reasons, as most architectures only use irqtypes at
+  the moment.
+
+  An architecture could implement all of its IRQ logic by pushing
+  chip handling details into the irqtype's ->start()/->end()/->hold()
+  functions. This is only recommended when the underlying primitives
+  are pure chip primitives without additional quirks. The direct
+  pointer to the chip functions reduces the indirection level by one.
+
+  Chiplevel hardware encapsulation
+
+  The chip-level hardware description structure irq_chip contains all
+  the direct chip relevant functions, which can be utilized by the
+  irq_type implementations:
+	ack()
+	mask_ack()	- Optional, recommended for performance
+	mask()
+	unmask()
+	retrigger()	- Optional
+	set_type()	- Optional
+	set_wake()	- Optional
+  These primitives are strictly intended to mean what they say: ack
+  means ACK, masking means masking of an IRQ line, etc. It is up to
+  the flow handler(s) to use these basic units of lowlevel
+  functionality. (A short illustrative sketch follows at the end of
+  this document.)
+
+  __do_IRQ entry point
+
+  The original implementation __do_IRQ() is an alternative entry
+  point for all types of interrupts.
+
+  This handler turned out not to be suitable for all interrupt
+  hardware and was therefore reimplemented with split functionality
+  for edge/level/simple/percpu interrupts. This is not only a
+  functional optimization; it also shortens code paths for
+  interrupts.
+
+  To make use of the split implementation, replace the call to
+  __do_IRQ() by a call to desc->handler->handle_irq() and associate
+  the appropriate handler function with desc->handler->handle_irq.
+  In most cases the generic type and handler implementations should
+  be sufficient.
+
+  Locking on SMP
+
+  The locking of chip registers is up to the architecture that
+  defines the chip primitives. There is a chip->lock field that can
+  be used for serialization, but the generic layer does not touch it.
+  The per-irq structure is protected via desc->lock by the generic
+  layer.
+
+  Structures
+
+  This chapter contains the autogenerated documentation of the
+  structures which are used in the generic IRQ layer.
+
+!Iinclude/linux/irq.h
+
+  Public Functions Provided
+
+  This chapter contains the autogenerated documentation of the kernel
+  API functions which are exported.
+
+!Ekernel/irq/manage.c
+
+  Internal Functions Provided
+
+  This chapter contains the autogenerated documentation of the
+  internal functions.
+
+!Ikernel/irq/handle.c
+
+  Credits
+
+  The following people have contributed to this document:
+	Thomas Gleixner <tglx@linutronix.de>
+	Ingo Molnar <mingo@elte.hu>
+
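+
+  As promised in the chip-level chapter above, here is a sketch of an
+  irq_chip for an imaginary memory-mapped controller with one enable
+  register and one write-one-to-clear ack register, one bit per line.
+  The register layout, base address and IRQ numbering are invented;
+  only the irq_chip members and the set_irq_chip() call come from the
+  chapters above:
+
+static void __iomem *fancy_base;	/* hypothetical controller base */
+#define FANCY_ENABLE	0x00		/* hypothetical register offsets */
+#define FANCY_ACK	0x04
+#define FANCY_IRQ_BASE	32		/* first IRQ number on this chip */
+
+static void fancy_mask(unsigned int irq)
+{
+	u32 mask = 1 << (irq - FANCY_IRQ_BASE);
+
+	writel(readl(fancy_base + FANCY_ENABLE) & ~mask,
+	       fancy_base + FANCY_ENABLE);
+}
+
+static void fancy_unmask(unsigned int irq)
+{
+	u32 mask = 1 << (irq - FANCY_IRQ_BASE);
+
+	writel(readl(fancy_base + FANCY_ENABLE) | mask,
+	       fancy_base + FANCY_ENABLE);
+}
+
+static void fancy_ack(unsigned int irq)
+{
+	/* clear the latched pending bit */
+	writel(1 << (irq - FANCY_IRQ_BASE), fancy_base + FANCY_ACK);
+}
+
+static struct irq_chip fancy_chip = {
+	.ack	= fancy_ack,
+	.mask	= fancy_mask,
+	.unmask	= fancy_unmask,
+};
+
+  Each interrupt of this chip would then be associated with the
+  structure via set_irq_chip(irq, &fancy_chip); the flow details
+  (level vs. edge) stay entirely in the generic type implementations.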
Index: linux.prev/Documentation/DocBook/kernel-api.tmpl
===================================================================
--- linux.prev.orig/Documentation/DocBook/kernel-api.tmpl
+++ linux.prev/Documentation/DocBook/kernel-api.tmpl
@@ -54,6 +54,11 @@
 !Ekernel/sched.c
 !Ekernel/timer.c
 
+     High-resolution timers
+!Iinclude/linux/ktime.h
+!Iinclude/linux/hrtimer.h
+!Ekernel/hrtimer.c
+
      Internal Functions
 !Ikernel/exit.c
 !Ikernel/signal.c
Index: linux.prev/Documentation/RCU/proc.txt
===================================================================
--- /dev/null
+++ linux.prev/Documentation/RCU/proc.txt
@@ -0,0 +1,207 @@
+/proc Filesystem Entries for RCU
+
+
+CONFIG_RCU_STATS
+
+The CONFIG_RCU_STATS config option is available only in conjunction with
+CONFIG_PREEMPT_RCU. It makes four /proc entries available, namely:
+rcuctrs, rcuptrs, rcugp, and rcustats.
+
+/proc/rcuctrs
+
+	CPU last cur
+	  0    1   1
+	  1    1   1
+	  2    1   1
+	  3    0   2
+	ggp = 230725
+
+This displays the number of processes that started RCU read-side
+critical sections on each CPU. In the absence of preemption, the "last"
+and "cur" counts for a given CPU will always sum to one. Therefore, in
+the example output above, each CPU has started one RCU read-side
+critical section that was later preempted. The "last" column counts RCU
+read-side critical sections that started prior to the last counter
+flip, while the "cur" column counts critical sections that started
+after the last counter flip.
+
+The "ggp" count is a count of the number of counter flips since boot.
+Since this is shown as an odd number, the "cur" counts are stored in
+the zero-th element of each of the per-CPU arrays, and the "last"
+counts are stored in the first element of each of the per-CPU arrays.
+
+
+/proc/rcuptrs
+
+	nl=c04c7160/c04c7960 nt=c04c72d0
+	wl=c04c7168/c04c794c wt=c04c72bc dl=c04c7170/00000000 dt=c04c7170
+
+This displays the head and tail of each of CONFIG_PREEMPT_RCU's three
+callback lists. This will soon change to a per-CPU display, since each
+CPU will soon have its own set of callback lists. In the example above,
+the "next" list header is located at hex address 0xc04c7160, the first
+element on the list at hex address 0xc04c7960, and the last element on
+the list at hex address 0xc04c72d0. The "wl=" and "wt=" output is
+similar for the "wait" list, and the "dl=" and "dt=" output for the
+"done" list. The "done" list is normally emptied very quickly after
+being filled, so it will usually be empty as shown above. Note that the
+tail pointer points into the list header in this case.
+
+Callbacks are placed in the "next" list by call_rcu(), moved to the
+"wait" list after the next counter flip, and moved to the "done" list
+on the counter flip after that. Once on the "done" list, the callbacks
+are invoked.
+
+
+/proc/rcugp
+
+	oldggp=241419 newggp=241421
+
+This entry invokes synchronize_rcu() and prints out the number of
+counter flips since boot before and after the synchronize_rcu(). These
+two numbers will always differ by at least two. Unless RCU is broken.
+;-)
+
+
+/proc/rcustats
+
+	ggp=242416 lgp=242416 sr=0 rcc=396233
+	na=2090938 nl=9 wa=2090929 wl=9 dl=0 dr=2090920 di=2090920
+	rtf1=22230730 rtf2=20139162 rtf3=242416 rtfe1=2085911 rtfe2=5657 rtfe3=19896746
+
+The quantities printed are as follows:
+
+o	"ggp=": The number of flips since boot.
+
+o	"lgp=": The number of flips sensed by the local structure since
+	boot. This will soon be per-CPU.
+
+o	"sr=": The number of explicit calls to synchronize_rcu().
Except that this is currently broken, so it always reads as
+	zero. It is likely to be removed...
+
+o	"rcc=": The number of calls to rcu_check_callbacks().
+
+o	"na=": The number of callbacks that call_rcu() has registered
+	since boot.
+
+o	"nl=": The number of callbacks currently on the "next" list.
+
+o	"wa=": The number of callbacks that have moved to the "wait"
+	list since boot.
+
+o	"wl=": The number of callbacks currently on the "wait" list.
+
+o	"da=": The number of callbacks that have been moved to the
+	"done" list since boot.
+
+o	"dl=": The number of callbacks currently on the "done" list.
+
+o	"dr=": The number of callbacks that have been removed from the
+	"done" list since boot.
+
+o	"di=": The number of callbacks that have been invoked after
+	being removed from the "done" list.
+
+o	"rtf1=": The number of attempts to flip the counters.
+
+o	"rtf2=": The number of attempts to flip the counters that
+	successfully acquired the fliplock.
+
+o	"rtf3=": The number of successful counter flips.
+
+o	"rtfe1=": The number of attempts to flip the counters that
+	failed due to the lock being held by someone else.
+
+o	"rtfe2=": The number of attempts to flip the counters that were
+	abandoned due to someone else doing the job for us.
+
+o	"rtfe3=": The number of attempts to flip the counters that
+	failed due to some task still being in an RCU read-side
+	critical section starting from before the last successful
+	counter flip.
+
+
+CONFIG_RCU_TORTURE_TEST
+
+The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
+implementations. It makes three /proc entries available, namely:
+rcutw, rcutr, and rcuts.
+
+
+/proc/rcutw
+
+Reading this entry starts a new torture test, or ends an earlier one
+if one is already in progress (in other words, there can be only one
+writer at a time). This sleeps uninterruptibly, so be sure to run it
+in the background. One could argue that it would be good to have
+multiple writers, but Linux uses RCU heavily enough that you will get
+write-side contention whether you want it or not. If you want
+additional write-side contention, repeatedly create and destroy
+several large file trees in parallel. Or use some other RCU-protected
+update.
+
+
+/proc/rcutr
+
+Reading this entry starts a new torture reader, which runs until sent
+a signal (e.g., control-C). If testing an RCU implementation with
+preemptible read-side critical sections, make sure to spawn at least
+two /proc/rcutr instances for each CPU.
+
+
+/proc/rcuts
+
+Displays the current state of the torture test:
+
+	ggp = 20961
+	rtc: c04496f4 ver: 8734 tfle: 0 rta: 8734 rtaf: 0 rtf: 8715
+	Reader Pipe: 88024120 63914 0 0 0 0 0 0 0 0 0
+	Reader Batch: 88024097 63937 0 0 0 0 0 0 0 0
+	Free-Block Circulation: 8733 8731 8729 8727 8725 8723 8721 8719 8717 8715 0
+
+The entries are as follows:
+
+o	"ggp": The number of counter flips (or batches) since boot.
+
+o	"rtc": The hexadecimal address of the structure currently
+	visible to readers.
+
+o	"ver": The number of times since boot that the rcutw writer
+	task has changed the structure visible to readers.
+
+o	"tfle": If non-zero, indicates that the "torture freelist"
+	containing structures to be placed into the "rtc" area is
+	empty. This condition is important, since it can fool you into
+	thinking that RCU is working when it is not. :-/
+
+o	"rta": Number of structures allocated from the torture
+	freelist.
+
+o	"rtaf": Number of allocations from the torture freelist that
+	have failed due to the list being empty.
+
+o	"rtf": Number of frees into the torture freelist.
+
+o	"Reader Pipe": Histogram of "ages" of structures seen by
+	readers. If any entries past the first two are non-zero, RCU
+	is broken. And /proc/rcuts prints "!!!" to make sure you
+	notice. The age of a newly allocated structure is zero; it
+	becomes one when removed from reader visibility, and is
+	incremented once per grace period subsequently -- and is freed
+	after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods.
+
+	The output displayed above was taken from a correctly working
+	RCU. If you want to see what it looks like when broken, break
+	it yourself. ;-)
+
+o	"Reader Batch": Another histogram of "ages" of structures seen
+	by readers, but in terms of counter flips (or batches) rather
+	than in terms of grace periods. The legal number of non-zero
+	entries is again two. The reason for this separate view is
+	that it is easier to get the third entry to show up in the
+	"Reader Batch" list than in the "Reader Pipe" list.
+
+o	"Free-Block Circulation": Shows the number of torture
+	structures that have reached a given point in the pipeline.
+	The first element should closely correspond to the number of
+	structures allocated, the second to the number that have been
+	removed from reader view, and all but the last remaining to
+	the corresponding number of passes through a grace period.
+	The last entry should be zero, as it is only incremented if a
+	torture structure's counter somehow gets incremented farther
+	than it should.
Index: linux.prev/Documentation/hrtimers.txt
===================================================================
--- /dev/null
+++ linux.prev/Documentation/hrtimers.txt
@@ -0,0 +1,178 @@
+
+hrtimers - subsystem for high-resolution kernel timers
+------------------------------------------------------
+
+This patch introduces a new subsystem for high-resolution kernel timers.
+
+One might ask the question: we already have a timer subsystem
+(kernel/timers.c), why do we need two timer subsystems? After a lot of
+back and forth trying to integrate high-resolution and high-precision
+features into the existing timer framework, and after testing various
+such high-resolution timer implementations in practice, we came to the
+conclusion that the timer wheel code is fundamentally not suitable for
+such an approach. We initially didn't believe this ('there must be a
+way to solve this'), and spent a considerable effort trying to
+integrate things into the timer wheel, but we failed. In hindsight,
+there are several reasons why such integration is hard/impossible:
+
+- the forced handling of low-resolution and high-resolution timers in
+  the same way leads to a lot of compromises, macro magic and #ifdef
+  mess. The timers.c code is very "tightly coded" around jiffies and
+  32-bitness assumptions, and has been honed and micro-optimized for a
+  relatively narrow use case (jiffies in a relatively narrow HZ range)
+  for many years - and thus even small extensions to it easily break
+  the wheel concept, leading to even worse compromises. The timer
+  wheel code is very good and tight code, and there are zero problems
+  with it in its current usage - but it is simply not suitable to be
+  extended for high-res timers.
+
+- the unpredictable [O(N)] overhead of cascading leads to delays which
+  necessitate a more complex handling of high resolution timers, which
+  in turn decreases robustness. Such a design still led to rather
+  large timing inaccuracies.
Cascading is a fundamental property of the timer
+  wheel concept; it cannot be 'designed out' without inevitably
+  degrading other portions of the timers.c code in an unacceptable
+  way.
+
+- the implementation of the current posix-timer subsystem on top of
+  the timer wheel has already introduced a quite complex handling of
+  the required readjusting of absolute CLOCK_REALTIME timers at
+  settimeofday or NTP time - further underlining our experience by
+  example: that the timer wheel data structure is too rigid for
+  high-res timers.
+
+- the timer wheel code is most optimal for use cases which can be
+  identified as "timeouts". Such timeouts are usually set up to cover
+  error conditions in various I/O paths, such as networking and block
+  I/O. The vast majority of those timers never expire and are rarely
+  recascaded because the expected correct event arrives in time so
+  they can be removed from the timer wheel before any further
+  processing of them becomes necessary. Thus the users of these
+  timeouts can accept the granularity and precision tradeoffs of the
+  timer wheel, and largely expect the timer subsystem to have
+  near-zero overhead. Accurate timing for them is not a core purpose -
+  in fact most of the timeout values used are ad-hoc. For them it is
+  at most a necessary evil to guarantee the processing of actual
+  timeout completions (because most of the timeouts are deleted
+  before completion), which should thus be as cheap and unintrusive
+  as possible.
+
+The primary users of precision timers are user-space applications that
+utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
+users like drivers and subsystems which require precise timed events
+(e.g. multimedia) can benefit from the availability of a separate
+high-resolution timer subsystem as well.
+
+While this subsystem does not offer high-resolution clock sources just
+yet, the hrtimer subsystem can be easily extended with high-resolution
+clock capabilities, and patches for that exist and are maturing
+quickly. The increasing demand for realtime and multimedia
+applications along with other potential users for precise timers gives
+another reason to separate the "timeout" and "precise timer"
+subsystems.
+
+Another potential benefit is that such a separation allows even more
+special-purpose optimization of the existing timer wheel for the low
+resolution and low precision use cases - once the precision-sensitive
+APIs are separated from the timer wheel and are migrated over to
+hrtimers. E.g. we could decrease the frequency of the timeout
+subsystem from 250 Hz to 100 Hz (or even smaller).
+
+hrtimer subsystem implementation details
+----------------------------------------
+
+the basic design considerations were:
+
+- simplicity
+
+- data structure not bound to jiffies or any other granularity. All
+  the kernel logic works at 64-bit nanoseconds resolution - no
+  compromises.
+
+- simplification of existing, timing related kernel code
+
+another basic requirement was the immediate enqueueing and ordering of
+timers at activation time. After looking at several possible solutions
+such as radix trees and hashes, we chose the red black tree as the
+basic data structure. Rbtrees are available as a library in the kernel
+and are used in various performance-critical areas of e.g. memory
+management and file systems. The rbtree is solely used for time sorted
+ordering, while a separate list is used to give the expiry code fast
+access to the queued timers, without having to walk the rbtree.
+
+(This separate list is also useful for later when we'll introduce
+high-resolution clocks, where we need separate pending and expired
+queues while keeping the time-order intact.)
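+
+As a concrete illustration of the data-structure choice, time-ordered
+insertion with the kernel's rbtree library looks roughly like the
+sketch below. The structure and field names are simplified stand-ins
+for this document, not the actual hrtimer ones:
+
+#include <linux/rbtree.h>
+
+struct my_timer {
+	struct rb_node node;
+	u64 expires;		/* absolute expiry time, nanoseconds */
+};
+
+/* Walk down the tree comparing expiry times, link the new node at
+ * the found leaf position, then rebalance. */
+static void my_timer_enqueue(struct rb_root *root, struct my_timer *timer)
+{
+	struct rb_node **link = &root->rb_node, *parent = NULL;
+	struct my_timer *entry;
+
+	while (*link) {
+		parent = *link;
+		entry = rb_entry(parent, struct my_timer, node);
+		if (timer->expires < entry->expires)
+			link = &(*link)->rb_left;
+		else
+			link = &(*link)->rb_right;
+	}
+	rb_link_node(&timer->node, parent, link);
+	rb_insert_color(&timer->node, root);
+}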
+
+Time-ordered enqueueing is not purely for the purposes of
+high-resolution clocks though, it also simplifies the handling of
+absolute timers based on a low-resolution CLOCK_REALTIME. The existing
+implementation needed to keep an extra list of all armed absolute
+CLOCK_REALTIME timers along with complex locking. In the case of
+settimeofday and NTP, all the timers (!) had to be dequeued, the
+time-changing code had to fix them up one by one, and all of them had
+to be enqueued again. The time-ordered enqueueing and the storage of
+the expiry time in absolute time units removes all this complex and
+poorly scaling code from the posix-timer implementation - the clock
+can simply be set without having to touch the rbtree. This also makes
+the handling of posix-timers simpler in general.
+
+The locking and per-CPU behavior of hrtimers was mostly taken from the
+existing timer wheel code, as it is mature and well suited. Sharing
+code was not really a win, due to the different data structures. Also,
+the hrtimer functions now have clearer behavior and clearer names -
+such as hrtimer_try_to_cancel() and hrtimer_cancel() [which are
+roughly equivalent to del_timer() and del_timer_sync()] - so there's
+no direct 1:1 mapping between them on the algorithmic level, and thus
+no real potential for code sharing either.
+
+Basic data types: every time value, absolute or relative, is in a
+special nanosecond-resolution type: ktime_t. The kernel-internal
+representation of ktime_t values and operations is implemented via
+macros and inline functions, and can be switched between a "hybrid
+union" type and a plain "scalar" 64bit nanoseconds representation (at
+compile time). The hybrid union type optimizes time conversions on
+32bit CPUs. This build-time-selectable ktime_t storage format was
+implemented to avoid the performance impact of 64-bit multiplications
+and divisions on 32bit CPUs. Such operations are frequently necessary
+to convert between the storage formats provided by kernel and
+userspace interfaces and the internal time format. (See
+include/linux/ktime.h for further details.)
+
+hrtimers - rounding of timer values
+-----------------------------------
+
+the hrtimer code will round timer events to lower-resolution clocks
+because it has to. Otherwise it will do no artificial rounding at all.
+
+one question is what resolution value should be returned to the user
+by the clock_getres() interface. This will return whatever real
+resolution a given clock has - be it low-res, high-res, or
+artificially-low-res.
+
+hrtimers - testing and verification
+-----------------------------------
+
+We used the high-resolution clock subsystem on top of hrtimers to
+verify the hrtimer implementation details in practice, and we also ran
+the posix timer tests in order to ensure specification compliance. We
+also ran tests on low-resolution clocks.
+
+The hrtimer patch converts the following kernel functionality to use
+hrtimers:
+
+ - nanosleep
+ - itimers
+ - posix-timers
+
+The conversion of nanosleep and posix-timers enabled the unification
+of nanosleep and clock_nanosleep; a small userspace illustration of
+these interfaces follows.
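+
+As a hedged sketch of the user-visible side (ordinary POSIX code, not
+part of this patch), an application wanting a precise absolute sleep
+would do something like this - with plain timer-wheel timers the
+wakeup is rounded up to the next jiffy, while hrtimers can honor the
+request exactly once high-resolution clock support is added:
+
+#include <stdio.h>
+#include <time.h>
+
+int main(void)
+{
+	struct timespec t;
+
+	/* sleep until "now + 250 usec" on CLOCK_MONOTONIC */
+	clock_gettime(CLOCK_MONOTONIC, &t);
+	t.tv_nsec += 250 * 1000;
+	if (t.tv_nsec >= 1000000000L) {
+		t.tv_nsec -= 1000000000L;
+		t.tv_sec++;
+	}
+	clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &t, NULL);
+	printf("woke up\n");
+	return 0;
+}
+
+(Link with -lrt on the glibc of this era.)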
+
+The code was successfully compiled for the following platforms:
+
+ i386, x86_64, ARM, PPC, PPC64, IA64
+
+The code was run-tested on the following platforms:
+
+ i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
+
+hrtimers were also integrated into the -rt tree, along with a
+hrtimers-based high-resolution clock implementation, so the hrtimers
+code got a healthy amount of testing and use in practice.
+
+	Thomas Gleixner, Ingo Molnar
Index: linux.prev/Documentation/kernel-parameters.txt
===================================================================
--- linux.prev.orig/Documentation/kernel-parameters.txt
+++ linux.prev/Documentation/kernel-parameters.txt
@@ -52,6 +52,7 @@ restrictions referred to are that the re
 	MTD	MTD support is enabled.
 	NET	Appropriate network support is enabled.
 	NUMA	NUMA support is enabled.
+	GENERIC_TIME The generic timeofday code is enabled.
 	NFS	Appropriate NFS support is enabled.
 	OSS	OSS sound support is enabled.
 	PARIDE	The ParIDE subsystem is enabled.
@@ -329,10 +330,11 @@ running once the system is up.
 			Value can be changed at runtime via
 			/selinux/checkreqprot.
 
-	clock=		[BUGS=IA-32,HW] gettimeofday timesource override.
-			Forces specified timesource (if avaliable) to be used
-			when calculating gettimeofday(). If specicified
-			timesource is not avalible, it defaults to PIT.
+	clock=		[BUGS=IA-32, HW] gettimeofday clocksource override.
+			[Deprecated]
+			Forces the specified clocksource (if available) to be
+			used when calculating gettimeofday(). If the specified
+			clocksource is not available, it defaults to PIT.
 			Format: { pit | tsc | cyclone | pmtmr }
 
 	hpet=		[IA-32,HPET] option to disable HPET and use PIT.
@@ -1477,6 +1479,10 @@ running once the system is up.
 			time	Show timing data prefixed to each
 				printk message line
 
+	clocksource=	[GENERIC_TIME] Override the default clocksource
+			Override the default clocksource and use the
+			clocksource with the name specified.
+
 	tipar.timeout=	[HW,PPT] Set communications timeout in tenths of a
 			second (default 15).
Index: linux.prev/Documentation/timekeeping.txt
===================================================================
--- /dev/null
+++ linux.prev/Documentation/timekeeping.txt
@@ -0,0 +1,350 @@
+How timekeeping works with CONFIG_GENERIC_TIME
+========================================================================
+
+The generic timekeeping code maintains and allows access to the
+system's understanding of how much time has passed from a certain
+point. However, in order to measure the passing of time, the generic
+timekeeping code relies on the clocksource abstraction. A clocksource
+abstracts a free running counter whose value increases at a known
+frequency.
+
+In the generic timekeeping code, we use a pointer to a selected
+clocksource to measure the passing of time.
+
+struct clocksource *clock
+
+The clocksource has some limitations, however. Since it is likely of
+fixed width, it will not increment forever and will overflow. In order
+to still properly keep time, we must occasionally accumulate an
+interval of time. In the generic timekeeping code, we accumulate the
+amount of time since the system booted into the value system_time,
+which keeps nanosecond resolution in a ktime_t storage.
+
+ktime_t system_time
+
+Since it's likely your system has not been running continually since
+midnight on the 1st of January in 1970, we must provide an offset from
+that time in accordance with conventions. This offset, which changes
+only occasionally (via settimeofday()), is the wall_time_offset value,
+which is also stored as a ktime_t.
+
+ktime_t wall_time_offset
+
+Since we accumulate time in intervals, we need a base cycle value that
+we can use to generate an offset from the time value kept in
+system_time. We store this value in cycle_last.
+
+cycle_t cycle_last;
+
+Further, since all clocks drift somewhat from each other, we use the
+adjustment values provided via adjtimex() to correct our clocksource
+frequency for each interval. This frequency adjustment value is stored
+in ntp_adj.
+
+long ntp_adj;
+
+Now that we've covered the core global variables for timekeeping,
+let's look at how we maintain these values.
+
+As stated above, we want to keep the clocksource from overflowing on
+us, so we accumulate a time interval periodically. This periodic
+accumulation function is called timeofday_periodic_hook(). In
+simplified pseudo code, it logically is presented as:
+
+timeofday_periodic_hook():
+	cycle_now = read_clocksource(clock)
+	cycle_delta = (cycle_now - cycle_last) & clock->mask
+	nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+	system_time += nsec
+	cycle_last = cycle_now
+
+	/* do other stuff */
+
+You can see we read the cycle value from the clocksource, calculate a
+cycle delta for the interval since we last called
+timeofday_periodic_hook(), convert that cycle delta to a nanosecond
+interval (for now ignore ntp_adj), add it to the system time and
+finally set our cycle_last value to cycle_now for the next interval.
+Using this simple algorithm we can correctly measure and record the
+passing of time.
+
+But just storing this info isn't very useful; we also want to make it
+available to be used elsewhere. So how do we provide a notion of how
+much time has passed in between calls to timeofday_periodic_hook()?
+
+First, let's create a function that calculates the time since the last
+call to timeofday_periodic_hook().
+
+get_nsec_offset():
+	cycle_now = read_clocksource(clock)
+	cycle_delta = (cycle_now - cycle_last) & clock->mask
+	nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+	return nsec
+
+Here you can see that we read the clocksource, calculate a cycle
+interval, and convert that to a nanosecond interval, just as is done
+in timeofday_periodic_hook().
+
+Now let's use this function to provide the number of nanoseconds that
+the system has been running:
+
+do_monotonic_clock():
+	return system_time + get_nsec_offset()
+
+Here we trivially add the nanosecond offset since the last
+timeofday_periodic_hook() to the value of system_time, which was
+stored at the last timeofday_periodic_hook().
+
+Note that since we use the same method to calculate time intervals,
+assuming each function is atomic and the clocksource functions as it
+should, time cannot go backward!
+
+Now to get the time of day using the standard convention:
+
+do_gettimeofday():
+	return do_monotonic_clock() + wall_time_offset
+
+We simply add the wall_time_offset, and we have the number of
+nanoseconds since 1970 began!
+
+
+Of course, in real life, things are not so static. We have to handle a
+number of dynamic values that may change and affect timekeeping. In
+order to handle these safely, we must only change values in between
+intervals. This means the periodic_hook call must handle these
+changes.
+
+Since clocksources can be changed while the system is running, we need
+to check for and possibly switch to using new clocksources in the
+periodic_hook call. Further, clocksources may change their frequency.
+Since this must be done only at a safe point, we use the
+update_callback function pointer (for more details, see "How to write
+a clocksource driver" below); this too must be done in between
+intervals in the periodic_hook call. Finally, since the ntp adjustment
+made in the cyc2ns conversion is not static, we need to update the ntp
+state machine and calculate a new adjustment value.
+
+This adds some extra pseudo code to the timeofday_periodic_hook
+function:
+
+timeofday_periodic_hook():
+	cycle_now = read_clocksource(clock)
+	cycle_delta = (cycle_now - cycle_last) & clock->mask
+	nsec = cyc2ns(clock, cycle_delta, ntp_adj)
+	system_time += nsec
+	cycle_last = cycle_now
+
+	next = get_next_clocksource()
+	if (next != clock):
+		cycle_last = read_clocksource(next)
+		clock = next
+
+	if (clock->update_callback):
+		clock->update_callback()
+
+	ntp_advance(nsec)
+	ppm = ntp_get_ppm_adjustment()
+	ntp_adj = ppm_to_mult_adj(clock, ppm)
+
+Unfortunately, the actual timeofday_periodic_hook code is not as
+simple as this pseudo code. For performance concerns, much has been
+done to pre-calculate values and use them repeatedly. Thus be aware
+that the code in timeofday.c is more complex; however, the functional
+logic is the same.
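+
+To connect the pseudo code to real types, here is a hedged C sketch of
+the two helpers, using the clocksource fields described in "How to
+write a clocksource driver" below. For simplicity it ignores ntp_adj,
+just as the first pseudo code version does; this is an illustration,
+not the actual timeofday.c code:
+
+/* nsec = cycles * mult >> shift; the mult/shift pair approximates
+ * the clocksource's nanoseconds-per-cycle value */
+static inline u64 cyc2ns_simple(struct clocksource *cs, cycle_t cycles)
+{
+	return ((u64) cycles * cs->mult) >> cs->shift;
+}
+
+/* nanoseconds since the last periodic hook; the masked subtraction
+ * handles counter wrap, as explained in the mask discussion below */
+static u64 get_nsec_offset(struct clocksource *cs)
+{
+	cycle_t cycle_now = cs->read();
+	cycle_t cycle_delta = (cycle_now - cycle_last) & cs->mask;
+
+	return cyc2ns_simple(cs, cycle_delta);
+}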
How to port an architecture to GENERIC_TIME
+========================================================================
+Porting an architecture to the GENERIC_TIME timekeeping code consists
+of moving a little bit of code around and then deleting a fair amount.
+It is my hope that this will reduce the arch-specific maintenance work
+around timekeeping.
+
+Porting an arch usually requires the following steps.
+
+1. Define CONFIG_GENERIC_TIME in the arch's Kconfig
+2. Implement the following functions (a sketch follows after this
+   list)
+	nsec_t read_persistent_clock(void)
+	void sync_persistent_clock(struct timespec ts)
+3. Remove all of the arch-specific timekeeping code
+	do_gettimeofday()
+	do_settimeofday()
+	etc
+4. Implement clocksource drivers
+	See "How to write a clocksource driver" for more details
+
+The exceptions to the above are:
+
+5. If the arch has no continuous clocksource
+	A) Implement 1-3 in the above list.
+	B) Define CONFIG_IS_TICK_BASED in the arch's Kconfig
+	C) Implement the "long arch_getoffset(void)" function
+
+6. If the arch supports vsyscall gettimeofday (see x86_64 for
+   reference)
+	A) Implement 1-4 in the above list
+	B) Define GENERIC_TIME_VSYSCALL
+	C) Implement arch_update_vsyscall_gtod()
+	D) Implement vsyscall gettimeofday (similar to
+	   __get_realtime_clock_ts)
+	E) Implement vread functions for supported clocksources
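+
+As a hedged sketch of step 2, here are the two persistent-clock hooks
+for an imaginary platform whose battery-backed RTC exposes a seconds
+counter in a memory-mapped register. Everything about the hardware
+(MY_RTC_SECONDS and its semantics) is invented; only the two function
+signatures come from the list above:
+
+#define MY_RTC_SECONDS	0xFEED0000	/* hypothetical RTC register */
+
+/* wall time at boot, read once during timekeeping init */
+nsec_t read_persistent_clock(void)
+{
+	u32 secs = readl((void __iomem *) MY_RTC_SECONDS);
+
+	return (nsec_t) secs * NSEC_PER_SEC;
+}
+
+/* periodically write the current wall time back to the RTC;
+ * sub-second precision is simply dropped here */
+void sync_persistent_clock(struct timespec ts)
+{
+	writel(ts.tv_sec, (void __iomem *) MY_RTC_SECONDS);
+}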
How to write a clocksource driver
+========================================================================
+First, a quick summary of what a clocksource driver provides.
+
+Simply put, a clocksource is an abstraction of a free running,
+increasing counter. The abstraction provides the minimal amount of
+info for that counter to be usable for timekeeping. Those required
+values are:
+	1. Its name
+	2. A rating value for selection priority
+	3. A read function pointer
+	4. A mask value for correct twos-complement subtraction
+	5. A mult and shift pair that approximate the counter frequency
+	   mult/(2^shift) ~= nanoseconds per cycle
+
+Additionally, there are other optionally set values that allow for
+advanced functionality. Those values are:
+	6. The update_callback function
+	7. The is_continuous flag
+	8. The vread function pointer
+	9. The vdata pointer value
+
+
+Now let's go over these values in detail.
+
+1. Name
+	The clocksource's name should be unique since it is used both
+for identification and for manually overriding the default clocksource
+selection. The name length must be shorter than 32 characters in order
+for it to be properly overridden.
+
+2. Rating value
+	This rating value is used as a priority value for clocksource
+selection. It has no direct connection to quality or physical
+properties of the clocksource, but is to be set and manipulated to
+guarantee that the best (by no specific metric) clocksource that will
+provide correct timekeeping is automatically selected. Rating
+suggestions can be found in include/linux/clocksource.h
+
+3. Read function pointer
+	This pointer should point to a function that returns an
+unsigned increasing cycle value from the clocksource. The value should
+have a coverage from zero to the maximum cycle value the clocksource
+can provide. This does not have to be a direct hardware value and can
+also be a software counter. An example of a software counter is the
+jiffies clocksource.
+
+4. The mask value
+	This value should have all bits set that the counter can
+represent, i.e. one less than the smallest power of two larger than
+the maximum cycle value. This allows twos-complement subtraction to
+work on overflow boundary conditions if the max value is less than
+(cycle_t)-1. So, for example, if we have a 16 bit counter (i.e. one
+that wraps to zero after 0x0000FFFF), the mask would be 0xFFFF. So
+then when finding the cycle difference around an overflow, where
+now = 0x0013 and then = 0xFFEE, we can compute the cycle delta
+properly using the equation:
+	delta = (now - then) & mask
+	delta = (0x0013 - 0xFFEE) & 0xFFFF
+	delta = 0xFFFF0025 & 0xFFFF	/* note the unmasked negative value */
+	delta = 0x25
+
+5. The mult and shift pair
+	These 32bit values approximate the nanoseconds-per-cycle value
+of the clocksource using the equation: mult/(2^shift). If you have a
+kHz or Hz frequency value, the mult value for a given shift value can
+be easily calculated using the clocksource_hz2mult() and
+clocksource_khz2mult() helper functions. When selecting a shift value,
+it is important to be careful. Larger shift values give a finer
+precision in the cycle to nanosecond conversion and allow for more
+exact NTP adjustments. However, if you select too large a shift value,
+the resulting mult value might overflow a cycle_t * mult computation.
+
+
+So if you have a simple hardware counter that does not change
+frequency, filling in the above should be sufficient for a functional
+clocksource. But read on for details on implementing a more complex
+clocksource.
+
+6. The update_callback function pointer
+	If this function pointer is non-NULL, it will be called at
+every periodic hook, when it is safe for the clocksource to change its
+state. This would be necessary in the case where the counter frequency
+changes, for example. One user of this function pointer is the TSC
+clocksource. When the TSC frequency changes (which may occur if the
+CPU changes frequency) we need to notify the clocksource at a safe
+point where that state may change. Thus, if the TSC has changed
+frequency we set the new mult/shift values in the update_callback
+function.
+
+7. The is_continuous flag
+	This flag variable (0 if false, 1 if true) denotes that the
+clocksource is continuous. This means that it is a purely hardware
+driven clocksource and is not dependent on any software code to run
+for it to increment properly. This distinction will be useful in the
+future when timer ticks may be disabled for long periods of time.
Doing so using software clocksources, like the jiffies clocksource,
+would cause timekeeping problems.
+
+8. The vread function pointer
+	This function pointer points to a user-space accessible
+function that reads the clocksource. This is used in userspace
+gettimeofday implementations to improve performance. See the x86-64
+TSC clocksource implementation for an example.
+
+9. The vdata pointer
+	This pointer is passed to the vread function pointer in a
+userspace gettimeofday implementation. Its usage is dependent on the
+vread implementation, but if the pointer points to data, that data
+must be readable from userspace.
+
+
+Now let's write a quick clocksource for an imaginary bit of hardware.
+Here are the specs:
+
+	A 32bit counter can be found at the MMIO address 0xFEEDF000.
+It runs at 100 MHz. To enable it, the low bit of the address
+0xFEEDF0F0 must be set to one.
+
+So let's start with an empty cool-counter.c file and define the
+clocksource.
+
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <asm/io.h>
+
+#define COOL_READ_PTR	0xFEEDF000
+#define COOL_START_PTR	0xFEEDF0F0
+
+static void __iomem *cool_ptr = (void __iomem *) COOL_READ_PTR;
+
+struct clocksource clocksource_cool = {
+	.name	= "cool",
+	.rating	= 200,		/* it's a pretty decent clock */
+	.mask	= 0xFFFFFFFF,	/* 32 bits */
+	.mult	= 0,		/* to be computed */
+	.shift	= 10,
+};
+
+
+Now let's write the read function:
+
+cycle_t cool_counter_read(void)
+{
+	cycle_t ret = readl(cool_ptr);
+	return ret;
+}
+
+Finally, let's write the init function:
+
+int __init cool_counter_init(void)
+{
+	void __iomem *ptr = (void __iomem *) COOL_START_PTR;
+	u32 val;
+
+	/* start the counter */
+	val = readl(ptr);
+	val |= 0x1;
+	writel(val, ptr);
+
+	/* finish initializing the clocksource */
+	clocksource_cool.read = cool_counter_read;
+	clocksource_cool.mult = clocksource_khz2mult(100000,
+					clocksource_cool.shift);
+
+	/* register the clocksource */
+	register_clocksource(&clocksource_cool);
+
+	return 0;
+}
+module_init(cool_counter_init);
+
+
+Now wasn't that easy!
Index: linux.prev/Makefile
===================================================================
--- linux.prev.orig/Makefile
+++ linux.prev/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 15
-EXTRAVERSION =
+EXTRAVERSION =-rt21
 NAME=Sliding Snow Leopard
 
 # *DOCUMENTATION*
@@ -519,10 +519,14 @@ CFLAGS		+= $(call add-align,CONFIG_CC_AL
 CFLAGS		+= $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops)
 CFLAGS		+= $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps)
 
-ifdef CONFIG_FRAME_POINTER
-CFLAGS		+= -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+ifdef CONFIG_MCOUNT
+CFLAGS		+= -pg -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
 else
-CFLAGS		+= -fomit-frame-pointer
+  ifdef CONFIG_FRAME_POINTER
+  CFLAGS	+= -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
+  else
+  CFLAGS	+= -fomit-frame-pointer
+  endif
 endif
 
 ifdef CONFIG_DEBUG_INFO
Index: linux.prev/arch/arm/Kconfig
===================================================================
--- linux.prev.orig/arch/arm/Kconfig
+++ linux.prev/arch/arm/Kconfig
@@ -50,6 +50,10 @@ config UID16
 	bool
 	default y
 
+config GENERIC_HARDIRQS
+	bool
+	default y
+
 config RWSEM_GENERIC_SPINLOCK
 	bool
 	default y
@@ -368,18 +372,7 @@ config LOCAL_TIMERS
 	  accounting to be spread across the timer interval, preventing
 	  a "thundering herd" at every timer tick.
-config PREEMPT - bool "Preemptible Kernel (EXPERIMENTAL)" - depends on EXPERIMENTAL - help - This option reduces the latency of the kernel when reacting to - real-time or interactive events by allowing a low priority process to - be preempted even if it is in kernel mode executing a system call. - This allows applications to run more reliably even when the system is - under load. - - Say Y here if you are building a kernel for a desktop, embedded - or real-time system. Say N if you are unsure. +source kernel/Kconfig.preempt config NO_IDLE_HZ bool "Dynamic tick timer" Index: linux.prev/arch/arm/boot/compressed/head.S =================================================================== --- linux.prev.orig/arch/arm/boot/compressed/head.S +++ linux.prev/arch/arm/boot/compressed/head.S @@ -710,6 +710,19 @@ memdump: mov r12, r0 mov pc, r10 #endif +#ifdef CONFIG_MCOUNT +/* CONFIG_MCOUNT causes boot header to be built with -pg requiring this + * trampoline + */ + .text + .align 0 + .type mcount %function + .global mcount +mcount: + mov pc, lr @ just return +#endif + + reloc_end: .align Index: linux.prev/arch/arm/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/arm/boot/compressed/misc.c +++ linux.prev/arch/arm/boot/compressed/misc.c @@ -199,6 +199,7 @@ static ulg free_mem_ptr_end; #define HEAP_SIZE 0x2000 +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" #ifndef STANDALONE_DEBUG Index: linux.prev/arch/arm/common/dmabounce.c =================================================================== --- linux.prev.orig/arch/arm/common/dmabounce.c +++ linux.prev/arch/arm/common/dmabounce.c @@ -404,11 +404,11 @@ dma_map_single(struct device *dev, void BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); dma_addr = map_single(dev, ptr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); return dma_addr; } @@ -431,11 +431,11 @@ dma_unmap_single(struct device *dev, dma BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); unmap_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int @@ -450,7 +450,7 @@ dma_map_sg(struct device *dev, struct sc BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { struct page *page = sg->page; @@ -462,7 +462,7 @@ dma_map_sg(struct device *dev, struct sc map_single(dev, ptr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); return nents; } @@ -479,7 +479,7 @@ dma_unmap_sg(struct device *dev, struct BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -488,7 +488,7 @@ dma_unmap_sg(struct device *dev, struct unmap_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -500,11 +500,11 @@ dma_sync_single_for_cpu(struct device *d dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", __func__, (void *) dma_addr, size, dir); - local_irq_save(flags); + raw_local_irq_save(flags); sync_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -516,11 +516,11 @@ dma_sync_single_for_device(struct device dev_dbg(dev, "%s(ptr=%p,size=%d,dir=%x)\n", __func__, (void *) dma_addr, size, dir); - local_irq_save(flags); + raw_local_irq_save(flags); sync_single(dev, dma_addr, size, dir); - local_irq_restore(flags); + 
raw_local_irq_restore(flags); } void @@ -535,7 +535,7 @@ dma_sync_sg_for_cpu(struct device *dev, BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -544,7 +544,7 @@ dma_sync_sg_for_cpu(struct device *dev, sync_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void @@ -559,7 +559,7 @@ dma_sync_sg_for_device(struct device *de BUG_ON(dir == DMA_NONE); - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < nents; i++, sg++) { dma_addr_t dma_addr = sg->dma_address; @@ -568,7 +568,7 @@ dma_sync_sg_for_device(struct device *de sync_single(dev, dma_addr, length, dir); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int Index: linux.prev/arch/arm/common/locomo.c =================================================================== --- linux.prev.orig/arch/arm/common/locomo.c +++ linux.prev/arch/arm/common/locomo.c @@ -425,6 +425,12 @@ static struct irqchip locomo_spi_chip = .unmask = locomo_spi_unmask_irq, }; +static DEFINE_IRQ_CHAINED_TYPE(locomo_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_key_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_gpio_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_lt_handler); +static DEFINE_IRQ_CHAINED_TYPE(locomo_spi_handler); + static void locomo_setup_irq(struct locomo *lchip) { int irq; Index: linux.prev/arch/arm/common/sa1111.c =================================================================== --- linux.prev.orig/arch/arm/common/sa1111.c +++ linux.prev/arch/arm/common/sa1111.c @@ -171,11 +171,11 @@ sa1111_irq_handler(unsigned int irq, str for (i = IRQ_SA1111_START; stat0; i++, stat0 >>= 1) if (stat0 & 1) - do_edge_IRQ(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i, regs); for (i = IRQ_SA1111_START + 32; stat1; i++, stat1 >>= 1) if (stat1 & 1) - do_edge_IRQ(i, irq_desc + i, regs); + handle_edge_irq(i, irq_desc + i, regs); /* For level-based interrupts */ desc->chip->unmask(irq); @@ -380,6 +380,8 @@ static struct irqchip sa1111_high_chip = .set_wake = sa1111_wake_highirq, }; +static DEFINE_IRQ_CHAINED_TYPE(sa1111_irq_handler); + static void sa1111_setup_irq(struct sa1111 *sachip) { void __iomem *irqbase = sachip->base + SA1111_INTC; Index: linux.prev/arch/arm/common/time-acorn.c =================================================================== --- linux.prev.orig/arch/arm/common/time-acorn.c +++ linux.prev/arch/arm/common/time-acorn.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,7 @@ ioc_timer_interrupt(int irq, void *dev_i static struct irqaction ioc_timer_irq = { .name = "timer", - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .handler = ioc_timer_interrupt }; Index: linux.prev/arch/arm/kernel/calls.S =================================================================== --- linux.prev.orig/arch/arm/kernel/calls.S +++ linux.prev/arch/arm/kernel/calls.S @@ -7,11 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
* - * This file is included twice in entry-common.S + * NR_syscalls now defined in include/asm-arm/unistd.h - tglx */ -#ifndef NR_syscalls -#define NR_syscalls 328 -#else __syscall_start: /* 0 */ .long sys_restart_syscall @@ -341,4 +338,3 @@ __syscall_end: .rept NR_syscalls - (__syscall_end - __syscall_start) / 4 .long sys_ni_syscall .endr -#endif Index: linux.prev/arch/arm/kernel/dma.c =================================================================== --- linux.prev.orig/arch/arm/kernel/dma.c +++ linux.prev/arch/arm/kernel/dma.c @@ -22,7 +22,7 @@ #include -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_RAW_SPINLOCK(dma_spin_lock); #if MAX_DMA_CHANNELS > 0 Index: linux.prev/arch/arm/kernel/ecard.c =================================================================== --- linux.prev.orig/arch/arm/kernel/ecard.c +++ linux.prev/arch/arm/kernel/ecard.c @@ -619,7 +619,7 @@ ecard_irqexp_handler(unsigned int irq, s ecard_t *ec = slot_to_ecard(slot); if (ec->claimed) { - struct irqdesc *d = irqdesc + ec->irq; + struct irqdesc *d = irq_desc + ec->irq; /* * this ugly code is so that we can operate a * prioritorising system: @@ -1052,6 +1052,9 @@ ecard_probe(int slot, card_type_t type) return rc; } +static DEFINE_IRQ_CHAINED_TYPE(ecard_irqexp_handler); +static DEFINE_IRQ_CHAINED_TYPE(ecard_irq_handler); + /* * Initialise the expansion card system. * Locate all hardware - interrupt management and @@ -1081,8 +1084,10 @@ static int __init ecard_init(void) irqhw = ecard_probeirqhw(); - set_irq_chained_handler(IRQ_EXPANSIONCARD, - irqhw ? ecard_irqexp_handler : ecard_irq_handler); + if (irqhw) + set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irqexp_handler); + else + set_irq_chained_handler(IRQ_EXPANSIONCARD, ecard_irq_handler); ecard_proc_init(); Index: linux.prev/arch/arm/kernel/entry-armv.S =================================================================== --- linux.prev.orig/arch/arm/kernel/entry-armv.S +++ linux.prev/arch/arm/kernel/entry-armv.S @@ -192,7 +192,7 @@ __irq_svc: irq_handler #ifdef CONFIG_PREEMPT ldr r0, [tsk, #TI_FLAGS] @ get flags - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED blne svc_preempt preempt_return: ldr r0, [tsk, #TI_PREEMPT] @ read preempt value @@ -219,7 +219,7 @@ svc_preempt: str r7, [tsk, #TI_PREEMPT] @ expects preempt_count == 0 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED beq preempt_return @ go again b 1b #endif Index: linux.prev/arch/arm/kernel/entry-common.S =================================================================== --- linux.prev.orig/arch/arm/kernel/entry-common.S +++ linux.prev/arch/arm/kernel/entry-common.S @@ -3,6 +3,8 @@ * * Copyright (C) 2000 Russell King * + * LATENCY_TRACE/mcount support (C) 2005 Timesys john.cooper@timesys.com + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -41,7 +43,7 @@ ret_fast_syscall: fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: - tst r1, #_TIF_NEED_RESCHED + tst r1, #_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED bne work_resched tst r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING beq no_work_pending @@ -51,7 +53,8 @@ work_pending: b ret_slow_syscall @ Check work again work_resched: - bl schedule + bl __schedule + /* * "slow" syscall return path. 
"why" tells us if this was a real syscall. */ @@ -87,8 +90,6 @@ ENTRY(ret_from_fork) b ret_slow_syscall -#include "calls.S" - /*============================================================================= * SWI handler *----------------------------------------------------------------------------- @@ -271,3 +272,110 @@ sys_mmap2: str r5, [sp, #4] b do_mmap2 #endif + +#ifdef CONFIG_FRAME_POINTER + +#ifdef CONFIG_MCOUNT +/* + * At the point where we are in mcount() we maintain the + * frame of the prologue code and keep the call to mcount() + * out of the stack frame list: + + saved pc <---\ caller of instrumented routine + saved lr | + ip/prev_sp | + fp -----^ | + : | + | + -> saved pc | instrumented routine + | saved lr | + | ip/prev_sp | + | fp ---------/ + | : + | + | mcount + | saved pc + | saved lr + | ip/prev sp + -- fp + r3 + r2 + r1 + sp-> r0 + : + */ + + .text + .align 0 + .type mcount %function + .global mcount + +/* gcc -pg generated FUNCTION_PROLOGUE references mcount() + * and has already created the stack frame invocation for + * the routine we have been called to instrument. We create + * a complete frame nevertheless, as we want to use the same + * call to mcount() from c code. + */ +mcount: + + ldr ip, =mcount_enabled @ leave early, if disabled + ldr ip, [ip] + cmp ip, #0 + moveq pc,lr + + mov ip, sp + stmdb sp!, {r0 - r3, fp, ip, lr, pc} @ create stack frame + + ldr r1, [fp, #-4] @ get lr (the return address + @ of the caller of the + @ instrumented function) + mov r0, lr @ get lr - (the return address + @ of the instrumented function) + + sub fp, ip, #4 @ point fp at this frame + + bl __trace +1: + ldmdb fp, {r0 - r3, fp, sp, pc} @ pop entry frame and return + +#endif + +/* ARM replacement for unsupported gcc __builtin_return_address(n) + * where 0 < n. n == 0 is supported here as well. + * + * Walk up the stack frame until the desired frame is found or a NULL + * fp is encountered, return NULL in the latter case. + * + * Note: it is possible under code optimization for the stack invocation + * of an ancestor function (level N) to be removed before calling a + * descendant function (level N+1). No easy means is available to deduce + * this scenario with the result being [for example] caller_addr(0) when + * called from level N+1 returning level N-1 rather than the expected + * level N. This optimization issue appears isolated to the case of + * a call to a level N+1 routine made at the tail end of a level N + * routine -- the level N frame is deleted and a simple branch is made + * to the level N+1 routine. + */ + + .text + .align 0 + .type arm_return_addr %function + .global arm_return_addr + +arm_return_addr: + mov ip, r0 + mov r0, fp +3: + cmp r0, #0 + beq 1f @ frame list hit end, bail + cmp ip, #0 + beq 2f @ reached desired frame + ldr r0, [r0, #-12] @ else continue, get next fp + sub ip, ip, #1 + b 3b +2: + ldr r0, [r0, #-4] @ get target return address +1: + mov pc, lr + +#endif Index: linux.prev/arch/arm/kernel/fiq.c =================================================================== --- linux.prev.orig/arch/arm/kernel/fiq.c +++ linux.prev/arch/arm/kernel/fiq.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,7 @@ void set_fiq_handler(void *start, unsign * disable irqs for the duration. Note - these functions are almost * entirely coded in assembly. 
*/ -void __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) set_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( @@ -106,7 +107,7 @@ void __attribute__((naked)) set_fiq_regs : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); } -void __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) +void notrace __attribute__((naked)) get_fiq_regs(struct pt_regs *regs) { register unsigned long tmp; asm volatile ( Index: linux.prev/arch/arm/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/arm/kernel/init_task.c +++ linux.prev/arch/arm/kernel/init_task.c @@ -12,8 +12,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/arm/kernel/irq.c =================================================================== --- linux.prev.orig/arch/arm/kernel/irq.c +++ linux.prev/arch/arm/kernel/irq.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -38,193 +39,11 @@ #include #include -#include #include -#include #include -/* - * Maximum IRQ count. Currently, this is arbitary. However, it should - * not be set too low to prevent false triggering. Conversely, if it - * is set too high, then you could miss a stuck IRQ. - * - * Maybe we ought to set a timer and re-enable the IRQ at a later time? - */ -#define MAX_IRQ_CNT 100000 - -static int noirqdebug; -static volatile unsigned long irq_err_count; -static DEFINE_SPINLOCK(irq_controller_lock); -static LIST_HEAD(irq_pending); - -struct irqdesc irq_desc[NR_IRQS]; void (*init_arch_irq)(void) __initdata = NULL; -/* - * No architecture-specific irq_finish function defined in arm/arch/irqs.h. - */ -#ifndef irq_finish -#define irq_finish(irq) do { } while (0) -#endif - -/* - * Dummy mask/unmask handler - */ -void dummy_mask_unmask_irq(unsigned int irq) -{ -} - -irqreturn_t no_action(int irq, void *dev_id, struct pt_regs *regs) -{ - return IRQ_NONE; -} - -void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - irq_err_count += 1; - printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq); -} - -static struct irqchip bad_chip = { - .ack = dummy_mask_unmask_irq, - .mask = dummy_mask_unmask_irq, - .unmask = dummy_mask_unmask_irq, -}; - -static struct irqdesc bad_irq_desc = { - .chip = &bad_chip, - .handle = do_bad_IRQ, - .pend = LIST_HEAD_INIT(bad_irq_desc.pend), - .disable_depth = 1, -}; - -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - - while (desc->running) - barrier(); -} -EXPORT_SYMBOL(synchronize_irq); - -#define smp_set_running(desc) do { desc->running = 1; } while (0) -#define smp_clear_running(desc) do { desc->running = 0; } while (0) -#else -#define smp_set_running(desc) do { } while (0) -#define smp_clear_running(desc) do { } while (0) -#endif - -/** - * disable_irq_nosync - disable an irq without waiting - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and disables - * are nested. We do this lazily. - * - * This function may be called from IRQ context. 
- */ -void disable_irq_nosync(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - desc->disable_depth++; - list_del_init(&desc->pend); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(disable_irq_nosync); - -/** - * disable_irq - disable an irq and wait for completion - * @irq: Interrupt to disable - * - * Disable the selected interrupt line. Enables and disables - * are nested. This functions waits for any pending IRQ - * handlers for this interrupt to complete before returning. - * If you use this function while holding a resource the IRQ - * handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ -void disable_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - - disable_irq_nosync(irq); - if (desc->action) - synchronize_irq(irq); -} -EXPORT_SYMBOL(disable_irq); - -/** - * enable_irq - enable interrupt handling on an irq - * @irq: Interrupt to enable - * - * Re-enables the processing of interrupts on this IRQ line. - * Note that this may call the interrupt handler, so you may - * get unexpected results if you hold IRQs disabled. - * - * This function may be called from IRQ context. - */ -void enable_irq(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (unlikely(!desc->disable_depth)) { - printk("enable_irq(%u) unbalanced from %p\n", irq, - __builtin_return_address(0)); - } else if (!--desc->disable_depth) { - desc->probing = 0; - desc->chip->unmask(irq); - - /* - * If the interrupt is waiting to be processed, - * try to re-run it. We can't directly run it - * from here since the caller might be in an - * interrupt-protected region. - */ - if (desc->pending && list_empty(&desc->pend)) { - desc->pending = 0; - if (!desc->chip->retrigger || - desc->chip->retrigger(irq)) - list_add(&desc->pend, &irq_pending); - } - } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(enable_irq); - -/* - * Enable wake on selected irq - */ -void enable_irq_wake(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (desc->chip->set_wake) - desc->chip->set_wake(irq, 1); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(enable_irq_wake); - -void disable_irq_wake(unsigned int irq) -{ - struct irqdesc *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&irq_controller_lock, flags); - if (desc->chip->set_wake) - desc->chip->set_wake(irq, 0); - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -EXPORT_SYMBOL(disable_irq_wake); - int show_interrupts(struct seq_file *p, void *v) { int i = *(loff_t *) v, cpu; @@ -243,7 +62,7 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_controller_lock, flags); + spin_lock_irqsave(&irq_desc[i].lock, flags); action = irq_desc[i].action; if (!action) goto unlock; @@ -257,7 +76,7 @@ int show_interrupts(struct seq_file *p, seq_putc(p, '\n'); unlock: - spin_unlock_irqrestore(&irq_controller_lock, flags); + spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { #ifdef CONFIG_ARCH_ACORN show_fiq_list(p, v); @@ -266,374 +85,83 @@ unlock: show_ipi_list(p); show_local_irqs(p); #endif +#ifdef FIXME_TGLX seq_printf(p, "Err: %10lu\n", irq_err_count); - } - return 0; -} - -/* - * IRQ lock detection. 
- * - * Hopefully, this should get us out of a few locked situations. - * However, it may take a while for this to happen, since we need - * a large number if IRQs to appear in the same jiffie with the - * same instruction pointer (or within 2 instructions). - */ -static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs) -{ - unsigned long instr_ptr = instruction_pointer(regs); - - if (desc->lck_jif == jiffies && - desc->lck_pc >= instr_ptr && desc->lck_pc < instr_ptr + 8) { - desc->lck_cnt += 1; - - if (desc->lck_cnt > MAX_IRQ_CNT) { - printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq); - return 1; - } - } else { - desc->lck_cnt = 0; - desc->lck_pc = instruction_pointer(regs); - desc->lck_jif = jiffies; - } - return 0; -} - -static void -report_bad_irq(unsigned int irq, struct pt_regs *regs, struct irqdesc *desc, int ret) -{ - static int count = 100; - struct irqaction *action; - - if (!count || noirqdebug) - return; - - count--; - - if (ret != IRQ_HANDLED && ret != IRQ_NONE) { - printk("irq%u: bogus retval mask %x\n", irq, ret); - } else { - printk("irq%u: nobody cared\n", irq); - } - show_regs(regs); - dump_stack(); - printk(KERN_ERR "handlers:"); - action = desc->action; - do { - printk("\n" KERN_ERR "[<%p>]", action->handler); - print_symbol(" (%s)", (unsigned long)action->handler); - action = action->next; - } while (action); - printk("\n"); -} - -static int -__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs) -{ - unsigned int status; - int ret, retval = 0; - - spin_unlock(&irq_controller_lock); - -#ifdef CONFIG_NO_IDLE_HZ - if (!(action->flags & SA_TIMER) && system_timer->dyn_tick != NULL) { - write_seqlock(&xtime_lock); - if (system_timer->dyn_tick->state & DYN_TICK_ENABLED) - system_timer->dyn_tick->handler(irq, 0, regs); - write_sequnlock(&xtime_lock); - } #endif - - if (!(action->flags & SA_INTERRUPT)) - local_irq_enable(); - - status = 0; - do { - ret = action->handler(irq, action->dev_id, regs); - if (ret == IRQ_HANDLED) - status |= action->flags; - retval |= ret; - action = action->next; - } while (action); - - if (status & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - - spin_lock_irq(&irq_controller_lock); - - return retval; -} - -/* - * This is for software-decoded IRQs. The caller is expected to - * handle the ack, clear, mask and unmask issues. - */ -void -do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - struct irqaction *action; - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - kstat_cpu(cpu).irqs[irq]++; - - smp_set_running(desc); - - action = desc->action; - if (action) { - int ret = __do_irq(irq, action, regs); - if (ret != IRQ_HANDLED) - report_bad_irq(irq, regs, desc, ret); - } - - smp_clear_running(desc); -} - -/* - * Most edge-triggered IRQ implementations seem to take a broken - * approach to this. Hence the complexity. - */ -void -do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - /* - * If we're currently running this IRQ, or its disabled, - * we shouldn't process the IRQ. Instead, turn on the - * hardware masks. - */ - if (unlikely(desc->running || desc->disable_depth)) - goto running; - - /* - * Acknowledge and clear the IRQ, but don't mask it. - */ - desc->chip->ack(irq); - - /* - * Mark the IRQ currently in progress. 
- */ - desc->running = 1; - - kstat_cpu(cpu).irqs[irq]++; - - do { - struct irqaction *action; - - action = desc->action; - if (!action) - break; - - if (desc->pending && !desc->disable_depth) { - desc->pending = 0; - desc->chip->unmask(irq); - } - - __do_irq(irq, action, regs); - } while (desc->pending && !desc->disable_depth); - - desc->running = 0; - - /* - * If we were disabled or freed, shut down the handler. - */ - if (likely(desc->action && !check_irq_lock(desc, irq, regs))) - return; - - running: - /* - * We got another IRQ while this one was masked or - * currently running. Delay it. - */ - desc->pending = 1; - desc->chip->mask(irq); - desc->chip->ack(irq); -} - -/* - * Level-based IRQ handler. Nice and simple. - */ -void -do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs) -{ - struct irqaction *action; - const unsigned int cpu = smp_processor_id(); - - desc->triggered = 1; - - /* - * Acknowledge, clear _AND_ disable the interrupt. - */ - desc->chip->ack(irq); - - if (likely(!desc->disable_depth)) { - kstat_cpu(cpu).irqs[irq]++; - - smp_set_running(desc); - - /* - * Return with this interrupt masked if no action - */ - action = desc->action; - if (action) { - int ret = __do_irq(irq, desc->action, regs); - - if (ret != IRQ_HANDLED) - report_bad_irq(irq, regs, desc, ret); - - if (likely(!desc->disable_depth && - !check_irq_lock(desc, irq, regs))) - desc->chip->unmask(irq); - } - - smp_clear_running(desc); } + return 0; } -static void do_pending_irqs(struct pt_regs *regs) -{ - struct list_head head, *l, *n; - - do { - struct irqdesc *desc; - - /* - * First, take the pending interrupts off the list. - * The act of calling the handlers may add some IRQs - * back onto the list. - */ - head = irq_pending; - INIT_LIST_HEAD(&irq_pending); - head.next->prev = &head; - head.prev->next = &head; - - /* - * Now run each entry. We must delete it from our - * list before calling the handler. - */ - list_for_each_safe(l, n, &head) { - desc = list_entry(l, struct irqdesc, pend); - list_del_init(&desc->pend); - desc_handle_irq(desc - irq_desc, desc, regs); - } - - /* - * The list must be empty. - */ - BUG_ON(!list_empty(&head)); - } while (!list_empty(&irq_pending)); -} +/* Handle bad interrupts */ +static struct irq_desc bad_irq = { + .handler = &no_irq_type, + .lock = RAW_SPIN_LOCK_UNLOCKED +}; /* - * do_IRQ handles all hardware IRQ's. Decoded IRQs should not + * asm_do_IRQ handles all hardware IRQ's. Decoded IRQs should not * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage notrace void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct irqdesc *desc = irq_desc + irq; + trace_special(instruction_pointer(regs), irq, 0); + /* * Some hardware gives randomly wrong interrupts. Rather * than crashing, do something sensible. */ if (irq >= NR_IRQS) - desc = &bad_irq_desc; + desc = &bad_irq; irq_enter(); - spin_lock(&irq_controller_lock); - desc_handle_irq(irq, desc, regs); - /* - * Now re-run any pending interrupts. 
- */ - if (!list_empty(&irq_pending)) - do_pending_irqs(regs); - - irq_finish(irq); + desc_handle_irq(irq, desc, regs); - spin_unlock(&irq_controller_lock); irq_exit(); } -void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained) +void __set_irq_handler(unsigned int irq, struct irq_type *type, int is_chained) { struct irqdesc *desc; unsigned long flags; if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq); + printk(KERN_ERR "Trying to install type control for IRQ%d\n", irq); return; } - if (handle == NULL) - handle = do_bad_IRQ; - desc = irq_desc + irq; - if (is_chained && desc->chip == &bad_chip) - printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq); - - spin_lock_irqsave(&irq_controller_lock, flags); - if (handle == do_bad_IRQ) { - desc->chip->mask(irq); - desc->chip->ack(irq); - desc->disable_depth = 1; - } - desc->handle = handle; - if (handle != do_bad_IRQ && is_chained) { - desc->valid = 0; - desc->probe_ok = 0; - desc->disable_depth = 0; - desc->chip->unmask(irq); + /* Uninstall ? */ + if (type == NULL || type == &no_irq_type) { + spin_lock_irqsave(&desc->lock, flags); + if (desc->chip) { + desc->chip->mask(irq); + desc->chip->ack(irq); + } + desc->depth = 1; + spin_unlock_irqrestore(&desc->lock, flags); } - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -void set_irq_chip(unsigned int irq, struct irqchip *chip) -{ - struct irqdesc *desc; - unsigned long flags; - - if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq); + /* Install the irq_type */ + if (generic_set_irq_type(irq, type)) return; - } - - if (chip == NULL) - chip = &bad_chip; - - desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - desc->chip = chip; - spin_unlock_irqrestore(&irq_controller_lock, flags); -} -int set_irq_type(unsigned int irq, unsigned int type) -{ - struct irqdesc *desc; - unsigned long flags; - int ret = -ENXIO; + spin_lock_irqsave(&desc->lock, flags); + if (is_chained && (desc->handler == &no_irq_type || !desc->chip)) + printk(KERN_WARNING "Trying to install chained interrupt type for IRQ%d\n", irq); - if (irq >= NR_IRQS) { - printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq); - return -ENODEV; - } - - desc = irq_desc + irq; - if (desc->chip->set_type) { - spin_lock_irqsave(&irq_controller_lock, flags); - ret = desc->chip->set_type(irq, type); - spin_unlock_irqrestore(&irq_controller_lock, flags); + if (type != NULL && is_chained) { + desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; + desc->depth = 0; + if (desc->chip) + desc->chip->unmask(irq); } - - return ret; + spin_unlock_irqrestore(&desc->lock, flags); } -EXPORT_SYMBOL(set_irq_type); void set_irq_flags(unsigned int irq, unsigned int iflags) { @@ -646,400 +174,28 @@ void set_irq_flags(unsigned int irq, uns } desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - desc->valid = (iflags & IRQF_VALID) != 0; - desc->probe_ok = (iflags & IRQF_PROBE) != 0; - desc->noautoenable = (iflags & IRQF_NOAUTOEN) != 0; - spin_unlock_irqrestore(&irq_controller_lock, flags); -} - -int setup_irq(unsigned int irq, struct irqaction *new) -{ - int shared = 0; - struct irqaction *old, **p; - unsigned long flags; - struct irqdesc *desc; - - /* - * Some drivers like serial.c use request_irq() heavily, - * so we have to be careful not to interfere with a - * running system. - */ - if (new->flags & SA_SAMPLE_RANDOM) { - /* - * This function might sleep, we want to call it first, - * outside of the atomic block. 
- * Yes, this might clear the entropy pool if the wrong - * driver is attempted to be loaded, without actually - * installing a new handler, but is this really a problem, - * only the sysadmin is able to do this. - */ - rand_initialize_irq(irq); - } - - /* - * The following block of code has to be executed atomically - */ - desc = irq_desc + irq; - spin_lock_irqsave(&irq_controller_lock, flags); - p = &desc->action; - if ((old = *p) != NULL) { - /* Can't share interrupts unless both agree to */ - if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&irq_controller_lock, flags); - return -EBUSY; - } - - /* add new interrupt at end of irq queue */ - do { - p = &old->next; - old = *p; - } while (old); - shared = 1; - } - - *p = new; - - if (!shared) { - desc->probing = 0; - desc->running = 0; - desc->pending = 0; - desc->disable_depth = 1; - if (!desc->noautoenable) { - desc->disable_depth = 0; - desc->chip->unmask(irq); - } - } - - spin_unlock_irqrestore(&irq_controller_lock, flags); - return 0; -} - -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. - * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ -int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irq_flags, const char * devname, void *dev_id) -{ - unsigned long retval; - struct irqaction *action; - - if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler || - (irq_flags & SA_SHIRQ && !dev_id)) - return -EINVAL; - - action = (struct irqaction *)kmalloc(sizeof(struct irqaction), GFP_KERNEL); - if (!action) - return -ENOMEM; - - action->handler = handler; - action->flags = irq_flags; - cpus_clear(action->mask); - action->name = devname; - action->next = NULL; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - - if (retval) - kfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. - * - * This function must not be called from interrupt context. 
- */ -void free_irq(unsigned int irq, void *dev_id) -{ - struct irqaction * action, **p; - unsigned long flags; - - if (irq >= NR_IRQS || !irq_desc[irq].valid) { - printk(KERN_ERR "Trying to free IRQ%d\n",irq); - dump_stack(); - return; - } - - spin_lock_irqsave(&irq_controller_lock, flags); - for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { - if (action->dev_id != dev_id) - continue; - - /* Found it - now free it */ - *p = action->next; - break; - } - spin_unlock_irqrestore(&irq_controller_lock, flags); - - if (!action) { - printk(KERN_ERR "Trying to free free IRQ%d\n",irq); - dump_stack(); - } else { - synchronize_irq(irq); - kfree(action); - } -} - -EXPORT_SYMBOL(free_irq); - -static DECLARE_MUTEX(probe_sem); - -/* Start the interrupt probing. Unlike other architectures, - * we don't return a mask of interrupts from probe_irq_on, - * but return the number of interrupts enabled for the probe. - * The interrupts which have been enabled for probing is - * instead recorded in the irq_desc structure. - */ -unsigned long probe_irq_on(void) -{ - unsigned int i, irqs = 0; - unsigned long delay; - - down(&probe_sem); - - /* - * first snaffle up any unassigned but - * probe-able interrupts - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (!irq_desc[i].probe_ok || irq_desc[i].action) - continue; - - irq_desc[i].probing = 1; - irq_desc[i].triggered = 0; - if (irq_desc[i].chip->set_type) - irq_desc[i].chip->set_type(i, IRQT_PROBE); - irq_desc[i].chip->unmask(i); - irqs += 1; - } - spin_unlock_irq(&irq_controller_lock); - - /* - * wait for spurious interrupts to mask themselves out again - */ - for (delay = jiffies + HZ/10; time_before(jiffies, delay); ) - /* min 100ms delay */; - - /* - * now filter out any obviously spurious interrupts - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && irq_desc[i].triggered) { - irq_desc[i].probing = 0; - irqs -= 1; - } - } - spin_unlock_irq(&irq_controller_lock); - - return irqs; -} - -EXPORT_SYMBOL(probe_irq_on); - -unsigned int probe_irq_mask(unsigned long irqs) -{ - unsigned int mask = 0, i; - - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < 16 && i < NR_IRQS; i++) - if (irq_desc[i].probing && irq_desc[i].triggered) - mask |= 1 << i; - spin_unlock_irq(&irq_controller_lock); - - up(&probe_sem); - - return mask; -} -EXPORT_SYMBOL(probe_irq_mask); - -/* - * Possible return values: - * >= 0 - interrupt number - * -1 - no interrupt/many interrupts - */ -int probe_irq_off(unsigned long irqs) -{ - unsigned int i; - int irq_found = NO_IRQ; - - /* - * look at the interrupts, and find exactly one - * that we were probing has been triggered - */ - spin_lock_irq(&irq_controller_lock); - for (i = 0; i < NR_IRQS; i++) { - if (irq_desc[i].probing && - irq_desc[i].triggered) { - if (irq_found != NO_IRQ) { - irq_found = NO_IRQ; - goto out; - } - irq_found = i; - } - } - - if (irq_found == -1) - irq_found = NO_IRQ; -out: - spin_unlock_irq(&irq_controller_lock); - - up(&probe_sem); - - return irq_found; -} - -EXPORT_SYMBOL(probe_irq_off); - -#ifdef CONFIG_SMP -static void route_irq(struct irqdesc *desc, unsigned int irq, unsigned int cpu) -{ - pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->cpu, cpu); - - spin_lock_irq(&irq_controller_lock); - desc->cpu = cpu; - desc->chip->set_cpu(desc, irq, cpu); - spin_unlock_irq(&irq_controller_lock); -} - -#ifdef CONFIG_PROC_FS -static int -irq_affinity_read_proc(char *page, char **start, off_t off, int 
count, - int *eof, void *data) -{ - struct irqdesc *desc = irq_desc + ((int)data); - int len = cpumask_scnprintf(page, count, desc->affinity); - - if (count - len < 2) - return -EINVAL; - page[len++] = '\n'; - page[len] = '\0'; - - return len; -} - -static int -irq_affinity_write_proc(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - unsigned int irq = (unsigned int)data; - struct irqdesc *desc = irq_desc + irq; - cpumask_t affinity, tmp; - int ret = -EIO; - - if (!desc->chip->set_cpu) - goto out; - - ret = cpumask_parse(buffer, count, affinity); - if (ret) - goto out; - - cpus_and(tmp, affinity, cpu_online_map); - if (cpus_empty(tmp)) { - ret = -EINVAL; - goto out; - } - - desc->affinity = affinity; - route_irq(desc, irq, first_cpu(tmp)); - ret = count; - - out: - return ret; -} -#endif -#endif - -void __init init_irq_proc(void) -{ -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) - struct proc_dir_entry *dir; - int irq; - - dir = proc_mkdir("irq", NULL); - if (!dir) - return; - - for (irq = 0; irq < NR_IRQS; irq++) { - struct proc_dir_entry *entry; - struct irqdesc *desc; - char name[16]; - - desc = irq_desc + irq; - memset(name, 0, sizeof(name)); - snprintf(name, sizeof(name) - 1, "%u", irq); - - desc->procdir = proc_mkdir(name, dir); - if (!desc->procdir) - continue; - - entry = create_proc_entry("smp_affinity", 0600, desc->procdir); - if (entry) { - entry->nlink = 1; - entry->data = (void *)irq; - entry->read_proc = irq_affinity_read_proc; - entry->write_proc = irq_affinity_write_proc; - } - } -#endif + spin_lock_irqsave(&desc->lock, flags); + desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE; + if (iflags & IRQF_VALID) + desc->status &= ~IRQ_NOREQUEST; + if (iflags & IRQF_PROBE) + desc->status &= ~IRQ_NOPROBE; + spin_unlock_irqrestore(&desc->lock, flags); } void __init init_IRQ(void) { - struct irqdesc *desc; extern void init_dma(void); int irq; + for (irq = 0; irq < NR_IRQS; irq++) + irq_desc[irq].status |= IRQ_NOREQUEST; + #ifdef CONFIG_SMP bad_irq_desc.affinity = CPU_MASK_ALL; bad_irq_desc.cpu = smp_processor_id(); #endif - for (irq = 0, desc = irq_desc; irq < NR_IRQS; irq++, desc++) { - *desc = bad_irq_desc; - INIT_LIST_HEAD(&desc->pend); - } - init_arch_irq(); init_dma(); } Index: linux.prev/arch/arm/kernel/process.c =================================================================== --- linux.prev.orig/arch/arm/kernel/process.c +++ linux.prev/arch/arm/kernel/process.c @@ -89,12 +89,12 @@ void default_idle(void) if (hlt_counter) cpu_relax(); else { - local_irq_disable(); + raw_local_irq_disable(); if (!need_resched()) { timer_dyn_reprogram(); arch_idle(); } - local_irq_enable(); + raw_local_irq_enable(); } } @@ -124,8 +124,8 @@ void cpu_idle(void) while (!need_resched()) idle(); leds_event(led_idle_end); - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); } } Index: linux.prev/arch/arm/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/arm/kernel/semaphore.c +++ linux.prev/arch/arm/kernel/semaphore.c @@ -49,14 +49,16 @@ * we cannot lose wakeup events. 
*/ -void __up(struct semaphore *sem) +fastcall void __attribute_used__ __compat_up(struct compat_semaphore *sem) { wake_up(&sem->wait); } +EXPORT_SYMBOL(__compat_up); + static DEFINE_SPINLOCK(semaphore_lock); -void __sched __down(struct semaphore * sem) +fastcall void __attribute_used__ __sched __compat_down(struct compat_semaphore * sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -89,7 +91,9 @@ void __sched __down(struct semaphore * s wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +EXPORT_SYMBOL(__compat_down); + +fastcall int __attribute_used__ __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -140,6 +144,8 @@ int __sched __down_interruptible(struct return retval; } +EXPORT_SYMBOL(__compat_down_interruptible); + /* * Trylock failed - make sure we correct for * having decremented the count. @@ -148,7 +154,7 @@ int __sched __down_interruptible(struct * single "cmpxchg" without failure cases, * but then it wouldn't work on a 386. */ -int __down_trylock(struct semaphore * sem) +fastcall int __attribute_used__ __compat_down_trylock(struct compat_semaphore * sem) { int sleepers; unsigned long flags; @@ -168,6 +174,15 @@ int __down_trylock(struct semaphore * se return 1; } +EXPORT_SYMBOL(__compat_down_trylock); + +fastcall int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} + +EXPORT_SYMBOL(compat_sem_is_locked); + /* * The semaphore operations have a special calling sequence that * allow us to do a simpler in-line version of them. These routines @@ -184,7 +199,7 @@ asm(" .section .sched.text,\"ax\",%progb __down_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down \n\ + bl __compat_down \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ .align 5 \n\ @@ -192,7 +207,7 @@ __down_failed: \n\ __down_interruptible_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down_interruptible \n\ + bl __compat_down_interruptible \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ @@ -201,7 +216,7 @@ __down_interruptible_failed: \n\ __down_trylock_failed: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __down_trylock \n\ + bl __compat_down_trylock \n\ mov ip, r0 \n\ ldmfd sp!, {r0 - r3, pc} \n\ \n\ @@ -210,7 +225,7 @@ __down_trylock_failed: \n\ __up_wakeup: \n\ stmfd sp!, {r0 - r3, lr} \n\ mov r0, ip \n\ - bl __up \n\ + bl __compat_up \n\ ldmfd sp!, {r0 - r3, pc} \n\ "); Index: linux.prev/arch/arm/kernel/signal.c =================================================================== --- linux.prev.orig/arch/arm/kernel/signal.c +++ linux.prev/arch/arm/kernel/signal.c @@ -628,6 +628,14 @@ static int do_signal(sigset_t *oldset, s siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif + /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux.prev/arch/arm/kernel/smp.c =================================================================== --- linux.prev.orig/arch/arm/kernel/smp.c +++ linux.prev/arch/arm/kernel/smp.c @@ -56,6 +56,7 @@ struct ipi_data { unsigned long bits; }; +/* FIXME */ static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { .lock = SPIN_LOCK_UNLOCKED, }; @@ -348,7 +349,7 @@ static void send_ipi_message(cpumask_t c unsigned long flags; unsigned int cpu; - local_irq_save(flags); + raw_local_irq_save(flags); 
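/*
 * Editor's sketch, not part of the patch: the raw_local_irq_* primitives
 * introduced by the -rt tree always operate on the hardware interrupt
 * flag, while the unqualified local_irq_* forms are left free for
 * PREEMPT_RT to redefine. Paths like the IPI bookkeeping here must
 * genuinely stop interrupts, hence the mechanical conversions in these
 * hunks. The pattern, condensed (example_rmw and 'word' are
 * hypothetical names used only for illustration):
 */
#if 0	/* illustration only */
static void example_rmw(volatile unsigned long *word, unsigned long bits)
{
	unsigned long flags;

	raw_local_irq_save(flags);	/* hard-disables this CPU's IRQs */
	*word |= bits;			/* RMW now atomic w.r.t. local IRQs */
	raw_local_irq_restore(flags);	/* restores saved hardware state */
}
#endif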
for_each_cpu_mask(cpu, callmap) { struct ipi_data *ipi = &per_cpu(ipi_data, cpu); @@ -363,7 +364,7 @@ static void send_ipi_message(cpumask_t c */ smp_cross_call(callmap); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -520,7 +521,7 @@ static void ipi_call_function(unsigned i cpu_clear(cpu, data->unfinished); } -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_RAW_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() @@ -535,7 +536,7 @@ static void ipi_cpu_stop(unsigned int cp cpu_clear(cpu, cpu_online_map); local_fiq_disable(); - local_irq_disable(); + raw_local_irq_disable(); while (1) cpu_relax(); Index: linux.prev/arch/arm/kernel/traps.c =================================================================== --- linux.prev.orig/arch/arm/kernel/traps.c +++ linux.prev/arch/arm/kernel/traps.c @@ -177,6 +177,8 @@ void dump_stack(void) { #ifdef CONFIG_DEBUG_ERRORS __backtrace(); + print_traces(current); + show_held_locks(current); #endif } @@ -217,7 +219,7 @@ static void __die(const char *str, int e } } -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); /* * This function is protected against re-entrancy. @@ -249,7 +251,7 @@ void notify_die(const char *str, struct } static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_RAW_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { @@ -341,7 +343,7 @@ asmlinkage void bad_mode(struct pt_regs handler[reason], processor_modes[proc_mode]); die("Oops - bad mode", regs, 0); - local_irq_disable(); + raw_local_irq_disable(); panic("bad mode"); } Index: linux.prev/arch/arm/mach-clps711x/p720t-leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-clps711x/p720t-leds.c +++ linux.prev/arch/arm/mach-clps711x/p720t-leds.c @@ -36,7 +36,7 @@ static void p720t_leds_event(led_event_t unsigned long flags; u32 pddr; - local_irq_save(flags); + raw_local_irq_save(flags); switch(ledevt) { case led_idle_start: break; @@ -53,7 +53,7 @@ static void p720t_leds_event(led_event_t break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init leds_init(void) Index: linux.prev/arch/arm/mach-clps711x/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-clps711x/time.c +++ linux.prev/arch/arm/mach-clps711x/time.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-clps7500/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-clps7500/core.c +++ linux.prev/arch/arm/mach-clps7500/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include Index: linux.prev/arch/arm/mach-ebsa110/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-ebsa110/core.c +++ linux.prev/arch/arm/mach-ebsa110/core.c @@ -56,14 +56,14 @@ static void __init ebsa110_init_irq(void unsigned long flags; unsigned int irq; - local_irq_save(flags); + raw_local_irq_save(flags); __raw_writeb(0xff, IRQ_MCLR); __raw_writeb(0x55, IRQ_MSET); __raw_writeb(0x00, IRQ_MSET); if (__raw_readb(IRQ_MASK) != 0x55) while (1); __raw_writeb(0xff, IRQ_MCLR); /* clear all interrupt enables */ - local_irq_restore(flags); + raw_local_irq_restore(flags); for (irq = 0; irq < NR_IRQS; irq++) { set_irq_chip(irq, &ebsa110_irq_chip); Index: linux.prev/arch/arm/mach-footbridge/dc21285-timer.c 
=================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/dc21285-timer.c +++ linux.prev/arch/arm/mach-footbridge/dc21285-timer.c @@ -6,6 +6,7 @@ */ #include #include +#include #include Index: linux.prev/arch/arm/mach-footbridge/isa-irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/isa-irq.c +++ linux.prev/arch/arm/mach-footbridge/isa-irq.c @@ -102,6 +102,17 @@ static struct irqaction irq_cascade = { static struct resource pic1_resource = { "pic1", 0x20, 0x3f }; static struct resource pic2_resource = { "pic2", 0xa0, 0xbf }; +static DEFINE_IRQ_CHAINED_TYPE(isa_irq_handler); + +static unsigned int startup_irq_disabled(unsigned int irq) +{ + return 0; +} + +/* Interrupt type for irqs which must not be + * automatically enabled in request_irq */ +static struct irq_type level_type_nostart; + void __init isa_init_irq(unsigned int host_irq) { unsigned int irq; @@ -159,9 +170,11 @@ void __init isa_init_irq(unsigned int ho * There appears to be a missing pull-up * resistor on this line. */ - if (machine_is_netwinder()) - set_irq_flags(_ISA_IRQ(11), IRQF_VALID | - IRQF_PROBE | IRQF_NOAUTOEN); + if (machine_is_netwinder()) { + level_type_nostart = default_level_type; + level_type_nostart.startup = startup_irq_disabled; + set_irq_handler(_ISA_IRQ(11), &level_type_nostart); + } } } Index: linux.prev/arch/arm/mach-footbridge/isa-timer.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/isa-timer.c +++ linux.prev/arch/arm/mach-footbridge/isa-timer.c @@ -6,6 +6,7 @@ */ #include #include +#include #include #include Index: linux.prev/arch/arm/mach-footbridge/netwinder-hw.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/netwinder-hw.c +++ linux.prev/arch/arm/mach-footbridge/netwinder-hw.c @@ -68,7 +68,7 @@ static inline void wb977_ww(int reg, int /* * This is a lock for accessing ports GP1_IO_BASE and GP2_IO_BASE */ -DEFINE_SPINLOCK(gpio_lock); +DEFINE_RAW_SPINLOCK(gpio_lock); static unsigned int current_gpio_op; static unsigned int current_gpio_io; Index: linux.prev/arch/arm/mach-footbridge/netwinder-leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-footbridge/netwinder-leds.c +++ linux.prev/arch/arm/mach-footbridge/netwinder-leds.c @@ -33,7 +33,7 @@ static char led_state; static char hw_led_state; static DEFINE_SPINLOCK(leds_lock); -extern spinlock_t gpio_lock; +extern raw_spinlock_t gpio_lock; static void netwinder_leds_event(led_event_t evt) { Index: linux.prev/arch/arm/mach-h720x/common.c =================================================================== --- linux.prev.orig/arch/arm/mach-h720x/common.c +++ linux.prev/arch/arm/mach-h720x/common.c @@ -163,6 +163,11 @@ h720x_gpiod_demux_handler(unsigned int i h720x_gpio_handler(mask, irq, desc, regs); } +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioa_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiob_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpioc_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(h720x_gpiod_demux_handler); + #ifdef CONFIG_CPU_H7202 static void h720x_gpioe_demux_handler(unsigned int irq_unused, struct irqdesc *desc, @@ -175,6 +180,7 @@ h720x_gpioe_demux_handler(unsigned int i IRQDBG("%s mask: 0x%08x irq: %d\n",__FUNCTION__,mask,irq); h720x_gpio_handler(mask, irq, desc, regs); } +static
DEFINE_IRQ_CHAINED_TYPE(h720x_gpioe_demux_handler); #endif static struct irqchip h720x_global_chip = { Index: linux.prev/arch/arm/mach-h720x/cpu-h7202.c =================================================================== --- linux.prev.orig/arch/arm/mach-h720x/cpu-h7202.c +++ linux.prev/arch/arm/mach-h720x/cpu-h7202.c @@ -175,6 +175,8 @@ static struct irqaction h7202_timer_irq .handler = h7202_timer_interrupt, }; +static DEFINE_IRQ_CHAINED_TYPE(h7202_timerx_demux_handler); + /* * Setup TIMER0 as system timer */ Index: linux.prev/arch/arm/mach-imx/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/dma.c +++ linux.prev/arch/arm/mach-imx/dma.c @@ -43,7 +43,7 @@ imx_request_dma(char *name, imx_dma_prio if (!name || !irq_handler) return -EINVAL; - local_irq_save(flags); + raw_local_irq_save(flags); /* try grabbing a DMA channel with the requested priority */ for (i = prio; i < prio + (prio == DMA_PRIO_LOW) ? 8 : 4; i++) { @@ -75,7 +75,7 @@ imx_request_dma(char *name, imx_dma_prio i = -ENODEV; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return i; } @@ -91,10 +91,10 @@ imx_free_dma(int dma_ch) return; } - local_irq_save(flags); + raw_local_irq_save(flags); DIMR &= ~(1 << dma_ch); dma_channels[dma_ch].name = NULL; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static irqreturn_t Index: linux.prev/arch/arm/mach-imx/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/irq.c +++ linux.prev/arch/arm/mach-imx/irq.c @@ -217,6 +217,11 @@ static struct irqchip imx_gpio_chip = { .set_type = imx_gpio_irq_type, }; +static DEFINE_IRQ_CHAINED_TYPE(imx_gpioa_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpiob_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpioc_demux_handler); +static DEFINE_IRQ_CHAINED_TYPE(imx_gpiod_demux_handler); + void __init imx_init_irq(void) { Index: linux.prev/arch/arm/mach-imx/leds-mx1ads.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/leds-mx1ads.c +++ linux.prev/arch/arm/mach-imx/leds-mx1ads.c @@ -29,7 +29,7 @@ mx1ads_leds_event(led_event_t ledevt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (ledevt) { #ifdef CONFIG_LEDS_CPU @@ -49,5 +49,5 @@ mx1ads_leds_event(led_event_t ledevt) default: break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-imx/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-imx/time.c +++ linux.prev/arch/arm/mach-imx/time.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-integrator/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/core.c +++ linux.prev/arch/arm/mach-integrator/core.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -117,7 +118,7 @@ arch_initcall(integrator_init); #define CM_CTRL IO_ADDRESS(INTEGRATOR_HDR_BASE) + INTEGRATOR_HDR_CTRL_OFFSET -static DEFINE_SPINLOCK(cm_lock); +static DEFINE_RAW_SPINLOCK(cm_lock); /** * cm_control - update the CM_CTRL register. 
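Editor's note on the DEFINE_RAW_SPINLOCK conversions running through these board files: on PREEMPT_RT a plain spinlock_t is substituted by a sleeping rt-mutex, so a lock taken from a context that cannot sleep (low-level interrupt entry, idle, CPU-stop paths) must be declared raw to remain a true spinning lock. A minimal sketch, assuming this patchset's type-switching spin_lock API; hw_reg_lock and hw_reg_update are hypothetical names:

#if 0	/* illustration only */
static DEFINE_RAW_SPINLOCK(hw_reg_lock);	/* stays a real spinlock on -rt */

static void hw_reg_update(unsigned int set, unsigned int clear)
{
	unsigned long flags;

	/* on a raw lock this still hard-disables local interrupts */
	spin_lock_irqsave(&hw_reg_lock, flags);
	/* ... device register read-modify-write ... */
	spin_unlock_irqrestore(&hw_reg_lock, flags);
}
#endif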
Index: linux.prev/arch/arm/mach-integrator/leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/leds.c +++ linux.prev/arch/arm/mach-integrator/leds.c @@ -41,7 +41,7 @@ static void integrator_leds_event(led_ev unsigned int update_alpha_leds; // yup, change the LEDs - local_irq_save(flags); + raw_local_irq_save(flags); update_alpha_leds = 0; switch(ledevt) { @@ -76,7 +76,7 @@ static void integrator_leds_event(led_ev while (__raw_readl(dbg_base + INTEGRATOR_DBG_ALPHA_OFFSET) & 1); __raw_writel(saved_leds, dbg_base + INTEGRATOR_DBG_LEDS_OFFSET); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init leds_init(void) Index: linux.prev/arch/arm/mach-integrator/pci_v3.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/pci_v3.c +++ linux.prev/arch/arm/mach-integrator/pci_v3.c @@ -163,7 +163,7 @@ * 7:2 register number * */ -static DEFINE_SPINLOCK(v3_lock); +static DEFINE_RAW_SPINLOCK(v3_lock); #define PCI_BUS_NONMEM_START 0x00000000 #define PCI_BUS_NONMEM_SIZE SZ_256M Index: linux.prev/arch/arm/mach-integrator/platsmp.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/platsmp.c +++ linux.prev/arch/arm/mach-integrator/platsmp.c @@ -31,7 +31,7 @@ extern void integrator_secondary_startup volatile int __cpuinitdata pen_release = -1; unsigned long __cpuinitdata phys_pen_release = 0; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __cpuinit platform_secondary_init(unsigned int cpu) { Index: linux.prev/arch/arm/mach-integrator/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-integrator/time.c +++ linux.prev/arch/arm/mach-integrator/time.c @@ -96,7 +96,8 @@ static struct rtc_ops rtc_ops = { .set_alarm = rtc_set_alarm, }; -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { writel(0, rtc_base + RTC_EOI); return IRQ_HANDLED; @@ -124,7 +125,7 @@ static int rtc_probe(struct amba_device xtime.tv_sec = __raw_readl(rtc_base + RTC_DR); - ret = request_irq(dev->irq[0], rtc_interrupt, SA_INTERRUPT, + ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT, "rtc-pl030", dev); if (ret) goto map_out; Index: linux.prev/arch/arm/mach-ixp2000/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/core.c +++ linux.prev/arch/arm/mach-ixp2000/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -276,9 +277,9 @@ void gpio_line_config(int line, int dire { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (direction == GPIO_OUT) { - irq_desc[line + IRQ_IXP2000_GPIO0].valid = 0; + set_irq_flags(line + IRQ_IXP2000_GPIO0, 0); /* if it's an output, it ain't an interrupt anymore */ GPIO_IRQ_falling_edge &= ~(1 << line); @@ -291,7 +292,7 @@ void gpio_line_config(int line, int dire } else if (direction == GPIO_IN) { ixp2000_reg_wrb(IXP2000_GPIO_PDCR, 1 << line); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } @@ -344,8 +345,7 @@ static int ixp2000_GPIO_irq_type(unsigne /* * Finally, mark the corresponding IRQ as valid. 
*/ - irq_desc[irq].valid = 1; - + set_irq_flags(irq, IRQF_VALID); return 0; } @@ -449,6 +449,8 @@ static struct irqchip ixp2000_irq_chip = .unmask = ixp2000_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixp2000_GPIO_irq_handler); + void __init ixp2000_init_irq(void) { int irq; Index: linux.prev/arch/arm/mach-ixp2000/ixdp2x00.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/ixdp2x00.c +++ linux.prev/arch/arm/mach-ixp2000/ixdp2x00.c @@ -146,6 +146,8 @@ static struct irqchip ixdp2x00_cpld_irq_ .unmask = ixdp2x00_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixdp2x00_irq_handler); + void ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs) { unsigned int irq; @@ -168,7 +170,7 @@ void ixdp2x00_init_irq(volatile unsigned } /* Hook into PCI interrupt */ - set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x00_irq_handler); + set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x00_irq_handler); } /************************************************************************* Index: linux.prev/arch/arm/mach-ixp2000/ixdp2x01.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/ixdp2x01.c +++ linux.prev/arch/arm/mach-ixp2000/ixdp2x01.c @@ -95,6 +95,8 @@ static struct irqchip ixdp2x01_irq_chip .unmask = ixdp2x01_irq_unmask }; +static DEFINE_IRQ_CHAINED_TYPE(ixdp2x01_irq_handler); + /* * We only do anything if we are the master NPU on the board. * The slave NPU only has the ethernet chip going directly to @@ -127,7 +129,7 @@ void __init ixdp2x01_init_irq(void) } /* Hook into PCI interrupts */ - set_irq_chained_handler(IRQ_IXP2000_PCIB, &ixdp2x01_irq_handler); + set_irq_chained_handler(IRQ_IXP2000_PCIB, ixdp2x01_irq_handler); } Index: linux.prev/arch/arm/mach-ixp2000/pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp2000/pci.c +++ linux.prev/arch/arm/mach-ixp2000/pci.c @@ -145,7 +145,7 @@ int ixp2000_pci_abort_handler(unsigned l pci_master_aborts = 1; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); if (temp & ((1 << 8) | (1 << 5))) { ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp); @@ -158,7 +158,7 @@ int ixp2000_pci_abort_handler(unsigned l temp = *(IXP2000_PCI_CMDSTAT); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * If it was an imprecise abort, then we need to correct the @@ -176,7 +176,7 @@ clear_master_aborts(void) volatile u32 temp; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); temp = *(IXP2000_PCI_CONTROL); if (temp & ((1 << 8) | (1 << 5))) { ixp2000_reg_wrb(IXP2000_PCI_CONTROL, temp); @@ -189,7 +189,7 @@ clear_master_aborts(void) temp = *(IXP2000_PCI_CMDSTAT); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-ixp4xx/common-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/common-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/common-pci.c @@ -53,7 +53,7 @@ unsigned long ixp4xx_pci_reg_base = 0; * these transactions are atomic or we will end up * with corrupt data on the bus or in a driver. 
*/ -static DEFINE_SPINLOCK(ixp4xx_pci_lock); +static DEFINE_RAW_SPINLOCK(ixp4xx_pci_lock); /* * Read from PCI config space Index: linux.prev/arch/arm/mach-ixp4xx/coyote-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/coyote-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/coyote-pci.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-ixp4xx/ixdp425-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/ixdp425-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/ixdp425-pci.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include Index: linux.prev/arch/arm/mach-ixp4xx/ixdpg425-pci.c =================================================================== --- linux.prev.orig/arch/arm/mach-ixp4xx/ixdpg425-pci.c +++ linux.prev/arch/arm/mach-ixp4xx/ixdpg425-pci.c @@ -16,10 +16,10 @@ #include #include #include +#include #include #include -#include #include Index: linux.prev/arch/arm/mach-l7200/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-l7200/core.c +++ linux.prev/arch/arm/mach-l7200/core.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include Index: linux.prev/arch/arm/mach-lh7a40x/arch-kev7a400.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/arch-kev7a400.c +++ linux.prev/arch/arm/mach-lh7a40x/arch-kev7a400.c @@ -81,6 +81,8 @@ static void kev7a400_cpld_handler (unsig } } +static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler); + void __init lh7a40x_init_board_irq (void) { int irq; Index: linux.prev/arch/arm/mach-lh7a40x/arch-lpd7a40x.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/arch-lpd7a40x.c +++ linux.prev/arch/arm/mach-lh7a40x/arch-lpd7a40x.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -173,6 +174,7 @@ static void lpd7a40x_cpld_handler (unsig desc->chip->unmask (irq); /* Level-triggered need this */ } +static DEFINE_IRQ_CHAINED_TYPE(lpd7a40x_cpld_handler); void __init lh7a40x_init_board_irq (void) { Index: linux.prev/arch/arm/mach-lh7a40x/irq-kev7a400.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/irq-kev7a400.c +++ linux.prev/arch/arm/mach-lh7a40x/irq-kev7a400.c @@ -60,6 +60,8 @@ lh7a400_cpld_handler (unsigned int irq, } } +static DEFINE_IRQ_CHAINED_TYPE(kev7a400_cpld_handler); + /* IRQ initialization */ void __init Index: linux.prev/arch/arm/mach-lh7a40x/irq-lpd7a40x.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/irq-lpd7a40x.c +++ linux.prev/arch/arm/mach-lh7a40x/irq-lpd7a40x.c @@ -71,6 +71,7 @@ static void lh7a40x_cpld_handler (unsign desc->chip->unmask (irq); /* Level-triggered need this */ } +static DEFINE_IRQ_CHAINED_TYPE(lh7a40x_cpld_handler); /* IRQ initialization */ Index: linux.prev/arch/arm/mach-lh7a40x/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-lh7a40x/time.c +++ linux.prev/arch/arm/mach-lh7a40x/time.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-omap1/board-osk.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/board-osk.c +++ 
linux.prev/arch/arm/mach-omap1/board-osk.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include Index: linux.prev/arch/arm/mach-omap1/fpga.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/fpga.c +++ linux.prev/arch/arm/mach-omap1/fpga.c @@ -120,6 +120,8 @@ static struct irqchip omap_fpga_irq = { .unmask = fpga_unmask_irq, }; +static DEFINE_IRQ_CHAINED_TYPE(innovator_fpga_IRQ_demux); + /* * All of the FPGA interrupt request inputs except for the touchscreen are * edge-sensitive; the touchscreen is level-sensitive. The edge-sensitive Index: linux.prev/arch/arm/mach-omap1/leds-h2p2-debug.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/leds-h2p2-debug.c +++ linux.prev/arch/arm/mach-omap1/leds-h2p2-debug.c @@ -45,7 +45,7 @@ void h2p2_dbg_leds_event(led_event_t evt static struct h2p2_dbg_fpga __iomem *fpga; static u16 led_state, hw_led_state; - local_irq_save(flags); + raw_local_irq_save(flags); if (!(led_state & LED_STATE_ENABLED) && evt != led_start) goto done; @@ -164,5 +164,5 @@ void h2p2_dbg_leds_event(led_event_t evt __raw_writew(~hw_led_state, &fpga->leds); done: - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-omap1/serial.c =================================================================== --- linux.prev.orig/arch/arm/mach-omap1/serial.c +++ linux.prev/arch/arm/mach-omap1/serial.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include Index: linux.prev/arch/arm/mach-pxa/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/dma.c +++ linux.prev/arch/arm/mach-pxa/dma.c @@ -43,7 +43,7 @@ int pxa_request_dma (char *name, pxa_dma if (!name || !irq_handler) return -EINVAL; - local_irq_save(flags); + raw_local_irq_save(flags); /* try grabbing a DMA channel with the requested priority */ for (i = prio; i < prio + PXA_DMA_NBCH(prio); i++) { @@ -73,7 +73,7 @@ int pxa_request_dma (char *name, pxa_dma i = -ENODEV; } - local_irq_restore(flags); + raw_local_irq_restore(flags); return i; } @@ -88,10 +88,10 @@ void pxa_free_dma (int dma_ch) return; } - local_irq_save(flags); + raw_local_irq_save(flags); DCSR(dma_ch) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR; dma_channels[dma_ch].name = NULL; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static irqreturn_t dma_irq_handler(int irq, void *dev_id, struct pt_regs *regs) Index: linux.prev/arch/arm/mach-pxa/generic.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/generic.c +++ linux.prev/arch/arm/mach-pxa/generic.c @@ -51,7 +51,7 @@ void pxa_gpio_mode(int gpio_mode) int fn = (gpio_mode & GPIO_MD_MASK_FN) >> 8; int gafr; - local_irq_save(flags); + raw_local_irq_save(flags); if (gpio_mode & GPIO_DFLT_LOW) GPCR(gpio) = GPIO_bit(gpio); else if (gpio_mode & GPIO_DFLT_HIGH) @@ -62,7 +62,7 @@ void pxa_gpio_mode(int gpio_mode) GPDR(gpio) &= ~GPIO_bit(gpio); gafr = GAFR(gpio) & ~(0x3 << (((gpio) & 0xf)*2)); GAFR(gpio) = gafr | (fn << (((gpio) & 0xf)*2)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(pxa_gpio_mode); @@ -73,14 +73,14 @@ EXPORT_SYMBOL(pxa_gpio_mode); void pxa_set_cken(int clock, int enable) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (enable) CKEN |= clock; else CKEN &= ~clock; - local_irq_restore(flags); + raw_local_irq_restore(flags); 
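/*
 * Editor's sketch, not from the patch: the DEFINE_IRQ_CHAINED_TYPE()
 * lines added across these board files wrap a demultiplexing handler
 * in an irq_type so the generic IRQ layer can install it as a chained
 * handler. Typical shape (board_demux_handler and IRQ_BOARD_CASCADE
 * are hypothetical names):
 */
#if 0	/* illustration only */
static void board_demux_handler(unsigned int irq, struct irqdesc *desc,
				struct pt_regs *regs)
{
	/* read the cascade status register, then for each set bit:
	 * desc_handle_irq(subirq, irq_desc + subirq, regs); */
}

static DEFINE_IRQ_CHAINED_TYPE(board_demux_handler);

static void __init board_init_irq(void)
{
	/* note: the handler function itself is passed, no longer its address */
	set_irq_chained_handler(IRQ_BOARD_CASCADE, board_demux_handler);
}
#endif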
} EXPORT_SYMBOL(pxa_set_cken); Index: linux.prev/arch/arm/mach-pxa/idp.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/idp.c +++ linux.prev/arch/arm/mach-pxa/idp.c @@ -18,6 +18,7 @@ #include #include +#include #include #include Index: linux.prev/arch/arm/mach-pxa/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/irq.c +++ linux.prev/arch/arm/mach-pxa/irq.c @@ -244,6 +244,7 @@ static struct irqchip pxa_muxed_gpio_chi .set_type = pxa_gpio_irq_type, }; +static DEFINE_IRQ_CHAINED_TYPE(pxa_gpio_demux_handler); void __init pxa_init_irq(void) { Index: linux.prev/arch/arm/mach-pxa/leds-idp.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/leds-idp.c +++ linux.prev/arch/arm/mach-pxa/leds-idp.c @@ -34,7 +34,7 @@ void idp_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -113,5 +113,5 @@ void idp_leds_event(led_event_t evt) else IDP_CPLD_LED_CONTROL |= IDP_LEDS_MASK; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-pxa/leds-lubbock.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/leds-lubbock.c +++ linux.prev/arch/arm/mach-pxa/leds-lubbock.c @@ -48,7 +48,7 @@ void lubbock_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -122,5 +122,5 @@ void lubbock_leds_event(led_event_t evt) else LUB_DISC_BLNK_LED |= 0xff; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-pxa/leds-mainstone.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/leds-mainstone.c +++ linux.prev/arch/arm/mach-pxa/leds-mainstone.c @@ -43,7 +43,7 @@ void mainstone_leds_event(led_event_t ev { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -117,5 +117,5 @@ void mainstone_leds_event(led_event_t ev else MST_LEDCTRL |= 0xff; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-pxa/lubbock.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/lubbock.c +++ linux.prev/arch/arm/mach-pxa/lubbock.c @@ -52,9 +52,9 @@ void lubbock_set_misc_wr(unsigned int ma { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); LUB_MISC_WR = (LUB_MISC_WR & ~mask) | (set & mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(lubbock_set_misc_wr); @@ -95,6 +95,8 @@ static void lubbock_irq_handler(unsigned } while (pending); } +static DEFINE_IRQ_CHAINED_TYPE(lubbock_irq_handler); + static void __init lubbock_init_irq(void) { int irq; Index: linux.prev/arch/arm/mach-pxa/mainstone.c =================================================================== --- linux.prev.orig/arch/arm/mach-pxa/mainstone.c +++ linux.prev/arch/arm/mach-pxa/mainstone.c @@ -84,6 +84,8 @@ static void mainstone_irq_handler(unsign } while (pending); } +static DEFINE_IRQ_CHAINED_TYPE(mainstone_irq_handler); + static void __init mainstone_init_irq(void) { int irq; Index: linux.prev/arch/arm/mach-rpc/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-rpc/dma.c +++ 
linux.prev/arch/arm/mach-rpc/dma.c @@ -171,11 +171,11 @@ static void iomd_disable_dma(dmach_t cha unsigned long dma_base = dma->dma_base; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (dma->state != ~DMA_ST_AB) disable_irq(dma->dma_irq); iomd_writeb(0, dma_base + CR); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int iomd_set_dma_speed(dmach_t channel, dma_t *dma, int cycle) Index: linux.prev/arch/arm/mach-rpc/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-rpc/irq.c +++ linux.prev/arch/arm/mach-rpc/irq.c @@ -112,6 +112,15 @@ static struct irqchip iomd_fiq_chip = { .unmask = iomd_unmask_irq_fiq, }; +static unsigned int startup_irq_disabled(unsigned int irq) +{ + return 0; +} + +/* Interrupt type for irqs which must not be + * automatically enabled in request_irq */ +static struct irq_type level_type_nostart; + void __init rpc_init_irq(void) { unsigned int irq, flags; @@ -121,16 +130,15 @@ void __init rpc_init_irq(void) iomd_writeb(0, IOMD_FIQMASK); iomd_writeb(0, IOMD_DMAMASK); + level_type_nostart = default_level_type; + level_type_nostart.startup = startup_irq_disabled; + for (irq = 0; irq < NR_IRQS; irq++) { flags = IRQF_VALID; if (irq <= 6 || (irq >= 9 && irq <= 15)) flags |= IRQF_PROBE; - if (irq == 21 || (irq >= 16 && irq <= 19) || - irq == IRQ_KEYBOARDTX) - flags |= IRQF_NOAUTOEN; - switch (irq) { case 0 ... 7: set_irq_chip(irq, &iomd_a_chip); @@ -155,6 +163,10 @@ void __init rpc_init_irq(void) set_irq_flags(irq, IRQF_VALID); break; } + + if (irq == 21 || (irq >= 16 && irq <= 19) || + irq == IRQ_KEYBOARDTX) + set_irq_handler(irq, &level_type_nostart); } init_FIQ(); Index: linux.prev/arch/arm/mach-s3c2410/bast-irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/bast-irq.c +++ linux.prev/arch/arm/mach-s3c2410/bast-irq.c @@ -136,13 +136,15 @@ bast_irq_pc104_demux(unsigned int irq, for (i = 0; stat != 0; i++, stat >>= 1) { if (stat & 1) { irqno = bast_pc104_irqs[i]; - - desc_handle_irq(irqno, irq_desc + irqno, regs); + desc = irq_desc + irqno; + desc_handle_irq(irqno, desc, regs); } } } } +DEFINE_IRQ_CHAINED_TYPE(bast_irq_pc104_demux); + static __init int bast_irq_init(void) { unsigned int i; @@ -156,7 +158,7 @@ static __init int bast_irq_init(void) set_irq_chained_handler(IRQ_ISA, bast_irq_pc104_demux); - /* reigster our IRQs */ + /* register our IRQs */ for (i = 0; i < 4; i++) { unsigned int irqno = bast_pc104_irqs[i]; Index: linux.prev/arch/arm/mach-s3c2410/clock.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/clock.c +++ linux.prev/arch/arm/mach-s3c2410/clock.c @@ -61,7 +61,7 @@ void inline s3c24xx_clk_enable(unsigned unsigned long clkcon; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); clkcon = __raw_readl(S3C2410_CLKCON); clkcon &= ~clocks; @@ -74,7 +74,7 @@ void inline s3c24xx_clk_enable(unsigned __raw_writel(clkcon, S3C2410_CLKCON); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* enable and disable calls for use with the clk struct */ Index: linux.prev/arch/arm/mach-s3c2410/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/dma.c +++ linux.prev/arch/arm/mach-s3c2410/dma.c @@ -329,11 +329,11 @@ static int s3c2410_dma_start(s3c2410_dma pr_debug("s3c2410_start_dma: channel=%d\n", chan->number); - local_irq_save(flags); +
raw_local_irq_save(flags); if (chan->state == S3C2410_DMA_RUNNING) { pr_debug("s3c2410_start_dma: already running (%d)\n", chan->state); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -348,7 +348,7 @@ static int s3c2410_dma_start(s3c2410_dma printk(KERN_ERR "dma%d: channel has nothing loaded\n", chan->number); chan->state = S3C2410_DMA_IDLE; - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EINVAL; } @@ -385,7 +385,7 @@ static int s3c2410_dma_start(s3c2410_dma dbg_showchan(chan); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -451,7 +451,7 @@ int s3c2410_dma_enqueue(unsigned int cha buf->id = id; buf->magic = BUF_MAGIC; - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->curr == NULL) { /* we've got nothing loaded... */ @@ -485,7 +485,7 @@ int s3c2410_dma_enqueue(unsigned int cha "timeout loading buffer\n", chan->number); dbg_showchan(chan); - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EINVAL; } } @@ -499,7 +499,7 @@ int s3c2410_dma_enqueue(unsigned int cha } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -661,9 +661,9 @@ s3c2410_dma_irq(int irq, void *devpw, st return IRQ_HANDLED; } - local_irq_save(flags); + raw_local_irq_save(flags); s3c2410_dma_loadbuffer(chan, chan->next); - local_irq_restore(flags); + raw_local_irq_restore(flags); } else { s3c2410_dma_lastxfer(chan); @@ -698,14 +698,14 @@ int s3c2410_dma_request(unsigned int cha check_channel(channel); - local_irq_save(flags); + raw_local_irq_save(flags); dbg_showchan(chan); if (chan->in_use) { if (client != chan->client) { printk(KERN_ERR "dma%d: already in use\n", channel); - local_irq_restore(flags); + raw_local_irq_restore(flags); return -EBUSY; } else { printk(KERN_ERR "dma%d: client already has channel\n", channel); @@ -724,7 +724,7 @@ int s3c2410_dma_request(unsigned int cha if (err) { chan->in_use = 0; - local_irq_restore(flags); + raw_local_irq_restore(flags); printk(KERN_ERR "%s: cannot get IRQ %d for DMA %d\n", client->name, chan->irq, chan->number); @@ -735,7 +735,7 @@ int s3c2410_dma_request(unsigned int cha chan->irq_enabled = 1; } - local_irq_restore(flags); + raw_local_irq_restore(flags); /* need to setup */ @@ -764,7 +764,7 @@ int s3c2410_dma_free(dmach_t channel, s3 check_channel(channel); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->client != client) { @@ -789,7 +789,7 @@ int s3c2410_dma_free(dmach_t channel, s3 free_irq(chan->irq, (void *)chan); chan->irq_claimed = 0; - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -805,7 +805,7 @@ static int s3c2410_dma_dostop(s3c2410_dm dbg_showchan(chan); - local_irq_save(flags); + raw_local_irq_save(flags); s3c2410_dma_call_op(chan, S3C2410_DMAOP_STOP); @@ -823,7 +823,7 @@ static int s3c2410_dma_dostop(s3c2410_dm chan->state = S3C2410_DMA_IDLE; chan->load_state = S3C2410_DMALOAD_NONE; - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -840,7 +840,7 @@ static int s3c2410_dma_flush(s3c2410_dma pr_debug("%s:\n", __FUNCTION__); - local_irq_save(flags); + raw_local_irq_save(flags); if (chan->state != S3C2410_DMA_IDLE) { pr_debug("%s: stopping channel...\n", __FUNCTION__ ); @@ -865,7 +865,7 @@ static int s3c2410_dma_flush(s3c2410_dma } } - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-s3c2410/gpio.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/gpio.c +++ 
linux.prev/arch/arm/mach-s3c2410/gpio.c @@ -80,7 +80,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi /* modify the specified register wwith IRQs off */ - local_irq_save(flags); + raw_local_irq_save(flags); con = __raw_readl(base + 0x00); con &= ~mask; @@ -88,7 +88,7 @@ void s3c2410_gpio_cfgpin(unsigned int pi __raw_writel(con, base + 0x00); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_cfgpin); @@ -119,14 +119,14 @@ void s3c2410_gpio_pullup(unsigned int pi if (pin < S3C2410_GPIO_BANKB) return; - local_irq_save(flags); + raw_local_irq_save(flags); up = __raw_readl(base + 0x08); up &= ~(1L << offs); up |= to << offs; __raw_writel(up, base + 0x08); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_pullup); @@ -138,14 +138,14 @@ void s3c2410_gpio_setpin(unsigned int pi unsigned long flags; unsigned long dat; - local_irq_save(flags); + raw_local_irq_save(flags); dat = __raw_readl(base + 0x04); dat &= ~(1 << offs); dat |= to << offs; __raw_writel(dat, base + 0x04); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(s3c2410_gpio_setpin); @@ -165,12 +165,12 @@ unsigned int s3c2410_modify_misccr(unsig unsigned long flags; unsigned long misccr; - local_irq_save(flags); + raw_local_irq_save(flags); misccr = __raw_readl(S3C2410_MISCCR); misccr &= ~clear; misccr ^= change; __raw_writel(misccr, S3C2410_MISCCR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return misccr; } @@ -211,7 +211,7 @@ int s3c2410_gpio_irqfilter(unsigned int pin -= S3C2410_GPG8_EINT16; reg += pin & ~3; - local_irq_save(flags); + raw_local_irq_save(flags); /* update filter width and clock source */ @@ -227,7 +227,7 @@ int s3c2410_gpio_irqfilter(unsigned int val |= on << ((pin * 4) + 3); __raw_writel(val, S3C2410_EXTINT2); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-s3c2410/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/irq.c +++ linux.prev/arch/arm/mach-s3c2410/irq.c @@ -573,6 +573,11 @@ s3c_irq_demux_uart2(unsigned int irq, } +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart0); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart1); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_uart2); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_adc); + /* s3c24xx_init_irq * * Initialise S3C2410 IRQ system Index: linux.prev/arch/arm/mach-s3c2410/s3c2440-dsc.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/s3c2440-dsc.c +++ linux.prev/arch/arm/mach-s3c2410/s3c2440-dsc.c @@ -45,14 +45,14 @@ int s3c2440_set_dsc(unsigned int pin, un base = (pin & S3C2440_SELECT_DSC1) ? 
S3C2440_DSC1 : S3C2440_DSC0; mask = 3 << S3C2440_DSC_GETSHIFT(pin); - local_irq_save(flags); + raw_local_irq_save(flags); val = __raw_readl(base); val &= ~mask; val |= value & mask; __raw_writel(val, base); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } Index: linux.prev/arch/arm/mach-s3c2410/s3c2440-irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/s3c2440-irq.c +++ linux.prev/arch/arm/mach-s3c2410/s3c2440-irq.c @@ -157,6 +157,9 @@ static struct irqchip s3c_irq_cam = { .ack = s3c_irq_cam_ack, }; +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_wdtac97); +static DEFINE_IRQ_CHAINED_TYPE(s3c_irq_demux_cam); + static int s3c2440_irq_add(struct sys_device *sysdev) { unsigned int irqno; Index: linux.prev/arch/arm/mach-s3c2410/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-s3c2410/time.c +++ linux.prev/arch/arm/mach-s3c2410/time.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-sa1100/assabet.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/assabet.c +++ linux.prev/arch/arm/mach-sa1100/assabet.c @@ -61,10 +61,10 @@ void ASSABET_BCR_frob(unsigned int mask, { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); BCR_value = (BCR_value & ~mask) | val; ASSABET_BCR = BCR_value; - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(ASSABET_BCR_frob); Index: linux.prev/arch/arm/mach-sa1100/badge4.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/badge4.c +++ linux.prev/arch/arm/mach-sa1100/badge4.c @@ -227,7 +227,7 @@ void badge4_set_5V(unsigned subsystem, i unsigned long flags; unsigned old_5V_bitmap; - local_irq_save(flags); + raw_local_irq_save(flags); old_5V_bitmap = badge4_5V_bitmap; @@ -240,15 +240,22 @@ void badge4_set_5V(unsigned subsystem, i /* detect on->off and off->on transitions */ if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { /* was off, now on */ - printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); GPSR = BADGE4_GPIO_PCMEN5V; } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { /* was on, now off */ - printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); GPCR = BADGE4_GPIO_PCMEN5V; } - local_irq_restore(flags); + raw_local_irq_restore(flags); + + /* detect on->off and off->on transitions */ + if ((!old_5V_bitmap) && (badge4_5V_bitmap)) { + /* was off, now on */ + printk(KERN_INFO "%s: enabling 5V supply rail\n", __FUNCTION__); + } else if ((old_5V_bitmap) && (!badge4_5V_bitmap)) { + /* was on, now off */ + printk(KERN_INFO "%s: disabling 5V supply rail\n", __FUNCTION__); + } } EXPORT_SYMBOL(badge4_set_5V); Index: linux.prev/arch/arm/mach-sa1100/cerf.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/cerf.c +++ linux.prev/arch/arm/mach-sa1100/cerf.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-sa1100/cpu-sa1110.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/cpu-sa1110.c +++ linux.prev/arch/arm/mach-sa1100/cpu-sa1110.c @@ -282,7 +282,7 @@ static int sa1110_target(struct cpufreq_ * This means that we won't access SDRAM for the duration of * the programming. 
*/ - local_irq_save(flags); + raw_local_irq_save(flags); asm("mcr p15, 0, %0, c7, c10, 4" : : "r" (0)); udelay(10); __asm__ __volatile__(" \n\ @@ -303,7 +303,7 @@ static int sa1110_target(struct cpufreq_ : "r" (&MDCNFG), "r" (&PPCR), "0" (sd.mdcnfg), "r" (sd.mdrefr), "r" (sd.mdcas[0]), "r" (sd.mdcas[1]), "r" (sd.mdcas[2]), "r" (ppcr)); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * Now, return the SDRAM refresh back to normal. Index: linux.prev/arch/arm/mach-sa1100/dma.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/dma.c +++ linux.prev/arch/arm/mach-sa1100/dma.c @@ -227,7 +227,7 @@ int sa1100_start_dma(dma_regs_t *regs, d if (size > MAX_DMA_SIZE) return -EOVERFLOW; - local_irq_save(flags); + raw_local_irq_save(flags); status = regs->RdDCSR; /* If both DMA buffers are started, there's nothing else we can do. */ @@ -262,7 +262,7 @@ int sa1100_start_dma(dma_regs_t *regs, d ret = 0; out: - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } Index: linux.prev/arch/arm/mach-sa1100/generic.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/generic.c +++ linux.prev/arch/arm/mach-sa1100/generic.c @@ -138,7 +138,7 @@ unsigned long long sched_clock(void) static void sa1100_power_off(void) { mdelay(100); - local_irq_disable(); + raw_local_irq_disable(); /* disable internal oscillator, float CS lines */ PCFR = (PCFR_OPDE | PCFR_FP | PCFR_FS); /* enable wake-up on GPIO0 (Assabet...) */ @@ -411,7 +411,7 @@ void __init sa1110_mb_disable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); PGSR &= ~GPIO_MBGNT; GPCR = GPIO_MBGNT; @@ -419,7 +419,7 @@ void __init sa1110_mb_disable(void) GAFR &= ~(GPIO_MBGNT | GPIO_MBREQ); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -430,7 +430,7 @@ void __init sa1110_mb_enable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); PGSR &= ~GPIO_MBGNT; GPCR = GPIO_MBGNT; @@ -439,6 +439,6 @@ void __init sa1110_mb_enable(void) GAFR |= (GPIO_MBGNT | GPIO_MBREQ); TUCR |= TUCR_MR; - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/h3600.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/h3600.c +++ linux.prev/arch/arm/mach-sa1100/h3600.c @@ -331,7 +331,7 @@ static void h3100_control_egpio(enum ipa } if (egpio || gpio) { - local_irq_save(flags); + raw_local_irq_save(flags); if (setp) { h3100_egpio |= egpio; GPSR = gpio; @@ -340,7 +340,7 @@ static void h3100_control_egpio(enum ipa GPCR = gpio; } H3100_EGPIO = h3100_egpio; - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -463,13 +463,13 @@ static void h3600_control_egpio(enum ipa } if (egpio) { - local_irq_save(flags); + raw_local_irq_save(flags); if (setp) h3600_egpio |= egpio; else h3600_egpio &= ~egpio; H3600_EGPIO = h3600_egpio; - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -800,6 +800,8 @@ static void h3800_unmask_gpio_irq(unsign H3800_ASIC2_GPIINTSTAT |= mask; } +static DEFINE_IRQ_CHAINED_TYPE(h3800_IRQ_demux); + static void __init h3800_init_irq(void) { int i; @@ -838,7 +840,7 @@ static void __init h3800_init_irq(void) } #endif set_irq_type(IRQ_GPIO_H3800_ASIC, IRQT_RISING); - set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, &h3800_IRQ_demux); + set_irq_chained_handler(IRQ_GPIO_H3800_ASIC, h3800_IRQ_demux); } Index: 
linux.prev/arch/arm/mach-sa1100/irq.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/irq.c +++ linux.prev/arch/arm/mach-sa1100/irq.c @@ -11,12 +11,13 @@ */ #include #include +#include +#include #include #include #include #include -#include #include #include "generic.h" @@ -281,6 +282,8 @@ static int __init sa1100irq_init_devicef return sysdev_register(&sa1100irq_device); } +static DEFINE_IRQ_CHAINED_TYPE(sa1100_high_gpio_handler); + device_initcall(sa1100irq_init_devicefs); void __init sa1100_init_irq(void) Index: linux.prev/arch/arm/mach-sa1100/leds-assabet.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-assabet.c +++ linux.prev/arch/arm/mach-sa1100/leds-assabet.c @@ -32,7 +32,7 @@ void assabet_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -111,5 +111,5 @@ void assabet_leds_event(led_event_t evt) if (led_state & LED_STATE_ENABLED) ASSABET_BCR_frob(ASSABET_BCR_LED_MASK, hw_led_state); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-badge4.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-badge4.c +++ linux.prev/arch/arm/mach-sa1100/leds-badge4.c @@ -36,7 +36,7 @@ void badge4_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -108,5 +108,5 @@ void badge4_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-cerf.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-cerf.c +++ linux.prev/arch/arm/mach-sa1100/leds-cerf.c @@ -29,7 +29,7 @@ void cerf_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch (evt) { case led_start: @@ -107,5 +107,5 @@ void cerf_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-hackkit.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-hackkit.c +++ linux.prev/arch/arm/mach-sa1100/leds-hackkit.c @@ -33,7 +33,7 @@ void hackkit_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch(evt) { case led_start: @@ -109,5 +109,5 @@ void hackkit_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/leds-lart.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/leds-lart.c +++ linux.prev/arch/arm/mach-sa1100/leds-lart.c @@ -32,7 +32,7 @@ void lart_leds_event(led_event_t evt) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); switch(evt) { case led_start: @@ -98,5 +98,5 @@ void lart_leds_event(led_event_t evt) GPCR = hw_led_state ^ LED_MASK; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/arm/mach-sa1100/neponset.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/neponset.c +++ 
linux.prev/arch/arm/mach-sa1100/neponset.c @@ -137,6 +137,8 @@ static struct sa1100_port_fns neponset_p .get_mctrl = neponset_get_mctrl, }; +static DEFINE_IRQ_CHAINED_TYPE(neponset_irq_handler); + static int neponset_probe(struct platform_device *dev) { sa1100_register_uart_fns(&neponset_port_fns); Index: linux.prev/arch/arm/mach-sa1100/pleb.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/pleb.c +++ linux.prev/arch/arm/mach-sa1100/pleb.c @@ -7,6 +7,7 @@ #include #include #include +#include #include Index: linux.prev/arch/arm/mach-sa1100/simpad.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/simpad.c +++ linux.prev/arch/arm/mach-sa1100/simpad.c @@ -174,7 +174,7 @@ static void __init simpad_map_io(void) static void simpad_power_off(void) { - local_irq_disable(); // was cli + raw_local_irq_disable(); // was cli set_cs3(0x800); /* only SD_MEDIAQ */ /* disable internal oscillator, float CS lines */ @@ -191,7 +191,7 @@ static void simpad_power_off(void) PMCR = PMCR_SF; while(1); - local_irq_enable(); /* we won't ever call it */ + raw_local_irq_enable(); /* we won't ever call it */ } Index: linux.prev/arch/arm/mach-sa1100/time.c =================================================================== --- linux.prev.orig/arch/arm/mach-sa1100/time.c +++ linux.prev/arch/arm/mach-sa1100/time.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-shark/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-shark/core.c +++ linux.prev/arch/arm/mach-shark/core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include Index: linux.prev/arch/arm/mach-shark/leds.c =================================================================== --- linux.prev.orig/arch/arm/mach-shark/leds.c +++ linux.prev/arch/arm/mach-shark/leds.c @@ -33,7 +33,7 @@ static char led_state; static short hw_led_state; static short saved_state; -static DEFINE_SPINLOCK(leds_lock); +static DEFINE_RAW_SPINLOCK(leds_lock); short sequoia_read(int addr) { outw(addr,0x24); Index: linux.prev/arch/arm/mach-versatile/core.c =================================================================== --- linux.prev.orig/arch/arm/mach-versatile/core.c +++ linux.prev/arch/arm/mach-versatile/core.c @@ -113,6 +113,8 @@ sic_handle_irq(unsigned int irq, struct } while (status); } +static DEFINE_IRQ_CHAINED_TYPE(sic_handle_irq); + #if 1 #define IRQ_MMCI0A IRQ_VICSOURCE22 #define IRQ_AACI IRQ_VICSOURCE24 @@ -162,7 +164,7 @@ void __init versatile_init_irq(void) } } - set_irq_handler(IRQ_VICSOURCE31, sic_handle_irq); + set_irq_chained_handler(IRQ_VICSOURCE31, sic_handle_irq); vic_unmask_irq(IRQ_VICSOURCE31); /* Do second interrupt controller */ @@ -785,7 +787,7 @@ static void versatile_leds_event(led_eve unsigned long flags; u32 val; - local_irq_save(flags); + raw_local_irq_save(flags); val = readl(VA_LEDS_BASE); switch (ledevt) { @@ -810,7 +812,7 @@ static void versatile_leds_event(led_eve } writel(val, VA_LEDS_BASE); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_LEDS */ Index: linux.prev/arch/arm/mm/consistent.c =================================================================== --- linux.prev.orig/arch/arm/mm/consistent.c +++ linux.prev/arch/arm/mm/consistent.c @@ -30,7 +30,7 @@ * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static 
DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. Index: linux.prev/arch/arm/mm/copypage-v4mc.c =================================================================== --- linux.prev.orig/arch/arm/mm/copypage-v4mc.c +++ linux.prev/arch/arm/mm/copypage-v4mc.c @@ -29,7 +29,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * ARMv4 mini-dcache optimised copy_user_page @@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(minicache_lock); * instruction. If your processor does not supply this, you have to write your * own copy_user_page that does the right thing. */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { asm volatile( @@ -82,7 +82,7 @@ void v4_mc_copy_user_page(void *kto, con /* * ARMv4 optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) v4_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux.prev/arch/arm/mm/copypage-v6.c =================================================================== --- linux.prev.orig/arch/arm/mm/copypage-v6.c +++ linux.prev/arch/arm/mm/copypage-v6.c @@ -26,7 +26,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(v6_lock); +static DEFINE_RAW_SPINLOCK(v6_lock); /* * Copy the user page. No aliasing to deal with so we can just Index: linux.prev/arch/arm/mm/copypage-xscale.c =================================================================== --- linux.prev.orig/arch/arm/mm/copypage-xscale.c +++ linux.prev/arch/arm/mm/copypage-xscale.c @@ -31,7 +31,7 @@ #define TOP_PTE(x) pte_offset_kernel(top_pmd, x) -static DEFINE_SPINLOCK(minicache_lock); +static DEFINE_RAW_SPINLOCK(minicache_lock); /* * XScale mini-dcache optimised copy_user_page @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(minicache_lock); * Dcache aliasing issue. The writes will be forwarded to the write buffer, * and merged as appropriate. */ -static void __attribute__((naked)) +static void notrace __attribute__((naked)) mc_copy_user_page(void *from, void *to) { /* @@ -104,7 +104,7 @@ void xscale_mc_copy_user_page(void *kto, /* * XScale optimised clear_user_page */ -void __attribute__((naked)) +void notrace __attribute__((naked)) xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr) { asm volatile( Index: linux.prev/arch/arm/mm/fault-armv.c =================================================================== --- linux.prev.orig/arch/arm/mm/fault-armv.c +++ linux.prev/arch/arm/mm/fault-armv.c @@ -166,7 +166,7 @@ static int __init check_writebuffer(unsi { register unsigned long zero = 0, one = 1, val; - local_irq_disable(); + raw_local_irq_disable(); mb(); *p1 = one; mb(); @@ -174,7 +174,7 @@ static int __init check_writebuffer(unsi mb(); val = *p1; mb(); - local_irq_enable(); + raw_local_irq_enable(); return val != zero; } Index: linux.prev/arch/arm/mm/fault.c =================================================================== --- linux.prev.orig/arch/arm/mm/fault.c +++ linux.prev/arch/arm/mm/fault.c @@ -216,7 +216,7 @@ out: return fault; } -static int +static notrace int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk; @@ -316,7 +316,7 @@ no_context: * interrupt or a critical region, and should only copy the information * from the master page table, nothing more. 
*/ -static int +static notrace int do_translation_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -362,7 +362,7 @@ bad_area: * Some section permission faults need to be handled gracefully. * They can happen due to a __{get,put}_user during an oops. */ -static int +static notrace int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { struct task_struct *tsk = current; @@ -373,7 +373,7 @@ do_sect_fault(unsigned long addr, unsign /* * This abort handler always returns "fault". */ -static int +static notrace int do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { return 1; @@ -428,7 +428,7 @@ static struct fsr_info { { do_bad, SIGBUS, 0, "unknown 31" } }; -void __init +void __init notrace hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), int sig, const char *name) { @@ -442,7 +442,7 @@ hook_fault_code(int nr, int (*fn)(unsign /* * Dispatch a data abort to the relevant handler. */ -asmlinkage void +asmlinkage notrace void do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); @@ -461,7 +461,7 @@ do_DataAbort(unsigned long addr, unsigne notify_die("", regs, &info, fsr, 0); } -asmlinkage void +asmlinkage notrace void do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) { do_translation_fault(addr, 0, regs); Index: linux.prev/arch/arm/mm/init.c =================================================================== --- linux.prev.orig/arch/arm/mm/init.c +++ linux.prev/arch/arm/mm/init.c @@ -28,7 +28,7 @@ #define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t)) -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end; Index: linux.prev/arch/arm/plat-omap/clock.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/clock.c +++ linux.prev/arch/arm/plat-omap/clock.c @@ -28,7 +28,7 @@ LIST_HEAD(clocks); static DECLARE_MUTEX(clocks_sem); -DEFINE_SPINLOCK(clockfw_lock); +DEFINE_RAW_SPINLOCK(clockfw_lock); static struct clk_functions *arch_clock; Index: linux.prev/arch/arm/plat-omap/dma.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/dma.c +++ linux.prev/arch/arm/plat-omap/dma.c @@ -557,7 +557,7 @@ void omap_clear_dma(int lch) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (cpu_class_is_omap1()) { int status; @@ -574,7 +574,7 @@ void omap_clear_dma(int lch) omap_writel(0, lch_base + i); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void omap_start_dma(int lch) @@ -903,7 +903,7 @@ static struct irqaction omap24xx_dma_irq /*----------------------------------------------------------------------------*/ static struct lcd_dma_info { - spinlock_t lock; + raw_spinlock_t lock; int reserved; void (* callback)(u16 status, void *data); void *cb_data; Index: linux.prev/arch/arm/plat-omap/gpio.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/gpio.c +++ linux.prev/arch/arm/plat-omap/gpio.c @@ -121,7 +121,7 @@ struct gpio_bank { u32 reserved_map; u32 suspend_wakeup; u32 saved_wakeup; - spinlock_t lock; + raw_spinlock_t lock; }; #define METHOD_MPUIO 0 @@ -736,7 +736,7 @@ static void gpio_irq_handler(unsigned in desc->chip->ack(irq); - bank 
= (struct gpio_bank *) desc->data; + bank = (struct gpio_bank *) desc->handler_data; if (bank->method == METHOD_MPUIO) isr_reg = bank->base + OMAP_MPUIO_GPIO_INT; #ifdef CONFIG_ARCH_OMAP15XX @@ -837,6 +837,8 @@ static struct irqchip mpuio_irq_chip = { .unmask = mpuio_unmask_irq }; +static DEFINE_IRQ_CHAINED_TYPE(gpio_irq_handler); + static int initialized; static struct clk * gpio_ick; static struct clk * gpio_fck; Index: linux.prev/arch/arm/plat-omap/mux.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/mux.c +++ linux.prev/arch/arm/plat-omap/mux.c @@ -57,7 +57,7 @@ int __init omap_mux_register(struct pin_ */ int __init_or_module omap_cfg_reg(const unsigned long index) { - static DEFINE_SPINLOCK(mux_spin_lock); + static DEFINE_RAW_SPINLOCK(mux_spin_lock); unsigned long flags; struct pin_config *cfg; Index: linux.prev/arch/arm/plat-omap/pm.c =================================================================== --- linux.prev.orig/arch/arm/plat-omap/pm.c +++ linux.prev/arch/arm/plat-omap/pm.c @@ -82,11 +82,11 @@ void omap_pm_idle(void) * seconds for wait for interrupt. */ - local_irq_disable(); + raw_local_irq_disable(); local_fiq_disable(); if (need_resched()) { local_fiq_enable(); - local_irq_enable(); + raw_local_irq_enable(); return; } mask32 = omap_readl(ARM_SYSST); @@ -111,7 +111,7 @@ void omap_pm_idle(void) omap_sram_idle(); local_fiq_enable(); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -182,7 +182,7 @@ void omap_pm_suspend(void) * Step 1: turn off interrupts (FIXME: NOTE: already disabled) */ - local_irq_disable(); + raw_local_irq_disable(); local_fiq_disable(); /* @@ -335,7 +335,7 @@ void omap_pm_suspend(void) * Reenable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); local_fiq_enable(); omap_serial_wake_trigger(0); Index: linux.prev/arch/arm26/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/arm26/boot/compressed/misc.c +++ linux.prev/arch/arm26/boot/compressed/misc.c @@ -184,6 +184,7 @@ static ulg free_mem_ptr_end; #define HEAP_SIZE 0x2000 +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" #ifndef STANDALONE_DEBUG Index: linux.prev/arch/i386/Kconfig =================================================================== --- linux.prev.orig/arch/i386/Kconfig +++ linux.prev/arch/i386/Kconfig @@ -14,6 +14,10 @@ config X86_32 486, 586, Pentiums, and various instruction-set-compatible chips by AMD, Cyrix, and others. +config GENERIC_TIME + bool + default y + config SEMAPHORE_SLEEPERS bool default y @@ -173,6 +177,8 @@ config HPET_EMULATE_RTC depends on HPET_TIMER && RTC=y default y +source "kernel/time/Kconfig" + config SMP bool "Symmetric multi-processing support" ---help--- @@ -228,6 +234,19 @@ config SCHED_SMT source "kernel/Kconfig.preempt" +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 || PREEMPT_RT + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + default y if !RWSEM_GENERIC_SPINLOCK + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || X86_VOYAGER) @@ -619,7 +638,7 @@ config BOOT_IOREMAP config REGPARM bool "Use register arguments (EXPERIMENTAL)" - depends on EXPERIMENTAL + depends on EXPERIMENTAL && !MCOUNT default n help Compile the kernel with -mregparm=3. 
This uses a different ABI @@ -1055,3 +1074,7 @@ config X86_TRAMPOLINE bool depends on X86_SMP || (X86_VOYAGER && SMP) default y + +config KTIME_SCALAR + bool + default y Index: linux.prev/arch/i386/Kconfig.cpu =================================================================== --- linux.prev.orig/arch/i386/Kconfig.cpu +++ linux.prev/arch/i386/Kconfig.cpu @@ -229,11 +229,6 @@ config RWSEM_GENERIC_SPINLOCK depends on M386 default y -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - config GENERIC_CALIBRATE_DELAY bool default y Index: linux.prev/arch/i386/Kconfig.debug =================================================================== --- linux.prev.orig/arch/i386/Kconfig.debug +++ linux.prev/arch/i386/Kconfig.debug @@ -18,6 +18,7 @@ config EARLY_PRINTK config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL + default y help This option will cause messages to be printed if free stack space drops below a certain limit. @@ -25,6 +26,7 @@ config DEBUG_STACKOVERFLOW config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL + default y help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. Index: linux.prev/arch/i386/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/i386/boot/compressed/misc.c +++ linux.prev/arch/i386/boot/compressed/misc.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_MCOUNT +void notrace mcount(void) +{ +} +#endif + /* * gzip declarations */ @@ -112,7 +118,7 @@ static long free_mem_end_ptr; #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_MAX 0x90000 -#define HEAP_SIZE 0x3000 +#define HEAP_SIZE 0x4000 static unsigned int low_buffer_end, low_buffer_size; static int high_loaded =0; static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; @@ -125,6 +131,7 @@ static int lines, cols; static void * xquad_portio = NULL; #endif +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" static void *malloc(int size) Index: linux.prev/arch/i386/kernel/Makefile =================================================================== --- linux.prev.orig/arch/i386/kernel/Makefile +++ linux.prev/arch/i386/kernel/Makefile @@ -4,13 +4,13 @@ extra-y := head.o init_task.o vmlinux.lds -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ +obj-y := process.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - doublefault.o quirks.o i8237.o + doublefault.o quirks.o i8237.o i8253.o tsc.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-y += cpu/ -obj-y += timers/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o @@ -20,6 +20,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-$(CONFIG_MCOUNT) += mcount-wrapper.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o @@ -34,6 +35,8 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_SYSFS) += switch2poll.o +obj-$(CONFIG_HPET_TIMER) += hpet.o EXTRA_AFLAGS := -traditional Index: 
linux.prev/arch/i386/kernel/acpi/boot.c =================================================================== --- linux.prev.orig/arch/i386/kernel/acpi/boot.c +++ linux.prev/arch/i386/kernel/acpi/boot.c @@ -567,7 +567,7 @@ static int __init acpi_parse_sbf(unsigne } #ifdef CONFIG_HPET_TIMER - +#include static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) { struct acpi_table_hpet *hpet_tbl; @@ -589,6 +589,7 @@ static int __init acpi_parse_hpet(unsign #ifdef CONFIG_X86_64 vxtime.hpet_address = hpet_tbl->addr.addrl | ((long)hpet_tbl->addr.addrh << 32); + hpet_address = vxtime.hpet_address; printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", hpet_tbl->id, vxtime.hpet_address); @@ -597,10 +598,10 @@ static int __init acpi_parse_hpet(unsign extern unsigned long hpet_address; hpet_address = hpet_tbl->addr.addrl; - printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", - hpet_tbl->id, hpet_address); } -#endif /* X86 */ +#endif /* X86 */ + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, hpet_address); return 0; } @@ -608,9 +609,8 @@ static int __init acpi_parse_hpet(unsign #define acpi_parse_hpet NULL #endif -#ifdef CONFIG_X86_PM_TIMER -extern u32 pmtmr_ioport; -#endif +u32 acpi_pmtmr_ioport; +int acpi_pmtmr_buggy; static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) { @@ -629,7 +629,6 @@ static int __init acpi_parse_fadt(unsign acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode; -#ifdef CONFIG_X86_PM_TIMER /* detect the location of the ACPI PM Timer */ if (fadt->revision >= FADT2_REVISION_ID) { /* FADT rev. 2 */ @@ -637,22 +636,22 @@ static int __init acpi_parse_fadt(unsign ACPI_ADR_SPACE_SYSTEM_IO) return 0; - pmtmr_ioport = fadt->xpm_tmr_blk.address; + acpi_pmtmr_ioport = fadt->xpm_tmr_blk.address; /* * "X" fields are optional extensions to the original V1.0 * fields, so we must selectively expand V1.0 fields if the * corresponding X field is zero. */ - if (!pmtmr_ioport) - pmtmr_ioport = fadt->V1_pm_tmr_blk; + if (!acpi_pmtmr_ioport) + acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk; } else { /* FADT rev. 1 */ - pmtmr_ioport = fadt->V1_pm_tmr_blk; + acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk; } - if (pmtmr_ioport) - printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", - pmtmr_ioport); -#endif + + if (acpi_pmtmr_ioport) + printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", acpi_pmtmr_ioport); + return 0; } Index: linux.prev/arch/i386/kernel/apic.c =================================================================== --- linux.prev.orig/arch/i386/kernel/apic.c +++ linux.prev/arch/i386/kernel/apic.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,23 @@ int enable_local_apic __initdata = 0; /* */ int apic_verbosity; +static unsigned int calibration_result; + +static void lapic_next_event(unsigned long evt); +static void lapic_timer_setup(int mode); + +static struct clock_event lapic_clockevent = { + .name = "lapic", + .capabilities = CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_HAS_IRQHANDLER +#ifdef CONFIG_SMP + | CLOCK_CAP_UPDATE +#endif + , + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, +}; static void apic_pm_activate(void); @@ -92,10 +110,6 @@ void __init apic_intr_init(void) /* Using APIC to generate smp_local_timer_interrupt? 
*/ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static int enabled_via_apicbase; void enable_NMI_through_LVT0 (void * dummy) @@ -567,13 +581,13 @@ void lapic_shutdown(void) if (!cpu_has_apic) return; - local_irq_disable(); + raw_local_irq_disable(); clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_PM @@ -617,9 +631,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_irq_save(flags); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -631,7 +645,7 @@ static int lapic_resume(struct sys_devic if (!apic_pm_state.active) return 0; - local_irq_save(flags); + raw_local_irq_save(flags); /* * Make sure the APICBASE points to the right address @@ -662,7 +676,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -875,6 +889,11 @@ fake_ioapic_page: */ /* + * FIXME: Move this to i8253.h. There is no need to keep the access to + * the PIT scattered all around the place -tglx + */ + +/* * The timer chip is already set up at HZ interrupts per second here, * but we do not accept timer interrupts yet. We only allow the BP * to calibrate. @@ -932,12 +951,16 @@ void (*wait_timer_tick)(void) __devinitd #define APIC_DIVISOR 16 -static void __setup_APIC_LVTT(unsigned int clocks) +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot) { unsigned int lvtt_value, tmp_value, ver; ver = GET_APIC_VERSION(apic_read(APIC_LVR)); - lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!APIC_INTEGRATED(ver)) lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); apic_write_around(APIC_LVTT, lvtt_value); @@ -950,23 +973,27 @@ static void __setup_APIC_LVTT(unsigned i & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | APIC_TDR_DIV_16); - apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); + if (!oneshot) + apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -static void __devinit setup_APIC_timer(unsigned int clocks) +static void lapic_next_event(unsigned long evt) { - unsigned long flags; - - local_irq_save(flags); + apic_write_around(APIC_TMICT, evt); +} - /* - * Wait for IRQ0's slice: - */ - wait_timer_tick(); +static void lapic_timer_setup(int mode) +{ + unsigned long flags; - __setup_APIC_LVTT(clocks); + raw_local_irq_save(flags); + __setup_APIC_LVTT(calibration_result, mode == CLOCK_EVT_ONESHOT); + raw_local_irq_restore(flags); +} - local_irq_restore(flags); +static void __devinit setup_APIC_timer(void) +{ + setup_local_clockevent(&lapic_clockevent, CPU_MASK_NONE); } /* @@ -975,6 +1002,8 @@ static void __devinit setup_APIC_timer(u * to calibrate, since some later bootup code depends on getting * the first irq? Ugh. * + * TODO: Fix this rather than saying "Ugh" -tglx + * * We want to do the calibration only once since we * want to have local timer irqs syncron. CPUs connected * by the same APIC bus have the very same bus frequency. 
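[Note on the hunks below: they fold the APIC calibration result into the clockevent's scaled multiplier with div_sc32() and derive the nanosecond bounds with clockevent_delta2ns(). A minimal standalone sketch of that shift-32 fixed-point math follows; the helper behavior is inferred from the call sites in this patch, not from the kernel's actual definitions, so treat it as illustrative only.]

#include <stdint.h>
#include <stdio.h>

/* ticks per nanosecond, scaled by 2^32 -- this becomes the .mult
 * of the event source (assumed behavior of div_sc32()) */
static uint32_t div_sc32(uint64_t ticks, uint64_t nsecs)
{
	return (uint32_t)((ticks << 32) / nsecs);
}

/* invert the scaling: how many nanoseconds does a tick delta cover?
 * (assumed behavior of clockevent_delta2ns()) */
static uint64_t clockevent_delta2ns(uint64_t delta, uint32_t mult)
{
	return (delta << 32) / mult;
}

int main(void)
{
	/* e.g. a 12.5 MHz bus clock: 12500 ticks per 1000000 ns jiffy */
	uint32_t mult = div_sc32(12500, 1000000);

	/* 0x7FFFFF is the max delta programmed below in the patch */
	printf("mult=%u max event=%llu ns\n", mult,
	       (unsigned long long)clockevent_delta2ns(0x7FFFFF, mult));
	return 0;
}
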
@@ -997,7 +1026,7 @@ static int __init calibrate_APIC_clock(v * value into the APIC clock, we just want to get the * counter running for calibration. */ - __setup_APIC_LVTT(1000000000); + __setup_APIC_LVTT(1000000000, 0); /* * The timer chip counts down to zero. Let's wait @@ -1034,6 +1063,13 @@ static int __init calibrate_APIC_clock(v result = (tt1-tt2)*APIC_DIVISOR/LOOPS; + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc32(tt1-tt2, TICK_NSEC * LOOPS); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + if (cpu_has_tsc) apic_printk(APIC_VERBOSE, "..... CPU clock speed is " "%ld.%04ld MHz.\n", @@ -1048,28 +1084,26 @@ static int __init calibrate_APIC_clock(v return result; } -static unsigned int calibration_result; - void __init setup_boot_APIC_clock(void) { unsigned long flags; apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_save(flags); + raw_local_irq_save(flags); calibration_result = calibrate_APIC_clock(); /* * Now set up the timer for real. */ - setup_APIC_timer(calibration_result); + setup_APIC_timer(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __devinit setup_secondary_APIC_clock(void) { - setup_APIC_timer(calibration_result); + setup_APIC_timer(); } void __devinit disable_APIC_timer(void) @@ -1092,6 +1126,8 @@ void enable_APIC_timer(void) } } +static DEFINE_PER_CPU(int, prof_multiplier) = 1; + /* * the frequency of the profiling timer can be changed * by writing a multiplier value into /proc/profile. @@ -1119,60 +1155,6 @@ int setup_profiling_timer(unsigned int m return 0; } - -#undef APIC_DIVISOR - -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ - -inline void smp_local_timer_interrupt(struct pt_regs * regs) -{ - int cpu = smp_processor_id(); - - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT( - calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - -#ifdef CONFIG_SMP - update_process_times(user_mode_vm(regs)); -#endif - } - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - /* * Local APIC timer interrupt. This is the most natural way for doing * local interrupts, but local timer interrupts can be emulated by @@ -1182,7 +1164,7 @@ inline void smp_local_timer_interrupt(st * interrupt as well. 
Thus we cannot inline the local irq ... ] */ -fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) +fastcall notrace void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); @@ -1191,6 +1173,8 @@ fastcall void smp_apic_timer_interrupt(s */ per_cpu(irq_stat, cpu).apic_timer_irqs++; + trace_special(regs->eip, 0, 0); + /* * NOTE! We'd better ACK the irq immediately, * because timer handling can be slow. @@ -1202,7 +1186,17 @@ fastcall void smp_apic_timer_interrupt(s * interrupt lock, which is the WrongThing (tm) to do. */ irq_enter(); - smp_local_timer_interrupt(regs); + /* + * If the task is currently running in user mode, don't + * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not + * configured, this should be optimized out. + */ + if (user_mode(regs)) + touch_softlockup_watchdog(); + + if (lapic_clockevent.event_handler) + lapic_clockevent.event_handler(regs); + irq_exit(); } @@ -1257,6 +1251,7 @@ fastcall void smp_error_interrupt(struct */ printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", smp_processor_id(), v , v1); + dump_stack(); irq_exit(); } Index: linux.prev/arch/i386/kernel/apm.c =================================================================== --- linux.prev.orig/arch/i386/kernel/apm.c +++ linux.prev/arch/i386/kernel/apm.c @@ -552,9 +552,9 @@ static inline void apm_restore_cpus(cpum */ #define APM_DO_CLI \ if (apm_info.allow_ints) \ - local_irq_enable(); \ + raw_local_irq_enable(); \ else \ - local_irq_disable(); + raw_local_irq_disable(); #ifdef APM_ZERO_SEGS # define APM_DECL_SEGS \ @@ -606,12 +606,12 @@ static u8 apm_bios_call(u32 func, u32 eb save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -650,12 +650,12 @@ static u8 apm_bios_call_simple(u32 func, save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; - local_save_flags(flags); + raw_local_save_flags(flags); APM_DO_CLI; APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; - local_irq_restore(flags); + raw_local_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -1215,7 +1215,7 @@ static int suspend(int vetoable) } device_suspend(PMSG_SUSPEND); - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ @@ -1231,14 +1231,14 @@ static int suspend(int vetoable) */ spin_unlock(&i8253_lock); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); save_processor_state(); err = set_system_power_state(APM_STATE_SUSPEND); ignore_normal_resume = 1; restore_processor_state(); - local_irq_disable(); + raw_local_irq_disable(); write_seqlock(&xtime_lock); spin_lock(&i8253_lock); reinit_timer(); @@ -1253,7 +1253,7 @@ static int suspend(int vetoable) apm_error("suspend", err); err = (err == APM_SUCCESS) ? 
0 : -EIO; device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(APM_NORMAL_RESUME, NULL); @@ -1272,22 +1272,22 @@ static void standby(void) { int err; - local_irq_disable(); + raw_local_irq_disable(); device_power_down(PMSG_SUSPEND); /* serialize with the timer interrupt */ write_seqlock(&xtime_lock); /* If needed, notify drivers here */ get_time_diff(); write_sequnlock(&xtime_lock); - local_irq_enable(); + raw_local_irq_enable(); err = set_system_power_state(APM_STATE_STANDBY); if ((err != APM_SUCCESS) && (err != APM_NO_ERROR)) apm_error("standby", err); - local_irq_disable(); + raw_local_irq_disable(); device_power_up(); - local_irq_enable(); + raw_local_irq_enable(); } static apm_event_t get_event(void) Index: linux.prev/arch/i386/kernel/cpu/cpufreq/longhaul.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ linux.prev/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -144,7 +144,7 @@ static void do_powersaver(union msr_long longhaul->bits.RevisionKey = 0; preempt_disable(); - local_irq_save(flags); + raw_local_irq_save(flags); /* * get current pci bus master state for all devices @@ -166,11 +166,11 @@ static void do_powersaver(union msr_long outb(0xFE,0x21); /* TMR0 only */ outb(0xFF,0x80); /* delay */ - safe_halt(); + raw_safe_halt(); wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); halt(); - local_irq_disable(); + raw_local_irq_disable(); outb(tmp_mask,0x21); /* restore mask */ @@ -184,7 +184,7 @@ static void do_powersaver(union msr_long pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); } } while (dev != NULL); - local_irq_restore(flags); + raw_local_irq_restore(flags); preempt_enable(); /* disable bus ratio bit */ @@ -245,16 +245,16 @@ static void longhaul_setstate(unsigned i /* Enable software clock multiplier */ bcr2.bits.ESOFTBF = 1; bcr2.bits.CLOCKMUL = clock_ratio_index; - local_irq_disable(); + raw_local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - safe_halt(); + raw_safe_halt(); /* Disable software clock multiplier */ rdmsrl (MSR_VIA_BCR2, bcr2.val); bcr2.bits.ESOFTBF = 0; - local_irq_disable(); + raw_local_irq_disable(); wrmsrl (MSR_VIA_BCR2, bcr2.val); - local_irq_enable(); + raw_local_irq_enable(); break; /* Index: linux.prev/arch/i386/kernel/cpu/mtrr/cyrix.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/cyrix.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/cyrix.c @@ -17,7 +17,7 @@ cyrix_get_arr(unsigned int reg, unsigned arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ /* Save flags and disable interrupts */ - local_irq_save(flags); + raw_local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -28,7 +28,7 @@ cyrix_get_arr(unsigned int reg, unsigned setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ /* Enable interrupts if it was enabled previously */ - local_irq_restore(flags); + raw_local_irq_restore(flags); shift = ((unsigned char *) base)[1] & 0x0f; *base >>= PAGE_SHIFT; Index: linux.prev/arch/i386/kernel/cpu/mtrr/generic.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/generic.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/generic.c @@ -234,7 +234,7 @@ static unsigned long set_mtrr_state(u32 static unsigned long cr4 = 0; static u32 deftype_lo, deftype_hi; -static DEFINE_SPINLOCK(set_atomicity_lock); +static 
DEFINE_RAW_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -296,14 +296,14 @@ static void generic_set_all(void) unsigned long mask, count; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); /* Actually set the state */ mask = set_mtrr_state(deftype_lo,deftype_hi); post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Use the atomic bitops to update the global mask */ for (count = 0; count < sizeof mask * 8; ++count) { @@ -331,7 +331,7 @@ static void generic_set_mtrr(unsigned in vr = &mtrr_state.var_ranges[reg]; - local_irq_save(flags); + raw_local_irq_save(flags); prepare_set(); if (size == 0) { @@ -350,7 +350,7 @@ static void generic_set_mtrr(unsigned in } post_set(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) Index: linux.prev/arch/i386/kernel/cpu/mtrr/main.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/main.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/main.c @@ -146,7 +146,7 @@ static void ipi_handler(void *info) struct set_mtrr_data *data = info; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); atomic_dec(&data->count); while(!atomic_read(&data->gate)) @@ -164,7 +164,7 @@ static void ipi_handler(void *info) cpu_relax(); atomic_dec(&data->count); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -225,7 +225,7 @@ static void set_mtrr(unsigned int reg, u if (smp_call_function(ipi_handler, &data, 1, 0) != 0) panic("mtrr: timed out waiting for other CPUs\n"); - local_irq_save(flags); + raw_local_irq_save(flags); while(atomic_read(&data.count)) cpu_relax(); @@ -259,7 +259,7 @@ static void set_mtrr(unsigned int reg, u while(atomic_read(&data.count)) cpu_relax(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** @@ -695,11 +695,11 @@ void mtrr_ap_init(void) * 2.cpu hotadd time. 
We let mtrr_add/del_page hold cpuhotplug lock to * prevent mtrr entry changes */ - local_irq_save(flags); + raw_local_irq_save(flags); mtrr_if->set_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static int __init mtrr_init_finialize(void) Index: linux.prev/arch/i386/kernel/cpu/mtrr/state.c =================================================================== --- linux.prev.orig/arch/i386/kernel/cpu/mtrr/state.c +++ linux.prev/arch/i386/kernel/cpu/mtrr/state.c @@ -12,7 +12,7 @@ void set_mtrr_prepare_save(struct set_mt unsigned int cr0; /* Disable interrupts locally */ - local_irq_save(ctxt->flags); + raw_local_irq_save(ctxt->flags); if (use_intel() || is_cpu(CYRIX)) { @@ -73,6 +73,6 @@ void set_mtrr_done(struct set_mtrr_conte write_cr4(ctxt->cr4val); } /* Re-enable interrupts locally (if enabled previously) */ - local_irq_restore(ctxt->flags); + raw_local_irq_restore(ctxt->flags); } Index: linux.prev/arch/i386/kernel/entry.S =================================================================== --- linux.prev.orig/arch/i386/kernel/entry.S +++ linux.prev/arch/i386/kernel/entry.S @@ -76,10 +76,10 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +# define preempt_stop cli #else -#define preempt_stop -#define resume_kernel restore_nocheck +# define preempt_stop +# define resume_kernel restore_nocheck #endif #define SAVE_ALL \ @@ -160,14 +160,17 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) cli + cmpl $0, kernel_preemption + jz restore_nocheck cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl - jz restore_all + jz restore_nocheck testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? 
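The conversions in the hunks above all follow one pattern: code that must genuinely run with hardware interrupts off switches from local_irq_*() to raw_local_irq_*(). A minimal kernel-flavored sketch of that pattern, assuming the PREEMPT_RT convention that the raw_*() variants always toggle the real interrupt flag while the plain variants may become soft/traced operations:

	static void program_timing_sensitive_hw(void)
	{
		unsigned long flags;

		/* hard-disable interrupts on this CPU, saving the prior state */
		raw_local_irq_save(flags);

		/* ... touch hardware that must not observe an interrupt ... */

		/* restore the exact previous hardware interrupt state */
		raw_local_irq_restore(flags);
	}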
- jz restore_all + jz restore_nocheck + cli call preempt_schedule_irq jmp need_resched #endif @@ -200,6 +203,11 @@ sysenter_past_esp: pushl %eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -213,6 +221,11 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work +#ifdef CONFIG_LATENCY_TRACE + pushl %eax + call sys_ret + popl %eax +#endif /* if something modifies registers it must also disable sysexit */ movl EIP(%esp), %edx movl OLDESP(%esp), %ecx @@ -225,6 +238,11 @@ sysenter_past_esp: ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL +#ifdef CONFIG_LATENCY_TRACE + pushl %edx; pushl %ecx; pushl %ebx; pushl %eax + call sys_call + popl %eax; popl %ebx; popl %ecx; popl %edx +#endif GET_THREAD_INFO(%ebp) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ @@ -254,6 +272,17 @@ restore_all: cmpl $((4 << 8) | 3), %eax je ldt_ss # returning to user-space with LDT SS restore_nocheck: +#if defined(CONFIG_CRITICAL_IRQSOFF_TIMING) || defined(CONFIG_LATENCY_TRACE) + pushl %eax +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + call trace_irqs_on +#endif +#ifdef CONFIG_LATENCY_TRACE + call sys_ret +#endif + popl %eax +#endif +restore_nocheck_nmi: RESTORE_REGS addl $4, %esp 1: iret @@ -297,18 +326,19 @@ ldt_ss: # perform work that needs to be done immediately before resumption ALIGN work_pending: - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jz work_notifysig work_resched: - call schedule - cli # make sure we don't miss an interrupt + cli + call __schedule + # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? 
jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED), %ecx jnz work_resched work_notifysig: # deal with pending signals and @@ -351,6 +381,11 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING + pushl %eax + call trace_irqs_on + popl %eax +#endif sti # could let do_syscall_trace() call # schedule() instead movl %esp, %eax @@ -412,9 +447,16 @@ ENTRY(irq_entries_start) vector=vector+1 .endr +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define TRACE_IRQS_OFF call trace_irqs_off_lowlevel; +#else +# define TRACE_IRQS_OFF +#endif + ALIGN common_interrupt: SAVE_ALL + TRACE_IRQS_OFF movl %esp,%eax call do_IRQ jmp ret_from_intr @@ -423,6 +465,7 @@ common_interrupt: ENTRY(name) \ pushl $nr-256; \ SAVE_ALL \ + TRACE_IRQS_OFF \ movl %esp,%eax; \ call smp_/**/name; \ jmp ret_from_intr; @@ -552,7 +595,7 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nocheck_nmi nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) Index: linux.prev/arch/i386/kernel/hpet.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/hpet.c @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +#include +#include + +#define HPET_MASK 0xFFFFFFFF +#define HPET_SHIFT 22 + +/* FSEC = 10^-15 NSEC = 10^-9 */ +#define FSEC_PER_NSEC 1000000 + +static void *hpet_ptr; + +static cycle_t read_hpet(void) +{ + return (cycle_t)readl(hpet_ptr); +} + +struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .mask = (cycle_t)HPET_MASK, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, + .is_continuous = 1, +}; + +static int __init init_hpet_clocksource(void) +{ + unsigned long hpet_period; + void __iomem* hpet_base; + u64 tmp; + + if (!hpet_address) + return -ENODEV; + + /* calculate the hpet address: */ + hpet_base = + (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_ptr = hpet_base + HPET_COUNTER; + + /* calculate the frequency: */ + hpet_period = readl(hpet_base + HPET_PERIOD); + + /* + * hpet period is in femto seconds per cycle + * so we need to convert this to ns/cyc units + * approximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + register_clocksource(&clocksource_hpet); + + return 0; +} + +module_init(init_hpet_clocksource); Index: linux.prev/arch/i386/kernel/i386_ksyms.c =================================================================== --- linux.prev.orig/arch/i386/kernel/i386_ksyms.c +++ linux.prev/arch/i386/kernel/i386_ksyms.c @@ -6,10 +6,12 @@ /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_ASM_SEMAPHORES +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines.
*/ EXPORT_SYMBOL(csum_partial_copy_generic); @@ -25,7 +27,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_ASM_SEMAPHORES) extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); Index: linux.prev/arch/i386/kernel/i8253.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/i8253.c @@ -0,0 +1,137 @@ +/* + * i8253.c 8253/PIT functions + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "io_ports.h" + +DEFINE_RAW_SPINLOCK(i8253_lock); +EXPORT_SYMBOL(i8253_lock); + +static void init_pit_timer(int mode) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + + if (mode != CLOCK_EVT_ONESHOT) { + /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(0x34, PIT_MODE); + udelay(10); + outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ + outb(LATCH >> 8 , PIT_CH0); /* MSB */ + } else { + /* One shot setup */ + outb_p(0x38, PIT_MODE); + udelay(10); + } + + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static void pit_next_event(unsigned long evt) +{ + unsigned long flags; + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(evt & 0xff , PIT_CH0); /* LSB */ + outb(evt >> 8 , PIT_CH0); /* MSB */ + spin_unlock_irqrestore(&i8253_lock, flags); +} + +static struct clock_event pit_clockevent = { + .name = "pit", + .capabilities = CLOCK_CAP_TICK +#ifndef CONFIG_SMP + | CLOCK_CAP_NEXTEVT | CLOCK_CAP_PROFILE | + CLOCK_CAP_UPDATE +#endif + , + .set_mode = init_pit_timer, + .set_next_event = pit_next_event, + .start_event = io_apic_timer_ack, + .end_event = mca_timer_ack, + .shift = 32, + .irq = 0, +}; + +void setup_pit_timer(void) +{ + pit_clockevent.mult = div_sc32(CLOCK_TICK_RATE, NSEC_PER_SEC); + pit_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFF, &pit_clockevent); + pit_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &pit_clockevent); + setup_global_clockevent(&pit_clockevent, CPU_MASK_NONE); +} + +/* + * Since the PIT overflows every tick, it's not very useful + * to just read by itself. So use jiffies to emulate a free + * running counter: + */ +static cycle_t pit_read(void) +{ + unsigned long flags, seq; + int count; + u64 jifs; + + do { + seq = read_seqbegin(&xtime_lock); + + spin_lock_irqsave(&i8253_lock, flags); + outb_p(0x00, PIT_MODE); /* latch the count ASAP */ + count = inb_p(PIT_CH0); /* read the latched count */ + count |= inb_p(PIT_CH0) << 8; + + /* VIA686a test code... reset the latch if count > max + 1 */ + if (count > LATCH) { + outb_p(0x34, PIT_MODE); + outb_p(LATCH & 0xff, PIT_CH0); + outb(LATCH >> 8, PIT_CH0); + count = LATCH - 1; + } + spin_unlock_irqrestore(&i8253_lock, flags); + + jifs = jiffies_64; + } while (read_seqretry(&xtime_lock, seq)); + + jifs -= INITIAL_JIFFIES; + count = (LATCH-1) - count; + + return (cycle_t)(jifs * LATCH) + count; +} + +static struct clocksource clocksource_pit = { + .name = "pit", + .rating = 110, + .read = pit_read, + .mask = (cycle_t)-1, + .mult = 0, + .shift = 20, +}; + +static int __init init_pit_clocksource(void) +{ + if (num_possible_cpus() > 4) /* PIT does not scale!
*/ + return 0; + + clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); + register_clocksource(&clocksource_pit); + + return 0; +} +module_init(init_pit_clocksource); Index: linux.prev/arch/i386/kernel/i8259.c =================================================================== --- linux.prev.orig/arch/i386/kernel/i8259.c +++ linux.prev/arch/i386/kernel/i8259.c @@ -35,7 +35,7 @@ * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -366,7 +366,7 @@ static irqreturn_t math_error_irq(int cp * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { math_error_irq, SA_NODELAY, CPU_MASK_NONE, "fpu", NULL, NULL }; void __init init_ISA_irqs (void) { @@ -422,12 +422,6 @@ void __init init_IRQ(void) intr_init_hook(); /* - * Set the clock to HZ Hz, we already have a valid - * vector now: - */ - setup_pit_timer(); - - /* * External FPU? Set up irq13 if so, for * original braindamaged IBM FERR coupling. */ Index: linux.prev/arch/i386/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/i386/kernel/init_task.c +++ linux.prev/arch/i386/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/i386/kernel/io_apic.c =================================================================== --- linux.prev.orig/arch/i386/kernel/io_apic.c +++ linux.prev/arch/i386/kernel/io_apic.c @@ -49,7 +49,7 @@ atomic_t irq_mis_count; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * Is the SiS APIC rmw bug present ? @@ -90,6 +90,27 @@ int vector_irq[NR_VECTORS] __read_mostly #define vector_to_irq(vector) (vector) #endif +static int timer_ack; + +void io_apic_timer_ack(void *priv) +{ + unsigned long flags; + + if (timer_ack) { + /* + * Subtle, when I/O APICs are used we have to ack timer IRQ + * manually to reset the IRR bit for do_slow_gettimeoffset(). + * This will also deassert NMI lines for the watchdog if run + * on an 82489DX-based system. + */ + spin_lock_irqsave(&i8259A_lock, flags); + outb(0x0c, PIC_MASTER_OCW3); + /* Ack the IRQ; AEOI will end it automatically. */ + inb(PIC_MASTER_POLL); + spin_unlock_irqrestore(&i8259A_lock, flags); + } +} + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. 
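For reference, here is the mult/shift fixed-point conversion used by the HPET and PIT clocksource code above, as a standalone user-space sketch; the HPET period value is an illustrative example, not taken from the patch:

	#include <stdint.h>
	#include <stdio.h>

	#define HPET_SHIFT	22
	#define FSEC_PER_NSEC	1000000ULL

	int main(void)
	{
		/* ~14.318 MHz HPET: one cycle is roughly 69841279 fsec */
		uint64_t period_fsec = 69841279;

		/* mult = (fsec/cyc << shift) / FSEC_PER_NSEC, exactly as
		 * computed in init_hpet_clocksource() */
		uint32_t mult = (uint32_t)((period_fsec << HPET_SHIFT) /
					   FSEC_PER_NSEC);

		/* converting a cycle count to nanoseconds is then just
		 * ns = (cycles * mult) >> shift */
		uint64_t cycles = 14318180;	/* about one second's worth */

		printf("mult=%u -> %llu ns\n", mult,
		       (unsigned long long)((cycles * mult) >> HPET_SHIFT));
		return 0;
	}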
We are super @@ -133,6 +154,105 @@ static void __init replace_pin_at_irq(un } } +//#define IOAPIC_CACHE + +#ifdef IOAPIC_CACHE +# define MAX_IOAPIC_CACHE 512 + +/* + * Cache register values: + */ +static unsigned int io_apic_cache[MAX_IO_APICS][MAX_IOAPIC_CACHE] + ____cacheline_aligned_in_smp; +#endif + +inline unsigned int __raw_io_apic_read(unsigned int apic, unsigned int reg) +{ + *IO_APIC_BASE(apic) = reg; + return *(IO_APIC_BASE(apic)+4); +} + +unsigned int raw_io_apic_read(unsigned int apic, unsigned int reg) +{ + unsigned int val = __raw_io_apic_read(apic, reg); + +#ifdef IOAPIC_CACHE + io_apic_cache[apic][reg] = val; +#endif + return val; +} + +unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ +#ifdef IOAPIC_CACHE + if (unlikely(reg >= MAX_IOAPIC_CACHE)) { + static int once = 1; + + if (once) { + once = 0; + printk("WARNING: ioapic register cache overflow: %d.\n", + reg); + dump_stack(); + } + return __raw_io_apic_read(apic, reg); + } + if (io_apic_cache[apic][reg] && !sis_apic_bug) + return io_apic_cache[apic][reg]; +#endif + return raw_io_apic_read(apic, reg); +} + +void io_apic_write(unsigned int apic, unsigned int reg, unsigned int val) +{ +#ifdef IOAPIC_CACHE + if (unlikely(reg >= MAX_IOAPIC_CACHE)) { + static int once = 1; + + if (once) { + once = 0; + printk("WARNING: ioapic register cache overflow: %d.\n", + reg); + dump_stack(); + } + } else + io_apic_cache[apic][reg] = val; +#endif + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = val; +} + +/* + * Some systems need a POST flush or else level-triggered interrupts + * generate lots of spurious interrupts due to the POST-ed write not + * reaching the IOAPIC before the IRQ is ACK-ed in the local APIC. + */ +#ifdef CONFIG_SMP +# define IOAPIC_POSTFLUSH +#endif + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. 
+ * + * Older SiS APIC requires we rewrite the index register + */ +void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val) +{ +#ifdef IOAPIC_CACHE + io_apic_cache[apic][reg] = val; +#endif + if (unlikely(sis_apic_bug)) + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = val; +#ifndef IOAPIC_POSTFLUSH + if (unlikely(sis_apic_bug)) +#endif + /* + * Force POST flush by reading: + */ + val = *(IO_APIC_BASE(apic)+4); +} + static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_pin_list *entry = irq_2_pin + irq; @@ -164,18 +284,6 @@ static void __unmask_IO_APIC_irq (unsign __modify_IO_APIC_irq(irq, 0, 0x00010000); } -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); -} - static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -1430,8 +1538,8 @@ void __init print_IO_APIC(void) struct IO_APIC_route_entry entry; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); - *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); + *(((int *)&entry)+0) = raw_io_apic_read(apic, 0x10+i*2); + *(((int *)&entry)+1) = raw_io_apic_read(apic, 0x11+i*2); spin_unlock_irqrestore(&ioapic_lock, flags); printk(KERN_DEBUG " %02x %03X %02X ", @@ -1477,7 +1585,7 @@ void __init print_IO_APIC(void) return; } -#if 0 +#if 1 static void print_APIC_bitfield (int base) { @@ -1866,7 +1974,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1877,7 +1985,7 @@ static int __init timer_irq_works(void) * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays. */ - if (jiffies - t1 > 4) + if (jiffies - t1 > 4 && jiffies - t1 < 16) return 1; return 0; @@ -1930,9 +2038,11 @@ static unsigned int startup_edge_ioapic_ static void ack_edge_ioapic_irq(unsigned int irq) { move_irq(irq); +#if 0 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); +#endif ack_APIC_irq(); } @@ -1957,6 +2067,30 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +#ifdef CONFIG_PREEMPT_HARDIRQS + +/* + * In the PREEMPT_HARDIRQS case we don't want to keep the local + * APIC unacked, because that prevents further interrupts from + * being handled - and with IRQ threads being delayed arbitrarily, + * that's unacceptable. So we first mask the IRQ, then ack it. + * The hardirq thread will then unmask it.
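Schematically, the mask-then-ack flow described in the comment above and its thread-side counterpart look as follows; a sketch only, where wake_irq_thread() is a hypothetical stand-in for the real hardirq-thread wakeup:

	/* hard-IRQ context: keep the critical section minimal */
	static void level_irq_arrived(unsigned int irq)
	{
		mask_IO_APIC_irq(irq);	/* keep the level line from re-firing */
		ack_APIC_irq();		/* local APIC can accept new vectors */
		wake_irq_thread(irq);	/* hypothetical: hand off to the thread */
	}

	/* IRQ-thread context, possibly much later: */
	static void level_irq_finished(unsigned int irq)
	{
		/* the handlers have run; safe to unmask the line again */
		unmask_IO_APIC_irq(irq);
	}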
+ */ +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +#else + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + +#endif + static void end_level_ioapic_irq (unsigned int irq) { unsigned long v; @@ -1991,8 +2125,10 @@ static void end_level_ioapic_irq (unsign if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + /* mask = 1, trigger = 0 */ + __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + /* mask = 0, trigger = 1 */ + __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); spin_unlock(&ioapic_lock); } } @@ -2020,6 +2156,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); Index: linux.prev/arch/i386/kernel/irq.c =================================================================== --- linux.prev.orig/arch/i386/kernel/irq.c +++ linux.prev/arch/i386/kernel/irq.c @@ -51,7 +51,7 @@ static union irq_ctx *softirq_ctx[NR_CPU * SMP cross-CPU interrupts have their own specific * handlers). */ -fastcall unsigned int do_IRQ(struct pt_regs *regs) +fastcall notrace unsigned int do_IRQ(struct pt_regs *regs) { /* high bits used in ret_from_ code */ int irq = regs->orig_eax & 0xff; @@ -59,8 +59,12 @@ fastcall unsigned int do_IRQ(struct pt_r union irq_ctx *curctx, *irqctx; u32 *isp; #endif - irq_enter(); +#ifdef CONFIG_LATENCY_TRACE + if (irq == trace_user_trigger_irq) + user_trace_start(); +#endif + trace_special(regs->eip, irq, 0); #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { @@ -69,7 +73,7 @@ fastcall unsigned int do_IRQ(struct pt_r __asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (THREAD_SIZE - 1)); if (unlikely(esp < (sizeof(struct thread_info) + STACK_WARN))) { - printk("do_IRQ: stack overflow: %ld\n", + printk("BUG: do_IRQ: stack overflow: %ld\n", esp - sizeof(struct thread_info)); dump_stack(); } @@ -173,7 +177,7 @@ asmlinkage void do_softirq(void) if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); if (local_softirq_pending()) { curctx = current_thread_info(); @@ -194,7 +198,7 @@ asmlinkage void do_softirq(void) ); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); @@ -224,8 +228,10 @@ int show_interrupts(struct seq_file *p, } if (i < NR_IRQS) { - spin_lock_irqsave(&irq_desc[i].lock, flags); - action = irq_desc[i].action; + irq_desc_t *desc = irq_desc + i; + + spin_lock_irqsave(&desc->lock, flags); + action = desc->action; if (!action) goto skip; seq_printf(p, "%3d: ",i); @@ -235,15 +241,27 @@ int show_interrupts(struct seq_file *p, for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %-14s", desc->handler->typename); +#define F(x,c) ((desc->status & x) ? 
c : '.') + seq_printf(p, " [%c%c%c%c%c%c%c%c%c/", + F(IRQ_INPROGRESS, 'I'), + F(IRQ_DISABLED, 'D'), + F(IRQ_PENDING, 'P'), + F(IRQ_REPLAY, 'R'), + F(IRQ_AUTODETECT, 'A'), + F(IRQ_WAITING, 'W'), + F(IRQ_LEVEL, 'L'), + F(IRQ_MASKED, 'M'), + F(IRQ_NODELAY, 'N')); +#undef F + seq_printf(p, "%3d]", desc->irqs_unhandled); seq_printf(p, " %s", action->name); - for (action=action->next; action; action = action->next) seq_printf(p, ", %s", action->name); seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -298,9 +316,9 @@ void fixup_irqs(cpumask_t map) barrier(); #else /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); #endif } #endif Index: linux.prev/arch/i386/kernel/mca.c =================================================================== --- linux.prev.orig/arch/i386/kernel/mca.c +++ linux.prev/arch/i386/kernel/mca.c @@ -472,3 +472,22 @@ void mca_handle_nmi(void) mca_nmi_hook(); } /* mca_handle_nmi */ + +void mca_timer_ack(void *priv) +{ + int irq; + + if (MCA_bus) { + /* The PS/2 uses level-triggered interrupts. You can't + turn them off, nor would you want to (any attempt to + enable edge-triggered interrupts usually gets intercepted by a + special hardware circuit). Hence we have to acknowledge + the timer interrupt. Through some incredibly stupid + design idea, the reset for IRQ 0 is done by setting the + high bit of the PPI port B (0x61). Note that some PS/2s, + notably the 55SX, work fine if this is removed. */ + + irq = inb_p( 0x61 ); /* read the current state */ + outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ + } +} Index: linux.prev/arch/i386/kernel/mcount-wrapper.S =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/mcount-wrapper.S @@ -0,0 +1,27 @@ +/* + * linux/arch/i386/mcount-wrapper.S + * + * Copyright (C) 2004 Ingo Molnar + */ + +.globl mcount +mcount: + + cmpl $0, mcount_enabled + jz out + + push %ebp + mov %esp, %ebp + pushl %eax + pushl %ecx + pushl %edx + + call __mcount + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret + Index: linux.prev/arch/i386/kernel/microcode.c =================================================================== --- linux.prev.orig/arch/i386/kernel/microcode.c +++ linux.prev/arch/i386/kernel/microcode.c @@ -109,7 +109,7 @@ MODULE_LICENSE("GPL"); #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) /* serialize access to the physical write to MSR 0x79 */ -static DEFINE_SPINLOCK(microcode_update_lock); +static DEFINE_RAW_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ static DECLARE_MUTEX(microcode_sem); Index: linux.prev/arch/i386/kernel/nmi.c =================================================================== --- linux.prev.orig/arch/i386/kernel/nmi.c +++ linux.prev/arch/i386/kernel/nmi.c @@ -34,7 +34,7 @@ unsigned int nmi_watchdog = NMI_NONE; extern int unknown_nmi_panic; -static unsigned int nmi_hz = HZ; +static unsigned int nmi_hz = 1000; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); @@ -108,7 +108,7 @@ int nmi_active; static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + 
raw_local_irq_enable(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -140,8 +140,8 @@ static int __init check_nmi_watchdog(voi for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; - local_irq_enable(); - mdelay((10*1000)/nmi_hz); // wait 10 ticks + raw_local_irq_enable(); + mdelay((100*1000)/nmi_hz); // wait 100 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { #ifdef CONFIG_SMP @@ -168,7 +168,7 @@ static int __init check_nmi_watchdog(voi /* now that we know it works we can reduce NMI frequency to something more reasonable; makes a difference in some configs */ if (nmi_watchdog == NMI_LOCAL_APIC) - nmi_hz = 1; + nmi_hz = 10000; kfree(prev_nmi_count); return 0; @@ -521,9 +521,34 @@ void touch_nmi_watchdog (void) extern void die_nmi(struct pt_regs *, const char *msg); -void nmi_watchdog_tick (struct pt_regs * regs) +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) { + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + printk("nmi_show_all_regs(): start on CPU#%d.\n", + raw_smp_processor_id()); + dump_stack(); + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} + +static DEFINE_RAW_SPINLOCK(nmi_print_lock); +void notrace nmi_watchdog_tick (struct pt_regs * regs) +{ /* * Since current_thread_info()-> is always on the stack, and we * always switch the stack NMI-atomically, it's safe to use @@ -531,7 +556,16 @@ void nmi_watchdog_tick (struct pt_regs * */ int sum, cpu = smp_processor_id(); - sum = per_cpu(irq_stat, cpu).apic_timer_irqs; + sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0); + + profile_tick(CPU_PROFILING, regs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + printk("NMI show regs on CPU#%d:\n", cpu); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (last_irq_sums[cpu] == sum) { /* @@ -539,12 +573,25 @@ void nmi_watchdog_tick (struct pt_regs * * wait a few IRQs (5 seconds) before doing the oops ... */ alert_counter[cpu]++; - if (alert_counter[cpu] == 5*nmi_hz) - /* - * die_nmi will return ONLY if NOTIFY_STOP happens.. 
- */ - die_nmi(regs, "NMI Watchdog detected LOCKUP"); + if (alert_counter[cpu] && !(alert_counter[cpu] % (5*nmi_hz))) { + int i; + + bust_spinlocks(1); + spin_lock(&nmi_print_lock); + printk("NMI watchdog detected lockup on CPU#%d (%d/%d)\n", cpu, alert_counter[cpu], 5*nmi_hz); + show_regs(regs); + spin_unlock(&nmi_print_lock); + + for_each_online_cpu(i) + if (i != cpu) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); + die_nmi(regs, "NMI Watchdog detected LOCKUP"); + } + } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; } Index: linux.prev/arch/i386/kernel/process.c =================================================================== --- linux.prev.orig/arch/i386/kernel/process.c +++ linux.prev/arch/i386/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,12 @@ static int hlt_counter; unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_SPINLOCK(pm_idle_switch_lock); +EXPORT_SYMBOL_GPL(pm_idle_switch_lock); + +int pm_idle_locked = 0; +EXPORT_SYMBOL_GPL(pm_idle_locked); + /* * Return saved PC of a blocked thread. */ @@ -99,21 +106,21 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); if (!hlt_counter && boot_cpu_data.hlt_works_ok) { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + while (!need_resched() && !need_resched_delayed()) { + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); + raw_local_irq_enable(); } set_thread_flag(TIF_POLLING_NRFLAG); } else { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) cpu_relax(); } } @@ -126,16 +133,17 @@ EXPORT_SYMBOL(default_idle); * to poll the ->work.need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. 
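The rewritten idle loops that follow all rely on the same lost-wakeup guard: re-check the resched flags with interrupts hard-disabled before halting, since the enable-and-halt sequence executes atomically. Condensed to a sketch, where need_resched_delayed() is the PREEMPT_RT addition checked alongside need_resched():

	while (!need_resched() && !need_resched_delayed()) {
		raw_local_irq_disable();
		/* re-check with interrupts off: a wakeup IPI can no
		 * longer slip in between this check and the halt */
		if (!need_resched() && !need_resched_delayed())
			raw_safe_halt();	/* enables interrupts, then halts */
		else
			raw_local_irq_enable();
	}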
*/ -static void poll_idle (void) +void poll_idle (void) { - local_irq_enable(); + raw_local_irq_enable(); asm volatile( "2:" "testl %0, %1;" "rep; nop;" "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); + : : "i"(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_DELAYED), + "m" (current_thread_info()->flags)); } #ifdef CONFIG_HOTPLUG_CPU @@ -153,7 +161,7 @@ static inline void play_dead(void) /* * With physical CPU hotplug, we should halt the cpu */ - local_irq_disable(); + raw_local_irq_disable(); while (1) halt(); } @@ -178,7 +186,9 @@ void cpu_idle(void) /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -196,9 +206,11 @@ void cpu_idle(void) __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } } @@ -239,12 +251,12 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); } @@ -372,11 +384,16 @@ void exit_thread(void) /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + int cpu; + struct tss_struct *tss; + void *io_bitmap_ptr = t->io_bitmap_ptr; - kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + mb(); + kfree(io_bitmap_ptr); + + cpu = get_cpu(); + tss = &per_cpu(init_tss, cpu); /* * Careful, clear this in the TSS too: */ Index: linux.prev/arch/i386/kernel/reboot.c =================================================================== --- linux.prev.orig/arch/i386/kernel/reboot.c +++ linux.prev/arch/i386/kernel/reboot.c @@ -202,7 +202,7 @@ void machine_real_restart(unsigned char { unsigned long flags; - local_irq_disable(); + raw_local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST routine will recognize as telling it to do a proper reboot. 
(Well Index: linux.prev/arch/i386/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/i386/kernel/semaphore.c +++ linux.prev/arch/i386/kernel/semaphore.c @@ -13,6 +13,7 @@ * rw semaphores implemented November 1999 by Benjamin LaHaise */ #include +#include #include /* @@ -28,15 +29,15 @@ asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed\n" -"__down_failed:\n\t" +".globl __compat_down_failed\n" +"__compat_down_failed:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down\n\t" + "call __compat_down\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -49,15 +50,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_interruptible\n" -"__down_failed_interruptible:\n\t" +".globl __compat_down_failed_interruptible\n" +"__compat_down_failed_interruptible:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_interruptible\n\t" + "call __compat_down_interruptible\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -70,15 +71,15 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __down_failed_trylock\n" -"__down_failed_trylock:\n\t" +".globl __compat_down_failed_trylock\n" +"__compat_down_failed_trylock:\n\t" #if defined(CONFIG_FRAME_POINTER) "pushl %ebp\n\t" "movl %esp,%ebp\n\t" #endif "pushl %edx\n\t" "pushl %ecx\n\t" - "call __down_trylock\n\t" + "call __compat_down_trylock\n\t" "popl %ecx\n\t" "popl %edx\n\t" #if defined(CONFIG_FRAME_POINTER) @@ -91,45 +92,13 @@ asm( asm( ".section .sched.text\n" ".align 4\n" -".globl __up_wakeup\n" -"__up_wakeup:\n\t" +".globl __compat_up_wakeup\n" +"__compat_up_wakeup:\n\t" "pushl %edx\n\t" "pushl %ecx\n\t" - "call __up\n\t" + "call __compat_up\n\t" "popl %ecx\n\t" "popl %edx\n\t" "ret" ); -/* - * rw spinlock fallbacks - */ -#if defined(CONFIG_SMP) -asm( -".section .sched.text\n" -".align 4\n" -".globl __write_lock_failed\n" -"__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" -"1: rep; nop\n\t" - "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" - "jnz __write_lock_failed\n\t" - "ret" -); - -asm( -".section .sched.text\n" -".align 4\n" -".globl __read_lock_failed\n" -"__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" -"1: rep; nop\n\t" - "cmpl $1,(%eax)\n\t" - "js 1b\n\t" - LOCK "decl (%eax)\n\t" - "js __read_lock_failed\n\t" - "ret" -); -#endif Index: linux.prev/arch/i386/kernel/setup.c =================================================================== --- linux.prev.orig/arch/i386/kernel/setup.c +++ linux.prev/arch/i386/kernel/setup.c @@ -1620,6 +1620,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif + tsc_init(); } #include "setup_arch_post.h" Index: linux.prev/arch/i386/kernel/signal.c =================================================================== --- linux.prev.orig/arch/i386/kernel/signal.c +++ linux.prev/arch/i386/kernel/signal.c @@ -604,6 +604,13 @@ int fastcall do_signal(struct pt_regs *r int signr; struct k_sigaction ka; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux.prev/arch/i386/kernel/smp.c 
=================================================================== --- linux.prev.orig/arch/i386/kernel/smp.c +++ linux.prev/arch/i386/kernel/smp.c @@ -163,7 +163,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu unsigned long cfg; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. @@ -186,7 +186,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu */ apic_write_around(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void send_IPI_mask_sequence(cpumask_t mask, int vector) @@ -200,7 +200,7 @@ void send_IPI_mask_sequence(cpumask_t ma * should be modified to do 1 message per cluster ID - mbligh */ - local_irq_save(flags); + raw_local_irq_save(flags); for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { if (cpu_isset(query_cpu, mask)) { @@ -227,7 +227,7 @@ void send_IPI_mask_sequence(cpumask_t ma apic_write_around(APIC_ICR, cfg); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } #include /* must come after the send_IPI functions above for inlining */ @@ -245,7 +245,7 @@ void send_IPI_mask_sequence(cpumask_t ma static cpumask_t flush_cpumask; static struct mm_struct * flush_mm; static unsigned long flush_va; -static DEFINE_SPINLOCK(tlbstate_lock); +static DEFINE_RAW_SPINLOCK(tlbstate_lock); #define FLUSH_ALL 0xffffffff /* @@ -390,7 +390,7 @@ static void flush_tlb_others(cpumask_t c while (!cpus_empty(flush_cpumask)) /* nothing. lockup detection does not belong here */ - mb(); + cpu_relax(); flush_mm = NULL; flush_va = 0; @@ -481,10 +481,20 @@ void smp_send_reschedule(int cpu) } /* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -538,7 +548,7 @@ int smp_call_function (void (*func) (voi } /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); data.func = func; data.info = info; @@ -572,7 +582,7 @@ static void stop_this_cpu (void * dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); if (cpu_data[smp_processor_id()].hlt_works_ok) for(;;) halt(); @@ -587,19 +597,20 @@ void smp_send_stop(void) { smp_call_function(stop_this_cpu, NULL, 1, 0); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. + * Reschedule call back. Trigger a reschedule pass so that + * RT-overload balancing can pass tasks around. 
*/ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) +fastcall notrace void smp_reschedule_interrupt(struct pt_regs *regs) { + trace_special(regs->eip, 0, 0); ack_APIC_irq(); + set_tsk_need_resched(current); } fastcall void smp_call_function_interrupt(struct pt_regs *regs) Index: linux.prev/arch/i386/kernel/smpboot.c =================================================================== --- linux.prev.orig/arch/i386/kernel/smpboot.c +++ linux.prev/arch/i386/kernel/smpboot.c @@ -212,142 +212,299 @@ valid_k7: ; } -/* - * TSC synchronization. - * - * We first check whether all CPUs have their TSC's synchronized, - * then we print a warning if not, and always resync. - */ +static atomic_t tsc_start_flag, tsc_check_start, tsc_check_stop; -static atomic_t tsc_start_flag = ATOMIC_INIT(0); -static atomic_t tsc_count_start = ATOMIC_INIT(0); -static atomic_t tsc_count_stop = ATOMIC_INIT(0); -static unsigned long long tsc_values[NR_CPUS]; - -#define NR_LOOPS 5 - -static void __init synchronize_tsc_bp (void) +static int __init check_tsc_warp(void) { - int i; - unsigned long long t0; - unsigned long long sum, avg; - long long delta; - unsigned int one_usec; - int buggy = 0; - - printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); - - /* convert from kcyc/sec to cyc/usec */ - one_usec = cpu_khz / 1000; + static DEFINE_RAW_SPINLOCK(warp_lock); + static long long prev; + static unsigned int error; - atomic_set(&tsc_start_flag, 1); - wmb(); + int cpus = num_booting_cpus(), nr = 0; + long long start, now, end, delta; + atomic_inc(&tsc_check_start); + while (atomic_read(&tsc_check_start) != cpus) + cpu_relax(); /* - * We loop a few times to get a primed instruction cache, - * then the last pass is more or less synchronized and - * the BP and APs set their cycle counters to zero all at - * once. This reduces the chance of having random offsets - * between the processors, and guarantees that the maximum - * delay between the cycle counters is never bigger than - * the latency of information-passing (cachelines) between - * two CPUs. 
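The warp test that replaces the old skew measurement is conceptually tiny: every CPU repeatedly publishes its TSC and checks that the shared value never moves backwards. Reduced to a sketch (the protecting raw spinlock and the 500 ms loop bound live in the real check_tsc_warp() below):

	static unsigned long long prev_tsc;	/* shared; lock-protected in the patch */

	/* returns nonzero if this CPU saw the TSC go backwards relative
	 * to the last observation made by any CPU */
	static int tsc_warped(unsigned long long now)
	{
		int warp = ((long long)(now - prev_tsc) < 0);

		prev_tsc = now;
		return warp;
	}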
+ * Run the check for 500 msecs: */ - for (i = 0; i < NR_LOOPS; i++) { - /* - * all APs synchronize but they loop on '== num_cpus' - */ - while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_stop, 0); - wmb(); - /* - * this lets the APs save their current TSC: - */ - atomic_inc(&tsc_count_start); + rdtscll(start); + end = start + cpu_khz*500; - rdtscll(tsc_values[smp_processor_id()]); + for (;;) { /* - * We clear the TSC in the last loop: + * Check for the TSC going backwards (between CPUs): */ - if (i == NR_LOOPS-1) - write_tsc(0, 0); + spin_lock(&warp_lock); + rdtscll(now); + delta = now - prev; + prev = now; + spin_unlock(&warp_lock); + if (unlikely(delta < 0)) + error = 1; + if (now > end) + break; /* - * Wait for all APs to leave the synchronization point: + * Take it easy every couple of iterations, + * to not starve other CPUs: */ - while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) - mb(); - atomic_set(&tsc_count_start, 0); - wmb(); - atomic_inc(&tsc_count_stop); + nr++; + if (!(nr % 31)) + cpu_relax(); } - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_isset(i, cpu_callout_map)) { - t0 = tsc_values[i]; - sum += t0; - } - } - avg = sum; - do_div(avg, num_booting_cpus()); + atomic_inc(&tsc_check_stop); + while (atomic_read(&tsc_check_stop) != cpus) + cpu_relax(); - sum = 0; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - delta = tsc_values[i] - avg; - if (delta < 0) - delta = -delta; - /* - * We report bigger than 2 microseconds clock differences. - */ - if (delta > 2*one_usec) { - long realdelta; - if (!buggy) { - buggy = 1; - printk("\n"); - } - realdelta = delta; - do_div(realdelta, one_usec); - if (tsc_values[i] < avg) - realdelta = -realdelta; + return error; +} - printk(KERN_INFO "CPU#%d had %ld usecs TSC skew, fixed it up.\n", i, realdelta); - } +/* + * TSC synchronization based on ia64 itc synchronization code. Synchronize + * pairs of processors rather than trying to synchronize all of the processors + * with a single event. When several processors are all waiting for an + * event they don't all see it at the same time. The write will cause + * an invalidate on each processor's cache and then they all scramble to + * re-read that cache line. + * + * Writing the TSC resets the upper 32-bits, so we need to be careful + * that all of the cpus can be synchronized before we overflow the + * 32-bit count.
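One measurement round of the pairwise scheme, seen from the slave side — a sketch that mirrors get_delta() below and reuses the go[], MASTER/SLAVE and rep_nop() definitions from the patch:

	static unsigned long one_round(unsigned long i, unsigned long last_reply,
				       unsigned long *t0, unsigned long *t1)
	{
		unsigned long tm;

		rdtscl(*t0);			/* slave timestamp before the request */
		go[MASTER] = i + 1;		/* publish a new request */
		while ((tm = go[SLAVE]) == last_reply)
			rep_nop();		/* wait for the master's TSC sample */
		rdtscl(*t1);			/* slave timestamp after the reply */

		return tm;			/* the master's timestamp */
	}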
+ */ - sum += delta; +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/sizeof(long)) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static volatile unsigned long go[2*SLAVE] __cacheline_aligned; +static volatile int current_slave = -1; +static volatile int tsc_sync_complete = 0; +static volatile int tsc_adj_latency = 0; +static unsigned int max_rt = 0; +static unsigned int max_delta = 0; + +#define DEBUG_TSC_SYNC 0 +#if DEBUG_TSC_SYNC +struct tsc_sync_debug { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of tsc adjustment latency */ +} tsc_sync_debug[NUM_ROUNDS*NR_CPUS]; +#endif + +void +sync_master(void) +{ + unsigned long n, tsc, last_go_master; + + last_go_master = 0; + while (1) { + while ((n = go[MASTER]) == last_go_master) + rep_nop(); + if (n == ~0) + break; + rdtscl(tsc); + if (unlikely(!tsc)) + tsc = 1; + go[SLAVE] = tsc; + last_go_master = n; } - if (!buggy) - printk("passed.\n"); } -static void __init synchronize_tsc_ap (void) +/* + * Return the number of cycles by which our TSC differs from the TSC on + * the master (time-keeper) CPU. A positive number indicates our TSC is + * ahead of the master, negative that it is behind. + */ +static inline long +get_delta (long *rt, long *master) { - int i; + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm, last_go_slave; + long i; + + last_go_slave = go[SLAVE]; + for (i = 0; i < NUM_ITERS; ++i) { + rdtscl(t0); + go[MASTER] = i+1; + while ((tm = go[SLAVE]) == last_go_slave) + rep_nop(); + rdtscl(t1); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + last_go_slave = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + ++tcenter; + return tcenter - best_tm; +} + +/* + * Synchronize TSC of the current (slave) CPU with the TSC of the MASTER CPU + * (normally the time-keeper CPU). We use a closed loop to eliminate the + * possibility of unaccounted-for errors (such as getting a machine check in + * the middle of a calibration step). The basic idea is for the slave to ask + * the master what TSC value it has and to read its own TSC before and after + * the master responds. Each iteration gives us three + * timestamps: + * + * slave master + * + * t0 ---\ + * ---\ + * ---> + * tm + * /--- + * /--- + * t1 <--- + * + * + * The goal is to adjust the slave's TSC such that tm falls exactly half-way + * between t0 and t1. If we achieve this, the clocks are synchronized provided + * the interconnect between the slave and the master is symmetric. Even if the + * interconnect were asymmetric, we would still know that the synchronization + * error is smaller than the roundtrip latency (t0 - t1). + * + * When the interconnect is quiet and symmetric, this lets us synchronize the + * TSC to within one or two cycles. However, we can only *guarantee* that the + * synchronization is accurate to within a round-trip time, which is typically + * in the range of several hundred cycles (e.g., ~500 cycles). In practice, + * this means that the TSC's are usually almost perfectly synchronized, but we + * shouldn't assume that the accuracy is much better than half a micro second + * or so. 
+ */ + +static void __init +synchronize_tsc_ap (void) +{ + long i, delta, adj, adjust_latency, n_rounds; + unsigned long rt, master_time_stamp, tsc; +#if DEBUG_TSC_SYNC + struct tsc_sync_debug *t = + &tsc_sync_debug[smp_processor_id() * NUM_ROUNDS]; +#endif + + while (!atomic_read(&tsc_start_flag)) + mb(); + + if (!check_tsc_warp()) + return; /* - * Not every cpu is online at the time - * this gets called, so we first wait for the BP to - * finish SMP initialization: + * Wait for our turn to synchronize with the boot processor. */ - while (!atomic_read(&tsc_start_flag)) mb(); + while (current_slave != smp_processor_id()) + rep_nop(); + adjust_latency = tsc_adj_latency; + + go[SLAVE] = 0; + go[MASTER] = 0; + write_tsc(0,0); + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) + break; + + if (i > 0) + adjust_latency += -delta; + adj = -delta + adjust_latency/8; + rdtscl(tsc); + write_tsc(tsc + adj, 0); +#if DEBUG_TSC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/8; +#endif + } + n_rounds = i; + go[MASTER] = ~0; + +#if (DEBUG_TSC_SYNC == 2) + for (i = 0; i < n_rounds; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); + + printk("CPU %d: synchronized TSC (last diff %ld cycles, maxerr %lu cycles)\n", + smp_processor_id(), delta, rt); + + printk("It took %ld rounds\n", n_rounds); +#endif + if (rt > max_rt) + max_rt = rt; + if (delta < 0) + delta = -delta; + if (delta > max_delta) + max_delta = delta; + tsc_adj_latency = adjust_latency; + current_slave = -1; + while (!tsc_sync_complete) + rep_nop(); +} + +/* + * The boot processor set its own TSC to zero and then gives each + * slave processor the chance to synchronize itself. + */ - for (i = 0; i < NR_LOOPS; i++) { - atomic_inc(&tsc_count_start); - while (atomic_read(&tsc_count_start) != num_booting_cpus()) - mb(); +static void __init synchronize_tsc_bp (void) +{ + unsigned int tsc_low, tsc_high, error; + int cpu; + + atomic_set(&tsc_start_flag, 1); - rdtscll(tsc_values[smp_processor_id()]); - if (i == NR_LOOPS-1) - write_tsc(0, 0); + printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", + num_booting_cpus()); - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); + if (!check_tsc_warp()) { + printk("passed.\n"); + return; + } + printk("failed.\n"); + + printk(KERN_INFO "starting TSC synchronization\n"); + write_tsc(0, 0); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + if (cpu == smp_processor_id()) + continue; + go[MASTER] = 0; + current_slave = cpu; + sync_master(); + while (current_slave != -1) + rep_nop(); + } + rdtsc(tsc_low, tsc_high); + if (tsc_high) + printk("TSC overflowed during synchronization\n"); + else + printk("TSC synchronization complete max_delta=%d cycles\n", + max_delta); + if (max_rt < 4293) { + error = (max_rt * 1000000)/cpu_khz; + printk("TSC sync round-trip time %d.%03d microseconds\n", + error/1000, error%1000); + } else { + printk("TSC sync round-trip time %d cycles\n", max_rt); } + tsc_sync_complete = 1; } -#undef NR_LOOPS extern void calibrate_delay(void); @@ -547,7 +704,7 @@ static void __devinit start_secondary(vo per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; /* We can take interrupts now: we're officially "up". 
*/ - local_irq_enable(); + raw_local_irq_enable(); wmb(); cpu_idle(); @@ -1340,9 +1497,9 @@ int __cpu_disable(void) clear_local_APIC(); /* Allow any queued timer interrupts to get serviced */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); remove_siblinginfo(cpu); @@ -1386,11 +1543,11 @@ int __devinit __cpu_up(unsigned int cpu) /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); - local_irq_enable(); + raw_local_irq_enable(); return -EIO; } - local_irq_enable(); + raw_local_irq_enable(); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; /* Unleash the CPU! */ cpu_set(cpu, smp_commenced_mask); Index: linux.prev/arch/i386/kernel/switch2poll.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/switch2poll.c @@ -0,0 +1,5 @@ +/* + * Same type of hack used for early_printk. This keeps the code + * in one place. + */ +#include "../../x86_64/kernel/switch2poll.c" Index: linux.prev/arch/i386/kernel/time.c =================================================================== --- linux.prev.orig/arch/i386/kernel/time.c +++ linux.prev/arch/i386/kernel/time.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include #include "mach_time.h" @@ -79,16 +81,9 @@ EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); -#include - -DEFINE_SPINLOCK(i8253_lock); -EXPORT_SYMBOL(i8253_lock); - -struct timer_opts *cur_timer __read_mostly = &timer_none; - /* * This is a special lock that is owned by the CPU and holds the index * register we are working with. It is required for NMI access to the @@ -118,118 +113,25 @@ void rtc_cmos_write(unsigned char val, u } EXPORT_SYMBOL(rtc_cmos_write); -/* - * This version of gettimeofday has microsecond resolution - * and better than microsecond precision on fast x86 machines with TSC. - */ -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq; - unsigned long usec, sec; - unsigned long max_ntp_tick; - - do { - unsigned long lost; - - seq = read_seqbegin(&xtime_lock); - - usec = cur_timer->get_offset(); - lost = jiffies - wall_jiffies; - - /* - * If time_adjust is negative then NTP is slowing the clock - * so make sure not to go into next possible interval. - * Better to lose some accuracy than have time go backwards.. - */ - if (unlikely(time_adjust < 0)) { - max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; - usec = min(usec, max_ntp_tick); - - if (lost) - usec += lost * max_ntp_tick; - } - else if (unlikely(lost)) - usec += lost * (USEC_PER_SEC / HZ); - - sec = xtime.tv_sec; - usec += (xtime.tv_nsec / 1000); - } while (read_seqretry(&xtime_lock, seq)); - - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - - tv->tv_sec = sec; - tv->tv_usec = usec; -} - -EXPORT_SYMBOL(do_gettimeofday); - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - /* - * This is revolting. We need to set "xtime" correctly. However, the - * value in this location is the value at the most recent update of - * wall time. Discover what correction gettimeofday() would have - * made, and then undo it! 
- */ - nsec -= cur_timer->get_offset() * NSEC_PER_USEC; - nsec -= (jiffies - wall_jiffies) * TICK_NSEC; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - static int set_rtc_mmss(unsigned long nowtime) { int retval; - - WARN_ON(irqs_disabled()); + unsigned long flags; /* gets recalled with irq locally disabled */ - spin_lock_irq(&rtc_lock); + /* XXX - does irqsave resolve this? -johnstul */ + spin_lock_irqsave(&rtc_lock, flags); if (efi_enabled) retval = efi_set_rtc_mmss(nowtime); else retval = mach_set_rtc_mmss(nowtime); - spin_unlock_irq(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return retval; } - -int timer_ack; - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. - */ -unsigned long long monotonic_clock(void) -{ - return cur_timer->monotonic_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -241,70 +143,6 @@ unsigned long profile_pc(struct pt_regs EXPORT_SYMBOL(profile_pc); #endif -/* - * timer_interrupt() needs to keep up the real-time clock, - * as well as call the "do_timer()" routine every clocktick - */ -static inline void do_timer_interrupt(int irq, struct pt_regs *regs) -{ -#ifdef CONFIG_X86_IO_APIC - if (timer_ack) { - /* - * Subtle, when I/O APICs are used we have to ack timer IRQ - * manually to reset the IRR bit for do_slow_gettimeoffset(). - * This will also deassert NMI lines for the watchdog if run - * on an 82489DX-based system. - */ - spin_lock(&i8259A_lock); - outb(0x0c, PIC_MASTER_OCW3); - /* Ack the IRQ; AEOI will end it automatically. */ - inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); - } -#endif - - do_timer_interrupt_hook(regs); - - - if (MCA_bus) { - /* The PS/2 uses level-triggered interrupts. You can't - turn them off, nor would you want to (any attempt to - enable edge-triggered interrupts usually gets intercepted by a - special hardware circuit). Hence we have to acknowledge - the timer interrupt. Through some incredibly stupid - design idea, the reset for IRQ 0 is done by setting the - high bit of the PPI port B (0x61). Note that some PS/2s, - notably the 55SX, work fine if this is removed. */ - - irq = inb_p( 0x61 ); /* read the current state */ - outb_p( irq|0x80, 0x61 ); /* reset the IRQ */ - } -} - -/* - * This is the same as the above, except we _also_ save the current - * Time Stamp Counter value at the time of the timer interrupt, so that - * we later on can estimate the time of day more exactly. - */ -irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. 
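Both the new pit_read() earlier in this patch and the timekeeping code being deleted here lean on the xtime_lock seqlock pattern: writers publish under write_seqlock(), readers loop until they observe a consistent snapshot. A generic kernel-flavored sketch (the demo_* names are illustrative):

	static seqlock_t demo_lock = SEQLOCK_UNLOCKED;
	static u64 demo_value;

	static u64 demo_read(void)
	{
		unsigned long seq;
		u64 v;

		do {
			seq = read_seqbegin(&demo_lock);
			v = demo_value;		/* may race with a writer */
		} while (read_seqretry(&demo_lock, seq));	/* retry if it did */

		return v;
	}

	static void demo_write(u64 v)
	{
		write_seqlock(&demo_lock);
		demo_value = v;
		write_sequnlock(&demo_lock);
	}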
-arca - */ - write_seqlock(&xtime_lock); - - cur_timer->mark_offset(); - - do_timer_interrupt(irq, regs); - - write_sequnlock(&xtime_lock); - return IRQ_HANDLED; -} - /* not static: needed by APM */ unsigned long get_cmos_time(void) { @@ -323,139 +161,42 @@ unsigned long get_cmos_time(void) } EXPORT_SYMBOL(get_cmos_time); -static void sync_cmos_clock(unsigned long dummy); - -static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); - -static void sync_cmos_clock(unsigned long dummy) +/* arch specific timeofday hooks */ +nsec_t read_persistent_clock(void) { - struct timeval now, next; - int fail = 1; + return (nsec_t)get_cmos_time() * NSEC_PER_SEC; +} +void sync_persistent_clock(struct timespec ts) +{ + static unsigned long last_rtc_update; /* * If we have an externally synchronized Linux clock, then update * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... */ - if (!ntp_synced()) - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). - */ + if (ts.tv_sec <= last_rtc_update + 660) return; - do_gettimeofday(&now); - if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && - now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) - fail = set_rtc_mmss(now.tv_sec); - - next.tv_usec = USEC_AFTER - now.tv_usec; - if (next.tv_usec <= 0) - next.tv_usec += USEC_PER_SEC; - - if (!fail) - next.tv_sec = 659; - else - next.tv_sec = 0; - - if (next.tv_usec >= USEC_PER_SEC) { - next.tv_sec++; - next.tv_usec -= USEC_PER_SEC; + if((ts.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 && + (ts.tv_nsec / 1000) <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) { + /* horrible...FIXME */ + if (set_rtc_mmss(ts.tv_sec) == 0) + last_rtc_update = ts.tv_sec; + else + last_rtc_update = ts.tv_sec - 600; /* do it again in 60 s */ } - mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next)); -} - -void notify_arch_cmos_timer(void) -{ - mod_timer(&sync_cmos_timer, jiffies + 1); -} - -static long clock_cmos_diff, sleep_start; - -static struct timer_opts *last_timer; -static int timer_suspend(struct sys_device *dev, pm_message_t state) -{ - /* - * Estimate time zone so that set_time can update the clock - */ - clock_cmos_diff = -get_cmos_time(); - clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); - last_timer = cur_timer; - cur_timer = &timer_none; - if (last_timer->suspend) - last_timer->suspend(state); - return 0; -} - -static int timer_resume(struct sys_device *dev) -{ - unsigned long flags; - unsigned long sec; - unsigned long sleep_length; - -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) - hpet_reenable(); -#endif - setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; - write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length; - wall_jiffies += sleep_length; - if (last_timer->resume) - last_timer->resume(); - cur_timer = last_timer; - last_timer = NULL; - touch_softlockup_watchdog(); - return 0; } -static struct sysdev_class timer_sysclass = { - .resume = timer_resume, - .suspend = timer_suspend, - set_kset_name("timer"), -}; - - -/* XXX this driverfs stuff should probably go elsewhere later -john */ -static struct sys_device device_timer = { - .id = 0, - .cls = 
&timer_sysclass, -}; - -static int time_init_device(void) -{ - int error = sysdev_class_register(&timer_sysclass); - if (!error) - error = sysdev_register(&device_timer); - return error; -} - -device_initcall(time_init_device); - #ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); } - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); } @@ -463,6 +204,9 @@ static void __init hpet_time_init(void) void __init time_init(void) { + /* Set the clock to HZ Hz: */ + setup_pit_timer(); + #ifdef CONFIG_HPET_TIMER if (is_hpet_capable()) { /* @@ -473,13 +217,5 @@ void __init time_init(void) return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - - cur_timer = select_timer(); - printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); - time_init_hook(); } Index: linux.prev/arch/i386/kernel/time_hpet.c =================================================================== --- linux.prev.orig/arch/i386/kernel/time_hpet.c +++ linux.prev/arch/i386/kernel/time_hpet.c @@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup); #include #include -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 @@ -303,12 +301,12 @@ int hpet_rtc_timer_init(void) else hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - local_irq_save(flags); + raw_local_irq_save(flags); cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); + raw_local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; Index: linux.prev/arch/i386/kernel/timers/Makefile =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for x86 timers -# - -obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o - -obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o -obj-$(CONFIG_HPET_TIMER) += timer_hpet.o -obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o Index: linux.prev/arch/i386/kernel/timers/common.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/common.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Common functions used across the timers go here - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "mach_timer.h" - -/* ------ Calibrate the TSC ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). - * Too much 64-bit arithmetic here to do this cleanly in C, and for - * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) - * output busy loop as low as possible. We avoid reading the CTC registers - * directly because of the awkward 8-bit access mechanism of the 82C54 - * device. 
- */ - -#define CALIBRATE_TIME (5 * 1000020/HZ) - -unsigned long calibrate_tsc(void) -{ - mach_prepare_counter(); - - { - unsigned long startlow, starthigh; - unsigned long endlow, endhigh; - unsigned long count; - - rdtsc(startlow,starthigh); - mach_countup(&count); - rdtsc(endlow,endhigh); - - - /* Error: ECTCNEVERSET */ - if (count <= 1) - goto bad_ctc; - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (endlow), "=d" (endhigh) - :"g" (startlow), "g" (starthigh), - "0" (endlow), "1" (endhigh)); - - /* Error: ECPUTOOFAST */ - if (endhigh) - goto bad_ctc; - - /* Error: ECPUTOOSLOW */ - if (endlow <= CALIBRATE_TIME) - goto bad_ctc; - - __asm__("divl %2" - :"=a" (endlow), "=d" (endhigh) - :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME)); - - return endlow; - } - - /* - * The CTC wasn't reliable: we got a hit on the very first read, - * or the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ -bad_ctc: - return 0; -} - -#ifdef CONFIG_HPET_TIMER -/* ------ Calibrate the TSC using HPET ------- - * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq. - * Second output is parameter 1 (when non NULL) - * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet(). - * calibrate_tsc() calibrates the processor TSC by comparing - * it to the HPET timer of known frequency. - * Too much 64-bit arithmetic here to do this cleanly in C - */ -#define CALIBRATE_CNT_HPET (5 * hpet_tick) -#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC) - -unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr) -{ - unsigned long tsc_startlow, tsc_starthigh; - unsigned long tsc_endlow, tsc_endhigh; - unsigned long hpet_start, hpet_end; - unsigned long result, remain; - - hpet_start = hpet_readl(HPET_COUNTER); - rdtsc(tsc_startlow, tsc_starthigh); - do { - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET); - rdtsc(tsc_endlow, tsc_endhigh); - - /* 64-bit subtract - gcc just messes up with long longs */ - __asm__("subl %2,%0\n\t" - "sbbl %3,%1" - :"=a" (tsc_endlow), "=d" (tsc_endhigh) - :"g" (tsc_startlow), "g" (tsc_starthigh), - "0" (tsc_endlow), "1" (tsc_endhigh)); - - /* Error: ECPUTOOFAST */ - if (tsc_endhigh) - goto bad_calibration; - - /* Error: ECPUTOOSLOW */ - if (tsc_endlow <= CALIBRATE_TIME_HPET) - goto bad_calibration; - - ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET); - if (remain > (tsc_endlow >> 1)) - result++; /* rounding the result */ - - if (tsc_hpet_quotient_ptr) { - unsigned long tsc_hpet_quotient; - - ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0, - CALIBRATE_CNT_HPET); - if (remain > (tsc_endlow >> 1)) - tsc_hpet_quotient++; /* rounding the result */ - *tsc_hpet_quotient_ptr = tsc_hpet_quotient; - } - - return result; -bad_calibration: - /* - * the CPU was so fast/slow that the quotient wouldn't fit in - * 32 bits.. - */ - return 0; -} -#endif - - -unsigned long read_timer_tsc(void) -{ - unsigned long retval; - rdtscl(retval); - return retval; -} - - -/* calculate cpu_khz */ -void init_cpu_khz(void) -{ - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc(); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. 
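To put numbers on the quotient: for a hypothetical 800 MHz part, 800 TSC clocks elapse per microsecond, so calibrate_tsc() returns roughly 2^32 / 800 = 5368709. The divl just below then recovers the clock rate as (1000 * 2^32) / quotient = 1000 * clocks-per-usec = 800000 kHz. A standalone sketch of the same arithmetic:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t clocks_per_usec = 800;	/* assumed 800 MHz CPU */
	uint64_t quotient = (1ULL << 32) / clocks_per_usec;
	uint64_t cpu_khz = (1000ULL << 32) / quotient;

	/* prints: quotient=5368709 cpu_khz=800000 */
	printf("quotient=%llu cpu_khz=%llu\n",
	       (unsigned long long)quotient,
	       (unsigned long long)cpu_khz);
	return 0;
}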
- */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - } - } -} - Index: linux.prev/arch/i386/kernel/timers/timer.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer.c +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -#include - -#ifdef CONFIG_HPET_TIMER -/* - * HPET memory read is slower than tsc reads, but is more dependable as it - * always runs at constant frequency and reduces complexity due to - * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use - * timer_pit when HPET is active. So, we default to timer_tsc. - */ -#endif -/* list of timers, ordered by preference, NULL terminated */ -static struct init_timer_opts* __initdata timers[] = { -#ifdef CONFIG_X86_CYCLONE_TIMER - &timer_cyclone_init, -#endif -#ifdef CONFIG_HPET_TIMER - &timer_hpet_init, -#endif -#ifdef CONFIG_X86_PM_TIMER - &timer_pmtmr_init, -#endif - &timer_tsc_init, - &timer_pit_init, - NULL, -}; - -static char clock_override[10] __initdata; - -static int __init clock_setup(char* str) -{ - if (str) - strlcpy(clock_override, str, sizeof(clock_override)); - return 1; -} -__setup("clock=", clock_setup); - - -/* The chosen timesource has been found to be bad. - * Fall back to a known good timesource (the PIT) - */ -void clock_fallback(void) -{ - cur_timer = &timer_pit; -} - -/* iterates through the list of timers, returning the first - * one that initializes successfully. - */ -struct timer_opts* __init select_timer(void) -{ - int i = 0; - - /* find most preferred working timer */ - while (timers[i]) { - if (timers[i]->init) - if (timers[i]->init(clock_override) == 0) - return timers[i]->opts; - ++i; - } - - panic("select_timer: Cannot find a suitable timer\n"); - return NULL; -} - -int read_current_timer(unsigned long *timer_val) -{ - if (cur_timer->read_timer) { - *timer_val = cur_timer->read_timer(); - return 0; - } - return -1; -} Index: linux.prev/arch/i386/kernel/timers/timer_cyclone.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_cyclone.c +++ /dev/null @@ -1,259 +0,0 @@ -/* Cyclone-timer: - * This code implements timer_ops for the cyclone counter found - * on IBM x440, x360, and other Summit based systems. 
- * - * Copyright (C) 2002 IBM, John Stultz (johnstul@us.ibm.com) - */ - - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "io_ports.h" - -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -#define CYCLONE_CBAR_ADDR 0xFEB00CD0 -#define CYCLONE_PMCC_OFFSET 0x51A0 -#define CYCLONE_MPMC_OFFSET 0x51D0 -#define CYCLONE_MPCS_OFFSET 0x51A8 -#define CYCLONE_TIMER_FREQ 100000000 -#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */ -int use_cyclone = 0; - -static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ -static u32 last_cyclone_low; -static u32 last_cyclone_high; -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* helper macro to atomically read both cyclone counter registers */ -#define read_cyclone_counter(low,high) \ - do{ \ - high = cyclone_timer[1]; low = cyclone_timer[0]; \ - } while (high != cyclone_timer[1]); - - -static void mark_offset_cyclone(void) -{ - unsigned long lost, delay; - unsigned long delta = last_cyclone_low; - int count; - unsigned long long this_offset, last_offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - - spin_lock(&i8253_lock); - read_cyclone_counter(last_cyclone_low,last_cyclone_high); - - /* read values for delay_at_last_interrupt */ - outb_p(0x00, 0x43); /* latch the count ASAP */ - - count = inb_p(0x40); /* read the latched count */ - count |= inb(0x40) << 8; - - /* - * VIA686a test code... reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - spin_unlock(&i8253_lock); - - /* lost tick compensation */ - delta = last_cyclone_low - delta; - delta /= (CYCLONE_TIMER_FREQ/1000000); - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) - jiffies_64 += lost-1; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK; - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - - /* catch corner case where tick rollover occured - * between cyclone and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static unsigned long get_offset_cyclone(void) -{ - u32 offset; - - if(!cyclone_timer) - return delay_at_last_interrupt; - - /* Read the cyclone timer */ - offset = cyclone_timer[0]; - - /* .. relative to previous jiffy */ - offset = offset - last_cyclone_low; - - /* convert cyclone ticks to microseconds */ - /* XXX slow, can we speed this up? 
*/ - offset = offset/(CYCLONE_TIMER_FREQ/1000000); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + offset; -} - -static unsigned long long monotonic_clock_cyclone(void) -{ - u32 now_low, now_high; - unsigned long long last_offset, this_offset, base; - unsigned long long ret; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - - /* Read the cyclone counter */ - read_cyclone_counter(now_low,now_high); - this_offset = ((unsigned long long)now_high<<32)|now_low; - - /* convert to nanoseconds */ - ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK); - return ret * (1000000000 / CYCLONE_TIMER_FREQ); -} - -static int __init init_cyclone(char* override) -{ - u32* reg; - u32 base; /* saved cyclone base address */ - u32 pageaddr; /* page that contains cyclone_timer register */ - u32 offset; /* offset from pageaddr to cyclone_timer register */ - int i; - - /* check clock override */ - if (override[0] && strncmp(override,"cyclone",7)) - return -ENODEV; - - /*make sure we're on a summit box*/ - if(!use_cyclone) return -ENODEV; - - printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); - - /* find base address */ - pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK; - offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); - return -ENODEV; - } - base = *reg; - if(!base){ - printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); - return -ENODEV; - } - - /* setup PMCC */ - pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* setup MPCS */ - pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!reg){ - printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); - return -ENODEV; - } - reg[0] = 0x00000001; - - /* map in cyclone_timer */ - pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK; - offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK); - set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr); - cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset); - if(!cyclone_timer){ - printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); - return -ENODEV; - } - - /*quick test to make sure its ticking*/ - for(i=0; i<3; i++){ - u32 old = cyclone_timer[0]; - int stall = 100; - while(stall--) barrier(); - if(cyclone_timer[0] == old){ - printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); - cyclone_timer = 0; - return -ENODEV; - } - } - - init_cpu_khz(); - - /* Everything looks good! 
*/ - return 0; -} - - -static void delay_cyclone(unsigned long loops) -{ - unsigned long bclock, now; - if(!cyclone_timer) - return; - bclock = cyclone_timer[0]; - do { - rep_nop(); - now = cyclone_timer[0]; - } while ((now-bclock) < loops); -} -/************************************************************/ - -/* cyclone timer_opts struct */ -static struct timer_opts timer_cyclone = { - .name = "cyclone", - .mark_offset = mark_offset_cyclone, - .get_offset = get_offset_cyclone, - .monotonic_clock = monotonic_clock_cyclone, - .delay = delay_cyclone, -}; - -struct init_timer_opts __initdata timer_cyclone_init = { - .init = init_cyclone, - .opts = &timer_cyclone, -}; Index: linux.prev/arch/i386/kernel/timers/timer_hpet.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_hpet.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "io_ports.h" -#include "mach_timer.h" -#include - -static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ -static unsigned long hpet_last; /* hpet counter value at last tick*/ -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better percision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
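The scaling trick above is easy to sanity-check numerically: for an assumed cpu_khz of 2000000 (2 GHz), cyc2ns_scale = (10^6 << 10) / 2000000 = 512, i.e. each cycle contributes 512/1024 = 0.5 ns, exactly what a 2 GHz clock should give. A runnable userspace check:

#include <stdint.h>
#include <stdio.h>

#define SC 10	/* CYC2NS_SCALE_FACTOR: 2^10 */

int main(void)
{
	uint32_t cpu_khz = 2000000;			/* assumed 2 GHz */
	uint32_t scale = (1000000 << SC) / cpu_khz;	/* = 512 */
	uint64_t cycles = 2000000000ULL;		/* one second's worth */

	/* prints 1000000000, i.e. one second in ns */
	printf("%llu ns\n", (unsigned long long)((cycles * scale) >> SC));
	return 0;
}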
- */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_khz) -{ - cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static unsigned long long monotonic_clock_hpet(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -static unsigned long get_offset_hpet(void) -{ - register unsigned long eax, edx; - - eax = hpet_readl(HPET_COUNTER); - eax -= hpet_last; /* hpet delta */ - eax = min(hpet_tick, eax); - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - * - * Using a mull instead of a divl saves some cycles in critical path. - */ - ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax); - - /* our adjusted time offset in microseconds */ - return edx; -} - -static void mark_offset_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - offset = hpet_readl(HPET_COUNTER); - if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) { - int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1; - jiffies_64 += lost_ticks; - } - hpet_last = offset; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); -} - -static void delay_hpet(unsigned long loops) -{ - unsigned long hpet_start, hpet_end; - unsigned long eax; - - /* loops is the number of cpu cycles. Convert it to hpet clocks */ - ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops); - - hpet_start = hpet_readl(HPET_COUNTER); - do { - rep_nop(); - hpet_end = hpet_readl(HPET_COUNTER); - } while ((hpet_end - hpet_start) < (loops)); -} - -static struct timer_opts timer_hpet; - -static int __init init_hpet(char* override) -{ - unsigned long result, remain; - - /* check clock override */ - if (override[0] && strncmp(override,"hpet",4)) - return -ENODEV; - - if (!is_hpet_enabled()) - return -ENODEV; - - printk("Using HPET for gettimeofday\n"); - if (cpu_has_tsc) { - unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient); - if (tsc_quotient) { - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. 
- */ - { unsigned long eax=0, edx=1000; - ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, - eax, edx); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz); - } - /* set this only when cpu_has_tsc */ - timer_hpet.read_timer = read_timer_tsc; - } - - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - hpet_usec_quotient = result; - - return 0; -} - -static int hpet_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); - - if (hpet_use_timer) - hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - hpet_last = hpet_readl(HPET_COUNTER); - write_sequnlock(&monotonic_lock); - return 0; -} -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_hpet __read_mostly = { - .name = "hpet", - .mark_offset = mark_offset_hpet, - .get_offset = get_offset_hpet, - .monotonic_clock = monotonic_clock_hpet, - .delay = delay_hpet, - .resume = hpet_resume, -}; - -struct init_timer_opts __initdata timer_hpet_init = { - .init = init_hpet, - .opts = &timer_hpet, -}; Index: linux.prev/arch/i386/kernel/timers/timer_none.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_none.c +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include - -static void mark_offset_none(void) -{ - /* nothing needed */ -} - -static unsigned long get_offset_none(void) -{ - return 0; -} - -static unsigned long long monotonic_clock_none(void) -{ - return 0; -} - -static void delay_none(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - -/* none timer_opts struct */ -struct timer_opts timer_none = { - .name = "none", - .mark_offset = mark_offset_none, - .get_offset = get_offset_none, - .monotonic_clock = monotonic_clock_none, - .delay = delay_none, -}; Index: linux.prev/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_pit.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "do_timer.h" -#include "io_ports.h" - -static int count_p; /* counter in get_offset_pit() */ - -static int __init init_pit(char* override) -{ - /* check clock override */ - if (override[0] && strncmp(override,"pit",3)) - printk(KERN_ERR "Warning: clock= override failed. Defaulting " - "to PIT\n"); - init_cpu_khz(); - count_p = LATCH; - return 0; -} - -static void mark_offset_pit(void) -{ - /* nothing needed */ -} - -static unsigned long long monotonic_clock_pit(void) -{ - return 0; -} - -static void delay_pit(unsigned long loops) -{ - int d0; - __asm__ __volatile__( - "\tjmp 1f\n" - ".align 16\n" - "1:\tjmp 2f\n" - ".align 16\n" - "2:\tdecl %0\n\tjns 2b" - :"=&a" (d0) - :"0" (loops)); -} - - -/* This function must be called with xtime_lock held. - * It was inspired by Steve McCanne's microtime-i386 for BSD. 
-- jrs - * - * However, the pc-audio speaker driver changes the divisor so that - * it gets interrupted rather more often - it loads 64 into the - * counter rather than 11932! This has an adverse impact on - * do_gettimeoffset() -- it stops working! What is also not - * good is that the interval that our timer function gets called - * is no longer 10.0002 ms, but 9.9767 ms. To get around this - * would require using a different timing source. Maybe someone - * could use the RTC - I know that this can interrupt at frequencies - * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix - * it so that at startup, the timer code in sched.c would select - * using either the RTC or the 8253 timer. The decision would be - * based on whether there was any other device around that needed - * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz, - * and then do some jiggery to have a version of do_timer that - * advanced the clock by 1/1024 s. Every time that reached over 1/100 - * of a second, then do all the old code. If the time was kept correct - * then do_gettimeoffset could just return 0 - there is no low order - * divider that can be accessed. - * - * Ideally, you would be able to use the RTC for the speaker driver, - * but it appears that the speaker driver really needs interrupt more - * often than every 120 us or so. - * - * Anyway, this needs more thought.... pjsg (1993-08-28) - * - * If you are really that interested, you should be reading - * comp.protocols.time.ntp! - */ - -static unsigned long get_offset_pit(void) -{ - int count; - unsigned long flags; - static unsigned long jiffies_p = 0; - - /* - * cache volatile jiffies temporarily; we have xtime_lock. - */ - unsigned long jiffies_t; - - spin_lock_irqsave(&i8253_lock, flags); - /* timer count may underflow right here */ - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - - /* - * We do this guaranteed double memory access instead of a _p - * postfix in the previous port access. Wheee, hackady hack - */ - jiffies_t = jiffies; - - count |= inb_p(PIT_CH0) << 8; - - /* VIA686a test code... reset the latch if count > max + 1 */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - /* - * avoiding timer inconsistencies (they are rare, but they happen)... - * there are two kinds of problems that must be avoided here: - * 1. the timer counter underflows - * 2. hardware problem with the timer, not giving us continuous time, - * the counter does small "jumps" upwards on some Pentium systems, - * (see c't 95/10 page 335 for Neptun bug.) 
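Once a sane latched count is in hand, the conversion just below turns it into microseconds within the current tick: the counter falls from LATCH-1 toward 0, so the distance it has already fallen, scaled by the tick length, gives the offset. A minimal sketch of that conversion with explicit rounding, assuming tick_usec is the tick length in microseconds (about 10000 at HZ=100):

/* count: latched PIT value; latch: reload value; tick_usec: usec per tick */
static unsigned long pit_count_to_usec(int count, int latch, int tick_usec)
{
	int elapsed = ((latch - 1) - count) * tick_usec;

	/* adding latch/2 rounds to the nearest microsecond */
	return (elapsed + latch / 2) / latch;
}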
- */ - - if( jiffies_t == jiffies_p ) { - if( count > count_p ) { - /* the nutcase */ - count = do_timer_overflow(count); - } - } else - jiffies_p = jiffies_t; - - count_p = count; - - spin_unlock_irqrestore(&i8253_lock, flags); - - count = ((LATCH-1) - count) * TICK_SIZE; - count = (count + LATCH/2) / LATCH; - - return count; -} - - -/* tsc timer_opts struct */ -struct timer_opts timer_pit = { - .name = "pit", - .mark_offset = mark_offset_pit, - .get_offset = get_offset_pit, - .monotonic_clock = monotonic_clock_pit, - .delay = delay_pit, -}; - -struct init_timer_opts __initdata timer_pit_init = { - .init = init_pit, - .opts = &timer_pit, -}; - -void setup_pit_timer(void) -{ - unsigned long flags; - - spin_lock_irqsave(&i8253_lock, flags); - outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ - udelay(10); - outb_p(LATCH & 0xff , PIT_CH0); /* LSB */ - udelay(10); - outb(LATCH >> 8 , PIT_CH0); /* MSB */ - spin_unlock_irqrestore(&i8253_lock, flags); -} Index: linux.prev/arch/i386/kernel/timers/timer_pm.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_pm.c +++ /dev/null @@ -1,268 +0,0 @@ -/* - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * southbridges as primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "mach_timer.h" - -/* Number of PMTMR ticks expected during calibration run */ -#define PMTMR_TICKS_PER_SEC 3579545 -#define PMTMR_EXPECTED_RATE \ - ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) - - -/* The I/O port the PMTMR resides at. - * The location is detected during setup_arch(), - * in arch/i386/acpi/boot.c */ -u32 pmtmr_ioport = 0; - - -/* value of the Power timer at last timer interrupt */ -static u32 offset_tick; -static u32 offset_delay; - -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -/*helper function to safely read acpi pm timesource*/ -static inline u32 read_pmtmr(void) -{ - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; -} - - -/* - * Some boards have the PMTMR running way too fast. We check - * the PMTMR rate against PIT channel 2 to catch these cases. 
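The triple read in read_pmtmr() above is the standard idiom for an unlatched counter that can glitch mid-read: sample three times and only trust the middle value once it sits between its neighbours (modulo wrap). A condensed, self-contained version, with read_reg() as a stand-in for inl(pmtmr_ioport):

static unsigned int read_unlatched(unsigned int (*read_reg)(void))
{
	unsigned int v1, v2, v3;

	do {
		v1 = read_reg();
		v2 = read_reg();
		v3 = read_reg();
	} while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
			|| (v3 > v1 && v3 < v2));

	/* v2 is bracketed by v1 and v3, so it cannot be a stray glitch */
	return v2;
}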
- */ -static int verify_pmtmr_rate(void) -{ - u32 value1, value2; - unsigned long count, delta; - - mach_prepare_counter(); - value1 = read_pmtmr(); - mach_countup(&count); - value2 = read_pmtmr(); - delta = (value2 - value1) & ACPI_PM_MASK; - - /* Check that the PMTMR delta is within 5% of what we expect */ - if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || - delta > (PMTMR_EXPECTED_RATE * 21) / 20) { - printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); - return -1; - } - - return 0; -} - - -static int init_pmtmr(char* override) -{ - u32 value1, value2; - unsigned int i; - - if (override[0] && strncmp(override,"pmtmr",5)) - return -ENODEV; - - if (!pmtmr_ioport) - return -ENODEV; - - /* we use the TSC for delay_pmtmr, so make sure it exists */ - if (!cpu_has_tsc) - return -ENODEV; - - /* "verify" this timing source */ - value1 = read_pmtmr(); - for (i = 0; i < 10000; i++) { - value2 = read_pmtmr(); - if (value2 == value1) - continue; - if (value2 > value1) - goto pm_good; - if ((value2 < value1) && ((value2) < 0xFFF)) - goto pm_good; - printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2); - return -EINVAL; - } - printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1); - return -ENODEV; - -pm_good: - if (verify_pmtmr_rate() != 0) - return -ENODEV; - - init_cpu_khz(); - return 0; -} - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. 
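The 286/1024 constant above deserves a numeric check: the ACPI PM timer ticks at 3579545 Hz, i.e. 0.27936511 us per tick, while 286/1024 = 0.27929688, within the quoted 0.024%. For one full HZ=100 tick (35796 PM timer ticks), (35796 * 286) >> 10 = 9997 us against a true 10000 us. A runnable check of that worked example:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ticks = 35796;	/* one 10 ms tick at HZ=100 */

	/* prints 9997 (true value ~10000; about 0.024% low) */
	printf("%u us\n", (ticks * 286) >> 10);
	return 0;
}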
- */ - cycles *= 286; - return (cycles >> 10); -} - -/* - * this gets called during each timer interrupt - * - Called while holding the writer xtime_lock - */ -static void mark_offset_pmtmr(void) -{ - u32 lost, delta, last_offset; - static int first_run = 1; - last_offset = offset_tick; - - write_seqlock(&monotonic_lock); - - offset_tick = read_pmtmr(); - - /* calculate tick interval */ - delta = (offset_tick - last_offset) & ACPI_PM_MASK; - - /* convert to usecs */ - delta = cyc2us(delta); - - /* update the monotonic base value */ - monotonic_base += delta * NSEC_PER_USEC; - write_sequnlock(&monotonic_lock); - - /* convert to ticks */ - delta += offset_delay; - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - - /* compensate for lost ticks */ - if (lost >= 2) - jiffies_64 += lost - 1; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } -} - -static int pmtmr_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - offset_tick = read_pmtmr(); - write_sequnlock(&monotonic_lock); - return 0; -} - -static unsigned long long monotonic_clock_pmtmr(void) -{ - u32 last_offset, this_offset; - unsigned long long base, ret; - unsigned seq; - - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = offset_tick; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the pmtmr */ - this_offset = read_pmtmr(); - - /* convert to nanoseconds */ - ret = (this_offset - last_offset) & ACPI_PM_MASK; - ret = base + (cyc2us(ret) * NSEC_PER_USEC); - return ret; -} - -static void delay_pmtmr(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - - -/* - * get the offset (in microseconds) from the last call to mark_offset() - * - Called holding a reader xtime_lock - */ -static unsigned long get_offset_pmtmr(void) -{ - u32 now, offset, delta = 0; - - offset = offset_tick; - now = read_pmtmr(); - delta = (now - offset)&ACPI_PM_MASK; - - return (unsigned long) offset_delay + cyc2us(delta); -} - - -/* acpi timer_opts struct */ -static struct timer_opts timer_pmtmr = { - .name = "pmtmr", - .mark_offset = mark_offset_pmtmr, - .get_offset = get_offset_pmtmr, - .monotonic_clock = monotonic_clock_pmtmr, - .delay = delay_pmtmr, - .read_timer = read_timer_tsc, - .resume = pmtmr_resume, -}; - -struct init_timer_opts __initdata timer_pmtmr_init = { - .init = init_pmtmr, - .opts = &timer_pmtmr, -}; - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Dominik Brodowski "); -MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); Index: linux.prev/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux.prev.orig/arch/i386/kernel/timers/timer_tsc.c +++ /dev/null @@ -1,600 +0,0 @@ -/* - * This code largely moved from arch/i386/kernel/time.c. - * See comments there for proper credits. - * - * 2004-06-25 Jesper Juhl - * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4 - * failing to inline. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -/* processor.h for distable_tsc flag */ -#include - -#include "io_ports.h" -#include "mach_timer.h" - -#include -#include - -#ifdef CONFIG_HPET_TIMER -static unsigned long hpet_usec_quotient; -static unsigned long hpet_last; -static struct timer_opts timer_tsc; -#endif - -static inline void cpufreq_delayed_get(void); - -int tsc_disable __devinitdata = 0; - -static int use_tsc; -/* Number of usecs that the last interrupt was delayed */ -static int delay_at_last_interrupt; - -static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ -static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ -static unsigned long long monotonic_base; -static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_khz * 10^3)) - * ns = cycles * (10^6 / cpu_khz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^6 * SC / cpu_khz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * - * We can use khz divisor instead of mhz to keep a better percision, since - * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. - * (mathieu.desnoyers@polymtl.ca) - * - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ -static unsigned long cyc2ns_scale; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ - -static inline void set_cyc2ns_scale(unsigned long cpu_khz) -{ - cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; -} - -static int count2; /* counter for mark_offset_tsc() */ - -/* Cached *multiplier* to convert TSC counts to microseconds. - * (see the equation below). - * Equal to 2^32 * (1 / (clocks per usec) ). - * Initialized in time_init. - */ -static unsigned long fast_gettimeoffset_quotient; - -static unsigned long get_offset_tsc(void) -{ - register unsigned long eax, edx; - - /* Read the Time Stamp Counter */ - - rdtsc(eax,edx); - - /* .. relative to previous jiffy (32 bits is enough) */ - eax -= last_tsc_low; /* tsc_low delta */ - - /* - * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient - * = (tsc_low delta) * (usecs_per_clock) - * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) - * - * Using a mull instead of a divl saves up to 31 clock cycles - * in the critical path. - */ - - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - - /* our adjusted time offset in microseconds */ - return delay_at_last_interrupt + edx; -} - -static unsigned long long monotonic_clock_tsc(void) -{ - unsigned long long last_offset, this_offset, base; - unsigned seq; - - /* atomically read monotonic base & last_offset */ - do { - seq = read_seqbegin(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - base = monotonic_base; - } while (read_seqretry(&monotonic_lock, seq)); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return base + cycles_2_ns(this_offset - last_offset); -} - -/* - * Scheduler clock - returns current time in nanosec units. 
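Before the scheduler clock below, note that monotonic_clock_tsc() above and its cyclone, hpet and pmtmr siblings all share one shape: a nanosecond base accumulated at every tick under the seqlock, plus the free-running delta since that tick. A condensed userspace model of the idea (locking elided here; the seqlock sketch earlier shows it), with a toy counter where one count equals 10 ns:

#include <stdint.h>
#include <stdio.h>

static uint64_t base_ns;	/* accumulated at each tick */
static uint64_t last_count;	/* raw counter value at last tick */

static uint64_t to_ns(uint64_t delta) { return delta * 10; }

/* writer side: fold the elapsed counter delta into the base */
static void tick(uint64_t now)
{
	base_ns += to_ns(now - last_count);
	last_count = now;
}

/* reader side: base plus whatever has elapsed since the tick */
static uint64_t monotonic(uint64_t now)
{
	return base_ns + to_ns(now - last_count);
}

int main(void)
{
	tick(100);
	/* prints 1500: 1000 ns banked at the tick + 500 ns since */
	printf("%llu\n", (unsigned long long)monotonic(150));
	return 0;
}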
- */ -unsigned long long sched_clock(void) -{ - unsigned long long this_offset; - - /* - * In the NUMA case we dont use the TSC as they are not - * synchronized across all CPUs. - */ -#ifndef CONFIG_NUMA - if (!use_tsc) -#endif - /* no locking but a rare wrong value is not a big deal */ - return jiffies_64 * (1000000000 / HZ); - - /* Read the Time Stamp Counter */ - rdtscll(this_offset); - - /* return the value in ns */ - return cycles_2_ns(this_offset); -} - -static void delay_tsc(unsigned long loops) -{ - unsigned long bclock, now; - - rdtscl(bclock); - do - { - rep_nop(); - rdtscl(now); - } while ((now-bclock) < loops); -} - -#ifdef CONFIG_HPET_TIMER -static void mark_offset_tsc_hpet(void) -{ - unsigned long long this_offset, last_offset; - unsigned long offset, temp, hpet_current; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. -arca - */ - /* read Pentium cycle counter */ - - hpet_current = hpet_readl(HPET_COUNTER); - rdtsc(last_tsc_low, last_tsc_high); - - /* lost tick compensation */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) { - int lost_ticks = (offset - hpet_last) / hpet_tick; - jiffies_64 += lost_ticks; - } - hpet_last = hpet_current; - - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - /* - * Time offset = (hpet delta) * ( usecs per HPET clock ) - * = (hpet delta) * ( usecs per tick / HPET clocks per tick) - * = (hpet delta) * ( hpet_usec_quotient ) / (2^32) - * Where, - * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick - */ - delay_at_last_interrupt = hpet_current - offset; - ASM_MUL64_REG(temp, delay_at_last_interrupt, - hpet_usec_quotient, delay_at_last_interrupt); -} -#endif - - -#ifdef CONFIG_CPU_FREQ -#include - -static unsigned int cpufreq_delayed_issched = 0; -static unsigned int cpufreq_init = 0; -static struct work_struct cpufreq_delayed_get_work; - -static void handle_cpufreq_delayed_get(void *v) -{ - unsigned int cpu; - for_each_online_cpu(cpu) { - cpufreq_get(cpu); - } - cpufreq_delayed_issched = 0; -} - -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static inline void cpufreq_delayed_get(void) -{ - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); - schedule_work(&cpufreq_delayed_get_work); - } -} - -/* If the CPU frequency is scaled, TSC-based delays will need a different - * loops_per_jiffy value to function properly. 
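The rescaling the notifier below applies via cpufreq_scale() is, in essence, old * new / ref carried out in 64 bits so the product cannot overflow. For example, a CPU calibrated at 1000000 kHz with loops_per_jiffy = 500000 that drops to 600000 kHz ends up with 500000 * 600000 / 1000000 = 300000. A hedged sketch of that arithmetic, not the kernel's exact helper:

#include <stdint.h>

static unsigned long example_scale(unsigned long old,
				   uint32_t ref_khz, uint32_t new_khz)
{
	/* widen before multiplying: 500000 * 600000 needs 64 bits */
	return (unsigned long)((uint64_t)old * new_khz / ref_khz);
}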
- */ - -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; - -#ifndef CONFIG_SMP -static unsigned long fast_gettimeoffset_ref = 0; -static unsigned int cpu_khz_ref = 0; -#endif - -static int -time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - - if (val != CPUFREQ_RESUMECHANGE) - write_seqlock_irq(&xtime_lock); - if (!ref_freq) { - ref_freq = freq->old; - loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; -#ifndef CONFIG_SMP - fast_gettimeoffset_ref = fast_gettimeoffset_quotient; - cpu_khz_ref = cpu_khz; -#endif - } - - if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || - (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || - (val == CPUFREQ_RESUMECHANGE)) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) - cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); -#ifndef CONFIG_SMP - if (cpu_khz) - cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (use_tsc) { - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { - fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz); - } - } -#endif - } - - if (val != CPUFREQ_RESUMECHANGE) - write_sequnlock_irq(&xtime_lock); - - return 0; -} - -static struct notifier_block time_cpufreq_notifier_block = { - .notifier_call = time_cpufreq_notifier -}; - - -static int __init cpufreq_tsc(void) -{ - int ret; - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); - ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (!ret) - cpufreq_init = 1; - return ret; -} -core_initcall(cpufreq_tsc); - -#else /* CONFIG_CPU_FREQ */ -static inline void cpufreq_delayed_get(void) { return; } -#endif - -int recalibrate_cpu_khz(void) -{ -#ifndef CONFIG_SMP - unsigned int cpu_khz_old = cpu_khz; - - if (cpu_has_tsc) { - init_cpu_khz(); - cpu_data[0].loops_per_jiffy = - cpufreq_scale(cpu_data[0].loops_per_jiffy, - cpu_khz_old, - cpu_khz); - return 0; - } else - return -ENODEV; -#else - return -ENODEV; -#endif -} -EXPORT_SYMBOL(recalibrate_cpu_khz); - -static void mark_offset_tsc(void) -{ - unsigned long lost,delay; - unsigned long delta = last_tsc_low; - int count; - int countmp; - static int count1 = 0; - unsigned long long this_offset, last_offset; - static int lost_count = 0; - - write_seqlock(&monotonic_lock); - last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - /* - * It is important that these two operations happen almost at - * the same time. We do the RDTSC stuff first, since it's - * faster. To avoid any inconsistencies, we need interrupts - * disabled locally. - */ - - /* - * Interrupts are just disabled locally since the timer irq - * has the SA_INTERRUPT flag set. -arca - */ - - /* read Pentium cycle counter */ - - rdtsc(last_tsc_low, last_tsc_high); - - spin_lock(&i8253_lock); - outb_p(0x00, PIT_MODE); /* latch the count ASAP */ - - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb(PIT_CH0) << 8; - - /* - * VIA686a test code... 
reset the latch if count > max + 1 - * from timer_pit.c - cjb - */ - if (count > LATCH) { - outb_p(0x34, PIT_MODE); - outb_p(LATCH & 0xff, PIT_CH0); - outb(LATCH >> 8, PIT_CH0); - count = LATCH - 1; - } - - spin_unlock(&i8253_lock); - - if (pit_latch_buggy) { - /* get center value of last 3 time lutch */ - if ((count2 >= count && count >= count1) - || (count1 >= count && count >= count2)) { - count2 = count1; count1 = count; - } else if ((count1 >= count2 && count2 >= count) - || (count >= count2 && count2 >= count1)) { - countmp = count;count = count2; - count2 = count1;count1 = countmp; - } else { - count2 = count1; count1 = count; count = count1; - } - } - - /* lost tick compensation */ - delta = last_tsc_low - delta; - { - register unsigned long eax, edx; - eax = delta; - __asm__("mull %2" - :"=a" (eax), "=d" (edx) - :"rm" (fast_gettimeoffset_quotient), - "0" (eax)); - delta = edx; - } - delta += delay_at_last_interrupt; - lost = delta/(1000000/HZ); - delay = delta%(1000000/HZ); - if (lost >= 2) { - jiffies_64 += lost-1; - - /* sanity check to ensure we're not always losing ticks */ - if (lost_count++ > 100) { - printk(KERN_WARNING "Losing too many ticks!\n"); - printk(KERN_WARNING "TSC cannot be used as a timesource. \n"); - printk(KERN_WARNING "Possible reasons for this are:\n"); - printk(KERN_WARNING " You're running with Speedstep,\n"); - printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n"); - printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n"); - printk(KERN_WARNING "Falling back to a sane timesource now.\n"); - - clock_fallback(); - } - /* ... but give the TSC a fair chance */ - if (lost_count > 25) - cpufreq_delayed_get(); - } else - lost_count = 0; - /* update the monotonic base value */ - this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; - monotonic_base += cycles_2_ns(this_offset - last_offset); - write_sequnlock(&monotonic_lock); - - /* calculate delay_at_last_interrupt */ - count = ((LATCH-1) - count) * TICK_SIZE; - delay_at_last_interrupt = (count + LATCH/2) / LATCH; - - /* catch corner case where tick rollover occured - * between tsc and pit reads (as noted when - * usec delta is > 90% # of usecs/tick) - */ - if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) - jiffies_64++; -} - -static int __init init_tsc(char* override) -{ - - /* check clock override */ - if (override[0] && strncmp(override,"tsc",3)) { -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled()) { - printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n"); - } else -#endif - { - return -ENODEV; - } - } - - /* - * If we have APM enabled or the CPU clock speed is variable - * (CPU stops clock on HLT or slows clock to save power) - * then the TSC timestamps may diverge by up to 1 jiffy from - * 'real time' but nothing will break. - * The most frequent case is that the CPU is "woken" from a halt - * state by the timer interrupt itself, so we get 0 error. In the - * rare cases where a driver would "wake" the CPU and request a - * timestamp, the maximum error is < 1 jiffy. But timestamps are - * still perfectly ordered. - * Note that the TSC counter will be reset if APM suspends - * to disk; this won't break the kernel, though, 'cuz we're - * smart. See arch/i386/kernel/apm.c. - */ - /* - * Firstly we have to do a CPU check for chips with - * a potentially buggy TSC. At this point we haven't run - * the ident/bugs checks so we must run this hook as it - * may turn off the TSC flag. 
- * - * NOTE: this doesn't yet handle SMP 486 machines where only - * some CPU's have a TSC. Thats never worked and nobody has - * moaned if you have the only one in the world - you fix it! - */ - - count2 = LATCH; /* initialize counter for mark_offset_tsc() */ - - if (cpu_has_tsc) { - unsigned long tsc_quotient; -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) { - unsigned long result, remain; - printk("Using TSC for gettimeofday\n"); - tsc_quotient = calibrate_tsc_hpet(NULL); - timer_tsc.mark_offset = &mark_offset_tsc_hpet; - /* - * Math to calculate hpet to usec multiplier - * Look for the comments at get_offset_tsc_hpet() - */ - ASM_DIV64_REG(result, remain, hpet_tick, - 0, KERNEL_TICK_USEC); - if (remain > (hpet_tick >> 1)) - result++; /* rounding the result */ - - hpet_usec_quotient = result; - } else -#endif - { - tsc_quotient = calibrate_tsc(); - } - - if (tsc_quotient) { - fast_gettimeoffset_quotient = tsc_quotient; - use_tsc = 1; - /* - * We could be more selective here I suspect - * and just enable this for the next intel chips ? - */ - /* report CPU clock rate in Hz. - * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) = - * clock/second. Our precision is about 100 ppm. - */ - { unsigned long eax=0, edx=1000; - __asm__("divl %2" - :"=a" (cpu_khz), "=d" (edx) - :"r" (tsc_quotient), - "0" (eax), "1" (edx)); - printk("Detected %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - } - set_cyc2ns_scale(cpu_khz); - return 0; - } - } - return -ENODEV; -} - -static int tsc_resume(void) -{ - write_seqlock(&monotonic_lock); - /* Assume this is the last mark offset time */ - rdtsc(last_tsc_low, last_tsc_high); -#ifdef CONFIG_HPET_TIMER - if (is_hpet_enabled() && hpet_use_timer) - hpet_last = hpet_readl(HPET_COUNTER); -#endif - write_sequnlock(&monotonic_lock); - return 0; -} - -#ifndef CONFIG_X86_TSC -/* disable flag for tsc. 
Takes effect by clearing the TSC cpu flag - * in cpu/common.c */ -static int __init tsc_setup(char *str) -{ - tsc_disable = 1; - return 1; -} -#else -static int __init tsc_setup(char *str) -{ - printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC.\n"); - return 1; -} -#endif -__setup("notsc", tsc_setup); - - - -/************************************************************/ - -/* tsc timer_opts struct */ -static struct timer_opts timer_tsc = { - .name = "tsc", - .mark_offset = mark_offset_tsc, - .get_offset = get_offset_tsc, - .monotonic_clock = monotonic_clock_tsc, - .delay = delay_tsc, - .read_timer = read_timer_tsc, - .resume = tsc_resume, -}; - -struct init_timer_opts __initdata timer_tsc_init = { - .init = init_tsc, - .opts = &timer_tsc, -}; Index: linux.prev/arch/i386/kernel/traps.c =================================================================== --- linux.prev.orig/arch/i386/kernel/traps.c +++ linux.prev/arch/i386/kernel/traps.c @@ -93,7 +93,7 @@ asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +static DEFINE_RAW_SPINLOCK(die_notifier_lock); int register_die_notifier(struct notifier_block *nb) { @@ -116,22 +116,27 @@ static inline unsigned long print_contex unsigned long *stack, unsigned long ebp) { unsigned long addr; +#ifndef CONFIG_FRAME_POINTER + unsigned long prev_frame; +#endif -#ifdef CONFIG_FRAME_POINTER +#ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); printk(" [<%08lx>] ", addr); print_symbol("%s", addr); - printk("\n"); + printk(" (%ld)\n", *(unsigned long *)ebp - ebp); ebp = *(unsigned long *)ebp; } #else + prev_frame = (unsigned long)stack; while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) { printk(" [<%08lx>]", addr); print_symbol(" %s", addr); - printk("\n"); + printk(" (%ld)\n", (unsigned long)stack - prev_frame); + prev_frame = (unsigned long)stack; } } #endif @@ -163,6 +168,8 @@ void show_trace(struct task_struct *task break; printk(" =======================\n"); } + print_traces(task); + show_held_locks(task); } void show_stack(struct task_struct *task, unsigned long *esp) @@ -201,6 +208,12 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); +#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_LATENCY_TRACE) +extern unsigned long worst_stack_left; +#else +# define worst_stack_left -1L +#endif + void show_registers(struct pt_regs *regs) { int i; @@ -225,10 +238,17 @@ void show_registers(struct pt_regs *regs regs->eax, regs->ebx, regs->ecx, regs->edx); printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk("ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); - printk("Process %s (pid: %d, threadinfo=%p task=%p)", + printk("ds: %04x es: %04x ss: %04x preempt: %08x\n", + regs->xds & 0xffff, regs->xes & 0xffff, ss, preempt_count()); + printk("Process %s (pid: %d, threadinfo=%p task=%p", current->comm, current->pid, current_thread_info(), current); + + if (in_kernel) + printk(" stack_left=%ld worst_left=%ld)", + (esp & (THREAD_SIZE-1))-sizeof(struct thread_info), + worst_stack_left); + else + printk(")"); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. 
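The stack_left figure printed above leans on the i386 stack layout: kernel stacks are THREAD_SIZE-aligned with struct thread_info at the bottom, so masking esp with THREAD_SIZE-1 yields the offset into the stack area, and subtracting sizeof(struct thread_info) leaves the bytes still free before a growing stack would trample thread_info. As a one-line helper, a sketch mirroring the expression in the printk above:

static inline long stack_bytes_left(unsigned long esp)
{
	/* offset into the aligned stack, minus the reserved bottom */
	return (esp & (THREAD_SIZE - 1)) - sizeof(struct thread_info);
}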
@@ -297,11 +317,11 @@ bug: void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -379,6 +399,11 @@ static void __kprobes do_trap(int trapnr if (!user_mode(regs)) goto kernel_trap; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif + trap_signal: { if (info) force_sig_info(signr, info, tsk); @@ -509,7 +534,7 @@ fastcall void __kprobes do_general_prote return; gp_in_vm86: - local_irq_enable(); + raw_local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; @@ -563,10 +588,12 @@ static void unknown_nmi_error(unsigned c printk("Do you have a strange power saving mode enabled?\n"); } -static DEFINE_SPINLOCK(nmi_print_lock); +static DEFINE_RAW_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { + deadlock_trace_off(); + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == NOTIFY_STOP) return; @@ -594,10 +621,11 @@ void die_nmi (struct pt_regs *regs, cons crash_kexec(regs); } + nmi_exit(); do_exit(SIGSEGV); } -static void default_do_nmi(struct pt_regs * regs) +static void notrace default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; @@ -616,6 +644,7 @@ static void default_do_nmi(struct pt_reg */ if (nmi_watchdog) { nmi_watchdog_tick(regs); +// trace_special(6, 1, 0); return; } #endif @@ -635,18 +664,19 @@ static void default_do_nmi(struct pt_reg reassert_nmi(); } -static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +static notrace int dummy_nmi_callback(struct pt_regs * regs, int cpu) { return 0; } static nmi_callback_t nmi_callback = dummy_nmi_callback; -fastcall void do_nmi(struct pt_regs * regs, long error_code) +fastcall notrace void do_nmi(struct pt_regs * regs, long error_code) { int cpu; nmi_enter(); + nmi_trace((unsigned long)do_nmi, regs->eip, regs->eflags); cpu = smp_processor_id(); @@ -717,7 +747,7 @@ fastcall void __kprobes do_debug(struct return; /* It's safe to allow irq's after DR6 has been saved */ if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + raw_local_irq_enable(); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { Index: linux.prev/arch/i386/kernel/tsc.c =================================================================== --- /dev/null +++ linux.prev/arch/i386/kernel/tsc.c @@ -0,0 +1,395 @@ +/* + * This code largely moved from arch/i386/kernel/timer/timer_tsc.c + * which was originally moved from arch/i386/kernel/time.c. + * See comments there for proper credits. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "mach_timer.h" + +/* + * On some systems the TSC frequency does not + * change with the cpu frequency. So we need + * an extra value to store the TSC freq + */ +unsigned int tsc_khz; + +int tsc_disable __initdata = 0; + +#ifdef CONFIG_X86_TSC +static int __init tsc_setup(char *str) +{ + printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " + "cannot disable TSC.\n"); + return 1; +} +#else +/* + * disable flag for tsc. 
Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c
+ */
+static int __init tsc_setup(char *str)
+{
+	tsc_disable = 1;
+
+	return 1;
+}
+#endif
+
+__setup("notsc", tsc_setup);
+
+/*
+ * code to mark and check if the TSC is unstable
+ * due to cpufreq or due to unsynced TSCs
+ */
+static int tsc_unstable;
+
+static inline int check_tsc_unstable(void)
+{
+	return tsc_unstable;
+}
+
+void mark_tsc_unstable(void)
+{
+	tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ *	ns = cycles / (freq / ns_per_sec)
+ *	ns = cycles * (ns_per_sec / freq)
+ *	ns = cycles * (10^9 / (cpu_khz * 10^3))
+ *	ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ *	ns = cycles * (10^6 * SC / cpu_khz) / SC
+ *	ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ * (e.g. cpu_khz = 2000000 gives cyc2ns_scale = (10^6 << 10) / 2000000
+ *  = 512, so ns = cycles * 512 >> 10 = cycles / 2 for a 2 GHz TSC)
+ *
+ * We can use khz divisor instead of mhz to keep a better precision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ *	-johnstul@us.ibm.com "math is hard, let's go shopping!"
+ */
+static unsigned long cyc2ns_scale;
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+{
+	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+	unsigned long long this_offset;
+
+	/*
+	 * in the NUMA case we don't use the TSC as they are not
+	 * synchronized across all CPUs.
+	 */
+#ifndef CONFIG_NUMA
+	if (!cpu_khz || check_tsc_unstable())
+#endif
+		/* no locking but a rare wrong value is not a big deal */
+		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
+
+	/* read the Time Stamp Counter: */
+	rdtscll(this_offset);
+
+	/* return the value in ns */
+	return cycles_2_ns(this_offset);
+}
+
+static unsigned long calculate_cpu_khz(void)
+{
+	unsigned long long start, end;
+	unsigned long count;
+	u64 delta64;
+	int i;
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+
+	/* run 3 times to ensure the cache is warm */
+	for (i = 0; i < 3; i++) {
+		mach_prepare_counter();
+		rdtscll(start);
+		mach_countup(&count);
+		rdtscll(end);
+	}
+	/*
+	 * Error: ECTCNEVERSET
+	 * The CTC wasn't reliable: we got a hit on the very first read,
+	 * or the CPU was so fast/slow that the quotient wouldn't fit in
+	 * 32 bits..
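+	 * (Editor's illustration of the math below, assuming the usual
+	 * CALIBRATE_TIME_MSEC of 30 from mach_timer.h: a 2.4 GHz CPU
+	 * retires ~72,000,000 TSC cycles while the CTC counts down 30 ms,
+	 * and 72,000,000 / 30 = 2,400,000 cycles/msec = 2,400,000 kHz.
+	 * The checks below reject a delta that overflows 32 bits, i.e.
+	 * roughly a 143 GHz clock, or one below one cycle per msec,
+	 * i.e. under 1 kHz.)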
+	 */
+	if (count <= 1)
+		goto err;
+
+	delta64 = end - start;
+
+	/* cpu freq too fast: */
+	if (delta64 > (1ULL<<32))
+		goto err;
+
+	/* cpu freq too slow: */
+	if (delta64 <= CALIBRATE_TIME_MSEC)
+		goto err;
+
+	delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
+	do_div(delta64, CALIBRATE_TIME_MSEC);
+
+	raw_local_irq_restore(flags);
+	return (unsigned long)delta64;
+err:
+	raw_local_irq_restore(flags);
+	return 0;
+}
+
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+	unsigned long cpu_khz_old = cpu_khz;
+
+	if (cpu_has_tsc) {
+		cpu_khz = calculate_cpu_khz();
+		tsc_khz = cpu_khz;
+		cpu_data[0].loops_per_jiffy =
+			cpufreq_scale(cpu_data[0].loops_per_jiffy,
+					cpu_khz_old, cpu_khz);
+		return 0;
+	} else
+		return -ENODEV;
+#else
+	return -ENODEV;
+#endif
+}
+
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+void tsc_init(void)
+{
+	if (!cpu_has_tsc || tsc_disable)
+		return;
+
+	cpu_khz = calculate_cpu_khz();
+	tsc_khz = cpu_khz;
+
+	if (!cpu_khz)
+		return;
+
+	printk("Detected %lu.%03lu MHz processor.\n",
+		(unsigned long)cpu_khz / 1000,
+		(unsigned long)cpu_khz % 1000);
+
+	set_cyc2ns_scale(cpu_khz);
+	use_tsc_delay();
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu)
+		cpufreq_get(cpu);
+
+	cpufreq_delayed_issched = 0;
+}
+
+/*
+ * if we notice cpufreq oddness, schedule a call to cpufreq_get(); it
+ * verifies that the CPU frequency the timing core thinks the CPU is
+ * running at is still correct.
+ */
+static inline void cpufreq_delayed_get(void)
+{
+	if (cpufreq_init && !cpufreq_delayed_issched) {
+		cpufreq_delayed_issched = 1;
+		printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
+		schedule_work(&cpufreq_delayed_get_work);
+	}
+}
+
+/*
+ * if the CPU frequency is scaled, TSC-based delays will need a different
+ * loops_per_jiffy value to function properly.
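+ * cpufreq_scale() is a linear rescale; as an editor's illustration, a
+ * loops_per_jiffy of 2,000,000 calibrated at 2000 MHz becomes roughly
+ * 1,000,000 once the governor drops the core to 1000 MHz, which keeps
+ * udelay() close to accurate across frequency transitions.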
+ */ +static unsigned int ref_freq = 0; +static unsigned long loops_per_jiffy_ref = 0; +static unsigned long cpu_khz_ref = 0; + +static int +time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + + if (val != CPUFREQ_RESUMECHANGE) + write_seqlock_irq(&xtime_lock); + + if (!ref_freq) { + ref_freq = freq->old; + loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; + cpu_khz_ref = cpu_khz; + } + + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); + + if (cpu_khz) { + + if (num_online_cpus() == 1) + cpu_khz = cpufreq_scale(cpu_khz_ref, + ref_freq, freq->new); + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { + tsc_khz = cpu_khz; + set_cyc2ns_scale(cpu_khz); + /* + * TSC based sched_clock turns + * to junk w/ cpufreq + */ + mark_tsc_unstable(); + } + } + } + + if (val != CPUFREQ_RESUMECHANGE) + write_sequnlock_irq(&xtime_lock); + + return 0; +} + +static struct notifier_block time_cpufreq_notifier_block = { + .notifier_call = time_cpufreq_notifier +}; + +static int __init cpufreq_tsc(void) +{ + int ret; + + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + ret = cpufreq_register_notifier(&time_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + if (!ret) + cpufreq_init = 1; + + return ret; +} + +core_initcall(cpufreq_tsc); + +#endif + +/* clock source code */ + +static unsigned long current_tsc_khz = 0; +static int tsc_update_callback(void); + +static cycle_t read_tsc(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .mask = (cycle_t)-1, + .mult = 0, /* to be set */ + .shift = 22, + .update_callback = tsc_update_callback, + .is_continuous = 1, +}; + +static int tsc_update_callback(void) +{ + int change = 0; + + /* check to see if we should switch to the safe clocksource: */ + if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { + clocksource_tsc.rating = 50; + reselect_clocksource(); + change = 1; + } + + /* only update if tsc_khz has changed: */ + if (current_tsc_khz != tsc_khz) { + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + change = 1; + } + + return change; +} + +/* + * Make an educated guess if the TSC is trustworthy and synchronized + * over all CPUs. + */ +static __init int unsynchronized_tsc(void) +{ + /* + * Intel systems are normally all synchronized. 
+ * Exceptions must mark TSC as unstable: + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + return 0; + + /* assume multi socket systems are not synchronized: */ + return num_possible_cpus() > 1; +} + +/* NUMAQ can't use TSC: */ +static int __init init_tsc_clocksource(void) +{ + /* TSC initialization is done in arch/i386/kernel/tsc.c */ + if (cpu_has_tsc && tsc_khz && !tsc_disable) { + if (unsynchronized_tsc()) /* lower rating if unsynced */ + mark_tsc_unstable(); + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + register_clocksource(&clocksource_tsc); + } + + return 0; +} + +module_init(init_tsc_clocksource); Index: linux.prev/arch/i386/kernel/vm86.c =================================================================== --- linux.prev.orig/arch/i386/kernel/vm86.c +++ linux.prev/arch/i386/kernel/vm86.c @@ -105,9 +105,10 @@ struct pt_regs * fastcall save_v86_state * from process context. Enable interrupts here, before trying * to access user space. */ - local_irq_enable(); + raw_local_irq_enable(); if (!current->thread.vm86_info) { + raw_local_irq_disable(); printk("no vm86_info: BAD\n"); do_exit(SIGSEGV); } Index: linux.prev/arch/i386/lib/bitops.c =================================================================== --- linux.prev.orig/arch/i386/lib/bitops.c +++ linux.prev/arch/i386/lib/bitops.c @@ -68,3 +68,37 @@ int find_next_zero_bit(const unsigned lo return (offset + set + res); } EXPORT_SYMBOL(find_next_zero_bit); + + +/* + * rw spinlock fallbacks + */ +#if defined(CONFIG_SMP) +asm( +".section .sched.text\n" +".align 4\n" +".globl __write_lock_failed\n" +"__write_lock_failed:\n\t" + LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" +"1: rep; nop\n\t" + "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jne 1b\n\t" + LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + "jnz __write_lock_failed\n\t" + "ret" +); + +asm( +".section .sched.text\n" +".align 4\n" +".globl __read_lock_failed\n" +"__read_lock_failed:\n\t" + LOCK "incl (%eax)\n" +"1: rep; nop\n\t" + "cmpl $1,(%eax)\n\t" + "js 1b\n\t" + LOCK "decl (%eax)\n\t" + "js __read_lock_failed\n\t" + "ret" +); +#endif Index: linux.prev/arch/i386/lib/delay.c =================================================================== --- linux.prev.orig/arch/i386/lib/delay.c +++ linux.prev/arch/i386/lib/delay.c @@ -10,43 +10,93 @@ * we have to worry about. 
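 * (Editor's note on the rework below: the old cur_timer->delay()
 * indirection is replaced by a delay_fn pointer, which starts out as
 * the software delay_loop() and is switched to the TSC-based
 * delay_tsc() when boot-time calibration calls use_tsc_delay().  The
 * constant 0x000010c7 in __udelay() is 2^32/10^6 rounded up, so after
 * the xloops *= 4 and the mull against loops_per_jiffy * (HZ/4), the
 * high 32 bits come out to usecs * loops_per_jiffy * HZ / 10^6, the
 * loop count that delay_fn must burn.)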
*/ +#include +#include #include #include #include -#include + #include #include #include #ifdef CONFIG_SMP -#include +# include #endif -extern struct timer_opts* timer; +/* simple loop based delay: */ +static void delay_loop(unsigned long loops) +{ + int d0; + + __asm__ __volatile__( + "\tjmp 1f\n" + ".align 16\n" + "1:\tjmp 2f\n" + ".align 16\n" + "2:\tdecl %0\n\tjns 2b" + :"=&a" (d0) + :"0" (loops)); +} + +/* TSC based delay: */ +static void delay_tsc(unsigned long loops) +{ + unsigned long bclock, now; + + rdtscl(bclock); + do { + rep_nop(); + rdtscl(now); + } while ((now-bclock) < loops); +} + +/* + * Since we calibrate only once at boot, this + * function should be set once at boot and not changed + */ +static void (*delay_fn)(unsigned long) = delay_loop; + +void use_tsc_delay(void) +{ + delay_fn = delay_tsc; +} + +int read_current_timer(unsigned long *timer_val) +{ + if (delay_fn == delay_tsc) { + rdtscl(*timer_val); + return 0; + } + return -1; +} void __delay(unsigned long loops) { - cur_timer->delay(loops); + delay_fn(loops); } inline void __const_udelay(unsigned long xloops) { int d0; + xloops *= 4; __asm__("mull %0" :"=d" (xloops), "=&a" (d0) - :"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); - __delay(++xloops); + :"1" (xloops), "0" + (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); + + __delay(++xloops); } void __udelay(unsigned long usecs) { - __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ + __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ } void __ndelay(unsigned long nsecs) { - __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ + __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } EXPORT_SYMBOL(__delay); Index: linux.prev/arch/i386/mach-default/setup.c =================================================================== --- linux.prev.orig/arch/i386/mach-default/setup.c +++ linux.prev/arch/i386/mach-default/setup.c @@ -34,7 +34,7 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; /** * intr_init_hook - post gate setup interrupt initialisation @@ -78,8 +78,6 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; - /** * time_init_hook - do any specific initialisations for the system timer. 
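 * (Editor's note: the hunk below empties this hook. The irq0 action
 * and its setup_irq(0, &irq0) call are deleted; presumably the timer
 * interrupt is now registered by shared timer code elsewhere in this
 * patch set, but that destination is not visible in this section and
 * is an assumption.)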
* @@ -89,7 +87,6 @@ static struct irqaction irq0 = { timer_ **/ void __init time_init_hook(void) { - setup_irq(0, &irq0); } #ifdef CONFIG_MCA Index: linux.prev/arch/i386/mach-visws/setup.c =================================================================== --- linux.prev.orig/arch/i386/mach-visws/setup.c +++ linux.prev/arch/i386/mach-visws/setup.c @@ -113,7 +113,7 @@ void __init pre_setup_arch_hook() static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_INTERRUPT | SA_NODELAY, .name = "timer", }; Index: linux.prev/arch/i386/mach-visws/visws_apic.c =================================================================== --- linux.prev.orig/arch/i386/mach-visws/visws_apic.c +++ linux.prev/arch/i386/mach-visws/visws_apic.c @@ -260,11 +260,13 @@ out_unlock: static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = SA_NODELAY, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = SA_NODELAY, }; Index: linux.prev/arch/i386/mach-voyager/setup.c =================================================================== --- linux.prev.orig/arch/i386/mach-voyager/setup.c +++ linux.prev/arch/i386/mach-voyager/setup.c @@ -16,7 +16,7 @@ void __init pre_intr_init_hook(void) /* * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init intr_init_hook(void) { @@ -39,7 +39,7 @@ void __init trap_init_hook(void) { } -static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; +static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL}; void __init time_init_hook(void) { Index: linux.prev/arch/i386/mm/fault.c =================================================================== --- linux.prev.orig/arch/i386/mm/fault.c +++ linux.prev/arch/i386/mm/fault.c @@ -39,6 +39,8 @@ void bust_spinlocks(int yes) int loglevel_save = console_loglevel; if (yes) { + stop_trace(); + zap_rt_locks(); oops_in_progress = 1; return; } @@ -224,8 +226,8 @@ fastcall void do_invalid_op(struct pt_re * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -fastcall void __kprobes do_page_fault(struct pt_regs *regs, - unsigned long error_code) +fastcall notrace void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; @@ -236,13 +238,14 @@ fastcall void __kprobes do_page_fault(st /* get the address */ address = read_cr2(); + trace_special(regs->eip, error_code, address); if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & (X86_EFLAGS_IF|VM_MASK)) - local_irq_enable(); + raw_local_irq_enable(); tsk = current; @@ -449,9 +452,9 @@ no_context: } #endif if (address < PAGE_SIZE) - printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); + printk(KERN_ALERT "BUG: Unable to handle kernel NULL pointer dereference"); else - printk(KERN_ALERT "Unable to handle kernel paging request"); + printk(KERN_ALERT "BUG: Unable to handle kernel paging request"); printk(" at virtual address %08lx\n",address); printk(KERN_ALERT " printing eip:\n"); printk("%08lx\n", regs->eip); Index: linux.prev/arch/i386/mm/highmem.c 
=================================================================== --- linux.prev.orig/arch/i386/mm/highmem.c +++ linux.prev/arch/i386/mm/highmem.c @@ -18,6 +18,27 @@ void kunmap(struct page *page) kunmap_high(page); } +void kunmap_virt(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return; + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + kunmap(page); +} + +struct page *kmap_to_page(void *ptr) +{ + struct page *page; + + if ((unsigned long)ptr < PKMAP_ADDR(0)) + return virt_to_page(ptr); + page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); + return page; +} + + /* * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because * no global lock is needed and because the kmap code must perform a global TLB @@ -26,7 +47,7 @@ void kunmap(struct page *page) * However when holding an atomic kmap is is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -48,7 +69,7 @@ void *kmap_atomic(struct page *page, enu return (void*) vaddr; } -void kunmap_atomic(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr, enum km_type type) { #ifdef CONFIG_DEBUG_HIGHMEM unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; @@ -78,7 +99,7 @@ void kunmap_atomic(void *kvaddr, enum km /* This is the same as kmap_atomic() but can map memory that doesn't * have a struct page associated with it. */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) { enum fixed_addresses idx; unsigned long vaddr; @@ -93,7 +114,7 @@ void *kmap_atomic_pfn(unsigned long pfn, return (void*) vaddr; } -struct page *kmap_atomic_to_page(void *ptr) +struct page *__kmap_atomic_to_page(void *ptr) { unsigned long idx, vaddr = (unsigned long)ptr; pte_t *pte; @@ -108,6 +129,7 @@ struct page *kmap_atomic_to_page(void *p EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); +EXPORT_SYMBOL(kunmap_virt); +EXPORT_SYMBOL(__kmap_atomic); +EXPORT_SYMBOL(__kunmap_atomic); +EXPORT_SYMBOL(__kmap_atomic_to_page); Index: linux.prev/arch/i386/mm/init.c =================================================================== --- linux.prev.orig/arch/i386/mm/init.c +++ linux.prev/arch/i386/mm/init.c @@ -44,7 +44,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static int noinline do_test_wp_bit(void); Index: linux.prev/arch/i386/mm/pageattr.c =================================================================== --- linux.prev.orig/arch/i386/mm/pageattr.c +++ linux.prev/arch/i386/mm/pageattr.c @@ -207,6 +207,9 @@ void kernel_map_pages(struct page *page, { if (PageHighMem(page)) return; + if (!enable) + check_no_locks_freed(page_address(page), page_address(page+numpages)); + /* the return value is ignored - the calls cannot fail, * large pages are disabled at boot time. 
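 * (Editor's note: the check_no_locks_freed() call added above is a
 * debugging assertion. When pages are being unmapped, i.e. !enable,
 * it verifies that no currently-held lock lives inside that address
 * range, catching locks whose backing memory goes away while they
 * are still acquired.)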
*/ Index: linux.prev/arch/i386/mm/pgtable.c =================================================================== --- linux.prev.orig/arch/i386/mm/pgtable.c +++ linux.prev/arch/i386/mm/pgtable.c @@ -183,7 +183,7 @@ void pmd_ctor(void *pmd, kmem_cache_t *c * recommendations and having no core impact whatsoever. * -- wli */ -DEFINE_SPINLOCK(pgd_lock); +DEFINE_RAW_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) Index: linux.prev/arch/i386/oprofile/Kconfig =================================================================== --- linux.prev.orig/arch/i386/oprofile/Kconfig +++ linux.prev/arch/i386/oprofile/Kconfig @@ -15,3 +15,6 @@ config OPROFILE If unsure, say N. +config PROFILE_NMI + bool + default y Index: linux.prev/arch/i386/pci/Makefile =================================================================== --- linux.prev.orig/arch/i386/pci/Makefile +++ linux.prev/arch/i386/pci/Makefile @@ -4,8 +4,9 @@ obj-$(CONFIG_PCI_BIOS) += pcbios.o obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o obj-$(CONFIG_PCI_DIRECT) += direct.o +obj-$(CONFIG_ACPI) += acpi.o + pci-y := fixup.o -pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o Index: linux.prev/arch/i386/pci/direct.c =================================================================== --- linux.prev.orig/arch/i386/pci/direct.c +++ linux.prev/arch/i386/pci/direct.c @@ -211,16 +211,23 @@ static int __init pci_check_type1(void) unsigned int tmp; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x01, 0xCFB); tmp = inl(0xCF8); outl(0x80000000, 0xCF8); - if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { - works = 1; + + if (inl(0xCF8) == 0x80000000) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf1)) + works = 1; + + spin_lock_irqsave(&pci_config_lock, flags); } outl(tmp, 0xCF8); - local_irq_restore(flags); + + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } @@ -230,17 +237,19 @@ static int __init pci_check_type2(void) unsigned long flags; int works = 0; - local_irq_save(flags); + spin_lock_irqsave(&pci_config_lock, flags); outb(0x00, 0xCFB); outb(0x00, 0xCF8); outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - works = 1; - } - local_irq_restore(flags); + if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { + spin_unlock_irqrestore(&pci_config_lock, flags); + + if (pci_sanity_check(&pci_direct_conf2)) + works = 1; + } else + spin_unlock_irqrestore(&pci_config_lock, flags); return works; } Index: linux.prev/arch/i386/pci/pcbios.c =================================================================== --- linux.prev.orig/arch/i386/pci/pcbios.c +++ linux.prev/arch/i386/pci/pcbios.c @@ -70,7 +70,7 @@ static unsigned long bios32_service(unsi unsigned long entry; /* %edx */ unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__("lcall *(%%edi); cld" : "=a" (return_code), "=b" (address), @@ -79,7 +79,7 @@ static unsigned long bios32_service(unsi : "0" (service), "1" (0), "D" (&bios32_indirect)); - local_irq_restore(flags); + raw_local_irq_restore(flags); switch (return_code) { case 0: @@ -110,7 +110,7 @@ static int __devinit check_pcibios(void) if ((pcibios_entry = bios32_service(PCI_SERVICE))) { pci_indirect.address = pcibios_entry + PAGE_OFFSET; - local_irq_save(flags); + raw_local_irq_save(flags); __asm__( "lcall *(%%edi); cld\n\t" "jc 1f\n\t" @@ -123,7 +123,7 @@ static int 
__devinit check_pcibios(void) : "1" (PCIBIOS_PCI_BIOS_PRESENT), "D" (&pci_indirect) : "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); status = (eax >> 8) & 0xff; hw_mech = eax & 0xff; Index: linux.prev/arch/mips/Kconfig =================================================================== --- linux.prev.orig/arch/mips/Kconfig +++ linux.prev/arch/mips/Kconfig @@ -362,6 +362,7 @@ config MOMENCO_JAGUAR_ATX config MOMENCO_OCELOT bool "Support for Momentum Ocelot board" select DMA_NONCOHERENT + select NO_SPINLOCK select HW_HAS_PCI select IRQ_CPU select IRQ_CPU_RM7K @@ -792,12 +793,21 @@ source "arch/mips/philips/pnx8550/common endmenu +source "kernel/Kconfig.preempt" + config RWSEM_GENERIC_SPINLOCK bool + depends on !PREEMPT_RT default y config RWSEM_XCHGADD_ALGORITHM bool + depends on !PREEMPT_RT + +config ASM_SEMAPHORES + bool +# depends on !PREEMPT_RT + default y config GENERIC_CALIBRATE_DELAY bool @@ -832,6 +842,9 @@ config DMA_NEED_PCI_MAP_STATE config OWN_DMA bool +config NO_SPINLOCK + bool + config EARLY_PRINTK bool @@ -1637,10 +1650,6 @@ config MIPS_INSANE_LARGE endmenu -config RWSEM_GENERIC_SPINLOCK - bool - default y - source "init/Kconfig" menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)" Index: linux.prev/arch/mips/arc/misc.c =================================================================== --- linux.prev.orig/arch/mips/arc/misc.c +++ linux.prev/arch/mips/arc/misc.c @@ -27,7 +27,7 @@ VOID ArcHalt(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -39,7 +39,7 @@ VOID ArcPowerDown(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -52,7 +52,7 @@ VOID ArcRestart(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -64,7 +64,7 @@ VOID ArcReboot(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif @@ -76,7 +76,7 @@ VOID ArcEnterInteractiveMode(VOID) { bc_disable(); - local_irq_disable(); + raw_local_irq_disable(); #ifdef CONFIG_SCSI_SGIWD93 reset_wd33c93(sgiwd93_host); #endif Index: linux.prev/arch/mips/gt64120/ev64120/irq.c =================================================================== --- linux.prev.orig/arch/mips/gt64120/ev64120/irq.c +++ linux.prev/arch/mips/gt64120/ev64120/irq.c @@ -60,25 +60,25 @@ static void disable_ev64120_irq(unsigned { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (irq_nr >= 8) { // All PCI interrupts are on line 5 or 2 clear_c0_status(9 << 10); } else { clear_c0_status(1 << (irq_nr + 8)); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void enable_ev64120_irq(unsigned int irq_nr) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); if (irq_nr >= 8) // All PCI interrupts are on line 5 or 2 set_c0_status(9 << 10); else set_c0_status(1 << (irq_nr + 8)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_ev64120_irq(unsigned int irq) @@ -119,7 +119,7 @@ void gt64120_irq_setup(void) /* Sets the exception_handler array. */ set_except_vector(0, galileo_handle_int); - local_irq_disable(); + raw_local_irq_disable(); /* * Enable timer. 
Other interrupts will be enabled as they are Index: linux.prev/arch/mips/gt64120/momenco_ocelot/irq.c =================================================================== --- linux.prev.orig/arch/mips/gt64120/momenco_ocelot/irq.c +++ linux.prev/arch/mips/gt64120/momenco_ocelot/irq.c @@ -57,7 +57,7 @@ void __init arch_init_irq(void) * int-handler is not on bootstrap */ clear_c0_status(ST0_IM); - local_irq_disable(); + raw_local_irq_disable(); /* Sets the first-level interrupt dispatcher. */ set_except_vector(0, ocelot_handle_int); Index: linux.prev/arch/mips/ite-boards/generic/irq.c =================================================================== --- linux.prev.orig/arch/mips/ite-boards/generic/irq.c +++ linux.prev/arch/mips/ite-boards/generic/irq.c @@ -171,9 +171,9 @@ void enable_cpu_timer(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); set_c0_status(0x100 << EXT_IRQ5_TO_IP); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init arch_init_irq(void) Index: linux.prev/arch/mips/ite-boards/generic/time.c =================================================================== --- linux.prev.orig/arch/mips/ite-boards/generic/time.c +++ linux.prev/arch/mips/ite-boards/generic/time.c @@ -124,7 +124,7 @@ static unsigned long __init cal_r4koff(v { unsigned int flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Start counter exactly on falling edge of update flag */ while (CMOS_READ(RTC_REG_A) & RTC_UIP); @@ -140,7 +140,7 @@ static unsigned long __init cal_r4koff(v mips_hpt_frequency = read_c0_count(); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return (mips_hpt_frequency / HZ); } @@ -153,11 +153,11 @@ it8172_rtc_get_time(void) /* avoid update-in-progress. */ for (;;) { - local_irq_save(flags); + raw_local_irq_save(flags); if (! (CMOS_READ(RTC_REG_A) & RTC_UIP)) break; /* don't hold intr closed all the time */ - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Read regs. */ @@ -170,7 +170,7 @@ it8172_rtc_get_time(void) hw_to_bin(*rtc_century_reg) * 100; /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return mktime(year, mon, day, hour, min, sec); } @@ -186,11 +186,11 @@ it8172_rtc_set_time(unsigned long t) /* avoid update-in-progress. */ for (;;) { - local_irq_save(flags); + raw_local_irq_save(flags); if (! 
(CMOS_READ(RTC_REG_A) & RTC_UIP)) break; /* don't hold intr closed all the time */ - local_irq_restore(flags); + raw_local_irq_restore(flags); } *rtc_century_reg = bin_to_hw(tm.tm_year/100); @@ -202,7 +202,7 @@ it8172_rtc_set_time(unsigned long t) CMOS_WRITE(bin_to_hw(tm.tm_year%100), RTC_YEAR); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -211,7 +211,7 @@ void __init it8172_time_init(void) { unsigned int est_freq, flags; - local_irq_save(flags); + raw_local_irq_save(flags); saved_control = CMOS_READ(RTC_CONTROL); @@ -225,7 +225,7 @@ void __init it8172_time_init(void) printk("CPU frequency %d.%02d MHz\n", est_freq/1000000, (est_freq%1000000)*100/1000000); - local_irq_restore(flags); + raw_local_irq_restore(flags); rtc_get_time = it8172_rtc_get_time; rtc_set_time = it8172_rtc_set_time; Index: linux.prev/arch/mips/jmr3927/rbhma3100/setup.c =================================================================== --- linux.prev.orig/arch/mips/jmr3927/rbhma3100/setup.c +++ linux.prev/arch/mips/jmr3927/rbhma3100/setup.c @@ -115,7 +115,7 @@ static inline void do_reset(void) static void jmr3927_machine_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); puts("Rebooting..."); do_reset(); } Index: linux.prev/arch/mips/kernel/Makefile =================================================================== --- linux.prev.orig/arch/mips/kernel/Makefile +++ linux.prev/arch/mips/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ - ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \ + ptrace.o reset.o setup.o signal.o syscall.o \ time.o traps.o unaligned.o binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \ @@ -13,6 +13,8 @@ binfmt_irix-objs := irixelf.o irixinv.o obj-$(CONFIG_MODULES) += mips_ksyms.o module.o +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o + obj-$(CONFIG_CPU_R3000) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX39XX) += r2300_fpu.o r2300_switch.o obj-$(CONFIG_CPU_TX49XX) += r4k_fpu.o r4k_switch.o Index: linux.prev/arch/mips/kernel/asm-offsets.c =================================================================== --- linux.prev.orig/arch/mips/kernel/asm-offsets.c +++ linux.prev/arch/mips/kernel/asm-offsets.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include +#include #include #include #include Index: linux.prev/arch/mips/kernel/cpu-bugs64.c =================================================================== --- linux.prev.orig/arch/mips/kernel/cpu-bugs64.c +++ linux.prev/arch/mips/kernel/cpu-bugs64.c @@ -48,7 +48,7 @@ static inline void mult_sh_align_mod(lon * used for. */ - local_irq_save(flags); + raw_local_irq_save(flags); /* * The following code leads to a wrong result of the first * dsll32 when executed on R4000 rev. 2.2 or 3.0 (PRId @@ -101,7 +101,7 @@ static inline void mult_sh_align_mod(lon "" : "=r" (lv2) : "0" (lv2), "r" (p)); - local_irq_restore(flags); + raw_local_irq_restore(flags); *v1 = lv1; *v2 = lv2; @@ -182,7 +182,7 @@ static inline void check_daddi(void) printk("Checking for the daddi bug... 
"); - local_irq_save(flags); + raw_local_irq_save(flags); handler = set_except_vector(12, handle_daddi_ov); /* * The following code fails to trigger an overflow exception @@ -208,7 +208,7 @@ static inline void check_daddi(void) : "=r" (v), "=&r" (tmp) : "I" (0xffffffffffffdb9a), "I" (0x1234)); set_except_vector(12, handler); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (daddi_ov) { printk("no.\n"); @@ -217,7 +217,7 @@ static inline void check_daddi(void) printk("yes, workaround... "); - local_irq_save(flags); + raw_local_irq_save(flags); handler = set_except_vector(12, handle_daddi_ov); asm volatile( "addiu %1, $0, %2\n\t" @@ -226,7 +226,7 @@ static inline void check_daddi(void) : "=r" (v), "=&r" (tmp) : "I" (0xffffffffffffdb9a), "I" (0x1234)); set_except_vector(12, handler); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (daddi_ov) { printk("yes.\n"); Index: linux.prev/arch/mips/kernel/entry.S =================================================================== --- linux.prev.orig/arch/mips/kernel/entry.S +++ linux.prev/arch/mips/kernel/entry.S @@ -23,7 +23,7 @@ .endm #else .macro preempt_stop - local_irq_disable + mips_raw_local_irq_disable .endm #define resume_kernel restore_all #endif @@ -38,7 +38,7 @@ FEXPORT(ret_from_irq) beqz t0, resume_kernel resume_userspace: - local_irq_disable # make sure we dont miss an + mips_raw_local_irq_disable # make sure we dont miss an # interrupt setting need_resched # between sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -48,7 +48,9 @@ resume_userspace: #ifdef CONFIG_PREEMPT resume_kernel: - local_irq_disable + mips_local_irq_disable + lw t0, kernel_preemption + beqz t0, restore_all lw t0, TI_PRE_COUNT($28) bnez t0, restore_all need_resched: @@ -66,7 +68,7 @@ FEXPORT(ret_from_fork) jal schedule_tail # a0 = task_t *prev FEXPORT(syscall_exit) - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work @@ -85,19 +87,21 @@ FEXPORT(restore_partial) # restore part .set at work_pending: - andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS + # a2 is preloaded with TI_FLAGS + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beqz t0, work_notifysig work_resched: + mips_raw_local_irq_enable t0 jal schedule - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) andi t0, a2, _TIF_WORK_MASK # is there any work to be done # other than syscall tracing? beqz t0, restore_all - andi t0, a2, _TIF_NEED_RESCHED + andi t0, a2, (_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bnez t0, work_resched work_notifysig: # deal with pending signals and @@ -113,7 +117,7 @@ syscall_exit_work: li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT and t0, a2 # a2 is preloaded with TI_FLAGS beqz t0, work_pending # trace bit set? 
- local_irq_enable # could let do_syscall_trace() + mips_raw_local_irq_enable # could let do_syscall_trace() # call schedule() instead move a0, sp li a1, 1 Index: linux.prev/arch/mips/kernel/gdb-stub.c =================================================================== --- linux.prev.orig/arch/mips/kernel/gdb-stub.c +++ linux.prev/arch/mips/kernel/gdb-stub.c @@ -402,7 +402,7 @@ void set_debug_traps(void) unsigned long flags; unsigned char c; - local_irq_save(flags); + raw_local_irq_save(flags); for (ht = hard_trap_info; ht->tt && ht->signo; ht++) saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low); @@ -418,7 +418,7 @@ void set_debug_traps(void) putDebugChar('+'); /* ack it */ initialized = 1; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void restore_debug_traps(void) @@ -426,10 +426,10 @@ void restore_debug_traps(void) struct hard_trap_info *ht; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); for (ht = hard_trap_info; ht->tt && ht->signo; ht++) set_except_vector(ht->tt, saved_vectors[ht->tt]); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -661,12 +661,12 @@ static void kgdb_wait(void *arg) unsigned flags; int cpu = smp_processor_id(); - local_irq_save(flags); + raw_local_irq_save(flags); __raw_spin_lock(&kgdb_cpulock[cpu]); __raw_spin_unlock(&kgdb_cpulock[cpu]); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/mips/kernel/i8259.c =================================================================== --- linux.prev.orig/arch/mips/kernel/i8259.c +++ linux.prev/arch/mips/kernel/i8259.c @@ -31,7 +31,7 @@ void disable_8259A_irq(unsigned int irq) * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { Index: linux.prev/arch/mips/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/mips/kernel/init_task.c +++ linux.prev/arch/mips/kernel/init_task.c @@ -9,8 +9,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/mips/kernel/irq-rm7000.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq-rm7000.c +++ linux.prev/arch/mips/kernel/irq-rm7000.c @@ -33,18 +33,18 @@ static inline void rm7k_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_rm7k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm7k_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm7k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int rm7k_cpu_irq_startup(unsigned int irq) Index: linux.prev/arch/mips/kernel/irq-rm9000.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq-rm9000.c +++ linux.prev/arch/mips/kernel/irq-rm9000.c @@ -34,18 +34,18 @@ static inline void rm9k_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_rm9k_irq(irq); - 
local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm9k_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int rm9k_cpu_irq_startup(unsigned int irq) @@ -79,9 +79,9 @@ static void local_rm9k_perfcounter_irq_s unsigned int irq = (unsigned int) args; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_rm9k_irq(irq); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void rm9k_perfcounter_irq_shutdown(unsigned int irq) Index: linux.prev/arch/mips/kernel/irq.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq.c +++ linux.prev/arch/mips/kernel/irq.c @@ -125,7 +125,10 @@ void __init init_IRQ(void) irq_desc[i].action = NULL; irq_desc[i].depth = 1; irq_desc[i].handler = &no_irq_type; - spin_lock_init(&irq_desc[i].lock); + __raw_spin_lock_init(&irq_desc[i].lock); +#ifdef CONFIG_PREEMPT_HARDIRQS + irq_desc[i].thread = NULL; +#endif } arch_init_irq(); Index: linux.prev/arch/mips/kernel/irq_cpu.c =================================================================== --- linux.prev.orig/arch/mips/kernel/irq_cpu.c +++ linux.prev/arch/mips/kernel/irq_cpu.c @@ -54,20 +54,20 @@ static inline void mips_cpu_irq_enable(u { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); unmask_mips_irq(irq); back_to_back_c0_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void mips_cpu_irq_disable(unsigned int irq) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); mask_mips_irq(irq); back_to_back_c0_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int mips_cpu_irq_startup(unsigned int irq) Index: linux.prev/arch/mips/kernel/module.c =================================================================== --- linux.prev.orig/arch/mips/kernel/module.c +++ linux.prev/arch/mips/kernel/module.c @@ -39,7 +39,7 @@ struct mips_hi16 { static struct mips_hi16 *mips_hi16_list; static LIST_HEAD(dbe_list); -static DEFINE_SPINLOCK(dbe_lock); +static DEFINE_RAW_SPINLOCK(dbe_lock); void *module_alloc(unsigned long size) { Index: linux.prev/arch/mips/kernel/process.c =================================================================== --- linux.prev.orig/arch/mips/kernel/process.c +++ linux.prev/arch/mips/kernel/process.c @@ -47,13 +47,15 @@ */ ATTRIB_NORET void cpu_idle(void) { + raw_local_irq_enable(); + /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) if (cpu_wait) (*cpu_wait)(); - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); } } Index: linux.prev/arch/mips/kernel/scall32-o32.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall32-o32.S +++ linux.prev/arch/mips/kernel/scall32-o32.S @@ -72,7 +72,7 @@ stack_done: 1: sw v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make sure need_resched and + mips_raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return lw a2, TI_FLAGS($28) # current->work Index: linux.prev/arch/mips/kernel/scall64-64.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall64-64.S +++ 
linux.prev/arch/mips/kernel/scall64-64.S @@ -71,7 +71,7 @@ NESTED(handle_sys64, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result n64_syscall_exit: - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: linux.prev/arch/mips/kernel/scall64-n32.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall64-n32.S +++ linux.prev/arch/mips/kernel/scall64-n32.S @@ -68,7 +68,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd v0, PT_R0(sp) # set flag for syscall restarting 1: sd v0, PT_R2(sp) # result - local_irq_disable # make sure need_resched and + raw_local_irq_disable # make sure need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) # current->work Index: linux.prev/arch/mips/kernel/scall64-o32.S =================================================================== --- linux.prev.orig/arch/mips/kernel/scall64-o32.S +++ linux.prev/arch/mips/kernel/scall64-o32.S @@ -97,7 +97,7 @@ NESTED(handle_sys, PT_SIZE, sp) 1: sd v0, PT_R2(sp) # result o32_syscall_exit: - local_irq_disable # make need_resched and + raw_local_irq_disable # make need_resched and # signals dont change between # sampling and return LONG_L a2, TI_FLAGS($28) Index: linux.prev/arch/mips/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/mips/kernel/semaphore.c +++ linux.prev/arch/mips/kernel/semaphore.c @@ -36,7 +36,7 @@ * sem->count and sem->waking atomic. Scalability isn't an issue because * this lock is used on UP only so it's just an empty variable. */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -67,7 +67,7 @@ static inline int __sem_update_count(str : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) : "r" (incr), "m" (sem->count)); } else { - static DEFINE_SPINLOCK(semaphore_lock); + static DEFINE_RAW_SPINLOCK(semaphore_lock); unsigned long flags; spin_lock_irqsave(&semaphore_lock, flags); @@ -80,7 +80,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -94,7 +94,7 @@ void __up(struct semaphore *sem) wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -104,7 +104,7 @@ EXPORT_SYMBOL(__up); * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. 
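 * (Editor's note: the renames below keep this code as the "compat"
 * semaphore implementation. In the PREEMPT_RT model the plain struct
 * semaphore is substituted by a sleeping lock, so the original
 * arch-specific path is re-typed to struct compat_semaphore and its
 * entry points gain a __compat_ prefix; this reading of intent is an
 * assumption based on the renames visible here.)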
*/ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -133,9 +133,9 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -165,4 +165,4 @@ int __sched __down_interruptible(struct return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); Index: linux.prev/arch/mips/kernel/signal.c =================================================================== --- linux.prev.orig/arch/mips/kernel/signal.c +++ linux.prev/arch/mips/kernel/signal.c @@ -426,6 +426,10 @@ int do_signal(sigset_t *oldset, struct p siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux.prev/arch/mips/kernel/signal32.c =================================================================== --- linux.prev.orig/arch/mips/kernel/signal32.c +++ linux.prev/arch/mips/kernel/signal32.c @@ -814,6 +814,10 @@ int do_signal32(sigset_t *oldset, struct siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which is why we may in certain * cases get here from kernel mode. Just return without doing anything Index: linux.prev/arch/mips/kernel/smp.c =================================================================== --- linux.prev.orig/arch/mips/kernel/smp.c +++ linux.prev/arch/mips/kernel/smp.c @@ -106,7 +106,22 @@ asmlinkage void start_secondary(void) cpu_idle(); } -DEFINE_SPINLOCK(smp_call_lock); +DEFINE_RAW_SPINLOCK(smp_call_lock); + +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them. + */ +void smp_send_reschedule_allbutself(void) +{ + int cpu = smp_processor_id(); + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpu_online(i) && i != cpu) + core_send_ipi(i, SMP_RESCHEDULE_YOURSELF); +} struct call_data_struct *call_data; @@ -215,7 +230,7 @@ static void stop_this_cpu(void *dummy) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_enable(); /* May need to service _machine_restart IPI */ + raw_local_irq_enable(); /* May need to service _machine_restart IPI */ for (;;); /* Wait if available. 
*/ } @@ -289,6 +304,8 @@ int setup_profiling_timer(unsigned int m return 0; } +static DEFINE_RAW_SPINLOCK(tlbstate_lock); + static void flush_tlb_all_ipi(void *info) { local_flush_tlb_all(); @@ -320,6 +337,7 @@ static void flush_tlb_mm_ipi(void *mm) void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); + spin_lock(&tlbstate_lock); if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { smp_call_function(flush_tlb_mm_ipi, (void *)mm, 1, 1); @@ -329,6 +347,7 @@ void flush_tlb_mm(struct mm_struct *mm) if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_mm(mm); preempt_enable(); @@ -352,6 +371,8 @@ void flush_tlb_range(struct vm_area_stru struct mm_struct *mm = vma->vm_mm; preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&mm->mm_users) != 1) || (current->mm != mm)) { struct flush_tlb_data fd; @@ -365,6 +386,7 @@ void flush_tlb_range(struct vm_area_stru if (smp_processor_id() != i) cpu_context(i, mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_range(vma, start, end); preempt_enable(); } @@ -395,6 +417,8 @@ static void flush_tlb_page_ipi(void *inf void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { preempt_disable(); + spin_lock(&tlbstate_lock); + if ((atomic_read(&vma->vm_mm->mm_users) != 1) || (current->mm != vma->vm_mm)) { struct flush_tlb_data fd; @@ -407,6 +431,7 @@ void flush_tlb_page(struct vm_area_struc if (smp_processor_id() != i) cpu_context(i, vma->vm_mm) = 0; } + spin_unlock(&tlbstate_lock); local_flush_tlb_page(vma, page); preempt_enable(); } Index: linux.prev/arch/mips/kernel/time.c =================================================================== --- linux.prev.orig/arch/mips/kernel/time.c +++ linux.prev/arch/mips/kernel/time.c @@ -50,7 +50,7 @@ */ extern volatile unsigned long wall_jiffies; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); /* * By default we provide the null RTC ops @@ -554,7 +554,7 @@ unsigned int mips_hpt_frequency; static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = SA_INTERRUPT, + .flags = SA_NODELAY | SA_INTERRUPT, .name = "timer", }; Index: linux.prev/arch/mips/kernel/traps.c =================================================================== --- linux.prev.orig/arch/mips/kernel/traps.c +++ linux.prev/arch/mips/kernel/traps.c @@ -274,7 +274,7 @@ void show_registers(struct pt_regs *regs printk("\n"); } -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); NORET_TYPE void ATTRIB_NORET die(const char * str, struct pt_regs * regs) { Index: linux.prev/arch/mips/lasat/interrupt.c =================================================================== --- linux.prev.orig/arch/mips/lasat/interrupt.c +++ linux.prev/arch/mips/lasat/interrupt.c @@ -39,18 +39,18 @@ void disable_lasat_irq(unsigned int irq_ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *lasat_int_mask &= ~(1 << irq_nr) << lasat_int_mask_shift; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void enable_lasat_irq(unsigned int irq_nr) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); *lasat_int_mask |= (1 << irq_nr) << lasat_int_mask_shift; - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_lasat_irq(unsigned int irq) Index: linux.prev/arch/mips/lasat/reset.c =================================================================== --- linux.prev.orig/arch/mips/lasat/reset.c +++ linux.prev/arch/mips/lasat/reset.c @@ -33,7 +33,7 @@ int 
lasat_boot_to_service = 0; static void lasat_machine_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); if (lasat_boot_to_service) { printk("machine_restart: Rebooting to service mode\n"); @@ -47,7 +47,7 @@ static void lasat_machine_restart(char * #define MESSAGE "System halted" static void lasat_machine_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); /* Disable interrupts and loop forever */ printk(KERN_NOTICE MESSAGE "\n"); Index: linux.prev/arch/mips/lib-32/dump_tlb.c =================================================================== --- linux.prev.orig/arch/mips/lib-32/dump_tlb.c +++ linux.prev/arch/mips/lib-32/dump_tlb.c @@ -118,7 +118,7 @@ void dump_tlb_addr(unsigned long addr) unsigned int flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; BARRIER(); write_c0_entryhi((addr & PAGE_MASK) | oldpid); @@ -127,7 +127,7 @@ void dump_tlb_addr(unsigned long addr) BARRIER(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux.prev/arch/mips/lib-32/r3k_dump_tlb.c =================================================================== --- linux.prev.orig/arch/mips/lib-32/r3k_dump_tlb.c +++ linux.prev/arch/mips/lib-32/r3k_dump_tlb.c @@ -79,13 +79,13 @@ void dump_tlb_addr(unsigned long addr) unsigned long flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; write_c0_entryhi((addr & PAGE_MASK) | oldpid); tlb_probe(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux.prev/arch/mips/lib-64/dump_tlb.c =================================================================== --- linux.prev.orig/arch/mips/lib-64/dump_tlb.c +++ linux.prev/arch/mips/lib-64/dump_tlb.c @@ -112,7 +112,7 @@ void dump_tlb_addr(unsigned long addr) unsigned int flags, oldpid; int index; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & 0xff; BARRIER(); write_c0_entryhi((addr & PAGE_MASK) | oldpid); @@ -121,7 +121,7 @@ void dump_tlb_addr(unsigned long addr) BARRIER(); index = read_c0_index(); write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (index < 0) { printk("No entry for address 0x%08lx in TLB\n", addr); Index: linux.prev/arch/mips/math-emu/cp1emu.c =================================================================== --- linux.prev.orig/arch/mips/math-emu/cp1emu.c +++ linux.prev/arch/mips/math-emu/cp1emu.c @@ -1269,7 +1269,9 @@ int fpu_emulator_cop1Handler(struct pt_r if (sig) break; + preempt_enable(); cond_resched(); + preempt_disable(); } while (xcp->cp0_epc > prevepc); /* SIGILL indicates a non-fpu instruction */ Index: linux.prev/arch/mips/mips-boards/generic/time.c =================================================================== --- linux.prev.orig/arch/mips/mips-boards/generic/time.c +++ linux.prev/arch/mips/mips-boards/generic/time.c @@ -139,7 +139,7 @@ static unsigned int __init estimate_cpu_ #if defined(CONFIG_MIPS_ATLAS) || defined(CONFIG_MIPS_MALTA) unsigned int flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Start counter exactly on falling edge of update flag */ while (CMOS_READ(RTC_REG_A) & RTC_UIP); @@ -155,7 +155,7 @@ static unsigned int __init estimate_cpu_ count = 
read_c0_count(); /* restore interrupts */ - local_irq_restore(flags); + raw_local_irq_restore(flags); #endif mips_hpt_frequency = count; @@ -178,7 +178,7 @@ void __init mips_time_init(void) { unsigned int est_freq, flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* Set Data mode - binary. */ CMOS_WRITE(CMOS_READ(RTC_CONTROL) | RTC_DM_BINARY, RTC_CONTROL); @@ -190,7 +190,7 @@ void __init mips_time_init(void) cpu_khz = est_freq / 1000; - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init mips_timer_setup(struct irqaction *irq) Index: linux.prev/arch/mips/mm/c-r4k.c =================================================================== --- linux.prev.orig/arch/mips/mm/c-r4k.c +++ linux.prev/arch/mips/mm/c-r4k.c @@ -117,9 +117,9 @@ static inline void blast_r4600_v1_icache { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_icache32(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx49_blast_icache32(void) @@ -147,9 +147,9 @@ static inline void blast_icache32_r4600_ { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_icache32_page_indexed(page); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx49_blast_icache32_page_indexed(unsigned long page) @@ -1090,7 +1090,7 @@ static int __init probe_scache(void) * This is such a bitch, you'd think they would make it easy to do * this. Away you daemons of stupidity! */ - local_irq_save(flags); + raw_local_irq_save(flags); /* Fill each size-multiple cache line with a valid tag. */ pow2 = (64 * 1024); @@ -1118,7 +1118,7 @@ static int __init probe_scache(void) break; pow2 <<= 1; } - local_irq_restore(flags); + raw_local_irq_restore(flags); addr -= begin; scache_size = addr; Index: linux.prev/arch/mips/mm/c-tx39.c =================================================================== --- linux.prev.orig/arch/mips/mm/c-tx39.c +++ linux.prev/arch/mips/mm/c-tx39.c @@ -49,7 +49,7 @@ static void tx39h_flush_icache_all(void) unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); @@ -61,7 +61,7 @@ static void tx39h_flush_icache_all(void) } write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tx39h_dma_cache_wback_inv(unsigned long addr, unsigned long size) @@ -104,39 +104,39 @@ static inline void tx39_blast_icache_pag { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16_page(addr); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx39_blast_icache_page_indexed(unsigned long addr) { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16_page_indexed(addr); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx39_blast_icache(void) { unsigned long flags, config; /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); blast_icache16(); write_c0_conf(config); - 
local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline void tx39_flush_cache_all(void) @@ -266,7 +266,7 @@ static void tx39_flush_icache_range(unsi addr = start & ~(dc_lsize - 1); aend = (end - 1) & ~(dc_lsize - 1); /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); @@ -278,7 +278,7 @@ static void tx39_flush_icache_range(unsi addr += dc_lsize; } write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -367,13 +367,13 @@ static void tx39_flush_cache_sigtramp(un protected_writeback_dcache_line(addr & ~(dc_lsize - 1)); /* disable icache (set ICE#) */ - local_irq_save(flags); + raw_local_irq_save(flags); config = read_c0_conf(); write_c0_conf(config & ~TX39_CONF_ICE); TX39_STOP_STREAMING(); protected_flush_icache_line(addr & ~(ic_lsize - 1)); write_c0_conf(config); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static __init void tx39_probe_cache(void) Index: linux.prev/arch/mips/mm/init.c =================================================================== --- linux.prev.orig/arch/mips/mm/init.c +++ linux.prev/arch/mips/mm/init.c @@ -35,7 +35,7 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; Index: linux.prev/arch/mips/mm/sc-ip22.c =================================================================== --- linux.prev.orig/arch/mips/mm/sc-ip22.c +++ linux.prev/arch/mips/mm/sc-ip22.c @@ -72,7 +72,7 @@ static void indy_sc_wback_invalidate(uns first_line = SC_INDEX(addr); last_line = SC_INDEX(addr + size - 1); - local_irq_save(flags); + raw_local_irq_save(flags); if (first_line <= last_line) { indy_sc_wipe(first_line, last_line); goto out; @@ -81,7 +81,7 @@ static void indy_sc_wback_invalidate(uns indy_sc_wipe(first_line, SC_SIZE - SC_LINE); indy_sc_wipe(0, last_line); out: - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void indy_sc_enable(void) Index: linux.prev/arch/mips/mm/sc-r5k.c =================================================================== --- linux.prev.orig/arch/mips/mm/sc-r5k.c +++ linux.prev/arch/mips/mm/sc-r5k.c @@ -61,20 +61,20 @@ static void r5k_sc_enable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); set_c0_config(R5K_CONF_SE); blast_r5000_scache(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void r5k_sc_disable(void) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); blast_r5000_scache(); clear_c0_config(R5K_CONF_SE); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static inline int __init r5k_sc_probe(void) Index: linux.prev/arch/mips/mm/tlb-andes.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-andes.c +++ linux.prev/arch/mips/mm/tlb-andes.c @@ -27,7 +27,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; unsigned long entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(CKSEG0); @@ -43,7 +43,7 @@ void local_flush_tlb_all(void) entry++; } write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -64,7 +64,7 @@ void local_flush_tlb_range(struct vm_are unsigned long flags; int size; - 
local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; if (size <= NTLB_ENTRIES_HALF) { @@ -93,7 +93,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -105,7 +105,7 @@ void local_flush_tlb_kernel_range(unsign size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size <= NTLB_ENTRIES_HALF) { int pid = read_c0_entryhi(); @@ -131,7 +131,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -143,7 +143,7 @@ void local_flush_tlb_page(struct vm_area newpid = (cpu_context(smp_processor_id(), vma->vm_mm) & ASID_MASK); page &= (PAGE_MASK << 1); - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = (read_c0_entryhi() & ASID_MASK); write_c0_entryhi(page | newpid); tlb_probe(); @@ -157,7 +157,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -170,7 +170,7 @@ void local_flush_tlb_one(unsigned long p unsigned long flags; int oldpid, idx; - local_irq_save(flags); + raw_local_irq_save(flags); page &= (PAGE_MASK << 1); oldpid = read_c0_entryhi() & 0xff; write_c0_entryhi(page); @@ -185,7 +185,7 @@ void local_flush_tlb_one(unsigned long p } write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* XXX Simplify this. On the R10000 writing a TLB entry for an virtual @@ -216,7 +216,7 @@ void __update_tlb(struct vm_area_struct vma->vm_mm) & ASID_MASK), pid); } - local_irq_save(flags); + raw_local_irq_save(flags); address &= (PAGE_MASK << 1); write_c0_entryhi(address | (pid)); pgdp = pgd_offset(vma->vm_mm, address); @@ -234,7 +234,7 @@ void __update_tlb(struct vm_area_struct tlb_write_indexed(); } write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init tlb_init(void) Index: linux.prev/arch/mips/mm/tlb-r3k.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-r3k.c +++ linux.prev/arch/mips/mm/tlb-r3k.c @@ -49,7 +49,7 @@ void local_flush_tlb_all(void) printk("[tlball]"); #endif - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(0); entry = r3k_have_wired_reg ? 
read_c0_wired() : 8; @@ -60,7 +60,7 @@ void local_flush_tlb_all(void) tlb_write_indexed(); } write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -89,7 +89,7 @@ void local_flush_tlb_range(struct vm_are printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", cpu_context(cpu, mm) & ASID_MASK, start, end); #endif - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size <= current_cpu_data.tlbsize) { int oldpid = read_c0_entryhi() & ASID_MASK; @@ -115,7 +115,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -127,7 +127,7 @@ void local_flush_tlb_kernel_range(unsign #ifdef DEBUG_TLB printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end); #endif - local_irq_save(flags); + raw_local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size <= current_cpu_data.tlbsize) { int pid = read_c0_entryhi(); @@ -153,7 +153,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -169,7 +169,7 @@ void local_flush_tlb_page(struct vm_area #endif newpid = cpu_context(cpu, vma->vm_mm) & ASID_MASK; page &= PAGE_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(page | newpid); BARRIER; @@ -183,7 +183,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -207,7 +207,7 @@ void __update_tlb(struct vm_area_struct } #endif - local_irq_save(flags); + raw_local_irq_save(flags); address &= PAGE_MASK; write_c0_entryhi(address | pid); BARRIER; @@ -221,7 +221,7 @@ void __update_tlb(struct vm_area_struct tlb_write_indexed(); } write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, @@ -240,7 +240,7 @@ void __init add_wired_entry(unsigned lon entrylo0, entryhi, pagemask); #endif - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi() & ASID_MASK; old_pagemask = read_c0_pagemask(); @@ -260,7 +260,7 @@ void __init add_wired_entry(unsigned lon write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } else if (wired < 8) { #ifdef DEBUG_TLB @@ -268,7 +268,7 @@ void __init add_wired_entry(unsigned lon entrylo0, entryhi); #endif - local_irq_save(flags); + raw_local_irq_save(flags); old_ctx = read_c0_entryhi() & ASID_MASK; write_c0_entrylo0(entrylo0); write_c0_entryhi(entryhi); @@ -277,7 +277,7 @@ void __init add_wired_entry(unsigned lon tlb_write_indexed(); write_c0_entryhi(old_ctx); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } Index: linux.prev/arch/mips/mm/tlb-r4k.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-r4k.c +++ linux.prev/arch/mips/mm/tlb-r4k.c @@ -38,7 +38,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); write_c0_entrylo0(0); @@ 
-57,7 +57,7 @@ void local_flush_tlb_all(void) } tlbw_use_hazard(); write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* All entries common to a mm share an asid. To effectively flush @@ -89,7 +89,7 @@ void local_flush_tlb_range(struct vm_are size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size <= current_cpu_data.tlbsize/2) { int oldpid = read_c0_entryhi(); int newpid = cpu_asid(cpu, mm); @@ -120,7 +120,7 @@ void local_flush_tlb_range(struct vm_are } else { drop_mmu_context(mm, cpu); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -131,7 +131,7 @@ void local_flush_tlb_kernel_range(unsign size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size <= current_cpu_data.tlbsize / 2) { int pid = read_c0_entryhi(); @@ -162,7 +162,7 @@ void local_flush_tlb_kernel_range(unsign } else { local_flush_tlb_all(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -175,7 +175,7 @@ void local_flush_tlb_page(struct vm_area newpid = cpu_asid(cpu, vma->vm_mm); page &= (PAGE_MASK << 1); - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); write_c0_entryhi(page | newpid); mtc0_tlbw_hazard(); @@ -194,7 +194,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } } @@ -207,7 +207,7 @@ void local_flush_tlb_one(unsigned long p unsigned long flags; int oldpid, idx; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); page &= (PAGE_MASK << 1); write_c0_entryhi(page); @@ -226,7 +226,7 @@ void local_flush_tlb_one(unsigned long p } write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -249,7 +249,7 @@ void __update_tlb(struct vm_area_struct if (current->active_mm != vma->vm_mm) return; - local_irq_save(flags); + raw_local_irq_save(flags); pid = read_c0_entryhi() & ASID_MASK; address &= (PAGE_MASK << 1); @@ -277,7 +277,7 @@ void __update_tlb(struct vm_area_struct else tlb_write_indexed(); tlbw_use_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #if 0 @@ -291,7 +291,7 @@ static void r4k_update_mmu_cache_hwbug(s pte_t *ptep; int idx; - local_irq_save(flags); + raw_local_irq_save(flags); address &= (PAGE_MASK << 1); asid = read_c0_entryhi() & ASID_MASK; write_c0_entryhi(address | asid); @@ -310,7 +310,7 @@ static void r4k_update_mmu_cache_hwbug(s else tlb_write_indexed(); tlbw_use_hazard(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif @@ -322,7 +322,7 @@ void __init add_wired_entry(unsigned lon unsigned long old_pagemask; unsigned long old_ctx; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); @@ -342,7 +342,7 @@ void __init add_wired_entry(unsigned lon BARRIER; write_c0_pagemask(old_pagemask); local_flush_tlb_all(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -362,7 +362,7 @@ __init int add_temporary_entry(unsigned unsigned long old_pagemask; unsigned long old_ctx; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); old_pagemask = read_c0_pagemask(); 
@@ -386,7 +386,7 @@ __init int add_temporary_entry(unsigned write_c0_entryhi(old_ctx); write_c0_pagemask(old_pagemask); out: - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } Index: linux.prev/arch/mips/mm/tlb-r8k.c =================================================================== --- linux.prev.orig/arch/mips/mm/tlb-r8k.c +++ linux.prev/arch/mips/mm/tlb-r8k.c @@ -35,7 +35,7 @@ void local_flush_tlb_all(void) unsigned long old_ctx; int entry; - local_irq_save(flags); + raw_local_irq_save(flags); /* Save old context and create impossible VPN2 value */ old_ctx = read_c0_entryhi(); write_c0_entrylo(0); @@ -49,7 +49,7 @@ void local_flush_tlb_all(void) } tlbw_use_hazard(); write_c0_entryhi(old_ctx); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_mm(struct mm_struct *mm) @@ -74,7 +74,7 @@ void local_flush_tlb_range(struct vm_are size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; - local_irq_save(flags); + raw_local_irq_save(flags); if (size > TFP_TLB_SIZE / 2) { drop_mmu_context(mm, cpu); @@ -106,7 +106,7 @@ void local_flush_tlb_range(struct vm_are write_c0_entryhi(oldpid); out_restore: - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Usable for KV1 addresses only! */ @@ -123,7 +123,7 @@ void local_flush_tlb_kernel_range(unsign return; } - local_irq_save(flags); + raw_local_irq_save(flags); write_c0_entrylo(0); @@ -145,7 +145,7 @@ void local_flush_tlb_kernel_range(unsign tlb_write(); } - local_irq_restore(flags); + raw_local_irq_restore(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) @@ -160,7 +160,7 @@ void local_flush_tlb_page(struct vm_area newpid = cpu_asid(cpu, vma->vm_mm); page &= PAGE_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); oldpid = read_c0_entryhi(); write_c0_vaddr(page); write_c0_entryhi(newpid); @@ -175,7 +175,7 @@ void local_flush_tlb_page(struct vm_area finish: write_c0_entryhi(oldpid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -199,7 +199,7 @@ void __update_tlb(struct vm_area_struct pid = read_c0_entryhi() & ASID_MASK; - local_irq_save(flags); + raw_local_irq_save(flags); address &= PAGE_MASK; write_c0_vaddr(address); write_c0_entryhi(pid); @@ -212,7 +212,7 @@ void __update_tlb(struct vm_area_struct tlb_write(); write_c0_entryhi(pid); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void __init probe_tlb(unsigned long config) Index: linux.prev/arch/mips/momentum/ocelot_g/irq.c =================================================================== --- linux.prev.orig/arch/mips/momentum/ocelot_g/irq.c +++ linux.prev/arch/mips/momentum/ocelot_g/irq.c @@ -58,7 +58,7 @@ void __init arch_init_irq(void) * int-handler is not on bootstrap */ clear_c0_status(ST0_IM); - local_irq_disable(); + raw_local_irq_disable(); /* Sets the first-level interrupt dispatcher. 
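
All of these MIPS conversions follow one rule: under PREEMPT_RT the plain local_irq_save()/local_irq_restore() pair can become a soft, per-task interrupt disable that leaves hardware interrupts live, so any sequence that programs CP0, TLB or interrupt-controller registers directly must switch to the raw_ variants to get a genuine hardware-level disable. A minimal sketch of the pattern, with hypothetical register accessors standing in for the CP0 helpers:

	/*
	 * Sketch only.  read_hw_reg()/write_hw_reg() are hypothetical
	 * stand-ins for the CP0 accessors used in the hunks above.
	 */
	extern unsigned int read_hw_reg(void);
	extern void write_hw_reg(unsigned int val);

	static void update_hw_state(unsigned int set, unsigned int clear)
	{
		unsigned long flags;

		raw_local_irq_save(flags);	/* hard disable: no IRQ, no preemption */
		write_hw_reg((read_hw_reg() | set) & ~clear);
		raw_local_irq_restore(flags);
	}
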
*/ set_except_vector(0, ocelot_handle_int); Index: linux.prev/arch/mips/pci/ops-au1000.c =================================================================== --- linux.prev.orig/arch/mips/pci/ops-au1000.c +++ linux.prev/arch/mips/pci/ops-au1000.c @@ -93,7 +93,7 @@ static int config_access(unsigned char a return -1; } - local_irq_save(flags); + raw_local_irq_save(flags); au_writel(((0x2000 << 16) | (au_readl(Au1500_PCI_STATCMD) & 0xffff)), Au1500_PCI_STATCMD); au_sync_udelay(1); @@ -125,7 +125,7 @@ static int config_access(unsigned char a if (board_pci_idsel) { if (board_pci_idsel(device, 1) == 0) { *data = 0xffffffff; - local_irq_restore(flags); + raw_local_irq_restore(flags); return -1; } } @@ -184,7 +184,7 @@ static int config_access(unsigned char a (void)board_pci_idsel(device, 0); } - local_irq_restore(flags); + raw_local_irq_restore(flags); return error; #endif } Index: linux.prev/arch/mips/pmc-sierra/yosemite/smp.c =================================================================== --- linux.prev.orig/arch/mips/pmc-sierra/yosemite/smp.c +++ linux.prev/arch/mips/pmc-sierra/yosemite/smp.c @@ -19,7 +19,7 @@ static unsigned char launchstack[LAUNCHS static void __init prom_smp_bootstrap(void) { - local_irq_disable(); + raw_local_irq_disable(); while (spin_is_locked(&launch_lock)); Index: linux.prev/arch/mips/sgi-ip22/ip22-eisa.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip22/ip22-eisa.c +++ linux.prev/arch/mips/sgi-ip22/ip22-eisa.c @@ -98,13 +98,13 @@ static void enable_eisa1_irq(unsigned in unsigned long flags; u8 mask; - local_irq_save(flags); + raw_local_irq_save(flags); mask = inb(EISA_INT1_MASK); mask &= ~((u8) (1 << irq)); outb(mask, EISA_INT1_MASK); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_eisa1_irq(unsigned int irq) @@ -160,13 +160,13 @@ static void enable_eisa2_irq(unsigned in unsigned long flags; u8 mask; - local_irq_save(flags); + raw_local_irq_save(flags); mask = inb(EISA_INT2_MASK); mask &= ~((u8) (1 << (irq - 8))); outb(mask, EISA_INT2_MASK); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_eisa2_irq(unsigned int irq) Index: linux.prev/arch/mips/sgi-ip22/ip22-int.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip22/ip22-int.c +++ linux.prev/arch/mips/sgi-ip22/ip22-int.c @@ -44,12 +44,12 @@ static void enable_local0_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* don't allow mappable interrupt to be enabled from setup_irq, * we have our own way to do so */ if (irq != SGI_MAP_0_IRQ) sgint->imask0 |= (1 << (irq - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local0_irq(unsigned int irq) @@ -62,9 +62,9 @@ static void disable_local0_irq(unsigned { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask0 &= ~(1 << (irq - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local0_irq disable_local0_irq @@ -90,12 +90,12 @@ static void enable_local1_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* don't allow mappable interrupt to be enabled from setup_irq, * we have our own way to do so */ if (irq != SGI_MAP_1_IRQ) sgint->imask1 |= (1 << (irq - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int 
startup_local1_irq(unsigned int irq) @@ -108,9 +108,9 @@ void disable_local1_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask1 &= ~(1 << (irq - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local1_irq disable_local1_irq @@ -136,10 +136,10 @@ static void enable_local2_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask0 |= (1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0)); sgint->cmeimask0 |= (1 << (irq - SGINT_LOCAL2)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local2_irq(unsigned int irq) @@ -152,11 +152,11 @@ void disable_local2_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->cmeimask0 &= ~(1 << (irq - SGINT_LOCAL2)); if (!sgint->cmeimask0) sgint->imask0 &= ~(1 << (SGI_MAP_0_IRQ - SGINT_LOCAL0)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local2_irq disable_local2_irq @@ -182,10 +182,10 @@ static void enable_local3_irq(unsigned i { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->imask1 |= (1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1)); sgint->cmeimask1 |= (1 << (irq - SGINT_LOCAL3)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static unsigned int startup_local3_irq(unsigned int irq) @@ -198,11 +198,11 @@ void disable_local3_irq(unsigned int irq { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); sgint->cmeimask1 &= ~(1 << (irq - SGINT_LOCAL3)); if (!sgint->cmeimask1) sgint->imask1 &= ~(1 << (SGI_MAP_1_IRQ - SGINT_LOCAL1)); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #define shutdown_local3_irq disable_local3_irq Index: linux.prev/arch/mips/sgi-ip22/ip22-reset.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip22/ip22-reset.c +++ linux.prev/arch/mips/sgi-ip22/ip22-reset.c @@ -66,7 +66,7 @@ static void sgi_machine_power_off(void) { unsigned int tmp; - local_irq_disable(); + raw_local_irq_disable(); /* Disable watchdog */ tmp = hpc3c0->rtcregs[RTC_CMD] & 0xff; Index: linux.prev/arch/mips/sgi-ip27/ip27-smp.c =================================================================== --- linux.prev.orig/arch/mips/sgi-ip27/ip27-smp.c +++ linux.prev/arch/mips/sgi-ip27/ip27-smp.c @@ -179,7 +179,7 @@ void __init prom_boot_secondary(int cpu, void prom_init_secondary(void) { per_cpu_init(); - local_irq_enable(); + raw_local_irq_enable(); } void __init prom_cpus_done(void) Index: linux.prev/arch/mips/sibyte/sb1250/irq.c =================================================================== --- linux.prev.orig/arch/mips/sibyte/sb1250/irq.c +++ linux.prev/arch/mips/sibyte/sb1250/irq.c @@ -86,7 +86,7 @@ static struct hw_interrupt_type sb1250_i /* Store the CPU id (not the logical number) */ int sb1250_irq_owner[SB1250_NR_IRQS]; -DEFINE_SPINLOCK(sb1250_imr_lock); +DEFINE_RAW_SPINLOCK(sb1250_imr_lock); void sb1250_mask_irq(int cpu, int irq) { @@ -267,7 +267,7 @@ static irqreturn_t sb1250_dummy_handler static struct irqaction sb1250_dummy_action = { .handler = sb1250_dummy_handler, - .flags = 0, + .flags = SA_NODELAY, .mask = CPU_MASK_NONE, .name = "sb1250-private", .next = NULL, Index: linux.prev/arch/mips/sibyte/sb1250/smp.c =================================================================== --- linux.prev.orig/arch/mips/sibyte/sb1250/smp.c +++ linux.prev/arch/mips/sibyte/sb1250/smp.c @@ 
-59,7 +59,7 @@ void sb1250_smp_finish(void) { extern void sb1250_time_init(void); sb1250_time_init(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux.prev/arch/mips/sni/reset.c =================================================================== --- linux.prev.orig/arch/mips/sni/reset.c +++ linux.prev/arch/mips/sni/reset.c @@ -30,7 +30,7 @@ void sni_machine_restart(char *command) /* This does a normal via the keyboard controller like a PC. We can do that easier ... */ - local_irq_disable(); + raw_local_irq_disable(); for (;;) { for (i=0; i<100; i++) { kb_wait(); Index: linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c =================================================================== --- linux.prev.orig/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c +++ linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c @@ -669,7 +669,7 @@ void __init arch_init_irq(void) { extern void tx4927_irq_init(void); - local_irq_disable(); + raw_local_irq_disable(); tx4927_irq_init(); toshiba_rbtx4927_irq_ioc_init(); Index: linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c =================================================================== --- linux.prev.orig/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c +++ linux.prev/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c @@ -732,7 +732,7 @@ void toshiba_rbtx4927_restart(char *comm reg_wr08(RBTX4927_SW_RESET_DO, RBTX4927_SW_RESET_DO_SET); /* do something passive while waiting for reset */ - local_irq_disable(); + raw_local_irq_disable(); while (1) asm_wait(); @@ -743,7 +743,7 @@ void toshiba_rbtx4927_restart(char *comm void toshiba_rbtx4927_halt(void) { printk(KERN_NOTICE "System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) { asm_wait(); } Index: linux.prev/arch/mips/vr41xx/common/pmu.c =================================================================== --- linux.prev.orig/arch/mips/vr41xx/common/pmu.c +++ linux.prev/arch/mips/vr41xx/common/pmu.c @@ -62,7 +62,7 @@ static inline void software_reset(void) static void vr41xx_restart(char *command) { - local_irq_disable(); + raw_local_irq_disable(); software_reset(); printk(KERN_NOTICE "\nYou can reset your system\n"); while (1) ; @@ -70,14 +70,14 @@ static void vr41xx_restart(char *command static void vr41xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); printk(KERN_NOTICE "\nYou can turn off the power supply\n"); while (1) ; } static void vr41xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); printk(KERN_NOTICE "\nYou can turn off the power supply\n"); while (1) ; } Index: linux.prev/arch/powerpc/Kconfig =================================================================== --- linux.prev.orig/arch/powerpc/Kconfig +++ linux.prev/arch/powerpc/Kconfig @@ -33,13 +33,6 @@ config GENERIC_HARDIRQS bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -484,6 +477,18 @@ config HIGHMEM source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "fs/Kconfig.binfmt" # We optimistically allocate largepages from the VM, so make the limit Index: linux.prev/arch/powerpc/boot/Makefile =================================================================== --- linux.prev.orig/arch/powerpc/boot/Makefile +++ 
linux.prev/arch/powerpc/boot/Makefile @@ -28,6 +28,14 @@ BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAG BOOTLFLAGS := -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h zliblinuxheader := zlib.h zconf.h zutil.h @@ -43,7 +51,7 @@ obj-boot := $(addsuffix .o, $(basename $ BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) quiet_cmd_copy_zlib = COPY $@ - cmd_copy_zlib = sed "s@__attribute_used__@@;s@]\+\).*@\"\1\"@" $< > $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@.include.@@;s@.include.@@;s@.*spin.*lock.*@@;s@.*SPINLOCK.*@@;s@]\+\).*@\"\1\"@" $< > $@ quiet_cmd_copy_zlibheader = COPY $@ cmd_copy_zlibheader = sed "s@]\+\).*@\"\1\"@" $< > $@ Index: linux.prev/arch/powerpc/kernel/Makefile =================================================================== --- linux.prev.orig/arch/powerpc/kernel/Makefile +++ linux.prev/arch/powerpc/kernel/Makefile @@ -11,9 +11,10 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif -obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ +obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o obj-y += vdso32/ +obj-$(CONFIG_ASM_SEMAPHORES) += semaphore.o obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o systbl.o \ paca.o ioctl32.o cpu_setup_power4.o \ Index: linux.prev/arch/powerpc/kernel/entry_32.S =================================================================== --- linux.prev.orig/arch/powerpc/kernel/entry_32.S +++ linux.prev/arch/powerpc/kernel/entry_32.S @@ -239,7 +239,7 @@ ret_from_syscall: SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- syscall_exit_work syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -317,7 +317,7 @@ syscall_exit_work: rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ lwz r9,TI_FLAGS(r12) 5: - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1f lwz r5,_MSR(r1) andi. r5,r5,MSR_PR @@ -658,7 +658,7 @@ user_exc_return: /* r10 contains MSR_KE /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -876,7 +876,7 @@ load_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -998,3 +998,85 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_RTAS */ + +#ifdef CONFIG_MCOUNT +/* + * mcount() is not the same as _mcount(). The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. 
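
The mcount()/_mcount() stubs introduced here are the PowerPC entry points for the latency tracer: -pg makes the compiler call _mcount() before a function has built its stack frame, so the stub must spill the volatile registers itself before it can call into C. Reduced to its logic, the gate both stubs implement is only a few lines; mcount_enabled and __trace are the symbols the assembly references, and the C rendering below is a sketch whose signature is inferred from the register usage, not code from the patch:

	/* Sketch: the logic of the asm stubs, not actual patch code. */
	extern int mcount_enabled;		/* runtime on/off switch for tracing */
	extern void __trace(unsigned long eip, unsigned long parent_eip);

	static void mcount_logic_sketch(unsigned long eip, unsigned long parent_eip)
	{
		if (!mcount_enabled)
			return;
		__trace(eip, parent_eip);	/* record caller and caller's caller */
	}

This is also why the boot Makefile hunk above filters -pg out of CFLAGS: the standalone boot loader has no __trace to link against.
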
+ */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function + * preamble, before the stack frame is created. An example of this preamble + * code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. */ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ Index: linux.prev/arch/powerpc/kernel/idle_64.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/idle_64.c +++ linux.prev/arch/powerpc/kernel/idle_64.c @@ -37,7 +37,7 @@ void default_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { while (!need_resched() && !cpu_is_offline(cpu)) { ppc64_runlatch_off(); @@ -53,9 +53,11 @@ void default_idle(void) } ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } @@ -71,9 +73,11 @@ void native_idle(void) if (need_resched()) { ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } if (cpu_is_offline(smp_processor_id()) && Index: linux.prev/arch/powerpc/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/init_task.c +++ linux.prev/arch/powerpc/kernel/init_task.c @@ -3,12 +3,12 @@ #include #include #include -#include +#include #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/powerpc/kernel/irq.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/irq.c +++ 
linux.prev/arch/powerpc/kernel/irq.c @@ -100,8 +100,6 @@ extern atomic_t ipi_sent; #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 -EXPORT_SYMBOL(irq_desc); - int distribute_irqs = 1; u64 ppc64_interrupt_controller; #endif /* CONFIG_PPC64 */ Index: linux.prev/arch/powerpc/kernel/ppc_ksyms.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/ppc_ksyms.c +++ linux.prev/arch/powerpc/kernel/ppc_ksyms.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -221,16 +220,11 @@ EXPORT_SYMBOL(screen_info); #ifdef CONFIG_PPC32 EXPORT_SYMBOL(__delay); EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(cacheable_memcpy); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); - #ifdef CONFIG_8xx EXPORT_SYMBOL(cpm_install_handler); EXPORT_SYMBOL(cpm_free_handler); Index: linux.prev/arch/powerpc/kernel/process.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/process.c +++ linux.prev/arch/powerpc/kernel/process.c @@ -327,10 +327,10 @@ struct task_struct *__switch_to(struct t } #endif - local_irq_save(flags); + raw_local_irq_save(flags); last = _switch(old_thread, new_thread); - local_irq_restore(flags); + raw_local_irq_restore(flags); return last; } Index: linux.prev/arch/powerpc/kernel/rtas.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/rtas.c +++ linux.prev/arch/powerpc/kernel/rtas.c @@ -31,7 +31,7 @@ #include struct rtas_t rtas = { - .lock = SPIN_LOCK_UNLOCKED + .lock = SPIN_LOCK_UNLOCKED(rtas.lock) }; EXPORT_SYMBOL(rtas); @@ -620,7 +620,7 @@ void rtas_stop_self(void) { struct rtas_args *rtas_args = &rtas_stop_self_args; - local_irq_disable(); + raw_local_irq_disable(); BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE); Index: linux.prev/arch/powerpc/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/semaphore.c +++ linux.prev/arch/powerpc/kernel/semaphore.c @@ -31,7 +31,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -50,7 +50,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -63,7 +63,7 @@ void __up(struct semaphore *sem) __sem_update_count(sem, 1); wake_up(&sem->wait); } -EXPORT_SYMBOL(__up); +EXPORT_SYMBOL(__compat_up); /* * Note that when we come in to __down or __down_interruptible, @@ -73,7 +73,7 @@ EXPORT_SYMBOL(__up); * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. 
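
The __compat_* renames carried through this file implement the RT split between lock types: under PREEMPT_RT, struct semaphore becomes a sleeping, mutex-like object, while code that genuinely relies on counting semantics is moved onto compat_semaphore, which keeps the atomic-count protocol shown here. A usage sketch under stated assumptions: the compat_sema_init()/compat_down()/compat_up() wrapper names are assumed, since only the __compat_* slow paths and compat_sem_is_locked() appear in the patch:

	/* Sketch: keeping genuine counting-semaphore behaviour under RT. */
	static struct compat_semaphore dma_channels;

	static void example_init(void)
	{
		compat_sema_init(&dma_channels, 4);	/* assumed name: four holders */
	}

	static void example_use(void)
	{
		compat_down(&dma_channels);	/* assumed name: may sleep, counts down */
		/* ... claim one channel ... */
		compat_up(&dma_channels);	/* wakes a __compat_down() sleeper */
	}
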
*/ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -101,9 +101,9 @@ void __sched __down(struct semaphore *se */ wake_up(&sem->wait); } -EXPORT_SYMBOL(__down); +EXPORT_SYMBOL(__compat_down); -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore *sem) { int retval = 0; struct task_struct *tsk = current; @@ -132,4 +132,10 @@ int __sched __down_interruptible(struct wake_up(&sem->wait); return retval; } -EXPORT_SYMBOL(__down_interruptible); +EXPORT_SYMBOL(__compat_down_interruptible); + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} +EXPORT_SYMBOL(compat_sem_is_locked); Index: linux.prev/arch/powerpc/kernel/setup-common.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/setup-common.c +++ linux.prev/arch/powerpc/kernel/setup-common.c @@ -105,7 +105,7 @@ void machine_restart(char *cmd) smp_send_stop(); #endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -117,7 +117,7 @@ void machine_power_off(void) smp_send_stop(); #endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } /* Used by the G5 thermal driver */ @@ -134,7 +134,7 @@ void machine_halt(void) smp_send_stop(); #endif printk(KERN_EMERG "System Halted, OK to turn off power\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux.prev/arch/powerpc/kernel/smp-tbsync.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/smp-tbsync.c +++ linux.prev/arch/powerpc/kernel/smp-tbsync.c @@ -47,7 +47,7 @@ void __devinit smp_generic_take_timebase int cmd; u64 tb; - local_irq_disable(); + raw_local_irq_disable(); while (!running) barrier(); rmb(); @@ -71,7 +71,7 @@ void __devinit smp_generic_take_timebase set_tb(tb >> 32, tb & 0xfffffffful); enter_contest(tbsync->mark, -1); } - local_irq_enable(); + raw_local_irq_enable(); } static int __devinit start_contest(int cmd, long offset, int num) @@ -82,7 +82,7 @@ static int __devinit start_contest(int c tbsync->cmd = cmd; - local_irq_disable(); + raw_local_irq_disable(); for (i = -3; i < num; ) { tb = get_tb() + 400; tbsync->tb = tb + offset; @@ -105,7 +105,7 @@ static int __devinit start_contest(int c if (i++ > 0) score += tbsync->race_result; } - local_irq_enable(); + raw_local_irq_enable(); return score; } Index: linux.prev/arch/powerpc/kernel/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/smp.c +++ linux.prev/arch/powerpc/kernel/smp.c @@ -140,6 +140,16 @@ void smp_send_reschedule(int cpu) smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_ops->message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE); +} + #ifdef CONFIG_DEBUGGER void smp_send_debugger_break(int cpu) { @@ -149,7 +159,7 @@ void smp_send_debugger_break(int cpu) static void stop_this_cpu(void *dummy) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -164,7 +174,7 @@ void smp_send_stop(void) * static memory requirements. 
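
smp_send_reschedule_allbutself() gives the RT balancer a broadcast kick: rather than computing which CPU should pull a starving real-time task, the sender interrupts every other CPU and lets each one re-run its own scheduler pick. A purely illustrative call site, with the overload condition made up for the example:

	/* Illustrative only: broadcast a kick when runnable RT tasks pile up. */
	static void rt_overload_kick(int nr_runnable_rt_here)
	{
		if (nr_runnable_rt_here > 1)	/* another CPU might run one now */
			smp_send_reschedule_allbutself();
	}
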
It also looks cleaner. * Stolen from the i386 version. */ -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock); +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); @@ -200,7 +210,7 @@ int smp_call_function (void (*func) (voi u64 timeout; /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); data.func = func; data.info = info; @@ -529,7 +539,7 @@ int __devinit start_secondary(void *unus cpu_set(cpu, cpu_online_map); spin_unlock(&call_lock); - local_irq_enable(); + raw_local_irq_enable(); cpu_idle(); return 0; Index: linux.prev/arch/powerpc/kernel/time.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/time.c +++ linux.prev/arch/powerpc/kernel/time.c @@ -72,6 +72,9 @@ #endif #include +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); + /* keep track of when we need to update the rtc */ time_t last_rtc_update; extern int piranha_simulator; @@ -100,7 +103,7 @@ unsigned long tb_ticks_per_sec; u64 tb_to_xs; unsigned tb_to_us; unsigned long processor_freq; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); u64 tb_to_ns_scale; @@ -335,7 +338,7 @@ static __inline__ void timer_recalc_offs } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -698,6 +701,7 @@ void __init time_init(void) tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); div128_by_32(1024*1024, 0, tb_ticks_per_sec, &res); tb_to_xs = res.result_low; + cpu_khz = ppc_tb_freq / 1000; #ifdef CONFIG_PPC64 get_paca()->default_decr = tb_ticks_per_jiffy; Index: linux.prev/arch/powerpc/kernel/traps.c =================================================================== --- linux.prev.orig/arch/powerpc/kernel/traps.c +++ linux.prev/arch/powerpc/kernel/traps.c @@ -91,7 +91,7 @@ int register_die_notifier(struct notifie * Trap & Exception support */ -static DEFINE_SPINLOCK(die_lock); +static DEFINE_RAW_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { @@ -182,6 +182,11 @@ void _exception(int signr, struct pt_reg return; } +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; Index: linux.prev/arch/powerpc/lib/locks.c =================================================================== --- linux.prev.orig/arch/powerpc/lib/locks.c +++ linux.prev/arch/powerpc/lib/locks.c @@ -25,7 +25,7 @@ #include #include -void __spin_yield(raw_spinlock_t *lock) +void __spin_yield(__raw_spinlock_t *lock) { unsigned int lock_value, holder_cpu, yield_count; struct paca_struct *holder_paca; @@ -84,7 +84,7 @@ void __rw_yield(raw_rwlock_t *rw) } #endif -void __raw_spin_unlock_wait(raw_spinlock_t *lock) +void __raw_spin_unlock_wait(__raw_spinlock_t *lock) { while (lock->slock) { HMT_low(); Index: linux.prev/arch/powerpc/mm/fault.c =================================================================== --- linux.prev.orig/arch/powerpc/mm/fault.c +++ linux.prev/arch/powerpc/mm/fault.c @@ -117,8 +117,8 @@ static void do_dabr(struct pt_regs *regs * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. 
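
The notrace annotations (profile_pc() above, do_page_fault() just below) exist for the same CONFIG_MCOUNT machinery: these functions execute inside or underneath the trace path, and letting -pg instrument them would recurse straight back into mcount. A sketch, where the attribute expansion is an assumption about this tree:

	/*
	 * Sketch: notrace excludes a function from -pg instrumentation so
	 * the tracer cannot re-enter itself.  The expansion below is an
	 * assumption, not taken from the patch.
	 */
	#define notrace __attribute__((no_instrument_function))

	static unsigned long notrace profile_pc_sketch(unsigned long pc)
	{
		return pc;	/* runs on the trace path: must not call mcount */
	}
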
*/ -int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +int __kprobes notrace do_page_fault(struct pt_regs *regs, + unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; struct mm_struct *mm = current->mm; Index: linux.prev/arch/powerpc/mm/init_32.c =================================================================== --- linux.prev.orig/arch/powerpc/mm/init_32.c +++ linux.prev/arch/powerpc/mm/init_32.c @@ -57,7 +57,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; Index: linux.prev/arch/powerpc/mm/tlb_64.c =================================================================== --- linux.prev.orig/arch/powerpc/mm/tlb_64.c +++ linux.prev/arch/powerpc/mm/tlb_64.c @@ -38,7 +38,7 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, p /* This is declared as we are using the more or less generic * include/asm-ppc64/tlb.h file -- tgall */ -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); unsigned long pte_freelist_forced_free; Index: linux.prev/arch/powerpc/platforms/cell/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/cell/smp.c +++ linux.prev/arch/powerpc/platforms/cell/smp.c @@ -134,7 +134,7 @@ static void __devinit smp_iic_setup_cpu( iic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static void __devinit cell_give_timebase(void) Index: linux.prev/arch/powerpc/platforms/chrp/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/chrp/smp.c +++ linux.prev/arch/powerpc/platforms/chrp/smp.c @@ -47,7 +47,7 @@ static void __devinit smp_chrp_setup_cpu mpic_setup_this_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit smp_chrp_give_timebase(void) Index: linux.prev/arch/powerpc/platforms/chrp/time.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/chrp/time.c +++ linux.prev/arch/powerpc/platforms/chrp/time.c @@ -28,7 +28,7 @@ #include #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; static int nvram_as1 = NVRAM_AS1; static int nvram_as0 = NVRAM_AS0; Index: linux.prev/arch/powerpc/platforms/iseries/setup.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/iseries/setup.c +++ linux.prev/arch/powerpc/platforms/iseries/setup.c @@ -673,16 +673,18 @@ static void yield_shared_processor(void) static void iseries_shared_idle(void) { while (1) { - while (!need_resched() && !hvlpevent_is_pending()) { - local_irq_disable(); + while (!need_resched() && !need_resched_delayed() + && !hvlpevent_is_pending()) { + raw_local_irq_disable(); ppc64_runlatch_off(); /* Recheck with irqs off */ - if (!need_resched() && !hvlpevent_is_pending()) + if (!need_resched() && !need_resched_delayed() + && !hvlpevent_is_pending()) yield_shared_processor(); HMT_medium(); - local_irq_enable(); + raw_local_irq_enable(); } ppc64_runlatch_on(); Index: linux.prev/arch/powerpc/platforms/powermac/feature.c 
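
DEFINE_PER_CPU_LOCKED pairs each CPU's mmu_gather with a lock because, under PREEMPT_RT, the unmap paths that use the gather run preemptibly and can migrate, so disabled preemption no longer protects "this CPU's" instance. A sketch of the intended access pattern; the get_cpu_var_locked()/put_cpu_var_locked() accessor names are an assumption, since the hunks only show the definition macro:

	/* Sketch: per-CPU data that stays consistent even across preemption. */
	DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers);

	static void example_tlb_batch(void)
	{
		int cpu;
		struct mmu_gather *tlb = &get_cpu_var_locked(mmu_gathers, &cpu);

		/* ... fill the gather; safe even if this task migrates ... */
		put_cpu_var_locked(mmu_gathers, cpu);
	}
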
=================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/feature.c +++ linux.prev/arch/powerpc/platforms/powermac/feature.c @@ -63,7 +63,7 @@ extern struct device_node *k2_skiplist[2 * We use a single global lock to protect accesses. Each driver has * to take care of its own locking */ -static DEFINE_SPINLOCK(feature_lock); +static DEFINE_RAW_SPINLOCK(feature_lock); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); Index: linux.prev/arch/powerpc/platforms/powermac/nvram.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/nvram.c +++ linux.prev/arch/powerpc/platforms/powermac/nvram.c @@ -81,7 +81,7 @@ static int is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; // XXX Turn that into a sem -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); extern int pmac_newworld; extern int system_running; Index: linux.prev/arch/powerpc/platforms/powermac/pic.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/pic.c +++ linux.prev/arch/powerpc/platforms/powermac/pic.c @@ -69,7 +69,7 @@ static int max_irqs; static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); #define GATWICK_IRQ_POOL_SIZE 10 static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE]; Index: linux.prev/arch/powerpc/platforms/powermac/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/powermac/smp.c +++ linux.prev/arch/powerpc/platforms/powermac/smp.c @@ -436,7 +436,7 @@ struct smp_ops_t psurge_smp_ops = { static struct device_node *pmac_tb_clock_chip_host; static u8 pmac_tb_pulsar_addr; static void (*pmac_tb_freeze)(int freeze); -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase; static void smp_core99_cypress_tb_freeze(int freeze) Index: linux.prev/arch/powerpc/platforms/pseries/setup.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/pseries/setup.c +++ linux.prev/arch/powerpc/platforms/pseries/setup.c @@ -336,7 +336,7 @@ static void __init pSeries_discover_pic static void pSeries_mach_cpu_die(void) { - local_irq_disable(); + raw_local_irq_disable(); idle_task_exit(); /* Some hardware requires clearing the CPPR, while other hardware does not * it is safe either way @@ -458,7 +458,7 @@ static inline void dedicated_idle_sleep( /* Only sleep if the other thread is not idle */ if (!(ppaca->lppaca.idle)) { - local_irq_disable(); + raw_local_irq_disable(); /* * We are about to sleep the thread and so wont be polling any @@ -474,10 +474,10 @@ static inline void dedicated_idle_sleep( * a prod occurs. Returning from the cede enables external * interrupts. 
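
These pseries idle changes all close the same race: the decision to cede the processor must be taken with hardware interrupts off, after a final need_resched()/need_resched_delayed() check, or a wakeup landing between the check and the hypervisor call is lost until the next interrupt. The shape of the pattern, reduced to a sketch:

	/* Sketch of the check-then-sleep pattern these idle hunks converge on. */
	static void idle_sleep_once(void)
	{
		raw_local_irq_disable();	/* close the wakeup race window */
		if (!need_resched() && !need_resched_delayed())
			cede_processor();	/* hypervisor sleep; returns with irqs on */
		else
			raw_local_irq_enable();	/* work already pending: go run it */
	}
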
*/ - if (!need_resched()) + if (!need_resched() && !need_resched_delayed()) cede_processor(); else - local_irq_enable(); + raw_local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); } else { /* @@ -552,8 +552,9 @@ static void pseries_shared_idle(void) */ lpaca->lppaca.idle = 1; - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); + while (!need_resched() && !need_resched_delayed() && + !cpu_is_offline(cpu)) { + raw_local_irq_disable(); ppc64_runlatch_off(); /* @@ -569,7 +570,7 @@ static void pseries_shared_idle(void) if (!need_resched()) cede_processor(); else - local_irq_enable(); + raw_local_irq_enable(); HMT_medium(); } @@ -577,8 +578,8 @@ static void pseries_shared_idle(void) lpaca->lppaca.idle = 0; ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) Index: linux.prev/arch/powerpc/platforms/pseries/smp.c =================================================================== --- linux.prev.orig/arch/powerpc/platforms/pseries/smp.c +++ linux.prev/arch/powerpc/platforms/pseries/smp.c @@ -345,7 +345,7 @@ static void __devinit smp_xics_setup_cpu } #endif /* CONFIG_XICS */ -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned long timebase = 0; static void __devinit pSeries_give_timebase(void) Index: linux.prev/arch/powerpc/xmon/xmon.c =================================================================== --- linux.prev.orig/arch/powerpc/xmon/xmon.c +++ linux.prev/arch/powerpc/xmon/xmon.c @@ -522,10 +522,10 @@ irqreturn_t xmon_irq(int irq, void *d, struct pt_regs *regs) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); printf("Keyboard interrupt\n"); xmon(regs); - local_irq_restore(flags); + raw_local_irq_restore(flags); return IRQ_HANDLED; } Index: linux.prev/arch/ppc/8260_io/enet.c =================================================================== --- linux.prev.orig/arch/ppc/8260_io/enet.c +++ linux.prev/arch/ppc/8260_io/enet.c @@ -116,7 +116,7 @@ struct scc_enet_private { scc_t *sccp; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux.prev/arch/ppc/8260_io/fcc_enet.c =================================================================== --- linux.prev.orig/arch/ppc/8260_io/fcc_enet.c +++ linux.prev/arch/ppc/8260_io/fcc_enet.c @@ -377,7 +377,7 @@ struct fcc_enet_private { volatile fcc_enet_t *ep; struct net_device_stats stats; uint tx_free; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux.prev/arch/ppc/8xx_io/commproc.c =================================================================== --- linux.prev.orig/arch/ppc/8xx_io/commproc.c +++ linux.prev/arch/ppc/8xx_io/commproc.c @@ -356,7 +356,7 @@ cpm_setbrg(uint brg, uint rate) /* * dpalloc / dpfree bits. */ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* * 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... 
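
The spinlock_t members in the 8260/8xx Ethernet drivers become raw_spinlock_t for the same reason as the irq-path locks earlier in the series: they are taken from contexts that remain hard-irq under RT, so they must stay true spinning locks instead of being converted into sleeping RT locks. The resulting driver-side pattern (RT's type-aware lock operations let the ordinary spin_lock_irqsave() API operate on the raw type):

	/* Sketch: a driver-private lock that must not become a sleeping lock. */
	struct example_enet_private {
		raw_spinlock_t lock;	/* taken from hard (non-threaded) IRQ context */
		unsigned int tx_full;
	};

	static void example_tx_stall(struct example_enet_private *priv)
	{
		unsigned long flags;

		spin_lock_irqsave(&priv->lock, flags);	/* resolves to the raw op */
		priv->tx_full = 1;
		spin_unlock_irqrestore(&priv->lock, flags);
	}
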
Index: linux.prev/arch/ppc/8xx_io/enet.c =================================================================== --- linux.prev.orig/arch/ppc/8xx_io/enet.c +++ linux.prev/arch/ppc/8xx_io/enet.c @@ -144,7 +144,7 @@ struct scc_enet_private { unsigned char *rx_vaddr[RX_RING_SIZE]; struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; }; static int scc_enet_open(struct net_device *dev); Index: linux.prev/arch/ppc/8xx_io/fec.c =================================================================== --- linux.prev.orig/arch/ppc/8xx_io/fec.c +++ linux.prev/arch/ppc/8xx_io/fec.c @@ -165,7 +165,7 @@ struct fec_enet_private { struct net_device_stats stats; uint tx_full; - spinlock_t lock; + raw_spinlock_t lock; #ifdef CONFIG_USE_MDIO uint phy_id; Index: linux.prev/arch/ppc/Kconfig =================================================================== --- linux.prev.orig/arch/ppc/Kconfig +++ linux.prev/arch/ppc/Kconfig @@ -15,13 +15,6 @@ config GENERIC_HARDIRQS bool default y -config RWSEM_GENERIC_SPINLOCK - bool - -config RWSEM_XCHGADD_ALGORITHM - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -950,6 +943,18 @@ config HIGHMEM source kernel/Kconfig.hz source kernel/Kconfig.preempt + +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config ASM_SEMAPHORES + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + source "mm/Kconfig" source "fs/Kconfig.binfmt" Index: linux.prev/arch/ppc/boot/Makefile =================================================================== --- linux.prev.orig/arch/ppc/boot/Makefile +++ linux.prev/arch/ppc/boot/Makefile @@ -11,6 +11,15 @@ # CFLAGS += -fno-builtin -D__BOOTER__ -Iarch/$(ARCH)/boot/include + +ifdef CONFIG_MCOUNT +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +CFLAGS := $(subst ${pg_flag},${space},${CFLAGS}) +endif + HOSTCFLAGS += -Iarch/$(ARCH)/boot/include BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd Index: linux.prev/arch/ppc/boot/lib/Makefile =================================================================== --- linux.prev.orig/arch/ppc/boot/lib/Makefile +++ linux.prev/arch/ppc/boot/lib/Makefile @@ -5,19 +5,49 @@ CFLAGS_kbd.o := -Idrivers/char CFLAGS_vreset.o := -I$(srctree)/arch/ppc/boot/include -zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c - -lib-y += $(zlib:.c=.o) div64.o -lib-$(CONFIG_VGA_CONSOLE) += vreset.o kbd.o - +zlib := infblock.c infcodes.c inffast.c inflate.c inftrees.c infutil.c +zlibheader := infblock.h infcodes.h inffast.h inftrees.h infutil.h +zliblinuxheader := zlib.h zconf.h zutil.h + +$(addprefix $(obj)/,$(zlib)): $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) + +src-boot := div64.S +src-boot += $(zlib) +#src-boot := $(addprefix $(obj)/, $(src-boot)) +obj-boot := $(addsuffix .o, $(basename $(src-boot))) -# zlib files needs header from their original place -EXTRA_CFLAGS += -Ilib/zlib_inflate +BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj) $(CFLAGS) quiet_cmd_copy_zlib = COPY $@ - cmd_copy_zlib = cat $< > $@ + cmd_copy_zlib = sed "s@__attribute_used__@@;s@.include.@@;s@.include.@@;s@.*spin.*lock.*@@;s@.*SPINLOCK.*@@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ + +quiet_cmd_copy_zlibheader = COPY $@ + cmd_copy_zlibheader = sed "s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ +# stddef.h for NULL +quiet_cmd_copy_zliblinuxheader = COPY $@ + cmd_copy_zliblinuxheader = sed "s@.include.@@;s@.include.@@;s@<linux/kernel.h>@<stddef.h>@;s@<linux/\([^>]\+\).*@\"\1\"@" $< > $@ $(addprefix $(obj)/,$(zlib)): $(obj)/%:
$(srctree)/lib/zlib_inflate/% $(call cmd,copy_zlib) -clean-files := $(zlib) +$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_zlibheader) + +$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/% + $(call cmd,copy_zliblinuxheader) + +clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) + +quiet_cmd_bootcc = BOOTCC $@ + cmd_bootcc = $(CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< + +quiet_cmd_bootas = BOOTAS $@ + cmd_bootas = $(CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $< + +$(patsubst %.c,%.o, $(filter %.c, $(src-boot))): %.o: %.c + $(call if_changed_dep,bootcc) +$(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S + $(call if_changed_dep,bootas) + +lib-y += $(obj-boot) +lib-$(CONFIG_VGA_CONSOLE) += vreset.o kbd.o Index: linux.prev/arch/ppc/kernel/dma-mapping.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/dma-mapping.c +++ linux.prev/arch/ppc/kernel/dma-mapping.c @@ -71,7 +71,7 @@ int map_page(unsigned long va, phys_addr * This is the page table (2MB) covering uncached, DMA consistent allocations */ static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); +static DEFINE_RAW_SPINLOCK(consistent_lock); /* * VM region handling support. @@ -403,7 +403,7 @@ static inline void __dma_sync_page_highm int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; int seg_nr = 0; - local_irq_save(flags); + raw_local_irq_save(flags); do { start = (unsigned long)kmap_atomic(page + seg_nr, @@ -422,7 +422,7 @@ static inline void __dma_sync_page_highm seg_offset = 0; } while (seg_nr < nr_segs); - local_irq_restore(flags); + raw_local_irq_restore(flags); } #endif /* CONFIG_HIGHMEM */ Index: linux.prev/arch/ppc/kernel/entry.S =================================================================== --- linux.prev.orig/arch/ppc/kernel/entry.S +++ linux.prev/arch/ppc/kernel/entry.S @@ -239,7 +239,7 @@ ret_from_syscall: SYNC MTMSRD(r10) lwz r9,TI_FLAGS(r12) - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- syscall_exit_work syscall_exit_cont: #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) @@ -317,7 +317,7 @@ syscall_exit_work: rlwinm r12,r1,0,0,18 /* current_thread_info() */ lwz r9,TI_FLAGS(r12) 5: - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne 1f lwz r5,_MSR(r1) andi. r5,r5,MSR_PR @@ -658,7 +658,7 @@ user_exc_return: /* r10 contains MSR_KE /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne do_work restore_user: @@ -876,7 +876,7 @@ load_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -890,7 +890,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED) bne- do_resched andi. r0,r9,_TIF_SIGPENDING beq restore_user @@ -1000,3 +1000,85 @@ machine_check_in_rtas: /* XXX load up BATs and panic */ #endif /* CONFIG_PPC_OF */ + +#ifdef CONFIG_MCOUNT + +/* + * mcount() is not the same as _mcount(). 
The callers of mcount() have a + * normal context. The callers of _mcount() do not have a stack frame and + * have not saved the "caller saves" registers. + */ +_GLOBAL(mcount) + stwu r1,-16(r1) + mflr r3 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + stw r3,20(r1) + cmpwi r5,0 + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,16(r1) + lwz r4,4(r4) + bl __trace +1: + lwz r0,20(r1) + mtlr r0 + addi r1,r1,16 + blr + +/* + * The -pg flag, which is specified in the case of CONFIG_MCOUNT, causes the + * C compiler to add a call to _mcount() at the start of each function preamble, + * before the stack frame is created. An example of this preamble code is: + * + * mflr r0 + * lis r12,-16354 + * stw r0,4(r1) + * addi r0,r12,-19652 + * bl 0xc00034c8 <_mcount> + * mflr r0 + * stwu r1,-16(r1) + */ +_GLOBAL(_mcount) +#define M_STK_SIZE 48 + /* Would not expect to need to save cr, but glibc version of */ + /* _mcount() does, so cautiously saving it here too. */ + stwu r1,-M_STK_SIZE(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + mflr r3 /* will use as first arg to __trace() */ + mfcr r4 + lis r5,mcount_enabled@ha + lwz r5,mcount_enabled@l(r5) + cmpwi r5,0 + stw r3, 44(r1) /* lr */ + stw r4, 8(r1) /* cr */ + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + beq 1f + /* r3 contains lr (eip), put parent lr (parent_eip) in r4 */ + lwz r4,M_STK_SIZE+4(r1) + bl __trace +1: + lwz r8, 8(r1) /* cr */ + lwz r9, 44(r1) /* lr */ + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + mtcrf 0xff,r8 + mtctr r9 + lwz r0, 52(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + addi r1,r1,M_STK_SIZE + mtlr r0 + bctr + +#endif /* CONFIG_MCOUNT */ Index: linux.prev/arch/ppc/kernel/idle.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/idle.c +++ linux.prev/arch/ppc/kernel/idle.c @@ -41,7 +41,7 @@ void default_idle(void) powersave = ppc_md.power_save; - if (!need_resched()) { + if (!need_resched() && !need_resched_delayed()) { if (powersave != NULL) powersave(); #ifdef CONFIG_SMP @@ -64,6 +64,10 @@ void cpu_idle(void) for (;;) { while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + stop_critical_timing(); + propagate_preempt_locks_value(); + if (ppc_md.idle != NULL) ppc_md.idle(); else @@ -72,9 +76,11 @@ void cpu_idle(void) if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } } Index: linux.prev/arch/ppc/kernel/ppc_ksyms.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/ppc_ksyms.c +++ linux.prev/arch/ppc/kernel/ppc_ksyms.c @@ -272,7 +272,6 @@ EXPORT_SYMBOL(screen_info); EXPORT_SYMBOL(__delay); EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(get_wchan); EXPORT_SYMBOL(console_drivers); @@ -280,9 +279,6 @@ EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(xmon); EXPORT_SYMBOL(xmon_printf); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); #if defined(CONFIG_KGDB) || defined(CONFIG_XMON) extern void (*debugger)(struct pt_regs *regs); Index: linux.prev/arch/ppc/kernel/process.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/process.c +++ 
linux.prev/arch/ppc/kernel/process.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -52,8 +54,8 @@ struct task_struct *last_task_used_math struct task_struct *last_task_used_altivec = NULL; struct task_struct *last_task_used_spe = NULL; -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); @@ -301,7 +303,7 @@ struct task_struct *__switch_to(struct t unsigned long s; struct task_struct *last; - local_irq_save(s); + raw_local_irq_save(s); #ifdef CHECK_STACK check_stack(prev); check_stack(new); @@ -364,7 +366,7 @@ struct task_struct *__switch_to(struct t new_thread = &new->thread; old_thread = ¤t->thread; last = _switch(old_thread, new_thread); - local_irq_restore(s); + raw_local_irq_restore(s); return last; } Index: linux.prev/arch/ppc/kernel/semaphore.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/semaphore.c +++ linux.prev/arch/ppc/kernel/semaphore.c @@ -29,7 +29,7 @@ * sem->count = tmp; * return old_count; */ -static inline int __sem_update_count(struct semaphore *sem, int incr) +static inline int __sem_update_count(struct compat_semaphore *sem, int incr) { int old_count, tmp; @@ -48,7 +48,7 @@ static inline int __sem_update_count(str return old_count; } -void __up(struct semaphore *sem) +void __compat_up(struct compat_semaphore *sem) { /* * Note that we incremented count in up() before we came here, @@ -70,7 +70,7 @@ void __up(struct semaphore *sem) * Thus it is only when we decrement count from some value > 0 * that we have actually got the semaphore. 
*/ -void __sched __down(struct semaphore *sem) +void __sched __compat_down(struct compat_semaphore *sem) { struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); @@ -100,7 +100,7 @@ void __sched __down(struct semaphore *se wake_up(&sem->wait); } -int __sched __down_interruptible(struct semaphore * sem) +int __sched __compat_down_interruptible(struct compat_semaphore * sem) { int retval = 0; struct task_struct *tsk = current; @@ -129,3 +129,8 @@ int __sched __down_interruptible(struct wake_up(&sem->wait); return retval; } + +int compat_sem_is_locked(struct compat_semaphore *sem) +{ + return (int) atomic_read(&sem->count) < 0; +} Index: linux.prev/arch/ppc/kernel/smp-tbsync.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/smp-tbsync.c +++ linux.prev/arch/ppc/kernel/smp-tbsync.c @@ -49,7 +49,7 @@ smp_generic_take_timebase( void ) { int cmd, tbl, tbu; - local_irq_disable(); + raw_local_irq_disable(); while( !running ) ; rmb(); @@ -78,7 +78,7 @@ smp_generic_take_timebase( void ) } enter_contest( tbsync->mark, -1 ); } - local_irq_enable(); + raw_local_irq_enable(); } static int __devinit @@ -88,7 +88,7 @@ start_contest( int cmd, int offset, int tbsync->cmd = cmd; - local_irq_disable(); + raw_local_irq_disable(); for( i=-3; itbu = tbu = get_tbu(); @@ -114,7 +114,7 @@ start_contest( int cmd, int offset, int if( i++ > 0 ) score += tbsync->race_result; } - local_irq_enable(); + raw_local_irq_enable(); return score; } Index: linux.prev/arch/ppc/kernel/smp.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/smp.c +++ linux.prev/arch/ppc/kernel/smp.c @@ -138,6 +138,16 @@ void smp_send_reschedule(int cpu) smp_message_pass(cpu, PPC_MSG_RESCHEDULE); } +/* + * this function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + smp_message_pass(MSG_ALL_BUT_SELF, PPC_MSG_RESCHEDULE, 0, 0); +} + #ifdef CONFIG_XMON void smp_send_xmon_break(int cpu) { @@ -147,7 +157,7 @@ void smp_send_xmon_break(int cpu) static void stop_this_cpu(void *dummy) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -162,7 +172,7 @@ void smp_send_stop(void) * static memory requirements. It also looks cleaner. * Stolen from the i386 version. 
*/ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); static struct call_data_struct { void (*func) (void *info); @@ -197,7 +207,7 @@ int smp_call_function(void (*func) (void if (num_online_cpus() <= 1) return 0; /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); return __smp_call_function(func, info, wait, MSG_ALL_BUT_SELF); } @@ -358,7 +368,7 @@ int __devinit start_secondary(void *unus cpu_set(cpu, cpu_online_map); spin_unlock(&call_lock); - local_irq_enable(); + raw_local_irq_enable(); cpu_idle(); return 0; Index: linux.prev/arch/ppc/kernel/temp.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/temp.c +++ linux.prev/arch/ppc/kernel/temp.c @@ -142,7 +142,7 @@ static void tau_timeout(void * info) int shrink; /* disabling interrupts *should* be okay */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); #ifndef CONFIG_TAU_INT @@ -185,7 +185,7 @@ static void tau_timeout(void * info) */ mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void tau_timeout_smp(unsigned long unused) Index: linux.prev/arch/ppc/kernel/time.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/time.c +++ linux.prev/arch/ppc/kernel/time.c @@ -66,6 +66,9 @@ #include +unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); + unsigned long disarm_decr[NR_CPUS]; extern struct timezone sys_tz; @@ -86,7 +89,7 @@ extern unsigned long wall_jiffies; /* used for timezone offset */ static long timezone_offset; -DEFINE_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); EXPORT_SYMBOL(rtc_lock); @@ -104,7 +107,7 @@ static inline int tb_delta(unsigned *jif } #ifdef CONFIG_SMP -unsigned long profile_pc(struct pt_regs *regs) +unsigned long notrace profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); Index: linux.prev/arch/ppc/kernel/traps.c =================================================================== --- linux.prev.orig/arch/ppc/kernel/traps.c +++ linux.prev/arch/ppc/kernel/traps.c @@ -77,7 +77,7 @@ void (*debugger_fault_handler)(struct pt * Trap & Exception support */ -DEFINE_SPINLOCK(die_lock); +DEFINE_RAW_SPINLOCK(die_lock); int die(const char * str, struct pt_regs * fp, long err) { @@ -118,6 +118,10 @@ void _exception(int signr, struct pt_reg debugger(regs); die("Exception in kernel mode", regs, signr); } +#ifdef CONFIG_PREEMPT_RT + raw_local_irq_enable(); + preempt_check_resched(); +#endif info.si_signo = signr; info.si_errno = 0; info.si_code = code; Index: linux.prev/arch/ppc/lib/locks.c =================================================================== --- linux.prev.orig/arch/ppc/lib/locks.c +++ linux.prev/arch/ppc/lib/locks.c @@ -43,7 +43,7 @@ static inline unsigned long __spin_trylo return ret; } -void _raw_spin_lock(spinlock_t *lock) +void __raw_spin_lock(raw_spinlock_t *lock) { int cpu = smp_processor_id(); unsigned int stuck = INIT_STUCK; @@ -63,9 +63,9 @@ void _raw_spin_lock(spinlock_t *lock) lock->owner_pc = (unsigned long)__builtin_return_address(0); lock->owner_cpu = cpu; } -EXPORT_SYMBOL(_raw_spin_lock); +EXPORT_SYMBOL(__raw_spin_lock); -int _raw_spin_trylock(spinlock_t *lock) +int __raw_spin_trylock(raw_spinlock_t *lock) { if (__spin_trylock(&lock->lock)) return 0; @@ -73,9 +73,9 @@ int _raw_spin_trylock(spinlock_t *lock) lock->owner_pc = (unsigned 
long)__builtin_return_address(0); return 1; } -EXPORT_SYMBOL(_raw_spin_trylock); +EXPORT_SYMBOL(__raw_spin_trylock); -void _raw_spin_unlock(spinlock_t *lp) +void __raw_spin_unlock(raw_spinlock_t *lp) { if ( !lp->lock ) printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", @@ -89,13 +89,13 @@ void _raw_spin_unlock(spinlock_t *lp) wmb(); lp->lock = 0; } -EXPORT_SYMBOL(_raw_spin_unlock); +EXPORT_SYMBOL(__raw_spin_unlock); /* * For rwlocks, zero is unlocked, -1 is write-locked, * positive is read-locked. */ -static __inline__ int __read_trylock(rwlock_t *rw) +static __inline__ int __read_trylock(raw_rwlock_t *rw) { signed int tmp; @@ -115,13 +115,13 @@ static __inline__ int __read_trylock(rwl return tmp; } -int _raw_read_trylock(rwlock_t *rw) +int __raw_read_trylock(raw_rwlock_t *rw) { return __read_trylock(rw) > 0; } -EXPORT_SYMBOL(_raw_read_trylock); +EXPORT_SYMBOL(__raw_read_trylock); -void _raw_read_lock(rwlock_t *rw) +void __raw_read_lock(rwlock_t *rw) { unsigned int stuck; @@ -136,9 +136,9 @@ void _raw_read_lock(rwlock_t *rw) } } } -EXPORT_SYMBOL(_raw_read_lock); +EXPORT_SYMBOL(__raw_read_lock); -void _raw_read_unlock(rwlock_t *rw) +void __raw_read_unlock(raw_rwlock_t *rw) { if ( rw->lock == 0 ) printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n", @@ -147,9 +147,9 @@ void _raw_read_unlock(rwlock_t *rw) wmb(); atomic_dec((atomic_t *) &(rw)->lock); } -EXPORT_SYMBOL(_raw_read_unlock); +EXPORT_SYMBOL(__raw_read_unlock); -void _raw_write_lock(rwlock_t *rw) +void __raw_write_lock(raw_rwlock_t *rw) { unsigned int stuck; @@ -165,18 +165,18 @@ void _raw_write_lock(rwlock_t *rw) } wmb(); } -EXPORT_SYMBOL(_raw_write_lock); +EXPORT_SYMBOL(__raw_write_lock); -int _raw_write_trylock(rwlock_t *rw) +int __raw_write_trylock(raw_rwlock_t *rw) { if (cmpxchg(&rw->lock, 0, -1) != 0) return 0; wmb(); return 1; } -EXPORT_SYMBOL(_raw_write_trylock); +EXPORT_SYMBOL(__raw_write_trylock); -void _raw_write_unlock(rwlock_t *rw) +void __raw_write_unlock(raw_rwlock_t *rw) { if (rw->lock >= 0) printk("_write_lock(): %s/%d (nip %08lX) lock %d\n", @@ -185,6 +185,6 @@ void _raw_write_unlock(rwlock_t *rw) wmb(); rw->lock = 0; } -EXPORT_SYMBOL(_raw_write_unlock); +EXPORT_SYMBOL(__raw_write_unlock); #endif Index: linux.prev/arch/ppc/mm/fault.c =================================================================== --- linux.prev.orig/arch/ppc/mm/fault.c +++ linux.prev/arch/ppc/mm/fault.c @@ -92,7 +92,7 @@ static int store_updates_sp(struct pt_re * the error_code parameter is ESR for a data fault, 0 for an instruction * fault. 
*/ -int do_page_fault(struct pt_regs *regs, unsigned long address, +int notrace do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; Index: linux.prev/arch/ppc/mm/init.c =================================================================== --- linux.prev.orig/arch/ppc/mm/init.c +++ linux.prev/arch/ppc/mm/init.c @@ -56,7 +56,7 @@ #endif #define MAX_LOW_MEM CONFIG_LOWMEM_SIZE -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; Index: linux.prev/arch/ppc/platforms/4xx/xilinx_ml300.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/4xx/xilinx_ml300.c +++ linux.prev/arch/ppc/platforms/4xx/xilinx_ml300.c @@ -62,7 +62,7 @@ static volatile unsigned *powerdown_base static void xilinx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); out_be32(powerdown_base, XPAR_POWER_0_POWERDOWN_VALUE); while (1) ; } Index: linux.prev/arch/ppc/platforms/apus_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/apus_setup.c +++ linux.prev/arch/ppc/platforms/apus_setup.c @@ -282,6 +282,7 @@ void apus_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; __bus_speed = bus_speed; __speed_test_failed = speed_test_failed; @@ -480,7 +481,7 @@ void cache_clear(__u32 addr, int length) void apus_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); APUS_WRITE(APUS_REG_LOCK, REGLOCK_BLACKMAGICK1|REGLOCK_BLACKMAGICK2); @@ -598,7 +599,7 @@ int __debug_serinit( void ) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* turn off Rx and Tx interrupts */ custom.intena = IF_RBF | IF_TBE; @@ -606,7 +607,7 @@ int __debug_serinit( void ) /* clear any pending interrupt */ custom.intreq = IF_RBF | IF_TBE; - local_irq_restore(flags); + raw_local_irq_restore(flags); /* * set the appropriate directions for the modem control flags, Index: linux.prev/arch/ppc/platforms/chestnut.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/chestnut.c +++ linux.prev/arch/ppc/platforms/chestnut.c @@ -455,7 +455,7 @@ chestnut_restart(char *cmd) { volatile ulong i = 10000000; - local_irq_disable(); + raw_local_irq_disable(); /* * Set CPLD Reg 3 bit 0 to 1 to allow MPP signals on reset to work @@ -474,7 +474,7 @@ chestnut_restart(char *cmd) static void chestnut_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for (;;); /* NOTREACHED */ } Index: linux.prev/arch/ppc/platforms/chrp_smp.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/chrp_smp.c +++ linux.prev/arch/ppc/platforms/chrp_smp.c @@ -58,7 +58,7 @@ smp_chrp_setup_cpu(int cpu_nr) do_openpic_setup_cpu(); } -static DEFINE_SPINLOCK(timebase_lock); +static DEFINE_RAW_SPINLOCK(timebase_lock); static unsigned int timebase_upper = 0, timebase_lower = 0; void __devinit Index: linux.prev/arch/ppc/platforms/chrp_time.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/chrp_time.c +++ linux.prev/arch/ppc/platforms/chrp_time.c @@ -28,7 +28,7 @@ #include #include -extern spinlock_t rtc_lock; +extern raw_spinlock_t rtc_lock; static int nvram_as1 = NVRAM_AS1; static int nvram_as0 = 
NVRAM_AS0; @@ -188,4 +188,5 @@ void __init chrp_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux.prev/arch/ppc/platforms/cpci690.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/cpci690.c +++ linux.prev/arch/ppc/platforms/cpci690.c @@ -322,7 +322,7 @@ cpci690_reset_board(void) { u32 i = 10000; - local_irq_disable(); + raw_local_irq_disable(); out_8((cpci690_br_base + CPCI690_BR_SW_RESET), 0x11); while (i != 0) i++; Index: linux.prev/arch/ppc/platforms/ev64260.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/ev64260.c +++ linux.prev/arch/ppc/platforms/ev64260.c @@ -446,7 +446,7 @@ ev64260_platform_notify(struct device *d static void ev64260_reset_board(void *addr) { - local_irq_disable(); + raw_local_irq_disable(); /* disable and invalidate the L2 cache */ _set_L2CR(0); @@ -514,7 +514,7 @@ ev64260_restart(char *cmd) static void ev64260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); /* NOTREACHED */ } @@ -553,6 +553,7 @@ ev64260_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux.prev/arch/ppc/platforms/gemini_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/gemini_setup.c +++ linux.prev/arch/ppc/platforms/gemini_setup.c @@ -303,7 +303,7 @@ void __init gemini_init_l2(void) void gemini_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* make a clean restart, not via the MPIC */ _gemini_reboot(); for(;;); @@ -462,6 +462,7 @@ void __init gemini_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } unsigned long __init gemini_find_end_of_memory(void) Index: linux.prev/arch/ppc/platforms/hdpu.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/hdpu.c +++ linux.prev/arch/ppc/platforms/hdpu.c @@ -474,7 +474,7 @@ static void hdpu_reset_board(void) hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_RESET); - local_irq_disable(); + raw_local_irq_disable(); /* Clear all the LEDs */ mv64x60_write(&bh, MV64x60_GPP_VALUE_CLR, ((1 << 4) | @@ -516,7 +516,7 @@ static void hdpu_restart(char *cmd) static void hdpu_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); hdpu_cpustate_set(CPUSTATE_KERNEL_MAJOR | CPUSTATE_KERNEL_HALT); Index: linux.prev/arch/ppc/platforms/lopec.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/lopec.c +++ linux.prev/arch/ppc/platforms/lopec.c @@ -153,7 +153,7 @@ lopec_restart(char *cmd) reg |= 0x80; *((unsigned char *) LOPEC_SYSSTAT1) = reg; - local_irq_disable(); + raw_local_irq_disable(); while(1); #undef LOPEC_SYSSTAT1 } @@ -161,7 +161,7 @@ lopec_restart(char *cmd) static void lopec_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while(1); } Index: linux.prev/arch/ppc/platforms/mvme5100.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/mvme5100.c +++ linux.prev/arch/ppc/platforms/mvme5100.c @@ -262,7 +262,7 @@ mvme5100_map_io(void) static void mvme5100_reset_board(void) { - local_irq_disable(); + raw_local_irq_disable(); 
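/*
 * Aside on the cpu_khz assignments accumulating in the
 * *_calibrate_decr() hunks around here -- a sketch of the arithmetic,
 * not patch text: despite the x86-flavoured name, cpu_khz on these
 * boards is the timebase/decrementer input frequency in kHz, computed
 * from the same freq/divisor pair as tb_ticks_per_jiffy:
 *
 *	tb_ticks_per_jiffy = freq / HZ / divisor;
 *	cpu_khz            = (freq / divisor) / 1000;
 *
 * Worked example: freq = 66,000,000 and divisor = 4 give 16,500,000
 * timebase ticks per second, i.e. cpu_khz = 16500 and, with HZ = 100,
 * tb_ticks_per_jiffy = 165000. The variable itself is defined and
 * exported from arch/ppc/kernel/time.c earlier in this series, so
 * generic code that expects the x86 symbol (presumably the latency
 * tracer) links unchanged.
 */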
/* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -286,7 +286,7 @@ mvme5100_restart(char *cmd) static void mvme5100_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux.prev/arch/ppc/platforms/pal4_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pal4_setup.c +++ linux.prev/arch/ppc/platforms/pal4_setup.c @@ -82,7 +82,7 @@ pal4_show_cpuinfo(struct seq_file *m) static void pal4_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); __asm__ __volatile__("lis 3,0xfff0\n \ ori 3,3,0x100\n \ mtspr 26,3\n \ @@ -96,7 +96,7 @@ pal4_restart(char *cmd) static void pal4_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/platforms/pmac_cpufreq.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_cpufreq.c +++ linux.prev/arch/ppc/platforms/pmac_cpufreq.c @@ -285,7 +285,7 @@ static int pmu_set_cpu_speed(int low_spe asm volatile("mtdec %0" : : "r" (0x7fffffff)); /* We can now disable MSR_EE */ - local_irq_save(flags); + raw_local_irq_save(flags); /* Giveup the FPU & vec */ enable_kernel_fp(); @@ -341,7 +341,7 @@ static int pmu_set_cpu_speed(int low_spe openpic_set_priority(pic_prio); /* Let interrupts flow again ... */ - local_irq_restore(flags); + raw_local_irq_restore(flags); #ifdef DEBUG_FREQ debug_calc_bogomips(); Index: linux.prev/arch/ppc/platforms/pmac_feature.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_feature.c +++ linux.prev/arch/ppc/platforms/pmac_feature.c @@ -63,7 +63,7 @@ extern struct device_node *k2_skiplist[2 * We use a single global lock to protect accesses. 
Each driver has * to take care of its own locking */ -static DEFINE_SPINLOCK(feature_lock); +static DEFINE_RAW_SPINLOCK(feature_lock); #define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); #define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); Index: linux.prev/arch/ppc/platforms/pmac_nvram.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_nvram.c +++ linux.prev/arch/ppc/platforms/pmac_nvram.c @@ -80,7 +80,7 @@ static volatile unsigned char *nvram_dat static int nvram_mult, is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; -static DEFINE_SPINLOCK(nv_lock); +static DEFINE_RAW_SPINLOCK(nv_lock); extern int pmac_newworld; extern int system_running; Index: linux.prev/arch/ppc/platforms/pmac_pic.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_pic.c +++ linux.prev/arch/ppc/platforms/pmac_pic.c @@ -69,7 +69,7 @@ static int max_irqs; static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static DEFINE_RAW_SPINLOCK(pmac_pic_lock); #define GATWICK_IRQ_POOL_SIZE 10 Index: linux.prev/arch/ppc/platforms/pmac_smp.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_smp.c +++ linux.prev/arch/ppc/platforms/pmac_smp.c @@ -499,8 +499,8 @@ static void __devinit smp_core99_kick_cp return; if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); - local_irq_save(flags); - local_irq_disable(); + raw_local_irq_save(flags); + raw_local_irq_disable(); /* Save reset vector */ save_vector = *vector; @@ -528,7 +528,7 @@ static void __devinit smp_core99_kick_cp *vector = save_vector; flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); - local_irq_restore(flags); + raw_local_irq_restore(flags); if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); } @@ -570,7 +570,7 @@ void smp_core99_take_timebase(void) mb(); /* set our stuff the same as the primary */ - local_irq_save(flags); + raw_local_irq_save(flags); set_dec(1); set_tb(pri_tb_hi, pri_tb_lo); last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp; @@ -579,7 +579,7 @@ void smp_core99_take_timebase(void) /* tell the primary we're done */ sec_tb_reset = 0; mb(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* not __init, called in sleep/wakeup code */ @@ -599,7 +599,7 @@ void smp_core99_give_timebase(void) /* freeze the timebase and read it */ /* disable interrupts so the timebase is disabled for the shortest possible time */ - local_irq_save(flags); + raw_local_irq_save(flags); pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); mb(); @@ -623,7 +623,7 @@ void smp_core99_give_timebase(void) /* Now, restart the timebase by leaving the GPIO to an open collector */ pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); - local_irq_restore(flags); + raw_local_irq_restore(flags); } Index: linux.prev/arch/ppc/platforms/pmac_time.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pmac_time.c +++ linux.prev/arch/ppc/platforms/pmac_time.c @@ -197,6 +197,7 @@ via_calibrate_decr(void) tb_ticks_per_jiffy = (dstart - dend) / ((6 * HZ)/100); tb_to_us = mulhwu_scale_factor(dstart - dend, 60000); + cpu_khz = (dstart - dend) / 60; printk(KERN_INFO 
"via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n", tb_ticks_per_jiffy, dstart - dend); @@ -288,4 +289,5 @@ pmac_calibrate_decr(void) freq/1000000, freq%1000000); tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; } Index: linux.prev/arch/ppc/platforms/powerpmc250.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/powerpmc250.c +++ linux.prev/arch/ppc/platforms/powerpmc250.c @@ -166,12 +166,13 @@ powerpmc250_calibrate_decr(void) tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void powerpmc250_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Hard reset */ writeb(0x11, 0xfe000332); while(1); @@ -180,7 +181,7 @@ powerpmc250_restart(char *cmd) static void powerpmc250_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux.prev/arch/ppc/platforms/pplus.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/pplus.c +++ linux.prev/arch/ppc/platforms/pplus.c @@ -607,7 +607,7 @@ static void pplus_restart(char *cmd) { unsigned long i = 10000; - local_irq_disable(); + raw_local_irq_disable(); /* set VIA IDE controller into native mode */ pplus_set_VIA_IDE_native(); Index: linux.prev/arch/ppc/platforms/prep_setup.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/prep_setup.c +++ linux.prev/arch/ppc/platforms/prep_setup.c @@ -464,7 +464,7 @@ static void prep_restart(char *cmd) { #define PREP_SP92 0x92 /* Special Port 92 */ - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -482,7 +482,7 @@ prep_restart(char *cmd) static void prep_halt(void) { - local_irq_disable(); /* no interrupts */ + raw_local_irq_disable(); /* no interrupts */ /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -550,7 +550,7 @@ prep_sig750_poweroff(void) { /* tweak the power manager found in most IBM PRePs (except Thinkpads) */ - local_irq_disable(); + raw_local_irq_disable(); /* set exception prefix high - to the prom */ _nmask_and_or_msr(0, MSR_IP); @@ -944,6 +944,7 @@ prep_calibrate_decr(void) (freq/divisor)/1000000, (freq/divisor)%1000000); tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; tb_ticks_per_jiffy = freq / HZ / divisor; } } Index: linux.prev/arch/ppc/platforms/prpmc750.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/prpmc750.c +++ linux.prev/arch/ppc/platforms/prpmc750.c @@ -271,18 +271,19 @@ static void __init prpmc750_calibrate_de tb_ticks_per_jiffy = freq / (HZ * divisor); tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static void prpmc750_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); writeb(PRPMC750_MODRST_MASK, PRPMC750_MODRST_REG); while (1) ; } static void prpmc750_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux.prev/arch/ppc/platforms/prpmc800.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/prpmc800.c +++ linux.prev/arch/ppc/platforms/prpmc800.c @@ -330,6 +330,7 @@ static void __init prpmc800_calibrate_de 
tb_ticks_per_second = 100000000 / 4; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; return; } @@ -370,13 +371,14 @@ static void __init prpmc800_calibrate_de tb_ticks_per_second = (tbl_end - tbl_start) * 2; tb_ticks_per_jiffy = tb_ticks_per_second / HZ; tb_to_us = mulhwu_scale_factor(tb_ticks_per_second, 1000000); + cpu_khz = tb_ticks_per_second / 1000; } static void prpmc800_restart(char *cmd) { ulong temp; - local_irq_disable(); + raw_local_irq_disable(); temp = in_be32((uint *) HARRIER_MISC_CSR_REG); temp |= HARRIER_RSTOUT; out_be32((uint *) HARRIER_MISC_CSR_REG, temp); @@ -385,7 +387,7 @@ static void prpmc800_restart(char *cmd) static void prpmc800_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } Index: linux.prev/arch/ppc/platforms/radstone_ppc7d.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/radstone_ppc7d.c +++ linux.prev/arch/ppc/platforms/radstone_ppc7d.c @@ -176,7 +176,7 @@ static void ppc7d_power_off(void) { u32 data; - local_irq_disable(); + raw_local_irq_disable(); /* Ensure that internal MV643XX watchdog is disabled. * The Disco watchdog uses MPP17 on this hardware. Index: linux.prev/arch/ppc/platforms/sandpoint.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/sandpoint.c +++ linux.prev/arch/ppc/platforms/sandpoint.c @@ -527,7 +527,7 @@ sandpoint_map_io(void) static void sandpoint_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* Set exception prefix high - to the firmware */ _nmask_and_or_msr(0, MSR_IP); @@ -541,7 +541,7 @@ sandpoint_restart(char *cmd) static void sandpoint_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); /* No way to shut power off with software */ /* NOTREACHED */ } Index: linux.prev/arch/ppc/platforms/sbc82xx.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/sbc82xx.c +++ linux.prev/arch/ppc/platforms/sbc82xx.c @@ -68,7 +68,7 @@ static void sbc82xx_time_init(void) static volatile char *sbc82xx_i8259_map; static char sbc82xx_i8259_mask = 0xff; -static DEFINE_SPINLOCK(sbc82xx_i8259_lock); +static DEFINE_RAW_SPINLOCK(sbc82xx_i8259_lock); static void sbc82xx_i8259_mask_and_ack_irq(unsigned int irq_nr) { Index: linux.prev/arch/ppc/platforms/spruce.c =================================================================== --- linux.prev.orig/arch/ppc/platforms/spruce.c +++ linux.prev/arch/ppc/platforms/spruce.c @@ -150,6 +150,7 @@ spruce_calibrate_decr(void) freq = SPRUCE_BUS_SPEED; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq/divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } static int @@ -236,7 +237,7 @@ spruce_setup_arch(void) static void spruce_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); /* SRR0 has system reset vector, SRR1 has default MSR value */ /* rfi restores MSR from SRR1 and sets the PC to the SRR0 value */ Index: linux.prev/arch/ppc/syslib/cpm2_common.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/cpm2_common.c +++ linux.prev/arch/ppc/syslib/cpm2_common.c @@ -114,7 +114,7 @@ cpm2_fastbrg(uint brg, uint rate, int di /* * dpalloc / dpfree bits. 
*/ -static spinlock_t cpm_dpmem_lock; +static raw_spinlock_t cpm_dpmem_lock; /* 16 blocks should be enough to satisfy all requests * until the memory subsystem goes up... */ static rh_block_t cpm_boot_dpmem_rh_block[16]; Index: linux.prev/arch/ppc/syslib/ibm440gx_common.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ibm440gx_common.c +++ linux.prev/arch/ppc/syslib/ibm440gx_common.c @@ -157,7 +157,7 @@ void __init ibm440gx_l2c_enable(void){ return; } - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable SRAM */ @@ -201,7 +201,7 @@ void __init ibm440gx_l2c_enable(void){ mtdcr(DCRN_L2C0_CFG, r); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* Disable L2 cache */ @@ -209,7 +209,7 @@ void __init ibm440gx_l2c_disable(void){ u32 r; unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); asm volatile ("sync" ::: "memory"); /* Disable L2C mode */ @@ -228,7 +228,7 @@ void __init ibm440gx_l2c_disable(void){ SRAM_SBCR_BAS3 | SRAM_SBCR_BS_64KB | SRAM_SBCR_BU_RW); asm volatile ("sync; isync" ::: "memory"); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void __init ibm440gx_l2c_setup(struct ibm44x_clocks* p) Index: linux.prev/arch/ppc/syslib/ibm44x_common.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ibm44x_common.c +++ linux.prev/arch/ppc/syslib/ibm44x_common.c @@ -66,6 +66,7 @@ void __init ibm44x_calibrate_decr(unsign { tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -82,19 +83,19 @@ extern void abort(void); static void ibm44x_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } static void ibm44x_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } static void ibm44x_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/syslib/m8260_pci_erratum9.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/m8260_pci_erratum9.c +++ linux.prev/arch/ppc/syslib/m8260_pci_erratum9.c @@ -132,7 +132,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (sinc) @@ -161,7 +161,7 @@ idma_pci9_read(u8 *dst, u8 *src, int byt /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } @@ -184,7 +184,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by volatile idma_bd_t *bd = &idma_dpram->bd; volatile cpm2_map_t *immap = cpm2_immr; - local_irq_save(flags); + raw_local_irq_save(flags); /* initialize IDMA parameter RAM for this transfer */ if (dinc) @@ -213,7 +213,7 @@ idma_pci9_write(u8 *dst, u8 *src, int by /* wait for transfer to complete */ while(bd->flags & IDMA_BD_V); - local_irq_restore(flags); + raw_local_irq_restore(flags); return; } Index: linux.prev/arch/ppc/syslib/m8260_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/m8260_setup.c +++ linux.prev/arch/ppc/syslib/m8260_setup.c @@ -82,6 +82,7 @@ m8260_calibrate_decr(void) divisor = 4; tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = 
mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; } /* The 8260 has an internal 1-second timer update register that @@ -132,7 +133,7 @@ m8260_restart(char *cmd) static void m8260_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } Index: linux.prev/arch/ppc/syslib/m8xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/m8xx_setup.c +++ linux.prev/arch/ppc/syslib/m8xx_setup.c @@ -160,6 +160,7 @@ void __init m8xx_calibrate_decr(void) printk("Decrementer Frequency = %d/%d\n", freq, divisor); tb_ticks_per_jiffy = freq / HZ / divisor; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Perform some more timer/timebase initialization. This used * to be done elsewhere, but other changes caused it to get @@ -231,7 +232,7 @@ m8xx_restart(char *cmd) { __volatile__ unsigned char dummy; - local_irq_disable(); + raw_local_irq_disable(); out_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr, in_be32(&((immap_t *)IMAP_ADDR)->im_clkrst.car_plprcr) | 0x00000080); /* Clear the ME bit in MSR to cause checkstop on machine check Index: linux.prev/arch/ppc/syslib/mpc52xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/mpc52xx_setup.c +++ linux.prev/arch/ppc/syslib/mpc52xx_setup.c @@ -40,7 +40,7 @@ mpc52xx_restart(char *cmd) { struct mpc52xx_gpt __iomem *gpt0 = MPC52xx_VA(MPC52xx_GPTx_OFFSET(0)); - local_irq_disable(); + raw_local_irq_disable(); /* Turn on the watchdog and wait for it to expire. It effectively does a reset */ @@ -53,7 +53,7 @@ mpc52xx_restart(char *cmd) void mpc52xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); while (1); } @@ -214,6 +214,7 @@ mpc52xx_calibrate_decr(void) tb_ticks_per_jiffy = xlbfreq / HZ / divisor; tb_to_us = mulhwu_scale_factor(xlbfreq / divisor, 1000000); + cpu_khz = (xlbfreq / divisor) / 1000; } int mpc52xx_match_psc_function(int psc_idx, const char *func) Index: linux.prev/arch/ppc/syslib/ocp.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ocp.c +++ linux.prev/arch/ppc/syslib/ocp.c @@ -45,11 +45,11 @@ #include #include #include +#include #include #include #include -#include #include //#define DBG(x) printk x Index: linux.prev/arch/ppc/syslib/open_pic.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/open_pic.c +++ linux.prev/arch/ppc/syslib/open_pic.c @@ -529,7 +529,7 @@ void openpic_reset_processor_phys(u_int } #if defined(CONFIG_SMP) || defined(CONFIG_PM) -static DEFINE_SPINLOCK(openpic_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic_setup_lock); #endif #ifdef CONFIG_SMP Index: linux.prev/arch/ppc/syslib/open_pic2.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/open_pic2.c +++ linux.prev/arch/ppc/syslib/open_pic2.c @@ -383,7 +383,7 @@ static void openpic2_set_spurious(u_int vec); } -static DEFINE_SPINLOCK(openpic2_setup_lock); +static DEFINE_RAW_SPINLOCK(openpic2_setup_lock); /* * Initialize a timer interrupt (and disable it) Index: linux.prev/arch/ppc/syslib/ppc4xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ppc4xx_setup.c +++ linux.prev/arch/ppc/syslib/ppc4xx_setup.c @@ -142,7 +142,7 @@ static void ppc4xx_power_off(void) { printk("System Halted\n"); - local_irq_disable(); + 
raw_local_irq_disable(); while (1) ; } @@ -150,7 +150,7 @@ static void ppc4xx_halt(void) { printk("System Halted\n"); - local_irq_disable(); + raw_local_irq_disable(); while (1) ; } @@ -173,6 +173,7 @@ ppc4xx_calibrate_decr(void) freq = bip->bi_tbfreq; tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; /* Set the time base to zero. ** At 200 Mhz, time base will rollover in ~2925 years. Index: linux.prev/arch/ppc/syslib/ppc83xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ppc83xx_setup.c +++ linux.prev/arch/ppc/syslib/ppc83xx_setup.c @@ -138,7 +138,7 @@ mpc83xx_restart(char *cmd) reg = ioremap(BCSR_PHYS_ADDR, BCSR_SIZE); - local_irq_disable(); + raw_local_irq_disable(); /* * Unlock the BCSR bits so a PRST will update the contents. @@ -167,14 +167,14 @@ mpc83xx_restart(char *cmd) void mpc83xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc83xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/syslib/ppc85xx_setup.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/ppc85xx_setup.c +++ linux.prev/arch/ppc/syslib/ppc85xx_setup.c @@ -60,6 +60,7 @@ mpc85xx_calibrate_decr(void) divisor = 8; tb_ticks_per_jiffy = freq / divisor / HZ; tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000); + cpu_khz = (freq / divisor) / 1000; /* Set the time base to zero */ mtspr(SPRN_TBWL, 0); @@ -115,21 +116,21 @@ mpc85xx_early_serial_map(void) void mpc85xx_restart(char *cmd) { - local_irq_disable(); + raw_local_irq_disable(); abort(); } void mpc85xx_power_off(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } void mpc85xx_halt(void) { - local_irq_disable(); + raw_local_irq_disable(); for(;;); } Index: linux.prev/arch/ppc/syslib/prom.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/prom.c +++ linux.prev/arch/ppc/syslib/prom.c @@ -1396,7 +1396,7 @@ print_properties(struct device_node *np) } #endif -static DEFINE_SPINLOCK(rtas_lock); +static DEFINE_RAW_SPINLOCK(rtas_lock); /* this can be called after setup -- Cort */ int Index: linux.prev/arch/ppc/syslib/todc_time.c =================================================================== --- linux.prev.orig/arch/ppc/syslib/todc_time.c +++ linux.prev/arch/ppc/syslib/todc_time.c @@ -508,6 +508,7 @@ todc_calibrate_decr(void) tb_ticks_per_jiffy = freq / HZ; tb_to_us = mulhwu_scale_factor(freq, 1000000); + cpu_khz = freq / 1000; return; } Index: linux.prev/arch/ppc/xmon/xmon.c =================================================================== --- linux.prev.orig/arch/ppc/xmon/xmon.c +++ linux.prev/arch/ppc/xmon/xmon.c @@ -297,10 +297,10 @@ irqreturn_t xmon_irq(int irq, void *d, struct pt_regs *regs) { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); printf("Keyboard interrupt\n"); xmon(regs); - local_irq_restore(flags); + raw_local_irq_restore(flags); return IRQ_HANDLED; } Index: linux.prev/arch/sh64/kernel/time.c =================================================================== --- linux.prev.orig/arch/sh64/kernel/time.c +++ linux.prev/arch/sh64/kernel/time.c @@ -417,7 +417,7 @@ static __init unsigned int get_cpu_hz(vo /* ** Regardless the toolchain, force the compiler to use the ** arbitrary register r3 as a clock tick counter. 
- ** NOTE: r3 must be in accordance with rtc_interrupt() + ** NOTE: r3 must be in accordance with sh64_rtc_interrupt() */ register unsigned long long __rtc_irq_flag __asm__ ("r3"); @@ -482,7 +482,8 @@ static __init unsigned int get_cpu_hz(vo #endif } -static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { ctrl_outb(0, RCR1); /* Disable Carry Interrupts */ regs->regs[3] = 1; /* Using r3 */ @@ -491,7 +492,7 @@ static irqreturn_t rtc_interrupt(int irq } static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL}; -static struct irqaction irq1 = { rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; +static struct irqaction irq1 = { sh64_rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL}; void __init time_init(void) { Index: linux.prev/arch/x86_64/Kconfig =================================================================== --- linux.prev.orig/arch/x86_64/Kconfig +++ linux.prev/arch/x86_64/Kconfig @@ -24,6 +24,14 @@ config X86 bool default y +config GENERIC_TIME + bool + default y + +config GENERIC_TIME_VSYSCALL + bool + default y + config SEMAPHORE_SLEEPERS bool default y @@ -38,13 +46,6 @@ config ISA config SBUS bool -config RWSEM_GENERIC_SPINLOCK - bool - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - config GENERIC_CALIBRATE_DELAY bool default y @@ -199,6 +200,8 @@ config MTRR See <file:Documentation/mtrr.txt> for more information. +source "kernel/time/Kconfig" + config SMP bool "Symmetric multi-processing support" ---help--- @@ -237,6 +240,14 @@ config NUMA If the system is EM64T, you should say N unless your system is EM64T NUMA. +config RWSEM_GENERIC_SPINLOCK + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + depends on !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT + bool + config K8_NUMA bool "Old style AMD Opteron NUMA detection" depends on NUMA @@ -327,21 +338,6 @@ config HPET_TIMER as it is off-chip. You can find the HPET spec at <http://www.intel.com/hardwaredesign/hpetspec.htm>. -config X86_PM_TIMER - bool "PM timer" - depends on ACPI - default y - help - Support the ACPI PM timer for time keeping. This is slow, - but is useful on some chipsets without HPET on systems with more - than one CPU. On a single processor or single socket multi core - system it is normally not required. - When the PM timer is active 64bit vsyscalls are disabled - and should not be enabled (/proc/sys/kernel/vsyscall64 should - not be changed). - The kernel selects the PM timer only as a last resort, so it is - useful to enable just in case.
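The removal of X86_PM_TIMER above goes hand in hand with the new GENERIC_TIME/GENERIC_TIME_VSYSCALL options and the source "kernel/time/Kconfig" line: instead of one compiled-in timer backend, timekeeping hardware is described by a rating/read/mult/shift tuple and the best registered source is picked at run time. A rough sketch of that abstraction (field and function names are illustrative, not the exact in-tree definitions of this era):

    typedef unsigned long long cycle_t;     /* free-running counter value */

    struct clocksource_sketch {
            const char      *name;          /* "hpet", "acpi_pm", "tsc", ... */
            int             rating;         /* highest registered rating wins */
            cycle_t         (*read)(void);  /* read the free-running counter */
            unsigned int    mult;           /* cycle -> nanosecond multiplier */
            unsigned int    shift;          /* ...with this binary shift */
    };

    /* elapsed nanoseconds for a counter delta, as the generic core computes it */
    static inline unsigned long long
    cyc2ns_sketch(struct clocksource_sketch *cs, cycle_t delta)
    {
            return ((unsigned long long)delta * cs->mult) >> cs->shift;
    }

Under this scheme the ACPI PM timer becomes just another registered source, so the dedicated Kconfig knob (and the pmtimer.o rule in the Makefile hunk below) can go.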
- config HPET_EMULATE_RTC bool "Provide RTC interrupt" depends on HPET_TIMER && RTC=y Index: linux.prev/arch/x86_64/boot/compressed/misc.c =================================================================== --- linux.prev.orig/arch/x86_64/boot/compressed/misc.c +++ linux.prev/arch/x86_64/boot/compressed/misc.c @@ -114,6 +114,7 @@ static char *vidmem = (char *)0xb8000; static int vidport; static int lines, cols; +#define ZLIB_INFLATE_NO_INFLATE_LOCK #include "../../../../lib/inflate.c" static void *malloc(int size) Index: linux.prev/arch/x86_64/ia32/sys_ia32.c =================================================================== --- linux.prev.orig/arch/x86_64/ia32/sys_ia32.c +++ linux.prev/arch/x86_64/ia32/sys_ia32.c @@ -456,6 +456,10 @@ sys32_settimeofday(struct compat_timeval struct timespec kts; struct timezone ktz; + int ret = timeofday_API_hacks(tv, tz); + if (ret != 1) + return ret; + if (tv) { if (get_tv32(&ktv, tv)) return -EFAULT; Index: linux.prev/arch/x86_64/kernel/Makefile =================================================================== --- linux.prev.orig/arch/x86_64/kernel/Makefile +++ linux.prev/arch/x86_64/kernel/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_prin obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o +obj-$(CONFIG_SYSFS) += switch2poll.o obj-$(CONFIG_MODULES) += module.o Index: linux.prev/arch/x86_64/kernel/apic.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/apic.c +++ linux.prev/arch/x86_64/kernel/apic.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -38,13 +39,15 @@ int apic_verbosity; int disable_apic_timer __initdata; +/* + * cpu_mask that denotes the CPUs that needs timer interrupt coming in as + * IPIs in place of local APIC timers + */ +static cpumask_t timer_interrupt_broadcast_ipi_mask; + /* Using APIC to generate smp_local_timer_interrupt? 
*/ int using_apic_timer = 0; -static DEFINE_PER_CPU(int, prof_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; -static DEFINE_PER_CPU(int, prof_counter) = 1; - static void apic_pm_activate(void); void enable_NMI_through_LVT0 (void * dummy) @@ -485,10 +488,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_tmict = apic_read(APIC_TMICT); apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); - local_save_flags(flags); - local_irq_disable(); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -503,7 +505,7 @@ static int lapic_resume(struct sys_devic /* XXX: Pavel needs this for S3 resume, but can't explain why */ set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - local_irq_save(flags); + raw_local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; @@ -526,7 +528,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + raw_local_irq_restore(flags); return 0; } @@ -660,9 +662,14 @@ void __init init_apic_mappings(void) static void __setup_APIC_LVTT(unsigned int clocks) { unsigned int lvtt_value, tmp_value, ver; + int cpu = smp_processor_id(); ver = GET_APIC_VERSION(apic_read(APIC_LVR)); lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + + if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) + lvtt_value |= APIC_LVT_MASKED; + apic_write_around(APIC_LVTT, lvtt_value); /* @@ -680,7 +687,7 @@ static void setup_APIC_timer(unsigned in { unsigned long flags; - local_irq_save(flags); + raw_local_irq_save(flags); /* For some reasons this doesn't work on Simics, so fake it for now */ if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { @@ -710,7 +717,7 @@ static void setup_APIC_timer(unsigned in __setup_APIC_LVTT(clocks); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -767,7 +774,7 @@ void __init setup_boot_APIC_clock (void) printk(KERN_INFO "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + raw_local_irq_disable(); calibration_result = calibrate_APIC_clock(); /* @@ -775,17 +782,17 @@ void __init setup_boot_APIC_clock (void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuinit setup_secondary_APIC_clock(void) { - local_irq_disable(); /* FIXME: Do we need this? --RR */ + raw_local_irq_disable(); /* FIXME: Do we need this? --RR */ setup_APIC_timer(calibration_result); - local_irq_enable(); + raw_local_irq_enable(); } -void __cpuinit disable_APIC_timer(void) +void disable_APIC_timer(void) { if (using_apic_timer) { unsigned long v; @@ -797,7 +804,10 @@ void __cpuinit disable_APIC_timer(void) void enable_APIC_timer(void) { - if (using_apic_timer) { + int cpu = smp_processor_id(); + + if (using_apic_timer && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { unsigned long v; v = apic_read(APIC_LVTT); @@ -805,32 +815,45 @@ void enable_APIC_timer(void) } } -/* - * the frequency of the profiling timer can be changed - * by writing a multiplier value into /proc/profile. - */ -int setup_profiling_timer(unsigned int multiplier) +void switch_APIC_timer_to_ipi(void *cpumask) { - int i; + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); - /* - * Sanity check. 
[at least 500 APIC cycles should be - * between APIC interrupts as a rule of thumb, to avoid - * irqs flooding us] - */ - if ( (!multiplier) || (calibration_result/multiplier < 500)) - return -EINVAL; + if (cpu_isset(cpu, mask) && + !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + disable_APIC_timer(); + cpu_set(cpu, timer_interrupt_broadcast_ipi_mask); + } +} +EXPORT_SYMBOL(switch_APIC_timer_to_ipi); - /* - * Set the new multiplier for each CPU. CPUs don't start using the - * new values until the next timer interrupt in which they do process - * accounting. At that time they also adjust their APIC timers - * accordingly. - */ - for (i = 0; i < NR_CPUS; ++i) - per_cpu(prof_multiplier, i) = multiplier; +void smp_send_timer_broadcast_ipi(void) +{ + cpumask_t mask; - return 0; + cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask); + if (!cpus_empty(mask)) { + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); + } +} + +void switch_ipi_to_APIC_timer(void *cpumask) +{ + cpumask_t mask = *(cpumask_t *)cpumask; + int cpu = smp_processor_id(); + + if (cpu_isset(cpu, mask) && + cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { + cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask); + enable_APIC_timer(); + } +} +EXPORT_SYMBOL(switch_ipi_to_APIC_timer); + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; } #ifdef CONFIG_X86_MCE_AMD @@ -857,32 +880,10 @@ void setup_threshold_lvt(unsigned long l void smp_local_timer_interrupt(struct pt_regs *regs) { - int cpu = smp_processor_id(); - profile_tick(CPU_PROFILING, regs); - if (--per_cpu(prof_counter, cpu) <= 0) { - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); - if (per_cpu(prof_counter, cpu) != - per_cpu(prof_old_multiplier, cpu)) { - __setup_APIC_LVTT(calibration_result/ - per_cpu(prof_counter, cpu)); - per_cpu(prof_old_multiplier, cpu) = - per_cpu(prof_counter, cpu); - } - #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(regs)); #endif - } - /* * We take the 'long' return path, and there every subsystem * grabs the appropriate locks (kernel lock/ irq lock). Index: linux.prev/arch/x86_64/kernel/early_printk.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/early_printk.c +++ linux.prev/arch/x86_64/kernel/early_printk.c @@ -206,7 +206,7 @@ static int early_console_initialized = 0 void early_printk(const char *fmt, ...) { - char buf[512]; + static char buf[512]; int n; va_list ap; Index: linux.prev/arch/x86_64/kernel/entry.S =================================================================== --- linux.prev.orig/arch/x86_64/kernel/entry.S +++ linux.prev/arch/x86_64/kernel/entry.S @@ -48,6 +48,15 @@ #define retint_kernel retint_restore_args #endif +#ifdef CONFIG_CRITICAL_IRQSOFF_TIMING +# define CALL_TRACE_IRQS_ON \ + push %rbp; \ + mov %rsp, %rbp; \ + call trace_irqs_on; \ + leaveq +#else +# define CALL_TRACE_IRQS_ON +#endif /* * C code is not supposed to know about undefined top of stack. 
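The switch_APIC_timer_to_ipi()/switch_ipi_to_APIC_timer() hooks above let a CPU trade its local APIC timer tick for a broadcast IPI while the LAPIC timer is unusable, for instance in deep C-states that stop it. A minimal usage sketch, assuming an idle-driver call site along the lines of the ACPI C3 path; the call site itself is not part of this hunk:

/*
 * Hypothetical caller, for illustration only: switch the CPUs in
 * 'mask' to IPI-driven ticks around a LAPIC-stopping idle state.
 */
static void enter_lapic_stopping_idle(cpumask_t mask)
{
	/* each CPU masks its own LVTT and joins the broadcast mask */
	on_each_cpu(switch_APIC_timer_to_ipi, &mask, 1, 1);

	/*
	 * ... idle here: timer_interrupt() now ends with
	 * smp_send_timer_broadcast_ipi(), which ticks these CPUs
	 * via LOCAL_TIMER_VECTOR IPIs ...
	 */

	/* restore the per-CPU LAPIC timers on the way out */
	on_each_cpu(switch_ipi_to_APIC_timer, &mask, 1, 1);
}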
Every time * a C function with an pt_regs argument is called from the SYSCALL based @@ -230,8 +239,8 @@ sysret_check: /* edx: work, edi: workmask */ sysret_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz sysret_signal sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -252,7 +261,7 @@ sysret_signal: leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 call ptregscall_common -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi jmp sysret_check badsys: @@ -319,8 +328,8 @@ int_with_check: /* First do a reschedule test. */ /* edx: work, edi: workmask */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz int_very_careful sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -353,7 +362,7 @@ int_signal: movq %rsp,%rdi # &ptregs -> arg1 xorl %esi,%esi # oldset -> arg2 call do_notify_resume -1: movl $_TIF_NEED_RESCHED,%edi +1: movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi int_restore_rest: RESTORE_REST cli @@ -554,8 +563,8 @@ bad_iret: /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + testl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edx + jz retint_signal sti pushq %rdi CFI_ADJUST_CFA_OFFSET 8 @@ -577,7 +586,7 @@ retint_signal: call do_notify_resume RESTORE_REST cli - movl $_TIF_NEED_RESCHED,%edi + movl $(_TIF_NEED_RESCHED|_TIF_NEED_RESCHED_DELAYED),%edi GET_THREAD_INFO(%rcx) jmp retint_check @@ -593,6 +602,7 @@ retint_kernel: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args call preempt_schedule_irq + CALL_TRACE_IRQS_ON jmp exit_intr #endif CFI_ENDPROC @@ -1041,3 +1051,41 @@ ENTRY(call_softirq) CFI_ADJUST_CFA_OFFSET -8 ret CFI_ENDPROC + +#ifdef CONFIG_LATENCY_TRACE + +ENTRY(mcount) + cmpq $0, trace_enabled + jz out + + push %rbp + mov %rsp,%rbp + + push %r9 + push %r8 + push %rdi + push %rsi + push %rdx + push %rcx + push %rax + + mov 0x0(%rbp),%rax + mov 0x8(%rbp),%rdi + mov 0x8(%rax),%rsi + + call __trace + + pop %rax + pop %rcx + pop %rdx + pop %rsi + pop %rdi + pop %r8 + pop %r9 + + leaveq +out: + ret + +#endif + Index: linux.prev/arch/x86_64/kernel/genapic_flat.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/genapic_flat.c +++ linux.prev/arch/x86_64/kernel/genapic_flat.c @@ -50,8 +50,8 @@ static void flat_send_IPI_mask(cpumask_t unsigned long cfg; unsigned long flags; - local_save_flags(flags); - local_irq_disable(); + raw_local_save_flags(flags); + raw_local_irq_disable(); /* * Wait for idle. @@ -73,7 +73,7 @@ static void flat_send_IPI_mask(cpumask_t * Send the IPI. The write to APIC_ICR fires this off. 
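The mcount trampoline added to entry.S above feeds the latency tracer: with -pg, gcc emits a call to mcount() in every function prologue, and the assembly recovers the instrumented function's address (0x8(%rbp)) plus its caller's address one frame further up before handing both to __trace(). A rough C restatement, assuming frame pointers and a __trace(ip, parent_ip) signature (both assumptions, inferred from the register usage above):

/* Sketch only: what the mcount assembly above passes to __trace(). */
extern long trace_enabled;
extern void __trace(unsigned long ip, unsigned long parent_ip);

static void mcount_in_c(void)
{
	if (!trace_enabled)
		return;

	__trace((unsigned long)__builtin_return_address(0),
		(unsigned long)__builtin_return_address(1));
}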
*/ apic_write(APIC_ICR, cfg); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void flat_send_IPI_allbutself(int vector) Index: linux.prev/arch/x86_64/kernel/i8259.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/i8259.c +++ linux.prev/arch/x86_64/kernel/i8259.c @@ -127,7 +127,7 @@ void (*interrupt[NR_IRQS])(void) = { * moves to arch independent land */ -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { @@ -448,7 +448,7 @@ device_initcall(i8259A_init_sysfs); * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, SA_NODELAY, CPU_MASK_NONE, "cascade", NULL, NULL}; void __init init_ISA_irqs (void) { Index: linux.prev/arch/x86_64/kernel/init_task.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/init_task.c +++ linux.prev/arch/x86_64/kernel/init_task.c @@ -10,8 +10,8 @@ #include #include -static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; +static struct fs_struct init_fs = INIT_FS(init_fs); +static struct files_struct init_files = INIT_FILES(init_files); static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); Index: linux.prev/arch/x86_64/kernel/io_apic.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/io_apic.c +++ linux.prev/arch/x86_64/kernel/io_apic.c @@ -46,7 +46,7 @@ static int no_timer_check; int disable_timer_pin_1 __initdata; -static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); /* * # of IRQ routing registers @@ -94,6 +94,9 @@ int vector_irq[NR_VECTORS] __read_mostly reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ + /* Force POST flush by reading: */ \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + \ if (!entry->next) \ break; \ entry = irq_2_pin + entry->next; \ @@ -160,10 +163,8 @@ static void add_pin_to_irq(unsigned int static void name##_IO_APIC_irq (unsigned int irq) \ __DO_ACTION(R, ACTION, FINAL) -DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) ) - /* mask = 1 */ -DO_ACTION( __unmask, 0, &= 0xfffeffff, ) - /* mask = 0 */ +DO_ACTION( __mask, 0, |= 0x00010000, ) /* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ static void mask_IO_APIC_irq (unsigned int irq) { @@ -1338,7 +1339,7 @@ static int __init timer_irq_works(void) { unsigned long t1 = jiffies; - local_irq_enable(); + raw_local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); @@ -1431,12 +1432,50 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +/* + * In the preemptible case mask the IRQ first then handle it and ack it. + * + * (In the non-preemptible case we keep the IRQ unacked in the local APIC + * and dont need to do the masking, because the code executes atomically.) 
+ */ +#ifdef CONFIG_PREEMPT_HARDIRQS + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ + move_irq(irq); + mask_IO_APIC_irq(irq); + ack_APIC_irq(); +} + +static void end_level_ioapic_irq(unsigned int irq) +{ + if (!(irq_desc[irq].status & IRQ_INPROGRESS)) + unmask_IO_APIC_irq(irq); +} + +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} + +#else /* !CONFIG_PREEMPT_HARDIRQS */ + +static void mask_and_ack_level_ioapic_irq(unsigned int irq) +{ +} + static void end_level_ioapic_irq (unsigned int irq) { move_irq(irq); ack_APIC_irq(); } +static void enable_level_ioapic_irq(unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} +#endif /* !CONFIG_PREEMPT_HARDIRQS */ + #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1460,6 +1499,13 @@ static unsigned int startup_level_ioapic return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); @@ -1468,6 +1514,11 @@ static void end_level_ioapic_vector (uns end_level_ioapic_irq(irq); } +static void enable_level_ioapic_vector(unsigned int vector) +{ + enable_level_ioapic_irq(vector_to_irq(vector)); +} + static void mask_IO_APIC_vector (unsigned int vector) { int irq = vector_to_irq(vector); Index: linux.prev/arch/x86_64/kernel/irq.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/irq.c +++ linux.prev/arch/x86_64/kernel/irq.c @@ -129,9 +129,9 @@ void fixup_irqs(cpumask_t map) } /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); + raw_local_irq_enable(); mdelay(1); - local_irq_disable(); + raw_local_irq_disable(); } #endif @@ -145,11 +145,11 @@ asmlinkage void do_softirq(void) if (in_interrupt()) return; - local_irq_save(flags); + raw_local_irq_save(flags); pending = local_softirq_pending(); /* Switch to interrupt stack */ if (pending) call_softirq(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL(do_softirq); Index: linux.prev/arch/x86_64/kernel/machine_kexec.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/machine_kexec.c +++ linux.prev/arch/x86_64/kernel/machine_kexec.c @@ -190,7 +190,7 @@ NORET_TYPE void machine_kexec(struct kim relocate_new_kernel_t rnk; /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); + raw_local_irq_disable(); /* Calculate the offsets */ page_list = image->head; Index: linux.prev/arch/x86_64/kernel/nmi.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/nmi.c +++ linux.prev/arch/x86_64/kernel/nmi.c @@ -43,7 +43,7 @@ * This is maintained separately from nmi_active because the NMI * watchdog may also be driven from the I/O APIC timer. */ -static DEFINE_SPINLOCK(lapic_nmi_owner_lock); +static DEFINE_RAW_SPINLOCK(lapic_nmi_owner_lock); static unsigned int lapic_nmi_owner; #define LAPIC_NMI_WATCHDOG (1<<0) #define LAPIC_NMI_RESERVED (1<<1) @@ -127,7 +127,7 @@ void __cpuinit nmi_watchdog_default(void static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + raw_local_irq_enable(); /* Intentionally don't use cpu_relax here. 
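With CONFIG_PREEMPT_HARDIRQS the handler runs in a thread, so a level-triggered line has to be masked before the local APIC is acked and unmasked only after the thread finishes; otherwise the still-asserted line would retrigger immediately. The sequence the generic layer drives through the callbacks above, as an illustration (the generic-layer glue itself is not in this hunk):

/* Illustration of one level-triggered interrupt with threaded hardirqs. */
static void level_irq_sequence(unsigned int irq)
{
	mask_and_ack_level_ioapic_irq(irq);	/* mask the pin, ack the APIC */

	/*
	 * ... the handler thread runs the actions; IRQ_INPROGRESS is
	 * set, so end_level_ioapic_irq() will not unmask prematurely ...
	 */

	end_level_ioapic_irq(irq);		/* unmask once really done */
}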
This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -156,7 +156,7 @@ int __init check_nmi_watchdog (void) for (cpu = 0; cpu < NR_CPUS; cpu++) counts[cpu] = cpu_pda[cpu].__nmi_count; - local_irq_enable(); + raw_local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks for (cpu = 0; cpu < NR_CPUS; cpu++) { @@ -466,12 +466,42 @@ void touch_nmi_watchdog (void) touch_softlockup_watchdog(); } +int nmi_show_regs[NR_CPUS]; + +void nmi_show_all_regs(void) +{ + int i; + + if (nmi_watchdog == NMI_NONE) + return; + if (system_state != SYSTEM_RUNNING) { + printk("nmi_show_all_regs(): system state %d, not doing.\n", + system_state); + return; + } + + for_each_online_cpu(i) + nmi_show_regs[i] = 1; + for_each_online_cpu(i) + while (nmi_show_regs[i] == 1) + barrier(); +} + +static DEFINE_RAW_SPINLOCK(nmi_print_lock); + void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) { int sum; int touched = 0; + int cpu = safe_smp_processor_id(); sum = read_pda(apic_timer_irqs); + if (nmi_show_regs[cpu]) { + nmi_show_regs[cpu] = 0; + spin_lock(&nmi_print_lock); + show_regs(regs); + spin_unlock(&nmi_print_lock); + } if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -483,6 +513,11 @@ void nmi_watchdog_tick (struct pt_regs * */ local_inc(&__get_cpu_var(alert_counter)); if (local_read(&__get_cpu_var(alert_counter)) == 5*nmi_hz) { + int i; + + for (i = 0; i < NR_CPUS; i++) + nmi_show_regs[i] = 1; + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) { local_set(&__get_cpu_var(alert_counter), 0); Index: linux.prev/arch/x86_64/kernel/pmtimer.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/pmtimer.c +++ /dev/null @@ -1,101 +0,0 @@ -/* Ported over from i386 by AK, original copyright was: - * - * (C) Dominik Brodowski 2003 - * - * Driver to use the Power Management Timer (PMTMR) available in some - * southbridges as primary timing source for the Linux kernel. - * - * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, - * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. - * - * This file is licensed under the GPL v2. - * - * Dropped all the hardware bug workarounds for now. Hopefully they - * are not needed on 64bit chipsets. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The I/O port the PMTMR resides at. - * The location is detected during setup_arch(), - * in arch/i386/kernel/acpi/boot.c */ -u32 pmtmr_ioport; - -/* value of the Power timer at last timer interrupt */ -static u32 offset_delay; -static u32 last_pmtmr_tick; - -#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ - -static inline u32 cyc2us(u32 cycles) -{ - /* The Power Management Timer ticks at 3.579545 ticks per microsecond. - * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%] - * - * Even with HZ = 100, delta is at maximum 35796 ticks, so it can - * easily be multiplied with 286 (=0x11E) without having to fear - * u32 overflows. 
- */ - cycles *= 286; - return (cycles >> 10); -} - -int pmtimer_mark_offset(void) -{ - static int first_run = 1; - unsigned long tsc; - u32 lost; - - u32 tick = inl(pmtmr_ioport); - u32 delta; - - delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK); - - last_pmtmr_tick = tick; - monotonic_base += delta * NSEC_PER_USEC; - - delta += offset_delay; - - lost = delta / (USEC_PER_SEC / HZ); - offset_delay = delta % (USEC_PER_SEC / HZ); - - rdtscll(tsc); - vxtime.last_tsc = tsc - offset_delay * cpu_khz; - - /* don't calculate delay for first run, - or if we've got less then a tick */ - if (first_run || (lost < 1)) { - first_run = 0; - offset_delay = 0; - } - - return lost - 1; -} - -unsigned int do_gettimeoffset_pm(void) -{ - u32 now, offset, delta = 0; - - offset = last_pmtmr_tick; - now = inl(pmtmr_ioport); - delta = (now - offset) & ACPI_PM_MASK; - - return offset_delay + cyc2us(delta); -} - - -static int __init nopmtimer_setup(char *s) -{ - pmtmr_ioport = 0; - return 0; -} - -__setup("nopmtimer", nopmtimer_setup); Index: linux.prev/arch/x86_64/kernel/process.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/process.c +++ linux.prev/arch/x86_64/kernel/process.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -60,6 +61,12 @@ static atomic_t hlt_counter = ATOMIC_INI unsigned long boot_option_idle_override = 0; EXPORT_SYMBOL(boot_option_idle_override); +DEFINE_SPINLOCK(pm_idle_switch_lock); +EXPORT_SYMBOL_GPL(pm_idle_switch_lock); + +int pm_idle_locked = 0; +EXPORT_SYMBOL_GPL(pm_idle_locked); + /* * Powermanagement idle function, if any.. */ @@ -86,21 +93,21 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); if (!atomic_read(&hlt_counter)) { clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); - while (!need_resched()) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); + while (!need_resched() && !need_resched_delayed()) { + raw_local_irq_disable(); + if (!need_resched() && !need_resched_delayed()) + raw_safe_halt(); else - local_irq_enable(); + raw_local_irq_enable(); } set_thread_flag(TIF_POLLING_NRFLAG); } else { - while (!need_resched()) + while (!need_resched() && !need_resched_delayed()) cpu_relax(); } } @@ -110,9 +117,9 @@ void default_idle(void) * to poll the ->need_resched flag instead of waiting for the * cross-CPU IPI to arrive. Use this option with caution. 
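The reworked idle loops above poll need_resched_delayed() next to need_resched(). That helper is defined elsewhere in this series; a plausible sketch of it, assuming it simply tests the TIF_NEED_RESCHED_DELAYED flag that the entry.S paths earlier were taught to check:

/* Assumed definition, for illustration only: */
static inline int need_resched_delayed(void)
{
	return unlikely(test_thread_flag(TIF_NEED_RESCHED_DELAYED));
}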
*/ -static void poll_idle (void) +void poll_idle (void) { - local_irq_enable(); + raw_local_irq_enable(); asm volatile( "2:" @@ -188,7 +195,9 @@ void cpu_idle (void) /* endless idle loop with no priority at all */ while (1) { - while (!need_resched()) { + BUG_ON(raw_irqs_disabled()); + + while (!need_resched() && !need_resched_delayed()) { void (*idle)(void); if (__get_cpu_var(cpu_idle_state)) @@ -200,12 +209,15 @@ void cpu_idle (void) idle = default_idle; if (cpu_is_offline(smp_processor_id())) play_dead(); + stop_critical_timing(); + propagate_preempt_locks_value(); idle(); } - - preempt_enable_no_resched(); - schedule(); + raw_local_irq_disable(); + __preempt_enable_no_resched(); + __schedule(); preempt_disable(); + raw_local_irq_enable(); } } @@ -218,12 +230,12 @@ void cpu_idle (void) */ static void mwait_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); - while (!need_resched()) { + while (!need_resched() && !need_resched_delayed()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) + if (need_resched() || need_resched_delayed()) break; __mwait(0, 0); } @@ -314,7 +326,7 @@ void show_regs(struct pt_regs *regs) { printk("CPU %d:", smp_processor_id()); __show_regs(regs); - show_trace(®s->rsp); + show_trace(current, ®s->rsp); } /* @@ -333,13 +345,14 @@ void exit_thread(void) kprobe_flush_task(me); if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; /* * Careful, clear this in the TSS too: */ + tss = &per_cpu(init_tss, get_cpu()); memset(tss->io_bitmap, 0xff, t->io_bitmap_max); t->io_bitmap_max = 0; put_cpu(); Index: linux.prev/arch/x86_64/kernel/reboot.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/reboot.c +++ linux.prev/arch/x86_64/kernel/reboot.c @@ -99,7 +99,7 @@ void machine_shutdown(void) smp_send_stop(); #endif - local_irq_save(flags); + raw_local_irq_save(flags); #ifndef CONFIG_SMP disable_local_APIC(); @@ -107,7 +107,7 @@ void machine_shutdown(void) disable_IO_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } void machine_emergency_restart(void) Index: linux.prev/arch/x86_64/kernel/setup.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/setup.c +++ linux.prev/arch/x86_64/kernel/setup.c @@ -993,6 +993,7 @@ static void __cpuinit init_intel(struct c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 >= 15) set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); c->x86_max_cores = intel_num_cpu_cores(c); srat_detect_node(); Index: linux.prev/arch/x86_64/kernel/signal.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/signal.c +++ linux.prev/arch/x86_64/kernel/signal.c @@ -434,6 +434,13 @@ int do_signal(struct pt_regs *regs, sigs siginfo_t info; int signr; +#ifdef CONFIG_PREEMPT_RT + /* + * Fully-preemptible kernel does not need interrupts disabled: + */ + raw_local_irq_enable(); + preempt_check_resched(); +#endif /* * We want the common case to go fast, which * is why we may in certain cases get here from Index: linux.prev/arch/x86_64/kernel/smp.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/smp.c +++ linux.prev/arch/x86_64/kernel/smp.c @@ -297,10 +297,20 @@ void smp_send_reschedule(int cpu) } /* + * this 
function sends a 'reschedule' IPI to all other CPUs. + * This is used when RT tasks are starving and other CPUs + * might be able to run them: + */ +void smp_send_reschedule_allbutself(void) +{ + send_IPI_allbutself(RESCHEDULE_VECTOR); +} + +/* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -static DEFINE_SPINLOCK(call_lock); +static DEFINE_RAW_SPINLOCK(call_lock); struct call_data_struct { void (*func) (void *info); @@ -455,9 +465,9 @@ void smp_stop_cpu(void) * Remove this CPU: */ cpu_clear(smp_processor_id(), cpu_online_map); - local_irq_save(flags); + raw_local_irq_save(flags); disable_local_APIC(); - local_irq_restore(flags); + raw_local_irq_restore(flags); } static void smp_really_stop_cpu(void *dummy) @@ -481,9 +491,9 @@ void smp_send_stop(void) if (!nolock) spin_unlock(&call_lock); - local_irq_disable(); + raw_local_irq_disable(); disable_local_APIC(); - local_irq_enable(); + raw_local_irq_enable(); } /* Index: linux.prev/arch/x86_64/kernel/smpboot.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/smpboot.c +++ linux.prev/arch/x86_64/kernel/smpboot.c @@ -200,7 +200,7 @@ static void __cpuinit smp_store_cpu_info latency and low latency is the primary objective here. -AK */ #define no_cpu_relax() barrier() -static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); +static __cpuinitdata __DEFINE_RAW_SPINLOCK(tsc_sync_lock); static volatile __cpuinitdata unsigned long go[SLAVE + 1]; static int notscsync __cpuinitdata; @@ -216,7 +216,7 @@ static __cpuinit void sync_master(void * go[MASTER] = 0; - local_irq_save(flags); + raw_local_irq_save(flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { while (!go[MASTER]) @@ -225,7 +225,7 @@ static __cpuinit void sync_master(void * rdtscll(go[SLAVE]); } } - local_irq_restore(flags); + raw_local_irq_restore(flags); } /* @@ -335,7 +335,13 @@ static __cpuinit void sync_tsc(unsigned static void __cpuinit tsc_sync_wait(void) { - if (notscsync || !cpu_has_tsc) + /* + * When the CPU has synchronized TSCs assume the BIOS + * or the hardware already synced. Otherwise we could + * mess up a possible perfect synchronization with a + * not-quite-perfect algorithm. + */ + if (notscsync || !cpu_has_tsc || !unsynchronized_tsc()) return; sync_tsc(0); } @@ -1080,7 +1086,7 @@ int __cpuinit __cpu_up(unsigned int cpu) int err; int apicid = cpu_present_to_apicid(cpu); - WARN_ON(irqs_disabled()); + WARN_ON(raw_irqs_disabled()); Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); Index: linux.prev/arch/x86_64/kernel/switch2poll.c =================================================================== --- /dev/null +++ linux.prev/arch/x86_64/kernel/switch2poll.c @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include + +extern void poll_idle (void); + +#define KERNEL_ATTR_RW(_name) \ +static struct subsys_attribute _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +static struct idlep_kobject +{ + struct kobject kobj; + int is_poll; + void (*idle)(void); +} idle_kobj; + +static ssize_t idle_poll_show(struct subsystem *subsys, char *page) +{ + return sprintf(page, "%s\n", (idle_kobj.is_poll ? "on" : "off")); +} + +static ssize_t idle_poll_store(struct subsystem *subsys, + const char *buf, size_t len) +{ + unsigned long flags; + + spin_lock_irqsave(&pm_idle_switch_lock, flags); + + /* + * If power management is handling the idle function, + * then leave it be. 
+ */ + if (pm_idle_locked) { + len = -EBUSY; + goto out; + } + + if (strncmp(buf,"1",1)==0 || + (len >=2 && strncmp(buf,"on",2)==0)) { + if (idle_kobj.is_poll != 1) { + idle_kobj.is_poll = 1; + boot_option_idle_override = 1; + idle_kobj.idle = pm_idle; + pm_idle = poll_idle; + } + } else if (strncmp(buf,"0",1)==0 || + (len >= 3 && strncmp(buf,"off",3)==0)) { + if (idle_kobj.is_poll != 0) { + boot_option_idle_override = 0; + idle_kobj.is_poll = 0; + pm_idle = idle_kobj.idle; + } + } + +out: + spin_unlock_irqrestore(&pm_idle_switch_lock, flags); + + return len; +} + + +KERNEL_ATTR_RW(idle_poll); + +static struct attribute * idle_attrs[] = { + &idle_poll_attr.attr, + NULL +}; + +static struct attribute_group idle_attr_group = { + .attrs = idle_attrs, +}; + +static int __init idle_poll_set_init(void) +{ + int err; + + /* + * If the default is alread poll_idle then + * don't even bother with this. + */ + if (pm_idle == poll_idle) + return 0; + + memset(&idle_kobj, 0, sizeof(idle_kobj)); + + idle_kobj.is_poll = 0; + idle_kobj.idle = pm_idle; + + err = kobject_set_name(&idle_kobj.kobj, "%s", "idle"); + if (err) + goto out; + + idle_kobj.kobj.parent = &kernel_subsys.kset.kobj; + err = kobject_register(&idle_kobj.kobj); + if (err) + goto out; + + err = sysfs_create_group(&idle_kobj.kobj, + &idle_attr_group); + if (err) + goto out; + + return 0; +out: + printk(KERN_INFO "Problem setting up sysfs idle_poll\n"); + return 0; +} + +late_initcall(idle_poll_set_init); Index: linux.prev/arch/x86_64/kernel/time.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/time.c +++ linux.prev/arch/x86_64/kernel/time.c @@ -26,6 +26,7 @@ #include #include #include + #ifdef CONFIG_ACPI #include /* for PM timer frequency */ #endif @@ -38,25 +39,26 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif -#ifdef CONFIG_CPU_FREQ -static void cpufreq_delayed_get(void); -#endif extern void i8254_timer_resume(void); extern int using_apic_timer; -DEFINE_SPINLOCK(rtc_lock); -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(rtc_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); static int nohpet __initdata = 0; static int notsc __initdata = 0; #undef HPET_HACK_ENABLE_DANGEROUS -unsigned int cpu_khz; /* TSC clocks / usec, not used here */ +unsigned int cpu_khz; /* CPU clocks / usec, not used here */ +unsigned int tsc_khz; /* TSC clocks / usec, not used here */ +unsigned long hpet_address; static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ @@ -79,107 +81,6 @@ static inline void rdtscll_sync(unsigned rdtscll(*tsc); } -/* - * do_gettimeoffset() returns microseconds since last timer interrupt was - * triggered by hardware. A memory read of HPET is slower than a register read - * of TSC, but much more reliable. It's also synchronized to the timer - * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a - * timer interrupt has happened already, but vxtime.trigger wasn't updated yet. - * This is not a problem, because jiffies hasn't updated either. They are bound - * together by xtime_lock. 
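switch2poll.c registers a kobject named "idle" with kernel_subsys as its parent, so the control file should surface as /sys/kernel/idle/idle_poll and accept "on"/"1" and "off"/"0" (path inferred from the registration above, not spelled out in the patch). A small userspace sketch:

#include <stdio.h>

/* Toggle polling idle at runtime; returns 0 on success. */
int set_idle_poll(int on)
{
	FILE *f = fopen("/sys/kernel/idle/idle_poll", "w");

	if (!f)
		return -1;
	fputs(on ? "on" : "off", f);
	return fclose(f);
}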
- */ - -static inline unsigned int do_gettimeoffset_tsc(void) -{ - unsigned long t; - unsigned long x; - rdtscll_sync(&t); - if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ - x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; - return x; -} - -static inline unsigned int do_gettimeoffset_hpet(void) -{ - /* cap counter read to one tick to avoid inconsistencies */ - unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; - return (min(counter,hpet_tick) * vxtime.quot) >> 32; -} - -unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; - -/* - * This version of gettimeofday() has microsecond resolution and better than - * microsecond precision, as we're using at least a 10 MHz (usually 14.31818 - * MHz) HPET timer. - */ - -void do_gettimeofday(struct timeval *tv) -{ - unsigned long seq, t; - unsigned int sec, usec; - - do { - seq = read_seqbegin(&xtime_lock); - - sec = xtime.tv_sec; - usec = xtime.tv_nsec / 1000; - - /* i386 does some correction here to keep the clock - monotonous even when ntpd is fixing drift. - But they didn't work for me, there is a non monotonic - clock anyways with ntp. - I dropped all corrections now until a real solution can - be found. Note when you fix it here you need to do the same - in arch/x86_64/kernel/vsyscall.c and export all needed - variables in vmlinux.lds. -AK */ - - t = (jiffies - wall_jiffies) * (1000000L / HZ) + - do_gettimeoffset(); - usec += t; - - } while (read_seqretry(&xtime_lock, seq)); - - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; -} - -EXPORT_SYMBOL(do_gettimeofday); - -/* - * settimeofday() first undoes the correction that gettimeofday would do - * on the time, and then saves it. This is ugly, but has been like this for - * ages already. - */ - -int do_settimeofday(struct timespec *tv) -{ - time_t wtm_sec, sec = tv->tv_sec; - long wtm_nsec, nsec = tv->tv_nsec; - - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) - return -EINVAL; - - write_seqlock_irq(&xtime_lock); - - nsec -= do_gettimeoffset() * 1000 + - (jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ); - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec); - - set_normalized_timespec(&xtime, sec, nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - - ntp_clear(); - - write_sequnlock_irq(&xtime_lock); - clock_was_set(); - return 0; -} - -EXPORT_SYMBOL(do_settimeofday); - unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); @@ -259,8 +160,8 @@ static void set_rtc_mmss(unsigned long n #endif { - BIN_TO_BCD(real_seconds); - BIN_TO_BCD(real_minutes); + BIN_TO_BCD(real_seconds); + BIN_TO_BCD(real_minutes); CMOS_WRITE(real_seconds, RTC_SECONDS); CMOS_WRITE(real_minutes, RTC_MINUTES); } @@ -279,90 +180,8 @@ static void set_rtc_mmss(unsigned long n spin_unlock(&rtc_lock); } - -/* monotonic_clock(): returns # of nanoseconds passed since time_init() - * Note: This function is required to return accurate - * time even in the absence of multiple timer ticks. 
- */ -unsigned long long monotonic_clock(void) -{ - unsigned long seq; - u32 last_offset, this_offset, offset; - unsigned long long base; - - if (vxtime.mode == VXTIME_HPET) { - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last; - base = monotonic_base; - this_offset = hpet_readl(HPET_COUNTER); - - } while (read_seqretry(&xtime_lock, seq)); - offset = (this_offset - last_offset); - offset *=(NSEC_PER_SEC/HZ)/hpet_tick; - return base + offset; - }else{ - do { - seq = read_seqbegin(&xtime_lock); - - last_offset = vxtime.last_tsc; - base = monotonic_base; - } while (read_seqretry(&xtime_lock, seq)); - sync_core(); - rdtscll(this_offset); - offset = (this_offset - last_offset)*1000/cpu_khz; - return base + offset; - } - - -} -EXPORT_SYMBOL(monotonic_clock); - -static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) -{ - static long lost_count; - static int warned; - - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING - "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " - "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. */ - warned = 1; - } else - lost_count++; - -#ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - (like going into thermal throttle) - Give cpufreq a change to catch up. */ - if ((lost_count+1) % 25 == 0) { - cpufreq_delayed_get(); - } -#endif -} - static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - static unsigned long rtc_update = 0; - unsigned long tsc; - int delay, offset = 0, lost = 0; - /* * Here we are in the timer irq handler. We have irqs locally disabled (so we * don't need spin_lock_irqsave()) but we don't know if the timer_bh is running @@ -372,67 +191,6 @@ static irqreturn_t timer_interrupt(int i write_seqlock(&xtime_lock); - if (vxtime.hpet_address) - offset = hpet_readl(HPET_COUNTER); - - if (hpet_use_timer) { - /* if we're using the hpet timer functionality, - * we can more accurately know the counter value - * when the timer interrupt occured. 
- */ - offset = hpet_readl(HPET_T0_CMP) - hpet_tick; - delay = hpet_readl(HPET_COUNTER) - offset; - } else { - spin_lock(&i8253_lock); - outb_p(0x00, 0x43); - delay = inb_p(0x40); - delay |= inb(0x40) << 8; - spin_unlock(&i8253_lock); - delay = LATCH - 1 - delay; - } - - rdtscll_sync(&tsc); - - if (vxtime.mode == VXTIME_HPET) { - if (offset - vxtime.last > hpet_tick) { - lost = (offset - vxtime.last) / hpet_tick - 1; - } - - monotonic_base += - (offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick; - - vxtime.last = offset; -#ifdef CONFIG_X86_PM_TIMER - } else if (vxtime.mode == VXTIME_PMTMR) { - lost = pmtimer_mark_offset(); -#endif - } else { - offset = (((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ); - - if (offset < 0) - offset = 0; - - if (offset > (USEC_PER_SEC / HZ)) { - lost = offset / (USEC_PER_SEC / HZ); - offset %= (USEC_PER_SEC / HZ); - } - - monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ; - - vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot; - - if ((((tsc - vxtime.last_tsc) * - vxtime.tsc_quot) >> 32) < offset) - vxtime.last_tsc = tsc - - (((long) offset << 32) / vxtime.tsc_quot) - 1; - } - - if (lost > 0) { - handle_lost_ticks(lost, regs); - jiffies += lost; - } - /* * Do the timer stuff. */ @@ -455,22 +213,13 @@ static irqreturn_t timer_interrupt(int i smp_local_timer_interrupt(regs); #endif -/* - * If we have an externally synchronized Linux clock, then update CMOS clock - * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy - * closest to exactly 500 ms before the next second. If the update fails, we - * don't care, as it'll be updated on the next turn, and the problem (time way - * off) isn't likely to go away much sooner anyway. - */ - - if (ntp_synced() && xtime.tv_sec > rtc_update && - abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) { - set_rtc_mmss(xtime.tv_sec); - rtc_update = xtime.tv_sec + 660; - } - write_sequnlock(&xtime_lock); +#ifdef CONFIG_X86_LOCAL_APIC + if (using_apic_timer) + smp_send_timer_broadcast_ipi(); +#endif + return IRQ_HANDLED; } @@ -509,10 +258,23 @@ unsigned long long sched_clock(void) return cycles_2_ns(a); } +static int tsc_unstable; + +static inline int check_tsc_unstable(void) +{ + return tsc_unstable; +} + +void mark_tsc_unstable(void) +{ + tsc_unstable = 1; +} +EXPORT_SYMBOL_GPL(mark_tsc_unstable); + unsigned long get_cmos_time(void) { - unsigned int timeout, year, mon, day, hour, min, sec; - unsigned char last, this; + unsigned int timeout = 1000000, year, mon, day, hour, min, sec; + unsigned char uip = 0, this = 0; unsigned long flags; /* @@ -525,50 +287,70 @@ unsigned long get_cmos_time(void) spin_lock_irqsave(&rtc_lock, flags); - timeout = 1000000; - last = this = 0; - - while (timeout && last && !this) { - last = this; + while (timeout && (!uip || this)) { + uip |= this; this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; timeout--; } -/* - * Here we are safe to assume the registers won't change for a whole second, so - * we just go ahead and read them. - */ - - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); + /* + * Here we are safe to assume the registers won't change for a whole + * second, so we just go ahead and read them. 
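The register reads just below come back in BCD, which the BCD_TO_BIN() calls then undo. As a worked example of that conversion: the RTC reports 59 seconds as 0x59, and (0x59 & 0x0f) + (0x59 >> 4) * 10 = 9 + 50 = 59.

/* The arithmetic behind BCD_TO_BIN(), e.g. 0x59 -> 59: */
static inline unsigned int bcd_to_bin_example(unsigned char bcd)
{
	return (bcd & 0x0f) + (bcd >> 4) * 10;
}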
+ */ + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); spin_unlock_irqrestore(&rtc_lock, flags); -/* - * We know that x86-64 always uses BCD format, no need to check the config - * register. - */ - - BCD_TO_BIN(sec); - BCD_TO_BIN(min); - BCD_TO_BIN(hour); - BCD_TO_BIN(day); - BCD_TO_BIN(mon); - BCD_TO_BIN(year); + /* + * We know that x86-64 always uses BCD format, no need to check the + * config register. + */ + + BCD_TO_BIN(sec); + BCD_TO_BIN(min); + BCD_TO_BIN(hour); + BCD_TO_BIN(day); + BCD_TO_BIN(mon); + BCD_TO_BIN(year); -/* - * x86-64 systems only exists since 2002. - * This will work up to Dec 31, 2100 - */ + /* + * x86-64 systems only exists since 2002. + * This will work up to Dec 31, 2100 + */ year += 2000; return mktime(year, mon, day, hour, min, sec); } +/* arch specific timeofday hooks: */ +u64 read_persistent_clock(void) +{ + return (u64)get_cmos_time() * NSEC_PER_SEC; +} + +void sync_persistent_clock(struct timespec ts) +{ + static unsigned long rtc_update = 0; + /* + * If we have an externally synchronized Linux clock, then update + * CMOS clock accordingly every ~11 minutes. set_rtc_mmss() will + * be called in the jiffy closest to exactly 500 ms before the + * next second. If the update fails, we don't care, as it'll be + * updated on the next turn, and the problem (time way off) isn't + * likely to go away much sooner anyway. + */ + if (ts.tv_sec > rtc_update && + abs(ts.tv_nsec - 500000000) <= tick_nsec / 2) { + set_rtc_mmss(xtime.tv_sec); + rtc_update = xtime.tv_sec + 660; + } +} + #ifdef CONFIG_CPU_FREQ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency @@ -596,23 +378,6 @@ static void handle_cpufreq_delayed_get(v cpufreq_delayed_issched = 0; } -/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries - * to verify the CPU frequency the timing core thinks the CPU is running - * at is still correct. - */ -static void cpufreq_delayed_get(void) -{ - static int warned; - if (cpufreq_init && !cpufreq_delayed_issched) { - cpufreq_delayed_issched = 1; - if (!warned) { - warned = 1; - printk(KERN_DEBUG "Losing some ticks... 
checking if CPU frequency changed.\n"); - } - schedule_work(&cpufreq_delayed_get_work); - } -} - static unsigned int ref_freq = 0; static unsigned long loops_per_jiffy_ref = 0; @@ -647,8 +412,11 @@ static int time_cpufreq_notifier(struct cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); - if (!(freq->flags & CPUFREQ_CONST_LOOPS)) + if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { vxtime.tsc_quot = (1000L << 32) / cpu_khz; + tsc_khz = cpu_khz; + } + } set_cyc2ns_scale(cpu_khz_ref); @@ -686,18 +454,17 @@ static unsigned int __init hpet_calibrat int tsc_now, hpet_now; unsigned long flags; - local_irq_save(flags); - local_irq_disable(); + raw_local_irq_save(flags); + raw_local_irq_disable(); hpet_start = hpet_readl(HPET_COUNTER); rdtscl(tsc_start); do { - local_irq_disable(); + raw_local_irq_disable(); hpet_now = hpet_readl(HPET_COUNTER); - sync_core(); - rdtscl(tsc_now); - local_irq_restore(flags); + tsc_now = get_cycles_sync(); + raw_local_irq_restore(flags); } while ((tsc_now - tsc_start) < TICK_COUNT && (hpet_now - hpet_start) < TICK_COUNT); @@ -726,11 +493,9 @@ static unsigned int __init pit_calibrate outb(0xb0, 0x43); outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); - rdtscll(start); - sync_core(); + start = get_cycles_sync(); while ((inb(0x61) & 0x20) == 0); - sync_core(); - rdtscll(end); + end = get_cycles_sync(); spin_unlock_irqrestore(&i8253_lock, flags); @@ -744,7 +509,7 @@ static __init int late_hpet_init(void) unsigned int ntimer; if (!vxtime.hpet_address) - return -1; + return -1; memset(&hd, 0, sizeof (hd)); @@ -875,11 +640,9 @@ int __init time_setup(char *str) } static struct irqaction irq0 = { - timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL + timer_interrupt, SA_INTERRUPT | SA_NODELAY, CPU_MASK_NONE, "timer", NULL, NULL }; -extern void __init config_acpi_tables(void); - void __init time_init(void) { char *timename; @@ -914,18 +677,12 @@ void __init time_init(void) if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; -#ifdef CONFIG_X86_PM_TIMER - } else if (pmtmr_ioport && !vxtime.hpet_address) { - vxtime_hz = PM_TIMER_FREQUENCY; - timename = "PM"; - pit_init(); - cpu_khz = pit_calibrate_tsc(); -#endif } else { pit_init(); cpu_khz = pit_calibrate_tsc(); timename = "PIT"; } + tsc_khz = cpu_khz; printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n", vxtime_hz / 1000000, vxtime_hz % 1000000, timename); @@ -934,7 +691,7 @@ void __init time_init(void) vxtime.mode = VXTIME_TSC; vxtime.quot = (1000000L << 32) / vxtime_hz; vxtime.tsc_quot = (1000L << 32) / cpu_khz; - rdtscll_sync(&vxtime.last_tsc); + vxtime.last_tsc = get_cycles_sync(); setup_irq(0, &irq0); set_cyc2ns_scale(cpu_khz); @@ -948,7 +705,7 @@ void __init time_init(void) * Make an educated guess if the TSC is trustworthy and synchronized * over all CPUs. */ -static __init int unsynchronized_tsc(void) +__init int unsynchronized_tsc(void) { #ifdef CONFIG_SMP if (oem_force_hpet_timer()) @@ -959,7 +716,7 @@ static __init int unsynchronized_tsc(voi return 0; #endif /* Assume multi socket systems are not synchronized */ - return num_online_cpus() > 1; + return num_present_cpus() > 1; } /* @@ -967,31 +724,8 @@ static __init int unsynchronized_tsc(voi */ void __init time_init_gtod(void) { - char *timetype; - if (unsynchronized_tsc()) - notsc = 1; - if (vxtime.hpet_address && notsc) { - timetype = hpet_use_timer ? 
"HPET" : "PIT/HPET"; - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; -#ifdef CONFIG_X86_PM_TIMER - /* Using PM for gettimeofday is quite slow, but we have no other - choice because the TSC is too unreliable on some systems. */ - } else if (pmtmr_ioport && !vxtime.hpet_address && notsc) { - timetype = "PM"; - do_gettimeoffset = do_gettimeoffset_pm; - vxtime.mode = VXTIME_PMTMR; - sysctl_vsyscall = 0; - printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); -#endif - } else { - timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; - vxtime.mode = VXTIME_TSC; - } - - printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); + mark_tsc_unstable(); } __setup("report_lost_ticks", time_setup); @@ -999,6 +733,10 @@ __setup("report_lost_ticks", time_setup) static long clock_cmos_diff; static unsigned long sleep_start; +/* + * sysfs support for the timer. + */ + static int timer_suspend(struct sys_device *dev, pm_message_t state) { /* @@ -1014,7 +752,6 @@ static int timer_suspend(struct sys_devi static int timer_resume(struct sys_device *dev) { - unsigned long flags; unsigned long sec; unsigned long ctime = get_cmos_time(); unsigned long sleep_length = (ctime - sleep_start) * HZ; @@ -1025,10 +762,6 @@ static int timer_resume(struct sys_devic i8254_timer_resume(); sec = ctime + clock_cmos_diff; - write_seqlock_irqsave(&xtime_lock,flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; - write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; touch_softlockup_watchdog(); @@ -1041,7 +774,6 @@ static struct sysdev_class timer_sysclas set_kset_name("timer"), }; - /* XXX this driverfs stuff should probably go elsewhere later -john */ static struct sys_device device_timer = { .id = 0, @@ -1075,8 +807,6 @@ device_initcall(time_init_device); */ #include -extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); - #define DEFAULT_RTC_INT_FREQ 64 #define RTC_NUM_INTS 1 @@ -1124,12 +854,12 @@ int hpet_rtc_timer_init(void) else hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; - local_irq_save(flags); + raw_local_irq_save(flags); cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); + raw_local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; @@ -1285,8 +1015,6 @@ irqreturn_t hpet_rtc_interrupt(int irq, } #endif - - static int __init nohpet_setup(char *s) { nohpet = 1; @@ -1305,3 +1033,141 @@ static int __init notsc_setup(char *s) __setup("notsc", notsc_setup); +/* clock source code: */ + +static unsigned long current_tsc_khz = 0; + +static int tsc_update_callback(void); + +static cycle_t read_tsc(void) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + +static cycle_t __vsyscall_fn vread_tsc(void* unused) +{ + cycle_t ret; + + rdtscll(ret); + + return ret; +} + +static struct clocksource clocksource_tsc = { + .name = "tsc", + .rating = 300, + .read = read_tsc, + .vread = vread_tsc, + .mask = (cycle_t)-1, + .mult = 0, /* to be set */ + .shift = 22, + .update_callback = tsc_update_callback, + .is_continuous = 1, +}; + +static int tsc_update_callback(void) +{ + int change = 0; + + /* check to see if we should switch to the safe clocksource: */ + if (clocksource_tsc.rating != 50 && check_tsc_unstable()) { + clocksource_tsc.rating = 50; + reselect_clocksource(); + change = 1; + } + + /* only update if tsc_khz has changed: */ + if 
(current_tsc_khz != tsc_khz){ + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + change = 1; + } + return change; +} + +static int __init init_tsc_clocksource(void) +{ + if (!notsc && tsc_khz) { + current_tsc_khz = tsc_khz; + clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, + clocksource_tsc.shift); + register_clocksource(&clocksource_tsc); + } + return 0; +} + +module_init(init_tsc_clocksource); + + +#define HPET_MASK 0xFFFFFFFF +#define HPET_SHIFT 22 + +/* FSEC = 10^-15 NSEC = 10^-9 */ +#define FSEC_PER_NSEC 1000000 + +static void *hpet_ptr; + +static cycle_t read_hpet(void) +{ + return (cycle_t)readl(hpet_ptr); +} + +static cycle_t __vsyscall_fn vread_hpet(void* ptr) +{ + return (cycle_t)readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0); +} + +struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .vread = vread_hpet, + .mask = (cycle_t)HPET_MASK, + .mult = 0, /* set below */ + .shift = HPET_SHIFT, + .is_continuous = 1, +}; + +static int __init init_hpet_clocksource(void) +{ + unsigned long hpet_period; + void __iomem *hpet_base; + u64 tmp; + + if (!hpet_address) + return -ENODEV; + + /* calculate the hpet address: */ + hpet_base = + (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE); + hpet_ptr = hpet_base + HPET_COUNTER; + + /* calculate the frequency: */ + hpet_period = readl(hpet_base + HPET_PERIOD); + + /* + * hpet period is in femto seconds per cycle + * so we need to convert this to ns/cyc units + * aproximated by mult/2^shift + * + * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift + * fsec/cyc * 1ns/1000000fsec * 2^shift = mult + * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult + * (fsec/cyc << shift)/1000000 = mult + * (hpet_period << shift)/FSEC_PER_NSEC = mult + */ + tmp = (u64)hpet_period << HPET_SHIFT; + do_div(tmp, FSEC_PER_NSEC); + clocksource_hpet.mult = (u32)tmp; + + register_clocksource(&clocksource_hpet); + + return 0; +} + +module_init(init_hpet_clocksource); Index: linux.prev/arch/x86_64/kernel/traps.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/traps.c +++ linux.prev/arch/x86_64/kernel/traps.c @@ -88,7 +88,7 @@ int register_die_notifier(struct notifie static inline void conditional_sti(struct pt_regs *regs) { if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); + raw_local_irq_enable(); } static int kstack_depth_to_print = 10; @@ -154,7 +154,7 @@ static unsigned long *in_exception_stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -void show_trace(unsigned long *stack) +void show_trace(struct task_struct *task, unsigned long *stack) { unsigned long addr; const unsigned cpu = safe_smp_processor_id(); @@ -219,6 +219,7 @@ void show_trace(unsigned long *stack) HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); #undef HANDLE_STACK printk("\n"); + print_traces(task); } void show_stack(struct task_struct *tsk, unsigned long * rsp) @@ -255,7 +256,7 @@ void show_stack(struct task_struct *tsk, printk("%016lx ", *stack++); touch_nmi_watchdog(); } - show_trace((unsigned long *)rsp); + show_trace(tsk, (unsigned long *)rsp); } /* @@ -264,7 +265,7 @@ void show_stack(struct task_struct *tsk, void dump_stack(void) { unsigned long dummy; - show_trace(&dummy); + show_trace(current, &dummy); } EXPORT_SYMBOL(dump_stack); @@ -337,7 +338,7 @@ void out_of_line_bug(void) } #endif -static DEFINE_SPINLOCK(die_lock); +static 
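To make the mult/shift derivation in init_hpet_clocksource() above concrete: a 14.31818 MHz HPET reports a period of about 69841279 fs per cycle, so mult = (69841279 << 22) / 1000000 = 292935555, and converting one cycle back gives (1 * 292935555) >> 22, roughly 69.8 ns, the period we started from. A standalone check with those illustrative numbers (the kernel computes them at boot):

#include <stdio.h>

int main(void)
{
	unsigned long long hpet_period = 69841279ULL; /* fs/cycle at 14.31818 MHz */
	unsigned long long mult = (hpet_period << 22) / 1000000ULL;

	/* one cycle should convert back to ~69.8 ns (prints 69, truncated) */
	printf("mult=%llu, 1 cycle -> %llu ns\n", mult, mult >> 22);
	return 0;
}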
DEFINE_RAW_SPINLOCK(die_lock); static int die_owner = -1; unsigned long oops_begin(void) @@ -346,7 +347,7 @@ unsigned long oops_begin(void) unsigned long flags; /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); if (!spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; Index: linux.prev/arch/x86_64/kernel/vmlinux.lds.S =================================================================== --- linux.prev.orig/arch/x86_64/kernel/vmlinux.lds.S +++ linux.prev/arch/x86_64/kernel/vmlinux.lds.S @@ -99,6 +99,18 @@ SECTIONS .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } jiffies = VVIRT(.jiffies); + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } + .vsyscall_data : AT(VLOAD(.vsyscall_data)) { *(.vsyscall_data) } + + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { *(.vsyscall_gtod_data) } + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + + .vsyscall_gtod_lock : AT(VLOAD(.vsyscall_gtod_lock)) { *(.vsyscall_gtod_lock) } + vsyscall_gtod_lock = VVIRT(.vsyscall_gtod_lock); + + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } + .vsyscall_data : AT(VLOAD(.vsyscall_data)) { *(.vsyscall_data) } + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { *(.vsyscall_1) } .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { *(.vsyscall_2) } .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { *(.vsyscall_3) } Index: linux.prev/arch/x86_64/kernel/vsyscall.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/vsyscall.c +++ linux.prev/arch/x86_64/kernel/vsyscall.c @@ -19,6 +19,8 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ +#include +#include #include #include #include @@ -27,22 +29,34 @@ #include #include + #include #include +#include #include #include #include #include -#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) -#define force_inline __attribute__((always_inline)) inline +#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) notrace int __sysctl_vsyscall __section_sysctl_vsyscall = 1; -seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; +raw_seqlock_t __xtime_lock __section_xtime_lock = RAW_SEQLOCK_UNLOCKED; -#include +struct vsyscall_gtod_data_t { + struct timeval wall_time_tv; + struct timezone sys_tz; + cycle_t offset_base; + struct clocksource clock; +}; + +extern struct vsyscall_gtod_data_t vsyscall_gtod_data; +struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data; -static force_inline void timeval_normalize(struct timeval * tv) +extern raw_seqlock_t vsyscall_gtod_lock; +raw_seqlock_t __vsyscall_gtod_lock __section_vsyscall_gtod_lock = RAW_SEQLOCK_UNLOCKED; + +static __always_inline void timeval_normalize(struct timeval * tv) { time_t __sec; @@ -53,43 +67,71 @@ static force_inline void timeval_normali } } -static force_inline void do_vgettimeofday(struct timeval * tv) +/* + * XXX - this is ugly. gettimeofday() has a label in it so we can't + * call it twice. 
+ */ +static __always_inline int syscall_gtod(struct timeval *tv, struct timezone *tz) { - long sequence, t; - unsigned long sec, usec; + int ret; + + asm volatile("syscall" + : "=a" (ret) + : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) + : __syscall_clobber); + + return ret; +} + +static __always_inline void do_vgettimeofday(struct timeval * tv) +{ + cycle_t now, base, mask, cycle_delta; + unsigned long mult, shift, seq; + nsec_t nsec_delta; do { - sequence = read_seqbegin(&__xtime_lock); - - sec = __xtime.tv_sec; - usec = (__xtime.tv_nsec / 1000) + - (__jiffies - __wall_jiffies) * (1000000 / HZ); - - if (__vxtime.mode != VXTIME_HPET) { - sync_core(); - rdtscll(t); - if (t < __vxtime.last_tsc) - t = __vxtime.last_tsc; - usec += ((t - __vxtime.last_tsc) * - __vxtime.tsc_quot) >> 32; - /* See comment in x86_64 do_gettimeofday. */ - } else { - usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - - __vxtime.last) * __vxtime.quot) >> 32; + seq = read_seqbegin(&__vsyscall_gtod_lock); + + if (!__vsyscall_gtod_data.clock.vread) { + syscall_gtod(tv, NULL); + return; } - } while (read_seqretry(&__xtime_lock, sequence)); - tv->tv_sec = sec + usec / 1000000; - tv->tv_usec = usec % 1000000; + /* read the timeosurce and store state values */ + now = __vsyscall_gtod_data.clock.vread( + __vsyscall_gtod_data.clock.vdata); + + base = __vsyscall_gtod_data.offset_base; + mask = __vsyscall_gtod_data.clock.mask; + + mult = __vsyscall_gtod_data.clock.mult; + shift = __vsyscall_gtod_data.clock.shift; + + *tv = __vsyscall_gtod_data.wall_time_tv; + } while (read_seqretry(&__vsyscall_gtod_lock, seq)); + + /* calculate interval: */ + cycle_delta = (now - base) & mask; + /* convert to nsecs: */ + nsec_delta = (cycle_delta * mult) >> shift; + + /* convert to usecs and add to timespec: */ + do_div(nsec_delta, NSEC_PER_USEC); + tv->tv_usec += (unsigned long) nsec_delta; + + while (tv->tv_usec > USEC_PER_SEC) { + tv->tv_sec += 1; + tv->tv_usec -= USEC_PER_SEC; + } } /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */ -static force_inline void do_get_tz(struct timezone * tz) +static __always_inline void do_get_tz(struct timezone * tz) { - *tz = __sys_tz; + *tz = __vsyscall_gtod_data.sys_tz; } -static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz) +static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) { int ret; asm volatile("vsysc2: syscall" @@ -98,7 +140,7 @@ static force_inline int gettimeofday(str return ret; } -static force_inline long time_syscall(long *t) +static __always_inline long time_syscall(long *t) { long secs; asm volatile("vsysc1: syscall" @@ -122,11 +164,16 @@ int __vsyscall(0) vgettimeofday(struct t * unlikely */ time_t __vsyscall(1) vtime(time_t *t) { + struct timeval tv; + if (unlikely(!__sysctl_vsyscall)) return time_syscall(t); - else if (t) - *t = __xtime.tv_sec; - return __xtime.tv_sec; + + vgettimeofday(&tv, 0); + if (t) + *t = tv.tv_sec; + + return tv.tv_sec; } long __vsyscall(2) venosys_0(void) @@ -139,6 +186,38 @@ long __vsyscall(3) venosys_1(void) return -ENOSYS; } +struct clocksource *curr_clock; + +void arch_update_vsyscall_gtod(struct timespec wall_time, cycle_t offset_base, + struct clocksource *clock, int ntp_adj) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_lock, flags); + + /* XXX - hackitty hack hack. this is terrible! 
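The conversion in do_vgettimeofday() above, restated as one helper for readability (same locals and helpers as the patch uses; this is a restatement, not additional patch code):

/* Masked cycle delta -> microseconds, as done in do_vgettimeofday(). */
static inline unsigned long cycles_to_usecs(cycle_t now, cycle_t base,
					    cycle_t mask, unsigned long mult,
					    unsigned long shift)
{
	cycle_t cycle_delta = (now - base) & mask;
	nsec_t nsec_delta = (cycle_delta * mult) >> shift;

	do_div(nsec_delta, NSEC_PER_USEC);
	return (unsigned long)nsec_delta;
}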
*/ + if (curr_clock != clock) + curr_clock = clock; + + /* save off wall time as timeval: */ + vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time.tv_sec; + vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time.tv_nsec/1000; + + /* save offset_base: */ + vsyscall_gtod_data.offset_base = offset_base; + + /* copy current clocksource: */ + vsyscall_gtod_data.clock = *clock; + + /* apply ntp adjustment to clocksource mult: */ + vsyscall_gtod_data.clock.mult += ntp_adj; + + /* save off current timezone: */ + vsyscall_gtod_data.sys_tz = sys_tz; + + write_sequnlock_irqrestore(&vsyscall_gtod_lock, flags); +} + #ifdef CONFIG_SYSCTL #define SYSCALL 0x050f @@ -217,6 +296,7 @@ static int __init vsyscall_init(void) BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); map_vsyscall(); + sysctl_vsyscall = 1; #ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2, 0); #endif Index: linux.prev/arch/x86_64/kernel/x8664_ksyms.c =================================================================== --- linux.prev.orig/arch/x86_64/kernel/x8664_ksyms.c +++ linux.prev/arch/x86_64/kernel/x8664_ksyms.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -33,8 +34,6 @@ #include #include -extern spinlock_t rtc_lock; - #ifdef CONFIG_SMP extern void __write_lock_failed(rwlock_t *rw); extern void __read_lock_failed(rwlock_t *rw); @@ -45,8 +44,6 @@ extern struct drive_info_struct drive_in EXPORT_SYMBOL(drive_info); #endif -extern unsigned long get_cmos_time(void); - /* platform dependent support */ EXPORT_SYMBOL(boot_cpu_data); //EXPORT_SYMBOL(dump_fpu); @@ -60,12 +57,13 @@ EXPORT_SYMBOL(probe_irq_mask); EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(pm_idle); EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(get_cmos_time); -EXPORT_SYMBOL(__down_failed); -EXPORT_SYMBOL(__down_failed_interruptible); -EXPORT_SYMBOL(__down_failed_trylock); -EXPORT_SYMBOL(__up_wakeup); +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK +EXPORT_SYMBOL(__compat_down_failed); +EXPORT_SYMBOL(__compat_down_failed_interruptible); +EXPORT_SYMBOL(__compat_down_failed_trylock); +EXPORT_SYMBOL(__compat_up_wakeup); +#endif /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_nocheck); EXPORT_SYMBOL(ip_compute_csum); Index: linux.prev/arch/x86_64/lib/thunk.S =================================================================== --- linux.prev.orig/arch/x86_64/lib/thunk.S +++ linux.prev/arch/x86_64/lib/thunk.S @@ -43,11 +43,13 @@ thunk rwsem_downgrade_thunk,rwsem_downgrade_wake #endif thunk do_softirq_thunk,do_softirq - - thunk __down_failed,__down - thunk_retrax __down_failed_interruptible,__down_interruptible - thunk_retrax __down_failed_trylock,__down_trylock - thunk __up_wakeup,__up + +#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + thunk __compat_down_failed,__compat_down + thunk_retrax __compat_down_failed_interruptible,__compat_down_interruptible + thunk_retrax __compat_down_failed_trylock,__compat_down_trylock + thunk __compat_up_wakeup,__compat_up +#endif /* SAVE_ARGS below is used only for the .cfi directives it contains. 
*/ CFI_STARTPROC Index: linux.prev/arch/x86_64/mm/fault.c =================================================================== --- linux.prev.orig/arch/x86_64/mm/fault.c +++ linux.prev/arch/x86_64/mm/fault.c @@ -39,6 +39,7 @@ void bust_spinlocks(int yes) { int loglevel_save = console_loglevel; if (yes) { + stop_trace(); oops_in_progress = 1; } else { #ifdef CONFIG_VT @@ -315,7 +316,7 @@ asmlinkage void __kprobes do_page_fault( return; if (likely(regs->eflags & X86_EFLAGS_IF)) - local_irq_enable(); + raw_local_irq_enable(); if (unlikely(page_fault_trace)) printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", Index: linux.prev/arch/x86_64/mm/init.c =================================================================== --- linux.prev.orig/arch/x86_64/mm/init.c +++ linux.prev/arch/x86_64/mm/init.c @@ -45,7 +45,7 @@ static unsigned long dma_reserve __initdata; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +DEFINE_PER_CPU_LOCKED(struct mmu_gather, mmu_gathers); /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the Index: linux.prev/block/cfq-iosched.c =================================================================== --- linux.prev.orig/block/cfq-iosched.c +++ linux.prev/block/cfq-iosched.c @@ -1241,7 +1241,7 @@ static void cfq_exit_single_io_context(s struct cfq_data *cfqd = cic->cfqq->cfqd; request_queue_t *q = cfqd->queue; - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); spin_lock(q->queue_lock); @@ -1265,7 +1265,9 @@ static void cfq_exit_io_context(struct c struct list_head *entry; unsigned long flags; - local_irq_save(flags); + // FIXME: i dont think this code is safe, upstream! + + local_irq_save_nort(flags); /* * put the reference this task is holding to the various queues @@ -1276,7 +1278,7 @@ static void cfq_exit_io_context(struct c } cfq_exit_single_io_context(cic); - local_irq_restore(flags); + local_irq_restore_nort(flags); } static struct cfq_io_context * Index: linux.prev/block/ll_rw_blk.c =================================================================== --- linux.prev.orig/block/ll_rw_blk.c +++ linux.prev/block/ll_rw_blk.c @@ -1412,7 +1412,7 @@ static int ll_merge_requests_fn(request_ */ void blk_plug_device(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* * don't plug a stopped queue, it must be paired with blk_start_queue() @@ -1433,7 +1433,7 @@ EXPORT_SYMBOL(blk_plug_device); */ int blk_remove_plug(request_queue_t *q) { - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) return 0; @@ -3270,13 +3270,15 @@ void exit_io_context(void) unsigned long flags; struct io_context *ioc; - local_irq_save(flags); + // FIXME: unsafe upstream too? 
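+ // (Editor's note: the *_nort() variants are expected to disable
+ // interrupts only on !PREEMPT_RT builds; on PREEMPT_RT the region
+ // stays preemptible, which is what the FIXME above questions.)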
+ + local_irq_save_nort(flags); task_lock(current); ioc = current->io_context; current->io_context = NULL; ioc->task = NULL; task_unlock(current); - local_irq_restore(flags); + local_irq_restore_nort(flags); if (ioc->aic && ioc->aic->exit) ioc->aic->exit(ioc->aic); Index: linux.prev/drivers/Makefile =================================================================== --- linux.prev.orig/drivers/Makefile +++ linux.prev/drivers/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_SGI_IOC4) += sn/ obj-y += firmware/ obj-$(CONFIG_CRYPTO) += crypto/ obj-$(CONFIG_SUPERH) += sh/ +obj-$(CONFIG_GENERIC_TIME) += clocksource/ Index: linux.prev/drivers/acpi/Kconfig =================================================================== --- linux.prev.orig/drivers/acpi/Kconfig +++ linux.prev/drivers/acpi/Kconfig @@ -287,24 +287,6 @@ config ACPI_SYSTEM This driver will enable your system to shut down using ACPI, and dump your ACPI DSDT table using /proc/acpi/dsdt. -config X86_PM_TIMER - bool "Power Management Timer Support" - depends on X86 - depends on !X86_64 - default y - help - The Power Management Timer is available on all ACPI-capable, - in most cases even if ACPI is unusable or blacklisted. - - This timing source is not affected by powermanagement features - like aggressive processor idling, throttling, frequency and/or - voltage scaling, unlike the commonly used Time Stamp Counter - (TSC) timing source. - - So, if you see messages like 'Losing too many ticks!' in the - kernel logs, and/or you are using this on a notebook which - does not yet have an HPET, you should say "Y" here. - config ACPI_CONTAINER tristate "ACPI0004,PNP0A05 and PNP0A06 Container Driver (EXPERIMENTAL)" depends on EXPERIMENTAL Index: linux.prev/drivers/acpi/events/evgpe.c =================================================================== --- linux.prev.orig/drivers/acpi/events/evgpe.c +++ linux.prev/drivers/acpi/events/evgpe.c @@ -377,7 +377,7 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_x struct acpi_gpe_register_info *gpe_register_info; u32 status_reg; u32 enable_reg; - u32 flags; + unsigned long flags; acpi_status status; struct acpi_gpe_block_info *gpe_block; acpi_native_uint i; Index: linux.prev/drivers/acpi/events/evgpeblk.c =================================================================== --- linux.prev.orig/drivers/acpi/events/evgpeblk.c +++ linux.prev/drivers/acpi/events/evgpeblk.c @@ -136,7 +136,7 @@ acpi_status acpi_ev_walk_gpe_list(ACPI_G struct acpi_gpe_block_info *gpe_block; struct acpi_gpe_xrupt_info *gpe_xrupt_info; acpi_status status = AE_OK; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_walk_gpe_list"); @@ -479,7 +479,7 @@ static struct acpi_gpe_xrupt_info *acpi_ struct acpi_gpe_xrupt_info *next_gpe_xrupt; struct acpi_gpe_xrupt_info *gpe_xrupt; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_get_gpe_xrupt_block"); @@ -553,7 +553,7 @@ static acpi_status acpi_ev_delete_gpe_xrupt(struct acpi_gpe_xrupt_info *gpe_xrupt) { acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_delete_gpe_xrupt"); @@ -610,7 +610,7 @@ acpi_ev_install_gpe_block(struct acpi_gp struct acpi_gpe_block_info *next_gpe_block; struct acpi_gpe_xrupt_info *gpe_xrupt_block; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("ev_install_gpe_block"); @@ -663,7 +663,7 @@ acpi_ev_install_gpe_block(struct acpi_gp acpi_status acpi_ev_delete_gpe_block(struct acpi_gpe_block_info *gpe_block) { acpi_status status; - u32 flags; + unsigned long flags; 
ACPI_FUNCTION_TRACE("ev_install_gpe_block"); Index: linux.prev/drivers/acpi/events/evxface.c =================================================================== --- linux.prev.orig/drivers/acpi/events/evxface.c +++ linux.prev/drivers/acpi/events/evxface.c @@ -562,7 +562,7 @@ acpi_install_gpe_handler(acpi_handle gpe struct acpi_gpe_event_info *gpe_event_info; struct acpi_handler_info *handler; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("acpi_install_gpe_handler"); @@ -653,7 +653,7 @@ acpi_remove_gpe_handler(acpi_handle gpe_ struct acpi_gpe_event_info *gpe_event_info; struct acpi_handler_info *handler; acpi_status status; - u32 flags; + unsigned long flags; ACPI_FUNCTION_TRACE("acpi_remove_gpe_handler"); Index: linux.prev/drivers/acpi/osl.c =================================================================== --- linux.prev.orig/drivers/acpi/osl.c +++ linux.prev/drivers/acpi/osl.c @@ -346,9 +346,7 @@ u64 acpi_os_get_timer(void) /* TBD: use HPET if available */ #endif -#ifdef CONFIG_X86_PM_TIMER /* TBD: default to PM timer if HPET was not available */ -#endif if (!t) printk(KERN_ERR PREFIX "acpi_os_get_timer() TBD\n"); @@ -728,14 +726,14 @@ void acpi_os_delete_lock(acpi_handle han acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { - struct semaphore *sem = NULL; + struct compat_semaphore *sem = NULL; ACPI_FUNCTION_TRACE("os_create_semaphore"); - sem = acpi_os_allocate(sizeof(struct semaphore)); + sem = acpi_os_allocate(sizeof(struct compat_semaphore)); if (!sem) return_ACPI_STATUS(AE_NO_MEMORY); - memset(sem, 0, sizeof(struct semaphore)); + memset(sem, 0, sizeof(struct compat_semaphore)); sema_init(sem, initial_units); @@ -758,7 +756,7 @@ EXPORT_SYMBOL(acpi_os_create_semaphore); acpi_status acpi_os_delete_semaphore(acpi_handle handle) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; ACPI_FUNCTION_TRACE("os_delete_semaphore"); @@ -787,7 +785,7 @@ EXPORT_SYMBOL(acpi_os_delete_semaphore); acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) { acpi_status status = AE_OK; - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; int ret = 0; ACPI_FUNCTION_TRACE("os_wait_semaphore"); @@ -868,7 +866,7 @@ EXPORT_SYMBOL(acpi_os_wait_semaphore); */ acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) { - struct semaphore *sem = (struct semaphore *)handle; + struct compat_semaphore *sem = (struct compat_semaphore *)handle; ACPI_FUNCTION_TRACE("os_signal_semaphore"); Index: linux.prev/drivers/acpi/processor_idle.c =================================================================== --- linux.prev.orig/drivers/acpi/processor_idle.c +++ linux.prev/drivers/acpi/processor_idle.c @@ -37,6 +37,7 @@ #include #include #include +#include #include /* need_resched() */ #include @@ -172,7 +173,7 @@ static void acpi_safe_halt(void) clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb__after_clear_bit(); if (!need_resched()) - safe_halt(); + raw_safe_halt(); set_thread_flag(TIF_POLLING_NRFLAG); } @@ -194,14 +195,14 @@ static void acpi_processor_idle(void) * Interrupts must be disabled during bus mastering calculations and * for C2/C3 transitions. 
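 * (Editor's note: the raw_local_irq_* calls substituted below act on
 * the real hardware interrupt flag, bypassing whatever the plain
 * local_irq_* operations may be redefined to under PREEMPT_RT;
 * C-state entry genuinely needs hard interrupts off.)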
*/ - local_irq_disable(); + raw_local_irq_disable(); /* * Check whether we truly need to go idle, or should * reschedule: */ if (unlikely(need_resched())) { - local_irq_enable(); + raw_local_irq_enable(); return; } @@ -268,7 +269,7 @@ static void acpi_processor_idle(void) * issues (e.g. floppy DMA transfer overrun/underrun). */ if (pr->power.bm_activity & cx->demotion.threshold.bm) { - local_irq_enable(); + raw_local_irq_enable(); next_state = cx->demotion.state; goto end; } @@ -297,7 +298,7 @@ static void acpi_processor_idle(void) smp_mb__after_clear_bit(); if (need_resched()) { set_thread_flag(TIF_POLLING_NRFLAG); - local_irq_enable(); + raw_local_irq_enable(); return; } } @@ -333,7 +334,7 @@ static void acpi_processor_idle(void) /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); /* Re-enable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); /* Compute time (ticks) that we were actually asleep */ sleep_ticks = @@ -372,8 +373,12 @@ static void acpi_processor_idle(void) ACPI_MTX_DO_NOT_LOCK); } +#ifdef CONFIG_GENERIC_TIME + /* TSC halts in C3, so notify users */ + mark_tsc_unstable(); +#endif /* Re-enable interrupts */ - local_irq_enable(); + raw_local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); /* Compute time (ticks) that we were actually asleep */ sleep_ticks = @@ -381,7 +386,7 @@ static void acpi_processor_idle(void) break; default: - local_irq_enable(); + raw_local_irq_enable(); return; } @@ -1027,6 +1032,7 @@ int acpi_processor_power_init(struct acp static int first_run = 0; struct proc_dir_entry *entry = NULL; unsigned int i; + unsigned long flags; ACPI_FUNCTION_TRACE("acpi_processor_power_init"); @@ -1060,6 +1066,7 @@ int acpi_processor_power_init(struct acp * Note that we use previously set idle handler will be used on * platforms that only support C1. */ + spin_lock_irqsave(&pm_idle_switch_lock, flags); if ((pr->flags.power) && (!boot_option_idle_override)) { printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id); for (i = 1; i <= pr->power.count; i++) @@ -1071,8 +1078,13 @@ int acpi_processor_power_init(struct acp if (pr->id == 0) { pm_idle_save = pm_idle; pm_idle = acpi_processor_idle; + /* + * Don't allow switching of the pm_idle to poll. + */ + pm_idle_locked = 1; } } + spin_unlock_irqrestore(&pm_idle_switch_lock, flags); /* 'power' [R] */ entry = create_proc_entry(ACPI_PROCESSOR_FILE_POWER, @@ -1115,5 +1127,7 @@ int acpi_processor_power_exit(struct acp cpu_idle_wait(); } + pm_idle_locked = 0; + return_VALUE(0); } Index: linux.prev/drivers/acpi/processor_throttling.c =================================================================== --- linux.prev.orig/drivers/acpi/processor_throttling.c +++ linux.prev/drivers/acpi/processor_throttling.c @@ -69,7 +69,7 @@ static int acpi_processor_get_throttling duty_mask <<= pr->throttling.duty_offset; - local_irq_disable(); + raw_local_irq_disable(); value = inl(pr->throttling.address); @@ -87,7 +87,7 @@ static int acpi_processor_get_throttling pr->throttling.state = state; - local_irq_enable(); + raw_local_irq_enable(); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Throttling state is T%d (%d%% throttling applied)\n", @@ -131,7 +131,7 @@ int acpi_processor_set_throttling(struct duty_mask = ~duty_mask; } - local_irq_disable(); + raw_local_irq_disable(); /* * Disable throttling by writing a 0 to bit 4. 
Note that we must @@ -158,7 +158,7 @@ int acpi_processor_set_throttling(struct pr->throttling.state = state; - local_irq_enable(); + raw_local_irq_enable(); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Throttling state set to T%d (%d%%)\n", state, Index: linux.prev/drivers/acpi/sleep/main.c =================================================================== --- linux.prev.orig/drivers/acpi/sleep/main.c +++ linux.prev/drivers/acpi/sleep/main.c @@ -82,7 +82,7 @@ static int acpi_pm_enter(suspend_state_t return error; } - local_irq_save(flags); + raw_local_irq_save(flags); acpi_enable_wakeup_device(acpi_state); switch (pm_state) { case PM_SUSPEND_STANDBY: @@ -105,7 +105,7 @@ static int acpi_pm_enter(suspend_state_t default: return -EINVAL; } - local_irq_restore(flags); + raw_local_irq_restore(flags); printk(KERN_DEBUG "Back to C!\n"); /* restore processor state Index: linux.prev/drivers/acpi/sleep/poweroff.c =================================================================== --- linux.prev.orig/drivers/acpi/sleep/poweroff.c +++ linux.prev/drivers/acpi/sleep/poweroff.c @@ -46,7 +46,7 @@ void acpi_power_off(void) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ printk("%s called\n", __FUNCTION__); - local_irq_disable(); + raw_local_irq_disable(); /* Some SMP machines only can poweroff in boot CPU */ acpi_enter_sleep_state(ACPI_STATE_S5); } Index: linux.prev/drivers/atm/atmtcp.c =================================================================== --- linux.prev.orig/drivers/atm/atmtcp.c +++ linux.prev/drivers/atm/atmtcp.c @@ -352,7 +352,7 @@ static struct atm_dev atmtcp_control_dev .ops = &atmtcp_c_dev_ops, .type = "atmtcp", .number = 999, - .lock = SPIN_LOCK_UNLOCKED + .lock = SPIN_LOCK_UNLOCKED(atmtcp_control_dev.lock) }; Index: linux.prev/drivers/base/class.c =================================================================== --- linux.prev.orig/drivers/base/class.c +++ linux.prev/drivers/base/class.c @@ -555,8 +555,10 @@ int class_device_add(struct class_device class_name = make_class_name(class_dev); sysfs_create_link(&class_dev->kobj, &class_dev->dev->kobj, "device"); + /* sysfs_create_link(&class_dev->dev->kobj, &class_dev->kobj, class_name); + */ } kobject_hotplug(&class_dev->kobj, KOBJ_ADD); @@ -667,7 +669,9 @@ void class_device_del(struct class_devic if (class_dev->dev) { class_name = make_class_name(class_dev); sysfs_remove_link(&class_dev->kobj, "device"); + /* sysfs_remove_link(&class_dev->dev->kobj, class_name); + */ } class_device_remove_file(class_dev, &class_dev->uevent_attr); if (class_dev->devt_attr) Index: linux.prev/drivers/block/loop.c =================================================================== --- linux.prev.orig/drivers/block/loop.c +++ linux.prev/drivers/block/loop.c @@ -514,12 +514,12 @@ static int loop_make_request(request_que lo->lo_pending++; loop_add_bio(lo, old_bio); spin_unlock_irq(&lo->lo_lock); - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); return 0; out: if (lo->lo_pending == 0) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio, old_bio->bi_size); return 0; @@ -580,23 +580,20 @@ static int loop_thread(void *data) lo->lo_pending = 1; /* - * up sem, we are running + * complete it, we are running */ - up(&lo->lo_sem); + complete(&lo->lo_done); for (;;) { int pending; - /* - * interruptible just to not contribute to load avg - */ - if (down_interruptible(&lo->lo_bh_mutex)) + if (wait_for_completion_interruptible(&lo->lo_bh_done)) continue; spin_lock_irq(&lo->lo_lock); /* - * could be 
upped because of tear-down, not pending work + * could be completed because of tear-down, not pending work */ if (unlikely(!lo->lo_pending)) { spin_unlock_irq(&lo->lo_lock); @@ -619,7 +616,7 @@ static int loop_thread(void *data) break; } - up(&lo->lo_sem); + complete(&lo->lo_done); return 0; } @@ -830,7 +827,7 @@ static int loop_set_fd(struct loop_devic set_blocksize(bdev, lo_blocksize); kernel_thread(loop_thread, lo, CLONE_KERNEL); - down(&lo->lo_sem); + wait_for_completion(&lo->lo_done); return 0; out_putf: @@ -896,10 +893,10 @@ static int loop_clr_fd(struct loop_devic lo->lo_state = Lo_rundown; lo->lo_pending--; if (!lo->lo_pending) - up(&lo->lo_bh_mutex); + complete(&lo->lo_bh_done); spin_unlock_irq(&lo->lo_lock); - down(&lo->lo_sem); + wait_for_completion(&lo->lo_done); lo->lo_backing_file = NULL; @@ -1276,8 +1273,8 @@ static int __init loop_init(void) if (!lo->lo_queue) goto out_mem4; init_MUTEX(&lo->lo_ctl_mutex); - init_MUTEX_LOCKED(&lo->lo_sem); - init_MUTEX_LOCKED(&lo->lo_bh_mutex); + init_completion(&lo->lo_done); + init_completion(&lo->lo_bh_done); lo->lo_number = i; spin_lock_init(&lo->lo_lock); disk->major = LOOP_MAJOR; Index: linux.prev/drivers/block/paride/pseudo.h =================================================================== --- linux.prev.orig/drivers/block/paride/pseudo.h +++ linux.prev/drivers/block/paride/pseudo.h @@ -43,7 +43,7 @@ static unsigned long ps_timeout; static int ps_tq_active = 0; static int ps_nice = 0; -static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused))); +static __attribute__((unused)) DEFINE_SPINLOCK(ps_spinlock); static DECLARE_WORK(ps_tq, ps_tq_int, NULL); Index: linux.prev/drivers/block/sx8.c =================================================================== --- linux.prev.orig/drivers/block/sx8.c +++ linux.prev/drivers/block/sx8.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -303,7 +304,7 @@ struct carm_host { struct work_struct fsm_task; - struct semaphore probe_sem; + struct completion probe_comp; }; struct carm_response { @@ -1365,7 +1366,7 @@ static void carm_fsm_task (void *_data) } case HST_PROBE_FINISHED: - up(&host->probe_sem); + complete(&host->probe_comp); break; case HST_ERROR: @@ -1641,7 +1642,7 @@ static int carm_init_one (struct pci_dev host->flags = pci_dac ? FL_DAC : 0; spin_lock_init(&host->lock); INIT_WORK(&host->fsm_task, carm_fsm_task, host); - init_MUTEX_LOCKED(&host->probe_sem); + init_completion(&host->probe_comp); for (i = 0; i < ARRAY_SIZE(host->req); i++) host->req[i].tag = i; @@ -1710,8 +1711,8 @@ static int carm_init_one (struct pci_dev if (rc) goto err_out_free_irq; - DPRINTK("waiting for probe_sem\n"); - down(&host->probe_sem); + DPRINTK("waiting for probe_comp\n"); + wait_for_completion(&host->probe_comp); printk(KERN_INFO "%s: pci %s, ports %d, io %lx, irq %u, major %d\n", host->name, pci_name(pdev), (int) CARM_MAX_PORTS, Index: linux.prev/drivers/char/Kconfig =================================================================== --- linux.prev.orig/drivers/char/Kconfig +++ linux.prev/drivers/char/Kconfig @@ -711,6 +711,45 @@ config RTC To compile this driver as a module, choose M here: the module will be called rtc. +config RTC_HISTOGRAM + bool "Real Time Clock Histogram Support" + default n + depends on RTC + ---help--- + If you say Y here then the kernel will track the delivery and + wakeup latency of /dev/rtc using tasks and will report a + histogram to the kernel log when the application closes /dev/rtc. 
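Editor's note: a minimal user-space exerciser for this histogram, sketched for illustration; it is not part of the patch. It assumes the standard /dev/rtc character-device interface (RTC_IRQP_SET, RTC_PIE_ON, RTC_PIE_OFF from <linux/rtc.h>) and must run as root, since periodic rates above 64Hz require CAP_SYS_RESOURCE. On a CONFIG_RTC_HISTOGRAM kernel the histogram is printed to the kernel log when the program closes /dev/rtc:

	#include <stdio.h>
	#include <unistd.h>
	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/rtc.h>

	int main(void)
	{
		unsigned long data;
		int i, fd = open("/dev/rtc", O_RDONLY);

		if (fd < 0) {
			perror("/dev/rtc");
			return 1;
		}
		/* 1024 Hz periodic interrupts, then block in read() */
		if (ioctl(fd, RTC_IRQP_SET, 1024) < 0 ||
		    ioctl(fd, RTC_PIE_ON, 0) < 0) {
			perror("ioctl");
			return 1;
		}
		for (i = 0; i < 10240; i++)	/* ~10 seconds of samples */
			if (read(fd, &data, sizeof(data)) != sizeof(data))
				break;
		ioctl(fd, RTC_PIE_OFF, 0);
		close(fd);	/* histogram hits the kernel log here */
		return 0;
	}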
+ +config BLOCKER + tristate "Priority Inheritance Debugging (Blocker) Device Support" + default y + ---help--- + If you say Y here then a device will be created that the userspace + pi_test suite uses to test and measure kernel locking primitives. + +config LPPTEST + tristate "Parallel Port Based Latency Measurement Device" + depends on !PARPORT && X86 + default y + ---help--- + If you say Y here then a device will be created that the userspace + testlpp utility uses to measure IRQ latencies of a target system + from an independent measurement system. + + NOTE: this code assumes x86 PCs and that the parallel port is + bidirectional and is on IRQ 7. + + To use the device, both the target and the source system need to + run a kernel with CONFIG_LPPTEST enabled. To measure latencies, + use the scripts/testlpp utility in your kernel source directory, + and run it (as root) on the source system - it will start printing + out the latencies it took to get a response from the target system: + + Latency of response: 12.2 usecs (121265 cycles) + + Then generate various workloads on the target system to see how + (worst-case-) latencies are impacted. + config SGI_DS1286 tristate "SGI DS1286 RTC support" depends on SGI_IP22 Index: linux.prev/drivers/char/Makefile =================================================================== --- linux.prev.orig/drivers/char/Makefile +++ linux.prev/drivers/char/Makefile @@ -57,6 +57,8 @@ obj-$(CONFIG_R3964) += n_r3964.o obj-$(CONFIG_APPLICOM) += applicom.o obj-$(CONFIG_SONYPI) += sonypi.o obj-$(CONFIG_RTC) += rtc.o +obj-$(CONFIG_BLOCKER) += blocker.o +obj-$(CONFIG_LPPTEST) += lpptest.o obj-$(CONFIG_HPET) += hpet.o obj-$(CONFIG_GEN_RTC) += genrtc.o obj-$(CONFIG_EFI_RTC) += efirtc.o Index: linux.prev/drivers/char/blocker.c =================================================================== --- /dev/null +++ linux.prev/drivers/char/blocker.c @@ -0,0 +1,108 @@ +/* + * priority inheritance testing device + */ + +#include +#include +#include + +#define BLOCKER_MINOR 221 + +#define BLOCK_IOCTL 4245 +#define BLOCK_SET_DEPTH 4246 + +#define MAX_LOCK_DEPTH 10 + +void loop(int loops) +{ + int i; + + for (i = 0; i < loops; i++) + get_cycles(); +} + +static spinlock_t blocker_lock[MAX_LOCK_DEPTH]; + +static unsigned int lock_depth = 1; + +void do_the_lock_and_loop(unsigned int args) +{ + int i, max; + + if (rt_task(current)) + max = lock_depth; + else if (lock_depth > 1) + max = (current->pid % lock_depth) + 1; + else + max = 1; + + /* Always lock from the top down */ + for (i = max-1; i >= 0; i--) + spin_lock(&blocker_lock[i]); + loop(args); + for (i = 0; i < max; i++) + spin_unlock(&blocker_lock[i]); } + +static int blocker_open(struct inode *in, struct file *file) +{ + printk(KERN_INFO "blocker_open called\n"); + + return 0; +} + +static long blocker_ioctl(struct file *file, + unsigned int cmd, unsigned long args) +{ + switch(cmd) { + case BLOCK_IOCTL: + do_the_lock_and_loop(args); + return 0; + case BLOCK_SET_DEPTH: + if (args >= MAX_LOCK_DEPTH) + return -EINVAL; + lock_depth = args; + return 0; + default: + return -EINVAL; + } +} + +static struct file_operations blocker_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = blocker_ioctl, + .open = blocker_open, +}; + +static struct miscdevice blocker_dev = +{ + BLOCKER_MINOR, + "blocker", + &blocker_fops +}; + +static int __init blocker_init(void) +{ + int i; + + if (misc_register(&blocker_dev)) + return -ENODEV; + + for (i = 0; i < MAX_LOCK_DEPTH; i++) + spin_lock_init(blocker_lock + i); + + 
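+	/*
+	 * (Editor's note: the device registers as misc minor 221,
+	 * i.e. /dev/blocker; user space picks a nesting depth with
+	 * BLOCK_SET_DEPTH and then issues BLOCK_IOCTL to take the
+	 * locks top-down and spin, building the lock chains the
+	 * pi_test suite measures - on PREEMPT_RT these spinlocks are
+	 * PI-aware sleeping locks.)
+	 */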
return 0; +} + +void __exit blocker_exit(void) +{ + printk(KERN_INFO "blocker device uninstalled\n"); + misc_deregister(&blocker_dev); +} + +module_init(blocker_init); +module_exit(blocker_exit); + +MODULE_LICENSE("GPL"); + Index: linux.prev/drivers/char/epca.c =================================================================== --- linux.prev.orig/drivers/char/epca.c +++ linux.prev/drivers/char/epca.c @@ -80,7 +80,7 @@ static int invalid_lilo_config; /* The ISA boards do window flipping into the same spaces so its only sane with a single lock. It's still pretty efficient */ -static spinlock_t epca_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(epca_lock); /* ----------------------------------------------------------------------- MAXBOARDS is typically 12, but ISA and EISA cards are restricted to Index: linux.prev/drivers/char/hangcheck-timer.c =================================================================== --- linux.prev.orig/drivers/char/hangcheck-timer.c +++ linux.prev/drivers/char/hangcheck-timer.c @@ -49,6 +49,7 @@ #include #include #include +#include #define VERSION_STR "0.9.0" @@ -130,8 +131,12 @@ __setup("hcheck_dump_tasks", hangcheck_p #endif #ifdef HAVE_MONOTONIC +#ifndef CONFIG_GENERIC_TIME extern unsigned long long monotonic_clock(void); #else +#define monotonic_clock() ktime_to_ns(get_monotonic_clock()) +#endif +#else static inline unsigned long long monotonic_clock(void) { # ifdef __s390__ Index: linux.prev/drivers/char/ipmi/ipmi_si_intf.c =================================================================== --- linux.prev.orig/drivers/char/ipmi/ipmi_si_intf.c +++ linux.prev/drivers/char/ipmi/ipmi_si_intf.c @@ -54,7 +54,7 @@ #include #include #include -#ifdef CONFIG_HIGH_RES_TIMERS +#ifdef CONFIG_HIGH_RES_TIMERS_OLD #include # if defined(schedule_next_int) /* Old high-res timer code, do translations. */ @@ -824,7 +824,7 @@ static int initialized = 0; /* Must be called with interrupts off and with the si_lock held. */ static void si_restart_short_timer(struct smi_info *smi_info) { -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) unsigned long flags; unsigned long jiffies_now; unsigned long seq; @@ -892,13 +892,13 @@ static void smi_timeout(unsigned long da /* If the state machine asks for a short delay, then shorten the timer timeout. */ if (smi_result == SI_SM_CALL_WITH_DELAY) { -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) unsigned long seq; #endif spin_lock_irqsave(&smi_info->count_lock, flags); smi_info->short_timeouts++; spin_unlock_irqrestore(&smi_info->count_lock, flags); -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) do { seq = read_seqbegin_irqsave(&xtime_lock, flags); smi_info->si_timer.expires = jiffies; @@ -914,7 +914,7 @@ static void smi_timeout(unsigned long da smi_info->long_timeouts++; spin_unlock_irqrestore(&smi_info->count_lock, flags); smi_info->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES; -#if defined(CONFIG_HIGH_RES_TIMERS) +#if defined(CONFIG_HIGH_RES_TIMERS_OLD) smi_info->si_timer.arch_cycle_expires = 0; #endif } Index: linux.prev/drivers/char/ipmi/ipmi_watchdog.c =================================================================== --- linux.prev.orig/drivers/char/ipmi/ipmi_watchdog.c +++ linux.prev/drivers/char/ipmi/ipmi_watchdog.c @@ -459,7 +459,8 @@ static void panic_halt_ipmi_set_timeout( when both messages are free. 
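 * (Editor's note: COMPAT_DECLARE_MUTEX_LOCKED keeps the classic
 * counting semaphore; on PREEMPT_RT a plain semaphore becomes an
 * rtmutex-style lock with strict owner semantics, which must not be
 * initialized locked and then released from another context - hence
 * the suggestion below that this should really be a completion.)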
*/ static atomic_t heartbeat_tofree = ATOMIC_INIT(0); static DECLARE_MUTEX(heartbeat_lock); -static DECLARE_MUTEX_LOCKED(heartbeat_wait_lock); +/* PREEMPT_RT: should be a completion instead */ +static COMPAT_DECLARE_MUTEX_LOCKED(heartbeat_wait_lock); static void heartbeat_free_smi(struct ipmi_smi_msg *msg) { if (atomic_dec_and_test(&heartbeat_tofree)) Index: linux.prev/drivers/char/lpptest.c =================================================================== --- /dev/null +++ linux.prev/drivers/char/lpptest.c @@ -0,0 +1,163 @@ +/* + * /dev/lpptest device: test IRQ handling latencies over parallel port + * + * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar + * + * licensed under the GPL + * + * You need to have CONFIG_PARPORT disabled for this device, it is a + * completely self-contained device that assumes sole ownership of the + * parallel port. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LPPTEST_CHAR_MAJOR 245 +#define LPPTEST_DEVICE_NAME "lpptest" + +#define LPPTEST_IRQ 7 + +#define LPPTEST_TEST _IOR (LPPTEST_CHAR_MAJOR, 1, unsigned long long) +#define LPPTEST_DISABLE _IOR (LPPTEST_CHAR_MAJOR, 2, unsigned long long) +#define LPPTEST_ENABLE _IOR (LPPTEST_CHAR_MAJOR, 3, unsigned long long) + +static char dev_id[] = "lpptest"; + +#define INIT_PORT() outb(0x04, 0x37a) +#define ENABLE_IRQ() outb(0x10, 0x37a) +#define DISABLE_IRQ() outb(0, 0x37a) + +static unsigned char out = 0x5a; + +/** + * Interrupt handler. Flip a bit in the reply. + */ +static int lpptest_irq (int irq, void *dev_id, struct pt_regs *regs) +{ + out ^= 0xff; + outb(out, 0x378); + + return IRQ_HANDLED; +} + +static cycles_t test_response(void) +{ + cycles_t now, end; + unsigned char in; + int timeout = 0; + + raw_local_irq_disable(); + in = inb(0x379); + inb(0x378); + outb(0x08, 0x378); + now = get_cycles(); + while(1) { + if (inb(0x379) != in) + break; + if (timeout++ > 1000000) { + outb(0x00, 0x378); + raw_local_irq_enable(); + + return 0; + } + } + end = get_cycles(); + outb(0x00, 0x378); + raw_local_irq_enable(); + + return end - now; +} + +static int lpptest_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static int lpptest_close(struct inode *inode, struct file *file) +{ + return 0; +} + +int lpptest_ioctl(struct inode *inode, struct file *file, unsigned int ioctl_num, unsigned long ioctl_param) +{ + int retval = 0; + + switch (ioctl_num) { + + case LPPTEST_DISABLE: + DISABLE_IRQ(); + break; + + case LPPTEST_ENABLE: + ENABLE_IRQ(); + break; + + case LPPTEST_TEST: { + + cycles_t diff = test_response(); + if (copy_to_user((void *)ioctl_param, (void*) &diff, sizeof(diff))) + goto errcpy; + break; + } + default: retval = -EINVAL; + } + + return retval; + + errcpy: + return -EFAULT; +} + +static struct file_operations lpptest_dev_fops = { + .ioctl = lpptest_ioctl, + .open = lpptest_open, + .release = lpptest_close, +}; + +static int __init lpptest_init (void) +{ + if (register_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME, &lpptest_dev_fops)) + { + printk(KERN_NOTICE "Can't allocate major number %d for lpptest.\n", + LPPTEST_CHAR_MAJOR); + return -EAGAIN; + } + + if (request_irq (LPPTEST_IRQ, lpptest_irq, 0, "lpptest", dev_id)) { + printk (KERN_WARNING "lpptest: irq %d in use. 
Unload parport module!\n", LPPTEST_IRQ); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); + return -EAGAIN; + } + irq_desc[LPPTEST_IRQ].status |= IRQ_NODELAY; + irq_desc[LPPTEST_IRQ].action->flags |= SA_NODELAY | SA_INTERRUPT; + + INIT_PORT(); + ENABLE_IRQ(); + + return 0; +} +module_init (lpptest_init); + +static void __exit lpptest_exit (void) +{ + DISABLE_IRQ(); + + free_irq(LPPTEST_IRQ, dev_id); + unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); +} +module_exit (lpptest_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("lpp test module"); + Index: linux.prev/drivers/char/random.c =================================================================== --- linux.prev.orig/drivers/char/random.c +++ linux.prev/drivers/char/random.c @@ -417,7 +417,7 @@ static struct entropy_store input_pool = .poolinfo = &poolinfo_table[0], .name = "input", .limit = 1, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -426,7 +426,7 @@ static struct entropy_store blocking_poo .name = "blocking", .limit = 1, .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data }; @@ -434,7 +434,7 @@ static struct entropy_store nonblocking_ .poolinfo = &poolinfo_table[1], .name = "nonblocking", .pull = &input_pool, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(nonblocking_pool.lock), .pool = nonblocking_pool_data }; @@ -581,8 +581,11 @@ static void add_timer_randomness(struct preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ if (input_pool.entropy_count > trickle_thresh && - (__get_cpu_var(trickle_count)++ & 0xfff)) - goto out; + (__get_cpu_var(trickle_count)++ & 0xfff)) { + preempt_enable(); + return; + } + preempt_enable(); sample.jiffies = jiffies; sample.cycles = get_cycles(); @@ -627,9 +630,6 @@ static void add_timer_randomness(struct if(input_pool.entropy_count >= random_read_wakeup_thresh) wake_up_interruptible(&random_read_wait); - -out: - preempt_enable(); } extern void add_input_randomness(unsigned int type, unsigned int code, Index: linux.prev/drivers/char/rtc.c =================================================================== --- linux.prev.orig/drivers/char/rtc.c +++ linux.prev/drivers/char/rtc.c @@ -84,10 +84,36 @@ #include #include +#ifdef CONFIG_MIPS +# include +#endif + #if defined(__i386__) #include #endif +#ifdef CONFIG_RTC_HISTOGRAM + +static cycles_t last_interrupt_time; + +#include + +#define CPU_MHZ (cpu_khz / 1000) + +#define HISTSIZE 10000 +static int histogram[HISTSIZE]; + +static int rtc_state; + +enum rtc_states { + S_STARTUP, /* First round - let the application start */ + S_IDLE, /* Waiting for an interrupt */ + S_WAITING_FOR_READ, /* Signal delivered. 
waiting for rtc_read() */ + S_READ_MISSED, /* Signal delivered, read() deadline missed */ +}; + +#endif + #ifdef __sparc__ #include #include @@ -149,22 +175,8 @@ static void get_rtc_alm_time (struct rtc #ifdef RTC_IRQ static void rtc_dropped_irq(unsigned long data); -static void set_rtc_irq_bit_locked(unsigned char bit); -static void mask_rtc_irq_bit_locked(unsigned char bit); - -static inline void set_rtc_irq_bit(unsigned char bit) -{ - spin_lock_irq(&rtc_lock); - set_rtc_irq_bit_locked(bit); - spin_unlock_irq(&rtc_lock); -} - -static void mask_rtc_irq_bit(unsigned char bit) -{ - spin_lock_irq(&rtc_lock); - mask_rtc_irq_bit_locked(bit); - spin_unlock_irq(&rtc_lock); -} +static void set_rtc_irq_bit(unsigned char bit); +static void mask_rtc_irq_bit(unsigned char bit); #endif static int rtc_proc_open(struct inode *inode, struct file *file); @@ -193,6 +205,7 @@ static unsigned long rtc_max_user_freq = * rtc_task_lock nests inside rtc_lock. */ static DEFINE_SPINLOCK(rtc_task_lock); +static DEFINE_SPINLOCK(rtc_timer_lock); static rtc_task_t *rtc_callback = NULL; #endif @@ -219,7 +232,146 @@ static inline unsigned char rtc_is_updat return uip; } +#ifndef RTC_IRQ +# undef CONFIG_RTC_HISTOGRAM +#endif + +static inline void rtc_open_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i; + + last_interrupt_time = 0; + rtc_state = S_STARTUP; + rtc_irq_data = 0; + + for (i = 0; i < HISTSIZE; i++) + histogram[i] = 0; +#endif +} + +static inline void rtc_wake_event(void) +{ +#ifndef CONFIG_RTC_HISTOGRAM + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); +#else + if (!(rtc_status & RTC_IS_OPEN)) + return; + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + break; + /* Waiting for an interrupt */ + case S_IDLE: + kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + last_interrupt_time = get_cycles(); + rtc_state = S_WAITING_FOR_READ; + break; + + /* Signal has been delivered. waiting for rtc_read() */ + case S_WAITING_FOR_READ: + /* + * Well foo. The usermode application didn't + * schedule and read in time. + */ + last_interrupt_time = get_cycles(); + rtc_state = S_READ_MISSED; + printk("Read missed before next interrupt\n"); + break; + /* Signal has been delivered, read() deadline was missed */ + case S_READ_MISSED: + /* + * Not much we can do here. We're waiting for the usermode + * application to read the rtc + */ + last_interrupt_time = get_cycles(); + break; + } +#endif +} + +static inline void rtc_read_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + cycles_t now = get_cycles(); + + switch (rtc_state) { + /* Startup */ + case S_STARTUP: + rtc_state = S_IDLE; + break; + + /* Waiting for an interrupt */ + case S_IDLE: + printk("bug in rtc_read(): called in state S_IDLE!\n"); + break; + case S_WAITING_FOR_READ: /* + * Signal has been delivered. + * waiting for rtc_read() + */ + /* + * Well done + */ + case S_READ_MISSED: /* + * Signal has been delivered, read() + * deadline was missed + */ + /* + * So, you finally got here. + */ + if (!last_interrupt_time) + printk("bug in rtc_read(): last_interrupt_time = 0\n"); + rtc_state = S_IDLE; + { + cycles_t latency = now - last_interrupt_time; + unsigned long delta; /* Microseconds */ + + delta = latency; + delta /= CPU_MHZ; + + if (delta > 1000 * 1000) { + printk("rtc: eek\n"); + } else { + unsigned long slot = delta; + if (slot >= HISTSIZE) + slot = HISTSIZE - 1; + histogram[slot]++; + if (delta > 2000) + printk("wow! 
That was a " + "%ld millisec bump\n", + delta / 1000); + } + } + rtc_state = S_IDLE; + break; + } +#endif +} + +static inline void rtc_close_event(void) +{ +#ifdef CONFIG_RTC_HISTOGRAM + int i = 0; + unsigned long total = 0; + + for (i = 0; i < HISTSIZE; i++) + total += histogram[i]; + if (!total) + return; + + printk("\nrtc latency histogram of {%s/%d, %lu samples}:\n", + current->comm, current->pid, total); + for (i = 0; i < HISTSIZE; i++) { + if (histogram[i]) + printk("%d %d\n", i, histogram[i]); + } +#endif +} + #ifdef RTC_IRQ + /* * A very tiny interrupt handler. It runs with SA_INTERRUPT set, * but there is possibility of conflicting with the set_rtc_mmss() @@ -232,6 +384,8 @@ static inline unsigned char rtc_is_updat irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) { + int mod; + /* * Can be an alarm interrupt, update complete interrupt, * or a periodic interrupt. We store the status in the @@ -239,7 +393,8 @@ irqreturn_t rtc_interrupt(int irq, void * the last read in the remainder of rtc_irq_data. */ - spin_lock (&rtc_lock); + spin_lock(&rtc_timer_lock); + spin_lock(&rtc_lock); rtc_irq_data += 0x100; rtc_irq_data &= ~0xff; if (is_hpet_enabled()) { @@ -253,19 +408,23 @@ irqreturn_t rtc_interrupt(int irq, void rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); } + mod = 0; if (rtc_status & RTC_TIMER_ON) - mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + mod = 1; - spin_unlock (&rtc_lock); + spin_unlock(&rtc_lock); + if (mod) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + spin_unlock(&rtc_timer_lock); /* Now do the rest of the actions */ spin_lock(&rtc_task_lock); if (rtc_callback) rtc_callback->func(rtc_callback->private_data); spin_unlock(&rtc_task_lock); - wake_up_interruptible(&rtc_wait); - kill_fasync (&rtc_async_queue, SIGIO, POLL_IN); + rtc_wake_event(); + wake_up_interruptible(&rtc_wait); return IRQ_HANDLED; } @@ -350,10 +509,10 @@ static ssize_t rtc_read(struct file *fil __set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq (&rtc_lock); + spin_lock_irq(&rtc_lock); data = rtc_irq_data; rtc_irq_data = 0; - spin_unlock_irq (&rtc_lock); + spin_unlock_irq(&rtc_lock); if (data != 0) break; @@ -369,6 +528,8 @@ static ssize_t rtc_read(struct file *fil schedule(); } while (1); + rtc_read_event(); + if (count < sizeof(unsigned long)) retval = put_user(data, (unsigned int __user *)buf) ?: sizeof(int); else @@ -383,7 +544,7 @@ static ssize_t rtc_read(struct file *fil static int rtc_do_ioctl(unsigned int cmd, unsigned long arg, int kernel) { - struct rtc_time wtime; + struct rtc_time wtime; #ifdef RTC_IRQ if (rtc_has_irq == 0) { @@ -415,19 +576,24 @@ static int rtc_do_ioctl(unsigned int cmd } case RTC_PIE_OFF: /* Mask periodic int. enab. bit */ { - unsigned long flags; /* can be called from isr via rtc_control() */ - spin_lock_irqsave (&rtc_lock, flags); - mask_rtc_irq_bit_locked(RTC_PIE); + int del = 0; + mask_rtc_irq_bit(RTC_PIE); + spin_lock_irq(&rtc_timer_lock); + spin_lock(&rtc_lock); if (rtc_status & RTC_TIMER_ON) { + del = 1; rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); } - spin_unlock_irqrestore (&rtc_lock, flags); + spin_unlock(&rtc_lock); + if (del) + del_timer(&rtc_irq_timer); + spin_unlock_irq(&rtc_timer_lock); return 0; } case RTC_PIE_ON: /* Allow periodic ints */ { - unsigned long flags; /* can be called from isr via rtc_control() */ + int add = 0; + /* * We don't really want Joe User enabling more * than 64Hz of interrupts on a multi-user machine. 
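Editor's note: the rtc.c hunks above and below repeat one locking pattern; here is a condensed sketch of it, distilled for illustration rather than taken verbatim from the patch (it reuses the driver's rtc_status, rtc_irq_timer, rtc_freq, rtc_lock and new rtc_timer_lock globals, and the helper name is hypothetical). The decision is made under the inner rtc_lock, but the timer call happens only after that lock is dropped, still under the new outer rtc_timer_lock, so add_timer()/mod_timer()/del_timer() never run inside rtc_lock's critical section and the rtc_timer_lock -> rtc_lock ordering stays fixed:

	static void rtc_periodic_on(void)
	{
		int add = 0;

		spin_lock_irq(&rtc_timer_lock);	/* outer: serializes timer ops */
		spin_lock(&rtc_lock);		/* inner: guards RTC state */
		if (!(rtc_status & RTC_TIMER_ON)) {
			rtc_irq_timer.expires = jiffies + HZ/rtc_freq + 2*HZ/100;
			rtc_status |= RTC_TIMER_ON;
			add = 1;		/* decide under the inner lock... */
		}
		spin_unlock(&rtc_lock);
		if (add)			/* ...act after dropping it */
			add_timer(&rtc_irq_timer);
		spin_unlock_irq(&rtc_timer_lock);
	}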
@@ -436,14 +602,18 @@ static int rtc_do_ioctl(unsigned int cmd (!capable(CAP_SYS_RESOURCE))) return -EACCES; - spin_lock_irqsave (&rtc_lock, flags); + spin_lock_irq(&rtc_timer_lock); + spin_lock(&rtc_lock); if (!(rtc_status & RTC_TIMER_ON)) { + add = 1; rtc_irq_timer.expires = jiffies + HZ/rtc_freq + 2*HZ/100; - add_timer(&rtc_irq_timer); rtc_status |= RTC_TIMER_ON; } - set_rtc_irq_bit_locked(RTC_PIE); - spin_unlock_irqrestore (&rtc_lock, flags); + spin_unlock(&rtc_lock); + if (add) + add_timer(&rtc_irq_timer); + spin_unlock_irq(&rtc_timer_lock); + set_rtc_irq_bit(RTC_PIE); return 0; } case RTC_UIE_OFF: /* Mask ints from RTC updates. */ @@ -599,6 +769,11 @@ static int rtc_do_ioctl(unsigned int cmd save_freq_select = CMOS_READ(RTC_FREQ_SELECT); CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); + /* + * Make CMOS date writes nonpreemptible even on PREEMPT_RT. + * There's a limit to everything! =B-) + */ + preempt_disable(); #ifdef CONFIG_MACH_DECSTATION CMOS_WRITE(real_yrs, RTC_DEC_YEAR); #endif @@ -608,6 +783,7 @@ static int rtc_do_ioctl(unsigned int cmd CMOS_WRITE(hrs, RTC_HOURS); CMOS_WRITE(min, RTC_MINUTES); CMOS_WRITE(sec, RTC_SECONDS); + preempt_enable(); CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); @@ -624,7 +800,6 @@ static int rtc_do_ioctl(unsigned int cmd { int tmp = 0; unsigned char val; - unsigned long flags; /* can be called from isr via rtc_control() */ /* * The max we can do is 8192Hz. @@ -647,9 +822,9 @@ static int rtc_do_ioctl(unsigned int cmd if (arg != (1<f_flags & FASYNC) { rtc_fasync (-1, file, 0); @@ -757,10 +939,11 @@ static int rtc_release(struct inode *ino no_irq: #endif - spin_lock_irq (&rtc_lock); + spin_lock_irq(&rtc_lock); rtc_irq_data = 0; rtc_status &= ~RTC_IS_OPEN; - spin_unlock_irq (&rtc_lock); + spin_unlock_irq(&rtc_lock); + rtc_close_event(); return 0; } @@ -775,9 +958,9 @@ static unsigned int rtc_poll(struct file poll_wait(file, &rtc_wait, wait); - spin_lock_irq (&rtc_lock); + spin_lock_irq(&rtc_lock); l = rtc_irq_data; - spin_unlock_irq (&rtc_lock); + spin_unlock_irq(&rtc_lock); if (l != 0) return POLLIN | POLLRDNORM; @@ -825,12 +1008,15 @@ int rtc_unregister(rtc_task_t *task) return -EIO; #else unsigned char tmp; + int del; - spin_lock_irq(&rtc_lock); + spin_lock_irq(&rtc_timer_lock); + spin_lock(&rtc_lock); spin_lock(&rtc_task_lock); if (rtc_callback != task) { spin_unlock(&rtc_task_lock); - spin_unlock_irq(&rtc_lock); + spin_unlock(&rtc_lock); + spin_unlock_irq(&rtc_timer_lock); return -ENXIO; } rtc_callback = NULL; @@ -844,13 +1030,17 @@ int rtc_unregister(rtc_task_t *task) CMOS_WRITE(tmp, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); } + del = 0; if (rtc_status & RTC_TIMER_ON) { rtc_status &= ~RTC_TIMER_ON; - del_timer(&rtc_irq_timer); + del = 1; } rtc_status &= ~RTC_IS_OPEN; spin_unlock(&rtc_task_lock); - spin_unlock_irq(&rtc_lock); + spin_unlock(&rtc_lock); + if (del) + del_timer(&rtc_irq_timer); + spin_unlock_irq(&rtc_timer_lock); return 0; #endif } @@ -860,15 +1050,12 @@ int rtc_control(rtc_task_t *task, unsign #ifndef RTC_IRQ return -EIO; #else - unsigned long flags; - if (cmd != RTC_PIE_ON && cmd != RTC_PIE_OFF && cmd != RTC_IRQP_SET) - return -EINVAL; - spin_lock_irqsave(&rtc_task_lock, flags); + spin_lock_irq(&rtc_task_lock); if (rtc_callback != task) { - spin_unlock_irqrestore(&rtc_task_lock, flags); + spin_unlock_irq(&rtc_task_lock); return -ENXIO; } - spin_unlock_irqrestore(&rtc_task_lock, flags); + spin_unlock_irq(&rtc_task_lock); return rtc_do_ioctl(cmd, arg, 1); #endif } @@ -1111,17 
+1298,21 @@ module_exit(rtc_exit); static void rtc_dropped_irq(unsigned long data) { unsigned long freq; + int mod; - spin_lock_irq (&rtc_lock); + spin_lock_irq(&rtc_timer_lock); + spin_lock(&rtc_lock); if (hpet_rtc_dropped_irq()) { - spin_unlock_irq(&rtc_lock); + spin_unlock(&rtc_lock); + spin_unlock_irq(&rtc_timer_lock); return; } /* Just in case someone disabled the timer from behind our back... */ + mod = 0; if (rtc_status & RTC_TIMER_ON) - mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + mod = 1; rtc_irq_data += ((rtc_freq/HZ)<<8); rtc_irq_data &= ~0xff; @@ -1129,7 +1320,10 @@ static void rtc_dropped_irq(unsigned lon freq = rtc_freq; - spin_unlock_irq(&rtc_lock); + spin_unlock(&rtc_lock); + if (mod) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); + spin_unlock_irq(&rtc_timer_lock); printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", freq); @@ -1325,32 +1519,40 @@ static void get_rtc_alm_time(struct rtc_ * meddles with the interrupt enable/disable bits. */ -static void mask_rtc_irq_bit_locked(unsigned char bit) +static void mask_rtc_irq_bit(unsigned char bit) { unsigned char val; - if (hpet_mask_rtc_irq_bit(bit)) + spin_lock_irq(&rtc_lock); + if (hpet_mask_rtc_irq_bit(bit)) { + spin_unlock_irq(&rtc_lock); return; + } val = CMOS_READ(RTC_CONTROL); val &= ~bit; CMOS_WRITE(val, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); rtc_irq_data = 0; + spin_unlock_irq(&rtc_lock); } -static void set_rtc_irq_bit_locked(unsigned char bit) +static void set_rtc_irq_bit(unsigned char bit) { unsigned char val; - if (hpet_set_rtc_irq_bit(bit)) + spin_lock_irq(&rtc_lock); + if (hpet_set_rtc_irq_bit(bit)) { + spin_unlock_irq(&rtc_lock); return; + } val = CMOS_READ(RTC_CONTROL); val |= bit; CMOS_WRITE(val, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); rtc_irq_data = 0; + spin_unlock_irq(&rtc_lock); } #endif Index: linux.prev/drivers/char/s3c2410-rtc.c =================================================================== --- linux.prev.orig/drivers/char/s3c2410-rtc.c +++ linux.prev/drivers/char/s3c2410-rtc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include Index: linux.prev/drivers/char/specialix.c =================================================================== --- linux.prev.orig/drivers/char/specialix.c +++ linux.prev/drivers/char/specialix.c @@ -2488,7 +2488,7 @@ static int __init specialix_init(void) #endif for (i = 0; i < SX_NBOARD; i++) - sx_board[i].lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&sx_board[i].lock); if (sx_init_drivers()) { func_exit(); Index: linux.prev/drivers/char/sx.c =================================================================== --- linux.prev.orig/drivers/char/sx.c +++ linux.prev/drivers/char/sx.c @@ -2321,7 +2321,7 @@ static int sx_init_portstructs (int nboa #ifdef NEW_WRITE_LOCKING port->gs.port_write_sem = MUTEX; #endif - port->gs.driver_lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&port->gs.driver_lock); /* * Initializing wait queue */ Index: linux.prev/drivers/char/sysrq.c =================================================================== --- linux.prev.orig/drivers/char/sysrq.c +++ linux.prev/drivers/char/sysrq.c @@ -114,7 +114,7 @@ static struct sysrq_key_op sysrq_crashdu static void sysrq_handle_reboot(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { - local_irq_enable(); + raw_local_irq_enable(); emergency_restart(); } @@ -169,6 +169,38 @@ static struct sysrq_key_op sysrq_showreg .enable_mask = SYSRQ_ENABLE_DUMP, }; +#ifdef CONFIG_DEBUG_DEADLOCKS + +static void sysrq_handle_showlocks(int key, struct pt_regs 
*pt_regs, + struct tty_struct *tty) +{ + show_all_locks(); +} + +static struct sysrq_key_op sysrq_showlocks_op = { + .handler = sysrq_handle_showlocks, + .help_msg = "show-all-locks(D)", + .action_msg = "Show Locks Held", +}; + +#endif + +#if defined(__i386__) + +static void sysrq_handle_showallregs(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + nmi_show_all_regs(); +} + +static struct sysrq_key_op sysrq_showallregs_op = { + .handler = sysrq_handle_showallregs, + .help_msg = "showalLcpupc", + .action_msg = "Show Regs On All CPUs", +}; + +#endif + static void sysrq_handle_showstate(int key, struct pt_regs *pt_regs, struct tty_struct *tty) @@ -294,7 +326,11 @@ static struct sysrq_key_op *sysrq_key_ta #else /* c */ NULL, #endif +#ifdef CONFIG_DEBUG_DEADLOCKS +/* d */ &sysrq_showlocks_op, +#else /* d */ NULL, +#endif /* e */ &sysrq_term_op, /* f */ &sysrq_moom_op, /* g */ NULL, @@ -306,7 +342,11 @@ static struct sysrq_key_op *sysrq_key_ta #else /* k */ NULL, #endif +#if defined(__i386__) +/* l */ &sysrq_showallregs_op, +#else /* l */ NULL, +#endif /* m */ &sysrq_showmem_op, /* n */ &sysrq_unrt_op, /* o */ NULL, /* This will often be registered Index: linux.prev/drivers/char/tty_io.c =================================================================== --- linux.prev.orig/drivers/char/tty_io.c +++ linux.prev/drivers/char/tty_io.c @@ -224,6 +224,7 @@ static int check_tty_count(struct tty_st printk(KERN_WARNING "Warning: dev (%s) tty->count(%d) " "!= #fd's(%d) in %s\n", tty->name, tty->count, count, routine); + dump_stack(); return count; } #endif @@ -867,8 +868,8 @@ static void do_tty_hangup(void *data) p->signal->tty = NULL; if (!p->signal->leader) continue; - send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p); - send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p); + group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); + group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); if (tty->pgrp > 0) p->signal->tty_old_pgrp = tty->pgrp; } while_each_task_pid(tty->session, PIDTYPE_SID, p); Index: linux.prev/drivers/char/watchdog/cpu5wdt.c =================================================================== --- linux.prev.orig/drivers/char/watchdog/cpu5wdt.c +++ linux.prev/drivers/char/watchdog/cpu5wdt.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -57,7 +58,7 @@ static int ticks = 10000; /* some device data */ static struct { - struct semaphore stop; + struct completion stop; volatile int running; struct timer_list timer; volatile int queue; @@ -85,7 +86,7 @@ static void cpu5wdt_trigger(unsigned lon } else { /* ticks doesn't matter anyway */ - up(&cpu5wdt_device.stop); + complete(&cpu5wdt_device.stop); } } @@ -239,7 +240,7 @@ static int __devinit cpu5wdt_init(void) if ( !val ) printk(KERN_INFO PFX "sorry, was my fault\n"); - init_MUTEX_LOCKED(&cpu5wdt_device.stop); + init_completion(&cpu5wdt_device.stop); cpu5wdt_device.queue = 0; clear_bit(0, &cpu5wdt_device.inuse); @@ -269,7 +270,7 @@ static void __devexit cpu5wdt_exit(void) { if ( cpu5wdt_device.queue ) { cpu5wdt_device.queue = 0; - down(&cpu5wdt_device.stop); + wait_for_completion(&cpu5wdt_device.stop); } misc_deregister(&cpu5wdt_misc); Index: linux.prev/drivers/clocksource/Makefile =================================================================== --- /dev/null +++ linux.prev/drivers/clocksource/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o +obj-$(CONFIG_ACPI) += acpi_pm.o Index: linux.prev/drivers/clocksource/acpi_pm.c =================================================================== --- 
/dev/null +++ linux.prev/drivers/clocksource/acpi_pm.c @@ -0,0 +1,123 @@ +/* + * linux/drivers/clocksource/acpi_pm.c + * + * This file contains the ACPI PM based clocksource. + * + * This code was largely moved from the i386 timer_pm.c file + * which was (C) Dominik Brodowski 2003 + * and contained the following comments: + * + * Driver to use the Power Management Timer (PMTMR) available in some + * southbridges as primary timing source for the Linux kernel. + * + * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c, + * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4. + * + * This file is licensed under the GPL v2. + */ + +#include +#include +#include +#include + +/* Number of PMTMR ticks expected during calibration run */ +#define PMTMR_TICKS_PER_SEC 3579545 + +#if (defined(CONFIG_X86) && (!defined(CONFIG_X86_64))) +# include "mach_timer.h" +# define PMTMR_EXPECTED_RATE ((PMTMR_TICKS_PER_SEC*CALIBRATE_TIME_MSEC)/1000) +#endif + +/* + * The I/O port the PMTMR resides at. + * The location is detected during setup_arch(), + * in arch/i386/acpi/boot.c + */ +extern u32 acpi_pmtmr_ioport; +extern int acpi_pmtmr_buggy; + +#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ + +static inline u32 read_pmtmr(void) +{ + /* mask the output to 24 bits */ + return inl(acpi_pmtmr_ioport) & ACPI_PM_MASK; +} + +static cycle_t acpi_pm_read_verified(void) +{ + u32 v1 = 0, v2 = 0, v3 = 0; + + /* + * It has been reported that on various broken + * chipsets (ICH4, PIIX4 and PIIX4E) the ACPI PM clock + * source is not latched, so you must read it multiple + * times to ensure a safe value is read: + */ + do { + v1 = read_pmtmr(); + v2 = read_pmtmr(); + v3 = read_pmtmr(); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + return (cycle_t)v2; +} + +static cycle_t acpi_pm_read(void) +{ + return (cycle_t)read_pmtmr(); +} + +struct clocksource clocksource_acpi_pm = { + .name = "acpi_pm", + .rating = 200, + .read = acpi_pm_read, + .mask = (cycle_t)ACPI_PM_MASK, + .mult = 0, /* to be calculated */ + .shift = 22, + .is_continuous = 1, +}; + +static int __init init_acpi_pm_clocksource(void) +{ + u32 value1, value2; + unsigned int i; + + if (!acpi_pmtmr_ioport) + return -ENODEV; + + clocksource_acpi_pm.mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, + clocksource_acpi_pm.shift); + + /* "verify" this timing source: */ + value1 = read_pmtmr(); + for (i = 0; i < 10000; i++) { + value2 = read_pmtmr(); + if (value2 == value1) + continue; + if (value2 > value1) + goto pm_good; + if ((value2 < value1) && ((value2) < 0xFFF)) + goto pm_good; + printk(KERN_INFO "PM-Timer had inconsistent results: %#x, %#x - aborting.\n", value1, value2); + return -EINVAL; + } + printk(KERN_INFO "PM-Timer had no reasonable result: %#x - aborting.\n", value1); + return -ENODEV; + +pm_good: + + /* check to see if pmtmr is known buggy: */ + if (acpi_pmtmr_buggy) { + clocksource_acpi_pm.read = acpi_pm_read_verified; + clocksource_acpi_pm.rating = 110; + } + + register_clocksource(&clocksource_acpi_pm); + + return 0; +} + +module_init(init_acpi_pm_clocksource); Index: linux.prev/drivers/clocksource/cyclone.c =================================================================== --- /dev/null +++ linux.prev/drivers/clocksource/cyclone.c @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "mach_timer.h" + +#define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr */ +#define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to
control register */ +#define CYCLONE_MPCS_OFFSET 0x51A8 /* offset to select register */ +#define CYCLONE_MPMC_OFFSET 0x51D0 /* offset to count register */ +#define CYCLONE_TIMER_FREQ 99780000 /* 100MHz, but not really */ +#define CYCLONE_TIMER_MASK 0xFFFFFFFF /* 32 bit mask */ + +int use_cyclone = 0; +static void __iomem *cyclone_ptr; + +static cycle_t read_cyclone(void) +{ + return (cycle_t)readl(cyclone_ptr); +} + +struct clocksource clocksource_cyclone = { + .name = "cyclone", + .rating = 250, + .read = read_cyclone, + .mask = (cycle_t)CYCLONE_TIMER_MASK, + .mult = 10, + .shift = 0, + .is_continuous = 1, +}; + +static int __init init_cyclone_clocksource(void) +{ + unsigned long base; /* saved value from CBAR */ + unsigned long offset; + u32 __iomem* volatile cyclone_timer; /* Cyclone MPMC0 register */ + u32 __iomem* reg; + int i; + + /* make sure we're on a summit box: */ + if (!use_cyclone) + return -ENODEV; + + printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); + + /* find base address: */ + offset = CYCLONE_CBAR_ADDR; + reg = ioremap_nocache(offset, sizeof(reg)); + if (!reg) { + printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); + return -ENODEV; + } + /* even on 64bit systems, this is only 32bits: */ + base = readl(reg); + if (!base) { + printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); + return -ENODEV; + } + iounmap(reg); + + /* setup PMCC: */ + offset = base + CYCLONE_PMCC_OFFSET; + reg = ioremap_nocache(offset, sizeof(reg)); + if (!reg) { + printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); + return -ENODEV; + } + writel(0x00000001, reg); + iounmap(reg); + + /* setup MPCS: */ + offset = base + CYCLONE_MPCS_OFFSET; + reg = ioremap_nocache(offset, sizeof(reg)); + if (!reg) { + printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); + return -ENODEV; + } + writel(0x00000001, reg); + iounmap(reg); + + /* map in cyclone_timer: */ + offset = base + CYCLONE_MPMC_OFFSET; + cyclone_timer = ioremap_nocache(offset, sizeof(u64)); + if (!cyclone_timer) { + printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); + return -ENODEV; + } + + /* quick test to make sure it's ticking: */ + for (i = 0; i < 3; i++) { + u32 old = readl(cyclone_timer); + int stall = 100; + + while (stall--) + barrier(); + + if (readl(cyclone_timer) == old) { + printk(KERN_ERR "Summit chipset: Counter not counting! 
DISABLED\n"); + iounmap(cyclone_timer); + cyclone_timer = NULL; + return -ENODEV; + } + } + cyclone_ptr = cyclone_timer; + + /* sort out mult/shift values: */ + clocksource_cyclone.shift = 22; + clocksource_cyclone.mult = clocksource_hz2mult(CYCLONE_TIMER_FREQ, + clocksource_cyclone.shift); + + register_clocksource(&clocksource_cyclone); + + return 0; +} + +module_init(init_cyclone_clocksource); Index: linux.prev/drivers/connector/cn_proc.c =================================================================== --- linux.prev.orig/drivers/connector/cn_proc.c +++ linux.prev/drivers/connector/cn_proc.c @@ -24,6 +24,7 @@ #include #include +#include #include #include @@ -56,7 +57,7 @@ void proc_fork_connector(struct task_str msg = (struct cn_msg*)buffer; ev = (struct proc_event*)msg->data; get_seq(&msg->seq, &ev->cpu); - getnstimestamp(&ev->timestamp); + ktime_get_ts(&ev->timestamp); /* get high res monotonic timestamp */ ev->what = PROC_EVENT_FORK; ev->event_data.fork.parent_pid = task->real_parent->pid; ev->event_data.fork.parent_tgid = task->real_parent->tgid; @@ -82,7 +83,7 @@ void proc_exec_connector(struct task_str msg = (struct cn_msg*)buffer; ev = (struct proc_event*)msg->data; get_seq(&msg->seq, &ev->cpu); - getnstimestamp(&ev->timestamp); + ktime_get_ts(&ev->timestamp); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; ev->event_data.exec.process_tgid = task->tgid; @@ -116,7 +117,7 @@ void proc_id_connector(struct task_struc } else return; get_seq(&msg->seq, &ev->cpu); - getnstimestamp(&ev->timestamp); + ktime_get_ts(&ev->timestamp); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ @@ -136,7 +137,7 @@ void proc_exit_connector(struct task_str msg = (struct cn_msg*)buffer; ev = (struct proc_event*)msg->data; get_seq(&msg->seq, &ev->cpu); - getnstimestamp(&ev->timestamp); + ktime_get_ts(&ev->timestamp); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; ev->event_data.exit.process_tgid = task->tgid; @@ -169,7 +170,7 @@ static void cn_proc_ack(int err, int rcv msg = (struct cn_msg*)buffer; ev = (struct proc_event*)msg->data; msg->seq = rcvd_seq; - getnstimestamp(&ev->timestamp); + ktime_get_ts(&ev->timestamp); ev->cpu = -1; ev->what = PROC_EVENT_NONE; ev->event_data.ack.err = err; Index: linux.prev/drivers/cpufreq/cpufreq.c =================================================================== --- linux.prev.orig/drivers/cpufreq/cpufreq.c +++ linux.prev/drivers/cpufreq/cpufreq.c @@ -601,7 +601,8 @@ static int cpufreq_add_dev (struct sys_d policy->cpu = cpu; policy->cpus = cpumask_of_cpu(cpu); - init_MUTEX_LOCKED(&policy->lock); + init_MUTEX(&policy->lock); + down(&policy->lock); init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update, (void *)(long)cpu); @@ -610,6 +611,7 @@ static int cpufreq_add_dev (struct sys_d */ ret = cpufreq_driver->init(policy); if (ret) { + up(&policy->lock); dprintk("initialization failed\n"); goto err_out; } @@ -622,8 +624,10 @@ static int cpufreq_add_dev (struct sys_d strlcpy(policy->kobj.name, "cpufreq", KOBJ_NAME_LEN); ret = kobject_register(&policy->kobj); - if (ret) + if (ret) { + up(&policy->lock); goto err_out_driver_exit; + } /* set up files for this cpu device */ drv_attr = cpufreq_driver->attr; Index: linux.prev/drivers/i2c/busses/i2c-pxa.c =================================================================== --- linux.prev.orig/drivers/i2c/busses/i2c-pxa.c +++ linux.prev/drivers/i2c/busses/i2c-pxa.c @@ -926,7 +926,7 @@ static struct i2c_algorithm 
i2c_pxa_algo }; static struct pxa_i2c i2c_pxa = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(i2c_pxa.lock), .wait = __WAIT_QUEUE_HEAD_INITIALIZER(i2c_pxa.wait), .adap = { .owner = THIS_MODULE, Index: linux.prev/drivers/i2c/busses/i2c-s3c2410.c =================================================================== --- linux.prev.orig/drivers/i2c/busses/i2c-s3c2410.c +++ linux.prev/drivers/i2c/busses/i2c-s3c2410.c @@ -573,7 +573,7 @@ static struct i2c_algorithm s3c24xx_i2c_ }; static struct s3c24xx_i2c s3c24xx_i2c = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_i2c.lock), .wait = __WAIT_QUEUE_HEAD_INITIALIZER(s3c24xx_i2c.wait), .adap = { .name = "s3c2410-i2c", Index: linux.prev/drivers/i2c/chips/tps65010.c =================================================================== --- linux.prev.orig/drivers/i2c/chips/tps65010.c +++ linux.prev/drivers/i2c/chips/tps65010.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -33,7 +34,6 @@ #include #include -#include #include #include Index: linux.prev/drivers/ide/ide-floppy.c =================================================================== --- linux.prev.orig/drivers/ide/ide-floppy.c +++ linux.prev/drivers/ide/ide-floppy.c @@ -838,7 +838,7 @@ static ide_startstop_t idefloppy_pc_intr "transferred\n", pc->actually_transferred); clear_bit(PC_DMA_IN_PROGRESS, &pc->flags); - local_irq_enable(); + local_irq_enable_nort(); if (status.b.check || test_bit(PC_DMA_ERROR, &pc->flags)) { /* Error detected */ @@ -1670,9 +1670,9 @@ static int idefloppy_get_format_progress atapi_status_t status; unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); status.all = HWIF(drive)->INB(IDE_STATUS_REG); - local_irq_restore(flags); + local_irq_restore_nort(flags); progress_indication = !status.b.dsc ? 
0 : 0x10000; } Index: linux.prev/drivers/ide/ide-io.c =================================================================== --- linux.prev.orig/drivers/ide/ide-io.c +++ linux.prev/drivers/ide/ide-io.c @@ -636,7 +636,7 @@ static ide_startstop_t drive_cmd_intr (i u8 stat = hwif->INB(IDE_STATUS_REG); int retries = 10; - local_irq_enable(); + local_irq_enable_nort(); if ((stat & DRQ_STAT) && args && args[3]) { u8 io_32bit = drive->io_32bit; drive->io_32bit = 0; @@ -1107,7 +1107,7 @@ static void ide_do_request (ide_hwgroup_ ide_get_lock(ide_intr, hwgroup); /* caller must own ide_lock */ - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); while (!hwgroup->busy) { hwgroup->busy = 1; @@ -1219,8 +1219,7 @@ static void ide_do_request (ide_hwgroup_ */ if (masked_irq != IDE_NO_IRQ && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&ide_lock); - local_irq_enable(); + spin_unlock_irq(&ide_lock); /* allow other IRQs while we start this request */ startstop = start_request(drive, rq); spin_lock_irq(&ide_lock); @@ -1368,7 +1367,7 @@ void ide_timer_expiry (unsigned long dat #endif /* DISABLE_IRQ_NOSYNC */ /* local CPU only, * as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwgroup->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { @@ -1565,7 +1564,7 @@ irqreturn_t ide_intr (int irq, void *dev spin_unlock(&ide_lock); if (drive->unmask) - local_irq_enable(); + local_irq_enable_nort(); /* service this interrupt, may set handler for next interrupt */ startstop = handler(drive); spin_lock_irq(&ide_lock); Index: linux.prev/drivers/ide/ide-iops.c =================================================================== --- linux.prev.orig/drivers/ide/ide-iops.c +++ linux.prev/drivers/ide/ide-iops.c @@ -244,10 +244,10 @@ static void ata_input_data(ide_drive_t * if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->INSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->INSL(IDE_DATA_REG, buffer, wcount); } else { @@ -266,10 +266,10 @@ static void ata_output_data(ide_drive_t if (io_32bit) { if (io_32bit & 2) { unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(drive, IDE_NSECTOR_REG); hwif->OUTSL(IDE_DATA_REG, buffer, wcount); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else hwif->OUTSL(IDE_DATA_REG, buffer, wcount); } else { @@ -564,12 +564,12 @@ int ide_wait_stat (ide_startstop_t *star if (!(stat & BUSY_STAT)) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *startstop = ide_error(drive, "status timeout", stat); return 1; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. 
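A note on the local_irq_*_nort() operations used throughout these IDE hunks: they are the -rt tree's "no-RT" variants of the interrupt-disable primitives. On a stock kernel they behave exactly like the plain local_irq_*() forms; on PREEMPT_RT, where this code runs in preemptible process context and interrupt handlers are threaded, they compile down to almost nothing. A minimal sketch of the intended mapping, assuming the usual -rt definitions (the exact macros live elsewhere in this patch series and may differ in detail):

#ifdef CONFIG_PREEMPT_RT
/* RT: hard interrupts stay enabled; keep only the flags bookkeeping */
# define local_irq_disable_nort()		do { } while (0)
# define local_irq_enable_nort()		do { } while (0)
# define local_irq_save_nort(flags)		local_save_flags(flags)
# define local_irq_restore_nort(flags)		(void)(flags)
#else
/* !RT: identical to the plain local_irq_*() primitives */
# define local_irq_disable_nort()		local_irq_disable()
# define local_irq_enable_nort()		local_irq_enable()
# define local_irq_save_nort(flags)		local_irq_save(flags)
# define local_irq_restore_nort(flags)		local_irq_restore(flags)
#endif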
@@ -727,17 +727,15 @@ int ide_driveid_update (ide_drive_t *dri printk("%s: CHECK for good STATUS\n", drive->name); return 0; } - local_irq_save(flags); - SELECT_MASK(drive, 0); id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC); - if (!id) { - local_irq_restore(flags); + if (!id) return 0; - } + local_irq_save_nort(flags); + SELECT_MASK(drive, 0); ata_input_data(drive, id, SECTOR_WORDS); (void) hwif->INB(IDE_STATUS_REG); /* clear drive IRQ */ - local_irq_enable(); - local_irq_restore(flags); + local_irq_enable_nort(); + local_irq_restore_nort(flags); ide_fix_driveid(id); if (id) { drive->id->dma_ultra = id->dma_ultra; @@ -817,7 +815,7 @@ int ide_config_drive_speed (ide_drive_t if (time_after(jiffies, timeout)) break; } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* @@ -1243,6 +1241,7 @@ int ide_wait_not_busy(ide_hwif_t *hwif, */ if (stat == 0xff) return -ENODEV; + touch_softlockup_watchdog(); } return -EBUSY; } Index: linux.prev/drivers/ide/ide-lib.c =================================================================== --- linux.prev.orig/drivers/ide/ide-lib.c +++ linux.prev/drivers/ide/ide-lib.c @@ -447,15 +447,16 @@ EXPORT_SYMBOL_GPL(ide_set_xfer_rate); static void ide_dump_opcode(ide_drive_t *drive) { + unsigned long flags; struct request *rq; u8 opcode = 0; int found = 0; - spin_lock(&ide_lock); + spin_lock_irqsave(&ide_lock, flags); rq = NULL; if (HWGROUP(drive)) rq = HWGROUP(drive)->rq; - spin_unlock(&ide_lock); + spin_unlock_irqrestore(&ide_lock, flags); if (!rq) return; if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { @@ -483,10 +484,8 @@ static void ide_dump_opcode(ide_drive_t static u8 ide_dump_ata_status(ide_drive_t *drive, const char *msg, u8 stat) { ide_hwif_t *hwif = HWIF(drive); - unsigned long flags; u8 err = 0; - local_irq_set(flags); printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (stat & BUSY_STAT) printk("Busy "); @@ -546,7 +545,7 @@ static u8 ide_dump_ata_status(ide_drive_ printk("\n"); } ide_dump_opcode(drive); - local_irq_restore(flags); + return err; } @@ -561,14 +560,12 @@ static u8 ide_dump_ata_status(ide_drive_ static u8 ide_dump_atapi_status(ide_drive_t *drive, const char *msg, u8 stat) { - unsigned long flags; - atapi_status_t status; atapi_error_t error; status.all = stat; error.all = 0; - local_irq_set(flags); + printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); if (status.b.bsy) printk("Busy "); @@ -594,7 +591,7 @@ static u8 ide_dump_atapi_status(ide_driv printk("}\n"); } ide_dump_opcode(drive); - local_irq_restore(flags); + return error.all; } Index: linux.prev/drivers/ide/ide-probe.c =================================================================== --- linux.prev.orig/drivers/ide/ide-probe.c +++ linux.prev/drivers/ide/ide-probe.c @@ -184,7 +184,7 @@ static inline void do_identify (ide_driv hwif->ata_input_data(drive, id, SECTOR_WORDS); drive->id_read = 1; - local_irq_enable(); + local_irq_enable_nort(); ide_fix_driveid(id); #if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO) || defined (CONFIG_SCSI_EATA) @@ -362,14 +362,14 @@ static int actual_try_to_identify (ide_d unsigned long flags; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* drive returned ID */ do_identify(drive, cmd); /* drive responded with ID */ rc = 0; /* clear drive IRQ */ (void) hwif->INB(IDE_STATUS_REG); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { /* drive refused ID */ rc = 2; @@ -655,7 +655,7 @@ static void hwif_release_dev (struct dev { ide_hwif_t *hwif = 
container_of(dev, ide_hwif_t, gendev); - up(&hwif->gendev_rel_sem); + complete(&hwif->gendev_rel_comp); } static void hwif_register (ide_hwif_t *hwif) @@ -841,7 +841,7 @@ static void probe_hwif(ide_hwif_t *hwif) } while ((stat & BUSY_STAT) && time_after(timeout, jiffies)); } - local_irq_restore(flags); + local_irq_restore_nort(flags); /* * Use cached IRQ number. It might be (and is...) changed by probe * code above @@ -1325,7 +1325,7 @@ static void drive_release_dev (struct de drive->queue = NULL; spin_unlock_irq(&ide_lock); - up(&drive->gendev_rel_sem); + complete(&drive->gendev_rel_comp); } /* Index: linux.prev/drivers/ide/ide-taskfile.c =================================================================== --- linux.prev.orig/drivers/ide/ide-taskfile.c +++ linux.prev/drivers/ide/ide-taskfile.c @@ -223,7 +223,7 @@ ide_startstop_t task_no_data_intr (ide_d ide_hwif_t *hwif = HWIF(drive); u8 stat; - local_irq_enable(); + local_irq_enable_nort(); if (!OK_STAT(stat = hwif->INB(IDE_STATUS_REG),READY_STAT,BAD_STAT)) { return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */ @@ -275,7 +275,7 @@ static void ide_pio_sector(ide_drive_t * offset %= PAGE_SIZE; #ifdef CONFIG_HIGHMEM - local_irq_save(flags); + local_irq_save_nort(flags); #endif buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; @@ -295,7 +295,7 @@ static void ide_pio_sector(ide_drive_t * kunmap_atomic(buf, KM_BIO_SRC_IRQ); #ifdef CONFIG_HIGHMEM - local_irq_restore(flags); + local_irq_restore_nort(flags); #endif } @@ -453,7 +453,7 @@ ide_startstop_t pre_task_out_intr (ide_d } if (!drive->unmask) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_out_intr, WAIT_WORSTCASE, NULL); ide_pio_datablock(drive, rq, 1); Index: linux.prev/drivers/ide/ide.c =================================================================== --- linux.prev.orig/drivers/ide/ide.c +++ linux.prev/drivers/ide/ide.c @@ -222,7 +222,7 @@ static void init_hwif_data(ide_hwif_t *h hwif->mwdma_mask = 0x80; /* disable all mwdma */ hwif->swdma_mask = 0x80; /* disable all swdma */ - sema_init(&hwif->gendev_rel_sem, 0); + init_completion(&hwif->gendev_rel_comp); default_hwif_iops(hwif); default_hwif_transport(hwif); @@ -245,7 +245,7 @@ static void init_hwif_data(ide_hwif_t *h drive->is_flash = 0; drive->vdma = 0; INIT_LIST_HEAD(&drive->list); - sema_init(&drive->gendev_rel_sem, 0); + init_completion(&drive->gendev_rel_comp); } } @@ -602,7 +602,7 @@ void ide_unregister(unsigned int index) } spin_unlock_irq(&ide_lock); device_unregister(&drive->gendev); - down(&drive->gendev_rel_sem); + wait_for_completion(&drive->gendev_rel_comp); spin_lock_irq(&ide_lock); } hwif->present = 0; @@ -662,7 +662,7 @@ void ide_unregister(unsigned int index) /* More messed up locking ... 
*/ spin_unlock_irq(&ide_lock); device_unregister(&hwif->gendev); - down(&hwif->gendev_rel_sem); + wait_for_completion(&hwif->gendev_rel_comp); /* * Remove us from the kernel's knowledge @@ -1048,15 +1048,13 @@ int ide_spin_wait_hwgroup (ide_drive_t * spin_lock_irq(&ide_lock); while (hwgroup->busy) { - unsigned long lflags; spin_unlock_irq(&ide_lock); - local_irq_set(lflags); + if (time_after(jiffies, timeout)) { - local_irq_restore(lflags); printk(KERN_ERR "%s: channel busy\n", drive->name); return -EBUSY; } - local_irq_restore(lflags); + spin_lock_irq(&ide_lock); } return 0; Index: linux.prev/drivers/ide/pci/alim15x3.c =================================================================== --- linux.prev.orig/drivers/ide/pci/alim15x3.c +++ linux.prev/drivers/ide/pci/alim15x3.c @@ -296,7 +296,6 @@ static void ali15x3_tune_drive (ide_driv struct pci_dev *dev = hwif->pci_dev; int s_time, a_time, c_time; u8 s_clc, a_clc, r_clc; - unsigned long flags; int bus_speed = system_bus_clock(); int port = hwif->channel ? 0x5c : 0x58; int portFIFO = hwif->channel ? 0x55 : 0x54; @@ -323,7 +322,6 @@ static void ali15x3_tune_drive (ide_driv if (r_clc >= 16) r_clc = 0; } - local_irq_save(flags); /* * PIO mode => ATA FIFO on, ATAPI FIFO off @@ -345,7 +343,6 @@ static void ali15x3_tune_drive (ide_driv pci_write_config_byte(dev, port, s_clc); pci_write_config_byte(dev, port+drive->select.b.unit+2, (a_clc << 4) | r_clc); - local_irq_restore(flags); /* * setup active rec @@ -585,7 +582,6 @@ static int ali15x3_dma_setup(ide_drive_t static unsigned int __devinit init_chipset_ali15x3 (struct pci_dev *dev, const char *name) { - unsigned long flags; u8 tmpbyte; struct pci_dev *north = pci_find_slot(0, PCI_DEVFN(0,0)); @@ -601,7 +597,6 @@ static unsigned int __devinit init_chips } #endif /* defined(DISPLAY_ALI_TIMINGS) && defined(CONFIG_PROC_FS) */ - local_irq_save(flags); if (m5229_revision < 0xC2) { /* @@ -614,7 +609,6 @@ static unsigned int __devinit init_chips * clear bit 7 */ pci_write_config_byte(dev, 0x4b, tmpbyte & 0x7F); - local_irq_restore(flags); return 0; } @@ -639,7 +633,6 @@ static unsigned int __devinit init_chips * 0:0.0 so if we didn't find one we know what is cooking. */ if (north && north->vendor != PCI_VENDOR_ID_AL) { - local_irq_restore(flags); return 0; } @@ -662,7 +655,6 @@ static unsigned int __devinit init_chips pci_write_config_byte(isa_dev, 0x79, tmpbyte | 0x02); } } - local_irq_restore(flags); return 0; } @@ -683,10 +675,8 @@ static unsigned int __devinit ata66_ali1 unsigned int ata66 = 0; u8 cable_80_pin[2] = { 0, 0 }; - unsigned long flags; u8 tmpbyte; - local_irq_save(flags); if (m5229_revision >= 0xC2) { /* @@ -736,7 +726,6 @@ static unsigned int __devinit ata66_ali1 pci_write_config_byte(dev, 0x53, tmpbyte); - local_irq_restore(flags); return(ata66); } Index: linux.prev/drivers/ide/pci/hpt366.c =================================================================== --- linux.prev.orig/drivers/ide/pci/hpt366.c +++ linux.prev/drivers/ide/pci/hpt366.c @@ -1481,7 +1481,6 @@ static void __devinit init_dma_hpt366(id u8 dma_new = 0, dma_old = 0; u8 primary = hwif->channel ? 0x4b : 0x43; u8 secondary = hwif->channel ? 
0x4f : 0x47; - unsigned long flags; if (!dmabase) return; @@ -1493,8 +1492,6 @@ static void __devinit init_dma_hpt366(id dma_old = hwif->INB(dmabase+2); - local_irq_save(flags); - dma_new = dma_old; pci_read_config_byte(hwif->pci_dev, primary, &masterdma); pci_read_config_byte(hwif->pci_dev, secondary, &slavedma); @@ -1504,8 +1501,6 @@ static void __devinit init_dma_hpt366(id if (dma_new != dma_old) hwif->OUTB(dma_new, dmabase+2); - local_irq_restore(flags); - ide_setup_dma(hwif, dmabase, 8); } Index: linux.prev/drivers/ide/setup-pci.c =================================================================== --- linux.prev.orig/drivers/ide/setup-pci.c +++ linux.prev/drivers/ide/setup-pci.c @@ -665,8 +665,11 @@ static int do_ide_setup_pci_device(struc { static ata_index_t ata_index = { .b = { .low = 0xff, .high = 0xff } }; int tried_config = 0; + unsigned long flags; int pciirq, ret; + spin_lock_irqsave(&ide_lock, flags); + ret = ide_setup_pci_controller(dev, d, noisy, &tried_config); if (ret < 0) goto out; @@ -721,6 +724,8 @@ static int do_ide_setup_pci_device(struc *index = ata_index; ide_pci_setup_ports(dev, d, pciirq, index); out: + spin_unlock_irqrestore(&ide_lock, flags); + return ret; } Index: linux.prev/drivers/ieee1394/ieee1394_types.h =================================================================== --- linux.prev.orig/drivers/ieee1394/ieee1394_types.h +++ linux.prev/drivers/ieee1394/ieee1394_types.h @@ -19,7 +19,7 @@ struct hpsb_tlabel_pool { spinlock_t lock; u8 next; u32 allocations; - struct semaphore count; + struct compat_semaphore count; }; #define HPSB_TPOOL_INIT(_tp) \ Index: linux.prev/drivers/ieee1394/nodemgr.c =================================================================== --- linux.prev.orig/drivers/ieee1394/nodemgr.c +++ linux.prev/drivers/ieee1394/nodemgr.c @@ -114,7 +114,7 @@ struct host_info { struct hpsb_host *host; struct list_head list; struct completion exited; - struct semaphore reset_sem; + struct compat_semaphore reset_sem; int pid; char daemon_name[15]; int kill_me; Index: linux.prev/drivers/ieee1394/raw1394-private.h =================================================================== --- linux.prev.orig/drivers/ieee1394/raw1394-private.h +++ linux.prev/drivers/ieee1394/raw1394-private.h @@ -29,7 +29,7 @@ struct file_info { struct list_head req_pending; struct list_head req_complete; - struct semaphore complete_sem; + struct compat_semaphore complete_sem; spinlock_t reqlists_lock; wait_queue_head_t poll_wait_complete; Index: linux.prev/drivers/input/gameport/gameport.c =================================================================== --- linux.prev.orig/drivers/input/gameport/gameport.c +++ linux.prev/drivers/input/gameport/gameport.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* HZ */ /*#include */ @@ -101,12 +102,12 @@ static int gameport_measure_speed(struct tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -125,11 +126,11 @@ static int gameport_measure_speed(struct tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); rdtscl(t1); for (t = 0; t < 50; t++) gameport_read(gameport); rdtscl(t2); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } Index: 
linux.prev/drivers/input/serio/sa1111ps2.c =================================================================== --- linux.prev.orig/drivers/input/serio/sa1111ps2.c +++ linux.prev/drivers/input/serio/sa1111ps2.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include Index: linux.prev/drivers/media/dvb/dvb-core/dvb_frontend.c =================================================================== --- linux.prev.orig/drivers/media/dvb/dvb-core/dvb_frontend.c +++ linux.prev/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -95,7 +95,7 @@ struct dvb_frontend_private { struct dvb_device *dvbdev; struct dvb_frontend_parameters parameters; struct dvb_fe_events events; - struct semaphore sem; + struct compat_semaphore sem; struct list_head list_head; wait_queue_head_t wait_queue; pid_t thread_pid; Index: linux.prev/drivers/media/dvb/dvb-core/dvb_frontend.h =================================================================== --- linux.prev.orig/drivers/media/dvb/dvb-core/dvb_frontend.h +++ linux.prev/drivers/media/dvb/dvb-core/dvb_frontend.h @@ -86,7 +86,7 @@ struct dvb_fe_events { int eventr; int overflow; wait_queue_head_t wait_queue; - struct semaphore sem; + struct compat_semaphore sem; }; struct dvb_frontend { Index: linux.prev/drivers/media/video/zr36120_i2c.c =================================================================== --- linux.prev.orig/drivers/media/video/zr36120_i2c.c +++ linux.prev/drivers/media/video/zr36120_i2c.c @@ -120,7 +120,7 @@ struct i2c_bus zoran_i2c_bus_template = I2C_BUSID_ZORAN, NULL, - SPIN_LOCK_UNLOCKED, + SPIN_LOCK_UNLOCKED(zoran_i2c_bus_template.lock), attach_inform, detach_inform, Index: linux.prev/drivers/message/i2o/exec-osm.c =================================================================== --- linux.prev.orig/drivers/message/i2o/exec-osm.c +++ linux.prev/drivers/message/i2o/exec-osm.c @@ -209,7 +209,7 @@ static int i2o_msg_post_wait_complete(st { struct i2o_exec_wait *wait, *tmp; unsigned long flags; - static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(lock); int rc = 1; /* Index: linux.prev/drivers/misc/ibmasm/module.c =================================================================== --- linux.prev.orig/drivers/misc/ibmasm/module.c +++ linux.prev/drivers/misc/ibmasm/module.c @@ -85,7 +85,7 @@ static int __devinit ibmasm_init_one(str } memset(sp, 0, sizeof(struct service_processor)); - sp->lock = SPIN_LOCK_UNLOCKED; + spin_lock_init(&sp->lock); INIT_LIST_HEAD(&sp->command_queue); pci_set_drvdata(pdev, (void *)sp); Index: linux.prev/drivers/net/3c527.c =================================================================== --- linux.prev.orig/drivers/net/3c527.c +++ linux.prev/drivers/net/3c527.c @@ -182,7 +182,7 @@ struct mc32_local u16 rx_ring_tail; /* index to rx de-queue end */ - struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct compat_semaphore cmd_mutex; /* Serialises issuing of execute commands */ struct completion execution_cmd; /* Card has completed an execute command */ struct completion xceiver_cmd; /* Card has completed a tx or rx command */ }; Index: linux.prev/drivers/net/3c59x.c =================================================================== --- linux.prev.orig/drivers/net/3c59x.c +++ linux.prev/drivers/net/3c59x.c @@ -963,9 +963,9 @@ static void poll_vortex(struct net_devic struct vortex_private *vp = netdev_priv(dev); unsigned long flags; local_save_flags(flags); - local_irq_disable(); + local_irq_disable_nort(); (vp->full_bus_master_rx ? 
boomerang_interrupt:vortex_interrupt)(dev->irq,dev,NULL); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -2035,13 +2035,17 @@ static void vortex_tx_timeout(struct net /* * Block interrupts because vortex_interrupt does a bare spin_lock() */ +#ifndef CONFIG_PREEMPT_RT unsigned long flags; local_irq_save(flags); +#endif if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev, NULL); else vortex_interrupt(dev->irq, dev, NULL); +#ifndef CONFIG_PREEMPT_RT local_irq_restore(flags); +#endif } } Index: linux.prev/drivers/net/8139too.c =================================================================== --- linux.prev.orig/drivers/net/8139too.c +++ linux.prev/drivers/net/8139too.c @@ -2130,10 +2130,10 @@ static int rtl8139_poll(struct net_devic * Order is important since data can get interrupted * again when we think we are done. */ - local_irq_disable(); + raw_local_irq_disable(); RTL_W16_F(IntrMask, rtl8139_intr_mask); __netif_rx_complete(dev); - local_irq_enable(); + raw_local_irq_enable(); } spin_unlock(&tp->rx_lock); Index: linux.prev/drivers/net/e1000/e1000_main.c =================================================================== --- linux.prev.orig/drivers/net/e1000/e1000_main.c +++ linux.prev/drivers/net/e1000/e1000_main.c @@ -2736,10 +2736,8 @@ e1000_xmit_frame(struct sk_buff *skb, st if(adapter->hw.tx_pkt_filtering && (adapter->hw.mac_type == e1000_82573) ) e1000_transfer_dhcp_info(adapter, skb); - local_irq_save(flags); - if (!spin_trylock(&tx_ring->tx_lock)) { + if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) { /* Collision - tell upper layer to requeue */ - local_irq_restore(flags); return NETDEV_TX_LOCKED; } Index: linux.prev/drivers/net/hamradio/6pack.c =================================================================== --- linux.prev.orig/drivers/net/hamradio/6pack.c +++ linux.prev/drivers/net/hamradio/6pack.c @@ -124,7 +124,7 @@ struct sixpack { struct timer_list tx_t; struct timer_list resync_t; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; spinlock_t lock; }; Index: linux.prev/drivers/net/hamradio/mkiss.c =================================================================== --- linux.prev.orig/drivers/net/hamradio/mkiss.c +++ linux.prev/drivers/net/hamradio/mkiss.c @@ -85,7 +85,7 @@ struct mkiss { #define CRC_MODE_SMACK_TEST 4 atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; }; /*---------------------------------------------------------------------------*/ Index: linux.prev/drivers/net/netconsole.c =================================================================== --- linux.prev.orig/drivers/net/netconsole.c +++ linux.prev/drivers/net/netconsole.c @@ -74,16 +74,22 @@ static void write_msg(struct console *co if (!np.dev) return; - local_irq_save(flags); + /* + * A bit hairy. 
Netconsole uses mutexes (indirectly) and + * thus must have interrupts enabled: + */ + local_irq_save_nort(flags); for(left = len; left; ) { frag = min(left, MAX_PRINT_CHUNK); + WARN_ON_RT(irqs_disabled()); netpoll_send_udp(&np, msg, frag); + WARN_ON_RT(irqs_disabled()); msg += frag; left -= frag; } - local_irq_restore(flags); + local_irq_restore_nort(flags); } static struct console netconsole = { Index: linux.prev/drivers/net/ns83820.c =================================================================== --- linux.prev.orig/drivers/net/ns83820.c +++ linux.prev/drivers/net/ns83820.c @@ -1012,8 +1012,6 @@ static void do_tx_done(struct net_device struct ns83820 *dev = PRIV(ndev); u32 cmdsts, tx_done_idx, *desc; - spin_lock_irq(&dev->tx_lock); - dprintk("do_tx_done(%p)\n", ndev); tx_done_idx = dev->tx_done_idx; desc = dev->tx_descs + (tx_done_idx * DESC_SIZE); @@ -1069,7 +1067,6 @@ static void do_tx_done(struct net_device netif_start_queue(ndev); netif_wake_queue(ndev); } - spin_unlock_irq(&dev->tx_lock); } static void ns83820_cleanup_tx(struct ns83820 *dev) @@ -1370,7 +1367,9 @@ static void ns83820_do_isr(struct net_de * work has accumulated */ if ((ISR_TXDESC | ISR_TXIDLE | ISR_TXOK | ISR_TXERR) & isr) { + spin_lock_irq(&dev->tx_lock); do_tx_done(ndev); + spin_unlock_irq(&dev->tx_lock); /* Disable TxOk if there are no outstanding tx packets. */ @@ -1455,7 +1454,7 @@ static void ns83820_tx_timeout(struct ne u32 tx_done_idx, *desc; unsigned long flags; - local_irq_save(flags); + spin_lock_irqsave(&dev->tx_lock, flags); tx_done_idx = dev->tx_done_idx; desc = dev->tx_descs + (tx_done_idx * DESC_SIZE); @@ -1482,7 +1481,7 @@ static void ns83820_tx_timeout(struct ne ndev->name, tx_done_idx, dev->tx_free_idx, le32_to_cpu(desc[DESC_CMDSTS])); - local_irq_restore(flags); + spin_unlock_irqrestore(&dev->tx_lock, flags); } static void ns83820_tx_watch(unsigned long data) Index: linux.prev/drivers/net/plip.c =================================================================== --- linux.prev.orig/drivers/net/plip.c +++ linux.prev/drivers/net/plip.c @@ -229,7 +229,10 @@ struct net_local { struct hh_cache *hh); spinlock_t lock; atomic_t kill_timer; - struct semaphore killed_timer_sem; + /* + * PREEMPT_RT: this isn't a mutex, it should be struct completion. 
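+ * (struct completion is used exactly that way for cpu5wdt earlier + * in this series; compat_semaphore keeps the old semaphore + * semantics on -rt for now.)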
+ */ + struct compat_semaphore killed_timer_sem; }; static inline void enable_parport_interrupts (struct net_device *dev) Index: linux.prev/drivers/net/ppp_async.c =================================================================== --- linux.prev.orig/drivers/net/ppp_async.c +++ linux.prev/drivers/net/ppp_async.c @@ -66,7 +66,7 @@ struct asyncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ unsigned char obuf[OBUFSIZE]; }; Index: linux.prev/drivers/net/ppp_synctty.c =================================================================== --- linux.prev.orig/drivers/net/ppp_synctty.c +++ linux.prev/drivers/net/ppp_synctty.c @@ -70,7 +70,7 @@ struct syncppp { struct tasklet_struct tsk; atomic_t refcnt; - struct semaphore dead_sem; + struct compat_semaphore dead_sem; struct ppp_channel chan; /* interface to generic ppp layer */ }; Index: linux.prev/drivers/net/skge.c =================================================================== --- linux.prev.orig/drivers/net/skge.c +++ linux.prev/drivers/net/skge.c @@ -2272,12 +2272,9 @@ static int skge_xmit_frame(struct sk_buf if (!skb) return NETDEV_TX_OK; - local_irq_save(flags); - if (!spin_trylock(&skge->tx_lock)) { + if (!spin_trylock_irqsave(&skge->tx_lock, flags)) /* Collision - tell upper layer to requeue */ - local_irq_restore(flags); return NETDEV_TX_LOCKED; - } if (unlikely(skge->tx_avail < skb_shinfo(skb)->nr_frags +1)) { if (!netif_queue_stopped(dev)) { @@ -2823,10 +2820,10 @@ static void skge_extirq(unsigned long da } spin_unlock(&hw->phy_lock); - local_irq_disable(); + spin_lock_irq(&hw->hw_lock); hw->intr_mask |= IS_EXT_REG; skge_write32(hw, B0_IMSK, hw->intr_mask); - local_irq_enable(); + spin_unlock_irq(&hw->hw_lock); } static inline void skge_wakeup(struct net_device *dev) @@ -2845,6 +2842,8 @@ static irqreturn_t skge_intr(int irq, vo if (status == 0 || status == ~0) /* hotplug or shared irq */ return IRQ_NONE; + spin_lock(&hw->hw_lock); + status &= hw->intr_mask; if (status & IS_R1_F) { hw->intr_mask &= ~IS_R1_F; @@ -2896,6 +2895,8 @@ static irqreturn_t skge_intr(int irq, vo skge_write32(hw, B0_IMSK, hw->intr_mask); + spin_unlock(&hw->hw_lock); + return IRQ_HANDLED; } @@ -3252,6 +3253,7 @@ static int __devinit skge_probe(struct p } hw->pdev = pdev; + spin_lock_init(&hw->hw_lock); spin_lock_init(&hw->phy_lock); tasklet_init(&hw->ext_tasklet, skge_extirq, (unsigned long) hw); Index: linux.prev/drivers/net/skge.h =================================================================== --- linux.prev.orig/drivers/net/skge.h +++ linux.prev/drivers/net/skge.h @@ -2472,6 +2472,7 @@ struct skge_hw { u16 phy_addr; struct tasklet_struct ext_tasklet; + spinlock_t hw_lock; spinlock_t phy_lock; }; Index: linux.prev/drivers/net/smc91x.c =================================================================== --- linux.prev.orig/drivers/net/smc91x.c +++ linux.prev/drivers/net/smc91x.c @@ -74,6 +74,7 @@ static const char version[] = #include #include #include +#include #include #include #include @@ -2011,7 +2012,7 @@ static int __init smc_probe(struct net_d if (retval) goto err_out; - set_irq_type(dev->irq, SMC_IRQ_TRIGGER_TYPE); + SMC_SET_IRQ_TYPE(dev->irq, SMC_IRQ_TRIGGER_TYPE); #ifdef SMC_USE_PXA_DMA { Index: linux.prev/drivers/net/smc91x.h =================================================================== --- linux.prev.orig/drivers/net/smc91x.h +++ linux.prev/drivers/net/smc91x.h @@ -90,7 +90,7 @@ __l--; \ } \ } while (0) -#define 
set_irq_type(irq, type) +#define SMC_SET_IRQ_TYPE(irq, type) #elif defined(CONFIG_SA1100_PLEB) /* We can only do 16-bit reads and writes in the static memory space. */ @@ -109,7 +109,7 @@ #define SMC_outw(v, a, r) writew(v, (a) + (r)) #define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) -#define set_irq_type(irq, type) do {} while (0) +#define SMC_SET_IRQ_TYPE(irq, type) do {} while (0) #elif defined(CONFIG_SA1100_ASSABET) @@ -209,7 +209,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, #define SMC_insw(a, r, p, l) insw((a) + (r) - 0xa0000000, p, l) #define SMC_outsw(a, r, p, l) outsw((a) + (r) - 0xa0000000, p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_SET_IRQ_TYPE(irq, type) do {} while(0) #elif defined(CONFIG_ISA) @@ -237,7 +237,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, #define SMC_insw(a, r, p, l) insw(((u32)a) + (r), p, l) #define SMC_outsw(a, r, p, l) outsw(((u32)a) + (r), p, l) -#define set_irq_type(irq, type) do {} while(0) +#define SMC_SET_IRQ_TYPE(irq, type) do {} while(0) #define RPC_LSA_DEFAULT RPC_LED_TX_RX #define RPC_LSB_DEFAULT RPC_LED_100_10 @@ -342,6 +342,10 @@ static inline void SMC_outsw (unsigned l #endif +#ifndef SMC_SET_IRQ_TYPE +#define SMC_SET_IRQ_TYPE set_irq_type +#endif + #ifndef SMC_IRQ_TRIGGER_TYPE #define SMC_IRQ_TRIGGER_TYPE IRQT_RISING #endif Index: linux.prev/drivers/net/tulip/tulip_core.c =================================================================== --- linux.prev.orig/drivers/net/tulip/tulip_core.c +++ linux.prev/drivers/net/tulip/tulip_core.c @@ -1809,6 +1809,7 @@ static void __devexit tulip_remove_one ( pci_iounmap(pdev, tp->base_addr); free_netdev (dev); pci_release_regions (pdev); + pci_disable_device (pdev); pci_set_drvdata (pdev, NULL); /* pci_power_off (pdev, -1); */ Index: linux.prev/drivers/oprofile/buffer_sync.c =================================================================== --- linux.prev.orig/drivers/oprofile/buffer_sync.c +++ linux.prev/drivers/oprofile/buffer_sync.c @@ -43,13 +43,16 @@ static void process_task_mortuary(void); * list for processing. Only after two full buffer syncs * does the task eventually get freed, because by then * we are sure we will not reference it again. + * Can be invoked from softirq via RCU callback due to + * call_rcu() of the task struct, hence the _irqsave. 
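+ * Without _irqsave, the softirq could fire while this CPU + * already holds task_mortuary and deadlock on the spinlock.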
*/ static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) { + unsigned long flags; struct task_struct * task = data; - spin_lock(&task_mortuary); + spin_lock_irqsave(&task_mortuary, flags); list_add(&task->tasks, &dying_tasks); - spin_unlock(&task_mortuary); + spin_unlock_irqrestore(&task_mortuary, flags); return NOTIFY_OK; } @@ -431,25 +434,22 @@ static void increment_tail(struct oprofi */ static void process_task_mortuary(void) { - struct list_head * pos; - struct list_head * pos2; + unsigned long flags; + LIST_HEAD(local_dead_tasks); struct task_struct * task; + struct task_struct * ttask; - spin_lock(&task_mortuary); + spin_lock_irqsave(&task_mortuary, flags); - list_for_each_safe(pos, pos2, &dead_tasks) { - task = list_entry(pos, struct task_struct, tasks); - list_del(&task->tasks); - free_task(task); - } + list_splice_init(&dead_tasks, &local_dead_tasks); + list_splice_init(&dying_tasks, &dead_tasks); - list_for_each_safe(pos, pos2, &dying_tasks) { - task = list_entry(pos, struct task_struct, tasks); + spin_unlock_irqrestore(&task_mortuary, flags); + + list_for_each_entry_safe(task, ttask, &local_dead_tasks, tasks) { list_del(&task->tasks); - list_add_tail(&task->tasks, &dead_tasks); + free_task(task); } - - spin_unlock(&task_mortuary); } Index: linux.prev/drivers/oprofile/oprofilefs.c =================================================================== --- linux.prev.orig/drivers/oprofile/oprofilefs.c +++ linux.prev/drivers/oprofile/oprofilefs.c @@ -21,7 +21,7 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_RAW_SPINLOCK(oprofilefs_lock); static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) { Index: linux.prev/drivers/pci/hotplug/cpci_hotplug_core.c =================================================================== --- linux.prev.orig/drivers/pci/hotplug/cpci_hotplug_core.c +++ linux.prev/drivers/pci/hotplug/cpci_hotplug_core.c @@ -60,8 +60,8 @@ static int slots; static atomic_t extracting; int cpci_debug; static struct cpci_hp_controller *controller; -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore thread_exit; /* guard ensure thread has exited before calling it quits */ static int thread_finished = 1; static int enable_slot(struct hotplug_slot *slot); Index: linux.prev/drivers/pci/hotplug/cpqphp_ctrl.c =================================================================== --- linux.prev.orig/drivers/pci/hotplug/cpqphp_ctrl.c +++ linux.prev/drivers/pci/hotplug/cpqphp_ctrl.c @@ -45,8 +45,8 @@ static int configure_new_function(struct u8 behind_bridge, struct resource_lists *resources); static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ Index: linux.prev/drivers/pci/hotplug/ibmphp_hpc.c 
=================================================================== --- linux.prev.orig/drivers/pci/hotplug/ibmphp_hpc.c +++ linux.prev/drivers/pci/hotplug/ibmphp_hpc.c @@ -104,7 +104,7 @@ static int tid_poll; static struct semaphore sem_hpcaccess; // lock access to HPC static struct semaphore semOperations; // lock all operations and // access to data structures -static struct semaphore sem_exit; // make sure polling thread goes away +static struct compat_semaphore sem_exit; // make sure polling thread goes away //---------------------------------------------------------------------------- // local function prototypes //---------------------------------------------------------------------------- Index: linux.prev/drivers/pci/hotplug/pciehp_ctrl.c =================================================================== --- linux.prev.orig/drivers/pci/hotplug/pciehp_ctrl.c +++ linux.prev/drivers/pci/hotplug/pciehp_ctrl.c @@ -37,8 +37,8 @@ static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ static unsigned long surprise_rm_pending; /* = 0 */ Index: linux.prev/drivers/pci/hotplug/shpchp_ctrl.c =================================================================== --- linux.prev.orig/drivers/pci/hotplug/shpchp_ctrl.c +++ linux.prev/drivers/pci/hotplug/shpchp_ctrl.c @@ -37,8 +37,8 @@ static void interrupt_event_handler(struct controller *ctrl); -static struct semaphore event_semaphore; /* mutex for process loop (up if something to process) */ -static struct semaphore event_exit; /* guard ensure thread has exited before calling it quits */ +static struct compat_semaphore event_semaphore; /* mutex for process loop (up if something to process) */ +static struct compat_semaphore event_exit; /* guard ensure thread has exited before calling it quits */ static int event_finished; static unsigned long pushbutton_pending; /* = 0 */ Index: linux.prev/drivers/pcmcia/soc_common.c =================================================================== --- linux.prev.orig/drivers/pcmcia/soc_common.c +++ linux.prev/drivers/pcmcia/soc_common.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include Index: linux.prev/drivers/s390/char/vmlogrdr.c =================================================================== --- linux.prev.orig/drivers/s390/char/vmlogrdr.c +++ linux.prev/drivers/s390/char/vmlogrdr.c @@ -145,7 +145,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "EREP", .minor_num = 0, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[0].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -154,7 +154,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "ACCOUNT", .minor_num = 1, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[1].priv_lock), .autorecording = 1, .autopurge = 1, }, @@ -163,7 +163,7 @@ static struct vmlogrdr_priv_t sys_ser[] .recording_name = "SYMPTOM", .minor_num = 2, .buffer_free = 1, - .priv_lock = SPIN_LOCK_UNLOCKED, + .priv_lock = SPIN_LOCK_UNLOCKED(sys_ser[2].priv_lock), 
.autorecording = 1, .autopurge = 1, } Index: linux.prev/drivers/s390/cio/cmf.c =================================================================== --- linux.prev.orig/drivers/s390/cio/cmf.c +++ linux.prev/drivers/s390/cio/cmf.c @@ -300,7 +300,7 @@ struct cmb_area { }; static struct cmb_area cmb_area = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cmb_area.lock), .list = LIST_HEAD_INIT(cmb_area.list), .num_channels = 1024, }; Index: linux.prev/drivers/sbus/char/cpwatchdog.c =================================================================== --- linux.prev.orig/drivers/sbus/char/cpwatchdog.c +++ linux.prev/drivers/sbus/char/cpwatchdog.c @@ -156,7 +156,7 @@ struct wd_device { }; static struct wd_device wd_dev = { - 0, SPIN_LOCK_UNLOCKED, 0, 0, 0, 0, + 0, SPIN_LOCK_UNLOCKED(wd_dev.lock), 0, 0, 0, 0, }; static struct timer_list wd_timer; Index: linux.prev/drivers/scsi/aacraid/aacraid.h =================================================================== --- linux.prev.orig/drivers/scsi/aacraid/aacraid.h +++ linux.prev/drivers/scsi/aacraid/aacraid.h @@ -735,7 +735,7 @@ struct aac_fib_context { u32 unique; // unique value representing this context ulong jiffies; // used for cleanup - dmb changed to ulong struct list_head next; // used to link contexts into a linked list - struct semaphore wait_sem; // this is used to wait for the next fib to arrive. + struct compat_semaphore wait_sem; // this is used to wait for the next fib to arrive. int wait; // Set to true when thread is in WaitForSingleObject unsigned long count; // total number of FIBs on FibList struct list_head fib_list; // this holds fibs and their attached hw_fibs @@ -804,7 +804,7 @@ struct fib { * This is the event the sendfib routine will wait on if the * caller did not pass one and this is synch io. 
*/ - struct semaphore event_wait; + struct compat_semaphore event_wait; spinlock_t event_lock; u32 done; /* gets set to 1 when fib is complete */ Index: linux.prev/drivers/scsi/aic7xxx/aic79xx_osm.h =================================================================== --- linux.prev.orig/drivers/scsi/aic7xxx/aic79xx_osm.h +++ linux.prev/drivers/scsi/aic7xxx/aic79xx_osm.h @@ -390,7 +390,7 @@ struct ahd_platform_data { spinlock_t spin_lock; u_int qfrozen; struct timer_list reset_timer; - struct semaphore eh_sem; + struct compat_semaphore eh_sem; struct Scsi_Host *host; /* pointer to scsi host */ #define AHD_LINUX_NOIRQ ((uint32_t)~0) uint32_t irq; /* IRQ for this adapter */ Index: linux.prev/drivers/scsi/aic7xxx/aic7xxx_osm.h =================================================================== --- linux.prev.orig/drivers/scsi/aic7xxx/aic7xxx_osm.h +++ linux.prev/drivers/scsi/aic7xxx/aic7xxx_osm.h @@ -394,7 +394,7 @@ struct ahc_platform_data { spinlock_t spin_lock; u_int qfrozen; struct timer_list reset_timer; - struct semaphore eh_sem; + struct compat_semaphore eh_sem; struct Scsi_Host *host; /* pointer to scsi host */ #define AHC_LINUX_NOIRQ ((uint32_t)~0) uint32_t irq; /* IRQ for this adapter */ Index: linux.prev/drivers/scsi/libata-core.c =================================================================== --- linux.prev.orig/drivers/scsi/libata-core.c +++ linux.prev/drivers/scsi/libata-core.c @@ -4882,7 +4882,7 @@ module_init(ata_init); module_exit(ata_exit); static unsigned long ratelimit_time; -static spinlock_t ata_ratelimit_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(ata_ratelimit_lock); int ata_ratelimit(void) { Index: linux.prev/drivers/scsi/qla2xxx/qla_def.h =================================================================== --- linux.prev.orig/drivers/scsi/qla2xxx/qla_def.h +++ linux.prev/drivers/scsi/qla2xxx/qla_def.h @@ -2411,7 +2411,7 @@ typedef struct scsi_qla_host { spinlock_t mbx_reg_lock; /* Mbx Cmd Register Lock */ struct semaphore mbx_cmd_sem; /* Serialize mbx access */ - struct semaphore mbx_intr_sem; /* Used for completion notification */ + struct compat_semaphore mbx_intr_sem; /* Used for completion notification */ uint32_t mbx_flags; #define MBX_IN_PROGRESS BIT_0 Index: linux.prev/drivers/scsi/qla2xxx/qla_os.c =================================================================== --- linux.prev.orig/drivers/scsi/qla2xxx/qla_os.c +++ linux.prev/drivers/scsi/qla2xxx/qla_os.c @@ -2082,12 +2082,13 @@ qla2x00_free_sp_pool( scsi_qla_host_t *h static int qla2x00_do_dpc(void *data) { - DECLARE_MUTEX_LOCKED(sem); + DECLARE_MUTEX(sem); scsi_qla_host_t *ha; fc_port_t *fcport; uint8_t status; uint16_t next_loopid; + down(&sem); ha = (scsi_qla_host_t *)data; lock_kernel(); Index: linux.prev/drivers/scsi/scsi.c =================================================================== --- linux.prev.orig/drivers/scsi/scsi.c +++ linux.prev/drivers/scsi/scsi.c @@ -768,10 +768,10 @@ void __scsi_done(struct scsi_cmnd *cmd) * It is a per-CPU queue, so we just disable local interrupts * and need no spinlock. 
*/ + /* raw_local_irq_*() disables hard interrupts even on PREEMPT_RT: */ - local_irq_save(flags); + raw_local_irq_save(flags); list_add_tail(&cmd->eh_entry, &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_restore(flags); + raw_local_irq_restore(flags); } /** @@ -788,9 +788,9 @@ static void scsi_softirq(struct softirq_ int disposition; LIST_HEAD(local_q); - local_irq_disable(); + raw_local_irq_disable(); list_splice_init(&__get_cpu_var(scsi_done_q), &local_q); - local_irq_enable(); + raw_local_irq_enable(); while (!list_empty(&local_q)) { struct scsi_cmnd *cmd = list_entry(local_q.next, @@ -1282,11 +1282,11 @@ static int scsi_cpu_notify(struct notifi switch(action) { case CPU_DEAD: /* Drain scsi_done_q. */ - local_irq_disable(); + raw_local_irq_disable(); list_splice_init(&per_cpu(scsi_done_q, cpu), &__get_cpu_var(scsi_done_q)); raise_softirq_irqoff(SCSI_SOFTIRQ); - local_irq_enable(); + raw_local_irq_enable(); break; default: break; Index: linux.prev/drivers/serial/8250.c =================================================================== --- linux.prev.orig/drivers/serial/8250.c +++ linux.prev/drivers/serial/8250.c @@ -1344,6 +1344,17 @@ static irqreturn_t serial8250_interrupt( "irq%d\n", irq); break; } + /* + * If we have a buggy TX line that doesn't + * notify us via the IIR that we need to + * transmit, force the call: + */ + if (!handled && (up->bugs & UART_BUG_TXEN)) { + spin_lock(&up->port.lock); + serial8250_handle_port(up, regs); + spin_unlock(&up->port.lock); + } + } while (l != end); spin_unlock(&i->lock); Index: linux.prev/drivers/serial/cpm_uart/cpm_uart_core.c =================================================================== --- linux.prev.orig/drivers/serial/cpm_uart/cpm_uart_core.c +++ linux.prev/drivers/serial/cpm_uart/cpm_uart_core.c @@ -909,7 +909,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SMC1_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SMC1].port.lock), }, .flags = FLAG_SMC, .tx_nrfifos = TX_NUM_FIFO, @@ -923,7 +923,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SMC2_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SMC2].port.lock), }, .flags = FLAG_SMC, .tx_nrfifos = TX_NUM_FIFO, @@ -940,7 +940,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC1_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC1].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -954,7 +954,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC2_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC2].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -968,7 +968,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC3_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC3].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, @@ -982,7 +982,7 @@ struct uart_cpm_port cpm_uart_ports[UART .irq = SCC4_IRQ, .ops = &cpm_uart_pops, .iotype = SERIAL_IO_MEM, - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(cpm_uart_ports[UART_SCC4].port.lock), }, .tx_nrfifos = TX_NUM_FIFO, .tx_fifosize = TX_BUF_SIZE, Index: linux.prev/drivers/serial/s3c2410.c =================================================================== --- 
linux.prev.orig/drivers/serial/s3c2410.c +++ linux.prev/drivers/serial/s3c2410.c @@ -966,7 +966,7 @@ static struct uart_driver s3c24xx_uart_d static struct s3c24xx_uart_port s3c24xx_serial_ports[NR_PORTS] = { [0] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[0].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX0, .uartclk = 0, @@ -978,7 +978,7 @@ static struct s3c24xx_uart_port s3c24xx_ }, [1] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[1].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX1, .uartclk = 0, @@ -992,7 +992,7 @@ static struct s3c24xx_uart_port s3c24xx_ [2] = { .port = { - .lock = SPIN_LOCK_UNLOCKED, + .lock = SPIN_LOCK_UNLOCKED(s3c24xx_serial_ports[2].port.lock), .iotype = UPIO_MEM, .irq = IRQ_S3CUART_RX2, .uartclk = 0, Index: linux.prev/drivers/usb/core/devio.c =================================================================== --- linux.prev.orig/drivers/usb/core/devio.c +++ linux.prev/drivers/usb/core/devio.c @@ -307,10 +307,11 @@ static void async_completed(struct urb * struct async *as = (struct async *)urb->context; struct dev_state *ps = as->ps; struct siginfo sinfo; + unsigned long flags; - spin_lock(&ps->lock); - list_move_tail(&as->asynclist, &ps->async_completed); - spin_unlock(&ps->lock); + spin_lock_irqsave(&ps->lock, flags); + list_move_tail(&as->asynclist, &ps->async_completed); + spin_unlock_irqrestore(&ps->lock, flags); if (as->signr) { sinfo.si_signo = as->signr; sinfo.si_errno = as->urb->status; Index: linux.prev/drivers/usb/core/hcd.c =================================================================== --- linux.prev.orig/drivers/usb/core/hcd.c +++ linux.prev/drivers/usb/core/hcd.c @@ -497,13 +497,11 @@ error: } /* any errors get returned through the urb completion */ - local_irq_save (flags); - spin_lock (&urb->lock); + spin_lock_irqsave(&urb->lock, flags); if (urb->status == -EINPROGRESS) urb->status = status; - spin_unlock (&urb->lock); + spin_unlock_irqrestore(&urb->lock, flags); usb_hcd_giveback_urb (hcd, urb, NULL); - local_irq_restore (flags); return 0; } @@ -531,8 +529,7 @@ void usb_hcd_poll_rh_status(struct usb_h if (length > 0) { /* try to complete the status urb */ - local_irq_save (flags); - spin_lock(&hcd_root_hub_lock); + spin_lock_irqsave(&hcd_root_hub_lock, flags); urb = hcd->status_urb; if (urb) { spin_lock(&urb->lock); @@ -548,14 +545,13 @@ void usb_hcd_poll_rh_status(struct usb_h spin_unlock(&urb->lock); } else length = 0; - spin_unlock(&hcd_root_hub_lock); + spin_unlock_irqrestore(&hcd_root_hub_lock, flags); /* local irqs are always blocked in completions */ if (length > 0) usb_hcd_giveback_urb (hcd, urb, NULL); else hcd->poll_pending = 1; - local_irq_restore (flags); } /* The USB 2.0 spec says 256 ms. 
This is close enough and won't @@ -638,17 +634,15 @@ static int usb_rh_urb_dequeue (struct us } else { /* Status URB */ if (!hcd->uses_new_polling) del_timer_sync (&hcd->rh_timer); - local_irq_disable (); - spin_lock (&hcd_root_hub_lock); + spin_lock_irq(&hcd_root_hub_lock); if (urb == hcd->status_urb) { hcd->status_urb = NULL; urb->hcpriv = NULL; } else urb = NULL; /* wasn't fully queued */ - spin_unlock (&hcd_root_hub_lock); + spin_unlock_irq(&hcd_root_hub_lock); if (urb) usb_hcd_giveback_urb (hcd, urb, NULL); - local_irq_enable (); } return 0; @@ -1361,15 +1355,13 @@ hcd_endpoint_disable (struct usb_device WARN_ON (!HC_IS_RUNNING (hcd->state) && hcd->state != HC_STATE_HALT && udev->state != USB_STATE_NOTATTACHED); - local_irq_disable (); - /* FIXME move most of this into message.c as part of its * endpoint disable logic */ /* ep is already gone from udev->ep_{in,out}[]; no more submits */ rescan: - spin_lock (&hcd_data_lock); + spin_lock_irq(&hcd_data_lock); list_for_each_entry (urb, &ep->urb_list, urb_list) { int tmp; @@ -1382,13 +1374,13 @@ rescan: if (urb->status != -EINPROGRESS) continue; usb_get_urb (urb); - spin_unlock (&hcd_data_lock); + spin_unlock_irq(&hcd_data_lock); - spin_lock (&urb->lock); + spin_lock_irq(&urb->lock); tmp = urb->status; if (tmp == -EINPROGRESS) urb->status = -ESHUTDOWN; - spin_unlock (&urb->lock); + spin_unlock_irq(&urb->lock); /* kick hcd unless it's already returning this */ if (tmp == -EINPROGRESS) { @@ -1411,8 +1403,7 @@ rescan: /* list contents may have changed */ goto rescan; } - spin_unlock (&hcd_data_lock); - local_irq_enable (); + spin_unlock_irq(&hcd_data_lock); /* synchronize with the hardware, so old configuration state * clears out immediately (and will be freed). Index: linux.prev/drivers/usb/core/message.c =================================================================== --- linux.prev.orig/drivers/usb/core/message.c +++ linux.prev/drivers/usb/core/message.c @@ -233,8 +233,9 @@ static void sg_clean (struct usb_sg_requ static void sg_complete (struct urb *urb, struct pt_regs *regs) { struct usb_sg_request *io = (struct usb_sg_request *) urb->context; + unsigned long flags; - spin_lock (&io->lock); + spin_lock_irqsave (&io->lock, flags); /* In 2.5 we require hcds' endpoint queues not to progress after fault * reports, until the completion callback (this!) returns. That lets @@ -268,7 +269,7 @@ static void sg_complete (struct urb *urb * unlink pending urbs so they won't rx/tx bad data. * careful: unlink can sometimes be synchronous... 
Index: linux.prev/drivers/usb/core/message.c
===================================================================
--- linux.prev.orig/drivers/usb/core/message.c
+++ linux.prev/drivers/usb/core/message.c
@@ -233,8 +233,9 @@ static void sg_clean (struct usb_sg_requ
 static void sg_complete (struct urb *urb, struct pt_regs *regs)
 {
 	struct usb_sg_request	*io = (struct usb_sg_request *) urb->context;
+	unsigned long flags;
 
-	spin_lock (&io->lock);
+	spin_lock_irqsave (&io->lock, flags);
 
 	/* In 2.5 we require hcds' endpoint queues not to progress after fault
 	 * reports, until the completion callback (this!) returns.  That lets
@@ -268,7 +269,7 @@ static void sg_complete (struct urb *urb
 		 * unlink pending urbs so they won't rx/tx bad data.
 		 * careful: unlink can sometimes be synchronous...
 		 */
-		spin_unlock (&io->lock);
+		spin_unlock_irqrestore (&io->lock, flags);
 		for (i = 0, found = 0; i < io->entries; i++) {
 			if (!io->urbs [i] || !io->urbs [i]->dev)
 				continue;
@@ -283,7 +284,7 @@ static void sg_complete (struct urb *urb
 			} else if (urb == io->urbs [i])
 				found = 1;
 		}
-		spin_lock (&io->lock);
+		spin_lock_irqsave (&io->lock, flags);
 	}
 
 	urb->dev = NULL;
@@ -293,7 +294,7 @@ static void sg_complete (struct urb *urb
 	if (!io->count)
 		complete (&io->complete);
 
-	spin_unlock (&io->lock);
+	spin_unlock_irqrestore (&io->lock, flags);
 }
Index: linux.prev/drivers/usb/net/usbnet.c
===================================================================
--- linux.prev.orig/drivers/usb/net/usbnet.c
+++ linux.prev/drivers/usb/net/usbnet.c
@@ -819,6 +819,8 @@ static void tx_complete (struct urb *urb
 
 	urb->dev = NULL;
 	entry->state = tx_done;
+	spin_lock_rt(&dev->txq.lock);
+	spin_unlock_rt(&dev->txq.lock);
 	defer_bh(dev, skb, &dev->txq);
 }
 
Index: linux.prev/drivers/usb/storage/usb.c
===================================================================
--- linux.prev.orig/drivers/usb/storage/usb.c
+++ linux.prev/drivers/usb/storage/usb.c
@@ -327,6 +327,7 @@ static int usb_stor_control_thread(void
 		if (test_bit(US_FLIDX_DISCONNECTING, &us->flags)) {
 			US_DEBUGP("-- exiting\n");
 			up(&(us->dev_semaphore));
+			up(&us->sema);
 			break;
 		}
 
Index: linux.prev/drivers/usb/storage/usb.h
===================================================================
--- linux.prev.orig/drivers/usb/storage/usb.h
+++ linux.prev/drivers/usb/storage/usb.h
@@ -171,7 +171,7 @@ struct us_data {
 	dma_addr_t		iobuf_dma;
 
 	/* mutual exclusion and synchronization structures */
-	struct semaphore	sema;		/* to sleep thread on	*/
+	struct compat_semaphore	sema;		/* to sleep thread on	*/
 	struct completion	notify;		/* thread begin/end	*/
 	wait_queue_head_t	delay_wait;	/* wait during scan, reset */
 
Index: linux.prev/drivers/video/backlight/corgi_bl.c
===================================================================
--- linux.prev.orig/drivers/video/backlight/corgi_bl.c
+++ linux.prev/drivers/video/backlight/corgi_bl.c
@@ -28,7 +28,7 @@ static int corgibl_powermode = FB_BLANK_
 static int current_intensity = 0;
 static int corgibl_limit = 0;
 static void (*corgibl_mach_set_intensity)(int intensity);
-static spinlock_t bl_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(bl_lock);
 static struct backlight_properties corgibl_data;
 
 static void corgibl_send_intensity(int intensity)
Index: linux.prev/drivers/video/console/fbcon.c
===================================================================
--- linux.prev.orig/drivers/video/console/fbcon.c
+++ linux.prev/drivers/video/console/fbcon.c
@@ -1187,7 +1187,6 @@ static void fbcon_clear(struct vc_data *
 {
 	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
 	struct fbcon_ops *ops = info->fbcon_par;
-	struct display *p = &fb_display[vc->vc_num];
 	u_int y_break;
 
@@ -1216,10 +1215,11 @@ static void fbcon_putcs(struct vc_data *
 	struct display *p = &fb_display[vc->vc_num];
 	struct fbcon_ops *ops = info->fbcon_par;
 
-	if (!fbcon_is_inactive(vc, info))
+	if (!fbcon_is_inactive(vc, info)) {
 		ops->putcs(vc, info, s, count, real_y(p, ypos), xpos,
 			   get_color(vc, info, scr_readw(s), 1),
 			   get_color(vc, info, scr_readw(s), 0));
+	}
 }
 
 static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos)
@@ -2990,6 +2990,7 @@ static const struct consw fb_con = {
 	.con_screen_pos		= fbcon_screen_pos,
 	.con_getxy		= fbcon_getxy,
 	.con_resize		= fbcon_resize,
+	.con_preemptible	= 1,
 };
 
 static struct notifier_block fbcon_event_notifier = {
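Two initializer conversions run through the hunks above. The corgi_bl.c hunk replaces a SPIN_LOCK_UNLOCKED static initializer with DEFINE_SPINLOCK(), and the s3c2410.c hunks at the top of this patch pass the lock variable to SPIN_LOCK_UNLOCKED() where the lock is a field inside a larger static initializer; both forms give the lock-debugging infrastructure the identity of each individual lock. The storage driver's switch from struct semaphore to struct compat_semaphore is related: on PREEMPT_RT plain semaphores are converted to RT locks, and compat_semaphore preserves the classic counting-semaphore semantics the control thread depends on. A small sketch of the two initializer forms, assuming the RT tree's SPIN_LOCK_UNLOCKED(lock) macro shown in this patch (bl_demo_lock and port_demo are made-up names; in later mainline kernels the per-lock static initializer became __SPIN_LOCK_UNLOCKED):

#include <linux/spinlock.h>

/* Standalone static lock: DEFINE_SPINLOCK() embeds the variable name,
 * so lock validation and RT debugging can report the lock by name. */
static DEFINE_SPINLOCK(bl_demo_lock);

/* Lock embedded in a larger static initializer, where DEFINE_SPINLOCK()
 * cannot be used: the RT tree's SPIN_LOCK_UNLOCKED() takes the lock
 * itself as an argument for the same reason. */
struct port_demo {
	spinlock_t lock;
	int irq;
};

static struct port_demo port_demo = {
	.lock	= SPIN_LOCK_UNLOCKED(port_demo.lock),
	.irq	= 0,
};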
Index: linux.prev/drivers/video/console/vgacon.c
===================================================================
--- linux.prev.orig/drivers/video/console/vgacon.c
+++ linux.prev/drivers/video/console/vgacon.c
@@ -53,7 +53,7 @@
 #include