GIT de7860c3f3272086a4c3a1b4280b11ffae7c32be git+ssh://master.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git commit cfbf07f2a80b618c42a42c20d83647ea8fcceca0 Author: Christoph Lameter Date: Tue May 15 01:42:06 2007 -0700 SLUB: CONFIG_LARGE_ALLOCS must consider MAX_ORDER limit Take MAX_ORDER into consideration when determining KMALLOC_SHIFT_HIGH. Otherwise we may run into a situation where we attempt to create general slabs larger than MAX_ORDER. Signed-off-by: Christoph Lameter Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 6a3ee3d5529c5e66aedf91401bfac65c61998639 Author: Jeremy Fitzhardinge Date: Tue May 15 01:41:59 2007 -0700 i386: fix voyager build This adds an smp_ops for voyager, and hooks things up appropriately. This is the first baby-step to making subarch runtime switchable. Signed-off-by: Jeremy Fitzhardinge Cc: James Bottomley Cc: Eric W. Biederman Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 297d9c035edd04327fedc0d1da27c2b112b66fcc Author: Jeremy Fitzhardinge Date: Tue May 15 01:41:48 2007 -0700 i386: move common parts of smp into their own file Several parts of kernel/smp.c and smpboot.c are generally useful for other subarchitectures and paravirt_ops implementations, so make them available for reuse. Signed-off-by: Jeremy Fitzhardinge Acked-by: Chris Wright Cc: James Bottomley Cc: Eric W. Biederman Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 838c41184fee5e151c09972f2ba90c16493af614 Author: Prarit Bhargava Date: Tue May 15 01:41:43 2007 -0700 Remove cpu hotplug defines for __INIT & __INITDATA After examining what was checked in and the code base I discovered that most of 86c0baf123e474b6eb404798926ecf62b426bf3a wasn't necessary anymore.... So here's a patch that reverts the last part of that changeset: Revert part of 86c0baf123e474b6eb404798926ecf62b426bf3a. The kernel has moved forward to a state where the original change is not necessary. After porting forward, this final version of the patch was applied and broke non-x86 architectures. Signed-off-by: Prarit Bhargava Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 3bd2aad2103314a0a09614dc29926a1437db02f7 Author: Stephen Rothwell Date: Tue May 15 01:41:36 2007 -0700 Revert "MAINTAINERS: remove invalid list address for TPM" This reverts commit b6d1c9a44744224d83125a5a89c1a6cc4db27361. Others tell me that this address has worked for them, so I can only assume that I hit a glitch in the sourceforge mail system. Signed-off-by: Stephen Rothwell Cc: Kylene Hall Cc: Marcel Selhorst Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 8f89441b37536fea92b1ed7004e5e2dda011473d Author: Thomas Gleixner Date: Tue May 15 01:41:32 2007 -0700 clocksource: fix lock order in the resume path lockdep complains about the lock nesting of clocksource and watchdog lock in the resume path. Change the resume marker to a bit operation and remove the lock from this path. Signed-off-by: Thomas Gleixner Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 3c46bdcaec53eda069a8a9cd60621c7431aa7842 Author: Geert Uytterhoeven Date: Tue May 15 01:41:29 2007 -0700 m68k: implement __clear_user() m68k: implement __clear_user(), which is needed by fs/signalfd.c Since we always let the MMU do all checking, clear_user() and __clear_user() are identical. The old clear_user() is renamed to __clear_user() for consistency. Signed-off-by: Geert Uytterhoeven Cc: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 0fcdf96ca95f81a0e1fd91a2de16dc67c641c958 Author: Simon Horman Date: Tue May 15 01:41:15 2007 -0700 alpha: fix hard_smp_processor_id compile error "Remove hardcoding of hard_smp_processor_id on UP systems", 2f4dfe206a2fc07099dfad77a8ea2f4b4ae2140f in Linus' tree, moved the definition of hard_smp_processor_id linux/smp.h to asm/smp.h for UP systems. This causes a regression on Alpha. cc1: warnings being treated as errors arch/alpha/kernel/setup.c: In function 'setup_arch': arch/alpha/kernel/setup.c:506: warning: implicit declaration of function 'hard_smp_processor_id' make[1]: *** [arch/alpha/kernel/setup.o] error 1 make: *** [arch/alpha/kernel] error 2 By including asm/smp.h non-conditionally in asm/mmu_context.h the problem appears to be resolved. Cc: Fernando Luis Vazquez Cao Signed-off-by: Simon Horman Cc: Richard Henderson Cc: Ivan Kokshaysky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit b67405bbbba6bbd28dfd5337b29d5bc5a1140afb Author: Yoshinori Sato Date: Tue May 15 01:41:07 2007 -0700 h8300 atomic.h update add atomic_sub_and_test define. Signed-off-by: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 218f0aaee8a6b0e5772b95b154dea5b7701b33aa Author: Paul Mundt Date: Tue May 15 01:41:02 2007 -0700 nommu: add ioremap_page_range() lib/ioremap.c is presently only built in if CONFIG_MMU is set. While this is reasonable, platforms that support both CONFIG_MMU=y or n need to be able to call in to this regardless. As none of the current nommu platforms do anything special with ioremap(), we assume that it's always successful. This fixes the SH-4 build with CONFIG_MMU=n. Signed-off-by: Paul Mundt Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit f0ee9aabb0520adea5937855a9575c08a97b16e7 Author: Davide Libenzi Date: Tue May 15 01:40:57 2007 -0700 epoll: move kfree inside ep_free Move the kfree() call inside the ep_free() function. Signed-off-by: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 67647d0fb8bc03609d045a9cce85f7ef6d763036 Author: Davide Libenzi Date: Tue May 15 01:40:52 2007 -0700 epoll: fix some comments Fixes some epoll code comments. Signed-off-by: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit c7ea76302547f81e4583d0d7c52a1c37c6747f5d Author: Davide Libenzi Date: Tue May 15 01:40:47 2007 -0700 epoll locks changes and cleanups Changes the rwlock to a spinlock, and drops the use-count variable. Operations are always bound by the mutex now, so the use-count is no more needed. For the same reason, the rwlock can become a simple spinlock. Signed-off-by: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit d47de16c7221968d3eab899d7540efa5ba77af5a Author: Davide Libenzi Date: Tue May 15 01:40:41 2007 -0700 fix epoll single pass code and add wait-exclusive flag Fixes the epoll single pass code. During the unlocked event delivery (to userspace) code, the poll callback can re-issue new events, and we must receive them correctly. Since we loop in a lockless fashion, we want to be O(nready), and we don't want to flash on/off the spinlock for every event, we have the poll callback to use a secondary list to queue events while we're inside the event delivery loop. The rw_semaphore has been turned into a mutex. This patch also adds the wait-exclusive flag, as suggested by Davi Arnaut. Signed-off-by: Davide Libenzi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit faa8b6c3c2e1454175609167a25ae525d075f045 Author: Linus Torvalds Date: Mon May 14 15:24:24 2007 -0700 Revert "ipmi: add new IPMI nmi watchdog handling" This reverts commit f64da958dfc83335de1d2bef9d3868f30feb4e53. Andi Kleen is unhappy with the changes, and they really do not seem worth it. IPMI could use DIE_NMI_IPI instead of the new callback, even though that ends up having its own set of problems too, mainly because the IPMI code cannot really know the NMI was from IPMI or not. Manually fix up conflicts in arch/x86_64/kernel/traps.c and drivers/char/ipmi/ipmi_watchdog.c. Cc: Andi Kleen Cc: Mathieu Desnoyers Cc: Corey Minyard Cc: Andrew Morton Signed-off-by: Linus Torvalds commit 7c5b9ef8577bfa7b74ea58fc9ff2934ffce13532 Author: Michael S. Tsirkin Date: Mon May 14 07:26:51 2007 +0300 IPoIB/cm: Optimize stale connection detection In the presence of some running RX connections, we repeat queue_delayed_work calls each 4 RX WRs, which is a waste. It's enough to start stale task when a first passive connection is added, and rerun it every IPOIB_CM_RX_DELAY as long as there are outstanding passive connections. This removes some code from RX data path. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier commit bd18c112774db5bb887adb981ffbe9bfe00b2f3a Author: Michael S. Tsirkin Date: Mon May 14 17:14:50 2007 +0300 IB/mthca: Set cleaned CQEs back to HW ownership when cleaning CQ mthca_cq_clean() updates the CQ consumer index without moving CQEs back to HW ownership. As a result, the same WRID might get reported twice, resulting in a use-after-free. This was observed in IPoIB CM. Fix by moving all freed CQEs to HW ownership. This fixes Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier commit 3e28c56b9b67347b42ba06f9a9373b408902beee Author: Michael S. Tsirkin Date: Mon May 14 07:26:51 2007 +0300 IB/mthca: Fix posting >255 recv WRs for Tavor Fix posting lists of > 255 receive WRs for Tavor: rq.next_ind must be updated each doorbell, otherwise the next doorbell will use an incorrect index. Found by Ronni Zimmermann at Mellanox. Signed-off-by: Michael S. Tsirkin Signed-off-by: Roland Dreier commit 6c719f5c6c823901fac2d46b83db5a69ba7e9152 Author: Sean Hefty Date: Mon May 7 11:49:27 2007 -0700 RDMA/cma: Add check to validate that cm_id is bound to a device Several checks in the rdma_cm check against the state of the cm_id, but only to validate that the cm_id is bound to an underlying transport specific CM and an RDMA device. Make the check explicit in what we're trying to check for, since we're not synchronizing against the cm_id state. This will allow a user to disconnect a cm_id or reject a connection after receiving a device removal event. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier commit be65f086f2a50c478b2f5ecf4c55a52a4e95059a Author: Sean Hefty Date: Mon May 7 11:49:12 2007 -0700 RDMA/cma: Fix synchronization with device removal in cma_iw_handler The cma_iw_handler needs to validate the state of the rdma_cm_id before processing a new connection request to ensure that a device removal is not already being processed for the same rdma_cm_id. Without the state check, the user can receive simultaneous callbacks for the same cm_id, or a callback after they've destroyed the cm_id. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier commit 8aa08602bdd617a9cdd147f19076a8c8a70e03ef Author: Sean Hefty Date: Mon May 7 11:49:00 2007 -0700 RDMA/cma: Simplify device removal handling code Add a new routine and rename another to encapsulate common code for synchronizing with device removal. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier commit 4e430dcb7b132a4076e533a9d69907acecbe71be Author: Joachim Fenkes Date: Wed May 9 13:48:31 2007 +0200 IB/ehca: Disable scaling code by default, bump version number - Scaling code is still considered experimental, so disable it by default - Increase version to SVNEHCA_0023 Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier commit bba9b6013e604fadb298191c058149acf1cdfced Author: Joachim Fenkes Date: Wed May 9 13:48:25 2007 +0200 IB/ehca: Beautify sysfs attribute code and fix compiler warnings eHCA's sysfs attributes are now being created via sysfs_create_group(), making the process neatly table-driven. The return value is checked, thus fixing a few compiler warnings. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier commit c7a14939e78e75dd90b54cb0df371019bc6d3e89 Author: Joachim Fenkes Date: Wed May 9 13:48:20 2007 +0200 IB/ehca: Remove _irqsave, move #ifdef - In ehca_process_eq(), we're IRQ safe throughout the whole function, so we don't need another _irqsave in the middle of flight. - take_over_work() is only called by comp_pool_callback(), so it can move into the same #ifdef block. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier commit c55a0ddd8ebdd657224449c2fbfcd427e054c8cc Author: Hoang-Nam Nguyen Date: Wed May 9 13:48:11 2007 +0200 IB/ehca: Fix AQP0/1 QP number AQP0/1 should report qp_num={0|1} and the actual QP# should be stored in struct ehca_qp, not the other way round. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier commit 92761cdaf215599a1bd81d383facb32adabfa620 Author: Joachim Fenkes Date: Wed May 9 13:48:01 2007 +0200 IB/ehca: Correctly set GRH mask bit in ehca_modify_qp() The driver needs to always supply the "GRH present" flag to the hypervisor, whether it's true or false. Not supplying it (i.e. not setting the corresponding mask bit) amounts to a "perhaps", which we don't want. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier commit 5d88278e3bdb6f2e4ed43306659e930ecd715f0c Author: Stefan Roscher Date: Wed May 9 13:47:56 2007 +0200 IB/ehca: Serialize hypervisor calls in ehca_register_mr() Some pSeries hypervisor versions show a race condition in the allocate MR hCall. Serialize this call per adapter to circumvent this problem. Signed-off-by: Joachim Fenkes Signed-off-by: Roland Dreier commit 8f140b407f3be04e7202be9aa0cfef3006d14c9f Author: Arthur Jones Date: Thu May 10 12:10:49 2007 -0700 IB/ipath: Shadow the gpio_mask register Once upon a time, GPIO interrupts were rare. But then a chip bug in the waldo series forced the use of a GPIO interrupt to signal packet reception. This greatly increased the frequency of GPIO interrupts which have the gpio_mask bits set on the waldo chips. Other bits in the gpio_status register are used for I2C clock and data lines, these bits are usually on. An "unlikely" annotation leftover from the old days was improperly applied to these bits, and an unnecessary chip mmio read was being accessed in the interrupt fast path on waldo. Remove the stagnant unlikely annotation in the interrupt handler and keep a shadow copy of the gpio_mask register to avoid the slow mmio read when testing for interruptable GPIO bits. Signed-off-by: Arthur Jones Signed-off-by: Roland Dreier commit 26c6bc7b812b4157ba929035e467c0f4dd165916 Author: Jack Morgenstein Date: Sun May 13 17:18:23 2007 +0300 IB/mlx4: Fix uninitialized spinlock for 32-bit archs uar_lock spinlock was used in mlx4_ib_cq_arm without being initialized (this only affects 32-bit archs, because uar_lock is not used on 64-bit archs and MLX4_INIT_DOORBELL_LOCK() is a NOP). Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier commit 140ff8b0459cac5ade7a42131f561a9ee0fa3cc4 Author: Stephen Rothwell Date: Mon May 14 13:47:47 2007 +1000 Declare another couple of compat syscalls. compat_sys_signalfd and compat_sys_timerfd need declarations before PowerPC can wire them up. Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds commit a34be83a9cfa15d596bfeb498b13380f0544249f Author: Dave Jones Date: Sun May 13 19:07:46 2007 -0400 MAINTAINERS update. I've not really 'maintained' this code for years, and others are doing a much more thorough job these days. Removing myself might stem some of the crazier emails I get. Signed-off-by: Dave Jones Acked-by: H. Peter Anvin Signed-off-by: Linus Torvalds commit d10ff3fb62bd38415c0f7be3d75d107e1f67e59a Author: Thomas Gleixner Date: Mon May 14 11:10:02 2007 +0200 timekeeping fix patch got mis-applied The time keeping code move to kernel/time/timekeeping.c broke the clocksource resume logic patch, which got applied to the old file by a fuzzy application. Fix it up and move the clocksource_resume() call to the appropriate place. Signed-off-by: Thomas Gleixner [ tssk, tssk, everybody should use --fuzz=0 ] Signed-off-by: Linus Torvalds commit 90e07d9f54c61449dd48eff82e2354d0124d4f7e Author: Nicolas Pitre Date: Sun May 13 18:03:08 2007 +0200 pxamci: fix PXA27x MMC workaround for bad CRC with 136 bit response ... and make it depend on the response flag instead of the command type. Signed-off-by: Nicolas Pitre Signed-off-by: Pierre Ossman commit fe6b4c8840c5e23fe9b8696450cee8f2e8cebffd Author: Pierre Ossman Date: Mon May 14 17:27:29 2007 +0200 mmc: use assigned major for block device The MMC block devices now have an assigned major. Make sure we actually use it. Signed-off-by: Pierre Ossman commit 6ba736a10e4ae63b38ccfee9f22b3263a6e5d050 Author: Pierre Ossman Date: Sun May 13 22:39:23 2007 +0200 sdhci: handle dma boundary interrupts When the device hits certain memory boundaries, it signals an interrupt and expects to be serviced. We don't need the feature but we need to make sure the device doesn't stall. Signed-off-by: Pierre Ossman commit c0f3b6c777af071063984fdbc4efb6c904f665f4 Author: Yoichi Yuasa Date: Sun May 13 18:23:15 2007 +0200 mmc: au1xmmc command types check from data flags This patch has changed command types check from data flags. MMC_STOP_TRANSMISSION is never passed to au1xmmc_send_command(). SEND_STOP() is used for MMC_STOP_TRANSMISSION. Signed-off-by: Yoichi Yuasa Signed-off-by: Pierre Ossman commit be35cf01a9889e82da3bbda1d7b161f036424225 Author: David S. Miller Date: Mon May 14 04:19:01 2007 -0700 [SPARC64]: Update defconfig. Signed-off-by: David S. Miller commit d0842f40a025521beb48063e02a8cc57ebd075e0 Author: David S. Miller Date: Mon May 14 04:18:38 2007 -0700 [VIDEO]: XVR-500 and XVR-2500 need FB=y. Otherwise the allmodconfig build breaks. Signed-off-by: David S. Miller commit 6e46507d0178ad4b33ed3bfcc72dc6e1e8a17790 Author: David S. Miller Date: Mon May 14 03:53:47 2007 -0700 [SPARC32]: asm/system.h needs asm/smp.h To get hard_smp_processor_id() even on UP builds. Signed-off-by: David S. Miller commit 2b6d868fa62b51500dd95a036229adc4a4a7bdae Author: David S. Miller Date: Mon May 14 03:53:12 2007 -0700 [SPARC32]: Update defconfig. Signed-off-by: David S. Miller commit 19fce2b96655d2b3b758e0f783314678d89b47c5 Author: Robert Reif Date: Mon May 14 03:22:08 2007 -0700 [SPARC32]: Fix sparc32 kdebug changes. Fix recent kdebug changes to compile on sparc32. Signed-off-by: Robert Reif Signed-off-by: David S. Miller commit d831666e98b4f1e19ebdd2349735f47bf37cd293 Author: Mitsuru Chinen Date: Mon May 14 03:07:30 2007 -0700 [IPV4] SNMP: Display new statistics at /proc/net/netstat This displays the statistics specified in the updated IP-MIB RFC (RFC4293) in /proc/net/netstat. The reason why these are not displayed in /proc/net/snmp is that some existing utilities are developed under the assumption which ipstat items in /proc/net/snmp is unchanged. Signed-off-by: Mitsuru Chinen Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller commit ae7bf20a6316272acfcaef5d265b18aaa54b41e4 Author: Corey Mutter Date: Mon May 14 03:00:27 2007 -0700 [IPV6]: Reverse sense of promisc tests in ip6_mc_input Reverse the sense of the promiscuous-mode tests in ip6_mc_input(). Signed-off-by: Corey Mutter Signed-off-by: David L Stevens Signed-off-by: David S. Miller commit 3e5c2d3bdbe2c047b9853c4248f881f5ac645c89 Author: Jamal Hadi Salim Date: Mon May 14 02:57:19 2007 -0700 [NET_SCHED]: prio qdisc boundary condition This fixes an out-of-boundary condition when the classified band equals q->bands. Caught by Alexey Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller commit 9ac6d4a4b85004dbb907f1d3b34412afe16f3758 Author: David S. Miller Date: Mon May 14 02:56:03 2007 -0700 [SPARC64]: Accept ebus_bus_type for generic DMA ops. Based upon a bug report by Meelis Roos. Signed-off-by: David S. Miller commit 6253db055eb62a1bd0a18a1d8489565303b2b1dd Author: Herbert Xu Date: Mon May 14 02:19:11 2007 -0700 [IPSEC]: Don't warn if high-order hash resize fails Multi-page allocations are always likely to fail. Since such failures are expected and non-critical in xfrm_hash_alloc, we shouldn't warn about them. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller commit b5505c6e1071b32176c7548a3aaf0be49f7c763e Author: Herbert Xu Date: Mon May 14 02:15:47 2007 -0700 [IPSEC]: Check validity of direction in xfrm_policy_byid The function xfrm_policy_byid takes a dir argument but finds the policy using the index instead. We only use the dir argument to update the policy count for that direction. Since the user can supply any value for dir, this can corrupt our policy count. I know this is the problem because a few days ago I was deleting policies by hand using indicies and accidentally typed in the wrong direction. It still deleted the policy and at the time I thought that was cool. In retrospect it isn't such a good idea :) I decided against letting it delete the policy anyway just in case we ever remove the connection between indicies and direction. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller commit 17f34f0ec96783851abcabf9878f8237ac67e4e7 Author: David S. Miller Date: Mon May 14 02:01:52 2007 -0700 [SPARC64]: Add missing cpus_empty() check in hypervisor xcall handling. Signed-off-by: David S. Miller commit ec6fb1ad84551741c14a05918cc0e04dea928fc0 Author: David S. Miller Date: Sun May 13 23:52:14 2007 -0700 [SCSI]: Add help text for SCSI_ESP_CORE. Signed-off-by: David S. Miller commit 3049f89df13bb8f5ded8737631a6c2dd6209b5ca Author: David S. Miller Date: Sun May 13 22:22:47 2007 -0700 [SPARC] SBUS: display7seg.c needs asm/io.h Signed-off-by: David S. Miller commit 4b5dff76a70cb1d8b935b8b93fe0df0bbe66640d Author: David S. Miller Date: Sun May 13 22:22:13 2007 -0700 [SPARC] SBUS: bbc_i2c.c needs asm/io.h Signed-off-by: David S. Miller commit 49d23cfcec5d36a91e118d28148d353bf8f0bc03 Author: David S. Miller Date: Sun May 13 22:01:18 2007 -0700 [SPARC64]: Be more resiliant with PCI I/O space regs. If we miss on the ranges, just toss the translation up to the parent instead of failing. Signed-off-by: David S. Miller commit 7a05b591a3de20466a775f62369a42ea0fe1345d Author: David S. Miller Date: Sun May 13 21:27:37 2007 -0700 [SERIAL] SUNHV: Add an ID string. The 'compatible' property can be SUNW,sun4v-console as well as 'qcn'. Signed-off-by: David S. Miller commit 705962ccc9d21a08b74b6b6e1d3cf10f98968a67 Author: Al Viro Date: Sun May 13 05:52:32 2007 -0400 fix deadlock in loop.c ... doh Jeremy Fitzhardinge noted that the recent loop.c cleanups worked, but cause lockdep to complain. Ouch. OK, the deadlock is real and yes, I'm an idiot. Speaking of which, we probably want to s/lock/pin/ in drivers/base/map.c to avoid such brainos again. And yes, this stuff needs clear documentation. Will try to put one together once I get some sleep... Signed-off-by: Al Viro Cc: Jeremy Fitzhardinge Signed-off-by: Linus Torvalds commit 20eebcf09c2d329e4dcdd765634c0a524195e16d Author: Roland Dreier Date: Sun May 13 08:54:18 2007 -0700 mlx4_core: Remove unused doorbell_lock struct mlx4_priv.doorbell_lock is never used, so delete it. Signed-off-by: Roland Dreier commit f550d94cf9f86bc54e31dae2aee1a03d678c6e7f Author: Paul Mundt Date: Thu May 10 12:50:28 2007 +0900 net: Trivial MLX4_DEBUG dependency fix. CONFIG_MLX4_DEBUG works out to a def_bool y for those that have CONFIG_EMBEDDED set. Make it depend on MLX4_CORE. Signed-off-by: Paul Mundt Signed-off-by: Roland Dreier MAINTAINERS | 3 arch/i386/kernel/Makefile | 1 arch/i386/kernel/smp.c | 65 --- arch/i386/kernel/smpboot.c | 22 - arch/i386/kernel/smpcommon.c | 79 ++++ arch/i386/kernel/traps.c | 5 arch/i386/mach-voyager/voyager_smp.c | 106 ++--- arch/m68k/lib/uaccess.c | 4 arch/sparc/defconfig | 151 ++++--- arch/sparc/kernel/head.S | 2 arch/sparc64/defconfig | 26 - arch/sparc64/kernel/of_device.c | 7 arch/sparc64/kernel/smp.c | 3 arch/x86_64/kernel/traps.c | 3 drivers/block/loop.c | 17 - drivers/char/ipmi/ipmi_watchdog.c | 134 ++---- drivers/infiniband/core/cma.c | 106 +++-- drivers/infiniband/hw/ehca/ehca_classes.h | 1 drivers/infiniband/hw/ehca/ehca_irq.c | 7 drivers/infiniband/hw/ehca/ehca_main.c | 94 ++--- drivers/infiniband/hw/ehca/ehca_qp.c | 17 + drivers/infiniband/hw/ehca/hcp_if.c | 13 + drivers/infiniband/hw/ipath/ipath_iba6120.c | 7 drivers/infiniband/hw/ipath/ipath_intr.c | 7 drivers/infiniband/hw/ipath/ipath_kernel.h | 2 drivers/infiniband/hw/ipath/ipath_verbs.c | 12 - drivers/infiniband/hw/mlx4/main.c | 1 drivers/infiniband/hw/mthca/mthca_cq.c | 4 drivers/infiniband/hw/mthca/mthca_qp.c | 1 drivers/infiniband/ulp/ipoib/ipoib_cm.c | 11 - drivers/mmc/card/block.c | 17 - drivers/mmc/host/au1xmmc.c | 35 +- drivers/mmc/host/pxamci.c | 18 - drivers/mmc/host/sdhci.c | 9 drivers/net/Kconfig | 1 drivers/net/mlx4/main.c | 2 drivers/net/mlx4/mlx4.h | 1 drivers/sbus/char/bbc_i2c.c | 1 drivers/sbus/char/display7seg.c | 1 drivers/scsi/Kconfig | 8 drivers/serial/sunhv.c | 4 drivers/video/Kconfig | 4 fs/eventpoll.c | 561 ++++++++++++--------------- include/asm-alpha/mmu_context.h | 2 include/asm-h8300/atomic.h | 1 include/asm-i386/kdebug.h | 1 include/asm-i386/processor.h | 4 include/asm-m68k/uaccess.h | 4 include/asm-sparc/kdebug.h | 8 include/asm-sparc/system.h | 1 include/asm-sparc64/dma-mapping.h | 44 +- include/asm-x86_64/kdebug.h | 1 include/linux/compat.h | 6 include/linux/init.h | 7 include/linux/io.h | 8 include/linux/major.h | 2 include/linux/slub_def.h | 6 kernel/time/clocksource.c | 10 kernel/time/timekeeping.c | 2 kernel/timer.c | 2 net/ipv4/proc.c | 21 + net/ipv6/ip6_input.c | 2 net/sched/sch_prio.c | 2 net/xfrm/xfrm_hash.c | 3 net/xfrm/xfrm_policy.c | 4 65 files changed, 828 insertions(+), 886 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 68a56ad..bbeb5b6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1712,8 +1712,6 @@ L: Linux-Kernel@vger.kernel.org S: Maintained i386 SETUP CODE / CPU ERRATA WORKAROUNDS -P: Dave Jones -M: davej@codemonkey.org.uk P: H. Peter Anvin M: hpa@zytor.com S: Maintained @@ -3269,6 +3267,7 @@ W: http://tpmdd.sourceforge.net P: Marcel Selhorst M: tpm@selhorst.net W: http://www.prosec.rub.de/tpm/ +L: tpmdd-devel@lists.sourceforge.net S: Maintained Telecom Clock Driver for MCPL0010 diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 91cff8d..06da59f 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_APM) += apm.o obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o +obj-$(CONFIG_SMP) += smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 706bda7..c9a7c98 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -467,7 +467,7 @@ void flush_tlb_all(void) * it goes straight through and wastes no time serializing * anything. Worst case is that we lose a reschedule ... */ -void native_smp_send_reschedule(int cpu) +static void native_smp_send_reschedule(int cpu) { WARN_ON(cpu_is_offline(cpu)); send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); @@ -546,9 +546,10 @@ static void __smp_call_function(void (*f * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int native_smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) +static int +native_smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; cpumask_t allbutself; @@ -599,60 +600,6 @@ int native_smp_call_function_mask(cpumas return 0; } -/** - * smp_call_function(): Run a function on all other CPUs. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Unused. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ -int smp_call_function(void (*func) (void *info), void *info, int nonatomic, - int wait) -{ - return smp_call_function_mask(cpu_online_map, func, info, wait); -} -EXPORT_SYMBOL(smp_call_function); - -/** - * smp_call_function_single - Run a function on another CPU - * @cpu: The target CPU. Cannot be the calling CPU. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Unused. - * @wait: If true, wait until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - /* prevent preemption and reschedule on another processor */ - int ret; - int me = get_cpu(); - if (cpu == me) { - WARN_ON(1); - put_cpu(); - return -EBUSY; - } - - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); - - put_cpu(); - return ret; -} -EXPORT_SYMBOL(smp_call_function_single); - static void stop_this_cpu (void * dummy) { local_irq_disable(); @@ -670,7 +617,7 @@ static void stop_this_cpu (void * dummy) * this function calls the 'stop' function on all other CPUs in the system. */ -void native_smp_send_stop(void) +static void native_smp_send_stop(void) { /* Don't deadlock on the call lock in panic */ int nolock = !spin_trylock(&call_lock); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index b92cc4e..08f07a7 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -98,9 +98,6 @@ EXPORT_SYMBOL(x86_cpu_to_apicid); u8 apicid_2_node[MAX_APICID]; -DEFINE_PER_CPU(unsigned long, this_cpu_off); -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - /* * Trampoline 80x86 program as an array. */ @@ -763,25 +760,6 @@ #else #define alloc_idle_task(cpu) fork_idle(cpu) #endif -/* Initialize the CPU's GDT. This is either the boot CPU doing itself - (still using the master per-cpu area), or a CPU doing it for a - secondary which will soon come up. */ -static __cpuinit void init_gdt(int cpu) -{ - struct desc_struct *gdt = get_cpu_gdt_table(cpu); - - pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, - (u32 *)&gdt[GDT_ENTRY_PERCPU].b, - __per_cpu_offset[cpu], 0xFFFFF, - 0x80 | DESCTYPE_S | 0x2, 0x8); - - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; - per_cpu(cpu_number, cpu) = cpu; -} - -/* Defined in head.S */ -extern struct Xgt_desc_struct early_gdt_descr; - static int __cpuinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad diff --git a/arch/i386/kernel/smpcommon.c b/arch/i386/kernel/smpcommon.c new file mode 100644 index 0000000..1868ae1 --- /dev/null +++ b/arch/i386/kernel/smpcommon.c @@ -0,0 +1,79 @@ +/* + * SMP stuff which is common to all sub-architectures. + */ +#include +#include + +DEFINE_PER_CPU(unsigned long, this_cpu_off); +EXPORT_PER_CPU_SYMBOL(this_cpu_off); + +/* Initialize the CPU's GDT. This is either the boot CPU doing itself + (still using the master per-cpu area), or a CPU doing it for a + secondary which will soon come up. */ +__cpuinit void init_gdt(int cpu) +{ + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, + (u32 *)&gdt[GDT_ENTRY_PERCPU].b, + __per_cpu_offset[cpu], 0xFFFFF, + 0x80 | DESCTYPE_S | 0x2, 0x8); + + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; + per_cpu(cpu_number, cpu) = cpu; +} + + +/** + * smp_call_function(): Run a function on all other CPUs. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Unused. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function(void (*func) (void *info), void *info, int nonatomic, + int wait) +{ + return smp_call_function_mask(cpu_online_map, func, info, wait); +} +EXPORT_SYMBOL(smp_call_function); + +/** + * smp_call_function_single - Run a function on another CPU + * @cpu: The target CPU. Cannot be the calling CPU. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Unused. + * @wait: If true, wait until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + */ +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + /* prevent preemption and reschedule on another processor */ + int ret; + int me = get_cpu(); + if (cpu == me) { + WARN_ON(1); + put_cpu(); + return -EBUSY; + } + + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + + put_cpu(); + return ret; +} +EXPORT_SYMBOL(smp_call_function_single); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index c05e7e8..90da057 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -733,11 +733,6 @@ #ifdef CONFIG_X86_LOCAL_APIC */ if (nmi_watchdog_tick(regs, reason)) return; -#endif - if (notify_die(DIE_NMI_POST, "nmi_post", regs, reason, 2, 0) - == NOTIFY_STOP) - return; -#ifdef CONFIG_X86_LOCAL_APIC if (!do_nmi_callback(regs, smp_processor_id())) #endif unknown_nmi_error(reason, regs); diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 50d9c52..b87f854 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -27,7 +27,6 @@ #include #include #include #include -#include /* TLB state -- visible externally, indexed physically */ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; @@ -422,7 +421,7 @@ find_smp_config(void) VOYAGER_SUS_IN_CONTROL_PORT); current_thread_info()->cpu = boot_cpu_id; - write_pda(cpu_number, boot_cpu_id); + x86_write_percpu(cpu_number, boot_cpu_id); } /* @@ -435,7 +434,7 @@ smp_store_cpu_info(int id) *c = boot_cpu_data; - identify_cpu(c); + identify_secondary_cpu(c); } /* set up the trampoline and return the physical address of the code */ @@ -459,7 +458,7 @@ start_secondary(void *unused) /* external functions not defined in the headers */ extern void calibrate_delay(void); - secondary_cpu_init(); + cpu_init(); /* OK, we're in the routine */ ack_CPI(VIC_CPU_BOOT_CPI); @@ -572,7 +571,9 @@ do_boot_cpu(__u8 cpu) /* init_tasks (in sched.c) is indexed logically */ stack_start.esp = (void *) idle->thread.esp; - init_gdt(cpu, idle); + init_gdt(cpu); + per_cpu(current_task, cpu) = idle; + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); irq_ctx_init(cpu); /* Note: Don't modify initial ss override */ @@ -859,8 +860,8 @@ smp_invalidate_interrupt(void) /* This routine is called with a physical cpu mask */ static void -flush_tlb_others (unsigned long cpumask, struct mm_struct *mm, - unsigned long va) +voyager_flush_tlb_others (unsigned long cpumask, struct mm_struct *mm, + unsigned long va) { int stuck = 50000; @@ -912,7 +913,7 @@ flush_tlb_current_task(void) cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); local_flush_tlb(); if (cpu_mask) - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL); preempt_enable(); } @@ -934,7 +935,7 @@ flush_tlb_mm (struct mm_struct * mm) leave_mm(smp_processor_id()); } if (cpu_mask) - flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + voyager_flush_tlb_others(cpu_mask, mm, FLUSH_ALL); preempt_enable(); } @@ -955,7 +956,7 @@ void flush_tlb_page(struct vm_area_struc } if (cpu_mask) - flush_tlb_others(cpu_mask, mm, va); + voyager_flush_tlb_others(cpu_mask, mm, va); preempt_enable(); } @@ -1044,10 +1045,12 @@ smp_call_function_interrupt(void) } static int -__smp_call_function_mask (void (*func) (void *info), void *info, int retry, - int wait, __u32 mask) +voyager_smp_call_function_mask (cpumask_t cpumask, + void (*func) (void *info), void *info, + int wait) { struct call_data_struct data; + u32 mask = cpus_addr(cpumask)[0]; mask &= ~(1< The function to run. This must be fast and non-blocking. - An arbitrary pointer to pass to the function. - If true, keep retrying until ready. - If true, wait until function has completed on other CPUs. - [RETURNS] 0 on success, else a negative status code. Does not return until - remote CPUs are nearly ready to execute <> or are or have executed. -*/ -int -smp_call_function(void (*func) (void *info), void *info, int retry, - int wait) -{ - __u32 mask = cpus_addr(cpu_online_map)[0]; - - return __smp_call_function_mask(func, info, retry, wait, mask); -} -EXPORT_SYMBOL(smp_call_function); - -/* - * smp_call_function_single - Run a function on another CPU - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Currently unused. - * @wait: If true, wait until function has completed on other CPUs. - * - * Retrurns 0 on success, else a negative status code. - * - * Does not return until the remote CPU is nearly ready to execute - * or is or has executed. - */ - -int -smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - __u32 mask = 1 << cpu; - - return __smp_call_function_mask(func, info, nonatomic, wait, mask); -} -EXPORT_SYMBOL(smp_call_function_single); - /* Sorry about the name. In an APIC based system, the APICs * themselves are programmed to send a timer interrupt. This is used * by linux to reschedule the processor. Voyager doesn't have this, @@ -1237,8 +1199,8 @@ smp_alloc_memory(void) } /* send a reschedule CPI to one CPU by physical CPU number*/ -void -smp_send_reschedule(int cpu) +static void +voyager_smp_send_reschedule(int cpu) { send_one_CPI(cpu, VIC_RESCHEDULE_CPI); } @@ -1267,8 +1229,8 @@ safe_smp_processor_id(void) } /* broadcast a halt to all other CPUs */ -void -smp_send_stop(void) +static void +voyager_smp_send_stop(void) { smp_call_function(smp_stop_cpu_function, NULL, 1, 1); } @@ -1930,23 +1892,26 @@ smp_voyager_power_off(void *dummy) smp_stop_cpu_function(NULL); } -void __init -smp_prepare_cpus(unsigned int max_cpus) +static void __init +voyager_smp_prepare_cpus(unsigned int max_cpus) { /* FIXME: ignore max_cpus for now */ smp_boot_cpus(); } -void __devinit smp_prepare_boot_cpu(void) +static void __devinit voyager_smp_prepare_boot_cpu(void) { + init_gdt(smp_processor_id()); + switch_to_new_gdt(); + cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), cpu_callout_map); cpu_set(smp_processor_id(), cpu_possible_map); cpu_set(smp_processor_id(), cpu_present_map); } -int __devinit -__cpu_up(unsigned int cpu) +static int __devinit +voyager_cpu_up(unsigned int cpu) { /* This only works at boot for x86. See "rewrite" above. */ if (cpu_isset(cpu, smp_commenced_mask)) @@ -1962,8 +1927,8 @@ __cpu_up(unsigned int cpu) return 0; } -void __init -smp_cpus_done(unsigned int max_cpus) +static void __init +voyager_smp_cpus_done(unsigned int max_cpus) { zap_low_mappings(); } @@ -1972,5 +1937,16 @@ void __init smp_setup_processor_id(void) { current_thread_info()->cpu = hard_smp_processor_id(); - write_pda(cpu_number, hard_smp_processor_id()); + x86_write_percpu(cpu_number, hard_smp_processor_id()); } + +struct smp_ops smp_ops = { + .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, + .smp_prepare_cpus = voyager_smp_prepare_cpus, + .cpu_up = voyager_cpu_up, + .smp_cpus_done = voyager_smp_cpus_done, + + .smp_send_stop = voyager_smp_send_stop, + .smp_send_reschedule = voyager_smp_send_reschedule, + .smp_call_function_mask = voyager_smp_call_function_mask, +}; diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c index 865f9fb..13854ed 100644 --- a/arch/m68k/lib/uaccess.c +++ b/arch/m68k/lib/uaccess.c @@ -181,7 +181,7 @@ EXPORT_SYMBOL(strnlen_user); * Zero Userspace */ -unsigned long clear_user(void __user *to, unsigned long n) +unsigned long __clear_user(void __user *to, unsigned long n) { unsigned long res; @@ -219,4 +219,4 @@ unsigned long clear_user(void __user *to return res; } -EXPORT_SYMBOL(clear_user); +EXPORT_SYMBOL(__clear_user); diff --git a/arch/sparc/defconfig b/arch/sparc/defconfig index 79e5489..38bd79f 100644 --- a/arch/sparc/defconfig +++ b/arch/sparc/defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.20-rc1 -# Sun Dec 17 14:20:47 2006 +# Linux kernel version: 2.6.22-rc1 +# Mon May 14 03:25:14 2007 # CONFIG_MMU=y CONFIG_HIGHMEM=y +CONFIG_ZONE_DMA=y CONFIG_GENERIC_ISA_DMA=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -23,14 +24,17 @@ CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y # CONFIG_IPC_NS is not set +CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_UTS_NS is not set # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED=y # CONFIG_RELAY is not set +CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y @@ -46,14 +50,19 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y CONFIG_SHMEM=y -CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 -# CONFIG_SLOB is not set # # Loadable module support @@ -107,7 +116,7 @@ # CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_SUN_PM=y # CONFIG_SUN4 is not set CONFIG_PCI=y -# CONFIG_PCI_MULTITHREAD_PROBE is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set # CONFIG_PCI_DEBUG is not set CONFIG_SUN_OPENPROMFS=m # CONFIG_SPARC_LED is not set @@ -124,6 +133,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y # CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_RESOURCES_64BIT is not set +CONFIG_ZONE_DMA_FLAG=1 # # Networking @@ -133,14 +143,15 @@ CONFIG_NET=y # # Networking options # -# CONFIG_NETDEBUG is not set CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m # CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set CONFIG_NET_KEY=m +# CONFIG_NET_KEY_MIGRATE is not set CONFIG_INET=y # CONFIG_IP_MULTICAST is not set # CONFIG_IP_ADVANCED_ROUTER is not set @@ -170,6 +181,7 @@ # CONFIG_TCP_MD5SIG is not set CONFIG_IPV6=m CONFIG_IPV6_PRIVACY=y # CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_IPV6_OPTIMISTIC_DAD is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m @@ -229,7 +241,18 @@ CONFIG_NET_PKTGEN=m # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +CONFIG_AF_RXRPC=m +# CONFIG_AF_RXRPC_DEBUG is not set +# CONFIG_RXKAD is not set + +# +# Wireless +# +# CONFIG_CFG80211 is not set +# CONFIG_WIRELESS_EXT is not set +# CONFIG_MAC80211 is not set # CONFIG_IEEE80211 is not set +# CONFIG_RFKILL is not set # # Device Drivers @@ -242,16 +265,13 @@ CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # CONFIG_FW_LOADER is not set # CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set # # Connector - unified userspace <-> kernelspace linker # # CONFIG_CONNECTOR is not set - -# -# Memory Technology Devices (MTD) -# # CONFIG_MTD is not set # @@ -262,6 +282,7 @@ # CONFIG_PARPORT is not set # # Plug and Play support # +# CONFIG_PNPACPI is not set # # Block devices @@ -280,15 +301,16 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=4096 CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 -CONFIG_BLK_DEV_INITRD=y # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set # # Misc devices # +# CONFIG_PHANTOM is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_BLINK is not set # # ATA/ATAPI/MFM/RLL support @@ -322,11 +344,12 @@ # CONFIG_SCSI_MULTI_LUN is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m # # SCSI Transports # -CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set # CONFIG_SCSI_ISCSI_ATTRS is not set # CONFIG_SCSI_SAS_ATTRS is not set @@ -366,12 +389,9 @@ # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_NSP32 is not set # CONFIG_SCSI_DEBUG is not set +CONFIG_SCSI_ESP_CORE=y CONFIG_SCSI_SUNESP=y # CONFIG_SCSI_SRP is not set - -# -# Serial ATA (prod) and Parallel ATA (experimental) drivers -# # CONFIG_ATA is not set # @@ -390,6 +410,7 @@ # CONFIG_FUSION_SAS is not set # # IEEE 1394 (FireWire) support # +# CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set # @@ -410,10 +431,6 @@ # # ARCnet devices # # CONFIG_ARCNET is not set - -# -# PHY device support -# # CONFIG_PHYLIB is not set # @@ -435,10 +452,7 @@ # # CONFIG_NET_TULIP is not set # CONFIG_HP100 is not set # CONFIG_NET_PCI is not set - -# -# Ethernet (1000 Mbit) -# +CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set # CONFIG_E1000 is not set @@ -454,15 +468,16 @@ # CONFIG_SK98LIN is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set # CONFIG_QLA3XXX is not set - -# -# Ethernet (10000 Mbit) -# +# CONFIG_ATL1 is not set +CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +# CONFIG_CHELSIO_T3 is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set +# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -470,13 +485,10 @@ # # CONFIG_TR is not set # -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces +# Wireless LAN # +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set @@ -528,9 +540,17 @@ # CONFIG_KEYBOARD_NEWTON is not set # CONFIG_KEYBOARD_STOWAWAY is not set CONFIG_INPUT_MOUSE=y CONFIG_MOUSE_PS2=m +CONFIG_MOUSE_PS2_ALPS=y +CONFIG_MOUSE_PS2_LOGIPS2PP=y +CONFIG_MOUSE_PS2_SYNAPTICS=y +CONFIG_MOUSE_PS2_LIFEBOOK=y +CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_TOUCHKIT is not set CONFIG_MOUSE_SERIAL=m +# CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_VSXXXAA is not set # CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -578,14 +598,9 @@ # # IPMI # # CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# # CONFIG_WATCHDOG is not set CONFIG_HW_RANDOM=m CONFIG_RTC=m -# CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set # CONFIG_DRM is not set @@ -595,10 +610,7 @@ # # TPM devices # # CONFIG_TCG_TPM is not set - -# -# I2C support -# +CONFIG_DEVPORT=y # CONFIG_I2C is not set # @@ -611,32 +623,39 @@ # # Dallas's 1-wire bus # # CONFIG_W1 is not set - -# -# Hardware Monitoring support -# CONFIG_HWMON=y # CONFIG_HWMON_VID is not set # CONFIG_SENSORS_ABITUGURU is not set # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_PC87427 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# CONFIG_SENSORS_SMSC47B397 is not set # CONFIG_SENSORS_VT1211 is not set +# CONFIG_SENSORS_W83627HF is not set # CONFIG_HWMON_DEBUG_CHIP is not set # +# Multifunction device drivers +# +# CONFIG_MFD_SM501 is not set + +# # Multimedia devices # # CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_DAB is not set # -# Digital Video Broadcasting Devices +# Graphics support # -# CONFIG_DVB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # -# Graphics support +# Display device support # -CONFIG_FIRMWARE_EDID=y +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_VGASTATE is not set # CONFIG_FB is not set # @@ -644,7 +663,6 @@ # Console display driver support # # CONFIG_PROM_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set # # Sound @@ -655,6 +673,7 @@ # # HID Devices # CONFIG_HID=y +# CONFIG_HID_DEBUG is not set # # USB support @@ -672,10 +691,6 @@ # # USB Gadget Support # # CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# # CONFIG_MMC is not set # @@ -719,10 +734,6 @@ # DMA Devices # # -# Virtualization -# - -# # Misc Linux/SPARC drivers # CONFIG_SUN_OPENPROMIO=m @@ -801,6 +812,7 @@ # Miscellaneous filesystems # # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set +# CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set CONFIG_BEFS_FS=m @@ -827,6 +839,7 @@ CONFIG_LOCKD=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=m +# CONFIG_SUNRPC_BIND34 is not set CONFIG_RPCSEC_GSS_KRB5=m # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -839,7 +852,7 @@ # CONFIG_CIFS_EXPERIMENTAL is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set CONFIG_AFS_FS=m -CONFIG_RXRPC=m +# CONFIG_AFS_DEBUG is not set # CONFIG_9P_FS is not set # @@ -913,15 +926,14 @@ # CONFIG_UNUSED_SYMBOLS is not set # CONFIG_DEBUG_FS is not set # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y -CONFIG_LOG_BUF_SHIFT=14 CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set # CONFIG_DEBUG_SLAB is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set -# CONFIG_DEBUG_RWSEMS is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set @@ -932,12 +944,14 @@ # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set CONFIG_FORCED_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_FAULT_INJECTION is not set # CONFIG_DEBUG_STACK_USAGE is not set # # Security options # -# CONFIG_KEYS is not set +CONFIG_KEYS=y +# CONFIG_KEYS_DEBUG_PROC_KEYS is not set # CONFIG_SECURITY is not set # @@ -961,8 +975,11 @@ # CONFIG_CRYPTO_TGR192 is not set # CONFIG_CRYPTO_GF128MUL is not set CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_TWOFISH_COMMON=m @@ -977,6 +994,7 @@ # CONFIG_CRYPTO_ANUBIS is not set CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_CRC32C=m +# CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_TEST is not set # @@ -989,9 +1007,12 @@ # CONFIG_BITREVERSE=y # CONFIG_CRC_CCITT is not set # CONFIG_CRC16 is not set +# CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_PLIST=y -CONFIG_IOMAP_COPY=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y diff --git a/arch/sparc/kernel/head.S b/arch/sparc/kernel/head.S index 97da13c..9a219e8 100644 --- a/arch/sparc/kernel/head.S +++ b/arch/sparc/kernel/head.S @@ -19,7 +19,7 @@ #include #include #include #include -#include +#include #include #include /* TI_UWINMASK */ #include diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index 585ef4f..65840a6 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.21 -# Fri May 11 14:31:45 2007 +# Linux kernel version: 2.6.22-rc1 +# Mon May 14 04:17:48 2007 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -508,10 +508,6 @@ # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_ESP_CORE is not set # CONFIG_SCSI_SUNESP is not set # CONFIG_SCSI_SRP is not set - -# -# Serial ATA (prod) and Parallel ATA (experimental) drivers -# # CONFIG_ATA is not set # @@ -568,10 +564,6 @@ # # ARCnet devices # # CONFIG_ARCNET is not set - -# -# PHY device support -# # CONFIG_PHYLIB is not set # @@ -611,10 +603,7 @@ # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set - -# -# Ethernet (1000 Mbit) -# +CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=m @@ -634,10 +623,7 @@ CONFIG_TIGON3=m CONFIG_BNX2=m # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set - -# -# Ethernet (10000 Mbit) -# +CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set # CONFIG_IXGB is not set @@ -667,10 +653,6 @@ # CONFIG_USB_PEGASUS is not set # CONFIG_USB_RTL8150 is not set # CONFIG_USB_USBNET_MII is not set # CONFIG_USB_USBNET is not set - -# -# Wan interfaces -# # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index 7455f5d..16cc46a 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -537,6 +537,13 @@ static int __init build_one_resource(str return 0; } + /* When we miss an I/O space match on PCI, just pass it up + * to the next PCI bridge and/or controller. + */ + if (!strcmp(bus->name, "pci") && + (addr[0] & 0x03000000) == 0x01000000) + return 0; + return 1; } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 8087d67..24fdf1d 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -561,6 +561,9 @@ static void hypervisor_xcall_deliver(u64 unsigned long flags, status; int cnt, retries, this_cpu, prev_sent, i; + if (cpus_empty(mask)) + return; + /* We have to do this whole thing with interrupts fully disabled. * Otherwise if we send an xcall from interrupt context it will * corrupt both our mondo block and cpu list state. diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index d28f013..cb29fb9 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -776,9 +776,6 @@ asmlinkage __kprobes void default_do_nmi */ if (nmi_watchdog_tick(regs,reason)) return; - if (notify_die(DIE_NMI_POST, "nmi_post", regs, reason, 2, 0) - == NOTIFY_STOP) - return; if (!do_nmi_callback(regs,cpu)) unknown_nmi_error(reason, regs); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e2fc4b6..5526ead 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1399,6 +1399,11 @@ static struct loop_device *loop_init_one struct loop_device *lo; struct gendisk *disk; + list_for_each_entry(lo, &loop_devices, lo_list) { + if (lo->lo_number == i) + return lo; + } + lo = kzalloc(sizeof(*lo), GFP_KERNEL); if (!lo) goto out; @@ -1443,17 +1448,13 @@ static void loop_del_one(struct loop_dev kfree(lo); } -static int loop_lock(dev_t dev, void *data) -{ - mutex_lock(&loop_devices_mutex); - return 0; -} - static struct kobject *loop_probe(dev_t dev, int *part, void *data) { - struct loop_device *lo = loop_init_one(dev & MINORMASK); + struct loop_device *lo; struct kobject *kobj; + mutex_lock(&loop_devices_mutex); + lo = loop_init_one(dev & MINORMASK); kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); mutex_unlock(&loop_devices_mutex); @@ -1466,7 +1467,7 @@ static int __init loop_init(void) if (register_blkdev(LOOP_MAJOR, "loop")) return -EIO; blk_register_region(MKDEV(LOOP_MAJOR, 0), 1UL << MINORBITS, - THIS_MODULE, loop_probe, loop_lock, NULL); + THIS_MODULE, loop_probe, NULL, NULL); if (max_loop) { printk(KERN_INFO "loop: the max_loop option is obsolete " diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 147c120..41f78e2 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -50,18 +50,10 @@ #include #include #include #include -#include #include -#ifdef CONFIG_X86 -/* This is ugly, but I've determined that x86 is the only architecture - that can reasonably support the IPMI NMI watchdog timeout at this - time. If another architecture adds this capability somehow, it - will have to be a somewhat different mechanism and I have no idea - how it will work. So in the unlikely event that another - architecture supports this, we can figure out a good generic - mechanism for it at that time. */ -#define HAVE_DIE_NMI_POST +#ifdef CONFIG_X86_LOCAL_APIC +#include #endif #define PFX "IPMI Watchdog: " @@ -327,11 +319,6 @@ static unsigned char ipmi_version_minor; /* If a pretimeout occurs, this is used to allow only one panic to happen. */ static atomic_t preop_panic_excl = ATOMIC_INIT(-1); -#ifdef HAVE_DIE_NMI_POST -static int testing_nmi; -static int nmi_handler_registered; -#endif - static int ipmi_heartbeat(void); static void panic_halt_ipmi_heartbeat(void); @@ -373,10 +360,6 @@ static int i_ipmi_set_timeout(struct ipm int hbnow = 0; - /* These can be cleared as we are setting the timeout. */ - ipmi_start_timer_on_heartbeat = 0; - pretimeout_since_last_heartbeat = 0; - data[0] = 0; WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS); @@ -451,12 +434,13 @@ static int ipmi_set_timeout(int do_heart wait_for_completion(&set_timeout_wait); - mutex_unlock(&set_timeout_lock); - if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB) || ((send_heartbeat_now) && (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY))) + { rv = ipmi_heartbeat(); + } + mutex_unlock(&set_timeout_lock); out: return rv; @@ -536,10 +520,12 @@ static int ipmi_heartbeat(void) int rv; struct ipmi_system_interface_addr addr; - if (ipmi_ignore_heartbeat) + if (ipmi_ignore_heartbeat) { return 0; + } if (ipmi_start_timer_on_heartbeat) { + ipmi_start_timer_on_heartbeat = 0; ipmi_watchdog_state = action_val; return ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); } else if (pretimeout_since_last_heartbeat) { @@ -547,6 +533,7 @@ static int ipmi_heartbeat(void) We don't want to set the action, though, we want to leave that alone (thus it can't be combined with the above operation. */ + pretimeout_since_last_heartbeat = 0; return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY); } @@ -934,45 +921,6 @@ static void ipmi_register_watchdog(int i printk(KERN_CRIT PFX "Unable to register misc device\n"); } -#ifdef HAVE_DIE_NMI_POST - if (nmi_handler_registered) { - int old_pretimeout = pretimeout; - int old_timeout = timeout; - int old_preop_val = preop_val; - - /* Set the pretimeout to go off in a second and give - ourselves plenty of time to stop the timer. */ - ipmi_watchdog_state = WDOG_TIMEOUT_RESET; - preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */ - pretimeout = 99; - timeout = 100; - - testing_nmi = 1; - - rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); - if (rv) { - printk(KERN_WARNING PFX "Error starting timer to" - " test NMI: 0x%x. The NMI pretimeout will" - " likely not work\n", rv); - rv = 0; - goto out_restore; - } - - msleep(1500); - - if (testing_nmi != 2) { - printk(KERN_WARNING PFX "IPMI NMI didn't seem to" - " occur. The NMI pretimeout will" - " likely not work\n"); - } - out_restore: - testing_nmi = 0; - preop_val = old_preop_val; - pretimeout = old_pretimeout; - timeout = old_timeout; - } -#endif - out: up_write(®ister_sem); @@ -982,10 +930,6 @@ #endif ipmi_watchdog_state = action_val; ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB); printk(KERN_INFO PFX "Starting now!\n"); - } else { - /* Stop the timer now. */ - ipmi_watchdog_state = WDOG_TIMEOUT_NONE; - ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB); } } @@ -1022,28 +966,17 @@ static void ipmi_unregister_watchdog(int up_write(®ister_sem); } -#ifdef HAVE_DIE_NMI_POST +#ifdef HAVE_NMI_HANDLER static int -ipmi_nmi(struct notifier_block *self, unsigned long val, void *data) +ipmi_nmi(void *dev_id, int cpu, int handled) { - if (val != DIE_NMI_POST) - return NOTIFY_OK; - - if (testing_nmi) { - testing_nmi = 2; - return NOTIFY_STOP; - } - /* If we are not expecting a timeout, ignore it. */ if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE) - return NOTIFY_OK; - - if (preaction_val != WDOG_PRETIMEOUT_NMI) - return NOTIFY_OK; + return NOTIFY_DONE; /* If no one else handled the NMI, we assume it was the IPMI watchdog. */ - if (preop_val == WDOG_PREOP_PANIC) { + if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) { /* On some machines, the heartbeat will give an error and not work unless we re-enable the timer. So do so. */ @@ -1052,12 +985,18 @@ ipmi_nmi(struct notifier_block *self, un panic(PFX "pre-timeout"); } - return NOTIFY_STOP; + return NOTIFY_DONE; } -static struct notifier_block ipmi_nmi_handler = { - .notifier_call = ipmi_nmi +static struct nmi_handler ipmi_nmi_handler = +{ + .link = LIST_HEAD_INIT(ipmi_nmi_handler.link), + .dev_name = "ipmi_watchdog", + .dev_id = NULL, + .handler = ipmi_nmi, + .priority = 0, /* Call us last. */ }; +int nmi_handler_registered; #endif static int wdog_reboot_handler(struct notifier_block *this, @@ -1174,7 +1113,7 @@ static int preaction_op(const char *inva preaction_val = WDOG_PRETIMEOUT_NONE; else if (strcmp(inval, "pre_smi") == 0) preaction_val = WDOG_PRETIMEOUT_SMI; -#ifdef HAVE_DIE_NMI_POST +#ifdef HAVE_NMI_HANDLER else if (strcmp(inval, "pre_nmi") == 0) preaction_val = WDOG_PRETIMEOUT_NMI; #endif @@ -1208,7 +1147,7 @@ static int preop_op(const char *inval, c static void check_parms(void) { -#ifdef HAVE_DIE_NMI_POST +#ifdef HAVE_NMI_HANDLER int do_nmi = 0; int rv; @@ -1221,9 +1160,20 @@ #ifdef HAVE_DIE_NMI_POST preop_op("preop_none", NULL); do_nmi = 0; } +#ifdef CONFIG_X86_LOCAL_APIC + if (nmi_watchdog == NMI_IO_APIC) { + printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC" + " mode (value is %d), that is incompatible" + " with using NMI in the IPMI watchdog." + " Disabling IPMI nmi pretimeout.\n", + nmi_watchdog); + preaction_val = WDOG_PRETIMEOUT_NONE; + do_nmi = 0; + } +#endif } if (do_nmi && !nmi_handler_registered) { - rv = register_die_notifier(&ipmi_nmi_handler); + rv = request_nmi(&ipmi_nmi_handler); if (rv) { printk(KERN_WARNING PFX "Can't register nmi handler\n"); @@ -1231,7 +1181,7 @@ #ifdef HAVE_DIE_NMI_POST } else nmi_handler_registered = 1; } else if (!do_nmi && nmi_handler_registered) { - unregister_die_notifier(&ipmi_nmi_handler); + release_nmi(&ipmi_nmi_handler); nmi_handler_registered = 0; } #endif @@ -1267,9 +1217,9 @@ static int __init ipmi_wdog_init(void) rv = ipmi_smi_watcher_register(&smi_watcher); if (rv) { -#ifdef HAVE_DIE_NMI_POST - if (nmi_handler_registered) - unregister_die_notifier(&ipmi_nmi_handler); +#ifdef HAVE_NMI_HANDLER + if (preaction_val == WDOG_PRETIMEOUT_NMI) + release_nmi(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, &wdog_panic_notifier); @@ -1288,9 +1238,9 @@ static void __exit ipmi_wdog_exit(void) ipmi_smi_watcher_unregister(&smi_watcher); ipmi_unregister_watchdog(watchdog_ifnum); -#ifdef HAVE_DIE_NMI_POST +#ifdef HAVE_NMI_HANDLER if (nmi_handler_registered) - unregister_die_notifier(&ipmi_nmi_handler); + release_nmi(&ipmi_nmi_handler); #endif atomic_notifier_chain_unregister(&panic_notifier_list, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fde92ce..2eb52b7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -346,12 +346,33 @@ static void cma_deref_id(struct rdma_id_ complete(&id_priv->comp); } -static void cma_release_remove(struct rdma_id_private *id_priv) +static int cma_disable_remove(struct rdma_id_private *id_priv, + enum cma_state state) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == state) { + atomic_inc(&id_priv->dev_remove); + ret = 0; + } else + ret = -EINVAL; + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} + +static void cma_enable_remove(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->dev_remove)) wake_up(&id_priv->wait_remove); } +static int cma_has_cm_dev(struct rdma_id_private *id_priv) +{ + return (id_priv->id.device && id_priv->cm_id.ib); +} + struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps) { @@ -884,9 +905,8 @@ static int cma_ib_handler(struct ib_cm_i struct rdma_cm_event event; int ret = 0; - atomic_inc(&id_priv->dev_remove); - if (!cma_comp(id_priv, CMA_CONNECT)) - goto out; + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -942,12 +962,12 @@ static int cma_ib_handler(struct ib_cm_i /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); return ret; } @@ -1057,11 +1077,8 @@ static int cma_req_handler(struct ib_cm_ int offset, ret; listen_id = cm_id->context; - atomic_inc(&listen_id->dev_remove); - if (!cma_comp(listen_id, CMA_LISTEN)) { - ret = -ECONNABORTED; - goto out; - } + if (cma_disable_remove(listen_id, CMA_LISTEN)) + return -ECONNABORTED; memset(&event, 0, sizeof event); offset = cma_user_data_offset(listen_id->id.ps); @@ -1101,11 +1118,11 @@ static int cma_req_handler(struct ib_cm_ release_conn_id: cma_exch(conn_id, CMA_DESTROYING); - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(&conn_id->id); out: - cma_release_remove(listen_id); + cma_enable_remove(listen_id); return ret; } @@ -1171,9 +1188,10 @@ static int cma_iw_handler(struct iw_cm_i struct sockaddr_in *sin; int ret = 0; - memset(&event, 0, sizeof event); - atomic_inc(&id_priv->dev_remove); + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; + memset(&event, 0, sizeof event); switch (iw_event->event) { case IW_CM_EVENT_CLOSE: event.event = RDMA_CM_EVENT_DISCONNECTED; @@ -1214,12 +1232,12 @@ static int cma_iw_handler(struct iw_cm_i /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.iw = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return ret; } - cma_release_remove(id_priv); + cma_enable_remove(id_priv); return ret; } @@ -1234,11 +1252,8 @@ static int iw_conn_req_handler(struct iw int ret; listen_id = cm_id->context; - atomic_inc(&listen_id->dev_remove); - if (!cma_comp(listen_id, CMA_LISTEN)) { - ret = -ECONNABORTED; - goto out; - } + if (cma_disable_remove(listen_id, CMA_LISTEN)) + return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.event_handler, @@ -1255,13 +1270,13 @@ static int iw_conn_req_handler(struct iw dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); if (!dev) { ret = -EADDRNOTAVAIL; - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(new_cm_id); goto out; } ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); if (ret) { - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(new_cm_id); goto out; } @@ -1270,7 +1285,7 @@ static int iw_conn_req_handler(struct iw ret = cma_acquire_dev(conn_id); mutex_unlock(&lock); if (ret) { - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(new_cm_id); goto out; } @@ -1293,14 +1308,14 @@ static int iw_conn_req_handler(struct iw /* User wants to destroy the CM ID */ conn_id->cm_id.iw = NULL; cma_exch(conn_id, CMA_DESTROYING); - cma_release_remove(conn_id); + cma_enable_remove(conn_id); rdma_destroy_id(&conn_id->id); } out: if (dev) dev_put(dev); - cma_release_remove(listen_id); + cma_enable_remove(listen_id); return ret; } @@ -1519,7 +1534,7 @@ static void cma_work_handler(struct work destroy = 1; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); cma_deref_id(id_priv); if (destroy) rdma_destroy_id(&id_priv->id); @@ -1711,13 +1726,13 @@ static void addr_handler(int status, str if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); cma_deref_id(id_priv); rdma_destroy_id(&id_priv->id); return; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); cma_deref_id(id_priv); } @@ -2042,11 +2057,10 @@ static int cma_sidr_rep_handler(struct i struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - memset(&event, 0, sizeof event); - atomic_inc(&id_priv->dev_remove); - if (!cma_comp(id_priv, CMA_CONNECT)) - goto out; + if (cma_disable_remove(id_priv, CMA_CONNECT)) + return 0; + memset(&event, 0, sizeof event); switch (ib_event->event) { case IB_CM_SIDR_REQ_ERROR: event.event = RDMA_CM_EVENT_UNREACHABLE; @@ -2084,12 +2098,12 @@ static int cma_sidr_rep_handler(struct i /* Destroy the CM ID by returning a non-zero value. */ id_priv->cm_id.ib = NULL; cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return ret; } out: - cma_release_remove(id_priv); + cma_enable_remove(id_priv); return ret; } @@ -2413,7 +2427,7 @@ int rdma_notify(struct rdma_cm_id *id, e int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT)) + if (!cma_has_cm_dev(id_priv)) return -EINVAL; switch (id->device->node_type) { @@ -2435,7 +2449,7 @@ int rdma_reject(struct rdma_cm_id *id, c int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT)) + if (!cma_has_cm_dev(id_priv)) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { @@ -2466,8 +2480,7 @@ int rdma_disconnect(struct rdma_cm_id *i int ret; id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, CMA_CONNECT) && - !cma_comp(id_priv, CMA_DISCONNECT)) + if (!cma_has_cm_dev(id_priv)) return -EINVAL; switch (rdma_node_get_transport(id->device->node_type)) { @@ -2499,10 +2512,9 @@ static int cma_ib_mc_handler(int status, int ret; id_priv = mc->id_priv; - atomic_inc(&id_priv->dev_remove); - if (!cma_comp(id_priv, CMA_ADDR_BOUND) && - !cma_comp(id_priv, CMA_ADDR_RESOLVED)) - goto out; + if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) && + cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) + return 0; if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, @@ -2524,12 +2536,12 @@ static int cma_ib_mc_handler(int status, ret = id_priv->id.event_handler(&id_priv->id, &event); if (ret) { cma_exch(id_priv, CMA_DESTROYING); - cma_release_remove(id_priv); + cma_enable_remove(id_priv); rdma_destroy_id(&id_priv->id); return 0; } -out: - cma_release_remove(id_priv); + + cma_enable_remove(id_priv); return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index f64d42b..1d286d3 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -277,6 +277,7 @@ void ehca_cleanup_mrmw_cache(void); extern spinlock_t ehca_qp_idr_lock; extern spinlock_t ehca_cq_idr_lock; +extern spinlock_t hcall_lock; extern struct idr ehca_qp_idr; extern struct idr ehca_cq_idr; diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index 82dda2f..100329b 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -517,12 +517,11 @@ void ehca_process_eq(struct ehca_shca *s else { struct ehca_cq *cq = eq->eqe_cache[i].cq; comp_event_callback(cq); - spin_lock_irqsave(&ehca_cq_idr_lock, flags); + spin_lock(&ehca_cq_idr_lock); cq->nr_events--; if (!cq->nr_events) wake_up(&cq->wait_completion); - spin_unlock_irqrestore(&ehca_cq_idr_lock, - flags); + spin_unlock(&ehca_cq_idr_lock); } } else { ehca_dbg(&shca->ib_device, "Got non completion event"); @@ -711,6 +710,7 @@ static void destroy_comp_task(struct ehc kthread_stop(task); } +#ifdef CONFIG_HOTPLUG_CPU static void take_over_work(struct ehca_comp_pool *pool, int cpu) { @@ -735,7 +735,6 @@ static void take_over_work(struct ehca_c } -#ifdef CONFIG_HOTPLUG_CPU static int comp_pool_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index fe90e74..c3f99f3 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -52,7 +52,7 @@ #include "hcp_if.h" MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Christoph Raisch "); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION("SVNEHCA_0022"); +MODULE_VERSION("SVNEHCA_0023"); int ehca_open_aqp1 = 0; int ehca_debug_level = 0; @@ -62,7 +62,7 @@ int ehca_use_hp_mr = 0; int ehca_port_act_time = 30; int ehca_poll_all_eqs = 1; int ehca_static_rate = -1; -int ehca_scaling_code = 1; +int ehca_scaling_code = 0; module_param_named(open_aqp1, ehca_open_aqp1, int, 0); module_param_named(debug_level, ehca_debug_level, int, 0); @@ -98,6 +98,7 @@ MODULE_PARM_DESC(scaling_code, spinlock_t ehca_qp_idr_lock; spinlock_t ehca_cq_idr_lock; +spinlock_t hcall_lock; DEFINE_IDR(ehca_qp_idr); DEFINE_IDR(ehca_cq_idr); @@ -453,15 +454,14 @@ static ssize_t ehca_store_debug_level(st DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, ehca_show_debug_level, ehca_store_debug_level); -void ehca_create_driver_sysfs(struct ibmebus_driver *drv) -{ - driver_create_file(&drv->driver, &driver_attr_debug_level); -} +static struct attribute *ehca_drv_attrs[] = { + &driver_attr_debug_level.attr, + NULL +}; -void ehca_remove_driver_sysfs(struct ibmebus_driver *drv) -{ - driver_remove_file(&drv->driver, &driver_attr_debug_level); -} +static struct attribute_group ehca_drv_attr_grp = { + .attrs = ehca_drv_attrs +}; #define EHCA_RESOURCE_ATTR(name) \ static ssize_t ehca_show_##name(struct device *dev, \ @@ -523,44 +523,28 @@ static ssize_t ehca_show_adapter_handle( } static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); +static struct attribute *ehca_dev_attrs[] = { + &dev_attr_adapter_handle.attr, + &dev_attr_num_ports.attr, + &dev_attr_hw_ver.attr, + &dev_attr_max_eq.attr, + &dev_attr_cur_eq.attr, + &dev_attr_max_cq.attr, + &dev_attr_cur_cq.attr, + &dev_attr_max_qp.attr, + &dev_attr_cur_qp.attr, + &dev_attr_max_mr.attr, + &dev_attr_cur_mr.attr, + &dev_attr_max_mw.attr, + &dev_attr_cur_mw.attr, + &dev_attr_max_pd.attr, + &dev_attr_max_ah.attr, + NULL +}; -void ehca_create_device_sysfs(struct ibmebus_dev *dev) -{ - device_create_file(&dev->ofdev.dev, &dev_attr_adapter_handle); - device_create_file(&dev->ofdev.dev, &dev_attr_num_ports); - device_create_file(&dev->ofdev.dev, &dev_attr_hw_ver); - device_create_file(&dev->ofdev.dev, &dev_attr_max_eq); - device_create_file(&dev->ofdev.dev, &dev_attr_cur_eq); - device_create_file(&dev->ofdev.dev, &dev_attr_max_cq); - device_create_file(&dev->ofdev.dev, &dev_attr_cur_cq); - device_create_file(&dev->ofdev.dev, &dev_attr_max_qp); - device_create_file(&dev->ofdev.dev, &dev_attr_cur_qp); - device_create_file(&dev->ofdev.dev, &dev_attr_max_mr); - device_create_file(&dev->ofdev.dev, &dev_attr_cur_mr); - device_create_file(&dev->ofdev.dev, &dev_attr_max_mw); - device_create_file(&dev->ofdev.dev, &dev_attr_cur_mw); - device_create_file(&dev->ofdev.dev, &dev_attr_max_pd); - device_create_file(&dev->ofdev.dev, &dev_attr_max_ah); -} - -void ehca_remove_device_sysfs(struct ibmebus_dev *dev) -{ - device_remove_file(&dev->ofdev.dev, &dev_attr_adapter_handle); - device_remove_file(&dev->ofdev.dev, &dev_attr_num_ports); - device_remove_file(&dev->ofdev.dev, &dev_attr_hw_ver); - device_remove_file(&dev->ofdev.dev, &dev_attr_max_eq); - device_remove_file(&dev->ofdev.dev, &dev_attr_cur_eq); - device_remove_file(&dev->ofdev.dev, &dev_attr_max_cq); - device_remove_file(&dev->ofdev.dev, &dev_attr_cur_cq); - device_remove_file(&dev->ofdev.dev, &dev_attr_max_qp); - device_remove_file(&dev->ofdev.dev, &dev_attr_cur_qp); - device_remove_file(&dev->ofdev.dev, &dev_attr_max_mr); - device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mr); - device_remove_file(&dev->ofdev.dev, &dev_attr_max_mw); - device_remove_file(&dev->ofdev.dev, &dev_attr_cur_mw); - device_remove_file(&dev->ofdev.dev, &dev_attr_max_pd); - device_remove_file(&dev->ofdev.dev, &dev_attr_max_ah); -} +static struct attribute_group ehca_dev_attr_grp = { + .attrs = ehca_dev_attrs +}; static int __devinit ehca_probe(struct ibmebus_dev *dev, const struct of_device_id *id) @@ -668,7 +652,10 @@ static int __devinit ehca_probe(struct i } } - ehca_create_device_sysfs(dev); + ret = sysfs_create_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp); + if (ret) /* only complain; we can live without attributes */ + ehca_err(&shca->ib_device, + "Cannot create device attributes ret=%d", ret); spin_lock(&shca_list_lock); list_add(&shca->shca_list, &shca_list); @@ -720,7 +707,7 @@ static int __devexit ehca_remove(struct struct ehca_shca *shca = dev->ofdev.dev.driver_data; int ret; - ehca_remove_device_sysfs(dev); + sysfs_remove_group(&dev->ofdev.dev.kobj, &ehca_dev_attr_grp); if (ehca_open_aqp1 == 1) { int i; @@ -812,11 +799,12 @@ int __init ehca_module_init(void) int ret; printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Rel.: SVNEHCA_0022)\n"); + "(Rel.: SVNEHCA_0023)\n"); idr_init(&ehca_qp_idr); idr_init(&ehca_cq_idr); spin_lock_init(&ehca_qp_idr_lock); spin_lock_init(&ehca_cq_idr_lock); + spin_lock_init(&hcall_lock); INIT_LIST_HEAD(&shca_list); spin_lock_init(&shca_list_lock); @@ -838,7 +826,9 @@ int __init ehca_module_init(void) goto module_init2; } - ehca_create_driver_sysfs(&ehca_driver); + ret = sysfs_create_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); + if (ret) /* only complain; we can live without attributes */ + ehca_gen_err("Cannot create driver attributes ret=%d", ret); if (ehca_poll_all_eqs != 1) { ehca_gen_err("WARNING!!!"); @@ -865,7 +855,7 @@ void __exit ehca_module_exit(void) if (ehca_poll_all_eqs == 1) del_timer_sync(&poll_eqs_timer); - ehca_remove_driver_sysfs(&ehca_driver); + sysfs_remove_group(&ehca_driver.driver.kobj, &ehca_drv_attr_grp); ibmebus_unregister_driver(&ehca_driver); ehca_destroy_slab_caches(); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index df0516f..b5bc787 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -523,6 +523,8 @@ struct ib_qp *ehca_create_qp(struct ib_p goto create_qp_exit1; } + my_qp->ib_qp.qp_num = my_qp->real_qp_num; + switch (init_attr->qp_type) { case IB_QPT_RC: if (isdaqp == 0) { @@ -568,7 +570,7 @@ struct ib_qp *ehca_create_qp(struct ib_p parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr; parms.act_nr_send_sges = init_attr->cap.max_send_sge; parms.act_nr_recv_sges = init_attr->cap.max_recv_sge; - my_qp->real_qp_num = + my_qp->ib_qp.qp_num = (init_attr->qp_type == IB_QPT_SMI) ? 0 : 1; } @@ -595,7 +597,6 @@ struct ib_qp *ehca_create_qp(struct ib_p my_qp->ib_qp.recv_cq = init_attr->recv_cq; my_qp->ib_qp.send_cq = init_attr->send_cq; - my_qp->ib_qp.qp_num = my_qp->real_qp_num; my_qp->ib_qp.qp_type = init_attr->qp_type; my_qp->qp_type = init_attr->qp_type; @@ -968,17 +969,21 @@ static int internal_modify_qp(struct ib_ ((ehca_mult - 1) / ah_mult) : 0; else mqpcb->max_static_rate = 0; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + + /* * only if GRH is TRUE we might consider SOURCE_GID_IDX * and DEST_GID otherwise phype will return H_ATTR_PARM!!! */ if (attr->ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag = 1 << 31; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->send_grh_flag = 1; + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c index b564fcd..7f0beec 100644 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ b/drivers/infiniband/hw/ehca/hcp_if.c @@ -154,7 +154,8 @@ static long ehca_plpar_hcall9(unsigned l unsigned long arg9) { long ret; - int i, sleep_msecs; + int i, sleep_msecs, lock_is_set = 0; + unsigned long flags; ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx " "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx", @@ -162,10 +163,18 @@ static long ehca_plpar_hcall9(unsigned l arg8, arg9); for (i = 0; i < 5; i++) { + if ((opcode == H_ALLOC_RESOURCE) && (arg2 == 5)) { + spin_lock_irqsave(&hcall_lock, flags); + lock_is_set = 1; + } + ret = plpar_hcall9(opcode, outs, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); + if (lock_is_set) + spin_unlock_irqrestore(&hcall_lock, flags); + if (H_IS_LONG_BUSY(ret)) { sleep_msecs = get_longbusy_msecs(ret); msleep_interruptible(sleep_msecs); @@ -193,11 +202,11 @@ static long ehca_plpar_hcall9(unsigned l opcode, ret, outs[0], outs[1], outs[2], outs[3], outs[4], outs[5], outs[6], outs[7], outs[8]); return ret; - } return H_BUSY; } + u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, struct ehca_pfeq *pfeq, const u32 neq_control, diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 1b9c308..4e2e3df 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c @@ -747,7 +747,6 @@ static void ipath_pe_quiet_serdes(struct static int ipath_pe_intconfig(struct ipath_devdata *dd) { - u64 val; u32 chiprev; /* @@ -760,9 +759,9 @@ static int ipath_pe_intconfig(struct ipa if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) { /* Rev2+ reports extra errors via internal GPIO pins */ dd->ipath_flags |= IPATH_GPIO_ERRINTRS; - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); - val |= IPATH_GPIO_ERRINTR_MASK; - ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + dd->ipath_gpio_mask |= IPATH_GPIO_ERRINTR_MASK; + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + dd->ipath_gpio_mask); } return 0; } diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 45d0331..a90d3b5 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c @@ -1056,7 +1056,7 @@ irqreturn_t ipath_intr(int irq, void *da gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT); chk0rcv = 1; } - if (unlikely(gpiostatus)) { + if (gpiostatus) { /* * Some unexpected bits remain. If they could have * caused the interrupt, complain and clear. @@ -1065,9 +1065,8 @@ irqreturn_t ipath_intr(int irq, void *da * GPIO interrupts, possibly on a "three strikes" * basis. */ - u32 mask; - mask = ipath_read_kreg32( - dd, dd->ipath_kregs->kr_gpio_mask); + const u32 mask = (u32) dd->ipath_gpio_mask; + if (mask & gpiostatus) { ipath_dbg("Unexpected GPIO IRQ bits %x\n", gpiostatus & mask); diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index e900c25..12194f3 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h @@ -397,6 +397,8 @@ struct ipath_devdata { unsigned long ipath_pioavailshadow[8]; /* shadow of kr_gpio_out, for rmw ops */ u64 ipath_gpio_out; + /* shadow the gpio mask register */ + u64 ipath_gpio_mask; /* kr_revision shadow */ u64 ipath_revision; /* diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index 12933e7..bb70845 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -1387,13 +1387,12 @@ static int enable_timer(struct ipath_dev * processing. */ if (dd->ipath_flags & IPATH_GPIO_INTR) { - u64 val; ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, 0x2074076542310ULL); /* Enable GPIO bit 2 interrupt */ - val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); - val |= (u64) (1 << IPATH_GPIO_PORT0_BIT); - ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT); + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + dd->ipath_gpio_mask); } init_timer(&dd->verbs_timer); @@ -1412,8 +1411,9 @@ static int disable_timer(struct ipath_de u64 val; /* Disable GPIO bit 2 interrupt */ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask); - val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); - ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val); + dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); + ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, + dd->ipath_gpio_mask); /* * We might want to undo changes to debugportselect, * but how? diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 688ecb4..402f3a2 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -489,6 +489,7 @@ static void *mlx4_ib_add(struct mlx4_dev ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); if (!ibdev->uar_map) goto err_uar; + MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); INIT_LIST_HEAD(&ibdev->pgdir_list); mutex_init(&ibdev->pgdir_mutex); diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index cf0868f..ca224d0 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -284,7 +284,7 @@ void mthca_cq_clean(struct mthca_dev *de { struct mthca_cqe *cqe; u32 prod_index; - int nfreed = 0; + int i, nfreed = 0; spin_lock_irq(&cq->lock); @@ -321,6 +321,8 @@ void mthca_cq_clean(struct mthca_dev *de } if (nfreed) { + for (i = 0; i < nfreed; ++i) + set_cqe_hw(get_cqe(cq, (cq->cons_index + i) & cq->ibcq.cqe)); wmb(); cq->cons_index += nfreed; update_cons_index(dev, cq, nfreed); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index fee60c8..72fabb8 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1862,6 +1862,7 @@ int mthca_tavor_post_receive(struct ib_q dev->kar + MTHCA_RECEIVE_DOORBELL, MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock)); + qp->rq.next_ind = ind; qp->rq.head += MTHCA_TAVOR_MAX_WQES_PER_RECV_DB; size0 = 0; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 785bc85..eec833b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -257,10 +257,11 @@ static int ipoib_cm_req_handler(struct i cm_id->context = p; p->jiffies = jiffies; spin_lock_irq(&priv->lock); + if (list_empty(&priv->cm.passive_ids)) + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); list_add(&p->list, &priv->cm.passive_ids); spin_unlock_irq(&priv->lock); - queue_delayed_work(ipoib_workqueue, - &priv->cm.stale_task, IPOIB_CM_RX_DELAY); return 0; err_rep: @@ -378,8 +379,6 @@ void ipoib_cm_handle_rx_wc(struct net_de if (!list_empty(&p->list)) list_move(&p->list, &priv->cm.passive_ids); spin_unlock_irqrestore(&priv->lock, flags); - queue_delayed_work(ipoib_workqueue, - &priv->cm.stale_task, IPOIB_CM_RX_DELAY); } } @@ -1100,6 +1099,10 @@ static void ipoib_cm_stale_task(struct w kfree(p); spin_lock_irq(&priv->lock); } + + if (!list_empty(&priv->cm.passive_ids)) + queue_delayed_work(ipoib_workqueue, + &priv->cm.stale_task, IPOIB_CM_RX_DELAY); spin_unlock_irq(&priv->lock); } diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index d24ab23..a7562f7 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -45,8 +45,6 @@ #include "queue.h" */ #define MMC_SHIFT 3 -static int major; - /* * There is one mmc_blk_data per slot. */ @@ -466,7 +464,7 @@ static struct mmc_blk_data *mmc_blk_allo md->queue.issue_fn = mmc_blk_issue_rq; md->queue.data = md; - md->disk->major = major; + md->disk->major = MMC_BLOCK_MAJOR; md->disk->first_minor = devidx << MMC_SHIFT; md->disk->fops = &mmc_bdops; md->disk->private_data = md; @@ -634,14 +632,9 @@ static int __init mmc_blk_init(void) { int res = -ENOMEM; - res = register_blkdev(major, "mmc"); - if (res < 0) { - printk(KERN_WARNING "Unable to get major %d for MMC media: %d\n", - major, res); + res = register_blkdev(MMC_BLOCK_MAJOR, "mmc"); + if (res) goto out; - } - if (major == 0) - major = res; return mmc_register_driver(&mmc_driver); @@ -652,7 +645,7 @@ static int __init mmc_blk_init(void) static void __exit mmc_blk_exit(void) { mmc_unregister_driver(&mmc_driver); - unregister_blkdev(major, "mmc"); + unregister_blkdev(MMC_BLOCK_MAJOR, "mmc"); } module_init(mmc_blk_init); @@ -661,5 +654,3 @@ module_exit(mmc_blk_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Multimedia Card (MMC) block device driver"); -module_param(major, int, 0444); -MODULE_PARM_DESC(major, "specify the major device number for MMC block driver"); diff --git a/drivers/mmc/host/au1xmmc.c b/drivers/mmc/host/au1xmmc.c index b7156a4..f967226 100644 --- a/drivers/mmc/host/au1xmmc.c +++ b/drivers/mmc/host/au1xmmc.c @@ -187,9 +187,8 @@ static void au1xmmc_tasklet_finish(unsig } static int au1xmmc_send_command(struct au1xmmc_host *host, int wait, - struct mmc_command *cmd) + struct mmc_command *cmd, unsigned int flags) { - u32 mmccmd = (cmd->opcode << SD_CMD_CI_SHIFT); switch (mmc_resp_type(cmd)) { @@ -213,24 +212,16 @@ static int au1xmmc_send_command(struct a return MMC_ERR_INVALID; } - switch(cmd->opcode) { - case MMC_READ_SINGLE_BLOCK: - case SD_APP_SEND_SCR: - mmccmd |= SD_CMD_CT_2; - break; - case MMC_READ_MULTIPLE_BLOCK: - mmccmd |= SD_CMD_CT_4; - break; - case MMC_WRITE_BLOCK: - mmccmd |= SD_CMD_CT_1; - break; - - case MMC_WRITE_MULTIPLE_BLOCK: - mmccmd |= SD_CMD_CT_3; - break; - case MMC_STOP_TRANSMISSION: - mmccmd |= SD_CMD_CT_7; - break; + if (flags & MMC_DATA_READ) { + if (flags & MMC_DATA_MULTI) + mmccmd |= SD_CMD_CT_4; + else + mmccmd |= SD_CMD_CT_2; + } else if (flags & MMC_DATA_WRITE) { + if (flags & MMC_DATA_MULTI) + mmccmd |= SD_CMD_CT_3; + else + mmccmd |= SD_CMD_CT_1; } au_writel(cmd->arg, HOST_CMDARG(host)); @@ -665,6 +656,7 @@ static void au1xmmc_request(struct mmc_h { struct au1xmmc_host *host = mmc_priv(mmc); + unsigned int flags = 0; int ret = MMC_ERR_NONE; WARN_ON(irqs_disabled()); @@ -677,11 +669,12 @@ static void au1xmmc_request(struct mmc_h if (mrq->data) { FLUSH_FIFO(host); + flags = mrq->data->flags; ret = au1xmmc_prepare_data(host, mrq->data); } if (ret == MMC_ERR_NONE) - ret = au1xmmc_send_command(host, 0, mrq->cmd); + ret = au1xmmc_send_command(host, 0, mrq->cmd, flags); if (ret != MMC_ERR_NONE) { mrq->cmd->error = ret; diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index d97d386..f8985c5 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -232,20 +232,14 @@ #ifdef CONFIG_PXA27x /* * workaround for erratum #42: * Intel PXA27x Family Processor Specification Update Rev 001 + * A bogus CRC error can appear if the msb of a 136 bit + * response is a one. */ - if (cmd->opcode == MMC_ALL_SEND_CID || - cmd->opcode == MMC_SEND_CSD || - cmd->opcode == MMC_SEND_CID) { - /* a bogus CRC error can appear if the msb of - the 15 byte response is a one */ - if ((cmd->resp[0] & 0x80000000) == 0) - cmd->error = MMC_ERR_BADCRC; - } else { - pr_debug("ignoring CRC from command %d - *risky*\n",cmd->opcode); - } -#else - cmd->error = MMC_ERR_BADCRC; + if (cmd->flags & MMC_RSP_136 && cmd->resp[0] & 0x80000000) { + pr_debug("ignoring CRC from command %d - *risky*\n", cmd->opcode); + } else #endif + cmd->error = MMC_ERR_BADCRC; } pxamci_disable_irq(host, END_CMD_RES); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index ff5bf73..a359efd 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -963,6 +963,15 @@ static void sdhci_data_irq(struct sdhci_ if (intmask & (SDHCI_INT_DATA_AVAIL | SDHCI_INT_SPACE_AVAIL)) sdhci_transfer_pio(host); + /* + * We currently don't do anything fancy with DMA + * boundaries, but as we can't disable the feature + * we need to at least restart the transfer. + */ + if (intmask & SDHCI_INT_DMA_END) + writel(readl(host->ioaddr + SDHCI_DMA_ADDRESS), + host->ioaddr + SDHCI_DMA_ADDRESS); + if (intmask & SDHCI_INT_DATA_END) sdhci_finish_data(host); } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index fb99cd4..c5baa19 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2508,6 +2508,7 @@ config MLX4_CORE config MLX4_DEBUG bool "Verbose debugging output" if (MLX4_CORE && EMBEDDED) + depends on MLX4_CORE default y ---help--- This option causes debugging code to be compiled into the diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 4debb02..20b8c0d 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -542,8 +542,6 @@ static int __devinit mlx4_setup_hca(stru struct mlx4_priv *priv = mlx4_priv(dev); int err; - MLX4_INIT_DOORBELL_LOCK(&priv->doorbell_lock); - err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize " diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 9befbae..3d3b6d2 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -275,7 +275,6 @@ struct mlx4_priv { struct mlx4_uar driver_uar; void __iomem *kar; - MLX4_DECLARE_DOORBELL_LOCK(doorbell_lock) u32 rev_id; char board_id[MLX4_BOARD_ID_LEN]; diff --git a/drivers/sbus/char/bbc_i2c.c b/drivers/sbus/char/bbc_i2c.c index 8410587..178155b 100644 --- a/drivers/sbus/char/bbc_i2c.c +++ b/drivers/sbus/char/bbc_i2c.c @@ -18,6 +18,7 @@ #include #include #include #include +#include #include "bbc_i2c.h" diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index 2d14a29..3279a1b 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -20,6 +20,7 @@ #include #include /* EBus device */ #include /* OpenProm Library */ #include /* put_/get_user */ +#include #include diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index e62d23f..d28c14e 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1757,6 +1757,14 @@ config SCSI_ESP_CORE tristate "ESP Scsi Driver Core" depends on SCSI select SCSI_SPI_ATTRS + help + This is a core driver for NCR53c9x based scsi chipsets, + also known as "ESP" for Emulex Scsi Processor or + Enhanced Scsi Processor. This driver does not exist by + itself, there are front-end drivers which, when enabled, + select and enable this driver. One example is SCSI_SUNESP. + These front-end drivers provide probing, DMA, and register + access support for the core driver. config SCSI_SUNESP tristate "Sparc ESP Scsi Driver" diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c index 40d4856..c3a6bd2 100644 --- a/drivers/serial/sunhv.c +++ b/drivers/serial/sunhv.c @@ -493,6 +493,10 @@ static struct of_device_id hv_match[] = .name = "console", .compatible = "qcn", }, + { + .name = "console", + .compatible = "SUNW,sun4v-console", + }, {}, }; MODULE_DEVICE_TABLE(of, hv_match); diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index eebcb70..4d7485f 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1535,7 +1535,7 @@ config FB_LEO config FB_XVR500 bool "Sun XVR-500 3DLABS Wildcat support" - depends on FB && PCI && SPARC64 + depends on (FB = y) && PCI && SPARC64 select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT @@ -1548,7 +1548,7 @@ config FB_XVR500 config FB_XVR2500 bool "Sun XVR-2500 3DLABS Wildcat support" - depends on FB && PCI && SPARC64 + depends on (FB = y) && PCI && SPARC64 select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1aad34e..0b73cd4 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1,6 +1,6 @@ /* - * fs/eventpoll.c ( Efficent event polling implementation ) - * Copyright (C) 2001,...,2006 Davide Libenzi + * fs/eventpoll.c (Efficent event polling implementation) + * Copyright (C) 2001,...,2007 Davide Libenzi * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,7 +26,6 @@ #include #include #include #include -#include #include #include #include @@ -39,15 +38,14 @@ #include #include #include #include -#include /* * LOCKING: * There are three level of locking required by epoll : * * 1) epmutex (mutex) - * 2) ep->sem (rw_semaphore) - * 3) ep->lock (rw_lock) + * 2) ep->mtx (mutex) + * 3) ep->lock (spinlock) * * The acquire order is the one listed above, from 1 to 3. * We need a spinlock (ep->lock) because we manipulate objects @@ -57,20 +55,20 @@ #include * a spinlock. During the event transfer loop (from kernel to * user space) we could end up sleeping due a copy_to_user(), so * we need a lock that will allow us to sleep. This lock is a - * read-write semaphore (ep->sem). It is acquired on read during - * the event transfer loop and in write during epoll_ctl(EPOLL_CTL_DEL) - * and during eventpoll_release_file(). Then we also need a global - * semaphore to serialize eventpoll_release_file() and ep_free(). - * This semaphore is acquired by ep_free() during the epoll file + * mutex (ep->mtx). It is acquired during the event transfer loop, + * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file(). + * Then we also need a global mutex to serialize eventpoll_release_file() + * and ep_free(). + * This mutex is acquired by ep_free() during the epoll file * cleanup path and it is also acquired by eventpoll_release_file() * if a file has been pushed inside an epoll set and it is then * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). - * It is possible to drop the "ep->sem" and to use the global - * semaphore "epmutex" (together with "ep->lock") to have it working, - * but having "ep->sem" will make the interface more scalable. + * It is possible to drop the "ep->mtx" and to use the global + * mutex "epmutex" (together with "ep->lock") to have it working, + * but having "ep->mtx" will make the interface more scalable. * Events that require holding "epmutex" are very rare, while for - * normal operations the epoll private "ep->sem" will guarantee - * a greater scalability. + * normal operations the epoll private "ep->mtx" will guarantee + * a better scalability. */ #define DEBUG_EPOLL 0 @@ -102,6 +100,8 @@ #define EP_MAX_MSTIMEO min(1000ULL * MAX #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) +#define EP_UNACTIVE_PTR ((void *) -1L) + struct epoll_filefd { struct file *file; int fd; @@ -111,7 +111,7 @@ struct epoll_filefd { * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". * It is used to keep track on all tasks that are currently inside the wake_up() code * to 1) short-circuit the one coming from the same task and same wait queue head - * ( loop ) 2) allow a maximum number of epoll descriptors inclusion nesting + * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting * 3) let go the ones coming from other tasks. */ struct wake_task_node { @@ -130,21 +130,57 @@ struct poll_safewake { }; /* + * Each file descriptor added to the eventpoll interface will + * have an entry of this type linked to the "rbr" RB tree. + */ +struct epitem { + /* RB tree node used to link this structure to the eventpoll RB tree */ + struct rb_node rbn; + + /* List header used to link this structure to the eventpoll ready list */ + struct list_head rdllink; + + /* + * Works together "struct eventpoll"->ovflist in keeping the + * single linked chain of items. + */ + struct epitem *next; + + /* The file descriptor information this item refers to */ + struct epoll_filefd ffd; + + /* Number of active wait queue attached to poll operations */ + int nwait; + + /* List containing poll wait queues */ + struct list_head pwqlist; + + /* The "container" of this item */ + struct eventpoll *ep; + + /* List header used to link this item to the "struct file" items list */ + struct list_head fllink; + + /* The structure that describe the interested events and the source fd */ + struct epoll_event event; +}; + +/* * This structure is stored inside the "private_data" member of the file * structure and rapresent the main data sructure for the eventpoll * interface. */ struct eventpoll { /* Protect the this structure access */ - rwlock_t lock; + spinlock_t lock; /* - * This semaphore is used to ensure that files are not removed - * while epoll is using them. This is read-held during the event - * collection loop and it is write-held during the file cleanup - * path, the epoll file exit code and the ctl operations. + * This mutex is used to ensure that files are not removed + * while epoll is using them. This is held during the event + * collection loop, the file cleanup path, the epoll file exit + * code and the ctl operations. */ - struct rw_semaphore sem; + struct mutex mtx; /* Wait queue used by sys_epoll_wait() */ wait_queue_head_t wq; @@ -155,8 +191,15 @@ struct eventpoll { /* List of ready file descriptors */ struct list_head rdllist; - /* RB-Tree root used to store monitored fd structs */ + /* RB tree root used to store monitored fd structs */ struct rb_root rbr; + + /* + * This is a single linked list that chains all the "struct epitem" that + * happened while transfering ready events to userspace w/out + * holding ->lock. + */ + struct epitem *ovflist; }; /* Wait structure used by the poll hooks */ @@ -177,42 +220,6 @@ struct eppoll_entry { wait_queue_head_t *whead; }; -/* - * Each file descriptor added to the eventpoll interface will - * have an entry of this type linked to the "rbr" RB tree. - */ -struct epitem { - /* RB-Tree node used to link this structure to the eventpoll rb-tree */ - struct rb_node rbn; - - /* List header used to link this structure to the eventpoll ready list */ - struct list_head rdllink; - - /* The file descriptor information this item refers to */ - struct epoll_filefd ffd; - - /* Number of active wait queue attached to poll operations */ - int nwait; - - /* List containing poll wait queues */ - struct list_head pwqlist; - - /* The "container" of this item */ - struct eventpoll *ep; - - /* The structure that describe the interested events and the source fd */ - struct epoll_event event; - - /* - * Used to keep track of the usage count of the structure. This avoids - * that the structure will desappear from underneath our processing. - */ - atomic_t usecnt; - - /* List header used to link this item to the "struct file" items list */ - struct list_head fllink; -}; - /* Wrapper struct used by poll queueing */ struct ep_pqueue { poll_table pt; @@ -220,7 +227,7 @@ struct ep_pqueue { }; /* - * This semaphore is used to serialize ep_free() and eventpoll_release_file(). + * This mutex is used to serialize ep_free() and eventpoll_release_file(). */ static struct mutex epmutex; @@ -234,7 +241,7 @@ static struct kmem_cache *epi_cache __re static struct kmem_cache *pwq_cache __read_mostly; -/* Setup the structure that is used as key for the rb-tree */ +/* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, struct file *file, int fd) { @@ -242,7 +249,7 @@ static inline void ep_set_ffd(struct epo ffd->fd = fd; } -/* Compare rb-tree keys */ +/* Compare RB tree keys */ static inline int ep_cmp_ffd(struct epoll_filefd *p1, struct epoll_filefd *p2) { @@ -250,20 +257,20 @@ static inline int ep_cmp_ffd(struct epol (p1->file < p2->file ? -1 : p1->fd - p2->fd)); } -/* Special initialization for the rb-tree node to detect linkage */ +/* Special initialization for the RB tree node to detect linkage */ static inline void ep_rb_initnode(struct rb_node *n) { rb_set_parent(n, n); } -/* Removes a node from the rb-tree and marks it for a fast is-linked check */ +/* Removes a node from the RB tree and marks it for a fast is-linked check */ static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) { rb_erase(n, r); rb_set_parent(n, n); } -/* Fast check to verify that the item is linked to the main rb-tree */ +/* Fast check to verify that the item is linked to the main RB tree */ static inline int ep_rb_linked(struct rb_node *n) { return rb_parent(n) != n; @@ -381,78 +388,11 @@ static void ep_unregister_pollwait(struc } /* - * Unlink the "struct epitem" from all places it might have been hooked up. - * This function must be called with write IRQ lock on "ep->lock". - */ -static int ep_unlink(struct eventpoll *ep, struct epitem *epi) -{ - int error; - - /* - * It can happen that this one is called for an item already unlinked. - * The check protect us from doing a double unlink ( crash ). - */ - error = -ENOENT; - if (!ep_rb_linked(&epi->rbn)) - goto error_return; - - /* - * Clear the event mask for the unlinked item. This will avoid item - * notifications to be sent after the unlink operation from inside - * the kernel->userspace event transfer loop. - */ - epi->event.events = 0; - - /* - * At this point is safe to do the job, unlink the item from our rb-tree. - * This operation togheter with the above check closes the door to - * double unlinks. - */ - ep_rb_erase(&epi->rbn, &ep->rbr); - - /* - * If the item we are going to remove is inside the ready file descriptors - * we want to remove it from this list to avoid stale events. - */ - if (ep_is_linked(&epi->rdllink)) - list_del_init(&epi->rdllink); - - error = 0; -error_return: - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n", - current, ep, epi->ffd.file, error)); - - return error; -} - -/* - * Increment the usage count of the "struct epitem" making it sure - * that the user will have a valid pointer to reference. - */ -static void ep_use_epitem(struct epitem *epi) -{ - atomic_inc(&epi->usecnt); -} - -/* - * Decrement ( release ) the usage count by signaling that the user - * has finished using the structure. It might lead to freeing the - * structure itself if the count goes to zero. - */ -static void ep_release_epitem(struct epitem *epi) -{ - if (atomic_dec_and_test(&epi->usecnt)) - kmem_cache_free(epi_cache, epi); -} - -/* * Removes a "struct epitem" from the eventpoll RB tree and deallocates - * all the associated resources. + * all the associated resources. Must be called with "mtx" held. */ static int ep_remove(struct eventpoll *ep, struct epitem *epi) { - int error; unsigned long flags; struct file *file = epi->ffd.file; @@ -472,26 +412,21 @@ static int ep_remove(struct eventpoll *e list_del_init(&epi->fllink); spin_unlock(&file->f_ep_lock); - /* We need to acquire the write IRQ lock before calling ep_unlink() */ - write_lock_irqsave(&ep->lock, flags); - - /* Really unlink the item from the RB tree */ - error = ep_unlink(ep, epi); - - write_unlock_irqrestore(&ep->lock, flags); + if (ep_rb_linked(&epi->rbn)) + ep_rb_erase(&epi->rbn, &ep->rbr); - if (error) - goto error_return; + spin_lock_irqsave(&ep->lock, flags); + if (ep_is_linked(&epi->rdllink)) + list_del_init(&epi->rdllink); + spin_unlock_irqrestore(&ep->lock, flags); /* At this point it is safe to free the eventpoll item */ - ep_release_epitem(epi); + kmem_cache_free(epi_cache, epi); - error = 0; -error_return: - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n", - current, ep, file, error)); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", + current, ep, file)); - return error; + return 0; } static void ep_free(struct eventpoll *ep) @@ -506,7 +441,7 @@ static void ep_free(struct eventpoll *ep /* * We need to lock this because we could be hit by * eventpoll_release_file() while we're freeing the "struct eventpoll". - * We do not need to hold "ep->sem" here because the epoll file + * We do not need to hold "ep->mtx" here because the epoll file * is on the way to be removed and no one has references to it * anymore. The only hit might come from eventpoll_release_file() but * holding "epmutex" is sufficent here. @@ -525,7 +460,7 @@ static void ep_free(struct eventpoll *ep /* * Walks through the whole tree by freeing each "struct epitem". At this * point we are sure no poll callbacks will be lingering around, and also by - * write-holding "sem" we can be sure that no file cleanup code will hit + * holding "epmutex" we can be sure that no file cleanup code will hit * us during this operation. So we can avoid the lock on "ep->lock". */ while ((rbp = rb_first(&ep->rbr)) != 0) { @@ -534,16 +469,16 @@ static void ep_free(struct eventpoll *ep } mutex_unlock(&epmutex); + mutex_destroy(&ep->mtx); + kfree(ep); } static int ep_eventpoll_release(struct inode *inode, struct file *file) { struct eventpoll *ep = file->private_data; - if (ep) { + if (ep) ep_free(ep); - kfree(ep); - } DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); return 0; @@ -559,10 +494,10 @@ static unsigned int ep_eventpoll_poll(st poll_wait(file, &ep->poll_wait, wait); /* Check our condition */ - read_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); if (!list_empty(&ep->rdllist)) pollflags = POLLIN | POLLRDNORM; - read_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); return pollflags; } @@ -594,9 +529,11 @@ void eventpoll_release_file(struct file * We don't want to get "file->f_ep_lock" because it is not * necessary. It is not necessary because we're in the "struct file" * cleanup path, and this means that noone is using this file anymore. - * The only hit might come from ep_free() but by holding the semaphore + * So, for example, epoll_ctl() cannot hit here sicne if we reach this + * point, the file counter already went to zero and fget() would fail. + * The only hit might come from ep_free() but by holding the mutex * will correctly serialize the operation. We do need to acquire - * "ep->sem" after "epmutex" because ep_remove() requires it when called + * "ep->mtx" after "epmutex" because ep_remove() requires it when called * from anywhere but ep_free(). */ mutex_lock(&epmutex); @@ -606,9 +543,9 @@ void eventpoll_release_file(struct file ep = epi->ep; list_del_init(&epi->fllink); - down_write(&ep->sem); + mutex_lock(&ep->mtx); ep_remove(ep, epi); - up_write(&ep->sem); + mutex_unlock(&ep->mtx); } mutex_unlock(&epmutex); @@ -621,12 +558,13 @@ static int ep_alloc(struct eventpoll **p if (!ep) return -ENOMEM; - rwlock_init(&ep->lock); - init_rwsem(&ep->sem); + spin_lock_init(&ep->lock); + mutex_init(&ep->mtx); init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT; + ep->ovflist = EP_UNACTIVE_PTR; *pep = ep; @@ -636,20 +574,18 @@ static int ep_alloc(struct eventpoll **p } /* - * Search the file inside the eventpoll tree. It add usage count to - * the returned item, so the caller must call ep_release_epitem() - * after finished using the "struct epitem". + * Search the file inside the eventpoll tree. The RB tree operations + * are protected by the "mtx" mutex, and ep_find() must be called with + * "mtx" held. */ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) { int kcmp; - unsigned long flags; struct rb_node *rbp; struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); - read_lock_irqsave(&ep->lock, flags); for (rbp = ep->rbr.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); @@ -658,12 +594,10 @@ static struct epitem *ep_find(struct eve else if (kcmp < 0) rbp = rbp->rb_left; else { - ep_use_epitem(epi); epir = epi; break; } } - read_unlock_irqrestore(&ep->lock, flags); DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", current, file, epir)); @@ -686,7 +620,7 @@ static int ep_poll_callback(wait_queue_t DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", current, epi->ffd.file, epi, ep)); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); /* * If the event mask does not contain any poll(2) event, we consider the @@ -695,7 +629,21 @@ static int ep_poll_callback(wait_queue_t * until the next EPOLL_CTL_MOD will be issued. */ if (!(epi->event.events & ~EP_PRIVATE_BITS)) - goto is_disabled; + goto out_unlock; + + /* + * If we are trasfering events to userspace, we can hold no locks + * (because we're accessing user memory, and because of linux f_op->poll() + * semantics). All the events that happens during that period of time are + * chained in ep->ovflist and requeued later on. + */ + if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { + if (epi->next == EP_UNACTIVE_PTR) { + epi->next = ep->ovflist; + ep->ovflist = epi; + } + goto out_unlock; + } /* If this file is already in the ready list we exit soon */ if (ep_is_linked(&epi->rdllink)) @@ -714,8 +662,8 @@ is_linked: if (waitqueue_active(&ep->poll_wait)) pwake++; -is_disabled: - write_unlock_irqrestore(&ep->lock, flags); +out_unlock: + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -766,6 +714,9 @@ static void ep_rbtree_insert(struct even rb_insert_color(&epi->rbn, &ep->rbr); } +/* + * Must be called with "mtx" held. + */ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct file *tfile, int fd) { @@ -786,8 +737,8 @@ static int ep_insert(struct eventpoll *e epi->ep = ep; ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; - atomic_set(&epi->usecnt, 1); epi->nwait = 0; + epi->next = EP_UNACTIVE_PTR; /* Initialize the poll table using the queue callback */ epq.epi = epi; @@ -796,7 +747,9 @@ static int ep_insert(struct eventpoll *e /* * Attach the item to the poll hooks and get current event bits. * We can safely use the file* here because its usage count has - * been increased by the caller of this function. + * been increased by the caller of this function. Note that after + * this operation completes, the poll callback can start hitting + * the new item. */ revents = tfile->f_op->poll(tfile, &epq.pt); @@ -813,12 +766,15 @@ static int ep_insert(struct eventpoll *e list_add_tail(&epi->fllink, &tfile->f_ep_links); spin_unlock(&tfile->f_ep_lock); - /* We have to drop the new item inside our item list to keep track of it */ - write_lock_irqsave(&ep->lock, flags); - - /* Add the current item to the rb-tree */ + /* + * Add the current item to the RB tree. All RB tree operations are + * protected by "mtx", and ep_insert() is called with "mtx" held. + */ ep_rbtree_insert(ep, epi); + /* We have to drop the new item inside our item list to keep track of it */ + spin_lock_irqsave(&ep->lock, flags); + /* If the file is already "ready" we drop it inside the ready list */ if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); @@ -830,7 +786,7 @@ static int ep_insert(struct eventpoll *e pwake++; } - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -846,12 +802,14 @@ error_unregister: /* * We need to do this because an event could have been arrived on some - * allocated wait queue. + * allocated wait queue. Note that we don't care about the ep->ovflist + * list, since that is used/cleaned only inside a section bound by "mtx". + * And ep_insert() is called with "mtx" held. */ - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) list_del_init(&epi->rdllink); - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); kmem_cache_free(epi_cache, epi); error_return: @@ -860,7 +818,7 @@ error_return: /* * Modify the interest event mask by dropping an event if the new mask - * has a match in the current file status. + * has a match in the current file status. Must be called with "mtx" held. */ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) { @@ -882,36 +840,28 @@ static int ep_modify(struct eventpoll *e */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); /* Copy the data member from inside the lock */ epi->event.data = event->data; /* - * If the item is not linked to the RB tree it means that it's on its - * way toward the removal. Do nothing in this case. + * If the item is "hot" and it is not registered inside the ready + * list, push it inside. */ - if (ep_rb_linked(&epi->rbn)) { - /* - * If the item is "hot" and it is not registered inside the ready - * list, push it inside. If the item is not "hot" and it is currently - * registered inside the ready list, unlink it. - */ - if (revents & event->events) { - if (!ep_is_linked(&epi->rdllink)) { - list_add_tail(&epi->rdllink, &ep->rdllist); - - /* Notify waiting tasks that events are available */ - if (waitqueue_active(&ep->wq)) - __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | - TASK_INTERRUPTIBLE); - if (waitqueue_active(&ep->poll_wait)) - pwake++; - } + if (revents & event->events) { + if (!ep_is_linked(&epi->rdllink)) { + list_add_tail(&epi->rdllink, &ep->rdllist); + + /* Notify waiting tasks that events are available */ + if (waitqueue_active(&ep->wq)) + __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | + TASK_INTERRUPTIBLE); + if (waitqueue_active(&ep->poll_wait)) + pwake++; } } - - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* We have to call this outside the lock */ if (pwake) @@ -920,36 +870,50 @@ static int ep_modify(struct eventpoll *e return 0; } -/* - * This function is called without holding the "ep->lock" since the call to - * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ - * because of the way poll() is traditionally implemented in Linux. - */ -static int ep_send_events(struct eventpoll *ep, struct list_head *txlist, - struct epoll_event __user *events, int maxevents) +static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events, + int maxevents) { int eventcnt, error = -EFAULT, pwake = 0; unsigned int revents; unsigned long flags; - struct epitem *epi; - struct list_head injlist; + struct epitem *epi, *nepi; + struct list_head txlist; + + INIT_LIST_HEAD(&txlist); + + /* + * We need to lock this because we could be hit by + * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). + */ + mutex_lock(&ep->mtx); - INIT_LIST_HEAD(&injlist); + /* + * Steal the ready list, and re-init the original one to the + * empty list. Also, set ep->ovflist to NULL so that events + * happening while looping w/out locks, are not lost. We cannot + * have the poll callback to queue directly on ep->rdllist, + * because we are doing it in the loop below, in a lockless way. + */ + spin_lock_irqsave(&ep->lock, flags); + list_splice(&ep->rdllist, &txlist); + INIT_LIST_HEAD(&ep->rdllist); + ep->ovflist = NULL; + spin_unlock_irqrestore(&ep->lock, flags); /* * We can loop without lock because this is a task private list. * We just splice'd out the ep->rdllist in ep_collect_ready_items(). - * Items cannot vanish during the loop because we are holding "sem" in - * read. + * Items cannot vanish during the loop because we are holding "mtx". */ - for (eventcnt = 0; !list_empty(txlist) && eventcnt < maxevents;) { - epi = list_first_entry(txlist, struct epitem, rdllink); - prefetch(epi->rdllink.next); + for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) { + epi = list_first_entry(&txlist, struct epitem, rdllink); + + list_del_init(&epi->rdllink); /* * Get the ready file event set. We can safely use the file - * because we are holding the "sem" in read and this will - * guarantee that both the file and the item will not vanish. + * because we are holding the "mtx" and this will guarantee + * that both the file and the item will not vanish. */ revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); revents &= epi->event.events; @@ -957,8 +921,8 @@ static int ep_send_events(struct eventpo /* * Is the event mask intersect the caller-requested one, * deliver the event to userspace. Again, we are holding - * "sem" in read, so no operations coming from userspace - * can change the item. + * "mtx", so no operations coming from userspace can change + * the item. */ if (revents) { if (__put_user(revents, @@ -970,59 +934,59 @@ static int ep_send_events(struct eventpo epi->event.events &= EP_PRIVATE_BITS; eventcnt++; } - /* - * This is tricky. We are holding the "sem" in read, and this - * means that the operations that can change the "linked" status - * of the epoll item (epi->rbn and epi->rdllink), cannot touch - * them. Also, since we are "linked" from a epi->rdllink POV - * (the item is linked to our transmission list we just - * spliced), the ep_poll_callback() cannot touch us either, - * because of the check present in there. Another parallel - * epoll_wait() will not get the same result set, since we - * spliced the ready list before. Note that list_del() still - * shows the item as linked to the test in ep_poll_callback(). + * At this point, noone can insert into ep->rdllist besides + * us. The epoll_ctl() callers are locked out by us holding + * "mtx" and the poll callback will queue them in ep->ovflist. */ - list_del(&epi->rdllink); if (!(epi->event.events & EPOLLET) && - (revents & epi->event.events)) - list_add_tail(&epi->rdllink, &injlist); - else { - /* - * Be sure the item is totally detached before re-init - * the list_head. After INIT_LIST_HEAD() is committed, - * the ep_poll_callback() can requeue the item again, - * but we don't care since we are already past it. - */ - smp_mb(); - INIT_LIST_HEAD(&epi->rdllink); - } + (revents & epi->event.events)) + list_add_tail(&epi->rdllink, &ep->rdllist); } error = 0; - errxit: +errxit: + spin_lock_irqsave(&ep->lock, flags); /* - * If the re-injection list or the txlist are not empty, re-splice - * them to the ready list and do proper wakeups. + * During the time we spent in the loop above, some other events + * might have been queued by the poll callback. We re-insert them + * here (in case they are not already queued, or they're one-shot). */ - if (!list_empty(&injlist) || !list_empty(txlist)) { - write_lock_irqsave(&ep->lock, flags); + for (nepi = ep->ovflist; (epi = nepi) != NULL; + nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { + if (!ep_is_linked(&epi->rdllink) && + (epi->event.events & ~EP_PRIVATE_BITS)) + list_add_tail(&epi->rdllink, &ep->rdllist); + } + /* + * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after + * releasing the lock, events will be queued in the normal way inside + * ep->rdllist. + */ + ep->ovflist = EP_UNACTIVE_PTR; + + /* + * In case of error in the event-send loop, or in case the number of + * ready events exceeds the userspace limit, we need to splice the + * "txlist" back inside ep->rdllist. + */ + list_splice(&txlist, &ep->rdllist); - list_splice(txlist, &ep->rdllist); - list_splice(&injlist, &ep->rdllist); + if (!list_empty(&ep->rdllist)) { /* - * Wake up ( if active ) both the eventpoll wait list and the ->poll() - * wait list. + * Wake up (if active) both the eventpoll wait list and the ->poll() + * wait list (delayed after we release the lock). */ if (waitqueue_active(&ep->wq)) __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE); if (waitqueue_active(&ep->poll_wait)) pwake++; - - write_unlock_irqrestore(&ep->lock, flags); } + spin_unlock_irqrestore(&ep->lock, flags); + + mutex_unlock(&ep->mtx); /* We have to call this outside the lock */ if (pwake) @@ -1031,41 +995,6 @@ static int ep_send_events(struct eventpo return eventcnt == 0 ? error: eventcnt; } -/* - * Perform the transfer of events to user space. - */ -static int ep_events_transfer(struct eventpoll *ep, - struct epoll_event __user *events, int maxevents) -{ - int eventcnt; - unsigned long flags; - struct list_head txlist; - - INIT_LIST_HEAD(&txlist); - - /* - * We need to lock this because we could be hit by - * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL). - */ - down_read(&ep->sem); - - /* - * Steal the ready list, and re-init the original one to the - * empty list. - */ - write_lock_irqsave(&ep->lock, flags); - list_splice(&ep->rdllist, &txlist); - INIT_LIST_HEAD(&ep->rdllist); - write_unlock_irqrestore(&ep->lock, flags); - - /* Build result set in userspace */ - eventcnt = ep_send_events(ep, &txlist, events, maxevents); - - up_read(&ep->sem); - - return eventcnt; -} - static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout) { @@ -1083,7 +1012,7 @@ static int ep_poll(struct eventpoll *ep, MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; retry: - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); res = 0; if (list_empty(&ep->rdllist)) { @@ -1093,6 +1022,7 @@ retry: * ep_poll_callback() when events will become available. */ init_waitqueue_entry(&wait, current); + wait.flags |= WQ_FLAG_EXCLUSIVE; __add_wait_queue(&ep->wq, &wait); for (;;) { @@ -1109,9 +1039,9 @@ retry: break; } - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); jtimeout = schedule_timeout(jtimeout); - write_lock_irqsave(&ep->lock, flags); + spin_lock_irqsave(&ep->lock, flags); } __remove_wait_queue(&ep->wq, &wait); @@ -1121,7 +1051,7 @@ retry: /* Is it worth to try to dig for events ? */ eavail = !list_empty(&ep->rdllist); - write_unlock_irqrestore(&ep->lock, flags); + spin_unlock_irqrestore(&ep->lock, flags); /* * Try to transfer events to user space. In case we get 0 events and @@ -1129,18 +1059,17 @@ retry: * more luck. */ if (!res && eavail && - !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout) + !(res = ep_send_events(ep, events, maxevents)) && jtimeout) goto retry; return res; } /* - * It opens an eventpoll file descriptor by suggesting a storage of "size" - * file descriptors. The size parameter is just an hint about how to size - * data structures. It won't prevent the user to store more than "size" - * file descriptors inside the epoll interface. It is the kernel part of - * the userspace epoll_create(2). + * It opens an eventpoll file descriptor. The "size" parameter is there + * for historical reasons, when epoll was using an hash instead of an + * RB tree. With the current implementation, the "size" parameter is ignored + * (besides sanity checks). */ asmlinkage long sys_epoll_create(int size) { @@ -1176,7 +1105,6 @@ asmlinkage long sys_epoll_create(int siz error_free: ep_free(ep); - kfree(ep); error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", current, size, error)); @@ -1186,8 +1114,7 @@ error_return: /* * The following function implements the controller interface for * the eventpoll file that enables the insertion/removal/change of - * file descriptors inside the interest set. It represents - * the kernel part of the user space epoll_ctl(2). + * file descriptors inside the interest set. */ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) @@ -1237,9 +1164,13 @@ asmlinkage long sys_epoll_ctl(int epfd, */ ep = file->private_data; - down_write(&ep->sem); + mutex_lock(&ep->mtx); - /* Try to lookup the file inside our RB tree */ + /* + * Try to lookup the file inside our RB tree, Since we grabbed "mtx" + * above, we can be sure to be able to use the item looked up by + * ep_find() till we release the mutex. + */ epi = ep_find(ep, tfile, fd); error = -EINVAL; @@ -1266,13 +1197,7 @@ asmlinkage long sys_epoll_ctl(int epfd, error = -ENOENT; break; } - /* - * The function ep_find() increments the usage count of the structure - * so, if this is not NULL, we need to release it. - */ - if (epi) - ep_release_epitem(epi); - up_write(&ep->sem); + mutex_unlock(&ep->mtx); error_tgt_fput: fput(tfile); @@ -1378,7 +1303,7 @@ asmlinkage long sys_epoll_pwait(int epfd if (sigmask) { if (error == -EINTR) { memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); + sizeof(sigsaved)); set_thread_flag(TIF_RESTORE_SIGMASK); } else sigprocmask(SIG_SETMASK, &sigsaved, NULL); diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index 0bd7bd2..6a5be1f 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -85,8 +85,8 @@ #endif * +-------------+----------------+--------------+ */ -#ifdef CONFIG_SMP #include +#ifdef CONFIG_SMP #define cpu_last_asn(cpuid) (cpu_data[cpuid].last_asn) #else extern unsigned long last_asn; diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h index 21f5442..b4cf0ea 100644 --- a/include/asm-h8300/atomic.h +++ b/include/asm-h8300/atomic.h @@ -37,6 +37,7 @@ static __inline__ int atomic_sub_return( } #define atomic_sub(i, v) atomic_sub_return(i, v) +#define atomic_sub_and_test(i,v) (atomic_sub_return(i, v) == 0) static __inline__ int atomic_inc_return(atomic_t *v) { diff --git a/include/asm-i386/kdebug.h b/include/asm-i386/kdebug.h index 05c3117..a185b5f 100644 --- a/include/asm-i386/kdebug.h +++ b/include/asm-i386/kdebug.h @@ -27,7 +27,6 @@ enum die_val { DIE_GPF, DIE_CALL, DIE_NMI_IPI, - DIE_NMI_POST, DIE_PAGE_FAULT, }; diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 70f3515..338668b 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -749,9 +749,13 @@ extern unsigned long boot_option_idle_ov extern void enable_sep_cpu(void); extern int sysenter_setup(void); +/* Defined in head.S */ +extern struct Xgt_desc_struct early_gdt_descr; + extern void cpu_set_gdt(int); extern void switch_to_new_gdt(void); extern void cpu_init(void); +extern void init_gdt(int cpu); extern int force_mwait; diff --git a/include/asm-m68k/uaccess.h b/include/asm-m68k/uaccess.h index 6a4cf20..5c1264c 100644 --- a/include/asm-m68k/uaccess.h +++ b/include/asm-m68k/uaccess.h @@ -361,7 +361,9 @@ #define copy_to_user(to, from, n) __copy long strncpy_from_user(char *dst, const char __user *src, long count); long strnlen_user(const char __user *src, long n); -unsigned long clear_user(void __user *to, unsigned long n); +unsigned long __clear_user(void __user *to, unsigned long n); + +#define clear_user __clear_user #define strlen_user(str) strnlen_user(str, 32767) diff --git a/include/asm-sparc/kdebug.h b/include/asm-sparc/kdebug.h index 404d807..631f15f 100644 --- a/include/asm-sparc/kdebug.h +++ b/include/asm-sparc/kdebug.h @@ -58,6 +58,10 @@ #define SP_ENTER_DEBUGGER do { \ sp_enter_debugger(); \ } while(0) +enum die_val { + DIE_UNUSED, +}; + #endif /* !(__ASSEMBLY__) */ /* Some nice offset defines for assembler code. */ @@ -66,8 +70,4 @@ #define KDEBUG_DUNNO_OFF 0x4 #define KDEBUG_DUNNO2_OFF 0x8 #define KDEBUG_TEACH_OFF 0xc -enum die_val { - DIE_UNUSED, -}; - #endif /* !(_SPARC_KDEBUG_H) */ diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 8b6d9c9..8b4e23b 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h @@ -11,6 +11,7 @@ #include #include #include #include +#include #ifndef __ASSEMBLY__ diff --git a/include/asm-sparc64/dma-mapping.h b/include/asm-sparc64/dma-mapping.h index 2f858a2..9329429 100644 --- a/include/asm-sparc64/dma-mapping.h +++ b/include/asm-sparc64/dma-mapping.h @@ -10,10 +10,13 @@ #include /* need struct page definitions */ #include +#include + static inline int dma_supported(struct device *dev, u64 mask) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); return pci_dma_supported(to_pci_dev(dev), mask); } @@ -21,7 +24,8 @@ dma_supported(struct device *dev, u64 ma static inline int dma_set_mask(struct device *dev, u64 dma_mask) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); return pci_set_dma_mask(to_pci_dev(dev), dma_mask); } @@ -30,7 +34,8 @@ static inline void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); return pci_iommu_ops->alloc_consistent(to_pci_dev(dev), size, dma_handle, flag); } @@ -39,7 +44,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_free_consistent(to_pci_dev(dev), size, cpu_addr, dma_handle); } @@ -48,7 +54,8 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); return pci_map_single(to_pci_dev(dev), cpu_addr, size, (int)direction); } @@ -57,7 +64,8 @@ static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_unmap_single(to_pci_dev(dev), dma_addr, size, (int)direction); } @@ -67,7 +75,8 @@ dma_map_page(struct device *dev, struct unsigned long offset, size_t size, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); return pci_map_page(to_pci_dev(dev), page, offset, size, (int)direction); } @@ -76,7 +85,8 @@ static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_unmap_page(to_pci_dev(dev), dma_address, size, (int)direction); } @@ -85,7 +95,8 @@ static inline int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); return pci_map_sg(to_pci_dev(dev), sg, nents, (int)direction); } @@ -94,7 +105,8 @@ static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_unmap_sg(to_pci_dev(dev), sg, nhwentries, (int)direction); } @@ -103,7 +115,8 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle, size, (int)direction); @@ -113,7 +126,8 @@ static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle, size, (int)direction); @@ -123,7 +137,8 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, nelems, (int)direction); } @@ -132,7 +147,8 @@ static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction) { - BUG_ON(dev->bus != &pci_bus_type); + BUG_ON(dev->bus != &pci_bus_type && + dev->bus != &ebus_bus_type); pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, nelems, (int)direction); } diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h index 74feae9..d7e2bcf 100644 --- a/include/asm-x86_64/kdebug.h +++ b/include/asm-x86_64/kdebug.h @@ -22,7 +22,6 @@ enum die_val { DIE_GPF, DIE_CALL, DIE_NMI_IPI, - DIE_NMI_POST, DIE_PAGE_FAULT, }; diff --git a/include/linux/compat.h b/include/linux/compat.h index 636502c..0e69d2c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -261,5 +261,11 @@ asmlinkage long compat_sys_epoll_pwait(i asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, struct compat_timespec __user *t, int flags); +asmlinkage long compat_sys_signalfd(int ufd, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize); +asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, + const struct compat_itimerspec __user *utmr); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/init.h b/include/linux/init.h index 8bc32bb..e007ae4 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -52,14 +52,9 @@ #define __exit __attribute_used__ __att #endif /* For assembly routines */ -#ifdef CONFIG_HOTPLUG_CPU -#define __INIT .section ".text","ax" -#define __INITDATA .section ".data","aw" -#else #define __INIT .section ".init.text","ax" -#define __INITDATA .section ".init.data","aw" -#endif #define __FINIT .previous +#define __INITDATA .section ".init.data","aw" #ifndef __ASSEMBLY__ /* diff --git a/include/linux/io.h b/include/linux/io.h index 09d3512..8423dd3 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -27,8 +27,16 @@ struct device; void __iowrite32_copy(void __iomem *to, const void *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); +#ifdef CONFIG_MMU int ioremap_page_range(unsigned long addr, unsigned long end, unsigned long phys_addr, pgprot_t prot); +#else +static inline int ioremap_page_range(unsigned long addr, unsigned long end, + unsigned long phys_addr, pgprot_t prot) +{ + return 0; +} +#endif /* * Managed iomap interface diff --git a/include/linux/major.h b/include/linux/major.h index 0a74c52..7e7c909 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -152,6 +152,8 @@ #define USB_ACM_MAJOR 166 #define USB_ACM_AUX_MAJOR 167 #define USB_CHAR_MAJOR 180 +#define MMC_BLOCK_MAJOR 179 + #define VXVM_MAJOR 199 /* VERITAS volume i/o driver */ #define VXSPEC_MAJOR 200 /* VERITAS volume config driver */ #define VXDMP_MAJOR 201 /* VERITAS volume multipath driver */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index ea27065..fd6627e 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -60,7 +60,8 @@ #endif #define KMALLOC_SHIFT_LOW 3 #ifdef CONFIG_LARGE_ALLOCS -#define KMALLOC_SHIFT_HIGH 25 +#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT) =< 25 ? \ + (MAX_ORDER + PAGE_SHIFT - 1) : 25) #else #if !defined(CONFIG_MMU) || NR_CPUS > 512 || MAX_NUMNODES > 256 #define KMALLOC_SHIFT_HIGH 20 @@ -87,6 +88,9 @@ static inline int kmalloc_index(int size */ WARN_ON_ONCE(size == 0); + if (size >= (1 << KMALLOC_SHIFT_HIGH)) + return -1; + if (size > 64 && size <= 96) return 1; if (size > 128 && size <= 192) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 3db5c3c..51b6a6a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -74,7 +74,7 @@ static struct clocksource *watchdog; static struct timer_list watchdog_timer; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; -static int watchdog_resumed; +static unsigned long watchdog_resumed; /* * Interval: 0.5sec Threshold: 0.0625s @@ -104,9 +104,7 @@ static void clocksource_watchdog(unsigne spin_lock(&watchdog_lock); - resumed = watchdog_resumed; - if (unlikely(resumed)) - watchdog_resumed = 0; + resumed = test_and_clear_bit(0, &watchdog_resumed); wdnow = watchdog->read(); wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); @@ -151,9 +149,7 @@ static void clocksource_watchdog(unsigne } static void clocksource_resume_watchdog(void) { - spin_lock(&watchdog_lock); - watchdog_resumed = 1; - spin_unlock(&watchdog_lock); + set_bit(0, &watchdog_resumed); } static void clocksource_check_watchdog(struct clocksource *cs) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f9217bf..3d1042f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -273,6 +273,8 @@ static int timekeeping_resume(struct sys unsigned long flags; unsigned long now = read_persistent_clock(); + clocksource_resume(); + write_seqlock_irqsave(&xtime_lock, flags); if (now && (now > timekeeping_suspend_time)) { diff --git a/kernel/timer.c b/kernel/timer.c index a6c580a..5ec5490 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1499,8 +1499,6 @@ unregister_time_interpolator(struct time prev = &curr->next; } - clocksource_resume(); - write_seqlock_irqsave(&xtime_lock, flags); if (ti == time_interpolator) { /* we lost the best time-interpolator: */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 37ab580..cdbc6c1 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -109,6 +109,17 @@ static const struct snmp_mib snmp4_ipsta SNMP_MIB_SENTINEL }; +/* Following RFC4293 items are displayed in /proc/net/netstat */ +static const struct snmp_mib snmp4_ipextstats_list[] = { + SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES), + SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS), + SNMP_MIB_ITEM("InMcastPkts", IPSTATS_MIB_INMCASTPKTS), + SNMP_MIB_ITEM("OutMcastPkts", IPSTATS_MIB_OUTMCASTPKTS), + SNMP_MIB_ITEM("InBcastPkts", IPSTATS_MIB_INBCASTPKTS), + SNMP_MIB_ITEM("OutBcastPkts", IPSTATS_MIB_OUTBCASTPKTS), + SNMP_MIB_SENTINEL +}; + static const struct snmp_mib snmp4_icmp_list[] = { SNMP_MIB_ITEM("InMsgs", ICMP_MIB_INMSGS), SNMP_MIB_ITEM("InErrors", ICMP_MIB_INERRORS), @@ -338,6 +349,16 @@ static int netstat_seq_show(struct seq_f snmp_fold_field((void **)net_statistics, snmp4_net_list[i].entry)); + seq_puts(seq, "\nIpExt:"); + for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); + + seq_puts(seq, "\nIpExt:"); + for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + seq_printf(seq, " %lu", + snmp_fold_field((void **)ip_statistics, + snmp4_ipextstats_list[i].entry)); + seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index be0ee8a..30a5cb1 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -235,7 +235,7 @@ int ip6_mc_input(struct sk_buff *skb) IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); hdr = ipv6_hdr(skb); - deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) || + deliver = unlikely(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI)) || ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); /* diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 269a6e1..6d7542c 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -75,7 +75,7 @@ #endif band = res.classid; } band = TC_H_MIN(band) - 1; - if (band > q->bands) + if (band >= q->bands) return q->queues[q->prio2band[0]]; return q->queues[band]; diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index 37643bb..55ab579 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -22,7 +22,8 @@ struct hlist_head *xfrm_hash_alloc(unsig n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); else n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); + __get_free_pages(GFP_KERNEL | __GFP_NOWARN, + get_order(sz)); if (n) memset(n, 0, sz); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 95271e8..d0882e5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -796,6 +796,10 @@ struct xfrm_policy *xfrm_policy_byid(u8 struct hlist_head *chain; struct hlist_node *entry; + *err = -ENOENT; + if (xfrm_policy_id2dir(id) != dir) + return NULL; + *err = 0; write_lock_bh(&xfrm_policy_lock); chain = xfrm_policy_byidx + idx_hash(id);