GIT da657f28b6fcf60b91033a08c530b86016118361 git://git.linux-nfs.org/pub/linux/nfs-2.6.git commit da657f28b6fcf60b91033a08c530b86016118361 Author: Trond Myklebust Date: Fri Aug 10 17:45:11 2007 -0400 NFSv4: Make NFSv4 ACCESS calls return attributes too... It doesn't really make sense to cache an access call without also revalidating the attributes. Signed-off-by: Trond Myklebust commit 97fea1e7ac457481da51c4eba6e6b2b60da3fbf6 Author: Trond Myklebust Date: Fri Aug 10 17:45:10 2007 -0400 NFSv4: Simplify _nfs4_do_access() Currently, _nfs4_do_access() is just a copy of nfs_do_access() with added conversion of the open flags into an access mask. This patch merges the duplicate functionality. Signed-off-by: Trond Myklebust commit 6ce577b93bf5083a840b7017ec5205304ee8abd9 Author: Trond Myklebust Date: Fri Aug 10 17:44:32 2007 -0400 NFS: Replace file->private_data with calls to nfs_file_open_context() Signed-off-by: Trond Myklebust commit 8f93a43ec643329965df8ae9bc7a02728feec97e Author: Trond Myklebust Date: Fri Aug 10 17:44:28 2007 -0400 NFS: Add a helper to extract the nfs_open_context from a struct file Signed-off-by: Trond Myklebust commit f9ffb85e13eabe6e09581709642abe317d93bd2b Author: Chuck Lever Date: Thu Aug 16 16:03:31 2007 -0400 SUNRPC: Fix generation of universal addresses for Fix some problems with rpcbind v3 and v4 queries from the in-kernel rpcbind client: 1. The r_addr argument must be a full universal address, not just an IP address, and 2. The universal address in r_addr is the address of the remote rpcbind server, not the RPC service being requested This addresses bugzilla.kernel.org report 8891 for 2.6.23-rc and greater. In addition, if the rpcbind client is unable to start the rpcbind request, make sure not to leak the xprt. Signed-off-by: Chuck Lever commit 2f0eb92c59d727ee2ff34b8fa783ba0f9083af9c Author: Chuck Lever Date: Thu Aug 16 16:03:26 2007 -0400 SUNRPC: Add support for formatted universal addresses "Universal addresses" are a string representation of an IP address and port. They are described fully in RFC 3530, section 2.2. Add support for generating them in the RPC client's socket transport module. Signed-off-by: Chuck Lever commit 78cc4f6045ae91c6b72f59a0be0b9d5b25d271a0 Author: Chuck Lever Date: Mon Aug 6 11:58:04 2007 -0400 SUNRPC: Split xs_reclassify_socket into an IPv4 and IPv6 version Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit f53fcd8131301e100fe92c2392b5b40742add2ea Author: Chuck Lever Date: Mon Aug 6 11:57:58 2007 -0400 SUNRPC: Add a helper for extracting the address using the correct type Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit ebf9053f9045a1f48cc01951f7d02aab4695aae9 Author: Chuck Lever Date: Mon Aug 6 11:57:53 2007 -0400 SUNRPC: Add IPv6 address support to net/sunrpc/xprtsock.c Finalize support for setting up RPC client transports to remote RPC services addressed via IPv6. Based on work done by Gilles Quillard at Bull Open Source. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 7a3b12e121c6afd6c20ad8ba82fc73228307682b Author: Chuck Lever Date: Mon Aug 6 11:57:48 2007 -0400 SUNRPC: create connect workers for IPv6 Clone separate connect worker functions for connecting AF_INET6 sockets. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 629d9cbf1afe47d2bd7fe5f15cd43151d200cee8 Author: Chuck Lever Date: Mon Aug 6 11:57:43 2007 -0400 SUNRPC: Rename IPv4 connect workers Prepare for introduction of IPv6 versions of same. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit 51c2f72193cebd335c67b0cefe58d9094ae7b361 Author: Chuck Lever Date: Mon Aug 6 11:57:38 2007 -0400 SUNRPC: Refactor a part of socket connect logic into a helper function Finishing a socket connect is the same for IPv4 and IPv6, so split it out into a helper. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 50d60a8098e800278aea56773ddbb1d2bd452c96 Author: Chuck Lever Date: Mon Aug 6 11:57:33 2007 -0400 SUNRPC: create an IPv6-savvy mechanism for binding to a reserved port Clone xs_bindresvport into two functions, one that can handle IPv4 addresses, and one that can handle IPv6 addresses. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 41effb7dbb8f6d43e8cfbc5f9c61f961952cadbe Author: Chuck Lever Date: Mon Aug 6 11:57:28 2007 -0400 SUNRPC: Rename xs_bind() to prepare for IPv6-specific bind method Prepare for introduction of IPv6-specific socket bind function. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit c63a2d0efa587e49c342656431df9b646b4b3b65 Author: Chuck Lever Date: Mon Aug 6 11:57:23 2007 -0400 SUNRPC: Introduce support for setting the port number in IPv6 addresses We could clone xs_set_port, but this is easier overall. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 7fc45e88d1a28f47e771b7457a20bbd807192056 Author: Chuck Lever Date: Mon Aug 6 11:57:18 2007 -0400 SUNRPC: add support for IPv6 to the kernel's rpcbind client Prepare for adding IPv6 support to the RPC client by adding IPv6 capabilities to rpcbind. Note that this is support on the query side only; registering IPv6 addresses with the local portmapper will come later. Note we have to take care not to fall back to using version 2 of the rpcbind protocol if we're dealing with IPv6 address. Version 2 doesn't support IPv6 at all. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 7515218c9ff12944b0cfd4cfb6634bd8936436c8 Author: Chuck Lever Date: Mon Aug 6 11:57:12 2007 -0400 SUNRPC: add a function to format IPv6 addresses Clone xs_format_ipv4_peer_addresses into an IPv6 version. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 4a719655465444741f703578c57d0721dd8ec3b3 Author: Chuck Lever Date: Mon Aug 6 11:57:07 2007 -0400 SUNRPC: Rename xs_format_peer_addresses Prepare to add an IPv6 version of xs_format_peer_addresses by renaming it to xs_format_ipv4_peer_addresses. Signed-off-by: Chuck Lever Cc: Aurelien Charbon Signed-off-by: Trond Myklebust commit 754bd605da9fa5a3f6be89024e470b1268b13f44 Author: Chuck Lever Date: Mon Aug 6 11:57:02 2007 -0400 SUNRPC: Add hex-formatted address support to rpc_peeraddr2str() Add support for the NFS client's need to export volume information with IP addresses formatted in hex instead of decimal. This isn't used yet, but subsequent patches (not in this series) will change the NFS client to use this functionality. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit 58e5f474da8511c540e9abdc9dbf149f2e7ac218 Author: Chuck Lever Date: Mon Aug 6 11:56:57 2007 -0400 SUNRPC: Free address buffers in a loop Use more generic logic to free buffers holding formatted addresses. This makes it less likely a bug will be introduced when adding additional buffer types in xs_format_peer_address(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit 730317e2c7c9fd98948060f31a8f5c0792c65b9a Author: Chuck Lever Date: Mon Aug 6 11:56:52 2007 -0400 SUNRPC: Use standard macros for printing IP addresses include/linux/kernel.h gives us some nice macros for formatting IP addresses. Use them. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit 58ba0cbb007c7815fc869dc419b8b429f8b32769 Author: Chuck Lever Date: Mon Aug 6 11:56:42 2007 -0400 SUNRPC: Fix a signed v. unsigned comparison in net/sunrpc/xprtsock.c Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit fb8036c78ee3155a4105387984b313317fab1881 Author: Chuck Lever Date: Mon Aug 6 11:56:31 2007 -0400 SUNRPC: Fix a signed v. unsigned comparison in rpcbind's XDR routines Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit 2e891f3b2f37d020a2e0c3a0239f571c293d2fae Author: Jeff Layton Date: Mon Jul 30 08:47:38 2007 -0400 [NFS] [PATCH] NFS: show addr=ipaddr in /proc/mounts rather than A minor thing, but useful when working with a server with multiple addrs. This looks like it might also be necessary if Miklos' effort to eliminate /etc/mtab ever comes to fruition. When displaying mount options in /proc/mounts, the kernel prints "addr=hostname". This info is redundant since we already have the hostname displayed as part of the "device" section of the mount. This patch changes it to display the IP address to which the socket is connected. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust commit 48f0926aa52fa32e1bb04d164301c1ebb6d57562 Author: Christoph Hellwig Date: Fri Aug 3 16:20:32 2007 +0200 [NFS] [PATCH] nfs: tiny makefile cleanup no need to set up foo-objs these days. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust commit ee239ba5cf18003e41ab8d94bb6f743ab1ec6115 Author: Fabio Olive Leite Date: Thu Jul 26 22:59:00 2007 -0300 Re: [NFS] [PATCH] Attribute timeout handling and wrapping u32 jiffies I would like to discuss the idea that the current checks for attribute timeout using time_after are inadequate for 32bit architectures, since time_after works correctly only when the two timestamps being compared are within 2^31 jiffies of each other. The signed overflow caused by comparing values more than 2^31 jiffies apart will flip the result, causing incorrect assumptions of validity. 2^31 jiffies is a fairly large period of time (~25 days) when compared to the lifetime of most kernel data structures, but for long lived NFS mounts that can sit idle for months (think that for some reason autofs cannot be used), it is easy to compare inode attribute timestamps with very disparate or even bogus values (as in when jiffies have wrapped many times, where the comparison doesn't even make sense). Currently the code tests for attribute timeout by simply adding the desired amount of jiffies to the stored timestamp and comparing that with the current timestamp of obtained attribute data with time_after. This is incorrect, as it returns true for the desired timeout period and another full 2^31 range of jiffies. In testing with artificial jumps (several small jumps, not one big crank) of the jiffies I was able to reproduce a problem found in a server with very long lived NFS mounts, where attributes would not be refreshed even after touching files and directories in the server: Initial uptime: 03:42:01 up 6 min, 0 users, load average: 0.01, 0.12, 0.07 NFS volume is mounted and time is advanced: 03:38:09 up 25 days, 2 min, 0 users, load average: 1.22, 1.05, 1.08 # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Dec 17 03:38 /local/A/foo/bar -rw-r--r-- 1 root root 0 Nov 22 00:36 /nfs/A/foo/bar # touch /local/A/foo/bar # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Dec 17 03:47 /local/A/foo/bar -rw-r--r-- 1 root root 0 Nov 22 00:36 /nfs/A/foo/bar We can see the local mtime is updated, but the NFS mount still shows the old value. The patch below makes it work: Initial setup... 07:11:02 up 25 days, 1 min, 0 users, load average: 0.15, 0.03, 0.04 # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:11 /local/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:11 /nfs/A/foo/bar # touch /local/A/foo/bar # ls -l /local/A/foo/bar /nfs/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:14 /local/A/foo/bar -rw-r--r-- 1 root root 0 Jan 11 07:14 /nfs/A/foo/bar Signed-off-by: Fabio Olive Leite Signed-off-by: Trond Myklebust commit b7fb57d31b92ee608b402924398f2a0dd594bcbe Author: Peter Staubach Date: Fri Aug 3 15:07:10 2007 -0400 64 bit ino support for NFS client Hi. Attached is a patch to modify the NFS client code to support 64 bit ino's, as appropriate for the system and the NFS protocol version. The code basically just expand the NFS interfaces for routines which handle ino's from using ino_t to u64 and then uses the fileid in the nfs_inode instead of i_ino in the inode. The code paths that were updated are in the getattr method and the readdir methods. This should be no real change on 64 bit platforms. Since the ino_t is an unsigned long, it would already be 64 bits wide. Thanx... ps Signed-off-by: Peter Staubach Signed-off-by: Trond Myklebust commit eb3a1ae1130d51074ab3ce24c75c25c8ae88123b Author: Trond Myklebust Date: Thu Jun 7 22:44:34 2007 -0400 SUNRPC: Convert rpc_pipefs to use the generic filesystem notification hooks This will allow rpc.gssd to use inotify instead of dnotify in order to locate new rpc upcall pipes. This also requires the exporting of __audit_inode_child(), which is used by fsnotify_create() and fsnotify_mkdir(). Ccing David Woodhouse. Cc: David Woodhouse Signed-off-by: Trond Myklebust commit 51648fa48254b0308a794a987e70a5be19bf2340 Author: J. Bruce Fields Date: Thu Jul 5 13:55:09 2007 -0400 locks: kill redundant local variable There's no need for another variable local to this loop; we can use the variable (of the same name!) already declared at the top of the function, and not used till later (at which point it's initialized, so this is safe). Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust commit b45a8b1393495925f7d82cbebaf943d829cd9bc6 Author: Trond Myklebust Date: Wed Jul 25 14:09:54 2007 -0400 NFS: Fall back to synchronous writes when a background write errors... This helps prevent huge queues of background writes from building up whenever the server runs out of disk or quota space, or if someone changes the file access modes behind our backs. Signed-off-by: Trond Myklebust commit d661c982316cce70b838d3942513b646cf903835 Author: Trond Myklebust Date: Wed Jul 25 14:09:54 2007 -0400 NFS: Writeback optimisation Schedule writes using WB_SYNC_NONE first, then come back for a second pass using WB_SYNC_ALL. Signed-off-by: Trond Myklebust commit 29564fd3459990f9e450713d8291fd8c72f85846 Author: Trond Myklebust Date: Fri Jul 20 13:13:28 2007 -0400 NFS: Clean up NFS writeback flush code The only user of nfs_sync_mapping_range() is nfs_getattr(), which uses it to flush out the entire inode without sending a commit. We therefore replace nfs_sync_mapping_range with a more appropriate helper. Signed-off-by: Trond Myklebust commit a68e291f0bcc163676c4dc12119eaeb2f3e87ae8 Author: Trond Myklebust Date: Sun Jul 22 19:27:46 2007 -0400 VFS: Remove writeback_control->fs_private The only user of this field was NFS. Signed-off-by: Trond Myklebust commit 03d45889d5b2983fd8be2bf225f04c196f0edc9e Author: Trond Myklebust Date: Sun Jul 22 19:27:32 2007 -0400 NFS: Clean up nfs_writepages() Just call write_cache_pages directly instead of hacking the writeback control structure in order to find out if we were called from writepages() or directly from the VM. Signed-off-by: Trond Myklebust commit 4b1c0fa7e346b864c06b86b63c5ba5475194400a Author: Trond Myklebust Date: Sun Jul 22 17:09:05 2007 -0400 NFS: Clean up write code... The addition of nfs_page_mkwrite means that We should no longer need to create requests inside nfs_writepage() Signed-off-by: Trond Myklebust commit 8e14262611548ee254bb98a3ee567057e17f2717 Author: Trond Myklebust Date: Sun Jul 22 17:09:05 2007 -0400 NFS: Add the helper nfs_vm_page_mkwrite This is needed in order to set up a proper nfs_page request for mmapped files. Signed-off-by: Trond Myklebust commit a04451d683e1b8536eb5a3b36cded06e02ec62cd Author: Trond Myklebust Date: Tue Aug 28 10:29:36 2007 -0400 NFS: Fix a write request leak in nfs_invalidate_page() Ryusuke Konishi says: The recent truncate_complete_page() clears the dirty flag from a page before calling a_ops->invalidatepage(), ^^^^^^ static void truncate_complete_page(struct address_space *mapping, struct page *page) { ... cancel_dirty_page(page, PAGE_CACHE_SIZE); <--- Inserted here at kernel 2.6.20 if (PagePrivate(page)) do_invalidatepage(page, 0); ---> will call a_ops->invalidatepage() ... } and this is disturbing nfs_wb_page_priority() from calling nfs_writepage_locked() that is expected to handle the pending request (=nfs_page) associated with the page. int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) { ... if (clear_page_dirty_for_io(page)) { ret = nfs_writepage_locked(page, &wbc); if (ret < 0) goto out; } ... } Since truncate_complete_page() will get rid of the page after a_ops->invalidatepage() returns, the request (=nfs_page) associated with the page becomes a garbage in nfs_inode->nfs_page_tree. ------------------------ Fix this by ensuring that nfs_wb_page_priority() recognises that it may also need to clear out non-dirty pages that have an nfs_page associated with them. Signed-off-by: Trond Myklebust commit 5e3ce7aac624adfc0ab935e04bce899ced6949ac Author: Chuck Lever Date: Wed Aug 29 17:59:03 2007 -0400 NFS: change NFS mount error return when According to the mount(2) man page, the proper error return code for the mount(2) system call when the special device name or the mounted-on directory name is too long is ENAMETOOLONG. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit cb4ee4253babb419daaad0d251cef354f600f079 Author: Chuck Lever Date: Wed Aug 29 17:59:01 2007 -0400 NFS: Off-by-one length error in string handling The hostname was getting truncated in the new text-based NFS mount API. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit cee6af123e884b3e7cf8595c3aa96f78f9791c09 Author: Chuck Lever Date: Wed Aug 29 17:58:59 2007 -0400 NFS: Return a real error code from mount(2) Don't filter the return code from the in-kernel rpcbind or NFS mount clients. Return the real error code so that callers of the new NFS text-based mount API can apply a useful retry strategy. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit b5a281a4bb32df131720686f2aa0eb5c1a0b13ea Author: Chuck Lever Date: Wed Aug 29 17:58:57 2007 -0400 NFS: mount option parser chokes on proto= The new text-based NFS mount option parsing logic doesn't recognize any valid transport protocols due to a silly mistake in the protocol token matching logic. This prevents basic mount requests such as: mount.nfs server:/export /mnt -o proto=tcp from working with the new text-based NFS mount API. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust commit d3b7d27ddf55ae0ad5864c95b02411d013b200ca Author: Trond Myklebust Date: Mon Aug 27 11:33:00 2007 -0400 NFSv4: Ensure that we pass the correct dentry to nfs4_intent_set_file This patch fixes an Oops that was reported by Gabriel Barazer. Signed-off-by: Trond Myklebust commit 6dcb8387f37c86fd814ead60c80c35937ab9cdc6 Author: Trond Myklebust Date: Mon Aug 27 09:57:46 2007 -0400 NFSv4: Fix a typo in _nfs4_do_open_reclaim This should fix the following Oops reported by Jeff Garzik: kernel BUG at fs/nfs/nfs4xdr.c:1040! invalid opcode: 0000 [1] SMP CPU 0 Modules linked in: nfs lockd sunrpc af_packet ipv6 cpufreq_ondemand acpi_cpufreq battery floppy nvram sg snd_hda_intel ata_generic snd_pcm_oss snd_mixer_oss snd_pcm i2c_i801 snd_page_alloc e1000 firewire_ohci ata_piix i2c_core sr_mod cdrom sata_sil ahci libata sd_mod scsi_mod ext3 jbd ehci_hcd uhci_hcd Pid: 16353, comm: 10.10.10.1-recl Not tainted 2.6.23-rc3 #1 RIP: 0010:[] [] :nfs:encode_open+0x1c0/0x330 RSP: 0018:ffff8100467c5c60 EFLAGS: 00010202 RAX: ffff81000f89b8b8 RBX: 00000000697a6f6d RCX: ffff81000f89b8b8 RDX: 0000000000000004 RSI: 0000000000000004 RDI: ffff8100467c5c80 RBP: ffff8100467c5c80 R08: ffff81000f89bc30 R09: ffff81000f89b83f R10: 0000000000000001 R11: ffffffff881e79e0 R12: ffff81003cbd1808 R13: ffff81000f89b860 R14: ffff81005fc984e0 R15: ffffffff88240af0 FS: 0000000000000000(0000) GS:ffffffff8052a000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00002adb9e51a030 CR3: 000000007ea7e000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process 10.10.10.1-recl (pid: 16353, threadinfo ffff8100467c4000, task ffff8100038ce780) Stack: ffff81004aeb6a40 ffff81003cbd1808 ffff81003cbd1808 ffffffff88240b5d ffff81000f89b8bc ffff81005fc984e8 ffff81000f89bc30 ffff81005fc984e8 0000000300000000 0000000000000000 0000000000000000 ffff81003cbd1800 Call Trace: [] :nfs:nfs4_xdr_enc_open_noattr+0x6d/0x90 [] :sunrpc:rpcauth_wrap_req+0x97/0xf0 [] :nfs:nfs4_xdr_enc_open_noattr+0x0/0x90 [] :sunrpc:call_transmit+0x18a/0x290 [] :sunrpc:__rpc_execute+0x6b/0x290 [] :sunrpc:rpc_do_run_task+0x76/0xd0 [] :nfs:_nfs4_proc_open+0x76/0x230 [] :nfs:nfs4_open_recover_helper+0x5e/0xc0 [] :nfs:nfs4_open_recover+0xe4/0x120 [] :nfs:nfs4_open_reclaim+0xa4/0xf0 [] :nfs:nfs4_reclaim_open_state+0x55/0x1b0 [] :nfs:reclaimer+0x2ca/0x390 [] :nfs:reclaimer+0x0/0x390 [] kthread+0x4b/0x80 [] child_rip+0xa/0x12 [] kthread+0x0/0x80 [] child_rip+0x0/0x12 Code: 0f 0b eb fe 48 89 ef c7 00 00 00 00 02 be 08 00 00 00 e8 79 RIP [] :nfs:encode_open+0x1c0/0x330 RSP Signed-off-by: Trond Myklebust commit 49ee2cc5610ebfa09bd7fe37fa294cedec3af6ab Author: Trond Myklebust Date: Mon Aug 27 09:14:56 2007 -0400 NFS: Fix use of cancel_delayed_work_sync in nfs_release_automount_timer Doh! We can't use cancel_delayed_work_sync because we may have been called from an unmount that was being performed by nfs_automount_task. Signed-off-by: Trond Myklebust Signed-off-by: Andrew Morton --- fs/locks.c | 2 fs/nfs/Makefile | 1 fs/nfs/delegation.c | 2 fs/nfs/dir.c | 43 ++ fs/nfs/direct.c | 4 fs/nfs/file.c | 107 +++++-- fs/nfs/inode.c | 12 fs/nfs/namespace.c | 2 fs/nfs/nfs4proc.c | 62 +--- fs/nfs/nfs4state.c | 2 fs/nfs/nfs4xdr.c | 27 + fs/nfs/read.c | 6 fs/nfs/super.c | 24 - fs/nfs/write.c | 233 +++++++-------- include/linux/jiffies.h | 4 include/linux/nfs_fs.h | 25 - include/linux/nfs_page.h | 1 include/linux/nfs_xdr.h | 3 include/linux/sunrpc/xprt.h | 3 include/linux/writeback.h | 2 kernel/auditsc.c | 1 net/sunrpc/rpc_pipe.c | 7 net/sunrpc/rpcb_clnt.c | 80 ++++- net/sunrpc/xprtsock.c | 498 +++++++++++++++++++++++++++------- 24 files changed, 783 insertions(+), 368 deletions(-) diff -puN fs/locks.c~git-nfs fs/locks.c --- a/fs/locks.c~git-nfs +++ a/fs/locks.c @@ -819,7 +819,7 @@ static int __posix_lock_file(struct inod lock_kernel(); if (request->fl_type != F_UNLCK) { for_each_lock(inode, before) { - struct file_lock *fl = *before; + fl = *before; if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(request, fl)) diff -puN fs/nfs/Makefile~git-nfs fs/nfs/Makefile --- a/fs/nfs/Makefile~git-nfs +++ a/fs/nfs/Makefile @@ -16,4 +16,3 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4x nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o -nfs-objs := $(nfs-y) diff -puN fs/nfs/delegation.c~git-nfs fs/nfs/delegation.c --- a/fs/nfs/delegation.c~git-nfs +++ a/fs/nfs/delegation.c @@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(st for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) + if (nfs_file_open_context(fl->fl_file) != ctx) continue; status = nfs4_lock_delegation_recall(state, fl); if (status >= 0) diff -puN fs/nfs/dir.c~git-nfs fs/nfs/dir.c --- a/fs/nfs/dir.c~git-nfs +++ a/fs/nfs/dir.c @@ -407,7 +407,7 @@ int nfs_do_filldir(nfs_readdir_descripto struct file *file = desc->file; struct nfs_entry *entry = desc->entry; struct dentry *dentry = NULL; - unsigned long fileid; + u64 fileid; int loop_count = 0, res; @@ -418,7 +418,7 @@ int nfs_do_filldir(nfs_readdir_descripto unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ - fileid = nfs_fileid_to_ino_t(entry->ino); + fileid = entry->ino; /* Get a dentry if we have one */ if (dentry != NULL) @@ -428,7 +428,7 @@ int nfs_do_filldir(nfs_readdir_descripto /* Use readdirplus info */ if (dentry != NULL && dentry->d_inode != NULL) { d_type = dt_type(dentry->d_inode); - fileid = dentry->d_inode->i_ino; + fileid = NFS_FILEID(dentry->d_inode); } res = filldir(dirent, entry->name, entry->len, @@ -558,7 +558,7 @@ static int nfs_readdir(struct file *filp memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; + desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -623,7 +623,7 @@ static loff_t nfs_llseek_dir(struct file } if (offset != filp->f_pos) { filp->f_pos = offset; - ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + nfs_file_open_context(filp)->dir_cookie = 0; } out: mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); @@ -1348,9 +1348,9 @@ static int nfs_rmdir(struct inode *dir, static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) { static unsigned int sillycounter; - const int i_inosize = sizeof(dir->i_ino)*2; + const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs") + i_inosize + countersize - 1; + const int slen = sizeof(".nfs")+fileidsize+countersize-1; char silly[slen+1]; struct qstr qsilly; struct dentry *sdentry; @@ -1368,8 +1368,9 @@ static int nfs_sillyrename(struct inode if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; - sprintf(silly, ".nfs%*.*lx", - i_inosize, i_inosize, dentry->d_inode->i_ino); + sprintf(silly, ".nfs%*.*Lx", + fileidsize, fileidsize, + (unsigned long long)NFS_FILEID(dentry->d_inode)); /* Return delegation in anticipation of the rename */ nfs_inode_return_delegation(dentry->d_inode); @@ -1840,7 +1841,7 @@ static struct nfs_access_entry *nfs_acce return NULL; } -int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -1852,7 +1853,7 @@ int nfs_access_get_cached(struct inode * cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; @@ -1907,7 +1908,7 @@ found: nfs_access_free_entry(entry); } -void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -1955,6 +1956,24 @@ out: return -EACCES; } +static int nfs_open_permission_mask(int openflags) +{ + int mask = 0; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; + return mask; +} + +int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); +} + int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct rpc_cred *cred; diff -puN fs/nfs/direct.c~git-nfs fs/nfs/direct.c --- a/fs/nfs/direct.c~git-nfs +++ a/fs/nfs/direct.c @@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct ki return -ENOMEM; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -718,7 +718,7 @@ static ssize_t nfs_direct_write(struct k sync = FLUSH_STABLE; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; diff -puN fs/nfs/file.c~git-nfs fs/nfs/file.c --- a/fs/nfs/file.c~git-nfs +++ a/fs/nfs/file.c @@ -33,6 +33,7 @@ #include #include "delegation.h" +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -55,6 +56,8 @@ static int nfs_lock(struct file *filp, i static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); +static struct vm_operations_struct nfs_file_vm_ops; + const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, @@ -174,13 +177,38 @@ static loff_t nfs_file_llseek(struct fil } /* + * Helper for nfs_file_flush() and nfs_fsync() + * + * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to + * disk, but it retrieves and clears ctx->error after synching, despite + * the two being set at the same time in nfs_context_set_write_error(). + * This is because the former is used to notify the _next_ call to + * nfs_file_write() that a write error occured, and hence cause it to + * fall back to doing a synchronous write. + */ +static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) +{ + int have_error, status; + int ret = 0; + + have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + status = nfs_wb_all(inode); + have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + if (have_error) + ret = xchg(&ctx->error, 0); + if (!ret) + ret = status; + return ret; +} + +/* * Flush all dirty pages, and check for write errors. * */ static int nfs_file_flush(struct file *file, fl_owner_t id) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file->f_path.dentry->d_inode; int status; @@ -189,16 +217,11 @@ nfs_file_flush(struct file *file, fl_own if ((file->f_mode & FMODE_WRITE) == 0) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - lock_kernel(); + /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); - } - unlock_kernel(); + status = nfs_do_fsync(ctx, inode); + if (!status) + nfs_revalidate_inode(NFS_SERVER(inode), inode); return status; } @@ -257,8 +280,11 @@ nfs_file_mmap(struct file * file, struct dentry->d_parent->d_name.name, dentry->d_name.name); status = nfs_revalidate_mapping(inode, file->f_mapping); - if (!status) - status = generic_file_mmap(file, vma); + if (!status) { + vma->vm_ops = &nfs_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + file_accessed(file); + } return status; } @@ -270,21 +296,13 @@ nfs_file_mmap(struct file * file, struct static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - int status; dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - lock_kernel(); - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - } - unlock_kernel(); - return status; + return nfs_do_fsync(ctx, inode); } /* @@ -316,7 +334,7 @@ static void nfs_invalidate_page(struct p if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE); + nfs_wb_page_cancel(page->mapping->host, page); } static int nfs_release_page(struct page *page, gfp_t gfp) @@ -333,7 +351,7 @@ static int nfs_launder_page(struct page const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, - .set_page_dirty = nfs_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .writepage = nfs_writepage, .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, @@ -346,6 +364,43 @@ const struct address_space_operations nf .launder_page = nfs_launder_page, }; +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct file *filp = vma->vm_file; + unsigned pagelen; + int ret = -EINVAL; + + lock_page(page); + if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) + goto out_unlock; + pagelen = nfs_page_length(page); + if (pagelen == 0) + goto out_unlock; + ret = nfs_prepare_write(filp, page, 0, pagelen); + if (!ret) + ret = nfs_commit_write(filp, page, 0, pagelen); +out_unlock: + unlock_page(page); + return ret; +} + +static struct vm_operations_struct nfs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = nfs_vm_page_mkwrite, +}; + +static int nfs_need_sync_write(struct file *filp, struct inode *inode) +{ + struct nfs_open_context *ctx; + + if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) + return 1; + ctx = nfs_file_open_context(filp); + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) + return 1; + return 0; +} + static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -382,8 +437,8 @@ static ssize_t nfs_file_write(struct kio nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); /* Return error values for O_SYNC and IS_SYNC() */ - if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { - int err = nfs_fsync(iocb->ki_filp, dentry, 1); + if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { + int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) result = err; } diff -puN fs/nfs/inode.c~git-nfs fs/nfs/inode.c --- a/fs/nfs/inode.c~git-nfs +++ a/fs/nfs/inode.c @@ -431,7 +431,7 @@ int nfs_getattr(struct vfsmount *mnt, st /* Flush out writes to the server in order to update c/mtime */ if (S_ISREG(inode->i_mode)) - nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); + nfs_wb_nocommit(inode); /* * We may force a getattr if the user cares about atime. @@ -450,8 +450,10 @@ int nfs_getattr(struct vfsmount *mnt, st err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); else err = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (!err) + if (!err) { generic_fillattr(inode, stat); + stat->ino = NFS_FILEID(inode); + } return err; } @@ -536,7 +538,7 @@ struct nfs_open_context *nfs_find_open_c static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_path.dentry->d_inode; - struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { filp->private_data = NULL; @@ -654,7 +656,7 @@ int nfs_attribute_timeout(struct inode * if (nfs_have_delegation(inode, FMODE_READ)) return 0; - return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); + return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } /** @@ -1053,7 +1055,7 @@ static int nfs_update_inode(struct inode nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { + } else if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; diff -puN fs/nfs/namespace.c~git-nfs fs/nfs/namespace.c --- a/fs/nfs/namespace.c~git-nfs +++ a/fs/nfs/namespace.c @@ -176,7 +176,7 @@ static void nfs_expire_automounts(struct void nfs_release_automount_timer(void) { if (list_empty(&nfs_automount_list)) - cancel_delayed_work_sync(&nfs_automount_task); + cancel_delayed_work(&nfs_automount_task); } /* diff -puN fs/nfs/nfs4proc.c~git-nfs fs/nfs/nfs4proc.c --- a/fs/nfs/nfs4proc.c~git-nfs +++ a/fs/nfs/nfs4proc.c @@ -62,10 +62,8 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); -static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -177,7 +175,7 @@ static void nfs4_setup_readdir(u64 cooki *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, dentry->d_inode->i_ino); + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode)); } *p++ = xdr_one; /* next */ @@ -189,7 +187,7 @@ static void nfs4_setup_readdir(u64 cooki *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); readdir->pgbase = (char *)p - (char *)start; readdir->count -= readdir->pgbase; @@ -454,7 +452,7 @@ static struct nfs4_state *nfs4_try_open_ memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); lock_kernel(); - ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); + ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); unlock_kernel(); if (ret != 0) goto out; @@ -646,7 +644,7 @@ static int _nfs4_do_open_reclaim(struct rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) - delegation_type = delegation->flags; + delegation_type = delegation->type; rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); @@ -948,36 +946,6 @@ static int _nfs4_proc_open(struct nfs4_o return 0; } -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) -{ - struct nfs_access_entry cache; - int mask = 0; - int status; - - if (openflags & FMODE_READ) - mask |= MAY_READ; - if (openflags & FMODE_WRITE) - mask |= MAY_WRITE; - if (openflags & FMODE_EXEC) - mask |= MAY_EXEC; - status = nfs_access_get_cached(inode, cred, &cache); - if (status == 0) - goto out; - - /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; - cache.cred = cred; - cache.jiffies = jiffies; - status = _nfs4_proc_access(inode, &cache); - if (status != 0) - return status; - nfs_access_add_cache(inode, &cache); -out: - if ((cache.mask & mask) == mask) - return 0; - return -EACCES; -} - static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; @@ -1381,7 +1349,7 @@ static int nfs4_intent_set_file(struct n /* If the open_intent is for execute, we have an extra check to make */ if (nd->intent.open.flags & FMODE_EXEC) { - ret = _nfs4_do_access(state->inode, + ret = nfs_may_open(state->inode, state->owner->so_cred, nd->intent.open.flags); if (ret < 0) @@ -1390,7 +1358,7 @@ static int nfs4_intent_set_file(struct n filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); ctx->state = state; return 0; } @@ -1434,7 +1402,7 @@ nfs4_atomic_open(struct inode *dir, stru } res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) - dentry = res; + path.dentry = res; nfs4_intent_set_file(nd, &path, state); return res; } @@ -1757,10 +1725,16 @@ static int nfs4_proc_lookup(struct inode static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) { + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; struct nfs4_accessargs args = { .fh = NFS_FH(inode), + .bitmask = server->attr_bitmask, + }; + struct nfs4_accessres res = { + .server = server, + .fattr = &fattr, }; - struct nfs4_accessres res = { 0 }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, @@ -1786,6 +1760,7 @@ static int _nfs4_proc_access(struct inod if (mode & MAY_EXEC) args.access |= NFS4_ACCESS_EXECUTE; } + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (!status) { entry->mask = 0; @@ -1795,6 +1770,7 @@ static int _nfs4_proc_access(struct inod entry->mask |= MAY_WRITE; if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) entry->mask |= MAY_EXEC; + nfs_refresh_inode(inode, &fattr); } return status; } @@ -3303,7 +3279,7 @@ static int nfs4_proc_unlck(struct nfs4_s status = -ENOMEM; if (seqid == NULL) goto out; - task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); + task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) goto out; @@ -3447,7 +3423,7 @@ static int _nfs4_do_setlk(struct nfs4_st int ret; dprintk("%s: begin!\n", __FUNCTION__); - data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, + data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), fl->fl_u.nfs4_fl.owner); if (data == NULL) return -ENOMEM; @@ -3573,7 +3549,7 @@ nfs4_proc_lock(struct file *filp, int cm int status; /* verify open state */ - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); state = ctx->state; if (request->fl_start < 0 || request->fl_end < 0) diff -puN fs/nfs/nfs4state.c~git-nfs fs/nfs/nfs4state.c --- a/fs/nfs/nfs4state.c~git-nfs +++ a/fs/nfs/nfs4state.c @@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; status = ops->recover_lock(state, fl); if (status >= 0) diff -puN fs/nfs/nfs4xdr.c~git-nfs fs/nfs/nfs4xdr.c --- a/fs/nfs/nfs4xdr.c~git-nfs +++ a/fs/nfs/nfs4xdr.c @@ -376,10 +376,12 @@ static int nfs4_stat_to_errno(int); decode_locku_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_access_maxsz) + encode_access_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - decode_access_maxsz) + decode_access_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -1376,14 +1378,20 @@ static int nfs4_xdr_enc_access(struct rp { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) == 0) - status = encode_access(&xdr, args->access); + status = encode_putfh(&xdr, args->fh); + if (status != 0) + goto out; + status = encode_access(&xdr, args->access); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: return status; } @@ -3783,8 +3791,13 @@ static int nfs4_xdr_dec_access(struct rp xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; - if ((status = decode_putfh(&xdr)) == 0) - status = decode_access(&xdr, res); + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_access(&xdr, res); + if (status != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff -puN fs/nfs/read.c~git-nfs fs/nfs/read.c --- a/fs/nfs/read.c~git-nfs +++ a/fs/nfs/read.c @@ -497,8 +497,7 @@ int nfs_readpage(struct file *file, stru if (ctx == NULL) goto out_unlock; } else - ctx = get_nfs_open_context((struct nfs_open_context *) - file->private_data); + ctx = get_nfs_open_context(nfs_file_open_context(file)); error = nfs_readpage_async(ctx, inode, page); @@ -576,8 +575,7 @@ int nfs_readpages(struct file *filp, str if (desc.ctx == NULL) return -EBADF; } else - desc.ctx = get_nfs_open_context((struct nfs_open_context *) - filp->private_data); + desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff -puN fs/nfs/super.c~git-nfs fs/nfs/super.c --- a/fs/nfs/super.c~git-nfs +++ a/fs/nfs/super.c @@ -506,8 +506,8 @@ static int nfs_show_options(struct seq_f nfs_show_mount_options(m, nfss, 0); - seq_puts(m, ",addr="); - seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); + seq_printf(m, ",addr="NIPQUAD_FMT, + NIPQUAD(nfss->nfs_client->cl_addr.sin_addr)); return 0; } @@ -911,13 +911,13 @@ static int nfs_parse_mount_options(char kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_UDP; mnt->timeo = 7; mnt->retrans = 5; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_TCP; mnt->timeo = 600; @@ -936,10 +936,10 @@ static int nfs_parse_mount_options(char kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->mount_server.protocol = IPPROTO_UDP; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->mount_server.protocol = IPPROTO_TCP; break; default: @@ -1153,20 +1153,20 @@ static int nfs_validate_mount_data(struc c = strchr(dev_name, ':'); if (c == NULL) return -EINVAL; - len = c - dev_name - 1; + len = c - dev_name; if (len > sizeof(data->hostname)) - return -EINVAL; + return -ENAMETOOLONG; strncpy(data->hostname, dev_name, len); args.nfs_server.hostname = data->hostname; c++; if (strlen(c) > NFS_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; args.nfs_server.export_path = c; status = nfs_try_mount(&args, mntfh); if (status) - return -EINVAL; + return status; /* * Translate to nfs_mount_data, which nfs_fill_super @@ -1668,7 +1668,7 @@ static int nfs4_validate_mount_data(stru /* while calculating len, pretend ':' is '\0' */ len = c - dev_name; if (len > NFS4_MAXNAMLEN) - return -EINVAL; + return -ENAMETOOLONG; *hostname = kzalloc(len, GFP_KERNEL); if (*hostname == NULL) return -ENOMEM; @@ -1677,7 +1677,7 @@ static int nfs4_validate_mount_data(stru c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; *mntpath = kzalloc(len + 1, GFP_KERNEL); if (*mntpath == NULL) return -ENOMEM; diff -puN fs/nfs/write.c~git-nfs fs/nfs/write.c --- a/fs/nfs/write.c~git-nfs +++ a/fs/nfs/write.c @@ -110,6 +110,13 @@ void nfs_writedata_release(void *wdata) nfs_writedata_free(wdata); } +static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) +{ + ctx->error = error; + smp_wmb(); + set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); +} + static struct nfs_page *nfs_page_find_request_locked(struct page *page) { struct nfs_page *req = NULL; @@ -243,10 +250,7 @@ static void nfs_end_page_writeback(struc /* * Find an associated nfs write request, and prepare to flush it out - * Returns 1 if there was no write request, or if the request was - * already tagged by nfs_set_page_dirty.Returns 0 if the request - * was not tagged. - * May also return an error if the user signalled nfs_wait_on_request(). + * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) @@ -261,7 +265,7 @@ static int nfs_page_async_flush(struct n req = nfs_page_find_request_locked(page); if (req == NULL) { spin_unlock(&inode->i_lock); - return 1; + return 0; } if (nfs_lock_request_dontget(req)) break; @@ -282,7 +286,7 @@ static int nfs_page_async_flush(struct n spin_unlock(&inode->i_lock); nfs_unlock_request(req); nfs_pageio_complete(pgio); - return 1; + return 0; } if (nfs_set_page_writeback(page) != 0) { spin_unlock(&inode->i_lock); @@ -290,70 +294,56 @@ static int nfs_page_async_flush(struct n } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); - return ret; + return 0; } -/* - * Write an mmapped page to the server. - */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - struct nfs_pageio_descriptor mypgio, *pgio; - struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; - unsigned offset; - int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - if (wbc->for_writepages) - pgio = wbc->fs_private; - else { - nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc)); - pgio = &mypgio; - } - nfs_pageio_cond_complete(pgio, page->index); + return nfs_page_async_flush(pgio, page); +} - err = nfs_page_async_flush(pgio, page); - if (err <= 0) - goto out; - err = 0; - offset = nfs_page_length(page); - if (!offset) - goto out; - - nfs_pageio_cond_complete(pgio, page->index); +/* + * Write an mmapped page to the server. + */ +static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +{ + struct nfs_pageio_descriptor pgio; + int err; - ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); - if (ctx == NULL) { - err = -EBADF; - goto out; - } - err = nfs_writepage_setup(ctx, page, 0, offset); - put_nfs_open_context(ctx); - if (err != 0) - goto out; - err = nfs_page_async_flush(pgio, page); - if (err > 0) - err = 0; -out: - if (!wbc->for_writepages) - nfs_pageio_complete(pgio); - return err; + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); + err = nfs_do_writepage(page, wbc, &pgio); + nfs_pageio_complete(&pgio); + if (err < 0) + return err; + if (pgio.pg_error < 0) + return pgio.pg_error; + return 0; } int nfs_writepage(struct page *page, struct writeback_control *wbc) { - int err; + int ret; - err = nfs_writepage_locked(page, wbc); + ret = nfs_writepage_locked(page, wbc); unlock_page(page); - return err; + return ret; +} + +static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) +{ + int ret; + + ret = nfs_do_writepage(page, wbc, data); + unlock_page(page); + return ret; } int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) @@ -365,12 +355,11 @@ int nfs_writepages(struct address_space nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); - wbc->fs_private = &pgio; - err = generic_writepages(mapping, wbc); + err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); - if (err) + if (err < 0) return err; - if (pgio.pg_error) + if (pgio.pg_error < 0) return pgio.pg_error; return 0; } @@ -395,8 +384,6 @@ static int nfs_inode_add_request(struct } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); - if (PageDirty(req->wb_page)) - set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; kref_get(&req->wb_kref); return 0; @@ -416,8 +403,6 @@ static void nfs_inode_remove_request(str set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); - if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) - __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&inode->i_lock); @@ -682,7 +667,7 @@ static struct nfs_page * nfs_update_requ int nfs_flush_incompatible(struct file *file, struct page *page) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_page *req; int do_flush, status; /* @@ -716,7 +701,7 @@ int nfs_flush_incompatible(struct file * int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = page->mapping->host; int status = 0; @@ -967,7 +952,7 @@ static void nfs_writeback_done_partial(s if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto out; } @@ -1030,7 +1015,7 @@ static void nfs_writeback_done_full(stru if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto remove_request; } @@ -1244,7 +1229,7 @@ static void nfs_commit_done(struct rpc_t req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; @@ -1347,52 +1332,95 @@ long nfs_sync_mapping_wait(struct addres return ret; } -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) +static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) +{ + int ret; + + ret = nfs_writepages(mapping, wbc); + if (ret < 0) + goto out; + ret = nfs_sync_mapping_wait(mapping, wbc, how); + if (ret < 0) + goto out; + return 0; +out: + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return ret; +} + +/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ +static int nfs_write_mapping(struct address_space *mapping, int how) { - struct address_space *mapping = inode->i_mapping; struct writeback_control wbc = { .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, + .sync_mode = WB_SYNC_NONE, .nr_to_write = LONG_MAX, .for_writepages = 1, .range_cyclic = 1, }; int ret; - ret = nfs_writepages(mapping, &wbc); + ret = __nfs_write_mapping(mapping, &wbc, how); if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, 0); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; + return ret; + wbc.sync_mode = WB_SYNC_ALL; + return __nfs_write_mapping(mapping, &wbc, how); } -int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) { + return nfs_write_mapping(inode->i_mapping, 0); +} + +int nfs_wb_nocommit(struct inode *inode) +{ + return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); +} + +int nfs_wb_page_cancel(struct inode *inode, struct page *page) +{ + struct nfs_page *req; + loff_t range_start = page_offset(page); + loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, + .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = range_start, .range_end = range_end, - .for_writepages = 1, }; - int ret; + int ret = 0; - ret = nfs_writepages(mapping, &wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, how); - if (ret >= 0) + BUG_ON(!PageLocked(page)); + for (;;) { + req = nfs_page_find_request(page); + if (req == NULL) + goto out; + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_release_request(req); + break; + } + if (nfs_lock_request_dontget(req)) { + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_request(req); + break; + } + ret = nfs_wait_on_request(req); + if (ret < 0) + goto out; + } + if (!PagePrivate(page)) return 0; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return ret; } @@ -1433,35 +1461,6 @@ int nfs_wb_page(struct inode *inode, str return nfs_wb_page_priority(inode, page, FLUSH_STABLE); } -int nfs_set_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct inode *inode; - struct nfs_page *req; - int ret; - - if (!mapping) - goto out_raced; - inode = mapping->host; - if (!inode) - goto out_raced; - spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(page); - if (req != NULL) { - /* Mark any existing write requests for flushing */ - ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(&inode->i_lock); - nfs_release_request(req); - return ret; - } - ret = __set_page_dirty_nobuffers(page); - spin_unlock(&inode->i_lock); - return ret; -out_raced: - return !TestSetPageDirty(page); -} - - int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", diff -puN include/linux/jiffies.h~git-nfs include/linux/jiffies.h --- a/include/linux/jiffies.h~git-nfs +++ a/include/linux/jiffies.h @@ -115,6 +115,10 @@ static inline u64 get_jiffies_64(void) ((long)(a) - (long)(b) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) +#define time_in_range(a,b,c) \ + (time_after_eq(a,b) && \ + time_before_eq(a,c)) + /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64() */ diff -puN include/linux/nfs_fs.h~git-nfs include/linux/nfs_fs.h --- a/include/linux/nfs_fs.h~git-nfs +++ a/include/linux/nfs_fs.h @@ -77,6 +77,9 @@ struct nfs_open_context { struct nfs4_state *state; fl_owner_t lockowner; int mode; + + unsigned long flags; +#define NFS_CONTEXT_ERROR_WRITE (0) int error; struct list_head list; @@ -289,9 +292,6 @@ extern int nfs_refresh_inode(struct inod extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); -extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); -extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); -extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); @@ -328,14 +328,15 @@ extern const struct inode_operations nfs extern const struct file_operations nfs_file_operations; extern const struct address_space_operations nfs_file_aops; -static inline struct rpc_cred *nfs_file_cred(struct file *file) +static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) { - if (file != NULL) { - struct nfs_open_context *ctx; + return filp->private_data; +} - ctx = (struct nfs_open_context*)file->private_data; - return ctx->cred; - } +static inline struct rpc_cred *nfs_file_cred(struct file *file) +{ + if (file != NULL) + return nfs_file_open_context(file)->cred; return NULL; } @@ -378,6 +379,8 @@ extern const struct file_operations nfs_ extern struct dentry_operations nfs_dentry_operations; extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); +extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags); +extern void nfs_access_zap_cache(struct inode *inode); /* * linux/fs/nfs/symlink.c @@ -420,17 +423,17 @@ extern int nfs_flush_incompatible(struc extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); -extern int nfs_set_page_dirty(struct page *); /* * Try to write back everything synchronously (but check the * return value!) */ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); -extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); extern int nfs_wb_all(struct inode *inode); +extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); +extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commit_alloc(void); diff -puN include/linux/nfs_page.h~git-nfs include/linux/nfs_page.h --- a/include/linux/nfs_page.h~git-nfs +++ a/include/linux/nfs_page.h @@ -30,7 +30,6 @@ #define PG_BUSY 0 #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 -#define PG_NEED_FLUSH 3 struct nfs_inode; struct nfs_page { diff -puN include/linux/nfs_xdr.h~git-nfs include/linux/nfs_xdr.h --- a/include/linux/nfs_xdr.h~git-nfs +++ a/include/linux/nfs_xdr.h @@ -538,10 +538,13 @@ typedef u64 clientid4; struct nfs4_accessargs { const struct nfs_fh * fh; + const u32 * bitmask; u32 access; }; struct nfs4_accessres { + const struct nfs_server * server; + struct nfs_fattr * fattr; u32 supported; u32 access; }; diff -puN include/linux/sunrpc/xprt.h~git-nfs include/linux/sunrpc/xprt.h --- a/include/linux/sunrpc/xprt.h~git-nfs +++ a/include/linux/sunrpc/xprt.h @@ -53,6 +53,9 @@ enum rpc_display_format_t { RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, RPC_DISPLAY_ALL, + RPC_DISPLAY_HEX_ADDR, + RPC_DISPLAY_HEX_PORT, + RPC_DISPLAY_UNIVERSAL_ADDR, RPC_DISPLAY_MAX, }; diff -puN include/linux/writeback.h~git-nfs include/linux/writeback.h --- a/include/linux/writeback.h~git-nfs +++ a/include/linux/writeback.h @@ -61,8 +61,6 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ - - void *fs_private; /* For use by ->writepages() */ }; /* diff -puN kernel/auditsc.c~git-nfs kernel/auditsc.c --- a/kernel/auditsc.c~git-nfs +++ a/kernel/auditsc.c @@ -1746,6 +1746,7 @@ add_names: context->names[idx].ino = (unsigned long)-1; } } +EXPORT_SYMBOL_GPL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values diff -puN net/sunrpc/rpc_pipe.c~git-nfs net/sunrpc/rpc_pipe.c --- a/net/sunrpc/rpc_pipe.c~git-nfs +++ a/net/sunrpc/rpc_pipe.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -585,6 +585,7 @@ rpc_populate(struct dentry *parent, if (S_ISDIR(mode)) inc_nlink(dir); d_add(dentry, inode); + fsnotify_create(dir, dentry); } mutex_unlock(&dir->i_mutex); return 0; @@ -606,7 +607,7 @@ __rpc_mkdir(struct inode *dir, struct de inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); inc_nlink(dir); - inode_dir_notify(dir, DN_CREATE); + fsnotify_mkdir(dir, dentry); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", @@ -748,7 +749,7 @@ rpc_mkpipe(struct dentry *parent, const rpci->flags = flags; rpci->ops = ops; rpci->nkern_readwriters = 1; - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry); dget(dentry); out: mutex_unlock(&dir->i_mutex); diff -puN net/sunrpc/rpcb_clnt.c~git-nfs net/sunrpc/rpcb_clnt.c --- a/net/sunrpc/rpcb_clnt.c~git-nfs +++ a/net/sunrpc/rpcb_clnt.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include @@ -137,10 +139,13 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; -static struct rpcb_info { +struct rpcb_info { int rpc_vers; struct rpc_procinfo * rpc_proc; -} rpcb_next_version[]; +}; + +static struct rpcb_info rpcb_next_version[]; +static struct rpcb_info rpcb_next_version6[]; static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) { @@ -190,7 +195,17 @@ static struct rpc_clnt *rpcb_create(char RPC_CLNT_CREATE_INTR), }; - ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + switch (srvaddr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + break; + case AF_INET6: + ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); + break; + default: + return NULL; + } + if (!privileged) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); @@ -316,6 +331,7 @@ void rpcb_getport_async(struct rpc_task struct rpc_task *child; struct sockaddr addr; int status; + struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __FUNCTION__, @@ -343,18 +359,43 @@ void rpcb_getport_async(struct rpc_task goto bailout_nofree; } - if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + + /* Don't ever use rpcbind v2 for AF_INET6 requests */ + switch (addr.sa_family) { + case AF_INET: + info = rpcb_next_version; + break; + case AF_INET6: + info = rpcb_next_version6; + break; + default: + status = -EAFNOSUPPORT; + dprintk("RPC: %5u %s: bad address family\n", + task->tk_pid, __FUNCTION__); + goto bailout_nofree; + } + if (info[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; status = -EACCES; /* tell caller to try again later */ dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } - bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; + bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, + bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); + goto bailout_nofree; + } + map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; @@ -369,26 +410,18 @@ void rpcb_getport_async(struct rpc_task map->r_xprt = xprt_get(xprt); map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : RPCB_NETID_UDP; - memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), - sizeof(map->r_addr)); + memcpy(&map->r_addr, + rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), + sizeof(map->r_addr)); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); - rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); - if (IS_ERR(rpcb_clnt)) { - status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); - goto bailout; - } - child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __FUNCTION__); - goto bailout_nofree; + goto bailout; } rpc_put_task(child); @@ -490,10 +523,11 @@ static int rpcb_decode_getaddr(struct rp unsigned short *portp) { char *addr; - int addr_len, c, i, f, first, val; + u32 addr_len; + int c, i, f, first, val; *portp = 0; - addr_len = (unsigned int) ntohl(*p++); + addr_len = ntohl(*p++); if (addr_len > RPCB_MAXADDRLEN) /* sanity */ return -EINVAL; @@ -593,6 +627,14 @@ static struct rpcb_info rpcb_next_versio { 0, NULL }, }; +static struct rpcb_info rpcb_next_version6[] = { +#ifdef CONFIG_SUNRPC_BIND34 + { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, + { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, +#endif + { 0, NULL }, +}; + static struct rpc_version rpcb_version2 = { .number = 2, .nrprocs = RPCB_HIGHPROC_2, diff -puN net/sunrpc/xprtsock.c~git-nfs net/sunrpc/xprtsock.c --- a/net/sunrpc/xprtsock.c~git-nfs +++ a/net/sunrpc/xprtsock.c @@ -13,6 +13,9 @@ * (C) 1999 Trond Myklebust * * IP socket transport implementation, (C) 2005 Chuck Lever + * + * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. + * */ #include @@ -260,14 +263,29 @@ struct sock_xprt { #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -static void xs_format_peer_addresses(struct rpc_xprt *xprt) +static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) +{ + return (struct sockaddr *) &xprt->addr; +} + +static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) +{ + return (struct sockaddr_in *) &xprt->addr; +} + +static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) +{ + return (struct sockaddr_in6 *) &xprt->addr; +} + +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; buf = kzalloc(20, GFP_KERNEL); if (buf) { - snprintf(buf, 20, "%u.%u.%u.%u", + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -279,26 +297,115 @@ static void xs_format_peer_addresses(str } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - if (xprt->prot == IPPROTO_UDP) - xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; - else - xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; buf = kzalloc(48, GFP_KERNEL); if (buf) { - snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) { + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) { + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; +} + +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in6 *addr = xs_addr_in6(xprt); + char *buf; + + buf = kzalloc(40, GFP_KERNEL); + if (buf) { + snprintf(buf, 40, NIP6_FMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + + buf = kzalloc(64, GFP_KERNEL); + if (buf) { + snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port), + xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(36, GFP_KERNEL); + if (buf) { + snprintf(buf, 36, NIP6_SEQFMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(50, GFP_KERNEL); + if (buf) { + snprintf(buf, 50, NIP6_FMT".%u.%u", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + int i; + + for (i = 0; i < RPC_DISPLAY_MAX; i++) + kfree(xprt->address_strings[i]); } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -463,7 +570,7 @@ static int xs_udp_send_request(struct rp req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, - (struct sockaddr *) &xprt->addr, + xs_addr(xprt), xprt->addrlen, xdr, req->rq_bytes_sent); @@ -523,7 +630,8 @@ static int xs_tcp_send_request(struct rp struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - int status, retry = 0; + int status; + unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -1139,14 +1247,23 @@ static unsigned short xs_get_random_port */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + struct sockaddr *addr = xs_addr(xprt); dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - sap->sin_port = htons(port); + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + break; + default: + BUG(); + } } -static int xs_bind(struct sock_xprt *transport, struct socket *sock) +static int xs_bind4(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, @@ -1174,8 +1291,42 @@ static int xs_bind(struct sock_xprt *tra else port--; } while (err == -EADDRINUSE && port != transport->port); - dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", - NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); + dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", + __FUNCTION__, NIPQUAD(myaddr.sin_addr), + port, err ? "failed" : "ok", err); + return err; +} + +static int xs_bind6(struct sock_xprt *transport, struct socket *sock) +{ + struct sockaddr_in6 myaddr = { + .sin6_family = AF_INET6, + }; + struct sockaddr_in6 *sa; + int err; + unsigned short port = transport->port; + + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in6 *)&transport->addr; + myaddr.sin6_addr = sa->sin6_addr; + do { + myaddr.sin6_port = htons(port); + err = kernel_bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (!transport->xprt.resvport) + break; + if (err == 0) { + transport->port = port; + break; + } + if (port <= xprt_min_resvport) + port = xprt_max_resvport; + else + port--; + } while (err == -EADDRINUSE && port != transport->port); + dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", + NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err); return err; } @@ -1183,38 +1334,69 @@ static int xs_bind(struct sock_xprt *tra static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); - switch (sk->sk_family) { - case AF_INET: - sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", - &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); - break; + sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", + &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); +} - case AF_INET6: - sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", - &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); - break; +static inline void xs_reclassify_socket6(struct socket *sock) +{ + struct sock *sk = sock->sk; - default: - BUG(); - } + BUG_ON(sk->sk_lock.owner != NULL); + sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", + &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } #else -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) +{ +} + +static inline void xs_reclassify_socket6(struct socket *sock) { } #endif +static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + + xprt_set_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + xs_udp_do_set_buffer_size(xprt); +} + /** - * xs_udp_connect_worker - set up a UDP socket + * xs_udp_connect_worker4 - set up a UDP socket * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker(struct work_struct *work) +static void xs_udp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1232,9 +1414,9 @@ static void xs_udp_connect_worker(struct dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock)) { sock_release(sock); goto out; } @@ -1242,29 +1424,48 @@ static void xs_udp_connect_worker(struct dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; + xs_udp_finish_connecting(xprt, sock); + status = 0; +out: + xprt_wake_pending_tasks(xprt, status); + xprt_clear_connecting(xprt); +} - write_lock_bh(&sk->sk_callback_lock); +/** + * xs_udp_connect_worker6 - set up a UDP socket + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_udp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_udp_data_ready; - sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; - sk->sk_allocation = GFP_ATOMIC; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_set_connected(xprt); + /* Start by resetting any existing state */ + xs_close(xprt); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - write_unlock_bh(&sk->sk_callback_lock); + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; } - xs_udp_do_set_buffer_size(xprt); + + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + + xs_udp_finish_connecting(xprt, sock); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1295,13 +1496,52 @@ static void xs_tcp_reuse_connection(stru result); } +static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ + sk->sk_userlocks |= SOCK_BINDPORT_LOCK; + sock_reset_flag(sk, SOCK_LINGER); + tcp_sk(sk)->linger2 = 0; + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); +} + /** - * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1315,13 +1555,12 @@ static void xs_tcp_connect_worker(struct if (!sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport " - "socket (%d).\n", -err); + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock) < 0) { sock_release(sock); goto out; } @@ -1332,43 +1571,70 @@ static void xs_tcp_connect_worker(struct dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); + status = xs_tcp_finish_connecting(xprt, sock); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, -status, xprt_connected(xprt), + sock->sk->sk_state); + if (status < 0) { + switch (status) { + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_close(xprt); + break; + } + } +out: + xprt_wake_pending_tasks(xprt, status); +out_clear: + xprt_clear_connecting(xprt); +} - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_tcp_data_ready; - sk->sk_state_change = xs_tcp_state_change; - sk->sk_write_space = xs_tcp_write_space; - sk->sk_allocation = GFP_ATOMIC; +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_clear_connected(xprt); + if (!sock) { + /* start from scratch */ + if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; + } + } else + /* "close" the socket, preserving the local port */ + xs_tcp_reuse_connection(xprt); - write_unlock_bh(&sk->sk_callback_lock); - } + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; - status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, - xprt->addrlen, O_NONBLOCK); + status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), - sock->sk->sk_state); + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { switch (status) { case -EINPROGRESS: @@ -1551,6 +1817,7 @@ static struct rpc_xprt *xs_setup_xprt(st */ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1559,15 +1826,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1580,7 +1843,28 @@ struct rpc_xprt *xs_setup_udp(struct rpc else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return NULL; + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1594,6 +1878,7 @@ struct rpc_xprt *xs_setup_udp(struct rpc */ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1602,14 +1887,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1622,7 +1903,26 @@ struct rpc_xprt *xs_setup_tcp(struct rpc else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return NULL; + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); _