GIT 1e28855867c31925e2ffa3a8acf16bb3ad8b634c git://git.linux-nfs.org/pub/linux/nfs-2.6.git

commit 1e28855867c31925e2ffa3a8acf16bb3ad8b634c
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:33 2006 -0500

    lockd: Add helper for *_RES callbacks
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit ca91c90591da17cbd97faf0d40ea7f09c41c518d
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:33 2006 -0500

    NLM: Add nlmclnt_release_call
    
    Add a helper function to simplify the freeing of NLM client requests.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 5410b7146ab304f3d5042643d1b58346b008a702
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:32 2006 -0500

    NLM: Fix nlmclnt_test to not copy private part of locks
    
    The struct file_lock does not carry a properly initialised lock,
    so don't copy it as if it were.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit b62279dbb338a28fcfb3db5503f343b47ed0f270
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:32 2006 -0500

    NLM: Simplify client locks
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 72107867e60d69f192131324bce5bbba35ca84f5
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:31 2006 -0500

    NFS: O_DIRECT needs to use a completion
    
    Now that we have aio writes, it is possible for dreq->outstanding to be
    zero, but for the I/O not to have completed. Convert struct nfs_direct_req
    to use a completion to signal when the I/O is done.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit a429e3e20b09ed137c723ac014fe9c29cd566798
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:31 2006 -0500

    NFS: Clean up nfs_get_user_pages
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit b06c6c098078bd5501a389407b3fd0854f9eadb6
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:30 2006 -0500

    NFS: fix compiler warnings on 64-bit platforms
    
    Introduced by NFS aio+dio patches.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled on 64-bit hardware.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 5fe677942e79c4b0bf1c6937a0827f74a6f35e46
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:30 2006 -0500

    SUNRPC: fix compile warnings on 64-bit platforms
    
    Introduced by NFS metrics patch.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled on a 64-bit platform.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit fea3a1a5ebfd99d6175f642153b47e17e241c4d1
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:29 2006 -0500

    NLM: nlmclnt_cancel_callback should accept NLM_LCK_DENIED errors
    
    NLM_LCK_DENIED is a valid error return for an NLM_CANCEL call by the
    client.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit d09828c3e21dae5d176cf12302c503171bedca07
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:29 2006 -0500

    lockd: Fix Oopses due to list manipulation errors.
    
    The patch "stop abusing file_lock_list introduces a couple of bugs since
    the locks may be copied and need to be removed from the lists when they are
    destroyed.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 54a8d59712241450ac187bff12cf168814deaa0c
Author: Christoph Hellwig <hch@lst.de>
Date:   Sun Feb 26 00:54:29 2006 -0500

    lockd: stop abusing file_lock_list
    
    Currently lockd directly access the file_lock_list from fs/locks.c.
    It does so to mark locks granted or reclaimable.  This is very
    suboptimal, because a) lockd needs to poke into locks.c internals, and
    b) it needs to iterate over all locks in the system for marking locks
    granted or reclaimable.
    
    This patch adds lists for granted and reclaimable locks to the nlm_host
    structure instead, and adds locks to those.
    
    nlmclnt_lock:
    	now adds the lock to h_granted instead of setting the
    	NFS_LCK_GRANTED, still O(1)
    
    nlmclnt_mark_reclaim:
    	goes away completely, replaced by a list_splice_init.
    	Complexity reduced from O(locks in the system) to O(1)
    
    reclaimer:
    	iterates over h_reclaim now, complexity reduced from
    	O(locks in the system) to O(locks per nlm_host)
    
    Signed-off-by: Christoph Hellwig <hch@lst.de>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit ddfee955be4cf359d16198e50f0f4d4f9b53d478
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:28 2006 -0500

    lockd: Make lockd use rpc_new_client() instead of rpc_create_client
    
    When doing NLM_GRANTED requests, lockd may end up blocking if we use
    rpc_create_client() due to the synchronous call to rpc_ping(). Instead, use
    rpc_new_client().
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 128529ce28f4bc457a3f6075c5456f2007083bb4
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:28 2006 -0500

    lockd: Make nlmsvc_create_block() use nlmsvc_lookup_host()
    
    Currently it uses nlmclnt_lookup_host(), which puts the resulting host
    structure on a different list.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit e4bed7d8aa4e60c41bd80f5d06e18e934d5e7ff7
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:27 2006 -0500

    lockd: Clean up of the server-side GRANTED code
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 1fda510ffaadb460a9b3146abfac8ae6428ee9d8
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:27 2006 -0500

    lockd: Add refcounting to struct nlm_block
    
    Otherwise, the block may disappear from underneath us when in
    nlmsvc_retry_blocked.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit ff9562f93f8d2d16902c466c7eb8df730bacafc9
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:26 2006 -0500

    lockd: Fix server-side lock blocking code
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 3385487dd88ed727d27bd810e1cc44a1ababae4b
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:26 2006 -0500

    lockd: posix_test_lock() should not call locks_copy_lock()
    
    The caller of posix_test_lock() should never need to look at the lock
    private data, so do not copy that information. This also means that there
    is no need to call the fl_release_private methods.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 52139b08ed8ae08b1ac2e39cee0228173146eda9
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:25 2006 -0500

    NFS: Uninline nfs_writedata_(alloc|free) and nfs_readdata_(alloc|free)
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit d7ca06d8f02300276e292fdb90eee10c4ff5419a
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:25 2006 -0500

    NFS: Debugging code for nfs_direct_(read|write)_schedule()
    
    Make sure that we're doing our list accounting correctly.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 5e1f2987ee5cf64308d97fe739d04ee9c0c131d0
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:25 2006 -0500

    NFS: O_DIRECT async IO may lose context
    
    The struct nfs_direct_req currently keeps a pointer to the file descriptor
    without referencing it. This may cause problems if the parent process is
    killed.
    
    The nfs_open_context should normally have all the information that we're
    currently using the filp for, and unlike fput(), is safe to release from
    an rpciod process context.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit a817f871c072d4a76fb6c29aecfb64801e4e9626
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:24 2006 -0500

    nfs: Use UNSTABLE + COMMIT for NFS O_DIRECT writes
    
    Currently NFS O_DIRECT writes use FILE_SYNC so that a COMMIT is not
    necessary.  This simplifies the internal logic, but this could be a
    difficult workload for some servers.
    
    Instead, let's send UNSTABLE writes, and after they all complete, send a
    COMMIT for the dirty range.  After the COMMIT returns successfully, then do
    the wake_up or fire off aio_complete().
    
    Test plan:
    Async direct I/O tests against Solaris (or any server that requires
    committed unstable writes).  Reboot server during test.
    
    Based on an earlier patch by Chuck Lever <cel@netapp.com>
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 64dee7a67c3d5a6489d62edfd45686755ab8d296
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:24 2006 -0500

    NFS: Make nfs_commit_alloc() extern
    
    We need to use nfs_commit_alloc() in fs/nfs/direct.c.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 1587a1bf94b518879a8492661f7b3920a6654239
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:23 2006 -0500

    NFS: fix data_update accounting in NFS direct I/O path
    
    ^C against "iozone -I" is hitting the assertion in nfs_clear_inode().
    
    Test plan:
    "iozone -i0 -I -a -c" against a slow server, then control C.  This should
    not cause an oops.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit cf972b80fb93540d64b1b1a803b3626827d8b786
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:23 2006 -0500

    NFS: Replace atomic_t variables in nfs_direct_req with a single spin lock
    
    Three atomic_t variables cause a lot of bus locking.  Because they are all
    used in the same places in the code, just use a single spin lock.
    
    Now that the atomic_t variables are gone, we can remove the request size
    limitation since the code no longer depends on the limited width of atomic_t
    on some platforms.
    
    Test plan:
    Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.  Millions of fsx
    operations, iozone, OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit fbc8585232bfe0c50aceb8eefdffd5a8f9abd65f
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:22 2006 -0500

    NFS: clean up comments and tab damage in direct.c
    
    Clean up tab damage and comments.  Replace "file_offset" with more commonly
    used "pos".
    
    Test plan:
    Compile with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 4003c9d4f998c906c5d1759c3a78947e7f459d75
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:22 2006 -0500

    NFS: support EIOCBQUEUED return in direct write path
    
    For async iocb's, the NFS direct write path now returns EIOCBQUEUED,
    and calls aio_complete when all the requested writes are finished.  The
    synchronous part of the NFS direct write path behaves exactly as it
    was before.
    
    Shared mapped NFS files will have some coherency difficulties when
    accessed concurrently with aio+dio.  Will need to explore how this
    is handled in the local file system case.
    
    Test plan:
    aio-stress with "-O". OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 8cc691f4af1df6337cdf91b80e9d4302f835bb58
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:21 2006 -0500

    NFS: make iocb available everywhere in direct write path
    
    Pass the iocb argument all the way down to the direct write request
    scheduler, and make it available in nfs_direct_write_result.
    
    Test plan:
    Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    Millions of fsx-odirect ops.  OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 0a4f7f68747b7cf48ffae7cbc7975af5204fd7f7
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:21 2006 -0500

    NFS: remove support for multi-segment iovs in the direct write path
    
    Eliminate the persistent use of automatic storage in all parts of the
    NFS client's direct write path to pave the way for introducing support
    for aio against files opened with the O_DIRECT flag.
    
    Test plan:
    Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    Millions of fsx-odirect ops.  OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 44fedbe57789e85f7ea499d4133b4d04c353dceb
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:21 2006 -0500

    NFS: make direct write path generate write requests concurrently
    
    Duplicate infrastructure from direct read path that will allow write
    path to generate multiple write requests concurrently.  This will
    enable us to add support for aio in this path.
    
    Temporarily we will lose the ability to do UNSTABLE writes followed by
    a COMMIT in the direct write path.  However, all applications I am
    aware of that use NFS O_DIRECT currently write in relatively small
    chunks, so this should not be inconvenient in any way.
    
    Test plan:
    Millions of fsx-odirect ops. OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 3143feb2e75414f20a3f6ab2dc89db8af4f8dbdc
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:20 2006 -0500

    NFS: create common routine for handling direct I/O completion
    
    Factor out the common piece of completing an NFS direct I/O request.
    
    Test plan:
    Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit ffa599e5aea3b844fb96621ae8f704c4fb2fc757
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:20 2006 -0500

    NFS: create common routine for allocating nfs_direct_req
    
    Factor out a small common piece of the path that allocate nfs_direct_req
    structures.
    
    Test plan:
    Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit f3d2d40a3287b84c857c1ae9a7c1c785c0222e1a
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:19 2006 -0500

    NFS: create common routine for waiting for direct I/O to complete
    
    We're about to add asynchrony to the NFS direct write path.  Begin by
    abstracting out the common pieces in the read path.
    
    The first piece is nfs_direct_read_wait, which works the same whether the
    process is waiting for a read or a write.
    
    Test plan:
    Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 4ccda478a87de4e1ccbfebe67a7cec09fe62e49c
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:19 2006 -0500

    NFS: support EIOCBQUEUED return in direct read path
    
    For async iocb's, the NFS direct read path should return EIOCBQUEUED and
    call aio_complete when all the requested reads are finished.  The
    synchronous part of the NFS direct read path behaves exactly as it was
    before.
    
    Test plan:
    aio-stress with "-O".  OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 354a4fa45bfb9e48325732f66a7b389ca812cc4e
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:18 2006 -0500

    NFS: make iocb available everywhere in direct read path
    
    Pass the iocb argument all the way down to the direct read request
    scheduler, and make it available in nfs_direct_read_result.
    
    Test plan:
    Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    Millions of fsx-odirect ops.  OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit e6cf0d0d7bb9bb44e4b3ae61bc72ce399e0e8e95
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:18 2006 -0500

    NFS: remove support for multi-segment iovs in the direct read path
    
    Eliminate the persistent use of automatic storage in all parts of the NFS
    client's direct read path to pave the way for introducing support for aio
    against files opened with the O_DIRECT flag.
    
    Test plan:
    Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.
    Millions of fsx-odirect ops.  OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit cc6b176dd56a4dd1f5c07c5348d90b4d2da651bf
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:18 2006 -0500

    NFS: use size_t type for holding rsize bytes in NFS O_DIRECT read path
    
    size_t is used for holding byte counts, so use it for variables storing rsize.
    Note that the write path will be updated as we add support for async
    O_DIRECT writes.
    
    Test plan:
    Need to verify that existing comparisons against new size_t variables behave
    correctly.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 00e3b72d1832f7be04bdc1fca01de57afa115fae
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:17 2006 -0500

    NFS: update comments and function definitions in fs/nfs/direct.c
    
    Update to latest coding style standards.  Remove block comments on
    statically defined functions, and place function definitions all on
    one line.
    
    Test plan:
    Compile kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit ff3993ca427de248e947f32296ced7a907b5ccca
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:17 2006 -0500

    NFS: clean up NFS client's a_ops->direct_IO method
    
    The NFS client's a_ops->direct_IO method, nfs_direct_IO, is required to
    be present to allow NFS files to be opened with O_DIRECT, but is never
    called because the NFS client shunts reads and writes to files opened
    with O_DIRECT directly to its own routines.
    
    Gut the nfs_direct_IO function.  This eliminates the only part of the
    NFS client's direct I/O path that requires support for multi-segment
    iovs, allowing further simplification in subsequent patches.
    
    Test plan:
    Compile the kernel with CONFIG_NFS and CONFIG_NFS_DIRECTIO enabled.  Millions
    of fsx-odirect ops.  OraSim.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 7bdcb20eedb87eaa04f330ce19c88a47eb69dcc9
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:16 2006 -0500

    NFS: Cleanup of NFS read code
    
    Same callback hierarchy inversion as for the NFS write calls. This patch is
    not strictly speaking needed by the O_DIRECT code, but avoids confusing
    differences between the asynchronous read and write code.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit b652315b8764a126626d9dcde7417ac1571c98a3
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:16 2006 -0500

    NFS: Cleanup of NFS write code in preparation for asynchronous o_direct
    
    This patch inverts the callback hierarchy for NFS write calls.
    
    Instead of having the NFSv2/v3/v4-specific code set up the RPC callback
    ops, we allow the original caller to do so. This allows for more
    flexibility w.r.t. how to set up and tear down the nfs_write_data
    structure while still allowing the NFSv3/v4 code to perform error
    handling.
    
    The greater flexibility is needed by the asynchronous O_DIRECT code, which
    wants to be able to hold on to the original nfs_write_data structures after
    the WRITE RPC call has completed in order to be able to replay them if the
    COMMIT call determines that the server has rebooted.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 3a7585f5f4a36c583472bdfcd06445b8af430466
Author: J. Bruce Fields <bfields@fieldses.org>
Date:   Sun Feb 26 00:54:15 2006 -0500

    lockd: Remove FL_LOCKD flag
    
    Currently lockd identifies its own locks using the FL_LOCKD flag.  This
    doesn't scale well to multiple lock managers--if we did this in nfsv4 too,
    for example, we'd be left with only one free flag bit.
    
    Instead, we just check whether the file manager ops (fl_lmops) set on this
    lock are our own.
    
    The only use for this is in nlm_traverse_locks, which uses it to find locks
    that need cleaning up when freeing a host or a file.
    
    In the long run it might be nice to do reference counting instead of
    traversing all the locks like this....
    
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 65203679dedcf8d9b5903c0d992eda589fb3c39e
Author: Andy Adamson <andros@citi.umich.edu>
Date:   Sun Feb 26 00:54:15 2006 -0500

    locks,lockd: fix race in nlmsvc_testlock
    
    posix_test_lock() returns a pointer to a struct file_lock which is unprotected
    and can be removed while in use by the caller.  Move the conflicting lock from
    the return to a parameter, and copy the conflicting lock.
    
    In most cases the caller ends up putting the copy of the conflicting lock on
    the stack.  On i386, sizeof(struct file_lock) appears to be about 100 bytes.
    We're assuming that's reasonable.
    
    Signed-off-by: Andy Adamson <andros@citi.umich.edu>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 3b45055d5da1c42a92fc8fd6093b835a9bd3ecbb
Author: Andy Adamson <andros@citi.umich.edu>
Date:   Sun Feb 26 00:54:14 2006 -0500

    locks: remove unused posix_block_lock
    
    posix_lock_file() is used to add a blocked lock to Lockd's block, so
    posix_block_lock() is no longer needed.
    
    Signed-off-by: Andy Adamson <andros@citi.umich.edu>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit b5c3eca92b96b7c0ad9634862bde7be3f497beb9
Author: Andy Adamson <andros@citi.umich.edu>
Date:   Sun Feb 26 00:54:14 2006 -0500

    lockd: make nlmsvc_lock use only posix_lock_file
    
    Reorganize nlmsvc_lock() to make full use of posix_lock_file(), which does
    eveything nlmsvc_lock() needs - no need to call posix_test_lock(),
    posix_locks_deadlock(), or posix_block_lock() separately.
    
    Signed-off-by: Andy Adamson <andros@citi.umich.edu>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 5dcf97752803055e8af8bfce2d5a666481eb45b4
Author: Andy Adamson <andros@citi.umich.edu>
Date:   Sun Feb 26 00:54:14 2006 -0500

    lockd: simplify nlmsvc_grant_blocked
    
    Reorganize nlmsvc_grant_blocked() to make full use of posix_lock_file().  Note
    that there's no need for separate calls to posix_test_lock(),
    posix_locks_deadlock(), or posix_block_lock().
    
    Signed-off-by: Andy Adamson <andros@citi.umich.edu>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 6b3562af209bf7ab61b14324f3bbe7e908b2e608
Author: Andy Adamson <andros@citi.umich.edu>
Date:   Sun Feb 26 00:54:13 2006 -0500

    lockd: clean up nlmsvc_lock
    
    Slightly more consistent dprintk error reporting, consolidate some up()'s.
    
    Signed-off-by: Andy Adamson <andros@citi.umich.edu>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 671ec0a131bc4632fb78c4a9928ef790a7d49fe8
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:13 2006 -0500

    NFS: directory trace messages
    
    Reuse NFSDBG_DIRCACHE and NFSDBG_LOOKUPCACHE to provide additional
    diagnostic messages that trace the operation of the NFS client's
    directory behavior.  A few new messages are now generated when NFSDBG_VFS
    is active, as well, to trace normal VFS activity.  This compromise
    provides better trace debugging for those who use pre-built kernels,
    without adding a lot of extra noise to the standard debug settings.
    
    Test-plan:
    Enable NFS trace debugging with flags 1, 2, or 4.  You should be able to
    see different types of trace messages with each flag setting.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 842db74b61dd1cce3a094a7d5907d559e62fdb3a
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:12 2006 -0500

    SUNRPC: minor cleanup
    
    RPC_DEBUG_DATA no longer needed in net/sunrpc/xprt.c.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit a7c8d8bb19476bcc8be27c56c4fdf86878ff460d
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:12 2006 -0500

    SUNRPC: eliminate rpc_call()
    
    Clean-up: replace rpc_call() helper with direct call to rpc_call_sync.
    
    This makes NFSv2 and NFSv3 synchronous calls more computationally
    efficient, and reduces stack consumption in functions that used to
    invoke rpc_call more than once.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled.  Connectathon on NFS version 2,
    version 3, and version 4 mount points.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit aace71104677d3267c51993520513f959da885a4
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:11 2006 -0500

    SUNRPC: display human-readable procedure name in rpc_iostats output
    
    Add fields to the rpc_procinfo struct that allow the display of a
    human-readable name for each procedure in the rpc_iostats output.
    
    Also fix it so that the NFSv4 stats are broken up correctly by
    sub-procedure number.  NFSv4 uses only two real RPC procedures:
    NULL, and COMPOUND.
    
    Test plan:
    Mount with NFSv2, NFSv3, and NFSv4, and do "cat /proc/self/mountstats".
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 46881796a7541b29b08b944ea64588f6de6b12ff
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:11 2006 -0500

    NFS: add RPC I/O statistics to /proc/self/mountstats
    
    NFS client now shows various RPC I/O metrics in /proc/self/mountstats.
    
    Test plan:
    Mount/umount while doing "cat /proc/self/mountstats", multiple iterations
    of connectathon locking suite.  Test with NFS version 2, 3, and 4.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 1938ebdef5edea1454f6e4739fdf1857c0ddcfbf
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:10 2006 -0500

    SUNRPC: provide a mechanism for collecting stats in the RPC client
    
    Add a simple mechanism for collecting stats in the RPC client.  Stats are
    tabulated during xprt_release.  Note that per_cpu shenanigans are not
    required here because the RPC client already serializes on the transport
    write lock.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled.  Basic performance regression
    testing with high-speed networking and high performance server.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 7dd9281b61a7e1d1611b4d55e5829610ebd7780c
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:10 2006 -0500

    SUNRPC: introduce per-task RPC iostats
    
    Account for various things that occur while an RPC task is executed.
    Separate timers for RPC round trip and RPC execution time show how
    long RPC requests wait in queue before being sent.  Eventually these
    will be accumulated at xprt_release time in one place where they can
    be viewed from userland.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 7cae0b57aa996eebe7d53282fdac3675d1e63f60
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:10 2006 -0500

    SUNRPC: add a handful of per-xprt counters
    
    Monitor generic transport events.  Add a transport switch callout to
    format transport counters for export to user-land.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 4a309e1ec7a39b29a4bbd44ee6432f07ba153542
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:09 2006 -0500

    SUNRPC: track length of RPC wait queues
    
    RPC wait queue length will eventually be exported to userland via the RPC
    iostats interface.
    
    Test plan:
    Compile kernel with CONFIG_NFS enabled.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 1e2b67c3505df90a3b3f4f160e5c4bd5d9b06776
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:09 2006 -0500

    NFS: report how long an NFS file system has been mounted
    
    Add a field in nfs_server to record a timestamp when a mount succeeds.
    Report the number of seconds the file system has been mounted via
    nfs_show_stats().
    
    Test plan:
    Mount an NFS file system, watch the mountstats reports and compare with
    clock time.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 3420039b93ceb1811a1acf4205c373b77a9de09b
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:08 2006 -0500

    NFS: add hooks to account for NFSERR_JUKEBOX errors
    
    Make an inode or an nfs_server struct available in the logic that handles
    JUKEBOX/DELAY type errors so the NFS client can account for them.
    
    This patch is split out from the main nfs iostat patch to highlight minor
    architectural changes required to support this statistic.
    
    Test plan:
    None.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit f9f38a15337bc8614dc607c55a5cb3737253fcbe
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:08 2006 -0500

    NFS: add I/O performance counters
    
    Invoke the byte and event counter macros where we want to count bytes and
    events.
    
    Clean-up: fix a possible NULL dereference in nfs_lock, and simplify
    nfs_file_open.
    
    Test-plan:
    fsx and iozone on UP and SMP systems, with and without pre-emption.  Watch
    for memory overwrite bugs, and performance loss (significantly more CPU
    required per op).
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 22505bd829bf44ba005a53b0053f3eace4dc262b
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:07 2006 -0500

    NFS: introduce mechanism for tracking NFS client metrics
    
    Add a per-superblock performance counter facility to the NFS client.  This
    facility mimics the counters available for block devices and for
    networking.  Expose these new counters via the new /proc/self/mountstats
    interface.
    
    Thanks to Andrew Morton and Trond Myklebust for their review and comments.
    
    Test plan:
    fsx and iozone on UP and SMP systems, with and without pre-emption.  Watch
    for memory overwrite bugs, and performance loss (significantly more CPU
    required per op).
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 182902e78bf9ef8e7314dce5ce2197c91f1c6925
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:07 2006 -0500

    NFS: clean up some mount options
    
    Get rid of "lock" and "posix", and spell out "vers=".
    
    Test plan:
    None.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit ad3c1b47b41f19caff1bd555f7cf05f9c09cbace
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:06 2006 -0500

    NFS: show retransmit settings when displaying mount options
    
    Sometimes it's important to know the exact RPC retransmit settings the
    kernel is using for an NFS mount point.  Add this facility to the NFS
    client's show_options method.
    
    Test plan:
    Set various retransmit settings via the mount command, and check that the
    settings are reflected in /proc/mounts.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 8596a3f93ea9e2dcae1121b22b14da9cdc6bd2a9
Author: Chuck Lever <cel@netapp.com>
Date:   Sun Feb 26 00:54:06 2006 -0500

    VFS: New /proc file /proc/self/mountstats
    
    Create a new file under /proc/self, called mountstats, where mounted file
    systems can export information (configuration options, performance counters,
    and so on).  Use a mechanism similar to /proc/mounts and s_ops->show_options.
    
    This mechanism does not violate namespace security, and is safe to use while
    other processes are unmounting file systems.
    
    Thanks to Mike Waychison for his review and comments.
    
    Test-plan:
    Test concurrent mount/unmount operations while cat'ing /proc/self/mountstats.
    
    Signed-off-by: Chuck Lever <cel@netapp.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit d57002502881ae995cb97b3867d12503bd48632b
Author: Levent Serinol <lserinol@gmail.com>
Date:   Sun Feb 26 00:54:05 2006 -0500

    SUNRPC: more verbose output for rpc auth weak error
    
    This patch adds server ip address to be printed out when "server
    requires stronger authentication" error occured.
    
    Signed-off-by: Levent Serinol <lserinol@gmail.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit ca03147251bc52765a6f62a7734c28f7c7c0e9e5
Author: Goldwyn Rodrigues <rgoldwyn@gmail.com>
Date:   Sun Feb 26 00:54:05 2006 -0500

    NFS: Code comments update in NFS
    
    read_cache_mtime is no longer used in nfs_inode. This patch removes
    references of read_cache_mtime in the code comments.
    
    Signed-off-by: Goldwyn Rodrigues <rgoldwyn@gmail.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit b09ea2e98e0a9979162fd06c0117de5a527d7df6
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Feb 26 00:54:05 2006 -0500

    NFS: sem2mutex idmap.c
    
    semaphore to mutex conversion.
    
    the conversion was generated via scripts, and the result was validated
    automatically via a script as well.
    
    build and boot tested.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 1ccfa35b8fb2016d4dd79b9ea6c32cc80fdfab47
Author: Eric Sesterhenn <snakebyte@gmx.de>
Date:   Sun Feb 26 00:54:04 2006 -0500

    NFS: kzalloc conversion in fs/nfs
    
    this converts fs/nfs to kzalloc() usage.
    compile tested with make allyesconfig
    
    Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 0e3b69dd16d1c3a89e709bcc2e3118c87b2c3b8f
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:04 2006 -0500

    NFSv4: Kill braindead gcc warnings
    
    nfs4_open_revalidate: 'res' may be used uninitialized
    nfs4_callback_compound: ‘hdr_res.nops’ may be used uninitialized
    			'op_nr’ may be used uninitialized
    encode_getattr_res: ‘savep’ may be used uninitialized
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 756d43190cf19cb41c129fba5b600b9418b640d8
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:03 2006 -0500

    NFSv4: Do not call rpciod_down() before call to destroy_nfsv4_state()
    
    The reason is that the idmapper cleanup may call flush_workqueue() on
    rpciod_workqueue.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 2f8b1bd97a1cc37e2b19441b19dc63e9d3a82ca3
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:03 2006 -0500

    SUNRPC: Ensure that rpc_mkpipe returns a refcounted dentry
    
    If not, we cannot guarantee that idmap->idmap_dentry, gss_auth->dentry and
    clnt->cl_dentry are valid dentries.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 68c38b5b83a9a50d58d5fbbd3fdb9bbbda55fe55
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:02 2006 -0500

    SUNRPC: Run rpci->queue_timeout on the rpciod workqueue instead of generic
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit c26b8b32e0c0a128095a95ded4e952c7393c1cc8
Author: Olaf Kirch <okir@suse.de>
Date:   Sun Feb 26 00:54:02 2006 -0500

    SUNRPC: Auto-load RPC authentication kernel modules
    
    This patch adds a request_module call to rpcauth_create which will try
    to auto-load the kernel module for the requested authentication flavor.
    For kernels with modular sunrpc, this reduces the admin overhead for
    the user.
    
    Signed-off-by: Olaf Kirch <okir@suse.de>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 9fbc256bee4b711f164d197fcef6dfb661b51525
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:02 2006 -0500

    NFS: reduce the number of false cache invalidations.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 698dea5e1844a086242f775068492bcdf9986e1a
Author: Jesper Juhl <jesper.juhl@gmail.com>
Date:   Sun Feb 26 00:54:01 2006 -0500

    NFS: "const static" vs "static const" in nfs4
    
    My previous "const static" vs "static const" cleanup missed a single case,
    patch below takes care of it.
    
    Signed-off-by: Jesper Juhl <jesper.juhl@gmail.com>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 546224f3c01abbb695d55ba4acfa622871937924
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:01 2006 -0500

    NFSv4: Don't invalidate cached attributes if change attribute is unchanged
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit f32275936040419817b28f4b60a0c3f1e2f2466e
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:00 2006 -0500

    NFS: writes should not clobber utimes() calls
    
    Ensure that we flush out writes in the case when someone calls utimes() in
    order to set the file times.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 40a69a3c115e8935b9f185d931b5a26675fdac62
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:00 2006 -0500

    lockd: Don't expose the process pid to the NLM server
    
    Instead we use the nlm_lockowner->pid.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit d35e35b4749e45bf0e37a2618047e6f736b65347
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:54:00 2006 -0500

    NLM: nlm_alloc_call should not immediately fail on signal
    
    Currently, nlm_alloc_call tests for a signal before it even tries to
    allocate memory.
    Fix it so that it tries at least once.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 2c09dbcbdb0ba80f2561e4f351ef385e8c44bd0d
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:53:59 2006 -0500

    VFS: Fix __posix_lock_file() copy of private lock area
    
    The struct file_lock->fl_u area must be copied using the fl_copy_lock()
    operation.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit d1b9ed669737a5a0e1f6c3f1058e91432a999ffa
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:53:59 2006 -0500

    NLM: Ensure we do not Oops in the case of an unlock
    
    In theory, NLM specs assure us that the server will only reply LCK_GRANTED
    or LCK_DENIED_GRACE_PERIOD to our NLM_UNLOCK request.
    
    In practice, we should not assume this to be the case, and the code will
    currently Oops if we do.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 301833719b740cd5225ea74db683a8d3940cbd25
Author: Neil Brown <neilb@suse.de>
Date:   Sun Feb 26 00:53:58 2006 -0500

    NFS: Fix buglet in fs/nfs/write.c
    
    I've been reading through fs/nfs/write.c trying to track down a bug
    that seems to be related to pages loosing a refcount and getting
    freed too early (you interested in detail??) and I spotted a little
    bug which the following patch should fix.
    
    Signed-off-by: Neil Brown <neilb@suse.de>
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 955df0b8648f89ee91ddfa31654b523fbdce5bde
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:53:56 2006 -0500

    NFS: Avoid races between writebacks and truncation
    
    Currently, there is no serialisation between NFS asynchronous writebacks
    and truncation at the page level due to the fact that nfs_sync_inode()
    cannot lock the pages that it is about to write out.
    
    This means that it is possible to be flushing out data (and calling something
    like set_page_writeback()) while the page cache is busy evicting the page.
    Oops...
    
    Use the hooks provided in try_to_release_page() to ensure that dirty pages
    are always written back to storage before we evict them.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

commit 977fe9d26b414e8410998bc457b57e14fdeb4d3a
Author: Trond Myklebust <Trond.Myklebust@netapp.com>
Date:   Sun Feb 26 00:53:56 2006 -0500

    NFS: Fix a potential panic in O_DIRECT
    
    Based on an original patch by Mike O'Connor and Greg Banks of SGI.
    
    Mike states:
    
    A normal user can panic an NFS client and cause a local DoS with
    'judicious'(?) use of O_DIRECT. Any O_DIRECT write to an NFS file where the
    user buffer starts with a valid mapped page and contains an unmapped page,
    will crash in this way.  I haven't followed the code, but O_DIRECT reads
    with similar user buffers will probably also crash albeit in different
    ways.
    
    Details:  when nfs_get_user_pages() calls get_user_pages(), it detects
    and correctly handles get_user_pages() returning an error, which happens
    if the first page covered by the user buffer's address range is unmapped.
    However, if the first page is mapped but some subsequent page isn't,
    get_user_pages() will return a positive number which is less than the
    number of pages requested (this behaviour is sort of analagous to a
    short write() call and appears to be intentional).  nfs_get_user_pages()
    doesn't detect this and hands off the array of pages (whose last few
    elements are random rubbish from the newly allocated array memory) to
    it's caller, whence they go to nfs_direct_write_seg(), which then totally
    ignores the nr_pages it's given, and calculates its own idea of how many
    pages are in the array from the user buffer length.  Needless to say,
    when it comes to transmit those uninitialised page* pointers, we see
    a crash in the network stack.
    
    Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>


---
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index da6354b..bce7444 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked);
 /*
  * Queue up a lock for blocking so that the GRANTED request can see it
  */
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
 {
 	struct nlm_wait *block;
 
-	BUG_ON(req->a_block != NULL);
 	block = kmalloc(sizeof(*block), GFP_KERNEL);
-	if (block == NULL)
-		return -ENOMEM;
-	block->b_host = host;
-	block->b_lock = fl;
-	init_waitqueue_head(&block->b_wait);
-	block->b_status = NLM_LCK_BLOCKED;
-
-	list_add(&block->b_list, &nlm_blocked);
-	req->a_block = block;
-
-	return 0;
+	if (block != NULL) {
+		block->b_host = host;
+		block->b_lock = fl;
+		init_waitqueue_head(&block->b_wait);
+		block->b_status = NLM_LCK_BLOCKED;
+		list_add(&block->b_list, &nlm_blocked);
+	}
+	return block;
 }
 
-void nlmclnt_finish_block(struct nlm_rqst *req)
+void nlmclnt_finish_block(struct nlm_wait *block)
 {
-	struct nlm_wait *block = req->a_block;
-
 	if (block == NULL)
 		return;
-	req->a_block = NULL;
 	list_del(&block->b_list);
 	kfree(block);
 }
@@ -77,15 +70,14 @@ void nlmclnt_finish_block(struct nlm_rqs
 /*
  * Block on a lock
  */
-long nlmclnt_block(struct nlm_rqst *req, long timeout)
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 {
-	struct nlm_wait	*block = req->a_block;
 	long ret;
 
 	/* A borken server might ask us to block even if we didn't
 	 * request it. Just say no!
 	 */
-	if (!req->a_args.block)
+	if (block == NULL)
 		return -EAGAIN;
 
 	/* Go to sleep waiting for GRANT callback. Some servers seem
@@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req,
 	ret = wait_event_interruptible_timeout(block->b_wait,
 			block->b_status != NLM_LCK_BLOCKED,
 			timeout);
-
-	if (block->b_status != NLM_LCK_BLOCKED) {
-		req->a_res.status = block->b_status;
-		block->b_status = NLM_LCK_BLOCKED;
-	}
-
-	return ret;
+	if (ret < 0)
+		return -ERESTARTSYS;
+	req->a_res.status = block->b_status;
+	return 0;
 }
 
 /*
@@ -125,7 +114,15 @@ u32 nlmclnt_grant(const struct sockaddr_
 	list_for_each_entry(block, &nlm_blocked, b_list) {
 		struct file_lock *fl_blocked = block->b_lock;
 
-		if (!nlm_compare_locks(fl_blocked, fl))
+		if (fl_blocked->fl_start != fl->fl_start)
+			continue;
+		if (fl_blocked->fl_end != fl->fl_end)
+			continue;
+		/*
+		 * Careful! The NLM server will return the 32-bit "pid" that
+		 * we put on the wire: in this case the lockowner "pid".
+		 */
+		if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
 			continue;
 		if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
 			continue;
@@ -147,34 +144,6 @@ u32 nlmclnt_grant(const struct sockaddr_
  */
 
 /*
- * Mark the locks for reclaiming.
- * FIXME: In 2.5 we don't want to iterate through any global file_lock_list.
- *        Maintain NLM lock reclaiming lists in the nlm_host instead.
- */
-static
-void nlmclnt_mark_reclaim(struct nlm_host *host)
-{
-	struct file_lock *fl;
-	struct inode *inode;
-	struct list_head *tmp;
-
-	list_for_each(tmp, &file_lock_list) {
-		fl = list_entry(tmp, struct file_lock, fl_link);
-
-		inode = fl->fl_file->f_dentry->d_inode;
-		if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-			continue;
-		if (fl->fl_u.nfs_fl.owner == NULL)
-			continue;
-		if (fl->fl_u.nfs_fl.owner->host != host)
-			continue;
-		if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED))
-			continue;
-		fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM;
-	}
-}
-
-/*
  * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
  * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
  */
@@ -186,7 +155,12 @@ void nlmclnt_prepare_reclaim(struct nlm_
 	host->h_state++;
 	host->h_nextrebind = 0;
 	nlm_rebind_host(host);
-	nlmclnt_mark_reclaim(host);
+
+	/*
+	 * Mark the locks for reclaiming.
+	 */
+	list_splice_init(&host->h_granted, &host->h_reclaim);
+
 	dprintk("NLM: reclaiming locks for host %s", host->h_name);
 }
 
@@ -215,9 +189,7 @@ reclaimer(void *ptr)
 {
 	struct nlm_host	  *host = (struct nlm_host *) ptr;
 	struct nlm_wait	  *block;
-	struct list_head *tmp;
-	struct file_lock *fl;
-	struct inode *inode;
+	struct file_lock *fl, *next;
 
 	daemonize("%s-reclaim", host->h_name);
 	allow_signal(SIGKILL);
@@ -229,23 +201,13 @@ reclaimer(void *ptr)
 
 	/* First, reclaim all locks that have been marked. */
 restart:
-	list_for_each(tmp, &file_lock_list) {
-		fl = list_entry(tmp, struct file_lock, fl_link);
+	list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
+		list_del_init(&fl->fl_u.nfs_fl.list);
 
-		inode = fl->fl_file->f_dentry->d_inode;
-		if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-			continue;
-		if (fl->fl_u.nfs_fl.owner == NULL)
-			continue;
-		if (fl->fl_u.nfs_fl.owner->host != host)
-			continue;
-		if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM))
-			continue;
-
-		fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM;
-		nlmclnt_reclaim(host, fl);
 		if (signalled())
-			break;
+			continue;
+		if (nlmclnt_reclaim(host, fl) == 0)
+			list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
 		goto restart;
 	}
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 220058d..f96e381 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -132,59 +132,18 @@ static void nlmclnt_setlockargs(struct n
 	memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
 	lock->caller  = system_utsname.nodename;
 	lock->oh.data = req->a_owner;
-	lock->oh.len  = sprintf(req->a_owner, "%d@%s",
-				current->pid, system_utsname.nodename);
-	locks_copy_lock(&lock->fl, fl);
+	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
+				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
+				system_utsname.nodename);
+	lock->svid = fl->fl_u.nfs_fl.owner->pid;
+	lock->fl.fl_start = fl->fl_start;
+	lock->fl.fl_end = fl->fl_end;
+	lock->fl.fl_type = fl->fl_type;
 }
 
 static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 {
-	struct file_lock *fl = &req->a_args.lock.fl;
-
-	if (fl->fl_ops && fl->fl_ops->fl_release_private)
-		fl->fl_ops->fl_release_private(fl);
-}
-
-/*
- * Initialize arguments for GRANTED call. The nlm_rqst structure
- * has been cleared already.
- */
-int
-nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
-{
-	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
-	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-	call->a_args.lock.caller = system_utsname.nodename;
-	call->a_args.lock.oh.len = lock->oh.len;
-
-	/* set default data area */
-	call->a_args.lock.oh.data = call->a_owner;
-
-	if (lock->oh.len > NLMCLNT_OHSIZE) {
-		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
-		if (!data) {
-			nlmclnt_freegrantargs(call);
-			return 0;
-		}
-		call->a_args.lock.oh.data = (u8 *) data;
-	}
-
-	memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
-	return 1;
-}
-
-void
-nlmclnt_freegrantargs(struct nlm_rqst *call)
-{
-	struct file_lock *fl = &call->a_args.lock.fl;
-	/*
-	 * Check whether we allocated memory for the owner.
-	 */
-	if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
-		kfree(call->a_args.lock.oh.data);
-	}
-	if (fl->fl_ops && fl->fl_ops->fl_release_private)
-		fl->fl_ops->fl_release_private(fl);
+	BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
 }
 
 /*
@@ -193,9 +152,8 @@ nlmclnt_freegrantargs(struct nlm_rqst *c
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
-	struct nfs_server	*nfssrv = NFS_SERVER(inode);
 	struct nlm_host		*host;
-	struct nlm_rqst		reqst, *call = &reqst;
+	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
 	int			status, proto, vers;
@@ -209,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cm
 	/* Retrieve transport protocol from NFS client */
 	proto = NFS_CLIENT(inode)->cl_xprt->prot;
 
-	if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers)))
+	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	if (host == NULL)
 		return -ENOLCK;
 
-	/* Create RPC client handle if not there, and copy soft
-	 * and intr flags from NFS client. */
-	if (host->h_rpcclnt == NULL) {
-		struct rpc_clnt	*clnt;
-
-		/* Bind an rpc client to this host handle (does not
-		 * perform a portmapper lookup) */
-		if (!(clnt = nlm_bind_host(host))) {
-			status = -ENOLCK;
-			goto done;
-		}
-		clnt->cl_softrtry = nfssrv->client->cl_softrtry;
-		clnt->cl_intr = nfssrv->client->cl_intr;
-	}
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return -ENOMEM;
+
+	nlmclnt_locks_init_private(fl, host);
+	/* Set up the argument struct */
+	nlmclnt_setlockargs(call, fl);
 
 	/* Keep the old signal mask */
 	spin_lock_irqsave(&current->sighand->siglock, flags);
@@ -238,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cm
 	    && (current->flags & PF_EXITING)) {
 		sigfillset(&current->blocked);	/* Mask all signals */
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-		call = nlmclnt_alloc_call();
-		if (!call) {
-			status = -ENOMEM;
-			goto out_restore;
-		}
 		call->a_flags = RPC_TASK_ASYNC;
-	} else {
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-		memset(call, 0, sizeof(*call));
-		locks_init_lock(&call->a_args.lock.fl);
-		locks_init_lock(&call->a_res.lock.fl);
 	}
-	call->a_host = host;
-
-	nlmclnt_locks_init_private(fl, host);
-
-	/* Set up the argument struct */
-	nlmclnt_setlockargs(call, fl);
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
 	if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
 		if (fl->fl_type != F_UNLCK) {
@@ -270,41 +206,58 @@ nlmclnt_proc(struct inode *inode, int cm
 	else
 		status = -EINVAL;
 
- out_restore:
+	fl->fl_ops->fl_release_private(fl);
+	fl->fl_ops = NULL;
+
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	current->blocked = oldset;
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-done:
 	dprintk("lockd: clnt proc returns %d\n", status);
-	nlm_release_host(host);
 	return status;
 }
 EXPORT_SYMBOL(nlmclnt_proc);
 
 /*
  * Allocate an NLM RPC call struct
+ *
+ * Note: the caller must hold a reference to host. In case of failure,
+ * this reference will be released.
  */
-struct nlm_rqst *
-nlmclnt_alloc_call(void)
+struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 {
 	struct nlm_rqst	*call;
 
-	while (!signalled()) {
-		call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL);
-		if (call) {
-			memset(call, 0, sizeof(*call));
+	for(;;) {
+		call = kzalloc(sizeof(*call), GFP_KERNEL);
+		if (call != NULL) {
 			locks_init_lock(&call->a_args.lock.fl);
 			locks_init_lock(&call->a_res.lock.fl);
+			call->a_host = host;
 			return call;
 		}
-		printk("nlmclnt_alloc_call: failed, waiting for memory\n");
+		if (signalled())
+			break;
+		printk("nlm_alloc_call: failed, waiting for memory\n");
 		schedule_timeout_interruptible(5*HZ);
 	}
+	nlm_release_host(host);
 	return NULL;
 }
 
+void nlm_release_call(struct nlm_rqst *call)
+{
+	nlm_release_host(call->a_host);
+	nlmclnt_release_lockargs(call);
+	kfree(call);
+}
+
+static void nlmclnt_rpc_release(void *data)
+{
+	return nlm_release_call(data);
+}
+
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
 {
 	DEFINE_WAIT(wait);
@@ -401,57 +354,45 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
-	struct rpc_message msg = {
-		.rpc_argp	= &req->a_args,
-		.rpc_resp	= &req->a_res,
-	};
-	int		status;
+	int status = -ENOLCK;
 
 	dprintk("lockd: call procedure %d on %s (async)\n",
 			(int)proc, host->h_name);
 
 	/* If we have no RPC client yet, create one. */
-	if ((clnt = nlm_bind_host(host)) == NULL)
-		return -ENOLCK;
-	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	clnt = nlm_bind_host(host);
+	if (clnt == NULL)
+		goto out_err;
+	msg->rpc_proc = &clnt->cl_procinfo[proc];
 
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-
+        status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
+	if (status == 0)
+		return 0;
+out_err:
+	nlm_release_call(req);
 	return status;
 }
 
-static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
-	struct nlm_host	*host = req->a_host;
-	struct rpc_clnt	*clnt;
-	struct nlm_args	*argp = &req->a_args;
-	struct nlm_res	*resp = &req->a_res;
 	struct rpc_message msg = {
-		.rpc_argp	= argp,
-		.rpc_resp	= resp,
+		.rpc_argp	= &req->a_args,
+		.rpc_resp	= &req->a_res,
 	};
-	int		status;
-
-	dprintk("lockd: call procedure %d on %s (async)\n",
-			(int)proc, host->h_name);
-
-	/* If we have no RPC client yet, create one. */
-	if ((clnt = nlm_bind_host(host)) == NULL)
-		return -ENOLCK;
-	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	return __nlm_async_call(req, proc, &msg, tk_ops);
+}
 
-	/* Increment host refcount */
-	nlm_get_host(host);
-        /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-	if (status < 0)
-		nlm_release_host(host);
-	return status;
+int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+	struct rpc_message msg = {
+		.rpc_argp	= &req->a_res,
+	};
+	return __nlm_async_call(req, proc, &msg, tk_ops);
 }
 
 /*
@@ -463,36 +404,41 @@ nlmclnt_test(struct nlm_rqst *req, struc
 	int	status;
 
 	status = nlmclnt_call(req, NLMPROC_TEST);
-	nlmclnt_release_lockargs(req);
 	if (status < 0)
-		return status;
+		goto out;
 
-	status = req->a_res.status;
-	if (status == NLM_LCK_GRANTED) {
-		fl->fl_type = F_UNLCK;
-	} if (status == NLM_LCK_DENIED) {
-		/*
-		 * Report the conflicting lock back to the application.
-		 */
-		locks_copy_lock(fl, &req->a_res.lock.fl);
-		fl->fl_pid = 0;
-	} else {
-		return nlm_stat_to_errno(req->a_res.status);
+	switch (req->a_res.status) {
+		case NLM_LCK_GRANTED:
+			fl->fl_type = F_UNLCK;
+			break;
+		case NLM_LCK_DENIED:
+			/*
+			 * Report the conflicting lock back to the application.
+			 */
+			fl->fl_start = req->a_res.lock.fl.fl_start;
+			fl->fl_end = req->a_res.lock.fl.fl_start;
+			fl->fl_type = req->a_res.lock.fl.fl_type;
+			fl->fl_pid = 0;
+			break;
+		default:
+			status = nlm_stat_to_errno(req->a_res.status);
 	}
-
-	return 0;
+out:
+	nlm_release_call(req);
+	return status;
 }
 
 static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
-	memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl));
-	nlm_get_lockowner(new->fl_u.nfs_fl.owner);
+	new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
+	new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+	list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
 }
 
 static void nlmclnt_locks_release_private(struct file_lock *fl)
 {
+	list_del(&fl->fl_u.nfs_fl.list);
 	nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
-	fl->fl_ops = NULL;
 }
 
 static struct file_lock_operations nlmclnt_lock_ops = {
@@ -504,8 +450,8 @@ static void nlmclnt_locks_init_private(s
 {
 	BUG_ON(fl->fl_ops != NULL);
 	fl->fl_u.nfs_fl.state = 0;
-	fl->fl_u.nfs_fl.flags = 0;
 	fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+	INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
 	fl->fl_ops = &nlmclnt_lock_ops;
 }
 
@@ -552,57 +498,52 @@ nlmclnt_lock(struct nlm_rqst *req, struc
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	long timeout;
-	int status;
+	struct nlm_wait *block = NULL;
+	int status = -ENOLCK;
 
 	if (!host->h_monitored && nsm_monitor(host) < 0) {
 		printk(KERN_NOTICE "lockd: failed to monitor %s\n",
 					host->h_name);
-		status = -ENOLCK;
 		goto out;
 	}
 
-	if (req->a_args.block) {
-		status = nlmclnt_prepare_block(req, host, fl);
-		if (status < 0)
-			goto out;
-	}
+	block = nlmclnt_prepare_block(host, fl);
 	for(;;) {
 		status = nlmclnt_call(req, NLMPROC_LOCK);
 		if (status < 0)
 			goto out_unblock;
-		if (resp->status != NLM_LCK_BLOCKED)
+		if (!req->a_args.block)
 			break;
-		/* Wait on an NLM blocking lock */
-		timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
 		/* Did a reclaimer thread notify us of a server reboot? */
 		if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
 			continue;
 		if (resp->status != NLM_LCK_BLOCKED)
 			break;
-		if (timeout >= 0)
-			continue;
-		/* We were interrupted. Send a CANCEL request to the server
+		/* Wait on an NLM blocking lock */
+		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+		/* if we were interrupted. Send a CANCEL request to the server
 		 * and exit
 		 */
-		status = (int)timeout;
-		goto out_unblock;
+		if (status < 0)
+			goto out_unblock;
+		if (resp->status != NLM_LCK_BLOCKED)
+			break;
 	}
 
 	if (resp->status == NLM_LCK_GRANTED) {
 		fl->fl_u.nfs_fl.state = host->h_state;
-		fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
 		fl->fl_flags |= FL_SLEEP;
+		/* Ensure the resulting lock will get added to granted list */
 		do_vfs_lock(fl);
 	}
 	status = nlm_stat_to_errno(resp->status);
 out_unblock:
-	nlmclnt_finish_block(req);
+	nlmclnt_finish_block(block);
 	/* Cancel the blocked request if it is still pending */
 	if (resp->status == NLM_LCK_BLOCKED)
 		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
-	nlmclnt_release_lockargs(req);
+	nlm_release_call(req);
 	return status;
 }
 
@@ -658,38 +599,31 @@ nlmclnt_unlock(struct nlm_rqst *req, str
 	struct nlm_res	*resp = &req->a_res;
 	int		status;
 
-	/* Clean the GRANTED flag now so the lock doesn't get
-	 * reclaimed while we're stuck in the unlock call. */
-	fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
-
-	if (req->a_flags & RPC_TASK_ASYNC) {
-		status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
-					&nlmclnt_unlock_ops);
-		/* Hrmf... Do the unlock early since locks_remove_posix()
-		 * really expects us to free the lock synchronously */
-		do_vfs_lock(fl);
-		if (status < 0) {
-			nlmclnt_release_lockargs(req);
-			kfree(req);
-		}
-		return status;
-	}
+	/*
+	 * Note: the server is supposed to either grant us the unlock
+	 * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
+	 * case, we want to unlock.
+	 */
+	do_vfs_lock(fl);
+
+	if (req->a_flags & RPC_TASK_ASYNC)
+		return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
 
 	status = nlmclnt_call(req, NLMPROC_UNLOCK);
-	nlmclnt_release_lockargs(req);
 	if (status < 0)
-		return status;
+		goto out;
 
-	do_vfs_lock(fl);
+	status = 0;
 	if (resp->status == NLM_LCK_GRANTED)
-		return 0;
+		goto out;
 
 	if (resp->status != NLM_LCK_DENIED_NOLOCKS)
 		printk("lockd: unexpected unlock status: %d\n", resp->status);
-
 	/* What to do now? I'm out of my depth... */
-
-	return -ENOLCK;
+	status = -ENOLCK;
+out:
+	nlm_release_call(req);
+	return status;
 }
 
 static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
@@ -711,9 +645,6 @@ static void nlmclnt_unlock_callback(stru
 	if (status != NLM_LCK_GRANTED)
 		printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status);
 die:
-	nlm_release_host(req->a_host);
-	nlmclnt_release_lockargs(req);
-	kfree(req);
 	return;
  retry_rebind:
 	nlm_rebind_host(req->a_host);
@@ -723,6 +654,7 @@ die:
 
 static const struct rpc_call_ops nlmclnt_unlock_ops = {
 	.rpc_call_done = nlmclnt_unlock_callback,
+	.rpc_release = nlmclnt_rpc_release,
 };
 
 /*
@@ -744,20 +676,15 @@ static int nlmclnt_cancel(struct nlm_hos
 	recalc_sigpending();
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-	req = nlmclnt_alloc_call();
+	req = nlm_alloc_call(nlm_get_host(host));
 	if (!req)
 		return -ENOMEM;
-	req->a_host  = host;
 	req->a_flags = RPC_TASK_ASYNC;
 
 	nlmclnt_setlockargs(req, fl);
 	req->a_args.block = block;
 
-	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
-	if (status < 0) {
-		nlmclnt_release_lockargs(req);
-		kfree(req);
-	}
+	status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	current->blocked = oldset;
@@ -786,6 +713,7 @@ static void nlmclnt_cancel_callback(stru
 	switch (req->a_res.status) {
 	case NLM_LCK_GRANTED:
 	case NLM_LCK_DENIED_GRACE_PERIOD:
+	case NLM_LCK_DENIED:
 		/* Everything's good */
 		break;
 	case NLM_LCK_DENIED_NOLOCKS:
@@ -797,9 +725,6 @@ static void nlmclnt_cancel_callback(stru
 	}
 
 die:
-	nlm_release_host(req->a_host);
-	nlmclnt_release_lockargs(req);
-	kfree(req);
 	return;
 
 retry_cancel:
@@ -813,6 +738,7 @@ retry_cancel:
 
 static const struct rpc_call_ops nlmclnt_cancel_ops = {
 	.rpc_call_done = nlmclnt_cancel_callback,
+	.rpc_release = nlmclnt_rpc_release,
 };
 
 /*
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b..112ebf8 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct socka
 	nlm_hosts[hash]    = host;
 	INIT_LIST_HEAD(&host->h_lockowners);
 	spin_lock_init(&host->h_lock);
+	INIT_LIST_HEAD(&host->h_granted);
+	INIT_LIST_HEAD(&host->h_reclaim);
 
 	if (++nrhosts > NLM_HOST_MAX)
 		next_gc = 0;
@@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host)
 		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		/* Existing NLM servers accept AUTH_UNIX only */
-		clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
+		clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
 					host->h_version, RPC_AUTH_UNIX);
 		if (IS_ERR(clnt))
 			goto forgetit;
 		clnt->cl_autobind = 1;	/* turn on pmap queries */
+		clnt->cl_softrtry = 1; /* All queries are soft */
 
 		host->h_rpcclnt = clnt;
 	}
@@ -242,8 +245,12 @@ void nlm_release_host(struct nlm_host *h
 {
 	if (host != NULL) {
 		dprintk("lockd: release host %s\n", host->h_name);
-		atomic_dec(&host->h_count);
 		BUG_ON(atomic_read(&host->h_count) < 0);
+		if (atomic_dec_and_test(&host->h_count)) {
+			BUG_ON(!list_empty(&host->h_lockowners));
+			BUG_ON(!list_empty(&host->h_granted));
+			BUG_ON(!list_empty(&host->h_reclaim));
+		}
 	}
 }
 
@@ -331,7 +338,6 @@ nlm_gc_hosts(void)
 					rpc_destroy_client(host->h_rpcclnt);
 				}
 			}
-			BUG_ON(!list_empty(&host->h_lockowners));
 			kfree(host);
 			nrhosts--;
 		}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 0edc03e..5dd52b7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32
 	struct rpc_clnt	*clnt;
 	int		status;
 	struct nsm_args	args;
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= res,
+	};
 
 	clnt = nsm_create();
 	if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32
 	args.proc = NLMPROC_NSM_NOTIFY;
 	memset(res, 0, sizeof(*res));
 
-	status = rpc_call(clnt, proc, &args, res, 0);
+	msg.rpc_proc = &clnt->cl_procinfo[proc];
+	status = rpc_call_sync(clnt, &msg, 0);
 	if (status < 0)
 		printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
 			status);
@@ -214,12 +219,16 @@ static struct rpc_procinfo	nsm_procedure
 		.p_encode	= (kxdrproc_t) xdr_encode_mon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat_res,
 		.p_bufsiz	= MAX(SM_mon_sz, SM_monres_sz) << 2,
+		.p_statidx	= SM_MON,
+		.p_name		= "MONITOR",
 	},
 [SM_UNMON] = {
 		.p_proc		= SM_UNMON,
 		.p_encode	= (kxdrproc_t) xdr_encode_unmon,
 		.p_decode	= (kxdrproc_t) xdr_decode_stat,
 		.p_bufsiz	= MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
+		.p_statidx	= SM_UNMON,
+		.p_name		= "UNMONITOR",
 	},
 };
 
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913..a2dd9cc 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -21,10 +21,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
-static u32	nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlm4svc_callback_ops;
-
 /*
  * Obtain client and file from arguments
  */
@@ -234,83 +230,89 @@ nlm4svc_proc_granted(struct svc_rqst *rq
 }
 
 /*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
+{
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
+
+static void nlm4svc_callback_release(void *data)
+{
+	nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+	.rpc_call_done = nlm4svc_callback_exit,
+	.rpc_release = nlm4svc_callback_release,
+};
+
+/*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-	struct nlm_res	res;
-	u32		stat;
+	struct nlm_host	*host;
+	struct nlm_rqst	*call;
+	int stat;
 
-	dprintk("lockd: TEST_MSG      called\n");
-	memset(&res, 0, sizeof(res));
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
+		return rpc_system_err;
+
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return rpc_system_err;
 
-	if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res);
-	return stat;
+	stat = func(rqstp, argp, &call->a_res);
+	if (stat != 0) {
+		nlm_release_call(call);
+		return stat;
+	}
+
+	call->a_flags = RPC_TASK_ASYNC;
+	if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
+		return rpc_system_err;
+	return rpc_success;
 }
 
-static int
-nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
+	dprintk("lockd: TEST_MSG      called\n");
+	return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+}
 
+static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+					     void	     *resp)
+{
 	dprintk("lockd: LOCK_MSG      called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
 }
 
-static int
-nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: CANCEL_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
 }
 
-static int
-nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
 }
 
-static int
-nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: GRANTED_MSG   called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
-		stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-	return stat;
+	return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
 }
 
 /*
@@ -472,55 +474,6 @@ nlm4svc_proc_granted_res(struct svc_rqst
 
 
 /*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-	struct nlm_host	*host;
-	struct nlm_rqst	*call;
-
-	if (!(call = nlmclnt_alloc_call()))
-		return rpc_system_err;
-
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
-	if (!host) {
-		kfree(call);
-		return rpc_system_err;
-	}
-
-	call->a_flags = RPC_TASK_ASYNC;
-	call->a_host  = host;
-	memcpy(&call->a_args, resp, sizeof(*resp));
-
-	if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
-		goto error;
-
-	return rpc_success;
- error:
-	kfree(call);
-	nlm_release_host(host);
-	return rpc_system_err;
-}
-
-static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
-{
-	struct nlm_rqst	*call = data;
-
-	if (task->tk_status < 0) {
-		dprintk("lockd: %4d callback failed (errno = %d)\n",
-					task->tk_pid, -task->tk_status);
-	}
-	nlm_release_host(call->a_host);
-	kfree(call);
-}
-
-static const struct rpc_call_ops nlm4svc_callback_ops = {
-	.rpc_call_done = nlm4svc_callback_exit,
-};
-
-/*
  * NLM Server procedures.
  */
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 9cfced6..1432d52 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -39,9 +39,12 @@
 #define nlm_deadlock	nlm_lck_denied
 #endif
 
+static void nlmsvc_release_block(struct nlm_block *block);
 static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static int	nlmsvc_remove_block(struct nlm_block *block);
 
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
+static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
 
 /*
@@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *bl
 	struct nlm_block **bp, *b;
 
 	dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
+	kref_get(&block->b_count);
 	if (block->b_queued)
 		nlmsvc_remove_block(block);
 	bp = &nlm_blocked;
@@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *bl
 		if (b == block) {
 			*bp = block->b_next;
 			block->b_queued = 0;
+			nlmsvc_release_block(block);
 			return 1;
 		}
 	}
@@ -112,17 +117,18 @@ nlmsvc_lookup_block(struct nlm_file *fil
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end, lock->fl.fl_type);
 	for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
-		fl = &block->b_call.a_args.lock.fl;
+		fl = &block->b_call->a_args.lock.fl;
 		dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
 				block->b_file, fl->fl_pid,
 				(long long)fl->fl_start,
 				(long long)fl->fl_end, fl->fl_type,
-				nlmdbg_cookie2a(&block->b_call.a_args.cookie));
+				nlmdbg_cookie2a(&block->b_call->a_args.cookie));
 		if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
 			if (remove) {
 				*head = block->b_next;
 				block->b_queued = 0;
 			}
+			kref_get(&block->b_count);
 			return block;
 		}
 	}
@@ -150,11 +156,13 @@ nlmsvc_find_block(struct nlm_cookie *coo
 	for (block = nlm_blocked; block; block = block->b_next) {
 		dprintk("cookie: head of blocked queue %p, block %p\n", 
 			nlm_blocked, block);
-		if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
+		if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
 				&& nlm_cmp_addr(sin, &block->b_host->h_addr))
 			break;
 	}
 
+	if (block != NULL) 
+		kref_get(&block->b_count);
 	return block;
 }
 
@@ -174,27 +182,30 @@ nlmsvc_create_block(struct svc_rqst *rqs
 {
 	struct nlm_block	*block;
 	struct nlm_host		*host;
-	struct nlm_rqst		*call;
+	struct nlm_rqst		*call = NULL;
 
 	/* Create host handle for callback */
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
+	host = nlmsvc_lookup_host(rqstp);
 	if (host == NULL)
 		return NULL;
 
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return NULL;
+
 	/* Allocate memory for block, and initialize arguments */
-	if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (block == NULL)
 		goto failed;
-	memset(block, 0, sizeof(*block));
-	locks_init_lock(&block->b_call.a_args.lock.fl);
-	locks_init_lock(&block->b_call.a_res.lock.fl);
+	kref_init(&block->b_count);
 
-	if (!nlmclnt_setgrantargs(&block->b_call, lock))
+	if (!nlmsvc_setgrantargs(call, lock))
 		goto failed_free;
 
 	/* Set notifier function for VFS, and init args */
-	block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
-	block->b_call.a_args.cookie = *cookie;	/* see above */
+	call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+	call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
+	call->a_args.cookie = *cookie;	/* see above */
 
 	dprintk("lockd: created block %p...\n", block);
 
@@ -208,16 +219,16 @@ nlmsvc_create_block(struct svc_rqst *rqs
 	file->f_blocks  = block;
 
 	/* Set up RPC arguments for callback */
-	call = &block->b_call;
-	call->a_host    = host;
+	block->b_call = call;
 	call->a_flags   = RPC_TASK_ASYNC;
+	call->a_block = block;
 
 	return block;
 
 failed_free:
 	kfree(block);
 failed:
-	nlm_release_host(host);
+	nlm_release_call(call);
 	return NULL;
 }
 
@@ -227,28 +238,24 @@ failed:
  * It is the caller's responsibility to check whether the file
  * can be closed hereafter.
  */
-static int
-nlmsvc_delete_block(struct nlm_block *block, int unlock)
+static int nlmsvc_unlink_block(struct nlm_block *block)
 {
-	struct file_lock	*fl = &block->b_call.a_args.lock.fl;
-	struct nlm_file		*file = block->b_file;
-	struct nlm_block	**bp;
-	int status = 0;
-
-	dprintk("lockd: deleting block %p...\n", block);
+	int status;
+	dprintk("lockd: unlinking block %p...\n", block);
 
 	/* Remove block from list */
+	status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
 	nlmsvc_remove_block(block);
-	if (unlock)
-		status = posix_unblock_lock(file->f_file, fl);
+	return status;
+}
 
-	/* If the block is in the middle of a GRANT callback,
-	 * don't kill it yet. */
-	if (block->b_incall) {
-		nlmsvc_insert_block(block, NLM_NEVER);
-		block->b_done = 1;
-		return status;
-	}
+static void nlmsvc_free_block(struct kref *kref)
+{
+	struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
+	struct nlm_file		*file = block->b_file;
+	struct nlm_block	**bp;
+
+	dprintk("lockd: freeing block %p...\n", block);
 
 	/* Remove block from file's list of blocks */
 	for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
@@ -258,11 +265,15 @@ nlmsvc_delete_block(struct nlm_block *bl
 		}
 	}
 
-	if (block->b_host)
-		nlm_release_host(block->b_host);
-	nlmclnt_freegrantargs(&block->b_call);
+	nlmsvc_freegrantargs(block->b_call);
+	nlm_release_call(block->b_call);
 	kfree(block);
-	return status;
+}
+
+static void nlmsvc_release_block(struct nlm_block *block)
+{
+	if (block != NULL)
+		kref_put(&block->b_count, nlmsvc_free_block);
 }
 
 /*
@@ -282,7 +293,7 @@ nlmsvc_traverse_blocks(struct nlm_host *
 			block->b_host->h_inuse = 1;
 		else if (action == NLM_ACT_UNLOCK) {
 			if (host == NULL || host == block->b_host)
-				nlmsvc_delete_block(block, 1);
+				nlmsvc_unlink_block(block);
 		}
 	}
 	up(&file->f_sema);
@@ -290,6 +301,38 @@ nlmsvc_traverse_blocks(struct nlm_host *
 }
 
 /*
+ * Initialize arguments for GRANTED call. The nlm_rqst structure
+ * has been cleared already.
+ */
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
+{
+	locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+	memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+	call->a_args.lock.caller = system_utsname.nodename;
+	call->a_args.lock.oh.len = lock->oh.len;
+
+	/* set default data area */
+	call->a_args.lock.oh.data = call->a_owner;
+	call->a_args.lock.svid = lock->fl.fl_pid;
+
+	if (lock->oh.len > NLMCLNT_OHSIZE) {
+		void *data = kmalloc(lock->oh.len, GFP_KERNEL);
+		if (!data)
+			return 0;
+		call->a_args.lock.oh.data = (u8 *) data;
+	}
+
+	memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
+	return 1;
+}
+
+static void nlmsvc_freegrantargs(struct nlm_rqst *call)
+{
+	if (call->a_args.lock.oh.data != call->a_owner)
+		kfree(call->a_args.lock.oh.data);
+}
+
+/*
  * Attempt to establish a lock, and if it can't be granted, block it
  * if required.
  */
@@ -297,9 +340,9 @@ u32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
-	struct file_lock	*conflock;
-	struct nlm_block	*block;
+	struct nlm_block	*block, *newblock = NULL;
 	int			error;
+	u32			ret;
 
 	dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -310,69 +353,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, stru
 				wait);
 
 
-	/* Get existing block (in case client is busy-waiting) */
-	block = nlmsvc_lookup_block(file, lock, 0);
-
-	lock->fl.fl_flags |= FL_LOCKD;
-
+	lock->fl.fl_flags &= ~FL_SLEEP;
 again:
 	/* Lock file against concurrent access */
 	down(&file->f_sema);
+	/* Get existing block (in case client is busy-waiting) */
+	block = nlmsvc_lookup_block(file, lock, 0);
+	if (block == NULL) {
+		if (newblock != NULL)
+			lock = &newblock->b_call->a_args.lock;
+	} else
+		lock = &block->b_call->a_args.lock;
 
-	if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
-		error = posix_lock_file(file->f_file, &lock->fl);
+	error = posix_lock_file(file->f_file, &lock->fl);
+	lock->fl.fl_flags &= ~FL_SLEEP;
 
-		if (block)
-			nlmsvc_delete_block(block, 0);
-		up(&file->f_sema);
+	dprintk("lockd: posix_lock_file returned %d\n", error);
 
-		dprintk("lockd: posix_lock_file returned %d\n", -error);
-		switch(-error) {
+	switch(error) {
 		case 0:
-			return nlm_granted;
-		case EDEADLK:
-			return nlm_deadlock;
-		case EAGAIN:
-			return nlm_lck_denied;
+			ret = nlm_granted;
+			goto out;
+		case -EAGAIN:
+			break;
+		case -EDEADLK:
+			ret = nlm_deadlock;
+			goto out;
 		default:			/* includes ENOLCK */
-			return nlm_lck_denied_nolocks;
-		}
+			ret = nlm_lck_denied_nolocks;
+			goto out;
 	}
 
-	if (!wait) {
-		up(&file->f_sema);
-		return nlm_lck_denied;
-	}
+	ret = nlm_lck_denied;
+	if (!wait)
+		goto out;
 
-	if (posix_locks_deadlock(&lock->fl, conflock)) {
-		up(&file->f_sema);
-		return nlm_deadlock;
-	}
+	ret = nlm_lck_blocked;
+	if (block != NULL)
+		goto out;
 
 	/* If we don't have a block, create and initialize it. Then
 	 * retry because we may have slept in kmalloc. */
 	/* We have to release f_sema as nlmsvc_create_block may try to
 	 * to claim it while doing host garbage collection */
-	if (block == NULL) {
+	if (newblock == NULL) {
 		up(&file->f_sema);
 		dprintk("lockd: blocking on this lock (allocating).\n");
-		if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
+		if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
 			return nlm_lck_denied_nolocks;
 		goto again;
 	}
 
 	/* Append to list of blocked */
-	nlmsvc_insert_block(block, NLM_NEVER);
-
-	if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
-		/* Now add block to block list of the conflicting lock
-		   if we haven't done so. */
-		dprintk("lockd: blocking on this lock.\n");
-		posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
-	}
-
+	nlmsvc_insert_block(newblock, NLM_NEVER);
+out:
 	up(&file->f_sema);
-	return nlm_lck_blocked;
+	nlmsvc_release_block(newblock);
+	nlmsvc_release_block(block);
+	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
+	return ret;
 }
 
 /*
@@ -382,8 +421,6 @@ u32
 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
 				       struct nlm_lock *conflock)
 {
-	struct file_lock	*fl;
-
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
 				file->f_file->f_dentry->d_inode->i_ino,
@@ -391,13 +428,14 @@ nlmsvc_testlock(struct nlm_file *file, s
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
-	if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
+	if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
 		dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
-				fl->fl_type, (long long)fl->fl_start,
-				(long long)fl->fl_end);
+				conflock->fl.fl_type,
+				(long long)conflock->fl.fl_start,
+				(long long)conflock->fl.fl_end);
 		conflock->caller = "somehost";	/* FIXME */
 		conflock->oh.len = 0;		/* don't return OH info */
-		conflock->fl = *fl;
+		conflock->svid = conflock->fl.fl_pid;
 		return nlm_lck_denied;
 	}
 
@@ -453,8 +491,10 @@ nlmsvc_cancel_blocked(struct nlm_file *f
 				(long long)lock->fl.fl_end);
 
 	down(&file->f_sema);
-	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
-		status = nlmsvc_delete_block(block, 1);
+	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) {
+		status = nlmsvc_unlink_block(block);
+		nlmsvc_release_block(block);
+	}
 	up(&file->f_sema);
 	return status ? nlm_lck_denied : nlm_granted;
 }
@@ -473,7 +513,7 @@ nlmsvc_notify_blocked(struct file_lock *
 
 	dprintk("lockd: VFS unblock notification for block %p\n", fl);
 	for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
-		if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
+		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
 			nlmsvc_insert_block(block, 0);
 			svc_wake_up(block->b_daemon);
 			return;
@@ -508,8 +548,7 @@ static void
 nlmsvc_grant_blocked(struct nlm_block *block)
 {
 	struct nlm_file		*file = block->b_file;
-	struct nlm_lock		*lock = &block->b_call.a_args.lock;
-	struct file_lock	*conflock;
+	struct nlm_lock		*lock = &block->b_call->a_args.lock;
 	int			error;
 
 	dprintk("lockd: grant blocked lock %p\n", block);
@@ -518,7 +557,7 @@ nlmsvc_grant_blocked(struct nlm_block *b
 	down(&file->f_sema);
 
 	/* Unlink block request from list */
-	nlmsvc_remove_block(block);
+	nlmsvc_unlink_block(block);
 
 	/* If b_granted is true this means we've been here before.
 	 * Just retry the grant callback, possibly refreshing the RPC
@@ -529,41 +568,38 @@ nlmsvc_grant_blocked(struct nlm_block *b
 	}
 
 	/* Try the lock operation again */
-	if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
-		/* Bummer, we blocked again */
+	lock->fl.fl_flags |= FL_SLEEP;
+	error = posix_lock_file(file->f_file, &lock->fl);
+	lock->fl.fl_flags &= ~FL_SLEEP;
+
+	switch (error) {
+	case 0:
+		break;
+	case -EAGAIN:
 		dprintk("lockd: lock still blocked\n");
 		nlmsvc_insert_block(block, NLM_NEVER);
-		posix_block_lock(conflock, &lock->fl);
-		up(&file->f_sema);
-		return;
-	}
-
-	/* Alright, no conflicting lock. Now lock it for real. If the
-	 * following yields an error, this is most probably due to low
-	 * memory. Retry the lock in a few seconds.
-	 */
-	if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
+		goto out_unlock;
+	default:
 		printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
 				-error, __FUNCTION__);
 		nlmsvc_insert_block(block, 10 * HZ);
-		up(&file->f_sema);
-		return;
+		goto out_unlock;
 	}
 
 callback:
 	/* Lock was granted by VFS. */
 	dprintk("lockd: GRANTing blocked lock.\n");
 	block->b_granted = 1;
-	block->b_incall  = 1;
 
 	/* Schedule next grant callback in 30 seconds */
 	nlmsvc_insert_block(block, 30 * HZ);
 
 	/* Call the client */
-	nlm_get_host(block->b_call.a_host);
-	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+	kref_get(&block->b_count);
+	if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
 						&nlmsvc_grant_ops) < 0)
-		nlm_release_host(block->b_call.a_host);
+		nlmsvc_release_block(block);
+out_unlock:
 	up(&file->f_sema);
 }
 
@@ -578,20 +614,10 @@ callback:
 static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 {
 	struct nlm_rqst		*call = data;
-	struct nlm_block	*block;
+	struct nlm_block	*block = call->a_block;
 	unsigned long		timeout;
-	struct sockaddr_in	*peer_addr = RPC_PEERADDR(task->tk_client);
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
-	dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
-		nlmdbg_cookie2a(&call->a_args.cookie),
-		NIPQUAD(peer_addr->sin_addr.s_addr));
-	if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
-		dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
-			nlmdbg_cookie2a(&call->a_args.cookie),
-			NIPQUAD(peer_addr->sin_addr.s_addr));
-		return;
-	}
 
 	/* Technically, we should down the file semaphore here. Since we
 	 * move the block towards the head of the queue only, no harm
@@ -608,13 +634,16 @@ static void nlmsvc_grant_callback(struct
 	}
 	nlmsvc_insert_block(block, timeout);
 	svc_wake_up(block->b_daemon);
-	block->b_incall = 0;
+}
 
-	nlm_release_host(call->a_host);
+void nlmsvc_grant_release(void *data)
+{
+	nlmsvc_release_block(data);
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
 	.rpc_call_done = nlmsvc_grant_callback,
+	.rpc_release = nlmsvc_grant_release,
 };
 
 /*
@@ -636,23 +665,19 @@ nlmsvc_grant_reply(struct svc_rqst *rqst
 
 	file->f_count++;
 	down(&file->f_sema);
-	block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
 	if (block) {
 		if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
 			/* Try again in a couple of seconds */
 			nlmsvc_insert_block(block, 10 * HZ);
-			up(&file->f_sema);
 		} else {
 			/* Lock is now held by client, or has been rejected.
 			 * In both cases, the block should be removed. */
-			up(&file->f_sema);
-			if (status == NLM_LCK_GRANTED)
-				nlmsvc_delete_block(block, 0);
-			else
-				nlmsvc_delete_block(block, 1);
+			nlmsvc_unlink_block(block);
 		}
 	}
+	up(&file->f_sema);
 	nlm_release_file(file);
+	nlmsvc_release_block(block);
 }
 
 /*
@@ -675,10 +700,12 @@ nlmsvc_retry_blocked(void)
 			break;
 		dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
 			block, block->b_when, block->b_done);
+		kref_get(&block->b_count);
 		if (block->b_done)
-			nlmsvc_delete_block(block, 0);
+			nlmsvc_unlink_block(block);
 		else
 			nlmsvc_grant_blocked(block);
+		nlmsvc_release_block(block);
 	}
 
 	if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 35681d9..d210cf3 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -22,10 +22,6 @@
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
-static u32	nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlmsvc_callback_ops;
-
 #ifdef CONFIG_LOCKD_V4
 static u32
 cast_to_nlm(u32 status, u32 vers)
@@ -262,83 +258,91 @@ nlmsvc_proc_granted(struct svc_rqst *rqs
 }
 
 /*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
+{
+	dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+			-task->tk_status);
+}
+
+static void nlmsvc_callback_release(void *data)
+{
+	nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+	.rpc_call_done = nlmsvc_callback_exit,
+	.rpc_release = nlmsvc_callback_release,
+};
+
+/*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-					     void	     *resp)
+static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+		int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-	struct nlm_res	res;
-	u32		stat;
+	struct nlm_host	*host;
+	struct nlm_rqst	*call;
+	int stat;
 
-	dprintk("lockd: TEST_MSG      called\n");
-	memset(&res, 0, sizeof(res));
+	host = nlmsvc_lookup_host(rqstp);
+	if (host == NULL)
+		return rpc_system_err;
 
-	if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res);
-	return stat;
+	call = nlm_alloc_call(host);
+	if (call == NULL)
+		return rpc_system_err;
+
+	stat = func(rqstp, argp, &call->a_res);
+	if (stat != 0) {
+		nlm_release_call(call);
+		return stat;
+	}
+
+	call->a_flags = RPC_TASK_ASYNC;
+	if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
+		return rpc_system_err;
+	return rpc_success;
 }
 
-static int
-nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					     void	     *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
+	dprintk("lockd: TEST_MSG      called\n");
+	return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+}
 
+static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+					     void	     *resp)
+{
 	dprintk("lockd: LOCK_MSG      called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
 }
 
-static int
-nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
 					       void	       *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: CANCEL_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
 }
 
 static int
 nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: UNLOCK_MSG    called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
 }
 
 static int
 nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-	struct nlm_res	res;
-	u32		stat;
-
 	dprintk("lockd: GRANTED_MSG   called\n");
-	memset(&res, 0, sizeof(res));
-
-	if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
-		stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-	return stat;
+	return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
 }
 
 /*
@@ -497,55 +501,6 @@ nlmsvc_proc_granted_res(struct svc_rqst 
 }
 
 /*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-	struct nlm_host	*host;
-	struct nlm_rqst	*call;
-
-	if (!(call = nlmclnt_alloc_call()))
-		return rpc_system_err;
-
-	host = nlmclnt_lookup_host(&rqstp->rq_addr,
-				rqstp->rq_prot, rqstp->rq_vers);
-	if (!host) {
-		kfree(call);
-		return rpc_system_err;
-	}
-
-	call->a_flags = RPC_TASK_ASYNC;
-	call->a_host  = host;
-	memcpy(&call->a_args, resp, sizeof(*resp));
-
-	if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
-		goto error;
-
-	return rpc_success;
- error:
-	nlm_release_host(host);
-	kfree(call);
-	return rpc_system_err;
-}
-
-static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-{
-	struct nlm_rqst	*call = data;
-
-	if (task->tk_status < 0) {
-		dprintk("lockd: %4d callback failed (errno = %d)\n",
-					task->tk_pid, -task->tk_status);
-	}
-	nlm_release_host(call->a_host);
-	kfree(call);
-}
-
-static const struct rpc_call_ops nlmsvc_callback_ops = {
-	.rpc_call_done = nlmsvc_callback_exit,
-};
-
-/*
  * NLM Server procedures.
  */
 
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 62f4a38..601e5b3 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host
 again:
 	file->f_locks = 0;
 	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
-		if (!(fl->fl_flags & FL_LOCKD))
+		if (fl->fl_lmops != &nlmsvc_lock_operations)
 			continue;
 
 		/* update current lock count */
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 200fbda..766ce06 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock 
 	 || !(p = nlm_decode_fh(p, &lock->fh))
 	 || !(p = nlm_decode_oh(p, &lock->oh)))
 		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_owner = current->files;
-	fl->fl_pid   = ntohl(*p++);
+	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
 	fl->fl_type  = F_RDLCK;		/* as good as anything else */
 	start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock 
 	else
 		len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
 
-	*p++ = htonl(fl->fl_pid);
+	*p++ = htonl(lock->svid);
 	*p++ = htonl(start);
 	*p++ = htonl(len);
 
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_re
 		struct file_lock	*fl = &resp->lock.fl;
 
 		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-		*p++ = htonl(fl->fl_pid);
+		*p++ = htonl(resp->lock.svid);
 
 		/* Encode owner handle. */
 		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst 
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->fl.fl_pid = ~(u32) 0;
+	lock->svid = ~(u32) 0;
+	lock->fl.fl_pid = (pid_t)lock->svid;
 
 	if (!(p = nlm_decode_cookie(p, &argp->cookie))
 	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *r
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
 		excl = ntohl(*p++);
-		fl->fl_pid = ntohl(*p++);
+		resp->lock.svid = ntohl(*p++);
+		fl->fl_pid = (pid_t)resp->lock.svid;
 		if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
 			return -EIO;
 
@@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, 
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdrproc_t) nlmclt_encode_##argtype,		\
 	.p_decode    = (kxdrproc_t) nlmclt_decode_##restype,		\
-	.p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2	\
+	.p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2,	\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 static struct rpc_procinfo	nlm_procedures[] = {
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index fdcf105..36eb175 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock
 	 || !(p = nlm4_decode_fh(p, &lock->fh))
 	 || !(p = nlm4_decode_oh(p, &lock->oh)))
 		return NULL;
+	lock->svid  = ntohl(*p++);
 
 	locks_init_lock(fl);
 	fl->fl_owner = current->files;
-	fl->fl_pid   = ntohl(*p++);
+	fl->fl_pid   = (pid_t)lock->svid;
 	fl->fl_flags = FL_POSIX;
 	fl->fl_type  = F_RDLCK;		/* as good as anything else */
 	p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock
 	 || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
 		return NULL;
 
-	*p++ = htonl(fl->fl_pid);
+	*p++ = htonl(lock->svid);
 
 	start = loff_t_to_s64(fl->fl_start);
 	if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_r
 		struct file_lock	*fl = &resp->lock.fl;
 
 		*p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-		*p++ = htonl(fl->fl_pid);
+		*p++ = htonl(resp->lock.svid);
 
 		/* Encode owner handle. */
 		if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_r
 		
 		p = xdr_encode_hyper(p, start);
 		p = xdr_encode_hyper(p, len);
-		dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n",
-			resp->status, fl->fl_pid, fl->fl_type,
+		dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+			resp->status, (int)resp->lock.svid, fl->fl_type,
 			(long long)fl->fl_start,  (long long)fl->fl_end);
 	}
 
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst
 
 	memset(lock, 0, sizeof(*lock));
 	locks_init_lock(&lock->fl);
-	lock->fl.fl_pid = ~(u32) 0;
+	lock->svid = ~(u32) 0;
+	lock->fl.fl_pid = (pid_t)lock->svid;
 
 	if (!(p = nlm4_decode_cookie(p, &argp->cookie))
 	 || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *
 		memset(&resp->lock, 0, sizeof(resp->lock));
 		locks_init_lock(fl);
 		excl = ntohl(*p++);
-		fl->fl_pid = ntohl(*p++);
+		resp->lock.svid = ntohl(*p++);
+		fl->fl_pid = (pid_t)resp->lock.svid;
 		if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
 			return -EIO;
 
@@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req,
 	.p_proc      = NLMPROC_##proc,					\
 	.p_encode    = (kxdrproc_t) nlm4clt_encode_##argtype,		\
 	.p_decode    = (kxdrproc_t) nlm4clt_decode_##restype,		\
-	.p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2	\
+	.p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2,	\
+	.p_statidx   = NLMPROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 static struct rpc_procinfo	nlm4_procedures[] = {
diff --git a/fs/locks.c b/fs/locks.c
index 909eab8..56f996e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -139,10 +139,7 @@ int lease_break_time = 45;
 #define for_each_lock(inode, lockp) \
 	for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
 
-LIST_HEAD(file_lock_list);
-
-EXPORT_SYMBOL(file_lock_list);
-
+static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
 static kmem_cache_t *filelock_cache;
@@ -153,6 +150,21 @@ static struct file_lock *locks_alloc_loc
 	return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
 }
 
+static void locks_release_private(struct file_lock *fl)
+{
+	if (fl->fl_ops) {
+		if (fl->fl_ops->fl_release_private)
+			fl->fl_ops->fl_release_private(fl);
+		fl->fl_ops = NULL;
+	}
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->fl_release_private)
+			fl->fl_lmops->fl_release_private(fl);
+		fl->fl_lmops = NULL;
+	}
+
+}
+
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
 {
@@ -169,18 +181,7 @@ static void locks_free_lock(struct file_
 	if (!list_empty(&fl->fl_link))
 		panic("Attempting to free lock on active lock list");
 
-	if (fl->fl_ops) {
-		if (fl->fl_ops->fl_release_private)
-			fl->fl_ops->fl_release_private(fl);
-		fl->fl_ops = NULL;
-	}
-
-	if (fl->fl_lmops) {
-		if (fl->fl_lmops->fl_release_private)
-			fl->fl_lmops->fl_release_private(fl);
-		fl->fl_lmops = NULL;
-	}
-
+	locks_release_private(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
 
@@ -218,24 +219,46 @@ static void init_once(void *foo, kmem_ca
 	locks_init_lock(lock);
 }
 
+static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
+{
+	if (fl->fl_ops) {
+		if (fl->fl_ops->fl_copy_lock)
+			fl->fl_ops->fl_copy_lock(new, fl);
+		new->fl_ops = fl->fl_ops;
+	}
+	if (fl->fl_lmops) {
+		if (fl->fl_lmops->fl_copy_lock)
+			fl->fl_lmops->fl_copy_lock(new, fl);
+		new->fl_lmops = fl->fl_lmops;
+	}
+}
+
 /*
  * Initialize a new lock from an existing file_lock structure.
  */
-void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
 {
 	new->fl_owner = fl->fl_owner;
 	new->fl_pid = fl->fl_pid;
-	new->fl_file = fl->fl_file;
+	new->fl_file = NULL;
 	new->fl_flags = fl->fl_flags;
 	new->fl_type = fl->fl_type;
 	new->fl_start = fl->fl_start;
 	new->fl_end = fl->fl_end;
+	new->fl_ops = NULL;
+	new->fl_lmops = NULL;
+}
+
+void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+	locks_release_private(new);
+
+	__locks_copy_lock(new, fl);
+	new->fl_file = fl->fl_file;
 	new->fl_ops = fl->fl_ops;
 	new->fl_lmops = fl->fl_lmops;
-	if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
-		fl->fl_ops->fl_copy_lock(new, fl);
-	if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
-		fl->fl_lmops->fl_copy_lock(new, fl);
+
+	locks_copy_private(new, fl);
 }
 
 EXPORT_SYMBOL(locks_copy_lock);
@@ -654,8 +677,9 @@ static int locks_block_on_timeout(struct
 	return result;
 }
 
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
+int
+posix_test_lock(struct file *filp, struct file_lock *fl,
+		struct file_lock *conflock)
 {
 	struct file_lock *cfl;
 
@@ -666,9 +690,13 @@ posix_test_lock(struct file *filp, struc
 		if (posix_locks_conflict(cfl, fl))
 			break;
 	}
+	if (cfl) {
+		__locks_copy_lock(conflock, cfl);
+		unlock_kernel();
+		return 1;
+	}
 	unlock_kernel();
-
-	return (cfl);
+	return 0;
 }
 
 EXPORT_SYMBOL(posix_test_lock);
@@ -904,7 +932,8 @@ static int __posix_lock_file(struct inod
 				fl->fl_start = request->fl_start;
 				fl->fl_end = request->fl_end;
 				fl->fl_type = request->fl_type;
-				fl->fl_u = request->fl_u;
+				locks_release_private(fl);
+				locks_copy_private(fl, request);
 				request = fl;
 				added = 1;
 			}
@@ -1544,7 +1573,7 @@ asmlinkage long sys_flock(unsigned int f
  */
 int fcntl_getlk(struct file *filp, struct flock __user *l)
 {
-	struct file_lock *fl, file_lock;
+	struct file_lock *fl, cfl, file_lock;
 	struct flock flock;
 	int error;
 
@@ -1568,7 +1597,7 @@ int fcntl_getlk(struct file *filp, struc
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
-		fl = posix_test_lock(filp, &file_lock);
+		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
 	}
  
 	flock.l_type = F_UNLCK;
@@ -1698,7 +1727,7 @@ out:
  */
 int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 {
-	struct file_lock *fl, file_lock;
+	struct file_lock *fl, cfl, file_lock;
 	struct flock64 flock;
 	int error;
 
@@ -1722,7 +1751,7 @@ int fcntl_getlk64(struct file *filp, str
 		else
 		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
 	} else {
-		fl = posix_test_lock(filp, &file_lock);
+		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
 	}
  
 	flock.l_type = F_UNLCK;
@@ -1936,21 +1965,6 @@ void locks_remove_flock(struct file *fil
 }
 
 /**
- *	posix_block_lock - blocks waiting for a file lock
- *	@blocker: the lock which is blocking
- *	@waiter: the lock which conflicts and has to wait
- *
- * lockd needs to block waiting for locks.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
-	locks_insert_block(blocker, waiter);
-}
-
-EXPORT_SYMBOL(posix_block_lock);
-
-/**
  *	posix_unblock_lock - stop waiting for a file lock
  *      @filp:   how the file was opened
  *	@waiter: the lock which was waiting
diff --git a/fs/namespace.c b/fs/namespace.c
index 058a448..70bba4b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
 	.show	= show_vfsmnt
 };
 
+static int show_vfsstat(struct seq_file *m, void *v)
+{
+	struct vfsmount *mnt = v;
+	int err = 0;
+
+	/* device */
+	if (mnt->mnt_devname) {
+		seq_puts(m, "device ");
+		mangle(m, mnt->mnt_devname);
+	} else
+		seq_puts(m, "no device");
+
+	/* mount point */
+	seq_puts(m, " mounted on ");
+	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	seq_putc(m, ' ');
+
+	/* file system type */
+	seq_puts(m, "with fstype ");
+	mangle(m, mnt->mnt_sb->s_type->name);
+
+	/* optional statistics */
+	if (mnt->mnt_sb->s_op->show_stats) {
+		seq_putc(m, ' ');
+		err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+	}
+
+	seq_putc(m, '\n');
+	return err;
+}
+
+struct seq_operations mountstats_op = {
+	.start	= m_start,
+	.next	= m_next,
+	.stop	= m_stop,
+	.show	= show_vfsstat,
+};
+
 /**
  * may_umount_tree - check if a mount tree is busy
  * @mnt: root of mount tree
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 7c33b9a..05c38cf 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr
 
 static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
 {
-	uint32_t *savep;
+	uint32_t *savep = NULL;
 	unsigned status = res->status;
 	
 	if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rq
 		struct xdr_stream *xdr_in, void *argp,
 		struct xdr_stream *xdr_out, void *resp)
 {
-	struct callback_op *op;
-	unsigned int op_nr;
+	struct callback_op *op = &callback_ops[0];
+	unsigned int op_nr = OP_CB_ILLEGAL;
 	unsigned int status = 0;
 	long maxlen;
 	unsigned res;
 
 	dprintk("%s: start\n", __FUNCTION__);
 	status = decode_op_hdr(xdr_in, &op_nr);
-	if (unlikely(status != 0)) {
-		op_nr = OP_CB_ILLEGAL;
-		op = &callback_ops[0];
-	} else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
-		op_nr = OP_CB_ILLEGAL;
-		op = &callback_ops[0];
-		status = htonl(NFS4ERR_OP_ILLEGAL);
-	} else
-		op = &callback_ops[op_nr];
+	if (likely(status == 0)) {
+		switch (op_nr) {
+			case OP_CB_GETATTR:
+			case OP_CB_RECALL:
+				op = &callback_ops[op_nr];
+				break;
+			default:
+				op_nr = OP_CB_ILLEGAL;
+				op = &callback_ops[0];
+				status = htonl(NFS4ERR_OP_ILLEGAL);
+		}
+	}
 
 	maxlen = xdr_out->end - xdr_out->p;
 	if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct
 	decode_compound_hdr_arg(&xdr_in, &hdr_arg);
 	hdr_res.taglen = hdr_arg.taglen;
 	hdr_res.tag = hdr_arg.tag;
+	hdr_res.nops = NULL;
 	encode_compound_hdr_res(&xdr_out, &hdr_res);
 
 	for (;;) {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554be..609185a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFS_PARANOIA 1
 /* #define NFS_DEBUG_VERBOSE 1 */
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct 
 {
 	int res = 0;
 
+	dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+			inode->i_sb->s_id, inode->i_ino);
+
 	lock_kernel();
 	/* Call generic open code in order to cache credentials */
 	if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descr
 	unsigned long	timestamp;
 	int		error;
 
-	dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+			__FUNCTION__, (long long)desc->entry->cookie,
+			page->index);
 
  again:
 	timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t
 			status;
 
 	while((status = dir_decode(desc)) == 0) {
-		dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+		dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+				__FUNCTION__, (unsigned long long)entry->cookie);
 		if (entry->prev_cookie == *desc->dir_cookie)
 			break;
 		if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
 	return status;
 }
 
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descri
 		if (status)
 			break;
 
-		dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+		dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+				(unsigned long long)entry->cookie, desc->current_index);
 
 		if (desc->file->f_pos == desc->current_index) {
 			*desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descri
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
 	return status;
 }
 
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descrip
 	struct page	*page;
 	int		status;
 
-	dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+	dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+			__FUNCTION__, desc->page_index,
+			(long long) *desc->dir_cookie);
 
 	page = read_cache_page(inode->i_mapping, desc->page_index,
 			       (filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descrip
 	if (status < 0)
 		dir_page_release(desc);
  out:
-	dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
 	return status;
  read_error:
 	page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir
 
 	/* Always search-by-index from the beginning of the cache */
 	if (*desc->dir_cookie == 0) {
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+				(long long)desc->file->f_pos);
 		desc->page_index = 0;
 		desc->entry->cookie = desc->entry->prev_cookie = 0;
 		desc->entry->eof = 0;
 		desc->current_index = 0;
 	} else
-		dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+		dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+				(unsigned long long)*desc->dir_cookie);
 
 	for (;;) {
 		res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir
 			schedule();
 		}
 	}
-	dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
 	return res;
 }
 
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descripto
 	int		loop_count = 0,
 			res;
 
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+			(unsigned long long)entry->cookie);
 
 	for(;;) {
 		unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descripto
 	dir_page_release(desc);
 	if (dentry != NULL)
 		dput(dentry);
-	dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+			(unsigned long long)*desc->dir_cookie, res);
 	return res;
 }
 
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descrip
 	struct page	*page = NULL;
 	int		status;
 
-	dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+			(unsigned long long)*desc->dir_cookie);
 
 	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descrip
 	desc->entry->cookie = desc->entry->prev_cookie = 0;
 	desc->entry->eof = 0;
  out:
-	dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+			__FUNCTION__, status);
 	return status;
  out_release:
 	dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp
 	struct nfs_fattr fattr;
 	long		res;
 
+	dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			(long long)filp->f_pos);
+	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
 	lock_kernel();
 
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp
 		}
 	}
 	unlock_kernel();
-	if (res < 0)
-		return res;
-	return 0;
+	if (res > 0)
+		res = 0;
+	dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			res);
+	return res;
 }
 
 loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
  */
 int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
 {
+	dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+			dentry->d_parent->d_name.name, dentry->d_name.name,
+			datasync);
+
 	return 0;
 }
 
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct 
 	parent = dget_parent(dentry);
 	lock_kernel();
 	dir = parent->d_inode;
+	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
 	inode = dentry->d_inode;
 
 	if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct 
 	}
 
 	if (is_bad_inode(inode)) {
-		dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
-			dentry->d_parent->d_name.name, dentry->d_name.name);
+		dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+				__FUNCTION__, dentry->d_parent->d_name.name,
+				dentry->d_name.name);
 		goto out_bad;
 	}
 
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct 
  out_valid:
 	unlock_kernel();
 	dput(parent);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+			__FUNCTION__, dentry->d_parent->d_name.name,
+			dentry->d_name.name);
 	return 1;
 out_zap_parent:
 	nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
 	d_drop(dentry);
 	unlock_kernel();
 	dput(parent);
+	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+			__FUNCTION__, dentry->d_parent->d_name.name,
+			dentry->d_name.name);
 	return 0;
 }
 
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct 
 
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
+	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
 
 	res = ERR_PTR(-ENAMETOOLONG);
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(
 	struct dentry *res = NULL;
 	int error;
 
+	dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
 	/* Check that we are indeed trying to open this file */
 	if (!is_atomic_open(dir, nd))
 		goto no_open;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir,
 	int error;
 	int open_flags = 0;
 
-	dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, 
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dent
 	struct iattr attr;
 	int status;
 
-	dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, 
 	struct iattr attr;
 	int error;
 
-	dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, 
 {
 	int error;
 
-	dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
-		dir->i_ino, dentry->d_name.name);
+	dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode 
 	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name, 
 		atomic_read(&dentry->d_count));
+	nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
 
 #ifdef NFS_PARANOIA
 if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d
 		sillycounter++;
 		sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
 
-		dfprintk(VFS, "trying to rename %s to %s\n",
-			 dentry->d_name.name, silly);
+		dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+				dentry->d_name.name, silly);
 		
 		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
 		/*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, 
 	struct rpc_cred *cred;
 	int res = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
 	if (mask == 0)
 		goto out;
 	/* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
 		res = PTR_ERR(cred);
 	unlock_kernel();
 out:
+	dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+		inode->i_sb->s_id, inode->i_ino, mask, res);
 	return res;
 out_notsup:
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	if (res == 0)
 		res = generic_permission(inode, mask, NULL);
 	unlock_kernel();
-	return res;
+	goto out;
 }
 
 /*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 04ab2fc..193ef4c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -7,11 +7,11 @@
  *
  * There are important applications whose performance or correctness
  * depends on uncached access to file data.  Database clusters
- * (multiple copies of the same instance running on separate hosts) 
+ * (multiple copies of the same instance running on separate hosts)
  * implement their own cache coherency protocol that subsumes file
- * system cache protocols.  Applications that process datasets 
- * considerably larger than the client's memory do not always benefit 
- * from a local cache.  A streaming video server, for instance, has no 
+ * system cache protocols.  Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache.  A streaming video server, for instance, has no
  * need to cache the contents of a file.
  *
  * When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
  * 08 Jun 2003	Port to 2.5 APIs  --cel
  * 31 Mar 2004	Handle direct I/O without VFS support  --cel
  * 15 Sep 2004	Parallel async reads  --cel
+ * 04 May 2005	support O_DIRECT with aio  --cel
  *
  */
 
@@ -54,8 +55,9 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
 
 static kmem_cache_t *nfs_direct_cachep;
 
@@ -64,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
  */
 struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
-	struct list_head	list;		/* nfs_read_data structs */
-	wait_queue_head_t	wait;		/* wait for i/o completion */
+
+	/* I/O parameters */
+	struct list_head	list,		/* nfs_read/write_data structs */
+				rewrite_list;	/* saved nfs_write_data structs */
+	struct nfs_open_context	*ctx;		/* file open context info */
+	struct kiocb *		iocb;		/* controlling i/o request */
+	struct inode *		inode;		/* target file of i/o */
+	unsigned long		user_addr;	/* location of user's buffer */
+	size_t			user_count;	/* total bytes to move */
+	loff_t			pos;		/* starting offset in file */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
-	atomic_t		complete,	/* i/os we're waiting for */
-				count,		/* bytes actually processed */
+
+	/* completion state */
+	spinlock_t		lock;		/* protect completion state */
+	int			outstanding;	/* i/os we're waiting for */
+	ssize_t			count,		/* bytes actually processed */
 				error;		/* any reported error */
+	struct completion	completion;	/* wait for i/o completion */
+
+	/* commit state */
+	struct nfs_write_data *	commit_data;	/* special write_data for commits */
+	int			flags;
+#define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
+	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
 
 /**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
- */
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
-		struct page ***pages)
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O.  However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
+ */
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+	struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+			dentry->d_name.name, (long long) pos, nr_segs);
+
+	return -EINVAL;
+}
+
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+	int i;
+	for (i = 0; i < npages; i++) {
+		struct page *page = pages[i];
+		if (do_dirty && !PageCompound(page))
+			set_page_dirty_lock(page);
+		page_cache_release(page);
+	}
+	kfree(pages);
+}
+
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
 {
 	int result = -ENOMEM;
 	unsigned long page_count;
 	size_t array_size;
 
-	/* set an arbitrary limit to prevent type overflow */
-	/* XXX: this can probably be as large as INT_MAX */
-	if (size > MAX_DIRECTIO_SIZE) {
-		*pages = NULL;
-		return -EFBIG;
-	}
-
 	page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	page_count -= user_addr >> PAGE_SHIFT;
 
@@ -107,66 +149,117 @@ nfs_get_user_pages(int rw, unsigned long
 					page_count, (rw == READ), 0,
 					*pages, NULL);
 		up_read(&current->mm->mmap_sem);
+		if (result != page_count) {
+			/*
+			 * If we got fewer pages than expected from
+			 * get_user_pages(), the user buffer runs off the
+			 * end of a mapping; return EFAULT.
+			 */
+			if (result >= 0) {
+				nfs_free_user_pages(*pages, result, 0);
+				result = -EFAULT;
+			} else
+				kfree(*pages);
+			*pages = NULL;
+		}
 	}
 	return result;
 }
 
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
-	int i;
-	for (i = 0; i < npages; i++) {
-		struct page *page = pages[i];
-		if (do_dirty && !PageCompound(page))
-			set_page_dirty_lock(page);
-		page_cache_release(page);
-	}
-	kfree(pages);
+	struct nfs_direct_req *dreq;
+
+	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	if (!dreq)
+		return NULL;
+
+	kref_init(&dreq->kref);
+	init_completion(&dreq->completion);
+	INIT_LIST_HEAD(&dreq->list);
+	INIT_LIST_HEAD(&dreq->rewrite_list);
+	dreq->iocb = NULL;
+	dreq->ctx = NULL;
+	spin_lock_init(&dreq->lock);
+	dreq->outstanding = 0;
+	dreq->count = 0;
+	dreq->error = 0;
+	dreq->flags = 0;
+
+	return dreq;
 }
 
-/**
- * nfs_direct_req_release - release  nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
 static void nfs_direct_req_release(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+	if (dreq->ctx != NULL)
+		put_nfs_open_context(dreq->ctx);
 	kmem_cache_free(nfs_direct_cachep, dreq);
 }
 
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+	ssize_t result = -EIOCBQUEUED;
+
+	/* Async requests don't wait here */
+	if (dreq->iocb)
+		goto out;
+
+	result = wait_for_completion_interruptible(&dreq->completion);
+
+	if (!result)
+		result = dreq->error;
+	if (!result)
+		result = dreq->count;
+
+out:
+	kref_put(&dreq->kref, nfs_direct_req_release);
+	return (ssize_t) result;
+}
+
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
  *
+ * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request.  If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+	nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+	if (dreq->iocb) {
+		long res = (long) dreq->error;
+		if (!res)
+			res = (long) dreq->count;
+		aio_complete(dreq->iocb, res, 0);
+	} 
+	complete_all(&dreq->completion);
+
+	kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/*
  * Note we also set the number of requests we have in the dreq when we are
  * done.  This prevents races with I/O completion so we will always wait
  * until all requests have been dispatched and completed.
  */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 {
 	struct list_head *list;
 	struct nfs_direct_req *dreq;
-	unsigned int reads = 0;
 	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return NULL;
 
-	kref_init(&dreq->kref);
-	init_waitqueue_head(&dreq->wait);
-	INIT_LIST_HEAD(&dreq->list);
-	atomic_set(&dreq->count, 0);
-	atomic_set(&dreq->error, 0);
-
 	list = &dreq->list;
 	for(;;) {
 		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -186,72 +279,70 @@ static struct nfs_direct_req *nfs_direct
 		list_add(&data->pages, list);
 
 		data->req = (struct nfs_page *) dreq;
-		reads++;
+		dreq->outstanding++;
 		if (nbytes <= rsize)
 			break;
 		nbytes -= rsize;
 	}
 	kref_get(&dreq->kref);
-	atomic_set(&dreq->complete, reads);
 	return dreq;
 }
 
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	if (likely(status >= 0))
-		atomic_add(data->res.count, &dreq->count);
+	if (nfs_readpage_result(task, data) != 0)
+		return;
+
+	spin_lock(&dreq->lock);
+
+	if (likely(task->tk_status >= 0))
+		dreq->count += data->res.count;
 	else
-		atomic_set(&dreq->error, status);
+		dreq->error = task->tk_status;
 
-	if (unlikely(atomic_dec_and_test(&dreq->complete))) {
-		nfs_free_user_pages(dreq->pages, dreq->npages, 1);
-		wake_up(&dreq->wait);
-		kref_put(&dreq->kref, nfs_direct_req_release);
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
 	}
+
+	spin_unlock(&dreq->lock);
+	nfs_direct_complete(dreq);
 }
 
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+static const struct rpc_call_ops nfs_read_direct_ops = {
+	.rpc_call_done = nfs_direct_read_result,
+	.rpc_release = nfs_readdata_release,
+};
+
+/*
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
-		struct inode *inode, struct nfs_open_context *ctx,
-		unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 {
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = ctx->dentry->d_inode;
 	struct list_head *list = &dreq->list;
 	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
+	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int curpage, pgbase;
-	unsigned int rsize = NFS_SERVER(inode)->rsize;
 
 	curpage = 0;
-	pgbase = user_addr & ~PAGE_MASK;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
 	do {
 		struct nfs_read_data *data;
-		unsigned int bytes;
+		size_t bytes;
 
 		bytes = rsize;
 		if (count < rsize)
 			bytes = count;
 
+		BUG_ON(list_empty(list));
 		data = list_entry(list->next, struct nfs_read_data, pages);
 		list_del_init(&data->pages);
 
@@ -259,7 +350,7 @@ static void nfs_direct_read_schedule(str
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
-		data->args.offset = file_offset;
+		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = &pages[curpage];
 		data->args.count = bytes;
@@ -267,77 +358,38 @@ static void nfs_direct_read_schedule(str
 		data->res.eof = 0;
 		data->res.count = bytes;
 
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_read_direct_ops, data);
 		NFS_PROTO(inode)->read_setup(data);
 
 		data->task.tk_cookie = (unsigned long) inode;
-		data->complete = nfs_direct_read_result;
 
 		lock_kernel();
 		rpc_execute(&data->task);
 		unlock_kernel();
 
-		dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+		dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
 				data->task.tk_pid,
 				inode->i_sb->s_id,
 				(long long)NFS_FILEID(inode),
 				bytes,
 				(unsigned long long)data->args.offset);
 
-		file_offset += bytes;
+		pos += bytes;
 		pgbase += bytes;
 		curpage += pgbase >> PAGE_SHIFT;
 		pgbase &= ~PAGE_MASK;
 
 		count -= bytes;
 	} while (count != 0);
+	BUG_ON(!list_empty(list));
 }
 
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
-	int result = 0;
-
-	if (intr) {
-		result = wait_event_interruptible(dreq->wait,
-					(atomic_read(&dreq->complete) == 0));
-	} else {
-		wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
-	}
-
-	if (!result)
-		result = atomic_read(&dreq->error);
-	if (!result)
-		result = atomic_read(&dreq->count);
-
-	kref_put(&dreq->kref, nfs_direct_req_release);
-	return (ssize_t) result;
-}
-
-/**
- * nfs_direct_read_seg - Read in one iov segment.  Generate separate
- *                        read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
-		struct nfs_open_context *ctx, unsigned long user_addr,
-		size_t count, loff_t file_offset, struct page **pages,
-		unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
 {
 	ssize_t result;
 	sigset_t oldset;
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
@@ -345,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struc
 	if (!dreq)
 		return -ENOMEM;
 
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
+	dreq->inode = inode;
+	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+	if (!is_sync_kiocb(iocb))
+		dreq->iocb = iocb;
 
+	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
-	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
-				 file_offset);
-	result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+	nfs_direct_read_schedule(dreq);
+	result = nfs_direct_wait(dreq);
 	rpc_clnt_sigunmask(clnt, &oldset);
 
 	return result;
 }
 
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- *                   then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
-		const struct iovec *iov, loff_t file_offset,
-		unsigned long nr_segs)
-{
-	ssize_t tot_bytes = 0;
-	unsigned long seg = 0;
-
-	while ((seg < nr_segs) && (tot_bytes >= 0)) {
-		ssize_t result;
-		int page_count;
-		struct page **pages;
-		const struct iovec *vec = &iov[seg++];
-		unsigned long user_addr = (unsigned long) vec->iov_base;
-		size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-			if (tot_bytes > 0)
-				break;
-                        return page_count;
-                }
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
+{
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	while (!list_empty(&dreq->list)) {
+		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+		list_del(&data->pages);
+		nfs_writedata_release(data);
+	}
+}
 
-		result = nfs_direct_read_seg(inode, ctx, user_addr, size,
-				file_offset, pages, page_count);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+	struct list_head *pos;
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			return result;
-		}
-		tot_bytes += result;
-		file_offset += result;
-		if (result < size)
-			break;
+	list_splice_init(&dreq->rewrite_list, &dreq->list);
+	list_for_each(pos, &dreq->list)
+		dreq->outstanding++;
+	dreq->count = 0;
+
+	nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+	/* Call the NFS version-specific code */
+	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+		return;
+	if (unlikely(task->tk_status < 0)) {
+		dreq->error = task->tk_status;
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+	}
+	if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+		dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
 
-	return tot_bytes;
+	dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+	nfs_direct_write_complete(dreq, data->inode);
 }
 
-/**
- * nfs_direct_write_seg - Write out one iov segment.  Generate separate
- *                        write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
-		struct nfs_open_context *ctx, unsigned long user_addr,
-		size_t count, loff_t file_offset, struct page **pages,
-		int nr_pages)
-{
-	const unsigned int wsize = NFS_SERVER(inode)->wsize;
-	size_t request;
-	int curpage, need_commit;
-	ssize_t result, tot_bytes;
-	struct nfs_writeverf first_verf;
-	struct nfs_write_data *wdata;
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+	.rpc_call_done = nfs_direct_commit_result,
+	.rpc_release = nfs_commit_release,
+};
 
-	wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
-	if (!wdata)
-		return -ENOMEM;
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
+{
+	struct nfs_write_data *data = dreq->commit_data;
+	struct rpc_task *task = &data->task;
 
-	wdata->inode = inode;
-	wdata->cred = ctx->cred;
-	wdata->args.fh = NFS_FH(inode);
-	wdata->args.context = ctx;
-	wdata->args.stable = NFS_UNSTABLE;
-	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
-		wdata->args.stable = NFS_FILE_SYNC;
-	wdata->res.fattr = &wdata->fattr;
-	wdata->res.verf = &wdata->verf;
+	data->inode = dreq->inode;
+	data->cred = dreq->ctx->cred;
 
-	nfs_begin_data_update(inode);
-retry:
-	need_commit = 0;
-	tot_bytes = 0;
-	curpage = 0;
-	request = count;
-	wdata->args.pgbase = user_addr & ~PAGE_MASK;
-	wdata->args.offset = file_offset;
-	do {
-		wdata->args.count = request;
-		if (wdata->args.count > wsize)
-			wdata->args.count = wsize;
-		wdata->args.pages = &pages[curpage];
-
-		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
-			wdata->args.count, (long long) wdata->args.offset,
-			user_addr + tot_bytes, wdata->args.pgbase, curpage);
+	data->args.fh = NFS_FH(data->inode);
+	data->args.offset = dreq->pos;
+	data->args.count = dreq->user_count;
+	data->res.count = 0;
+	data->res.fattr = &data->fattr;
+	data->res.verf = &data->verf;
 
-		lock_kernel();
-		result = NFS_PROTO(inode)->write(wdata);
-		unlock_kernel();
+	rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+				&nfs_commit_direct_ops, data);
+	NFS_PROTO(data->inode)->commit_setup(data, 0);
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
-				break;
-			goto out;
-		}
+	data->task.tk_priority = RPC_PRIORITY_NORMAL;
+	data->task.tk_cookie = (unsigned long)data->inode;
+	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+	dreq->commit_data = NULL;
+	
+	dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
 
-		if (tot_bytes == 0)
-			memcpy(&first_verf.verifier, &wdata->verf.verifier,
-						sizeof(first_verf.verifier));
-		if (wdata->verf.committed != NFS_FILE_SYNC) {
-			need_commit = 1;
-			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
-					sizeof(first_verf.verifier)))
-				goto sync_retry;
-		}
+	lock_kernel();
+	rpc_execute(&data->task);
+	unlock_kernel();
+}
 
-		tot_bytes += result;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	int flags = dreq->flags;
 
-		/* in case of a short write: stop now, let the app recover */
-		if (result < wdata->args.count)
+	dreq->flags = 0;
+	switch (flags) {
+		case NFS_ODIRECT_DO_COMMIT:
+			nfs_direct_commit_schedule(dreq);
 			break;
+		case NFS_ODIRECT_RESCHED_WRITES:
+			nfs_direct_write_reschedule(dreq);
+			break;
+		default:
+			nfs_end_data_update(inode);
+			if (dreq->commit_data != NULL)
+				nfs_commit_free(dreq->commit_data);
+			nfs_direct_free_writedata(dreq);
+			nfs_direct_complete(dreq);
+	}
+}
 
-		wdata->args.offset += result;
-		wdata->args.pgbase += result;
-		curpage += wdata->args.pgbase >> PAGE_SHIFT;
-		wdata->args.pgbase &= ~PAGE_MASK;
-		request -= result;
-	} while (request != 0);
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = nfs_commit_alloc(0);
+	if (dreq->commit_data != NULL)
+		dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+	dreq->commit_data = NULL;
+}
 
-	/*
-	 * Commit data written so far, even in the event of an error
-	 */
-	if (need_commit) {
-		wdata->args.count = tot_bytes;
-		wdata->args.offset = file_offset;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+	nfs_end_data_update(inode);
+	nfs_direct_free_writedata(dreq);
+	nfs_direct_complete(dreq);
+}
+#endif
 
-		lock_kernel();
-		result = NFS_PROTO(inode)->commit(wdata);
-		unlock_kernel();
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+	struct list_head *list;
+	struct nfs_direct_req *dreq;
+	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+	dreq = nfs_direct_req_alloc();
+	if (!dreq)
+		return NULL;
+
+	list = &dreq->list;
+	for(;;) {
+		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
 
-		if (result < 0 || memcmp(&first_verf.verifier,
-					 &wdata->verf.verifier,
-					 sizeof(first_verf.verifier)) != 0)
-			goto sync_retry;
+		if (unlikely(!data)) {
+			while (!list_empty(list)) {
+				data = list_entry(list->next,
+						  struct nfs_write_data, pages);
+				list_del(&data->pages);
+				nfs_writedata_free(data);
+			}
+			kref_put(&dreq->kref, nfs_direct_req_release);
+			return NULL;
+		}
+
+		INIT_LIST_HEAD(&data->pages);
+		list_add(&data->pages, list);
+
+		data->req = (struct nfs_page *) dreq;
+		dreq->outstanding++;
+		if (nbytes <= wsize)
+			break;
+		nbytes -= wsize;
 	}
-	result = tot_bytes;
 
-out:
-	nfs_end_data_update(inode);
-	nfs_writedata_free(wdata);
-	return result;
+	nfs_alloc_commit_data(dreq);
 
-sync_retry:
-	wdata->args.stable = NFS_FILE_SYNC;
-	goto retry;
+	kref_get(&dreq->kref);
+	return dreq;
 }
 
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- *                    then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode,
-		struct nfs_open_context *ctx, const struct iovec *iov,
-		loff_t file_offset, unsigned long nr_segs)
-{
-	ssize_t tot_bytes = 0;
-	unsigned long seg = 0;
-
-	while ((seg < nr_segs) && (tot_bytes >= 0)) {
-		ssize_t result;
-		int page_count;
-		struct page **pages;
-		const struct iovec *vec = &iov[seg++];
-		unsigned long user_addr = (unsigned long) vec->iov_base;
-		size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-			if (tot_bytes > 0)
-				break;
-                        return page_count;
-                }
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	int status = task->tk_status;
+
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
+	spin_lock(&dreq->lock);
 
-		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
-				file_offset, pages, page_count);
-		nfs_free_user_pages(pages, page_count, 0);
+	if (likely(status >= 0))
+		dreq->count += data->res.count;
+	else
+		dreq->error = task->tk_status;
 
-		if (result <= 0) {
-			if (tot_bytes > 0)
+	if (data->res.verf->committed != NFS_FILE_SYNC) {
+		switch (dreq->flags) {
+			case 0:
+				memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 				break;
-			return result;
+			case NFS_ODIRECT_DO_COMMIT:
+				if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+					dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+				}
 		}
-		tot_bytes += result;
-		file_offset += result;
-		if (result < size)
-			break;
 	}
-	return tot_bytes;
+	/* In case we have to resend */
+	data->args.stable = NFS_FILE_SYNC;
+
+	spin_unlock(&dreq->lock);
 }
 
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
  */
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_release(void *calldata)
 {
-	ssize_t result = -EINVAL;
-	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx;
-	struct dentry *dentry = file->f_dentry;
-	struct inode *inode = dentry->d_inode;
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	/*
-	 * No support for async yet
-	 */
+	spin_lock(&dreq->lock);
+	if (--dreq->outstanding) {
+		spin_unlock(&dreq->lock);
+		return;
+	}
+	spin_unlock(&dreq->lock);
+
+	nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+	.rpc_call_done = nfs_direct_write_result,
+	.rpc_release = nfs_direct_write_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+{
+	struct nfs_open_context *ctx = dreq->ctx;
+	struct inode *inode = ctx->dentry->d_inode;
+	struct list_head *list = &dreq->list;
+	struct page **pages = dreq->pages;
+	size_t count = dreq->user_count;
+	loff_t pos = dreq->pos;
+	size_t wsize = NFS_SERVER(inode)->wsize;
+	unsigned int curpage, pgbase;
+
+	curpage = 0;
+	pgbase = dreq->user_addr & ~PAGE_MASK;
+	do {
+		struct nfs_write_data *data;
+		size_t bytes;
+
+		bytes = wsize;
+		if (count < wsize)
+			bytes = count;
+
+		BUG_ON(list_empty(list));
+		data = list_entry(list->next, struct nfs_write_data, pages);
+		list_move_tail(&data->pages, &dreq->rewrite_list);
+
+		data->inode = inode;
+		data->cred = ctx->cred;
+		data->args.fh = NFS_FH(inode);
+		data->args.context = ctx;
+		data->args.offset = pos;
+		data->args.pgbase = pgbase;
+		data->args.pages = &pages[curpage];
+		data->args.count = bytes;
+		data->res.fattr = &data->fattr;
+		data->res.count = bytes;
+		data->res.verf = &data->verf;
+
+		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+				&nfs_write_direct_ops, data);
+		NFS_PROTO(inode)->write_setup(data, sync);
+
+		data->task.tk_priority = RPC_PRIORITY_NORMAL;
+		data->task.tk_cookie = (unsigned long) inode;
+
+		lock_kernel();
+		rpc_execute(&data->task);
+		unlock_kernel();
+
+		dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+				data->task.tk_pid,
+				inode->i_sb->s_id,
+				(long long)NFS_FILEID(inode),
+				bytes,
+				(unsigned long long)data->args.offset);
+
+		pos += bytes;
+		pgbase += bytes;
+		curpage += pgbase >> PAGE_SHIFT;
+		pgbase &= ~PAGE_MASK;
+
+		count -= bytes;
+	} while (count != 0);
+	BUG_ON(!list_empty(list));
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+{
+	ssize_t result;
+	sigset_t oldset;
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	struct rpc_clnt *clnt = NFS_CLIENT(inode);
+	struct nfs_direct_req *dreq;
+	size_t wsize = NFS_SERVER(inode)->wsize;
+	int sync = 0;
+
+	dreq = nfs_direct_write_alloc(count, wsize);
+	if (!dreq)
+		return -ENOMEM;
+	if (dreq->commit_data == NULL || count < wsize)
+		sync = FLUSH_STABLE;
+
+	dreq->user_addr = user_addr;
+	dreq->user_count = count;
+	dreq->pos = pos;
+	dreq->pages = pages;
+	dreq->npages = nr_pages;
+	dreq->inode = inode;
+	dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
 	if (!is_sync_kiocb(iocb))
-		return result;
+		dreq->iocb = iocb;
+
+	nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
+	nfs_begin_data_update(inode);
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	nfs_direct_write_schedule(dreq, sync);
+	result = nfs_direct_wait(dreq);
+	rpc_clnt_sigunmask(clnt, &oldset);
 
-	ctx = (struct nfs_open_context *)file->private_data;
-	switch (rw) {
-	case READ:
-		dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
-				dentry->d_name.name, file_offset, nr_segs);
-
-		result = nfs_direct_read(inode, ctx, iov,
-						file_offset, nr_segs);
-		break;
-	case WRITE:
-		dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
-				dentry->d_name.name, file_offset, nr_segs);
-
-		result = nfs_direct_write(inode, ctx, iov,
-						file_offset, nr_segs);
-		break;
-	default:
-		break;
-	}
 	return result;
 }
 
@@ -630,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
  * generic_file_aio_read() in order to avoid gfar's check to see if
  * the request starts before the end of the file.  For that check
  * to work, we must generate a GETATTR before each direct read, and
  * even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change.  So our preference is simply
+ * READ where the file size could change.  Our preference is simply
  * to do all reads the application wants, and the server will take
  * care of managing the end of file boundary.
- * 
+ *
  * This function also eliminates unnecessarily updating the file's
  * atime locally, as the NFS server sets the file's atime, and this
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval = -EINVAL;
-	loff_t *ppos = &iocb->ki_pos;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx =
-			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	struct iovec iov = {
-		.iov_base = buf,
-		.iov_len = count,
-	};
 
 	dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
-	if (!is_sync_kiocb(iocb))
-		goto out;
 	if (count < 0)
 		goto out;
 	retval = -EFAULT;
-	if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+	if (!access_ok(VERIFY_WRITE, buf, count))
 		goto out;
 	retval = 0;
 	if (!count)
@@ -682,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb,
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+	retval = nfs_get_user_pages(READ, (unsigned long) buf,
+						count, &pages);
+	if (retval < 0)
+		goto out;
+	page_count = retval;
+
+	retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+						pages, page_count);
 	if (retval > 0)
-		*ppos = pos + retval;
+		iocb->ki_pos = pos + retval;
 
 out:
 	return retval;
@@ -694,8 +810,8 @@ out:
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
  * generic_file_aio_write() in order to avoid taking the inode
@@ -715,28 +831,19 @@ out:
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
 	ssize_t retval;
+	int page_count;
+	struct page **pages;
 	struct file *file = iocb->ki_filp;
-	struct nfs_open_context *ctx =
-			(struct nfs_open_context *) file->private_data;
 	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	struct iovec iov = {
-		.iov_base = (char __user *)buf,
-	};
 
 	dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name,
 		(unsigned long) count, (long long) pos);
 
-	retval = -EINVAL;
-	if (!is_sync_kiocb(iocb))
-		goto out;
-
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
 		goto out;
@@ -747,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb
 	retval = 0;
 	if (!count)
 		goto out;
-	iov.iov_len = count,
 
 	retval = -EFAULT;
-	if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+	if (!access_ok(VERIFY_READ, buf, count))
 		goto out;
 
 	retval = nfs_sync_mapping(mapping);
 	if (retval)
 		goto out;
 
-	retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+	retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+						count, &pages);
+	if (retval < 0)
+		goto out;
+	page_count = retval;
+
+	retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+					pos, pages, page_count);
+
+	/*
+	 * XXX: nfs_end_data_update() already ensures this file's
+	 *      cached data is subsequently invalidated.  Do we really
+	 *      need to call invalidate_inode_pages2() again here?
+	 *
+	 *      For aio writes, this invalidation will almost certainly
+	 *      occur before the writes complete.  Kind of racey.
+	 */
 	if (mapping->nrpages)
 		invalidate_inode_pages2(mapping);
+
 	if (retval > 0)
 		iocb->ki_pos = pos + retval;
 
@@ -767,6 +890,10 @@ out:
 	return retval;
 }
 
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
 int nfs_init_directcache(void)
 {
 	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -779,6 +906,10 @@ int nfs_init_directcache(void)
 	return 0;
 }
 
+/**
+ * nfs_init_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
 void nfs_destroy_directcache(void)
 {
 	if (kmem_cache_destroy(nfs_direct_cachep))
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7a79fbe..6bcbc4d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
 #include <asm/system.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
 static int
 nfs_file_open(struct inode *inode, struct file *filp)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
-	int (*open)(struct inode *, struct file *);
 	int res;
 
 	res = nfs_check_flags(filp->f_flags);
 	if (res)
 		return res;
 
+	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	/* Do NFSv4 open() call */
-	if ((open = server->rpc_ops->file_open) != NULL)
-		res = open(inode, filp);
+	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, st
 	/* Ensure that dirty pages are flushed out with the right creds */
 	if (filp->f_mode & FMODE_WRITE)
 		filemap_fdatawrite(filp->f_mapping);
+	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
 
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
 
 	if ((file->f_mode & FMODE_WRITE) == 0)
 		return 0;
+	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 	lock_kernel();
 	/* Ensure that data+attribute caches are up to date after close() */
 	status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char _
 		(unsigned long) count, (unsigned long) pos);
 
 	result = nfs_revalidate_file(inode, iocb->ki_filp);
+	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
 	if (!result)
 		result = generic_file_aio_read(iocb, buf, count, pos);
 	return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dent
 
 	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 	lock_kernel();
 	status = nfs_wb_all(inode);
 	if (!status) {
@@ -316,6 +318,17 @@ static int nfs_commit_write(struct file 
 	return status;
 }
 
+static int nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+	/* FIXME: we really should cancel any unstarted writes on this page */
+	return 1;
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+	return !nfs_wb_page(page->mapping->host, page);
+}
+
 struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
@@ -324,6 +337,8 @@ struct address_space_operations nfs_file
 	.writepages = nfs_writepages,
 	.prepare_write = nfs_prepare_write,
 	.commit_write = nfs_commit_write,
+	.invalidatepage = nfs_invalidate_page,
+	.releasepage = nfs_release_page,
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
@@ -365,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const
 	if (!count)
 		goto out;
 
+	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, buf, count, pos);
 out:
 	return result;
@@ -376,15 +392,17 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct file_lock *cfl;
+	struct file_lock cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
 	/* Try local locking first */
-	cfl = posix_test_lock(filp, fl);
-	if (cfl != NULL) {
-		locks_copy_lock(fl, cfl);
+	if (posix_test_lock(filp, fl, &cfl)) {
+		fl->fl_start = cfl.fl_start;
+		fl->fl_end = cfl.fl_end;
+		fl->fl_type = cfl.fl_type;
+		fl->fl_pid = cfl.fl_pid;
 		goto out;
 	}
 
@@ -504,9 +522,7 @@ static int nfs_lock(struct file *filp, i
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags,
 			(long long)fl->fl_start, (long long)fl->fl_end);
-
-	if (!inode)
-		return -EINVAL;
+	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
 
 	/* No mandatory locks over NFS */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +547,6 @@ static int nfs_flock(struct file *filp, 
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags);
 
-	if (!inode)
-		return -EINVAL;
-
 	/*
 	 * No BSD flocks over NFS allowed.
 	 * Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 821edd3..3fab5b0 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -35,6 +35,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
-	struct semaphore      idmap_lock;    /* Serializes upcalls */
-	struct semaphore      idmap_im_lock; /* Protects the hashtable */
+	struct mutex          idmap_lock;    /* Serializes upcalls */
+	struct mutex          idmap_im_lock; /* Protects the hashtable */
 	struct idmap_hashtable idmap_user_hash;
 	struct idmap_hashtable idmap_group_hash;
 };
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
 
 	if (clp->cl_idmap != NULL)
 		return;
-        if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+        if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return;
 
-	memset(idmap, 0, sizeof(*idmap));
-
 	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
 	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
 
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
 		return;
 	}
 
-        init_MUTEX(&idmap->idmap_lock);
-        init_MUTEX(&idmap->idmap_im_lock);
+        mutex_init(&idmap->idmap_lock);
+        mutex_init(&idmap->idmap_im_lock);
 	init_waitqueue_head(&idmap->idmap_wq);
 	idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp
 
 	if (!idmap)
 		return;
+	dput(idmap->idmap_dentry);
+	idmap->idmap_dentry = NULL;
 	rpc_unlink(idmap->idmap_path);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct
 	if (namelen >= IDMAP_NAMESZ)
 		return -EINVAL;
 
-	down(&idmap->idmap_lock);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	he = idmap_lookup_name(h, name, namelen);
 	if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct
 	}
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	schedule();
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&idmap->idmap_wq, &wq);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	if (im->im_status & IDMAP_STATUS_SUCCESS) {
 		*id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct
 
  out:
 	memset(im, 0, sizeof(*im));
-	up(&idmap->idmap_im_lock);
-	up(&idmap->idmap_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_lock);
 	return (ret);
 }
 
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, stru
 
 	im = &idmap->idmap_im;
 
-	down(&idmap->idmap_lock);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	he = idmap_lookup_id(h, id);
 	if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, stru
 	}
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	schedule();
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&idmap->idmap_wq, &wq);
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	if (im->im_status & IDMAP_STATUS_SUCCESS) {
 		if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, stru
 
  out:
 	memset(im, 0, sizeof(*im));
-	up(&idmap->idmap_im_lock);
-	up(&idmap->idmap_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_lock);
 	return ret;
 }
 
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, c
         if (copy_from_user(&im_in, src, mlen) != 0)
 		return (-EFAULT);
 
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 
 	ret = mlen;
 	im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, c
 		idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
 	ret = mlen;
 out:
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 	return ret;
 }
 
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_m
 
 	if (msg->errno >= 0)
 		return;
-	down(&idmap->idmap_im_lock);
+	mutex_lock(&idmap->idmap_im_lock);
 	im->im_status = IDMAP_STATUS_LOOKUPFAIL;
 	wake_up(&idmap->idmap_wq);
-	up(&idmap->idmap_im_lock);
+	mutex_unlock(&idmap->idmap_im_lock);
 }
 
 /* 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a77ee95..22606ba 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -26,6 +26,7 @@
 #include <linux/unistd.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static void nfs_zap_acl_cache(struct inode *);
 
 static struct rpc_program	nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops 
 	.clear_inode	= nfs_clear_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
+	.show_stats	= nfs_show_stats,
 };
 
 /*
@@ -290,6 +294,15 @@ nfs_sb_init(struct super_block *sb, rpc_
 	}
 	sb->s_root->d_op = server->rpc_ops->dentry_ops;
 
+	server->io_stats = nfs_alloc_iostats();
+	if (!server->io_stats) {
+		no_root_error = -ENOMEM;
+		goto out_no_root;
+	}
+
+	/* mount time stamp, in seconds */
+	server->mount_time = jiffies;
+
 	/* Get some general file system info */
 	if (server->namelen == 0 &&
 	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +409,9 @@ nfs_create_client(struct nfs_server *ser
 
 	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
+	server->retrans_timeo = timeparms.to_initval;
+	server->retrans_count = timeparms.to_retries;
+
 	/* create transport and client */
 	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
@@ -579,7 +595,7 @@ nfs_statfs(struct super_block *sb, struc
 
 }
 
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
 	static struct proc_nfs_info {
 		int flag;
@@ -588,28 +604,26 @@ static int nfs_show_options(struct seq_f
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
-		{ NFS_MOUNT_POSIX, ",posix", "" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
-		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
+		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
 	struct proc_nfs_info *nfs_infop;
-	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
 	char buf[12];
 	char *proto;
 
-	seq_printf(m, ",v%d", nfss->rpc_ops->version);
+	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
-	if (nfss->acregmin != 3*HZ)
+	if (nfss->acregmin != 3*HZ || showdefaults)
 		seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
-	if (nfss->acregmax != 60*HZ)
+	if (nfss->acregmax != 60*HZ || showdefaults)
 		seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
-	if (nfss->acdirmin != 30*HZ)
+	if (nfss->acdirmin != 30*HZ || showdefaults)
 		seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
-	if (nfss->acdirmax != 60*HZ)
+	if (nfss->acdirmax != 60*HZ || showdefaults)
 		seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
 	for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 		if (nfss->flags & nfs_infop->flag)
@@ -629,8 +643,96 @@ static int nfs_show_options(struct seq_f
 			proto = buf;
 	}
 	seq_printf(m, ",proto=%s", proto);
+	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+	seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+	nfs_show_mount_options(m, nfss, 0);
+
 	seq_puts(m, ",addr=");
 	seq_escape(m, nfss->hostname, " \t\n\\");
+
+	return 0;
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{
+	int i, cpu;
+	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	struct rpc_auth *auth = nfss->client->cl_auth;
+	struct nfs_iostats totals = { };
+
+	seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+	/*
+	 * Display all mount option settings
+	 */
+	seq_printf(m, "\n\topts:\t");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+	seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+	nfs_show_mount_options(m, nfss, 1);
+
+	seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
+	seq_printf(m, "\n\tcaps:\t");
+	seq_printf(m, "caps=0x%x", nfss->caps);
+	seq_printf(m, ",wtmult=%d", nfss->wtmult);
+	seq_printf(m, ",dtsize=%d", nfss->dtsize);
+	seq_printf(m, ",bsize=%d", nfss->bsize);
+	seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+	if (nfss->rpc_ops->version == 4) {
+		seq_printf(m, "\n\tnfsv4:\t");
+		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+		seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+	}
+#endif
+
+	/*
+	 * Display security flavor in effect for this mount
+	 */
+	seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+	if (auth->au_flavor)
+		seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+	/*
+	 * Display superblock I/O counters
+	 */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct nfs_iostats *stats;
+
+		if (!cpu_possible(cpu))
+			continue;
+
+		preempt_disable();
+		stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+		for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+			totals.events[i] += stats->events[i];
+		for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+			totals.bytes[i] += stats->bytes[i];
+
+		preempt_enable();
+	}
+
+	seq_printf(m, "\n\tevents:\t");
+	for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+		seq_printf(m, "%lu ", totals.events[i]);
+	seq_printf(m, "\n\tbytes:\t");
+	for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+		seq_printf(m, "%Lu ", totals.bytes[i]);
+	seq_printf(m, "\n");
+
+	rpc_print_iostats(m, nfss->client);
+
 	return 0;
 }
 
@@ -660,6 +762,8 @@ static void nfs_zap_caches_locked(struct
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int mode = inode->i_mode;
 
+	nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
 	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
 	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 
@@ -847,6 +951,8 @@ nfs_setattr(struct dentry *dentry, struc
 	struct nfs_fattr fattr;
 	int error;
 
+	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
 			attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +965,9 @@ nfs_setattr(struct dentry *dentry, struc
 
 	lock_kernel();
 	nfs_begin_data_update(inode);
-	/* Write all dirty data if we're changing file permissions or size */
-	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
-		filemap_write_and_wait(inode->i_mapping);
-		nfs_wb_all(inode);
-	}
+	/* Write all dirty data */
+	filemap_write_and_wait(inode->i_mapping);
+	nfs_wb_all(inode);
 	/*
 	 * Return any delegations if we're going to change ACLs
 	 */
@@ -902,6 +1006,7 @@ void nfs_setattr_update_inode(struct ino
 		spin_unlock(&inode->i_lock);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
+		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		inode->i_size = attr->ia_size;
 		vmtruncate(inode, attr->ia_size);
 	}
@@ -1185,6 +1290,7 @@ int nfs_attribute_timeout(struct inode *
  */
 int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
+	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 	if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
 			&& !nfs_attribute_timeout(inode))
 		return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1307,7 @@ void nfs_revalidate_mapping(struct inode
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+		nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 		if (S_ISREG(inode->i_mode))
 			nfs_sync_mapping(mapping);
 		invalidate_inode_pages2(mapping);
@@ -1299,39 +1406,37 @@ static int nfs_check_inode_attributes(st
 	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 		return 0;
 
+	/* Has the inode gone and changed behind our back? */
+	if (nfsi->fileid != fattr->fileid
+			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+		return -EIO;
+	}
+
 	/* Are we in the process of updating data on the server? */
 	data_unstable = nfs_caches_unstable(inode);
 
 	/* Do atomic weak cache consistency updates */
 	nfs_wcc_update_inode(inode, fattr);
 
-	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
-			nfsi->change_attr != fattr->change_attr) {
+	if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+		if (nfsi->change_attr == fattr->change_attr)
+			goto out;
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (!data_unstable)
 			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
 
-	/* Has the inode gone and changed behind our back? */
-	if (nfsi->fileid != fattr->fileid
-			|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
-		return -EIO;
-	}
-
-	cur_size = i_size_read(inode);
- 	new_isize = nfs_size_to_loff_t(fattr->size);
-
 	/* Verify a few of the more important attributes */
 	if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 		if (!data_unstable)
 			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
 	}
-	if (cur_size != new_isize) {
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
-		if (nfsi->npages == 0)
-			nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
-	}
+
+	cur_size = i_size_read(inode);
+ 	new_isize = nfs_size_to_loff_t(fattr->size);
+	if (cur_size != new_isize && nfsi->npages == 0)
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
 	/* Have any file permissions changed? */
 	if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1448,7 @@ static int nfs_check_inode_attributes(st
 	if (inode->i_nlink != fattr->nlink)
 		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 
+out:
 	if (!timespec_equal(&inode->i_atime, &fattr->atime))
 		nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
 
@@ -1481,15 +1587,6 @@ static int nfs_update_inode(struct inode
 		nfsi->cache_change_attribute = jiffies;
 	}
 
-	if ((fattr->valid & NFS_ATTR_FATTR_V4)
-	    && nfsi->change_attr != fattr->change_attr) {
-		dprintk("NFS: change_attr change on server for file %s/%ld\n",
-		       inode->i_sb->s_id, inode->i_ino);
-		nfsi->change_attr = fattr->change_attr;
-		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-		nfsi->cache_change_attribute = jiffies;
-	}
-
 	/* If ctime has changed we should definitely clear access+acl caches */
 	if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
 		invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1616,20 @@ static int nfs_update_inode(struct inode
  		inode->i_blksize = fattr->du.nfs2.blocksize;
  	}
 
+	if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+		if (nfsi->change_attr != fattr->change_attr) {
+			dprintk("NFS: change_attr change on server for file %s/%ld\n",
+					inode->i_sb->s_id, inode->i_ino);
+			nfsi->change_attr = fattr->change_attr;
+			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			nfsi->cache_change_attribute = jiffies;
+		} else
+			invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+	}
+
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
+		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 	} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1746,9 @@ static struct super_block *nfs_get_sb(st
 #endif /* CONFIG_NFS_V3 */
 
 	s = ERR_PTR(-ENOMEM);
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
 		goto out_err;
-	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops
 	.clear_inode	= nfs4_clear_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
+	.show_stats	= nfs_show_stats,
 };
 
 /*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_
 
 	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
+	server->retrans_timeo = timeparms.to_initval;
+	server->retrans_count = timeparms.to_retries;
+
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(s
 		return ERR_PTR(-EINVAL);
 	}
 
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
 	if (!server)
 		return ERR_PTR(-ENOMEM);
-	memset(server, 0, sizeof(struct nfs_server));
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
 	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2009,6 +2120,7 @@ out_err:
 out_free:
 	kfree(server->mnt_path);
 	kfree(server->hostname);
+	nfs_free_iostats(server->io_stats);
 	kfree(server);
 	return s;
 }
@@ -2024,10 +2136,11 @@ static void nfs4_kill_super(struct super
 
 	if (server->client != NULL && !IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
-	rpciod_down();		/* release rpciod */
 
 	destroy_nfsv4_state(server);
 
+	rpciod_down();
+
 	kfree(server->hostname);
 	kfree(server);
 }
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644
index 0000000..7a74951
--- /dev/null
+++ b/fs/nfs/iostat.h
@@ -0,0 +1,163 @@
+/*
+ *  linux/fs/nfs/iostat.h
+ *
+ *  Declarations for NFS client per-mount statistics
+ *
+ *  Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ *  NFS client per-mount statistics provide information about the health of
+ *  the NFS client and the health of each NFS mount point.  Generally these
+ *  are not for detailed problem diagnosis, but simply to indicate that there
+ *  is a problem.
+ *
+ *  These counters are not meant to be human-readable, but are meant to be
+ *  integrated into system monitoring tools such as "sar" and "iostat".  As
+ *  such, the counters are sampled by the tools over time, and are never
+ *  zeroed after a file system is mounted.  Moving averages can be computed
+ *  by the tools by taking the difference between two instantaneous samples
+ *  and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS		"1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1.  SERVER - the number of payload bytes read from or written to the
+ *     server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2.  NORMAL - the number of bytes read or written by applications via
+ *     the read(2) and write(2) system call interfaces.
+ *
+ * 3.  DIRECT - the number of bytes read or written from files opened
+ *     with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client.  Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods are in
+ * use.  DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface.  A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+	NFSIOS_NORMALREADBYTES = 0,
+	NFSIOS_NORMALWRITTENBYTES,
+	NFSIOS_DIRECTREADBYTES,
+	NFSIOS_DIRECTWRITTENBYTES,
+	NFSIOS_SERVERREADBYTES,
+	NFSIOS_SERVERWRITTENBYTES,
+	NFSIOS_READPAGES,
+	NFSIOS_WRITEPAGES,
+	__NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging.  The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches.  This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anamolous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+	NFSIOS_INODEREVALIDATE = 0,
+	NFSIOS_DENTRYREVALIDATE,
+	NFSIOS_DATAINVALIDATE,
+	NFSIOS_ATTRINVALIDATE,
+	NFSIOS_VFSOPEN,
+	NFSIOS_VFSLOOKUP,
+	NFSIOS_VFSACCESS,
+	NFSIOS_VFSUPDATEPAGE,
+	NFSIOS_VFSREADPAGE,
+	NFSIOS_VFSREADPAGES,
+	NFSIOS_VFSWRITEPAGE,
+	NFSIOS_VFSWRITEPAGES,
+	NFSIOS_VFSGETDENTS,
+	NFSIOS_VFSSETATTR,
+	NFSIOS_VFSFLUSH,
+	NFSIOS_VFSFSYNC,
+	NFSIOS_VFSLOCK,
+	NFSIOS_VFSRELEASE,
+	NFSIOS_CONGESTIONWAIT,
+	NFSIOS_SETATTRTRUNC,
+	NFSIOS_EXTENDWRITE,
+	NFSIOS_SILLYRENAME,
+	NFSIOS_SHORTREAD,
+	NFSIOS_SHORTWRITE,
+	NFSIOS_DELAY,
+	__NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+	unsigned long long	bytes[__NFSIOS_BYTESMAX];
+	unsigned long		events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+{
+	struct nfs_iostats *iostats;
+	int cpu;
+
+	cpu = get_cpu();
+	iostats = per_cpu_ptr(server->io_stats, cpu);
+	iostats->events[stat] ++;
+	put_cpu_no_resched();
+}
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+	nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+	struct nfs_iostats *iostats;
+	int cpu;
+
+	cpu = get_cpu();
+	iostats = per_cpu_ptr(server->io_stats, cpu);
+	iostats->bytes[stat] += addend;
+	put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+	nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+	return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+	free_percpu(stats);
+}
+
+#endif
+#endif
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index db99b8f..c44d87b 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, 
 	struct mnt_fhstatus	result = {
 		.fh		= fh
 	};
+	struct rpc_message msg	= {
+		.rpc_argp	= path,
+		.rpc_resp	= &result,
+	};
 	char			hostname[32];
 	int			status;
-	int			call;
 
 	dprintk("NFS:      nfs_mount(%08x:%s)\n",
 			(unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, 
 	if (IS_ERR(mnt_clnt))
 		return PTR_ERR(mnt_clnt);
 
-	call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
-	status = rpc_call(mnt_clnt, call, path, &result, 0);
+	if (version == NFS_MNT3_VERSION)
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+	else
+		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+	status = rpc_call_sync(mnt_clnt, &msg, 0);
 	return status < 0? status : (result.status? -EACCES : 0);
 }
 
@@ -137,6 +144,8 @@ static struct rpc_procinfo	mnt_procedure
 	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
 	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
 	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	  .p_statidx		= MNTPROC_MNT,
+	  .p_name		= "MOUNT",
 	},
 };
 
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedur
 	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
 	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
 	  .p_bufsiz		= MNT_dirpath_sz << 2,
+	  .p_statidx		= MOUNTPROC3_MNT,
+	  .p_name		= "MOUNT",
 	},
 };
 
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 7fc0560..8cdc792 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
 	.p_encode   =  (kxdrproc_t) nfs_xdr_##argtype,			\
 	.p_decode   =  (kxdrproc_t) nfs_xdr_##restype,			\
 	.p_bufsiz   =  MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2,	\
-	.p_timer    =  timer						\
+	.p_timer    =  timer,						\
+	.p_statidx  =  NFSPROC_##proc,					\
+	.p_name     =  #proc,						\
 	}
 struct rpc_procinfo	nfs_procedures[] = {
     PROC(GETATTR,	fhandle,	attrstat, 1),
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 6a5bbc0..3328787 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struc
 	struct nfs3_getaclres res = {
 		.fattr =	&fattr,
 	};
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= &res,
+	};
 	struct posix_acl *acl;
 	int status, count;
 
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struc
 		return NULL;
 
 	dprintk("NFS call getacl\n");
-	status = rpc_call(server->client_acl, ACLPROC3_GETACL,
-			  &args, &res, 0);
+	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+	status = rpc_call_sync(server->client_acl, &msg, 0);
 	dprintk("NFS reply getacl: %d\n", status);
 
 	/* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inod
 		.acl_access = acl,
 		.pages = pages,
 	};
+	struct rpc_message msg = {
+		.rpc_argp	= &args,
+		.rpc_resp	= &fattr,
+	};
 	int status, count;
 
 	status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inod
 
 	dprintk("NFS call setacl\n");
 	nfs_begin_data_update(inode);
-	status = rpc_call(server->client_acl, ACLPROC3_SETACL,
-			  &args, &fattr, 0);
+	msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+	status = rpc_call_sync(server->client_acl, &msg, 0);
 	spin_lock(&inode->i_lock);
 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
 	spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ed67567..cf186f0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -19,6 +19,8 @@
 #include <linux/smp_lock.h>
 #include <linux/nfs_mount.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, 
 	return res;
 }
 
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
-	struct rpc_message msg = {
-		.rpc_proc	= &clnt->cl_procinfo[proc],
-		.rpc_argp	= argp,
-		.rpc_resp	= resp,
-	};
-	return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
-		nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
-		nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags)	nfs3_rpc_wrapper(clnt, msg, flags)
 
 static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
 {
 	if (task->tk_status != -EJUKEBOX)
 		return 0;
+	nfs_inc_stats(inode, NFSIOS_DELAY);
 	task->tk_status = 0;
 	rpc_restart_call(task);
 	rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
 do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
 		 struct nfs_fsinfo *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSINFO],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("%s: call  fsinfo\n", __FUNCTION__);
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+	status = rpc_call_sync(client, &msg, 0);
 	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
-		status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+		msg.rpc_resp = info->fattr;
+		status = rpc_call_sync(client, &msg, 0);
 		dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	}
 	return status;
@@ -107,12 +103,16 @@ static int
 nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client, NFS3PROC_GETATTR,
-			  fhandle, fattr, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry,
 		.fh		= NFS_FH(inode),
 		.sattr		= sattr,
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_SETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  setattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, stru
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_LOOKUP],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
-	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
-		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
-			 fhandle, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+		msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+		msg.rpc_argp = fhandle;
+		msg.rpc_resp = fattr;
+		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	}
 	dprintk("NFS reply lookup: %d\n", status);
 	if (status >= 0)
 		status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
-		.rpc_cred	= entry->cred
+		.rpc_cred	= entry->cred,
 	};
 	int mode = entry->mask;
 	int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct ino
 		.pglen		= pglen,
 		.pages		= &page
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_READLINK],
+		.rpc_argp	= &args,
+		.rpc_resp	= &fattr,
+	};
 	int			status;
 
 	dprintk("NFS call  readlink\n");
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
-			  &args, &fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, &fattr);
 	dprintk("NFS reply readlink: %d\n", status);
 	return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, stru
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	mode_t mode = sattr->ia_mode;
 	int status;
 
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, stru
 again:
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
-	nfs_post_op_update_inode(dir, &dir_attr);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_refresh_inode(dir, &dir_attr);
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir
 	struct rpc_message *msg = &task->tk_msg;
 	struct nfs_fattr	*dir_attr;
 
-	if (nfs3_async_handle_jukebox(task))
+	if (nfs3_async_handle_jukebox(task, dir->d_inode))
 		return 1;
 	if (msg->rpc_argp) {
 		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, 
 		.fromattr	= &old_dir_attr,
 		.toattr		= &new_dir_attr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_RENAME],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
 	nfs_fattr_init(&old_dir_attr);
 	nfs_fattr_init(&new_dir_attr);
-	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
 	nfs_post_op_update_inode(old_dir, &old_dir_attr);
 	nfs_post_op_update_inode(new_dir, &new_dir_attr);
 	dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, stru
 		.dir_attr	= &dir_attr,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_LINK],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	nfs_post_op_update_inode(inode, &fattr);
 	dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, str
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, str
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struc
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int mode = sattr->ia_mode;
 	int status;
 
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struc
 
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struc
 		.name		= name->name,
 		.len		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_RMDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &dir_attr,
+	};
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struc
 		.fh		= &fh,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_MKNOD],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	mode_t mode = sattr->ia_mode;
 	int status;
 
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struc
 
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
@@ -707,11 +759,16 @@ static int
 nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 		 struct nfs_fsstat *stat)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSSTAT],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= stat,
+	};
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
 	nfs_fattr_init(stat->fattr);
-	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	return status;
 }
@@ -720,11 +777,16 @@ static int
 nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 		 struct nfs_fsinfo *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_FSINFO],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
 }
@@ -733,40 +795,34 @@ static int
 nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 		   struct nfs_pathconf *info)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs3_procedures[NFS3PROC_PATHCONF],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= info,
+	};
 	int	status;
 
 	dprintk("NFS call  pathconf\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
 }
 
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-
-	if (nfs3_async_handle_jukebox(task))
-		return;
+	if (nfs3_async_handle_jukebox(task, data->inode))
+		return -EAGAIN;
 	/* Call back common NFS readpage processing */
 	if (task->tk_status >= 0)
 		nfs_refresh_inode(data->inode, &data->fattr);
-	nfs_readpage_result(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_read_ops = {
-	.rpc_call_done = nfs3_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
 		.rpc_argp	= &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_dat
 		.rpc_cred	= data->cred,
 	};
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
-	if (nfs3_async_handle_jukebox(task))
-		return;
+	if (nfs3_async_handle_jukebox(task, data->inode))
+		return -EAGAIN;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_write_ops = {
-	.rpc_call_done = nfs3_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			stable;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
 		.rpc_argp	= &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_d
 		.rpc_cred	= data->cred,
 	};
 
+	data->args.stable = NFS_UNSTABLE;
 	if (how & FLUSH_STABLE) {
-		if (!NFS_I(inode)->ncommit)
-			stable = NFS_FILE_SYNC;
-		else
-			stable = NFS_DATA_SYNC;
-	} else
-		stable = NFS_UNSTABLE;
-	data->args.stable = stable;
-
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+		data->args.stable = NFS_FILE_SYNC;
+		if (NFS_I(data->inode)->ncommit)
+			data->args.stable = NFS_DATA_SYNC;
+	}
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
-	if (nfs3_async_handle_jukebox(task))
-		return;
+	if (nfs3_async_handle_jukebox(task, data->inode))
+		return -EAGAIN;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_commit_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs3_commit_ops = {
-	.rpc_call_done = nfs3_commit_done,
-	.rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
 		.rpc_argp	= &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_
 		.rpc_cred	= data->cred,
 	};
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops	nfs_v3_clientops = {
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
+	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
+	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
+	.commit_done	= nfs3_commit_done,
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
 	.lock		= nfs3_proc_lock,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b6c0b50..2d8701a 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req,
 	.p_encode    = (kxdrproc_t) nfs3_xdr_##argtype,			\
 	.p_decode    = (kxdrproc_t) nfs3_xdr_##restype,			\
 	.p_bufsiz    = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2,	\
-	.p_timer     = timer						\
+	.p_timer     = timer,						\
+	.p_statidx   = NFS3PROC_##proc,					\
+	.p_name      = #proc,						\
 	}
 
 struct rpc_procinfo	nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo	nfs3_acl_proc
 		.p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
 		.p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
 		.p_timer = 1,
+		.p_name = "GETACL",
 	},
 	[ACLPROC3_SETACL] = {
 		.p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo	nfs3_acl_proc
 		.p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
 		.p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
 		.p_timer = 0,
+		.p_name = "SETACL",
 	},
 };
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 984ca34..2144129 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
@@ -908,7 +909,7 @@ out_put_state_owner:
 static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
-	struct nfs4_state *res;
+	struct nfs4_state *res = ERR_PTR(-EIO);
 	int err;
 
 	do {
@@ -2344,75 +2345,50 @@ static int nfs4_proc_pathconf(struct nfs
 	return err;
 }
 
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(data->inode);
 
-	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+	if (nfs4_async_handle_error(task, server) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status > 0)
-		renew_lease(NFS_SERVER(inode), data->timestamp);
-	/* Call back common NFS readpage processing */
-	nfs_readpage_result(task, calldata);
+		renew_lease(server, data->timestamp);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_read_ops = {
-	.rpc_call_done = nfs4_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = data->cred,
 	};
-	struct inode *inode = data->inode;
-	int flags;
 
 	data->timestamp   = jiffies;
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), data->timestamp);
 		nfs_post_op_update_inode(inode, data->res.fattr);
 	}
-	/* Call back common NFS writeback processing */
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_write_ops = {
-	.rpc_call_done = nfs4_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
 		.rpc_argp = &data->args,
@@ -2422,7 +2398,6 @@ nfs4_proc_write_setup(struct nfs_write_d
 	struct inode *inode = data->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	int stable;
-	int flags;
 	
 	if (how & FLUSH_STABLE) {
 		if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2412,37 @@ nfs4_proc_write_setup(struct nfs_write_d
 
 	data->timestamp   = jiffies;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
 		rpc_restart_call(task);
-		return;
+		return -EAGAIN;
 	}
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(inode, data->res.fattr);
-	/* Call back common NFS writeback processing */
-	nfs_commit_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs4_commit_ops = {
-	.rpc_call_done = nfs4_commit_done,
-	.rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task	*task = &data->task;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = data->cred,
 	};	
-	struct inode *inode = data->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	int flags;
+	struct nfs_server *server = NFS_SERVER(data->inode);
 	
 	data->args.bitmask = server->attr_bitmask;
 	data->res.server = server;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
-	rpc_call_setup(task, &msg, 0);	
+	rpc_call_setup(&data->task, &msg, 0);	
 }
 
 /*
@@ -2755,8 +2710,10 @@ nfs4_async_handle_error(struct rpc_task 
 				rpc_wake_up_task(task);
 			task->tk_status = 0;
 			return -EAGAIN;
-		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
+			nfs_inc_server_stats((struct nfs_server *) server,
+						NFSIOS_DELAY);
+		case -NFS4ERR_GRACE:
 			rpc_delay(task, NFS4_POLL_RETRY_MAX);
 			task->tk_status = 0;
 			return -EAGAIN;
@@ -2958,7 +2915,7 @@ static void nfs4_delegreturn_release(voi
 	kfree(calldata);
 }
 
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
 	.rpc_call_prepare = nfs4_delegreturn_prepare,
 	.rpc_call_done = nfs4_delegreturn_done,
 	.rpc_release = nfs4_delegreturn_release,
@@ -3644,8 +3601,11 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.pathconf	= nfs4_proc_pathconf,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
+	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
+	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
+	.commit_done	= nfs4_commit_done,
 	.file_open      = nfs_open,
 	.file_release   = nfs_release,
 	.lock		= nfs4_proc_lock,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4bbf5ef..b956753 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
 	.p_encode = (kxdrproc_t) nfs4_xdr_##argtype,		\
 	.p_decode = (kxdrproc_t) nfs4_xdr_##restype,		\
 	.p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,	\
+	.p_statidx = NFSPROC4_CLNT_##proc,			\
+	.p_name   = #proc,					\
     }
 
 struct rpc_procinfo	nfs4_procedures[] = {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d53857b..d6e076c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,6 +85,10 @@ nfs_create_request(struct nfs_open_conte
 	atomic_set(&req->wb_complete, 0);
 	req->wb_index	= page->index;
 	page_cache_get(page);
+	BUG_ON(PagePrivate(page));
+	BUG_ON(!PageLocked(page));
+	BUG_ON(page->mapping->host != inode);
+	SetPagePrivate(page);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -147,8 +151,10 @@ void nfs_clear_page_writeback(struct nfs
  */
 void nfs_clear_request(struct nfs_page *req)
 {
-	if (req->wb_page) {
-		page_cache_release(req->wb_page);
+	struct page *page = req->wb_page;
+	if (page != NULL) {
+		ClearPagePrivate(page);
+		page_cache_release(page);
 		req->wb_page = NULL;
 	}
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f5150d7..9dd85ca 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *ser
 {
 	struct nfs_fattr *fattr = info->fattr;
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int status;
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
-	status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+	msg.rpc_resp = &fsinfo;
+	status = rpc_call_sync(server->client_sys, &msg, 0);
 	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
@@ -90,12 +97,16 @@ static int
 nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fattr *fattr)
 {
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	dprintk("NFS call  getattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(server->client, NFSPROC_GETATTR,
-				fhandle, fattr, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply getattr: %d\n", status);
 	return status;
 }
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, 
 		.fh	= NFS_FH(inode),
 		.sattr	= sattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_SETATTR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= fattr,
+	};
 	int	status;
 
 	/* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, 
 
 	dprintk("NFS call  setattr\n");
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
 	dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struc
 		.fh		= fhandle,
 		.fattr		= fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_LOOKUP],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
 	nfs_fattr_init(fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
 }
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inod
 		.pglen		= pglen,
 		.pages		= &page
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_READLINK],
+		.rpc_argp	= &args,
+	};
 	int			status;
 
 	dprintk("NFS call  readlink\n");
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	dprintk("NFS reply readlink: %d\n", status);
 	return status;
 }
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struc
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	nfs_fattr_init(&fattr);
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
 	dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_CREATE],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int status, mode;
 
 	dprintk("NFS call  mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct
 	}
 
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 
 	if (status == -EINVAL && S_ISFIFO(mode)) {
 		sattr->ia_mode = mode;
 		nfs_fattr_init(&fattr);
-		status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	}
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struc
 	struct rpc_message	msg = { 
 		.rpc_proc	= &nfs_procedures[NFSPROC_REMOVE],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
-		.rpc_cred	= NULL
 	};
 	int			status;
 
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, s
 		.toname		= new_name->name,
 		.tolen		= new_name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_RENAME],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
-	status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
 	nfs_mark_for_revalidate(old_dir);
 	nfs_mark_for_revalidate(new_dir);
 	dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struc
 		.toname		= name->name,
 		.tolen		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_LINK],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
-	status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_mark_for_revalidate(inode);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, stru
 		.tolen		= path->len,
 		.sattr		= sattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_SYMLINK],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, stru
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
 	nfs_fattr_init(fattr);
 	fhandle->size = 0;
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct
 		.fh		= &fhandle,
 		.fattr		= &fattr
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_MKDIR],
+		.rpc_argp	= &arg,
+		.rpc_resp	= &res,
+	};
 	int			status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	nfs_fattr_init(&fattr);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct
 		.name		= name->name,
 		.len		= name->len
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_RMDIR],
+		.rpc_argp	= &arg,
+	};
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
-	status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, 
 		.fh		= NFS_FH(dir),
 		.cookie		= cookie,
 		.count		= count,
-		.pages		= &page
+		.pages		= &page,
 	};
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
 		.rpc_argp	= &arg,
-		.rpc_resp	= NULL,
-		.rpc_cred	= cred
+		.rpc_cred	= cred,
 	};
 	int			status;
 
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *serve
 			struct nfs_fsstat *stat)
 {
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_STATFS],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= &fsinfo,
+	};
 	int	status;
 
 	dprintk("NFS call  statfs\n");
 	nfs_fattr_init(stat->fattr);
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	if (status)
 		goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *serve
 			struct nfs_fsinfo *info)
 {
 	struct nfs2_fsstat fsinfo;
+	struct rpc_message msg = {
+		.rpc_proc	= &nfs_procedures[NFSPROC_STATFS],
+		.rpc_argp	= fhandle,
+		.rpc_resp	= &fsinfo,
+	};
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	if (status)
 		goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *ser
 
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
-
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(data->inode, data->res.fattr);
 		/* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_tas
 		if (data->args.offset + data->args.count >= data->res.fattr->size)
 			data->res.eof = 1;
 	}
-	nfs_readpage_result(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs_read_ops = {
-	.rpc_call_done = nfs_read_done,
-	.rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
 		.rpc_argp	= &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data
 		.rpc_cred	= data->cred,
 	};
 
-	/* N.B. Do we need to test? Never called for swapfile inode */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data *data = calldata;
-
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task, calldata);
+	return 0;
 }
 
-static const struct rpc_call_ops nfs_write_ops = {
-	.rpc_call_done = nfs_write_done,
-	.rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
 {
-	struct rpc_task		*task = &data->task;
-	struct inode		*inode = data->inode;
-	int			flags;
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
 		.rpc_argp	= &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_da
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
 	data->args.stable = NFS_FILE_SYNC;
 
-	/* Set the initial flags for the task.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_setup(&data->task, &msg, 0);
 }
 
 static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops	nfs_v2_clientops = {
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
+	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
+	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
 	.file_open	= nfs_open,
 	.file_release	= nfs_release,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43f..3961524 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,17 +31,49 @@
 
 #include <asm/system.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
 
 static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_rdata_mempool);
+				p = NULL;
+			}
+		}
+	}
+	return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
+	mempool_free(p, nfs_rdata_mempool);
+}
+
 void nfs_readdata_release(void *data)
 {
         nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_
 		}
 		count -= result;
 		rdata->args.pgbase += result;
+		nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
 		/* Note: result == 0 should only happen if we're caching
 		 * a write that extends the file and punches a hole.
 		 */
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct 
  * Set up the NFS read request struct
  */
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset)
 {
 	struct inode		*inode;
+	int flags;
 
 	data->req	  = req;
 	data->inode	  = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs
 	data->res.eof     = 0;
 	nfs_fattr_init(&data->fattr);
 
+	/* Set up the initial task struct. */
+	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
 	NFS_PROTO(inode)->read_setup(data);
 
 	data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_
 		list_del_init(&data->pages);
 
 		data->pagevec[0] = page;
-		data->complete = nfs_readpage_result_partial;
 
 		if (nbytes > rsize) {
-			nfs_read_rpcsetup(req, data, rsize, offset);
+			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+					rsize, offset);
 			offset += rsize;
 			nbytes -= rsize;
 		} else {
-			nfs_read_rpcsetup(req, data, nbytes, offset);
+			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+					nbytes, offset);
 			nbytes = 0;
 		}
 		nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_he
 	}
 	req = nfs_list_entry(data->pages.next);
 
-	data->complete = nfs_readpage_result_full;
-	nfs_read_rpcsetup(req, data, count, 0);
+	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
 
 	nfs_execute_read(data);
 	return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, 
 /*
  * Handle a read reply that fills part of a page.
  */
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	struct nfs_page *req = data->req;
 	struct page *page = req->wb_page;
  
-	if (status >= 0) {
+	if (nfs_readpage_result(task, data) != 0)
+		return;
+	if (task->tk_status >= 0) {
 		unsigned int request = data->args.count;
 		unsigned int result = data->res.count;
 
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(
 	}
 }
 
+static const struct rpc_call_ops nfs_read_partial_ops = {
+	.rpc_call_done = nfs_readpage_result_partial,
+	.rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
+	struct nfs_read_data *data = calldata;
 	unsigned int count = data->res.count;
 
+	if (nfs_readpage_result(task, data) != 0)
+		return;
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 		struct page *page = req->wb_page;
 		nfs_list_remove_request(req);
 
-		if (status >= 0) {
+		if (task->tk_status >= 0) {
 			if (count < PAGE_CACHE_SIZE) {
 				if (count < req->wb_bytes)
 					memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(str
 	}
 }
 
+static const struct rpc_call_ops nfs_read_full_ops = {
+	.rpc_call_done = nfs_readpage_result_full,
+	.rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_read_data *data = calldata;
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
-	int status = task->tk_status;
+	int status;
 
 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-		task->tk_pid, status);
+		task->tk_pid, task->tk_status);
+
+	status = NFS_PROTO(data->inode)->read_done(task, data);
+	if (status != 0)
+		return status;
+
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
 	/* Is this a short read? */
 	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task
 			argp->pgbase += resp->count;
 			argp->count -= resp->count;
 			rpc_restart_call(task);
-			return;
+			return -EAGAIN;
 		}
 		task->tk_status = -EIO;
 	}
 	spin_lock(&data->inode->i_lock);
 	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
 	spin_unlock(&data->inode->i_lock);
-	data->complete(data, status);
+	return 0;
 }
 
 /*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, stru
 
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
 		page, PAGE_CACHE_SIZE, page->index);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
 	/*
 	 * Try to flush any pending writes to the file..
 	 *
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, str
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			nr_pages);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, str
 	if (!list_empty(&head)) {
 		int err = nfs_pagein_list(&head, server->rpages);
 		if (!ret)
+			nfs_add_stats(inode, NFSIOS_READPAGES, err);
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index a65c7b5..0e28189 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
 	struct rpc_clnt	*clnt = NFS_CLIENT(dir->d_inode);
 	int		status = -ENOMEM;
 
-	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		goto out;
-	memset(data, 0, sizeof(*data));
 
 	data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
 	if (IS_ERR(data->cred)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9449b68..647e321 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -76,20 +77,21 @@ static struct nfs_page * nfs_update_requ
 					    struct inode *,
 					    struct page *,
 					    unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 			   unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
 
 static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
@@ -100,11 +102,39 @@ static inline struct nfs_write_data *nfs
 			p->pagevec = &p->page_array[0];
 		else {
 			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kzalloc(size, GFP_NOFS);
+			if (!p->pagevec) {
+				mempool_free(p, nfs_commit_mempool);
+				p = NULL;
+			}
+		}
+	}
+	return p;
+}
+
+void nfs_commit_free(struct nfs_write_data *p)
+{
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
+	mempool_free(p, nfs_commit_mempool);
+}
+
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
 			p->pagevec = kmalloc(size, GFP_NOFS);
 			if (p->pagevec) {
 				memset(p->pagevec, 0, size);
 			} else {
-				mempool_free(p, nfs_commit_mempool);
+				mempool_free(p, nfs_wdata_mempool);
 				p = NULL;
 			}
 		}
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs
 	return p;
 }
 
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
 {
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
-	mempool_free(p, nfs_commit_mempool);
+	mempool_free(p, nfs_wdata_mempool);
 }
 
 void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *p
 	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
 	if (i_size >= end)
 		return;
+	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 	i_size_write(inode, end);
 }
 
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs
 	        wdata->args.pgbase += result;
 		written += result;
 		count -= result;
+		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
 	} while (count);
 	/* Update file length */
 	nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, str
 	int priority = wb_priority(wbc);
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
 	/*
 	 * Note: We need to ensure that we have a reference to the inode
 	 *       if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space 
 	struct inode *inode = mapping->host;
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
 	err = generic_writepages(mapping, wbc);
 	if (err)
 		return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space 
 	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
 	if (err < 0)
 		goto out;
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
 	wbc->nr_to_write -= err;
 	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
 		err = nfs_wait_on_requests(inode, 0, 0);
@@ -598,6 +636,9 @@ static int nfs_wait_on_write_congestion(
 
 	if (!bdi_write_congested(bdi))
 		return 0;
+
+	nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
 	if (intr) {
 		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
 		sigset_t oldset;
@@ -653,8 +694,11 @@ static struct nfs_page * nfs_update_requ
 				spin_unlock(&nfsi->req_lock);
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
-				if (error < 0)
+				if (error < 0) {
+					if (new)
+						nfs_release_request(new);
 					return ERR_PTR(error);
+				}
 				continue;
 			}
 			spin_unlock(&nfsi->req_lock);
@@ -748,6 +792,8 @@ int nfs_updatepage(struct file *file, st
 	struct nfs_page	*req;
 	int		status = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name, count,
@@ -857,10 +903,12 @@ static inline int flush_task_priority(in
  */
 static void nfs_write_rpcsetup(struct nfs_page *req,
 		struct nfs_write_data *data,
+		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset,
 		int how)
 {
 	struct inode		*inode;
+	int flags;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -881,6 +929,9 @@ static void nfs_write_rpcsetup(struct nf
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
 
+	/* Set up the initial task struct.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
 	NFS_PROTO(inode)->write_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
@@ -944,14 +995,15 @@ static int nfs_flush_multi(struct list_h
 		list_del_init(&data->pages);
 
 		data->pagevec[0] = page;
-		data->complete = nfs_writeback_done_partial;
 
 		if (nbytes > wsize) {
-			nfs_write_rpcsetup(req, data, wsize, offset, how);
+			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+					wsize, offset, how);
 			offset += wsize;
 			nbytes -= wsize;
 		} else {
-			nfs_write_rpcsetup(req, data, nbytes, offset, how);
+			nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+					nbytes, offset, how);
 			nbytes = 0;
 		}
 		nfs_execute_write(data);
@@ -1005,9 +1057,8 @@ static int nfs_flush_one(struct list_hea
 	}
 	req = nfs_list_entry(data->pages.next);
 
-	data->complete = nfs_writeback_done_full;
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(req, data, count, 0, how);
+	nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 
 	nfs_execute_write(data);
 	return 0;
@@ -1051,8 +1102,9 @@ nfs_flush_list(struct list_head *head, i
 /*
  * Handle a write reply that flushed part of a page.
  */
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 {
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req = data->req;
 	struct page		*page = req->wb_page;
 
@@ -1062,11 +1114,14 @@ static void nfs_writeback_done_partial(s
 		req->wb_bytes,
 		(long long)req_offset(req));
 
-	if (status < 0) {
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
+	if (task->tk_status < 0) {
 		ClearPageUptodate(page);
 		SetPageError(page);
-		req->wb_context->error = status;
-		dprintk(", error = %d\n", status);
+		req->wb_context->error = task->tk_status;
+		dprintk(", error = %d\n", task->tk_status);
 	} else {
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1142,11 @@ static void nfs_writeback_done_partial(s
 		nfs_writepage_release(req);
 }
 
+static const struct rpc_call_ops nfs_write_partial_ops = {
+	.rpc_call_done = nfs_writeback_done_partial,
+	.rpc_release = nfs_writedata_release,
+};
+
 /*
  * Handle a write reply that flushes a whole page.
  *
@@ -1094,11 +1154,15 @@ static void nfs_writeback_done_partial(s
  *	  writebacks since the page->count is kept > 1 for as long
  *	  as the page has a write request pending.
  */
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 {
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
 	struct page		*page;
 
+	if (nfs_writeback_done(task, data) != 0)
+		return;
+
 	/* Update attributes as result of writeback. */
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1175,13 @@ static void nfs_writeback_done_full(stru
 			req->wb_bytes,
 			(long long)req_offset(req));
 
-		if (status < 0) {
+		if (task->tk_status < 0) {
 			ClearPageUptodate(page);
 			SetPageError(page);
-			req->wb_context->error = status;
+			req->wb_context->error = task->tk_status;
 			end_page_writeback(page);
 			nfs_inode_remove_request(req);
-			dprintk(", error = %d\n", status);
+			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
 		end_page_writeback(page);
@@ -1139,18 +1203,30 @@ static void nfs_writeback_done_full(stru
 	}
 }
 
+static const struct rpc_call_ops nfs_write_full_ops = {
+	.rpc_call_done = nfs_writeback_done_full,
+	.rpc_release = nfs_writedata_release,
+};
+
+
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_write_data	*data = calldata;
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
+	int status;
 
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
+	/* Call the NFS version-specific code */
+	status = NFS_PROTO(data->inode)->write_done(task, data);
+	if (status != 0)
+		return status;
+	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
@@ -1176,6 +1252,8 @@ void nfs_writeback_done(struct rpc_task 
 	if (task->tk_status >= 0 && resp->count < argp->count) {
 		static unsigned long    complain;
 
+		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1269,7 @@ void nfs_writeback_done(struct rpc_task 
 				argp->stable = NFS_FILE_SYNC;
 			}
 			rpc_restart_call(task);
-			return;
+			return -EAGAIN;
 		}
 		if (time_before(complain, jiffies)) {
 			printk(KERN_WARNING
@@ -1202,11 +1280,7 @@ void nfs_writeback_done(struct rpc_task 
 		/* Can't do anything about it except throw an error. */
 		task->tk_status = -EIO;
 	}
-
-	/*
-	 * Process the nfs_page list
-	 */
-	data->complete(data, task->tk_status);
+	return 0;
 }
 
 
@@ -1220,10 +1294,12 @@ void nfs_commit_release(void *wdata)
  * Set up the argument/result storage required for the RPC call.
  */
 static void nfs_commit_rpcsetup(struct list_head *head,
-		struct nfs_write_data *data, int how)
+		struct nfs_write_data *data,
+		int how)
 {
 	struct nfs_page		*first;
 	struct inode		*inode;
+	int flags;
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1319,10 @@ static void nfs_commit_rpcsetup(struct l
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
-	
+
+	/* Set up the initial task struct.  */
+	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
 	NFS_PROTO(inode)->commit_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1363,7 @@ nfs_commit_list(struct inode *inode, str
 /*
  * COMMIT call returned
  */
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
@@ -1293,6 +1372,10 @@ void nfs_commit_done(struct rpc_task *ta
         dprintk("NFS: %4d nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
 
+	/* Call the NFS version-specific code */
+	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+		return;
+
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
@@ -1326,6 +1409,11 @@ void nfs_commit_done(struct rpc_task *ta
 	}
 	sub_page_state(nr_unstable,res);
 }
+
+static const struct rpc_call_ops nfs_commit_ops = {
+	.rpc_call_done = nfs_commit_done,
+	.rpc_release = nfs_commit_release,
+};
 #endif
 
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index d828662..4f391cb 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -326,6 +326,8 @@ out:
         .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
         .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
         .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
+        .p_statidx = NFSPROC4_CB_##call,				\
+	.p_name   = #proc,                                              \
 }
 
 static struct rpc_procinfo     nfs4_cb_procedures[] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1143cfb..f6ab762 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
 	struct nfs4_stateid *lock_stp;
 	struct file *filp;
 	struct file_lock file_lock;
-	struct file_lock *conflock;
+	struct file_lock conflock;
 	int status = 0;
 	unsigned int strhashval;
 
@@ -2775,11 +2775,11 @@ conflicting_lock:
 	/* XXX There is a race here. Future patch needed to provide 
 	 * an atomic posix_lock_and_test_file
 	 */
-	if (!(conflock = posix_test_lock(filp, &file_lock))) {
+	if (!posix_test_lock(filp, &file_lock, &conflock)) {
 		status = nfserr_serverfault;
 		goto out;
 	}
-	nfs4_set_lock_denied(conflock, &lock->lk_denied);
+	nfs4_set_lock_denied(&conflock, &lock->lk_denied);
 out:
 	if (status && lock->lk_is_new && lock_sop)
 		release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru
 	struct inode *inode;
 	struct file file;
 	struct file_lock file_lock;
-	struct file_lock *conflicting_lock;
+	struct file_lock conflock;
 	int status;
 
 	if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru
 	file.f_dentry = current_fh->fh_dentry;
 
 	status = nfs_ok;
-	conflicting_lock = posix_test_lock(&file, &file_lock);
-	if (conflicting_lock) {
+	if (posix_test_lock(&file, &file_lock, &conflock)) {
 		status = nfserr_denied;
-		nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+		nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
 	}
 out:
 	nfs4_unlock_state();
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 20feb75..8f1f49c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -104,6 +104,7 @@ enum pid_directory_inos {
 	PROC_TGID_MAPS,
 	PROC_TGID_NUMA_MAPS,
 	PROC_TGID_MOUNTS,
+	PROC_TGID_MOUNTSTATS,
 	PROC_TGID_WCHAN,
 #ifdef CONFIG_MMU
 	PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
 	PROC_TID_MAPS,
 	PROC_TID_NUMA_MAPS,
 	PROC_TID_MOUNTS,
+	PROC_TID_MOUNTSTATS,
 	PROC_TID_WCHAN,
 #ifdef CONFIG_MMU
 	PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[
 	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
 	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
+	E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
 #ifdef CONFIG_MMU
 	E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
 #endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mount
 	.poll		= mounts_poll,
 };
 
+extern struct seq_operations mountstats_op;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+	struct task_struct *task = proc_task(inode);
+	int ret = seq_open(file, &mountstats_op);
+
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		struct namespace *namespace;
+		task_lock(task);
+		namespace = task->namespace;
+		if (namespace)
+			get_namespace(namespace);
+		task_unlock(task);
+
+		if (namespace)
+			m->private = namespace;
+		else {
+			seq_release(inode, file);
+			ret = -EINVAL;
+		}
+	}
+	return ret;
+}
+
+static struct file_operations proc_mountstats_operations = {
+	.open		= mountstats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= mounts_release,
+};
+
 #define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */
 
 static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup
 			inode->i_fop = &proc_smaps_operations;
 			break;
 #endif
+		case PROC_TID_MOUNTSTATS:
+		case PROC_TGID_MOUNTSTATS:
+			inode->i_fop = &proc_mountstats_operations;
+			break;
 #ifdef CONFIG_SECURITY
 		case PROC_TID_ATTR:
 			inode->i_nlink = 2;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e059da9..4652e42 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -666,7 +666,6 @@ extern spinlock_t files_lock;
 #define FL_POSIX	1
 #define FL_FLOCK	2
 #define FL_ACCESS	8	/* not trying to lock, just looking */
-#define FL_LOCKD	16	/* lock held by rpc.lockd */
 #define FL_LEASE	32	/* lease held on this file */
 #define FL_SLEEP	128	/* A blocking lock */
 
@@ -730,8 +729,6 @@ struct file_lock {
 #define OFFT_OFFSET_MAX	INT_LIMIT(off_t)
 #endif
 
-extern struct list_head file_lock_list;
-
 #include <linux/fcntl.h>
 
 extern int fcntl_getlk(struct file *, struct flock __user *);
@@ -753,10 +750,9 @@ extern void locks_init_lock(struct file_
 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_flock(struct file *);
-extern struct file_lock *posix_test_lock(struct file *, struct file_lock *);
+extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
-extern void posix_block_lock(struct file_lock *, struct file_lock *);
 extern int posix_unblock_lock(struct file *, struct file_lock *);
 extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
@@ -1085,6 +1081,7 @@ struct super_operations {
 	void (*umount_begin) (struct super_block *);
 
 	int (*show_options)(struct seq_file *, struct vfsmount *);
+	int (*show_stats)(struct seq_file *, struct vfsmount *);
 
 	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ef21ed2..a04137d 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -14,6 +14,7 @@
 #include <linux/config.h>
 #include <linux/in.h>
 #include <linux/fs.h>
+#include <linux/kref.h>
 #include <linux/utsname.h>
 #include <linux/nfsd/nfsfh.h>
 #include <linux/lockd/bind.h>
@@ -58,6 +59,8 @@ struct nlm_host {
 	unsigned long		h_expires;	/* eligible for GC */
 	struct list_head	h_lockowners;	/* Lockowners for the client */
 	spinlock_t		h_lock;
+	struct list_head	h_granted;	/* Locks in GRANTED state */
+	struct list_head	h_reclaim;	/* Locks in RECLAIM state */
 };
 
 /*
@@ -83,9 +86,9 @@ struct nlm_rqst {
 	struct nlm_host *	a_host;		/* host handle */
 	struct nlm_args		a_args;		/* arguments */
 	struct nlm_res		a_res;		/* result */
-	struct nlm_wait *	a_block;
+	struct nlm_block *	a_block;
 	unsigned int		a_retries;	/* Retry count */
-	char			a_owner[NLMCLNT_OHSIZE];
+	u8			a_owner[NLMCLNT_OHSIZE];
 };
 
 /*
@@ -110,16 +113,16 @@ struct nlm_file {
  */
 #define NLM_NEVER		(~(unsigned long) 0)
 struct nlm_block {
+	struct kref		b_count;	/* Reference count */
 	struct nlm_block *	b_next;		/* linked list (all blocks) */
 	struct nlm_block *	b_fnext;	/* linked list (per file) */
-	struct nlm_rqst		b_call;		/* RPC args & callback info */
+	struct nlm_rqst	*	b_call;		/* RPC args & callback info */
 	struct svc_serv *	b_daemon;	/* NLM service */
 	struct nlm_host *	b_host;		/* host handle for RPC clnt */
 	unsigned long		b_when;		/* next re-xmit */
 	unsigned int		b_id;		/* block id */
 	unsigned char		b_queued;	/* re-queued */
 	unsigned char		b_granted;	/* VFS granted lock */
-	unsigned char		b_incall;	/* doing callback */
 	unsigned char		b_done;		/* callback complete */
 	struct nlm_file *	b_file;		/* file in question */
 };
@@ -145,15 +148,16 @@ extern unsigned long		nlmsvc_timeout;
 /*
  * Lockd client functions
  */
-struct nlm_rqst * nlmclnt_alloc_call(void);
-int		  nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
-void		  nlmclnt_finish_block(struct nlm_rqst *req);
-long		  nlmclnt_block(struct nlm_rqst *req, long timeout);
+struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
+void		  nlm_release_call(struct nlm_rqst *);
+int		  nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+int		  nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
+void		  nlmclnt_finish_block(struct nlm_wait *block);
+int		  nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
 u32		  nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *);
 void		  nlmclnt_recovery(struct nlm_host *, u32);
 int		  nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
-int		  nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *);
-void		  nlmclnt_freegrantargs(struct nlm_rqst *);
 
 /*
  * Host cache
@@ -172,7 +176,6 @@ extern struct nlm_host *nlm_find_client(
 /*
  * Server-side lock handling
  */
-int		  nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 u32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
 					struct nlm_lock *, int, struct nlm_cookie *);
 u32		  nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d7a5cc4..bb0a0f1 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -28,6 +28,7 @@ struct nlm_lock {
 	int			len; 	/* length of "caller" */
 	struct nfs_fh		fh;
 	struct xdr_netobj	oh;
+	u32			svid;
 	struct file_lock	fl;
 };
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b4dc6e2..7f36fe9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -118,8 +118,7 @@ struct nfs_inode {
 	unsigned long		cache_validity;		/* bit mask */
 
 	/*
-	 * read_cache_jiffies is when we started read-caching this inode,
-	 * and read_cache_mtime is the mtime of the inode at that time.
+	 * read_cache_jiffies is when we started read-caching this inode.
 	 * attrtimeo is for how long the cached information is assumed
 	 * to be valid. A successful attribute revalidation doubles
 	 * attrtimeo (up to acregmax/acdirmax), a failure resets it to
@@ -128,11 +127,6 @@ struct nfs_inode {
 	 * We need to revalidate the cached attrs for this inode if
 	 *
 	 *	jiffies - read_cache_jiffies > attrtimeo
-	 *
-	 * and invalidate any cached data/flush out any dirty pages if
-	 * we find that
-	 *
-	 *	mtime != read_cache_mtime
 	 */
 	unsigned long		read_cache_jiffies;
 	unsigned long		attrtimeo;
@@ -415,12 +409,12 @@ extern int  nfs_writepage(struct page *p
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *task, void *data);
-extern void nfs_writedata_release(void *data);
+extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+extern void nfs_writedata_release(void *);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern void nfs_commit_done(struct rpc_task *, void *data);
-extern void nfs_commit_release(void *data);
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+void nfs_commit_free(struct nfs_write_data *p);
 #endif
 
 /*
@@ -430,6 +424,7 @@ extern void nfs_commit_release(void *dat
 extern int  nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
+extern void nfs_commit_release(void *wdata);
 #else
 static inline int
 nfs_commit_inode(struct inode *inode, int how)
@@ -469,37 +464,8 @@ static inline int nfs_wb_page(struct ino
 /*
  * Allocate and free nfs_write_data structures
  */
-extern mempool_t *nfs_wdata_mempool;
-
-static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
-{
-	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
-
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-		if (pagecount < NFS_PAGEVEC_SIZE)
-			p->pagevec = &p->page_array[0];
-		else {
-			size_t size = ++pagecount * sizeof(struct page *);
-			p->pagevec = kmalloc(size, GFP_NOFS);
-			if (p->pagevec) {
-				memset(p->pagevec, 0, size);
-			} else {
-				mempool_free(p, nfs_wdata_mempool);
-				p = NULL;
-			}
-		}
-	}
-	return p;
-}
-
-static inline void nfs_writedata_free(struct nfs_write_data *p)
-{
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
-	mempool_free(p, nfs_wdata_mempool);
-}
+extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern void nfs_writedata_free(struct nfs_write_data *p);
 
 /*
  * linux/fs/nfs/read.c
@@ -507,44 +473,14 @@ static inline void nfs_writedata_free(st
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
 		struct list_head *, unsigned);
-extern void nfs_readpage_result(struct rpc_task *, void *);
-extern void  nfs_readdata_release(void *data);
-
+extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
+extern void nfs_readdata_release(void *data);
 
 /*
  * Allocate and free nfs_read_data structures
  */
-extern mempool_t *nfs_rdata_mempool;
-
-static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
-{
-	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
-
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-		if (pagecount < NFS_PAGEVEC_SIZE)
-			p->pagevec = &p->page_array[0];
-		else {
-			size_t size = ++pagecount * sizeof(struct page *);
-			p->pagevec = kmalloc(size, GFP_NOFS);
-			if (p->pagevec) {
-				memset(p->pagevec, 0, size);
-			} else {
-				mempool_free(p, nfs_rdata_mempool);
-				p = NULL;
-			}
-		}
-	}
-	return p;
-}
-
-static inline void nfs_readdata_free(struct nfs_read_data *p)
-{
-	if (p && (p->pagevec != &p->page_array[0]))
-		kfree(p->pagevec);
-	mempool_free(p, nfs_rdata_mempool);
-}
+extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern void nfs_readdata_free(struct nfs_read_data *p);
 
 /*
  * linux/fs/nfs3proc.c
diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h
index e2c18da..8617302 100644
--- a/include/linux/nfs_fs_i.h
+++ b/include/linux/nfs_fs_i.h
@@ -12,8 +12,8 @@ struct nlm_lockowner;
  */
 struct nfs_lock_info {
 	u32		state;
-	u32		flags;
 	struct nlm_lockowner *owner;
+	struct list_head list;
 };
 
 struct nfs4_lock_state;
@@ -21,10 +21,4 @@ struct nfs4_lock_info {
 	struct nfs4_lock_state *owner;
 };
 
-/*
- * Lock flag values
- */
-#define NFS_LCK_GRANTED		0x0001		/* lock has been granted */
-#define NFS_LCK_RECLAIM		0x0002		/* lock marked for reclaiming */
-
 #endif
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3d3a305..65dec21 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -4,6 +4,8 @@
 #include <linux/list.h>
 #include <linux/backing-dev.h>
 
+struct nfs_iostats;
+
 /*
  * NFS client parameters stored in the superblock.
  */
@@ -12,6 +14,7 @@ struct nfs_server {
 	struct rpc_clnt *	client_sys;	/* 2nd handle for FSINFO */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_rpc_ops *	rpc_ops;	/* NFS protocol vector */
+	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
 	int			flags;		/* various flags */
 	unsigned int		caps;		/* server capabilities */
@@ -26,10 +29,13 @@ struct nfs_server {
 	unsigned int		acregmax;
 	unsigned int		acdirmin;
 	unsigned int		acdirmax;
+	unsigned long		retrans_timeo;	/* retransmit timeout */
+	unsigned int		retrans_count;	/* number of retransmit tries */
 	unsigned int		namelen;
 	char *			hostname;	/* remote hostname */
 	struct nfs_fh		fh;
 	struct sockaddr_in	addr;
+	unsigned long		mount_time;	/* when this fs was mounted */
 #ifdef CONFIG_NFS_V4
 	/* Our own IP address, as a null-terminated string.
 	 * This is used to generate the clientid, and the callback address.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6d6f69e..7fafc4c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -695,7 +695,6 @@ struct nfs_read_data {
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
 #endif
-	void (*complete) (struct nfs_read_data *, int);
 	struct page		*page_array[NFS_PAGEVEC_SIZE + 1];
 };
 
@@ -714,7 +713,6 @@ struct nfs_write_data {
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
 #endif
-	void (*complete) (struct nfs_write_data *, int);
 	struct page		*page_array[NFS_PAGEVEC_SIZE + 1];
 };
 
@@ -769,8 +767,11 @@ struct nfs_rpc_ops {
 			     struct nfs_pathconf *);
 	u32 *	(*decode_dirent)(u32 *, struct nfs_entry *, int plus);
 	void	(*read_setup)   (struct nfs_read_data *);
+	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, int how);
+	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
 	void	(*commit_setup) (struct nfs_write_data *, int how);
+	int	(*commit_done) (struct rpc_task *, struct nfs_write_data *);
 	int	(*file_open)   (struct inode *, struct file *);
 	int	(*file_release) (struct inode *, struct file *);
 	int	(*lock)(struct file *, int, struct file_lock *);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f147e6b..e37c061 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -45,7 +45,8 @@ struct rpc_clnt {
 	char *			cl_server;	/* server machine name */
 	char *			cl_protname;	/* protocol name */
 	struct rpc_auth *	cl_auth;	/* authenticator */
-	struct rpc_stat *	cl_stats;	/* statistics */
+	struct rpc_stat *	cl_stats;	/* per-program statistics */
+	struct rpc_iostats *	cl_metrics;	/* per-client statistics */
 
 	unsigned int		cl_softrtry : 1,/* soft timeouts */
 				cl_intr     : 1,/* interruptible */
@@ -100,6 +101,8 @@ struct rpc_procinfo {
 	unsigned int		p_bufsiz;	/* req. buffer size */
 	unsigned int		p_count;	/* call count */
 	unsigned int		p_timer;	/* Which RTT timer to use */
+	u32			p_statidx;	/* Which procedure to account */
+	char *			p_name;		/* name of procedure */
 };
 
 #define RPC_CONGESTED(clnt)	(RPCXPRT_CONGESTED((clnt)->cl_xprt))
@@ -137,20 +140,6 @@ size_t		rpc_max_payload(struct rpc_clnt 
 void		rpc_force_rebind(struct rpc_clnt *);
 int		rpc_ping(struct rpc_clnt *clnt, int flags);
 
-static __inline__
-int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
-	struct rpc_message msg = {
-		.rpc_proc	= &clnt->cl_procinfo[proc],
-		.rpc_argp	= argp,
-		.rpc_resp	= resp,
-		.rpc_cred	= NULL
-	};
-	return rpc_call_sync(clnt, &msg, flags);
-}
-		
-extern void rpciod_wake_up(void);
-
 /*
  * Helper function for NFSroot support
  */
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
new file mode 100644
index 0000000..8f96e9d
--- /dev/null
+++ b/include/linux/sunrpc/metrics.h
@@ -0,0 +1,77 @@
+/*
+ *  linux/include/linux/sunrpc/metrics.h
+ *
+ *  Declarations for RPC client per-operation metrics
+ *
+ *  Copyright (C) 2005	Chuck Lever <cel@netapp.com>
+ *
+ *  RPC client per-operation statistics provide latency and retry
+ *  information about each type of RPC procedure in a given RPC program.
+ *  These statistics are not for detailed problem diagnosis, but simply
+ *  to indicate whether the problem is local or remote.
+ *
+ *  These counters are not meant to be human-readable, but are meant to be
+ *  integrated into system monitoring tools such as "sar" and "iostat".  As
+ *  such, the counters are sampled by the tools over time, and are never
+ *  zeroed after a file system is mounted.  Moving averages can be computed
+ *  by the tools by taking the difference between two instantaneous samples
+ *  and dividing that by the time between the samples.
+ *
+ *  The counters are maintained in a single array per RPC client, indexed
+ *  by procedure number.  There is no need to maintain separate counter
+ *  arrays per-CPU because these counters are always modified behind locks.
+ */
+
+#ifndef _LINUX_SUNRPC_METRICS_H
+#define _LINUX_SUNRPC_METRICS_H
+
+#include <linux/seq_file.h>
+
+#define RPC_IOSTATS_VERS	"1.0"
+
+struct rpc_iostats {
+	/*
+	 * These counters give an idea about how many request
+	 * transmissions are required, on average, to complete that
+	 * particular procedure.  Some procedures may require more
+	 * than one transmission because the server is unresponsive,
+	 * the client is retransmitting too aggressively, or the
+	 * requests are large and the network is congested.
+	 */
+	unsigned long		om_ops,		/* count of operations */
+				om_ntrans,	/* count of RPC transmissions */
+				om_timeouts;	/* count of major timeouts */
+
+	/*
+	 * These count how many bytes are sent and received for a
+	 * given RPC procedure type.  This indicates how much load a
+	 * particular procedure is putting on the network.  These
+	 * counts include the RPC and ULP headers, and the request
+	 * payload.
+	 */
+	unsigned long long      om_bytes_sent,	/* count of bytes out */
+				om_bytes_recv;	/* count of bytes in */
+
+	/*
+	 * The length of time an RPC request waits in queue before
+	 * transmission, the network + server latency of the request,
+	 * and the total time the request spent from init to release
+	 * are measured.
+	 */
+	unsigned long long	om_queue,	/* jiffies queued for xmit */
+				om_rtt,		/* jiffies for RPC RTT */
+				om_execute;	/* jiffies for RPC execution */
+} ____cacheline_aligned;
+
+struct rpc_task;
+struct rpc_clnt;
+
+/*
+ * EXPORTed functions for managing rpc_iostats structures
+ */
+struct rpc_iostats *	rpc_alloc_iostats(struct rpc_clnt *);
+void			rpc_count_iostats(struct rpc_task *);
+void			rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void			rpc_free_iostats(struct rpc_iostats *);
+
+#endif /* _LINUX_SUNRPC_METRICS_H */
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8b25629..82a91bb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -86,6 +86,12 @@ struct rpc_task {
 		struct work_struct	tk_work;	/* Async task work queue */
 		struct rpc_wait		tk_wait;	/* RPC wait */
 	} u;
+
+	unsigned short		tk_timeouts;	/* maj timeouts */
+	size_t			tk_bytes_sent;	/* total bytes sent */
+	unsigned long		tk_start;	/* RPC task init timestamp */
+	long			tk_rtt;		/* round-trip time (jiffies) */
+
 #ifdef RPC_DEBUG
 	unsigned short		tk_pid;		/* debugging aid */
 #endif
@@ -203,6 +209,7 @@ struct rpc_wait_queue {
 	unsigned char		priority;		/* current priority */
 	unsigned char		count;			/* # task groups remaining serviced so far */
 	unsigned char		nr;			/* # tasks remaining for cookie */
+	unsigned short		qlen;			/* total # tasks waiting in queue */
 #ifdef RPC_DEBUG
 	const char *		name;
 #endif
@@ -269,13 +276,13 @@ void *		rpc_malloc(struct rpc_task *, si
 void		rpc_free(struct rpc_task *);
 int		rpciod_up(void);
 void		rpciod_down(void);
-void		rpciod_wake_up(void);
 int		__rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
 #ifdef RPC_DEBUG
 void		rpc_show_tasks(void);
 #endif
 int		rpc_init_mempool(void);
 void		rpc_destroy_mempool(void);
+extern struct workqueue_struct *rpciod_workqueue;
 
 static inline void rpc_exit(struct rpc_task *task, int status)
 {
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6ef99b1..7eebbab 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -114,6 +114,7 @@ struct rpc_xprt_ops {
 	void		(*release_request)(struct rpc_task *task);
 	void		(*close)(struct rpc_xprt *xprt);
 	void		(*destroy)(struct rpc_xprt *xprt);
+	void		(*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
 };
 
 struct rpc_xprt {
@@ -187,6 +188,18 @@ struct rpc_xprt {
 
 	struct list_head	recv;
 
+	struct {
+		unsigned long		bind_count,	/* total number of binds */
+					connect_count,	/* total number of connects */
+					connect_start,	/* connect start timestamp */
+					connect_time,	/* jiffies waiting for connect */
+					sends,		/* how many complete requests */
+					recvs,		/* how many complete requests */
+					bad_xids;	/* lookup_rqst didn't find XID */
+
+		unsigned long long	req_u,		/* average requests on the wire */
+					bklog_u;	/* backlog queue utilization */
+	} stat;
 
 	void			(*old_data_ready)(struct sock *, int);
 	void			(*old_state_change)(struct sock *);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8d6f1a1..55163af 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -64,14 +64,26 @@ rpcauth_create(rpc_authflavor_t pseudofl
 	struct rpc_authops	*ops;
 	u32			flavor = pseudoflavor_to_flavor(pseudoflavor);
 
-	if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
-		return ERR_PTR(-EINVAL);
+	auth = ERR_PTR(-EINVAL);
+	if (flavor >= RPC_AUTH_MAXFLAVOR)
+		goto out;
+
+	/* FIXME - auth_flavors[] really needs an rw lock,
+	 * and module refcounting. */
+#ifdef CONFIG_KMOD
+	if ((ops = auth_flavors[flavor]) == NULL)
+		request_module("rpc-auth-%u", flavor);
+#endif
+	if ((ops = auth_flavors[flavor]) == NULL)
+		goto out;
 	auth = ops->create(clnt, pseudoflavor);
 	if (IS_ERR(auth))
 		return auth;
 	if (clnt->cl_auth)
 		rpcauth_destroy(clnt->cl_auth);
 	clnt->cl_auth = auth;
+
+out:
 	return auth;
 }
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index bb46efd..900ef31 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -721,6 +721,8 @@ gss_destroy(struct rpc_auth *auth)
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
 	rpc_unlink(gss_auth->path);
+	dput(gss_auth->dentry);
+	gss_auth->dentry = NULL;
 	gss_mech_put(gss_auth->mech);
 
 	rpcauth_free_credcache(auth);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d2f0550..9d56fc8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -28,12 +28,11 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/utsname.h>
+#include <linux/workqueue.h>
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
-
-#include <linux/nfs.h>
+#include <linux/sunrpc/metrics.h>
 
 
 #define RPC_SLACK_SPACE		(1024)	/* total overkill */
@@ -147,6 +146,7 @@ rpc_new_client(struct rpc_xprt *xprt, ch
 	clnt->cl_vers     = version->number;
 	clnt->cl_prot     = xprt->prot;
 	clnt->cl_stats    = program->stats;
+	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
 	rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
 
 	if (!clnt->cl_port)
@@ -239,10 +239,12 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_autobind = 0;
 	new->cl_oneshot = 0;
 	new->cl_dead = 0;
+	dget(new->cl_dentry);
 	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 	if (new->cl_auth)
 		atomic_inc(&new->cl_auth->au_count);
 	new->cl_pmap		= &new->cl_pmap_default;
+	new->cl_metrics         = rpc_alloc_iostats(clnt);
 	rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
 	return new;
 out_no_clnt:
@@ -313,6 +315,10 @@ rpc_destroy_client(struct rpc_clnt *clnt
 	if (clnt->cl_server != clnt->cl_inline_name)
 		kfree(clnt->cl_server);
 out_free:
+	rpc_free_iostats(clnt->cl_metrics);
+	clnt->cl_metrics = NULL;
+	if (clnt->cl_dentry)
+		dput(clnt->cl_dentry);
 	kfree(clnt);
 	return 0;
 }
@@ -992,6 +998,8 @@ call_timeout(struct rpc_task *task)
 	}
 
 	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
+	task->tk_timeouts++;
+
 	if (RPC_IS_SOFT(task)) {
 		printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
@@ -1193,8 +1201,8 @@ call_verify(struct rpc_task *task)
 			task->tk_action = call_bind;
 			goto out_retry;
 		case RPC_AUTH_TOOWEAK:
-			printk(KERN_NOTICE "call_verify: server requires stronger "
-			       "authentication.\n");
+			printk(KERN_NOTICE "call_verify: server %s requires stronger "
+			       "authentication.\n", task->tk_client->cl_server);
 			break;
 		default:
 			printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 8139ce6..d25b054 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -82,6 +82,7 @@ rpc_getport(struct rpc_task *task, struc
 	rpc_call_setup(child, &msg, 0);
 
 	/* ... and run the child task */
+	task->tk_xprt->stat.bind_count++;
 	rpc_run_child(task, child, pmap_getport_done);
 	return;
 
@@ -103,6 +104,11 @@ rpc_getport_external(struct sockaddr_in 
 		.pm_prot	= prot,
 		.pm_port	= 0
 	};
+	struct rpc_message msg = {
+		.rpc_proc	= &pmap_procedures[PMAP_GETPORT],
+		.rpc_argp	= &map,
+		.rpc_resp	= &map.pm_port,
+	};
 	struct rpc_clnt	*pmap_clnt;
 	char		hostname[32];
 	int		status;
@@ -116,7 +122,7 @@ rpc_getport_external(struct sockaddr_in 
 		return PTR_ERR(pmap_clnt);
 
 	/* Setup the call info struct */
-	status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0);
+	status = rpc_call_sync(pmap_clnt, &msg, 0);
 
 	if (status >= 0) {
 		if (map.pm_port != 0)
@@ -161,16 +167,27 @@ pmap_getport_done(struct rpc_task *task)
 int
 rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 {
-	struct sockaddr_in	sin;
-	struct rpc_portmap	map;
+	struct sockaddr_in	sin = {
+		.sin_family	= AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+	};
+	struct rpc_portmap	map = {
+		.pm_prog	= prog,
+		.pm_vers	= vers,
+		.pm_prot	= prot,
+		.pm_port	= port,
+	};
+	struct rpc_message msg = {
+		.rpc_proc	= &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
+		.rpc_argp	= &map,
+		.rpc_resp	= okay,
+	};
 	struct rpc_clnt		*pmap_clnt;
 	int error = 0;
 
 	dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
 			prog, vers, prot, port);
 
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 	pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
 	if (IS_ERR(pmap_clnt)) {
 		error = PTR_ERR(pmap_clnt);
@@ -178,13 +195,7 @@ rpc_register(u32 prog, u32 vers, int pro
 		return error;
 	}
 
-	map.pm_prog = prog;
-	map.pm_vers = vers;
-	map.pm_prot = prot;
-	map.pm_port = port;
-
-	error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
-					&map, okay, 0);
+	error = rpc_call_sync(pmap_clnt, &msg, 0);
 
 	if (error < 0) {
 		printk(KERN_WARNING
@@ -260,6 +271,8 @@ static struct rpc_procinfo	pmap_procedur
 	  .p_decode		= (kxdrproc_t) xdr_decode_bool,
 	  .p_bufsiz		= 4,
 	  .p_count		= 1,
+	  .p_statidx		= PMAP_SET,
+	  .p_name		= "SET",
 	},
 [PMAP_UNSET] = {
 	  .p_proc		= PMAP_UNSET,
@@ -267,6 +280,8 @@ static struct rpc_procinfo	pmap_procedur
 	  .p_decode		= (kxdrproc_t) xdr_decode_bool,
 	  .p_bufsiz		= 4,
 	  .p_count		= 1,
+	  .p_statidx		= PMAP_UNSET,
+	  .p_name		= "UNSET",
 	},
 [PMAP_GETPORT] = {
 	  .p_proc		= PMAP_GETPORT,
@@ -274,6 +289,8 @@ static struct rpc_procinfo	pmap_procedur
 	  .p_decode		= (kxdrproc_t) xdr_decode_port,
 	  .p_bufsiz		= 4,
 	  .p_count		= 1,
+	  .p_statidx		= PMAP_GETPORT,
+	  .p_name		= "GETPORT",
 	},
 };
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index a5c0c7b..72b2217 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -91,7 +91,8 @@ rpc_queue_upcall(struct inode *inode, st
 		res = 0;
 	} else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
 		if (list_empty(&rpci->pipe))
-			schedule_delayed_work(&rpci->queue_timeout,
+			queue_delayed_work(rpciod_workqueue,
+					&rpci->queue_timeout,
 					RPC_UPCALL_TIMEOUT);
 		list_add_tail(&msg->list, &rpci->pipe);
 		rpci->pipelen += msg->len;
@@ -132,7 +133,7 @@ rpc_close_pipes(struct inode *inode)
 		if (ops->release_pipe)
 			ops->release_pipe(inode);
 		cancel_delayed_work(&rpci->queue_timeout);
-		flush_scheduled_work();
+		flush_workqueue(rpciod_workqueue);
 	}
 	rpc_inode_setowner(inode, NULL);
 	mutex_unlock(&inode->i_mutex);
@@ -668,7 +669,7 @@ rpc_mkdir(char *path, struct rpc_clnt *r
 out:
 	mutex_unlock(&dir->i_mutex);
 	rpc_release_path(&nd);
-	return dentry;
+	return dget(dentry);
 err_depopulate:
 	rpc_depopulate(dentry);
 	__rpc_rmdir(dir, dentry);
@@ -732,7 +733,7 @@ rpc_mkpipe(char *path, void *private, st
 out:
 	mutex_unlock(&dir->i_mutex);
 	rpc_release_path(&nd);
-	return dentry;
+	return dget(dentry);
 err_dput:
 	dput(dentry);
 	dentry = ERR_PTR(-ENOMEM);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 802d4fe..a04cf3b 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -64,7 +64,7 @@ static LIST_HEAD(all_tasks);
  */
 static DECLARE_MUTEX(rpciod_sema);
 static unsigned int		rpciod_users;
-static struct workqueue_struct *rpciod_workqueue;
+struct workqueue_struct *rpciod_workqueue;
 
 /*
  * Spinlock for other critical sections of code.
@@ -181,6 +181,7 @@ static void __rpc_add_wait_queue(struct 
 	else
 		list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
 	task->u.tk_wait.rpc_waitq = queue;
+	queue->qlen++;
 	rpc_set_queued(task);
 
 	dprintk("RPC: %4d added to queue %p \"%s\"\n",
@@ -215,6 +216,7 @@ static void __rpc_remove_wait_queue(stru
 		__rpc_remove_wait_queue_priority(task);
 	else
 		list_del(&task->u.tk_wait.list);
+	queue->qlen--;
 	dprintk("RPC: %4d removed from queue %p \"%s\"\n",
 				task->tk_pid, queue, rpc_qname(queue));
 }
@@ -818,6 +820,9 @@ void rpc_init_task(struct rpc_task *task
 
 	BUG_ON(task->tk_ops == NULL);
 
+	/* starting timestamp */
+	task->tk_start = jiffies;
+
 	dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
 				current->pid);
 }
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 4979f22..790941e 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -21,6 +21,7 @@
 #include <linux/seq_file.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/metrics.h>
 
 #define RPCDBG_FACILITY	RPCDBG_MISC
 
@@ -106,6 +107,120 @@ void svc_seq_show(struct seq_file *seq, 
 	}
 }
 
+/**
+ * rpc_alloc_iostats - allocate an rpc_iostats structure
+ * @clnt: RPC program, version, and xprt
+ *
+ */
+struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
+{
+	unsigned int ops = clnt->cl_maxproc;
+	size_t size = ops * sizeof(struct rpc_iostats);
+	struct rpc_iostats *new;
+
+	new = kmalloc(size, GFP_KERNEL);
+	if (new)
+		memset(new, 0 , size);
+	return new;
+}
+EXPORT_SYMBOL(rpc_alloc_iostats);
+
+/**
+ * rpc_free_iostats - release an rpc_iostats structure
+ * @stats: doomed rpc_iostats structure
+ *
+ */
+void rpc_free_iostats(struct rpc_iostats *stats)
+{
+	kfree(stats);
+}
+EXPORT_SYMBOL(rpc_free_iostats);
+
+/**
+ * rpc_count_iostats - tally up per-task stats
+ * @task: completed rpc_task
+ *
+ * Relies on the caller for serialization.
+ */
+void rpc_count_iostats(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_iostats *stats = task->tk_client->cl_metrics;
+	struct rpc_iostats *op_metrics;
+	long rtt, execute, queue;
+
+	if (!stats || !req)
+		return;
+	op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
+
+	op_metrics->om_ops++;
+	op_metrics->om_ntrans += req->rq_ntrans;
+	op_metrics->om_timeouts += task->tk_timeouts;
+
+	op_metrics->om_bytes_sent += task->tk_bytes_sent;
+	op_metrics->om_bytes_recv += req->rq_received;
+
+	queue = (long)req->rq_xtime - task->tk_start;
+	if (queue < 0)
+		queue = -queue;
+	op_metrics->om_queue += queue;
+
+	rtt = task->tk_rtt;
+	if (rtt < 0)
+		rtt = -rtt;
+	op_metrics->om_rtt += rtt;
+
+	execute = (long)jiffies - task->tk_start;
+	if (execute < 0)
+		execute = -execute;
+	op_metrics->om_execute += execute;
+}
+
+void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs)
+{
+	if (procs[op].p_name)
+		seq_printf(seq, "\t%12s: ", procs[op].p_name);
+	else if (op == 0)
+		seq_printf(seq, "\t        NULL: ");
+	else
+		seq_printf(seq, "\t%12u: ", op);
+}
+
+#define MILLISECS_PER_JIFFY	(1000 / HZ)
+
+void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+{
+	struct rpc_iostats *stats = clnt->cl_metrics;
+	struct rpc_xprt *xprt = clnt->cl_xprt;
+	unsigned int op, maxproc = clnt->cl_maxproc;
+
+	if (!stats)
+		return;
+
+	seq_printf(seq, "\tRPC iostats version: %s  ", RPC_IOSTATS_VERS);
+	seq_printf(seq, "p/v: %u/%u (%s)\n",
+			clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+
+	if (xprt)
+		xprt->ops->print_stats(xprt, seq);
+
+	seq_printf(seq, "\tper-op statistics\n");
+	for (op = 0; op < maxproc; op++) {
+		struct rpc_iostats *metrics = &stats[op];
+		_print_name(seq, op, clnt->cl_procinfo);
+		seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+				metrics->om_ops,
+				metrics->om_ntrans,
+				metrics->om_timeouts,
+				metrics->om_bytes_sent,
+				metrics->om_bytes_recv,
+				metrics->om_queue * MILLISECS_PER_JIFFY,
+				metrics->om_rtt * MILLISECS_PER_JIFFY,
+				metrics->om_execute * MILLISECS_PER_JIFFY);
+	}
+}
+EXPORT_SYMBOL(rpc_print_iostats);
+
 /*
  * Register/unregister RPC proc files
  */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8ff2c8a..940dba9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -44,13 +44,13 @@
 #include <linux/random.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
 
 /*
  * Local variables
  */
 
 #ifdef RPC_DEBUG
-# undef  RPC_DEBUG_DATA
 # define RPCDBG_FACILITY	RPCDBG_XPRT
 #endif
 
@@ -548,6 +548,7 @@ void xprt_connect(struct rpc_task *task)
 
 		task->tk_timeout = xprt->connect_timeout;
 		rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
+		xprt->stat.connect_start = jiffies;
 		xprt->ops->connect(task);
 	}
 	return;
@@ -558,6 +559,8 @@ static void xprt_connect_status(struct r
 	struct rpc_xprt	*xprt = task->tk_xprt;
 
 	if (task->tk_status >= 0) {
+		xprt->stat.connect_count++;
+		xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
 		dprintk("RPC: %4d xprt_connect_status: connection established\n",
 				task->tk_pid);
 		return;
@@ -601,16 +604,14 @@ static void xprt_connect_status(struct r
 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
 {
 	struct list_head *pos;
-	struct rpc_rqst	*req = NULL;
 
 	list_for_each(pos, &xprt->recv) {
 		struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
-		if (entry->rq_xid == xid) {
-			req = entry;
-			break;
-		}
+		if (entry->rq_xid == xid)
+			return entry;
 	}
-	return req;
+	xprt->stat.bad_xids++;
+	return NULL;
 }
 
 /**
@@ -646,6 +647,9 @@ void xprt_complete_rqst(struct rpc_task 
 	dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
 			task->tk_pid, ntohl(req->rq_xid), copied);
 
+	task->tk_xprt->stat.recvs++;
+	task->tk_rtt = (long)jiffies - req->rq_xtime;
+
 	list_del_init(&req->rq_list);
 	req->rq_received = req->rq_private_buf.len = copied;
 	rpc_wake_up_task(task);
@@ -744,12 +748,19 @@ void xprt_transmit(struct rpc_task *task
 	if (status == 0) {
 		dprintk("RPC: %4d xmit complete\n", task->tk_pid);
 		spin_lock_bh(&xprt->transport_lock);
+
 		xprt->ops->set_retrans_timeout(task);
+
+		xprt->stat.sends++;
+		xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
+		xprt->stat.bklog_u += xprt->backlog.qlen;
+
 		/* Don't race with disconnect */
 		if (!xprt_connected(xprt))
 			task->tk_status = -ENOTCONN;
 		else if (!req->rq_received)
 			rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
+
 		xprt->ops->release_xprt(xprt, task);
 		spin_unlock_bh(&xprt->transport_lock);
 		return;
@@ -848,6 +859,7 @@ void xprt_release(struct rpc_task *task)
 
 	if (!(req = task->tk_rqstp))
 		return;
+	rpc_count_iostats(task);
 	spin_lock_bh(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
 	if (xprt->ops->release_request)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c458f8d..4b4e7df 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -382,6 +382,7 @@ static int xs_tcp_send_request(struct rp
 		/* If we've sent the entire packet, immediately
 		 * reset the count of bytes sent. */
 		req->rq_bytes_sent += status;
+		task->tk_bytes_sent += status;
 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
 			req->rq_bytes_sent = 0;
 			return 0;
@@ -1114,6 +1115,8 @@ static void xs_tcp_connect_worker(void *
 	}
 
 	/* Tell the socket layer to start connecting... */
+	xprt->stat.connect_count++;
+	xprt->stat.connect_start = jiffies;
 	status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
 			sizeof(xprt->addr), O_NONBLOCK);
 	dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
@@ -1177,6 +1180,50 @@ static void xs_connect(struct rpc_task *
 	}
 }
 
+/**
+ * xs_udp_print_stats - display UDP socket-specifc stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+	seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
+			xprt->port,
+			xprt->stat.bind_count,
+			xprt->stat.sends,
+			xprt->stat.recvs,
+			xprt->stat.bad_xids,
+			xprt->stat.req_u,
+			xprt->stat.bklog_u);
+}
+
+/**
+ * xs_tcp_print_stats - display TCP socket-specifc stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+	long idle_time = 0;
+
+	if (xprt_connected(xprt))
+		idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+	seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
+			xprt->port,
+			xprt->stat.bind_count,
+			xprt->stat.connect_count,
+			xprt->stat.connect_time,
+			idle_time,
+			xprt->stat.sends,
+			xprt->stat.recvs,
+			xprt->stat.bad_xids,
+			xprt->stat.req_u,
+			xprt->stat.bklog_u);
+}
+
 static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
@@ -1191,6 +1238,7 @@ static struct rpc_xprt_ops xs_udp_ops = 
 	.release_request	= xprt_release_rqst_cong,
 	.close			= xs_close,
 	.destroy		= xs_destroy,
+	.print_stats		= xs_udp_print_stats,
 };
 
 static struct rpc_xprt_ops xs_tcp_ops = {
@@ -1204,6 +1252,7 @@ static struct rpc_xprt_ops xs_tcp_ops = 
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
 	.close			= xs_close,
 	.destroy		= xs_destroy,
+	.print_stats		= xs_tcp_print_stats,
 };
 
 /**