GIT 61c9fcd198238730db891af69027e733a7588ab6 git://oss.sgi.com:8090/xfs/xfs-2.6.git

commit 
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:49:43 2008 +1100

    [XFS] fix inode leak in xfs_iget_core()
    
    If the radix_tree_preload() fails, we need to destroy the inode we just
    read in before trying again. This could leak xfs_vnode structures when
    there is memory pressure. Noticed by Christoph Hellwig.
    
    SGI-PV: 977823
    SGI-Modid: xfs-linux-melb:xfs-kern:30606a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>

commit 24c281e89ecf546dd69e65b74ccb90c5e3850228
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:49:36 2008 +1100

    [XFS] Remove superflous xfs_readsb call in xfs_mountfs.
    
    When xfs_mountfs is called by xfs_mount xfs_readsb was called 35 lines
    above unconditionally, so there is no need to try to read the superblock
    if it's not present. If any other port doesn't have the superblock read at
    this point it should just call it directly from it's xfs_mount equivalent.
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30603a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Donald Douwsma <donaldd@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit fd5150834e292efb5367314f7dc9f8ba01d4b19e
Author: Niv Sardi <xaiki@sgi.com>
Date:   Thu Mar 6 13:49:26 2008 +1100

    [XFS] kill t_sema member of struct xfs_trans
    
    It's completely unused so we might aswell kill it. Note that there is
    another t_sema in struct xlog_ticket, which is used and actually an sv_t
    despite the name. That one is left untouched by this patch.
    
    SGI-PV: 971186
    SGI-Modid: xfs-linux-melb:xfs-kern:30591a
    
    Signed-off-by: Niv Sardi <xaiki@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit ce500c06bd5d242a3b0878839669296f33bf5e29
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:46:49 2008 +1100

    [XFS] cleanup vnode use in xfs_bmap.c
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30553a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit e1c03a89f15d98a90fd691dafba8d0a628734a77
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:46:43 2008 +1100

    [XFS] cleanup vnode use in xfs_iops.c
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30552a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit e57c749cb9242aa508feed64f524dd6d44eab90f
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:46:37 2008 +1100

    [XFS] cleanup vnode use in xfs_lrw.c
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30551a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit ce535a9f2bf4539260aeef394e88804e916ac6a9
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:46:25 2008 +1100

    [XFS] cleanup vnode use in xfs_lookup
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30550a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit b0481a0217985fdcfdd9e3f2254db59c4b90372c
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:46:19 2008 +1100

    [XFS] cleanup vnode use in xfs_symlink and xfs_rename
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30548a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 628dc8c106b0c6202ae4627c189f7c12e194a7a2
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:46:12 2008 +1100

    [XFS] cleanup vnode use in xfs_link
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30547a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit f0cc891093489cf2c29e89b2677ddc6af7ab58a4
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:46:05 2008 +1100

    [XFS] cleanup vnode use in xfs_create/mknod/mkdir
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30546a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 5b3334aca915e7b8bba977273ee89cdff0d6eeed
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:45:58 2008 +1100

    [XFS] cleanup vnode use in dmapi calls
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30545a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit f5fc8e8dd68da02295c54c99a4b4d08870e50b7a
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:45:50 2008 +1100

    [XFS] Fix superblock features2 field alignment problem
    
    Due to the xfs_dsb_t structure not being 64 bit aligned, the last field of
    the on-disk superblock can vary in location This causes problems when the
    filesystem gets moved to a different platform, or there is a 32 bit
    userspace and 64 bit kernel.
    
    This patch detects the defect at mount time, logs a warning such as:
    
    XFS: correcting sb_features alignment problem
    
    in dmesg and corrects the problem so that everything is OK. it also
    blacklists the bad field in the superblock so it does not get used for
    something else later on.
    
    SGI-PV: 977636
    SGI-Modid: xfs-linux-melb:xfs-kern:30539a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit ef11434e04b7553372cc99322c5aedb59f8fe5be
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:45:43 2008 +1100

    [XFS] Use power-of-2 sized buffers to reduce overhead
    
    Now that the ktrace_enter() code is using atomics, the non-power-of-2
    buffer sizes - which require modulus operations to get the index - are
    showing up as using substantial CPU in the profiles.
    
    Force the buffer sizes to be rounded up to the nearest power of two and
    use masking rather than modulus operations to convert the index counter to
    the buffer index. This reduces ktrace_enter overhead to 8% of a CPU time,
    and again almost halves the trace intensive test runtime.
    
    SGI-PV: 977546
    SGI-Modid: xfs-linux-melb:xfs-kern:30538a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 2d2f30e3ef953141d756e07c1d8cc0d7cc652557
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:45:35 2008 +1100

    [XFS] Use atomic counters for ktrace buffer indexes
    
    ktrace_enter() is consuming vast amounts of CPU time due to the use of a
    single global lock for protecting buffer index increments. Change it to
    use per-buffer atomic counters - this reduces ktrace_enter() overhead
    during a trace intensive test on a 4p machine from 58% of all CPU time to
    12% and halves test runtime.
    
    SGI-PV: 977546
    SGI-Modid: xfs-linux-melb:xfs-kern:30537a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 170c815e88c41859c9d79c9454c92dd9dd455bbe
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:45:29 2008 +1100

    [XFS] Update c/mtime correctly on truncates
    
    XFS changes the c/mtime of an inode when truncating it to the same size.
    The c/mtime is only supposed to change if the size is changed. Not to be
    confused with ftruncate, where the c/mtime is supposed to be changed even
    if the size is not changed.
    
    The Linux VFS encodes this semantic difference in the flags it sends down
    to ->setattr, which XFS currently ignores. We need to make XFS pay
    attention to the VFS flags and hence Do The Right Thing.
    
    SGI-PV: 977547
    SGI-Modid: xfs-linux-melb:xfs-kern:30536a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit c657925dc0057ed2ec0db845a1a4f56651adfe39
Author: Eric Sandeen <sandeen@sandeen.net>
Date:   Thu Mar 6 13:45:22 2008 +1100

    [XFS] The forward declarations for the xfs_ioctl() helpers and the
    associated comment about gcc behavior really aren't needed; all of these
    functions are marked STATIC which includes noinline, and the stack usage
    won't be a problem.
    
    This effectively just removes the forward declarations and moves
    xfs_ioctl() back to the end of the file.
    
    SGI-PV: 971186
    SGI-Modid: xfs-linux-melb:xfs-kern:30534a
    
    Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
    Signed-off-by: Niv Sardi <xaiki@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit c8911a1e34a4c3d858a4a6c176175c168da91256
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:45:16 2008 +1100

    [XFS] don't encode parent in nfs filehandles unless nessecary
    
    As Dave pointed out after the export ops changes we now always encode the
    parent into the filehandle for regular files, but it's not actually needed
    when the filesystem is export with no_subtree_check. This one-liner fixes
    xfs_fs_encode_fh to skip encoding the parent unless nessecary.
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30535a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 08d618d64e9f77e89b50ae55c19e29ac9af8d6d9
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:45:10 2008 +1100

    [XFS] 977545 977545 977545 977545 977545 977545 xfsaild causing too many
    wakeups
    
    Idle state is not being detected properly by the xfsaild push code. The
    current idle state is detected by an empty list which may never happen
    with mostly idle filesystem or one using lazy superblock counters. A
    single dirty item in the list that exists beyond the push target can
    result repeated looping attempting to push up to the target because it
    fails to check if the push target has been acheived or not.
    
    Fix by considering a dirty list with everything past the target as an idle
    state and set the timeout appropriately.
    
    SGI-PV: 977545
    SGI-Modid: xfs-linux-melb:xfs-kern:30532a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 95a8d08938c941ec5b955b019cbe0205029b1088
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:44:57 2008 +1100

    [XFS] kill xfs_rwlock/xfs_rwunlock
    
    We can just use xfs_ilock/xfs_iunlock instead and get rid of the ugly
    bhv_vrwlock_t.
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30533a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit ce1824a8ee563c36474b3346e3f5898229f5a5df
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:44:50 2008 +1100

    [XFS] kill xfs_get_dir_entry
    
    Instead of of xfs_get_dir_entry use a macro to get the xfs_inode from the
    dentry in the callers and grab the reference manually.
    
    Only grab the reference once as it's fine to keep it over the dmapi calls.
    (And even that reference is actually superflous in Linux but I'll leave
    that for another patch)
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30531a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 265eef775f891d5a4114fc888e4e99a6f9957819
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:44:41 2008 +1100

    [XFS] vnode cleanup in xfs_fs_subr.c
    
    Cleanup the unneeded intermediate vnode step in the flushing helpers and
    go directly from the xfs_inode to the struct address_space.
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30530a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 2eecc359daed82612ff72b0dbe51407fa2018d9b
Author: Christoph Hellwig <hch@infradead.org>
Date:   Thu Mar 6 13:44:35 2008 +1100

    [XFS] cleanup xfs_vn_mknod
    
    - use proper goto based unwinding instead of the current mess of
    
    multiple conditionals
    
    - rename ip to inode because that's the normal convention for Linux
    
    inodes while ip is the convention for xfs_inodes
    
    - remove unlikely checks for the default_acl - branches marked unlikely
    
    might lead to extreme branch bredictor slowdons if taken and for some
    
    workloads a default acl is quite common
    
    - properly indent the switch statements
    
    - remove xfs_has_fs_struct as nfsd has a fs_struct in any semi-recent
    
    kernel
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30529a
    
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 50ed1cf6950b5787d7cad811055b7fe2bf47de01
Author: Eric Sandeen <sandeen@sandeen.net>
Date:   Thu Mar 6 13:44:28 2008 +1100

    [XFS] remove shouting-indirection macros from xfs_sb.h
    
    Remove macro-to-small-function indirection from xfs_sb.h, and remove some
    which are completely unused.
    
    SGI-PV: 976035
    SGI-Modid: xfs-linux-melb:xfs-kern:30528a
    
    Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
    Signed-off-by: Donald Douwsma <donaldd@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 04f7551c9dc0e8c7cf98f3b31e3121fa427d96ff
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:44:14 2008 +1100

    [XFS] Use atomics for iclog reference counting
    
    Now that we update the log tail LSN less frequently on transaction
    completion, we pass the contention straight to the global log state lock
    (l_iclog_lock) during transaction completion.
    
    We currently have to take this lock to decrement the iclog reference
    count. there is a reference count on each iclog, so we need to take þhe
    global lock for all refcount changes.
    
    When large numbers of processes are all doing small trnasctions, the iclog
    reference counts will be quite high, and the state change that absolutely
    requires the l_iclog_lock is the except rather than the norm.
    
    Change the reference counting on the iclogs to use atomic_inc/dec so that
    we can use atomic_dec_and_lock during transaction completion and avoid the
    need for grabbing the l_iclog_lock for every reference count decrement
    except the one that matters - the last.
    
    SGI-PV: 975671
    SGI-Modid: xfs-linux-melb:xfs-kern:30505a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Tim Shimmin <tes@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit f81d5fa628d0891b128fe9ddb6593e25b3f78ff3
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:44:06 2008 +1100

    [XFS] Prevent AIL lock contention during transaction completion
    
    When hundreds of processors attempt to commit transactions at the same
    time, they can contend on the AIL lock when updating the tail LSN held in
    the in-core log structure.
    
    At the moment, the tail LSN is only needed when actually writing out an
    iclog, so it really does not need to be updated on every single
    transaction completion - only those that result in switching iclogs and
    flushing them to disk.
    
    The result is that we reduce the number of times we need to grab the AIL
    lock and the log grant lock by up to two orders of magnitude on large
    processor count machines. The problem has previously been hidden by AIL
    lock contention walking the AIL list which was recently solved and
    uncovered this issue.
    
    SGI-PV: 975671
    SGI-Modid: xfs-linux-melb:xfs-kern:30504a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Tim Shimmin <tes@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 73fd8efbc7f407291a5136ba2c945b48b816244c
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:43:59 2008 +1100

    [XFS] Use xfs_inode_clean() in more places
    
    Remove open coded checks for the whether the inode is clean and replace
    them with an inlined function.
    
    SGI-PV: 977461
    SGI-Modid: xfs-linux-melb:xfs-kern:30503a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 0b63ab688acec9b55b59efbc67987f245074c2a6
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:43:49 2008 +1100

    [XFS] Remove the xfs_icluster structure
    
    Remove the xfs_icluster structure and replace with a radix tree lookup.
    
    We don't need to keep a list of inodes in each cluster around anymore as
    we can look them up quickly when we need to. The only time we need to do
    this now is during inode writeback.
    
    Factor the inode cluster writeback code out of xfs_iflush and convert it
    to use radix_tree_gang_lookup() instead of walking a list of inodes built
    when we first read in the inodes.
    
    This remove 3 pointers from each xfs_inode structure and the xfs_icluster
    structure per inode cluster. Hence we reduce the cache footprint of the
    xfs_inodes by between 5-10% depending on cluster sparseness.
    
    To be truly efficient we need a radix_tree_gang_lookup_range() call to
    stop searching once we are past the end of the cluster instead of trying
    to find a full cluster's worth of inodes.
    
    Before (ia64):
    
    $ cat /sys/slab/xfs_inode/object_size 536
    
    After:
    
    $ cat /sys/slab/xfs_inode/object_size 512
    
    SGI-PV: 977460
    SGI-Modid: xfs-linux-melb:xfs-kern:30502a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit 1d53d23f1fd43baee04418a20dc4fa13e620a582
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:43:42 2008 +1100

    [XFS] Don't block pdflush when writing back inodes
    
    When pdflush is writing back inodes, it can get stuck on inode cluster
    buffers that are currently under I/O. This occurs when we write data to
    multiple inodes in the same inode cluster at the same time.
    
    Effectively, delayed allocation marks the inode dirty during the data
    writeback. Hence if the inode cluster was flushed during the writeback of
    the first inode, the writeback of the second inode will block waiting for
    the inode cluster write to complete before writing it again for the newly
    dirtied inode.
    
    Basically, we want to avoid this from happening so we don't block pdflush
    and slow down all of writeback. Hence we introduce a non-blocking async
    inode flush flag that pdflush uses. If this flag is set, we use
    non-blocking operations (e.g. try locks) whereever we can to avoid
    blocking or extra I/O being issued.
    
    SGI-PV: 970925
    SGI-Modid: xfs-linux-melb:xfs-kern:30501a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit c228466268bc28bac1bb59306c0c3b5515e308f9
Author: David Chinner <dgc@sgi.com>
Date:   Thu Mar 6 13:43:34 2008 +1100

    [XFS] Factor xfs_itobp() and xfs_inotobp().
    
    The only difference between the functions is one passes an inode for the
    lookup, the other passes an inode number. However, they don't do the same
    validity checking or set all the same state on the buffer that is returned
    yet they should.
    
    Factor the functions into a common implementation.
    
    SGI-PV: 970925
    SGI-Modid: xfs-linux-melb:xfs-kern:30500a
    
    Signed-off-by: David Chinner <dgc@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit ccc6812e4da9c0a9d45f9c21a45dc9a24a598845
Author: Lachlan McIlroy <lachlan@sgi.com>
Date:   Thu Mar 6 13:43:27 2008 +1100

    [XFS] Fix regression due to refcache removal
    
    SGI-PV: 971186
    SGI-Modid: xfs-linux-melb:xfs-kern:30490a
    
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
    Signed-off-by: Donald Douwsma <donaldd@sgi.com>

commit 53c1cb8d6852aa34a09b14241944a7806ab8f341
Author: Donald Douwsma <donaldd@sgi.com>
Date:   Thu Mar 6 13:43:20 2008 +1100

    [XFS] Remove the xfs_refcache, it was only needed while we were still
    building for 2.4 kernels.
    
    SGI-PV: 971186
    SGI-Modid: xfs-linux-melb:xfs-kern:30472a
    
    Signed-off-by: Donald Douwsma <donaldd@sgi.com>
    Signed-off-by: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>

commit a818f7ddf92088d9edf5fdc1f846c7c0147c6337
Author: Lachlan McIlroy <lachlan@sgi.com>
Date:   Thu Mar 6 13:43:11 2008 +1100

    [XFS] make inode reclaim synchronise with xfs_iflush_done()
    
    On a forced shutdown, xfs_finish_reclaim() will skip flushing the inode.
    If the inode flush lock is not already held and there is an outstanding
    xfs_iflush_done() then we might free the inode prematurely. By acquiring
    and releasing the flush lock we will synchronise with xfs_iflush_done().
    
    SGI-PV: 909874
    SGI-Modid: xfs-linux-melb:xfs-kern:30468a
    
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
    Signed-off-by: David Chinner <dgc@sgi.com>

commit e999831223874108192b9b72fff329af3934c821
Author: Niv Sardi <xaiki@sgi.com>
Date:   Thu Mar 6 13:43:03 2008 +1100

    [XFS] actually check error returned by xfs_flush_pages, clean up and
    bailout if fails.
    
    SGI-PV: 973041
    SGI-Modid: xfs-linux-melb:xfs-kern:30462a
    
    Signed-off-by: Niv Sardi <xaiki@sgi.com>
    Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
 fs/xfs/linux-2.6/xfs_aops.c    |    4 +-
 fs/xfs/linux-2.6/xfs_export.c  |   11 +-
 fs/xfs/linux-2.6/xfs_file.c    |   13 +-
 fs/xfs/linux-2.6/xfs_fs_subr.c |   36 +--
 fs/xfs/linux-2.6/xfs_ioctl.c   |  562 +++++++++++++++-------------------
 fs/xfs/linux-2.6/xfs_iops.c    |  180 ++++++------
 fs/xfs/linux-2.6/xfs_linux.h   |    1 -
 fs/xfs/linux-2.6/xfs_lrw.c     |   53 ++--
 fs/xfs/linux-2.6/xfs_super.c   |    3 +-
 fs/xfs/linux-2.6/xfs_vnode.h   |   19 +-
 fs/xfs/quota/xfs_qm.c          |    6 +-
 fs/xfs/quota/xfs_qm_bhv.c      |    2 +-
 fs/xfs/quota/xfs_qm_syscalls.c |    4 +-
 fs/xfs/support/ktrace.c        |   37 ++--
 fs/xfs/support/ktrace.h        |    3 +-
 fs/xfs/xfs_attr_leaf.c         |    6 +-
 fs/xfs/xfs_bmap.c              |   26 +-
 fs/xfs/xfs_bmap_btree.h        |    2 +-
 fs/xfs/xfs_dir2.c              |    2 +-
 fs/xfs/xfs_fsops.c             |   24 +-
 fs/xfs/xfs_ialloc.c            |    6 +-
 fs/xfs/xfs_iget.c              |   49 +---
 fs/xfs/xfs_inode.c             |  661 ++++++++++++++++++++--------------------
 fs/xfs/xfs_inode.h             |   23 +--
 fs/xfs/xfs_inode_item.c        |    4 +-
 fs/xfs/xfs_inode_item.h        |    8 +
 fs/xfs/xfs_itable.c            |    5 +-
 fs/xfs/xfs_log.c               |   61 ++--
 fs/xfs/xfs_log_priv.h          |    6 +-
 fs/xfs/xfs_log_recover.c       |   19 +-
 fs/xfs/xfs_mount.c             |   49 ++-
 fs/xfs/xfs_mount.h             |   24 +-
 fs/xfs/xfs_rename.c            |   45 +--
 fs/xfs/xfs_sb.h                |  106 +++----
 fs/xfs/xfs_trans.h             |    1 -
 fs/xfs/xfs_trans_buf.c         |    3 +-
 fs/xfs/xfs_utils.c             |   28 +--
 fs/xfs/xfs_utils.h             |    1 -
 fs/xfs/xfs_vfsops.c            |   38 +--
 fs/xfs/xfs_vnodeops.c          |  328 ++++++---------------
 fs/xfs/xfs_vnodeops.h          |   14 +-
 41 files changed, 1042 insertions(+), 1431 deletions(-)

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index e051952..169e6c0 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1532,9 +1532,9 @@ xfs_vm_bmap(
 	struct xfs_inode	*ip = XFS_I(inode);
 
 	xfs_itrace_entry(XFS_I(inode));
-	xfs_rwlock(ip, VRWLOCK_READ);
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 	xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
-	xfs_rwunlock(ip, VRWLOCK_READ);
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 	return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index ca4f66c..66a9a9e 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -66,7 +66,7 @@ xfs_fs_encode_fh(
 	int			len;
 
 	/* Directories don't need their parent encoded, they have ".." */
-	if (S_ISDIR(inode->i_mode))
+	if (S_ISDIR(inode->i_mode) || !connectable)
 		fileid_type = FILEID_INO32_GEN;
 	else
 		fileid_type = FILEID_INO32_GEN_PARENT;
@@ -213,17 +213,16 @@ xfs_fs_get_parent(
 	struct dentry		*child)
 {
 	int			error;
-	bhv_vnode_t		*cvp;
+	struct xfs_inode	*cip;
 	struct dentry		*parent;
 
-	cvp = NULL;
-	error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cvp);
+	error = xfs_lookup(XFS_I(child->d_inode), &dotdot, &cip);
 	if (unlikely(error))
 		return ERR_PTR(-error);
 
-	parent = d_alloc_anon(vn_to_inode(cvp));
+	parent = d_alloc_anon(cip->i_vnode);
 	if (unlikely(!parent)) {
-		VN_RELE(cvp);
+		iput(cip->i_vnode);
 		return ERR_PTR(-ENOMEM);
 	}
 	return parent;
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index edab1ff..0590524 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -469,16 +469,11 @@ xfs_file_open_exec(
 	struct inode	*inode)
 {
 	struct xfs_mount *mp = XFS_M(inode->i_sb);
+	struct xfs_inode *ip = XFS_I(inode);
 
-	if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) {
-		if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) {
-			bhv_vnode_t *vp = vn_from_inode(inode);
-
-			return -XFS_SEND_DATA(mp, DM_EVENT_READ,
-						vp, 0, 0, 0, NULL);
-		}
-	}
-
+	if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
+	             DM_EVENT_ENABLED(ip, DM_EVENT_READ))
+		return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
 	return 0;
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index ac6d34c..1eefe61 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -17,18 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_vnodeops.h"
-
-/*
- * The following six includes are needed so that we can include
- * xfs_inode.h.  What a mess..
- */
 #include "xfs_bmap_btree.h"
-#include "xfs_inum.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-
 #include "xfs_inode.h"
 
 int  fs_noerr(void) { return 0; }
@@ -42,11 +31,10 @@ xfs_tosspages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-	struct inode	*inode = vn_to_inode(vp);
+	struct address_space *mapping = ip->i_vnode->i_mapping;
 
-	if (VN_CACHED(vp))
-		truncate_inode_pages(inode->i_mapping, first);
+	if (mapping->nrpages)
+		truncate_inode_pages(mapping, first);
 }
 
 int
@@ -56,15 +44,14 @@ xfs_flushinval_pages(
 	xfs_off_t	last,
 	int		fiopt)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-	struct inode	*inode = vn_to_inode(vp);
+	struct address_space *mapping = ip->i_vnode->i_mapping;
 	int		ret = 0;
 
-	if (VN_CACHED(vp)) {
+	if (mapping->nrpages) {
 		xfs_iflags_clear(ip, XFS_ITRUNCATED);
-		ret = filemap_write_and_wait(inode->i_mapping);
+		ret = filemap_write_and_wait(mapping);
 		if (!ret)
-			truncate_inode_pages(inode->i_mapping, first);
+			truncate_inode_pages(mapping, first);
 	}
 	return ret;
 }
@@ -77,17 +64,16 @@ xfs_flush_pages(
 	uint64_t	flags,
 	int		fiopt)
 {
-	bhv_vnode_t	*vp = XFS_ITOV(ip);
-	struct inode	*inode = vn_to_inode(vp);
+	struct address_space *mapping = ip->i_vnode->i_mapping;
 	int		ret = 0;
 	int		ret2;
 
-	if (VN_DIRTY(vp)) {
+	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 		xfs_iflags_clear(ip, XFS_ITRUNCATED);
-		ret = filemap_fdatawrite(inode->i_mapping);
+		ret = filemap_fdatawrite(mapping);
 		if (flags & XFS_B_ASYNC)
 			return ret;
-		ret2 = filemap_fdatawait(inode->i_mapping);
+		ret2 = filemap_fdatawait(mapping);
 		if (!ret)
 			ret = ret2;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a9952e4..7252963 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -651,314 +651,6 @@ xfs_attrmulti_by_handle(
 	return -error;
 }
 
-/* prototypes for a few of the stack-hungry cases that have
- * their own functions.  Functions are defined after their use
- * so gcc doesn't get fancy and inline them with -03 */
-
-STATIC int
-xfs_ioc_space(
-	struct xfs_inode	*ip,
-	struct inode		*inode,
-	struct file		*filp,
-	int			flags,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_bulkstat(
-	xfs_mount_t		*mp,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
-	xfs_mount_t		*mp,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_fsgeometry(
-	xfs_mount_t		*mp,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_xattr(
-	xfs_inode_t		*ip,
-	struct file		*filp,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_fsgetxattr(
-	xfs_inode_t		*ip,
-	int			attr,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_getbmap(
-	struct xfs_inode	*ip,
-	int			flags,
-	unsigned int		cmd,
-	void			__user *arg);
-
-STATIC int
-xfs_ioc_getbmapx(
-	struct xfs_inode	*ip,
-	void			__user *arg);
-
-int
-xfs_ioctl(
-	xfs_inode_t		*ip,
-	struct file		*filp,
-	int			ioflags,
-	unsigned int		cmd,
-	void			__user *arg)
-{
-	struct inode		*inode = filp->f_path.dentry->d_inode;
-	xfs_mount_t		*mp = ip->i_mount;
-	int			error;
-
-	xfs_itrace_entry(XFS_I(inode));
-	switch (cmd) {
-
-	case XFS_IOC_ALLOCSP:
-	case XFS_IOC_FREESP:
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_ALLOCSP64:
-	case XFS_IOC_FREESP64:
-	case XFS_IOC_RESVSP64:
-	case XFS_IOC_UNRESVSP64:
-		/*
-		 * Only allow the sys admin to reserve space unless
-		 * unwritten extents are enabled.
-		 */
-		if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
-		    !capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
-
-	case XFS_IOC_DIOINFO: {
-		struct dioattr	da;
-		xfs_buftarg_t	*target =
-			XFS_IS_REALTIME_INODE(ip) ?
-			mp->m_rtdev_targp : mp->m_ddev_targp;
-
-		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
-		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
-		if (copy_to_user(arg, &da, sizeof(da)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_FSBULKSTAT_SINGLE:
-	case XFS_IOC_FSBULKSTAT:
-	case XFS_IOC_FSINUMBERS:
-		return xfs_ioc_bulkstat(mp, cmd, arg);
-
-	case XFS_IOC_FSGEOMETRY_V1:
-		return xfs_ioc_fsgeometry_v1(mp, arg);
-
-	case XFS_IOC_FSGEOMETRY:
-		return xfs_ioc_fsgeometry(mp, arg);
-
-	case XFS_IOC_GETVERSION:
-		return put_user(inode->i_generation, (int __user *)arg);
-
-	case XFS_IOC_FSGETXATTR:
-		return xfs_ioc_fsgetxattr(ip, 0, arg);
-	case XFS_IOC_FSGETXATTRA:
-		return xfs_ioc_fsgetxattr(ip, 1, arg);
-	case XFS_IOC_GETXFLAGS:
-	case XFS_IOC_SETXFLAGS:
-	case XFS_IOC_FSSETXATTR:
-		return xfs_ioc_xattr(ip, filp, cmd, arg);
-
-	case XFS_IOC_FSSETDM: {
-		struct fsdmidata	dmi;
-
-		if (copy_from_user(&dmi, arg, sizeof(dmi)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
-				dmi.fsd_dmstate);
-		return -error;
-	}
-
-	case XFS_IOC_GETBMAP:
-	case XFS_IOC_GETBMAPA:
-		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-
-	case XFS_IOC_GETBMAPX:
-		return xfs_ioc_getbmapx(ip, arg);
-
-	case XFS_IOC_FD_TO_HANDLE:
-	case XFS_IOC_PATH_TO_HANDLE:
-	case XFS_IOC_PATH_TO_FSHANDLE:
-		return xfs_find_handle(cmd, arg);
-
-	case XFS_IOC_OPEN_BY_HANDLE:
-		return xfs_open_by_handle(mp, arg, filp, inode);
-
-	case XFS_IOC_FSSETDM_BY_HANDLE:
-		return xfs_fssetdm_by_handle(mp, arg, inode);
-
-	case XFS_IOC_READLINK_BY_HANDLE:
-		return xfs_readlink_by_handle(mp, arg, inode);
-
-	case XFS_IOC_ATTRLIST_BY_HANDLE:
-		return xfs_attrlist_by_handle(mp, arg, inode);
-
-	case XFS_IOC_ATTRMULTI_BY_HANDLE:
-		return xfs_attrmulti_by_handle(mp, arg, inode);
-
-	case XFS_IOC_SWAPEXT: {
-		error = xfs_swapext((struct xfs_swapext __user *)arg);
-		return -error;
-	}
-
-	case XFS_IOC_FSCOUNTS: {
-		xfs_fsop_counts_t out;
-
-		error = xfs_fs_counts(mp, &out);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &out, sizeof(out)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_SET_RESBLKS: {
-		xfs_fsop_resblks_t inout;
-		__uint64_t	   in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&inout, arg, sizeof(inout)))
-			return -XFS_ERROR(EFAULT);
-
-		/* input parameter is passed in resblks field of structure */
-		in = inout.resblks;
-		error = xfs_reserve_blocks(mp, &in, &inout);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &inout, sizeof(inout)))
-			return -XFS_ERROR(EFAULT);
-		return 0;
-	}
-
-	case XFS_IOC_GET_RESBLKS: {
-		xfs_fsop_resblks_t out;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		error = xfs_reserve_blocks(mp, NULL, &out);
-		if (error)
-			return -error;
-
-		if (copy_to_user(arg, &out, sizeof(out)))
-			return -XFS_ERROR(EFAULT);
-
-		return 0;
-	}
-
-	case XFS_IOC_FSGROWFSDATA: {
-		xfs_growfs_data_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_data(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FSGROWFSLOG: {
-		xfs_growfs_log_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_log(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FSGROWFSRT: {
-		xfs_growfs_rt_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_growfs_rt(mp, &in);
-		return -error;
-	}
-
-	case XFS_IOC_FREEZE:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (inode->i_sb->s_frozen == SB_UNFROZEN)
-			freeze_bdev(inode->i_sb->s_bdev);
-		return 0;
-
-	case XFS_IOC_THAW:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-		if (inode->i_sb->s_frozen != SB_UNFROZEN)
-			thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
-		return 0;
-
-	case XFS_IOC_GOINGDOWN: {
-		__uint32_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (get_user(in, (__uint32_t __user *)arg))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_fs_goingdown(mp, in);
-		return -error;
-	}
-
-	case XFS_IOC_ERROR_INJECTION: {
-		xfs_error_injection_t in;
-
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		if (copy_from_user(&in, arg, sizeof(in)))
-			return -XFS_ERROR(EFAULT);
-
-		error = xfs_errortag_add(in.errtag, mp);
-		return -error;
-	}
-
-	case XFS_IOC_ERROR_CLEARALL:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-
-		error = xfs_errortag_clearall(mp, 1);
-		return -error;
-
-	default:
-		return -ENOTTY;
-	}
-}
-
 STATIC int
 xfs_ioc_space(
 	struct xfs_inode	*ip,
@@ -1332,3 +1024,257 @@ xfs_ioc_getbmapx(
 
 	return 0;
 }
+
+int
+xfs_ioctl(
+	xfs_inode_t		*ip,
+	struct file		*filp,
+	int			ioflags,
+	unsigned int		cmd,
+	void			__user *arg)
+{
+	struct inode		*inode = filp->f_path.dentry->d_inode;
+	xfs_mount_t		*mp = ip->i_mount;
+	int			error;
+
+	xfs_itrace_entry(XFS_I(inode));
+	switch (cmd) {
+
+	case XFS_IOC_ALLOCSP:
+	case XFS_IOC_FREESP:
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_ALLOCSP64:
+	case XFS_IOC_FREESP64:
+	case XFS_IOC_RESVSP64:
+	case XFS_IOC_UNRESVSP64:
+		/*
+		 * Only allow the sys admin to reserve space unless
+		 * unwritten extents are enabled.
+		 */
+		if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
+		    !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);
+
+	case XFS_IOC_DIOINFO: {
+		struct dioattr	da;
+		xfs_buftarg_t	*target =
+			XFS_IS_REALTIME_INODE(ip) ?
+			mp->m_rtdev_targp : mp->m_ddev_targp;
+
+		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
+		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
+
+		if (copy_to_user(arg, &da, sizeof(da)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_FSBULKSTAT_SINGLE:
+	case XFS_IOC_FSBULKSTAT:
+	case XFS_IOC_FSINUMBERS:
+		return xfs_ioc_bulkstat(mp, cmd, arg);
+
+	case XFS_IOC_FSGEOMETRY_V1:
+		return xfs_ioc_fsgeometry_v1(mp, arg);
+
+	case XFS_IOC_FSGEOMETRY:
+		return xfs_ioc_fsgeometry(mp, arg);
+
+	case XFS_IOC_GETVERSION:
+		return put_user(inode->i_generation, (int __user *)arg);
+
+	case XFS_IOC_FSGETXATTR:
+		return xfs_ioc_fsgetxattr(ip, 0, arg);
+	case XFS_IOC_FSGETXATTRA:
+		return xfs_ioc_fsgetxattr(ip, 1, arg);
+	case XFS_IOC_GETXFLAGS:
+	case XFS_IOC_SETXFLAGS:
+	case XFS_IOC_FSSETXATTR:
+		return xfs_ioc_xattr(ip, filp, cmd, arg);
+
+	case XFS_IOC_FSSETDM: {
+		struct fsdmidata	dmi;
+
+		if (copy_from_user(&dmi, arg, sizeof(dmi)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
+				dmi.fsd_dmstate);
+		return -error;
+	}
+
+	case XFS_IOC_GETBMAP:
+	case XFS_IOC_GETBMAPA:
+		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+
+	case XFS_IOC_GETBMAPX:
+		return xfs_ioc_getbmapx(ip, arg);
+
+	case XFS_IOC_FD_TO_HANDLE:
+	case XFS_IOC_PATH_TO_HANDLE:
+	case XFS_IOC_PATH_TO_FSHANDLE:
+		return xfs_find_handle(cmd, arg);
+
+	case XFS_IOC_OPEN_BY_HANDLE:
+		return xfs_open_by_handle(mp, arg, filp, inode);
+
+	case XFS_IOC_FSSETDM_BY_HANDLE:
+		return xfs_fssetdm_by_handle(mp, arg, inode);
+
+	case XFS_IOC_READLINK_BY_HANDLE:
+		return xfs_readlink_by_handle(mp, arg, inode);
+
+	case XFS_IOC_ATTRLIST_BY_HANDLE:
+		return xfs_attrlist_by_handle(mp, arg, inode);
+
+	case XFS_IOC_ATTRMULTI_BY_HANDLE:
+		return xfs_attrmulti_by_handle(mp, arg, inode);
+
+	case XFS_IOC_SWAPEXT: {
+		error = xfs_swapext((struct xfs_swapext __user *)arg);
+		return -error;
+	}
+
+	case XFS_IOC_FSCOUNTS: {
+		xfs_fsop_counts_t out;
+
+		error = xfs_fs_counts(mp, &out);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &out, sizeof(out)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_SET_RESBLKS: {
+		xfs_fsop_resblks_t inout;
+		__uint64_t	   in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&inout, arg, sizeof(inout)))
+			return -XFS_ERROR(EFAULT);
+
+		/* input parameter is passed in resblks field of structure */
+		in = inout.resblks;
+		error = xfs_reserve_blocks(mp, &in, &inout);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &inout, sizeof(inout)))
+			return -XFS_ERROR(EFAULT);
+		return 0;
+	}
+
+	case XFS_IOC_GET_RESBLKS: {
+		xfs_fsop_resblks_t out;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		error = xfs_reserve_blocks(mp, NULL, &out);
+		if (error)
+			return -error;
+
+		if (copy_to_user(arg, &out, sizeof(out)))
+			return -XFS_ERROR(EFAULT);
+
+		return 0;
+	}
+
+	case XFS_IOC_FSGROWFSDATA: {
+		xfs_growfs_data_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_data(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FSGROWFSLOG: {
+		xfs_growfs_log_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_log(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FSGROWFSRT: {
+		xfs_growfs_rt_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_growfs_rt(mp, &in);
+		return -error;
+	}
+
+	case XFS_IOC_FREEZE:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (inode->i_sb->s_frozen == SB_UNFROZEN)
+			freeze_bdev(inode->i_sb->s_bdev);
+		return 0;
+
+	case XFS_IOC_THAW:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		if (inode->i_sb->s_frozen != SB_UNFROZEN)
+			thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
+		return 0;
+
+	case XFS_IOC_GOINGDOWN: {
+		__uint32_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (get_user(in, (__uint32_t __user *)arg))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_fs_goingdown(mp, in);
+		return -error;
+	}
+
+	case XFS_IOC_ERROR_INJECTION: {
+		xfs_error_injection_t in;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&in, arg, sizeof(in)))
+			return -XFS_ERROR(EFAULT);
+
+		error = xfs_errortag_add(in.errtag, mp);
+		return -error;
+	}
+
+	case XFS_IOC_ERROR_CLEARALL:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		error = xfs_errortag_clearall(mp, 1);
+		return -error;
+
+	default:
+		return -ENOTTY;
+	}
+}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index cc4abd3..53f8feb 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -62,12 +62,11 @@ void
 xfs_synchronize_atime(
 	xfs_inode_t	*ip)
 {
-	bhv_vnode_t	*vp;
+	struct inode	*inode = ip->i_vnode;
 
-	vp = XFS_ITOV_NULL(ip);
-	if (vp) {
-		ip->i_d.di_atime.t_sec = (__int32_t)vp->i_atime.tv_sec;
-		ip->i_d.di_atime.t_nsec = (__int32_t)vp->i_atime.tv_nsec;
+	if (inode) {
+		ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
+		ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
 	}
 }
 
@@ -80,11 +79,10 @@ void
 xfs_mark_inode_dirty_sync(
 	xfs_inode_t	*ip)
 {
-	bhv_vnode_t	*vp;
+	struct inode	*inode = ip->i_vnode;
 
-	vp = XFS_ITOV_NULL(ip);
-	if (vp)
-		mark_inode_dirty_sync(vn_to_inode(vp));
+	if (inode)
+		mark_inode_dirty_sync(inode);
 }
 
 /*
@@ -215,48 +213,36 @@ xfs_validate_fields(
  */
 STATIC int
 xfs_init_security(
-	bhv_vnode_t	*vp,
+	struct inode	*inode,
 	struct inode	*dir)
 {
-	struct inode	*ip = vn_to_inode(vp);
+	struct xfs_inode *ip = XFS_I(inode);
 	size_t		length;
 	void		*value;
 	char		*name;
 	int		error;
 
-	error = security_inode_init_security(ip, dir, &name, &value, &length);
+	error = security_inode_init_security(inode, dir, &name,
+					     &value, &length);
 	if (error) {
 		if (error == -EOPNOTSUPP)
 			return 0;
 		return -error;
 	}
 
-	error = xfs_attr_set(XFS_I(ip), name, value,
-			length, ATTR_SECURE);
+	error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
 	if (!error)
-		xfs_iflags_set(XFS_I(ip), XFS_IMODIFIED);
+		xfs_iflags_set(ip, XFS_IMODIFIED);
 
 	kfree(name);
 	kfree(value);
 	return error;
 }
 
-/*
- * Determine whether a process has a valid fs_struct (kernel daemons
- * like knfsd don't have an fs_struct).
- *
- * XXX(hch):  nfsd is broken, better fix it instead.
- */
-STATIC_INLINE int
-xfs_has_fs_struct(struct task_struct *task)
-{
-	return (task->fs != init_task.fs);
-}
-
 STATIC void
 xfs_cleanup_inode(
 	struct inode	*dir,
-	bhv_vnode_t	*vp,
+	struct inode	*inode,
 	struct dentry	*dentry,
 	int		mode)
 {
@@ -267,14 +253,14 @@ xfs_cleanup_inode(
 	 * xfs_init_security we must back out.
 	 * ENOSPC can hit here, among other things.
 	 */
-	teardown.d_inode = vn_to_inode(vp);
+	teardown.d_inode = inode;
 	teardown.d_name = dentry->d_name;
 
 	if (S_ISDIR(mode))
 		xfs_rmdir(XFS_I(dir), &teardown);
 	else
 		xfs_remove(XFS_I(dir), &teardown);
-	VN_RELE(vp);
+	iput(inode);
 }
 
 STATIC int
@@ -284,8 +270,8 @@ xfs_vn_mknod(
 	int		mode,
 	dev_t		rdev)
 {
-	struct inode	*ip;
-	bhv_vnode_t	*vp = NULL, *dvp = vn_from_inode(dir);
+	struct inode	*inode;
+	struct xfs_inode *ip = NULL;
 	xfs_acl_t	*default_acl = NULL;
 	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
 	int		error;
@@ -297,59 +283,65 @@ xfs_vn_mknod(
 	if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
 		return -EINVAL;
 
-	if (unlikely(test_default_acl && test_default_acl(dvp))) {
+	if (test_default_acl && test_default_acl(dir)) {
 		if (!_ACL_ALLOC(default_acl)) {
 			return -ENOMEM;
 		}
-		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
+		if (!_ACL_GET_DEFAULT(dir, default_acl)) {
 			_ACL_FREE(default_acl);
 			default_acl = NULL;
 		}
 	}
 
-	if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current))
+	if (IS_POSIXACL(dir) && !default_acl)
 		mode &= ~current->fs->umask;
 
 	switch (mode & S_IFMT) {
-	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
 		rdev = sysv_encode_dev(rdev);
 	case S_IFREG:
-		error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL);
+		error = xfs_create(XFS_I(dir), dentry, mode, rdev, &ip, NULL);
 		break;
 	case S_IFDIR:
-		error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL);
+		error = xfs_mkdir(XFS_I(dir), dentry, mode, &ip, NULL);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 
-	if (unlikely(!error)) {
-		error = xfs_init_security(vp, dir);
-		if (error)
-			xfs_cleanup_inode(dir, vp, dentry, mode);
-	}
+	if (unlikely(error))
+		goto out_free_acl;
 
-	if (unlikely(default_acl)) {
-		if (!error) {
-			error = _ACL_INHERIT(vp, mode, default_acl);
-			if (!error)
-				xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED);
-			else
-				xfs_cleanup_inode(dir, vp, dentry, mode);
-		}
+	inode = ip->i_vnode;
+
+	error = xfs_init_security(inode, dir);
+	if (unlikely(error))
+		goto out_cleanup_inode;
+
+	if (default_acl) {
+		error = _ACL_INHERIT(inode, mode, default_acl);
+		if (unlikely(error))
+			goto out_cleanup_inode;
+		xfs_iflags_set(ip, XFS_IMODIFIED);
 		_ACL_FREE(default_acl);
 	}
 
-	if (likely(!error)) {
-		ASSERT(vp);
-		ip = vn_to_inode(vp);
 
-		if (S_ISDIR(mode))
-			xfs_validate_fields(ip);
-		d_instantiate(dentry, ip);
-		xfs_validate_fields(dir);
-	}
+	if (S_ISDIR(mode))
+		xfs_validate_fields(inode);
+	d_instantiate(dentry, inode);
+	xfs_validate_fields(dir);
+	return -error;
+
+ out_cleanup_inode:
+	xfs_cleanup_inode(dir, inode, dentry, mode);
+ out_free_acl:
+	if (default_acl)
+		_ACL_FREE(default_acl);
 	return -error;
 }
 
@@ -378,13 +370,13 @@ xfs_vn_lookup(
 	struct dentry	*dentry,
 	struct nameidata *nd)
 {
-	bhv_vnode_t	*cvp;
+	struct xfs_inode *cip;
 	int		error;
 
 	if (dentry->d_name.len >= MAXNAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	error = xfs_lookup(XFS_I(dir), dentry, &cvp);
+	error = xfs_lookup(XFS_I(dir), dentry, &cip);
 	if (unlikely(error)) {
 		if (unlikely(error != ENOENT))
 			return ERR_PTR(-error);
@@ -392,7 +384,7 @@ xfs_vn_lookup(
 		return NULL;
 	}
 
-	return d_splice_alias(vn_to_inode(cvp), dentry);
+	return d_splice_alias(cip->i_vnode, dentry);
 }
 
 STATIC int
@@ -401,23 +393,22 @@ xfs_vn_link(
 	struct inode	*dir,
 	struct dentry	*dentry)
 {
-	struct inode	*ip;	/* inode of guy being linked to */
-	bhv_vnode_t	*vp;	/* vp of name being linked */
+	struct inode	*inode;	/* inode of guy being linked to */
 	int		error;
 
-	ip = old_dentry->d_inode;	/* inode being linked to */
-	vp = vn_from_inode(ip);
+	inode = old_dentry->d_inode;
 
-	VN_HOLD(vp);
-	error = xfs_link(XFS_I(dir), vp, dentry);
+	igrab(inode);
+	error = xfs_link(XFS_I(dir), XFS_I(inode), dentry);
 	if (unlikely(error)) {
-		VN_RELE(vp);
-	} else {
-		xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
-		xfs_validate_fields(ip);
-		d_instantiate(dentry, ip);
+		iput(inode);
+		return -error;
 	}
-	return -error;
+
+	xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
+	xfs_validate_fields(inode);
+	d_instantiate(dentry, inode);
+	return 0;
 }
 
 STATIC int
@@ -444,29 +435,33 @@ xfs_vn_symlink(
 	struct dentry	*dentry,
 	const char	*symname)
 {
-	struct inode	*ip;
-	bhv_vnode_t	*cvp;	/* used to lookup symlink to put in dentry */
+	struct inode	*inode;
+	struct xfs_inode *cip = NULL;
 	int		error;
 	mode_t		mode;
 
-	cvp = NULL;
-
 	mode = S_IFLNK |
 		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
 
 	error = xfs_symlink(XFS_I(dir), dentry, (char *)symname, mode,
-			    &cvp, NULL);
-	if (likely(!error && cvp)) {
-		error = xfs_init_security(cvp, dir);
-		if (likely(!error)) {
-			ip = vn_to_inode(cvp);
-			d_instantiate(dentry, ip);
-			xfs_validate_fields(dir);
-			xfs_validate_fields(ip);
-		} else {
-			xfs_cleanup_inode(dir, cvp, dentry, 0);
-		}
-	}
+			    &cip, NULL);
+	if (unlikely(error))
+		goto out;
+
+	inode = cip->i_vnode;
+
+	error = xfs_init_security(inode, dir);
+	if (unlikely(error))
+		goto out_cleanup_inode;
+
+	d_instantiate(dentry, inode);
+	xfs_validate_fields(dir);
+	xfs_validate_fields(inode);
+	return 0;
+
+ out_cleanup_inode:
+	xfs_cleanup_inode(dir, inode, dentry, 0);
+ out:
 	return -error;
 }
 
@@ -494,12 +489,9 @@ xfs_vn_rename(
 	struct dentry	*ndentry)
 {
 	struct inode	*new_inode = ndentry->d_inode;
-	bhv_vnode_t	*tvp;	/* target directory */
 	int		error;
 
-	tvp = vn_from_inode(ndir);
-
-	error = xfs_rename(XFS_I(odir), odentry, tvp, ndentry);
+	error = xfs_rename(XFS_I(odir), odentry, XFS_I(ndir), ndentry);
 	if (likely(!error)) {
 		if (new_inode)
 			xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 3ca39c4..e514332 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -99,7 +99,6 @@
 /*
  * Feature macros (disable/enable)
  */
-#undef  HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
 #define HAVE_SPLICE	/* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1663533..1d95dca 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -176,7 +176,6 @@ xfs_read(
 {
 	struct file		*file = iocb->ki_filp;
 	struct inode		*inode = file->f_mapping->host;
-	bhv_vnode_t		*vp = XFS_ITOV(ip);
 	xfs_mount_t		*mp = ip->i_mount;
 	size_t			size = 0;
 	ssize_t			ret = 0;
@@ -228,11 +227,11 @@ xfs_read(
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
-		bhv_vrwlock_t locktype = VRWLOCK_READ;
 		int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
+		int iolock = XFS_IOLOCK_SHARED;
 
-		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size,
-					dmflags, &locktype);
+		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *offset, size,
+					dmflags, &iolock);
 		if (ret) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			if (unlikely(ioflags & IO_ISDIRECT))
@@ -242,7 +241,7 @@ xfs_read(
 	}
 
 	if (unlikely(ioflags & IO_ISDIRECT)) {
-		if (VN_CACHED(vp))
+		if (inode->i_mapping->nrpages)
 			ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK),
 						    -1, FI_REMAPF_LOCKED);
 		mutex_unlock(&inode->i_mutex);
@@ -276,7 +275,6 @@ xfs_splice_read(
 	int			flags,
 	int			ioflags)
 {
-	bhv_vnode_t		*vp = XFS_ITOV(ip);
 	xfs_mount_t		*mp = ip->i_mount;
 	ssize_t			ret;
 
@@ -287,11 +285,11 @@ xfs_splice_read(
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
-		bhv_vrwlock_t locktype = VRWLOCK_READ;
+		int iolock = XFS_IOLOCK_SHARED;
 		int error;
 
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count,
-					FILP_DELAY_FLAG(infilp), &locktype);
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
+					FILP_DELAY_FLAG(infilp), &iolock);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 			return -error;
@@ -317,7 +315,6 @@ xfs_splice_write(
 	int			flags,
 	int			ioflags)
 {
-	bhv_vnode_t		*vp = XFS_ITOV(ip);
 	xfs_mount_t		*mp = ip->i_mount;
 	ssize_t			ret;
 	struct inode		*inode = outfilp->f_mapping->host;
@@ -330,11 +327,11 @@ xfs_splice_write(
 	xfs_ilock(ip, XFS_IOLOCK_EXCL);
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
-		bhv_vrwlock_t locktype = VRWLOCK_WRITE;
+		int iolock = XFS_IOLOCK_EXCL;
 		int error;
 
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count,
-					FILP_DELAY_FLAG(outfilp), &locktype);
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
+					FILP_DELAY_FLAG(outfilp), &iolock);
 		if (error) {
 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 			return -error;
@@ -573,14 +570,12 @@ xfs_write(
 	struct file		*file = iocb->ki_filp;
 	struct address_space	*mapping = file->f_mapping;
 	struct inode		*inode = mapping->host;
-	bhv_vnode_t		*vp = XFS_ITOV(xip);
 	unsigned long		segs = nsegs;
 	xfs_mount_t		*mp;
 	ssize_t			ret = 0, error = 0;
 	xfs_fsize_t		isize, new_size;
 	int			iolock;
 	int			eventsent = 0;
-	bhv_vrwlock_t		locktype;
 	size_t			ocount = 0, count;
 	loff_t			pos;
 	int			need_i_mutex;
@@ -607,11 +602,9 @@ xfs_write(
 relock:
 	if (ioflags & IO_ISDIRECT) {
 		iolock = XFS_IOLOCK_SHARED;
-		locktype = VRWLOCK_WRITE_DIRECT;
 		need_i_mutex = 0;
 	} else {
 		iolock = XFS_IOLOCK_EXCL;
-		locktype = VRWLOCK_WRITE;
 		need_i_mutex = 1;
 		mutex_lock(&inode->i_mutex);
 	}
@@ -634,9 +627,8 @@ start:
 			dmflags |= DM_FLAGS_IMUX;
 
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
-		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
-				      pos, count,
-				      dmflags, &locktype);
+		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, xip,
+				      pos, count, dmflags, &iolock);
 		if (error) {
 			goto out_unlock_internal;
 		}
@@ -664,10 +656,9 @@ start:
 			return XFS_ERROR(-EINVAL);
 		}
 
-		if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) {
+		if (!need_i_mutex && (mapping->nrpages || pos > xip->i_size)) {
 			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
 			iolock = XFS_IOLOCK_EXCL;
-			locktype = VRWLOCK_WRITE;
 			need_i_mutex = 1;
 			mutex_lock(&inode->i_mutex);
 			xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
@@ -727,7 +718,7 @@ retry:
 	current->backing_dev_info = mapping->backing_dev_info;
 
 	if ((ioflags & IO_ISDIRECT)) {
-		if (VN_CACHED(vp)) {
+		if (mapping->nrpages) {
 			WARN_ON(need_i_mutex == 0);
 			xfs_inval_cached_trace(xip, pos, -1,
 					(pos & PAGE_CACHE_MASK), -1);
@@ -744,7 +735,6 @@ retry:
 			mutex_unlock(&inode->i_mutex);
 
 			iolock = XFS_IOLOCK_SHARED;
-			locktype = VRWLOCK_WRITE_DIRECT;
 			need_i_mutex = 0;
 		}
 
@@ -781,15 +771,15 @@ retry:
 
 	if (ret == -ENOSPC &&
 	    DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
-		xfs_rwunlock(xip, locktype);
+		xfs_iunlock(xip, iolock);
 		if (need_i_mutex)
 			mutex_unlock(&inode->i_mutex);
-		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
-				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
+		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, xip,
+				DM_RIGHT_NULL, xip, DM_RIGHT_NULL, NULL, NULL,
 				0, 0, 0); /* Delay flag intentionally  unused */
 		if (need_i_mutex)
 			mutex_lock(&inode->i_mutex);
-		xfs_rwlock(xip, locktype);
+		xfs_ilock(xip, iolock);
 		if (error)
 			goto out_unlock_internal;
 		pos = xip->i_size;
@@ -817,7 +807,8 @@ retry:
 	/* Handle various SYNC-type writes */
 	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
 		int error2;
-		xfs_rwunlock(xip, locktype);
+
+		xfs_iunlock(xip, iolock);
 		if (need_i_mutex)
 			mutex_unlock(&inode->i_mutex);
 		error2 = sync_page_range(inode, mapping, pos, ret);
@@ -825,7 +816,7 @@ retry:
 			error = error2;
 		if (need_i_mutex)
 			mutex_lock(&inode->i_mutex);
-		xfs_rwlock(xip, locktype);
+		xfs_ilock(xip, iolock);
 		error2 = xfs_write_sync_logforce(mp, xip);
 		if (!error)
 			error = error2;
@@ -846,7 +837,7 @@ retry:
 			xip->i_d.di_size = xip->i_size;
 		xfs_iunlock(xip, XFS_ILOCK_EXCL);
 	}
-	xfs_rwunlock(xip, locktype);
+	xfs_iunlock(xip, iolock);
  out_unlock_mutex:
 	if (need_i_mutex)
 		mutex_unlock(&inode->i_mutex);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 8831d95..cb9ce90 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -896,7 +896,8 @@ xfs_fs_write_inode(
 	struct inode		*inode,
 	int			sync)
 {
-	int			error = 0, flags = FLUSH_INODE;
+	int			error = 0;
+	int			flags = 0;
 
 	xfs_itrace_entry(XFS_I(inode));
 	if (sync) {
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index b5ea418..4ed5914 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -46,18 +46,6 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
 }
 
 /*
- * Values for the vop_rwlock/rwunlock flags parameter.
- */
-typedef enum bhv_vrwlock {
-	VRWLOCK_NONE,
-	VRWLOCK_READ,
-	VRWLOCK_WRITE,
-	VRWLOCK_WRITE_DIRECT,
-	VRWLOCK_TRY_READ,
-	VRWLOCK_TRY_WRITE
-} bhv_vrwlock_t;
-
-/*
  * Return values for xfs_inactive.  A return value of
  * VN_INACTIVE_NOCACHE implies that the file system behavior
  * has disassociated its state and bhv_desc_t from the vnode.
@@ -73,12 +61,9 @@ typedef enum bhv_vrwlock {
 #define IO_INVIS	0x00020		/* don't update inode timestamps */
 
 /*
- * Flags for vop_iflush call
+ * Flags for xfs_inode_flush
  */
 #define FLUSH_SYNC		1	/* wait for flush to complete	*/
-#define FLUSH_INODE		2	/* flush the inode itself	*/
-#define FLUSH_LOG		4	/* force the last log entry for
-					 * this inode out to disk	*/
 
 /*
  * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
@@ -230,7 +215,7 @@ static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp)
  */
 #define VNAME(dentry)		((char *) (dentry)->d_name.name)
 #define VNAMELEN(dentry)	((dentry)->d_name.len)
-#define VNAME_TO_VNODE(dentry)	(vn_from_inode((dentry)->d_inode))
+#define VNAME_TO_INODE(dentry)	(XFS_I((dentry)->d_inode))
 
 /*
  * Dealing with bad inodes
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 1f3da5b..8e9c5ae 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -1405,13 +1405,13 @@ xfs_qm_qino_alloc(
 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
 		unsigned oldv = mp->m_sb.sb_versionnum;
 #endif
-		ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
+		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
 		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
 				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
 		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
 			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
 
-		XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
+		xfs_sb_version_addquota(&mp->m_sb);
 		mp->m_sb.sb_uquotino = NULLFSINO;
 		mp->m_sb.sb_gquotino = NULLFSINO;
 
@@ -1954,7 +1954,7 @@ xfs_qm_init_quotainos(
 	/*
 	 * Get the uquota and gquota inodes
 	 */
-	if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+	if (xfs_sb_version_hasquota(&mp->m_sb)) {
 		if (XFS_IS_UQUOTA_ON(mp) &&
 		    mp->m_sb.sb_uquotino != NULLFSINO) {
 			ASSERT(mp->m_sb.sb_uquotino > 0);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 97bb329..f4f6c4c 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -118,7 +118,7 @@ xfs_qm_newmount(
 	*quotaflags = 0;
 	*needquotamount = B_FALSE;
 
-	quotaondisk = XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+	quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
 				(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
 
 	if (quotaondisk) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 2cc5886..d2b8be7 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -377,7 +377,7 @@ xfs_qm_scall_trunc_qfiles(
 	if (!capable(CAP_SYS_ADMIN))
 		return XFS_ERROR(EPERM);
 	error = 0;
-	if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
+	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
 		qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
 		return XFS_ERROR(EINVAL);
 	}
@@ -522,7 +522,7 @@ xfs_qm_scall_getqstat(
 	memset(out, 0, sizeof(fs_quota_stat_t));
 
 	out->qs_version = FS_QSTAT_VERSION;
-	if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
+	if (!xfs_sb_version_hasquota(&mp->m_sb)) {
 		out->qs_uquota.qfs_ino = NULLFSINO;
 		out->qs_gquota.qfs_ino = NULLFSINO;
 		return (0);
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 129067c..0b75d30 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -24,7 +24,7 @@ static int          ktrace_zentries;
 void __init
 ktrace_init(int zentries)
 {
-	ktrace_zentries = zentries;
+	ktrace_zentries = roundup_pow_of_two(zentries);
 
 	ktrace_hdr_zone = kmem_zone_init(sizeof(ktrace_t),
 					"ktrace_hdr");
@@ -47,13 +47,16 @@ ktrace_uninit(void)
  * ktrace_alloc()
  *
  * Allocate a ktrace header and enough buffering for the given
- * number of entries.
+ * number of entries. Round the number of entries up to a
+ * power of 2 so we can do fast masking to get the index from
+ * the atomic index counter.
  */
 ktrace_t *
 ktrace_alloc(int nentries, unsigned int __nocast sleep)
 {
 	ktrace_t        *ktp;
 	ktrace_entry_t  *ktep;
+	int		entries;
 
 	ktp = (ktrace_t*)kmem_zone_alloc(ktrace_hdr_zone, sleep);
 
@@ -70,11 +73,12 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
 	/*
 	 * Special treatment for buffers with the ktrace_zentries entries
 	 */
-	if (nentries == ktrace_zentries) {
+	entries = roundup_pow_of_two(nentries);
+	if (entries == ktrace_zentries) {
 		ktep = (ktrace_entry_t*)kmem_zone_zalloc(ktrace_ent_zone,
 							    sleep);
 	} else {
-		ktep = (ktrace_entry_t*)kmem_zalloc((nentries * sizeof(*ktep)),
+		ktep = (ktrace_entry_t*)kmem_zalloc((entries * sizeof(*ktep)),
 							    sleep | KM_LARGE);
 	}
 
@@ -91,8 +95,10 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
 	}
 
 	ktp->kt_entries  = ktep;
-	ktp->kt_nentries = nentries;
-	ktp->kt_index    = 0;
+	ktp->kt_nentries = entries;
+	ASSERT(is_power_of_2(entries));
+	ktp->kt_index_mask = entries - 1;
+	atomic_set(&ktp->kt_index, 0);
 	ktp->kt_rollover = 0;
 	return ktp;
 }
@@ -151,8 +157,6 @@ ktrace_enter(
 	void            *val14,
 	void            *val15)
 {
-	static DEFINE_SPINLOCK(wrap_lock);
-	unsigned long	flags;
 	int             index;
 	ktrace_entry_t  *ktep;
 
@@ -161,12 +165,8 @@ ktrace_enter(
 	/*
 	 * Grab an entry by pushing the index up to the next one.
 	 */
-	spin_lock_irqsave(&wrap_lock, flags);
-	index = ktp->kt_index;
-	if (++ktp->kt_index == ktp->kt_nentries)
-		ktp->kt_index = 0;
-	spin_unlock_irqrestore(&wrap_lock, flags);
-
+	index = atomic_add_return(1, &ktp->kt_index);
+	index = (index - 1) & ktp->kt_index_mask;
 	if (!ktp->kt_rollover && index == ktp->kt_nentries - 1)
 		ktp->kt_rollover = 1;
 
@@ -199,11 +199,12 @@ int
 ktrace_nentries(
 	ktrace_t        *ktp)
 {
-	if (ktp == NULL) {
+	int	index;
+	if (ktp == NULL)
 		return 0;
-	}
 
-	return (ktp->kt_rollover ? ktp->kt_nentries : ktp->kt_index);
+	index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
+	return (ktp->kt_rollover ? ktp->kt_nentries : index);
 }
 
 /*
@@ -228,7 +229,7 @@ ktrace_first(ktrace_t   *ktp, ktrace_snap_t     *ktsp)
 	int             nentries;
 
 	if (ktp->kt_rollover)
-		index = ktp->kt_index;
+		index = atomic_read(&ktp->kt_index) & ktp->kt_index_mask;
 	else
 		index = 0;
 
diff --git a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h
index 56e72b4..741d694 100644
--- a/fs/xfs/support/ktrace.h
+++ b/fs/xfs/support/ktrace.h
@@ -30,7 +30,8 @@ typedef struct ktrace_entry {
  */
 typedef struct ktrace {
 	int		kt_nentries;	/* number of entries in trace buf */
-	int		kt_index;	/* current index in entries */
+	atomic_t	kt_index;	/* current index in entries */
+	unsigned int	kt_index_mask;
 	int		kt_rollover;
 	ktrace_entry_t	*kt_entries;	/* buffer of entries */
 } ktrace_t;
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index b08e2a2..96ba6aa 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -227,10 +227,10 @@ STATIC void
 xfs_sbversion_add_attr2(xfs_mount_t *mp, xfs_trans_t *tp)
 {
 	if ((mp->m_flags & XFS_MOUNT_ATTR2) &&
-	    !(XFS_SB_VERSION_HASATTR2(&mp->m_sb))) {
+	    !(xfs_sb_version_hasattr2(&mp->m_sb))) {
 		spin_lock(&mp->m_sb_lock);
-		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
-			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
+		if (!xfs_sb_version_hasattr2(&mp->m_sb)) {
+			xfs_sb_version_addattr2(&mp->m_sb);
 			spin_unlock(&mp->m_sb_lock);
 			xfs_mod_sb(tp, XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
 		} else
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 1c0a5a5..bce8e3b 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4047,17 +4047,17 @@ xfs_bmap_add_attrfork(
 		xfs_trans_log_inode(tp, ip, logflags);
 	if (error)
 		goto error2;
-	if (!XFS_SB_VERSION_HASATTR(&mp->m_sb) ||
-	   (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2)) {
+	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
+	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
 		__int64_t sbfields = 0;
 
 		spin_lock(&mp->m_sb_lock);
-		if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
-			XFS_SB_VERSION_ADDATTR(&mp->m_sb);
+		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
+			xfs_sb_version_addattr(&mp->m_sb);
 			sbfields |= XFS_SB_VERSIONNUM;
 		}
-		if (!XFS_SB_VERSION_HASATTR2(&mp->m_sb) && version == 2) {
-			XFS_SB_VERSION_ADDATTR2(&mp->m_sb);
+		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
+			xfs_sb_version_addattr2(&mp->m_sb);
 			sbfields |= (XFS_SB_VERSIONNUM | XFS_SB_FEATURES2);
 		}
 		if (sbfields) {
@@ -5043,7 +5043,7 @@ xfs_bmapi(
 			 * A wasdelay extent has been initialized, so
 			 * shouldn't be flagged as unwritten.
 			 */
-			if (wr && XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+			if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 				if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
 					got.br_state = XFS_EXT_UNWRITTEN;
 			}
@@ -5483,7 +5483,7 @@ xfs_bunmapi(
 			 * get rid of part of a realtime extent.
 			 */
 			if (del.br_state == XFS_EXT_UNWRITTEN ||
-			    !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+			    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 				/*
 				 * This piece is unwritten, or we're not
 				 * using unwritten extents.  Skip over it.
@@ -5535,7 +5535,7 @@ xfs_bunmapi(
 			} else if ((del.br_startoff == start &&
 				    (del.br_state == XFS_EXT_UNWRITTEN ||
 				     xfs_trans_get_block_res(tp) == 0)) ||
-				   !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+				   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 				/*
 				 * Can't make it unwritten.  There isn't
 				 * a full extent here so just skip it.
@@ -5772,7 +5772,6 @@ xfs_getbmap(
 	int			error;		/* return value */
 	__int64_t		fixlen;		/* length for -1 case */
 	int			i;		/* extent number */
-	bhv_vnode_t		*vp;		/* corresponding vnode */
 	int			lock;		/* lock state */
 	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
 	xfs_mount_t		*mp;		/* file system mount point */
@@ -5789,7 +5788,6 @@ xfs_getbmap(
 	int			bmapi_flags;	/* flags for xfs_bmapi */
 	__int32_t		oflags;		/* getbmapx bmv_oflags field */
 
-	vp = XFS_ITOV(ip);
 	mp = ip->i_mount;
 
 	whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
@@ -5811,7 +5809,7 @@ xfs_getbmap(
 	if ((interface & BMV_IF_NO_DMAPI_READ) == 0 &&
 	    DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
 	    whichfork == XFS_DATA_FORK) {
-		error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL);
+		error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
 		if (error)
 			return XFS_ERROR(error);
 	}
@@ -5869,6 +5867,10 @@ xfs_getbmap(
 		/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
 		error = xfs_flush_pages(ip, (xfs_off_t)0,
 					       -1, 0, FI_REMAPF);
+		if (error) {
+			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+		return error;
+		}
 	}
 
 	ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 2d950e9..cd0d4b4 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -120,7 +120,7 @@ typedef enum {
  * Extent state and extent format macros.
  */
 #define XFS_EXTFMT_INODE(x)	\
-	(XFS_SB_VERSION_HASEXTFLGBIT(&((x)->i_mount->m_sb)) ? \
+	(xfs_sb_version_hasextflgbit(&((x)->i_mount->m_sb)) ? \
 		XFS_EXTFMT_HASSTATE : XFS_EXTFMT_NOSTATE)
 #define ISUNWRITTEN(x)	((x)->br_state == XFS_EXT_UNWRITTEN)
 
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index be7c425..e92e73f 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -49,7 +49,7 @@ void
 xfs_dir_mount(
 	xfs_mount_t	*mp)
 {
-	ASSERT(XFS_SB_VERSION_HASDIRV2(&mp->m_sb));
+	ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
 	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
 	       XFS_MAX_BLOCKSIZE);
 	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index eadc159..d3a0f53 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -77,36 +77,36 @@ xfs_fs_geometry(
 	if (new_version >= 3) {
 		geo->version = XFS_FSOP_GEOM_VERSION;
 		geo->flags =
-			(XFS_SB_VERSION_HASATTR(&mp->m_sb) ?
+			(xfs_sb_version_hasattr(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
-			(XFS_SB_VERSION_HASNLINK(&mp->m_sb) ?
+			(xfs_sb_version_hasnlink(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_NLINK : 0) |
-			(XFS_SB_VERSION_HASQUOTA(&mp->m_sb) ?
+			(xfs_sb_version_hasquota(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
-			(XFS_SB_VERSION_HASALIGN(&mp->m_sb) ?
+			(xfs_sb_version_hasalign(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
-			(XFS_SB_VERSION_HASDALIGN(&mp->m_sb) ?
+			(xfs_sb_version_hasdalign(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
-			(XFS_SB_VERSION_HASSHARED(&mp->m_sb) ?
+			(xfs_sb_version_hasshared(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SHARED : 0) |
-			(XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) ?
+			(xfs_sb_version_hasextflgbit(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
-			(XFS_SB_VERSION_HASDIRV2(&mp->m_sb) ?
+			(xfs_sb_version_hasdirv2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
-			(XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
+			(xfs_sb_version_hassector(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
 			(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
-			(XFS_SB_VERSION_HASATTR2(&mp->m_sb) ?
+			(xfs_sb_version_hasattr2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_ATTR2 : 0);
-		geo->logsectsize = XFS_SB_VERSION_HASSECTOR(&mp->m_sb) ?
+		geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
 				mp->m_sb.sb_logsectsize : BBSIZE;
 		geo->rtsectsize = mp->m_sb.sb_blocksize;
 		geo->dirblocksize = mp->m_dirblksize;
 	}
 	if (new_version >= 4) {
 		geo->flags |=
-			(XFS_SB_VERSION_HASLOGV2(&mp->m_sb) ?
+			(xfs_sb_version_haslogv2(&mp->m_sb) ?
 				XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
 		geo->logsunit = mp->m_sb.sb_logsunit;
 	}
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index db9d5fa..5a146cb 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -191,7 +191,7 @@ xfs_ialloc_ag_alloc(
 			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
 			args.alignment = args.mp->m_dalign;
 			isaligned = 1;
-		} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+		} else if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
 			   args.mp->m_sb.sb_inoalignmt >=
 			   XFS_B_TO_FSBT(args.mp,
 			  	XFS_INODE_CLUSTER_SIZE(args.mp)))
@@ -230,7 +230,7 @@ xfs_ialloc_ag_alloc(
 		args.agbno = be32_to_cpu(agi->agi_root);
 		args.fsbno = XFS_AGB_TO_FSB(args.mp,
 				be32_to_cpu(agi->agi_seqno), args.agbno);
-		if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+		if (xfs_sb_version_hasalign(&args.mp->m_sb) &&
 			args.mp->m_sb.sb_inoalignmt >=
 			XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
 				args.alignment = args.mp->m_sb.sb_inoalignmt;
@@ -271,7 +271,7 @@ xfs_ialloc_ag_alloc(
 	 * use the old version so that old kernels will continue to be
 	 * able to use the file system.
 	 */
-	if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
+	if (xfs_sb_version_hasnlink(&args.mp->m_sb))
 		version = XFS_DINODE_VERSION_2;
 	else
 		version = XFS_DINODE_VERSION_1;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 8e09b71..e657c51 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -78,7 +78,6 @@ xfs_iget_core(
 	xfs_inode_t	*ip;
 	xfs_inode_t	*iq;
 	int		error;
-	xfs_icluster_t	*icl, *new_icl = NULL;
 	unsigned long	first_index, mask;
 	xfs_perag_t	*pag;
 	xfs_agino_t	agino;
@@ -229,11 +228,9 @@ finish_inode:
 	}
 
 	/*
-	 * This is a bit messy - we preallocate everything we _might_
-	 * need before we pick up the ici lock. That way we don't have to
-	 * juggle locks and go all the way back to the start.
+	 * Preload the radix tree so we can insert safely under the
+	 * write spinlock.
 	 */
-	new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
 	if (radix_tree_preload(GFP_KERNEL)) {
 		xfs_idestroy(ip);
 		delay(1);
@@ -242,17 +239,6 @@ finish_inode:
 	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
 	first_index = agino & mask;
 	write_lock(&pag->pag_ici_lock);
-
-	/*
-	 * Find the cluster if it exists
-	 */
-	icl = NULL;
-	if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
-							first_index, 1)) {
-		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
-			icl = iq->i_cluster;
-	}
-
 	/*
 	 * insert the new inode
 	 */
@@ -267,30 +253,13 @@ finish_inode:
 	}
 
 	/*
-	 * These values _must_ be set before releasing ihlock!
+	 * These values _must_ be set before releasing the radix tree lock!
 	 */
 	ip->i_udquot = ip->i_gdquot = NULL;
 	xfs_iflags_set(ip, XFS_INEW);
 
-	ASSERT(ip->i_cluster == NULL);
-
-	if (!icl) {
-		spin_lock_init(&new_icl->icl_lock);
-		INIT_HLIST_HEAD(&new_icl->icl_inodes);
-		icl = new_icl;
-		new_icl = NULL;
-	} else {
-		ASSERT(!hlist_empty(&icl->icl_inodes));
-	}
-	spin_lock(&icl->icl_lock);
-	hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
-	ip->i_cluster = icl;
-	spin_unlock(&icl->icl_lock);
-
 	write_unlock(&pag->pag_ici_lock);
 	radix_tree_preload_end();
-	if (new_icl)
-		kmem_zone_free(xfs_icluster_zone, new_icl);
 
 	/*
 	 * Link ip to its mount and thread it on the mount's inode list.
@@ -529,18 +498,6 @@ xfs_iextract(
 	xfs_put_perag(mp, pag);
 
 	/*
-	 * Remove from cluster list
-	 */
-	mp = ip->i_mount;
-	spin_lock(&ip->i_cluster->icl_lock);
-	hlist_del(&ip->i_cnode);
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	/* was last inode in cluster? */
-	if (hlist_empty(&ip->i_cluster->icl_inodes))
-		kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
-
-	/*
 	 * Remove from mount's inode list.
 	 */
 	XFS_MOUNT_ILOCK(mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a550546..d7514f8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
 
 kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
 
 /*
  * Used in xfs_itruncate().  This is the maximum number of extents
@@ -126,6 +125,90 @@ xfs_inobp_check(
 #endif
 
 /*
+ * Find the buffer associated with the given inode map
+ * We do basic validation checks on the buffer once it has been
+ * retrieved from disk.
+ */
+STATIC int
+xfs_imap_to_bp(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_imap_t	*imap,
+	xfs_buf_t	**bpp,
+	uint		buf_flags,
+	uint		imap_flags)
+{
+	int		error;
+	int		i;
+	int		ni;
+	xfs_buf_t	*bp;
+
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+				   (int)imap->im_len, buf_flags, &bp);
+	if (error) {
+		if (error != EAGAIN) {
+			cmn_err(CE_WARN,
+				"xfs_imap_to_bp: xfs_trans_read_buf()returned "
+				"an error %d on %s.  Returning error.",
+				error, mp->m_fsname);
+		} else {
+			ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+		}
+		return error;
+	}
+
+	/*
+	 * Validate the magic number and version of every inode in the buffer
+	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+	 */
+#ifdef DEBUG
+	ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
+#else	/* usual case */
+	ni = 1;
+#endif
+
+	for (i = 0; i < ni; i++) {
+		int		di_ok;
+		xfs_dinode_t	*dip;
+
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+					(i << mp->m_sb.sb_inodelog));
+		di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
+			    XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+						XFS_ERRTAG_ITOBP_INOTOBP,
+						XFS_RANDOM_ITOBP_INOTOBP))) {
+			if (imap_flags & XFS_IMAP_BULKSTAT) {
+				xfs_trans_brelse(tp, bp);
+				return XFS_ERROR(EINVAL);
+			}
+			XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
+						XFS_ERRLEVEL_HIGH, mp, dip);
+#ifdef DEBUG
+			cmn_err(CE_PANIC,
+					"Device %s - bad inode magic/vsn "
+					"daddr %lld #%d (magic=%x)",
+				XFS_BUFTARG_NAME(mp->m_ddev_targp),
+				(unsigned long long)imap->im_blkno, i,
+				be16_to_cpu(dip->di_core.di_magic));
+#endif
+			xfs_trans_brelse(tp, bp);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+	}
+
+	xfs_inobp_check(mp, bp);
+
+	/*
+	 * Mark the buffer as an inode buffer now that it looks good
+	 */
+	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+
+	*bpp = bp;
+	return 0;
+}
+
+/*
  * This routine is called to map an inode number within a file
  * system to the buffer containing the on-disk version of the
  * inode.  It returns a pointer to the buffer containing the
@@ -147,72 +230,19 @@ xfs_inotobp(
 	xfs_buf_t	**bpp,
 	int		*offset)
 {
-	int		di_ok;
 	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
 	int		error;
-	xfs_dinode_t	*dip;
 
-	/*
-	 * Call the space management code to find the location of the
-	 * inode on disk.
-	 */
 	imap.im_blkno = 0;
 	error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
-	if (error != 0) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: xfs_imap()  returned an "
-	"error %d on %s.  Returning error.", error, mp->m_fsname);
+	if (error)
 		return error;
-	}
-
-	/*
-	 * If the inode number maps to a block outside the bounds of the
-	 * file system then return NULL rather than calling read_buf
-	 * and panicing when we get an error from the driver.
-	 */
-	if ((imap.im_blkno + imap.im_len) >
-	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
-	"of the file system %s.  Returning EINVAL.",
-			(unsigned long long)imap.im_blkno,
-			imap.im_len, mp->m_fsname);
-		return XFS_ERROR(EINVAL);
-	}
 
-	/*
-	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
-	 * default to just a read_buf() call.
-	 */
-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-
-	if (error) {
-		cmn_err(CE_WARN,
-	"xfs_inotobp: xfs_trans_read_buf()  returned an "
-	"error %d on %s.  Returning error.", error, mp->m_fsname);
+	error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0);
+	if (error)
 		return error;
-	}
-	dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
-	di_ok =
-		be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-		XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
-	if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
-			XFS_RANDOM_ITOBP_INOTOBP))) {
-		XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
-		xfs_trans_brelse(tp, bp);
-		cmn_err(CE_WARN,
-	"xfs_inotobp: XFS_TEST_ERROR()  returned an "
-	"error on %s.  Returning EFSCORRUPTED.",  mp->m_fsname);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	xfs_inobp_check(mp, bp);
 
-	/*
-	 * Set *dipp to point to the on-disk inode in the buffer.
-	 */
 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	*offset = imap.im_boffset;
@@ -248,46 +278,21 @@ xfs_itobp(
 	xfs_dinode_t	**dipp,
 	xfs_buf_t	**bpp,
 	xfs_daddr_t	bno,
-	uint		imap_flags)
+	uint		imap_flags,
+	uint		buf_flags)
 {
 	xfs_imap_t	imap;
 	xfs_buf_t	*bp;
 	int		error;
-	int		i;
-	int		ni;
 
 	if (ip->i_blkno == (xfs_daddr_t)0) {
-		/*
-		 * Call the space management code to find the location of the
-		 * inode on disk.
-		 */
 		imap.im_blkno = bno;
-		if ((error = xfs_imap(mp, tp, ip->i_ino, &imap,
-					XFS_IMAP_LOOKUP | imap_flags)))
+		error = xfs_imap(mp, tp, ip->i_ino, &imap,
+					XFS_IMAP_LOOKUP | imap_flags);
+		if (error)
 			return error;
 
 		/*
-		 * If the inode number maps to a block outside the bounds
-		 * of the file system then return NULL rather than calling
-		 * read_buf and panicing when we get an error from the
-		 * driver.
-		 */
-		if ((imap.im_blkno + imap.im_len) >
-		    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-#ifdef DEBUG
-			xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-					"(imap.im_blkno (0x%llx) "
-					"+ imap.im_len (0x%llx)) > "
-					" XFS_FSB_TO_BB(mp, "
-					"mp->m_sb.sb_dblocks) (0x%llx)",
-					(unsigned long long) imap.im_blkno,
-					(unsigned long long) imap.im_len,
-					XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-#endif /* DEBUG */
-			return XFS_ERROR(EINVAL);
-		}
-
-		/*
 		 * Fill in the fields in the inode that will be used to
 		 * map the inode to its buffer from now on.
 		 */
@@ -305,76 +310,17 @@ xfs_itobp(
 	}
 	ASSERT(bno == 0 || bno == imap.im_blkno);
 
-	/*
-	 * Read in the buffer.  If tp is NULL, xfs_trans_read_buf() will
-	 * default to just a read_buf() call.
-	 */
-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
-				   (int)imap.im_len, XFS_BUF_LOCK, &bp);
-	if (error) {
-#ifdef DEBUG
-		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
-				"xfs_trans_read_buf() returned error %d, "
-				"imap.im_blkno 0x%llx, imap.im_len 0x%llx",
-				error, (unsigned long long) imap.im_blkno,
-				(unsigned long long) imap.im_len);
-#endif /* DEBUG */
+	error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
+	if (error)
 		return error;
-	}
 
-	/*
-	 * Validate the magic number and version of every inode in the buffer
-	 * (if DEBUG kernel) or the first inode in the buffer, otherwise.
-	 * No validation is done here in userspace (xfs_repair).
-	 */
-#if !defined(__KERNEL__)
-	ni = 0;
-#elif defined(DEBUG)
-	ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
-#else	/* usual case */
-	ni = 1;
-#endif
-
-	for (i = 0; i < ni; i++) {
-		int		di_ok;
-		xfs_dinode_t	*dip;
-
-		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-					(i << mp->m_sb.sb_inodelog));
-		di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
-			    XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
-		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-						XFS_ERRTAG_ITOBP_INOTOBP,
-						XFS_RANDOM_ITOBP_INOTOBP))) {
-			if (imap_flags & XFS_IMAP_BULKSTAT) {
-				xfs_trans_brelse(tp, bp);
-				return XFS_ERROR(EINVAL);
-			}
-#ifdef DEBUG
-			cmn_err(CE_ALERT,
-					"Device %s - bad inode magic/vsn "
-					"daddr %lld #%d (magic=%x)",
-				XFS_BUFTARG_NAME(mp->m_ddev_targp),
-				(unsigned long long)imap.im_blkno, i,
-				be16_to_cpu(dip->di_core.di_magic));
-#endif
-			XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
-					     mp, dip);
-			xfs_trans_brelse(tp, bp);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
+	if (!bp) {
+		ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+		ASSERT(tp == NULL);
+		*bpp = NULL;
+		return EAGAIN;
 	}
 
-	xfs_inobp_check(mp, bp);
-
-	/*
-	 * Mark the buffer as an inode buffer now that it looks good
-	 */
-	XFS_BUF_SET_VTYPE(bp, B_FS_INO);
-
-	/*
-	 * Set *dipp to point to the on-disk inode in the buffer.
-	 */
 	*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
 	*bpp = bp;
 	return 0;
@@ -878,7 +824,7 @@ xfs_iread(
 	 * return NULL as well.  Set i_blkno to 0 so that xfs_itobp() will
 	 * know that this is a new incore inode.
 	 */
-	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
+	error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
 	if (error) {
 		kmem_zone_free(xfs_inode_zone, ip);
 		return error;
@@ -1147,7 +1093,7 @@ xfs_ialloc(
 	 * the inode version number now.  This way we only do the conversion
 	 * here rather than here and in the flush/logging code.
 	 */
-	if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) &&
+	if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) &&
 	    ip->i_d.di_version == XFS_DINODE_VERSION_1) {
 		ip->i_d.di_version = XFS_DINODE_VERSION_2;
 		/*
@@ -1967,7 +1913,7 @@ xfs_iunlink(
 		 * Here we put the head pointer into our next pointer,
 		 * and then we fall through to point the head at us.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 		if (error)
 			return error;
 
@@ -2075,7 +2021,7 @@ xfs_iunlink_remove(
 		 * of dealing with the buffer when there is no need to
 		 * change it.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 		if (error) {
 			cmn_err(CE_WARN,
 				"xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2137,7 +2083,7 @@ xfs_iunlink_remove(
 		 * Now last_ibp points to the buffer previous to us on
 		 * the unlinked list.  Pull us from the list.
 		 */
-		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+		error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 		if (error) {
 			cmn_err(CE_WARN,
 				"xfs_iunlink_remove: xfs_itobp()  returned an error %d on %s.  Returning error.",
@@ -2172,13 +2118,6 @@ xfs_iunlink_remove(
 	return 0;
 }
 
-STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
-{
-	return (((ip->i_itemp == NULL) ||
-		!(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-		(ip->i_update_core == 0));
-}
-
 STATIC void
 xfs_ifree_cluster(
 	xfs_inode_t	*free_ip,
@@ -2400,7 +2339,7 @@ xfs_ifree(
 
 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0);
+	error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
 	if (error)
 		return error;
 
@@ -2678,14 +2617,31 @@ xfs_imap(
 	fsbno = imap->im_blkno ?
 		XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
 	error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
-	if (error != 0) {
+	if (error)
 		return error;
-	}
+
 	imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
 	imap->im_len = XFS_FSB_TO_BB(mp, len);
 	imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
 	imap->im_ioffset = (ushort)off;
 	imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
+
+	/*
+	 * If the inode number maps to a block outside the bounds
+	 * of the file system then return NULL rather than calling
+	 * read_buf and panicing when we get an error from the
+	 * driver.
+	 */
+	if ((imap->im_blkno + imap->im_len) >
+	    XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+		xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+			"(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
+			" XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
+			(unsigned long long) imap->im_blkno,
+			(unsigned long long) imap->im_len,
+			XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+		return EINVAL;
+	}
 	return 0;
 }
 
@@ -2826,38 +2782,41 @@ xfs_iunpin(
 }
 
 /*
- * This is called to wait for the given inode to be unpinned.
- * It will sleep until this happens.  The caller must have the
- * inode locked in at least shared mode so that the buffer cannot
- * be subsequently pinned once someone is waiting for it to be
- * unpinned.
+ * This is called to unpin an inode. It can be directed to wait or to return
+ * immediately without waiting for the inode to be unpinned.  The caller must
+ * have the inode locked in at least shared mode so that the buffer cannot be
+ * subsequently pinned once someone is waiting for it to be unpinned.
  */
 STATIC void
-xfs_iunpin_wait(
-	xfs_inode_t	*ip)
+__xfs_iunpin_wait(
+	xfs_inode_t	*ip,
+	int		wait)
 {
-	xfs_inode_log_item_t	*iip;
-	xfs_lsn_t	lsn;
+	xfs_inode_log_item_t	*iip = ip->i_itemp;
 
 	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
-
-	if (atomic_read(&ip->i_pincount) == 0) {
+	if (atomic_read(&ip->i_pincount) == 0)
 		return;
-	}
 
-	iip = ip->i_itemp;
-	if (iip && iip->ili_last_lsn) {
-		lsn = iip->ili_last_lsn;
-	} else {
-		lsn = (xfs_lsn_t)0;
-	}
+	/* Give the log a push to start the unpinning I/O */
+	xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
+				iip->ili_last_lsn : 0, XFS_LOG_FORCE);
+	if (wait)
+		wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+}
 
-	/*
-	 * Give the log a push so we don't wait here too long.
-	 */
-	xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE);
+static inline void
+xfs_iunpin_wait(
+	xfs_inode_t	*ip)
+{
+	__xfs_iunpin_wait(ip, 1);
+}
 
-	wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+static inline void
+xfs_iunpin_nowait(
+	xfs_inode_t	*ip)
+{
+	__xfs_iunpin_wait(ip, 0);
 }
 
 
@@ -3027,6 +2986,145 @@ xfs_iflush_fork(
 	return 0;
 }
 
+STATIC int
+xfs_iflush_cluster(
+	xfs_inode_t	*ip,
+	xfs_buf_t	*bp)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_perag_t		*pag = xfs_get_perag(mp, ip->i_ino);
+	unsigned long		first_index, mask;
+	int			ilist_size;
+	xfs_inode_t		**ilist;
+	xfs_inode_t		*iq;
+	int			nr_found;
+	int			clcount = 0;
+	int			bufwasdelwri;
+	int			i;
+
+	ASSERT(pag->pagi_inodeok);
+	ASSERT(pag->pag_ici_init);
+
+	ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+	ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+	if (!ilist)
+		return 0;
+
+	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+	read_lock(&pag->pag_ici_lock);
+	/* really need a gang lookup range call here */
+	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+					first_index,
+					XFS_INODE_CLUSTER_SIZE(mp));
+	if (nr_found == 0)
+		goto out_free;
+
+	for (i = 0; i < nr_found; i++) {
+		iq = ilist[i];
+		if (iq == ip)
+			continue;
+		/* if the inode lies outside this cluster, we're done. */
+		if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+			break;
+		/*
+		 * Do an un-protected check to see if the inode is dirty and
+		 * is a candidate for flushing.  These checks will be repeated
+		 * later after the appropriate locks are acquired.
+		 */
+		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+			continue;
+
+		/*
+		 * Try to get locks.  If any are unavailable or it is pinned,
+		 * then this inode cannot be flushed and is skipped.
+		 */
+
+		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+			continue;
+		if (!xfs_iflock_nowait(iq)) {
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+		if (xfs_ipincount(iq)) {
+			xfs_ifunlock(iq);
+			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+			continue;
+		}
+
+		/*
+		 * arriving here means that this inode can be flushed.  First
+		 * re-check that it's dirty before flushing.
+		 */
+		if (!xfs_inode_clean(iq)) {
+			int	error;
+			error = xfs_iflush_int(iq, bp);
+			if (error) {
+				xfs_iunlock(iq, XFS_ILOCK_SHARED);
+				goto cluster_corrupt_out;
+			}
+			clcount++;
+		} else {
+			xfs_ifunlock(iq);
+		}
+		xfs_iunlock(iq, XFS_ILOCK_SHARED);
+	}
+
+	if (clcount) {
+		XFS_STATS_INC(xs_icluster_flushcnt);
+		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+	}
+
+out_free:
+	read_unlock(&pag->pag_ici_lock);
+	kmem_free(ilist, ilist_size);
+	return 0;
+
+
+cluster_corrupt_out:
+	/*
+	 * Corruption detected in the clustering loop.  Invalidate the
+	 * inode buffer and shut down the filesystem.
+	 */
+	read_unlock(&pag->pag_ici_lock);
+	/*
+	 * Clean up the buffer.  If it was B_DELWRI, just release it --
+	 * brelse can handle it with no problems.  If not, shut down the
+	 * filesystem before releasing the buffer.
+	 */
+	bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+	if (bufwasdelwri)
+		xfs_buf_relse(bp);
+
+	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+
+	if (!bufwasdelwri) {
+		/*
+		 * Just like incore_relse: if we have b_iodone functions,
+		 * mark the buffer as an error and call them.  Otherwise
+		 * mark it as stale and brelse.
+		 */
+		if (XFS_BUF_IODONE_FUNC(bp)) {
+			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+			XFS_BUF_UNDONE(bp);
+			XFS_BUF_STALE(bp);
+			XFS_BUF_SHUT(bp);
+			XFS_BUF_ERROR(bp,EIO);
+			xfs_biodone(bp);
+		} else {
+			XFS_BUF_STALE(bp);
+			xfs_buf_relse(bp);
+		}
+	}
+
+	/*
+	 * Unlocks the flush lock
+	 */
+	xfs_iflush_abort(iq);
+	kmem_free(ilist, ilist_size);
+	return XFS_ERROR(EFSCORRUPTED);
+}
+
 /*
  * xfs_iflush() will write a modified inode's changes out to the
  * inode's on disk home.  The caller must have the inode lock held
@@ -3046,11 +3144,7 @@ xfs_iflush(
 	xfs_dinode_t		*dip;
 	xfs_mount_t		*mp;
 	int			error;
-	/* REFERENCED */
-	xfs_inode_t		*iq;
-	int			clcount;	/* count of inodes clustered */
-	int			bufwasdelwri;
-	struct hlist_node	*entry;
+	int			noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
 	enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
 
 	XFS_STATS_INC(xs_iflush_count);
@@ -3067,8 +3161,7 @@ xfs_iflush(
 	 * If the inode isn't dirty, then just release the inode
 	 * flush lock and do nothing.
 	 */
-	if ((ip->i_update_core == 0) &&
-	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+	if (xfs_inode_clean(ip)) {
 		ASSERT((iip != NULL) ?
 			 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
 		xfs_ifunlock(ip);
@@ -3076,11 +3169,21 @@ xfs_iflush(
 	}
 
 	/*
-	 * We can't flush the inode until it is unpinned, so
-	 * wait for it.  We know noone new can pin it, because
-	 * we are holding the inode lock shared and you need
-	 * to hold it exclusively to pin the inode.
+	 * We can't flush the inode until it is unpinned, so wait for it if we
+	 * are allowed to block.  We know noone new can pin it, because we are
+	 * holding the inode lock shared and you need to hold it exclusively to
+	 * pin the inode.
+	 *
+	 * If we are not allowed to block, force the log out asynchronously so
+	 * that when we come back the inode will be unpinned. If other inodes
+	 * in the same cluster are dirty, they will probably write the inode
+	 * out for us if they occur after the log force completes.
 	 */
+	if (noblock && xfs_ipincount(ip)) {
+		xfs_iunpin_nowait(ip);
+		xfs_ifunlock(ip);
+		return EAGAIN;
+	}
 	xfs_iunpin_wait(ip);
 
 	/*
@@ -3097,15 +3200,6 @@ xfs_iflush(
 	}
 
 	/*
-	 * Get the buffer containing the on-disk inode.
-	 */
-	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
-	if (error) {
-		xfs_ifunlock(ip);
-		return error;
-	}
-
-	/*
 	 * Decide how buffer will be flushed out.  This is done before
 	 * the call to xfs_iflush_int because this field is zeroed by it.
 	 */
@@ -3121,6 +3215,7 @@ xfs_iflush(
 		case XFS_IFLUSH_DELWRI_ELSE_SYNC:
 			flags = 0;
 			break;
+		case XFS_IFLUSH_ASYNC_NOBLOCK:
 		case XFS_IFLUSH_ASYNC:
 		case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
 			flags = INT_ASYNC;
@@ -3140,6 +3235,7 @@ xfs_iflush(
 		case XFS_IFLUSH_DELWRI:
 			flags = INT_DELWRI;
 			break;
+		case XFS_IFLUSH_ASYNC_NOBLOCK:
 		case XFS_IFLUSH_ASYNC:
 			flags = INT_ASYNC;
 			break;
@@ -3154,89 +3250,36 @@ xfs_iflush(
 	}
 
 	/*
-	 * First flush out the inode that xfs_iflush was called with.
+	 * Get the buffer containing the on-disk inode.
 	 */
-	error = xfs_iflush_int(ip, bp);
-	if (error) {
-		goto corrupt_out;
+	error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0,
+				noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
+	if (error || !bp) {
+		xfs_ifunlock(ip);
+		return error;
 	}
 
 	/*
-	 * inode clustering:
-	 * see if other inodes can be gathered into this write
+	 * First flush out the inode that xfs_iflush was called with.
 	 */
-	spin_lock(&ip->i_cluster->icl_lock);
-	ip->i_cluster->icl_buf = bp;
-
-	clcount = 0;
-	hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
-		if (iq == ip)
-			continue;
-
-		/*
-		 * Do an un-protected check to see if the inode is dirty and
-		 * is a candidate for flushing.  These checks will be repeated
-		 * later after the appropriate locks are acquired.
-		 */
-		iip = iq->i_itemp;
-		if ((iq->i_update_core == 0) &&
-		    ((iip == NULL) ||
-		     !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
-		      xfs_ipincount(iq) == 0) {
-			continue;
-		}
-
-		/*
-		 * Try to get locks.  If any are unavailable,
-		 * then this inode cannot be flushed and is skipped.
-		 */
-
-		/* get inode locks (just i_lock) */
-		if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
-			/* get inode flush lock */
-			if (xfs_iflock_nowait(iq)) {
-				/* check if pinned */
-				if (xfs_ipincount(iq) == 0) {
-					/* arriving here means that
-					 * this inode can be flushed.
-					 * first re-check that it's
-					 * dirty
-					 */
-					iip = iq->i_itemp;
-					if ((iq->i_update_core != 0)||
-					    ((iip != NULL) &&
-					     (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
-						clcount++;
-						error = xfs_iflush_int(iq, bp);
-						if (error) {
-							xfs_iunlock(iq,
-								    XFS_ILOCK_SHARED);
-							goto cluster_corrupt_out;
-						}
-					} else {
-						xfs_ifunlock(iq);
-					}
-				} else {
-					xfs_ifunlock(iq);
-				}
-			}
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
-		}
-	}
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	if (clcount) {
-		XFS_STATS_INC(xs_icluster_flushcnt);
-		XFS_STATS_ADD(xs_icluster_flushinode, clcount);
-	}
+	error = xfs_iflush_int(ip, bp);
+	if (error)
+		goto corrupt_out;
 
 	/*
-	 * If the buffer is pinned then push on the log so we won't
+	 * If the buffer is pinned then push on the log now so we won't
 	 * get stuck waiting in the write for too long.
 	 */
-	if (XFS_BUF_ISPINNED(bp)){
+	if (XFS_BUF_ISPINNED(bp))
 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
-	}
+
+	/*
+	 * inode clustering:
+	 * see if other inodes can be gathered into this write
+	 */
+	error = xfs_iflush_cluster(ip, bp);
+	if (error)
+		goto cluster_corrupt_out;
 
 	if (flags & INT_DELWRI) {
 		xfs_bdwrite(mp, bp);
@@ -3250,52 +3293,11 @@ xfs_iflush(
 corrupt_out:
 	xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-	xfs_iflush_abort(ip);
-	/*
-	 * Unlocks the flush lock
-	 */
-	return XFS_ERROR(EFSCORRUPTED);
-
 cluster_corrupt_out:
-	/* Corruption detected in the clustering loop.  Invalidate the
-	 * inode buffer and shut down the filesystem.
-	 */
-	spin_unlock(&ip->i_cluster->icl_lock);
-
-	/*
-	 * Clean up the buffer.  If it was B_DELWRI, just release it --
-	 * brelse can handle it with no problems.  If not, shut down the
-	 * filesystem before releasing the buffer.
-	 */
-	if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
-		xfs_buf_relse(bp);
-	}
-
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
-	if(!bufwasdelwri)  {
-		/*
-		 * Just like incore_relse: if we have b_iodone functions,
-		 * mark the buffer as an error and call them.  Otherwise
-		 * mark it as stale and brelse.
-		 */
-		if (XFS_BUF_IODONE_FUNC(bp)) {
-			XFS_BUF_CLR_BDSTRAT_FUNC(bp);
-			XFS_BUF_UNDONE(bp);
-			XFS_BUF_STALE(bp);
-			XFS_BUF_SHUT(bp);
-			XFS_BUF_ERROR(bp,EIO);
-			xfs_biodone(bp);
-		} else {
-			XFS_BUF_STALE(bp);
-			xfs_buf_relse(bp);
-		}
-	}
-
-	xfs_iflush_abort(iq);
 	/*
 	 * Unlocks the flush lock
 	 */
+	xfs_iflush_abort(ip);
 	return XFS_ERROR(EFSCORRUPTED);
 }
 
@@ -3325,8 +3327,7 @@ xfs_iflush_int(
 	 * If the inode isn't dirty, then just release the inode
 	 * flush lock and do nothing.
 	 */
-	if ((ip->i_update_core == 0) &&
-	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+	if (xfs_inode_clean(ip)) {
 		xfs_ifunlock(ip);
 		return 0;
 	}
@@ -3434,9 +3435,9 @@ xfs_iflush_int(
 	 * has been updated, then make the conversion permanent.
 	 */
 	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
-	       XFS_SB_VERSION_HASNLINK(&mp->m_sb));
+	       xfs_sb_version_hasnlink(&mp->m_sb));
 	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
-		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 			/*
 			 * Convert it back.
 			 */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index bfcd72c..93c3769 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s {
 } dm_attrs_t;
 
 /*
- * This is the xfs inode cluster structure.  This structure is used by
- * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
- * the same time.
- */
-typedef struct xfs_icluster {
-	struct hlist_head	icl_inodes;	/* list of inodes on cluster */
-	xfs_daddr_t		icl_blkno;	/* starting block number of
-						 * the cluster */
-	struct xfs_buf		*icl_buf;	/* the inode buffer */
-	spinlock_t		icl_lock;	/* inode list lock */
-} xfs_icluster_t;
-
-/*
  * This is the xfs in-core inode structure.
  * Most of the on-disk inode is embedded in the i_d field.
  *
@@ -240,10 +227,6 @@ typedef struct xfs_inode {
 	atomic_t		i_pincount;	/* inode pin count */
 	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
 	spinlock_t		i_flags_lock;	/* inode i_flags lock */
-#ifdef HAVE_REFCACHE
-	struct xfs_inode	**i_refcache;	/* ptr to entry in ref cache */
-	struct xfs_inode	*i_release;	/* inode to unref */
-#endif
 	/* Miscellaneous state. */
 	unsigned short		i_flags;	/* see defined flags below */
 	unsigned char		i_update_core;	/* timestamps/size is dirty */
@@ -252,8 +235,6 @@ typedef struct xfs_inode {
 	unsigned int		i_delayed_blks;	/* count of delay alloc blks */
 
 	xfs_icdinode_t		i_d;		/* most of ondisk inode */
-	xfs_icluster_t		*i_cluster;	/* cluster list header */
-	struct hlist_node	i_cnode;	/* cluster link node */
 
 	xfs_fsize_t		i_size;		/* in-memory size */
 	xfs_fsize_t		i_new_size;	/* size when write completes */
@@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
 #define	XFS_IFLUSH_SYNC			3
 #define	XFS_IFLUSH_ASYNC		4
 #define	XFS_IFLUSH_DELWRI		5
+#define	XFS_IFLUSH_ASYNC_NOBLOCK	6
 
 /*
  * Flags for xfs_itruncate_start().
@@ -515,7 +497,7 @@ int		xfs_finish_reclaim_all(struct xfs_mount *, int);
  */
 int		xfs_itobp(struct xfs_mount *, struct xfs_trans *,
 			  xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
-			  xfs_daddr_t, uint);
+			  xfs_daddr_t, uint, uint);
 int		xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
 			  xfs_inode_t **, xfs_daddr_t, uint);
 int		xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
@@ -597,7 +579,6 @@ void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #define	xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
 
-extern struct kmem_zone	*xfs_icluster_zone;
 extern struct kmem_zone	*xfs_ifork_zone;
 extern struct kmem_zone	*xfs_inode_zone;
 extern struct kmem_zone	*xfs_ili_zone;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 034ca72..2c775b4 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -296,9 +296,9 @@ xfs_inode_item_format(
 	 */
 	mp = ip->i_mount;
 	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 ||
-	       XFS_SB_VERSION_HASNLINK(&mp->m_sb));
+	       xfs_sb_version_hasnlink(&mp->m_sb));
 	if (ip->i_d.di_version == XFS_DINODE_VERSION_1) {
-		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 			/*
 			 * Convert it back.
 			 */
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index bfe92ea..4051307 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w)
 	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
 }
 
+static inline int xfs_inode_clean(xfs_inode_t *ip)
+{
+	return (!ip->i_itemp ||
+		!(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
+	       !ip->i_update_core;
+}
+
+
 #ifdef __KERNEL__
 
 extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 658aab6..45d8776 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -45,7 +45,7 @@ xfs_internal_inum(
 	xfs_ino_t	ino)
 {
 	return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
-		(XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
+		(xfs_sb_version_hasquota(&mp->m_sb) &&
 		 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
 }
 
@@ -614,7 +614,8 @@ xfs_bulkstat(
 							xfs_buf_relse(bp);
 						error = xfs_itobp(mp, NULL, ip,
 								&dip, &bp, bno,
-								XFS_IMAP_BULKSTAT);
+								XFS_IMAP_BULKSTAT,
+								XFS_BUF_LOCK);
 						if (!error)
 							clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
 						kmem_zone_free(xfs_inode_zone, ip);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a75edca..1fa9809 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 		xlog_state_want_sync(log, iclog);
 		(void) xlog_state_release_iclog(log, iclog);
@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		 */
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 
 		xlog_state_want_sync(log, iclog);
@@ -1090,7 +1090,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t	*mp,
 			size >>= 1;
 		}
 
-		if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
+		if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 			/* # headers = size / 32K
 			 * one header holds cycles from 32K of data
 			 */
@@ -1186,13 +1186,13 @@ xlog_alloc_log(xfs_mount_t	*mp,
 	log->l_grant_reserve_cycle = 1;
 	log->l_grant_write_cycle = 1;
 
-	if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) {
+	if (xfs_sb_version_hassector(&mp->m_sb)) {
 		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
 		ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
 		/* for larger sector sizes, must have v2 or external log */
 		ASSERT(log->l_sectbb_log == 0 ||
 			log->l_logBBstart == 0 ||
-			XFS_SB_VERSION_HASLOGV2(&mp->m_sb));
+			xfs_sb_version_haslogv2(&mp->m_sb));
 		ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
 	}
 	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
@@ -1247,7 +1247,7 @@ xlog_alloc_log(xfs_mount_t	*mp,
 		memset(head, 0, sizeof(xlog_rec_header_t));
 		head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 		head->h_version = cpu_to_be32(
-			XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
+			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
 		head->h_size = cpu_to_be32(log->l_iclog_size);
 		/* new fields */
 		head->h_fmt = cpu_to_be32(XLOG_FMT);
@@ -1402,10 +1402,10 @@ xlog_sync(xlog_t		*log,
 	int		roundoff;       /* roundoff to BB or stripe */
 	int		split = 0;	/* split write into two regions */
 	int		error;
-	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
+	int		v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
 
 	XFS_STATS_INC(xs_log_writes);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 
 	/* Add for LR header */
 	count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -2309,7 +2309,7 @@ xlog_state_done_syncing(
 
 	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
 	       iclog->ic_state == XLOG_STATE_IOERROR);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
 
 
@@ -2391,7 +2391,7 @@ restart:
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
 	head = &iclog->ic_header;
 
-	iclog->ic_refcnt++;			/* prevents sync */
+	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
 	log_offset = iclog->ic_offset;
 
 	/* On the 1st write to an iclog, figure out lsn.  This works
@@ -2423,12 +2423,12 @@ restart:
 		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
 
 		/* If I'm the only one writing to this iclog, sync it to disk */
-		if (iclog->ic_refcnt == 1) {
+		if (atomic_read(&iclog->ic_refcnt) == 1) {
 			spin_unlock(&log->l_icloglock);
 			if ((error = xlog_state_release_iclog(log, iclog)))
 				return error;
 		} else {
-			iclog->ic_refcnt--;
+			atomic_dec(&iclog->ic_refcnt);
 			spin_unlock(&log->l_icloglock);
 		}
 		goto restart;
@@ -2813,33 +2813,35 @@ xlog_state_put_ticket(xlog_t	    *log,
  *
  */
 STATIC int
-xlog_state_release_iclog(xlog_t		*log,
-			 xlog_in_core_t	*iclog)
+xlog_state_release_iclog(
+	xlog_t		*log,
+	xlog_in_core_t	*iclog)
 {
 	int		sync = 0;	/* do we sync? */
 
-	xlog_assign_tail_lsn(log->l_mp);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		return XFS_ERROR(EIO);
 
-	spin_lock(&log->l_icloglock);
+	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+		return 0;
 
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		spin_unlock(&log->l_icloglock);
 		return XFS_ERROR(EIO);
 	}
-
-	ASSERT(iclog->ic_refcnt > 0);
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
 	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
 
-	if (--iclog->ic_refcnt == 0 &&
-	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+		/* update tail before writing to iclog */
+		xlog_assign_tail_lsn(log->l_mp);
 		sync++;
 		iclog->ic_state = XLOG_STATE_SYNCING;
 		iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
 		xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
 		/* cycle incremented when incrementing curr_block */
 	}
-
 	spin_unlock(&log->l_icloglock);
 
 	/*
@@ -2849,11 +2851,9 @@ xlog_state_release_iclog(xlog_t		*log,
 	 * this iclog has consistent data, so we ignore IOERROR
 	 * flags after this point.
 	 */
-	if (sync) {
+	if (sync)
 		return xlog_sync(log, iclog);
-	}
 	return 0;
-
 }	/* xlog_state_release_iclog */
 
 
@@ -2881,7 +2881,7 @@ xlog_state_switch_iclogs(xlog_t		*log,
 	log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);
 
 	/* Round up to next log-sunit */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
 		__uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
 		log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
@@ -2953,7 +2953,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 		 * previous iclog and go to sleep.
 		 */
 		if (iclog->ic_state == XLOG_STATE_DIRTY ||
-		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+		    (atomic_read(&iclog->ic_refcnt) == 0
+		     && iclog->ic_offset == 0)) {
 			iclog = iclog->ic_prev;
 			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
 			    iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2962,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 			else
 				goto maybe_sleep;
 		} else {
-			if (iclog->ic_refcnt == 0) {
+			if (atomic_read(&iclog->ic_refcnt) == 0) {
 				/* We are the only one with access to this
 				 * iclog.  Flush it out now.  There should
 				 * be a roundoff of zero to show that someone
 				 * has already taken care of the roundoff from
 				 * the previous sync.
 				 */
-				iclog->ic_refcnt++;
+				atomic_inc(&iclog->ic_refcnt);
 				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 				xlog_state_switch_iclogs(log, iclog, 0);
 				spin_unlock(&log->l_icloglock);
@@ -3100,7 +3101,7 @@ try_again:
 			already_slept = 1;
 			goto try_again;
 		} else {
-			iclog->ic_refcnt++;
+			atomic_inc(&iclog->ic_refcnt);
 			xlog_state_switch_iclogs(log, iclog, 0);
 			spin_unlock(&log->l_icloglock);
 			if (xlog_state_release_iclog(log, iclog))
@@ -3334,7 +3335,7 @@ xlog_ticket_get(xlog_t		*log,
 	unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 
 	/* for roundoff padding for transaction data and one for commit record */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
 		/* log su roundoff */
 		unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e008233..01c63db 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -49,10 +49,10 @@ struct xfs_mount;
 #define XLOG_HEADER_SIZE	512
 
 #define XLOG_REC_SHIFT(log) \
-	BTOBB(1 << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \
+	BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
 	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 #define XLOG_TOTAL_REC_SHIFT(log) \
-	BTOBB(XLOG_MAX_ICLOGS << (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? \
+	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
 	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
 
 
@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
 #endif
 	int			ic_size;
 	int			ic_offset;
-	int			ic_refcnt;
+	atomic_t		ic_refcnt;
 	int			ic_bwritecnt;
 	ushort_t		ic_state;
 	char			*ic_datap;	/* pointer to iclog data */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b82d5d4..cd24711 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -478,7 +478,7 @@ xlog_find_verify_log_record(
 	 * reset last_blk.  Only when last_blk points in the middle of a log
 	 * record do we update last_blk.
 	 */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		uint	h_size = be32_to_cpu(head->h_size);
 
 		xhdrs = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -888,7 +888,7 @@ xlog_find_tail(
 	 * unmount record if there is one, so we pass the lsn of the
 	 * unmount record rather than the block after it.
 	 */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		int	h_size = be32_to_cpu(rhead->h_size);
 		int	h_version = be32_to_cpu(rhead->h_version);
 
@@ -1101,7 +1101,7 @@ xlog_add_record(
 	recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 	recp->h_cycle = cpu_to_be32(cycle);
 	recp->h_version = cpu_to_be32(
-			XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
+			xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
 	recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block));
 	recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block));
 	recp->h_fmt = cpu_to_be32(XLOG_FMT);
@@ -3214,7 +3214,8 @@ xlog_recover_process_iunlinks(
 					 * next inode in the bucket.
 					 */
 					error = xfs_itobp(mp, NULL, ip, &dip,
-							&ibp, 0, 0);
+							&ibp, 0, 0,
+							XFS_BUF_LOCK);
 					ASSERT(error || (dip != NULL));
 				}
 
@@ -3348,7 +3349,7 @@ xlog_pack_data(
 		dp += BBSIZE;
 	}
 
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		xhdr = (xlog_in_core_2_t *)&iclog->ic_header;
 		for ( ; i < BTOBB(size); i++) {
 			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3388,7 +3389,7 @@ xlog_unpack_data_checksum(
 			    be32_to_cpu(rhead->h_chksum), chksum);
 		    cmn_err(CE_DEBUG,
 "XFS: Disregard message if filesystem was created with non-DEBUG kernel");
-		    if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+		    if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 			    cmn_err(CE_DEBUG,
 				"XFS: LogR this is a LogV2 filesystem\n");
 		    }
@@ -3415,7 +3416,7 @@ xlog_unpack_data(
 		dp += BBSIZE;
 	}
 
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		xhdr = (xlog_in_core_2_t *)rhead;
 		for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
 			j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
@@ -3494,7 +3495,7 @@ xlog_do_recovery_pass(
 	 * Read the header of the tail block and get the iclog buffer size from
 	 * h_size.  Use this to tell how many sectors make up the log header.
 	 */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
 		/*
 		 * When using variable length iclogs, read first sector of
 		 * iclog header and extract the header size from it.  Get a
@@ -3838,7 +3839,7 @@ xlog_do_recover(
 	sbp = &log->l_mp->m_sb;
 	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
 	ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
-	ASSERT(XFS_SB_GOOD_VERSION(sbp));
+	ASSERT(xfs_sb_good_version(sbp));
 	xfs_buf_relse(bp);
 
 	/* We've re-read the superblock so re-initialize per-cpu counters */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6409b37..5be0328 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -44,7 +44,7 @@
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
 
-STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
+STATIC void	xfs_mount_log_sb(xfs_mount_t *, __int64_t);
 STATIC int	xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
@@ -119,6 +119,7 @@ static const struct {
     { offsetof(xfs_sb_t, sb_logsectsize),0 },
     { offsetof(xfs_sb_t, sb_logsunit),	 0 },
     { offsetof(xfs_sb_t, sb_features2),	 0 },
+    { offsetof(xfs_sb_t, sb_bad_features2), 0 },
     { sizeof(xfs_sb_t),			 0 }
 };
 
@@ -225,7 +226,7 @@ xfs_mount_validate_sb(
 		return XFS_ERROR(EWRONGFS);
 	}
 
-	if (!XFS_SB_GOOD_VERSION(sbp)) {
+	if (!xfs_sb_good_version(sbp)) {
 		xfs_fs_mount_cmn_err(flags, "bad version");
 		return XFS_ERROR(EWRONGFS);
 	}
@@ -300,7 +301,7 @@ xfs_mount_validate_sb(
 	/*
 	 * Version 1 directory format has never worked on Linux.
 	 */
-	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
+	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
 		xfs_fs_mount_cmn_err(flags,
 			"file system using version 1 directory format");
 		return XFS_ERROR(ENOSYS);
@@ -449,6 +450,7 @@ xfs_sb_from_disk(
 	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
 	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
 	to->sb_features2 = be32_to_cpu(from->sb_features2);
+	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
 }
 
 /*
@@ -781,7 +783,7 @@ xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
 		 * Update superblock with new values
 		 * and log changes
 		 */
-		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
+		if (xfs_sb_version_hasdalign(sbp)) {
 			if (sbp->sb_unit != mp->m_dalign) {
 				sbp->sb_unit = mp->m_dalign;
 				*update_flags |= XFS_SB_UNIT;
@@ -792,7 +794,7 @@ xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
 			}
 		}
 	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
-		    XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
+		    xfs_sb_version_hasdalign(&mp->m_sb)) {
 			mp->m_dalign = sbp->sb_unit;
 			mp->m_swidth = sbp->sb_width;
 	}
@@ -869,7 +871,7 @@ xfs_set_rw_sizes(xfs_mount_t *mp)
 STATIC void
 xfs_set_inoalignment(xfs_mount_t *mp)
 {
-	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
+	if (xfs_sb_version_hasalign(&mp->m_sb) &&
 	    mp->m_sb.sb_inoalignmt >=
 	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
 		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
@@ -962,14 +964,29 @@ xfs_mountfs(
 	int		uuid_mounted = 0;
 	int		error = 0;
 
-	if (mp->m_sb_bp == NULL) {
-		error = xfs_readsb(mp, mfsi_flags);
-		if (error)
-			return error;
-	}
 	xfs_mount_common(mp, sbp);
 
 	/*
+	 * Check for a bad features2 field alignment. This happened on
+	 * some platforms due to xfs_sb_t not being 64bit size aligned
+	 * when sb_features was added and hence the compiler put it in
+	 * the wrong place.
+	 *
+	 * If we detect a bad field, we or the set bits into the existing
+	 * features2 field in case it has already been modified and we
+	 * don't want to lose any features. Zero the bad one and mark
+	 * the two fields as needing updates once the transaction subsystem
+	 * is online.
+	 */
+	if (xfs_sb_has_bad_features2(sbp)) {
+		cmn_err(CE_WARN,
+			"XFS: correcting sb_features alignment problem");
+		sbp->sb_features2 |= sbp->sb_bad_features2;
+		sbp->sb_bad_features2 = 0;
+		update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;
+	}
+
+	/*
 	 * Check if sb_agblocks is aligned at stripe boundary
 	 * If sb_agblocks is NOT aligned turn off m_dalign since
 	 * allocator alignment is within an ag, therefore ag has
@@ -1159,11 +1176,10 @@ xfs_mountfs(
 	}
 
 	/*
-	 * If fs is not mounted readonly, then update the superblock
-	 * unit and width changes.
+	 * If fs is not mounted readonly, then update the superblock changes.
 	 */
 	if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY))
-		xfs_mount_log_sbunit(mp, update_flags);
+		xfs_mount_log_sb(mp, update_flags);
 
 	/*
 	 * Initialise the XFS quota management subsystem for this mount
@@ -1878,13 +1894,14 @@ xfs_uuid_unmount(
  * be altered by the mount options. Only the first superblock is updated.
  */
 STATIC void
-xfs_mount_log_sbunit(
+xfs_mount_log_sb(
 	xfs_mount_t	*mp,
 	__int64_t	fields)
 {
 	xfs_trans_t	*tp;
 
-	ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID));
+	ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
+			 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
 	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d8a472..7b37fa0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -66,17 +66,17 @@ struct xfs_mru_cache;
  * Prototypes and functions for the Data Migration subsystem.
  */
 
-typedef int	(*xfs_send_data_t)(int, bhv_vnode_t *,
-			xfs_off_t, size_t, int, bhv_vrwlock_t *);
+typedef int	(*xfs_send_data_t)(int, struct xfs_inode *,
+			xfs_off_t, size_t, int, int *);
 typedef int	(*xfs_send_mmap_t)(struct vm_area_struct *, uint);
-typedef int	(*xfs_send_destroy_t)(bhv_vnode_t *, dm_right_t);
+typedef int	(*xfs_send_destroy_t)(struct xfs_inode *, dm_right_t);
 typedef int	(*xfs_send_namesp_t)(dm_eventtype_t, struct xfs_mount *,
-			bhv_vnode_t *,
-			dm_right_t, bhv_vnode_t *, dm_right_t,
+			struct xfs_inode *, dm_right_t,
+			struct xfs_inode *, dm_right_t,
 			char *, char *, mode_t, int, int);
 typedef int	(*xfs_send_mount_t)(struct xfs_mount *, dm_right_t,
 			char *, char *);
-typedef void	(*xfs_send_unmount_t)(struct xfs_mount *, bhv_vnode_t *,
+typedef void	(*xfs_send_unmount_t)(struct xfs_mount *, struct xfs_inode *,
 			dm_right_t, mode_t, int, int);
 
 typedef struct xfs_dmops {
@@ -88,20 +88,20 @@ typedef struct xfs_dmops {
 	xfs_send_unmount_t	xfs_send_unmount;
 } xfs_dmops_t;
 
-#define XFS_SEND_DATA(mp, ev,vp,off,len,fl,lock) \
-	(*(mp)->m_dm_ops->xfs_send_data)(ev,vp,off,len,fl,lock)
+#define XFS_SEND_DATA(mp, ev,ip,off,len,fl,lock) \
+	(*(mp)->m_dm_ops->xfs_send_data)(ev,ip,off,len,fl,lock)
 #define XFS_SEND_MMAP(mp, vma,fl) \
 	(*(mp)->m_dm_ops->xfs_send_mmap)(vma,fl)
-#define XFS_SEND_DESTROY(mp, vp,right) \
-	(*(mp)->m_dm_ops->xfs_send_destroy)(vp,right)
+#define XFS_SEND_DESTROY(mp, ip,right) \
+	(*(mp)->m_dm_ops->xfs_send_destroy)(ip,right)
 #define XFS_SEND_NAMESP(mp, ev,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
 	(*(mp)->m_dm_ops->xfs_send_namesp)(ev,NULL,b1,r1,b2,r2,n1,n2,mode,rval,fl)
 #define XFS_SEND_PREUNMOUNT(mp,b1,r1,b2,r2,n1,n2,mode,rval,fl) \
 	(*(mp)->m_dm_ops->xfs_send_namesp)(DM_EVENT_PREUNMOUNT,mp,b1,r1,b2,r2,n1,n2,mode,rval,fl)
 #define XFS_SEND_MOUNT(mp,right,path,name) \
 	(*(mp)->m_dm_ops->xfs_send_mount)(mp,right,path,name)
-#define XFS_SEND_UNMOUNT(mp, vp,right,mode,rval,fl) \
-	(*(mp)->m_dm_ops->xfs_send_unmount)(mp,vp,right,mode,rval,fl)
+#define XFS_SEND_UNMOUNT(mp, ip,right,mode,rval,fl) \
+	(*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
 
 
 /*
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 7eb157a..c4d0bac 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -36,7 +36,6 @@
 #include "xfs_bmap.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_refcache.h"
 #include "xfs_utils.h"
 #include "xfs_trans_space.h"
 #include "xfs_vnodeops.h"
@@ -94,7 +93,8 @@ xfs_lock_for_rename(
 	xfs_inode_t	**i_tab,/* array of inode returned, sorted */
 	int		*num_inodes)  /* number of inodes in array */
 {
-	xfs_inode_t		*ip1, *ip2, *temp;
+	xfs_inode_t		*ip1 = VNAME_TO_INODE(vname1);
+	xfs_inode_t		*ip2, *temp;
 	xfs_ino_t		inum1, inum2;
 	int			error;
 	int			i, j;
@@ -110,16 +110,11 @@ xfs_lock_for_rename(
 	 * to see if we still have the right inodes, directories, etc.
 	 */
 	lock_mode = xfs_ilock_map_shared(dp1);
-	error = xfs_get_dir_entry(vname1, &ip1);
-	if (error) {
-		xfs_iunlock_map_shared(dp1, lock_mode);
-		return error;
-	}
+	IHOLD(ip1);
+	xfs_itrace_ref(ip1);
 
 	inum1 = ip1->i_ino;
 
-	ASSERT(ip1);
-	xfs_itrace_ref(ip1);
 
 	/*
 	 * Unlock dp1 and lock dp2 if they are different.
@@ -224,12 +219,11 @@ int
 xfs_rename(
 	xfs_inode_t	*src_dp,
 	bhv_vname_t	*src_vname,
-	bhv_vnode_t	*target_dir_vp,
+	xfs_inode_t	*target_dp,
 	bhv_vname_t	*target_vname)
 {
-	bhv_vnode_t	*src_dir_vp = XFS_ITOV(src_dp);
 	xfs_trans_t	*tp;
-	xfs_inode_t	*target_dp, *src_ip, *target_ip;
+	xfs_inode_t	*src_ip, *target_ip;
 	xfs_mount_t	*mp = src_dp->i_mount;
 	int		new_parent;		/* moving to a new dir */
 	int		src_is_directory;	/* src_name is a directory */
@@ -249,22 +243,13 @@ xfs_rename(
 	int		target_namelen = VNAMELEN(target_vname);
 
 	xfs_itrace_entry(src_dp);
-	xfs_itrace_entry(xfs_vtoi(target_dir_vp));
-
-	/*
-	 * Find the XFS behavior descriptor for the target directory
-	 * vnode since it was not handed to us.
-	 */
-	target_dp = xfs_vtoi(target_dir_vp);
-	if (target_dp == NULL) {
-		return XFS_ERROR(EXDEV);
-	}
+	xfs_itrace_entry(target_dp);
 
 	if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) ||
 	    DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
-					src_dir_vp, DM_RIGHT_NULL,
-					target_dir_vp, DM_RIGHT_NULL,
+					src_dp, DM_RIGHT_NULL,
+					target_dp, DM_RIGHT_NULL,
 					src_name, target_name,
 					0, 0, 0);
 		if (error) {
@@ -365,10 +350,10 @@ xfs_rename(
 	 * them when they unlock the inodes.  Also, we need to be careful
 	 * not to add an inode to the transaction more than once.
 	 */
-	VN_HOLD(src_dir_vp);
+	IHOLD(src_dp);
 	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
 	if (new_parent) {
-		VN_HOLD(target_dir_vp);
+		IHOLD(target_dp);
 		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
 	}
 	if ((src_ip != src_dp) && (src_ip != target_dp)) {
@@ -580,10 +565,8 @@ xfs_rename(
 	 * the vnode references.
 	 */
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	if (target_ip != NULL) {
-		xfs_refcache_purge_ip(target_ip);
+	if (target_ip != NULL)
 		IRELE(target_ip);
-	}
 	/*
 	 * Let interposed file systems know about removed links.
 	 */
@@ -598,8 +581,8 @@ std_return:
 	if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) ||
 	    DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) {
 		(void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
-					src_dir_vp, DM_RIGHT_NULL,
-					target_dir_vp, DM_RIGHT_NULL,
+					src_dp, DM_RIGHT_NULL,
+					target_dp, DM_RIGHT_NULL,
 					src_name, target_name,
 					0, error, 0);
 	}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 94660b1..b1a83f8 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -89,6 +89,7 @@ struct xfs_mount;
 
 /*
  * Superblock - in core version.  Must match the ondisk version below.
+ * Must be padded to 64 bit alignment.
  */
 typedef struct xfs_sb {
 	__uint32_t	sb_magicnum;	/* magic number == XFS_SB_MAGIC */
@@ -145,10 +146,21 @@ typedef struct xfs_sb {
 	__uint16_t	sb_logsectsize;	/* sector size for the log, bytes */
 	__uint32_t	sb_logsunit;	/* stripe unit size for the log */
 	__uint32_t	sb_features2;	/* additional feature bits */
+
+	/*
+	 * bad features2 field as a result of failing to pad the sb
+	 * structure to 64 bits. Some machines will be using this field
+	 * for features2 bits. Easiest just to mark it bad and not use
+	 * it for anything else.
+	 */
+	__uint32_t	sb_bad_features2;
+
+	/* must be padded to 64 bit alignment */
 } xfs_sb_t;
 
 /*
- * Superblock - on disk version.  Must match the in core version below.
+ * Superblock - on disk version.  Must match the in core version above.
+ * Must be padded to 64 bit alignment.
  */
 typedef struct xfs_dsb {
 	__be32		sb_magicnum;	/* magic number == XFS_SB_MAGIC */
@@ -205,6 +217,15 @@ typedef struct xfs_dsb {
 	__be16		sb_logsectsize;	/* sector size for the log, bytes */
 	__be32		sb_logsunit;	/* stripe unit size for the log */
 	__be32		sb_features2;	/* additional feature bits */
+	/*
+	 * bad features2 field as a result of failing to pad the sb
+	 * structure to 64 bits. Some machines will be using this field
+	 * for features2 bits. Easiest just to mark it bad and not use
+	 * it for anything else.
+	 */
+	__be32	sb_bad_features2;
+
+	/* must be padded to 64 bit alignment */
 } xfs_dsb_t;
 
 /*
@@ -223,7 +244,7 @@ typedef enum {
 	XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
 	XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
 	XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
-	XFS_SBS_FEATURES2,
+	XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2,
 	XFS_SBS_FIELDCOUNT
 } xfs_sb_field_t;
 
@@ -248,13 +269,15 @@ typedef enum {
 #define XFS_SB_IFREE		XFS_SB_MVAL(IFREE)
 #define XFS_SB_FDBLOCKS		XFS_SB_MVAL(FDBLOCKS)
 #define XFS_SB_FEATURES2	XFS_SB_MVAL(FEATURES2)
+#define XFS_SB_BAD_FEATURES2	XFS_SB_MVAL(BAD_FEATURES2)
 #define	XFS_SB_NUM_BITS		((int)XFS_SBS_FIELDCOUNT)
 #define	XFS_SB_ALL_BITS		((1LL << XFS_SB_NUM_BITS) - 1)
 #define	XFS_SB_MOD_BITS		\
 	(XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
 	 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
 	 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2)
+	 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
+	 XFS_SB_BAD_FEATURES2)
 
 
 /*
@@ -271,7 +294,6 @@ typedef enum {
 
 #define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
 
-#define	XFS_SB_GOOD_VERSION(sbp)	xfs_sb_good_version(sbp)
 #ifdef __KERNEL__
 static inline int xfs_sb_good_version(xfs_sb_t *sbp)
 {
@@ -297,7 +319,14 @@ static inline int xfs_sb_good_version(xfs_sb_t *sbp)
 }
 #endif /* __KERNEL__ */
 
-#define	XFS_SB_VERSION_TONEW(v)	xfs_sb_version_tonew(v)
+/*
+ * Detect a bad features2 field
+ */
+static inline int xfs_sb_has_bad_features2(xfs_sb_t *sbp)
+{
+	return (sbp->sb_bad_features2 != 0);
+}
+
 static inline unsigned xfs_sb_version_tonew(unsigned v)
 {
 	return ((((v) == XFS_SB_VERSION_1) ? \
@@ -308,7 +337,6 @@ static inline unsigned xfs_sb_version_tonew(unsigned v)
 		XFS_SB_VERSION_4);
 }
 
-#define	XFS_SB_VERSION_TOOLD(v)	xfs_sb_version_toold(v)
 static inline unsigned xfs_sb_version_toold(unsigned v)
 {
 	return (((v) & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) ? \
@@ -320,7 +348,6 @@ static inline unsigned xfs_sb_version_toold(unsigned v)
 				XFS_SB_VERSION_1)));
 }
 
-#define	XFS_SB_VERSION_HASATTR(sbp)	xfs_sb_version_hasattr(sbp)
 static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
 {
 	return ((sbp)->sb_versionnum == XFS_SB_VERSION_2) || \
@@ -329,7 +356,6 @@ static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp)
 		  ((sbp)->sb_versionnum & XFS_SB_VERSION_ATTRBIT));
 }
 
-#define	XFS_SB_VERSION_ADDATTR(sbp)	xfs_sb_version_addattr(sbp)
 static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
 {
 	(sbp)->sb_versionnum = (((sbp)->sb_versionnum == XFS_SB_VERSION_1) ? \
@@ -339,7 +365,6 @@ static inline void xfs_sb_version_addattr(xfs_sb_t *sbp)
 			(XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT)));
 }
 
-#define	XFS_SB_VERSION_HASNLINK(sbp)	xfs_sb_version_hasnlink(sbp)
 static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
 {
 	return ((sbp)->sb_versionnum == XFS_SB_VERSION_3) || \
@@ -347,7 +372,6 @@ static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp)
 		  ((sbp)->sb_versionnum & XFS_SB_VERSION_NLINKBIT));
 }
 
-#define	XFS_SB_VERSION_ADDNLINK(sbp)	xfs_sb_version_addnlink(sbp)
 static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
 {
 	(sbp)->sb_versionnum = ((sbp)->sb_versionnum <= XFS_SB_VERSION_2 ? \
@@ -355,115 +379,63 @@ static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp)
 		((sbp)->sb_versionnum | XFS_SB_VERSION_NLINKBIT));
 }
 
-#define	XFS_SB_VERSION_HASQUOTA(sbp)	xfs_sb_version_hasquota(sbp)
 static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_QUOTABIT);
 }
 
-#define	XFS_SB_VERSION_ADDQUOTA(sbp)	xfs_sb_version_addquota(sbp)
 static inline void xfs_sb_version_addquota(xfs_sb_t *sbp)
 {
 	(sbp)->sb_versionnum = \
 		 (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 ? \
 			((sbp)->sb_versionnum | XFS_SB_VERSION_QUOTABIT) : \
-			(XFS_SB_VERSION_TONEW((sbp)->sb_versionnum) | \
+			(xfs_sb_version_tonew((sbp)->sb_versionnum) | \
 			 XFS_SB_VERSION_QUOTABIT));
 }
 
-#define	XFS_SB_VERSION_HASALIGN(sbp)	xfs_sb_version_hasalign(sbp)
 static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_ALIGNBIT);
 }
 
-#define	XFS_SB_VERSION_SUBALIGN(sbp)	xfs_sb_version_subalign(sbp)
-static inline void xfs_sb_version_subalign(xfs_sb_t *sbp)
-{
-	(sbp)->sb_versionnum = \
-	 XFS_SB_VERSION_TOOLD((sbp)->sb_versionnum & ~XFS_SB_VERSION_ALIGNBIT);
-}
-
-#define XFS_SB_VERSION_HASDALIGN(sbp)	xfs_sb_version_hasdalign(sbp)
 static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);
 }
 
-#define XFS_SB_VERSION_ADDDALIGN(sbp)	xfs_sb_version_adddalign(sbp)
-static inline int xfs_sb_version_adddalign(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_DALIGNBIT);
-}
-
-#define XFS_SB_VERSION_HASSHARED(sbp)	xfs_sb_version_hasshared(sbp)
 static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_SHAREDBIT);
 }
 
-#define XFS_SB_VERSION_ADDSHARED(sbp)	xfs_sb_version_addshared(sbp)
-static inline int xfs_sb_version_addshared(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_SHAREDBIT);
-}
-
-#define XFS_SB_VERSION_SUBSHARED(sbp)	xfs_sb_version_subshared(sbp)
-static inline int xfs_sb_version_subshared(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum & ~XFS_SB_VERSION_SHAREDBIT);
-}
-
-#define XFS_SB_VERSION_HASDIRV2(sbp)	xfs_sb_version_hasdirv2(sbp)
 static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
 }
 
-#define XFS_SB_VERSION_HASLOGV2(sbp)   xfs_sb_version_haslogv2(sbp)
 static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
 }
 
-#define XFS_SB_VERSION_HASEXTFLGBIT(sbp)	xfs_sb_version_hasextflgbit(sbp)
 static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
 }
 
-#define XFS_SB_VERSION_ADDEXTFLGBIT(sbp)	xfs_sb_version_addextflgbit(sbp)
-static inline int xfs_sb_version_addextflgbit(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum | XFS_SB_VERSION_EXTFLGBIT);
-}
-
-#define XFS_SB_VERSION_SUBEXTFLGBIT(sbp)	xfs_sb_version_subextflgbit(sbp)
-static inline int xfs_sb_version_subextflgbit(xfs_sb_t *sbp)
-{
-	return (sbp)->sb_versionnum = \
-		((sbp)->sb_versionnum & ~XFS_SB_VERSION_EXTFLGBIT);
-}
-
-#define XFS_SB_VERSION_HASSECTOR(sbp)   xfs_sb_version_hassector(sbp)
 static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
 		((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
 }
 
-#define XFS_SB_VERSION_HASMOREBITS(sbp)	xfs_sb_version_hasmorebits(sbp)
 static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
 {
 	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -476,24 +448,22 @@ static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
  * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro:
  *
  * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp)
- *	((XFS_SB_VERSION_HASMOREBITS(sbp) &&
+ *	((xfs_sb_version_hasmorebits(sbp) &&
  *	 ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)
  */
 
 static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp)
 {
-	return (XFS_SB_VERSION_HASMOREBITS(sbp) &&	\
+	return (xfs_sb_version_hasmorebits(sbp) &&	\
 		((sbp)->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));
 }
 
-#define XFS_SB_VERSION_HASATTR2(sbp)	xfs_sb_version_hasattr2(sbp)
 static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp)
 {
-	return (XFS_SB_VERSION_HASMOREBITS(sbp)) &&	\
+	return (xfs_sb_version_hasmorebits(sbp)) &&	\
 		((sbp)->sb_features2 & XFS_SB_VERSION2_ATTR2BIT);
 }
 
-#define XFS_SB_VERSION_ADDATTR2(sbp)	xfs_sb_version_addattr2(sbp)
 static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
 {
 	((sbp)->sb_versionnum =	\
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7f40628..b5effce 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -341,7 +341,6 @@ typedef struct xfs_trans {
 	unsigned int		t_rtx_res;	/* # of rt extents resvd */
 	unsigned int		t_rtx_res_used;	/* # of resvd rt extents used */
 	xfs_log_ticket_t	t_ticket;	/* log mgr ticket */
-	sema_t			t_sema;		/* sema for commit completion */
 	xfs_lsn_t		t_lsn;		/* log seq num of start of
 						 * transaction. */
 	xfs_lsn_t		t_commit_lsn;	/* log seq num of end of
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b89..4e5c010 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -304,7 +304,8 @@ xfs_trans_read_buf(
 	if (tp == NULL) {
 		bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
 		if (!bp)
-			return XFS_ERROR(ENOMEM);
+			return (flags & XFS_BUF_TRYLOCK) ?
+					EAGAIN : XFS_ERROR(ENOMEM);
 
 		if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
 			xfs_ioerror_alert("xfs_trans_read_buf", mp,
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 45d740d..47c45ff 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -40,28 +40,6 @@
 #include "xfs_itable.h"
 #include "xfs_utils.h"
 
-/*
- * xfs_get_dir_entry is used to get a reference to an inode given
- * its parent directory inode and the name of the file.	 It does
- * not lock the child inode, and it unlocks the directory before
- * returning.  The directory's generation number is returned for
- * use by a later call to xfs_lock_dir_and_entry.
- */
-int
-xfs_get_dir_entry(
-	bhv_vname_t	*dentry,
-	xfs_inode_t	**ipp)
-{
-	bhv_vnode_t	*vp;
-
-	vp = VNAME_TO_VNODE(dentry);
-
-	*ipp = xfs_vtoi(vp);
-	if (!*ipp)
-		return XFS_ERROR(ENOENT);
-	VN_HOLD(vp);
-	return 0;
-}
 
 int
 xfs_dir_lookup_int(
@@ -339,10 +317,10 @@ xfs_bump_ino_vers2(
 	ip->i_d.di_onlink = 0;
 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 	mp = tp->t_mountp;
-	if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
+	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
 		spin_lock(&mp->m_sb_lock);
-		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
-			XFS_SB_VERSION_ADDNLINK(&mp->m_sb);
+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
+			xfs_sb_version_addnlink(&mp->m_sb);
 			spin_unlock(&mp->m_sb_lock);
 			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
 		} else {
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index f857fcc..c4c4a6a 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,7 +21,6 @@
 #define IRELE(ip)	VN_RELE(XFS_ITOV(ip))
 #define IHOLD(ip)	VN_HOLD(XFS_ITOV(ip))
 
-extern int xfs_get_dir_entry (bhv_vname_t *, xfs_inode_t **);
 extern int xfs_dir_lookup_int (xfs_inode_t *, uint, bhv_vname_t *, xfs_ino_t *,
 				xfs_inode_t **);
 extern int xfs_truncate_file (xfs_mount_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 7321304..4c132a8 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -43,7 +43,6 @@
 #include "xfs_error.h"
 #include "xfs_bmap.h"
 #include "xfs_rw.h"
-#include "xfs_refcache.h"
 #include "xfs_buf_item.h"
 #include "xfs_log_priv.h"
 #include "xfs_dir2_trace.h"
@@ -113,9 +112,6 @@ xfs_init(void)
 	xfs_ili_zone =
 		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
 					KM_ZONE_SPREAD, NULL);
-	xfs_icluster_zone =
-		kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
-					KM_ZONE_SPREAD, NULL);
 
 	/*
 	 * Allocate global trace buffers.
@@ -153,11 +149,9 @@ xfs_cleanup(void)
 	extern kmem_zone_t	*xfs_inode_zone;
 	extern kmem_zone_t	*xfs_efd_zone;
 	extern kmem_zone_t	*xfs_efi_zone;
-	extern kmem_zone_t	*xfs_icluster_zone;
 
 	xfs_cleanup_procfs();
 	xfs_sysctl_unregister();
-	xfs_refcache_destroy();
 	xfs_filestream_uninit();
 	xfs_mru_cache_uninit();
 	xfs_acl_zone_destroy(xfs_acl_zone);
@@ -189,7 +183,6 @@ xfs_cleanup(void)
 	kmem_zone_destroy(xfs_efi_zone);
 	kmem_zone_destroy(xfs_ifork_zone);
 	kmem_zone_destroy(xfs_ili_zone);
-	kmem_zone_destroy(xfs_icluster_zone);
 }
 
 /*
@@ -330,7 +323,7 @@ xfs_finish_flags(
 	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
 
 	/* Fail a mount where the logbuf is smaller then the log stripe */
-	if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 		if ((ap->logbufsize <= 0) &&
 		    (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
 			mp->m_logbsize = mp->m_sb.sb_logsunit;
@@ -349,9 +342,8 @@ xfs_finish_flags(
 		}
 	}
 
-	if (XFS_SB_VERSION_HASATTR2(&mp->m_sb)) {
+	if (xfs_sb_version_hasattr2(&mp->m_sb))
 		mp->m_flags |= XFS_MOUNT_ATTR2;
-	}
 
 	/*
 	 * prohibit r/w mounts of read-only filesystems
@@ -366,7 +358,7 @@ xfs_finish_flags(
 	 * check for shared mount.
 	 */
 	if (ap->flags & XFSMNT_SHARED) {
-		if (!XFS_SB_VERSION_HASSHARED(&mp->m_sb))
+		if (!xfs_sb_version_hasshared(&mp->m_sb))
 			return XFS_ERROR(EINVAL);
 
 		/*
@@ -512,7 +504,7 @@ xfs_mount(
 	if (!error && logdev && logdev != ddev) {
 		unsigned int	log_sector_size = BBSIZE;
 
-		if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb))
+		if (xfs_sb_version_hassector(&mp->m_sb))
 			log_sector_size = mp->m_sb.sb_logsectsize;
 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
 					    mp->m_sb.sb_blocksize,
@@ -574,7 +566,7 @@ xfs_unmount(
 #ifdef HAVE_DMAPI
 	if (mp->m_flags & XFS_MOUNT_DMAPI) {
 		error = XFS_SEND_PREUNMOUNT(mp,
-				rvp, DM_RIGHT_NULL, rvp, DM_RIGHT_NULL,
+				rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
 				NULL, NULL, 0, 0,
 				(mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
 					0:DM_FLAGS_UNWANTED);
@@ -585,11 +577,6 @@ xfs_unmount(
 					0 : DM_FLAGS_UNWANTED;
 	}
 #endif
-	/*
-	 * First blow any referenced inode from this file system
-	 * out of the reference cache, and delete the timer.
-	 */
-	xfs_refcache_purge_mp(mp);
 
 	/*
 	 * Blow away any referenced inode in the filestreams cache.
@@ -630,7 +617,7 @@ out:
 		/* Note: mp structure must still exist for
 		 * XFS_SEND_UNMOUNT() call.
 		 */
-		XFS_SEND_UNMOUNT(mp, error == 0 ? rvp : NULL,
+		XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
 			DM_RIGHT_NULL, 0, error, unmount_event_flags);
 	}
 	if (xfs_unmountfs_needed) {
@@ -653,7 +640,6 @@ xfs_quiesce_fs(
 {
 	int			count = 0, pincount;
 
-	xfs_refcache_purge_mp(mp);
 	xfs_flush_buftarg(mp->m_ddev_targp, 0);
 	xfs_finish_reclaim_all(mp, 0);
 
@@ -1324,18 +1310,6 @@ xfs_syncsub(
 	}
 
 	/*
-	 * If this is the periodic sync, then kick some entries out of
-	 * the reference cache.  This ensures that idle entries are
-	 * eventually kicked out of the cache.
-	 */
-	if (flags & SYNC_REFCACHE) {
-		if (flags & SYNC_WAIT)
-			xfs_refcache_purge_mp(mp);
-		else
-			xfs_refcache_purge_some(mp);
-	}
-
-	/*
 	 * If asked, update the disk superblock with incore counter values if we
 	 * are using non-persistent counters so that they don't get too far out
 	 * of sync if we crash or get a forced shutdown. We don't want to force
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 5130524..3418c94 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -48,7 +48,6 @@
 #include "xfs_quota.h"
 #include "xfs_utils.h"
 #include "xfs_rtalloc.h"
-#include "xfs_refcache.h"
 #include "xfs_trans_space.h"
 #include "xfs_log_priv.h"
 #include "xfs_filestream.h"
@@ -327,7 +326,7 @@ xfs_setattr(
 		if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
 		    !(flags & ATTR_DMI)) {
 			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
-			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
+			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
 				vap->va_size, 0, dmflags, NULL);
 			if (code) {
 				lock_flags = 0;
@@ -634,6 +633,15 @@ xfs_setattr(
 	 * Truncate file.  Must have write permission and not be a directory.
 	 */
 	if (mask & XFS_AT_SIZE) {
+		/*
+		 * Only change the c/mtime if we are changing the size
+		 * or we are explicitly asked to change it. This handles
+		 * the semantic difference between truncate() and ftruncate()
+		 * as implemented in the VFS.
+		 */
+		if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME))
+			timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+
 		if (vap->va_size > ip->i_size) {
 			xfs_igrow_finish(tp, ip, vap->va_size,
 			    !(flags & ATTR_DMI));
@@ -662,10 +670,6 @@ xfs_setattr(
 			 */
 			xfs_iflags_set(ip, XFS_ITRUNCATED);
 		}
-		/*
-		 * Have to do this even if the file's size doesn't change.
-		 */
-		timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
 	}
 
 	/*
@@ -877,7 +881,7 @@ xfs_setattr(
 
 	if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
 	    !(flags & ATTR_DMI)) {
-		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL, NULL, NULL,
 					0, 0, AT_DELAY_FLAG(flags));
 	}
@@ -1520,12 +1524,6 @@ xfs_release(
 			xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
 	}
 
-#ifdef HAVE_REFCACHE
-	/* If we are in the NFS reference cache then don't do this now */
-	if (ip->i_refcache)
-		return 0;
-#endif
-
 	if (ip->i_d.di_nlink != 0) {
 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
 		     ((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -1588,9 +1586,8 @@ xfs_inactive(
 
 	mp = ip->i_mount;
 
-	if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY)) {
-		(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
-	}
+	if (ip->i_d.di_nlink == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_DESTROY))
+		XFS_SEND_DESTROY(mp, ip, DM_RIGHT_NULL);
 
 	error = 0;
 
@@ -1766,7 +1763,7 @@ int
 xfs_lookup(
 	xfs_inode_t		*dp,
 	bhv_vname_t		*dentry,
-	bhv_vnode_t		**vpp)
+	xfs_inode_t		**ipp)
 {
 	xfs_inode_t		*ip;
 	xfs_ino_t		e_inum;
@@ -1781,7 +1778,7 @@ xfs_lookup(
 	lock_mode = xfs_ilock_map_shared(dp);
 	error = xfs_dir_lookup_int(dp, lock_mode, dentry, &e_inum, &ip);
 	if (!error) {
-		*vpp = XFS_ITOV(ip);
+		*ipp = ip;
 		xfs_itrace_ref(ip);
 	}
 	xfs_iunlock_map_shared(dp, lock_mode);
@@ -1794,14 +1791,12 @@ xfs_create(
 	bhv_vname_t		*dentry,
 	mode_t			mode,
 	xfs_dev_t		rdev,
-	bhv_vnode_t		**vpp,
+	xfs_inode_t		**ipp,
 	cred_t			*credp)
 {
 	char			*name = VNAME(dentry);
 	xfs_mount_t	        *mp = dp->i_mount;
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	xfs_inode_t		*ip;
-	bhv_vnode_t	        *vp = NULL;
 	xfs_trans_t		*tp;
 	int                     error;
 	xfs_bmap_free_t		free_list;
@@ -1815,14 +1810,14 @@ xfs_create(
 	uint			resblks;
 	int			namelen;
 
-	ASSERT(!*vpp);
+	ASSERT(!*ipp);
 	xfs_itrace_entry(dp);
 
 	namelen = VNAMELEN(dentry);
 
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
-				dir_vp, DM_RIGHT_NULL, NULL,
+				dp, DM_RIGHT_NULL, NULL,
 				DM_RIGHT_NULL, name, NULL,
 				mode, 0, 0);
 
@@ -1914,7 +1909,7 @@ xfs_create(
 	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	VN_HOLD(dir_vp);
+	IHOLD(dp);
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
@@ -1952,7 +1947,6 @@ xfs_create(
 	 * vnode to the caller, we bump the vnode ref count now.
 	 */
 	IHOLD(ip);
-	vp = XFS_ITOV(ip);
 
 	error = xfs_bmap_finish(&tp, &free_list, &committed);
 	if (error) {
@@ -1970,16 +1964,16 @@ xfs_create(
 	XFS_QM_DQRELE(mp, udqp);
 	XFS_QM_DQRELE(mp, gdqp);
 
-	*vpp = vp;
+	*ipp = ip;
 
 	/* Fallthrough to std_return with error = 0  */
 
 std_return:
-	if ((*vpp || (error != 0 && dm_event_sent != 0)) &&
+	if ((*ipp || (error != 0 && dm_event_sent != 0)) &&
 	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-			dir_vp, DM_RIGHT_NULL,
-			*vpp ? vp:NULL,
+			dp, DM_RIGHT_NULL,
+			*ipp ? ip : NULL,
 			DM_RIGHT_NULL, name, NULL,
 			mode, error, 0);
 	}
@@ -2274,44 +2268,32 @@ xfs_remove(
 	xfs_inode_t             *dp,
 	bhv_vname_t		*dentry)
 {
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	char			*name = VNAME(dentry);
 	xfs_mount_t		*mp = dp->i_mount;
-	xfs_inode_t             *ip;
+	xfs_inode_t             *ip = VNAME_TO_INODE(dentry);
+	int			namelen = VNAMELEN(dentry);
 	xfs_trans_t             *tp = NULL;
 	int                     error = 0;
 	xfs_bmap_free_t         free_list;
 	xfs_fsblock_t           first_block;
 	int			cancel_flags;
 	int			committed;
-	int			dm_di_mode = 0;
 	int			link_zero;
 	uint			resblks;
-	int			namelen;
 
 	xfs_itrace_entry(dp);
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	namelen = VNAMELEN(dentry);
-
-	if (!xfs_get_dir_entry(dentry, &ip)) {
-	        dm_di_mode = ip->i_d.di_mode;
-		IRELE(ip);
-	}
-
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
-		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dp,
 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
-					name, NULL, dm_di_mode, 0, 0);
+					name, NULL, ip->i_d.di_mode, 0, 0);
 		if (error)
 			return error;
 	}
 
-	/* From this point on, return through std_return */
-	ip = NULL;
-
 	/*
 	 * We need to get a reference to ip before we get our log
 	 * reservation. The reason for this is that we cannot call
@@ -2324,13 +2306,7 @@ xfs_remove(
 	 * when we call xfs_iget.  Instead we get an unlocked reference
 	 * to the inode before getting our log reservation.
 	 */
-	error = xfs_get_dir_entry(dentry, &ip);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto std_return;
-	}
-
-	dm_di_mode = ip->i_d.di_mode;
+	IHOLD(ip);
 
 	xfs_itrace_entry(ip);
 	xfs_itrace_ref(ip);
@@ -2449,14 +2425,6 @@ xfs_remove(
 	}
 
 	/*
-	 * Before we drop our extra reference to the inode, purge it
-	 * from the refcache if it is there.  By waiting until afterwards
-	 * to do the IRELE, we ensure that we won't go inactive in the
-	 * xfs_refcache_purge_ip routine (although that would be OK).
-	 */
-	xfs_refcache_purge_ip(ip);
-
-	/*
 	 * If we are using filestreams, kill the stream association.
 	 * If the file is still open it may get a new one but that
 	 * will get killed on last close in xfs_close() so we don't
@@ -2472,9 +2440,9 @@ xfs_remove(
  std_return:
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-				dir_vp, DM_RIGHT_NULL,
+				dp, DM_RIGHT_NULL,
 				NULL, DM_RIGHT_NULL,
-				name, NULL, dm_di_mode, error, 0);
+				name, NULL, ip->i_d.di_mode, error, 0);
 	}
 	return error;
 
@@ -2495,14 +2463,6 @@ xfs_remove(
 	cancel_flags |= XFS_TRANS_ABORT;
 	xfs_trans_cancel(tp, cancel_flags);
 
-	/*
-	 * Before we drop our extra reference to the inode, purge it
-	 * from the refcache if it is there.  By waiting until afterwards
-	 * to do the IRELE, we ensure that we won't go inactive in the
-	 * xfs_refcache_purge_ip routine (although that would be OK).
-	 */
-	xfs_refcache_purge_ip(ip);
-
 	IRELE(ip);
 
 	goto std_return;
@@ -2511,12 +2471,10 @@ xfs_remove(
 int
 xfs_link(
 	xfs_inode_t		*tdp,
-	bhv_vnode_t		*src_vp,
+	xfs_inode_t		*sip,
 	bhv_vname_t		*dentry)
 {
-	bhv_vnode_t		*target_dir_vp = XFS_ITOV(tdp);
 	xfs_mount_t		*mp = tdp->i_mount;
-	xfs_inode_t		*sip = xfs_vtoi(src_vp);
 	xfs_trans_t		*tp;
 	xfs_inode_t		*ips[2];
 	int			error;
@@ -2529,18 +2487,18 @@ xfs_link(
 	int			target_namelen;
 
 	xfs_itrace_entry(tdp);
-	xfs_itrace_entry(xfs_vtoi(src_vp));
+	xfs_itrace_entry(sip);
 
 	target_namelen = VNAMELEN(dentry);
-	ASSERT(!VN_ISDIR(src_vp));
+	ASSERT(!S_ISDIR(sip->i_d.di_mode));
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
 	if (DM_EVENT_ENABLED(tdp, DM_EVENT_LINK)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
-					target_dir_vp, DM_RIGHT_NULL,
-					src_vp, DM_RIGHT_NULL,
+					tdp, DM_RIGHT_NULL,
+					sip, DM_RIGHT_NULL,
 					target_name, NULL, 0, 0, 0);
 		if (error)
 			return error;
@@ -2584,8 +2542,8 @@ xfs_link(
 	 * xfs_trans_cancel will both unlock the inodes and
 	 * decrement the associated ref counts.
 	 */
-	VN_HOLD(src_vp);
-	VN_HOLD(target_dir_vp);
+	IHOLD(sip);
+	IHOLD(tdp);
 	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
 	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
 
@@ -2650,8 +2608,8 @@ xfs_link(
 std_return:
 	if (DM_EVENT_ENABLED(sip, DM_EVENT_POSTLINK)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
-				target_dir_vp, DM_RIGHT_NULL,
-				src_vp, DM_RIGHT_NULL,
+				tdp, DM_RIGHT_NULL,
+				sip, DM_RIGHT_NULL,
 				target_name, NULL, 0, error, 0);
 	}
 	return error;
@@ -2671,15 +2629,13 @@ xfs_mkdir(
 	xfs_inode_t             *dp,
 	bhv_vname_t		*dentry,
 	mode_t			mode,
-	bhv_vnode_t		**vpp,
+	xfs_inode_t		**ipp,
 	cred_t			*credp)
 {
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	char			*dir_name = VNAME(dentry);
 	int			dir_namelen = VNAMELEN(dentry);
 	xfs_mount_t		*mp = dp->i_mount;
 	xfs_inode_t		*cdp;	/* inode of created dir */
-	bhv_vnode_t		*cvp;	/* vnode of created dir */
 	xfs_trans_t		*tp;
 	int			cancel_flags;
 	int			error;
@@ -2700,7 +2656,7 @@ xfs_mkdir(
 
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_CREATE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
-					dir_vp, DM_RIGHT_NULL, NULL,
+					dp, DM_RIGHT_NULL, NULL,
 					DM_RIGHT_NULL, dir_name, NULL,
 					mode, 0, 0);
 		if (error)
@@ -2786,7 +2742,7 @@ xfs_mkdir(
 	 * from here on will result in the transaction cancel
 	 * unlocking dp so don't do it explicitly in the error path.
 	 */
-	VN_HOLD(dir_vp);
+	IHOLD(dp);
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
@@ -2817,11 +2773,9 @@ xfs_mkdir(
 	if (error)
 		goto error2;
 
-	cvp = XFS_ITOV(cdp);
-
 	created = B_TRUE;
 
-	*vpp = cvp;
+	*ipp = cdp;
 	IHOLD(cdp);
 
 	/*
@@ -2858,8 +2812,8 @@ std_return:
 	if ((created || (error != 0 && dm_event_sent != 0)) &&
 	    DM_EVENT_ENABLED(dp, DM_EVENT_POSTCREATE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
-					dir_vp, DM_RIGHT_NULL,
-					created ? XFS_ITOV(cdp):NULL,
+					dp, DM_RIGHT_NULL,
+					created ? cdp : NULL,
 					DM_RIGHT_NULL,
 					dir_name, NULL,
 					mode, error, 0);
@@ -2891,14 +2845,13 @@ xfs_rmdir(
 	char			*name = VNAME(dentry);
 	int			namelen = VNAMELEN(dentry);
 	xfs_mount_t		*mp = dp->i_mount;
-  	xfs_inode_t             *cdp;   /* child directory */
+  	xfs_inode_t             *cdp = VNAME_TO_INODE(dentry);
 	xfs_trans_t             *tp;
 	int                     error;
 	xfs_bmap_free_t         free_list;
 	xfs_fsblock_t           first_block;
 	int			cancel_flags;
 	int			committed;
-	int			dm_di_mode = S_IFDIR;
 	int			last_cdp_link;
 	uint			resblks;
 
@@ -2907,24 +2860,15 @@ xfs_rmdir(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return XFS_ERROR(EIO);
 
-	if (!xfs_get_dir_entry(dentry, &cdp)) {
-	        dm_di_mode = cdp->i_d.di_mode;
-		IRELE(cdp);
-	}
-
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
-					dir_vp, DM_RIGHT_NULL,
+					dp, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL,
-					name, NULL, dm_di_mode, 0, 0);
+					name, NULL, cdp->i_d.di_mode, 0, 0);
 		if (error)
 			return XFS_ERROR(error);
 	}
 
-	/* Return through std_return after this point. */
-
-	cdp = NULL;
-
 	/*
 	 * We need to get a reference to cdp before we get our log
 	 * reservation.  The reason for this is that we cannot call
@@ -2937,13 +2881,7 @@ xfs_rmdir(
 	 * when we call xfs_iget.  Instead we get an unlocked reference
 	 * to the inode before getting our log reservation.
 	 */
-	error = xfs_get_dir_entry(dentry, &cdp);
-	if (error) {
-		REMOVE_DEBUG_TRACE(__LINE__);
-		goto std_return;
-	}
-	mp = dp->i_mount;
-	dm_di_mode = cdp->i_d.di_mode;
+	IHOLD(cdp);
 
 	/*
 	 * Get the dquots for the inodes.
@@ -3098,9 +3036,9 @@ xfs_rmdir(
  std_return:
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
-					dir_vp, DM_RIGHT_NULL,
+					dp, DM_RIGHT_NULL,
 					NULL, DM_RIGHT_NULL,
-					name, NULL, dm_di_mode,
+					name, NULL, cdp->i_d.di_mode,
 					error, 0);
 	}
 	return error;
@@ -3121,10 +3059,9 @@ xfs_symlink(
 	bhv_vname_t		*dentry,
 	char			*target_path,
 	mode_t			mode,
-	bhv_vnode_t		**vpp,
+	xfs_inode_t		**ipp,
 	cred_t			*credp)
 {
-	bhv_vnode_t		*dir_vp = XFS_ITOV(dp);
 	xfs_mount_t		*mp = dp->i_mount;
 	xfs_trans_t		*tp;
 	xfs_inode_t		*ip;
@@ -3150,7 +3087,7 @@ xfs_symlink(
 	char			*link_name = VNAME(dentry);
 	int			link_namelen;
 
-	*vpp = NULL;
+	*ipp = NULL;
 	error = 0;
 	ip = NULL;
 	tp = NULL;
@@ -3195,7 +3132,7 @@ xfs_symlink(
 	}
 
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_SYMLINK)) {
-		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp,
+		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dp,
 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
 					link_name, target_path, 0, 0, 0);
 		if (error)
@@ -3289,7 +3226,7 @@ xfs_symlink(
 	 * transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
-	VN_HOLD(dir_vp);
+	IHOLD(dp);
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
 	unlock_dp_on_error = B_FALSE;
 
@@ -3399,19 +3336,14 @@ xfs_symlink(
 std_return:
 	if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTSYMLINK)) {
 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
-					dir_vp, DM_RIGHT_NULL,
-					error ? NULL : XFS_ITOV(ip),
+					dp, DM_RIGHT_NULL,
+					error ? NULL : ip,
 					DM_RIGHT_NULL, link_name, target_path,
 					0, error, 0);
 	}
 
-	if (!error) {
-		bhv_vnode_t *vp;
-
-		ASSERT(ip);
-		vp = XFS_ITOV(ip);
-		*vpp = vp;
-	}
+	if (!error)
+		*ipp = ip;
 	return error;
 
  error2:
@@ -3431,60 +3363,11 @@ std_return:
 }
 
 int
-xfs_rwlock(
-	xfs_inode_t	*ip,
-	bhv_vrwlock_t	locktype)
-{
-	if (S_ISDIR(ip->i_d.di_mode))
-		return 1;
-	if (locktype == VRWLOCK_WRITE) {
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-	} else if (locktype == VRWLOCK_TRY_READ) {
-		return xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED);
-	} else if (locktype == VRWLOCK_TRY_WRITE) {
-		return xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL);
-	} else {
-		ASSERT((locktype == VRWLOCK_READ) ||
-		       (locktype == VRWLOCK_WRITE_DIRECT));
-		xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	}
-
-	return 1;
-}
-
-
-void
-xfs_rwunlock(
-	xfs_inode_t     *ip,
-	bhv_vrwlock_t	locktype)
-{
- 	if (S_ISDIR(ip->i_d.di_mode))
-  		return;
-	if (locktype == VRWLOCK_WRITE) {
-		/*
-		 * In the write case, we may have added a new entry to
-		 * the reference cache.  This might store a pointer to
-		 * an inode to be released in this inode.  If it is there,
-		 * clear the pointer and release the inode after unlocking
-		 * this one.
-		 */
-		xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
-	} else {
-		ASSERT((locktype == VRWLOCK_READ) ||
-		       (locktype == VRWLOCK_WRITE_DIRECT));
-		xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-	}
-	return;
-}
-
-
-int
 xfs_inode_flush(
 	xfs_inode_t	*ip,
 	int		flags)
 {
 	xfs_mount_t	*mp = ip->i_mount;
-	xfs_inode_log_item_t *iip = ip->i_itemp;
 	int		error = 0;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
@@ -3494,33 +3377,9 @@ xfs_inode_flush(
 	 * Bypass inodes which have already been cleaned by
 	 * the inode flush clustering code inside xfs_iflush
 	 */
-	if ((ip->i_update_core == 0) &&
-	    ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
+	if (xfs_inode_clean(ip))
 		return 0;
 
-	if (flags & FLUSH_LOG) {
-		if (iip && iip->ili_last_lsn) {
-			xlog_t		*log = mp->m_log;
-			xfs_lsn_t	sync_lsn;
-			int		log_flags = XFS_LOG_FORCE;
-
-			spin_lock(&log->l_grant_lock);
-			sync_lsn = log->l_last_sync_lsn;
-			spin_unlock(&log->l_grant_lock);
-
-			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
-				if (flags & FLUSH_SYNC)
-					log_flags |= XFS_LOG_SYNC;
-				error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
-				if (error)
-					return error;
-			}
-
-			if (ip->i_update_core == 0)
-				return 0;
-		}
-	}
-
 	/*
 	 * We make this non-blocking if the inode is contended,
 	 * return EAGAIN to indicate to the caller that they
@@ -3528,30 +3387,22 @@ xfs_inode_flush(
 	 * blocking on inodes inside another operation right
 	 * now, they get caught later by xfs_sync.
 	 */
-	if (flags & FLUSH_INODE) {
-		int	flush_flags;
-
-		if (flags & FLUSH_SYNC) {
-			xfs_ilock(ip, XFS_ILOCK_SHARED);
-			xfs_iflock(ip);
-		} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
-			if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
-				xfs_iunlock(ip, XFS_ILOCK_SHARED);
-				return EAGAIN;
-			}
-		} else {
+	if (flags & FLUSH_SYNC) {
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+		xfs_iflock(ip);
+	} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+		if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
 			return EAGAIN;
 		}
-
-		if (flags & FLUSH_SYNC)
-			flush_flags = XFS_IFLUSH_SYNC;
-		else
-			flush_flags = XFS_IFLUSH_ASYNC;
-
-		error = xfs_iflush(ip, flush_flags);
-		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	} else {
+		return EAGAIN;
 	}
 
+	error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
+						    : XFS_IFLUSH_ASYNC_NOBLOCK);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
 	return error;
 }
 
@@ -3694,12 +3545,12 @@ xfs_finish_reclaim(
 	 * We get the flush lock regardless, though, just to make sure
 	 * we don't free it while it is being flushed.
 	 */
-	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		if (!locked) {
-			xfs_ilock(ip, XFS_ILOCK_EXCL);
-			xfs_iflock(ip);
-		}
+	if (!locked) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_iflock(ip);
+	}
 
+	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		if (ip->i_update_core ||
 		    ((ip->i_itemp != NULL) &&
 		     (ip->i_itemp->ili_format.ilf_fields != 0))) {
@@ -3719,17 +3570,11 @@ xfs_finish_reclaim(
 		ASSERT(ip->i_update_core == 0);
 		ASSERT(ip->i_itemp == NULL ||
 		       ip->i_itemp->ili_format.ilf_fields == 0);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	} else if (locked) {
-		/*
-		 * We are not interested in doing an iflush if we're
-		 * in the process of shutting down the filesystem forcibly.
-		 * So, just reclaim the inode.
-		 */
-		xfs_ifunlock(ip);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
+	xfs_ifunlock(ip);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
  reclaim:
 	xfs_ireclaim(ip);
 	return 0;
@@ -3845,9 +3690,8 @@ xfs_alloc_file_space(
 		end_dmi_offset = offset+len;
 		if (end_dmi_offset > ip->i_size)
 			end_dmi_offset = ip->i_size;
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
-			offset, end_dmi_offset - offset,
-			0, NULL);
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, offset,
+				      end_dmi_offset - offset, 0, NULL);
 		if (error)
 			return error;
 	}
@@ -3956,8 +3800,8 @@ dmapi_enospc_check:
 	if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 &&
 	    DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
 		error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
-				XFS_ITOV(ip), DM_RIGHT_NULL,
-				XFS_ITOV(ip), DM_RIGHT_NULL,
+				ip, DM_RIGHT_NULL,
+				ip, DM_RIGHT_NULL,
 				NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
 		if (error == 0)
 			goto retry;	/* Maybe DMAPI app. has made space */
@@ -4102,7 +3946,7 @@ xfs_free_file_space(
 	    DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
 		if (end_dmi_offset > ip->i_size)
 			end_dmi_offset = ip->i_size;
-		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp,
+		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip,
 				offset, end_dmi_offset - offset,
 				AT_DELAY_FLAG(attr_flags), NULL);
 		if (error)
@@ -4132,7 +3976,7 @@ xfs_free_file_space(
 	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
 	 * will take care of it for us.
 	 */
-	if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
+	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
 		nimap = 1;
 		error = xfs_bmapi(NULL, ip, startoffset_fsb,
 			1, 0, NULL, 0, &imap, &nimap, NULL, NULL);
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 4e3970f..12e5818 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -24,22 +24,20 @@ int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start,
 int xfs_release(struct xfs_inode *ip);
 int xfs_inactive(struct xfs_inode *ip);
 int xfs_lookup(struct xfs_inode *dp, bhv_vname_t *dentry,
-		bhv_vnode_t **vpp);
+		struct xfs_inode **ipp);
 int xfs_create(struct xfs_inode *dp, bhv_vname_t *dentry, mode_t mode,
-		xfs_dev_t rdev, bhv_vnode_t **vpp, struct cred *credp);
+		xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
 int xfs_remove(struct xfs_inode *dp, bhv_vname_t	*dentry);
-int xfs_link(struct xfs_inode *tdp, bhv_vnode_t *src_vp,
+int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
 		bhv_vname_t *dentry);
 int xfs_mkdir(struct xfs_inode *dp, bhv_vname_t *dentry,
-		mode_t mode, bhv_vnode_t **vpp, struct cred *credp);
+		mode_t mode, struct xfs_inode **ipp, struct cred *credp);
 int xfs_rmdir(struct xfs_inode *dp, bhv_vname_t *dentry);
 int xfs_readdir(struct xfs_inode	*dp, void *dirent, size_t bufsize,
 		       xfs_off_t *offset, filldir_t filldir);
 int xfs_symlink(struct xfs_inode *dp, bhv_vname_t *dentry,
-		char *target_path, mode_t mode, bhv_vnode_t **vpp,
+		char *target_path, mode_t mode, struct xfs_inode **ipp,
 		struct cred *credp);
-int xfs_rwlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
-void xfs_rwunlock(struct xfs_inode *ip, bhv_vrwlock_t locktype);
 int xfs_inode_flush(struct xfs_inode *ip, int flags);
 int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
 int xfs_reclaim(struct xfs_inode *ip);
@@ -47,7 +45,7 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
 		xfs_flock64_t *bf, xfs_off_t offset,
 		struct cred *credp, int	attr_flags);
 int xfs_rename(struct xfs_inode *src_dp, bhv_vname_t *src_vname,
-		bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname);
+		struct xfs_inode *target_dp, bhv_vname_t *target_vname);
 int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
 		int *valuelenp, int flags, cred_t *cred);
 int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,