GIT b836b5d9997b09a4d326554441372abde9d12d32 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw.git

commit b836b5d9997b09a4d326554441372abde9d12d32
Author: Bob Peterson
Date: Wed Jan 16 08:45:39 2008 -0600

[GFS2] Fix typo

This patch fixes a minor typo. Surprisingly, it still compiled.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit 7c11de3f041de98f77e59ee6881160d8eb68070e
Author: Steven Whitehouse
Date: Wed Jan 16 14:24:05 2008 +0000

[GFS2] Fix write alloc required shortcut calculation

The comparison was being made against the wrong quantity.

Signed-off-by: Steven Whitehouse

commit 6e824f1e2bf77de4be4373edb2a7fb8f23e74496
Author: Bob Peterson
Date: Fri Jan 11 13:44:50 2008 -0600

[GFS2] gfs2_alloc_required performance

This is a small I/O performance enhancement to gfs2. (Actually, it is a rework of an earlier version I got wrong.) The idea here is to check if the write extends past the last block in the file. If so, the function can save itself a lot of time and trouble because it knows an allocate will be required. Benchmarks like iozone should see better performance.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit e61ac0a0d1f515653aa817c9ab0f11a490c31ec3
Author: Bob Peterson
Date: Fri Jan 11 13:31:12 2008 -0600

[GFS2] Remove unneeded i_spin

This patch removes a vestigial variable "i_spin" from the gfs2_inode structure. This not only saves us memory (>300000 of these in memory for the oom test), it also saves us time because we don't have to spend time initializing it (i.e. slightly better performance).

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit 2b8b095b44a4b29881ed300d84a2bab23466f13f
Author: Steven Whitehouse
Date: Thu Jan 10 15:18:55 2008 +0000

[GFS2] Reduce inode size by moving i_alloc out of line

It is possible to reduce the size of GFS2 inodes by taking the i_alloc structure out of the gfs2_inode. This patch allocates the i_alloc structure whenever it's needed, and frees it afterward. This decreases the amount of low memory we use at the expense of requiring a memory allocation for each page or partial page that we write. A quick test with postmark shows that the overhead is not measurable, and I also note that OCFS2 uses the same approach.

In the future I'd like to solve the problem by shrinking down the size of the members of the i_alloc structure, but for now, this reduces the immediate problem of using too much low-memory on x86 and doesn't add too much overhead.

Signed-off-by: Steven Whitehouse

commit 3a46e3bef51f7a1feffafcac855c1b252fcc5877
Author: Steven Whitehouse
Date: Thu Jan 10 14:49:43 2008 +0000

[GFS2] Fix assert in log code

Although the values were all being calculated correctly, there was a race in the assert due to the way it was using atomic variables. This changes the value we assert on so that we get the same effect by testing a different variable. This prevents the assert triggering when it shouldn't.

Signed-off-by: Steven Whitehouse

commit 89f0bb2873fcaa8e57831dd3f0c013cabbd99012
Author: Patrick Caulfield
Date: Wed Jan 9 15:06:27 2008 +0000

[DLM] close othercons

This patch addresses a problem introduced with the last round of lowcomms patches where the 'othercon' connections do not get freed when the DLM shuts down. This results in the error message "slab error in kmem_cache_destroy(): cache `dlm_conn': Can't free all objects" and the DLM cannot be restarted without a system reboot.

See bz#428119

Signed-Off-By: Patrick Caulfield
Signed-off-by: Steven Whitehouse
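As an illustration of the shortcut described in the gfs2_alloc_required commits above, here is a minimal, self-contained C sketch of the idea (function and variable names are simplified stand-ins, not the GFS2 code itself): a write whose last block lies beyond the file's current last block must allocate, so the per-block scan can be skipped. The corresponding in-tree check appears in the fs/gfs2/bmap.c hunk of the diff below, and as the follow-up commit notes, the quantity being compared against needed a fix.

/*
 * Illustrative sketch only: "write past end of file" shortcut.
 * Returns 1 when an allocation is definitely required, 0 when the
 * caller still has to scan the write range block by block.
 */
#include <stdint.h>
#include <stdio.h>

static int write_needs_alloc_fast_path(uint64_t file_size, unsigned bsize_shift,
                                        uint64_t offset, uint64_t len)
{
        uint64_t bsize = 1ULL << bsize_shift;
        /* number of blocks currently covered by the file (rounded up) */
        uint64_t end_of_file = (file_size + bsize - 1) >> bsize_shift;
        /* number of blocks up to and including the last one the write touches */
        uint64_t lblock_stop = (offset + len + bsize - 1) >> bsize_shift;

        return lblock_stop > end_of_file;
}

int main(void)
{
        /* 8k file, 4k blocks: a write at offset 12k must allocate */
        printf("%d\n", write_needs_alloc_fast_path(8192, 12, 12288, 100)); /* 1 */
        /* a write inside the first 8k may or may not need allocation (holes) */
        printf("%d\n", write_needs_alloc_fast_path(8192, 12, 0, 100));     /* 0 */
        return 0;
}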
commit 27ac6a8027db2a4362c632bb6cfe71ed3732ef09
Author: Steven Whitehouse
Date: Tue Jan 8 08:14:30 2008 +0000

[GFS2] Fix problems relating to execution of files on GFS2

This patch fixes a couple of problems which affected the execution of files on GFS2. The first is that there was a corner case where inodes were not always uptodate at the point at which permissions checks were being carried out; this was resulting in refusal of execute permission, but only on the first lookup, and subsequent requests worked correctly. The second was a problem relating to incorrect updating of file sizes which was introduced with the write_begin/end code for GFS2 a little while back.

Signed-off-by: Steven Whitehouse
Cc: Abhijith Das

commit 8df8eeec6c622d0137fb331f449fa67f1ce1ee45
Author: Bob Peterson
Date: Thu Jan 3 09:24:53 2008 -0600

[GFS2] Initialize extent_list earlier

Here is a patch for the latest upstream GFS2 code: the journal extent map needs to be initialized sooner than it currently is. Otherwise failed mount attempts (e.g. not enough journals, etc.) may panic trying to access the uninitialized list.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit c2d6be02fdb6a968e3d5eef1a3c13dbf731fd0e9
Author: Steven Whitehouse
Date: Thu Jan 3 11:31:38 2008 +0000

[GFS2] Allow page migration for writeback and ordered pages

To improve performance on NUMA, we use the VM's standard page migration for writeback and ordered pages. Probably we could also do the same for journaled data, but that would need a careful audit of the code, so it will be the subject of a later patch.

Signed-off-by: Steven Whitehouse

commit c6c1c7c8f68d98958a34a03b011e62f6f327656d
Author: Steven Whitehouse
Date: Wed Jan 2 10:16:56 2008 +0000

[GFS2] Remove unused variable

The go_drop_th function is never called or referenced.

Signed-off-by: Steven Whitehouse

commit 0f9f168713eb660438ad8e6f5acc4e5b05e38229
Author: Steven Whitehouse
Date: Fri Dec 14 14:04:34 2007 +0000

[GFS2] Fix log block mapper

A missing offset in the calculation.

Signed-off-by: Steven Whitehouse

commit d3b5af1dc7eff503fe8a2836730052d122978a97
Author: Bob Peterson
Date: Wed Dec 12 17:52:13 2007 -0600

[GFS2] Minor correction

This is a small correction to my previously posted patch1. It just changes a divide to a shift. It's faster and doesn't introduce odd dependencies on 32-bit compiles.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit ad314930e0183ec0012aa81dee71196d11a4fcb4
Author: Bob Peterson
Date: Wed Dec 12 11:44:41 2007 -0600

[GFS2] Eliminate the no longer needed sd_statfs_mutex

This patch eliminates the unneeded sd_statfs_mutex mutex but preserves the ordering as discussed.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit b70bfd954fb68153785923860a6de6ed285b4d0c
Author: Bob Peterson
Date: Wed Dec 12 09:24:08 2007 -0600

[GFS2] Incremental patch to fix compiler warning

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit 832ccd3b79f7d32899f3bfa3927b5e129943c404
Author: Bob Peterson
Date: Tue Dec 11 19:29:17 2007 -0600

[GFS2] Function meta_read optimization

This patch optimizes function gfs2_meta_read. Basically, gfs2_meta_wait was being called regardless of whether a disk read was requested. This just pulls that wait into the if that triggers the read.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse
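To make the gfs2_meta_read change above concrete, here is a hedged userspace sketch of the pattern; the buffer type and helpers are invented stand-ins, not the real meta_io.c interfaces. The point is simply that the wait moves inside the branch that actually submitted a read.

/* Illustrative only: wait for I/O only on the path that issued it. */
#include <stdbool.h>
#include <stdio.h>

struct buf { bool uptodate; };

static void submit_read(struct buf *b)  { printf("submit read\n"); b->uptodate = true; }
static void wait_on_buf(struct buf *b)  { (void)b; printf("wait for I/O\n"); }

static void meta_read(struct buf *b, bool do_wait)
{
        if (!b->uptodate) {
                submit_read(b);
                if (do_wait)
                        wait_on_buf(b);   /* after the patch: wait only here */
        }
        /*
         * Before the patch the wait happened unconditionally after this point,
         * even when the buffer was already uptodate and no read was issued.
         */
}

int main(void)
{
        struct buf cached = { .uptodate = true };
        struct buf cold   = { .uptodate = false };

        meta_read(&cached, true);   /* no read, no wait */
        meta_read(&cold, true);     /* read, then wait */
        return 0;
}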
commit ada2cf04d469dc6dd1d36fda4fa1b57b53dd4cc7
Author: Bob Peterson
Date: Tue Dec 11 19:16:09 2007 -0600

[GFS2] Only fetch the dinode once in block_map

Function gfs2_block_map was often looking up the disk inode twice. This optimizes it so that it only does it once.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit 2818c9602797d9ab6b81082a725167e797a70910
Author: Bob Peterson
Date: Tue Dec 11 19:13:54 2007 -0600

[GFS2] Reorganize function gfs2_glmutex_lock

This patch optimizes the function gfs2_glmutex_lock. The basic theory is: why bother initializing a holder, setting up wait bits and then waiting on them, if you know the glock can be yours. So the holder stuff is placed inside the if checking if the glock is locked. This one needs careful scrutiny because changing anything to do with locking should strike terror into one's heart.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit 7a32e0149d664b7d884e3176e32eaade52c7b5c8
Author: Bob Peterson
Date: Tue Dec 11 19:00:16 2007 -0600

[GFS2] Run through full bitmaps quicker in gfs2_bitfit

I eliminated the passing of an unused parameter into gfs2_bitfit called rgd. This also changes the gfs2_bitfit code that searches for free (or used) blocks. Before, the code was trying to check for bytes that indicated 4 blocks in the undesired state. The problem is, it was spending more time trying to do this than it actually was saving. This version only optimizes the case where we're looking for free blocks, and it checks a machine word at a time. So on 32-bit machines, it will check 32 bits (16 blocks) and on 64-bit machines, it will check 64 bits (32 blocks) at a time. The compiler optimizes that quite well and we save some time, especially when running through full bitmaps (like the bitmaps allocated for the journals). There's probably a more elegant or optimized way to do this, but I haven't thought of it yet. I'm open to suggestions.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit b776776e5dbb86199547e1e9da2a8963d921919f
Author: Bob Peterson
Date: Tue Dec 11 18:51:25 2007 -0600

[GFS2] Get rid of useless "found" variable in quota.c

This just eliminates an unused variable from the quota code. Not likely to be a time saver.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit f8d945e8f2ff32ab434f37febf32c3fde3afd5fc
Author: Bob Peterson
Date: Tue Dec 11 18:49:21 2007 -0600

[GFS2] Journal extent mapping

This patch saves a little time when gfs2 writes to the journals by keeping a mapping between logical and physical blocks on disk. That's better than constantly looking up indirect pointers in buffers, when the journals are several levels of indirection (which they typically are).

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit 20eff25474893aaf801c2884e997f9b791b07458
Author: Steven Whitehouse
Date: Tue Dec 11 14:40:23 2007 +0000

[GFS2] Fix typo in log.c

An inequality was the wrong way around, causing gfs2_logd to wake up too often. This fixes it.

Signed-off-by: Steven Whitehouse

commit 7a25b2693149b5e70a51f6be119ecdbf545cba5a
Author: Bob Peterson
Date: Mon Dec 10 14:13:27 2007 -0600

[GFS2] Remove function gfs2_get_block

This patch is just a cleanup. Function gfs2_get_block() just calls function gfs2_block_map reversing the last two parameters. By reversing the parameters, gfs2_block_map() may be called directly and function gfs2_get_block may be eliminated altogether. Since this function is done for every block operation, this streamlines the code and makes it a little bit more efficient.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse
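Regarding the gfs2_bitfit commit above ([GFS2] Run through full bitmaps quicker in gfs2_bitfit): GFS2 allocation bitmaps use two bits per block, which is why one machine word covers 16 blocks on 32-bit and 32 blocks on 64-bit. The changelog does not show the exact word-level test used, so the SWAR check below is only one plausible way to skip words that contain no free (zero-valued) two-bit field; it is an illustrative userspace sketch, not the patch itself.

/* Illustrative only: detect whether a word contains any free (00) 2-bit field. */
#include <stdio.h>

typedef unsigned long word_t;

/* 0b0101... pattern sized to the machine word */
#define EVEN_BITS ((word_t)-1 / 3)

/* Non-zero iff at least one 2-bit field in w is 00, i.e. a free block. */
static word_t word_has_free_block(word_t w)
{
        return ~w & (~w >> 1) & EVEN_BITS;
}

int main(void)
{
        word_t all_used = (word_t)-1;                      /* every field non-zero */
        word_t one_free = all_used & ~((word_t)3 << 6);    /* clear field 3 (bits 6-7) */

        printf("%d %d\n", word_has_free_block(all_used) != 0,
                          word_has_free_block(one_free) != 0);  /* prints: 0 1 */
        return 0;
}

A scan loop can then test whole words with this predicate and only fall back to a per-field search inside words that report a free block, which is what makes walking a completely full bitmap (such as a journal's) cheap.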
commit 35b9bfbca78de81320932b0753a0e0638bf7f5a0
Author: David Teigland
Date: Thu Dec 6 09:35:25 2007 -0600

[GFS2] use pid for plock owner for nfs clients

The fl_owner is that of lockd when posix locks arrive from nfs clients, so it can't be used to distinguish between lock holders. Use fl_pid as owner instead; it's the pid of the process on the nfs client.

Signed-off-by: David Teigland
Signed-off-by: Steven Whitehouse

commit 6048e0703d8c76ad363b36d4905a881ed511733a
Author: Steven Whitehouse
Date: Fri Nov 30 08:17:15 2007 +0000

[GFS2] Remove unused variable

Signed-off-by: Steven Whitehouse

commit 1f8afbae88b2bb630015caaba870f2b51c304c27
Author: Abhijith Das
Date: Thu Nov 29 14:13:54 2007 -0600

[GFS2] patch to check for recursive lock requests in gfs2_rename code path

A certain scenario in the rename code path triggers a kernel BUG() because it accidentally does recursive locking. The first lock is requested to unlink an already existing inode (replacing a file) and the second lock is requested when the destination directory needs to alloc some space. It is rare that these two events happen during the same rename call, and even more rare that these two instances try to lock the same rgrp. It is, however, possible.

https://bugzilla.redhat.com/show_bug.cgi?id=404711

Signed-off-by: Abhijith Das
Signed-off-by: Steven Whitehouse

commit 424cdc7c180f3227660818e37e2876ffb2f7a92c
Author: Wendy Cheng
Date: Thu Nov 29 17:56:51 2007 -0500

[GFS2] Remove lock methods for lock_nolock protocol

GFS2 supports two modes of locking - lock_nolock for single node filesystem and lock_dlm for cluster mode locking. The gfs2 lock methods are removed from the file operation table for the lock_nolock protocol. This allows the VFS to handle posix lock and flock logic just like other in-tree filesystems, without duplication.

Signed-off-by: S. Wendy Cheng
Signed-off-by: Steven Whitehouse

commit 892ec91e1c6fd60673eb11a9751b24a014d55841
Author: Fabio M. Di Nitto
Date: Wed Nov 28 16:22:09 2007 +0100

[GFS2] Remove unrequired code

Signed-off-by: Fabio M. Di Nitto
Signed-off-by: Steven Whitehouse

commit 6358e207e9b026be3a1fefa1b677bda110012f22
Author: Fabio Massimo Di Nitto
Date: Tue Nov 27 06:16:42 2007 +0100

[GFS2] Fix build warnings

Hi Steven,

Steven Whitehouse wrote:
> Hi,
>
> Now in the -nmw git tree. Thanks,
>
> Steve.
>
> On Wed, 2007-11-21 at 11:54 -0600, Ryan O'Hara wrote:

this patch introduces a bunch of build warnings by leaving around struct inode *inode = &ip->i_inode; The patch in attachment cleans them up. Please apply.

Signed-off-by: Fabio Massimo Di Nitto
Signed-off-by: Steven Whitehouse

commit 1e0640fc0b7dd82189940d87fa7fd91c78415345
Author: Ryan O'Hara
Date: Wed Nov 21 11:54:54 2007 -0600

[GFS2] remove unnecessary permission checks

Remove read/write permission() checks from xattr operations. The VFS layer is already handling permission for xattrs via the xattr_permission() call, so there is no need for gfs2 to check permissions. Furthermore, using permission() for SELinux xattr ops is incorrect.

Signed-off-by: Ryan O'Hara
Signed-off-by: Steven Whitehouse

commit aaa42dd70ca81503acabafc13fe0aae4e4c09710
Author: Steven Whitehouse
Date: Thu Nov 15 10:04:42 2007 +0000

[GFS2] Revise gfs2_logd and flush thresholds

This patch introduces two new log thresholds:
o thresh1 is the point at which we wake up gfs2_logd due to the pinned block count. It is initialised at mount time to 2/5ths of the size of the journal. Currently it does not change during the course of the mount, but the intention is to adjust it automatically based upon various conditions. This automatic adjustment will be the subject of later patches.

o thresh2 is the point at which we wake up gfs2_logd due to the total of pinned blocks and AIL blocks. It is initialised at mount time to 4/5ths of the size of the journal. The reason for not making it equal to 100% of the journal size is to give gfs2_logd time to start up and do something before the processes filling the journal end up stalling and waiting on journal flushes.

At the same time, the incore_log_blocks tunable is removed since it was wrong (just a basic fixed threshold set to a number plucked out of the air) and it was being compared against the wrong thing (the amount of metadata in the journal) rather than the total number of blocks.

Also, since the free blocks count is now an atomic variable, a number of these comparisons now do not need locking, so the log lock has been removed around some operations.

This patch also ensures that there are no races when gfs2_logd is woken up. It also changes the behaviour of the periodic sync so that instead of occurring every 60 secs, it will now occur every 30 secs (which can still be set via sysfs) if there have been no other log flushes in the mean time.

When we reserve blocks at the start of a transaction, we now use a waitqueue too. This means we can remove the old mutex and the fast path through that code is just a couple of atomic operations now. Also, we no longer do log flushing at this point in the code. Instead we wake up gfs2_logd to do it for us (this shouldn't happen if the log is large enough and if gfs2_logd is properly tuned) and do an exclusive wait.

As a result of these changes, postmark on my test machine runs about 20% faster, mainly due to increased efficiency in flushing the journal.

Signed-off-by: Steven Whitehouse

commit 37a314fb03979b58828c8327adb71cbac51750fd
Author: Fabio Massimo Di Nitto
Date: Fri Nov 16 09:50:40 2007 +0000

[GFS2] Fix runtime issue with UP kernels

The issue is indeed UP vs SMP and it is totally random. spin_is_locked() is a bad assertion because there is no correct answer on UP: on UP spin_is_locked() has to return either one value or another, always. This means that in my setup I am lucky enough to trigger the issue and you are lucky enough not to. The patch in attachment removes the bogus calls to BUG_ON and, according to David (in CC, and thanks for the long explanation of the problem), we can rely upon things like lockdep to find the problems these assertions might have been trying to catch.

Signed-off-by: Fabio M. Di Nitto
Cc: David S. Miller
Signed-off-by: Steven Whitehouse

commit 99206df330cae75cddaa27349d32b5134d672073
Author: David Teigland
Date: Thu Nov 15 09:01:13 2007 -0600

[GFS2] tidy up error message

Print error with log_error() to be consistent with others.

Signed-off-by: David Teigland
Signed-off-by: Fabio M. Di Nitto
Signed-off-by: Steven Whitehouse
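As a rough illustration of the thresholds described in "[GFS2] Revise gfs2_logd and flush thresholds" above, the sketch below computes thresh1 (2/5 of the journal) and thresh2 (4/5) and performs the kind of lock-free wake-up test that atomic counters make possible. Field and function names are invented for the example, and it uses C11 atomics in userspace; the real code keeps these values in the GFS2 superblock and uses kernel atomics.

/* Illustrative only: gfs2_logd wake-up thresholds. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct journal_state {
        unsigned int jd_blocks;       /* total journal size in blocks */
        unsigned int thresh1;         /* 2/5 of the journal           */
        unsigned int thresh2;         /* 4/5 of the journal           */
        atomic_uint  pinned_blocks;   /* blocks pinned in the log     */
        atomic_uint  ail_blocks;      /* blocks waiting on the AIL    */
};

static void init_thresholds(struct journal_state *j, unsigned int jd_blocks)
{
        j->jd_blocks = jd_blocks;
        j->thresh1 = jd_blocks * 2 / 5;
        j->thresh2 = jd_blocks * 4 / 5;
        atomic_init(&j->pinned_blocks, 0);
        atomic_init(&j->ail_blocks, 0);
}

/* Lock-free check, possible because the counters are atomic. */
static bool should_wake_logd(struct journal_state *j)
{
        unsigned int pinned = atomic_load(&j->pinned_blocks);
        unsigned int ail    = atomic_load(&j->ail_blocks);

        return pinned >= j->thresh1 || pinned + ail >= j->thresh2;
}

int main(void)
{
        struct journal_state j;

        init_thresholds(&j, 32768);                  /* 128MB journal, 4k blocks */
        atomic_store(&j.pinned_blocks, 14000);       /* above 2/5 = 13107 */
        printf("wake: %d\n", should_wake_logd(&j));  /* 1 */
        return 0;
}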
commit d1a3604ec58a9aedc9301d10c7ffe0ed425a4abb
Author: Fabio Massimo Di Nitto
Date: Thu Nov 15 13:48:52 2007 +0000

[GFS2] Check for installation of mount helpers for DLM mounts

The patch is a fix to abort mount if the mount.gfs* and possibly umount.* helpers are missing from /sbin. While we do what we can to guarantee that they are installed properly in userland (CVS HEAD), we want to make sure that mount still aborts properly.

The only sign of missing helpers is that lock_dlm will receive no mount options at all. According to David the problem does not exist for lock_nolock as the helpers are not required.

The patch has been tested for both gfs and gfs2 and it works as expected. The lack of mount.gfs* will generate an error that is propagated to mount:

root@node1:~# mount -t gfs2 /dev/nbd2 /mnt/
mount: wrong fs type, bad option, bad superblock on /dev/nbd2,
missing codepage or helper program, or other error
In some cases useful info is found in syslog - try
dmesg | tail or so
[ 3513.303346] GFS2: fsid=: Trying to join cluster "lock_dlm", "gutsy:gfs2"
[ 3513.304546] DLM/GFS2/GFS ERROR: (u)mount helpers are not installed properly!
[ 3513.306290] GFS2: fsid=: can't mount proto=lock_dlm, table=gutsy:gfs2, hostdata=

You might also want to notice that it will avoid mount hanging, failing silently, or failing with strange errors that would require the cluster to reboot/restart before you can actually mount the filesystem again.

Signed-off-by: Fabio M. Di Nitto
Signed-off-by: Steven Whitehouse

commit 30a5d70ddf0fe6ce490eb915867d1718316f9774
Author: Steven Whitehouse
Date: Fri Nov 9 10:07:21 2007 +0000

[GFS2] Don't periodically update the jindex

We only care about the content of the jindex in two cases: one is when we mount the fs and the other is when we need to recover another journal. In both cases we have to update the jindex anyway, so there is no point in updating it periodically between times, so this removes it to simplify gfs2_logd.

Signed-off-by: Steven Whitehouse

commit 1c2a96d7fdf2de2838f792b4dfe9d9571842d3f3
Author: Steven Whitehouse
Date: Fri Nov 9 10:01:41 2007 +0000

[GFS2] Move gfs2_logd into log.c

This means that we can mark gfs2_ail1_empty static, and prepares the way for further changes.

Signed-off-by: Steven Whitehouse

commit 23b20e366937b59e288eed8122103cf80a222772
Author: Steven Whitehouse
Date: Thu Nov 8 14:55:03 2007 +0000

[GFS2] Use atomic_t for journal free blocks counter

This patch changes the counter which keeps track of the free blocks in the journal to an atomic_t in preparation for the following patch which will update the log reservation code.

Signed-off-by: Steven Whitehouse

commit b1e497ca09a8781721bbbf25373854cbd3f00146
Author: Steven Whitehouse
Date: Thu Nov 8 14:25:12 2007 +0000

[GFS2] Don't add glocks to the journal

The only reason for adding glocks to the journal was to keep track of which locks required a log flush prior to release. We add a flag to the glock to allow this check to be made in a simpler way. This reduces the size of a glock (by 12 bytes on i386, 24 on x86_64) and means that we can avoid extra work during the journal flush.

Signed-off-by: Steven Whitehouse

commit d527b9342f434189906738e8008cdc0dae65dd5d
Author: David Teigland
Date: Wed Nov 7 09:06:49 2007 -0600

[DLM] use dlm prefix on alloc and free functions

The dlm functions in memory.c should use the dlm_ prefix. Also, use kzalloc/kfree directly for dlm_direntry's, removing the wrapper functions.

Signed-off-by: David Teigland
Signed-off-by: Steven Whitehouse

commit a0d1035972c173332da4a400f364e090207aaa1d
Author: David Teigland
Date: Wed Nov 7 09:06:06 2007 -0600

[DLM] don't print common non-errors

Change log_error() to log_debug() for conditions that can occur in large numbers in normal operation.

Signed-off-by: David Teigland
Signed-off-by: Steven Whitehouse
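To illustrate the idea in "[GFS2] Don't add glocks to the journal" above: instead of linking each glock into a journal list via an embedded log element (which the changelog says cost 12 bytes on i386 and 24 on x86_64 per glock), the "needs a log flush before release" state can be tracked with a single flag bit. The structure and flag name below are invented for the sketch and do not match incore.h.

/* Illustrative only: a flag bit replacing journal-list membership. */
#include <stdbool.h>
#include <stdio.h>

#define GLF_NEEDS_LOG_FLUSH (1UL << 5)   /* hypothetical flag bit */

struct glock_sketch {
        unsigned long gl_flags;
        /* before: an embedded log element (list_head + ops) lived here */
};

static void mark_in_log(struct glock_sketch *gl)
{
        gl->gl_flags |= GLF_NEEDS_LOG_FLUSH;
}

static bool needs_log_flush(const struct glock_sketch *gl)
{
        return gl->gl_flags & GLF_NEEDS_LOG_FLUSH;
}

int main(void)
{
        struct glock_sketch gl = { 0 };

        mark_in_log(&gl);
        printf("%d\n", needs_log_flush(&gl));  /* 1 */
        return 0;
}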
commit 74cd5d41cc7b8fb66fcc2137d148926f01163ee5
Author: David Teigland
Date: Wed Nov 7 09:03:56 2007 -0600

[GFS2] check kthread_should_stop when waiting

Use wait_event_interruptible() in the lock_dlm thread instead of an open coded equivalent, and include a kthread_should_stop() check in the wait test so we don't miss a kthread_stop().

Signed-off-by: David Teigland
Signed-off-by: Steven Whitehouse

commit 146374f57cd41aa80d25541ef030b150f030784b
Author: Adrian Bunk
Date: Sat Nov 3 01:04:30 2007 +0100

[DLM] proper prototypes

This patch adds proper prototypes for some functions in fs/dlm/dlm_internal.h

Signed-off-by: Adrian Bunk
Acked-by: David Teigland
Signed-off-by: Steven Whitehouse

commit 0f7244a7385872b531f669c99bc66a4ff273b287
Author: Bob Peterson
Date: Fri Nov 2 09:37:15 2007 -0500

[GFS2] Given device ID rather than s_id in "id" sysfs file

This patch changes the /sys/fs/gfs2//id file to give the device id "major:minor" rather than the s_id. That enables gfs2_tool to match devices properly (by id, not name) when locating the tuning files.

Signed-off-by: Bob Peterson
Signed-off-by: Steven Whitehouse

commit 6b1f6243828ab452a7c26b6541e81845dd70723a
Author: Steven Whitehouse
Date: Fri Nov 2 09:14:31 2007 +0000

[GFS2] Remove flags no longer required

The HIF_MUTEX and HIF_PROMOTE flags were set on the glock holders depending upon which of the two waiters lists they were going to be queued upon. They were then tested when the holders were taken off the lists to ensure that the right type of holder was being dequeued. Since we are already using separate lists, there doesn't seem a lot of point having these flags as well, and since setting them and testing them is in the fast path for locking and unlocking a glock, this patch removes them.

Signed-off-by: Steven Whitehouse

commit adcbcd5827ed49bd10722806fa579fb9928c2de6
Author: Steven Whitehouse
Date: Fri Nov 2 08:39:34 2007 +0000

[GFS2] Reorder writeback for glock sync

Previously we were doing (write data, wait for data, write metadata, wait for metadata). After this patch we do (write metadata, write data, wait for data, wait for metadata), which should be more efficient.

Also I noticed that the drop_bh and xmote_bh functions were almost identical. In fact the only difference was a single test, and that test is such that in the drop_bh case, it would always evaluate to the correct result. As such we can use the xmote_bh functions in all the places where we were using the drop_bh function, and remove the drop_bh functions.

Signed-off-by: Steven Whitehouse

commit 081097fd2854d2bf224eb55773f52fb8e8f0886a
Author: Steven Whitehouse
Date: Thu Nov 1 09:34:14 2007 +0000

[GFS2] Add sync_page to metadata address space operations

This set of address space operations was missing a sync_page operation.

Signed-off-by: Steven Whitehouse

commit 7168a8f81dcffbf050310b0ef646d9370a1daa37
Author: Steven Whitehouse
Date: Thu Nov 1 09:26:54 2007 +0000

[GFS2] Remove "reclaim limit"

This call to reclaim glocks is not needed, and in particular we don't want it in the fast path for locking glocks. The limit was entirely arbitrary anyway and we can't expect users to adjust things like this; the remaining code will do the right thing on its own.

Signed-off-by: Steven Whitehouse

commit 64a3f78cc35fc34e263a74db0b0c35da170cff64
Author: Steven Whitehouse
Date: Wed Oct 31 14:24:33 2007 +0000

[GFS2] Remove unused variables

These haven't been used for some time, remove them.

Signed-off-by: Steven Whitehouse
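The "[GFS2] check kthread_should_stop when waiting" commit above is about folding the stop condition into the wait predicate itself, so a sleeping thread cannot miss kthread_stop(). Below is a userspace pthread analogue of that pattern; the kernel code uses wait_event_interruptible() and kthread_should_stop(), and everything here is an illustrative stand-in.

/* Illustrative only: the stop request is part of the wait predicate. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static bool have_work;
static bool should_stop;

static void *worker(void *arg)
{
        bool stopping;

        (void)arg;
        pthread_mutex_lock(&lock);
        /* Equivalent of: wait_event_interruptible(wq, work || kthread_should_stop()) */
        while (!have_work && !should_stop)
                pthread_cond_wait(&cond, &lock);
        stopping = should_stop;
        pthread_mutex_unlock(&lock);

        printf(stopping ? "stopping\n" : "processing work\n");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);

        pthread_mutex_lock(&lock);
        should_stop = true;                 /* like kthread_stop() */
        pthread_cond_broadcast(&cond);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        return 0;
}

Because the flag is checked inside the same loop (and under the same lock) that decides whether to sleep, a stop request issued before or during the wait is never lost.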
commit 9edddef5b5799653ca8e1ec5b9b42a8941047a1e
Author: Lon Hohberger
Date: Thu Oct 25 18:51:54 2007 -0400

[DLM] Bind connections from known local address when using TCP

A common problem occurs when multiple IP addresses within the same subnet are assigned to the same NIC. If we make a connection attempt to another address on the same subnet as one of those addresses, the connection attempt will not necessarily be routed from the address we want. In the case of the DLM, the other nodes will quickly drop the connection attempt, causing problems. This patch makes the DLM bind to the local address it acquired from the cluster manager when using TCP, prior to making a connection, obviating the need for administrators to "fix" their systems or use clever routing tricks.

Signed-off-by: Lon Hohberger
Signed-off-by: Patrick Caulfield
Signed-off-by: Steven Whitehouse

commit f7900eef269bb5935a117f38a16e2b9f0e45c3c2
Author: Steven Whitehouse
Date: Thu Oct 18 11:15:50 2007 +0100

[GFS2] Use correct include file in ops_address.c

Something changed in the upstream kernel, and it needs this one-liner to allow ops_address.c to build.

Signed-off-by: Steven Whitehouse

commit bfccd53153c98907f2e4887f58537fb960be679c
Author: Steven Whitehouse
Date: Wed Oct 17 14:05:41 2007 +0100

[GFS2] Don't hold page lock when starting transaction

This is an addendum to the new AOPs work which moves the point at which we take the page lock so that we don't get it until the last possible moment. This resolves a conflict between starting transactions and the page lock.

Signed-off-by: Steven Whitehouse

commit ab072af9da7750c8f2615d355791cac0ea84e61d
Author: Steven Whitehouse
Date: Wed Oct 17 09:04:24 2007 +0100

[GFS2] Add writepages for GFS2 jdata

This patch resolves a lock ordering issue where we had been getting a transaction lock in the wrong order with respect to the page lock. By using writepages rather than just writepage, it is then possible to start a transaction before locking the page, thus matching the locking order used elsewhere in the code.

Signed-off-by: Steven Whitehouse

commit e694bd8bebf3d5e74a2079d3c7363ba10df854f3
Author: Steven Whitehouse
Date: Fri Sep 28 13:49:05 2007 +0100

[GFS2] Split gfs2_writepage into three cases

This patch splits gfs2_writepage into separate functions for each of the three cases: writeback, ordered and journalled. As a result it becomes a lot easier to see what each one is doing. The common code is moved into gfs2_writepage_common.

This fixes a performance bug where we were doing more work than strictly required in the ordered write case.

Signed-off-by: Steven Whitehouse

commit 53f42c621da51dcf901b754b0bbbcdfea3e6f814
Author: Steven Whitehouse
Date: Wed Oct 17 08:47:38 2007 +0100

[GFS2] Introduce gfs2_set_aops()

Just like ext3, we now have three sets of address space operations to cover the cases of writeback, ordered and journalled data writes. This means that the individual operations can now become less complicated as we are able to remove some of the tests for file data mode from the code.

Signed-off-by: Steven Whitehouse

commit 7add611e238a009fa4b897502ca1e9046da451a4
Author: Steven Whitehouse
Date: Wed Oct 17 08:35:19 2007 +0100

[GFS2] Add gfs2_is_writeback()

This adds a function "gfs2_is_writeback()" along the lines of the existing "gfs2_is_jdata()" in order to clean up the code and make the various tests for the inode mode more obvious. It also fixes the PageChecked() logic where we were resetting the flag too early in the case of an error path.

Signed-off-by: Steven Whitehouse
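As an illustration of "[GFS2] Introduce gfs2_set_aops()" and "[GFS2] Add gfs2_is_writeback()" above: the data mode is resolved once into one of three operation tables (writeback, ordered, journalled) rather than being tested inside every operation. The types and names below are simplified stand-ins for the real address_space_operations plumbing, not the GFS2 code.

/* Illustrative only: pick an ops table once, based on the data mode. */
#include <stdio.h>

enum data_mode { DATA_WRITEBACK, DATA_ORDERED, DATA_JOURNALLED };

struct aops_sketch {
        const char *name;
        int (*writepage)(void);
};

static int writeback_writepage(void)  { return 0; }
static int ordered_writepage(void)    { return 0; }
static int journalled_writepage(void) { return 0; }

static const struct aops_sketch writeback_aops  = { "writeback",  writeback_writepage };
static const struct aops_sketch ordered_aops    = { "ordered",    ordered_writepage };
static const struct aops_sketch journalled_aops = { "journalled", journalled_writepage };

/* Analogue of gfs2_set_aops(): choose the table when the inode is set up. */
static const struct aops_sketch *set_aops(enum data_mode mode)
{
        switch (mode) {
        case DATA_WRITEBACK:  return &writeback_aops;
        case DATA_ORDERED:    return &ordered_aops;
        default:              return &journalled_aops;
        }
}

int main(void)
{
        printf("%s\n", set_aops(DATA_ORDERED)->name);  /* ordered */
        return 0;
}

A gfs2_is_writeback()-style predicate then reduces to asking whether the writeback table was selected, which is the simplification the hunks in the bmap.c diff below (the "if (!gfs2_is_writeback(ip))" tests) rely on.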
commit 87b6c0fa29fa7c67516271289dd6557e375b0ae9
Author: Steven Whitehouse
Date: Tue Oct 16 11:47:04 2007 +0100

[GFS2] Remove unused field in struct gfs2_inode

Removes a field that is not used.

Signed-off-by: Steven Whitehouse

commit 8f57225af6b94a72ff6b36ccdf1fd35fbc183321
Author: Steven Whitehouse
Date: Mon Oct 15 16:29:05 2007 +0100

[GFS2] Remove useless i_cache from inodes

The i_cache was designed to keep references to the indirect blocks used during block mapping so that they didn't have to be looked up continually. The idea failed because there are too many places where the i_cache needs to be freed, and this has in the past been the cause of many bugs. In addition there was no performance benefit being gained since the disk blocks in question were cached anyway.

So this patch removes it in order to simplify the code, to prepare for other changes which would otherwise have had to add further support for this feature.

Signed-off-by: Steven Whitehouse

commit 4d8408292d5d97ba2e87445d6d88923e7516d4f2
Author: Steven Whitehouse
Date: Mon Oct 15 15:40:33 2007 +0100

[GFS2] Use ->page_mkwrite() for mmap()

This cleans up the mmap() code path for GFS2 by implementing the page_mkwrite function for GFS2. We are thus able to use the generic filemap_fault function for our ->fault() implementation.

This now means that shared writable mappings will be much more efficiently shared across the cluster if there is a reasonable proportion of read activity (the greater the proportion, the better). As a side effect, it also reduces the size of the code, removes special cases from readpage and readpages, and makes the code path easier to follow.

Signed-off-by: Steven Whitehouse

commit 19f210f34681701d474a73374d2907023940a016
Author: Steven Whitehouse
Date: Mon Oct 15 14:42:35 2007 +0100

[GFS2] Clean up internal read function

As requested by Christoph, this patch cleans up GFS2's internal read function so that it no longer uses the do_generic_mapping_read function. This function is obsolete and GFS2 is the last user of it.

As a side effect the internal read code gets smaller and easier to read, and gfs2_readpage is split into two. One function has the locking and the other function has the rest of the logic.

Signed-off-by: Steven Whitehouse
Cc: Christoph Hellwig

commit 93f41afba0287dc6ddd7a168b5e4a30c1954ddbe
Author: Wendy Cheng
Date: Fri Oct 5 00:27:58 2007 -0400

[GFS2] Handle multiple glock demote requests

Fix a race condition where multiple glock demote requests are sent to a node back-to-back. This patch does a check inside handle_callback() to see whether a demote request is in progress. If true, it sets a flag to make sure run_queue() will loop again to handle the new request, instead of erroneously setting gl_demote_state to a different state.

Signed-off-by: S.
Wendy Cheng Signed-off-by: Steven Whitehouse Signed-off-by: Andrew Morton --- fs/dlm/dir.c | 10 fs/dlm/dlm_internal.h | 16 fs/dlm/lock.c | 29 - fs/dlm/lockspace.c | 16 fs/dlm/lowcomms.c | 15 fs/dlm/main.c | 10 fs/dlm/memory.c | 32 - fs/dlm/memory.h | 16 fs/dlm/recover.c | 4 fs/gfs2/Makefile | 2 fs/gfs2/bmap.c | 37 + fs/gfs2/bmap.h | 2 fs/gfs2/daemon.c | 50 -- fs/gfs2/daemon.h | 1 fs/gfs2/dir.c | 4 fs/gfs2/eaops.c | 84 ---- fs/gfs2/eattr.c | 2 fs/gfs2/glock.c | 83 +--- fs/gfs2/glops.c | 110 ----- fs/gfs2/incore.h | 57 +- fs/gfs2/inode.c | 39 + fs/gfs2/inode.h | 12 fs/gfs2/locking/dlm/mount.c | 5 fs/gfs2/locking/dlm/plock.c | 18 fs/gfs2/locking/dlm/thread.c | 9 fs/gfs2/log.c | 231 +++++++---- fs/gfs2/log.h | 15 fs/gfs2/lops.c | 73 --- fs/gfs2/main.c | 3 fs/gfs2/meta_io.c | 98 +--- fs/gfs2/meta_io.h | 1 fs/gfs2/ops_address.c | 649 +++++++++++++++++++++++---------- fs/gfs2/ops_address.h | 7 fs/gfs2/ops_file.c | 228 ++++++++--- fs/gfs2/ops_file.h | 24 - fs/gfs2/ops_fstype.c | 85 +++- fs/gfs2/ops_inode.c | 20 - fs/gfs2/ops_inode.h | 6 fs/gfs2/ops_super.c | 1 fs/gfs2/ops_vm.c | 169 -------- fs/gfs2/ops_vm.h | 18 fs/gfs2/quota.c | 29 - fs/gfs2/recovery.c | 2 fs/gfs2/rgrp.c | 104 +++-- fs/gfs2/rgrp.h | 4 fs/gfs2/super.c | 29 - fs/gfs2/sys.c | 11 fs/gfs2/trans.c | 21 - fs/gfs2/trans.h | 1 49 files changed, 1248 insertions(+), 1244 deletions(-) diff -puN fs/dlm/dir.c~git-gfs2-nmw fs/dlm/dir.c --- a/fs/dlm/dir.c~git-gfs2-nmw +++ a/fs/dlm/dir.c @@ -49,7 +49,7 @@ static struct dlm_direntry *get_free_de( spin_unlock(&ls->ls_recover_list_lock); if (!found) - de = allocate_direntry(ls, len); + de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); return de; } @@ -62,7 +62,7 @@ void dlm_clear_free_entries(struct dlm_l de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, list); list_del(&de->list); - free_direntry(de); + kfree(de); } spin_unlock(&ls->ls_recover_list_lock); } @@ -171,7 +171,7 @@ void dlm_dir_remove_entry(struct dlm_ls } list_del(&de->list); - free_direntry(de); + kfree(de); out: write_unlock(&ls->ls_dirtbl[bucket].lock); } @@ -302,7 +302,7 @@ static int get_entry(struct dlm_ls *ls, write_unlock(&ls->ls_dirtbl[bucket].lock); - de = allocate_direntry(ls, namelen); + de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); if (!de) return -ENOMEM; @@ -313,7 +313,7 @@ static int get_entry(struct dlm_ls *ls, write_lock(&ls->ls_dirtbl[bucket].lock); tmp = search_bucket(ls, name, namelen, bucket); if (tmp) { - free_direntry(de); + kfree(de); de = tmp; } else { list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); diff -puN fs/dlm/dlm_internal.h~git-gfs2-nmw fs/dlm/dlm_internal.h --- a/fs/dlm/dlm_internal.h~git-gfs2-nmw +++ a/fs/dlm/dlm_internal.h @@ -570,5 +570,21 @@ static inline int dlm_no_directory(struc return (ls->ls_exflags & DLM_LSFL_NODIR) ? 
1 : 0; } +int dlm_netlink_init(void); +void dlm_netlink_exit(void); +void dlm_timeout_warn(struct dlm_lkb *lkb); + +#ifdef CONFIG_DLM_DEBUG +int dlm_register_debugfs(void); +void dlm_unregister_debugfs(void); +int dlm_create_debug_file(struct dlm_ls *ls); +void dlm_delete_debug_file(struct dlm_ls *ls); +#else +static inline int dlm_register_debugfs(void) { return 0; } +static inline void dlm_unregister_debugfs(void) { } +static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } +static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } +#endif + #endif /* __DLM_INTERNAL_DOT_H__ */ diff -puN fs/dlm/lock.c~git-gfs2-nmw fs/dlm/lock.c --- a/fs/dlm/lock.c~git-gfs2-nmw +++ a/fs/dlm/lock.c @@ -88,7 +88,6 @@ static void __receive_convert_reply(stru static int receive_extralen(struct dlm_message *ms); static void do_purge(struct dlm_ls *ls, int nodeid, int pid); static void del_timeout(struct dlm_lkb *lkb); -void dlm_timeout_warn(struct dlm_lkb *lkb); /* * Lock compatibilty matrix - thanks Steve @@ -335,7 +334,7 @@ static struct dlm_rsb *create_rsb(struct { struct dlm_rsb *r; - r = allocate_rsb(ls, len); + r = dlm_allocate_rsb(ls, len); if (!r) return NULL; @@ -478,7 +477,7 @@ static int find_rsb(struct dlm_ls *ls, c error = _search_rsb(ls, name, namelen, bucket, 0, &tmp); if (!error) { write_unlock(&ls->ls_rsbtbl[bucket].lock); - free_rsb(r); + dlm_free_rsb(r); r = tmp; goto out; } @@ -519,7 +518,7 @@ static void toss_rsb(struct kref *kref) list_move(&r->res_hashchain, &ls->ls_rsbtbl[r->res_bucket].toss); r->res_toss_time = jiffies; if (r->res_lvbptr) { - free_lvb(r->res_lvbptr); + dlm_free_lvb(r->res_lvbptr); r->res_lvbptr = NULL; } } @@ -589,7 +588,7 @@ static int create_lkb(struct dlm_ls *ls, uint32_t lkid = 0; uint16_t bucket; - lkb = allocate_lkb(ls); + lkb = dlm_allocate_lkb(ls); if (!lkb) return -ENOMEM; @@ -683,8 +682,8 @@ static int __put_lkb(struct dlm_ls *ls, /* for local/process lkbs, lvbptr points to caller's lksb */ if (lkb->lkb_lvbptr && is_master_copy(lkb)) - free_lvb(lkb->lkb_lvbptr); - free_lkb(lkb); + dlm_free_lvb(lkb->lkb_lvbptr); + dlm_free_lkb(lkb); return 1; } else { write_unlock(&ls->ls_lkbtbl[bucket].lock); @@ -988,7 +987,7 @@ static int shrink_bucket(struct dlm_ls * if (is_master(r)) dir_remove(r); - free_rsb(r); + dlm_free_rsb(r); count++; } else { write_unlock(&ls->ls_rsbtbl[b].lock); @@ -1171,7 +1170,7 @@ static void set_lvb_lock(struct dlm_rsb return; if (!r->res_lvbptr) - r->res_lvbptr = allocate_lvb(r->res_ls); + r->res_lvbptr = dlm_allocate_lvb(r->res_ls); if (!r->res_lvbptr) return; @@ -1203,7 +1202,7 @@ static void set_lvb_unlock(struct dlm_rs return; if (!r->res_lvbptr) - r->res_lvbptr = allocate_lvb(r->res_ls); + r->res_lvbptr = dlm_allocate_lvb(r->res_ls); if (!r->res_lvbptr) return; @@ -2986,7 +2985,7 @@ static int receive_lvb(struct dlm_ls *ls if (lkb->lkb_exflags & DLM_LKF_VALBLK) { if (!lkb->lkb_lvbptr) - lkb->lkb_lvbptr = allocate_lvb(ls); + lkb->lkb_lvbptr = dlm_allocate_lvb(ls); if (!lkb->lkb_lvbptr) return -ENOMEM; len = receive_extralen(ms); @@ -3010,7 +3009,7 @@ static int receive_request_args(struct d if (lkb->lkb_exflags & DLM_LKF_VALBLK) { /* lkb was just created so there won't be an lvb yet */ - lkb->lkb_lvbptr = allocate_lvb(ls); + lkb->lkb_lvbptr = dlm_allocate_lvb(ls); if (!lkb->lkb_lvbptr) return -ENOMEM; } @@ -4184,7 +4183,7 @@ static int receive_rcom_lock_args(struct lkb->lkb_astaddr = (void *) (long) (rl->rl_asts & AST_COMP); if (lkb->lkb_exflags & DLM_LKF_VALBLK) { - lkb->lkb_lvbptr = allocate_lvb(ls); + 
lkb->lkb_lvbptr = dlm_allocate_lvb(ls); if (!lkb->lkb_lvbptr) return -ENOMEM; lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - @@ -4259,7 +4258,7 @@ int dlm_recover_master_copy(struct dlm_l put_rsb(r); out: if (error) - log_print("recover_master_copy %d %x", error, rl->rl_lkid); + log_debug(ls, "recover_master_copy %d %x", error, rl->rl_lkid); rl->rl_result = error; return error; } @@ -4342,7 +4341,7 @@ int dlm_user_request(struct dlm_ls *ls, } } - /* After ua is attached to lkb it will be freed by free_lkb(). + /* After ua is attached to lkb it will be freed by dlm_free_lkb(). When DLM_IFL_USER is set, the dlm knows that this is a userspace lock and that lkb_astparam is the dlm_user_args structure. */ diff -puN fs/dlm/lockspace.c~git-gfs2-nmw fs/dlm/lockspace.c --- a/fs/dlm/lockspace.c~git-gfs2-nmw +++ a/fs/dlm/lockspace.c @@ -24,14 +24,6 @@ #include "recover.h" #include "requestqueue.h" -#ifdef CONFIG_DLM_DEBUG -int dlm_create_debug_file(struct dlm_ls *ls); -void dlm_delete_debug_file(struct dlm_ls *ls); -#else -static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } -static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } -#endif - static int ls_count; static struct mutex ls_lock; static struct list_head lslist; @@ -684,9 +676,9 @@ static int release_lockspace(struct dlm_ dlm_del_ast(lkb); if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY) - free_lvb(lkb->lkb_lvbptr); + dlm_free_lvb(lkb->lkb_lvbptr); - free_lkb(lkb); + dlm_free_lkb(lkb); } } dlm_astd_resume(); @@ -704,7 +696,7 @@ static int release_lockspace(struct dlm_ res_hashchain); list_del(&rsb->res_hashchain); - free_rsb(rsb); + dlm_free_rsb(rsb); } head = &ls->ls_rsbtbl[i].toss; @@ -712,7 +704,7 @@ static int release_lockspace(struct dlm_ rsb = list_entry(head->next, struct dlm_rsb, res_hashchain); list_del(&rsb->res_hashchain); - free_rsb(rsb); + dlm_free_rsb(rsb); } } diff -puN fs/dlm/lowcomms.c~git-gfs2-nmw fs/dlm/lowcomms.c --- a/fs/dlm/lowcomms.c~git-gfs2-nmw +++ a/fs/dlm/lowcomms.c @@ -864,7 +864,7 @@ static void sctp_init_assoc(struct conne static void tcp_connect_to_sock(struct connection *con) { int result = -EHOSTUNREACH; - struct sockaddr_storage saddr; + struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock; @@ -898,6 +898,17 @@ static void tcp_connect_to_sock(struct c con->connect_action = tcp_connect_to_sock; add_sock(sock, con); + /* Bind to our cluster-known address connecting to avoid + routing problems */ + memcpy(&src_addr, dlm_local_addr[0], sizeof(src_addr)); + make_sockaddr(&src_addr, 0, &addr_len); + result = sock->ops->bind(sock, (struct sockaddr *) &src_addr, + addr_len); + if (result < 0) { + printk("dlm: could not bind for connect: %d\n", result); + /* This *may* not indicate a critical error */ + } + make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); log_print("connecting to %d", con->nodeid); @@ -1427,6 +1438,8 @@ void dlm_lowcomms_stop(void) if (con) { close_connection(con, true); kmem_cache_free(con_cache, con); + if (con->othercon) + kmem_cache_free(con_cache, con->othercon); } } max_nodeid = 0; diff -puN fs/dlm/main.c~git-gfs2-nmw fs/dlm/main.c --- a/fs/dlm/main.c~git-gfs2-nmw +++ a/fs/dlm/main.c @@ -18,16 +18,6 @@ #include "memory.h" #include "config.h" -#ifdef CONFIG_DLM_DEBUG -int dlm_register_debugfs(void); -void dlm_unregister_debugfs(void); -#else -static inline int dlm_register_debugfs(void) { return 0; } -static inline void dlm_unregister_debugfs(void) { } -#endif -int dlm_netlink_init(void); -void dlm_netlink_exit(void); 
- static int __init init_dlm(void) { int error; diff -puN fs/dlm/memory.c~git-gfs2-nmw fs/dlm/memory.c --- a/fs/dlm/memory.c~git-gfs2-nmw +++ a/fs/dlm/memory.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -35,7 +35,7 @@ void dlm_memory_exit(void) kmem_cache_destroy(lkb_cache); } -char *allocate_lvb(struct dlm_ls *ls) +char *dlm_allocate_lvb(struct dlm_ls *ls) { char *p; @@ -43,7 +43,7 @@ char *allocate_lvb(struct dlm_ls *ls) return p; } -void free_lvb(char *p) +void dlm_free_lvb(char *p) { kfree(p); } @@ -51,7 +51,7 @@ void free_lvb(char *p) /* FIXME: have some minimal space built-in to rsb for the name and kmalloc a separate name if needed, like dentries are done */ -struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen) { struct dlm_rsb *r; @@ -61,14 +61,14 @@ struct dlm_rsb *allocate_rsb(struct dlm_ return r; } -void free_rsb(struct dlm_rsb *r) +void dlm_free_rsb(struct dlm_rsb *r) { if (r->res_lvbptr) - free_lvb(r->res_lvbptr); + dlm_free_lvb(r->res_lvbptr); kfree(r); } -struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) +struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) { struct dlm_lkb *lkb; @@ -76,7 +76,7 @@ struct dlm_lkb *allocate_lkb(struct dlm_ return lkb; } -void free_lkb(struct dlm_lkb *lkb) +void dlm_free_lkb(struct dlm_lkb *lkb) { if (lkb->lkb_flags & DLM_IFL_USER) { struct dlm_user_args *ua; @@ -90,19 +90,3 @@ void free_lkb(struct dlm_lkb *lkb) kmem_cache_free(lkb_cache, lkb); } -struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) -{ - struct dlm_direntry *de; - - DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN, - printk("namelen = %d\n", namelen);); - - de = kzalloc(sizeof(*de) + namelen, GFP_KERNEL); - return de; -} - -void free_direntry(struct dlm_direntry *de) -{ - kfree(de); -} - diff -puN fs/dlm/memory.h~git-gfs2-nmw fs/dlm/memory.h --- a/fs/dlm/memory.h~git-gfs2-nmw +++ a/fs/dlm/memory.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -16,14 +16,12 @@ int dlm_memory_init(void); void dlm_memory_exit(void); -struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen); -void free_rsb(struct dlm_rsb *r); -struct dlm_lkb *allocate_lkb(struct dlm_ls *ls); -void free_lkb(struct dlm_lkb *l); -struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen); -void free_direntry(struct dlm_direntry *de); -char *allocate_lvb(struct dlm_ls *ls); -void free_lvb(char *l); +struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); +void dlm_free_rsb(struct dlm_rsb *r); +struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); +void dlm_free_lkb(struct dlm_lkb *l); +char *dlm_allocate_lvb(struct dlm_ls *ls); +void dlm_free_lvb(char *l); #endif /* __MEMORY_DOT_H__ */ diff -puN fs/dlm/recover.c~git-gfs2-nmw fs/dlm/recover.c --- a/fs/dlm/recover.c~git-gfs2-nmw +++ a/fs/dlm/recover.c @@ -629,7 +629,7 @@ static void recover_lvb(struct dlm_rsb * goto out; if (!r->res_lvbptr) { - r->res_lvbptr = allocate_lvb(r->res_ls); + r->res_lvbptr = dlm_allocate_lvb(r->res_ls); if (!r->res_lvbptr) goto out; } @@ -760,7 +760,7 @@ void dlm_clear_toss_list(struct dlm_ls * list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss, res_hashchain) { list_del(&r->res_hashchain); - free_rsb(r); + dlm_free_rsb(r); } write_unlock(&ls->ls_rsbtbl[i].lock); } diff -puN fs/gfs2/Makefile~git-gfs2-nmw fs/gfs2/Makefile --- a/fs/gfs2/Makefile~git-gfs2-nmw +++ a/fs/gfs2/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ - ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ + ops_fstype.o ops_inode.o ops_super.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ diff -puN fs/gfs2/bmap.c~git-gfs2-nmw fs/gfs2/bmap.c --- a/fs/gfs2/bmap.c~git-gfs2-nmw +++ a/fs/gfs2/bmap.c @@ -59,7 +59,6 @@ struct strip_mine { static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, u64 block, struct page *page) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct inode *inode = &ip->i_inode; struct buffer_head *bh; int release = 0; @@ -95,7 +94,7 @@ static int gfs2_unstuffer_page(struct gf set_buffer_uptodate(bh); if (!gfs2_is_jdata(ip)) mark_buffer_dirty(bh); - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + if (!gfs2_is_writeback(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); if (release) { @@ -453,8 +452,8 @@ static inline void bmap_unlock(struct in * Returns: errno */ -int gfs2_block_map(struct inode *inode, u64 lblock, int create, - struct buffer_head *bh_map) +int gfs2_block_map(struct inode *inode, sector_t lblock, + struct buffer_head *bh_map, int create) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -470,6 +469,7 @@ int gfs2_block_map(struct inode *inode, unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; struct metapath mp; u64 size; + struct buffer_head *dibh = NULL; BUG_ON(maxlen == 0); @@ -500,6 +500,8 @@ int gfs2_block_map(struct inode *inode, error = gfs2_meta_inode_buffer(ip, &bh); if (error) goto out_fail; + dibh = bh; + get_bh(dibh); for (x = 0; x < end_of_metadata; x++) { lookup_block(ip, bh, x, &mp, create, &new, &dblock); @@ -518,13 +520,8 @@ int gfs2_block_map(struct inode 
*inode, if (boundary) set_buffer_boundary(bh_map); if (new) { - struct buffer_head *dibh; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - } + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); set_buffer_new(bh_map); goto out_brelse; } @@ -545,6 +542,8 @@ out_brelse: out_ok: error = 0; out_fail: + if (dibh) + brelse(dibh); bmap_unlock(inode, create); return error; } @@ -560,7 +559,7 @@ int gfs2_extent_map(struct inode *inode, BUG_ON(!new); bh.b_size = 1 << (inode->i_blkbits + 5); - ret = gfs2_block_map(inode, lblock, create, &bh); + ret = gfs2_block_map(inode, lblock, &bh, create); *extlen = bh.b_size >> inode->i_blkbits; *dblock = bh.b_blocknr; if (buffer_new(&bh)) @@ -684,7 +683,7 @@ static int do_strip(struct gfs2_inode *i if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; - error = gfs2_rindex_hold(sdp, &ip->i_alloc.al_ri_gh); + error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); if (error) return error; @@ -786,7 +785,7 @@ out_rg_gunlock: out_rlist: gfs2_rlist_free(&rlist); out: - gfs2_glock_dq_uninit(&ip->i_alloc.al_ri_gh); + gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); return error; } @@ -879,7 +878,6 @@ static int gfs2_block_truncate_page(stru { struct inode *inode = mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); loff_t from = inode->i_size; unsigned long index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); @@ -911,7 +909,7 @@ static int gfs2_block_truncate_page(stru err = 0; if (!buffer_mapped(bh)) { - gfs2_get_block(inode, iblock, bh, 0); + gfs2_block_map(inode, iblock, bh, 0); /* unmapped? It's a hole - nothing to do */ if (!buffer_mapped(bh)) goto unlock; @@ -931,7 +929,7 @@ static int gfs2_block_truncate_page(stru err = 0; } - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + if (!gfs2_is_writeback(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); zero_user_page(page, offset, length, KM_USER0); @@ -1224,8 +1222,13 @@ int gfs2_write_alloc_required(struct gfs do_div(lblock_stop, bsize); } else { unsigned int shift = sdp->sd_sb.sb_bsize_shift; + u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift; lblock = offset >> shift; lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; + if (lblock_stop > end_of_file) { + *alloc_required = 1; + return 0; + } } for (; lblock < lblock_stop; lblock += extlen) { diff -puN fs/gfs2/bmap.h~git-gfs2-nmw fs/gfs2/bmap.h --- a/fs/gfs2/bmap.h~git-gfs2-nmw +++ a/fs/gfs2/bmap.h @@ -15,7 +15,7 @@ struct gfs2_inode; struct page; int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); -int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh); +int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); int gfs2_truncatei(struct gfs2_inode *ip, u64 size); diff -puN fs/gfs2/daemon.c~git-gfs2-nmw fs/gfs2/daemon.c --- a/fs/gfs2/daemon.c~git-gfs2-nmw +++ a/fs/gfs2/daemon.c @@ -83,56 +83,6 @@ int gfs2_recoverd(void *data) } /** - * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks - * @sdp: Pointer to GFS2 superblock - * - * Also, periodically check to make sure that we're using the most recent - * journal index. 
- */ - -int gfs2_logd(void *data) -{ - struct gfs2_sbd *sdp = data; - struct gfs2_holder ji_gh; - unsigned long t; - int need_flush; - - while (!kthread_should_stop()) { - /* Advance the log tail */ - - t = sdp->sd_log_flush_time + - gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; - - gfs2_ail1_empty(sdp, DIO_ALL); - gfs2_log_lock(sdp); - need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks); - gfs2_log_unlock(sdp); - if (need_flush || time_after_eq(jiffies, t)) { - gfs2_log_flush(sdp, NULL); - sdp->sd_log_flush_time = jiffies; - } - - /* Check for latest journal index */ - - t = sdp->sd_jindex_refresh_time + - gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ; - - if (time_after_eq(jiffies, t)) { - if (!gfs2_jindex_hold(sdp, &ji_gh)) - gfs2_glock_dq_uninit(&ji_gh); - sdp->sd_jindex_refresh_time = jiffies; - } - - t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; - if (freezing(current)) - refrigerator(); - schedule_timeout_interruptible(t); - } - - return 0; -} - -/** * gfs2_quotad - Write cached quota changes into the quota file * @sdp: Pointer to GFS2 superblock * diff -puN fs/gfs2/daemon.h~git-gfs2-nmw fs/gfs2/daemon.h --- a/fs/gfs2/daemon.h~git-gfs2-nmw +++ a/fs/gfs2/daemon.h @@ -12,7 +12,6 @@ int gfs2_glockd(void *data); int gfs2_recoverd(void *data); -int gfs2_logd(void *data); int gfs2_quotad(void *data); #endif /* __DAEMON_DOT_H__ */ diff -puN fs/gfs2/dir.c~git-gfs2-nmw fs/gfs2/dir.c --- a/fs/gfs2/dir.c~git-gfs2-nmw +++ a/fs/gfs2/dir.c @@ -1876,7 +1876,7 @@ static int leaf_dealloc(struct gfs2_inod if (error) goto out; - error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh); + error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); if (error) goto out_qs; @@ -1949,7 +1949,7 @@ out_rg_gunlock: gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); out_rlist: gfs2_rlist_free(&rlist); - gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh); + gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); out_qs: gfs2_quota_unhold(dip); out: diff -puN fs/gfs2/eaops.c~git-gfs2-nmw fs/gfs2/eaops.c --- a/fs/gfs2/eaops.c~git-gfs2-nmw +++ a/fs/gfs2/eaops.c @@ -56,46 +56,6 @@ unsigned int gfs2_ea_name2type(const cha return type; } -static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_READ, NULL); - if (error) - return error; - - return gfs2_ea_get_i(ip, er); -} - -static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - struct inode *inode = &ip->i_inode; - - if (S_ISREG(inode->i_mode) || - (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; - } else - return -EPERM; - - return gfs2_ea_set_i(ip, er); -} - -static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - struct inode *inode = &ip->i_inode; - - if (S_ISREG(inode->i_mode) || - (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) { - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; - } else - return -EPERM; - - return gfs2_ea_remove_i(ip, er); -} - static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) { if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) && @@ -108,8 +68,6 @@ static int system_eo_get(struct gfs2_ino GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))) return -EOPNOTSUPP; - - return gfs2_ea_get_i(ip, er); } @@ -170,40 +128,10 @@ static int system_eo_remove(struct gfs2_ return gfs2_ea_remove_i(ip, er); } -static int security_eo_get(struct gfs2_inode *ip, struct 
gfs2_ea_request *er) -{ - struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_READ, NULL); - if (error) - return error; - - return gfs2_ea_get_i(ip, er); -} - -static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; - - return gfs2_ea_set_i(ip, er); -} - -static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) -{ - struct inode *inode = &ip->i_inode; - int error = permission(inode, MAY_WRITE, NULL); - if (error) - return error; - - return gfs2_ea_remove_i(ip, er); -} - static const struct gfs2_eattr_operations gfs2_user_eaops = { - .eo_get = user_eo_get, - .eo_set = user_eo_set, - .eo_remove = user_eo_remove, + .eo_get = gfs2_ea_get_i, + .eo_set = gfs2_ea_set_i, + .eo_remove = gfs2_ea_remove_i, .eo_name = "user", }; @@ -215,9 +143,9 @@ const struct gfs2_eattr_operations gfs2_ }; static const struct gfs2_eattr_operations gfs2_security_eaops = { - .eo_get = security_eo_get, - .eo_set = security_eo_set, - .eo_remove = security_eo_remove, + .eo_get = gfs2_ea_get_i, + .eo_set = gfs2_ea_set_i, + .eo_remove = gfs2_ea_remove_i, .eo_name = "security", }; diff -puN fs/gfs2/eattr.c~git-gfs2-nmw fs/gfs2/eattr.c --- a/fs/gfs2/eattr.c~git-gfs2-nmw +++ a/fs/gfs2/eattr.c @@ -1418,7 +1418,7 @@ out: static int ea_dealloc_block(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd; struct buffer_head *dibh; int error; diff -puN fs/gfs2/glock.c~git-gfs2-nmw fs/gfs2/glock.c --- a/fs/gfs2/glock.c~git-gfs2-nmw +++ a/fs/gfs2/glock.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
* * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -217,7 +217,6 @@ int gfs2_glock_put(struct gfs2_glock *gl if (atomic_dec_and_test(&gl->gl_ref)) { hlist_del(&gl->gl_list); write_unlock(gl_lock_addr(gl->gl_hash)); - BUG_ON(spin_is_locked(&gl->gl_spin)); gfs2_assert(sdp, gl->gl_state == LM_ST_UNLOCKED); gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); gfs2_assert(sdp, list_empty(&gl->gl_holders)); @@ -346,7 +345,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, gl->gl_object = NULL; gl->gl_sbd = sdp; gl->gl_aspace = NULL; - lops_init_le(&gl->gl_le, &gfs2_glock_lops); INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); /* If this glock protects actual on-disk data or metadata blocks, @@ -461,7 +459,6 @@ static void wait_on_holder(struct gfs2_h static void gfs2_demote_wake(struct gfs2_glock *gl) { - BUG_ON(!spin_is_locked(&gl->gl_spin)); gl->gl_demote_state = LM_ST_EXCLUSIVE; clear_bit(GLF_DEMOTE, &gl->gl_flags); smp_mb__after_clear_bit(); @@ -507,21 +504,12 @@ static int rq_mutex(struct gfs2_holder * static int rq_promote(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_sbd; if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { if (list_empty(&gl->gl_holders)) { gl->gl_req_gh = gh; set_bit(GLF_LOCK, &gl->gl_flags); spin_unlock(&gl->gl_spin); - - if (atomic_read(&sdp->sd_reclaim_count) > - gfs2_tune_get(sdp, gt_reclaim_limit) && - !(gh->gh_flags & LM_FLAG_PRIORITY)) { - gfs2_reclaim_glock(sdp); - gfs2_reclaim_glock(sdp); - } - gfs2_glock_xmote_th(gh->gh_gl, gh); spin_lock(&gl->gl_spin); } @@ -567,7 +555,10 @@ static int rq_demote(struct gfs2_glock * gfs2_demote_wake(gl); return 0; } + set_bit(GLF_LOCK, &gl->gl_flags); + set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); + if (gl->gl_demote_state == LM_ST_UNLOCKED || gl->gl_state != LM_ST_EXCLUSIVE) { spin_unlock(&gl->gl_spin); @@ -576,7 +567,9 @@ static int rq_demote(struct gfs2_glock * spin_unlock(&gl->gl_spin); gfs2_glock_xmote_th(gl, NULL); } + spin_lock(&gl->gl_spin); + clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); return 0; } @@ -598,23 +591,18 @@ static void run_queue(struct gfs2_glock if (!list_empty(&gl->gl_waiters1)) { gh = list_entry(gl->gl_waiters1.next, struct gfs2_holder, gh_list); - - if (test_bit(HIF_MUTEX, &gh->gh_iflags)) - blocked = rq_mutex(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + blocked = rq_mutex(gh); } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { blocked = rq_demote(gl); + if (gl->gl_waiters2 && !blocked) { + set_bit(GLF_DEMOTE, &gl->gl_flags); + gl->gl_demote_state = LM_ST_UNLOCKED; + } + gl->gl_waiters2 = 0; } else if (!list_empty(&gl->gl_waiters3)) { gh = list_entry(gl->gl_waiters3.next, struct gfs2_holder, gh_list); - - if (test_bit(HIF_PROMOTE, &gh->gh_iflags)) - blocked = rq_promote(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + blocked = rq_promote(gh); } else break; @@ -632,27 +620,21 @@ static void run_queue(struct gfs2_glock static void gfs2_glmutex_lock(struct gfs2_glock *gl) { - struct gfs2_holder gh; - - gfs2_holder_init(gl, 0, 0, &gh); - set_bit(HIF_MUTEX, &gh.gh_iflags); - if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags)) - BUG(); - spin_lock(&gl->gl_spin); if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + struct gfs2_holder gh; + + gfs2_holder_init(gl, 0, 0, &gh); + set_bit(HIF_WAIT, &gh.gh_iflags); list_add_tail(&gh.gh_list, &gl->gl_waiters1); + spin_unlock(&gl->gl_spin); + wait_on_holder(&gh); + gfs2_holder_uninit(&gh); } else { 
gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); - clear_bit(HIF_WAIT, &gh.gh_iflags); - smp_mb(); - wake_up_bit(&gh.gh_iflags, HIF_WAIT); + spin_unlock(&gl->gl_spin); } - spin_unlock(&gl->gl_spin); - - wait_on_holder(&gh); - gfs2_holder_uninit(&gh); } /** @@ -691,7 +673,6 @@ static void gfs2_glmutex_unlock(struct g gl->gl_owner_pid = 0; gl->gl_ip = 0; run_queue(gl); - BUG_ON(!spin_is_locked(&gl->gl_spin)); spin_unlock(&gl->gl_spin); } @@ -722,7 +703,10 @@ static void handle_callback(struct gfs2_ } } else if (gl->gl_demote_state != LM_ST_UNLOCKED && gl->gl_demote_state != state) { - gl->gl_demote_state = LM_ST_UNLOCKED; + if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) + gl->gl_waiters2 = 1; + else + gl->gl_demote_state = LM_ST_UNLOCKED; } spin_unlock(&gl->gl_spin); } @@ -943,8 +927,8 @@ static void gfs2_glock_drop_th(struct gf const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned int ret; - if (glops->go_drop_th) - glops->go_drop_th(gl); + if (glops->go_xmote_th) + glops->go_xmote_th(gl); gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags)); gfs2_assert_warn(sdp, list_empty(&gl->gl_holders)); @@ -1156,8 +1140,6 @@ restart: return -EIO; } - set_bit(HIF_PROMOTE, &gh->gh_iflags); - spin_lock(&gl->gl_spin); add_to_queue(gh); run_queue(gl); @@ -1248,12 +1230,11 @@ void gfs2_glock_dq(struct gfs2_holder *g list_del_init(&gh->gh_list); if (list_empty(&gl->gl_holders)) { - spin_unlock(&gl->gl_spin); - - if (glops->go_unlock) + if (glops->go_unlock) { + spin_unlock(&gl->gl_spin); glops->go_unlock(gh); - - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_spin); + } gl->gl_stamp = jiffies; } @@ -1910,8 +1891,6 @@ static int dump_glock(struct glock_iter print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); - print_dbg(gi, " le = %s\n", - (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); print_dbg(gi, " reclaim = %s\n", (list_empty(&gl->gl_reclaim)) ? 
"no" : "yes"); if (gl->gl_aspace) diff -puN fs/gfs2/glops.c~git-gfs2-nmw fs/gfs2/glops.c --- a/fs/gfs2/glops.c~git-gfs2-nmw +++ a/fs/gfs2/glops.c @@ -56,7 +56,7 @@ static void gfs2_ail_empty_gl(struct gfs bd = list_entry(head->next, struct gfs2_bufdata, bd_ail_gl_list); bh = bd->bd_bh; - gfs2_remove_from_ail(NULL, bd); + gfs2_remove_from_ail(bd); bd->bd_bh = NULL; bh->b_private = NULL; bd->bd_blkno = bh->b_blocknr; @@ -86,15 +86,10 @@ static void gfs2_pte_inval(struct gfs2_g if (!ip || !S_ISREG(inode->i_mode)) return; - if (!test_bit(GIF_PAGED, &ip->i_flags)) - return; - unmap_shared_mapping_range(inode->i_mapping, 0, 0); - if (test_bit(GIF_SW_PAGED, &ip->i_flags)) set_bit(GLF_DIRTY, &gl->gl_flags); - clear_bit(GIF_SW_PAGED, &ip->i_flags); } /** @@ -143,44 +138,34 @@ static void meta_go_inval(struct gfs2_gl static void inode_go_sync(struct gfs2_glock *gl) { struct gfs2_inode *ip = gl->gl_object; + struct address_space *metamapping = gl->gl_aspace->i_mapping; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED) + gfs2_pte_inval(gl); + if (gl->gl_state != LM_ST_EXCLUSIVE) + return; if (ip && !S_ISREG(ip->i_inode.i_mode)) ip = NULL; if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - if (ip && !gfs2_is_jdata(ip)) - filemap_fdatawrite(ip->i_inode.i_mapping); gfs2_log_flush(gl->gl_sbd, gl); - if (ip && gfs2_is_jdata(ip)) - filemap_fdatawrite(ip->i_inode.i_mapping); - gfs2_meta_sync(gl); + filemap_fdatawrite(metamapping); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; - int error = filemap_fdatawait(mapping); + filemap_fdatawrite(mapping); + error = filemap_fdatawait(mapping); mapping_set_error(mapping, error); } + error = filemap_fdatawait(metamapping); + mapping_set_error(metamapping, error); clear_bit(GLF_DIRTY, &gl->gl_flags); gfs2_ail_empty_gl(gl); } } /** - * inode_go_xmote_th - promote/demote a glock - * @gl: the glock - * @state: the requested state - * @flags: - * - */ - -static void inode_go_xmote_th(struct gfs2_glock *gl) -{ - if (gl->gl_state != LM_ST_UNLOCKED) - gfs2_pte_inval(gl); - if (gl->gl_state == LM_ST_EXCLUSIVE) - inode_go_sync(gl); -} - -/** * inode_go_xmote_bh - After promoting/demoting a glock * @gl: the glock * @@ -201,22 +186,6 @@ static void inode_go_xmote_bh(struct gfs } /** - * inode_go_drop_th - unlock a glock - * @gl: the glock - * - * Invoked from rq_demote(). - * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long) - * is being purged from our node's glock cache; we're dropping lock. 
- */ - -static void inode_go_drop_th(struct gfs2_glock *gl) -{ - gfs2_pte_inval(gl); - if (gl->gl_state == LM_ST_EXCLUSIVE) - inode_go_sync(gl); -} - -/** * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: @@ -234,10 +203,8 @@ static void inode_go_inval(struct gfs2_g set_bit(GIF_INVALID, &ip->i_flags); } - if (ip && S_ISREG(ip->i_inode.i_mode)) { + if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); - clear_bit(GIF_PAGED, &ip->i_flags); - } } /** @@ -294,23 +261,6 @@ static int inode_go_lock(struct gfs2_hol } /** - * inode_go_unlock - operation done before an inode lock is unlocked by a - * process - * @gl: the glock - * @flags: - * - */ - -static void inode_go_unlock(struct gfs2_holder *gh) -{ - struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_inode *ip = gl->gl_object; - - if (ip) - gfs2_meta_cache_flush(ip); -} - -/** * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock * @gl: the glock * @@ -350,14 +300,14 @@ static void rgrp_go_unlock(struct gfs2_h } /** - * trans_go_xmote_th - promote/demote the transaction glock + * trans_go_sync - promote/demote the transaction glock * @gl: the glock * @state: the requested state * @flags: * */ -static void trans_go_xmote_th(struct gfs2_glock *gl) +static void trans_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; @@ -384,7 +334,6 @@ static void trans_go_xmote_bh(struct gfs if (gl->gl_state != LM_ST_UNLOCKED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - gfs2_meta_cache_flush(GFS2_I(sdp->sd_jdesc->jd_inode)); j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); @@ -402,24 +351,6 @@ static void trans_go_xmote_bh(struct gfs } /** - * trans_go_drop_th - unlock the transaction glock - * @gl: the glock - * - * We want to sync the device even with localcaching. Remember - * that localcaching journal replay only marks buffers dirty. - */ - -static void trans_go_drop_th(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_sbd; - - if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - } -} - -/** * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock * @gl: the glock * @@ -433,25 +364,21 @@ static int quota_go_demote_ok(struct gfs const struct gfs2_glock_operations gfs2_meta_glops = { .go_xmote_th = meta_go_sync, - .go_drop_th = meta_go_sync, .go_type = LM_TYPE_META, }; const struct gfs2_glock_operations gfs2_inode_glops = { - .go_xmote_th = inode_go_xmote_th, + .go_xmote_th = inode_go_sync, .go_xmote_bh = inode_go_xmote_bh, - .go_drop_th = inode_go_drop_th, .go_inval = inode_go_inval, .go_demote_ok = inode_go_demote_ok, .go_lock = inode_go_lock, - .go_unlock = inode_go_unlock, .go_type = LM_TYPE_INODE, .go_min_hold_time = HZ / 10, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_xmote_th = meta_go_sync, - .go_drop_th = meta_go_sync, .go_inval = meta_go_inval, .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, @@ -461,9 +388,8 @@ const struct gfs2_glock_operations gfs2_ }; const struct gfs2_glock_operations gfs2_trans_glops = { - .go_xmote_th = trans_go_xmote_th, + .go_xmote_th = trans_go_sync, .go_xmote_bh = trans_go_xmote_bh, - .go_drop_th = trans_go_drop_th, .go_type = LM_TYPE_NONDISK, }; diff -puN fs/gfs2/incore.h~git-gfs2-nmw fs/gfs2/incore.h --- a/fs/gfs2/incore.h~git-gfs2-nmw +++ a/fs/gfs2/incore.h @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 
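/*
 * Illustrative sketch, not part of the patch: a standalone model of the new
 * demote handshake in glock.c above. While a demote is being carried out
 * (GLF_DEMOTE_IN_PROGRESS set by rq_demote()), handle_callback() may not
 * simply rewrite gl_demote_state for a conflicting request; it records the
 * conflict in gl_waiters2 and run_queue() re-arms a demote to "unlocked"
 * once the in-flight one finishes. Spinlocks and the real lock-module calls
 * are elided.
 */
#include <stdbool.h>

enum gstate { ST_UNLOCKED, ST_SHARED, ST_EXCLUSIVE };

struct glock_model {
	enum gstate demote_state;
	bool demote;			/* GLF_DEMOTE */
	bool demote_in_progress;	/* GLF_DEMOTE_IN_PROGRESS */
	bool waiters2;			/* gl_waiters2 */
};

static void callback(struct glock_model *gl, enum gstate want)
{
	if (!gl->demote) {
		gl->demote = true;
		gl->demote_state = want;
	} else if (gl->demote_state != ST_UNLOCKED &&
		   gl->demote_state != want) {
		if (gl->demote_in_progress)
			gl->waiters2 = true;	/* replay once the demote ends */
		else
			gl->demote_state = ST_UNLOCKED;
	}
}

static void run_demote(struct glock_model *gl)
{
	gl->demote_in_progress = true;
	/* ... drop or convert the lock via the lock module ... */
	gl->demote = false;
	gl->demote_in_progress = false;

	if (gl->waiters2) {		/* a conflicting request arrived meanwhile */
		gl->demote = true;
		gl->demote_state = ST_UNLOCKED;
	}
	gl->waiters2 = false;
}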
- * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -131,7 +131,6 @@ struct gfs2_bufdata { struct gfs2_glock_operations { void (*go_xmote_th) (struct gfs2_glock *gl); void (*go_xmote_bh) (struct gfs2_glock *gl); - void (*go_drop_th) (struct gfs2_glock *gl); void (*go_inval) (struct gfs2_glock *gl, int flags); int (*go_demote_ok) (struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); @@ -141,10 +140,6 @@ struct gfs2_glock_operations { }; enum { - /* Actions */ - HIF_MUTEX = 0, - HIF_PROMOTE = 1, - /* States */ HIF_HOLDER = 6, HIF_FIRST = 7, @@ -171,6 +166,8 @@ enum { GLF_DEMOTE = 3, GLF_PENDING_DEMOTE = 4, GLF_DIRTY = 5, + GLF_DEMOTE_IN_PROGRESS = 6, + GLF_LFLUSH = 7, }; struct gfs2_glock { @@ -190,6 +187,7 @@ struct gfs2_glock { struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ struct list_head gl_waiters3; /* HIF_PROMOTE */ + int gl_waiters2; /* GIF_DEMOTE */ const struct gfs2_glock_operations *gl_ops; @@ -210,7 +208,6 @@ struct gfs2_glock { struct gfs2_sbd *gl_sbd; struct inode *gl_aspace; - struct gfs2_log_element gl_le; struct list_head gl_ail_list; atomic_t gl_ail_count; struct delayed_work gl_work; @@ -239,7 +236,6 @@ struct gfs2_alloc { enum { GIF_INVALID = 0, GIF_QD_LOCKED = 1, - GIF_PAGED = 2, GIF_SW_PAGED = 3, }; @@ -268,14 +264,10 @@ struct gfs2_inode { struct gfs2_glock *i_gl; /* Move into i_gh? */ struct gfs2_holder i_iopen_gh; struct gfs2_holder i_gh; /* for prepare/commit_write only */ - struct gfs2_alloc i_alloc; + struct gfs2_alloc *i_alloc; u64 i_last_rg_alloc; - spinlock_t i_spin; struct rw_semaphore i_rw_mutex; - unsigned long i_last_pfault; - - struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; }; /* @@ -287,19 +279,12 @@ static inline struct gfs2_inode *GFS2_I( return container_of(inode, struct gfs2_inode, i_inode); } -/* To be removed? */ -static inline struct gfs2_sbd *GFS2_SB(struct inode *inode) +static inline struct gfs2_sbd *GFS2_SB(const struct inode *inode) { return inode->i_sb->s_fs_info; } -enum { - GFF_DID_DIRECT_ALLOC = 0, - GFF_EXLOCK = 1, -}; - struct gfs2_file { - unsigned long f_flags; /* GFF_... */ struct mutex f_fl_mutex; struct gfs2_holder f_fl_gh; }; @@ -373,8 +358,17 @@ struct gfs2_ail { u64 ai_sync_gen; }; +struct gfs2_journal_extent { + struct list_head extent_list; + + unsigned int lblock; /* First logical block */ + u64 dblock; /* First disk block */ + u64 blocks; +}; + struct gfs2_jdesc { struct list_head jd_list; + struct list_head extent_list; struct inode *jd_inode; unsigned int jd_jid; @@ -421,13 +415,7 @@ struct gfs2_args { struct gfs2_tune { spinlock_t gt_spin; - unsigned int gt_ilimit; - unsigned int gt_ilimit_tries; - unsigned int gt_ilimit_min; unsigned int gt_demote_secs; /* Cache retention for unheld glock */ - unsigned int gt_incore_log_blocks; - unsigned int gt_log_flush_secs; - unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ unsigned int gt_recoverd_secs; unsigned int gt_logd_secs; @@ -443,10 +431,8 @@ struct gfs2_tune { unsigned int gt_new_files_jdata; unsigned int gt_new_files_directio; unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ - unsigned int gt_lockdump_size; unsigned int gt_stall_secs; /* Detects trouble! 
*/ unsigned int gt_complain_secs; - unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; }; @@ -539,7 +525,6 @@ struct gfs2_sbd { /* StatFS stuff */ spinlock_t sd_statfs_spin; - struct mutex sd_statfs_mutex; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; unsigned long sd_statfs_sync_time; @@ -561,7 +546,6 @@ struct gfs2_sbd { spinlock_t sd_jindex_spin; struct mutex sd_jindex_mutex; unsigned int sd_journals; - unsigned long sd_jindex_refresh_time; struct gfs2_jdesc *sd_jdesc; struct gfs2_holder sd_journal_gh; @@ -602,28 +586,29 @@ struct gfs2_sbd { unsigned int sd_log_commited_databuf; unsigned int sd_log_commited_revoke; - unsigned int sd_log_num_gl; + atomic_t sd_log_pinned; unsigned int sd_log_num_buf; unsigned int sd_log_num_revoke; unsigned int sd_log_num_rg; unsigned int sd_log_num_databuf; - struct list_head sd_log_le_gl; struct list_head sd_log_le_buf; struct list_head sd_log_le_revoke; struct list_head sd_log_le_rg; struct list_head sd_log_le_databuf; struct list_head sd_log_le_ordered; - unsigned int sd_log_blks_free; - struct mutex sd_log_reserve_mutex; + atomic_t sd_log_thresh1; + atomic_t sd_log_thresh2; + atomic_t sd_log_blks_free; + wait_queue_head_t sd_log_waitq; + wait_queue_head_t sd_logd_waitq; u64 sd_log_sequence; unsigned int sd_log_head; unsigned int sd_log_tail; int sd_log_idle; - unsigned long sd_log_flush_time; struct rw_semaphore sd_log_flush_lock; atomic_t sd_log_in_flight; wait_queue_head_t sd_log_flush_wait; diff -puN fs/gfs2/inode.c~git-gfs2-nmw fs/gfs2/inode.c --- a/fs/gfs2/inode.c~git-gfs2-nmw +++ a/fs/gfs2/inode.c @@ -31,7 +31,6 @@ #include "log.h" #include "meta_io.h" #include "ops_address.h" -#include "ops_file.h" #include "ops_inode.h" #include "quota.h" #include "rgrp.h" @@ -132,15 +131,21 @@ static struct inode *gfs2_iget_skip(stru void gfs2_set_iop(struct inode *inode) { + struct gfs2_sbd *sdp = GFS2_SB(inode); umode_t mode = inode->i_mode; if (S_ISREG(mode)) { inode->i_op = &gfs2_file_iops; - inode->i_fop = &gfs2_file_fops; - inode->i_mapping->a_ops = &gfs2_file_aops; + if (sdp->sd_args.ar_localflocks) + inode->i_fop = &gfs2_file_fops_nolock; + else + inode->i_fop = &gfs2_file_fops; } else if (S_ISDIR(mode)) { inode->i_op = &gfs2_dir_iops; - inode->i_fop = &gfs2_dir_fops; + if (sdp->sd_args.ar_localflocks) + inode->i_fop = &gfs2_dir_fops_nolock; + else + inode->i_fop = &gfs2_dir_fops; } else if (S_ISLNK(mode)) { inode->i_op = &gfs2_symlink_iops; } else { @@ -291,12 +296,10 @@ static int gfs2_dinode_in(struct gfs2_in di->di_entries = be32_to_cpu(str->di_entries); di->di_eattr = be64_to_cpu(str->di_eattr); - return 0; -} + if (S_ISREG(ip->i_inode.i_mode)) + gfs2_set_aops(&ip->i_inode); -static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh) -{ - ip->i_cache[0] = bh; + return 0; } /** @@ -366,7 +369,8 @@ int gfs2_dinode_dealloc(struct gfs2_inod if (error) goto out_rg_gunlock; - gfs2_trans_add_gl(ip->i_gl); + set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); + set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags); gfs2_free_di(rgd, ip); @@ -707,9 +711,10 @@ static int alloc_dinode(struct gfs2_inod struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); int error; - gfs2_alloc_get(dip); + if (gfs2_alloc_get(dip) == NULL) + return -ENOMEM; - dip->i_alloc.al_requested = RES_DINODE; + dip->i_alloc->al_requested = RES_DINODE; error = gfs2_inplace_reserve(dip); if (error) goto out; @@ -896,7 +901,7 @@ fail_end_trans: gfs2_trans_end(sdp); 
fail_ipreserv: - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); fail_quota_locks: @@ -966,7 +971,7 @@ struct inode *gfs2_createi(struct gfs2_h struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; u64 generation; - struct buffer_head *bh=NULL; + struct buffer_head *bh = NULL; if (!name->len || name->len > GFS2_FNAMESIZE) return ERR_PTR(-ENAMETOOLONG); @@ -1003,8 +1008,6 @@ struct inode *gfs2_createi(struct gfs2_h if (IS_ERR(inode)) goto fail_gunlock2; - gfs2_inode_bh(GFS2_I(inode), bh); - error = gfs2_inode_refresh(GFS2_I(inode)); if (error) goto fail_gunlock2; @@ -1021,6 +1024,8 @@ struct inode *gfs2_createi(struct gfs2_h if (error) goto fail_gunlock2; + if (bh) + brelse(bh); if (!inode) return ERR_PTR(-ENOMEM); return inode; @@ -1032,6 +1037,8 @@ fail_gunlock2: fail_gunlock: gfs2_glock_dq(ghs); fail: + if (bh) + brelse(bh); return ERR_PTR(error); } diff -puN fs/gfs2/inode.h~git-gfs2-nmw fs/gfs2/inode.h --- a/fs/gfs2/inode.h~git-gfs2-nmw +++ a/fs/gfs2/inode.h @@ -20,6 +20,18 @@ static inline int gfs2_is_jdata(const st return ip->i_di.di_flags & GFS2_DIF_JDATA; } +static inline int gfs2_is_writeback(const struct gfs2_inode *ip) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip); +} + +static inline int gfs2_is_ordered(const struct gfs2_inode *ip) +{ + const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip); +} + static inline int gfs2_is_dir(const struct gfs2_inode *ip) { return S_ISDIR(ip->i_inode.i_mode); diff -puN fs/gfs2/locking/dlm/mount.c~git-gfs2-nmw fs/gfs2/locking/dlm/mount.c --- a/fs/gfs2/locking/dlm/mount.c~git-gfs2-nmw +++ a/fs/gfs2/locking/dlm/mount.c @@ -67,6 +67,11 @@ static int make_args(struct gdlm_ls *ls, memset(data, 0, 256); strncpy(data, data_arg, 255); + if (!strlen(data)) { + log_error("no mount options, (u)mount helpers not installed"); + return -EINVAL; + } + for (options = data; (x = strsep(&options, ":")); ) { if (!*x) continue; diff -puN fs/gfs2/locking/dlm/plock.c~git-gfs2-nmw fs/gfs2/locking/dlm/plock.c --- a/fs/gfs2/locking/dlm/plock.c~git-gfs2-nmw +++ a/fs/gfs2/locking/dlm/plock.c @@ -89,15 +89,19 @@ int gdlm_plock(void *lockspace, struct l op->info.number = name->ln_number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - op->info.owner = (__u64)(long) fl->fl_owner; if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + /* fl_owner is lockd which doesn't distinguish + processes on the nfs client */ + op->info.owner = (__u64) fl->fl_pid; xop->callback = fl->fl_lmops->fl_grant; locks_init_lock(&xop->flc); locks_copy_lock(&xop->flc, fl); xop->fl = fl; xop->file = file; - } else + } else { + op->info.owner = (__u64)(long) fl->fl_owner; xop->callback = NULL; + } send_op(op); @@ -203,7 +207,10 @@ int gdlm_punlock(void *lockspace, struct op->info.number = name->ln_number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - op->info.owner = (__u64)(long) fl->fl_owner; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = (__u64)(long) fl->fl_owner; send_op(op); wait_event(recv_wq, (op->done != 0)); @@ -242,7 +249,10 @@ int gdlm_plock_get(void *lockspace, stru op->info.number = name->ln_number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - op->info.owner = (__u64)(long) fl->fl_owner; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + 
op->info.owner = (__u64)(long) fl->fl_owner; send_op(op); wait_event(recv_wq, (op->done != 0)); diff -puN fs/gfs2/locking/dlm/thread.c~git-gfs2-nmw fs/gfs2/locking/dlm/thread.c --- a/fs/gfs2/locking/dlm/thread.c~git-gfs2-nmw +++ a/fs/gfs2/locking/dlm/thread.c @@ -273,18 +273,13 @@ static int gdlm_thread(void *data, int b struct gdlm_ls *ls = (struct gdlm_ls *) data; struct gdlm_lock *lp = NULL; uint8_t complete, blocking, submit, drop; - DECLARE_WAITQUEUE(wait, current); /* Only thread1 is allowed to do blocking callbacks since gfs may wait for a completion callback within a blocking cb. */ while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ls->thread_wait, &wait); - if (no_work(ls, blist)) - schedule(); - remove_wait_queue(&ls->thread_wait, &wait); - set_current_state(TASK_RUNNING); + wait_event_interruptible(ls->thread_wait, + !no_work(ls, blist) || kthread_should_stop()); complete = blocking = submit = drop = 0; diff -puN fs/gfs2/log.c~git-gfs2-nmw fs/gfs2/log.c --- a/fs/gfs2/log.c~git-gfs2-nmw +++ a/fs/gfs2/log.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -68,14 +70,12 @@ unsigned int gfs2_struct2blk(struct gfs2 * */ -void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd) +void gfs2_remove_from_ail(struct gfs2_bufdata *bd) { bd->bd_ail = NULL; list_del_init(&bd->bd_ail_st_list); list_del_init(&bd->bd_ail_gl_list); atomic_dec(&bd->bd_gl->gl_ail_count); - if (mapping) - gfs2_meta_cache_flush(GFS2_I(mapping->host)); brelse(bd->bd_bh); } @@ -92,8 +92,6 @@ static void gfs2_ail1_start_one(struct g struct buffer_head *bh; int retry; - BUG_ON(!spin_is_locked(&sdp->sd_log_lock)); - do { retry = 0; @@ -167,7 +165,7 @@ static int gfs2_ail1_empty_one(struct gf return list_empty(&ai->ai_ail1_list); } -static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) +static void gfs2_ail1_start(struct gfs2_sbd *sdp) { struct list_head *head; u64 sync_gen; @@ -188,14 +186,7 @@ static void gfs2_ail1_start(struct gfs2_ first_ai->ai_sync_gen = sync_gen; gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */ - if (flags & DIO_ALL) - first = NULL; - while(!done) { - if (first && (head->prev != first || - gfs2_ail1_empty_one(sdp, first_ai, 0))) - break; - done = 1; list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) { if (ai->ai_sync_gen >= sync_gen) @@ -210,7 +201,7 @@ static void gfs2_ail1_start(struct gfs2_ gfs2_log_unlock(sdp); } -int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) +static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) { struct gfs2_ail *ai, *s; int ret; @@ -248,7 +239,7 @@ static void gfs2_ail2_empty_one(struct g bd = list_entry(head->prev, struct gfs2_bufdata, bd_ail_st_list); gfs2_assert(sdp, bd->bd_ail == ai); - gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd); + gfs2_remove_from_ail(bd); } } @@ -289,70 +280,66 @@ static void ail2_empty(struct gfs2_sbd * * flush time, so we ensure that we have just enough free blocks at all * times to avoid running out during a log flush. * + * We no longer flush the log here, instead we wake up logd to do that + * for us. 
To avoid the thundering herd and to ensure that we deal fairly + * with queued waiters, we use an exclusive wait. This means that when we + * get woken with enough journal space to get our reservation, we need to + * wake the next waiter on the list. + * * Returns: errno */ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { - unsigned int try = 0; unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); + unsigned wanted = blks + reserved_blks; + DEFINE_WAIT(wait); + int did_wait = 0; + unsigned int free_blocks; if (gfs2_assert_warn(sdp, blks) || gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks)) return -EINVAL; - - mutex_lock(&sdp->sd_log_reserve_mutex); - gfs2_log_lock(sdp); - while(sdp->sd_log_blks_free <= (blks + reserved_blks)) { - gfs2_log_unlock(sdp); - gfs2_ail1_empty(sdp, 0); - gfs2_log_flush(sdp, NULL); - - if (try++) - gfs2_ail1_start(sdp, 0); - gfs2_log_lock(sdp); - } - sdp->sd_log_blks_free -= blks; - gfs2_log_unlock(sdp); - mutex_unlock(&sdp->sd_log_reserve_mutex); +retry: + free_blocks = atomic_read(&sdp->sd_log_blks_free); + if (unlikely(free_blocks <= wanted)) { + wake_up(&sdp->sd_logd_waitq); + do { + prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait, + TASK_UNINTERRUPTIBLE); + did_wait = 1; + if (atomic_read(&sdp->sd_log_blks_free) <= wanted) + io_schedule(); + free_blocks = atomic_read(&sdp->sd_log_blks_free); + } while(free_blocks <= wanted); + finish_wait(&sdp->sd_log_waitq, &wait); + } + if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks, + free_blocks - blks) != free_blocks) + goto retry; + + /* + * If we waited, then so might others, wake them up _after_ we get + * our share of the log. + */ + if (unlikely(did_wait)) + wake_up(&sdp->sd_log_waitq); down_read(&sdp->sd_log_flush_lock); return 0; } -/** - * gfs2_log_release - Release a given number of log blocks - * @sdp: The GFS2 superblock - * @blks: The number of blocks - * - */ - -void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) -{ - - gfs2_log_lock(sdp); - sdp->sd_log_blks_free += blks; - gfs2_assert_withdraw(sdp, - sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); - gfs2_log_unlock(sdp); - up_read(&sdp->sd_log_flush_lock); -} - static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) { - struct inode *inode = sdp->sd_jdesc->jd_inode; - int error; - struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; - - bh_map.b_size = 1 << inode->i_blkbits; - error = gfs2_block_map(inode, lbn, 0, &bh_map); - if (error || !bh_map.b_blocknr) - printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error, - (unsigned long long)bh_map.b_blocknr, lbn); - gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr); + struct gfs2_journal_extent *je; + + list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { + if (lbn >= je->lblock && lbn < je->lblock + je->blocks) + return je->dblock + lbn - je->lblock; + } - return bh_map.b_blocknr; + return -1; } /** @@ -560,10 +547,9 @@ static void log_pull_tail(struct gfs2_sb ail2_empty(sdp, new_tail); - gfs2_log_lock(sdp); - sdp->sd_log_blks_free += dist; - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks); - gfs2_log_unlock(sdp); + atomic_add(dist, &sdp->sd_log_blks_free); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= + sdp->sd_jdesc->jd_blocks); sdp->sd_log_tail = new_tail; } @@ -652,7 +638,7 @@ static void gfs2_ordered_write(struct gf get_bh(bh); gfs2_log_unlock(sdp); lock_buffer(bh); - if (test_clear_buffer_dirty(bh)) { + if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { 
bh->b_end_io = end_buffer_write_sync; submit_bh(WRITE, bh); } else { @@ -694,20 +680,16 @@ static void gfs2_ordered_wait(struct gfs * */ -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) +void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) { struct gfs2_ail *ai; down_write(&sdp->sd_log_flush_lock); - if (gl) { - gfs2_log_lock(sdp); - if (list_empty(&gl->gl_le.le_list)) { - gfs2_log_unlock(sdp); - up_write(&sdp->sd_log_flush_lock); - return; - } - gfs2_log_unlock(sdp); + /* Log might have been flushed while we waited for the flush lock */ + if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) { + up_write(&sdp->sd_log_flush_lock); + return; } ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); @@ -739,7 +721,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp log_flush_commit(sdp); else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ gfs2_log_lock(sdp); - sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */ + atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ gfs2_log_unlock(sdp); log_write_header(sdp, 0, PULL); } @@ -767,7 +749,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { unsigned int reserved; - unsigned int old; + unsigned int unused; gfs2_log_lock(sdp); @@ -779,14 +761,11 @@ static void log_refund(struct gfs2_sbd * sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0); reserved = calc_reserved(sdp); - old = sdp->sd_log_blks_free; - sdp->sd_log_blks_free += tr->tr_reserved - - (reserved - sdp->sd_log_blks_reserved); - - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old); - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= + unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; + gfs2_assert_withdraw(sdp, unused >= 0); + atomic_add(unused, &sdp->sd_log_blks_free); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); - sdp->sd_log_blks_reserved = reserved; gfs2_log_unlock(sdp); @@ -797,6 +776,13 @@ static void log_refund(struct gfs2_sbd * * @sdp: the filesystem * @tr: the transaction * + * We wake up gfs2_logd if the number of pinned blocks exceed thresh1 + * or the total number of used blocks (pinned blocks plus AIL blocks) + * is greater than thresh2. + * + * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of + * journal size. + * * Returns: errno */ @@ -808,10 +794,10 @@ void gfs2_log_commit(struct gfs2_sbd *sd sdp->sd_vfs->s_dirt = 1; up_read(&sdp->sd_log_flush_lock); - gfs2_log_lock(sdp); - if (sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks)) - wake_up_process(sdp->sd_logd_process); - gfs2_log_unlock(sdp); + if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) || + ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) > + atomic_read(&sdp->sd_log_thresh2))) + wake_up(&sdp->sd_logd_waitq); } /** @@ -825,7 +811,6 @@ void gfs2_log_shutdown(struct gfs2_sbd * down_write(&sdp->sd_log_flush_lock); gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); @@ -838,7 +823,7 @@ void gfs2_log_shutdown(struct gfs2_sbd * log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, (sdp->sd_log_tail == current_tail(sdp)) ? 
0 : PULL); - gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks); + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); @@ -859,10 +844,74 @@ void gfs2_meta_syncfs(struct gfs2_sbd *s { gfs2_log_flush(sdp, NULL); for (;;) { - gfs2_ail1_start(sdp, DIO_ALL); + gfs2_ail1_start(sdp); if (gfs2_ail1_empty(sdp, DIO_ALL)) break; msleep(10); } } +static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) +{ + return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); +} + +static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) +{ + unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free); + return used_blocks >= atomic_read(&sdp->sd_log_thresh2); +} + +/** + * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks + * @sdp: Pointer to GFS2 superblock + * + * Also, periodically check to make sure that we're using the most recent + * journal index. + */ + +int gfs2_logd(void *data) +{ + struct gfs2_sbd *sdp = data; + unsigned long t = 1; + DEFINE_WAIT(wait); + unsigned preflush; + + while (!kthread_should_stop()) { + + preflush = atomic_read(&sdp->sd_log_pinned); + if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { + gfs2_ail1_empty(sdp, DIO_ALL); + gfs2_log_flush(sdp, NULL); + gfs2_ail1_empty(sdp, DIO_ALL); + } + + if (gfs2_ail_flush_reqd(sdp)) { + gfs2_ail1_start(sdp); + io_schedule(); + gfs2_ail1_empty(sdp, 0); + gfs2_log_flush(sdp, NULL); + gfs2_ail1_empty(sdp, DIO_ALL); + } + + wake_up(&sdp->sd_log_waitq); + t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; + if (freezing(current)) + refrigerator(); + + do { + prepare_to_wait(&sdp->sd_logd_waitq, &wait, + TASK_UNINTERRUPTIBLE); + if (!gfs2_ail_flush_reqd(sdp) && + !gfs2_jrnl_flush_reqd(sdp) && + !kthread_should_stop()) + t = schedule_timeout(t); + } while(t && !gfs2_ail_flush_reqd(sdp) && + !gfs2_jrnl_flush_reqd(sdp) && + !kthread_should_stop()); + finish_wait(&sdp->sd_logd_waitq, &wait); + } + + return 0; +} + diff -puN fs/gfs2/log.h~git-gfs2-nmw fs/gfs2/log.h --- a/fs/gfs2/log.h~git-gfs2-nmw +++ a/fs/gfs2/log.h @@ -48,20 +48,25 @@ static inline void gfs2_log_pointers_ini unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize); -int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags); - int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); -void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); void gfs2_log_incr_head(struct gfs2_sbd *sdp); struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, struct buffer_head *real); -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); +void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); + +static inline void gfs2_log_flush(struct gfs2_sbd *sbd, struct gfs2_glock *gl) +{ + if (!gl || test_bit(GLF_LFLUSH, &gl->gl_flags)) + __gfs2_log_flush(sbd, gl); +} + void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); -void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd); +void gfs2_remove_from_ail(struct gfs2_bufdata *bd); void gfs2_log_shutdown(struct gfs2_sbd *sdp); void gfs2_meta_syncfs(struct gfs2_sbd *sdp); +int gfs2_logd(void *data); #endif /* __LOG_DOT_H__ */ diff -puN fs/gfs2/lops.c~git-gfs2-nmw fs/gfs2/lops.c --- a/fs/gfs2/lops.c~git-gfs2-nmw +++ a/fs/gfs2/lops.c @@ -52,6 +52,7 @@ static 
void gfs2_pin(struct gfs2_sbd *sd if (bd->bd_ail) list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); get_bh(bh); + atomic_inc(&sdp->sd_log_pinned); } /** @@ -87,8 +88,10 @@ static void gfs2_unpin(struct gfs2_sbd * } bd->bd_ail = ai; list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); + clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); gfs2_log_unlock(sdp); unlock_buffer(bh); + atomic_dec(&sdp->sd_log_pinned); } @@ -124,49 +127,6 @@ static struct buffer_head *gfs2_get_log_ return bh; } -static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) -{ - struct gfs2_glock *gl; - struct gfs2_trans *tr = current->journal_info; - - tr->tr_touched = 1; - - gl = container_of(le, struct gfs2_glock, gl_le); - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) - return; - - if (!list_empty(&le->le_list)) - return; - - gfs2_glock_hold(gl); - set_bit(GLF_DIRTY, &gl->gl_flags); - sdp->sd_log_num_gl++; - list_add(&le->le_list, &sdp->sd_log_le_gl); -} - -static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) -{ - gfs2_log_lock(sdp); - __glock_lo_add(sdp, le); - gfs2_log_unlock(sdp); -} - -static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) -{ - struct list_head *head = &sdp->sd_log_le_gl; - struct gfs2_glock *gl; - - while (!list_empty(head)) { - gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); - list_del_init(&gl->gl_le.le_list); - sdp->sd_log_num_gl--; - - gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); - gfs2_glock_put(gl); - } - gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); -} - static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); @@ -182,7 +142,8 @@ static void buf_lo_add(struct gfs2_sbd * list_add(&bd->bd_list_tr, &tr->tr_list_buf); if (!list_empty(&le->le_list)) goto out; - __glock_lo_add(sdp, &bd->bd_gl->gl_le); + set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); sdp->sd_log_num_buf++; @@ -556,17 +517,20 @@ static void databuf_lo_add(struct gfs2_s lock_buffer(bd->bd_bh); gfs2_log_lock(sdp); - if (!list_empty(&bd->bd_list_tr)) - goto out; - tr->tr_touched = 1; - if (gfs2_is_jdata(ip)) { - tr->tr_num_buf++; - list_add(&bd->bd_list_tr, &tr->tr_list_buf); + if (tr) { + if (!list_empty(&bd->bd_list_tr)) + goto out; + tr->tr_touched = 1; + if (gfs2_is_jdata(ip)) { + tr->tr_num_buf++; + list_add(&bd->bd_list_tr, &tr->tr_list_buf); + } } if (!list_empty(&le->le_list)) goto out; - __glock_lo_add(sdp, &bd->bd_gl->gl_le); + set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); if (gfs2_is_jdata(ip)) { gfs2_pin(sdp, bd->bd_bh); tr->tr_num_databuf_new++; @@ -773,12 +737,6 @@ static void databuf_lo_after_commit(stru } -const struct gfs2_log_operations gfs2_glock_lops = { - .lo_add = glock_lo_add, - .lo_after_commit = glock_lo_after_commit, - .lo_name = "glock", -}; - const struct gfs2_log_operations gfs2_buf_lops = { .lo_add = buf_lo_add, .lo_incore_commit = buf_lo_incore_commit, @@ -816,7 +774,6 @@ const struct gfs2_log_operations gfs2_da }; const struct gfs2_log_operations *gfs2_log_ops[] = { - &gfs2_glock_lops, &gfs2_databuf_lops, &gfs2_buf_lops, &gfs2_rg_lops, diff -puN fs/gfs2/main.c~git-gfs2-nmw fs/gfs2/main.c --- a/fs/gfs2/main.c~git-gfs2-nmw +++ a/fs/gfs2/main.c @@ -29,9 +29,8 @@ static void gfs2_init_inode_once(struct struct gfs2_inode *ip = foo; inode_init_once(&ip->i_inode); - 
spin_lock_init(&ip->i_spin); init_rwsem(&ip->i_rw_mutex); - memset(ip->i_cache, 0, sizeof(ip->i_cache)); + ip->i_alloc = NULL; } static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) diff -puN fs/gfs2/meta_io.c~git-gfs2-nmw fs/gfs2/meta_io.c --- a/fs/gfs2/meta_io.c~git-gfs2-nmw +++ a/fs/gfs2/meta_io.c @@ -50,6 +50,7 @@ static int gfs2_aspace_writepage(struct static const struct address_space_operations aspace_aops = { .writepage = gfs2_aspace_writepage, .releasepage = gfs2_releasepage, + .sync_page = block_sync_page, }; /** @@ -221,13 +222,14 @@ int gfs2_meta_read(struct gfs2_glock *gl struct buffer_head **bhp) { *bhp = getbuf(gl, blkno, CREATE); - if (!buffer_uptodate(*bhp)) + if (!buffer_uptodate(*bhp)) { ll_rw_block(READ_META, 1, bhp); - if (flags & DIO_WAIT) { - int error = gfs2_meta_wait(gl->gl_sbd, *bhp); - if (error) { - brelse(*bhp); - return error; + if (flags & DIO_WAIT) { + int error = gfs2_meta_wait(gl->gl_sbd, *bhp); + if (error) { + brelse(*bhp); + return error; + } } } @@ -282,7 +284,7 @@ void gfs2_attach_bufdata(struct gfs2_glo return; } - bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL), + bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL); bd->bd_bh = bh; bd->bd_gl = gl; @@ -302,6 +304,7 @@ void gfs2_remove_from_journal(struct buf struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host); struct gfs2_bufdata *bd = bh->b_private; if (test_clear_buffer_pinned(bh)) { + atomic_dec(&sdp->sd_log_pinned); list_del_init(&bd->bd_le.le_list); if (meta) { gfs2_assert_warn(sdp, sdp->sd_log_num_buf); @@ -317,7 +320,7 @@ void gfs2_remove_from_journal(struct buf } if (bd) { if (bd->bd_ail) { - gfs2_remove_from_ail(NULL, bd); + gfs2_remove_from_ail(bd); bh->b_private = NULL; bd->bd_bh = NULL; bd->bd_blkno = bh->b_blocknr; @@ -358,32 +361,6 @@ void gfs2_meta_wipe(struct gfs2_inode *i } /** - * gfs2_meta_cache_flush - get rid of any references on buffers for this inode - * @ip: The GFS2 inode - * - * This releases buffers that are in the most-recently-used array of - * blocks used for indirect block addressing for this inode. - */ - -void gfs2_meta_cache_flush(struct gfs2_inode *ip) -{ - struct buffer_head **bh_slot; - unsigned int x; - - spin_lock(&ip->i_spin); - - for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { - bh_slot = &ip->i_cache[x]; - if (*bh_slot) { - brelse(*bh_slot); - *bh_slot = NULL; - } - } - - spin_unlock(&ip->i_spin); -} - -/** * gfs2_meta_indirect_buffer - Get a metadata buffer * @ip: The GFS2 inode * @height: The level of this buf in the metadata (indir addr) tree (if any) @@ -391,8 +368,6 @@ void gfs2_meta_cache_flush(struct gfs2_i * @new: Non-zero if we may create a new buffer * @bhp: the buffer is returned here * - * Try to use the gfs2_inode's MRU metadata tree cache. 
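/*
 * Illustrative sketch, not part of the patch: a standalone model of the
 * lock-free accounting that the new gfs2_log_reserve() in log.c (earlier in
 * this patch) uses now that sd_log_blks_free is an atomic_t and
 * sd_log_reserve_mutex is gone. The sleep on sd_log_waitq and the
 * wake-one-exclusive-waiter handoff are reduced to comments here; only the
 * compare-exchange retry is modelled.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint log_blks_free;

static int reserve(unsigned int blks, unsigned int reserved)
{
	unsigned int wanted = blks + reserved;
	unsigned int avail;

retry:
	avail = atomic_load(&log_blks_free);
	if (avail <= wanted) {
		/* kernel: wake logd, then sleep exclusively on sd_log_waitq
		 * until enough journal blocks become free */
		return -1;
	}
	/* claim our share only if nobody raced in between */
	if (!atomic_compare_exchange_strong(&log_blks_free, &avail, avail - blks))
		goto retry;
	/* kernel: if we slept, wake the next exclusive waiter here */
	return 0;
}

int main(void)
{
	atomic_store(&log_blks_free, 100);
	printf("ret=%d free=%u\n", reserve(10, 6), atomic_load(&log_blks_free));
	return 0;
}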
- * * Returns: errno */ @@ -401,58 +376,25 @@ int gfs2_meta_indirect_buffer(struct gfs { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_glock *gl = ip->i_gl; - struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height; - int in_cache = 0; - - BUG_ON(!gl); - BUG_ON(!sdp); - - spin_lock(&ip->i_spin); - if (*bh_slot && (*bh_slot)->b_blocknr == num) { - bh = *bh_slot; - get_bh(bh); - in_cache = 1; - } - spin_unlock(&ip->i_spin); - - if (!bh) - bh = getbuf(gl, num, CREATE); - - if (!bh) - return -ENOBUFS; + struct buffer_head *bh; + int ret = 0; if (new) { - if (gfs2_assert_warn(sdp, height)) - goto err; - meta_prep_new(bh); + BUG_ON(height == 0); + bh = gfs2_meta_new(gl, num); gfs2_trans_add_bh(ip->i_gl, bh, 1); gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); } else { u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; - if (!buffer_uptodate(bh)) { - ll_rw_block(READ_META, 1, &bh); - if (gfs2_meta_wait(sdp, bh)) - goto err; + ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); + if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { + brelse(bh); + ret = -EIO; } - if (gfs2_metatype_check(sdp, bh, mtype)) - goto err; - } - - if (!in_cache) { - spin_lock(&ip->i_spin); - if (*bh_slot) - brelse(*bh_slot); - *bh_slot = bh; - get_bh(bh); - spin_unlock(&ip->i_spin); } - *bhp = bh; - return 0; -err: - brelse(bh); - return -EIO; + return ret; } /** diff -puN fs/gfs2/meta_io.h~git-gfs2-nmw fs/gfs2/meta_io.h --- a/fs/gfs2/meta_io.h~git-gfs2-nmw +++ a/fs/gfs2/meta_io.h @@ -56,7 +56,6 @@ void gfs2_remove_from_journal(struct buf void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_meta_cache_flush(struct gfs2_inode *ip); int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, int new, struct buffer_head **bhp); diff -puN fs/gfs2/ops_address.c~git-gfs2-nmw fs/gfs2/ops_address.c --- a/fs/gfs2/ops_address.c~git-gfs2-nmw +++ a/fs/gfs2/ops_address.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -32,7 +34,6 @@ #include "quota.h" #include "trans.h" #include "rgrp.h" -#include "ops_file.h" #include "super.h" #include "util.h" #include "glops.h" @@ -58,22 +59,6 @@ static void gfs2_page_add_databufs(struc } /** - * gfs2_get_block - Fills in a buffer head with details about a block - * @inode: The inode - * @lblock: The block number to look up - * @bh_result: The buffer head to return the result in - * @create: Non-zero if we may add block to the file - * - * Returns: errno - */ - -int gfs2_get_block(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return gfs2_block_map(inode, lblock, create, bh_result); -} - -/** * gfs2_get_block_noalloc - Fills in a buffer head with details about a block * @inode: The inode * @lblock: The block number to look up @@ -88,7 +73,7 @@ static int gfs2_get_block_noalloc(struct { int error; - error = gfs2_block_map(inode, lblock, 0, bh_result); + error = gfs2_block_map(inode, lblock, bh_result, 0); if (error) return error; if (!buffer_mapped(bh_result)) @@ -99,20 +84,19 @@ static int gfs2_get_block_noalloc(struct static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, struct buffer_head *bh_result, int create) { - return gfs2_block_map(inode, lblock, 0, bh_result); + return gfs2_block_map(inode, lblock, bh_result, 0); } /** - * gfs2_writepage - Write complete page - * @page: Page to write + * gfs2_writepage_common - Common bits of writepage + * @page: 
The page to be written + * @wbc: The writeback control * - * Returns: errno - * - * Some of this is copied from block_write_full_page() although we still - * call it to do most of the work. + * Returns: 1 if writepage is ok, otherwise an error code or zero if no error. */ -static int gfs2_writepage(struct page *page, struct writeback_control *wbc) +static int gfs2_writepage_common(struct page *page, + struct writeback_control *wbc) { struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); @@ -120,41 +104,133 @@ static int gfs2_writepage(struct page *p loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset; - int error; - int done_trans = 0; + int ret = -EIO; - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { - unlock_page(page); - return -EIO; - } + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) + goto out; + ret = 0; if (current->journal_info) - goto out_ignore; - + goto redirty; /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_CACHE_SIZE-1); + offset = i_size & (PAGE_CACHE_SIZE-1); if (page->index > end_index || (page->index == end_index && !offset)) { page->mapping->a_ops->invalidatepage(page, 0); - unlock_page(page); - return 0; /* don't care */ + goto out; + } + return 1; +redirty: + redirty_page_for_writepage(wbc, page); +out: + unlock_page(page); + return 0; +} + +/** + * gfs2_writeback_writepage - Write page for writeback mappings + * @page: The page + * @wbc: The writeback control + * + */ + +static int gfs2_writeback_writepage(struct page *page, + struct writeback_control *wbc) +{ + int ret; + + ret = gfs2_writepage_common(page, wbc); + if (ret <= 0) + return ret; + + ret = mpage_writepage(page, gfs2_get_block_noalloc, wbc); + if (ret == -EAGAIN) + ret = block_write_full_page(page, gfs2_get_block_noalloc, wbc); + return ret; +} + +/** + * gfs2_ordered_writepage - Write page for ordered data files + * @page: The page to write + * @wbc: The writeback control + * + */ + +static int gfs2_ordered_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + int ret; + + ret = gfs2_writepage_common(page, wbc); + if (ret <= 0) + return ret; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); } + gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); + return block_write_full_page(page, gfs2_get_block_noalloc, wbc); +} + +/** + * __gfs2_jdata_writepage - The core of jdata writepage + * @page: The page to write + * @wbc: The writeback control + * + * This is shared between writepage and writepages and implements the + * core of the writepage operation. If a transaction is required then + * PageChecked will have been set and the transaction will have + * already been started before this is called. 
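/*
 * Illustrative sketch, not part of the patch: the contract between
 * gfs2_writepage_common() above and the per-mode writepage variants. A
 * return of 1 means "the page is still locked, go ahead and write it"; zero
 * or a negative error means the common code already unlocked (and possibly
 * redirtied or invalidated) the page. Hypothetical names:
 */
static int common_checks(void)
{
	/* -EIO: filesystem withdrawn, page already unlocked             */
	/*    0: page redirtied or truncated away, page already unlocked */
	return 1;	/* caller still owns the locked page               */
}

static int per_mode_writepage(void)
{
	int ret = common_checks();
	if (ret <= 0)
		return ret;
	/* ... attach buffers / start a transaction as the mode needs,
	 *     then hand the page to block_write_full_page() ... */
	return 0;
}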
+ */ - if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) && - PageChecked(page)) { +static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + + if (PageChecked(page)) { ClearPageChecked(page); - error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); - if (error) - goto out_ignore; if (!page_has_buffers(page)) { create_empty_buffers(page, inode->i_sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); } gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1); + } + return block_write_full_page(page, gfs2_get_block_noalloc, wbc); +} + +/** + * gfs2_jdata_writepage - Write complete page + * @page: Page to write + * + * Returns: errno + * + */ + +static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct gfs2_sbd *sdp = GFS2_SB(inode); + int error; + int done_trans = 0; + + error = gfs2_writepage_common(page, wbc); + if (error <= 0) + return error; + + if (PageChecked(page)) { + if (wbc->sync_mode != WB_SYNC_ALL) + goto out_ignore; + error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0); + if (error) + goto out_ignore; done_trans = 1; } - error = block_write_full_page(page, gfs2_get_block_noalloc, wbc); + error = __gfs2_jdata_writepage(page, wbc); if (done_trans) gfs2_trans_end(sdp); - gfs2_meta_cache_flush(ip); return error; out_ignore: @@ -164,29 +240,190 @@ out_ignore: } /** - * gfs2_writepages - Write a bunch of dirty pages back to disk + * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk * @mapping: The mapping to write * @wbc: Write-back control * - * For journaled files and/or ordered writes this just falls back to the - * kernel's default writepages path for now. We will probably want to change - * that eventually (i.e. when we look at allocate on flush). - * - * For the data=writeback case though we can already ignore buffer heads + * For the data=writeback case we can already ignore buffer heads * and write whole extents at once. This is a big reduction in the * number of I/O requests we send and the bmap calls we make in this case. 
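/*
 * Illustrative sketch, not part of the patch: gfs2_write_jdata_pagevec()
 * (which follows) sizes a single transaction to cover every block of every
 * page in the pagevec before any page lock is taken, i.e.
 * nr_pages * (PAGE_CACHE_SIZE / s_blocksize). A worked example with assumed
 * sizes:
 */
#include <stdio.h>

static unsigned int jdata_trans_blocks(unsigned int nr_pages,
				       unsigned int page_size,
				       unsigned int block_size)
{
	return nr_pages * (page_size / block_size);
}

int main(void)
{
	/* e.g. a 14-page pagevec: 4KiB blocks need 14 reserved blocks,
	 * 1KiB blocks need 56 */
	printf("%u %u\n", jdata_trans_blocks(14, 4096, 4096),
			  jdata_trans_blocks(14, 4096, 1024));
	return 0;
}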
*/ -static int gfs2_writepages(struct address_space *mapping, - struct writeback_control *wbc) +static int gfs2_writeback_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); +} + +/** + * gfs2_write_jdata_pagevec - Write back a pagevec's worth of pages + * @mapping: The mapping + * @wbc: The writeback control + * @writepage: The writepage function to call for each page + * @pvec: The vector of pages + * @nr_pages: The number of pages to write + * + * Returns: non-zero if loop should terminate, zero otherwise + */ + +static int gfs2_write_jdata_pagevec(struct address_space *mapping, + struct writeback_control *wbc, + struct pagevec *pvec, + int nr_pages, pgoff_t end) { struct inode *inode = mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t i_size = i_size_read(inode); + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; + unsigned offset = i_size & (PAGE_CACHE_SIZE-1); + unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); + struct backing_dev_info *bdi = mapping->backing_dev_info; + int i; + int ret; + + ret = gfs2_trans_begin(sdp, nrblocks, 0); + if (ret < 0) + return ret; + + for(i = 0; i < nr_pages; i++) { + struct page *page = pvec->pages[i]; + + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + ret = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + /* Is the page fully outside i_size? (truncate in progress) */ + if (page->index > end_index || (page->index == end_index && !offset)) { + page->mapping->a_ops->invalidatepage(page, 0); + unlock_page(page); + continue; + } + + ret = __gfs2_jdata_writepage(page, wbc); + + if (ret || (--(wbc->nr_to_write) <= 0)) + ret = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + ret = 1; + } + + } + gfs2_trans_end(sdp); + return ret; +} + +/** + * gfs2_write_cache_jdata - Like write_cache_pages but different + * @mapping: The mapping to write + * @wbc: The writeback control + * @writepage: The writepage function to call + * @data: The data to pass to writepage + * + * The reason that we use our own function here is that we need to + * start transactions before we grab page locks. This allows us + * to get the ordering right. 
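/*
 * Illustrative sketch, not part of the patch: the ordering the comment above
 * insists on, as gfs2_write_cache_jdata() below arranges it. The transaction
 * is opened before any page lock is taken (generic write_cache_pages() would
 * lock the page first), presumably so the journal locks always nest outside
 * page locks. Stub names only:
 */
static int  trans_begin(unsigned int blocks) { (void)blocks; return 0; }
static void trans_end(void) { }
static void lock_write_unlock_page(unsigned long index) { (void)index; }

static int write_one_pagevec(const unsigned long *pages, unsigned int nr,
			     unsigned int blocks_per_page)
{
	int ret = trans_begin(nr * blocks_per_page);	/* transaction first */
	if (ret)
		return ret;
	for (unsigned int i = 0; i < nr; i++)
		lock_write_unlock_page(pages[i]);	/* page locks second */
	trans_end();
	return 0;
}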
+ */ + +static int gfs2_write_cache_jdata(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } + +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + scanned = 1; + ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); + if (ret) + done = 1; + if (ret > 0) + ret = 0; + + pagevec_release(&pvec); + cond_resched(); + } + + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + + +/** + * gfs2_jdata_writepages - Write a bunch of dirty pages back to disk + * @mapping: The mapping to write + * @wbc: The writeback control + * + */ - if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip)) - return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); +static int gfs2_jdata_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); + int ret; - return generic_writepages(mapping, wbc); + ret = gfs2_write_cache_jdata(mapping, wbc); + if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { + gfs2_log_flush(sdp, ip->i_gl); + ret = gfs2_write_cache_jdata(mapping, wbc); + } + return ret; } /** @@ -231,62 +468,107 @@ static int stuffed_readpage(struct gfs2_ /** - * gfs2_readpage - readpage with locking - * @file: The file to read a page for. N.B. This may be NULL if we are - * reading an internal file. + * __gfs2_readpage - readpage + * @file: The file to read a page for * @page: The page to read * - * Returns: errno + * This is the core of gfs2's readpage. Its used by the internal file + * reading code as in that case we already hold the glock. Also its + * called by gfs2_readpage() once the required lock has been granted. 
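/*
 * Illustrative sketch, not part of the patch: the retry protocol that
 * gfs2_readpage() below relies on. The VFS calls readpage with the page
 * locked; if the glock cannot be taken without blocking (LM_FLAG_TRY_1CB),
 * the page is unlocked and AOP_TRUNCATED_PAGE returned, so the VFS drops the
 * page and retries, avoiding a page-lock -> glock ordering inversion.
 * Modelled with hypothetical names and return codes:
 */
enum { GOT_LOCK = 0, TRY_FAILED = 1, RETRY_LATER = 2 };

static int  try_cluster_lock(void)   { return TRY_FAILED; /* contended */ }
static void drop_cluster_lock(void)  { }
static void unlock_the_page(void)    { }
static int  read_locked_page(void)   { return 0; /* __gfs2_readpage() stand-in */ }

static int readpage_model(void)
{
	int err = try_cluster_lock();
	if (err == TRY_FAILED) {
		unlock_the_page();
		return RETRY_LATER;	/* VFS will call readpage again */
	}
	err = read_locked_page();	/* unlocks the page itself */
	drop_cluster_lock();
	return err;
}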
+ * */ -static int gfs2_readpage(struct file *file, struct page *page) +static int __gfs2_readpage(void *file, struct page *page) { struct gfs2_inode *ip = GFS2_I(page->mapping->host); struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - struct gfs2_file *gf = NULL; - struct gfs2_holder gh; int error; - int do_unlock = 0; - if (likely(file != &gfs2_internal_file_sentinel)) { - if (file) { - gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ - goto skip_lock; - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); - do_unlock = 1; - error = gfs2_glock_nq_atime(&gh); - if (unlikely(error)) - goto out_unlock; - } - -skip_lock: if (gfs2_is_stuffed(ip)) { error = stuffed_readpage(ip, page); unlock_page(page); - } else - error = mpage_readpage(page, gfs2_get_block); + } else { + error = mpage_readpage(page, gfs2_block_map); + } if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) - error = -EIO; + return -EIO; + + return error; +} + +/** + * gfs2_readpage - read a page of a file + * @file: The file to read + * @page: The page of the file + * + * This deals with the locking required. We use a trylock in order to + * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE + * in the event that we are unable to get the lock. + */ + +static int gfs2_readpage(struct file *file, struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(page->mapping->host); + struct gfs2_holder gh; + int error; - if (do_unlock) { - gfs2_glock_dq_m(1, &gh); - gfs2_holder_uninit(&gh); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); + error = gfs2_glock_nq_atime(&gh); + if (unlikely(error)) { + unlock_page(page); + goto out; } + error = __gfs2_readpage(file, page); + gfs2_glock_dq(&gh); out: - return error; -out_unlock: - unlock_page(page); + gfs2_holder_uninit(&gh); if (error == GLR_TRYFAILED) { - error = AOP_TRUNCATED_PAGE; yield(); + return AOP_TRUNCATED_PAGE; } - if (do_unlock) - gfs2_holder_uninit(&gh); - goto out; + return error; +} + +/** + * gfs2_internal_read - read an internal file + * @ip: The gfs2 inode + * @ra_state: The readahead state (or NULL for no readahead) + * @buf: The buffer to fill + * @pos: The file position + * @size: The amount to read + * + */ + +int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size) +{ + struct address_space *mapping = ip->i_inode.i_mapping; + unsigned long index = *pos / PAGE_CACHE_SIZE; + unsigned offset = *pos & (PAGE_CACHE_SIZE - 1); + unsigned copied = 0; + unsigned amt; + struct page *page; + void *p; + + do { + amt = size - copied; + if (offset + size > PAGE_CACHE_SIZE) + amt = PAGE_CACHE_SIZE - offset; + page = read_cache_page(mapping, index, __gfs2_readpage, NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + p = kmap_atomic(page, KM_USER0); + memcpy(buf + copied, p + offset, amt); + kunmap_atomic(p, KM_USER0); + mark_page_accessed(page); + page_cache_release(page); + copied += amt; + index++; + offset = 0; + } while(copied < size); + (*pos) += size; + return size; } /** @@ -300,10 +582,9 @@ out_unlock: * Any I/O we ignore at this time will be done via readpage later. * 2. We don't handle stuffed files here we let readpage do the honours. * 3. mpage_readpages() does most of the heavy lifting in the common case. - * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. - * 5. 
We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as - * well as read-ahead. + * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. */ + static int gfs2_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { @@ -311,42 +592,20 @@ static int gfs2_readpages(struct file *f struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder gh; - int ret = 0; - int do_unlock = 0; + int ret; - if (likely(file != &gfs2_internal_file_sentinel)) { - if (file) { - struct gfs2_file *gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - goto skip_lock; - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_TRY_1CB|GL_ATIME, &gh); - do_unlock = 1; - ret = gfs2_glock_nq_atime(&gh); - if (ret == GLR_TRYFAILED) - goto out_noerror; - if (unlikely(ret)) - goto out_unlock; - } -skip_lock: + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + ret = gfs2_glock_nq_atime(&gh); + if (unlikely(ret)) + goto out_uninit; if (!gfs2_is_stuffed(ip)) - ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); - - if (do_unlock) { - gfs2_glock_dq_m(1, &gh); - gfs2_holder_uninit(&gh); - } -out: + ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) ret = -EIO; return ret; -out_noerror: - ret = 0; -out_unlock: - if (do_unlock) - gfs2_holder_uninit(&gh); - goto out; } /** @@ -382,20 +641,11 @@ static int gfs2_write_begin(struct file if (unlikely(error)) goto out_uninit; - error = -ENOMEM; - page = __grab_cache_page(mapping, index); - *pagep = page; - if (!page) - goto out_unlock; - gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); - error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); if (error) - goto out_putpage; - + goto out_unlock; - ip->i_alloc.al_requested = 0; if (alloc_required) { al = gfs2_alloc_get(ip); @@ -424,40 +674,47 @@ static int gfs2_write_begin(struct file if (error) goto out_trans_fail; + error = -ENOMEM; + page = __grab_cache_page(mapping, index); + *pagep = page; + if (unlikely(!page)) + goto out_endtrans; + if (gfs2_is_stuffed(ip)) { + error = 0; if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { error = gfs2_unstuff_dinode(ip, page); if (error == 0) goto prepare_write; - } else if (!PageUptodate(page)) + } else if (!PageUptodate(page)) { error = stuffed_readpage(ip, page); + } goto out; } prepare_write: - error = block_prepare_write(page, from, to, gfs2_get_block); - + error = block_prepare_write(page, from, to, gfs2_block_map); out: - if (error) { - gfs2_trans_end(sdp); + if (error == 0) + return 0; + + page_cache_release(page); + if (pos + len > ip->i_inode.i_size) + vmtruncate(&ip->i_inode, ip->i_inode.i_size); +out_endtrans: + gfs2_trans_end(sdp); out_trans_fail: - if (alloc_required) { - gfs2_inplace_release(ip); + if (alloc_required) { + gfs2_inplace_release(ip); out_qunlock: - gfs2_quota_unlock(ip); + gfs2_quota_unlock(ip); out_alloc_put: - gfs2_alloc_put(ip); - } -out_putpage: - page_cache_release(page); - if (pos + len > ip->i_inode.i_size) - vmtruncate(&ip->i_inode, ip->i_inode.i_size); + gfs2_alloc_put(ip); + } out_unlock: - gfs2_glock_dq_m(1, &ip->i_gh); + gfs2_glock_dq(&ip->i_gh); out_uninit: - gfs2_holder_uninit(&ip->i_gh); - } - + gfs2_holder_uninit(&ip->i_gh); return error; } @@ -565,7 +822,7 @@ static int gfs2_write_end(struct file *f struct gfs2_inode *ip = GFS2_I(inode); 
struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_dinode *di; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; @@ -585,19 +842,16 @@ static int gfs2_write_end(struct file *f if (gfs2_is_stuffed(ip)) return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + if (!gfs2_is_writeback(ip)) gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (likely(ret >= 0)) { - copied = ret; - if ((pos + copied) > inode->i_size) { - di = (struct gfs2_dinode *)dibh->b_data; - ip->i_di.di_size = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); - mark_inode_dirty(inode); - } + if (likely(ret >= 0) && (inode->i_size > ip->i_di.di_size)) { + di = (struct gfs2_dinode *)dibh->b_data; + ip->i_di.di_size = inode->i_size; + di->di_size = cpu_to_be64(inode->i_size); + mark_inode_dirty(inode); } if (inode == sdp->sd_rindex) @@ -606,7 +860,7 @@ static int gfs2_write_end(struct file *f brelse(dibh); gfs2_trans_end(sdp); failed: - if (al->al_requested) { + if (al) { gfs2_inplace_release(ip); gfs2_quota_unlock(ip); gfs2_alloc_put(ip); @@ -625,11 +879,7 @@ failed: static int gfs2_set_page_dirty(struct page *page) { - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) - SetPageChecked(page); + SetPageChecked(page); return __set_page_dirty_buffers(page); } @@ -653,7 +903,7 @@ static sector_t gfs2_bmap(struct address return 0; if (!gfs2_is_stuffed(ip)) - dblock = generic_block_bmap(mapping, lblock, gfs2_get_block); + dblock = generic_block_bmap(mapping, lblock, gfs2_block_map); gfs2_glock_dq_uninit(&i_gh); @@ -719,13 +969,9 @@ static int gfs2_ok_for_dio(struct gfs2_i { /* * Should we return an error here? I can't see that O_DIRECT for - * a journaled file makes any sense. For now we'll silently fall - * back to buffered I/O, likewise we do the same for stuffed - * files since they are (a) small and (b) unaligned. + * a stuffed file makes any sense. 
For now we'll silently fall + * back to buffered I/O */ - if (gfs2_is_jdata(ip)) - return 0; - if (gfs2_is_stuffed(ip)) return 0; @@ -836,9 +1082,23 @@ cannot_release: return 0; } -const struct address_space_operations gfs2_file_aops = { - .writepage = gfs2_writepage, - .writepages = gfs2_writepages, +static const struct address_space_operations gfs2_writeback_aops = { + .writepage = gfs2_writeback_writepage, + .writepages = gfs2_writeback_writepages, + .readpage = gfs2_readpage, + .readpages = gfs2_readpages, + .sync_page = block_sync_page, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .releasepage = gfs2_releasepage, + .direct_IO = gfs2_direct_IO, + .migratepage = buffer_migrate_page, +}; + +static const struct address_space_operations gfs2_ordered_aops = { + .writepage = gfs2_ordered_writepage, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, @@ -849,5 +1109,34 @@ const struct address_space_operations gf .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, .direct_IO = gfs2_direct_IO, + .migratepage = buffer_migrate_page, }; +static const struct address_space_operations gfs2_jdata_aops = { + .writepage = gfs2_jdata_writepage, + .writepages = gfs2_jdata_writepages, + .readpage = gfs2_readpage, + .readpages = gfs2_readpages, + .sync_page = block_sync_page, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, + .set_page_dirty = gfs2_set_page_dirty, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .releasepage = gfs2_releasepage, +}; + +void gfs2_set_aops(struct inode *inode) +{ + struct gfs2_inode *ip = GFS2_I(inode); + + if (gfs2_is_writeback(ip)) + inode->i_mapping->a_ops = &gfs2_writeback_aops; + else if (gfs2_is_ordered(ip)) + inode->i_mapping->a_ops = &gfs2_ordered_aops; + else if (gfs2_is_jdata(ip)) + inode->i_mapping->a_ops = &gfs2_jdata_aops; + else + BUG(); +} + diff -puN fs/gfs2/ops_address.h~git-gfs2-nmw fs/gfs2/ops_address.h --- a/fs/gfs2/ops_address.h~git-gfs2-nmw +++ a/fs/gfs2/ops_address.h @@ -14,9 +14,10 @@ #include #include -extern const struct address_space_operations gfs2_file_aops; -extern int gfs2_get_block(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create); extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); +extern int gfs2_internal_read(struct gfs2_inode *ip, + struct file_ra_state *ra_state, + char *buf, loff_t *pos, unsigned size); +extern void gfs2_set_aops(struct inode *inode); #endif /* __OPS_ADDRESS_DOT_H__ */ diff -puN fs/gfs2/ops_file.c~git-gfs2-nmw fs/gfs2/ops_file.c --- a/fs/gfs2/ops_file.c~git-gfs2-nmw +++ a/fs/gfs2/ops_file.c @@ -33,57 +33,12 @@ #include "lm.h" #include "log.h" #include "meta_io.h" -#include "ops_file.h" -#include "ops_vm.h" #include "quota.h" #include "rgrp.h" #include "trans.h" #include "util.h" #include "eaops.h" - -/* - * Most fields left uninitialised to catch anybody who tries to - * use them. f_flags set to prevent file_accessed() from touching - * any other part of this. Its use is purely as a flag so that we - * know (in readpage()) whether or not do to locking. 
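The new gfs2_set_aops() above keys its choice of address_space_operations off three predicates that are not defined in this patch. A rough sketch of what they would be expected to look like (assumed, not taken from this series; it presumes the mount-wide data mode lives in sd_args.ar_data and the per-inode flag in di_flags):

/* Sketch only: plausible definitions of the journaling-mode tests used by
 * gfs2_set_aops() and gfs2_write_end().  The real helpers live elsewhere. */
static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
{
	return ip->i_di.di_flags & GFS2_DIF_JDATA;
}

static inline int gfs2_is_writeback(const struct gfs2_inode *ip)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	return (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK) && !gfs2_is_jdata(ip);
}

static inline int gfs2_is_ordered(const struct gfs2_inode *ip)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	return (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) && !gfs2_is_jdata(ip);
}

With that split, an inode carrying the jdata flag always ends up with gfs2_jdata_aops regardless of the mount option, which is consistent with the BUG() fall-through at the end of gfs2_set_aops().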
- */ -struct file gfs2_internal_file_sentinel = { - .f_flags = O_NOATIME|O_RDONLY, -}; - -static int gfs2_read_actor(read_descriptor_t *desc, struct page *page, - unsigned long offset, unsigned long size) -{ - char *kaddr; - unsigned long count = desc->count; - - if (size > count) - size = count; - - kaddr = kmap(page); - memcpy(desc->arg.data, kaddr + offset, size); - kunmap(page); - - desc->count = count - size; - desc->written += size; - desc->arg.buf += size; - return size; -} - -int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size) -{ - struct inode *inode = &ip->i_inode; - read_descriptor_t desc; - desc.written = 0; - desc.arg.data = buf; - desc.count = size; - desc.error = 0; - do_generic_mapping_read(inode->i_mapping, ra_state, - &gfs2_internal_file_sentinel, pos, &desc, - gfs2_read_actor); - return desc.written ? desc.written : desc.error; -} +#include "ops_address.h" /** * gfs2_llseek - seek to a location in a file @@ -214,7 +169,7 @@ static int gfs2_get_flags(struct file *f if (put_user(fsflags, ptr)) error = -EFAULT; - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return error; } @@ -291,7 +246,16 @@ static int do_gfs2_set_flags(struct file if (error) goto out; } - + if ((flags ^ new_flags) & GFS2_DIF_JDATA) { + if (flags & GFS2_DIF_JDATA) + gfs2_log_flush(sdp, ip->i_gl); + error = filemap_fdatawrite(inode->i_mapping); + if (error) + goto out; + error = filemap_fdatawait(inode->i_mapping); + if (error) + goto out; + } error = gfs2_trans_begin(sdp, RES_DINODE, 0); if (error) goto out; @@ -303,6 +267,7 @@ static int do_gfs2_set_flags(struct file gfs2_dinode_out(ip, bh->b_data); brelse(bh); gfs2_set_inode_flags(inode); + gfs2_set_aops(inode); out_trans_end: gfs2_trans_end(sdp); out: @@ -338,6 +303,127 @@ static long gfs2_ioctl(struct file *filp return -ENOTTY; } +/** + * gfs2_allocate_page_backing - Use bmap to allocate blocks + * @page: The (locked) page to allocate backing for + * + * We try to allocate all the blocks required for the page in + * one go. This might fail for various reasons, so we keep + * trying until all the blocks to back this page are allocated. + * If some of the blocks are already allocated, thats ok too. + */ + +static int gfs2_allocate_page_backing(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct buffer_head bh; + unsigned long size = PAGE_CACHE_SIZE; + u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + do { + bh.b_state = 0; + bh.b_size = size; + gfs2_block_map(inode, lblock, &bh, 1); + if (!buffer_mapped(&bh)) + return -EIO; + size -= bh.b_size; + lblock += (bh.b_size >> inode->i_blkbits); + } while(size > 0); + return 0; +} + +/** + * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable + * @vma: The virtual memory area + * @page: The page which is about to become writable + * + * When the page becomes writable, we need to ensure that we have + * blocks allocated on disk to back that page. 
+ */ + +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned long last_index; + u64 pos = page->index << (PAGE_CACHE_SIZE - inode->i_blkbits); + unsigned int data_blocks, ind_blocks, rblocks; + int alloc_required = 0; + struct gfs2_holder gh; + struct gfs2_alloc *al; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); + ret = gfs2_glock_nq_atime(&gh); + if (ret) + goto out; + + set_bit(GIF_SW_PAGED, &ip->i_flags); + gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); + ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); + if (ret || !alloc_required) + goto out_unlock; + ret = -ENOMEM; + al = gfs2_alloc_get(ip); + if (al == NULL) + goto out_unlock; + + ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (ret) + goto out_alloc_put; + ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); + if (ret) + goto out_quota_unlock; + al->al_requested = data_blocks + ind_blocks; + ret = gfs2_inplace_reserve(ip); + if (ret) + goto out_quota_unlock; + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? data_blocks : 1; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + ret = gfs2_trans_begin(sdp, rblocks, 0); + if (ret) + goto out_trans_fail; + + lock_page(page); + ret = -EINVAL; + last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; + if (page->index > last_index) + goto out_unlock_page; + if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) + goto out_unlock_page; + if (gfs2_is_stuffed(ip)) { + ret = gfs2_unstuff_dinode(ip, page); + if (ret) + goto out_unlock_page; + } + ret = gfs2_allocate_page_backing(page); + +out_unlock_page: + unlock_page(page); + gfs2_trans_end(sdp); +out_trans_fail: + gfs2_inplace_release(ip); +out_quota_unlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); +out_unlock: + gfs2_glock_dq(&gh); +out: + gfs2_holder_uninit(&gh); + return ret; +} + +static struct vm_operations_struct gfs2_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = gfs2_page_mkwrite, +}; + /** * gfs2_mmap - @@ -360,14 +446,7 @@ static int gfs2_mmap(struct file *file, return error; } - /* This is VM_MAYWRITE instead of VM_WRITE because a call - to mprotect() can turn on VM_WRITE later. 
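The backing loop in gfs2_allocate_page_backing() above first converts the page index into a filesystem logical block and then walks the page in b_size-sized steps, advancing by however many blocks each gfs2_block_map() call managed to map. A stand-alone illustration of that arithmetic (a PAGE_CACHE_SHIFT of 12 and a 1KiB block size are assumed purely for the example):

#include <stdio.h>

#define PAGE_CACHE_SHIFT 12			/* assumed: 4KiB pages */
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
	unsigned int blkbits = 10;		/* assumed: 1KiB filesystem blocks */
	unsigned long index = 5;		/* page index within the file */
	unsigned long long lblock = (unsigned long long)index <<
				    (PAGE_CACHE_SHIFT - blkbits);
	unsigned long size = PAGE_CACHE_SIZE;

	/* Each iteration stands in for one gfs2_block_map() call; here we
	 * pretend every call maps exactly one block (b_size = 1 << blkbits). */
	while (size > 0) {
		unsigned long mapped = 1UL << blkbits;

		printf("map logical block %llu (%lu bytes still to cover)\n",
		       lblock, size);
		size -= mapped;
		lblock += mapped >> blkbits;
	}
	return 0;
}

So a 4KiB page over 1KiB blocks needs up to four mapping calls, but if gfs2_block_map() returns a larger extent in bh.b_size the loop advances several blocks at once.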
*/ - - if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == - (VM_MAYSHARE | VM_MAYWRITE)) - vma->vm_ops = &gfs2_vm_ops_sharewrite; - else - vma->vm_ops = &gfs2_vm_ops_private; + vma->vm_ops = &gfs2_vm_ops; gfs2_glock_dq_uninit(&i_gh); @@ -538,15 +617,6 @@ static int gfs2_lock(struct file *file, if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; - if (sdp->sd_args.ar_localflocks) { - if (IS_GETLK(cmd)) { - posix_test_lock(file, fl); - return 0; - } else { - return posix_lock_file_wait(file, fl); - } - } - if (cmd == F_CANCELLK) { /* Hack: */ cmd = F_SETLK; @@ -632,16 +702,12 @@ static void do_unflock(struct file *file static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host); if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; if (__mandatory_lock(&ip->i_inode)) return -ENOLCK; - if (sdp->sd_args.ar_localflocks) - return flock_lock_file_wait(file, fl); - if (fl->fl_type == F_UNLCK) { do_unflock(file, fl); return 0; @@ -678,3 +744,27 @@ const struct file_operations gfs2_dir_fo .flock = gfs2_flock, }; +const struct file_operations gfs2_file_fops_nolock = { + .llseek = gfs2_llseek, + .read = do_sync_read, + .aio_read = generic_file_aio_read, + .write = do_sync_write, + .aio_write = generic_file_aio_write, + .unlocked_ioctl = gfs2_ioctl, + .mmap = gfs2_mmap, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, + .setlease = gfs2_setlease, +}; + +const struct file_operations gfs2_dir_fops_nolock = { + .readdir = gfs2_readdir, + .unlocked_ioctl = gfs2_ioctl, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, +}; + diff -puN fs/gfs2/ops_file.h~git-gfs2-nmw /dev/null --- a/fs/gfs2/ops_file.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#ifndef __OPS_FILE_DOT_H__ -#define __OPS_FILE_DOT_H__ - -#include -struct gfs2_inode; - -extern struct file gfs2_internal_file_sentinel; -extern int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size); -extern void gfs2_set_inode_flags(struct inode *inode); -extern const struct file_operations gfs2_file_fops; -extern const struct file_operations gfs2_dir_fops; - -#endif /* __OPS_FILE_DOT_H__ */ diff -puN fs/gfs2/ops_fstype.c~git-gfs2-nmw fs/gfs2/ops_fstype.c --- a/fs/gfs2/ops_fstype.c~git-gfs2-nmw +++ a/fs/gfs2/ops_fstype.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
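The ar_localflocks tests are dropped from gfs2_lock() and gfs2_flock() above because the decision is now made once, when an inode's file operations are chosen: a localflocks mount gets the new *_nolock tables, which simply omit .lock and .flock so the VFS falls back to its generic POSIX/flock handling. The selection site is not part of this hunk; a plausible sketch of it (assumed helper name):

/* Sketch only (assumed helper, not from this patch): pick the regular-file
 * fops for an inode.  With localflocks the _nolock table is installed, so
 * fcntl/flock locking is handled locally rather than through the DLM. */
static const struct file_operations *gfs2_file_fops_for(struct gfs2_sbd *sdp)
{
	return sdp->sd_args.ar_localflocks ? &gfs2_file_fops_nolock
					   : &gfs2_file_fops;
}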
* * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -21,6 +21,7 @@ #include "gfs2.h" #include "incore.h" +#include "bmap.h" #include "daemon.h" #include "glock.h" #include "glops.h" @@ -59,7 +60,6 @@ static struct gfs2_sbd *init_sbd(struct mutex_init(&sdp->sd_inum_mutex); spin_lock_init(&sdp->sd_statfs_spin); - mutex_init(&sdp->sd_statfs_mutex); spin_lock_init(&sdp->sd_rindex_spin); mutex_init(&sdp->sd_rindex_mutex); @@ -76,15 +76,15 @@ static struct gfs2_sbd *init_sbd(struct mutex_init(&sdp->sd_quota_mutex); spin_lock_init(&sdp->sd_log_lock); - - INIT_LIST_HEAD(&sdp->sd_log_le_gl); + atomic_set(&sdp->sd_log_pinned, 0); INIT_LIST_HEAD(&sdp->sd_log_le_buf); INIT_LIST_HEAD(&sdp->sd_log_le_revoke); INIT_LIST_HEAD(&sdp->sd_log_le_rg); INIT_LIST_HEAD(&sdp->sd_log_le_databuf); INIT_LIST_HEAD(&sdp->sd_log_le_ordered); - mutex_init(&sdp->sd_log_reserve_mutex); + init_waitqueue_head(&sdp->sd_log_waitq); + init_waitqueue_head(&sdp->sd_logd_waitq); INIT_LIST_HEAD(&sdp->sd_ail1_list); INIT_LIST_HEAD(&sdp->sd_ail2_list); @@ -303,6 +303,67 @@ out: return error; } +/** + * map_journal_extents - create a reusable "extent" mapping from all logical + * blocks to all physical blocks for the given journal. This will save + * us time when writing journal blocks. Most journals will have only one + * extent that maps all their logical blocks. That's because gfs2.mkfs + * arranges the journal blocks sequentially to maximize performance. + * So the extent would map the first block for the entire file length. + * However, gfs2_jadd can happen while file activity is happening, so + * those journals may not be sequential. Less likely is the case where + * the users created their own journals by mounting the metafs and + * laying it out. But it's still possible. These journals might have + * several extents. + * + * TODO: This should be done in bigger chunks rather than one block at a time, + * but since it's only done at mount time, I'm not worried about the + * time it takes. 
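map_journal_extents() below only builds the list; the payoff comes later, when the log code needs to turn a journal-relative block number into a disk block without going back through gfs2_block_map(). A lookup over jd->extent_list would look roughly like this (illustrative helper only, not part of the patch):

/* Illustrative only: translate a journal-relative logical block into a
 * disk block using the extent list built by map_journal_extents() below.
 * Returns 0 if the block is not covered, which should not happen once the
 * whole journal has been mapped. */
static u64 journal_extent_to_dblock(struct gfs2_jdesc *jd, unsigned int lblock)
{
	struct gfs2_journal_extent *je;

	list_for_each_entry(je, &jd->extent_list, extent_list) {
		if (lblock >= je->lblock && lblock < je->lblock + je->blocks)
			return je->dblock + (lblock - je->lblock);
	}
	return 0;
}

Because most journals are laid out sequentially by mkfs, this list usually holds a single entry, so the lookup is effectively one addition per journal write.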
+ */ +static int map_journal_extents(struct gfs2_sbd *sdp) +{ + struct gfs2_jdesc *jd = sdp->sd_jdesc; + unsigned int lb; + u64 db, prev_db; /* logical block, disk block, prev disk block */ + struct gfs2_inode *ip = GFS2_I(jd->jd_inode); + struct gfs2_journal_extent *jext = NULL; + struct buffer_head bh; + int rc = 0; + + prev_db = 0; + + for (lb = 0; lb < ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; lb++) { + bh.b_state = 0; + bh.b_blocknr = 0; + bh.b_size = 1 << ip->i_inode.i_blkbits; + rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); + db = bh.b_blocknr; + if (rc || !db) { + printk(KERN_INFO "GFS2 journal mapping error %d: lb=" + "%u db=%llu\n", rc, lb, (unsigned long long)db); + break; + } + if (!prev_db || db != prev_db + 1) { + jext = kzalloc(sizeof(struct gfs2_journal_extent), + GFP_KERNEL); + if (!jext) { + printk(KERN_INFO "GFS2 error: out of memory " + "mapping journal extents.\n"); + rc = -ENOMEM; + break; + } + jext->dblock = db; + jext->lblock = lb; + jext->blocks = 1; + list_add_tail(&jext->extent_list, &jd->extent_list); + } else { + jext->blocks++; + } + prev_db = db; + } + return rc; +} + static int init_journal(struct gfs2_sbd *sdp, int undo) { struct gfs2_holder ji_gh; @@ -340,7 +401,9 @@ static int init_journal(struct gfs2_sbd if (sdp->sd_args.ar_spectator) { sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); - sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); + atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5); + atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); } else { if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { fs_err(sdp, "can't mount journal #%u\n", @@ -377,7 +440,12 @@ static int init_journal(struct gfs2_sbd sdp->sd_jdesc->jd_jid, error); goto fail_jinode_gh; } - sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + atomic_set(&sdp->sd_log_blks_free, sdp->sd_jdesc->jd_blocks); + atomic_set(&sdp->sd_log_thresh1, 2*sdp->sd_jdesc->jd_blocks/5); + atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); + + /* Map the extents for this journal's blocks */ + map_journal_extents(sdp); } if (sdp->sd_lockstruct.ls_first) { @@ -609,9 +677,6 @@ static int init_threads(struct gfs2_sbd if (undo) goto fail_quotad; - sdp->sd_log_flush_time = jiffies; - sdp->sd_jindex_refresh_time = jiffies; - p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); error = IS_ERR(p); if (error) { diff -puN fs/gfs2/ops_inode.c~git-gfs2-nmw fs/gfs2/ops_inode.c --- a/fs/gfs2/ops_inode.c~git-gfs2-nmw +++ a/fs/gfs2/ops_inode.c @@ -61,7 +61,7 @@ static int gfs2_create(struct inode *dir inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0); if (!IS_ERR(inode)) { gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); @@ -113,8 +113,18 @@ static struct dentry *gfs2_lookup(struct if (inode && IS_ERR(inode)) return ERR_PTR(PTR_ERR(inode)); - if (inode) + if (inode) { + struct gfs2_glock *gl = GFS2_I(inode)->i_gl; + struct gfs2_holder gh; + int error; + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) { + iput(inode); + return ERR_PTR(error); + } + gfs2_glock_dq_uninit(&gh); return d_splice_alias(inode, dentry); + } d_add(dentry, inode); return NULL; @@ -366,7 +376,7 @@ static int gfs2_symlink(struct inode *di } gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); @@ -442,7 +452,7 @@ static int 
gfs2_mkdir(struct inode *dir, gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); @@ -548,7 +558,7 @@ static int gfs2_mknod(struct inode *dir, } gfs2_trans_end(sdp); - if (dip->i_alloc.al_rgd) + if (dip->i_alloc->al_rgd) gfs2_inplace_release(dip); gfs2_quota_unlock(dip); gfs2_alloc_put(dip); diff -puN fs/gfs2/ops_inode.h~git-gfs2-nmw fs/gfs2/ops_inode.h --- a/fs/gfs2/ops_inode.h~git-gfs2-nmw +++ a/fs/gfs2/ops_inode.h @@ -16,5 +16,11 @@ extern const struct inode_operations gfs extern const struct inode_operations gfs2_dir_iops; extern const struct inode_operations gfs2_symlink_iops; extern const struct inode_operations gfs2_dev_iops; +extern const struct file_operations gfs2_file_fops; +extern const struct file_operations gfs2_dir_fops; +extern const struct file_operations gfs2_file_fops_nolock; +extern const struct file_operations gfs2_dir_fops_nolock; + +extern void gfs2_set_inode_flags(struct inode *inode); #endif /* __OPS_INODE_DOT_H__ */ diff -puN fs/gfs2/ops_super.c~git-gfs2-nmw fs/gfs2/ops_super.c --- a/fs/gfs2/ops_super.c~git-gfs2-nmw +++ a/fs/gfs2/ops_super.c @@ -487,7 +487,6 @@ static struct inode *gfs2_alloc_inode(st if (ip) { ip->i_flags = 0; ip->i_gl = NULL; - ip->i_last_pfault = jiffies; } return &ip->i_inode; } diff -puN fs/gfs2/ops_vm.c~git-gfs2-nmw /dev/null --- a/fs/gfs2/ops_vm.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "bmap.h" -#include "glock.h" -#include "inode.h" -#include "ops_vm.h" -#include "quota.h" -#include "rgrp.h" -#include "trans.h" -#include "util.h" - -static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); - - set_bit(GIF_PAGED, &ip->i_flags); - return filemap_fault(vma, vmf); -} - -static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned long index = page->index; - u64 lblock = index << (PAGE_CACHE_SHIFT - - sdp->sd_sb.sb_bsize_shift); - unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; - struct gfs2_alloc *al; - unsigned int data_blocks, ind_blocks; - unsigned int x; - int error; - - al = gfs2_alloc_get(ip); - - error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto out; - - error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); - if (error) - goto out_gunlock_q; - - gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - - al->al_requested = data_blocks + ind_blocks; - - error = gfs2_inplace_reserve(ip); - if (error) - goto out_gunlock_q; - - error = gfs2_trans_begin(sdp, al->al_rgd->rd_length + - ind_blocks + RES_DINODE + - RES_STATFS + RES_QUOTA, 0); - if (error) - goto out_ipres; - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL); - if (error) - goto out_trans; - } - - for (x = 0; x < blocks; ) { - u64 dblock; - unsigned int extlen; - int new = 1; - - error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); - if (error) - goto out_trans; - - lblock += extlen; - x += extlen; - } - - gfs2_assert_warn(sdp, al->al_alloced); - -out_trans: - gfs2_trans_end(sdp); -out_ipres: - gfs2_inplace_release(ip); -out_gunlock_q: - gfs2_quota_unlock(ip); -out: - gfs2_alloc_put(ip); - return error; -} - -static int gfs2_sharewrite_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct gfs2_file *gf = file->private_data; - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_holder i_gh; - int alloc_required; - int error; - int ret = 0; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); - if (error) - goto out; - - set_bit(GIF_PAGED, &ip->i_flags); - set_bit(GIF_SW_PAGED, &ip->i_flags); - - error = gfs2_write_alloc_required(ip, - (u64)vmf->pgoff << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &alloc_required); - if (error) { - ret = VM_FAULT_OOM; /* XXX: are these right? */ - goto out_unlock; - } - - set_bit(GFF_EXLOCK, &gf->f_flags); - ret = filemap_fault(vma, vmf); - clear_bit(GFF_EXLOCK, &gf->f_flags); - if (ret & VM_FAULT_ERROR) - goto out_unlock; - - if (alloc_required) { - /* XXX: do we need to drop page lock around alloc_page_backing?*/ - error = alloc_page_backing(ip, vmf->page); - if (error) { - /* - * VM_FAULT_LOCKED should always be the case for - * filemap_fault, but it may not be in a future - * implementation. 
- */ - if (ret & VM_FAULT_LOCKED) - unlock_page(vmf->page); - page_cache_release(vmf->page); - ret = VM_FAULT_OOM; - goto out_unlock; - } - set_page_dirty(vmf->page); - } - -out_unlock: - gfs2_glock_dq_uninit(&i_gh); -out: - return ret; -} - -struct vm_operations_struct gfs2_vm_ops_private = { - .fault = gfs2_private_fault, -}; - -struct vm_operations_struct gfs2_vm_ops_sharewrite = { - .fault = gfs2_sharewrite_fault, -}; - diff -puN fs/gfs2/ops_vm.h~git-gfs2-nmw /dev/null --- a/fs/gfs2/ops_vm.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#ifndef __OPS_VM_DOT_H__ -#define __OPS_VM_DOT_H__ - -#include - -extern struct vm_operations_struct gfs2_vm_ops_private; -extern struct vm_operations_struct gfs2_vm_ops_sharewrite; - -#endif /* __OPS_VM_DOT_H__ */ diff -puN fs/gfs2/quota.c~git-gfs2-nmw fs/gfs2/quota.c --- a/fs/gfs2/quota.c~git-gfs2-nmw +++ a/fs/gfs2/quota.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -59,7 +59,6 @@ #include "super.h" #include "trans.h" #include "inode.h" -#include "ops_file.h" #include "ops_address.h" #include "util.h" @@ -274,10 +273,10 @@ static int bh_get(struct gfs2_quota_data } block = qd->qd_slot / sdp->sd_qc_per_block; - offset = qd->qd_slot % sdp->sd_qc_per_block;; + offset = qd->qd_slot % sdp->sd_qc_per_block; bh_map.b_size = 1 << ip->i_inode.i_blkbits; - error = gfs2_block_map(&ip->i_inode, block, 0, &bh_map); + error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0); if (error) goto fail; error = gfs2_meta_read(ip->i_gl, bh_map.b_blocknr, DIO_WAIT, &bh); @@ -454,7 +453,7 @@ static void qdsb_put(struct gfs2_quota_d int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data **qd = al->al_qd; int error; @@ -502,7 +501,7 @@ out: void gfs2_quota_unhold(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; unsigned int x; gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); @@ -646,7 +645,7 @@ static int gfs2_adjust_quota(struct gfs2 } if (!buffer_mapped(bh)) { - gfs2_get_block(inode, iblock, bh, 1); + gfs2_block_map(inode, iblock, bh, 1); if (!buffer_mapped(bh)) goto unlock; } @@ -793,11 +792,9 @@ static int do_glock(struct gfs2_quota_da struct gfs2_holder i_gh; struct gfs2_quota_host q; char buf[sizeof(struct gfs2_quota)]; - struct file_ra_state ra_state; int error; struct gfs2_quota_lvb *qlvb; - file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping); restart: error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); if (error) @@ -820,8 +817,8 @@ restart: memset(buf, 0, sizeof(struct gfs2_quota)); pos = qd2offset(qd); - error = gfs2_internal_read(ip, &ra_state, buf, - &pos, sizeof(struct gfs2_quota)); + error = gfs2_internal_read(ip, NULL, buf, &pos, + 
sizeof(struct gfs2_quota)); if (error < 0) goto fail_gunlock; @@ -856,7 +853,7 @@ fail: int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; unsigned int x; int error = 0; @@ -924,7 +921,7 @@ static int need_sync(struct gfs2_quota_d void gfs2_quota_unlock(struct gfs2_inode *ip) { - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; @@ -972,7 +969,7 @@ static int print_message(struct gfs2_quo int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data *qd; s64 value; unsigned int x; @@ -1016,10 +1013,9 @@ int gfs2_quota_check(struct gfs2_inode * void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid) { - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_quota_data *qd; unsigned int x; - unsigned int found = 0; if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), change)) return; @@ -1032,7 +1028,6 @@ void gfs2_quota_change(struct gfs2_inode if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { do_qc(qd, change); - found++; } } } diff -puN fs/gfs2/recovery.c~git-gfs2-nmw fs/gfs2/recovery.c --- a/fs/gfs2/recovery.c~git-gfs2-nmw +++ a/fs/gfs2/recovery.c @@ -391,7 +391,7 @@ static int clean_journal(struct gfs2_jde lblock = head->lh_blkno; gfs2_replay_incr_blk(sdp, &lblock); bh_map.b_size = 1 << ip->i_inode.i_blkbits; - error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map); + error = gfs2_block_map(&ip->i_inode, lblock, &bh_map, 0); if (error) return error; if (!bh_map.b_blocknr) { diff -puN fs/gfs2/rgrp.c~git-gfs2-nmw fs/gfs2/rgrp.c --- a/fs/gfs2/rgrp.c~git-gfs2-nmw +++ a/fs/gfs2/rgrp.c @@ -25,10 +25,10 @@ #include "rgrp.h" #include "super.h" #include "trans.h" -#include "ops_file.h" #include "util.h" #include "log.h" #include "inode.h" +#include "ops_address.h" #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) @@ -126,41 +126,43 @@ static unsigned char gfs2_testbit(struct * Return: the block number (bitmap buffer scope) that was found */ -static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer, - unsigned int buflen, u32 goal, - unsigned char old_state) +static u32 gfs2_bitfit(unsigned char *buffer, unsigned int buflen, u32 goal, + unsigned char old_state) { - unsigned char *byte, *end, alloc; + unsigned char *byte; u32 blk = goal; - unsigned int bit; + unsigned int bit, bitlong; + unsigned long *plong, plong55; byte = buffer + (goal / GFS2_NBBY); + plong = (unsigned long *)(buffer + (goal / GFS2_NBBY)); bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; - alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0; - - while (byte < end) { - /* If we're looking for a free block we can eliminate all - bitmap settings with 0x55, which represents four data - blocks in a row. If we're looking for a data block, we can - eliminate 0x00 which corresponds to four free blocks. 
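The rewritten gfs2_bitfit() just below speeds up the common "find a free block" case: when the caller wants state 0 (free) and an entire unsigned long of the bitmap reads as the 0x5555... pattern (every 2-bit entry set to "used data"), the whole word is skipped in one step instead of 2 bits at a time. A small stand-alone demonstration of that test, assuming the GFS2 encoding of four block states per byte with 0 meaning free and 1 meaning used data:

#include <stdio.h>
#include <string.h>

#define GFS2_NBBY	4	/* block states per byte, 2 bits each */
#define GFS2_BIT_SIZE	2

int main(void)
{
	/* 0x5555... on any word size: every 2-bit entry reads 01 ("used"). */
	const unsigned long all_used = (unsigned long)-1 / 3;
	unsigned char bitmap[sizeof(unsigned long) + 1];
	const unsigned char *byte = bitmap;
	unsigned long word;
	unsigned int blk = 0, bit = 0;

	memcpy(bitmap, &all_used, sizeof(all_used));
	bitmap[sizeof(unsigned long)] = 0x15;	/* 00 01 01 01: last entry free */

	/* Fast path: a whole long of "used" entries cannot hold a free block,
	 * so skip sizeof(long) * GFS2_NBBY blocks at once. */
	memcpy(&word, byte, sizeof(word));
	if (word == all_used) {
		byte += sizeof(unsigned long);
		blk += sizeof(unsigned long) * GFS2_NBBY;
	}

	/* Slow path: scan the remaining byte 2 bits at a time for state 0. */
	while (((*byte >> bit) & 0x3) != 0) {
		bit += GFS2_BIT_SIZE;
		blk++;
		if (bit >= 8) {
			bit = 0;
			byte++;
		}
	}
	printf("first free block is bitmap entry %u\n", blk);
	return 0;
}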
*/ - if ((*byte & 0x55) == alloc) { - blk += (8 - bit) >> 1; - - bit = 0; - byte++; - + bitlong = bit; +#if BITS_PER_LONG == 32 + plong55 = 0x55555555; +#else + plong55 = 0x5555555555555555; +#endif + while (byte < buffer + buflen) { + + if (bitlong == 0 && old_state == 0 && *plong == plong55) { + plong++; + byte += sizeof(unsigned long); + blk += sizeof(unsigned long) * GFS2_NBBY; continue; } - if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) return blk; - bit += GFS2_BIT_SIZE; if (bit >= 8) { bit = 0; byte++; } + bitlong += GFS2_BIT_SIZE; + if (bitlong >= sizeof(unsigned long) * 8) { + bitlong = 0; + plong++; + } blk++; } @@ -817,11 +819,9 @@ void gfs2_rgrp_repolish_clones(struct gf struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) { - struct gfs2_alloc *al = &ip->i_alloc; - - /* FIXME: Should assert that the correct locks are held here... */ - memset(al, 0, sizeof(*al)); - return al; + BUG_ON(ip->i_alloc != NULL); + ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL); + return ip->i_alloc; } /** @@ -1059,26 +1059,34 @@ static struct inode *get_local_rgrp(stru struct inode *inode = NULL; struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *rgd, *begin = NULL; - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; int flags = LM_FLAG_TRY; int skipped = 0; int loops = 0; - int error; + int error, rg_locked; /* Try recently successful rgrps */ rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); while (rgd) { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, - LM_FLAG_TRY, &al->al_rgd_gh); + rg_locked = 0; + + if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { + rg_locked = 1; + error = 0; + } else { + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, + LM_FLAG_TRY, &al->al_rgd_gh); + } switch (error) { case 0: if (try_rgrp_fit(rgd, al)) goto out; if (rgd->rd_flags & GFS2_RDF_CHECK) inode = try_rgrp_unlink(rgd, last_unlinked); - gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (!rg_locked) + gfs2_glock_dq_uninit(&al->al_rgd_gh); if (inode) return inode; rgd = recent_rgrp_next(rgd, 1); @@ -1098,15 +1106,23 @@ static struct inode *get_local_rgrp(stru begin = rgd = forward_rgrp_get(sdp); for (;;) { - error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, - &al->al_rgd_gh); + rg_locked = 0; + + if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { + rg_locked = 1; + error = 0; + } else { + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, + &al->al_rgd_gh); + } switch (error) { case 0: if (try_rgrp_fit(rgd, al)) goto out; if (rgd->rd_flags & GFS2_RDF_CHECK) inode = try_rgrp_unlink(rgd, last_unlinked); - gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (!rg_locked) + gfs2_glock_dq_uninit(&al->al_rgd_gh); if (inode) return inode; break; @@ -1158,7 +1174,7 @@ out: int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct inode *inode; int error = 0; u64 last_unlinked = NO_BLOCK; @@ -1204,7 +1220,7 @@ try_again: void gfs2_inplace_release(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) fs_warn(sdp, "al_alloced = %u, al_requested = %u " @@ -1213,7 +1229,8 @@ void gfs2_inplace_release(struct gfs2_in al->al_line); al->al_rgd = NULL; - gfs2_glock_dq_uninit(&al->al_rgd_gh); + if (al->al_rgd_gh.gh_gl) + 
gfs2_glock_dq_uninit(&al->al_rgd_gh); if (ip != GFS2_I(sdp->sd_rindex)) gfs2_glock_dq_uninit(&al->al_ri_gh); } @@ -1301,11 +1318,10 @@ static u32 rgblk_search(struct gfs2_rgrp /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone bitmaps, so we must search the originals for that. */ if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) - blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, + blk = gfs2_bitfit(bi->bi_clone + bi->bi_offset, bi->bi_len, goal, old_state); else - blk = gfs2_bitfit(rgd, - bi->bi_bh->b_data + bi->bi_offset, + blk = gfs2_bitfit(bi->bi_bh->b_data + bi->bi_offset, bi->bi_len, goal, old_state); if (blk != BFITNOENT) break; @@ -1394,7 +1410,7 @@ static struct gfs2_rgrpd *rgblk_free(str u64 gfs2_alloc_data(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd = al->al_rgd; u32 goal, blk; u64 block; @@ -1439,7 +1455,7 @@ u64 gfs2_alloc_data(struct gfs2_inode *i u64 gfs2_alloc_meta(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct gfs2_alloc *al = &ip->i_alloc; + struct gfs2_alloc *al = ip->i_alloc; struct gfs2_rgrpd *rgd = al->al_rgd; u32 goal, blk; u64 block; @@ -1485,7 +1501,7 @@ u64 gfs2_alloc_meta(struct gfs2_inode *i u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_alloc *al = &dip->i_alloc; + struct gfs2_alloc *al = dip->i_alloc; struct gfs2_rgrpd *rgd = al->al_rgd; u32 blk; u64 block; diff -puN fs/gfs2/rgrp.h~git-gfs2-nmw fs/gfs2/rgrp.h --- a/fs/gfs2/rgrp.h~git-gfs2-nmw +++ a/fs/gfs2/rgrp.h @@ -32,7 +32,9 @@ void gfs2_rgrp_repolish_clones(struct gf struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); static inline void gfs2_alloc_put(struct gfs2_inode *ip) { - return; /* So we can see where ip->i_alloc is used */ + BUG_ON(ip->i_alloc == NULL); + kfree(ip->i_alloc); + ip->i_alloc = NULL; } int gfs2_inplace_reserve_i(struct gfs2_inode *ip, diff -puN fs/gfs2/super.c~git-gfs2-nmw fs/gfs2/super.c --- a/fs/gfs2/super.c~git-gfs2-nmw +++ a/fs/gfs2/super.c @@ -1,6 +1,6 @@ /* * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 
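With i_alloc now a pointer, gfs2_alloc_get() above allocates the structure on demand and can therefore fail, and the structure only lives for the span of one reservation. The pattern the rest of this patch follows (write_begin/write_end, page_mkwrite, the directory operations) boils down to the sketch below; the quota calls and transaction are trimmed, so this is illustrative only, not the real call sequence:

/* Sketch of the reservation life cycle only. */
static int gfs2_reserve_example(struct gfs2_inode *ip, unsigned int blocks)
{
	struct gfs2_alloc *al = gfs2_alloc_get(ip);	/* kzalloc'd per call */
	int error;

	if (al == NULL)
		return -ENOMEM;
	al->al_requested = blocks;
	error = gfs2_inplace_reserve(ip);
	if (error == 0)
		gfs2_inplace_release(ip);
	gfs2_alloc_put(ip);				/* kfree + ip->i_alloc = NULL */
	return error;
}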
* * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions @@ -51,15 +51,9 @@ void gfs2_tune_init(struct gfs2_tune *gt { spin_lock_init(>->gt_spin); - gt->gt_ilimit = 100; - gt->gt_ilimit_tries = 3; - gt->gt_ilimit_min = 1; gt->gt_demote_secs = 300; - gt->gt_incore_log_blocks = 1024; - gt->gt_log_flush_secs = 60; - gt->gt_jindex_refresh_secs = 60; gt->gt_recoverd_secs = 60; - gt->gt_logd_secs = 1; + gt->gt_logd_secs = 30; gt->gt_quotad_secs = 5; gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; @@ -71,10 +65,8 @@ void gfs2_tune_init(struct gfs2_tune *gt gt->gt_new_files_jdata = 0; gt->gt_new_files_directio = 0; gt->gt_max_readahead = 1 << 18; - gt->gt_lockdump_size = 131072; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; - gt->gt_reclaim_limit = 5000; gt->gt_statfs_quantum = 30; gt->gt_statfs_slow = 0; } @@ -393,6 +385,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sd if (!jd) break; + INIT_LIST_HEAD(&jd->extent_list); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) @@ -422,8 +415,9 @@ int gfs2_jindex_hold(struct gfs2_sbd *sd void gfs2_jindex_free(struct gfs2_sbd *sdp) { - struct list_head list; + struct list_head list, *head; struct gfs2_jdesc *jd; + struct gfs2_journal_extent *jext; spin_lock(&sdp->sd_jindex_spin); list_add(&list, &sdp->sd_jindex_list); @@ -433,6 +427,14 @@ void gfs2_jindex_free(struct gfs2_sbd *s while (!list_empty(&list)) { jd = list_entry(list.next, struct gfs2_jdesc, jd_list); + head = &jd->extent_list; + while (!list_empty(head)) { + jext = list_entry(head->next, + struct gfs2_journal_extent, + extent_list); + list_del(&jext->extent_list); + kfree(jext); + } list_del(&jd->jd_list); iput(jd->jd_inode); kfree(jd); @@ -543,7 +545,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp if (error) return error; - gfs2_meta_cache_flush(ip); j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); @@ -686,9 +687,7 @@ void gfs2_statfs_change(struct gfs2_sbd if (error) return; - mutex_lock(&sdp->sd_statfs_mutex); gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); - mutex_unlock(&sdp->sd_statfs_mutex); spin_lock(&sdp->sd_statfs_spin); l_sc->sc_total += total; @@ -736,9 +735,7 @@ int gfs2_statfs_sync(struct gfs2_sbd *sd if (error) goto out_bh2; - mutex_lock(&sdp->sd_statfs_mutex); gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); - mutex_unlock(&sdp->sd_statfs_mutex); spin_lock(&sdp->sd_statfs_spin); m_sc->sc_total += l_sc->sc_total; diff -puN fs/gfs2/sys.c~git-gfs2-nmw fs/gfs2/sys.c --- a/fs/gfs2/sys.c~git-gfs2-nmw +++ a/fs/gfs2/sys.c @@ -32,7 +32,8 @@ spinlock_t gfs2_sys_margs_lock; static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_vfs->s_id); + return snprintf(buf, PAGE_SIZE, "%u:%u\n", + MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); } static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) @@ -423,15 +424,11 @@ static ssize_t name##_store(struct gfs2_ TUNE_ATTR_2(name, name##_store) TUNE_ATTR(demote_secs, 0); -TUNE_ATTR(incore_log_blocks, 0); -TUNE_ATTR(log_flush_secs, 0); -TUNE_ATTR(jindex_refresh_secs, 0); TUNE_ATTR(quota_warn_period, 0); TUNE_ATTR(quota_quantum, 0); TUNE_ATTR(atime_quantum, 0); TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); -TUNE_ATTR(reclaim_limit, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(new_files_directio, 0); @@ -446,15 +443,11 @@ TUNE_ATTR_3(quota_scale, 
quota_scale_sho static struct attribute *tune_attrs[] = { &tune_attr_demote_secs.attr, - &tune_attr_incore_log_blocks.attr, - &tune_attr_log_flush_secs.attr, - &tune_attr_jindex_refresh_secs.attr, &tune_attr_quota_warn_period.attr, &tune_attr_quota_quantum.attr, &tune_attr_atime_quantum.attr, &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, - &tune_attr_reclaim_limit.attr, &tune_attr_statfs_slow.attr, &tune_attr_quota_simul_sync.attr, &tune_attr_quota_cache_secs.attr, diff -puN fs/gfs2/trans.c~git-gfs2-nmw fs/gfs2/trans.c --- a/fs/gfs2/trans.c~git-gfs2-nmw +++ a/fs/gfs2/trans.c @@ -79,6 +79,22 @@ fail_holder_uninit: return error; } +/** + * gfs2_log_release - Release a given number of log blocks + * @sdp: The GFS2 superblock + * @blks: The number of blocks + * + */ + +static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) +{ + + atomic_add(blks, &sdp->sd_log_blks_free); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= + sdp->sd_jdesc->jd_blocks); + up_read(&sdp->sd_log_flush_lock); +} + void gfs2_trans_end(struct gfs2_sbd *sdp) { struct gfs2_trans *tr = current->journal_info; @@ -114,11 +130,6 @@ void gfs2_trans_end(struct gfs2_sbd *sdp gfs2_log_flush(sdp, NULL); } -void gfs2_trans_add_gl(struct gfs2_glock *gl) -{ - lops_add(gl->gl_sbd, &gl->gl_le); -} - /** * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction * @gl: the glock the buffer belongs to diff -puN fs/gfs2/trans.h~git-gfs2-nmw fs/gfs2/trans.h --- a/fs/gfs2/trans.h~git-gfs2-nmw +++ a/fs/gfs2/trans.h @@ -30,7 +30,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sd void gfs2_trans_end(struct gfs2_sbd *sdp); -void gfs2_trans_add_gl(struct gfs2_glock *gl); void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); _