GIT 8f30cf58c8d2fecf58b2d054c26b71d4536025b8 git://git.linux-nfs.org/~bfields/linux.git#server-cluster-locking-api

commit 
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Nov 14 16:37:25 2006 -0500

    gfs2: nfs lock support for gfs2
    
    Add NFS lock support to GFS2.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
    Acked-by: Steven Whitehouse <swhiteho@redhat.com>

commit f0cf6fcc524bd4036cc85c2d6ecdd245a5702280
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Nov 28 16:27:06 2006 -0500

    lockd: add code to handle deferred lock requests
    
    Rewrite nlmsvc_lock() to use the asynchronous interface.
    
    As with testlock, we answer nlm requests in nlmsvc_lock by first looking up
    the block and then using the results we find in the block if B_QUEUED is
    set, and calling vfs_lock_file() otherwise.
    
    If this a new lock request and we get -EINPROGRESS return on a non-blocking
    request then we defer the request.
    
    Also modify nlmsvc_unlock() to call the filesystem method if appropriate.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit ea922bdcc6914c20eb4e9256fae88fe49955f0a9
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Dec 5 23:48:10 2006 -0500

    lockd: always preallocate block in nlmsvc_lock()
    
    Normally we could skip ever having to allocate a block in the case where
    the client asks for a non-blocking lock, or asks for a blocking lock that
    succeeds immediately.
    
    However we're going to want to always look up a block first in order to
    check whether we're revisiting a deferred lock call, and to be prepared to
    handle the case where the filesystem returns -EINPROGRESS--in that case we
    want to make sure the lock we've given the filesystem is the one embedded
    in the block that we'll use to track the deferred request.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit 57395b18b93b19f64c4716bd24afd13cb60cb0c4
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Nov 28 16:27:06 2006 -0500

    lockd: handle test_lock deferrals
    
    Rewrite nlmsvc_testlock() to use the new asynchronous interface: instead of
    immediately doing a posix_test_lock(), we first look for a matching block.
    If the subsequent test_lock returns anything other than -EINPROGRESS, we
    then remove the block we've found and return the results.
    
    If it returns -EINPROGRESS, then we defer the lock request.
    
    In the case where the block we find in the first step has B_QUEUED set,
    we bypass the vfs_test_lock entirely, instead using the block to decide how
    to respond:
    	with nlm_lck_denied if B_TOO_LATE is set.
    	with nlm_granted if B_GOT_CALLBACK is set.
    	by dropping if neither B_TOO_LATE nor B_GOT_CALLBACK is set
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit bf8158db5b0a1e16d2d179c1b26d7f2ec74ba86f
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Nov 28 16:27:06 2006 -0500

    lockd: pass cookie in nlmsvc_testlock
    
    Change NLM internal interface to pass more information for test lock; we
    need this to make sure the cookie information is pushed down to the place
    where we do request deferral, which is handled for testlock by the
    following patch.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit c3d57abb1cacd1323ffd2548637bf63a8149ee6e
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Nov 28 16:26:51 2006 -0500

    lockd: handle fl_grant callbacks
    
    Add code to handle file system callback when the lock is finally granted.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit a960fea508492c03d4723eb0114d7be35fb0a38f
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Nov 28 16:26:47 2006 -0500

    lockd: save lock state on deferral
    
    We need to keep some state for a pending asynchronous lock request, so this
    patch adds that state to struct nlm_block.
    
    This also adds a function which defers the request, by calling
    rqstp->rq_chandle.defer and storing the resulting deferred request in a
    nlm_block structure which we insert into lockd's global block list.  That
    new function isn't called yet, so it's dead code until a later patch.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit 91ef10df6eed6c05cc31a459213d8f80a14139ce
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Dec 5 23:31:28 2006 -0500

    locks: add fl_grant callback for asynchronous lock return
    
    Acquiring a lock on a cluster filesystem may require communication with
    remote hosts, and to avoid blocking lockd or nfsd threads during such
    communication, we allow the results to be returned asynchronously.
    
    When a ->lock() call needs to block, the file system will return
    -EINPROGRESS, and then later return the results with a call to the
    routine in the fl_grant field of the lock_manager_operations struct.
    
    This differs from the case when ->lock returns -EAGAIN to a blocking
    lock request; in that case, the filesystem calls fl_notify when the lock
    is granted, and the caller retries the original lock.  So while
    fl_notify is merely a hint to the caller that it should retry, fl_grant
    actually communicates the final result of the lock operation (with the
    lock already acquired in the succesful case).
    
    Therefore fl_grant takes a lock, a status and, for the test lock case, a
    conflicting lock.  We also allow fl_grant to return an error to the
    filesystem, to handle the case where the fl_grant requests arrives after
    the lock manager has already given up waiting for it.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit 6942fc15484de7bbb73e658f642cb2c00dae0196
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Tue Nov 28 16:26:41 2006 -0500

    nfsd4: Convert NFSv4 to new lock interface
    
    Convert NFSv4 to the new lock interface.  We don't define any callback for now,
    so we're not taking advantage of the asynchronous feature--that's less critical
    for the multi-threaded nfsd then it is for the single-threaded lockd.  But this
    does allow a cluster filesystems to export cluster-coherent locking to NFS.
    
    Note that it's cluster filesystems that are the issue--of the filesystems that
    define lock methods (nfs, cifs, etc.), most are not exportable by nfsd.
    
    Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>

commit 44d0a0840bad1ada86cd967304939610ca272dca
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Thu Jan 18 17:52:58 2007 -0500

    locks: add lock cancel command
    
    Lock managers need to be able to cancel pending lock requests.  In the case
    where the exported filesystem manages its own locks, it's not sufficient just
    to call posix_unblock_lock(); we need to let the filesystem know what's
    happening too.
    
    We do this by adding a new fcntl lock command: FL_CANCELLK.  Some day this
    might also be made available to userspace applications that could benefit from
    an asynchronous locking api.
    
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>

commit 2dfd4535d2f0395527613b9df516d8a7decf54f2
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Thu Jan 18 16:15:35 2007 -0500

    locks: allow {vfs,posix}_lock_file to return conflicting lock
    
    The nfsv4 protocol's lock operation, in the case of a conflict, returns
    information about the conflicting lock.
    
    It's unclear how clients can use this, so for now we're not going so far as to
    add a filesystem method that can return a conflicting lock, but we may as well
    return something in the local case when it's easy to.
    
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>

commit a3fd0ba0791f1d4356ea30f702854fe257e64300
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Thu Jan 18 15:08:55 2007 -0500

    locks: factor out generic/filesystem switch from setlock code
    
    Factor out the code that switches between generic and filesystem-specific lock
    methods; eventually we want to call this from lock managers (lockd and nfsd)
    too; currently they only call the generic methods.
    
    This patch does that for all the setlk code.
    
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>

commit 56cc3151430e490bb7155e3eee40eb24ed53263f
Author: J. Bruce Fields <bfields@citi.umich.edu>
Date:   Wed Feb 21 00:58:50 2007 -0500

    locks: factor out generic/filesystem switch from test_lock
    
    Factor out the code that switches between generic and filesystem-specific lock
    methods; eventually we want to call this from lock managers (lockd and nfsd)
    too; currently they only call the generic methods.
    
    This patch does that for test_lock.
    
    Note that this hasn't been necessary until recently, because the few
    filesystems that define ->lock() (nfs, cifs...) aren't exportable via NFS.
    However GFS (and, in the future, other cluster filesystems) need to implement
    their own locking to get cluster-coherent locking, and also want to be able to
    export locking to NFS (lockd and NFSv4).
    
    So we accomplish this by factoring out code such as this and exporting it for
    the use of lockd and nfsd.
    
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>

commit 0901422274e20227da2e99665b0b7c5b92585321
Author: Marc Eshel <eshel@almaden.ibm.com>
Date:   Wed Feb 21 00:55:18 2007 -0500

    locks: give posix_test_lock same interface as ->lock
    
    posix_test_lock() and ->lock() do the same job but have gratuitously
    different interfaces.  Modify posix_test_lock() so the two agree,
    simplifying some code in the process.
    
    Signed-off-by: Marc Eshel <eshel@almaden.ibm.com>
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>

commit 0723ed5f60a7274b9abfb699b88b420650146a2e
Author: J. Bruce Fields <bfields@citi.umich.edu>
Date:   Thu Feb 22 18:48:53 2007 -0500

    locks: make ->lock release private data before returning in GETLK case
    
    The file_lock argument to ->lock is used to return the conflicting lock
    when found.  There's no reason for the filesystem to return any private
    information with this conflicting lock, but nfsv4 is.
    
    Fix nfsv4 client, and modify locks.c to stop calling fl_release_private
    for it in this case.
    
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>

commit c2fa1b8a6c059dd08a802545fed3badc8df2adc1
Author: J. Bruce Fields <bfields@citi.umich.edu>
Date:   Tue Feb 20 16:10:11 2007 -0500

    locks: create posix-to-flock helper functions
    
    Factor out a bit of messy code by creating posix-to-flock counterparts
    to the existing flock-to-posix helper functions.
    
    Cc: Christoph Hellwig <hch@infradead.org>
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>

commit 226a998dbf3c6f9b85f67d08a52c5a2143ed9d88
Author: J. Bruce Fields <bfields@citi.umich.edu>
Date:   Wed Feb 14 14:25:00 2007 -0500

    locks: trivial removal of unnecessary parentheses
    
    Remove some unnecessary parentheses.
    
    Signed-off-by: "J. Bruce Fields" <bfields@citi.umich.edu>
 fs/fuse/file.c                |    3 
 fs/gfs2/locking/dlm/plock.c   |  104 ++++++++++++++--
 fs/gfs2/locking/nolock/main.c |    8 -
 fs/gfs2/ops_file.c            |   12 +-
 fs/lockd/svc4proc.c           |    6 +
 fs/lockd/svclock.c            |  270 ++++++++++++++++++++++++++++++++++-------
 fs/lockd/svcproc.c            |    7 +
 fs/lockd/svcsubs.c            |    2 
 fs/locks.c                    |  259 +++++++++++++++++++++++----------------
 fs/nfs/file.c                 |    7 -
 fs/nfs/nfs4proc.c             |    2 
 fs/nfsd/nfs4state.c           |   30 +++--
 include/linux/fcntl.h         |    4 +
 include/linux/fs.h            |    9 +
 include/linux/lockd/lockd.h   |   14 ++
 15 files changed, 532 insertions(+), 205 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2fd0692..acfad65 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -738,8 +738,7 @@ static int fuse_file_lock(struct file *f
 
 	if (cmd == F_GETLK) {
 		if (fc->no_lock) {
-			if (!posix_test_lock(file, fl, fl))
-				fl->fl_type = F_UNLCK;
+			posix_test_lock(file, fl);
 			err = 0;
 		} else
 			err = fuse_getlk(file, fl);
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index 1dd4215..5207e4e 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -25,6 +25,14 @@ struct plock_op {
 	struct gdlm_plock_info info;
 };
 
+struct plock_xop {
+	struct plock_op xop;
+	void *callback;
+	void *fl;
+	void *file;
+};
+
+
 static inline void set_version(struct gdlm_plock_info *info)
 {
 	info->version[0] = GDLM_PLOCK_VERSION_MAJOR;
@@ -64,12 +72,14 @@ int gdlm_plock(void *lockspace, struct l
 {
 	struct gdlm_ls *ls = lockspace;
 	struct plock_op *op;
+	struct plock_xop *xop;
 	int rv;
 
-	op = kzalloc(sizeof(*op), GFP_KERNEL);
-	if (!op)
+	xop = kzalloc(sizeof(*xop), GFP_KERNEL);
+	if (!xop)
 		return -ENOMEM;
 
+	op = &xop->xop;
 	op->info.optype		= GDLM_PLOCK_OP_LOCK;
 	op->info.pid		= fl->fl_pid;
 	op->info.ex		= (fl->fl_type == F_WRLCK);
@@ -79,9 +89,20 @@ int gdlm_plock(void *lockspace, struct l
 	op->info.start		= fl->fl_start;
 	op->info.end		= fl->fl_end;
 	op->info.owner		= (__u64)(long) fl->fl_owner;
+	if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+		xop->callback	= fl->fl_lmops->fl_grant;
+		/* might need to make a copy */
+		xop->fl		= fl;
+		xop->file	= file;
+	} else
+		xop->callback	= NULL;
 
 	send_op(op);
-	wait_event(recv_wq, (op->done != 0));
+
+	if (xop->callback == NULL)
+		wait_event(recv_wq, (op->done != 0));
+	else
+		return -EINPROGRESS;
 
 	spin_lock(&ops_lock);
 	if (!list_empty(&op->list)) {
@@ -99,7 +120,60 @@ int gdlm_plock(void *lockspace, struct l
 				  (unsigned long long)name->ln_number);
 	}
 
-	kfree(op);
+	kfree(xop);
+	return rv;
+}
+
+/* Returns failure iff a succesful lock operation should be canceled */
+static int gdlm_plock_callback(struct plock_op *op)
+{
+	struct file *file;
+	struct file_lock *fl;
+	int (*notify)(void *, void *, int) = NULL;
+	struct plock_xop *xop = (struct plock_xop *)op;
+	int rv = 0;
+
+	spin_lock(&ops_lock);
+	if (!list_empty(&op->list)) {
+		printk(KERN_INFO "plock op on list\n");
+		list_del(&op->list);
+	}
+	spin_unlock(&ops_lock);
+
+	/* check if the following 2 are still valid or make a copy */
+	file = xop->file;
+	fl = xop->fl;
+	notify = xop->callback;
+
+	if (op->info.rv) {
+		notify(fl, NULL, op->info.rv);
+		goto out;
+	}
+
+	/* got fs lock; bookkeep locally as well: */
+	if (posix_lock_file(file, fl, NULL)) {
+		/*
+		 * This can only happen in the case of kmalloc() failure.
+		 * The filesystem's own lock is the authoritative lock,
+		 * so a failure to get the lock locally is not a disaster.
+		 * As long as GFS cannot reliably cancel locks (especially
+		 * in a low-memory situation), we're better off ignoring
+		 * this failure than trying to recover.
+		 */
+		log_error("gdlm_plock: vfs lock error file %p fl %p",
+				file, fl);
+	}
+
+	rv = notify(fl, NULL, 0);
+	if (rv) {
+		/* XXX: We need to cancel the fs lock here: */
+		printk("gfs2 lock granted after lock request failed;"
+						" dangling lock!\n");
+		goto out;
+	}
+
+out:
+	kfree(xop);
 	return rv;
 }
 
@@ -138,6 +212,9 @@ int gdlm_punlock(void *lockspace, struct
 
 	rv = op->info.rv;
 
+	if (rv == -ENOENT)
+		rv = 0;
+
 	kfree(op);
 	return rv;
 }
@@ -161,6 +238,7 @@ int gdlm_plock_get(void *lockspace, stru
 	op->info.start		= fl->fl_start;
 	op->info.end		= fl->fl_end;
 
+
 	send_op(op);
 	wait_event(recv_wq, (op->done != 0));
 
@@ -173,9 +251,10 @@ int gdlm_plock_get(void *lockspace, stru
 
 	rv = op->info.rv;
 
-	if (rv == 0)
-		fl->fl_type = F_UNLCK;
-	else if (rv > 0) {
+	fl->fl_type = F_UNLCK;
+	if (rv == -ENOENT)
+		rv = 0;
+	else if (rv == 0 && op->info.pid != fl->fl_pid) {
 		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
 		fl->fl_pid = op->info.pid;
 		fl->fl_start = op->info.start;
@@ -243,9 +322,14 @@ static ssize_t dev_write(struct file *fi
 	}
 	spin_unlock(&ops_lock);
 
-	if (found)
-		wake_up(&recv_wq);
-	else
+	if (found) {
+		struct plock_xop *xop;
+		xop = (struct plock_xop *)op;
+		if (xop->callback)
+			count = gdlm_plock_callback(op);
+		else
+			wake_up(&recv_wq);
+	} else
 		printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
 			(unsigned long long)info.number);
 	return count;
diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c
index acfbc94..5cc1dfa 100644
--- a/fs/gfs2/locking/nolock/main.c
+++ b/fs/gfs2/locking/nolock/main.c
@@ -164,13 +164,7 @@ static void nolock_unhold_lvb(void *lock
 static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
 			    struct file *file, struct file_lock *fl)
 {
-	struct file_lock tmp;
-	int ret;
-
-	ret = posix_test_lock(file, fl, &tmp);
-	fl->fl_type = F_UNLCK;
-	if (ret)
-		memcpy(fl, &tmp, sizeof(struct file_lock));
+	posix_test_lock(file, fl);
 
 	return 0;
 }
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index b50180e..329c4dc 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -513,18 +513,18 @@ static int gfs2_lock(struct file *file, 
 
 	if (sdp->sd_args.ar_localflocks) {
 		if (IS_GETLK(cmd)) {
-			struct file_lock tmp;
-			int ret;
-			ret = posix_test_lock(file, fl, &tmp);
-			fl->fl_type = F_UNLCK;
-			if (ret)
-				memcpy(fl, &tmp, sizeof(struct file_lock));
+			posix_test_lock(file, fl);
 			return 0;
 		} else {
 			return posix_lock_file_wait(file, fl);
 		}
 	}
 
+	if (cmd == F_CANCELLK) {
+		/* Hack: */
+		cmd = F_SETLK;
+		fl->fl_type = F_UNLCK;
+	}
 	if (IS_GETLK(cmd))
 		return gfs2_lm_plock_get(sdp, &name, file, fl);
 	else if (fl->fl_type == F_UNLCK)
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 47a66aa..bf27b6c 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -99,7 +99,9 @@ nlm4svc_proc_test(struct svc_rqst *rqstp
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
-	resp->status = nlmsvc_testlock(file, &argp->lock, &resp->lock);
+	resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie);
+	if (resp->status == nlm_drop_reply)
+		return rpc_drop_reply;
 
 	dprintk("lockd: TEST4          status %d\n", ntohl(resp->status));
 	nlm_release_host(host);
@@ -143,6 +145,8 @@ #endif
 	/* Now try to lock the file */
 	resp->status = nlmsvc_lock(rqstp, file, &argp->lock,
 					argp->block, &argp->cookie);
+	if (resp->status == nlm_drop_reply)
+		return rpc_drop_reply;
 
 	dprintk("lockd: LOCK          status %d\n", ntohl(resp->status));
 	nlm_release_host(host);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index cf51f84..4606651 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -173,7 +173,7 @@ found:
  */
 static inline struct nlm_block *
 nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
-				struct nlm_lock *lock, struct nlm_cookie *cookie)
+		struct nlm_lock *lock, struct nlm_cookie *cookie, int conf)
 {
 	struct nlm_block	*block;
 	struct nlm_host		*host;
@@ -206,6 +206,11 @@ nlmsvc_create_block(struct svc_rqst *rqs
 
 	dprintk("lockd: created block %p...\n", block);
 
+	if (conf) {
+		block->b_fl = kzalloc(sizeof(struct file_lock), GFP_KERNEL);
+		if (!block->b_fl)
+			goto failed_free;
+	}
 	/* Create and initialize the block */
 	block->b_daemon = rqstp->rq_server;
 	block->b_host   = host;
@@ -261,6 +266,7 @@ static void nlmsvc_free_block(struct kre
 	nlmsvc_freegrantargs(block->b_call);
 	nlm_release_call(block->b_call);
 	nlm_release_file(block->b_file);
+	kfree(block->b_fl);
 	kfree(block);
 }
 
@@ -331,6 +337,31 @@ static void nlmsvc_freegrantargs(struct 
 }
 
 /*
+ * Deferred lock request handling for non-blocking lock
+ */
+static u32
+nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
+{
+	u32 status = nlm_lck_denied_nolocks;
+
+	block->b_flags |= B_QUEUED;
+
+	nlmsvc_insert_block(block, NLM_TIMEOUT);
+
+	block->b_cache_req = &rqstp->rq_chandle;
+	if (rqstp->rq_chandle.defer) {
+		block->b_deferred_req =
+			rqstp->rq_chandle.defer(block->b_cache_req);
+		if (block->b_deferred_req != NULL)
+			status = nlm_drop_reply;
+	}
+	dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n",
+		block, block->b_flags, status);
+
+	return status;
+}
+
+/*
  * Attempt to establish a lock, and if it can't be granted, block it
  * if required.
  */
@@ -338,7 +369,7 @@ __be32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
 			struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
-	struct nlm_block	*block, *newblock = NULL;
+	struct nlm_block	*block = NULL;
 	int			error;
 	__be32			ret;
 
@@ -351,29 +382,58 @@ nlmsvc_lock(struct svc_rqst *rqstp, stru
 				wait);
 
 
-	lock->fl.fl_flags &= ~FL_SLEEP;
-again:
 	/* Lock file against concurrent access */
 	mutex_lock(&file->f_mutex);
-	/* Get existing block (in case client is busy-waiting) */
+	/* Get existing block (in case client is busy-waiting)
+	 * or create new block
+	 */
 	block = nlmsvc_lookup_block(file, lock);
 	if (block == NULL) {
-		if (newblock != NULL)
-			lock = &newblock->b_call->a_args.lock;
-	} else
+		block = nlmsvc_create_block(rqstp, file, lock, cookie, 0);
+		ret = nlm_lck_denied_nolocks;
+		if (block == NULL)
+			goto out;
 		lock = &block->b_call->a_args.lock;
+	} else
+		lock->fl.fl_flags &= ~FL_SLEEP;
 
-	error = posix_lock_file(file->f_file, &lock->fl);
-	lock->fl.fl_flags &= ~FL_SLEEP;
+	if (block->b_flags & B_QUEUED) {
+		dprintk("lockd: nlmsvc_lock deferred block %p flags %d\n",
+							block, block->b_flags);
+		if (block->b_granted) {
+			nlmsvc_unlink_block(block);
+			ret = nlm_granted;
+			goto out;
+		}
+		if (block->b_flags & B_TOO_LATE) {
+			nlmsvc_unlink_block(block);
+			ret = nlm_lck_denied;
+			goto out;
+		}
+		ret = nlm_drop_reply;
+		goto out;
+	}
 
-	dprintk("lockd: posix_lock_file returned %d\n", error);
+	if (!wait)
+		lock->fl.fl_flags &= ~FL_SLEEP;
+	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
+	lock->fl.fl_flags &= ~FL_SLEEP;
 
+	dprintk("lockd: vfs_lock_file returned %d\n", error);
 	switch(error) {
 		case 0:
 			ret = nlm_granted;
 			goto out;
 		case -EAGAIN:
+			ret = nlm_lck_denied;
 			break;
+		case -EINPROGRESS:
+			if (wait)
+				break;
+			/* Filesystem lock operation is in progress
+			   Add it to the queue waiting for callback */
+			ret = nlmsvc_defer_lock_rqst(rqstp, block);
+			goto out;
 		case -EDEADLK:
 			ret = nlm_deadlock;
 			goto out;
@@ -387,26 +447,11 @@ again:
 		goto out;
 
 	ret = nlm_lck_blocked;
-	if (block != NULL)
-		goto out;
-
-	/* If we don't have a block, create and initialize it. Then
-	 * retry because we may have slept in kmalloc. */
-	/* We have to release f_mutex as nlmsvc_create_block may try to
-	 * to claim it while doing host garbage collection */
-	if (newblock == NULL) {
-		mutex_unlock(&file->f_mutex);
-		dprintk("lockd: blocking on this lock (allocating).\n");
-		if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
-			return nlm_lck_denied_nolocks;
-		goto again;
-	}
 
 	/* Append to list of blocked */
-	nlmsvc_insert_block(newblock, NLM_NEVER);
+	nlmsvc_insert_block(block, NLM_NEVER);
 out:
 	mutex_unlock(&file->f_mutex);
-	nlmsvc_release_block(newblock);
 	nlmsvc_release_block(block);
 	dprintk("lockd: nlmsvc_lock returned %u\n", ret);
 	return ret;
@@ -416,9 +461,14 @@ out:
  * Test for presence of a conflicting lock.
  */
 __be32
-nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
-				       struct nlm_lock *conflock)
+nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
+		struct nlm_lock *lock, struct nlm_lock *conflock,
+		struct nlm_cookie *cookie)
 {
+	struct nlm_block 	*block = NULL;
+	int			error;
+	__be32			ret;
+
 	dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
 				file->f_file->f_path.dentry->d_inode->i_sb->s_id,
 				file->f_file->f_path.dentry->d_inode->i_ino,
@@ -426,19 +476,63 @@ nlmsvc_testlock(struct nlm_file *file, s
 				(long long)lock->fl.fl_start,
 				(long long)lock->fl.fl_end);
 
-	if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
-		dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
-				conflock->fl.fl_type,
-				(long long)conflock->fl.fl_start,
-				(long long)conflock->fl.fl_end);
-		conflock->caller = "somehost";	/* FIXME */
-		conflock->len = strlen(conflock->caller);
-		conflock->oh.len = 0;		/* don't return OH info */
-		conflock->svid = conflock->fl.fl_pid;
-		return nlm_lck_denied;
+	/* Get existing block (in case client is busy-waiting) */
+	block = nlmsvc_lookup_block(file, lock);
+
+	if (block == NULL) {
+		block = nlmsvc_create_block(rqstp, file, lock, cookie, 1);
+		if (block == NULL)
+			return nlm_granted;
+	}
+	if (block->b_flags & B_QUEUED) {
+		dprintk("lockd: nlmsvc_testlock deferred block %p flags %d fl %p\n",
+			block, block->b_flags, block->b_fl);
+		if (block->b_flags & B_TOO_LATE) {
+			nlmsvc_unlink_block(block);
+			return nlm_lck_denied;
+		}
+		if (block->b_flags & B_GOT_CALLBACK) {
+			if (block->b_fl != NULL
+					&& block->b_fl->fl_type != F_UNLCK) {
+				lock->fl = *block->b_fl;
+				goto conf_lock;
+			}
+			else {
+				nlmsvc_unlink_block(block);
+				return nlm_granted;
+			}
+		}
+		return nlm_drop_reply;
 	}
 
-	return nlm_granted;
+	error = vfs_test_lock(file->f_file, &lock->fl);
+	if (error == -EINPROGRESS)
+		return nlmsvc_defer_lock_rqst(rqstp, block);
+	if (error) {
+		ret = nlm_lck_denied_nolocks;
+		goto out;
+	}
+	if (lock->fl.fl_type == F_UNLCK) {
+		ret = nlm_granted;
+		goto out;
+	}
+
+conf_lock:
+	dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
+		lock->fl.fl_type, (long long)lock->fl.fl_start,
+		(long long)lock->fl.fl_end);
+	conflock->caller = "somehost";	/* FIXME */
+	conflock->len = strlen(conflock->caller);
+	conflock->oh.len = 0;		/* don't return OH info */
+	conflock->svid = lock->fl.fl_pid;
+	conflock->fl.fl_type = lock->fl.fl_type;
+	conflock->fl.fl_start = lock->fl.fl_start;
+	conflock->fl.fl_end = lock->fl.fl_end;
+	ret = nlm_lck_denied;
+out:
+	if (block)
+		nlmsvc_release_block(block);
+	return ret;
 }
 
 /*
@@ -464,7 +558,7 @@ nlmsvc_unlock(struct nlm_file *file, str
 	nlmsvc_cancel_blocked(file, lock);
 
 	lock->fl.fl_type = F_UNLCK;
-	error = posix_lock_file(file->f_file, &lock->fl);
+	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
 
 	return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
 }
@@ -500,6 +594,63 @@ nlmsvc_cancel_blocked(struct nlm_file *f
 }
 
 /*
+ * This is a callback from the filesystem for VFS file lock requests.
+ * It will be used if fl_grant is defined and the filesystem can not
+ * respond to the request immediately.
+ * For GETLK request it will copy the reply to the nlm_block.
+ * For SETLK or SETLKW request it will get the local posix lock.
+ * In all cases it will move the block to the head of nlm_blocked q where
+ * nlmsvc_retry_blocked() can send back a reply for SETLKW or revisit the
+ * deferred rpc for GETLK and SETLK.
+ */
+static void
+nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
+			     int result)
+{
+	block->b_flags |= B_GOT_CALLBACK;
+	if (result == 0)
+		block->b_granted = 1;
+	else
+		block->b_flags |= B_TOO_LATE;
+	if (conf) {
+		if (block->b_fl)
+			locks_copy_lock(block->b_fl, conf);
+	}
+}
+
+static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
+					int result)
+{
+	struct nlm_block *block;
+	int rc = -ENOENT;
+
+	lock_kernel();
+	list_for_each_entry(block, &nlm_blocked, b_list) {
+		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
+			dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
+							block, block->b_flags);
+			if (block->b_flags & B_QUEUED) {
+				if (block->b_flags & B_TOO_LATE) {
+					rc = -ENOLCK;
+					break;
+				}
+				nlmsvc_update_deferred_block(block, conf, result);
+			} else if (result == 0)
+				block->b_granted = 1;
+
+			nlmsvc_insert_block(block, 0);
+			svc_wake_up(block->b_daemon);
+			rc = 0;
+			break;
+		}
+	}
+	unlock_kernel();
+	if (rc == -ENOENT)
+		printk(KERN_WARNING "lockd: grant for unknown block\n");
+	return rc;
+}
+
+/*
  * Unblock a blocked lock request. This is a callback invoked from the
  * VFS layer when a lock on which we blocked is removed.
  *
@@ -531,6 +682,7 @@ static int nlmsvc_same_owner(struct file
 struct lock_manager_operations nlmsvc_lock_operations = {
 	.fl_compare_owner = nlmsvc_same_owner,
 	.fl_notify = nlmsvc_notify_blocked,
+	.fl_grant = nlmsvc_grant_deferred,
 };
 
 /*
@@ -553,6 +705,8 @@ nlmsvc_grant_blocked(struct nlm_block *b
 
 	dprintk("lockd: grant blocked lock %p\n", block);
 
+	kref_get(&block->b_count);
+
 	/* Unlink block request from list */
 	nlmsvc_unlink_block(block);
 
@@ -566,20 +720,23 @@ nlmsvc_grant_blocked(struct nlm_block *b
 
 	/* Try the lock operation again */
 	lock->fl.fl_flags |= FL_SLEEP;
-	error = posix_lock_file(file->f_file, &lock->fl);
+	error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
 	lock->fl.fl_flags &= ~FL_SLEEP;
 
 	switch (error) {
 	case 0:
 		break;
 	case -EAGAIN:
-		dprintk("lockd: lock still blocked\n");
+	case -EINPROGRESS:
+		dprintk("lockd: lock still blocked error %d\n", error);
 		nlmsvc_insert_block(block, NLM_NEVER);
+		nlmsvc_release_block(block);
 		return;
 	default:
 		printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
 				-error, __FUNCTION__);
 		nlmsvc_insert_block(block, 10 * HZ);
+		nlmsvc_release_block(block);
 		return;
 	}
 
@@ -592,7 +749,6 @@ callback:
 	nlmsvc_insert_block(block, 30 * HZ);
 
 	/* Call the client */
-	kref_get(&block->b_count);
 	nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops);
 }
 
@@ -665,6 +821,23 @@ nlmsvc_grant_reply(struct nlm_cookie *co
 	nlmsvc_release_block(block);
 }
 
+/* Helper function to handle retry of a deferred block.
+ * If it is a blocking lock, call grant_blocked.
+ * For a non-blocking lock or test lock, revisit the request.
+ */
+static void
+retry_deferred_block(struct nlm_block *block)
+{
+	if (!(block->b_flags & B_GOT_CALLBACK))
+		block->b_flags |= B_TOO_LATE;
+	nlmsvc_insert_block(block, NLM_TIMEOUT);
+	dprintk("revisit block %p flags %d\n",	block, block->b_flags);
+	if (block->b_deferred_req) {
+		block->b_deferred_req->revisit(block->b_deferred_req, 0);
+		block->b_deferred_req = NULL;
+	}
+}
+
 /*
  * Retry all blocked locks that have been notified. This is where lockd
  * picks up locks that can be granted, or grant notifications that must
@@ -688,9 +861,12 @@ nlmsvc_retry_blocked(void)
 
 		dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
 			block, block->b_when);
-		kref_get(&block->b_count);
-		nlmsvc_grant_blocked(block);
-		nlmsvc_release_block(block);
+		if (block->b_flags & B_QUEUED) {
+			dprintk("nlmsvc_retry_blocked delete block (%p, granted=%d, flags=%d)\n",
+				block, block->b_granted, block->b_flags);
+			retry_deferred_block(block);
+		} else
+			nlmsvc_grant_blocked(block);
 	}
 
 	return timeout;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 31cb484..9cd5c8b 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -33,6 +33,7 @@ cast_to_nlm(__be32 status, u32 vers)
 		case nlm_lck_denied_nolocks:
 		case nlm_lck_blocked:
 		case nlm_lck_denied_grace_period:
+		case nlm_drop_reply:
 			break;
 		case nlm4_deadlock:
 			status = nlm_lck_denied;
@@ -127,7 +128,9 @@ nlmsvc_proc_test(struct svc_rqst *rqstp,
 		return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
 
 	/* Now check for conflicting locks */
-	resp->status = cast_status(nlmsvc_testlock(file, &argp->lock, &resp->lock));
+	resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie));
+	if (resp->status == nlm_drop_reply)
+		return rpc_drop_reply;
 
 	dprintk("lockd: TEST          status %d vers %d\n",
 		ntohl(resp->status), rqstp->rq_vers);
@@ -172,6 +175,8 @@ #endif
 	/* Now try to lock the file */
 	resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock,
 					       argp->block, &argp->cookie));
+	if (resp->status == nlm_drop_reply)
+		return rpc_drop_reply;
 
 	dprintk("lockd: LOCK          status %d\n", ntohl(resp->status));
 	nlm_release_host(host);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index c0df00c..84ebba3 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -182,7 +182,7 @@ again:
 			lock.fl_type  = F_UNLCK;
 			lock.fl_start = 0;
 			lock.fl_end   = OFFSET_MAX;
-			if (posix_lock_file(file->f_file, &lock) < 0) {
+			if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) {
 				printk("lockd: unlock failure in %s:%d\n",
 						__FILE__, __LINE__);
 				return 1;
diff --git a/fs/locks.c b/fs/locks.c
index 52a8100..13a3e02 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -666,11 +666,11 @@ static int locks_block_on_timeout(struct
 }
 
 int
-posix_test_lock(struct file *filp, struct file_lock *fl,
-		struct file_lock *conflock)
+posix_test_lock(struct file *filp, struct file_lock *fl)
 {
 	struct file_lock *cfl;
 
+	fl->fl_type = F_UNLCK;
 	lock_kernel();
 	for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
 		if (!IS_POSIX(cfl))
@@ -679,7 +679,7 @@ posix_test_lock(struct file *filp, struc
 			break;
 	}
 	if (cfl) {
-		__locks_copy_lock(conflock, cfl);
+		__locks_copy_lock(fl, cfl);
 		unlock_kernel();
 		return 1;
 	}
@@ -801,7 +801,7 @@ out:
 	return error;
 }
 
-static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
+static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
 {
 	struct file_lock *fl;
 	struct file_lock *new_fl = NULL;
@@ -1007,6 +1007,7 @@ static int __posix_lock_file_conf(struct
  * posix_lock_file - Apply a POSIX-style lock to a file
  * @filp: The file to apply the lock to
  * @fl: The lock to be applied
+ * @conflock: Place to return a copy of the conflicting lock, if found.
  *
  * Add a POSIX style lock to a file.
  * We merge adjacent & overlapping locks whenever possible.
@@ -1016,26 +1017,12 @@ static int __posix_lock_file_conf(struct
  * whether or not a lock was successfully freed by testing the return
  * value for -ENOENT.
  */
-int posix_lock_file(struct file *filp, struct file_lock *fl)
-{
-	return __posix_lock_file_conf(filp->f_path.dentry->d_inode, fl, NULL);
-}
-EXPORT_SYMBOL(posix_lock_file);
-
-/**
- * posix_lock_file_conf - Apply a POSIX-style lock to a file
- * @filp: The file to apply the lock to
- * @fl: The lock to be applied
- * @conflock: Place to return a copy of the conflicting lock, if found.
- *
- * Except for the conflock parameter, acts just like posix_lock_file.
- */
-int posix_lock_file_conf(struct file *filp, struct file_lock *fl,
+int posix_lock_file(struct file *filp, struct file_lock *fl,
 			struct file_lock *conflock)
 {
-	return __posix_lock_file_conf(filp->f_path.dentry->d_inode, fl, conflock);
+	return __posix_lock_file(filp->f_path.dentry->d_inode, fl, conflock);
 }
-EXPORT_SYMBOL(posix_lock_file_conf);
+EXPORT_SYMBOL(posix_lock_file);
 
 /**
  * posix_lock_file_wait - Apply a POSIX-style lock to a file
@@ -1051,7 +1038,7 @@ int posix_lock_file_wait(struct file *fi
 	int error;
 	might_sleep ();
 	for (;;) {
-		error = posix_lock_file(filp, fl);
+		error = posix_lock_file(filp, fl, NULL);
 		if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP))
 			break;
 		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1123,7 +1110,7 @@ int locks_mandatory_area(int read_write,
 	fl.fl_end = offset + count - 1;
 
 	for (;;) {
-		error = __posix_lock_file_conf(inode, &fl, NULL);
+		error = __posix_lock_file(inode, &fl, NULL);
 		if (error != -EAGAIN)
 			break;
 		if (!(fl.fl_flags & FL_SLEEP))
@@ -1611,12 +1598,63 @@ asmlinkage long sys_flock(unsigned int f
 	return error;
 }
 
+/**
+ * vfs_test_lock - test file byte range lock
+ * @filp: The file to test lock for
+ * @fl: The lock to test
+ * @conf: Place to return a copy of the conflicting lock, if found
+ *
+ * Returns -ERRNO on failure.  Indicates presence of conflicting lock by
+ * setting conf->fl_type to something other than F_UNLCK.
+ */
+int vfs_test_lock(struct file *filp, struct file_lock *fl)
+{
+	fl->fl_type = F_UNLCK;
+	if (filp->f_op && filp->f_op->lock)
+		return filp->f_op->lock(filp, F_GETLK, fl);
+	posix_test_lock(filp, fl);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfs_test_lock);
+
+static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
+{
+	flock->l_pid = fl->fl_pid;
+#if BITS_PER_LONG == 32
+	/*
+	 * Make sure we can represent the posix lock via
+	 * legacy 32bit flock.
+	 */
+	if (fl->fl_start > OFFT_OFFSET_MAX)
+		return -EOVERFLOW;
+	if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
+		return -EOVERFLOW;
+#endif
+	flock->l_start = fl->fl_start;
+	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
+		fl->fl_end - fl->fl_start + 1;
+	flock->l_whence = 0;
+	return 0;
+}
+
+#if BITS_PER_LONG == 32
+static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
+{
+	flock->l_pid = fl->fl_pid;
+	flock->l_start = fl->fl_start;
+	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
+		fl->fl_end - fl->fl_start + 1;
+	flock->l_whence = 0;
+	flock->l_type = fl->fl_type;
+}
+#endif
+
 /* Report the first existing lock that would conflict with l.
  * This implements the F_GETLK command of fcntl().
  */
 int fcntl_getlk(struct file *filp, struct flock __user *l)
 {
-	struct file_lock *fl, cfl, file_lock;
+	struct file_lock file_lock;
 	struct flock flock;
 	int error;
 
@@ -1631,38 +1669,15 @@ int fcntl_getlk(struct file *filp, struc
 	if (error)
 		goto out;
 
-	if (filp->f_op && filp->f_op->lock) {
-		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
-		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
-			file_lock.fl_ops->fl_release_private(&file_lock);
-		if (error < 0)
-			goto out;
-		else
-		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
-	} else {
-		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
-	}
+	error = vfs_test_lock(filp, &file_lock);
+	if (error)
+		goto out;
  
-	flock.l_type = F_UNLCK;
-	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
-#if BITS_PER_LONG == 32
-		/*
-		 * Make sure we can represent the posix lock via
-		 * legacy 32bit flock.
-		 */
-		error = -EOVERFLOW;
-		if (fl->fl_start > OFFT_OFFSET_MAX)
-			goto out;
-		if ((fl->fl_end != OFFSET_MAX)
-		    && (fl->fl_end > OFFT_OFFSET_MAX))
+	flock.l_type = file_lock.fl_type;
+	if (file_lock.fl_type != F_UNLCK) {
+		error = posix_lock_to_flock(&flock, &file_lock);
+		if (error)
 			goto out;
-#endif
-		flock.l_start = fl->fl_start;
-		flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
-			fl->fl_end - fl->fl_start + 1;
-		flock.l_whence = 0;
-		flock.l_type = fl->fl_type;
 	}
 	error = -EFAULT;
 	if (!copy_to_user(l, &flock, sizeof(flock)))
@@ -1671,6 +1686,41 @@ out:
 	return error;
 }
 
+/**
+ * vfs_lock_file - file byte range lock
+ * @filp: The file to apply the lock to
+ * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
+ * @fl: The lock to be applied
+ * @conf: Place to return a copy of the conflicting lock, if found.
+ *
+ * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
+ * locks, the ->lock() interface may return asynchronously, before the lock has
+ * been granted or denied by the underlying filesystem, if (and only if)
+ * fl_grant is set. Callers expecting ->lock() to return asynchronously
+ * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
+ * the request is for a blocking lock. When ->lock() does return asynchronously,
+ * it must return -EINPROGRESS, and call ->fl_grant() when the lock
+ * request completes.
+ * If the request is for non-blocking lock the file system should return
+ * -EINPROGRESS then try to get the lock and call the callback routine with
+ * the result. If the request timed out the callback routine will return a
+ * nonzero return code and the file system should release the lock. The file
+ * system is also responsible to keep a corresponding posix lock when it
+ * grants a lock so the VFS can find out which locks are locally held and do
+ * the correct lock cleanup when required.
+ * The underlying filesystem must not drop the kernel lock or call
+ * ->fl_grant() before returning to the caller with a -EINPROGRESS
+ * return code.
+ */
+int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
+{
+	if (filp->f_op && filp->f_op->lock)
+		return filp->f_op->lock(filp, cmd, fl);
+	else
+		return posix_lock_file(filp, fl, conf);
+}
+EXPORT_SYMBOL_GPL(vfs_lock_file);
+
 /* Apply the lock described by l to an open file descriptor.
  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
  */
@@ -1733,21 +1783,17 @@ again:
 	if (error)
 		goto out;
 
-	if (filp->f_op && filp->f_op->lock != NULL)
-		error = filp->f_op->lock(filp, cmd, file_lock);
-	else {
-		for (;;) {
-			error = posix_lock_file(filp, file_lock);
-			if ((error != -EAGAIN) || (cmd == F_SETLK))
-				break;
-			error = wait_event_interruptible(file_lock->fl_wait,
-					!file_lock->fl_next);
-			if (!error)
-				continue;
-
-			locks_delete_block(file_lock);
+	for (;;) {
+		error = vfs_lock_file(filp, cmd, file_lock, NULL);
+		if (error != -EAGAIN || cmd == F_SETLK)
 			break;
-		}
+		error = wait_event_interruptible(file_lock->fl_wait,
+				!file_lock->fl_next);
+		if (!error)
+			continue;
+
+		locks_delete_block(file_lock);
+		break;
 	}
 
 	/*
@@ -1770,7 +1816,7 @@ #if BITS_PER_LONG == 32
  */
 int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 {
-	struct file_lock *fl, cfl, file_lock;
+	struct file_lock file_lock;
 	struct flock64 flock;
 	int error;
 
@@ -1785,27 +1831,14 @@ int fcntl_getlk64(struct file *filp, str
 	if (error)
 		goto out;
 
-	if (filp->f_op && filp->f_op->lock) {
-		error = filp->f_op->lock(filp, F_GETLK, &file_lock);
-		if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
-			file_lock.fl_ops->fl_release_private(&file_lock);
-		if (error < 0)
-			goto out;
-		else
-		  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
-	} else {
-		fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
-	}
- 
-	flock.l_type = F_UNLCK;
-	if (fl != NULL) {
-		flock.l_pid = fl->fl_pid;
-		flock.l_start = fl->fl_start;
-		flock.l_len = fl->fl_end == OFFSET_MAX ? 0 :
-			fl->fl_end - fl->fl_start + 1;
-		flock.l_whence = 0;
-		flock.l_type = fl->fl_type;
-	}
+	error = vfs_test_lock(filp, &file_lock);
+	if (error)
+		goto out;
+
+	flock.l_type = file_lock.fl_type;
+	if (file_lock.fl_type != F_UNLCK)
+		posix_lock_to_flock64(&flock, &file_lock);
+
 	error = -EFAULT;
 	if (!copy_to_user(l, &flock, sizeof(flock)))
 		error = 0;
@@ -1876,21 +1909,17 @@ again:
 	if (error)
 		goto out;
 
-	if (filp->f_op && filp->f_op->lock != NULL)
-		error = filp->f_op->lock(filp, cmd, file_lock);
-	else {
-		for (;;) {
-			error = posix_lock_file(filp, file_lock);
-			if ((error != -EAGAIN) || (cmd == F_SETLK64))
-				break;
-			error = wait_event_interruptible(file_lock->fl_wait,
-					!file_lock->fl_next);
-			if (!error)
-				continue;
-
-			locks_delete_block(file_lock);
+	for (;;) {
+		error = vfs_lock_file(filp, cmd, file_lock, NULL);
+		if (error != -EAGAIN || cmd == F_SETLK64)
 			break;
-		}
+		error = wait_event_interruptible(file_lock->fl_wait,
+				!file_lock->fl_next);
+		if (!error)
+			continue;
+
+		locks_delete_block(file_lock);
+		break;
 	}
 
 	/*
@@ -1935,10 +1964,7 @@ void locks_remove_posix(struct file *fil
 	lock.fl_ops = NULL;
 	lock.fl_lmops = NULL;
 
-	if (filp->f_op && filp->f_op->lock != NULL)
-		filp->f_op->lock(filp, F_SETLK, &lock);
-	else
-		posix_lock_file(filp, &lock);
+	vfs_lock_file(filp, F_SETLK, &lock, NULL);
 
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
@@ -2015,6 +2041,23 @@ posix_unblock_lock(struct file *filp, st
 
 EXPORT_SYMBOL(posix_unblock_lock);
 
+/**
+ * vfs_cancel_lock - file byte range unblock lock
+ * @filp: The file to apply the unblock to
+ * @fl: The lock to be unblocked
+ *
+ * Used by lock managers to cancel blocked requests
+ */
+int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+{
+	if (filp->f_op && filp->f_op->lock)
+		return filp->f_op->lock(filp, F_CANCELLK, fl);
+	else
+		return posix_unblock_lock(filp, fl);
+}
+
+EXPORT_SYMBOL_GPL(vfs_cancel_lock);
+
 static void lock_get_status(char* out, struct file_lock *fl, int id, char *pfx)
 {
 	struct inode *inode = NULL;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8e66b5a..5eaee6d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -391,17 +391,12 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
-	struct file_lock cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
 	/* Try local locking first */
-	if (posix_test_lock(filp, fl, &cfl)) {
-		fl->fl_start = cfl.fl_start;
-		fl->fl_end = cfl.fl_end;
-		fl->fl_type = cfl.fl_type;
-		fl->fl_pid = cfl.fl_pid;
+	if (posix_test_lock(filp, fl)) {
 		goto out;
 	}
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f52cf5c..d557a51 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3019,6 +3019,8 @@ static int _nfs4_proc_getlk(struct nfs4_
 			status = 0;
 	}
 out:
+	if (request->fl_ops)
+		request->fl_ops->fl_release_private(request);
 	up_read(&clp->cl_sem);
 	return status;
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index af36070..678f3be 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -50,6 +50,7 @@ #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
 #include <linux/namei.h>
 #include <linux/mutex.h>
+#include <linux/lockd/bind.h>
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -2657,6 +2658,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
 	struct file_lock conflock;
 	__be32 status = 0;
 	unsigned int strhashval;
+	unsigned int cmd;
 	int err;
 
 	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
@@ -2739,10 +2741,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
 		case NFS4_READ_LT:
 		case NFS4_READW_LT:
 			file_lock.fl_type = F_RDLCK;
+			cmd = F_SETLK;
 		break;
 		case NFS4_WRITE_LT:
 		case NFS4_WRITEW_LT:
 			file_lock.fl_type = F_WRLCK;
+			cmd = F_SETLK;
 		break;
 		default:
 			status = nfserr_inval;
@@ -2769,9 +2773,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
 
 	/* XXX?: Just to divert the locks_release_private at the start of
 	 * locks_copy_lock: */
-	conflock.fl_ops = NULL;
-	conflock.fl_lmops = NULL;
-	err = posix_lock_file_conf(filp, &file_lock, &conflock);
+	locks_init_lock(&conflock);
+	err = vfs_lock_file(filp, cmd, &file_lock, &conflock);
 	switch (-err) {
 	case 0: /* success! */
 		update_stateid(&lock_stp->st_stateid);
@@ -2788,7 +2791,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
 		status = nfserr_deadlock;
 		break;
 	default:        
-		dprintk("NFSD: nfsd4_lock: posix_lock_file_conf() failed! status %d\n",err);
+		dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
 		status = nfserr_resource;
 		break;
 	}
@@ -2813,7 +2816,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru
 	struct inode *inode;
 	struct file file;
 	struct file_lock file_lock;
-	struct file_lock conflock;
+	int error;
 	__be32 status;
 
 	if (nfs4_in_grace())
@@ -2869,18 +2872,23 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru
 
 	nfs4_transform_lock_offset(&file_lock);
 
-	/* posix_test_lock uses the struct file _only_ to resolve the inode.
+	/* vfs_test_lock uses the struct file _only_ to resolve the inode.
 	 * since LOCKT doesn't require an OPEN, and therefore a struct
-	 * file may not exist, pass posix_test_lock a struct file with
+	 * file may not exist, pass vfs_test_lock a struct file with
 	 * only the dentry:inode set.
 	 */
 	memset(&file, 0, sizeof (struct file));
 	file.f_path.dentry = cstate->current_fh.fh_dentry;
 
 	status = nfs_ok;
-	if (posix_test_lock(&file, &file_lock, &conflock)) {
+	error = vfs_test_lock(&file, &file_lock);
+	if (error) {
+		status = nfserrno(error);
+		goto out;
+	}
+	if (file_lock.fl_type != F_UNLCK) {
 		status = nfserr_denied;
-		nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
+		nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
 	}
 out:
 	nfs4_unlock_state();
@@ -2933,9 +2941,9 @@ nfsd4_locku(struct svc_rqst *rqstp, stru
 	/*
 	*  Try to unlock the file in the VFS.
 	*/
-	err = posix_lock_file(filp, &file_lock);
+	err = vfs_lock_file(filp, F_SETLK, &file_lock, NULL);
 	if (err) {
-		dprintk("NFSD: nfs4_locku: posix_lock_file failed!\n");
+		dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
 		goto out_nfserr;
 	}
 	/*
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 996f561..40b9326 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -3,6 +3,10 @@ #define _LINUX_FCNTL_H
 
 #include <asm/fcntl.h>
 
+/* Cancel a blocking posix lock; internal use only until we expose an
+ * asynchronous lock api to userspace: */
+#define F_CANCELLK	(F_LINUX_SPECIFIC_BASE+5)
+
 #define F_SETLEASE	(F_LINUX_SPECIFIC_BASE+0)
 #define F_GETLEASE	(F_LINUX_SPECIFIC_BASE+1)
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 095a9c9..6aaa45f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -785,6 +785,7 @@ struct file_lock_operations {
 struct lock_manager_operations {
 	int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
 	void (*fl_notify)(struct file_lock *);	/* unblock callback */
+	int (*fl_grant)(struct file_lock *, struct file_lock *, int);
 	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
 	void (*fl_release_private)(struct file_lock *);
 	void (*fl_break)(struct file_lock *);
@@ -856,11 +857,13 @@ extern void locks_init_lock(struct file_
 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_flock(struct file *);
-extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *);
-extern int posix_lock_file_conf(struct file *, struct file_lock *, struct file_lock *);
-extern int posix_lock_file(struct file *, struct file_lock *);
+extern int posix_test_lock(struct file *, struct file_lock *);
+extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
 extern int posix_unblock_lock(struct file *, struct file_lock *);
+extern int vfs_test_lock(struct file *, struct file_lock *);
+extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
+extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
 extern int __break_lease(struct inode *inode, unsigned int flags);
 extern void lease_get_mtime(struct inode *, struct timespec *time);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ac25b56..f79ac52 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -119,6 +119,9 @@ struct nlm_file {
  * couldn't be granted because of a conflicting lock).
  */
 #define NLM_NEVER		(~(unsigned long) 0)
+/* timeout on non-blocking call: */
+#define NLM_TIMEOUT		(7 * HZ)
+
 struct nlm_block {
 	struct kref		b_count;	/* Reference count */
 	struct list_head	b_list;		/* linked list of all blocks */
@@ -130,6 +133,13 @@ struct nlm_block {
 	unsigned int		b_id;		/* block id */
 	unsigned char		b_granted;	/* VFS granted lock */
 	struct nlm_file *	b_file;		/* file in question */
+	struct cache_req *	b_cache_req;	/* deferred request handling */
+	struct file_lock *	b_fl;		/* set for GETLK */
+	struct cache_deferred_req * b_deferred_req;
+	unsigned int		b_flags;	/* block flags */
+#define B_QUEUED		1	/* lock queued */
+#define B_GOT_CALLBACK		2	/* got lock or conflicting lock */
+#define B_TOO_LATE		4	/* too late for non-blocking lock */
 };
 
 /*
@@ -185,8 +195,8 @@ typedef int	  (*nlm_host_match_fn_t)(str
 __be32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
 					struct nlm_lock *, int, struct nlm_cookie *);
 __be32		  nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
-__be32		  nlmsvc_testlock(struct nlm_file *, struct nlm_lock *,
-					struct nlm_lock *);
+__be32		  nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
+			struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *);
 __be32		  nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
 unsigned long	  nlmsvc_retry_blocked(void);
 void		  nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,