GIT 264478665400b174a6461e88865cf503113e329b git+ssh://master.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git#ALL

commit 4130a1182288d4bc918bdf9dc2fbf74284d3f998
Author: Akinobu Mita <akinobu.mita@gmail.com>
Date:   Thu Oct 12 14:29:33 2006 +0900

    ocfs2: delete redundant memcmp()
    
    This patch deletes the redundant second memcmp() call made on each step
    of the rb tree lookup.
    
    Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
    Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>

commit 94e0f296d71eda836f4d331cb858399ece74e9c5
Author: Sunil Mushran <sunil.mushran@oracle.com>
Date:   Wed Oct 11 12:23:02 2006 -0700

    ocfs2: remove spurious d_count check in ocfs2_rename()
    
    This was causing some folks to incorrectly get -EBUSY during rename.
    
    Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
    Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>

commit dd112138b5de5bbf514a9c3d81a0281a78b5c90c
Author: Chandra Seetharaman <sekharan@us.ibm.com>
Date:   Tue Oct 10 15:15:55 2006 -0700

    configfs: handle kzalloc() failure in check_perm()
    
    check_perm() does not drop its reference to the module when kzalloc()
    fails. Release it with module_put() on that error path.
    
    Signed-Off-By: Chandra Seetharaman <sekharan@us.ibm.com>
    Signed-off-by: Joel Becker <joel.becker@oracle.com>

commit aa03464b4c6ea8e8bdb4f4e8633efeb978aa3d93
Author: Mark Fasheh <mark.fasheh@oracle.com>
Date:   Tue Oct 3 17:53:05 2006 -0700

    ocfs2: cond_resched() in ocfs2_zero_extend()
    
    The loop within ocfs2_zero_extend() can execute for a long time, causing
    spurious soft lockup warnings.
    
    Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>

commit 9e4a4b87d81672a02c503508fe9c2c1c26dc6791
Author: Mark Fasheh <mark.fasheh@oracle.com>
Date:   Tue Oct 3 17:44:42 2006 -0700

    ocfs2: fix page zeroing during simple extends
    
    The page zeroing code was missing the region between old i_size and new
    i_size for those extends that didn't actually require a change in space
    allocation.
    
    Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>

commit 3fdf42fd63f1206fb235aa39b49ad0696fe8f6f6
Author: Mark Fasheh <mark.fasheh@oracle.com>
Date:   Wed Jul 5 13:15:54 2006 -0700

    ocfs2: Shared writeable mmap
    
    Implement cluster consistent shared writeable mappings using the
    ->page_mkwrite() callback.
    
    Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
 fs/configfs/file.c             |   14 +++---
 fs/ocfs2/cluster/nodemanager.c |   10 ++--
 fs/ocfs2/dlmglue.c             |   10 ++++
 fs/ocfs2/file.c                |   51 ++++++++++++--------
 fs/ocfs2/mmap.c                |  100 ++++++++++++++++++++++++++++++++--------
 fs/ocfs2/namei.c               |    8 ---
 6 files changed, 135 insertions(+), 58 deletions(-)

diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index e6d5754..cf33fac 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -275,13 +275,14 @@ static int check_perm(struct inode * ino
 	 * it in file->private_data for easy access.
 	 */
 	buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
-	if (buffer) {
-		init_MUTEX(&buffer->sem);
-		buffer->needs_read_fill = 1;
-		buffer->ops = ops;
-		file->private_data = buffer;
-	} else
+	if (!buffer) {
 		error = -ENOMEM;
+		goto Enomem;
+	}
+	init_MUTEX(&buffer->sem);
+	buffer->needs_read_fill = 1;
+	buffer->ops = ops;
+	file->private_data = buffer;
 	goto Done;
 
  Einval:
@@ -289,6 +290,7 @@ static int check_perm(struct inode * ino
 	goto Done;
  Eaccess:
 	error = -EACCES;
+ Enomem:
 	module_put(attr->ca_owner);
  Done:
 	if (error && item)
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index e1fceb8..d11753c 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -152,14 +152,16 @@ static struct o2nm_node *o2nm_node_ip_tr
 	struct o2nm_node *node, *ret = NULL;
 
 	while (*p) {
+		int cmp;
+
 		parent = *p;
 		node = rb_entry(parent, struct o2nm_node, nd_ip_node);
 
-		if (memcmp(&ip_needle, &node->nd_ipv4_address,
-		           sizeof(ip_needle)) < 0)
+		cmp = memcmp(&ip_needle, &node->nd_ipv4_address,
+				sizeof(ip_needle));
+		if (cmp < 0)
 			p = &(*p)->rb_left;
-		else if (memcmp(&ip_needle, &node->nd_ipv4_address,
-			        sizeof(ip_needle)) > 0)
+		else if (cmp > 0)
 			p = &(*p)->rb_right;
 		else {
 			ret = node;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 8801e41..7691f8a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2649,6 +2649,15 @@ static int ocfs2_data_convert_worker(str
        	inode = ocfs2_lock_res_inode(lockres);
 	mapping = inode->i_mapping;
 
+	/*
+	 * We need this before the filemap_fdatawrite() so that it can
+	 * transfer the dirty bit from the PTE to the
+	 * page. Unfortunately this means that even for EX->PR
+	 * downconverts, we'll lose our mappings and have to build
+	 * them up again.
+	 */
+	unmap_mapping_range(mapping, 0, 0, 0);
+
 	if (filemap_fdatawrite(mapping)) {
 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -2656,7 +2665,6 @@ static int ocfs2_data_convert_worker(str
 	sync_mapping_buffers(mapping);
 	if (blocking == LKM_EXMODE) {
 		truncate_inode_pages(mapping, 0);
-		unmap_mapping_range(mapping, 0, 0, 0);
 	} else {
 		/* We only need to wait on the I/O if we're not also
 		 * truncating pages because truncate_inode_pages waits
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d9ba0a9..1be74c4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -30,6 +30,7 @@ #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/uio.h>
+#include <linux/sched.h>
 
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
@@ -691,6 +692,12 @@ static int ocfs2_zero_extend(struct inod
 		}
 
 		start_off += sb->s_blocksize;
+
+		/*
+		 * Very large extends have the potential to lock up
+		 * the cpu for extended periods of time.
+		 */
+		cond_resched();
 	}
 
 out:
@@ -728,31 +735,36 @@ static int ocfs2_extend_file(struct inod
 	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - 
 		OCFS2_I(inode)->ip_clusters;
 
-	if (clusters_to_add) {
-		/* 
-		 * protect the pages that ocfs2_zero_extend is going to
-		 * be pulling into the page cache.. we do this before the
-		 * metadata extend so that we don't get into the situation
-		 * where we've extended the metadata but can't get the data
-		 * lock to zero.
-		 */
-		ret = ocfs2_data_lock(inode, 1);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out;
-		}
+	/* 
+	 * protect the pages that ocfs2_zero_extend is going to be
+	 * pulling into the page cache.. we do this before the
+	 * metadata extend so that we don't get into the situation
+	 * where we've extended the metadata but can't get the data
+	 * lock to zero.
+	 */
+	ret = ocfs2_data_lock(inode, 1);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
 
+	if (clusters_to_add) {
 		ret = ocfs2_extend_allocation(inode, clusters_to_add);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out_unlock;
 		}
+	}
 
-		ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_unlock;
-		}
+	/*
+	 * Call this even if we don't add any clusters to the tree. We
+	 * still need to zero the area between the old i_size and the
+	 * new i_size.
+	 */
+	ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_unlock;
 	}
 
 	if (!tail_to_skip) {
@@ -764,8 +776,7 @@ static int ocfs2_extend_file(struct inod
 	}
 
 out_unlock:
-	if (clusters_to_add) /* this is the only case in which we lock */
-		ocfs2_data_unlock(inode, 1);
+	ocfs2_data_unlock(inode, 1);
 
 out:
 	return ret;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 83934e3..fb5b18f 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -42,6 +42,23 @@ #include "file.h"
 #include "inode.h"
 #include "mmap.h"
 
+static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
+{
+	/* The best way to deal with signals in the vm path is to block
+	 * them all upfront, rather than allowing the locking paths to
+	 * return -ERESTARTSYS. *blocked is filled with every signal. */
+	sigfillset(blocked);
+
+	/* oldset is handed back to ocfs2_vm_op_unblock_sigs() later. We
+	 * should technically never get a bad return from sigprocmask */
+	return sigprocmask(SIG_BLOCK, blocked, oldset);
+}
+
+static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
+{
+	return sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
 static struct page *ocfs2_nopage(struct vm_area_struct * area,
 				 unsigned long address,
 				 int *type)
@@ -53,14 +70,7 @@ static struct page *ocfs2_nopage(struct 
 	mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
 		   type);
 
-	/* The best way to deal with signals in this path is
-	 * to block them upfront, rather than allowing the
-	 * locking paths to return -ERESTARTSYS. */
-	sigfillset(&blocked);
-
-	/* We should technically never get a bad ret return
-	 * from sigprocmask */
-	ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
+	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -68,7 +78,7 @@ static struct page *ocfs2_nopage(struct 
 
 	page = filemap_nopage(area, address, type);
 
-	ret = sigprocmask(SIG_SETMASK, &oldset, NULL);
+	ret = ocfs2_vm_op_unblock_sigs(&oldset);
 	if (ret < 0)
 		mlog_errno(ret);
 out:
@@ -76,21 +86,73 @@ out:
 	return page;
 }
 
+static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	sigset_t blocked, oldset;
+	int ret, ret2;
+	pgoff_t last_index;
+
+	mlog_entry("(inode %llu, page index %lu)\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno, page->index);
+
+	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* Take a meta data lock so that i_size is up to date, then make
+	 * sure the page does not lie beyond the page that holds the end
+	 * of file. This particular check may be a little paranoid. */
+	ret = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_restore_signals;
+	}
+
+	/*
+	 * When we support holes, allocation should be handled here,
+	 * as writepage() is too late to handle ENOSPC issues.
+	 */
+	last_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+	if (page->index > last_index) {
+		ret = -EFBIG;
+		goto out_meta_unlock;
+	}
+
+	/*
+	 * Take and drop an exclusive data lock here. This will ensure
+	 * that other nodes write out and invalidate their pages for
+	 * this inode. Dlmglue handles caching of the exclusive lock,
+	 * so the page can be safely marked writeable until another
+	 * node notifies us of competing access.
+	 */
+	ret = ocfs2_data_lock(inode, 1);
+	if (ret < 0)
+		mlog_errno(ret);
+	else
+		ocfs2_data_unlock(inode, 1);
+
+out_meta_unlock:
+	ocfs2_meta_unlock(inode, 0);
+
+out_restore_signals:
+	ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
+	if (ret2 < 0)
+		mlog_errno(ret2);
+
+out:
+	return ret;
+}
+
 static struct vm_operations_struct ocfs2_file_vm_ops = {
-	.nopage = ocfs2_nopage,
+	.nopage		= ocfs2_nopage,
+	.page_mkwrite	= ocfs2_page_mkwrite,
 };
 
 int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	/* We don't want to support shared writable mappings yet. */
-	if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
-	    && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
-		mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
-		/* This is -EINVAL because generic_file_readonly_mmap
-		 * returns it in a similar situation. */
-		return -EINVAL;
-	}
-
 	file_accessed(file);
 	vma->vm_ops = &ocfs2_file_vm_ops;
 	return 0;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 259155f..a57b751 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1085,14 +1085,6 @@ static int ocfs2_rename(struct inode *ol
 			BUG();
 	}
 
-	if (atomic_read(&old_dentry->d_count) > 2) {
-		shrink_dcache_parent(old_dentry);
-		if (atomic_read(&old_dentry->d_count) > 2) {
-			status = -EBUSY;
-			goto bail;
-		}
-	}
-
 	/* Assume a directory heirarchy thusly:
 	 * a/b/c
 	 * a/d