GIT 264478665400b174a6461e88865cf503113e329b git+ssh://master.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git#ALL commit 4130a1182288d4bc918bdf9dc2fbf74284d3f998 Author: Akinobu Mita Date: Thu Oct 12 14:29:33 2006 +0900 ocfs2: delete redundant memcmp() This patch deletes redundant memcmp() while looking up in rb tree. Signed-off-by: Akinbou Mita Signed-off-by: Mark Fasheh commit 94e0f296d71eda836f4d331cb858399ece74e9c5 Author: Sunil Mushran Date: Wed Oct 11 12:23:02 2006 -0700 ocfs2: remove spurious d_count check in ocfs2_rename() This was causing some folks to incorrectly get -EBUSY during rename. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh commit dd112138b5de5bbf514a9c3d81a0281a78b5c90c Author: Chandra Seetharaman Date: Tue Oct 10 15:15:55 2006 -0700 configfs: handle kzalloc() failure in check_perm() check_perm() does not drop the reference to the module when kzalloc() failure occurs. Signed-Off-By: Chandra Seetharaman Signed-off-by: Joel Becker commit aa03464b4c6ea8e8bdb4f4e8633efeb978aa3d93 Author: Mark Fasheh Date: Tue Oct 3 17:53:05 2006 -0700 ocfs2: cond_resched() in ocfs2_zero_extend() The loop within ocfs2_zero_extend() can execute for a long time, causing spurious soft lockup warnings. Signed-off-by: Mark Fasheh commit 9e4a4b87d81672a02c503508fe9c2c1c26dc6791 Author: Mark Fasheh Date: Tue Oct 3 17:44:42 2006 -0700 ocfs2: fix page zeroing during simple extends The page zeroing code was missing the region between old i_size and new i_size for those extends that didn't actually require a change in space allocation. Signed-off-by: Mark Fasheh commit 3fdf42fd63f1206fb235aa39b49ad0696fe8f6f6 Author: Mark Fasheh Date: Wed Jul 5 13:15:54 2006 -0700 ocfs2: Shared writeable mmap Implement cluster consistent shared writeable mappings using the ->page_mkwrite() callback. Signed-off-by: Mark Fasheh fs/configfs/file.c | 14 +++--- fs/ocfs2/cluster/nodemanager.c | 10 ++-- fs/ocfs2/dlmglue.c | 10 ++++ fs/ocfs2/file.c | 51 ++++++++++++-------- fs/ocfs2/mmap.c | 100 ++++++++++++++++++++++++++++++++-------- fs/ocfs2/namei.c | 8 --- 6 files changed, 135 insertions(+), 58 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index e6d5754..cf33fac 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -275,13 +275,14 @@ static int check_perm(struct inode * ino * it in file->private_data for easy access. */ buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL); - if (buffer) { - init_MUTEX(&buffer->sem); - buffer->needs_read_fill = 1; - buffer->ops = ops; - file->private_data = buffer; - } else + if (!buffer) { error = -ENOMEM; + goto Enomem; + } + init_MUTEX(&buffer->sem); + buffer->needs_read_fill = 1; + buffer->ops = ops; + file->private_data = buffer; goto Done; Einval: @@ -289,6 +290,7 @@ static int check_perm(struct inode * ino goto Done; Eaccess: error = -EACCES; + Enomem: module_put(attr->ca_owner); Done: if (error && item) diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index e1fceb8..d11753c 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -152,14 +152,16 @@ static struct o2nm_node *o2nm_node_ip_tr struct o2nm_node *node, *ret = NULL; while (*p) { + int cmp; + parent = *p; node = rb_entry(parent, struct o2nm_node, nd_ip_node); - if (memcmp(&ip_needle, &node->nd_ipv4_address, - sizeof(ip_needle)) < 0) + cmp = memcmp(&ip_needle, &node->nd_ipv4_address, + sizeof(ip_needle)); + if (cmp < 0) p = &(*p)->rb_left; - else if (memcmp(&ip_needle, &node->nd_ipv4_address, - sizeof(ip_needle)) > 0) + else if (cmp > 0) p = &(*p)->rb_right; else { ret = node; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8801e41..7691f8a 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2649,6 +2649,15 @@ static int ocfs2_data_convert_worker(str inode = ocfs2_lock_res_inode(lockres); mapping = inode->i_mapping; + /* + * We need this before the filemap_fdatawrite() so that it can + * transfer the dirty bit from the PTE to the + * page. Unfortunately this means that even for EX->PR + * downconverts, we'll lose our mappings and have to build + * them up again. + */ + unmap_mapping_range(mapping, 0, 0, 0); + if (filemap_fdatawrite(mapping)) { mlog(ML_ERROR, "Could not sync inode %llu for downconvert!", (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -2656,7 +2665,6 @@ static int ocfs2_data_convert_worker(str sync_mapping_buffers(mapping); if (blocking == LKM_EXMODE) { truncate_inode_pages(mapping, 0); - unmap_mapping_range(mapping, 0, 0, 0); } else { /* We only need to wait on the I/O if we're not also * truncating pages because truncate_inode_pages waits diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index d9ba0a9..1be74c4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -30,6 +30,7 @@ #include #include #include #include +#include #define MLOG_MASK_PREFIX ML_INODE #include @@ -691,6 +692,12 @@ static int ocfs2_zero_extend(struct inod } start_off += sb->s_blocksize; + + /* + * Very large extends have the potential to lock up + * the cpu for extended periods of time. + */ + cond_resched(); } out: @@ -728,31 +735,36 @@ static int ocfs2_extend_file(struct inod clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) - OCFS2_I(inode)->ip_clusters; - if (clusters_to_add) { - /* - * protect the pages that ocfs2_zero_extend is going to - * be pulling into the page cache.. we do this before the - * metadata extend so that we don't get into the situation - * where we've extended the metadata but can't get the data - * lock to zero. - */ - ret = ocfs2_data_lock(inode, 1); - if (ret < 0) { - mlog_errno(ret); - goto out; - } + /* + * protect the pages that ocfs2_zero_extend is going to be + * pulling into the page cache.. we do this before the + * metadata extend so that we don't get into the situation + * where we've extended the metadata but can't get the data + * lock to zero. + */ + ret = ocfs2_data_lock(inode, 1); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + if (clusters_to_add) { ret = ocfs2_extend_allocation(inode, clusters_to_add); if (ret < 0) { mlog_errno(ret); goto out_unlock; } + } - ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + /* + * Call this even if we don't add any clusters to the tree. We + * still need to zero the area between the old i_size and the + * new i_size. + */ + ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock; } if (!tail_to_skip) { @@ -764,8 +776,7 @@ static int ocfs2_extend_file(struct inod } out_unlock: - if (clusters_to_add) /* this is the only case in which we lock */ - ocfs2_data_unlock(inode, 1); + ocfs2_data_unlock(inode, 1); out: return ret; diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 83934e3..fb5b18f 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -42,6 +42,23 @@ #include "file.h" #include "inode.h" #include "mmap.h" +static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) +{ + /* The best way to deal with signals in the vm path is + * to block them upfront, rather than allowing the + * locking paths to return -ERESTARTSYS. */ + sigfillset(blocked); + + /* We should technically never get a bad return value + * from sigprocmask */ + return sigprocmask(SIG_BLOCK, blocked, oldset); +} + +static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) +{ + return sigprocmask(SIG_SETMASK, oldset, NULL); +} + static struct page *ocfs2_nopage(struct vm_area_struct * area, unsigned long address, int *type) @@ -53,14 +70,7 @@ static struct page *ocfs2_nopage(struct mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, type); - /* The best way to deal with signals in this path is - * to block them upfront, rather than allowing the - * locking paths to return -ERESTARTSYS. */ - sigfillset(&blocked); - - /* We should technically never get a bad ret return - * from sigprocmask */ - ret = sigprocmask(SIG_BLOCK, &blocked, &oldset); + ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); if (ret < 0) { mlog_errno(ret); goto out; @@ -68,7 +78,7 @@ static struct page *ocfs2_nopage(struct page = filemap_nopage(area, address, type); - ret = sigprocmask(SIG_SETMASK, &oldset, NULL); + ret = ocfs2_vm_op_unblock_sigs(&oldset); if (ret < 0) mlog_errno(ret); out: @@ -76,21 +86,73 @@ out: return page; } +static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_dentry->d_inode; + sigset_t blocked, oldset; + int ret, ret2; + pgoff_t last_index; + + mlog_entry("(inode %llu, page index %lu)\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, page->index); + + ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + /* Take a meta data lock so that we can test the page location + * against the proper end of file. This particular check may + * be a little paranoid. */ + ret = ocfs2_meta_lock(inode, NULL, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto out_restore_signals; + } + + /* + * When we support holes, allocation should be handled here, + * as writepage() is too late to handle ENOSPC issues. + */ + last_index = i_size_read(inode) << PAGE_CACHE_SHIFT; + if (page->index > last_index) { + ret = -EFBIG; + goto out_meta_unlock; + } + + /* + * Take and drop an exclusive data lock here. This will ensure + * that other nodes write out and invalidate their pages for + * this inode. Dlmglue handles caching of the exclusive lock, + * so the page can be safely marked writeable until another + * node notifies us of competing access. + */ + ret = ocfs2_data_lock(inode, 1); + if (ret < 0) + mlog_errno(ret); + else + ocfs2_data_unlock(inode, 1); + +out_meta_unlock: + ocfs2_meta_unlock(inode, 0); + +out_restore_signals: + ret2 = ocfs2_vm_op_unblock_sigs(&oldset); + if (ret2 < 0) + mlog_errno(ret2); + +out: + return ret; +} + static struct vm_operations_struct ocfs2_file_vm_ops = { - .nopage = ocfs2_nopage, + .nopage = ocfs2_nopage, + .page_mkwrite = ocfs2_page_mkwrite, }; int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) { - /* We don't want to support shared writable mappings yet. */ - if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) - && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { - mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); - /* This is -EINVAL because generic_file_readonly_mmap - * returns it in a similar situation. */ - return -EINVAL; - } - file_accessed(file); vma->vm_ops = &ocfs2_file_vm_ops; return 0; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 259155f..a57b751 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1085,14 +1085,6 @@ static int ocfs2_rename(struct inode *ol BUG(); } - if (atomic_read(&old_dentry->d_count) > 2) { - shrink_dcache_parent(old_dentry); - if (atomic_read(&old_dentry->d_count) > 2) { - status = -EBUSY; - goto bail; - } - } - /* Assume a directory heirarchy thusly: * a/b/c * a/d