From nobody Mon Sep 17 00:00:00 2001
From: Mark Fasheh
Date: Wed, 5 Jul 2006 13:15:54 -0700
Subject: [PATCH] ocfs2: Shared writeable mmap

Implement cluster consistent shared writeable mappings using the
->page_mkwrite() callback.

Signed-off-by: Mark Fasheh

---

 fs/ocfs2/dlmglue.c |   10 +++++
 fs/ocfs2/mmap.c    |  111 ++++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 101 insertions(+), 20 deletions(-)

59690556d210b0f35e8fe38d9a823456f38ce0f6
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1f..c00a843 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2684,6 +2684,15 @@ static void ocfs2_data_convert_worker(st
 	inode = ocfs2_lock_res_inode(lockres);
 	mapping = inode->i_mapping;
 
+	/*
+	 * We need this before the filemap_fdatawrite() so that it can
+	 * transfer the dirty bit from the PTE to the
+	 * page. Unfortunately this means that even for EX->PR
+	 * downconverts, we'll lose our mappings and have to build
+	 * them up again.
+	 */
+	unmap_mapping_range(mapping, 0, 0, 0);
+
 	if (filemap_fdatawrite(mapping)) {
 		mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -2691,7 +2700,6 @@ static void ocfs2_data_convert_worker(st
 	sync_mapping_buffers(mapping);
 	if (blocking == LKM_EXMODE) {
 		truncate_inode_pages(mapping, 0);
-		unmap_mapping_range(mapping, 0, 0, 0);
 	} else {
 		/* We only need to wait on the I/O if we're not also
 		 * truncating pages because truncate_inode_pages waits
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 83934e3..fb5b18f 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -42,6 +42,23 @@
 #include "file.h"
 #include "inode.h"
 #include "mmap.h"
+static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
+{
+	/* The best way to deal with signals in the vm path is
+	 * to block them upfront, rather than allowing the
+	 * locking paths to return -ERESTARTSYS. */
+	sigfillset(blocked);
+
+	/* We should technically never get a bad return value
+	 * from sigprocmask */
+	return sigprocmask(SIG_BLOCK, blocked, oldset);
+}
+
+static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
+{
+	return sigprocmask(SIG_SETMASK, oldset, NULL);
+}
+
 static struct page *ocfs2_nopage(struct vm_area_struct * area,
 				 unsigned long address,
 				 int *type)
@@ -53,14 +70,7 @@ static struct page *ocfs2_nopage(struct 
 	mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
 		   type);
 
-	/* The best way to deal with signals in this path is
-	 * to block them upfront, rather than allowing the
-	 * locking paths to return -ERESTARTSYS. */
-	sigfillset(&blocked);
-
-	/* We should technically never get a bad ret return
-	 * from sigprocmask */
-	ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
+	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
 	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
@@ -68,7 +78,7 @@ static struct page *ocfs2_nopage(struct 
 
 	page = filemap_nopage(area, address, type);
 
-	ret = sigprocmask(SIG_SETMASK, &oldset, NULL);
+	ret = ocfs2_vm_op_unblock_sigs(&oldset);
 	if (ret < 0)
 		mlog_errno(ret);
 out:
@@ -76,21 +86,84 @@ out:
 	return page;
 }
 
+/*
+ * ->page_mkwrite() handler for shared writeable mappings.  With signals
+ * blocked, verify under the meta data lock that the page is not past
+ * end of file, then take and drop the exclusive data lock.
+ *
+ * Returns 0 on success, -EFBIG if the page lies beyond i_size, or the
+ * negative error from the signal or cluster locking calls.
+ */
+static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct inode *inode = vma->vm_file->f_dentry->d_inode;
+	sigset_t blocked, oldset;
+	int ret, ret2;
+	pgoff_t last_index;
+
+	mlog_entry("(inode %llu, page index %lu)\n",
+		   (unsigned long long)OCFS2_I(inode)->ip_blkno, page->index);
+
+	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	/* Take a meta data lock so that we can test the page location
+	 * against the proper end of file. This particular check may
+	 * be a little paranoid. */
+	ret = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_restore_signals;
+	}
+
+	/*
+	 * When we support holes, allocation should be handled here,
+	 * as writepage() is too late to handle ENOSPC issues.
+	 *
+	 * i_size is in bytes; shift right to get the index of the
+	 * page containing end of file.
+	 */
+	last_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
+	if (page->index > last_index) {
+		ret = -EFBIG;
+		goto out_meta_unlock;
+	}
+
+	/*
+	 * Take and drop an exclusive data lock here. This will ensure
+	 * that other nodes write out and invalidate their pages for
+	 * this inode. Dlmglue handles caching of the exclusive lock,
+	 * so the page can be safely marked writeable until another
+	 * node notifies us of competing access.
+	 */
+	ret = ocfs2_data_lock(inode, 1);
+	if (ret < 0)
+		mlog_errno(ret);
+	else
+		ocfs2_data_unlock(inode, 1);
+
+out_meta_unlock:
+	ocfs2_meta_unlock(inode, 0);
+
+out_restore_signals:
+	ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
+	if (ret2 < 0)
+		mlog_errno(ret2);
+
+out:
+	return ret;
+}
+
 static struct vm_operations_struct ocfs2_file_vm_ops = {
-	.nopage = ocfs2_nopage,
+	.nopage = ocfs2_nopage,
+	.page_mkwrite = ocfs2_page_mkwrite,
 };
 
 int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	/* We don't want to support shared writable mappings yet. */
-	if (((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE))
-	    && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
-		mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
-		/* This is -EINVAL because generic_file_readonly_mmap
-		 * returns it in a similar situation. */
-		return -EINVAL;
-	}
-
 	file_accessed(file);
 	vma->vm_ops = &ocfs2_file_vm_ops;
 	return 0;
-- 
1.3.3