From nobody Mon Sep 17 00:00:00 2001 From: tiger yang Date: Thu, 28 Sep 2006 16:37:32 -0700 Subject: ocfs2: add sys_splice support in ocfs2 Add ocfs2_file_splice_read/write to support sys_splice in ocfs2. They are similar to ocfs2_file_aio_read/write: ocfs2_file_splice_read copies data from an ocfs2 file to a pipe, and ocfs2_file_splice_write copies data from a pipe to an ocfs2 file. Signed-off-by: Tiger Yang <tiger.yang@oracle.com> Signed-off-by: Mark Fasheh --- fs/ocfs2/file.c | 189 +++++++++++++++++++++++++++++++++++++++---------------- 1 files changed, 135 insertions(+), 54 deletions(-) 78a57fdb70d892847992e7166128377b82cb4099 diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2bbfa17..7094bb4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -960,59 +960,23 @@ static inline int ocfs2_write_should_rem return 0; } -static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, - const char __user *buf, - size_t count, - loff_t pos) +static int ocfs2_prepare_inode_for_write(struct inode *inode, + loff_t *ppos, + size_t count, + int appending) { - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; - int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; + int ret = 0, meta_level = appending; u32 clusters; - struct file *filp = iocb->ki_filp; - struct inode *inode = filp->f_dentry->d_inode; loff_t newsize, saved_pos; - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, - (unsigned int)count, - filp->f_dentry->d_name.len, - filp->f_dentry->d_name.name); - - /* happy write of zero bytes */ - if (count == 0) - return 0; - - if (!inode) { - mlog(0, "bad inode\n"); - return -EIO; - } - - mutex_lock(&inode->i_mutex); - /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ - if (filp->f_flags & O_DIRECT) { - have_alloc_sem = 1; - down_read(&inode->i_alloc_sem); - } - - /* concurrent O_DIRECT writes are allowed */ - rw_level = (filp->f_flags & O_DIRECT) ? 
0 : 1; - ret = ocfs2_rw_lock(inode, rw_level); - if (ret < 0) { - rw_level = -1; - mlog_errno(ret); - goto out; - } - - /* + /* * We sample i_size under a read level meta lock to see if our write * is extending the file, if it is we back off and get a write level * meta lock. */ - meta_level = (filp->f_flags & O_APPEND) ? 1 : 0; for(;;) { ret = ocfs2_meta_lock(inode, NULL, NULL, meta_level); if (ret < 0) { - meta_level = -1; mlog_errno(ret); goto out; } @@ -1036,17 +1000,17 @@ static ssize_t ocfs2_file_aio_write(stru ret = ocfs2_write_remove_suid(inode); if (ret < 0) { mlog_errno(ret); - goto out; + goto out_unlock; } } /* work on a copy of ppos until we're sure that we won't have * to recalculate it due to relocking. */ - if (filp->f_flags & O_APPEND) { + if (appending) { saved_pos = i_size_read(inode); mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); } else { - saved_pos = iocb->ki_pos; + saved_pos = *ppos; } newsize = count + saved_pos; @@ -1085,16 +1049,62 @@ static ssize_t ocfs2_file_aio_write(stru if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); - goto out; + goto out_unlock; } break; } - - /* ok, we're done with i_size and alloc work */ - iocb->ki_pos = saved_pos; + if (appending) + *ppos = saved_pos; +out_unlock: ocfs2_meta_unlock(inode, meta_level); - meta_level = -1; +out: + return ret; +} + +static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, + const char __user *buf, + size_t count, + loff_t pos) +{ + struct iovec local_iov = { .iov_base = (void __user *)buf, + .iov_len = count }; + int ret, rw_level = -1, have_alloc_sem = 0; + struct file *filp = iocb->ki_filp; + struct inode *inode = filp->f_dentry->d_inode; + mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, + (unsigned int)count, + filp->f_dentry->d_name.len, + filp->f_dentry->d_name.name); + + /* happy write of zero bytes */ + if (count == 0) + return 0; + + mutex_lock(&inode->i_mutex); + /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ + if (filp->f_flags & 
O_DIRECT) { + have_alloc_sem = 1; + down_read(&inode->i_alloc_sem); + } + + /* concurrent O_DIRECT writes are allowed */ + rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1; + ret = ocfs2_rw_lock(inode, rw_level); + if (ret < 0) { + rw_level = -1; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_prepare_inode_for_write(inode, &iocb->ki_pos, count, + (filp->f_flags & O_APPEND ? 1 : 0)); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + /* ok, we're done with i_size and alloc work */ /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb); @@ -1103,7 +1113,7 @@ static ssize_t ocfs2_file_aio_write(stru /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); - /* + /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that * it can unlock our rw lock. (it's the clustered equivalent of @@ -1119,11 +1129,9 @@ static ssize_t ocfs2_file_aio_write(stru } out: - if (meta_level != -1) - ocfs2_meta_unlock(inode, meta_level); if (have_alloc_sem) up_read(&inode->i_alloc_sem); - if (rw_level != -1) + if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); mutex_unlock(&inode->i_mutex); @@ -1208,6 +1216,77 @@ bail: return ret; } +static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, + loff_t *ppos, + size_t len, + unsigned int flags) +{ + int ret; + struct inode *inode = out->f_dentry->d_inode; + + mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, + (unsigned int)len, + out->f_dentry->d_name.len, + out->f_dentry->d_name.name); + + mutex_lock(&inode->i_mutex); + + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_prepare_inode_for_write(inode, ppos, len, 0); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock; + } + + /* ok, we're done with i_size and alloc work */ + ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); 
+ +out_unlock: + ocfs2_rw_unlock(inode, 1); +out: + mutex_unlock(&inode->i_mutex); + + mlog_exit(ret); + return ret; +} + +static ssize_t ocfs2_file_splice_read(struct file *in, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, + unsigned int flags) +{ + int ret = 0; + struct inode *inode = in->f_dentry->d_inode; + + mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, + (unsigned int)len, + in->f_dentry->d_name.len, + in->f_dentry->d_name.name); + + /* + * See the comment in ocfs2_file_aio_read() + */ + ret = ocfs2_meta_lock(inode, NULL, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto bail; + } + ocfs2_meta_unlock(inode, 0); + + ret = generic_file_splice_read(in, ppos, pipe, len, flags); + +bail: + mlog_exit(ret); + return ret; +} + struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -1229,6 +1308,8 @@ const struct file_operations ocfs2_fops .aio_read = ocfs2_file_aio_read, .aio_write = ocfs2_file_aio_write, .ioctl = ocfs2_ioctl, + .splice_read = ocfs2_file_splice_read, + .splice_write = ocfs2_file_splice_write, }; const struct file_operations ocfs2_dops = { -- 1.3.3