Index: linux-2.6/mm/filemap.c =================================================================== --- linux-2.6.orig/mm/filemap.c +++ linux-2.6/mm/filemap.c @@ -30,7 +30,7 @@ #include #include #include -#include "filemap.h" +#include /* for BUG_ON(!in_atomic()) only */ #include "internal.h" /* @@ -107,6 +107,13 @@ generic_file_direct_IO(int rw, struct ki * ->dcache_lock (proc_pid_lookup) */ +static int failcnt; +static int FAIL(int x) +{ + failcnt++; + return (failcnt % x == 0); +} + /* * Remove a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage @@ -664,26 +671,22 @@ EXPORT_SYMBOL(find_lock_page); struct page *find_or_create_page(struct address_space *mapping, unsigned long index, gfp_t gfp_mask) { - struct page *page, *cached_page = NULL; + struct page *page; int err; repeat: page = find_lock_page(mapping, index); if (!page) { - if (!cached_page) { - cached_page = alloc_page(gfp_mask); - if (!cached_page) - return NULL; - } - err = add_to_page_cache_lru(cached_page, mapping, - index, gfp_mask); - if (!err) { - page = cached_page; - cached_page = NULL; - } else if (err == -EEXIST) - goto repeat; + page = alloc_page(gfp_mask); + if (!page) + return NULL; + err = add_to_page_cache_lru(page, mapping, index, gfp_mask); + if (unlikely(err)) { + page_cache_release(page); + page = NULL; + if (err == -EEXIST) + goto repeat; + } } - if (cached_page) - page_cache_release(cached_page); return page; } EXPORT_SYMBOL(find_or_create_page); @@ -869,11 +872,9 @@ void do_generic_mapping_read(struct addr unsigned long next_index; unsigned long prev_index; loff_t isize; - struct page *cached_page; int error; struct file_ra_state ra = *_ra; - cached_page = NULL; index = *ppos >> PAGE_CACHE_SHIFT; next_index = index; prev_index = ra.prev_page; @@ -1037,23 +1038,20 @@ no_cached_page: * Ok, it wasn't cached, so we need to create a new * page.. */ - if (!cached_page) { - cached_page = page_cache_alloc_cold(mapping); - if (!cached_page) { - desc->error = -ENOMEM; - goto out; - } + page = page_cache_alloc_cold(mapping); + if (!page) { + desc->error = -ENOMEM; + goto out; } - error = add_to_page_cache_lru(cached_page, mapping, + error = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); if (error) { + page_cache_release(page); if (error == -EEXIST) goto find_page; desc->error = error; goto out; } - page = cached_page; - cached_page = NULL; goto readpage; } @@ -1061,8 +1059,6 @@ out: *_ra = ra; *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; - if (cached_page) - page_cache_release(cached_page); if (filp) file_accessed(filp); } @@ -1731,35 +1727,28 @@ static inline struct page *__read_cache_ int (*filler)(void *,struct page*), void *data) { - struct page *page, *cached_page = NULL; + struct page *page; int err; repeat: page = find_get_page(mapping, index); if (!page) { - if (!cached_page) { - cached_page = page_cache_alloc_cold(mapping); - if (!cached_page) - return ERR_PTR(-ENOMEM); - } - err = add_to_page_cache_lru(cached_page, mapping, - index, GFP_KERNEL); - if (err == -EEXIST) - goto repeat; - if (err < 0) { + page = page_cache_alloc_cold(mapping); + if (!page) + return ERR_PTR(-ENOMEM); + err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); + if (unlikely(err)) { + page_cache_release(page); + if (err == -EEXIST) + goto repeat; /* Presumably ENOMEM for radix tree node */ - page_cache_release(cached_page); return ERR_PTR(err); } - page = cached_page; - cached_page = NULL; err = filler(data, page); if (err < 0) { page_cache_release(page); page = ERR_PTR(err); } } - if (cached_page) - page_cache_release(cached_page); return page; } @@ -1810,40 +1799,6 @@ retry: EXPORT_SYMBOL(read_cache_page); /* - * If the page was newly created, increment its refcount and add it to the - * caller's lru-buffering pagevec. This function is specifically for - * generic_file_write(). - */ -static inline struct page * -__grab_cache_page(struct address_space *mapping, unsigned long index, - struct page **cached_page, struct pagevec *lru_pvec) -{ - int err; - struct page *page; -repeat: - page = find_lock_page(mapping, index); - if (!page) { - if (!*cached_page) { - *cached_page = page_cache_alloc(mapping); - if (!*cached_page) - return NULL; - } - err = add_to_page_cache(*cached_page, mapping, - index, GFP_KERNEL); - if (err == -EEXIST) - goto repeat; - if (err == 0) { - page = *cached_page; - page_cache_get(page); - if (!pagevec_add(lru_pvec, page)) - __pagevec_lru_add(lru_pvec); - *cached_page = NULL; - } - } - return page; -} - -/* * The logic we want is * * if suid or (sgid and xgrp) @@ -1891,8 +1846,7 @@ int remove_suid(struct dentry *dentry) } EXPORT_SYMBOL(remove_suid); -size_t -__filemap_copy_from_user_iovec_inatomic(char *vaddr, +static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { size_t copied = 0, left = 0; @@ -1915,6 +1869,115 @@ __filemap_copy_from_user_iovec_inatomic( } /* + * Copy as much as we can into the page and return the number of bytes which + * were sucessfully copied. If a fault is encountered then return the number of + * bytes which were copied. + */ +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr; + size_t copied; + + BUG_ON(!in_atomic()); + kaddr = kmap_atomic(page, KM_USER0); + if (likely(i->nr_segs == 1)) { + int left; + char __user *buf = i->iov->iov_base + i->iov_offset; + left = __copy_from_user_inatomic_nocache(kaddr + offset, + buf, bytes); + copied = bytes - left; + } else { + copied = __iovec_copy_from_user_inatomic(kaddr + offset, + i->iov, i->iov_offset, bytes); + } + kunmap_atomic(kaddr, KM_USER0); + + return copied; +} +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); + +/* + * This has the same sideeffects and return value as + * iov_iter_copy_from_user_atomic(). + * The difference is that it attempts to resolve faults. + * Page must not be locked. + */ +size_t iov_iter_copy_from_user(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr; + size_t copied; + + kaddr = kmap(page); + if (likely(i->nr_segs == 1)) { + int left; + char __user *buf = i->iov->iov_base + i->iov_offset; + left = __copy_from_user_nocache(kaddr + offset, buf, bytes); + copied = bytes - left; + } else { + copied = __iovec_copy_from_user_inatomic(kaddr + offset, + i->iov, i->iov_offset, bytes); + } + kunmap(page); + return copied; +} +EXPORT_SYMBOL(iov_iter_copy_from_user); + +static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes) +{ + if (likely(i->nr_segs == 1)) { + i->iov_offset += bytes; + } else { + const struct iovec *iov = i->iov; + size_t base = i->iov_offset; + + while (bytes) { + int copy = min(bytes, iov->iov_len - base); + + bytes -= copy; + base += copy; + if (iov->iov_len == base) { + iov++; + base = 0; + } + } + i->iov = iov; + i->iov_offset = base; + } +} + +void iov_iter_advance(struct iov_iter *i, size_t bytes) +{ + BUG_ON(i->count < bytes); + + __iov_iter_advance_iov(i, bytes); + i->count -= bytes; +} +EXPORT_SYMBOL(iov_iter_advance); + +int iov_iter_fault_in_readable(struct iov_iter *i) +{ + size_t seglen = min(i->iov->iov_len - i->iov_offset, i->count); + char __user *buf = i->iov->iov_base + i->iov_offset; + return fault_in_pages_readable(buf, seglen); +} +EXPORT_SYMBOL(iov_iter_fault_in_readable); + +/* + * Return the count of just the current iov_iter segment. + */ +size_t iov_iter_single_seg_count(struct iov_iter *i) +{ + const struct iovec *iov = i->iov; + if (i->nr_segs == 1) + return i->count; + else + return min(i->count, iov->iov_len - i->iov_offset); +} +EXPORT_SYMBOL(iov_iter_single_seg_count); + +/* * Performs necessary checks before doing a write * * Can adjust writing position or amount of bytes to write. @@ -1998,6 +2061,92 @@ inline int generic_write_checks(struct f } EXPORT_SYMBOL(generic_write_checks); +int pagecache_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + const struct address_space_operations *aops = mapping->a_ops; + + if (aops->write_begin) { + return aops->write_begin(file, mapping, pos, len, flags, + pagep, fsdata); + } else { + int ret; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + struct inode *inode = mapping->host; + struct page *page; +again: + page = __grab_cache_page(mapping, index); + *pagep = page; + if (!page) + return -ENOMEM; + + if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) { + /* + * There is no way to resolve a short write situation + * for a !Uptodate page (except by double copying in + * the caller done by generic_perform_write_2copy). + * + * Instead, we have to bring it uptodate here. + */ + ret = aops->readpage(file, page); + page_cache_release(page); + if (ret) { + /* + * ret cannot be AOP_TRUNCATED_PAGE, because + * the only filesystems that return that from + * ->readpage actually use ->write_begin + */ + return ret; + } + goto again; + } + + ret = aops->prepare_write(file, page, offset, offset+len); + if (ret) { + unlock_page(page); + page_cache_release(page); + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); + } + return ret; + } +} +EXPORT_SYMBOL(pagecache_write_begin); + +int pagecache_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + const struct address_space_operations *aops = mapping->a_ops; + int ret; + + if (aops->write_begin) { + ret = aops->write_end(file, mapping, pos, len, copied, + page, fsdata); + } else { + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + struct inode *inode = mapping->host; + + flush_dcache_page(page); + ret = aops->commit_write(file, page, offset, offset+len); + unlock_page(page); + page_cache_release(page); + + if (ret < 0) { + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); + } else if (ret > 0) + ret = min_t(size_t, copied, ret); + else + ret = copied; + } + + return ret; +} +EXPORT_SYMBOL(pagecache_write_end); + ssize_t generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, unsigned long *nr_segs, loff_t pos, loff_t *ppos, @@ -2037,151 +2186,332 @@ generic_file_direct_write(struct kiocb * } EXPORT_SYMBOL(generic_file_direct_write); -ssize_t -generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, loff_t *ppos, - size_t count, ssize_t written) +/* + * Find or create a page at the given pagecache position. Return the locked + * page. This function is specifically for buffered writes. + */ +struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index) { - struct file *file = iocb->ki_filp; - struct address_space * mapping = file->f_mapping; - const struct address_space_operations *a_ops = mapping->a_ops; - struct inode *inode = mapping->host; - long status = 0; - struct page *page; - struct page *cached_page = NULL; - size_t bytes; - struct pagevec lru_pvec; - const struct iovec *cur_iov = iov; /* current iovec */ - size_t iov_base = 0; /* offset in the current iovec */ - char __user *buf; - - pagevec_init(&lru_pvec, 0); - - /* - * handle partial DIO write. Adjust cur_iov if needed. - */ - if (likely(nr_segs == 1)) - buf = iov->iov_base + written; - else { - filemap_set_next_iovec(&cur_iov, &iov_base, written); - buf = cur_iov->iov_base + iov_base; + int status; + struct page *page; +repeat: + page = find_lock_page(mapping, index); + if (likely(page)) + return page; + + page = page_cache_alloc(mapping); + if (!page) + return NULL; + status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); + if (unlikely(status)) { + page_cache_release(page); + if (status == -EEXIST) + goto repeat; + return NULL; } + return page; +} +EXPORT_SYMBOL(__grab_cache_page); + +static ssize_t generic_perform_write_2copy(struct file *file, + struct iov_iter *i, loff_t pos) +{ + struct address_space *mapping = file->f_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + struct inode *inode = mapping->host; + long status = 0; + ssize_t written = 0; do { - unsigned long index; - unsigned long offset; - size_t copied; + struct page *src_page; + struct page *page; + pgoff_t index; /* Pagecache index for current page */ + unsigned long offset; /* Offset into pagecache page */ + unsigned long bytes; /* Bytes to write to page */ + size_t copied; /* Bytes copied from user */ - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + offset = (pos & (PAGE_CACHE_SIZE - 1)); index = pos >> PAGE_CACHE_SHIFT; - bytes = PAGE_CACHE_SIZE - offset; - - /* Limit the size of the copy to the caller's write size */ - bytes = min(bytes, count); + bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + iov_iter_count(i)); - /* We only need to worry about prefaulting when writes are from - * user-space. NFSd uses vfs_writev with several non-aligned - * segments in the vector, and limiting to one segment a time is - * a noticeable performance for re-write + /* + * a non-NULL src_page indicates that we're doing the + * copy via get_user_pages and kmap. */ - if (!segment_eq(get_fs(), KERNEL_DS)) { - /* - * Limit the size of the copy to that of the current - * segment, because fault_in_pages_readable() doesn't - * know how to walk segments. - */ - bytes = min(bytes, cur_iov->iov_len - iov_base); + src_page = NULL; - /* - * Bring in the user page that we will copy from - * _first_. Otherwise there's a nasty deadlock on - * copying from the same page as we're writing to, - * without it being marked up-to-date. - */ - fault_in_pages_readable(buf, bytes); + /* + * Bring in the user page that we will copy from _first_. + * Otherwise there's a nasty deadlock on copying from the + * same page as we're writing to, without it being marked + * up-to-date. + * + * Not only is this an optimisation, but it is also required + * to check that the address is actually valid, when atomic + * usercopies are used, below. + */ + if (unlikely(iov_iter_fault_in_readable(i))) { + status = -EFAULT; + break; } - page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec); + + page = __grab_cache_page(mapping, index); if (!page) { status = -ENOMEM; break; } - if (unlikely(bytes == 0)) { - status = 0; - copied = 0; - goto zero_length_segment; - } + /* + * non-uptodate pages cannot cope with short copies, and we + * cannot take a pagefault with the destination page locked. + * So pin the source page to copy it. + */ + if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) { + unlock_page(page); - status = a_ops->prepare_write(file, page, offset, offset+bytes); - if (unlikely(status)) { - loff_t isize = i_size_read(inode); + src_page = alloc_page(GFP_KERNEL); + if (!src_page) { + page_cache_release(page); + status = -ENOMEM; + break; + } - if (status != AOP_TRUNCATED_PAGE) + /* + * Cannot get_user_pages with a page locked for the + * same reason as we can't take a page fault with a + * page locked (as explained below). + */ + copied = iov_iter_copy_from_user(src_page, i, + offset, bytes); + if (unlikely(copied == 0)) { + status = -EFAULT; + page_cache_release(page); + page_cache_release(src_page); + break; + } + bytes = copied; + + lock_page(page); + /* + * Can't handle the page going uptodate here, because + * that means we would use non-atomic usercopies, which + * zero out the tail of the page, which can cause + * zeroes to become transiently visible. We could just + * use a non-zeroing copy, but the APIs aren't too + * consistent. + */ + if (unlikely(!page->mapping || PageUptodate(page))) { unlock_page(page); - page_cache_release(page); - if (status == AOP_TRUNCATED_PAGE) + page_cache_release(page); + page_cache_release(src_page); continue; + } + } + + status = a_ops->prepare_write(file, page, offset, offset+bytes); + if (unlikely(status)) + goto fs_write_aop_error; + + if (!src_page) { + int tmp; /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. + * Must not enter the pagefault handler here, because + * we hold the page lock, so we might recursively + * deadlock on the same lock, or get an ABBA deadlock + * against a different lock, or against the mmap_sem + * (which nests outside the page lock). So increment + * preempt count, and use _atomic usercopies. + * + * The page is uptodate so we are OK to encounter a + * short copy: if unmodified parts of the page are + * marked dirty and written out to disk, it doesn't + * really matter. */ - if (pos + bytes > isize) - vmtruncate(inode, isize); - break; + + tmp = bytes; + if (!segment_eq(get_fs(), KERNEL_DS)) { + if (FAIL(13)) + tmp = tmp / 2; + if (FAIL(17)) + tmp = 0; + } + pagefault_disable(); + copied = iov_iter_copy_from_user_atomic(page, i, + offset, tmp); + pagefault_enable(); + } else { + void *src, *dst; + src = kmap_atomic(src_page, KM_USER0); + dst = kmap_atomic(page, KM_USER1); + memcpy(dst + offset, src + offset, bytes); + kunmap_atomic(dst, KM_USER1); + kunmap_atomic(src, KM_USER0); + copied = bytes; } - if (likely(nr_segs == 1)) - copied = filemap_copy_from_user(page, offset, - buf, bytes); - else - copied = filemap_copy_from_user_iovec(page, offset, - cur_iov, iov_base, bytes); flush_dcache_page(page); + status = a_ops->commit_write(file, page, offset, offset+bytes); - if (status == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - continue; - } -zero_length_segment: - if (likely(copied >= 0)) { - if (!status) - status = copied; - - if (status >= 0) { - written += status; - count -= status; - pos += status; - buf += status; - if (unlikely(nr_segs > 1)) { - filemap_set_next_iovec(&cur_iov, - &iov_base, status); - if (count) - buf = cur_iov->iov_base + - iov_base; - } else { - iov_base += status; - } - } - } - if (unlikely(copied != bytes)) - if (status >= 0) - status = -EFAULT; + if (unlikely(status < 0)) + goto fs_write_aop_error; + if (unlikely(status > 0)) /* filesystem did partial write */ + copied = min_t(size_t, copied, status); + unlock_page(page); mark_page_accessed(page); page_cache_release(page); - if (status < 0) - break; + if (src_page) + page_cache_release(src_page); + + iov_iter_advance(i, copied); + pos += copied; + written += copied; + balance_dirty_pages_ratelimited(mapping); cond_resched(); - } while (count); - *ppos = pos; + continue; + +fs_write_aop_error: + unlock_page(page); + page_cache_release(page); + if (src_page) + page_cache_release(src_page); + + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. Don't need + * i_size_read because we hold i_mutex. + */ + if (pos + bytes > inode->i_size) + vmtruncate(inode, inode->i_size); + break; + } while (iov_iter_count(i)); + + return written ? written : status; +} - if (cached_page) - page_cache_release(cached_page); +static ssize_t generic_perform_write(struct file *file, + struct iov_iter *i, loff_t pos) +{ + struct address_space *mapping = file->f_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + long status = 0; + ssize_t written = 0; + unsigned int flags = 0; /* - * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC + * Copies from kernel address space cannot fail (NFSD is a big user). */ + if (segment_eq(get_fs(), KERNEL_DS)) + flags |= AOP_FLAG_UNINTERRUPTIBLE; + + do { + struct page *page; + pgoff_t index; /* Pagecache index for current page */ + unsigned long offset; /* Offset into pagecache page */ + unsigned long bytes; /* Bytes to write to page */ + int tmp; + size_t copied; /* Bytes copied from user */ + void *fsdata; + + offset = (pos & (PAGE_CACHE_SIZE - 1)); + index = pos >> PAGE_CACHE_SHIFT; + bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + iov_iter_count(i)); + +again: + + /* + * Bring in the user page that we will copy from _first_. + * Otherwise there's a nasty deadlock on copying from the + * same page as we're writing to, without it being marked + * up-to-date. + * + * Not only is this an optimisation, but it is also required + * to check that the address is actually valid, when atomic + * usercopies are used, below. + */ + if (unlikely(iov_iter_fault_in_readable(i))) { + status = -EFAULT; + break; + } + + status = a_ops->write_begin(file, mapping, pos, bytes, flags, + &page, &fsdata); + if (unlikely(status)) + break; + + tmp = bytes; + if (!(flags & AOP_FLAG_UNINTERRUPTIBLE)) { + if (FAIL(13)) + tmp = tmp / 2; + if (FAIL(17)) + tmp = 0; + } + + pagefault_disable(); + copied = iov_iter_copy_from_user_atomic(page, i, offset, tmp); + pagefault_enable(); + flush_dcache_page(page); + + status = a_ops->write_end(file, mapping, pos, bytes, copied, + page, fsdata); + if (unlikely(status < 0)) + break; + copied = status; + + cond_resched(); + + if (unlikely(copied == 0)) { + /* + * If we were unable to copy any data at all, we must + * fall back to a single segment length write. + * + * If we didn't fallback here, we could livelock + * because not all segments in the iov can be copied at + * once without a pagefault. + */ + bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset, + iov_iter_single_seg_count(i)); + goto again; + } + iov_iter_advance(i, copied); + pos += copied; + written += copied; + + balance_dirty_pages_ratelimited(mapping); + + } while (iov_iter_count(i)); + + return written ? written : status; +} + +ssize_t +generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos, loff_t *ppos, + size_t count, ssize_t written) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + const struct address_space_operations *a_ops = mapping->a_ops; + struct inode *inode = mapping->host; + ssize_t status; + struct iov_iter i; + + iov_iter_init(&i, iov, nr_segs, count, written); + if (a_ops->write_begin) + status = generic_perform_write(file, &i, pos); + else + status = generic_perform_write_2copy(file, &i, pos); + if (likely(status >= 0)) { + written += status; + *ppos = pos + status; + + /* + * For now, when the user asks for O_SYNC, we'll actually give + * O_DSYNC + */ if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (!a_ops->writepage || !is_sync_kiocb(iocb)) status = generic_osync_inode(inode, mapping, @@ -2197,7 +2527,6 @@ zero_length_segment: if (unlikely(file->f_flags & O_DIRECT) && written) status = filemap_write_and_wait(mapping); - pagevec_lru_add(&lru_pvec); return written ? written : status; } EXPORT_SYMBOL(generic_file_buffered_write); Index: linux-2.6/mm/filemap.h =================================================================== --- linux-2.6.orig/mm/filemap.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * linux/mm/filemap.h - * - * Copyright (C) 1994-1999 Linus Torvalds - */ - -#ifndef __FILEMAP_H -#define __FILEMAP_H - -#include -#include -#include -#include -#include -#include - -size_t -__filemap_copy_from_user_iovec_inatomic(char *vaddr, - const struct iovec *iov, - size_t base, - size_t bytes); - -/* - * Copy as much as we can into the page and return the number of bytes which - * were sucessfully copied. If a fault is encountered then clear the page - * out to (offset+bytes) and return the number of bytes which were copied. - * - * NOTE: For this to work reliably we really want copy_from_user_inatomic_nocache - * to *NOT* zero any tail of the buffer that it failed to copy. If it does, - * and if the following non-atomic copy succeeds, then there is a small window - * where the target page contains neither the data before the write, nor the - * data after the write (it contains zero). A read at this time will see - * data that is inconsistent with any ordering of the read and the write. - * (This has been detected in practice). - */ -static inline size_t -filemap_copy_from_user(struct page *page, unsigned long offset, - const char __user *buf, unsigned bytes) -{ - char *kaddr; - int left; - - kaddr = kmap_atomic(page, KM_USER0); - left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes); - kunmap_atomic(kaddr, KM_USER0); - - if (left != 0) { - /* Do it the slow way */ - kaddr = kmap(page); - left = __copy_from_user_nocache(kaddr + offset, buf, bytes); - kunmap(page); - } - return bytes - left; -} - -/* - * This has the same sideeffects and return value as filemap_copy_from_user(). - * The difference is that on a fault we need to memset the remainder of the - * page (out to offset+bytes), to emulate filemap_copy_from_user()'s - * single-segment behaviour. - */ -static inline size_t -filemap_copy_from_user_iovec(struct page *page, unsigned long offset, - const struct iovec *iov, size_t base, size_t bytes) -{ - char *kaddr; - size_t copied; - - kaddr = kmap_atomic(page, KM_USER0); - copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov, - base, bytes); - kunmap_atomic(kaddr, KM_USER0); - if (copied != bytes) { - kaddr = kmap(page); - copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov, - base, bytes); - if (bytes - copied) - memset(kaddr + offset + copied, 0, bytes - copied); - kunmap(page); - } - return copied; -} - -static inline void -filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes) -{ - const struct iovec *iov = *iovp; - size_t base = *basep; - - do { - int copy = min(bytes, iov->iov_len - base); - - bytes -= copy; - base += copy; - if (iov->iov_len == base) { - iov++; - base = 0; - } - } while (bytes); - *iovp = iov; - *basep = base; -} -#endif Index: linux-2.6/fs/mpage.c =================================================================== --- linux-2.6.orig/fs/mpage.c +++ linux-2.6/fs/mpage.c @@ -389,31 +389,25 @@ mpage_readpages(struct address_space *ma struct bio *bio = NULL; unsigned page_idx; sector_t last_block_in_bio = 0; - struct pagevec lru_pvec; struct buffer_head map_bh; unsigned long first_logical_block = 0; clear_buffer_mapped(&map_bh); - pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); prefetchw(&page->flags); list_del(&page->lru); - if (!add_to_page_cache(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) { bio = do_mpage_readpage(bio, page, nr_pages - page_idx, &last_block_in_bio, &map_bh, &first_logical_block, get_block); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else { - page_cache_release(page); } + page_cache_release(page); } - pagevec_lru_add(&lru_pvec); BUG_ON(!list_empty(pages)); if (bio) mpage_bio_submit(READ, bio); Index: linux-2.6/mm/readahead.c =================================================================== --- linux-2.6.orig/mm/readahead.c +++ linux-2.6/mm/readahead.c @@ -133,28 +133,25 @@ int read_cache_pages(struct address_spac int (*filler)(void *, struct page *), void *data) { struct page *page; - struct pagevec lru_pvec; int ret = 0; - pagevec_init(&lru_pvec, 0); - while (!list_empty(pages)) { page = list_to_page(pages); list_del(&page->lru); - if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { + if (add_to_page_cache_lru(page, mapping, + page->index, GFP_KERNEL)) { page_cache_release(page); continue; } + page_cache_release(page); + ret = filler(data, page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - if (ret) { + if (unlikely(ret)) { put_pages_list(pages); break; } task_io_account_read(PAGE_CACHE_SIZE); } - pagevec_lru_add(&lru_pvec); return ret; } @@ -164,7 +161,6 @@ static int read_pages(struct address_spa struct list_head *pages, unsigned nr_pages) { unsigned page_idx; - struct pagevec lru_pvec; int ret; if (mapping->a_ops->readpages) { @@ -174,19 +170,15 @@ static int read_pages(struct address_spa goto out; } - pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_to_page(pages); list_del(&page->lru); - if (!add_to_page_cache(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) { mapping->a_ops->readpage(filp, page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else - page_cache_release(page); + } + page_cache_release(page); } - pagevec_lru_add(&lru_pvec); ret = 0; out: return ret; Index: linux-2.6/mm/filemap_xip.c =================================================================== --- linux-2.6.orig/mm/filemap_xip.c +++ linux-2.6/mm/filemap_xip.c @@ -14,7 +14,6 @@ #include #include #include -#include "filemap.h" /* * We do use our own empty page to avoid interference with other users @@ -312,6 +311,7 @@ __xip_file_write(struct file *filp, cons unsigned long index; unsigned long offset; size_t copied; + char *kaddr; offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ index = pos >> PAGE_CACHE_SHIFT; @@ -319,14 +319,6 @@ __xip_file_write(struct file *filp, cons if (bytes > count) bytes = count; - /* - * Bring in the user page that we will copy from _first_. - * Otherwise there's a nasty deadlock on copying from the - * same page as we're writing to, without it being marked - * up-to-date. - */ - fault_in_pages_readable(buf, bytes); - page = a_ops->get_xip_page(mapping, index*(PAGE_SIZE/512), 0); if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) { @@ -343,8 +335,13 @@ __xip_file_write(struct file *filp, cons break; } - copied = filemap_copy_from_user(page, offset, buf, bytes); + fault_in_pages_readable(buf, bytes); + kaddr = kmap_atomic(page, KM_USER0); + copied = bytes - + __copy_from_user_inatomic_nocache(kaddr, buf, bytes); + kunmap_atomic(kaddr, KM_USER0); flush_dcache_page(page); + if (likely(copied > 0)) { status = copied; Index: linux-2.6/include/linux/pagemap.h =================================================================== --- linux-2.6.orig/include/linux/pagemap.h +++ linux-2.6/include/linux/pagemap.h @@ -85,6 +85,8 @@ unsigned find_get_pages_contig(struct ad unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, int tag, unsigned int nr_pages, struct page **pages); +struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index); + /* * Returns locked page at given index in given cache, creating it if needed. */ @@ -196,6 +198,9 @@ static inline int fault_in_pages_writeab { int ret; + if (unlikely(size == 0)) + return 0; + /* * Writing zeroes into userspace here is OK, because we know that if * the zero gets there, we'll be overwriting it. @@ -215,19 +220,23 @@ static inline int fault_in_pages_writeab return ret; } -static inline void fault_in_pages_readable(const char __user *uaddr, int size) +static inline int fault_in_pages_readable(const char __user *uaddr, int size) { volatile char c; int ret; + if (unlikely(size == 0)) + return 0; + ret = __get_user(c, uaddr); if (ret == 0) { const char __user *end = uaddr + size - 1; if (((unsigned long)uaddr & PAGE_MASK) != ((unsigned long)end & PAGE_MASK)) - __get_user(c, end); + ret = __get_user(c, end); } + return ret; } #endif /* _LINUX_PAGEMAP_H */ Index: linux-2.6/include/linux/fs.h =================================================================== --- linux-2.6.orig/include/linux/fs.h +++ linux-2.6/include/linux/fs.h @@ -376,7 +376,7 @@ struct iattr { * trying again. The aop will be taking reasonable * precautions not to livelock. If the caller held a page * reference, it should drop it before retrying. Returned - * by readpage(), prepare_write(), and commit_write(). + * by readpage(). * * address_space_operation functions return these large constants to indicate * special semantics to the caller. These are much larger than the bytes in a @@ -389,6 +389,9 @@ enum positive_aop_returns { AOP_TRUNCATED_PAGE = 0x80001, }; +#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ +#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ + /* * oh the beauties of C type declarations. */ @@ -396,6 +399,39 @@ struct page; struct address_space; struct writeback_control; +struct iov_iter { + const struct iovec *iov; + unsigned long nr_segs; + size_t iov_offset; + size_t count; +}; + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +size_t iov_iter_copy_from_user(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +void iov_iter_advance(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_readable(struct iov_iter *i); +size_t iov_iter_single_seg_count(struct iov_iter *i); + +static inline void iov_iter_init(struct iov_iter *i, + const struct iovec *iov, unsigned long nr_segs, + size_t count, size_t written) +{ + i->iov = iov; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count + written; + + iov_iter_advance(i, written); +} + +static inline size_t iov_iter_count(struct iov_iter *i) +{ + return i->count; +} + + struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); @@ -416,6 +452,14 @@ struct address_space_operations { */ int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + + int (*write_begin)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + int (*write_end)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidatepage) (struct page *, unsigned long); @@ -430,6 +474,18 @@ struct address_space_operations { int (*launder_page) (struct page *); }; +/* + * pagecache_write_begin/pagecache_write_end must be used by general code + * to write into the pagecache. + */ +int pagecache_write_begin(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + +int pagecache_write_end(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + struct backing_dev_info; struct address_space { struct inode *host; /* owner: inode, block_device */ @@ -1869,6 +1925,12 @@ extern int simple_prepare_write(struct f unsigned offset, unsigned to); extern int simple_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to); +extern int simple_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); +extern int simple_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); Index: linux-2.6/fs/buffer.c =================================================================== --- linux-2.6.orig/fs/buffer.c +++ linux-2.6/fs/buffer.c @@ -1562,6 +1562,15 @@ void unmap_underlying_metadata(struct bl } EXPORT_SYMBOL(unmap_underlying_metadata); +static void poison_new_buffer(struct page *page, struct buffer_head *bh, int nr, int blocksize) +{ + if (buffer_new(bh) && !buffer_uptodate(bh)) { + void *kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + nr*blocksize, 0x0f, blocksize); + kunmap_atomic(kaddr, KM_USER0); + } +} + /* * NOTE! All mapped/uptodate combinations are valid: * @@ -1592,6 +1601,7 @@ static int __block_write_full_page(struc { int err; sector_t block; + int i = 0; sector_t last_block; struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; @@ -1641,6 +1651,7 @@ static int __block_write_full_page(struc err = get_block(inode, block, bh, 1); if (err) goto recover; + poison_new_buffer(page, bh, i, blocksize); if (buffer_new(bh)) { /* blockdev mappings never come here */ clear_buffer_new(bh); @@ -1650,6 +1661,7 @@ static int __block_write_full_page(struc } bh = bh->b_this_page; block++; + i++; } while (bh != head); do { @@ -1756,12 +1768,59 @@ recover: goto done; } +/* + * If a page has any new buffers, zero them out here, and mark them uptodate + * and dirty so they'll be written out (in order to prevent uninitialised + * block data from leaking). And clear the new bit. + */ +void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) +{ + unsigned int block_start, block_end; + struct buffer_head *head, *bh; + + BUG_ON(!PageLocked(page)); + if (!page_has_buffers(page)) + return; + + bh = head = page_buffers(page); + block_start = 0; + do { + block_end = block_start + bh->b_size; + + if (buffer_new(bh)) { + if (block_end > from && block_start < to) { + if (!PageUptodate(page)) { + unsigned start, end; + void *kaddr; + + start = max(from, block_start); + end = min(to, block_end); + + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr+start, 0, end - start); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + set_buffer_uptodate(bh); + } + + clear_buffer_new(bh); + mark_buffer_dirty(bh); + } + } + + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); +} +EXPORT_SYMBOL(page_zero_new_buffers); + static int __block_prepare_write(struct inode *inode, struct page *page, unsigned from, unsigned to, get_block_t *get_block) { unsigned block_start, block_end; sector_t block; int err = 0; + int i = 0; unsigned blocksize, bbits; struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; @@ -1779,7 +1838,7 @@ static int __block_prepare_write(struct block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); for(bh = head, block_start = 0; bh != head || !block_start; - block++, block_start=block_end, bh = bh->b_this_page) { + block++, i++, block_start=block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { if (PageUptodate(page)) { @@ -1795,11 +1854,14 @@ static int __block_prepare_write(struct err = get_block(inode, block, bh, 1); if (err) break; + poison_new_buffer(page, bh, i, blocksize); if (buffer_new(bh)) { unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); if (PageUptodate(page)) { + clear_buffer_new(bh); set_buffer_uptodate(bh); + mark_buffer_dirty(bh); continue; } if (block_end > to || block_start < from) { @@ -1838,43 +1900,8 @@ static int __block_prepare_write(struct if (!buffer_uptodate(*wait_bh)) err = -EIO; } - if (!err) { - bh = head; - do { - if (buffer_new(bh)) - clear_buffer_new(bh); - } while ((bh = bh->b_this_page) != head); - return 0; - } - /* Error case: */ - /* - * Zero out any newly allocated blocks to avoid exposing stale - * data. If BH_New is set, we know that the block was newly - * allocated in the above loop. - */ - bh = head; - block_start = 0; - do { - block_end = block_start+blocksize; - if (block_end <= from) - goto next_bh; - if (block_start >= to) - break; - if (buffer_new(bh)) { - void *kaddr; - - clear_buffer_new(bh); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+block_start, 0, bh->b_size); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - } -next_bh: - block_start = block_end; - bh = bh->b_this_page; - } while (bh != head); + if (unlikely(err)) + page_zero_new_buffers(page, from, to); return err; } @@ -1899,6 +1926,7 @@ static int __block_commit_write(struct i set_buffer_uptodate(bh); mark_buffer_dirty(bh); } + clear_buffer_new(bh); } /* @@ -1913,6 +1941,126 @@ static int __block_commit_write(struct i } /* + * block_write_begin takes care of the basic task of block allocation and + * bringing partial write blocks uptodate first. + * + * If *pagep is not NULL, then block_write_begin uses the locked page + * at *pagep rather than allocating its own. In this case, the page will + * not be unlocked or deallocated on failure. + */ +int block_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block) +{ + struct inode *inode = mapping->host; + int status = 0; + struct page *page; + pgoff_t index; + unsigned start, end; + int ownpage = 0; + + index = pos >> PAGE_CACHE_SHIFT; + start = pos & (PAGE_CACHE_SIZE - 1); + end = start + len; + + page = *pagep; + if (page == NULL) { + ownpage = 1; + page = __grab_cache_page(mapping, index); + if (!page) { + status = -ENOMEM; + goto out; + } + *pagep = page; + } else + BUG_ON(!PageLocked(page)); + + status = __block_prepare_write(inode, page, start, end, get_block); + if (unlikely(status)) { + ClearPageUptodate(page); + + if (ownpage) { + unlock_page(page); + page_cache_release(page); + + /* + * prepare_write() may have instantiated a few blocks + * outside i_size. Trim these off again. Don't need + * i_size_read because we hold i_mutex. + */ + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); + } + goto out; + } + +out: + return status; +} +EXPORT_SYMBOL(block_write_begin); + +int block_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + unsigned start; + + start = pos & (PAGE_CACHE_SIZE - 1); + + if (unlikely(copied < len)) { + /* + * The buffers that were written will now be uptodate, so we + * don't have to worry about a readpage reading them and + * overwriting a partial write. However if we have encountered + * a short write and only partially written into a buffer, it + * will not be marked uptodate, so a readpage might come in and + * destroy our partial write. + * + * Do the simplest thing, and just treat any short write to a + * non uptodate page as a zero-length write, and force the + * caller to redo the whole thing. + */ + if (!PageUptodate(page)) + copied = 0; + + page_zero_new_buffers(page, start+copied, start+len); + } + flush_dcache_page(page); + + /* This could be a short (even 0-length) commit */ + __block_commit_write(inode, page, start, start+copied); + + return copied; +} +EXPORT_SYMBOL(block_write_end); + +int generic_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = mapping->host; + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + unlock_page(page); + mark_page_accessed(page); /* XXX: put this in caller? */ + page_cache_release(page); + + /* + * No need to use i_size_read() here, the i_size + * cannot change under us because we hold i_mutex. + */ + if (pos+copied > inode->i_size) { + i_size_write(inode, pos+copied); + mark_inode_dirty(inode); + } + + return copied; +} +EXPORT_SYMBOL(generic_write_end); + +/* * Generic "read page" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. * Reads the page asynchronously --- the unlock_buffer() and @@ -1953,6 +2101,7 @@ int block_read_full_page(struct page *pa err = get_block(inode, iblock, bh, 0); if (err) SetPageError(page); + poison_new_buffer(page, bh, i, blocksize); } if (!buffer_mapped(bh)) { void *kaddr = kmap_atomic(page, KM_USER0); @@ -2010,14 +2159,14 @@ int block_read_full_page(struct page *pa } /* utility function for filesystems that need to do work on expanding - * truncates. Uses prepare/commit_write to allow the filesystem to + * truncates. Uses filesystem pagecache writes to allow the filesystem to * deal with the hole. */ -static int __generic_cont_expand(struct inode *inode, loff_t size, - pgoff_t index, unsigned int offset) +int generic_cont_expand_simple(struct inode *inode, loff_t size) { struct address_space *mapping = inode->i_mapping; struct page *page; + void *fsdata; unsigned long limit; int err; @@ -2030,146 +2179,141 @@ static int __generic_cont_expand(struct if (size > inode->i_sb->s_maxbytes) goto out; - err = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - goto out; - err = mapping->a_ops->prepare_write(NULL, page, offset, offset); - if (err) { - /* - * ->prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. - */ - unlock_page(page); - page_cache_release(page); - vmtruncate(inode, inode->i_size); + err = pagecache_write_begin(NULL, mapping, size, 0, + AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND, + &page, &fsdata); + if (err) goto out; - } - err = mapping->a_ops->commit_write(NULL, page, offset, offset); + err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); + BUG_ON(err > 0); - unlock_page(page); - page_cache_release(page); - if (err > 0) - err = 0; out: return err; } int generic_cont_expand(struct inode *inode, loff_t size) { - pgoff_t index; unsigned int offset; offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. make sure we never send an offset for the start - ** of a block - */ + * skip the prepare. make sure we never send an offset for the start + * of a block. + * XXX: actually, this should be handled in those filesystems by + * checking for the AOP_FLAG_CONT_EXPAND flag. + */ if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { /* caller must handle this extra byte. */ - offset++; + size++; } - index = size >> PAGE_CACHE_SHIFT; - - return __generic_cont_expand(inode, size, index, offset); + return generic_cont_expand_simple(inode, size); } -int generic_cont_expand_simple(struct inode *inode, loff_t size) +int cont_expand_zero(struct file *file, struct address_space *mapping, + loff_t pos, loff_t *bytes) { - loff_t pos = size - 1; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; - - /* prepare/commit_write can handle even if from==to==start of block. */ - return __generic_cont_expand(inode, size, index, offset); -} - -/* - * For moronic filesystems that do not allow holes in file. - * We may have to extend the file. - */ - -int cont_prepare_write(struct page *page, unsigned offset, - unsigned to, get_block_t *get_block, loff_t *bytes) -{ - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct page *new_page; - pgoff_t pgpos; - long status; - unsigned zerofrom; unsigned blocksize = 1 << inode->i_blkbits; + struct page *page; + void *fsdata; + pgoff_t index, curidx; + loff_t curpos; + unsigned zerofrom, offset, len; void *kaddr; + int err = 0; - while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { - status = -ENOMEM; - new_page = grab_cache_page(mapping, pgpos); - if (!new_page) - goto out; - /* we might sleep */ - if (*bytes>>PAGE_CACHE_SHIFT != pgpos) { - unlock_page(new_page); - page_cache_release(new_page); - continue; - } - zerofrom = *bytes & ~PAGE_CACHE_MASK; + index = pos >> PAGE_CACHE_SHIFT; + offset = pos & ~PAGE_CACHE_MASK; + + while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) { + zerofrom = curpos & ~PAGE_CACHE_MASK; if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } - status = __block_prepare_write(inode, new_page, zerofrom, - PAGE_CACHE_SIZE, get_block); - if (status) - goto out_unmap; - kaddr = kmap_atomic(new_page, KM_USER0); - memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); - flush_dcache_page(new_page); + len = PAGE_CACHE_SIZE - zerofrom; + + err = pagecache_write_begin(file, mapping, curpos, len, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + goto out; + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr+zerofrom, 0, len); + flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); - unlock_page(new_page); - page_cache_release(new_page); + err = pagecache_write_end(file, mapping, curpos, len, len, + page, fsdata); + if (err < 0) + goto out; + BUG_ON(err != len); + err = 0; } - if (page->index < pgpos) { - /* completely inside the area */ - zerofrom = offset; - } else { - /* page covers the boundary, find the boundary offset */ - zerofrom = *bytes & ~PAGE_CACHE_MASK; - + /* page covers the boundary, find the boundary offset */ + if (index == curidx) { + zerofrom = curpos & ~PAGE_CACHE_MASK; /* if we will expand the thing last block will be filled */ - if (to > zerofrom && (zerofrom & (blocksize-1))) { + if (offset <= zerofrom) { + goto out; + } + if (zerofrom & (blocksize-1)) { *bytes |= (blocksize-1); (*bytes)++; } + len = offset - zerofrom; - /* starting below the boundary? Nothing to zero out */ - if (offset <= zerofrom) - zerofrom = offset; - } - status = __block_prepare_write(inode, page, zerofrom, to, get_block); - if (status) - goto out1; - if (zerofrom < offset) { + err = pagecache_write_begin(file, mapping, curpos, len, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (err) + goto out; kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+zerofrom, 0, offset-zerofrom); + memset(kaddr+zerofrom, 0, len); flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - __block_commit_write(inode, page, zerofrom, offset); + err = pagecache_write_end(file, mapping, curpos, len, len, + page, fsdata); + if (err < 0) + goto out; + BUG_ON(err != len); + err = 0; } - return 0; -out1: - ClearPageUptodate(page); - return status; +out: + return err; +} + +/* + * For moronic filesystems that do not allow holes in file. + * We may have to extend the file. + */ +int cont_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block, loff_t *bytes) +{ + struct inode *inode = mapping->host; + unsigned blocksize = 1 << inode->i_blkbits; + unsigned zerofrom; + int err; + + err = cont_expand_zero(file, mapping, pos, bytes); + if (err) + goto out; -out_unmap: - ClearPageUptodate(new_page); - unlock_page(new_page); - page_cache_release(new_page); + zerofrom = *bytes & ~PAGE_CACHE_MASK; + if (pos+len > *bytes && zerofrom & (blocksize-1)) { + *bytes |= (blocksize-1); + (*bytes)++; + } + + *pagep = NULL; + err = block_write_begin(file, mapping, pos, len, + flags, pagep, fsdata, get_block); out: - return status; + return err; } int block_prepare_write(struct page *page, unsigned from, unsigned to, @@ -2275,6 +2419,7 @@ int nobh_prepare_write(struct page *page &map_bh, create); if (ret) goto failed; + poison_new_buffer(page, &map_bh, block_in_page, blocksize); if (!buffer_mapped(&map_bh)) is_mapped_to_disk = 0; if (buffer_new(&map_bh)) @@ -2490,7 +2635,7 @@ int block_truncate_page(struct address_s pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize; - sector_t iblock; + sector_t iblock, i; unsigned length, pos; struct inode *inode = mapping->host; struct page *page; @@ -2519,8 +2664,10 @@ int block_truncate_page(struct address_s /* Find the buffer that contains "offset" */ bh = page_buffers(page); pos = blocksize; + i = 0; while (offset >= pos) { bh = bh->b_this_page; + i++; iblock++; pos += blocksize; } @@ -2531,6 +2678,7 @@ int block_truncate_page(struct address_s err = get_block(inode, iblock, bh, 0); if (err) goto unlock; + poison_new_buffer(page, bh, i, blocksize); /* unmapped? It's a hole - nothing to do */ if (!buffer_mapped(bh)) goto unlock; @@ -3027,7 +3175,7 @@ EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_write_full_page); -EXPORT_SYMBOL(cont_prepare_write); +EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); Index: linux-2.6/include/linux/buffer_head.h =================================================================== --- linux-2.6.orig/include/linux/buffer_head.h +++ linux-2.6/include/linux/buffer_head.h @@ -202,9 +202,20 @@ void block_invalidatepage(struct page *p int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); +int block_write_begin(struct file *, struct address_space *, + loff_t, unsigned, unsigned, + struct page **, void **, get_block_t*); +int block_write_end(struct file *, struct address_space *, + loff_t, unsigned, unsigned, + struct page *, void *); +int generic_write_end(struct file *, struct address_space *, + loff_t, unsigned, unsigned, + struct page *, void *); +void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); -int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, - loff_t *); +int cont_write_begin(struct file *, struct address_space *, loff_t, + unsigned, unsigned, struct page **, void **, + get_block_t *, loff_t *); int generic_cont_expand(struct inode *inode, loff_t size); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); Index: linux-2.6/fs/libfs.c =================================================================== --- linux-2.6.orig/fs/libfs.c +++ linux-2.6/fs/libfs.c @@ -342,6 +342,26 @@ int simple_prepare_write(struct file *fi return 0; } +int simple_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct page *page; + pgoff_t index; + unsigned from; + + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + + *pagep = page; + + return simple_prepare_write(file, page, from, from+len); +} + int simple_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { @@ -360,6 +380,28 @@ int simple_commit_write(struct file *fil return 0; } +int simple_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + + /* zero the stale part of the page if we did a short copy */ + if (copied < len) { + void *kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + from + copied, 0, len - copied); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + + simple_commit_write(file, page, from, from+copied); + + unlock_page(page); + page_cache_release(page); + + return copied; +} + int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files) { struct inode *inode; @@ -616,6 +658,8 @@ EXPORT_SYMBOL(dcache_dir_open); EXPORT_SYMBOL(dcache_readdir); EXPORT_SYMBOL(generic_read_dir); EXPORT_SYMBOL(get_sb_pseudo); +EXPORT_SYMBOL(simple_write_begin); +EXPORT_SYMBOL(simple_write_end); EXPORT_SYMBOL(simple_commit_write); EXPORT_SYMBOL(simple_dir_inode_operations); EXPORT_SYMBOL(simple_dir_operations); Index: linux-2.6/drivers/block/loop.c =================================================================== --- linux-2.6.orig/drivers/block/loop.c +++ linux-2.6/drivers/block/loop.c @@ -203,14 +203,13 @@ lo_do_transfer(struct loop_device *lo, i * do_lo_send_aops - helper for writing data to a loop device * * This is the fast version for backing filesystems which implement the address - * space operations prepare_write and commit_write. + * space operations write_begin and write_end. */ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, - int bsize, loff_t pos, struct page *page) + int bsize, loff_t pos, struct page *unused) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_mapping; - const struct address_space_operations *aops = mapping->a_ops; pgoff_t index; unsigned offset, bv_offs; int len, ret; @@ -222,67 +221,47 @@ static int do_lo_send_aops(struct loop_d len = bvec->bv_len; while (len > 0) { sector_t IV; - unsigned size; + unsigned size, copied; int transfer_result; + struct page *page; + void *fsdata; IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9); size = PAGE_CACHE_SIZE - offset; if (size > len) size = len; - page = grab_cache_page(mapping, index); - if (unlikely(!page)) + + ret = pagecache_write_begin(file, mapping, pos, size, 0, + &page, &fsdata); + if (ret) goto fail; - ret = aops->prepare_write(file, page, offset, - offset + size); - if (unlikely(ret)) { - if (ret == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - continue; - } - goto unlock; - } + transfer_result = lo_do_transfer(lo, WRITE, page, offset, bvec->bv_page, bv_offs, size, IV); - if (unlikely(transfer_result)) { - char *kaddr; + copied = size; + if (unlikely(transfer_result)) + copied = 0; + + ret = pagecache_write_end(file, mapping, pos, size, copied, + page, fsdata); + if (ret < 0) + goto fail; + if (ret < copied) + copied = ret; - /* - * The transfer failed, but we still write the data to - * keep prepare/commit calls balanced. - */ - printk(KERN_ERR "loop: transfer error block %llu\n", - (unsigned long long)index); - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, size); - kunmap_atomic(kaddr, KM_USER0); - } - flush_dcache_page(page); - ret = aops->commit_write(file, page, offset, - offset + size); - if (unlikely(ret)) { - if (ret == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - continue; - } - goto unlock; - } if (unlikely(transfer_result)) - goto unlock; - bv_offs += size; - len -= size; + goto fail; + + bv_offs += copied; + len -= copied; offset = 0; index++; - pos += size; - unlock_page(page); - page_cache_release(page); + pos += copied; } ret = 0; out: mutex_unlock(&mapping->host->i_mutex); return ret; -unlock: - unlock_page(page); - page_cache_release(page); fail: ret = -1; goto out; @@ -316,7 +295,7 @@ static int __do_lo_send_write(struct fil * do_lo_send_direct_write - helper for writing data to a loop device * * This is the fast, non-transforming version for backing filesystems which do - * not implement the address space operations prepare_write and commit_write. + * not implement the address space operations write_begin and write_end. * It uses the write file operation which should be present on all writeable * filesystems. */ @@ -335,7 +314,7 @@ static int do_lo_send_direct_write(struc * do_lo_send_write - helper for writing data to a loop device * * This is the slow, transforming version for filesystems which do not - * implement the address space operations prepare_write and commit_write. It + * implement the address space operations write_begin and write_end. It * uses the write file operation which should be present on all writeable * filesystems. * @@ -767,7 +746,7 @@ static int loop_set_fd(struct loop_devic */ if (!file->f_op->sendfile) goto out_putf; - if (aops->prepare_write && aops->commit_write) + if (aops->prepare_write || aops->write_begin) lo_flags |= LO_FLAGS_USE_AOPS; if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write) lo_flags |= LO_FLAGS_READ_ONLY; Index: linux-2.6/fs/namei.c =================================================================== --- linux-2.6.orig/fs/namei.c +++ linux-2.6/fs/namei.c @@ -2688,53 +2688,30 @@ int __page_symlink(struct inode *inode, { struct address_space *mapping = inode->i_mapping; struct page *page; + void *fsdata; int err; char *kaddr; retry: - err = -ENOMEM; - page = find_or_create_page(mapping, 0, gfp_mask); - if (!page) - goto fail; - err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); - if (err == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry; - } + err = pagecache_write_begin(NULL, mapping, 0, PAGE_CACHE_SIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); if (err) - goto fail_map; + goto fail; + kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len-1); + memset(kaddr+len-1, 0, PAGE_CACHE_SIZE-(len-1)); kunmap_atomic(kaddr, KM_USER0); - err = mapping->a_ops->commit_write(NULL, page, 0, len-1); - if (err == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry; - } - if (err) - goto fail_map; - /* - * Notice that we are _not_ going to block here - end of page is - * unmapped, so this will only try to map the rest of page, see - * that it is unmapped (typically even will not look into inode - - * ->i_size will be enough for everything) and zero it out. - * OTOH it's obviously correct and should make the page up-to-date. - */ - if (!PageUptodate(page)) { - err = mapping->a_ops->readpage(NULL, page); - if (err != AOP_TRUNCATED_PAGE) - wait_on_page_locked(page); - } else { - unlock_page(page); - } - page_cache_release(page); + + err = pagecache_write_end(NULL, mapping, 0, PAGE_CACHE_SIZE, PAGE_CACHE_SIZE, + page, fsdata); if (err < 0) goto fail; + if (err < PAGE_CACHE_SIZE) + goto retry; + mark_inode_dirty(inode); return 0; -fail_map: - unlock_page(page); - page_cache_release(page); fail: return err; } Index: linux-2.6/fs/splice.c =================================================================== --- linux-2.6.orig/fs/splice.c +++ linux-2.6/fs/splice.c @@ -559,7 +559,7 @@ static int pipe_to_file(struct pipe_inod struct address_space *mapping = file->f_mapping; unsigned int offset, this_len; struct page *page; - pgoff_t index; + void *fsdata; int ret; /* @@ -569,49 +569,16 @@ static int pipe_to_file(struct pipe_inod if (unlikely(ret)) return ret; - index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; this_len = sd->len; if (this_len + offset > PAGE_CACHE_SIZE) this_len = PAGE_CACHE_SIZE - offset; -find_page: - page = find_lock_page(mapping, index); - if (!page) { - ret = -ENOMEM; - page = page_cache_alloc_cold(mapping); - if (unlikely(!page)) - goto out_ret; - - /* - * This will also lock the page - */ - ret = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL); - if (unlikely(ret)) - goto out; - } - - ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len); - if (unlikely(ret)) { - loff_t isize = i_size_read(mapping->host); - - if (ret != AOP_TRUNCATED_PAGE) - unlock_page(page); - page_cache_release(page); - if (ret == AOP_TRUNCATED_PAGE) - goto find_page; - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. - */ - if (sd->pos + this_len > isize) - vmtruncate(mapping->host, isize); - - goto out_ret; - } + ret = pagecache_write_begin(file, mapping, sd->pos, sd->len, + AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + if (unlikely(ret)) + goto out; if (buf->page != page) { /* @@ -621,35 +588,14 @@ find_page: char *dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, this_len); - flush_dcache_page(page); kunmap_atomic(dst, KM_USER1); buf->ops->unmap(pipe, buf, src); } - ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); - if (ret) { - if (ret == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto find_page; - } - if (ret < 0) - goto out; - /* - * Partial write has happened, so 'ret' already initialized by - * number of bytes written, Where is nothing we have to do here. - */ - } else - ret = this_len; - /* - * Return the number of bytes written and mark page as - * accessed, we are now done! - */ - mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); + ret = pagecache_write_end(file, mapping, sd->pos, sd->len, sd->len, page, fsdata); + out: - page_cache_release(page); - unlock_page(page); -out_ret: + return ret; } Index: linux-2.6/Documentation/filesystems/Locking =================================================================== --- linux-2.6.orig/Documentation/filesystems/Locking +++ linux-2.6/Documentation/filesystems/Locking @@ -176,15 +176,18 @@ prototypes: locking rules: All except set_page_dirty may block - BKL PageLocked(page) + BKL PageLocked(page) i_sem writepage: no yes, unlocks (see below) readpage: no yes, unlocks sync_page: no maybe writepages: no set_page_dirty no no readpages: no -prepare_write: no yes -commit_write: no yes +prepare_write: no yes yes +commit_write: no yes yes +write_begin: no locks the page yes +write_end: no yes, unlocks yes +perform_write: no n/a yes bmap: yes invalidatepage: no yes releasepage: no yes Index: linux-2.6/Documentation/filesystems/vfs.txt =================================================================== --- linux-2.6.orig/Documentation/filesystems/vfs.txt +++ linux-2.6/Documentation/filesystems/vfs.txt @@ -534,6 +534,14 @@ struct address_space_operations { struct list_head *pages, unsigned nr_pages); int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + int (*write_begin)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + int (*write_end)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + ssize_t (*perform_write)(struct file *, struct address_space *mapping, + struct iov_iter *i, loff_t pos); sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); @@ -611,11 +619,7 @@ struct address_space_operations { any basic-blocks on storage, then those blocks should be pre-read (if they haven't been read already) so that the updated blocks can be written out properly. - The page will be locked. If prepare_write wants to unlock the - page it, like readpage, may do so and return - AOP_TRUNCATED_PAGE. - In this case the prepare_write will be retried one the lock is - regained. + The page will be locked. Note: the page _must not_ be marked uptodate in this function (or anywhere else) unless it actually is uptodate right now. As @@ -629,6 +633,46 @@ struct address_space_operations { operations. It should avoid returning an error if possible - errors should have been handled by prepare_write. + write_begin: This is intended as a replacement for prepare_write. Called + by the generic buffered write code to ask the filesystem to prepare + to write len bytes at the given offset in the file. flags is a field + for AOP_FLAG_xxx flags, described in include/linux/mm.h. + + The filesystem must return the locked pagecache page for the caller + to write into. + + A void * may be returned in fsdata, which then gets passed into + write_end. + + Returns < 0 on failure, in which case all cleanup must be done and + write_end not called. 0 on success, in which case write_end must + be called. + + write_end: After a successful write_begin, and data copy, write_end must + be called. len is the original len passed to write_begin, and copied + is the amount that was able to be copied (they must be equal if + write_begin was called with intr == 0). + + The filesystem must take care of unlocking the page and dropping its + refcount, and updating i_size. + + Returns < 0 on failure, otherwise the number of bytes (<= 'copied') + that were able to be copied into pagecache. + + perform_write: This is a single-call, bulk version of write_begin/write_end + operations. It is only used in the buffered write path (write_begin + must still be implemented), and not for in-kernel writes to pagecache. + It takes an iov_iter structure, which provides a descriptor for the + source data (and has associated iov_iter_xxx helpers to operate on + that data). There are also file, mapping, and pos arguments, which + specify the destination of the data. + + Returns < 0 on failure if nothing was written out, otherwise returns + the number of bytes copied into pagecache. + + fs/libfs.c provides a reasonable template to start with, demonstrating + iov_iter routines, and iteration over the destination pagecache. + bmap: called by the VFS to map a logical block offset within object to physical block number. This method is used by the FIBMAP ioctl and for working with swap-files. To be able to swap to Index: linux-2.6/mm/shmem.c =================================================================== --- linux-2.6.orig/mm/shmem.c +++ linux-2.6/mm/shmem.c @@ -1098,7 +1098,7 @@ static int shmem_getpage(struct inode *i * Normally, filepage is NULL on entry, and either found * uptodate immediately, or allocated and zeroed, or read * in under swappage, which is then assigned to filepage. - * But shmem_prepare_write passes in a locked filepage, + * But shmem_write_begin passes in a locked filepage, * which may be found not uptodate by other callers too, * and may need to be copied from the swappage read in. */ @@ -1483,14 +1483,35 @@ static const struct inode_operations shm static const struct inode_operations shmem_symlink_inline_operations; /* - * Normally tmpfs makes no use of shmem_prepare_write, but it + * Normally tmpfs makes no use of shmem_write_begin, but it * lets a tmpfs file be used read-write below the loop driver. */ static int -shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +shmem_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + *pagep = NULL; + return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL); +} + +static int +shmem_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - struct inode *inode = page->mapping->host; - return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL); + struct inode *inode = mapping->host; + + set_page_dirty(page); + mark_page_accessed(page); + page_cache_release(page); + + if (pos+copied > inode->i_size) + i_size_write(inode, pos+copied); + + return copied; } static ssize_t @@ -2387,8 +2408,8 @@ static const struct address_space_operat .writepage = shmem_writepage, .set_page_dirty = __set_page_dirty_no_writeback, #ifdef CONFIG_TMPFS - .prepare_write = shmem_prepare_write, - .commit_write = simple_commit_write, + .write_begin = shmem_write_begin, + .write_end = shmem_write_end, #endif .migratepage = migrate_page, }; Index: linux-2.6/fs/configfs/inode.c =================================================================== --- linux-2.6.orig/fs/configfs/inode.c +++ linux-2.6/fs/configfs/inode.c @@ -40,8 +40,8 @@ extern struct super_block * configfs_sb; static const struct address_space_operations configfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write + .write_begin = simple_write_begin, + .write_end = simple_write_end, }; static struct backing_dev_info configfs_backing_dev_info = { Index: linux-2.6/fs/sysfs/inode.c =================================================================== --- linux-2.6.orig/fs/sysfs/inode.c +++ linux-2.6/fs/sysfs/inode.c @@ -20,8 +20,8 @@ extern struct super_block * sysfs_sb; static const struct address_space_operations sysfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write + .write_begin = simple_write_begin, + .write_end = simple_write_end, }; static struct backing_dev_info sysfs_backing_dev_info = { Index: linux-2.6/fs/ramfs/file-mmu.c =================================================================== --- linux-2.6.orig/fs/ramfs/file-mmu.c +++ linux-2.6/fs/ramfs/file-mmu.c @@ -29,8 +29,8 @@ const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write, + .write_begin = simple_write_begin, + .write_end = simple_write_end, .set_page_dirty = __set_page_dirty_no_writeback, }; Index: linux-2.6/fs/ramfs/file-nommu.c =================================================================== --- linux-2.6.orig/fs/ramfs/file-nommu.c +++ linux-2.6/fs/ramfs/file-nommu.c @@ -30,8 +30,8 @@ static int ramfs_nommu_setattr(struct de const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, - .prepare_write = simple_prepare_write, - .commit_write = simple_commit_write, + .write_begin = simple_write_begin, + .write_end = simple_write_end, .set_page_dirty = __set_page_dirty_no_writeback, }; Index: linux-2.6/fs/hugetlbfs/inode.c =================================================================== --- linux-2.6.orig/fs/hugetlbfs/inode.c +++ linux-2.6/fs/hugetlbfs/inode.c @@ -162,15 +162,19 @@ static int hugetlbfs_readpage(struct fil return -EINVAL; } -static int hugetlbfs_prepare_write(struct file *file, - struct page *page, unsigned offset, unsigned to) +static int hugetlbfs_write_begin(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { return -EINVAL; } -static int hugetlbfs_commit_write(struct file *file, - struct page *page, unsigned offset, unsigned to) +static int hugetlbfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { + BUG(); return -EINVAL; } @@ -542,8 +546,8 @@ static void hugetlbfs_destroy_inode(stru static const struct address_space_operations hugetlbfs_aops = { .readpage = hugetlbfs_readpage, - .prepare_write = hugetlbfs_prepare_write, - .commit_write = hugetlbfs_commit_write, + .write_begin = hugetlbfs_write_begin, + .write_end = hugetlbfs_write_end, .set_page_dirty = hugetlbfs_set_page_dirty, }; Index: linux-2.6/fs/block_dev.c =================================================================== --- linux-2.6.orig/fs/block_dev.c +++ linux-2.6/fs/block_dev.c @@ -375,14 +375,26 @@ static int blkdev_readpage(struct file * return block_read_full_page(page, blkdev_get_block); } -static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) -{ - return block_prepare_write(page, from, to, blkdev_get_block); +static int blkdev_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + blkdev_get_block); } -static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int blkdev_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - return block_commit_write(page, from, to); + int ret; + ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + unlock_page(page); + page_cache_release(page); + + return ret; } /* @@ -1327,8 +1339,8 @@ const struct address_space_operations de .readpage = blkdev_readpage, .writepage = blkdev_writepage, .sync_page = block_sync_page, - .prepare_write = blkdev_prepare_write, - .commit_write = blkdev_commit_write, + .write_begin = blkdev_write_begin, + .write_end = blkdev_write_end, .writepages = generic_writepages, .direct_IO = blkdev_direct_IO, }; Index: linux-2.6/drivers/block/rd.c =================================================================== --- linux-2.6.orig/drivers/block/rd.c +++ linux-2.6/drivers/block/rd.c @@ -104,50 +104,60 @@ static void make_page_uptodate(struct pa struct buffer_head *head = bh; do { - if (!buffer_uptodate(bh)) { - memset(bh->b_data, 0, bh->b_size); - /* - * akpm: I'm totally undecided about this. The - * buffer has just been magically brought "up to - * date", but nobody should want to be reading - * it anyway, because it hasn't been used for - * anything yet. It is still in a "not read - * from disk yet" state. - * - * But non-uptodate buffers against an uptodate - * page are against the rules. So do it anyway. - */ + if (!buffer_uptodate(bh)) set_buffer_uptodate(bh); - } } while ((bh = bh->b_this_page) != head); - } else { - memset(page_address(page), 0, PAGE_CACHE_SIZE); } - flush_dcache_page(page); SetPageUptodate(page); } static int ramdisk_readpage(struct file *file, struct page *page) { - if (!PageUptodate(page)) + if (!PageUptodate(page)) { + memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE); make_page_uptodate(page); + } unlock_page(page); return 0; } -static int ramdisk_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) -{ - if (!PageUptodate(page)) - make_page_uptodate(page); +static int ramdisk_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct page *page; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; return 0; } -static int ramdisk_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) -{ +static int ramdisk_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + if (!PageUptodate(page)) { + if (copied != PAGE_CACHE_SIZE) { + void *dst; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned to = from + copied; + + dst = kmap_atomic(page, KM_USER0); + memset(dst, 0, from); + memset(dst + to, 0, PAGE_CACHE_SIZE - to); + flush_dcache_page(page); + kunmap_atomic(dst, KM_USER0); + } + make_page_uptodate(page); + } + set_page_dirty(page); - return 0; + unlock_page(page); + page_cache_release(page); + return copied; } /* @@ -191,8 +201,8 @@ static int ramdisk_set_page_dirty(struct static const struct address_space_operations ramdisk_aops = { .readpage = ramdisk_readpage, - .prepare_write = ramdisk_prepare_write, - .commit_write = ramdisk_commit_write, + .write_begin = ramdisk_write_begin, + .write_end = ramdisk_write_end, .writepage = ramdisk_writepage, .set_page_dirty = ramdisk_set_page_dirty, .writepages = ramdisk_writepages, @@ -201,13 +211,14 @@ static const struct address_space_operat static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector, struct address_space *mapping) { - pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9); + loff_t pos = sector << 9; unsigned int vec_offset = vec->bv_offset; - int offset = (sector << 9) & ~PAGE_CACHE_MASK; int size = vec->bv_len; int err = 0; do { + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned offset = pos & ~PAGE_CACHE_MASK; int count; struct page *page; char *src; @@ -216,40 +227,50 @@ static int rd_blkdev_pagecache_IO(int rw count = PAGE_CACHE_SIZE - offset; if (count > size) count = size; - size -= count; - - page = grab_cache_page(mapping, index); - if (!page) { - err = -ENOMEM; - goto out; - } - if (!PageUptodate(page)) - make_page_uptodate(page); + if (rw == WRITE) { + err = pagecache_write_begin(NULL, mapping, pos, count, + 0, &page, NULL); + if (err) + goto out; - index++; + src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; + dst = kmap_atomic(page, KM_USER1) + offset; + } else { +again: + page = __grab_cache_page(mapping, index); + if (!page) { + err = -ENOMEM; + goto out; + } + if (!PageUptodate(page)) { + mapping->a_ops->readpage(NULL, page); + goto again; + } - if (rw == READ) { src = kmap_atomic(page, KM_USER0) + offset; dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset; - } else { - src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset; - dst = kmap_atomic(page, KM_USER1) + offset; } - offset = 0; - vec_offset += count; + memcpy(dst, src, count); kunmap_atomic(src, KM_USER0); kunmap_atomic(dst, KM_USER1); - if (rw == READ) + if (rw == READ) { flush_dcache_page(vec->bv_page); - else - set_page_dirty(page); - unlock_page(page); - put_page(page); + unlock_page(page); + page_cache_release(page); + } else { + flush_dcache_page(page); + pagecache_write_end(NULL, mapping, pos, count, + count, page, NULL); + } + + pos += count; + vec_offset += count; + size -= count; } while (size); out: Index: linux-2.6/fs/ext2/inode.c =================================================================== --- linux-2.6.orig/fs/ext2/inode.c +++ linux-2.6/fs/ext2/inode.c @@ -642,18 +642,21 @@ ext2_readpages(struct file *file, struct return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); } -static int -ext2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +int __ext2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,ext2_get_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ext2_get_block); } static int -ext2_nobh_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +ext2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return nobh_prepare_write(page,from,to,ext2_get_block); + *pagep = NULL; + return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); } static int ext2_nobh_writepage(struct page *page, @@ -689,8 +692,8 @@ const struct address_space_operations ex .readpages = ext2_readpages, .writepage = ext2_writepage, .sync_page = block_sync_page, - .prepare_write = ext2_prepare_write, - .commit_write = generic_commit_write, + .write_begin = ext2_write_begin, + .write_end = generic_write_end, .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, @@ -707,8 +710,7 @@ const struct address_space_operations ex .readpages = ext2_readpages, .writepage = ext2_nobh_writepage, .sync_page = block_sync_page, - .prepare_write = ext2_nobh_prepare_write, - .commit_write = nobh_commit_write, + /* XXX: todo */ .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, Index: linux-2.6/fs/ext2/dir.c =================================================================== --- linux-2.6.orig/fs/ext2/dir.c +++ linux-2.6/fs/ext2/dir.c @@ -22,6 +22,7 @@ */ #include "ext2.h" +#include #include #include @@ -62,12 +63,14 @@ ext2_last_byte(struct inode *inode, unsi return last_byte; } -static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) +static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; + dir->i_version++; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else @@ -416,16 +419,18 @@ ino_t ext2_inode_by_name(struct inode * void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, struct page *page, struct inode *inode) { - unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + le16_to_cpu(de->rec_len); + loff_t pos = (page->index << PAGE_CACHE_SHIFT) + + (char *) de - (char *) page_address(page); + unsigned len = le16_to_cpu(de->rec_len); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ext2_write_begin(NULL, page->mapping, pos, len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - err = ext2_commit_chunk(page, from, to); + ext2_set_de_type(de, inode); + err = ext2_commit_chunk(page, pos, len); ext2_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; @@ -448,7 +453,7 @@ int ext2_add_link (struct dentry *dentry unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr; - unsigned from, to; + loff_t pos; int err; /* @@ -501,9 +506,10 @@ int ext2_add_link (struct dentry *dentry return -EINVAL; got_it: - from = (char*)de - (char*)page_address(page); - to = from + rec_len; - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + pos = (page->index << PAGE_CACHE_SHIFT) + + (char*)de - (char*)page_address(page); + err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0, + &page, NULL); if (err) goto out_unlock; if (de->inode) { @@ -513,10 +519,10 @@ got_it: de = de1; } de->name_len = namelen; - memcpy (de->name, name, namelen); + memcpy(de->name, name, namelen); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); - err = ext2_commit_chunk(page, from, to); + err = ext2_commit_chunk(page, pos, rec_len); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); @@ -541,6 +547,7 @@ int ext2_delete_entry (struct ext2_dir_e char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); + loff_t pos; ext2_dirent * pde = NULL; ext2_dirent * de = (ext2_dirent *) (kaddr + from); int err; @@ -557,13 +564,15 @@ int ext2_delete_entry (struct ext2_dir_e } if (pde) from = (char*)pde - (char*)page_address(page); + pos = (page->index << PAGE_CACHE_SHIFT) + from; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0, + &page, NULL); BUG_ON(err); if (pde) - pde->rec_len = cpu_to_le16(to-from); + pde->rec_len = cpu_to_le16(to - from); dir->inode = 0; - err = ext2_commit_chunk(page, from, to); + err = ext2_commit_chunk(page, pos, to - from); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(inode); @@ -586,7 +595,9 @@ int ext2_make_empty(struct inode *inode, if (!page) return -ENOMEM; - err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + + err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0, + &page, NULL); if (err) { unlock_page(page); goto fail; Index: linux-2.6/fs/ext2/ext2.h =================================================================== --- linux-2.6.orig/fs/ext2/ext2.h +++ linux-2.6/fs/ext2/ext2.h @@ -133,6 +133,9 @@ extern int ext2_get_block(struct inode * extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); +int __ext2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); /* ioctl.c */ extern int ext2_ioctl (struct inode *, struct file *, unsigned int, Index: linux-2.6/fs/ext3/inode.c =================================================================== --- linux-2.6.orig/fs/ext3/inode.c +++ linux-2.6/fs/ext3/inode.c @@ -1148,51 +1148,68 @@ static int do_journal_get_write_access(h return ext3_journal_get_write_access(handle, bh); } -static int ext3_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext3_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret, needed_blocks = ext3_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; + struct page *page; + pgoff_t index; + unsigned from, to; + + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; retry: + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + handle = ext3_journal_start(inode, needed_blocks); if (IS_ERR(handle)) { + unlock_page(page); + page_cache_release(page); ret = PTR_ERR(handle); goto out; } - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_prepare_write(page, from, to, ext3_get_block); - else - ret = block_prepare_write(page, from, to, ext3_get_block); + ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ext3_get_block); if (ret) - goto prepare_write_failed; + goto write_begin_failed; if (ext3_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); } -prepare_write_failed: - if (ret) +write_begin_failed: + if (ret) { ext3_journal_stop(handle); + unlock_page(page); + page_cache_release(page); + } if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; out: return ret; } + int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) { int err = journal_dirty_data(handle, bh); if (err) ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, - bh, handle,err); + bh, handle, err); return err; } -/* For commit_write() in data=journal mode */ -static int commit_write_fn(handle_t *handle, struct buffer_head *bh) +/* For write_end() in data=journal mode */ +static int write_end_fn(handle_t *handle, struct buffer_head *bh) { if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; @@ -1207,78 +1224,100 @@ static int commit_write_fn(handle_t *han * ext3 never places buffers on inode->i_mapping->private_list. metadata * buffers are managed internally. */ -static int ext3_ordered_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext3_ordered_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; + unsigned from, to; int ret = 0, ret2; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, ext3_journal_dirty_data); if (ret == 0) { /* - * generic_commit_write() will run mark_inode_dirty() if i_size + * generic_write_end() will run mark_inode_dirty() if i_size * changes. So let's piggyback the i_disksize mark_inode_dirty * into that. */ loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - ret = generic_commit_write(file, page, from, to); + copied = generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; + } else { + unlock_page(page); + page_cache_release(page); } ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; - return ret; + return ret ? ret : copied; } -static int ext3_writeback_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext3_writeback_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; int ret = 0, ret2; loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT3_I(inode)->i_disksize) EXT3_I(inode)->i_disksize = new_i_size; - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_commit_write(file, page, from, to); - else - ret = generic_commit_write(file, page, from, to); + copied = generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; - return ret; + return ret ? ret : copied; } -static int ext3_journalled_commit_write(struct file *file, - struct page *page, unsigned from, unsigned to) +static int ext3_journalled_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret = 0, ret2; int partial = 0; - loff_t pos; + unsigned from, to; - /* - * Here we duplicate the generic_commit_write() functionality - */ - pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + + if (copied < len) { + if (!PageUptodate(page)) + copied = 0; + page_zero_new_buffers(page, from+copied, to); + } ret = walk_page_buffers(handle, page_buffers(page), from, - to, &partial, commit_write_fn); + to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos > inode->i_size) - i_size_write(inode, pos); + unlock_page(page); + page_cache_release(page); + if (pos+copied > inode->i_size) + i_size_write(inode, pos+copied); EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; if (inode->i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = inode->i_size; @@ -1286,10 +1325,12 @@ static int ext3_journalled_commit_write( if (!ret) ret = ret2; } + +out: ret2 = ext3_journal_stop(handle); if (!ret) ret = ret2; - return ret; + return ret ? ret : copied; } /* @@ -1547,7 +1588,7 @@ static int ext3_journalled_writepage(str PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); err = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, commit_write_fn); + PAGE_CACHE_SIZE, NULL, write_end_fn); if (ret == 0) ret = err; EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; @@ -1707,8 +1748,8 @@ static const struct address_space_operat .readpages = ext3_readpages, .writepage = ext3_ordered_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_ordered_commit_write, + .write_begin = ext3_write_begin, + .write_end = ext3_ordered_write_end, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, @@ -1721,8 +1762,8 @@ static const struct address_space_operat .readpages = ext3_readpages, .writepage = ext3_writeback_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_writeback_commit_write, + .write_begin = ext3_write_begin, + .write_end = ext3_writeback_write_end, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, .releasepage = ext3_releasepage, @@ -1735,8 +1776,8 @@ static const struct address_space_operat .readpages = ext3_readpages, .writepage = ext3_journalled_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_journalled_commit_write, + .write_begin = ext3_write_begin, + .write_end = ext3_journalled_write_end, .set_page_dirty = ext3_journalled_set_page_dirty, .bmap = ext3_bmap, .invalidatepage = ext3_invalidatepage, Index: linux-2.6/fs/ext4/inode.c =================================================================== --- linux-2.6.orig/fs/ext4/inode.c +++ linux-2.6/fs/ext4/inode.c @@ -1147,34 +1147,50 @@ static int do_journal_get_write_access(h return ext4_journal_get_write_access(handle, bh); } -static int ext4_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext4_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret, needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; + struct page *page; + pgoff_t index; + unsigned from, to; + + index = pos >> PAGE_CACHE_SHIFT; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; retry: - handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + + handle = ext4_journal_start(inode, needed_blocks); + if (IS_ERR(handle)) { + unlock_page(page); + page_cache_release(page); + ret = PTR_ERR(handle); + goto out; } - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_prepare_write(page, from, to, ext4_get_block); - else - ret = block_prepare_write(page, from, to, ext4_get_block); - if (ret) - goto prepare_write_failed; - if (ext4_should_journal_data(inode)) { + ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ext4_get_block); + + if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); } -prepare_write_failed: - if (ret) + + if (ret) { ext4_journal_stop(handle); + unlock_page(page); + page_cache_release(page); + } + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; out: @@ -1186,12 +1202,12 @@ int ext4_journal_dirty_data(handle_t *ha int err = jbd2_journal_dirty_data(handle, bh); if (err) ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, - bh, handle,err); + bh, handle, err); return err; } -/* For commit_write() in data=journal mode */ -static int commit_write_fn(handle_t *handle, struct buffer_head *bh) +/* For write_end() in data=journal mode */ +static int write_end_fn(handle_t *handle, struct buffer_head *bh) { if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; @@ -1206,78 +1222,100 @@ static int commit_write_fn(handle_t *han * ext4 never places buffers on inode->i_mapping->private_list. metadata * buffers are managed internally. */ -static int ext4_ordered_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext4_ordered_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; + unsigned from, to; int ret = 0, ret2; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, ext4_journal_dirty_data); if (ret == 0) { /* - * generic_commit_write() will run mark_inode_dirty() if i_size + * generic_write_end() will run mark_inode_dirty() if i_size * changes. So let's piggyback the i_disksize mark_inode_dirty * into that. */ loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - ret = generic_commit_write(file, page, from, to); + copied = generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; + } else { + unlock_page(page); + page_cache_release(page); } ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - return ret; + return ret ? ret : copied; } -static int ext4_writeback_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ext4_writeback_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = file->f_mapping->host; int ret = 0, ret2; loff_t new_i_size; - new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + new_i_size = pos + copied; if (new_i_size > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = new_i_size; - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_commit_write(file, page, from, to); - else - ret = generic_commit_write(file, page, from, to); + copied = generic_write_end(file, mapping, pos, len, copied, + page, fsdata); + if (copied < 0) + ret = copied; ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - return ret; + return ret ? ret : copied; } -static int ext4_journalled_commit_write(struct file *file, - struct page *page, unsigned from, unsigned to) +static int ext4_journalled_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { handle_t *handle = ext4_journal_current_handle(); - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; int ret = 0, ret2; int partial = 0; - loff_t pos; + unsigned from, to; - /* - * Here we duplicate the generic_commit_write() functionality - */ - pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + + if (copied < len) { + if (!PageUptodate(page)) + copied = 0; + page_zero_new_buffers(page, from+copied, to); + } ret = walk_page_buffers(handle, page_buffers(page), from, - to, &partial, commit_write_fn); + to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos > inode->i_size) - i_size_write(inode, pos); + unlock_page(page); + page_cache_release(page); + if (pos+copied > inode->i_size) + i_size_write(inode, pos+copied); EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; if (inode->i_size > EXT4_I(inode)->i_disksize) { EXT4_I(inode)->i_disksize = inode->i_size; @@ -1285,10 +1323,12 @@ static int ext4_journalled_commit_write( if (!ret) ret = ret2; } + +out: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - return ret; + return ret ? ret : copied; } /* @@ -1546,7 +1586,7 @@ static int ext4_journalled_writepage(str PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); err = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, commit_write_fn); + PAGE_CACHE_SIZE, NULL, write_end_fn); if (ret == 0) ret = err; EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; @@ -1706,8 +1746,8 @@ static const struct address_space_operat .readpages = ext4_readpages, .writepage = ext4_ordered_writepage, .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_ordered_commit_write, + .write_begin = ext4_write_begin, + .write_end = ext4_ordered_write_end, .bmap = ext4_bmap, .invalidatepage = ext4_invalidatepage, .releasepage = ext4_releasepage, @@ -1720,8 +1760,8 @@ static const struct address_space_operat .readpages = ext4_readpages, .writepage = ext4_writeback_writepage, .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_writeback_commit_write, + .write_begin = ext4_write_begin, + .write_end = ext4_writeback_write_end, .bmap = ext4_bmap, .invalidatepage = ext4_invalidatepage, .releasepage = ext4_releasepage, @@ -1734,8 +1774,8 @@ static const struct address_space_operat .readpages = ext4_readpages, .writepage = ext4_journalled_writepage, .sync_page = block_sync_page, - .prepare_write = ext4_prepare_write, - .commit_write = ext4_journalled_commit_write, + .write_begin = ext4_write_begin, + .write_end = ext4_journalled_write_end, .set_page_dirty = ext4_journalled_set_page_dirty, .bmap = ext4_bmap, .invalidatepage = ext4_invalidatepage, Index: linux-2.6/fs/xfs/linux-2.6/xfs_aops.c =================================================================== --- linux-2.6.orig/fs/xfs/linux-2.6/xfs_aops.c +++ linux-2.6/fs/xfs/linux-2.6/xfs_aops.c @@ -1414,13 +1414,18 @@ xfs_vm_direct_IO( } STATIC int -xfs_vm_prepare_write( +xfs_vm_write_begin( struct file *file, - struct page *page, - unsigned int from, - unsigned int to) + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned flags, + struct page **pagep, + void **fsdata) { - return block_prepare_write(page, from, to, xfs_get_blocks); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + xfs_get_blocks); } STATIC sector_t @@ -1474,8 +1479,8 @@ const struct address_space_operations xf .sync_page = block_sync_page, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, - .prepare_write = xfs_vm_prepare_write, - .commit_write = generic_commit_write, + .write_begin = xfs_vm_write_begin, + .write_end = generic_write_end, .bmap = xfs_vm_bmap, .direct_IO = xfs_vm_direct_IO, .migratepage = buffer_migrate_page, Index: linux-2.6/fs/xfs/linux-2.6/xfs_lrw.c =================================================================== --- linux-2.6.orig/fs/xfs/linux-2.6/xfs_lrw.c +++ linux-2.6/fs/xfs/linux-2.6/xfs_lrw.c @@ -134,45 +134,34 @@ xfs_iozero( loff_t pos, /* offset in file */ size_t count) /* size of data to zero */ { - unsigned bytes; struct page *page; struct address_space *mapping; int status; mapping = ip->i_mapping; do { - unsigned long index, offset; + unsigned offset, bytes; + void *fsdata; offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - index = pos >> PAGE_CACHE_SHIFT; bytes = PAGE_CACHE_SIZE - offset; if (bytes > count) bytes = count; - status = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - break; - - status = mapping->a_ops->prepare_write(NULL, page, offset, - offset + bytes); + status = pagecache_write_begin(NULL, mapping, pos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); if (status) - goto unlock; + break; memclear_highpage_flush(page, offset, bytes); - status = mapping->a_ops->commit_write(NULL, page, offset, - offset + bytes); - if (!status) { - pos += bytes; - count -= bytes; - } - -unlock: - unlock_page(page); - page_cache_release(page); - if (status) - break; + status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, + page, fsdata); + WARN_ON(status <= 0); /* can't return less than zero! */ + pos += bytes; + count -= bytes; + status = 0; } while (count); return (-status); Index: linux-2.6/fs/fat/inode.c =================================================================== --- linux-2.6.orig/fs/fat/inode.c +++ linux-2.6/fs/fat/inode.c @@ -139,19 +139,24 @@ static int fat_readpages(struct file *fi return mpage_readpages(mapping, pages, nr_pages, fat_get_block); } -static int fat_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int fat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page, from, to, fat_get_block, - &MSDOS_I(page->mapping->host)->mmu_private); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + fat_get_block, + &MSDOS_I(mapping->host)->mmu_private); } -static int fat_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int fat_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *pagep, void *fsdata) { - struct inode *inode = page->mapping->host; - int err = generic_commit_write(file, page, from, to); - if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { + struct inode *inode = mapping->host; + int err; + err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); + if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; MSDOS_I(inode)->i_attrs |= ATTR_ARCH; mark_inode_dirty(inode); @@ -200,8 +205,8 @@ static const struct address_space_operat .writepage = fat_writepage, .writepages = fat_writepages, .sync_page = block_sync_page, - .prepare_write = fat_prepare_write, - .commit_write = fat_commit_write, + .write_begin = fat_write_begin, + .write_end = fat_write_end, .direct_IO = fat_direct_IO, .bmap = _fat_bmap }; Index: linux-2.6/fs/adfs/inode.c =================================================================== --- linux-2.6.orig/fs/adfs/inode.c +++ linux-2.6/fs/adfs/inode.c @@ -61,10 +61,14 @@ static int adfs_readpage(struct file *fi return block_read_full_page(page, adfs_get_block); } -static int adfs_prepare_write(struct file *file, struct page *page, unsigned int from, unsigned int to) +static int adfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return cont_prepare_write(page, from, to, adfs_get_block, - &ADFS_I(page->mapping->host)->mmu_private); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + adfs_get_block, + &ADFS_I(mapping->host)->mmu_private); } static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) @@ -76,8 +80,8 @@ static const struct address_space_operat .readpage = adfs_readpage, .writepage = adfs_writepage, .sync_page = block_sync_page, - .prepare_write = adfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = adfs_write_begin, + .write_end = generic_write_end, .bmap = _adfs_bmap }; Index: linux-2.6/fs/affs/file.c =================================================================== --- linux-2.6.orig/fs/affs/file.c +++ linux-2.6/fs/affs/file.c @@ -395,25 +395,33 @@ static int affs_writepage(struct page *p { return block_write_full_page(page, affs_get_block, wbc); } + static int affs_readpage(struct file *file, struct page *page) { return block_read_full_page(page, affs_get_block); } -static int affs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) -{ - return cont_prepare_write(page, from, to, affs_get_block, - &AFFS_I(page->mapping->host)->mmu_private); + +static int affs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + affs_get_block, + &AFFS_I(mapping->host)->mmu_private); } + static sector_t _affs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,affs_get_block); } + const struct address_space_operations affs_aops = { .readpage = affs_readpage, .writepage = affs_writepage, .sync_page = block_sync_page, - .prepare_write = affs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = affs_write_begin, + .write_end = generic_write_end, .bmap = _affs_bmap }; @@ -603,58 +611,65 @@ affs_readpage_ofs(struct file *file, str return err; } -static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - u32 size, offset; - u32 tmp; +static int affs_write_begin_ofs(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct inode *inode = mapping->host; + struct page *page; + pgoff_t index; int err = 0; - pr_debug("AFFS: prepare_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); - offset = page->index << PAGE_CACHE_SHIFT; - if (offset + from > AFFS_I(inode)->mmu_private) { - err = affs_extent_file_ofs(inode, offset + from); + pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len); + if (pos > AFFS_I(inode)->mmu_private) { + /* XXX: this probably leaves a too-big i_size in case of + * failure. Should really be updating i_size at write_end time + */ + err = affs_extent_file_ofs(inode, pos); if (err) return err; } - size = inode->i_size; + + index = pos >> PAGE_CACHE_SHIFT; + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; if (PageUptodate(page)) return 0; - if (from) { - err = affs_do_readpage_ofs(file, page, 0, from); - if (err) - return err; - } - if (to < PAGE_CACHE_SIZE) { - char *kaddr = kmap_atomic(page, KM_USER0); - - memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - if (size > offset + to) { - if (size < offset + PAGE_CACHE_SIZE) - tmp = size & ~PAGE_CACHE_MASK; - else - tmp = PAGE_CACHE_SIZE; - err = affs_do_readpage_ofs(file, page, to, tmp); - } + /* XXX: inefficient but safe in the face of short writes */ + err = affs_do_readpage_ofs(file, page, 0, PAGE_CACHE_SIZE); + if (err) { + unlock_page(page); + page_cache_release(page); } return err; } -static int affs_commit_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to) +static int affs_write_end_ofs(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; struct super_block *sb = inode->i_sb; struct buffer_head *bh, *prev_bh; char *data; u32 bidx, boff, bsize; + unsigned from, to; u32 tmp; int written; - pr_debug("AFFS: commit_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to); + from = pos & (PAGE_CACHE_SIZE - 1); + to = pos + len; + /* + * XXX: not sure if this can handle short copies (len < copied), but + * we don't have to, because the page should always be uptodate here, + * due to write_begin. + */ + + pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len); bsize = AFFS_SB(sb)->s_data_blksize; data = page_address(page); @@ -765,8 +780,8 @@ const struct address_space_operations af .readpage = affs_readpage_ofs, //.writepage = affs_writepage_ofs, //.sync_page = affs_sync_page_ofs, - .prepare_write = affs_prepare_write_ofs, - .commit_write = affs_commit_write_ofs + .write_begin = affs_write_begin_ofs, + .write_end = affs_write_end_ofs }; /* Free any preallocated blocks. */ @@ -809,18 +824,13 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - u32 size = inode->i_size - 1; + void *fsdata; + u32 size = inode->i_size; int res; - page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); - if (!page) - return; - size = (size & (PAGE_CACHE_SIZE - 1)) + 1; - res = mapping->a_ops->prepare_write(NULL, page, size, size); + res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata); if (!res) - res = mapping->a_ops->commit_write(NULL, page, size, size); - unlock_page(page); - page_cache_release(page); + res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata); mark_inode_dirty(inode); return; } else if (inode->i_size == AFFS_I(inode)->mmu_private) Index: linux-2.6/fs/hfs/inode.c =================================================================== --- linux-2.6.orig/fs/hfs/inode.c +++ linux-2.6/fs/hfs/inode.c @@ -34,10 +34,14 @@ static int hfs_readpage(struct file *fil return block_read_full_page(page, hfs_get_block); } -static int hfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) -{ - return cont_prepare_write(page, from, to, hfs_get_block, - &HFS_I(page->mapping->host)->phys_size); +static int hfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + hfs_get_block, + &HFS_I(mapping->host)->phys_size); } static sector_t hfs_bmap(struct address_space *mapping, sector_t block) @@ -118,8 +122,8 @@ const struct address_space_operations hf .readpage = hfs_readpage, .writepage = hfs_writepage, .sync_page = block_sync_page, - .prepare_write = hfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfs_write_begin, + .write_end = generic_write_end, .bmap = hfs_bmap, .releasepage = hfs_releasepage, }; @@ -128,8 +132,8 @@ const struct address_space_operations hf .readpage = hfs_readpage, .writepage = hfs_writepage, .sync_page = block_sync_page, - .prepare_write = hfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfs_write_begin, + .write_end = generic_write_end, .bmap = hfs_bmap, .direct_IO = hfs_direct_IO, .writepages = hfs_writepages, Index: linux-2.6/fs/hfs/extent.c =================================================================== --- linux-2.6.orig/fs/hfs/extent.c +++ linux-2.6/fs/hfs/extent.c @@ -464,23 +464,20 @@ void hfs_file_truncate(struct inode *ino (long long)HFS_I(inode)->phys_size, inode->i_size); if (inode->i_size > HFS_I(inode)->phys_size) { struct address_space *mapping = inode->i_mapping; + void *fsdata; struct page *page; int res; + /* XXX: Can use generic_cont_expand? */ size = inode->i_size - 1; - page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); - if (!page) - return; - size &= PAGE_CACHE_SIZE - 1; - size++; - res = mapping->a_ops->prepare_write(NULL, page, size, size); - if (!res) - res = mapping->a_ops->commit_write(NULL, page, size, size); + res = pagecache_write_begin(NULL, mapping, size+1, 0, + AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + if (!res) { + res = pagecache_write_end(NULL, mapping, size+1, 0, 0, + page, fsdata); + } if (res) inode->i_size = HFS_I(inode)->phys_size; - unlock_page(page); - page_cache_release(page); - mark_inode_dirty(inode); return; } else if (inode->i_size == HFS_I(inode)->phys_size) return; Index: linux-2.6/fs/hfsplus/inode.c =================================================================== --- linux-2.6.orig/fs/hfsplus/inode.c +++ linux-2.6/fs/hfsplus/inode.c @@ -26,10 +26,14 @@ static int hfsplus_writepage(struct page return block_write_full_page(page, hfsplus_get_block, wbc); } -static int hfsplus_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) -{ - return cont_prepare_write(page, from, to, hfsplus_get_block, - &HFSPLUS_I(page->mapping->host).phys_size); +static int hfsplus_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + hfsplus_get_block, + &HFSPLUS_I(mapping->host).phys_size); } static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) @@ -113,8 +117,8 @@ const struct address_space_operations hf .readpage = hfsplus_readpage, .writepage = hfsplus_writepage, .sync_page = block_sync_page, - .prepare_write = hfsplus_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfsplus_write_begin, + .write_end = generic_write_end, .bmap = hfsplus_bmap, .releasepage = hfsplus_releasepage, }; @@ -123,8 +127,8 @@ const struct address_space_operations hf .readpage = hfsplus_readpage, .writepage = hfsplus_writepage, .sync_page = block_sync_page, - .prepare_write = hfsplus_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hfsplus_write_begin, + .write_end = generic_write_end, .bmap = hfsplus_bmap, .direct_IO = hfsplus_direct_IO, .writepages = hfsplus_writepages, Index: linux-2.6/fs/hfsplus/extents.c =================================================================== --- linux-2.6.orig/fs/hfsplus/extents.c +++ linux-2.6/fs/hfsplus/extents.c @@ -443,21 +443,18 @@ void hfsplus_file_truncate(struct inode if (inode->i_size > HFSPLUS_I(inode).phys_size) { struct address_space *mapping = inode->i_mapping; struct page *page; - u32 size = inode->i_size - 1; + void *fsdata; + u32 size = inode->i_size; int res; - page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT); - if (!page) - return; - size &= PAGE_CACHE_SIZE - 1; - size++; - res = mapping->a_ops->prepare_write(NULL, page, size, size); - if (!res) - res = mapping->a_ops->commit_write(NULL, page, size, size); + res = pagecache_write_begin(NULL, mapping, size, 0, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); if (res) - inode->i_size = HFSPLUS_I(inode).phys_size; - unlock_page(page); - page_cache_release(page); + return; + res = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata); + if (res < 0) + return; mark_inode_dirty(inode); return; } else if (inode->i_size == HFSPLUS_I(inode).phys_size) Index: linux-2.6/fs/hpfs/file.c =================================================================== --- linux-2.6.orig/fs/hpfs/file.c +++ linux-2.6/fs/hpfs/file.c @@ -86,25 +86,33 @@ static int hpfs_writepage(struct page *p { return block_write_full_page(page,hpfs_get_block, wbc); } + static int hpfs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,hpfs_get_block); } -static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) -{ - return cont_prepare_write(page,from,to,hpfs_get_block, - &hpfs_i(page->mapping->host)->mmu_private); + +static int hpfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + hpfs_get_block, + &hpfs_i(mapping->host)->mmu_private); } + static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,hpfs_get_block); } + const struct address_space_operations hpfs_aops = { .readpage = hpfs_readpage, .writepage = hpfs_writepage, .sync_page = block_sync_page, - .prepare_write = hpfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = hpfs_write_begin, + .write_end = generic_write_end, .bmap = _hpfs_bmap }; Index: linux-2.6/fs/bfs/file.c =================================================================== --- linux-2.6.orig/fs/bfs/file.c +++ linux-2.6/fs/bfs/file.c @@ -145,9 +145,13 @@ static int bfs_readpage(struct file *fil return block_read_full_page(page, bfs_get_block); } -static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int bfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, bfs_get_block); + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, + pagep, fsdata, bfs_get_block); } static sector_t bfs_bmap(struct address_space *mapping, sector_t block) @@ -159,8 +163,8 @@ const struct address_space_operations bf .readpage = bfs_readpage, .writepage = bfs_writepage, .sync_page = block_sync_page, - .prepare_write = bfs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = bfs_write_begin, + .write_end = generic_write_end, .bmap = bfs_bmap, }; Index: linux-2.6/fs/qnx4/inode.c =================================================================== --- linux-2.6.orig/fs/qnx4/inode.c +++ linux-2.6/fs/qnx4/inode.c @@ -433,16 +433,21 @@ static int qnx4_writepage(struct page *p { return block_write_full_page(page,qnx4_get_block, wbc); } + static int qnx4_readpage(struct file *file, struct page *page) { return block_read_full_page(page,qnx4_get_block); } -static int qnx4_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct qnx4_inode_info *qnx4_inode = qnx4_i(page->mapping->host); - return cont_prepare_write(page, from, to, qnx4_get_block, - &qnx4_inode->mmu_private); + +static int qnx4_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + qnx4_get_block, + &qnx4_inode->mmu_private); } static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { @@ -452,8 +457,8 @@ static const struct address_space_operat .readpage = qnx4_readpage, .writepage = qnx4_writepage, .sync_page = block_sync_page, - .prepare_write = qnx4_prepare_write, - .commit_write = generic_commit_write, + .write_begin = qnx4_write_begin, + .write_end = generic_write_end, .bmap = qnx4_bmap }; Index: linux-2.6/fs/nfs/file.c =================================================================== --- linux-2.6.orig/fs/nfs/file.c +++ linux-2.6/fs/nfs/file.c @@ -282,27 +282,50 @@ nfs_fsync(struct file *file, struct dent } /* - * This does the "real" work of the write. The generic routine has - * allocated the page, locked it, done all the page alignment stuff - * calculations etc. Now we should just copy the data from user - * space and write it back to the real medium.. + * This does the "real" work of the write. We must allocate and lock the + * page to be sent back to the generic routine, which then copies the + * data from user space. * * If the writer ends up delaying the write, the writer needs to * increment the page use counts until he is done with the page. */ -static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) -{ - return nfs_flush_incompatible(file, page); +static int nfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + int ret; + pgoff_t index; + struct page *page; + index = pos >> PAGE_CACHE_SHIFT; + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; + + ret = nfs_flush_incompatible(file, page); + if (ret) { + unlock_page(page); + page_cache_release(page); + } + return ret; } -static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int nfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - long status; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + int status; lock_kernel(); - status = nfs_updatepage(file, page, offset, to-offset); + status = nfs_updatepage(file, page, offset, copied); unlock_kernel(); - return status; + + unlock_page(page); + page_cache_release(page); + + return status < 0 ? status : copied; } static void nfs_invalidate_page(struct page *page, unsigned long offset) @@ -330,8 +353,8 @@ const struct address_space_operations nf .set_page_dirty = nfs_set_page_dirty, .writepage = nfs_writepage, .writepages = nfs_writepages, - .prepare_write = nfs_prepare_write, - .commit_write = nfs_commit_write, + .write_begin = nfs_write_begin, + .write_end = nfs_write_end, .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, #ifdef CONFIG_NFS_DIRECTIO Index: linux-2.6/fs/smbfs/file.c =================================================================== --- linux-2.6.orig/fs/smbfs/file.c +++ linux-2.6/fs/smbfs/file.c @@ -290,29 +290,45 @@ out: * If the writer ends up delaying the write, the writer needs to * increment the page use counts until he is done with the page. */ -static int smb_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) -{ +static int smb_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + *pagep = __grab_cache_page(mapping, index); + if (!*pagep) + return -ENOMEM; return 0; } -static int smb_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int smb_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { int status; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - status = -EFAULT; lock_kernel(); - status = smb_updatepage(file, page, offset, to-offset); + status = smb_updatepage(file, page, offset, copied); unlock_kernel(); + + if (!status) { + if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) + SetPageUptodate(page); + status = copied; + } + + unlock_page(page); + page_cache_release(page); + return status; } const struct address_space_operations smb_file_aops = { .readpage = smb_readpage, .writepage = smb_writepage, - .prepare_write = smb_prepare_write, - .commit_write = smb_commit_write + .write_begin = smb_write_begin, + .write_end = smb_write_end, }; /* Index: linux-2.6/fs/ocfs2/aops.c =================================================================== --- linux-2.6.orig/fs/ocfs2/aops.c +++ linux-2.6/fs/ocfs2/aops.c @@ -293,29 +293,67 @@ int ocfs2_prepare_write_nolock(struct in } /* - * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called - * from loopback. It must be able to perform its own locking around - * ocfs2_get_block(). + * ocfs2_write_begin() can be an outer-most ocfs2 call when it is + * called from elsewhere in the kernel. It must be able to perform its + * own locking around ocfs2_get_block(). */ -static int ocfs2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ocfs2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = page->mapping->host; + struct inode *inode = mapping->host; + struct buffer_head *di_bh = NULL; + struct page *page = NULL; int ret; - mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); - - ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); + ret = ocfs2_meta_lock(inode, &di_bh, 1); if (ret != 0) { mlog_errno(ret); + return ret; + } + + ret = ocfs2_data_lock(inode, 1); + if (ret) { + ocfs2_meta_unlock(inode, 1); + + mlog_errno(ret); + return ret; + } + + /* + * Lock the page out here to preserve ordering with + * ip_alloc_sem. + */ + page = __grab_cache_page(mapping, pos >> PAGE_CACHE_SHIFT); + if (!page) { + ret = -ENOMEM; + mlog_errno(ret); goto out; } - ret = ocfs2_prepare_write_nolock(inode, page, from, to); + *pagep = page; - ocfs2_meta_unlock(inode, 0); + down_read(&OCFS2_I(inode)->ip_alloc_sem); + ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ocfs2_get_block); + up_read(&OCFS2_I(inode)->ip_alloc_sem); out: - mlog_exit(ret); + if (ret == 0) { + *fsdata = di_bh; + } else { + /* + * Error return - the caller won't call + * ocfs2_write_end, so drop cluster locks here. + */ + brelse(di_bh); + if (page) { + unlock_page(page); + page_cache_release(page); + } + ocfs2_data_unlock(inode, 1); + ocfs2_meta_unlock(inode, 1); + } + return ret; } @@ -388,16 +426,18 @@ out: return handle; } -static int ocfs2_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ocfs2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { int ret; - struct buffer_head *di_bh = NULL; + unsigned from, to; + struct buffer_head *di_bh = fsdata; struct inode *inode = page->mapping->host; handle_t *handle = NULL; struct ocfs2_dinode *di; - mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to); + mlog_entry("(0x%p, 0x%p)\n", file, page); /* NOTE: ocfs2_file_aio_write has ensured that it's safe for * us to continue here without rechecking the I/O against @@ -412,22 +452,13 @@ static int ocfs2_commit_write(struct fil * stale inode allocation image (i_size, i_clusters, etc). */ - ret = ocfs2_meta_lock_with_page(inode, &di_bh, 1, page); - if (ret != 0) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_data_lock_with_page(inode, 1, page); - if (ret != 0) { - mlog_errno(ret); - goto out_unlock_meta; - } + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; handle = ocfs2_start_walk_page_trans(inode, page, from, to); if (IS_ERR(handle)) { ret = PTR_ERR(handle); - goto out_unlock_data; + goto out_unlock; } /* Mark our buffer early. We'd rather catch this error up here @@ -441,8 +472,10 @@ static int ocfs2_commit_write(struct fil } /* might update i_size */ - ret = generic_commit_write(file, page, from, to); - if (ret < 0) { + copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (copied < 0) { + ret = copied; + copied = 0; mlog_errno(ret); goto out_commit; } @@ -458,23 +491,30 @@ static int ocfs2_commit_write(struct fil di->i_size = cpu_to_le64((u64)i_size_read(inode)); ret = ocfs2_journal_dirty(handle, di_bh); - if (ret < 0) { + if (ret < 0) mlog_errno(ret); - goto out_commit; - } + ret = 0; out_commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); -out_unlock_data: +out_unlock: ocfs2_data_unlock(inode, 1); -out_unlock_meta: ocfs2_meta_unlock(inode, 1); -out: + + if (ret) { + /* + * We caught an error before generic_write_end() - + * unlock and free the page. + */ + unlock_page(page); + page_cache_release(page); + } + if (di_bh) brelse(di_bh); mlog_exit(ret); - return ret; + return copied ? copied : ret; } static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) @@ -678,8 +718,8 @@ out: const struct address_space_operations ocfs2_aops = { .readpage = ocfs2_readpage, .writepage = ocfs2_writepage, - .prepare_write = ocfs2_prepare_write, - .commit_write = ocfs2_commit_write, + .write_begin = ocfs2_write_begin, + .write_end = ocfs2_write_end, .bmap = ocfs2_bmap, .sync_page = block_sync_page, .direct_IO = ocfs2_direct_IO, Index: linux-2.6/fs/gfs2/ops_address.c =================================================================== --- linux-2.6.orig/fs/gfs2/ops_address.c +++ linux-2.6/fs/gfs2/ops_address.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -337,45 +338,49 @@ out_unlock: } /** - * gfs2_prepare_write - Prepare to write a page to a file + * gfs2_write_begin - Begin to write to a file * @file: The file to write to - * @page: The page which is to be prepared for writing - * @from: From (byte range within page) - * @to: To (byte range within page) + * @mapping: The mapping in which to write + * @pos: The file offset at which to start writing + * @len: Length of the write + * @flags: Various flags + * @pagep: Pointer to return the page + * @fsdata: Pointer to return fs data (unused by GFS2) * * Returns: errno */ -static int gfs2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int gfs2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct gfs2_inode *ip = GFS2_I(page->mapping->host); - struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_sbd *sdp = GFS2_SB(mapping->host); unsigned int data_blocks, ind_blocks, rblocks; int alloc_required; int error = 0; - loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from; - loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; struct gfs2_alloc *al; - unsigned int write_len = to - from; - + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned to = from + len; + struct page *page; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); error = gfs2_glock_nq_atime(&ip->i_gh); - if (unlikely(error)) { - if (error == GLR_TRYFAILED) { - unlock_page(page); - error = AOP_TRUNCATED_PAGE; - yield(); - } + if (unlikely(error)) goto out_uninit; - } - gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks); + error = -ENOMEM; + page = __grab_cache_page(mapping, index); + *pagep = page; + if (!page) + goto out_unlock; + + gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); - error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required); + error = gfs2_write_alloc_required(ip, pos, len, &alloc_required); if (error) - goto out_unlock; + goto out_putpage; ip->i_alloc.al_requested = 0; @@ -407,7 +412,7 @@ static int gfs2_prepare_write(struct fil goto out; if (gfs2_is_stuffed(ip)) { - if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { + if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { error = gfs2_unstuff_dinode(ip, page); if (error == 0) goto prepare_write; @@ -429,6 +434,10 @@ out_qunlock: out_alloc_put: gfs2_alloc_put(ip); } +out_putpage: + page_cache_release(page); + if (pos + len > ip->i_inode.i_size) + vmtruncate(&ip->i_inode, ip->i_inode.i_size); out_unlock: gfs2_glock_dq_m(1, &ip->i_gh); out_uninit: @@ -439,92 +448,128 @@ out_uninit: } /** - * gfs2_commit_write - Commit write to a file + * gfs2_stuffed_write_end - Write end for stuffed files + * @inode: The inode + * @dibh: The buffer_head containing the on-disk inode + * @pos: The file position + * @len: The length of the write + * @copied: How much was actually copied by the VFS + * @page: The page + * + * This copies the data from the page into the inode block after + * the inode data structure itself. + * + * Returns: errno + */ +static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, + loff_t pos, unsigned len, unsigned copied, + struct page *page) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + u64 to = pos + copied; + void *kaddr; + unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode); + struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; + + BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode))); + kaddr = kmap_atomic(page, KM_USER0); + memcpy(buf + pos, kaddr + pos, copied); + memset(kaddr + pos + copied, 0, len - copied); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + + if (!PageUptodate(page)) + SetPageUptodate(page); + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + + if (inode->i_size < to) { + i_size_write(inode, to); + ip->i_di.di_size = inode->i_size; + di->di_size = cpu_to_be64(inode->i_size); + mark_inode_dirty(inode); + } + + brelse(dibh); + gfs2_trans_end(sdp); + gfs2_glock_dq(&ip->i_gh); + gfs2_holder_uninit(&ip->i_gh); + return copied; +} + +/** + * gfs2_write_end * @file: The file to write to - * @page: The page containing the data - * @from: From (byte range within page) - * @to: To (byte range within page) + * @mapping: The address space to write to + * @pos: The file position + * @len: The length of the data + * @copied: + * @page: The page that has been written + * @fsdata: The fsdata (unused in GFS2) + * + * The main write_end function for GFS2. We have a separate one for + * stuffed files as they are slightly different, otherwise we just + * put our locking around the VFS provided functions. * * Returns: errno */ -static int gfs2_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int gfs2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - int error = -EOPNOTSUPP; struct buffer_head *dibh; struct gfs2_alloc *al = &ip->i_alloc; struct gfs2_dinode *di; + unsigned int from = pos & (PAGE_CACHE_SIZE - 1); + unsigned int to = from + len; + int ret; - if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) - goto fail_nounlock; + BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0); - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_endtrans; + ret = gfs2_meta_inode_buffer(ip, &dibh); + if (unlikely(ret)) { + unlock_page(page); + page_cache_release(page); + goto failed; + } gfs2_trans_add_bh(ip->i_gl, dibh, 1); - di = (struct gfs2_dinode *)dibh->b_data; - if (gfs2_is_stuffed(ip)) { - u64 file_size; - void *kaddr; + if (gfs2_is_stuffed(ip)) + return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); - file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to; + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + gfs2_page_add_databufs(ip, page, from, to); - kaddr = kmap_atomic(page, KM_USER0); - memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, - kaddr + from, to - from); - kunmap_atomic(kaddr, KM_USER0); + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - SetPageUptodate(page); - - if (inode->i_size < file_size) { - i_size_write(inode, file_size); + if (likely(ret >= 0)) { + copied = ret; + if ((pos + copied) > inode->i_size) { + di = (struct gfs2_dinode *)dibh->b_data; + ip->i_di.di_size = inode->i_size; + di->di_size = cpu_to_be64(inode->i_size); mark_inode_dirty(inode); } - } else { - if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || - gfs2_is_jdata(ip)) - gfs2_page_add_databufs(ip, page, from, to); - error = generic_commit_write(file, page, from, to); - if (error) - goto fail; - } - - if (ip->i_di.di_size < inode->i_size) { - ip->i_di.di_size = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); } brelse(dibh); gfs2_trans_end(sdp); +failed: if (al->al_requested) { gfs2_inplace_release(ip); gfs2_quota_unlock(ip); gfs2_alloc_put(ip); } - gfs2_glock_dq_m(1, &ip->i_gh); + gfs2_glock_dq(&ip->i_gh); gfs2_holder_uninit(&ip->i_gh); - return 0; - -fail: - brelse(dibh); -fail_endtrans: - gfs2_trans_end(sdp); - if (al->al_requested) { - gfs2_inplace_release(ip); - gfs2_quota_unlock(ip); - gfs2_alloc_put(ip); - } - gfs2_glock_dq_m(1, &ip->i_gh); - gfs2_holder_uninit(&ip->i_gh); -fail_nounlock: - ClearPageUptodate(page); - return error; + return ret; } /** @@ -793,8 +838,8 @@ const struct address_space_operations gf .readpage = gfs2_readpage, .readpages = gfs2_readpages, .sync_page = block_sync_page, - .prepare_write = gfs2_prepare_write, - .commit_write = gfs2_commit_write, + .write_begin = gfs2_write_begin, + .write_end = gfs2_write_end, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, Index: linux-2.6/fs/ecryptfs/mmap.c =================================================================== --- linux-2.6.orig/fs/ecryptfs/mmap.c +++ linux-2.6/fs/ecryptfs/mmap.c @@ -36,34 +36,6 @@ struct kmem_cache *ecryptfs_lower_page_cache; -/** - * ecryptfs_get1page - * - * Get one page from cache or lower f/s, return error otherwise. - * - * Returns unlocked and up-to-date page (if ok), with increased - * refcnt. - */ -static struct page *ecryptfs_get1page(struct file *file, int index) -{ - struct page *page; - struct dentry *dentry; - struct inode *inode; - struct address_space *mapping; - - dentry = file->f_path.dentry; - inode = dentry->d_inode; - mapping = inode->i_mapping; - page = read_cache_page(mapping, index, - (filler_t *)mapping->a_ops->readpage, - (void *)file); - if (IS_ERR(page)) - goto out; - wait_on_page_locked(page); -out: - return page; -} - static int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros); @@ -369,17 +341,14 @@ out: /** * Called with lower inode mutex held. */ -static int fill_zeros_to_end_of_page(struct page *page, unsigned int to) +static int fill_zeros_to_end_of_page(struct page *page, loff_t new_isize) { - struct inode *inode = page->mapping->host; int end_byte_in_page; char *page_virt; - if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index) + if ((new_isize >> PAGE_CACHE_SHIFT) != page->index) goto out; - end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; - if (to > end_byte_in_page) - end_byte_in_page = to; + end_byte_in_page = new_isize % PAGE_CACHE_SIZE; page_virt = kmap_atomic(page, KM_USER0); memset((page_virt + end_byte_in_page), 0, (PAGE_CACHE_SIZE - end_byte_in_page)); @@ -389,16 +358,35 @@ out: return 0; } -static int ecryptfs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ecryptfs_write_begin(struct file *file,struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { + struct page *page; + pgoff_t index; int rc = 0; - if (from == 0 && to == PAGE_CACHE_SIZE) - goto out; /* If we are writing a full page, it will be - up to date. */ - if (!PageUptodate(page)) - rc = ecryptfs_do_readpage(file, page, page->index); + index = pos >> PAGE_CACHE_SHIFT; + page = __grab_cache_page(mapping, index); + if (!page) { + rc = -ENOMEM; + goto out; + } + + /* + * If we are writing a full page (with no possibility of a short + * write), it will be guaranteed to end up being uptodate at + * write_end-time + */ + if (flags & AOP_FLAG_UNINTERRUPTIBLE && len == PAGE_CACHE_SIZE) + goto out; + if (!PageUptodate(page)) { + rc = ecryptfs_do_readpage(file, page, index); + if (rc) { + unlock_page(page); + page_cache_release(page); + } + } out: return rc; } @@ -421,14 +409,6 @@ out: return rc; } -static -void ecryptfs_release_lower_page(struct page *lower_page, int page_locked) -{ - if (page_locked) - unlock_page(lower_page); - page_cache_release(lower_page); -} - /** * ecryptfs_write_inode_size_to_header * @@ -442,28 +422,17 @@ static int ecryptfs_write_inode_size_to_ { int rc = 0; struct page *header_page; + void *fsdata; char *header_virt; - const struct address_space_operations *lower_a_ops; + struct address_space *lower_mapping = lower_inode->i_mapping; u64 file_size; -retry: - header_page = grab_cache_page(lower_inode->i_mapping, 0); - if (!header_page) { - ecryptfs_printk(KERN_ERR, "grab_cache_page for " - "lower_page_index 0 failed\n"); - rc = -EINVAL; - goto out; - } - lower_a_ops = lower_inode->i_mapping->a_ops; - rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8); - if (rc) { - if (rc == AOP_TRUNCATED_PAGE) { - ecryptfs_release_lower_page(header_page, 0); - goto retry; - } else - ecryptfs_release_lower_page(header_page, 1); + rc = pagecache_write_begin(lower_file, lower_mapping, 0, sizeof(u64), + AOP_FLAG_UNINTERRUPTIBLE, + &header_page, &fsdata); + if (rc) goto out; - } + file_size = (u64)i_size_read(inode); ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size); file_size = cpu_to_be64(file_size); @@ -471,17 +440,17 @@ retry: memcpy(header_virt, &file_size, sizeof(u64)); kunmap_atomic(header_virt, KM_USER0); flush_dcache_page(header_page); - rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8); - if (rc < 0) + + rc = pagecache_write_end(lower_file, lower_mapping, 0, sizeof(u64), + sizeof(u64), header_page, fsdata); + if (rc != sizeof(u64)) { ecryptfs_printk(KERN_ERR, "Error commiting header page " "write\n"); - if (rc == AOP_TRUNCATED_PAGE) { - ecryptfs_release_lower_page(header_page, 0); - goto retry; - } else - ecryptfs_release_lower_page(header_page, 1); + if (rc > 0) + rc = -EINVAL; /* XXX: can we do better? */ + } lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty_sync(inode); + mark_inode_dirty_sync(inode); /* XXX: lower_inode? */ out: return rc; } @@ -564,36 +533,21 @@ ecryptfs_write_inode_size_to_metadata(st int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, struct file *lower_file, unsigned long lower_page_index, int byte_offset, - int region_bytes) + int region_bytes, void **fsdata) { - int rc = 0; + int rc; + struct address_space *lower_mapping = lower_inode->i_mapping; + loff_t pos = (lower_page_index << PAGE_CACHE_SHIFT) + byte_offset; -retry: - *lower_page = grab_cache_page(lower_inode->i_mapping, lower_page_index); - if (!(*lower_page)) { - rc = -EINVAL; - ecryptfs_printk(KERN_ERR, "Error attempting to grab " - "lower page with index [0x%.16x]\n", - lower_page_index); - goto out; - } - rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file, - (*lower_page), - byte_offset, - region_bytes); + rc = pagecache_write_begin(lower_file, lower_mapping, pos, region_bytes, + AOP_FLAG_UNINTERRUPTIBLE, /* XXX: ok? */ + lower_page, fsdata); if (rc) { - if (rc == AOP_TRUNCATED_PAGE) { - ecryptfs_release_lower_page(*lower_page, 0); - goto retry; - } else { - ecryptfs_printk(KERN_ERR, "prepare_write for " - "lower_page_index = [0x%.16x] failed; rc = " - "[%d]\n", lower_page_index, rc); - ecryptfs_release_lower_page(*lower_page, 1); - (*lower_page) = NULL; - } + ecryptfs_printk(KERN_ERR, "pagecache_write_begin for " + "lower_page_index = [0x%.16x] failed; rc = " + "[%d]\n", lower_page_index, rc); + (*lower_page) = NULL; } -out: return rc; } @@ -605,21 +559,21 @@ out: int ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, struct file *lower_file, int byte_offset, - int region_size) + int region_size, void *fsdata) { - int page_locked = 1; - int rc = 0; + int rc; + struct address_space *lower_mapping = lower_inode->i_mapping; + loff_t pos = (lower_page->index << PAGE_CACHE_SHIFT) + byte_offset; - rc = lower_inode->i_mapping->a_ops->commit_write( - lower_file, lower_page, byte_offset, region_size); - if (rc == AOP_TRUNCATED_PAGE) - page_locked = 0; - if (rc < 0) { + rc = pagecache_write_end(lower_file, lower_mapping, pos, region_size, + region_size, lower_page, fsdata); + if (rc != region_size) { ecryptfs_printk(KERN_ERR, "Error committing write; rc = [%d]\n", rc); + if (rc > 0) + rc = -EINVAL; } else rc = 0; - ecryptfs_release_lower_page(lower_page, page_locked); return rc; } @@ -634,9 +588,10 @@ int ecryptfs_copy_page_to_lower(struct p { int rc = 0; struct page *lower_page; + void *fsdata; rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file, - page->index, 0, PAGE_CACHE_SIZE); + page->index, 0, PAGE_CACHE_SIZE, &fsdata); if (rc) { ecryptfs_printk(KERN_ERR, "Error attempting to get page " "at index [0x%.16x]\n", page->index); @@ -646,7 +601,7 @@ int ecryptfs_copy_page_to_lower(struct p memcpy((char *)page_address(lower_page), page_address(page), PAGE_CACHE_SIZE); rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file, - 0, PAGE_CACHE_SIZE); + 0, PAGE_CACHE_SIZE, fsdata); if (rc) ecryptfs_printk(KERN_ERR, "Error attempting to commit page " "at index [0x%.16x]\n", page->index); @@ -657,31 +612,37 @@ out: struct kmem_cache *ecryptfs_xattr_cache; /** - * ecryptfs_commit_write + * ecryptfs_write_end * @file: The eCryptfs file object - * @page: The eCryptfs page - * @from: Ignored (we rotate the page IV on each write) - * @to: Ignored + * @mapping: The eCryptfs address_space + * @pos: The start of the write + * @len: The length passed to write_begin (unused) + * @copied: The actual amount copied + * @page: The eCryptfs page returned by write_begin + * @fsdata: Filesystem private data (unused) * * This is where we encrypt the data and pass the encrypted data to * the lower filesystem. In OpenPGP-compatible mode, we operate on * entire underlying packets. */ -static int ecryptfs_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int ecryptfs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { struct ecryptfs_page_crypt_context ctx; - loff_t pos; + loff_t isize; struct inode *inode; struct inode *lower_inode; struct file *lower_file; struct ecryptfs_crypt_stat *crypt_stat; int rc; - inode = page->mapping->host; + inode = mapping->host; + isize = inode->i_size; /* i_mutex is held */ lower_inode = ecryptfs_inode_to_lower(inode); lower_file = ecryptfs_file_to_lower(file); - mutex_lock(&lower_inode->i_mutex); + mutex_lock(&lower_inode->i_mutex); /* XXX: put this in write_begin? */ crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode) ->crypt_stat; if (crypt_stat->flags & ECRYPTFS_NEW_FILE) { @@ -692,8 +653,8 @@ static int ecryptfs_commit_write(struct ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" "(page w/ index = [0x%.16x], to = [%d])\n", page->index, - to); - rc = fill_zeros_to_end_of_page(page, to); + max(isize, pos+copied)); + rc = fill_zeros_to_end_of_page(page, max(isize, pos+copied)); if (rc) { ecryptfs_printk(KERN_WARNING, "Error attempting to fill " "zeros in page with index = [0x%.16x]\n", @@ -710,11 +671,10 @@ static int ecryptfs_commit_write(struct goto out; } inode->i_blocks = lower_inode->i_blocks; - pos = (page->index << PAGE_CACHE_SHIFT) + to; - if (pos > i_size_read(inode)) { - i_size_write(inode, pos); + if (pos + copied > isize) { + i_size_write(inode, pos + copied); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " - "[0x%.16x]\n", i_size_read(inode)); + "[0x%.16x]\n", pos + copied); } rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, inode, file->f_dentry, @@ -730,6 +690,9 @@ out: else SetPageUptodate(page); mutex_unlock(&lower_inode->i_mutex); + unlock_page(page); + page_cache_release(page); + return rc; } @@ -750,32 +713,31 @@ int write_zeros(struct file *file, pgoff int rc = 0; struct page *tmp_page; char *tmp_page_virt; - - tmp_page = ecryptfs_get1page(file, index); - if (IS_ERR(tmp_page)) { - ecryptfs_printk(KERN_ERR, "Error getting page at index " - "[0x%.16x]\n", index); - rc = PTR_ERR(tmp_page); - goto out; - } - rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros); + void *fsdata; + struct address_space *mapping = file->f_path.dentry->d_inode->i_mapping; + loff_t pos = (index << PAGE_CACHE_SHIFT) + start; + + rc = pagecache_write_begin(file, mapping, pos, num_zeros, + AOP_FLAG_UNINTERRUPTIBLE, + &tmp_page, &fsdata); if (rc) { ecryptfs_printk(KERN_ERR, "Error preparing to write zero's " "to remainder of page at index [0x%.16x]\n", index); - page_cache_release(tmp_page); goto out; } tmp_page_virt = kmap_atomic(tmp_page, KM_USER0); memset(((char *)tmp_page_virt + start), 0, num_zeros); kunmap_atomic(tmp_page_virt, KM_USER0); flush_dcache_page(tmp_page); - rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); - if (rc < 0) { + rc = pagecache_write_end(file, mapping, pos, num_zeros, num_zeros, + tmp_page, fsdata); + if (rc != num_zeros) { ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " "to remainder of page at index [0x%.16x]\n", index); - page_cache_release(tmp_page); + if (rc > 0) + rc = -EINVAL; goto out; } rc = 0; @@ -823,8 +785,8 @@ static void ecryptfs_sync_page(struct pa struct address_space_operations ecryptfs_aops = { .writepage = ecryptfs_writepage, .readpage = ecryptfs_readpage, - .prepare_write = ecryptfs_prepare_write, - .commit_write = ecryptfs_commit_write, + .write_begin = ecryptfs_write_begin, + .write_end = ecryptfs_write_end, .bmap = ecryptfs_bmap, .sync_page = ecryptfs_sync_page, }; Index: linux-2.6/fs/ecryptfs/crypto.c =================================================================== --- linux-2.6.orig/fs/ecryptfs/crypto.c +++ linux-2.6/fs/ecryptfs/crypto.c @@ -375,7 +375,8 @@ ecryptfs_extent_to_lwr_pg_idx_and_offset static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx, struct page *lower_page, struct inode *lower_inode, - int byte_offset_in_page, int bytes_to_write) + int byte_offset_in_page, int bytes_to_write, + void *fsdata) { int rc = 0; @@ -383,7 +384,7 @@ static int ecryptfs_write_out_page(struc rc = ecryptfs_commit_lower_page(lower_page, lower_inode, ctx->param.lower_file, byte_offset_in_page, - bytes_to_write); + bytes_to_write, fsdata); if (rc) { ecryptfs_printk(KERN_ERR, "Error calling lower " "commit; rc = [%d]\n", rc); @@ -407,7 +408,7 @@ static int ecryptfs_read_in_page(struct struct page **lower_page, struct inode *lower_inode, unsigned long lower_page_idx, - int byte_offset_in_page) + int byte_offset_in_page, void **fsdata) { int rc = 0; @@ -419,13 +420,12 @@ static int ecryptfs_read_in_page(struct lower_page_idx, byte_offset_in_page, (PAGE_CACHE_SIZE - - byte_offset_in_page)); + - byte_offset_in_page), fsdata); if (rc) { ecryptfs_printk( - KERN_ERR, "Error attempting to grab, map, " - "and prepare_write lower page with index " + KERN_ERR, "Error in ecryptfs_get_lower_page " + "lower page with index " "[0x%.16x]; rc = [%d]\n", lower_page_idx, rc); - goto out; } } else { *lower_page = grab_cache_page(lower_inode->i_mapping, @@ -436,10 +436,9 @@ static int ecryptfs_read_in_page(struct KERN_ERR, "Error attempting to grab and map " "lower page with index [0x%.16x]; rc = [%d]\n", lower_page_idx, rc); - goto out; } } -out: + return rc; } @@ -475,6 +474,8 @@ int ecryptfs_encrypt_page(struct ecryptf int lower_byte_offset = 0; int orig_byte_offset = 0; int num_extents_per_page; + void *fsdata; + #define ECRYPTFS_PAGE_STATE_UNREAD 0 #define ECRYPTFS_PAGE_STATE_READ 1 #define ECRYPTFS_PAGE_STATE_MODIFIED 2 @@ -503,10 +504,9 @@ int ecryptfs_encrypt_page(struct ecryptf if (prior_lower_page_idx != lower_page_idx && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) { rc = ecryptfs_write_out_page(ctx, lower_page, - lower_inode, - orig_byte_offset, - (PAGE_CACHE_SIZE - - orig_byte_offset)); + lower_inode, orig_byte_offset, + (PAGE_CACHE_SIZE - orig_byte_offset), + fsdata); if (rc) { ecryptfs_printk(KERN_ERR, "Error attempting " "to write out page; rc = [%d]" @@ -519,7 +519,7 @@ int ecryptfs_encrypt_page(struct ecryptf || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) { rc = ecryptfs_read_in_page(ctx, &lower_page, lower_inode, lower_page_idx, - lower_byte_offset); + lower_byte_offset, &fsdata); if (rc) { ecryptfs_printk(KERN_ERR, "Error attempting " "to read in lower page with " @@ -571,8 +571,8 @@ int ecryptfs_encrypt_page(struct ecryptf } BUG_ON(orig_byte_offset != 0); rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0, - (lower_byte_offset - + crypt_stat->extent_size)); + (lower_byte_offset + crypt_stat->extent_size), + fsdata); if (rc) { ecryptfs_printk(KERN_ERR, "Error attempting to write out " "page; rc = [%d]\n", rc); Index: linux-2.6/fs/ecryptfs/ecryptfs_kernel.h =================================================================== --- linux-2.6.orig/fs/ecryptfs/ecryptfs_kernel.h +++ linux-2.6/fs/ecryptfs/ecryptfs_kernel.h @@ -503,11 +503,11 @@ int ecryptfs_write_inode_size_to_metadat int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode, struct file *lower_file, unsigned long lower_page_index, int byte_offset, - int region_bytes); + int region_bytes, void **fsdata); int ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode, struct file *lower_file, int byte_offset, - int region_size); + int region_size, void *fsdata); int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode, struct file *lower_file); int ecryptfs_do_readpage(struct file *file, struct page *page, Index: linux-2.6/fs/fuse/file.c =================================================================== --- linux-2.6.orig/fs/fuse/file.c +++ linux-2.6/fs/fuse/file.c @@ -443,51 +443,61 @@ static size_t fuse_send_write(struct fus return outarg.size; } -static int fuse_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to) -{ - /* No op */ +static int fuse_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + + *pagep = __grab_cache_page(mapping, index); + if (!*pagep) + return -ENOMEM; return 0; } -static int fuse_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int fuse_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { int err; - size_t nres; - unsigned count = to - offset; - struct inode *inode = page->mapping->host; + size_t nres = 0; + struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); - loff_t pos = page_offset(page) + offset; struct fuse_req *req; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - if (is_bad_inode(inode)) - return -EIO; + if (is_bad_inode(inode)) { + err = -EIO; + goto out; + } req = fuse_get_req(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } req->num_pages = 1; req->pages[0] = page; req->page_offset = offset; - nres = fuse_send_write(req, file, inode, pos, count); + nres = fuse_send_write(req, file, inode, pos, copied); err = req->out.h.error; fuse_put_request(fc, req); - if (!err && nres != count) - err = -EIO; if (!err) { - pos += count; + pos += nres; spin_lock(&fc->lock); if (pos > inode->i_size) i_size_write(inode, pos); spin_unlock(&fc->lock); - if (offset == 0 && to == PAGE_CACHE_SIZE) + if (copied == PAGE_CACHE_SIZE) SetPageUptodate(page); } fuse_invalidate_attr(inode); - return err; +out: + unlock_page(page); + page_cache_release(page); + return nres > 0 ? nres : err; } static void fuse_release_user_pages(struct fuse_req *req, int write) @@ -817,8 +827,8 @@ static const struct file_operations fuse static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, - .prepare_write = fuse_prepare_write, - .commit_write = fuse_commit_write, + .write_begin = fuse_write_begin, + .write_end = fuse_write_end, .readpages = fuse_readpages, .set_page_dirty = fuse_set_page_dirty, .bmap = fuse_bmap, Index: linux-2.6/fs/hostfs/hostfs_kern.c =================================================================== --- linux-2.6.orig/fs/hostfs/hostfs_kern.c +++ linux-2.6/fs/hostfs/hostfs_kern.c @@ -461,56 +461,42 @@ int hostfs_readpage(struct file *file, s return(err); } -int hostfs_prepare_write(struct file *file, struct page *page, - unsigned int from, unsigned int to) +int hostfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - char *buffer; - long long start, tmp; - int err; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; - start = (long long) page->index << PAGE_CACHE_SHIFT; - buffer = kmap(page); - if(from != 0){ - tmp = start; - err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer, - from); - if(err < 0) goto out; - } - if(to != PAGE_CACHE_SIZE){ - start += to; - err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to, - PAGE_CACHE_SIZE - to); - if(err < 0) goto out; - } - err = 0; - out: - kunmap(page); - return(err); + *pagep = __grab_cache_page(mapping, index); + if (!*pagep) + return -ENOMEM; + return 0; } -int hostfs_commit_write(struct file *file, struct page *page, unsigned from, - unsigned to) +int hostfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - char *buffer; - long long start; - int err = 0; + void *buffer; + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + int err; - start = (((long long) page->index) << PAGE_CACHE_SHIFT) + from; buffer = kmap(page); - err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, - to - from); - if(err > 0) err = 0; - - /* Actually, if !err, write_file has added to-from to start, so, despite - * the appearance, we are comparing i_size against the _last_ written - * location, as we should. */ + err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied); + kunmap(page); + + if (!PageUptodate(page) && err == PAGE_CACHE_SIZE) + SetPageUptodate(page); + unlock_page(page); + page_cache_release(page); - if(!err && (start > inode->i_size)) - inode->i_size = start; + /* If err > 0, write_file has added err to pos, so we are comparing + * i_size against the last byte written. + */ + if (err > 0 && (pos > inode->i_size)) + inode->i_size = pos; - kunmap(page); return(err); } @@ -518,8 +504,8 @@ static const struct address_space_operat .writepage = hostfs_writepage, .readpage = hostfs_readpage, .set_page_dirty = __set_page_dirty_nobuffers, - .prepare_write = hostfs_prepare_write, - .commit_write = hostfs_commit_write + .write_begin = hostfs_write_begin, + .write_end = hostfs_write_end, }; static int init_inode(struct inode *inode, struct dentry *dentry) Index: linux-2.6/fs/jffs2/file.c =================================================================== --- linux-2.6.orig/fs/jffs2/file.c +++ linux-2.6/fs/jffs2/file.c @@ -21,10 +21,12 @@ #include #include "nodelist.h" -static int jffs2_commit_write (struct file *filp, struct page *pg, - unsigned start, unsigned end); -static int jffs2_prepare_write (struct file *filp, struct page *pg, - unsigned start, unsigned end); +static int jffs2_write_end(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *pg, void *fsdata); +static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); static int jffs2_readpage (struct file *filp, struct page *pg); int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync) @@ -67,8 +69,8 @@ const struct inode_operations jffs2_file const struct address_space_operations jffs2_file_address_operations = { .readpage = jffs2_readpage, - .prepare_write =jffs2_prepare_write, - .commit_write = jffs2_commit_write + .write_begin = jffs2_write_begin, + .write_end = jffs2_write_end, }; static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg) @@ -121,15 +123,23 @@ static int jffs2_readpage (struct file * return ret; } -static int jffs2_prepare_write (struct file *filp, struct page *pg, - unsigned start, unsigned end) +static int jffs2_write_begin(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - struct inode *inode = pg->mapping->host; + struct page *pg; + struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - uint32_t pageofs = pg->index << PAGE_CACHE_SHIFT; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + uint32_t pageofs = pos & (PAGE_CACHE_SIZE - 1); int ret = 0; - D1(printk(KERN_DEBUG "jffs2_prepare_write()\n")); + pg = __grab_cache_page(mapping, index); + if (!pg) + return -ENOMEM; + *pagep = pg; + + D1(printk(KERN_DEBUG "jffs2_write_begin()\n")); if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ @@ -144,7 +154,7 @@ static int jffs2_prepare_write (struct f ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) - return ret; + goto out_page; down(&f->sem); memset(&ri, 0, sizeof(ri)); @@ -174,7 +184,7 @@ static int jffs2_prepare_write (struct f ret = PTR_ERR(fn); jffs2_complete_reservation(c); up(&f->sem); - return ret; + goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); if (f->metadata) { @@ -183,65 +193,79 @@ static int jffs2_prepare_write (struct f f->metadata = NULL; } if (ret) { - D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in prepare_write, returned %d\n", ret)); + D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in write_begin, returned %d\n", ret)); jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); up(&f->sem); - return ret; + goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; up(&f->sem); } - /* Read in the page if it wasn't already present, unless it's a whole page */ - if (!PageUptodate(pg) && (start || end < PAGE_CACHE_SIZE)) { + /* + * Read in the page if it wasn't already present. Cannot optimize away + * the whole page write case until jffs2_write_end can handle the + * case of a short-copy. + */ + if (!PageUptodate(pg)) { down(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); up(&f->sem); + if (ret) + goto out_page; } - D1(printk(KERN_DEBUG "end prepare_write(). pg->flags %lx\n", pg->flags)); + D1(printk(KERN_DEBUG "end write_begin(). pg->flags %lx\n", pg->flags)); + return ret; + +out_page: + unlock_page(pg); + page_cache_release(pg); return ret; } -static int jffs2_commit_write (struct file *filp, struct page *pg, - unsigned start, unsigned end) +static int jffs2_write_end(struct file *filp, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *pg, void *fsdata) { /* Actually commit the write from the page cache page we're looking at. * For now, we write the full page out each time. It sucks, but it's simple */ - struct inode *inode = pg->mapping->host; + struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode *ri; + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + unsigned end = start + copied; unsigned aligned_start = start & ~3; int ret = 0; uint32_t writtenlen = 0; - D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", + D1(printk(KERN_DEBUG "jffs2_write_end(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags)); + /* We need to avoid deadlock with page_cache_read() in + jffs2_garbage_collect_pass(). So the page must be + up to date to prevent page_cache_read() from trying + to re-lock it. */ + BUG_ON(!PageUptodate(pg)); + if (end == PAGE_CACHE_SIZE) { - if (!start) { - /* We need to avoid deadlock with page_cache_read() in - jffs2_garbage_collect_pass(). So we have to mark the - page up to date, to prevent page_cache_read() from - trying to re-lock it. */ - SetPageUptodate(pg); - } else { - /* When writing out the end of a page, write out the - _whole_ page. This helps to reduce the number of - nodes in files which have many short writes, like - syslog files. */ - start = aligned_start = 0; - } + /* When writing out the end of a page, write out the + _whole_ page. This helps to reduce the number of + nodes in files which have many short writes, like + syslog files. */ + start = aligned_start = 0; } ri = jffs2_alloc_raw_inode(); if (!ri) { - D1(printk(KERN_DEBUG "jffs2_commit_write(): Allocation of raw inode failed\n")); + D1(printk(KERN_DEBUG "jffs2_write_end(): Allocation of raw inode failed\n")); + unlock_page(pg); + page_cache_release(pg); return -ENOMEM; } @@ -289,11 +313,14 @@ static int jffs2_commit_write (struct fi /* generic_file_write has written more to the page cache than we've actually written to the medium. Mark the page !Uptodate so that it gets reread */ - D1(printk(KERN_DEBUG "jffs2_commit_write(): Not all bytes written. Marking page !uptodate\n")); + D1(printk(KERN_DEBUG "jffs2_write_end(): Not all bytes written. Marking page !uptodate\n")); SetPageError(pg); ClearPageUptodate(pg); } - D1(printk(KERN_DEBUG "jffs2_commit_write() returning %d\n",start+writtenlen==end?0:ret)); - return start+writtenlen==end?0:ret; + D1(printk(KERN_DEBUG "jffs2_write_end() returning %d\n", + writtenlen > 0 ? writtenlen : ret)); + unlock_page(pg); + page_cache_release(pg); + return writtenlen > 0 ? writtenlen : ret; } Index: linux-2.6/fs/cifs/file.c =================================================================== --- linux-2.6.orig/fs/cifs/file.c +++ linux-2.6/fs/cifs/file.c @@ -104,7 +104,7 @@ static inline int cifs_open_inode_helper /* want handles we can use to read with first in the list so we do not have to walk the - list to search for one in prepare_write */ + list to search for one in write_begin */ if ((file->f_flags & O_ACCMODE) == O_WRONLY) { list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); @@ -1389,18 +1389,19 @@ static int cifs_writepage(struct page* p return rc; } -static int cifs_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to) +static int cifs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { int xid; int rc = 0; - struct inode *inode = page->mapping->host; - loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + struct inode *inode = mapping->host; + loff_t position = pos + copied; char *page_data; xid = GetXid(); - cFYI(1, ("commit write for page %p up to position %lld for %d", - page, position, to)); + cFYI(1, ("write end for page %p at pos %lld, copied %d", + page, pos, copied)); spin_lock(&inode->i_lock); if (position > inode->i_size) { i_size_write(inode, position); @@ -1432,23 +1433,19 @@ static int cifs_commit_write(struct file } */ } spin_unlock(&inode->i_lock); + if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) + SetPageUptodate(page); + if (!PageUptodate(page)) { - position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset; - /* can not rely on (or let) writepage write this data */ - if (to < offset) { - cFYI(1, ("Illegal offsets, can not copy from %d to %d", - offset, to)); - FreeXid(xid); - return rc; - } + unsigned long offset = pos & (PAGE_CACHE_SIZE - 1); + /* this is probably better than directly calling partialpage_write since in this function the file handle is known which we might as well leverage */ /* BB check if anything else missing out of ppw such as updating last write time */ page_data = kmap(page); - rc = cifs_write(file, page_data + offset, to-offset, - &position); + rc = cifs_write(file, page_data + offset, copied, &pos); if (rc > 0) rc = 0; /* else if (rc < 0) should we set writebehind rc? */ @@ -1456,9 +1453,12 @@ static int cifs_commit_write(struct file } else { set_page_dirty(page); } - FreeXid(xid); - return rc; + + unlock_page(page); + page_cache_release(page); + + return rc < 0 ? rc : copied; } int cifs_fsync(struct file *file, struct dentry *dentry, int datasync) @@ -1988,34 +1988,47 @@ int is_size_safe_to_change(struct cifsIn return 1; } -static int cifs_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int cifs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { int rc = 0; loff_t i_size; loff_t offset; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + struct page *page; + + page = __grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + *pagep = page; - cFYI(1, ("prepare write for page %p from %d to %d",page,from,to)); + cFYI(1, ("write begin for page %p at pos %lld, length %d", + page, pos, len)); if (PageUptodate(page)) return 0; - /* If we are writing a full page it will be up to date, - no need to read from the server */ - if ((to == PAGE_CACHE_SIZE) && (from == 0)) { - SetPageUptodate(page); + /* If we are writing a full page it will become up to date, + no need to read from the server (although we may encounter a + short copy, so write_end has to handle this) */ + if (len == PAGE_CACHE_SIZE) return 0; - } - offset = (loff_t)page->index << PAGE_CACHE_SHIFT; - i_size = i_size_read(page->mapping->host); + offset = index << PAGE_CACHE_SHIFT; + i_size = i_size_read(mapping->host); + + if (offset >= i_size) { + void *kaddr; + unsigned from, to; - if ((offset >= i_size) || - ((from == 0) && (offset + to) >= i_size)) { /* * We don't need to read data beyond the end of the file. * zero it, and set the page uptodate */ - void *kaddr = kmap_atomic(page, KM_USER0); + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; + + kaddr = kmap_atomic(page, KM_USER0); if (from) memset(kaddr, 0, from); @@ -2031,12 +2044,12 @@ static int cifs_prepare_write(struct fil /* we could try using another file handle if there is one - but how would we lock it to prevent close of that handle racing with this read? In any case - this will be written out by commit_write so is fine */ + this will be written out by write_end so is fine */ } /* we do not need to pass errors back e.g. if we do not have read access to the file - because cifs_commit_write will do the right thing. -- shaggy */ + because cifs_write_end will do the right thing. -- shaggy */ return 0; } @@ -2046,8 +2059,8 @@ const struct address_space_operations ci .readpages = cifs_readpages, .writepage = cifs_writepage, .writepages = cifs_writepages, - .prepare_write = cifs_prepare_write, - .commit_write = cifs_commit_write, + .write_begin = cifs_write_begin, + .write_end = cifs_write_end, .set_page_dirty = __set_page_dirty_nobuffers, /* .sync_page = cifs_sync_page, */ /* .direct_IO = */ @@ -2062,8 +2075,8 @@ const struct address_space_operations ci .readpage = cifs_readpage, .writepage = cifs_writepage, .writepages = cifs_writepages, - .prepare_write = cifs_prepare_write, - .commit_write = cifs_commit_write, + .write_begin = cifs_write_begin, + .write_end = cifs_write_end, .set_page_dirty = __set_page_dirty_nobuffers, /* .sync_page = cifs_sync_page, */ /* .direct_IO = */ Index: linux-2.6/fs/ufs/inode.c =================================================================== --- linux-2.6.orig/fs/ufs/inode.c +++ linux-2.6/fs/ufs/inode.c @@ -558,24 +558,39 @@ static int ufs_writepage(struct page *pa { return block_write_full_page(page,ufs_getfrag_block,wbc); } + static int ufs_readpage(struct file *file, struct page *page) { return block_read_full_page(page,ufs_getfrag_block); } -static int ufs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +int __ufs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,ufs_getfrag_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ufs_getfrag_block); } + +static int ufs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata); +} + static sector_t ufs_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,ufs_getfrag_block); } + const struct address_space_operations ufs_aops = { .readpage = ufs_readpage, .writepage = ufs_writepage, .sync_page = block_sync_page, - .prepare_write = ufs_prepare_write, - .commit_write = generic_commit_write, + .write_begin = ufs_write_begin, + .write_end = generic_write_end, .bmap = ufs_bmap }; Index: linux-2.6/fs/ufs/dir.c =================================================================== --- linux-2.6.orig/fs/ufs/dir.c +++ linux-2.6/fs/ufs/dir.c @@ -39,12 +39,14 @@ static inline int ufs_match(struct super return !memcmp(name, de->d_name, len); } -static int ufs_commit_chunk(struct page *page, unsigned from, unsigned to) +static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; + dir->i_version++; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else @@ -82,16 +84,20 @@ ino_t ufs_inode_by_name(struct inode *di void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, struct page *page, struct inode *inode) { - unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + fs16_to_cpu(dir->i_sb, de->d_reclen); + loff_t pos = (page->index << PAGE_CACHE_SHIFT) + + (char *) de - (char *) page_address(page); + unsigned len = fs16_to_cpu(dir->i_sb, de->d_reclen); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ufs_write_begin(NULL, page->mapping, pos, len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); + de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); ufs_set_de_type(dir->i_sb, de, inode->i_mode); - err = ufs_commit_chunk(page, from, to); + + err = ufs_commit_chunk(page, pos, len); ufs_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); @@ -317,7 +323,7 @@ int ufs_add_link(struct dentry *dentry, unsigned long npages = ufs_dir_pages(dir); unsigned long n; char *kaddr; - unsigned from, to; + loff_t pos; int err; UFSD("ENTER, name %s, namelen %u\n", name, namelen); @@ -372,9 +378,10 @@ int ufs_add_link(struct dentry *dentry, return -EINVAL; got_it: - from = (char*)de - (char*)page_address(page); - to = from + rec_len; - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + pos = (page->index << PAGE_CACHE_SHIFT) + + (char*)de - (char*)page_address(page); + err = __ufs_write_begin(NULL, page->mapping, pos, rec_len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) goto out_unlock; if (de->d_ino) { @@ -391,7 +398,7 @@ got_it: de->d_ino = cpu_to_fs32(sb, inode->i_ino); ufs_set_de_type(sb, de, inode->i_mode); - err = ufs_commit_chunk(page, from, to); + err = ufs_commit_chunk(page, pos, rec_len); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); @@ -514,6 +521,7 @@ int ufs_delete_entry(struct inode *inode char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); + loff_t pos; struct ufs_dir_entry *pde = NULL; struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from); int err; @@ -537,13 +545,16 @@ int ufs_delete_entry(struct inode *inode } if (pde) from = (char*)pde - (char*)page_address(page); + + pos = (page->index << PAGE_CACHE_SHIFT) + from; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __ufs_write_begin(NULL, mapping, pos, to - from, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); if (pde) - pde->d_reclen = cpu_to_fs16(sb, to-from); + pde->d_reclen = cpu_to_fs16(sb, to - from); dir->d_ino = 0; - err = ufs_commit_chunk(page, from, to); + err = ufs_commit_chunk(page, pos, to - from); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(inode); out: @@ -564,14 +575,15 @@ int ufs_make_empty(struct inode * inode, if (!page) return -ENOMEM; - kmap(page); - err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + + err = __ufs_write_begin(NULL, mapping, 0, chunk_size, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) { unlock_page(page); goto fail; } - + kmap(page); base = (char*)page_address(page); memset(base, 0, PAGE_CACHE_SIZE); @@ -589,10 +601,10 @@ int ufs_make_empty(struct inode * inode, de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1)); ufs_set_de_namlen(sb, de, 2); strcpy (de->d_name, ".."); + kunmap(page); err = ufs_commit_chunk(page, 0, chunk_size); fail: - kunmap(page); page_cache_release(page); return err; } Index: linux-2.6/fs/udf/file.c =================================================================== --- linux-2.6.orig/fs/udf/file.c +++ linux-2.6/fs/udf/file.c @@ -73,34 +73,28 @@ static int udf_adinicb_writepage(struct return 0; } -static int udf_adinicb_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +static int udf_adinicb_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - kmap(page); - return 0; -} - -static int udf_adinicb_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) -{ - struct inode *inode = page->mapping->host; - char *kaddr = page_address(page); + struct inode *inode = mapping->host; + unsigned offset = pos & (PAGE_CACHE_SIZE - 1); + char *kaddr; + kaddr = kmap_atomic(page, KM_USER0); memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset, - kaddr + offset, to - offset); - mark_inode_dirty(inode); - SetPageUptodate(page); - kunmap(page); - /* only one page here */ - if (to > inode->i_size) - inode->i_size = to; - return 0; + kaddr + offset, copied); + kunmap_atomic(kaddr, KM_USER0); + + return simple_write_end(file, mapping, pos, len, copied, page, fsdata); } const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, .sync_page = block_sync_page, - .prepare_write = udf_adinicb_prepare_write, - .commit_write = udf_adinicb_commit_write, + .write_begin = simple_write_begin, + .write_end = udf_adinicb_write_end, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, Index: linux-2.6/fs/udf/inode.c =================================================================== --- linux-2.6.orig/fs/udf/inode.c +++ linux-2.6/fs/udf/inode.c @@ -122,9 +122,12 @@ static int udf_readpage(struct file *fil return block_read_full_page(page, udf_get_block); } -static int udf_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +static int udf_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page, from, to, udf_get_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + udf_get_block); } static sector_t udf_bmap(struct address_space *mapping, sector_t block) @@ -136,8 +139,8 @@ const struct address_space_operations ud .readpage = udf_readpage, .writepage = udf_writepage, .sync_page = block_sync_page, - .prepare_write = udf_prepare_write, - .commit_write = generic_commit_write, + .write_begin = udf_write_begin, + .write_end = generic_write_end, .bmap = udf_bmap, }; Index: linux-2.6/fs/sysv/itree.c =================================================================== --- linux-2.6.orig/fs/sysv/itree.c +++ linux-2.6/fs/sysv/itree.c @@ -453,23 +453,38 @@ static int sysv_writepage(struct page *p { return block_write_full_page(page,get_block,wbc); } + static int sysv_readpage(struct file *file, struct page *page) { return block_read_full_page(page,get_block); } -static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +int __sysv_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,get_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + get_block); } + +static int sysv_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata); +} + static sector_t sysv_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,get_block); } + const struct address_space_operations sysv_aops = { .readpage = sysv_readpage, .writepage = sysv_writepage, .sync_page = block_sync_page, - .prepare_write = sysv_prepare_write, - .commit_write = generic_commit_write, + .write_begin = sysv_write_begin, + .write_end = generic_write_end, .bmap = sysv_bmap }; Index: linux-2.6/fs/sysv/dir.c =================================================================== --- linux-2.6.orig/fs/sysv/dir.c +++ linux-2.6/fs/sysv/dir.c @@ -37,12 +37,13 @@ static inline unsigned long dir_pages(st return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; } -static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) +static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = (struct inode *)page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else @@ -194,7 +195,7 @@ int sysv_add_link(struct dentry *dentry, unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr; - unsigned from, to; + loff_t pos; int err; /* We take care of directory expansion in the same loop */ @@ -220,16 +221,17 @@ int sysv_add_link(struct dentry *dentry, return -EINVAL; got_it: - from = (char*)de - (char*)page_address(page); - to = from + SYSV_DIRSIZE; + pos = (page->index << PAGE_CACHE_SHIFT) + + (char*)de - (char*)page_address(page); lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) goto out_unlock; memcpy (de->name, name, namelen); memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2); de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); out_page: @@ -246,15 +248,15 @@ int sysv_delete_entry(struct sysv_dir_en struct address_space *mapping = page->mapping; struct inode *inode = (struct inode*)mapping->host; char *kaddr = (char*)page_address(page); - unsigned from = (char*)de - kaddr; - unsigned to = from + SYSV_DIRSIZE; + loff_t pos = (page->index << PAGE_CACHE_SHIFT) + (char *)de - kaddr; int err; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); de->inode = 0; - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); dir_put_page(page); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(inode); @@ -271,12 +273,13 @@ int sysv_make_empty(struct inode *inode, if (!page) return -ENOMEM; - kmap(page); - err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * SYSV_DIRSIZE); + err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) { unlock_page(page); goto fail; } + kmap(page); base = (char*)page_address(page); memset(base, 0, PAGE_CACHE_SIZE); @@ -288,9 +291,9 @@ int sysv_make_empty(struct inode *inode, de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino); strcpy(de->name,".."); + kunmap(page); err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE); fail: - kunmap(page); page_cache_release(page); return err; } @@ -344,16 +347,18 @@ not_empty: void sysv_set_link(struct sysv_dir_entry *de, struct page *page, struct inode *inode) { - struct inode *dir = (struct inode*)page->mapping->host; - unsigned from = (char *)de-(char*)page_address(page); - unsigned to = from + SYSV_DIRSIZE; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; + loff_t pos = (page->index << PAGE_CACHE_SHIFT) + + (char *)de-(char*)page_address(page); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); BUG_ON(err); de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); dir_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); Index: linux-2.6/fs/minix/inode.c =================================================================== --- linux-2.6.orig/fs/minix/inode.c +++ linux-2.6/fs/minix/inode.c @@ -348,24 +348,39 @@ static int minix_writepage(struct page * { return block_write_full_page(page, minix_get_block, wbc); } + static int minix_readpage(struct file *file, struct page *page) { return block_read_full_page(page,minix_get_block); } -static int minix_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) + +int __minix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) { - return block_prepare_write(page,from,to,minix_get_block); + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + minix_get_block); } + +static int minix_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata); +} + static sector_t minix_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping,block,minix_get_block); } + static const struct address_space_operations minix_aops = { .readpage = minix_readpage, .writepage = minix_writepage, .sync_page = block_sync_page, - .prepare_write = minix_prepare_write, - .commit_write = generic_commit_write, + .write_begin = minix_write_begin, + .write_end = generic_write_end, .bmap = minix_bmap }; Index: linux-2.6/fs/minix/dir.c =================================================================== --- linux-2.6.orig/fs/minix/dir.c +++ linux-2.6/fs/minix/dir.c @@ -9,6 +9,7 @@ */ #include "minix.h" +#include #include #include @@ -48,11 +49,12 @@ static inline unsigned long dir_pages(st return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; } -static int dir_commit_chunk(struct page *page, unsigned from, unsigned to) +static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = (struct inode *)page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; - page->mapping->a_ops->commit_write(NULL, page, from, to); + block_write_end(NULL, mapping, pos, len, len, page, NULL); if (IS_DIRSYNC(dir)) err = write_one_page(page, 1); else @@ -221,7 +223,7 @@ int minix_add_link(struct dentry *dentry char *kaddr, *p; minix_dirent *de; minix3_dirent *de3; - unsigned from, to; + loff_t pos; int err; char *namx = NULL; __u32 inumber; @@ -273,9 +275,9 @@ int minix_add_link(struct dentry *dentry return -EINVAL; got_it: - from = p - (char*)page_address(page); - to = from + sbi->s_dirsize; - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + pos = (page->index >> PAGE_CACHE_SHIFT) + p - (char*)page_address(page); + err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) goto out_unlock; memcpy (namx, name, namelen); @@ -286,7 +288,7 @@ got_it: memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2); de->inode = inode->i_ino; } - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, sbi->s_dirsize); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(dir); out_put: @@ -303,15 +305,16 @@ int minix_delete_entry(struct minix_dir_ struct address_space *mapping = page->mapping; struct inode *inode = (struct inode*)mapping->host; char *kaddr = page_address(page); - unsigned from = (char*)de - kaddr; - unsigned to = from + minix_sb(inode->i_sb)->s_dirsize; + loff_t pos = (page->index << PAGE_CACHE_SHIFT) + (char*)de - kaddr; + unsigned len = minix_sb(inode->i_sb)->s_dirsize; int err; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = __minix_write_begin(NULL, mapping, pos, len, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err == 0) { de->inode = 0; - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, len); } else { unlock_page(page); } @@ -331,7 +334,8 @@ int minix_make_empty(struct inode *inode if (!page) return -ENOMEM; - err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * sbi->s_dirsize); + err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err) { unlock_page(page); goto fail; @@ -422,17 +426,20 @@ not_empty: void minix_set_link(struct minix_dir_entry *de, struct page *page, struct inode *inode) { - struct inode *dir = (struct inode*)page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; struct minix_sb_info *sbi = minix_sb(dir->i_sb); - unsigned from = (char *)de-(char*)page_address(page); - unsigned to = from + sbi->s_dirsize; + loff_t pos = (page->index << PAGE_CACHE_SHIFT) + + (char *)de-(char*)page_address(page); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + + err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, + AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); if (err == 0) { de->inode = inode->i_ino; - err = dir_commit_chunk(page, from, to); + err = dir_commit_chunk(page, pos, sbi->s_dirsize); } else { unlock_page(page); } Index: linux-2.6/fs/jfs/inode.c =================================================================== --- linux-2.6.orig/fs/jfs/inode.c +++ linux-2.6/fs/jfs/inode.c @@ -256,7 +256,7 @@ int jfs_get_block(struct inode *ip, sect static int jfs_writepage(struct page *page, struct writeback_control *wbc) { - return nobh_writepage(page, jfs_get_block, wbc); + return block_write_full_page(page, jfs_get_block, wbc); } static int jfs_writepages(struct address_space *mapping, @@ -276,10 +276,13 @@ static int jfs_readpages(struct file *fi return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); } -static int jfs_prepare_write(struct file *file, - struct page *page, unsigned from, unsigned to) -{ - return nobh_prepare_write(page, from, to, jfs_get_block); +static int jfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + jfs_get_block); } static sector_t jfs_bmap(struct address_space *mapping, sector_t block) @@ -303,8 +306,8 @@ const struct address_space_operations jf .writepage = jfs_writepage, .writepages = jfs_writepages, .sync_page = block_sync_page, - .prepare_write = jfs_prepare_write, - .commit_write = nobh_commit_write, + .write_begin = jfs_write_begin, + .write_end = generic_write_end, .bmap = jfs_bmap, .direct_IO = jfs_direct_IO, }; @@ -357,7 +360,7 @@ void jfs_truncate(struct inode *ip) { jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); - nobh_truncate_page(ip->i_mapping, ip->i_size); + block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); IWRITE_LOCK(ip, RDWRLOCK_NORMAL); jfs_truncate_nolock(ip, ip->i_size);