Rework the generic block "cont" routines, which become much simpler and are able to support the new aops. Convert the FAT filesystem to these new cont routines. The cost is that generic_cont_expand is no longer quite so generic (because it doesn't use ->prepare_write), and thus reiserfs may have to write its own zero-fill code; however, all the other cont_prepare_write users should be OK. Possibly reiserfs (maybe also OCFS2) should get together to create some slightly simpler but more generic helpers to do zeroing for them. Signed-off-by: Nick Piggin Index: linux-2.6/fs/buffer.c =================================================================== --- linux-2.6.orig/fs/buffer.c +++ linux-2.6/fs/buffer.c @@ -2131,167 +2131,145 @@ int block_read_full_page(struct page *pa return 0; } -/* utility function for filesystems that need to do work on expanding - * truncates. Uses prepare/commit_write to allow the filesystem to - * deal with the hole. - */ -static int __generic_cont_expand(struct inode *inode, loff_t size, - pgoff_t index, unsigned int offset) +int generic_cont_expand(struct inode *inode, loff_t size, + get_block_t *get_block, loff_t *bytes) { struct address_space *mapping = inode->i_mapping; - struct page *page; + unsigned blocksize = 1 << inode->i_blkbits; unsigned long limit; - int err; + unsigned zerofrom; + pgoff_t index, new_index; + void *kaddr; + struct page *page; + int err = 0; + + if (size < *bytes) + goto out; - err = -EFBIG; limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; if (limit != RLIM_INFINITY && size > (loff_t)limit) { send_sig(SIGXFSZ, current, 0); - goto out; + return -EFBIG; } if (size > inode->i_sb->s_maxbytes) - goto out; + return -EFBIG; - err = -ENOMEM; - page = grab_cache_page(mapping, index); - if (!page) - goto out; - err = mapping->a_ops->prepare_write(NULL, page, offset, offset); - if (err) { - /* - * ->prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. 
- */ + new_index = size >> PAGE_CACHE_SHIFT; + index = *bytes >> PAGE_CACHE_SHIFT; + + while (new_index > index) { + page = grab_cache_page(mapping, index); + if (!page) { + err = -ENOMEM; + goto out; + } + /* we might sleep (XXX: but we hold i_mutex?) */ + if (*bytes>>PAGE_CACHE_SHIFT != index) { + unlock_page(page); + page_cache_release(page); + continue; + } + zerofrom = *bytes & ~PAGE_CACHE_MASK; + err = __block_prepare_write(inode, page, zerofrom, + PAGE_CACHE_SIZE, get_block); + if (err) { + unlock_page(page); + page_cache_release(page); + goto out; + } + + if (zerofrom & (blocksize-1)) { + *bytes |= (blocksize-1); + (*bytes)++; + } + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + generic_commit_write(NULL, page, zerofrom, PAGE_CACHE_SIZE); unlock_page(page); page_cache_release(page); - vmtruncate(inode, inode->i_size); - goto out; + + index = *bytes >> PAGE_CACHE_SHIFT; } - err = mapping->a_ops->commit_write(NULL, page, offset, offset); + if (new_index == index) { + unsigned offset = size & ~PAGE_CACHE_MASK; - unlock_page(page); - page_cache_release(page); - if (err > 0) - err = 0; -out: - return err; -} + /* page covers the boundary, find the boundary offset */ + zerofrom = *bytes & ~PAGE_CACHE_MASK; -int generic_cont_expand(struct inode *inode, loff_t size) -{ - pgoff_t index; - unsigned int offset; + /* starting below the boundary? Nothing to zero out */ + if (zerofrom < offset) { + page = grab_cache_page(mapping, index); + if (!page) { + err = -ENOMEM; + goto out; + } - offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */ + err = __block_prepare_write(inode, page, zerofrom, + offset, get_block); + if (err) { + unlock_page(page); + page_cache_release(page); + goto out; + } - /* ugh. in prepare/commit_write, if from==to==start of block, we - ** skip the prepare. 
make sure we never send an offset for the start - ** of a block - */ - if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { - /* caller must handle this extra byte. */ - offset++; + if (zerofrom & (blocksize-1)) { + *bytes |= (blocksize-1); + (*bytes)++; + } + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr+zerofrom, 0, offset-zerofrom); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + generic_commit_write(NULL, page, zerofrom, offset); + unlock_page(page); + page_cache_release(page); + } } - index = size >> PAGE_CACHE_SHIFT; - - return __generic_cont_expand(inode, size, index, offset); -} - -int generic_cont_expand_simple(struct inode *inode, loff_t size) -{ - loff_t pos = size - 1; - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1; - /* prepare/commit_write can handle even if from==to==start of block. */ - return __generic_cont_expand(inode, size, index, offset); +out: + return err; } /* * For moronic filesystems that do not allow holes in file. * We may have to extend the file. 
*/ +int cont_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata, + get_block_t *get_block, loff_t *bytes) +{ + struct inode *inode = mapping->host; + int err; + + if (*bytes < pos) { + err = generic_cont_expand(inode, pos, get_block, bytes); + if (err) + return err; + } + + return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + get_block); +} int cont_prepare_write(struct page *page, unsigned offset, unsigned to, get_block_t *get_block, loff_t *bytes) { struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; - struct page *new_page; - pgoff_t pgpos; - long status; - unsigned zerofrom; - unsigned blocksize = 1 << inode->i_blkbits; - void *kaddr; + loff_t pos = page_offset(page) + offset; + int err; - while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) { - status = -ENOMEM; - new_page = grab_cache_page(mapping, pgpos); - if (!new_page) - goto out; - /* we might sleep */ - if (*bytes>>PAGE_CACHE_SHIFT != pgpos) { - unlock_page(new_page); - page_cache_release(new_page); - continue; - } - zerofrom = *bytes & ~PAGE_CACHE_MASK; - if (zerofrom & (blocksize-1)) { - *bytes |= (blocksize-1); - (*bytes)++; - } - status = __block_prepare_write(inode, new_page, zerofrom, - PAGE_CACHE_SIZE, get_block); - if (status) - goto out_unmap; - kaddr = kmap_atomic(new_page, KM_USER0); - memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom); - flush_dcache_page(new_page); - kunmap_atomic(kaddr, KM_USER0); - generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); - unlock_page(new_page); - page_cache_release(new_page); + if (*bytes < pos) { + err = generic_cont_expand(inode, pos, get_block, bytes); + if (err) + return err; } - if (page->index < pgpos) { - /* completely inside the area */ - zerofrom = offset; - } else { - /* page covers the boundary, find the boundary offset */ - zerofrom = *bytes & ~PAGE_CACHE_MASK; - - /* if we will expand 
the thing last block will be filled */ - if (to > zerofrom && (zerofrom & (blocksize-1))) { - *bytes |= (blocksize-1); - (*bytes)++; - } - - /* starting below the boundary? Nothing to zero out */ - if (offset <= zerofrom) - zerofrom = offset; - } - status = __block_prepare_write(inode, page, zerofrom, to, get_block); - if (status) - goto out1; - if (zerofrom < offset) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr+zerofrom, 0, offset-zerofrom); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - __block_commit_write(inode, page, zerofrom, offset); - } - return 0; -out1: - ClearPageUptodate(page); - return status; - -out_unmap: - ClearPageUptodate(new_page); - unlock_page(new_page); - page_cache_release(new_page); -out: - return status; + return __block_prepare_write(inode, page, offset, to, get_block); } int block_prepare_write(struct page *page, unsigned from, unsigned to, @@ -3150,6 +3128,7 @@ EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_write_full_page); EXPORT_SYMBOL(cont_prepare_write); +EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); @@ -3157,7 +3136,6 @@ EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_commit_write); EXPORT_SYMBOL(generic_cont_expand); -EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(invalidate_bdev); EXPORT_SYMBOL(ll_rw_block); Index: linux-2.6/include/linux/buffer_head.h =================================================================== --- linux-2.6.orig/include/linux/buffer_head.h +++ linux-2.6/include/linux/buffer_head.h @@ -210,10 +210,11 @@ int block_write_end(struct file *, struc struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); -int cont_prepare_write(struct page*, unsigned, unsigned, 
get_block_t*, - loff_t *); -int generic_cont_expand(struct inode *inode, loff_t size); -int generic_cont_expand_simple(struct inode *inode, loff_t size); +int cont_write_begin(struct file *, struct address_space *, loff_t, + unsigned, unsigned, struct page **, void **, + get_block_t *, loff_t *); +int generic_cont_expand(struct inode *inode, loff_t size, + get_block_t *get_block, loff_t *bytes); int block_commit_write(struct page *page, unsigned from, unsigned to); void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); Index: linux-2.6/fs/fat/file.c =================================================================== --- linux-2.6.orig/fs/fat/file.c +++ linux-2.6/fs/fat/file.c @@ -137,24 +137,6 @@ const struct file_operations fat_file_op .sendfile = generic_file_sendfile, }; -static int fat_cont_expand(struct inode *inode, loff_t size) -{ - struct address_space *mapping = inode->i_mapping; - loff_t start = inode->i_size, count = size - inode->i_size; - int err; - - err = generic_cont_expand_simple(inode, size); - if (err) - goto out; - - inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); - if (IS_SYNC(inode)) - err = sync_page_range_nolock(inode, mapping, start, count); -out: - return err; -} - int fat_notify_change(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); Index: linux-2.6/fs/fat/inode.c =================================================================== --- linux-2.6.orig/fs/fat/inode.c +++ linux-2.6/fs/fat/inode.c @@ -139,18 +139,42 @@ static int fat_readpages(struct file *fi return mpage_readpages(mapping, pages, nr_pages, fat_get_block); } -static int fat_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +int fat_cont_expand(struct inode *inode, loff_t size) { - return cont_prepare_write(page, from, to, fat_get_block, - &MSDOS_I(page->mapping->host)->mmu_private); + struct address_space 
*mapping = inode->i_mapping; + loff_t start = inode->i_size, count = size - inode->i_size; + int err; + + err = generic_cont_expand(inode, size, fat_get_block, + &MSDOS_I(inode)->mmu_private); + if (err) + goto out; + + inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); + if (IS_SYNC(inode)) + err = sync_page_range_nolock(inode, mapping, start, count); +out: + return err; +} + +static int fat_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + fat_get_block, + &MSDOS_I(mapping->host)->mmu_private); } -static int fat_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) +static int fat_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *pagep, void *fsdata) { - struct inode *inode = page->mapping->host; - int err = generic_commit_write(file, page, from, to); + struct inode *inode = mapping->host; + int err; + err = block_write_end(file, mapping, pos, len, copied, pagep, fsdata); if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; MSDOS_I(inode)->i_attrs |= ATTR_ARCH; @@ -200,8 +224,8 @@ static const struct address_space_operat .writepage = fat_writepage, .writepages = fat_writepages, .sync_page = block_sync_page, - .prepare_write = fat_prepare_write, - .commit_write = fat_commit_write, + .write_begin = fat_write_begin, + .write_end = fat_write_end, .direct_IO = fat_direct_IO, .bmap = _fat_bmap }; Index: linux-2.6/include/linux/msdos_fs.h =================================================================== --- linux-2.6.orig/include/linux/msdos_fs.h +++ linux-2.6/include/linux/msdos_fs.h @@ -406,6 +406,7 @@ extern int fat_getattr(struct vfsmount * struct kstat *stat); /* fat/inode.c */ +extern int 
fat_cont_expand(struct inode *inode, loff_t size); extern void fat_attach(struct inode *inode, loff_t i_pos); extern void fat_detach(struct inode *inode); extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); Index: linux-2.6/fs/reiserfs/file.c =================================================================== --- linux-2.6.orig/fs/reiserfs/file.c +++ linux-2.6/fs/reiserfs/file.c @@ -947,16 +947,18 @@ static int reiserfs_check_for_tail_and_c ih = get_ih(&path); res = 0; if (is_direct_le_ih(ih)) { + struct buffer_head tmp_bh; + /* Ok, closest item is file tail (tails are stored in "direct" - * items), so we need to unpack it. */ - /* To not overcomplicate matters, we just call generic_cont_expand - which will in turn call other stuff and finally will boil down to - reiserfs_get_block() that would do necessary conversion. */ + * items), so we need to unpack it. reiserfs_get_block will + * do that for us. */ cont_expand_offset = le_key_k_offset(get_inode_item_key_version(inode), &(ih->ih_key)); pathrelse(&path); - res = generic_cont_expand(inode, cont_expand_offset); + res = reiserfs_get_block(inode, + cont_expand_offset / inode->i_sb->s_blocksize, + &tmp_bh, 1); } else pathrelse(&path);