ext4: online defrag-- Allocate new contiguous blocks with mballoc

From: Akira Fujita

Search contiguous free blocks with multi-block allocation
and allocate them for the temporary inode.

Signed-off-by: Mingming Cao
Signed-off-by: Takashi Sato
Signed-off-by: Akira Fujita
---
---
 fs/ext4/defrag.c | 766 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/extents.c | 8
 fs/ext4/inode.c | 2
 fs/ext4/ioctl.c | 13
 fs/ext4/mballoc.c | 7
 5 files changed, 791 insertions(+), 5 deletions(-)

Index: linux-2.6.24-rc8/fs/ext4/defrag.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.24-rc8/fs/ext4/defrag.c 2008-01-17 12:05:47.000000000 -0800
@@ -0,0 +1,766 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "group.h"
+
+/*
+ * this structure is used to gather extents from the tree via ioctl
+ *
+ * It is copied in from userspace by EXT4_IOC_GET_EXTENTS:
+ *  start  - first logical block handed to ext4_ext_walk_space()
+ *  buflen - size in bytes of the area @buffer points to
+ *  buffer - userspace destination for the extent records
+ *  cur    - write cursor inside @buffer (reset by the kernel)
+ *  err    - negative errno on failure, otherwise the number of
+ *           records copied so far (reset by the kernel)
+ */
+struct ext4_extent_buf {
+	ext4_fsblk_t start;
+	int buflen;
+	void *buffer;
+	void *cur;
+	int err;
+};
+
+/*
+ * this structure is used to collect stats info about the tree
+ */
+struct ext4_extent_tree_stats {
+	int depth;
+	int extents_num;
+	int leaf_num;
+};
+
+/*
+ * Walk callback for EXT4_IOC_GET_EXTENTS: copy one cached extent record
+ * to the user buffer described by @buf and advance the cursor.
+ *
+ * Entries that are not real extents are skipped.  The walk is stopped
+ * (EXT_BREAK) once an earlier copy failed, the buffer is full, or this
+ * copy faults; in the last case buf->err is set to -EFAULT.  On success
+ * buf->err is incremented, so it doubles as the count of stored records.
+ */
+static int
+ext4_ext_store_extent_cb(struct inode *inode,
+			struct ext4_ext_path *path,
+			struct ext4_ext_cache *newex,
+			struct ext4_extent_buf *buf)
+{
+
+	if (newex->ec_type != EXT4_EXT_CACHE_EXTENT)
+		return EXT_CONTINUE;
+
+	if (buf->err < 0)
+		return EXT_BREAK;
+	if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen)
+		return EXT_BREAK;
+
+	if (!copy_to_user(buf->cur, newex, sizeof(*newex))) {
+		buf->err++;
+		buf->cur += sizeof(*newex);
+	} else {
+		buf->err = -EFAULT;
+		return EXT_BREAK;
+	}
+	return EXT_CONTINUE;
+}
+
+/*
+ * Walk callback for EXT4_IOC_GET_TREE_STATS: count every real extent and
+ * count a leaf each time the current extent is the first one of its
+ * leaf block.
+ */
+static int
+ext4_ext_collect_stats_cb(struct inode *inode,
+			struct ext4_ext_path *path,
+			struct ext4_ext_cache *ex,
+			struct ext4_extent_tree_stats *buf)
+{
+	int depth;
+
+	if (ex->ec_type != EXT4_EXT_CACHE_EXTENT)
+		return EXT_CONTINUE;
+
+	depth = ext_depth(inode);
+	buf->extents_num++;
+	if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr))
+		buf->leaf_num++;
+	return EXT_CONTINUE;
+}
+
+/**
+ * ext4_ext_next_extent - search for next extent and set it to "extent"
+ * @inode:	inode of the original file
+ * @path:	this will obtain data for next extent
+ * @extent:	pointer to next extent we have just gotten
+ *
+ * @path is advanced in place: buffer heads of the levels that are left
+ * behind are released and the new ones are read with sb_bread().
+ * When there is no further extent, @path and @extent are left untouched.
+ *
+ * This function returns 0 or 1(last_entry) if succeeded, otherwise
+ * returns -EIO
+ */
+static int
+ext4_ext_next_extent(struct inode *inode,
+		     struct ext4_ext_path *path,
+		     struct ext4_extent **extent)
+{
+	int ppos;
+	int leaf_ppos = path->p_depth;
+
+	ppos = leaf_ppos;
+	if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
+		/* leaf block */
+		*extent = ++path[ppos].p_ext;
+		return 0;
+	}
+
+	/* climb until some index block still has an entry to the right */
+	while (--ppos >= 0) {
+		if (EXT_LAST_INDEX(path[ppos].p_hdr) >
+		    path[ppos].p_idx) {
+			int cur_ppos = ppos;
+
+			/* index block */
+			path[ppos].p_idx++;
+			path[ppos].p_block =
+				idx_pblock(path[ppos].p_idx);
+			if (path[ppos+1].p_bh)
+				brelse(path[ppos+1].p_bh);
+			path[ppos+1].p_bh =
+				sb_bread(inode->i_sb, path[ppos].p_block);
+			if (!path[ppos+1].p_bh)
+				return -EIO;
+			path[ppos+1].p_hdr =
+				ext_block_hdr(path[ppos+1].p_bh);
+
+			/* halfway index block */
+			while (++cur_ppos < leaf_ppos) {
+				path[cur_ppos].p_idx =
+					EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
+				path[cur_ppos].p_block =
+					idx_pblock(path[cur_ppos].p_idx);
+				if (path[cur_ppos+1].p_bh)
+					brelse(path[cur_ppos+1].p_bh);
+				path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
+					path[cur_ppos].p_block);
+				if (!path[cur_ppos+1].p_bh)
+					return -EIO;
+				path[cur_ppos+1].p_hdr =
+					ext_block_hdr(path[cur_ppos+1].p_bh);
+			}
+
+			/* leaf block */
+			path[leaf_ppos].p_ext = *extent =
+				EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
+			return 0;
+		}
+	}
+	/* last_extent */
+	return 1;
+}
+
+/*
+ * ext4_ext_ioctl - dispatch the extent/defrag related ioctls
+ *
+ * Every command except EXT4_IOC_FIBMAP requires an extent-mapped inode
+ * (-EINVAL otherwise).  Return value per command:
+ *  EXT4_IOC_GET_EXTENTS    - number of extent records copied to the user
+ *                            buffer, or a negative errno
+ *  EXT4_IOC_GET_TREE_STATS - 0, or a negative errno
+ *  EXT4_IOC_GET_TREE_DEPTH - depth of the extent tree
+ *  EXT4_IOC_FIBMAP         - result of put_user(), -EFAULT on bad pointer
+ *  EXT4_IOC_DEFRAG         - whatever ext4_ext_defrag() returns
+ * Commands not handled here return 0.
+ */
+int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	int err = 0;
+	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL ||
+					cmd == EXT4_IOC_FIBMAP))
+		return -EINVAL;
+
+	if (cmd == EXT4_IOC_GET_EXTENTS) {
+		struct ext4_extent_buf buf;
+
+		if (copy_from_user(&buf, (void *) arg, sizeof(buf)))
+			return -EFAULT;
+
+		/*
+		 * The callback compares an unsigned byte offset against
+		 * buflen; a negative length would turn into a huge limit.
+		 */
+		if (buf.buflen < 0)
+			return -EINVAL;
+
+		buf.cur = buf.buffer;
+		buf.err = 0;
+		down_write(&EXT4_I(inode)->i_data_sem);
+		err = ext4_ext_walk_space(inode, buf.start, EXT_MAX_BLOCK,
+				(void *)ext4_ext_store_extent_cb, &buf);
+		up_write(&EXT4_I(inode)->i_data_sem);
+		if (err == 0)
+			err = buf.err;
+	} else if (cmd == EXT4_IOC_GET_TREE_STATS) {
+		struct ext4_extent_tree_stats buf;
+
+		down_write(&EXT4_I(inode)->i_data_sem);
+		buf.depth = ext_depth(inode);
+		buf.extents_num = 0;
+		buf.leaf_num = 0;
+		err = ext4_ext_walk_space(inode, 0, EXT_MAX_BLOCK,
+				(void *)ext4_ext_collect_stats_cb, &buf);
+		up_write(&EXT4_I(inode)->i_data_sem);
+		/*
+		 * copy_to_user() returns the number of bytes left uncopied,
+		 * not an errno, so translate a failure explicitly.
+		 */
+		if (!err && copy_to_user((void *) arg, &buf, sizeof(buf)))
+			err = -EFAULT;
+	} else if (cmd == EXT4_IOC_GET_TREE_DEPTH) {
+		down_write(&EXT4_I(inode)->i_data_sem);
+		err = ext_depth(inode);
+		up_write(&EXT4_I(inode)->i_data_sem);
+	} else if (cmd == EXT4_IOC_FIBMAP) {
+		ext4_fsblk_t __user *p = (ext4_fsblk_t __user *)arg;
+		ext4_fsblk_t block = 0;
+		struct address_space *mapping = filp->f_mapping;
+
+		if (copy_from_user(&block, (ext4_fsblk_t __user *)arg,
+					sizeof(block)))
+			return -EFAULT;
+
+		lock_kernel();
+		block = ext4_bmap(mapping, block);
+		unlock_kernel();
+
+		return put_user(block, p);
+	} else if (cmd == EXT4_IOC_DEFRAG) {
+		struct ext4_ext_defrag_data defrag;
+
+		if (copy_from_user(&defrag,
+			(struct ext4_ext_defrag_data __user *)arg,
+			sizeof(defrag)))
+			return -EFAULT;
+		err = ext4_ext_defrag(filp, defrag.start_offset,
+				defrag.defrag_size, defrag.goal, defrag.flag,
+				&defrag.ext);
+	}
+
+	return err;
+}
+
+/**
+ * ext4_ext_alloc_blocks - allocate contiguous blocks to temporary inode
+ * @dest_inode	temporary inode for multiple block allocation
+ * @org_inode	original inode
+ * @iblock	file related offset
+ * @total_blocks	contiguous blocks
count
+ * @goal	block offset for allocation
+ * @phase	phase of create free space mode
+ *
+ * Blocks are requested from ext4_mb_new_blocks() until @total_blocks have
+ * been obtained; every chunk is inserted into @dest_inode as one extent
+ * whose logical offset starts at 0.  On any failure the extents inserted
+ * so far are removed from @dest_inode again.
+ *
+ * If succeed, function returns count of extent we got,
+ * otherwise returns err.
+ */
+static int ext4_ext_alloc_blocks(struct inode *dest_inode,
+		struct inode *org_inode, ext4_lblk_t iblock,
+		ext4_fsblk_t total_blocks, ext4_fsblk_t goal, int phase)
+{
+	handle_t *handle = NULL;
+	struct ext4_ext_path *dest_path = NULL;
+	struct ext4_ext_path *org_path = NULL;
+	struct ext4_extent newex;
+	struct ext4_allocation_request ar;
+	struct super_block *org_sb = org_inode->i_sb;
+	ext4_fsblk_t newblock = 0;
+	ext4_fsblk_t rest = total_blocks;
+	ext4_fsblk_t alloc_total = 0;
+	unsigned long org_len;
+	unsigned long alloc_len;
+	unsigned long len_cnt;
+	ext4_group_t dest_grp_no, org_grp_no = 0, goal_grp_no;
+	ext4_grpblk_t dest_blk_off, org_blk_off, goal_blk_off;
+	int org_depth = ext_depth(org_inode);
+	int metadata = 1;
+	int count = 0;
+	int credits = 0;
+	int err = 0;
+	int err2 = 0;
+
+	ar.len = total_blocks;
+	org_len = ar.len;
+
+	/* Calculate group number of org_inode block */
+	if (phase == DEFRAG_FORCE_VICTIM) {
+		org_path = ext4_ext_find_extent(org_inode, iblock, org_path);
+		if (IS_ERR(org_path)) {
+			err = PTR_ERR(org_path);
+			org_path = NULL;
+			goto out2;
+		}
+		ext4_get_group_no_and_offset(org_inode->i_sb,
+				ext_pblock(org_path[org_depth].p_ext),
+				&org_grp_no, &org_blk_off);
+		ar.excepted_group = org_grp_no;
+	} else {
+		ar.excepted_group = -1;
+	}
+
+	/* Find first extent. */
+	dest_path = ext4_ext_find_extent(dest_inode, iblock, dest_path);
+	if (IS_ERR(dest_path)) {
+		err = PTR_ERR(dest_path);
+		dest_path = NULL;
+		goto out2;
+	}
+
+	ar.inode = dest_inode;
+	ar.flags = EXT4_MB_HINT_DATA | EXT4_MB_HINT_RESERVED
+		| EXT4_MB_HINT_NOPREALLOC;
+	if (goal) {
+		ar.goal = goal;
+	} else {
+		ar.goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
+	}
+
+	ar.logical = iblock;
+	ar.lleft = 0;
+	ar.pleft = 0;
+	ar.lright = 0;
+	ar.pright = 0;
+
+	handle = ext4_journal_start(dest_inode, credits);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		/* an ERR_PTR must never reach ext4_journal_stop() */
+		handle = NULL;
+		goto out2;
+	}
+
+	while (alloc_total != total_blocks) {
+		credits = ext4_ext_calc_credits_for_insert(dest_inode,
+							dest_path);
+		handle = ext4_ext_journal_restart(handle,
+				credits + EXT4_TRANS_META_BLOCKS);
+
+		if (IS_ERR(handle)) {
+			/* go through the common exit so the paths are freed */
+			err = PTR_ERR(handle);
+			handle = NULL;
+			goto out;
+		}
+
+		newblock = ext4_mb_new_blocks(handle, &ar, &err);
+
+		if (err) {
+			/* Failed to get the contiguous blocks */
+			goto out;
+		}
+
+		/*
+		 * Remember how many blocks this call returned: ar.len is
+		 * reused below as the request size of the next round.
+		 */
+		alloc_len = ar.len;
+
+		if ((alloc_len != org_len) &&
+				(phase == DEFRAG_FORCE_TRY)) {
+			ext4_free_blocks(handle, org_inode, newblock,
+					 alloc_len, metadata);
+			/* go to force mode */
+			err = -ENOSPC;
+			goto out;
+		}
+
+		/*
+		 * If ext4_mb_new_blocks() allocates
+		 * the block which used to be the metadata block,
+		 * its dirty buffer_head causes the overwriting
+		 * with old metadata.
+		 * We should call unmap_underlying_metadata()
+		 * to clear the dirty flag.
+		 */
+		for (len_cnt = 0; len_cnt < alloc_len; len_cnt++)
+			unmap_underlying_metadata(org_sb->s_bdev,
+						  newblock + len_cnt);
+
+		alloc_total += alloc_len;
+		ext4_get_group_no_and_offset(dest_inode->i_sb,
+			goal, &goal_grp_no, &goal_blk_off);
+		ext4_get_group_no_and_offset(dest_inode->i_sb,
+			newblock, &dest_grp_no, &dest_blk_off);
+		/* We can't allocate at the same block group */
+		switch (phase) {
+		case DEFRAG_FORCE_VICTIM:
+			if (dest_grp_no == org_grp_no) {
+				printk(KERN_ERR "defrag: Can't allocate"
+					" in same block group\n");
+				ext4_free_blocks(handle, org_inode,
+					newblock, alloc_len, metadata);
+				err = -ENOSPC;
+				goto out;
+			}
+			break;
+		case DEFRAG_FORCE_GATHER:
+			/* Maybe reserved blocks are already used by
+			   other process */
+			if (dest_grp_no != goal_grp_no
+				|| alloc_total != total_blocks) {
+				printk(KERN_ERR "defrag: Already used"
+					" the specified blocks\n");
+				ext4_free_blocks(handle, org_inode,
+					newblock, alloc_len, metadata);
+				err = -EIO;
+				goto out;
+			}
+			break;
+		}
+
+		newex.ee_block = cpu_to_le32(alloc_total - alloc_len);
+		ext4_ext_store_pblock(&newex, newblock);
+		newex.ee_len = cpu_to_le16(alloc_len);
+
+		if (!phase)
+			ar.goal = newblock + alloc_len;
+		rest = rest - alloc_len;
+		ar.len = rest;
+
+		err = ext4_ext_insert_extent(handle, dest_inode,
+					dest_path, &newex);
+		if (!err) {
+			count++;
+		} else {
+			/* give back exactly the chunk that was not inserted */
+			ext4_free_blocks(handle, org_inode,
+					newblock, alloc_len, metadata);
+			goto out;
+		}
+	}
+
+out:
+	/* Failed case: We have to remove halfway blocks */
+	if (err)
+		err2 = ext4_ext_remove_space(dest_inode, 0);
+
+out2:
+	/* common exit: release whatever was acquired so far */
+	if (dest_path) {
+		ext4_ext_drop_refs(dest_path);
+		kfree(dest_path);
+	}
+	if (org_path) {
+		ext4_ext_drop_refs(org_path);
+		kfree(org_path);
+	}
+	if (handle)
+		ext4_journal_stop(handle);
+
+	if (err2) {
+		return err2;
+	} else if (err) {
+		return err;
+	}
+	/* return extents count */
+	return count;
+}
+
+/**
+ * ext4_ext_new_extent_tree - allocate contiguous blocks
+ * 
@inode:		inode of the original file
+ * @tmp_inode:		inode of the temporary file
+ * @path:		the structure holding some info about
+ *			original extent tree
+ * @tar_start:		starting offset to allocate in blocks
+ * @tar_blocks:		the number of blocks to allocate
+ * @iblock:		file related offset
+ * @goal:		block offset for allocation
+ * @flag:		phase of create free space mode
+ *
+ * The extent tree of @tmp_inode is reset to depth 0 and filled by
+ * ext4_ext_alloc_blocks().  The number of extents obtained is then
+ * compared with the number of extents of the original file up to the end
+ * of the target range; @path is advanced while counting.  If nothing was
+ * gained the new blocks are removed from @tmp_inode again.
+ *
+ * This function returns the value as below:
+ *	0(succeeded)
+ *	1(not improved)
+ *	negative value(error)
+ */
+static int
+ext4_ext_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
+			struct ext4_ext_path *path, ext4_lblk_t tar_start,
+			ext4_lblk_t tar_blocks, ext4_lblk_t iblock,
+			ext4_fsblk_t goal, int flag)
+{
+	struct ext4_extent *ext = NULL;
+	struct ext4_extent_header *eh = NULL;
+	ext4_lblk_t tar_end = tar_start + tar_blocks - 1;
+	int sum_org = 0, sum_tmp = 0;
+	int ret = 0, depth;
+	int last_extent = 0;
+
+	eh = ext_inode_hdr(tmp_inode);
+	eh->eh_depth = 0;
+
+	/* allocate contiguous blocks */
+	sum_tmp = ext4_ext_alloc_blocks(tmp_inode, inode, iblock,
+					tar_blocks, goal, flag);
+	if (sum_tmp < 0) {
+		ret = sum_tmp;
+		goto ERR;
+	}
+
+	depth = ext_depth(inode);
+	ext = path[depth].p_ext;
+	while (1) {
+		if (!last_extent)
+			++sum_org;
+
+		/* ee_block is 32 bits wide on disk, ee_len only 16 */
+		if (tar_end <= le32_to_cpu(ext->ee_block) +
+			       le16_to_cpu(ext->ee_len) - 1 ||
+				last_extent) {
+
+			if ((sum_org == sum_tmp) && !goal) {
+				/* not improved */
+				ret = ext4_ext_remove_space(tmp_inode, 0);
+				if (!ret)
+					ret = 1;
+			} else if (sum_org < sum_tmp &&
+					flag != DEFRAG_FORCE_VICTIM) {
+				/* fragment increased */
+				ret = ext4_ext_remove_space(tmp_inode, 0);
+				if (!ret)
+					ret = -ENOSPC;
+				printk("defrag failed due to no space\n");
+			}
+			break;
+		}
+
+		/* @path describes the original file, so walk it with @inode */
+		last_extent = ext4_ext_next_extent(inode, path, &ext);
+		if (last_extent < 0) {
+			ret = last_extent;
+			break;
+		}
+	}
+ERR:
+	return ret;
+}
+
+/**
+ * ext4_ext_defrag - defrag whole file
+ * @filp:		pointer to file
+ * @block_start:	starting offset to defrag in blocks
+ * @defrag_size:	size of defrag in
blocks
+ * @goal:		block offset for allocation
+ * @flag:		phase of create free space mode
+ * @ext:		extent to be moved (only -f)
+ *
+ * The range is processed one run of logically contiguous extents at a
+ * time: a temporary orphan inode receives newly allocated blocks and the
+ * data is then moved page by page.  inode->i_mutex and i_data_sem are
+ * held from the first extent lookup until the function returns.
+ *
+ * This function returns the number of blocks if succeeded, otherwise
+ * returns error value
+ */
+int
+ext4_ext_defrag(struct file *filp, ext4_lblk_t block_start,
+		ext4_lblk_t defrag_size, ext4_fsblk_t goal,
+		int flag, struct ext4_extent_data *ext)
+{
+	struct inode *inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
+	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+	struct ext4_ext_path *path = NULL, *holecheck_path = NULL;
+	struct ext4_extent *ext_prev = NULL, *ext_cur = NULL, *ext_dummy = NULL;
+	handle_t *handle;
+	ext4_lblk_t block_end = block_start + defrag_size - 1;
+	ext4_lblk_t seq_blocks = 0, seq_start = 0;
+	ext4_lblk_t add_blocks = 0;
+	ext4_lblk_t file_end = (inode->i_size - 1) >> inode->i_blkbits;
+	pgoff_t page_offset = 0, dest_offset = 0, seq_end_page = 0;
+	int ret = 0, depth = 0, last_extent = 0, seq_extents = 0;
+
+	/*
+	 * Check goal offset if goal offset was given from userspace.
+	 * No lock is held yet, so fail with a plain return.
+	 */
+	if (((0 < goal) && (ext4_blocks_count(es) < goal)) && (goal != -1)) {
+		printk(KERN_ERR "defrag: incorrect goal number %llu, "
+				"you can set goal until %llu\n", goal,
+				ext4_blocks_count(es));
+		return -EINVAL;
+	}
+
+	/* Setup for fixed blocks mode */
+	if (ext->len) {
+		if (ext->len < defrag_size) {
+			printk("Cannot defrag due to the insufficient"
+					" specified free blocks\n");
+			return -EINVAL;
+		}
+		flag = DEFRAG_FORCE_GATHER;
+		goal = ext->start;
+	}
+
+	if (file_end < block_end)
+		defrag_size -= block_end - file_end;
+
+	mutex_lock(&inode->i_mutex);
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	path = ext4_ext_find_extent(inode, block_start, NULL);
+	if (IS_ERR(path)) {
+		ret = PTR_ERR(path);
+		path = NULL;
+		goto ERR2;
+	}
+
+	/* get path structure to check hole */
+	holecheck_path = ext4_ext_find_extent(inode, block_start, NULL);
+	if (IS_ERR(holecheck_path)) {
+		ret = PTR_ERR(holecheck_path);
+		holecheck_path = NULL;
+		goto ERR2;
+	}
+
+	depth = ext_depth(inode);
+	ext_cur = holecheck_path[depth].p_ext;
+	if (ext_cur == NULL)
+		goto ERR2;
+
+	/*
+	 * if block_start was within the hole, get proper extent whose ee_block
+	 * is beyond block_start
+	 */
+	if (le32_to_cpu(ext_cur->ee_block) +
+			le16_to_cpu(ext_cur->ee_len) - 1 < block_start) {
+		last_extent = ext4_ext_next_extent(inode, holecheck_path,
+						   &ext_cur);
+		if (last_extent < 0) {
+			ret = last_extent;
+			goto ERR2;
+		}
+		last_extent = ext4_ext_next_extent(inode, path, &ext_dummy);
+		if (last_extent < 0) {
+			ret = last_extent;
+			goto ERR2;
+		}
+	}
+	seq_extents = 1;
+	seq_start = le32_to_cpu(ext_cur->ee_block);
+
+	/* no blocks existed within designated range */
+	if (le32_to_cpu(ext_cur->ee_block) > block_end) {
+		printk("nothing done due to the lack of contiguous blocks\n");
+		goto ERR2;
+	}
+
+	/* adjust start blocks */
+	add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
+			 le16_to_cpu(ext_cur->ee_len), block_end + 1) -
+		     max(le32_to_cpu(ext_cur->ee_block), block_start);
+
+	while (!last_extent &&
+	       le32_to_cpu(ext_cur->ee_block) <= block_end) {
+		seq_blocks += add_blocks;
+
+		handle = ext4_journal_start(inode,
+			EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+			EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+			2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) + 1);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			/* both paths are allocated here: free them on exit */
+			goto ERR2;
+		}
+		tmp_inode = ext4_new_inode(handle,
+			inode->i_sb->s_root->d_inode, S_IFREG);
+		if (IS_ERR(tmp_inode)) {
+			/* report the allocator's own error code */
+			ret = PTR_ERR(tmp_inode);
+			ext4_journal_stop(handle);
+			tmp_inode = NULL;
+			goto ERR2;
+		}
+
+		/* unlinked orphan: it goes away with the last iput() */
+		i_size_write(tmp_inode, i_size_read(inode));
+		tmp_inode->i_nlink = 0;
+		ext4_ext_tree_init(handle, tmp_inode);
+		ext4_orphan_add(handle, tmp_inode);
+		ext4_journal_stop(handle);
+
+		/* adjust tail blocks */
+		if (seq_start + seq_blocks - 1 > block_end)
+			seq_blocks = block_end - seq_start + 1;
+
+		ext_prev = ext_cur;
+		last_extent = ext4_ext_next_extent(inode, holecheck_path,
+						   &ext_cur);
+		if (last_extent < 0) {
+			ret = last_extent;
+			break;
+		}
+		if (!last_extent)
+			seq_extents++;
+		add_blocks = le16_to_cpu(ext_cur->ee_len);
+
+		/*
+		 * The next extent directly follows the previous one and still
+		 * starts inside the range: keep accumulating this sequence.
+		 * Otherwise we found a hole or reached the tail of either
+		 * the designated range or the file.
+		 */
+		if ((le32_to_cpu(ext_prev->ee_block) +
+				le16_to_cpu(ext_prev->ee_len) ==
+				le32_to_cpu(ext_cur->ee_block) &&
+				block_end >= le32_to_cpu(ext_cur->ee_block) &&
+				!last_extent)) {
+			if (tmp_inode) {
+				iput(tmp_inode);
+				tmp_inode = NULL;
+			}
+			continue;
+		}
+
+		/* found an isolated block */
+		if ((seq_extents == 1) && !goal) {
+			seq_start = le32_to_cpu(ext_cur->ee_block);
+			goto CLEANUP;
+		}
+
+		ret = ext4_ext_new_extent_tree(inode, tmp_inode, path,
+			seq_start, seq_blocks, block_start, goal, flag);
+
+		if (ret < 0) {
+			break;
+		} else if ((ret == 1) && (!goal || (goal && !flag))) {
+			ret = 0;
+			seq_start = le32_to_cpu(ext_cur->ee_block);
+			goto CLEANUP;
+		}
+
+		page_offset = seq_start >>
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+		seq_end_page = (seq_start + seq_blocks - 1) >>
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+		dest_offset = 0;
+		seq_start = le32_to_cpu(ext_cur->ee_block);
+
+		/* Discard all preallocations.
+		 * This is provisional solution.
+		 * When true ext4_mb_return_to_preallocation() is
+		 * implemented, this will be removed.
+		 */
+		ext4_mb_discard_inode_preallocations(inode);
+
+		if (inode->i_mapping->a_ops->write_begin) {
+			while (page_offset <= seq_end_page) {
+				/* replace original branches for new branches */
+				ret = ext4_ext_defrag_partial2(tmp_inode,
+						filp, page_offset,
+						dest_offset, flag);
+				if (ret < 0)
+					goto ERR2;
+
+				page_offset++;
+				dest_offset++;
+			}
+		} else {
+			while (page_offset <= seq_end_page) {
+				/* replace original branches for new branches */
+				ret = ext4_ext_defrag_partial(tmp_inode,
+						filp, page_offset,
+						dest_offset, flag);
+				if (ret < 0)
+					goto ERR2;
+
+				page_offset++;
+				dest_offset++;
+			}
+		}
+
+		/* decrease buffer counter */
+		if (holecheck_path)
+			ext4_ext_drop_refs(holecheck_path);
+		holecheck_path =
+			ext4_ext_find_extent(inode, seq_start, holecheck_path);
+		if (IS_ERR(holecheck_path)) {
+			ret = PTR_ERR(holecheck_path);
+			holecheck_path = NULL;
+			break;
+		}
+		depth = holecheck_path->p_depth;
+
+CLEANUP:
+		/* decrease buffer counter */
+		if (path)
+			ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode, seq_start, path);
+		if (IS_ERR(path)) {
+			ret = PTR_ERR(path);
+			path = NULL;
+			break;
+		}
+
+		/* start a new sequence at the extent now under the cursor */
+		ext_cur = holecheck_path[depth].p_ext;
+		add_blocks = le16_to_cpu(ext_cur->ee_len);
+		seq_blocks = 0;
+		dest_offset = 0;
+		seq_extents = 1;
+
+		if (tmp_inode) {
+			iput(tmp_inode);
+			tmp_inode = NULL;
+		}
+	}
+
+ERR2:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	if (holecheck_path) {
+		ext4_ext_drop_refs(holecheck_path);
+		kfree(holecheck_path);
+	}
+	up_write(&EXT4_I(inode)->i_data_sem);
+	mutex_unlock(&inode->i_mutex);
+
+	if (tmp_inode)
+		iput(tmp_inode);
+
+	return (ret ? ret : defrag_size);
+}
Index: linux-2.6.24-rc8/fs/ext4/extents.c =================================================================== --- linux-2.6.24-rc8.orig/fs/ext4/extents.c 2008-01-17 12:05:40.000000000 -0800 +++ linux-2.6.24-rc8/fs/ext4/extents.c 2008-01-17 12:05:47.000000000 -0800 @@ -48,7 +48,7 @@ * ext_pblock: * combine low and high parts of physical block number into ext4_fsblk_t */ -static ext4_fsblk_t ext_pblock(struct ext4_extent *ex) +ext4_fsblk_t ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; @@ -92,7 +92,7 @@ static void ext4_idx_store_pblock(struct ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } -static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) +handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) { int err; @@ -142,7 +142,7 @@ static int ext4_ext_dirty(handle_t *hand return err; } -static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, +ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { @@ -1952,7 +1952,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode); Index: linux-2.6.24-rc8/fs/ext4/inode.c =================================================================== --- linux-2.6.24-rc8.orig/fs/ext4/inode.c 2008-01-17 12:05:41.000000000 -0800 +++ linux-2.6.24-rc8/fs/ext4/inode.c 2008-01-17 12:05:47.000000000 -0800 @@ -1507,7 +1507,7 @@ out: * So, if we see any bmap calls here on a modified, data-journaled file, * take extra steps to flush any blocks which might be in the cache. 
*/ -static sector_t ext4_bmap(struct address_space *mapping, sector_t block) +sector_t ext4_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; journal_t *journal; Index: linux-2.6.24-rc8/fs/ext4/ioctl.c =================================================================== --- linux-2.6.24-rc8.orig/fs/ext4/ioctl.c 2008-01-17 12:05:00.000000000 -0800 +++ linux-2.6.24-rc8/fs/ext4/ioctl.c 2008-01-17 12:05:47.000000000 -0800 @@ -231,6 +231,19 @@ flags_err: return err; } + case EXT4_IOC_GET_EXTENTS: + case EXT4_IOC_GET_TREE_STATS: + case EXT4_IOC_GET_TREE_DEPTH: + case EXT4_IOC_FIBMAP: + case EXT4_IOC_DEFRAG: + case EXT4_IOC_GROUP_INFO: + case EXT4_IOC_FREE_BLOCKS_INFO: + case EXT4_IOC_EXTENTS_INFO: + case EXT4_IOC_RESERVE_BLOCK: + case EXT4_IOC_MOVE_VICTIM: + case EXT4_IOC_BLOCK_RELEASE: { + return ext4_ext_ioctl(inode, filp, cmd, arg); + } case EXT4_IOC_GROUP_ADD: { struct ext4_new_group_data input; struct super_block *sb = inode->i_sb; Index: linux-2.6.24-rc8/fs/ext4/mballoc.c =================================================================== --- linux-2.6.24-rc8.orig/fs/ext4/mballoc.c 2008-01-17 12:05:20.000000000 -0800 +++ linux-2.6.24-rc8/fs/ext4/mballoc.c 2008-01-17 12:05:47.000000000 -0800 @@ -413,6 +413,7 @@ struct ext4_allocation_context { struct page *ac_buddy_page; struct ext4_prealloc_space *ac_pa; struct ext4_locality_group *ac_lg; + long long ac_excepted_group; }; #define AC_STATUS_CONTINUE 1 @@ -1900,6 +1901,11 @@ repeat: if (group == EXT4_SB(sb)->s_groups_count) group = 0; + if (ac->ac_excepted_group != -1 && + group == ac->ac_excepted_group) { + continue; + } + /* quick check to skip empty groups */ grp = ext4_get_group_info(ac->ac_sb, group); if (grp->bb_free == 0) @@ -4050,6 +4056,7 @@ static int ext4_mb_initialize_context(st ac->ac_bitmap_page = NULL; ac->ac_buddy_page = NULL; ac->ac_lg = NULL; + ac->ac_excepted_group = ar->excepted_group; /* we have to define context: we'll we work with a file or * locality 
group. this is a policy, actually */