ext4: online defrag-- Allocate new contiguous blocks with mballoc

From: Akira Fujita

Search contiguous free blocks with multi-block allocation
and allocate them for the temporary inode.

Signed-off-by: Akira Fujita
Signed-off-by: Takashi Sato
Signed-off-by: Mingming Cao
---
 fs/ext4/defrag.c       |  286 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ext4.h         |    5
 fs/ext4/ext4_extents.h |    3
 fs/ext4/extents.c      |    6 -
 4 files changed, 297 insertions(+), 3 deletions(-)

Index: linux-2.6.26-rc1/fs/ext4/defrag.c
===================================================================
--- linux-2.6.26-rc1.orig/fs/ext4/defrag.c	2008-05-05 17:10:19.000000000 -0700
+++ linux-2.6.26-rc1/fs/ext4/defrag.c	2008-05-05 17:10:19.000000000 -0700
@@ -6,6 +6,75 @@
 #include "group.h"
 
 /**
+ * ext4_defrag_next_extent - Search for the next extent and set it to "extent"
+ *
+ * @inode:	inode of the original file
+ * @path:	this will obtain data for the next extent
+ * @extent:	pointer to the next extent we have just gotten
+ *
+ * This function returns 0 or 1(last entry) if succeeded, otherwise
+ * returns -EIO.
+ */
+static int
+ext4_defrag_next_extent(struct inode *inode,
+			struct ext4_ext_path *path,
+			struct ext4_extent **extent)
+{
+	const int leaf = path->p_depth;
+	int level, child;
+
+	/* Fast path: the current leaf still holds an extent after p_ext. */
+	if (path[leaf].p_ext < EXT_LAST_EXTENT(path[leaf].p_hdr)) {
+		*extent = ++path[leaf].p_ext;
+		return 0;
+	}
+
+	/*
+	 * The leaf is exhausted.  Walk up from the leaf's parent towards
+	 * the root looking for an index block whose cursor can still be
+	 * advanced.
+	 */
+	for (level = leaf - 1; level >= 0; level--) {
+		if (path[level].p_idx >= EXT_LAST_INDEX(path[level].p_hdr))
+			continue;
+
+		/*
+		 * Re-descend to the leaf.  At "level" we step to the next
+		 * index entry; at every level below it we restart from the
+		 * first index entry of the block that was just read.
+		 */
+		for (child = level; child < leaf; child++) {
+			struct ext4_ext_path *up = &path[child];
+			struct ext4_ext_path *down = &path[child + 1];
+
+			if (child == level)
+				up->p_idx++;
+			else
+				up->p_idx = EXT_FIRST_INDEX(up->p_hdr);
+			up->p_block = idx_pblock(up->p_idx);
+
+			/* Swap the cached child buffer for the new one. */
+			if (down->p_bh)
+				brelse(down->p_bh);
+			down->p_bh = sb_bread(inode->i_sb, up->p_block);
+			if (!down->p_bh)
+				return -EIO;
+			down->p_hdr = ext_block_hdr(down->p_bh);
+		}
+
+		/* The freshly read leaf: hand back its first extent. */
+		*extent = EXT_FIRST_EXTENT(path[leaf].p_hdr);
+		path[leaf].p_ext = *extent;
+		return 0;
+	}
+
+	/*
+	 * No index block had a further entry: this was the last extent.
+	 */
+	return 1;
+}
+
+/**
  * ext4_defrag_merge_across_blocks - Merge extents across leaf block
  *
  * @handle	journal handle
@@ -618,6 +687,148 @@ out:
 }
 
 /**
+ * ext4_defrag_alloc_blocks - Allocate contiguous blocks to temporary inode
+ *
+ * @dest_inode	temporary inode for multiple block allocation
+ * @org_inode	original inode
+ * @iblock	file related offset
+ * @total_blocks	contiguous blocks count
+ *
+ * If succeed, function returns count of extent we got,
+ * otherwise returns err.
+ */
+static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
+		struct inode *org_inode, ext4_lblk_t iblock,
+		ext4_fsblk_t total_blocks)
+{
+	handle_t *handle = NULL;
+	struct ext4_ext_path *dest_path = NULL;
+	struct ext4_extent newex;
+	struct ext4_allocation_request ar;
+	struct super_block *org_sb = org_inode->i_sb;
+	ext4_fsblk_t newblock = 0;
+	ext4_fsblk_t rest = total_blocks;
+	ext4_fsblk_t alloc_total = 0;
+	unsigned long got_len;
+	ext4_group_t dest_grp_no;
+	ext4_grpblk_t dest_blk_off;
+	int metadata = 1;
+	int count = 0;
+	int credits = 0;
+	int err = 0;
+	int err2 = 0;
+	int len_cnt = 0;
+
+	ar.len = total_blocks;
+
+	/* Find first extent */
+	dest_path = ext4_ext_find_extent(dest_inode, iblock, dest_path);
+	if (IS_ERR(dest_path)) {
+		err = PTR_ERR(dest_path);
+		dest_path = NULL;
+		goto out2;
+	}
+
+	ar.inode = dest_inode;
+	ar.flags = EXT4_MB_HINT_DATA | EXT4_MB_HINT_RESERVED
+		| EXT4_MB_HINT_NOPREALLOC;
+
+	ar.goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
+
+	ar.logical = iblock;
+	ar.lleft = 0;
+	ar.pleft = 0;
+	ar.lright = 0;
+	ar.pright = 0;
+
+	handle = ext4_journal_start(dest_inode, credits);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		/* Nothing was started, so there is nothing to stop. */
+		handle = NULL;
+		goto out2;
+	}
+
+	while (alloc_total != total_blocks) {
+		credits = ext4_ext_calc_credits_for_insert(dest_inode,
+							dest_path);
+		handle = ext4_ext_journal_restart(handle,
+				credits + EXT4_TRANS_META_BLOCKS);
+		if (IS_ERR(handle)) {
+			/* Drop the ERR_PTR; "out" releases dest_path. */
+			err = PTR_ERR(handle);
+			handle = NULL;
+			goto out;
+		}
+
+		newblock = ext4_mb_new_blocks(handle, &ar, &err);
+		if (err) {
+			/* Failed to get the contiguous blocks */
+			goto out;
+		}
+
+		/* ar.len is reused below; remember what was allocated. */
+		got_len = ar.len;
+
+		/*
+		 * Dirty buffer_head causes the overwriting
+		 * if ext4_mb_new_blocks() allocates the block
+		 * which used to be the metadata block.
+		 * We should call unmap_underlying_metadata()
+		 * to clear the dirty flag.
+		 */
+		for (len_cnt = 0; len_cnt < got_len; len_cnt++)
+			unmap_underlying_metadata(org_sb->s_bdev,
+						newblock + len_cnt);
+
+		alloc_total += got_len;
+		/* NOTE(review): the group number/offset are never read. */
+		ext4_get_group_no_and_offset(dest_inode->i_sb,
+			newblock, &dest_grp_no, &dest_blk_off);
+
+		newex.ee_block = cpu_to_le32(alloc_total - got_len);
+		ext4_ext_store_pblock(&newex, newblock);
+		newex.ee_len = cpu_to_le16(got_len);
+
+		/* Next request: the remainder, right behind this chunk. */
+		ar.goal = newblock + got_len;
+		rest = rest - got_len;
+		ar.len = rest;
+
+		err = ext4_ext_insert_extent(handle, dest_inode,
+					dest_path, &newex);
+		if (err) {
+			/* ar.len is the remainder by now; free got_len. */
+			ext4_free_blocks(handle, org_inode,
+				newblock, got_len, metadata);
+			goto out;
+		}
+		count++;
+	}
+
+out:
+	if (err) {
+		/* Failed case: We have to remove halfway blocks */
+		err2 = ext4_ext_remove_space(dest_inode, 0);
+		if (err2)
+			printk(KERN_ERR "ext4 defrag: "
+				"Failed to remove temporary inode blocks\n");
+	}
+out2:
+	if (dest_path) {
+		ext4_ext_drop_refs(dest_path);
+		kfree(dest_path);
+	}
+
+	/* handle is NULL when no transaction is open (early failure). */
+	if (handle)
+		ext4_journal_stop(handle);
+
+	/* Return extents count or err value */
+	return (!err ? count : err);
+}
+
+/**
  * ext4_defrag_partial - Defrag a file per page
  *
  * @tmp_inode:		the inode which has blocks to swap with original
@@ -736,3 +947,78 @@ out: return (ret < 0 ? 
ret : 0);
 }
+
+/**
+ * ext4_defrag_new_extent_tree - Check extents improves or not
+ *
+ * @inode:		inode of the original file
+ * @tmp_inode:		inode of the temporary file
+ * @path:		the structure holding some info about
+ *			original extent tree
+ * @tar_start:		starting offset to allocate in blocks
+ * @tar_blocks:	the number of blocks to allocate
+ * @iblock:		file related offset
+ *
+ * Allocate blocks for @tmp_inode, then count the original extents from
+ * @path up to the end of the target range and compare both counts.
+ *
+ * This function returns the value as below:
+ *	0(succeeded)
+ *	1(not improved)
+ *	negative value(error)
+ */
+static int
+ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
+			struct ext4_ext_path *path, ext4_lblk_t tar_start,
+			ext4_lblk_t tar_blocks, ext4_lblk_t iblock)
+{
+	struct ext4_extent *ext = NULL;
+	struct ext4_extent_header *eh = ext_inode_hdr(tmp_inode);
+	ext4_lblk_t tar_end = tar_start + tar_blocks - 1;
+	int sum_org = 0, sum_tmp = 0;
+	int ret = 0, depth, last_extent = 0;
+
+	eh->eh_depth = 0;
+
+	/* Allocate contiguous blocks */
+	sum_tmp = ext4_defrag_alloc_blocks(tmp_inode, inode, iblock,
+					tar_blocks);
+	if (sum_tmp < 0) {
+		ret = sum_tmp;
+		goto out;
+	}
+
+	depth = ext_depth(inode);
+	ext = path[depth].p_ext;
+	/* Compare the number of the original extents with new one. */
+	while (1) {
+		if (!last_extent)
+			++sum_org;
+		if (tar_end <= le32_to_cpu(ext->ee_block) +
+				le16_to_cpu(ext->ee_len) - 1 ||
+				last_extent) {
+			if (sum_org == sum_tmp) {
+				/* Not improved */
+				ret = ext4_ext_remove_space(tmp_inode, 0);
+				if (!ret)
+					ret = 1;
+			} else if (sum_org < sum_tmp) {
+				/* Fragment increased */
+				ret = ext4_ext_remove_space(tmp_inode, 0);
+				if (!ret)
+					ret = -ENOSPC;
+				printk(KERN_ERR "ext4 defrag: "
+					"Insufficient free blocks\n");
+			}
+			break;
+		}
+		/* @path belongs to @inode, so walk the original's tree. */
+		last_extent = ext4_defrag_next_extent(inode, path, &ext);
+		if (last_extent < 0) {
+			ret = last_extent;
+			break;
+		}
+	}
+out:
+	return ret;
+}
Index: linux-2.6.26-rc1/fs/ext4/ext4.h =================================================================== --- linux-2.6.26-rc1.orig/fs/ext4/ext4.h 2008-05-05 17:10:19.000000000 -0700 +++ linux-2.6.26-rc1/fs/ext4/ext4.h 2008-05-05 17:10:19.000000000 -0700 @@ -315,6 +315,8 @@ struct ext4_new_group_data { #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION #define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION +#define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */ + /* * Mount options @@ -1110,6 +1112,8 @@ extern void ext4_inode_bitmap_set(struct struct ext4_group_desc *bg, ext4_fsblk_t blk); extern void ext4_inode_table_set(struct super_block *sb, struct ext4_group_desc *bg, ext4_fsblk_t blk); +/* extents.c */ +extern handle_t *ext4_ext_journal_restart(handle_t *handle, int needed); static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { @@ -1227,6 +1231,7 @@ extern int ext4_get_blocks_wrap(handle_t sector_t block, unsigned long max_blocks, struct buffer_head *bh, int create, int extend_disksize); +extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start); #endif /* __KERNEL__ */ #endif /* _EXT4_H */ Index: linux-2.6.26-rc1/fs/ext4/ext4_extents.h =================================================================== --- linux-2.6.26-rc1.orig/fs/ext4/ext4_extents.h 2008-05-05 
17:10:19.000000000 -0700 +++ linux-2.6.26-rc1/fs/ext4/ext4_extents.h 2008-05-05 17:10:19.000000000 -0700 @@ -230,5 +230,8 @@ extern int ext4_ext_search_right(struct extern void ext4_ext_drop_refs(struct ext4_ext_path *); extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); extern void ext4_ext_drop_refs(struct ext4_ext_path *path); +extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, + struct ext4_ext_path *path, + ext4_lblk_t block); #endif /* _EXT4_EXTENTS */ Index: linux-2.6.26-rc1/fs/ext4/extents.c =================================================================== --- linux-2.6.26-rc1.orig/fs/ext4/extents.c 2008-05-05 17:10:19.000000000 -0700 +++ linux-2.6.26-rc1/fs/ext4/extents.c 2008-05-05 17:10:19.000000000 -0700 @@ -92,7 +92,7 @@ static void ext4_idx_store_pblock(struct ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } -static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) +handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) { int err; @@ -142,7 +142,7 @@ static int ext4_ext_dirty(handle_t *hand return err; } -static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, +ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t block) { @@ -1956,7 +1956,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path return 1; } -static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) +int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) { struct super_block *sb = inode->i_sb; int depth = ext_depth(inode);