ext4: online defrag-- Check the free space fragmentation (-f mode)

From: Akira Fujita

Check the free space fragmentation in the block group
where the target file is located.

Signed-off-by: Akira Fujita
Signed-off-by: Takashi Sato
---

 fs/ext4/balloc.c |    2 
 fs/ext4/defrag.c |  295 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/ext4.h   |   34 ++++++
 fs/ext4/ioctl.c  |    5 -
 4 files changed, 332 insertions(+), 4 deletions(-)

Index: linux-2.6.26-rc4/fs/ext4/balloc.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/balloc.c	2008-05-30 11:41:05.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/balloc.c	2008-05-30 11:41:27.000000000 -0700
@@ -919,7 +919,7 @@ static int ext4_test_allocatable(ext4_gr
  * bitmap on disk and the last-committed copy in journal, until we find a
  * bit free in both bitmaps.
  */
-static ext4_grpblk_t
+ext4_grpblk_t
 bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh,
 					ext4_grpblk_t maxblocks)
 {
Index: linux-2.6.26-rc4/fs/ext4/defrag.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/defrag.c	2008-05-30 11:41:25.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/defrag.c	2008-05-30 11:41:27.000000000 -0700
@@ -20,6 +20,12 @@
 #include "ext4_extents.h"
 #include "group.h"
 
+#define EXT_SET_EXTENT_DATA(src, dest) do {			\
+		(dest).block = le32_to_cpu((src)->ee_block);	\
+		(dest).start = ext_pblock(src);			\
+		(dest).len = le16_to_cpu((src)->ee_len);	\
+	} while (0)
+
 /**
  * ext4_defrag_next_extent - Search for the next extent and set it to "extent"
  *
@@ -90,6 +96,240 @@ err:
 	return -EIO;
 }
 
+/**
+ * ext4_defrag_extents_info - Get extents information
+ *
+ * @sb:				for ext4_iget()
+ * @ext_info:			pointer to ext4_extents_info
+ *  @ext_info->ino:		describe an inode which is used to get
+ *				extent information
+ *  @ext_info->max_entries:	capacity of ext[] (1..DEFRAG_MAX_ENT)
+ *  @ext_info->entries:		amount of extents (output)
+ *  @ext_info->ext[]:		array of extent (output)
+ *  @ext_info->f_offset:	starting block offset of targeted extent
+ *				(file relative)
+ *
+ * This function returns 0 if the next extent(s) exists,
+ * or returns 1 if the next extent doesn't exist,
+ * otherwise returns error value.
+ */
+static int
+ext4_defrag_extents_info(struct super_block *sb,
+				struct ext4_extents_info *ext_info)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_extent *ext = NULL;
+	struct inode *inode = NULL;
+	ext4_lblk_t offset = ext_info->f_offset;
+	int max_entries = ext_info->max_entries;
+	int depth, entries = 0;
+	int err = 0;
+	int ret = 0;
+
+	/* @ext_info is filled in by user space: never index past ext[] */
+	if (max_entries < 1 || max_entries > DEFRAG_MAX_ENT)
+		return -EINVAL;
+
+	inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+
+	/* Return -ENOENT if a file does not exist */
+	if (!inode->i_nlink || inode->i_ino < EXT4_GOOD_OLD_FIRST_INO ||
+			!S_ISREG(inode->i_mode)) {
+		ext_info->entries = 0;
+		err = -ENOENT;
+		goto out;
+	}
+
+	path = ext4_ext_find_extent(inode, offset, NULL);
+	if (IS_ERR(path)) {
+		err = PTR_ERR(path);
+		path = NULL;
+		goto out;
+	}
+	depth = ext_depth(inode);
+
+	/* Skip the 0 size file */
+	if (path[depth].p_ext == NULL) {
+		ext_info->entries = 0;
+		goto out;
+	}
+	ext = path[depth].p_ext;
+	EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+	entries = 1;
+
+	/*
+	 * The ioctl repeats this loop 'max_entries' times.
+	 * So we have to call this function again if @inode has
+	 * more extents than 'max_entries'.
+	 */
+	while (entries < max_entries) {
+		ret = ext4_defrag_next_extent(inode, path, &ext);
+		if (ret == 0) {
+			/* Found the next extent (it means not the last one) */
+			EXT_SET_EXTENT_DATA(ext, ext_info->ext[entries]);
+			entries++;
+
+			/*
+			 * In case @inode has > 'max_entries' extents,
+			 * we must call this function again and restart from
+			 * 'max_entries * n + 1'th extent.
+			 * 'n' is the number of calling this function
+			 * at the same @inode.
+			 */
+			if (entries == max_entries) {
+				ext_info->f_offset =
+					le32_to_cpu(ext->ee_block) +
+					le16_to_cpu(ext->ee_len);
+				/* Check the extent is the last one or not */
+				ret =
+				ext4_defrag_next_extent(inode, path, &ext);
+				if (ret == 1) {
+					err = ret;
+				} else if (ret < 0) {
+					/* Failed to get the next extent */
+					err = ret;
+					goto out;
+				}
+				break;
+			}
+
+		} else if (ret == 1) {
+			/* The extent is the last one */
+			ext_info->f_offset = 0;
+			err = ret;
+			break;
+		} else {
+			/* Failed to get the next extent */
+			err = ret;
+			goto out;
+		}
+	}
+
+	ext_info->entries = entries;
+
+out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	up_write(&EXT4_I(inode)->i_data_sem);
+	iput(inode);
+	return err;
+}
+
+/**
+ * ext4_defrag_fblocks_distribution - Search free blocks distribution
+ *
+ * @org_inode:	original inode
+ * @ext_info:	ext4_extents_info
+ *
+ * Scan the block bitmap of the group holding @org_inode, starting at
+ * @ext_info->g_offset, and store each run of usable blocks in
+ * @ext_info->ext[] (at most @ext_info->max_entries runs per call).
+ *
+ * This function returns 0 if succeed, otherwise returns error value.
+ */
+static int
+ext4_defrag_fblocks_distribution(struct inode *org_inode,
+				struct ext4_extents_info *ext_info)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct super_block *sb = org_inode->i_sb;
+	handle_t *handle;
+	ext4_group_t group_no;
+	ext4_grpblk_t start, end;
+	ext4_fsblk_t start_block = 0;
+	int i, err;
+	int num = 0;
+	int len = 0;
+	int block_set = 0;
+	int extra_block = 0;
+
+	if (!sb) {
+		printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+		return -ENOSPC;
+	}
+
+	group_no = (org_inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
+	start = ext_info->g_offset;
+	end = EXT4_BLOCKS_PER_GROUP(sb) - 1;
+
+	/*
+	 * @ext_info is filled in by user space.  Reject a capacity that
+	 * does not fit ext[] and a scan offset outside the group;
+	 * start == end + 1 is legal (it is what this function hands back
+	 * after the last block) and simply yields zero entries.
+	 */
+	if (ext_info->max_entries < 1 ||
+	    ext_info->max_entries > DEFRAG_MAX_ENT ||
+	    start < 0 || start > end + 1)
+		return -EINVAL;
+
+	/* Account for the boot block when the block size is 1KB */
+	if (sb->s_blocksize == 1024)
+		extra_block = 1;
+
+	handle = ext4_journal_start(org_inode, 1);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		return err;
+	}
+
+	bitmap_bh = read_block_bitmap(sb, group_no);
+	if (!bitmap_bh) {
+		err = -EIO;
+		goto out_stop;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto out;
+
+	for (i = start; i <= end ; i++) {
+		if (bitmap_search_next_usable_block(i, bitmap_bh, i + 1) >= 0) {
+			len++;
+			/*
+			 * Reset start_block if the free block is
+			 * the head of region.
+			 */
+			if (!block_set) {
+				start_block =
+				 i + (ext4_fsblk_t)group_no * EXT4_BLOCKS_PER_GROUP(sb) +
+				 extra_block;
+				block_set = 1;
+			}
+		} else if (len) {
+			ext_info->ext[num].start = start_block;
+			ext_info->ext[num].len = len;
+			num++;
+			len = 0;
+			block_set = 0;
+			if (num == ext_info->max_entries) {
+				ext_info->g_offset = i + 1;
+				break;
+			}
+		}
+		if (i == end && len) {
+			ext_info->ext[num].start = start_block;
+			ext_info->ext[num].len = len;
+			num++;
+		}
+	}
+
+	ext_info->entries = num;
+out:
+	ext4_journal_release_buffer(handle, bitmap_bh);
+	brelse(bitmap_bh);
+out_stop:
+	ext4_journal_stop(handle);
+
+	return err;
+}
+
 int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 			unsigned long arg)
 {
@@ -114,6 +354,55 @@ int ext4_defrag_ioctl(struct inode *inod
 
 		block = ext4_bmap(mapping, block);
 
 		return put_user(block, p);
+	} else if (cmd == EXT4_IOC_GROUP_INFO) {
+		struct ext4_group_data_info grp_data;
+
+		if (copy_from_user(&grp_data,
+			(struct ext4_group_data_info __user *)arg,
+			sizeof(grp_data)))
+			return -EFAULT;
+
+		grp_data.s_blocks_per_group =
+			EXT4_BLOCKS_PER_GROUP(inode->i_sb);
+		grp_data.s_inodes_per_group =
+			EXT4_INODES_PER_GROUP(inode->i_sb);
+
+		if (copy_to_user((struct ext4_group_data_info __user *)arg,
+			&grp_data, sizeof(grp_data)))
+			return -EFAULT;
+	} else if (cmd == EXT4_IOC_FREE_BLOCKS_INFO) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+				(struct ext4_extents_info __user *)arg,
+				sizeof(ext_info)))
+			return -EFAULT;
+
+		/* A foreign inode number is bad input, not a kernel bug */
+		if (ext_info.ino != inode->i_ino)
+			return -EINVAL;
+
+		err = ext4_defrag_fblocks_distribution(inode, &ext_info);
+
+		/* copy_to_user() returns the uncopied byte count, not errno */
+		if (!err &&
+		    copy_to_user((struct ext4_extents_info __user *)arg,
+				 &ext_info, sizeof(ext_info)))
+			err = -EFAULT;
+	} else if (cmd == EXT4_IOC_EXTENTS_INFO) {
+		struct ext4_extents_info ext_info;
+
+		if (copy_from_user(&ext_info,
+				(struct ext4_extents_info __user *)arg,
+				sizeof(ext_info)))
+			return -EFAULT;
+
+		err = ext4_defrag_extents_info(inode->i_sb, &ext_info);
+		if (err >= 0) {
+			if (copy_to_user((struct ext4_extents_info __user *)arg,
+				&ext_info, sizeof(ext_info)))
+				return -EFAULT;
+		}
 	} else if (cmd == EXT4_IOC_DEFRAG) {
 		struct ext4_ext_defrag_data defrag;
 		struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
@@ -1127,11 +1416,13 @@ out2:
  *
  * @org_inode:		original inode
  * @defrag_size:	size of defrag in blocks
+ * @goal:		pointer to block offset for allocation
  *
  * This function returns 0 if succeed, otherwise returns error value.
  */
 static int
-ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size)
+ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size,
+			ext4_fsblk_t *goal)
 {
 
 	/* ext4 online defrag supports only 4KB block size */
@@ -1242,7 +1533,7 @@ ext4_defrag(struct file *filp, ext4_lblk
 	int ret, depth, seq_extents, last_extent = 0;
 
 	/* Check the filesystem enviroment whether defrag can be done */
-	ret = ext4_defrag_check(org_inode, defrag_size);
+	ret = ext4_defrag_check(org_inode, defrag_size, &goal);
 	if (ret < 0)
 		return ret;
 
Index: linux-2.6.26-rc4/fs/ext4/ext4.h
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ext4.h	2008-05-30 11:41:25.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ext4.h	2008-05-30 11:41:27.000000000 -0700
@@ -300,6 +300,9 @@ struct ext4_new_group_data {
 #define EXT4_IOC_MIGRATE		_IO('f', 7)
 #define EXT4_IOC_FIBMAP			_IOW('f', 9, ext4_fsblk_t)
 #define EXT4_IOC_DEFRAG			_IOW('f', 10, struct ext4_ext_defrag_data)
+#define EXT4_IOC_GROUP_INFO		_IOW('f', 11, struct ext4_group_data_info)
+#define EXT4_IOC_FREE_BLOCKS_INFO	_IOW('f', 12, struct ext4_extents_info)
+#define EXT4_IOC_EXTENTS_INFO		_IOW('f', 13, struct ext4_extents_info)
 
 /*
  * ioctl commands in 32 bit emulation
@@ -323,12 +326,41 @@ struct ext4_new_group_data {
  */
 #define DEFRAG_BLOCK_SIZE	4096
 
+/*
+ * The following macro is used for the defrag force mode.
+ *
+ * DEFRAG_MAX_ENT:	the maximum number of extents exchanged between
+ *			kernel-space and user-space per ioctl
+ */
+#define DEFRAG_MAX_ENT		32
+
+struct ext4_extent_data {
+	ext4_lblk_t block;		/* start logical block number */
+	ext4_fsblk_t start;		/* start physical block number */
+	int len;			/* blocks count */
+};
+
 struct ext4_ext_defrag_data {
 	ext4_lblk_t start_offset;	/* start offset to defrag in blocks */
 	ext4_lblk_t defrag_size;	/* size of defrag in blocks */
 	ext4_fsblk_t goal;		/* block offset for allocation */
 };
 
+struct ext4_group_data_info {
+	int s_blocks_per_group;		/* blocks per group */
+	int s_inodes_per_group;		/* inodes per group */
+};
+
+struct ext4_extents_info {
+	unsigned long long ino;		/* inode number */
+	int max_entries;		/* maximum extents count */
+	int entries;			/* extent number/count */
+	ext4_lblk_t f_offset;		/* file offset */
+	ext4_grpblk_t g_offset;		/* group offset */
+	ext4_fsblk_t goal;		/* block offset for allocation */
+	struct ext4_extent_data ext[DEFRAG_MAX_ENT];
+};
+
 #define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */
 
 /*
@@ -1010,6 +1042,8 @@ extern struct ext4_group_desc * ext4_get
 extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
 extern void ext4_init_block_alloc_info(struct inode *);
 extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
+extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
+				struct buffer_head *, ext4_grpblk_t);
 
 /* dir.c */
 extern int ext4_check_dir_entry(const char *, struct inode *,
Index: linux-2.6.26-rc4/fs/ext4/ioctl.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ioctl.c	2008-05-30 11:41:25.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ioctl.c	2008-05-30 11:41:27.000000000 -0700
@@ -242,7 +242,10 @@ setversion_out:
 		return err;
 	}
 	case EXT4_IOC_FIBMAP:
-	case EXT4_IOC_DEFRAG: {
+	case EXT4_IOC_DEFRAG:
+	case EXT4_IOC_GROUP_INFO:
+	case EXT4_IOC_FREE_BLOCKS_INFO:
+	case EXT4_IOC_EXTENTS_INFO: {
 		return ext4_defrag_ioctl(inode, filp, cmd, arg);
 	}
 	case EXT4_IOC_GROUP_ADD: {