Generic inode defragmentation This implements the ability to remove a list of inodes from the inode cache. In order to remove an inode we may have to write out the pages of an inode, the inode itself and remove the dentries referring to the inode. Provide generic functionality that can be used by filesystems that have their own inode caches to also tie into the defragmentation functions that are made available here. Signed-off-by: Christoph Lameter --- fs/inode.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/fs.h | 5 + 2 files changed, 149 insertions(+), 5 deletions(-) Index: slub/fs/inode.c =================================================================== --- slub.orig/fs/inode.c 2007-05-18 18:27:13.000000000 -0700 +++ slub/fs/inode.c 2007-05-18 20:42:29.000000000 -0700 @@ -270,6 +270,46 @@ void clear_inode(struct inode *inode) EXPORT_SYMBOL(clear_inode); +#define SLABLIST_SIZE (L1_CACHE_BYTES / sizeof(void *)) + +static inline struct kmem_cache**slablist_alloc(void) +{ + return kzalloc(L1_CACHE_BYTES, GFP_KERNEL); +} + +static inline void slablist_add(struct inode *i, struct kmem_cache **slablist) +{ + struct kmem_cache *s; + int hash; + + if (!slablist) + return; + + s = kmem_cache_slab(i); + hash =((unsigned long)s >> PAGE_SHIFT) % SLABLIST_SIZE; + slablist[hash] = s; +} + +/* + * Shrink the slabs that participated in the dispose + * operation + */ +static void slablist_shrink(struct kmem_cache **slablist) +{ + int i; + + if (!slablist) + return; + + for (i = 0; i < SLABLIST_SIZE; i++) { + struct kmem_cache *s = slablist[i]; + + if (s) + kmem_cache_shrink(s); + } + kfree(slablist); +} + /* * dispose_list - dispose of the contents of a local list * @head: the head of the list to free @@ -277,7 +317,7 @@ EXPORT_SYMBOL(clear_inode); * Dispose-list gets a local list with local inodes in it, so it doesn't * need to worry about list corruption and SMP locks. 
*/ -static void dispose_list(struct list_head *head) +static void dispose_list(struct list_head *head, struct kmem_cache **slablist) { int nr_disposed = 0; @@ -287,6 +327,8 @@ static void dispose_list(struct list_hea inode = list_first_entry(head, struct inode, i_list); list_del(&inode->i_list); + slablist_add(inode, slablist); + if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); @@ -356,6 +398,7 @@ int invalidate_inodes(struct super_block { int busy; LIST_HEAD(throw_away); + struct kmem_cache **slablist = slablist_alloc(); mutex_lock(&iprune_mutex); spin_lock(&inode_lock); @@ -363,9 +406,10 @@ int invalidate_inodes(struct super_block busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); - dispose_list(&throw_away); + dispose_list(&throw_away, slablist); mutex_unlock(&iprune_mutex); - + /* Shrink the inode caches we touched; slablist_shrink() also frees slablist */ + slablist_shrink(slablist); return busy; } @@ -403,6 +447,7 @@ static void prune_icache(int nr_to_scan) int nr_pruned = 0; int nr_scanned; unsigned long reap = 0; + struct kmem_cache **slablist = slablist_alloc(); mutex_lock(&iprune_mutex); spin_lock(&inode_lock); @@ -444,8 +489,12 @@ static void prune_icache(int nr_to_scan) __count_vm_events(PGINODESTEAL, reap); spin_unlock(&inode_lock); - dispose_list(&freeable); + dispose_list(&freeable, slablist); mutex_unlock(&iprune_mutex); + /* + * We need to compress memory of all inode caches we have touched + */ + slablist_shrink(slablist); } /* @@ -1361,6 +1410,96 @@ static int __init set_ihash_entries(char } __setup("ihash_entries=", set_ihash_entries); +static void *get_inodes(struct kmem_cache *s, int nr, void **v) +{ + int i; + + spin_lock(&inode_lock); + for (i = 0; i < nr; i++) { + struct inode *inode = v[i]; + + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) + v[i] = NULL; + else + __iget(inode); + } + spin_unlock(&inode_lock); + return NULL; +} + +/* + * Function for filesystems that embed struct inode into their own + * structures. 
The offset is the offset of the struct inode in the fs inode. + */ +void *fs_get_inodes(struct kmem_cache *s, int nr, void **v, unsigned long offset) +{ + int i; + + for (i = 0; i < nr; i++) + v[i] += offset; /* step from the fs-private object to its embedded struct inode */ + + return get_inodes(s, nr, v); +} +EXPORT_SYMBOL(fs_get_inodes); + +void kick_inodes(struct kmem_cache *s, int nr, void **v, void *private) +{ + struct inode *inode; + int i; + int abort = 0; /* set once can_unuse() fails; remaining inodes are then only iput() */ + LIST_HEAD(freeable); + struct super_block *sb; + + for (i = 0; i < nr; i++) { + inode = v[i]; + if (!inode) + continue; + + if (inode_has_buffers(inode) || inode->i_data.nrpages) { + if (remove_inode_buffers(inode)) + invalidate_mapping_pages(&inode->i_data, + 0, -1); + } + + if (inode->i_state & I_DIRTY) + write_inode_now(inode, 1); + + if (atomic_read(&inode->i_count) > 1) + d_prune_aliases(inode); + } + + mutex_lock(&iprune_mutex); + for (i = 0; i < nr; i++) { + inode = v[i]; + if (!inode) + continue; + + sb = inode->i_sb; /* fetch the superblock before the iput() below */ + iput(inode); + if (abort || !(sb->s_flags & MS_ACTIVE)) + continue; + + spin_lock(&inode_lock); + if (!can_unuse(inode)) { + abort = 1; + spin_unlock(&inode_lock); + continue; + } + list_move(&inode->i_list, &freeable); + inode->i_state |= I_FREEING; + inodes_stat.nr_unused--; + spin_unlock(&inode_lock); + } + dispose_list(&freeable, NULL); /* NULL slablist: slablist_add() is a no-op here */ + mutex_unlock(&iprune_mutex); +} +EXPORT_SYMBOL(kick_inodes); + +static struct kmem_cache_ops inode_kmem_cache_ops = { + .get = get_inodes, + .kick = kick_inodes +}; + /* * Initialize the waitqueues and inode hash table. 
*/ @@ -1399,7 +1538,7 @@ void __init inode_init(unsigned long mem (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| SLAB_MEM_SPREAD), init_once, - NULL); + &inode_kmem_cache_ops); register_shrinker(&icache_shrinker); /* Hash may have been set up in inode_init_early */ Index: slub/include/linux/fs.h =================================================================== --- slub.orig/include/linux/fs.h 2007-05-18 18:27:13.000000000 -0700 +++ slub/include/linux/fs.h 2007-05-18 18:27:27.000000000 -0700 @@ -1608,6 +1608,11 @@ static inline void insert_inode_hash(str __insert_inode_hash(inode, inode->i_ino); } +/* Helpers to realize inode defrag support in filesystems */ +extern void kick_inodes(struct kmem_cache *, int, void **, void *); +extern void *fs_get_inodes(struct kmem_cache *, int nr, void **, + unsigned long offset); + extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern void file_kill(struct file *f);