From cmm@us.ibm.com Tue Sep 11 18:27:07 2007 Date: Tue, 11 Sep 2007 18:27:01 -0700 From: Mingming Cao To: Christoph Lameter Subject: Re: JBD fixes Hi Christoph, Just back home last night, sorry for the delaying. The problem is in fs/jbd/transaction.c, where new_transaction is allocated via jbd_kmalloc but freedd by kfree. use jbd_free instead. Attached is the updated patch. it also renames the jbd_slab_alloc to jbd_alloc().I have compile tested, haven't get a chance to test it with your latest git tree yet. But I think that's the problem. After tested I think we should push this patch to lkml and Linus separately. regards, Mingming Date: Thu, 6 Sep 2007 05:47:15 -0700 (PDT) From: Christoph Lameter Subject: JBD fixes JBD: Replace slab allocations with page cache allocations JBD should not pass slab pages down to the block layer. Use page allocator pages instead. This will also prepare JBD for the large blocksize patchset. Signed-off-by: Christoph Lameter Signed-off-by: Mingming Cao --- fs/jbd/commit.c | 6 +-- fs/jbd/journal.c | 97 ++------------------------------------------------ fs/jbd/transaction.c | 10 ++--- fs/jbd2/commit.c | 6 +-- fs/jbd2/journal.c | 97 ++------------------------------------------------ fs/jbd2/transaction.c | 10 ++--- include/linux/jbd.h | 19 ++++++++- include/linux/jbd2.h | 19 ++++++++- 8 files changed, 56 insertions(+), 208 deletions(-) Index: linux-2.6/fs/jbd/journal.c =================================================================== --- linux-2.6.orig/fs/jbd/journal.c 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/fs/jbd/journal.c 2007-09-12 19:54:31.000000000 -0700 @@ -83,7 +83,6 @@ EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); -static int journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -334,10 +333,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); + tmp = jbd_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - jbd_slab_free(tmp, bh_in->b_size); + jbd_free(tmp, bh_in->b_size); goto repeat; } @@ -1095,13 +1094,6 @@ int journal_load(journal_t *journal) } } - /* - * Create a slab for this blocksize - */ - err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); - if (err) - return err; - /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (journal_recover(journal)) @@ -1615,86 +1607,6 @@ int journal_blocks_per_page(struct inode } /* - * Simple support for retrying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. - */ -void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) -{ - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); -} - -/* - * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed - * and allocate frozen and commit buffers from these slabs. - * - * Reason for doing this is to avoid, SLAB_DEBUG - since it could - * cause bh to cross page boundary. - */ - -#define JBD_MAX_SLABS 5 -#define JBD_SLAB_INDEX(size) (size >> 11) - -static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; -static const char *jbd_slab_names[JBD_MAX_SLABS] = { - "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" -}; - -static void journal_destroy_jbd_slabs(void) -{ - int i; - - for (i = 0; i < JBD_MAX_SLABS; i++) { - if (jbd_slab[i]) - kmem_cache_destroy(jbd_slab[i]); - jbd_slab[i] = NULL; - } -} - -static int journal_create_jbd_slab(size_t slab_size) -{ - int i = JBD_SLAB_INDEX(slab_size); - - BUG_ON(i >= JBD_MAX_SLABS); - - /* - * Check if we already have a slab created for this size - */ - if (jbd_slab[i]) - return 0; - - /* - * Create a slab and force alignment to be same as slabsize - - * this will make sure that allocations won't cross the page - * boundary. - */ - jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], - slab_size, slab_size, 0, NULL); - if (!jbd_slab[i]) { - printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); - return -ENOMEM; - } - return 0; -} - -void * jbd_slab_alloc(size_t size, gfp_t flags) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); -} - -void jbd_slab_free(void *ptr, size_t size) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - kmem_cache_free(jbd_slab[idx], ptr); -} - -/* * Journal_head storage management */ static struct kmem_cache *journal_head_cache; @@ -1881,13 +1793,13 @@ static void __journal_remove_journal_hea printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - jbd_slab_free(jh->b_frozen_data, bh->b_size); + jbd_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -2042,7 +1954,6 @@ static void journal_destroy_caches(void) journal_destroy_revoke_caches(); journal_destroy_journal_head_cache(); journal_destroy_handle_cache(); - journal_destroy_jbd_slabs(); } static int __init journal_init(void) Index: linux-2.6/include/linux/jbd.h =================================================================== --- linux-2.6.orig/include/linux/jbd.h 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/include/linux/jbd.h 2007-09-12 19:54:31.000000000 -0700 @@ -71,9 +71,22 @@ extern int journal_enable_debug; #define jbd_debug(f, a...) /**/ #endif -extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); -extern void * jbd_slab_alloc(size_t size, gfp_t flags); -extern void jbd_slab_free(void *ptr, size_t size); +static inline void * __jbd_kmalloc (const char *where, size_t size, + gfp_t flags, int retry) +{ + return (void *)__get_free_pages(flags | (retry ? __GFP_NOFAIL : 0), + get_order(size)); +} + +static inline void *jbd_alloc(size_t size, gfp_t flags) +{ + return (void *)__get_free_pages(flags, get_order(size)); +} + +static inline void jbd_free(void *ptr, size_t size) +{ + free_pages((unsigned long)ptr, get_order(size)); +}; #define jbd_kmalloc(size, flags) \ __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) Index: linux-2.6/include/linux/jbd2.h =================================================================== --- linux-2.6.orig/include/linux/jbd2.h 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/include/linux/jbd2.h 2007-09-12 19:54:31.000000000 -0700 @@ -71,9 +71,22 @@ extern u8 jbd2_journal_enable_debug; #define jbd_debug(f, a...) /**/ #endif -extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry); -extern void * jbd2_slab_alloc(size_t size, gfp_t flags); -extern void jbd2_slab_free(void *ptr, size_t size); +static inline void * __jbd2_kmalloc (const char *where, size_t size, + gfp_t flags, int retry) +{ + return (void *)__get_free_pages(flags | (retry ? __GFP_NOFAIL : 0), + get_order(size)); +} + +static inline void *jbd2_alloc(size_t size, gfp_t flags) +{ + return (void *)__get_free_pages(flags, get_order(size)); +} + +static inline void jbd2_free(void *ptr, size_t size) +{ + free_pages((unsigned long)ptr, get_order(size)); +}; #define jbd_kmalloc(size, flags) \ __jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) Index: linux-2.6/fs/jbd2/journal.c =================================================================== --- linux-2.6.orig/fs/jbd2/journal.c 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/fs/jbd2/journal.c 2007-09-12 19:54:31.000000000 -0700 @@ -84,7 +84,6 @@ EXPORT_SYMBOL(jbd2_journal_force_commit) static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); -static int jbd2_journal_create_jbd_slab(size_t slab_size); /* * Helper function used to manage commit timeouts @@ -335,10 +334,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS); + tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - jbd2_slab_free(tmp, bh_in->b_size); + jbd2_free(tmp, bh_in->b_size); goto repeat; } @@ -1096,13 +1095,6 @@ int jbd2_journal_load(journal_t *journal } } - /* - * Create a slab for this blocksize - */ - err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); - if (err) - return err; - /* Let the recovery code check whether it needs to recover any * data from the journal. */ if (jbd2_journal_recover(journal)) @@ -1627,86 +1619,6 @@ size_t journal_tag_bytes(journal_t *jour } /* - * Simple support for retrying memory allocations. Introduced to help to - * debug different VM deadlock avoidance strategies. - */ -void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry) -{ - return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0)); -} - -/* - * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed - * and allocate frozen and commit buffers from these slabs. - * - * Reason for doing this is to avoid, SLAB_DEBUG - since it could - * cause bh to cross page boundary. - */ - -#define JBD_MAX_SLABS 5 -#define JBD_SLAB_INDEX(size) (size >> 11) - -static struct kmem_cache *jbd_slab[JBD_MAX_SLABS]; -static const char *jbd_slab_names[JBD_MAX_SLABS] = { - "jbd2_1k", "jbd2_2k", "jbd2_4k", NULL, "jbd2_8k" -}; - -static void jbd2_journal_destroy_jbd_slabs(void) -{ - int i; - - for (i = 0; i < JBD_MAX_SLABS; i++) { - if (jbd_slab[i]) - kmem_cache_destroy(jbd_slab[i]); - jbd_slab[i] = NULL; - } -} - -static int jbd2_journal_create_jbd_slab(size_t slab_size) -{ - int i = JBD_SLAB_INDEX(slab_size); - - BUG_ON(i >= JBD_MAX_SLABS); - - /* - * Check if we already have a slab created for this size - */ - if (jbd_slab[i]) - return 0; - - /* - * Create a slab and force alignment to be same as slabsize - - * this will make sure that allocations won't cross the page - * boundary. - */ - jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], - slab_size, slab_size, 0, NULL); - if (!jbd_slab[i]) { - printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); - return -ENOMEM; - } - return 0; -} - -void * jbd2_slab_alloc(size_t size, gfp_t flags) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); -} - -void jbd2_slab_free(void *ptr, size_t size) -{ - int idx; - - idx = JBD_SLAB_INDEX(size); - BUG_ON(jbd_slab[idx] == NULL); - kmem_cache_free(jbd_slab[idx], ptr); -} - -/* * Journal_head storage management */ static struct kmem_cache *jbd2_journal_head_cache; @@ -1893,13 +1805,13 @@ static void __journal_remove_journal_hea printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - jbd2_slab_free(jh->b_frozen_data, bh->b_size); + jbd2_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - jbd2_slab_free(jh->b_committed_data, bh->b_size); + jbd2_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -2040,7 +1952,6 @@ static void jbd2_journal_destroy_caches( jbd2_journal_destroy_revoke_caches(); jbd2_journal_destroy_jbd2_journal_head_cache(); jbd2_journal_destroy_handle_cache(); - jbd2_journal_destroy_jbd_slabs(); } static int __init journal_init(void) Index: linux-2.6/fs/jbd/commit.c =================================================================== --- linux-2.6.orig/fs/jbd/commit.c 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/fs/jbd/commit.c 2007-09-12 19:54:31.000000000 -0700 @@ -375,7 +375,7 @@ void journal_commit_transaction(journal_ struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } @@ -792,14 +792,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. */ if (jh->b_committed_data) { - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - jbd_slab_free(jh->b_frozen_data, bh->b_size); + jbd_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } Index: linux-2.6/fs/jbd2/commit.c =================================================================== --- linux-2.6.orig/fs/jbd2/commit.c 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/fs/jbd2/commit.c 2007-09-12 19:54:31.000000000 -0700 @@ -384,7 +384,7 @@ void jbd2_journal_commit_transaction(jou struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - jbd2_slab_free(jh->b_committed_data, bh->b_size); + jbd2_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } @@ -801,14 +801,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. */ if (jh->b_committed_data) { - jbd2_slab_free(jh->b_committed_data, bh->b_size); + jbd2_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - jbd2_slab_free(jh->b_frozen_data, bh->b_size); + jbd2_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } Index: linux-2.6/fs/jbd/transaction.c =================================================================== --- linux-2.6.orig/fs/jbd/transaction.c 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/fs/jbd/transaction.c 2007-09-12 19:54:31.000000000 -0700 @@ -229,7 +229,7 @@ repeat_locked: spin_unlock(&journal->j_state_lock); out: if (unlikely(new_transaction)) /* It's usually NULL */ - kfree(new_transaction); + jbd_free(new_transaction, sizeof(*new_transaction)); return ret; } @@ -668,7 +668,7 @@ repeat: JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = - jbd_slab_alloc(jh2bh(jh)->b_size, + jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG @@ -728,7 +728,7 @@ done: out: if (unlikely(frozen_buffer)) /* It's usually NULL */ - jbd_slab_free(frozen_buffer, bh->b_size); + jbd_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; @@ -881,7 +881,7 @@ int journal_get_undo_access(handle_t *ha repeat: if (!jh->b_committed_data) { - committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -908,7 +908,7 @@ repeat: out: journal_put_journal_head(jh); if (unlikely(committed_data)) - jbd_slab_free(committed_data, bh->b_size); + jbd_free(committed_data, bh->b_size); return err; } Index: linux-2.6/fs/jbd2/transaction.c =================================================================== --- linux-2.6.orig/fs/jbd2/transaction.c 2007-09-12 19:53:25.000000000 -0700 +++ linux-2.6/fs/jbd2/transaction.c 2007-09-12 19:54:31.000000000 -0700 @@ -229,7 +229,7 @@ repeat_locked: spin_unlock(&journal->j_state_lock); out: if (unlikely(new_transaction)) /* It's usually NULL */ - kfree(new_transaction); + jbd2_free(new_transaction, sizeof(*new_transaction)); return ret; } @@ -668,7 +668,7 @@ repeat: JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = - jbd2_slab_alloc(jh2bh(jh)->b_size, + jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG @@ -728,7 +728,7 @@ done: out: if (unlikely(frozen_buffer)) /* It's usually NULL */ - jbd2_slab_free(frozen_buffer, bh->b_size); + jbd2_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; @@ -881,7 +881,7 @@ int jbd2_journal_get_undo_access(handle_ repeat: if (!jh->b_committed_data) { - committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -908,7 +908,7 @@ repeat: out: jbd2_journal_put_journal_head(jh); if (unlikely(committed_data)) - jbd2_slab_free(committed_data, bh->b_size); + jbd2_free(committed_data, bh->b_size); return err; }