ext4: akpm's locking hack to fix locking delays This is a port of the following patch from Andrew Morton to ext4: http://lkml.org/lkml/2008/10/3/22 This fixes a major contention problem in do_get_write_access() when a buffer is modified in both the current and committing transactions. The behaviour is opt-in: the new "akpm_lock_hack" mount option sets JBD2_LOCK_HACK on the journal, and with that flag set do_get_write_access() uses trylock_buffer() instead of sleeping in lock_buffer(); if the trylock fails, it carries on without holding the buffer lock and skips the matching unlock_buffer(). Signed-off-by: "Theodore Ts'o" Cc: akpm@linux-foundation.org diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0aea14c..f039c97 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -543,6 +543,7 @@ do { \ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ +#define EXT4_MOUNT_AKPM_LOCK_HACK 0x20000000 /* akpm lock hack */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 22c4fde..04ab984 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -830,6 +830,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",journal_async_commit"); if (test_opt(sb, NOBH)) seq_puts(seq, ",nobh"); + if (test_opt(sb, AKPM_LOCK_HACK)) + seq_puts(seq, ",akpm_lock_hack"); if (test_opt(sb, I_VERSION)) seq_puts(seq, ",i_version"); if (!test_opt(sb, DELALLOC)) @@ -1012,7 +1014,7 @@ enum { Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, - Opt_inode_readahead_blks, Opt_journal_ioprio + Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_akpm_lock_hack }; static const match_table_t tokens = { @@ -1072,6 +1074,7 @@ static const match_table_t tokens = { {Opt_nodelalloc, "nodelalloc"}, {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, {Opt_journal_ioprio, "journal_ioprio=%u"}, + {Opt_akpm_lock_hack, "akpm_lock_hack"}, {Opt_err, NULL}, }; @@ -1473,6 +1476,9 @@ set_qf_format: *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, option); break; + 
case Opt_akpm_lock_hack: + set_opt(sbi->s_mount_opt, AKPM_LOCK_HACK); + break; default: printk(KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -2656,6 +2662,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; else journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; + if (test_opt(sb, AKPM_LOCK_HACK)) + journal->j_flags |= JBD2_LOCK_HACK; + else + journal->j_flags &= ~JBD2_LOCK_HACK; spin_unlock(&journal->j_state_lock); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 48c21ba..add3be0 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -549,6 +549,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int error; char *frozen_buffer = NULL; int need_copy = 0; + int locked = 0; if (is_handle_aborted(handle)) return -EROFS; @@ -564,7 +565,13 @@ repeat: /* @@@ Need to check for errors here at some point. */ - lock_buffer(bh); + if (journal->j_flags & JBD2_LOCK_HACK) { + if (trylock_buffer(bh)) + locked = 1; /* lolz */ + } else { + lock_buffer(bh); + locked = 1; + } jbd_lock_bh_state(bh); /* We now hold the buffer lock so it is safe to query the buffer @@ -603,7 +610,8 @@ repeat: jbd_unexpected_dirty_buffer(jh); } - unlock_buffer(bh); + if (locked) + unlock_buffer(bh); error = -EROFS; if (is_handle_aborted(handle)) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index adef1c9..4748e5e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1013,6 +1013,7 @@ struct journal_s #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_LOCK_HACK 0x080 /* akpm's locking hack */ /* * Function declarations for the journaling transaction and buffer