Index: linux-2.6/fs/super.c =================================================================== --- linux-2.6.orig/fs/super.c +++ linux-2.6/fs/super.c @@ -62,10 +62,41 @@ static struct super_block *alloc_super(s s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif +#ifdef CONFIG_SMP + s->s_inodes = alloc_percpu(struct list_head); + if (!s->s_inodes) { + free_percpu(s->s_files); + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_inodes, i)); + } +#else + INIT_LIST_HEAD(&s->s_inodes); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); - INIT_LIST_HEAD(&s->s_inodes); INIT_LIST_HEAD(&s->s_dentry_lru); init_rwsem(&s->s_umount); mutex_init(&s->s_lock); @@ -117,6 +148,10 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_inodes); + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); @@ -568,7 +603,7 @@ out: int do_remount_sb(struct super_block *sb, int flags, void *data, int force) { int retval; - int remount_rw; + int remount_rw, remount_ro; if (sb->s_frozen != SB_UNFROZEN) return -EBUSY; @@ -583,9 +618,12 @@ int do_remount_sb(struct super_block *sb shrink_dcache_sb(sb); sync_filesystem(sb); + remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY); + remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); + /* If we are remounting RDONLY and current sb is read/write, make sure there are no rw files opened */ - if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { + if (remount_ro) { if (force) mark_files_ro(sb); else if (!fs_may_remount_ro(sb)) @@ -594,7 +632,6 @@ int do_remount_sb(struct 
super_block *sb if (retval < 0 && retval != -ENOSYS) return -EBUSY; } - remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); @@ -604,6 +641,14 @@ int do_remount_sb(struct super_block *sb sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); if (remount_rw) vfs_dq_quota_on_remount(sb); + /* Some filesystems modify their metadata via some other path + than the bdev buffer cache (eg. use a private mapping, or + directories in pagecache, etc). Also file data modifications + go via their own mappings. So if we try to remount read-only + then copy the filesystem from bdev, we could get stale data, + so invalidate it to give a best effort at coherency. */ + if (remount_ro && sb->s_bdev) + invalidate_bdev(sb->s_bdev); return 0; } Index: linux-2.6/fs/pipe.c =================================================================== --- linux-2.6.orig/fs/pipe.c +++ linux-2.6/fs/pipe.c @@ -887,17 +887,6 @@ void free_pipe_info(struct inode *inode) } static struct vfsmount *pipe_mnt __read_mostly; -static int pipefs_delete_dentry(struct dentry *dentry) -{ - /* - * At creation time, we pretended this dentry was hashed - * (by clearing DCACHE_UNHASHED bit in d_flags) - * At delete time, we restore the truth : not hashed. - * (so that dput() can proceed correctly) - */ - dentry->d_flags |= DCACHE_UNHASHED; - return 0; -} /* * pipefs_dname() is called from d_path(). 
@@ -909,7 +898,6 @@ static char *pipefs_dname(struct dentry } static const struct dentry_operations pipefs_dentry_operations = { - .d_delete = pipefs_delete_dentry, .d_dname = pipefs_dname, }; @@ -964,17 +952,12 @@ struct file *create_write_pipe(int flags goto err; err = -ENOMEM; - dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); + dentry = d_alloc(NULL, &name); if (!dentry) goto err_inode; - + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; dentry->d_op = &pipefs_dentry_operations; - /* - * We dont want to publish this dentry into global dentry hash table. - * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED - * This permits a working /proc/$pid/fd/XXX on pipes - */ - dentry->d_flags &= ~DCACHE_UNHASHED; d_instantiate(dentry, inode); err = -ENFILE; Index: linux-2.6/net/socket.c =================================================================== --- linux-2.6.orig/net/socket.c +++ linux-2.6/net/socket.c @@ -257,12 +257,19 @@ static struct inode *sock_alloc_inode(st return &ei->vfs_inode; } -static void sock_destroy_inode(struct inode *inode) +static void sock_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(sock_inode_cachep, container_of(inode, struct socket_alloc, vfs_inode)); } +static void sock_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, sock_i_callback); +} + static void init_once(void *foo) { struct socket_alloc *ei = (struct socket_alloc *)foo; @@ -306,18 +313,6 @@ static struct file_system_type sock_fs_t .kill_sb = kill_anon_super, }; -static int sockfs_delete_dentry(struct dentry *dentry) -{ - /* - * At creation time, we pretended this dentry was hashed - * (by clearing DCACHE_UNHASHED bit in d_flags) - * At delete time, we restore the truth : not hashed. 
- * (so that dput() can proceed correctly) - */ - dentry->d_flags |= DCACHE_UNHASHED; - return 0; -} - /* * sockfs_dname() is called from d_path(). */ @@ -328,7 +323,6 @@ static char *sockfs_dname(struct dentry } static const struct dentry_operations sockfs_dentry_operations = { - .d_delete = sockfs_delete_dentry, .d_dname = sockfs_dname, }; @@ -372,17 +366,12 @@ static int sock_attach_fd(struct socket struct dentry *dentry; struct qstr name = { .name = "" }; - dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + dentry = d_alloc(NULL, &name); if (unlikely(!dentry)) return -ENOMEM; - + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; dentry->d_op = &sockfs_dentry_operations; - /* - * We dont want to push this dentry into global dentry hash table. - * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED - * This permits a working /proc/$pid/fd/XXX on sockets - */ - dentry->d_flags &= ~DCACHE_UNHASHED; d_instantiate(dentry, SOCK_INODE(sock)); sock->file = file; Index: linux-2.6/fs/anon_inodes.c =================================================================== --- linux-2.6.orig/fs/anon_inodes.c +++ linux-2.6/fs/anon_inodes.c @@ -35,24 +35,11 @@ static int anon_inodefs_get_sb(struct fi mnt); } -static int anon_inodefs_delete_dentry(struct dentry *dentry) -{ - /* - * We faked vfs to believe the dentry was hashed when we created it. - * Now we restore the flag so that dput() will work correctly. 
- */ - dentry->d_flags |= DCACHE_UNHASHED; - return 1; -} - static struct file_system_type anon_inode_fs_type = { .name = "anon_inodefs", .get_sb = anon_inodefs_get_sb, .kill_sb = kill_anon_super, }; -static const struct dentry_operations anon_inodefs_dentry_operations = { - .d_delete = anon_inodefs_delete_dentry, -}; /* * nop .set_page_dirty method so that people can use .page_mkwrite on @@ -106,20 +93,21 @@ struct file *anon_inode_getfile(const ch this.name = name; this.len = strlen(name); this.hash = 0; - dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + dentry = d_alloc(NULL, &this); if (!dentry) goto err_module; + dentry->d_parent = dentry; + dentry->d_flags |= DCACHE_DISCONNECTED; /* * We know the anon_inode inode count is always greater than zero, * so we can avoid doing an igrab() and we can use an open-coded * atomic_inc(). */ - atomic_inc(&anon_inode_inode->i_count); + spin_lock(&anon_inode_inode->i_lock); + anon_inode_inode->i_count++; + spin_unlock(&anon_inode_inode->i_lock); - dentry->d_op = &anon_inodefs_dentry_operations; - /* Do not publish this dentry inside the global dentry hash table */ - dentry->d_flags &= ~DCACHE_UNHASHED; d_instantiate(dentry, anon_inode_inode); error = -ENFILE; Index: linux-2.6/fs/nfs/write.c =================================================================== --- linux-2.6.orig/fs/nfs/write.c +++ linux-2.6/fs/nfs/write.c @@ -377,7 +377,7 @@ static int nfs_inode_add_request(struct error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); BUG_ON(error); if (!nfsi->npages) { - igrab(inode); + __iget(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } Index: linux-2.6/drivers/char/pty.c =================================================================== --- linux-2.6.orig/drivers/char/pty.c +++ linux-2.6/drivers/char/pty.c @@ -654,7 +654,11 @@ static int __ptmx_open(struct inode *ino set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ filp->private_data = tty; - file_move(filp, 
&tty->tty_files); + + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); retval = devpts_pty_new(inode, tty->link); if (retval) Index: linux-2.6/drivers/char/tty_io.c =================================================================== --- linux-2.6.orig/drivers/char/tty_io.c +++ linux-2.6/drivers/char/tty_io.c @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +/* Spinlock to protect the tty->tty_files list */ +DEFINE_SPINLOCK(tty_files_lock); + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -235,11 +238,11 @@ static int check_tty_count(struct tty_st struct list_head *p; int count = 0; - file_list_lock(); + spin_lock(&tty_files_lock); list_for_each(p, &tty->tty_files) { count++; } - file_list_unlock(); + spin_unlock(&tty_files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -517,7 +520,7 @@ static void do_tty_hangup(struct work_st spin_unlock(&redirect_lock); check_tty_count(tty, "do_tty_hangup"); - file_list_lock(); + spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? 
*/ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { if (filp->f_op->write == redirected_tty_write) @@ -528,7 +531,7 @@ static void do_tty_hangup(struct work_st tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_ldisc_hangup(tty); @@ -1404,9 +1407,9 @@ static void release_one_tty(struct work_ tty_driver_kref_put(driver); module_put(driver->owner); - file_list_lock(); + spin_lock(&tty_files_lock); list_del_init(&tty->tty_files); - file_list_unlock(); + spin_unlock(&tty_files_lock); free_tty_struct(tty); } @@ -1630,7 +1633,10 @@ void tty_release_dev(struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - file_kill(filp); + spin_lock(&tty_files_lock); + BUG_ON(list_empty(&filp->f_u.fu_list)); + list_del_init(&filp->f_u.fu_list); + spin_unlock(&tty_files_lock); filp->private_data = NULL; /* @@ -1788,7 +1794,11 @@ got_driver: return PTR_ERR(tty); filp->private_data = tty; - file_move(filp, &tty->tty_files); + BUG_ON(list_empty(&filp->f_u.fu_list)); + file_sb_list_del(filp); /* __dentry_open has put it on the sb list */ + spin_lock(&tty_files_lock); + list_add(&filp->f_u.fu_list, &tty->tty_files); + spin_unlock(&tty_files_lock); check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) Index: linux-2.6/fs/file_table.c =================================================================== --- linux-2.6.orig/fs/file_table.c +++ linux-2.6/fs/file_table.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -30,8 +31,7 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! 
*/ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +static DEFINE_PER_CPU(spinlock_t, files_cpulock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -285,7 +285,7 @@ void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -347,55 +347,112 @@ struct file *fget_light(unsigned int fd, return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +void file_sb_list_add(struct file *file, struct super_block *sb) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); + spinlock_t *lock; + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; +#endif + + lock = &get_cpu_var(files_cpulock); +#ifdef CONFIG_SMP + cpu = smp_processor_id(); + list = per_cpu_ptr(sb->s_files, cpu); + file->f_sb_list_cpu = cpu; +#else + list = &sb->s_files; +#endif + spin_lock(lock); + BUG_ON(!list_empty(&file->f_u.fu_list)); + list_add(&file->f_u.fu_list, list); + spin_unlock(lock); + put_cpu_var(files_cpulock); } -void file_kill(struct file *file) +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + spinlock_t *lock; + +#ifdef CONFIG_SMP + lock = &per_cpu(files_cpulock, file->f_sb_list_cpu); +#else + lock = &__get_cpu_var(files_cpulock); +#endif + spin_lock(lock); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + spin_unlock(lock); + } +} + +static void file_list_lock_all(void) +{ + int i; + int nr = 0; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(files_cpulock, i); + spin_lock_nested(lock, nr); + nr++; + } +} + +static void 
file_list_unlock_all(void) +{ + int i; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(files_cpulock, i); + spin_unlock(lock); } } int fs_may_remount_ro(struct super_block *sb) { - struct file *file; + int i; /* Check that no files are currently opened for writing. */ - file_list_lock(); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { - struct inode *inode = file->f_path.dentry->d_inode; - - /* File with pending delete? */ - if (inode->i_nlink == 0) - goto too_bad; - - /* Writeable file? */ - if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) - goto too_bad; + file_list_lock_all(); + for_each_possible_cpu(i) { + struct file *file; + struct list_head *list; + +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_files, i); +#else + list = &sb->s_files; +#endif + list_for_each_entry(file, list, f_u.fu_list) { + struct inode *inode = file->f_path.dentry->d_inode; + + /* File with pending delete? */ + if (inode->i_nlink == 0) + goto too_bad; + + /* Writeable file? */ + if (S_ISREG(inode->i_mode) && + (file->f_mode & FMODE_WRITE)) + goto too_bad; + } } - file_list_unlock(); + file_list_unlock_all(); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + file_list_unlock_all(); return 0; } @@ -408,38 +465,46 @@ too_bad: */ void mark_files_ro(struct super_block *sb) { - struct file *f; + int i; retry: - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { - struct vfsmount *mnt; - if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) - continue; - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - f->f_mode &= ~FMODE_WRITE; - if (file_check_writeable(f) != 0) - continue; - file_release_write(f); - mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. 
- */ - mnt_drop_write(mnt); - mntput(mnt); - goto retry; + file_list_lock_all(); + for_each_possible_cpu(i) { + struct file *f; + struct list_head *list; + +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_files, i); +#else + list = &sb->s_files; +#endif + list_for_each_entry(f, list, f_u.fu_list) { + struct vfsmount *mnt; + if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) + continue; + if (!file_count(f)) + continue; + if (!(f->f_mode & FMODE_WRITE)) + continue; + f->f_mode &= ~FMODE_WRITE; + if (file_check_writeable(f) != 0) + continue; + file_release_write(f); + mnt = mntget(f->f_path.mnt); + /* This can sleep, so we can't hold the spinlock. */ + file_list_unlock_all(); + mnt_drop_write(mnt); + mntput(mnt); + goto retry; + } } - file_list_unlock(); + file_list_unlock_all(); } void __init files_init(unsigned long mempages) { int n; + int i; filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); @@ -454,5 +519,7 @@ void __init files_init(unsigned long mem if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + for_each_possible_cpu(i) + spin_lock_init(&per_cpu(files_cpulock, i)); percpu_counter_init(&nr_files, 0); } Index: linux-2.6/fs/open.c =================================================================== --- linux-2.6.orig/fs/open.c +++ linux-2.6/fs/open.c @@ -829,7 +829,7 @@ static struct file *__dentry_open(struct f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -874,7 +874,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: Index: linux-2.6/include/linux/fs.h =================================================================== --- linux-2.6.orig/include/linux/fs.h +++ linux-2.6/include/linux/fs.h @@ -8,6 +8,7 @@ #include #include +#include /* * 
It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -401,6 +402,8 @@ extern struct files_stat_struct files_st extern int get_max_files(void); extern int sysctl_nr_open; extern struct inodes_stat_t inodes_stat; +extern struct percpu_counter nr_inodes; +extern int get_nr_inodes(void); extern int leases_enable, lease_break_time; #ifdef CONFIG_DNOTIFY extern int dir_notify_enable; @@ -720,9 +723,15 @@ struct inode { struct hlist_node i_hash; struct list_head i_list; /* backing dev IO list */ struct list_head i_sb_list; - struct list_head i_dentry; + union { + struct list_head i_dentry; + struct rcu_head i_rcu; + }; unsigned long i_ino; - atomic_t i_count; +#ifdef CONFIG_SMP + int i_sb_list_cpu; +#endif + unsigned int i_count; unsigned int i_nlink; uid_t i_uid; gid_t i_gid; @@ -919,6 +928,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -943,9 +955,6 @@ struct file { unsigned long f_mnt_write_state; #endif }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define file_count(x) atomic_long_read(&(x)->f_count) @@ -1338,9 +1347,17 @@ struct super_block { #endif struct xattr_handler **s_xattr; - struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head *s_inodes; +#else + struct list_head s_inodes; /* all inodes */ +#endif +#ifdef CONFIG_SMP + struct list_head *s_files; +#else struct list_head s_files; +#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -2042,6 +2059,7 @@ extern const struct 
file_operations read extern const struct file_operations write_pipefifo_fops; extern const struct file_operations rdwr_pipefifo_fops; +extern void mark_files_ro(struct super_block *sb); extern int fs_may_remount_ro(struct super_block *); #ifdef CONFIG_BLOCK @@ -2175,7 +2193,6 @@ extern int insert_inode_locked4(struct i extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); -extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); @@ -2184,15 +2201,17 @@ extern struct inode *new_inode(struct su extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); +extern void inode_sb_list_del(struct inode *inode); extern void __insert_inode_hash(struct inode *, unsigned long hashval); +extern void __remove_inode_hash(struct inode *); extern void remove_inode_hash(struct inode *); static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } extern struct file * get_empty_filp(void); -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); #ifdef CONFIG_BLOCK struct bio; extern void submit_bio(int, struct bio *); @@ -2397,10 +2416,20 @@ extern int generic_show_options(struct s extern void save_mount_options(struct super_block *sb, char *options); extern void replace_mount_options(struct super_block *sb, char *options); +static inline void __iget(struct inode *inode) +{ + assert_spin_locked(&inode->i_lock); + inode->i_count++; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; + /* + * Don't strictly need d_lock here? If the parent ino could change + * then surely we'd have a deeper race in the caller? 
+ */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); @@ -2476,7 +2505,8 @@ ssize_t simple_attr_write(struct file *f struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); - +int proc_nr_inodes(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); #endif /* __KERNEL__ */ Index: linux-2.6/security/selinux/hooks.c =================================================================== --- linux-2.6.orig/security/selinux/hooks.c +++ linux-2.6/security/selinux/hooks.c @@ -2247,7 +2247,7 @@ static inline void flush_unauthorized_fi tty = get_current_tty(); if (tty) { - file_list_lock(); + spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { struct inode *inode; @@ -2263,7 +2263,7 @@ static inline void flush_unauthorized_fi drop_tty = 1; } } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_kref_put(tty); } /* Reset controlling tty. 
*/ Index: linux-2.6/include/linux/tty.h =================================================================== --- linux-2.6.orig/include/linux/tty.h +++ linux-2.6/include/linux/tty.h @@ -446,6 +446,7 @@ extern struct tty_struct *tty_pair_get_t extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; +extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); Index: linux-2.6/fs/dcache.c =================================================================== --- linux-2.6.orig/fs/dcache.c +++ linux-2.6/fs/dcache.c @@ -35,13 +35,34 @@ #include #include "internal.h" +/* + * Usage: + * dcache->d_inode->i_lock protects: + * - the inode alias lists, d_inode + * dcache_hash_bucket->lock protects: + * - the dcache hash table + * dcache_lru_lock protects: + * - the dcache lru lists and counters + * d_lock protects: + * - d_flags + * - d_name + * - d_lru + * - d_unhashed + * - d_subdirs and children's d_child + * + * Ordering: + * dcache->d_inode->i_lock + * dentry->d_lock + * dcache_lru_lock + * dcache_hash_bucket->lock + */ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); +static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock); __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); -EXPORT_SYMBOL(dcache_lock); +EXPORT_SYMBOL(rename_lock); static struct kmem_cache *dentry_cache __read_mostly; @@ -60,13 +81,27 @@ static struct kmem_cache *dentry_cache _ static unsigned int d_hash_mask __read_mostly; static unsigned int d_hash_shift __read_mostly; -static struct hlist_head *dentry_hashtable __read_mostly; + +struct dcache_hash_bucket { + spinlock_t lock; + struct hlist_head head; +}; +static struct dcache_hash_bucket *dentry_hashtable __read_mostly; /* Statistics gathering. 
*/ struct dentry_stat_t dentry_stat = { + .nr_dentry = 0, .age_limit = 45, }; +static inline struct dcache_hash_bucket *d_hash(struct dentry *parent, + unsigned long hash) +{ + hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; + hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); + return dentry_hashtable + (hash & D_HASHMASK); +} + static void __d_free(struct dentry *dentry) { WARN_ON(!list_empty(&dentry->d_alias)); @@ -82,11 +117,11 @@ static void d_callback(struct rcu_head * } /* - * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry - * inside dcache_lock. + * no locks, please. */ static void d_free(struct dentry *dentry) { + BUG_ON(dentry->d_count); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); /* if dentry was never inserted into hash, immediate free is OK */ @@ -102,14 +137,13 @@ static void d_free(struct dentry *dentry */ static void dentry_iput(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_lock) { struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (!inode->i_nlink) fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) @@ -118,42 +152,60 @@ static void dentry_iput(struct dentry * iput(inode); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } /* - * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. + * dentry_lru_(add|add_tail|del|del_init) must be called with d_lock held + * to protect list_empty(d_lru) condition. 
*/ static void dentry_lru_add(struct dentry *dentry) { + spin_lock(&dcache_lru_lock); list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; + spin_unlock(&dcache_lru_lock); } static void dentry_lru_add_tail(struct dentry *dentry) { + spin_lock(&dcache_lru_lock); list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; + spin_unlock(&dcache_lru_lock); +} + +static void __dentry_lru_del(struct dentry *dentry) +{ + list_del(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; +} + +static void __dentry_lru_del_init(struct dentry *dentry) +{ + list_del_init(&dentry->d_lru); + dentry->d_sb->s_nr_dentry_unused--; + dentry_stat.nr_unused--; } static void dentry_lru_del(struct dentry *dentry) { if (!list_empty(&dentry->d_lru)) { - list_del(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + spin_lock(&dcache_lru_lock); + __dentry_lru_del(dentry); + spin_unlock(&dcache_lru_lock); } } static void dentry_lru_del_init(struct dentry *dentry) { if (likely(!list_empty(&dentry->d_lru))) { - list_del_init(&dentry->d_lru); - dentry->d_sb->s_nr_dentry_unused--; - dentry_stat.nr_unused--; + spin_lock(&dcache_lru_lock); + __dentry_lru_del_init(dentry); + spin_unlock(&dcache_lru_lock); } } @@ -164,25 +216,89 @@ static void dentry_lru_del_init(struct d * The dentry must already be unhashed and removed from the LRU. * * If this is the root of the dentry tree, return NULL. + * + * d_lock and d_parent->d_lock must be held by caller, and + * are dropped by d_kill. 
*/ static struct dentry *d_kill(struct dentry *dentry) __releases(dentry->d_lock) - __releases(dcache_lock) { struct dentry *parent; list_del(&dentry->d_u.d_child); - dentry_stat.nr_dentry--; /* For d_free, below */ - /*drops the locks, at that point nobody can reach this dentry */ - dentry_iput(dentry); + if (dentry->d_parent && dentry != dentry->d_parent) + spin_unlock(&dentry->d_parent->d_lock); if (IS_ROOT(dentry)) parent = NULL; else parent = dentry->d_parent; + /*drops the locks, at that point nobody can reach this dentry */ + dentry_iput(dentry); d_free(dentry); return parent; } +void __d_drop(struct dentry *dentry) +{ + if (!(dentry->d_flags & DCACHE_UNHASHED)) { + struct dcache_hash_bucket *b; + b = d_hash(dentry->d_parent, dentry->d_name.hash); + dentry->d_flags |= DCACHE_UNHASHED; + spin_lock(&b->lock); + hlist_del_rcu(&dentry->d_hash); + spin_unlock(&b->lock); + } +} +EXPORT_SYMBOL(__d_drop); + +void d_drop(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __d_drop(dentry); + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(d_drop); + +static inline struct dentry *__dget_dlock(struct dentry *dentry) +{ + dentry->d_count++; + return dentry; +} + +static inline struct dentry *__dget(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + return dentry; +} + +struct dentry *dget_parent(struct dentry *dentry) +{ + struct dentry *ret; + +repeat: + spin_lock(&dentry->d_lock); + ret = dentry->d_parent; + if (!ret) + goto out; + if (dentry == ret) { + ret->d_count++; + goto out; + } + if (!spin_trylock(&ret->d_lock)) { + spin_unlock(&dentry->d_lock); + goto repeat; + } + BUG_ON(!ret->d_count); + ret->d_count++; + spin_unlock(&ret->d_lock); +out: + spin_unlock(&dentry->d_lock); + return ret; +} +EXPORT_SYMBOL(dget_parent); + /* * This is dput * @@ -214,19 +330,20 @@ static struct dentry *d_kill(struct dent void dput(struct dentry *dentry) { + struct dentry *parent; + struct inode *inode; + if 
(!dentry) return; repeat: - if (atomic_read(&dentry->d_count) == 1) + if (dentry->d_count == 1) might_sleep(); - if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock)) - return; - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count)) { + BUG_ON(!dentry->d_count); + if (dentry->d_count > 1) { + dentry->d_count--; spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return; } @@ -234,8 +351,10 @@ repeat: * AV: ->d_delete() is _NOT_ allowed to block now. */ if (dentry->d_op && dentry->d_op->d_delete) { - if (dentry->d_op->d_delete(dentry)) - goto unhash_it; + if (dentry->d_op->d_delete(dentry)) { + __d_drop(dentry); + goto kill_it; + } } /* Unreachable? Get rid of it */ if (d_unhashed(dentry)) @@ -244,13 +363,39 @@ repeat: dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); } - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - return; + dentry->d_count--; + spin_unlock(&dentry->d_lock); + return; -unhash_it: - __d_drop(dentry); +relock1: + spin_lock(&dentry->d_lock); kill_it: + inode = dentry->d_inode; + if (inode) { + if (!spin_trylock(&inode->i_lock)) { +relock2: + spin_unlock(&dentry->d_lock); + goto relock1; + } + } + parent = dentry->d_parent; + if (parent && parent != dentry) { + if (!spin_trylock(&parent->d_lock)) { + if (inode) + spin_unlock(&inode->i_lock); + goto relock2; + } + } + dentry->d_count--; + if (dentry->d_count) { + /* This case should be fine */ + spin_unlock(&dentry->d_lock); + if (parent && parent != dentry) + spin_unlock(&parent->d_lock); + if (inode) + spin_unlock(&inode->i_lock); + return; + } /* if dentry was on the d_lru list delete it from there */ dentry_lru_del(dentry); dentry = d_kill(dentry); @@ -275,9 +420,9 @@ int d_invalidate(struct dentry * dentry) /* * If it's already been dropped, return OK. 
*/ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return 0; } /* @@ -285,9 +430,9 @@ int d_invalidate(struct dentry * dentry) * to get rid of unused child entries. */ if (!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); } /* @@ -300,35 +445,18 @@ int d_invalidate(struct dentry * dentry) * we might still populate it if it was a * working directory or similar). */ - spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return -EBUSY; } } __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } -/* This should be called _only_ with dcache_lock held */ - -static inline struct dentry * __dget_locked(struct dentry *dentry) -{ - atomic_inc(&dentry->d_count); - dentry_lru_del_init(dentry); - return dentry; -} - -struct dentry * dget_locked(struct dentry *dentry) -{ - return __dget_locked(dentry); -} - /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question @@ -358,18 +486,21 @@ static struct dentry * __d_find_alias(st next = tmp->next; prefetch(next); alias = list_entry(tmp, struct dentry, d_alias); + spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) discon_alias = alias; else if (!want_discon) { - __dget_locked(alias); + __dget_dlock(alias); + spin_unlock(&alias->d_lock); return alias; } } + spin_unlock(&alias->d_lock); } if (discon_alias) - __dget_locked(discon_alias); + __dget(discon_alias); return discon_alias; } @@ -378,9 +509,9 @@ struct dentry * d_find_alias(struct inod struct dentry *de = NULL; if (!list_empty(&inode->i_dentry)) { - 
spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } return de; } @@ -393,20 +524,20 @@ void d_prune_aliases(struct inode *inode { struct dentry *dentry; restart: - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); - if (!atomic_read(&dentry->d_count)) { - __dget_locked(dentry); + if (!dentry->d_count) { + __dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(dentry); goto restart; } spin_unlock(&dentry->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } /* @@ -419,27 +550,43 @@ restart: */ static void prune_one_dentry(struct dentry * dentry) __releases(dentry->d_lock) - __releases(dcache_lock) - __acquires(dcache_lock) { __d_drop(dentry); dentry = d_kill(dentry); /* - * Prune ancestors. Locking is simpler than in dput(), - * because dcache_lock needs to be taken anyway. + * Prune ancestors. 
*/ - spin_lock(&dcache_lock); while (dentry) { - if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + + if (inode) + spin_lock(&inode->i_lock); +again: + spin_lock(&dentry->d_lock); + if (dentry->d_parent && dentry != dentry->d_parent) { + if (!spin_trylock(&dentry->d_parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } + parent = dentry->d_parent; + } + dentry->d_count--; + if (dentry->d_count) { + if (parent) + spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); + if (inode) + spin_unlock(&inode->i_lock); return; + } if (dentry->d_op && dentry->d_op->d_delete) dentry->d_op->d_delete(dentry); dentry_lru_del_init(dentry); __d_drop(dentry); dentry = d_kill(dentry); - spin_lock(&dcache_lock); } } @@ -460,10 +607,11 @@ static void __shrink_dcache_sb(struct su BUG_ON(!sb); BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); - spin_lock(&dcache_lock); if (count != NULL) /* called from prune_dcache() and shrink_dcache_parent() */ cnt = *count; +relock: + spin_lock(&dcache_lru_lock); restart: if (count == NULL) list_splice_init(&sb->s_dentry_lru, &tmp); @@ -473,7 +621,10 @@ restart: struct dentry, d_lru); BUG_ON(dentry->d_sb != sb); - spin_lock(&dentry->d_lock); + if (!spin_trylock(&dentry->d_lock)) { + spin_unlock(&dcache_lru_lock); + goto relock; + } /* * If we are honouring the DCACHE_REFERENCED flag and * the dentry has this flag set, don't free it. 
Clear @@ -491,33 +642,61 @@ restart: if (!cnt) break; } - cond_resched_lock(&dcache_lock); + cond_resched_lock(&dcache_lru_lock); } } + spin_unlock(&dcache_lru_lock); + +again: + spin_lock(&dcache_lru_lock); /* lru_lock also protects tmp list */ while (!list_empty(&tmp)) { + struct inode *inode; + dentry = list_entry(tmp.prev, struct dentry, d_lru); - dentry_lru_del_init(dentry); - spin_lock(&dentry->d_lock); + + if (!spin_trylock(&dentry->d_lock)) { +again1: + spin_unlock(&dcache_lru_lock); + goto again; + } /* * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free * it - just keep it off the LRU list. */ - if (atomic_read(&dentry->d_count)) { + if (dentry->d_count) { + __dentry_lru_del_init(dentry); spin_unlock(&dentry->d_lock); continue; } + inode = dentry->d_inode; + if (inode && !spin_trylock(&inode->i_lock)) { +again2: + spin_unlock(&dentry->d_lock); + goto again1; + } + if (dentry->d_parent && dentry->d_parent != dentry) { + if (!spin_trylock(&dentry->d_parent->d_lock)) { + if (inode) + spin_unlock(&inode->i_lock); + goto again2; + } + } + __dentry_lru_del_init(dentry); + spin_unlock(&dcache_lru_lock); + prune_one_dentry(dentry); - /* dentry->d_lock was dropped in prune_one_dentry() */ - cond_resched_lock(&dcache_lock); + /* dentry->d_lock dropped */ + spin_lock(&dcache_lru_lock); } + if (count == NULL && !list_empty(&sb->s_dentry_lru)) goto restart; if (count != NULL) *count = cnt; if (!list_empty(&referenced)) list_splice(&referenced, &sb->s_dentry_lru); - spin_unlock(&dcache_lock); + spin_unlock(&dcache_lru_lock); } /** @@ -539,7 +718,6 @@ static void prune_dcache(int count) if (unused == 0 || count == 0) return; - spin_lock(&dcache_lock); restart: if (count >= unused) prune_ratio = 1; @@ -575,11 +753,9 @@ restart: if (down_read_trylock(&sb->s_umount)) { if ((sb->s_root != NULL) && (!list_empty(&sb->s_dentry_lru))) { - spin_unlock(&dcache_lock); __shrink_dcache_sb(sb, &w_count, DCACHE_REFERENCED); 
pruned -= w_count; - spin_lock(&dcache_lock); } up_read(&sb->s_umount); } @@ -595,7 +771,6 @@ restart: } } spin_unlock(&sb_lock); - spin_unlock(&dcache_lock); } /** @@ -624,10 +799,10 @@ static void shrink_dcache_for_umount_sub BUG_ON(!IS_ROOT(dentry)); /* detach this root from the system */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); dentry_lru_del_init(dentry); __d_drop(dentry); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); for (;;) { /* descend to the first leaf in the current subtree */ @@ -636,14 +811,15 @@ static void shrink_dcache_for_umount_sub /* this is a branch with children - detach all of them * from the system in one go */ - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); list_for_each_entry(loop, &dentry->d_subdirs, d_u.d_child) { + spin_lock_nested(&loop->d_lock, DENTRY_D_LOCK_NESTED); dentry_lru_del_init(loop); __d_drop(loop); - cond_resched_lock(&dcache_lock); + spin_unlock(&loop->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); /* move to the first child */ dentry = list_entry(dentry->d_subdirs.next, @@ -655,7 +831,7 @@ static void shrink_dcache_for_umount_sub do { struct inode *inode; - if (atomic_read(&dentry->d_count) != 0) { + if (dentry->d_count != 0) { printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%s}" " still in use (%d)" @@ -664,20 +840,23 @@ static void shrink_dcache_for_umount_sub dentry->d_inode ? 
dentry->d_inode->i_ino : 0UL, dentry->d_name.name, - atomic_read(&dentry->d_count), + dentry->d_count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); BUG(); } - if (IS_ROOT(dentry)) + if (IS_ROOT(dentry)) { parent = NULL; - else { + list_del(&dentry->d_u.d_child); + } else { parent = dentry->d_parent; - atomic_dec(&parent->d_count); + spin_lock(&parent->d_lock); + parent->d_count--; + list_del(&dentry->d_u.d_child); + spin_unlock(&parent->d_lock); } - list_del(&dentry->d_u.d_child); detached++; inode = dentry->d_inode; @@ -706,16 +885,12 @@ static void shrink_dcache_for_umount_sub struct dentry, d_u.d_child); } out: - /* several dentries were freed, need to correct nr_dentry */ - spin_lock(&dcache_lock); - dentry_stat.nr_dentry -= detached; - spin_unlock(&dcache_lock); + return; } /* * destroy the dentries attached to a superblock on unmounting - * - we don't need to use dentry->d_lock, and only need dcache_lock when - * removing the dentry from the system lists and hashes because: + * - we don't need to use dentry->d_lock because: * - the superblock is detached from all mountings and open files, so the * dentry trees will not be rearranged by the VFS * - s_umount is write-locked, so the memory pressure shrinker will ignore @@ -732,7 +907,9 @@ void shrink_dcache_for_umount(struct sup dentry = sb->s_root; sb->s_root = NULL; - atomic_dec(&dentry->d_count); + spin_lock(&dentry->d_lock); + dentry->d_count--; + spin_unlock(&dentry->d_lock); shrink_dcache_for_umount_subtree(dentry); while (!hlist_empty(&sb->s_anon)) { @@ -754,15 +931,19 @@ void shrink_dcache_for_umount(struct sup * Return true if the parent or its subdirectories contain * a mount point */ - int have_submounts(struct dentry *parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; + unsigned seq; + +rename_retry: + this_parent = parent; + seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); if (d_mountpoint(parent)) goto positive; + 
spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -770,26 +951,56 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); /* Have we found a mount point ? */ - if (d_mountpoint(dentry)) + if (d_mountpoint(dentry)) { + spin_unlock(&dentry->d_lock); + spin_unlock(&this_parent->d_lock); goto positive; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. */ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + // d_unlinked(this_parent) || XXX + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; /* No mount points found in tree */ positive: - spin_unlock(&dcache_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 1; } @@ -809,11 +1020,17 @@ positive: */ static int select_parent(struct dentry * parent) { - struct dentry *this_parent = parent; + struct dentry *this_parent; struct list_head *next; - int found = 0; + unsigned seq; + int found; + 
+rename_retry: + found = 0; + this_parent = parent; + seq = read_seqbegin(&rename_lock); - spin_lock(&dcache_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -822,12 +1039,13 @@ resume: struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); dentry_lru_del_init(dentry); /* * move only zero ref count dentries to the end * of the unused list for prune_dcache */ - if (!atomic_read(&dentry->d_count)) { + if (!dentry->d_count) { dentry_lru_add_tail(dentry); found++; } @@ -837,27 +1055,54 @@ resume: * ensures forward progress). We'll be coming back to find * the rest. */ - if (found && need_resched()) + if (found && need_resched()) { + spin_unlock(&dentry->d_lock); goto out; + } /* * Descend a level if the d_subdirs list is non-empty. */ if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto repeat; } + + spin_unlock(&dentry->d_lock); } /* * All done at this level ... ascend and resume the search. 
*/ if (this_parent != parent) { - next = this_parent->d_u.d_child.next; - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + // d_unlinked(this_parent) || XXX + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } out: - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return found; } @@ -939,7 +1184,7 @@ struct dentry *d_alloc(struct dentry * p memcpy(dname, name->name, name->len); dname[name->len] = 0; - atomic_set(&dentry->d_count, 1); + dentry->d_count = 1; dentry->d_flags = DCACHE_UNHASHED; spin_lock_init(&dentry->d_lock); dentry->d_inode = NULL; @@ -952,19 +1197,17 @@ struct dentry *d_alloc(struct dentry * p INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); + INIT_LIST_HEAD(&dentry->d_u.d_child); if (parent) { - dentry->d_parent = dget(parent); + spin_lock(&parent->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry->d_parent = dget_dlock(parent); dentry->d_sb = parent->d_sb; - } else { - INIT_LIST_HEAD(&dentry->d_u.d_child); - } - - spin_lock(&dcache_lock); - if (parent) list_add(&dentry->d_u.d_child, &parent->d_subdirs); - dentry_stat.nr_dentry++; - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); + } return dentry; } @@ -979,7 +1222,6 @@ struct dentry *d_alloc_name(struct dentr return d_alloc(parent, &q); } -/* the caller must hold dcache_lock */ static void __d_instantiate(struct dentry *dentry, struct inode 
*inode) { if (inode) @@ -1006,9 +1248,11 @@ static void __d_instantiate(struct dentr void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); + if (inode) + spin_lock(&inode->i_lock); __d_instantiate(entry, inode); - spin_unlock(&dcache_lock); + if (inode) + spin_unlock(&inode->i_lock); security_d_instantiate(entry, inode); } @@ -1052,7 +1296,7 @@ static struct dentry *__d_instantiate_un continue; if (memcmp(qstr->name, name, len)) continue; - dget_locked(alias); + dget(alias); return alias; } @@ -1066,9 +1310,11 @@ struct dentry *d_instantiate_unique(stru BUG_ON(!list_empty(&entry->d_alias)); - spin_lock(&dcache_lock); + if (inode) + spin_lock(&inode->i_lock); result = __d_instantiate_unique(entry, inode); - spin_unlock(&dcache_lock); + if (inode) + spin_unlock(&inode->i_lock); if (!result) { security_d_instantiate(entry, inode); @@ -1108,14 +1354,6 @@ struct dentry * d_alloc_root(struct inod return res; } -static inline struct hlist_head *d_hash(struct dentry *parent, - unsigned long hash) -{ - hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES; - hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS); - return dentry_hashtable + (hash & D_HASHMASK); -} - /** * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for @@ -1156,10 +1394,10 @@ struct dentry *d_obtain_alias(struct ino } tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); res = __d_find_alias(inode, 0); if (res) { - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(tmp); goto out_iput; } @@ -1173,8 +1411,8 @@ struct dentry *d_obtain_alias(struct ino list_add(&tmp->d_alias, &inode->i_dentry); hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); spin_unlock(&tmp->d_lock); + spin_unlock(&inode->i_lock); - spin_unlock(&dcache_lock); return tmp; out_iput: @@ -1204,19 +1442,19 @@ struct dentry 
*d_splice_alias(struct ino struct dentry *new = NULL; if (inode && S_ISDIR(inode->i_mode)) { - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_rehash(dentry); d_move(new, dentry); iput(inode); } else { - /* already taking dcache_lock, so d_add() by hand */ + /* already taken inode->i_lock, d_add() by hand */ __d_instantiate(dentry, inode); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -1288,10 +1526,10 @@ struct dentry *d_add_ci(struct dentry *d * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { __d_instantiate(found, inode); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); return found; } @@ -1301,8 +1539,8 @@ struct dentry *d_add_ci(struct dentry *d * reference to it, move it in place and use it. */ new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - dget_locked(new); - spin_unlock(&dcache_lock); + dget(new); + spin_unlock(&inode->i_lock); security_d_instantiate(found, inode); d_move(new, found); iput(inode); @@ -1324,7 +1562,7 @@ err_out: * is returned. The caller must use dput to free the entry when it has * finished using it. %NULL is returned on failure. * - * __d_lookup is dcache_lock free. The hash list is protected using RCU. + * __d_lookup is global lock free. The hash list is protected using RCU. * Memory barriers are used while updating and doing lockless traversal. * To avoid races with d_move while rename is happening, d_lock is used. * @@ -1336,8 +1574,7 @@ err_out: * * The dentry unused LRU is not updated even if lookup finds the required dentry * in there. 
It is updated in places such as prune_dcache, shrink_dcache_sb, - * select_parent and __dget_locked. This laziness saves lookup from dcache_lock - * acquisition. + * select_parent. This laziness saves lookup from LRU lock acquisition. * * d_lookup() is protected against the concurrent renames in some unrelated * directory using the seqlockt_t rename_lock. @@ -1346,7 +1583,7 @@ err_out: struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; - unsigned long seq; + unsigned seq; do { seq = read_seqbegin(&rename_lock); @@ -1362,7 +1599,8 @@ struct dentry * __d_lookup(struct dentry unsigned int len = name->len; unsigned int hash = name->hash; const unsigned char *str = name->name; - struct hlist_head *head = d_hash(parent,hash); + struct dcache_hash_bucket *b = d_hash(parent, hash); + struct hlist_head *head = &b->head; struct dentry *found = NULL; struct hlist_node *node; struct dentry *dentry; @@ -1406,7 +1644,7 @@ struct dentry * __d_lookup(struct dentry goto next; } - atomic_inc(&dentry->d_count); + dentry->d_count++; found = dentry; spin_unlock(&dentry->d_lock); break; @@ -1456,6 +1694,7 @@ out: int d_validate(struct dentry *dentry, struct dentry *dparent) { + struct dcache_hash_bucket *b; struct hlist_head *base; struct hlist_node *lhp; @@ -1466,19 +1705,23 @@ int d_validate(struct dentry *dentry, st if (dentry->d_parent != dparent) goto out; - spin_lock(&dcache_lock); - base = d_hash(dparent, dentry->d_name.hash); - hlist_for_each(lhp,base) { + spin_lock(&dentry->d_lock); + b = d_hash(dparent, dentry->d_name.hash); + base = &b->head; + spin_lock(&b->lock); + hlist_for_each(lhp, base) { /* hlist_for_each_entry_rcu() not required for d_hash list - * as it is parsed under dcache_lock + * as it is parsed under dcache_hash_bucket->lock */ if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { - __dget_locked(dentry); - spin_unlock(&dcache_lock); + spin_unlock(&b->lock); + __dget_dlock(dentry); + 
spin_unlock(&dentry->d_lock); return 1; } } - spin_unlock(&dcache_lock); + spin_unlock(&b->lock); + spin_unlock(&dentry->d_lock); out: return 0; } @@ -1506,14 +1749,20 @@ out: void d_delete(struct dentry * dentry) { + struct inode *inode; int isdir = 0; /* * Are we the only user? */ - spin_lock(&dcache_lock); +again: spin_lock(&dentry->d_lock); - isdir = S_ISDIR(dentry->d_inode->i_mode); - if (atomic_read(&dentry->d_count) == 1) { + inode = dentry->d_inode; + isdir = S_ISDIR(inode->i_mode); + if (dentry->d_count == 1) { + if (inode && !spin_trylock(&inode->i_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } dentry_iput(dentry); fsnotify_nameremove(dentry, isdir); return; @@ -1523,16 +1772,16 @@ void d_delete(struct dentry * dentry) __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); fsnotify_nameremove(dentry, isdir); } -static void __d_rehash(struct dentry * entry, struct hlist_head *list) +static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b) { - entry->d_flags &= ~DCACHE_UNHASHED; - hlist_add_head_rcu(&entry->d_hash, list); + spin_lock(&b->lock); + hlist_add_head_rcu(&entry->d_hash, &b->head); + spin_unlock(&b->lock); } static void _d_rehash(struct dentry * entry) @@ -1549,11 +1798,9 @@ static void _d_rehash(struct dentry * en void d_rehash(struct dentry * entry) { - spin_lock(&dcache_lock); spin_lock(&entry->d_lock); _d_rehash(entry); spin_unlock(&entry->d_lock); - spin_unlock(&dcache_lock); } /* @@ -1630,32 +1877,46 @@ static void switch_names(struct dentry * */ static void d_move_locked(struct dentry * dentry, struct dentry * target) { - struct hlist_head *list; - + struct dcache_hash_bucket *b; if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); write_seqlock(&rename_lock); - /* - * XXXX: do we really need to take target->d_lock? 
- */ - if (target < dentry) { - spin_lock(&target->d_lock); - spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + if (target->d_parent != dentry->d_parent) { + if (target->d_parent < dentry->d_parent) { + spin_lock(&target->d_parent->d_lock); + spin_lock_nested(&dentry->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } else { + spin_lock(&dentry->d_parent->d_lock); + spin_lock_nested(&target->d_parent->d_lock, + DENTRY_D_LOCK_NESTED); + } } else { - spin_lock(&dentry->d_lock); - spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED); + spin_lock(&target->d_parent->d_lock); } - /* Move the dentry to the target hash queue, if on different bucket */ - if (d_unhashed(dentry)) - goto already_unhashed; - - hlist_del_rcu(&dentry->d_hash); + if (dentry != dentry->d_parent) { + if (target < dentry) { + spin_lock_nested(&target->d_lock, 2); + spin_lock_nested(&dentry->d_lock, 3); + } else { + spin_lock_nested(&dentry->d_lock, 2); + spin_lock_nested(&target->d_lock, 3); + } + } else { + spin_lock_nested(&target->d_lock, 2); + } -already_unhashed: - list = d_hash(target->d_parent, target->d_name.hash); - __d_rehash(dentry, list); + /* Move the dentry to the target hash queue, if on different bucket */ + if (!d_unhashed(dentry)) { + b = d_hash(dentry->d_parent, dentry->d_name.hash); + spin_lock(&b->lock); + hlist_del_rcu(&dentry->d_hash); + spin_unlock(&b->lock); + } + __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); /* Unhash the target: dput() will then get rid of it */ __d_drop(target); @@ -1680,6 +1941,10 @@ already_unhashed: } list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); + if (target->d_parent != dentry->d_parent) + spin_unlock(&dentry->d_parent->d_lock); + if (target->d_parent != target) + spin_unlock(&target->d_parent->d_lock); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -1697,9 +1962,7 @@ already_unhashed: void d_move(struct dentry * dentry, struct dentry * target) { - 
spin_lock(&dcache_lock); d_move_locked(dentry, target); - spin_unlock(&dcache_lock); } /** @@ -1725,16 +1988,16 @@ struct dentry *d_ancestor(struct dentry * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the dcache_lock + * dentry->d_parent->d_inode->i_mutex * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... */ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) - __releases(dcache_lock) { struct mutex *m1 = NULL, *m2 = NULL; struct dentry *ret; + struct inode *inode; /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) @@ -1750,14 +2013,15 @@ static struct dentry *__d_unalias(struct if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; - if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex)) + inode = alias->d_parent->d_inode; + if (!mutex_trylock(&inode->i_mutex)) goto out_err; - m2 = &alias->d_parent->d_inode->i_mutex; + m2 = &inode->i_mutex; out_unalias: d_move_locked(alias, dentry); ret = alias; out_err: - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (m2) mutex_unlock(m2); if (m1) @@ -1779,6 +2043,12 @@ static void __d_materialise_dentry(struc dparent = dentry->d_parent; aparent = anon->d_parent; + /* XXX: hack */ + spin_lock(&aparent->d_lock); + spin_lock(&dparent->d_lock); + spin_lock(&dentry->d_lock); + spin_lock(&anon->d_lock); + dentry->d_parent = (aparent == anon) ? 
dentry : aparent; list_del(&dentry->d_u.d_child); if (!IS_ROOT(dentry)) @@ -1793,6 +2063,11 @@ static void __d_materialise_dentry(struc else INIT_LIST_HEAD(&anon->d_u.d_child); + spin_unlock(&anon->d_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&dparent->d_lock); + spin_unlock(&aparent->d_lock); + anon->d_flags &= ~DCACHE_DISCONNECTED; } @@ -1810,14 +2085,15 @@ struct dentry *d_materialise_unique(stru BUG_ON(!d_unhashed(dentry)); - spin_lock(&dcache_lock); - if (!inode) { actual = dentry; __d_instantiate(dentry, NULL); - goto found_lock; + d_rehash(actual); + goto out_nolock; } + spin_lock(&inode->i_lock); + if (S_ISDIR(inode->i_mode)) { struct dentry *alias; @@ -1845,15 +2121,14 @@ struct dentry *d_materialise_unique(stru actual = __d_instantiate_unique(dentry, inode); if (!actual) actual = dentry; - else if (unlikely(!d_unhashed(actual))) - goto shouldnt_be_hashed; + else + BUG_ON(!d_unhashed(actual)); -found_lock: spin_lock(&actual->d_lock); found: _d_rehash(actual); spin_unlock(&actual->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); out_nolock: if (actual == dentry) { security_d_instantiate(dentry, inode); @@ -1862,10 +2137,6 @@ out_nolock: iput(inode); return actual; - -shouldnt_be_hashed: - spin_unlock(&dcache_lock); - BUG(); } static int prepend(char **buffer, int *buflen, const char *str, int namelen) @@ -1896,7 +2167,7 @@ static int prepend_name(char **buffer, i * Returns a pointer into the buffer or an error code if the * path was too long. * - * "buflen" should be positive. Caller holds the dcache_lock. + * "buflen" should be positive. Caller holds the path->dentry->d_lock. * * If path is not reachable from the supplied root, then the value of * root is changed (without modifying refcounts). 
@@ -1904,13 +2175,22 @@ static int prepend_name(char **buffer, i char *__d_path(const struct path *path, struct path *root, char *buffer, int buflen) { - struct dentry *dentry = path->dentry; - struct vfsmount *vfsmnt = path->mnt; - char *end = buffer + buflen; + struct dentry *dentry; + struct vfsmount *vfsmnt; + char *end; char *retval; + unsigned seq; - spin_lock(&vfsmount_lock); +rename_retry: + dentry = path->dentry; + vfsmnt = path->mnt; + end = buffer + buflen; prepend(&end, &buflen, "\0", 1); + + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); + spin_lock(&dentry->d_lock); +unlinked: if (d_unlinked(dentry) && (prepend(&end, &buflen, " (deleted)", 10) != 0)) goto Elong; @@ -1922,7 +2202,7 @@ char *__d_path(const struct path *path, *retval = '/'; for (;;) { - struct dentry * parent; + struct dentry *parent; if (dentry == root->dentry && vfsmnt == root->mnt) break; @@ -1931,8 +2211,10 @@ char *__d_path(const struct path *path, if (vfsmnt->mnt_parent == vfsmnt) { goto global_root; } + spin_unlock(&dentry->d_lock); dentry = vfsmnt->mnt_mountpoint; vfsmnt = vfsmnt->mnt_parent; + spin_lock(&dentry->d_lock); /* can't get unlinked because locked vfsmount */ continue; } parent = dentry->d_parent; @@ -1941,11 +2223,18 @@ char *__d_path(const struct path *path, (prepend(&end, &buflen, "/", 1) != 0)) goto Elong; retval = end; + spin_unlock(&dentry->d_lock); dentry = parent; + spin_lock(&dentry->d_lock); + if (d_unlinked(dentry)) + goto unlinked; } out: - spin_unlock(&vfsmount_lock); + spin_unlock(&dentry->d_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return retval; global_root: @@ -1954,6 +2243,7 @@ global_root: goto Elong; root->mnt = vfsmnt; root->dentry = dentry; + /* XXX: this could wrongly modify root if we rename retry */ goto out; Elong: @@ -1997,10 +2287,12 @@ char *d_path(const struct path *path, ch root = current->fs->root; path_get(&root); read_unlock(¤t->fs->lock); - spin_lock(&dcache_lock); + + 
vfsmount_read_lock(); tmp = root; res = __d_path(path, &tmp, buf, buflen); - spin_unlock(&dcache_lock); + vfsmount_read_unlock(); + path_put(&root); return res; } @@ -2031,11 +2323,19 @@ char *dynamic_dname(struct dentry *dentr */ char *dentry_path(struct dentry *dentry, char *buf, int buflen) { - char *end = buf + buflen; + char *end; char *retval; + unsigned seq; - spin_lock(&dcache_lock); +rename_retry: + end = buf + buflen; prepend(&end, &buflen, "\0", 1); + + seq = read_seqbegin(&rename_lock); + vfsmount_read_lock(); + rcu_read_lock(); /* protect parent */ + spin_lock(&dentry->d_lock); +unlinked: if (d_unlinked(dentry) && (prepend(&end, &buflen, "//deleted", 9) != 0)) goto Elong; @@ -2054,13 +2354,22 @@ char *dentry_path(struct dentry *dentry, goto Elong; retval = end; + spin_unlock(&dentry->d_lock); dentry = parent; + spin_lock(&dentry->d_lock); + if (d_unlinked(dentry)) + goto unlinked; } - spin_unlock(&dcache_lock); +out: + spin_unlock(&dentry->d_lock); + rcu_read_unlock(); + vfsmount_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return retval; Elong: - spin_unlock(&dcache_lock); - return ERR_PTR(-ENAMETOOLONG); + retval = ERR_PTR(-ENAMETOOLONG); + goto out; } /* @@ -2098,14 +2407,17 @@ SYSCALL_DEFINE2(getcwd, char __user *, b read_unlock(¤t->fs->lock); error = -ENOENT; - spin_lock(&dcache_lock); + vfsmount_read_lock(); + spin_lock(&pwd.dentry->d_lock); if (!d_unlinked(pwd.dentry)) { unsigned long len; struct path tmp = root; char * cwd; + spin_unlock(&pwd.dentry->d_lock); + /* XXX: race here, have to close (eg. 
return unlinked from __d_path) */ cwd = __d_path(&pwd, &tmp, page, PAGE_SIZE); - spin_unlock(&dcache_lock); + vfsmount_read_unlock(); error = PTR_ERR(cwd); if (IS_ERR(cwd)) @@ -2118,8 +2430,10 @@ SYSCALL_DEFINE2(getcwd, char __user *, b if (copy_to_user(buf, cwd, len)) error = -EFAULT; } - } else - spin_unlock(&dcache_lock); + } else { + spin_unlock(&pwd.dentry->d_lock); + vfsmount_read_unlock(); + } out: path_put(&pwd); @@ -2147,35 +2461,39 @@ out: int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - unsigned long seq; + unsigned seq; if (new_dentry == old_dentry) return 1; - /* - * Need rcu_readlock to protect against the d_parent trashing - * due to d_move - */ - rcu_read_lock(); do { /* for restarting inner loop in case of seq retry */ seq = read_seqbegin(&rename_lock); + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move + */ + rcu_read_lock(); if (d_ancestor(old_dentry, new_dentry)) result = 1; else result = 0; + rcu_read_unlock(); } while (read_seqretry(&rename_lock, seq)); - rcu_read_unlock(); return result; } void d_genocide(struct dentry *root) { - struct dentry *this_parent = root; + struct dentry *this_parent; struct list_head *next; + unsigned seq; - spin_lock(&dcache_lock); +rename_retry: + this_parent = root; + seq = read_seqbegin(&rename_lock); + spin_lock(&this_parent->d_lock); repeat: next = this_parent->d_subdirs.next; resume: @@ -2183,21 +2501,49 @@ resume: struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); next = tmp->next; - if (d_unhashed(dentry)||!dentry->d_inode) + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (d_unhashed(dentry) || !dentry->d_inode) { + spin_unlock(&dentry->d_lock); continue; + } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); goto 
repeat; } - atomic_dec(&dentry->d_count); + dentry->d_count--; + spin_unlock(&dentry->d_lock); } if (this_parent != root) { - next = this_parent->d_u.d_child.next; - atomic_dec(&this_parent->d_count); - this_parent = this_parent->d_parent; + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + this_parent->d_count--; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + // d_unlinked(this_parent) || XXX + read_seqretry(&rename_lock, seq)) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; goto resume; } - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; } /** @@ -2250,7 +2596,7 @@ static void __init dcache_init_early(voi dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, HASH_EARLY, @@ -2258,8 +2604,10 @@ static void __init dcache_init_early(voi &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + for (loop = 0; loop < (1 << d_hash_shift); loop++) { + spin_lock_init(&dentry_hashtable[loop].lock); + INIT_HLIST_HEAD(&dentry_hashtable[loop].head); + } } static void __init dcache_init(void) @@ -2282,7 +2630,7 @@ static void __init dcache_init(void) dentry_hashtable = alloc_large_system_hash("Dentry cache", - sizeof(struct hlist_head), + sizeof(struct dcache_hash_bucket), dhash_entries, 13, 0, @@ -2290,8 +2638,10 @@ static void __init dcache_init(void) &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) - INIT_HLIST_HEAD(&dentry_hashtable[loop]); + for (loop = 0; loop < (1 << d_hash_shift); loop++) { + 
spin_lock_init(&dentry_hashtable[loop].lock); + INIT_HLIST_HEAD(&dentry_hashtable[loop].head); + } } /* SLAB cache for __getname() consumers */ @@ -2341,7 +2691,6 @@ EXPORT_SYMBOL(d_rehash); EXPORT_SYMBOL(d_splice_alias); EXPORT_SYMBOL(d_add_ci); EXPORT_SYMBOL(d_validate); -EXPORT_SYMBOL(dget_locked); EXPORT_SYMBOL(dput); EXPORT_SYMBOL(find_inode_number); EXPORT_SYMBOL(have_submounts); Index: linux-2.6/fs/namei.c =================================================================== --- linux-2.6.orig/fs/namei.c +++ linux-2.6/fs/namei.c @@ -685,15 +685,16 @@ int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; - spin_lock(&vfsmount_lock); + + vfsmount_read_lock(); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -764,23 +765,20 @@ static __always_inline void follow_dotdo nd->path.mnt == nd->root.mnt) { break; } - spin_lock(&dcache_lock); if (nd->path.dentry != nd->path.mnt->mnt_root) { nd->path.dentry = dget(nd->path.dentry->d_parent); - spin_unlock(&dcache_lock); dput(old); break; } - spin_unlock(&dcache_lock); - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); parent = nd->path.mnt->mnt_parent; if (parent == nd->path.mnt) { - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); break; } mntget(parent); nd->path.dentry = dget(nd->path.mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); dput(old); mntput(nd->path.mnt); nd->path.mnt = parent; @@ -2168,12 +2166,10 @@ void dentry_unhash(struct dentry *dentry { dget(dentry); shrink_dcache_parent(dentry); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) == 2) + if (dentry->d_count == 2) __d_drop(dentry); spin_unlock(&dentry->d_lock); - 
spin_unlock(&dcache_lock); } int vfs_rmdir(struct inode *dir, struct dentry *dentry) @@ -2326,8 +2322,11 @@ static long do_unlinkat(int dfd, const c if (nd.last.name[nd.last.len]) goto slashes; inode = dentry->d_inode; - if (inode) - atomic_inc(&inode->i_count); + if (inode) { + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); + } error = mnt_want_write(nd.path.mnt); if (error) goto exit2; Index: linux-2.6/fs/namespace.c =================================================================== --- linux-2.6.orig/fs/namespace.c +++ linux-2.6/fs/namespace.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -37,12 +39,16 @@ #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) -/* spinlock for vfsmount related operations, inplace of dcache_lock */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); +/* + * vfsmount "brlock" style spinlock for vfsmount related operations, use + * vfsmount_read_lock/vfsmount_write_lock functions. 
+ */ +static DEFINE_PER_CPU(spinlock_t, vfsmount_lock); static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; @@ -54,6 +60,49 @@ static struct rw_semaphore namespace_sem struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); +void vfsmount_read_lock(void) +{ + spinlock_t *lock; + + lock = &get_cpu_var(vfsmount_lock); + spin_lock(lock); +} + +void vfsmount_read_unlock(void) +{ + spinlock_t *lock; + + lock = &__get_cpu_var(vfsmount_lock); + spin_unlock(lock); + put_cpu_var(vfsmount_lock); +} + +void vfsmount_write_lock(void) +{ + int i; + int nr = 0; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(vfsmount_lock, i); + spin_lock_nested(lock, nr); + nr++; + } +} + +void vfsmount_write_unlock(void) +{ + int i; + + for_each_possible_cpu(i) { + spinlock_t *lock; + + lock = &per_cpu(vfsmount_lock, i); + spin_unlock(lock); + } +} + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -64,18 +113,21 @@ static inline unsigned long hash(struct #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) -/* allocation is serialized by namespace_sem */ +/* + * allocation is serialized by namespace_sem, but we need the spinlock to + * serialise with freeing. 
+ */ static int mnt_alloc_id(struct vfsmount *mnt) { int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); if (!res) mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); if (res == -EAGAIN) goto retry; @@ -85,11 +137,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { int id = mnt->mnt_id; - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); } /* @@ -125,6 +177,49 @@ void mnt_release_group_id(struct vfsmoun mnt->mnt_group_id = 0; } +static inline void add_mnt_count(struct vfsmount *mnt, int n) +{ +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_count, smp_processor_id())) += n; +#else + mnt->mnt_count += n; +#endif +} + +static inline void inc_mnt_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))++; +#else + mnt->mnt_count++; +#endif +} + +static inline void dec_mnt_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + (*per_cpu_ptr(mnt->mnt_count, smp_processor_id()))--; +#else + mnt->mnt_count--; +#endif +} + +unsigned int count_mnt_count(struct vfsmount *mnt) +{ +#ifdef CONFIG_SMP + unsigned int count = 0; + int cpu; + + for_each_possible_cpu(cpu) { + count += *per_cpu_ptr(mnt->mnt_count, cpu); + } + + return count; +#else + return mnt->mnt_count; +#endif +} + struct vfsmount *alloc_vfsmnt(const char *name) { struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); @@ -141,7 +236,13 @@ struct vfsmount *alloc_vfsmnt(const char goto out_free_id; } - atomic_set(&mnt->mnt_count, 1); +#ifdef CONFIG_SMP + mnt->mnt_count = alloc_percpu(int); + if (!mnt->mnt_count) + goto out_free_devname; +#else + mnt->mnt_count = 0; +#endif INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); 
INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -153,14 +254,19 @@ struct vfsmount *alloc_vfsmnt(const char #ifdef CONFIG_SMP mnt->mnt_writers = alloc_percpu(int); if (!mnt->mnt_writers) - goto out_free_devname; + goto out_free_mntcount; #else mnt->mnt_writers = 0; #endif + preempt_disable(); + inc_mnt_count(mnt); + preempt_enable(); } return mnt; #ifdef CONFIG_SMP +out_free_mntcount: + free_percpu(mnt->mnt_count); out_free_devname: kfree(mnt->mnt_devname); #endif @@ -344,7 +450,7 @@ static int mnt_make_readonly(struct vfsm { int ret = 0; - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); mnt->mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -378,15 +484,15 @@ static int mnt_make_readonly(struct vfsm */ smp_wmb(); mnt->mnt_flags &= ~MNT_WRITE_HOLD; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); return ret; } static void __mnt_unmake_readonly(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); mnt->mnt_flags &= ~MNT_READONLY; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) @@ -439,10 +545,11 @@ struct vfsmount *__lookup_mnt(struct vfs struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; - spin_lock(&vfsmount_lock); + + vfsmount_read_lock(); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); return child_mnt; } @@ -473,9 +580,11 @@ static void detach_mnt(struct vfsmount * old_path->mnt = mnt->mnt_parent; mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt_root; - list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); + list_del_init(&mnt->mnt_child); old_path->dentry->d_mounted--; + WARN_ON(mnt->mnt_mounted != 1); + mnt->mnt_mounted--; } void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, @@ -492,6 +601,8 @@ static void attach_mnt(struct vfsmount * 
list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(path->mnt, path->dentry)); list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); + WARN_ON(mnt->mnt_mounted != 0); + mnt->mnt_mounted++; } /* @@ -514,6 +625,8 @@ static void commit_tree(struct vfsmount list_add_tail(&mnt->mnt_hash, mount_hashtable + hash(parent, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); + WARN_ON(mnt->mnt_mounted != 0); + mnt->mnt_mounted++; touch_mnt_namespace(n); } @@ -617,43 +730,79 @@ static inline void __mntput(struct vfsmo void mntput_no_expire(struct vfsmount *mnt) { -repeat: - if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { - if (likely(!mnt->mnt_pinned)) { - spin_unlock(&vfsmount_lock); - __mntput(mnt); - return; + if (likely(mnt->mnt_mounted)) { + vfsmount_read_lock(); + if (unlikely(!mnt->mnt_mounted)) { + vfsmount_read_unlock(); + goto repeat; } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; - spin_unlock(&vfsmount_lock); - acct_auto_close_mnt(mnt); - security_sb_umount_close(mnt); - goto repeat; + dec_mnt_count(mnt); + vfsmount_read_unlock(); + + return; } -} +repeat: + vfsmount_write_lock(); + BUG_ON(mnt->mnt_mounted); + dec_mnt_count(mnt); + if (count_mnt_count(mnt)) { + vfsmount_write_unlock(); + return; + } + if (likely(!mnt->mnt_pinned)) { + vfsmount_write_unlock(); + __mntput(mnt); + return; + } + add_mnt_count(mnt, mnt->mnt_pinned + 1); + mnt->mnt_pinned = 0; + vfsmount_write_unlock(); + acct_auto_close_mnt(mnt); + security_sb_umount_close(mnt); + goto repeat; +} EXPORT_SYMBOL(mntput_no_expire); +void mntput(struct vfsmount *mnt) +{ + if (mnt) { + /* avoid cacheline pingpong */ + if (unlikely(mnt->mnt_expiry_mark)) + mnt->mnt_expiry_mark = 0; + mntput_no_expire(mnt); + } +} +EXPORT_SYMBOL(mntput); + +struct vfsmount *mntget(struct vfsmount *mnt) +{ + if (mnt) { + preempt_disable(); + inc_mnt_count(mnt); + preempt_enable(); + } + return mnt; +} +EXPORT_SYMBOL(mntget); + void mnt_pin(struct vfsmount 
*mnt) { - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); mnt->mnt_pinned++; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } - EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); if (mnt->mnt_pinned) { - atomic_inc(&mnt->mnt_count); + inc_mnt_count(mnt); mnt->mnt_pinned--; } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } - EXPORT_SYMBOL(mnt_unpin); static inline void mangle(struct seq_file *m, const char *s) @@ -934,12 +1083,13 @@ int may_umount_tree(struct vfsmount *mnt int minimum_refs = 0; struct vfsmount *p; - spin_lock(&vfsmount_lock); + /* write lock needed for count_mnt_count */ + vfsmount_write_lock(); for (p = mnt; p; p = next_mnt(p, mnt)) { - actual_refs += atomic_read(&p->mnt_count); + actual_refs += count_mnt_count(p); minimum_refs += 2; } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); if (actual_refs > minimum_refs) return 0; @@ -965,10 +1115,12 @@ EXPORT_SYMBOL(may_umount_tree); int may_umount(struct vfsmount *mnt) { int ret = 1; - spin_lock(&vfsmount_lock); + + vfsmount_write_lock(); if (propagate_mount_busy(mnt, 2)) ret = 0; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); + return ret; } @@ -983,13 +1135,14 @@ void release_mounts(struct list_head *he if (mnt->mnt_parent != mnt) { struct dentry *dentry; struct vfsmount *m; - spin_lock(&vfsmount_lock); + + vfsmount_write_lock(); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); dput(dentry); mntput(m); } @@ -1013,6 +1166,8 @@ void umount_tree(struct vfsmount *mnt, i __touch_mnt_namespace(p->mnt_ns); p->mnt_ns = NULL; list_del_init(&p->mnt_child); + WARN_ON(p->mnt_mounted != 1); + p->mnt_mounted--; if (p->mnt_parent != p) { p->mnt_parent->mnt_ghosts++; p->mnt_mountpoint->d_mounted--; @@ -1044,8 +1199,16 @@ static int do_umount(struct vfsmount *mn flags & 
(MNT_FORCE | MNT_DETACH)) return -EINVAL; - if (atomic_read(&mnt->mnt_count) != 2) + /* + * probably don't strictly need the lock here if we examined + * all race cases, but it's a slowpath. + */ + vfsmount_write_lock(); + if (count_mnt_count(mnt) != 2) { + vfsmount_write_unlock(); + return -EBUSY; + } + vfsmount_write_unlock(); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1087,7 +1250,7 @@ static int do_umount(struct vfsmount *mn } down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); event++; if (!(flags & MNT_DETACH)) @@ -1099,7 +1262,7 @@ static int do_umount(struct vfsmount *mn umount_tree(mnt, 1, &umount_list); retval = 0; } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); if (retval) security_sb_umount_busy(mnt); up_write(&namespace_sem); @@ -1206,19 +1369,19 @@ struct vfsmount *copy_tree(struct vfsmou q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); umount_tree(res, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); release_mounts(&umount_list); } return NULL; @@ -1237,9 +1400,9 @@ void drop_collected_mounts(struct vfsmou { LIST_HEAD(umount_list); down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1357,7 +1520,7 @@ static int attach_recursive_mnt(struct v set_mnt_shared(p); } - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); if (parent_path) { detach_mnt(source_mnt, parent_path); attach_mnt(source_mnt, path); @@ -1371,7 +1534,8 @@ static int attach_recursive_mnt(struct v list_del_init(&child->mnt_hash); 
commit_tree(child); } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); + return 0; out_cleanup_ids: @@ -1433,10 +1597,10 @@ static int do_change_type(struct path *p goto out_unlock; } - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); out_unlock: up_write(&namespace_sem); @@ -1480,9 +1644,10 @@ static int do_loopback(struct path *path err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + + vfsmount_write_lock(); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); release_mounts(&umount_list); } @@ -1540,9 +1705,9 @@ static int do_remount(struct path *path, if (!err) { security_sb_post_remount(path->mnt, flags, data); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); touch_mnt_namespace(path->mnt->mnt_ns); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); } return err; } @@ -1717,7 +1882,7 @@ void mark_mounts_for_expiry(struct list_ return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1736,7 +1901,7 @@ void mark_mounts_for_expiry(struct list_ touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); up_write(&namespace_sem); release_mounts(&umounts); @@ -2011,9 +2176,9 @@ static struct mnt_namespace *dup_mnt_ns( kfree(new_ns); return ERR_PTR(-ENOMEM); } - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2210,7 +2375,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ goto out2; /* not attached */ /* make sure we can reach put_old from 
new_root */ tmp = old.mnt; - spin_lock(&vfsmount_lock); + vfsmount_write_lock(); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) @@ -2230,7 +2395,7 @@ SYSCALL_DEFINE2(pivot_root, const char _ /* mount new_root on / */ attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); chroot_fs_refs(&root, &new); security_sb_post_pivotroot(&root, &new); error = 0; @@ -2246,7 +2411,7 @@ out1: out0: return error; out3: - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); goto out2; } @@ -2276,6 +2441,7 @@ static void __init init_mount_tree(void) void __init mnt_init(void) { unsigned u; + int i; int err; init_rwsem(&namespace_sem); @@ -2293,6 +2459,9 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); + for_each_possible_cpu(i) + spin_lock_init(&per_cpu(vfsmount_lock, i)); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2308,16 +2477,22 @@ void put_mnt_ns(struct mnt_namespace *ns { struct vfsmount *root; LIST_HEAD(umount_list); + spinlock_t *lock; - if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) + lock = &get_cpu_var(vfsmount_lock); + if (!atomic_dec_and_lock(&ns->count, lock)) { + put_cpu_var(vfsmount_lock); return; + } root = ns->root; ns->root = NULL; - spin_unlock(&vfsmount_lock); + spin_unlock(lock); + put_cpu_var(vfsmount_lock); + down_write(&namespace_sem); - spin_lock(&vfsmount_lock); - umount_tree(root, 0, &umount_list); - spin_unlock(&vfsmount_lock); + vfsmount_write_lock(); + umount_tree(root, 0, &umount_list); + vfsmount_write_unlock(); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); Index: linux-2.6/fs/pnode.c =================================================================== --- linux-2.6.orig/fs/pnode.c +++ linux-2.6/fs/pnode.c @@ -264,12 +264,12 @@ int propagate_mnt(struct vfsmount *dest_ prev_src_mnt = child; } out: - spin_lock(&vfsmount_lock); + 
vfsmount_write_lock(); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } - spin_unlock(&vfsmount_lock); + vfsmount_write_unlock(); release_mounts(&umount_list); return ret; } @@ -279,7 +279,7 @@ out: */ static inline int do_refcount_check(struct vfsmount *mnt, int count) { - int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; + int mycount = count_mnt_count(mnt) - mnt->mnt_ghosts; return (mycount > count); } Index: linux-2.6/fs/proc/base.c =================================================================== --- linux-2.6.orig/fs/proc/base.c +++ linux-2.6/fs/proc/base.c @@ -652,12 +652,12 @@ static unsigned mounts_poll(struct file poll_wait(file, &ns->poll, wait); - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); if (p->event != ns->event) { p->event = ns->event; res |= POLLERR | POLLPRI; } - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); return res; } Index: linux-2.6/include/linux/mount.h =================================================================== --- linux-2.6.orig/include/linux/mount.h +++ linux-2.6/include/linux/mount.h @@ -56,20 +56,20 @@ struct vfsmount { struct mnt_namespace *mnt_ns; /* containing namespace */ int mnt_id; /* mount identifier */ int mnt_group_id; /* peer group identifier */ - /* - * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount - * to let these frequently modified fields in a separate cache line - * (so that reads of mnt_flags wont ping-pong on SMP machines) - */ - atomic_t mnt_count; int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; + int mnt_mounted; #ifdef CONFIG_SMP int *mnt_writers; #else int mnt_writers; #endif +#ifdef CONFIG_SMP + int *mnt_count; +#else + int mnt_count; +#endif }; static inline int *get_mnt_writers_ptr(struct vfsmount *mnt) @@ -81,32 +81,28 @@ static inline int *get_mnt_writers_ptr(s #endif } -static inline struct vfsmount *mntget(struct vfsmount *mnt) 
-{ - if (mnt) - atomic_inc(&mnt->mnt_count); - return mnt; -} - struct file; /* forward dec */ +extern void vfsmount_read_lock(void); +extern void vfsmount_read_unlock(void); +extern void vfsmount_write_lock(void); +extern void vfsmount_write_unlock(void); + +extern unsigned int count_mnt_count(struct vfsmount *mnt); + extern int mnt_want_write(struct vfsmount *mnt); extern int mnt_want_write_file(struct file *file); extern int mnt_clone_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt); + extern void mntput_no_expire(struct vfsmount *mnt); +extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mntput(struct vfsmount *mnt); + extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); -static inline void mntput(struct vfsmount *mnt) -{ - if (mnt) { - mnt->mnt_expiry_mark = 0; - mntput_no_expire(mnt); - } -} - extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); @@ -123,7 +119,6 @@ extern int do_add_mount(struct vfsmount extern void mark_mounts_for_expiry(struct list_head *mounts); -extern spinlock_t vfsmount_lock; extern dev_t name_to_dev_t(char *name); #endif /* _LINUX_MOUNT_H */ Index: linux-2.6/kernel/audit_tree.c =================================================================== --- linux-2.6.orig/kernel/audit_tree.c +++ linux-2.6/kernel/audit_tree.c @@ -758,15 +758,15 @@ int audit_tag_tree(char *old, char *new) continue; } - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); if (!is_under(mnt, dentry, &path)) { - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); path_put(&path); put_tree(tree); mutex_lock(&audit_filter_mutex); continue; } - spin_unlock(&vfsmount_lock); + vfsmount_read_unlock(); path_put(&path); list_for_each_entry(p, &list, mnt_list) { Index: linux-2.6/security/tomoyo/realpath.c =================================================================== --- 
linux-2.6.orig/security/tomoyo/realpath.c +++ linux-2.6/security/tomoyo/realpath.c @@ -96,16 +96,14 @@ int tomoyo_realpath_from_path2(struct pa root = current->fs->root; path_get(&root); read_unlock(¤t->fs->lock); - spin_lock(&vfsmount_lock); + vfsmount_read_lock(); if (root.mnt && root.mnt->mnt_ns) ns_root.mnt = mntget(root.mnt->mnt_ns->root); if (ns_root.mnt) ns_root.dentry = dget(ns_root.mnt->mnt_root); - spin_unlock(&vfsmount_lock); - spin_lock(&dcache_lock); tmp = ns_root; sp = __d_path(path, &tmp, newname, newname_len); - spin_unlock(&dcache_lock); + vfsmount_read_unlock(); path_put(&root); path_put(&ns_root); } Index: linux-2.6/fs/libfs.c =================================================================== --- linux-2.6.orig/fs/libfs.c +++ linux-2.6/fs/libfs.c @@ -14,6 +14,11 @@ #include +static inline int simple_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { @@ -79,7 +84,8 @@ int dcache_dir_close(struct inode *inode loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) { - mutex_lock(&file->f_path.dentry->d_inode->i_mutex); + struct dentry *dentry = file->f_path.dentry; + mutex_lock(&dentry->d_inode->i_mutex); switch (origin) { case 1: offset += file->f_pos; @@ -87,7 +93,7 @@ loff_t dcache_dir_lseek(struct file *fil if (offset >= 0) break; default: - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { @@ -97,21 +103,27 @@ loff_t dcache_dir_lseek(struct file *fil struct dentry *cursor = file->private_data; loff_t n = file->f_pos - 2; - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED); list_del(&cursor->d_u.d_child); - p = file->f_path.dentry->d_subdirs.next; - while (n && p != &file->f_path.dentry->d_subdirs) { + spin_unlock(&cursor->d_lock); + p = 
dentry->d_subdirs.next; + while (n && p != &dentry->d_subdirs) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (!d_unhashed(next) && next->d_inode) + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(next)) n--; + spin_unlock(&next->d_lock); p = p->next; } + spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED); list_add_tail(&cursor->d_u.d_child, p); - spin_unlock(&dcache_lock); + spin_unlock(&cursor->d_lock); + spin_unlock(&dentry->d_lock); } } - mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return offset; } @@ -151,29 +163,38 @@ int dcache_readdir(struct file * filp, v i++; /* fallthrough */ default: - spin_lock(&dcache_lock); - if (filp->f_pos == 2) + spin_lock(&dentry->d_lock); + if (filp->f_pos == 2) { + spin_lock_nested(&cursor->d_lock, DENTRY_D_LOCK_NESTED); list_move(q, &dentry->d_subdirs); + spin_unlock(&cursor->d_lock); + } for (p=q->next; p != &dentry->d_subdirs; p=p->next) { struct dentry *next; next = list_entry(p, struct dentry, d_u.d_child); - if (d_unhashed(next) || !next->d_inode) + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); + if (!simple_positive(next)) { + spin_unlock(&next->d_lock); continue; + } - spin_unlock(&dcache_lock); + spin_unlock(&next->d_lock); + spin_unlock(&dentry->d_lock); if (filldir(dirent, next->d_name.name, next->d_name.len, filp->f_pos, next->d_inode->i_ino, dt_type(next->d_inode)) < 0) return 0; - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); + spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); /* next is still alive */ list_move(q, p); + spin_unlock(&next->d_lock); p = q; filp->f_pos++; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); } return 0; } @@ -244,6 +265,7 @@ int get_sb_pseudo(struct file_system_typ d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; + mnt->mnt_mounted++; simple_set_mnt(mnt, s); return 0; @@ -258,29 +280,31 @@ int 
simple_link(struct dentry *old_dentr inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); dget(dentry); d_instantiate(dentry, inode); return 0; } -static inline int simple_positive(struct dentry *dentry) -{ - return dentry->d_inode && !d_unhashed(dentry); -} - int simple_empty(struct dentry *dentry) { struct dentry *child; int ret = 0; - spin_lock(&dcache_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) - if (simple_positive(child)) + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(child)) { + spin_unlock(&child->d_lock); goto out; + } + spin_unlock(&child->d_lock); + } ret = 1; out: - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return ret; } Index: linux-2.6/include/linux/dcache.h =================================================================== --- linux-2.6.orig/include/linux/dcache.h +++ linux-2.6/include/linux/dcache.h @@ -37,8 +37,8 @@ struct qstr { }; struct dentry_stat_t { - int nr_dentry; - int nr_unused; + int nr_dentry; /* unused */ + int nr_unused; /* protected by dcache_lru_lock */ int age_limit; /* age in seconds */ int want_pages; /* pages requested by system */ int dummy[2]; @@ -87,7 +87,7 @@ full_name_hash(const unsigned char *name #endif struct dentry { - atomic_t d_count; + unsigned int d_count; /* protected by d_lock */ unsigned int d_flags; /* protected by d_lock */ spinlock_t d_lock; /* per dentry lock */ int d_mounted; @@ -150,13 +150,13 @@ struct dentry_operations { /* locking rules: - big lock dcache_lock d_lock may block -d_revalidate: no no no yes -d_hash no no no yes -d_compare: no yes yes no -d_delete: no yes no no -d_release: no no no yes -d_iput: no no no yes + big lock d_lock may block +d_revalidate: no no yes +d_hash no no yes +d_compare: 
no yes no +d_delete: no no no +d_release: no no yes +d_iput: no no yes */ /* d_flags entries */ @@ -186,7 +186,6 @@ d_iput: no no no yes #define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */ -extern spinlock_t dcache_lock; extern seqlock_t rename_lock; /** @@ -204,23 +203,8 @@ extern seqlock_t rename_lock; * * __d_drop requires dentry->d_lock. */ - -static inline void __d_drop(struct dentry *dentry) -{ - if (!(dentry->d_flags & DCACHE_UNHASHED)) { - dentry->d_flags |= DCACHE_UNHASHED; - hlist_del_rcu(&dentry->d_hash); - } -} - -static inline void d_drop(struct dentry *dentry) -{ - spin_lock(&dcache_lock); - spin_lock(&dentry->d_lock); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); -} +void d_drop(struct dentry *dentry); +void __d_drop(struct dentry *dentry); static inline int dname_external(struct dentry *dentry) { @@ -318,28 +302,31 @@ extern char *dentry_path(struct dentry * /* Allocation counts.. */ /** - * dget, dget_locked - get a reference to a dentry + * dget, dget_dlock - get a reference to a dentry * @dentry: dentry to get a reference to * * Given a dentry or %NULL pointer increment the reference count * if appropriate and return the dentry. A dentry will not be - * destroyed when it has references. dget() should never be - * called for dentries with zero reference counter. For these cases - * (preferably none, functions in dcache.c are sufficient for normal - * needs and they take necessary precautions) you should hold dcache_lock - * and call dget_locked() instead of dget(). + * destroyed when it has references. 
*/ - +static inline struct dentry *dget_dlock(struct dentry *dentry) +{ + if (dentry) + dentry->d_count++; + return dentry; +} + static inline struct dentry *dget(struct dentry *dentry) { if (dentry) { - BUG_ON(!atomic_read(&dentry->d_count)); - atomic_inc(&dentry->d_count); + spin_lock(&dentry->d_lock); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); } return dentry; } -extern struct dentry * dget_locked(struct dentry *); +extern struct dentry *dget_parent(struct dentry *dentry); /** * d_unhashed - is dentry hashed @@ -358,16 +345,6 @@ static inline int d_unlinked(struct dent return d_unhashed(dentry) && !IS_ROOT(dentry); } -static inline struct dentry *dget_parent(struct dentry *dentry) -{ - struct dentry *ret; - - spin_lock(&dentry->d_lock); - ret = dget(dentry->d_parent); - spin_unlock(&dentry->d_lock); - return ret; -} - extern void dput(struct dentry *); static inline int d_mountpoint(struct dentry *dentry) Index: linux-2.6/kernel/sysctl.c =================================================================== --- linux-2.6.orig/kernel/sysctl.c +++ linux-2.6/kernel/sysctl.c @@ -1443,7 +1443,7 @@ static struct ctl_table fs_table[] = { .data = &inodes_stat, .maxlen = 2*sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_nr_inodes, }, { .ctl_name = FS_STATINODE, @@ -1451,7 +1451,7 @@ static struct ctl_table fs_table[] = { .data = &inodes_stat, .maxlen = 7*sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_nr_inodes, }, { .procname = "file-nr", @@ -1479,6 +1479,12 @@ static struct ctl_table fs_table[] = { .extra2 = &sysctl_nr_open_max, }, { + /* + * dentry_stat has an atomic_t member, so this is a bit of + * a hack, but it works for the moment, and I won't bother + * changing it now because we'll probably want to change to + * a more scalable counter anyway. 
+ */ .ctl_name = FS_DENTRY, .procname = "dentry-state", .data = &dentry_stat, Index: linux-2.6/fs/configfs/dir.c =================================================================== --- linux-2.6.orig/fs/configfs/dir.c +++ linux-2.6/fs/configfs/dir.c @@ -399,8 +399,7 @@ static void remove_dir(struct dentry * d if (d->d_inode) simple_rmdir(parent->d_inode,d); - pr_debug(" o %s removing done (%d)\n",d->d_name.name, - atomic_read(&d->d_count)); + pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count); dput(parent); } Index: linux-2.6/fs/locks.c =================================================================== --- linux-2.6.orig/fs/locks.c +++ linux-2.6/fs/locks.c @@ -1374,8 +1374,7 @@ int generic_setlease(struct file *filp, if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) goto out; if ((arg == F_WRLCK) - && ((atomic_read(&dentry->d_count) > 1) - || (atomic_read(&inode->i_count) > 1))) + && (dentry->d_count > 1 || inode->i_count > 1)) goto out; } Index: linux-2.6/fs/autofs4/expire.c =================================================================== --- linux-2.6.orig/fs/autofs4/expire.c +++ linux-2.6/fs/autofs4/expire.c @@ -93,22 +93,59 @@ done: /* * Calculate next entry in top down tree traversal. * From next_mnt in namespace.c - elegant. + * + * How is this supposed to work if we drop autofs4_lock between calls anyway? + * How does it cope with renames? + * And also callers dput the returned dentry before taking autofs4_lock again + * so what prevents it from being freed?? 
*/ -static struct dentry *next_dentry(struct dentry *p, struct dentry *root) +static struct dentry *get_next_positive_dentry(struct dentry *p, + struct dentry *root) { - struct list_head *next = p->d_subdirs.next; + struct list_head *next; + struct dentry *ret; + spin_lock(&autofs4_lock); +again: + spin_lock(&p->d_lock); + next = p->d_subdirs.next; if (next == &p->d_subdirs) { while (1) { - if (p == root) + struct dentry *parent; + + if (p == root) { + spin_unlock(&p->d_lock); + spin_unlock(&autofs4_lock); return NULL; + } + + parent = p->d_parent; + if (!spin_trylock(&parent->d_lock)) { + spin_unlock(&p->d_lock); + goto again; + } + spin_unlock(&p->d_lock); next = p->d_u.d_child.next; - if (next != &p->d_parent->d_subdirs) + p = parent; + if (next != &parent->d_subdirs) break; - p = p->d_parent; } } - return list_entry(next, struct dentry, d_u.d_child); + ret = list_entry(next, struct dentry, d_u.d_child); + + spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED); + /* Negative dentry - try next */ + if (!simple_positive(ret)) { + spin_unlock(&ret->d_lock); + p = ret; + goto again; + } + dget_dlock(ret); + spin_unlock(&ret->d_lock); + spin_unlock(&p->d_lock); + spin_unlock(&autofs4_lock); + + return ret; } /* @@ -158,18 +195,11 @@ static int autofs4_tree_busy(struct vfsm if (!simple_positive(top)) return 1; - spin_lock(&dcache_lock); - for (p = top; p; p = next_dentry(p, top)) { - /* Negative dentry - give up */ - if (!simple_positive(p)) - continue; + for (p = dget(top); p; p = get_next_positive_dentry(p, top)) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); - spin_unlock(&dcache_lock); - /* * Is someone visiting anywhere in the subtree ? 
* If there's no mount we need to check the usage @@ -198,16 +228,14 @@ static int autofs4_tree_busy(struct vfsm else ino_count++; - if (atomic_read(&p->d_count) > ino_count) { + if (p->d_count > ino_count) { top_ino->last_used = jiffies; dput(p); return 1; } } dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); /* Timeout of a tree mount is ultimately determined by its top dentry */ if (!autofs4_can_expire(top, timeout, do_now)) @@ -226,18 +254,11 @@ static struct dentry *autofs4_check_leav DPRINTK("parent %p %.*s", parent, (int)parent->d_name.len, parent->d_name.name); - spin_lock(&dcache_lock); - for (p = parent; p; p = next_dentry(p, parent)) { - /* Negative dentry - give up */ - if (!simple_positive(p)) - continue; + for (p = dget(parent); p; p = get_next_positive_dentry(p, parent)) { DPRINTK("dentry %p %.*s", p, (int) p->d_name.len, p->d_name.name); - p = dget(p); - spin_unlock(&dcache_lock); - if (d_mountpoint(p)) { /* Can we umount this guy */ if (autofs4_mount_busy(mnt, p)) @@ -249,9 +270,7 @@ static struct dentry *autofs4_check_leav } cont: dput(p); - spin_lock(&dcache_lock); } - spin_unlock(&dcache_lock); return NULL; } @@ -294,6 +313,8 @@ struct dentry *autofs4_expire_direct(str * A tree is eligible if :- * - it is unused by any user process * - it has been unused for exp_timeout time + * This seems to be racy dropping autofs4_lock and asking for next->next after + * the lock has been dropped. 
*/ struct dentry *autofs4_expire_indirect(struct super_block *sb, struct vfsmount *mnt, @@ -315,7 +336,8 @@ struct dentry *autofs4_expire_indirect(s now = jiffies; timeout = sbi->exp_timeout; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&root->d_lock); next = root->d_subdirs.next; /* On exit from the loop expire is set to a dgot dentry @@ -329,8 +351,11 @@ struct dentry *autofs4_expire_indirect(s continue; } - dentry = dget(dentry); - spin_unlock(&dcache_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + dentry = dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&root->d_lock); + spin_unlock(&autofs4_lock); spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); @@ -347,7 +372,7 @@ struct dentry *autofs4_expire_indirect(s /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 2; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; /* Can we umount this guy */ @@ -369,7 +394,7 @@ struct dentry *autofs4_expire_indirect(s if (!exp_leaves) { /* Path walk currently on this dentry? */ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { @@ -383,7 +408,7 @@ struct dentry *autofs4_expire_indirect(s } else { /* Path walk currently on this dentry? 
*/ ino_count = atomic_read(&ino->count) + 1; - if (atomic_read(&dentry->d_count) > ino_count) + if (dentry->d_count > ino_count) goto next; expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); @@ -395,10 +420,12 @@ struct dentry *autofs4_expire_indirect(s next: spin_unlock(&sbi->fs_lock); dput(dentry); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&root->d_lock); next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&root->d_lock); + spin_unlock(&autofs4_lock); return NULL; found: @@ -408,9 +435,13 @@ found: ino->flags |= AUTOFS_INF_EXPIRING; init_completion(&ino->expire_complete); spin_unlock(&sbi->fs_lock); - spin_lock(&dcache_lock); - list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); - spin_unlock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&expired->d_parent->d_lock); + spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED); + list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); + spin_unlock(&expired->d_lock); + spin_unlock(&expired->d_parent->d_lock); + spin_unlock(&autofs4_lock); return expired; } Index: linux-2.6/fs/autofs4/root.c =================================================================== --- linux-2.6.orig/fs/autofs4/root.c +++ linux-2.6/fs/autofs4/root.c @@ -17,8 +17,11 @@ #include #include #include +#include #include "autofs_i.h" +DEFINE_SPINLOCK(autofs4_lock); + static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); static int autofs4_dir_unlink(struct inode *,struct dentry *); static int autofs4_dir_rmdir(struct inode *,struct dentry *); @@ -92,12 +95,15 @@ static int autofs4_dir_open(struct inode * autofs file system so just let the libfs routines handle * it. 
*/ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&dentry->d_lock); if (!d_mountpoint(dentry) && __simple_empty(dentry)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); return -ENOENT; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); out: return dcache_dir_open(inode, file); @@ -211,10 +217,12 @@ static void *autofs4_follow_link(struct * multi-mount with no root mount offset. So don't try to * mount it again. */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&dentry->d_lock); if (dentry->d_flags & DCACHE_AUTOFS_PENDING || (!d_mountpoint(dentry) && __simple_empty(dentry))) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); status = try_to_fill_dentry(dentry, 0); if (status) @@ -222,7 +230,8 @@ static void *autofs4_follow_link(struct goto follow; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&autofs4_lock); follow: /* * If there is no root mount it must be an autofs @@ -292,13 +301,13 @@ static int autofs4_revalidate(struct den return 0; /* Check for a non-mountpoint directory with no contents */ - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && __simple_empty(dentry)) { DPRINTK("dentry=%p %.*s, emptydir", dentry, dentry->d_name.len, dentry->d_name.name); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); /* The daemon never causes a mount to trigger */ if (oz_mode) @@ -314,7 +323,7 @@ static int autofs4_revalidate(struct den return status; } - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return 1; } @@ -366,7 +375,7 @@ static struct dentry *autofs4_lookup_act const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->active_list; list_for_each(p, head) { @@ -380,7 +389,7 @@ 
static struct dentry *autofs4_lookup_act spin_lock(&dentry->d_lock); /* Already gone? */ - if (atomic_read(&dentry->d_count) == 0) + if (dentry->d_count == 0) goto next; qstr = &dentry->d_name; @@ -396,17 +405,17 @@ static struct dentry *autofs4_lookup_act goto next; if (d_unhashed(dentry)) { - dget(dentry); + dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return dentry; } next: spin_unlock(&dentry->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -418,7 +427,7 @@ static struct dentry *autofs4_lookup_exp const unsigned char *str = name->name; struct list_head *p, *head; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); head = &sbi->expiring_list; list_for_each(p, head) { @@ -448,17 +457,17 @@ static struct dentry *autofs4_lookup_exp goto next; if (d_unhashed(dentry)) { - dget(dentry); + dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return dentry; } next: spin_unlock(&dentry->d_lock); } spin_unlock(&sbi->lookup_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return NULL; } @@ -704,7 +713,7 @@ static int autofs4_dir_unlink(struct ino dir->i_mtime = CURRENT_TIME; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); spin_lock(&sbi->lookup_lock); if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); @@ -712,7 +721,7 @@ static int autofs4_dir_unlink(struct ino spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); return 0; } @@ -729,19 +738,21 @@ static int autofs4_dir_rmdir(struct inod if (!autofs4_oz_mode(sbi)) return -EACCES; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&sbi->lookup_lock); + spin_lock(&dentry->d_lock); if 
(!list_empty(&dentry->d_subdirs)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&sbi->lookup_lock); + spin_unlock(&autofs4_lock); return -ENOTEMPTY; } - spin_lock(&sbi->lookup_lock); if (list_empty(&ino->expiring)) list_add(&ino->expiring, &sbi->expiring_list); spin_unlock(&sbi->lookup_lock); - spin_lock(&dentry->d_lock); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); Index: linux-2.6/fs/coda/dir.c =================================================================== --- linux-2.6.orig/fs/coda/dir.c +++ linux-2.6/fs/coda/dir.c @@ -302,7 +302,9 @@ static int coda_link(struct dentry *sour } coda_dir_update_mtime(dir_inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); d_instantiate(de, inode); inc_nlink(inode); @@ -611,7 +613,7 @@ static int coda_dentry_revalidate(struct if (cii->c_flags & C_FLUSH) coda_flag_inode_children(inode, C_FLUSH); - if (atomic_read(&de->d_count) > 1) + if (de->d_count > 1) /* pretend it's valid, but don't change the flags */ goto out; Index: linux-2.6/fs/ecryptfs/inode.c =================================================================== --- linux-2.6.orig/fs/ecryptfs/inode.c +++ linux-2.6/fs/ecryptfs/inode.c @@ -263,7 +263,7 @@ int ecryptfs_lookup_and_interpose_lower( ecryptfs_dentry->d_parent)); lower_inode = lower_dentry->d_inode; fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); - BUG_ON(!atomic_read(&lower_dentry->d_count)); + BUG_ON(!lower_dentry->d_count); ecryptfs_set_dentry_private(ecryptfs_dentry, kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL)); Index: linux-2.6/fs/hpfs/namei.c =================================================================== --- linux-2.6.orig/fs/hpfs/namei.c +++ linux-2.6/fs/hpfs/namei.c @@ -415,7 +415,7 @@ again: 
mutex_unlock(&hpfs_i(inode)->i_parent_mutex); d_drop(dentry); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1 || + if (dentry->d_count > 1 || generic_permission(inode, MAY_WRITE, NULL) || !S_ISREG(inode->i_mode) || get_write_access(inode)) { Index: linux-2.6/fs/nfs/dir.c =================================================================== --- linux-2.6.orig/fs/nfs/dir.c +++ linux-2.6/fs/nfs/dir.c @@ -1325,7 +1325,7 @@ static int nfs_sillyrename(struct inode dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - atomic_read(&dentry->d_count)); + dentry->d_count); nfs_inc_stats(dir, NFSIOS_SILLYRENAME); /* @@ -1432,11 +1432,9 @@ static int nfs_unlink(struct inode *dir, dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); - if (atomic_read(&dentry->d_count) > 1) { + if (dentry->d_count > 1) { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); /* Start asynchronous writeout of the inode */ write_inode_now(dentry->d_inode, 0); error = nfs_sillyrename(dir, dentry); @@ -1447,7 +1445,6 @@ static int nfs_unlink(struct inode *dir, need_rehash = 1; } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error || error == -ENOENT) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -1539,7 +1536,9 @@ nfs_link(struct dentry *old_dentry, stru d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); d_add(dentry, inode); } return error; @@ -1589,7 +1588,7 @@ static int nfs_rename(struct inode *old_ dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, - atomic_read(&new_dentry->d_count)); + 
new_dentry->d_count); /* * First check whether the target is busy ... we can't @@ -1605,7 +1604,7 @@ static int nfs_rename(struct inode *old_ error = -EISDIR; if (!S_ISDIR(old_inode->i_mode)) goto out; - } else if (atomic_read(&new_dentry->d_count) > 2) { + } else if (new_dentry->d_count > 2) { int err; /* copy the target dentry's name */ dentry = d_alloc(new_dentry->d_parent, @@ -1620,7 +1619,7 @@ static int nfs_rename(struct inode *old_ new_inode = NULL; /* instantiate the replacement target */ d_instantiate(new_dentry, NULL); - } else if (atomic_read(&new_dentry->d_count) > 1) + } else if (new_dentry->d_count > 1) /* dentry still busy? */ goto out; } @@ -1629,7 +1628,7 @@ go_ahead: /* * ... prune child dentries and writebacks if needed. */ - if (atomic_read(&old_dentry->d_count) > 1) { + if (old_dentry->d_count > 1) { if (S_ISREG(old_inode->i_mode)) nfs_wb_all(old_inode); shrink_dcache_parent(old_dentry); Index: linux-2.6/fs/nfsd/vfs.c =================================================================== --- linux-2.6.orig/fs/nfsd/vfs.c +++ linux-2.6/fs/nfsd/vfs.c @@ -1754,8 +1754,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru goto out_dput_new; if (svc_msnfs(ffhp) && - ((atomic_read(&odentry->d_count) > 1) - || (atomic_read(&ndentry->d_count) > 1))) { + ((odentry->d_count > 1) || (ndentry->d_count > 1))) { host_err = -EPERM; goto out_dput_new; } @@ -1841,7 +1840,7 @@ nfsd_unlink(struct svc_rqst *rqstp, stru if (type != S_IFDIR) { /* It's UNLINK */ #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && - (atomic_read(&rdentry->d_count) > 1)) { + (rdentry->d_count > 1)) { host_err = -EPERM; } else #endif Index: linux-2.6/fs/exportfs/expfs.c =================================================================== --- linux-2.6.orig/fs/exportfs/expfs.c +++ linux-2.6/fs/exportfs/expfs.c @@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *res void *context) { struct dentry *dentry, *toput = NULL; + struct inode *inode; if (acceptable(context, result)) return 
result; - spin_lock(&dcache_lock); - list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { - dget_locked(dentry); - spin_unlock(&dcache_lock); + inode = result->d_inode; + spin_lock(&inode->i_lock); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + dget(dentry); + spin_unlock(&inode->i_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); toput = dentry; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); if (toput) dput(toput); @@ -74,12 +76,19 @@ static struct dentry * find_disconnected_root(struct dentry *dentry) { dget(dentry); +again: spin_lock(&dentry->d_lock); while (!IS_ROOT(dentry) && (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) { struct dentry *parent = dentry->d_parent; - dget(parent); + + if (!spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } + dget_dlock(parent); spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); dput(dentry); dentry = parent; spin_lock(&dentry->d_lock); Index: linux-2.6/fs/notify/inotify/inotify.c =================================================================== --- linux-2.6.orig/fs/notify/inotify/inotify.c +++ linux-2.6/fs/notify/inotify/inotify.c @@ -185,23 +185,25 @@ static void set_dentry_child_flags(struc { struct dentry *alias; - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); list_for_each_entry(alias, &inode->i_dentry, d_alias) { struct dentry *child; + spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { if (!child->d_inode) continue; - spin_lock(&child->d_lock); + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (watched) child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; else child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } + spin_unlock(&alias->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } /* @@ -269,6 +271,7 @@ 
void inotify_d_instantiate(struct dentry if (!inode) return; + /* XXX: need parent lock in place of dcache_lock? */ spin_lock(&entry->d_lock); parent = entry->d_parent; if (parent->d_inode && inotify_inode_watched(parent->d_inode)) @@ -283,6 +286,7 @@ void inotify_d_move(struct dentry *entry { struct dentry *parent; + /* XXX: need parent lock in place of dcache_lock? */ parent = entry->d_parent; if (inotify_inode_watched(parent->d_inode)) entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; @@ -339,18 +343,28 @@ void inotify_dentry_parent_queue_event(s if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED)) return; +again: spin_lock(&dentry->d_lock); parent = dentry->d_parent; + if (parent != dentry && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } inode = parent->d_inode; if (inotify_inode_watched(inode)) { - dget(parent); + dget_dlock(parent); spin_unlock(&dentry->d_lock); + if (parent != dentry) + spin_unlock(&parent->d_lock); inotify_inode_queue_event(inode, mask, cookie, name, dentry->d_inode); dput(parent); - } else + } else { spin_unlock(&dentry->d_lock); + if (parent != dentry) + spin_unlock(&parent->d_lock); + } } EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event); @@ -371,76 +385,86 @@ EXPORT_SYMBOL_GPL(inotify_get_cookie); * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. * We temporarily drop inode_lock, however, and CAN block. */ -void inotify_unmount_inodes(struct list_head *list) +void inotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; + int i; - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { - struct inotify_watch *watch, *next_w; - struct inode *need_iput_tmp; - struct list_head *watches; - - /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. 
- */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) - continue; - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!atomic_read(&inode->i_count)) - continue; - - need_iput_tmp = need_iput; - need_iput = NULL; - /* In case inotify_remove_watch_locked() drops a reference. */ - if (inode != need_iput_tmp) - __iget(inode); - else - need_iput_tmp = NULL; - /* In case the dropping of a reference would nuke next_i. */ - if ((&next_i->i_sb_list != list) && - atomic_read(&next_i->i_count) && - !(next_i->i_state & (I_CLEAR | I_FREEING | - I_WILL_FREE))) { - __iget(next_i); - need_iput = next_i; - } + for_each_possible_cpu(i) { + struct inode *inode, *next_i, *need_iput = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inotify_watch *watch, *next_w; + struct inode *need_iput_tmp; + struct list_head *watches; + + spin_lock(&inode->i_lock); + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } - /* - * We can safely drop inode_lock here because we hold - * references on both inode and next_i. Also no new inodes - * will be added since the umount has begun. Finally, - * iprune_mutex keeps shrink_icache_memory() away. 
- */ - spin_unlock(&inode_lock); - - if (need_iput_tmp) - iput(need_iput_tmp); - - /* for each watch, send IN_UNMOUNT and then remove it */ - mutex_lock(&inode->inotify_mutex); - watches = &inode->inotify_watches; - list_for_each_entry_safe(watch, next_w, watches, i_list) { - struct inotify_handle *ih= watch->ih; - get_inotify_watch(watch); - mutex_lock(&ih->mutex); - ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, - NULL, NULL); - inotify_remove_watch_locked(ih, watch); - mutex_unlock(&ih->mutex); - put_inotify_watch(watch); - } - mutex_unlock(&inode->inotify_mutex); - iput(inode); + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!inode->i_count) { + spin_unlock(&inode->i_lock); + continue; + } - spin_lock(&inode_lock); + need_iput_tmp = need_iput; + need_iput = NULL; + /* In case inotify_remove_watch_locked() drops a reference. */ + if (inode != need_iput_tmp) { + __iget(inode); + } else + need_iput_tmp = NULL; + + spin_unlock(&inode->i_lock); + + /* In case the dropping of a reference would nuke next_i. 
*/ + if (&next_i->i_sb_list != list) { + spin_lock(&next_i->i_lock); + if (next_i->i_count && + !(next_i->i_state & + (I_CLEAR|I_FREEING|I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + spin_unlock(&next_i->i_lock); + } + + if (need_iput_tmp) + iput(need_iput_tmp); + + /* for each watch, send IN_UNMOUNT and then remove it */ + mutex_lock(&inode->inotify_mutex); + watches = &inode->inotify_watches; + list_for_each_entry_safe(watch, next_w, watches, i_list) { + struct inotify_handle *ih = watch->ih; + get_inotify_watch(watch); + mutex_lock(&ih->mutex); + ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL); + inotify_remove_watch_locked(ih, watch); + mutex_unlock(&ih->mutex); + put_inotify_watch(watch); + } + mutex_unlock(&inode->inotify_mutex); + iput(inode); + } } } EXPORT_SYMBOL_GPL(inotify_unmount_inodes); Index: linux-2.6/fs/smbfs/dir.c =================================================================== --- linux-2.6.orig/fs/smbfs/dir.c +++ linux-2.6/fs/smbfs/dir.c @@ -405,6 +405,7 @@ void smb_renew_times(struct dentry * dentry) { dget(dentry); +again: spin_lock(&dentry->d_lock); for (;;) { struct dentry *parent; @@ -413,8 +414,13 @@ smb_renew_times(struct dentry * dentry) if (IS_ROOT(dentry)) break; parent = dentry->d_parent; - dget(parent); + if (!spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } + dget_dlock(parent); spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); dput(dentry); dentry = parent; spin_lock(&dentry->d_lock); Index: linux-2.6/fs/smbfs/proc.c =================================================================== --- linux-2.6.orig/fs/smbfs/proc.c +++ linux-2.6/fs/smbfs/proc.c @@ -332,6 +332,7 @@ static int smb_build_path(struct smb_sb_ * and store it in reversed order [see reverse_string()] */ dget(entry); +again: spin_lock(&entry->d_lock); while (!IS_ROOT(entry)) { struct dentry *parent; @@ -350,6 +351,7 @@ static int smb_build_path(struct smb_sb_ dput(entry); return len; 
} + reverse_string(path, len); path += len; if (unicode) { @@ -361,7 +363,11 @@ static int smb_build_path(struct smb_sb_ maxlen -= len+1; parent = entry->d_parent; - dget(parent); + if (!spin_trylock(&parent->d_lock)) { + spin_unlock(&entry->d_lock); + goto again; + } + dget_dlock(parent); spin_unlock(&entry->d_lock); dput(entry); entry = parent; Index: linux-2.6/kernel/cgroup.c =================================================================== --- linux-2.6.orig/kernel/cgroup.c +++ linux-2.6/kernel/cgroup.c @@ -808,25 +808,29 @@ static void cgroup_clear_directory(struc struct list_head *node; BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); node = dentry->d_subdirs.next; while (node != &dentry->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { /* This should never be called on a cgroup * directory with child cgroups */ BUG_ON(d->d_inode->i_mode & S_IFDIR); - d = dget_locked(d); - spin_unlock(&dcache_lock); + dget_dlock(d); + spin_unlock(&d->d_lock); + spin_unlock(&dentry->d_lock); d_delete(d); simple_unlink(dentry->d_inode, d); dput(d); - spin_lock(&dcache_lock); - } + spin_lock(&dentry->d_lock); + } else + spin_unlock(&d->d_lock); node = dentry->d_subdirs.next; } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); } /* @@ -834,11 +838,16 @@ static void cgroup_clear_directory(struc */ static void cgroup_d_remove_dir(struct dentry *dentry) { + struct dentry *parent; + cgroup_clear_directory(dentry); - spin_lock(&dcache_lock); + parent = dentry->d_parent; + spin_lock(&parent->d_lock); + spin_lock(&dentry->d_lock); list_del_init(&dentry->d_u.d_child); - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); + spin_unlock(&parent->d_lock); remove_dir(dentry); } @@ -3164,9 +3173,7 @@ again: list_del(&cgrp->sibling); cgroup_unlock_hierarchy(cgrp->root); - 
spin_lock(&cgrp->dentry->d_lock); d = dget(cgrp->dentry); - spin_unlock(&d->d_lock); cgroup_d_remove_dir(d); dput(d); Index: linux-2.6/arch/powerpc/platforms/cell/spufs/inode.c =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/inode.c +++ linux-2.6/arch/powerpc/platforms/cell/spufs/inode.c @@ -158,18 +158,18 @@ static void spufs_prune_dir(struct dentr mutex_lock(&dir->d_inode->i_mutex); list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { - dget_locked(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(dir->d_inode, dentry); - spin_unlock(&dcache_lock); + /* XXX: what was dcache_lock protecting here? Other + * filesystems (IB, configfs) release dcache_lock + * before unlink */ dput(dentry); } else { spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); } } shrink_dcache_parent(dir); Index: linux-2.6/drivers/infiniband/hw/ipath/ipath_fs.c =================================================================== --- linux-2.6.orig/drivers/infiniband/hw/ipath/ipath_fs.c +++ linux-2.6/drivers/infiniband/hw/ipath/ipath_fs.c @@ -272,18 +272,14 @@ static int remove_file(struct dentry *pa goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); - } else { + } else spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); - } ret = 0; bail: Index: linux-2.6/fs/configfs/inode.c =================================================================== --- linux-2.6.orig/fs/configfs/inode.c +++ linux-2.6/fs/configfs/inode.c @@ -254,18 +254,14 @@ void configfs_drop_dentry(struct configf struct dentry * dentry = sd->s_dentry; if (dentry) { - spin_lock(&dcache_lock); 
spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry) && dentry->d_inode)) { - dget_locked(dentry); + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, dentry); - } else { + } else spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); - } } } Index: linux-2.6/fs/notify/fsnotify.c =================================================================== --- linux-2.6.orig/fs/notify/fsnotify.c +++ linux-2.6/fs/notify/fsnotify.c @@ -52,7 +52,7 @@ void __fsnotify_update_child_dentry_flag /* determine if the children should tell inode about their events */ watched = fsnotify_inode_watches_children(inode); - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ list_for_each_entry(alias, &inode->i_dentry, d_alias) { @@ -61,19 +61,21 @@ void __fsnotify_update_child_dentry_flag /* run all of the children of the original inode and fix their * d_flags to indicate parental interest (their parent is the * original inode) */ + spin_lock(&alias->d_lock); list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { if (!child->d_inode) continue; - spin_lock(&child->d_lock); + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); if (watched) child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } + spin_unlock(&alias->d_lock); } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } /* Notify this dentry's parent about a child's events. 
*/ @@ -87,13 +89,18 @@ void __fsnotify_parent(struct dentry *de if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) return; +again: spin_lock(&dentry->d_lock); parent = dentry->d_parent; + if (parent != dentry && !spin_trylock(&parent->d_lock)) { + spin_unlock(&dentry->d_lock); + goto again; + } p_inode = parent->d_inode; if (fsnotify_inode_watches_children(p_inode)) { if (p_inode->i_fsnotify_mask & mask) { - dget(parent); + dget_dlock(parent); send = true; } } else { @@ -103,11 +110,13 @@ void __fsnotify_parent(struct dentry *de * children and update their d_flags to let them know p_inode * doesn't care about them any more. */ - dget(parent); + dget_dlock(parent); should_update_children = true; } spin_unlock(&dentry->d_lock); + if (parent != dentry) + spin_unlock(&parent->d_lock); if (send) { /* we are notifying a parent so come up with the new mask which Index: linux-2.6/fs/sysfs/dir.c =================================================================== --- linux-2.6.orig/fs/sysfs/dir.c +++ linux-2.6/fs/sysfs/dir.c @@ -547,19 +547,21 @@ static void sysfs_drop_dentry(struct sys * dput to immediately free the dentry if it is not in use. 
*/ repeat: - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { - if (d_unhashed(dentry)) - continue; - dget_locked(dentry); spin_lock(&dentry->d_lock); + if (d_unhashed(dentry)) { + spin_unlock(&dentry->d_lock); + continue; + } + dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); dput(dentry); goto repeat; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); /* adjust nlink and update timestamp */ mutex_lock(&inode->i_mutex); Index: linux-2.6/fs/seq_file.c =================================================================== --- linux-2.6.orig/fs/seq_file.c +++ linux-2.6/fs/seq_file.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -462,9 +463,10 @@ int seq_path_root(struct seq_file *m, st if (size) { char *p; - spin_lock(&dcache_lock); + vfsmount_read_lock(); p = __d_path(path, root, buf, size); - spin_unlock(&dcache_lock); + vfsmount_read_unlock(); + res = PTR_ERR(p); if (!IS_ERR(p)) { char *end = mangle_path(buf, p, esc); Index: linux-2.6/fs/configfs/configfs_internal.h =================================================================== --- linux-2.6.orig/fs/configfs/configfs_internal.h +++ linux-2.6/fs/configfs/configfs_internal.h @@ -120,7 +120,7 @@ static inline struct config_item *config { struct config_item * item = NULL; - spin_lock(&dcache_lock); + spin_lock(&dentry->d_lock); if (!d_unhashed(dentry)) { struct configfs_dirent * sd = dentry->d_fsdata; if (sd->s_type & CONFIGFS_ITEM_LINK) { @@ -129,7 +129,7 @@ static inline struct config_item *config } else item = config_item_get(sd->s_element); } - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return item; } Index: linux-2.6/fs/ocfs2/dcache.c =================================================================== --- linux-2.6.orig/fs/ocfs2/dcache.c +++ linux-2.6/fs/ocfs2/dcache.c @@ -151,23 +151,25 @@ struct dentry 
*ocfs2_find_local_alias(st struct list_head *p; struct dentry *dentry = NULL; - spin_lock(&dcache_lock); - + spin_lock(&inode->i_lock); list_for_each(p, &inode->i_dentry) { dentry = list_entry(p, struct dentry, d_alias); + spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { mlog(0, "dentry found: %.*s\n", dentry->d_name.len, dentry->d_name.name); - dget_locked(dentry); + dget_dlock(dentry); + spin_unlock(&dentry->d_lock); break; } + spin_unlock(&dentry->d_lock); dentry = NULL; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); return dentry; } Index: linux-2.6/drivers/usb/core/inode.c =================================================================== --- linux-2.6.orig/drivers/usb/core/inode.c +++ linux-2.6/drivers/usb/core/inode.c @@ -347,17 +347,16 @@ static int usbfs_empty (struct dentry *d { struct list_head *list; - spin_lock(&dcache_lock); - + spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); if (usbfs_positive(de)) { - spin_unlock(&dcache_lock); + spin_unlock(&dentry->d_lock); return 0; } } + spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 1; } Index: linux-2.6/fs/autofs4/inode.c =================================================================== --- linux-2.6.orig/fs/autofs4/inode.c +++ linux-2.6/fs/autofs4/inode.c @@ -109,8 +109,9 @@ static void autofs4_force_release(struct if (!sbi->sb->s_root) return; - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); repeat: + spin_lock(&this_parent->d_lock); next = this_parent->d_subdirs.next; resume: while (next != &this_parent->d_subdirs) { @@ -123,33 +124,39 @@ resume: } if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); this_parent = dentry; goto repeat; } next = next->next; - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + spin_unlock(&autofs4_lock); DPRINTK("dentry %p %.*s", dentry, (int)dentry->d_name.len, 
dentry->d_name.name); dput(dentry); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&this_parent->d_lock); } if (this_parent != sbi->sb->s_root) { struct dentry *dentry = this_parent; next = this_parent->d_u.d_child.next; + spin_unlock(&this_parent->d_lock); this_parent = this_parent->d_parent; - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); DPRINTK("parent dentry %p %.*s", dentry, (int)dentry->d_name.len, dentry->d_name.name); dput(dentry); - spin_lock(&dcache_lock); + spin_lock(&autofs4_lock); + spin_lock(&this_parent->d_lock); goto resume; } - spin_unlock(&dcache_lock); + spin_unlock(&this_parent->d_lock); + spin_unlock(&autofs4_lock); } void autofs4_kill_sb(struct super_block *sb) Index: linux-2.6/fs/coda/cache.c =================================================================== --- linux-2.6.orig/fs/coda/cache.c +++ linux-2.6/fs/coda/cache.c @@ -86,7 +86,7 @@ static void coda_flag_children(struct de struct list_head *child; struct dentry *de; - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); list_for_each(child, &parent->d_subdirs) { de = list_entry(child, struct dentry, d_u.d_child); @@ -95,7 +95,7 @@ static void coda_flag_children(struct de continue; coda_flag_inode(de->d_inode, flag); } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); return; } Index: linux-2.6/fs/ncpfs/dir.c =================================================================== --- linux-2.6.orig/fs/ncpfs/dir.c +++ linux-2.6/fs/ncpfs/dir.c @@ -364,21 +364,21 @@ ncp_dget_fpos(struct dentry *dentry, str } /* If a pointer is invalid, we search the dentry. 
*/ - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); goto out; } next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); return NULL; out: Index: linux-2.6/fs/ncpfs/ncplib_kernel.h =================================================================== --- linux-2.6.orig/fs/ncpfs/ncplib_kernel.h +++ linux-2.6/fs/ncpfs/ncplib_kernel.h @@ -192,7 +192,7 @@ ncp_renew_dentries(struct dentry *parent struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -204,7 +204,7 @@ ncp_renew_dentries(struct dentry *parent next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); } static inline void @@ -214,7 +214,7 @@ ncp_invalidate_dircache_entries(struct d struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -222,7 +222,7 @@ ncp_invalidate_dircache_entries(struct d ncp_age_dentry(server, dentry); next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); } struct ncp_cache_head { Index: linux-2.6/fs/smbfs/cache.c =================================================================== --- linux-2.6.orig/fs/smbfs/cache.c +++ linux-2.6/fs/smbfs/cache.c @@ -62,7 +62,7 @@ smb_invalidate_dircache_entries(struct d struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != 
&parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -70,7 +70,7 @@ smb_invalidate_dircache_entries(struct d smb_age_dentry(server, dentry); next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); } /* @@ -96,13 +96,13 @@ smb_dget_fpos(struct dentry *dentry, str } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; goto out_unlock; @@ -111,7 +111,7 @@ smb_dget_fpos(struct dentry *dentry, str } dent = NULL; out_unlock: - spin_unlock(&dcache_lock); + spin_unlock(&parent->d_lock); return dent; } Index: linux-2.6/security/selinux/selinuxfs.c =================================================================== --- linux-2.6.orig/security/selinux/selinuxfs.c +++ linux-2.6/security/selinux/selinuxfs.c @@ -943,24 +943,28 @@ static void sel_remove_entries(struct de { struct list_head *node; - spin_lock(&dcache_lock); + spin_lock(&de->d_lock); node = de->d_subdirs.next; while (node != &de->d_subdirs) { struct dentry *d = list_entry(node, struct dentry, d_u.d_child); + + spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); list_del_init(node); if (d->d_inode) { - d = dget_locked(d); - spin_unlock(&dcache_lock); + dget_dlock(d); + spin_unlock(&de->d_lock); + spin_unlock(&d->d_lock); d_delete(d); simple_unlink(de->d_inode, d); dput(d); - spin_lock(&dcache_lock); - } + spin_lock(&de->d_lock); + } else + spin_unlock(&d->d_lock); node = de->d_subdirs.next; } - spin_unlock(&dcache_lock); + spin_unlock(&de->d_lock); } #define BOOL_DIR_NAME "booleans" Index: linux-2.6/fs/affs/amigaffs.c =================================================================== --- linux-2.6.orig/fs/affs/amigaffs.c +++ linux-2.6/fs/affs/amigaffs.c @@ -128,7 +128,7 @@ 
affs_fix_dcache(struct dentry *dentry, u void *data = dentry->d_fsdata; struct list_head *head, *next; - spin_lock(&dcache_lock); + spin_lock(&inode->i_lock); head = &inode->i_dentry; next = head->next; while (next != head) { @@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u } next = next->next; } - spin_unlock(&dcache_lock); + spin_unlock(&inode->i_lock); } Index: linux-2.6/fs/nfs/getroot.c =================================================================== --- linux-2.6.orig/fs/nfs/getroot.c +++ linux-2.6/fs/nfs/getroot.c @@ -55,7 +55,9 @@ static int nfs_superblock_set_dummy_root return -ENOMEM; } /* Circumvent igrab(): we know the inode is not being freed */ - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); /* * Ensure that this dentry is invisible to d_find_alias(). * Otherwise, it may be spliced into the tree by @@ -64,9 +66,11 @@ static int nfs_superblock_set_dummy_root * This again causes shrink_dcache_for_umount_subtree() to * Oops, since the test for IS_ROOT() will fail. 
*/ - spin_lock(&dcache_lock); + spin_lock(&sb->s_root->d_inode->i_lock); + spin_lock(&sb->s_root->d_lock); list_del_init(&sb->s_root->d_alias); - spin_unlock(&dcache_lock); + spin_unlock(&sb->s_root->d_lock); + spin_unlock(&sb->s_root->d_inode->i_lock); } return 0; } Index: linux-2.6/drivers/staging/pohmelfs/path_entry.c =================================================================== --- linux-2.6.orig/drivers/staging/pohmelfs/path_entry.c +++ linux-2.6/drivers/staging/pohmelfs/path_entry.c @@ -84,10 +84,11 @@ out: int pohmelfs_path_length(struct pohmelfs_inode *pi) { struct dentry *d, *root, *first; - int len = 1; /* Root slash */ + int len; + unsigned seq; - first = d = d_find_alias(&pi->vfs_inode); - if (!d) { + first = d_find_alias(&pi->vfs_inode); + if (!first) { dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode); return -ENOENT; } @@ -96,7 +97,11 @@ int pohmelfs_path_length(struct pohmelfs root = dget(current->fs->root.dentry); read_unlock(¤t->fs->lock); - spin_lock(&dcache_lock); +rename_retry: + len = 1; /* Root slash */ + d = first; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); if (!IS_ROOT(d) && d_unhashed(d)) len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */ @@ -105,7 +110,9 @@ int pohmelfs_path_length(struct pohmelfs len += d->d_name.len + 1; /* Plus slash */ d = d->d_parent; } - spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; dput(root); dput(first); Index: linux-2.6/fs/autofs4/waitq.c =================================================================== --- linux-2.6.orig/fs/autofs4/waitq.c +++ linux-2.6/fs/autofs4/waitq.c @@ -186,16 +186,26 @@ static int autofs4_getpath(struct autofs { struct dentry *root = sbi->sb->s_root; struct dentry *tmp; - char *buf = *name; + char *buf; char *p; - int len = 0; + int len; + unsigned seq; - spin_lock(&dcache_lock); +rename_retry: + buf = *name; + len = 0; + + seq = 
read_seqbegin(&rename_lock); + rcu_read_lock(); + spin_lock(&autofs4_lock); for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) len += tmp->d_name.len + 1; if (!len || --len > NAME_MAX) { - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return 0; } @@ -208,7 +218,10 @@ static int autofs4_getpath(struct autofs p -= tmp->d_name.len; strncpy(p, tmp->d_name.name, tmp->d_name.len); } - spin_unlock(&dcache_lock); + spin_unlock(&autofs4_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; return len; } Index: linux-2.6/fs/nfs/namespace.c =================================================================== --- linux-2.6.orig/fs/nfs/namespace.c +++ linux-2.6/fs/nfs/namespace.c @@ -48,12 +48,17 @@ char *nfs_path(const char *base, const struct dentry *dentry, char *buffer, ssize_t buflen) { - char *end = buffer+buflen; + char *end; int namelen; + unsigned seq; +rename_retry: + end = buffer+buflen; *--end = '\0'; buflen--; - spin_lock(&dcache_lock); + + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); while (!IS_ROOT(dentry) && dentry != droot) { namelen = dentry->d_name.len; buflen -= namelen + 1; @@ -64,7 +69,9 @@ char *nfs_path(const char *base, *--end = '/'; dentry = dentry->d_parent; } - spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; if (*end != '/') { if (--buflen < 0) goto Elong; @@ -81,7 +88,9 @@ char *nfs_path(const char *base, memcpy(end, base, namelen); return end; Elong_unlock: - spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; Elong: return ERR_PTR(-ENAMETOOLONG); } Index: linux-2.6/Documentation/filesystems/Locking =================================================================== --- linux-2.6.orig/Documentation/filesystems/Locking +++ linux-2.6/Documentation/filesystems/Locking @@ -17,7 +17,7 @@ prototypes: 
void (*d_iput)(struct dentry *, struct inode *); char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen); -locking rules: +locking rules: XXX: update these!! none have BKL dcache_lock rename_lock ->d_lock may block d_revalidate: no no no yes Index: linux-2.6/include/linux/fsnotify_backend.h =================================================================== --- linux-2.6.orig/include/linux/fsnotify_backend.h +++ linux-2.6/include/linux/fsnotify_backend.h @@ -276,10 +276,10 @@ static inline void __fsnotify_update_dca { struct dentry *parent; - assert_spin_locked(&dcache_lock); assert_spin_locked(&dentry->d_lock); parent = dentry->d_parent; + /* XXX: after dcache_lock removal, there is a race with parent->d_inode and fsnotify_inode_watches_children. must fix */ if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else @@ -288,15 +288,12 @@ static inline void __fsnotify_update_dca /* * fsnotify_d_instantiate - instantiate a dentry for inode - * Called with dcache_lock held. 
*/ static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode) { if (!inode) return; - assert_spin_locked(&dcache_lock); - spin_lock(&dentry->d_lock); __fsnotify_update_dcache_flags(dentry); spin_unlock(&dentry->d_lock); @@ -347,7 +344,7 @@ extern void fsnotify_destroy_mark_by_ent extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry); extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry); -extern void fsnotify_unmount_inodes(struct list_head *list); +extern void fsnotify_unmount_inodes(struct super_block *sb); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, @@ -377,7 +374,7 @@ static inline u32 fsnotify_get_cookie(vo return 0; } -static inline void fsnotify_unmount_inodes(struct list_head *list) +static inline void fsnotify_unmount_inodes(struct super_block *sb) {} #endif /* CONFIG_FSNOTIFY */ Index: linux-2.6/fs/autofs4/autofs_i.h =================================================================== --- linux-2.6.orig/fs/autofs4/autofs_i.h +++ linux-2.6/fs/autofs4/autofs_i.h @@ -16,6 +16,7 @@ #include #include #include +#include #include /* This is the range of ioctl() numbers we claim as ours */ @@ -60,6 +61,8 @@ do { \ current->pid, __func__, ##args); \ } while (0) +extern spinlock_t autofs4_lock; + /* Unified info structure. This is pointed to by both the dentry and inode structures. Each file in the filesystem has an instance of this structure. 
It holds a reference to the dentry, so dentries are never Index: linux-2.6/fs/drop_caches.c =================================================================== --- linux-2.6.orig/fs/drop_caches.c +++ linux-2.6/fs/drop_caches.c @@ -14,23 +14,35 @@ int sysctl_drop_caches; static void drop_pagecache_sb(struct super_block *sb) { - struct inode *inode, *toput_inode = NULL; + int i; - spin_lock(&inode_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) - continue; - if (inode->i_mapping->nrpages == 0) - continue; - __iget(inode); - spin_unlock(&inode_lock); - invalidate_mapping_pages(inode->i_mapping, 0, -1); + for_each_possible_cpu(i) { + struct inode *inode, *toput_inode = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW) + || inode->i_mapping->nrpages == 0) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + invalidate_mapping_pages(inode->i_mapping, 0, -1); + iput(toput_inode); + toput_inode = inode; + rcu_read_lock(); + } + rcu_read_unlock(); iput(toput_inode); - toput_inode = inode; - spin_lock(&inode_lock); } - spin_unlock(&inode_lock); - iput(toput_inode); } static void drop_pagecache(void) Index: linux-2.6/fs/fs-writeback.c =================================================================== --- linux-2.6.orig/fs/fs-writeback.c +++ linux-2.6/fs/fs-writeback.c @@ -285,6 +285,7 @@ static void redirty_tail(struct inode *i { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + assert_spin_locked(&wb_inode_list_lock); if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -302,13 +303,14 @@ static void requeue_io(struct inode *ino { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; + 
assert_spin_locked(&wb_inode_list_lock); list_move(&inode->i_list, &wb->b_more_io); } static void inode_sync_complete(struct inode *inode) { /* - * Prevent speculative execution through spin_unlock(&inode_lock); + * Prevent speculative execution through spin_unlock(&inode->i_lock); */ smp_mb(); wake_up_bit(&inode->i_state, __I_SYNC); @@ -342,6 +344,7 @@ static void move_expired_inodes(struct l struct inode *inode; int do_sb_sort = 0; + assert_spin_locked(&wb_inode_list_lock); while (!list_empty(delaying_queue)) { inode = list_entry(delaying_queue->prev, struct inode, i_list); if (older_than_this && @@ -397,9 +400,11 @@ static void inode_wait_for_writeback(str wqh = bit_waitqueue(&inode->i_state, __I_SYNC); do { - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode->i_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); } while (inode->i_state & I_SYNC); } @@ -424,7 +429,7 @@ writeback_single_inode(struct inode *ino unsigned dirty; int ret; - if (!atomic_read(&inode->i_count)) + if (!inode->i_count) WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); else WARN_ON(inode->i_state & I_WILL_FREE); @@ -456,7 +461,8 @@ writeback_single_inode(struct inode *ino inode->i_state |= I_SYNC; inode->i_state &= ~I_DIRTY; - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode->i_lock); ret = do_writepages(mapping, wbc); @@ -473,7 +479,8 @@ writeback_single_inode(struct inode *ino ret = err; } - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & (I_FREEING | I_CLEAR))) { if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { @@ -532,11 +539,11 @@ select_queue: inode->i_state |= I_DIRTY_PAGES; redirty_tail(inode); } - } else if (atomic_read(&inode->i_count)) { + } else if (inode->i_count) { /* * The inode is clean, inuse */ - 
list_move(&inode->i_list, &inode_in_use); + list_del_init(&inode->i_list); } else { /* * The inode is clean, unused @@ -617,7 +624,8 @@ static void writeback_inodes_wb(struct b const int is_blkdev_sb = sb_is_blkdev_sb(sb); const unsigned long start = jiffies; /* livelock avoidance */ - spin_lock(&inode_lock); +again: + spin_lock(&wb_inode_list_lock); if (!wbc->for_kupdate || list_empty(&wb->b_io)) queue_io(wb, wbc->older_than_this); @@ -627,11 +635,17 @@ static void writeback_inodes_wb(struct b struct inode, i_list); long pages_skipped; + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + goto again; + } + /* * super block given and doesn't match, skip this inode */ if (sb && sb != inode->i_sb) { redirty_tail(inode); + spin_unlock(&inode->i_lock); continue; } @@ -642,6 +656,7 @@ static void writeback_inodes_wb(struct b * Dirty memory-backed blockdev: the ramdisk * driver does this. Skip just this inode */ + spin_unlock(&inode->i_lock); continue; } /* @@ -649,31 +664,39 @@ static void writeback_inodes_wb(struct b * than the kernel-internal bdev filesystem. Skip the * entire superblock. */ + spin_unlock(&inode->i_lock); break; } - if (inode->i_state & (I_NEW | I_WILL_FREE)) { - requeue_io(inode); - continue; - } - if (wbc->nonblocking && bdi_write_congested(wb->bdi)) { wbc->encountered_congestion = 1; - if (!is_blkdev_sb) + if (!is_blkdev_sb) { + spin_unlock(&inode->i_lock); break; /* Skip a congested fs */ + } requeue_io(inode); + spin_unlock(&inode->i_lock); continue; /* Skip a congested blockdev */ } + if (inode->i_state & (I_NEW | I_WILL_FREE)) { + requeue_io(inode); + spin_unlock(&inode->i_lock); + continue; + } + /* * Was this inode dirtied after sync_sb_inodes was called? * This keeps sync from extra jobs and livelock. 
*/ - if (inode_dirtied_after(inode, start)) + if (inode_dirtied_after(inode, start)) { + spin_unlock(&inode->i_lock); break; + } if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { requeue_io(inode); + spin_unlock(&inode->i_lock); continue; } @@ -688,10 +711,11 @@ static void writeback_inodes_wb(struct b */ redirty_tail(inode); } - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode->i_lock); iput(inode); cond_resched(); - spin_lock(&inode_lock); + spin_lock(&wb_inode_list_lock); if (wbc->nr_to_write <= 0) { wbc->more_io = 1; break; @@ -699,10 +723,9 @@ static void writeback_inodes_wb(struct b if (!list_empty(&wb->b_more_io)) wbc->more_io = 1; } + spin_unlock(&wb_inode_list_lock); unpin_sb_for_writeback(&pin_sb); - - spin_unlock(&inode_lock); /* Leave any unwritten inodes on b_io */ } @@ -814,13 +837,19 @@ static long wb_writeback(struct bdi_writ * become available for writeback. Otherwise * we'll just busyloop. */ - spin_lock(&inode_lock); +retry: + spin_lock(&wb_inode_list_lock); if (!list_empty(&wb->b_more_io)) { inode = list_entry(wb->b_more_io.prev, struct inode, i_list); + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + goto retry; + } inode_wait_for_writeback(inode); + spin_unlock(&inode->i_lock); } - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); } return wrote; @@ -867,7 +896,7 @@ static long wb_check_old_data_flush(stru wb->last_old_flush = jiffies; nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); + get_nr_inodes() - inodes_stat.nr_unused; if (nr_pages) { struct wb_writeback_args args = { @@ -1074,7 +1103,7 @@ void __mark_inode_dirty(struct inode *in if (unlikely(block_dump)) block_dump___mark_inode_dirty(inode); - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); if ((inode->i_state & flags) != flags) { const int was_dirty = inode->i_state & I_DIRTY; @@ -1115,11 +1144,13 @@ void 
__mark_inode_dirty(struct inode *in } inode->dirtied_when = jiffies; + spin_lock(&wb_inode_list_lock); list_move(&inode->i_list, &wb->b_dirty); + spin_unlock(&wb_inode_list_lock); } } out: - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(__mark_inode_dirty); @@ -1142,7 +1173,7 @@ EXPORT_SYMBOL(__mark_inode_dirty); */ static void wait_sb_inodes(struct super_block *sb) { - struct inode *inode, *old_inode = NULL; + int i; /* * We need to be protected against the filesystem going from @@ -1150,44 +1181,57 @@ static void wait_sb_inodes(struct super_ */ WARN_ON(!rwsem_is_locked(&sb->s_umount)); - spin_lock(&inode_lock); - - /* - * Data integrity sync. Must wait for all pages under writeback, - * because there may have been pages dirtied before our sync - * call, but which had writeout started before we write it out. - * In which case, the inode may not be on the dirty list, but - * we still have to wait for that writeout. - */ - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - struct address_space *mapping; - - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) - continue; - mapping = inode->i_mapping; - if (mapping->nrpages == 0) - continue; - __iget(inode); - spin_unlock(&inode_lock); + for_each_possible_cpu(i) { + struct inode *inode, *old_inode = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif /* - * We hold a reference to 'inode' so it couldn't have - * been removed from s_inodes list while we dropped the - * inode_lock. We cannot iput the inode now as we can - * be holding the last reference and we cannot iput it - * under inode_lock. So we keep the reference and iput - * it later. + * Data integrity sync. Must wait for all pages under writeback, + * because there may have been pages dirtied before our sync + * call, but which had writeout started before we write it out. 
+ * In which case, the inode may not be on the dirty list, but + * we still have to wait for that writeout. */ - iput(old_inode); - old_inode = inode; + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + struct address_space *mapping; - filemap_fdatawait(mapping); + mapping = inode->i_mapping; + if (mapping->nrpages == 0) + continue; - cond_resched(); + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + /* + * We hold a reference to 'inode' so it couldn't have + * been removed from s_inodes list while we dropped the + * i_lock. We cannot iput the inode now as we can be + * holding the last reference and we cannot iput it + * under spinlock. So we keep the reference and iput it + * later. + */ + iput(old_inode); + old_inode = inode; - spin_lock(&inode_lock); + filemap_fdatawait(mapping); + + cond_resched(); + + rcu_read_lock(); + } + rcu_read_unlock(); + iput(old_inode); } - spin_unlock(&inode_lock); - iput(old_inode); } /** @@ -1206,7 +1250,7 @@ void writeback_inodes_sb(struct super_bl long nr_to_write; nr_to_write = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); + get_nr_inodes() - inodes_stat.nr_unused; bdi_start_writeback(sb->s_bdi, sb, nr_to_write); } @@ -1250,9 +1294,11 @@ int write_inode_now(struct inode *inode, wbc.nr_to_write = 0; might_sleep(); - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); ret = writeback_single_inode(inode, &wbc); - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode->i_lock); if (sync) inode_sync_wait(inode); return ret; @@ -1274,9 +1320,11 @@ int sync_inode(struct inode *inode, stru { int ret; - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); + spin_lock(&wb_inode_list_lock); ret = writeback_single_inode(inode, wbc); - spin_unlock(&inode_lock); + 
spin_unlock(&wb_inode_list_lock); + spin_unlock(&inode->i_lock); return ret; } EXPORT_SYMBOL(sync_inode); Index: linux-2.6/fs/inode.c =================================================================== --- linux-2.6.orig/fs/inode.c +++ linux-2.6/fs/inode.c @@ -75,9 +75,13 @@ static unsigned int i_hash_shift __read_ * allowing for low-overhead inode sync() operations. */ -LIST_HEAD(inode_in_use); LIST_HEAD(inode_unused); -static struct hlist_head *inode_hashtable __read_mostly; + +struct inode_hash_bucket { + spinlock_t lock; + struct hlist_head head; +}; +static struct inode_hash_bucket *inode_hashtable __read_mostly; /* * A simple spinlock to protect the list manipulations. @@ -85,7 +89,8 @@ static struct hlist_head *inode_hashtabl * NOTE! You also have to own the lock if you change * the i_state of an inode while it is in use.. */ -DEFINE_SPINLOCK(inode_lock); +static DEFINE_PER_CPU(spinlock_t, inode_cpulock); +DEFINE_SPINLOCK(wb_inode_list_lock); /* * iprune_sem provides exclusion between the kswapd or try_to_free_pages @@ -104,10 +109,37 @@ static DECLARE_RWSEM(iprune_sem); /* * Statistics gathering.. 
*/ -struct inodes_stat_t inodes_stat; +struct inodes_stat_t inodes_stat = { + .nr_inodes = 0, + .nr_unused = 0, +}; +struct percpu_counter nr_inodes; static struct kmem_cache *inode_cachep __read_mostly; +int get_nr_inodes(void) +{ + return percpu_counter_sum_positive(&nr_inodes); +} + +/* + * Handle nr_dentry sysctl + */ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +int proc_nr_inodes(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + inodes_stat.nr_inodes = get_nr_inodes(); + return proc_dointvec(table, write, buffer, lenp, ppos); +} +#else +int proc_nr_inodes(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} +#endif + static void wake_up_inode(struct inode *inode) { /* @@ -135,7 +167,7 @@ int inode_init_always(struct super_block inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; - atomic_set(&inode->i_count, 1); + inode->i_count = 1; inode->i_op = &empty_iops; inode->i_fop = &empty_fops; inode->i_nlink = 1; @@ -247,13 +279,20 @@ void __destroy_inode(struct inode *inode } EXPORT_SYMBOL(__destroy_inode); +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(inode_cachep, inode); +} + void destroy_inode(struct inode *inode) { __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else - kmem_cache_free(inode_cachep, (inode)); + call_rcu(&inode->i_rcu, i_callback); } /* @@ -267,6 +306,7 @@ void inode_init_once(struct inode *inode INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); + INIT_LIST_HEAD(&inode->i_list); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -292,21 +332,6 @@ static void init_once(void *foo) inode_init_once(inode); } 
-/* - * inode_lock must be held - */ -void __iget(struct inode *inode) -{ - if (atomic_read(&inode->i_count)) { - atomic_inc(&inode->i_count); - return; - } - atomic_inc(&inode->i_count); - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_in_use); - inodes_stat.nr_unused--; -} - /** * clear_inode - clear an inode * @inode: inode to clear @@ -350,65 +375,70 @@ static void dispose_list(struct list_hea struct inode *inode; inode = list_first_entry(head, struct inode, i_list); - list_del(&inode->i_list); + list_del_init(&inode->i_list); if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - list_del_init(&inode->i_sb_list); - spin_unlock(&inode_lock); + spin_lock(&inode->i_lock); + __remove_inode_hash(inode); + inode_sb_list_del(inode); + spin_unlock(&inode->i_lock); wake_up_inode(inode); destroy_inode(inode); nr_disposed++; } - spin_lock(&inode_lock); - inodes_stat.nr_inodes -= nr_disposed; - spin_unlock(&inode_lock); } /* * Invalidate all inodes for a device. */ -static int invalidate_list(struct list_head *head, struct list_head *dispose) +static int invalidate_sb_inodes(struct super_block *sb, struct list_head *dispose) { - struct list_head *next; - int busy = 0, count = 0; - - next = head->next; - for (;;) { - struct list_head *tmp = next; - struct inode *inode; + int busy = 0; + int i; - /* - * We can reschedule here without worrying about the list's - * consistency because the per-sb list of inodes must not - * change during umount anymore, and because iprune_sem keeps - * shrink_icache_memory() away. 
- */ - cond_resched_lock(&inode_lock); + for_each_possible_cpu(i) { + struct list_head *next; + struct list_head *head; +#ifdef CONFIG_SMP + head = per_cpu_ptr(sb->s_inodes, i); +#else + head = &sb->s_inodes; +#endif - next = next->next; - if (tmp == head) - break; - inode = list_entry(tmp, struct inode, i_sb_list); - if (inode->i_state & I_NEW) - continue; - invalidate_inode_buffers(inode); - if (!atomic_read(&inode->i_count)) { - list_move(&inode->i_list, dispose); - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - count++; - continue; + next = head->next; + for (;;) { + struct list_head *tmp = next; + struct inode *inode; + + next = next->next; + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_sb_list); + spin_lock(&inode->i_lock); + if (inode->i_state & I_NEW) { + spin_unlock(&inode->i_lock); + continue; + } + invalidate_inode_buffers(inode); + if (!inode->i_count) { + spin_lock(&wb_inode_list_lock); + list_del(&inode->i_list); + inodes_stat.nr_unused--; + spin_unlock(&wb_inode_list_lock); + WARN_ON(inode->i_state & I_NEW); + inode->i_state |= I_FREEING; + spin_unlock(&inode->i_lock); + list_add(&inode->i_list, dispose); + continue; + } + spin_unlock(&inode->i_lock); + busy = 1; } - busy = 1; } - /* only unused inodes may be cached with i_count zero */ - inodes_stat.nr_unused -= count; return busy; } @@ -425,12 +455,17 @@ int invalidate_inodes(struct super_block int busy; LIST_HEAD(throw_away); + /* + * Don't need to worry about the list's consistency because the per-sb + * list of inodes must not change during umount anymore, and because + * iprune_sem keeps shrink_icache_memory() away. + */ down_write(&iprune_sem); - spin_lock(&inode_lock); - inotify_unmount_inodes(&sb->s_inodes); - fsnotify_unmount_inodes(&sb->s_inodes); - busy = invalidate_list(&sb->s_inodes, &throw_away); - spin_unlock(&inode_lock); +// spin_lock(&sb_inode_list_lock); XXX: is this safe? 
+ inotify_unmount_inodes(sb); + fsnotify_unmount_inodes(sb); + busy = invalidate_sb_inodes(sb, &throw_away); +// spin_unlock(&sb_inode_list_lock); dispose_list(&throw_away); up_write(&iprune_sem); @@ -445,7 +480,7 @@ static int can_unuse(struct inode *inode return 0; if (inode_has_buffers(inode)) return 0; - if (atomic_read(&inode->i_count)) + if (inode->i_count) return 0; if (inode->i_data.nrpages) return 0; @@ -468,12 +503,12 @@ static int can_unuse(struct inode *inode static void prune_icache(int nr_to_scan) { LIST_HEAD(freeable); - int nr_pruned = 0; int nr_scanned; unsigned long reap = 0; down_read(&iprune_sem); - spin_lock(&inode_lock); +again: + spin_lock(&wb_inode_list_lock); for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; @@ -482,36 +517,56 @@ static void prune_icache(int nr_to_scan) inode = list_entry(inode_unused.prev, struct inode, i_list); - if (inode->i_state || atomic_read(&inode->i_count)) { + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + goto again; + } + if (inode->i_count) { + list_del_init(&inode->i_list); + spin_unlock(&inode->i_lock); + inodes_stat.nr_unused--; + continue; + } + if (inode->i_state) { list_move(&inode->i_list, &inode_unused); + spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { + spin_unlock(&wb_inode_list_lock); __iget(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); - spin_lock(&inode_lock); +again2: + spin_lock(&wb_inode_list_lock); + /* XXX: may no longer work well */ if (inode != list_entry(inode_unused.next, struct inode, i_list)) continue; /* wrong inode or list_empty */ - if (!can_unuse(inode)) + if (!spin_trylock(&inode->i_lock)) { + spin_unlock(&wb_inode_list_lock); + goto again2; + } + if (!can_unuse(inode)) { + spin_unlock(&inode->i_lock); continue; + } } list_move(&inode->i_list, 
&freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - nr_pruned++; + spin_unlock(&inode->i_lock); + inodes_stat.nr_unused--; } - inodes_stat.nr_unused -= nr_pruned; if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); dispose_list(&freeable); up_read(&iprune_sem); @@ -538,7 +593,7 @@ static int shrink_icache_memory(int nr, return -1; prune_icache(nr); } - return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + return inodes_stat.nr_unused / 100 * sysctl_vfs_cache_pressure; } static struct shrinker icache_shrinker = { @@ -554,7 +609,7 @@ static void __wait_on_freeing_inode(stru * add any additional branch in the common code. */ static struct inode *find_inode(struct super_block *sb, - struct hlist_head *head, + struct inode_hash_bucket *b, int (*test)(struct inode *, void *), void *data) { @@ -562,17 +617,27 @@ static struct inode *find_inode(struct s struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + rcu_read_lock(); + hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) { if (inode->i_sb != sb) continue; - if (!test(inode, data)) + spin_lock(&inode->i_lock); + if (hlist_unhashed(&inode->i_hash)) { + spin_unlock(&inode->i_lock); + continue; + } + if (!test(inode, data)) { + spin_unlock(&inode->i_lock); continue; + } if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { + rcu_read_unlock(); __wait_on_freeing_inode(inode); goto repeat; } break; } + rcu_read_unlock(); return node ? inode : NULL; } @@ -581,23 +646,32 @@ repeat: * iget_locked for details. 
*/ static struct inode *find_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) + struct inode_hash_bucket *b, + unsigned long ino) { struct hlist_node *node; struct inode *inode = NULL; repeat: - hlist_for_each_entry(inode, node, head, i_hash) { + rcu_read_lock(); + hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; + spin_lock(&inode->i_lock); + if (hlist_unhashed(&inode->i_hash)) { + spin_unlock(&inode->i_lock); + continue; + } if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { + rcu_read_unlock(); __wait_on_freeing_inode(inode); goto repeat; } break; } + rcu_read_unlock(); return node ? inode : NULL; } @@ -611,17 +685,89 @@ static unsigned long hash(struct super_b return tmp & I_HASHMASK; } +static void inode_sb_list_add(struct inode *inode, struct super_block *sb) +{ + spinlock_t *lock; + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; +#endif + + lock = &get_cpu_var(inode_cpulock); +#ifdef CONFIG_SMP + cpu = smp_processor_id(); + list = per_cpu_ptr(sb->s_inodes, cpu); + inode->i_sb_list_cpu = cpu; +#else + list = &sb->s_inodes; +#endif + spin_lock(lock); + list_add_rcu(&inode->i_sb_list, list); + spin_unlock(lock); + put_cpu_var(inode_cpulock); +} + +void inode_sb_list_del(struct inode *inode) +{ + spinlock_t *lock; + +#ifdef CONFIG_SMP + lock = &per_cpu(inode_cpulock, inode->i_sb_list_cpu); +#else + lock = &__get_cpu_var(inode_cpulock); +#endif + spin_lock(lock); + list_del_rcu(&inode->i_sb_list); + spin_unlock(lock); +} + static inline void -__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, +__inode_add_to_lists(struct super_block *sb, struct inode_hash_bucket *b, struct inode *inode) { - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); - if (head) - hlist_add_head(&inode->i_hash, head); + inode_sb_list_add(inode, sb); + percpu_counter_inc(&nr_inodes); + if (b) 
{ + spin_lock(&b->lock); + hlist_add_head(&inode->i_hash, &b->head); + spin_unlock(&b->lock); + } } +#ifdef CONFIG_SMP +/* + * Each cpu owns a range of 1024 numbers. + * 'shared_last_ino' is dirtied only once out of 1024 allocations, + * to renew the exhausted range. + * + * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW + * error if st_ino won't fit in target struct field. Use 32bit counter + * here to attempt to avoid that. + */ +static DEFINE_PER_CPU(int, last_ino); +static atomic_t shared_last_ino; + +static int last_ino_get(void) +{ + int *p = &get_cpu_var(last_ino); + int res = *p; + + if (unlikely((res & 1023) == 0)) + res = atomic_add_return(1024, &shared_last_ino) - 1024; + + *p = ++res; + put_cpu_var(last_ino); + return res; +} +#else +static int last_ino_get(void) +{ + static int last_ino; + + return ++last_ino; +} +#endif + /** * inode_add_to_lists - add a new inode to relevant lists * @sb: superblock inode belongs to @@ -636,11 +782,11 @@ __inode_add_to_lists(struct super_block */ void inode_add_to_lists(struct super_block *sb, struct inode *inode) { - struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, inode->i_ino); - spin_lock(&inode_lock); - __inode_add_to_lists(sb, head, inode); - spin_unlock(&inode_lock); + spin_lock(&inode->i_lock); + __inode_add_to_lists(sb, b, inode); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(inode_add_to_lists); @@ -658,23 +804,15 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists); */ struct inode *new_inode(struct super_block *sb) { - /* - * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW - * error if st_ino won't fit in target struct field. Use 32bit counter - * here to attempt to avoid that. 
- */ - static unsigned int last_ino; struct inode *inode; - spin_lock_prefetch(&inode_lock); - inode = alloc_inode(sb); if (inode) { - spin_lock(&inode_lock); - __inode_add_to_lists(sb, NULL, inode); - inode->i_ino = ++last_ino; + spin_lock(&inode->i_lock); + inode->i_ino = last_ino_get(); inode->i_state = 0; - spin_unlock(&inode_lock); + __inode_add_to_lists(sb, NULL, inode); + spin_unlock(&inode->i_lock); } return inode; } @@ -722,7 +860,7 @@ EXPORT_SYMBOL(unlock_new_inode); * -- rmk@arm.uk.linux.org */ static struct inode *get_new_inode(struct super_block *sb, - struct hlist_head *head, + struct inode_hash_bucket *b, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) @@ -733,16 +871,16 @@ static struct inode *get_new_inode(struc if (inode) { struct inode *old; - spin_lock(&inode_lock); /* We released the lock, so.. */ - old = find_inode(sb, head, test, data); + old = find_inode(sb, b, test, data); if (!old) { + spin_lock(&inode->i_lock); if (set(inode, data)) goto set_failed; - __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; - spin_unlock(&inode_lock); + __inode_add_to_lists(sb, b, inode); + spin_unlock(&inode->i_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -756,7 +894,7 @@ static struct inode *get_new_inode(struc * allocated. */ __iget(old); - spin_unlock(&inode_lock); + spin_unlock(&old->i_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); @@ -764,7 +902,7 @@ static struct inode *get_new_inode(struc return inode; set_failed: - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); destroy_inode(inode); return NULL; } @@ -774,7 +912,7 @@ set_failed: * comment at iget_locked for details. 
*/ static struct inode *get_new_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) + struct inode_hash_bucket *b, unsigned long ino) { struct inode *inode; @@ -782,14 +920,14 @@ static struct inode *get_new_inode_fast( if (inode) { struct inode *old; - spin_lock(&inode_lock); /* We released the lock, so.. */ - old = find_inode_fast(sb, head, ino); + old = find_inode_fast(sb, b, ino); if (!old) { + spin_lock(&inode->i_lock); inode->i_ino = ino; - __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; - spin_unlock(&inode_lock); + __inode_add_to_lists(sb, b, inode); + spin_unlock(&inode->i_lock); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -803,7 +941,7 @@ static struct inode *get_new_inode_fast( * allocated. */ __iget(old); - spin_unlock(&inode_lock); + spin_unlock(&old->i_lock); destroy_inode(inode); inode = old; wait_on_inode(inode); @@ -811,6 +949,23 @@ static struct inode *get_new_inode_fast( return inode; } +static int test_inode_iunique(struct super_block *sb, + struct inode_hash_bucket *b, unsigned long ino) +{ + struct hlist_node *node; + struct inode *inode = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(inode, node, &b->head, i_hash) { + if (inode->i_ino == ino && inode->i_sb == sb) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + return 1; +} + /** * iunique - get a unique inode number * @sb: superblock @@ -832,20 +987,19 @@ ino_t iunique(struct super_block *sb, in * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. 
*/ + static DEFINE_SPINLOCK(unique_lock); static unsigned int counter; - struct inode *inode; - struct hlist_head *head; + struct inode_hash_bucket *b; ino_t res; - spin_lock(&inode_lock); + spin_lock(&unique_lock); do { if (counter <= max_reserved) counter = max_reserved + 1; res = counter++; - head = inode_hashtable + hash(sb, res); - inode = find_inode_fast(sb, head, res); - } while (inode != NULL); - spin_unlock(&inode_lock); + b = inode_hashtable + hash(sb, res); + } while (!test_inode_iunique(sb, b, res)); + spin_unlock(&unique_lock); return res; } @@ -853,7 +1007,9 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { - spin_lock(&inode_lock); + struct inode *ret = inode; + + spin_lock(&inode->i_lock); if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))) __iget(inode); else @@ -862,9 +1018,10 @@ struct inode *igrab(struct inode *inode) * called yet, and somebody is calling igrab * while the inode is getting freed. */ - inode = NULL; - spin_unlock(&inode_lock); - return inode; + ret = NULL; + spin_unlock(&inode->i_lock); + + return ret; } EXPORT_SYMBOL(igrab); @@ -888,21 +1045,20 @@ EXPORT_SYMBOL(igrab); * Note, @test is called with the inode_lock held, so can't sleep. */ static struct inode *ifind(struct super_block *sb, - struct hlist_head *head, int (*test)(struct inode *, void *), + struct inode_hash_bucket *b, + int (*test)(struct inode *, void *), void *data, const int wait) { struct inode *inode; - spin_lock(&inode_lock); - inode = find_inode(sb, head, test, data); + inode = find_inode(sb, b, test, data); if (inode) { __iget(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); if (likely(wait)) wait_on_inode(inode); return inode; } - spin_unlock(&inode_lock); return NULL; } @@ -922,19 +1078,18 @@ static struct inode *ifind(struct super_ * Otherwise NULL is returned. 
*/ static struct inode *ifind_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) + struct inode_hash_bucket *b, + unsigned long ino) { struct inode *inode; - spin_lock(&inode_lock); - inode = find_inode_fast(sb, head, ino); + inode = find_inode_fast(sb, b, ino); if (inode) { __iget(inode); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); wait_on_inode(inode); return inode; } - spin_unlock(&inode_lock); return NULL; } @@ -962,9 +1117,9 @@ static struct inode *ifind_fast(struct s struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); - return ifind(sb, head, test, data, 0); + return ifind(sb, b, test, data, 0); } EXPORT_SYMBOL(ilookup5_nowait); @@ -990,9 +1145,9 @@ EXPORT_SYMBOL(ilookup5_nowait); struct inode *ilookup5(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), void *data) { - struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); - return ifind(sb, head, test, data, 1); + return ifind(sb, b, test, data, 1); } EXPORT_SYMBOL(ilookup5); @@ -1012,9 +1167,9 @@ EXPORT_SYMBOL(ilookup5); */ struct inode *ilookup(struct super_block *sb, unsigned long ino) { - struct hlist_head *head = inode_hashtable + hash(sb, ino); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino); - return ifind_fast(sb, head, ino); + return ifind_fast(sb, b, ino); } EXPORT_SYMBOL(ilookup); @@ -1042,17 +1197,17 @@ struct inode *iget5_locked(struct super_ int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { - struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); struct inode *inode; - inode = ifind(sb, head, test, data, 1); + 
inode = ifind(sb, b, test, data, 1); if (inode) return inode; /* * get_new_inode() will do the right thing, re-trying the search * in case it had to block at any point. */ - return get_new_inode(sb, head, test, set, data); + return get_new_inode(sb, b, test, set, data); } EXPORT_SYMBOL(iget5_locked); @@ -1073,17 +1228,17 @@ EXPORT_SYMBOL(iget5_locked); */ struct inode *iget_locked(struct super_block *sb, unsigned long ino) { - struct hlist_head *head = inode_hashtable + hash(sb, ino); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino); struct inode *inode; - inode = ifind_fast(sb, head, ino); + inode = ifind_fast(sb, b, ino); if (inode) return inode; /* * get_new_inode_fast() will do the right thing, re-trying the search * in case it had to block at any point. */ - return get_new_inode_fast(sb, head, ino); + return get_new_inode_fast(sb, b, ino); } EXPORT_SYMBOL(iget_locked); @@ -1091,29 +1246,37 @@ int insert_inode_locked(struct inode *in { struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - struct hlist_head *head = inode_hashtable + hash(sb, ino); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, ino); inode->i_state |= I_LOCK|I_NEW; while (1) { struct hlist_node *node; struct inode *old = NULL; - spin_lock(&inode_lock); - hlist_for_each_entry(old, node, head, i_hash) { + +repeat: + spin_lock(&b->lock); + hlist_for_each_entry(old, node, &b->head, i_hash) { if (old->i_ino != ino) continue; if (old->i_sb != sb) continue; if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) continue; + if (!spin_trylock(&old->i_lock)) { + spin_unlock(&b->lock); + goto repeat; + } break; } if (likely(!node)) { - hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); + /* XXX: initialize inode->i_lock to locked? 
*/ + hlist_add_head(&inode->i_hash, &b->head); + spin_unlock(&b->lock); return 0; } + spin_unlock(&b->lock); __iget(old); - spin_unlock(&inode_lock); + spin_unlock(&old->i_lock); wait_on_inode(old); if (unlikely(!hlist_unhashed(&old->i_hash))) { iput(old); @@ -1128,7 +1291,7 @@ int insert_inode_locked4(struct inode *i int (*test)(struct inode *, void *), void *data) { struct super_block *sb = inode->i_sb; - struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode_hash_bucket *b = inode_hashtable + hash(sb, hashval); inode->i_state |= I_LOCK|I_NEW; @@ -1136,23 +1299,30 @@ int insert_inode_locked4(struct inode *i struct hlist_node *node; struct inode *old = NULL; - spin_lock(&inode_lock); - hlist_for_each_entry(old, node, head, i_hash) { +repeat: + spin_lock(&b->lock); + hlist_for_each_entry(old, node, &b->head, i_hash) { if (old->i_sb != sb) continue; if (!test(old, data)) continue; if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) continue; + if (!spin_trylock(&old->i_lock)) { + spin_unlock(&b->lock); + goto repeat; + } break; } if (likely(!node)) { - hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); + /* XXX: initialize inode->i_lock to locked? 
*/ + hlist_add_head(&inode->i_hash, &b->head); + spin_unlock(&b->lock); return 0; } + spin_unlock(&b->lock); __iget(old); - spin_unlock(&inode_lock); + spin_unlock(&old->i_lock); wait_on_inode(old); if (unlikely(!hlist_unhashed(&old->i_hash))) { iput(old); @@ -1173,14 +1343,32 @@ EXPORT_SYMBOL(insert_inode_locked4); */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { - struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); - spin_lock(&inode_lock); - hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); + struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, hashval); + + spin_lock(&inode->i_lock); + spin_lock(&b->lock); + hlist_add_head(&inode->i_hash, &b->head); + spin_unlock(&b->lock); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(__insert_inode_hash); /** + * __remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the hash. inode->i_lock must be + * held. + */ +void __remove_inode_hash(struct inode *inode) +{ + struct inode_hash_bucket *b = inode_hashtable + hash(inode->i_sb, inode->i_ino); + spin_lock(&b->lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&b->lock); +} + +/** * remove_inode_hash - remove an inode from the hash * @inode: inode to unhash * @@ -1188,9 +1376,9 @@ EXPORT_SYMBOL(__insert_inode_hash); */ void remove_inode_hash(struct inode *inode) { - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); + spin_lock(&inode->i_lock); + __remove_inode_hash(inode); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(remove_inode_hash); @@ -1210,12 +1398,16 @@ void generic_delete_inode(struct inode * { const struct super_operations *op = inode->i_sb->s_op; - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); + if (!list_empty(&inode->i_list)) { + spin_lock(&wb_inode_list_lock); + list_del_init(&inode->i_list); + spin_unlock(&wb_inode_list_lock); + } + inode_sb_list_del(inode); + 
percpu_counter_dec(&nr_inodes); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); security_inode_delete(inode); @@ -1232,9 +1424,15 @@ void generic_delete_inode(struct inode * truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); } - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); + /* + * i_lock not required to delete from hash. If there was a + * concurrency window, then it would be possible for the other + * thread to touch the inode after it has been freed, with + * destroy_inode. + * XXX: yes it is because find_inode_fast checks it. Maybe we + * can avoid it though... + */ + remove_inode_hash(inode); wake_up_inode(inode); BUG_ON(inode->i_state != I_CLEAR); destroy_inode(inode); @@ -1255,29 +1453,36 @@ int generic_detach_inode(struct inode *i struct super_block *sb = inode->i_sb; if (!hlist_unhashed(&inode->i_hash)) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; + if (list_empty(&inode->i_list)) { + spin_lock(&wb_inode_list_lock); + list_add(&inode->i_list, &inode_unused); + inodes_stat.nr_unused++; + spin_unlock(&wb_inode_list_lock); + } if (sb->s_flags & MS_ACTIVE) { - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); return 0; } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_WILL_FREE; - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); write_inode_now(inode, 1); - spin_lock(&inode_lock); + spin_lock(&inode->i_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; + __remove_inode_hash(inode); + } + if (!list_empty(&inode->i_list)) { + spin_lock(&wb_inode_list_lock); + list_del_init(&inode->i_list); inodes_stat.nr_unused--; - hlist_del_init(&inode->i_hash); + spin_unlock(&wb_inode_list_lock); } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); + inode_sb_list_del(inode); + 
percpu_counter_dec(&nr_inodes); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); return 1; } EXPORT_SYMBOL_GPL(generic_detach_inode); @@ -1342,8 +1547,12 @@ void iput(struct inode *inode) if (inode) { BUG_ON(inode->i_state == I_CLEAR); - if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) + spin_lock(&inode->i_lock); + inode->i_count--; + if (inode->i_count == 0) iput_final(inode); + else + spin_unlock(&inode->i_lock); } } EXPORT_SYMBOL(iput); @@ -1524,6 +1733,8 @@ EXPORT_SYMBOL(inode_wait); * wake_up_inode() after removing from the hash list will DTRT. * * This is called with inode_lock held. + * + * Called with i_lock held and returns with it dropped. */ static void __wait_on_freeing_inode(struct inode *inode) { @@ -1531,10 +1742,9 @@ static void __wait_on_freeing_inode(stru DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK); wq = bit_waitqueue(&inode->i_state, __I_LOCK); prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode_lock); + spin_unlock(&inode->i_lock); schedule(); finish_wait(wq, &wait.wait); - spin_lock(&inode_lock); } static __initdata unsigned long ihash_entries; @@ -1562,7 +1772,7 @@ void __init inode_init_early(void) inode_hashtable = alloc_large_system_hash("Inode-cache", - sizeof(struct hlist_head), + sizeof(struct inode_hash_bucket), ihash_entries, 14, HASH_EARLY, @@ -1570,14 +1780,17 @@ void __init inode_init_early(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) - INIT_HLIST_HEAD(&inode_hashtable[loop]); + for (loop = 0; loop < (1 << i_hash_shift); loop++) { + spin_lock_init(&inode_hashtable[loop].lock); + INIT_HLIST_HEAD(&inode_hashtable[loop].head); + } } void __init inode_init(void) { int loop; + percpu_counter_init(&nr_inodes, 0); /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), @@ -1587,13 +1800,17 @@ void __init inode_init(void) init_once); 
register_shrinker(&icache_shrinker); + for_each_possible_cpu(loop) { + spin_lock_init(&per_cpu(inode_cpulock, loop)); + } + /* Hash may have been set up in inode_init_early */ if (!hashdist) return; inode_hashtable = alloc_large_system_hash("Inode-cache", - sizeof(struct hlist_head), + sizeof(struct inode_hash_bucket), ihash_entries, 14, 0, @@ -1601,8 +1818,10 @@ void __init inode_init(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) - INIT_HLIST_HEAD(&inode_hashtable[loop]); + for (loop = 0; loop < (1 << i_hash_shift); loop++) { + spin_lock_init(&inode_hashtable[loop].lock); + INIT_HLIST_HEAD(&inode_hashtable[loop].head); + } } void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) Index: linux-2.6/fs/quota/dquot.c =================================================================== --- linux-2.6.orig/fs/quota/dquot.c +++ linux-2.6/fs/quota/dquot.c @@ -819,32 +819,49 @@ static int dqinit_needed(struct inode *i /* This routine is guarded by dqonoff_mutex mutex */ static void add_dquot_ref(struct super_block *sb, int type) { - struct inode *inode, *old_inode = NULL; - - spin_lock(&inode_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) - continue; - if (!atomic_read(&inode->i_writecount)) - continue; - if (!dqinit_needed(inode, type)) - continue; - - __iget(inode); - spin_unlock(&inode_lock); + int i; + for_each_possible_cpu(i) { + struct inode *inode, *old_inode = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + if (!atomic_read(&inode->i_writecount)) { + spin_unlock(&inode->i_lock); + continue; + } + if (!dqinit_needed(inode, type)) { + 
spin_unlock(&inode->i_lock); + continue; + } + + __iget(inode); + spin_unlock(&inode->i_lock); + rcu_read_unlock(); + + iput(old_inode); + sb->dq_op->initialize(inode, type); + /* We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the inode_lock. + * We cannot iput the inode now as we can be holding the last + * reference and we cannot iput it under inode_lock. So we + * keep the reference and iput it later. */ + old_inode = inode; + rcu_read_lock(); + } + rcu_read_unlock(); iput(old_inode); - sb->dq_op->initialize(inode, type); - /* We hold a reference to 'inode' so it couldn't have been - * removed from s_inodes list while we dropped the inode_lock. - * We cannot iput the inode now as we can be holding the last - * reference and we cannot iput it under inode_lock. So we - * keep the reference and iput it later. */ - old_inode = inode; - spin_lock(&inode_lock); } - spin_unlock(&inode_lock); - iput(old_inode); } /* @@ -911,20 +928,29 @@ static void put_dquot_list(struct list_h static void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) { - struct inode *inode; + int i; + for_each_possible_cpu(i) { + struct inode *inode; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif - spin_lock(&inode_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - /* - * We have to scan also I_NEW inodes because they can already - * have quota pointer initialized. Luckily, we need to touch - * only quota pointers and these have separate locking - * (dqptr_sem). - */ - if (!IS_NOQUOTA(inode)) - remove_inode_dquot_ref(inode, type, tofree_head); + rcu_read_lock(); + list_for_each_entry_rcu(inode, list, i_sb_list) { + /* + * We have to scan also I_NEW inodes because they can already + * have quota pointer initialized. Luckily, we need to touch + * only quota pointers and these have separate locking + * (dqptr_sem). 
+ */ + if (!IS_NOQUOTA(inode)) + remove_inode_dquot_ref(inode, type, tofree_head); + } + rcu_read_unlock(); } - spin_unlock(&inode_lock); } /* Gather all references from inodes and drop them */ Index: linux-2.6/include/linux/writeback.h =================================================================== --- linux-2.6.orig/include/linux/writeback.h +++ linux-2.6/include/linux/writeback.h @@ -9,8 +9,8 @@ struct backing_dev_info; -extern spinlock_t inode_lock; -extern struct list_head inode_in_use; +extern spinlock_t sb_inode_list_lock; +extern spinlock_t wb_inode_list_lock; extern struct list_head inode_unused; /* Index: linux-2.6/fs/notify/inode_mark.c =================================================================== --- linux-2.6.orig/fs/notify/inode_mark.c +++ linux-2.6/fs/notify/inode_mark.c @@ -362,65 +362,75 @@ int fsnotify_add_mark(struct fsnotify_ma * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. * We temporarily drop inode_lock, however, and CAN block. */ -void fsnotify_unmount_inodes(struct list_head *list) +void fsnotify_unmount_inodes(struct super_block *sb) { - struct inode *inode, *next_i, *need_iput = NULL; + int i; - list_for_each_entry_safe(inode, next_i, list, i_sb_list) { - struct inode *need_iput_tmp; + for_each_possible_cpu(i) { + struct inode *inode, *next_i, *need_iput = NULL; + struct list_head *list; +#ifdef CONFIG_SMP + list = per_cpu_ptr(sb->s_inodes, i); +#else + list = &sb->s_inodes; +#endif + + list_for_each_entry_safe(inode, next_i, list, i_sb_list) { + struct inode *need_iput_tmp; + + spin_lock(&inode->i_lock); + /* + * We cannot __iget() an inode in state I_CLEAR, I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. 
+ */ + if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!inode->i_count) { + spin_unlock(&inode->i_lock); + continue; + } + + need_iput_tmp = need_iput; + need_iput = NULL; + + /* In case fsnotify_inode_delete() drops a reference. */ + if (inode != need_iput_tmp) { + __iget(inode); + } else + need_iput_tmp = NULL; + spin_unlock(&inode->i_lock); + + /* In case the dropping of a reference would nuke next_i. */ + if (&next_i->i_sb_list != list) { + spin_lock(&next_i->i_lock); + if (next_i->i_count && + !(next_i->i_state & + (I_CLEAR | I_FREEING | I_WILL_FREE))) { + __iget(next_i); + need_iput = next_i; + } + spin_unlock(&next_i->i_lock); + } + + if (need_iput_tmp) + iput(need_iput_tmp); - /* - * We cannot __iget() an inode in state I_CLEAR, I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) - continue; - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!atomic_read(&inode->i_count)) - continue; - - need_iput_tmp = need_iput; - need_iput = NULL; - - /* In case fsnotify_inode_delete() drops a reference. */ - if (inode != need_iput_tmp) - __iget(inode); - else - need_iput_tmp = NULL; - - /* In case the dropping of a reference would nuke next_i. 
*/ - if ((&next_i->i_sb_list != list) && - atomic_read(&next_i->i_count) && - !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { - __iget(next_i); - need_iput = next_i; - } - - /* - * We can safely drop inode_lock here because we hold - * references on both inode and next_i. Also no new inodes - * will be added since the umount has begun. Finally, - * iprune_mutex keeps shrink_icache_memory() away. - */ - spin_unlock(&inode_lock); - - if (need_iput_tmp) - iput(need_iput_tmp); + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + fsnotify_inode_delete(inode); - fsnotify_inode_delete(inode); - - iput(inode); - - spin_lock(&inode_lock); + iput(inode); + } } } Index: linux-2.6/fs/nilfs2/gcdat.c =================================================================== --- linux-2.6.orig/fs/nilfs2/gcdat.c +++ linux-2.6/fs/nilfs2/gcdat.c @@ -27,6 +27,7 @@ #include "page.h" #include "mdt.h" +/* XXX: what protects i_state? 
*/ int nilfs_init_gcdat_inode(struct the_nilfs *nilfs) { struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; Index: linux-2.6/arch/powerpc/platforms/cell/spufs/file.c =================================================================== --- linux-2.6.orig/arch/powerpc/platforms/cell/spufs/file.c +++ linux-2.6/arch/powerpc/platforms/cell/spufs/file.c @@ -1548,7 +1548,7 @@ static int spufs_mfc_open(struct inode * if (ctx->owner != current->mm) return -EINVAL; - if (atomic_read(&inode->i_count) != 1) + if (inode->i_count != 1) return -EBUSY; mutex_lock(&ctx->mapping_lock); Index: linux-2.6/fs/affs/inode.c =================================================================== --- linux-2.6.orig/fs/affs/inode.c +++ linux-2.6/fs/affs/inode.c @@ -379,7 +379,9 @@ affs_add_entry(struct inode *dir, struct affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); mark_buffer_dirty_inode(inode_bh, inode); inode->i_nlink = 2; - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); } affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); Index: linux-2.6/fs/afs/dir.c =================================================================== --- linux-2.6.orig/fs/afs/dir.c +++ linux-2.6/fs/afs/dir.c @@ -1007,7 +1007,9 @@ static int afs_link(struct dentry *from, if (ret < 0) goto link_error; - atomic_inc(&vnode->vfs_inode.i_count); + spin_lock(&vnode->vfs_inode.i_lock); + vnode->vfs_inode.i_count++; + spin_unlock(&vnode->vfs_inode.i_lock); d_instantiate(dentry, &vnode->vfs_inode); key_put(key); _leave(" = 0"); Index: linux-2.6/fs/block_dev.c =================================================================== --- linux-2.6.orig/fs/block_dev.c +++ linux-2.6/fs/block_dev.c @@ -423,13 +423,20 @@ static struct inode *bdev_alloc_inode(st return &ei->vfs_inode; } -static void bdev_destroy_inode(struct inode *inode) +static void bdev_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, 
i_rcu); struct bdev_inode *bdi = BDEV_I(inode); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(bdev_cachep, bdi); } +static void bdev_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bdev_i_callback); +} + static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; @@ -575,7 +582,12 @@ EXPORT_SYMBOL(bdget); */ struct block_device *bdgrab(struct block_device *bdev) { - atomic_inc(&bdev->bd_inode->i_count); + struct inode *inode = bdev->bd_inode; + + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); + return bdev; } @@ -605,7 +617,9 @@ static struct block_device *bd_acquire(s spin_lock(&bdev_lock); bdev = inode->i_bdev; if (bdev) { - atomic_inc(&bdev->bd_inode->i_count); + spin_lock(&bdev->bd_inode->i_lock); + bdev->bd_inode->i_count++; + spin_unlock(&bdev->bd_inode->i_lock); spin_unlock(&bdev_lock); return bdev; } @@ -621,7 +635,9 @@ static struct block_device *bd_acquire(s * So, we can access it via ->i_mapping always * without igrab().
*/ - atomic_inc(&bdev->bd_inode->i_count); + spin_lock(&bdev->bd_inode->i_lock); + bdev->bd_inode->i_count++; + spin_unlock(&bdev->bd_inode->i_lock); inode->i_bdev = bdev; inode->i_mapping = bdev->bd_inode->i_mapping; list_add(&inode->i_devices, &bdev->bd_inodes); Index: linux-2.6/fs/ext2/namei.c =================================================================== --- linux-2.6.orig/fs/ext2/namei.c +++ linux-2.6/fs/ext2/namei.c @@ -196,7 +196,9 @@ static int ext2_link (struct dentry * ol inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); err = ext2_add_link(dentry, inode); if (!err) { Index: linux-2.6/fs/ext3/ialloc.c =================================================================== --- linux-2.6.orig/fs/ext3/ialloc.c +++ linux-2.6/fs/ext3/ialloc.c @@ -100,9 +100,9 @@ void ext3_free_inode (handle_t *handle, struct ext3_sb_info *sbi; int fatal = 0, err; - if (atomic_read(&inode->i_count) > 1) { + if (inode->i_count > 1) { printk ("ext3_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); + inode->i_count); return; } if (inode->i_nlink) { Index: linux-2.6/fs/ext3/namei.c =================================================================== --- linux-2.6.orig/fs/ext3/namei.c +++ linux-2.6/fs/ext3/namei.c @@ -2244,7 +2244,9 @@ retry: inode->i_ctime = CURRENT_TIME_SEC; inc_nlink(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); err = ext3_add_entry(handle, dentry, inode); if (!err) { Index: linux-2.6/fs/xfs/linux-2.6/xfs_iops.c =================================================================== --- linux-2.6.orig/fs/xfs/linux-2.6/xfs_iops.c +++ linux-2.6/fs/xfs/linux-2.6/xfs_iops.c @@ -357,7 +357,9 @@ xfs_vn_link( if (unlikely(error)) return -error; - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); d_instantiate(dentry, 
inode); return 0; } Index: linux-2.6/fs/xfs/xfs_iget.c =================================================================== --- linux-2.6.orig/fs/xfs/xfs_iget.c +++ linux-2.6/fs/xfs/xfs_iget.c @@ -800,7 +800,7 @@ xfs_isilocked( /* 0 */ (void *)(__psint_t)(vk), \ /* 1 */ (void *)(s), \ /* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)atomic_read(&VFS_I(ip)->i_count), \ +/* 3 */ (void *)(__psint_t)VFS_I(ip)->i_count, \ /* 4 */ (void *)(ra), \ /* 5 */ NULL, \ /* 6 */ (void *)(__psint_t)current_cpu(), \ Index: linux-2.6/fs/xfs/xfs_inode.h =================================================================== --- linux-2.6.orig/fs/xfs/xfs_inode.h +++ linux-2.6/fs/xfs/xfs_inode.h @@ -541,8 +541,10 @@ extern void xfs_itrace_rele(struct xfs_i #define IHOLD(ip) \ do { \ - ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ - atomic_inc(&(VFS_I(ip)->i_count)); \ + spin_lock(&VFS_I(ip)->i_lock); \ + ASSERT(VFS_I(ip)->i_count > 0); \ + VFS_I(ip)->i_count++; \ + spin_unlock(&VFS_I(ip)->i_lock); \ xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ } while (0) Index: linux-2.6/ipc/mqueue.c =================================================================== --- linux-2.6.orig/ipc/mqueue.c +++ linux-2.6/ipc/mqueue.c @@ -238,11 +238,18 @@ static struct inode *mqueue_alloc_inode( return &ei->vfs_inode; } -static void mqueue_destroy_inode(struct inode *inode) +static void mqueue_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode)); } +static void mqueue_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, mqueue_i_callback); +} + static void mqueue_delete_inode(struct inode *inode) { struct mqueue_inode_info *info; @@ -779,8 +786,11 @@ SYSCALL_DEFINE1(mq_unlink, const char __ } inode = dentry->d_inode; - if (inode) - atomic_inc(&inode->i_count); + if (inode) { + spin_lock(&inode->i_lock); + 
inode->i_count++; + spin_unlock(&inode->i_lock); + } err = mnt_want_write(ipc_ns->mq_mnt); if (err) goto out_err; Index: linux-2.6/kernel/futex.c =================================================================== --- linux-2.6.orig/kernel/futex.c +++ linux-2.6/kernel/futex.c @@ -167,7 +167,9 @@ static void get_futex_key_refs(union fut switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - atomic_inc(&key->shared.inode->i_count); + spin_lock(&key->shared.inode->i_lock); + key->shared.inode->i_count++; + spin_unlock(&key->shared.inode->i_lock); break; case FUT_OFF_MMSHARED: atomic_inc(&key->private.mm->mm_count); Index: linux-2.6/mm/shmem.c =================================================================== --- linux-2.6.orig/mm/shmem.c +++ linux-2.6/mm/shmem.c @@ -1871,7 +1871,9 @@ static int shmem_link(struct dentry *old dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); - atomic_inc(&inode->i_count); /* New dentry reference */ + spin_lock(&inode->i_lock); + inode->i_count++; /* New dentry reference */ + spin_unlock(&inode->i_lock); dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); out: @@ -2383,13 +2385,20 @@ static struct inode *shmem_alloc_inode(s return &p->vfs_inode; } +static void shmem_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); +} + static void shmem_destroy_inode(struct inode *inode) { if ((inode->i_mode & S_IFMT) == S_IFREG) { /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); } - kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); + call_rcu(&inode->i_rcu, shmem_i_callback); } static void init_once(void *foo) Index: linux-2.6/fs/bfs/dir.c =================================================================== --- 
linux-2.6.orig/fs/bfs/dir.c +++ linux-2.6/fs/bfs/dir.c @@ -178,7 +178,9 @@ static int bfs_link(struct dentry *old, inc_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); d_instantiate(new, inode); mutex_unlock(&info->bfs_lock); return 0; Index: linux-2.6/fs/btrfs/inode.c =================================================================== --- linux-2.6.orig/fs/btrfs/inode.c +++ linux-2.6/fs/btrfs/inode.c @@ -4247,7 +4247,9 @@ static int btrfs_link(struct dentry *old trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); err = btrfs_add_nondir(trans, dentry, inode, 1, index); Index: linux-2.6/fs/exofs/inode.c =================================================================== --- linux-2.6.orig/fs/exofs/inode.c +++ linux-2.6/fs/exofs/inode.c @@ -1038,7 +1038,9 @@ static void create_done(struct osd_reque } else set_obj_created(oi); - atomic_dec(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count--; + spin_unlock(&inode->i_lock); wake_up(&oi->i_wq); } @@ -1104,11 +1106,15 @@ struct inode *exofs_new_inode(struct ino /* increment the refcount so that the inode will still be around when we * reach the callback */ - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); ret = exofs_async_op(or, create_done, inode, oi->i_cred); if (ret) { - atomic_dec(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count--; + spin_unlock(&inode->i_lock); osd_end_request(or); return ERR_PTR(-EIO); } Index: linux-2.6/fs/exofs/namei.c =================================================================== --- linux-2.6.orig/fs/exofs/namei.c +++ linux-2.6/fs/exofs/namei.c @@ -153,7 +153,9 @@ static int exofs_link(struct dentry *old inode->i_ctime = 
CURRENT_TIME; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); return exofs_add_nondir(dentry, inode); } Index: linux-2.6/fs/ext4/ialloc.c =================================================================== --- linux-2.6.orig/fs/ext4/ialloc.c +++ linux-2.6/fs/ext4/ialloc.c @@ -192,9 +192,9 @@ void ext4_free_inode(handle_t *handle, s struct ext4_sb_info *sbi; int fatal = 0, err, count, cleared; - if (atomic_read(&inode->i_count) > 1) { + if (inode->i_count > 1) { printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); + inode->i_count); return; } if (inode->i_nlink) { Index: linux-2.6/fs/ext4/namei.c =================================================================== --- linux-2.6.orig/fs/ext4/namei.c +++ linux-2.6/fs/ext4/namei.c @@ -2340,7 +2340,9 @@ retry: inode->i_ctime = ext4_current_time(inode); ext4_inc_count(handle, inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); err = ext4_add_entry(handle, dentry, inode); if (!err) { Index: linux-2.6/fs/gfs2/ops_inode.c =================================================================== --- linux-2.6.orig/fs/gfs2/ops_inode.c +++ linux-2.6/fs/gfs2/ops_inode.c @@ -253,7 +253,9 @@ out_parent: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); if (!error) { - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); d_instantiate(dentry, inode); mark_inode_dirty(inode); } Index: linux-2.6/fs/hfsplus/dir.c =================================================================== --- linux-2.6.orig/fs/hfsplus/dir.c +++ linux-2.6/fs/hfsplus/dir.c @@ -301,7 +301,9 @@ static int hfsplus_link(struct dentry *s inc_nlink(inode); hfsplus_instantiate(dst_dentry, inode, cnid); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); 
inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); HFSPLUS_SB(sb).file_count++; Index: linux-2.6/fs/hpfs/inode.c =================================================================== --- linux-2.6.orig/fs/hpfs/inode.c +++ linux-2.6/fs/hpfs/inode.c @@ -182,7 +182,7 @@ void hpfs_write_inode(struct inode *i) struct hpfs_inode_info *hpfs_inode = hpfs_i(i); struct inode *parent; if (i->i_ino == hpfs_sb(i->i_sb)->sb_root) return; - if (hpfs_inode->i_rddir_off && !atomic_read(&i->i_count)) { + if (hpfs_inode->i_rddir_off && !i->i_count) { if (*hpfs_inode->i_rddir_off) printk("HPFS: write_inode: some position still there\n"); kfree(hpfs_inode->i_rddir_off); hpfs_inode->i_rddir_off = NULL; Index: linux-2.6/fs/jffs2/dir.c =================================================================== --- linux-2.6.orig/fs/jffs2/dir.c +++ linux-2.6/fs/jffs2/dir.c @@ -287,7 +287,9 @@ static int jffs2_link (struct dentry *ol mutex_unlock(&f->sem); d_instantiate(dentry, old_dentry->d_inode); dir_i->i_mtime = dir_i->i_ctime = ITIME(now); - atomic_inc(&old_dentry->d_inode->i_count); + spin_lock(&old_dentry->d_inode->i_lock); + old_dentry->d_inode->i_count++; + spin_unlock(&old_dentry->d_inode->i_lock); } return ret; } @@ -866,7 +868,9 @@ static int jffs2_rename (struct inode *o printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). 
You now have a hard link\n", ret); /* Might as well let the VFS know */ d_instantiate(new_dentry, old_dentry->d_inode); - atomic_inc(&old_dentry->d_inode->i_count); + spin_lock(&old_dentry->d_inode->i_lock); + old_dentry->d_inode->i_count++; + spin_unlock(&old_dentry->d_inode->i_lock); new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); return ret; } Index: linux-2.6/fs/jfs/jfs_txnmgr.c =================================================================== --- linux-2.6.orig/fs/jfs/jfs_txnmgr.c +++ linux-2.6/fs/jfs/jfs_txnmgr.c @@ -1279,7 +1279,9 @@ int txCommit(tid_t tid, /* transaction * lazy commit thread finishes processing */ if (tblk->xflag & COMMIT_DELETE) { - atomic_inc(&tblk->u.ip->i_count); + spin_lock(&tblk->u.ip->i_lock); + tblk->u.ip->i_count++; + spin_unlock(&tblk->u.ip->i_lock); /* * Avoid a rare deadlock * Index: linux-2.6/fs/jfs/namei.c =================================================================== --- linux-2.6.orig/fs/jfs/namei.c +++ linux-2.6/fs/jfs/namei.c @@ -831,7 +831,9 @@ static int jfs_link(struct dentry *old_d ip->i_ctime = CURRENT_TIME; dir->i_ctime = dir->i_mtime = CURRENT_TIME; mark_inode_dirty(dir); - atomic_inc(&ip->i_count); + spin_lock(&ip->i_lock); + ip->i_count++; + spin_unlock(&ip->i_lock); iplist[0] = ip; iplist[1] = dir; Index: linux-2.6/fs/minix/namei.c =================================================================== --- linux-2.6.orig/fs/minix/namei.c +++ linux-2.6/fs/minix/namei.c @@ -103,7 +103,9 @@ static int minix_link(struct dentry * ol inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); return add_nondir(dentry, inode); } Index: linux-2.6/fs/nfs/inode.c =================================================================== --- linux-2.6.orig/fs/nfs/inode.c +++ linux-2.6/fs/nfs/inode.c @@ -396,7 +396,7 @@ nfs_fhget(struct super_block *sb, struct dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", 
inode->i_sb->s_id, (long long)NFS_FILEID(inode), - atomic_read(&inode->i_count)); + inode->i_count); out: return inode; @@ -1153,7 +1153,7 @@ static int nfs_update_inode(struct inode dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, - atomic_read(&inode->i_count), fattr->valid); + inode->i_count, fattr->valid); if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) goto out_fileid; @@ -1393,11 +1393,18 @@ struct inode *nfs_alloc_inode(struct sup return &nfsi->vfs_inode; } -void nfs_destroy_inode(struct inode *inode) +static void nfs_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); } +void nfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, nfs_i_callback); +} + static inline void nfs4_init_once(struct nfs_inode *nfsi) { #ifdef CONFIG_NFS_V4 Index: linux-2.6/fs/nilfs2/mdt.c =================================================================== --- linux-2.6.orig/fs/nilfs2/mdt.c +++ linux-2.6/fs/nilfs2/mdt.c @@ -467,7 +467,7 @@ nilfs_mdt_new_common(struct the_nilfs *n inode->i_sb = sb; /* sb may be NULL for some meta data files */ inode->i_blkbits = nilfs->ns_blocksize_bits; inode->i_flags = 0; - atomic_set(&inode->i_count, 1); + inode->i_count = 1; inode->i_nlink = 1; inode->i_ino = ino; inode->i_mode = S_IFREG; Index: linux-2.6/fs/nilfs2/namei.c =================================================================== --- linux-2.6.orig/fs/nilfs2/namei.c +++ linux-2.6/fs/nilfs2/namei.c @@ -221,7 +221,9 @@ static int nilfs_link(struct dentry *old inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); err = nilfs_add_nondir(dentry, inode); if (!err) Index: linux-2.6/fs/ocfs2/namei.c 
=================================================================== --- linux-2.6.orig/fs/ocfs2/namei.c +++ linux-2.6/fs/ocfs2/namei.c @@ -719,7 +719,9 @@ static int ocfs2_link(struct dentry *old goto out_commit; } - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); dentry->d_op = &ocfs2_dentry_ops; d_instantiate(dentry, inode); Index: linux-2.6/fs/reiserfs/file.c =================================================================== --- linux-2.6.orig/fs/reiserfs/file.c +++ linux-2.6/fs/reiserfs/file.c @@ -39,7 +39,7 @@ static int reiserfs_file_release(struct BUG_ON(!S_ISREG(inode->i_mode)); /* fast out for when nothing needs to be done */ - if ((atomic_read(&inode->i_count) > 1 || + if ((inode->i_count > 1 || !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || !tail_has_to_be_packed(inode)) && REISERFS_I(inode)->i_prealloc_count <= 0) { @@ -94,7 +94,7 @@ static int reiserfs_file_release(struct if (!err) err = jbegin_failure; - if (!err && atomic_read(&inode->i_count) <= 1 && + if (!err && inode->i_count <= 1 && (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && tail_has_to_be_packed(inode)) { /* if regular file is released by last holder and it has been Index: linux-2.6/fs/reiserfs/namei.c =================================================================== --- linux-2.6.orig/fs/reiserfs/namei.c +++ linux-2.6/fs/reiserfs/namei.c @@ -1142,7 +1142,9 @@ static int reiserfs_link(struct dentry * inode->i_ctime = CURRENT_TIME_SEC; reiserfs_update_sd(&th, inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); reiserfs_write_unlock(dir->i_sb); Index: linux-2.6/fs/reiserfs/stree.c =================================================================== --- linux-2.6.orig/fs/reiserfs/stree.c +++ linux-2.6/fs/reiserfs/stree.c @@ -1440,7 +1440,7 @@ static int 
maybe_indirect_to_direct(stru ** reading in the last block. The user will hit problems trying to ** read the file, but for now we just skip the indirect2direct */ - if (atomic_read(&inode->i_count) > 1 || + if (inode->i_count > 1 || !tail_has_to_be_packed(inode) || !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) { /* leave tail in an unformatted node */ Index: linux-2.6/fs/sysv/namei.c =================================================================== --- linux-2.6.orig/fs/sysv/namei.c +++ linux-2.6/fs/sysv/namei.c @@ -126,7 +126,9 @@ static int sysv_link(struct dentry * old inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); return add_nondir(dentry, inode); } Index: linux-2.6/fs/ubifs/dir.c =================================================================== --- linux-2.6.orig/fs/ubifs/dir.c +++ linux-2.6/fs/ubifs/dir.c @@ -557,7 +557,9 @@ static int ubifs_link(struct dentry *old lock_2_inodes(dir, inode); inc_nlink(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); inode->i_ctime = ubifs_current_time(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; Index: linux-2.6/fs/ubifs/super.c =================================================================== --- linux-2.6.orig/fs/ubifs/super.c +++ linux-2.6/fs/ubifs/super.c @@ -342,7 +342,7 @@ static void ubifs_delete_inode(struct in goto out; dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); - ubifs_assert(!atomic_read(&inode->i_count)); + ubifs_assert(!inode->i_count); ubifs_assert(inode->i_nlink == 0); truncate_inode_pages(&inode->i_data, 0); Index: linux-2.6/fs/udf/namei.c =================================================================== --- linux-2.6.orig/fs/udf/namei.c +++ linux-2.6/fs/udf/namei.c @@ -1090,7 +1090,9 @@ static int udf_link(struct dentry *old_d inc_nlink(inode); 
inode->i_ctime = current_fs_time(inode->i_sb); mark_inode_dirty(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); d_instantiate(dentry, inode); unlock_kernel(); Index: linux-2.6/fs/ufs/namei.c =================================================================== --- linux-2.6.orig/fs/ufs/namei.c +++ linux-2.6/fs/ufs/namei.c @@ -178,7 +178,9 @@ static int ufs_link (struct dentry * old inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + spin_lock(&inode->i_lock); + inode->i_count++; + spin_unlock(&inode->i_lock); error = ufs_add_nondir(dentry, inode); unlock_kernel(); Index: linux-2.6/fs/ntfs/super.c =================================================================== --- linux-2.6.orig/fs/ntfs/super.c +++ linux-2.6/fs/ntfs/super.c @@ -2921,7 +2921,9 @@ static int ntfs_fill_super(struct super_ } if ((sb->s_root = d_alloc_root(vol->root_ino))) { /* We increment i_count simulating an ntfs_iget(). */ - atomic_inc(&vol->root_ino->i_count); + spin_lock(&vol->root_ino->i_lock); + vol->root_ino->i_count++; + spin_unlock(&vol->root_ino->i_lock); ntfs_debug("Exiting, status successful."); /* Release the default upcase if it has no users. 
*/ mutex_lock(&ntfs_lock); Index: linux-2.6/fs/cifs/inode.c =================================================================== --- linux-2.6.orig/fs/cifs/inode.c +++ linux-2.6/fs/cifs/inode.c @@ -1428,7 +1428,7 @@ int cifs_revalidate(struct dentry *diren } cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " "jiffies %ld", full_path, direntry->d_inode, - direntry->d_inode->i_count.counter, direntry, + direntry->d_inode->i_count, direntry, direntry->d_time, jiffies)); if (cifsInode->time == 0) { Index: linux-2.6/mm/backing-dev.c =================================================================== --- linux-2.6.orig/mm/backing-dev.c +++ linux-2.6/mm/backing-dev.c @@ -71,7 +71,7 @@ static int bdi_debug_stats_show(struct s * RCU on the reader side */ nr_wb = nr_dirty = nr_io = nr_more_io = 0; - spin_lock(&inode_lock); + spin_lock(&wb_inode_list_lock); list_for_each_entry(wb, &bdi->wb_list, list) { nr_wb++; list_for_each_entry(inode, &wb->b_dirty, i_list) @@ -81,7 +81,7 @@ static int bdi_debug_stats_show(struct s list_for_each_entry(inode, &wb->b_more_io, i_list) nr_more_io++; } - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi); @@ -675,11 +675,11 @@ void bdi_destroy(struct backing_dev_info if (bdi_has_dirty_io(bdi)) { struct bdi_writeback *dst = &default_backing_dev_info.wb; - spin_lock(&inode_lock); + spin_lock(&wb_inode_list_lock); list_splice(&bdi->wb.b_dirty, &dst->b_dirty); list_splice(&bdi->wb.b_io, &dst->b_io); list_splice(&bdi->wb.b_more_io, &dst->b_more_io); - spin_unlock(&inode_lock); + spin_unlock(&wb_inode_list_lock); } bdi_unregister(bdi); Index: linux-2.6/fs/hugetlbfs/inode.c =================================================================== --- linux-2.6.orig/fs/hugetlbfs/inode.c +++ linux-2.6/fs/hugetlbfs/inode.c @@ -378,11 +378,12 @@ static void hugetlbfs_delete_inode(struc clear_inode(inode); } -static void hugetlbfs_forget_inode(struct inode *inode) 
__releases(inode_lock) +static void hugetlbfs_forget_inode(struct inode *inode) { if (generic_detach_inode(inode)) { truncate_hugepages(inode, 0); clear_inode(inode); + /* XXX: why no wake_up_inode? */ destroy_inode(inode); } } @@ -665,11 +666,18 @@ static struct inode *hugetlbfs_alloc_ino return &p->vfs_inode; } +static void hugetlbfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); +} + static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); - kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); + call_rcu(&inode->i_rcu, hugetlbfs_i_callback); } static const struct address_space_operations hugetlbfs_aops = { Index: linux-2.6/fs/buffer.c =================================================================== --- linux-2.6.orig/fs/buffer.c +++ linux-2.6/fs/buffer.c @@ -1152,7 +1152,7 @@ __getblk_slow(struct block_device *bdev, * inode list. * * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, - * mapping->tree_lock and the global inode_lock. + * and mapping->tree_lock. 
*/ void mark_buffer_dirty(struct buffer_head *bh) { Index: linux-2.6/fs/ext2/super.c =================================================================== --- linux-2.6.orig/fs/ext2/super.c +++ linux-2.6/fs/ext2/super.c @@ -157,11 +157,18 @@ static struct inode *ext2_alloc_inode(st return &ei->vfs_inode; } -static void ext2_destroy_inode(struct inode *inode) +static void ext2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } +static void ext2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ext2_i_callback); +} + static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; Index: linux-2.6/fs/ext3/super.c =================================================================== --- linux-2.6.orig/fs/ext3/super.c +++ linux-2.6/fs/ext3/super.c @@ -469,6 +469,13 @@ static struct inode *ext3_alloc_inode(st return &ei->vfs_inode; } +static void ext3_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); +} + static void ext3_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT3_I(inode)->i_orphan))) { @@ -479,7 +486,7 @@ static void ext3_destroy_inode(struct in false); dump_stack(); } - kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); + call_rcu(&inode->i_rcu, ext3_i_callback); } static void init_once(void *foo) Index: linux-2.6/fs/proc/inode.c =================================================================== --- linux-2.6.orig/fs/proc/inode.c +++ linux-2.6/fs/proc/inode.c @@ -88,11 +88,18 @@ static struct inode *proc_alloc_inode(st return inode; } -static void proc_destroy_inode(struct inode *inode) +static void proc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + 
INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } +static void proc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, proc_i_callback); +} + static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; Index: linux-2.6/fs/fat/inode.c =================================================================== --- linux-2.6.orig/fs/fat/inode.c +++ linux-2.6/fs/fat/inode.c @@ -497,11 +497,18 @@ static struct inode *fat_alloc_inode(str return &ei->vfs_inode; } -static void fat_destroy_inode(struct inode *inode) +static void fat_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } +static void fat_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, fat_i_callback); +} + static void init_once(void *foo) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; Index: linux-2.6/net/sunrpc/rpc_pipe.c =================================================================== --- linux-2.6.orig/net/sunrpc/rpc_pipe.c +++ linux-2.6/net/sunrpc/rpc_pipe.c @@ -162,11 +162,19 @@ rpc_alloc_inode(struct super_block *sb) } static void -rpc_destroy_inode(struct inode *inode) +rpc_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(rpc_inode_cachep, RPC_I(inode)); } +static void +rpc_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, rpc_i_callback); +} + static int rpc_pipe_open(struct inode *inode, struct file *filp) { Index: linux-2.6/include/linux/inotify.h =================================================================== --- linux-2.6.orig/include/linux/inotify.h +++ linux-2.6/include/linux/inotify.h @@ -111,7 +111,7 @@ extern void inotify_inode_queue_event(st const char *, struct inode *); extern void inotify_dentry_parent_queue_event(struct dentry *, 
__u32, __u32, const char *); -extern void inotify_unmount_inodes(struct list_head *); +extern void inotify_unmount_inodes(struct super_block *); extern void inotify_inode_is_dead(struct inode *); extern u32 inotify_get_cookie(void); @@ -161,7 +161,7 @@ static inline void inotify_dentry_parent { } -static inline void inotify_unmount_inodes(struct list_head *list) +static inline void inotify_unmount_inodes(struct super_block *sb) { }