GIT 918a1913aa540496479d85458dd49aa326cfb07b git://oss.oracle.com/home/sourcebo/git/ocfs2.git/#ALL commit 0e6dd31c00fe0ab96feeb9c0cbae95660d545dc3 Author: Joel Becker Date: Wed Apr 12 18:34:43 2006 -0700 configfs: Release memory in configfs_example. The configfs_example module was missing a ->release(). Signed-off-by: Joel Becker commit 7d2311cbbd1482fe9e453ed8f4d83bfa91575e7f Author: Joel Becker Date: Tue Apr 11 21:37:20 2006 -0700 configfs: configfs_mkdir() failed to cleanup linkage. If configfs_mkdir() errored in certain ways after the parent<->child linkage was already created, it would not undo the linkage. Also, comment the reference counting for clarity. Signed-off-by: Joel Becker commit a1331a4da17c4358889c6422dc3d53dbed55ddd0 Author: Joel Becker Date: Mon Mar 27 18:46:09 2006 -0800 configfs: Fix a reference leak in configfs_mkdir(). configfs_mkdir() failed to release the working parent reference in most exit paths. Also changed the exit path for readability. Signed-off-by: Joel Becker commit ae5718a4efefeb07fb803c739300cf2310ca3733 Author: Sunil Mushran Date: Wed Apr 12 14:37:00 2006 -0700 ocfs2: fix gfp mask in some file system paths We were using GFP_KERNEL in a handful of places which really wanted GFP_NOFS. Fix this. Signed-off-by: Sunil Mushran Signed-off-by: Mark Fasheh commit 0b94501e6fca092e6d25bdc9f61da883d2ac1f7a Author: Mark Fasheh Date: Wed Apr 12 14:24:05 2006 -0700 ocfs2: Don't populate uptodate cache in ocfs2_force_read_journal() Signed-off-by: Mark Fasheh commit 10e5be93b9410d044de64850085f2d72e4d2ee85 Author: Adrian Bunk Date: Fri Mar 31 16:53:55 2006 +0200 [PATCH] fs/ocfs2/dlm/dlmrecovery.c: make dlm_lockres_master_requery() static dlm_lockres_master_requery() became global without any external usage. 
Signed-off-by: Adrian Bunk Signed-off-by: Mark Fasheh commit 765f81792db9f8a9542478337e54abfc9df1e685 Author: Adrian Bunk Date: Sun Mar 26 14:25:52 2006 +0200 ocfs2: OCFS2_FS must depend on SYSFS Signed-off-by: Adrian Bunk Signed-off-by: Mark Fasheh commit f03eda9fdeba97ecbcc7bec614a2028de032ef5f Author: Joel Becker Date: Fri Mar 10 11:42:30 2006 -0800 configfs: Clear up a few extra spaces where there should be TABs. Signed-off-by: Joel Becker commit 205cdd86deb287fecc80672c53a11e88e1f8c626 Author: Mark Fasheh Date: Thu Mar 23 11:23:29 2006 -0800 ocfs2: silence a compile warning in dlm_alloc_pagevec() Reported by Andrew Morton. Signed-off-by: Mark Fasheh commit 53cdfd3bc5395ddd63793c07efe1a559b87046d5 Author: Joel Becker Date: Thu Mar 16 17:40:37 2006 -0800 [PATCH] ocfs2: Alloc at least a page for the DLM hash The OCFS2 DLM allocates a number of pages for a hash to lookup locks. There was a bug where a PAGE_SIZE bigger than the hash size (eg, 64K pages) would result in zero pages allocated. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh commit ad3d6591f18daa7c793d4857f0b1d970d5169f89 Author: Daniel Phillips Date: Fri Mar 10 18:08:16 2006 -0800 ocfs2: allocate lockres hash pages in an array This allows us to have a hash table greater than a single page which greatly improves dlm performance on some tests. Signed-off-by: Daniel Phillips Signed-off-by: Mark Fasheh commit 77ab4955107cbadc467879c5cc8a7f8c52ceb86d Author: Mark Fasheh Date: Fri Mar 10 13:44:00 2006 -0800 ocfs2: inline dlm_lockres_get() It's called on every lookup so this might help performance a bit. 
Signed-off-by: Mark Fasheh commit 0bee389c4d9e5bca96b7385e0442e3d8bb8758ac Author: Daniel Phillips Date: Fri Mar 10 13:31:47 2006 -0800 [PATCH] Clean up ocfs2 hash probe and make it faster Signed-Off-By: Daniel Phillips Signed-off-by: Mark Fasheh commit 11bbd4deb5e7955520f98bb5d07c601671cfb909 Author: Mark Fasheh Date: Thu Mar 9 17:55:56 2006 -0800 ocfs2: calculate lockid hash values outside of the spinlock Fixes a performance bug - pointed out by Andrew. Signed-off-by: Mark Fasheh commit f1208939da5bafc80299e52d060d3bbcbf9ca34f Author: Mark Fasheh Date: Mon Mar 6 15:36:17 2006 -0800 ocfs2: move lockres qstr next to hlist_node structure Gains us a bit of performance on loads which heavily hit the lockres hash. Patch suggested by Daniel Phillips. Signed-off-by: Mark Fasheh --- diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c index 3d4713a..2d6a14a 100644 --- a/Documentation/filesystems/configfs/configfs_example.c +++ b/Documentation/filesystems/configfs/configfs_example.c @@ -264,6 +264,15 @@ static struct config_item_type simple_ch }; +struct simple_children { + struct config_group group; +}; + +static inline struct simple_children *to_simple_children(struct config_item *item) +{ + return item ? container_of(to_config_group(item), struct simple_children, group) : NULL; +} + static struct config_item *simple_children_make_item(struct config_group *group, const char *name) { struct simple_child *simple_child; @@ -304,7 +313,13 @@ static ssize_t simple_children_attr_show "items have only one attribute that is readable and writeable.\n"); } +static void simple_children_release(struct config_item *item) +{ + kfree(to_simple_children(item)); +} + static struct configfs_item_operations simple_children_item_ops = { + .release = simple_children_release, .show_attribute = simple_children_attr_show, }; @@ -345,10 +360,6 @@ static struct configfs_subsystem simple_ * children of its own.
*/ -struct simple_children { - struct config_group group; -}; - static struct config_group *group_children_make_group(struct config_group *group, const char *name) { struct simple_children *simple_children; diff --git a/fs/Kconfig b/fs/Kconfig index 2524629..83da138 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -326,7 +326,7 @@ source "fs/xfs/Kconfig" config OCFS2_FS tristate "OCFS2 file system support (EXPERIMENTAL)" - depends on NET && EXPERIMENTAL + depends on NET && SYSFS && EXPERIMENTAL select CONFIGFS_FS select JBD select CRC32 diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5638c8f..880c9ca 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -211,7 +211,7 @@ static void remove_dir(struct dentry * d struct configfs_dirent * sd; sd = d->d_fsdata; - list_del_init(&sd->s_sibling); + list_del_init(&sd->s_sibling); configfs_put(sd); if (d->d_inode) simple_rmdir(parent->d_inode,d); @@ -330,7 +330,7 @@ static int configfs_detach_prep(struct d ret = configfs_detach_prep(sd->s_dentry); if (!ret) - continue; + continue; } else ret = -ENOTEMPTY; @@ -505,13 +505,15 @@ static int populate_groups(struct config int i; if (group->default_groups) { - /* FYI, we're faking mkdir here + /* + * FYI, we're faking mkdir here * I'm not sure we need this semaphore, as we're called * from our parent's mkdir. That holds our parent's * i_mutex, so afaik lookup cannot continue through our * parent to find us, let alone mess with our tree. * That said, taking our i_mutex is closer to mkdir - * emulation, and shouldn't hurt. */ + * emulation, and shouldn't hurt. 
+ */ mutex_lock(&dentry->d_inode->i_mutex); for (i = 0; group->default_groups[i]; i++) { @@ -546,20 +548,34 @@ static void unlink_obj(struct config_ite item->ci_group = NULL; item->ci_parent = NULL; + + /* Drop the reference for ci_entry */ config_item_put(item); + /* Drop the reference for ci_parent */ config_group_put(group); } } static void link_obj(struct config_item *parent_item, struct config_item *item) { - /* Parent seems redundant with group, but it makes certain - * traversals much nicer. */ + /* + * Parent seems redundant with group, but it makes certain + * traversals much nicer. + */ item->ci_parent = parent_item; + + /* + * We hold a reference on the parent for the child's ci_parent + * link. + */ item->ci_group = config_group_get(to_config_group(parent_item)); list_add_tail(&item->ci_entry, &item->ci_group->cg_children); + /* + * We hold a reference on the child for ci_entry on the parent's + * cg_children + */ config_item_get(item); } @@ -684,6 +700,10 @@ static void client_drop_item(struct conf type = parent_item->ci_type; BUG_ON(!type); + /* + * If ->drop_item() exists, it is responsible for the + * config_item_put(). 
+ */ if (type->ct_group_ops && type->ct_group_ops->drop_item) type->ct_group_ops->drop_item(to_config_group(parent_item), item); @@ -694,23 +714,28 @@ static void client_drop_item(struct conf static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int ret; + int ret, module_got = 0; struct config_group *group; struct config_item *item; struct config_item *parent_item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; struct config_item_type *type; - struct module *owner; + struct module *owner = NULL; char *name; - if (dentry->d_parent == configfs_sb->s_root) - return -EPERM; + if (dentry->d_parent == configfs_sb->s_root) { + ret = -EPERM; + goto out; + } sd = dentry->d_parent->d_fsdata; - if (!(sd->s_type & CONFIGFS_USET_DIR)) - return -EPERM; + if (!(sd->s_type & CONFIGFS_USET_DIR)) { + ret = -EPERM; + goto out; + } + /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; subsys = to_config_group(parent_item)->cg_subsys; @@ -719,15 +744,16 @@ static int configfs_mkdir(struct inode * if (!type || !type->ct_group_ops || (!type->ct_group_ops->make_group && !type->ct_group_ops->make_item)) { - config_item_put(parent_item); - return -EPERM; /* What lack-of-mkdir returns */ + ret = -EPERM; /* Lack-of-mkdir returns -EPERM */ + goto out_put; } name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); if (!name) { - config_item_put(parent_item); - return -ENOMEM; + ret = -ENOMEM; + goto out_put; } + snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); down(&subsys->su_sem); @@ -748,40 +774,67 @@ static int configfs_mkdir(struct inode * kfree(name); if (!item) { - config_item_put(parent_item); - return -ENOMEM; + /* + * If item == NULL, then link_obj() was never called. + * There are no extra references to clean up. 
+ */ + ret = -ENOMEM; + goto out_put; } - ret = -EINVAL; - type = item->ci_type; - if (type) { - owner = type->ct_owner; - if (try_module_get(owner)) { - if (group) { - ret = configfs_attach_group(parent_item, - item, - dentry); - } else { - ret = configfs_attach_item(parent_item, - item, - dentry); - } + /* + * link_obj() has been called (via link_group() for groups). + * From here on out, errors must clean that up. + */ - if (ret) { - down(&subsys->su_sem); - if (group) - unlink_group(group); - else - unlink_obj(item); - client_drop_item(parent_item, item); - up(&subsys->su_sem); + type = item->ci_type; + if (!type) { + ret = -EINVAL; + goto out_unlink; + } - config_item_put(parent_item); - module_put(owner); - } - } + owner = type->ct_owner; + if (!try_module_get(owner)) { + ret = -EINVAL; + goto out_unlink; } + /* + * I hate doing it this way, but if there is + * an error, module_put() probably should + * happen after any cleanup. + */ + module_got = 1; + + if (group) + ret = configfs_attach_group(parent_item, item, dentry); + else + ret = configfs_attach_item(parent_item, item, dentry); + +out_unlink: + if (ret) { + /* Tear down everything we built up */ + down(&subsys->su_sem); + if (group) + unlink_group(group); + else + unlink_obj(item); + client_drop_item(parent_item, item); + up(&subsys->su_sem); + + if (module_got) + module_put(owner); + } + +out_put: + /* + * link_obj()/link_group() took a reference from child->parent, + * so the parent is safely pinned. We can drop our working + * reference. 
+ */ + config_item_put(parent_item); + +out: return ret; } @@ -801,6 +854,7 @@ static int configfs_rmdir(struct inode * if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; + /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; BUG_ON(!subsys); @@ -817,6 +871,7 @@ static int configfs_rmdir(struct inode * return ret; } + /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); /* Drop reference from above, item already holds one. */ @@ -876,7 +931,7 @@ int configfs_rename_dir(struct config_it new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { - if (!new_dentry->d_inode) { + if (!new_dentry->d_inode) { error = config_item_set_name(item, "%s", new_name); if (!error) { d_add(new_dentry, NULL); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index e5512e2..fb65e08 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -66,7 +66,7 @@ static void fill_item_path(struct config } static int create_link(struct config_item *parent_item, - struct config_item *item, + struct config_item *item, struct dentry *dentry) { struct configfs_dirent *target_sd = item->ci_dentry->d_fsdata; diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 88cc43d..bd8bab1 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -37,7 +37,17 @@ #define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes #define DLM_THREAD_MS 200 // flush at least every 200 ms -#define DLM_HASH_BUCKETS (PAGE_SIZE / sizeof(struct hlist_head)) +#define DLM_HASH_SIZE_DEFAULT (1 << 14) +#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE +# define DLM_HASH_PAGES 1 +#else +# define DLM_HASH_PAGES (DLM_HASH_SIZE_DEFAULT / PAGE_SIZE) +#endif +#define DLM_BUCKETS_PER_PAGE (PAGE_SIZE / sizeof(struct hlist_head)) +#define DLM_HASH_BUCKETS (DLM_HASH_PAGES * DLM_BUCKETS_PER_PAGE) + +/* Intended 
to make it easier for us to switch out hash functions */ +#define dlm_lockid_hash(_n, _l) full_name_hash(_n, _l) enum dlm_ast_type { DLM_AST = 0, @@ -85,7 +95,7 @@ enum dlm_ctxt_state { struct dlm_ctxt { struct list_head list; - struct hlist_head *lockres_hash; + struct hlist_head **lockres_hash; struct list_head dirty_list; struct list_head purge_list; struct list_head pending_asts; @@ -132,6 +142,11 @@ struct dlm_ctxt struct list_head dlm_eviction_callbacks; }; +static inline struct hlist_head *dlm_lockres_hash(struct dlm_ctxt *dlm, unsigned i) +{ + return dlm->lockres_hash[(i / DLM_BUCKETS_PER_PAGE) % DLM_HASH_PAGES] + (i % DLM_BUCKETS_PER_PAGE); +} + /* these keventd work queue items are for less-frequently * called functions that cannot be directly called from the * net message handlers for some reason, usually because @@ -216,6 +231,7 @@ struct dlm_lock_resource /* WARNING: Please see the comment in dlm_init_lockres before * adding fields here. */ struct hlist_node hash_node; + struct qstr lockname; struct kref refs; /* please keep these next 3 in this order @@ -238,7 +254,6 @@ struct dlm_lock_resource wait_queue_head_t wq; u8 owner; //node which owns the lock resource, or unknown u16 state; - struct qstr lockname; char lvb[DLM_LVB_LEN]; }; @@ -687,14 +702,20 @@ void dlm_lockres_calc_usage(struct dlm_c struct dlm_lock_resource *res); void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres); -void dlm_lockres_get(struct dlm_lock_resource *res); +static inline void dlm_lockres_get(struct dlm_lock_resource *res) +{ + /* This is called on every lookup, so it might be worth + * inlining. 
*/ + kref_get(&res->refs); +} void dlm_lockres_put(struct dlm_lock_resource *res); void __dlm_unhash_lockres(struct dlm_lock_resource *res); void __dlm_insert_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, const char *name, - unsigned int len); + unsigned int len, + unsigned int hash); struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, const char *name, unsigned int len); @@ -780,8 +801,6 @@ int dlm_begin_reco_handler(struct o2net_ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data); int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, u8 nodenum, u8 *real_master); -int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 *real_master); int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index c7eae5d..e119e4d 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -136,7 +136,7 @@ void dlm_dump_lock_resources(struct dlm_ spin_lock(&dlm->spinlock); for (i=0; i<DLM_HASH_BUCKETS; i++) { - bucket = &(dlm->lockres_hash[i]); + bucket = dlm_lockres_hash(dlm, i); hlist_for_each_entry(res, iter, bucket, hash_node) dlm_print_one_lock_resource(res); } diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 8f3a9e3..3511930 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -49,6 +49,33 @@ #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) #include "cluster/masklog.h" +static void dlm_free_pagevec(void **vec, int pages) +{ + while (pages--) + free_page((unsigned long)vec[pages]); + kfree(vec); +} + +static void **dlm_alloc_pagevec(int pages) +{ + void **vec = kmalloc(pages * sizeof(void *), GFP_KERNEL); + int i; + + if (!vec) + return NULL; + + for (i = 0; i < pages; i++) + if (!(vec[i] = (void *)__get_free_page(GFP_KERNEL))) + goto out_free; + + mlog(0, "Allocated DLM hash pagevec; %d pages (%lu expected), %lu buckets per page\n", 
+ pages, DLM_HASH_PAGES, (unsigned long)DLM_BUCKETS_PER_PAGE); + return vec; +out_free: + dlm_free_pagevec(vec, i); + return NULL; +} + /* * * spinlock lock ordering: if multiple locks are needed, obey this ordering: @@ -90,8 +117,7 @@ void __dlm_insert_lockres(struct dlm_ctx assert_spin_locked(&dlm->spinlock); q = &res->lockname; - q->hash = full_name_hash(q->name, q->len); - bucket = &(dlm->lockres_hash[q->hash % DLM_HASH_BUCKETS]); + bucket = dlm_lockres_hash(dlm, q->hash); /* get a reference for our hashtable */ dlm_lockres_get(res); @@ -100,34 +126,32 @@ void __dlm_insert_lockres(struct dlm_ctx } struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm, - const char *name, - unsigned int len) + const char *name, + unsigned int len, + unsigned int hash) { - unsigned int hash; - struct hlist_node *iter; - struct dlm_lock_resource *tmpres=NULL; struct hlist_head *bucket; + struct hlist_node *list; mlog_entry("%.*s\n", len, name); assert_spin_locked(&dlm->spinlock); - hash = full_name_hash(name, len); - - bucket = &(dlm->lockres_hash[hash % DLM_HASH_BUCKETS]); - - /* check for pre-existing lock */ - hlist_for_each(iter, bucket) { - tmpres = hlist_entry(iter, struct dlm_lock_resource, hash_node); - if (tmpres->lockname.len == len && - memcmp(tmpres->lockname.name, name, len) == 0) { - dlm_lockres_get(tmpres); - break; - } + bucket = dlm_lockres_hash(dlm, hash); - tmpres = NULL; + hlist_for_each(list, bucket) { + struct dlm_lock_resource *res = hlist_entry(list, + struct dlm_lock_resource, hash_node); + if (res->lockname.name[0] != name[0]) + continue; + if (unlikely(res->lockname.len != len)) + continue; + if (memcmp(res->lockname.name + 1, name + 1, len - 1)) + continue; + dlm_lockres_get(res); + return res; } - return tmpres; + return NULL; } struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, @@ -135,9 +159,10 @@ struct dlm_lock_resource * dlm_lookup_lo unsigned int len) { struct dlm_lock_resource *res; + unsigned int hash = 
dlm_lockid_hash(name, len); spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, name, len); + res = __dlm_lookup_lockres(dlm, name, len, hash); spin_unlock(&dlm->spinlock); return res; } @@ -194,7 +219,7 @@ static int dlm_wait_on_domain_helper(con static void dlm_free_ctxt_mem(struct dlm_ctxt *dlm) { if (dlm->lockres_hash) - free_page((unsigned long) dlm->lockres_hash); + dlm_free_pagevec((void **)dlm->lockres_hash, DLM_HASH_PAGES); if (dlm->name) kfree(dlm->name); @@ -304,8 +329,8 @@ static void dlm_migrate_all_locks(struct restart: spin_lock(&dlm->spinlock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { - while (!hlist_empty(&dlm->lockres_hash[i])) { - res = hlist_entry(dlm->lockres_hash[i].first, + while (!hlist_empty(dlm_lockres_hash(dlm, i))) { + res = hlist_entry(dlm_lockres_hash(dlm, i)->first, struct dlm_lock_resource, hash_node); /* need reference when manually grabbing lockres */ dlm_lockres_get(res); @@ -1191,7 +1216,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(c goto leave; } - dlm->lockres_hash = (struct hlist_head *) __get_free_page(GFP_KERNEL); + dlm->lockres_hash = (struct hlist_head **)dlm_alloc_pagevec(DLM_HASH_PAGES); if (!dlm->lockres_hash) { mlog_errno(-ENOMEM); kfree(dlm->name); @@ -1200,8 +1225,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(c goto leave; } - for (i=0; i<DLM_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&dlm->lockres_hash[i]); + for (i = 0; i < DLM_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(dlm_lockres_hash(dlm, i)); strcpy(dlm->name, domain); dlm->key = key; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 940be4c..f1fbf2f 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -579,11 +579,6 @@ static void dlm_lockres_release(struct k kfree(res); } -void dlm_lockres_get(struct dlm_lock_resource *res) -{ - kref_get(&res->refs); -} - void dlm_lockres_put(struct dlm_lock_resource *res) { kref_put(&res->refs, dlm_lockres_release); @@ -603,7 +598,7 @@ static void dlm_init_lockres(struct dlm_ memcpy(qname, name, namelen); res->lockname.len = namelen; - 
res->lockname.hash = full_name_hash(name, namelen); + res->lockname.hash = dlm_lockid_hash(name, namelen); init_waitqueue_head(&res->wq); spin_lock_init(&res->spinlock); @@ -677,19 +672,20 @@ struct dlm_lock_resource * dlm_get_lock_ int blocked = 0; int ret, nodenum; struct dlm_node_iter iter; - unsigned int namelen; + unsigned int namelen, hash; int tries = 0; int bit, wait_on_recovery = 0; BUG_ON(!lockid); namelen = strlen(lockid); + hash = dlm_lockid_hash(lockid, namelen); mlog(0, "get lockres %s (len %d)\n", lockid, namelen); lookup: spin_lock(&dlm->spinlock); - tmpres = __dlm_lookup_lockres(dlm, lockid, namelen); + tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash); if (tmpres) { spin_unlock(&dlm->spinlock); mlog(0, "found in hash!\n"); @@ -1316,7 +1312,7 @@ int dlm_master_request_handler(struct o2 struct dlm_master_request *request = (struct dlm_master_request *) msg->buf; struct dlm_master_list_entry *mle = NULL, *tmpmle = NULL; char *name; - unsigned int namelen; + unsigned int namelen, hash; int found, ret; int set_maybe; int dispatch_assert = 0; @@ -1331,6 +1327,7 @@ int dlm_master_request_handler(struct o2 name = request->name; namelen = request->namelen; + hash = dlm_lockid_hash(name, namelen); if (namelen > DLM_LOCKID_NAME_MAX) { response = DLM_IVBUFLEN; @@ -1339,7 +1336,7 @@ int dlm_master_request_handler(struct o2 way_up_top: spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, name, namelen); + res = __dlm_lookup_lockres(dlm, name, namelen, hash); if (res) { spin_unlock(&dlm->spinlock); @@ -1612,7 +1609,7 @@ int dlm_assert_master_handler(struct o2n struct dlm_assert_master *assert = (struct dlm_assert_master *)msg->buf; struct dlm_lock_resource *res = NULL; char *name; - unsigned int namelen; + unsigned int namelen, hash; u32 flags; int master_request = 0; int ret = 0; @@ -1622,6 +1619,7 @@ int dlm_assert_master_handler(struct o2n name = assert->name; namelen = assert->namelen; + hash = dlm_lockid_hash(name, namelen); flags = 
be32_to_cpu(assert->flags); if (namelen > DLM_LOCKID_NAME_MAX) { @@ -1670,7 +1668,7 @@ int dlm_assert_master_handler(struct o2n /* ok everything checks out with the MLE * now check to see if there is a lockres */ - res = __dlm_lookup_lockres(dlm, name, namelen); + res = __dlm_lookup_lockres(dlm, name, namelen, hash); if (res) { spin_lock(&res->spinlock); if (res->state & DLM_LOCK_RES_RECOVERING) { @@ -2462,7 +2460,7 @@ int dlm_migrate_request_handler(struct o struct dlm_migrate_request *migrate = (struct dlm_migrate_request *) msg->buf; struct dlm_master_list_entry *mle = NULL, *oldmle = NULL; const char *name; - unsigned int namelen; + unsigned int namelen, hash; int ret = 0; if (!dlm_grab(dlm)) @@ -2470,6 +2468,7 @@ int dlm_migrate_request_handler(struct o name = migrate->name; namelen = migrate->namelen; + hash = dlm_lockid_hash(name, namelen); /* preallocate.. if this fails, abort */ mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, @@ -2482,7 +2481,7 @@ int dlm_migrate_request_handler(struct o /* check for pre-existing lock */ spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, name, namelen); + res = __dlm_lookup_lockres(dlm, name, namelen, hash); spin_lock(&dlm->master_lock); if (res) { @@ -2601,6 +2600,7 @@ void dlm_clean_master_list(struct dlm_ct struct list_head *iter, *iter2; struct dlm_master_list_entry *mle; struct dlm_lock_resource *res; + unsigned int hash; mlog_entry("dlm=%s, dead node=%u\n", dlm->name, dead_node); top: @@ -2684,8 +2684,9 @@ top: mle->master, mle->new_master); /* if there is a lockres associated with this * mle, find it and set its owner to UNKNOWN */ + hash = dlm_lockid_hash(mle->u.name.name, mle->u.name.len); res = __dlm_lookup_lockres(dlm, mle->u.name.name, - mle->u.name.len); + mle->u.name.len, hash); if (res) { /* unfortunately if we hit this rare case, our * lock ordering is messed. 
we need to drop diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 805cbab..96c0aad 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -95,6 +95,9 @@ static void dlm_reco_unlock_ast(void *as static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data); static void dlm_mig_lockres_worker(struct dlm_work_item *item, void *data); +static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + u8 *real_master); static u64 dlm_get_next_mig_cookie(void); @@ -1312,8 +1315,9 @@ leave: -int dlm_lockres_master_requery(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res, u8 *real_master) +static int dlm_lockres_master_requery(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, + u8 *real_master) { struct dlm_node_iter iter; int nodenum; @@ -1406,6 +1410,7 @@ int dlm_master_requery_handler(struct o2 struct dlm_ctxt *dlm = data; struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf; struct dlm_lock_resource *res = NULL; + unsigned int hash; int master = DLM_LOCK_RES_OWNER_UNKNOWN; u32 flags = DLM_ASSERT_MASTER_REQUERY; @@ -1415,8 +1420,10 @@ int dlm_master_requery_handler(struct o2 return master; } + hash = dlm_lockid_hash(req->name, req->namelen); + spin_lock(&dlm->spinlock); - res = __dlm_lookup_lockres(dlm, req->name, req->namelen); + res = __dlm_lookup_lockres(dlm, req->name, req->namelen, hash); if (res) { spin_lock(&res->spinlock); master = res->owner; @@ -1719,7 +1726,7 @@ static void dlm_finish_local_lockres_rec * the RECOVERING state and set the owner * if necessary */ for (i = 0; i < DLM_HASH_BUCKETS; i++) { - bucket = &(dlm->lockres_hash[i]); + bucket = dlm_lockres_hash(dlm, i); hlist_for_each_entry(res, hash_iter, bucket, hash_node) { if (res->state & DLM_LOCK_RES_RECOVERING) { if (res->owner == dead_node) { @@ -1884,7 +1891,7 @@ static void dlm_do_local_recovery_cleanu * need to be fired as a result. 
*/ for (i = 0; i < DLM_HASH_BUCKETS; i++) { - bucket = &(dlm->lockres_hash[i]); + bucket = dlm_lockres_hash(dlm, i); hlist_for_each_entry(res, iter, bucket, hash_node) { /* always prune any $RECOVERY entries for dead nodes, * otherwise hangs can occur during later recovery */ diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 4601fc2..1a5c690 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -569,7 +569,7 @@ static int ocfs2_extent_map_insert(struc ret = -ENOMEM; ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.new_ent) { mlog_errno(ret); return ret; @@ -583,14 +583,14 @@ static int ocfs2_extent_map_insert(struc if (ctxt.need_left && !ctxt.left_ent) { ctxt.left_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.left_ent) break; } if (ctxt.need_right && !ctxt.right_ent) { ctxt.right_ent = kmem_cache_alloc(ocfs2_em_ent_cachep, - GFP_KERNEL); + GFP_NOFS); if (!ctxt.right_ent) break; } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 6a610ae..eebc3cf 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -117,7 +117,7 @@ struct ocfs2_journal_handle *ocfs2_alloc { struct ocfs2_journal_handle *retval = NULL; - retval = kcalloc(1, sizeof(*retval), GFP_KERNEL); + retval = kcalloc(1, sizeof(*retval), GFP_NOFS); if (!retval) { mlog(ML_ERROR, "Failed to allocate memory for journal " "handle!\n"); @@ -870,9 +870,11 @@ static int ocfs2_force_read_journal(stru if (p_blocks > CONCURRENT_JOURNAL_FILL) p_blocks = CONCURRENT_JOURNAL_FILL; + /* We are reading journal data which should not + * be put in the uptodate cache */ status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), p_blkno, p_blocks, bhs, 0, - inode); + NULL); if (status < 0) { mlog_errno(status); goto bail; @@ -982,7 +984,7 @@ static void ocfs2_queue_recovery_complet { struct ocfs2_la_recovery_item *item; - item = kmalloc(sizeof(struct ocfs2_la_recovery_item), GFP_KERNEL); + item = kmalloc(sizeof(struct 
ocfs2_la_recovery_item), GFP_NOFS); if (!item) { /* Though we wish to avoid it, we are in fact safe in * skipping local alloc cleanup as fsck.ocfs2 is more diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 04a684d..b8a00a7 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c @@ -337,7 +337,7 @@ static void __ocfs2_set_buffer_uptodate( (unsigned long long)oi->ip_blkno, (unsigned long long)block, expand_tree); - new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_KERNEL); + new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); if (!new) { mlog_errno(-ENOMEM); return; @@ -349,7 +349,7 @@ static void __ocfs2_set_buffer_uptodate( * has no way of tracking that. */ for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, - GFP_KERNEL); + GFP_NOFS); if (!tree[i]) { mlog_errno(-ENOMEM); goto out_free; diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 53049a2..ee42765 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c @@ -586,7 +586,7 @@ static struct ocfs2_net_wait_ctxt *ocfs2 { struct ocfs2_net_wait_ctxt *w; - w = kcalloc(1, sizeof(*w), GFP_KERNEL); + w = kcalloc(1, sizeof(*w), GFP_NOFS); if (!w) { mlog_errno(-ENOMEM); goto bail; @@ -749,7 +749,7 @@ static struct ocfs2_vote_msg * ocfs2_new BUG_ON(!ocfs2_is_valid_vote_request(type)); - request = kcalloc(1, sizeof(*request), GFP_KERNEL); + request = kcalloc(1, sizeof(*request), GFP_NOFS); if (!request) { mlog_errno(-ENOMEM); } else { @@ -1129,7 +1129,7 @@ static int ocfs2_handle_vote_message(str struct ocfs2_super *osb = data; struct ocfs2_vote_work *work; - work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_KERNEL); + work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); if (!work) { status = -ENOMEM; mlog_errno(status);