GIT 886f22ebcf59c1db2e2aa453566066eaa1753dae git+ssh://master.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2.git#ALL commit ef8917a1e25012c4484865d922a20b8cfc05573b Author: Joel Becker Date: Sat Feb 3 03:14:30 2007 -0800 ocfs2: Proper cleanup in case of error in ocfs2_register_hb_callbacks() If ocfs2_register_hb_callbacks() succeeds on its first callback but fails its second, it doesn't release the first on the way out. Fix that. While we're at it, o2hb_unregister_callback() never returns anything but 0, so let's make it void. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh commit fba9960d5e78d7e7beaff28db4715990e8d1bc5b Author: Joel Becker Date: Sat Feb 3 03:04:20 2007 -0800 ocfs2: Concurrent access of o2hb_region->hr_task was not locked This means that a build-up and a teardown could race which would result in a double-kthread_stop(). Protect the setting and clearing of hr_task with o2hb_live_lock, as it's not a common thing and not performance critical. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh commit f93ab779f18a8e8cd0e64382d6504d6b477d29ad Author: Joel Becker Date: Thu Jan 4 14:54:41 2007 -0800 Under load, OCFS2 would crash in invalidate_inode_pages2_range() because invalidate_complete_page2() was unable to invalidate a page. It would appear that JBD is holding on to the page. ext3 has a specific ->releasepage() handler to cover this case. Steal ext3's ->releasepage(), ->invalidatepage(), and ->migratepage(), as they appear completely appropriate for OCFS2. Signed-off-by: Joel Becker commit e47575895914402e1e72f00b3db539f468b849e3 Author: Joel Becker Date: Fri Oct 6 17:33:23 2006 -0700 configfs: accessing item hierarchy during rmdir(2) Add a notification callback, ops->disconnect_notify(). It has the same prototype as ->drop_item(), but it will be called just before the item linkage is broken. This way, configfs users who want to do work while the object is still in the hierarchy have a chance. 
Client drivers will still need to config_item_put() in their ->drop_item(), if they implement it. They need do nothing in ->disconnect_notify(). They don't have to provide it if they don't care. But someone who wants to be notified before ci_parent is set to NULL can now be notified. Signed-off-by: Joel Becker Documentation/filesystems/configfs/configfs.txt | 12 ++++++ fs/configfs/dir.c | 29 +++++++++++++ fs/ocfs2/aops.c | 26 ++++++++++++ fs/ocfs2/cluster/heartbeat.c | 50 ++++++++++++++++------- fs/ocfs2/cluster/heartbeat.h | 2 - fs/ocfs2/cluster/tcp.c | 13 +----- fs/ocfs2/heartbeat.c | 15 ++----- include/linux/configfs.h | 1 8 files changed, 108 insertions(+), 40 deletions(-) diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt index b34cdb5..c199484 100644 --- a/Documentation/filesystems/configfs/configfs.txt +++ b/Documentation/filesystems/configfs/configfs.txt @@ -238,6 +238,8 @@ config_item_type. struct config_group *(*make_group)(struct config_group *group, const char *name); int (*commit_item)(struct config_item *item); + void (*disconnect_notify)(struct config_group *group, + struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); }; @@ -268,6 +270,16 @@ the item in other threads, the memory is for the item to actually disappear from the subsystem's usage. But it is gone from configfs. +When drop_item() is called, the item's linkage has already been torn +down. It no longer has a reference on its parent and has no place in +the item hierarchy. If a client needs to do some cleanup before this +teardown happens, the subsystem can implement the +ct_group_ops->disconnect_notify() method. The method is called after +configfs has removed the item from the filesystem view but before the +item is removed from its parent group. Like drop_item(), +disconnect_notify() is void and cannot fail. 
Client subsystems should +not drop any references here, as they still must do it in drop_item(). + A config_group cannot be removed while it still has child items. This is implemented in the configfs rmdir(2) code. ->drop_item() will not be called, as the item has not been dropped. rmdir(2) will fail, as the diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 34750d5..6d7f5bf 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -714,6 +714,28 @@ static void configfs_detach_group(struct } /* + * After the item has been detached from the filesystem view, we are + * ready to tear it out of the hierarchy. Notify the client before + * we do that so they can perform any cleanup that requires + * navigating the hierarchy. A client does not need to provide this + * callback. The subsystem semaphore MUST be held by the caller, and + * references must be valid for both items. It also assumes the + * caller has validated ci_type. + */ +static void client_disconnect_notify(struct config_item *parent_item, + struct config_item *item) +{ + struct config_item_type *type; + + type = parent_item->ci_type; + BUG_ON(!type); + + if (type->ct_group_ops && type->ct_group_ops->disconnect_notify) + type->ct_group_ops->disconnect_notify(to_config_group(parent_item), + item); +} + +/* * Drop the initial reference from make_item()/make_group() * This function assumes that reference is held on item * and that item holds a valid reference to the parent. 
Also, it @@ -733,7 +755,7 @@ static void client_drop_item(struct conf */ if (type->ct_group_ops && type->ct_group_ops->drop_item) type->ct_group_ops->drop_item(to_config_group(parent_item), - item); + item); else config_item_put(item); } @@ -842,11 +864,14 @@ out_unlink: if (ret) { /* Tear down everything we built up */ down(&subsys->su_sem); + + client_disconnect_notify(parent_item, item); if (group) unlink_group(group); else unlink_obj(item); client_drop_item(parent_item, item); + up(&subsys->su_sem); if (module_got) @@ -911,11 +936,13 @@ static int configfs_rmdir(struct inode * configfs_detach_group(item); down(&subsys->su_sem); + client_disconnect_notify(parent_item, item); unlink_group(to_config_group(item)); } else { configfs_detach_item(item); down(&subsys->su_sem); + client_disconnect_notify(parent_item, item); unlink_obj(item); } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 93628b0..875c114 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -614,6 +614,27 @@ static void ocfs2_dio_end_io(struct kioc ocfs2_rw_unlock(inode, 0); } +/* + * ocfs2_invalidatepage() and ocfs2_releasepage() are shamelessly stolen + * from ext3. PageChecked() bits have been removed as OCFS2 does not + * do journalled data. 
+ */ +static void ocfs2_invalidatepage(struct page *page, unsigned long offset) +{ + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + + journal_invalidatepage(journal, page, offset); +} + +static int ocfs2_releasepage(struct page *page, gfp_t wait) +{ + journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; + + if (!page_has_buffers(page)) + return 0; + return journal_try_to_free_buffers(journal, page, wait); +} + static ssize_t ocfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, @@ -661,5 +682,8 @@ const struct address_space_operations oc .commit_write = ocfs2_commit_write, .bmap = ocfs2_bmap, .sync_page = block_sync_page, - .direct_IO = ocfs2_direct_IO + .direct_IO = ocfs2_direct_IO, + .invalidatepage = ocfs2_invalidatepage, + .releasepage = ocfs2_releasepage, + .migratepage = buffer_migrate_page, }; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5a9779b..eba282d 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1234,6 +1234,7 @@ static ssize_t o2hb_region_dev_write(str const char *page, size_t count) { + struct task_struct *hb_task; long fd; int sectsize; char *p = (char *)page; @@ -1319,20 +1320,28 @@ static ssize_t o2hb_region_dev_write(str */ atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); - reg->hr_task = kthread_run(o2hb_thread, reg, "o2hb-%s", - reg->hr_item.ci_name); - if (IS_ERR(reg->hr_task)) { - ret = PTR_ERR(reg->hr_task); + hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", + reg->hr_item.ci_name); + if (IS_ERR(hb_task)) { + ret = PTR_ERR(hb_task); mlog_errno(ret); - reg->hr_task = NULL; goto out; } + spin_lock(&o2hb_live_lock); + reg->hr_task = hb_task; + spin_unlock(&o2hb_live_lock); + ret = wait_event_interruptible(o2hb_steady_queue, atomic_read(&reg->hr_steady_iterations) == 0); if (ret) { - kthread_stop(reg->hr_task); + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; reg->hr_task = NULL; 
spin_unlock(&o2hb_live_lock); + + if (hb_task) + kthread_stop(hb_task); goto out; } @@ -1354,10 +1363,17 @@ out: static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, char *page) { - if (!reg->hr_task) + pid_t pid = 0; + + spin_lock(&o2hb_live_lock); + if (reg->hr_task) + pid = reg->hr_task->pid; + spin_unlock(&o2hb_live_lock); + + if (!pid) return 0; - return sprintf(page, "%u\n", reg->hr_task->pid); + return sprintf(page, "%u\n", pid); } struct o2hb_region_attribute { @@ -1495,13 +1511,17 @@ out: static void o2hb_heartbeat_group_drop_item(struct config_group *group, struct config_item *item) { + struct task_struct *hb_task; struct o2hb_region *reg = to_o2hb_region(item); /* stop the thread when the user removes the region dir */ - if (reg->hr_task) { - kthread_stop(reg->hr_task); - reg->hr_task = NULL; - } + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; + reg->hr_task = NULL; + spin_unlock(&o2hb_live_lock); + + if (hb_task) + kthread_stop(hb_task); config_item_put(item); } @@ -1682,7 +1702,7 @@ out: } EXPORT_SYMBOL_GPL(o2hb_register_callback); -int o2hb_unregister_callback(struct o2hb_callback_func *hc) +void o2hb_unregister_callback(struct o2hb_callback_func *hc) { BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); @@ -1690,15 +1710,13 @@ int o2hb_unregister_callback(struct o2hb __builtin_return_address(0), hc); if (list_empty(&hc->hc_item)) - return 0; + return; down_write(&o2hb_callback_sem); list_del_init(&hc->hc_item); up_write(&o2hb_callback_sem); - - return 0; } EXPORT_SYMBOL_GPL(o2hb_unregister_callback); diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index cac6223..cc6d40b 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -70,7 +70,7 @@ void o2hb_setup_callback(struct o2hb_cal void *data, int priority); int o2hb_register_callback(struct o2hb_callback_func *hc); -int o2hb_unregister_callback(struct o2hb_callback_func *hc); +void o2hb_unregister_callback(struct o2hb_callback_func *hc); void 
o2hb_fill_node_map(unsigned long *map, unsigned bytes); void o2hb_init(void); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1718215..69caf3e 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1638,17 +1638,8 @@ static void o2net_hb_node_up_cb(struct o void o2net_unregister_hb_callbacks(void) { - int ret; - - ret = o2hb_unregister_callback(&o2net_hb_up); - if (ret < 0) - mlog(ML_ERROR, "Status return %d unregistering heartbeat up " - "callback!\n", ret); - - ret = o2hb_unregister_callback(&o2net_hb_down); - if (ret < 0) - mlog(ML_ERROR, "Status return %d unregistering heartbeat down " - "callback!\n", ret); + o2hb_unregister_callback(&o2net_hb_up); + o2hb_unregister_callback(&o2net_hb_down); } int o2net_register_hb_callbacks(void) diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 8fc52d6..b25ef63 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c @@ -164,8 +164,10 @@ int ocfs2_register_hb_callbacks(struct o } status = o2hb_register_callback(&osb->osb_hb_up); - if (status < 0) + if (status < 0) { mlog_errno(status); + o2hb_unregister_callback(&osb->osb_hb_down); + } bail: return status; @@ -173,18 +175,11 @@ bail: void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb) { - int status; - if (ocfs2_mount_local(osb)) return; - status = o2hb_unregister_callback(&osb->osb_hb_down); - if (status < 0) - mlog_errno(status); - - status = o2hb_unregister_callback(&osb->osb_hb_up); - if (status < 0) - mlog_errno(status); + o2hb_unregister_callback(&osb->osb_hb_down); + o2hb_unregister_callback(&osb->osb_hb_up); } void ocfs2_stop_heartbeat(struct ocfs2_super *osb) diff --git a/include/linux/configfs.h b/include/linux/configfs.h index fef6f3d..62f3763 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -157,6 +157,7 @@ struct configfs_group_operations { struct config_item *(*make_item)(struct config_group *group, const char *name); struct config_group *(*make_group)(struct config_group *group, const 
char *name); int (*commit_item)(struct config_item *item); + void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); };