GIT 32d88a1c031eb2acca1ca9efaef908c230a4d165 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/teigland/dlm.git#test 32d88a1c031eb2acca1ca9efaef908c230a4d165 commit 32d88a1c031eb2acca1ca9efaef908c230a4d165 Author: David Teigland Date: Tue Mar 18 14:22:11 2008 -0500 dlm: recover nodes that are removed and re-added If a node is removed from a lockspace, and then added back before the dlm is notified of the removal, the dlm will not detect the removal and won't clear the old state from the node. This is fixed by using a list of added nodes so the membership recovery can detect when a newly added node is already in the member list. Signed-off-by: David Teigland commit 1cbc8947c4988413663d7cff13ce25699581e29c Author: David Teigland Date: Thu Feb 21 11:25:42 2008 -0600 dlm: save master info after failed no-queue request When a NOQUEUE request fails, the rsb res_master field is unnecessarily reset to -1, instead of leaving the valid master setting in place. We want to save the looked-up master values while the rsb is on the "toss list" so that another lookup can be avoided if the rsb is soon reused. The fix is to simply leave res_master value alone. Signed-off-by: David Teigland commit f21181de3839dd4ddbb9c96bb5078e6c3393400c Author: Adrian Bunk Date: Wed Feb 13 23:29:38 2008 +0200 dlm: make dlm_print_rsb() static dlm_print_rsb() can now become static. Signed-off-by: Adrian Bunk Signed-off-by: David Teigland commit f5cfe7279ca5161c535ca44cfb078d1b201a6242 Author: Harvey Harrison Date: Wed Feb 13 16:54:29 2008 -0800 dlm: match signedness between dlm_config_info and cluster_set cluster_set is only called from the macro CLUSTER_ATTR which defines read/write access functions. Make the signedness match to avoid sparse warnings every time CLUSTER_ATTR is used (lines 149-159) all of the form: fs/dlm/config.c:149:1: warning: incorrect type in argument 3 (different signedness) fs/dlm/config.c:149:1: expected unsigned int *info_field fs/dlm/config.c:149:1: got int extern [toplevel] * Signed-off-by: Harvey Harrison Signed-off-by: David Teigland fs/dlm/config.c | 47 ++++++++++++++++++++++++++++++++++++++--------- fs/dlm/config.h | 3 ++- fs/dlm/dlm_internal.h | 4 +++- fs/dlm/lock.c | 5 ++--- fs/dlm/lock.h | 1 - fs/dlm/member.c | 32 ++++++++++++++++++++++++++++---- fs/dlm/recoverd.c | 1 + 7 files changed, 74 insertions(+), 19 deletions(-) diff --git a/fs/dlm/config.c b/fs/dlm/config.c index c3ad1df..708caf0 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -114,7 +114,7 @@ struct cluster_attribute { }; static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, - unsigned int *info_field, int check_zero, + int *info_field, int check_zero, const char *buf, size_t len) { unsigned int x; @@ -284,6 +284,7 @@ struct node { struct list_head list; /* space->members */ int nodeid; int weight; + int new; }; static struct configfs_group_operations clusters_ops = { @@ -565,6 +566,7 @@ static struct config_item *make_node(struct config_group *g, const char *name) config_item_init_type_name(&nd->item, name, &node_type); nd->nodeid = -1; nd->weight = 1; /* default weight of 1 if none is set */ + nd->new = 1; /* set to 0 once it's been read by dlm_nodeid_list() */ mutex_lock(&sp->members_lock); list_add(&nd->list, &sp->members); @@ -805,12 +807,13 @@ static void put_comm(struct comm *cm) } /* caller must free mem */ -int dlm_nodeid_list(char *lsname, int **ids_out) +int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, + int **new_out, int *new_count_out) { struct space *sp; struct node *nd; - int i = 0, rv = 0; - int *ids; + int i = 0, rv = 0, ids_count = 0, new_count = 0; + int *ids, *new; sp = get_space(lsname); if (!sp) @@ -822,19 +825,45 @@ int dlm_nodeid_list(char *lsname, int **ids_out) goto out; } - ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); + ids_count = sp->members_count; + + ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL); if (!ids) { rv = -ENOMEM; goto out; } - rv = sp->members_count; - list_for_each_entry(nd, &sp->members, list) + list_for_each_entry(nd, &sp->members, list) { ids[i++] = nd->nodeid; + if (nd->new) + new_count++; + } + + if (ids_count != i) + printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); + + if (!new_count) + goto out_ids; + + new = kcalloc(new_count, sizeof(int), GFP_KERNEL); + if (!new) { + kfree(ids); + rv = -ENOMEM; + goto out; + } - if (rv != i) - printk("bad nodeid count %d %d\n", rv, i); + i = 0; + list_for_each_entry(nd, &sp->members, list) { + if (nd->new) { + new[i++] = nd->nodeid; + nd->new = 0; + } + } + *new_count_out = new_count; + *new_out = new; + out_ids: + *ids_count_out = ids_count; *ids_out = ids; out: mutex_unlock(&sp->members_lock); diff --git a/fs/dlm/config.h b/fs/dlm/config.h index a3170fe..4f1d6fc 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -35,7 +35,8 @@ extern struct dlm_config_info dlm_config; int dlm_config_init(void); void dlm_config_exit(void); int dlm_node_weight(char *lsname, int nodeid); -int dlm_nodeid_list(char *lsname, int **ids_out); +int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, + int **new_out, int *new_count_out); int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); int dlm_our_nodeid(void); diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index d30ea8b..c70c8e5 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -133,8 +133,10 @@ struct dlm_member { struct dlm_recover { struct list_head list; - int *nodeids; + int *nodeids; /* nodeids of all members */ int node_count; + int *new; /* nodeids of new members */ + int new_count; uint64_t seq; }; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 8f250ac..2d3d102 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -165,7 +165,7 @@ void dlm_print_lkb(struct dlm_lkb *lkb) lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); } -void dlm_print_rsb(struct dlm_rsb *r) +static void dlm_print_rsb(struct dlm_rsb *r) { printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", r->res_nodeid, r->res_flags, r->res_first_lkid, @@ -1956,8 +1956,7 @@ static void confirm_master(struct dlm_rsb *r, int error) list_del_init(&lkb->lkb_rsb_lookup); r->res_first_lkid = lkb->lkb_id; _request_lock(r, lkb); - } else - r->res_nodeid = -1; + } break; default: diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 05d9c82..88e93c8 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -13,7 +13,6 @@ #ifndef __LOCK_DOT_H__ #define __LOCK_DOT_H__ -void dlm_print_rsb(struct dlm_rsb *r); void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index fa17f5a..0d17ffc 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -210,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) } } + /* Add an entry to ls_nodes_gone for members that were removed and + then added again, so that previous state for these nodes will be + cleared during recovery. */ + + for (i = 0; i < rv->new_count; i++) { + if (!dlm_is_member(ls, rv->new[i])) + continue; + log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); + + memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + if (!memb) + return -ENOMEM; + memb->nodeid = rv->new[i]; + list_add_tail(&memb->list, &ls->ls_nodes_gone); + neg++; + } + /* add new members to ls_nodes */ for (i = 0; i < rv->node_count; i++) { @@ -314,14 +331,15 @@ int dlm_ls_stop(struct dlm_ls *ls) int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv = NULL, *rv_old; - int *ids = NULL; - int error, count; + int *ids = NULL, *new = NULL; + int error, ids_count = 0, new_count = 0; rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); if (!rv) return -ENOMEM; - error = count = dlm_nodeid_list(ls->ls_name, &ids); + error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, + &new, &new_count); if (error <= 0) goto fail; @@ -337,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls) } rv->nodeids = ids; - rv->node_count = count; + rv->node_count = ids_count; + rv->new = new; + rv->new_count = new_count; rv->seq = ++ls->ls_recover_seq; rv_old = ls->ls_recover_args; ls->ls_recover_args = rv; spin_unlock(&ls->ls_recover_lock); if (rv_old) { + log_error(ls, "unused recovery %llx %d", + (unsigned long long)rv_old->seq, rv_old->node_count); kfree(rv_old->nodeids); + kfree(rv_old->new); kfree(rv_old); } @@ -354,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls) fail: kfree(rv); kfree(ids); + kfree(new); return error; } diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 997f953..fd677c8 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -257,6 +257,7 @@ static void do_ls_recovery(struct dlm_ls *ls) if (rv) { ls_recover(ls, rv); kfree(rv->nodeids); + kfree(rv->new); kfree(rv); } }