GIT cc84a4d71627c7ca034ee5c4a6afa7f41276df55 git+ssh://master.kernel.org/pub/scm/linux/kernel/git/teigland/dlm.git#test commit cc84a4d71627c7ca034ee5c4a6afa7f41276df55 Author: David Teigland Date: Fri Mar 14 15:09:15 2008 -0500 dlm: move plock code from gfs2 Move the code that handles cluster posix locks from gfs2 into the dlm so that it can be used by both gfs2 and ocfs2. Signed-off-by: David Teigland commit 82ca961409088585df83fb044b529d726e6821c2 Author: David Teigland Date: Tue Mar 18 14:22:11 2008 -0500 dlm: recover nodes that are removed and re-added If a node is removed from a lockspace, and then added back before the dlm is notified of the removal, the dlm will not detect the removal and won't clear the old state from the node. This is fixed by using a list of added nodes so the membership recovery can detect when a newly added node is already in the member list. Signed-off-by: David Teigland commit 629e2e3c315de6d786ed663f221bbf8338a80cb2 Author: David Teigland Date: Thu Feb 21 11:25:42 2008 -0600 dlm: save master info after failed no-queue request When a NOQUEUE request fails, the rsb res_master field is unnecessarily reset to -1, instead of leaving the valid master setting in place. We want to save the looked-up master values while the rsb is on the "toss list" so that another lookup can be avoided if the rsb is soon reused. The fix is to simply leave res_master value alone. Signed-off-by: David Teigland commit c08e32ad26136e12ea9cea2969e86c939de13a08 Author: Adrian Bunk Date: Wed Feb 13 23:29:38 2008 +0200 dlm: make dlm_print_rsb() static dlm_print_rsb() can now become static. Signed-off-by: Adrian Bunk Signed-off-by: David Teigland commit f45a25d6a2bfa7a3792386e0e2fe04fe109b3e87 Author: Harvey Harrison Date: Wed Feb 13 16:54:29 2008 -0800 dlm: match signedness between dlm_config_info and cluster_set cluster_set is only called from the macro CLUSTER_ATTR which defines read/write access functions. 
Make the signedness match to avoid sparse warnings every time CLUSTER_ATTR is used (lines 149-159) all of the form: fs/dlm/config.c:149:1: warning: incorrect type in argument 3 (different signedness) fs/dlm/config.c:149:1: expected unsigned int *info_field fs/dlm/config.c:149:1: got int extern [toplevel] * Signed-off-by: Harvey Harrison Signed-off-by: David Teigland Signed-off-by: Andrew Morton --- fs/dlm/Makefile | 1 fs/dlm/config.c | 50 ++- fs/dlm/config.h | 3 fs/dlm/dlm_internal.h | 6 fs/dlm/lock.c | 5 fs/dlm/lock.h | 1 fs/dlm/main.c | 7 fs/dlm/member.c | 34 ++ fs/dlm/plock.c | 439 +++++++++++++++++++++++++++++++ fs/dlm/recoverd.c | 1 fs/gfs2/locking/dlm/Makefile | 2 fs/gfs2/locking/dlm/lock_dlm.h | 1 fs/gfs2/locking/dlm/main.c | 8 fs/gfs2/locking/dlm/mount.c | 21 + fs/gfs2/locking/dlm/plock.c | 406 ---------------------------- include/linux/Kbuild | 2 include/linux/dlm_plock.h | 50 +++ include/linux/lock_dlm_plock.h | 41 -- 18 files changed, 600 insertions(+), 478 deletions(-) diff -puN fs/dlm/Makefile~git-dlm fs/dlm/Makefile --- a/fs/dlm/Makefile~git-dlm +++ a/fs/dlm/Makefile @@ -10,6 +10,7 @@ dlm-y := ast.o \ midcomms.o \ netlink.o \ lowcomms.o \ + plock.o \ rcom.o \ recover.o \ recoverd.o \ diff -puN fs/dlm/config.c~git-dlm fs/dlm/config.c --- a/fs/dlm/config.c~git-dlm +++ a/fs/dlm/config.c @@ -114,7 +114,7 @@ struct cluster_attribute { }; static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, - unsigned int *info_field, int check_zero, + int *info_field, int check_zero, const char *buf, size_t len) { unsigned int x; @@ -284,6 +284,7 @@ struct node { struct list_head list; /* space->members */ int nodeid; int weight; + int new; }; static struct configfs_group_operations clusters_ops = { @@ -565,6 +566,7 @@ static struct config_item *make_node(str config_item_init_type_name(&nd->item, name, &node_type); nd->nodeid = -1; nd->weight = 1; /* default weight of 1 if none is set */ + nd->new = 1; /* set to 0 once it's been read by dlm_nodeid_list() 
*/ mutex_lock(&sp->members_lock); list_add(&nd->list, &sp->members); @@ -805,12 +807,13 @@ static void put_comm(struct comm *cm) } /* caller must free mem */ -int dlm_nodeid_list(char *lsname, int **ids_out) +int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, + int **new_out, int *new_count_out) { struct space *sp; struct node *nd; - int i = 0, rv = 0; - int *ids; + int i = 0, rv = 0, ids_count = 0, new_count = 0; + int *ids, *new; sp = get_space(lsname); if (!sp) @@ -818,23 +821,50 @@ int dlm_nodeid_list(char *lsname, int ** mutex_lock(&sp->members_lock); if (!sp->members_count) { - rv = 0; + rv = -EINVAL; + printk(KERN_ERR "dlm: zero members_count\n"); goto out; } - ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL); + ids_count = sp->members_count; + + ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL); if (!ids) { rv = -ENOMEM; goto out; } - rv = sp->members_count; - list_for_each_entry(nd, &sp->members, list) + list_for_each_entry(nd, &sp->members, list) { ids[i++] = nd->nodeid; + if (nd->new) + new_count++; + } + + if (ids_count != i) + printk(KERN_ERR "dlm: bad nodeid count %d %d\n", ids_count, i); + + if (!new_count) + goto out_ids; + + new = kcalloc(new_count, sizeof(int), GFP_KERNEL); + if (!new) { + kfree(ids); + rv = -ENOMEM; + goto out; + } - if (rv != i) - printk("bad nodeid count %d %d\n", rv, i); + i = 0; + list_for_each_entry(nd, &sp->members, list) { + if (nd->new) { + new[i++] = nd->nodeid; + nd->new = 0; + } + } + *new_count_out = new_count; + *new_out = new; + out_ids: + *ids_count_out = ids_count; *ids_out = ids; out: mutex_unlock(&sp->members_lock); diff -puN fs/dlm/config.h~git-dlm fs/dlm/config.h --- a/fs/dlm/config.h~git-dlm +++ a/fs/dlm/config.h @@ -35,7 +35,8 @@ extern struct dlm_config_info dlm_config int dlm_config_init(void); void dlm_config_exit(void); int dlm_node_weight(char *lsname, int nodeid); -int dlm_nodeid_list(char *lsname, int **ids_out); +int dlm_nodeid_list(char *lsname, int **ids_out, int 
*ids_count_out, + int **new_out, int *new_count_out); int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); int dlm_our_nodeid(void); diff -puN fs/dlm/dlm_internal.h~git-dlm fs/dlm/dlm_internal.h --- a/fs/dlm/dlm_internal.h~git-dlm +++ a/fs/dlm/dlm_internal.h @@ -133,8 +133,10 @@ struct dlm_member { struct dlm_recover { struct list_head list; - int *nodeids; + int *nodeids; /* nodeids of all members */ int node_count; + int *new; /* nodeids of new members */ + int new_count; uint64_t seq; }; @@ -580,6 +582,8 @@ static inline int dlm_no_directory(struc int dlm_netlink_init(void); void dlm_netlink_exit(void); void dlm_timeout_warn(struct dlm_lkb *lkb); +int dlm_plock_init(void); +void dlm_plock_exit(void); #ifdef CONFIG_DLM_DEBUG int dlm_register_debugfs(void); diff -puN fs/dlm/lock.c~git-dlm fs/dlm/lock.c --- a/fs/dlm/lock.c~git-dlm +++ a/fs/dlm/lock.c @@ -165,7 +165,7 @@ void dlm_print_lkb(struct dlm_lkb *lkb) lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); } -void dlm_print_rsb(struct dlm_rsb *r) +static void dlm_print_rsb(struct dlm_rsb *r) { printk(KERN_ERR "rsb: nodeid %d flags %lx first %x rlc %d name %s\n", r->res_nodeid, r->res_flags, r->res_first_lkid, @@ -1956,8 +1956,7 @@ static void confirm_master(struct dlm_rs list_del_init(&lkb->lkb_rsb_lookup); r->res_first_lkid = lkb->lkb_id; _request_lock(r, lkb); - } else - r->res_nodeid = -1; + } break; default: diff -puN fs/dlm/lock.h~git-dlm fs/dlm/lock.h --- a/fs/dlm/lock.h~git-dlm +++ a/fs/dlm/lock.h @@ -13,7 +13,6 @@ #ifndef __LOCK_DOT_H__ #define __LOCK_DOT_H__ -void dlm_print_rsb(struct dlm_rsb *r); void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); diff -puN fs/dlm/main.c~git-dlm fs/dlm/main.c --- a/fs/dlm/main.c~git-dlm +++ a/fs/dlm/main.c @@ -46,10 +46,16 @@ static int __init init_dlm(void) if (error) goto 
out_user; + error = dlm_plock_init(); + if (error) + goto out_netlink; + printk("DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; + out_netlink: + dlm_netlink_exit(); out_user: dlm_user_exit(); out_debug: @@ -66,6 +72,7 @@ static int __init init_dlm(void) static void __exit exit_dlm(void) { + dlm_plock_exit(); dlm_netlink_exit(); dlm_user_exit(); dlm_config_exit(); diff -puN fs/dlm/member.c~git-dlm fs/dlm/member.c --- a/fs/dlm/member.c~git-dlm +++ a/fs/dlm/member.c @@ -210,6 +210,23 @@ int dlm_recover_members(struct dlm_ls *l } } + /* Add an entry to ls_nodes_gone for members that were removed and + then added again, so that previous state for these nodes will be + cleared during recovery. */ + + for (i = 0; i < rv->new_count; i++) { + if (!dlm_is_member(ls, rv->new[i])) + continue; + log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]); + + memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL); + if (!memb) + return -ENOMEM; + memb->nodeid = rv->new[i]; + list_add_tail(&memb->list, &ls->ls_nodes_gone); + neg++; + } + /* add new members to ls_nodes */ for (i = 0; i < rv->node_count; i++) { @@ -314,15 +331,16 @@ int dlm_ls_stop(struct dlm_ls *ls) int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv = NULL, *rv_old; - int *ids = NULL; - int error, count; + int *ids = NULL, *new = NULL; + int error, ids_count = 0, new_count = 0; rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL); if (!rv) return -ENOMEM; - error = count = dlm_nodeid_list(ls->ls_name, &ids); - if (error <= 0) + error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count, + &new, &new_count); + if (error < 0) goto fail; spin_lock(&ls->ls_recover_lock); @@ -337,14 +355,19 @@ int dlm_ls_start(struct dlm_ls *ls) } rv->nodeids = ids; - rv->node_count = count; + rv->node_count = ids_count; + rv->new = new; + rv->new_count = new_count; rv->seq = ++ls->ls_recover_seq; rv_old = ls->ls_recover_args; ls->ls_recover_args = rv; spin_unlock(&ls->ls_recover_lock); if (rv_old) { + 
log_error(ls, "unused recovery %llx %d", + (unsigned long long)rv_old->seq, rv_old->node_count); kfree(rv_old->nodeids); + kfree(rv_old->new); kfree(rv_old); } @@ -354,6 +377,7 @@ int dlm_ls_start(struct dlm_ls *ls) fail: kfree(rv); kfree(ids); + kfree(new); return error; } diff -puN /dev/null fs/dlm/plock.c --- /dev/null +++ a/fs/dlm/plock.c @@ -0,0 +1,439 @@ +/* + * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include +#include +#include +#include +#include + +#include "dlm_internal.h" +#include "lockspace.h" + +static spinlock_t ops_lock; +static struct list_head send_list; +static struct list_head recv_list; +static wait_queue_head_t send_wq; +static wait_queue_head_t recv_wq; + +struct plock_op { + struct list_head list; + int done; + struct dlm_plock_info info; +}; + +struct plock_xop { + struct plock_op xop; + void *callback; + void *fl; + void *file; + struct file_lock flc; +}; + + +static inline void set_version(struct dlm_plock_info *info) +{ + info->version[0] = DLM_PLOCK_VERSION_MAJOR; + info->version[1] = DLM_PLOCK_VERSION_MINOR; + info->version[2] = DLM_PLOCK_VERSION_PATCH; +} + +static int check_version(struct dlm_plock_info *info) +{ + if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || + (DLM_PLOCK_VERSION_MINOR < info->version[1])) { + log_print("plock device version mismatch: " + "kernel (%u.%u.%u), user (%u.%u.%u)", + DLM_PLOCK_VERSION_MAJOR, + DLM_PLOCK_VERSION_MINOR, + DLM_PLOCK_VERSION_PATCH, + info->version[0], + info->version[1], + info->version[2]); + return -EINVAL; + } + return 0; +} + +static void send_op(struct plock_op *op) +{ + set_version(&op->info); + INIT_LIST_HEAD(&op->list); + spin_lock(&ops_lock); + list_add_tail(&op->list, &send_list); + spin_unlock(&ops_lock); + wake_up(&send_wq); +} + +int 
dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + int cmd, struct file_lock *fl) +{ + struct dlm_ls *ls; + struct plock_op *op; + struct plock_xop *xop; + int rv; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + xop = kzalloc(sizeof(*xop), GFP_KERNEL); + if (!xop) { + rv = -ENOMEM; + goto out; + } + + op = &xop->xop; + op->info.optype = DLM_PLOCK_OP_LOCK; + op->info.pid = fl->fl_pid; + op->info.ex = (fl->fl_type == F_WRLCK); + op->info.wait = IS_SETLKW(cmd); + op->info.fsid = ls->ls_global_id; + op->info.number = number; + op->info.start = fl->fl_start; + op->info.end = fl->fl_end; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + /* fl_owner is lockd which doesn't distinguish + processes on the nfs client */ + op->info.owner = (__u64) fl->fl_pid; + xop->callback = fl->fl_lmops->fl_grant; + locks_init_lock(&xop->flc); + locks_copy_lock(&xop->flc, fl); + xop->fl = fl; + xop->file = file; + } else { + op->info.owner = (__u64)(long) fl->fl_owner; + xop->callback = NULL; + } + + send_op(op); + + if (xop->callback == NULL) + wait_event(recv_wq, (op->done != 0)); + else { + rv = -EINPROGRESS; + goto out; + } + + spin_lock(&ops_lock); + if (!list_empty(&op->list)) { + log_error(ls, "dlm_posix_lock: op on list %llx", + (unsigned long long)number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + rv = op->info.rv; + + if (!rv) { + if (posix_lock_file_wait(file, fl) < 0) + log_error(ls, "dlm_posix_lock: vfs lock error %llx", + (unsigned long long)number); + } + + kfree(xop); +out: + dlm_put_lockspace(ls); + return rv; +} +EXPORT_SYMBOL_GPL(dlm_posix_lock); + +/* Returns failure iff a successful lock operation should be canceled */ +static int dlm_plock_callback(struct plock_op *op) +{ + struct file *file; + struct file_lock *fl; + struct file_lock *flc; + int (*notify)(void *, void *, int) = NULL; + struct plock_xop *xop = (struct plock_xop *)op; + int rv = 0; + + spin_lock(&ops_lock); + if 
(!list_empty(&op->list)) { + log_print("dlm_plock_callback: op on list %llx", + (unsigned long long)op->info.number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + /* check if the following 2 are still valid or make a copy */ + file = xop->file; + flc = &xop->flc; + fl = xop->fl; + notify = xop->callback; + + if (op->info.rv) { + notify(flc, NULL, op->info.rv); + goto out; + } + + /* got fs lock; bookkeep locally as well: */ + flc->fl_flags &= ~FL_SLEEP; + if (posix_lock_file(file, flc, NULL)) { + /* + * This can only happen in the case of kmalloc() failure. + * The filesystem's own lock is the authoritative lock, + * so a failure to get the lock locally is not a disaster. + * As long as the fs cannot reliably cancel locks (especially + * in a low-memory situation), we're better off ignoring + * this failure than trying to recover. + */ + log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", + (unsigned long long)op->info.number, file, fl); + } + + rv = notify(flc, NULL, 0); + if (rv) { + /* XXX: We need to cancel the fs lock here: */ + log_print("dlm_plock_callback: lock granted after lock request " + "failed; dangling lock!\n"); + goto out; + } + +out: + kfree(xop); + return rv; +} + +int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl) +{ + struct dlm_ls *ls; + struct plock_op *op; + int rv; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + rv = -ENOMEM; + goto out; + } + + if (posix_lock_file_wait(file, fl) < 0) + log_error(ls, "dlm_posix_unlock: vfs unlock error %llx", + (unsigned long long)number); + + op->info.optype = DLM_PLOCK_OP_UNLOCK; + op->info.pid = fl->fl_pid; + op->info.fsid = ls->ls_global_id; + op->info.number = number; + op->info.start = fl->fl_start; + op->info.end = fl->fl_end; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = 
(__u64)(long) fl->fl_owner; + + send_op(op); + wait_event(recv_wq, (op->done != 0)); + + spin_lock(&ops_lock); + if (!list_empty(&op->list)) { + log_error(ls, "dlm_posix_unlock: op on list %llx", + (unsigned long long)number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + rv = op->info.rv; + + if (rv == -ENOENT) + rv = 0; + + kfree(op); +out: + dlm_put_lockspace(ls); + return rv; +} +EXPORT_SYMBOL_GPL(dlm_posix_unlock); + +int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl) +{ + struct dlm_ls *ls; + struct plock_op *op; + int rv; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + + op = kzalloc(sizeof(*op), GFP_KERNEL); + if (!op) { + rv = -ENOMEM; + goto out; + } + + op->info.optype = DLM_PLOCK_OP_GET; + op->info.pid = fl->fl_pid; + op->info.ex = (fl->fl_type == F_WRLCK); + op->info.fsid = ls->ls_global_id; + op->info.number = number; + op->info.start = fl->fl_start; + op->info.end = fl->fl_end; + if (fl->fl_lmops && fl->fl_lmops->fl_grant) + op->info.owner = (__u64) fl->fl_pid; + else + op->info.owner = (__u64)(long) fl->fl_owner; + + send_op(op); + wait_event(recv_wq, (op->done != 0)); + + spin_lock(&ops_lock); + if (!list_empty(&op->list)) { + log_error(ls, "dlm_posix_get: op on list %llx", + (unsigned long long)number); + list_del(&op->list); + } + spin_unlock(&ops_lock); + + /* info.rv from userspace is 1 for conflict, 0 for no-conflict, + -ENOENT if there are no locks on the file */ + + rv = op->info.rv; + + fl->fl_type = F_UNLCK; + if (rv == -ENOENT) + rv = 0; + else if (rv > 0) { + fl->fl_type = (op->info.ex) ? 
F_WRLCK : F_RDLCK; + fl->fl_pid = op->info.pid; + fl->fl_start = op->info.start; + fl->fl_end = op->info.end; + rv = 0; + } + + kfree(op); +out: + dlm_put_lockspace(ls); + return rv; +} +EXPORT_SYMBOL_GPL(dlm_posix_get); + +/* a read copies out one plock request from the send list */ +static ssize_t dev_read(struct file *file, char __user *u, size_t count, + loff_t *ppos) +{ + struct dlm_plock_info info; + struct plock_op *op = NULL; + + if (count < sizeof(info)) + return -EINVAL; + + spin_lock(&ops_lock); + if (!list_empty(&send_list)) { + op = list_entry(send_list.next, struct plock_op, list); + list_move(&op->list, &recv_list); + memcpy(&info, &op->info, sizeof(info)); + } + spin_unlock(&ops_lock); + + if (!op) + return -EAGAIN; + + if (copy_to_user(u, &info, sizeof(info))) + return -EFAULT; + return sizeof(info); +} + +/* a write copies in one plock result that should match a plock_op + on the recv list */ +static ssize_t dev_write(struct file *file, const char __user *u, size_t count, + loff_t *ppos) +{ + struct dlm_plock_info info; + struct plock_op *op; + int found = 0; + + if (count != sizeof(info)) + return -EINVAL; + + if (copy_from_user(&info, u, sizeof(info))) + return -EFAULT; + + if (check_version(&info)) + return -EINVAL; + + spin_lock(&ops_lock); + list_for_each_entry(op, &recv_list, list) { + if (op->info.fsid == info.fsid && op->info.number == info.number && + op->info.owner == info.owner) { + list_del_init(&op->list); + found = 1; + op->done = 1; + memcpy(&op->info, &info, sizeof(info)); + break; + } + } + spin_unlock(&ops_lock); + + if (found) { + struct plock_xop *xop; + xop = (struct plock_xop *)op; + if (xop->callback) + count = dlm_plock_callback(op); + else + wake_up(&recv_wq); + } else + log_print("dev_write no op %x %llx", info.fsid, + (unsigned long long)info.number); + return count; +} + +static unsigned int dev_poll(struct file *file, poll_table *wait) +{ + unsigned int mask = 0; + + poll_wait(file, &send_wq, wait); + + 
spin_lock(&ops_lock); + if (!list_empty(&send_list)) + mask = POLLIN | POLLRDNORM; + spin_unlock(&ops_lock); + + return mask; +} + +static const struct file_operations dev_fops = { + .read = dev_read, + .write = dev_write, + .poll = dev_poll, + .owner = THIS_MODULE +}; + +static struct miscdevice plock_dev_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = DLM_PLOCK_MISC_NAME, + .fops = &dev_fops +}; + +int dlm_plock_init(void) +{ + int rv; + + spin_lock_init(&ops_lock); + INIT_LIST_HEAD(&send_list); + INIT_LIST_HEAD(&recv_list); + init_waitqueue_head(&send_wq); + init_waitqueue_head(&recv_wq); + + rv = misc_register(&plock_dev_misc); + if (rv) + log_print("dlm_plock_init: misc_register failed %d", rv); + return rv; +} + +void dlm_plock_exit(void) +{ + if (misc_deregister(&plock_dev_misc) < 0) + log_print("dlm_plock_exit: misc_deregister failed"); +} + diff -puN fs/dlm/recoverd.c~git-dlm fs/dlm/recoverd.c --- a/fs/dlm/recoverd.c~git-dlm +++ a/fs/dlm/recoverd.c @@ -257,6 +257,7 @@ static void do_ls_recovery(struct dlm_ls if (rv) { ls_recover(ls, rv); kfree(rv->nodeids); + kfree(rv->new); kfree(rv); } } diff -puN fs/gfs2/locking/dlm/Makefile~git-dlm fs/gfs2/locking/dlm/Makefile --- a/fs/gfs2/locking/dlm/Makefile~git-dlm +++ a/fs/gfs2/locking/dlm/Makefile @@ -1,3 +1,3 @@ obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o -lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o +lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o diff -puN fs/gfs2/locking/dlm/lock_dlm.h~git-dlm fs/gfs2/locking/dlm/lock_dlm.h --- a/fs/gfs2/locking/dlm/lock_dlm.h~git-dlm +++ a/fs/gfs2/locking/dlm/lock_dlm.h @@ -25,6 +25,7 @@ #include #include +#include #include /* diff -puN fs/gfs2/locking/dlm/main.c~git-dlm fs/gfs2/locking/dlm/main.c --- a/fs/gfs2/locking/dlm/main.c~git-dlm +++ a/fs/gfs2/locking/dlm/main.c @@ -28,13 +28,6 @@ static int __init init_lock_dlm(void) return error; } - error = gdlm_plock_init(); - if (error) { - gdlm_sysfs_exit(); - gfs2_unregister_lockproto(&gdlm_ops); - 
return error; - } - printk(KERN_INFO "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__); return 0; @@ -42,7 +35,6 @@ static int __init init_lock_dlm(void) static void __exit exit_lock_dlm(void) { - gdlm_plock_exit(); gdlm_sysfs_exit(); gfs2_unregister_lockproto(&gdlm_ops); } diff -puN fs/gfs2/locking/dlm/mount.c~git-dlm fs/gfs2/locking/dlm/mount.c --- a/fs/gfs2/locking/dlm/mount.c~git-dlm +++ a/fs/gfs2/locking/dlm/mount.c @@ -236,6 +236,27 @@ static void gdlm_withdraw(void *lockspac gdlm_kobject_release(ls); } +static int gdlm_plock(void *lockspace, struct lm_lockname *name, + struct file *file, int cmd, struct file_lock *fl) +{ + struct gdlm_ls *ls = lockspace; + return dlm_posix_lock(ls->dlm_lockspace, name->ln_number, file, cmd, fl); +} + +static int gdlm_punlock(void *lockspace, struct lm_lockname *name, + struct file *file, struct file_lock *fl) +{ + struct gdlm_ls *ls = lockspace; + return dlm_posix_unlock(ls->dlm_lockspace, name->ln_number, file, fl); +} + +static int gdlm_plock_get(void *lockspace, struct lm_lockname *name, + struct file *file, struct file_lock *fl) +{ + struct gdlm_ls *ls = lockspace; + return dlm_posix_get(ls->dlm_lockspace, name->ln_number, file, fl); +} + const struct lm_lockops gdlm_ops = { .lm_proto_name = "lock_dlm", .lm_mount = gdlm_mount, diff -puN fs/gfs2/locking/dlm/plock.c~git-dlm /dev/null --- a/fs/gfs2/locking/dlm/plock.c +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Copyright (C) 2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. 
- */ - -#include -#include -#include - -#include "lock_dlm.h" - - -static spinlock_t ops_lock; -static struct list_head send_list; -static struct list_head recv_list; -static wait_queue_head_t send_wq; -static wait_queue_head_t recv_wq; - -struct plock_op { - struct list_head list; - int done; - struct gdlm_plock_info info; -}; - -struct plock_xop { - struct plock_op xop; - void *callback; - void *fl; - void *file; - struct file_lock flc; -}; - - -static inline void set_version(struct gdlm_plock_info *info) -{ - info->version[0] = GDLM_PLOCK_VERSION_MAJOR; - info->version[1] = GDLM_PLOCK_VERSION_MINOR; - info->version[2] = GDLM_PLOCK_VERSION_PATCH; -} - -static int check_version(struct gdlm_plock_info *info) -{ - if ((GDLM_PLOCK_VERSION_MAJOR != info->version[0]) || - (GDLM_PLOCK_VERSION_MINOR < info->version[1])) { - log_error("plock device version mismatch: " - "kernel (%u.%u.%u), user (%u.%u.%u)", - GDLM_PLOCK_VERSION_MAJOR, - GDLM_PLOCK_VERSION_MINOR, - GDLM_PLOCK_VERSION_PATCH, - info->version[0], - info->version[1], - info->version[2]); - return -EINVAL; - } - return 0; -} - -static void send_op(struct plock_op *op) -{ - set_version(&op->info); - INIT_LIST_HEAD(&op->list); - spin_lock(&ops_lock); - list_add_tail(&op->list, &send_list); - spin_unlock(&ops_lock); - wake_up(&send_wq); -} - -int gdlm_plock(void *lockspace, struct lm_lockname *name, - struct file *file, int cmd, struct file_lock *fl) -{ - struct gdlm_ls *ls = lockspace; - struct plock_op *op; - struct plock_xop *xop; - int rv; - - xop = kzalloc(sizeof(*xop), GFP_KERNEL); - if (!xop) - return -ENOMEM; - - op = &xop->xop; - op->info.optype = GDLM_PLOCK_OP_LOCK; - op->info.pid = fl->fl_pid; - op->info.ex = (fl->fl_type == F_WRLCK); - op->info.wait = IS_SETLKW(cmd); - op->info.fsid = ls->id; - op->info.number = name->ln_number; - op->info.start = fl->fl_start; - op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) { - /* fl_owner is lockd which doesn't distinguish - processes on 
the nfs client */ - op->info.owner = (__u64) fl->fl_pid; - xop->callback = fl->fl_lmops->fl_grant; - locks_init_lock(&xop->flc); - locks_copy_lock(&xop->flc, fl); - xop->fl = fl; - xop->file = file; - } else { - op->info.owner = (__u64)(long) fl->fl_owner; - xop->callback = NULL; - } - - send_op(op); - - if (xop->callback == NULL) - wait_event(recv_wq, (op->done != 0)); - else - return -EINPROGRESS; - - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - printk(KERN_INFO "plock op on list\n"); - list_del(&op->list); - } - spin_unlock(&ops_lock); - - rv = op->info.rv; - - if (!rv) { - if (posix_lock_file_wait(file, fl) < 0) - log_error("gdlm_plock: vfs lock error %x,%llx", - name->ln_type, - (unsigned long long)name->ln_number); - } - - kfree(xop); - return rv; -} - -/* Returns failure iff a succesful lock operation should be canceled */ -static int gdlm_plock_callback(struct plock_op *op) -{ - struct file *file; - struct file_lock *fl; - struct file_lock *flc; - int (*notify)(void *, void *, int) = NULL; - struct plock_xop *xop = (struct plock_xop *)op; - int rv = 0; - - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - printk(KERN_INFO "plock op on list\n"); - list_del(&op->list); - } - spin_unlock(&ops_lock); - - /* check if the following 2 are still valid or make a copy */ - file = xop->file; - flc = &xop->flc; - fl = xop->fl; - notify = xop->callback; - - if (op->info.rv) { - notify(flc, NULL, op->info.rv); - goto out; - } - - /* got fs lock; bookkeep locally as well: */ - flc->fl_flags &= ~FL_SLEEP; - if (posix_lock_file(file, flc, NULL)) { - /* - * This can only happen in the case of kmalloc() failure. - * The filesystem's own lock is the authoritative lock, - * so a failure to get the lock locally is not a disaster. - * As long as GFS cannot reliably cancel locks (especially - * in a low-memory situation), we're better off ignoring - * this failure than trying to recover. 
- */ - log_error("gdlm_plock: vfs lock error file %p fl %p", - file, fl); - } - - rv = notify(flc, NULL, 0); - if (rv) { - /* XXX: We need to cancel the fs lock here: */ - printk("gfs2 lock granted after lock request failed;" - " dangling lock!\n"); - goto out; - } - -out: - kfree(xop); - return rv; -} - -int gdlm_punlock(void *lockspace, struct lm_lockname *name, - struct file *file, struct file_lock *fl) -{ - struct gdlm_ls *ls = lockspace; - struct plock_op *op; - int rv; - - op = kzalloc(sizeof(*op), GFP_KERNEL); - if (!op) - return -ENOMEM; - - if (posix_lock_file_wait(file, fl) < 0) - log_error("gdlm_punlock: vfs unlock error %x,%llx", - name->ln_type, (unsigned long long)name->ln_number); - - op->info.optype = GDLM_PLOCK_OP_UNLOCK; - op->info.pid = fl->fl_pid; - op->info.fsid = ls->id; - op->info.number = name->ln_number; - op->info.start = fl->fl_start; - op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) - op->info.owner = (__u64) fl->fl_pid; - else - op->info.owner = (__u64)(long) fl->fl_owner; - - send_op(op); - wait_event(recv_wq, (op->done != 0)); - - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - printk(KERN_INFO "punlock op on list\n"); - list_del(&op->list); - } - spin_unlock(&ops_lock); - - rv = op->info.rv; - - if (rv == -ENOENT) - rv = 0; - - kfree(op); - return rv; -} - -int gdlm_plock_get(void *lockspace, struct lm_lockname *name, - struct file *file, struct file_lock *fl) -{ - struct gdlm_ls *ls = lockspace; - struct plock_op *op; - int rv; - - op = kzalloc(sizeof(*op), GFP_KERNEL); - if (!op) - return -ENOMEM; - - op->info.optype = GDLM_PLOCK_OP_GET; - op->info.pid = fl->fl_pid; - op->info.ex = (fl->fl_type == F_WRLCK); - op->info.fsid = ls->id; - op->info.number = name->ln_number; - op->info.start = fl->fl_start; - op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) - op->info.owner = (__u64) fl->fl_pid; - else - op->info.owner = (__u64)(long) fl->fl_owner; - - send_op(op); - 
wait_event(recv_wq, (op->done != 0)); - - spin_lock(&ops_lock); - if (!list_empty(&op->list)) { - printk(KERN_INFO "plock_get op on list\n"); - list_del(&op->list); - } - spin_unlock(&ops_lock); - - /* info.rv from userspace is 1 for conflict, 0 for no-conflict, - -ENOENT if there are no locks on the file */ - - rv = op->info.rv; - - fl->fl_type = F_UNLCK; - if (rv == -ENOENT) - rv = 0; - else if (rv > 0) { - fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; - fl->fl_pid = op->info.pid; - fl->fl_start = op->info.start; - fl->fl_end = op->info.end; - rv = 0; - } - - kfree(op); - return rv; -} - -/* a read copies out one plock request from the send list */ -static ssize_t dev_read(struct file *file, char __user *u, size_t count, - loff_t *ppos) -{ - struct gdlm_plock_info info; - struct plock_op *op = NULL; - - if (count < sizeof(info)) - return -EINVAL; - - spin_lock(&ops_lock); - if (!list_empty(&send_list)) { - op = list_entry(send_list.next, struct plock_op, list); - list_move(&op->list, &recv_list); - memcpy(&info, &op->info, sizeof(info)); - } - spin_unlock(&ops_lock); - - if (!op) - return -EAGAIN; - - if (copy_to_user(u, &info, sizeof(info))) - return -EFAULT; - return sizeof(info); -} - -/* a write copies in one plock result that should match a plock_op - on the recv list */ -static ssize_t dev_write(struct file *file, const char __user *u, size_t count, - loff_t *ppos) -{ - struct gdlm_plock_info info; - struct plock_op *op; - int found = 0; - - if (count != sizeof(info)) - return -EINVAL; - - if (copy_from_user(&info, u, sizeof(info))) - return -EFAULT; - - if (check_version(&info)) - return -EINVAL; - - spin_lock(&ops_lock); - list_for_each_entry(op, &recv_list, list) { - if (op->info.fsid == info.fsid && op->info.number == info.number && - op->info.owner == info.owner) { - list_del_init(&op->list); - found = 1; - op->done = 1; - memcpy(&op->info, &info, sizeof(info)); - break; - } - } - spin_unlock(&ops_lock); - - if (found) { - struct plock_xop *xop; - 
xop = (struct plock_xop *)op; - if (xop->callback) - count = gdlm_plock_callback(op); - else - wake_up(&recv_wq); - } else - printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid, - (unsigned long long)info.number); - return count; -} - -static unsigned int dev_poll(struct file *file, poll_table *wait) -{ - unsigned int mask = 0; - - poll_wait(file, &send_wq, wait); - - spin_lock(&ops_lock); - if (!list_empty(&send_list)) - mask = POLLIN | POLLRDNORM; - spin_unlock(&ops_lock); - - return mask; -} - -static const struct file_operations dev_fops = { - .read = dev_read, - .write = dev_write, - .poll = dev_poll, - .owner = THIS_MODULE -}; - -static struct miscdevice plock_dev_misc = { - .minor = MISC_DYNAMIC_MINOR, - .name = GDLM_PLOCK_MISC_NAME, - .fops = &dev_fops -}; - -int gdlm_plock_init(void) -{ - int rv; - - spin_lock_init(&ops_lock); - INIT_LIST_HEAD(&send_list); - INIT_LIST_HEAD(&recv_list); - init_waitqueue_head(&send_wq); - init_waitqueue_head(&recv_wq); - - rv = misc_register(&plock_dev_misc); - if (rv) - printk(KERN_INFO "gdlm_plock_init: misc_register failed %d", - rv); - return rv; -} - -void gdlm_plock_exit(void) -{ - if (misc_deregister(&plock_dev_misc) < 0) - printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed"); -} - diff -puN include/linux/Kbuild~git-dlm include/linux/Kbuild --- a/include/linux/Kbuild~git-dlm +++ a/include/linux/Kbuild @@ -99,7 +99,7 @@ header-y += ixjuser.h header-y += jffs2.h header-y += keyctl.h header-y += limits.h -header-y += lock_dlm_plock.h +header-y += dlm_plock.h header-y += magic.h header-y += major.h header-y += matroxfb.h diff -puN /dev/null include/linux/dlm_plock.h --- /dev/null +++ a/include/linux/dlm_plock.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. 
+ */ + +#ifndef __DLM_PLOCK_DOT_H__ +#define __DLM_PLOCK_DOT_H__ + +#define DLM_PLOCK_MISC_NAME "dlm_plock" + +#define DLM_PLOCK_VERSION_MAJOR 1 +#define DLM_PLOCK_VERSION_MINOR 1 +#define DLM_PLOCK_VERSION_PATCH 0 + +enum { + DLM_PLOCK_OP_LOCK = 1, + DLM_PLOCK_OP_UNLOCK, + DLM_PLOCK_OP_GET, +}; + +struct dlm_plock_info { + __u32 version[3]; + __u8 optype; + __u8 ex; + __u8 wait; + __u8 pad; + __u32 pid; + __s32 nodeid; + __s32 rv; + __u32 fsid; + __u64 number; + __u64 start; + __u64 end; + __u64 owner; +}; + +#ifdef __KERNEL__ +int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + int cmd, struct file_lock *fl); +int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl); +int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, + struct file_lock *fl); +#endif /* __KERNEL__ */ + +#endif + diff -puN include/linux/lock_dlm_plock.h~git-dlm /dev/null --- a/include/linux/lock_dlm_plock.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#ifndef __LOCK_DLM_PLOCK_DOT_H__ -#define __LOCK_DLM_PLOCK_DOT_H__ - -#define GDLM_PLOCK_MISC_NAME "lock_dlm_plock" - -#define GDLM_PLOCK_VERSION_MAJOR 1 -#define GDLM_PLOCK_VERSION_MINOR 1 -#define GDLM_PLOCK_VERSION_PATCH 0 - -enum { - GDLM_PLOCK_OP_LOCK = 1, - GDLM_PLOCK_OP_UNLOCK, - GDLM_PLOCK_OP_GET, -}; - -struct gdlm_plock_info { - __u32 version[3]; - __u8 optype; - __u8 ex; - __u8 wait; - __u8 pad; - __u32 pid; - __s32 nodeid; - __s32 rv; - __u32 fsid; - __u64 number; - __u64 start; - __u64 end; - __u64 owner; -}; - -#endif - _