1 /******************************************************************************
2 *******************************************************************************
4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5 ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
7 ** This copyrighted material is made available to anyone wishing to use,
8 ** modify, copy, or redistribute it subject to the terms and conditions
9 ** of the GNU General Public License v.2.
11 *******************************************************************************
12 ******************************************************************************/
14 #include <linux/module.h>
16 #include "dlm_internal.h"
17 #include "lockspace.h"
26 #include "requestqueue.h"
/* Taken around threads_start()/new_lockspace() in dlm_new_lockspace() and
   around release_lockspace() in dlm_release_lockspace(). */
31 static struct mutex ls_lock;
/* List of lockspaces, linked through dlm_ls.ls_list; new_lockspace() adds
   entries and remove_lockspace() deletes them. */
32 static struct list_head lslist;
/* Spinlock held while walking or modifying lslist. */
33 static spinlock_t lslist_lock;
/* Task handed to kthread_stop() in dlm_scand_stop(); presumably the thread
   created by dlm_scand_start() -- confirm where it is assigned. */
34 static struct task_struct * scand_task;
/* Store handler wired to the "control" attribute (dlm_attr_control.store).
   Parses buf as an integer with kstrtoint() (base 0, so the base is taken
   from the prefix), looks the lockspace up again through its local handle
   and later balances that lookup with dlm_put_lockspace().
   NOTE(review): what is done with the parsed value is not shown here --
   confirm against the full function body. */
37 static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
41 int rc = kstrtoint(buf, 0, &n);
45 ls = dlm_find_lockspace_local(ls->ls_local_handle);
59 dlm_put_lockspace(ls);
/* Store handler wired to the "event_done" attribute (dlm_attr_event.store).
   Parses buf as an integer directly into ls->ls_uevent_result, then sets
   LSFL_UEVENT_WAIT and wakes ls_uevent_wait. That flag and wait queue are
   what do_uevent() sleeps on, so a write here releases a pending
   do_uevent() call. */
63 static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
65 int rc = kstrtoint(buf, 0, &ls->ls_uevent_result);
69 set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
70 wake_up(&ls->ls_uevent_wait);
/* Show handler: formats ls->ls_global_id as an unsigned decimal followed by
   a newline into buf (bounded by PAGE_SIZE) and returns snprintf()'s
   result. */
74 static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
76 return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
/* Store handler wired to the "id" attribute (dlm_attr_id.store). Parses buf
   as an unsigned integer (base taken from the prefix) straight into
   ls->ls_global_id; rc holds the kstrtouint() status. */
79 static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
81 int rc = kstrtouint(buf, 0, &ls->ls_global_id);
/* Show handler for the "nodir" attribute: prints the value of
   dlm_no_directory(ls) as an unsigned decimal plus newline into buf. */
88 static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
90 return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
/* Store handler for the "nodir" attribute: parses buf as an integer into
   val and sets LSFL_NODIR in ls->ls_flags.
   NOTE(review): the condition on val that guards set_bit() is not shown
   here -- confirm which values enable the flag. */
93 static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
96 int rc = kstrtoint(buf, 0, &val);
101 set_bit(LSFL_NODIR, &ls->ls_flags);
/* Show handler for the "recover_status" attribute: prints the value
   returned by dlm_recover_status(ls) in lowercase hex plus newline. */
105 static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
107 uint32_t status = dlm_recover_status(ls);
108 return snprintf(buf, PAGE_SIZE, "%x\n", status);
/* Show handler for the "recover_nodeid" attribute: prints
   ls->ls_recover_nodeid as a signed decimal plus newline. */
111 static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
113 return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
/* Members of struct dlm_attr: a generic sysfs attribute plus optional
   per-lockspace handlers. dlm_attr_show()/dlm_attr_store() recover this
   wrapper from the embedded attr with container_of(). */
/* Embedded sysfs attribute (name and mode). */
117 struct attribute attr;
/* Read handler; may be NULL, in which case dlm_attr_show() returns 0. */
118 ssize_t (*show)(struct dlm_ls *, char *);
/* Write handler; may be NULL, in which case dlm_attr_store() returns len. */
119 ssize_t (*store)(struct dlm_ls *, const char *, size_t);
/* Per-lockspace sysfs attributes. Each entry pairs a name and mode with
   the handler(s) defined above. */

/* "control": owner-writable only (S_IWUSR). */
122 static struct dlm_attr dlm_attr_control = {
123 .attr = {.name = "control", .mode = S_IWUSR},
124 .store = dlm_control_store
/* "event_done": owner-writable only; writing wakes a waiting do_uevent(). */
127 static struct dlm_attr dlm_attr_event = {
128 .attr = {.name = "event_done", .mode = S_IWUSR},
129 .store = dlm_event_store
/* "id": mode is readable by all (S_IRUGO) and owner-writable. */
132 static struct dlm_attr dlm_attr_id = {
133 .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
135 .store = dlm_id_store
/* "nodir": readable by all, owner-writable. */
138 static struct dlm_attr dlm_attr_nodir = {
139 .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
140 .show = dlm_nodir_show,
141 .store = dlm_nodir_store
/* "recover_status": read-only. */
144 static struct dlm_attr dlm_attr_recover_status = {
145 .attr = {.name = "recover_status", .mode = S_IRUGO},
146 .show = dlm_recover_status_show
/* "recover_nodeid": read-only. */
149 static struct dlm_attr dlm_attr_recover_nodeid = {
150 .attr = {.name = "recover_nodeid", .mode = S_IRUGO},
151 .show = dlm_recover_nodeid_show
/* Attribute table installed as dlm_ktype.default_attrs. */
154 static struct attribute *dlm_attrs[] = {
155 &dlm_attr_control.attr,
156 &dlm_attr_event.attr,
158 &dlm_attr_nodir.attr,
159 &dlm_attr_recover_status.attr,
160 &dlm_attr_recover_nodeid.attr,
/* sysfs_ops.show entry point (see dlm_attr_ops). Recovers the owning
   dlm_ls from the embedded kobject and the dlm_attr wrapper from the
   generic attribute, then forwards to the attribute's show handler.
   Returns 0 when the attribute has no show handler. */
164 static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
167 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
168 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
169 return a->show ? a->show(ls, buf) : 0;
/* sysfs_ops.store entry point (see dlm_attr_ops). Recovers the owning
   dlm_ls and the dlm_attr wrapper via container_of() and forwards to the
   attribute's store handler. When no store handler is set it returns len,
   i.e. the write is reported as fully consumed without doing anything. */
172 static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
173 const char *buf, size_t len)
175 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
176 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
177 return a->store ? a->store(ls, buf, len) : len;
/* kobj_type.release callback for a lockspace kobject (see dlm_ktype).
   Recovers the enclosing dlm_ls from the embedded ls_kobj.
   NOTE(review): presumably frees ls at this point, as the comments near
   the kobject_put() calls in this file say the kobject handles freeing --
   confirm against the full body. */
180 static void lockspace_kobj_release(struct kobject *k)
182 struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
/* sysfs dispatch table: routes attribute reads/writes through
   dlm_attr_show()/dlm_attr_store(). */
186 static const struct sysfs_ops dlm_attr_ops = {
187 .show = dlm_attr_show,
188 .store = dlm_attr_store,
/* kobject type passed to kobject_init_and_add() in new_lockspace():
   default attributes, sysfs ops and release callback. */
191 static struct kobj_type dlm_ktype = {
192 .default_attrs = dlm_attrs,
193 .sysfs_ops = &dlm_attr_ops,
194 .release = lockspace_kobj_release,
/* The "dlm" kset, created in dlm_lockspace_init() and unregistered in
   dlm_lockspace_exit(); new_lockspace() assigns it to ls_kobj.kset. */
197 static struct kset *dlm_kset;
/* Emit a lockspace uevent and wait for userspace to acknowledge it.
   "in" selects the direction: non-zero is logged as joining/join, zero as
   leaving/leave. A KOBJ_ONLINE or KOBJ_OFFLINE uevent is sent on the
   lockspace kobject (presumably ONLINE for join and OFFLINE for leave --
   the selecting condition is not shown here), then the caller sleeps
   interruptibly until LSFL_UEVENT_WAIT is set, which dlm_event_store()
   does on a write to "event_done". The flag is cleared as part of the wait
   condition. ls_uevent_result, filled in by that write, is then taken as
   the error value and a failure is logged.
   NOTE(review): how an interrupted wait is handled is not shown here. */
199 static int do_uevent(struct dlm_ls *ls, int in)
204 kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
206 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
208 log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving");
210 /* dlm_controld will see the uevent, do the necessary group management
211 and then write to sysfs to wake us */
213 error = wait_event_interruptible(ls->ls_uevent_wait,
214 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
216 log_rinfo(ls, "group event done %d %d", error, ls->ls_uevent_result);
221 error = ls->ls_uevent_result;
224 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
225 error, ls->ls_uevent_result);
/* kset uevent callback (see dlm_uevent_ops): adds a LOCKSPACE=<name>
   variable, taken from ls->ls_name, to the uevent environment of the
   lockspace that owns kobj. */
229 static int dlm_uevent(struct kset *kset, struct kobject *kobj,
230 struct kobj_uevent_env *env)
232 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
234 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
/* uevent operations given to kset_create_and_add() for the "dlm" kset. */
238 static const struct kset_uevent_ops dlm_uevent_ops = {
239 .uevent = dlm_uevent,
/* Init-time setup for this file: initializes ls_lock, the lslist head and
   lslist_lock, then creates the "dlm" kset under kernel_kobj with
   dlm_uevent_ops. A warning is printed when the kset cannot be created.
   NOTE(review): the returned error code is not shown here. */
242 int __init dlm_lockspace_init(void)
245 mutex_init(&ls_lock);
246 INIT_LIST_HEAD(&lslist);
247 spin_lock_init(&lslist_lock);
249 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
251 printk(KERN_WARNING "%s: can not create kset\n", __func__);
/* Undoes dlm_lockspace_init(): unregisters the "dlm" kset. */
257 void dlm_lockspace_exit(void)
259 kset_unregister(dlm_kset);
/* Walks lslist under lslist_lock looking for a lockspace that is due for a
   scan, i.e. jiffies has reached ls_scan_time + ci_scan_secs seconds. The
   lock is dropped as soon as such an entry is found, or after the walk
   finishes without a match.
   NOTE(review): presumably returns the matching lockspace, or NULL when
   none is due -- the return statements are not shown here. */
262 static struct dlm_ls *find_ls_to_scan(void)
266 spin_lock(&lslist_lock);
267 list_for_each_entry(ls, &lslist, ls_list) {
268 if (time_after_eq(jiffies, ls->ls_scan_time +
269 dlm_config.ci_scan_secs * HZ)) {
270 spin_unlock(&lslist_lock);
274 spin_unlock(&lslist_lock);
/* Body of the "dlm_scand" kernel thread (started by dlm_scand_start()).
   Loops until kthread_should_stop(): picks a lockspace with
   find_ls_to_scan(); if dlm_lock_recovery_try() succeeds it stamps
   ls_scan_time with the current jiffies, runs dlm_scan_timeout() and
   dlm_scan_waiters(), and releases the recovery lock. Otherwise the
   lockspace's ls_scan_time is pushed forward by one second (HZ).
   The thread also sleeps interruptibly for ci_scan_secs seconds;
   presumably only when no lockspace was due -- the branch structure is
   not shown here. */
278 static int dlm_scand(void *data)
282 while (!kthread_should_stop()) {
283 ls = find_ls_to_scan();
285 if (dlm_lock_recovery_try(ls)) {
286 ls->ls_scan_time = jiffies;
288 dlm_scan_timeout(ls);
289 dlm_scan_waiters(ls);
290 dlm_unlock_recovery(ls);
292 ls->ls_scan_time += HZ;
296 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
/* Creates and starts the scan thread: kthread_run() on dlm_scand() with
   the thread name "dlm_scand".
   NOTE(review): error handling and the assignment of the result
   (presumably to scand_task) are not shown here. */
301 static int dlm_scand_start(void)
303 struct task_struct *p;
306 p = kthread_run(dlm_scand, NULL, "dlm_scand");
/* Stops the scan thread by calling kthread_stop() on scand_task. */
314 static void dlm_scand_stop(void)
316 kthread_stop(scand_task);
/* Searches lslist, under lslist_lock, for the lockspace whose
   ls_global_id equals id.
   NOTE(review): what happens on a match (presumably a reference is taken
   and the entry returned, to be balanced by dlm_put_lockspace()) and the
   not-found return value are not shown here -- confirm. */
319 struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
323 spin_lock(&lslist_lock);
325 list_for_each_entry(ls, &lslist, ls_list) {
326 if (ls->ls_global_id == id) {
333 spin_unlock(&lslist_lock);
/* Searches lslist, under lslist_lock, for the lockspace whose
   ls_local_handle equals the given handle (new_lockspace() sets
   ls_local_handle to the dlm_ls pointer itself).
   NOTE(review): the match handling and return values are not shown here;
   callers in this file pair the lookup with dlm_put_lockspace(). */
337 struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
341 spin_lock(&lslist_lock);
342 list_for_each_entry(ls, &lslist, ls_list) {
343 if (ls->ls_local_handle == lockspace) {
350 spin_unlock(&lslist_lock);
/* Searches lslist, under lslist_lock, for the lockspace whose
   ls_device.minor equals minor.
   NOTE(review): the match handling and return values are not shown
   here -- confirm against the full body. */
354 struct dlm_ls *dlm_find_lockspace_device(int minor)
358 spin_lock(&lslist_lock);
359 list_for_each_entry(ls, &lslist, ls_list) {
360 if (ls->ls_device.minor == minor) {
367 spin_unlock(&lslist_lock);
/* Counterpart to the dlm_find_lockspace_*() lookups; does its work while
   holding lslist_lock.
   NOTE(review): the guarded statement is not shown here; presumably it
   drops ls->ls_count, the counter remove_lockspace() checks for zero --
   confirm. */
371 void dlm_put_lockspace(struct dlm_ls *ls)
373 spin_lock(&lslist_lock);
375 spin_unlock(&lslist_lock);
/* Unlinks ls from lslist once ls_count has reached zero. Under
   lslist_lock: when ls_count is 0 it warns if ls_create_count is still
   non-zero, deletes the list entry and unlocks. When ls_count is not 0
   the lock is simply released.
   NOTE(review): presumably this retries until the count drops -- the
   surrounding loop/sleep is not shown here. */
378 static void remove_lockspace(struct dlm_ls *ls)
381 spin_lock(&lslist_lock);
382 if (ls->ls_count == 0) {
383 WARN_ON(ls->ls_create_count != 0);
384 list_del(&ls->ls_list);
385 spin_unlock(&lslist_lock);
388 spin_unlock(&lslist_lock);
/* Starts the threads shared by all lockspaces: first the scan thread via
   dlm_scand_start(), then lowcomms via dlm_lowcomms_start(). A failure of
   either step is logged with its error code.
   NOTE(review): the unwinding on a lowcomms failure and the return value
   are not shown here. */
393 static int threads_start(void)
397 error = dlm_scand_start();
399 log_print("cannot start dlm_scand thread %d", error);
403 /* Thread for sending/receiving messages for all lockspaces */
404 error = dlm_lowcomms_start();
406 log_print("cannot start dlm lowcomms %d", error);
/* NOTE(review): by name the counterpart of threads_start(); the body is
   not shown here, so confirm which threads it stops and in what order. */
418 static void threads_stop(void)
/* Create a lockspace named "name", or take another create reference on
   an existing one with the same name. Called from dlm_new_lockspace().

   Steps, in order:
   - Validate arguments: the name may not exceed DLM_LOCKSPACE_LEN and
     lvblen must be a non-zero multiple of 8.
   - Take a module reference and require the dlm user daemon to be
     available.
   - When both ops and ops_result are given, report -EOPNOTSUPP through
     *ops_result if recovery callbacks are disabled in dlm_config.
   - Compare the caller's cluster name against dlm_config.ci_cluster_name
     (only when recovery callbacks are enabled and cluster is non-NULL).
   - Under lslist_lock, look for an existing lockspace with the same name;
     DLM_LSFL_NEWEXCL is checked on a match, otherwise ls_create_count is
     bumped.
   - Otherwise allocate a zeroed dlm_ls with room for the name, initialize
     its fields, locks, lists, resource table (ls_rsbtbl), remove-name
     buffers and recovery buffer, and add it to lslist with
     ls_create_count = 1.
   - Start the callback machinery (only for DLM_LSFL_FS) and dlm_recoverd,
     and wait until LSFL_RECOVER_LOCK is set.
   - Register the kobject in dlm_kset, emit KOBJ_ADD, run the join uevent
     handshake via do_uevent(ls, 1), then wait for ls_members_done and
     take ls_members_result as the outcome.
   - On success create the debug file and log "join complete".

   The tail of the function tears down in reverse order what was set up
   and drops the module reference.
   NOTE(review): the error codes, goto labels and the exact points where
   *lockspace is assigned are not shown here -- confirm against the full
   function. */
424 static int new_lockspace(const char *name, const char *cluster,
425 uint32_t flags, int lvblen,
426 const struct dlm_lockspace_ops *ops, void *ops_arg,
427 int *ops_result, dlm_lockspace_t **lockspace)
/* Argument and environment checks. */
432 int namelen = strlen(name);
434 if (namelen > DLM_LOCKSPACE_LEN)
437 if (!lvblen || (lvblen % 8))
440 if (!try_module_get(THIS_MODULE))
443 if (!dlm_user_daemon_available()) {
444 log_print("dlm user daemon not available");
449 if (ops && ops_result) {
450 if (!dlm_config.ci_recover_callbacks)
451 *ops_result = -EOPNOTSUPP;
457 log_print("dlm cluster name '%s' is being used without an application provided cluster name",
458 dlm_config.ci_cluster_name);
460 if (dlm_config.ci_recover_callbacks && cluster &&
461 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
462 log_print("dlm cluster name '%s' does not match "
463 "the application cluster name '%s'",
464 dlm_config.ci_cluster_name, cluster);
/* Look for an already-existing lockspace with the same name. */
471 spin_lock(&lslist_lock);
472 list_for_each_entry(ls, &lslist, ls_list) {
473 WARN_ON(ls->ls_create_count <= 0);
474 if (ls->ls_namelen != namelen)
476 if (memcmp(ls->ls_name, name, namelen))
478 if (flags & DLM_LSFL_NEWEXCL) {
482 ls->ls_create_count++;
487 spin_unlock(&lslist_lock);
/* No existing lockspace was used: allocate and initialize a new one. The
   name is stored inline after the struct, hence the "+ namelen". */
494 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
497 memcpy(ls->ls_name, name, namelen);
498 ls->ls_namelen = namelen;
499 ls->ls_lvblen = lvblen;
502 ls->ls_scan_time = jiffies;
504 if (ops && dlm_config.ci_recover_callbacks) {
506 ls->ls_ops_arg = ops_arg;
509 if (flags & DLM_LSFL_TIMEWARN)
510 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
512 /* ls_exflags are forced to match among nodes, and we don't
513 need to require all nodes to have some flags set */
514 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
517 size = dlm_config.ci_rsbtbl_size;
518 ls->ls_rsbtbl_size = size;
520 ls->ls_rsbtbl = vmalloc(array_size(size, sizeof(struct dlm_rsbtable)));
523 for (i = 0; i < size; i++) {
524 ls->ls_rsbtbl[i].keep.rb_node = NULL;
525 ls->ls_rsbtbl[i].toss.rb_node = NULL;
526 spin_lock_init(&ls->ls_rsbtbl[i].lock);
529 spin_lock_init(&ls->ls_remove_spin);
531 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
532 ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,
534 if (!ls->ls_remove_names[i])
538 idr_init(&ls->ls_lkbidr);
539 spin_lock_init(&ls->ls_lkbidr_spin);
541 INIT_LIST_HEAD(&ls->ls_waiters);
542 mutex_init(&ls->ls_waiters_mutex);
543 INIT_LIST_HEAD(&ls->ls_orphans);
544 mutex_init(&ls->ls_orphans_mutex);
545 INIT_LIST_HEAD(&ls->ls_timeout);
546 mutex_init(&ls->ls_timeout_mutex);
548 INIT_LIST_HEAD(&ls->ls_new_rsb);
549 spin_lock_init(&ls->ls_new_rsb_spin);
551 INIT_LIST_HEAD(&ls->ls_nodes);
552 INIT_LIST_HEAD(&ls->ls_nodes_gone);
553 ls->ls_num_nodes = 0;
554 ls->ls_low_nodeid = 0;
555 ls->ls_total_weight = 0;
556 ls->ls_node_array = NULL;
558 memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
559 ls->ls_stub_rsb.res_ls = ls;
561 ls->ls_debug_rsb_dentry = NULL;
562 ls->ls_debug_waiters_dentry = NULL;
564 init_waitqueue_head(&ls->ls_uevent_wait);
565 ls->ls_uevent_result = 0;
566 init_completion(&ls->ls_members_done);
567 ls->ls_members_result = -1;
569 mutex_init(&ls->ls_cb_mutex);
570 INIT_LIST_HEAD(&ls->ls_cb_delay);
572 ls->ls_recoverd_task = NULL;
573 mutex_init(&ls->ls_recoverd_active);
574 spin_lock_init(&ls->ls_recover_lock);
575 spin_lock_init(&ls->ls_rcom_spin);
576 get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
577 ls->ls_recover_status = 0;
578 ls->ls_recover_seq = 0;
579 ls->ls_recover_args = NULL;
580 init_rwsem(&ls->ls_in_recovery);
581 init_rwsem(&ls->ls_recv_active);
582 INIT_LIST_HEAD(&ls->ls_requestqueue);
583 mutex_init(&ls->ls_requestqueue_mutex);
584 mutex_init(&ls->ls_clear_proc_locks);
586 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
587 if (!ls->ls_recover_buf)
591 ls->ls_num_slots = 0;
592 ls->ls_slots_size = 0;
595 INIT_LIST_HEAD(&ls->ls_recover_list);
596 spin_lock_init(&ls->ls_recover_list_lock);
597 idr_init(&ls->ls_recover_idr);
598 spin_lock_init(&ls->ls_recover_idr_lock);
599 ls->ls_recover_list_count = 0;
600 ls->ls_local_handle = ls;
601 init_waitqueue_head(&ls->ls_wait_general);
602 INIT_LIST_HEAD(&ls->ls_root_list);
603 init_rwsem(&ls->ls_root_sem);
/* Publish the new lockspace on lslist with its first create reference. */
605 spin_lock(&lslist_lock);
606 ls->ls_create_count = 1;
607 list_add(&ls->ls_list, &lslist);
608 spin_unlock(&lslist_lock);
610 if (flags & DLM_LSFL_FS) {
611 error = dlm_callback_start(ls);
613 log_error(ls, "can't start dlm_callback %d", error);
618 init_waitqueue_head(&ls->ls_recover_lock_wait);
621 * Once started, dlm_recoverd first looks for ls in lslist, then
622 * initializes ls_in_recovery as locked in "down" mode. We need
623 * to wait for the wakeup from dlm_recoverd because in_recovery
624 * has to start out in down mode.
627 error = dlm_recoverd_start(ls);
629 log_error(ls, "can't start dlm_recoverd %d", error);
633 wait_event(ls->ls_recover_lock_wait,
634 test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
636 ls->ls_kobj.kset = dlm_kset;
637 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
641 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
643 /* let kobject handle freeing of ls if there's an error */
646 /* This uevent triggers dlm_controld in userspace to add us to the
647 group of nodes that are members of this lockspace (managed by the
648 cluster infrastructure.) Once it's done that, it tells us who the
649 current lockspace members are (via configfs) and then tells the
650 lockspace to start running (via sysfs) in dlm_ls_start(). */
652 error = do_uevent(ls, 1);
656 wait_for_completion(&ls->ls_members_done);
657 error = ls->ls_members_result;
661 dlm_create_debug_file(ls);
663 log_rinfo(ls, "join complete");
669 dlm_clear_members(ls);
670 kfree(ls->ls_node_array);
672 dlm_recoverd_stop(ls);
674 dlm_callback_stop(ls);
676 spin_lock(&lslist_lock);
677 list_del(&ls->ls_list);
678 spin_unlock(&lslist_lock);
679 idr_destroy(&ls->ls_recover_idr);
680 kfree(ls->ls_recover_buf);
682 idr_destroy(&ls->ls_lkbidr);
683 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
684 if (ls->ls_remove_names[i])
685 kfree(ls->ls_remove_names[i]);
688 vfree(ls->ls_rsbtbl);
691 kobject_put(&ls->ls_kobj);
695 module_put(THIS_MODULE);
/* Public entry point for creating (or re-opening) a lockspace. Holds
   ls_lock for the whole operation: starts the shared threads with
   threads_start() and then calls new_lockspace() with the caller's
   arguments unchanged.
   NOTE(review): the conditions under which threads are started/stopped
   and how the result of new_lockspace() is interpreted are not shown
   here -- confirm against the full body. */
699 int dlm_new_lockspace(const char *name, const char *cluster,
700 uint32_t flags, int lvblen,
701 const struct dlm_lockspace_ops *ops, void *ops_arg,
702 int *ops_result, dlm_lockspace_t **lockspace)
706 mutex_lock(&ls_lock);
708 error = threads_start();
712 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
713 ops_result, lockspace);
721 mutex_unlock(&ls_lock);
/* idr_for_each() callback used by lockspace_busy() when force == 1.
   Returns non-zero for an lkb whose lkb_nodeid is 0 and whose granted
   mode is not DLM_LOCK_IV; the lockspace_busy() force documentation calls
   these "local LKBs". */
725 static int lkb_idr_is_local(int id, void *p, void *data)
727 struct dlm_lkb *lkb = p;
729 return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
/* idr_for_each() callback passed by lockspace_busy().
   NOTE(review): the body is not shown here; presumably it returns
   non-zero for every lkb -- confirm. */
732 static int lkb_idr_is_any(int id, void *p, void *data)
/* idr_for_each() callback used by release_lockspace() to dispose of each
   lkb. Frees the lock value block via dlm_free_lvb() when the lkb has one
   and DLM_IFL_MSTCPY is set in lkb_flags.
   NOTE(review): the freeing of the lkb itself and the return value are
   not shown here. */
737 static int lkb_idr_free(int id, void *p, void *data)
739 struct dlm_lkb *lkb = p;
741 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
742 dlm_free_lvb(lkb->lkb_lvbptr);
748 /* NOTE: We check the lkbidr here rather than the resource table.
749 This is because there may be LKBs queued as ASTs that have been unlinked
750 from their RSBs and are pending deletion once the AST has been delivered */
/* Reports whether the lockspace still holds LKBs that should block its
   release for the given force level. The lkb idr is walked under
   ls_lkbidr_spin: one branch checks for any lkb (lkb_idr_is_any), the
   force == 1 branch checks only for local ones (lkb_idr_is_local).
   NOTE(review): the first branch's condition (presumably force == 0) and
   the result for higher force values are not shown here -- confirm. */
752 static int lockspace_busy(struct dlm_ls *ls, int force)
756 spin_lock(&ls->ls_lkbidr_spin);
758 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
759 } else if (force == 1) {
760 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
764 spin_unlock(&ls->ls_lkbidr_spin);
/* Drop one create reference on ls and, when it was the last one, tear the
   lockspace down. Called from dlm_release_lockspace() with ls_lock held.

   Reference handling (under lslist_lock), using the result of
   lockspace_busy(ls, force): with ls_create_count == 1 the count is set
   to 0 so that remove_lockspace() can later unlink ls; with a count
   above 1 it is decremented and the remaining count is kept in rv. The
   "release_lockspace no remove" message is logged with rv.
   NOTE(review): how "busy" gates these branches and the early return are
   not shown here -- confirm.

   Teardown order: deregister the device, leave-group handshake (guarded
   by force < 3 and an available user daemon; the guarded call is not
   shown here, presumably do_uevent(ls, 0)), stop dlm_recoverd and the
   callback machinery, remove_lockspace(), delete the debug file, free
   the recovery buffer, free every lkb in the idr, empty the keep and
   toss trees of every rsbtbl bucket, free the table, the remove-name
   buffers and the ls_new_rsb list, purge the request queue, free recover
   args, members and the node array, and finally drop the kobject (which
   frees ls) and the module reference. */
768 static int release_lockspace(struct dlm_ls *ls, int force)
774 busy = lockspace_busy(ls, force);
776 spin_lock(&lslist_lock);
777 if (ls->ls_create_count == 1) {
781 /* remove_lockspace takes ls off lslist */
782 ls->ls_create_count = 0;
785 } else if (ls->ls_create_count > 1) {
786 rv = --ls->ls_create_count;
790 spin_unlock(&lslist_lock);
793 log_debug(ls, "release_lockspace no remove %d", rv);
797 dlm_device_deregister(ls);
799 if (force < 3 && dlm_user_daemon_available())
802 dlm_recoverd_stop(ls);
804 dlm_callback_stop(ls);
806 remove_lockspace(ls);
808 dlm_delete_debug_file(ls);
810 kfree(ls->ls_recover_buf);
813 * Free all lkb's in idr
816 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
817 idr_destroy(&ls->ls_lkbidr);
820 * Free all rsb's on rsbtbl[] lists
823 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
824 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
825 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
826 rb_erase(n, &ls->ls_rsbtbl[i].keep);
830 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
831 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
832 rb_erase(n, &ls->ls_rsbtbl[i].toss);
837 vfree(ls->ls_rsbtbl);
839 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
840 kfree(ls->ls_remove_names[i]);
842 while (!list_empty(&ls->ls_new_rsb)) {
843 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
845 list_del(&rsb->res_hashchain);
850 * Free structures on any other lists
853 dlm_purge_requestqueue(ls);
854 kfree(ls->ls_recover_args);
855 dlm_clear_members(ls);
856 dlm_clear_members_gone(ls);
857 kfree(ls->ls_node_array);
858 log_rinfo(ls, "release_lockspace final free");
859 kobject_put(&ls->ls_kobj);
860 /* The ls structure will be freed when the kobject is done with */
862 module_put(THIS_MODULE);
867 * Called when a system has released all its locks and is not going to use the
868 * lockspace any longer. We free everything we're managing for this lockspace.
869 * Remaining nodes will go through the recovery process as if we'd died. The
870 * lockspace must continue to function as usual, participating in recoveries,
871 * until this returns.
873 * Force has 4 possible values:
874 * 0 - don't destroy lockspace if it has any LKBs
875 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
876 * 2 - destroy lockspace regardless of LKBs
877 * 3 - destroy lockspace as part of a forced shutdown
/* Public entry point for releasing a lockspace handle. Resolves the
   handle with dlm_find_lockspace_local() and immediately balances that
   lookup with dlm_put_lockspace(), then calls release_lockspace() with
   the caller's force value while holding ls_lock.
   NOTE(review): the handling of an unknown handle and what is done with
   the release_lockspace() result before unlocking are not shown here. */
880 int dlm_release_lockspace(void *lockspace, int force)
885 ls = dlm_find_lockspace_local(lockspace);
888 dlm_put_lockspace(ls);
890 mutex_lock(&ls_lock);
891 error = release_lockspace(ls, force);
896 mutex_unlock(&ls_lock);
/* Walks lslist under lslist_lock and tests LSFL_RUNNING on each
   lockspace. For one of the two cases the lock is dropped and "no
   userland control daemon, stopping lockspace" is logged; once the walk
   is over, the number of affected lockspaces is logged via "count".
   NOTE(review): which branch increments count, the call that actually
   stops the lockspace, and any restart of the walk after dropping the
   lock are not shown here -- confirm against the full body. */
901 void dlm_stop_lockspaces(void)
908 spin_lock(&lslist_lock);
909 list_for_each_entry(ls, &lslist, ls_list) {
910 if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) {
914 spin_unlock(&lslist_lock);
915 log_error(ls, "no userland control daemon, stopping lockspace");
919 spin_unlock(&lslist_lock);
922 log_print("dlm user daemon left %d lockspaces", count);