1 /******************************************************************************
2 *******************************************************************************
4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5 ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
7 ** This copyrighted material is made available to anyone wishing to use,
8 ** modify, copy, or redistribute it subject to the terms and conditions
9 ** of the GNU General Public License v.2.
11 *******************************************************************************
12 ******************************************************************************/
14 #include "dlm_internal.h"
15 #include "lockspace.h"
24 #include "requestqueue.h"
/* Mutex held by dlm_new_lockspace() and dlm_release_lockspace() around their work. */
29 static struct mutex ls_lock;
/* List of lockspaces, linked through the ls_list member of struct dlm_ls. */
30 static struct list_head lslist;
/* Spinlock taken around the walks and modifications of lslist visible in this file. */
31 static spinlock_t lslist_lock;
/* Task handed to kthread_stop() by dlm_scand_stop(). */
32 static struct task_struct * scand_task;
/*
 * Store handler wired to the control attribute (see dlm_attr_control).
 * Parses buf as an integer with simple_strtol() (base 0), re-looks-up the
 * lockspace through its ls_local_handle and later releases that lookup
 * with dlm_put_lockspace().
 * NOTE(review): the lines that act on n and the return value are not
 * visible in this excerpt.
 */
35 static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
38 int n = simple_strtol(buf, NULL, 0);
40 ls = dlm_find_lockspace_local(ls->ls_local_handle);
54 dlm_put_lockspace(ls);
/*
 * Store handler wired to the event_done attribute (see dlm_attr_event).
 * Records the integer parsed from buf in ls_uevent_result, sets
 * LSFL_UEVENT_WAIT and wakes ls_uevent_wait; do_uevent() sleeps on that
 * wait queue until the flag is set.
 */
58 static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
60 ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
61 set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
62 wake_up(&ls->ls_uevent_wait);
/*
 * Formats ls_global_id as an unsigned decimal followed by a newline into
 * buf (bounded by PAGE_SIZE) and returns the snprintf() result.
 */
66 static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
68 return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
/*
 * Store handler for the id attribute: sets ls_global_id from buf, parsed
 * with simple_strtoul() (base 0, so the prefix selects the radix).
 * NOTE(review): the return statement is not visible in this excerpt.
 */
71 static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
73 ls->ls_global_id = simple_strtoul(buf, NULL, 0);
/*
 * Formats the value of dlm_no_directory(ls) as an unsigned decimal plus
 * newline into buf (bounded by PAGE_SIZE) and returns the snprintf() result.
 */
77 static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
79 return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
/*
 * Store handler for the nodir attribute: parses buf into val and sets
 * LSFL_NODIR in ls_flags.
 * NOTE(review): any condition on val that guards the set_bit(), and the
 * return statement, are not visible in this excerpt.
 */
82 static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
84 int val = simple_strtoul(buf, NULL, 0);
86 set_bit(LSFL_NODIR, &ls->ls_flags);
/*
 * Formats the value returned by dlm_recover_status(ls) in hexadecimal
 * (no 0x prefix) plus newline into buf, bounded by PAGE_SIZE.
 */
90 static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
92 uint32_t status = dlm_recover_status(ls);
93 return snprintf(buf, PAGE_SIZE, "%x\n", status);
/*
 * Formats ls_recover_nodeid as a signed decimal plus newline into buf,
 * bounded by PAGE_SIZE, and returns the snprintf() result.
 */
96 static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
98 return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
/*
 * Members of struct dlm_attr (the struct header line is not visible here).
 * attr is embedded so that dlm_attr_show() and dlm_attr_store() can get
 * back to the enclosing dlm_attr with container_of().
 */
102 struct attribute attr;
/* Optional read callback; dlm_attr_show() returns 0 when it is NULL. */
103 ssize_t (*show)(struct dlm_ls *, char *);
/* Optional write callback; dlm_attr_store() returns len when it is NULL. */
104 ssize_t (*store)(struct dlm_ls *, const char *, size_t);
/*
 * Per-lockspace attribute descriptors. Each one pairs an attribute name
 * and mode with the show and/or store handler defined earlier in the file.
 */

/* control: owner-writable only, handled by dlm_control_store(). */
107 static struct dlm_attr dlm_attr_control = {
108 .attr = {.name = "control", .mode = S_IWUSR},
109 .store = dlm_control_store
/* event_done: owner-writable only, handled by dlm_event_store(). */
112 static struct dlm_attr dlm_attr_event = {
113 .attr = {.name = "event_done", .mode = S_IWUSR},
114 .store = dlm_event_store
/* id: readable by all, writable by owner. */
117 static struct dlm_attr dlm_attr_id = {
118 .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
120 .store = dlm_id_store
/* nodir: readable by all, writable by owner. */
123 static struct dlm_attr dlm_attr_nodir = {
124 .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
125 .show = dlm_nodir_show,
126 .store = dlm_nodir_store
/* recover_status: read-only. */
129 static struct dlm_attr dlm_attr_recover_status = {
130 .attr = {.name = "recover_status", .mode = S_IRUGO},
131 .show = dlm_recover_status_show
/* recover_nodeid: read-only. */
134 static struct dlm_attr dlm_attr_recover_nodeid = {
135 .attr = {.name = "recover_nodeid", .mode = S_IRUGO},
136 .show = dlm_recover_nodeid_show
/*
 * Attribute table installed as default_attrs of dlm_ktype. Entries point
 * at the struct attribute embedded in each dlm_attr descriptor.
 */
139 static struct attribute *dlm_attrs[] = {
140 &dlm_attr_control.attr,
141 &dlm_attr_event.attr,
143 &dlm_attr_nodir.attr,
144 &dlm_attr_recover_status.attr,
145 &dlm_attr_recover_nodeid.attr,
/*
 * Generic show entry point (installed in dlm_attr_ops). Recovers the
 * lockspace from its embedded ls_kobj and the dlm_attr from its embedded
 * attr, then calls the attribute's show callback; returns 0 when the
 * attribute has no show callback.
 */
149 static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
152 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
153 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
154 return a->show ? a->show(ls, buf) : 0;
/*
 * Generic store entry point (installed in dlm_attr_ops). Recovers the
 * lockspace and the dlm_attr with container_of() and calls the
 * attribute's store callback; returns len, i.e. reports the whole write
 * as consumed, when the attribute has no store callback.
 */
157 static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
158 const char *buf, size_t len)
160 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
161 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
162 return a->store ? a->store(ls, buf, len) : len;
/*
 * Release callback of dlm_ktype. Recovers the lockspace from its embedded
 * ls_kobj.
 * NOTE(review): what is done with ls is not visible in this excerpt;
 * comments elsewhere in the file say the ls structure is freed when the
 * kobject is done with - confirm that happens here.
 */
165 static void lockspace_kobj_release(struct kobject *k)
167 struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
/* sysfs operations for lockspace kobjects: route to the generic dispatchers. */
171 static const struct sysfs_ops dlm_attr_ops = {
172 .show = dlm_attr_show,
173 .store = dlm_attr_store,
/*
 * kobject type used for ls_kobj in new_lockspace(): default attributes,
 * sysfs dispatch operations and the release callback.
 */
176 static struct kobj_type dlm_ktype = {
177 .default_attrs = dlm_attrs,
178 .sysfs_ops = &dlm_attr_ops,
179 .release = lockspace_kobj_release,
/*
 * kset created in dlm_lockspace_init() and unregistered in
 * dlm_lockspace_exit(); new_lockspace() assigns it to ls_kobj.kset.
 */
182 static struct kset *dlm_kset;
/*
 * Emit a uevent for the lockspace kobject and sleep until the
 * LSFL_UEVENT_WAIT flag is set (dlm_event_store() sets it and wakes the
 * queue), then pick up ls_uevent_result.
 * in: non-zero is logged as joining, zero as leaving.
 * NOTE(review): the branch selecting KOBJ_ONLINE versus KOBJ_OFFLINE, the
 * error checks and the return statement are not visible in this excerpt.
 */
184 static int do_uevent(struct dlm_ls *ls, int in)
189 kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
191 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
193 log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
195 /* dlm_controld will see the uevent, do the necessary group management
196 and then write to sysfs to wake us */
/* The wait is interruptible; test_and_clear_bit() consumes the flag. */
198 error = wait_event_interruptible(ls->ls_uevent_wait,
199 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
201 log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
206 error = ls->ls_uevent_result;
209 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
210 error, ls->ls_uevent_result);
/*
 * uevent callback of the dlm kset: adds a LOCKSPACE=<ls_name> variable to
 * the environment of uevents sent for a lockspace kobject.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
214 static int dlm_uevent(struct kset *kset, struct kobject *kobj,
215 struct kobj_uevent_env *env)
217 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
219 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
/* uevent operations passed to kset_create_and_add() in dlm_lockspace_init(). */
223 static struct kset_uevent_ops dlm_uevent_ops = {
224 .uevent = dlm_uevent,
/*
 * Module-init time setup: initialise ls_lock, lslist and lslist_lock and
 * create the dlm kset under kernel_kobj with dlm_uevent_ops attached.
 * NOTE(review): the failure check around the warning and the return
 * values are not visible in this excerpt.
 */
227 int __init dlm_lockspace_init(void)
230 mutex_init(&ls_lock);
231 INIT_LIST_HEAD(&lslist);
232 spin_lock_init(&lslist_lock);
234 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
236 printk(KERN_WARNING "%s: can not create kset\n", __func__);
/* Unregisters the kset created by dlm_lockspace_init(). */
242 void dlm_lockspace_exit(void)
244 kset_unregister(dlm_kset);
/*
 * Walk lslist under lslist_lock looking for a lockspace whose
 * ls_scan_time is at least ci_scan_secs seconds in the past. The lock is
 * released both when an entry matches and after the walk ends.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching ls, or NULL when none is due - confirm.
 */
247 static struct dlm_ls *find_ls_to_scan(void)
251 spin_lock(&lslist_lock);
252 list_for_each_entry(ls, &lslist, ls_list) {
/* time_after_eq() keeps the comparison safe across jiffies wrap-around. */
253 if (time_after_eq(jiffies, ls->ls_scan_time +
254 dlm_config.ci_scan_secs * HZ)) {
255 spin_unlock(&lslist_lock);
259 spin_unlock(&lslist_lock);
/*
 * Body of the dlm_scand kernel thread (started by dlm_scand_start()).
 * Loops until kthread_should_stop(): picks a lockspace with
 * find_ls_to_scan(); when dlm_lock_recovery_try() succeeds it stamps
 * ls_scan_time, runs the timeout and waiter scans and releases the
 * recovery lock.
 * NOTE(review): several lines of the loop are not visible here, including
 * the branch structure around the two remaining statements.
 */
263 static int dlm_scand(void *data)
267 while (!kthread_should_stop()) {
268 ls = find_ls_to_scan();
270 if (dlm_lock_recovery_try(ls)) {
271 ls->ls_scan_time = jiffies;
273 dlm_scan_timeout(ls);
274 dlm_scan_waiters(ls);
275 dlm_unlock_recovery(ls);
/* Pushes the next scan of this lockspace back by one second; presumably the path where the recovery lock was not obtained - confirm. */
277 ls->ls_scan_time += HZ;
/* Sleep for up to ci_scan_secs seconds; the sleep is interruptible. */
281 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
/*
 * Creates and starts the dlm_scand kernel thread running dlm_scand().
 * NOTE(review): the error check on p and the assignment that stores the
 * task are not visible in this excerpt.
 */
286 static int dlm_scand_start(void)
288 struct task_struct *p;
291 p = kthread_run(dlm_scand, NULL, "dlm_scand");
/* Stops the scan thread by calling kthread_stop() on scand_task. */
299 static void dlm_scand_stop(void)
301 kthread_stop(scand_task);
/*
 * Search lslist, under lslist_lock, for the lockspace whose ls_global_id
 * equals id.
 * NOTE(review): the lines executed on a match and the return statement
 * are not visible; presumably a reference is taken that the caller drops
 * with dlm_put_lockspace() - confirm.
 */
304 struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
308 spin_lock(&lslist_lock);
310 list_for_each_entry(ls, &lslist, ls_list) {
311 if (ls->ls_global_id == id) {
318 spin_unlock(&lslist_lock);
/*
 * Search lslist, under lslist_lock, for the lockspace whose
 * ls_local_handle equals the given handle (new_lockspace() sets the
 * handle to the address of the dlm_ls itself).
 * NOTE(review): the lines executed on a match and the return statement
 * are not visible; presumably a reference is taken that the caller drops
 * with dlm_put_lockspace() - confirm.
 */
322 struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
326 spin_lock(&lslist_lock);
327 list_for_each_entry(ls, &lslist, ls_list) {
328 if (ls->ls_local_handle == lockspace) {
335 spin_unlock(&lslist_lock);
/*
 * Search lslist, under lslist_lock, for the lockspace whose
 * ls_device.minor equals minor.
 * NOTE(review): the lines executed on a match and the return statement
 * are not visible; presumably a reference is taken that the caller drops
 * with dlm_put_lockspace() - confirm.
 */
339 struct dlm_ls *dlm_find_lockspace_device(int minor)
343 spin_lock(&lslist_lock);
344 list_for_each_entry(ls, &lslist, ls_list) {
345 if (ls->ls_device.minor == minor) {
352 spin_unlock(&lslist_lock);
/*
 * Called by users of the dlm_find_lockspace_*() lookups when they are
 * done with ls (see dlm_control_store() and dlm_release_lockspace()).
 * NOTE(review): the statement executed under lslist_lock is not visible
 * in this excerpt; presumably it drops the ls_count reference that
 * remove_lockspace() waits on - confirm.
 */
356 void dlm_put_lockspace(struct dlm_ls *ls)
358 spin_lock(&lslist_lock);
360 spin_unlock(&lslist_lock);
/*
 * Take ls off lslist once ls_count has reached zero. At that point
 * ls_create_count is expected to be zero already (WARN_ON otherwise).
 * NOTE(review): the path taken while ls_count is still non-zero is only
 * partly visible (the lock is dropped); presumably the function retries
 * until the count reaches zero - confirm.
 */
363 static void remove_lockspace(struct dlm_ls *ls)
366 spin_lock(&lslist_lock);
367 if (ls->ls_count == 0) {
368 WARN_ON(ls->ls_create_count != 0);
369 list_del(&ls->ls_list);
370 spin_unlock(&lslist_lock);
373 spin_unlock(&lslist_lock);
/*
 * Start the threads shared by all lockspaces: the dlm_scand thread first,
 * then the lowcomms layer. A failure of either step is reported with
 * log_print().
 * NOTE(review): the error checks, any unwind of dlm_scand on a lowcomms
 * failure and the return statement are not visible in this excerpt.
 */
378 static int threads_start(void)
382 error = dlm_scand_start();
384 log_print("cannot start dlm_scand thread %d", error);
388 /* Thread for sending/receiving messages for all lockspaces */
389 error = dlm_lowcomms_start();
391 log_print("cannot start dlm lowcomms %d", error);
/*
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * stops what threads_start() started - confirm.
 */
403 static void threads_stop(void)
/*
 * Create a lockspace called name, or take another creation reference
 * (ls_create_count) on an existing lockspace with the same name.
 *
 * Visible steps, in order: validate the name length and lvblen, pin the
 * module, require the user daemon, check the cluster name, search lslist
 * for a same-named lockspace, otherwise allocate and initialise a new
 * struct dlm_ls, publish it on lslist, start the callback and recovery
 * threads, register the kobject, send the join uevent and wait for
 * ls_members_done.
 *
 * NOTE(review): return statements, goto labels and several branches are
 * not visible in this excerpt, so the exact error codes and the mapping
 * of unwind steps to failure points cannot be confirmed from here.
 */
409 static int new_lockspace(const char *name, const char *cluster,
410 uint32_t flags, int lvblen,
411 const struct dlm_lockspace_ops *ops, void *ops_arg,
412 int *ops_result, dlm_lockspace_t **lockspace)
417 int namelen = strlen(name);
419 if (namelen > DLM_LOCKSPACE_LEN)
/* lvblen must be a non-zero multiple of 8. */
422 if (!lvblen || (lvblen % 8))
/* Pin the module; balanced by module_put() on the unwind path below and in release_lockspace(). */
425 if (!try_module_get(THIS_MODULE))
428 if (!dlm_user_daemon_available()) {
429 log_print("dlm user daemon not available");
/* Tell the caller when recovery callbacks were requested but are disabled in the configuration. */
434 if (ops && ops_result) {
435 if (!dlm_config.ci_recover_callbacks)
436 *ops_result = -EOPNOTSUPP;
/* The cluster name is only compared when recovery callbacks are enabled and a name was supplied. */
441 if (dlm_config.ci_recover_callbacks && cluster &&
442 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
443 log_print("dlm cluster name %s mismatch %s",
444 dlm_config.ci_cluster_name, cluster);
/* Look for an existing lockspace with the same name (length first, then bytes). */
451 spin_lock(&lslist_lock);
452 list_for_each_entry(ls, &lslist, ls_list) {
453 WARN_ON(ls->ls_create_count <= 0);
454 if (ls->ls_namelen != namelen)
456 if (memcmp(ls->ls_name, name, namelen))
458 if (flags & DLM_LSFL_NEWEXCL) {
462 ls->ls_create_count++;
467 spin_unlock(&lslist_lock);
/* The allocation is sized for the struct plus namelen bytes for ls_name; kzalloc() zero-fills it. */
474 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
477 memcpy(ls->ls_name, name, namelen);
478 ls->ls_namelen = namelen;
479 ls->ls_lvblen = lvblen;
482 ls->ls_scan_time = jiffies;
484 if (ops && dlm_config.ci_recover_callbacks) {
486 ls->ls_ops_arg = ops_arg;
489 if (flags & DLM_LSFL_TIMEWARN)
490 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
492 /* ls_exflags are forced to match among nodes, and we don't
493 need to require all nodes to have some flags set */
494 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
/* Resource table: ci_rsbtbl_size buckets, each with keep and toss trees and its own spinlock. */
497 size = dlm_config.ci_rsbtbl_size;
498 ls->ls_rsbtbl_size = size;
500 ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
503 for (i = 0; i < size; i++) {
504 ls->ls_rsbtbl[i].keep.rb_node = NULL;
505 ls->ls_rsbtbl[i].toss.rb_node = NULL;
506 spin_lock_init(&ls->ls_rsbtbl[i].lock);
509 spin_lock_init(&ls->ls_remove_spin);
/* Each remove-name slot holds DLM_RESNAME_MAXLEN bytes plus one extra byte. */
511 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
512 ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,
514 if (!ls->ls_remove_names[i])
518 idr_init(&ls->ls_lkbidr);
519 spin_lock_init(&ls->ls_lkbidr_spin);
521 INIT_LIST_HEAD(&ls->ls_waiters);
522 mutex_init(&ls->ls_waiters_mutex);
523 INIT_LIST_HEAD(&ls->ls_orphans);
524 mutex_init(&ls->ls_orphans_mutex);
525 INIT_LIST_HEAD(&ls->ls_timeout);
526 mutex_init(&ls->ls_timeout_mutex);
528 INIT_LIST_HEAD(&ls->ls_new_rsb);
529 spin_lock_init(&ls->ls_new_rsb_spin);
531 INIT_LIST_HEAD(&ls->ls_nodes);
532 INIT_LIST_HEAD(&ls->ls_nodes_gone);
533 ls->ls_num_nodes = 0;
534 ls->ls_low_nodeid = 0;
535 ls->ls_total_weight = 0;
536 ls->ls_node_array = NULL;
538 memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
539 ls->ls_stub_rsb.res_ls = ls;
541 ls->ls_debug_rsb_dentry = NULL;
542 ls->ls_debug_waiters_dentry = NULL;
544 init_waitqueue_head(&ls->ls_uevent_wait);
545 ls->ls_uevent_result = 0;
546 init_completion(&ls->ls_members_done);
547 ls->ls_members_result = -1;
549 mutex_init(&ls->ls_cb_mutex);
550 INIT_LIST_HEAD(&ls->ls_cb_delay);
552 ls->ls_recoverd_task = NULL;
553 mutex_init(&ls->ls_recoverd_active);
554 spin_lock_init(&ls->ls_recover_lock);
555 spin_lock_init(&ls->ls_rcom_spin);
/* The rcom sequence number starts from a random 64-bit value. */
556 get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
557 ls->ls_recover_status = 0;
558 ls->ls_recover_seq = 0;
559 ls->ls_recover_args = NULL;
560 init_rwsem(&ls->ls_in_recovery);
561 init_rwsem(&ls->ls_recv_active);
562 INIT_LIST_HEAD(&ls->ls_requestqueue);
563 mutex_init(&ls->ls_requestqueue_mutex);
564 mutex_init(&ls->ls_clear_proc_locks);
566 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
567 if (!ls->ls_recover_buf)
571 ls->ls_num_slots = 0;
572 ls->ls_slots_size = 0;
575 INIT_LIST_HEAD(&ls->ls_recover_list);
576 spin_lock_init(&ls->ls_recover_list_lock);
577 idr_init(&ls->ls_recover_idr);
578 spin_lock_init(&ls->ls_recover_idr_lock);
579 ls->ls_recover_list_count = 0;
/* The local handle is the address of the dlm_ls itself; dlm_find_lockspace_local() matches on it. */
580 ls->ls_local_handle = ls;
581 init_waitqueue_head(&ls->ls_wait_general);
582 INIT_LIST_HEAD(&ls->ls_root_list);
583 init_rwsem(&ls->ls_root_sem);
/* ls_in_recovery is taken for write before the lockspace is published on lslist. */
585 down_write(&ls->ls_in_recovery);
587 spin_lock(&lslist_lock);
588 ls->ls_create_count = 1;
589 list_add(&ls->ls_list, &lslist);
590 spin_unlock(&lslist_lock);
/* The callback machinery is only started for DLM_LSFL_FS lockspaces. */
592 if (flags & DLM_LSFL_FS) {
593 error = dlm_callback_start(ls);
595 log_error(ls, "can't start dlm_callback %d", error);
600 /* needs to find ls in lslist */
601 error = dlm_recoverd_start(ls);
603 log_error(ls, "can't start dlm_recoverd %d", error);
607 ls->ls_kobj.kset = dlm_kset;
608 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
612 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
614 /* let kobject handle freeing of ls if there's an error */
617 /* This uevent triggers dlm_controld in userspace to add us to the
618 group of nodes that are members of this lockspace (managed by the
619 cluster infrastructure.) Once it's done that, it tells us who the
620 current lockspace members are (via configfs) and then tells the
621 lockspace to start running (via sysfs) in dlm_ls_start(). */
623 error = do_uevent(ls, 1);
/* Block until ls_members_done is completed, then read ls_members_result (initialised to -1 above). */
627 wait_for_completion(&ls->ls_members_done);
628 error = ls->ls_members_result;
632 dlm_create_debug_file(ls);
634 log_debug(ls, "join complete");
/* NOTE(review): the calls from here on undo the setup above; presumably they sit behind error-unwind labels that are not visible - confirm. */
640 dlm_clear_members(ls);
641 kfree(ls->ls_node_array);
643 dlm_recoverd_stop(ls);
645 dlm_callback_stop(ls);
647 spin_lock(&lslist_lock);
648 list_del(&ls->ls_list);
649 spin_unlock(&lslist_lock);
650 idr_destroy(&ls->ls_recover_idr);
651 kfree(ls->ls_recover_buf);
653 idr_destroy(&ls->ls_lkbidr);
654 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
655 if (ls->ls_remove_names[i])
656 kfree(ls->ls_remove_names[i]);
659 vfree(ls->ls_rsbtbl);
662 kobject_put(&ls->ls_kobj);
666 module_put(THIS_MODULE);
/*
 * Public entry point for creating or joining a lockspace. Holds ls_lock
 * around threads_start() and new_lockspace(), passing all arguments
 * through unchanged.
 * NOTE(review): the error handling between the two calls and the return
 * statement are not visible in this excerpt.
 */
670 int dlm_new_lockspace(const char *name, const char *cluster,
671 uint32_t flags, int lvblen,
672 const struct dlm_lockspace_ops *ops, void *ops_arg,
673 int *ops_result, dlm_lockspace_t **lockspace)
677 mutex_lock(&ls_lock);
679 error = threads_start();
683 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
684 ops_result, lockspace);
692 mutex_unlock(&ls_lock);
/*
 * idr_for_each() callback used by lockspace_busy() when force is 1.
 * Tests whether the lkb has a zero lkb_nodeid.
 * NOTE(review): the return values are not visible in this excerpt;
 * presumably non-zero for such an lkb, which stops the iteration - confirm.
 */
696 static int lkb_idr_is_local(int id, void *p, void *data)
698 struct dlm_lkb *lkb = p;
700 if (!lkb->lkb_nodeid)
/*
 * idr_for_each() callback used by lockspace_busy().
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * reports every lkb as a hit - confirm.
 */
705 static int lkb_idr_is_any(int id, void *p, void *data)
/*
 * idr_for_each() callback used by release_lockspace() on every lkb in
 * ls_lkbidr. Frees the lvb with dlm_free_lvb() when lkb_lvbptr is set and
 * the DLM_IFL_MSTCPY flag is present in lkb_flags.
 * NOTE(review): the remaining lines (freeing the lkb itself, the return
 * value) are not visible in this excerpt.
 */
710 static int lkb_idr_free(int id, void *p, void *data)
712 struct dlm_lkb *lkb = p;
714 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
715 dlm_free_lvb(lkb->lkb_lvbptr);
721 /* NOTE: We check the lkbidr here rather than the resource table.
722 This is because there may be LKBs queued as ASTs that have been unlinked
723 from their RSBs and are pending deletion once the AST has been delivered */
/*
 * Scan ls_lkbidr under ls_lkbidr_spin to decide whether the lockspace
 * still holds lkbs that should block its release. One branch checks for
 * any lkb (lkb_idr_is_any); force == 1 checks with lkb_idr_is_local.
 * NOTE(review): the first condition, the remaining branch and the return
 * statement are not visible in this excerpt.
 */
725 static int lockspace_busy(struct dlm_ls *ls, int force)
729 spin_lock(&ls->ls_lkbidr_spin);
731 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
732 } else if (force == 1) {
733 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
737 spin_unlock(&ls->ls_lkbidr_spin);
/*
 * Drop one creation reference on ls and, when it was the last one, tear
 * the lockspace down.
 *
 * Visible steps: evaluate lockspace_busy(); under lslist_lock either
 * clear ls_create_count (it was 1) or decrement it (it was above 1); then
 * deregister the device, stop the recovery and callback threads, remove
 * ls from lslist, free the lkbs, rsbs, remove-name buffers and member
 * data, and finally drop the kobject and module references.
 *
 * NOTE(review): how the busy result feeds the decision, the early return
 * after the no-remove log message and the return values are not visible
 * in this excerpt.
 */
741 static int release_lockspace(struct dlm_ls *ls, int force)
747 busy = lockspace_busy(ls, force);
749 spin_lock(&lslist_lock);
750 if (ls->ls_create_count == 1) {
754 /* remove_lockspace takes ls off lslist */
755 ls->ls_create_count = 0;
758 } else if (ls->ls_create_count > 1) {
759 rv = --ls->ls_create_count;
763 spin_unlock(&lslist_lock);
766 log_debug(ls, "release_lockspace no remove %d", rv);
770 dlm_device_deregister(ls);
/* Below force level 3, and only while the user daemon is available, an extra step runs here (its line is not visible). */
772 if (force < 3 && dlm_user_daemon_available())
775 dlm_recoverd_stop(ls);
777 dlm_callback_stop(ls);
779 remove_lockspace(ls);
781 dlm_delete_debug_file(ls);
783 kfree(ls->ls_recover_buf);
786 * Free all lkb's in idr
789 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
790 idr_remove_all(&ls->ls_lkbidr);
791 idr_destroy(&ls->ls_lkbidr);
794 * Free all rsb's on rsbtbl[] lists
/* Each bucket is drained by repeatedly erasing the first node of the keep tree, then of the toss tree. */
797 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
798 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
799 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
800 rb_erase(n, &ls->ls_rsbtbl[i].keep);
804 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
805 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
806 rb_erase(n, &ls->ls_rsbtbl[i].toss);
811 vfree(ls->ls_rsbtbl);
813 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
814 kfree(ls->ls_remove_names[i]);
816 while (!list_empty(&ls->ls_new_rsb)) {
817 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
819 list_del(&rsb->res_hashchain);
824 * Free structures on any other lists
827 dlm_purge_requestqueue(ls);
828 kfree(ls->ls_recover_args);
829 dlm_clear_members(ls);
830 dlm_clear_members_gone(ls);
831 kfree(ls->ls_node_array);
832 log_debug(ls, "release_lockspace final free");
833 kobject_put(&ls->ls_kobj);
834 /* The ls structure will be freed when the kobject is done with */
/* Balances the try_module_get() taken in new_lockspace(). */
836 module_put(THIS_MODULE);
841 * Called when a system has released all its locks and is not going to use the
842 * lockspace any longer. We free everything we're managing for this lockspace.
843 * Remaining nodes will go through the recovery process as if we'd died. The
844 * lockspace must continue to function as usual, participating in recoveries,
845 * until this returns.
847 * Force has 4 possible values:
848 * 0 - don't destroy lockspace if it has any LKBs
849 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
850 * 2 - destroy lockspace regardless of LKBs
851 * 3 - destroy lockspace as part of a forced shutdown
/*
 * Public entry point for releasing a lockspace handle. Looks the
 * lockspace up by its local handle, drops that lookup with
 * dlm_put_lockspace() straight away, then runs release_lockspace() under
 * ls_lock.
 * NOTE(review): the check for a failed lookup, the lines between
 * release_lockspace() and the unlock, and the return statement are not
 * visible in this excerpt.
 */
854 int dlm_release_lockspace(void *lockspace, int force)
859 ls = dlm_find_lockspace_local(lockspace);
862 dlm_put_lockspace(ls);
864 mutex_lock(&ls_lock);
865 error = release_lockspace(ls, force);
870 mutex_unlock(&ls_lock);
/*
 * Walk lslist under lslist_lock, testing LSFL_RUNNING on each lockspace.
 * For an entry that is acted on, the list lock is dropped before the
 * error is logged.
 * NOTE(review): the statement guarded by the LSFL_RUNNING test, the call
 * that actually stops the lockspace and any restart of the walk are not
 * visible in this excerpt.
 */
875 void dlm_stop_lockspaces(void)
880 spin_lock(&lslist_lock);
881 list_for_each_entry(ls, &lslist, ls_list) {
882 if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
884 spin_unlock(&lslist_lock);
885 log_error(ls, "no userland control daemon, stopping lockspace");
889 spin_unlock(&lslist_lock);