1 /******************************************************************************
2 *******************************************************************************
4 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5 ** Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
7 ** This copyrighted material is made available to anyone wishing to use,
8 ** modify, copy, or redistribute it subject to the terms and conditions
9 ** of the GNU General Public License v.2.
11 *******************************************************************************
12 ******************************************************************************/
14 #include "dlm_internal.h"
15 #include "lockspace.h"
24 #include "requestqueue.h"
/* Serializes lockspace create/release; taken in dlm_new_lockspace() and
   dlm_release_lockspace(). */
29 static struct mutex ls_lock;
/* Global list of every lockspace on this node, protected by lslist_lock. */
30 static struct list_head lslist;
31 static spinlock_t lslist_lock;
/* The dlm_scand kthread; managed by dlm_scand_start()/dlm_scand_stop(). */
32 static struct task_struct * scand_task;
/* sysfs "control" store handler: parse an integer command written by
   userspace and act on the lockspace.  NOTE(review): several lines are
   elided in this extract; the visible code parses the value, re-looks-up
   the lockspace to take a reference, and drops that reference when done. */
35 static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
38 int n = simple_strtol(buf, NULL, 0);
/* Take a proper reference via the local-handle lookup before acting. */
40 ls = dlm_find_lockspace_local(ls->ls_local_handle);
54 dlm_put_lockspace(ls);
/* sysfs "event_done" store handler: dlm_controld writes its join/leave
   result here.  Record the result, then set LSFL_UEVENT_WAIT and wake the
   thread sleeping in do_uevent(), whose wait condition is
   test_and_clear_bit() on that flag. */
58 static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
60 ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
61 set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
62 wake_up(&ls->ls_uevent_wait);
/* sysfs "id" show: report the 32-bit global lockspace id. */
66 static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
68 return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
/* sysfs "id" store: userspace assigns the global lockspace id. */
71 static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
73 ls->ls_global_id = simple_strtoul(buf, NULL, 0);
/* sysfs "nodir" show: 1 if this lockspace runs without a resource
   directory, else 0. */
77 static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
79 return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
/* sysfs "nodir" store: enable no-directory mode.  NOTE(review): val is not
   used in the visible lines; presumably an elided line tests it before the
   set_bit — confirm against the full source. */
82 static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
84 int val = simple_strtoul(buf, NULL, 0);
86 set_bit(LSFL_NODIR, &ls->ls_flags);
/* sysfs "recover_status" show: hex bitmask of recovery progress. */
90 static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
92 uint32_t status = dlm_recover_status(ls);
93 return snprintf(buf, PAGE_SIZE, "%x\n", status);
/* sysfs "recover_nodeid" show: nodeid currently being recovered. */
96 static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
98 return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
/* Fields of struct dlm_attr (the declaration line is elided in this
   extract): glue binding a generic sysfs attribute to lockspace-specific
   show/store callbacks, dispatched via dlm_attr_show()/dlm_attr_store(). */
102 struct attribute attr;
103 ssize_t (*show)(struct dlm_ls *, char *);
104 ssize_t (*store)(struct dlm_ls *, const char *, size_t);
/* Static sysfs attribute instances for the per-lockspace kobject.
   Write-only attributes (control, event_done) are S_IWUSR; readable ones
   add S_IRUGO.  Collected in dlm_attrs[] below. */
107 static struct dlm_attr dlm_attr_control = {
108 .attr = {.name = "control", .mode = S_IWUSR},
109 .store = dlm_control_store
112 static struct dlm_attr dlm_attr_event = {
113 .attr = {.name = "event_done", .mode = S_IWUSR},
114 .store = dlm_event_store
117 static struct dlm_attr dlm_attr_id = {
118 .attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
120 .store = dlm_id_store
123 static struct dlm_attr dlm_attr_nodir = {
124 .attr = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
125 .show = dlm_nodir_show,
126 .store = dlm_nodir_store
129 static struct dlm_attr dlm_attr_recover_status = {
130 .attr = {.name = "recover_status", .mode = S_IRUGO},
131 .show = dlm_recover_status_show
134 static struct dlm_attr dlm_attr_recover_nodeid = {
135 .attr = {.name = "recover_nodeid", .mode = S_IRUGO},
136 .show = dlm_recover_nodeid_show
/* Attribute table wired into dlm_ktype.default_attrs; presumably
   NULL-terminated (the terminator and the dlm_attr_id entry are elided in
   this extract). */
139 static struct attribute *dlm_attrs[] = {
140 &dlm_attr_control.attr,
141 &dlm_attr_event.attr,
143 &dlm_attr_nodir.attr,
144 &dlm_attr_recover_status.attr,
145 &dlm_attr_recover_nodeid.attr,
/* Generic sysfs show dispatcher: recover the lockspace and the dlm_attr
   wrapper from the embedded kobject/attribute pointers, then call the
   per-attribute show callback (read of a write-only attribute yields 0). */
149 static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
152 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
153 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
154 return a->show ? a->show(ls, buf) : 0;
/* Generic sysfs store dispatcher; an attribute without a store callback
   silently accepts the write by returning len. */
157 static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
158 const char *buf, size_t len)
160 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
161 struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
162 return a->store ? a->store(ls, buf, len) : len;
/* kobject release callback: the last reference to ls_kobj is gone, so the
   containing dlm_ls can be freed.  NOTE(review): the freeing call itself is
   elided in this extract — confirm against the full source. */
165 static void lockspace_kobj_release(struct kobject *k)
167 struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
/* sysfs plumbing for the per-lockspace kobject: generic show/store ops,
   the default attribute set, and the release hook that frees the ls. */
171 static const struct sysfs_ops dlm_attr_ops = {
172 .show = dlm_attr_show,
173 .store = dlm_attr_store,
176 static struct kobj_type dlm_ktype = {
177 .default_attrs = dlm_attrs,
178 .sysfs_ops = &dlm_attr_ops,
179 .release = lockspace_kobj_release,
/* Parent kset for all lockspace kobjects (/sys/kernel/dlm); created in
   dlm_lockspace_init(). */
182 static struct kset *dlm_kset;
/* Emit an ONLINE (in != 0, join) or OFFLINE (leave) uevent for the
   lockspace and block until dlm_controld reports back through the
   "event_done" sysfs file (dlm_event_store()).  Returns the daemon's
   result, or the error from an interrupted wait. */
184 static int do_uevent(struct dlm_ls *ls, int in)
189 kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
191 kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
193 log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
195 /* dlm_controld will see the uevent, do the necessary group management
196 and then write to sysfs to wake us */
/* Interruptible: a signal aborts the wait with a negative error before
   the daemon has answered. */
198 error = wait_event_interruptible(ls->ls_uevent_wait,
199 test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
201 log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
/* Propagate the result dlm_controld wrote via dlm_event_store(). */
206 error = ls->ls_uevent_result;
209 log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
210 error, ls->ls_uevent_result);
/* kset uevent callback: tag every lockspace uevent with LOCKSPACE=<name>
   so userspace (dlm_controld) can tell which lockspace it concerns. */
214 static int dlm_uevent(struct kset *kset, struct kobject *kobj,
215 struct kobj_uevent_env *env)
217 struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
219 add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
223 static struct kset_uevent_ops dlm_uevent_ops = {
224 .uevent = dlm_uevent,
/* Subsystem init: set up the global lock/list state and create the
   /sys/kernel/dlm kset that parents all lockspace kobjects.  The warning
   path fires when kset creation fails (error return elided here). */
227 int __init dlm_lockspace_init(void)
230 mutex_init(&ls_lock);
231 INIT_LIST_HEAD(&lslist);
232 spin_lock_init(&lslist_lock);
234 dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
236 printk(KERN_WARNING "%s: can not create kset\n", __func__);
/* Subsystem teardown: drop the dlm kset created in dlm_lockspace_init(). */
242 void dlm_lockspace_exit(void)
244 kset_unregister(dlm_kset);
/* Return the first lockspace whose last scan is at least ci_scan_secs
   seconds old, or NULL when none is due (return statements elided in this
   extract).  lslist_lock is dropped before a match is returned. */
247 static struct dlm_ls *find_ls_to_scan(void)
251 spin_lock(&lslist_lock);
252 list_for_each_entry(ls, &lslist, ls_list) {
253 if (time_after_eq(jiffies, ls->ls_scan_time +
254 dlm_config.ci_scan_secs * HZ)) {
255 spin_unlock(&lslist_lock);
259 spin_unlock(&lslist_lock);
/* Scanner kthread main loop: repeatedly pick a due lockspace and run the
   timeout and waiter scans on it, skipping lockspaces that are mid-recovery. */
263 static int dlm_scand(void *data)
267 while (!kthread_should_stop()) {
268 ls = find_ls_to_scan();
/* Only scan when we can take the recovery lock without blocking. */
270 if (dlm_lock_recovery_try(ls)) {
271 ls->ls_scan_time = jiffies;
273 dlm_scan_timeout(ls);
274 dlm_scan_waiters(ls);
275 dlm_unlock_recovery(ls);
/* Recovery in progress: push the next attempt out by one second. */
277 ls->ls_scan_time += HZ;
/* Nothing due: sleep a full scan interval (interruptible so stop works). */
281 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
/* Launch the dlm_scand kthread (error handling and the scand_task
   assignment are elided in this extract). */
286 static int dlm_scand_start(void)
288 struct task_struct *p;
291 p = kthread_run(dlm_scand, NULL, "dlm_scand");
/* Stop the scanner; kthread_stop() blocks until dlm_scand() returns. */
299 static void dlm_scand_stop(void)
301 kthread_stop(scand_task);
/* Look up a lockspace by its global id.  The reference-count bump and
   return statements inside the match branch are elided in this extract;
   callers pair these lookups with dlm_put_lockspace(). */
304 struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
308 spin_lock(&lslist_lock);
310 list_for_each_entry(ls, &lslist, ls_list) {
311 if (ls->ls_global_id == id) {
318 spin_unlock(&lslist_lock);
/* Look up a lockspace by its local handle — the ls pointer itself, per the
   ls_local_handle = ls assignment in new_lockspace(). */
322 struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
326 spin_lock(&lslist_lock);
327 list_for_each_entry(ls, &lslist, ls_list) {
328 if (ls->ls_local_handle == lockspace) {
335 spin_unlock(&lslist_lock);
/* Look up a lockspace by its character-device minor number. */
339 struct dlm_ls *dlm_find_lockspace_device(int minor)
343 spin_lock(&lslist_lock);
344 list_for_each_entry(ls, &lslist, ls_list) {
345 if (ls->ls_device.minor == minor) {
352 spin_unlock(&lslist_lock);
/* Drop a reference taken by one of the dlm_find_lockspace_*() helpers
   (the count decrement itself is elided in this extract). */
356 void dlm_put_lockspace(struct dlm_ls *ls)
358 spin_lock(&lslist_lock);
360 spin_unlock(&lslist_lock);
/* Unlink the lockspace from lslist once ls_count reaches zero; by this
   point release_lockspace() has already set ls_create_count to 0, hence
   the WARN_ON.  Presumably loops/waits while references remain (those
   lines elided in this extract — confirm against the full source). */
363 static void remove_lockspace(struct dlm_ls *ls)
366 spin_lock(&lslist_lock);
367 if (ls->ls_count == 0) {
368 WARN_ON(ls->ls_create_count != 0);
369 list_del(&ls->ls_list);
370 spin_unlock(&lslist_lock);
373 spin_unlock(&lslist_lock);
/* Start the helper threads shared by all lockspaces: the scanner kthread
   and the lowcomms messaging thread.  Called with ls_lock held from
   dlm_new_lockspace(). */
378 static int threads_start(void)
382 error = dlm_scand_start();
384 log_print("cannot start dlm_scand thread %d", error);
388 /* Thread for sending/receiving messages for all lockspace's */
389 error = dlm_lowcomms_start();
391 log_print("cannot start dlm lowcomms %d", error);
/* Counterpart of threads_start(); body elided in this extract. */
403 static void threads_stop(void)
/*
 * Create and initialize a new lockspace, or bump the create count of an
 * existing lockspace with the same name.
 *
 * name:       lockspace name, at most DLM_LOCKSPACE_LEN bytes
 * cluster:    expected cluster name, checked against dlm_config
 * flags:      DLM_LSFL_* creation flags
 * lvblen:     lock value block length; must be a non-zero multiple of 8
 * ops/ops_arg/ops_result: optional recovery callbacks (honored only when
 *             ci_recover_callbacks is enabled)
 * lockspace:  out parameter receiving the opaque handle on success
 *
 * NOTE(review): return statements, gotos, and the error-unwinding labels
 * are partially elided in this extract; the trailing cleanup calls below
 * belong to those unwind paths.
 */
409 static int new_lockspace(const char *name, const char *cluster,
410 uint32_t flags, int lvblen,
411 const struct dlm_lockspace_ops *ops, void *ops_arg,
412 int *ops_result, dlm_lockspace_t **lockspace)
417 int namelen = strlen(name);
/* --- argument validation --- */
419 if (namelen > DLM_LOCKSPACE_LEN)
422 if (!lvblen || (lvblen % 8))
425 if (!try_module_get(THIS_MODULE))
428 if (!dlm_user_daemon_available()) {
429 log_print("dlm user daemon not available")
434 if (ops && ops_result) {
/* Tell the caller recovery callbacks are unsupported by this config. */
435 if (!dlm_config.ci_recover_callbacks)
436 *ops_result = -EOPNOTSUPP;
/* Refuse to join a lockspace under a different cluster name. */
441 if (dlm_config.ci_recover_callbacks && cluster &&
442 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
443 log_print("dlm cluster name %s mismatch %s",
444 dlm_config.ci_cluster_name, cluster);
/* --- reuse an existing lockspace with the same name, unless the caller
   demanded exclusive creation (DLM_LSFL_NEWEXCL) --- */
451 spin_lock(&lslist_lock);
452 list_for_each_entry(ls, &lslist, ls_list) {
453 WARN_ON(ls->ls_create_count <= 0);
454 if (ls->ls_namelen != namelen)
456 if (memcmp(ls->ls_name, name, namelen))
458 if (flags & DLM_LSFL_NEWEXCL) {
462 ls->ls_create_count++;
467 spin_unlock(&lslist_lock);
/* --- allocate the lockspace; the name is stored in the trailing
   variable-length area after the struct --- */
474 ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
477 memcpy(ls->ls_name, name, namelen);
478 ls->ls_namelen = namelen;
479 ls->ls_lvblen = lvblen;
482 ls->ls_scan_time = jiffies;
484 if (ops && dlm_config.ci_recover_callbacks) {
486 ls->ls_ops_arg = ops_arg;
489 if (flags & DLM_LSFL_TIMEWARN)
490 set_bit(LSFL_TIMEWARN, &ls->ls_flags);
492 /* ls_exflags are forced to match among nodes, and we don't
493 need to require all nodes to have some flags set */
494 ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
/* --- resource table: per-bucket keep/toss rbtrees plus a bucket lock --- */
497 size = dlm_config.ci_rsbtbl_size;
498 ls->ls_rsbtbl_size = size;
500 ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
503 for (i = 0; i < size; i++) {
504 ls->ls_rsbtbl[i].keep.rb_node = NULL;
505 ls->ls_rsbtbl[i].toss.rb_node = NULL;
506 spin_lock_init(&ls->ls_rsbtbl[i].lock);
509 idr_init(&ls->ls_lkbidr);
510 spin_lock_init(&ls->ls_lkbidr_spin);
/* --- directory table --- */
512 size = dlm_config.ci_dirtbl_size;
513 ls->ls_dirtbl_size = size;
515 ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size);
518 for (i = 0; i < size; i++) {
519 INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
520 spin_lock_init(&ls->ls_dirtbl[i].lock);
/* --- lists, locks, and recovery state --- */
523 INIT_LIST_HEAD(&ls->ls_waiters);
524 mutex_init(&ls->ls_waiters_mutex);
525 INIT_LIST_HEAD(&ls->ls_orphans);
526 mutex_init(&ls->ls_orphans_mutex);
527 INIT_LIST_HEAD(&ls->ls_timeout);
528 mutex_init(&ls->ls_timeout_mutex);
530 INIT_LIST_HEAD(&ls->ls_new_rsb);
531 spin_lock_init(&ls->ls_new_rsb_spin);
533 INIT_LIST_HEAD(&ls->ls_nodes);
534 INIT_LIST_HEAD(&ls->ls_nodes_gone);
535 ls->ls_num_nodes = 0;
536 ls->ls_low_nodeid = 0;
537 ls->ls_total_weight = 0;
538 ls->ls_node_array = NULL;
540 memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
541 ls->ls_stub_rsb.res_ls = ls;
543 ls->ls_debug_rsb_dentry = NULL;
544 ls->ls_debug_waiters_dentry = NULL;
546 init_waitqueue_head(&ls->ls_uevent_wait);
547 ls->ls_uevent_result = 0;
548 init_completion(&ls->ls_members_done);
549 ls->ls_members_result = -1;
551 mutex_init(&ls->ls_cb_mutex);
552 INIT_LIST_HEAD(&ls->ls_cb_delay);
554 ls->ls_recoverd_task = NULL;
555 mutex_init(&ls->ls_recoverd_active);
556 spin_lock_init(&ls->ls_recover_lock);
557 spin_lock_init(&ls->ls_rcom_spin);
/* Random rcom sequence number distinguishes recovery message rounds. */
558 get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
559 ls->ls_recover_status = 0;
560 ls->ls_recover_seq = 0;
561 ls->ls_recover_args = NULL;
562 init_rwsem(&ls->ls_in_recovery);
563 init_rwsem(&ls->ls_recv_active);
564 INIT_LIST_HEAD(&ls->ls_requestqueue);
565 mutex_init(&ls->ls_requestqueue_mutex);
566 mutex_init(&ls->ls_clear_proc_locks);
568 ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
569 if (!ls->ls_recover_buf)
573 ls->ls_num_slots = 0;
574 ls->ls_slots_size = 0;
577 INIT_LIST_HEAD(&ls->ls_recover_list);
578 spin_lock_init(&ls->ls_recover_list_lock);
579 ls->ls_recover_list_count = 0;
/* The local handle handed back to callers is the ls pointer itself; see
   dlm_find_lockspace_local(). */
580 ls->ls_local_handle = ls;
581 init_waitqueue_head(&ls->ls_wait_general);
582 INIT_LIST_HEAD(&ls->ls_root_list);
583 init_rwsem(&ls->ls_root_sem);
/* Start in the "recovery in progress" state until the first recovery
   cycle completes. */
585 down_write(&ls->ls_in_recovery);
/* Publish the lockspace so recoverd and lookups can find it. */
587 spin_lock(&lslist_lock);
588 ls->ls_create_count = 1;
589 list_add(&ls->ls_list, &lslist);
590 spin_unlock(&lslist_lock);
592 if (flags & DLM_LSFL_FS) {
593 error = dlm_callback_start(ls);
595 log_error(ls, "can't start dlm_callback %d", error);
600 /* needs to find ls in lslist */
601 error = dlm_recoverd_start(ls);
603 log_error(ls, "can't start dlm_recoverd %d", error);
/* --- register the sysfs kobject under /sys/kernel/dlm --- */
607 ls->ls_kobj.kset = dlm_kset;
608 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
612 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
614 /* let kobject handle freeing of ls if there's an error */
617 /* This uevent triggers dlm_controld in userspace to add us to the
618 group of nodes that are members of this lockspace (managed by the
619 cluster infrastructure.) Once it's done that, it tells us who the
620 current lockspace members are (via configfs) and then tells the
621 lockspace to start running (via sysfs) in dlm_ls_start(). */
623 error = do_uevent(ls, 1);
/* Block until the first membership/recovery pass reports a result. */
627 wait_for_completion(&ls->ls_members_done);
628 error = ls->ls_members_result;
632 dlm_create_debug_file(ls);
634 log_debug(ls, "join complete");
/* --- error unwinding (labels elided); undone in reverse order of
   construction --- */
640 dlm_clear_members(ls);
641 kfree(ls->ls_node_array);
643 dlm_recoverd_stop(ls);
645 dlm_callback_stop(ls);
647 spin_lock(&lslist_lock);
648 list_del(&ls->ls_list);
649 spin_unlock(&lslist_lock);
650 kfree(ls->ls_recover_buf);
652 vfree(ls->ls_dirtbl);
654 idr_destroy(&ls->ls_lkbidr);
655 vfree(ls->ls_rsbtbl);
658 kobject_put(&ls->ls_kobj);
662 module_put(THIS_MODULE);
/* Public entry point for lockspace creation: serialize under ls_lock,
   make sure the shared helper threads (scanner, lowcomms) are running,
   then defer the real work to new_lockspace(). */
666 int dlm_new_lockspace(const char *name, const char *cluster,
667 uint32_t flags, int lvblen,
668 const struct dlm_lockspace_ops *ops, void *ops_arg,
669 int *ops_result, dlm_lockspace_t **lockspace)
673 mutex_lock(&ls_lock);
675 error = threads_start();
679 error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
680 ops_result, lockspace);
688 mutex_unlock(&ls_lock);
/* idr_for_each callbacks used by lockspace_busy() and release_lockspace()
   to classify/free the lkbs remaining in a lockspace. */
/* Match lkbs mastered locally: nodeid 0 means this node holds the lock. */
692 static int lkb_idr_is_local(int id, void *p, void *data)
694 struct dlm_lkb *lkb = p;
696 if (!lkb->lkb_nodeid)
/* Match any lkb at all (body elided in this extract). */
701 static int lkb_idr_is_any(int id, void *p, void *data)
/* Free one lkb; MSTCPY lkbs own their lvb buffer, so free that first. */
706 static int lkb_idr_free(int id, void *p, void *data)
708 struct dlm_lkb *lkb = p;
710 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
711 dlm_free_lvb(lkb->lkb_lvbptr);
717 /* NOTE: We check the lkbidr here rather than the resource table.
718 This is because there may be LKBs queued as ASTs that have been unlinked
719 from their RSBs and are pending deletion once the AST has been delivered */
/* Decide whether remaining lkbs block release of the lockspace:
   force 0 counts any lkb, force 1 counts only locally-mastered lkbs, and
   higher force values ignore lkbs entirely (that branch is elided in this
   extract).  Returns nonzero when busy. */
721 static int lockspace_busy(struct dlm_ls *ls, int force)
725 spin_lock(&ls->ls_lkbidr_spin);
727 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
728 } else if (force == 1) {
729 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
733 spin_unlock(&ls->ls_lkbidr_spin);
/* Tear down one reference to the lockspace; the last reference (create
   count dropping 1 -> 0, and not busy) triggers full destruction: stop the
   per-ls threads, leave the cluster group, and free every structure.
   Called with ls_lock held from dlm_release_lockspace().
   NOTE(review): return statements and some branch bodies are elided in
   this extract. */
737 static int release_lockspace(struct dlm_ls *ls, int force)
743 busy = lockspace_busy(ls, force);
745 spin_lock(&lslist_lock);
746 if (ls->ls_create_count == 1) {
750 /* remove_lockspace takes ls off lslist */
751 ls->ls_create_count = 0;
754 } else if (ls->ls_create_count > 1) {
/* Not the last user: just decrement and report the remaining count. */
755 rv = --ls->ls_create_count;
759 spin_unlock(&lslist_lock);
762 log_debug(ls, "release_lockspace no remove %d", rv);
766 dlm_device_deregister(ls);
/* Leave the cluster group via uevent unless this is a forced shutdown
   (force == 3) or the control daemon is already gone. */
768 if (force < 3 && dlm_user_daemon_available())
771 dlm_recoverd_stop(ls);
773 dlm_callback_stop(ls);
775 remove_lockspace(ls);
777 dlm_delete_debug_file(ls);
779 kfree(ls->ls_recover_buf);
782 * Free direntry structs.
786 vfree(ls->ls_dirtbl);
789 * Free all lkb's in idr
792 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
793 idr_remove_all(&ls->ls_lkbidr);
794 idr_destroy(&ls->ls_lkbidr);
797 * Free all rsb's on rsbtbl[] lists
800 for (i = 0; i < ls->ls_rsbtbl_size; i++) {
801 while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
802 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
803 rb_erase(n, &ls->ls_rsbtbl[i].keep);
807 while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
808 rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
809 rb_erase(n, &ls->ls_rsbtbl[i].toss);
814 vfree(ls->ls_rsbtbl);
/* Drain the pre-allocated rsb cache. */
816 while (!list_empty(&ls->ls_new_rsb)) {
817 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
819 list_del(&rsb->res_hashchain);
824 * Free structures on any other lists
827 dlm_purge_requestqueue(ls);
828 kfree(ls->ls_recover_args);
829 dlm_clear_free_entries(ls);
830 dlm_clear_members(ls);
831 dlm_clear_members_gone(ls);
832 kfree(ls->ls_node_array);
833 log_debug(ls, "release_lockspace final free");
/* Final kobject_put(); lockspace_kobj_release() frees the ls itself. */
834 kobject_put(&ls->ls_kobj);
835 /* The ls structure will be freed when the kobject is done with */
837 module_put(THIS_MODULE);
842 * Called when a system has released all its locks and is not going to use the
843 * lockspace any longer. We free everything we're managing for this lockspace.
844 * Remaining nodes will go through the recovery process as if we'd died. The
845 * lockspace must continue to function as usual, participating in recoveries,
846 * until this returns.
848 * Force has 4 possible values:
849 * 0 - don't destroy lockspace if it has any LKBs
850 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
851 * 2 - destroy lockspace regardless of LKBs
852 * 3 - destroy lockspace as part of a forced shutdown
/* Public release entry point: resolve the opaque handle back to an ls,
   drop the lookup reference, then do the real release under ls_lock so it
   is serialized against dlm_new_lockspace(). */
855 int dlm_release_lockspace(void *lockspace, int force)
860 ls = dlm_find_lockspace_local(lockspace);
863 dlm_put_lockspace(ls);
865 mutex_lock(&ls_lock);
866 error = release_lockspace(ls, force);
871 mutex_unlock(&ls_lock);
/* Called when the userland control daemon (dlm_controld) has gone away:
   stop every still-running lockspace.  lslist_lock is dropped around the
   per-lockspace work; presumably the walk then restarts from the top
   (restart lines elided in this extract — confirm against full source). */
876 void dlm_stop_lockspaces(void)
881 spin_lock(&lslist_lock);
882 list_for_each_entry(ls, &lslist, ls_list) {
/* Skip lockspaces that are not currently running. */
883 if (!test_bit(LSFL_RUNNING, &ls->ls_flags))
885 spin_unlock(&lslist_lock);
886 log_error(ls, "no userland control daemon, stopping lockspace");
890 spin_unlock(&lslist_lock);