// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
**
*******************************************************************************
******************************************************************************/
#include <linux/module.h>

#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
#include "dir.h"
#include "midcomms.h"
#include "lowcomms.h"
#include "config.h"
#include "memory.h"
#include "lock.h"
#include "recover.h"
#include "requestqueue.h"
#include "user.h"
#include "ast.h"
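/*
 * ls_lock serializes lockspace creation and release; lslist, protected by
 * lslist_lock, holds every lockspace on this node; scand_task is the
 * "dlm_scand" kthread started when the first lockspace is created.
 */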
static int ls_count;
static struct mutex ls_lock;
static struct list_head lslist;
static spinlock_t lslist_lock;
static struct task_struct *scand_task;
static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	ssize_t ret = len;
	int n;
	int rc = kstrtoint(buf, 0, &n);

	if (rc)
		return rc;
	ls = dlm_find_lockspace_local(ls->ls_local_handle);
	if (!ls)
		return -EINVAL;

	switch (n) {
	case 0:
		dlm_ls_stop(ls);
		break;
	case 1:
		dlm_ls_start(ls);
		break;
	default:
		ret = -EINVAL;
	}
	dlm_put_lockspace(ls);
	return ret;
}
static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	int rc = kstrtoint(buf, 0, &ls->ls_uevent_result);

	if (rc)
		return rc;
	set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
	wake_up(&ls->ls_uevent_wait);
	return len;
}
static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
}

static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	int rc = kstrtouint(buf, 0, &ls->ls_global_id);

	if (rc)
		return rc;
	return len;
}
static ssize_t dlm_nodir_show(struct dlm_ls *ls, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%u\n", dlm_no_directory(ls));
}

static ssize_t dlm_nodir_store(struct dlm_ls *ls, const char *buf, size_t len)
{
	int val;
	int rc = kstrtoint(buf, 0, &val);

	if (rc)
		return rc;
	if (val == 1)
		set_bit(LSFL_NODIR, &ls->ls_flags);
	return len;
}
static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
{
	uint32_t status = dlm_recover_status(ls);
	return snprintf(buf, PAGE_SIZE, "%x\n", status);
}

static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
{
	return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
}
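/*
 * The show/store handlers above back the per-lockspace sysfs files under
 * /sys/kernel/dlm/<name>/ (the "dlm" kset is added to kernel_kobj below);
 * dlm_controld drives lockspace startup through "control", "event_done"
 * and "id".
 */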
struct dlm_attr {
	struct attribute attr;
	ssize_t (*show)(struct dlm_ls *, char *);
	ssize_t (*store)(struct dlm_ls *, const char *, size_t);
};
static struct dlm_attr dlm_attr_control = {
	.attr  = {.name = "control", .mode = S_IWUSR},
	.store = dlm_control_store
};

static struct dlm_attr dlm_attr_event = {
	.attr  = {.name = "event_done", .mode = S_IWUSR},
	.store = dlm_event_store
};

static struct dlm_attr dlm_attr_id = {
	.attr  = {.name = "id", .mode = S_IRUGO | S_IWUSR},
	.show  = dlm_id_show,
	.store = dlm_id_store
};

static struct dlm_attr dlm_attr_nodir = {
	.attr  = {.name = "nodir", .mode = S_IRUGO | S_IWUSR},
	.show  = dlm_nodir_show,
	.store = dlm_nodir_store
};

static struct dlm_attr dlm_attr_recover_status = {
	.attr  = {.name = "recover_status", .mode = S_IRUGO},
	.show  = dlm_recover_status_show
};

static struct dlm_attr dlm_attr_recover_nodeid = {
	.attr  = {.name = "recover_nodeid", .mode = S_IRUGO},
	.show  = dlm_recover_nodeid_show
};
static struct attribute *dlm_attrs[] = {
	&dlm_attr_control.attr,
	&dlm_attr_event.attr,
	&dlm_attr_id.attr,
	&dlm_attr_nodir.attr,
	&dlm_attr_recover_status.attr,
	&dlm_attr_recover_nodeid.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dlm);
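/* ATTRIBUTE_GROUPS() generates the dlm_groups array used as the
   default_groups of dlm_ktype below */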
static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
			     char *buf)
{
	struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
	struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
	return a->show ? a->show(ls, buf) : 0;
}

static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
			      const char *buf, size_t len)
{
	struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
	struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
	return a->store ? a->store(ls, buf, len) : len;
}
static void lockspace_kobj_release(struct kobject *k)
{
	struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
	kfree(ls);
}
static const struct sysfs_ops dlm_attr_ops = {
	.show  = dlm_attr_show,
	.store = dlm_attr_store,
};

static struct kobj_type dlm_ktype = {
	.default_groups = dlm_groups,
	.sysfs_ops = &dlm_attr_ops,
	.release = lockspace_kobj_release,
};

static struct kset *dlm_kset;
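/* do_uevent() notifies dlm_controld that we are joining or leaving a
   lockspace group, then sleeps until the daemon reports the result back
   through the "event_done" sysfs file (dlm_event_store above). */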
static int do_uevent(struct dlm_ls *ls, int in)
{
	if (in)
		kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
	else
		kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);

	log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving");

	/* dlm_controld will see the uevent, do the necessary group management
	   and then write to sysfs to wake us */

	wait_event(ls->ls_uevent_wait,
		   test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));

	log_rinfo(ls, "group event done %d", ls->ls_uevent_result);

	return ls->ls_uevent_result;
}
static int dlm_uevent(struct kset *kset, struct kobject *kobj,
		      struct kobj_uevent_env *env)
{
	struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);

	add_uevent_var(env, "LOCKSPACE=%s", ls->ls_name);
	return 0;
}

static const struct kset_uevent_ops dlm_uevent_ops = {
	.uevent = dlm_uevent,
};
int __init dlm_lockspace_init(void)
{
	ls_count = 0;
	mutex_init(&ls_lock);
	INIT_LIST_HEAD(&lslist);
	spin_lock_init(&lslist_lock);

	dlm_kset = kset_create_and_add("dlm", &dlm_uevent_ops, kernel_kobj);
	if (!dlm_kset) {
		printk(KERN_WARNING "%s: cannot create kset\n", __func__);
		return -ENOMEM;
	}
	return 0;
}
void dlm_lockspace_exit(void)
{
	kset_unregister(dlm_kset);
}
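/* dlm_scand walks lslist and scans each lockspace roughly every
   ci_scan_secs seconds, so long as recovery is not in progress */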
static struct dlm_ls *find_ls_to_scan(void)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);
	list_for_each_entry(ls, &lslist, ls_list) {
		if (time_after_eq(jiffies, ls->ls_scan_time +
					    dlm_config.ci_scan_secs * HZ)) {
			spin_unlock(&lslist_lock);
			return ls;
		}
	}
	spin_unlock(&lslist_lock);
	return NULL;
}
static int dlm_scand(void *data)
{
	struct dlm_ls *ls;

	while (!kthread_should_stop()) {
		ls = find_ls_to_scan();
		if (ls) {
			if (dlm_lock_recovery_try(ls)) {
				ls->ls_scan_time = jiffies;
				dlm_scan_rsbs(ls);
				dlm_scan_timeout(ls);
				dlm_scan_waiters(ls);
				dlm_unlock_recovery(ls);
			} else {
				/* recovery in progress; retry this ls soon */
				ls->ls_scan_time += HZ;
			}
			continue;
		}
		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
	}
	return 0;
}
static int dlm_scand_start(void)
{
	struct task_struct *p;
	int error = 0;

	p = kthread_run(dlm_scand, NULL, "dlm_scand");
	if (IS_ERR(p))
		error = PTR_ERR(p);
	else
		scand_task = p;
	return error;
}

static void dlm_scand_stop(void)
{
	kthread_stop(scand_task);
}
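/* The three lookup variants below find a lockspace by global id, by local
   handle or by device minor; each takes a reference that the caller must
   drop with dlm_put_lockspace(). */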
struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);

	list_for_each_entry(ls, &lslist, ls_list) {
		if (ls->ls_global_id == id) {
			ls->ls_count++;
			goto out;
		}
	}
	ls = NULL;
 out:
	spin_unlock(&lslist_lock);
	return ls;
}

struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);
	list_for_each_entry(ls, &lslist, ls_list) {
		if (ls->ls_local_handle == lockspace) {
			ls->ls_count++;
			goto out;
		}
	}
	ls = NULL;
 out:
	spin_unlock(&lslist_lock);
	return ls;
}

struct dlm_ls *dlm_find_lockspace_device(int minor)
{
	struct dlm_ls *ls;

	spin_lock(&lslist_lock);
	list_for_each_entry(ls, &lslist, ls_list) {
		if (ls->ls_device.minor == minor) {
			ls->ls_count++;
			goto out;
		}
	}
	ls = NULL;
 out:
	spin_unlock(&lslist_lock);
	return ls;
}
void dlm_put_lockspace(struct dlm_ls *ls)
{
	spin_lock(&lslist_lock);
	ls->ls_count--;
	spin_unlock(&lslist_lock);
}
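/* remove_lockspace() waits for all references taken by the lookup
   functions above to be dropped before unlinking ls from lslist */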
static void remove_lockspace(struct dlm_ls *ls)
{
	for (;;) {
		spin_lock(&lslist_lock);
		if (ls->ls_count == 0) {
			WARN_ON(ls->ls_create_count != 0);
			list_del(&ls->ls_list);
			spin_unlock(&lslist_lock);
			return;
		}
		spin_unlock(&lslist_lock);
		ssleep(1);
	}
}
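/* threads_start() is called under ls_lock from dlm_new_lockspace() when
   the first lockspace on this node is created */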
static int threads_start(void)
{
	int error;

	error = dlm_scand_start();
	if (error) {
		log_print("cannot start dlm_scand thread %d", error);
		goto fail;
	}

	/* Thread for sending/receiving messages for all lockspaces */
	error = dlm_midcomms_start();
	if (error) {
		log_print("cannot start dlm midcomms %d", error);
		goto scand_fail;
	}

	return 0;

 scand_fail:
	dlm_scand_stop();
 fail:
	return error;
}
static int new_lockspace(const char *name, const char *cluster,
			 uint32_t flags, int lvblen,
			 const struct dlm_lockspace_ops *ops, void *ops_arg,
			 int *ops_result, dlm_lockspace_t **lockspace)
{
	struct dlm_ls *ls;
	int i, size, error;
	int do_unreg = 0;
	int namelen = strlen(name);

	if (namelen > DLM_LOCKSPACE_LEN || namelen == 0)
		return -EINVAL;

	/* the lock value block length must be a non-zero multiple of 8 */
	if (!lvblen || (lvblen % 8))
		return -EINVAL;

	if (!try_module_get(THIS_MODULE))
		return -EINVAL;

	if (!dlm_user_daemon_available()) {
		log_print("dlm user daemon not available");
		error = -EUNATCH;
		goto out;
	}

	if (ops && ops_result) {
		if (!dlm_config.ci_recover_callbacks)
			*ops_result = -EOPNOTSUPP;
		else
			*ops_result = 0;
	}
441 log_print("dlm cluster name '%s' is being used without an application provided cluster name",
442 dlm_config.ci_cluster_name);
444 if (dlm_config.ci_recover_callbacks && cluster &&
445 strncmp(cluster, dlm_config.ci_cluster_name, DLM_LOCKSPACE_LEN)) {
446 log_print("dlm cluster name '%s' does not match "
447 "the application cluster name '%s'",
448 dlm_config.ci_cluster_name, cluster);
455 spin_lock(&lslist_lock);
456 list_for_each_entry(ls, &lslist, ls_list) {
457 WARN_ON(ls->ls_create_count <= 0);
458 if (ls->ls_namelen != namelen)
460 if (memcmp(ls->ls_name, name, namelen))
462 if (flags & DLM_LSFL_NEWEXCL) {
466 ls->ls_create_count++;
471 spin_unlock(&lslist_lock);
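	/* a positive "error" above means an existing lockspace was found and
	   its create_count was bumped; the caller maps that to success.
	   From here on we are building a brand new lockspace. */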
	ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
	if (!ls)
		goto out;
	memcpy(ls->ls_name, name, namelen);
	ls->ls_namelen = namelen;
	ls->ls_lvblen = lvblen;
	ls->ls_count = 0;
	ls->ls_flags = 0;
	ls->ls_scan_time = jiffies;

	if (ops && dlm_config.ci_recover_callbacks) {
		ls->ls_ops = ops;
		ls->ls_ops_arg = ops_arg;
	}

	if (flags & DLM_LSFL_TIMEWARN)
		set_bit(LSFL_TIMEWARN, &ls->ls_flags);

	/* ls_exflags are forced to match among nodes, and we don't
	   need to require all nodes to have some flags set */
	ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
				    DLM_LSFL_NEWEXCL));

	size = READ_ONCE(dlm_config.ci_rsbtbl_size);
	ls->ls_rsbtbl_size = size;

	ls->ls_rsbtbl = vmalloc(array_size(size, sizeof(struct dlm_rsbtable)));
	if (!ls->ls_rsbtbl)
		goto out_lsfree;
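	/* each hash bucket holds two rbtrees: "keep" for rsbs in active use
	   and "toss" for unused rsbs that dlm_scand may free later */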
	for (i = 0; i < size; i++) {
		ls->ls_rsbtbl[i].keep.rb_node = NULL;
		ls->ls_rsbtbl[i].toss.rb_node = NULL;
		spin_lock_init(&ls->ls_rsbtbl[i].lock);
	}
	spin_lock_init(&ls->ls_remove_spin);
	init_waitqueue_head(&ls->ls_remove_wait);

	for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
		ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,
						 GFP_KERNEL);
		if (!ls->ls_remove_names[i])
			goto out_rsbtbl;
	}

	idr_init(&ls->ls_lkbidr);
	spin_lock_init(&ls->ls_lkbidr_spin);

	INIT_LIST_HEAD(&ls->ls_waiters);
	mutex_init(&ls->ls_waiters_mutex);
	INIT_LIST_HEAD(&ls->ls_orphans);
	mutex_init(&ls->ls_orphans_mutex);
	INIT_LIST_HEAD(&ls->ls_timeout);
	mutex_init(&ls->ls_timeout_mutex);

	INIT_LIST_HEAD(&ls->ls_new_rsb);
	spin_lock_init(&ls->ls_new_rsb_spin);

	INIT_LIST_HEAD(&ls->ls_nodes);
	INIT_LIST_HEAD(&ls->ls_nodes_gone);
	ls->ls_num_nodes = 0;
	ls->ls_low_nodeid = 0;
	ls->ls_total_weight = 0;
	ls->ls_node_array = NULL;

	memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
	ls->ls_stub_rsb.res_ls = ls;

	ls->ls_debug_rsb_dentry = NULL;
	ls->ls_debug_waiters_dentry = NULL;
	init_waitqueue_head(&ls->ls_uevent_wait);
	ls->ls_uevent_result = 0;
	init_completion(&ls->ls_members_done);
	ls->ls_members_result = -1;

	mutex_init(&ls->ls_cb_mutex);
	INIT_LIST_HEAD(&ls->ls_cb_delay);

	ls->ls_recoverd_task = NULL;
	mutex_init(&ls->ls_recoverd_active);
	spin_lock_init(&ls->ls_recover_lock);
	spin_lock_init(&ls->ls_rcom_spin);
	get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
	ls->ls_recover_status = 0;
	ls->ls_recover_seq = 0;
	ls->ls_recover_args = NULL;
	init_rwsem(&ls->ls_in_recovery);
	init_rwsem(&ls->ls_recv_active);
	INIT_LIST_HEAD(&ls->ls_requestqueue);
	mutex_init(&ls->ls_requestqueue_mutex);
	mutex_init(&ls->ls_clear_proc_locks);
	/* Due to backwards compatibility with 3.1 we need to use the maximum
	 * possible dlm message size to be sure the message will fit and
	 * not cause out of bounds issues.  However, the sending side in 3.2
	 * might send less.
	 */
	ls->ls_recover_buf = kmalloc(DLM_MAX_SOCKET_BUFSIZE, GFP_NOFS);
	if (!ls->ls_recover_buf)
		goto out_lkbidr;

	ls->ls_slot = 0;
	ls->ls_num_slots = 0;
	ls->ls_slots_size = 0;
	ls->ls_slots = NULL;

	INIT_LIST_HEAD(&ls->ls_recover_list);
	spin_lock_init(&ls->ls_recover_list_lock);
	idr_init(&ls->ls_recover_idr);
	spin_lock_init(&ls->ls_recover_idr_lock);
	ls->ls_recover_list_count = 0;
	ls->ls_local_handle = ls;
	init_waitqueue_head(&ls->ls_wait_general);
	INIT_LIST_HEAD(&ls->ls_root_list);
	init_rwsem(&ls->ls_root_sem);
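	/* adding ls to lslist makes it visible to the dlm_find_lockspace
	   variants, so everything above must be initialized first */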
	spin_lock(&lslist_lock);
	ls->ls_create_count = 1;
	list_add(&ls->ls_list, &lslist);
	spin_unlock(&lslist_lock);

	if (flags & DLM_LSFL_FS) {
		error = dlm_callback_start(ls);
		if (error) {
			log_error(ls, "can't start dlm_callback %d", error);
			goto out_delist;
		}
	}

	init_waitqueue_head(&ls->ls_recover_lock_wait);

	/*
	 * Once started, dlm_recoverd first looks for ls in lslist, then
	 * initializes ls_in_recovery as locked in "down" mode.  We need
	 * to wait for the wakeup from dlm_recoverd because in_recovery
	 * has to start out in down mode.
	 */

	error = dlm_recoverd_start(ls);
	if (error) {
		log_error(ls, "can't start dlm_recoverd %d", error);
		goto out_callback;
	}

	wait_event(ls->ls_recover_lock_wait,
		   test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
	/* let kobject handle freeing of ls if there's an error */
	do_unreg = 1;

	ls->ls_kobj.kset = dlm_kset;
	error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
				     "%s", ls->ls_name);
	if (error)
		goto out_recoverd;
	kobject_uevent(&ls->ls_kobj, KOBJ_ADD);

	/* This uevent triggers dlm_controld in userspace to add us to the
	   group of nodes that are members of this lockspace (managed by the
	   cluster infrastructure.)  Once it's done that, it tells us who the
	   current lockspace members are (via configfs) and then tells the
	   lockspace to start running (via sysfs) in dlm_ls_start(). */

	error = do_uevent(ls, 1);
	if (error)
		goto out_recoverd;

	wait_for_completion(&ls->ls_members_done);
	error = ls->ls_members_result;
	if (error)
		goto out_members;

	dlm_create_debug_file(ls);

	log_rinfo(ls, "join complete");
	*lockspace = ls;
	return 0;
 out_members:
	do_uevent(ls, 0);
	dlm_clear_members(ls);
	kfree(ls->ls_node_array);
 out_recoverd:
	dlm_recoverd_stop(ls);
 out_callback:
	dlm_callback_stop(ls);
 out_delist:
	spin_lock(&lslist_lock);
	list_del(&ls->ls_list);
	spin_unlock(&lslist_lock);
	idr_destroy(&ls->ls_recover_idr);
	kfree(ls->ls_recover_buf);
 out_lkbidr:
	idr_destroy(&ls->ls_lkbidr);
 out_rsbtbl:
	for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
		kfree(ls->ls_remove_names[i]);
	vfree(ls->ls_rsbtbl);
 out_lsfree:
	if (do_unreg)
		kobject_put(&ls->ls_kobj);
	else
		kfree(ls);
 out:
	module_put(THIS_MODULE);
	return error;
}
int dlm_new_lockspace(const char *name, const char *cluster,
		      uint32_t flags, int lvblen,
		      const struct dlm_lockspace_ops *ops, void *ops_arg,
		      int *ops_result, dlm_lockspace_t **lockspace)
{
	int error = 0;

	mutex_lock(&ls_lock);
	if (!ls_count)
		error = threads_start();
	if (error)
		goto out;

	error = new_lockspace(name, cluster, flags, lvblen, ops, ops_arg,
			      ops_result, lockspace);
	if (!error)
		ls_count++;
	if (error > 0)
		error = 0;
	if (!ls_count) {
		dlm_scand_stop();
		dlm_midcomms_shutdown();
		dlm_lowcomms_stop();
	}
 out:
	mutex_unlock(&ls_lock);
	return error;
}
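/* ls_count is the number of lockspaces on this node: the scand thread and
   the comms layers are started with the first lockspace and torn down again
   when the last one goes away (see dlm_release_lockspace below) */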
static int lkb_idr_is_local(int id, void *p, void *data)
{
	struct dlm_lkb *lkb = p;

	return lkb->lkb_nodeid == 0 && lkb->lkb_grmode != DLM_LOCK_IV;
}

static int lkb_idr_is_any(int id, void *p, void *data)
{
	return 1;
}

static int lkb_idr_free(int id, void *p, void *data)
{
	struct dlm_lkb *lkb = p;

	if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
		dlm_free_lvb(lkb->lkb_lvbptr);

	dlm_free_lkb(lkb);
	return 0;
}
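/* force semantics for lockspace_busy(): force == 0 treats any lkb as
   making the lockspace busy, force == 1 counts only locally held lkbs,
   and force >= 2 never reports busy */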
/* NOTE: We check the lkbidr here rather than the resource table.
   This is because there may be LKBs queued as ASTs that have been unlinked
   from their RSBs and are pending deletion once the AST has been delivered */

static int lockspace_busy(struct dlm_ls *ls, int force)
{
	int rv;

	spin_lock(&ls->ls_lkbidr_spin);
	if (force == 0) {
		rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
	} else if (force == 1) {
		rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
	} else {
		rv = 0;
	}
	spin_unlock(&ls->ls_lkbidr_spin);
	return rv;
}
static int release_lockspace(struct dlm_ls *ls, int force)
{
	struct dlm_rsb *rsb;
	struct rb_node *n;
	int i, busy, rv;

	busy = lockspace_busy(ls, force);

	spin_lock(&lslist_lock);
	if (ls->ls_create_count == 1) {
		if (busy) {
			rv = -EBUSY;
		} else {
			/* remove_lockspace takes ls off lslist */
			ls->ls_create_count = 0;
			rv = 0;
		}
	} else if (ls->ls_create_count > 1) {
		rv = --ls->ls_create_count;
	} else {
		rv = -EINVAL;
	}
	spin_unlock(&lslist_lock);

	if (rv) {
		log_debug(ls, "release_lockspace no remove %d", rv);
		return rv;
	}

	dlm_device_deregister(ls);

	if (force < 3 && dlm_user_daemon_available())
		do_uevent(ls, 0);

	dlm_recoverd_stop(ls);

	if (ls_count == 1) {
		dlm_scand_stop();
		dlm_clear_members(ls);
		dlm_midcomms_shutdown();
	}
	dlm_callback_stop(ls);

	remove_lockspace(ls);

	dlm_delete_debug_file(ls);

	idr_destroy(&ls->ls_recover_idr);
	kfree(ls->ls_recover_buf);

	/*
	 * Free all lkbs in the idr
	 */

	idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
	idr_destroy(&ls->ls_lkbidr);

	/*
	 * Free all rsbs on rsbtbl[] lists
	 */

	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
		while ((n = rb_first(&ls->ls_rsbtbl[i].keep))) {
			rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
			rb_erase(n, &ls->ls_rsbtbl[i].keep);
			dlm_free_rsb(rsb);
		}

		while ((n = rb_first(&ls->ls_rsbtbl[i].toss))) {
			rsb = rb_entry(n, struct dlm_rsb, res_hashnode);
			rb_erase(n, &ls->ls_rsbtbl[i].toss);
			dlm_free_rsb(rsb);
		}
	}

	vfree(ls->ls_rsbtbl);

	for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++)
		kfree(ls->ls_remove_names[i]);
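	/* ls_new_rsb holds rsb structures that were preallocated by the
	   locking code but never used; free those here as well */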
	while (!list_empty(&ls->ls_new_rsb)) {
		rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
				       res_hashchain);
		list_del(&rsb->res_hashchain);
		dlm_free_rsb(rsb);
	}
	/*
	 * Free structures on any other lists
	 */

	dlm_purge_requestqueue(ls);
	kfree(ls->ls_recover_args);
	dlm_clear_members(ls);
	dlm_clear_members_gone(ls);
	kfree(ls->ls_node_array);
	log_rinfo(ls, "release_lockspace final free");
	kobject_put(&ls->ls_kobj);
	/* The ls structure will be freed when the kobject is done with it */

	module_put(THIS_MODULE);
	return 0;
}
/*
 * Called when a system has released all its locks and is not going to use the
 * lockspace any longer.  We free everything we're managing for this lockspace.
 * Remaining nodes will go through the recovery process as if we'd died.  The
 * lockspace must continue to function as usual, participating in recoveries,
 * until this returns.
 *
 * Force has 4 possible values:
 * 0 - don't destroy lockspace if it has any LKBs
 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
 * 2 - destroy lockspace regardless of LKBs
 * 3 - destroy lockspace as part of a forced shutdown
 */
int dlm_release_lockspace(void *lockspace, int force)
{
	struct dlm_ls *ls;
	int error;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;
	dlm_put_lockspace(ls);

	mutex_lock(&ls_lock);
	error = release_lockspace(ls, force);
	if (!error)
		ls_count--;
	if (!ls_count)
		dlm_lowcomms_stop();
	mutex_unlock(&ls_lock);

	return error;
}
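/*
 * Illustrative usage from a kernel client (a sketch only; "example_ls" is
 * a hypothetical caller-side variable, not part of this file):
 *
 *	dlm_lockspace_t *example_ls;
 *	int error;
 *
 *	error = dlm_new_lockspace("fsname", "mycluster", 0, 64,
 *				  NULL, NULL, NULL, &example_ls);
 *	...
 *	error = dlm_release_lockspace(example_ls, 0);
 */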
void dlm_stop_lockspaces(void)
{
	struct dlm_ls *ls;
	int count;

 restart:
	count = 0;
	spin_lock(&lslist_lock);
	list_for_each_entry(ls, &lslist, ls_list) {
		if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) {
			count++;
			continue;
		}
		spin_unlock(&lslist_lock);
		log_error(ls, "no userland control daemon, stopping lockspace");
		dlm_ls_stop(ls);
		goto restart;
	}
	spin_unlock(&lslist_lock);

	if (count)
		log_print("dlm user daemon left %d lockspaces", count);
}