2 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
3 * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
5 * This file is part of LVM2.
7 * This copyrighted material is made available to anyone wishing to use,
8 * modify, copy, or redistribute it subject to the terms and conditions
9 * of the GNU General Public License v.2.
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software Foundation,
13 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 #include "clvmd-common.h"
20 #include "lvm-types.h"
22 #include "clvmd-comms.h"
24 #include "lvm-functions.h"
27 #include "toolcontext.h"
29 #include "lvm-globals.h"
/* File-scope daemon state: the single LVM command context shared by all
 * worker threads, the table of currently-held LV locks, and the mutexes
 * that guard them. */
36 static struct cmd_context *cmd = NULL;
37 static struct dm_hash_table *lv_hash = NULL;
38 static pthread_mutex_t lv_hash_lock;
39 static pthread_mutex_t lvm_lock;
40 static char last_error[1024];
/*
 * Render a full 32-bit locking command word as a human-readable string
 * for DEBUGLOG output: "0x<cmd> <command> (<type>|<scope>|flags...)".
 * The three switches translate the type, scope and command bit-fields
 * into the "type", "command" and "scope" strings used by the sprintf.
 * NOTE(review): buffer/variable declarations, break statements, default
 * cases, the return and the closing braces are elided in this excerpt;
 * presumably "buf" is a static buffer whose address is returned — confirm.
 */
47 static const char *decode_full_locking_cmd(uint32_t cmdl)
54 switch (cmdl & LCK_TYPE_MASK) {
78 switch (cmdl & LCK_SCOPE_MASK) {
85 switch (cmdl & LCK_MASK) {
86 case LCK_LV_EXCLUSIVE & LCK_MASK:
87 command = "LCK_LV_EXCLUSIVE";
89 case LCK_LV_SUSPEND & LCK_MASK:
90 command = "LCK_LV_SUSPEND";
92 case LCK_LV_RESUME & LCK_MASK:
93 command = "LCK_LV_RESUME";
95 case LCK_LV_ACTIVATE & LCK_MASK:
96 command = "LCK_LV_ACTIVATE";
98 case LCK_LV_DEACTIVATE & LCK_MASK:
99 command = "LCK_LV_DEACTIVATE";
/* Append one "|FLAG" fragment per modifier bit that is set. */
112 sprintf(buf, "0x%x %s (%s|%s%s%s%s%s%s)", cmdl, command, type, scope,
113 cmdl & LCK_NONBLOCK ? "|NONBLOCK" : "",
114 cmdl & LCK_HOLD ? "|HOLD" : "",
115 cmdl & LCK_LOCAL ? "|LOCAL" : "",
116 cmdl & LCK_CLUSTER_VG ? "|CLUSTER_VG" : "",
117 cmdl & LCK_CACHE ? "|CACHE" : "");
/*
 * Only processes 8 bits: excludes LCK_CACHE.
 * Thin wrapper: widens the 8-bit wire-format command and delegates to
 * decode_full_locking_cmd() above.
 */
125 static const char *decode_locking_cmd(unsigned char cmdl)
127 return decode_full_locking_cmd((uint32_t) cmdl);
/*
 * Render the lock_flags byte as "0x<flags> ( FLAG|FLAG|... )" for debug
 * logging.  Returns the static "buf"; not reentrant.
 * NOTE(review): the declaration of "len", the return statement and the
 * closing brace are elided in this excerpt.
 */
130 static const char *decode_flags(unsigned char flags)
132 static char buf[128];
135 len = sprintf(buf, "0x%x ( %s%s%s%s%s)", flags,
136 flags & LCK_PARTIAL_MODE ? "PARTIAL_MODE|" : "",
137 flags & LCK_MIRROR_NOSYNC_MODE ? "MIRROR_NOSYNC|" : "",
138 flags & LCK_DMEVENTD_MONITOR_MODE ? "DMEVENTD_MONITOR|" : "",
139 flags & LCK_ORIGIN_ONLY_MODE ? "ORIGIN_ONLY|" : "",
140 flags & LCK_CONVERT ? "CONVERT|" : "");
/* Accessor for callers that need the most recent error text; presumably
 * returns the file-scope last_error buffer filled in by lvm2_log_fn()
 * below — body elided in this excerpt, confirm against full source. */
150 char *get_last_lvm_error()
/*
 * Hash lock info helpers.
 * lookup_info: thread-safe lookup of a resource's lv_info in lv_hash.
 * NOTE(review): declaration of "lvi" and the return are elided; caller
 * code below treats a NULL result as "lock not held".
 */
158 static struct lv_info *lookup_info(const char *resource)
162 pthread_mutex_lock(&lv_hash_lock);
163 lvi = dm_hash_lookup(lv_hash, resource);
164 pthread_mutex_unlock(&lv_hash_lock);
/* Thread-safe insert of an lv_info record into lv_hash, keyed by the
 * resource (LV UUID) string.  Ownership of "lvi" passes to the table. */
169 static void insert_info(const char *resource, struct lv_info *lvi)
171 pthread_mutex_lock(&lv_hash_lock);
172 dm_hash_insert(lv_hash, resource, lvi);
173 pthread_mutex_unlock(&lv_hash_lock);
/* Thread-safe removal of a resource's entry from lv_hash.
 * NOTE(review): freeing of the lv_info record is not visible in this
 * excerpt — confirm who releases it. */
176 static void remove_info(const char *resource)
178 pthread_mutex_lock(&lv_hash_lock);
179 dm_hash_remove(lv_hash, resource);
180 pthread_mutex_unlock(&lv_hash_lock);
/*
 * Return the mode a lock is currently held at (or -1 if not held).
 * NOTE(review): the "return -1" fallthrough for the not-found case is
 * elided in this excerpt.
 */
186 static int get_current_lock(char *resource)
190 if ((lvi = lookup_info(resource)))
191 return lvi->lock_mode;
/* Body of the lock-state initialiser (enclosing function signature is
 * elided in this excerpt): builds the LV lock table and the two mutexes
 * used throughout this file. */
199 /* Create hash table for keeping LV locks & status */
200 lv_hash = dm_hash_create(100);
201 pthread_mutex_init(&lv_hash_lock, NULL);
202 pthread_mutex_init(&lvm_lock, NULL);
205 /* Called at shutdown to tidy the lockspace:
 * walks every held LV lock, releases it via sync_unlock() (logging but
 * otherwise ignoring failures), then destroys the hash table.  Runs
 * under lv_hash_lock for the whole sweep. */
206 void destroy_lvhash()
208 struct dm_hash_node *v;
213 pthread_mutex_lock(&lv_hash_lock);
215 dm_hash_iterate(v, lv_hash) {
216 lvi = dm_hash_get_data(lv_hash, v);
217 resource = dm_hash_get_key(lv_hash, v);
219 if ((status = sync_unlock(resource, lvi->lock_id)))
220 DEBUGLOG("unlock_all. unlock failed(%d): %s\n",
221 status, strerror(errno));
225 dm_hash_destroy(lv_hash);
228 pthread_mutex_unlock(&lv_hash_lock);
231 /* Gets a real lock and keeps the info in the hash table.
 * Three paths: (1) already held at the requested mode -> no-op;
 * (2) held at another mode and LCKF_CONVERT given -> in-place convert;
 * (3) not held -> take a fresh lock and record it via insert_info().
 * NOTE(review): error returns, malloc-failure handling and the closing
 * braces are elided in this excerpt. */
232 static int hold_lock(char *resource, int mode, int flags)
238 /* Mask off invalid options */
239 flags &= LCKF_NOQUEUE | LCKF_CONVERT;
241 lvi = lookup_info(resource);
243 if (lvi && lvi->lock_mode == mode) {
244 DEBUGLOG("hold_lock, lock mode %d already held\n", mode);
248 /* Only allow explicit conversions */
249 if (lvi && !(flags & LCKF_CONVERT)) {
254 /* Already exists - convert it */
256 sync_lock(resource, mode, flags, &lvi->lock_id);
259 lvi->lock_mode = mode;
262 DEBUGLOG("hold_lock. convert to %d failed: %s\n", mode,
/* Fresh lock: allocate tracking record, lock without CONVERT, record it. */
267 lvi = malloc(sizeof(struct lv_info));
271 lvi->lock_mode = mode;
272 status = sync_lock(resource, mode, flags & ~LCKF_CONVERT, &lvi->lock_id);
276 DEBUGLOG("hold_lock. lock at %d failed: %s\n", mode,
279 insert_info(resource, lvi);
286 /* Unlock and remove it from the hash table.
 * Treats "not held" as success (only logs); on successful sync_unlock()
 * the tracking entry is dropped via remove_info(). */
287 static int hold_unlock(char *resource)
293 if (!(lvi = lookup_info(resource))) {
294 DEBUGLOG("hold_unlock, lock not already held\n");
298 status = sync_unlock(resource, lvi->lock_id);
301 remove_info(resource);
304 DEBUGLOG("hold_unlock. unlock failed(%d): %s\n", status,
312 /* Watch the return codes here.
313 liblvm API functions return 1(true) for success, 0(false) for failure and don't set errno.
314 libdlm API functions return 0 for success, -1 for failure and do set errno.
315 These functions here return 0 for success or >0 for failure (where the retcode is errno)
318 /* Activate LV exclusive or non-exclusive.
 * Steps: skip if already locked at the requested mode (clustered VG),
 * consult lv_activation_filter(), take the cluster lock for clustered
 * VGs (EAGAIN mapped to a friendlier "busy on another node" message),
 * resume if suspended, then lv_activate(); on activation failure the
 * just-taken lock is dropped again.
 * NOTE(review): several returns/braces are elided in this excerpt. */
319 static int do_activate_lv(char *resource, unsigned char lock_flags, int mode)
327 /* Is it already open ? */
328 oldmode = get_current_lock(resource);
329 if (oldmode == mode && (lock_flags & LCK_CLUSTER_VG)) {
330 DEBUGLOG("do_activate_lv, lock already held at %d\n", oldmode);
331 return 0; /* Nothing to do */
334 /* Does the config file want us to activate this LV ? */
335 if (!lv_activation_filter(cmd, resource, &activate_lv))
339 return 0; /* Success, we did nothing! */
341 /* Do we need to activate exclusively? */
342 if ((activate_lv == 2) || (mode == LCK_EXCL)) {
348 * Try to get the lock if it's a clustered volume group.
349 * Use lock conversion only if requested, to prevent implicit conversion
350 * of exclusive lock to shared one during activation.
352 if (lock_flags & LCK_CLUSTER_VG) {
353 status = hold_lock(resource, mode, LCKF_NOQUEUE | (lock_flags & LCK_CONVERT ? LCKF_CONVERT:0));
355 /* Return an LVM-sensible error for this.
356 * Forcing EIO makes the upper level return this text
357 * rather than the strerror text for EAGAIN.
359 if (errno == EAGAIN) {
360 sprintf(last_error, "Volume is busy on another node");
367 /* If it's suspended then resume it */
368 if (!lv_info_by_lvid(cmd, resource, 0, &lvi, 0, 0))
373 if (!lv_resume(cmd, resource, 0)) {
379 /* Now activate it */
380 if (!lv_activate(cmd, resource, exclusive))
/* Activation failed: release the lock we took above (unless we only
 * converted an existing one at the same mode). */
386 if (oldmode == -1 || oldmode != mode)
387 (void)hold_unlock(resource);
391 /* Resume the LV if it was active:
 * a clustered VG whose lock we do not hold needs no action; otherwise
 * delegate to lv_resume_if_active(), honouring ORIGIN_ONLY. */
392 static int do_resume_lv(char *resource, unsigned char lock_flags)
397 oldmode = get_current_lock(resource);
398 if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
399 DEBUGLOG("do_resume_lv, lock not already held\n");
400 return 0; /* We don't need to do anything */
403 if (!lv_resume_if_active(cmd, resource, (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0))
409 /* Suspend the device if active:
 * no-op when the lock is not held on a clustered VG; otherwise query
 * device state with lv_info_by_lvid() and only suspend if it exists. */
410 static int do_suspend_lv(char *resource, unsigned char lock_flags)
414 unsigned origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0;
417 oldmode = get_current_lock(resource);
418 if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
419 DEBUGLOG("do_suspend_lv, lock not already held\n");
420 return 0; /* Not active, so it's OK */
423 /* Only suspend it if it exists */
424 if (!lv_info_by_lvid(cmd, resource, origin_only, &lvi, 0, 0))
427 if (lvi.exists && !lv_suspend_if_active(cmd, resource, origin_only))
/* Deactivate the LV, then (for clustered VGs) release our cluster lock
 * on it via hold_unlock().  Not holding the lock on a clustered VG
 * means nothing to do. */
433 static int do_deactivate_lv(char *resource, unsigned char lock_flags)
439 oldmode = get_current_lock(resource);
440 if (oldmode == -1 && (lock_flags & LCK_CLUSTER_VG)) {
441 DEBUGLOG("do_deactivate_lock, lock not already held\n");
442 return 0; /* We don't need to do anything */
445 if (!lv_deactivate(cmd, resource))
448 if (lock_flags & LCK_CLUSTER_VG) {
449 status = hold_unlock(resource);
/* Report the DLM-style name ("NL"/"CR"/"PR"/"PW"/"EX") of the mode at
 * which we currently hold the given resource, for lock queries.
 * NOTE(review): the switch header and return are elided; "type" stays
 * NULL when the lock is not held. */
457 const char *do_lock_query(char *resource)
460 const char *type = NULL;
462 mode = get_current_lock(resource);
464 case LCK_NULL: type = "NL"; break;
465 case LCK_READ: type = "CR"; break;
466 case LCK_PREAD:type = "PR"; break;
467 case LCK_WRITE:type = "PW"; break;
468 case LCK_EXCL: type = "EX"; break;
471 DEBUGLOG("do_lock_query: resource '%s', mode %i (%s)\n", resource, mode, type ?: "?");
476 /* This is the LOCK_LV part that happens on all nodes in the cluster -
477 it is responsible for the interaction with device-mapper and LVM.
 * Refreshes the tool context if the config changed, then — under
 * lvm_lock — applies per-command flags (mirror-in-sync, dmeventd
 * monitoring, partial activation), dispatches on the LV sub-command,
 * resets the flags and empties the command memory pool afterwards. */
478 int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
482 DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, memlock = %d\n",
483 resource, decode_locking_cmd(command), decode_flags(lock_flags), memlock());
485 if (!cmd->config_valid || config_files_changed(cmd)) {
486 /* Reinitialise various settings inc. logging, filters */
487 if (do_refresh_cache()) {
488 log_error("Updated config file invalid. Aborting.");
493 pthread_mutex_lock(&lvm_lock);
494 if (lock_flags & LCK_MIRROR_NOSYNC_MODE)
495 init_mirror_in_sync(1);
497 if (lock_flags & LCK_DMEVENTD_MONITOR_MODE)
498 init_dmeventd_monitor(1);
500 init_dmeventd_monitor(0);
502 cmd->partial_activation = (lock_flags & LCK_PARTIAL_MODE) ? 1 : 0;
504 /* clvmd should never try to read suspended device */
505 init_ignore_suspended_devices(1);
/* Dispatch on the command bits; breaks are elided in this excerpt. */
507 switch (command & LCK_MASK) {
508 case LCK_LV_EXCLUSIVE:
509 status = do_activate_lv(resource, lock_flags, LCK_EXCL);
513 status = do_suspend_lv(resource, lock_flags);
517 case LCK_LV_RESUME: /* if active */
518 status = do_resume_lv(resource, lock_flags);
521 case LCK_LV_ACTIVATE:
522 status = do_activate_lv(resource, lock_flags, LCK_READ);
525 case LCK_LV_DEACTIVATE:
526 status = do_deactivate_lv(resource, lock_flags);
530 DEBUGLOG("Invalid LV command 0x%x\n", command);
535 if (lock_flags & LCK_MIRROR_NOSYNC_MODE)
536 init_mirror_in_sync(0);
538 cmd->partial_activation = 0;
540 /* clean the pool for another command */
541 dm_pool_empty(cmd->mem);
542 pthread_mutex_unlock(&lvm_lock);
544 DEBUGLOG("Command return is %d, memlock is %d\n", status, memlock());
548 /* Functions to do on the local node only BEFORE the cluster-wide stuff above happens */
549 int pre_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
551 /* Nearly all the stuff happens cluster-wide. Apart from SUSPEND. Here we get the
552 lock out on this node (because we are the node modifying the metadata)
553 before suspending cluster-wide.
554 LCKF_CONVERT is used always, local node is going to modify metadata */
/* Only SUSPEND of a clustered VG needs local work: upgrade our lock to
 * LCK_WRITE (non-blocking, with conversion) before the suspend. */
556 if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_SUSPEND &&
557 (lock_flags & LCK_CLUSTER_VG)) {
558 DEBUGLOG("pre_lock_lv: resource '%s', cmd = %s, flags = %s\n",
559 resource, decode_locking_cmd(command), decode_flags(lock_flags));
561 if (hold_lock(resource, LCK_WRITE, LCKF_NOQUEUE | LCKF_CONVERT))
567 /* Functions to do on the local node only AFTER the cluster-wide stuff above happens */
568 int post_lock_lv(unsigned char command, unsigned char lock_flags,
572 unsigned origin_only = (lock_flags & LCK_ORIGIN_ONLY_MODE) ? 1 : 0;
574 /* Opposite of above, done on resume after a metadata update:
 * if pre_lock_lv() left us holding LCK_WRITE, downgrade back to
 * LCK_READ when the LV still exists, otherwise drop the lock. */
575 if ((command & (LCK_SCOPE_MASK | LCK_TYPE_MASK)) == LCK_LV_RESUME &&
576 (lock_flags & LCK_CLUSTER_VG)) {
580 ("post_lock_lv: resource '%s', cmd = %s, flags = %s\n",
581 resource, decode_locking_cmd(command), decode_flags(lock_flags));
583 /* If the lock state is PW then restore it to what it was */
584 oldmode = get_current_lock(resource);
585 if (oldmode == LCK_WRITE) {
588 pthread_mutex_lock(&lvm_lock);
589 status = lv_info_by_lvid(cmd, resource, origin_only, &lvi, 0, 0);
590 pthread_mutex_unlock(&lvm_lock);
595 if (hold_lock(resource, LCK_READ, LCKF_CONVERT))
597 } else if (hold_unlock(resource))
604 /* Check if a VG is in use by LVM1 so we don't stomp on it.
 * check_lvm1_vg_inactive() returning 1 means "inactive" -> success (0);
 * anything else maps to EBUSY. */
605 int do_check_lvm1(const char *vgname)
609 status = check_lvm1_vg_inactive(cmd, vgname);
611 return status == 1 ? 0 : EBUSY;
/* Re-read lvm.conf into the tool context and rescan device labels.
 * Serialised by lvm_lock; on refresh_toolcontext() failure the mutex is
 * released early (error return elided in this excerpt). */
614 int do_refresh_cache()
616 DEBUGLOG("Refreshing context\n");
617 log_notice("Refreshing context");
619 pthread_mutex_lock(&lvm_lock);
621 if (!refresh_toolcontext(cmd)) {
622 pthread_mutex_unlock(&lvm_lock);
626 init_full_scan_done(0);
627 init_ignore_suspended_devices(1);
628 lvmcache_label_scan(cmd, 2);
629 dm_pool_empty(cmd->mem);
631 pthread_mutex_unlock(&lvm_lock);
637 /* Only called at gulm startup. Drop any leftover VG or P_orphan locks
638 that might be hanging around if we died for any reason.
 * Runs "lvm pvs" non-locking to list VG names, unlocks the orphan and
 * global P_ locks, then unlocks "V_<vgname>" for each VG found.
 * NOTE(review): popen/fgets setup, variable declarations and the pclose
 * are partially elided in this excerpt. */
640 static void drop_vg_locks(void)
646 (LVM_PATH " pvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_name", "r");
648 sync_unlock("P_" VG_ORPHANS, LCK_EXCL);
649 sync_unlock("P_" VG_GLOBAL, LCK_EXCL);
654 while (fgets(line, sizeof(line), vgs)) {
/* Strip the trailing newline, then isolate the VG name between spaces. */
658 if (line[strlen(line)-1] == '\n')
659 line[strlen(line)-1] = '\0';
661 vgstart = line + strspn(line, " ");
662 vgend = vgstart + strcspn(vgstart, " ");
665 if (strncmp(vgstart, "WARNING:", 8) == 0)
668 sprintf(vg, "V_%s", vgstart);
669 sync_unlock(vg, LCK_EXCL);
673 DEBUGLOG("vgs fclose failed: %s\n", strerror(errno));
/*
 * Handle VG lock - drop metadata or update lvmcache state.
 * Resource names are "V_<vg>" or "P_<name>"; the name after the two-char
 * prefix is the VG name.  Dispatches (switch header elided in this
 * excerpt) to commit, revert or drop of cached metadata under lvm_lock.
 */
679 void do_lock_vg(unsigned char command, unsigned char lock_flags, char *resource)
681 uint32_t lock_cmd = command;
682 char *vgname = resource + 2;
684 lock_cmd &= (LCK_SCOPE_MASK | LCK_TYPE_MASK | LCK_HOLD);
687 * Check if LCK_CACHE should be set. All P_ locks except # are cache related.
689 if (strncmp(resource, "P_#", 3) && !strncmp(resource, "P_", 2))
690 lock_cmd |= LCK_CACHE;
692 DEBUGLOG("do_lock_vg: resource '%s', cmd = %s, flags = %s, memlock = %d\n",
693 resource, decode_full_locking_cmd(lock_cmd), decode_flags(lock_flags), memlock());
695 /* P_#global causes a full cache refresh */
696 if (!strcmp(resource, "P_" VG_GLOBAL)) {
701 pthread_mutex_lock(&lvm_lock);
704 DEBUGLOG("vg_commit notification for VG %s\n", vgname);
705 lvmcache_commit_metadata(vgname);
708 DEBUGLOG("vg_revert notification for VG %s\n", vgname);
709 lvmcache_drop_metadata(vgname, 1);
711 case LCK_VG_DROP_CACHE:
713 DEBUGLOG("Invalidating cached metadata for VG %s\n", vgname);
714 lvmcache_drop_metadata(vgname, 0);
716 pthread_mutex_unlock(&lvm_lock);
/*
 * Compare the uuid with the list of exclusive locks that clvmd
 * held before it was restarted, so we can get the right kind
 * of lock now we are restarting.
 * Scans argv for "-E <uuid>" pairs; truthy return means the lock was
 * exclusive (loop header/returns elided in this excerpt).
 */
724 static int was_ex_lock(char *uuid, char **argv)
727 char *opt = argv[optnum];
730 if (strcmp(opt, "-E") == 0) {
731 opt = argv[++optnum];
732 if (opt && (strcmp(opt, uuid) == 0)) {
733 DEBUGLOG("Lock %s is exclusive\n", uuid);
737 opt = argv[++optnum];
/*
 * Ideally, clvmd should be started before any LVs are active
 * but this may not be the case...
 * I suppose this also comes in handy if clvmd crashes, not that it would!
 * Runs "lvm lvs" non-locking to find already active/suspended LVs in
 * clustered VGs, rebuilds each "vguuid+lvuuid" lock resource name by
 * stripping the hyphens from both UUIDs, and takes the corresponding
 * lock (EX if listed via -E on the command line, READ otherwise).
 */
747 static void *get_initial_state(char **argv)
750 char lv[64], vg[64], flags[25], vg_flags[25];
755 (LVM_PATH " lvs --config 'log{command_names=0 prefix=\"\"}' --nolocking --noheadings -o vg_uuid,lv_uuid,lv_attr,vg_attr",
761 while (fgets(line, sizeof(line), lvs)) {
762 if (sscanf(line, "%s %s %s %s\n", vg, lv, flags, vg_flags) == 4) {
764 /* States: s:suspended a:active S:dropped snapshot I:invalid snapshot */
765 if (strlen(vg) == 38 && /* is is a valid UUID ? */
766 (flags[4] == 'a' || flags[4] == 's') && /* is it active or suspended? */
767 vg_flags[5] == 'c') { /* is it clustered ? */
768 /* Convert hyphen-separated UUIDs into one:
 * copy the 6+4+4+4+4+4+6 character groups of each formatted
 * UUID, skipping the '-' separators; VG UUID first (bytes
 * 0..31), LV UUID second (bytes 32..63). */
769 memcpy(&uuid[0], &vg[0], 6);
770 memcpy(&uuid[6], &vg[7], 4);
771 memcpy(&uuid[10], &vg[12], 4);
772 memcpy(&uuid[14], &vg[17], 4);
773 memcpy(&uuid[18], &vg[22], 4);
774 memcpy(&uuid[22], &vg[27], 4);
775 memcpy(&uuid[26], &vg[32], 6);
776 memcpy(&uuid[32], &lv[0], 6);
777 memcpy(&uuid[38], &lv[7], 4);
778 memcpy(&uuid[42], &lv[12], 4);
779 memcpy(&uuid[46], &lv[17], 4);
780 memcpy(&uuid[50], &lv[22], 4);
781 memcpy(&uuid[54], &lv[27], 4);
782 memcpy(&uuid[58], &lv[32], 6);
785 lock_mode = LCK_READ;
787 /* Look for this lock in the list of EX locks
788 we were passed on the command-line */
789 if (was_ex_lock(uuid, argv))
790 lock_mode = LCK_EXCL;
792 DEBUGLOG("getting initial lock for %s\n", uuid);
793 hold_lock(uuid, lock_mode, LCKF_NOQUEUE);
798 DEBUGLOG("lvs fclose failed: %s\n", strerror(errno));
/* Log callback installed via init_log_fn(): forwards each message to
 * the normal LVM2 logger (temporarily detaching itself to avoid
 * recursion), and keeps a copy of the latest error/fatal message in
 * last_error for get_last_lvm_error(). */
802 static void lvm2_log_fn(int level, const char *file, int line, int dm_errno,
806 /* Send messages to the normal LVM2 logging system too,
807 so we get debug output when it's asked for.
808 We need to NULL the function ptr otherwise it will just call
 itself recursively (re-registration below restores it). */
811 print_log(level, file, line, dm_errno, "%s", message);
812 init_log_fn(lvm2_log_fn);
815 /* Ignore non-error messages, but store the latest one for returning
 to the user. */
818 if (level != _LOG_ERR && level != _LOG_FATAL)
821 strncpy(last_error, message, sizeof(last_error));
822 last_error[sizeof(last_error)-1] = '\0';
825 /* This checks some basic cluster-LVM configuration stuff:
 * locking_type 3 (built-in cluster locking) is fine; type 2 must name
 * the cluster locking library; anything else gets an error logged. */
826 static void check_config(void)
830 locking_type = find_config_tree_int(cmd, "global/locking_type", 1);
832 if (locking_type == 3) /* compiled-in cluster support */
835 if (locking_type == 2) { /* External library, check name */
838 libname = find_config_tree_str(cmd, "global/locking_library",
840 if (strstr(libname, "liblvm2clusterlock.so"))
843 log_error("Incorrect LVM locking library specified in lvm.conf, cluster operations may not work.");
846 log_error("locking_type not set correctly in lvm.conf, cluster operations will not work.");
849 /* Backups up the LVM metadata if it's changed:
 * reads the VG (under lvm_lock) and, if consistent, lets
 * check_current_backup() decide whether a new backup is needed.
 * The command memory pool is emptied afterwards. */
850 void lvm_do_backup(const char *vgname)
852 struct volume_group * vg;
855 DEBUGLOG("Triggering backup of VG metadata for %s.\n", vgname);
857 pthread_mutex_lock(&lvm_lock);
859 vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, 1, &consistent);
861 if (vg && consistent)
862 check_current_backup(vg);
864 log_error("Error backing up metadata, can't find VG for group %s", vgname);
867 dm_pool_empty(cmd->mem);
869 pthread_mutex_unlock(&lvm_lock);
/* Iterator over lv_hash returning the next entry held LCK_EXCL.
 * Pass v == NULL to start from the first node; *name is set to the
 * resource key of the exclusive lock found (NULL-handling and loop
 * setup partially elided in this excerpt). */
872 struct dm_hash_node *get_next_excl_lock(struct dm_hash_node *v, char **name)
878 v = dm_hash_get_first(lv_hash);
882 lvi = dm_hash_get_data(lv_hash, v);
883 DEBUGLOG("Looking for EX locks. found %x mode %d\n", lvi->lock_id, lvi->lock_mode);
885 if (lvi->lock_mode == LCK_EXCL) {
886 *name = dm_hash_get_key(lv_hash, v);
888 v = dm_hash_get_next(lv_hash, v);
890 } while (v && !*name);
893 DEBUGLOG("returning EXclusive UUID %s\n", *name);
897 /* Called to initialise the LVM context of the daemon:
 * creates the tool context, wires up syslog, validates the cluster
 * locking config, drops stale locks, reacquires locks for already
 * active LVs, installs the log trap and pins daemon memory. */
898 int init_clvm(int using_gulm, char **argv)
900 if (!(cmd = create_toolcontext(1, NULL))) {
901 log_error("Failed to allocate command context");
905 if (stored_errno()) {
906 destroy_toolcontext(cmd);
910 /* Use LOG_DAEMON for syslog messages instead of LOG_USER */
911 init_syslog(LOG_DAEMON);
912 openlog("clvmd", LOG_PID, LOG_DAEMON);
913 cmd->cmd_line = "clvmd";
915 /* Check lvm.conf is setup for cluster-LVM */
917 init_ignore_suspended_devices(1);
919 /* Remove any non-LV locks that may have been left around */
923 get_initial_state(argv);
925 /* Trap log messages so we can pass them back to the user */
926 init_log_fn(lvm2_log_fn);
927 memlock_inc_daemon(cmd);
/* Shutdown counterpart of init_clvm(): unpin daemon memory and tear
 * down the LVM tool context (tail of function elided in this excerpt). */
932 void destroy_lvm(void)
935 memlock_dec_daemon(cmd);
936 destroy_toolcontext(cmd);