2 * Copyright (c) 2004, 2005 Christophe Varoqui
3 * Copyright (c) 2005 Kiyoshi Ueda, NEC
4 * Copyright (c) 2005 Benjamin Marzinski, Redhat
5 * Copyright (c) 2005 Edward Goggin, EMC
9 #include <libdevmapper.h>
12 #include <sys/types.h>
16 #include <sys/resource.h>
18 #include <linux/oom.h>
22 #include <systemd/sd-daemon.h>
24 #include <semaphore.h>
31 #include "time-util.h"
39 static int use_watchdog;
53 #include "blacklist.h"
54 #include "structs_vec.h"
56 #include "devmapper.h"
59 #include "discovery.h"
63 #include "switchgroup.h"
65 #include "configure.h"
68 #include "pgpolicies.h"
72 #include "mpath_cmd.h"
73 #include "mpath_persist.h"
75 #include "prioritizers/alua_rtpg.h"
82 #include "cli_handlers.h"
85 #include "io_err_stat.h"
88 #include "../third-party/valgrind/drd.h"
90 #define FILE_NAME_SIZE 256
/*
 * Log a per-path checker message at verbosity 'a': a fixed "path offline"
 * message when the path is offline, otherwise the checker-supplied text 'b'.
 * NOTE(review): the condition and do/while wrapper of this macro are not
 * visible in this excerpt — confirm against the full source.
 */
93 #define LOG_MSG(a, b) \
96 condlog(a, "%s: %s - path offline", pp->mpp->alias, pp->dev); \
98 condlog(a, "%s: %s - %s", pp->mpp->alias, pp->dev, b); \
101 struct mpath_event_param
104 struct multipath *mpp;
110 int bindings_read_only;
112 enum daemon_status running_state = DAEMON_INIT;
114 pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;
115 pthread_cond_t config_cond;
118 * global copy of vecs for use in sig handlers
120 struct vectors * gvecs;
124 struct config *multipath_conf;
126 /* Local variables */
127 static volatile sig_atomic_t exit_sig;
128 static volatile sig_atomic_t reconfig_sig;
129 static volatile sig_atomic_t log_reset_sig;
134 switch (running_state) {
139 case DAEMON_CONFIGURE:
145 case DAEMON_SHUTDOWN:
152 * I love you too, systemd ...
/*
 * Map the daemon's running_state to a systemd "STATUS=..." string,
 * consumed by sd_notify() in do_sd_notify().
 * NOTE(review): return type and some case labels are missing from this
 * excerpt; presumably returns const char *.
 */
155 sd_notify_status(void)
157 switch (running_state) {
159 return "STATUS=init";
161 return "STATUS=startup";
162 case DAEMON_CONFIGURE:
163 return "STATUS=configure";
167 case DAEMON_SHUTDOWN:
168 return "STATUS=shutdown";
/*
 * Tell systemd about a daemon state change via sd_notify().
 * Transitions between DAEMON_IDLE and DAEMON_RUNNING are deliberately
 * suppressed: the checker loop toggles between them constantly and each
 * notification costs a dbus round-trip.
 */
174 static void do_sd_notify(enum daemon_status old_state)
177 * Checkerloop switches back and forth between idle and running state.
178 * No need to tell systemd each time.
179 * These notifications cause a lot of overhead on dbus.
181 if ((running_state == DAEMON_IDLE || running_state == DAEMON_RUNNING) &&
182 (old_state == DAEMON_IDLE || old_state == DAEMON_RUNNING))
184 sd_notify(0, sd_notify_status());
/*
 * pthread cleanup handler: release config_lock if the holder is
 * cancelled while waiting on config_cond (see set_config_state()).
 */
188 static void config_cleanup(void *arg)
190 pthread_mutex_unlock(&config_lock);
/*
 * Atomically move the daemon to 'state': under config_lock, update
 * running_state, wake all waiters on config_cond, and notify systemd
 * of the transition. No-op if the state is unchanged.
 */
193 void post_config_state(enum daemon_status state)
195 pthread_mutex_lock(&config_lock);
196 if (state != running_state) {
197 enum daemon_status old_state = running_state;
199 running_state = state;
200 pthread_cond_broadcast(&config_cond);
202 do_sd_notify(old_state);
205 pthread_mutex_unlock(&config_lock);
/*
 * Like post_config_state(), but if the daemon is busy (not DAEMON_IDLE)
 * first wait — with a CLOCK_MONOTONIC-based timed wait on config_cond —
 * for it to become idle before switching to 'state'. The cleanup push
 * guarantees config_lock is dropped if the thread is cancelled mid-wait.
 * NOTE(review): the timeout value and the return-code handling of
 * pthread_cond_timedwait() are not visible in this excerpt.
 */
208 int set_config_state(enum daemon_status state)
212 pthread_cleanup_push(config_cleanup, NULL);
213 pthread_mutex_lock(&config_lock);
214 if (running_state != state) {
215 enum daemon_status old_state = running_state;
217 if (running_state != DAEMON_IDLE) {
220 clock_gettime(CLOCK_MONOTONIC, &ts);
222 rc = pthread_cond_timedwait(&config_cond,
226 running_state = state;
227 pthread_cond_broadcast(&config_cond);
229 do_sd_notify(old_state);
233 pthread_cleanup_pop(1);
/*
 * Return the current global config via an RCU-protected dereference.
 * Callers must pair this with put_multipath_config().
 */
237 struct config *get_multipath_config(void)
240 return rcu_dereference(multipath_conf);
/*
 * Release a config reference obtained from get_multipath_config().
 * NOTE(review): function body is not visible in this excerpt.
 */
243 void put_multipath_config(struct config *conf)
/*
 * Decide whether 'mpp' should fail back to a better path group.
 * When 'refresh' is set, re-read every path's priority (DI_PRIO) first,
 * then recompute the best group with select_path_group() and compare it
 * to the currently selected group. Manual-failback maps never switch.
 * NOTE(review): the return statements are not visible here; presumably
 * returns non-zero when bestpg differs from nextpg.
 */
249 need_switch_pathgroup (struct multipath * mpp, int refresh)
251 struct pathgroup * pgp;
261 * Refresh path priority values
264 vector_foreach_slot (mpp->pg, pgp, i) {
265 vector_foreach_slot (pgp->paths, pp, j) {
266 conf = get_multipath_config();
267 pathinfo(pp, conf, DI_PRIO);
268 put_multipath_config(conf);
273 if (!mpp->pg || VECTOR_SIZE(mpp->paths) == 0)
276 bestpg = select_path_group(mpp);
277 if (mpp->pgfailback == -FAILBACK_MANUAL)
280 mpp->bestpg = bestpg;
281 if (mpp->bestpg != mpp->nextpg)
/*
 * Switch the kernel map to mpp->bestpg via device-mapper and bump the
 * per-map switch-group statistics counter.
 */
288 switch_pathgroup (struct multipath * mpp)
290 mpp->stat_switchgroup++;
291 dm_switchgroup(mpp->alias, mpp->bestpg);
292 condlog(2, "%s: switch to path group #%i",
293 mpp->alias, mpp->bestpg);
/*
 * Reconcile the old map vector (vecs->mpvec) with the freshly discovered
 * one (nmpv): maps no longer allowed by the current configuration are
 * flushed from device-mapper; maps that survive but could not be flushed
 * (e.g. because the device is open) are re-set-up and moved into nmpv.
 * With reassign_maps enabled, surviving maps get their DM tables
 * reassigned instead. Caller presumably holds the vecs lock — confirm.
 */
297 coalesce_maps(struct vectors *vecs, vector nmpv)
299 struct multipath * ompp;
300 vector ompv = vecs->mpvec;
301 unsigned int i, reassign_maps;
304 conf = get_multipath_config();
305 reassign_maps = conf->reassign_maps;
306 put_multipath_config(conf);
307 vector_foreach_slot (ompv, ompp, i) {
308 condlog(3, "%s: coalesce map", ompp->alias);
309 if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
311 * remove all current maps not allowed by the
312 * current configuration
314 if (dm_flush_map(ompp->alias)) {
315 condlog(0, "%s: unable to flush devmap",
318 * may be just because the device is open
320 if (setup_multipath(vecs, ompp) != 0) {
324 if (!vector_alloc_slot(nmpv))
327 vector_set_slot(nmpv, ompp);
329 vector_del_slot(ompv, i);
334 condlog(2, "%s devmap removed", ompp->alias);
336 } else if (reassign_maps) {
337 condlog(3, "%s: Reassign existing device-mapper"
338 " devices", ompp->alias);
339 dm_reassign(ompp->alias);
/*
 * Synchronize kernel state for every map in 'mpvec'.
 * NOTE(review): the loop body (per-map sync call) is not visible here.
 */
346 sync_maps_state(vector mpvec)
349 struct multipath *mpp;
351 vector_foreach_slot (mpvec, mpp, i)
/*
 * Remove 'mpp' from device-mapper. With 'nopaths' set a deferred-remove
 * variant is used (honouring mpp->deferred_remove); the DEFERRED branch
 * only records DEFERRED_REMOVE_IN_PROGRESS. On success, orphan the map's
 * paths and drop the map plus its waiter thread from our bookkeeping.
 * References to the map are cleared before flushing so the spurious
 * uevent generated by the flush can be ignored.
 */
356 flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
361 r = dm_flush_map_nopaths(mpp->alias, mpp->deferred_remove);
363 r = dm_flush_map(mpp->alias);
365 * clear references to this map before flushing so we can ignore
366 * the spurious uevent we may generate with the dm_flush_map call below
370 * May not really be an error -- if the map was already flushed
371 * from the device mapper by dmsetup(8) for instance.
374 condlog(0, "%s: can't flush", mpp->alias);
376 condlog(2, "%s: devmap deferred remove", mpp->alias);
377 mpp->deferred_remove = DEFERRED_REMOVE_IN_PROGRESS;
383 condlog(2, "%s: map flushed", mpp->alias);
386 orphan_paths(vecs->pathvec, mpp);
387 remove_map_and_stop_waiter(mpp, vecs, 1);
/*
 * uevent handler for dm map addition. Resolve the map alias from the
 * uevent's DM_NAME, falling back to dm_mapname(major, minor) when the
 * uevent lacks it, then call ev_add_map() under vecs->lock (taken with
 * a cancellation-safe cleanup push).
 */
393 uev_add_map (struct uevent * uev, struct vectors * vecs)
396 int major = -1, minor = -1, rc;
398 condlog(3, "%s: add map (uevent)", uev->kernel);
399 alias = uevent_get_dm_name(uev);
401 condlog(3, "%s: No DM_NAME in uevent", uev->kernel);
402 major = uevent_get_major(uev);
403 minor = uevent_get_minor(uev);
404 alias = dm_mapname(major, minor);
406 condlog(2, "%s: mapname not found for %d:%d",
407 uev->kernel, major, minor);
411 pthread_cleanup_push(cleanup_lock, &vecs->lock);
413 pthread_testcancel();
414 rc = ev_add_map(uev->kernel, alias, vecs);
415 lock_cleanup_pop(vecs->lock);
421 * ev_add_map expects that the multipath device already exists in kernel
422 * before it is called. It just adds a device to multipathd or updates an
/*
 * Register or update a multipath map that already exists in the kernel.
 * Non-multipath dm devices are ignored. If the map is already known and
 * was waiting for udev (wait_for_udev > 1), perform the delayed update
 * now — possibly triggering a delayed reconfigure when the config asks
 * for it. Unknown maps are added via add_map_without_path().
 * Caller must hold vecs->lock (called from uev_add_map / CLI).
 */
426 ev_add_map (char * dev, const char * alias, struct vectors * vecs)
428 struct multipath * mpp;
429 int delayed_reconfig, reassign_maps;
432 if (!dm_is_mpath(alias)) {
433 condlog(4, "%s: not a multipath map", alias);
437 mpp = find_mp_by_alias(vecs->mpvec, alias);
440 if (mpp->wait_for_udev > 1) {
441 condlog(2, "%s: performing delayed actions",
443 if (update_map(mpp, vecs))
444 /* setup multipathd removed the map */
447 conf = get_multipath_config();
448 delayed_reconfig = conf->delayed_reconfig;
449 reassign_maps = conf->reassign_maps;
450 put_multipath_config(conf);
451 if (mpp->wait_for_udev) {
452 mpp->wait_for_udev = 0;
453 if (delayed_reconfig &&
454 !need_to_delay_reconfig(vecs)) {
455 condlog(2, "reconfigure (delayed)");
456 set_config_state(DAEMON_CONFIGURE);
461 * Not really an error -- we generate our own uevent
462 * if we create a multipath mapped device as a result
466 condlog(3, "%s: Reassign existing device-mapper devices",
472 condlog(2, "%s: adding map", alias);
475 * now we can register the map
477 if ((mpp = add_map_without_path(vecs, alias))) {
479 condlog(2, "%s: devmap %s registered", alias, dev);
482 condlog(2, "%s: ev_add_map failed", dev);
/*
 * uevent handler for dm map removal. Look the map up by its dm minor
 * number under vecs->lock; sanity-check that the registered alias
 * matches the uevent's DM_NAME before orphaning the paths and removing
 * the map plus its waiter thread. Mismatch or unknown minor is logged
 * and ignored.
 */
488 uev_remove_map (struct uevent * uev, struct vectors * vecs)
492 struct multipath *mpp;
494 condlog(2, "%s: remove map (uevent)", uev->kernel);
495 alias = uevent_get_dm_name(uev);
497 condlog(3, "%s: No DM_NAME in uevent, ignoring", uev->kernel);
500 minor = uevent_get_minor(uev);
502 pthread_cleanup_push(cleanup_lock, &vecs->lock);
504 pthread_testcancel();
505 mpp = find_mp_by_minor(vecs->mpvec, minor);
508 condlog(2, "%s: devmap not registered, can't remove",
512 if (strcmp(mpp->alias, alias)) {
513 condlog(2, "%s: minor number mismatch (map %d, event %d)",
514 mpp->alias, mpp->dmi->minor, minor);
518 orphan_paths(vecs->pathvec, mpp);
519 remove_map_and_stop_waiter(mpp, vecs, 1);
521 lock_cleanup_pop(vecs->lock);
526 /* Called from CLI handler */
/*
 * CLI-driven map removal: same alias/minor consistency checks as
 * uev_remove_map(), then flush the map via flush_map() (non-deferred).
 * Caller is expected to hold vecs->lock.
 */
528 ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
530 struct multipath * mpp;
532 mpp = find_mp_by_minor(vecs->mpvec, minor);
535 condlog(2, "%s: devmap not registered, can't remove",
539 if (strcmp(mpp->alias, alias)) {
540 condlog(2, "%s: minor number mismatch (map %d, event %d)",
541 mpp->alias, mpp->dmi->minor, minor);
544 return flush_map(mpp, vecs, 0);
/*
 * uevent handler for path addition. Rejects device names containing
 * ".." (no relative names in the pathvec). If the path is already known:
 * a path without an mpp and without a wwid is re-initialized via
 * pathinfo(DI_ALL | DI_BLACKLIST) — added on PATHINFO_OK, dropped from
 * the pathvec on PATHINFO_SKIPPED (blacklisted). Otherwise a new path is
 * allocated with alloc_path_with_pathinfo(), stored, given the configured
 * checkint, and handed to ev_add_path(). All pathvec access is under
 * vecs->lock with cancellation-safe push/pop.
 */
548 uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
554 condlog(2, "%s: add path (uevent)", uev->kernel);
555 if (strstr(uev->kernel, "..") != NULL) {
557 * Don't allow relative device names in the pathvec
559 condlog(0, "%s: path name is invalid", uev->kernel);
563 pthread_cleanup_push(cleanup_lock, &vecs->lock);
565 pthread_testcancel();
566 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
570 condlog(2, "%s: spurious uevent, path already in pathvec",
572 if (!pp->mpp && !strlen(pp->wwid)) {
573 condlog(3, "%s: reinitialize path", uev->kernel);
574 udev_device_unref(pp->udev);
575 pp->udev = udev_device_ref(uev->udev);
576 conf = get_multipath_config();
577 r = pathinfo(pp, conf,
578 DI_ALL | DI_BLACKLIST);
579 put_multipath_config(conf);
580 if (r == PATHINFO_OK)
581 ret = ev_add_path(pp, vecs, need_do_map);
582 else if (r == PATHINFO_SKIPPED) {
583 condlog(3, "%s: remove blacklisted path",
585 i = find_slot(vecs->pathvec, (void *)pp);
587 vector_del_slot(vecs->pathvec, i);
590 condlog(0, "%s: failed to reinitialize path",
596 lock_cleanup_pop(vecs->lock);
601 * get path vital state
603 conf = get_multipath_config();
604 ret = alloc_path_with_pathinfo(conf, uev->udev,
605 uev->wwid, DI_ALL, &pp);
606 put_multipath_config(conf);
608 if (ret == PATHINFO_SKIPPED)
610 condlog(3, "%s: failed to get path info", uev->kernel);
613 pthread_cleanup_push(cleanup_lock, &vecs->lock);
615 pthread_testcancel();
616 ret = store_path(vecs->pathvec, pp);
618 conf = get_multipath_config();
619 pp->checkint = conf->checkint;
620 put_multipath_config(conf);
621 ret = ev_add_path(pp, vecs, need_do_map);
623 condlog(0, "%s: failed to store path info, "
629 lock_cleanup_pop(vecs->lock);
/*
 * Core path-addition logic (called with vecs->lock held, from uevent
 * handling, the CLI, and the checker). Requires a valid wwid. If a map
 * for the wwid exists but is still waiting for udev and already has
 * usable paths, the addition is deferred (wait_for_udev = 2) and the
 * path orphaned. A size mismatch with the existing map rejects the path.
 * Otherwise the path is adopted into the existing map (ACT_RELOAD) or a
 * new map is created (ACT_CREATE), the DM table is built with
 * setup_map() and pushed with domap() — with a bounded retry loop to
 * cope with asynchronous uevents — and finally our state is re-read from
 * the kernel with setup_multipath(). A waiter thread is started for
 * freshly created maps. On failure the map is removed and/or the path
 * orphaned; several failure paths deliberately leave the path in the
 * pathvec (see the "leave path added to pathvec" comments).
 */
639 ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
641 struct multipath * mpp;
642 char params[PARAMS_SIZE] = {0};
644 int start_waiter = 0;
648 * need path UID to go any further
650 if (strlen(pp->wwid) == 0) {
651 condlog(0, "%s: failed to get path uid", pp->dev);
652 goto fail; /* leave path added to pathvec */
654 mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
655 if (mpp && mpp->wait_for_udev &&
656 (pathcount(mpp, PATH_UP) > 0 ||
657 (pathcount(mpp, PATH_GHOST) > 0 && pp->tpgs != TPGS_IMPLICIT &&
658 mpp->ghost_delay_tick <= 0))) {
659 /* if wait_for_udev is set and valid paths exist */
660 condlog(2, "%s: delaying path addition until %s is fully initialized", pp->dev, mpp->alias);
661 mpp->wait_for_udev = 2;
662 orphan_path(pp, "waiting for create to complete");
669 if (pp->size && mpp->size != pp->size) {
670 condlog(0, "%s: failed to add new path %s, "
671 "device size mismatch",
672 mpp->alias, pp->dev);
673 int i = find_slot(vecs->pathvec, (void *)pp);
675 vector_del_slot(vecs->pathvec, i);
680 condlog(4,"%s: adopting all paths for path %s",
681 mpp->alias, pp->dev);
682 if (adopt_paths(vecs->pathvec, mpp))
683 goto fail; /* leave path added to pathvec */
685 verify_paths(mpp, vecs);
686 mpp->action = ACT_RELOAD;
687 extract_hwe_from_path(mpp);
689 if (!should_multipath(pp, vecs->pathvec)) {
690 orphan_path(pp, "only one path");
693 condlog(4,"%s: creating new map", pp->dev);
694 if ((mpp = add_map_with_path(vecs, pp, 1))) {
695 mpp->action = ACT_CREATE;
697 * We don't depend on ACT_CREATE, as domap will
698 * set it to ACT_NOTHING when complete.
703 goto fail; /* leave path added to pathvec */
706 /* persistent reservation check*/
707 mpath_pr_event_handle(pp);
712 if (!dm_map_present(mpp->alias)) {
713 mpp->action = ACT_CREATE;
717 * push the map to the device-mapper
719 if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
720 condlog(0, "%s: failed to setup map for addition of new "
721 "path %s", mpp->alias, pp->dev);
725 * reload the map for the multipath mapped device
728 ret = domap(mpp, params, 1);
730 if (ret < 0 && retries-- > 0) {
731 condlog(0, "%s: retry domap for addition of new "
732 "path %s", mpp->alias, pp->dev);
736 condlog(0, "%s: failed in domap for addition of new "
737 "path %s", mpp->alias, pp->dev);
739 * deal with asynchronous uevents :((
741 if (mpp->action == ACT_RELOAD && retries-- > 0) {
742 condlog(0, "%s: ev_add_path sleep", mpp->alias);
744 update_mpp_paths(mpp, vecs->pathvec);
747 else if (mpp->action == ACT_RELOAD)
748 condlog(0, "%s: giving up reload", mpp->alias);
755 * update our state from kernel regardless of create or reload
757 if (setup_multipath(vecs, mpp))
758 goto fail; /* if setup_multipath fails, it removes the map */
762 if ((mpp->action == ACT_CREATE ||
763 (mpp->action == ACT_NOTHING && start_waiter && !mpp->waiter)) &&
764 start_waiter_thread(mpp, vecs))
768 condlog(2, "%s [%s]: path added to devmap %s",
769 pp->dev, pp->dev_t, mpp->alias);
775 remove_map(mpp, vecs, 1);
777 orphan_path(pp, "failed to add path");
/*
 * uevent handler for path removal. Foreign (non-multipathd-managed)
 * devices are dropped via delete_foreign() first. If the path is found
 * in the pathvec, hand it to ev_remove_path() under vecs->lock; an
 * already-absent path is logged but not treated as an error.
 */
782 uev_remove_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
787 condlog(2, "%s: remove path (uevent)", uev->kernel);
788 delete_foreign(uev->udev);
790 pthread_cleanup_push(cleanup_lock, &vecs->lock);
792 pthread_testcancel();
793 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
795 ret = ev_remove_path(pp, vecs, need_do_map);
796 lock_cleanup_pop(vecs->lock);
798 /* Not an error; path might have been purged earlier */
799 condlog(0, "%s: path already removed", uev->kernel);
/*
 * Core path-removal logic (vecs->lock held). If the path belongs to a
 * map, drop it from mpp->paths; when it was the last path, optionally
 * disable queueing (flush_on_last_del) and flush the whole map,
 * otherwise rebuild the table with setup_map(), reload it via domap()
 * (ACT_RELOAD) and re-sync state with setup_multipath(). Reloads are
 * deferred (wait_for_udev = 2) while the map is still waiting for udev.
 * Finally the path itself is removed from the pathvec; the map is torn
 * down on the error path.
 */
806 ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
808 struct multipath * mpp;
810 char params[PARAMS_SIZE] = {0};
813 * avoid referring to the map of an orphaned path
815 if ((mpp = pp->mpp)) {
817 * transform the mp->pg vector of vectors of paths
818 * into a mp->params string to feed the device-mapper
820 if (update_mpp_paths(mpp, vecs->pathvec)) {
821 condlog(0, "%s: failed to update paths",
825 if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
826 vector_del_slot(mpp->paths, i);
829 * remove the map IFF removing the last path
831 if (VECTOR_SIZE(mpp->paths) == 0) {
832 char alias[WWID_SIZE];
835 * flush_map will fail if the device is open
* NOTE(review): strncpy() does not guarantee NUL-termination when
* mpp->alias is >= WWID_SIZE chars — confirm alias length is bounded
* upstream, or this should be snprintf(alias, WWID_SIZE, "%s", ...).
837 strncpy(alias, mpp->alias, WWID_SIZE);
838 if (mpp->flush_on_last_del == FLUSH_ENABLED) {
839 condlog(2, "%s Last path deleted, disabling queueing", mpp->alias);
841 mpp->no_path_retry = NO_PATH_RETRY_FAIL;
842 mpp->disable_queueing = 1;
843 mpp->stat_map_failures++;
844 dm_queue_if_no_path(mpp->alias, 0);
846 if (!flush_map(mpp, vecs, 1)) {
847 condlog(2, "%s: removed map after"
848 " removing all paths",
854 * Not an error, continue
858 if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
859 condlog(0, "%s: failed to setup map for"
860 " removal of path %s", mpp->alias, pp->dev);
864 if (mpp->wait_for_udev) {
865 mpp->wait_for_udev = 2;
874 mpp->action = ACT_RELOAD;
875 if (domap(mpp, params, 1) <= 0) {
876 condlog(0, "%s: failed in domap for "
877 "removal of path %s",
878 mpp->alias, pp->dev);
882 * update our state from kernel
884 if (setup_multipath(vecs, mpp))
888 condlog(2, "%s [%s]: path removed from map %s",
889 pp->dev, pp->dev_t, mpp->alias);
894 if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
895 vector_del_slot(vecs->pathvec, i);
902 remove_map_and_stop_waiter(mpp, vecs, 1);
/*
 * uevent handler for path "change" events. Foreign paths are filtered
 * out first via change_foreign(). For known paths: when
 * disable_changed_wwids is set, re-read the uid and fail the path in DM
 * if its wwid changed (restoring the stored wwid). A read-only state
 * change (DISK_RO) triggers a map reload with force_readonly. Unknown
 * paths are either reported as blacklisted (via a pathinfo probe with
 * DI_SYSFS | DI_WWID) or, for INIT_REQUESTED_UDEV paths, re-added by
 * delegating to uev_add_path().
 */
907 uev_update_path (struct uevent *uev, struct vectors * vecs)
909 int ro, retval = 0, rc;
912 int disable_changed_wwids;
913 int needs_reinit = 0;
915 switch ((rc = change_foreign(uev->udev))) {
917 /* known foreign path, ignore event */
919 case FOREIGN_IGNORED:
922 condlog(3, "%s: error in change_foreign", __func__);
925 condlog(1, "%s: return code %d of change_forein is unsupported",
930 conf = get_multipath_config();
931 disable_changed_wwids = conf->disable_changed_wwids;
932 put_multipath_config(conf);
934 ro = uevent_get_disk_ro(uev);
936 pthread_cleanup_push(cleanup_lock, &vecs->lock);
938 pthread_testcancel();
940 pp = find_path_by_dev(vecs->pathvec, uev->kernel);
942 struct multipath *mpp = pp->mpp;
944 if (disable_changed_wwids &&
945 (strlen(pp->wwid) || pp->wwid_changed)) {
946 char wwid[WWID_SIZE];
948 strcpy(wwid, pp->wwid);
949 get_uid(pp, pp->state, uev->udev);
950 if (strcmp(wwid, pp->wwid) != 0) {
951 condlog(0, "%s: path wwid changed from '%s' to '%s'. disallowing", uev->kernel, wwid, pp->wwid);
952 strcpy(pp->wwid, wwid);
953 if (!pp->wwid_changed) {
954 pp->wwid_changed = 1;
957 dm_fail_path(pp->mpp->alias, pp->dev_t);
961 pp->wwid_changed = 0;
964 if (pp->initialized == INIT_REQUESTED_UDEV)
966 else if (mpp && ro >= 0) {
967 condlog(2, "%s: update path write_protect to '%d' (uevent)", uev->kernel, ro);
969 if (mpp->wait_for_udev)
970 mpp->wait_for_udev = 2;
973 pp->mpp->force_readonly = 1;
974 retval = reload_map(vecs, mpp, 0, 1);
975 pp->mpp->force_readonly = 0;
976 condlog(2, "%s: map %s reloaded (retval %d)",
977 uev->kernel, mpp->alias, retval);
982 lock_cleanup_pop(vecs->lock);
984 /* If the path is blacklisted, print a debug/non-default verbosity message. */
986 int flag = DI_SYSFS | DI_WWID;
988 conf = get_multipath_config();
989 retval = alloc_path_with_pathinfo(conf, uev->udev, uev->wwid, flag, NULL);
990 put_multipath_config(conf);
992 if (retval == PATHINFO_SKIPPED) {
993 condlog(3, "%s: spurious uevent, path is blacklisted", uev->kernel);
998 condlog(0, "%s: spurious uevent, path not found", uev->kernel);
1001 retval = uev_add_path(uev, vecs, 1);
/*
 * Feed dm-mpath PATH_FAILED uevents into the io_err_stat machinery.
 * Only DM_ACTION == "PATH_FAILED" events are considered; the failing
 * path is looked up by its DM_PATH (dev_t) under vecs->lock and handed
 * to io_err_stat_handle_pathfail() for marginal-path accounting.
 */
1006 uev_pathfail_check(struct uevent *uev, struct vectors *vecs)
1008 const char *action = NULL, *devt = NULL;
1012 action = uevent_get_dm_action(uev);
1015 if (strncmp(action, "PATH_FAILED", 11))
1017 devt = uevent_get_dm_path(uev);
1019 condlog(3, "%s: No DM_PATH in uevent", uev->kernel);
1023 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1025 pthread_testcancel();
1026 pp = find_path_by_devt(vecs->pathvec, devt);
1029 r = io_err_stat_handle_pathfail(pp);
1031 condlog(3, "io_err_stat: %s: cannot handle pathfail uevent",
1034 lock_cleanup_pop(vecs->lock);
/*
 * Populate vecs->mpvec from the kernel's current dm maps, then refresh
 * each map's table and status; maps whose state cannot be read are
 * removed again.
 */
1044 map_discovery (struct vectors * vecs)
1046 struct multipath * mpp;
1049 if (dm_get_maps(vecs->mpvec))
1052 vector_foreach_slot (vecs->mpvec, mpp, i)
1053 if (update_multipath_table(mpp, vecs->pathvec, 1) ||
1054 update_multipath_status(mpp)) {
1055 remove_map(mpp, vecs, 1);
/*
 * Callback invoked for each command received on the unix control
 * socket. Non-root clients may only issue "list"/"show" commands;
 * everything else returns a permission error. The command is dispatched
 * through parse_cmd() (bounded by uxsock_timeout) and *reply/*len are
 * filled with "timeout\n", "fail\n" or "ok\n" when parse_cmd did not
 * produce output itself; a negative r leaves *reply untouched.
 */
1063 uxsock_trigger (char * str, char ** reply, int * len, bool is_root,
1064 void * trigger_data)
1066 struct vectors * vecs;
1071 vecs = (struct vectors *)trigger_data;
1073 if ((str != NULL) && (is_root == false) &&
1074 (strncmp(str, "list", strlen("list")) != 0) &&
1075 (strncmp(str, "show", strlen("show")) != 0)) {
1076 *reply = STRDUP("permission deny: need to be root");
1078 *len = strlen(*reply) + 1;
1082 r = parse_cmd(str, reply, len, vecs, uxsock_timeout / 1000);
1086 *reply = STRDUP("timeout\n");
1088 *reply = STRDUP("fail\n");
1090 *len = strlen(*reply) + 1;
1093 else if (!r && *len == 0) {
1094 *reply = STRDUP("ok\n");
1096 *len = strlen(*reply) + 1;
1099 /* else if (r < 0) leave *reply alone */
/*
 * Central uevent dispatcher, called by the uevent service thread.
 * Blocks (on config_cond) until the daemon is IDLE or RUNNING, bailing
 * out on SHUTDOWN. "dm-*" events are routed to the map handlers
 * (uev_add_map / uev_pathfail_check / uev_remove_map), with
 * non-multipath dm devices handed to the foreign-device hooks. Plain
 * device events are routed to the path handlers; merged add/remove
 * events queued on uev->merge_node are processed first with
 * need_do_map=0, then the trigger event itself with need_do_map=1.
 */
1105 uev_trigger (struct uevent * uev, void * trigger_data)
1108 struct vectors * vecs;
1109 struct uevent *merge_uev, *tmp;
1111 vecs = (struct vectors *)trigger_data;
1113 pthread_cleanup_push(config_cleanup, NULL);
1114 pthread_mutex_lock(&config_lock);
1115 if (running_state != DAEMON_IDLE &&
1116 running_state != DAEMON_RUNNING)
1117 pthread_cond_wait(&config_cond, &config_lock);
1118 pthread_cleanup_pop(1);
1120 if (running_state == DAEMON_SHUTDOWN)
1125 * Add events are ignored here as the tables
1126 * are not fully initialised then.
1128 if (!strncmp(uev->kernel, "dm-", 3)) {
1129 if (!uevent_is_mpath(uev)) {
1130 if (!strncmp(uev->action, "change", 6))
1131 (void)add_foreign(uev->udev);
1132 else if (!strncmp(uev->action, "remove", 6))
1133 (void)delete_foreign(uev->udev);
1136 if (!strncmp(uev->action, "change", 6)) {
1137 r = uev_add_map(uev, vecs);
1140 * the kernel-side dm-mpath issues a PATH_FAILED event
1141 * when it encounters a path IO error. It is reason-
1142 * able be the entry of path IO error accounting pro-
1145 uev_pathfail_check(uev, vecs);
1146 } else if (!strncmp(uev->action, "remove", 6)) {
1147 r = uev_remove_map(uev, vecs);
1153 * path add/remove/change event, add/remove maybe merged
1155 list_for_each_entry_safe(merge_uev, tmp, &uev->merge_node, node) {
1156 if (!strncmp(merge_uev->action, "add", 3))
1157 r += uev_add_path(merge_uev, vecs, 0);
1158 if (!strncmp(merge_uev->action, "remove", 6))
1159 r += uev_remove_path(merge_uev, vecs, 0);
1162 if (!strncmp(uev->action, "add", 3))
1163 r += uev_add_path(uev, vecs, 1);
1164 if (!strncmp(uev->action, "remove", 6))
1165 r += uev_remove_path(uev, vecs, 1);
1166 if (!strncmp(uev->action, "change", 6))
1167 r += uev_update_path(uev, vecs);
/*
 * pthread cleanup handler: deregister the exiting thread from
 * userspace-RCU (pushed by each service thread below).
 */
1173 static void rcu_unregister(void *param)
1175 rcu_unregister_thread();
/*
 * Service thread: register with RCU and run the netlink uevent
 * listener until it fails or the daemon shuts down.
 */
1179 ueventloop (void * ap)
1181 struct udev *udev = ap;
1183 pthread_cleanup_push(rcu_unregister, NULL);
1184 rcu_register_thread();
1185 if (uevent_listen(udev))
1186 condlog(0, "error starting uevent listener");
1187 pthread_cleanup_pop(1);
/*
 * Service thread: register with RCU and dispatch queued uevents
 * through uev_trigger() (see uevent_dispatch()).
 */
1192 uevqloop (void * ap)
1194 pthread_cleanup_push(rcu_unregister, NULL);
1195 rcu_register_thread();
1196 if (uevent_dispatch(&uev_trigger, ap))
1197 condlog(0, "error starting uevent dispatcher");
1198 pthread_cleanup_pop(1);
/*
 * Service thread for the unix control socket. After RCU registration,
 * install one handler per CLI verb (set_unlocked_handler_callback for
 * commands that must not take the vecs lock, e.g. shutdown/reconfigure,
 * plain set_handler_callback otherwise), then block in uxsock_listen()
 * dispatching client commands through uxsock_trigger().
 */
1202 uxlsnrloop (void * ap)
1205 condlog(1, "Failed to init uxsock listener");
1208 pthread_cleanup_push(rcu_unregister, NULL);
1209 rcu_register_thread();
1210 set_handler_callback(LIST+PATHS, cli_list_paths);
1211 set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
1212 set_handler_callback(LIST+PATHS+RAW+FMT, cli_list_paths_raw);
1213 set_handler_callback(LIST+PATH, cli_list_path);
1214 set_handler_callback(LIST+MAPS, cli_list_maps);
1215 set_unlocked_handler_callback(LIST+STATUS, cli_list_status);
1216 set_unlocked_handler_callback(LIST+DAEMON, cli_list_daemon);
1217 set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
1218 set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
1219 set_handler_callback(LIST+MAPS+FMT, cli_list_maps_fmt);
1220 set_handler_callback(LIST+MAPS+RAW+FMT, cli_list_maps_raw);
1221 set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
1222 set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
1223 set_handler_callback(LIST+MAPS+JSON, cli_list_maps_json);
1224 set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
1225 set_handler_callback(LIST+MAP+FMT, cli_list_map_fmt);
1226 set_handler_callback(LIST+MAP+RAW+FMT, cli_list_map_fmt);
1227 set_handler_callback(LIST+MAP+JSON, cli_list_map_json);
1228 set_handler_callback(LIST+CONFIG, cli_list_config);
1229 set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
1230 set_handler_callback(LIST+DEVICES, cli_list_devices);
1231 set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
1232 set_handler_callback(RESET+MAPS+STATS, cli_reset_maps_stats);
1233 set_handler_callback(RESET+MAP+STATS, cli_reset_map_stats);
1234 set_handler_callback(ADD+PATH, cli_add_path);
1235 set_handler_callback(DEL+PATH, cli_del_path);
1236 set_handler_callback(ADD+MAP, cli_add_map);
1237 set_handler_callback(DEL+MAP, cli_del_map);
1238 set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
1239 set_unlocked_handler_callback(RECONFIGURE, cli_reconfigure);
1240 set_handler_callback(SUSPEND+MAP, cli_suspend);
1241 set_handler_callback(RESUME+MAP, cli_resume);
1242 set_handler_callback(RESIZE+MAP, cli_resize);
1243 set_handler_callback(RELOAD+MAP, cli_reload);
1244 set_handler_callback(RESET+MAP, cli_reassign);
1245 set_handler_callback(REINSTATE+PATH, cli_reinstate);
1246 set_handler_callback(FAIL+PATH, cli_fail);
1247 set_handler_callback(DISABLEQ+MAP, cli_disable_queueing);
1248 set_handler_callback(RESTOREQ+MAP, cli_restore_queueing);
1249 set_handler_callback(DISABLEQ+MAPS, cli_disable_all_queueing);
1250 set_handler_callback(RESTOREQ+MAPS, cli_restore_all_queueing);
1251 set_unlocked_handler_callback(QUIT, cli_quit);
1252 set_unlocked_handler_callback(SHUTDOWN, cli_shutdown);
1253 set_handler_callback(GETPRSTATUS+MAP, cli_getprstatus);
1254 set_handler_callback(SETPRSTATUS+MAP, cli_setprstatus);
1255 set_handler_callback(UNSETPRSTATUS+MAP, cli_unsetprstatus);
1256 set_handler_callback(FORCEQ+DAEMON, cli_force_no_daemon_q);
1257 set_handler_callback(RESTOREQ+DAEMON, cli_restore_no_daemon_q);
1258 set_handler_callback(GETPRKEY+MAP, cli_getprkey);
1259 set_handler_callback(SETPRKEY+MAP+KEY, cli_setprkey);
1260 set_handler_callback(UNSETPRKEY+MAP, cli_unsetprkey);
1263 uxsock_listen(&uxsock_trigger, ap);
1264 pthread_cleanup_pop(1);
1271 post_config_state(DAEMON_SHUTDOWN);
/*
 * Proactively fail a path in the DM map; when del_active is set, the
 * map's queueing mode is also updated for the lost active path.
 */
1275 fail_path (struct path * pp, int del_active)
1280 condlog(2, "checker failed path %s in map %s",
1281 pp->dev_t, pp->mpp->alias);
1283 dm_fail_path(pp->mpp->alias, pp->dev_t);
1285 update_queue_mode_del_path(pp->mpp);
1289 * caller must have locked the path list before calling that function
/*
 * Reinstate a path in the DM map; when add_active is set, the map's
 * queueing mode is updated for the regained active path.
 * Caller must hold the path list lock (see comment above).
 */
1292 reinstate_path (struct path * pp, int add_active)
1299 if (dm_reinstate_path(pp->mpp->alias, pp->dev_t)) {
1300 condlog(0, "%s: reinstate failed", pp->dev_t);
1303 condlog(2, "%s: reinstated", pp->dev_t);
1305 update_queue_mode_add_path(pp->mpp);
/*
 * Re-enable the path group that 'pp' belongs to, if it is currently
 * disabled in the kernel. Safe to return early when pgindex is unset
 * (path freshly added via uev_add_path): the next map reload enables
 * all groups anyway. Note pgindex is 1-based; the pg vector is 0-based.
 */
1311 enable_group(struct path * pp)
1313 struct pathgroup * pgp;
1316 * if path is added through uev_add_path, pgindex can be unset.
1317 * next update_strings() will set it, upon map reload event.
1319 * we can safely return here, because upon map reload, all
1320 * PG will be enabled.
1322 if (!pp->mpp->pg || !pp->pgindex)
1325 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1327 if (pgp->status == PGSTATE_DISABLED) {
1328 condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
1329 dm_enablegroup(pp->mpp->alias, pp->pgindex);
/*
 * Drop maps from vecs->mpvec whose DM device no longer exists in the
 * kernel (the map was removed behind our back).
 */
1334 mpvec_garbage_collector (struct vectors * vecs)
1336 struct multipath * mpp;
1342 vector_foreach_slot (vecs->mpvec, mpp, i) {
1343 if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
1344 condlog(2, "%s: remove dead map", mpp->alias);
1345 remove_map_and_stop_waiter(mpp, vecs, 1);
1351 /* This is called after a path has started working again. It the multipath
1352 * device for this path uses the followover failback type, and this is the
1353 * best pathgroup, and this is the first path in the pathgroup to come back
1354 * up, then switch to this pathgroup */
/*
 * For FOLLOWOVER failback maps: return whether to switch to pp's group.
 * Only relevant if pp's group is the best group; checks that pp is the
 * first path of that group to come back up (all its siblings are still
 * DOWN or SHAKY per their checker state).
 */
1356 followover_should_failback(struct path * pp)
1358 struct pathgroup * pgp;
1362 if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
1363 !pp->mpp->pg || !pp->pgindex ||
1364 pp->pgindex != pp->mpp->bestpg)
1367 pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
1368 vector_foreach_slot(pgp->paths, pp1, i) {
1371 if (pp1->chkrstate != PATH_DOWN && pp1->chkrstate != PATH_SHAKY)
/*
 * Checker-loop tick: for maps still waiting on their creation uevent,
 * decrement uev_wait_tick and, on timeout, give up waiting (re-enabling
 * reloads) — performing any delayed map update if one was queued
 * (wait_for_udev > 1). If any map timed out and delayed_reconfig is
 * configured, trigger the postponed reconfigure.
 */
1378 missing_uev_wait_tick(struct vectors *vecs)
1380 struct multipath * mpp;
1382 int timed_out = 0, delayed_reconfig;
1383 struct config *conf;
1385 vector_foreach_slot (vecs->mpvec, mpp, i) {
1386 if (mpp->wait_for_udev && --mpp->uev_wait_tick <= 0) {
1388 condlog(0, "%s: timeout waiting on creation uevent. enabling reloads", mpp->alias);
1389 if (mpp->wait_for_udev > 1 && update_map(mpp, vecs)) {
1390 /* update_map removed map */
1394 mpp->wait_for_udev = 0;
1398 conf = get_multipath_config();
1399 delayed_reconfig = conf->delayed_reconfig;
1400 put_multipath_config(conf);
1401 if (timed_out && delayed_reconfig &&
1402 !need_to_delay_reconfig(vecs)) {
1403 condlog(2, "reconfigure (delayed)");
1404 set_config_state(DAEMON_CONFIGURE);
/*
 * Checker-loop tick: count down ghost_delay_tick for maps that only
 * have ghost paths; on expiry, force a udev reload via update_map() so
 * the map is used even without a fully active path.
 */
1409 ghost_delay_tick(struct vectors *vecs)
1411 struct multipath * mpp;
1414 vector_foreach_slot (vecs->mpvec, mpp, i) {
1415 if (mpp->ghost_delay_tick <= 0)
1417 if (--mpp->ghost_delay_tick <= 0) {
1418 condlog(0, "%s: timed out waiting for active path",
1420 mpp->force_udev_reload = 1;
1421 if (update_map(mpp, vecs) != 0) {
1422 /* update_map removed map */
/*
 * Checker-loop tick for deferred (time-based) failback: decrement each
 * map's failback_tick and switch path groups when the timer expires and
 * a better group is available (need_switch_pathgroup with refresh).
 */
1431 defered_failback_tick (vector mpvec)
1433 struct multipath * mpp;
1436 vector_foreach_slot (mpvec, mpp, i) {
1438 * defered failback getting sooner
1440 if (mpp->pgfailback > 0 && mpp->failback_tick > 0) {
1441 mpp->failback_tick--;
1443 if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1))
1444 switch_pathgroup(mpp);
/*
 * Checker-loop tick for no_path_retry: while retry_tick is counting
 * down the map keeps queueing (stat_total_queueing_time accumulates);
 * when it reaches zero, disable queue_if_no_path in the kernel and
 * count a map failure.
 */
1450 retry_count_tick(vector mpvec)
1452 struct multipath *mpp;
1455 vector_foreach_slot (mpvec, mpp, i) {
1456 if (mpp->retry_tick > 0) {
1457 mpp->stat_total_queueing_time++;
1458 condlog(4, "%s: Retrying.. No active path", mpp->alias);
1459 if(--mpp->retry_tick == 0) {
1460 mpp->stat_map_failures++;
1461 dm_queue_if_no_path(mpp->alias, 0);
1462 condlog(2, "%s: Disable queueing", mpp->alias);
/*
 * Refresh path priorities. With refresh_all set, re-read the priority
 * (DI_PRIO) of every path in every group of pp's map and report whether
 * any changed. Otherwise refresh only pp itself (skipping PATH_DOWN
 * paths) and report whether its priority changed.
 */
1468 int update_prio(struct path *pp, int refresh_all)
1472 struct pathgroup * pgp;
1473 int i, j, changed = 0;
1474 struct config *conf;
1477 vector_foreach_slot (pp->mpp->pg, pgp, i) {
1478 vector_foreach_slot (pgp->paths, pp1, j) {
1479 oldpriority = pp1->priority;
1480 conf = get_multipath_config();
1481 pathinfo(pp1, conf, DI_PRIO);
1482 put_multipath_config(conf);
1483 if (pp1->priority != oldpriority)
1489 oldpriority = pp->priority;
1490 conf = get_multipath_config();
1491 if (pp->state != PATH_DOWN)
1492 pathinfo(pp, conf, DI_PRIO);
1493 put_multipath_config(conf);
1495 if (pp->priority == oldpriority)
/*
 * Reload the DM table for 'mpp' (regrouping paths), then re-sync our
 * view of the map from the kernel and push path states back down.
 */
1500 int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
1502 if (reload_map(vecs, mpp, refresh, 1))
1506 if (setup_multipath(vecs, mpp) != 0)
1508 sync_map_state(mpp);
/*
 * Run the checker's repair hook for a path that is PATH_DOWN and log
 * the checker message; non-down paths are left alone.
 */
1513 void repair_path(struct path * pp)
1515 if (pp->state != PATH_DOWN)
1518 checker_repair(&pp->checker);
1519 LOG_MSG(1, checker_message(&pp->checker));
/*
 * NOTE(review): lines appear to have been elided from this chunk (the
 * original file's line numbers are embedded in the text).  Comments
 * below describe only the statements that are visible here.
 */
1523 * Returns '1' if the path has been checked, '-1' if it was blacklisted
/*
 * Run one checker cycle for a single path: determine its new state,
 * push fail/reinstate decisions into device-mapper, adapt the per-path
 * polling interval, and arm failback / pathgroup switching.
 */
1527 check_path (struct vectors * vecs, struct path * pp, int ticks)
1530 int new_path_up = 0;
1531 int chkr_new_path_up = 0;
1533 int disable_reinstate = 0;
1534 int oldchkrstate = pp->chkrstate;
1535 int retrigger_tries, checkint;
1536 struct config *conf;
/* initialized paths that lost their map are not checked here */
1539 if ((pp->initialized == INIT_OK ||
1540 pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp)
/* count down the path's tick budget; only check once it expires */
1544 pp->tick -= (pp->tick > ticks) ? ticks : pp->tick;
1546 return 0; /* don't check this path yet */
/* snapshot config values under the get/put config protocol */
1548 conf = get_multipath_config();
1549 retrigger_tries = conf->retrigger_tries;
1550 checkint = conf->checkint;
1551 put_multipath_config(conf);
/*
 * Path still lacks udev information: ask the kernel to replay a
 * "change" uevent so initialization can be retried (bounded by
 * retrigger_tries).
 */
1552 if (!pp->mpp && pp->initialized == INIT_MISSING_UDEV &&
1553 pp->retriggers < retrigger_tries) {
1554 condlog(2, "%s: triggering change event to reinitialize",
1556 pp->initialized = INIT_REQUESTED_UDEV;
1558 sysfs_attr_set_value(pp->udev, "uevent", "change",
1564 * provision a next check soonest,
1565 * in case we exit abnormaly from here
1567 pp->tick = checkint;
1569 newstate = path_offline(pp);
1571 * Wait for uevent for removed paths;
1572 * some LLDDs like zfcp keep paths unavailable
1573 * without sending uevents.
1575 if (newstate == PATH_REMOVED)
1576 newstate = PATH_DOWN;
/* device is online per sysfs: ask the path checker for the real state */
1578 if (newstate == PATH_UP) {
1579 conf = get_multipath_config();
1580 newstate = get_state(pp, conf, 1, newstate);
1581 put_multipath_config(conf);
1583 checker_clear_message(&pp->checker);
/* a changed WWID means this node no longer maps to the same LU */
1585 if (pp->wwid_changed) {
1586 condlog(2, "%s: path wwid has changed. Refusing to use",
1588 newstate = PATH_DOWN;
1591 if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
1592 condlog(2, "%s: unusable path", pp->dev);
1593 conf = get_multipath_config();
1594 pathinfo(pp, conf, 0);
1595 put_multipath_config(conf);
/* usable path that never got a wwid: gather full info and add it now */
1599 if (!strlen(pp->wwid) && pp->initialized != INIT_MISSING_UDEV &&
1600 (newstate == PATH_UP || newstate == PATH_GHOST)) {
1601 condlog(2, "%s: add missing path", pp->dev);
1602 conf = get_multipath_config();
1603 ret = pathinfo(pp, conf, DI_ALL | DI_BLACKLIST);
1604 if (ret == PATHINFO_OK) {
1605 ev_add_path(pp, vecs, 1);
1607 } else if (ret == PATHINFO_SKIPPED) {
1608 put_multipath_config(conf);
1611 put_multipath_config(conf);
1616 * Async IO in flight. Keep the previous path state
1617 * and reschedule as soon as possible
1619 if (newstate == PATH_PENDING) {
1624 * Synchronize with kernel state
1626 if (update_multipath_strings(pp->mpp, vecs->pathvec, 1)) {
1627 condlog(1, "%s: Could not synchronize with kernel state",
1629 pp->dmstate = PSTATE_UNDEF;
1631 /* if update_multipath_strings orphaned the path, quit early */
/* io_err_stat flagged this path; treat it as shaky for now */
1635 if (pp->io_err_disable_reinstate && hit_io_err_recheck_time(pp)) {
1636 pp->state = PATH_SHAKY;
1638 * to reschedule as soon as possible,so that this path can
1639 * be recoverd in time
/* delay_wait_checks: hold a recovered path back while others are active */
1645 if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
1646 pp->wait_checks > 0) {
1647 if (pp->mpp->nr_active > 0) {
1648 pp->state = PATH_DELAYED;
1652 pp->wait_checks = 0;
1656 * don't reinstate failed path, if its in stand-by
1657 * and if target supports only implicit tpgs mode.
1658 * this will prevent unnecessary i/o by dm on stand-by
1659 * paths if there are no other active paths in map.
1661 disable_reinstate = (newstate == PATH_GHOST &&
1662 pp->mpp->nr_active == 0 &&
1663 pp->tpgs == TPGS_IMPLICIT) ? 1 : 0;
1665 pp->chkrstate = newstate;
/* state transition: log and drop back to the shortest poll interval */
1666 if (newstate != pp->state) {
1667 int oldstate = pp->state;
1668 pp->state = newstate;
1670 LOG_MSG(1, checker_message(&pp->checker));
1673 * upon state change, reset the checkint
1674 * to the shortest delay
1676 conf = get_multipath_config();
1677 pp->checkint = conf->checkint;
1678 put_multipath_config(conf);
1680 if (newstate != PATH_UP && newstate != PATH_GHOST) {
1682 * proactively fail path in the DM
1684 if (oldstate == PATH_UP ||
1685 oldstate == PATH_GHOST) {
/* a watched path failed again: arm the reinstate delay */
1687 if (pp->mpp->delay_wait_checks > 0 &&
1688 pp->watch_checks > 0) {
1689 pp->wait_checks = pp->mpp->delay_wait_checks;
1690 pp->watch_checks = 0;
1696 * cancel scheduled failback
1698 pp->mpp->failback_tick = 0;
1700 pp->mpp->stat_path_failures++;
/* path came (back) up */
1705 if(newstate == PATH_UP || newstate == PATH_GHOST){
1706 if ( pp->mpp && pp->mpp->prflag ){
1708 * Check Persistent Reservation.
1710 condlog(2, "%s: checking persistent reservation "
1711 "registration", pp->dev);
1712 mpath_pr_event_handle(pp);
1717 * reinstate this path
1719 if (oldstate != PATH_UP &&
1720 oldstate != PATH_GHOST) {
1721 if (pp->mpp->delay_watch_checks > 0)
1722 pp->watch_checks = pp->mpp->delay_watch_checks;
1725 if (pp->watch_checks > 0)
1729 if (!disable_reinstate && reinstate_path(pp, add_active)) {
1730 condlog(3, "%s: reload map", pp->dev);
1731 ev_add_path(pp, vecs, 1);
1737 if (oldchkrstate != PATH_UP && oldchkrstate != PATH_GHOST)
1738 chkr_new_path_up = 1;
1741 * if at least one path is up in a group, and
1742 * the group is disabled, re-enable it
1744 if (newstate == PATH_UP)
/* no transition, still up: make sure DM agrees the path is usable */
1747 else if (newstate == PATH_UP || newstate == PATH_GHOST) {
1748 if ((pp->dmstate == PSTATE_FAILED ||
1749 pp->dmstate == PSTATE_UNDEF) &&
1750 !disable_reinstate) {
1751 /* Clear IO errors */
1752 if (reinstate_path(pp, 0)) {
1753 condlog(3, "%s: reload map", pp->dev);
1754 ev_add_path(pp, vecs, 1);
1759 unsigned int max_checkint;
1760 LOG_MSG(4, checker_message(&pp->checker));
1761 conf = get_multipath_config();
1762 max_checkint = conf->max_checkint;
1763 put_multipath_config(conf);
/* stable path: back off the poll interval, capped at max_checkint */
1764 if (pp->checkint != max_checkint) {
1766 * double the next check delay.
1767 * max at conf->max_checkint
1769 if (pp->checkint < (max_checkint / 2))
1770 pp->checkint = 2 * pp->checkint;
1772 pp->checkint = max_checkint;
1774 condlog(4, "%s: delay next check %is",
1775 pp->dev_t, pp->checkint);
1777 if (pp->watch_checks > 0)
1779 pp->tick = pp->checkint;
/* no transition, still down: keep it failed in DM and log per config */
1782 else if (newstate != PATH_UP && newstate != PATH_GHOST) {
1783 if (pp->dmstate == PSTATE_ACTIVE ||
1784 pp->dmstate == PSTATE_UNDEF)
1786 if (newstate == PATH_DOWN) {
1787 int log_checker_err;
1789 conf = get_multipath_config();
1790 log_checker_err = conf->log_checker_err;
1791 put_multipath_config(conf);
1792 if (log_checker_err == LOG_CHKR_ERR_ONCE)
1793 LOG_MSG(3, checker_message(&pp->checker));
1795 LOG_MSG(2, checker_message(&pp->checker));
1799 pp->state = newstate;
/* defer prio/failback handling while the map waits for udev */
1802 if (pp->mpp->wait_for_udev)
1805 * path prio refreshing
1807 condlog(4, "path prio refresh");
/* prio changed under group_by_prio + immediate failback: regroup now */
1809 if (update_prio(pp, new_path_up) &&
1810 (pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio) &&
1811 pp->mpp->pgfailback == -FAILBACK_IMMEDIATE)
1812 update_path_groups(pp->mpp, vecs, !new_path_up);
1813 else if (need_switch_pathgroup(pp->mpp, 0)) {
/* deferred failback: (re)arm the countdown consumed by checkerloop */
1814 if (pp->mpp->pgfailback > 0 &&
1815 (new_path_up || pp->mpp->failback_tick <= 0))
1816 pp->mpp->failback_tick =
1817 pp->mpp->pgfailback + 1;
1818 else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE ||
1819 (chkr_new_path_up && followover_should_failback(pp)))
1820 switch_pathgroup(pp->mpp);
/*
 * Seed every known path's polling interval (checkint) from the current
 * configuration.  (Chunk is elided; the loop variables' declarations
 * are not visible here.)
 */
1825 static void init_path_check_interval(struct vectors *vecs)
1827 struct config *conf;
1831 vector_foreach_slot (vecs->pathvec, pp, i) {
/* fetch/release the config around each read, per the file's protocol */
1832 conf = get_multipath_config();
1833 pp->checkint = conf->checkint;
1834 put_multipath_config(conf);
/*
 * Path checker thread: once per tick, run check_path() on every path,
 * drive the deferred-failback/retry/uevent-wait countdowns, collect
 * garbage maps, pet the systemd watchdog, and sleep until the next
 * tick (optionally with strict SIGALRM-based timing).
 * NOTE(review): lines are elided from this chunk; comments only cover
 * the visible statements.
 */
1839 checkerloop (void *ap)
1841 struct vectors *vecs;
1845 struct itimerval timer_tick_it;
1846 struct timespec last_time;
1847 struct config *conf;
/* register with liburcu and pin memory for this long-lived thread */
1849 pthread_cleanup_push(rcu_unregister, NULL);
1850 rcu_register_thread();
1851 mlockall(MCL_CURRENT | MCL_FUTURE);
1852 vecs = (struct vectors *)ap;
1853 condlog(2, "path checkers start up");
1855 /* Tweak start time for initial path check */
1856 if (clock_gettime(CLOCK_MONOTONIC, &last_time) != 0)
1857 last_time.tv_sec = 0;
1859 last_time.tv_sec -= 1;
1862 struct timespec diff_time, start_time, end_time;
1863 int num_paths = 0, ticks = 0, signo, strict_timing, rc = 0;
/* derive how many whole seconds ("ticks") elapsed since last round */
1866 if (clock_gettime(CLOCK_MONOTONIC, &start_time) != 0)
1867 start_time.tv_sec = 0;
1868 if (start_time.tv_sec && last_time.tv_sec) {
1869 timespecsub(&start_time, &last_time, &diff_time);
1870 condlog(4, "tick (%lu.%06lu secs)",
1871 diff_time.tv_sec, diff_time.tv_nsec / 1000);
1872 last_time = start_time;
1873 ticks = diff_time.tv_sec;
1876 condlog(4, "tick (%d ticks)", ticks);
/* keep systemd's watchdog happy while we are alive */
1880 sd_notify(0, "WATCHDOG=1");
1882 rc = set_config_state(DAEMON_RUNNING);
1883 if (rc == ETIMEDOUT) {
1884 condlog(4, "timeout waiting for DAEMON_IDLE");
/* check every path under the vecs lock; cancellation-safe via cleanup */
1888 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1890 pthread_testcancel();
1891 vector_foreach_slot (vecs->pathvec, pp, i) {
1892 rc = check_path(vecs, pp, ticks);
1894 vector_del_slot(vecs->pathvec, i);
1900 lock_cleanup_pop(vecs->lock);
/* advance the per-map timers under a fresh lock acquisition */
1902 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1904 pthread_testcancel();
1905 defered_failback_tick(vecs->mpvec);
1906 retry_count_tick(vecs->mpvec);
1907 missing_uev_wait_tick(vecs);
1908 ghost_delay_tick(vecs);
1909 lock_cleanup_pop(vecs->lock);
1914 pthread_cleanup_push(cleanup_lock, &vecs->lock);
1916 pthread_testcancel();
1917 condlog(4, "map garbage collection");
1918 mpvec_garbage_collector(vecs);
1920 lock_cleanup_pop(vecs->lock);
/* measure how long the whole round took and warn if it exceeded the cap */
1923 diff_time.tv_nsec = 0;
1924 if (start_time.tv_sec &&
1925 clock_gettime(CLOCK_MONOTONIC, &end_time) == 0) {
1926 timespecsub(&end_time, &start_time, &diff_time);
1928 unsigned int max_checkint;
1930 condlog(3, "checked %d path%s in %lu.%06lu secs",
1931 num_paths, num_paths > 1 ? "s" : "",
1933 diff_time.tv_nsec / 1000);
1934 conf = get_multipath_config();
1935 max_checkint = conf->max_checkint;
1936 put_multipath_config(conf);
1937 if (diff_time.tv_sec > max_checkint)
1938 condlog(1, "path checkers took longer "
1939 "than %lu seconds, consider "
1940 "increasing max_polling_interval",
1945 post_config_state(DAEMON_IDLE);
1946 conf = get_multipath_config();
1947 strict_timing = conf->strict_timing;
1948 put_multipath_config(conf);
/* strict timing: arm a one-shot ITIMER_REAL to hit exact 1s boundaries */
1952 timer_tick_it.it_interval.tv_sec = 0;
1953 timer_tick_it.it_interval.tv_usec = 0;
1954 if (diff_time.tv_nsec) {
1955 timer_tick_it.it_value.tv_sec = 0;
1956 timer_tick_it.it_value.tv_usec =
/*
 * NOTE(review): it_value.tv_usec is in microseconds but this computes
 * a nanosecond remainder (1e9 - tv_nsec) — looks suspicious; verify
 * against upstream before relying on strict_timing accuracy.
 */
1957 1000UL * 1000 * 1000 - diff_time.tv_nsec;
1959 timer_tick_it.it_value.tv_sec = 1;
1960 timer_tick_it.it_value.tv_usec = 0;
1962 setitimer(ITIMER_REAL, &timer_tick_it, NULL);
1965 sigaddset(&mask, SIGALRM);
1966 condlog(3, "waiting for %lu.%06lu secs",
1967 timer_tick_it.it_value.tv_sec,
1968 timer_tick_it.it_value.tv_usec);
/* on sigwait failure, fall back to non-strict timing permanently */
1969 if (sigwait(&mask, &signo) != 0) {
1970 condlog(3, "sigwait failed with error %d",
1972 conf = get_multipath_config();
1973 conf->strict_timing = 0;
1974 put_multipath_config(conf);
1979 pthread_cleanup_pop(1);
/*
 * (Re)build the daemon's world view: discover paths from sysfs and
 * maps from device-mapper, filter blacklisted paths, coalesce paths
 * into maps, push them into DM, and start a waiter thread per map.
 * NOTE(review): lines are elided from this chunk; error-path labels
 * and some declarations are not visible.
 */
1984 configure (struct vectors * vecs)
1986 struct multipath * mpp;
1990 struct config *conf;
/* first call reloads weakly to avoid superfluous ACT_RELOAD ioctls */
1991 static int force_reload = FORCE_RELOAD_WEAK;
1993 if (!vecs->pathvec && !(vecs->pathvec = vector_alloc())) {
1994 condlog(0, "couldn't allocate path vec in configure");
1998 if (!vecs->mpvec && !(vecs->mpvec = vector_alloc())) {
1999 condlog(0, "couldn't allocate multipath vec in configure");
2003 if (!(mpvec = vector_alloc())) {
2004 condlog(0, "couldn't allocate new maps vec in configure");
2009 * probe for current path (from sysfs) and map (from dm) sets
2011 ret = path_discovery(vecs->pathvec, DI_ALL);
2013 condlog(0, "configure failed at path discovery");
/* drop blacklisted paths and (re)seed checkint on the survivors */
2017 vector_foreach_slot (vecs->pathvec, pp, i){
2018 conf = get_multipath_config();
2019 if (filter_path(conf, pp) > 0){
2020 vector_del_slot(vecs->pathvec, i);
2025 pp->checkint = conf->checkint;
2026 put_multipath_config(conf);
2028 if (map_discovery(vecs)) {
2029 condlog(0, "configure failed at map discovery");
2034 * create new set of maps & push changed ones into dm
2035 * In the first call, use FORCE_RELOAD_WEAK to avoid making
2036 * superfluous ACT_RELOAD ioctls. Later calls are done
2037 * with FORCE_RELOAD_YES.
2039 ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE);
2040 if (force_reload == FORCE_RELOAD_WEAK)
2041 force_reload = FORCE_RELOAD_YES;
2043 condlog(0, "configure failed while coalescing paths");
2048 * may need to remove some maps which are no longer relevant
2049 * e.g., due to blacklist changes in conf file
2051 if (coalesce_maps(vecs, mpvec)) {
2052 condlog(0, "configure failed while coalescing maps");
2058 sync_maps_state(mpvec);
/* persist the wwids of all maps we ended up with */
2059 vector_foreach_slot(mpvec, mpp, i){
2060 remember_wwid(mpp->wwid);
2065 * purge dm of old maps
2070 * save new set of maps formed by considering current path state
2072 vector_free(vecs->mpvec);
2073 vecs->mpvec = mpvec;
2076 * start dm event waiter threads for these new maps
2078 vector_foreach_slot(vecs->mpvec, mpp, i) {
2079 if (setup_multipath(vecs, mpp)) {
/* a map whose waiter cannot start is torn down again */
2083 if (start_waiter_thread(mpp, vecs)) {
2084 remove_map(mpp, vecs, 1);
/*
 * Decide whether a reconfigure must be postponed: true when any map in
 * vecs->mpvec is still waiting for a udev event.  (Return statements
 * are elided from this chunk.)
 */
2092 need_to_delay_reconfig(struct vectors * vecs)
2094 struct multipath *mpp;
/* nothing to wait for when there are no maps at all */
2097 if (!VECTOR_SIZE(vecs->mpvec))
2100 vector_foreach_slot(vecs->mpvec, mpp, i) {
2101 if (mpp->wait_for_udev)
/*
 * RCU callback: reclaim a struct config once all readers are done.
 * Scheduled via call_rcu(&conf->rcu, rcu_free_config).
 */
2107 void rcu_free_config(struct rcu_head *head)
2109 struct config *conf = container_of(head, struct config, rcu);
/*
 * Full reconfiguration: load a fresh config file, tear down all maps,
 * paths and waiters built under the old config, re-apply command-line
 * overrides, then publish the new config via RCU and reclaim the old
 * one.  NOTE(review): lines are elided from this chunk (e.g. the call
 * that rebuilds state after the swap is not visible).
 */
2115 reconfigure (struct vectors * vecs)
2117 struct config * old, *conf;
2119 conf = load_config(DEFAULT_CONFIGFILE)
2124 * free old map and path vectors ... they use old conf state
2126 if (VECTOR_SIZE(vecs->mpvec))
2127 remove_maps_and_stop_waiters(vecs);
2129 free_pathvec(vecs->pathvec, FREE_PATHS);
2130 vecs->pathvec = NULL;
2131 delete_all_foreign();
2133 /* Re-read any timezone changes */
2136 dm_drv_version(conf->version, TGT_MPATH);
/* command-line flags override the freshly loaded config */
2138 conf->verbosity = verbosity;
2139 if (bindings_read_only)
2140 conf->bindings_read_only = bindings_read_only;
2141 if (conf->find_multipaths) {
2142 condlog(2, "find_multipaths is set: -n is implied");
2143 ignore_new_devs = 1;
2145 if (ignore_new_devs)
2146 conf->ignore_new_devs = ignore_new_devs;
2147 uxsock_timeout = conf->uxsock_timeout;
/* atomically publish the new config; free the old one after grace period */
2149 old = rcu_dereference(multipath_conf);
2150 rcu_assign_pointer(multipath_conf, conf);
2151 call_rcu(&old->rcu, rcu_free_config);
/*
 * Allocate and initialize the global vectors container (path and map
 * lists plus their lock).  (Function name line and error handling are
 * elided from this chunk.)
 */
2159 static struct vectors *
2162 struct vectors * vecs;
2164 vecs = (struct vectors *)MALLOC(sizeof(struct vectors));
2169 pthread_mutex_init(&vecs->lock.mutex, NULL);
/*
 * sigaction() wrapper in the style of signal(): install func as the
 * handler for signo and return the previous handler.  (Error branch
 * and sa_flags setup are elided from this chunk.)
 */
2175 signal_set(int signo, void (*func) (int))
2178 struct sigaction sig;
2179 struct sigaction osig;
2181 sig.sa_handler = func;
2182 sigemptyset(&sig.sa_mask);
2185 r = sigaction(signo, &sig, &osig);
2190 return (osig.sa_handler);
/*
 * Act on the sig_atomic_t flags raised by the async signal handlers:
 * exit, reconfigure (switch to DAEMON_CONFIGURE), and log reset.
 * The flag tests and resets themselves are elided from this chunk.
 */
2194 handle_signals(bool nonfatal)
2197 condlog(2, "exit (signal)");
2204 condlog(2, "reconfigure (signal)");
2205 set_config_state(DAEMON_CONFIGURE);
2207 if (log_reset_sig) {
2208 condlog(2, "reset log (signal)");
/* serialize against the logger thread while resetting the log queue */
2209 pthread_mutex_lock(&logq_lock);
2210 log_reset("multipathd");
2211 pthread_mutex_unlock(&logq_lock);
2238 condlog(3, "SIGUSR2 received");
/*
 * NOTE(review): the enclosing function's header is elided from this
 * chunk; presumably the daemon's signal setup routine.  It blocks all
 * signals except SIGPIPE for this (and thus every spawned) thread,
 * then registers the process-wide handlers.
 */
2246 /* block all signals */
2248 /* SIGPIPE occurs if logging fails */
2249 sigdelset(&set, SIGPIPE);
2250 pthread_sigmask(SIG_SETMASK, &set, NULL);
2252 /* Other signals will be unblocked in the uxlsnr thread */
2253 signal_set(SIGHUP, sighup);
2254 signal_set(SIGUSR1, sigusr1);
2255 signal_set(SIGUSR2, sigusr2);
2256 signal_set(SIGINT, sigend);
2257 signal_set(SIGTERM, sigend);
2258 signal_set(SIGPIPE, sigend);
/*
 * NOTE(review): function header elided from this chunk.  Requests
 * real-time round-robin scheduling at the highest priority; failure is
 * only logged, not fatal.
 */
2265 static struct sched_param sched_param = {
2266 .sched_priority = 99
2269 res = sched_setscheduler (0, SCHED_RR, &sched_param);
2272 condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
/*
 * NOTE(review): function header elided from this chunk.  Makes the
 * daemon maximally unattractive to the kernel OOM killer by writing
 * the minimum score to /proc/self/oom_score_adj, falling back to the
 * legacy /proc/self/oom_adj interface on older kernels.  Skipped
 * entirely when systemd already supplies OOMScoreAdjust.
 */
2279 #ifdef OOM_SCORE_ADJ_MIN
2281 char *file = "/proc/self/oom_score_adj";
2282 int score = OOM_SCORE_ADJ_MIN;
2285 char *file = "/proc/self/oom_adj";
2286 int score = OOM_ADJUST_MIN;
2292 envp = getenv("OOMScoreAdjust");
2294 condlog(3, "Using systemd provided OOMScoreAdjust");
/* loop: try the preferred proc file first, then the legacy one */
2298 if (stat(file, &st) == 0){
2299 fp = fopen(file, "w");
2301 condlog(0, "couldn't fopen %s : %s", file,
2305 fprintf(fp, "%i", score);
/* ENOENT just means this interface doesn't exist on this kernel */
2309 if (errno != ENOENT) {
2310 condlog(0, "couldn't stat %s : %s", file,
2314 #ifdef OOM_ADJUST_MIN
2315 file = "/proc/self/oom_adj";
2316 score = OOM_ADJUST_MIN;
2321 condlog(0, "couldn't adjust oom score");
/*
 * Daemon main body (runs after daemonize()): set up logging, pidfile,
 * config, resource limits and the global vectors; spawn the uevent,
 * cli-listener, checker and uevent-dispatcher threads; then loop on
 * the config state machine until DAEMON_SHUTDOWN, and finally tear
 * everything down.  NOTE(review): lines are elided from this chunk;
 * comments only cover the visible statements.
 */
2325 child (void * param)
2327 pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr;
2328 pthread_attr_t log_attr, misc_attr, uevent_attr;
2329 struct vectors * vecs;
2330 struct multipath * mpp;
2333 unsigned long checkint;
2334 int startup_done = 0;
2338 struct config *conf;
/* pin all pages: the I/O daemon must never block on its own pagein */
2341 mlockall(MCL_CURRENT | MCL_FUTURE);
2345 setup_thread_attr(&misc_attr, 64 * 1024, 0);
2346 setup_thread_attr(&uevent_attr, DEFAULT_UEVENT_STACKSIZE * 1024, 0);
2347 setup_thread_attr(&waiter_attr, 32 * 1024, 1);
2348 setup_thread_attr(&io_err_stat_attr, 32 * 1024, 0);
2351 setup_thread_attr(&log_attr, 64 * 1024, 0);
2352 log_thread_start(&log_attr);
2353 pthread_attr_destroy(&log_attr);
2355 pid_fd = pidfile_create(DEFAULT_PIDFILE, daemon_pid);
2357 condlog(1, "failed to create pidfile");
2363 post_config_state(DAEMON_START);
2365 condlog(2, "--------start up--------");
2366 condlog(2, "read " DEFAULT_CONFIGFILE);
2368 conf = load_config(DEFAULT_CONFIGFILE);
/* apply command-line overrides before publishing the config */
2373 conf->verbosity = verbosity;
2374 if (bindings_read_only)
2375 conf->bindings_read_only = bindings_read_only;
2376 if (ignore_new_devs)
2377 conf->ignore_new_devs = ignore_new_devs;
2378 uxsock_timeout = conf->uxsock_timeout;
2379 rcu_assign_pointer(multipath_conf, conf);
2380 if (init_checkers(conf->multipath_dir)) {
2381 condlog(0, "failed to initialize checkers");
2384 if (init_prio(conf->multipath_dir)) {
2385 condlog(0, "failed to initialize prioritizers");
2388 /* Failing this is non-fatal */
2390 init_foreign(conf->multipath_dir);
2392 setlogmask(LOG_UPTO(conf->verbosity + 3));
/* honor systemd's LimitNOFILE if set, else raise RLIMIT_NOFILE ourselves */
2394 envp = getenv("LimitNOFILE");
2397 condlog(2,"Using systemd provided open fds limit of %s", envp);
2398 } else if (conf->max_fds) {
2399 struct rlimit fd_limit;
2401 if (getrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2402 condlog(0, "can't get open fds limit: %s",
2404 fd_limit.rlim_cur = 0;
2405 fd_limit.rlim_max = 0;
2407 if (fd_limit.rlim_cur < conf->max_fds) {
2408 fd_limit.rlim_cur = conf->max_fds;
2409 if (fd_limit.rlim_max < conf->max_fds)
2410 fd_limit.rlim_max = conf->max_fds;
2411 if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0) {
2412 condlog(0, "can't set open fds limit to "
2414 fd_limit.rlim_cur, fd_limit.rlim_max,
2417 condlog(3, "set open fds limit to %lu/%lu",
2418 fd_limit.rlim_cur, fd_limit.rlim_max);
2424 vecs = gvecs = init_vecs();
/* derive check intervals from systemd's watchdog period (microseconds) */
2432 envp = getenv("WATCHDOG_USEC");
2433 if (envp && sscanf(envp, "%lu", &checkint) == 1) {
2434 /* Value is in microseconds */
2435 conf->max_checkint = checkint / 1000000;
2436 /* Rescale checkint */
2437 if (conf->checkint > conf->max_checkint)
2438 conf->checkint = conf->max_checkint;
2440 conf->checkint = conf->max_checkint / 4;
2441 condlog(3, "enabling watchdog, interval %d max %d",
2442 conf->checkint, conf->max_checkint);
2443 use_watchdog = conf->checkint;
2447 * Startup done, invalidate configuration
2452 * Signal start of configuration
2454 post_config_state(DAEMON_CONFIGURE);
2456 init_path_check_interval(vecs);
2459 * Start uevent listener early to catch events
2461 if ((rc = pthread_create(&uevent_thr, &uevent_attr, ueventloop, udev))) {
2462 condlog(0, "failed to create uevent thread: %d", rc);
2465 pthread_attr_destroy(&uevent_attr);
2466 if ((rc = pthread_create(&uxlsnr_thr, &misc_attr, uxlsnrloop, vecs))) {
2467 condlog(0, "failed to create cli listener: %d", rc);
2474 if ((rc = pthread_create(&check_thr, &misc_attr, checkerloop, vecs))) {
2475 condlog(0,"failed to create checker loop thread: %d", rc);
2478 if ((rc = pthread_create(&uevq_thr, &misc_attr, uevqloop, vecs))) {
2479 condlog(0, "failed to create uevent dispatcher: %d", rc);
2482 pthread_attr_destroy(&misc_attr);
/*
 * Main state-machine loop: wait on config_cond for a state change and
 * run configure() whenever DAEMON_CONFIGURE is requested.
 */
2484 while (running_state != DAEMON_SHUTDOWN) {
2485 pthread_cleanup_push(config_cleanup, NULL);
2486 pthread_mutex_lock(&config_lock);
2487 if (running_state != DAEMON_CONFIGURE &&
2488 running_state != DAEMON_SHUTDOWN) {
2489 pthread_cond_wait(&config_cond, &config_lock);
2491 pthread_cleanup_pop(1);
2492 if (running_state == DAEMON_CONFIGURE) {
2493 pthread_cleanup_push(cleanup_lock, &vecs->lock);
2495 pthread_testcancel();
/* postpone reconfig while maps still wait for udev events */
2496 if (!need_to_delay_reconfig(vecs)) {
2499 conf = get_multipath_config();
2500 conf->delayed_reconfig = 1;
2501 put_multipath_config(conf);
2503 lock_cleanup_pop(vecs->lock);
2504 post_config_state(DAEMON_IDLE);
/* tell systemd we are ready exactly once, after first configure */
2506 if (!startup_done) {
2507 sd_notify(0, "READY=1");
/* ---- shutdown path ---- */
2515 conf = get_multipath_config();
2516 if (conf->queue_without_daemon == QUE_NO_DAEMON_OFF)
2517 vector_foreach_slot(vecs->mpvec, mpp, i)
2518 dm_queue_if_no_path(mpp->alias, 0);
2519 put_multipath_config(conf);
2520 remove_maps_and_stop_waiters(vecs);
2521 unlock(&vecs->lock);
2523 pthread_cancel(check_thr);
2524 pthread_cancel(uevent_thr);
2525 pthread_cancel(uxlsnr_thr);
2526 pthread_cancel(uevq_thr);
2528 pthread_join(check_thr, NULL);
2529 pthread_join(uevent_thr, NULL);
2530 pthread_join(uxlsnr_thr, NULL);
2531 pthread_join(uevq_thr, NULL);
2533 stop_io_err_stat_thread();
2536 free_pathvec(vecs->pathvec, FREE_PATHS);
2537 vecs->pathvec = NULL;
2538 unlock(&vecs->lock);
2540 pthread_mutex_destroy(&vecs->lock.mutex);
2551 /* We're done here */
2552 condlog(3, "unlink pidfile");
2553 unlink(DEFAULT_PIDFILE);
2555 condlog(2, "--------shut down-------");
2561 * Freeing config must be done after condlog() and dm_lib_exit(),
2562 * because logging functions like dlog() and dm_write_log()
2563 * reference the config.
2565 conf = rcu_dereference(multipath_conf);
2566 rcu_assign_pointer(multipath_conf, NULL);
2567 call_rcu(&conf->rcu, rcu_free_config);
2570 pthread_attr_destroy(&waiter_attr);
2571 pthread_attr_destroy(&io_err_stat_attr);
2573 dbg_free_final(NULL);
/* report clean vs. failed exit to systemd */
2577 sd_notify(0, "ERRNO=0");
2583 sd_notify(0, "ERRNO=1");
/*
 * NOTE(review): the enclosing function's header is elided from this
 * chunk; presumably the daemonize routine.  Classic double-fork to
 * detach from the controlling terminal, chdir to /, and redirect
 * stdin/stdout/stderr to /dev/null.
 */
2596 if( (pid = fork()) < 0){
2597 fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
2605 if ( (pid = fork()) < 0)
2606 fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
2611 fprintf(stderr, "cannot chdir to '/', continuing\n");
2613 dev_null_fd = open("/dev/null", O_RDWR);
2614 if (dev_null_fd < 0){
2615 fprintf(stderr, "cannot open /dev/null for input & output : %s\n",
/* close each std fd, then dup /dev/null into the freed slot */
2620 close(STDIN_FILENO);
2621 if (dup(dev_null_fd) < 0) {
2622 fprintf(stderr, "cannot dup /dev/null to stdin : %s\n",
2626 close(STDOUT_FILENO);
2627 if (dup(dev_null_fd) < 0) {
2628 fprintf(stderr, "cannot dup /dev/null to stdout : %s\n",
2632 close(STDERR_FILENO);
2633 if (dup(dev_null_fd) < 0) {
2634 fprintf(stderr, "cannot dup /dev/null to stderr : %s\n",
2639 daemon_pid = getpid();
2644 main (int argc, char *argv[])
2646 extern char *optarg;
2651 struct config *conf;
2653 ANNOTATE_BENIGN_RACE_SIZED(&multipath_conf, sizeof(multipath_conf),
2654 "Manipulated through RCU");
2655 ANNOTATE_BENIGN_RACE_SIZED(&running_state, sizeof(running_state),
2656 "Suppress complaints about unprotected running_state reads");
2657 ANNOTATE_BENIGN_RACE_SIZED(&uxsock_timeout, sizeof(uxsock_timeout),
2658 "Suppress complaints about this scalar variable");
2662 if (getuid() != 0) {
2663 fprintf(stderr, "need to be root\n");
2667 /* make sure we don't lock any path */
2669 fprintf(stderr, "can't chdir to root directory : %s\n",
2671 umask(umask(077) | 022);
2673 pthread_cond_init_mono(&config_cond);
2676 libmp_udev_set_sync_support(0);
2678 while ((arg = getopt(argc, argv, ":dsv:k::Bn")) != EOF ) {
2684 //debug=1; /* ### comment me out ### */
2687 if (sizeof(optarg) > sizeof(char *) ||
2688 !isdigit(optarg[0]))
2691 verbosity = atoi(optarg);
2697 conf = load_config(DEFAULT_CONFIGFILE);
2701 conf->verbosity = verbosity;
2702 uxsock_timeout = conf->uxsock_timeout;
2703 uxclnt(optarg, uxsock_timeout + 100);
2707 bindings_read_only = 1;
2710 ignore_new_devs = 1;
2713 fprintf(stderr, "Invalid argument '-%c'\n",
2718 if (optind < argc) {
2723 conf = load_config(DEFAULT_CONFIGFILE);
2727 conf->verbosity = verbosity;
2728 uxsock_timeout = conf->uxsock_timeout;
2729 memset(cmd, 0x0, CMDSIZE);
2730 while (optind < argc) {
2731 if (strchr(argv[optind], ' '))
2732 c += snprintf(c, s + CMDSIZE - c, "\"%s\" ", argv[optind]);
2734 c += snprintf(c, s + CMDSIZE - c, "%s ", argv[optind]);
2737 c += snprintf(c, s + CMDSIZE - c, "\n");
2738 uxclnt(s, uxsock_timeout + 100);
2744 if (!isatty(fileno(stdout)))
2745 setbuf(stdout, NULL);
2747 daemon_pid = getpid();
2759 return (child(NULL));
/*
 * Thread body for persistent-reservation events: read the registered
 * keys from the path (PR IN, READ KEYS), check whether the map's
 * reservation_key is among them, and if not, register it on this path
 * via PR OUT REGISTER-AND-IGNORE.  NOTE(review): lines are elided from
 * this chunk (mpp assignment, returns, frees are not all visible).
 */
2762 void * mpath_pr_event_handler_fn (void * pathp )
2764 struct multipath * mpp;
2765 int i, ret, isFound;
2766 struct path * pp = (struct path *)pathp;
2767 struct prout_param_descriptor *param;
2768 struct prin_resp *resp;
2772 resp = mpath_alloc_prin_response(MPATH_PRIN_RKEY_SA);
2774 condlog(0,"%s Alloc failed for prin response", pp->dev);
2778 ret = prin_do_scsi_ioctl(pp->dev, MPATH_PRIN_RKEY_SA, resp, 0);
2779 if (ret != MPATH_PR_SUCCESS )
2781 condlog(0,"%s : pr in read keys service action failed. Error=%d", pp->dev, ret);
2785 condlog(3, " event pr=%d addlen=%d",resp->prin_descriptor.prin_readkeys.prgeneration,
2786 resp->prin_descriptor.prin_readkeys.additional_length );
/* no keys at all: device was never registered */
2788 if (resp->prin_descriptor.prin_readkeys.additional_length == 0 )
2790 condlog(1, "%s: No key found. Device may not be registered.", pp->dev);
2791 ret = MPATH_PR_SUCCESS;
2794 condlog(2, "Multipath reservation_key: 0x%" PRIx64 " ",
2795 get_be64(mpp->reservation_key));
/* scan the 8-byte key slots for our map's reservation key */
2798 for (i = 0; i < resp->prin_descriptor.prin_readkeys.additional_length/8; i++ )
2800 condlog(2, "PR IN READKEYS[%d] reservation key:",i);
2801 dumpHex((char *)&resp->prin_descriptor.prin_readkeys.key_list[i*8], 8 , -1);
2802 if (!memcmp(&mpp->reservation_key, &resp->prin_descriptor.prin_readkeys.key_list[i*8], 8))
2804 condlog(2, "%s: pr key found in prin readkeys response", mpp->alias);
/* other keys exist but ours is not among them: skip this path */
2811 condlog(0, "%s: Either device not registered or ", pp->dev);
2812 condlog(0, "host is not authorised for registration. Skip path");
2813 ret = MPATH_PR_OTHER;
/*
 * NOTE(review): malloc result is used unchecked below — verify against
 * upstream whether an elided NULL check exists.
 */
2817 param= malloc(sizeof(struct prout_param_descriptor));
2818 memset(param, 0 , sizeof(struct prout_param_descriptor));
2819 memcpy(param->sa_key, &mpp->reservation_key, 8);
2820 param->num_transportid = 0;
2822 condlog(3, "device %s:%s", pp->dev, pp->mpp->wwid);
/* REGISTER AND IGNORE EXISTING KEY on this path */
2824 ret = prout_do_scsi_ioctl(pp->dev, MPATH_PROUT_REG_IGN_SA, 0, 0, param, 0);
2825 if (ret != MPATH_PR_SUCCESS )
2827 condlog(0,"%s: Reservation registration failed. Error: %d", pp->dev, ret);
/*
 * Spawn mpath_pr_event_handler_fn for this path and wait for it to
 * finish; only runs when the map has a reservation_key configured.
 * NOTE(review): the function's tail (return value handling) is elided
 * from this chunk.
 */
2837 int mpath_pr_event_handle(struct path *pp)
2841 pthread_attr_t attr;
2842 struct multipath * mpp;
/* nothing to do unless a reservation key is set on the map */
2846 if (get_be64(mpp->reservation_key))
2849 pthread_attr_init(&attr);
2850 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
2852 rc = pthread_create(&thread, NULL , mpath_pr_event_handler_fn, pp);
2854 condlog(0, "%s: ERROR; return code from pthread_create() is %d", pp->dev, rc);
2857 pthread_attr_destroy(&attr);
2858 rc = pthread_join(thread, NULL);