2 * Copyright (c) 2003, 2004, 2005 Christophe Varoqui
3 * Copyright (c) 2005 Benjamin Marzinski, Redhat
4 * Copyright (c) 2005 Kiyoshi Ueda, NEC
5 * Copyright (c) 2005 Patrick Caulfield, Redhat
6 * Copyright (c) 2005 Edward Goggin, EMC
15 #include <libdevmapper.h>
17 #include <mpath_cmd.h>
22 #include "devmapper.h"
25 #include "structs_vec.h"
28 #include "blacklist.h"
30 #include "discovery.h"
32 #include "switchgroup.h"
34 #include "configure.h"
35 #include "pgpolicies.h"
43 /* group paths in pg by host adapter
45 int group_by_host_adapter(struct pathgroup *pgp, vector adapters)
47 struct adapter_group *agp;
48 struct host_group *hgp;
49 struct path *pp, *pp1;
50 char adapter_name1[SLOT_NAME_SIZE];
51 char adapter_name2[SLOT_NAME_SIZE];
53 int found_hostgroup = 0;
55 while (VECTOR_SIZE(pgp->paths) > 0) {
57 pp = VECTOR_SLOT(pgp->paths, 0);
59 if (sysfs_get_host_adapter_name(pp, adapter_name1))
61 /* create a new host adapter group
63 agp = alloc_adaptergroup();
68 strncpy(agp->adapter_name, adapter_name1, SLOT_NAME_SIZE);
69 store_adaptergroup(adapters, agp);
71 /* create a new host port group
73 hgp = alloc_hostgroup();
76 if (store_hostgroup(agp->host_groups, hgp))
79 hgp->host_no = pp->sg_id.host_no;
81 if (store_path(hgp->paths, pp))
85 /* delete path from path group
87 vector_del_slot(pgp->paths, 0);
89 /* add all paths belonging to same host adapter
91 vector_foreach_slot(pgp->paths, pp1, i) {
92 if (sysfs_get_host_adapter_name(pp1, adapter_name2))
94 if (strcmp(adapter_name1, adapter_name2) == 0) {
96 vector_foreach_slot(agp->host_groups, hgp, j) {
97 if (hgp->host_no == pp1->sg_id.host_no) {
98 if (store_path(hgp->paths, pp1))
105 if (!found_hostgroup) {
106 /* this path belongs to new host port
107 * within this adapter
109 hgp = alloc_hostgroup();
113 if (store_hostgroup(agp->host_groups, hgp))
117 if (store_path(hgp->paths, pp1))
120 hgp->host_no = pp1->sg_id.host_no;
123 /* delete paths from original path_group
124 * as they are added into adapter group now
126 vector_del_slot(pgp->paths, i);
133 out: /* add back paths into pg as re-ordering failed
135 vector_foreach_slot(adapters, agp, i) {
136 vector_foreach_slot(agp->host_groups, hgp, j) {
137 while (VECTOR_SIZE(hgp->paths) > 0) {
138 pp = VECTOR_SLOT(hgp->paths, 0);
139 if (store_path(pgp->paths, pp))
140 condlog(3, "failed to restore "
141 "path %s into path group",
143 vector_del_slot(hgp->paths, 0);
147 free_adaptergroup(adapters);
151 /* re-order paths in pg by alternating adapters and host ports
152 * for optimized selection
154 int order_paths_in_pg_by_alt_adapters(struct pathgroup *pgp, vector adapters,
157 int next_adapter_index = 0;
158 struct adapter_group *agp;
159 struct host_group *hgp;
162 while (total_paths > 0) {
163 agp = VECTOR_SLOT(adapters, next_adapter_index);
165 condlog(0, "can't get adapter group %d", next_adapter_index);
169 hgp = VECTOR_SLOT(agp->host_groups, agp->next_host_index);
171 condlog(0, "can't get host group %d of adapter group %d", agp->next_host_index, next_adapter_index);
175 if (!hgp->num_paths) {
176 agp->next_host_index++;
177 agp->next_host_index %= agp->num_hosts;
178 next_adapter_index++;
179 next_adapter_index %= VECTOR_SIZE(adapters);
183 pp = VECTOR_SLOT(hgp->paths, 0);
185 if (store_path(pgp->paths, pp))
190 vector_del_slot(hgp->paths, 0);
194 agp->next_host_index++;
195 agp->next_host_index %= agp->num_hosts;
196 next_adapter_index++;
197 next_adapter_index %= VECTOR_SIZE(adapters);
200 /* all paths are added into path_group
201 * in crafted child order
206 /* round-robin: order paths in path group to alternate
207 * between all host adapters
209 int rr_optimize_path_order(struct pathgroup *pgp)
216 total_paths = VECTOR_SIZE(pgp->paths);
217 vector_foreach_slot(pgp->paths, pp, i) {
218 if (pp->sg_id.proto_id != SCSI_PROTOCOL_FCP &&
219 pp->sg_id.proto_id != SCSI_PROTOCOL_SAS &&
220 pp->sg_id.proto_id != SCSI_PROTOCOL_ISCSI &&
221 pp->sg_id.proto_id != SCSI_PROTOCOL_SRP) {
222 /* return success as default path order
223 * is maintained in path group
228 adapters = vector_alloc();
232 /* group paths in path group by host adapters
234 if (group_by_host_adapter(pgp, adapters)) {
235 /* already freed adapters */
236 condlog(3, "Failed to group paths by adapters");
240 /* re-order paths in pg to alternate between adapters and host ports
242 if (order_paths_in_pg_by_alt_adapters(pgp, adapters, total_paths)) {
243 condlog(3, "Failed to re-order paths in pg by adapters "
245 free_adaptergroup(adapters);
246 /* return failure as original paths are
252 free_adaptergroup(adapters);
257 setup_map (struct multipath * mpp, char * params, int params_size)
259 struct pathgroup * pgp;
263 * don't bother if devmap size is unknown
265 if (mpp->size <= 0) {
266 condlog(3, "%s: devmap size is unknown", mpp->alias);
271 * free features, selector, and hwhandler properties if they are being reused
273 free_multipath_attributes(mpp);
276 * properties selectors
278 select_pgfailback(mpp);
279 select_pgpolicy(mpp);
280 select_selector(mpp);
281 select_features(mpp);
282 select_hwhandler(mpp);
283 select_rr_weight(mpp);
285 select_no_path_retry(mpp);
289 select_fast_io_fail(mpp);
290 select_dev_loss(mpp);
291 select_reservation_key(mpp);
292 select_retain_hwhandler(mpp);
293 select_deferred_remove(mpp);
294 select_delay_watch_checks(mpp);
295 select_delay_wait_checks(mpp);
297 sysfs_set_scsi_tmo(mpp);
299 * assign paths to path groups -- start with no groups and all paths
303 vector_foreach_slot (mpp->pg, pgp, i)
304 free_pathgroup(pgp, KEEP_PATHS);
306 vector_free(mpp->pg);
309 if (mpp->pgpolicyfn && mpp->pgpolicyfn(mpp))
312 mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
315 * ponder each path group and determine the highest prio pg
316 * to switch over (default to first)
318 mpp->bestpg = select_path_group(mpp);
320 /* re-order paths in all path groups in an optimized way
321 * for round-robin path selectors to get maximum throughput.
323 if (!strncmp(mpp->selector, "round-robin", 11)) {
324 vector_foreach_slot(mpp->pg, pgp, i) {
325 if (VECTOR_SIZE(pgp->paths) <= 2)
327 if (rr_optimize_path_order(pgp)) {
328 condlog(2, "cannot re-order paths for "
337 * transform the mp->pg vector of vectors of paths
338 * into a mp->params strings to feed the device-mapper
340 if (assemble_map(mpp, params, params_size)) {
341 condlog(0, "%s: problem assembing map", mpp->alias);
348 compute_pgid(struct pathgroup * pgp)
353 vector_foreach_slot (pgp->paths, pp, i)
358 pgcmp (struct multipath * mpp, struct multipath * cmpp)
361 struct pathgroup * pgp;
362 struct pathgroup * cpgp;
368 vector_foreach_slot (mpp->pg, pgp, i) {
371 vector_foreach_slot (cmpp->pg, cpgp, j) {
372 if (pgp->id == cpgp->id &&
373 !pathcmp(pgp, cpgp)) {
386 select_action (struct multipath * mpp, vector curmp, int force_reload)
388 struct multipath * cmpp;
389 struct multipath * cmpp_by_name;
391 cmpp = find_mp_by_wwid(curmp, mpp->wwid);
392 cmpp_by_name = find_mp_by_alias(curmp, mpp->alias);
396 condlog(2, "%s: rename %s to %s", mpp->wwid,
397 cmpp->alias, mpp->alias);
398 strncpy(mpp->alias_old, cmpp->alias, WWID_SIZE);
399 mpp->action = ACT_RENAME;
401 mpp->action = ACT_FORCERENAME;
404 mpp->action = ACT_CREATE;
405 condlog(3, "%s: set ACT_CREATE (map does not exist)",
411 condlog(2, "%s: remove (wwid changed)", mpp->alias);
412 dm_flush_map(mpp->alias);
413 strncpy(cmpp_by_name->wwid, mpp->wwid, WWID_SIZE);
414 drop_multipath(curmp, cmpp_by_name->wwid, KEEP_PATHS);
415 mpp->action = ACT_CREATE;
416 condlog(3, "%s: set ACT_CREATE (map wwid change)",
421 if (cmpp != cmpp_by_name) {
422 condlog(2, "%s: unable to rename %s to %s (%s is used by %s)",
423 mpp->wwid, cmpp->alias, mpp->alias,
424 mpp->alias, cmpp_by_name->wwid);
425 /* reset alias to existing alias */
427 mpp->alias = STRDUP(cmpp->alias);
428 mpp->action = ACT_NOTHING;
432 if (pathcount(mpp, PATH_UP) == 0) {
433 mpp->action = ACT_NOTHING;
434 condlog(3, "%s: set ACT_NOTHING (no usable path)",
439 mpp->action = ACT_RELOAD;
440 condlog(3, "%s: set ACT_RELOAD (forced by user)",
444 if (cmpp->size != mpp->size) {
445 mpp->action = ACT_RESIZE;
446 condlog(3, "%s: set ACT_RESIZE (size change)",
450 if (!mpp->no_path_retry &&
451 (strlen(cmpp->features) != strlen(mpp->features) ||
452 strcmp(cmpp->features, mpp->features))) {
453 mpp->action = ACT_RELOAD;
454 condlog(3, "%s: set ACT_RELOAD (features change)",
458 if (mpp->retain_hwhandler != RETAIN_HWHANDLER_ON &&
459 (strlen(cmpp->hwhandler) != strlen(mpp->hwhandler) ||
460 strncmp(cmpp->hwhandler, mpp->hwhandler,
461 strlen(mpp->hwhandler)))) {
462 mpp->action = ACT_RELOAD;
463 condlog(3, "%s: set ACT_RELOAD (hwhandler change)",
467 if (!cmpp->selector || strncmp(cmpp->selector, mpp->selector,
468 strlen(mpp->selector))) {
469 mpp->action = ACT_RELOAD;
470 condlog(3, "%s: set ACT_RELOAD (selector change)",
474 if (cmpp->minio != mpp->minio) {
475 mpp->action = ACT_RELOAD;
476 condlog(3, "%s: set ACT_RELOAD (minio change, %u->%u)",
477 mpp->alias, cmpp->minio, mpp->minio);
480 if (!cmpp->pg || VECTOR_SIZE(cmpp->pg) != VECTOR_SIZE(mpp->pg)) {
481 mpp->action = ACT_RELOAD;
482 condlog(3, "%s: set ACT_RELOAD (path group number change)",
486 if (pgcmp(mpp, cmpp)) {
487 mpp->action = ACT_RELOAD;
488 condlog(3, "%s: set ACT_RELOAD (path group topology change)",
492 if (cmpp->nextpg != mpp->bestpg) {
493 mpp->action = ACT_SWITCHPG;
494 condlog(3, "%s: set ACT_SWITCHPG (next path group change)",
498 mpp->action = ACT_NOTHING;
499 condlog(3, "%s: set ACT_NOTHING (map unchanged)",
505 reinstate_paths (struct multipath * mpp)
508 struct pathgroup * pgp;
514 vector_foreach_slot (mpp->pg, pgp, i) {
518 vector_foreach_slot (pgp->paths, pp, j) {
519 if (pp->state != PATH_UP &&
520 (pgp->status == PGSTATE_DISABLED ||
521 pgp->status == PGSTATE_ACTIVE))
524 if (pp->dmstate == PSTATE_FAILED) {
525 if (dm_reinstate_path(mpp->alias, pp->dev_t))
526 condlog(0, "%s: error reinstating",
535 lock_multipath (struct multipath * mpp, int lock)
537 struct pathgroup * pgp;
542 if (!mpp || !mpp->pg)
545 vector_foreach_slot (mpp->pg, pgp, i) {
548 vector_foreach_slot(pgp->paths, pp, j) {
549 if (lock && flock(pp->fd, LOCK_SH | LOCK_NB) &&
550 errno == EWOULDBLOCK)
553 flock(pp->fd, LOCK_UN);
558 vector_foreach_slot (mpp->pg, pgp, x) {
563 vector_foreach_slot(pgp->paths, pp, y) {
564 if (x == i && y >= j)
566 flock(pp->fd, LOCK_UN);
575 #define DOMAP_RETRY -1
578 #define DOMAP_EXIST 2
582 domap (struct multipath * mpp, char * params)
587 * last chance to quit before touching the devmaps
589 if (conf->cmd == CMD_DRY_RUN && mpp->action != ACT_NOTHING) {
590 print_multipath_topology(mpp, conf->verbosity);
594 if (mpp->action == ACT_CREATE &&
595 dm_map_present(mpp->alias)) {
596 condlog(3, "%s: map already present", mpp->alias);
597 mpp->action = ACT_RELOAD;
600 switch (mpp->action) {
606 dm_switchgroup(mpp->alias, mpp->bestpg);
608 * we may have avoided reinstating paths because they were in
609 * active or disabled PG. Now that the topology has changed,
612 reinstate_paths(mpp);
616 if (lock_multipath(mpp, 1)) {
617 condlog(3, "%s: failed to create map (in use)",
622 r = dm_addmap_create(mpp, params);
624 lock_multipath(mpp, 0);
628 r = dm_addmap_reload(mpp, params, 0);
632 r = dm_addmap_reload(mpp, params, 1);
636 r = dm_rename(mpp->alias_old, mpp->alias);
639 case ACT_FORCERENAME:
640 r = dm_rename(mpp->alias_old, mpp->alias);
642 r = dm_addmap_reload(mpp, params, 0);
651 * DM_DEVICE_CREATE, DM_DEVICE_RENAME, or DM_DEVICE_RELOAD
654 if (mpp->action == ACT_CREATE)
655 remember_wwid(mpp->wwid);
657 /* multipath client mode */
658 dm_switchgroup(mpp->alias, mpp->bestpg);
660 /* multipath daemon mode */
661 mpp->stat_map_loads++;
662 condlog(2, "%s: load table [0 %llu %s %s]", mpp->alias,
663 mpp->size, TGT_MPATH, params);
665 * Required action is over, reset for the stateful daemon.
666 * But don't do it for creation as we use in the caller the
667 * mpp->action to figure out whether to start the waitevent checker.
669 if (mpp->action != ACT_CREATE)
670 mpp->action = ACT_NOTHING;
672 mpp->wait_for_udev = 1;
673 mpp->uev_wait_tick = conf->uev_wait_timeout;
683 deadmap (struct multipath * mpp)
686 struct pathgroup * pgp;
692 vector_foreach_slot (mpp->pg, pgp, i) {
696 vector_foreach_slot (pgp->paths, pp, j)
698 return 0; /* alive */
704 int check_daemon(void)
710 fd = mpath_connect();
714 if (send_packet(fd, "show daemon") != 0)
716 if (recv_packet(fd, &reply, conf->uxsock_timeout) != 0)
719 if (strstr(reply, "shutdown"))
727 mpath_disconnect(fd);
732 coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid, int force_reload)
736 char params[PARAMS_SIZE];
737 struct multipath * mpp;
740 vector curmp = vecs->mpvec;
741 vector pathvec = vecs->pathvec;
743 /* ignore refwwid if it's empty */
744 if (refwwid && !strlen(refwwid))
748 vector_foreach_slot (pathvec, pp1, k) {
752 vector_foreach_slot (pathvec, pp1, k) {
753 /* skip this path for some reason */
755 /* 1. if path has no unique id or wwid blacklisted */
756 if (strlen(pp1->wwid) == 0 ||
757 filter_path(conf, pp1) > 0) {
758 orphan_path(pp1, "wwid blacklisted");
762 /* 2. if path already coalesced */
766 /* 3. if path has disappeared */
767 if (pp1->state == PATH_REMOVED) {
768 orphan_path(pp1, "path removed");
772 /* 4. path is out of scope */
773 if (refwwid && strncmp(pp1->wwid, refwwid, WWID_SIZE))
776 /* If find_multipaths was selected check if the path is valid */
777 if (!refwwid && !should_multipath(pp1, pathvec)) {
778 orphan_path(pp1, "only one path");
783 * at this point, we know we really got a new mp
785 mpp = add_map_with_path(vecs, pp1, 0);
789 if (pp1->priority == PRIO_UNDEF)
790 mpp->action = ACT_REJECT;
793 condlog(0, "%s: skip coalesce (no paths)", mpp->alias);
794 remove_map(mpp, vecs, 0);
798 for (i = k + 1; i < VECTOR_SIZE(pathvec); i++) {
799 pp2 = VECTOR_SLOT(pathvec, i);
801 if (strcmp(pp1->wwid, pp2->wwid))
804 if (!mpp->size && pp2->size)
805 mpp->size = pp2->size;
807 if (mpp->size && pp2->size &&
808 pp2->size != mpp->size) {
810 * ouch, avoid feeding that to the DM
812 condlog(0, "%s: size %llu, expected %llu. "
813 "Discard", pp2->dev_t, pp2->size,
815 mpp->action = ACT_REJECT;
817 if (pp2->priority == PRIO_UNDEF)
818 mpp->action = ACT_REJECT;
820 verify_paths(mpp, vecs);
823 if (setup_map(mpp, params, PARAMS_SIZE)) {
824 remove_map(mpp, vecs, 0);
828 if (mpp->action == ACT_UNDEF)
829 select_action(mpp, curmp, force_reload);
831 r = domap(mpp, params);
833 if (r == DOMAP_FAIL || r == DOMAP_RETRY) {
834 condlog(3, "%s: domap (%u) failure "
835 "for create/reload map",
837 if (r == DOMAP_FAIL) {
838 condlog(2, "%s: %s map",
839 mpp->alias, (mpp->action == ACT_CREATE)?
840 "ignoring" : "removing");
841 remove_map(mpp, vecs, 0);
843 } else /* if (r == DOMAP_RETRY) */
849 if (!conf->daemon && !conf->allow_queueing && !check_daemon()) {
850 if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF &&
851 mpp->no_path_retry != NO_PATH_RETRY_FAIL)
852 condlog(3, "%s: multipathd not running, unset "
853 "queue_if_no_path feature", mpp->alias);
854 if (!dm_queue_if_no_path(mpp->alias, 0))
855 remove_feature(&mpp->features,
858 else if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF) {
859 if (mpp->no_path_retry == NO_PATH_RETRY_FAIL) {
860 condlog(3, "%s: unset queue_if_no_path feature",
862 if (!dm_queue_if_no_path(mpp->alias, 0))
863 remove_feature(&mpp->features,
866 condlog(3, "%s: set queue_if_no_path feature",
868 if (!dm_queue_if_no_path(mpp->alias, 1))
869 add_feature(&mpp->features,
874 if (!conf->daemon && mpp->action != ACT_NOTHING)
875 print_multipath_topology(mpp, conf->verbosity);
878 if (mpp->action != ACT_REJECT) {
879 if (!vector_alloc_slot(newmp))
881 vector_set_slot(newmp, mpp);
884 remove_map(mpp, vecs, 0);
888 * Flush maps with only dead paths (ie not in sysfs)
889 * Keep maps with only failed paths
892 vector_foreach_slot (newmp, mpp, i) {
893 char alias[WWID_SIZE] = {0}; /* zeroed so the bounded copy below always stays NUL-terminated */
899 strncpy(alias, mpp->alias, WWID_SIZE - 1);
901 if ((j = find_slot(newmp, (void *)mpp)) != -1)
902 vector_del_slot(newmp, j);
904 remove_map(mpp, vecs, 0);
906 if (dm_flush_map(alias))
907 condlog(2, "%s: remove failed (dead)",
910 condlog(2, "%s: remove (dead)", alias);
923 get_refwwid (char * dev, enum devtypes dev_type, vector pathvec, char **wwid)
927 char buff[FILE_NAME_SIZE];
928 char * refwwid = NULL, tmpwwid[WWID_SIZE];
934 if (dev_type == DEV_NONE)
937 if (dev_type == DEV_DEVNODE) {
938 if (basenamecpy(dev, buff, FILE_NAME_SIZE) == 0) {
939 condlog(1, "basename failed for '%s' (%s)",
944 pp = find_path_by_dev(pathvec, buff);
946 struct udev_device *udevice = udev_device_new_from_subsystem_sysname(conf->udev, "block", buff);
949 condlog(2, "%s: can't get udev device", buff);
952 ret = store_pathinfo(pathvec, conf->hwtable, udevice,
953 DI_SYSFS | DI_WWID, &pp);
954 udev_device_unref(udevice);
957 condlog(0, "%s: can't store path info",
962 if (pp->udev && pp->uid_attribute &&
963 filter_property(conf, pp->udev) > 0)
970 if (dev_type == DEV_DEVT) {
972 if (devt2devname(buff, FILE_NAME_SIZE, dev)) {
973 condlog(0, "%s: cannot find block device\n", dev);
976 pp = find_path_by_dev(pathvec, buff);
978 struct udev_device *udevice = udev_device_new_from_devnum(conf->udev, 'b', parse_devt(dev));
981 condlog(2, "%s: can't get udev device", dev);
984 ret = store_pathinfo(pathvec, conf->hwtable, udevice,
985 DI_SYSFS | DI_WWID, &pp);
986 udev_device_unref(udevice);
989 condlog(0, "%s can't store path info",
994 if (pp->udev && pp->uid_attribute &&
995 filter_property(conf, pp->udev) > 0)
1002 if (dev_type == DEV_UEVENT) {
1003 struct udev_device *udevice = udev_device_new_from_environment(conf->udev);
1006 condlog(2, "%s: can't get udev device", dev);
1009 ret = store_pathinfo(pathvec, conf->hwtable, udevice,
1010 DI_SYSFS | DI_WWID, &pp);
1011 udev_device_unref(udevice);
1014 condlog(0, "%s: can't store path info",
1018 if (pp->udev && pp->uid_attribute &&
1019 filter_property(conf, pp->udev) > 0)
1026 if (dev_type == DEV_DEVMAP) {
1028 if (((dm_get_uuid(dev, tmpwwid)) == 0) && (strlen(tmpwwid))) {
1036 if (get_user_friendly_wwid(dev, tmpwwid,
1037 conf->bindings_file) == 0) {
1043 * or may be an alias
1045 refwwid = get_mpe_wwid(dev);
1048 * or directly a wwid
1054 if (refwwid && strlen(refwwid)) {
1055 if (filter_wwid(conf->blist_wwid, conf->elist_wwid,
1061 if (refwwid && strlen(refwwid)) {
1062 *wwid = STRDUP(refwwid);
1069 extern int reload_map(struct vectors *vecs, struct multipath *mpp, int refresh)
1071 char params[PARAMS_SIZE] = {0};
1075 update_mpp_paths(mpp, vecs->pathvec);
1077 vector_foreach_slot (mpp->paths, pp, i) {
1078 r = pathinfo(pp, conf->hwtable, DI_PRIO);
1080 condlog(2, "%s: failed to refresh pathinfo",
1086 if (setup_map(mpp, params, PARAMS_SIZE)) {
1087 condlog(0, "%s: failed to setup map", mpp->alias);
1090 select_action(mpp, vecs->mpvec, 1);
1092 r = domap(mpp, params);
1093 if (r == DOMAP_FAIL || r == DOMAP_RETRY) {
1094 condlog(3, "%s: domap (%u) failure "
1095 "for reload map", mpp->alias, r);
1098 if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF) {
1099 if (mpp->no_path_retry == NO_PATH_RETRY_FAIL)
1100 dm_queue_if_no_path(mpp->alias, 0);
1102 dm_queue_if_no_path(mpp->alias, 1);