2 Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
19 #include <sys/types.h>
20 #include <sys/sysmacros.h>
/* Vendor string printed by the snprint helpers for NVMe maps and paths. */
41 static const char nvme_vendor[] = "NVMe";
/* Placeholder string the snprint helpers emit in their final return statement. */
42 static const char N_A[] = "n/a";
/*
 * NVMe path group: a generic path-group object plus the vector of paths
 * that belong to the group.
 *
 * gen_pg_to_nvme() / const_gen_pg_to_nvme() cast a gen_pathgroup pointer
 * straight to this type, so "gen" has to remain the first member.
 */
46 struct nvme_pathgroup {
47 struct gen_pathgroup gen;
/* nvme_pg_to_path() reads slot 0 of this vector. */
48 struct _vector pathvec;
/*
 * NOTE(review): these look like members of struct nvme_path
 * (cleanup_nvme_path() accesses path->udev and path->pg) - confirm.
 *
 * udev: device whose reference is dropped in cleanup_nvme_path().
 * ctl:  controller device; cleanup_nvme_path() does not unref it.
 */
53 struct udev_device *udev;
54 struct udev_device *ctl;
58 * The kernel works in failover mode.
59 * Each path has a separate path group.
61 struct nvme_pathgroup pg;
/*
 * NOTE(review): these look like members of struct nvme_map
 * (_add_map() sets map->gen.ops, map->udev and map->subsys) - confirm.
 *
 * gen:    generic multipath object; gen_mp_to_nvme() casts a pointer to
 *         it back to the containing map.
 * udev:   device the map holds a reference on (taken in _add_map(),
 *         dropped in cleanup_nvme_map()).
 * subsys: stored without taking an extra reference, see _add_map().
 */
65 struct gen_multipath gen;
66 struct udev_device *udev;
67 struct udev_device *subsys;
/* buffer length for temp attributes */
#define NAME_LEN 64

/*
 * Conversions between the generic multipath objects and their NVMe
 * counterparts.
 *
 * The [const_]gen_*_to_nvme() forms are plain pointer casts of their
 * argument to the corresponding NVMe structure type.
 *
 * The nvme_*_to_gen() forms and nvme_path_to_pg() yield the address of
 * the embedded member. The complete expansion is parenthesized so that
 * the result behaves like a single primary expression: without the outer
 * parentheses a postfix operator written after the macro call (such as
 * "[0]" or "->") would bind to the member instead of to its address.
 */
#define const_gen_mp_to_nvme(g) ((const struct nvme_map*)(g))
#define gen_mp_to_nvme(g) ((struct nvme_map*)(g))
#define nvme_mp_to_gen(n) (&((n)->gen))
#define const_gen_pg_to_nvme(g) ((const struct nvme_pathgroup*)(g))
#define gen_pg_to_nvme(g) ((struct nvme_pathgroup*)(g))
#define nvme_pg_to_gen(n) (&((n)->gen))
#define const_gen_path_to_nvme(g) ((const struct nvme_path*)(g))
#define gen_path_to_nvme(g) ((struct nvme_path*)(g))
#define nvme_path_to_gen(n) (&((n)->gen))
/* The path of a path group: slot 0 of the group's path vector. */
#define nvme_pg_to_path(x) (VECTOR_SLOT(&((x)->pathvec), 0))
/* The path group embedded in a path. */
#define nvme_path_to_pg(x) (&((x)->pg))
/*
 * Release the resources of a single NVMe path: log the path and its udev
 * device at verbosity 5, drop the reference on path->udev and reset the
 * path vector of the path's own path group.
 */
87 static void cleanup_nvme_path(struct nvme_path *path)
89 condlog(5, "%s: %p %p", __func__, path, path->udev);
91 udev_device_unref(path->udev);
92 vector_reset(&path->pg.pathvec);
94 /* ctl is implicitly referenced by udev, no need to unref */
/*
 * Release the resources of an NVMe map: clean up the path of every path
 * group in map->pgvec, empty that vector, and drop the reference on
 * map->udev.
 */
98 static void cleanup_nvme_map(struct nvme_map *map)
100 struct nvme_pathgroup *pg;
101 struct nvme_path *path;
/*
 * The vector is walked backwards while slot i is deleted in the loop
 * body - presumably so that the slots still to be visited keep their
 * indices.
 */
104 vector_foreach_slot_backwards(&map->pgvec, pg, i) {
105 path = nvme_pg_to_path(pg);
106 condlog(5, "%s: %d %p", __func__, i, path);
107 cleanup_nvme_path(path);
108 vector_del_slot(&map->pgvec, i);
110 vector_reset(&map->pgvec);
112 udev_device_unref(map->udev);
113 /* subsys is implicitly referenced by udev, no need to unref */
/*
 * Path-group accessor for an NVMe map; installed as .get_pathgroups in
 * nvme_map_ops.
 *
 * @gmp: generic multipath object embedded in a struct nvme_map.
 * Returns a vector pointer; per the comment below, no copy is made.
 * NOTE(review): presumably the map's pgvec - confirm.
 */
117 static const struct _vector*
118 nvme_mp_get_pgs(const struct gen_multipath *gmp) {
119 const struct nvme_map *nvme = const_gen_mp_to_nvme(gmp);
121 /* This is all used under the lock, no need to copy */
/*
 * Release counterpart of nvme_mp_get_pgs(); installed as .rel_pathgroups
 * in nvme_map_ops.
 * NOTE(review): as nvme_mp_get_pgs() hands out its vector without copying,
 * this is presumably a no-op - confirm.
 */
126 nvme_mp_rel_pgs(const struct gen_multipath *gmp, const struct _vector *v)
/*
 * Helper operating on the tail of @str.
 * NOTE(review): the name suggests the trailing spaces found by the loop
 * are cut off afterwards - confirm.
 */
131 static void rstrip(char *str)
/*
 * Scan backwards from the last character while it is a ' '; on exit n is
 * the index of the last non-space character, or -1 if there is none.
 * NOTE(review): this relies on n being a signed type. With an unsigned n
 * the "n >= 0" test is always true and an empty string makes
 * strlen(str) - 1 wrap around - confirm n's declaration.
 */
135 for (n = strlen(str) - 1; n >= 0 && str[n] == ' '; n--);
/*
 * Format one field of an NVMe map; installed as .snprint in nvme_map_ops.
 *
 * @gmp:      generic multipath object embedded in a struct nvme_map.
 * @buff:     output buffer.
 * @len:      size of @buff, passed straight to snprintf().
 * @wildcard: selects the field to print.
 *
 * Every visible exit returns the result of snprintf() unmodified, i.e.
 * the length the full output would have had, which can exceed @len.
 *
 * NOTE(review): several values come from udev_device_get_sysattr_value()
 * or udev_device_get_sysname() and are passed to "%s" directly; confirm
 * that a NULL result is either impossible or acceptable here.
 */
139 static int snprint_nvme_map(const struct gen_multipath *gmp,
140 char *buff, int len, char wildcard)
142 const struct nvme_map *nvm = const_gen_mp_to_nvme(gmp);
/* Kernel name of the map's device. */
148 return snprintf(buff, len, "%s",
149 udev_device_get_sysname(nvm->udev));
/* Identifier built from a subsystem attribute and a device attribute. */
151 return snprintf(buff, len, "%s:nsid.%s",
152 udev_device_get_sysattr_value(nvm->subsys,
154 udev_device_get_sysattr_value(nvm->udev,
157 return snprintf(buff, len, "%s",
158 udev_device_get_sysattr_value(nvm->udev,
/* Live path count stored in the map. */
161 return snprintf(buff, len, "%u", nvm->nr_live);
163 return snprintf(buff, len, "%s",
164 udev_device_get_sysattr_value(nvm->udev,
167 return snprintf(buff, len, "%s", nvme_vendor);
/* Subsystem attribute copied into the local buffer fld before printing. */
170 snprintf(fld, sizeof(fld), "%s",
171 udev_device_get_sysattr_value(nvm->subsys,
175 return snprintf(buff, len, "%s", fld);
176 return snprintf(buff, len, "%s,%s,%s", nvme_vendor, fld,
177 udev_device_get_sysattr_value(nvm->subsys,
180 return snprintf(buff, len, "%s",
181 udev_device_get_sysattr_value(nvm->subsys,
/* Prints "ro" or "rw", based on the device's "ro" sysfs attribute. */
184 val = udev_device_get_sysattr_value(nvm->udev, "ro");
186 return snprintf(buff, len, "%s", "ro");
188 return snprintf(buff, len, "%s", "rw");
/* Name of this foreign library. */
190 return snprintf(buff, len, "%s", THIS);
192 if (nvm->ana_supported == YNU_YES)
193 return snprintf(buff, len, "ANA");
/*
 * NOTE(review): N_A is passed as the format string here, whereas
 * snprint_nvme_path() prints it through "%s". It contains no '%', so the
 * output is the same, but the "%s" form is the safer convention.
 */
198 return snprintf(buff, len, N_A);
/*
 * Path accessor for an NVMe path group; installed as .get_paths in
 * nvme_pg_ops.
 *
 * @gpg: generic path-group object embedded in a struct nvme_pathgroup.
 * Returns a vector pointer; per the comment below, no copy is made.
 * NOTE(review): presumably the group's pathvec - confirm.
 */
201 static const struct _vector*
202 nvme_pg_get_paths(const struct gen_pathgroup *gpg) {
203 const struct nvme_pathgroup *gp = const_gen_pg_to_nvme(gpg);
205 /* This is all used under the lock, no need to copy */
/*
 * Release counterpart of nvme_pg_get_paths(); installed as .rel_paths in
 * nvme_pg_ops.
 * NOTE(review): as nvme_pg_get_paths() hands out its vector without
 * copying, this is presumably a no-op - confirm.
 */
210 nvme_pg_rel_paths(const struct gen_pathgroup *gpg, const struct _vector *v)
/*
 * Print a three-number identifier for an NVMe path.
 *
 * The udev sysname of the path is parsed with the pattern
 * "nvme%uc%un%u"; the three numbers are printed as "a:b:c".
 * If the name cannot be used, a level-1 message is logged and
 * "(ERR:<sysname>)" is printed instead.
 *
 * @np:  path to describe.
 * @buf: output buffer.
 * @len: size of @buf.
 * Returns snprintf()'s result, capped at @len.
 */
215 static int snprint_hcil(const struct nvme_path *np, char *buf, int len)
217 unsigned int nvmeid, ctlid, nsid;
219 const char *sysname = udev_device_get_sysname(np->udev);
221 rc = sscanf(sysname, "nvme%uc%un%u", &nvmeid, &ctlid, &nsid);
223 condlog(1, "%s: failed to scan %s", __func__, sysname);
224 rc = snprintf(buf, len, "(ERR:%s)", sysname);
226 rc = snprintf(buf, len, "%u:%u:%u", nvmeid, ctlid, nsid);
/* Unlike the other snprint helpers, the return value is clamped to len. */
227 return (rc < len ? rc : len);
/*
 * Format one field of an NVMe path; installed as .snprint in
 * nvme_path_ops and also called by snprint_nvme_pg().
 *
 * @gp:       generic path object embedded in a struct nvme_path.
 * @buff:     output buffer.
 * @len:      size of @buff.
 * @wildcard: selects the field to print.
 *
 * Apart from the snprint_hcil() case, each visible exit returns the
 * unmodified snprintf() result. The final return prints the N_A
 * placeholder.
 *
 * NOTE(review): values obtained from udev_device_get_sysattr_value() and
 * udev_device_get_sysname() are handed to "%s" without a NULL check in
 * the visible code - confirm this is acceptable.
 */
230 static int snprint_nvme_path(const struct gen_path *gp,
231 char *buff, int len, char wildcard)
233 const struct nvme_path *np = const_gen_path_to_nvme(gp);
236 struct udev_device *pci;
240 return snprintf(buff, len, "%s",
241 udev_device_get_sysattr_value(np->udev,
/* Kernel name of the path device. */
244 return snprintf(buff, len, "%s",
245 udev_device_get_sysname(np->udev));
247 return snprint_hcil(np, buff, len);
/* Device number of the path device as "major:minor". */
249 devt = udev_device_get_devnum(np->udev);
250 return snprintf(buff, len, "%u:%u", major(devt), minor(devt));
/* Controller "state" attribute, printed when it could be read. */
252 if (sysfs_attr_get_value(np->ctl, "state",
253 fld, sizeof(fld)) > 0)
254 return snprintf(buff, len, "%s", fld);
/* ANA state of the path device, printed verbatim when it could be read. */
257 if (sysfs_attr_get_value(np->udev, "ana_state", fld,
259 return snprintf(buff, len, "%s", fld);
/*
 * Number derived from the ANA state: 50 for "optimized", 10 for
 * "non-optimized", and 0 in the remaining visible branch.
 */
262 if (sysfs_attr_get_value(np->udev, "ana_state", fld,
265 if (!strcmp(fld, "optimized"))
266 return snprintf(buff, len, "%d", 50);
267 else if (!strcmp(fld, "non-optimized"))
268 return snprintf(buff, len, "%d", 10);
270 return snprintf(buff, len, "%d", 0);
/* Controller attribute copied into fld, then printed after the vendor string. */
274 snprintf(fld, sizeof(fld), "%s",
275 udev_device_get_sysattr_value(np->ctl,
278 return snprintf(buff, len, "%s,%s,%s", nvme_vendor, fld,
279 udev_device_get_sysattr_value(np->ctl,
282 return snprintf(buff, len, "%s",
283 udev_device_get_sysattr_value(np->udev,
286 return snprintf(buff, len, "%s",
287 udev_device_get_sysattr_value(np->ctl,
/* Kernel name of the map this path belongs to. */
290 return snprintf(buff, len, "%s",
291 udev_device_get_sysname(np->map->udev));
294 return snprintf(buff, len, "%s:%s",
295 udev_device_get_sysattr_value(np->ctl,
297 udev_device_get_sysattr_value(np->ctl,
/* Name of this foreign library in square brackets. */
300 return snprintf(buff, len, "[%s]", THIS);
/* Name of an ancestor device of the controller, prefixed with "PCI:". */
302 pci = udev_device_get_parent_with_subsystem_devtype(np->ctl,
306 return snprintf(buff, len, "PCI:%s",
307 udev_device_get_sysname(pci));
312 return snprintf(buff, len, "%s", N_A);
/*
 * Format one field of an NVMe path group; installed as .snprint in
 * nvme_pg_ops.
 *
 * The group's path is looked up with nvme_pg_to_path() (slot 0 of the
 * path vector) and the visible cases delegate to snprint_nvme_path() for
 * that path. The final return prints the N_A placeholder.
 *
 * @gmp:      generic path-group object embedded in a struct nvme_pathgroup.
 * @buff:     output buffer.
 * @len:      size of @buff.
 * @wildcard: selects the field to print.
 */
316 static int snprint_nvme_pg(const struct gen_pathgroup *gmp,
317 char *buff, int len, char wildcard)
319 const struct nvme_pathgroup *pg = const_gen_pg_to_nvme(gmp);
320 const struct nvme_path *path = nvme_pg_to_path(pg);
324 return snprint_nvme_path(nvme_path_to_gen(path),
327 return snprint_nvme_path(nvme_path_to_gen(path),
/*
 * NOTE(review): N_A is used as the format string here, while
 * snprint_nvme_path() prints it through "%s".
 */
330 return snprintf(buff, len, N_A);
/*
 * Write the output format used for NVMe maps into @buf.
 *
 * The text stored is "%w [%G]:%d %s" (the doubled percent signs in the
 * snprintf() format yield single ones). @gm and @verbosity are not used
 * in the visible code.
 *
 * Returns the number of characters written: snprintf()'s result if it
 * fits, otherwise @len - 1.
 */
334 static int nvme_style(const struct gen_multipath* gm,
335 char *buf, int len, int verbosity)
337 int n = snprintf(buf, len, "%%w [%%G]:%%d %%s");
339 return (n < len ? n : len - 1);
/*
 * Method table for NVMe maps; _add_map() stores its address in
 * map->gen.ops.
 */
342 static const struct gen_multipath_ops nvme_map_ops = {
343 .get_pathgroups = nvme_mp_get_pgs,
344 .rel_pathgroups = nvme_mp_rel_pgs,
346 .snprint = snprint_nvme_map,
/*
 * Method table for NVMe path groups; _find_controllers() stores its
 * address in path->pg.gen.ops.
 */
349 static const struct gen_pathgroup_ops nvme_pg_ops __attribute__((unused)) = {
350 .get_paths = nvme_pg_get_paths,
351 .rel_paths = nvme_pg_rel_paths,
352 .snprint = snprint_nvme_pg,
/*
 * Method table for NVMe paths; _find_controllers() stores its address in
 * path->gen.ops.
 */
355 static const struct gen_path_ops nvme_path_ops __attribute__((unused)) = {
356 .snprint = snprint_nvme_path,
/*
 * Mutex locked by lock() and unlocked by unlock() through ctx->mutex.
 * NOTE(review): appears to be a member of struct context - confirm.
 */
360 pthread_mutex_t mutex;
/* Acquire the mutex of @ctx. Listed in the method table at the end of the file. */
365 void lock(struct context *ctx)
367 pthread_mutex_lock(&ctx->mutex);
/*
 * Release the mutex of the context passed as @arg.
 *
 * The parameter is a void pointer so the function can be registered
 * directly with pthread_cleanup_push(), as the entry points in this file do.
 */
370 void unlock(void *arg)
372 struct context *ctx = arg;
374 pthread_mutex_unlock(&ctx->mutex);
/*
 * Remove and clean up every map stored in ctx->mpvec.
 *
 * Called from delete_all() inside its lock/cleanup section.
 * Returns FOREIGN_IGNORED in the early-exit case.
 * NOTE(review): presumably taken when n shows there are no maps - confirm,
 * and confirm the return value of the normal path.
 */
377 static int _delete_all(struct context *ctx)
380 int n = VECTOR_SIZE(ctx->mpvec), i;
383 return FOREIGN_IGNORED;
/* Each map is taken out of the vector before it is torn down. */
385 vector_foreach_slot_backwards(ctx->mpvec, nm, i) {
386 vector_del_slot(ctx->mpvec, i);
387 cleanup_nvme_map(nm);
/*
 * Foreign-API entry point: drop all maps of @ctx.
 *
 * Runs _delete_all() with unlock() registered as cleanup handler;
 * pthread_cleanup_pop(1) executes that handler, so the mutex is released
 * both on normal completion and on thread cancellation.
 */
392 int delete_all(struct context *ctx)
396 condlog(5, "%s called for \"%s\"", __func__, THIS);
399 pthread_cleanup_push(unlock, ctx);
400 rc = _delete_all(ctx);
401 pthread_cleanup_pop(1);
/*
 * Foreign-API entry point: tear down @ctx.
 *
 * All maps are removed via delete_all(); then, with unlock() registered
 * as cleanup handler, the udev context is unreferenced and the map vector
 * is freed. The mutex is destroyed only after pthread_cleanup_pop(1) has
 * run unlock().
 */
406 void cleanup(struct context *ctx)
408 (void)delete_all(ctx);
412 * Locking is not strictly necessary here, locking in foreign.c
413 * makes sure that no other code is called with this ctx any more.
414 * But this should make static checkers feel better.
416 pthread_cleanup_push(unlock, ctx);
418 udev_unref(ctx->udev);
420 vector_free(ctx->mpvec);
423 pthread_cleanup_pop(1);
424 pthread_mutex_destroy(&ctx->mutex);
/*
 * Foreign-API entry point: create the context for this library.
 *
 * @api:  API version of the caller; a value greater than
 *        LIBMP_FOREIGN_API is reported as a mismatch.
 * @name: not referenced in the visible code.
 *
 * Allocates a zeroed context, initializes its mutex, creates a udev
 * context and allocates the map vector. Each of the allocation steps is
 * checked for failure.
 * NOTE(review): confirm that every failure path releases what was set up
 * before it and returns NULL.
 */
429 struct context *init(unsigned int api, const char *name)
433 if (api > LIBMP_FOREIGN_API) {
/* NOTE(review): this message ends in "\n", unlike the other condlog() calls in this file. */
434 condlog(0, "%s: api version mismatch: %08x > %08x\n",
435 __func__, api, LIBMP_FOREIGN_API);
439 if ((ctx = calloc(1, sizeof(*ctx)))== NULL)
442 pthread_mutex_init(&ctx->mutex, NULL);
444 ctx->udev = udev_new();
445 if (ctx->udev == NULL)
448 ctx->mpvec = vector_alloc();
449 if (ctx->mpvec == NULL)
/*
 * Look up the map in ctx->mpvec whose devt field equals the requested
 * device number.
 *
 * A NULL ctx->mpvec is handled explicitly before the search.
 * NOTE(review): presumably NULL is returned when nothing matches - confirm.
 */
459 static struct nvme_map *_find_nvme_map_by_devt(const struct context *ctx,
465 if (ctx->mpvec == NULL)
468 vector_foreach_slot(ctx->mpvec, nm, i) {
469 if (nm->devt == devt)
/*
 * Search @map for the path whose udev device has the given sysfs path.
 *
 * @syspath is canonicalized with realpath() first; a failure there is
 * logged at level 1. The canonical path is then compared against the
 * syspath of each path's udev device. A miss is logged at level 4.
 */
476 static struct nvme_path *
477 _find_path_by_syspath(struct nvme_map *map, const char *syspath)
479 struct nvme_pathgroup *pg;
484 ppath = realpath(syspath, real);
486 condlog(1, "%s: %s: error in realpath", __func__, THIS);
490 vector_foreach_slot(&map->pgvec, pg, i) {
491 struct nvme_path *path = nvme_pg_to_path(pg);
494 udev_device_get_syspath(path->udev)))
497 condlog(4, "%s: %s: %s not found", __func__, THIS, ppath);
/*
 * void-pointer wrapper around udev_device_unref(), usable as a
 * pthread_cleanup_push() handler (see _find_controllers()).
 */
501 static void _udev_device_unref(void *p)
503 udev_device_unref(p);
/*
 * void-pointer wrapper around udev_enumerate_unref(), usable as a
 * pthread_cleanup_push() handler (see get_ctrl_blkdev()).
 */
506 static void _udev_enumerate_unref(void *p)
508 udev_enumerate_unref(p);
/*
 * scandir() filter used by _find_controllers() to select controller
 * entries.
 *
 * Where the platform provides d_type, the entry type is tested against
 * DT_LNK. The name must start with "nvme", followed by at least one
 * digit; the remaining characters are examined one by one in the final
 * loop.
 *
 * NOTE(review): isdigit() is called with a plain char; casting to
 * unsigned char first avoids undefined behavior for negative values.
 */
511 static int _dirent_controller(const struct dirent *di)
513 static const char nvme_prefix[] = "nvme";
516 #ifdef _DIRENT_HAVE_D_TYPE
517 if (di->d_type != DT_LNK)
520 if (strncmp(di->d_name, nvme_prefix, sizeof(nvme_prefix) - 1))
/* p points at the first character after the "nvme" prefix. */
522 p = di->d_name + sizeof(nvme_prefix) - 1;
523 if (*p == '\0' || !isdigit(*p))
525 for (++p; *p != '\0'; ++p)
531 /* Find the block device for a given nvme controller */
/*
 * @ctx:  context providing the udev handle.
 * @ctrl: controller device whose children are searched.
 *
 * A udev enumeration restricted to children of @ctrl in the "block"
 * subsystem is scanned; each hit is turned into a udev device and its
 * devtype is compared with "disk". Non-matching devices are unreferenced
 * again. The enumeration object is released through the cleanup handler
 * when pthread_cleanup_pop(1) runs.
 *
 * The outcome is logged: level 1 if no block device was found, level 5
 * with the device name otherwise.
 * NOTE(review): presumably the found device is returned with a reference
 * the caller owns - confirm.
 */
532 struct udev_device *get_ctrl_blkdev(const struct context *ctx,
533 struct udev_device *ctrl)
535 struct udev_list_entry *item;
536 struct udev_device *blkdev = NULL;
537 struct udev_enumerate *enm = udev_enumerate_new(ctx->udev);
542 pthread_cleanup_push(_udev_enumerate_unref, enm);
/*
 * NOTE(review): the two match calls test their result differently
 * ("< 0" versus non-zero); harmless if both return 0 on success, but
 * inconsistent.
 */
543 if (udev_enumerate_add_match_parent(enm, ctrl) < 0)
545 if (udev_enumerate_add_match_subsystem(enm, "block"))
548 if (udev_enumerate_scan_devices(enm) < 0) {
549 condlog(1, "%s: %s: error enumerating devices", __func__, THIS);
553 for (item = udev_enumerate_get_list_entry(enm);
555 item = udev_list_entry_get_next(item)) {
556 struct udev_device *tmp;
558 tmp = udev_device_new_from_syspath(ctx->udev,
559 udev_list_entry_get_name(item));
/*
 * NOTE(review): udev_device_get_devtype() can return NULL according to
 * the libudev documentation; confirm that cannot happen for devices in
 * the "block" subsystem before passing the result to strcmp().
 */
562 if (!strcmp(udev_device_get_devtype(tmp), "disk")) {
566 udev_device_unref(tmp);
570 condlog(1, "%s: %s: failed to get blockdev for %s",
571 __func__, THIS, udev_device_get_sysname(ctrl));
573 condlog(5, "%s: %s: got %s", __func__, THIS,
574 udev_device_get_sysname(blkdev));
576 pthread_cleanup_pop(1);
/*
 * Determine once per map whether the controller supports ANA.
 *
 * Nothing is done if map->ana_supported has already left YNU_UNDEF.
 * Otherwise the controller's character device is opened through
 * "/dev/char/<value of the "dev" attribute>", nvme_id_ctrl_ana() is
 * called on it, and a result of 1 is recorded as YNU_YES, anything else
 * as YNU_NO. The descriptor is closed by the close_fd cleanup handler.
 *
 * @map: map whose ana_supported field is updated.
 * @ctl: controller device to query.
 */
580 static void test_ana_support(struct nvme_map *map, struct udev_device *ctl)
587 if (map->ana_supported != YNU_UNDEF)
/*
 * NOTE(review): the local variable is named dev_t, which hides the POSIX
 * type of the same name inside this function; it holds the attribute
 * string. Confirm the attribute cannot be NULL before it is formatted
 * with "%s".
 */
590 dev_t = udev_device_get_sysattr_value(ctl, "dev");
591 if (snprintf(sys_path, sizeof(sys_path), "/dev/char/%s", dev_t)
595 fd = open(sys_path, O_RDONLY);
597 condlog(2, "%s: error opening %s", __func__, sys_path);
/* The descriptor is smuggled through the handler's void pointer argument. */
601 pthread_cleanup_push(close_fd, (void *)fd);
602 rc = nvme_id_ctrl_ana(fd, NULL);
604 condlog(2, "%s: error in nvme_id_ctrl: %s", __func__,
607 map->ana_supported = (rc == 1 ? YNU_YES : YNU_NO);
608 condlog(3, "%s: NVMe ctrl %s: ANA %s supported", __func__, dev_t,
609 rc == 1 ? "is" : "is not");
611 pthread_cleanup_pop(1);
/*
 * Bring the path list of @map in line with the controllers currently
 * present in sysfs.
 *
 * Outline of the visible steps:
 *  1. visit all paths already known to the map;
 *  2. locate the NVMe subsystem device above map->udev;
 *  3. scandir() the subsystem's sysfs directory with _dirent_controller()
 *     as filter;
 *  4. for every controller entry: resolve it with realpath(), create a
 *     udev device, find its block device with get_ctrl_blkdev(), and
 *     either recognize the path as already known or allocate a new path
 *     and append its path group to map->pgvec;
 *  5. walk map->pgvec backwards, dropping paths that were not found any
 *     more and checking the controller state of the others;
 *  6. log the number of live paths.
 *
 * @ctx: context providing the udev handle.
 * @map: map to update; a NULL map or a map without udev device is
 *       rejected up front.
 */
614 static void _find_controllers(struct context *ctx, struct nvme_map *map)
616 char pathbuf[PATH_MAX], realbuf[PATH_MAX];
617 struct dirent **di = NULL;
618 struct scandir_result sr;
619 struct udev_device *subsys;
620 struct nvme_pathgroup *pg;
621 struct nvme_path *path;
624 if (map == NULL || map->udev == NULL)
/* Step 1: pass over the paths the map already has. */
627 vector_foreach_slot(&map->pgvec, pg, i) {
628 path = nvme_pg_to_path(pg);
/* Step 2: the subsystem is an ancestor of the map's device. */
632 subsys = udev_device_get_parent_with_subsystem_devtype(map->udev,
635 if (subsys == NULL) {
636 condlog(1, "%s: %s: BUG: no NVME subsys for %s", __func__, THIS,
637 udev_device_get_sysname(map->udev));
/*
 * Step 3: pathbuf receives the subsystem's sysfs path; n is the length
 * snprintf() reports and is used below as the offset at which each
 * entry name is appended.
 */
641 n = snprintf(pathbuf, sizeof(pathbuf), "%s",
642 udev_device_get_syspath(subsys));
643 r = scandir(pathbuf, &di, _dirent_controller, alphasort);
646 condlog(3, "%s: %s: no controllers for %s", __func__, THIS,
647 udev_device_get_sysname(map->udev));
650 condlog(1, "%s: %s: error %d scanning controllers of %s",
651 __func__, THIS, errno,
652 udev_device_get_sysname(map->udev));
/* The scandir() result is released by this handler at the matching pop. */
658 pthread_cleanup_push_cast(free_scandir_result, &sr);
659 for (i = 0; i < r; i++) {
660 char *fn = di[i]->d_name;
661 struct udev_device *ctrl, *udev;
/* Step 4: "/<entry>" overwrites whatever the previous iteration appended at offset n. */
663 if (snprintf(pathbuf + n, sizeof(pathbuf) - n, "/%s", fn)
664 >= sizeof(pathbuf) - n)
666 if (realpath(pathbuf, realbuf) == NULL) {
667 condlog(3, "%s: %s: realpath: %s", __func__, THIS,
671 condlog(4, "%s: %s: found %s", __func__, THIS, realbuf);
673 ctrl = udev_device_new_from_syspath(ctx->udev, realbuf);
675 condlog(1, "%s: %s: failed to get udev device for %s",
676 __func__, THIS, realbuf);
680 pthread_cleanup_push(_udev_device_unref, ctrl);
681 udev = get_ctrl_blkdev(ctx, ctrl);
683 * We give up the reference to the nvme device here and get
684 * it back from the child below.
685 * This way we don't need to worry about unreffing it.
687 pthread_cleanup_pop(1);
692 path = _find_path_by_syspath(map,
693 udev_device_get_syspath(udev));
696 condlog(4, "%s: %s already known",
/* Unknown so far: build a new path object. */
701 path = calloc(1, sizeof(*path));
705 path->gen.ops = &nvme_path_ops;
709 path->ctl = udev_device_get_parent_with_subsystem_devtype
710 (udev, "nvme", NULL);
711 if (path->ctl == NULL) {
712 condlog(1, "%s: %s: failed to get controller for %s",
714 cleanup_nvme_path(path);
717 test_ana_support(map, path->ctl);
/* The path is the single member of its own path group, which is then added to the map. */
719 path->pg.gen.ops = &nvme_pg_ops;
720 if (vector_alloc_slot(&path->pg.pathvec) == NULL) {
721 cleanup_nvme_path(path);
724 vector_set_slot(&path->pg.pathvec, path);
725 if (vector_alloc_slot(&map->pgvec) == NULL) {
726 cleanup_nvme_path(path);
729 vector_set_slot(&map->pgvec, &path->pg);
730 condlog(3, "%s: %s: new path %s added to %s",
731 __func__, THIS, udev_device_get_sysname(udev),
732 udev_device_get_sysname(map->udev));
734 pthread_cleanup_pop(1);
/* Step 5: backwards walk, because slots are deleted inside the loop. */
737 vector_foreach_slot_backwards(&map->pgvec, pg, i) {
738 path = nvme_pg_to_path(pg);
740 condlog(1, "path %d not found in %s any more",
741 i, udev_device_get_sysname(map->udev));
742 vector_del_slot(&map->pgvec, i);
743 cleanup_nvme_path(path);
/* A controller whose "state" attribute begins with "live" matches the test below. */
745 static const char live_state[] = "live";
748 if ((sysfs_attr_get_value(path->ctl, "state", state,
749 sizeof(state)) > 0) &&
750 !strncmp(state, live_state, sizeof(live_state) - 1))
/* Step 6: summary of live paths versus all path groups. */
754 condlog(3, "%s: %s: map %s has %d/%d live paths", __func__, THIS,
755 udev_device_get_sysname(map->udev), map->nr_live,
756 VECTOR_SIZE(&map->pgvec));
/*
 * Create a map for the block device @ud and register it in ctx->mpvec.
 *
 * @ctx:    context owning the map vector.
 * @ud:     block device; the new map takes its own reference on it.
 * @subsys: subsystem device of @ud; stored without an extra reference.
 *
 * The device number of @ud is first looked up among the existing maps.
 * A new map is allocated zeroed, filled in, appended to the vector and
 * then populated with paths by _find_controllers().
 * Returns FOREIGN_CLAIMED once the map has been added.
 * NOTE(review): confirm the return values of the other exits.
 */
759 static int _add_map(struct context *ctx, struct udev_device *ud,
760 struct udev_device *subsys)
762 dev_t devt = udev_device_get_devnum(ud);
763 struct nvme_map *map;
/* A map with this device number already exists. */
765 if (_find_nvme_map_by_devt(ctx, devt) != NULL)
768 map = calloc(1, sizeof(*map));
773 map->udev = udev_device_ref(ud);
775 * subsys is implicitly referenced by map->udev,
776 * no need to take a reference here.
778 map->subsys = subsys;
779 map->gen.ops = &nvme_map_ops;
/* If no slot can be allocated the half-built map is torn down again. */
781 if (vector_alloc_slot(ctx->mpvec) == NULL) {
782 cleanup_nvme_map(map);
785 vector_set_slot(ctx->mpvec, map);
786 _find_controllers(ctx, map);
788 return FOREIGN_CLAIMED;
/*
 * Foreign-API entry point: offer the udev device @ud to this library.
 *
 * Devices whose devtype is not "disk" are ignored, and so is the case
 * handled right after the subsystem lookup. Otherwise _add_map() is run
 * with unlock() registered as cleanup handler, and the result is logged:
 * level 3 for FOREIGN_CLAIMED, level 1 for anything other than
 * FOREIGN_OK.
 *
 * NOTE(review): udev_device_get_devtype() can return NULL according to
 * the libudev documentation; strcmp() on NULL is undefined - confirm @ud
 * always has a devtype here.
 */
791 int add(struct context *ctx, struct udev_device *ud)
793 struct udev_device *subsys;
796 condlog(5, "%s called for \"%s\"", __func__, THIS);
800 if (strcmp("disk", udev_device_get_devtype(ud)))
801 return FOREIGN_IGNORED;
803 subsys = udev_device_get_parent_with_subsystem_devtype(ud,
807 return FOREIGN_IGNORED;
810 pthread_cleanup_push(unlock, ctx);
811 rc = _add_map(ctx, ud, subsys);
812 pthread_cleanup_pop(1);
814 if (rc == FOREIGN_CLAIMED)
815 condlog(3, "%s: %s: added map %s", __func__, THIS,
816 udev_device_get_sysname(ud));
817 else if (rc != FOREIGN_OK)
818 condlog(1, "%s: %s: retcode %d adding %s",
819 __func__, THIS, rc, udev_device_get_sysname(ud));
/*
 * Foreign-API entry point for change events: logs the call at level 5
 * and always answers FOREIGN_IGNORED. Neither @ctx nor @ud is used.
 */
824 int change(struct context *ctx, struct udev_device *ud)
826 condlog(5, "%s called for \"%s\"", __func__, THIS);
827 return FOREIGN_IGNORED;
/*
 * Remove the map belonging to the udev device @ud from ctx->mpvec and
 * clean it up.
 *
 * The map is found via the device number of @ud; FOREIGN_IGNORED is
 * returned in the early exit that follows the lookup. The map's slot
 * index is obtained with find_slot() before the slot is deleted.
 */
830 static int _delete_map(struct context *ctx, struct udev_device *ud)
833 struct nvme_map *map;
834 dev_t devt = udev_device_get_devnum(ud);
836 map = _find_nvme_map_by_devt(ctx, devt);
838 return FOREIGN_IGNORED;
840 k = find_slot(ctx->mpvec, map);
844 vector_del_slot(ctx->mpvec, k);
846 cleanup_nvme_map(map);
/*
 * Foreign-API entry point: remove the map for the udev device @ud.
 *
 * Runs _delete_map() with unlock() registered as cleanup handler and logs
 * the outcome: level 3 for FOREIGN_OK, level 1 for anything other than
 * FOREIGN_IGNORED.
 */
851 int delete(struct context *ctx, struct udev_device *ud)
855 condlog(5, "%s called for \"%s\"", __func__, THIS);
861 pthread_cleanup_push(unlock, ctx);
862 rc = _delete_map(ctx, ud);
863 pthread_cleanup_pop(1);
865 if (rc == FOREIGN_OK)
866 condlog(3, "%s: %s: map %s deleted", __func__, THIS,
867 udev_device_get_sysname(ud));
868 else if (rc != FOREIGN_IGNORED)
869 condlog(1, "%s: %s: retcode %d deleting map %s", __func__,
870 THIS, rc, udev_device_get_sysname(ud));
/*
 * Refresh the paths of every map in ctx->mpvec by running
 * _find_controllers() on each of them.
 * NOTE(review): presumably called from check() inside its lock/cleanup
 * section - confirm.
 */
875 void _check(struct context *ctx)
877 struct gen_multipath *gm;
880 vector_foreach_slot(ctx->mpvec, gm, i) {
881 struct nvme_map *map = gen_mp_to_nvme(gm);
883 _find_controllers(ctx, map);
/*
 * Foreign-API entry point: logs the call at level 4 and performs its work
 * between pthread_cleanup_push(unlock, ctx) and pthread_cleanup_pop(1),
 * so unlock() runs when the section is left.
 */
887 void check(struct context *ctx)
889 condlog(4, "%s called for \"%s\"", __func__, THIS);
891 pthread_cleanup_push(unlock, ctx);
893 pthread_cleanup_pop(1);
898 * It's safe to pass our internal pointer, this is only used under the lock.
/*
 * Foreign-API entry point: hand out the vector of maps of @ctx.
 * NOTE(review): presumably returns ctx->mpvec itself - confirm.
 */
900 const struct _vector *get_multipaths(const struct context *ctx)
902 condlog(5, "%s called for \"%s\"", __func__, THIS);
/*
 * Foreign-API entry point paired with get_multipaths(). The visible code
 * only logs the call at level 5.
 */
906 void release_multipaths(const struct context *ctx, const struct _vector *mpvec)
908 condlog(5, "%s called for \"%s\"", __func__, THIS);
913 * It's safe to pass our internal pointer, this is only used under the lock.
/*
 * Foreign-API entry point: build a vector of all paths of all maps.
 *
 * For every map in ctx->mpvec, vector_convert() is applied to the map's
 * path-group vector with nvme_pg_to_path as element conversion, and the
 * result is accumulated in "paths".
 * NOTE(review): presumably the accumulated vector is returned and later
 * released with release_paths() - confirm.
 */
915 const struct _vector * get_paths(const struct context *ctx)
918 const struct gen_multipath *gm;
921 condlog(5, "%s called for \"%s\"", __func__, THIS);
922 vector_foreach_slot(ctx->mpvec, gm, i) {
923 const struct nvme_map *nm = const_gen_mp_to_nvme(gm);
924 paths = vector_convert(paths, &nm->pgvec,
925 struct nvme_pathgroup, nvme_pg_to_path);
/*
 * Foreign-API entry point: logs the call at level 5 and frees the vector
 * passed in @mpvec with vector_free_const(). @ctx is not used.
 */
930 void release_paths(const struct context *ctx, const struct _vector *mpvec)
932 condlog(5, "%s called for \"%s\"", __func__, THIS);
933 vector_free_const(mpvec);
936 /* compile-time check whether all methods are present and correctly typed */
/*
 * _METHOD_INIT(x) expands to the designated initializer ".x = x", so every
 * function listed below must exist under exactly the name of a member of
 * struct foreign and be assignable to it. The object itself is marked
 * unused; it exists only for this check.
 */
937 #define _METHOD_INIT(x) .x = x
938 static struct foreign __methods __attribute__((unused)) = {
940 _METHOD_INIT(cleanup),
941 _METHOD_INIT(change),
942 _METHOD_INIT(delete),
943 _METHOD_INIT(delete_all),
946 _METHOD_INIT(unlock),
947 _METHOD_INIT(get_multipaths),
948 _METHOD_INIT(release_multipaths),
949 _METHOD_INIT(get_paths),
950 _METHOD_INIT(release_paths),