2 Copyright (c) 2018 Martin Wilck, SUSE Linux GmbH
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
19 #include <sys/types.h>
20 #include <sys/sysmacros.h>
/*
 * Fixed strings used by the print callbacks below: the vendor label and
 * the placeholder that is printed when a field has no value.
 */
42 static const char nvme_vendor[] = "NVMe";
43 static const char N_A[] = "n/a";
/*
 * NVMe path group: the generic path group header plus an embedded vector
 * of paths. The nvme_pg_to_path() macro reads slot 0 of that vector.
 */
47 struct nvme_pathgroup {
48 struct gen_pathgroup gen;
49 struct _vector pathvec;
/*
 * NOTE(review): the opening line of this struct is not visible in this
 * listing; the members are accessed elsewhere as path->udev, path->ctl
 * and path->pg, so this is presumably struct nvme_path - confirm.
 * "udev" is the path's own udev device and "ctl" its controller; every
 * path embeds its own path group ("pg").
 */
54 struct udev_device *udev;
55 struct udev_device *ctl;
59 * The kernel works in failover mode.
60 * Each path has a separate path group.
62 struct nvme_pathgroup pg;
/*
 * NOTE(review): the opening line of this struct is not visible in this
 * listing; the members are accessed elsewhere as map->gen, map->udev and
 * map->subsys, so this is presumably struct nvme_map - confirm.
 */
66 struct gen_multipath gen;
67 struct udev_device *udev;
68 struct udev_device *subsys;
/*
 * Conversion helpers between the generic (gen_*) objects and the
 * NVMe-specific structures.
 *  - (const_)gen_*_to_nvme(): plain pointer casts from the generic type.
 *  - nvme_*_to_gen(): address of the embedded "gen" member.
 *  - nvme_pg_to_path(): slot 0 of the group's path vector.
 *  - nvme_path_to_pg(): address of the path's embedded path group.
 */
75 #define NAME_LEN 64 /* buffer length for temp attributes */
76 #define const_gen_mp_to_nvme(g) ((const struct nvme_map*)(g))
77 #define gen_mp_to_nvme(g) ((struct nvme_map*)(g))
78 #define nvme_mp_to_gen(n) &((n)->gen)
79 #define const_gen_pg_to_nvme(g) ((const struct nvme_pathgroup*)(g))
80 #define gen_pg_to_nvme(g) ((struct nvme_pathgroup*)(g))
81 #define nvme_pg_to_gen(n) &((n)->gen)
82 #define const_gen_path_to_nvme(g) ((const struct nvme_path*)(g))
83 #define gen_path_to_nvme(g) ((struct nvme_path*)(g))
84 #define nvme_path_to_gen(n) &((n)->gen)
85 #define nvme_pg_to_path(x) (VECTOR_SLOT(&((x)->pathvec), 0))
86 #define nvme_path_to_pg(x) &((x)->pg)
/*
 * Release what a path holds: drop the reference on its udev device and
 * reset the path vector of its embedded path group.
 * NOTE(review): some lines of this function are not visible in this
 * listing; whether the path structure itself is freed here must be
 * confirmed against the full file.
 */
88 static void cleanup_nvme_path(struct nvme_path *path)
90 condlog(5, "%s: %p %p", __func__, path, path->udev);
92 udev_device_unref(path->udev);
93 vector_reset(&path->pg.pathvec);
95 /* ctl is implicitly referenced by udev, no need to unref */
/*
 * Tear down one map: walk the path-group vector backwards, clean up the
 * path of every group and delete its slot, then reset the vector and drop
 * the reference on the map's udev device.
 */
99 static void cleanup_nvme_map(struct nvme_map *map)
101 struct nvme_pathgroup *pg;
102 struct nvme_path *path;
105 vector_foreach_slot_backwards(&map->pgvec, pg, i) {
106 path = nvme_pg_to_path(pg);
107 condlog(5, "%s: %d %p", __func__, i, path);
108 cleanup_nvme_path(path);
109 vector_del_slot(&map->pgvec, i);
111 vector_reset(&map->pgvec);
113 udev_device_unref(map->udev);
114 /* subsys is implicitly referenced by udev, no need to unref */
/*
 * get_pathgroups callback of nvme_map_ops: hands out the path groups of
 * a map as a vector, without copying.
 * NOTE(review): the return statement is not visible in this listing.
 */
118 static const struct _vector*
119 nvme_mp_get_pgs(const struct gen_multipath *gmp) {
120 const struct nvme_map *nvme = const_gen_mp_to_nvme(gmp);
122 /* This is all used under the lock, no need to copy */
/*
 * rel_pathgroups callback of nvme_map_ops. Both arguments are marked
 * unused; the body is not visible in this listing.
 */
127 nvme_mp_rel_pgs(__attribute__((unused)) const struct gen_multipath *gmp,
128 __attribute__((unused)) const struct _vector *v)
/*
 * Remove trailing blanks (' ') from a NUL-terminated string, in place.
 *
 * @str: writable string; must not be NULL. A string consisting only of
 *       blanks becomes the empty string.
 *
 * The length is kept in a size_t so that the result of strlen() is never
 * narrowed into a signed index.
 */
static void rstrip(char *str)
{
	size_t len = strlen(str);

	while (len > 0 && str[len - 1] == ' ')
		len--;
	str[len] = '\0';
}
141 static int snprint_nvme_map(const struct gen_multipath *gmp,
142 struct strbuf *buff, char wildcard)
144 const struct nvme_map *nvm = const_gen_mp_to_nvme(gmp);
150 return append_strbuf_str(buff,
151 udev_device_get_sysname(nvm->udev));
153 return print_strbuf(buff, "%s:nsid.%s",
154 udev_device_get_sysattr_value(nvm->subsys,
156 udev_device_get_sysattr_value(nvm->udev,
159 return append_strbuf_str(buff,
160 udev_device_get_sysattr_value(nvm->udev,
163 return print_strbuf(buff, "%u", nvm->nr_live);
165 return append_strbuf_str(buff,
166 udev_device_get_sysattr_value(nvm->udev,
169 return append_strbuf_str(buff, nvme_vendor);
172 snprintf(fld, sizeof(fld), "%s",
173 udev_device_get_sysattr_value(nvm->subsys,
177 return append_strbuf_str(buff, fld);
178 return print_strbuf(buff, "%s,%s,%s", nvme_vendor, fld,
179 udev_device_get_sysattr_value(nvm->subsys,
182 return append_strbuf_str(buff,
183 udev_device_get_sysattr_value(nvm->subsys,
186 val = udev_device_get_sysattr_value(nvm->udev, "ro");
188 return append_strbuf_str(buff, "undef");
189 else if (val[0] == 1)
190 return append_strbuf_str(buff, "ro");
192 return append_strbuf_str(buff, "rw");
194 return append_strbuf_str(buff, THIS);
196 if (nvm->ana_supported == YNU_YES)
197 return append_strbuf_str(buff, "ANA");
202 return append_strbuf_str(buff, N_A);
/*
 * get_paths callback of nvme_pg_ops: hands out the paths of a path group
 * as a vector, without copying.
 * NOTE(review): the return statement is not visible in this listing.
 */
205 static const struct _vector*
206 nvme_pg_get_paths(const struct gen_pathgroup *gpg) {
207 const struct nvme_pathgroup *gp = const_gen_pg_to_nvme(gpg);
209 /* This is all used under the lock, no need to copy */
/*
 * rel_paths callback of nvme_pg_ops. Both arguments are marked unused;
 * the body is not visible in this listing.
 */
214 nvme_pg_rel_paths(__attribute__((unused)) const struct gen_pathgroup *gpg,
215 __attribute__((unused)) const struct _vector *v)
/*
 * Print a path identifier of the form "<nvmeid>:<ctlid>:<nsid>", parsed
 * from a udev sysname of the form "nvme<N>c<N>n<N>".
 * If the name cannot be scanned, an error is logged and "(ERR:<sysname>)"
 * is printed instead (the check on rc is not visible in this listing).
 */
220 static int snprint_hcil(const struct nvme_path *np, struct strbuf *buf)
222 unsigned int nvmeid, ctlid, nsid;
224 const char *sysname = udev_device_get_sysname(np->udev);
226 rc = sscanf(sysname, "nvme%uc%un%u", &nvmeid, &ctlid, &nsid);
228 condlog(1, "%s: failed to scan %s", __func__, sysname);
229 return print_strbuf(buf, "(ERR:%s)", sysname);
231 return print_strbuf(buf, "%u:%u:%u", nvmeid, ctlid, nsid);
/*
 * snprint callback of nvme_path_ops: append one field of an NVMe path to
 * @buff, selected by @wildcard.
 *
 * NOTE(review): the case labels and the sysfs attribute names are not
 * visible in this listing, so the wildcard-to-field mapping has to be
 * checked against the full file. What the visible lines show:
 *  - values are read from the path's udev device, from its controller
 *    (np->ctl) or from the owning map (np->map);
 *  - the "ana_state" attribute is printed as text in one branch and
 *    translated to a number in another: "optimized" -> 50,
 *    "non-optimized" -> 10, anything else -> 0;
 *  - a PCI parent of the controller is printed as "PCI:<sysname>";
 *  - the final fallback is the "n/a" placeholder.
 */
234 static int snprint_nvme_path(const struct gen_path *gp,
235 struct strbuf *buff, char wildcard)
237 const struct nvme_path *np = const_gen_path_to_nvme(gp);
240 struct udev_device *pci;
244 return print_strbuf(buff, "%s",
245 udev_device_get_sysattr_value(np->udev,
248 return print_strbuf(buff, "%s",
249 udev_device_get_sysname(np->udev));
251 return snprint_hcil(np, buff);
253 devt = udev_device_get_devnum(np->udev);
254 return print_strbuf(buff, "%u:%u", major(devt), minor(devt));
256 if (sysfs_attr_get_value(np->ctl, "state",
257 fld, sizeof(fld)) > 0)
258 return append_strbuf_str(buff, fld);
261 if (sysfs_attr_get_value(np->udev, "ana_state", fld,
263 return append_strbuf_str(buff, fld);
266 if (sysfs_attr_get_value(np->udev, "ana_state", fld,
269 if (!strcmp(fld, "optimized"))
270 return print_strbuf(buff, "%d", 50);
271 else if (!strcmp(fld, "non-optimized"))
272 return print_strbuf(buff, "%d", 10);
274 return print_strbuf(buff, "%d", 0);
278 snprintf(fld, sizeof(fld), "%s",
279 udev_device_get_sysattr_value(np->ctl,
282 return print_strbuf(buff, "%s,%s,%s", nvme_vendor, fld,
283 udev_device_get_sysattr_value(np->ctl,
286 return append_strbuf_str(buff,
287 udev_device_get_sysattr_value(np->udev,
290 return append_strbuf_str(buff,
291 udev_device_get_sysattr_value(np->ctl,
294 return append_strbuf_str(buff,
295 udev_device_get_sysname(np->map->udev));
298 return print_strbuf(buff, "%s:%s",
299 udev_device_get_sysattr_value(np->ctl,
301 udev_device_get_sysattr_value(np->ctl,
304 return print_strbuf(buff, "[%s]", THIS);
306 pci = udev_device_get_parent_with_subsystem_devtype(np->ctl,
310 return print_strbuf(buff, "PCI:%s",
311 udev_device_get_sysname(pci));
316 return append_strbuf_str(buff, N_A);
/*
 * snprint callback of nvme_pg_ops. A path group holds one path, so the
 * supported fields are delegated to snprint_nvme_path() for that path;
 * everything else prints the "n/a" placeholder.
 * NOTE(review): the wildcard cases are not visible in this listing.
 */
319 static int snprint_nvme_pg(const struct gen_pathgroup *gmp,
320 struct strbuf *buff, char wildcard)
322 const struct nvme_pathgroup *pg = const_gen_pg_to_nvme(gmp);
323 const struct nvme_path *path = nvme_pg_to_path(pg);
327 return snprint_nvme_path(nvme_path_to_gen(path),
330 return snprint_nvme_path(nvme_path_to_gen(path),
333 return append_strbuf_str(buff, N_A);
/*
 * Append the fixed format string used for NVMe maps to @buf. The map and
 * the verbosity arguments are ignored.
 * NOTE(review): the doubled '%' is passed to append_strbuf_str() as is;
 * whether the consumer expects "%%w" or "%w" is not visible here.
 */
337 static int nvme_style(__attribute__((unused)) const struct gen_multipath* gm,
338 struct strbuf *buf, __attribute__((unused)) int verbosity)
340 return append_strbuf_str(buf, "%%w [%%G]:%%d %%s");
/*
 * Generic multipath operations implemented for NVMe maps.
 * NOTE(review): at least one initializer line is not visible in this
 * listing.
 */
343 static const struct gen_multipath_ops nvme_map_ops = {
344 .get_pathgroups = nvme_mp_get_pgs,
345 .rel_pathgroups = nvme_mp_rel_pgs,
347 .snprint = snprint_nvme_map,
/* Generic path group operations implemented for NVMe path groups. */
350 static const struct gen_pathgroup_ops nvme_pg_ops __attribute__((unused)) = {
351 .get_paths = nvme_pg_get_paths,
352 .rel_paths = nvme_pg_rel_paths,
353 .snprint = snprint_nvme_pg,
/* Generic path operations implemented for NVMe paths. */
356 static const struct gen_path_ops nvme_path_ops __attribute__((unused)) = {
357 .snprint = snprint_nvme_path,
/*
 * Mutex taken by lock() and released by unlock().
 * NOTE(review): the surrounding struct declaration is not visible in this
 * listing; it is used elsewhere as ctx->mutex of struct context.
 */
361 pthread_mutex_t mutex;
/* Acquire the context mutex. */
366 void lock(struct context *ctx)
368 pthread_mutex_lock(&ctx->mutex);
/*
 * Release the context mutex. Takes a void pointer so that it can be
 * registered directly with pthread_cleanup_push().
 */
371 void unlock(void *arg)
373 struct context *ctx = arg;
375 pthread_mutex_unlock(&ctx->mutex);
/*
 * Remove every map from ctx->mpvec, iterating backwards, and clean each
 * one up.
 * NOTE(review): FOREIGN_IGNORED is returned under a condition that is not
 * visible in this listing (presumably an empty vector - confirm); the
 * final return value is not visible either.
 */
378 static int _delete_all(struct context *ctx)
381 int n = VECTOR_SIZE(ctx->mpvec), i;
384 return FOREIGN_IGNORED;
386 vector_foreach_slot_backwards(ctx->mpvec, nm, i) {
387 vector_del_slot(ctx->mpvec, i);
388 cleanup_nvme_map(nm);
/*
 * Public entry point: run _delete_all() with unlock() registered as
 * cancellation cleanup handler; the handler is executed on pop.
 * NOTE(review): the call that takes the lock is not visible in this
 * listing.
 */
393 int delete_all(struct context *ctx)
397 condlog(5, "%s called for \"%s\"", __func__, THIS);
400 pthread_cleanup_push(unlock, ctx);
401 rc = _delete_all(ctx);
402 pthread_cleanup_pop(1);
/*
 * Destroy a context: delete all maps, release the udev handle and the
 * map vector, and finally destroy the mutex.
 */
407 void cleanup(struct context *ctx)
409 (void)delete_all(ctx);
413 * Locking is not strictly necessary here, locking in foreign.c
414 * makes sure that no other code is called with this ctx any more.
415 * But this should make static checkers feel better.
417 pthread_cleanup_push(unlock, ctx);
419 udev_unref(ctx->udev);
421 vector_free(ctx->mpvec);
424 pthread_cleanup_pop(1);
425 pthread_mutex_destroy(&ctx->mutex);
/*
 * Create a context for this foreign library.
 *
 * @api:  API version of the caller; versions newer than
 *        LIBMP_FOREIGN_API are rejected with a log message.
 * @name: name passed in by the caller (its use is not visible in this
 *        listing).
 *
 * Allocates a zeroed context, initializes its mutex and creates the udev
 * handle and the map vector. The error paths are not visible in this
 * listing.
 * NOTE(review): this is the only condlog() format in this listing that
 * ends in "\n"; check whether condlog() already terminates the line.
 */
430 struct context *init(unsigned int api, const char *name)
434 if (api > LIBMP_FOREIGN_API) {
435 condlog(0, "%s: api version mismatch: %08x > %08x\n",
436 __func__, api, LIBMP_FOREIGN_API);
440 if ((ctx = calloc(1, sizeof(*ctx)))== NULL)
443 pthread_mutex_init(&ctx->mutex, NULL);
445 ctx->udev = udev_new();
446 if (ctx->udev == NULL)
449 ctx->mpvec = vector_alloc();
450 if (ctx->mpvec == NULL)
/*
 * Look up a map in ctx->mpvec by comparing device numbers (nm->devt).
 * NOTE(review): the second parameter and the return statements are not
 * visible in this listing.
 */
460 static struct nvme_map *_find_nvme_map_by_devt(const struct context *ctx,
466 if (ctx->mpvec == NULL)
469 vector_foreach_slot(ctx->mpvec, nm, i) {
470 if (nm->devt == devt)
/*
 * Find the path of @map whose udev syspath equals the canonical form of
 * @syspath. The input is resolved with realpath() first; a resolution
 * failure is logged. A miss is logged at level 4.
 * NOTE(review): the return statements are not visible in this listing.
 */
477 static struct nvme_path *
478 _find_path_by_syspath(struct nvme_map *map, const char *syspath)
480 struct nvme_pathgroup *pg;
483 const char *psyspath;
486 ppath = realpath(syspath, real);
488 condlog(1, "%s: %s: error in realpath", __func__, THIS);
492 vector_foreach_slot(&map->pgvec, pg, i) {
493 struct nvme_path *path = nvme_pg_to_path(pg);
495 psyspath = udev_device_get_syspath(path->udev);
496 if (psyspath && !strcmp(ppath, psyspath))
499 condlog(4, "%s: %s: %s not found", __func__, THIS, ppath);
/* void-pointer wrapper around udev_device_unref() for pthread_cleanup_push(). */
503 static void _udev_device_unref(void *p)
505 udev_device_unref(p);
/* void-pointer wrapper around udev_enumerate_unref() for pthread_cleanup_push(). */
508 static void _udev_enumerate_unref(void *p)
510 udev_enumerate_unref(p);
/*
 * scandir() filter for NVMe controller entries.
 *
 * Accepts a directory entry only if its name is "nvme" followed by one or
 * more decimal digits and nothing else. Where the platform reports the
 * entry type, the entry must also be a symbolic link.
 *
 * Returns 1 to keep the entry, 0 to skip it.
 */
static int _dirent_controller(const struct dirent *di)
{
	static const char nvme_prefix[] = "nvme";
	const size_t prefix_len = sizeof(nvme_prefix) - 1;
	const char *p;

#ifdef _DIRENT_HAVE_D_TYPE
	if (di->d_type != DT_LNK)
		return 0;
#endif
	if (strncmp(di->d_name, nvme_prefix, prefix_len))
		return 0;
	p = di->d_name + prefix_len;
	if (*p == '\0')
		return 0;
	/*
	 * isdigit() is only defined for values representable as unsigned
	 * char (or EOF), so a plain char must be converted first.
	 */
	for (; *p != '\0'; ++p)
		if (!isdigit((unsigned char)*p))
			return 0;
	return 1;
}
533 /* Find the block device for a given nvme controller */
/*
 * Enumerates the udev devices below @ctrl in the "block" subsystem and
 * looks for one whose devtype is "disk". Candidates are created from
 * their syspath; a udev_device_unref() call on the candidate is visible
 * in the loop. The enumeration object is released through the cleanup
 * handler. The outcome is logged either way.
 * NOTE(review): the assignment to blkdev and the return statement are
 * not visible in this listing.
 */
534 struct udev_device *get_ctrl_blkdev(const struct context *ctx,
535 struct udev_device *ctrl)
537 struct udev_list_entry *item;
538 struct udev_device *blkdev = NULL;
539 struct udev_enumerate *enm = udev_enumerate_new(ctx->udev);
545 pthread_cleanup_push(_udev_enumerate_unref, enm);
546 if (udev_enumerate_add_match_parent(enm, ctrl) < 0)
548 if (udev_enumerate_add_match_subsystem(enm, "block"))
551 if (udev_enumerate_scan_devices(enm) < 0) {
552 condlog(1, "%s: %s: error enumerating devices", __func__, THIS);
556 for (item = udev_enumerate_get_list_entry(enm);
558 item = udev_list_entry_get_next(item)) {
559 struct udev_device *tmp;
561 tmp = udev_device_new_from_syspath(ctx->udev,
562 udev_list_entry_get_name(item));
566 devtype = udev_device_get_devtype(tmp);
567 if (devtype && !strcmp(devtype, "disk")) {
571 udev_device_unref(tmp);
575 condlog(1, "%s: %s: failed to get blockdev for %s",
576 __func__, THIS, udev_device_get_sysname(ctrl));
578 condlog(5, "%s: %s: got %s", __func__, THIS,
579 udev_device_get_sysname(blkdev));
581 pthread_cleanup_pop(1);
/*
 * Determine once per map whether the controller supports ANA.
 *
 * Does nothing if map->ana_supported is already decided. Otherwise it
 * opens the controller's character device under /dev/char/, using the
 * "dev" sysfs attribute, and queries it with nvme_id_ctrl_ana(). A
 * result of 1 is stored as YNU_YES, any other result on that path as
 * YNU_NO. The descriptor is closed through the cleanup handler.
 * NOTE(review): the "dev" attribute is used in the path format without a
 * visible NULL check - confirm that it cannot be missing.
 */
585 static void test_ana_support(struct nvme_map *map, struct udev_device *ctl)
592 if (map->ana_supported != YNU_UNDEF)
595 dev_t = udev_device_get_sysattr_value(ctl, "dev");
596 if (safe_sprintf(sys_path, "/dev/char/%s", dev_t))
599 fd = open(sys_path, O_RDONLY);
601 condlog(2, "%s: error opening %s", __func__, sys_path);
605 pthread_cleanup_push(close_fd, (void *)fd);
606 rc = nvme_id_ctrl_ana(fd, NULL);
608 condlog(2, "%s: error in nvme_id_ctrl: %s", __func__,
611 map->ana_supported = (rc == 1 ? YNU_YES : YNU_NO);
612 condlog(3, "%s: NVMe ctrl %s: ANA %s supported", __func__, dev_t,
613 rc == 1 ? "is" : "is not");
615 pthread_cleanup_pop(1);
/*
 * Synchronize the paths of @map with the controllers currently present
 * in its NVMe subsystem.
 *
 * Steps visible in this listing:
 *  1. Look up the subsystem parent of the map's udev device and scan its
 *     sysfs directory with scandir(), filtered by _dirent_controller().
 *  2. For every entry: resolve it with realpath(), create a udev device
 *     for the controller and obtain its block device through
 *     get_ctrl_blkdev(). The controller reference is dropped again right
 *     away and re-obtained as the parent of the block device.
 *  3. Paths that are already known are only logged; otherwise a new
 *     nvme_path with its own path group is allocated, checked for ANA
 *     support and appended to map->pgvec.
 *  4. Finally the path groups are walked backwards; paths that were not
 *     found any more are removed, and the controller "state" attribute
 *     is compared against "live" to report the number of live paths.
 *
 * NOTE(review): the return value of the first snprintf() ("n") is used
 * as an offset into pathbuf; no truncation check is visible in this
 * listing - confirm against the full file.
 */
618 static void _find_controllers(struct context *ctx, struct nvme_map *map)
620 char pathbuf[PATH_MAX], realbuf[PATH_MAX];
621 struct dirent **di = NULL;
622 struct scandir_result sr;
623 struct udev_device *subsys;
624 struct nvme_pathgroup *pg;
625 struct nvme_path *path;
628 if (map == NULL || map->udev == NULL)
631 vector_foreach_slot(&map->pgvec, pg, i) {
632 path = nvme_pg_to_path(pg);
636 subsys = udev_device_get_parent_with_subsystem_devtype(map->udev,
639 if (subsys == NULL) {
640 condlog(1, "%s: %s: BUG: no NVME subsys for %s", __func__, THIS,
641 udev_device_get_sysname(map->udev));
645 n = snprintf(pathbuf, sizeof(pathbuf), "%s",
646 udev_device_get_syspath(subsys));
647 r = scandir(pathbuf, &di, _dirent_controller, alphasort);
650 condlog(3, "%s: %s: no controllers for %s", __func__, THIS,
651 udev_device_get_sysname(map->udev));
654 condlog(1, "%s: %s: error %d scanning controllers of %s",
655 __func__, THIS, errno,
656 udev_device_get_sysname(map->udev));
662 pthread_cleanup_push_cast(free_scandir_result, &sr);
663 for (i = 0; i < r; i++) {
664 char *fn = di[i]->d_name;
665 struct udev_device *ctrl, *udev;
667 if (safe_snprintf(pathbuf + n, sizeof(pathbuf) - n, "/%s", fn))
669 if (realpath(pathbuf, realbuf) == NULL) {
670 condlog(3, "%s: %s: realpath: %s", __func__, THIS,
674 condlog(4, "%s: %s: found %s", __func__, THIS, realbuf);
676 ctrl = udev_device_new_from_syspath(ctx->udev, realbuf);
678 condlog(1, "%s: %s: failed to get udev device for %s",
679 __func__, THIS, realbuf);
683 pthread_cleanup_push(_udev_device_unref, ctrl);
684 udev = get_ctrl_blkdev(ctx, ctrl);
686 * We give up the reference to the nvme device here and get
687 * it back from the child below.
688 * This way we don't need to worry about unreffing it.
690 pthread_cleanup_pop(1);
695 path = _find_path_by_syspath(map,
696 udev_device_get_syspath(udev));
699 condlog(4, "%s: %s already known",
704 path = calloc(1, sizeof(*path));
708 path->gen.ops = &nvme_path_ops;
712 path->ctl = udev_device_get_parent_with_subsystem_devtype
713 (udev, "nvme", NULL);
714 if (path->ctl == NULL) {
715 condlog(1, "%s: %s: failed to get controller for %s",
717 cleanup_nvme_path(path);
720 test_ana_support(map, path->ctl);
722 path->pg.gen.ops = &nvme_pg_ops;
723 if (!vector_alloc_slot(&path->pg.pathvec)) {
724 cleanup_nvme_path(path);
727 vector_set_slot(&path->pg.pathvec, path);
728 if (!vector_alloc_slot(&map->pgvec)) {
729 cleanup_nvme_path(path);
732 vector_set_slot(&map->pgvec, &path->pg);
733 condlog(3, "%s: %s: new path %s added to %s",
734 __func__, THIS, udev_device_get_sysname(udev),
735 udev_device_get_sysname(map->udev));
737 pthread_cleanup_pop(1);
740 vector_foreach_slot_backwards(&map->pgvec, pg, i) {
741 path = nvme_pg_to_path(pg);
743 condlog(1, "path %d not found in %s any more",
744 i, udev_device_get_sysname(map->udev));
745 vector_del_slot(&map->pgvec, i);
746 cleanup_nvme_path(path);
748 static const char live_state[] = "live";
751 if ((sysfs_attr_get_value(path->ctl, "state", state,
752 sizeof(state)) > 0) &&
753 !strncmp(state, live_state, sizeof(live_state) - 1))
757 condlog(3, "%s: %s: map %s has %d/%d live paths", __func__, THIS,
758 udev_device_get_sysname(map->udev), map->nr_live,
759 VECTOR_SIZE(&map->pgvec));
/*
 * Create a map for the block device @ud and register it in ctx->mpvec.
 *
 * The device number is looked up first, so a device that already has a
 * map is handled separately (that return value is not visible in this
 * listing). A new map takes its own reference on @ud, stores @subsys
 * without an extra reference, is appended to the map vector and then has
 * its controllers scanned. Returns FOREIGN_CLAIMED on success; if the
 * vector slot cannot be allocated the map is cleaned up again.
 */
762 static int _add_map(struct context *ctx, struct udev_device *ud,
763 struct udev_device *subsys)
765 dev_t devt = udev_device_get_devnum(ud);
766 struct nvme_map *map;
768 if (_find_nvme_map_by_devt(ctx, devt) != NULL)
771 map = calloc(1, sizeof(*map));
776 map->udev = udev_device_ref(ud);
778 * subsys is implicitly referenced by map->udev,
779 * no need to take a reference here.
781 map->subsys = subsys;
782 map->gen.ops = &nvme_map_ops;
784 if (!vector_alloc_slot(ctx->mpvec)) {
785 cleanup_nvme_map(map);
788 vector_set_slot(ctx->mpvec, map);
789 _find_controllers(ctx, map);
791 return FOREIGN_CLAIMED;
/*
 * Public entry point for a newly detected device.
 *
 * Devices whose devtype is not "disk" are ignored, and FOREIGN_IGNORED is
 * also returned after the subsystem parent lookup (that condition is not
 * visible in this listing). Otherwise _add_map() is called with unlock()
 * registered as cleanup handler, and the result is logged.
 */
794 int add(struct context *ctx, struct udev_device *ud)
796 struct udev_device *subsys;
800 condlog(5, "%s called for \"%s\"", __func__, THIS);
804 if ((devtype = udev_device_get_devtype(ud)) == NULL ||
805 strcmp("disk", devtype))
806 return FOREIGN_IGNORED;
808 subsys = udev_device_get_parent_with_subsystem_devtype(ud,
812 return FOREIGN_IGNORED;
815 pthread_cleanup_push(unlock, ctx);
816 rc = _add_map(ctx, ud, subsys);
817 pthread_cleanup_pop(1);
819 if (rc == FOREIGN_CLAIMED)
820 condlog(3, "%s: %s: added map %s", __func__, THIS,
821 udev_device_get_sysname(ud));
822 else if (rc != FOREIGN_OK)
823 condlog(1, "%s: %s: retcode %d adding %s",
824 __func__, THIS, rc, udev_device_get_sysname(ud));
829 int change(__attribute__((unused)) struct context *ctx,
830 __attribute__((unused)) struct udev_device *ud)
832 condlog(5, "%s called for \"%s\"", __func__, THIS);
833 return FOREIGN_IGNORED;
/*
 * Remove the map that belongs to the device number of @ud from
 * ctx->mpvec and clean it up. FOREIGN_IGNORED is returned after the
 * lookup (presumably when no map is found - the condition is not visible
 * in this listing).
 */
836 static int _delete_map(struct context *ctx, struct udev_device *ud)
839 struct nvme_map *map;
840 dev_t devt = udev_device_get_devnum(ud);
842 map = _find_nvme_map_by_devt(ctx, devt);
844 return FOREIGN_IGNORED;
846 k = find_slot(ctx->mpvec, map);
850 vector_del_slot(ctx->mpvec, k);
852 cleanup_nvme_map(map);
/*
 * Public entry point for a removed device: run _delete_map() with
 * unlock() registered as cleanup handler and log the result. A result of
 * FOREIGN_IGNORED is not logged.
 */
857 int delete(struct context *ctx, struct udev_device *ud)
861 condlog(5, "%s called for \"%s\"", __func__, THIS);
867 pthread_cleanup_push(unlock, ctx);
868 rc = _delete_map(ctx, ud);
869 pthread_cleanup_pop(1);
871 if (rc == FOREIGN_OK)
872 condlog(3, "%s: %s: map %s deleted", __func__, THIS,
873 udev_device_get_sysname(ud));
874 else if (rc != FOREIGN_IGNORED)
875 condlog(1, "%s: %s: retcode %d deleting map %s", __func__,
876 THIS, rc, udev_device_get_sysname(ud));
/* Re-scan the controllers of every map in ctx->mpvec. */
881 void _check(struct context *ctx)
883 struct gen_multipath *gm;
886 vector_foreach_slot(ctx->mpvec, gm, i) {
887 struct nvme_map *map = gen_mp_to_nvme(gm);
889 _find_controllers(ctx, map);
/*
 * Public entry point for the periodic check. unlock() is registered as
 * cleanup handler around the work.
 * NOTE(review): the lines between push and pop are not visible in this
 * listing.
 */
893 void check(struct context *ctx)
895 condlog(4, "%s called for \"%s\"", __func__, THIS);
897 pthread_cleanup_push(unlock, ctx);
899 pthread_cleanup_pop(1);
904 * It's safe to pass our internal pointer, this is only used under the lock.
/*
 * Public accessor for the vector of maps; no copy is made.
 * NOTE(review): the return statement is not visible in this listing.
 */
906 const struct _vector *get_multipaths(const struct context *ctx)
908 condlog(5, "%s called for \"%s\"", __func__, THIS);
/*
 * Counterpart of get_multipaths(). Both arguments are unused; the only
 * visible action is the log message.
 */
912 void release_multipaths(__attribute__((unused)) const struct context *ctx,
913 __attribute__((unused)) const struct _vector *mpvec)
915 condlog(5, "%s called for \"%s\"", __func__, THIS);
920 * It's safe to pass our internal pointer, this is only used under the lock.
/*
 * Build a vector of all paths of all maps: the path groups of every map
 * are converted to paths with nvme_pg_to_path() and accumulated in
 * "paths". release_paths() frees the vector it is given.
 * NOTE(review): the declaration of "paths" and the return statement are
 * not visible in this listing.
 */
922 const struct _vector * get_paths(const struct context *ctx)
925 const struct gen_multipath *gm;
928 condlog(5, "%s called for \"%s\"", __func__, THIS);
929 vector_foreach_slot(ctx->mpvec, gm, i) {
930 const struct nvme_map *nm = const_gen_mp_to_nvme(gm);
931 paths = vector_convert(paths, &nm->pgvec,
932 struct nvme_pathgroup, nvme_pg_to_path);
/*
 * Counterpart of get_paths(): logs the call and frees the vector passed
 * in. The context argument is unused.
 */
937 void release_paths(__attribute__((unused)) const struct context *ctx,
938 const struct _vector *mpvec)
940 condlog(5, "%s called for \"%s\"", __func__, THIS);
941 vector_free_const(mpvec);
944 /* compile-time check whether all methods are present and correctly typed */
/*
 * Every entry initializes the struct foreign member with the function of
 * the same name defined in this file. The object is marked unused.
 * NOTE(review): some initializer lines are not visible in this listing.
 */
945 #define _METHOD_INIT(x) .x = x
946 static struct foreign __methods __attribute__((unused)) = {
948 _METHOD_INIT(cleanup),
949 _METHOD_INIT(change),
950 _METHOD_INIT(delete),
951 _METHOD_INIT(delete_all),
954 _METHOD_INIT(unlock),
955 _METHOD_INIT(get_multipaths),
956 _METHOD_INIT(release_multipaths),
957 _METHOD_INIT(get_paths),
958 _METHOD_INIT(release_paths),