Imported Upstream version 0.8.0 upstream/0.8.0
author DongHun Kwak <dh0128.kwak@samsung.com>
Fri, 14 Jan 2022 04:50:19 +0000 (13:50 +0900)
committer DongHun Kwak <dh0128.kwak@samsung.com>
Fri, 14 Jan 2022 04:50:19 +0000 (13:50 +0900)
60 files changed:
.gitignore
Makefile.inc
kpartx/dasd.c
libmpathcmd/mpath_cmd.c
libmpathcmd/mpath_cmd.h
libmpathpersist/mpath_persist.c
libmpathpersist/mpath_pr_ioctl.c
libmultipath/Makefile
libmultipath/blacklist.c
libmultipath/blacklist.h
libmultipath/callout.c
libmultipath/checkers.c
libmultipath/checkers.h
libmultipath/checkers/tur.c
libmultipath/config.c
libmultipath/config.h
libmultipath/configure.c
libmultipath/configure.h
libmultipath/devmapper.c
libmultipath/dict.c
libmultipath/discovery.c
libmultipath/dmparser.c
libmultipath/foreign/Makefile
libmultipath/foreign/nvme.c
libmultipath/log_pthread.c
libmultipath/nvme-lib.c [new file with mode: 0644]
libmultipath/nvme-lib.h [new file with mode: 0644]
libmultipath/nvme/argconfig.h [new file with mode: 0644]
libmultipath/nvme/json.h [new file with mode: 0644]
libmultipath/nvme/linux/nvme.h [new file with mode: 0644]
libmultipath/nvme/linux/nvme_ioctl.h [new file with mode: 0644]
libmultipath/nvme/nvme-ioctl.c [new file with mode: 0644]
libmultipath/nvme/nvme-ioctl.h [new file with mode: 0644]
libmultipath/nvme/nvme.h [new file with mode: 0644]
libmultipath/nvme/plugin.h [new file with mode: 0644]
libmultipath/prio.c
libmultipath/prio.h
libmultipath/prioritizers/Makefile
libmultipath/prioritizers/ana.c [new file with mode: 0644]
libmultipath/propsel.c
libmultipath/propsel.h
libmultipath/structs.h
libmultipath/structs_vec.c
libmultipath/structs_vec.h
libmultipath/sysfs.c
libmultipath/uevent.c
libmultipath/util.c
libmultipath/util.h
libmultipath/version.h
multipath/main.c
multipath/multipath.conf.5
multipathd/cli.c
multipathd/cli.h
multipathd/cli_handlers.c
multipathd/dmevents.c
multipathd/main.c
tests/Makefile
tests/blacklist.c
tests/hwtable.c
tests/util.c

index 35c59a7e2d513a6293f71a67f7b25ed36be5996b..9926756b8b936838bfe399989be69fae0e159567 100644 (file)
@@ -21,4 +21,5 @@ libdmmp/test/libdmmp_test
 libdmmp/test/libdmmp_speed_test
 tests/*-test
 tests/*.out
-
+libmultipath/nvme-ioctl.c
+libmultipath/nvme-ioctl.h
index a83f02c72ed6d5dc2e5b81da6fdfa45656b0289d..fc728ca90095bf8a19c6dde0cf7eea13108d9efd 100644 (file)
@@ -9,9 +9,6 @@
 # WITH_LOCAL_LIBDM     = 1
 # WITH_LOCAL_LIBSYSFS  = 1
 #
-# Uncomment to disable RADOS support (e.g. if rados headers are missing).
-# ENABLE_RADOS = 0
-#
 # Uncomment to disable libdmmp support
 # ENABLE_LIBDMMP = 0
 #
@@ -66,6 +63,7 @@ mpathpersistdir       = $(TOPDIR)/libmpathpersist
 mpathcmddir    = $(TOPDIR)/libmpathcmd
 thirdpartydir  = $(TOPDIR)/third-party
 libdmmpdir     = $(TOPDIR)/libdmmp
+nvmedir                = $(TOPDIR)/libmultipath/nvme
 includedir     = $(prefix)/usr/include
 pkgconfdir     = $(usrlibdir)/pkgconfig
 
index 94ae81b7a2da06a76819cb4cb9251363ad609f18..61b609a507be108957abbccef3fe4328396eecbe 100644 (file)
@@ -137,7 +137,7 @@ read_dasd_pt(int fd, struct slice all, struct slice *sp, int ns)
                        /* Not a DASD */
                        return -1;
        } else {
-               fd_dasd = fd;
+               fd_dasd = dup(fd);
        }
 
        if (ioctl(fd_dasd, BIODASDINFO, (unsigned long)&info) != 0) {
@@ -190,7 +190,7 @@ read_dasd_pt(int fd, struct slice all, struct slice *sp, int ns)
                memcpy (&vlabel, data, sizeof(vlabel));
        else {
                bzero(&vlabel,4);
-               memcpy (&vlabel.vollbl, data, sizeof(vlabel) - 4);
+               memcpy ((char *)&vlabel + 4, data, sizeof(vlabel) - 4);
        }
        vtoc_ebcdic_dec(vlabel.vollbl, type, 4);
 
@@ -288,7 +288,6 @@ read_dasd_pt(int fd, struct slice all, struct slice *sp, int ns)
 out:
        if (data != NULL)
                free(data);
-       if (fd_dasd != -1 && fd_dasd != fd)
-               close(fd_dasd);
+       close(fd_dasd);
        return retval;
 }
index 61e6a98cf3c8555f359f1fc7d45e32b0cdffc91a..df4ca541f07f2cad0ee467e13ab35b0c3661f698 100644 (file)
@@ -133,6 +133,10 @@ ssize_t mpath_recv_reply_len(int fd, unsigned int timeout)
                errno = EIO;
                return -1;
        }
+       if (len <= 0 || len >= MAX_REPLY_LEN) {
+               errno = ERANGE;
+               return -1;
+       }
        return len;
 }
 
index df9d9388c598c8dea6d35f39676eb9e9c1f4da9c..15aeb0673527a4f8d882fa1622634a1d9f7b89d1 100644 (file)
 #ifndef LIB_MPATH_CMD_H
 #define LIB_MPATH_CMD_H
 
+/*
+ * This should be sufficient for json output for >10000 maps,
+ * and >60000 paths.
+ */
+#define MAX_REPLY_LEN (32 * 1024 * 1024)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
index 2ffe56ea71fa4fac06eeda9c0ef22895bd62b78c..6505774f98dad44b54bdd59430d8d75cc02ef313 100644 (file)
@@ -188,7 +188,7 @@ int mpath_persistent_reserve_in (int fd, int rq_servact,
 
        condlog(3, "alias = %s", alias);
        map_present = dm_map_present(alias);
-       if (map_present && !dm_is_mpath(alias)){
+       if (map_present && dm_is_mpath(alias) != 1){
                condlog( 0, "%s: not a multipath device.", alias);
                ret = MPATH_PR_DMMP_ERROR;
                goto out;
@@ -283,7 +283,7 @@ int mpath_persistent_reserve_out ( int fd, int rq_servact, int rq_scope,
        condlog(3, "alias = %s", alias);
        map_present = dm_map_present(alias);
 
-       if (map_present && !dm_is_mpath(alias)){
+       if (map_present && dm_is_mpath(alias) != 1){
                condlog(3, "%s: not a multipath device.", alias);
                ret = MPATH_PR_DMMP_ERROR;
                goto out;
@@ -889,7 +889,8 @@ int update_map_pr(struct multipath *mpp)
        if (!get_be64(mpp->reservation_key))
        {
                /* Nothing to do. Assuming pr mgmt feature is disabled*/
-               condlog(3, "%s: reservation_key not set in multipath.conf", mpp->alias);
+               condlog(4, "%s: reservation_key not set in multipath.conf",
+                       mpp->alias);
                return MPATH_PR_SUCCESS;
        }
 
index a222b1e13739aab6c6a5b5026d975a09111a17c3..cf528feb23d2832374289b0f7204fb292ed94915 100644 (file)
@@ -211,7 +211,8 @@ void mpath_format_readfullstatus(struct prin_resp *pr_buff, int len, int noisy)
        unsigned char *p;
        char  *ppbuff;
        uint32_t additional_length;
-
+       char tempbuff[MPATH_MAX_PARAM_LEN];
+       struct prin_fulldescr fdesc;
 
        convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readfd.prgeneration);
        convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readfd.number_of_descriptor);
@@ -223,9 +224,12 @@ void mpath_format_readfullstatus(struct prin_resp *pr_buff, int len, int noisy)
        }
 
        additional_length = pr_buff->prin_descriptor.prin_readfd.number_of_descriptor;
+       if (additional_length > MPATH_MAX_PARAM_LEN) {
+               condlog(3, "PRIN length %u exceeds max length %d", additional_length,
+                       MPATH_MAX_PARAM_LEN);
+               return;
+       }
 
-       char tempbuff[MPATH_MAX_PARAM_LEN];
-       struct prin_fulldescr fdesc;
        memset(&fdesc, 0, sizeof(struct prin_fulldescr));
 
        memcpy( tempbuff, pr_buff->prin_descriptor.prin_readfd.private_buffer,MPATH_MAX_PARAM_LEN );
@@ -241,7 +245,7 @@ void mpath_format_readfullstatus(struct prin_resp *pr_buff, int len, int noisy)
                fdesc.rtpi = get_unaligned_be16(&p[18]);
 
                tid_len_len = get_unaligned_be32(&p[20]);
-               if (tid_len_len + 24 + k >= additional_length) {
+               if (tid_len_len + 24 + k > additional_length) {
                        condlog(0,
                                "%s: corrupt PRIN response: status descriptor end %d exceeds length %d",
                                __func__, tid_len_len + k + 24,
index 33f52691e37145adc58f8a38415e7a9f403726aa..a2be42ea91ae27a4f24c2f6b37c8dc241a5dbc52 100644 (file)
@@ -7,7 +7,7 @@ SONAME = 0
 DEVLIB = libmultipath.so
 LIBS = $(DEVLIB).$(SONAME)
 
-CFLAGS += $(LIB_CFLAGS) -I$(mpathcmddir) -I$(mpathpersistdir)
+CFLAGS += $(LIB_CFLAGS) -I$(mpathcmddir) -I$(mpathpersistdir) -I$(nvmedir)
 
 LIBDEPS += -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd -lurcu -laio
 
@@ -43,10 +43,21 @@ OBJS = memory.o parser.o vector.o devmapper.o callout.o \
        switchgroup.o uxsock.o print.o alias.o log_pthread.o \
        log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \
        lock.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \
-       io_err_stat.o dm-generic.o generic.o foreign.o
+       io_err_stat.o dm-generic.o generic.o foreign.o nvme-lib.o
 
 all: $(LIBS)
 
+nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h
+       $(CC) $(CFLAGS) -Wno-unused-function -c -o $@ $<
+
+make_static = $(shell sed '/^static/!s/^\([a-z]\{1,\} \)/static \1/' <$1 >$2)
+
+nvme-ioctl.c: nvme/nvme-ioctl.c
+       $(call make_static,$<,$@)
+
+nvme-ioctl.h: nvme/nvme-ioctl.h
+       $(call make_static,$<,$@)
+
 $(LIBS): $(OBJS)
        $(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ -o $@ $(OBJS) $(LIBDEPS)
        $(LN) $@ $(DEVLIB)
@@ -62,7 +73,7 @@ uninstall:
        $(RM) $(DESTDIR)$(syslibdir)/$(DEVLIB)
 
 clean: dep_clean
-       $(RM) core *.a *.o *.so *.so.* *.gz
+       $(RM) core *.a *.o *.so *.so.* *.gz nvme-ioctl.c nvme-ioctl.h
 
 include $(wildcard $(OBJS:.o=.d))
 
index 318ec03f97929cda1b95a1c66b36504da1e8114e..e0d02794c72dac10a7da39302def8d422102f61a 100644 (file)
@@ -192,7 +192,7 @@ setup_default_blist (struct config * conf)
        char * str;
        int i;
 
-       str = STRDUP("^(ram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]");
+       str = STRDUP("^(ram|zram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]");
        if (!str)
                return 1;
        if (store_ble(conf->blist_devnode, str, ORIGIN_DEFAULT))
@@ -232,24 +232,24 @@ setup_default_blist (struct config * conf)
        return 0;
 }
 
-#define LOG_BLIST(M,S)                                                 \
+#define LOG_BLIST(M, S, lvl)                                           \
        if (vendor && product)                                          \
-               condlog(3, "%s: (%s:%s) %s %s",                         \
+               condlog(lvl, "%s: (%s:%s) %s %s",                       \
                        dev, vendor, product, (M), (S));                \
        else if (wwid && !dev)                                          \
-               condlog(3, "%s: %s %s", wwid, (M), (S));                \
+               condlog(lvl, "%s: %s %s", wwid, (M), (S));              \
        else if (wwid)                                                  \
-               condlog(3, "%s: %s %s %s", dev, (M), wwid, (S));        \
+               condlog(lvl, "%s: %s %s %s", dev, (M), wwid, (S));      \
        else if (env)                                                   \
-               condlog(3, "%s: %s %s %s", dev, (M), env, (S));         \
+               condlog(lvl, "%s: %s %s %s", dev, (M), env, (S));       \
        else if (protocol)                                              \
-               condlog(3, "%s: %s %s %s", dev, (M), protocol, (S));    \
+               condlog(lvl, "%s: %s %s %s", dev, (M), protocol, (S));  \
        else                                                            \
-               condlog(3, "%s: %s %s", dev, (M), (S))
+               condlog(lvl, "%s: %s %s", dev, (M), (S))
 
-void
+static void
 log_filter (const char *dev, char *vendor, char *product, char *wwid,
-           const char *env, const char *protocol, int r)
+           const char *env, const char *protocol, int r, int lvl)
 {
        /*
         * Try to sort from most likely to least.
@@ -258,37 +258,37 @@ log_filter (const char *dev, char *vendor, char *product, char *wwid,
        case MATCH_NOTHING:
                break;
        case MATCH_DEVICE_BLIST:
-               LOG_BLIST("vendor/product", "blacklisted");
+               LOG_BLIST("vendor/product", "blacklisted", lvl);
                break;
        case MATCH_WWID_BLIST:
-               LOG_BLIST("wwid", "blacklisted");
+               LOG_BLIST("wwid", "blacklisted", lvl);
                break;
        case MATCH_DEVNODE_BLIST:
-               LOG_BLIST("device node name", "blacklisted");
+               LOG_BLIST("device node name", "blacklisted", lvl);
                break;
        case MATCH_PROPERTY_BLIST:
-               LOG_BLIST("udev property", "blacklisted");
+               LOG_BLIST("udev property", "blacklisted", lvl);
                break;
        case MATCH_PROTOCOL_BLIST:
-               LOG_BLIST("protocol", "blacklisted");
+               LOG_BLIST("protocol", "blacklisted", lvl);
                break;
        case MATCH_DEVICE_BLIST_EXCEPT:
-               LOG_BLIST("vendor/product", "whitelisted");
+               LOG_BLIST("vendor/product", "whitelisted", lvl);
                break;
        case MATCH_WWID_BLIST_EXCEPT:
-               LOG_BLIST("wwid", "whitelisted");
+               LOG_BLIST("wwid", "whitelisted", lvl);
                break;
        case MATCH_DEVNODE_BLIST_EXCEPT:
-               LOG_BLIST("device node name", "whitelisted");
+               LOG_BLIST("device node name", "whitelisted", lvl);
                break;
        case MATCH_PROPERTY_BLIST_EXCEPT:
-               LOG_BLIST("udev property", "whitelisted");
+               LOG_BLIST("udev property", "whitelisted", lvl);
                break;
        case MATCH_PROPERTY_BLIST_MISSING:
-               LOG_BLIST("blacklisted,", "udev property missing");
+               LOG_BLIST("blacklisted,", "udev property missing", lvl);
                break;
        case MATCH_PROTOCOL_BLIST_EXCEPT:
-               LOG_BLIST("protocol", "whitelisted");
+               LOG_BLIST("protocol", "whitelisted", lvl);
                break;
        }
 }
@@ -306,7 +306,7 @@ filter_device (vector blist, vector elist, char * vendor, char * product,
                        r = MATCH_DEVICE_BLIST;
        }
 
-       log_filter(dev, vendor, product, NULL, NULL, NULL, r);
+       log_filter(dev, vendor, product, NULL, NULL, NULL, r, 3);
        return r;
 }
 
@@ -322,7 +322,7 @@ filter_devnode (vector blist, vector elist, char * dev)
                        r = MATCH_DEVNODE_BLIST;
        }
 
-       log_filter(dev, NULL, NULL, NULL, NULL, NULL, r);
+       log_filter(dev, NULL, NULL, NULL, NULL, NULL, r, 3);
        return r;
 }
 
@@ -338,7 +338,7 @@ filter_wwid (vector blist, vector elist, char * wwid, char * dev)
                        r = MATCH_WWID_BLIST;
        }
 
-       log_filter(dev, NULL, NULL, wwid, NULL, NULL, r);
+       log_filter(dev, NULL, NULL, wwid, NULL, NULL, r, 3);
        return r;
 }
 
@@ -357,7 +357,7 @@ filter_protocol(vector blist, vector elist, struct path * pp)
                        r = MATCH_PROTOCOL_BLIST;
        }
 
-       log_filter(pp->dev, NULL, NULL, NULL, NULL, buf, r);
+       log_filter(pp->dev, NULL, NULL, NULL, NULL, buf, r, 3);
        return r;
 }
 
@@ -366,7 +366,7 @@ filter_path (struct config * conf, struct path * pp)
 {
        int r;
 
-       r = filter_property(conf, pp->udev);
+       r = filter_property(conf, pp->udev, 3);
        if (r > 0)
                return r;
        r = filter_devnode(conf->blist_devnode, conf->elist_devnode, pp->dev);
@@ -384,7 +384,7 @@ filter_path (struct config * conf, struct path * pp)
 }
 
 int
-filter_property(struct config * conf, struct udev_device * udev)
+filter_property(struct config *conf, struct udev_device *udev, int lvl)
 {
        const char *devname = udev_device_get_sysname(udev);
        struct udev_list_entry *list_entry;
@@ -415,7 +415,7 @@ filter_property(struct config * conf, struct udev_device * udev)
                }
        }
 
-       log_filter(devname, NULL, NULL, NULL, env, NULL, r);
+       log_filter(devname, NULL, NULL, NULL, env, NULL, r, lvl);
        return r;
 }
 
index 18903b6bd1ac22c8217efb4a14cd4a8b9e24a644..4c8ec99e084e1a32cde07e378a5b3a8f6be6c150 100644 (file)
@@ -37,7 +37,7 @@ int filter_devnode (vector, vector, char *);
 int filter_wwid (vector, vector, char *, char *);
 int filter_device (vector, vector, char *, char *, char *);
 int filter_path (struct config *, struct path *);
-int filter_property(struct config *, struct udev_device *);
+int filter_property(struct config *, struct udev_device *, int);
 int filter_protocol(vector, vector, struct path *);
 int store_ble (vector, char *, int);
 int set_ble_device (vector, char *, char *, int);
index d5ca27b1dd765578dbc582d8cd167abbf07b76bd..dac088c4653ef1a9161654f0482c9c259b0e618d 100644 (file)
@@ -68,19 +68,20 @@ int execute_program(char *path, char *value, int len)
        switch(pid) {
        case 0:
                /* child */
-               close(STDOUT_FILENO);
 
                /* dup write side of pipe to STDOUT */
-               if (dup(fds[1]) < 0)
+               if (dup2(fds[1], STDOUT_FILENO) < 0) {
+                       condlog(1, "failed to dup2 stdout: %m");
                        return -1;
+               }
+               close(fds[0]);
+               close(fds[1]);
 
                /* Ignore writes to stderr */
                null_fd = open("/dev/null", O_WRONLY);
                if (null_fd > 0) {
-                       int err_fd __attribute__ ((unused));
-
-                       close(STDERR_FILENO);
-                       err_fd = dup(null_fd);
+                       if (dup2(null_fd, STDERR_FILENO) < 0)
+                               condlog(1, "failed to dup2 stderr: %m");
                        close(null_fd);
                }
 
index 848c4c34512eac21ef0babdb50fc010f734aebd5..f4fdcae91a409225b9a8a44af255712df6cb3ce9 100644 (file)
@@ -261,13 +261,6 @@ int checker_check (struct checker * c, int path_state)
        return r;
 }
 
-int checker_selected(const struct checker *c)
-{
-       if (!c)
-               return 0;
-       return c->cls != NULL;
-}
-
 const char *checker_name(const struct checker *c)
 {
        if (!c || !c->cls)
@@ -295,7 +288,7 @@ const char *checker_message(const struct checker *c)
 {
        int id;
 
-       if (!c || c->msgid < 0 ||
+       if (!c || !c->cls || c->msgid < 0 ||
            (c->msgid >= CHECKER_GENERIC_MSGTABLE_SIZE &&
             c->msgid < CHECKER_FIRST_MSGID))
                goto bad_id;
index b2e8f9aaa7f43aab73875c99d325c85e333420c1..dab197f9c53440184b0ab78bc6f7273911ab0702 100644 (file)
@@ -129,6 +129,11 @@ struct checker {
                                                you want to stuff data in. */
 };
 
+static inline int checker_selected(const struct checker *c)
+{
+       return c != NULL && c->cls != NULL;
+}
+
 const char *checker_state_name(int);
 int init_checkers(const char *);
 void cleanup_checkers (void);
@@ -142,7 +147,6 @@ void checker_set_fd (struct checker *, int);
 void checker_enable (struct checker *);
 void checker_disable (struct checker *);
 int checker_check (struct checker *, int);
-int checker_selected(const struct checker *);
 int checker_is_sync(const struct checker *);
 const char *checker_name (const struct checker *);
 /*
index 63b19624c03fc65aff6d79dbdb699d1c7010e945..6b08dbbbb025698bb1097cc1a43542e484d630e7 100644 (file)
@@ -261,7 +261,7 @@ static void *tur_thread(void *ctx)
        tur_thread_cleanup_push(ct);
        rcu_register_thread();
 
-       condlog(3, "%d:%d : tur checker starting up", major(ct->devt),
+       condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
                minor(ct->devt));
 
        tur_deep_sleep(ct);
@@ -275,7 +275,7 @@ static void *tur_thread(void *ctx)
        pthread_cond_signal(&ct->active);
        pthread_mutex_unlock(&ct->lock);
 
-       condlog(3, "%d:%d : tur checker finished, state %s", major(ct->devt),
+       condlog(4, "%d:%d : tur checker finished, state %s", major(ct->devt),
                minor(ct->devt), checker_state_name(state));
 
        running = uatomic_xchg(&ct->running, 0);
@@ -415,7 +415,7 @@ int libcheck_check(struct checker * c)
                }
                pthread_mutex_unlock(&ct->lock);
                if (tur_status == PATH_PENDING) {
-                       condlog(3, "%d:%d : tur checker still running",
+                       condlog(4, "%d:%d : tur checker still running",
                                major(ct->devt), minor(ct->devt));
                } else {
                        int running = uatomic_xchg(&ct->running, 0);
index 5af7af58ea5fc4ab5c6173495503e171d5885c23..24d71aed10eba1cace4947d6ae08021eed9cea1a 100644 (file)
@@ -369,6 +369,9 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
        merge_num(max_sectors_kb);
        merge_num(ghost_delay);
        merge_num(all_tg_pt);
+       merge_num(san_path_err_threshold);
+       merge_num(san_path_err_forget_rate);
+       merge_num(san_path_err_recovery_time);
 
        snprintf(id, sizeof(id), "%s/%s", dst->vendor, dst->product);
        reconcile_features_with_options(id, &dst->features,
index 7d0cd9a6f197030b9712b9cd98418329ce018344..b938c26c5b4aaeeb9ac6ae0a2bb8b2a76ad79177 100644 (file)
@@ -76,6 +76,9 @@ struct hwentry {
        int deferred_remove;
        int delay_watch_checks;
        int delay_wait_checks;
+       int san_path_err_threshold;
+       int san_path_err_forget_rate;
+       int san_path_err_recovery_time;
        int marginal_path_err_sample_time;
        int marginal_path_err_rate_threshold;
        int marginal_path_err_recheck_gap_time;
@@ -112,6 +115,9 @@ struct mpentry {
        int deferred_remove;
        int delay_watch_checks;
        int delay_wait_checks;
+       int san_path_err_threshold;
+       int san_path_err_forget_rate;
+       int san_path_err_recovery_time;
        int marginal_path_err_sample_time;
        int marginal_path_err_rate_threshold;
        int marginal_path_err_recheck_gap_time;
@@ -162,6 +168,9 @@ struct config {
        int processed_main_config;
        int delay_watch_checks;
        int delay_wait_checks;
+       int san_path_err_threshold;
+       int san_path_err_forget_rate;
+       int san_path_err_recovery_time;
        int marginal_path_err_sample_time;
        int marginal_path_err_rate_threshold;
        int marginal_path_err_recheck_gap_time;
index ed3e30f5e2ee1756a678c07cf29a525cd557f785..af4d78de3153e3f82b7a4900983e8afc82da89a3 100644 (file)
 #include "sysfs.h"
 #include "io_err_stat.h"
 
+/* Time in ms to wait for pending checkers in setup_map() */
+#define WAIT_CHECKERS_PENDING_MS 10
+#define WAIT_ALL_CHECKERS_PENDING_MS 90
+
 /* group paths in pg by host adapter
  */
 int group_by_host_adapter(struct pathgroup *pgp, vector adapters)
@@ -257,12 +261,43 @@ int rr_optimize_path_order(struct pathgroup *pgp)
        return 0;
 }
 
+static int wait_for_pending_paths(struct multipath *mpp,
+                                 struct config *conf,
+                                 int n_pending, int goal, int wait_ms)
+{
+       static const struct timespec millisec =
+               { .tv_sec = 0, .tv_nsec = 1000*1000 };
+       int i, j;
+       struct path *pp;
+       struct pathgroup *pgp;
+       struct timespec ts;
+
+       do {
+               vector_foreach_slot(mpp->pg, pgp, i) {
+                       vector_foreach_slot(pgp->paths, pp, j) {
+                               if (pp->state != PATH_PENDING)
+                                       continue;
+                               pp->state = get_state(pp, conf,
+                                                     0, PATH_PENDING);
+                               if (pp->state != PATH_PENDING &&
+                                   --n_pending <= goal)
+                                       return 0;
+                       }
+               }
+               ts = millisec;
+               while (nanosleep(&ts, &ts) != 0 && errno == EINTR)
+                       /* nothing */;
+       } while (--wait_ms > 0);
+
+       return n_pending;
+}
+
 int setup_map(struct multipath *mpp, char *params, int params_size,
              struct vectors *vecs)
 {
        struct pathgroup * pgp;
        struct config *conf;
-       int i;
+       int i, n_paths;
 
        /*
         * don't bother if devmap size is unknown
@@ -313,6 +348,9 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
        select_marginal_path_err_rate_threshold(conf, mpp);
        select_marginal_path_err_recheck_gap_time(conf, mpp);
        select_marginal_path_double_failed_time(conf, mpp);
+       select_san_path_err_threshold(conf, mpp);
+       select_san_path_err_forget_rate(conf, mpp);
+       select_san_path_err_recovery_time(conf, mpp);
        select_skip_kpartx(conf, mpp);
        select_max_sectors_kb(conf, mpp);
        select_ghost_delay(conf, mpp);
@@ -321,12 +359,24 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
        sysfs_set_scsi_tmo(mpp, conf->checkint);
        pthread_cleanup_pop(1);
 
-       if (mpp->marginal_path_double_failed_time > 0 &&
-           mpp->marginal_path_err_sample_time > 0 &&
-           mpp->marginal_path_err_recheck_gap_time > 0 &&
-           mpp->marginal_path_err_rate_threshold >= 0)
+       if (marginal_path_check_enabled(mpp)) {
+               if (delay_check_enabled(mpp)) {
+                       condlog(1, "%s: WARNING: both marginal_path and delay_checks error detection selected",
+                               mpp->alias);
+                       condlog(0, "%s: unexpected behavior may occur!",
+                               mpp->alias);
+               }
                start_io_err_stat_thread(vecs);
-       /*
+       }
+       if (san_path_check_enabled(mpp) && delay_check_enabled(mpp)) {
+               condlog(1, "%s: WARNING: both san_path_err and delay_checks error detection selected",
+                       mpp->alias);
+               condlog(0, "%s: unexpected behavior may occur!",
+                       mpp->alias);
+       }
+
+       n_paths = VECTOR_SIZE(mpp->paths);
+        /*
         * assign paths to path groups -- start with no groups and all paths
         * in mpp->paths
         */
@@ -340,6 +390,30 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
        if (mpp->pgpolicyfn && mpp->pgpolicyfn(mpp))
                return 1;
 
+       /*
+        * If async state detection is used, see if pending state checks
+        * have finished, to get nr_active right. We can't wait until the
+        * checkers time out, as that may take 30s or more, and we are
+        * holding the vecs lock.
+        */
+       if (conf->force_sync == 0 && n_paths > 0) {
+               int n_pending = pathcount(mpp, PATH_PENDING);
+
+               if (n_pending > 0)
+                       n_pending = wait_for_pending_paths(
+                               mpp, conf, n_pending, 0,
+                               WAIT_CHECKERS_PENDING_MS);
+               /* ALL paths pending - wait some more, but be satisfied
+                  with only some paths finished */
+               if (n_pending == n_paths)
+                       n_pending = wait_for_pending_paths(
+                               mpp, conf, n_pending,
+                               n_paths >= 4 ? 2 : 1,
+                               WAIT_ALL_CHECKERS_PENDING_MS);
+               if (n_pending > 0)
+                       condlog(2, "%s: setting up map with %d/%d path checkers pending",
+                               mpp->alias, n_pending, n_paths);
+       }
        mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
 
        /*
@@ -788,15 +862,6 @@ fail:
        return 1;
 }
 
-/*
- * Return value:
- */
-#define DOMAP_RETRY    -1
-#define DOMAP_FAIL     0
-#define DOMAP_OK       1
-#define DOMAP_EXIST    2
-#define DOMAP_DRY      3
-
 int domap(struct multipath *mpp, char *params, int is_daemon)
 {
        int r = DOMAP_FAIL;
@@ -976,7 +1041,7 @@ int check_daemon(void)
        if (recv_packet(fd, &reply, timeout) != 0)
                goto out;
 
-       if (strstr(reply, "shutdown"))
+       if (reply && strstr(reply, "shutdown"))
                goto out_free;
 
        ret = 1;
@@ -998,8 +1063,8 @@ out:
 int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
                    int force_reload, enum mpath_cmds cmd)
 {
-       int r = 1;
-       int k, i;
+       int ret = CP_FAIL;
+       int k, i, r;
        int is_daemon = (cmd == CMD_NONE) ? 1 : 0;
        char params[PARAMS_SIZE];
        struct multipath * mpp;
@@ -1009,6 +1074,7 @@ int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
        vector pathvec = vecs->pathvec;
        struct config *conf;
        int allow_queueing;
+       uint64_t *size_mismatch_seen;
 
        /* ignore refwwid if it's empty */
        if (refwwid && !strlen(refwwid))
@@ -1019,6 +1085,14 @@ int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
                        pp1->mpp = NULL;
                }
        }
+
+       if (VECTOR_SIZE(pathvec) == 0)
+               return CP_OK;
+       size_mismatch_seen = calloc((VECTOR_SIZE(pathvec) - 1) / 64 + 1,
+                                   sizeof(uint64_t));
+       if (size_mismatch_seen == NULL)
+               return CP_FAIL;
+
        vector_foreach_slot (pathvec, pp1, k) {
                int invalid;
                /* skip this path for some reason */
@@ -1038,8 +1112,8 @@ int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
                        continue;
                }
 
-               /* 2. if path already coalesced */
-               if (pp1->mpp)
+               /* 2. if path already coalesced, or seen and discarded */
+               if (pp1->mpp || is_bit_set_in_array(k, size_mismatch_seen))
                        continue;
 
                /* 3. if path has disappeared */
@@ -1088,9 +1162,10 @@ int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
                                 * ouch, avoid feeding that to the DM
                                 */
                                condlog(0, "%s: size %llu, expected %llu. "
-                                       "Discard", pp2->dev_t, pp2->size,
+                                       "Discard", pp2->dev, pp2->size,
                                        mpp->size);
                                mpp->action = ACT_REJECT;
+                               set_bit_in_array(i, size_mismatch_seen);
                        }
                }
                verify_paths(mpp, vecs);
@@ -1119,8 +1194,10 @@ int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
                                        "ignoring" : "removing");
                                remove_map(mpp, vecs, 0);
                                continue;
-                       } else /* if (r == DOMAP_RETRY) */
-                               return r;
+                       } else /* if (r == DOMAP_RETRY && !is_daemon) */ {
+                               ret = CP_RETRY;
+                               goto out;
+                       }
                }
                if (r == DOMAP_DRY)
                        continue;
@@ -1162,7 +1239,7 @@ int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
                if (newmp) {
                        if (mpp->action != ACT_REJECT) {
                                if (!vector_alloc_slot(newmp))
-                                       return 1;
+                                       goto out;
                                vector_set_slot(newmp, mpp);
                        }
                        else
@@ -1193,7 +1270,10 @@ int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
                                condlog(2, "%s: remove (dead)", alias);
                }
        }
-       return 0;
+       ret = CP_OK;
+out:
+       free(size_mismatch_seen);
+       return ret;
 }
 
 struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type)
@@ -1289,7 +1369,7 @@ int get_refwwid(enum mpath_cmds cmd, char *dev, enum devtypes dev_type,
                conf = get_multipath_config();
                pthread_cleanup_push(put_multipath_config, conf);
                if (pp->udev && pp->uid_attribute &&
-                   filter_property(conf, pp->udev) > 0)
+                   filter_property(conf, pp->udev, 3) > 0)
                        invalid = 1;
                pthread_cleanup_pop(1);
                if (invalid)
@@ -1329,7 +1409,7 @@ int get_refwwid(enum mpath_cmds cmd, char *dev, enum devtypes dev_type,
                conf = get_multipath_config();
                pthread_cleanup_push(put_multipath_config, conf);
                if (pp->udev && pp->uid_attribute &&
-                   filter_property(conf, pp->udev) > 0)
+                   filter_property(conf, pp->udev, 3) > 0)
                        invalid = 1;
                pthread_cleanup_pop(1);
                if (invalid)
@@ -1358,7 +1438,7 @@ int get_refwwid(enum mpath_cmds cmd, char *dev, enum devtypes dev_type,
                conf = get_multipath_config();
                pthread_cleanup_push(put_multipath_config, conf);
                if (pp->udev && pp->uid_attribute &&
-                   filter_property(conf, pp->udev) > 0)
+                   filter_property(conf, pp->udev, 3) > 0)
                        invalid = 1;
                pthread_cleanup_pop(1);
                if (invalid)
index 8b56d33a1d0bc5140b49a23f634dc0e933c9bc31..d75090002839241f4600e2ddba338427e9153e9e 100644 (file)
@@ -23,6 +23,28 @@ enum actions {
        ACT_IMPOSSIBLE,
 };
 
+/*
+ * Return value of domap()
+ * DOMAP_RETRY is only used for ACT_CREATE (see domap()).
+ */
+enum {
+       DOMAP_RETRY     = -1,
+       DOMAP_FAIL      = 0,
+       DOMAP_OK        = 1,
+       DOMAP_EXIST     = 2,
+       DOMAP_DRY       = 3
+};
+
+/*
+ * Return value of coalesce_paths()
+ * CP_RETRY is only used in non-daemon case (multipath).
+ */
+enum {
+       CP_OK = 0,
+       CP_FAIL,
+       CP_RETRY,
+};
+
 #define FLUSH_ONE 1
 #define FLUSH_ALL 2
 
index 0433b49142af68a4a52f7abd99949138546f684c..3294bd487d58eb2e138fb456ae73a111e61b6fa5 100644 (file)
@@ -692,9 +692,15 @@ out:
        return r;
 }
 
+/*
+ * returns:
+ * 1  : is multipath device
+ * 0  : is not multipath device
+ * -1 : error
+ */
 int dm_is_mpath(const char *name)
 {
-       int r = 0;
+       int r = -1;
        struct dm_task *dmt;
        struct dm_info info;
        uint64_t start, length;
@@ -703,33 +709,44 @@ int dm_is_mpath(const char *name)
        const char *uuid;
 
        if (!(dmt = libmp_dm_task_create(DM_DEVICE_TABLE)))
-               return 0;
+               goto out;
 
        if (!dm_task_set_name(dmt, name))
-               goto out;
+               goto out_task;
 
        dm_task_no_open_count(dmt);
 
        if (!dm_task_run(dmt))
-               goto out;
+               goto out_task;
 
-       if (!dm_task_get_info(dmt, &info) || !info.exists)
-               goto out;
+       if (!dm_task_get_info(dmt, &info))
+               goto out_task;
+
+       r = 0;
+
+       if (!info.exists)
+               goto out_task;
 
        uuid = dm_task_get_uuid(dmt);
 
        if (!uuid || strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN) != 0)
-               goto out;
+               goto out_task;
 
        /* Fetch 1st target */
-       dm_get_next_target(dmt, NULL, &start, &length, &target_type, &params);
+       if (dm_get_next_target(dmt, NULL, &start, &length, &target_type,
+                              &params) != NULL)
+               /* multiple targets */
+               goto out_task;
 
        if (!target_type || strcmp(target_type, TGT_MPATH) != 0)
-               goto out;
+               goto out_task;
 
        r = 1;
-out:
+out_task:
        dm_task_destroy(dmt);
+out:
+       if (r < 0)
+               condlog(2, "%s: dm command failed in %s", name, __FUNCTION__);
        return r;
 }
 
@@ -823,7 +840,7 @@ int _dm_flush_map (const char * mapname, int need_sync, int deferred_remove,
        unsigned long long mapsize;
        char params[PARAMS_SIZE] = {0};
 
-       if (!dm_is_mpath(mapname))
+       if (dm_is_mpath(mapname) != 1)
                return 0; /* nothing to do */
 
        /* if the device currently has no partitions, do not
@@ -1087,7 +1104,7 @@ dm_get_maps (vector mp)
        }
 
        do {
-               if (!dm_is_mpath(names->name))
+               if (dm_is_mpath(names->name) != 1)
                        goto next;
 
                mpp = dm_get_multipath(names->name);
index a81c051ffc9609a3ac0d6bc41cd853f972979d6c..eaad4f18f4497cffe02ab41137f66a2b72b40ba5 100644 (file)
@@ -327,7 +327,7 @@ def_find_multipaths_handler(struct config *conf, vector strvec)
        int i;
 
        if (set_yes_no_undef(strvec, &conf->find_multipaths) == 0 &&
-           conf->find_multipaths != YNU_UNDEF)
+           conf->find_multipaths != FIND_MULTIPATHS_UNDEF)
                return 0;
 
        buff = set_value(strvec);
@@ -1217,6 +1217,33 @@ declare_hw_handler(delay_wait_checks, set_off_int_undef)
 declare_hw_snprint(delay_wait_checks, print_off_int_undef)
 declare_mp_handler(delay_wait_checks, set_off_int_undef)
 declare_mp_snprint(delay_wait_checks, print_off_int_undef)
+declare_def_handler(san_path_err_threshold, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_threshold, print_off_int_undef,
+                          DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_threshold, set_off_int_undef)
+declare_ovr_snprint(san_path_err_threshold, print_off_int_undef)
+declare_hw_handler(san_path_err_threshold, set_off_int_undef)
+declare_hw_snprint(san_path_err_threshold, print_off_int_undef)
+declare_mp_handler(san_path_err_threshold, set_off_int_undef)
+declare_mp_snprint(san_path_err_threshold, print_off_int_undef)
+declare_def_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_forget_rate, print_off_int_undef,
+                          DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_ovr_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_hw_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_hw_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_mp_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_mp_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_def_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_recovery_time, print_off_int_undef,
+                          DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_ovr_snprint(san_path_err_recovery_time, print_off_int_undef)
+declare_hw_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_hw_snprint(san_path_err_recovery_time, print_off_int_undef)
+declare_mp_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_mp_snprint(san_path_err_recovery_time, print_off_int_undef)
 declare_def_handler(marginal_path_err_sample_time, set_off_int_undef)
 declare_def_snprint_defint(marginal_path_err_sample_time, print_off_int_undef,
                           DEFAULT_ERR_CHECKS)
@@ -1620,6 +1647,9 @@ init_keywords(vector keywords)
        install_keyword("config_dir", &def_config_dir_handler, &snprint_def_config_dir);
        install_keyword("delay_watch_checks", &def_delay_watch_checks_handler, &snprint_def_delay_watch_checks);
        install_keyword("delay_wait_checks", &def_delay_wait_checks_handler, &snprint_def_delay_wait_checks);
+       install_keyword("san_path_err_threshold", &def_san_path_err_threshold_handler, &snprint_def_san_path_err_threshold);
+       install_keyword("san_path_err_forget_rate", &def_san_path_err_forget_rate_handler, &snprint_def_san_path_err_forget_rate);
+       install_keyword("san_path_err_recovery_time", &def_san_path_err_recovery_time_handler, &snprint_def_san_path_err_recovery_time);
        install_keyword("marginal_path_err_sample_time", &def_marginal_path_err_sample_time_handler, &snprint_def_marginal_path_err_sample_time);
        install_keyword("marginal_path_err_rate_threshold", &def_marginal_path_err_rate_threshold_handler, &snprint_def_marginal_path_err_rate_threshold);
        install_keyword("marginal_path_err_recheck_gap_time", &def_marginal_path_err_recheck_gap_time_handler, &snprint_def_marginal_path_err_recheck_gap_time);
@@ -1714,6 +1744,9 @@ init_keywords(vector keywords)
        install_keyword("deferred_remove", &hw_deferred_remove_handler, &snprint_hw_deferred_remove);
        install_keyword("delay_watch_checks", &hw_delay_watch_checks_handler, &snprint_hw_delay_watch_checks);
        install_keyword("delay_wait_checks", &hw_delay_wait_checks_handler, &snprint_hw_delay_wait_checks);
+       install_keyword("san_path_err_threshold", &hw_san_path_err_threshold_handler, &snprint_hw_san_path_err_threshold);
+       install_keyword("san_path_err_forget_rate", &hw_san_path_err_forget_rate_handler, &snprint_hw_san_path_err_forget_rate);
+       install_keyword("san_path_err_recovery_time", &hw_san_path_err_recovery_time_handler, &snprint_hw_san_path_err_recovery_time);
        install_keyword("marginal_path_err_sample_time", &hw_marginal_path_err_sample_time_handler, &snprint_hw_marginal_path_err_sample_time);
        install_keyword("marginal_path_err_rate_threshold", &hw_marginal_path_err_rate_threshold_handler, &snprint_hw_marginal_path_err_rate_threshold);
        install_keyword("marginal_path_err_recheck_gap_time", &hw_marginal_path_err_recheck_gap_time_handler, &snprint_hw_marginal_path_err_recheck_gap_time);
@@ -1750,6 +1783,9 @@ init_keywords(vector keywords)
        install_keyword("deferred_remove", &ovr_deferred_remove_handler, &snprint_ovr_deferred_remove);
        install_keyword("delay_watch_checks", &ovr_delay_watch_checks_handler, &snprint_ovr_delay_watch_checks);
        install_keyword("delay_wait_checks", &ovr_delay_wait_checks_handler, &snprint_ovr_delay_wait_checks);
+       install_keyword("san_path_err_threshold", &ovr_san_path_err_threshold_handler, &snprint_ovr_san_path_err_threshold);
+       install_keyword("san_path_err_forget_rate", &ovr_san_path_err_forget_rate_handler, &snprint_ovr_san_path_err_forget_rate);
+       install_keyword("san_path_err_recovery_time", &ovr_san_path_err_recovery_time_handler, &snprint_ovr_san_path_err_recovery_time);
        install_keyword("marginal_path_err_sample_time", &ovr_marginal_path_err_sample_time_handler, &snprint_ovr_marginal_path_err_sample_time);
        install_keyword("marginal_path_err_rate_threshold", &ovr_marginal_path_err_rate_threshold_handler, &snprint_ovr_marginal_path_err_rate_threshold);
        install_keyword("marginal_path_err_recheck_gap_time", &ovr_marginal_path_err_recheck_gap_time_handler, &snprint_ovr_marginal_path_err_recheck_gap_time);
@@ -1785,6 +1821,9 @@ init_keywords(vector keywords)
        install_keyword("deferred_remove", &mp_deferred_remove_handler, &snprint_mp_deferred_remove);
        install_keyword("delay_watch_checks", &mp_delay_watch_checks_handler, &snprint_mp_delay_watch_checks);
        install_keyword("delay_wait_checks", &mp_delay_wait_checks_handler, &snprint_mp_delay_wait_checks);
+       install_keyword("san_path_err_threshold", &mp_san_path_err_threshold_handler, &snprint_mp_san_path_err_threshold);
+       install_keyword("san_path_err_forget_rate", &mp_san_path_err_forget_rate_handler, &snprint_mp_san_path_err_forget_rate);
+       install_keyword("san_path_err_recovery_time", &mp_san_path_err_recovery_time_handler, &snprint_mp_san_path_err_recovery_time);
        install_keyword("marginal_path_err_sample_time", &mp_marginal_path_err_sample_time_handler, &snprint_mp_marginal_path_err_sample_time);
        install_keyword("marginal_path_err_rate_threshold", &mp_marginal_path_err_rate_threshold_handler, &snprint_mp_marginal_path_err_rate_threshold);
        install_keyword("marginal_path_err_recheck_gap_time", &mp_marginal_path_err_recheck_gap_time_handler, &snprint_mp_marginal_path_err_recheck_gap_time);
index 63558ad84030985ada95182e603d56f6623859c4..10bd8cd6d8e0041500f361b79eb654b02b508499 100644 (file)
@@ -520,7 +520,7 @@ sysfs_get_asymmetric_access_state(struct path *pp, char *buff, int buflen)
                /* Parse error, ignore */
                return 0;
        }
-       return  preferred;
+       return !!preferred;
 }
 
 static void
@@ -711,7 +711,7 @@ sysfs_set_scsi_tmo (struct multipath *mpp, int checkint)
        int dev_loss_tmo = mpp->dev_loss;
 
        if (mpp->no_path_retry > 0) {
-               uint64_t no_path_retry_tmo = mpp->no_path_retry * checkint;
+               uint64_t no_path_retry_tmo = (uint64_t)mpp->no_path_retry * checkint;
 
                if (no_path_retry_tmo > MAX_DEV_LOSS_TMO)
                        no_path_retry_tmo = MAX_DEV_LOSS_TMO;
@@ -1106,7 +1106,9 @@ get_vpd_sgio (int fd, int pg, char * str, int maxlen)
 
        memset(buff, 0x0, 4096);
        if (sgio_get_vpd(buff, 4096, fd, pg) < 0) {
-               condlog(3, "failed to issue vpd inquiry for pg%02x",
+               int lvl = pg == 0x80 || pg == 0x83 ? 3 : 4;
+
+               condlog(lvl, "failed to issue vpd inquiry for pg%02x",
                        pg);
                return -errno;
        }
@@ -1382,7 +1384,7 @@ common_sysfs_pathinfo (struct path * pp)
        devt = udev_device_get_devnum(pp->udev);
        snprintf(pp->dev_t, BLK_DEV_SIZE, "%d:%d", major(devt), minor(devt));
 
-       condlog(3, "%s: dev_t = %s", pp->dev, pp->dev_t);
+       condlog(4, "%s: dev_t = %s", pp->dev, pp->dev_t);
 
        if (sysfs_get_size(pp, &pp->size))
                return PATHINFO_FAILED;
@@ -1433,7 +1435,7 @@ path_offline (struct path * pp)
        }
 
 
-       condlog(3, "%s: path state = %s", pp->dev, buff);
+       condlog(4, "%s: path state = %s", pp->dev, buff);
 
        if (pp->bus == SYSFS_BUS_SCSI) {
                if (!strncmp(buff, "offline", 7)) {
@@ -1552,8 +1554,6 @@ get_state (struct path * pp, struct config *conf, int daemon, int oldstate)
        struct checker * c = &pp->checker;
        int state;
 
-       condlog(3, "%s: get_state", pp->dev);
-
        if (!checker_selected(c)) {
                if (daemon) {
                        if (pathinfo(pp, conf, DI_SYSFS) != PATHINFO_OK) {
@@ -1601,6 +1601,7 @@ get_prio (struct path * pp)
        struct prio * p;
        struct config *conf;
        int checker_timeout;
+       int old_prio;
 
        if (!pp)
                return 0;
@@ -1621,13 +1622,14 @@ get_prio (struct path * pp)
        conf = get_multipath_config();
        checker_timeout = conf->checker_timeout;
        put_multipath_config(conf);
+       old_prio = pp->priority;
        pp->priority = prio_getprio(p, pp, checker_timeout);
        if (pp->priority < 0) {
                condlog(3, "%s: %s prio error", pp->dev, prio_name(p));
                pp->priority = PRIO_UNDEF;
                return 1;
        }
-       condlog(3, "%s: %s prio = %u",
+       condlog((old_prio == pp->priority ? 4 : 3), "%s: %s prio = %u",
                pp->dev, prio_name(p), pp->priority);
        return 0;
 }
@@ -1865,11 +1867,11 @@ int pathinfo(struct path *pp, struct config *conf, int mask)
                        udev_device_get_sysattr_value(pp->udev, "hidden");
 
                if (hidden && !strcmp(hidden, "1")) {
-                       condlog(3, "%s: hidden", pp->dev);
+                       condlog(4, "%s: hidden", pp->dev);
                        return PATHINFO_SKIPPED;
                }
                if (is_claimed_by_foreign(pp->udev) ||
-                        filter_property(conf, pp->udev) > 0)
+                   filter_property(conf, pp->udev, 4) > 0)
                        return PATHINFO_SKIPPED;
        }
 
@@ -1878,7 +1880,7 @@ int pathinfo(struct path *pp, struct config *conf, int mask)
                           pp->dev) > 0)
                return PATHINFO_SKIPPED;
 
-       condlog(3, "%s: mask = 0x%x", pp->dev, mask);
+       condlog(4, "%s: mask = 0x%x", pp->dev, mask);
 
        /*
         * Sanity check: we need the device number to
@@ -1964,8 +1966,12 @@ int pathinfo(struct path *pp, struct config *conf, int mask)
        if ((mask & DI_WWID) && !strlen(pp->wwid)) {
                get_uid(pp, path_state, pp->udev);
                if (!strlen(pp->wwid)) {
-                       pp->initialized = INIT_MISSING_UDEV;
-                       pp->tick = conf->retrigger_delay;
+                       if (pp->bus == SYSFS_BUS_UNDEF)
+                               return PATHINFO_SKIPPED;
+                       if (pp->initialized != INIT_FAILED) {
+                               pp->initialized = INIT_MISSING_UDEV;
+                               pp->tick = conf->retrigger_delay;
+                       }
                        return PATHINFO_OK;
                }
                else
@@ -1998,7 +2004,7 @@ blank:
         * Recoverable error, for example faulty or offline path
         */
        pp->chkrstate = pp->state = PATH_DOWN;
-       if (pp->initialized == INIT_FAILED)
+       if (pp->initialized == INIT_NEW || pp->initialized == INIT_FAILED)
                memset(pp->wwid, 0, WWID_SIZE);
 
        return PATHINFO_OK;
index 620f507dbecd378884c50e0a18a8a9a1079433d2..ac13ec06035085e8f4ba61a0668ec6ac328984c3 100644 (file)
@@ -117,7 +117,7 @@ assemble_map (struct multipath * mp, char * params, int len)
        }
 
        FREE(f);
-       condlog(3, "%s: assembled map [%s]", mp->alias, params);
+       condlog(4, "%s: assembled map [%s]", mp->alias, params);
        return 0;
 
 err:
@@ -145,7 +145,7 @@ int disassemble_map(vector pathvec, char *params, struct multipath *mpp,
 
        p = params;
 
-       condlog(3, "%s: disassemble map [%s]", mpp->alias, params);
+       condlog(4, "%s: disassemble map [%s]", mpp->alias, params);
 
        /*
         * features
@@ -410,7 +410,7 @@ int disassemble_status(char *params, struct multipath *mpp)
 
        p = params;
 
-       condlog(3, "%s: disassemble status [%s]", mpp->alias, params);
+       condlog(4, "%s: disassemble status [%s]", mpp->alias, params);
 
        /*
         * features
index fe98ddf7e4e98418c005c1fd4276ebcde24dfd7d..fae58a0df7126693089ccc8e7e95a0ca8840fa4a 100644 (file)
@@ -1,13 +1,12 @@
 #
 # Copyright (C) 2003 Christophe Varoqui, <christophe.varoqui@opensvc.com>
 #
+TOPDIR=../..
 include ../../Makefile.inc
 
-CFLAGS += $(LIB_CFLAGS) -I..
+CFLAGS += $(LIB_CFLAGS) -I.. -I$(nvmedir)
 
-# If you add or remove a checker also update multipath/multipath.conf.5
-LIBS= \
-       libforeign-nvme.so
+LIBS = libforeign-nvme.so
 
 all: $(LIBS)
 
index c753a74703ede37a86d42ca6f11f7c973cf757b1..7e654ec5bdd46d31d25583c3ed72fbbf2fa5e258 100644 (file)
@@ -15,6 +15,8 @@
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 
+#include "nvme-lib.h"
+#include <sys/types.h>
 #include <sys/sysmacros.h>
 #include <libudev.h>
 #include <stdio.h>
@@ -27,6 +29,7 @@
 #include <dirent.h>
 #include <errno.h>
 #include <ctype.h>
+#include <fcntl.h>
 #include "util.h"
 #include "vector.h"
 #include "generic.h"
@@ -40,17 +43,22 @@ static const char N_A[] = "n/a";
 const char *THIS;
 
 struct nvme_map;
+struct nvme_pathgroup {
+       struct gen_pathgroup gen;
+       struct _vector pathvec;
+};
+
 struct nvme_path {
        struct gen_path gen;
        struct udev_device *udev;
        struct udev_device *ctl;
        struct nvme_map *map;
        bool seen;
-};
-
-struct nvme_pathgroup {
-       struct gen_pathgroup gen;
-       vector pathvec;
+       /*
+        * The kernel works in failover mode.
+        * Each path has a separate path group.
+        */
+       struct nvme_pathgroup pg;
 };
 
 struct nvme_map {
@@ -58,12 +66,9 @@ struct nvme_map {
        struct udev_device *udev;
        struct udev_device *subsys;
        dev_t devt;
-       /* Just one static pathgroup for NVMe for now */
-       struct nvme_pathgroup pg;
-       struct gen_pathgroup *gpg;
        struct _vector pgvec;
-       vector pathvec;
        int nr_live;
+       int ana_supported;
 };
 
 #define NAME_LEN 64 /* buffer length for temp attributes */
@@ -76,29 +81,33 @@ struct nvme_map {
 #define const_gen_path_to_nvme(g) ((const struct nvme_path*)(g))
 #define gen_path_to_nvme(g) ((struct nvme_path*)(g))
 #define nvme_path_to_gen(n) &((n)->gen)
+#define nvme_pg_to_path(x) (VECTOR_SLOT(&((x)->pathvec), 0))
+#define nvme_path_to_pg(x) &((x)->pg)
 
 static void cleanup_nvme_path(struct nvme_path *path)
 {
        condlog(5, "%s: %p %p", __func__, path, path->udev);
        if (path->udev)
                udev_device_unref(path->udev);
+       vector_reset(&path->pg.pathvec);
+
        /* ctl is implicitly referenced by udev, no need to unref */
        free(path);
 }
 
 static void cleanup_nvme_map(struct nvme_map *map)
 {
-       if (map->pathvec) {
-               struct nvme_path *path;
-               int i;
+       struct nvme_pathgroup *pg;
+       struct nvme_path *path;
+       int i;
 
-               vector_foreach_slot_backwards(map->pathvec, path, i) {
-                       condlog(5, "%s: %d %p", __func__, i, path);
-                       cleanup_nvme_path(path);
-                       vector_del_slot(map->pathvec, i);
-               }
+       vector_foreach_slot_backwards(&map->pgvec, pg, i) {
+               path = nvme_pg_to_path(pg);
+               condlog(5, "%s: %d %p", __func__, i, path);
+               cleanup_nvme_path(path);
+               vector_del_slot(&map->pgvec, i);
        }
-       vector_free(map->pathvec);
+       vector_reset(&map->pgvec);
        if (map->udev)
                udev_device_unref(map->udev);
        /* subsys is implicitly referenced by udev, no need to unref */
@@ -139,10 +148,11 @@ static int snprint_nvme_map(const struct gen_multipath *gmp,
                return snprintf(buff, len, "%s",
                                udev_device_get_sysname(nvm->udev));
        case 'n':
-               return snprintf(buff, len, "%s:NQN:%s",
-                               udev_device_get_sysname(nvm->subsys),
+               return snprintf(buff, len, "%s:nsid.%s",
                                udev_device_get_sysattr_value(nvm->subsys,
-                                                             "subsysnqn"));
+                                                             "subsysnqn"),
+                               udev_device_get_sysattr_value(nvm->udev,
+                                                             "nsid"));
        case 'w':
                return snprintf(buff, len, "%s",
                                udev_device_get_sysattr_value(nvm->udev,
@@ -178,11 +188,14 @@ static int snprint_nvme_map(const struct gen_multipath *gmp,
                        return snprintf(buff, len, "%s", "rw");
        case 'G':
                return snprintf(buff, len, "%s", THIS);
+       case 'h':
+               if (nvm->ana_supported == YNU_YES)
+                       return snprintf(buff, len, "ANA");
        default:
-               return snprintf(buff, len, N_A);
                break;
        }
-       return 0;
+
+       return snprintf(buff, len, N_A);
 }
 
 static const struct _vector*
@@ -190,7 +203,7 @@ nvme_pg_get_paths(const struct gen_pathgroup *gpg) {
        const struct nvme_pathgroup *gp = const_gen_pg_to_nvme(gpg);
 
        /* This is all used under the lock, no need to copy */
-       return gp->pathvec;
+       return &gp->pathvec;
 }
 
 static void
@@ -199,12 +212,6 @@ nvme_pg_rel_paths(const struct gen_pathgroup *gpg, const struct _vector *v)
        /* empty */
 }
 
-static int snprint_nvme_pg(const struct gen_pathgroup *gmp,
-                          char *buff, int len, char wildcard)
-{
-       return snprintf(buff, len, N_A);
-}
-
 static int snprint_hcil(const struct nvme_path *np, char *buf, int len)
 {
        unsigned int nvmeid, ctlid, nsid;
@@ -242,8 +249,27 @@ static int snprint_nvme_path(const struct gen_path *gp,
                devt = udev_device_get_devnum(np->udev);
                return snprintf(buff, len, "%u:%u", major(devt), minor(devt));
        case 'o':
-               sysfs_attr_get_value(np->ctl, "state", fld, sizeof(fld));
-               return snprintf(buff, len, "%s", fld);
+               if (sysfs_attr_get_value(np->ctl, "state",
+                                        fld, sizeof(fld)) > 0)
+                       return snprintf(buff, len, "%s", fld);
+               break;
+       case 'T':
+               if (sysfs_attr_get_value(np->udev, "ana_state", fld,
+                                        sizeof(fld)) > 0)
+                       return snprintf(buff, len, "%s", fld);
+               break;
+       case 'p':
+               if (sysfs_attr_get_value(np->udev, "ana_state", fld,
+                                        sizeof(fld)) > 0) {
+                       rstrip(fld);
+                       if (!strcmp(fld, "optimized"))
+                               return snprintf(buff, len, "%d", 50);
+                       else if (!strcmp(fld, "non-optimized"))
+                               return snprintf(buff, len, "%d", 10);
+                       else
+                               return snprintf(buff, len, "%d", 0);
+               }
+               break;
        case 's':
                snprintf(fld, sizeof(fld), "%s",
                         udev_device_get_sysattr_value(np->ctl,
@@ -281,12 +307,30 @@ static int snprint_nvme_path(const struct gen_path *gp,
                                        udev_device_get_sysname(pci));
                /* fall through */
        default:
-               return snprintf(buff, len, "%s", N_A);
                break;
        }
+       return snprintf(buff, len, "%s", N_A);
        return 0;
 }
 
+static int snprint_nvme_pg(const struct gen_pathgroup *gmp,
+                          char *buff, int len, char wildcard)
+{
+       const struct nvme_pathgroup *pg = const_gen_pg_to_nvme(gmp);
+       const struct nvme_path *path = nvme_pg_to_path(pg);
+
+       switch (wildcard) {
+       case 't':
+               return snprint_nvme_path(nvme_path_to_gen(path),
+                                        buff, len, 'T');
+       case 'p':
+               return snprint_nvme_path(nvme_path_to_gen(path),
+                                        buff, len, 'p');
+       default:
+               return snprintf(buff, len, N_A);
+       }
+}
+
 static int nvme_style(const struct gen_multipath* gm,
                      char *buf, int len, int verbosity)
 {
@@ -432,7 +476,7 @@ static struct nvme_map *_find_nvme_map_by_devt(const struct context *ctx,
 static struct nvme_path *
 _find_path_by_syspath(struct nvme_map *map, const char *syspath)
 {
-       struct nvme_path *path;
+       struct nvme_pathgroup *pg;
        char real[PATH_MAX];
        const char *ppath;
        int i;
@@ -443,7 +487,9 @@ _find_path_by_syspath(struct nvme_map *map, const char *syspath)
                ppath = syspath;
        }
 
-       vector_foreach_slot(map->pathvec, path, i) {
+       vector_foreach_slot(&map->pgvec, pg, i) {
+               struct nvme_path *path = nvme_pg_to_path(pg);
+
                if (!strcmp(ppath,
                            udev_device_get_syspath(path->udev)))
                        return path;
@@ -531,20 +577,57 @@ out:
        return blkdev;
 }
 
+static void test_ana_support(struct nvme_map *map, struct udev_device *ctl)
+{
+       const char *dev_t;
+       char sys_path[64];
+       long fd;
+       int rc;
+
+       if (map->ana_supported != YNU_UNDEF)
+               return;
+
+       dev_t = udev_device_get_sysattr_value(ctl, "dev");
+       if (snprintf(sys_path, sizeof(sys_path), "/dev/char/%s", dev_t)
+           >= sizeof(sys_path))
+               return;
+
+       fd = open(sys_path, O_RDONLY);
+       if (fd == -1) {
+               condlog(2, "%s: error opening %s", __func__, sys_path);
+               return;
+       }
+
+       pthread_cleanup_push(close_fd, (void *)fd);
+       rc = nvme_id_ctrl_ana(fd, NULL);
+       if (rc < 0)
+               condlog(2, "%s: error in nvme_id_ctrl: %s", __func__,
+                       strerror(errno));
+       else {
+               map->ana_supported = (rc == 1 ? YNU_YES : YNU_NO);
+               condlog(3, "%s: NVMe ctrl %s: ANA %s supported", __func__, dev_t,
+                       rc == 1 ? "is" : "is not");
+       }
+       pthread_cleanup_pop(1);
+}
+
 static void _find_controllers(struct context *ctx, struct nvme_map *map)
 {
        char pathbuf[PATH_MAX], realbuf[PATH_MAX];
        struct dirent **di = NULL;
        struct scandir_result sr;
        struct udev_device *subsys;
+       struct nvme_pathgroup *pg;
        struct nvme_path *path;
        int r, i, n;
 
        if (map == NULL || map->udev == NULL)
                return;
 
-       vector_foreach_slot(map->pathvec, path, i)
+       vector_foreach_slot(&map->pgvec, pg, i) {
+               path = nvme_pg_to_path(pg);
                path->seen = false;
+       }
 
        subsys = udev_device_get_parent_with_subsystem_devtype(map->udev,
                                                               "nvme-subsystem",
@@ -606,7 +689,8 @@ static void _find_controllers(struct context *ctx, struct nvme_map *map)
                if (udev == NULL)
                        continue;
 
-               path = _find_path_by_syspath(map, udev_device_get_syspath(udev));
+               path = _find_path_by_syspath(map,
+                                            udev_device_get_syspath(udev));
                if (path != NULL) {
                        path->seen = true;
                        condlog(4, "%s: %s already known",
@@ -630,24 +714,32 @@ static void _find_controllers(struct context *ctx, struct nvme_map *map)
                        cleanup_nvme_path(path);
                        continue;
                }
+               test_ana_support(map, path->ctl);
 
-               if (vector_alloc_slot(map->pathvec) == NULL) {
+               path->pg.gen.ops = &nvme_pg_ops;
+               if (vector_alloc_slot(&path->pg.pathvec) == NULL) {
                        cleanup_nvme_path(path);
                        continue;
                }
+               vector_set_slot(&path->pg.pathvec, path);
+               if (vector_alloc_slot(&map->pgvec) == NULL) {
+                       cleanup_nvme_path(path);
+                       continue;
+               }
+               vector_set_slot(&map->pgvec, &path->pg);
                condlog(3, "%s: %s: new path %s added to %s",
                        __func__, THIS, udev_device_get_sysname(udev),
                        udev_device_get_sysname(map->udev));
-               vector_set_slot(map->pathvec, path);
        }
        pthread_cleanup_pop(1);
 
        map->nr_live = 0;
-       vector_foreach_slot_backwards(map->pathvec, path, i) {
+       vector_foreach_slot_backwards(&map->pgvec, pg, i) {
+               path = nvme_pg_to_path(pg);
                if (!path->seen) {
                        condlog(1, "path %d not found in %s any more",
                                i, udev_device_get_sysname(map->udev));
-                       vector_del_slot(map->pathvec, i);
+                       vector_del_slot(&map->pgvec, i);
                        cleanup_nvme_path(path);
                } else {
                        static const char live_state[] = "live";
@@ -661,7 +753,7 @@ static void _find_controllers(struct context *ctx, struct nvme_map *map)
        }
        condlog(3, "%s: %s: map %s has %d/%d live paths", __func__, THIS,
                udev_device_get_sysname(map->udev), map->nr_live,
-               VECTOR_SIZE(map->pathvec));
+               VECTOR_SIZE(&map->pgvec));
 }
 
 static int _add_map(struct context *ctx, struct udev_device *ud,
@@ -686,19 +778,6 @@ static int _add_map(struct context *ctx, struct udev_device *ud,
        map->subsys = subsys;
        map->gen.ops = &nvme_map_ops;
 
-       map->pathvec = vector_alloc();
-       if (map->pathvec == NULL) {
-               cleanup_nvme_map(map);
-               return FOREIGN_ERR;
-       }
-
-       map->pg.gen.ops = &nvme_pg_ops;
-       map->pg.pathvec = map->pathvec;
-       map->gpg = nvme_pg_to_gen(&map->pg);
-
-       map->pgvec.allocated = 1;
-       map->pgvec.slot = (void**)&map->gpg;
-
        if (vector_alloc_slot(ctx->mpvec) == NULL) {
                cleanup_nvme_map(map);
                return FOREIGN_ERR;
@@ -842,8 +921,8 @@ const struct _vector * get_paths(const struct context *ctx)
        condlog(5, "%s called for \"%s\"", __func__, THIS);
        vector_foreach_slot(ctx->mpvec, gm, i) {
                const struct nvme_map *nm = const_gen_mp_to_nvme(gm);
-               paths = vector_convert(paths, nm->pathvec,
-                                      struct gen_path, identity);
+               paths = vector_convert(paths, &nm->pgvec,
+                                      struct nvme_pathgroup, nvme_pg_to_path);
        }
        return paths;
 }
index bb35dfc7bc81f93579f3e7c628e38f06d9dbb838..be57bb1a9337d3db8520a7129aa78411925e4c16 100644 (file)
@@ -25,6 +25,9 @@ static int log_messages_pending;
 
 void log_safe (int prio, const char * fmt, va_list ap)
 {
+       if (prio > LOG_DEBUG)
+               prio = LOG_DEBUG;
+
        if (log_thr == (pthread_t)0) {
                vsyslog(prio, fmt, ap);
                return;
diff --git a/libmultipath/nvme-lib.c b/libmultipath/nvme-lib.c
new file mode 100644 (file)
index 0000000..f30e769
--- /dev/null
@@ -0,0 +1,49 @@
+#include <sys/types.h>
+/* avoid inclusion of standard API */
+#define _NVME_LIB_C 1
+#include "nvme-lib.h"
+#include "nvme-ioctl.c"
+#include "debug.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg)
+{
+       if (err < 0)            /* negative: log the errno description */
+               condlog(3, "%s: %s: %s", dev, msg, strerror(errno));
+       else if (err > 0)       /* positive: log err as an NVMe status */
+               condlog(3, "%s: %s: NVMe status %d", dev, msg, err);
+       return err;             /* handed back unchanged; 0 logs nothing */
+}
+
+int libmp_nvme_get_nsid(int fd) /* libmp_-prefixed entry point for nvme_get_nsid() */
+{
+       return nvme_get_nsid(fd);       /* result is passed through unchanged */
+}
+
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl) /* libmp_-prefixed entry point */
+{
+       return nvme_identify_ctrl(fd, ctrl);    /* result is passed through unchanged */
+}
+
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+                          struct nvme_id_ns *ns) /* libmp_-prefixed entry point */
+{
+       return nvme_identify_ns(fd, nsid, present, ns); /* result is passed through unchanged */
+}
+
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo) /* libmp_-prefixed entry point */
+{
+       return nvme_ana_log(fd, ana_log, ana_log_len, rgo);     /* result is passed through unchanged */
+}
+
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl)
+{
+       struct nvme_id_ctrl c;
+       int rc = nvme_identify_ctrl(fd, &c);
+
+       /* rc > 0 is an NVMe status (cf. log_nvme_errcode); c may be unset, so fail */
+       if (rc != 0)
+               return rc < 0 ? rc : -1;
+       if (ctrl)       /* optional out-parameter: copy the identify data */
+               *ctrl = c;
+       return c.cmic & (1 << 3) ? 1 : 0;       /* 1 if bit 3 of cmic is set, else 0 */
+}
diff --git a/libmultipath/nvme-lib.h b/libmultipath/nvme-lib.h
new file mode 100644 (file)
index 0000000..448dd99
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef NVME_LIB_H
+#define NVME_LIB_H
+
+#include "nvme.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg);
+int libmp_nvme_get_nsid(int fd);
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl);
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+                          struct nvme_id_ns *ns);
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
+/*
+ * Identify controller; return 1 if ANA is supported, 0 if it is not,
+ * or a negative value on error.
+ * ctrl (may be NULL) is filled in on success, even w/o ANA.
+ */
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl);
+
+#ifndef _NVME_LIB_C
+/*
+ * In all files except nvme-lib.c, the nvme functions can be called
+ * by their usual name.
+ */
+#define nvme_get_nsid libmp_nvme_get_nsid
+#define nvme_identify_ctrl libmp_nvme_identify_ctrl
+#define nvme_identify_ns libmp_nvme_identify_ns
+#define nvme_ana_log libmp_nvme_ana_log
+/*
+ * Undefine these to avoid clashes with libmultipath's byteorder.h
+ */
+#undef cpu_to_le16
+#undef cpu_to_le32
+#undef cpu_to_le64
+#undef le16_to_cpu
+#undef le32_to_cpu
+#undef le64_to_cpu
+#endif
+
+#endif /* NVME_LIB_H */
diff --git a/libmultipath/nvme/argconfig.h b/libmultipath/nvme/argconfig.h
new file mode 100644 (file)
index 0000000..adb192b
--- /dev/null
@@ -0,0 +1,99 @@
+////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2014 PMC-Sierra, Inc.
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+//
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+//
+//   Author: Logan Gunthorpe <logang@deltatee.com>
+//           Logan Gunthorpe
+//
+//   Date:   Oct 23 2014
+//
+//   Description:
+//     Header file for argconfig.c
+//
+////////////////////////////////////////////////////////////////////////
+
+#ifndef argconfig_H
+#define argconfig_H
+
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+
+enum argconfig_types {
+       CFG_NONE,
+       CFG_STRING,
+       CFG_INT,
+       CFG_SIZE,
+       CFG_LONG,
+       CFG_LONG_SUFFIX,
+       CFG_DOUBLE,
+       CFG_BOOL,
+       CFG_BYTE,
+       CFG_SHORT,
+       CFG_POSITIVE,
+       CFG_INCREMENT,
+       CFG_SUBOPTS,
+       CFG_FILE_A,
+       CFG_FILE_W,
+       CFG_FILE_R,
+       CFG_FILE_AP,
+       CFG_FILE_WP,
+       CFG_FILE_RP,
+};
+
+struct argconfig_commandline_options {
+       const char *option;
+       const char short_option;
+       const char *meta;
+       enum argconfig_types config_type;
+       void *default_value;
+       int argument_type;
+       const char *help;
+};
+
+#define CFG_MAX_SUBOPTS 500
+#define MAX_HELP_FUNC 20
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void argconfig_help_func();
+void argconfig_append_usage(const char *str);
+void argconfig_print_help(const char *program_desc,
+                         const struct argconfig_commandline_options *options);
+int argconfig_parse(int argc, char *argv[], const char *program_desc,
+                   const struct argconfig_commandline_options *options,
+                   void *config_out, size_t config_size);
+int argconfig_parse_subopt_string(char *string, char **options,
+                                 size_t max_options);
+unsigned argconfig_parse_comma_sep_array(char *string, int *ret,
+                                        unsigned max_length);
+unsigned argconfig_parse_comma_sep_array_long(char *string,
+                                             unsigned long long *ret,
+                                             unsigned max_length);
+void argconfig_register_help_func(argconfig_help_func * f);
+
+void print_word_wrapped(const char *s, int indent, int start);
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/libmultipath/nvme/json.h b/libmultipath/nvme/json.h
new file mode 100644 (file)
index 0000000..c4ea531
--- /dev/null
@@ -0,0 +1,87 @@
+#ifndef __JSON__H
+#define __JSON__H
+
+struct json_object;
+struct json_array;
+struct json_pair;
+
+#define JSON_TYPE_STRING 0
+#define JSON_TYPE_INTEGER 1
+#define JSON_TYPE_FLOAT 2
+#define JSON_TYPE_OBJECT 3
+#define JSON_TYPE_ARRAY 4
+#define JSON_TYPE_UINT 5
+#define JSON_PARENT_TYPE_PAIR 0
+#define JSON_PARENT_TYPE_ARRAY 1
+struct json_value {
+       int type;
+       union {
+               long long integer_number;
+               unsigned long long uint_number;
+               long double float_number;
+               char *string;
+               struct json_object *object;
+               struct json_array *array;
+       };
+       int parent_type;
+       union {
+               struct json_pair *parent_pair;
+               struct json_array *parent_array;
+       };
+};
+
+struct json_array {
+       struct json_value **values;
+       int value_cnt;
+       struct json_value *parent;
+};
+
+struct json_object {
+       struct json_pair **pairs;
+       int pair_cnt;
+       struct json_value *parent;
+};
+
+struct json_pair {
+       char *name;
+       struct json_value *value;
+       struct json_object *parent;
+};
+
+struct json_object *json_create_object(void);
+struct json_array *json_create_array(void);
+
+void json_free_object(struct json_object *obj);
+
+int json_object_add_value_type(struct json_object *obj, const char *name, int type, ...);
+#define json_object_add_value_int(obj, name, val) \
+       json_object_add_value_type((obj), name, JSON_TYPE_INTEGER, (long long) (val))
+#define json_object_add_value_uint(obj, name, val) \
+       json_object_add_value_type((obj), name, JSON_TYPE_UINT, (unsigned long long) (val))
+#define json_object_add_value_float(obj, name, val) \
+       json_object_add_value_type((obj), name, JSON_TYPE_FLOAT, (val))
+#define json_object_add_value_string(obj, name, val) \
+       json_object_add_value_type((obj), name, JSON_TYPE_STRING, (val))
+#define json_object_add_value_object(obj, name, val) \
+       json_object_add_value_type((obj), name, JSON_TYPE_OBJECT, (val))
+#define json_object_add_value_array(obj, name, val) \
+       json_object_add_value_type((obj), name, JSON_TYPE_ARRAY, (val))
+int json_array_add_value_type(struct json_array *array, int type, ...);
+#define json_array_add_value_int(obj, val) /* cast like json_object_add_value_int */ \
+       json_array_add_value_type((obj), JSON_TYPE_INTEGER, (long long) (val))
+#define json_array_add_value_uint(obj, val) /* cast like json_object_add_value_uint */ \
+       json_array_add_value_type((obj), JSON_TYPE_UINT, (unsigned long long) (val))
+#define json_array_add_value_float(obj, val) \
+       json_array_add_value_type((obj), JSON_TYPE_FLOAT, (val))
+#define json_array_add_value_string(obj, val) \
+       json_array_add_value_type((obj), JSON_TYPE_STRING, (val))
+#define json_array_add_value_object(obj, val) \
+       json_array_add_value_type((obj), JSON_TYPE_OBJECT, (val))
+#define json_array_add_value_array(obj, val) \
+       json_array_add_value_type((obj), JSON_TYPE_ARRAY, (val))
+
+#define json_array_last_value_object(obj) /* object held by the last array element */ \
+       ((obj)->values[(obj)->value_cnt - 1]->object)
+
+void json_print_object(struct json_object *obj, void *);
+#endif
diff --git a/libmultipath/nvme/linux/nvme.h b/libmultipath/nvme/linux/nvme.h
new file mode 100644 (file)
index 0000000..68000eb
--- /dev/null
@@ -0,0 +1,1450 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_NVME_H
+#define _LINUX_NVME_H
+
+#include <linux/types.h>
+#include <linux/uuid.h>
+
+/* NQN names in commands fields specified one size */
+#define NVMF_NQN_FIELD_LEN     256
+
+/* However the max length of a qualified name is another size */
+#define NVMF_NQN_SIZE          223
+
+#define NVMF_TRSVCID_SIZE      32
+#define NVMF_TRADDR_SIZE       256
+#define NVMF_TSAS_SIZE         256
+
+#define NVME_DISC_SUBSYS_NAME  "nqn.2014-08.org.nvmexpress.discovery"
+
+#define NVME_RDMA_IP_PORT      4420
+
+#define NVME_NSID_ALL          0xffffffff
+
+enum nvme_subsys_type {
+       NVME_NQN_DISC   = 1,            /* Discovery type target subsystem */
+       NVME_NQN_NVME   = 2,            /* NVME type target subsystem */
+};
+
+/* Address Family codes for Discovery Log Page entry ADRFAM field */
+enum {
+       NVMF_ADDR_FAMILY_PCI    = 0,    /* PCIe */
+       NVMF_ADDR_FAMILY_IP4    = 1,    /* IP4 */
+       NVMF_ADDR_FAMILY_IP6    = 2,    /* IP6 */
+       NVMF_ADDR_FAMILY_IB     = 3,    /* InfiniBand */
+       NVMF_ADDR_FAMILY_FC     = 4,    /* Fibre Channel */
+};
+
+/* Transport Type codes for Discovery Log Page entry TRTYPE field */
+enum {
+       NVMF_TRTYPE_RDMA        = 1,    /* RDMA */
+       NVMF_TRTYPE_FC          = 2,    /* Fibre Channel */
+       NVMF_TRTYPE_TCP         = 3,    /* TCP */
+       NVMF_TRTYPE_LOOP        = 254,  /* Reserved for host usage */
+       NVMF_TRTYPE_MAX,
+};
+
+/* Transport Requirements codes for Discovery Log Page entry TREQ field */
+enum {
+       NVMF_TREQ_NOT_SPECIFIED = 0,            /* Not specified */
+       NVMF_TREQ_REQUIRED      = 1,            /* Required */
+       NVMF_TREQ_NOT_REQUIRED  = 2,            /* Not Required */
+       NVMF_TREQ_DISABLE_SQFLOW = (1 << 2),    /* SQ flow control disable supported */
+};
+
+/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
+ * RDMA_QPTYPE field
+ */
+enum {
+       NVMF_RDMA_QPTYPE_CONNECTED      = 1, /* Reliable Connected */
+       NVMF_RDMA_QPTYPE_DATAGRAM       = 2, /* Reliable Datagram */
+};
+
+/* RDMA Provider Type codes for Discovery Log Page entry TSAS
+ * RDMA_PRTYPE field
+ */
+enum {
+       NVMF_RDMA_PRTYPE_NOT_SPECIFIED  = 1, /* No Provider Specified */
+       NVMF_RDMA_PRTYPE_IB             = 2, /* InfiniBand */
+       NVMF_RDMA_PRTYPE_ROCE           = 3, /* InfiniBand RoCE */
+       NVMF_RDMA_PRTYPE_ROCEV2         = 4, /* InfiniBand RoCEV2 */
+       NVMF_RDMA_PRTYPE_IWARP          = 5, /* IWARP */
+};
+
+/* RDMA Connection Management Service Type codes for Discovery Log Page
+ * entry TSAS RDMA_CMS field
+ */
+enum {
+       NVMF_RDMA_CMS_RDMA_CM   = 1, /* Sockets based endpoint addressing */
+};
+
+/* TCP port security type for  Discovery Log Page entry TSAS
+ */
+enum {
+       NVMF_TCP_SECTYPE_NONE   = 0, /* No Security */
+       NVMF_TCP_SECTYPE_TLS    = 1, /* Transport Layer Security */
+};
+
+#define NVME_AQ_DEPTH          32
+#define NVME_NR_AEN_COMMANDS   1
+#define NVME_AQ_BLK_MQ_DEPTH   (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
+
+/*
+ * Subtract one to leave an empty queue entry for 'Full Queue' condition. See
+ * NVM-Express 1.2 specification, section 4.1.2.
+ */
+#define NVME_AQ_MQ_TAG_DEPTH   (NVME_AQ_BLK_MQ_DEPTH - 1)
+
+enum {
+       NVME_REG_CAP    = 0x0000,       /* Controller Capabilities */
+       NVME_REG_VS     = 0x0008,       /* Version */
+       NVME_REG_INTMS  = 0x000c,       /* Interrupt Mask Set */
+       NVME_REG_INTMC  = 0x0010,       /* Interrupt Mask Clear */
+       NVME_REG_CC     = 0x0014,       /* Controller Configuration */
+       NVME_REG_CSTS   = 0x001c,       /* Controller Status */
+       NVME_REG_NSSR   = 0x0020,       /* NVM Subsystem Reset */
+       NVME_REG_AQA    = 0x0024,       /* Admin Queue Attributes */
+       NVME_REG_ASQ    = 0x0028,       /* Admin SQ Base Address */
+       NVME_REG_ACQ    = 0x0030,       /* Admin CQ Base Address */
+       NVME_REG_CMBLOC = 0x0038,       /* Controller Memory Buffer Location */
+       NVME_REG_CMBSZ  = 0x003c,       /* Controller Memory Buffer Size */
+       NVME_REG_BPINFO = 0x0040,       /* Boot Partition Information */
+       NVME_REG_BPRSEL = 0x0044,       /* Boot Partition Read Select */
+       NVME_REG_BPMBL  = 0x0048,       /* Boot Partition Memory Buffer Location */
+       NVME_REG_DBS    = 0x1000,       /* SQ 0 Tail Doorbell */
+};
+
+#define NVME_CAP_MQES(cap)     ((cap) & 0xffff)
+#define NVME_CAP_TIMEOUT(cap)  (((cap) >> 24) & 0xff)
+#define NVME_CAP_STRIDE(cap)   (((cap) >> 32) & 0xf)
+#define NVME_CAP_NSSRC(cap)    (((cap) >> 36) & 0x1)
+#define NVME_CAP_MPSMIN(cap)   (((cap) >> 48) & 0xf)
+#define NVME_CAP_MPSMAX(cap)   (((cap) >> 52) & 0xf)
+
+#define NVME_CMB_BIR(cmbloc)   ((cmbloc) & 0x7)
+#define NVME_CMB_OFST(cmbloc)  (((cmbloc) >> 12) & 0xfffff)
+#define NVME_CMB_SZ(cmbsz)     (((cmbsz) >> 12) & 0xfffff)
+#define NVME_CMB_SZU(cmbsz)    (((cmbsz) >> 8) & 0xf)
+
+#define NVME_CMB_WDS(cmbsz)    ((cmbsz) & 0x10)
+#define NVME_CMB_RDS(cmbsz)    ((cmbsz) & 0x8)
+#define NVME_CMB_LISTS(cmbsz)  ((cmbsz) & 0x4)
+#define NVME_CMB_CQS(cmbsz)    ((cmbsz) & 0x2)
+#define NVME_CMB_SQS(cmbsz)    ((cmbsz) & 0x1)
+
+/*
+ * Submission and Completion Queue Entry Sizes for the NVM command set.
+ * (In bytes and specified as a power of two (2^n)).
+ */
+#define NVME_NVM_IOSQES                6
+#define NVME_NVM_IOCQES                4
+
+enum {
+       NVME_CC_ENABLE          = 1 << 0,
+       NVME_CC_CSS_NVM         = 0 << 4,
+       NVME_CC_EN_SHIFT        = 0,
+       NVME_CC_CSS_SHIFT       = 4,
+       NVME_CC_MPS_SHIFT       = 7,
+       NVME_CC_AMS_SHIFT       = 11,
+       NVME_CC_SHN_SHIFT       = 14,
+       NVME_CC_IOSQES_SHIFT    = 16,
+       NVME_CC_IOCQES_SHIFT    = 20,
+       NVME_CC_AMS_RR          = 0 << NVME_CC_AMS_SHIFT,
+       NVME_CC_AMS_WRRU        = 1 << NVME_CC_AMS_SHIFT,
+       NVME_CC_AMS_VS          = 7 << NVME_CC_AMS_SHIFT,
+       NVME_CC_SHN_NONE        = 0 << NVME_CC_SHN_SHIFT,
+       NVME_CC_SHN_NORMAL      = 1 << NVME_CC_SHN_SHIFT,
+       NVME_CC_SHN_ABRUPT      = 2 << NVME_CC_SHN_SHIFT,
+       NVME_CC_SHN_MASK        = 3 << NVME_CC_SHN_SHIFT,
+       NVME_CC_IOSQES          = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
+       NVME_CC_IOCQES          = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
+       NVME_CSTS_RDY           = 1 << 0,
+       NVME_CSTS_CFS           = 1 << 1,
+       NVME_CSTS_NSSRO         = 1 << 4,
+       NVME_CSTS_PP            = 1 << 5,
+       NVME_CSTS_SHST_NORMAL   = 0 << 2,
+       NVME_CSTS_SHST_OCCUR    = 1 << 2,
+       NVME_CSTS_SHST_CMPLT    = 2 << 2,
+       NVME_CSTS_SHST_MASK     = 3 << 2,
+};
+
+struct nvme_id_power_state {
+       __le16                  max_power;      /* centiwatts */
+       __u8                    rsvd2;
+       __u8                    flags;
+       __le32                  entry_lat;      /* microseconds */
+       __le32                  exit_lat;       /* microseconds */
+       __u8                    read_tput;
+       __u8                    read_lat;
+       __u8                    write_tput;
+       __u8                    write_lat;
+       __le16                  idle_power;
+       __u8                    idle_scale;
+       __u8                    rsvd19;
+       __le16                  active_power;
+       __u8                    active_work_scale;
+       __u8                    rsvd23[9];
+};
+
+enum {
+       NVME_PS_FLAGS_MAX_POWER_SCALE   = 1 << 0,
+       NVME_PS_FLAGS_NON_OP_STATE      = 1 << 1,
+};
+
+struct nvme_id_ctrl {
+       __le16                  vid;
+       __le16                  ssvid;
+       char                    sn[20];
+       char                    mn[40];
+       char                    fr[8];
+       __u8                    rab;
+       __u8                    ieee[3];
+       __u8                    cmic;
+       __u8                    mdts;
+       __le16                  cntlid;
+       __le32                  ver;
+       __le32                  rtd3r;
+       __le32                  rtd3e;
+       __le32                  oaes;
+       __le32                  ctratt;
+       __le16                  rrls;
+       __u8                    rsvd102[154];
+       __le16                  oacs;
+       __u8                    acl;
+       __u8                    aerl;
+       __u8                    frmw;
+       __u8                    lpa;
+       __u8                    elpe;
+       __u8                    npss;
+       __u8                    avscc;
+       __u8                    apsta;
+       __le16                  wctemp;
+       __le16                  cctemp;
+       __le16                  mtfa;
+       __le32                  hmpre;
+       __le32                  hmmin;
+       __u8                    tnvmcap[16];
+       __u8                    unvmcap[16];
+       __le32                  rpmbs;
+       __le16                  edstt;
+       __u8                    dsto;
+       __u8                    fwug;
+       __le16                  kas;
+       __le16                  hctma;
+       __le16                  mntmt;
+       __le16                  mxtmt;
+       __le32                  sanicap;
+       __le32                  hmminds;
+       __le16                  hmmaxd;
+       __le16                  nsetidmax;
+       __u8                    rsvd340[2];
+       __u8                    anatt;
+       __u8                    anacap;
+       __le32                  anagrpmax;
+       __le32                  nanagrpid;
+       __u8                    rsvd352[160];
+       __u8                    sqes;
+       __u8                    cqes;
+       __le16                  maxcmd;
+       __le32                  nn;
+       __le16                  oncs;
+       __le16                  fuses;
+       __u8                    fna;
+       __u8                    vwc;
+       __le16                  awun;
+       __le16                  awupf;
+       __u8                    nvscc;
+       __u8                    nwpc;
+       __le16                  acwu;
+       __u8                    rsvd534[2];
+       __le32                  sgls;
+       __le32                  mnan;
+       __u8                    rsvd544[224];
+       char                    subnqn[256];
+       __u8                    rsvd1024[768];
+       __le32                  ioccsz;
+       __le32                  iorcsz;
+       __le16                  icdoff;
+       __u8                    ctrattr;
+       __u8                    msdbd;
+       __u8                    rsvd1804[244];
+       struct nvme_id_power_state      psd[32];
+       __u8                    vs[1024];
+};
+
+enum {
+       NVME_CTRL_ONCS_COMPARE                  = 1 << 0,
+       NVME_CTRL_ONCS_WRITE_UNCORRECTABLE      = 1 << 1,
+       NVME_CTRL_ONCS_DSM                      = 1 << 2,
+       NVME_CTRL_ONCS_WRITE_ZEROES             = 1 << 3,
+       NVME_CTRL_ONCS_TIMESTAMP                = 1 << 6,
+       NVME_CTRL_VWC_PRESENT                   = 1 << 0,
+       NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
+       NVME_CTRL_OACS_DIRECTIVES               = 1 << 5,
+       NVME_CTRL_OACS_DBBUF_SUPP               = 1 << 8,
+       NVME_CTRL_LPA_CMD_EFFECTS_LOG           = 1 << 1,
+       NVME_CTRL_CTRATT_128_ID                 = 1 << 0,
+       NVME_CTRL_CTRATT_NON_OP_PSP             = 1 << 1,
+       NVME_CTRL_CTRATT_NVM_SETS               = 1 << 2,
+       NVME_CTRL_CTRATT_READ_RECV_LVLS         = 1 << 3,
+       NVME_CTRL_CTRATT_ENDURANCE_GROUPS       = 1 << 4,
+       NVME_CTRL_CTRATT_PREDICTABLE_LAT        = 1 << 5,
+};
+
+struct nvme_lbaf {
+       __le16                  ms;
+       __u8                    ds;
+       __u8                    rp;
+};
+
+struct nvme_id_ns {
+       __le64                  nsze;
+       __le64                  ncap;
+       __le64                  nuse;
+       __u8                    nsfeat;
+       __u8                    nlbaf;
+       __u8                    flbas;
+       __u8                    mc;
+       __u8                    dpc;
+       __u8                    dps;
+       __u8                    nmic;
+       __u8                    rescap;
+       __u8                    fpi;
+       __u8                    dlfeat;
+       __le16                  nawun;
+       __le16                  nawupf;
+       __le16                  nacwu;
+       __le16                  nabsn;
+       __le16                  nabo;
+       __le16                  nabspf;
+       __le16                  noiob;
+       __u8                    nvmcap[16];
+       __u8                    rsvd64[28];
+       __le32                  anagrpid;
+       __u8                    rsvd96[3];
+       __u8                    nsattr;
+       __le16                  nvmsetid;
+       __le16                  endgid;
+       __u8                    nguid[16];
+       __u8                    eui64[8];
+       struct nvme_lbaf        lbaf[16];
+       __u8                    rsvd192[192];
+       __u8                    vs[3712];
+};
+
+enum {
+       NVME_ID_CNS_NS                  = 0x00,
+       NVME_ID_CNS_CTRL                = 0x01,
+       NVME_ID_CNS_NS_ACTIVE_LIST      = 0x02,
+       NVME_ID_CNS_NS_DESC_LIST        = 0x03,
+       NVME_ID_CNS_NVMSET_LIST         = 0x04,
+       NVME_ID_CNS_NS_PRESENT_LIST     = 0x10,
+       NVME_ID_CNS_NS_PRESENT          = 0x11,
+       NVME_ID_CNS_CTRL_NS_LIST        = 0x12,
+       NVME_ID_CNS_CTRL_LIST           = 0x13,
+};
+
+enum {
+       NVME_DIR_IDENTIFY               = 0x00,
+       NVME_DIR_STREAMS                = 0x01,
+       NVME_DIR_SND_ID_OP_ENABLE       = 0x01,
+       NVME_DIR_SND_ST_OP_REL_ID       = 0x01,
+       NVME_DIR_SND_ST_OP_REL_RSC      = 0x02,
+       NVME_DIR_RCV_ID_OP_PARAM        = 0x01,
+       NVME_DIR_RCV_ST_OP_PARAM        = 0x01,
+       NVME_DIR_RCV_ST_OP_STATUS       = 0x02,
+       NVME_DIR_RCV_ST_OP_RESOURCE     = 0x03,
+       NVME_DIR_ENDIR                  = 0x01,
+};
+
+enum {
+       NVME_NS_FEAT_THIN       = 1 << 0,
+       NVME_NS_FLBAS_LBA_MASK  = 0xf,
+       NVME_NS_FLBAS_META_EXT  = 0x10,
+       NVME_LBAF_RP_BEST       = 0,
+       NVME_LBAF_RP_BETTER     = 1,
+       NVME_LBAF_RP_GOOD       = 2,
+       NVME_LBAF_RP_DEGRADED   = 3,
+       NVME_NS_DPC_PI_LAST     = 1 << 4,
+       NVME_NS_DPC_PI_FIRST    = 1 << 3,
+       NVME_NS_DPC_PI_TYPE3    = 1 << 2,
+       NVME_NS_DPC_PI_TYPE2    = 1 << 1,
+       NVME_NS_DPC_PI_TYPE1    = 1 << 0,
+       NVME_NS_DPS_PI_FIRST    = 1 << 3,
+       NVME_NS_DPS_PI_MASK     = 0x7,
+       NVME_NS_DPS_PI_TYPE1    = 1,
+       NVME_NS_DPS_PI_TYPE2    = 2,
+       NVME_NS_DPS_PI_TYPE3    = 3,
+};
+
+struct nvme_ns_id_desc {
+       __u8 nidt;
+       __u8 nidl;
+       __le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN    8
+#define NVME_NIDT_NGUID_LEN    16
+#define NVME_NIDT_UUID_LEN     16
+
+enum {
+       NVME_NIDT_EUI64         = 0x01,
+       NVME_NIDT_NGUID         = 0x02,
+       NVME_NIDT_UUID          = 0x03,
+};
+
+#define NVME_MAX_NVMSET                31
+
+struct nvme_nvmset_attr_entry {
+       __le16                  id;
+       __le16                  endurance_group_id;
+       __u8                    rsvd4[4];
+       __le32                  random_4k_read_typical;
+       __le32                  opt_write_size;
+       __u8                    total_nvmset_cap[16];
+       __u8                    unalloc_nvmset_cap[16];
+       __u8                    rsvd48[80];
+};
+
+struct nvme_id_nvmset {
+       __u8                            nid;
+       __u8                            rsvd1[127];
+       struct nvme_nvmset_attr_entry   ent[NVME_MAX_NVMSET];
+};
+
+/* Derived from 1.3a Figure 101: Get Log Page - Telemetry Host
+ * -Initiated Log (Log Identifier 07h)
+ */
+struct nvme_telemetry_log_page_hdr {
+       __u8    lpi; /* Log page identifier */
+       __u8    rsvd[4];
+       __u8    iee_oui[3];
+       __u16   dalb1; /* Data area 1 last block */
+       __u16   dalb2; /* Data area 2 last block */
+       __u16   dalb3; /* Data area 3 last block */
+       __u8    rsvd1[368]; /* TODO verify */
+       __u8    ctrlavail; /* Controller initiated data avail?*/
+       __u8    ctrldgn; /* Controller initiated telemetry Data Gen # */
+       __u8    rsnident[128];
+       /* We'll have to double fetch so we can get the header,
+        * parse dalb1->3 determine how much size we need for the
+        * log then alloc below. Or just do a secondary non-struct
+        * allocation.
+        */
+       __u8    telemetry_dataarea[0];
+};
+
+struct nvme_endurance_group_log {
+       __u32   rsvd0;
+       __u8    avl_spare_threshold;
+       __u8    percent_used;
+       __u8    rsvd6[26];
+       __u8    endurance_estimate[16];
+       __u8    data_units_read[16];
+       __u8    data_units_written[16];
+       __u8    media_units_written[16];
+       __u8    rsvd96[416];
+};
+
+struct nvme_smart_log {
+       __u8                    critical_warning;
+       __u8                    temperature[2];
+       __u8                    avail_spare;
+       __u8                    spare_thresh;
+       __u8                    percent_used;
+       __u8                    rsvd6[26];
+       __u8                    data_units_read[16];
+       __u8                    data_units_written[16];
+       __u8                    host_reads[16];
+       __u8                    host_writes[16];
+       __u8                    ctrl_busy_time[16];
+       __u8                    power_cycles[16];
+       __u8                    power_on_hours[16];
+       __u8                    unsafe_shutdowns[16];
+       __u8                    media_errors[16];
+       __u8                    num_err_log_entries[16];
+       __le32                  warning_temp_time;
+       __le32                  critical_comp_time;
+       __le16                  temp_sensor[8];
+       __le32                  thm_temp1_trans_count;
+       __le32                  thm_temp2_trans_count;
+       __le32                  thm_temp1_total_time;
+       __le32                  thm_temp2_total_time;
+       __u8                    rsvd232[280];
+};
+
+struct nvme_self_test_res {
+       __u8                    device_self_test_status;
+       __u8                    segment_num;
+       __u8                    valid_diagnostic_info;
+       __u8                    rsvd;
+       __le64                  power_on_hours;
+       __le32                  nsid;
+       __le64                  failing_lba;
+       __u8                    status_code_type;
+       __u8                    status_code;
+       __u8                    vendor_specific[2];
+} __attribute__((packed));
+
+struct nvme_self_test_log {
+       __u8                      crnt_dev_selftest_oprn;
+       __u8                      crnt_dev_selftest_compln;
+       __u8                      rsvd[2];
+       struct nvme_self_test_res result[20];
+} __attribute__((packed));
+
+struct nvme_fw_slot_info_log {
+       __u8                    afi;
+       __u8                    rsvd1[7];
+       __le64                  frs[7];
+       __u8                    rsvd64[448];
+};
+
+/* NVMe Namespace Write Protect State */
+enum {
+       NVME_NS_NO_WRITE_PROTECT = 0,
+       NVME_NS_WRITE_PROTECT,
+       NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+       NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
+#define NVME_MAX_CHANGED_NAMESPACES     1024
+
+struct nvme_changed_ns_list_log {
+       __le32                  log[NVME_MAX_CHANGED_NAMESPACES];
+};
+
+enum {
+       NVME_CMD_EFFECTS_CSUPP          = 1 << 0,
+       NVME_CMD_EFFECTS_LBCC           = 1 << 1,
+       NVME_CMD_EFFECTS_NCC            = 1 << 2,
+       NVME_CMD_EFFECTS_NIC            = 1 << 3,
+       NVME_CMD_EFFECTS_CCC            = 1 << 4,
+       NVME_CMD_EFFECTS_CSE_MASK       = 3 << 16,
+};
+
+struct nvme_effects_log {
+       __le32 acs[256];
+       __le32 iocs[256];
+       __u8   resv[2048];
+};
+
+enum nvme_ana_state {
+       NVME_ANA_OPTIMIZED              = 0x01,
+       NVME_ANA_NONOPTIMIZED           = 0x02,
+       NVME_ANA_INACCESSIBLE           = 0x03,
+       NVME_ANA_PERSISTENT_LOSS        = 0x04,
+       NVME_ANA_CHANGE                 = 0x0f,
+};
+
+struct nvme_ana_group_desc {
+       __le32  grpid;
+       __le32  nnsids;
+       __le64  chgcnt;
+       __u8    state;
+       __u8    rsvd17[15];
+       __le32  nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO   (1 << 0)
+
+struct nvme_ana_rsp_hdr {
+       __le64  chgcnt;
+       __le16  ngrps;
+       __le16  rsvd10[3];
+};
+
+enum {
+       NVME_SMART_CRIT_SPARE           = 1 << 0,
+       NVME_SMART_CRIT_TEMPERATURE     = 1 << 1,
+       NVME_SMART_CRIT_RELIABILITY     = 1 << 2,
+       NVME_SMART_CRIT_MEDIA           = 1 << 3,
+       NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
+};
+
+enum {
+       NVME_AER_ERROR                  = 0,
+       NVME_AER_SMART                  = 1,
+       NVME_AER_CSS                    = 6,
+       NVME_AER_VS                     = 7,
+       NVME_AER_NOTICE_NS_CHANGED      = 0x0002,
+       NVME_AER_NOTICE_ANA             = 0x0003,
+       NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
+};
+
+struct nvme_lba_range_type {
+       __u8                    type;
+       __u8                    attributes;
+       __u8                    rsvd2[14];
+       __u64                   slba;
+       __u64                   nlb;
+       __u8                    guid[16];
+       __u8                    rsvd48[16];
+};
+
+enum {
+       NVME_LBART_TYPE_FS      = 0x01,
+       NVME_LBART_TYPE_RAID    = 0x02,
+       NVME_LBART_TYPE_CACHE   = 0x03,
+       NVME_LBART_TYPE_SWAP    = 0x04,
+
+       NVME_LBART_ATTRIB_TEMP  = 1 << 0,
+       NVME_LBART_ATTRIB_HIDE  = 1 << 1,
+};
+
+struct nvme_plm_config {
+       __u16   enable_event;
+       __u8    rsvd2[30];
+       __u64   dtwin_reads_thresh;
+       __u64   dtwin_writes_thresh;
+       __u64   dtwin_time_thresh;
+       __u8    rsvd56[456];
+};
+
+struct nvme_reservation_status {
+       __le32  gen;
+       __u8    rtype;
+       __u8    regctl[2];
+       __u8    resv5[2];
+       __u8    ptpls;
+       __u8    resv10[13];
+       struct {
+               __le16  cntlid;
+               __u8    rcsts;
+               __u8    resv3[5];
+               __le64  hostid;
+               __le64  rkey;
+       } regctl_ds[];
+};
+
+struct nvme_reservation_status_ext {
+       __le32  gen;
+       __u8    rtype;
+       __u8    regctl[2];
+       __u8    resv5[2];
+       __u8    ptpls;
+       __u8    resv10[14];
+       __u8    resv24[40];
+       struct {
+               __le16  cntlid;
+               __u8    rcsts;
+               __u8    resv3[5];
+               __le64  rkey;
+               __u8    hostid[16];
+               __u8    resv32[32];
+       } regctl_eds[];
+};
+
+enum nvme_async_event_type {
+       NVME_AER_TYPE_ERROR     = 0,
+       NVME_AER_TYPE_SMART     = 1,
+       NVME_AER_TYPE_NOTICE    = 2,
+};
+
+/* I/O commands */
+
+enum nvme_opcode {
+       nvme_cmd_flush          = 0x00,
+       nvme_cmd_write          = 0x01,
+       nvme_cmd_read           = 0x02,
+       nvme_cmd_write_uncor    = 0x04,
+       nvme_cmd_compare        = 0x05,
+       nvme_cmd_write_zeroes   = 0x08,
+       nvme_cmd_dsm            = 0x09,
+       nvme_cmd_resv_register  = 0x0d,
+       nvme_cmd_resv_report    = 0x0e,
+       nvme_cmd_resv_acquire   = 0x11,
+       nvme_cmd_resv_release   = 0x15,
+};
+
+/*
+ * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * @NVME_SGL_FMT_ADDRESS:     absolute address of the data block
+ * @NVME_SGL_FMT_OFFSET:      relative offset of the in-capsule data block
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
+ * @NVME_SGL_FMT_INVALIDATE:  RDMA transport specific remote invalidation
+ *                            request subtype
+ */
+enum {
+       NVME_SGL_FMT_ADDRESS            = 0x00,
+       NVME_SGL_FMT_OFFSET             = 0x01,
+       NVME_SGL_FMT_TRANSPORT_A        = 0x0A,
+       NVME_SGL_FMT_INVALIDATE         = 0x0f,
+};
+
+/*
+ * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * For struct nvme_sgl_desc:
+ *   @NVME_SGL_FMT_DATA_DESC:          data block descriptor
+ *   @NVME_SGL_FMT_SEG_DESC:           sgl segment descriptor
+ *   @NVME_SGL_FMT_LAST_SEG_DESC:      last sgl segment descriptor
+ *
+ * For struct nvme_keyed_sgl_desc:
+ *   @NVME_KEY_SGL_FMT_DATA_DESC:      keyed data block descriptor
+ *
+ * Transport-specific SGL types:
+ *   @NVME_TRANSPORT_SGL_DATA_DESC:    Transport SGL data block descriptor
+ */
+enum {
+       NVME_SGL_FMT_DATA_DESC          = 0x00,
+       NVME_SGL_FMT_SEG_DESC           = 0x02,
+       NVME_SGL_FMT_LAST_SEG_DESC      = 0x03,
+       NVME_KEY_SGL_FMT_DATA_DESC      = 0x04,
+       NVME_TRANSPORT_SGL_DATA_DESC    = 0x05,
+};
+
+struct nvme_sgl_desc {
+       __le64  addr;
+       __le32  length;
+       __u8    rsvd[3];
+       __u8    type;
+};
+
+struct nvme_keyed_sgl_desc {
+       __le64  addr;
+       __u8    length[3];
+       __u8    key[4];
+       __u8    type;
+};
+
+union nvme_data_ptr {
+       struct {
+               __le64  prp1;
+               __le64  prp2;
+       };
+       struct nvme_sgl_desc    sgl;
+       struct nvme_keyed_sgl_desc ksgl;
+};
+
+/*
+ * Lowest two bits of our flags field (FUSE field in the spec):
+ *
+ * @NVME_CMD_FUSE_FIRST:   Fused Operation, first command
+ * @NVME_CMD_FUSE_SECOND:  Fused Operation, second command
+ *
+ * Highest two bits in our flags field (PSDT field in the spec):
+ *
+ * @NVME_CMD_SGL_METABUF: Use SGLS for this transfer,
+ *     If used, MPTR contains addr of single physical buffer (byte aligned).
+ * @NVME_CMD_SGL_METASEG: Use SGLS for this transfer,
+ *     If used, MPTR contains an address of an SGL segment containing
+ *     exactly 1 SGL descriptor (qword aligned).
+ */
+enum {
+       NVME_CMD_FUSE_FIRST     = (1 << 0),
+       NVME_CMD_FUSE_SECOND    = (1 << 1),
+
+       NVME_CMD_SGL_METABUF    = (1 << 6),
+       NVME_CMD_SGL_METASEG    = (1 << 7),
+       NVME_CMD_SGL_ALL        = NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG,
+};
+
+struct nvme_common_command {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __le32                  cdw2[2];
+       __le64                  metadata;
+       union nvme_data_ptr     dptr;
+       __le32                  cdw10[6];
+};
+
+struct nvme_rw_command {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2;
+       __le64                  metadata;
+       union nvme_data_ptr     dptr;
+       __le64                  slba;
+       __le16                  length;
+       __le16                  control;
+       __le32                  dsmgmt;
+       __le32                  reftag;
+       __le16                  apptag;
+       __le16                  appmask;
+};
+
+enum {
+       NVME_RW_LR                      = 1 << 15,
+       NVME_RW_FUA                     = 1 << 14,
+       NVME_RW_DEAC                    = 1 << 9,
+       NVME_RW_DSM_FREQ_UNSPEC         = 0,
+       NVME_RW_DSM_FREQ_TYPICAL        = 1,
+       NVME_RW_DSM_FREQ_RARE           = 2,
+       NVME_RW_DSM_FREQ_READS          = 3,
+       NVME_RW_DSM_FREQ_WRITES         = 4,
+       NVME_RW_DSM_FREQ_RW             = 5,
+       NVME_RW_DSM_FREQ_ONCE           = 6,
+       NVME_RW_DSM_FREQ_PREFETCH       = 7,
+       NVME_RW_DSM_FREQ_TEMP           = 8,
+       NVME_RW_DSM_LATENCY_NONE        = 0 << 4,
+       NVME_RW_DSM_LATENCY_IDLE        = 1 << 4,
+       NVME_RW_DSM_LATENCY_NORM        = 2 << 4,
+       NVME_RW_DSM_LATENCY_LOW         = 3 << 4,
+       NVME_RW_DSM_SEQ_REQ             = 1 << 6,
+       NVME_RW_DSM_COMPRESSED          = 1 << 7,
+       NVME_RW_PRINFO_PRCHK_REF        = 1 << 10,
+       NVME_RW_PRINFO_PRCHK_APP        = 1 << 11,
+       NVME_RW_PRINFO_PRCHK_GUARD      = 1 << 12,
+       NVME_RW_PRINFO_PRACT            = 1 << 13,
+       NVME_RW_DTYPE_STREAMS           = 1 << 4,
+};
+
+struct nvme_dsm_cmd {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2[2];
+       union nvme_data_ptr     dptr;
+       __le32                  nr;
+       __le32                  attributes;
+       __u32                   rsvd12[4];
+};
+
+enum {
+       NVME_DSMGMT_IDR         = 1 << 0,
+       NVME_DSMGMT_IDW         = 1 << 1,
+       NVME_DSMGMT_AD          = 1 << 2,
+};
+
+#define NVME_DSM_MAX_RANGES    256
+
+struct nvme_dsm_range {
+       __le32                  cattr;
+       __le32                  nlb;
+       __le64                  slba;
+};
+
+struct nvme_write_zeroes_cmd {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2;
+       __le64                  metadata;
+       union nvme_data_ptr     dptr;
+       __le64                  slba;
+       __le16                  length;
+       __le16                  control;
+       __le32                  dsmgmt;
+       __le32                  reftag;
+       __le16                  apptag;
+       __le16                  appmask;
+};
+
+/* Features */
+
+struct nvme_feat_auto_pst {
+       __le64 entries[32];
+};
+
+enum {
+       NVME_HOST_MEM_ENABLE    = (1 << 0),
+       NVME_HOST_MEM_RETURN    = (1 << 1),
+};
+
+/* Admin commands */
+
+enum nvme_admin_opcode {
+       nvme_admin_delete_sq            = 0x00,
+       nvme_admin_create_sq            = 0x01,
+       nvme_admin_get_log_page         = 0x02,
+       nvme_admin_delete_cq            = 0x04,
+       nvme_admin_create_cq            = 0x05,
+       nvme_admin_identify             = 0x06,
+       nvme_admin_abort_cmd            = 0x08,
+       nvme_admin_set_features         = 0x09,
+       nvme_admin_get_features         = 0x0a,
+       nvme_admin_async_event          = 0x0c,
+       nvme_admin_ns_mgmt              = 0x0d,
+       nvme_admin_activate_fw          = 0x10,
+       nvme_admin_download_fw          = 0x11,
+       nvme_admin_dev_self_test        = 0x14,
+       nvme_admin_ns_attach            = 0x15,
+       nvme_admin_keep_alive           = 0x18,
+       nvme_admin_directive_send       = 0x19,
+       nvme_admin_directive_recv       = 0x1a,
+       nvme_admin_virtual_mgmt         = 0x1c,
+       nvme_admin_nvme_mi_send         = 0x1d,
+       nvme_admin_nvme_mi_recv         = 0x1e,
+       nvme_admin_dbbuf                = 0x7C,
+       nvme_admin_format_nvm           = 0x80,
+       nvme_admin_security_send        = 0x81,
+       nvme_admin_security_recv        = 0x82,
+       nvme_admin_sanitize_nvm         = 0x84,
+};
+
+enum {
+       NVME_QUEUE_PHYS_CONTIG  = (1 << 0),
+       NVME_CQ_IRQ_ENABLED     = (1 << 1),
+       NVME_SQ_PRIO_URGENT     = (0 << 1),
+       NVME_SQ_PRIO_HIGH       = (1 << 1),
+       NVME_SQ_PRIO_MEDIUM     = (2 << 1),
+       NVME_SQ_PRIO_LOW        = (3 << 1),
+       NVME_FEAT_ARBITRATION   = 0x01,
+       NVME_FEAT_POWER_MGMT    = 0x02,
+       NVME_FEAT_LBA_RANGE     = 0x03,
+       NVME_FEAT_TEMP_THRESH   = 0x04,
+       NVME_FEAT_ERR_RECOVERY  = 0x05,
+       NVME_FEAT_VOLATILE_WC   = 0x06,
+       NVME_FEAT_NUM_QUEUES    = 0x07,
+       NVME_FEAT_IRQ_COALESCE  = 0x08,
+       NVME_FEAT_IRQ_CONFIG    = 0x09,
+       NVME_FEAT_WRITE_ATOMIC  = 0x0a,
+       NVME_FEAT_ASYNC_EVENT   = 0x0b,
+       NVME_FEAT_AUTO_PST      = 0x0c,
+       NVME_FEAT_HOST_MEM_BUF  = 0x0d,
+       NVME_FEAT_TIMESTAMP     = 0x0e,
+       NVME_FEAT_KATO          = 0x0f,
+       NVME_FEAT_HCTM          = 0X10,
+       NVME_FEAT_NOPSC         = 0X11,
+       NVME_FEAT_RRL           = 0x12,
+       NVME_FEAT_PLM_CONFIG    = 0x13,
+       NVME_FEAT_PLM_WINDOW    = 0x14,
+       NVME_FEAT_SW_PROGRESS   = 0x80,
+       NVME_FEAT_HOST_ID       = 0x81,
+       NVME_FEAT_RESV_MASK     = 0x82,
+       NVME_FEAT_RESV_PERSIST  = 0x83,
+       NVME_FEAT_WRITE_PROTECT = 0x84,
+       NVME_LOG_ERROR          = 0x01,
+       NVME_LOG_SMART          = 0x02,
+       NVME_LOG_FW_SLOT        = 0x03,
+       NVME_LOG_CHANGED_NS     = 0x04,
+       NVME_LOG_CMD_EFFECTS    = 0x05,
+       NVME_LOG_DEVICE_SELF_TEST = 0x06,
+       NVME_LOG_TELEMETRY_HOST = 0x07,
+       NVME_LOG_TELEMETRY_CTRL = 0x08,
+       NVME_LOG_ENDURANCE_GROUP = 0x09,
+       NVME_LOG_ANA            = 0x0c,
+       NVME_LOG_DISC           = 0x70,
+       NVME_LOG_RESERVATION    = 0x80,
+       NVME_LOG_SANITIZE       = 0x81,
+       NVME_FWACT_REPL         = (0 << 3),
+       NVME_FWACT_REPL_ACTV    = (1 << 3),
+       NVME_FWACT_ACTV         = (2 << 3),
+};
+
+enum {
+       NVME_NO_LOG_LSP       = 0x0,
+       NVME_NO_LOG_LPO       = 0x0,
+       NVME_LOG_ANA_LSP_RGO  = 0x1,
+       NVME_TELEM_LSP_CREATE = 0x1,
+};
+
+/* Sanitize and Sanitize Monitor/Log */
+enum {
+       /* Sanitize */
+       NVME_SANITIZE_NO_DEALLOC        = 0x00000200,
+       NVME_SANITIZE_OIPBP             = 0x00000100,
+       NVME_SANITIZE_OWPASS_SHIFT      = 0x00000004,
+       NVME_SANITIZE_AUSE              = 0x00000008,
+       NVME_SANITIZE_ACT_CRYPTO_ERASE  = 0x00000004,
+       NVME_SANITIZE_ACT_OVERWRITE     = 0x00000003,
+       NVME_SANITIZE_ACT_BLOCK_ERASE   = 0x00000002,
+       NVME_SANITIZE_ACT_EXIT          = 0x00000001,
+
+       /* Sanitize Monitor/Log */
+       NVME_SANITIZE_LOG_DATA_LEN              = 0x0014,
+       NVME_SANITIZE_LOG_GLOBAL_DATA_ERASED    = 0x0100,
+       NVME_SANITIZE_LOG_NUM_CMPLTED_PASS_MASK = 0x00F8,
+       NVME_SANITIZE_LOG_STATUS_MASK           = 0x0007,
+       NVME_SANITIZE_LOG_NEVER_SANITIZED       = 0x0000,
+       NVME_SANITIZE_LOG_COMPLETED_SUCCESS     = 0x0001,
+       NVME_SANITIZE_LOG_IN_PROGESS            = 0x0002,
+       NVME_SANITIZE_LOG_COMPLETED_FAILED      = 0x0003,
+};
+
+enum {
+       /* Self-test log Validation bits */
+       NVME_SELF_TEST_VALID_NSID       = 1 << 0,
+       NVME_SELF_TEST_VALID_FLBA       = 1 << 1,
+       NVME_SELF_TEST_VALID_SCT        = 1 << 2,
+       NVME_SELF_TEST_VALID_SC         = 1 << 3,
+       NVME_SELF_TEST_REPORTS          = 20,
+};
+
+struct nvme_identify {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2[2];
+       union nvme_data_ptr     dptr;
+       __u8                    cns;
+       __u8                    rsvd3;
+       __le16                  ctrlid;
+       __u32                   rsvd11[5];
+};
+
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
+struct nvme_features {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2[2];
+       union nvme_data_ptr     dptr;
+       __le32                  fid;
+       __le32                  dword11;
+       __le32                  dword12;
+       __le32                  dword13;
+       __le32                  dword14;
+       __le32                  dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+       __le64                  addr;
+       __le32                  size;
+       __u32                   rsvd;
+};
+
+struct nvme_create_cq {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __u32                   rsvd1[5];
+       __le64                  prp1;
+       __u64                   rsvd8;
+       __le16                  cqid;
+       __le16                  qsize;
+       __le16                  cq_flags;
+       __le16                  irq_vector;
+       __u32                   rsvd12[4];
+};
+
+struct nvme_create_sq {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __u32                   rsvd1[5];
+       __le64                  prp1;
+       __u64                   rsvd8;
+       __le16                  sqid;
+       __le16                  qsize;
+       __le16                  sq_flags;
+       __le16                  cqid;
+       __u32                   rsvd12[4];
+};
+
+struct nvme_delete_queue {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __u32                   rsvd1[9];
+       __le16                  qid;
+       __u16                   rsvd10;
+       __u32                   rsvd11[5];
+};
+
+struct nvme_abort_cmd {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __u32                   rsvd1[9];
+       __le16                  sqid;
+       __u16                   cid;
+       __u32                   rsvd11[5];
+};
+
+struct nvme_download_firmware {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __u32                   rsvd1[5];
+       union nvme_data_ptr     dptr;
+       __le32                  numd;
+       __le32                  offset;
+       __u32                   rsvd12[4];
+};
+
+struct nvme_format_cmd {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2[4];
+       __le32                  cdw10;
+       __u32                   rsvd11[5];
+};
+
+struct nvme_get_log_page_command {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2[2];
+       union nvme_data_ptr     dptr;
+       __u8                    lid;
+       __u8                    lsp;
+       __le16                  numdl;
+       __le16                  numdu;
+       __u16                   rsvd11;
+       __le32                  lpol;
+       __le32                  lpou;
+       __u32                   rsvd14[2];
+};
+
+struct nvme_directive_cmd {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __le32                  nsid;
+       __u64                   rsvd2[2];
+       union nvme_data_ptr     dptr;
+       __le32                  numd;
+       __u8                    doper;
+       __u8                    dtype;
+       __le16                  dspec;
+       __u8                    endir;
+       __u8                    tdtype;
+       __u16                   rsvd15;
+
+       __u32                   rsvd16[3];
+};
+
+/* Sanitize Log Page */
+struct nvme_sanitize_log_page {
+       __le16                  progress;
+       __le16                  status;
+       __le32                  cdw10_info;
+       __le32                  est_ovrwrt_time;
+       __le32                  est_blk_erase_time;
+       __le32                  est_crypto_erase_time;
+};
+
+/*
+ * Fabrics subcommands.
+ */
+enum nvmf_fabrics_opcode {
+       nvme_fabrics_command            = 0x7f,
+};
+
+enum nvmf_capsule_command {
+       nvme_fabrics_type_property_set  = 0x00,
+       nvme_fabrics_type_connect       = 0x01,
+       nvme_fabrics_type_property_get  = 0x04,
+};
+
+struct nvmf_common_command {
+       __u8    opcode;
+       __u8    resv1;
+       __u16   command_id;
+       __u8    fctype;
+       __u8    resv2[35];
+       __u8    ts[24];
+};
+
+/*
+ * The legal cntlid range a NVMe Target will provide.
+ * Note that cntlid of value 0 is considered illegal in the fabrics world.
+ * Devices based on earlier specs did not have the subsystem concept;
+ * therefore, those devices had their cntlid value set to 0 as a result.
+ */
+#define NVME_CNTLID_MIN                1
+#define NVME_CNTLID_MAX                0xffef
+#define NVME_CNTLID_DYNAMIC    0xffff
+
+#define MAX_DISC_LOGS  255
+
+/* Discovery log page entry */
+struct nvmf_disc_rsp_page_entry {
+       __u8            trtype;
+       __u8            adrfam;
+       __u8            subtype;
+       __u8            treq;
+       __le16          portid;
+       __le16          cntlid;
+       __le16          asqsz;
+       __u8            resv8[22];
+       char            trsvcid[NVMF_TRSVCID_SIZE];
+       __u8            resv64[192];
+       char            subnqn[NVMF_NQN_FIELD_LEN];
+       char            traddr[NVMF_TRADDR_SIZE];
+       union tsas {
+               char            common[NVMF_TSAS_SIZE];
+               struct rdma {
+                       __u8    qptype;
+                       __u8    prtype;
+                       __u8    cms;
+                       __u8    resv3[5];
+                       __u16   pkey;
+                       __u8    resv10[246];
+               } rdma;
+               struct tcp {
+                       __u8    sectype;
+               } tcp;
+       } tsas;
+};
+
+/* Discovery log page header */
+struct nvmf_disc_rsp_page_hdr {
+       __le64          genctr;
+       __le64          numrec;
+       __le16          recfmt;
+       __u8            resv14[1006];
+       struct nvmf_disc_rsp_page_entry entries[0];
+};
+
+struct nvmf_connect_command {
+       __u8            opcode;
+       __u8            resv1;
+       __u16           command_id;
+       __u8            fctype;
+       __u8            resv2[19];
+       union nvme_data_ptr dptr;
+       __le16          recfmt;
+       __le16          qid;
+       __le16          sqsize;
+       __u8            cattr;
+       __u8            resv3;
+       __le32          kato;
+       __u8            resv4[12];
+};
+
+struct nvmf_connect_data {
+       uuid_t          hostid;
+       __le16          cntlid;
+       char            resv4[238];
+       char            subsysnqn[NVMF_NQN_FIELD_LEN];
+       char            hostnqn[NVMF_NQN_FIELD_LEN];
+       char            resv5[256];
+};
+
+struct nvmf_property_set_command {
+       __u8            opcode;
+       __u8            resv1;
+       __u16           command_id;
+       __u8            fctype;
+       __u8            resv2[35];
+       __u8            attrib;
+       __u8            resv3[3];
+       __le32          offset;
+       __le64          value;
+       __u8            resv4[8];
+};
+
+struct nvmf_property_get_command {
+       __u8            opcode;
+       __u8            resv1;
+       __u16           command_id;
+       __u8            fctype;
+       __u8            resv2[35];
+       __u8            attrib;
+       __u8            resv3[3];
+       __le32          offset;
+       __u8            resv4[16];
+};
+
+struct nvme_dbbuf {
+       __u8                    opcode;
+       __u8                    flags;
+       __u16                   command_id;
+       __u32                   rsvd1[5];
+       __le64                  prp1;
+       __le64                  prp2;
+       __u32                   rsvd12[6];
+};
+
+struct streams_directive_params {
+       __le16  msl;
+       __le16  nssa;
+       __le16  nsso;
+       __u8    rsvd[10];
+       __le32  sws;
+       __le16  sgs;
+       __le16  nsa;
+       __le16  nso;
+       __u8    rsvd2[6];
+};
+
+struct nvme_command {
+       union {
+               struct nvme_common_command common;
+               struct nvme_rw_command rw;
+               struct nvme_identify identify;
+               struct nvme_features features;
+               struct nvme_create_cq create_cq;
+               struct nvme_create_sq create_sq;
+               struct nvme_delete_queue delete_queue;
+               struct nvme_download_firmware dlfw;
+               struct nvme_format_cmd format;
+               struct nvme_dsm_cmd dsm;
+               struct nvme_write_zeroes_cmd write_zeroes;
+               struct nvme_abort_cmd abort;
+               struct nvme_get_log_page_command get_log_page;
+               struct nvmf_common_command fabrics;
+               struct nvmf_connect_command connect;
+               struct nvmf_property_set_command prop_set;
+               struct nvmf_property_get_command prop_get;
+               struct nvme_dbbuf dbbuf;
+               struct nvme_directive_cmd directive;
+       };
+};
+
+/*
+ * Return bit 0 of the field that selects the command.
+ *
+ * Normally that field is the opcode.  When the opcode is
+ * nvme_fabrics_command, bit 0 of the fabrics command type (fctype) is
+ * tested instead of bit 0 of the opcode.
+ */
+static inline bool nvme_is_write(struct nvme_command *cmd)
+{
+       __u8 selector = cmd->common.opcode;
+
+       if (unlikely(selector == nvme_fabrics_command))
+               selector = cmd->fabrics.fctype;
+       return selector & 1;
+}
+
+enum {
+       /*
+        * Generic Command Status:
+        */
+       NVME_SC_SUCCESS                 = 0x0,
+       NVME_SC_INVALID_OPCODE          = 0x1,
+       NVME_SC_INVALID_FIELD           = 0x2,
+       NVME_SC_CMDID_CONFLICT          = 0x3,
+       NVME_SC_DATA_XFER_ERROR         = 0x4,
+       NVME_SC_POWER_LOSS              = 0x5,
+       NVME_SC_INTERNAL                = 0x6,
+       NVME_SC_ABORT_REQ               = 0x7,
+       NVME_SC_ABORT_QUEUE             = 0x8,
+       NVME_SC_FUSED_FAIL              = 0x9,
+       NVME_SC_FUSED_MISSING           = 0xa,
+       NVME_SC_INVALID_NS              = 0xb,
+       NVME_SC_CMD_SEQ_ERROR           = 0xc,
+       NVME_SC_SGL_INVALID_LAST        = 0xd,
+       NVME_SC_SGL_INVALID_COUNT       = 0xe,
+       NVME_SC_SGL_INVALID_DATA        = 0xf,
+       NVME_SC_SGL_INVALID_METADATA    = 0x10,
+       NVME_SC_SGL_INVALID_TYPE        = 0x11,
+
+       NVME_SC_SGL_INVALID_OFFSET      = 0x16,
+       NVME_SC_SGL_INVALID_SUBTYPE     = 0x17,
+
+       NVME_SC_SANITIZE_FAILED         = 0x1C,
+       NVME_SC_SANITIZE_IN_PROGRESS    = 0x1D,
+
+       NVME_SC_NS_WRITE_PROTECTED      = 0x20,
+
+       NVME_SC_LBA_RANGE               = 0x80,
+       NVME_SC_CAP_EXCEEDED            = 0x81,
+       NVME_SC_NS_NOT_READY            = 0x82,
+       NVME_SC_RESERVATION_CONFLICT    = 0x83,
+
+       /*
+        * Command Specific Status:
+        */
+       NVME_SC_CQ_INVALID              = 0x100,
+       NVME_SC_QID_INVALID             = 0x101,
+       NVME_SC_QUEUE_SIZE              = 0x102,
+       NVME_SC_ABORT_LIMIT             = 0x103,
+       NVME_SC_ABORT_MISSING           = 0x104,
+       NVME_SC_ASYNC_LIMIT             = 0x105,
+       NVME_SC_FIRMWARE_SLOT           = 0x106,
+       NVME_SC_FIRMWARE_IMAGE          = 0x107,
+       NVME_SC_INVALID_VECTOR          = 0x108,
+       NVME_SC_INVALID_LOG_PAGE        = 0x109,
+       NVME_SC_INVALID_FORMAT          = 0x10a,
+       NVME_SC_FW_NEEDS_CONV_RESET     = 0x10b,
+       NVME_SC_INVALID_QUEUE           = 0x10c,
+       NVME_SC_FEATURE_NOT_SAVEABLE    = 0x10d,
+       NVME_SC_FEATURE_NOT_CHANGEABLE  = 0x10e,
+       NVME_SC_FEATURE_NOT_PER_NS      = 0x10f,
+       NVME_SC_FW_NEEDS_SUBSYS_RESET   = 0x110,
+       NVME_SC_FW_NEEDS_RESET          = 0x111,
+       NVME_SC_FW_NEEDS_MAX_TIME       = 0x112,
+       NVME_SC_FW_ACIVATE_PROHIBITED   = 0x113,
+       NVME_SC_OVERLAPPING_RANGE       = 0x114,
+       NVME_SC_NS_INSUFFICENT_CAP      = 0x115,
+       NVME_SC_NS_ID_UNAVAILABLE       = 0x116,
+       NVME_SC_NS_ALREADY_ATTACHED     = 0x118,
+       NVME_SC_NS_IS_PRIVATE           = 0x119,
+       NVME_SC_NS_NOT_ATTACHED         = 0x11a,
+       NVME_SC_THIN_PROV_NOT_SUPP      = 0x11b,
+       NVME_SC_CTRL_LIST_INVALID       = 0x11c,
+       NVME_SC_BP_WRITE_PROHIBITED     = 0x11e,
+
+       /*
+        * I/O Command Set Specific - NVM commands:
+        */
+       NVME_SC_BAD_ATTRIBUTES          = 0x180,
+       NVME_SC_INVALID_PI              = 0x181,
+       NVME_SC_READ_ONLY               = 0x182,
+       NVME_SC_ONCS_NOT_SUPPORTED      = 0x183,
+
+       /*
+        * I/O Command Set Specific - Fabrics commands:
+        */
+       NVME_SC_CONNECT_FORMAT          = 0x180,
+       NVME_SC_CONNECT_CTRL_BUSY       = 0x181,
+       NVME_SC_CONNECT_INVALID_PARAM   = 0x182,
+       NVME_SC_CONNECT_RESTART_DISC    = 0x183,
+       NVME_SC_CONNECT_INVALID_HOST    = 0x184,
+
+       NVME_SC_DISCOVERY_RESTART       = 0x190,
+       NVME_SC_AUTH_REQUIRED           = 0x191,
+
+       /*
+        * Media and Data Integrity Errors:
+        */
+       NVME_SC_WRITE_FAULT             = 0x280,
+       NVME_SC_READ_ERROR              = 0x281,
+       NVME_SC_GUARD_CHECK             = 0x282,
+       NVME_SC_APPTAG_CHECK            = 0x283,
+       NVME_SC_REFTAG_CHECK            = 0x284,
+       NVME_SC_COMPARE_FAILED          = 0x285,
+       NVME_SC_ACCESS_DENIED           = 0x286,
+       NVME_SC_UNWRITTEN_BLOCK         = 0x287,
+
+       /*
+        * Path-related Errors:
+        */
+       NVME_SC_ANA_PERSISTENT_LOSS     = 0x301,
+       NVME_SC_ANA_INACCESSIBLE        = 0x302,
+       NVME_SC_ANA_TRANSITION          = 0x303,
+
+       NVME_SC_DNR                     = 0x4000,
+};
+
+struct nvme_completion {
+       /*
+        * Used by Admin and Fabrics commands to return data:
+        */
+       union nvme_result {
+               __le16  u16;
+               __le32  u32;
+               __le64  u64;
+       } result;
+       __le16  sq_head;        /* how much of this queue may be reclaimed */
+       __le16  sq_id;          /* submission queue that generated this entry */
+       __u16   command_id;     /* of the command which completed */
+       __le16  status;         /* did the command fail, and if so, why? */
+};
+
+#define NVME_VS(major, minor, tertiary) \
+       (((major) << 16) | ((minor) << 8) | (tertiary))
+
+#define NVME_MAJOR(ver)                ((ver) >> 16)
+#define NVME_MINOR(ver)                (((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver)     ((ver) & 0xff)
+
+#endif /* _LINUX_NVME_H */
diff --git a/libmultipath/nvme/linux/nvme_ioctl.h b/libmultipath/nvme/linux/nvme_ioctl.h
new file mode 100644 (file)
index 0000000..d25a532
--- /dev/null
@@ -0,0 +1,67 @@
+/*
+ * Definitions for the NVM Express ioctl interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _UAPI_LINUX_NVME_IOCTL_H
+#define _UAPI_LINUX_NVME_IOCTL_H
+
+#include <linux/types.h>
+#include <sys/ioctl.h>
+
+struct nvme_user_io {
+       __u8    opcode;
+       __u8    flags;
+       __u16   control;
+       __u16   nblocks;
+       __u16   rsvd;
+       __u64   metadata;
+       __u64   addr;
+       __u64   slba;
+       __u32   dsmgmt;
+       __u32   reftag;
+       __u16   apptag;
+       __u16   appmask;
+};
+
+struct nvme_passthru_cmd {
+       __u8    opcode;
+       __u8    flags;
+       __u16   rsvd1;
+       __u32   nsid;
+       __u32   cdw2;
+       __u32   cdw3;
+       __u64   metadata;
+       __u64   addr;
+       __u32   metadata_len;
+       __u32   data_len;
+       __u32   cdw10;
+       __u32   cdw11;
+       __u32   cdw12;
+       __u32   cdw13;
+       __u32   cdw14;
+       __u32   cdw15;
+       __u32   timeout_ms;
+       __u32   result;
+};
+
+#define nvme_admin_cmd nvme_passthru_cmd
+
+#define NVME_IOCTL_ID          _IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD   _IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO   _IOW('N', 0x42, struct nvme_user_io)
+#define NVME_IOCTL_IO_CMD      _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#define NVME_IOCTL_RESET       _IO('N', 0x44)
+#define NVME_IOCTL_SUBSYS_RESET        _IO('N', 0x45)
+#define NVME_IOCTL_RESCAN      _IO('N', 0x46)
+
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
diff --git a/libmultipath/nvme/nvme-ioctl.c b/libmultipath/nvme/nvme-ioctl.c
new file mode 100644 (file)
index 0000000..70a16ce
--- /dev/null
@@ -0,0 +1,869 @@
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <errno.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "nvme-ioctl.h"
+
+/*
+ * Check that fd refers to a character device before a controller-level
+ * ioctl is issued on it.
+ *
+ * Returns 0 when fd is a character device, the (positive) errno value of a
+ * failed fstat(), or ENOTBLK when fd is any other kind of file.
+ */
+static int nvme_verify_chr(int fd)
+{
+       /* Automatic storage: nothing needs to outlive the call. */
+       struct stat nvme_stat;
+
+       if (fstat(fd, &nvme_stat) < 0) {
+               /* perror() may itself modify errno, so capture it first. */
+               int saved_errno = errno;
+
+               perror("fstat");
+               return saved_errno;
+       }
+       if (!S_ISCHR(nvme_stat.st_mode)) {
+               fprintf(stderr,
+                       "Error: requesting reset on non-controller handle\n");
+               return ENOTBLK;
+       }
+       return 0;
+}
+
+/*
+ * Issue NVME_IOCTL_SUBSYS_RESET on fd.
+ *
+ * fd is first validated with nvme_verify_chr(); a non-zero result from that
+ * check is returned unchanged, otherwise the ioctl() result is returned.
+ */
+int nvme_subsystem_reset(int fd)
+{
+       int chk = nvme_verify_chr(fd);
+
+       return chk ? chk : ioctl(fd, NVME_IOCTL_SUBSYS_RESET);
+}
+
+/*
+ * Issue NVME_IOCTL_RESET on fd.
+ *
+ * Returns the non-zero result of nvme_verify_chr() if fd fails that check,
+ * otherwise whatever ioctl() returns.
+ */
+int nvme_reset_controller(int fd)
+{
+       int chk = nvme_verify_chr(fd);
+
+       if (chk != 0)
+               return chk;
+
+       return ioctl(fd, NVME_IOCTL_RESET);
+}
+
+/*
+ * Issue NVME_IOCTL_RESCAN on fd.
+ *
+ * The handle must pass nvme_verify_chr(); if it does not, that function's
+ * non-zero result is returned and no ioctl is attempted.
+ */
+int nvme_ns_rescan(int fd)
+{
+       int chk = nvme_verify_chr(fd);
+
+       if (!chk)
+               return ioctl(fd, NVME_IOCTL_RESCAN);
+       return chk;
+}
+
+/*
+ * Issue NVME_IOCTL_ID on a block device handle.
+ *
+ * Returns the ioctl() result, -errno when fstat() fails, or -ENOTBLK (with
+ * errno set to ENOTBLK) when fd is not a block device.
+ */
+int nvme_get_nsid(int fd)
+{
+       /*
+        * Automatic storage: a static buffer would be shared by every
+        * thread calling this function concurrently.
+        */
+       struct stat nvme_stat;
+
+       if (fstat(fd, &nvme_stat) < 0)
+               return -errno;
+
+       if (!S_ISBLK(nvme_stat.st_mode)) {
+               fprintf(stderr,
+                       "Error: requesting namespace-id from non-block device\n");
+               errno = ENOTBLK;
+               return -errno;
+       }
+       return ioctl(fd, NVME_IOCTL_ID);
+}
+
+/*
+ * Hand a prepared passthrough command to the driver using the ioctl request
+ * number ioctl_cmd. The ioctl() result is returned unchanged.
+ */
+int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
+                        struct nvme_passthru_cmd *cmd)
+{
+       int rc = ioctl(fd, ioctl_cmd, cmd);
+
+       return rc;
+}
+
+/* Submit cmd through NVME_IOCTL_ADMIN_CMD; returns the ioctl() result. */
+static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+       return nvme_submit_passthru(fd, NVME_IOCTL_ADMIN_CMD, cmd);
+}
+
+/* Submit cmd through NVME_IOCTL_IO_CMD; returns the ioctl() result. */
+static int nvme_submit_io_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+       return nvme_submit_passthru(fd, NVME_IOCTL_IO_CMD, cmd);
+}
+
+/*
+ * Build a passthrough command from the individual arguments and submit it
+ * with the ioctl request ioctl_cmd.
+ *
+ * data and metadata are user-space buffers; their addresses are stored in
+ * the command as 64-bit integers. When the submission returns 0 and result
+ * is not NULL, the command's result field is copied to *result.
+ *
+ * Returns the value of nvme_submit_passthru().
+ */
+int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode,
+                 __u8 flags, __u16 rsvd,
+                 __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
+                 __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
+                 __u32 data_len, void *data, __u32 metadata_len,
+                 void *metadata, __u32 timeout_ms, __u32 *result)
+{
+       struct nvme_passthru_cmd cmd;
+       int err;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = opcode;
+       cmd.flags = flags;
+       cmd.rsvd1 = rsvd;
+       cmd.nsid = nsid;
+       cmd.cdw2 = cdw2;
+       cmd.cdw3 = cdw3;
+       cmd.metadata = (__u64)(uintptr_t) metadata;
+       cmd.addr = (__u64)(uintptr_t) data;
+       cmd.metadata_len = metadata_len;
+       cmd.data_len = data_len;
+       cmd.cdw10 = cdw10;
+       cmd.cdw11 = cdw11;
+       cmd.cdw12 = cdw12;
+       cmd.cdw13 = cdw13;
+       cmd.cdw14 = cdw14;
+       cmd.cdw15 = cdw15;
+       cmd.timeout_ms = timeout_ms;
+
+       err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
+       if (err)
+               return err;
+
+       if (result)
+               *result = cmd.result;
+       return 0;
+}
+
+/*
+ * Fill a struct nvme_user_io from the arguments and submit it with
+ * NVME_IOCTL_SUBMIT_IO. The flags and rsvd fields are always sent as 0.
+ *
+ * Returns the ioctl() result.
+ */
+int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+           __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+           void *metadata)
+{
+       struct nvme_user_io io;
+
+       memset(&io, 0, sizeof(io));
+       io.opcode = opcode;
+       io.control = control;
+       io.nblocks = nblocks;
+       io.metadata = (__u64)(uintptr_t) metadata;
+       io.addr = (__u64)(uintptr_t) data;
+       io.slba = slba;
+       io.dsmgmt = dsmgmt;
+       io.reftag = reftag;
+       io.apptag = apptag;
+       io.appmask = appmask;
+
+       return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
+}
+
+/* nvme_io() with the opcode fixed to nvme_cmd_read; returns its result. */
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+             __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+             void *metadata)
+{
+       int rc = nvme_io(fd, nvme_cmd_read, slba, nblocks, control,
+                        dsmgmt, reftag, apptag, appmask, data, metadata);
+
+       return rc;
+}
+
+/* nvme_io() with the opcode fixed to nvme_cmd_write; returns its result. */
+int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+              __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+              void *metadata)
+{
+       int rc = nvme_io(fd, nvme_cmd_write, slba, nblocks, control,
+                        dsmgmt, reftag, apptag, appmask, data, metadata);
+
+       return rc;
+}
+
+/* nvme_io() with the opcode fixed to nvme_cmd_compare; returns its result. */
+int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+                __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+                void *metadata)
+{
+       int rc = nvme_io(fd, nvme_cmd_compare, slba, nblocks, control,
+                        dsmgmt, reftag, apptag, appmask, data, metadata);
+
+       return rc;
+}
+
+/*
+ * nvme_passthru() bound to NVME_IOCTL_IO_CMD. The command's result field is
+ * not reported back (a NULL result pointer is passed).
+ */
+int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
+                    __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10,
+                    __u32 cdw11, __u32 cdw12, __u32 cdw13, __u32 cdw14,
+                    __u32 cdw15, __u32 data_len, void *data,
+                    __u32 metadata_len, void *metadata, __u32 timeout_ms)
+{
+       __u32 *no_result = NULL;
+
+       return nvme_passthru(fd, NVME_IOCTL_IO_CMD, opcode, flags, rsvd,
+                            nsid, cdw2, cdw3, cdw10, cdw11, cdw12, cdw13,
+                            cdw14, cdw15, data_len, data, metadata_len,
+                            metadata, timeout_ms, no_result);
+}
+
+/*
+ * Submit an nvme_cmd_write_zeroes I/O passthrough command.
+ *
+ * slba is split across cdw10 (low 32 bits) and cdw11 (high 32 bits);
+ * nlb/control share cdw12 and apptag/appmask share cdw15, with the second
+ * value of each pair in the upper 16 bits.
+ *
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
+                    __u16 control, __u32 reftag, __u16 apptag, __u16 appmask)
+{
+       struct nvme_passthru_cmd cmd = {
+               .opcode         = nvme_cmd_write_zeroes,
+               .nsid           = nsid,
+               .cdw10          = slba & 0xffffffff,
+               .cdw11          = slba >> 32,
+               /*
+                * Widen before shifting: a __u16 is promoted to int, and
+                * shifting a value >= 0x8000 left by 16 overflows it.
+                */
+               .cdw12          = nlb | ((__u32)control << 16),
+               .cdw14          = reftag,
+               .cdw15          = apptag | ((__u32)appmask << 16),
+       };
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_cmd_write_uncor I/O passthrough command. slba is split
+ * across cdw10 (low half) and cdw11 (high half); nlb goes in cdw12.
+ *
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb)
+{
+       struct nvme_passthru_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_cmd_write_uncor;
+       cmd.nsid = nsid;
+       cmd.cdw10 = slba & 0xffffffff;
+       cmd.cdw11 = slba >> 32;
+       cmd.cdw12 = nlb;
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_cmd_flush I/O passthrough command for nsid.
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_flush(int fd, __u32 nsid)
+{
+       struct nvme_passthru_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_cmd_flush;
+       cmd.nsid = nsid;
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_cmd_dsm I/O passthrough command.
+ *
+ * dsm points at nr_ranges range descriptors (see nvme_setup_dsm_range());
+ * the buffer length is nr_ranges * sizeof(*dsm) and cdw10 carries
+ * nr_ranges - 1. cdw11 is passed through as given.
+ *
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
+            __u16 nr_ranges)
+{
+       struct nvme_passthru_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_cmd_dsm;
+       cmd.nsid = nsid;
+       cmd.addr = (__u64)(uintptr_t) dsm;
+       cmd.data_len = nr_ranges * sizeof(*dsm);
+       cmd.cdw10 = nr_ranges - 1;
+       cmd.cdw11 = cdw11;
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Allocate an array of nr_ranges range descriptors and fill entry i from
+ * ctx_attrs[i], llbas[i] and slbas[i], converted to little-endian.
+ *
+ * Returns the malloc()ed array, which the caller must free(), or NULL
+ * (after printing a message on stderr) when the allocation fails.
+ */
+struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, __u32 *llbas,
+                                           __u64 *slbas, __u16 nr_ranges)
+{
+       struct nvme_dsm_range *ranges = malloc(nr_ranges * sizeof(*ranges));
+       int idx = 0;
+
+       if (ranges == NULL) {
+               fprintf(stderr, "malloc: %s\n", strerror(errno));
+               return NULL;
+       }
+
+       while (idx < nr_ranges) {
+               struct nvme_dsm_range *cur = &ranges[idx];
+
+               cur->cattr = cpu_to_le32(ctx_attrs[idx]);
+               cur->nlb = cpu_to_le32(llbas[idx]);
+               cur->slba = cpu_to_le64(slbas[idx]);
+               idx++;
+       }
+       return ranges;
+}
+
+/*
+ * Submit an nvme_cmd_resv_acquire I/O passthrough command.
+ *
+ * The data buffer holds crkey followed by nrkey, both little-endian.
+ * cdw10 carries racqa in bits 2:0, iekey in bit 3 and rtype from bit 8 up.
+ *
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
+                     bool iekey, __u64 crkey, __u64 nrkey)
+{
+       __le64 payload[2];
+       struct nvme_passthru_cmd cmd;
+       __u32 cdw10 = racqa & 0x7;
+
+       payload[0] = cpu_to_le64(crkey);
+       payload[1] = cpu_to_le64(nrkey);
+
+       if (iekey)
+               cdw10 |= 1 << 3;
+       cdw10 |= rtype << 8;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_cmd_resv_acquire;
+       cmd.nsid = nsid;
+       cmd.cdw10 = cdw10;
+       cmd.addr = (__u64)(uintptr_t) (payload);
+       cmd.data_len = sizeof(payload);
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_cmd_resv_register I/O passthrough command.
+ *
+ * The data buffer holds crkey followed by nrkey, both little-endian.
+ * cdw10 carries rrega in bits 2:0, iekey in bit 3 and cptpl in bits 31:30.
+ *
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
+                      bool iekey, __u64 crkey, __u64 nrkey)
+{
+       __le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
+       /*
+        * cptpl is widened to __u32 first: as a promoted int, any value
+        * >= 2 shifted left by 30 would overflow (undefined behaviour).
+        */
+       __u32 cdw10 = (rrega & 0x7) | (iekey ? 1 << 3 : 0) |
+                     ((__u32)cptpl << 30);
+
+       struct nvme_passthru_cmd cmd = {
+               .opcode         = nvme_cmd_resv_register,
+               .nsid           = nsid,
+               .cdw10          = cdw10,
+               .addr           = (__u64)(uintptr_t) (payload),
+               .data_len       = sizeof(payload),
+       };
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_cmd_resv_release I/O passthrough command.
+ *
+ * The data buffer holds crkey in little-endian form. cdw10 carries rrela in
+ * bits 2:0, iekey in bit 3 and rtype from bit 8 up.
+ *
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
+                     bool iekey, __u64 crkey)
+{
+       __le64 payload[1];
+       struct nvme_passthru_cmd cmd;
+       __u32 cdw10 = rrela & 0x7;
+
+       payload[0] = cpu_to_le64(crkey);
+
+       if (iekey)
+               cdw10 |= 1 << 3;
+       cdw10 |= rtype << 8;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_cmd_resv_release;
+       cmd.nsid = nsid;
+       cmd.cdw10 = cdw10;
+       cmd.addr = (__u64)(uintptr_t) (payload);
+       cmd.data_len = sizeof(payload);
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_cmd_resv_report I/O passthrough command.
+ *
+ * numd is placed in cdw10 and the buffer length passed to the driver is
+ * (numd + 1) * 4 bytes, so data must be at least that large.
+ *
+ * Returns the result of the NVME_IOCTL_IO_CMD ioctl.
+ */
+int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data)
+{
+       struct nvme_passthru_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_cmd_resv_report;
+       cmd.nsid = nsid;
+       cmd.cdw10 = numd;
+       cmd.cdw11 = cdw11;
+       cmd.addr = (__u64)(uintptr_t) data;
+       cmd.data_len = (numd + 1) << 2;
+
+       return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_admin_identify command with caller-supplied cdw10 and
+ * cdw11. data receives NVME_IDENTIFY_DATA_SIZE bytes.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data)
+{
+       struct nvme_admin_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_admin_identify;
+       cmd.nsid = nsid;
+       cmd.addr = (__u64)(uintptr_t) data;
+       cmd.data_len = NVME_IDENTIFY_DATA_SIZE;
+       cmd.cdw10 = cdw10;
+       cmd.cdw11 = cdw11;
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* nvme_identify13() with cdw11 fixed to 0; returns its result. */
+int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
+{
+       const __u32 cdw11 = 0;
+
+       return nvme_identify13(fd, nsid, cdw10, cdw11, data);
+}
+
+/*
+ * Identify with nsid 0, cdw10 set to 1 and cdw11 set to 0.
+ * Returns the result of the underlying identify command.
+ */
+int nvme_identify_ctrl(int fd, void *data)
+{
+       return nvme_identify13(fd, 0, 1, 0, data);
+}
+
+/*
+ * Identify for namespace nsid. The CNS value placed in cdw10 is
+ * NVME_ID_CNS_NS_PRESENT when present is true, NVME_ID_CNS_NS otherwise.
+ */
+int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data)
+{
+       int cns;
+
+       if (present)
+               cns = NVME_ID_CNS_NS_PRESENT;
+       else
+               cns = NVME_ID_CNS_NS;
+
+       return nvme_identify(fd, nsid, cns, data);
+}
+
+/*
+ * Identify namespace list. The CNS value placed in cdw10 is
+ * NVME_ID_CNS_NS_PRESENT_LIST when all is true, NVME_ID_CNS_NS_ACTIVE_LIST
+ * otherwise; nsid is passed through unchanged.
+ */
+int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data)
+{
+       int cns;
+
+       if (all)
+               cns = NVME_ID_CNS_NS_PRESENT_LIST;
+       else
+               cns = NVME_ID_CNS_NS_ACTIVE_LIST;
+
+       return nvme_identify(fd, nsid, cns, data);
+}
+
+/*
+ * Identify controller list. The CNS value is NVME_ID_CNS_CTRL_NS_LIST when
+ * nsid is non-zero and NVME_ID_CNS_CTRL_LIST otherwise; cntid occupies the
+ * upper 16 bits of cdw10.
+ *
+ * Returns the result of nvme_identify().
+ */
+int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data)
+{
+       int cns = nsid ? NVME_ID_CNS_CTRL_NS_LIST : NVME_ID_CNS_CTRL_LIST;
+
+       /*
+        * Widen cntid before shifting: as a promoted int, a value >= 0x8000
+        * shifted left by 16 would overflow (undefined behaviour).
+        */
+       return nvme_identify(fd, nsid, ((__u32)cntid << 16) | cns, data);
+}
+
+/* nvme_identify() with cdw10 set to NVME_ID_CNS_NS_DESC_LIST. */
+int nvme_identify_ns_descs(int fd, __u32 nsid, void *data)
+{
+       const __u32 cns = NVME_ID_CNS_NS_DESC_LIST;
+
+       return nvme_identify(fd, nsid, cns, data);
+}
+
+/*
+ * nvme_identify13() with nsid 0, cdw10 set to NVME_ID_CNS_NVMSET_LIST and
+ * nvmset_id in cdw11.
+ */
+int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data)
+{
+       const __u32 cdw11 = nvmset_id;
+
+       return nvme_identify13(fd, 0, NVME_ID_CNS_NVMSET_LIST, cdw11, data);
+}
+
+/*
+ * Submit an nvme_admin_get_log_page command.
+ *
+ * The dword count (data_len / 4, minus one) is split into a low half in
+ * cdw10 bits 31:16 and a high half in cdw11 bits 15:0. cdw10 also carries
+ * log_id in bits 7:0, lsp from bit 8 and rae in bit 15; lsi goes in cdw11
+ * bits 31:16 and the 64-bit offset lpo is split across cdw12/cdw13.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl, or -EINVAL (with
+ * errno set) when data_len is smaller than one dword.
+ */
+int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
+                 __u16 lsi, bool rae, __u32 data_len, void *data)
+{
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_admin_get_log_page,
+               .nsid           = nsid,
+               .addr           = (__u64)(uintptr_t) data,
+               .data_len       = data_len,
+       };
+       __u32 numd, numdu, numdl;
+
+       /* (data_len >> 2) - 1 would wrap to 0xffffffff below one dword. */
+       if (data_len < 4) {
+               errno = EINVAL;
+               return -errno;
+       }
+
+       numd = (data_len >> 2) - 1;
+       numdu = numd >> 16;
+       numdl = numd & 0xffff;
+
+       /*
+        * All shifted operands are __u32: 16-bit values promoted to int
+        * would overflow when shifted left by 16 with their top bit set.
+        */
+       cmd.cdw10 = log_id | (numdl << 16) | (rae ? 1 << 15 : 0);
+       if (lsp)
+               cmd.cdw10 |= (__u32)lsp << 8;
+
+       cmd.cdw11 = numdu | ((__u32)lsi << 16);
+       cmd.cdw12 = lpo;
+       cmd.cdw13 = (lpo >> 32);
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * Read data_len bytes of log page log_id into data, in chunks of at most
+ * 4096 bytes, each fetched with nvme_get_log13() at the matching offset.
+ *
+ * Returns 0 when every chunk succeeded, otherwise the first non-zero
+ * nvme_get_log13() result (data may then be partially filled).
+ */
+int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
+                __u32 data_len, void *data)
+{
+       /* Byte pointer: arithmetic on void * is a GNU extension, not ISO C. */
+       unsigned char *ptr = data;
+       __u32 offset = 0;
+       int ret;
+
+       /*
+        * 4k is the smallest possible transfer unit, so by
+        * restricting ourselves for 4k transfers we avoid having
+        * to check the MDTS value of the controller.
+        */
+       do {
+               __u32 xfer_len = data_len - offset;
+
+               if (xfer_len > 4096)
+                       xfer_len = 4096;
+
+               ret = nvme_get_log13(fd, nsid, log_id, NVME_NO_LOG_LSP,
+                                    offset, 0, rae, xfer_len, ptr);
+               if (ret)
+                       return ret;
+
+               offset += xfer_len;
+               ptr += xfer_len;
+       } while (offset < data_len);
+
+       return 0;
+}
+
+/*
+ * Fetch log_page_size bytes of a telemetry log at byte offset `offset`.
+ *
+ * ctrl_init selects NVME_LOG_TELEMETRY_CTRL; otherwise the
+ * NVME_LOG_TELEMETRY_HOST page is read, with NVME_TELEM_LSP_CREATE as the
+ * log-specific field when generate_report is set. rae is always 1 and the
+ * namespace is NVME_NSID_ALL.
+ *
+ * Returns the result of nvme_get_log13().
+ */
+int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
+                          int ctrl_init, size_t log_page_size, __u64 offset)
+{
+       __u8 log_id = NVME_LOG_TELEMETRY_HOST;
+       __u8 lsp = NVME_NO_LOG_LSP;
+
+       if (ctrl_init)
+               log_id = NVME_LOG_TELEMETRY_CTRL;
+       else if (generate_report)
+               lsp = NVME_TELEM_LSP_CREATE;
+
+       return nvme_get_log13(fd, NVME_NSID_ALL, log_id, lsp, offset,
+                             0, 1, log_page_size, lp);
+}
+
+/*
+ * Read the NVME_LOG_FW_SLOT page for NVME_NSID_ALL into *fw_log, with rae
+ * set. Returns the result of nvme_get_log().
+ */
+int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log)
+{
+       const __u32 len = sizeof(*fw_log);
+
+       return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_FW_SLOT, true,
+                           len, fw_log);
+}
+
+/*
+ * Read the NVME_LOG_CHANGED_NS page (nsid 0, rae set) into the log member
+ * of *changed_ns_list_log. Returns the result of nvme_get_log().
+ */
+int nvme_changed_ns_list_log(int fd, struct nvme_changed_ns_list_log *changed_ns_list_log)
+{
+       const __u32 len = sizeof(changed_ns_list_log->log);
+
+       return nvme_get_log(fd, 0, NVME_LOG_CHANGED_NS, true,
+                           len, changed_ns_list_log->log);
+}
+
+/*
+ * Read `entries` records of the NVME_LOG_ERROR page for NVME_NSID_ALL into
+ * err_log (rae clear). Returns the result of nvme_get_log().
+ */
+int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log)
+{
+       const __u32 len = entries * sizeof(*err_log);
+
+       return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_ERROR, false,
+                           len, err_log);
+}
+
+/*
+ * Read the NVME_LOG_ENDURANCE_GROUP page into *endurance_log in a single
+ * nvme_get_log13() call: nsid 0, lsp 0, offset 0, rae clear, and group_id
+ * passed as the lsi argument.
+ */
+int nvme_endurance_log(int fd, __u16 group_id, struct nvme_endurance_group_log *endurance_log)
+{
+       const __u32 len = sizeof(*endurance_log);
+
+       return nvme_get_log13(fd, 0, NVME_LOG_ENDURANCE_GROUP, 0, 0,
+                             group_id, 0, len, endurance_log);
+}
+
+/*
+ * Read the NVME_LOG_SMART page for nsid into *smart_log (rae clear).
+ * Returns the result of nvme_get_log().
+ */
+int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log)
+{
+       const __u32 len = sizeof(*smart_log);
+
+       return nvme_get_log(fd, nsid, NVME_LOG_SMART, false, len, smart_log);
+}
+
+/*
+ * Read ana_log_len bytes of the NVME_LOG_ANA page for NVME_NSID_ALL into
+ * ana_log in one nvme_get_log13() call (offset 0, lsi 0, rae set). rgo is
+ * passed as the log-specific field.
+ */
+int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
+{
+       const __u64 start = 0;
+
+       return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo, start,
+                             0, true, ana_log_len, ana_log);
+}
+
+/*
+ * Read the NVME_LOG_DEVICE_SELF_TEST page for NVME_NSID_ALL into
+ * *self_test_log (rae clear). Returns the result of nvme_get_log().
+ */
+int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log)
+{
+       const __u32 len = sizeof(*self_test_log);
+
+       return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_DEVICE_SELF_TEST,
+                           false, len, self_test_log);
+}
+
+/*
+ * Read the NVME_LOG_CMD_EFFECTS page (nsid 0, rae clear) into *effects_log.
+ * Returns the result of nvme_get_log().
+ */
+int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log)
+{
+       const __u32 len = sizeof(*effects_log);
+
+       return nvme_get_log(fd, 0, NVME_LOG_CMD_EFFECTS, false,
+                           len, effects_log);
+}
+
+/*
+ * Read `size` bytes of the NVME_LOG_DISC page (nsid 0, rae clear) into log.
+ * Returns the result of nvme_get_log().
+ */
+int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size)
+{
+       int rc = nvme_get_log(fd, 0, NVME_LOG_DISC, false, size, log);
+
+       return rc;
+}
+
+/*
+ * Read the NVME_LOG_SANITIZE page (nsid 0, rae clear) into *sanitize_log.
+ * Returns the result of nvme_get_log().
+ */
+int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log)
+{
+       const __u32 len = sizeof(*sanitize_log);
+
+       return nvme_get_log(fd, 0, NVME_LOG_SANITIZE, false,
+                           len, sanitize_log);
+}
+
+/*
+ * Submit an admin command with the given opcode and cdw10..cdw12, using
+ * data/data_len as the transfer buffer.
+ *
+ * When the submission returns 0 and result is not NULL, the command's
+ * result field is copied to *result.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, __u32 cdw11,
+                __u32 cdw12, __u32 data_len, void *data, __u32 *result)
+{
+       struct nvme_admin_cmd cmd;
+       int err;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = opcode;
+       cmd.nsid = nsid;
+       cmd.cdw10 = cdw10;
+       cmd.cdw11 = cdw11;
+       cmd.cdw12 = cdw12;
+       cmd.addr = (__u64)(uintptr_t) data;
+       cmd.data_len = data_len;
+
+       err = nvme_submit_admin_passthru(fd, &cmd);
+       if (err)
+               return err;
+
+       if (result)
+               *result = cmd.result;
+       return 0;
+}
+
+/*
+ * nvme_feature() with opcode nvme_admin_set_features.
+ *
+ * cdw10 carries fid in its low byte and the save flag in bit 31; value is
+ * sent as cdw11 and cdw12 is passed through.
+ *
+ * Returns the result of nvme_feature().
+ */
+int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
+                    bool save, __u32 data_len, void *data, __u32 *result)
+{
+       /* 1U: shifting a signed 1 into bit 31 overflows int. */
+       __u32 cdw10 = fid | (save ? 1U << 31 : 0);
+
+       return nvme_feature(fd, nvme_admin_set_features, nsid, cdw10, value,
+                           cdw12, data_len, data, result);
+}
+
+/*
+ * Submit an nvme_fabrics_command admin command that gets or sets a
+ * property.
+ *
+ * fctype must be nvme_fabrics_type_property_get or
+ * nvme_fabrics_type_property_set; it is placed in the command's nsid field.
+ * off and *value are little-endian typed, attrib goes in cdw10.
+ *
+ * For a set, *value is sent in cdw12. For a successful get, *value receives
+ * the command's result field, converted to little-endian. Both cdw12 and
+ * result are 32 bits wide in struct nvme_passthru_cmd, so only 32 bits
+ * travel in either direction.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl, or -EINVAL (with
+ * errno set) when value is NULL or fctype is not one of the two above.
+ */
+static int nvme_property(int fd, __u8 fctype, __le32 off, __le64 *value, __u8 attrib)
+{
+       int err;
+       /*
+        * The command dwords are plain __u32 values in CPU byte order, so
+        * the little-endian typed arguments are converted back before being
+        * stored (a no-op on little-endian hosts).
+        */
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_fabrics_command,
+               .cdw10          = attrib,
+               .cdw11          = le32_to_cpu(off),
+       };
+
+       if (!value) {
+               errno = EINVAL;
+               return -errno;
+       }
+
+       if (fctype == nvme_fabrics_type_property_get) {
+               cmd.nsid = nvme_fabrics_type_property_get;
+       } else if (fctype == nvme_fabrics_type_property_set) {
+               cmd.nsid = nvme_fabrics_type_property_set;
+               cmd.cdw12 = le64_to_cpu(*value);
+       } else {
+               errno = EINVAL;
+               return -errno;
+       }
+
+       err = nvme_submit_admin_passthru(fd, &cmd);
+       if (!err && fctype == nvme_fabrics_type_property_get)
+               *value = cpu_to_le64(cmd.result);
+       return err;
+}
+
+/*
+ * Read the property at `offset` into *value.
+ *
+ * *advance is always set to the width of the property in bytes: 8 for
+ * NVME_REG_CAP, NVME_REG_ASQ and NVME_REG_ACQ, 4 for everything else. On
+ * success exactly that many bytes are stored through value (as a uint64_t
+ * or a uint32_t respectively), in CPU byte order.
+ *
+ * Returns 0 on success, -EINVAL when value is NULL, otherwise the result
+ * of nvme_property().
+ */
+static int get_property_helper(int fd, int offset, void *value, int *advance)
+{
+       __le64 value64;
+       int err = -EINVAL;
+
+       switch (offset) {
+       case NVME_REG_CAP:
+       case NVME_REG_ASQ:
+       case NVME_REG_ACQ:
+               *advance = 8;
+               break;
+       default:
+               *advance = 4;
+       }
+
+       if (!value)
+               return err;
+
+       err = nvme_property(fd, nvme_fabrics_type_property_get,
+                       cpu_to_le32(offset), &value64, (*advance == 8));
+
+       if (!err) {
+               if (*advance == 8)
+                       *((uint64_t *)value) = le64_to_cpu(value64);
+               else
+                       /*
+                        * Convert the whole 64-bit value first, then
+                        * truncate: a 32-bit swap of the truncated
+                        * little-endian value is wrong on big-endian hosts.
+                        */
+                       *((uint32_t *)value) = (uint32_t)le64_to_cpu(value64);
+       }
+
+       return err;
+}
+
+/*
+ * Read the property at `offset` into *value.
+ *
+ * get_property_helper() stores either 4 or 8 bytes depending on the
+ * property. It is therefore given a zeroed scratch object of its own, and
+ * the outcome is widened into *value, so that a 32-bit property never
+ * leaves half of the caller's uint64_t untouched.
+ *
+ * Returns 0 on success, -EINVAL when value is NULL, otherwise the helper's
+ * result; *value is only written on success.
+ */
+int nvme_get_property(int fd, int offset, uint64_t *value)
+{
+       union {
+               uint64_t q;
+               uint32_t d;
+       } reg = { .q = 0 };
+       int advance;
+       int err;
+
+       if (!value)
+               return -EINVAL;
+
+       err = get_property_helper(fd, offset, &reg, &advance);
+       if (!err)
+               *value = (advance == 8) ? reg.q : reg.d;
+       return err;
+}
+
+/*
+ * Allocate one page, pre-fill it with 0xff, and read every property from
+ * NVME_REG_CAP up to and including NVME_REG_CMBSZ into it at the
+ * property's own offset.
+ *
+ * *pbar receives the malloc()ed buffer (NULL if the allocation failed).
+ * Individual read failures are tolerated: the function returns 0 as soon
+ * as at least one property was read, -EINVAL when none was, and -ENOMEM
+ * when the allocation failed. This function never frees the buffer.
+ */
+int nvme_get_properties(int fd, void **pbar)
+{
+       int page_size = getpagesize();
+       unsigned char *bar = malloc(page_size);
+       int status = -EINVAL;
+       int offset = NVME_REG_CAP;
+       int advance;
+
+       *pbar = bar;
+       if (bar == NULL) {
+               fprintf(stderr, "malloc: %s\n", strerror(errno));
+               return -ENOMEM;
+       }
+
+       memset(bar, 0xff, page_size);
+       while (offset <= NVME_REG_CMBSZ) {
+               if (get_property_helper(fd, offset, bar + offset,
+                                       &advance) == 0)
+                       status = 0;
+               offset += advance;
+       }
+
+       return status;
+}
+
+/*
+ * Set the property at `offset` to `value` through nvme_property().
+ *
+ * The attrib argument is 1 for NVME_REG_CAP, NVME_REG_ASQ and NVME_REG_ACQ
+ * and 0 for every other offset.
+ *
+ * Returns the result of nvme_property().
+ */
+int nvme_set_property(int fd, int offset, int value)
+{
+       __le64 val = cpu_to_le64(value);
+       __le32 off = cpu_to_le32(offset);
+       bool is64bit;
+
+       /*
+        * Compare the CPU-order offset: the converted `off` only equals the
+        * NVME_REG_* constants on little-endian hosts.
+        */
+       switch (offset) {
+       case NVME_REG_CAP:
+       case NVME_REG_ASQ:
+       case NVME_REG_ACQ:
+               is64bit = true;
+               break;
+       default:
+               is64bit = false;
+       }
+
+       return nvme_property(fd, nvme_fabrics_type_property_set,
+                       off, &val, is64bit ? 1: 0);
+}
+
+/*
+ * nvme_feature() with opcode nvme_admin_get_features.
+ *
+ * cdw10 carries fid in its low byte and sel from bit 8 up; cdw12 is 0.
+ * Returns the result of nvme_feature().
+ */
+int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, __u32 cdw11,
+                    __u32 data_len, void *data, __u32 *result)
+{
+       __u32 cdw10 = fid;
+
+       cdw10 |= sel << 8;
+
+       return nvme_feature(fd, nvme_admin_get_features, nsid, cdw10,
+                           cdw11, 0, data_len, data, result);
+}
+
+/*
+ * Submit an nvme_admin_format_nvm command for nsid.
+ *
+ * cdw10 is assembled as lbaf | ms << 4 | pi << 5 | pil << 8 | ses << 9.
+ * timeout is handed to the driver in the command's timeout_ms field.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
+               __u8 pil, __u8 ms, __u32 timeout)
+{
+       struct nvme_admin_cmd cmd;
+       __u32 cdw10 = lbaf;
+
+       cdw10 |= ms << 4;
+       cdw10 |= pi << 5;
+       cdw10 |= pil << 8;
+       cdw10 |= ses << 9;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_admin_format_nvm;
+       cmd.nsid = nsid;
+       cmd.cdw10 = cdw10;
+       cmd.timeout_ms = timeout;
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_admin_ns_mgmt command with cdw10 = 0.
+ *
+ * The data buffer is a struct nvme_id_ns with nsze and ncap stored
+ * little-endian, flbas, dps and nmic copied as given, and every other
+ * member zero; 0x1000 bytes are declared as the transfer length.
+ *
+ * When the submission returns 0 and result is not NULL, the command's
+ * result field is copied to *result.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
+                  __u8 dps, __u8 nmic, __u32 *result)
+{
+       struct nvme_id_ns ns = {
+               .nsze           = cpu_to_le64(nsze),
+               .ncap           = cpu_to_le64(ncap),
+               .flbas          = flbas,
+               .dps            = dps,
+               .nmic           = nmic,
+       };
+       struct nvme_admin_cmd cmd;
+       int err;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_admin_ns_mgmt;
+       cmd.addr = (__u64)(uintptr_t) ((void *)&ns);
+       cmd.cdw10 = 0;
+       cmd.data_len = 0x1000;
+
+       err = nvme_submit_admin_passthru(fd, &cmd);
+       if (err)
+               return err;
+
+       if (result)
+               *result = cmd.result;
+       return 0;
+}
+
+/*
+ * Submit an nvme_admin_ns_mgmt command with cdw10 = 1 for nsid.
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_ns_delete(int fd, __u32 nsid)
+{
+       struct nvme_admin_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_admin_ns_mgmt;
+       cmd.nsid = nsid;
+       cmd.cdw10 = 1;
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_admin_ns_attach command for nsid.
+ *
+ * ctrlist holds num_ctrls controller identifiers. They are copied,
+ * little-endian, into a zeroed 4096-byte controller list that is sent as
+ * the data buffer. cdw10 is 0 when attach is true and 1 otherwise.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl, or -EINVAL (with
+ * errno set) when num_ctrls identifiers do not fit in the buffer.
+ */
+int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist,
+                      bool attach)
+{
+       int i;
+       __u8 buf[0x1000];
+       struct nvme_controller_list *cntlist =
+                                       (struct nvme_controller_list *)buf;
+       /*
+        * Entries that fit after the count field. Assumes identifier[]
+        * directly follows num in struct nvme_controller_list - TODO confirm
+        * against the structure definition.
+        */
+       size_t max_ctrls = (sizeof(buf) - sizeof(cntlist->num)) /
+                          sizeof(cntlist->identifier[0]);
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_admin_ns_attach,
+               .nsid           = nsid,
+               .addr           = (__u64)(uintptr_t) cntlist,
+               .cdw10          = attach ? 0 : 1,
+               .data_len       = 0x1000,
+       };
+
+       /* Refuse lists that would be written past the end of buf. */
+       if (num_ctrls > max_ctrls) {
+               errno = EINVAL;
+               return -errno;
+       }
+
+       memset(buf, 0, sizeof(buf));
+       cntlist->num = cpu_to_le16(num_ctrls);
+       for (i = 0; i < num_ctrls; i++)
+               cntlist->identifier[i] = cpu_to_le16(ctrlist[i]);
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* nvme_ns_attachment() with attach set to true; returns its result. */
+int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{
+       const bool attach = true;
+
+       return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, attach);
+}
+
+/* nvme_ns_attachment() with attach set to false; returns its result. */
+int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{
+       const bool attach = false;
+
+       return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, attach);
+}
+
+/*
+ * Submit an nvme_admin_download_fw command.
+ *
+ * data/data_len describe the buffer to send. cdw10 is set to
+ * (data_len >> 2) - 1 and cdw11 to offset >> 2.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data)
+{
+       struct nvme_admin_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_admin_download_fw;
+       cmd.addr = (__u64)(uintptr_t) data;
+       cmd.data_len = data_len;
+       cmd.cdw10 = (data_len >> 2) - 1;
+       cmd.cdw11 = offset >> 2;
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_admin_activate_fw command.
+ *
+ * cdw10 carries slot in its low bits, action from bit 3 up and bpid in
+ * bit 31.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid)
+{
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_admin_activate_fw,
+               /*
+                * bpid is widened first: shifting a promoted int into
+                * bit 31 overflows it (undefined behaviour).
+                */
+               .cdw10          = ((__u32)bpid << 31) | (action << 3) | slot,
+       };
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_admin_security_send command.
+ *
+ * cdw10 carries secp in bits 31:24, spsp in bits 23:8 and nssf in bits
+ * 7:0; tl goes in cdw11. data/data_len describe the buffer to send.
+ *
+ * When the submission returns 0 and result is not NULL, the command's
+ * result field is copied to *result.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+                 __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result)
+{
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_admin_security_send,
+               .addr           = (__u64)(uintptr_t) data,
+               .data_len       = data_len,
+               .nsid           = nsid,
+               /*
+                * Widen before shifting: secp >= 0x80 shifted left by 24
+                * as a promoted int overflows (undefined behaviour).
+                */
+               .cdw10          = ((__u32)secp << 24) | ((__u32)spsp << 8) |
+                                 nssf,
+               .cdw11          = tl,
+       };
+       int err;
+
+       err = nvme_submit_admin_passthru(fd, &cmd);
+       if (!err && result)
+               *result = cmd.result;
+       return err;
+}
+
+/*
+ * Submit an nvme_admin_security_recv command.
+ *
+ * cdw10 carries secp in bits 31:24, spsp in bits 23:8 and nssf in bits
+ * 7:0; al goes in cdw11. data/data_len describe the receive buffer.
+ *
+ * When the submission returns 0 and result is not NULL, the command's
+ * result field is copied to *result.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+                 __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result)
+{
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_admin_security_recv,
+               .nsid           = nsid,
+               /*
+                * Widen before shifting: secp >= 0x80 shifted left by 24
+                * as a promoted int overflows (undefined behaviour).
+                */
+               .cdw10          = ((__u32)secp << 24) | ((__u32)spsp << 8) |
+                                 nssf,
+               .cdw11          = al,
+               .addr           = (__u64)(uintptr_t) data,
+               .data_len       = data_len,
+       };
+       int err;
+
+       err = nvme_submit_admin_passthru(fd, &cmd);
+       if (!err && result)
+               *result = cmd.result;
+       return err;
+}
+
+/*
+ * Submit an nvme_admin_directive_send command.
+ *
+ * cdw10 is the zero-based dword count of the buffer ((data_len >> 2) - 1,
+ * or 0 when data_len is 0). cdw11 carries dspec in bits 31:16, dtype in
+ * bits 15:8 and doper in bits 7:0; dw12 is passed through as cdw12.
+ *
+ * When the submission returns 0 and result is not NULL, the command's
+ * result field is copied to *result.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+                  __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_admin_directive_send,
+               .addr           = (__u64)(uintptr_t) data,
+               .data_len       = data_len,
+               .nsid           = nsid,
+               .cdw10          = data_len ? (data_len >> 2) - 1 : 0,
+               /*
+                * Widen before shifting: dspec >= 0x8000 shifted left by
+                * 16 as a promoted int overflows (undefined behaviour).
+                */
+               .cdw11          = ((__u32)dspec << 16) | (dtype << 8) | doper,
+               .cdw12          = dw12,
+       };
+       int err;
+
+       err = nvme_submit_admin_passthru(fd, &cmd);
+       if (!err && result)
+               *result = cmd.result;
+       return err;
+}
+
+/*
+ * Submit an nvme_admin_directive_recv command.
+ *
+ * cdw10 is the zero-based dword count of the buffer ((data_len >> 2) - 1,
+ * or 0 when data_len is 0). cdw11 carries dspec in bits 31:16, dtype in
+ * bits 15:8 and doper in bits 7:0; dw12 is passed through as cdw12.
+ *
+ * When the submission returns 0 and result is not NULL, the command's
+ * result field is copied to *result.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+                  __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+       struct nvme_admin_cmd cmd = {
+               .opcode         = nvme_admin_directive_recv,
+               .addr           = (__u64)(uintptr_t) data,
+               .data_len       = data_len,
+               .nsid           = nsid,
+               .cdw10          = data_len ? (data_len >> 2) - 1 : 0,
+               /*
+                * Widen before shifting: dspec >= 0x8000 shifted left by
+                * 16 as a promoted int overflows (undefined behaviour).
+                */
+               .cdw11          = ((__u32)dspec << 16) | (dtype << 8) | doper,
+               .cdw12          = dw12,
+       };
+       int err;
+
+       err = nvme_submit_admin_passthru(fd, &cmd);
+       if (!err && result)
+               *result = cmd.result;
+       return err;
+}
+
+/*
+ * Submit an nvme_admin_sanitize_nvm command.
+ *
+ * cdw10 is assembled from sanact (low bits), ause << 3,
+ * owpass << NVME_SANITIZE_OWPASS_SHIFT, oipbp << 8 and no_dealloc << 9;
+ * ovrpat is sent as cdw11.
+ *
+ * Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
+                 __u8 no_dealloc, __u32 ovrpat)
+{
+       struct nvme_admin_cmd cmd;
+       __u32 cdw10 = sanact;
+
+       cdw10 |= ause << 3;
+       cdw10 |= owpass << NVME_SANITIZE_OWPASS_SHIFT;
+       cdw10 |= oipbp << 8;
+       cdw10 |= no_dealloc << 9;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_admin_sanitize_nvm;
+       cmd.cdw10 = cdw10;
+       cmd.cdw11 = ovrpat;
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an nvme_admin_dev_self_test command for nsid with the given
+ * cdw10. Returns the result of the NVME_IOCTL_ADMIN_CMD ioctl.
+ */
+int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10)
+{
+       struct nvme_admin_cmd cmd;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.opcode = nvme_admin_dev_self_test;
+       cmd.nsid = nsid;
+       cmd.cdw10 = cdw10;
+
+       return nvme_submit_admin_passthru(fd, &cmd);
+}
diff --git a/libmultipath/nvme/nvme-ioctl.h b/libmultipath/nvme/nvme-ioctl.h
new file mode 100644 (file)
index 0000000..3fb740c
--- /dev/null
@@ -0,0 +1,139 @@
+#ifndef _NVME_LIB_H
+#define _NVME_LIB_H
+
+#include <linux/types.h>
+#include <stdbool.h>
+#include "linux/nvme_ioctl.h"
+#include "nvme.h"
+
+int nvme_get_nsid(int fd);
+
+/* Generic passthrough */
+int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
+                        struct nvme_passthru_cmd *cmd);
+
+int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode, __u8 flags,
+                 __u16 rsvd, __u32 nsid, __u32 cdw2, __u32 cdw3,
+                 __u32 cdw10, __u32 cdw11, __u32 cdw12,
+                 __u32 cdw13, __u32 cdw14, __u32 cdw15,
+                 __u32 data_len, void *data, __u32 metadata_len,
+                 void *metadata, __u32 timeout_ms, __u32 *result);
+
+/* NVME_SUBMIT_IO */
+int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+             __u32 dsmgmt, __u32 reftag, __u16 apptag,
+             __u16 appmask, void *data, void *metadata);
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
+             __u32 dsmgmt, __u32 reftag, __u16 apptag,
+             __u16 appmask, void *data, void *metadata);
+
+int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control,
+              __u32 dsmgmt, __u32 reftag, __u16 apptag,
+              __u16 appmask, void *data, void *metadata);
+
+int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control,
+                __u32 dsmgmt, __u32 reftag, __u16 apptag,
+                __u16 appmask, void *data, void *metadata);
+
+/* NVME_IO_CMD */
+int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
+                    __u32 nsid, __u32 cdw2, __u32 cdw3,
+                    __u32 cdw10, __u32 cdw11, __u32 cdw12,
+                    __u32 cdw13, __u32 cdw14, __u32 cdw15,
+                    __u32 data_len, void *data, __u32 metadata_len,
+                    void *metadata, __u32 timeout);
+
+int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
+                    __u16 control, __u32 reftag, __u16 apptag, __u16 appmask);
+
+int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb);
+
+int nvme_flush(int fd, __u32 nsid);
+
+int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
+            __u16 nr_ranges);
+struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs,
+                                           __u32 *llbas, __u64 *slbas,
+                                           __u16 nr_ranges);
+
+int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
+                     bool iekey, __u64 crkey, __u64 nrkey);
+int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
+                      bool iekey, __u64 crkey, __u64 nrkey);
+int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
+                     bool iekey, __u64 crkey);
+int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data);
+
+int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data);
+int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data);
+int nvme_identify_ctrl(int fd, void *data);
+int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data);
+int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data);
+int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data);
+int nvme_identify_ns_descs(int fd, __u32 nsid, void *data);
+int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data);
+/* The sixth argument is named lsi to match the definition in nvme-ioctl.c. */
+int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
+                  __u16 lsi, bool rae, __u32 data_len, void *data);
+int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
+                __u32 data_len, void *data);
+
+
+/* ctrl_init selects the NVME_LOG_TELEMETRY_CTRL page (name as in nvme-ioctl.c). */
+int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
+                          int ctrl_init, size_t log_page_size, __u64 offset);
+int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log);
+int nvme_changed_ns_list_log(int fd,
+               struct nvme_changed_ns_list_log *changed_ns_list_log);
+int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log);
+int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log);
+int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
+int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log);
+int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size);
+int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log);
+int nvme_endurance_log(int fd, __u16 group_id,
+                      struct nvme_endurance_group_log *endurance_log);
+
+int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10,
+                __u32 cdw11, __u32 cdw12, __u32 data_len, void *data,
+                __u32 *result);
+int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
+                    bool save, __u32 data_len, void *data, __u32 *result);
+int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel,
+                    __u32 cdw11, __u32 data_len, void *data, __u32 *result);
+
+int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
+               __u8 pil, __u8 ms, __u32 timeout);
+
+int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
+                  __u8 dps, __u8 nmic, __u32 *result);
+int nvme_ns_delete(int fd, __u32 nsid);
+
+int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls,
+                      __u16 *ctrlist, bool attach);
+int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
+int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
+
+int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data);
+int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid);
+
+int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+                 __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result);
+int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+                 __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result);
+
+int nvme_subsystem_reset(int fd);
+int nvme_reset_controller(int fd);
+int nvme_ns_rescan(int fd);
+
+int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+                 __u32 data_len, __u32 dw12, void *data, __u32 *result);
+int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+                 __u32 data_len, __u32 dw12, void *data, __u32 *result);
+int nvme_get_properties(int fd, void **pbar);
+int nvme_set_property(int fd, int offset, int value);
+int nvme_get_property(int fd, int offset, uint64_t *value);
+int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
+                 __u8 no_dealloc, __u32 ovrpat);
+int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10);
+int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log);
+#endif                         /* _NVME_LIB_H */
diff --git a/libmultipath/nvme/nvme.h b/libmultipath/nvme/nvme.h
new file mode 100644 (file)
index 0000000..685d179
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_H
+#define _NVME_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <endian.h>
+#include "plugin.h"
+#include "json.h"
+
+/*
+ * Stand-in for the kernel's branch-prediction hint: simply yields its
+ * argument. The expansion is parenthesized so that expressions such as
+ * !unlikely(a || b) keep their intended meaning.
+ */
+#define unlikely(x) (x)
+
+#ifdef LIBUUID
+#include <uuid/uuid.h>
+#else
+typedef struct {
+       uint8_t b[16];
+} uuid_t;
+#endif
+
+#include "linux/nvme.h"
+
+/*
+ * Effects log page buffer: two tables of 256 little-endian 32-bit entries
+ * (acs, iocs) followed by 2048 reserved bytes, 4096 bytes in total.
+ * NOTE(review): acs/iocs presumably hold the per-opcode effects of admin and
+ * I/O commands - confirm against the NVMe specification.
+ */
+struct nvme_effects_log_page {
+       __le32 acs[256];
+       __le32 iocs[256];
+       __u8   resv[2048];
+};
+
+/*
+ * One error log entry. The members add up to 64 bytes (no padding is needed
+ * with natural alignment). Note that the multi-byte members are declared as
+ * plain __u16/__u32/__u64, not as __le types.
+ */
+struct nvme_error_log_page {
+       __u64   error_count;
+       __u16   sqid;
+       __u16   cmdid;
+       __u16   status_field;
+       __u16   parm_error_location;
+       __u64   lba;
+       __u32   nsid;
+       __u8    vs;
+       __u8    resv[3];
+       __u64   cs;
+       __u8    resv2[24];
+};
+
+/*
+ * Firmware log page buffer: one afi byte, seven 64-bit frs entries and
+ * reserved space; the members add up to 512 bytes.
+ * NOTE(review): frs[] presumably holds one revision per firmware slot -
+ * confirm against the NVMe specification.
+ */
+struct nvme_firmware_log_page {
+       __u8    afi;
+       __u8    resv[7];
+       __u64   frs[7];
+       __u8    resv2[448];
+};
+
+/* idle and active power scales occupy the last 2 bits of the field */
+#define POWER_SCALE(s) ((s) >> 6)
+
+/*
+ * Host memory buffer data: four 32-bit members followed by 4080 reserved
+ * bytes, 4096 bytes in total.
+ */
+struct nvme_host_mem_buffer {
+       __u32                   hsize;
+       __u32                   hmdlal;
+       __u32                   hmdlau;
+       __u32                   hmdlec;
+       __u8                    rsvd16[4080];
+};
+
+/* One 8-byte entry: a 32-bit data word followed by a reserved 32-bit word. */
+struct nvme_auto_pst {
+       __u32   data;
+       __u32   rsvd32;
+};
+
+/*
+ * 8-byte timestamp record: a 6-byte timestamp value stored as raw bytes,
+ * one attribute byte and one reserved byte.
+ */
+struct nvme_timestamp {
+       __u8 timestamp[6];
+       __u8 attr;
+       __u8 rsvd;
+};
+
+/*
+ * Variable-length list of little-endian 16-bit identifiers, stored in a
+ * flexible array member.
+ * NOTE(review): num presumably gives the number of valid identifier[]
+ * entries - confirm against the users of this struct.
+ */
+struct nvme_controller_list {
+       __le16 num;
+       __le16 identifier[];
+};
+
+/*
+ * 8-byte overlay whose members add up to 64 bits.
+ * NOTE(review): judging by the names this mirrors the controller CAP
+ * register, with several bit fields packed into ams_cqr,
+ * bps_css_nssrs_dstrd and mpsmax_mpsmin - confirm against the NVMe
+ * specification.
+ */
+struct nvme_bar_cap {
+       __u16   mqes;
+       __u8    ams_cqr;
+       __u8    to;
+       __u16   bps_css_nssrs_dstrd;
+       __u8    mpsmax_mpsmin;
+       __u8    reserved;
+};
+
+#ifdef __CHECKER__
+#define __force       __attribute__((force))
+#else
+#define __force
+#endif
+
+/*
+ * Byte-order helpers in kernel style, built on the <endian.h> functions:
+ * cpu_to_leNN() wraps htoleNN() and casts the result to __leNN,
+ * leNN_to_cpu() casts its argument to __uNN and wraps leNNtoh().
+ * The __force annotation is only non-empty when __CHECKER__ is defined.
+ */
+#define cpu_to_le16(x) \
+       ((__force __le16)htole16(x))
+#define cpu_to_le32(x) \
+       ((__force __le32)htole32(x))
+#define cpu_to_le64(x) \
+       ((__force __le64)htole64(x))
+
+#define le16_to_cpu(x) \
+       le16toh((__force __u16)(x))
+#define le32_to_cpu(x) \
+       le32toh((__force __u32)(x))
+#define le64_to_cpu(x) \
+       le64toh((__force __u64)(x))
+
+/*
+ * Per-device record combining a node name (up to 1024 bytes), the identify
+ * controller data, a namespace ID with its identify namespace data, and a
+ * "block" value.
+ * NOTE(review): MAX_LIST_ITEMS is presumably the capacity of arrays of
+ * struct list_item - confirm against the users.
+ */
+#define MAX_LIST_ITEMS 256
+struct list_item {
+       char                node[1024];
+       struct nvme_id_ctrl ctrl;
+       int                 nsid;
+       struct nvme_id_ns   ns;
+       unsigned            block;
+};
+
+/*
+ * String attributes describing one controller.
+ * NOTE(review): ownership of the strings is not visible here; presumably
+ * they are released by free_subsys_list() - confirm.
+ */
+struct ctrl_list_item {
+       char *name;
+       char *address;
+       char *transport;
+       char *state;
+       char *ana_state;
+};
+
+/*
+ * One subsystem entry: its name, its NQN string and a pointer to
+ * ctrl_list_item records.
+ * NOTE(review): nctrls presumably is the number of elements in ctrls -
+ * confirm against get_subsys_list().
+ */
+struct subsys_list_item {
+       char *name;
+       char *subsysnqn;
+       int nctrls;
+       struct ctrl_list_item *ctrls;
+};
+
+/*
+ * NOTE(review): presumably the output format selectors returned by
+ * validate_output_format() - confirm. Values are 0, 1 and 2.
+ */
+enum {
+       NORMAL,
+       JSON,
+       BINARY,
+};
+
+void register_extension(struct plugin *plugin);
+
+#include "argconfig.h"
+int parse_and_open(int argc, char **argv, const char *desc,
+       const struct argconfig_commandline_options *clo, void *cfg, size_t size);
+
+extern const char *devicename;
+
+int __id_ctrl(int argc, char **argv, struct command *cmd, struct plugin *plugin, void (*vs)(__u8 *vs, struct json_object *root));
+int    validate_output_format(char *format);
+
+struct subsys_list_item *get_subsys_list(int *subcnt, char *subsysnqn, __u32 nsid);
+void free_subsys_list(struct subsys_list_item *slist, int n);
+char *nvme_char_from_block(char *block);
+#endif /* _NVME_H */
diff --git a/libmultipath/nvme/plugin.h b/libmultipath/nvme/plugin.h
new file mode 100644 (file)
index 0000000..91079fb
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef PLUGIN_H
+#define PLUGIN_H
+
+#include <stdbool.h>
+
+/*
+ * Description of a command-line program: identification and help strings,
+ * a table of command pointers and a pointer to its plugin extensions.
+ * NOTE(review): commands is presumably NULL-terminated - confirm.
+ */
+struct program {
+       const char *name;
+       const char *version;
+       const char *usage;
+       const char *desc;
+       const char *more;
+       struct command **commands;
+       struct plugin *extensions;
+};
+
+/*
+ * A plugin: name, description, its own command table, a back pointer to the
+ * owning program and next/tail links to other plugins.
+ * NOTE(review): next/tail presumably form a singly linked list with a cached
+ * tail pointer - confirm against register_extension().
+ */
+struct plugin {
+       const char *name;
+       const char *desc;
+       struct command **commands;
+       struct program *parent;
+       struct plugin *next;
+       struct plugin *tail;
+};
+
+/*
+ * One command: its name, help text, an optional alias string and the handler
+ * function, which receives argc/argv plus the command and plugin it belongs
+ * to and returns an int.
+ */
+struct command {
+       char *name;
+       char *help;
+       int (*fn)(int argc, char **argv, struct command *command, struct plugin *plugin);
+       char *alias;
+};
+
+void usage(struct plugin *plugin);
+void general_help(struct plugin *plugin);
+int handle_plugin(int argc, char **argv, struct plugin *plugin);
+
+#endif
index 17acfd0500abd769a371f29336bf47dbc956ca16..0590218d0df1d362f9dbc511a73a2c467c58f367 100644 (file)
@@ -42,7 +42,7 @@ void free_prio (struct prio * p)
                return;
        p->refcount--;
        if (p->refcount) {
-               condlog(3, "%s prioritizer refcount %d",
+               condlog(4, "%s prioritizer refcount %d",
                        p->name, p->refcount);
                return;
        }
index aa587ccdf1961cb7cf3eece96f9451a52d757377..599d1d884d4a802134eb17d2c32a778e4e2586f8 100644 (file)
@@ -30,6 +30,7 @@ struct path;
 #define PRIO_WEIGHTED_PATH     "weightedpath"
 #define PRIO_SYSFS             "sysfs"
 #define PRIO_PATH_LATENCY      "path_latency"
+#define PRIO_ANA               "ana"
 
 /*
  * Value used to mark the fact prio was not defined
index ab7bc07572eccbc94b022baa3b2bad113ec9b665..4d80c20cbf48c7d8f4d0c7155e0498c260cc9b6d 100644 (file)
@@ -21,6 +21,11 @@ LIBS = \
        libpriopath_latency.so \
        libpriosysfs.so
 
+ifneq ($(call check_file,/usr/include/linux/nvme_ioctl.h),0)
+       LIBS += libprioana.so
+       CFLAGS += -I../nvme
+endif
+
 all: $(LIBS)
 
 libprioalua.so: alua.o alua_rtpg.o
diff --git a/libmultipath/prioritizers/ana.c b/libmultipath/prioritizers/ana.c
new file mode 100644 (file)
index 0000000..990d935
--- /dev/null
@@ -0,0 +1,232 @@
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017   All Rights Reserved.
+ *
+ * ana.c
+ * Version 1.00
+ *
+ * Tool to make use of an NVMe feature called Asymmetric Namespace Access.
+ * It determines the ANA state of a device and prints a priority value to stdout.
+ *
+ * Author(s): Cheng Jike <chengjike.cheng@huawei.com>
+ *            Li Jie <lijie34@huawei.com>
+ *
+ * This file is released under the GPL version 2, or any later version.
+ */
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <libudev.h>
+
+#include "debug.h"
+#include "nvme-lib.h"
+#include "prio.h"
+#include "util.h"
+#include "structs.h"
+
+/*
+ * Error codes of the ANA prioritizer. Numbering starts at 1; the functions
+ * in this file return them negated (e.g. -ANA_ERR_NO_MEMORY), and the
+ * positive value indexes ana_errmsg[].
+ */
+enum {
+       ANA_ERR_GETCTRL_FAILED          = 1,
+       ANA_ERR_NOT_NVME,
+       ANA_ERR_NOT_SUPPORTED,
+       ANA_ERR_GETANAS_OVERFLOW,
+       ANA_ERR_GETANAS_NOTFOUND,
+       ANA_ERR_GETANALOG_FAILED,
+       ANA_ERR_GETNSID_FAILED,
+       ANA_ERR_GETNS_FAILED,
+       ANA_ERR_NO_MEMORY,
+       ANA_ERR_NO_INFORMATION,
+};
+
+/*
+ * Human-readable messages indexed by the positive ANA_ERR_* value.
+ * Index 0 has no initializer and is therefore NULL.
+ */
+static const char *ana_errmsg[] = {
+       [ANA_ERR_GETCTRL_FAILED]        = "couldn't get ctrl info",
+       [ANA_ERR_NOT_NVME]              = "not an NVMe device",
+       [ANA_ERR_NOT_SUPPORTED]         = "ANA not supported",
+       [ANA_ERR_GETANAS_OVERFLOW]      = "buffer overflow in ANA log",
+       [ANA_ERR_GETANAS_NOTFOUND]      = "NSID or ANAGRPID not found",
+       [ANA_ERR_GETANALOG_FAILED]      = "couldn't get ana log",
+       [ANA_ERR_GETNSID_FAILED]        = "couldn't get NSID",
+       [ANA_ERR_GETNS_FAILED]          = "couldn't get namespace info",
+       [ANA_ERR_NO_MEMORY]             = "out of memory",
+       [ANA_ERR_NO_INFORMATION]        = "invalid fd",
+};
+
+/*
+ * Descriptions of the ANA states, indexed by the NVME_ANA_* state value.
+ * Slots without an initializer are NULL; aas_print_string() checks for that.
+ */
+static const char *anas_string[] = {
+       [NVME_ANA_OPTIMIZED]                    = "ANA Optimized State",
+       [NVME_ANA_NONOPTIMIZED]                 = "ANA Non-Optimized State",
+       [NVME_ANA_INACCESSIBLE]                 = "ANA Inaccessible State",
+       [NVME_ANA_PERSISTENT_LOSS]              = "ANA Persistent Loss State",
+       [NVME_ANA_CHANGE]                       = "ANA Change state",
+};
+
+/*
+ * Map the low byte of rc to the matching entry of anas_string[].
+ * Values outside the table, or slots that are NULL, yield a fixed
+ * "invalid" string, so the result is never NULL.
+ */
+static const char *aas_print_string(int rc)
+{
+       const char *desc = NULL;
+       int state = rc & 0xff;
+
+       if (state >= 0 && state < ARRAY_SIZE(anas_string))
+               desc = anas_string[state];
+
+       return desc != NULL ? desc : "invalid ANA state";
+}
+
+/*
+ * get_ana_state - look up the ANA state of a namespace in an ANA log buffer.
+ * @nsid:        namespace ID searched for in each descriptor's NSID list
+ * @anagrpid:    ANA group ID that is matched as well; 0 disables this match
+ * @ana_log:     buffer holding the response header followed by the group
+ *               descriptors, each trailed by its NSID list
+ * @ana_log_len: number of valid bytes in @ana_log
+ *
+ * Returns the state byte of the first matching descriptor (>= 0),
+ * -ANA_ERR_GETANAS_OVERFLOW if the header or a descriptor would extend past
+ * @ana_log_len, or -ANA_ERR_GETANAS_NOTFOUND if nothing matches.
+ */
+static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log,
+                        size_t ana_log_len)
+{
+       /* char pointer: arithmetic on void * is a GNU extension */
+       char *base = ana_log;
+       struct nvme_ana_rsp_hdr *hdr = ana_log;
+       struct nvme_ana_group_desc *ana_desc;
+       size_t offset = sizeof(struct nvme_ana_rsp_hdr);
+       unsigned int ngrps, i;
+       __u32 nr_nsids, j;
+
+       /* The header itself must fit before hdr->ngrps may be read. */
+       if (ana_log_len < offset)
+               return -ANA_ERR_GETANAS_OVERFLOW;
+       ngrps = le16_to_cpu(hdr->ngrps);
+
+       /* Invariant: offset <= ana_log_len at the top of each iteration. */
+       for (i = 0; i < ngrps; i++) {
+               if (ana_log_len - offset < sizeof(*ana_desc))
+                       return -ANA_ERR_GETANAS_OVERFLOW;
+               ana_desc = (struct nvme_ana_group_desc *)(base + offset);
+               offset += sizeof(*ana_desc);
+
+               /*
+                * Compare the NSID count against the remaining space by
+                * division: multiplying first could wrap around where
+                * size_t is 32 bits wide and defeat the bounds check.
+                */
+               nr_nsids = le32_to_cpu(ana_desc->nnsids);
+               if (nr_nsids > (ana_log_len - offset) / sizeof(__le32))
+                       return -ANA_ERR_GETANAS_OVERFLOW;
+               offset += (size_t)nr_nsids * sizeof(__le32);
+
+               for (j = 0; j < nr_nsids; j++) {
+                       if (nsid == le32_to_cpu(ana_desc->nsids[j]))
+                               return ana_desc->state;
+               }
+
+               if (anagrpid != 0 && anagrpid == le32_to_cpu(ana_desc->grpid))
+                       return ana_desc->state;
+
+       }
+       return -ANA_ERR_GETANAS_NOTFOUND;
+}
+
+/*
+ * get_ana_info - determine the ANA state of the namespace behind a path.
+ * @pp:      path to query; pp->fd is used for all NVMe commands and pp->dev
+ *           in log messages
+ * @timeout: not used by this function
+ *
+ * Returns the ANA state found by get_ana_state() (>= 0) on success, or a
+ * negated ANA_ERR_* code on failure.
+ */
+int get_ana_info(struct path * pp, unsigned int timeout)
+{
+       int     rc;
+       __u32 nsid;
+       struct nvme_id_ctrl ctrl;
+       struct nvme_id_ns ns;
+       void *ana_log;
+       size_t ana_log_len;
+       bool is_anagrpid_const;
+
+       rc = nvme_id_ctrl_ana(pp->fd, &ctrl);
+       if (rc < 0) {
+               log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl");
+               return -ANA_ERR_GETCTRL_FAILED;
+       } else if (rc == 0)
+               return -ANA_ERR_NOT_SUPPORTED;
+
+       /*
+        * Check the result while it is still signed. Once stored in the
+        * unsigned nsid, a negative error return could no longer be told
+        * apart from a (huge) namespace ID. Using rc here also makes the
+        * log call report this failure rather than the stale result of
+        * nvme_id_ctrl_ana().
+        * NOTE(review): assumes nvme_get_nsid() returns int, negative on
+        * error - confirm against nvme-ioctl.h.
+        */
+       rc = nvme_get_nsid(pp->fd);
+       if (rc <= 0) {
+               log_nvme_errcode(rc, pp->dev, "nvme_get_nsid");
+               return -ANA_ERR_GETNSID_FAILED;
+       }
+       nsid = rc;
+       is_anagrpid_const = ctrl.anacap & (1 << 6);
+
+       /*
+        * Code copied from nvme-cli/nvme.c. We don't need to allocate an
+        * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most
+        * in one ANA group.
+        */
+       ana_log_len = sizeof(struct nvme_ana_rsp_hdr) +
+               le32_to_cpu(ctrl.nanagrpid)
+               * sizeof(struct nvme_ana_group_desc);
+
+       if (is_anagrpid_const) {
+               /* ns.anagrpid is needed for the group-only log lookup below */
+               rc = nvme_identify_ns(pp->fd, nsid, 0, &ns);
+               if (rc) {
+                       log_nvme_errcode(rc, pp->dev, "nvme_identify_ns");
+                       return -ANA_ERR_GETNS_FAILED;
+               }
+       } else
+               ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32);
+
+       ana_log = malloc(ana_log_len);
+       if (!ana_log)
+               return -ANA_ERR_NO_MEMORY;
+       /* free the buffer on thread cancellation as well as on normal exit */
+       pthread_cleanup_push(free, ana_log);
+       rc = nvme_ana_log(pp->fd, ana_log, ana_log_len,
+                         is_anagrpid_const ? NVME_ANA_LOG_RGO : 0);
+       if (rc) {
+               log_nvme_errcode(rc, pp->dev, "nvme_ana_log");
+               rc = -ANA_ERR_GETANALOG_FAILED;
+       } else
+               rc = get_ana_state(nsid,
+                                  is_anagrpid_const ?
+                                  le32_to_cpu(ns.anagrpid) : 0,
+                                  ana_log, ana_log_len);
+       pthread_cleanup_pop(1);
+       if (rc >= 0)
+               condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc,
+                       aas_print_string(rc));
+       return rc;
+}
+
+/*
+ * Priorities modeled roughly after the ALUA model (alua.c/sysfs.c)
+ * Reference: ANA Base Protocol (NVMe TP 4004a, 11/13/2018).
+ *
+ * Differences:
+ *
+ * - The ANA base spec defines no implicit or explicit (STPG) state management.
+ *   If a state is encountered that doesn't allow normal I/O (all except
+ *   OPTIMIZED and NON_OPTIMIZED), we can't do anything but either wait for a
+ *   Access State Change Notice (can't do that in multipathd as we don't receive
+ *   those), or retry commands in regular time intervals until ANATT is expired
+ *   (not implemented). Mapping UNAVAILABLE state to ALUA STANDBY is the best we
+ *   can currently do.
+ *
+ *   FIXME: Waiting for ANATT could be implemented with a "delayed failback"
+ *   mechanism. The current "failback" method can't be used, as it would
+ *   affect failback to every state, and here only failback to UNAVAILABLE
+ *   should be delayed.
+ *
+ * - PERSISTENT_LOSS state is even below ALUA's UNAVAILABLE state.
+ *   FIXME: According to the ANA TP, accessing paths in PERSISTENT_LOSS state
+ *   in any way makes no sense (e.g. §8.19.6 - paths in this state shouldn't
+ *   even be checked under "all paths down" conditions). Device mapper can,
+ *   and will, select a PG for IO if it has non-failed paths, even if the
+ *   PG has priority 0. We could avoid that only with an "ANA path checker".
+ *
+ * - ALUA has no CHANGE state. The ANA TP §8.18.3 / §8.19.4 suggests
+ *   that CHANGE state should be treated in roughly the same way as
+ *   INACCESSIBLE. Therefore we assign the same prio to it.
+ *
+ * - ALUA's LBA-dependent state has no ANA equivalent.
+ */
+
+/*
+ * Prioritizer entry point: translate the ANA state of a path into a
+ * priority.
+ *
+ * Returns 50 for OPTIMIZED, 10 for NONOPTIMIZED, 1 for INACCESSIBLE and
+ * CHANGE, 0 for PERSISTENT_LOSS. Any other result is logged and yields -1.
+ * A negative pp->fd is reported as ANA_ERR_NO_INFORMATION without querying
+ * the device. The args parameter is not used.
+ */
+int getprio(struct path *pp, char *args, unsigned int timeout)
+{
+       int state = (pp->fd < 0) ? -ANA_ERR_NO_INFORMATION
+                                : get_ana_info(pp, timeout);
+
+       switch (state) {
+       case NVME_ANA_OPTIMIZED:
+               return 50;
+       case NVME_ANA_NONOPTIMIZED:
+               return 10;
+       case NVME_ANA_INACCESSIBLE:
+       case NVME_ANA_CHANGE:
+               return 1;
+       case NVME_ANA_PERSISTENT_LOSS:
+               return 0;
+       default:
+               /* known error codes get their message, anything else is odd */
+               if (state < 0 && -state < ARRAY_SIZE(ana_errmsg))
+                       condlog(2, "%s: ANA error: %s", pp->dev,
+                               ana_errmsg[-state]);
+               else
+                       condlog(1, "%s: invalid ANA rc code %d", pp->dev,
+                               state);
+               return -1;
+       }
+}
index 970a3b5ce325678afd9526546d9cef035921d79b..98068f344b540a41a01cfbfa6311f871a1b58118 100644 (file)
@@ -5,6 +5,7 @@
  */
 #include <stdio.h>
 
+#include "nvme-lib.h"
 #include "checkers.h"
 #include "memory.h"
 #include "vector.h"
@@ -74,6 +75,8 @@ static const char cmdline_origin[] =
        "(setting: multipath command line [-p] flag)";
 static const char autodetect_origin[] =
        "(setting: storage device autodetected)";
+static const char marginal_path_origin[] =
+       "(setting: implied by marginal_path check)";
 
 #define do_default(dest, value)                                                \
 do {                                                                   \
@@ -548,13 +551,25 @@ detect_prio(struct config *conf, struct path * pp)
 {
        struct prio *p = &pp->prio;
        char buff[512];
-       char *default_prio = PRIO_ALUA;
-
-       if (pp->tpgs <= 0)
-               return;
-       if (pp->tpgs == 2 || !check_rdac(pp)) {
-               if (sysfs_get_asymmetric_access_state(pp, buff, 512) >= 0)
+       char *default_prio;
+
+       switch(pp->bus) {
+       case SYSFS_BUS_NVME:
+               if (nvme_id_ctrl_ana(pp->fd, NULL) == 0)
+                       return;
+               default_prio = PRIO_ANA;
+               break;
+       case SYSFS_BUS_SCSI:
+               if (pp->tpgs <= 0)
+                       return;
+               if ((pp->tpgs == 2 || !check_rdac(pp)) &&
+                   sysfs_get_asymmetric_access_state(pp, buff, 512) >= 0)
                        default_prio = PRIO_SYSFS;
+               else
+                       default_prio = PRIO_ALUA;
+               break;
+       default:
+               return;
        }
        prio_get(conf->multipath_dir, p, default_prio, DEFAULT_PRIO_ARGS);
 }
@@ -855,8 +870,9 @@ int select_delay_watch_checks(struct config *conf, struct multipath *mp)
        mp_set_conf(delay_watch_checks);
        mp_set_default(delay_watch_checks, DEFAULT_DELAY_CHECKS);
 out:
-       print_off_int_undef(buff, 12, mp->delay_watch_checks);
-       condlog(3, "%s: delay_watch_checks = %s %s", mp->alias, buff, origin);
+       if (print_off_int_undef(buff, 12, mp->delay_watch_checks) != 0)
+               condlog(3, "%s: delay_watch_checks = %s %s",
+                       mp->alias, buff, origin);
        return 0;
 }
 
@@ -871,8 +887,91 @@ int select_delay_wait_checks(struct config *conf, struct multipath *mp)
        mp_set_conf(delay_wait_checks);
        mp_set_default(delay_wait_checks, DEFAULT_DELAY_CHECKS);
 out:
-       print_off_int_undef(buff, 12, mp->delay_wait_checks);
-       condlog(3, "%s: delay_wait_checks = %s %s", mp->alias, buff, origin);
+       if (print_off_int_undef(buff, 12, mp->delay_wait_checks) != 0)
+               condlog(3, "%s: delay_wait_checks = %s %s",
+                       mp->alias, buff, origin);
+       return 0;
+
+}
+
+/* Set to 1 once the deprecation warning below has been logged. */
+static int san_path_deprecated_warned;
+/*
+ * Log a warning, at most once, if member x of *v is greater than 0.
+ * The member name x is stringified into the message as the option name.
+ */
+#define warn_san_path_deprecated(v, x)                                 \
+       do {                                                            \
+               if (v->x > 0 && !san_path_deprecated_warned) {          \
+               san_path_deprecated_warned = 1;                         \
+               condlog(1, "WARNING: option %s is deprecated, "         \
+                       "please use marginal_path options instead",     \
+                       #x);                                            \
+               }                                                       \
+       } while(0)
+
+/*
+ * Select mp->san_path_err_threshold.
+ * If the marginal_path check is enabled for mp, the value is forced to
+ * NU_NO. Otherwise the mp_set_* macros are tried in the order written,
+ * ending with the DEFAULT_ERR_CHECKS default.
+ * NOTE(review): the mp_set_* macros are defined outside this hunk;
+ * presumably each sets the value and origin and jumps to "out" when its
+ * source provides the setting - confirm.
+ * The result is logged at level 3 when print_off_int_undef() returns
+ * non-zero, and the one-time deprecation warning may be emitted.
+ * Always returns 0.
+ */
+int select_san_path_err_threshold(struct config *conf, struct multipath *mp)
+{
+       const char *origin;
+       char buff[12];
+
+       if (marginal_path_check_enabled(mp)) {
+               mp->san_path_err_threshold = NU_NO;
+               origin = marginal_path_origin;
+               goto out;
+       }
+       mp_set_mpe(san_path_err_threshold);
+       mp_set_ovr(san_path_err_threshold);
+       mp_set_hwe(san_path_err_threshold);
+       mp_set_conf(san_path_err_threshold);
+       mp_set_default(san_path_err_threshold, DEFAULT_ERR_CHECKS);
+out:
+       if (print_off_int_undef(buff, 12, mp->san_path_err_threshold) != 0)
+               condlog(3, "%s: san_path_err_threshold = %s %s",
+                       mp->alias, buff, origin);
+       warn_san_path_deprecated(mp, san_path_err_threshold);
+       return 0;
+}
+
+/*
+ * Select mp->san_path_err_forget_rate.
+ * If the marginal_path check is enabled for mp, the value is forced to
+ * NU_NO. Otherwise the mp_set_* macros are tried in the order written,
+ * ending with the DEFAULT_ERR_CHECKS default.
+ * NOTE(review): the mp_set_* macros are defined outside this hunk;
+ * presumably each sets the value and origin and jumps to "out" when its
+ * source provides the setting - confirm.
+ * The result is logged at level 3 when print_off_int_undef() returns
+ * non-zero, and the one-time deprecation warning may be emitted.
+ * Always returns 0.
+ */
+int select_san_path_err_forget_rate(struct config *conf, struct multipath *mp)
+{
+       const char *origin;
+       char buff[12];
+
+       if (marginal_path_check_enabled(mp)) {
+               mp->san_path_err_forget_rate = NU_NO;
+               origin = marginal_path_origin;
+               goto out;
+       }
+       mp_set_mpe(san_path_err_forget_rate);
+       mp_set_ovr(san_path_err_forget_rate);
+       mp_set_hwe(san_path_err_forget_rate);
+       mp_set_conf(san_path_err_forget_rate);
+       mp_set_default(san_path_err_forget_rate, DEFAULT_ERR_CHECKS);
+out:
+       if (print_off_int_undef(buff, 12, mp->san_path_err_forget_rate) != 0)
+               condlog(3, "%s: san_path_err_forget_rate = %s %s", mp->alias,
+                       buff, origin);
+       warn_san_path_deprecated(mp, san_path_err_forget_rate);
+       return 0;
+
+}
+
+/*
+ * Select mp->san_path_err_recovery_time.
+ * If the marginal_path check is enabled for mp, the value is forced to
+ * NU_NO. Otherwise the mp_set_* macros are tried in the order written,
+ * ending with the DEFAULT_ERR_CHECKS default.
+ * NOTE(review): the mp_set_* macros are defined outside this hunk;
+ * presumably each sets the value and origin and jumps to "out" when its
+ * source provides the setting - confirm.
+ * The result is logged at level 3 when print_off_int_undef() returns
+ * non-zero, and the one-time deprecation warning may be emitted.
+ * Always returns 0.
+ */
+int select_san_path_err_recovery_time(struct config *conf, struct multipath *mp)
+{
+       const char *origin;
+       char buff[12];
+
+       if (marginal_path_check_enabled(mp)) {
+               mp->san_path_err_recovery_time = NU_NO;
+               origin = marginal_path_origin;
+               goto out;
+       }
+       mp_set_mpe(san_path_err_recovery_time);
+       mp_set_ovr(san_path_err_recovery_time);
+       mp_set_hwe(san_path_err_recovery_time);
+       mp_set_conf(san_path_err_recovery_time);
+       mp_set_default(san_path_err_recovery_time, DEFAULT_ERR_CHECKS);
+out:
+       if (print_off_int_undef(buff, 12, mp->san_path_err_recovery_time) != 0)
+               condlog(3, "%s: san_path_err_recovery_time = %s %s", mp->alias,
+                       buff, origin);
+       warn_san_path_deprecated(mp, san_path_err_recovery_time);
        return 0;
 
 }
@@ -888,9 +987,10 @@ int select_marginal_path_err_sample_time(struct config *conf, struct multipath *
        mp_set_conf(marginal_path_err_sample_time);
        mp_set_default(marginal_path_err_sample_time, DEFAULT_ERR_CHECKS);
 out:
-       print_off_int_undef(buff, 12, mp->marginal_path_err_sample_time);
-       condlog(3, "%s: marginal_path_err_sample_time = %s %s", mp->alias, buff,
-                       origin);
+       if (print_off_int_undef(buff, 12, mp->marginal_path_err_sample_time)
+           != 0)
+               condlog(3, "%s: marginal_path_err_sample_time = %s %s",
+                       mp->alias, buff, origin);
        return 0;
 }
 
@@ -905,9 +1005,10 @@ int select_marginal_path_err_rate_threshold(struct config *conf, struct multipat
        mp_set_conf(marginal_path_err_rate_threshold);
        mp_set_default(marginal_path_err_rate_threshold, DEFAULT_ERR_CHECKS);
 out:
-       print_off_int_undef(buff, 12, mp->marginal_path_err_rate_threshold);
-       condlog(3, "%s: marginal_path_err_rate_threshold = %s %s", mp->alias, buff,
-                       origin);
+       if (print_off_int_undef(buff, 12, mp->marginal_path_err_rate_threshold)
+           != 0)
+               condlog(3, "%s: marginal_path_err_rate_threshold = %s %s",
+                       mp->alias, buff, origin);
        return 0;
 }
 
@@ -922,9 +1023,10 @@ int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multip
        mp_set_conf(marginal_path_err_recheck_gap_time);
        mp_set_default(marginal_path_err_recheck_gap_time, DEFAULT_ERR_CHECKS);
 out:
-       print_off_int_undef(buff, 12, mp->marginal_path_err_recheck_gap_time);
-       condlog(3, "%s: marginal_path_err_recheck_gap_time = %s %s", mp->alias, buff,
-                       origin);
+       if (print_off_int_undef(buff, 12,
+                               mp->marginal_path_err_recheck_gap_time) != 0)
+               condlog(3, "%s: marginal_path_err_recheck_gap_time = %s %s",
+                       mp->alias, buff, origin);
        return 0;
 }
 
@@ -939,9 +1041,10 @@ int select_marginal_path_double_failed_time(struct config *conf, struct multipat
        mp_set_conf(marginal_path_double_failed_time);
        mp_set_default(marginal_path_double_failed_time, DEFAULT_ERR_CHECKS);
 out:
-       print_off_int_undef(buff, 12, mp->marginal_path_double_failed_time);
-       condlog(3, "%s: marginal_path_double_failed_time = %s %s", mp->alias, buff,
-                       origin);
+       if (print_off_int_undef(buff, 12, mp->marginal_path_double_failed_time)
+           != 0)
+               condlog(3, "%s: marginal_path_double_failed_time = %s %s",
+                       mp->alias, buff, origin);
        return 0;
 }
 
@@ -993,8 +1096,8 @@ int select_ghost_delay (struct config *conf, struct multipath * mp)
        mp_set_conf(ghost_delay);
        mp_set_default(ghost_delay, DEFAULT_GHOST_DELAY);
 out:
-       print_off_int_undef(buff, 12, mp->ghost_delay);
-       condlog(3, "%s: ghost_delay = %s %s", mp->alias, buff, origin);
+       if (print_off_int_undef(buff, 12, mp->ghost_delay) != 0)
+               condlog(3, "%s: ghost_delay = %s %s", mp->alias, buff, origin);
        return 0;
 }
 
index ae99b927114256d7ef2ba0f298ab9f78b7bbb29f..b352c16ae1f8fb74e6c45feca73c3ad2e55751dd 100644 (file)
@@ -26,6 +26,9 @@ int select_delay_watch_checks (struct config *conf, struct multipath * mp);
 int select_delay_wait_checks (struct config *conf, struct multipath * mp);
 int select_skip_kpartx (struct config *conf, struct multipath * mp);
 int select_max_sectors_kb (struct config *conf, struct multipath * mp);
+int select_san_path_err_forget_rate(struct config *conf, struct multipath *mp);
+int select_san_path_err_threshold(struct config *conf, struct multipath *mp);
+int select_san_path_err_recovery_time(struct config *conf, struct multipath *mp);
 int select_marginal_path_err_sample_time(struct config *conf, struct multipath *mp);
 int select_marginal_path_err_rate_threshold(struct config *conf, struct multipath *mp);
 int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multipath *mp);
index 0a2623a0c2758cef199d80539d7f7b84ee6a43e2..b794b0dc00a461728f4c4f99330bc9ce63b56bea 100644 (file)
@@ -9,7 +9,7 @@
 #include "generic.h"
 
 #define WWID_SIZE              128
-#define SERIAL_SIZE            65
+#define SERIAL_SIZE            128
 #define NODE_NAME_SIZE         224
 #define PATH_STR_SIZE          16
 #define PARAMS_SIZE            4096
@@ -202,6 +202,7 @@ enum ghost_delay_states {
 };
 
 enum initialized_states {
+       INIT_NEW,
        INIT_FAILED,
        INIT_MISSING_UDEV,
        INIT_REQUESTED_UDEV,
@@ -280,6 +281,10 @@ struct path {
        int initialized;
        int retriggers;
        int wwid_changed;
+       unsigned int path_failures;
+       time_t dis_reinstate_time;
+       int disable_reinstate;
+       int san_path_err_forget_rate;
        time_t io_err_dis_reinstate_time;
        int io_err_disable_reinstate;
        int io_err_pathfail_cnt;
@@ -318,6 +323,9 @@ struct multipath {
        int deferred_remove;
        int delay_watch_checks;
        int delay_wait_checks;
+       int san_path_err_threshold;
+       int san_path_err_forget_rate;
+       int san_path_err_recovery_time;
        int marginal_path_err_sample_time;
        int marginal_path_err_rate_threshold;
        int marginal_path_err_recheck_gap_time;
@@ -370,6 +378,27 @@ struct multipath {
        struct gen_multipath generic_mp;
 };
 
+/*
+ * Return 1 if all marginal_path settings of mpp are active: the three time
+ * values must be greater than 0 and the rate threshold must not be
+ * negative. Return 0 otherwise.
+ */
+static inline int marginal_path_check_enabled(const struct multipath *mpp)
+{
+       if (mpp->marginal_path_double_failed_time <= 0)
+               return 0;
+       if (mpp->marginal_path_err_sample_time <= 0)
+               return 0;
+       if (mpp->marginal_path_err_recheck_gap_time <= 0)
+               return 0;
+       return mpp->marginal_path_err_rate_threshold >= 0;
+}
+
+/*
+ * Return 1 if all three san_path_err settings of mpp are greater than 0,
+ * 0 otherwise.
+ */
+static inline int san_path_check_enabled(const struct multipath *mpp)
+{
+       if (mpp->san_path_err_threshold <= 0)
+               return 0;
+       if (mpp->san_path_err_forget_rate <= 0)
+               return 0;
+       return mpp->san_path_err_recovery_time > 0;
+}
+
+/*
+ * Return 1 if at least one of delay_watch_checks and delay_wait_checks of
+ * mpp differs from NU_NO, 0 if both are NU_NO.
+ */
+static inline int delay_check_enabled(const struct multipath *mpp)
+{
+       return !(mpp->delay_watch_checks == NU_NO &&
+                mpp->delay_wait_checks == NU_NO);
+}
+
 struct pathgroup {
        long id;
        int status;
index c85823a0c9248bb14d7932a4bcf5f8ab8deb3e74..db5d19daec9b301ddcfa8ecacd957a7a8b1f9c13 100644 (file)
@@ -18,6 +18,7 @@
 #include "configure.h"
 #include "libdevmapper.h"
 #include "io_err_stat.h"
+#include "switchgroup.h"
 
 /*
  * creates or updates mpp->paths reading mpp->pg
@@ -60,6 +61,12 @@ int adopt_paths(vector pathvec, struct multipath *mpp)
 
        vector_foreach_slot (pathvec, pp, i) {
                if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
+                       if (pp->size != 0 && mpp->size != 0 &&
+                           pp->size != mpp->size) {
+                               condlog(3, "%s: size mismatch for %s, not adding path",
+                                       pp->dev, mpp->alias);
+                               continue;
+                       }
                        condlog(3, "%s: ownership set to %s",
                                pp->dev, mpp->alias);
                        pp->mpp = mpp;
@@ -96,14 +103,14 @@ void orphan_path(struct path *pp, const char *reason)
        pp->fd = -1;
 }
 
-void orphan_paths(vector pathvec, struct multipath *mpp)
+void orphan_paths(vector pathvec, struct multipath *mpp, const char *reason)
 {
        int i;
        struct path * pp;
 
        vector_foreach_slot (pathvec, pp, i) {
                if (pp->mpp == mpp) {
-                       orphan_path(pp, "map flushed");
+                       orphan_path(pp, reason);
                }
        }
 }
@@ -113,12 +120,10 @@ remove_map(struct multipath * mpp, struct vectors * vecs, int purge_vec)
 {
        int i;
 
-       condlog(4, "%s: remove multipath map", mpp->alias);
-
        /*
         * clear references to this map
         */
-       orphan_paths(vecs->pathvec, mpp);
+       orphan_paths(vecs->pathvec, mpp, "map removed internally");
 
        if (purge_vec &&
            (i = find_slot(vecs->mpvec, (void *)mpp)) != -1)
@@ -134,8 +139,10 @@ void
 remove_map_by_alias(const char *alias, struct vectors * vecs, int purge_vec)
 {
        struct multipath * mpp = find_mp_by_alias(vecs->mpvec, alias);
-       if (mpp)
+       if (mpp) {
+               condlog(2, "%s: removing map by alias", alias);
                remove_map(mpp, vecs, purge_vec);
+       }
 }
 
 void
@@ -255,6 +262,9 @@ void sync_paths(struct multipath *mpp, vector pathvec)
 int
 update_multipath_strings(struct multipath *mpp, vector pathvec, int is_daemon)
 {
+       struct pathgroup *pgp;
+       int i;
+
        if (!mpp)
                return 1;
 
@@ -272,6 +282,10 @@ update_multipath_strings(struct multipath *mpp, vector pathvec, int is_daemon)
        if (update_multipath_status(mpp))
                return 1;
 
+       vector_foreach_slot(mpp->pg, pgp, i)
+               if (pgp->paths)
+                       path_group_prio_update(pgp);
+
        return 0;
 }
 
@@ -407,6 +421,12 @@ int verify_paths(struct multipath *mpp, struct vectors *vecs)
                        vector_del_slot(mpp->paths, i);
                        i--;
 
+                       /* Make sure mpp->hwe doesn't point to freed memory.
+                        * We call extract_hwe_from_path() below to restore
+                        * mpp->hwe
+                        */
+                       if (mpp->hwe == pp->hwe)
+                               mpp->hwe = NULL;
                        if ((j = find_slot(vecs->pathvec,
                                           (void *)pp)) != -1)
                                vector_del_slot(vecs->pathvec, j);
@@ -416,6 +436,7 @@ int verify_paths(struct multipath *mpp, struct vectors *vecs)
                                mpp->alias, pp->dev, pp->dev_t);
                }
        }
+       extract_hwe_from_path(mpp);
        return count;
 }
 
index f7777aaf3689803f95eab89bd5c4681c46c46f3e..f8b9f63edabdb215fbc11bd1fc101a0c70daf6e8 100644 (file)
@@ -14,7 +14,8 @@ struct vectors {
 void enter_recovery_mode(struct multipath *mpp);
 
 int adopt_paths (vector pathvec, struct multipath * mpp);
-void orphan_paths (vector pathvec, struct multipath * mpp);
+void orphan_paths(vector pathvec, struct multipath *mpp,
+                 const char *reason);
 void orphan_path (struct path * pp, const char *reason);
 
 int verify_paths(struct multipath * mpp, struct vectors * vecs);
index 558c8d6a78ce6d23f0ddb99899cfe2d7bc8b04c6..65904d7b185700d0f927c65e5ecc1e5084da547d 100644 (file)
@@ -295,11 +295,6 @@ static int select_dm_devs(const struct dirent *di)
        return fnmatch("dm-*", di->d_name, FNM_FILE_NAME) == 0;
 }
 
-static void close_fd(void *arg)
-{
-       close((long)arg);
-}
-
 bool sysfs_is_multipathed(const struct path *pp)
 {
        char pathbuf[PATH_MAX];
index 5f910e60661176a884056a7bf379f85481b56f00..f73de8cc745e95f96e8187e9ef6cee7c0212cb24 100644 (file)
@@ -806,7 +806,7 @@ int uevent_listen(struct udev *udev)
        monitor = udev_monitor_new_from_netlink(udev, "udev");
        if (!monitor) {
                condlog(2, "failed to create udev monitor");
-               goto out;
+               goto failback;
        }
        pthread_cleanup_push(monitor_cleanup, monitor);
 #ifdef LIBUDEV_API_RECVBUF
@@ -893,8 +893,8 @@ int uevent_listen(struct udev *udev)
        }
        need_failback = 0;
 out:
-       if (monitor)
-               pthread_cleanup_pop(1);
+       pthread_cleanup_pop(1);
+failback:
        if (need_failback)
                err = failback_listen();
        pthread_cleanup_pop(1);
index 66c47611aec02c81548bc285ed2e78bf78f178ad..5b838d51b01d1e647a158c2c40051c623ae39efd 100644 (file)
@@ -104,7 +104,7 @@ get_word (char * sentence, char ** word)
        }
        strncpy(*word, sentence, len);
        strchop(*word);
-       condlog(4, "*word = %s, len = %i", *word, len);
+       condlog(5, "*word = %s, len = %i", *word, len);
 
        if (*p == '\0')
                return 0;
@@ -176,6 +176,7 @@ int devt2devname(char *devname, int devname_len, char *devt)
        if (stat("/sys/dev/block", &statbuf) == 0) {
                /* Newer kernels have /sys/dev/block */
                sprintf(block_path,"/sys/dev/block/%u:%u", major, minor);
+               dev[FILE_NAME_SIZE - 1] = '\0';
                if (lstat(block_path, &statbuf) == 0) {
                        if (S_ISLNK(statbuf.st_mode) &&
                            readlink(block_path, dev, FILE_NAME_SIZE-1) > 0) {
@@ -191,7 +192,8 @@ int devt2devname(char *devname, int devname_len, char *devt)
                                return 0;
                        }
                }
-               goto skip_proc;
+               condlog(4, "%s is invalid", block_path);
+               return 1;
        }
        memset(block_path, 0, sizeof(block_path));
 
@@ -220,7 +222,7 @@ int devt2devname(char *devname, int devname_len, char *devt)
                }
        }
        fclose(fd);
-skip_proc:
+
        if (strncmp(block_path,"/sys/block", 10)) {
                condlog(3, "No device found for %u:%u", major, minor);
                return 1;
@@ -505,3 +507,8 @@ void free_scandir_result(struct scandir_result *res)
                FREE(res->di[i]);
        FREE(res->di);
 }
+
+void close_fd(void *arg)
+{
+       close((long)arg);
+}
index a818e29a6b23a5ec26c08fa29b30e9e6e6220e9e..1e0d832c60330273457e63e759e61b18bd42f7e0 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <sys/types.h>
 #include <inttypes.h>
+#include <stdbool.h>
 
 size_t strchop(char *);
 int basenamecpy (const char *src, char *dst, size_t size);
@@ -24,6 +25,7 @@ int safe_write(int fd, const void *buf, size_t count);
 void set_max_fds(int max_fds);
 
 #define KERNEL_VERSION(maj, min, ptc) ((((maj) * 256) + (min)) * 256 + (ptc))
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
 
 #define safe_sprintf(var, format, args...)     \
        snprintf(var, sizeof(var), format, ##args) >= sizeof(var)
@@ -33,10 +35,27 @@ void set_max_fds(int max_fds);
 #define pthread_cleanup_push_cast(f, arg)              \
        pthread_cleanup_push(((void (*)(void *))&f), (arg))
 
+void close_fd(void *arg);
+
 struct scandir_result {
        struct dirent **di;
        int n;
 };
 void free_scandir_result(struct scandir_result *);
 
+static inline bool is_bit_set_in_array(unsigned int bit, const uint64_t *arr)
+{
+       return arr[bit / 64] & (1ULL << (bit % 64)) ? 1 : 0;
+}
+
+static inline void set_bit_in_array(unsigned int bit, uint64_t *arr)
+{
+       arr[bit / 64] |= (1ULL << (bit % 64));
+}
+
+static inline void clear_bit_in_array(unsigned int bit, uint64_t *arr)
+{
+       arr[bit / 64] &= ~(1ULL << (bit % 64));
+}
+
 #endif /* _UTIL_H */
index 65d0522c7a0e5ebd4eaa0cf91cfded8b700f92b1..f3c7a51b16785d900ffb463bf77e9c23178e59c4 100644 (file)
@@ -20,8 +20,8 @@
 #ifndef _VERSION_H
 #define _VERSION_H
 
-#define VERSION_CODE 0x000709
-#define DATE_CODE    0x0b0e12
+#define VERSION_CODE 0x000800
+#define DATE_CODE    0x020e13
 
 #define PROG    "multipath-tools"
 
index 05b7bf0c6a8178689eed5221ce212bd5e7d89388..5abb1181c5d71e5031bf33897a2f0e16f43596ef 100644 (file)
@@ -68,6 +68,19 @@ int logsink;
 struct udev *udev;
 struct config *multipath_conf;
 
+/*
+ * Return values of configure(), print_cmd_valid(), and main().
+ * RTVL_{YES,NO} are synonyms for RTVL_{OK,FAIL} for the CMD_VALID_PATH case.
+ */
+enum {
+       RTVL_OK = 0,
+       RTVL_YES = RTVL_OK,
+       RTVL_FAIL = 1,
+       RTVL_NO = RTVL_FAIL,
+       RTVL_MAYBE, /* only used internally, never returned */
+       RTVL_RETRY, /* returned by configure(), not by main() */
+};
+
 struct config *get_multipath_config(void)
 {
        return multipath_conf;
@@ -319,7 +332,7 @@ static int check_usable_paths(struct config *conf,
                goto out;
        }
 
-       if (!dm_is_mpath(mapname)) {
+       if (dm_is_mpath(mapname) != 1) {
                condlog(1, "%s is not a multipath map", devpath);
                goto free;
        }
@@ -375,10 +388,6 @@ enum {
 };
 
 static const char shm_find_mp_dir[] = MULTIPATH_SHM_BASE "find_multipaths";
-static void close_fd(void *arg)
-{
-       close((long)arg);
-}
 
 /**
  * find_multipaths_check_timeout(wwid, tmo)
@@ -401,7 +410,7 @@ static int find_multipaths_check_timeout(const struct path *pp, long tmo,
        struct timespec now, ftimes[2], tdiff;
        struct stat st;
        long fd;
-       int r, err, retries = 0;
+       int r, retries = 0;
 
        clock_gettime(CLOCK_REALTIME, &now);
 
@@ -421,8 +430,6 @@ retry:
        if (fd != -1) {
                pthread_cleanup_push(close_fd, (void *)fd);
                r = fstat(fd, &st);
-               if (r != 0)
-                       err = errno;
                pthread_cleanup_pop(1);
 
        } else if (tmo > 0) {
@@ -451,15 +458,12 @@ retry:
                                path, strerror(errno));
                }
                r = fstat(fd, &st);
-               if (r != 0)
-                       err = errno;
                pthread_cleanup_pop(1);
        } else
                return FIND_MULTIPATHS_NEVER;
 
        if (r != 0) {
-               condlog(1, "%s: error in fstat for %s: %s", __func__,
-                       path, strerror(err));
+               condlog(1, "%s: error in fstat for %s: %m", __func__, path);
                return FIND_MULTIPATHS_ERROR;
        }
 
@@ -475,15 +479,14 @@ retry:
 static int print_cmd_valid(int k, const vector pathvec,
                           struct config *conf)
 {
-       static const int vals[] = { 1, 0, 2 };
        int wait = FIND_MULTIPATHS_NEVER;
        struct timespec until;
        struct path *pp;
 
-       if (k < 0 || k >= (sizeof(vals) / sizeof(int)))
-               return 1;
+       if (k != RTVL_YES && k != RTVL_NO && k != RTVL_MAYBE)
+               return RTVL_NO;
 
-       if (k == 2) {
+       if (k == RTVL_MAYBE) {
                /*
                 * Caller ensures that pathvec[0] is the path to
                 * examine.
@@ -493,7 +496,7 @@ static int print_cmd_valid(int k, const vector pathvec,
                wait = find_multipaths_check_timeout(
                        pp, pp->find_multipaths_timeout, &until);
                if (wait != FIND_MULTIPATHS_WAITING)
-                       k = 1;
+                       k = RTVL_NO;
        } else if (pathvec != NULL && (pp = VECTOR_SLOT(pathvec, 0)))
                wait = find_multipaths_check_timeout(pp, 0, &until);
        if (wait == FIND_MULTIPATHS_WAITING)
@@ -501,8 +504,10 @@ static int print_cmd_valid(int k, const vector pathvec,
                               until.tv_sec, until.tv_nsec/1000);
        else if (wait == FIND_MULTIPATHS_WAIT_DONE)
                printf("FIND_MULTIPATHS_WAIT_UNTIL=\"0\"\n");
-       printf("DM_MULTIPATH_DEVICE_PATH=\"%d\"\n", vals[k]);
-       return k == 1;
+       printf("DM_MULTIPATH_DEVICE_PATH=\"%d\"\n",
+              k == RTVL_MAYBE ? 2 : k == RTVL_YES ? 1 : 0);
+       /* Never return RTVL_MAYBE */
+       return k == RTVL_NO ? RTVL_NO : RTVL_YES;
 }
 
 /*
@@ -524,12 +529,6 @@ static bool released_to_systemd(void)
        return ret;
 }
 
-/*
- * Return value:
- *  -1: Retry
- *   0: Success
- *   1: Failure
- */
 static int
 configure (struct config *conf, enum mpath_cmds cmd,
           enum devtypes dev_type, char *devpath)
@@ -537,7 +536,7 @@ configure (struct config *conf, enum mpath_cmds cmd,
        vector curmp = NULL;
        vector pathvec = NULL;
        struct vectors vecs;
-       int r = 1;
+       int r = RTVL_FAIL, rc;
        int di_flag = 0;
        char * refwwid = NULL;
        char * dev = NULL;
@@ -585,21 +584,23 @@ configure (struct config *conf, enum mpath_cmds cmd,
                        goto out;
                }
                if (cmd == CMD_REMOVE_WWID) {
-                       r = remove_wwid(refwwid);
-                       if (r == 0)
+                       rc = remove_wwid(refwwid);
+                       if (rc == 0) {
                                printf("wwid '%s' removed\n", refwwid);
-                       else if (r == 1) {
+                               r = RTVL_OK;
+                       } else if (rc == 1) {
                                printf("wwid '%s' not in wwids file\n",
                                        refwwid);
-                               r = 0;
+                               r = RTVL_OK;
                        }
                        goto out;
                }
                if (cmd == CMD_ADD_WWID) {
-                       r = remember_wwid(refwwid);
-                       if (r >= 0)
+                       rc = remember_wwid(refwwid);
+                       if (rc >= 0) {
                                printf("wwid '%s' added\n", refwwid);
-                       else
+                               r = RTVL_OK;
+                       } else
                                printf("failed adding '%s' to wwids file\n",
                                       refwwid);
                        goto out;
@@ -614,13 +615,13 @@ configure (struct config *conf, enum mpath_cmds cmd,
                 */
                if (cmd == CMD_VALID_PATH) {
                        if (is_failed_wwid(refwwid) == WWID_IS_FAILED) {
-                               r = 1;
+                               r = RTVL_NO;
                                goto print_valid;
                        }
                        if ((!find_multipaths_on(conf) &&
                                    ignore_wwids_on(conf)) ||
                                   check_wwids_file(refwwid, 0) == 0)
-                               r = 0;
+                               r = RTVL_YES;
                        if (!ignore_wwids_on(conf))
                                goto print_valid;
                        /* At this point, either r==0 or find_multipaths_on. */
@@ -630,7 +631,7 @@ configure (struct config *conf, enum mpath_cmds cmd,
                         * Quick check if path is already multipathed.
                         */
                        if (sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0))) {
-                               r = 0;
+                               r = RTVL_YES;
                                goto print_valid;
                        }
 
@@ -644,10 +645,10 @@ configure (struct config *conf, enum mpath_cmds cmd,
                         * Leave DM_MULTIPATH_DEVICE_PATH="0".
                         */
                        if (released) {
-                               r = 1;
+                               r = RTVL_NO;
                                goto print_valid;
                        }
-                       if (r == 0)
+                       if (r == RTVL_YES)
                                goto print_valid;
                        /* find_multipaths_on: Fall through to path detection */
                }
@@ -703,13 +704,12 @@ configure (struct config *conf, enum mpath_cmds cmd,
                 * the refwwid, or there is more than one path matching
                 * the refwwid, then the path is valid */
                if (VECTOR_SIZE(curmp) != 0) {
-                       r = 0;
+                       r = RTVL_YES;
                        goto print_valid;
                } else if (VECTOR_SIZE(pathvec) > 1)
-                       r = 0;
+                       r = RTVL_YES;
                else
-                       /* Use r=2 as an indication for "maybe" */
-                       r = 2;
+                       r = RTVL_MAYBE;
 
                /*
                 * If opening the path with O_EXCL fails, the path
@@ -739,21 +739,23 @@ configure (struct config *conf, enum mpath_cmds cmd,
                        /*
                         * Check if we raced with multipathd
                         */
-                       r = !sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0));
+                       r = sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0)) ?
+                               RTVL_YES : RTVL_NO;
                }
                goto print_valid;
        }
 
        if (cmd != CMD_CREATE && cmd != CMD_DRY_RUN) {
-               r = 0;
+               r = RTVL_OK;
                goto out;
        }
 
        /*
         * core logic entry point
         */
-       r = coalesce_paths(&vecs, NULL, refwwid,
+       rc = coalesce_paths(&vecs, NULL, refwwid,
                           conf->force_reload, cmd);
+       r = rc == CP_RETRY ? RTVL_RETRY : rc == CP_OK ? RTVL_OK : RTVL_FAIL;
 
 print_valid:
        if (cmd == CMD_VALID_PATH)
@@ -854,7 +856,7 @@ main (int argc, char *argv[])
        int arg;
        extern char *optarg;
        extern int optind;
-       int r = 1;
+       int r = RTVL_FAIL;
        enum mpath_cmds cmd = CMD_CREATE;
        enum devtypes dev_type = DEV_NONE;
        char *dev = NULL;
@@ -865,7 +867,7 @@ main (int argc, char *argv[])
        logsink = 0;
        conf = load_config(DEFAULT_CONFIGFILE);
        if (!conf)
-               exit(1);
+               exit(RTVL_FAIL);
        multipath_conf = conf;
        conf->retrigger_tries = 0;
        while ((arg = getopt(argc, argv, ":adcChl::FfM:v:p:b:BrR:itTquUwW")) != EOF ) {
@@ -876,7 +878,7 @@ main (int argc, char *argv[])
                        if (sizeof(optarg) > sizeof(char *) ||
                            !isdigit(optarg[0])) {
                                usage (argv[0]);
-                               exit(1);
+                               exit(RTVL_FAIL);
                        }
 
                        conf->verbosity = atoi(optarg);
@@ -923,7 +925,7 @@ main (int argc, char *argv[])
                        if (conf->pgpolicy_flag == IOPOLICY_UNDEF) {
                                printf("'%s' is not a valid policy\n", optarg);
                                usage(argv[0]);
-                               exit(1);
+                               exit(RTVL_FAIL);
                        }
                        break;
                case 'r':
@@ -933,14 +935,14 @@ main (int argc, char *argv[])
                        conf->find_multipaths |= _FIND_MULTIPATHS_I;
                        break;
                case 't':
-                       r = dump_config(conf, NULL, NULL);
+                       r = dump_config(conf, NULL, NULL) ? RTVL_FAIL : RTVL_OK;
                        goto out_free_config;
                case 'T':
                        cmd = CMD_DUMP_CONFIG;
                        break;
                case 'h':
                        usage(argv[0]);
-                       exit(0);
+                       exit(RTVL_OK);
                case 'u':
                        cmd = CMD_VALID_PATH;
                        dev_type = DEV_UEVENT;
@@ -964,20 +966,20 @@ main (int argc, char *argv[])
                case ':':
                        fprintf(stderr, "Missing option argument\n");
                        usage(argv[0]);
-                       exit(1);
+                       exit(RTVL_FAIL);
                case '?':
                        fprintf(stderr, "Unknown switch: %s\n", optarg);
                        usage(argv[0]);
-                       exit(1);
+                       exit(RTVL_FAIL);
                default:
                        usage(argv[0]);
-                       exit(1);
+                       exit(RTVL_FAIL);
                }
        }
 
        if (getuid() != 0) {
                fprintf(stderr, "need to be root\n");
-               exit(1);
+               exit(RTVL_FAIL);
        }
 
        if (optind < argc) {
@@ -1015,7 +1017,8 @@ main (int argc, char *argv[])
        /* Failing here is non-fatal */
        init_foreign(conf->multipath_dir);
        if (cmd == CMD_USABLE_PATHS) {
-               r = check_usable_paths(conf, dev, dev_type);
+               r = check_usable_paths(conf, dev, dev_type) ?
+                       RTVL_FAIL : RTVL_OK;
                goto out;
        }
        if (cmd == CMD_VALID_PATH &&
@@ -1031,7 +1034,7 @@ main (int argc, char *argv[])
                if (fd == -1) {
                        condlog(3, "%s: daemon is not running", dev);
                        if (!systemd_service_enabled(dev)) {
-                               r = print_cmd_valid(1, NULL, conf);
+                               r = print_cmd_valid(RTVL_NO, NULL, conf);
                                goto out;
                        }
                } else
@@ -1045,9 +1048,9 @@ main (int argc, char *argv[])
 
        switch(delegate_to_multipathd(cmd, dev, dev_type, conf)) {
        case DELEGATE_OK:
-               exit(0);
+               exit(RTVL_OK);
        case DELEGATE_ERROR:
-               exit(1);
+               exit(RTVL_FAIL);
        case NOT_DELEGATED:
                break;
        }
@@ -1063,8 +1066,8 @@ main (int argc, char *argv[])
                        goto out;
                }
                if (dm_get_maps(curmp) == 0)
-                       r = replace_wwids(curmp);
-               if (r == 0)
+                       r = replace_wwids(curmp) ? RTVL_FAIL : RTVL_OK;
+               if (r == RTVL_OK)
                        printf("successfully reset wwids\n");
                vector_foreach_slot_backwards(curmp, mpp, i) {
                        vector_del_slot(curmp, i);
@@ -1077,17 +1080,18 @@ main (int argc, char *argv[])
                retries = conf->remove_retries;
        if (conf->remove == FLUSH_ONE) {
                if (dev_type == DEV_DEVMAP) {
-                       r = dm_suspend_and_flush_map(dev, retries);
+                       r = dm_suspend_and_flush_map(dev, retries) ?
+                               RTVL_FAIL : RTVL_OK;
                } else
                        condlog(0, "must provide a map name to remove");
 
                goto out;
        }
        else if (conf->remove == FLUSH_ALL) {
-               r = dm_flush_maps(retries);
+               r = dm_flush_maps(retries) ? RTVL_FAIL : RTVL_OK;
                goto out;
        }
-       while ((r = configure(conf, cmd, dev_type, dev)) < 0)
+       while ((r = configure(conf, cmd, dev_type, dev)) == RTVL_RETRY)
                condlog(3, "restart multipath configuration process");
 
 out:
@@ -1102,8 +1106,8 @@ out:
         * multipath -u must exit with status 0, otherwise udev won't
         * import its output.
         */
-       if (cmd == CMD_VALID_PATH && dev_type == DEV_UEVENT && r == 1)
-               r = 0;
+       if (cmd == CMD_VALID_PATH && dev_type == DEV_UEVENT && r == RTVL_NO)
+               r = RTVL_OK;
 
        if (dev_type == DEV_UEVENT)
                closelog();
index 6333366956c4ce53ba9c967fbebc7be2cc970790..0fe8461d7b161192275706da8e3fdc9c1203aa24 100644 (file)
@@ -334,6 +334,10 @@ priority provided as argument. Requires prio_args keyword.
 Generate the path priority based on a latency algorithm.
 Requires prio_args keyword.
 .TP
+.I ana
+(Hardware-dependent)
+Generate the path priority based on the NVMe ANA settings.
+.TP
 .I datacore
 (Hardware-dependent)
 Generate the path priority for some DataCore storage arrays. Requires prio_args
@@ -891,6 +895,46 @@ The default is: \fB/etc/multipath/conf.d/\fR
 .
 .
 .TP
+.B san_path_err_threshold
+If set to a value greater than 0, multipathd will watch paths and check how many
+times a path has failed due to errors. If the number of failures on a particular
+path is greater than the san_path_err_threshold, then the path will not be reinstated
+until san_path_err_recovery_time has elapsed. These path failures must occur within
+san_path_err_forget_rate checks; if not, the path is considered good enough
+to reinstate. See "Shaky paths detection" below.
+.RS
+.TP
+The default is: \fBno\fR
+.RE
+.
+.
+.TP
+.B san_path_err_forget_rate
+If set to a value greater than 0, multipathd will check whether the path failures
+have exceeded the san_path_err_threshold within this many checks, i.e.
+san_path_err_forget_rate. If so, the path will not be reinstated until
+san_path_err_recovery_time has elapsed. See "Shaky paths detection" below.
+.RS
+.TP
+The default is: \fBno\fR
+.RE
+.
+.
+.TP
+.B san_path_err_recovery_time
+If set to a value greater than 0, multipathd will make sure that when path failures
+have exceeded the san_path_err_threshold within san_path_err_forget_rate, the path
+will be kept in failed state for the san_path_err_recovery_time duration. Once
+san_path_err_recovery_time has expired, the failed path will be reinstated.
+The san_path_err_recovery_time value is given in seconds.
+See "Shaky paths detection" below.
+.RS
+.TP
+The default is: \fBno\fR
+.RE
+.
+.
+.TP
 .B marginal_path_double_failed_time
 One of the four parameters of supporting path check based on accounting IO
 error such as intermittent error. When a path failed event occurs twice in
@@ -898,7 +942,7 @@ error such as intermittent error. When a path failed event occurs twice in
 other three parameters are set, multipathd will fail the path and enqueue
 this path into a queue of which members are sent a couple of continuous
 direct reading asynchronous IOs at a fixed sample rate of 10HZ to start IO
-error accounting process.
+error accounting process. See "Shaky paths detection" below.
 .RS
 .TP
 The default is: \fBno\fR
@@ -920,7 +964,7 @@ If the rate of IO error on a particular path is greater than the
 \fImarginal_path_err_recheck_gap_time\fR seconds unless there is only one
 active path. After \fImarginal_path_err_recheck_gap_time\fR expires, the path
 will be requeueed for rechecking. If checking result is good enough, the
-path will be reinstated.
+path will be reinstated. See "Shaky paths detection" below.
 .RS
 .TP
 The default is: \fBno\fR
@@ -934,7 +978,7 @@ of supporting path check based on accounting IO error such as intermittent
 error. Refer to \fImarginal_path_err_sample_time\fR. If the rate of IO errors
 on a particular path is greater than this parameter, then the path will not
 reinstate for \fImarginal_path_err_recheck_gap_time\fR seconds unless there is
-only one active path.
+only one active path. See "Shaky paths detection" below.
 .RS
 .TP
 The default is: \fBno\fR
@@ -951,7 +995,7 @@ value, the failed path of  which the IO error rate is larger than
 \fImarginal_path_err_recheck_gap_time\fR seconds. When
 \fImarginal_path_err_recheck_gap_time\fR seconds expires, the path will be
 requeueed for checking. If checking result is good enough, the path will be
-reinstated, or else it will keep failed.
+reinstated, or else it will keep failed. See "Shaky paths detection" below.
 .RS
 .TP
 The default is: \fBno\fR
@@ -963,7 +1007,7 @@ The default is: \fBno\fR
 If set to a value greater than 0, multipathd will watch paths that have
 recently become valid for this many checks. If they fail again while they are
 being watched, when they next become valid, they will not be used until they
-have stayed up for \fIdelay_wait_checks\fR checks.
+have stayed up for \fIdelay_wait_checks\fR checks. See "Shaky paths detection" below.
 .RS
 .TP
 The default is: \fBno\fR
@@ -975,7 +1019,7 @@ The default is: \fBno\fR
 If set to a value greater than 0, when a device that has recently come back
 online fails again within \fIdelay_watch_checks\fR checks, the next time it
 comes back online, it will marked and delayed, and not used until it has passed
-\fIdelay_wait_checks\fR checks.
+\fIdelay_wait_checks\fR checks. See "Shaky paths detection" below.
 .RS
 .TP
 The default is: \fBno\fR
@@ -1174,7 +1218,7 @@ Regular expression matching the device nodes to be excluded/included.
 .RS
 .PP
 The default \fIblacklist\fR consists of the regular expressions
-"^(ram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]" and
+"^(ram|zram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]" and
 "^(td|hd|vd)[a-z]". This causes virtual devices, non-disk devices, and some other
 device types to be excluded from multipath handling by default.
 .RE
@@ -1297,6 +1341,12 @@ section:
 .TP
 .B deferred_remove
 .TP
+.B san_path_err_threshold
+.TP
+.B san_path_err_forget_rate
+.TP
+.B san_path_err_recovery_time
+.TP
 .B marginal_path_err_sample_time
 .TP
 .B marginal_path_err_rate_threshold
@@ -1391,6 +1441,10 @@ Active/Standby mode exclusively.
 .I 1 alua
 (Hardware-dependent)
 Hardware handler for SCSI-3 ALUA compatible arrays.
+.TP
+.I 1 ana
+(Hardware-dependent)
+Hardware handler for NVMe ANA compatible arrays.
 .PP
 The default is: \fB<unset>\fR
 .PP
@@ -1448,6 +1502,12 @@ section:
 .TP
 .B deferred_remove
 .TP
+.B san_path_err_threshold
+.TP
+.B san_path_err_forget_rate
+.TP
+.B san_path_err_recovery_time
+.TP
 .B marginal_path_err_sample_time
 .TP
 .B marginal_path_err_rate_threshold
@@ -1524,6 +1584,12 @@ the values are taken from the \fIdevices\fR or \fIdefaults\fR sections:
 .TP
 .B deferred_remove
 .TP
+.B san_path_err_threshold
+.TP
+.B san_path_err_forget_rate
+.TP
+.B san_path_err_recovery_time
+.TP
 .B marginal_path_err_sample_time
 .TP
 .B marginal_path_err_rate_threshold
@@ -1578,6 +1644,69 @@ are present multipath will try to use the sysfs attribute
 .
 .
 .\" ----------------------------------------------------------------------------
+.SH "Shaky paths detection"
+.\" ----------------------------------------------------------------------------
+.
+A common problem in SAN setups is the occurrence of intermittent errors: a
+path is unreachable, then reachable again for a short time, disappears again,
+and so forth. This happens typically on unstable interconnects. It is
+undesirable to switch pathgroups unnecessarily on such frequent, unreliable
+events. \fImultipathd\fR supports three different methods for detecting this
+situation and dealing with it. All methods share the same basic mode of
+operation: If a path is found to be \(dqshaky\(dq or \(dqflipping\(dq,
+and appears to be in healthy status, it is not reinstated (put back to use)
+immediately. Instead, it is watched for some time, and only reinstated
+if the healthy state appears to be stable. The logic of determining
+\(dqshaky\(dq condition, as well as the logic when to reinstate,
+differs between the three methods.
+.TP 8
+.B \(dqdelay_checks\(dq failure tracking
+If a path fails again within a
+\fIdelay_watch_checks\fR interval after a failure, don't
+reinstate it until it passes a \fIdelay_wait_checks\fR interval
+in always good status.
+The intervals are measured in \(dqticks\(dq, i.e. the
+time between path checks by multipathd, which is variable and controlled by the
+\fIpolling_interval\fR and \fImax_polling_interval\fR parameters.
+.TP
+.B \(dqmarginal_path\(dq failure tracking
+If a second failure event (good->bad transition) occurs within
+\fImarginal_path_double_failed_time\fR seconds after a failure, high-frequency
+monitoring is started for the affected path: I/O is sent at a rate of 10 per
+second. This is done for \fImarginal_path_err_sample_time\fR seconds. During
+this period, the path is not reinstated. If the
+rate of errors remains below \fImarginal_path_err_rate_threshold\fR during the
+monitoring period, the path is reinstated. Otherwise, it
+is kept in failed state for \fImarginal_path_err_recheck_gap_time\fR, and
+after that, it is monitored again. For this method, time intervals are measured
+in seconds.
+.TP
+.B \(dqsan_path_err\(dq failure tracking
+multipathd counts path failures for each path. Once the number of failures
+exceeds the value given by \fIsan_path_err_threshold\fR, the path is not
+reinstated for \fIsan_path_err_recovery_time\fR ticks. While counting
+failures, multipathd \(dqforgets\(dq one past failure every
+\(dqsan_path_err_forget_rate\(dq ticks; thus if errors don't occur more
+often then once in the forget rate interval, the failure count doesn't
+increase and the threshold is never reached. As for the \fIdelay_xy\fR method,
+intervals are measured in \(dqticks\(dq.
+.
+.RS 8
+.LP
+This method is \fBdeprecated\fR in favor of the \(dqmarginal_path\(dq failure
+tracking method, and only offered for backward compatibility.
+.
+.RE
+.LP
+See the documentation
+of the individual options above for details.
+It is \fBstrongly discouraged\fR to use more than one of these methods for any
+given multipath map, because the two concurrent methods may interact in
+unpredictable ways. If the \(dqmarginal_path\(dq method is active, the
+\(dqsan_path_err\(dq parameters are implicitly set to 0.
+.
+.
+.\" ----------------------------------------------------------------------------
 .SH "KNOWN ISSUES"
 .\" ----------------------------------------------------------------------------
 .
index a75afe3f4e56452e2cb5aecbba46d5c6a70ffc31..ca176a9964210d295d1b554c2e74d38f3d9d0130 100644 (file)
@@ -13,7 +13,9 @@
 #include "version.h"
 #include <readline/readline.h>
 
+#include "mpath_cmd.h"
 #include "cli.h"
+#include "debug.h"
 
 static vector keys;
 static vector handlers;
index 7cc7e4be53dbfee92e51eaf9f330318d554234d1..f3fa077aab1ba490cb0b733c872f3add815e2207 100644 (file)
@@ -96,6 +96,12 @@ enum {
        do {                                                    \
                if ((a)) {                                      \
                        char *tmp = (r);                        \
+                                                               \
+                       if (m >= MAX_REPLY_LEN) {               \
+                               condlog(1, "Warning: max reply length exceeded"); \
+                               free(tmp);                      \
+                               r = NULL;                       \
+                       }                                       \
                        (r) = REALLOC((r), (m) * 2);            \
                        if ((r)) {                              \
                                memset((r) + (m), 0, (m));      \
index a0d57a539306c1e790376a4874e6158291895db4..f95813e572f8b329478be79a463d84d173ec4c9b 100644 (file)
@@ -26,6 +26,7 @@
 #include "prkey.h"
 #include "propsel.h"
 #include "main.h"
+#include "mpath_cmd.h"
 #include "cli.h"
 #include "uevent.h"
 #include "foreign.h"
@@ -346,6 +347,8 @@ cli_list_path (void * v, char ** reply, int * len, void * data)
        condlog(3, "%s: list path (operator)", param);
 
        pp = find_path_by_dev(vecs->pathvec, param);
+       if (!pp)
+               return 1;
 
        return show_path(reply, len, vecs, pp, "%o");
 }
@@ -803,7 +806,8 @@ cli_add_map (void * v, char ** reply, int * len, void * data)
                                    vecs->pathvec, &refwwid);
                        if (refwwid) {
                                if (coalesce_paths(vecs, NULL, refwwid,
-                                                  FORCE_RELOAD_NONE, CMD_NONE))
+                                                  FORCE_RELOAD_NONE, CMD_NONE)
+                                   != CP_OK)
                                        condlog(2, "%s: coalesce_paths failed",
                                                                        param);
                                dm_lib_release();
@@ -892,7 +896,7 @@ int resize_map(struct multipath *mpp, unsigned long long size,
        }
        mpp->action = ACT_RESIZE;
        mpp->force_udev_reload = 1;
-       if (domap(mpp, params, 1) <= 0) {
+       if (domap(mpp, params, 1) == DOMAP_FAIL) {
                condlog(0, "%s: failed to resize map : %s", mpp->alias,
                        strerror(errno));
                mpp->size = orig_size;
index 31e64a7e9488ba341e8ca919570f1f3eb5b08dc4..003489269517f31f75053133dfffa22f4b72d598 100644 (file)
@@ -168,7 +168,9 @@ static int dm_get_events(void)
        while (names->dev) {
                uint32_t event_nr;
 
-               if (!dm_is_mpath(names->name))
+               /* Don't delete device if dm_is_mpath() fails without
+                * checking the device type */
+               if (dm_is_mpath(names->name) == 0)
                        goto next;
 
                event_nr = dm_event_nr(names);
@@ -204,7 +206,9 @@ int watch_dmevents(char *name)
        struct dev_event *dev_evt, *old_dev_evt;
        int i;
 
-       if (!dm_is_mpath(name)) {
+       /* We know that this is a multipath device, so only fail if
+        * device-mapper tells us that we're wrong */
+       if (dm_is_mpath(name) == 0) {
                condlog(0, "%s: not a multipath device. can't watch events",
                        name);
                return -1;
index cc555bb7ca28c9d3844b236a59d81db5c1a30dd9..fb520b64669c564b35fefd95c5fb7e4ff4d52fed 100644 (file)
@@ -92,7 +92,8 @@ static int use_watchdog;
 
 #define LOG_MSG(lvl, verb, pp)                                 \
 do {                                                           \
-       if (lvl <= verb) {                                      \
+       if (pp->mpp && checker_selected(&pp->checker) &&        \
+           lvl <= verb) {                                      \
                if (pp->offline)                                \
                        condlog(lvl, "%s: %s - path offline",   \
                                pp->mpp->alias, pp->dev);       \
@@ -242,10 +243,11 @@ int set_config_state(enum daemon_status state)
                else if (running_state != DAEMON_IDLE) {
                        struct timespec ts;
 
-                       clock_gettime(CLOCK_MONOTONIC, &ts);
-                       ts.tv_sec += 1;
-                       rc = pthread_cond_timedwait(&config_cond,
-                                                   &config_lock, &ts);
+                       if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
+                               ts.tv_sec += 1;
+                               rc = pthread_cond_timedwait(&config_cond,
+                                                           &config_lock, &ts);
+                       }
                }
                if (!rc) {
                        running_state = state;
@@ -334,6 +336,7 @@ remove_map_and_stop_waiter(struct multipath *mpp, struct vectors *vecs)
 {
        /* devices are automatically removed by the dmevent polling code,
         * so they don't need to be manually removed here */
+       condlog(3, "%s: removing map from internal tables", mpp->alias);
        if (!poll_dmevents)
                stop_waiter_thread(mpp, vecs);
        remove_map(mpp, vecs, PURGE_VEC);
@@ -491,13 +494,12 @@ retry:
        verify_paths(mpp, vecs);
        mpp->action = ACT_RELOAD;
 
-       extract_hwe_from_path(mpp);
        if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
                condlog(0, "%s: failed to setup new map in update", mpp->alias);
                retries = -1;
                goto fail;
        }
-       if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
+       if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) {
                condlog(0, "%s: map_udate sleep", mpp->alias);
                sleep(1);
                goto retry;
@@ -654,7 +656,7 @@ flush_map(struct multipath * mpp, struct vectors * vecs, int nopaths)
                condlog(2, "%s: map flushed", mpp->alias);
        }
 
-       orphan_paths(vecs->pathvec, mpp);
+       orphan_paths(vecs->pathvec, mpp, "map flushed");
        remove_map_and_stop_waiter(mpp, vecs);
 
        return 0;
@@ -700,7 +702,7 @@ ev_add_map (char * dev, const char * alias, struct vectors * vecs)
        int delayed_reconfig, reassign_maps;
        struct config *conf;
 
-       if (!dm_is_mpath(alias)) {
+       if (dm_is_mpath(alias) != 1) {
                condlog(4, "%s: not a multipath map", alias);
                return 0;
        }
@@ -786,7 +788,6 @@ uev_remove_map (struct uevent * uev, struct vectors * vecs)
                goto out;
        }
 
-       orphan_paths(vecs->pathvec, mpp);
        remove_map_and_stop_waiter(mpp, vecs);
 out:
        lock_cleanup_pop(vecs->lock);
@@ -925,6 +926,14 @@ ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
                goto fail; /* leave path added to pathvec */
        }
        mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
+       if (mpp && pp->size && mpp->size != pp->size) {
+               condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev);
+               int i = find_slot(vecs->pathvec, (void *)pp);
+               if (i != -1)
+                       vector_del_slot(vecs->pathvec, i);
+               free_path(pp);
+               return 1;
+       }
        if (mpp && mpp->wait_for_udev &&
            (pathcount(mpp, PATH_UP) > 0 ||
             (pathcount(mpp, PATH_GHOST) > 0 && pp->tpgs != TPGS_IMPLICIT &&
@@ -940,17 +949,6 @@ ev_add_path (struct path * pp, struct vectors * vecs, int need_do_map)
        pp->mpp = mpp;
 rescan:
        if (mpp) {
-               if (pp->size && mpp->size != pp->size) {
-                       condlog(0, "%s: failed to add new path %s, "
-                               "device size mismatch",
-                               mpp->alias, pp->dev);
-                       int i = find_slot(vecs->pathvec, (void *)pp);
-                       if (i != -1)
-                               vector_del_slot(vecs->pathvec, i);
-                       free_path(pp);
-                       return 1;
-               }
-
                condlog(4,"%s: adopting all paths for path %s",
                        mpp->alias, pp->dev);
                if (adopt_paths(vecs->pathvec, mpp))
@@ -958,7 +956,6 @@ rescan:
 
                verify_paths(mpp, vecs);
                mpp->action = ACT_RELOAD;
-               extract_hwe_from_path(mpp);
        } else {
                if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) {
                        orphan_path(pp, "only one path");
@@ -998,15 +995,14 @@ rescan:
        /*
         * reload the map for the multipath mapped device
         */
-retry:
        ret = domap(mpp, params, 1);
-       if (ret <= 0) {
-               if (ret < 0 && retries-- > 0) {
-                       condlog(0, "%s: retry domap for addition of new "
-                               "path %s", mpp->alias, pp->dev);
-                       sleep(1);
-                       goto retry;
-               }
+       while (ret == DOMAP_RETRY && retries-- > 0) {
+               condlog(0, "%s: retry domap for addition of new "
+                       "path %s", mpp->alias, pp->dev);
+               sleep(1);
+               ret = domap(mpp, params, 1);
+       }
+       if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) {
                condlog(0, "%s: failed in domap for addition of new "
                        "path %s", mpp->alias, pp->dev);
                /*
@@ -1157,7 +1153,7 @@ ev_remove_path (struct path *pp, struct vectors * vecs, int need_do_map)
                 * reload the map
                 */
                mpp->action = ACT_RELOAD;
-               if (domap(mpp, params, 1) <= 0) {
+               if (domap(mpp, params, 1) == DOMAP_FAIL) {
                        condlog(0, "%s: failed in domap for "
                                "removal of path %s",
                                mpp->alias, pp->dev);
@@ -1839,6 +1835,94 @@ int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh)
        return 0;
 }
 
+static int check_path_reinstate_state(struct path * pp) {
+       struct timespec curr_time;
+
+       /*
+        * Called from check_path() when the new state is "good" (PATH_UP
+        * or PATH_GHOST); pp->state still reflects the *previous* state.
+        * If that was "bad", a failure must have occurred beforehand,
+        * and we count it. Only state _changes_ are counted this way: a
+        * path that remains "bad" does not increase the failure count.
+        * Returns 1 if reinstatement must be delayed, 0 otherwise.
+        */
+
+       if (!((pp->mpp->san_path_err_threshold > 0) &&
+                               (pp->mpp->san_path_err_forget_rate > 0) &&
+                               (pp->mpp->san_path_err_recovery_time >0))) {
+               return 0;
+       }
+
+       if (pp->disable_reinstate) {
+               /* If we don't know how much time has passed, automatically
+                * reinstate the path, just to be safe. Also, if there are
+                * no other usable paths, reinstate the path
+                */
+               if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0 ||
+                               pp->mpp->nr_active == 0) {
+                       condlog(2, "%s : reinstating path early", pp->dev);
+                       goto reinstate_path;
+               }
+               if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) {
+                       condlog(2,"%s : reinstate the path after err recovery time", pp->dev);
+                       goto reinstate_path;
+               }
+               return 1;
+       }
+       /* forget errors on a working path (previous state was already good) */
+       if ((pp->state == PATH_UP || pp->state == PATH_GHOST) &&
+                       pp->path_failures > 0) {
+               if (pp->san_path_err_forget_rate > 0){
+                       pp->san_path_err_forget_rate--;
+               } else {
+                       /* for every san_path_err_forget_rate number of
+                        * successful path checks decrement path_failures by 1
+                        */
+                       pp->path_failures--;
+                       pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
+               }
+               return 0;
+       }
+
+       /* If the path isn't recovering from a failed state, do nothing */
+       if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY &&
+                       pp->state != PATH_TIMEOUT)
+               return 0;
+
+       if (pp->path_failures == 0)
+               pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
+
+       pp->path_failures++;
+
+       /* if we don't know the current time, we don't know how long to
+        * delay the path, so there's no point in checking if we should
+        */
+
+       if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0)
+               return 0;
+       /* when path_failures has exceeded san_path_err_threshold, place
+        * the path in delayed state for san_path_err_recovery_time so
+        * that the customer can rectify the issue within this time. Once
+        * san_path_err_recovery_time has elapsed, the path is
+        * automatically reinstated (see the disable_reinstate branch)
+        */
+       if (pp->path_failures > pp->mpp->san_path_err_threshold) {
+               condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev);
+               pp->dis_reinstate_time = curr_time.tv_sec;
+               pp->disable_reinstate = 1;
+
+               return 1;
+       } else {
+               return 0;
+       }
+
+reinstate_path: /* reset the failure bookkeeping and allow reinstatement */
+       pp->path_failures = 0;
+       pp->disable_reinstate = 0;
+       pp->san_path_err_forget_rate = 0;
+       return 0;
+}
+
 /*
  * Returns '1' if the path has been checked, '-1' if it was blacklisted
  * and '0' otherwise
@@ -1909,6 +1993,16 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
        pp->tick = checkint;
 
        newstate = path_offline(pp);
+       if (newstate == PATH_UP) {
+               conf = get_multipath_config();
+               pthread_cleanup_push(put_multipath_config, conf);
+               newstate = get_state(pp, conf, 1, newstate);
+               pthread_cleanup_pop(1);
+       } else {
+               checker_clear_message(&pp->checker);
+               condlog(3, "%s: state %s, checker not called",
+                       pp->dev, checker_state_name(newstate));
+       }
        /*
         * Wait for uevent for removed paths;
         * some LLDDs like zfcp keep paths unavailable
@@ -1917,14 +2011,6 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
        if (newstate == PATH_REMOVED)
                newstate = PATH_DOWN;
 
-       if (newstate == PATH_UP) {
-               conf = get_multipath_config();
-               pthread_cleanup_push(put_multipath_config, conf);
-               newstate = get_state(pp, conf, 1, newstate);
-               pthread_cleanup_pop(1);
-       } else
-               checker_clear_message(&pp->checker);
-
        if (pp->wwid_changed) {
                condlog(2, "%s: path wwid has changed. Refusing to use",
                        pp->dev);
@@ -1932,7 +2018,8 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
        }
 
        if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
-               condlog(2, "%s: unusable path - checker failed", pp->dev);
+               condlog(2, "%s: unusable path (%s) - checker failed",
+                       pp->dev, checker_state_name(newstate));
                LOG_MSG(2, verbosity, pp);
                conf = get_multipath_config();
                pthread_cleanup_push(put_multipath_config, conf);
@@ -1941,7 +2028,9 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
                return 1;
        }
        if (!pp->mpp) {
-               if (!strlen(pp->wwid) && pp->initialized == INIT_FAILED &&
+               if (!strlen(pp->wwid) &&
+                   (pp->initialized == INIT_FAILED ||
+                    pp->initialized == INIT_NEW) &&
                    (newstate == PATH_UP || newstate == PATH_GHOST)) {
                        condlog(2, "%s: add missing path", pp->dev);
                        conf = get_multipath_config();
@@ -1984,6 +2073,12 @@ check_path (struct vectors * vecs, struct path * pp, int ticks)
        if (!pp->mpp)
                return 0;
 
+       if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
+                       check_path_reinstate_state(pp)) {
+               pp->state = PATH_DELAYED;
+               return 1;
+       }
+
        if (pp->io_err_disable_reinstate && hit_io_err_recheck_time(pp)) {
                pp->state = PATH_SHAKY;
                /*
@@ -2181,6 +2276,7 @@ checkerloop (void *ap)
        unsigned int i;
        struct timespec last_time;
        struct config *conf;
+       int foreign_tick = 0;
 
        pthread_cleanup_push(rcu_unregister, NULL);
        rcu_register_thread();
@@ -2264,7 +2360,7 @@ checkerloop (void *ap)
                        if (num_paths) {
                                unsigned int max_checkint;
 
-                               condlog(3, "checked %d path%s in %lu.%06lu secs",
+                               condlog(4, "checked %d path%s in %lu.%06lu secs",
                                        num_paths, num_paths > 1 ? "s" : "",
                                        diff_time.tv_sec,
                                        diff_time.tv_nsec / 1000);
@@ -2278,7 +2374,15 @@ checkerloop (void *ap)
                                                diff_time.tv_sec);
                        }
                }
-               check_foreign();
+
+               if (foreign_tick == 0) {
+                       conf = get_multipath_config();
+                       foreign_tick = conf->max_checkint;
+                       put_multipath_config(conf);
+               }
+               if (--foreign_tick == 0)
+                       check_foreign();
+
                post_config_state(DAEMON_IDLE);
                conf = get_multipath_config();
                strict_timing = conf->strict_timing;
@@ -2369,7 +2473,7 @@ configure (struct vectors * vecs)
        ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE);
        if (force_reload == FORCE_RELOAD_WEAK)
                force_reload = FORCE_RELOAD_YES;
-       if (ret) {
+       if (ret != CP_OK) {
                condlog(0, "configure failed while coalescing paths");
                goto fail;
        }
index b37b5027a39e707a71dff4c8bbe8074e9667e431..ef900866f6fbc62464e67f51c19caee6679bf42f 100644 (file)
@@ -26,7 +26,11 @@ hwtable-test_LIBDEPS := -ludev -lpthread -ldl
 blacklist-test_OBJDEPS := ../libmultipath/blacklist.o
 blacklist-test_LIBDEPS := -ludev
 
-%.out: %-test
+# Hard-link all plugins into ./lib; globs are listed one by one as /bin/sh need not support {a,b} brace expansion.
+lib/libchecktur.so:
+       mkdir -p lib && ln -f -t lib ../libmultipath/checkers/*.so ../libmultipath/prioritizers/*.so ../libmultipath/foreign/*.so
+
+%.out: %-test lib/libchecktur.so
        @echo == running $< ==
        @LD_LIBRARY_PATH=$(multipathdir):$(mpathcmddir) ./$< >$@
 
@@ -34,6 +38,7 @@ OBJS = $(TESTS:%=%.o) test-lib.o
 
 clean: dep_clean
        $(RM) $(TESTS:%=%-test) $(TESTS:%=%.out) $(OBJS)
+       $(RM) -rf lib
 
 .SECONDARY: $(OBJS)
 
index a55c1c079eb0a747d335d920ffa49440ac8404c4..54d568f5a8eddfe4ca07a044c259aed472cbe05f 100644 (file)
@@ -267,7 +267,8 @@ static void test_property_blacklist(void **state)
        static struct udev_device udev = { "sdb", { "ID_FOO", "ID_WWN", "ID_BAR", NULL } };
        conf.blist_property = blist_property_wwn;
        expect_condlog(3, "sdb: udev property ID_WWN blacklisted\n");
-       assert_int_equal(filter_property(&conf, &udev), MATCH_PROPERTY_BLIST);
+       assert_int_equal(filter_property(&conf, &udev, 3),
+                        MATCH_PROPERTY_BLIST);
 }
 
 /* the property check works different in that you check all the property
@@ -280,7 +281,7 @@ static void test_property_whitelist(void **state)
        static struct udev_device udev = { "sdb", { "ID_FOO", "ID_WWN", "ID_BAR", NULL } };
        conf.elist_property = blist_property_wwn;
        expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n");
-       assert_int_equal(filter_property(&conf, &udev),
+       assert_int_equal(filter_property(&conf, &udev, 3),
                         MATCH_PROPERTY_BLIST_EXCEPT);
 }
 
@@ -289,7 +290,7 @@ static void test_property_missing(void **state)
        static struct udev_device udev = { "sdb", { "ID_FOO", "ID_BAZ", "ID_BAR", NULL } };
        conf.blist_property = blist_property_wwn;
        expect_condlog(3, "sdb: blacklisted, udev property missing\n");
-       assert_int_equal(filter_property(&conf, &udev),
+       assert_int_equal(filter_property(&conf, &udev, 3),
                         MATCH_PROPERTY_BLIST_MISSING);
 }
 
index 9146ecc3721c6ad42c9c0ee4e1745b488b55025b..ad863b0899ed46065745b4124979df916a4cecda 100644 (file)
@@ -24,8 +24,8 @@
 #include "pgpolicies.h"
 #include "test-lib.h"
 #include "print.h"
+#include "util.h"
 
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
 #define N_CONF_FILES 2
 
 static const char tmplate[] = "/tmp/hwtable-XXXXXX";
@@ -250,14 +250,19 @@ static void write_defaults(const struct hwt_state *hwt)
        static struct key_value defaults[] = {
                { "config_dir", NULL },
                { "bindings_file", NULL },
+               { "multipath_dir", NULL },
                { "detect_prio", "no" },
                { "detect_checker", "no" },
        };
        char buf[sizeof(tmplate) + sizeof(bindings_name)];
+       char dirbuf[PATH_MAX];
 
        snprintf(buf, sizeof(buf), "%s/%s", hwt->tmpname, bindings_name);
        defaults[0].value = hwt->dirname;
        defaults[1].value = buf;
+       assert_ptr_not_equal(getcwd(dirbuf, sizeof(dirbuf)), NULL);
+       strncat(dirbuf, "/lib", sizeof(dirbuf) - strlen(dirbuf) - 1); /* limit is the room left, not the buffer size */
+       defaults[2].value = dirbuf;
        write_section(hwt->config_file, "defaults",
                      ARRAY_SIZE(defaults), defaults);
 }
@@ -565,7 +570,7 @@ static void test_internal_nvme(const struct hwt_state *hwt)
        pp = mock_path("NVME", "NoName");
        mp = mock_multipath(pp);
        assert_ptr_not_equal(mp, NULL);
-       TEST_PROP(pp->checker.name, NONE);
+       TEST_PROP(checker_name(&pp->checker), NONE);
        TEST_PROP(pp->uid_attribute, "ID_WWN");
        assert_int_equal(mp->pgpolicy, DEFAULT_PGPOLICY);
        assert_int_equal(mp->no_path_retry, DEFAULT_NO_PATH_RETRY);
@@ -578,7 +583,7 @@ static void test_internal_nvme(const struct hwt_state *hwt)
                            default_wwid_1);
        mp = mock_multipath(pp);
        assert_ptr_not_equal(mp, NULL);
-       TEST_PROP(pp->checker.name, NONE);
+       TEST_PROP(checker_name(&pp->checker), NONE);
        TEST_PROP(pp->uid_attribute, "ID_WWN");
        assert_int_equal(mp->pgpolicy, MULTIBUS);
        assert_int_equal(mp->no_path_retry, NO_PATH_RETRY_QUEUE);
@@ -750,31 +755,31 @@ static void test_regex_string_hwe(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* boo:baz matches kv1 */
        pp = mock_path(vnd_boo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* .oo:ba. matches kv1 */
        pp = mock_path(vnd__oo.value, prd_ba_.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* .foo:(bar|baz|ba\.) doesn't match */
        pp = mock_path(vnd__oo.value, prd_ba_s.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches kv2 and kv1 */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_regex_string_hwe(void **state)
@@ -807,32 +812,32 @@ static void test_regex_string_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* boo:baz matches kv1 */
        pp = mock_path(vnd_boo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* .oo:ba. matches kv1 */
        pp = mock_path(vnd__oo.value, prd_ba_.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* .oo:(bar|baz|ba\.)$ doesn't match */
        pp = mock_path(vnd__oo.value, prd_ba_s.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches kv2 */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        /* Later match takes prio */
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_regex_string_hwe_dir(void **state)
@@ -863,28 +868,28 @@ static void test_regex_2_strings_hwe_dir(const struct hwt_state *hwt)
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
        TEST_PROP(pp->uid_attribute, DEFAULT_UID_ATTRIBUTE);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* boo:baz doesn't match */
        pp = mock_path(vnd_boo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
        TEST_PROP(pp->uid_attribute, DEFAULT_UID_ATTRIBUTE);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches kv2 and kv1 */
        pp = mock_path(vnd_foo.value, prd_bar.value);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, NULL);
        TEST_PROP(pp->uid_attribute, uid_baz.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* foo:barz matches kv3 and kv2 and kv1 */
        pp = mock_path_flags(vnd_foo.value, prd_barz.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_rdac.value);
        TEST_PROP(pp->getuid, gui_foo.value);
        TEST_PROP(pp->uid_attribute, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_regex_2_strings_hwe_dir(void **state)
@@ -921,31 +926,31 @@ static void test_string_regex_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* foo:baz matches kv1 */
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* boo:baz matches kv1 */
        pp = mock_path(vnd_boo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* .oo:ba. matches kv1 */
        pp = mock_path(vnd__oo.value, prd_ba_.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* .oo:(bar|baz|ba\.)$ doesn't match */
        pp = mock_path(vnd__oo.value, prd_ba_s.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 }
 
 static int setup_string_regex_hwe_dir(void **state)
@@ -975,13 +980,13 @@ static void test_2_ident_strings_hwe(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches both */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_ident_strings_hwe(void **state)
@@ -1010,13 +1015,13 @@ static void test_2_ident_strings_both_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches both */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_ident_strings_both_dir(void **state)
@@ -1050,13 +1055,13 @@ static void test_2_ident_strings_both_dir_w_prev(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches both */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_ident_strings_both_dir_w_prev(void **state)
@@ -1095,13 +1100,13 @@ static void test_2_ident_strings_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches both */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_ident_strings_hwe_dir(void **state)
@@ -1129,13 +1134,13 @@ static void test_3_ident_strings_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches both */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_3_ident_strings_hwe_dir(void **state)
@@ -1173,13 +1178,13 @@ static void test_2_ident_self_matching_re_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches both */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_ident_self_matching_re_hwe_dir(void **state)
@@ -1208,13 +1213,13 @@ static void test_2_ident_self_matching_re_hwe(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_ident_self_matching_re_hwe(void **state)
@@ -1245,13 +1250,13 @@ test_2_ident_not_self_matching_re_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_baz.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        /* foo:bar matches both */
        pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_ident_not_self_matching_re_hwe_dir(void **state)
@@ -1282,19 +1287,19 @@ static void test_2_matching_res_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_bar.value);
        TEST_PROP(prio_name(&pp->prio), prio_emc.value);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* foo:bay matches k1 and k2 */
        pp = mock_path_flags(vnd_foo.value, "bay", USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 
        /* foo:baz matches k2 only. */
        pp = mock_path_flags(vnd_foo.value, prd_baz.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 }
 
 static int setup_2_matching_res_hwe_dir(void **state)
@@ -1323,12 +1328,12 @@ static void test_2_nonmatching_res_hwe_dir(const struct hwt_state *hwt)
        pp = mock_path(vnd_foo.value, prd_bar.value);
        TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
        TEST_PROP(pp->getuid, NULL);
-       TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+       TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
 
        pp = mock_path_flags(vnd_foo.value, prd_baz.value, USE_GETUID);
        TEST_PROP(prio_name(&pp->prio), prio_hds.value);
        TEST_PROP(pp->getuid, gui_foo.value);
-       TEST_PROP(pp->checker.name, chk_hp.value);
+       TEST_PROP(checker_name(&pp->checker), chk_hp.value);
 }
 
 static int setup_2_nonmatching_res_hwe_dir(void **state)
index 839effd2a3130a002d84e19b7b4643179cd18382..e6d4b9ab0c37783436b0e80d9e844e87e4e78c63 100644 (file)
@@ -26,6 +26,8 @@
 
 #include "globals.c"
 
+#define BITARR_SZ 4 /* number of uint64_t words in the arrays used by the bitmask tests */
+
 static void test_basenamecpy_good0(void **state)
 {
        char dst[10];
@@ -139,6 +141,100 @@ static void test_basenamecpy_bad5(void **state)
         assert_int_equal(basenamecpy("baz/qux", NULL, sizeof(dst)), 0);
 }
 
+static void test_bitmask_1(void **state)
+{
+       uint64_t arr[BITARR_SZ];
+       int i, j, k, m, b;
+       /* Set each bit on its own, check it is the only bit set, then clear it again. */
+       memset(arr, 0, sizeof(arr));
+       /* j selects the 64-bit word, i the bit inside it; b is the absolute bit number. */
+       for (j = 0; j < BITARR_SZ; j++) {
+               for (i = 0; i < 64; i++) {
+                       b = 64 * j + i;
+                       assert(!is_bit_set_in_array(b, arr)); /* array is all-zero at the start of each iteration */
+                       set_bit_in_array(b, arr);
+                       for (k = 0; k < BITARR_SZ; k++) { /* inspect the raw words */
+                               printf("b = %d j = %d k = %d a = %"PRIx64"\n",
+                                      b, j, k, arr[k]); /* prints one line per word for every bit tested */
+                               if (k == j)
+                                       assert_int_equal(arr[j], 1ULL << i); /* only bit i of word j is set */
+                               else
+                                       assert_int_equal(arr[k], 0ULL); /* all other words stay untouched */
+                       }
+                       for (m = 0; m < 64; m++) /* cross-check word j through the accessor */
+                               if (i == m)
+                                       assert(is_bit_set_in_array(64 * j + m,
+                                                                  arr));
+                               else
+                                       assert(!is_bit_set_in_array(64 * j + m,
+                                                                   arr));
+                       clear_bit_in_array(b, arr);
+                       assert(!is_bit_set_in_array(b, arr));
+                       for (k = 0; k < BITARR_SZ; k++) /* clearing must restore an all-zero array */
+                               assert_int_equal(arr[k], 0ULL);
+               }
+       }
+}
+
+static void test_bitmask_2(void **state)
+{
+       uint64_t arr[BITARR_SZ];
+       int i, j, k, m, b;
+       /* Fill the array bit by bit in ascending order, then empty it again in the same order. */
+       memset(arr, 0, sizeof(arr));
+       /* Phase 1: set bits cumulatively; j selects the word, i the bit inside it. */
+       for (j = 0; j < BITARR_SZ; j++) {
+               for (i = 0; i < 64; i++) {
+                       b = 64 * j + i;
+                       assert(!is_bit_set_in_array(b, arr)); /* not reached yet, so still clear */
+                       set_bit_in_array(b, arr);
+                       for (m = 0; m < 64; m++) /* in word j, bits 0..i are set, bits above i are not */
+                               if (m <= i)
+                                       assert(is_bit_set_in_array(64 * j + m,
+                                                                  arr));
+                               else
+                                       assert(!is_bit_set_in_array(64 * j + m,
+                                                                   arr));
+                       assert(is_bit_set_in_array(b, arr));
+                       for (k = 0; k < BITARR_SZ; k++) { /* inspect the raw words */
+                               if (k < j || (k == j && i == 63)) /* full word; i == 63 is handled here so the shift below never reaches 64 */
+                                       assert_int_equal(arr[k], ~0ULL);
+                               else if (k > j)
+                                       assert_int_equal(arr[k], 0ULL); /* words not reached yet */
+                               else
+                                       assert_int_equal(
+                                               arr[k],
+                                               (1ULL << (i + 1)) - 1); /* low i + 1 bits set */
+                       }
+               }
+       }
+       for (j = 0; j < BITARR_SZ; j++) { /* Phase 2: clear bits in the same ascending order */
+               for (i = 0; i < 64; i++) {
+                       b = 64 * j + i;
+                       assert(is_bit_set_in_array(b, arr)); /* still set from phase 1 */
+                       clear_bit_in_array(b, arr);
+                       for (m = 0; m < 64; m++) /* in word j, bits 0..i are clear, bits above i are still set */
+                               if (m <= i)
+                                       assert(!is_bit_set_in_array(64 * j + m,
+                                                                   arr));
+                               else
+                                       assert(is_bit_set_in_array(64 * j + m,
+                                                                  arr));
+                       assert(!is_bit_set_in_array(b, arr));
+                       for (k = 0; k < BITARR_SZ; k++) { /* inspect the raw words */
+                               if (k < j || (k == j && i == 63)) /* fully cleared word; i == 63 is handled here so the shift below never reaches 64 */
+                                       assert_int_equal(arr[k], 0ULL);
+                               else if (k > j)
+                                       assert_int_equal(arr[k], ~0ULL); /* words not reached yet are still full */
+                               else
+                                       assert_int_equal(
+                                               arr[k],
+                                               ~((1ULL << (i + 1)) - 1)); /* low i + 1 bits cleared */
+                       }
+               }
+       }
+}
+
 int test_basenamecpy(void)
 {
        const struct CMUnitTest tests[] = {
@@ -156,6 +252,8 @@ int test_basenamecpy(void)
                cmocka_unit_test(test_basenamecpy_bad3),
                cmocka_unit_test(test_basenamecpy_bad4),
                cmocka_unit_test(test_basenamecpy_bad5),
+               cmocka_unit_test(test_bitmask_1),
+               cmocka_unit_test(test_bitmask_2),
        };
        return cmocka_run_group_tests(tests, NULL, NULL);
 }