libdmmp/test/libdmmp_speed_test
tests/*-test
tests/*.out
-
+libmultipath/nvme-ioctl.c
+libmultipath/nvme-ioctl.h
# WITH_LOCAL_LIBDM = 1
# WITH_LOCAL_LIBSYSFS = 1
#
-# Uncomment to disable RADOS support (e.g. if rados headers are missing).
-# ENABLE_RADOS = 0
-#
# Uncomment to disable libdmmp support
# ENABLE_LIBDMMP = 0
#
mpathcmddir = $(TOPDIR)/libmpathcmd
thirdpartydir = $(TOPDIR)/third-party
libdmmpdir = $(TOPDIR)/libdmmp
+nvmedir = $(TOPDIR)/libmultipath/nvme
includedir = $(prefix)/usr/include
pkgconfdir = $(usrlibdir)/pkgconfig
/* Not a DASD */
return -1;
} else {
- fd_dasd = fd;
+ fd_dasd = dup(fd);
}
if (ioctl(fd_dasd, BIODASDINFO, (unsigned long)&info) != 0) {
memcpy (&vlabel, data, sizeof(vlabel));
else {
bzero(&vlabel,4);
- memcpy (&vlabel.vollbl, data, sizeof(vlabel) - 4);
+ memcpy ((char *)&vlabel + 4, data, sizeof(vlabel) - 4);
}
vtoc_ebcdic_dec(vlabel.vollbl, type, 4);
out:
if (data != NULL)
free(data);
- if (fd_dasd != -1 && fd_dasd != fd)
- close(fd_dasd);
+ close(fd_dasd);
return retval;
}
errno = EIO;
return -1;
}
+ if (len <= 0 || len >= MAX_REPLY_LEN) {
+ errno = ERANGE;
+ return -1;
+ }
return len;
}
#ifndef LIB_MPATH_CMD_H
#define LIB_MPATH_CMD_H
+/*
+ * This should be sufficient for json output for >10000 maps,
+ * and >60000 paths.
+ */
+#define MAX_REPLY_LEN (32 * 1024 * 1024)
+
#ifdef __cplusplus
extern "C" {
#endif
condlog(3, "alias = %s", alias);
map_present = dm_map_present(alias);
- if (map_present && !dm_is_mpath(alias)){
+ if (map_present && dm_is_mpath(alias) != 1){
condlog( 0, "%s: not a multipath device.", alias);
ret = MPATH_PR_DMMP_ERROR;
goto out;
condlog(3, "alias = %s", alias);
map_present = dm_map_present(alias);
- if (map_present && !dm_is_mpath(alias)){
+ if (map_present && dm_is_mpath(alias) != 1){
condlog(3, "%s: not a multipath device.", alias);
ret = MPATH_PR_DMMP_ERROR;
goto out;
if (!get_be64(mpp->reservation_key))
{
/* Nothing to do. Assuming pr mgmt feature is disabled*/
- condlog(3, "%s: reservation_key not set in multipath.conf", mpp->alias);
+ condlog(4, "%s: reservation_key not set in multipath.conf",
+ mpp->alias);
return MPATH_PR_SUCCESS;
}
unsigned char *p;
char *ppbuff;
uint32_t additional_length;
-
+ char tempbuff[MPATH_MAX_PARAM_LEN];
+ struct prin_fulldescr fdesc;
convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readfd.prgeneration);
convert_be32_to_cpu(&pr_buff->prin_descriptor.prin_readfd.number_of_descriptor);
}
additional_length = pr_buff->prin_descriptor.prin_readfd.number_of_descriptor;
+ if (additional_length > MPATH_MAX_PARAM_LEN) {
+ condlog(3, "PRIN length %u exceeds max length %d", additional_length,
+ MPATH_MAX_PARAM_LEN);
+ return;
+ }
- char tempbuff[MPATH_MAX_PARAM_LEN];
- struct prin_fulldescr fdesc;
memset(&fdesc, 0, sizeof(struct prin_fulldescr));
memcpy( tempbuff, pr_buff->prin_descriptor.prin_readfd.private_buffer,MPATH_MAX_PARAM_LEN );
fdesc.rtpi = get_unaligned_be16(&p[18]);
tid_len_len = get_unaligned_be32(&p[20]);
- if (tid_len_len + 24 + k >= additional_length) {
+ if (tid_len_len + 24 + k > additional_length) {
condlog(0,
"%s: corrupt PRIN response: status descriptor end %d exceeds length %d",
__func__, tid_len_len + k + 24,
DEVLIB = libmultipath.so
LIBS = $(DEVLIB).$(SONAME)
-CFLAGS += $(LIB_CFLAGS) -I$(mpathcmddir) -I$(mpathpersistdir)
+CFLAGS += $(LIB_CFLAGS) -I$(mpathcmddir) -I$(mpathpersistdir) -I$(nvmedir)
LIBDEPS += -lpthread -ldl -ldevmapper -ludev -L$(mpathcmddir) -lmpathcmd -lurcu -laio
switchgroup.o uxsock.o print.o alias.o log_pthread.o \
log.o configure.o structs_vec.o sysfs.o prio.o checkers.o \
lock.o file.o wwids.o prioritizers/alua_rtpg.o prkey.o \
- io_err_stat.o dm-generic.o generic.o foreign.o
+ io_err_stat.o dm-generic.o generic.o foreign.o nvme-lib.o
all: $(LIBS)
+nvme-lib.o: nvme-lib.c nvme-ioctl.c nvme-ioctl.h
+ $(CC) $(CFLAGS) -Wno-unused-function -c -o $@ $<
+
+make_static = $(shell sed '/^static/!s/^\([a-z]\{1,\} \)/static \1/' <$1 >$2)
+
+nvme-ioctl.c: nvme/nvme-ioctl.c
+ $(call make_static,$<,$@)
+
+nvme-ioctl.h: nvme/nvme-ioctl.h
+ $(call make_static,$<,$@)
+
$(LIBS): $(OBJS)
$(CC) $(LDFLAGS) $(SHARED_FLAGS) -Wl,-soname=$@ -o $@ $(OBJS) $(LIBDEPS)
$(LN) $@ $(DEVLIB)
$(RM) $(DESTDIR)$(syslibdir)/$(DEVLIB)
clean: dep_clean
- $(RM) core *.a *.o *.so *.so.* *.gz
+ $(RM) core *.a *.o *.so *.so.* *.gz nvme-ioctl.c nvme-ioctl.h
include $(wildcard $(OBJS:.o=.d))
char * str;
int i;
- str = STRDUP("^(ram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]");
+ str = STRDUP("^(ram|zram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]");
if (!str)
return 1;
if (store_ble(conf->blist_devnode, str, ORIGIN_DEFAULT))
return 0;
}
-#define LOG_BLIST(M,S) \
+#define LOG_BLIST(M, S, lvl) \
if (vendor && product) \
- condlog(3, "%s: (%s:%s) %s %s", \
+ condlog(lvl, "%s: (%s:%s) %s %s", \
dev, vendor, product, (M), (S)); \
else if (wwid && !dev) \
- condlog(3, "%s: %s %s", wwid, (M), (S)); \
+ condlog(lvl, "%s: %s %s", wwid, (M), (S)); \
else if (wwid) \
- condlog(3, "%s: %s %s %s", dev, (M), wwid, (S)); \
+ condlog(lvl, "%s: %s %s %s", dev, (M), wwid, (S)); \
else if (env) \
- condlog(3, "%s: %s %s %s", dev, (M), env, (S)); \
+ condlog(lvl, "%s: %s %s %s", dev, (M), env, (S)); \
else if (protocol) \
- condlog(3, "%s: %s %s %s", dev, (M), protocol, (S)); \
+ condlog(lvl, "%s: %s %s %s", dev, (M), protocol, (S)); \
else \
- condlog(3, "%s: %s %s", dev, (M), (S))
+ condlog(lvl, "%s: %s %s", dev, (M), (S))
-void
+static void
log_filter (const char *dev, char *vendor, char *product, char *wwid,
- const char *env, const char *protocol, int r)
+ const char *env, const char *protocol, int r, int lvl)
{
/*
* Try to sort from most likely to least.
case MATCH_NOTHING:
break;
case MATCH_DEVICE_BLIST:
- LOG_BLIST("vendor/product", "blacklisted");
+ LOG_BLIST("vendor/product", "blacklisted", lvl);
break;
case MATCH_WWID_BLIST:
- LOG_BLIST("wwid", "blacklisted");
+ LOG_BLIST("wwid", "blacklisted", lvl);
break;
case MATCH_DEVNODE_BLIST:
- LOG_BLIST("device node name", "blacklisted");
+ LOG_BLIST("device node name", "blacklisted", lvl);
break;
case MATCH_PROPERTY_BLIST:
- LOG_BLIST("udev property", "blacklisted");
+ LOG_BLIST("udev property", "blacklisted", lvl);
break;
case MATCH_PROTOCOL_BLIST:
- LOG_BLIST("protocol", "blacklisted");
+ LOG_BLIST("protocol", "blacklisted", lvl);
break;
case MATCH_DEVICE_BLIST_EXCEPT:
- LOG_BLIST("vendor/product", "whitelisted");
+ LOG_BLIST("vendor/product", "whitelisted", lvl);
break;
case MATCH_WWID_BLIST_EXCEPT:
- LOG_BLIST("wwid", "whitelisted");
+ LOG_BLIST("wwid", "whitelisted", lvl);
break;
case MATCH_DEVNODE_BLIST_EXCEPT:
- LOG_BLIST("device node name", "whitelisted");
+ LOG_BLIST("device node name", "whitelisted", lvl);
break;
case MATCH_PROPERTY_BLIST_EXCEPT:
- LOG_BLIST("udev property", "whitelisted");
+ LOG_BLIST("udev property", "whitelisted", lvl);
break;
case MATCH_PROPERTY_BLIST_MISSING:
- LOG_BLIST("blacklisted,", "udev property missing");
+ LOG_BLIST("blacklisted,", "udev property missing", lvl);
break;
case MATCH_PROTOCOL_BLIST_EXCEPT:
- LOG_BLIST("protocol", "whitelisted");
+ LOG_BLIST("protocol", "whitelisted", lvl);
break;
}
}
r = MATCH_DEVICE_BLIST;
}
- log_filter(dev, vendor, product, NULL, NULL, NULL, r);
+ log_filter(dev, vendor, product, NULL, NULL, NULL, r, 3);
return r;
}
r = MATCH_DEVNODE_BLIST;
}
- log_filter(dev, NULL, NULL, NULL, NULL, NULL, r);
+ log_filter(dev, NULL, NULL, NULL, NULL, NULL, r, 3);
return r;
}
r = MATCH_WWID_BLIST;
}
- log_filter(dev, NULL, NULL, wwid, NULL, NULL, r);
+ log_filter(dev, NULL, NULL, wwid, NULL, NULL, r, 3);
return r;
}
r = MATCH_PROTOCOL_BLIST;
}
- log_filter(pp->dev, NULL, NULL, NULL, NULL, buf, r);
+ log_filter(pp->dev, NULL, NULL, NULL, NULL, buf, r, 3);
return r;
}
{
int r;
- r = filter_property(conf, pp->udev);
+ r = filter_property(conf, pp->udev, 3);
if (r > 0)
return r;
r = filter_devnode(conf->blist_devnode, conf->elist_devnode, pp->dev);
}
int
-filter_property(struct config * conf, struct udev_device * udev)
+filter_property(struct config *conf, struct udev_device *udev, int lvl)
{
const char *devname = udev_device_get_sysname(udev);
struct udev_list_entry *list_entry;
}
}
- log_filter(devname, NULL, NULL, NULL, env, NULL, r);
+ log_filter(devname, NULL, NULL, NULL, env, NULL, r, lvl);
return r;
}
int filter_wwid (vector, vector, char *, char *);
int filter_device (vector, vector, char *, char *, char *);
int filter_path (struct config *, struct path *);
-int filter_property(struct config *, struct udev_device *);
+int filter_property(struct config *, struct udev_device *, int);
int filter_protocol(vector, vector, struct path *);
int store_ble (vector, char *, int);
int set_ble_device (vector, char *, char *, int);
switch(pid) {
case 0:
/* child */
- close(STDOUT_FILENO);
/* dup write side of pipe to STDOUT */
- if (dup(fds[1]) < 0)
+ if (dup2(fds[1], STDOUT_FILENO) < 0) {
+ condlog(1, "failed to dup2 stdout: %m");
return -1;
+ }
+ close(fds[0]);
+ close(fds[1]);
/* Ignore writes to stderr */
null_fd = open("/dev/null", O_WRONLY);
if (null_fd > 0) {
- int err_fd __attribute__ ((unused));
-
- close(STDERR_FILENO);
- err_fd = dup(null_fd);
+ if (dup2(null_fd, STDERR_FILENO) < 0)
+ condlog(1, "failed to dup2 stderr: %m");
close(null_fd);
}
return r;
}
-int checker_selected(const struct checker *c)
-{
- if (!c)
- return 0;
- return c->cls != NULL;
-}
-
const char *checker_name(const struct checker *c)
{
if (!c || !c->cls)
{
int id;
- if (!c || c->msgid < 0 ||
+ if (!c || !c->cls || c->msgid < 0 ||
(c->msgid >= CHECKER_GENERIC_MSGTABLE_SIZE &&
c->msgid < CHECKER_FIRST_MSGID))
goto bad_id;
you want to stuff data in. */
};
+static inline int checker_selected(const struct checker *c)
+{
+ return c != NULL && c->cls != NULL;
+}
+
const char *checker_state_name(int);
int init_checkers(const char *);
void cleanup_checkers (void);
void checker_enable (struct checker *);
void checker_disable (struct checker *);
int checker_check (struct checker *, int);
-int checker_selected(const struct checker *);
int checker_is_sync(const struct checker *);
const char *checker_name (const struct checker *);
/*
tur_thread_cleanup_push(ct);
rcu_register_thread();
- condlog(3, "%d:%d : tur checker starting up", major(ct->devt),
+ condlog(4, "%d:%d : tur checker starting up", major(ct->devt),
minor(ct->devt));
tur_deep_sleep(ct);
pthread_cond_signal(&ct->active);
pthread_mutex_unlock(&ct->lock);
- condlog(3, "%d:%d : tur checker finished, state %s", major(ct->devt),
+ condlog(4, "%d:%d : tur checker finished, state %s", major(ct->devt),
minor(ct->devt), checker_state_name(state));
running = uatomic_xchg(&ct->running, 0);
}
pthread_mutex_unlock(&ct->lock);
if (tur_status == PATH_PENDING) {
- condlog(3, "%d:%d : tur checker still running",
+ condlog(4, "%d:%d : tur checker still running",
major(ct->devt), minor(ct->devt));
} else {
int running = uatomic_xchg(&ct->running, 0);
merge_num(max_sectors_kb);
merge_num(ghost_delay);
merge_num(all_tg_pt);
+ merge_num(san_path_err_threshold);
+ merge_num(san_path_err_forget_rate);
+ merge_num(san_path_err_recovery_time);
snprintf(id, sizeof(id), "%s/%s", dst->vendor, dst->product);
reconcile_features_with_options(id, &dst->features,
int deferred_remove;
int delay_watch_checks;
int delay_wait_checks;
+ int san_path_err_threshold;
+ int san_path_err_forget_rate;
+ int san_path_err_recovery_time;
int marginal_path_err_sample_time;
int marginal_path_err_rate_threshold;
int marginal_path_err_recheck_gap_time;
int deferred_remove;
int delay_watch_checks;
int delay_wait_checks;
+ int san_path_err_threshold;
+ int san_path_err_forget_rate;
+ int san_path_err_recovery_time;
int marginal_path_err_sample_time;
int marginal_path_err_rate_threshold;
int marginal_path_err_recheck_gap_time;
int processed_main_config;
int delay_watch_checks;
int delay_wait_checks;
+ int san_path_err_threshold;
+ int san_path_err_forget_rate;
+ int san_path_err_recovery_time;
int marginal_path_err_sample_time;
int marginal_path_err_rate_threshold;
int marginal_path_err_recheck_gap_time;
#include "sysfs.h"
#include "io_err_stat.h"
+/* Time in ms to wait for pending checkers in setup_map() */
+#define WAIT_CHECKERS_PENDING_MS 10
+#define WAIT_ALL_CHECKERS_PENDING_MS 90
+
/* group paths in pg by host adapter
*/
int group_by_host_adapter(struct pathgroup *pgp, vector adapters)
return 0;
}
+static int wait_for_pending_paths(struct multipath *mpp,
+ struct config *conf,
+ int n_pending, int goal, int wait_ms)
+{
+ static const struct timespec millisec =
+ { .tv_sec = 0, .tv_nsec = 1000*1000 };
+ int i, j;
+ struct path *pp;
+ struct pathgroup *pgp;
+ struct timespec ts;
+
+ do {
+ vector_foreach_slot(mpp->pg, pgp, i) {
+ vector_foreach_slot(pgp->paths, pp, j) {
+ if (pp->state != PATH_PENDING)
+ continue;
+ pp->state = get_state(pp, conf,
+ 0, PATH_PENDING);
+ if (pp->state != PATH_PENDING &&
+ --n_pending <= goal)
+ return 0;
+ }
+ }
+ ts = millisec;
+ while (nanosleep(&ts, &ts) != 0 && errno == EINTR)
+ /* nothing */;
+ } while (--wait_ms > 0);
+
+ return n_pending;
+}
+
int setup_map(struct multipath *mpp, char *params, int params_size,
struct vectors *vecs)
{
struct pathgroup * pgp;
struct config *conf;
- int i;
+ int i, n_paths;
/*
* don't bother if devmap size is unknown
select_marginal_path_err_rate_threshold(conf, mpp);
select_marginal_path_err_recheck_gap_time(conf, mpp);
select_marginal_path_double_failed_time(conf, mpp);
+ select_san_path_err_threshold(conf, mpp);
+ select_san_path_err_forget_rate(conf, mpp);
+ select_san_path_err_recovery_time(conf, mpp);
select_skip_kpartx(conf, mpp);
select_max_sectors_kb(conf, mpp);
select_ghost_delay(conf, mpp);
sysfs_set_scsi_tmo(mpp, conf->checkint);
pthread_cleanup_pop(1);
- if (mpp->marginal_path_double_failed_time > 0 &&
- mpp->marginal_path_err_sample_time > 0 &&
- mpp->marginal_path_err_recheck_gap_time > 0 &&
- mpp->marginal_path_err_rate_threshold >= 0)
+ if (marginal_path_check_enabled(mpp)) {
+ if (delay_check_enabled(mpp)) {
+ condlog(1, "%s: WARNING: both marginal_path and delay_checks error detection selected",
+ mpp->alias);
+ condlog(0, "%s: unexpected behavior may occur!",
+ mpp->alias);
+ }
start_io_err_stat_thread(vecs);
- /*
+ }
+ if (san_path_check_enabled(mpp) && delay_check_enabled(mpp)) {
+ condlog(1, "%s: WARNING: both san_path_err and delay_checks error detection selected",
+ mpp->alias);
+ condlog(0, "%s: unexpected behavior may occur!",
+ mpp->alias);
+ }
+
+ n_paths = VECTOR_SIZE(mpp->paths);
+ /*
* assign paths to path groups -- start with no groups and all paths
* in mpp->paths
*/
if (mpp->pgpolicyfn && mpp->pgpolicyfn(mpp))
return 1;
+ /*
+ * If async state detection is used, see if pending state checks
+ * have finished, to get nr_active right. We can't wait until the
+ * checkers time out, as that may take 30s or more, and we are
+ * holding the vecs lock.
+ */
+ if (conf->force_sync == 0 && n_paths > 0) {
+ int n_pending = pathcount(mpp, PATH_PENDING);
+
+ if (n_pending > 0)
+ n_pending = wait_for_pending_paths(
+ mpp, conf, n_pending, 0,
+ WAIT_CHECKERS_PENDING_MS);
+ /* ALL paths pending - wait some more, but be satisfied
+ with only some paths finished */
+ if (n_pending == n_paths)
+ n_pending = wait_for_pending_paths(
+ mpp, conf, n_pending,
+ n_paths >= 4 ? 2 : 1,
+ WAIT_ALL_CHECKERS_PENDING_MS);
+ if (n_pending > 0)
+ condlog(2, "%s: setting up map with %d/%d path checkers pending",
+ mpp->alias, n_pending, n_paths);
+ }
mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
/*
return 1;
}
-/*
- * Return value:
- */
-#define DOMAP_RETRY -1
-#define DOMAP_FAIL 0
-#define DOMAP_OK 1
-#define DOMAP_EXIST 2
-#define DOMAP_DRY 3
-
int domap(struct multipath *mpp, char *params, int is_daemon)
{
int r = DOMAP_FAIL;
if (recv_packet(fd, &reply, timeout) != 0)
goto out;
- if (strstr(reply, "shutdown"))
+ if (reply && strstr(reply, "shutdown"))
goto out_free;
ret = 1;
int coalesce_paths (struct vectors * vecs, vector newmp, char * refwwid,
int force_reload, enum mpath_cmds cmd)
{
- int r = 1;
- int k, i;
+ int ret = CP_FAIL;
+ int k, i, r;
int is_daemon = (cmd == CMD_NONE) ? 1 : 0;
char params[PARAMS_SIZE];
struct multipath * mpp;
vector pathvec = vecs->pathvec;
struct config *conf;
int allow_queueing;
+ uint64_t *size_mismatch_seen;
/* ignore refwwid if it's empty */
if (refwwid && !strlen(refwwid))
pp1->mpp = NULL;
}
}
+
+ if (VECTOR_SIZE(pathvec) == 0)
+ return CP_OK;
+ size_mismatch_seen = calloc((VECTOR_SIZE(pathvec) - 1) / 64 + 1,
+ sizeof(uint64_t));
+ if (size_mismatch_seen == NULL)
+ return CP_FAIL;
+
vector_foreach_slot (pathvec, pp1, k) {
int invalid;
/* skip this path for some reason */
continue;
}
- /* 2. if path already coalesced */
- if (pp1->mpp)
+ /* 2. if path already coalesced, or seen and discarded */
+ if (pp1->mpp || is_bit_set_in_array(k, size_mismatch_seen))
continue;
/* 3. if path has disappeared */
* ouch, avoid feeding that to the DM
*/
condlog(0, "%s: size %llu, expected %llu. "
- "Discard", pp2->dev_t, pp2->size,
+ "Discard", pp2->dev, pp2->size,
mpp->size);
mpp->action = ACT_REJECT;
+ set_bit_in_array(i, size_mismatch_seen);
}
}
verify_paths(mpp, vecs);
"ignoring" : "removing");
remove_map(mpp, vecs, 0);
continue;
- } else /* if (r == DOMAP_RETRY) */
- return r;
+ } else /* if (r == DOMAP_RETRY && !is_daemon) */ {
+ ret = CP_RETRY;
+ goto out;
+ }
}
if (r == DOMAP_DRY)
continue;
if (newmp) {
if (mpp->action != ACT_REJECT) {
if (!vector_alloc_slot(newmp))
- return 1;
+ goto out;
vector_set_slot(newmp, mpp);
}
else
condlog(2, "%s: remove (dead)", alias);
}
}
- return 0;
+ ret = CP_OK;
+out:
+ free(size_mismatch_seen);
+ return ret;
}
struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type)
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
if (pp->udev && pp->uid_attribute &&
- filter_property(conf, pp->udev) > 0)
+ filter_property(conf, pp->udev, 3) > 0)
invalid = 1;
pthread_cleanup_pop(1);
if (invalid)
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
if (pp->udev && pp->uid_attribute &&
- filter_property(conf, pp->udev) > 0)
+ filter_property(conf, pp->udev, 3) > 0)
invalid = 1;
pthread_cleanup_pop(1);
if (invalid)
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
if (pp->udev && pp->uid_attribute &&
- filter_property(conf, pp->udev) > 0)
+ filter_property(conf, pp->udev, 3) > 0)
invalid = 1;
pthread_cleanup_pop(1);
if (invalid)
ACT_IMPOSSIBLE,
};
+/*
+ * Return value of domap()
+ * DOMAP_RETRY is only used for ACT_CREATE (see domap()).
+ */
+enum {
+ DOMAP_RETRY = -1,
+ DOMAP_FAIL = 0,
+ DOMAP_OK = 1,
+ DOMAP_EXIST = 2,
+ DOMAP_DRY = 3
+};
+
+/*
+ * Return value of coalesce_paths()
+ * CP_RETRY is only used in non-daemon case (multipath).
+ */
+enum {
+ CP_OK = 0,
+ CP_FAIL,
+ CP_RETRY,
+};
+
#define FLUSH_ONE 1
#define FLUSH_ALL 2
return r;
}
+/*
+ * returns:
+ * 1 : is multipath device
+ * 0 : is not multipath device
+ * -1 : error
+ */
int dm_is_mpath(const char *name)
{
- int r = 0;
+ int r = -1;
struct dm_task *dmt;
struct dm_info info;
uint64_t start, length;
const char *uuid;
if (!(dmt = libmp_dm_task_create(DM_DEVICE_TABLE)))
- return 0;
+ goto out;
if (!dm_task_set_name(dmt, name))
- goto out;
+ goto out_task;
dm_task_no_open_count(dmt);
if (!dm_task_run(dmt))
- goto out;
+ goto out_task;
- if (!dm_task_get_info(dmt, &info) || !info.exists)
- goto out;
+ if (!dm_task_get_info(dmt, &info))
+ goto out_task;
+
+ r = 0;
+
+ if (!info.exists)
+ goto out_task;
uuid = dm_task_get_uuid(dmt);
if (!uuid || strncmp(uuid, UUID_PREFIX, UUID_PREFIX_LEN) != 0)
- goto out;
+ goto out_task;
/* Fetch 1st target */
- dm_get_next_target(dmt, NULL, &start, &length, &target_type, ¶ms);
+ if (dm_get_next_target(dmt, NULL, &start, &length, &target_type,
+ ¶ms) != NULL)
+ /* multiple targets */
+ goto out_task;
if (!target_type || strcmp(target_type, TGT_MPATH) != 0)
- goto out;
+ goto out_task;
r = 1;
-out:
+out_task:
dm_task_destroy(dmt);
+out:
+ if (r < 0)
+ condlog(2, "%s: dm command failed in %s", name, __FUNCTION__);
return r;
}
unsigned long long mapsize;
char params[PARAMS_SIZE] = {0};
- if (!dm_is_mpath(mapname))
+ if (dm_is_mpath(mapname) != 1)
return 0; /* nothing to do */
/* if the device currently has no partitions, do not
}
do {
- if (!dm_is_mpath(names->name))
+ if (dm_is_mpath(names->name) != 1)
goto next;
mpp = dm_get_multipath(names->name);
int i;
if (set_yes_no_undef(strvec, &conf->find_multipaths) == 0 &&
- conf->find_multipaths != YNU_UNDEF)
+ conf->find_multipaths != FIND_MULTIPATHS_UNDEF)
return 0;
buff = set_value(strvec);
declare_hw_snprint(delay_wait_checks, print_off_int_undef)
declare_mp_handler(delay_wait_checks, set_off_int_undef)
declare_mp_snprint(delay_wait_checks, print_off_int_undef)
+declare_def_handler(san_path_err_threshold, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_threshold, print_off_int_undef,
+ DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_threshold, set_off_int_undef)
+declare_ovr_snprint(san_path_err_threshold, print_off_int_undef)
+declare_hw_handler(san_path_err_threshold, set_off_int_undef)
+declare_hw_snprint(san_path_err_threshold, print_off_int_undef)
+declare_mp_handler(san_path_err_threshold, set_off_int_undef)
+declare_mp_snprint(san_path_err_threshold, print_off_int_undef)
+declare_def_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_forget_rate, print_off_int_undef,
+ DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_ovr_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_hw_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_hw_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_mp_handler(san_path_err_forget_rate, set_off_int_undef)
+declare_mp_snprint(san_path_err_forget_rate, print_off_int_undef)
+declare_def_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_def_snprint_defint(san_path_err_recovery_time, print_off_int_undef,
+ DEFAULT_ERR_CHECKS)
+declare_ovr_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_ovr_snprint(san_path_err_recovery_time, print_off_int_undef)
+declare_hw_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_hw_snprint(san_path_err_recovery_time, print_off_int_undef)
+declare_mp_handler(san_path_err_recovery_time, set_off_int_undef)
+declare_mp_snprint(san_path_err_recovery_time, print_off_int_undef)
declare_def_handler(marginal_path_err_sample_time, set_off_int_undef)
declare_def_snprint_defint(marginal_path_err_sample_time, print_off_int_undef,
DEFAULT_ERR_CHECKS)
install_keyword("config_dir", &def_config_dir_handler, &snprint_def_config_dir);
install_keyword("delay_watch_checks", &def_delay_watch_checks_handler, &snprint_def_delay_watch_checks);
install_keyword("delay_wait_checks", &def_delay_wait_checks_handler, &snprint_def_delay_wait_checks);
+ install_keyword("san_path_err_threshold", &def_san_path_err_threshold_handler, &snprint_def_san_path_err_threshold);
+ install_keyword("san_path_err_forget_rate", &def_san_path_err_forget_rate_handler, &snprint_def_san_path_err_forget_rate);
+ install_keyword("san_path_err_recovery_time", &def_san_path_err_recovery_time_handler, &snprint_def_san_path_err_recovery_time);
install_keyword("marginal_path_err_sample_time", &def_marginal_path_err_sample_time_handler, &snprint_def_marginal_path_err_sample_time);
install_keyword("marginal_path_err_rate_threshold", &def_marginal_path_err_rate_threshold_handler, &snprint_def_marginal_path_err_rate_threshold);
install_keyword("marginal_path_err_recheck_gap_time", &def_marginal_path_err_recheck_gap_time_handler, &snprint_def_marginal_path_err_recheck_gap_time);
install_keyword("deferred_remove", &hw_deferred_remove_handler, &snprint_hw_deferred_remove);
install_keyword("delay_watch_checks", &hw_delay_watch_checks_handler, &snprint_hw_delay_watch_checks);
install_keyword("delay_wait_checks", &hw_delay_wait_checks_handler, &snprint_hw_delay_wait_checks);
+ install_keyword("san_path_err_threshold", &hw_san_path_err_threshold_handler, &snprint_hw_san_path_err_threshold);
+ install_keyword("san_path_err_forget_rate", &hw_san_path_err_forget_rate_handler, &snprint_hw_san_path_err_forget_rate);
+ install_keyword("san_path_err_recovery_time", &hw_san_path_err_recovery_time_handler, &snprint_hw_san_path_err_recovery_time);
install_keyword("marginal_path_err_sample_time", &hw_marginal_path_err_sample_time_handler, &snprint_hw_marginal_path_err_sample_time);
install_keyword("marginal_path_err_rate_threshold", &hw_marginal_path_err_rate_threshold_handler, &snprint_hw_marginal_path_err_rate_threshold);
install_keyword("marginal_path_err_recheck_gap_time", &hw_marginal_path_err_recheck_gap_time_handler, &snprint_hw_marginal_path_err_recheck_gap_time);
install_keyword("deferred_remove", &ovr_deferred_remove_handler, &snprint_ovr_deferred_remove);
install_keyword("delay_watch_checks", &ovr_delay_watch_checks_handler, &snprint_ovr_delay_watch_checks);
install_keyword("delay_wait_checks", &ovr_delay_wait_checks_handler, &snprint_ovr_delay_wait_checks);
+ install_keyword("san_path_err_threshold", &ovr_san_path_err_threshold_handler, &snprint_ovr_san_path_err_threshold);
+ install_keyword("san_path_err_forget_rate", &ovr_san_path_err_forget_rate_handler, &snprint_ovr_san_path_err_forget_rate);
+ install_keyword("san_path_err_recovery_time", &ovr_san_path_err_recovery_time_handler, &snprint_ovr_san_path_err_recovery_time);
install_keyword("marginal_path_err_sample_time", &ovr_marginal_path_err_sample_time_handler, &snprint_ovr_marginal_path_err_sample_time);
install_keyword("marginal_path_err_rate_threshold", &ovr_marginal_path_err_rate_threshold_handler, &snprint_ovr_marginal_path_err_rate_threshold);
install_keyword("marginal_path_err_recheck_gap_time", &ovr_marginal_path_err_recheck_gap_time_handler, &snprint_ovr_marginal_path_err_recheck_gap_time);
install_keyword("deferred_remove", &mp_deferred_remove_handler, &snprint_mp_deferred_remove);
install_keyword("delay_watch_checks", &mp_delay_watch_checks_handler, &snprint_mp_delay_watch_checks);
install_keyword("delay_wait_checks", &mp_delay_wait_checks_handler, &snprint_mp_delay_wait_checks);
+ install_keyword("san_path_err_threshold", &mp_san_path_err_threshold_handler, &snprint_mp_san_path_err_threshold);
+ install_keyword("san_path_err_forget_rate", &mp_san_path_err_forget_rate_handler, &snprint_mp_san_path_err_forget_rate);
+ install_keyword("san_path_err_recovery_time", &mp_san_path_err_recovery_time_handler, &snprint_mp_san_path_err_recovery_time);
install_keyword("marginal_path_err_sample_time", &mp_marginal_path_err_sample_time_handler, &snprint_mp_marginal_path_err_sample_time);
install_keyword("marginal_path_err_rate_threshold", &mp_marginal_path_err_rate_threshold_handler, &snprint_mp_marginal_path_err_rate_threshold);
install_keyword("marginal_path_err_recheck_gap_time", &mp_marginal_path_err_recheck_gap_time_handler, &snprint_mp_marginal_path_err_recheck_gap_time);
/* Parse error, ignore */
return 0;
}
- return preferred;
+ return !!preferred;
}
static void
int dev_loss_tmo = mpp->dev_loss;
if (mpp->no_path_retry > 0) {
- uint64_t no_path_retry_tmo = mpp->no_path_retry * checkint;
+ uint64_t no_path_retry_tmo = (uint64_t)mpp->no_path_retry * checkint;
if (no_path_retry_tmo > MAX_DEV_LOSS_TMO)
no_path_retry_tmo = MAX_DEV_LOSS_TMO;
memset(buff, 0x0, 4096);
if (sgio_get_vpd(buff, 4096, fd, pg) < 0) {
- condlog(3, "failed to issue vpd inquiry for pg%02x",
+ int lvl = pg == 0x80 || pg == 0x83 ? 3 : 4;
+
+ condlog(lvl, "failed to issue vpd inquiry for pg%02x",
pg);
return -errno;
}
devt = udev_device_get_devnum(pp->udev);
snprintf(pp->dev_t, BLK_DEV_SIZE, "%d:%d", major(devt), minor(devt));
- condlog(3, "%s: dev_t = %s", pp->dev, pp->dev_t);
+ condlog(4, "%s: dev_t = %s", pp->dev, pp->dev_t);
if (sysfs_get_size(pp, &pp->size))
return PATHINFO_FAILED;
}
- condlog(3, "%s: path state = %s", pp->dev, buff);
+ condlog(4, "%s: path state = %s", pp->dev, buff);
if (pp->bus == SYSFS_BUS_SCSI) {
if (!strncmp(buff, "offline", 7)) {
struct checker * c = &pp->checker;
int state;
- condlog(3, "%s: get_state", pp->dev);
-
if (!checker_selected(c)) {
if (daemon) {
if (pathinfo(pp, conf, DI_SYSFS) != PATHINFO_OK) {
struct prio * p;
struct config *conf;
int checker_timeout;
+ int old_prio;
if (!pp)
return 0;
conf = get_multipath_config();
checker_timeout = conf->checker_timeout;
put_multipath_config(conf);
+ old_prio = pp->priority;
pp->priority = prio_getprio(p, pp, checker_timeout);
if (pp->priority < 0) {
condlog(3, "%s: %s prio error", pp->dev, prio_name(p));
pp->priority = PRIO_UNDEF;
return 1;
}
- condlog(3, "%s: %s prio = %u",
+ condlog((old_prio == pp->priority ? 4 : 3), "%s: %s prio = %u",
pp->dev, prio_name(p), pp->priority);
return 0;
}
udev_device_get_sysattr_value(pp->udev, "hidden");
if (hidden && !strcmp(hidden, "1")) {
- condlog(3, "%s: hidden", pp->dev);
+ condlog(4, "%s: hidden", pp->dev);
return PATHINFO_SKIPPED;
}
if (is_claimed_by_foreign(pp->udev) ||
- filter_property(conf, pp->udev) > 0)
+ filter_property(conf, pp->udev, 4) > 0)
return PATHINFO_SKIPPED;
}
pp->dev) > 0)
return PATHINFO_SKIPPED;
- condlog(3, "%s: mask = 0x%x", pp->dev, mask);
+ condlog(4, "%s: mask = 0x%x", pp->dev, mask);
/*
* Sanity check: we need the device number to
if ((mask & DI_WWID) && !strlen(pp->wwid)) {
get_uid(pp, path_state, pp->udev);
if (!strlen(pp->wwid)) {
- pp->initialized = INIT_MISSING_UDEV;
- pp->tick = conf->retrigger_delay;
+ if (pp->bus == SYSFS_BUS_UNDEF)
+ return PATHINFO_SKIPPED;
+ if (pp->initialized != INIT_FAILED) {
+ pp->initialized = INIT_MISSING_UDEV;
+ pp->tick = conf->retrigger_delay;
+ }
return PATHINFO_OK;
}
else
* Recoverable error, for example faulty or offline path
*/
pp->chkrstate = pp->state = PATH_DOWN;
- if (pp->initialized == INIT_FAILED)
+ if (pp->initialized == INIT_NEW || pp->initialized == INIT_FAILED)
memset(pp->wwid, 0, WWID_SIZE);
return PATHINFO_OK;
}
FREE(f);
- condlog(3, "%s: assembled map [%s]", mp->alias, params);
+ condlog(4, "%s: assembled map [%s]", mp->alias, params);
return 0;
err:
p = params;
- condlog(3, "%s: disassemble map [%s]", mpp->alias, params);
+ condlog(4, "%s: disassemble map [%s]", mpp->alias, params);
/*
* features
p = params;
- condlog(3, "%s: disassemble status [%s]", mpp->alias, params);
+ condlog(4, "%s: disassemble status [%s]", mpp->alias, params);
/*
* features
#
# Copyright (C) 2003 Christophe Varoqui, <christophe.varoqui@opensvc.com>
#
+TOPDIR=../..
include ../../Makefile.inc
-CFLAGS += $(LIB_CFLAGS) -I..
+CFLAGS += $(LIB_CFLAGS) -I.. -I$(nvmedir)
-# If you add or remove a checker also update multipath/multipath.conf.5
-LIBS= \
- libforeign-nvme.so
+LIBS = libforeign-nvme.so
all: $(LIBS)
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+#include "nvme-lib.h"
+#include <sys/types.h>
#include <sys/sysmacros.h>
#include <libudev.h>
#include <stdio.h>
#include <dirent.h>
#include <errno.h>
#include <ctype.h>
+#include <fcntl.h>
#include "util.h"
#include "vector.h"
#include "generic.h"
const char *THIS;
struct nvme_map;
+struct nvme_pathgroup {
+ struct gen_pathgroup gen;
+ struct _vector pathvec;
+};
+
struct nvme_path {
struct gen_path gen;
struct udev_device *udev;
struct udev_device *ctl;
struct nvme_map *map;
bool seen;
-};
-
-struct nvme_pathgroup {
- struct gen_pathgroup gen;
- vector pathvec;
+ /*
+ * The kernel works in failover mode.
+ * Each path has a separate path group.
+ */
+ struct nvme_pathgroup pg;
};
struct nvme_map {
struct udev_device *udev;
struct udev_device *subsys;
dev_t devt;
- /* Just one static pathgroup for NVMe for now */
- struct nvme_pathgroup pg;
- struct gen_pathgroup *gpg;
struct _vector pgvec;
- vector pathvec;
int nr_live;
+ int ana_supported;
};
#define NAME_LEN 64 /* buffer length for temp attributes */
#define const_gen_path_to_nvme(g) ((const struct nvme_path*)(g))
#define gen_path_to_nvme(g) ((struct nvme_path*)(g))
#define nvme_path_to_gen(n) &((n)->gen)
+#define nvme_pg_to_path(x) (VECTOR_SLOT(&((x)->pathvec), 0))
+#define nvme_path_to_pg(x) &((x)->pg)
static void cleanup_nvme_path(struct nvme_path *path)
{
condlog(5, "%s: %p %p", __func__, path, path->udev);
if (path->udev)
udev_device_unref(path->udev);
+ vector_reset(&path->pg.pathvec);
+
/* ctl is implicitly referenced by udev, no need to unref */
free(path);
}
static void cleanup_nvme_map(struct nvme_map *map)
{
- if (map->pathvec) {
- struct nvme_path *path;
- int i;
+ struct nvme_pathgroup *pg;
+ struct nvme_path *path;
+ int i;
- vector_foreach_slot_backwards(map->pathvec, path, i) {
- condlog(5, "%s: %d %p", __func__, i, path);
- cleanup_nvme_path(path);
- vector_del_slot(map->pathvec, i);
- }
+ vector_foreach_slot_backwards(&map->pgvec, pg, i) {
+ path = nvme_pg_to_path(pg);
+ condlog(5, "%s: %d %p", __func__, i, path);
+ cleanup_nvme_path(path);
+ vector_del_slot(&map->pgvec, i);
}
- vector_free(map->pathvec);
+ vector_reset(&map->pgvec);
if (map->udev)
udev_device_unref(map->udev);
/* subsys is implicitly referenced by udev, no need to unref */
return snprintf(buff, len, "%s",
udev_device_get_sysname(nvm->udev));
case 'n':
- return snprintf(buff, len, "%s:NQN:%s",
- udev_device_get_sysname(nvm->subsys),
+ return snprintf(buff, len, "%s:nsid.%s",
udev_device_get_sysattr_value(nvm->subsys,
- "subsysnqn"));
+ "subsysnqn"),
+ udev_device_get_sysattr_value(nvm->udev,
+ "nsid"));
case 'w':
return snprintf(buff, len, "%s",
udev_device_get_sysattr_value(nvm->udev,
return snprintf(buff, len, "%s", "rw");
case 'G':
return snprintf(buff, len, "%s", THIS);
+ case 'h':
+ if (nvm->ana_supported == YNU_YES)
+ return snprintf(buff, len, "ANA");
default:
- return snprintf(buff, len, N_A);
break;
}
- return 0;
+
+ return snprintf(buff, len, N_A);
}
static const struct _vector*
const struct nvme_pathgroup *gp = const_gen_pg_to_nvme(gpg);
/* This is all used under the lock, no need to copy */
- return gp->pathvec;
+ return &gp->pathvec;
}
static void
/* empty */
}
-static int snprint_nvme_pg(const struct gen_pathgroup *gmp,
- char *buff, int len, char wildcard)
-{
- return snprintf(buff, len, N_A);
-}
-
static int snprint_hcil(const struct nvme_path *np, char *buf, int len)
{
unsigned int nvmeid, ctlid, nsid;
devt = udev_device_get_devnum(np->udev);
return snprintf(buff, len, "%u:%u", major(devt), minor(devt));
case 'o':
- sysfs_attr_get_value(np->ctl, "state", fld, sizeof(fld));
- return snprintf(buff, len, "%s", fld);
+ if (sysfs_attr_get_value(np->ctl, "state",
+ fld, sizeof(fld)) > 0)
+ return snprintf(buff, len, "%s", fld);
+ break;
+ case 'T':
+ if (sysfs_attr_get_value(np->udev, "ana_state", fld,
+ sizeof(fld)) > 0)
+ return snprintf(buff, len, "%s", fld);
+ break;
+ case 'p':
+ if (sysfs_attr_get_value(np->udev, "ana_state", fld,
+ sizeof(fld)) > 0) {
+ rstrip(fld);
+ if (!strcmp(fld, "optimized"))
+ return snprintf(buff, len, "%d", 50);
+ else if (!strcmp(fld, "non-optimized"))
+ return snprintf(buff, len, "%d", 10);
+ else
+ return snprintf(buff, len, "%d", 0);
+ }
+ break;
case 's':
snprintf(fld, sizeof(fld), "%s",
udev_device_get_sysattr_value(np->ctl,
udev_device_get_sysname(pci));
/* fall through */
default:
- return snprintf(buff, len, "%s", N_A);
break;
}
+ return snprintf(buff, len, "%s", N_A);
return 0;
}
+static int snprint_nvme_pg(const struct gen_pathgroup *gmp,
+ char *buff, int len, char wildcard)
+{
+ const struct nvme_pathgroup *pg = const_gen_pg_to_nvme(gmp);
+ const struct nvme_path *path = nvme_pg_to_path(pg);
+
+ switch (wildcard) {
+ case 't':
+ return snprint_nvme_path(nvme_path_to_gen(path),
+ buff, len, 'T');
+ case 'p':
+ return snprint_nvme_path(nvme_path_to_gen(path),
+ buff, len, 'p');
+ default:
+ return snprintf(buff, len, N_A);
+ }
+}
+
static int nvme_style(const struct gen_multipath* gm,
char *buf, int len, int verbosity)
{
static struct nvme_path *
_find_path_by_syspath(struct nvme_map *map, const char *syspath)
{
- struct nvme_path *path;
+ struct nvme_pathgroup *pg;
char real[PATH_MAX];
const char *ppath;
int i;
ppath = syspath;
}
- vector_foreach_slot(map->pathvec, path, i) {
+ vector_foreach_slot(&map->pgvec, pg, i) {
+ struct nvme_path *path = nvme_pg_to_path(pg);
+
if (!strcmp(ppath,
udev_device_get_syspath(path->udev)))
return path;
return blkdev;
}
+static void test_ana_support(struct nvme_map *map, struct udev_device *ctl)
+{
+ const char *dev_t;
+ char sys_path[64];
+ long fd;
+ int rc;
+
+ if (map->ana_supported != YNU_UNDEF)
+ return;
+
+ dev_t = udev_device_get_sysattr_value(ctl, "dev");
+ if (snprintf(sys_path, sizeof(sys_path), "/dev/char/%s", dev_t)
+ >= sizeof(sys_path))
+ return;
+
+ fd = open(sys_path, O_RDONLY);
+ if (fd == -1) {
+ condlog(2, "%s: error opening %s", __func__, sys_path);
+ return;
+ }
+
+ pthread_cleanup_push(close_fd, (void *)fd);
+ rc = nvme_id_ctrl_ana(fd, NULL);
+ if (rc < 0)
+ condlog(2, "%s: error in nvme_id_ctrl: %s", __func__,
+ strerror(errno));
+ else {
+ map->ana_supported = (rc == 1 ? YNU_YES : YNU_NO);
+ condlog(3, "%s: NVMe ctrl %s: ANA %s supported", __func__, dev_t,
+ rc == 1 ? "is" : "is not");
+ }
+ pthread_cleanup_pop(1);
+}
+
static void _find_controllers(struct context *ctx, struct nvme_map *map)
{
char pathbuf[PATH_MAX], realbuf[PATH_MAX];
struct dirent **di = NULL;
struct scandir_result sr;
struct udev_device *subsys;
+ struct nvme_pathgroup *pg;
struct nvme_path *path;
int r, i, n;
if (map == NULL || map->udev == NULL)
return;
- vector_foreach_slot(map->pathvec, path, i)
+ vector_foreach_slot(&map->pgvec, pg, i) {
+ path = nvme_pg_to_path(pg);
path->seen = false;
+ }
subsys = udev_device_get_parent_with_subsystem_devtype(map->udev,
"nvme-subsystem",
if (udev == NULL)
continue;
- path = _find_path_by_syspath(map, udev_device_get_syspath(udev));
+ path = _find_path_by_syspath(map,
+ udev_device_get_syspath(udev));
if (path != NULL) {
path->seen = true;
condlog(4, "%s: %s already known",
cleanup_nvme_path(path);
continue;
}
+ test_ana_support(map, path->ctl);
- if (vector_alloc_slot(map->pathvec) == NULL) {
+ path->pg.gen.ops = &nvme_pg_ops;
+ if (vector_alloc_slot(&path->pg.pathvec) == NULL) {
cleanup_nvme_path(path);
continue;
}
+ vector_set_slot(&path->pg.pathvec, path);
+ if (vector_alloc_slot(&map->pgvec) == NULL) {
+ cleanup_nvme_path(path);
+ continue;
+ }
+ vector_set_slot(&map->pgvec, &path->pg);
condlog(3, "%s: %s: new path %s added to %s",
__func__, THIS, udev_device_get_sysname(udev),
udev_device_get_sysname(map->udev));
- vector_set_slot(map->pathvec, path);
}
pthread_cleanup_pop(1);
map->nr_live = 0;
- vector_foreach_slot_backwards(map->pathvec, path, i) {
+ vector_foreach_slot_backwards(&map->pgvec, pg, i) {
+ path = nvme_pg_to_path(pg);
if (!path->seen) {
condlog(1, "path %d not found in %s any more",
i, udev_device_get_sysname(map->udev));
- vector_del_slot(map->pathvec, i);
+ vector_del_slot(&map->pgvec, i);
cleanup_nvme_path(path);
} else {
static const char live_state[] = "live";
}
condlog(3, "%s: %s: map %s has %d/%d live paths", __func__, THIS,
udev_device_get_sysname(map->udev), map->nr_live,
- VECTOR_SIZE(map->pathvec));
+ VECTOR_SIZE(&map->pgvec));
}
static int _add_map(struct context *ctx, struct udev_device *ud,
map->subsys = subsys;
map->gen.ops = &nvme_map_ops;
- map->pathvec = vector_alloc();
- if (map->pathvec == NULL) {
- cleanup_nvme_map(map);
- return FOREIGN_ERR;
- }
-
- map->pg.gen.ops = &nvme_pg_ops;
- map->pg.pathvec = map->pathvec;
- map->gpg = nvme_pg_to_gen(&map->pg);
-
- map->pgvec.allocated = 1;
- map->pgvec.slot = (void**)&map->gpg;
-
if (vector_alloc_slot(ctx->mpvec) == NULL) {
cleanup_nvme_map(map);
return FOREIGN_ERR;
condlog(5, "%s called for \"%s\"", __func__, THIS);
vector_foreach_slot(ctx->mpvec, gm, i) {
const struct nvme_map *nm = const_gen_mp_to_nvme(gm);
- paths = vector_convert(paths, nm->pathvec,
- struct gen_path, identity);
+ paths = vector_convert(paths, &nm->pgvec,
+ struct nvme_pathgroup, nvme_pg_to_path);
}
return paths;
}
void log_safe (int prio, const char * fmt, va_list ap)
{
+ if (prio > LOG_DEBUG)
+ prio = LOG_DEBUG;
+
if (log_thr == (pthread_t)0) {
vsyslog(prio, fmt, ap);
return;
--- /dev/null
+#include <sys/types.h>
+/* avoid inclusion of standard API */
+#define _NVME_LIB_C 1
+#include "nvme-lib.h"
+#include "nvme-ioctl.c"
+#include "debug.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg)
+{
+ if (err > 0)
+ condlog(3, "%s: %s: NVMe status %d", dev, msg, err);
+ else if (err < 0)
+ condlog(3, "%s: %s: %s", dev, msg, strerror(errno));
+ return err;
+}
+
+int libmp_nvme_get_nsid(int fd)
+{
+ return nvme_get_nsid(fd);
+}
+
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl)
+{
+ return nvme_identify_ctrl(fd, ctrl);
+}
+
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+ struct nvme_id_ns *ns)
+{
+ return nvme_identify_ns(fd, nsid, present, ns);
+}
+
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
+{
+ return nvme_ana_log(fd, ana_log, ana_log_len, rgo);
+}
+
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl)
+{
+ int rc;
+ struct nvme_id_ctrl c;
+
+ rc = nvme_identify_ctrl(fd, &c);
+ if (rc < 0)
+ return rc;
+ if (ctrl)
+ *ctrl = c;
+ return c.cmic & (1 << 3) ? 1 : 0;
+}
--- /dev/null
+#ifndef NVME_LIB_H
+#define NVME_LIB_H
+
+#include "nvme.h"
+
+int log_nvme_errcode(int err, const char *dev, const char *msg);
+int libmp_nvme_get_nsid(int fd);
+int libmp_nvme_identify_ctrl(int fd, struct nvme_id_ctrl *ctrl);
+int libmp_nvme_identify_ns(int fd, __u32 nsid, bool present,
+ struct nvme_id_ns *ns);
+int libmp_nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
+/*
+ * Identify controller, and return true if ANA is supported
+ * ctrl will be filled in if controller is identified, even w/o ANA
+ * ctrl may be NULL
+ */
+int nvme_id_ctrl_ana(int fd, struct nvme_id_ctrl *ctrl);
+
+#ifndef _NVME_LIB_C
+/*
+ * In all files except nvme-lib.c, the nvme functions can be called
+ * by their usual name.
+ */
+#define nvme_get_nsid libmp_nvme_get_nsid
+#define nvme_identify_ctrl libmp_nvme_identify_ctrl
+#define nvme_identify_ns libmp_nvme_identify_ns
+#define nvme_ana_log libmp_nvme_ana_log
+/*
+ * Undefine these to avoid clashes with libmultipath's byteorder.h
+ */
+#undef cpu_to_le16
+#undef cpu_to_le32
+#undef cpu_to_le64
+#undef le16_to_cpu
+#undef le32_to_cpu
+#undef le64_to_cpu
+#endif
+
+#endif /* NVME_LIB_H */
--- /dev/null
+////////////////////////////////////////////////////////////////////////
+//
+// Copyright 2014 PMC-Sierra, Inc.
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+//
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+//
+// Author: Logan Gunthorpe <logang@deltatee.com>
+// Logan Gunthorpe
+//
+// Date: Oct 23 2014
+//
+// Description:
+// Header file for argconfig.c
+//
+////////////////////////////////////////////////////////////////////////
+
+#ifndef argconfig_H
+#define argconfig_H
+
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+
+enum argconfig_types {
+ CFG_NONE,
+ CFG_STRING,
+ CFG_INT,
+ CFG_SIZE,
+ CFG_LONG,
+ CFG_LONG_SUFFIX,
+ CFG_DOUBLE,
+ CFG_BOOL,
+ CFG_BYTE,
+ CFG_SHORT,
+ CFG_POSITIVE,
+ CFG_INCREMENT,
+ CFG_SUBOPTS,
+ CFG_FILE_A,
+ CFG_FILE_W,
+ CFG_FILE_R,
+ CFG_FILE_AP,
+ CFG_FILE_WP,
+ CFG_FILE_RP,
+};
+
+struct argconfig_commandline_options {
+ const char *option;
+ const char short_option;
+ const char *meta;
+ enum argconfig_types config_type;
+ void *default_value;
+ int argument_type;
+ const char *help;
+};
+
+#define CFG_MAX_SUBOPTS 500
+#define MAX_HELP_FUNC 20
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void argconfig_help_func();
+void argconfig_append_usage(const char *str);
+void argconfig_print_help(const char *program_desc,
+ const struct argconfig_commandline_options *options);
+int argconfig_parse(int argc, char *argv[], const char *program_desc,
+ const struct argconfig_commandline_options *options,
+ void *config_out, size_t config_size);
+int argconfig_parse_subopt_string(char *string, char **options,
+ size_t max_options);
+unsigned argconfig_parse_comma_sep_array(char *string, int *ret,
+ unsigned max_length);
+unsigned argconfig_parse_comma_sep_array_long(char *string,
+ unsigned long long *ret,
+ unsigned max_length);
+void argconfig_register_help_func(argconfig_help_func * f);
+
+void print_word_wrapped(const char *s, int indent, int start);
+#ifdef __cplusplus
+}
+#endif
+#endif
--- /dev/null
+#ifndef __JSON__H
+#define __JSON__H
+
+struct json_object;
+struct json_array;
+struct json_pair;
+
+#define JSON_TYPE_STRING 0
+#define JSON_TYPE_INTEGER 1
+#define JSON_TYPE_FLOAT 2
+#define JSON_TYPE_OBJECT 3
+#define JSON_TYPE_ARRAY 4
+#define JSON_TYPE_UINT 5
+#define JSON_PARENT_TYPE_PAIR 0
+#define JSON_PARENT_TYPE_ARRAY 1
+struct json_value {
+ int type;
+ union {
+ long long integer_number;
+ unsigned long long uint_number;
+ long double float_number;
+ char *string;
+ struct json_object *object;
+ struct json_array *array;
+ };
+ int parent_type;
+ union {
+ struct json_pair *parent_pair;
+ struct json_array *parent_array;
+ };
+};
+
+struct json_array {
+ struct json_value **values;
+ int value_cnt;
+ struct json_value *parent;
+};
+
+struct json_object {
+ struct json_pair **pairs;
+ int pair_cnt;
+ struct json_value *parent;
+};
+
+struct json_pair {
+ char *name;
+ struct json_value *value;
+ struct json_object *parent;
+};
+
+struct json_object *json_create_object(void);
+struct json_array *json_create_array(void);
+
+void json_free_object(struct json_object *obj);
+
+int json_object_add_value_type(struct json_object *obj, const char *name, int type, ...);
+#define json_object_add_value_int(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_INTEGER, (long long) (val))
+#define json_object_add_value_uint(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_UINT, (unsigned long long) (val))
+#define json_object_add_value_float(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_FLOAT, (val))
+#define json_object_add_value_string(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_STRING, (val))
+#define json_object_add_value_object(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_OBJECT, (val))
+#define json_object_add_value_array(obj, name, val) \
+ json_object_add_value_type((obj), name, JSON_TYPE_ARRAY, (val))
+int json_array_add_value_type(struct json_array *array, int type, ...);
+#define json_array_add_value_int(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_INTEGER, (val))
+#define json_array_add_value_uint(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_UINT, (val))
+#define json_array_add_value_float(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_FLOAT, (val))
+#define json_array_add_value_string(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_STRING, (val))
+#define json_array_add_value_object(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_OBJECT, (val))
+#define json_array_add_value_array(obj, val) \
+ json_array_add_value_type((obj), JSON_TYPE_ARRAY, (val))
+
+#define json_array_last_value_object(obj) \
+ (obj->values[obj->value_cnt - 1]->object)
+
+void json_print_object(struct json_object *obj, void *);
+#endif
--- /dev/null
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_NVME_H
+#define _LINUX_NVME_H
+
+#include <linux/types.h>
+#include <linux/uuid.h>
+
+/* NQN names in commands fields specified one size */
+#define NVMF_NQN_FIELD_LEN 256
+
+/* However the max length of a qualified name is another size */
+#define NVMF_NQN_SIZE 223
+
+#define NVMF_TRSVCID_SIZE 32
+#define NVMF_TRADDR_SIZE 256
+#define NVMF_TSAS_SIZE 256
+
+#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
+
+#define NVME_RDMA_IP_PORT 4420
+
+#define NVME_NSID_ALL 0xffffffff
+
+enum nvme_subsys_type {
+ NVME_NQN_DISC = 1, /* Discovery type target subsystem */
+ NVME_NQN_NVME = 2, /* NVME type target subsystem */
+};
+
+/* Address Family codes for Discovery Log Page entry ADRFAM field */
+enum {
+ NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */
+ NVMF_ADDR_FAMILY_IP4 = 1, /* IP4 */
+ NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */
+ NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */
+ NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */
+};
+
+/* Transport Type codes for Discovery Log Page entry TRTYPE field */
+enum {
+ NVMF_TRTYPE_RDMA = 1, /* RDMA */
+ NVMF_TRTYPE_FC = 2, /* Fibre Channel */
+ NVMF_TRTYPE_TCP = 3, /* TCP */
+ NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */
+ NVMF_TRTYPE_MAX,
+};
+
+/* Transport Requirements codes for Discovery Log Page entry TREQ field */
+enum {
+ NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */
+ NVMF_TREQ_REQUIRED = 1, /* Required */
+ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */
+ NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* SQ flow control disable supported */
+};
+
+/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
+ * RDMA_QPTYPE field
+ */
+enum {
+ NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */
+ NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */
+};
+
+/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
+ * RDMA_QPTYPE field
+ */
+enum {
+ NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */
+ NVMF_RDMA_PRTYPE_IB = 2, /* InfiniBand */
+ NVMF_RDMA_PRTYPE_ROCE = 3, /* InfiniBand RoCE */
+ NVMF_RDMA_PRTYPE_ROCEV2 = 4, /* InfiniBand RoCEV2 */
+ NVMF_RDMA_PRTYPE_IWARP = 5, /* IWARP */
+};
+
+/* RDMA Connection Management Service Type codes for Discovery Log Page
+ * entry TSAS RDMA_CMS field
+ */
+enum {
+ NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
+};
+
+/* TCP port security type for Discovery Log Page entry TSAS
+ */
+enum {
+ NVMF_TCP_SECTYPE_NONE = 0, /* No Security */
+ NVMF_TCP_SECTYPE_TLS = 1, /* Transport Layer Security */
+};
+
+#define NVME_AQ_DEPTH 32
+#define NVME_NR_AEN_COMMANDS 1
+#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
+
+/*
+ * Subtract one to leave an empty queue entry for 'Full Queue' condition. See
+ * NVM-Express 1.2 specification, section 4.1.2.
+ */
+#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1)
+
+enum {
+ NVME_REG_CAP = 0x0000, /* Controller Capabilities */
+ NVME_REG_VS = 0x0008, /* Version */
+ NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */
+ NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */
+ NVME_REG_CC = 0x0014, /* Controller Configuration */
+ NVME_REG_CSTS = 0x001c, /* Controller Status */
+ NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */
+ NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */
+ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */
+ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
+ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
+ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
+ NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */
+ NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */
+ NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer Location */
+ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
+};
+
+#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
+#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff)
+#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf)
+#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1)
+#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
+#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
+
+#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
+#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
+#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff)
+#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf)
+
+#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10)
+#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8)
+#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4)
+#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2)
+#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1)
+
+/*
+ * Submission and Completion Queue Entry Sizes for the NVM command set.
+ * (In bytes and specified as a power of two (2^n)).
+ */
+#define NVME_NVM_IOSQES 6
+#define NVME_NVM_IOCQES 4
+
+enum {
+ NVME_CC_ENABLE = 1 << 0,
+ NVME_CC_CSS_NVM = 0 << 4,
+ NVME_CC_EN_SHIFT = 0,
+ NVME_CC_CSS_SHIFT = 4,
+ NVME_CC_MPS_SHIFT = 7,
+ NVME_CC_AMS_SHIFT = 11,
+ NVME_CC_SHN_SHIFT = 14,
+ NVME_CC_IOSQES_SHIFT = 16,
+ NVME_CC_IOCQES_SHIFT = 20,
+ NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
+ NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
+ NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
+ NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
+ NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
+ NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
+ NVME_CSTS_RDY = 1 << 0,
+ NVME_CSTS_CFS = 1 << 1,
+ NVME_CSTS_NSSRO = 1 << 4,
+ NVME_CSTS_PP = 1 << 5,
+ NVME_CSTS_SHST_NORMAL = 0 << 2,
+ NVME_CSTS_SHST_OCCUR = 1 << 2,
+ NVME_CSTS_SHST_CMPLT = 2 << 2,
+ NVME_CSTS_SHST_MASK = 3 << 2,
+};
+
+struct nvme_id_power_state {
+ __le16 max_power; /* centiwatts */
+ __u8 rsvd2;
+ __u8 flags;
+ __le32 entry_lat; /* microseconds */
+ __le32 exit_lat; /* microseconds */
+ __u8 read_tput;
+ __u8 read_lat;
+ __u8 write_tput;
+ __u8 write_lat;
+ __le16 idle_power;
+ __u8 idle_scale;
+ __u8 rsvd19;
+ __le16 active_power;
+ __u8 active_work_scale;
+ __u8 rsvd23[9];
+};
+
+enum {
+ NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0,
+ NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
+};
+
+struct nvme_id_ctrl {
+ __le16 vid;
+ __le16 ssvid;
+ char sn[20];
+ char mn[40];
+ char fr[8];
+ __u8 rab;
+ __u8 ieee[3];
+ __u8 cmic;
+ __u8 mdts;
+ __le16 cntlid;
+ __le32 ver;
+ __le32 rtd3r;
+ __le32 rtd3e;
+ __le32 oaes;
+ __le32 ctratt;
+ __le16 rrls;
+ __u8 rsvd102[154];
+ __le16 oacs;
+ __u8 acl;
+ __u8 aerl;
+ __u8 frmw;
+ __u8 lpa;
+ __u8 elpe;
+ __u8 npss;
+ __u8 avscc;
+ __u8 apsta;
+ __le16 wctemp;
+ __le16 cctemp;
+ __le16 mtfa;
+ __le32 hmpre;
+ __le32 hmmin;
+ __u8 tnvmcap[16];
+ __u8 unvmcap[16];
+ __le32 rpmbs;
+ __le16 edstt;
+ __u8 dsto;
+ __u8 fwug;
+ __le16 kas;
+ __le16 hctma;
+ __le16 mntmt;
+ __le16 mxtmt;
+ __le32 sanicap;
+ __le32 hmminds;
+ __le16 hmmaxd;
+ __le16 nsetidmax;
+ __u8 rsvd340[2];
+ __u8 anatt;
+ __u8 anacap;
+ __le32 anagrpmax;
+ __le32 nanagrpid;
+ __u8 rsvd352[160];
+ __u8 sqes;
+ __u8 cqes;
+ __le16 maxcmd;
+ __le32 nn;
+ __le16 oncs;
+ __le16 fuses;
+ __u8 fna;
+ __u8 vwc;
+ __le16 awun;
+ __le16 awupf;
+ __u8 nvscc;
+ __u8 nwpc;
+ __le16 acwu;
+ __u8 rsvd534[2];
+ __le32 sgls;
+ __le32 mnan;
+ __u8 rsvd544[224];
+ char subnqn[256];
+ __u8 rsvd1024[768];
+ __le32 ioccsz;
+ __le32 iorcsz;
+ __le16 icdoff;
+ __u8 ctrattr;
+ __u8 msdbd;
+ __u8 rsvd1804[244];
+ struct nvme_id_power_state psd[32];
+ __u8 vs[1024];
+};
+
+enum {
+ NVME_CTRL_ONCS_COMPARE = 1 << 0,
+ NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
+ NVME_CTRL_ONCS_DSM = 1 << 2,
+ NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
+ NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
+ NVME_CTRL_VWC_PRESENT = 1 << 0,
+ NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
+ NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
+ NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
+ NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
+ NVME_CTRL_CTRATT_128_ID = 1 << 0,
+ NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1,
+ NVME_CTRL_CTRATT_NVM_SETS = 1 << 2,
+ NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3,
+ NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4,
+ NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5,
+};
+
+struct nvme_lbaf {
+ __le16 ms;
+ __u8 ds;
+ __u8 rp;
+};
+
+struct nvme_id_ns {
+ __le64 nsze;
+ __le64 ncap;
+ __le64 nuse;
+ __u8 nsfeat;
+ __u8 nlbaf;
+ __u8 flbas;
+ __u8 mc;
+ __u8 dpc;
+ __u8 dps;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __u8 dlfeat;
+ __le16 nawun;
+ __le16 nawupf;
+ __le16 nacwu;
+ __le16 nabsn;
+ __le16 nabo;
+ __le16 nabspf;
+ __le16 noiob;
+ __u8 nvmcap[16];
+ __u8 rsvd64[28];
+ __le32 anagrpid;
+ __u8 rsvd96[3];
+ __u8 nsattr;
+ __le16 nvmsetid;
+ __le16 endgid;
+ __u8 nguid[16];
+ __u8 eui64[8];
+ struct nvme_lbaf lbaf[16];
+ __u8 rsvd192[192];
+ __u8 vs[3712];
+};
+
+enum {
+ NVME_ID_CNS_NS = 0x00,
+ NVME_ID_CNS_CTRL = 0x01,
+ NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
+ NVME_ID_CNS_NS_DESC_LIST = 0x03,
+ NVME_ID_CNS_NVMSET_LIST = 0x04,
+ NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
+ NVME_ID_CNS_NS_PRESENT = 0x11,
+ NVME_ID_CNS_CTRL_NS_LIST = 0x12,
+ NVME_ID_CNS_CTRL_LIST = 0x13,
+};
+
+enum {
+ NVME_DIR_IDENTIFY = 0x00,
+ NVME_DIR_STREAMS = 0x01,
+ NVME_DIR_SND_ID_OP_ENABLE = 0x01,
+ NVME_DIR_SND_ST_OP_REL_ID = 0x01,
+ NVME_DIR_SND_ST_OP_REL_RSC = 0x02,
+ NVME_DIR_RCV_ID_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_STATUS = 0x02,
+ NVME_DIR_RCV_ST_OP_RESOURCE = 0x03,
+ NVME_DIR_ENDIR = 0x01,
+};
+
+enum {
+ NVME_NS_FEAT_THIN = 1 << 0,
+ NVME_NS_FLBAS_LBA_MASK = 0xf,
+ NVME_NS_FLBAS_META_EXT = 0x10,
+ NVME_LBAF_RP_BEST = 0,
+ NVME_LBAF_RP_BETTER = 1,
+ NVME_LBAF_RP_GOOD = 2,
+ NVME_LBAF_RP_DEGRADED = 3,
+ NVME_NS_DPC_PI_LAST = 1 << 4,
+ NVME_NS_DPC_PI_FIRST = 1 << 3,
+ NVME_NS_DPC_PI_TYPE3 = 1 << 2,
+ NVME_NS_DPC_PI_TYPE2 = 1 << 1,
+ NVME_NS_DPC_PI_TYPE1 = 1 << 0,
+ NVME_NS_DPS_PI_FIRST = 1 << 3,
+ NVME_NS_DPS_PI_MASK = 0x7,
+ NVME_NS_DPS_PI_TYPE1 = 1,
+ NVME_NS_DPS_PI_TYPE2 = 2,
+ NVME_NS_DPS_PI_TYPE3 = 3,
+};
+
+struct nvme_ns_id_desc {
+ __u8 nidt;
+ __u8 nidl;
+ __le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN 8
+#define NVME_NIDT_NGUID_LEN 16
+#define NVME_NIDT_UUID_LEN 16
+
+enum {
+ NVME_NIDT_EUI64 = 0x01,
+ NVME_NIDT_NGUID = 0x02,
+ NVME_NIDT_UUID = 0x03,
+};
+
+#define NVME_MAX_NVMSET 31
+
+struct nvme_nvmset_attr_entry {
+ __le16 id;
+ __le16 endurance_group_id;
+ __u8 rsvd4[4];
+ __le32 random_4k_read_typical;
+ __le32 opt_write_size;
+ __u8 total_nvmset_cap[16];
+ __u8 unalloc_nvmset_cap[16];
+ __u8 rsvd48[80];
+};
+
+struct nvme_id_nvmset {
+ __u8 nid;
+ __u8 rsvd1[127];
+ struct nvme_nvmset_attr_entry ent[NVME_MAX_NVMSET];
+};
+
+/* Derived from 1.3a Figure 101: Get Log Page - Telemetry Host-
+ * Initiated Log (Log Identifier 07h)
+ */
+struct nvme_telemetry_log_page_hdr {
+ __u8 lpi; /* Log page identifier */
+ __u8 rsvd[4];
+ __u8 iee_oui[3];
+ __u16 dalb1; /* Data area 1 last block */
+ __u16 dalb2; /* Data area 2 last block */
+ __u16 dalb3; /* Data area 3 last block */
+ __u8 rsvd1[368]; /* TODO verify */
+ __u8 ctrlavail; /* Controller initiated data avail?*/
+ __u8 ctrldgn; /* Controller initiated telemetry Data Gen # */
+ __u8 rsnident[128];
+ /* We'll have to double fetch so we can get the header,
+ * parse dalb1->3 determine how much size we need for the
+ * log then alloc below. Or just do a secondary non-struct
+ * allocation.
+ */
+ __u8 telemetry_dataarea[0];
+};
+
+struct nvme_endurance_group_log {
+ __u32 rsvd0;
+ __u8 avl_spare_threshold;
+ __u8 percent_used;
+ __u8 rsvd6[26];
+ __u8 endurance_estimate[16];
+ __u8 data_units_read[16];
+ __u8 data_units_written[16];
+ __u8 media_units_written[16];
+ __u8 rsvd96[416];
+};
+
+struct nvme_smart_log {
+ __u8 critical_warning;
+ __u8 temperature[2];
+ __u8 avail_spare;
+ __u8 spare_thresh;
+ __u8 percent_used;
+ __u8 rsvd6[26];
+ __u8 data_units_read[16];
+ __u8 data_units_written[16];
+ __u8 host_reads[16];
+ __u8 host_writes[16];
+ __u8 ctrl_busy_time[16];
+ __u8 power_cycles[16];
+ __u8 power_on_hours[16];
+ __u8 unsafe_shutdowns[16];
+ __u8 media_errors[16];
+ __u8 num_err_log_entries[16];
+ __le32 warning_temp_time;
+ __le32 critical_comp_time;
+ __le16 temp_sensor[8];
+ __le32 thm_temp1_trans_count;
+ __le32 thm_temp2_trans_count;
+ __le32 thm_temp1_total_time;
+ __le32 thm_temp2_total_time;
+ __u8 rsvd232[280];
+};
+
+struct nvme_self_test_res {
+ __u8 device_self_test_status;
+ __u8 segment_num;
+ __u8 valid_diagnostic_info;
+ __u8 rsvd;
+ __le64 power_on_hours;
+ __le32 nsid;
+ __le64 failing_lba;
+ __u8 status_code_type;
+ __u8 status_code;
+ __u8 vendor_specific[2];
+} __attribute__((packed));
+
+struct nvme_self_test_log {
+ __u8 crnt_dev_selftest_oprn;
+ __u8 crnt_dev_selftest_compln;
+ __u8 rsvd[2];
+ struct nvme_self_test_res result[20];
+} __attribute__((packed));
+
+struct nvme_fw_slot_info_log {
+ __u8 afi;
+ __u8 rsvd1[7];
+ __le64 frs[7];
+ __u8 rsvd64[448];
+};
+
+/* NVMe Namespace Write Protect State */
+enum {
+ NVME_NS_NO_WRITE_PROTECT = 0,
+ NVME_NS_WRITE_PROTECT,
+ NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+ NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
+#define NVME_MAX_CHANGED_NAMESPACES 1024
+
+struct nvme_changed_ns_list_log {
+ __le32 log[NVME_MAX_CHANGED_NAMESPACES];
+};
+
+enum {
+ NVME_CMD_EFFECTS_CSUPP = 1 << 0,
+ NVME_CMD_EFFECTS_LBCC = 1 << 1,
+ NVME_CMD_EFFECTS_NCC = 1 << 2,
+ NVME_CMD_EFFECTS_NIC = 1 << 3,
+ NVME_CMD_EFFECTS_CCC = 1 << 4,
+ NVME_CMD_EFFECTS_CSE_MASK = 3 << 16,
+};
+
+struct nvme_effects_log {
+ __le32 acs[256];
+ __le32 iocs[256];
+ __u8 resv[2048];
+};
+
+enum nvme_ana_state {
+ NVME_ANA_OPTIMIZED = 0x01,
+ NVME_ANA_NONOPTIMIZED = 0x02,
+ NVME_ANA_INACCESSIBLE = 0x03,
+ NVME_ANA_PERSISTENT_LOSS = 0x04,
+ NVME_ANA_CHANGE = 0x0f,
+};
+
+struct nvme_ana_group_desc {
+ __le32 grpid;
+ __le32 nnsids;
+ __le64 chgcnt;
+ __u8 state;
+ __u8 rsvd17[15];
+ __le32 nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO (1 << 0)
+
+struct nvme_ana_rsp_hdr {
+ __le64 chgcnt;
+ __le16 ngrps;
+ __le16 rsvd10[3];
+};
+
+enum {
+ NVME_SMART_CRIT_SPARE = 1 << 0,
+ NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
+ NVME_SMART_CRIT_RELIABILITY = 1 << 2,
+ NVME_SMART_CRIT_MEDIA = 1 << 3,
+ NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
+};
+
+enum {
+ NVME_AER_ERROR = 0,
+ NVME_AER_SMART = 1,
+ NVME_AER_CSS = 6,
+ NVME_AER_VS = 7,
+ NVME_AER_NOTICE_NS_CHANGED = 0x0002,
+ NVME_AER_NOTICE_ANA = 0x0003,
+ NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102,
+};
+
+struct nvme_lba_range_type {
+ __u8 type;
+ __u8 attributes;
+ __u8 rsvd2[14];
+ __u64 slba;
+ __u64 nlb;
+ __u8 guid[16];
+ __u8 rsvd48[16];
+};
+
+enum {
+ NVME_LBART_TYPE_FS = 0x01,
+ NVME_LBART_TYPE_RAID = 0x02,
+ NVME_LBART_TYPE_CACHE = 0x03,
+ NVME_LBART_TYPE_SWAP = 0x04,
+
+ NVME_LBART_ATTRIB_TEMP = 1 << 0,
+ NVME_LBART_ATTRIB_HIDE = 1 << 1,
+};
+
+struct nvme_plm_config {
+ __u16 enable_event;
+ __u8 rsvd2[30];
+ __u64 dtwin_reads_thresh;
+ __u64 dtwin_writes_thresh;
+ __u64 dtwin_time_thresh;
+ __u8 rsvd56[456];
+};
+
+struct nvme_reservation_status {
+ __le32 gen;
+ __u8 rtype;
+ __u8 regctl[2];
+ __u8 resv5[2];
+ __u8 ptpls;
+ __u8 resv10[13];
+ struct {
+ __le16 cntlid;
+ __u8 rcsts;
+ __u8 resv3[5];
+ __le64 hostid;
+ __le64 rkey;
+ } regctl_ds[];
+};
+
+struct nvme_reservation_status_ext {
+ __le32 gen;
+ __u8 rtype;
+ __u8 regctl[2];
+ __u8 resv5[2];
+ __u8 ptpls;
+ __u8 resv10[14];
+ __u8 resv24[40];
+ struct {
+ __le16 cntlid;
+ __u8 rcsts;
+ __u8 resv3[5];
+ __le64 rkey;
+ __u8 hostid[16];
+ __u8 resv32[32];
+ } regctl_eds[];
+};
+
+enum nvme_async_event_type {
+ NVME_AER_TYPE_ERROR = 0,
+ NVME_AER_TYPE_SMART = 1,
+ NVME_AER_TYPE_NOTICE = 2,
+};
+
+/* I/O commands */
+
+enum nvme_opcode {
+ nvme_cmd_flush = 0x00,
+ nvme_cmd_write = 0x01,
+ nvme_cmd_read = 0x02,
+ nvme_cmd_write_uncor = 0x04,
+ nvme_cmd_compare = 0x05,
+ nvme_cmd_write_zeroes = 0x08,
+ nvme_cmd_dsm = 0x09,
+ nvme_cmd_resv_register = 0x0d,
+ nvme_cmd_resv_report = 0x0e,
+ nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_resv_release = 0x15,
+};
+
+/*
+ * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * @NVME_SGL_FMT_ADDRESS: absolute address of the data block
+ * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
+ * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation
+ * request subtype
+ */
+enum {
+ NVME_SGL_FMT_ADDRESS = 0x00,
+ NVME_SGL_FMT_OFFSET = 0x01,
+ NVME_SGL_FMT_TRANSPORT_A = 0x0A,
+ NVME_SGL_FMT_INVALIDATE = 0x0f,
+};
+
+/*
+ * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * For struct nvme_sgl_desc:
+ * @NVME_SGL_FMT_DATA_DESC: data block descriptor
+ * @NVME_SGL_FMT_SEG_DESC: sgl segment descriptor
+ * @NVME_SGL_FMT_LAST_SEG_DESC: last sgl segment descriptor
+ *
+ * For struct nvme_keyed_sgl_desc:
+ * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor
+ *
+ * Transport-specific SGL types:
+ * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data block descriptor
+ */
+enum {
+ NVME_SGL_FMT_DATA_DESC = 0x00,
+ NVME_SGL_FMT_SEG_DESC = 0x02,
+ NVME_SGL_FMT_LAST_SEG_DESC = 0x03,
+ NVME_KEY_SGL_FMT_DATA_DESC = 0x04,
+ NVME_TRANSPORT_SGL_DATA_DESC = 0x05,
+};
+
+struct nvme_sgl_desc {
+ __le64 addr;
+ __le32 length;
+ __u8 rsvd[3];
+ __u8 type;
+};
+
+struct nvme_keyed_sgl_desc {
+ __le64 addr;
+ __u8 length[3];
+ __u8 key[4];
+ __u8 type;
+};
+
+/* data pointer of a command: either two PRP entries or an SGL descriptor */
+union nvme_data_ptr {
+ struct {
+ __le64 prp1;
+ __le64 prp2;
+ };
+ struct nvme_sgl_desc sgl;
+ struct nvme_keyed_sgl_desc ksgl;
+};
+
+/*
+ * Lowest two bits of our flags field (FUSE field in the spec):
+ *
+ * @NVME_CMD_FUSE_FIRST: Fused Operation, first command
+ * @NVME_CMD_FUSE_SECOND: Fused Operation, second command
+ *
+ * Highest two bits in our flags field (PSDT field in the spec):
+ *
+ * @NVME_CMD_PSDT_SGL_METABUF: Use SGLS for this transfer,
+ * If used, MPTR contains addr of single physical buffer (byte aligned).
+ * @NVME_CMD_PSDT_SGL_METASEG: Use SGLS for this transfer,
+ * If used, MPTR contains an address of an SGL segment containing
+ * exactly 1 SGL descriptor (qword aligned).
+ */
+enum {
+ NVME_CMD_FUSE_FIRST = (1 << 0),
+ NVME_CMD_FUSE_SECOND = (1 << 1),
+
+ NVME_CMD_SGL_METABUF = (1 << 6),
+ NVME_CMD_SGL_METASEG = (1 << 7),
+ NVME_CMD_SGL_ALL = NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG,
+};
+
+/* layout shared by all submission queue entries */
+struct nvme_common_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le32 cdw2[2];
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le32 cdw10[6];
+};
+
+/* submission queue entry for read/write/compare */
+struct nvme_rw_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 slba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le32 reftag;
+ __le16 apptag;
+ __le16 appmask;
+};
+
+/* control-field and dataset-management bits for the read/write commands */
+enum {
+ NVME_RW_LR = 1 << 15,
+ NVME_RW_FUA = 1 << 14,
+ NVME_RW_DEAC = 1 << 9,
+ NVME_RW_DSM_FREQ_UNSPEC = 0,
+ NVME_RW_DSM_FREQ_TYPICAL = 1,
+ NVME_RW_DSM_FREQ_RARE = 2,
+ NVME_RW_DSM_FREQ_READS = 3,
+ NVME_RW_DSM_FREQ_WRITES = 4,
+ NVME_RW_DSM_FREQ_RW = 5,
+ NVME_RW_DSM_FREQ_ONCE = 6,
+ NVME_RW_DSM_FREQ_PREFETCH = 7,
+ NVME_RW_DSM_FREQ_TEMP = 8,
+ NVME_RW_DSM_LATENCY_NONE = 0 << 4,
+ NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
+ NVME_RW_DSM_LATENCY_NORM = 2 << 4,
+ NVME_RW_DSM_LATENCY_LOW = 3 << 4,
+ NVME_RW_DSM_SEQ_REQ = 1 << 6,
+ NVME_RW_DSM_COMPRESSED = 1 << 7,
+ NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
+ NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
+ NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
+ NVME_RW_PRINFO_PRACT = 1 << 13,
+ NVME_RW_DTYPE_STREAMS = 1 << 4,
+};
+
+/* submission queue entry for Dataset Management */
+struct nvme_dsm_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 nr;
+ __le32 attributes;
+ __u32 rsvd12[4];
+};
+
+enum {
+ NVME_DSMGMT_IDR = 1 << 0,
+ NVME_DSMGMT_IDW = 1 << 1,
+ NVME_DSMGMT_AD = 1 << 2,
+};
+
+#define NVME_DSM_MAX_RANGES 256
+
+/* one entry of the DSM range list pointed to by nvme_dsm_cmd */
+struct nvme_dsm_range {
+ __le32 cattr;
+ __le32 nlb;
+ __le64 slba;
+};
+
+struct nvme_write_zeroes_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 slba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le32 reftag;
+ __le16 apptag;
+ __le16 appmask;
+};
+
+/* Features */
+
+struct nvme_feat_auto_pst {
+ __le64 entries[32];
+};
+
+enum {
+ NVME_HOST_MEM_ENABLE = (1 << 0),
+ NVME_HOST_MEM_RETURN = (1 << 1),
+};
+
+/* Admin commands */
+
+enum nvme_admin_opcode {
+ nvme_admin_delete_sq = 0x00,
+ nvme_admin_create_sq = 0x01,
+ nvme_admin_get_log_page = 0x02,
+ nvme_admin_delete_cq = 0x04,
+ nvme_admin_create_cq = 0x05,
+ nvme_admin_identify = 0x06,
+ nvme_admin_abort_cmd = 0x08,
+ nvme_admin_set_features = 0x09,
+ nvme_admin_get_features = 0x0a,
+ nvme_admin_async_event = 0x0c,
+ nvme_admin_ns_mgmt = 0x0d,
+ nvme_admin_activate_fw = 0x10,
+ nvme_admin_download_fw = 0x11,
+ nvme_admin_dev_self_test = 0x14,
+ nvme_admin_ns_attach = 0x15,
+ nvme_admin_keep_alive = 0x18,
+ nvme_admin_directive_send = 0x19,
+ nvme_admin_directive_recv = 0x1a,
+ nvme_admin_virtual_mgmt = 0x1c,
+ nvme_admin_nvme_mi_send = 0x1d,
+ nvme_admin_nvme_mi_recv = 0x1e,
+ nvme_admin_dbbuf = 0x7C,
+ nvme_admin_format_nvm = 0x80,
+ nvme_admin_security_send = 0x81,
+ nvme_admin_security_recv = 0x82,
+ nvme_admin_sanitize_nvm = 0x84,
+};
+
+/* queue flags, feature identifiers, log page identifiers, fw actions */
+enum {
+ NVME_QUEUE_PHYS_CONTIG = (1 << 0),
+ NVME_CQ_IRQ_ENABLED = (1 << 1),
+ NVME_SQ_PRIO_URGENT = (0 << 1),
+ NVME_SQ_PRIO_HIGH = (1 << 1),
+ NVME_SQ_PRIO_MEDIUM = (2 << 1),
+ NVME_SQ_PRIO_LOW = (3 << 1),
+ NVME_FEAT_ARBITRATION = 0x01,
+ NVME_FEAT_POWER_MGMT = 0x02,
+ NVME_FEAT_LBA_RANGE = 0x03,
+ NVME_FEAT_TEMP_THRESH = 0x04,
+ NVME_FEAT_ERR_RECOVERY = 0x05,
+ NVME_FEAT_VOLATILE_WC = 0x06,
+ NVME_FEAT_NUM_QUEUES = 0x07,
+ NVME_FEAT_IRQ_COALESCE = 0x08,
+ NVME_FEAT_IRQ_CONFIG = 0x09,
+ NVME_FEAT_WRITE_ATOMIC = 0x0a,
+ NVME_FEAT_ASYNC_EVENT = 0x0b,
+ NVME_FEAT_AUTO_PST = 0x0c,
+ NVME_FEAT_HOST_MEM_BUF = 0x0d,
+ NVME_FEAT_TIMESTAMP = 0x0e,
+ NVME_FEAT_KATO = 0x0f,
+ NVME_FEAT_HCTM = 0X10,
+ NVME_FEAT_NOPSC = 0X11,
+ NVME_FEAT_RRL = 0x12,
+ NVME_FEAT_PLM_CONFIG = 0x13,
+ NVME_FEAT_PLM_WINDOW = 0x14,
+ NVME_FEAT_SW_PROGRESS = 0x80,
+ NVME_FEAT_HOST_ID = 0x81,
+ NVME_FEAT_RESV_MASK = 0x82,
+ NVME_FEAT_RESV_PERSIST = 0x83,
+ NVME_FEAT_WRITE_PROTECT = 0x84,
+ NVME_LOG_ERROR = 0x01,
+ NVME_LOG_SMART = 0x02,
+ NVME_LOG_FW_SLOT = 0x03,
+ NVME_LOG_CHANGED_NS = 0x04,
+ NVME_LOG_CMD_EFFECTS = 0x05,
+ NVME_LOG_DEVICE_SELF_TEST = 0x06,
+ NVME_LOG_TELEMETRY_HOST = 0x07,
+ NVME_LOG_TELEMETRY_CTRL = 0x08,
+ NVME_LOG_ENDURANCE_GROUP = 0x09,
+ NVME_LOG_ANA = 0x0c,
+ NVME_LOG_DISC = 0x70,
+ NVME_LOG_RESERVATION = 0x80,
+ NVME_LOG_SANITIZE = 0x81,
+ NVME_FWACT_REPL = (0 << 3),
+ NVME_FWACT_REPL_ACTV = (1 << 3),
+ NVME_FWACT_ACTV = (2 << 3),
+};
+
+/* log specific field (LSP) / log page offset (LPO) shorthands */
+enum {
+ NVME_NO_LOG_LSP = 0x0,
+ NVME_NO_LOG_LPO = 0x0,
+ NVME_LOG_ANA_LSP_RGO = 0x1,
+ NVME_TELEM_LSP_CREATE = 0x1,
+};
+
+/* Sanitize and Sanitize Monitor/Log */
+enum {
+ /* Sanitize */
+ NVME_SANITIZE_NO_DEALLOC = 0x00000200,
+ NVME_SANITIZE_OIPBP = 0x00000100,
+ NVME_SANITIZE_OWPASS_SHIFT = 0x00000004,
+ NVME_SANITIZE_AUSE = 0x00000008,
+ NVME_SANITIZE_ACT_CRYPTO_ERASE = 0x00000004,
+ NVME_SANITIZE_ACT_OVERWRITE = 0x00000003,
+ NVME_SANITIZE_ACT_BLOCK_ERASE = 0x00000002,
+ NVME_SANITIZE_ACT_EXIT = 0x00000001,
+
+ /* Sanitize Monitor/Log */
+ NVME_SANITIZE_LOG_DATA_LEN = 0x0014,
+ NVME_SANITIZE_LOG_GLOBAL_DATA_ERASED = 0x0100,
+ NVME_SANITIZE_LOG_NUM_CMPLTED_PASS_MASK = 0x00F8,
+ NVME_SANITIZE_LOG_STATUS_MASK = 0x0007,
+ NVME_SANITIZE_LOG_NEVER_SANITIZED = 0x0000,
+ NVME_SANITIZE_LOG_COMPLETED_SUCCESS = 0x0001,
+ NVME_SANITIZE_LOG_IN_PROGESS = 0x0002,
+ NVME_SANITIZE_LOG_COMPLETED_FAILED = 0x0003,
+};
+
+enum {
+ /* Self-test log Validation bits */
+ NVME_SELF_TEST_VALID_NSID = 1 << 0,
+ NVME_SELF_TEST_VALID_FLBA = 1 << 1,
+ NVME_SELF_TEST_VALID_SCT = 1 << 2,
+ NVME_SELF_TEST_VALID_SC = 1 << 3,
+ NVME_SELF_TEST_REPORTS = 20,
+};
+
+/* submission queue entry for the Identify admin command */
+struct nvme_identify {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 cns;
+ __u8 rsvd3;
+ __le16 ctrlid;
+ __u32 rsvd11[5];
+};
+
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
+/* submission queue entry for Get/Set Features */
+struct nvme_features {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 fid;
+ __le32 dword11;
+ __le32 dword12;
+ __le32 dword13;
+ __le32 dword14;
+ __le32 dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+ __le64 addr;
+ __le32 size;
+ __u32 rsvd;
+};
+
+struct nvme_create_cq {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __u64 rsvd8;
+ __le16 cqid;
+ __le16 qsize;
+ __le16 cq_flags;
+ __le16 irq_vector;
+ __u32 rsvd12[4];
+};
+
+struct nvme_create_sq {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __u64 rsvd8;
+ __le16 sqid;
+ __le16 qsize;
+ __le16 sq_flags;
+ __le16 cqid;
+ __u32 rsvd12[4];
+};
+
+struct nvme_delete_queue {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[9];
+ __le16 qid;
+ __u16 rsvd10;
+ __u32 rsvd11[5];
+};
+
+struct nvme_abort_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[9];
+ __le16 sqid;
+ __u16 cid;
+ __u32 rsvd11[5];
+};
+
+struct nvme_download_firmware {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __le32 offset;
+ __u32 rsvd12[4];
+};
+
+struct nvme_format_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[4];
+ __le32 cdw10;
+ __u32 rsvd11[5];
+};
+
+/* submission queue entry for Get Log Page */
+struct nvme_get_log_page_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 lid;
+ __u8 lsp;
+ __le16 numdl;
+ __le16 numdu;
+ __u16 rsvd11;
+ __le32 lpol;
+ __le32 lpou;
+ __u32 rsvd14[2];
+};
+
+struct nvme_directive_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __u8 doper;
+ __u8 dtype;
+ __le16 dspec;
+ __u8 endir;
+ __u8 tdtype;
+ __u16 rsvd15;
+
+ __u32 rsvd16[3];
+};
+
+/* Sanitize Log Page */
+struct nvme_sanitize_log_page {
+ __le16 progress;
+ __le16 status;
+ __le32 cdw10_info;
+ __le32 est_ovrwrt_time;
+ __le32 est_blk_erase_time;
+ __le32 est_crypto_erase_time;
+};
+
+/*
+ * Fabrics subcommands.
+ */
+enum nvmf_fabrics_opcode {
+ nvme_fabrics_command = 0x7f,
+};
+
+enum nvmf_capsule_command {
+ nvme_fabrics_type_property_set = 0x00,
+ nvme_fabrics_type_connect = 0x01,
+ nvme_fabrics_type_property_get = 0x04,
+};
+
+struct nvmf_common_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 ts[24];
+};
+
+/*
+ * The legal cntlid range a NVMe Target will provide.
+ * Note that cntlid of value 0 is considered illegal in the fabrics world.
+ * Devices based on earlier specs did not have the subsystem concept;
+ * therefore, those devices had their cntlid value set to 0 as a result.
+ */
+#define NVME_CNTLID_MIN 1
+#define NVME_CNTLID_MAX 0xffef
+#define NVME_CNTLID_DYNAMIC 0xffff
+
+#define MAX_DISC_LOGS 255
+
+/* Discovery log page entry */
+struct nvmf_disc_rsp_page_entry {
+ __u8 trtype;
+ __u8 adrfam;
+ __u8 subtype;
+ __u8 treq;
+ __le16 portid;
+ __le16 cntlid;
+ __le16 asqsz;
+ __u8 resv8[22];
+ char trsvcid[NVMF_TRSVCID_SIZE];
+ __u8 resv64[192];
+ char subnqn[NVMF_NQN_FIELD_LEN];
+ char traddr[NVMF_TRADDR_SIZE];
+ union tsas {
+ char common[NVMF_TSAS_SIZE];
+ struct rdma {
+ __u8 qptype;
+ __u8 prtype;
+ __u8 cms;
+ __u8 resv3[5];
+ __u16 pkey;
+ __u8 resv10[246];
+ } rdma;
+ struct tcp {
+ __u8 sectype;
+ } tcp;
+ } tsas;
+};
+
+/* Discovery log page header */
+struct nvmf_disc_rsp_page_hdr {
+ __le64 genctr;
+ __le64 numrec;
+ __le16 recfmt;
+ __u8 resv14[1006];
+ struct nvmf_disc_rsp_page_entry entries[0];
+};
+
+/* Fabrics Connect command capsule */
+struct nvmf_connect_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[19];
+ union nvme_data_ptr dptr;
+ __le16 recfmt;
+ __le16 qid;
+ __le16 sqsize;
+ __u8 cattr;
+ __u8 resv3;
+ __le32 kato;
+ __u8 resv4[12];
+};
+
+/* data transferred with the Connect command */
+struct nvmf_connect_data {
+ uuid_t hostid;
+ __le16 cntlid;
+ char resv4[238];
+ char subsysnqn[NVMF_NQN_FIELD_LEN];
+ char hostnqn[NVMF_NQN_FIELD_LEN];
+ char resv5[256];
+};
+
+struct nvmf_property_set_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 attrib;
+ __u8 resv3[3];
+ __le32 offset;
+ __le64 value;
+ __u8 resv4[8];
+};
+
+struct nvmf_property_get_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 attrib;
+ __u8 resv3[3];
+ __le32 offset;
+ __u8 resv4[16];
+};
+
+struct nvme_dbbuf {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __le64 prp2;
+ __u32 rsvd12[6];
+};
+
+/* parameters returned by Directive Receive for the Streams directive */
+struct streams_directive_params {
+ __le16 msl;
+ __le16 nssa;
+ __le16 nsso;
+ __u8 rsvd[10];
+ __le32 sws;
+ __le16 sgs;
+ __le16 nsa;
+ __le16 nso;
+ __u8 rsvd2[6];
+};
+
+/* a full 64-byte submission queue entry, viewed as any specific command */
+struct nvme_command {
+ union {
+ struct nvme_common_command common;
+ struct nvme_rw_command rw;
+ struct nvme_identify identify;
+ struct nvme_features features;
+ struct nvme_create_cq create_cq;
+ struct nvme_create_sq create_sq;
+ struct nvme_delete_queue delete_queue;
+ struct nvme_download_firmware dlfw;
+ struct nvme_format_cmd format;
+ struct nvme_dsm_cmd dsm;
+ struct nvme_write_zeroes_cmd write_zeroes;
+ struct nvme_abort_cmd abort;
+ struct nvme_get_log_page_command get_log_page;
+ struct nvmf_common_command fabrics;
+ struct nvmf_connect_command connect;
+ struct nvmf_property_set_command prop_set;
+ struct nvmf_property_get_command prop_get;
+ struct nvme_dbbuf dbbuf;
+ struct nvme_directive_cmd directive;
+ };
+};
+
+/*
+ * True if the command transfers data host-to-controller (odd opcode /
+ * fctype values). NOTE(review): relies on an unlikely() macro being
+ * defined elsewhere in this tree — confirm it is in scope here.
+ */
+static inline bool nvme_is_write(struct nvme_command *cmd)
+{
+ /*
+ * What a mess...
+ *
+ * Why can't we simply have a Fabrics In and Fabrics out command?
+ */
+ if (unlikely(cmd->common.opcode == nvme_fabrics_command))
+ return cmd->fabrics.fctype & 1;
+ return cmd->common.opcode & 1;
+}
+
+enum {
+ /*
+ * Generic Command Status:
+ */
+ NVME_SC_SUCCESS = 0x0,
+ NVME_SC_INVALID_OPCODE = 0x1,
+ NVME_SC_INVALID_FIELD = 0x2,
+ NVME_SC_CMDID_CONFLICT = 0x3,
+ NVME_SC_DATA_XFER_ERROR = 0x4,
+ NVME_SC_POWER_LOSS = 0x5,
+ NVME_SC_INTERNAL = 0x6,
+ NVME_SC_ABORT_REQ = 0x7,
+ NVME_SC_ABORT_QUEUE = 0x8,
+ NVME_SC_FUSED_FAIL = 0x9,
+ NVME_SC_FUSED_MISSING = 0xa,
+ NVME_SC_INVALID_NS = 0xb,
+ NVME_SC_CMD_SEQ_ERROR = 0xc,
+ NVME_SC_SGL_INVALID_LAST = 0xd,
+ NVME_SC_SGL_INVALID_COUNT = 0xe,
+ NVME_SC_SGL_INVALID_DATA = 0xf,
+ NVME_SC_SGL_INVALID_METADATA = 0x10,
+ NVME_SC_SGL_INVALID_TYPE = 0x11,
+
+ NVME_SC_SGL_INVALID_OFFSET = 0x16,
+ NVME_SC_SGL_INVALID_SUBTYPE = 0x17,
+
+ NVME_SC_SANITIZE_FAILED = 0x1C,
+ NVME_SC_SANITIZE_IN_PROGRESS = 0x1D,
+
+ NVME_SC_NS_WRITE_PROTECTED = 0x20,
+
+ NVME_SC_LBA_RANGE = 0x80,
+ NVME_SC_CAP_EXCEEDED = 0x81,
+ NVME_SC_NS_NOT_READY = 0x82,
+ NVME_SC_RESERVATION_CONFLICT = 0x83,
+
+ /*
+ * Command Specific Status:
+ */
+ NVME_SC_CQ_INVALID = 0x100,
+ NVME_SC_QID_INVALID = 0x101,
+ NVME_SC_QUEUE_SIZE = 0x102,
+ NVME_SC_ABORT_LIMIT = 0x103,
+ NVME_SC_ABORT_MISSING = 0x104,
+ NVME_SC_ASYNC_LIMIT = 0x105,
+ NVME_SC_FIRMWARE_SLOT = 0x106,
+ NVME_SC_FIRMWARE_IMAGE = 0x107,
+ NVME_SC_INVALID_VECTOR = 0x108,
+ NVME_SC_INVALID_LOG_PAGE = 0x109,
+ NVME_SC_INVALID_FORMAT = 0x10a,
+ NVME_SC_FW_NEEDS_CONV_RESET = 0x10b,
+ NVME_SC_INVALID_QUEUE = 0x10c,
+ NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d,
+ NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e,
+ NVME_SC_FEATURE_NOT_PER_NS = 0x10f,
+ NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110,
+ NVME_SC_FW_NEEDS_RESET = 0x111,
+ NVME_SC_FW_NEEDS_MAX_TIME = 0x112,
+ NVME_SC_FW_ACIVATE_PROHIBITED = 0x113,
+ NVME_SC_OVERLAPPING_RANGE = 0x114,
+ NVME_SC_NS_INSUFFICENT_CAP = 0x115,
+ NVME_SC_NS_ID_UNAVAILABLE = 0x116,
+ NVME_SC_NS_ALREADY_ATTACHED = 0x118,
+ NVME_SC_NS_IS_PRIVATE = 0x119,
+ NVME_SC_NS_NOT_ATTACHED = 0x11a,
+ NVME_SC_THIN_PROV_NOT_SUPP = 0x11b,
+ NVME_SC_CTRL_LIST_INVALID = 0x11c,
+ NVME_SC_BP_WRITE_PROHIBITED = 0x11e,
+
+ /*
+ * I/O Command Set Specific - NVM commands:
+ */
+ NVME_SC_BAD_ATTRIBUTES = 0x180,
+ NVME_SC_INVALID_PI = 0x181,
+ NVME_SC_READ_ONLY = 0x182,
+ NVME_SC_ONCS_NOT_SUPPORTED = 0x183,
+
+ /*
+ * I/O Command Set Specific - Fabrics commands:
+ */
+ NVME_SC_CONNECT_FORMAT = 0x180,
+ NVME_SC_CONNECT_CTRL_BUSY = 0x181,
+ NVME_SC_CONNECT_INVALID_PARAM = 0x182,
+ NVME_SC_CONNECT_RESTART_DISC = 0x183,
+ NVME_SC_CONNECT_INVALID_HOST = 0x184,
+
+ NVME_SC_DISCOVERY_RESTART = 0x190,
+ NVME_SC_AUTH_REQUIRED = 0x191,
+
+ /*
+ * Media and Data Integrity Errors:
+ */
+ NVME_SC_WRITE_FAULT = 0x280,
+ NVME_SC_READ_ERROR = 0x281,
+ NVME_SC_GUARD_CHECK = 0x282,
+ NVME_SC_APPTAG_CHECK = 0x283,
+ NVME_SC_REFTAG_CHECK = 0x284,
+ NVME_SC_COMPARE_FAILED = 0x285,
+ NVME_SC_ACCESS_DENIED = 0x286,
+ NVME_SC_UNWRITTEN_BLOCK = 0x287,
+
+ /*
+ * Path-related Errors:
+ */
+ NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
+ NVME_SC_ANA_INACCESSIBLE = 0x302,
+ NVME_SC_ANA_TRANSITION = 0x303,
+
+ /* do-not-retry flag bit of the status field */
+ NVME_SC_DNR = 0x4000,
+};
+
+/* a 16-byte completion queue entry */
+struct nvme_completion {
+ /*
+ * Used by Admin and Fabrics commands to return data:
+ */
+ union nvme_result {
+ __le16 u16;
+ __le32 u32;
+ __le64 u64;
+ } result;
+ __le16 sq_head; /* how much of this queue may be reclaimed */
+ __le16 sq_id; /* submission queue that generated this entry */
+ __u16 command_id; /* of the command which completed */
+ __le16 status; /* did the command fail, and if so, why? */
+};
+
+/* pack/unpack the 32-bit version register (VS) */
+#define NVME_VS(major, minor, tertiary) \
+ (((major) << 16) | ((minor) << 8) | (tertiary))
+
+#define NVME_MAJOR(ver) ((ver) >> 16)
+#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver) ((ver) & 0xff)
+
+#endif /* _LINUX_NVME_H */
--- /dev/null
+/*
+ * Definitions for the NVM Express ioctl interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _UAPI_LINUX_NVME_IOCTL_H
+#define _UAPI_LINUX_NVME_IOCTL_H
+
+#include <linux/types.h>
+#include <sys/ioctl.h>
+
+/* payload of the legacy NVME_IOCTL_SUBMIT_IO read/write/compare ioctl */
+struct nvme_user_io {
+ __u8 opcode;
+ __u8 flags;
+ __u16 control;
+ __u16 nblocks;
+ __u16 rsvd;
+ __u64 metadata;
+ __u64 addr;
+ __u64 slba;
+ __u32 dsmgmt;
+ __u32 reftag;
+ __u16 apptag;
+ __u16 appmask;
+};
+
+/* generic passthrough command, shared by the admin and I/O ioctls */
+struct nvme_passthru_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 rsvd1;
+ __u32 nsid;
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata;
+ __u64 addr;
+ __u32 metadata_len;
+ __u32 data_len;
+ __u32 cdw10;
+ __u32 cdw11;
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 result;
+};
+
+#define nvme_admin_cmd nvme_passthru_cmd
+
+#define NVME_IOCTL_ID _IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io)
+#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#define NVME_IOCTL_RESET _IO('N', 0x44)
+#define NVME_IOCTL_SUBSYS_RESET _IO('N', 0x45)
+#define NVME_IOCTL_RESCAN _IO('N', 0x46)
+
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
--- /dev/null
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <errno.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "nvme-ioctl.h"
+
+/*
+ * Check that fd refers to a character device (a controller node).
+ * Returns 0 on success, a positive errno-style value on failure.
+ * NOTE(review): ENOTBLK is returned for a non-character device — an odd
+ * errno choice, but kept as-is since callers pass it straight through.
+ */
+static int nvme_verify_chr(int fd)
+{
+ static struct stat nvme_stat;
+ int err = fstat(fd, &nvme_stat);
+
+ if (err < 0) {
+ perror("fstat");
+ return errno;
+ }
+ if (!S_ISCHR(nvme_stat.st_mode)) {
+ fprintf(stderr,
+ "Error: requesting reset on non-controller handle\n");
+ return ENOTBLK;
+ }
+ return 0;
+}
+
+/* Trigger an NVMe subsystem reset via the controller character device. */
+int nvme_subsystem_reset(int fd)
+{
+ int ret;
+
+ ret = nvme_verify_chr(fd);
+ if (ret)
+ return ret;
+ return ioctl(fd, NVME_IOCTL_SUBSYS_RESET);
+}
+
+/* Reset the controller referenced by fd. */
+int nvme_reset_controller(int fd)
+{
+ int ret;
+
+ ret = nvme_verify_chr(fd);
+ if (ret)
+ return ret;
+ return ioctl(fd, NVME_IOCTL_RESET);
+}
+
+/* Ask the kernel to rescan the namespaces of this controller. */
+int nvme_ns_rescan(int fd)
+{
+ int ret;
+
+ ret = nvme_verify_chr(fd);
+ if (ret)
+ return ret;
+ return ioctl(fd, NVME_IOCTL_RESCAN);
+}
+
+/*
+ * Return the namespace id of the namespace block device behind fd,
+ * or a negative errno value on failure.
+ */
+int nvme_get_nsid(int fd)
+{
+ static struct stat nvme_stat;
+ int err = fstat(fd, &nvme_stat);
+
+ if (err < 0)
+ return -errno;
+
+ if (!S_ISBLK(nvme_stat.st_mode)) {
+ fprintf(stderr,
+ "Error: requesting namespace-id from non-block device\n");
+ errno = ENOTBLK;
+ return -errno;
+ }
+ return ioctl(fd, NVME_IOCTL_ID);
+}
+
+/* Hand a fully built passthrough command to the kernel via ioctl_cmd. */
+int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
+ struct nvme_passthru_cmd *cmd)
+{
+ return ioctl(fd, ioctl_cmd, cmd);
+}
+
+/* Submit an admin-queue passthrough command. */
+static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+ return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd);
+}
+
+/* Submit an I/O-queue passthrough command. */
+static int nvme_submit_io_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+ return ioctl(fd, NVME_IOCTL_IO_CMD, cmd);
+}
+
+/*
+ * Build a passthrough command from individual dwords and submit it.
+ * On success, *result (if non-NULL) receives the command's result dword.
+ */
+int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode,
+ __u8 flags, __u16 rsvd,
+ __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10, __u32 cdw11,
+ __u32 cdw12, __u32 cdw13, __u32 cdw14, __u32 cdw15,
+ __u32 data_len, void *data, __u32 metadata_len,
+ void *metadata, __u32 timeout_ms, __u32 *result)
+{
+ struct nvme_passthru_cmd cmd = {
+ .opcode = opcode,
+ .flags = flags,
+ .rsvd1 = rsvd,
+ .nsid = nsid,
+ .cdw2 = cdw2,
+ .cdw3 = cdw3,
+ .metadata = (__u64)(uintptr_t) metadata,
+ .addr = (__u64)(uintptr_t) data,
+ .metadata_len = metadata_len,
+ .data_len = data_len,
+ .cdw10 = cdw10,
+ .cdw11 = cdw11,
+ .cdw12 = cdw12,
+ .cdw13 = cdw13,
+ .cdw14 = cdw14,
+ .cdw15 = cdw15,
+ .timeout_ms = timeout_ms,
+ .result = 0,
+ };
+ int err;
+
+ err = nvme_submit_passthru(fd, ioctl_cmd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+/*
+ * Submit a read/write/compare style command through the legacy
+ * NVME_IOCTL_SUBMIT_IO interface.
+ */
+int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+ void *metadata)
+{
+ struct nvme_user_io io = {
+ .opcode = opcode,
+ .flags = 0,
+ .control = control,
+ .nblocks = nblocks,
+ .rsvd = 0,
+ .metadata = (__u64)(uintptr_t) metadata,
+ .addr = (__u64)(uintptr_t) data,
+ .slba = slba,
+ .dsmgmt = dsmgmt,
+ .reftag = reftag,
+ .appmask = appmask,
+ .apptag = apptag,
+ };
+ return ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
+}
+
+/* Convenience wrapper: Read command via nvme_io(). */
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+ __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+ void *metadata)
+{
+ return nvme_io(fd, nvme_cmd_read, slba, nblocks, control, dsmgmt,
+ reftag, apptag, appmask, data, metadata);
+}
+
+/* Convenience wrapper: Write command via nvme_io(). */
+int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+ __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+ void *metadata)
+{
+ return nvme_io(fd, nvme_cmd_write, slba, nblocks, control, dsmgmt,
+ reftag, apptag, appmask, data, metadata);
+}
+
+/* Convenience wrapper: Compare command via nvme_io(). */
+int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control, __u32 dsmgmt,
+ __u32 reftag, __u16 apptag, __u16 appmask, void *data,
+ void *metadata)
+{
+ return nvme_io(fd, nvme_cmd_compare, slba, nblocks, control, dsmgmt,
+ reftag, apptag, appmask, data, metadata);
+}
+
+/* Submit an arbitrary I/O-queue command via the passthrough ioctl. */
+int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
+ __u32 nsid, __u32 cdw2, __u32 cdw3, __u32 cdw10,
+ __u32 cdw11, __u32 cdw12, __u32 cdw13, __u32 cdw14,
+ __u32 cdw15, __u32 data_len, void *data,
+ __u32 metadata_len, void *metadata, __u32 timeout_ms)
+{
+ return nvme_passthru(fd, NVME_IOCTL_IO_CMD, opcode, flags, rsvd, nsid,
+ cdw2, cdw3, cdw10, cdw11, cdw12, cdw13, cdw14,
+ cdw15, data_len, data, metadata_len, metadata,
+ timeout_ms, NULL);
+}
+
+/* Write Zeroes: slba is split across cdw10 (low) and cdw11 (high). */
+int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
+ __u16 control, __u32 reftag, __u16 apptag, __u16 appmask)
+{
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_write_zeroes,
+ .nsid = nsid,
+ .cdw10 = slba & 0xffffffff,
+ .cdw11 = slba >> 32,
+ .cdw12 = nlb | (control << 16),
+ .cdw14 = reftag,
+ .cdw15 = apptag | (appmask << 16),
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Write Uncorrectable over the given LBA range. */
+int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb)
+{
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_write_uncor,
+ .nsid = nsid,
+ .cdw10 = slba & 0xffffffff,
+ .cdw11 = slba >> 32,
+ .cdw12 = nlb,
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Flush the volatile write cache for the given namespace. */
+int nvme_flush(int fd, __u32 nsid)
+{
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_flush,
+ .nsid = nsid,
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit a Dataset Management command over the given range list.
+ * cdw10 carries nr_ranges - 1 (0's based count); NOTE(review): callers
+ * must pass nr_ranges >= 1 or the subtraction wraps around.
+ */
+int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
+ __u16 nr_ranges)
+{
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_dsm,
+ .nsid = nsid,
+ .addr = (__u64)(uintptr_t) dsm,
+ .data_len = nr_ranges * sizeof(*dsm),
+ .cdw10 = nr_ranges - 1,
+ .cdw11 = cdw11,
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Allocate and fill a DSM range array from parallel attribute/length/LBA
+ * arrays. Returns NULL on allocation failure; on success the caller owns
+ * the returned buffer and must free() it.
+ */
+struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs, __u32 *llbas,
+ __u64 *slbas, __u16 nr_ranges)
+{
+ int i;
+ struct nvme_dsm_range *dsm = malloc(nr_ranges * sizeof(*dsm));
+
+ if (!dsm) {
+ fprintf(stderr, "malloc: %s\n", strerror(errno));
+ return NULL;
+ }
+ for (i = 0; i < nr_ranges; i++) {
+ dsm[i].cattr = cpu_to_le32(ctx_attrs[i]);
+ dsm[i].nlb = cpu_to_le32(llbas[i]);
+ dsm[i].slba = cpu_to_le64(slbas[i]);
+ }
+ return dsm;
+}
+
+/*
+ * Reservation Acquire: cdw10 packs the acquire action, the
+ * ignore-existing-key bit and the reservation type; the payload carries
+ * the current and preempted reservation keys in little-endian.
+ */
+int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
+ bool iekey, __u64 crkey, __u64 nrkey)
+{
+ __le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
+ __u32 cdw10 = (racqa & 0x7) | (iekey ? 1 << 3 : 0) | rtype << 8;
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_resv_acquire,
+ .nsid = nsid,
+ .cdw10 = cdw10,
+ .addr = (__u64)(uintptr_t) (payload),
+ .data_len = sizeof(payload),
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Reservation Register: payload carries current and new keys. */
+int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
+ bool iekey, __u64 crkey, __u64 nrkey)
+{
+ __le64 payload[2] = { cpu_to_le64(crkey), cpu_to_le64(nrkey) };
+ __u32 cdw10 = (rrega & 0x7) | (iekey ? 1 << 3 : 0) | cptpl << 30;
+
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_resv_register,
+ .nsid = nsid,
+ .cdw10 = cdw10,
+ .addr = (__u64)(uintptr_t) (payload),
+ .data_len = sizeof(payload),
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/* Reservation Release: payload carries the current reservation key. */
+int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
+ bool iekey, __u64 crkey)
+{
+ __le64 payload[1] = { cpu_to_le64(crkey) };
+ __u32 cdw10 = (rrela & 0x7) | (iekey ? 1 << 3 : 0) | rtype << 8;
+
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_resv_release,
+ .nsid = nsid,
+ .cdw10 = cdw10,
+ .addr = (__u64)(uintptr_t) (payload),
+ .data_len = sizeof(payload),
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Reservation Report: numd is a 0's based dword count, so the buffer at
+ * data must hold (numd + 1) * 4 bytes.
+ */
+int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data)
+{
+ struct nvme_passthru_cmd cmd = {
+ .opcode = nvme_cmd_resv_report,
+ .nsid = nsid,
+ .cdw10 = numd,
+ .cdw11 = cdw11,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = (numd + 1) << 2,
+ };
+
+ return nvme_submit_io_passthru(fd, &cmd);
+}
+
+/*
+ * Submit an Identify admin command; data must point to a
+ * NVME_IDENTIFY_DATA_SIZE (4096 byte) buffer.
+ */
+int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_identify,
+ .nsid = nsid,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = NVME_IDENTIFY_DATA_SIZE,
+ .cdw10 = cdw10,
+ .cdw11 = cdw11,
+ };
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Identify with cdw11 = 0 (pre-1.3 style); cdw10 selects the CNS value. */
+int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
+{
+ return nvme_identify13(fd, nsid, cdw10, 0, data);
+}
+
+/* Identify Controller (CNS 01h). */
+int nvme_identify_ctrl(int fd, void *data)
+{
+ return nvme_identify(fd, 0, 1, data);
+}
+
+/* Identify Namespace; "present" selects allocated vs. active namespaces. */
+int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data)
+{
+ int cns = present ? NVME_ID_CNS_NS_PRESENT : NVME_ID_CNS_NS;
+
+ return nvme_identify(fd, nsid, cns, data);
+}
+
+/* Namespace id list starting after nsid; "all" includes allocated ones. */
+int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data)
+{
+ int cns = all ? NVME_ID_CNS_NS_PRESENT_LIST : NVME_ID_CNS_NS_ACTIVE_LIST;
+
+ return nvme_identify(fd, nsid, cns, data);
+}
+
+/* Controller id list, either subsystem-wide or namespace-attached. */
+int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data)
+{
+ int cns = nsid ? NVME_ID_CNS_CTRL_NS_LIST : NVME_ID_CNS_CTRL_LIST;
+
+ return nvme_identify(fd, nsid, (cntid << 16) | cns, data);
+}
+
+/* Namespace identification descriptor list. */
+int nvme_identify_ns_descs(int fd, __u32 nsid, void *data)
+{
+
+ return nvme_identify(fd, nsid, NVME_ID_CNS_NS_DESC_LIST, data);
+}
+
+/* NVM set list starting at nvmset_id (passed through cdw11). */
+int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data)
+{
+ return nvme_identify13(fd, 0, NVME_ID_CNS_NVMSET_LIST, nvmset_id, data);
+}
+
+/*
+ * Build and submit a Get Log Page command. numd is a 0's based dword
+ * count split across cdw10/cdw11; the log page offset lpo is split
+ * across cdw12 (low) and cdw13 (high).
+ * NOTE(review): assumes data_len is a non-zero multiple of 4 — the
+ * numd computation underflows otherwise; confirm all callers.
+ */
+int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
+ __u16 lsi, bool rae, __u32 data_len, void *data)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_get_log_page,
+ .nsid = nsid,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = data_len,
+ };
+ __u32 numd = (data_len >> 2) - 1;
+ __u16 numdu = numd >> 16, numdl = numd & 0xffff;
+
+ cmd.cdw10 = log_id | (numdl << 16) | (rae ? 1 << 15 : 0);
+ if (lsp)
+ cmd.cdw10 |= lsp << 8;
+
+ cmd.cdw11 = numdu | (lsi << 16);
+ cmd.cdw12 = lpo;
+ cmd.cdw13 = (lpo >> 32);
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+
+}
+
+/*
+ * Fetch a log page in 4KiB chunks, advancing the log page offset
+ * between calls. Returns 0 on success or the first failing command's
+ * return value. NOTE(review): uses arithmetic on a void pointer (a GCC
+ * extension) and assumes data_len > 0.
+ */
+int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
+ __u32 data_len, void *data)
+{
+ void *ptr = data;
+ __u32 offset = 0, xfer_len = data_len;
+ int ret;
+
+ /*
+ * 4k is the smallest possible transfer unit, so by
+ * restricting ourselves for 4k transfers we avoid having
+ * to check the MDTS value of the controller.
+ */
+ do {
+ xfer_len = data_len - offset;
+ if (xfer_len > 4096)
+ xfer_len = 4096;
+
+ ret = nvme_get_log13(fd, nsid, log_id, NVME_NO_LOG_LSP,
+ offset, 0, rae, xfer_len, ptr);
+ if (ret)
+ return ret;
+
+ offset += xfer_len;
+ ptr += xfer_len;
+ } while (offset < data_len);
+
+ return 0;
+}
+
+/*
+ * Fetch a telemetry log page, either controller- or host-initiated;
+ * generate_report requests creation of a new host-initiated capture.
+ */
+int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
+ int ctrl_init, size_t log_page_size, __u64 offset)
+{
+ if (ctrl_init)
+ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_CTRL,
+ NVME_NO_LOG_LSP, offset,
+ 0, 1, log_page_size, lp);
+ if (generate_report)
+ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_HOST,
+ NVME_TELEM_LSP_CREATE, offset,
+ 0, 1, log_page_size, lp);
+ else
+ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_TELEMETRY_HOST,
+ NVME_NO_LOG_LSP, offset,
+ 0, 1, log_page_size, lp);
+}
+
+/* Convenience wrappers for the well-known log pages. */
+
+int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log)
+{
+ return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_FW_SLOT, true,
+ sizeof(*fw_log), fw_log);
+}
+
+int nvme_changed_ns_list_log(int fd, struct nvme_changed_ns_list_log *changed_ns_list_log)
+{
+ return nvme_get_log(fd, 0, NVME_LOG_CHANGED_NS, true,
+ sizeof(changed_ns_list_log->log),
+ changed_ns_list_log->log);
+}
+
+int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log)
+{
+ return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_ERROR, false,
+ entries * sizeof(*err_log), err_log);
+}
+
+/* group_id selects the endurance group (passed as the lsi field). */
+int nvme_endurance_log(int fd, __u16 group_id, struct nvme_endurance_group_log *endurance_log)
+{
+ return nvme_get_log13(fd, 0, NVME_LOG_ENDURANCE_GROUP, 0, 0, group_id, 0,
+ sizeof(*endurance_log), endurance_log);
+}
+
+int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log)
+{
+ return nvme_get_log(fd, nsid, NVME_LOG_SMART, false,
+ sizeof(*smart_log), smart_log);
+}
+
+/* rgo (return groups only) is passed as the log specific field. */
+int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo)
+{
+ __u64 lpo = 0;
+
+ return nvme_get_log13(fd, NVME_NSID_ALL, NVME_LOG_ANA, rgo, lpo, 0,
+ true, ana_log_len, ana_log);
+}
+
+int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log)
+{
+ return nvme_get_log(fd, NVME_NSID_ALL, NVME_LOG_DEVICE_SELF_TEST, false,
+ sizeof(*self_test_log), self_test_log);
+}
+
+int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log)
+{
+ return nvme_get_log(fd, 0, NVME_LOG_CMD_EFFECTS, false,
+ sizeof(*effects_log), effects_log);
+}
+
+int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size)
+{
+ return nvme_get_log(fd, 0, NVME_LOG_DISC, false, size, log);
+}
+
+int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log)
+{
+ return nvme_get_log(fd, 0, NVME_LOG_SANITIZE, false,
+ sizeof(*sanitize_log), sanitize_log);
+}
+
+int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10, __u32 cdw11,
+ __u32 cdw12, __u32 data_len, void *data, __u32 *result)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = opcode,
+ .nsid = nsid,
+ .cdw10 = cdw10,
+ .cdw11 = cdw11,
+ .cdw12 = cdw12,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = data_len,
+ };
+ int err;
+
+ err = nvme_submit_admin_passthru(fd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
+		     bool save, __u32 data_len, void *data, __u32 *result)
+{
+	__u32 cdw10 = fid | (save ? 1U << 31 : 0); /* SV is bit 31; 1U avoids UB of shifting into int's sign bit */
+
+	return nvme_feature(fd, nvme_admin_set_features, nsid, cdw10, value,
+			    cdw12, data_len, data, result);
+}
+
+static int nvme_property(int fd, __u8 fctype, __le32 off, __le64 *value, __u8 attrib)
+{
+	int err;
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_fabrics_command,
+		.cdw10 = attrib, /* property size selector: 0 = 4 bytes, 1 = 8 bytes */
+		.cdw11 = off, /* property offset, already little-endian */
+	};
+
+	if (!value) {
+		errno = EINVAL;
+		return -errno;
+	}
+
+	if (fctype == nvme_fabrics_type_property_get){ /* NOTE(review): nsid field carries the fabrics fctype here - confirm against kernel fabrics passthru convention */
+		cmd.nsid = nvme_fabrics_type_property_get;
+	} else if(fctype == nvme_fabrics_type_property_set) {
+		cmd.nsid = nvme_fabrics_type_property_set;
+		cmd.cdw12 = *value; /* NOTE(review): only the low 32 bits are sent; 64-bit property sets appear to lose the high half - verify */
+	} else {
+		errno = EINVAL;
+		return -errno;
+	}
+
+	err = nvme_submit_admin_passthru(fd, &cmd);
+	if (!err && fctype == nvme_fabrics_type_property_get)
+		*value = cpu_to_le64(cmd.result); /* cmd.result is 32 bits; callers read 64-bit props in two halves */
+	return err;
+}
+
+static int get_property_helper(int fd, int offset, void *value, int *advance)
+{
+ __le64 value64;
+ int err = -EINVAL;
+
+ switch (offset) {
+ case NVME_REG_CAP:
+ case NVME_REG_ASQ:
+ case NVME_REG_ACQ:
+ *advance = 8;
+ break;
+ default:
+ *advance = 4;
+ }
+
+ if (!value)
+ return err;
+
+ err = nvme_property(fd, nvme_fabrics_type_property_get,
+ cpu_to_le32(offset), &value64, (*advance == 8));
+
+ if (!err) {
+ if (*advance == 8)
+ *((uint64_t *)value) = le64_to_cpu(value64);
+ else
+ *((uint32_t *)value) = le32_to_cpu(value64);
+ }
+
+ return err;
+}
+
+int nvme_get_property(int fd, int offset, uint64_t *value)
+{
+ int advance;
+ return get_property_helper(fd, offset, value, &advance);
+}
+
+int nvme_get_properties(int fd, void **pbar)
+{
+	int offset, advance;
+	int err, ret = -EINVAL;
+	int size = getpagesize();
+
+	*pbar = malloc(size);
+	if (!*pbar) {
+		fprintf(stderr, "malloc: %s\n", strerror(errno));
+		return -ENOMEM;
+	}
+
+	memset(*pbar, 0xff, size);
+	for (offset = NVME_REG_CAP; offset <= NVME_REG_CMBSZ; offset += advance) {
+		err = get_property_helper(fd, offset, (char *)*pbar + offset, &advance); /* char *: void * arithmetic is a GNU extension */
+		if (!err)
+			ret = 0;
+	}
+
+	return ret;
+}
+
+int nvme_set_property(int fd, int offset, int value)
+{
+	__le64 val = cpu_to_le64(value);
+	__le32 off = cpu_to_le32(offset);
+	bool is64bit;
+
+	switch (offset) { /* compare in host byte order; 'off' is __le32 and would mismatch on big-endian */
+	case NVME_REG_CAP:
+	case NVME_REG_ASQ:
+	case NVME_REG_ACQ:
+		is64bit = true;
+		break;
+	default:
+		is64bit = false;
+	}
+
+	return nvme_property(fd, nvme_fabrics_type_property_set,
+			off, &val, is64bit ? 1: 0);
+}
+
+int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel, __u32 cdw11,
+ __u32 data_len, void *data, __u32 *result)
+{
+ __u32 cdw10 = fid | sel << 8;
+
+ return nvme_feature(fd, nvme_admin_get_features, nsid, cdw10, cdw11,
+ 0, data_len, data, result);
+}
+
+int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
+ __u8 pil, __u8 ms, __u32 timeout)
+{
+ __u32 cdw10 = lbaf | ms << 4 | pi << 5 | pil << 8 | ses << 9;
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_format_nvm,
+ .nsid = nsid,
+ .cdw10 = cdw10,
+ .timeout_ms = timeout,
+ };
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
+ __u8 dps, __u8 nmic, __u32 *result)
+{
+ struct nvme_id_ns ns = {
+ .nsze = cpu_to_le64(nsze),
+ .ncap = cpu_to_le64(ncap),
+ .flbas = flbas,
+ .dps = dps,
+ .nmic = nmic,
+ };
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_ns_mgmt,
+ .addr = (__u64)(uintptr_t) ((void *)&ns),
+ .cdw10 = 0,
+ .data_len = 0x1000,
+ };
+ int err;
+
+ err = nvme_submit_admin_passthru(fd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+int nvme_ns_delete(int fd, __u32 nsid)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_ns_mgmt,
+ .nsid = nsid,
+ .cdw10 = 1,
+ };
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist,
+ bool attach)
+{
+ int i;
+ __u8 buf[0x1000];
+ struct nvme_controller_list *cntlist =
+ (struct nvme_controller_list *)buf;
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_ns_attach,
+ .nsid = nsid,
+ .addr = (__u64)(uintptr_t) cntlist,
+ .cdw10 = attach ? 0 : 1,
+ .data_len = 0x1000,
+ };
+
+ memset(buf, 0, sizeof(buf));
+ cntlist->num = cpu_to_le16(num_ctrls);
+ for (i = 0; i < num_ctrls; i++)
+ cntlist->identifier[i] = cpu_to_le16(ctrlist[i]);
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{
+ return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, true);
+}
+
+int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist)
+{
+ return nvme_ns_attachment(fd, nsid, num_ctrls, ctrlist, false);
+}
+
+int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_download_fw,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = data_len,
+ .cdw10 = (data_len >> 2) - 1,
+ .cdw11 = offset >> 2,
+ };
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode		= nvme_admin_activate_fw,
+		.cdw10		= ((__u32)bpid << 31) | (action << 3) | slot, /* cast: shifting promoted int into bit 31 is UB */
+	};
+
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+ __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_security_send,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = data_len,
+ .nsid = nsid,
+ .cdw10 = secp << 24 | spsp << 8 | nssf,
+ .cdw11 = tl,
+ };
+ int err;
+
+ err = nvme_submit_admin_passthru(fd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+ __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_security_recv,
+ .nsid = nsid,
+ .cdw10 = secp << 24 | spsp << 8 | nssf,
+ .cdw11 = al,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = data_len,
+ };
+ int err;
+
+ err = nvme_submit_admin_passthru(fd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+ __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_directive_send,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = data_len,
+ .nsid = nsid,
+ .cdw10 = data_len? (data_len >> 2) - 1 : 0,
+ .cdw11 = dspec << 16 | dtype << 8 | doper,
+ .cdw12 = dw12,
+ };
+ int err;
+
+ err = nvme_submit_admin_passthru(fd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+ __u32 data_len, __u32 dw12, void *data, __u32 *result)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_directive_recv,
+ .addr = (__u64)(uintptr_t) data,
+ .data_len = data_len,
+ .nsid = nsid,
+ .cdw10 = data_len? (data_len >> 2) - 1 : 0,
+ .cdw11 = dspec << 16 | dtype << 8 | doper,
+ .cdw12 = dw12,
+ };
+ int err;
+
+ err = nvme_submit_admin_passthru(fd, &cmd);
+ if (!err && result)
+ *result = cmd.result;
+ return err;
+}
+
+int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
+ __u8 no_dealloc, __u32 ovrpat)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_sanitize_nvm,
+ .cdw10 = no_dealloc << 9 | oipbp << 8 |
+ owpass << NVME_SANITIZE_OWPASS_SHIFT |
+ ause << 3 | sanact,
+ .cdw11 = ovrpat,
+ };
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10)
+{
+ struct nvme_admin_cmd cmd = {
+ .opcode = nvme_admin_dev_self_test,
+ .nsid = nsid,
+ .cdw10 = cdw10,
+ };
+
+ return nvme_submit_admin_passthru(fd, &cmd);
+}
--- /dev/null
+#ifndef _NVME_LIB_H
+#define _NVME_LIB_H
+
+#include <linux/types.h>
+#include <stdbool.h>
+#include "linux/nvme_ioctl.h"
+#include "nvme.h"
+
+int nvme_get_nsid(int fd);
+
+/* Generic passthrough */
+int nvme_submit_passthru(int fd, unsigned long ioctl_cmd,
+ struct nvme_passthru_cmd *cmd);
+
+int nvme_passthru(int fd, unsigned long ioctl_cmd, __u8 opcode, __u8 flags,
+ __u16 rsvd, __u32 nsid, __u32 cdw2, __u32 cdw3,
+ __u32 cdw10, __u32 cdw11, __u32 cdw12,
+ __u32 cdw13, __u32 cdw14, __u32 cdw15,
+ __u32 data_len, void *data, __u32 metadata_len,
+ void *metadata, __u32 timeout_ms, __u32 *result);
+
+/* NVME_SUBMIT_IO */
+int nvme_io(int fd, __u8 opcode, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+int nvme_read(int fd, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+int nvme_write(int fd, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+int nvme_compare(int fd, __u64 slba, __u16 nblocks, __u16 control,
+ __u32 dsmgmt, __u32 reftag, __u16 apptag,
+ __u16 appmask, void *data, void *metadata);
+
+/* NVME_IO_CMD */
+int nvme_passthru_io(int fd, __u8 opcode, __u8 flags, __u16 rsvd,
+ __u32 nsid, __u32 cdw2, __u32 cdw3,
+ __u32 cdw10, __u32 cdw11, __u32 cdw12,
+ __u32 cdw13, __u32 cdw14, __u32 cdw15,
+ __u32 data_len, void *data, __u32 metadata_len,
+ void *metadata, __u32 timeout);
+
+int nvme_write_zeros(int fd, __u32 nsid, __u64 slba, __u16 nlb,
+ __u16 control, __u32 reftag, __u16 apptag, __u16 appmask);
+
+int nvme_write_uncorrectable(int fd, __u32 nsid, __u64 slba, __u16 nlb);
+
+int nvme_flush(int fd, __u32 nsid);
+
+int nvme_dsm(int fd, __u32 nsid, __u32 cdw11, struct nvme_dsm_range *dsm,
+ __u16 nr_ranges);
+struct nvme_dsm_range *nvme_setup_dsm_range(__u32 *ctx_attrs,
+ __u32 *llbas, __u64 *slbas,
+ __u16 nr_ranges);
+
+int nvme_resv_acquire(int fd, __u32 nsid, __u8 rtype, __u8 racqa,
+ bool iekey, __u64 crkey, __u64 nrkey);
+int nvme_resv_register(int fd, __u32 nsid, __u8 rrega, __u8 cptpl,
+ bool iekey, __u64 crkey, __u64 nrkey);
+int nvme_resv_release(int fd, __u32 nsid, __u8 rtype, __u8 rrela,
+ bool iekey, __u64 crkey);
+int nvme_resv_report(int fd, __u32 nsid, __u32 numd, __u32 cdw11, void *data);
+
+int nvme_identify13(int fd, __u32 nsid, __u32 cdw10, __u32 cdw11, void *data);
+int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data);
+int nvme_identify_ctrl(int fd, void *data);
+int nvme_identify_ns(int fd, __u32 nsid, bool present, void *data);
+int nvme_identify_ns_list(int fd, __u32 nsid, bool all, void *data);
+int nvme_identify_ctrl_list(int fd, __u32 nsid, __u16 cntid, void *data);
+int nvme_identify_ns_descs(int fd, __u32 nsid, void *data);
+int nvme_identify_nvmset(int fd, __u16 nvmset_id, void *data);
+int nvme_get_log13(int fd, __u32 nsid, __u8 log_id, __u8 lsp, __u64 lpo,
+ __u16 group_id, bool rae, __u32 data_len, void *data);
+int nvme_get_log(int fd, __u32 nsid, __u8 log_id, bool rae,
+ __u32 data_len, void *data);
+
+
+int nvme_get_telemetry_log(int fd, void *lp, int generate_report,
+ int ctrl_gen, size_t log_page_size, __u64 offset);
+int nvme_fw_log(int fd, struct nvme_firmware_log_page *fw_log);
+int nvme_changed_ns_list_log(int fd,
+ struct nvme_changed_ns_list_log *changed_ns_list_log);
+int nvme_error_log(int fd, int entries, struct nvme_error_log_page *err_log);
+int nvme_smart_log(int fd, __u32 nsid, struct nvme_smart_log *smart_log);
+int nvme_ana_log(int fd, void *ana_log, size_t ana_log_len, int rgo);
+int nvme_effects_log(int fd, struct nvme_effects_log_page *effects_log);
+int nvme_discovery_log(int fd, struct nvmf_disc_rsp_page_hdr *log, __u32 size);
+int nvme_sanitize_log(int fd, struct nvme_sanitize_log_page *sanitize_log);
+int nvme_endurance_log(int fd, __u16 group_id,
+ struct nvme_endurance_group_log *endurance_log);
+
+int nvme_feature(int fd, __u8 opcode, __u32 nsid, __u32 cdw10,
+ __u32 cdw11, __u32 cdw12, __u32 data_len, void *data,
+ __u32 *result);
+int nvme_set_feature(int fd, __u32 nsid, __u8 fid, __u32 value, __u32 cdw12,
+ bool save, __u32 data_len, void *data, __u32 *result);
+int nvme_get_feature(int fd, __u32 nsid, __u8 fid, __u8 sel,
+ __u32 cdw11, __u32 data_len, void *data, __u32 *result);
+
+int nvme_format(int fd, __u32 nsid, __u8 lbaf, __u8 ses, __u8 pi,
+ __u8 pil, __u8 ms, __u32 timeout);
+
+int nvme_ns_create(int fd, __u64 nsze, __u64 ncap, __u8 flbas,
+ __u8 dps, __u8 nmic, __u32 *result);
+int nvme_ns_delete(int fd, __u32 nsid);
+
+int nvme_ns_attachment(int fd, __u32 nsid, __u16 num_ctrls,
+ __u16 *ctrlist, bool attach);
+int nvme_ns_attach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
+int nvme_ns_detach_ctrls(int fd, __u32 nsid, __u16 num_ctrls, __u16 *ctrlist);
+
+int nvme_fw_download(int fd, __u32 offset, __u32 data_len, void *data);
+int nvme_fw_commit(int fd, __u8 slot, __u8 action, __u8 bpid);
+
+int nvme_sec_send(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+ __u8 secp, __u32 tl, __u32 data_len, void *data, __u32 *result);
+int nvme_sec_recv(int fd, __u32 nsid, __u8 nssf, __u16 spsp,
+ __u8 secp, __u32 al, __u32 data_len, void *data, __u32 *result);
+
+int nvme_subsystem_reset(int fd);
+int nvme_reset_controller(int fd);
+int nvme_ns_rescan(int fd);
+
+int nvme_dir_send(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+ __u32 data_len, __u32 dw12, void *data, __u32 *result);
+int nvme_dir_recv(int fd, __u32 nsid, __u16 dspec, __u8 dtype, __u8 doper,
+ __u32 data_len, __u32 dw12, void *data, __u32 *result);
+int nvme_get_properties(int fd, void **pbar);
+int nvme_set_property(int fd, int offset, int value);
+int nvme_get_property(int fd, int offset, uint64_t *value);
+int nvme_sanitize(int fd, __u8 sanact, __u8 ause, __u8 owpass, __u8 oipbp,
+ __u8 no_dealloc, __u32 ovrpat);
+int nvme_self_test_start(int fd, __u32 nsid, __u32 cdw10);
+int nvme_self_test_log(int fd, struct nvme_self_test_log *self_test_log);
+#endif /* _NVME_LIB_H */
--- /dev/null
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_H
+#define _NVME_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <endian.h>
+#include "plugin.h"
+#include "json.h"
+
+#define unlikely(x) x
+
+#ifdef LIBUUID
+#include <uuid/uuid.h>
+#else
+typedef struct {
+ uint8_t b[16];
+} uuid_t;
+#endif
+
+#include "linux/nvme.h"
+
+struct nvme_effects_log_page {
+ __le32 acs[256];
+ __le32 iocs[256];
+ __u8 resv[2048];
+};
+
+struct nvme_error_log_page {
+ __u64 error_count;
+ __u16 sqid;
+ __u16 cmdid;
+ __u16 status_field;
+ __u16 parm_error_location;
+ __u64 lba;
+ __u32 nsid;
+ __u8 vs;
+ __u8 resv[3];
+ __u64 cs;
+ __u8 resv2[24];
+};
+
+struct nvme_firmware_log_page {
+ __u8 afi;
+ __u8 resv[7];
+ __u64 frs[7];
+ __u8 resv2[448];
+};
+
+/* idle and active power scales occupy the last 2 bits of the field */
+#define POWER_SCALE(s) ((s) >> 6)
+
+struct nvme_host_mem_buffer {
+ __u32 hsize;
+ __u32 hmdlal;
+ __u32 hmdlau;
+ __u32 hmdlec;
+ __u8 rsvd16[4080];
+};
+
+struct nvme_auto_pst {
+ __u32 data;
+ __u32 rsvd32;
+};
+
+struct nvme_timestamp {
+ __u8 timestamp[6];
+ __u8 attr;
+ __u8 rsvd;
+};
+
+struct nvme_controller_list {
+ __le16 num;
+ __le16 identifier[];
+};
+
+struct nvme_bar_cap {
+ __u16 mqes;
+ __u8 ams_cqr;
+ __u8 to;
+ __u16 bps_css_nssrs_dstrd;
+ __u8 mpsmax_mpsmin;
+ __u8 reserved;
+};
+
+#ifdef __CHECKER__
+#define __force __attribute__((force))
+#else
+#define __force
+#endif
+
+#define cpu_to_le16(x) \
+ ((__force __le16)htole16(x))
+#define cpu_to_le32(x) \
+ ((__force __le32)htole32(x))
+#define cpu_to_le64(x) \
+ ((__force __le64)htole64(x))
+
+#define le16_to_cpu(x) \
+ le16toh((__force __u16)(x))
+#define le32_to_cpu(x) \
+ le32toh((__force __u32)(x))
+#define le64_to_cpu(x) \
+ le64toh((__force __u64)(x))
+
+#define MAX_LIST_ITEMS 256
+struct list_item {
+ char node[1024];
+ struct nvme_id_ctrl ctrl;
+ int nsid;
+ struct nvme_id_ns ns;
+ unsigned block;
+};
+
+struct ctrl_list_item {
+ char *name;
+ char *address;
+ char *transport;
+ char *state;
+ char *ana_state;
+};
+
+struct subsys_list_item {
+ char *name;
+ char *subsysnqn;
+ int nctrls;
+ struct ctrl_list_item *ctrls;
+};
+
+enum {
+ NORMAL,
+ JSON,
+ BINARY,
+};
+
+void register_extension(struct plugin *plugin);
+
+#include "argconfig.h"
+int parse_and_open(int argc, char **argv, const char *desc,
+ const struct argconfig_commandline_options *clo, void *cfg, size_t size);
+
+extern const char *devicename;
+
+int __id_ctrl(int argc, char **argv, struct command *cmd, struct plugin *plugin, void (*vs)(__u8 *vs, struct json_object *root));
+int validate_output_format(char *format);
+
+struct subsys_list_item *get_subsys_list(int *subcnt, char *subsysnqn, __u32 nsid);
+void free_subsys_list(struct subsys_list_item *slist, int n);
+char *nvme_char_from_block(char *block);
+#endif /* _NVME_H */
--- /dev/null
+#ifndef PLUGIN_H
+#define PLUGIN_H
+
+#include <stdbool.h>
+
+struct program {
+ const char *name;
+ const char *version;
+ const char *usage;
+ const char *desc;
+ const char *more;
+ struct command **commands;
+ struct plugin *extensions;
+};
+
+struct plugin {
+ const char *name;
+ const char *desc;
+ struct command **commands;
+ struct program *parent;
+ struct plugin *next;
+ struct plugin *tail;
+};
+
+struct command {
+ char *name;
+ char *help;
+ int (*fn)(int argc, char **argv, struct command *command, struct plugin *plugin);
+ char *alias;
+};
+
+void usage(struct plugin *plugin);
+void general_help(struct plugin *plugin);
+int handle_plugin(int argc, char **argv, struct plugin *plugin);
+
+#endif
return;
p->refcount--;
if (p->refcount) {
- condlog(3, "%s prioritizer refcount %d",
+ condlog(4, "%s prioritizer refcount %d",
p->name, p->refcount);
return;
}
#define PRIO_WEIGHTED_PATH "weightedpath"
#define PRIO_SYSFS "sysfs"
#define PRIO_PATH_LATENCY "path_latency"
+#define PRIO_ANA "ana"
/*
* Value used to mark the fact prio was not defined
libpriopath_latency.so \
libpriosysfs.so
+ifneq ($(call check_file,/usr/include/linux/nvme_ioctl.h),0)
+ LIBS += libprioana.so
+ CFLAGS += -I../nvme
+endif
+
all: $(LIBS)
libprioalua.so: alua.o alua_rtpg.o
--- /dev/null
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017 All Rights Reserved.
+ *
+ * ana.c
+ * Version 1.00
+ *
+ * Tool to make use of a NVMe-feature called Asymmetric Namespace Access.
+ * It determines the ANA state of a device and prints a priority value to stdout.
+ *
+ * Author(s): Cheng Jike <chengjike.cheng@huawei.com>
+ * Li Jie <lijie34@huawei.com>
+ *
+ * This file is released under the GPL version 2, or any later version.
+ */
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <libudev.h>
+
+#include "debug.h"
+#include "nvme-lib.h"
+#include "prio.h"
+#include "util.h"
+#include "structs.h"
+
+enum {
+ ANA_ERR_GETCTRL_FAILED = 1,
+ ANA_ERR_NOT_NVME,
+ ANA_ERR_NOT_SUPPORTED,
+ ANA_ERR_GETANAS_OVERFLOW,
+ ANA_ERR_GETANAS_NOTFOUND,
+ ANA_ERR_GETANALOG_FAILED,
+ ANA_ERR_GETNSID_FAILED,
+ ANA_ERR_GETNS_FAILED,
+ ANA_ERR_NO_MEMORY,
+ ANA_ERR_NO_INFORMATION,
+};
+
+static const char *ana_errmsg[] = {
+ [ANA_ERR_GETCTRL_FAILED] = "couldn't get ctrl info",
+ [ANA_ERR_NOT_NVME] = "not an NVMe device",
+ [ANA_ERR_NOT_SUPPORTED] = "ANA not supported",
+ [ANA_ERR_GETANAS_OVERFLOW] = "buffer overflow in ANA log",
+ [ANA_ERR_GETANAS_NOTFOUND] = "NSID or ANAGRPID not found",
+ [ANA_ERR_GETANALOG_FAILED] = "couldn't get ana log",
+ [ANA_ERR_GETNSID_FAILED] = "couldn't get NSID",
+ [ANA_ERR_GETNS_FAILED] = "couldn't get namespace info",
+ [ANA_ERR_NO_MEMORY] = "out of memory",
+ [ANA_ERR_NO_INFORMATION] = "invalid fd",
+};
+
+static const char *anas_string[] = {
+ [NVME_ANA_OPTIMIZED] = "ANA Optimized State",
+ [NVME_ANA_NONOPTIMIZED] = "ANA Non-Optimized State",
+ [NVME_ANA_INACCESSIBLE] = "ANA Inaccessible State",
+ [NVME_ANA_PERSISTENT_LOSS] = "ANA Persistent Loss State",
+ [NVME_ANA_CHANGE] = "ANA Change state",
+};
+
+static const char *aas_print_string(int rc)
+{
+ rc &= 0xff;
+ if (rc >= 0 && rc < ARRAY_SIZE(anas_string) &&
+ anas_string[rc] != NULL)
+ return anas_string[rc];
+
+ return "invalid ANA state";
+}
+
+static int get_ana_state(__u32 nsid, __u32 anagrpid, void *ana_log,
+			 size_t ana_log_len)
+{
+	void *base = ana_log;
+	struct nvme_ana_rsp_hdr *hdr = base;
+	struct nvme_ana_group_desc *ana_desc;
+	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
+	__u32 nr_nsids;
+	size_t nsid_buf_size;
+	__u32 i, j; /* unsigned: loop bounds come from device-supplied __u16/__u32 counts */
+
+	for (i = 0; i < le16_to_cpu(hdr->ngrps); i++) {
+		ana_desc = base + offset;
+
+		offset += sizeof(*ana_desc);
+		if (offset > ana_log_len)
+			return -ANA_ERR_GETANAS_OVERFLOW;
+
+		nr_nsids = le32_to_cpu(ana_desc->nnsids);
+		nsid_buf_size = nr_nsids * sizeof(__le32);
+
+		offset += nsid_buf_size;
+		if (offset > ana_log_len)
+			return -ANA_ERR_GETANAS_OVERFLOW;
+
+		for (j = 0; j < nr_nsids; j++) {
+			if (nsid == le32_to_cpu(ana_desc->nsids[j]))
+				return ana_desc->state;
+		}
+
+		if (anagrpid != 0 && anagrpid == le32_to_cpu(ana_desc->grpid))
+			return ana_desc->state;
+
+	}
+	return -ANA_ERR_GETANAS_NOTFOUND;
+}
+
+int get_ana_info(struct path * pp, unsigned int timeout)
+{
+	int rc;
+	int nsid; /* int, not __u32: nvme_get_nsid() returns negative error codes */
+	struct nvme_id_ctrl ctrl;
+	struct nvme_id_ns ns;
+	void *ana_log;
+	size_t ana_log_len;
+	bool is_anagrpid_const;
+
+	rc = nvme_id_ctrl_ana(pp->fd, &ctrl);
+	if (rc < 0) {
+		log_nvme_errcode(rc, pp->dev, "nvme_identify_ctrl");
+		return -ANA_ERR_GETCTRL_FAILED;
+	} else if (rc == 0)
+		return -ANA_ERR_NOT_SUPPORTED;
+
+	nsid = nvme_get_nsid(pp->fd);
+	if (nsid <= 0) {
+		log_nvme_errcode(nsid, pp->dev, "nvme_get_nsid");
+		return -ANA_ERR_GETNSID_FAILED;
+	}
+	is_anagrpid_const = ctrl.anacap & (1 << 6);
+
+	/*
+	 * Code copied from nvme-cli/nvme.c. We don't need to allocate an
+	 * [nanagrpid*mnan] array of NSIDs because each NSID can occur at most
+	 * in one ANA group.
+	 */
+	ana_log_len = sizeof(struct nvme_ana_rsp_hdr) +
+		le32_to_cpu(ctrl.nanagrpid)
+		* sizeof(struct nvme_ana_group_desc);
+
+	if (is_anagrpid_const) {
+		rc = nvme_identify_ns(pp->fd, nsid, 0, &ns);
+		if (rc) {
+			log_nvme_errcode(rc, pp->dev, "nvme_identify_ns");
+			return -ANA_ERR_GETNS_FAILED;
+		}
+	} else
+		ana_log_len += le32_to_cpu(ctrl.mnan) * sizeof(__le32);
+
+	ana_log = malloc(ana_log_len);
+	if (!ana_log)
+		return -ANA_ERR_NO_MEMORY;
+	pthread_cleanup_push(free, ana_log);
+	rc = nvme_ana_log(pp->fd, ana_log, ana_log_len,
+			is_anagrpid_const ? NVME_ANA_LOG_RGO : 0);
+	if (rc) {
+		log_nvme_errcode(rc, pp->dev, "nvme_ana_log");
+		rc = -ANA_ERR_GETANALOG_FAILED;
+	} else
+		rc = get_ana_state(nsid,
+				is_anagrpid_const ?
+				le32_to_cpu(ns.anagrpid) : 0,
+				ana_log, ana_log_len);
+	pthread_cleanup_pop(1);
+	if (rc >= 0)
+		condlog(3, "%s: ana state = %02x [%s]", pp->dev, rc,
+			aas_print_string(rc));
+	return rc;
+}
+
+/*
+ * Priorities modeled roughly after the ALUA model (alua.c/sysfs.c)
+ * Reference: ANA Base Protocol (NVMe TP 4004a, 11/13/2018).
+ *
+ * Differences:
+ *
+ * - The ANA base spec defines no implicit or explicit (STPG) state management.
+ * If a state is encountered that doesn't allow normal I/O (all except
+ * OPTIMIZED and NON_OPTIMIZED), we can't do anything but either wait for a
+ * Access State Change Notice (can't do that in multipathd as we don't receive
+ * those), or retry commands in regular time intervals until ANATT is expired
+ * (not implemented). Mapping UNAVAILABLE state to ALUA STANDBY is the best we
+ * can currently do.
+ *
+ * FIXME: Waiting for ANATT could be implemented with a "delayed failback"
+ * mechanism. The current "failback" method can't be used, as it would
+ * affect failback to every state, and here only failback to UNAVAILABLE
+ * should be delayed.
+ *
+ * - PERSISTENT_LOSS state is even below ALUA's UNAVAILABLE state.
+ * FIXME: According to the ANA TP, accessing paths in PERSISTENT_LOSS state
+ * in any way makes no sense (e.g. §8.19.6 - paths in this state shouldn't
+ * even be checked under "all paths down" conditions). Device mapper can,
+ * and will, select a PG for IO if it has non-failed paths, even if the
+ * PG has priority 0. We could avoid that only with an "ANA path checker".
+ *
+ * - ALUA has no CHANGE state. The ANA TP §8.18.3 / §8.19.4 suggests
+ * that CHANGE state should be treated in roughly the same way as
+ * INACCESSIBLE. Therefore we assign the same prio to it.
+ *
+ * - ALUA's LBA-dependent state has no ANA equivalent.
+ */
+
+int getprio(struct path *pp, char *args, unsigned int timeout)
+{
+	int rc;
+
+	if (pp->fd < 0)
+		rc = -ANA_ERR_NO_INFORMATION;
+	else
+		rc = get_ana_info(pp, timeout);
+
+	switch (rc) {
+	case NVME_ANA_OPTIMIZED:
+		return 50; /* matches ALUA active/optimized priority */
+	case NVME_ANA_NONOPTIMIZED:
+		return 10; /* matches ALUA active/non-optimized priority */
+	case NVME_ANA_INACCESSIBLE:
+	case NVME_ANA_CHANGE:
+		return 1; /* CHANGE treated like INACCESSIBLE, see block comment above */
+	case NVME_ANA_PERSISTENT_LOSS:
+		return 0; /* lowest priority: path unusable */
+	default:
+		break;
+	}
+	if (rc < 0 && -rc < ARRAY_SIZE(ana_errmsg))
+		condlog(2, "%s: ANA error: %s", pp->dev, ana_errmsg[-rc]);
+	else
+		condlog(1, "%s: invalid ANA rc code %d", pp->dev, rc);
+	return -1;
+}
*/
#include <stdio.h>
+#include "nvme-lib.h"
#include "checkers.h"
#include "memory.h"
#include "vector.h"
"(setting: multipath command line [-p] flag)";
static const char autodetect_origin[] =
"(setting: storage device autodetected)";
+static const char marginal_path_origin[] =
+ "(setting: implied by marginal_path check)";
#define do_default(dest, value) \
do { \
{
struct prio *p = &pp->prio;
char buff[512];
- char *default_prio = PRIO_ALUA;
-
- if (pp->tpgs <= 0)
- return;
- if (pp->tpgs == 2 || !check_rdac(pp)) {
- if (sysfs_get_asymmetric_access_state(pp, buff, 512) >= 0)
+ char *default_prio;
+
+ switch(pp->bus) {
+ case SYSFS_BUS_NVME:
+ if (nvme_id_ctrl_ana(pp->fd, NULL) == 0)
+ return;
+ default_prio = PRIO_ANA;
+ break;
+ case SYSFS_BUS_SCSI:
+ if (pp->tpgs <= 0)
+ return;
+ if ((pp->tpgs == 2 || !check_rdac(pp)) &&
+ sysfs_get_asymmetric_access_state(pp, buff, 512) >= 0)
default_prio = PRIO_SYSFS;
+ else
+ default_prio = PRIO_ALUA;
+ break;
+ default:
+ return;
}
prio_get(conf->multipath_dir, p, default_prio, DEFAULT_PRIO_ARGS);
}
mp_set_conf(delay_watch_checks);
mp_set_default(delay_watch_checks, DEFAULT_DELAY_CHECKS);
out:
- print_off_int_undef(buff, 12, mp->delay_watch_checks);
- condlog(3, "%s: delay_watch_checks = %s %s", mp->alias, buff, origin);
+ if (print_off_int_undef(buff, 12, mp->delay_watch_checks) != 0)
+ condlog(3, "%s: delay_watch_checks = %s %s",
+ mp->alias, buff, origin);
return 0;
}
mp_set_conf(delay_wait_checks);
mp_set_default(delay_wait_checks, DEFAULT_DELAY_CHECKS);
out:
- print_off_int_undef(buff, 12, mp->delay_wait_checks);
- condlog(3, "%s: delay_wait_checks = %s %s", mp->alias, buff, origin);
+ if (print_off_int_undef(buff, 12, mp->delay_wait_checks) != 0)
+ condlog(3, "%s: delay_wait_checks = %s %s",
+ mp->alias, buff, origin);
+ return 0;
+
+}
+
+static int san_path_deprecated_warned;
+#define warn_san_path_deprecated(v, x) \
+ do { \
+ if (v->x > 0 && !san_path_deprecated_warned) { \
+ san_path_deprecated_warned = 1; \
+ condlog(1, "WARNING: option %s is deprecated, " \
+ "please use marginal_path options instead", \
+ #x); \
+ } \
+ } while(0)
+
+int select_san_path_err_threshold(struct config *conf, struct multipath *mp)
+{
+ const char *origin;
+ char buff[12];
+
+ if (marginal_path_check_enabled(mp)) {
+ mp->san_path_err_threshold = NU_NO;
+ origin = marginal_path_origin;
+ goto out;
+ }
+ mp_set_mpe(san_path_err_threshold);
+ mp_set_ovr(san_path_err_threshold);
+ mp_set_hwe(san_path_err_threshold);
+ mp_set_conf(san_path_err_threshold);
+ mp_set_default(san_path_err_threshold, DEFAULT_ERR_CHECKS);
+out:
+ if (print_off_int_undef(buff, 12, mp->san_path_err_threshold) != 0)
+ condlog(3, "%s: san_path_err_threshold = %s %s",
+ mp->alias, buff, origin);
+ warn_san_path_deprecated(mp, san_path_err_threshold);
+ return 0;
+}
+
+int select_san_path_err_forget_rate(struct config *conf, struct multipath *mp)
+{
+ const char *origin;
+ char buff[12];
+
+ if (marginal_path_check_enabled(mp)) {
+ mp->san_path_err_forget_rate = NU_NO;
+ origin = marginal_path_origin;
+ goto out;
+ }
+ mp_set_mpe(san_path_err_forget_rate);
+ mp_set_ovr(san_path_err_forget_rate);
+ mp_set_hwe(san_path_err_forget_rate);
+ mp_set_conf(san_path_err_forget_rate);
+ mp_set_default(san_path_err_forget_rate, DEFAULT_ERR_CHECKS);
+out:
+ if (print_off_int_undef(buff, 12, mp->san_path_err_forget_rate) != 0)
+ condlog(3, "%s: san_path_err_forget_rate = %s %s", mp->alias,
+ buff, origin);
+ warn_san_path_deprecated(mp, san_path_err_forget_rate);
+ return 0;
+
+}
+
+int select_san_path_err_recovery_time(struct config *conf, struct multipath *mp)
+{
+ const char *origin;
+ char buff[12];
+
+ if (marginal_path_check_enabled(mp)) {
+ mp->san_path_err_recovery_time = NU_NO;
+ origin = marginal_path_origin;
+ goto out;
+ }
+ mp_set_mpe(san_path_err_recovery_time);
+ mp_set_ovr(san_path_err_recovery_time);
+ mp_set_hwe(san_path_err_recovery_time);
+ mp_set_conf(san_path_err_recovery_time);
+ mp_set_default(san_path_err_recovery_time, DEFAULT_ERR_CHECKS);
+out:
+ if (print_off_int_undef(buff, 12, mp->san_path_err_recovery_time) != 0)
+ condlog(3, "%s: san_path_err_recovery_time = %s %s", mp->alias,
+ buff, origin);
+ warn_san_path_deprecated(mp, san_path_err_recovery_time);
return 0;
}
mp_set_conf(marginal_path_err_sample_time);
mp_set_default(marginal_path_err_sample_time, DEFAULT_ERR_CHECKS);
out:
- print_off_int_undef(buff, 12, mp->marginal_path_err_sample_time);
- condlog(3, "%s: marginal_path_err_sample_time = %s %s", mp->alias, buff,
- origin);
+ if (print_off_int_undef(buff, 12, mp->marginal_path_err_sample_time)
+ != 0)
+ condlog(3, "%s: marginal_path_err_sample_time = %s %s",
+ mp->alias, buff, origin);
return 0;
}
mp_set_conf(marginal_path_err_rate_threshold);
mp_set_default(marginal_path_err_rate_threshold, DEFAULT_ERR_CHECKS);
out:
- print_off_int_undef(buff, 12, mp->marginal_path_err_rate_threshold);
- condlog(3, "%s: marginal_path_err_rate_threshold = %s %s", mp->alias, buff,
- origin);
+ if (print_off_int_undef(buff, 12, mp->marginal_path_err_rate_threshold)
+ != 0)
+ condlog(3, "%s: marginal_path_err_rate_threshold = %s %s",
+ mp->alias, buff, origin);
return 0;
}
mp_set_conf(marginal_path_err_recheck_gap_time);
mp_set_default(marginal_path_err_recheck_gap_time, DEFAULT_ERR_CHECKS);
out:
- print_off_int_undef(buff, 12, mp->marginal_path_err_recheck_gap_time);
- condlog(3, "%s: marginal_path_err_recheck_gap_time = %s %s", mp->alias, buff,
- origin);
+ if (print_off_int_undef(buff, 12,
+ mp->marginal_path_err_recheck_gap_time) != 0)
+ condlog(3, "%s: marginal_path_err_recheck_gap_time = %s %s",
+ mp->alias, buff, origin);
return 0;
}
mp_set_conf(marginal_path_double_failed_time);
mp_set_default(marginal_path_double_failed_time, DEFAULT_ERR_CHECKS);
out:
- print_off_int_undef(buff, 12, mp->marginal_path_double_failed_time);
- condlog(3, "%s: marginal_path_double_failed_time = %s %s", mp->alias, buff,
- origin);
+ if (print_off_int_undef(buff, 12, mp->marginal_path_double_failed_time)
+ != 0)
+ condlog(3, "%s: marginal_path_double_failed_time = %s %s",
+ mp->alias, buff, origin);
return 0;
}
mp_set_conf(ghost_delay);
mp_set_default(ghost_delay, DEFAULT_GHOST_DELAY);
out:
- print_off_int_undef(buff, 12, mp->ghost_delay);
- condlog(3, "%s: ghost_delay = %s %s", mp->alias, buff, origin);
+ if (print_off_int_undef(buff, 12, mp->ghost_delay) != 0)
+ condlog(3, "%s: ghost_delay = %s %s", mp->alias, buff, origin);
return 0;
}
int select_delay_wait_checks (struct config *conf, struct multipath * mp);
int select_skip_kpartx (struct config *conf, struct multipath * mp);
int select_max_sectors_kb (struct config *conf, struct multipath * mp);
+int select_san_path_err_forget_rate(struct config *conf, struct multipath *mp);
+int select_san_path_err_threshold(struct config *conf, struct multipath *mp);
+int select_san_path_err_recovery_time(struct config *conf, struct multipath *mp);
int select_marginal_path_err_sample_time(struct config *conf, struct multipath *mp);
int select_marginal_path_err_rate_threshold(struct config *conf, struct multipath *mp);
int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multipath *mp);
#include "generic.h"
#define WWID_SIZE 128
-#define SERIAL_SIZE 65
+#define SERIAL_SIZE 128
#define NODE_NAME_SIZE 224
#define PATH_STR_SIZE 16
#define PARAMS_SIZE 4096
};
enum initialized_states {
+ INIT_NEW,
INIT_FAILED,
INIT_MISSING_UDEV,
INIT_REQUESTED_UDEV,
int initialized;
int retriggers;
int wwid_changed;
+ unsigned int path_failures;
+ time_t dis_reinstate_time;
+ int disable_reinstate;
+ int san_path_err_forget_rate;
time_t io_err_dis_reinstate_time;
int io_err_disable_reinstate;
int io_err_pathfail_cnt;
int deferred_remove;
int delay_watch_checks;
int delay_wait_checks;
+ int san_path_err_threshold;
+ int san_path_err_forget_rate;
+ int san_path_err_recovery_time;
int marginal_path_err_sample_time;
int marginal_path_err_rate_threshold;
int marginal_path_err_recheck_gap_time;
struct gen_multipath generic_mp;
};
+/*
+ * Predicates for the three "shaky paths detection" methods.
+ * Each returns nonzero iff the corresponding method is fully configured
+ * for this map (all of its tuning parameters set to meaningful values).
+ */
+static inline int marginal_path_check_enabled(const struct multipath *mpp)
+{
+	return mpp->marginal_path_double_failed_time > 0 &&
+		mpp->marginal_path_err_sample_time > 0 &&
+		mpp->marginal_path_err_recheck_gap_time > 0 &&
+		mpp->marginal_path_err_rate_threshold >= 0;
+}
+
+static inline int san_path_check_enabled(const struct multipath *mpp)
+{
+	return mpp->san_path_err_threshold > 0 &&
+		mpp->san_path_err_forget_rate > 0 &&
+		mpp->san_path_err_recovery_time > 0;
+}
+
+/* NOTE(review): NU_NO is defined elsewhere; presumably "not configured" --
+ * confirm against its definition. */
+static inline int delay_check_enabled(const struct multipath *mpp)
+{
+	return mpp->delay_watch_checks != NU_NO ||
+		mpp->delay_wait_checks != NU_NO;
+}
+
struct pathgroup {
long id;
int status;
#include "configure.h"
#include "libdevmapper.h"
#include "io_err_stat.h"
+#include "switchgroup.h"
/*
* creates or updates mpp->paths reading mpp->pg
vector_foreach_slot (pathvec, pp, i) {
if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
+ if (pp->size != 0 && mpp->size != 0 &&
+ pp->size != mpp->size) {
+ condlog(3, "%s: size mismatch for %s, not adding path",
+ pp->dev, mpp->alias);
+ continue;
+ }
condlog(3, "%s: ownership set to %s",
pp->dev, mpp->alias);
pp->mpp = mpp;
pp->fd = -1;
}
-void orphan_paths(vector pathvec, struct multipath *mpp)
+void orphan_paths(vector pathvec, struct multipath *mpp, const char *reason)
{
int i;
struct path * pp;
vector_foreach_slot (pathvec, pp, i) {
if (pp->mpp == mpp) {
- orphan_path(pp, "map flushed");
+ orphan_path(pp, reason);
}
}
}
{
int i;
- condlog(4, "%s: remove multipath map", mpp->alias);
-
/*
* clear references to this map
*/
- orphan_paths(vecs->pathvec, mpp);
+ orphan_paths(vecs->pathvec, mpp, "map removed internally");
if (purge_vec &&
(i = find_slot(vecs->mpvec, (void *)mpp)) != -1)
remove_map_by_alias(const char *alias, struct vectors * vecs, int purge_vec)
{
struct multipath * mpp = find_mp_by_alias(vecs->mpvec, alias);
- if (mpp)
+ if (mpp) {
+ condlog(2, "%s: removing map by alias", alias);
remove_map(mpp, vecs, purge_vec);
+ }
}
void
int
update_multipath_strings(struct multipath *mpp, vector pathvec, int is_daemon)
{
+ struct pathgroup *pgp;
+ int i;
+
if (!mpp)
return 1;
if (update_multipath_status(mpp))
return 1;
+ vector_foreach_slot(mpp->pg, pgp, i)
+ if (pgp->paths)
+ path_group_prio_update(pgp);
+
return 0;
}
vector_del_slot(mpp->paths, i);
i--;
+ /* Make sure mpp->hwe doesn't point to freed memory.
+ * We call extract_hwe_from_path() below to restore
+ * mpp->hwe
+ */
+ if (mpp->hwe == pp->hwe)
+ mpp->hwe = NULL;
if ((j = find_slot(vecs->pathvec,
(void *)pp)) != -1)
vector_del_slot(vecs->pathvec, j);
mpp->alias, pp->dev, pp->dev_t);
}
}
+ extract_hwe_from_path(mpp);
return count;
}
void enter_recovery_mode(struct multipath *mpp);
int adopt_paths (vector pathvec, struct multipath * mpp);
-void orphan_paths (vector pathvec, struct multipath * mpp);
+void orphan_paths(vector pathvec, struct multipath *mpp,
+ const char *reason);
void orphan_path (struct path * pp, const char *reason);
int verify_paths(struct multipath * mpp, struct vectors * vecs);
return fnmatch("dm-*", di->d_name, FNM_FILE_NAME) == 0;
}
-static void close_fd(void *arg)
-{
- close((long)arg);
-}
-
bool sysfs_is_multipathed(const struct path *pp)
{
char pathbuf[PATH_MAX];
monitor = udev_monitor_new_from_netlink(udev, "udev");
if (!monitor) {
condlog(2, "failed to create udev monitor");
- goto out;
+ goto failback;
}
pthread_cleanup_push(monitor_cleanup, monitor);
#ifdef LIBUDEV_API_RECVBUF
}
need_failback = 0;
out:
- if (monitor)
- pthread_cleanup_pop(1);
+ pthread_cleanup_pop(1);
+failback:
if (need_failback)
err = failback_listen();
pthread_cleanup_pop(1);
}
strncpy(*word, sentence, len);
strchop(*word);
- condlog(4, "*word = %s, len = %i", *word, len);
+ condlog(5, "*word = %s, len = %i", *word, len);
if (*p == '\0')
return 0;
if (stat("/sys/dev/block", &statbuf) == 0) {
/* Newer kernels have /sys/dev/block */
sprintf(block_path,"/sys/dev/block/%u:%u", major, minor);
+ dev[FILE_NAME_SIZE - 1] = '\0';
if (lstat(block_path, &statbuf) == 0) {
if (S_ISLNK(statbuf.st_mode) &&
readlink(block_path, dev, FILE_NAME_SIZE-1) > 0) {
return 0;
}
}
- goto skip_proc;
+ condlog(4, "%s is invalid", block_path);
+ return 1;
}
memset(block_path, 0, sizeof(block_path));
}
}
fclose(fd);
-skip_proc:
+
if (strncmp(block_path,"/sys/block", 10)) {
condlog(3, "No device found for %u:%u", major, minor);
return 1;
FREE(res->di[i]);
FREE(res->di);
}
+
+/*
+ * Generic pthread_cleanup_push() handler: closes the file descriptor
+ * that was smuggled through the void * argument as a (long) cast.
+ */
+void close_fd(void *arg)
+{
+	close((long)arg);
+}
#include <sys/types.h>
#include <inttypes.h>
+#include <stdbool.h>
size_t strchop(char *);
int basenamecpy (const char *src, char *dst, size_t size);
void set_max_fds(int max_fds);
#define KERNEL_VERSION(maj, min, ptc) ((((maj) * 256) + (min)) * 256 + (ptc))
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#define safe_sprintf(var, format, args...) \
snprintf(var, sizeof(var), format, ##args) >= sizeof(var)
#define pthread_cleanup_push_cast(f, arg) \
pthread_cleanup_push(((void (*)(void *))&f), (arg))
+void close_fd(void *arg);
+
struct scandir_result {
struct dirent **di;
int n;
};
void free_scandir_result(struct scandir_result *);
+/* Test whether bit number 'bit' is set in the uint64_t bitmap 'arr'.
+ * Caller must ensure 'arr' holds at least bit/64 + 1 elements. */
+static inline bool is_bit_set_in_array(unsigned int bit, const uint64_t *arr)
+{
+	return arr[bit / 64] & (1ULL << (bit % 64)) ? 1 : 0;
+}
+
+/* Set bit number 'bit' in the uint64_t bitmap 'arr'. */
+static inline void set_bit_in_array(unsigned int bit, uint64_t *arr)
+{
+	arr[bit / 64] |= (1ULL << (bit % 64));
+}
+
+/* Clear bit number 'bit' in the uint64_t bitmap 'arr'. */
+static inline void clear_bit_in_array(unsigned int bit, uint64_t *arr)
+{
+	arr[bit / 64] &= ~(1ULL << (bit % 64));
+}
+
#endif /* _UTIL_H */
#ifndef _VERSION_H
#define _VERSION_H
-#define VERSION_CODE 0x000709
-#define DATE_CODE 0x0b0e12
+#define VERSION_CODE 0x000800
+#define DATE_CODE 0x020e13
#define PROG "multipath-tools"
struct udev *udev;
struct config *multipath_conf;
+/*
+ * Return values of configure(), print_cmd_valid(), and main().
+ * RTVL_{YES,NO} are synonyms for RTVL_{OK,FAIL} for the CMD_VALID_PATH case.
+ */
+enum {
+ RTVL_OK = 0,
+ RTVL_YES = RTVL_OK,
+ RTVL_FAIL = 1,
+ RTVL_NO = RTVL_FAIL,
+ RTVL_MAYBE, /* only used internally, never returned */
+ RTVL_RETRY, /* returned by configure(), not by main() */
+};
+
struct config *get_multipath_config(void)
{
return multipath_conf;
goto out;
}
- if (!dm_is_mpath(mapname)) {
+ if (dm_is_mpath(mapname) != 1) {
condlog(1, "%s is not a multipath map", devpath);
goto free;
}
};
static const char shm_find_mp_dir[] = MULTIPATH_SHM_BASE "find_multipaths";
-static void close_fd(void *arg)
-{
- close((long)arg);
-}
/**
* find_multipaths_check_timeout(wwid, tmo)
struct timespec now, ftimes[2], tdiff;
struct stat st;
long fd;
- int r, err, retries = 0;
+ int r, retries = 0;
clock_gettime(CLOCK_REALTIME, &now);
if (fd != -1) {
pthread_cleanup_push(close_fd, (void *)fd);
r = fstat(fd, &st);
- if (r != 0)
- err = errno;
pthread_cleanup_pop(1);
} else if (tmo > 0) {
path, strerror(errno));
}
r = fstat(fd, &st);
- if (r != 0)
- err = errno;
pthread_cleanup_pop(1);
} else
return FIND_MULTIPATHS_NEVER;
if (r != 0) {
- condlog(1, "%s: error in fstat for %s: %s", __func__,
- path, strerror(err));
+ condlog(1, "%s: error in fstat for %s: %m", __func__, path);
return FIND_MULTIPATHS_ERROR;
}
static int print_cmd_valid(int k, const vector pathvec,
struct config *conf)
{
- static const int vals[] = { 1, 0, 2 };
int wait = FIND_MULTIPATHS_NEVER;
struct timespec until;
struct path *pp;
- if (k < 0 || k >= (sizeof(vals) / sizeof(int)))
- return 1;
+ if (k != RTVL_YES && k != RTVL_NO && k != RTVL_MAYBE)
+ return RTVL_NO;
- if (k == 2) {
+ if (k == RTVL_MAYBE) {
/*
* Caller ensures that pathvec[0] is the path to
* examine.
wait = find_multipaths_check_timeout(
pp, pp->find_multipaths_timeout, &until);
if (wait != FIND_MULTIPATHS_WAITING)
- k = 1;
+ k = RTVL_NO;
} else if (pathvec != NULL && (pp = VECTOR_SLOT(pathvec, 0)))
wait = find_multipaths_check_timeout(pp, 0, &until);
if (wait == FIND_MULTIPATHS_WAITING)
until.tv_sec, until.tv_nsec/1000);
else if (wait == FIND_MULTIPATHS_WAIT_DONE)
printf("FIND_MULTIPATHS_WAIT_UNTIL=\"0\"\n");
- printf("DM_MULTIPATH_DEVICE_PATH=\"%d\"\n", vals[k]);
- return k == 1;
+ printf("DM_MULTIPATH_DEVICE_PATH=\"%d\"\n",
+ k == RTVL_MAYBE ? 2 : k == RTVL_YES ? 1 : 0);
+ /* Never return RTVL_MAYBE */
+ return k == RTVL_NO ? RTVL_NO : RTVL_YES;
}
/*
return ret;
}
-/*
- * Return value:
- * -1: Retry
- * 0: Success
- * 1: Failure
- */
static int
configure (struct config *conf, enum mpath_cmds cmd,
enum devtypes dev_type, char *devpath)
vector curmp = NULL;
vector pathvec = NULL;
struct vectors vecs;
- int r = 1;
+ int r = RTVL_FAIL, rc;
int di_flag = 0;
char * refwwid = NULL;
char * dev = NULL;
goto out;
}
if (cmd == CMD_REMOVE_WWID) {
- r = remove_wwid(refwwid);
- if (r == 0)
+ rc = remove_wwid(refwwid);
+ if (rc == 0) {
printf("wwid '%s' removed\n", refwwid);
- else if (r == 1) {
+ r = RTVL_OK;
+ } else if (rc == 1) {
printf("wwid '%s' not in wwids file\n",
refwwid);
- r = 0;
+ r = RTVL_OK;
}
goto out;
}
if (cmd == CMD_ADD_WWID) {
- r = remember_wwid(refwwid);
- if (r >= 0)
+ rc = remember_wwid(refwwid);
+ if (rc >= 0) {
printf("wwid '%s' added\n", refwwid);
- else
+ r = RTVL_OK;
+ } else
printf("failed adding '%s' to wwids file\n",
refwwid);
goto out;
*/
if (cmd == CMD_VALID_PATH) {
if (is_failed_wwid(refwwid) == WWID_IS_FAILED) {
- r = 1;
+ r = RTVL_NO;
goto print_valid;
}
if ((!find_multipaths_on(conf) &&
ignore_wwids_on(conf)) ||
check_wwids_file(refwwid, 0) == 0)
- r = 0;
+ r = RTVL_YES;
if (!ignore_wwids_on(conf))
goto print_valid;
/* At this point, either r==0 or find_multipaths_on. */
* Quick check if path is already multipathed.
*/
if (sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0))) {
- r = 0;
+ r = RTVL_YES;
goto print_valid;
}
* Leave DM_MULTIPATH_DEVICE_PATH="0".
*/
if (released) {
- r = 1;
+ r = RTVL_NO;
goto print_valid;
}
- if (r == 0)
+ if (r == RTVL_YES)
goto print_valid;
/* find_multipaths_on: Fall through to path detection */
}
* the refwwid, or there is more than one path matching
* the refwwid, then the path is valid */
if (VECTOR_SIZE(curmp) != 0) {
- r = 0;
+ r = RTVL_YES;
goto print_valid;
} else if (VECTOR_SIZE(pathvec) > 1)
- r = 0;
+ r = RTVL_YES;
else
- /* Use r=2 as an indication for "maybe" */
- r = 2;
+ r = RTVL_MAYBE;
/*
* If opening the path with O_EXCL fails, the path
/*
* Check if we raced with multipathd
*/
- r = !sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0));
+ r = sysfs_is_multipathed(VECTOR_SLOT(pathvec, 0)) ?
+ RTVL_YES : RTVL_NO;
}
goto print_valid;
}
if (cmd != CMD_CREATE && cmd != CMD_DRY_RUN) {
- r = 0;
+ r = RTVL_OK;
goto out;
}
/*
* core logic entry point
*/
- r = coalesce_paths(&vecs, NULL, refwwid,
+ rc = coalesce_paths(&vecs, NULL, refwwid,
conf->force_reload, cmd);
+ r = rc == CP_RETRY ? RTVL_RETRY : rc == CP_OK ? RTVL_OK : RTVL_FAIL;
print_valid:
if (cmd == CMD_VALID_PATH)
int arg;
extern char *optarg;
extern int optind;
- int r = 1;
+ int r = RTVL_FAIL;
enum mpath_cmds cmd = CMD_CREATE;
enum devtypes dev_type = DEV_NONE;
char *dev = NULL;
logsink = 0;
conf = load_config(DEFAULT_CONFIGFILE);
if (!conf)
- exit(1);
+ exit(RTVL_FAIL);
multipath_conf = conf;
conf->retrigger_tries = 0;
while ((arg = getopt(argc, argv, ":adcChl::FfM:v:p:b:BrR:itTquUwW")) != EOF ) {
if (sizeof(optarg) > sizeof(char *) ||
!isdigit(optarg[0])) {
usage (argv[0]);
- exit(1);
+ exit(RTVL_FAIL);
}
conf->verbosity = atoi(optarg);
if (conf->pgpolicy_flag == IOPOLICY_UNDEF) {
printf("'%s' is not a valid policy\n", optarg);
usage(argv[0]);
- exit(1);
+ exit(RTVL_FAIL);
}
break;
case 'r':
conf->find_multipaths |= _FIND_MULTIPATHS_I;
break;
case 't':
- r = dump_config(conf, NULL, NULL);
+ r = dump_config(conf, NULL, NULL) ? RTVL_FAIL : RTVL_OK;
goto out_free_config;
case 'T':
cmd = CMD_DUMP_CONFIG;
break;
case 'h':
usage(argv[0]);
- exit(0);
+ exit(RTVL_OK);
case 'u':
cmd = CMD_VALID_PATH;
dev_type = DEV_UEVENT;
case ':':
fprintf(stderr, "Missing option argument\n");
usage(argv[0]);
- exit(1);
+ exit(RTVL_FAIL);
case '?':
fprintf(stderr, "Unknown switch: %s\n", optarg);
usage(argv[0]);
- exit(1);
+ exit(RTVL_FAIL);
default:
usage(argv[0]);
- exit(1);
+ exit(RTVL_FAIL);
}
}
if (getuid() != 0) {
fprintf(stderr, "need to be root\n");
- exit(1);
+ exit(RTVL_FAIL);
}
if (optind < argc) {
/* Failing here is non-fatal */
init_foreign(conf->multipath_dir);
if (cmd == CMD_USABLE_PATHS) {
- r = check_usable_paths(conf, dev, dev_type);
+ r = check_usable_paths(conf, dev, dev_type) ?
+ RTVL_FAIL : RTVL_OK;
goto out;
}
if (cmd == CMD_VALID_PATH &&
if (fd == -1) {
condlog(3, "%s: daemon is not running", dev);
if (!systemd_service_enabled(dev)) {
- r = print_cmd_valid(1, NULL, conf);
+ r = print_cmd_valid(RTVL_NO, NULL, conf);
goto out;
}
} else
switch(delegate_to_multipathd(cmd, dev, dev_type, conf)) {
case DELEGATE_OK:
- exit(0);
+ exit(RTVL_OK);
case DELEGATE_ERROR:
- exit(1);
+ exit(RTVL_FAIL);
case NOT_DELEGATED:
break;
}
goto out;
}
if (dm_get_maps(curmp) == 0)
- r = replace_wwids(curmp);
- if (r == 0)
+ r = replace_wwids(curmp) ? RTVL_FAIL : RTVL_OK;
+ if (r == RTVL_OK)
printf("successfully reset wwids\n");
vector_foreach_slot_backwards(curmp, mpp, i) {
vector_del_slot(curmp, i);
retries = conf->remove_retries;
if (conf->remove == FLUSH_ONE) {
if (dev_type == DEV_DEVMAP) {
- r = dm_suspend_and_flush_map(dev, retries);
+ r = dm_suspend_and_flush_map(dev, retries) ?
+ RTVL_FAIL : RTVL_OK;
} else
condlog(0, "must provide a map name to remove");
goto out;
}
else if (conf->remove == FLUSH_ALL) {
- r = dm_flush_maps(retries);
+ r = dm_flush_maps(retries) ? RTVL_FAIL : RTVL_OK;
goto out;
}
- while ((r = configure(conf, cmd, dev_type, dev)) < 0)
+ while ((r = configure(conf, cmd, dev_type, dev)) == RTVL_RETRY)
condlog(3, "restart multipath configuration process");
out:
* multipath -u must exit with status 0, otherwise udev won't
* import its output.
*/
- if (cmd == CMD_VALID_PATH && dev_type == DEV_UEVENT && r == 1)
- r = 0;
+ if (cmd == CMD_VALID_PATH && dev_type == DEV_UEVENT && r == RTVL_NO)
+ r = RTVL_OK;
if (dev_type == DEV_UEVENT)
closelog();
Generate the path priority based on a latency algorithm.
Requires prio_args keyword.
.TP
+.I ana
+(Hardware-dependent)
+Generate the path priority based on the NVMe ANA settings.
+.TP
.I datacore
(Hardware-dependent)
Generate the path priority for some DataCore storage arrays. Requires prio_args
.
.
.TP
+.B san_path_err_threshold
+If set to a value greater than 0, multipathd will watch paths and check how many
+times a path has failed due to errors. If the number of failures on a particular
+path is greater than the san_path_err_threshold, then the path will not be
+reinstated until san_path_err_recovery_time expires. These path failures must
+occur within san_path_err_forget_rate checks; otherwise the path is considered
+good enough to be reinstated. See "Shaky paths detection" below.
+.RS
+.TP
+The default is: \fBno\fR
+.RE
+.
+.
+.TP
+.B san_path_err_forget_rate
+If set to a value greater than 0, multipathd will check whether the number of
+path failures has exceeded san_path_err_threshold within this many checks, i.e.
+san_path_err_forget_rate. If so, the path will not be reinstated until
+san_path_err_recovery_time expires. See "Shaky paths detection" below.
+.RS
+.TP
+The default is: \fBno\fR
+.RE
+.
+.
+.TP
+.B san_path_err_recovery_time
+If set to a value greater than 0, multipathd will make sure that when the number
+of path failures has exceeded san_path_err_threshold within san_path_err_forget_rate
+checks, the path is placed in the failed state for the san_path_err_recovery_time
+duration. Once san_path_err_recovery_time has elapsed, the failed path is
+reinstated. The san_path_err_recovery_time value should be given in seconds.
+See "Shaky paths detection" below.
+.RS
+.TP
+The default is: \fBno\fR
+.RE
+.
+.
+.TP
.B marginal_path_double_failed_time
One of the four parameters of supporting path check based on accounting IO
error such as intermittent error. When a path failed event occurs twice in
other three parameters are set, multipathd will fail the path and enqueue
this path into a queue of which members are sent a couple of continuous
direct reading asynchronous IOs at a fixed sample rate of 10HZ to start IO
-error accounting process.
+error accounting process. See "Shaky paths detection" below.
.RS
.TP
The default is: \fBno\fR
\fImarginal_path_err_recheck_gap_time\fR seconds unless there is only one
active path. After \fImarginal_path_err_recheck_gap_time\fR expires, the path
will be requeueed for rechecking. If checking result is good enough, the
-path will be reinstated.
+path will be reinstated. See "Shaky paths detection" below.
.RS
.TP
The default is: \fBno\fR
error. Refer to \fImarginal_path_err_sample_time\fR. If the rate of IO errors
on a particular path is greater than this parameter, then the path will not
reinstate for \fImarginal_path_err_recheck_gap_time\fR seconds unless there is
-only one active path.
+only one active path. See "Shaky paths detection" below.
.RS
.TP
The default is: \fBno\fR
\fImarginal_path_err_recheck_gap_time\fR seconds. When
\fImarginal_path_err_recheck_gap_time\fR seconds expires, the path will be
requeueed for checking. If checking result is good enough, the path will be
-reinstated, or else it will keep failed.
+reinstated, or else it will keep failed. See "Shaky paths detection" below.
.RS
.TP
The default is: \fBno\fR
If set to a value greater than 0, multipathd will watch paths that have
recently become valid for this many checks. If they fail again while they are
being watched, when they next become valid, they will not be used until they
-have stayed up for \fIdelay_wait_checks\fR checks.
+have stayed up for \fIdelay_wait_checks\fR checks. See "Shaky paths detection" below.
.RS
.TP
The default is: \fBno\fR
If set to a value greater than 0, when a device that has recently come back
online fails again within \fIdelay_watch_checks\fR checks, the next time it
comes back online, it will marked and delayed, and not used until it has passed
-\fIdelay_wait_checks\fR checks.
+\fIdelay_wait_checks\fR checks. See "Shaky paths detection" below.
.RS
.TP
The default is: \fBno\fR
.RS
.PP
The default \fIblacklist\fR consists of the regular expressions
-"^(ram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]" and
+"^(ram|zram|raw|loop|fd|md|dm-|sr|scd|st|dcssblk)[0-9]" and
"^(td|hd|vd)[a-z]". This causes virtual devices, non-disk devices, and some other
device types to be excluded from multipath handling by default.
.RE
.TP
.B deferred_remove
.TP
+.B san_path_err_threshold
+.TP
+.B san_path_err_forget_rate
+.TP
+.B san_path_err_recovery_time
+.TP
.B marginal_path_err_sample_time
.TP
.B marginal_path_err_rate_threshold
.I 1 alua
(Hardware-dependent)
Hardware handler for SCSI-3 ALUA compatible arrays.
+.TP
+.I 1 ana
+(Hardware-dependent)
+Hardware handler for NVMe ANA compatible arrays.
.PP
The default is: \fB<unset>\fR
.PP
.TP
.B deferred_remove
.TP
+.B san_path_err_threshold
+.TP
+.B san_path_err_forget_rate
+.TP
+.B san_path_err_recovery_time
+.TP
.B marginal_path_err_sample_time
.TP
.B marginal_path_err_rate_threshold
.TP
.B deferred_remove
.TP
+.B san_path_err_threshold
+.TP
+.B san_path_err_forget_rate
+.TP
+.B san_path_err_recovery_time
+.TP
.B marginal_path_err_sample_time
.TP
.B marginal_path_err_rate_threshold
.
.
.\" ----------------------------------------------------------------------------
+.SH "Shaky paths detection"
+.\" ----------------------------------------------------------------------------
+.
+A common problem in SAN setups is the occurrence of intermittent errors: a
+path is unreachable, then reachable again for a short time, disappears again,
+and so forth. This happens typically on unstable interconnects. It is
+undesirable to switch pathgroups unnecessarily on such frequent, unreliable
+events. \fImultipathd\fR supports three different methods for detecting this
+situation and dealing with it. All methods share the same basic mode of
+operation: If a path is found to be \(dqshaky\(dq or \(dqflipping\(dq,
+and appears to be in healthy status, it is not reinstated (put back to use)
+immediately. Instead, it is watched for some time, and only reinstated
+if the healthy state appears to be stable. The logic of determining
+\(dqshaky\(dq condition, as well as the logic when to reinstate,
+differs between the three methods.
+.TP 8
+.B \(dqdelay_checks\(dq failure tracking
+If a path fails again within a
+\fIdelay_watch_checks\fR interval after a failure, don't
+reinstate it until it passes a \fIdelay_wait_checks\fR interval
+in always good status.
+The intervals are measured in \(dqticks\(dq, i.e. the
+time between path checks by multipathd, which is variable and controlled by the
+\fIpolling_interval\fR and \fImax_polling_interval\fR parameters.
+.TP
+.B \(dqmarginal_path\(dq failure tracking
+If a second failure event (good->bad transition) occurs within
+\fImarginal_path_double_failed_time\fR seconds after a failure, high-frequency
+monitoring is started for the affected path: I/O is sent at a rate of 10 per
+second. This is done for \fImarginal_path_err_sample_time\fR seconds. During
+this period, the path is not reinstated. If the
+rate of errors remains below \fImarginal_path_err_rate_threshold\fR during the
+monitoring period, the path is reinstated. Otherwise, it
+is kept in failed state for \fImarginal_path_err_recheck_gap_time\fR, and
+after that, it is monitored again. For this method, time intervals are measured
+in seconds.
+.TP
+.B \(dqsan_path_err\(dq failure tracking
+multipathd counts path failures for each path. Once the number of failures
+exceeds the value given by \fIsan_path_err_threshold\fR, the path is not
+reinstated for \fIsan_path_err_recovery_time\fR ticks. While counting
+failures, multipathd \(dqforgets\(dq one past failure every
+\(dqsan_path_err_forget_rate\(dq ticks; thus if errors don't occur more
+often than once in the forget rate interval, the failure count doesn't
+increase and the threshold is never reached. As for the \fIdelay_xy\fR method,
+intervals are measured in \(dqticks\(dq.
+.
+.RS 8
+.LP
+This method is \fBdeprecated\fR in favor of the \(dqmarginal_path\(dq failure
+tracking method, and only offered for backward compatibility.
+.
+.RE
+.LP
+See the documentation
+of the individual options above for details.
+It is \fBstrongly discouraged\fR to use more than one of these methods for any
+given multipath map, because the two concurrent methods may interact in
+unpredictable ways. If the \(dqmarginal_path\(dq method is active, the
+\(dqsan_path_err\(dq parameters are implicitly set to 0.
+.
+.
+.\" ----------------------------------------------------------------------------
.SH "KNOWN ISSUES"
.\" ----------------------------------------------------------------------------
.
#include "version.h"
#include <readline/readline.h>
+#include "mpath_cmd.h"
#include "cli.h"
+#include "debug.h"
static vector keys;
static vector handlers;
do { \
if ((a)) { \
char *tmp = (r); \
+ \
+ if (m >= MAX_REPLY_LEN) { \
+ condlog(1, "Warning: max reply length exceeded"); \
+ free(tmp); \
+ r = NULL; \
+ } \
(r) = REALLOC((r), (m) * 2); \
if ((r)) { \
memset((r) + (m), 0, (m)); \
#include "prkey.h"
#include "propsel.h"
#include "main.h"
+#include "mpath_cmd.h"
#include "cli.h"
#include "uevent.h"
#include "foreign.h"
condlog(3, "%s: list path (operator)", param);
pp = find_path_by_dev(vecs->pathvec, param);
+ if (!pp)
+ return 1;
return show_path(reply, len, vecs, pp, "%o");
}
vecs->pathvec, &refwwid);
if (refwwid) {
if (coalesce_paths(vecs, NULL, refwwid,
- FORCE_RELOAD_NONE, CMD_NONE))
+ FORCE_RELOAD_NONE, CMD_NONE)
+ != CP_OK)
condlog(2, "%s: coalesce_paths failed",
param);
dm_lib_release();
}
mpp->action = ACT_RESIZE;
mpp->force_udev_reload = 1;
- if (domap(mpp, params, 1) <= 0) {
+ if (domap(mpp, params, 1) == DOMAP_FAIL) {
condlog(0, "%s: failed to resize map : %s", mpp->alias,
strerror(errno));
mpp->size = orig_size;
while (names->dev) {
uint32_t event_nr;
- if (!dm_is_mpath(names->name))
+ /* Don't delete device if dm_is_mpath() fails without
+ * checking the device type */
+ if (dm_is_mpath(names->name) == 0)
goto next;
event_nr = dm_event_nr(names);
struct dev_event *dev_evt, *old_dev_evt;
int i;
- if (!dm_is_mpath(name)) {
+ /* We know that this is a multipath device, so only fail if
+ * device-mapper tells us that we're wrong */
+ if (dm_is_mpath(name) == 0) {
condlog(0, "%s: not a multipath device. can't watch events",
name);
return -1;
#define LOG_MSG(lvl, verb, pp) \
do { \
- if (lvl <= verb) { \
+ if (pp->mpp && checker_selected(&pp->checker) && \
+ lvl <= verb) { \
if (pp->offline) \
condlog(lvl, "%s: %s - path offline", \
pp->mpp->alias, pp->dev); \
else if (running_state != DAEMON_IDLE) {
struct timespec ts;
- clock_gettime(CLOCK_MONOTONIC, &ts);
- ts.tv_sec += 1;
- rc = pthread_cond_timedwait(&config_cond,
- &config_lock, &ts);
+ if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
+ ts.tv_sec += 1;
+ rc = pthread_cond_timedwait(&config_cond,
+ &config_lock, &ts);
+ }
}
if (!rc) {
running_state = state;
{
/* devices are automatically removed by the dmevent polling code,
* so they don't need to be manually removed here */
+ condlog(3, "%s: removing map from internal tables", mpp->alias);
if (!poll_dmevents)
stop_waiter_thread(mpp, vecs);
remove_map(mpp, vecs, PURGE_VEC);
verify_paths(mpp, vecs);
mpp->action = ACT_RELOAD;
- extract_hwe_from_path(mpp);
if (setup_map(mpp, params, PARAMS_SIZE, vecs)) {
condlog(0, "%s: failed to setup new map in update", mpp->alias);
retries = -1;
goto fail;
}
- if (domap(mpp, params, 1) <= 0 && retries-- > 0) {
+ if (domap(mpp, params, 1) == DOMAP_FAIL && retries-- > 0) {
condlog(0, "%s: map_udate sleep", mpp->alias);
sleep(1);
goto retry;
condlog(2, "%s: map flushed", mpp->alias);
}
- orphan_paths(vecs->pathvec, mpp);
+ orphan_paths(vecs->pathvec, mpp, "map flushed");
remove_map_and_stop_waiter(mpp, vecs);
return 0;
int delayed_reconfig, reassign_maps;
struct config *conf;
- if (!dm_is_mpath(alias)) {
+ if (dm_is_mpath(alias) != 1) {
condlog(4, "%s: not a multipath map", alias);
return 0;
}
goto out;
}
- orphan_paths(vecs->pathvec, mpp);
remove_map_and_stop_waiter(mpp, vecs);
out:
lock_cleanup_pop(vecs->lock);
goto fail; /* leave path added to pathvec */
}
mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
+ if (mpp && pp->size && mpp->size != pp->size) {
+ condlog(0, "%s: failed to add new path %s, device size mismatch", mpp->alias, pp->dev);
+ int i = find_slot(vecs->pathvec, (void *)pp);
+ if (i != -1)
+ vector_del_slot(vecs->pathvec, i);
+ free_path(pp);
+ return 1;
+ }
if (mpp && mpp->wait_for_udev &&
(pathcount(mpp, PATH_UP) > 0 ||
(pathcount(mpp, PATH_GHOST) > 0 && pp->tpgs != TPGS_IMPLICIT &&
pp->mpp = mpp;
rescan:
if (mpp) {
- if (pp->size && mpp->size != pp->size) {
- condlog(0, "%s: failed to add new path %s, "
- "device size mismatch",
- mpp->alias, pp->dev);
- int i = find_slot(vecs->pathvec, (void *)pp);
- if (i != -1)
- vector_del_slot(vecs->pathvec, i);
- free_path(pp);
- return 1;
- }
-
condlog(4,"%s: adopting all paths for path %s",
mpp->alias, pp->dev);
if (adopt_paths(vecs->pathvec, mpp))
verify_paths(mpp, vecs);
mpp->action = ACT_RELOAD;
- extract_hwe_from_path(mpp);
} else {
if (!should_multipath(pp, vecs->pathvec, vecs->mpvec)) {
orphan_path(pp, "only one path");
/*
* reload the map for the multipath mapped device
*/
-retry:
ret = domap(mpp, params, 1);
- if (ret <= 0) {
- if (ret < 0 && retries-- > 0) {
- condlog(0, "%s: retry domap for addition of new "
- "path %s", mpp->alias, pp->dev);
- sleep(1);
- goto retry;
- }
+ while (ret == DOMAP_RETRY && retries-- > 0) {
+ condlog(0, "%s: retry domap for addition of new "
+ "path %s", mpp->alias, pp->dev);
+ sleep(1);
+ ret = domap(mpp, params, 1);
+ }
+ if (ret == DOMAP_FAIL || ret == DOMAP_RETRY) {
condlog(0, "%s: failed in domap for addition of new "
"path %s", mpp->alias, pp->dev);
/*
* reload the map
*/
mpp->action = ACT_RELOAD;
- if (domap(mpp, params, 1) <= 0) {
+ if (domap(mpp, params, 1) == DOMAP_FAIL) {
condlog(0, "%s: failed in domap for "
"removal of path %s",
mpp->alias, pp->dev);
return 0;
}
+/*
+ * san_path_err_* "shaky paths detection" (see multipath.conf(5)).
+ * Called from the path checker when a path reports a healthy state.
+ * Returns 1 if reinstatement of the path must be delayed (the caller
+ * then marks the path PATH_DELAYED), 0 if the path may be reinstated.
+ */
+static int check_path_reinstate_state(struct path * pp) {
+	struct timespec curr_time;
+
+	/*
+	 * This function is only called when the path state changes
+	 * from "bad" to "good". pp->state reflects the *previous* state.
+	 * If this was "bad", we know that a failure must have occurred
+	 * beforehand, and count that.
+	 * Note that we count path state _changes_ this way. If a path
+	 * remains in "bad" state, failure count is not increased.
+	 */
+
+	/* Feature disabled unless all three san_path_err_* values are set */
+	if (!((pp->mpp->san_path_err_threshold > 0) &&
+				(pp->mpp->san_path_err_forget_rate > 0) &&
+				(pp->mpp->san_path_err_recovery_time >0))) {
+		return 0;
+	}
+
+	if (pp->disable_reinstate) {
+		/* If we don't know how much time has passed, automatically
+		 * reinstate the path, just to be safe. Also, if there are
+		 * no other usable paths, reinstate the path
+		 */
+		if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0 ||
+				pp->mpp->nr_active == 0) {
+			condlog(2, "%s : reinstating path early", pp->dev);
+			goto reinstate_path;
+		}
+		if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) {
+			condlog(2,"%s : reinstate the path after err recovery time", pp->dev);
+			goto reinstate_path;
+		}
+		/* still within the recovery window: keep the path delayed */
+		return 1;
+	}
+	/* forget errors on a working path */
+	if ((pp->state == PATH_UP || pp->state == PATH_GHOST) &&
+			pp->path_failures > 0) {
+		if (pp->san_path_err_forget_rate > 0){
+			pp->san_path_err_forget_rate--;
+		} else {
+			/* for every san_path_err_forget_rate number of
+			 * successful path checks decrement path_failures by 1
+			 */
+			pp->path_failures--;
+			pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
+		}
+		return 0;
+	}
+
+	/* If the path isn't recovering from a failed state, do nothing */
+	if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY &&
+			pp->state != PATH_TIMEOUT)
+		return 0;
+
+	/* first failure: (re)arm the forget-rate countdown */
+	if (pp->path_failures == 0)
+		pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate;
+
+	pp->path_failures++;
+
+	/* if we don't know the current time, we don't know how long to
+	 * delay the path, so there's no point in checking if we should
+	 */
+
+	if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0)
+		return 0;
+	/* when path failures has exceeded the san_path_err_threshold
+	 * place the path in delayed state till san_path_err_recovery_time
+	 * so that the customer can rectify the issue within this time. After
+	 * the completion of san_path_err_recovery_time it should
+	 * automatically reinstate the path
+	 */
+	if (pp->path_failures > pp->mpp->san_path_err_threshold) {
+		condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev);
+		pp->dis_reinstate_time = curr_time.tv_sec;
+		pp->disable_reinstate = 1;
+
+		return 1;
+	} else {
+		return 0;
+	}
+
+reinstate_path:
+	/* reset all shaky-path bookkeeping and allow reinstatement */
+	pp->path_failures = 0;
+	pp->disable_reinstate = 0;
+	pp->san_path_err_forget_rate = 0;
+	return 0;
+}
+
+
/*
* Returns '1' if the path has been checked, '-1' if it was blacklisted
* and '0' otherwise
pp->tick = checkint;
newstate = path_offline(pp);
+ if (newstate == PATH_UP) {
+ conf = get_multipath_config();
+ pthread_cleanup_push(put_multipath_config, conf);
+ newstate = get_state(pp, conf, 1, newstate);
+ pthread_cleanup_pop(1);
+ } else {
+ checker_clear_message(&pp->checker);
+ condlog(3, "%s: state %s, checker not called",
+ pp->dev, checker_state_name(newstate));
+ }
/*
* Wait for uevent for removed paths;
* some LLDDs like zfcp keep paths unavailable
if (newstate == PATH_REMOVED)
newstate = PATH_DOWN;
- if (newstate == PATH_UP) {
- conf = get_multipath_config();
- pthread_cleanup_push(put_multipath_config, conf);
- newstate = get_state(pp, conf, 1, newstate);
- pthread_cleanup_pop(1);
- } else
- checker_clear_message(&pp->checker);
-
if (pp->wwid_changed) {
condlog(2, "%s: path wwid has changed. Refusing to use",
pp->dev);
}
if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) {
- condlog(2, "%s: unusable path - checker failed", pp->dev);
+ condlog(2, "%s: unusable path (%s) - checker failed",
+ pp->dev, checker_state_name(newstate));
LOG_MSG(2, verbosity, pp);
conf = get_multipath_config();
pthread_cleanup_push(put_multipath_config, conf);
return 1;
}
if (!pp->mpp) {
- if (!strlen(pp->wwid) && pp->initialized == INIT_FAILED &&
+ if (!strlen(pp->wwid) &&
+ (pp->initialized == INIT_FAILED ||
+ pp->initialized == INIT_NEW) &&
(newstate == PATH_UP || newstate == PATH_GHOST)) {
condlog(2, "%s: add missing path", pp->dev);
conf = get_multipath_config();
if (!pp->mpp)
return 0;
+ if ((newstate == PATH_UP || newstate == PATH_GHOST) &&
+ check_path_reinstate_state(pp)) {
+ pp->state = PATH_DELAYED;
+ return 1;
+ }
+
if (pp->io_err_disable_reinstate && hit_io_err_recheck_time(pp)) {
pp->state = PATH_SHAKY;
/*
unsigned int i;
struct timespec last_time;
struct config *conf;
+ int foreign_tick = 0;
pthread_cleanup_push(rcu_unregister, NULL);
rcu_register_thread();
if (num_paths) {
unsigned int max_checkint;
- condlog(3, "checked %d path%s in %lu.%06lu secs",
+ condlog(4, "checked %d path%s in %lu.%06lu secs",
num_paths, num_paths > 1 ? "s" : "",
diff_time.tv_sec,
diff_time.tv_nsec / 1000);
diff_time.tv_sec);
}
}
- check_foreign();
+
+ if (foreign_tick == 0) {
+ conf = get_multipath_config();
+ foreign_tick = conf->max_checkint;
+ put_multipath_config(conf);
+ }
+ if (--foreign_tick == 0)
+ check_foreign();
+
post_config_state(DAEMON_IDLE);
conf = get_multipath_config();
strict_timing = conf->strict_timing;
ret = coalesce_paths(vecs, mpvec, NULL, force_reload, CMD_NONE);
if (force_reload == FORCE_RELOAD_WEAK)
force_reload = FORCE_RELOAD_YES;
- if (ret) {
+ if (ret != CP_OK) {
condlog(0, "configure failed while coalescing paths");
goto fail;
}
blacklist-test_OBJDEPS := ../libmultipath/blacklist.o
blacklist-test_LIBDEPS := -ludev
-%.out: %-test
+lib/libchecktur.so:
+ mkdir lib
+ ln -t lib ../libmultipath/{checkers,prioritizers,foreign}/*.so
+
+%.out: %-test lib/libchecktur.so
@echo == running $< ==
@LD_LIBRARY_PATH=$(multipathdir):$(mpathcmddir) ./$< >$@
clean: dep_clean
$(RM) $(TESTS:%=%-test) $(TESTS:%=%.out) $(OBJS)
+ $(RM) -rf lib
.SECONDARY: $(OBJS)
static struct udev_device udev = { "sdb", { "ID_FOO", "ID_WWN", "ID_BAR", NULL } };
conf.blist_property = blist_property_wwn;
expect_condlog(3, "sdb: udev property ID_WWN blacklisted\n");
- assert_int_equal(filter_property(&conf, &udev), MATCH_PROPERTY_BLIST);
+ assert_int_equal(filter_property(&conf, &udev, 3),
+ MATCH_PROPERTY_BLIST);
}
/* the property check works different in that you check all the property
static struct udev_device udev = { "sdb", { "ID_FOO", "ID_WWN", "ID_BAR", NULL } };
conf.elist_property = blist_property_wwn;
expect_condlog(3, "sdb: udev property ID_WWN whitelisted\n");
- assert_int_equal(filter_property(&conf, &udev),
+ assert_int_equal(filter_property(&conf, &udev, 3),
MATCH_PROPERTY_BLIST_EXCEPT);
}
static struct udev_device udev = { "sdb", { "ID_FOO", "ID_BAZ", "ID_BAR", NULL } };
conf.blist_property = blist_property_wwn;
expect_condlog(3, "sdb: blacklisted, udev property missing\n");
- assert_int_equal(filter_property(&conf, &udev),
+ assert_int_equal(filter_property(&conf, &udev, 3),
MATCH_PROPERTY_BLIST_MISSING);
}
#include "pgpolicies.h"
#include "test-lib.h"
#include "print.h"
+#include "util.h"
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#define N_CONF_FILES 2
static const char tmplate[] = "/tmp/hwtable-XXXXXX";
static struct key_value defaults[] = {
{ "config_dir", NULL },
{ "bindings_file", NULL },
+ { "multipath_dir", NULL },
{ "detect_prio", "no" },
{ "detect_checker", "no" },
};
char buf[sizeof(tmplate) + sizeof(bindings_name)];
+ char dirbuf[PATH_MAX];
snprintf(buf, sizeof(buf), "%s/%s", hwt->tmpname, bindings_name);
defaults[0].value = hwt->dirname;
defaults[1].value = buf;
+ assert_ptr_not_equal(getcwd(dirbuf, sizeof(dirbuf)), NULL);
+ strncat(dirbuf, "/lib", sizeof(dirbuf));
+ defaults[2].value = dirbuf;
write_section(hwt->config_file, "defaults",
ARRAY_SIZE(defaults), defaults);
}
pp = mock_path("NVME", "NoName");
mp = mock_multipath(pp);
assert_ptr_not_equal(mp, NULL);
- TEST_PROP(pp->checker.name, NONE);
+ TEST_PROP(checker_name(&pp->checker), NONE);
TEST_PROP(pp->uid_attribute, "ID_WWN");
assert_int_equal(mp->pgpolicy, DEFAULT_PGPOLICY);
assert_int_equal(mp->no_path_retry, DEFAULT_NO_PATH_RETRY);
default_wwid_1);
mp = mock_multipath(pp);
assert_ptr_not_equal(mp, NULL);
- TEST_PROP(pp->checker.name, NONE);
+ TEST_PROP(checker_name(&pp->checker), NONE);
TEST_PROP(pp->uid_attribute, "ID_WWN");
assert_int_equal(mp->pgpolicy, MULTIBUS);
assert_int_equal(mp->no_path_retry, NO_PATH_RETRY_QUEUE);
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* boo:baz matches kv1 */
pp = mock_path(vnd_boo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* .oo:ba. matches kv1 */
pp = mock_path(vnd__oo.value, prd_ba_.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* .foo:(bar|baz|ba\.) doesn't match */
pp = mock_path(vnd__oo.value, prd_ba_s.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches kv2 and kv1 */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_regex_string_hwe(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* boo:baz matches kv1 */
pp = mock_path(vnd_boo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* .oo:ba. matches kv1 */
pp = mock_path(vnd__oo.value, prd_ba_.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* .oo:(bar|baz|ba\.)$ doesn't match */
pp = mock_path(vnd__oo.value, prd_ba_s.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches kv2 */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
/* Later match takes prio */
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_regex_string_hwe_dir(void **state)
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
TEST_PROP(pp->uid_attribute, DEFAULT_UID_ATTRIBUTE);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* boo:baz doesn't match */
pp = mock_path(vnd_boo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
TEST_PROP(pp->uid_attribute, DEFAULT_UID_ATTRIBUTE);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches kv2 and kv1 */
pp = mock_path(vnd_foo.value, prd_bar.value);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, NULL);
TEST_PROP(pp->uid_attribute, uid_baz.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* foo:barz matches kv3 and kv2 and kv1 */
pp = mock_path_flags(vnd_foo.value, prd_barz.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_rdac.value);
TEST_PROP(pp->getuid, gui_foo.value);
TEST_PROP(pp->uid_attribute, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_regex_2_strings_hwe_dir(void **state)
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* foo:baz matches kv1 */
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* boo:baz matches kv1 */
pp = mock_path(vnd_boo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* .oo:ba. matches kv1 */
pp = mock_path(vnd__oo.value, prd_ba_.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* .oo:(bar|baz|ba\.)$ doesn't match */
pp = mock_path(vnd__oo.value, prd_ba_s.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
}
static int setup_string_regex_hwe_dir(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches both */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_ident_strings_hwe(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches both */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_ident_strings_both_dir(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches both */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_ident_strings_both_dir_w_prev(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches both */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_ident_strings_hwe_dir(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches both */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_3_ident_strings_hwe_dir(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches both */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_ident_self_matching_re_hwe_dir(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_ident_self_matching_re_hwe(void **state)
pp = mock_path(vnd_foo.value, prd_baz.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
/* foo:bar matches both */
pp = mock_path_flags(vnd_foo.value, prd_bar.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_ident_not_self_matching_re_hwe_dir(void **state)
pp = mock_path(vnd_foo.value, prd_bar.value);
TEST_PROP(prio_name(&pp->prio), prio_emc.value);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* foo:bay matches k1 and k2 */
pp = mock_path_flags(vnd_foo.value, "bay", USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
/* foo:baz matches k2 only. */
pp = mock_path_flags(vnd_foo.value, prd_baz.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
}
static int setup_2_matching_res_hwe_dir(void **state)
pp = mock_path(vnd_foo.value, prd_bar.value);
TEST_PROP(prio_name(&pp->prio), DEFAULT_PRIO);
TEST_PROP(pp->getuid, NULL);
- TEST_PROP(pp->checker.name, DEFAULT_CHECKER);
+ TEST_PROP(checker_name(&pp->checker), DEFAULT_CHECKER);
pp = mock_path_flags(vnd_foo.value, prd_baz.value, USE_GETUID);
TEST_PROP(prio_name(&pp->prio), prio_hds.value);
TEST_PROP(pp->getuid, gui_foo.value);
- TEST_PROP(pp->checker.name, chk_hp.value);
+ TEST_PROP(checker_name(&pp->checker), chk_hp.value);
}
static int setup_2_nonmatching_res_hwe_dir(void **state)
#include "globals.c"
+#define BITARR_SZ 4
+
static void test_basenamecpy_good0(void **state)
{
char dst[10];
assert_int_equal(basenamecpy("baz/qux", NULL, sizeof(dst)), 0);
}
+/*
+ * Exercise the bitmask helpers one bit at a time: for every bit b in a
+ * BITARR_SZ * 64 bit array, setting b must set exactly one bit (word
+ * b/64, position b%64) and leave every other bit and word zero;
+ * clearing b must return the whole array to all-zero.
+ */
+static void test_bitmask_1(void **state)
+{
+	uint64_t arr[BITARR_SZ];
+	int i, j, k, m, b;
+
+	memset(arr, 0, sizeof(arr));
+
+	for (j = 0; j < BITARR_SZ; j++) {
+		for (i = 0; i < 64; i++) {
+			b = 64 * j + i;
+			/* array starts (and is restored) all-zero */
+			assert(!is_bit_set_in_array(b, arr));
+			set_bit_in_array(b, arr);
+			for (k = 0; k < BITARR_SZ; k++) {
+				printf("b = %d j = %d k = %d a = %"PRIx64"\n",
+				       b, j, k, arr[k]);
+				/* only word j changed, and only bit i in it */
+				if (k == j)
+					assert_int_equal(arr[j], 1ULL << i);
+				else
+					assert_int_equal(arr[k], 0ULL);
+			}
+			/* cross-check via the query helper, bit by bit */
+			for (m = 0; m < 64; m++)
+				if (i == m)
+					assert(is_bit_set_in_array(64 * j + m,
+								   arr));
+				else
+					assert(!is_bit_set_in_array(64 * j + m,
+								    arr));
+			clear_bit_in_array(b, arr);
+			assert(!is_bit_set_in_array(b, arr));
+			/* clearing the single set bit zeroes the array again */
+			for (k = 0; k < BITARR_SZ; k++)
+				assert_int_equal(arr[k], 0ULL);
+		}
+	}
+}
+
+/*
+ * Cumulative variant of the bitmask test: set all bits in ascending
+ * order without clearing, checking after each step that exactly the
+ * bits [0..b] are set (fully-set words below j, a low-bit mask in word
+ * j, zero above). Then clear them in the same order, verifying the
+ * mirror-image invariant until the array is all-zero again.
+ */
+static void test_bitmask_2(void **state)
+{
+	uint64_t arr[BITARR_SZ];
+	int i, j, k, m, b;
+
+	memset(arr, 0, sizeof(arr));
+
+	/* phase 1: set bits 0, 1, 2, ... cumulatively */
+	for (j = 0; j < BITARR_SZ; j++) {
+		for (i = 0; i < 64; i++) {
+			b = 64 * j + i;
+			assert(!is_bit_set_in_array(b, arr));
+			set_bit_in_array(b, arr);
+			/* within word j, bits <= i are set, higher ones not */
+			for (m = 0; m < 64; m++)
+				if (m <= i)
+					assert(is_bit_set_in_array(64 * j + m,
+								   arr));
+				else
+					assert(!is_bit_set_in_array(64 * j + m,
+								    arr));
+			assert(is_bit_set_in_array(b, arr));
+			for (k = 0; k < BITARR_SZ; k++) {
+				/* words below j are full; word j holds a
+				 * low mask of i+1 bits; words above are 0 */
+				if (k < j || (k == j && i == 63))
+					assert_int_equal(arr[k], ~0ULL);
+				else if (k > j)
+					assert_int_equal(arr[k], 0ULL);
+				else
+					assert_int_equal(
+						arr[k],
+						(1ULL << (i + 1)) - 1);
+			}
+		}
+	}
+	/* phase 2: clear the bits again in the same ascending order */
+	for (j = 0; j < BITARR_SZ; j++) {
+		for (i = 0; i < 64; i++) {
+			b = 64 * j + i;
+			assert(is_bit_set_in_array(b, arr));
+			clear_bit_in_array(b, arr);
+			/* within word j, bits <= i are now clear */
+			for (m = 0; m < 64; m++)
+				if (m <= i)
+					assert(!is_bit_set_in_array(64 * j + m,
+								    arr));
+				else
+					assert(is_bit_set_in_array(64 * j + m,
+								   arr));
+			assert(!is_bit_set_in_array(b, arr));
+			for (k = 0; k < BITARR_SZ; k++) {
+				/* inverse of phase 1: cleared words below,
+				 * high mask in word j, full words above */
+				if (k < j || (k == j && i == 63))
+					assert_int_equal(arr[k], 0ULL);
+				else if (k > j)
+					assert_int_equal(arr[k], ~0ULL);
+				else
+					assert_int_equal(
+						arr[k],
+						~((1ULL << (i + 1)) - 1));
+			}
+		}
+	}
+}
+
int test_basenamecpy(void)
{
const struct CMUnitTest tests[] = {
cmocka_unit_test(test_basenamecpy_bad3),
cmocka_unit_test(test_basenamecpy_bad4),
cmocka_unit_test(test_basenamecpy_bad5),
+ cmocka_unit_test(test_bitmask_1),
+ cmocka_unit_test(test_bitmask_2),
};
return cmocka_run_group_tests(tests, NULL, NULL);
}