[multipathd] add "show wildcards" cli command
[platform/upstream/multipath-tools.git] / multipathd / main.c
index b0860bc..84fb5c4 100644 (file)
@@ -1,20 +1,24 @@
+/*
+ * Copyright (c) 2004, 2005 Christophe Varoqui
+ * Copyright (c) 2005 Kiyoshi Ueda, NEC
+ * Copyright (c) 2005 Benjamin Marzinski, Redhat
+ * Copyright (c) 2005 Edward Goggin, EMC
+ */
 #include <unistd.h>
 #include <sys/stat.h>
 #include <libdevmapper.h>
 #include <wait.h>
 #include <sys/mman.h>
-
-/*
- * libsysfs
- */
-#include <sysfs/libsysfs.h>
-#include <sysfs/dlist.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 
 /*
  * libcheckers
  */
 #include <checkers.h>
-#include <path_state.h>
 
 /*
  * libmultipath
 #include <vector.h>
 #include <memory.h>
 #include <config.h>
-#include <callout.h>
 #include <util.h>
-#include <blacklist.h>
 #include <hwtable.h>
 #include <defaults.h>
 #include <structs.h>
+#include <callout.h>
+#include <blacklist.h>
+#include <structs_vec.h>
 #include <dmparser.h>
 #include <devmapper.h>
+#include <sysfs.h>
 #include <dict.h>
 #include <discovery.h>
 #include <debug.h>
 #include <propsel.h>
 #include <uevent.h>
 #include <switchgroup.h>
-#include <path_state.h>
 #include <print.h>
+#include <configure.h>
+#include <prio.h>
 
 #include "main.h"
 #include "pidfile.h"
 #include "uxclnt.h"
 #include "cli.h"
 #include "cli_handlers.h"
+#include "lock.h"
+#include "waiter.h"
 
 #define FILE_NAME_SIZE 256
 #define CMDSIZE 160
 
 #define LOG_MSG(a,b) \
-       if (strlen(b)) { \
-               condlog(a, "%s: %s", pp->dev_t, b); \
-               memset(b, 0, MAX_CHECKER_MSG_SIZE); \
-       }
-
-#ifdef LCKDBG
-#define lock(a) \
-       fprintf(stderr, "%s:%s(%i) lock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
-       pthread_mutex_lock(a)
-#define unlock(a) \
-       fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
-       pthread_mutex_unlock(a)
-#define lock_cleanup_pop(a) \
-       fprintf(stderr, "%s:%s(%i) unlock %p\n", __FILE__, __FUNCTION__, __LINE__, a); \
-       pthread_cleanup_pop(1);
-#else
-#define lock(a) pthread_mutex_lock(a)
-#define unlock(a) pthread_mutex_unlock(a)
-#define lock_cleanup_pop(a) pthread_cleanup_pop(1);
-#endif
+       if (strlen(b)) condlog(a, "%s: %s", pp->dev, b);
 
 pthread_cond_t exit_cond = PTHREAD_COND_INITIALIZER;
 pthread_mutex_t exit_mutex = PTHREAD_MUTEX_INITIALIZER;
 
+int logsink;
+
 /*
- * structs
+ * global copy of vecs for use in sig handlers
  */
-struct event_thread {
-       struct dm_task *dmt;
-       pthread_t thread;
-       int event_nr;
-       char mapname[WWID_SIZE];
-       struct vectors *vecs;
-};
+struct vectors * gvecs;
 
-static struct event_thread *
-alloc_waiter (void)
+static int
+need_switch_pathgroup (struct multipath * mpp, int refresh)
 {
+       struct pathgroup * pgp;
+       struct path * pp;
+       unsigned int i, j;
 
-       struct event_thread * wp;
+       if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
+               return 0;
 
-       wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
+       /*
+        * Refresh path priority values
+        */
+       if (refresh)
+               vector_foreach_slot (mpp->pg, pgp, i)
+                       vector_foreach_slot (pgp->paths, pp, j)
+                               pathinfo(pp, conf->hwtable, DI_PRIO);
 
-       return wp;
-}
+       mpp->bestpg = select_path_group(mpp);
 
-static void
-free_waiter (void * data)
-{
-       struct event_thread * wp = (struct event_thread *)data;
+       if (mpp->bestpg != mpp->nextpg)
+               return 1;
 
-       if (wp->dmt)
-               dm_task_destroy(wp->dmt);
-       FREE(wp);
+       return 0;
 }
 
 static void
-stop_waiter_thread (struct multipath * mpp, struct vectors * vecs)
+switch_pathgroup (struct multipath * mpp)
 {
-       struct event_thread * wp = (struct event_thread *)mpp->waiter;
-       pthread_t thread;
-       
-       if (!wp) {
-               condlog(3, "%s: no waiter thread", mpp->alias);
-               return;
-       }
-       thread = wp->thread;
-
-       if (!wp) {
-               condlog(3, "%s: thread not started", mpp->alias);
-               return;
-       }
-       condlog(2, "%s: stop event checker thread", wp->mapname);
-       pthread_kill(thread, SIGHUP);
+       mpp->stat_switchgroup++;
+       dm_switchgroup(mpp->alias, mpp->bestpg);
+       condlog(2, "%s: switch to path group #%i",
+                mpp->alias, mpp->bestpg);
 }
 
-static void
-cleanup_lock (void * data)
+static int
+coalesce_maps(struct vectors *vecs, vector nmpv)
 {
-       pthread_mutex_unlock((pthread_mutex_t *)data);
-}
+       struct multipath * ompp;
+       vector ompv = vecs->mpvec;
+       unsigned int i;
+       int j;
 
-static void
-adopt_paths (struct vectors * vecs, struct multipath * mpp)
-{
-       int i;
-       struct path * pp;
+       vector_foreach_slot (ompv, ompp, i) {
+               if (!find_mp_by_wwid(nmpv, ompp->wwid)) {
+                       /*
+                        * remove all current maps not allowed by the
+                        * current configuration
+                        */
+                       if (dm_flush_map(ompp->alias)) {
+                               condlog(0, "%s: unable to flush devmap",
+                                       ompp->alias);
+                               /*
+                                * may be just because the device is open
+                                */
+                               if (!vector_alloc_slot(nmpv))
+                                       return 1;
 
-       if (!mpp)
-               return;
+                               vector_set_slot(nmpv, ompp);
+                               setup_multipath(vecs, ompp);
 
-       vector_foreach_slot (vecs->pathvec, pp, i) {
-               if (!strncmp(mpp->wwid, pp->wwid, WWID_SIZE)) {
-                       condlog(4, "%s ownership set", pp->dev_t);
-                       pp->mpp = mpp;
+                               if ((j = find_slot(ompv, (void *)ompp)) != -1)
+                                       vector_del_slot(ompv, j);
+
+                               continue;
+                       }
+                       else {
+                               dm_lib_release();
+                               condlog(2, "%s devmap removed", ompp->alias);
+                       }
                }
        }
+       return 0;
 }
 
 static void
-orphan_path (struct path * pp)
+sync_map_state(struct multipath *mpp)
 {
-       pp->mpp = NULL;
-       pp->checkfn = NULL;
-       pp->dmstate = PSTATE_UNDEF;
-       pp->checker_context = NULL;
-       pp->getuid = NULL;
-       pp->getprio = NULL;
+       struct pathgroup *pgp;
+       struct path *pp;
+       unsigned int i, j;
 
-       if (pp->fd >= 0)
-               close(pp->fd);
+       if (!mpp->pg)
+               return;
 
-       pp->fd = -1;
+       vector_foreach_slot (mpp->pg, pgp, i){
+               vector_foreach_slot (pgp->paths, pp, j){
+                       if (pp->state == PATH_UNCHECKED || 
+                           pp->state == PATH_WILD)
+                               continue;
+                       if ((pp->dmstate == PSTATE_FAILED ||
+                            pp->dmstate == PSTATE_UNDEF) &&
+                           (pp->state == PATH_UP || pp->state == PATH_GHOST))
+                               dm_reinstate_path(mpp->alias, pp->dev_t);
+                       else if ((pp->dmstate == PSTATE_ACTIVE ||
+                                 pp->dmstate == PSTATE_UNDEF) &&
+                                (pp->state == PATH_DOWN ||
+                                 pp->state == PATH_SHAKY))
+                               dm_fail_path(mpp->alias, pp->dev_t);
+               }
+       }
 }
 
 static void
-orphan_paths (struct vectors * vecs, struct multipath * mpp)
+sync_maps_state(vector mpvec)
 {
-       int i;
-       struct path * pp;
+       unsigned int i;
+       struct multipath *mpp;
 
-       vector_foreach_slot (vecs->pathvec, pp, i) {
-               if (pp->mpp == mpp) {
-                       condlog(4, "%s is orphaned", pp->dev_t);
-                       orphan_path(pp);
-               }
-       }
+       vector_foreach_slot (mpvec, mpp, i)
+               sync_map_state(mpp);
 }
 
 static int
-update_multipath_table (struct multipath *mpp, vector pathvec)
+flush_map(struct multipath * mpp, struct vectors * vecs)
 {
-       if (!mpp)
-               return 1;
-
-       if (dm_get_map(mpp->alias, &mpp->size, mpp->params))
+       /*
+        * clear references to this map before flushing so we can ignore
+        * the spurious uevent we may generate with the dm_flush_map call below
+        */
+       if (dm_flush_map(mpp->alias)) {
+               /*
+                * May not really be an error -- if the map was already flushed
+                * from the device mapper by dmsetup(8) for instance.
+                */
+               condlog(0, "%s: can't flush", mpp->alias);
                return 1;
+       }
+       else {
+               dm_lib_release();
+               condlog(2, "%s: devmap removed", mpp->alias);
+       }
 
-       if (disassemble_map(pathvec, mpp->params, mpp))
-               return 1;
+       orphan_paths(vecs->pathvec, mpp);
+       remove_map_and_stop_waiter(mpp, vecs, 1);
 
        return 0;
 }
 
 static int
-update_multipath_status (struct multipath *mpp)
+uev_add_map (struct sysfs_device * dev, struct vectors * vecs)
 {
-       if (!mpp)
-               return 1;
-
-       if(dm_get_status(mpp->alias, mpp->status))
-               return 1;
-
-       if (disassemble_status(mpp->status, mpp))
-               return 1;
-
-       return 0;
+       condlog(2, "%s: add map (uevent)", dev->kernel);
+       return ev_add_map(dev, vecs);
 }
 
-static int
-update_multipath_strings (struct multipath *mpp, vector pathvec)
+int
+ev_add_map (struct sysfs_device * dev, struct vectors * vecs)
 {
-       if (mpp->selector) {
-               FREE(mpp->selector);
-               mpp->selector = NULL;
-       }
-
-       if (mpp->features) {
-               FREE(mpp->features);
-               mpp->features = NULL;
-       }
-
-       if (mpp->hwhandler) {
-               FREE(mpp->hwhandler);
-               mpp->hwhandler = NULL;
-       }
+       char * alias;
+       char *dev_t;
+       int major, minor;
+       char * refwwid;
+       struct multipath * mpp;
+       int map_present;
+       int r = 1;
 
-       free_pgvec(mpp->pg, KEEP_PATHS);
-       mpp->pg = NULL;
+       dev_t = sysfs_attr_get_value(dev->devpath, "dev");
 
-       if (update_multipath_table(mpp, pathvec))
+       if (!dev_t || sscanf(dev_t, "%d:%d", &major, &minor) != 2)
                return 1;
 
-       if (update_multipath_status(mpp))
-               return 1;
-
-       return 0;
-}
+       alias = dm_mapname(major, minor);
 
-static void
-set_multipath_wwid (struct multipath * mpp)
-{
-       if (mpp->wwid)
-               return;
+       if (!alias)
+               return 1;
 
-       dm_get_uuid(mpp->alias, mpp->wwid);
-}
+       map_present = dm_map_present(alias);
 
-static int
-pathcount (struct multipath *mpp, int state)
-{
-       struct pathgroup *pgp;
-       struct path *pp;
-       int i, j;
-       int count = 0;
+       if (map_present && dm_type(alias, TGT_MPATH) <= 0) {
+               condlog(4, "%s: not a multipath map", alias);
+               return 0;
+       }
 
-       vector_foreach_slot (mpp->pg, pgp, i)
-               vector_foreach_slot (pgp->paths, pp, j)
-                       if (pp->state == state)
-                               count++;
-       return count;
-}
+       mpp = find_mp_by_alias(vecs->mpvec, alias);
 
-/*
- * mpp->no_path_retry:
- *   -2 (QUEUE) : queue_if_no_path enabled, never turned off
- *   -1 (FAIL)  : fail_if_no_path
- *    0 (UNDEF) : nothing
- *   >0         : queue_if_no_path enabled, turned off after polling n times
- */
-static void
-update_queue_mode_del_path(struct multipath *mpp)
-{
-       if (--mpp->nr_active == 0 && mpp->no_path_retry > 0) {
+       if (mpp) {
                /*
-                * Enter retry mode.
-                * meaning of +1: retry_tick may be decremented in
-                *                checkerloop before starting retry.
+                * Not really an error -- we generate our own uevent
+                * if we create a multipath mapped device as a result
+                * of uev_add_path
                 */
-               mpp->retry_tick = mpp->no_path_retry * conf->checkint + 1;
-               condlog(1, "%s: Entering recovery mode: max_retries=%d",
-                       mpp->alias, mpp->no_path_retry);
+               condlog(0, "%s: devmap already registered",
+                       dev->kernel);
+               return 0;
        }
-       condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
-}
 
-static void
-update_queue_mode_add_path(struct multipath *mpp)
-{
-       if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
-               /* come back to normal mode from retry mode */
-               mpp->retry_tick = 0;
-               dm_queue_if_no_path(mpp->alias, 1);
-               condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
-               condlog(1, "%s: Recovered to normal mode", mpp->alias);
+       /*
+        * now we can register the map
+        */
+       if (map_present && (mpp = add_map_without_path(vecs, minor, alias))) {
+               sync_map_state(mpp);
+               condlog(2, "%s: devmap %s added", alias, dev->kernel);
+               return 0;
        }
-       condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
-}
+       refwwid = get_refwwid(dev->kernel, DEV_DEVMAP, vecs->pathvec);
 
-static void
-set_no_path_retry(struct multipath *mpp)
-{
-       mpp->retry_tick = 0;
-       mpp->nr_active = pathcount(mpp, PATH_UP);
-       select_no_path_retry(mpp);
-
-       switch (mpp->no_path_retry) {
-       case NO_PATH_RETRY_UNDEF:
-               break;
-       case NO_PATH_RETRY_FAIL:
-               dm_queue_if_no_path(mpp->alias, 0);
-               break;
-       case NO_PATH_RETRY_QUEUE:
-               dm_queue_if_no_path(mpp->alias, 1);
-               break;
-       default:
-               dm_queue_if_no_path(mpp->alias, 1);
-               if (mpp->nr_active == 0) {
-                       /* Enter retry mode */
-                       mpp->retry_tick = mpp->no_path_retry * conf->checkint;
-                       condlog(1, "%s: Entering recovery mode: max_retries=%d",
-                               mpp->alias, mpp->no_path_retry);
-               }
-               break;
+       if (refwwid) {
+               r = coalesce_paths(vecs, NULL, refwwid, 0);
+               dm_lib_release();
        }
-}
-
-static struct hwentry *
-extract_hwe_from_path(struct multipath * mpp)
-{
-       struct path * pp;
-       struct pathgroup * pgp;
 
-       pgp = VECTOR_SLOT(mpp->pg, 0);
-       pp = VECTOR_SLOT(pgp->paths, 0);
+       if (!r)
+               condlog(2, "%s: devmap %s added", alias, dev->kernel);
+       else
+               condlog(0, "%s: uev_add_map %s failed", alias, dev->kernel);
 
-       return pp->hwe;
+       FREE(refwwid);
+       return r;
 }
 
-static void
-remove_map (struct multipath * mpp, struct vectors * vecs)
+static int
+uev_remove_map (struct sysfs_device * dev, struct vectors * vecs)
 {
-       int i;
-
-       stop_waiter_thread(mpp, vecs);
-
-       /*
-        * clear references to this map
-        */
-       orphan_paths(vecs, mpp);
-
-       /*
-        * purge the multipath vector
-        */
-       i = find_slot(vecs->mpvec, (void *)mpp);
-       vector_del_slot(vecs->mpvec, i);
-
-       /*
-        * final free
-        */
-       free_multipath(mpp, KEEP_PATHS);
-       mpp = NULL;
+       condlog(2, "%s: remove map (uevent)", dev->kernel);
+       return ev_remove_map(dev->kernel, vecs);
 }
 
-static void
-remove_maps (struct vectors * vecs)
+int
+ev_remove_map (char * devname, struct vectors * vecs)
 {
-       int i;
        struct multipath * mpp;
 
-       vector_foreach_slot (vecs->mpvec, mpp, i) {
-               remove_map(mpp, vecs);
-               i--;
+       mpp = find_mp_by_str(vecs->mpvec, devname);
+
+       if (!mpp) {
+               condlog(2, "%s: devmap not registered, can't remove",
+                       devname);
+               return 0;
        }
+       flush_map(mpp, vecs);
 
-       vector_free(vecs->mpvec);
-       vecs->mpvec = NULL;
+       return 0;
 }
 
 static int
-setup_multipath (struct vectors * vecs, struct multipath * mpp)
+uev_umount_map (struct sysfs_device * dev, struct vectors * vecs)
 {
-       set_multipath_wwid(mpp);
-       mpp->mpe = find_mpe(mpp->wwid);
-       condlog(4, "discovered map %s", mpp->alias);
-
-       if (update_multipath_strings(mpp, vecs->pathvec))
-               goto out;
+       struct multipath * mpp;
 
-       adopt_paths(vecs, mpp);
-       select_pgfailback(mpp);
-       mpp->hwe = extract_hwe_from_path(mpp);
-       set_no_path_retry(mpp);
+       condlog(2, "%s: umount map (uevent)", dev->kernel);
 
-       return 0;
-out:
-       condlog(0, "%s: failed to setup multipath", mpp->alias);
-       remove_map(mpp, vecs);
-       return 1;
-}
+       mpp = find_mp_by_str(vecs->mpvec, dev->kernel);
 
-static int
-need_switch_pathgroup (struct multipath * mpp, int refresh)
-{
-       struct pathgroup * pgp;
-       struct path * pp;
-       int i, j;
-
-       if (!mpp || mpp->pgfailback == -FAILBACK_MANUAL)
+       if (!mpp)
                return 0;
 
-       /*
-        * Refresh path priority values
-        */
-       if (refresh)
-               vector_foreach_slot (mpp->pg, pgp, i)
-                       vector_foreach_slot (pgp->paths, pp, j)
-                               pathinfo(pp, conf->hwtable, DI_PRIO);
-
-       select_path_group(mpp); /* sets mpp->nextpg */
-       pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
+       update_mpp_paths(mpp, vecs->pathvec);
+       verify_paths(mpp, vecs, NULL);
 
-       if (pgp && pgp->status != PGSTATE_ACTIVE)
-               return 1;
+       if (!VECTOR_SIZE(mpp->paths))
+               flush_map(mpp, vecs);
 
        return 0;
 }
 
-static void
-switch_pathgroup (struct multipath * mpp)
-{
-       struct pathgroup * pgp;
-       
-       pgp = VECTOR_SLOT(mpp->pg, mpp->nextpg - 1);
-       
-       if (pgp && pgp->status != PGSTATE_ACTIVE) {
-               dm_switchgroup(mpp->alias, mpp->nextpg);
-               condlog(2, "%s: switch to path group #%i",
-                        mpp->alias, mpp->nextpg);
-       }
-}
-
 static int
-update_multipath (struct vectors *vecs, char *mapname)
+uev_add_path (struct sysfs_device * dev, struct vectors * vecs)
 {
-       struct multipath *mpp;
-       struct pathgroup  *pgp;
-       struct path *pp;
-       int i, j;
-       int r = 1;
-
-       mpp = find_mp(vecs->mpvec, mapname);
-
-       if (!mpp)
-               goto out;
-
-       free_pgvec(mpp->pg, KEEP_PATHS);
-       mpp->pg = NULL;
-
-       if (setup_multipath(vecs, mpp))
-               goto out; /* mpp freed in setup_multipath */
-
-       /*
-        * compare checkers states with DM states
-        */
-       vector_foreach_slot (mpp->pg, pgp, i) {
-               vector_foreach_slot (pgp->paths, pp, j) {
-                       if (pp->dmstate != PSTATE_FAILED)
-                               continue;
-
-                       if (pp->state != PATH_DOWN) {
-                               condlog(2, "%s: mark as failed", pp->dev_t);
-                               pp->state = PATH_DOWN;
-                               update_queue_mode_del_path(mpp);
-
-                               /*
-                                * if opportune,
-                                * schedule the next check earlier
-                                */
-                               if (pp->tick > conf->checkint)
-                                       pp->tick = conf->checkint;
-                       }
-               }
-       }
-       r = 0;
-out:
-       if (r)
-               condlog(0, "failed to update multipath");
-
-       return r;
+       condlog(2, "%s: add path (uevent)", dev->kernel);
+       return (ev_add_path(dev->kernel, vecs) != 1)? 0 : 1;
 }
 
-static sigset_t unblock_sighup(void)
-{
-       sigset_t set, old;
-
-       sigemptyset(&set);
-       sigaddset(&set, SIGHUP);
-       pthread_sigmask(SIG_UNBLOCK, &set, &old);
-       return old;
-}
 
 /*
- * returns the reschedule delay
- * negative means *stop*
+ * returns:
+ * 0: added
+ * 1: error
+ * 2: blacklisted
  */
-static int
-waiteventloop (struct event_thread * waiter)
+int
+ev_add_path (char * devname, struct vectors * vecs)
 {
-       sigset_t set;
-       int event_nr;
-       int r;
-
-       if (!waiter->event_nr)
-               waiter->event_nr = dm_geteventnr(waiter->mapname);
+       struct multipath * mpp;
+       struct path * pp;
+       char empty_buff[WWID_SIZE] = {0};
 
-       if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
-               return 1;
+       pp = find_path_by_dev(vecs->pathvec, devname);
 
-       if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
-               dm_task_destroy(waiter->dmt);
-               return 1;
+       if (pp) {
+               condlog(0, "%s: spurious uevent, path already in pathvec",
+                       devname);
+               if (pp->mpp)
+                       return 0;
        }
-
-       if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
-                                                     waiter->event_nr)) {
-               dm_task_destroy(waiter->dmt);
-               return 1;
+       else {
+               /*
+                * get path vital state
+                */
+               if (!(pp = store_pathinfo(vecs->pathvec, conf->hwtable,
+                     devname, DI_ALL))) {
+                       condlog(0, "%s: failed to store path info", devname);
+                       return 1;
+               }
+               pp->checkint = conf->checkint;
        }
 
-       dm_task_no_open_count(waiter->dmt);
-       
-       /* accept wait interruption */
-       set = unblock_sighup();
-
-       /* interruption spits messages */
-       dm_shut_log();
-
-       /* wait */
-       r = dm_task_run(waiter->dmt);
-
-       /* wait is over : event or interrupt */
-       pthread_sigmask(SIG_SETMASK, &set, NULL);
-       //dm_restore_log();
-
-       if (!r) /* wait interrupted by signal */
-               return -1;
-
-       dm_task_destroy(waiter->dmt);
-       waiter->dmt = NULL;
-       waiter->event_nr++;
-
        /*
-        * upon event ...
+        * need path UID to go any further
         */
-       while (1) {
-               condlog(3, "%s: devmap event #%i",
-                               waiter->mapname, waiter->event_nr);
-
-               /*
-                * event might be :
-                *
-                * 1) a table reload, which means our mpp structure is
-                *    obsolete : refresh it through update_multipath()
-                * 2) a path failed by DM : mark as such through
-                *    update_multipath()
-                * 3) map has gone away : stop the thread.
-                * 4) a path reinstate : nothing to do
-                * 5) a switch group : nothing to do
-                */
-               pthread_cleanup_push(cleanup_lock, waiter->vecs->lock);
-               lock(waiter->vecs->lock);
-               r = update_multipath(waiter->vecs, waiter->mapname);
-               lock_cleanup_pop(waiter->vecs->lock);
-
-               if (r)
-                       return -1; /* stop the thread */
-
-               event_nr = dm_geteventnr(waiter->mapname);
-
-               if (waiter->event_nr == event_nr)
-                       return 1; /* upon problem reschedule 1s later */
-
-               waiter->event_nr = event_nr;
+       if (memcmp(empty_buff, pp->wwid, WWID_SIZE) == 0) {
+               condlog(0, "%s: failed to get path uid", devname);
+               return 1; /* leave path added to pathvec */
        }
-       return -1; /* never reach there */
-}
-
-static void *
-waitevent (void * et)
-{
-       int r;
-       struct event_thread *waiter;
-
-       mlockall(MCL_CURRENT | MCL_FUTURE);
-
-       waiter = (struct event_thread *)et;
-       pthread_cleanup_push(free_waiter, et);
-
-       while (1) {
-               r = waiteventloop(waiter);
-
-               if (r < 0)
-                       break;
-
-               sleep(r);
+       if (filter_path(conf, pp) > 0){
+               int i = find_slot(vecs->pathvec, (void *)pp);
+               if (i != -1)
+                       vector_del_slot(vecs->pathvec, i);
+               free_path(pp);
+               return 2;
        }
+       mpp = pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
+rescan:
+       if (mpp) {
+               if (adopt_paths(vecs->pathvec, mpp))
+                       return 1; /* leave path added to pathvec */
 
-       pthread_cleanup_pop(1);
-       return NULL;
-}
-
-static int
-start_waiter_thread (struct multipath * mpp, struct vectors * vecs)
-{
-       pthread_attr_t attr;
-       struct event_thread * wp;
-
-       if (!mpp)
-               return 0;
-
-       if (pthread_attr_init(&attr))
-               goto out;
-
-       pthread_attr_setstacksize(&attr, 32 * 1024);
-       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-
-       wp = alloc_waiter();
-
-       if (!wp)
-               goto out;
-
-       mpp->waiter = (void *)wp;
-       strncpy(wp->mapname, mpp->alias, WWID_SIZE);
-       wp->vecs = vecs;
-
-       if (pthread_create(&wp->thread, &attr, waitevent, wp)) {
-               condlog(0, "%s: cannot create event checker", wp->mapname);
-               goto out1;
+               verify_paths(mpp, vecs, NULL);
+               mpp->action = ACT_RELOAD;
        }
-       condlog(2, "%s: event checker started", wp->mapname);
-
-       return 0;
-out1:
-       free_waiter(wp);
-       mpp->waiter = NULL;
-out:
-       condlog(0, "failed to start waiter thread");
-       return 1;
-}
-
-int
-uev_add_map (char * devname, struct vectors * vecs)
-{
-       int major, minor;
-       char dev_t[BLK_DEV_SIZE];
-       char * alias;
-       struct multipath * mpp;
-
-       if (sysfs_get_dev(sysfs_path, devname, dev_t, BLK_DEV_SIZE))
-               return 1;
-
-       if (sscanf(dev_t, "%d:%d", &major, &minor) != 2)
-               return 1;
-
-       alias = dm_mapname(major, minor);
-               
-       if (!alias)
-               return 1;
-       
-       if (!dm_type(alias, DEFAULT_TARGET)) {
-               condlog(4, "%s: not a multipath map", alias);
-               FREE(alias);
-               return 0;
+       else {
+               if ((mpp = add_map_with_path(vecs, pp, 1)))
+                       mpp->action = ACT_CREATE;
+               else
+                       return 1; /* leave path added to pathvec */
        }
 
-       mpp = find_mp(vecs->mpvec, alias);
-
-       if (mpp) {
+       /*
+        * push the map to the device-mapper
+        */
+       if (setup_map(mpp)) {
+               condlog(0, "%s: failed to setup map for addition of new "
+                       "path %s", mpp->alias, devname);
+               goto out;
+       }
+       /*
+        * reload the map for the multipath mapped device
+        */
+       if (domap(mpp) <= 0) {
+               condlog(0, "%s: failed in domap for addition of new "
+                       "path %s", mpp->alias, devname);
                /*
-                * this should not happen,
-                * we missed a remove map event (not sent ?)
+                * deal with asynchronous uevents :((
                 */
-               condlog(2, "%s: already registered", alias);
-               remove_map(mpp, vecs);
+               if (mpp->action == ACT_RELOAD) {
+                       condlog(0, "%s: uev_add_path sleep", mpp->alias);
+                       sleep(1);
+                       update_mpp_paths(mpp, vecs->pathvec);
+                       goto rescan;
+               }
+               else
+                       goto out;
        }
+       dm_lib_release();
 
        /*
-        * now we can allocate
+        * update our state from kernel regardless of create or reload
         */
-       mpp = alloc_multipath();
-
-       if (!mpp)
-               return 1;
-
-       mpp->minor = minor;
-       mpp->alias = alias;
-
        if (setup_multipath(vecs, mpp))
-               return 1; /* mpp freed in setup_multipath */
-
-       if (!vector_alloc_slot(vecs->mpvec))
                goto out;
 
-       vector_set_slot(vecs->mpvec, mpp);
-       adopt_paths(vecs, mpp);
+       sync_map_state(mpp);
 
-       if (start_waiter_thread(mpp, vecs))
-               goto out;
+       if (mpp->action == ACT_CREATE &&
+           start_waiter_thread(mpp, vecs))
+                       goto out;
 
+       condlog(2, "%s path added to devmap %s", devname, mpp->alias);
        return 0;
+
 out:
-       condlog(2, "%s: add devmap failed", mpp->alias);
-       remove_map(mpp, vecs);
+       remove_map(mpp, vecs, 1);
        return 1;
 }
 
-int
-uev_remove_map (char * devname, struct vectors * vecs)
+static int
+uev_remove_path (struct sysfs_device * dev, struct vectors * vecs)
 {
-       int minor;
-       struct multipath * mpp;
-
-       if (sscanf(devname, "dm-%d", &minor) != 1)
-               return 1;
+       int retval;
 
-       mpp = find_mp_by_minor(vecs->mpvec, minor);
+       condlog(2, "%s: remove path (uevent)", dev->kernel);
+       retval = ev_remove_path(dev->kernel, vecs);
+       if (!retval)
+               sysfs_device_put(dev);
 
-       if (!mpp) {
-               condlog(3, "%s: devmap not registered, can't remove",
-                       devname);
-               return 0;
-       }
-
-       condlog(2, "remove %s devmap", mpp->alias);
-       remove_map(mpp, vecs);
-
-       return 0;
+       return retval;
 }
 
+/*
+ * Remove the path named devname from its multipath map (if it has one)
+ * and from vecs->pathvec, then free it. The map is flushed when its last
+ * path goes away, otherwise it is reloaded without the path.
+ * Returns 0 on success, 1 if the path is unknown or the map update
+ * failed.
+ */
 int
-uev_add_path (char * devname, struct vectors * vecs)
+ev_remove_path (char * devname, struct vectors * vecs)
 {
+       struct multipath * mpp;
        struct path * pp;
+       int i, retval = 0;
 
        pp = find_path_by_dev(vecs->pathvec, devname);
 
-       if (pp) {
-               condlog(3, "%s: already in pathvec");
-               return 1;
-       }
-       pp = store_pathinfo(vecs->pathvec, conf->hwtable,
-                      devname, DI_SYSFS | DI_WWID);
-
        if (!pp) {
-               condlog(0, "%s: failed to store path info", devname);
+               condlog(0, "%s: spurious uevent, path not in pathvec", devname);
                return 1;
        }
 
-       condlog(2, "%s: path checker registered", devname);
-       pp->mpp = find_mp_by_wwid(vecs->mpvec, pp->wwid);
-
-       if (pp->mpp) {
-               condlog(4, "%s: ownership set to %s",
-                               pp->dev_t, pp->mpp->alias);
-       } else {
-               condlog(4, "%s: orphaned", pp->dev_t);
-               orphan_path(pp);
-       }
+       /*
+        * avoid referring to the map of an orphaned path
+        */
+       if ((mpp = pp->mpp)) {
+               /*
+                * transform the mp->pg vector of vectors of paths
+                * into a mp->params string to feed the device-mapper
+                */
+               if (update_mpp_paths(mpp, vecs->pathvec)) {
+                       condlog(0, "%s: failed to update paths",
+                               mpp->alias);
+                       goto out;
+               }
+               if ((i = find_slot(mpp->paths, (void *)pp)) != -1)
+                       vector_del_slot(mpp->paths, i);
 
-       return 0;
-}
+               /*
+                * remove the map IFF removing the last path
+                */
+               if (VECTOR_SIZE(mpp->paths) == 0) {
+                       char alias[WWID_SIZE];
 
-int
-uev_remove_path (char * devname, struct vectors * vecs)
-{
-       int i;
-       struct path * pp;
+                       /* flush_map will fail if the device is open */
+                       snprintf(alias, sizeof(alias), "%s", mpp->alias);
+                       if (!flush_map(mpp, vecs)) {
+                               condlog(2, "%s: removed map after"
+                                       " removing all paths",
+                                       alias);
+                               /* pp came from pathvec: unlink, then free */
+                               goto unlink_path;
+                       }
+                       /* Not an error, continue */
+               }
 
-       pp = find_path_by_dev(vecs->pathvec, devname);
+               if (setup_map(mpp)) {
+                       condlog(0, "%s: failed to setup map for"
+                               " removal of path %s", mpp->alias,
+                               devname);
+                       goto out;
+               }
+               /* reload the map */
+               mpp->action = ACT_RELOAD;
+               if (domap(mpp) <= 0) {
+                       condlog(0, "%s: failed in domap for "
+                               "removal of path %s",
+                               mpp->alias, devname);
+                       retval = 1;
+               } else {
+                       /* update our state from kernel */
+                       if (setup_multipath(vecs, mpp)) {
+                               goto out;
+                       }
+                       sync_map_state(mpp);
 
-       if (!pp) {
-               condlog(3, "%s: not in pathvec");
-               return 1;
+                       condlog(2, "%s: path removed from map %s",
+                               devname, mpp->alias);
+               }
        }
 
-       if (pp->mpp && pp->state == PATH_UP)
-               update_queue_mode_del_path(pp->mpp);
+unlink_path:
+       if ((i = find_slot(vecs->pathvec, (void *)pp)) != -1)
+               vector_del_slot(vecs->pathvec, i);
 
-       condlog(2, "remove %s path checker", devname);
-       i = find_slot(vecs->pathvec, (void *)pp);
-       vector_del_slot(vecs->pathvec, i);
        free_path(pp);
 
-       return 0;
-}
+       return retval;
 
-int
-show_paths (char ** r, int * len, struct vectors * vecs)
-{
-       int i;
-       struct path * pp;
-       char * c;
-       char * reply;
-       struct path_layout pl;
-
-       get_path_layout(&pl, vecs->pathvec);
-       reply = MALLOC(MAX_REPLY_LEN);
-
-       if (!reply)
-               return 1;
-
-       c = reply;
-
-       if (VECTOR_SIZE(vecs->pathvec) > 0)
-               c += snprint_path_header(c, reply + MAX_REPLY_LEN - c,
-                                        PRINT_PATH_CHECKER, &pl);
-
-       vector_foreach_slot(vecs->pathvec, pp, i)
-               c += snprint_path(c, reply + MAX_REPLY_LEN - c,
-                                 PRINT_PATH_CHECKER, pp, &pl);
-
-       *r = reply;
-       *len = (int)(c - reply + 1);
-       return 0;
-}
-
-int
-show_maps (char ** r, int *len, struct vectors * vecs)
-{
-       int i;
-       struct multipath * mpp;
-       char * c;
-       char * reply;
-       struct map_layout ml;
-
-       get_map_layout(&ml, vecs->mpvec);
-       reply = MALLOC(MAX_REPLY_LEN);
-
-       if (!reply)
-               return 1;
-
-       c = reply;
-       if (VECTOR_SIZE(vecs->mpvec) > 0)
-               c += snprint_map_header(c, reply + MAX_REPLY_LEN - c,
-                                       PRINT_MAP_FAILBACK, &ml);
-
-       vector_foreach_slot(vecs->mpvec, mpp, i)
-               c += snprint_map(c, reply + MAX_REPLY_LEN - c,
-                                PRINT_MAP_FAILBACK, mpp, &ml);
-
-       *r = reply;
-       *len = (int)(c - reply + 1);
-       return 0;
-}
-
-int
-dump_pathvec (char ** r, int * len, struct vectors * vecs)
-{
-       int i;
-       struct path * pp;
-       char * reply;
-       char * p;
-
-       *len = VECTOR_SIZE(vecs->pathvec) * sizeof(struct path);
-       reply = (char *)MALLOC(*len);
-       *r = reply;
-
-       if (!reply)
-               return 1;
-
-       p = reply;
-
-       vector_foreach_slot (vecs->pathvec, pp, i) {
-               memcpy((void *)p, pp, sizeof(struct path));
-               p += sizeof(struct path);
-       }
-
-       /* return negative to hint caller not to add "ok" to the dump */
-       return -1;
+out:
+       remove_map_and_stop_waiter(mpp, vecs, 1);
+       return 1;
 }
 
 static int
 map_discovery (struct vectors * vecs)
 {
-       int i;
        struct multipath * mpp;
+       unsigned int i;
 
-       if (dm_get_maps(vecs->mpvec, "multipath"))
+       if (dm_get_maps(vecs->mpvec))   /* non-zero result aborts discovery */
                return 1;
 
-       vector_foreach_slot (vecs->mpvec, mpp, i) {
+       vector_foreach_slot (vecs->mpvec, mpp, i)       /* stop at the first map whose setup fails */
                if (setup_multipath(vecs, mpp))
                        return 1;
-               mpp->minor = dm_get_minor(mpp->alias);
-               start_waiter_thread(mpp, vecs);
-       }
-
-       return 0;
-}
-
-int
-reconfigure (struct vectors * vecs)
-{
-       struct config * old = conf;
-       struct multipath * mpp;
-       struct path * pp;
-       int i;
-
-       conf = NULL;
 
-       if (load_config(DEFAULT_CONFIGFILE)) {
-               conf = old;
-               condlog(2, "reconfigure failed, continue with old config");
-               return 1;
-       }
-       conf->verbosity = old->verbosity;
-       free_config(old);
-
-       vector_foreach_slot (vecs->mpvec, mpp, i) {
-               mpp->mpe = find_mpe(mpp->wwid);
-               mpp->hwe = extract_hwe_from_path(mpp);
-               adopt_paths(vecs, mpp);
-               set_no_path_retry(mpp);
-       }
-       vector_foreach_slot (vecs->pathvec, pp, i) {
-               select_checkfn(pp);
-               select_getuid(pp);
-               select_getprio(pp);
-       }
-       condlog(2, "reconfigured");
        return 0;
 }
 
@@ -940,7 +574,7 @@ uxsock_trigger (char * str, char ** reply, int * len, void * trigger_data)
 {
        struct vectors * vecs;
        int r;
-       
+
        *reply = NULL;
        *len = 0;
        vecs = (struct vectors *)trigger_data;
@@ -982,50 +616,58 @@ uev_discard(char * devpath)
        return 0;
 }
 
-int 
+int
 uev_trigger (struct uevent * uev, void * trigger_data)
 {
        int r = 0;
-       char devname[32];
+       struct sysfs_device *sysdev;
        struct vectors * vecs;
 
        vecs = (struct vectors *)trigger_data;
 
        if (uev_discard(uev->devpath))
-               goto out;
+               return 0;
+
+       sysdev = sysfs_device_get(uev->devpath);
+       if(!sysdev)
+               return 0;
 
-       basename(uev->devpath, devname);
        lock(vecs->lock);
 
        /*
-        * device map add/remove event
+        * device map event
+        * Add events are ignored here as the tables
+        * are not fully initialised then.
         */
-       if (!strncmp(devname, "dm-", 3)) {
-               if (!strncmp(uev->action, "add", 3)) {
-                       r = uev_add_map(devname, vecs);
+       if (!strncmp(sysdev->kernel, "dm-", 3)) {
+               if (!strncmp(uev->action, "change", 6)) {
+                       r = uev_add_map(sysdev, vecs);
                        goto out;
                }
-#if 0
                if (!strncmp(uev->action, "remove", 6)) {
-                       r = uev_remove_map(devname, vecs);
+                       r = uev_remove_map(sysdev, vecs);
+                       goto out;
+               }
+               if (!strncmp(uev->action, "umount", 6)) {
+                       r = uev_umount_map(sysdev, vecs);
                        goto out;
                }
-#endif
                goto out;
        }
-       
+
        /*
         * path add/remove event
         */
-       if (blacklist(conf->blist, devname))
+       if (filter_devnode(conf->blist_devnode, conf->elist_devnode,
+                          sysdev->kernel) > 0)
                goto out;
 
        if (!strncmp(uev->action, "add", 3)) {
-               r = uev_add_path(devname, vecs);
+               r = uev_add_path(sysdev, vecs);
                goto out;
        }
        if (!strncmp(uev->action, "remove", 6)) {
-               r = uev_remove_path(devname, vecs);
+               r = uev_remove_path(sysdev, vecs);
                goto out;
        }
 
@@ -1039,28 +681,39 @@ ueventloop (void * ap)
 {
        if (uevent_listen(&uev_trigger, ap))
                fprintf(stderr, "error starting uevent listener");
-               
+
        return NULL;
 }
 
 static void *
 uxlsnrloop (void * ap)
 {
-       if (load_keys())
-               return NULL;
-       
-       if (alloc_handlers())
+       if (cli_init())
                return NULL;
 
-       add_handler(LIST+PATHS, cli_list_paths);
-       add_handler(LIST+MAPS, cli_list_maps);
-       add_handler(ADD+PATH, cli_add_path);
-       add_handler(DEL+PATH, cli_del_path);
-       add_handler(ADD+MAP, cli_add_map);
-       add_handler(DEL+MAP, cli_del_map);
-       add_handler(SWITCH+MAP+GROUP, cli_switch_group);
-       add_handler(DUMP+PATHVEC, cli_dump_pathvec);
-       add_handler(RECONFIGURE, cli_reconfigure);
+       set_handler_callback(LIST+PATHS, cli_list_paths);
+       set_handler_callback(LIST+PATHS+FMT, cli_list_paths_fmt);
+       set_handler_callback(LIST+MAPS, cli_list_maps);
+       set_handler_callback(LIST+STATUS, cli_list_status);
+       set_handler_callback(LIST+MAPS+STATUS, cli_list_maps_status);
+       set_handler_callback(LIST+MAPS+STATS, cli_list_maps_stats);
+       set_handler_callback(LIST+MAPS+TOPOLOGY, cli_list_maps_topology);
+       set_handler_callback(LIST+TOPOLOGY, cli_list_maps_topology);
+       set_handler_callback(LIST+MAP+TOPOLOGY, cli_list_map_topology);
+       set_handler_callback(LIST+CONFIG, cli_list_config);
+       set_handler_callback(LIST+BLACKLIST, cli_list_blacklist);
+       set_handler_callback(LIST+DEVICES, cli_list_devices);
+       set_handler_callback(LIST+WILDCARDS, cli_list_wildcards);
+       set_handler_callback(ADD+PATH, cli_add_path);
+       set_handler_callback(DEL+PATH, cli_del_path);
+       set_handler_callback(ADD+MAP, cli_add_map);
+       set_handler_callback(DEL+MAP, cli_del_map);
+       set_handler_callback(SWITCH+MAP+GROUP, cli_switch_group);
+       set_handler_callback(RECONFIGURE, cli_reconfigure);
+       set_handler_callback(SUSPEND+MAP, cli_suspend);
+       set_handler_callback(RESUME+MAP, cli_resume);
+       set_handler_callback(REINSTATE+PATH, cli_reinstate);
+       set_handler_callback(FAIL+PATH, cli_fail);
 
        uxsock_listen(&uxsock_trigger, ap);
 
@@ -1084,7 +737,7 @@ exit_daemon (int status)
 }
 
 static void
-fail_path (struct path * pp)
+fail_path (struct path * pp, int del_active)
 {
        if (!pp->mpp)
                return;
@@ -1093,23 +746,25 @@ fail_path (struct path * pp)
                 pp->dev_t, pp->mpp->alias);
 
        dm_fail_path(pp->mpp->alias, pp->dev_t);
-       update_queue_mode_del_path(pp->mpp);
+       if (del_active)
+               update_queue_mode_del_path(pp->mpp);
 }
 
 /*
  * caller must have locked the path list before calling that function
  */
 static void
-reinstate_path (struct path * pp)
+reinstate_path (struct path * pp, int add_active)
 {
        if (!pp->mpp)
                return;
 
-       if (dm_reinstate(pp->mpp->alias, pp->dev_t))
+       if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
                condlog(0, "%s: reinstate failed", pp->dev_t);
        else {
                condlog(2, "%s: reinstated", pp->dev_t);
-               update_queue_mode_add_path(pp->mpp);
+               if (add_active) /* check_path passes 1 only when the old state was neither UP nor GHOST */
+                       update_queue_mode_add_path(pp->mpp);
        }
 }
 
@@ -1124,12 +779,12 @@ enable_group(struct path * pp)
         *
         * we can safely return here, because upon map reload, all
         * PG will be enabled.
-        */
-       if (!pp->pgindex)
+        */
+       if (!pp->mpp->pg || !pp->pgindex)
                return;
 
        pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
-       
+
        if (pgp->status == PGSTATE_DISABLED) {
                condlog(2, "%s: enable group #%i", pp->mpp->alias, pp->pgindex);
                dm_enablegroup(pp->mpp->alias, pp->pgindex);
@@ -1140,12 +795,12 @@ static void
 mpvec_garbage_collector (struct vectors * vecs)
 {
        struct multipath * mpp;
-       int i;
+       unsigned int i;
 
        vector_foreach_slot (vecs->mpvec, mpp, i) {
                if (mpp && mpp->alias && !dm_map_present(mpp->alias)) {
                        condlog(2, "%s: remove dead map", mpp->alias);
-                       remove_map(mpp, vecs);
+                       remove_map_and_stop_waiter(mpp, vecs, 1);
                        i--;
                }
        }
@@ -1155,7 +810,7 @@ static void
 defered_failback_tick (vector mpvec)
 {
        struct multipath * mpp;
-       int i;
+       unsigned int i;
 
        vector_foreach_slot (mpvec, mpp, i) {
                /*
@@ -1174,10 +829,11 @@ static void
 retry_count_tick(vector mpvec)
 {
        struct multipath *mpp;
-       int i;
+       unsigned int i;
 
        vector_foreach_slot (mpvec, mpp, i) {
                if (mpp->retry_tick) {
+                       mpp->stat_total_queueing_time++;
                        condlog(4, "%s: Retrying.. No active path", mpp->alias);
                        if(--mpp->retry_tick == 0) {
                                dm_queue_if_no_path(mpp->alias, 0);
@@ -1187,148 +843,182 @@ retry_count_tick(vector mpvec)
        }
 }
 
-static void *
-checkerloop (void *ap)
+/*
+ * Run one checker pass on path pp. Paths without a map and paths whose
+ * tick has not expired are skipped. A state change fails or reinstates
+ * the path; an unchanged up/ghost path gets its check interval doubled,
+ * capped at conf->max_checkint.
+ */
+void
+check_path (struct vectors * vecs, struct path * pp)
 {
-       struct vectors *vecs;
-       struct path *pp;
-       int i, count = 0;
        int newstate;
-       char checker_msg[MAX_CHECKER_MSG_SIZE];
-
-       mlockall(MCL_CURRENT | MCL_FUTURE);
 
-       memset(checker_msg, 0, MAX_CHECKER_MSG_SIZE);
-       vecs = (struct vectors *)ap;
+       if (!pp->mpp)
+               return;
 
-       condlog(2, "path checkers start up");
+       if (pp->tick && --pp->tick)
+               return; /* don't check this path yet */
 
        /*
-        * init the path check interval
+        * provision a next check soonest,
+        * in case we exit abnormally from here
         */
-       vector_foreach_slot (vecs->pathvec, pp, i) {
-               pp->checkint = conf->checkint;
+       pp->tick = conf->checkint;
+
+       if (!checker_selected(&pp->checker)) {
+               pathinfo(pp, conf->hwtable, DI_SYSFS);
+               select_checker(pp);
+       }
+       if (!checker_selected(&pp->checker)) {
+               condlog(0, "%s: checker is not set", pp->dev);
+               return;
        }
+       /*
+        * Set checker in async mode.
+        * Honored only by checker implementing the said mode.
+        */
+       checker_set_async(&pp->checker);
 
-       while (1) {
-               pthread_cleanup_push(cleanup_lock, vecs->lock);
-               lock(vecs->lock);
-               condlog(4, "tick");
+       newstate = checker_check(&pp->checker);
 
-               vector_foreach_slot (vecs->pathvec, pp, i) {
-                       if (!pp->mpp)
-                               continue;
+       if (newstate < 0) {
+               condlog(2, "%s: unusable path", pp->dev);
+               pathinfo(pp, conf->hwtable, 0);
+               return;
+       }
+       /*
+        * Async IO in flight. Keep the previous path state
+        * and reschedule as soon as possible
+        */
+       if (newstate == PATH_PENDING) {
+               pp->tick = 1;   /* overrides the checkint-based tick set above */
+               return;
+       }
+       if (newstate != pp->state) {
+               int oldstate = pp->state;
+               pp->state = newstate;
+               LOG_MSG(1, checker_message(&pp->checker));
 
-                       if (pp->tick && --pp->tick)
-                               continue; /* don't check this path yet */
+               /*
+                * upon state change, reset the checkint
+                * to the shortest delay
+                */
+               pp->checkint = conf->checkint;
 
+               if (newstate == PATH_DOWN || newstate == PATH_SHAKY ||
+                   update_multipath_strings(pp->mpp, vecs->pathvec)) {
                        /*
-                        * provision a next check soonest,
-                        * in case we exit abnormaly from here
+                        * proactively fail path in the DM
                         */
-                       pp->tick = conf->checkint;
-                       
-                       if (!pp->checkfn) {
-                               pathinfo(pp, conf->hwtable, DI_SYSFS);
-                               select_checkfn(pp);
-                       }
+                       if (oldstate == PATH_UP ||
+                           oldstate == PATH_GHOST)
+                               fail_path(pp, 1);       /* del_active set: path was UP/GHOST before */
+                       else
+                               fail_path(pp, 0);
 
-                       if (!pp->checkfn) {
-                               condlog(0, "%s: checkfn is void", pp->dev);
-                               continue;
-                       }
-                       newstate = pp->checkfn(pp->fd, checker_msg,
-                                              &pp->checker_context);
-                       
-                       if (newstate != pp->state) {
-                               pp->state = newstate;
-                               LOG_MSG(1, checker_msg);
+                       /*
+                        * cancel scheduled failback
+                        */
+                       pp->mpp->failback_tick = 0;
 
-                               /*
-                                * upon state change, reset the checkint
-                                * to the shortest delay
-                                */
-                               pp->checkint = conf->checkint;
+                       pp->mpp->stat_path_failures++;
+                       return;
+               }
 
-                               if (newstate == PATH_DOWN ||
-                                   newstate == PATH_SHAKY) {
-                                       /*
-                                        * proactively fail path in the DM
-                                        */
-                                       fail_path(pp);
+               /* reinstate this path */
+               if (oldstate != PATH_UP &&
+                   oldstate != PATH_GHOST)
+                       reinstate_path(pp, 1);  /* add_active set: path was not UP/GHOST before */
+               else
+                       reinstate_path(pp, 0);
 
-                                       /*
-                                        * cancel scheduled failback
-                                        */
-                                       pp->mpp->failback_tick = 0;
+               /*
+                * schedule [deferred] failback
+                */
+               if (pp->mpp->pgfailback > 0)
+                       pp->mpp->failback_tick =
+                               pp->mpp->pgfailback + 1;
+               else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
+                   need_switch_pathgroup(pp->mpp, 1))
+                       switch_pathgroup(pp->mpp);
 
-                                       continue;
-                               }
+               /*
+                * if at least one path is up in a group, and
+                * the group is disabled, re-enable it
+                */
+               if (newstate == PATH_UP)
+                       enable_group(pp);
+       }
+       else if (newstate == PATH_UP || newstate == PATH_GHOST) {
+               LOG_MSG(4, checker_message(&pp->checker));
+               /*
+                * double the next check delay.
+                * max at conf->max_checkint
+                */
+               if (pp->checkint < (conf->max_checkint / 2))
+                       pp->checkint = 2 * pp->checkint;
+               else
+                       pp->checkint = conf->max_checkint;
+
+               pp->tick = pp->checkint;
+               condlog(4, "%s: delay next check %is",
+                               pp->dev_t, pp->tick);
+       }
+       else if (newstate == PATH_DOWN)
+               LOG_MSG(2, checker_message(&pp->checker));
 
-                               /*
-                                * reinstate this path
-                                */
-                               reinstate_path(pp);
+       pp->state = newstate;
 
-                               /*
-                                * need to switch group ?
-                                */
-                               update_multipath_strings(pp->mpp,
-                                                        vecs->pathvec);
+       /* path prio refreshing */
+       condlog(4, "path prio refresh");
+       pathinfo(pp, conf->hwtable, DI_PRIO);
 
-                               /*
-                                * schedule defered failback
-                                */
-                               if (pp->mpp->pgfailback > 0)
-                                       pp->mpp->failback_tick =
-                                               pp->mpp->pgfailback + 1;
-                               else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE &&
-                                   need_switch_pathgroup(pp->mpp, 1))
-                                       switch_pathgroup(pp->mpp);
+       /* pathgroup failback policy */
+       if (need_switch_pathgroup(pp->mpp, 0)) {
+               if (pp->mpp->pgfailback > 0 &&
+                   pp->mpp->failback_tick <= 0)
+                       pp->mpp->failback_tick =
+                               pp->mpp->pgfailback + 1;
+               else if (pp->mpp->pgfailback ==
+                               -FAILBACK_IMMEDIATE)
+                       switch_pathgroup(pp->mpp);
+       }
+}
 
-                               /*
-                                * if at least one path is up in a group, and
-                                * the group is disabled, re-enable it
-                                */
-                               if (newstate == PATH_UP)
-                                       enable_group(pp);
-                       }
-                       else if (newstate == PATH_UP || newstate == PATH_GHOST) {
-                               LOG_MSG(4, checker_msg);
-                               /*
-                                * double the next check delay.
-                                * max at conf->max_checkint
-                                */
-                               if (pp->checkint < (conf->max_checkint / 2))
-                                       pp->checkint = 2 * pp->checkint;
-                               else
-                                       pp->checkint = conf->max_checkint;
+static void *
+checkerloop (void *ap)
+{
+       struct vectors *vecs;
+       struct path *pp;
+       int count = 0;
+       unsigned int i;
 
-                               pp->tick = pp->checkint;
-                               condlog(4, "%s: delay next check %is",
-                                               pp->dev_t, pp->tick);
+       mlockall(MCL_CURRENT | MCL_FUTURE);
+       vecs = (struct vectors *)ap;
+       condlog(2, "path checkers start up");
 
-                       }
-                       pp->state = newstate;
+       /*
+        * init the path check interval
+        */
+       vector_foreach_slot (vecs->pathvec, pp, i) {
+               pp->checkint = conf->checkint;
+       }
 
-                       /*
-                        * path prio refreshing
-                        */
-                       condlog(4, "path prio refresh");
-                       pathinfo(pp, conf->hwtable, DI_PRIO);
-
-                       if (need_switch_pathgroup(pp->mpp, 0)) {
-                               if (pp->mpp->pgfailback > 0)
-                                       pp->mpp->failback_tick =
-                                               pp->mpp->pgfailback + 1;
-                               else if (pp->mpp->pgfailback ==
-                                               -FAILBACK_IMMEDIATE)
-                                       switch_pathgroup(pp->mpp);
+       while (1) {
+               pthread_cleanup_push(cleanup_lock, vecs->lock);
+               lock(vecs->lock);
+               condlog(4, "tick");
+
+               if (vecs->pathvec) {
+                       vector_foreach_slot (vecs->pathvec, pp, i) {
+                               check_path(vecs, pp);
                        }
                }
-               defered_failback_tick(vecs->mpvec);
-               retry_count_tick(vecs->mpvec);
-
+               if (vecs->mpvec) {
+                       defered_failback_tick(vecs->mpvec);
+                       retry_count_tick(vecs->mpvec);
+               }
                if (count)
                        count--;
                else {
@@ -1336,15 +1026,121 @@ checkerloop (void *ap)
                        mpvec_garbage_collector(vecs);
                        count = MAPGCINT;
                }
-               
+
                lock_cleanup_pop(vecs->lock);
                sleep(1);
        }
        return NULL;
 }
 
+/*
+ * (Re)build vecs->pathvec and vecs->mpvec: discover paths and existing
+ * maps, coalesce the paths into a new map vector that replaces
+ * vecs->mpvec, then set up each map and, when start_waiters is non-zero,
+ * start its waiter thread. Returns 0 on success, 1 on failure.
+ */
+int
+configure (struct vectors * vecs, int start_waiters)
+{
+       struct multipath * mpp;
+       struct path * pp;
+       vector mpvec;
+       int i;
+
+       if (!vecs->pathvec && !(vecs->pathvec = vector_alloc()))
+               return 1;
+
+       if (!vecs->mpvec && !(vecs->mpvec = vector_alloc()))
+               return 1;
+
+       if (!(mpvec = vector_alloc()))
+               return 1;
+
+       /* probe for current path (from sysfs) and map (from dm) sets */
+       path_discovery(vecs->pathvec, conf, DI_ALL);
+
+       vector_foreach_slot (vecs->pathvec, pp, i){
+               if (filter_path(conf, pp) > 0){
+                       vector_del_slot(vecs->pathvec, i);
+                       free_path(pp);
+                       i--;    /* presumably later slots shift down: revisit slot i */
+               }
+               else
+                       pp->checkint = conf->checkint;
+       }
+       if (map_discovery(vecs))
+               return 1;       /* NOTE(review): mpvec is not freed on this and the next two error returns */
+
+       /* create new set of maps & push changed ones into dm */
+       if (coalesce_paths(vecs, mpvec, NULL, 0))
+               return 1;
+
+       /*
+        * may need to remove some maps which are no longer relevant
+        * e.g., due to blacklist changes in conf file
+        */
+       if (coalesce_maps(vecs, mpvec))
+               return 1;
+
+       dm_lib_release();
+
+       sync_maps_state(mpvec);
+
+       /* purge dm of old maps */
+       remove_maps(vecs);
+
+       /*
+        * save new set of maps formed by considering current path state
+        */
+       vector_free(vecs->mpvec);
+       vecs->mpvec = mpvec;
+
+       /*
+        * start dm event waiter threads for these new maps
+        */
+       vector_foreach_slot(vecs->mpvec, mpp, i) {
+               if (setup_multipath(vecs, mpp))
+                       return 1;
+               if (start_waiters)
+                       if (start_waiter_thread(mpp, vecs))
+                               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Reload the config file and rebuild paths and maps from it.
+ * Returns 1 if the new config cannot be loaded, 0 otherwise.
+ */
+int
+reconfigure (struct vectors * vecs)
+{
+       struct config * old = conf;
+
+       /* free old map and path vectors ... they use old conf state */
+       if (VECTOR_SIZE(vecs->mpvec))
+               remove_maps_and_stop_waiters(vecs);
+       if (VECTOR_SIZE(vecs->pathvec))
+               free_pathvec(vecs->pathvec, FREE_PATHS);
+       vecs->pathvec = NULL;
+       conf = NULL;
+       if (load_config(DEFAULT_CONFIGFILE)) {
+               conf = old;     /* never leave the global conf NULL */
+               condlog(2, "reconfigure failed, continue with old config");
+               return 1;
+       }
+       conf->verbosity = old->verbosity;
+       if (!conf->checkint) {
+               conf->checkint = DEFAULT_CHECKINT;
+               conf->max_checkint = MAX_CHECKINT(conf->checkint);
+       }
+       configure(vecs, 1);
+       free_config(old);
+       return 0;
+}
+
 static struct vectors *
-init_paths (void)
+init_vecs (void)
 {
        struct vectors * vecs;
 
@@ -1353,30 +1149,16 @@ init_paths (void)
        if (!vecs)
                return NULL;
 
-       vecs->lock = 
+       vecs->lock =
                (pthread_mutex_t *)MALLOC(sizeof(pthread_mutex_t));
 
        if (!vecs->lock)
                goto out;
 
-       vecs->pathvec = vector_alloc();
-
-       if (!vecs->pathvec)
-               goto out1;
-               
-       vecs->mpvec = vector_alloc();
-
-       if (!vecs->mpvec)
-               goto out2;
-       
        pthread_mutex_init(vecs->lock, NULL);
 
        return vecs;
 
-out2:
-       vector_free(vecs->pathvec);
-out1:
-       FREE(vecs->lock);
 out:
        FREE(vecs);
        condlog(0, "failed to init paths");
@@ -1405,7 +1187,11 @@ signal_set(int signo, void (*func) (int))
 static void
 sighup (int sig)
 {
-       condlog(3, "SIGHUP received");
+       condlog(2, "reconfigure (SIGHUP)");
+
+       lock(gvecs->lock);
+       reconfigure(gvecs);
+       unlock(gvecs->lock);
 
 #ifdef _DEBUG_
        dbg_free_final(NULL);
@@ -1419,26 +1205,33 @@ sigend (int sig)
 }
 
 static void
+sigusr1 (int sig)
+{
+       condlog(3, "SIGUSR1 received"); /* log only, no other action taken */
+}
+
+static void
 signal_init(void)
 {
        signal_set(SIGHUP, sighup);
+       signal_set(SIGUSR1, sigusr1);   /* handler only logs the signal */
        signal_set(SIGINT, sigend);
        signal_set(SIGTERM, sigend);
-       signal_set(SIGKILL, sigend);
+       signal(SIGPIPE, SIG_IGN);       /* ignored outright rather than routed to a handler */
 }
 
 static void
 setscheduler (void)
 {
-        int res;
+       int res;
        static struct sched_param sched_param = {
-               sched_priority: 99
+               .sched_priority = 99    /* keep in sync with the warning text below */
        };
 
-        res = sched_setscheduler (0, SCHED_RR, &sched_param);
+       res = sched_setscheduler (0, SCHED_RR, &sched_param);   /* pid 0 selects the caller */
 
-        if (res == -1)
-                condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
+       if (res == -1)  /* not fatal: only a warning is logged */
+               condlog(LOG_WARNING, "Could not set SCHED_RR at priority 99");
        return;
 }
 
@@ -1455,7 +1248,7 @@ set_oom_adj (int val)
        fprintf(fp, "%i", val);
        fclose(fp);
 }
-       
+
 static int
 child (void * param)
 {
@@ -1474,14 +1267,38 @@ child (void * param)
        if (load_config(DEFAULT_CONFIGFILE))
                exit(1);
 
+       if (init_checkers()) {
+               condlog(0, "failed to initialize checkers");
+               exit(1);
+       }
+       if (init_prio()) {
+               condlog(0, "failed to initialize prioritizers");
+               exit(1);
+       }
+
        setlogmask(LOG_UPTO(conf->verbosity + 3));
 
        /*
         * fill the voids left in the config file
         */
        if (!conf->checkint) {
-               conf->checkint = CHECKINT;
-               conf->max_checkint = MAX_CHECKINT;
+               conf->checkint = DEFAULT_CHECKINT;
+               conf->max_checkint = MAX_CHECKINT(conf->checkint);
+       }
+
+       if (conf->max_fds) {
+               struct rlimit fd_limit;
+               if (conf->max_fds > 0) {
+                       fd_limit.rlim_cur = conf->max_fds;
+                       fd_limit.rlim_max = conf->max_fds;
+               }
+               else {
+                       fd_limit.rlim_cur = RLIM_INFINITY;
+                       fd_limit.rlim_max = RLIM_INFINITY;
+               }
+               if (setrlimit(RLIMIT_NOFILE, &fd_limit) < 0)
+                       condlog(0, "can't set open fds limit to %d : %s\n",
+                               conf->max_fds, strerror(errno));
        }
 
        if (pidfile_create(DEFAULT_PIDFILE, getpid())) {
@@ -1492,24 +1309,24 @@ child (void * param)
        }
        signal_init();
        setscheduler();
-       set_oom_adj(-17);
-       vecs = init_paths();
+       set_oom_adj(-16);
+       vecs = gvecs = init_vecs();
 
        if (!vecs)
                exit(1);
 
-       if (sysfs_get_mnt_path(sysfs_path, FILE_NAME_SIZE)) {
+       if (sysfs_init(conf->sysfs_dir, FILE_NAME_SIZE)) {
                condlog(0, "can not find sysfs mount point");
                exit(1);
        }
 
        /*
-        * fetch paths and multipaths lists
-        * no paths and/or no multipaths are valid scenarii
-        * vectors maintenance will be driven by events
+        * fetch and configure both paths and multipaths
         */
-       path_discovery(vecs->pathvec, conf, DI_SYSFS | DI_WWID | DI_CHECKER);
-       map_discovery(vecs);
+       if (configure(vecs, 1)) {
+               condlog(0, "failure during configuration");
+               exit(1);
+       }
 
        /*
         * start threads
@@ -1517,7 +1334,7 @@ child (void * param)
        pthread_attr_init(&attr);
        pthread_attr_setstacksize(&attr, 64 * 1024);
        pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-       
+
        pthread_create(&check_thr, &attr, checkerloop, vecs);
        pthread_create(&uevent_thr, &attr, ueventloop, vecs);
        pthread_create(&uxlsnr_thr, &attr, uxlsnrloop, vecs);
@@ -1528,13 +1345,15 @@ child (void * param)
         * exit path
         */
        lock(vecs->lock);
-       remove_maps(vecs);
+       remove_maps_and_stop_waiters(vecs);
        free_pathvec(vecs->pathvec, FREE_PATHS);
 
        pthread_cancel(check_thr);
        pthread_cancel(uevent_thr);
        pthread_cancel(uxlsnr_thr);
 
+       sysfs_cleanup();
+
        free_keys(keys);
        keys = NULL;
        free_handlers(handlers);
@@ -1547,14 +1366,23 @@ child (void * param)
        vecs->lock = NULL;
        FREE(vecs);
        vecs = NULL;
-       free_config(conf);
-       conf = NULL;
 
        condlog(2, "--------shut down-------");
-       
+
        if (logsink)
                log_thread_stop();
 
+       dm_lib_release();
+       dm_lib_exit();
+
+       /*
+        * Freeing config must be done after condlog() and dm_lib_exit(),
+        * because logging functions like dlog() and dm_write_log()
+        * reference the config.
+        */
+       free_config(conf);
+       conf = NULL;
+
 #ifdef _DEBUG_
        dbg_free_final(NULL);
 #endif
@@ -1562,6 +1390,76 @@ child (void * param)
        exit(0);
 }
 
+/*
+ * Detach the calling process and turn it into a daemon.
+ *
+ * Forks twice with a setsid() in between, redirects stdin to /dev/null
+ * and stdout/stderr to /dev/console, changes directory to "/" and
+ * clears the umask.
+ *
+ * Returns -1 if the first fork fails, the pid of the first child in the
+ * original process, and 0 in the daemonized process.
+ */
+static int
+daemonize(void)
+{
+       int pid;
+       int in_fd, out_fd;
+
+       if( (pid = fork()) < 0){
+               fprintf(stderr, "Failed first fork : %s\n", strerror(errno));
+               return -1;
+       }
+       else if (pid != 0)
+               return pid;
+
+       setsid();
+
+       /* a failed second fork is only reported: we carry on regardless */
+       if ( (pid = fork()) < 0)
+               fprintf(stderr, "Failed second fork : %s\n", strerror(errno));
+       else if (pid != 0)
+               _exit(0);
+
+       in_fd = open("/dev/null", O_RDONLY);
+       if (in_fd < 0){
+               fprintf(stderr, "cannot open /dev/null for input : %s\n",
+                       strerror(errno));
+               _exit(0);
+       }
+       out_fd = open("/dev/console", O_WRONLY);
+       if (out_fd < 0){
+               fprintf(stderr, "cannot open /dev/console for output : %s\n",
+                       strerror(errno));
+               _exit(0);
+       }
+
+       /*
+        * dup2() targets the standard descriptors explicitly, so this also
+        * works when one of them was already closed at startup and open()
+        * above handed out its number (close() + dup() would then fail).
+        */
+       if (dup2(in_fd, STDIN_FILENO) < 0 ||
+           dup2(out_fd, STDOUT_FILENO) < 0 ||
+           dup2(out_fd, STDERR_FILENO) < 0) {
+               fprintf(stderr, "cannot redirect standard fds : %s\n",
+                       strerror(errno));
+               _exit(0);
+       }
+
+       /* never close a descriptor that now is one of the standard ones */
+       if (in_fd > STDERR_FILENO)
+               close(in_fd);
+       if (out_fd > STDERR_FILENO)
+               close(out_fd);
+
+       if (chdir("/") < 0)
+               fprintf(stderr, "cannot chdir to '/', continuing : %s\n",
+                       strerror(errno));
+       umask(0);
+       return 0;
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -1569,8 +1444,9 @@ main (int argc, char *argv[])
        extern int optind;
        int arg;
        int err;
-       
+
        logsink = 1;
+       dm_init();
 
        if (getuid() != 0) {
                fprintf(stderr, "need to be root\n");
@@ -1610,8 +1486,8 @@ main (int argc, char *argv[])
        if (!logsink)
                err = 0;
        else
-               err = fork();
-       
+               err = daemonize();
+
        if (err < 0)
                /* error */
                exit(1);
@@ -1622,3 +1498,4 @@ main (int argc, char *argv[])
                /* child lives */
                return (child(NULL));
 }
+