[multipathd] switch on/off the queueing feature when opportune
authorChristophe Varoqui <root@xa-s05.(none)>
Fri, 7 Oct 2005 22:45:55 +0000 (00:45 +0200)
committerChristophe Varoqui <root@xa-s05.(none)>
Fri, 7 Oct 2005 22:45:55 +0000 (00:45 +0200)
The following patch adds a time-based retry feature to multipathd for
the no-path situation.  Any comments are welcome.

This patch adds the 'no_path_retry' option to multipathd.

o no_path_retry = "fail" is equal to 'fail_if_no_path'.
  (i.e. I/O to the no-path map will immediately fail.)

o no_path_retry = "queue" is equal to 'queue_if_no_path'.
  (i.e. I/O to the no-path map will be queued until any path comes up.)

o If no_path_retry = <n>, where n is a positive number,
  then multipathd will set queue_if_no_path on the map,
  and if all paths are down, multipathd will switch the feature
  to fail_if_no_path after the checker has tried <n> times for each
  path in the map.

Multipathd rewrites the queue_if_no_path feature parameter in the map
if this option is specified.  By default, however, this patch doesn't
change the current multipathd behaviour,
so it doesn't break any existing configuration.

Kiyoshi Ueda, NEC

libmultipath/config.h
libmultipath/devmapper.c
libmultipath/devmapper.h
libmultipath/dict.c
libmultipath/propsel.c
libmultipath/propsel.h
libmultipath/structs.h
multipath.conf.annotated
multipath.conf.synthetic
multipathd/main.c

index 5979ebd..e87f149 100644 (file)
@@ -33,6 +33,7 @@ struct mpentry {
        int pgpolicy;
        int pgfailback;
        int rr_weight;
+       int no_path_retry;
 
        char * wwid;
        char * selector;
@@ -56,6 +57,7 @@ struct config {
        int pgfailback;
        int remove;
        int rr_weight;
+       int no_path_retry;
 
        char * dev;
        char * udev_dir;
index b662b74..d941e22 100644 (file)
@@ -490,6 +490,41 @@ out:
        return r;
 }
 
+/*
+ * Send a "queue_if_no_path" (enable != 0) or "fail_if_no_path"
+ * (enable == 0) target message to the device-mapper map <mapname>.
+ *
+ * Returns 0 when the message task ran successfully, 1 otherwise.
+ */
+int
+dm_queue_if_no_path(char *mapname, int enable)
+{
+       struct dm_task *task;
+       char *msg = enable ? "queue_if_no_path\n" : "fail_if_no_path\n";
+       int rc = 1;
+
+       task = dm_task_create(DM_DEVICE_TARGET_MSG);
+       if (!task)
+               return rc;
+
+       /* the message is addressed by map name, at sector 0 */
+       if (!dm_task_set_name(task, mapname) ||
+           !dm_task_set_sector(task, 0) ||
+           !dm_task_set_message(task, msg))
+               goto out;
+
+       dm_task_no_open_count(task);
+
+       if (dm_task_run(task))
+               rc = 0;
+out:
+       /* the task is released on every path past its creation */
+       dm_task_destroy(task);
+       return rc;
+}
+
 static int
 dm_groupmsg (char * msg, char * mapname, int index)
 {
index 4bdf705..b9bcaa7 100644 (file)
@@ -10,6 +10,7 @@ int dm_flush_map (char *, char *);
 int dm_flush_maps (char *);
 int dm_fail_path(char * mapname, char * path);
 int dm_reinstate(char * mapname, char * path);
+int dm_queue_if_no_path(char *mapname, int enable);
 int dm_switchgroup(char * mapname, int index);
 int dm_enablegroup(char * mapname, int index);
 int dm_disablegroup(char * mapname, int index);
index d21156e..1c306b6 100644 (file)
@@ -171,6 +171,26 @@ default_failback_handler(vector strvec)
        return 0;
 }
 
+/*
+ * Parse the value of the global "no_path_retry" keyword and store it
+ * in conf->no_path_retry:
+ *   "fail" or "0"     -> NO_PATH_RETRY_FAIL
+ *   "queue"           -> NO_PATH_RETRY_QUEUE
+ *   a number n >= 1   -> n
+ *   anything else     -> NO_PATH_RETRY_UNDEF
+ *
+ * Keywords are matched exactly: a prefix comparison would turn values
+ * such as "05" or "failover" into "fail" without any warning.
+ *
+ * Returns 1 when no value could be extracted from <strvec>, 0 otherwise.
+ */
+static int
+def_no_path_retry_handler(vector strvec)
+{
+       char * buff;
+
+       buff = set_value(strvec);
+       if (!buff)
+               return 1;
+
+       if (!strcmp(buff, "fail") || !strcmp(buff, "0"))
+               conf->no_path_retry = NO_PATH_RETRY_FAIL;
+       else if (!strcmp(buff, "queue"))
+               conf->no_path_retry = NO_PATH_RETRY_QUEUE;
+       else if ((conf->no_path_retry = atoi(buff)) < 1)
+               /* not a positive retry count: leave the option unset */
+               conf->no_path_retry = NO_PATH_RETRY_UNDEF;
+
+       FREE(buff);
+       return 0;
+}
+
 /*
  * blacklist block handlers
  */
@@ -579,6 +599,30 @@ mp_weight_handler(vector strvec)
        return 0;
 }
 
+/*
+ * Parse the value of a per-multipath "no_path_retry" keyword and store
+ * it in the entry held by the last slot of conf->mptable:
+ *   "fail" or "0"     -> NO_PATH_RETRY_FAIL
+ *   "queue"           -> NO_PATH_RETRY_QUEUE
+ *   a number n >= 1   -> n
+ *   anything else     -> NO_PATH_RETRY_UNDEF
+ *
+ * Keywords are matched exactly: a prefix comparison would turn values
+ * such as "05" or "failover" into "fail" without any warning.
+ *
+ * Returns 1 when there is no current entry or no value could be
+ * extracted from <strvec>, 0 otherwise.
+ */
+static int
+mp_no_path_retry_handler(vector strvec)
+{
+       struct mpentry *mpe = VECTOR_LAST_SLOT(conf->mptable);
+       char *buff;
+
+       if (!mpe)
+               return 1;
+
+       buff = set_value(strvec);
+       if (!buff)
+               return 1;
+
+       if (!strcmp(buff, "fail") || !strcmp(buff, "0"))
+               mpe->no_path_retry = NO_PATH_RETRY_FAIL;
+       else if (!strcmp(buff, "queue"))
+               mpe->no_path_retry = NO_PATH_RETRY_QUEUE;
+       else if ((mpe->no_path_retry = atoi(buff)) < 1)
+               /* not a positive retry count: leave the option unset */
+               mpe->no_path_retry = NO_PATH_RETRY_UNDEF;
+
+       FREE(buff);
+       return 0;
+}
+
 vector
 init_keywords(void)
 {
@@ -596,6 +640,7 @@ init_keywords(void)
        install_keyword("failback", &default_failback_handler);
        install_keyword("rr_min_io", &def_minio_handler);
        install_keyword("rr_weight", &def_weight_handler);
+       install_keyword("no_path_retry", &def_no_path_retry_handler);
        
        install_keyword_root("devnode_blacklist", &blacklist_handler);
        install_keyword("devnode", &ble_handler);
@@ -626,6 +671,7 @@ init_keywords(void)
        install_keyword("path_selector", &mp_selector_handler);
        install_keyword("failback", &mp_failback_handler);
        install_keyword("rr_weight", &mp_weight_handler);
+       install_keyword("no_path_retry", &mp_no_path_retry_handler);
        install_sublevel_end();
 
        return keywords;
index 9da4840..7c006c1 100644 (file)
@@ -201,3 +201,22 @@ select_getprio (struct path * pp)
        return 0;
 }
 
+/*
+ * Pick the no_path_retry value for <mp>, in order of precedence:
+ *   1. the per-multipath entry (mp->mpe), when it holds a defined value
+ *   2. the global configuration (conf), when it holds a defined value
+ *   3. NO_PATH_RETRY_UNDEF otherwise
+ *
+ * The chosen source is logged at verbosity 3.  Always returns 0.
+ */
+extern int
+select_no_path_retry(struct multipath *mp)
+{
+       if (mp->mpe && mp->mpe->no_path_retry != NO_PATH_RETRY_UNDEF) {
+               mp->no_path_retry = mp->mpe->no_path_retry;
+               /* the value comes from the multipath entry, not a controller */
+               condlog(3, "no_path_retry = %i (multipath setting)",
+                       mp->no_path_retry);
+               return 0;
+       }
+       if (conf->no_path_retry != NO_PATH_RETRY_UNDEF) {
+               mp->no_path_retry = conf->no_path_retry;
+               condlog(3, "no_path_retry = %i (config file default)",
+                       mp->no_path_retry);
+               return 0;
+       }
+       mp->no_path_retry = NO_PATH_RETRY_UNDEF;
+       condlog(3, "no_path_retry = NONE (internal default)");
+       return 0;
+}
index 5e57a32..e5124d0 100644 (file)
@@ -8,4 +8,4 @@ int select_hwhandler (struct multipath * mp);
 int select_checkfn(struct path *pp);
 int select_getuid (struct path * pp);
 int select_getprio (struct path * pp);
-
+int select_no_path_retry(struct multipath *mp);
index 4ed3bd0..d826410 100644 (file)
 #define SCSI_PRODUCT_SIZE      17
 #define SCSI_REV_SIZE          5
 
+#define NO_PATH_RETRY_UNDEF    0
+#define NO_PATH_RETRY_FAIL     -1
+#define NO_PATH_RETRY_QUEUE    -2
+
 enum free_path_switch {
        KEEP_PATHS,
        FREE_PATHS
@@ -115,6 +119,9 @@ struct multipath {
        int pgfailback;
        int failback_tick;
        int rr_weight;
+       int nr_active;     /* current available(= not known as failed) paths */
+       int no_path_retry; /* number of retries after all paths are down */
+       int retry_tick;    /* remaining times for retries */
        unsigned long long size;
        vector paths;
        vector pg;
index 57f9a05..567b225 100644 (file)
 #      # default : immediate
 #      #
 #      failback        manual
+#
+#      #
+#      # name    : no_path_retry
+#      # scope   : multipathd
+#      # desc    : the number of retries before queueing is disabled, or
+#      #           "fail" means immediate failure (no queueing),
+#      #           "queue" means never stop queueing
+#      # values  : queue|fail|n (>0)
+#      # default : (null)
+#      #
+#      #no_path_retry  queue
 #}
 #      
 ##
 #              # default : immediate
 #              #
 #              failback                manual
+#
+#              #
+#              # name    : no_path_retry
+#              # scope   : multipathd
+#              # desc    : the number of retries before queueing is disabled, or
+#              #           "fail" means immediate failure (no queueing),
+#              #           "queue" means never stop queueing
+#              # values  : queue|fail|n (>0)
+#              # default : (null)
+#              #
+#              #no_path_retry  queue
 #      }
 #      multipath {
 #              wwid    1DEC_____321816758474
index 670008f..2661ca1 100644 (file)
@@ -14,6 +14,7 @@
 #      rr_min_io               100
 #      rr_weight               priorities
 #      failback                immediate
+#      no_path_retry           queue
 #}
 #devnode_blacklist {
 #        wwid 26353900f02796769
@@ -30,6 +31,7 @@
 #              path_selector           "round-robin 0"
 #              failback                manual
 #              rr_weight               priorities
+#              no_path_retry           queue
 #      }
 #      multipath {
 #              wwid    1DEC_____321816758474
index 674f600..3bfbf71 100644 (file)
@@ -209,6 +209,21 @@ set_multipath_wwid (struct multipath * mpp)
 }
 
+/*
+ * Count the paths of <mpp>, across all of its path groups, whose
+ * state equals <state>.
+ */
 static int
+pathcount (struct multipath *mpp, int state)
+{
+       struct pathgroup *group;
+       struct path *cur;
+       int g, p;
+       int matches = 0;
+
+       vector_foreach_slot (mpp->pg, group, g) {
+               vector_foreach_slot (group->paths, cur, p) {
+                       if (cur->state == state)
+                               matches++;
+               }
+       }
+       return matches;
+}
+
+static int
 setup_multipath (struct vectors * vecs, struct multipath * mpp)
 {
        int i;
@@ -222,6 +237,14 @@ setup_multipath (struct vectors * vecs, struct multipath * mpp)
 
        set_paths_owner(vecs, mpp);
        select_pgfailback(mpp);
+       mpp->nr_active = pathcount(mpp, PATH_UP);
+       select_no_path_retry(mpp);
+       if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF) {
+               if (mpp->no_path_retry == NO_PATH_RETRY_FAIL)
+                       dm_queue_if_no_path(mpp->alias, 0);
+               else
+                       dm_queue_if_no_path(mpp->alias, 1);
+       }
 
        return 0;
 out:
@@ -347,6 +370,38 @@ static sigset_t unblock_sighup(void)
 }
 
 /*
+ * mpp->no_path_retry:
+ *   -2 : queue_if_no_path enabled, never turned off
+ *   -1 : fail_if_no_path
+ *    0 : nothing
+ *   >0 : queue_if_no_path enabled, turned off after polling n times
+ */
+/*
+ * Account for one active path of <mpp> going away.  When the count
+ * reaches zero and a positive retry count is configured, the retry
+ * countdown is armed with no_path_retry * conf->checkint ticks.
+ */
+static void
+update_queue_mode_del_path(struct multipath *mpp)
+{
+       mpp->nr_active--;
+
+       if (mpp->nr_active == 0 && mpp->no_path_retry > 0) {
+               /* Enter retry mode */
+               mpp->retry_tick = mpp->no_path_retry * conf->checkint;
+               condlog(1, "%s: Entering recovery mode: max_retries=%d",
+                       mpp->alias, mpp->no_path_retry);
+       }
+
+       condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
+}
+
+/*
+ * Account for one more active path on <mpp>.  When this is the first
+ * active path and a positive retry count is configured, the retry
+ * countdown is cancelled and queue_if_no_path is requested again.
+ */
+static void
+update_queue_mode_add_path(struct multipath *mpp)
+{
+       int had_no_path = (mpp->nr_active == 0);
+
+       mpp->nr_active++;
+
+       if (had_no_path && mpp->no_path_retry > 0) {
+               /* come back to normal mode from retry mode */
+               mpp->retry_tick = 0;
+               dm_queue_if_no_path(mpp->alias, 1);
+               condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
+               condlog(1, "%s: Recovered to normal mode", mpp->alias);
+       }
+
+       condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
+}
+
+/*
  * returns the reschedule delay
  * negative means *stop*
  */
@@ -690,6 +745,10 @@ uev_remove_path (char * devname, struct vectors * vecs)
                condlog(3, "%s: not in pathvec");
                return 1;
        }
+
+       if (pp->mpp && pp->state == PATH_UP)
+               update_queue_mode_del_path(pp->mpp);
+
        condlog(2, "remove %s path checker", devname);
        i = find_slot(vecs->pathvec, (void *)pp);
        vector_del_slot(vecs->pathvec, i);
@@ -817,6 +876,13 @@ reconfigure (struct vectors * vecs)
        vector_foreach_slot (vecs->mpvec, mpp, i) {
                mpp->mpe = find_mpe(mpp->wwid);
                set_paths_owner(vecs, mpp);
+               select_no_path_retry(mpp);
+               if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF) {
+                       if (mpp->no_path_retry == NO_PATH_RETRY_FAIL)
+                               dm_queue_if_no_path(mpp->alias, 0);
+                       else
+                               dm_queue_if_no_path(mpp->alias, 1);
+               }
        }
        vector_foreach_slot (vecs->pathvec, pp, i) {
                select_checkfn(pp);
@@ -983,6 +1049,7 @@ fail_path (struct path * pp)
                 pp->dev_t, pp->mpp->alias);
 
        dm_fail_path(pp->mpp->alias, pp->dev_t);
+       update_queue_mode_del_path(pp->mpp);
 }
 
 /*
@@ -991,11 +1058,14 @@ fail_path (struct path * pp)
 static void
 reinstate_path (struct path * pp)
 {
-       if (pp->mpp) {
-               if (dm_reinstate(pp->mpp->alias, pp->dev_t))
-                       condlog(0, "%s: reinstate failed", pp->dev_t);
-               else
-                       condlog(2, "%s: reinstated", pp->dev_t);
+       if (!pp->mpp)
+               return;
+
+       if (dm_reinstate(pp->mpp->alias, pp->dev_t))
+               condlog(0, "%s: reinstate failed", pp->dev_t);
+       else {
+               condlog(2, "%s: reinstated", pp->dev_t);
+               update_queue_mode_add_path(pp->mpp);
        }
 }
 
@@ -1056,6 +1126,23 @@ defered_failback_tick (vector mpvec)
        }
 }
 
+/*
+ * Advance the retry countdown of every map in <mpvec> by one tick.
+ * A map whose countdown reaches zero on this tick gets its queueing
+ * switched off through dm_queue_if_no_path(alias, 0).
+ */
+static void
+retry_count_tick(vector mpvec)
+{
+       struct multipath *map;
+       int slot;
+
+       vector_foreach_slot (mpvec, map, slot) {
+               /* a zero retry_tick means no countdown is running */
+               if (map->retry_tick != 0) {
+                       condlog(4, "%s: Retrying.. No active path", map->alias);
+                       map->retry_tick -= 1;
+                       if (map->retry_tick == 0) {
+                               dm_queue_if_no_path(map->alias, 0);
+                               condlog(2, "%s: Disable queueing", map->alias);
+                       }
+               }
+       }
+}
+
 static void *
 checkerloop (void *ap)
 {
@@ -1201,6 +1288,7 @@ checkerloop (void *ap)
                        }
                }
                defered_failback_tick(vecs->mpvec);
+               retry_count_tick(vecs->mpvec);
 
                if (count)
                        count--;