ceph: only choose an MDS that is in up:active state and not laggy
author Xiubo Li <xiubli@redhat.com>
Tue, 26 Nov 2019 12:24:22 +0000 (07:24 -0500)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 27 Jan 2020 15:53:39 +0000 (16:53 +0100)
Even if an MDS is in the up:active state, it may still be laggy. Skip
the laggy MDSs when choosing one.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/mds_client.c
fs/ceph/mdsmap.c

index 69631d1..1b53ace 100644 (file)
@@ -974,14 +974,14 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                                     frag.frag, mds,
                                     (int)r, frag.ndist);
                                if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-                                   CEPH_MDS_STATE_ACTIVE)
+                                   CEPH_MDS_STATE_ACTIVE &&
+                                   !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
                                        goto out;
                        }
 
                        /* since this file/dir wasn't known to be
                         * replicated, then we want to look for the
                         * authoritative mds. */
-                       mode = USE_AUTH_MDS;
                        if (frag.mds >= 0) {
                                /* choose auth mds */
                                mds = frag.mds;
@@ -989,9 +989,14 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
                                     "frag %u mds%d (auth)\n",
                                     inode, ceph_vinop(inode), frag.frag, mds);
                                if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-                                   CEPH_MDS_STATE_ACTIVE)
-                                       goto out;
+                                   CEPH_MDS_STATE_ACTIVE) {
+                                       if (mode == USE_ANY_MDS &&
+                                           !ceph_mdsmap_is_laggy(mdsc->mdsmap,
+                                                                 mds))
+                                               goto out;
+                               }
                        }
+                       mode = USE_AUTH_MDS;
                }
        }
 
index 7a925e0..a77e0ec 100644 (file)
 
 #include "super.h"
 
+#define CEPH_MDS_IS_READY(i, ignore_laggy) \
+       (m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy))
 
-/*
- * choose a random mds that is "up" (i.e. has a state > 0), or -1.
- */
-int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
 {
        int n = 0;
        int i, j;
 
-       /* special case for one mds */
+       /*
+        * special case for one mds, no matter it is laggy or
+        * not we have no choice
+        */
        if (1 == m->m_num_mds && m->m_info[0].state > 0)
                return 0;
 
        /* count */
        for (i = 0; i < m->m_num_mds; i++)
-               if (m->m_info[i].state > 0)
+               if (CEPH_MDS_IS_READY(i, ignore_laggy))
                        n++;
        if (n == 0)
                return -1;
@@ -36,7 +38,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
        /* pick */
        n = prandom_u32() % n;
        for (j = 0, i = 0; i < m->m_num_mds; i++) {
-               if (m->m_info[i].state > 0)
+               if (CEPH_MDS_IS_READY(i, ignore_laggy))
                        j++;
                if (j > n)
                        break;
@@ -45,6 +47,20 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
        return i;
 }
 
+/*
+ * choose a random mds that is "up" (i.e. has a state > 0), or -1.
+ */
+int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+{
+       int mds;
+
+       mds = __mdsmap_get_random_mds(m, false);
+       if (mds == m->m_num_mds || mds == -1)
+               mds = __mdsmap_get_random_mds(m, true);
+
+       return mds == m->m_num_mds ? -1 : mds;
+}
+
 #define __decode_and_drop_type(p, end, type, bad)              \
        do {                                                    \
                if (*p + sizeof(type) > end)                    \