RDMA/rtrs-clt: Add a minimum latency multipath policy
author Gioh Kim <gi-oh.kim@cloud.ionos.com>
Wed, 7 Apr 2021 11:34:41 +0000 (13:34 +0200)
committer Jason Gunthorpe <jgg@nvidia.com>
Tue, 13 Apr 2021 22:44:54 +0000 (19:44 -0300)
This patch adds a new multipath policy: min-latency.  The client checks the
latency of each path when it sends the heartbeat, and it sends IO to the
path with the minimum latency.

Link: https://lore.kernel.org/r/20210407113444.150961-2-gi-oh.kim@ionos.com
Signed-off-by: Gioh Kim <gi-oh.kim@ionos.com>
Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c
drivers/infiniband/ulp/rtrs/rtrs-clt.c
drivers/infiniband/ulp/rtrs/rtrs-clt.h
drivers/infiniband/ulp/rtrs/rtrs-pri.h
drivers/infiniband/ulp/rtrs/rtrs.c

index eb92ec1..a5bf12a 100644 (file)
@@ -101,6 +101,9 @@ static ssize_t mpath_policy_show(struct device *dev,
        case MP_POLICY_MIN_INFLIGHT:
                return sysfs_emit(page, "min-inflight (MI: %d)\n",
                                  clt->mp_policy);
+       case MP_POLICY_MIN_LATENCY:
+               return sysfs_emit(page, "min-latency (ML: %d)\n",
+                                 clt->mp_policy);
        default:
                return sysfs_emit(page, "Unknown (%d)\n", clt->mp_policy);
        }
@@ -114,22 +117,32 @@ static ssize_t mpath_policy_store(struct device *dev,
        struct rtrs_clt *clt;
        int value;
        int ret;
+       size_t len = 0;
 
        clt = container_of(dev, struct rtrs_clt, dev);
 
        ret = kstrtoint(buf, 10, &value);
        if (!ret && (value == MP_POLICY_RR ||
-                    value == MP_POLICY_MIN_INFLIGHT)) {
+                    value == MP_POLICY_MIN_INFLIGHT ||
+                    value == MP_POLICY_MIN_LATENCY)) {
                clt->mp_policy = value;
                return count;
        }
 
+       /* distinguish "mi" and "min-latency" with length */
+       len = strnlen(buf, NAME_MAX);
+       if (buf[len - 1] == '\n')
+               len--;
+
        if (!strncasecmp(buf, "round-robin", 11) ||
-           !strncasecmp(buf, "rr", 2))
+           (len == 2 && !strncasecmp(buf, "rr", 2)))
                clt->mp_policy = MP_POLICY_RR;
        else if (!strncasecmp(buf, "min-inflight", 12) ||
-                !strncasecmp(buf, "mi", 2))
+                (len == 2 && !strncasecmp(buf, "mi", 2)))
                clt->mp_policy = MP_POLICY_MIN_INFLIGHT;
+       else if (!strncasecmp(buf, "min-latency", 11) ||
+                (len == 2 && !strncasecmp(buf, "ml", 2)))
+               clt->mp_policy = MP_POLICY_MIN_LATENCY;
        else
                return -EINVAL;
 
index cb1731a..8139c79 100644 (file)
@@ -628,6 +628,8 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc)
                } else if (imm_type == RTRS_HB_ACK_IMM) {
                        WARN_ON(con->c.cid);
                        sess->s.hb_missed_cnt = 0;
+                       sess->s.hb_cur_latency =
+                               ktime_sub(ktime_get(), sess->s.hb_last_sent);
                        if (sess->flags & RTRS_MSG_NEW_RKEY_F)
                                return  rtrs_clt_recv_done(con, wc);
                } else {
@@ -826,6 +828,57 @@ static struct rtrs_clt_sess *get_next_path_min_inflight(struct path_it *it)
        return min_path;
 }
 
+/**
+ * get_next_path_min_latency() - Returns path with minimal latency.
+ * @it:        the path iterator
+ *
+ * Return: a path with the lowest latency or NULL if all paths are tried
+ *
+ * Locks:
+ *    rcu_read_lock() must be held.
+ *
+ * Related to @MP_POLICY_MIN_LATENCY
+ *
+ * This DOES skip an already-tried path.
+ * There is a skip-list to skip a path if the path has been tried but failed.
+ * It will try the minimum latency path and then the second minimum latency
+ * path and so on. Finally it will return NULL if all paths are tried.
+ * Therefore the caller MUST check whether the returned path is NULL and
+ * trigger the IO error. Latency is read from the path's hb_cur_latency.
+ */
+static struct rtrs_clt_sess *get_next_path_min_latency(struct path_it *it)
+{
+       struct rtrs_clt_sess *min_path = NULL;
+       struct rtrs_clt *clt = it->clt;
+       struct rtrs_clt_sess *sess;
+       ktime_t min_latency = KTIME_MAX; /* any measured latency beats this */
+       ktime_t latency;
+
+       list_for_each_entry_rcu(sess, &clt->paths_list, s.entry) {
+               if (unlikely(READ_ONCE(sess->state) != RTRS_CLT_CONNECTED))
+                       continue;
+
+               if (unlikely(!list_empty(raw_cpu_ptr(sess->mp_skip_entry))))
+                       continue;
+
+               latency = sess->s.hb_cur_latency;
+
+               if (latency < min_latency) {
+                       min_latency = latency;
+                       min_path = sess;
+               }
+       }
+
+       /*
+        * add the path to the skip list, so that next time we can get
+        * a different one
+        */
+       if (min_path)
+               list_add(raw_cpu_ptr(min_path->mp_skip_entry), &it->skip_list);
+
+       return min_path;
+}
+
 static inline void path_it_init(struct path_it *it, struct rtrs_clt *clt)
 {
        INIT_LIST_HEAD(&it->skip_list);
@@ -834,8 +887,10 @@ static inline void path_it_init(struct path_it *it, struct rtrs_clt *clt)
 
        if (clt->mp_policy == MP_POLICY_RR)
                it->next_path = get_next_path_rr;
-       else
+       else if (clt->mp_policy == MP_POLICY_MIN_INFLIGHT)
                it->next_path = get_next_path_min_inflight;
+       else
+               it->next_path = get_next_path_min_latency;
 }
 
 static inline void path_it_deinit(struct path_it *it)
index 692bc83..ef15927 100644 (file)
@@ -29,6 +29,7 @@ enum rtrs_clt_state {
 enum rtrs_mp_policy {
        MP_POLICY_RR,
        MP_POLICY_MIN_INFLIGHT,
+       MP_POLICY_MIN_LATENCY, /* path with the lowest hb_cur_latency */
 };
 
 /* see Documentation/ABI/testing/sysfs-class-rtrs-client for details */
index 1b31bda..bcad5e2 100644 (file)
@@ -112,6 +112,8 @@ struct rtrs_sess {
        unsigned int            hb_interval_ms;
        unsigned int            hb_missed_cnt;
        unsigned int            hb_missed_max;
+       ktime_t                 hb_last_sent;
+       ktime_t                 hb_cur_latency;
 };
 
 /* rtrs information unit */
index bc08b7f..a784728 100644 (file)
@@ -337,6 +337,9 @@ static void hb_work(struct work_struct *work)
                schedule_hb(sess);
                return;
        }
+
+       sess->hb_last_sent = ktime_get();
+
        imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0);
        err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm,
                                             0, NULL);