cifs: distribute channels across interfaces based on speed
author Shyam Prasad N <sprasad@microsoft.com>
Mon, 26 Dec 2022 11:24:56 +0000 (11:24 +0000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 Dec 2023 06:32:09 +0000 (07:32 +0100)
[ Upstream commit a6d8fb54a515f0546ffdb7870102b1238917e567 ]

Today, if the server interfaces are RSS capable, we simply
choose the fastest interface to set up a channel. This is not
a scalable approach, and it makes little attempt to
distribute the connections.

This change does a weighted distribution of channels across
all the available server interfaces, where the weight is
a function of the advertised interface speed.

Also make sure that we don't mix rdma and non-rdma for channels.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Stable-dep-of: fa1d0508bdd4 ("cifs: account for primary channel in the interface list")
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/smb/client/cifs_debug.c
fs/smb/client/cifsglob.h
fs/smb/client/sess.c

index 8233fb2..0acb455 100644 (file)
@@ -220,6 +220,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        struct cifs_ses *ses;
        struct cifs_tcon *tcon;
        struct cifs_server_iface *iface;
+       size_t iface_weight = 0, iface_min_speed = 0;
+       struct cifs_server_iface *last_iface = NULL;
        int c, i, j;
 
        seq_puts(m,
@@ -461,11 +463,25 @@ skip_rdma:
                                           "\tLast updated: %lu seconds ago",
                                           ses->iface_count,
                                           (jiffies - ses->iface_last_update) / HZ);
+
+                       last_iface = list_last_entry(&ses->iface_list,
+                                                    struct cifs_server_iface,
+                                                    iface_head);
+                       iface_min_speed = last_iface->speed;
+
                        j = 0;
                        list_for_each_entry(iface, &ses->iface_list,
                                                 iface_head) {
                                seq_printf(m, "\n\t%d)", ++j);
                                cifs_dump_iface(m, iface);
+
+                               iface_weight = iface->speed / iface_min_speed;
+                               seq_printf(m, "\t\tWeight (cur,total): (%zu,%zu)"
+                                          "\n\t\tAllocated channels: %u\n",
+                                          iface->weight_fulfilled,
+                                          iface_weight,
+                                          iface->num_channels);
+
                                if (is_ses_using_iface(ses, iface))
                                        seq_puts(m, "\t\t[CONNECTED]\n");
                        }
index 6c8a556..2e814ea 100644 (file)
@@ -956,6 +956,8 @@ struct cifs_server_iface {
        struct list_head iface_head;
        struct kref refcount;
        size_t speed;
+       size_t weight_fulfilled;
+       unsigned int num_channels;
        unsigned int rdma_capable : 1;
        unsigned int rss_capable : 1;
        unsigned int is_active : 1; /* unset if non existent */
index f0d1648..33e7245 100644 (file)
@@ -164,7 +164,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
        int left;
        int rc = 0;
        int tries = 0;
+       size_t iface_weight = 0, iface_min_speed = 0;
        struct cifs_server_iface *iface = NULL, *niface = NULL;
+       struct cifs_server_iface *last_iface = NULL;
 
        spin_lock(&ses->chan_lock);
 
@@ -192,21 +194,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
        }
        spin_unlock(&ses->chan_lock);
 
-       /*
-        * Keep connecting to same, fastest, iface for all channels as
-        * long as its RSS. Try next fastest one if not RSS or channel
-        * creation fails.
-        */
-       spin_lock(&ses->iface_lock);
-       iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
-                                iface_head);
-       spin_unlock(&ses->iface_lock);
-
        while (left > 0) {
 
                tries++;
                if (tries > 3*ses->chan_max) {
-                       cifs_dbg(FYI, "too many channel open attempts (%d channels left to open)\n",
+                       cifs_dbg(VFS, "too many channel open attempts (%d channels left to open)\n",
                                 left);
                        break;
                }
@@ -214,17 +206,35 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
                spin_lock(&ses->iface_lock);
                if (!ses->iface_count) {
                        spin_unlock(&ses->iface_lock);
+                       cifs_dbg(VFS, "server %s does not advertise interfaces\n",
+                                     ses->server->hostname);
                        break;
                }
 
+               if (!iface)
+                       iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+                                                iface_head);
+               last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+                                            iface_head);
+               iface_min_speed = last_iface->speed;
+
                list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
                                    iface_head) {
+                       /* do not mix rdma and non-rdma interfaces */
+                       if (iface->rdma_capable != ses->server->rdma)
+                               continue;
+
                        /* skip ifaces that are unusable */
                        if (!iface->is_active ||
                            (is_ses_using_iface(ses, iface) &&
-                            !iface->rss_capable)) {
+                            !iface->rss_capable))
+                               continue;
+
+                       /* check if we already allocated enough channels */
+                       iface_weight = iface->speed / iface_min_speed;
+
+                       if (iface->weight_fulfilled >= iface_weight)
                                continue;
-                       }
 
                        /* take ref before unlock */
                        kref_get(&iface->refcount);
@@ -241,10 +251,21 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
                                continue;
                        }
 
-                       cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+                       iface->num_channels++;
+                       iface->weight_fulfilled++;
+                       cifs_dbg(VFS, "successfully opened new channel on iface:%pIS\n",
                                 &iface->sockaddr);
                        break;
                }
+
+               /* reached end of list. reset weight_fulfilled and start over */
+               if (list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+                       list_for_each_entry(iface, &ses->iface_list, iface_head)
+                               iface->weight_fulfilled = 0;
+                       spin_unlock(&ses->iface_lock);
+                       iface = NULL;
+                       continue;
+               }
                spin_unlock(&ses->iface_lock);
 
                left--;
@@ -263,8 +284,10 @@ int
 cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
 {
        unsigned int chan_index;
+       size_t iface_weight = 0, iface_min_speed = 0;
        struct cifs_server_iface *iface = NULL;
        struct cifs_server_iface *old_iface = NULL;
+       struct cifs_server_iface *last_iface = NULL;
        int rc = 0;
 
        spin_lock(&ses->chan_lock);
@@ -284,13 +307,34 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
        spin_unlock(&ses->chan_lock);
 
        spin_lock(&ses->iface_lock);
+       if (!ses->iface_count) {
+               spin_unlock(&ses->iface_lock);
+               cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname);
+               return 0;
+       }
+
+       last_iface = list_last_entry(&ses->iface_list, struct cifs_server_iface,
+                                    iface_head);
+       iface_min_speed = last_iface->speed;
+
        /* then look for a new one */
        list_for_each_entry(iface, &ses->iface_list, iface_head) {
+               /* do not mix rdma and non-rdma interfaces */
+               if (iface->rdma_capable != server->rdma)
+                       continue;
+
                if (!iface->is_active ||
                    (is_ses_using_iface(ses, iface) &&
                     !iface->rss_capable)) {
                        continue;
                }
+
+               /* check if we already allocated enough channels */
+               iface_weight = iface->speed / iface_min_speed;
+
+               if (iface->weight_fulfilled >= iface_weight)
+                       continue;
+
                kref_get(&iface->refcount);
                break;
        }
@@ -306,10 +350,22 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
                cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
                         &old_iface->sockaddr,
                         &iface->sockaddr);
+
+               old_iface->num_channels--;
+               if (old_iface->weight_fulfilled)
+                       old_iface->weight_fulfilled--;
+               iface->num_channels++;
+               iface->weight_fulfilled++;
+
                kref_put(&old_iface->refcount, release_iface);
        } else if (old_iface) {
                cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
                         &old_iface->sockaddr);
+
+               old_iface->num_channels--;
+               if (old_iface->weight_fulfilled)
+                       old_iface->weight_fulfilled--;
+
                kref_put(&old_iface->refcount, release_iface);
        } else {
                WARN_ON(!iface);