cifs: change iface_list from array to sorted linked list
authorShyam Prasad N <sprasad@microsoft.com>
Sat, 1 Jan 2022 12:50:21 +0000 (12:50 +0000)
committerSteve French <stfrench@microsoft.com>
Thu, 23 Jun 2022 00:51:43 +0000 (19:51 -0500)
A server's published interface list can change over time, and needs
to be updated. We've storing iface_list as a simple array, which
makes it difficult to manipulate an existing list.

With this change, iface_list is modified into a linked list of
interfaces, which is kept sorted by speed.

Also added a reference counter for an iface entry, so that each
channel can maintain a backpointer to the iface and drop it
easily when needed.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
fs/cifs/cifs_debug.c
fs/cifs/cifsglob.h
fs/cifs/connect.c
fs/cifs/misc.c
fs/cifs/sess.c
fs/cifs/smb2ops.c

index 1dd995e..2cfbac8 100644 (file)
@@ -162,6 +162,8 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
                seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr);
        else if (iface->sockaddr.ss_family == AF_INET6)
                seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr);
+       if (!iface->is_active)
+               seq_puts(m, "\t\t[for-cleanup]\n");
 }
 
 static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
@@ -221,6 +223,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        struct TCP_Server_Info *server;
        struct cifs_ses *ses;
        struct cifs_tcon *tcon;
+       struct cifs_server_iface *iface;
        int c, i, j;
 
        seq_puts(m,
@@ -456,11 +459,10 @@ skip_rdma:
                        if (ses->iface_count)
                                seq_printf(m, "\n\n\tServer interfaces: %zu",
                                           ses->iface_count);
-                       for (j = 0; j < ses->iface_count; j++) {
-                               struct cifs_server_iface *iface;
-
-                               iface = &ses->iface_list[j];
-                               seq_printf(m, "\n\t%d)", j+1);
+                       j = 0;
+                       list_for_each_entry(iface, &ses->iface_list,
+                                                iface_head) {
+                               seq_printf(m, "\n\t%d)", ++j);
                                cifs_dump_iface(m, iface);
                                if (is_ses_using_iface(ses, iface))
                                        seq_puts(m, "\t\t[CONNECTED]\n");
index e773716..7fc2add 100644 (file)
@@ -933,15 +933,67 @@ static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net)
 #endif
 
 struct cifs_server_iface {
+       struct list_head iface_head;
+       struct kref refcount;
        size_t speed;
        unsigned int rdma_capable : 1;
        unsigned int rss_capable : 1;
+       unsigned int is_active : 1; /* unset if non existent */
        struct sockaddr_storage sockaddr;
 };
 
+/* release iface when last ref is dropped */
+static inline void
+release_iface(struct kref *ref)
+{
+       struct cifs_server_iface *iface = container_of(ref,
+                                                      struct cifs_server_iface,
+                                                      refcount);
+       list_del_init(&iface->iface_head);
+       kfree(iface);
+}
+
+/*
+ * compare two interfaces a and b
+ * return 0 if everything matches.
+ * return 1 if a has higher link speed, or rdma capable, or rss capable
+ * return -1 otherwise.
+ */
+static inline int
+iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b)
+{
+       int cmp_ret = 0;
+
+       WARN_ON(!a || !b);
+       if (a->speed == b->speed) {
+               if (a->rdma_capable == b->rdma_capable) {
+                       if (a->rss_capable == b->rss_capable) {
+                               cmp_ret = memcmp(&a->sockaddr, &b->sockaddr,
+                                                sizeof(a->sockaddr));
+                               if (!cmp_ret)
+                                       return 0;
+                               else if (cmp_ret > 0)
+                                       return 1;
+                               else
+                                       return -1;
+                       } else if (a->rss_capable > b->rss_capable)
+                               return 1;
+                       else
+                               return -1;
+               } else if (a->rdma_capable > b->rdma_capable)
+                       return 1;
+               else
+                       return -1;
+       } else if (a->speed > b->speed)
+               return 1;
+       else
+               return -1;
+}
+
 struct cifs_chan {
        unsigned int in_reconnect : 1; /* if session setup in progress for this channel */
        struct TCP_Server_Info *server;
+       struct cifs_server_iface *iface; /* interface in use */
        __u8 signkey[SMB3_SIGN_KEY_SIZE];
 };
 
@@ -993,7 +1045,7 @@ struct cifs_ses {
         */
        spinlock_t iface_lock;
        /* ========= begin: protected by iface_lock ======== */
-       struct cifs_server_iface *iface_list;
+       struct list_head iface_list;
        size_t iface_count;
        unsigned long iface_last_update; /* jiffies */
        /* ========= end: protected by iface_lock ======== */
index 1849e34..248fe18 100644 (file)
@@ -1894,9 +1894,11 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
                int i;
 
                for (i = 1; i < chan_count; i++) {
-                       spin_unlock(&ses->chan_lock);
+                       if (ses->chans[i].iface) {
+                               kref_put(&ses->chans[i].iface->refcount, release_iface);
+                               ses->chans[i].iface = NULL;
+                       }
                        cifs_put_tcp_session(ses->chans[i].server, 0);
-                       spin_lock(&ses->chan_lock);
                        ses->chans[i].server = NULL;
                }
        }
index c69e124..0e84e6f 100644 (file)
@@ -75,6 +75,7 @@ sesInfoAlloc(void)
                INIT_LIST_HEAD(&ret_buf->tcon_list);
                mutex_init(&ret_buf->session_mutex);
                spin_lock_init(&ret_buf->iface_lock);
+               INIT_LIST_HEAD(&ret_buf->iface_list);
                spin_lock_init(&ret_buf->chan_lock);
        }
        return ret_buf;
@@ -83,6 +84,8 @@ sesInfoAlloc(void)
 void
 sesInfoFree(struct cifs_ses *buf_to_free)
 {
+       struct cifs_server_iface *iface = NULL, *niface = NULL;
+
        if (buf_to_free == NULL) {
                cifs_dbg(FYI, "Null buffer passed to sesInfoFree\n");
                return;
@@ -96,7 +99,11 @@ sesInfoFree(struct cifs_ses *buf_to_free)
        kfree(buf_to_free->user_name);
        kfree(buf_to_free->domainName);
        kfree_sensitive(buf_to_free->auth_key.response);
-       kfree(buf_to_free->iface_list);
+       spin_lock(&buf_to_free->iface_lock);
+       list_for_each_entry_safe(iface, niface, &buf_to_free->iface_list,
+                                iface_head)
+               kref_put(&iface->refcount, release_iface);
+       spin_unlock(&buf_to_free->iface_lock);
        kfree_sensitive(buf_to_free);
 }
 
index d417de3..d7b6e56 100644 (file)
@@ -58,7 +58,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
 
        spin_lock(&ses->chan_lock);
        for (i = 0; i < ses->chan_count; i++) {
-               if (is_server_using_iface(ses->chans[i].server, iface)) {
+               if (ses->chans[i].iface == iface) {
                        spin_unlock(&ses->chan_lock);
                        return true;
                }
@@ -151,11 +151,9 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 {
        int old_chan_count, new_chan_count;
        int left;
-       int i = 0;
        int rc = 0;
        int tries = 0;
-       struct cifs_server_iface *ifaces = NULL;
-       size_t iface_count;
+       struct cifs_server_iface *iface = NULL, *niface = NULL;
 
        spin_lock(&ses->chan_lock);
 
@@ -185,32 +183,16 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
        spin_unlock(&ses->chan_lock);
 
        /*
-        * Make a copy of the iface list at the time and use that
-        * instead so as to not hold the iface spinlock for opening
-        * channels
-        */
-       spin_lock(&ses->iface_lock);
-       iface_count = ses->iface_count;
-       if (iface_count <= 0) {
-               spin_unlock(&ses->iface_lock);
-               cifs_dbg(VFS, "no iface list available to open channels\n");
-               return 0;
-       }
-       ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces),
-                        GFP_ATOMIC);
-       if (!ifaces) {
-               spin_unlock(&ses->iface_lock);
-               return 0;
-       }
-       spin_unlock(&ses->iface_lock);
-
-       /*
         * Keep connecting to same, fastest, iface for all channels as
         * long as its RSS. Try next fastest one if not RSS or channel
         * creation fails.
         */
+       spin_lock(&ses->iface_lock);
+       iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+                                iface_head);
+       spin_unlock(&ses->iface_lock);
+
        while (left > 0) {
-               struct cifs_server_iface *iface;
 
                tries++;
                if (tries > 3*ses->chan_max) {
@@ -219,27 +201,46 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
                        break;
                }
 
-               iface = &ifaces[i];
-               if (is_ses_using_iface(ses, iface) && !iface->rss_capable) {
-                       i = (i+1) % iface_count;
-                       continue;
+               spin_lock(&ses->iface_lock);
+               if (!ses->iface_count) {
+                       spin_unlock(&ses->iface_lock);
+                       break;
                }
 
-               rc = cifs_ses_add_channel(cifs_sb, ses, iface);
-               if (rc) {
-                       cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n",
-                                i, rc);
-                       i = (i+1) % iface_count;
-                       continue;
+               list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
+                                   iface_head) {
+                       /* skip ifaces that are unusable */
+                       if (!iface->is_active ||
+                           (is_ses_using_iface(ses, iface) &&
+                            !iface->rss_capable)) {
+                               continue;
+                       }
+
+                       /* take ref before unlock */
+                       kref_get(&iface->refcount);
+
+                       spin_unlock(&ses->iface_lock);
+                       rc = cifs_ses_add_channel(cifs_sb, ses, iface);
+                       spin_lock(&ses->iface_lock);
+
+                       if (rc) {
+                               cifs_dbg(VFS, "failed to open extra channel on iface:%pIS rc=%d\n",
+                                        &iface->sockaddr,
+                                        rc);
+                               kref_put(&iface->refcount, release_iface);
+                               continue;
+                       }
+
+                       cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+                                &iface->sockaddr);
+                       break;
                }
+               spin_unlock(&ses->iface_lock);
 
-               cifs_dbg(FYI, "successfully opened new channel on iface#%d\n",
-                        i);
                left--;
                new_chan_count++;
        }
 
-       kfree(ifaces);
        return new_chan_count - old_chan_count;
 }
 
@@ -355,6 +356,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
                spin_unlock(&ses->chan_lock);
                goto out;
        }
+       chan->iface = iface;
        ses->chan_count++;
        atomic_set(&ses->chan_seq, 0);
 
index 8543caf..db4ccf1 100644 (file)
@@ -512,73 +512,41 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 static int
 parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
                        size_t buf_len,
-                       struct cifs_server_iface **iface_list,
-                       size_t *iface_count)
+                       struct cifs_ses *ses)
 {
        struct network_interface_info_ioctl_rsp *p;
        struct sockaddr_in *addr4;
        struct sockaddr_in6 *addr6;
        struct iface_info_ipv4 *p4;
        struct iface_info_ipv6 *p6;
-       struct cifs_server_iface *info;
+       struct cifs_server_iface *info = NULL, *iface = NULL, *niface = NULL;
+       struct cifs_server_iface tmp_iface;
        ssize_t bytes_left;
        size_t next = 0;
        int nb_iface = 0;
-       int rc = 0;
-
-       *iface_list = NULL;
-       *iface_count = 0;
-
-       /*
-        * Fist pass: count and sanity check
-        */
+       int rc = 0, ret = 0;
 
        bytes_left = buf_len;
        p = buf;
-       while (bytes_left >= sizeof(*p)) {
-               nb_iface++;
-               next = le32_to_cpu(p->Next);
-               if (!next) {
-                       bytes_left -= sizeof(*p);
-                       break;
-               }
-               p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
-               bytes_left -= next;
-       }
-
-       if (!nb_iface) {
-               cifs_dbg(VFS, "%s: malformed interface info\n", __func__);
-               rc = -EINVAL;
-               goto out;
-       }
-
-       /* Azure rounds the buffer size up 8, to a 16 byte boundary */
-       if ((bytes_left > 8) || p->Next)
-               cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
-
 
+       spin_lock(&ses->iface_lock);
        /*
-        * Second pass: extract info to internal structure
+        * Go through iface_list and do kref_put to remove
+        * any unused ifaces. ifaces in use will be removed
+        * when the last user calls a kref_put on it
         */
-
-       *iface_list = kcalloc(nb_iface, sizeof(**iface_list), GFP_KERNEL);
-       if (!*iface_list) {
-               rc = -ENOMEM;
-               goto out;
+       list_for_each_entry_safe(iface, niface, &ses->iface_list,
+                                iface_head) {
+               iface->is_active = 0;
+               kref_put(&iface->refcount, release_iface);
        }
+       spin_unlock(&ses->iface_lock);
 
-       info = *iface_list;
-       bytes_left = buf_len;
-       p = buf;
        while (bytes_left >= sizeof(*p)) {
-               info->speed = le64_to_cpu(p->LinkSpeed);
-               info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
-               info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
-
-               cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count);
-               cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
-               cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__,
-                        le32_to_cpu(p->Capability));
+               memset(&tmp_iface, 0, sizeof(tmp_iface));
+               tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
+               tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
+               tmp_iface.rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
 
                switch (p->Family) {
                /*
@@ -587,7 +555,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
                 * conversion explicit in case either one changes.
                 */
                case INTERNETWORK:
-                       addr4 = (struct sockaddr_in *)&info->sockaddr;
+                       addr4 = (struct sockaddr_in *)&tmp_iface.sockaddr;
                        p4 = (struct iface_info_ipv4 *)p->Buffer;
                        addr4->sin_family = AF_INET;
                        memcpy(&addr4->sin_addr, &p4->IPv4Address, 4);
@@ -599,7 +567,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
                                 &addr4->sin_addr);
                        break;
                case INTERNETWORKV6:
-                       addr6 = (struct sockaddr_in6 *)&info->sockaddr;
+                       addr6 = (struct sockaddr_in6 *)&tmp_iface.sockaddr;
                        p6 = (struct iface_info_ipv6 *)p->Buffer;
                        addr6->sin6_family = AF_INET6;
                        memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16);
@@ -619,36 +587,88 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
                        goto next_iface;
                }
 
-               (*iface_count)++;
-               info++;
+               /*
+                * The iface_list is assumed to be sorted by speed.
+                * Check if the new interface exists in that list.
+                * NEVER change iface. it could be in use.
+                * Add a new one instead
+                */
+               spin_lock(&ses->iface_lock);
+               iface = niface = NULL;
+               list_for_each_entry_safe(iface, niface, &ses->iface_list,
+                                        iface_head) {
+                       ret = iface_cmp(iface, &tmp_iface);
+                       if (!ret) {
+                               /* just get a ref so that it doesn't get picked/freed */
+                               iface->is_active = 1;
+                               kref_get(&iface->refcount);
+                               spin_unlock(&ses->iface_lock);
+                               goto next_iface;
+                       } else if (ret < 0) {
+                               /* all remaining ifaces are slower */
+                               kref_get(&iface->refcount);
+                               break;
+                       }
+               }
+               spin_unlock(&ses->iface_lock);
+
+               /* no match. insert the entry in the list */
+               info = kmalloc(sizeof(struct cifs_server_iface),
+                              GFP_KERNEL);
+               if (!info) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               memcpy(info, &tmp_iface, sizeof(tmp_iface));
+
+               /* add this new entry to the list */
+               kref_init(&info->refcount);
+               info->is_active = 1;
+
+               cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, ses->iface_count);
+               cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
+               cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__,
+                        le32_to_cpu(p->Capability));
+
+               spin_lock(&ses->iface_lock);
+               if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+                       list_add_tail(&info->iface_head, &iface->iface_head);
+                       kref_put(&iface->refcount, release_iface);
+               } else
+                       list_add_tail(&info->iface_head, &ses->iface_list);
+               spin_unlock(&ses->iface_lock);
+
+               ses->iface_count++;
+               ses->iface_last_update = jiffies;
 next_iface:
+               nb_iface++;
                next = le32_to_cpu(p->Next);
-               if (!next)
+               if (!next) {
+                       bytes_left -= sizeof(*p);
                        break;
+               }
                p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
                bytes_left -= next;
        }
 
-       if (!*iface_count) {
+       if (!nb_iface) {
+               cifs_dbg(VFS, "%s: malformed interface info\n", __func__);
                rc = -EINVAL;
                goto out;
        }
 
-out:
-       if (rc) {
-               kfree(*iface_list);
-               *iface_count = 0;
-               *iface_list = NULL;
-       }
-       return rc;
-}
+       /* Azure rounds the buffer size up 8, to a 16 byte boundary */
+       if ((bytes_left > 8) || p->Next)
+               cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
 
-static int compare_iface(const void *ia, const void *ib)
-{
-       const struct cifs_server_iface *a = (struct cifs_server_iface *)ia;
-       const struct cifs_server_iface *b = (struct cifs_server_iface *)ib;
 
-       return a->speed == b->speed ? 0 : (a->speed > b->speed ? -1 : 1);
+       if (!ses->iface_count) {
+               rc = -EINVAL;
+               goto out;
+       }
+
+out:
+       return rc;
 }
 
 static int
@@ -657,8 +677,6 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
        int rc;
        unsigned int ret_data_len = 0;
        struct network_interface_info_ioctl_rsp *out_buf = NULL;
-       struct cifs_server_iface *iface_list;
-       size_t iface_count;
        struct cifs_ses *ses = tcon->ses;
 
        rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
@@ -674,21 +692,10 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
                goto out;
        }
 
-       rc = parse_server_interfaces(out_buf, ret_data_len,
-                                    &iface_list, &iface_count);
+       rc = parse_server_interfaces(out_buf, ret_data_len, ses);
        if (rc)
                goto out;
 
-       /* sort interfaces from fastest to slowest */
-       sort(iface_list, iface_count, sizeof(*iface_list), compare_iface, NULL);
-
-       spin_lock(&ses->iface_lock);
-       kfree(ses->iface_list);
-       ses->iface_list = iface_list;
-       ses->iface_count = iface_count;
-       ses->iface_last_update = jiffies;
-       spin_unlock(&ses->iface_lock);
-
 out:
        kfree(out_buf);
        return rc;