mptcp: introduce MPTCP_FULL_INFO getsockopt
authorPaolo Abeni <pabeni@redhat.com>
Tue, 20 Jun 2023 16:30:18 +0000 (18:30 +0200)
committerJakub Kicinski <kuba@kernel.org>
Thu, 22 Jun 2023 05:45:57 +0000 (22:45 -0700)
Some user-space applications want to monitor the subflows utilization.

Dumping the per subflow tcp_info is not enough, as the PM could close
and re-create the subflows under-the-hood, fooling the accounting.
Even checking the src/dst addresses used by each subflow could not
be enough, because new subflows could re-use the same address/port of
the just closed one.

This patch introduces a new socket option, allow dumping all the relevant
information all-at-once (everything, everywhere...), in a consistent
manner.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/388
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/uapi/linux/mptcp.h
net/mptcp/sockopt.c

index a124be6..ee9c49f 100644 (file)
@@ -249,9 +249,33 @@ struct mptcp_subflow_addrs {
        };
 };
 
+struct mptcp_subflow_info {
+       __u32                           id;
+       struct mptcp_subflow_addrs      addrs;
+};
+
+struct mptcp_full_info {
+       __u32           size_tcpinfo_kernel;    /* must be 0, set by kernel */
+       __u32           size_tcpinfo_user;
+       __u32           size_sfinfo_kernel;     /* must be 0, set by kernel */
+       __u32           size_sfinfo_user;
+       __u32           num_subflows;           /* must be 0, set by kernel (real subflow count) */
+       __u32           size_arrays_user;       /* max subflows that userspace is interested in;
+                                                * the buffers at subflow_info/tcp_info
+                                                * are respectively at least:
+                                                *  size_arrays * size_sfinfo_user
+                                                *  size_arrays * size_tcpinfo_user
+                                                * bytes wide
+                                                */
+       __aligned_u64           subflow_info;
+       __aligned_u64           tcp_info;
+       struct mptcp_info       mptcp_info;
+};
+
 /* MPTCP socket options */
 #define MPTCP_INFO             1
 #define MPTCP_TCPINFO          2
 #define MPTCP_SUBFLOW_ADDRS    3
+#define MPTCP_FULL_INFO                4
 
 #endif /* _UAPI_MPTCP_H */
index fa5055d..63f7a09 100644 (file)
@@ -14,7 +14,8 @@
 #include <net/mptcp.h>
 #include "protocol.h"
 
-#define MIN_INFO_OPTLEN_SIZE   16
+#define MIN_INFO_OPTLEN_SIZE           16
+#define MIN_FULL_INFO_OPTLEN_SIZE      40
 
 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
 {
@@ -981,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
 }
 
 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
-                                 char __user *optval, int __user *optlen)
+                                 char __user *optval,
+                                 int __user *optlen)
 {
        int len, copylen;
 
@@ -1162,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
        return 0;
 }
 
+static int mptcp_get_full_info(struct mptcp_full_info *mfi,
+                              char __user *optval,
+                              int __user *optlen)
+{
+       int len;
+
+       BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
+                    MIN_FULL_INFO_OPTLEN_SIZE);
+
+       if (get_user(len, optlen))
+               return -EFAULT;
+
+       if (len < MIN_FULL_INFO_OPTLEN_SIZE)
+               return -EINVAL;
+
+       memset(mfi, 0, sizeof(*mfi));
+       if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
+               return -EFAULT;
+
+       if (mfi->size_tcpinfo_kernel ||
+           mfi->size_sfinfo_kernel ||
+           mfi->num_subflows)
+               return -EINVAL;
+
+       if (mfi->size_sfinfo_user > INT_MAX ||
+           mfi->size_tcpinfo_user > INT_MAX)
+               return -EINVAL;
+
+       return len - MIN_FULL_INFO_OPTLEN_SIZE;
+}
+
+static int mptcp_put_full_info(struct mptcp_full_info *mfi,
+                              char __user *optval,
+                              u32 copylen,
+                              int __user *optlen)
+{
+       copylen += MIN_FULL_INFO_OPTLEN_SIZE;
+       if (put_user(copylen, optlen))
+               return -EFAULT;
+
+       if (copy_to_user(optval, mfi, copylen))
+               return -EFAULT;
+       return 0;
+}
+
+static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
+                                     int __user *optlen)
+{
+       unsigned int sfcount = 0, copylen = 0;
+       struct mptcp_subflow_context *subflow;
+       struct sock *sk = (struct sock *)msk;
+       void __user *tcpinfoptr, *sfinfoptr;
+       struct mptcp_full_info mfi;
+       int len;
+
+       len = mptcp_get_full_info(&mfi, optval, optlen);
+       if (len < 0)
+               return len;
+
+       /* don't bother filling the mptcp info if there is not enough
+        * user-space-provided storage
+        */
+       if (len > 0) {
+               mptcp_diag_fill_info(msk, &mfi.mptcp_info);
+               copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
+       }
+
+       mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
+       mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
+                                     sizeof(struct tcp_info));
+       sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
+       mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
+       mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
+                                    sizeof(struct mptcp_subflow_info));
+       tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
+
+       lock_sock(sk);
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+               struct mptcp_subflow_info sfinfo;
+               struct tcp_info tcp_info;
+
+               if (sfcount++ >= mfi.size_arrays_user)
+                       continue;
+
+               /* fetch addr/tcp_info only if the user space buffers
+                * are wide enough
+                */
+               memset(&sfinfo, 0, sizeof(sfinfo));
+               sfinfo.id = subflow->subflow_id;
+               if (mfi.size_sfinfo_user >
+                   offsetof(struct mptcp_subflow_info, addrs))
+                       mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
+               if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
+                       goto fail_release;
+
+               if (mfi.size_tcpinfo_user) {
+                       tcp_get_info(ssk, &tcp_info);
+                       if (copy_to_user(tcpinfoptr, &tcp_info,
+                                        mfi.size_tcpinfo_user))
+                               goto fail_release;
+               }
+
+               tcpinfoptr += mfi.size_tcpinfo_user;
+               sfinfoptr += mfi.size_sfinfo_user;
+       }
+       release_sock(sk);
+
+       mfi.num_subflows = sfcount;
+       if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
+               return -EFAULT;
+
+       return 0;
+
+fail_release:
+       release_sock(sk);
+       return -EFAULT;
+}
+
 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
                                int __user *optlen, int val)
 {
@@ -1235,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
        switch (optname) {
        case MPTCP_INFO:
                return mptcp_getsockopt_info(msk, optval, optlen);
+       case MPTCP_FULL_INFO:
+               return mptcp_getsockopt_full_info(msk, optval, optlen);
        case MPTCP_TCPINFO:
                return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
        case MPTCP_SUBFLOW_ADDRS: