mptcp: be careful on MPTCP-level ack.
authorPaolo Abeni <pabeni@redhat.com>
Tue, 24 Nov 2020 21:51:24 +0000 (22:51 +0100)
committerJakub Kicinski <kuba@kernel.org>
Wed, 25 Nov 2020 21:36:16 +0000 (13:36 -0800)
We can enter the main mptcp_recvmsg() loop even when
no subflows are connected. As note by Eric, that would
result in a divide by zero oops on ack generation.

Address the issue by checking the subflow status before
sending the ack.

Additionally protect mptcp_recvmsg() against invocation
with weird socket states.

v1 -> v2:
 - removed unneeded inline keyword - Jakub

Reported-and-suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/5370c0ae03449239e3d1674ddcfb090cf6f20abe.1606253206.git.pabeni@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/mptcp/protocol.c

index dc97957..16e9cb1 100644 (file)
@@ -419,31 +419,57 @@ static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
        return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
 }
 
-static void mptcp_send_ack(struct mptcp_sock *msk, bool force)
+static bool tcp_can_send_ack(const struct sock *ssk)
+{
+       return !((1 << inet_sk_state_load(ssk)) &
+              (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE));
+}
+
+static void mptcp_send_ack(struct mptcp_sock *msk)
 {
        struct mptcp_subflow_context *subflow;
-       struct sock *pick = NULL;
 
        mptcp_for_each_subflow(msk, subflow) {
                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
-               if (force) {
-                       lock_sock(ssk);
+               lock_sock(ssk);
+               if (tcp_can_send_ack(ssk))
                        tcp_send_ack(ssk);
-                       release_sock(ssk);
-                       continue;
-               }
-
-               /* if the hintes ssk is still active, use it */
-               pick = ssk;
-               if (ssk == msk->ack_hint)
-                       break;
+               release_sock(ssk);
        }
-       if (!force && pick) {
-               lock_sock(pick);
-               tcp_cleanup_rbuf(pick, 1);
-               release_sock(pick);
+}
+
+static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+{
+       int ret;
+
+       lock_sock(ssk);
+       ret = tcp_can_send_ack(ssk);
+       if (ret)
+               tcp_cleanup_rbuf(ssk, 1);
+       release_sock(ssk);
+       return ret;
+}
+
+static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
+{
+       struct mptcp_subflow_context *subflow;
+
+       /* if the hinted ssk is still active, try to use it */
+       if (likely(msk->ack_hint)) {
+               mptcp_for_each_subflow(msk, subflow) {
+                       struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+                       if (msk->ack_hint == ssk &&
+                           mptcp_subflow_cleanup_rbuf(ssk))
+                               return;
+               }
        }
+
+       /* otherwise pick the first active subflow */
+       mptcp_for_each_subflow(msk, subflow)
+               if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
+                       return;
 }
 
 static bool mptcp_check_data_fin(struct sock *sk)
@@ -494,7 +520,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
 
                ret = true;
                mptcp_set_timeout(sk, NULL);
-               mptcp_send_ack(msk, true);
+               mptcp_send_ack(msk);
                mptcp_close_wake_up(sk);
        }
        return ret;
@@ -1579,6 +1605,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                return -EOPNOTSUPP;
 
        lock_sock(sk);
+       if (unlikely(sk->sk_state == TCP_LISTEN)) {
+               copied = -ENOTCONN;
+               goto out_err;
+       }
+
        timeo = sock_rcvtimeo(sk, nonblock);
 
        len = min_t(size_t, len, INT_MAX);
@@ -1604,7 +1635,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                /* be sure to advertise window change */
                old_space = READ_ONCE(msk->old_wspace);
                if ((tcp_space(sk) - old_space) >= old_space)
-                       mptcp_send_ack(msk, false);
+                       mptcp_cleanup_rbuf(msk);
 
                /* only the master socket status is relevant here. The exit
                 * conditions mirror closely tcp_recvmsg()