mptcp: better msk receive window updates
author Paolo Abeni <pabeni@redhat.com>
Thu, 11 Feb 2021 23:30:41 +0000 (15:30 -0800)
committer David S. Miller <davem@davemloft.net>
Fri, 12 Feb 2021 02:30:54 +0000 (18:30 -0800)
Move the checks guarding mptcp_cleanup_rbuf() calls inside the helper
itself and extend them to mirror the TCP checks more closely.

Additionally, drop the 'rmem_pending' hack: since commit 879526030c8b
("mptcp: protect the rx path with the msk socket spinlock") we
can use 'rmem_released' instead.

Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h

index e0d21c0..82a37cc 100644 (file)
@@ -498,8 +498,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+       u64 snd_data_fin_enable, ack_seq;
        unsigned int dss_size = 0;
-       u64 snd_data_fin_enable;
        struct mptcp_ext *mpext;
        unsigned int ack_size;
        bool ret = false;
@@ -531,13 +531,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
                return ret;
        }
 
+       ack_seq = READ_ONCE(msk->ack_seq);
        if (READ_ONCE(msk->use_64bit_ack)) {
                ack_size = TCPOLEN_MPTCP_DSS_ACK64;
-               opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
+               opts->ext_copy.data_ack = ack_seq;
                opts->ext_copy.ack64 = 1;
        } else {
                ack_size = TCPOLEN_MPTCP_DSS_ACK32;
-               opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
+               opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
                opts->ext_copy.ack64 = 0;
        }
        opts->ext_copy.use_ack = 1;
index c11fcf6..0dbf5cb 100644 (file)
@@ -457,7 +457,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
 static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
 {
        struct sock *ack_hint = READ_ONCE(msk->ack_hint);
+       int old_space = READ_ONCE(msk->old_wspace);
        struct mptcp_subflow_context *subflow;
+       struct sock *sk = (struct sock *)msk;
+       bool cleanup;
+
+       /* this is a simple superset of what tcp_cleanup_rbuf() implements
+        * so that we don't have to acquire the ssk socket lock most of the time
+        * to do actually nothing
+        */
+       cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
+       if (!cleanup)
+               return;
 
        /* if the hinted ssk is still active, try to use it */
        if (likely(ack_hint)) {
@@ -1865,7 +1876,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk)
        skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
 }
 
-static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
+static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 {
        struct sock *sk = (struct sock *)msk;
        unsigned int moved = 0;
@@ -1885,13 +1896,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
 
                slowpath = lock_sock_fast(ssk);
                mptcp_data_lock(sk);
+               __mptcp_update_rmem(sk);
                done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
                mptcp_data_unlock(sk);
-               if (moved && rcv) {
-                       WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
-                       tcp_cleanup_rbuf(ssk, 1);
-                       WRITE_ONCE(msk->rmem_pending, 0);
-               }
+               tcp_cleanup_rbuf(ssk, moved);
                unlock_sock_fast(ssk, slowpath);
        } while (!done);
 
@@ -1904,6 +1912,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
                ret |= __mptcp_ofo_queue(msk);
                __mptcp_splice_receive_queue(sk);
                mptcp_data_unlock(sk);
+               mptcp_cleanup_rbuf(msk);
        }
        if (ret)
                mptcp_check_data_fin((struct sock *)msk);
@@ -1933,7 +1942,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
        while (copied < len) {
-               int bytes_read, old_space;
+               int bytes_read;
 
                bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
                if (unlikely(bytes_read < 0)) {
@@ -1944,14 +1953,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
                copied += bytes_read;
 
-               if (skb_queue_empty(&msk->receive_queue) &&
-                   __mptcp_move_skbs(msk, len - copied))
-                       continue;
-
                /* be sure to advertise window change */
-               old_space = READ_ONCE(msk->old_wspace);
-               if ((tcp_space(sk) - old_space) >= old_space)
-                       mptcp_cleanup_rbuf(msk);
+               mptcp_cleanup_rbuf(msk);
+
+               if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
+                       continue;
 
                /* only the master socket status is relevant here. The exit
                 * conditions mirror closely tcp_recvmsg()
@@ -1979,7 +1985,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                                /* race breaker: the shutdown could be after the
                                 * previous receive queue check
                                 */
-                               if (__mptcp_move_skbs(msk, len - copied))
+                               if (__mptcp_move_skbs(msk))
                                        continue;
                                break;
                        }
@@ -2012,7 +2018,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                /* .. race-breaker: ssk might have gotten new data
                 * after last __mptcp_move_skbs() returned false.
                 */
-               if (unlikely(__mptcp_move_skbs(msk, 0)))
+               if (unlikely(__mptcp_move_skbs(msk)))
                        set_bit(MPTCP_DATA_READY, &msk->flags);
        } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
                /* data to read but mptcp_wait_data() cleared DATA_READY */
index 6a164ad..8d9f0ff 100644 (file)
@@ -234,7 +234,6 @@ struct mptcp_sock {
        u64             wnd_end;
        unsigned long   timer_ival;
        u32             token;
-       int             rmem_pending;
        int             rmem_released;
        unsigned long   flags;
        bool            can_ack;
@@ -293,7 +292,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
 
 static inline int __mptcp_space(const struct sock *sk)
 {
-       return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending);
+       return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
 }
 
 static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)