mptcp: use get_send wrapper
author Geliang Tang <geliang.tang@suse.com>
Mon, 21 Aug 2023 22:25:19 +0000 (15:25 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 23 Aug 2023 00:31:19 +0000 (17:31 -0700)
This patch adds support for multiple subflows to __mptcp_push_pending()
and __mptcp_subflow_push_pending(): use the get_send() wrapper, i.e.
mptcp_sched_get_send(), in them instead of mptcp_subflow_get_send().

Check the subflow scheduled flags to see which subflow or subflows were
picked by the scheduler, and use them to send data.

Move the msk_owned_by_me() and fallback checks from
mptcp_subflow_get_send() into the get_send() wrapper.

This commit allows the scheduler to set the subflow->scheduled bit in
multiple subflows, but it does not allow sending redundant data: when
multiple subflows are scheduled, each one carries the next portion of
sequential data rather than a duplicate (see the sketch below).
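
For illustration, a minimal sketch of the resulting send path, using only
helpers that appear in the diff below (locking and error handling omitted):

	/* Ask the scheduler to pick one or more subflows, then push
	 * pending data on every subflow whose scheduled bit is set.
	 */
	if (mptcp_sched_get_send(msk))
		return;		/* no subflow can send right now */

	mptcp_for_each_subflow(msk, subflow) {
		if (READ_ONCE(subflow->scheduled)) {
			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

			mptcp_subflow_set_scheduled(subflow, false);
			/* each scheduled subflow gets the next chunk of
			 * sequential data, not a duplicate copy
			 */
			__subflow_push_pending(sk, ssk, &info);
		}
	}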

Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <martineau@kernel.org>
Link: https://lore.kernel.org/r/20230821-upstream-net-next-20230818-v1-8-0c860fb256a8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/mptcp/protocol.c
net/mptcp/sched.c

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9cd172d..77e94ee 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1377,15 +1377,6 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
        u64 linger_time;
        long tout = 0;
 
-       msk_owned_by_me(msk);
-
-       if (__mptcp_check_fallback(msk)) {
-               if (!msk->first)
-                       return NULL;
-               return __tcp_can_send(msk->first) &&
-                      sk_stream_memory_free(msk->first) ? msk->first : NULL;
-       }
-
        /* pick the subflow with the lower wmem/wspace ratio */
        for (i = 0; i < SSK_MODE_MAX; ++i) {
                send_info[i].ssk = NULL;
@@ -1538,43 +1529,56 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
                                .flags = flags,
        };
        bool do_check_data_fin = false;
+       int push_count = 1;
 
-       while (mptcp_send_head(sk)) {
+       while (mptcp_send_head(sk) && (push_count > 0)) {
+               struct mptcp_subflow_context *subflow;
                int ret = 0;
 
-               prev_ssk = ssk;
-               ssk = mptcp_subflow_get_send(msk);
+               if (mptcp_sched_get_send(msk))
+                       break;
 
-               /* First check. If the ssk has changed since
-                * the last round, release prev_ssk
-                */
-               if (ssk != prev_ssk && prev_ssk)
-                       mptcp_push_release(prev_ssk, &info);
-               if (!ssk)
-                       goto out;
+               push_count = 0;
 
-               /* Need to lock the new subflow only if different
-                * from the previous one, otherwise we are still
-                * helding the relevant lock
-                */
-               if (ssk != prev_ssk)
-                       lock_sock(ssk);
+               mptcp_for_each_subflow(msk, subflow) {
+                       if (READ_ONCE(subflow->scheduled)) {
+                               mptcp_subflow_set_scheduled(subflow, false);
 
-               ret = __subflow_push_pending(sk, ssk, &info);
-               if (ret <= 0) {
-                       if (ret == -EAGAIN)
-                               continue;
-                       mptcp_push_release(ssk, &info);
-                       goto out;
+                               prev_ssk = ssk;
+                               ssk = mptcp_subflow_tcp_sock(subflow);
+                               if (ssk != prev_ssk) {
+                                       /* First check. If the ssk has changed since
+                                        * the last round, release prev_ssk
+                                        */
+                                       if (prev_ssk)
+                                               mptcp_push_release(prev_ssk, &info);
+
+                                       /* Need to lock the new subflow only if different
+                                        * from the previous one, otherwise we are still
+                                        * helding the relevant lock
+                                        */
+                                       lock_sock(ssk);
+                               }
+
+                               push_count++;
+
+                               ret = __subflow_push_pending(sk, ssk, &info);
+                               if (ret <= 0) {
+                                       if (ret != -EAGAIN ||
+                                           (1 << ssk->sk_state) &
+                                            (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSE))
+                                               push_count--;
+                                       continue;
+                               }
+                               do_check_data_fin = true;
+                       }
                }
-               do_check_data_fin = true;
        }
 
        /* at this point we held the socket lock for the last subflow we used */
        if (ssk)
                mptcp_push_release(ssk, &info);
 
-out:
        /* ensure the rtx timer is running */
        if (!mptcp_timer_pending(sk))
                mptcp_reset_timer(sk);
@@ -1588,30 +1592,49 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
        struct mptcp_sendmsg_info info = {
                .data_lock_held = true,
        };
+       bool keep_pushing = true;
        struct sock *xmit_ssk;
        int copied = 0;
 
        info.flags = 0;
-       while (mptcp_send_head(sk)) {
+       while (mptcp_send_head(sk) && keep_pushing) {
+               struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
                int ret = 0;
 
                /* check for a different subflow usage only after
                 * spooling the first chunk of data
                 */
-               xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
-               if (!xmit_ssk)
-                       goto out;
-               if (xmit_ssk != ssk) {
-                       mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
-                                              MPTCP_DELEGATE_SEND);
+               if (first) {
+                       mptcp_subflow_set_scheduled(subflow, false);
+                       ret = __subflow_push_pending(sk, ssk, &info);
+                       first = false;
+                       if (ret <= 0)
+                               break;
+                       copied += ret;
+                       continue;
+               }
+
+               if (mptcp_sched_get_send(msk))
                        goto out;
+
+               if (READ_ONCE(subflow->scheduled)) {
+                       mptcp_subflow_set_scheduled(subflow, false);
+                       ret = __subflow_push_pending(sk, ssk, &info);
+                       if (ret <= 0)
+                               keep_pushing = false;
+                       copied += ret;
                }
 
-               ret = __subflow_push_pending(sk, ssk, &info);
-               first = false;
-               if (ret <= 0)
-                       break;
-               copied += ret;
+               mptcp_for_each_subflow(msk, subflow) {
+                       if (READ_ONCE(subflow->scheduled)) {
+                               xmit_ssk = mptcp_subflow_tcp_sock(subflow);
+                               if (xmit_ssk != ssk) {
+                                       mptcp_subflow_delegate(subflow,
+                                                              MPTCP_DELEGATE_SEND);
+                                       keep_pushing = false;
+                               }
+                       }
+               }
        }
 
 out:
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 8846066..078b5d4 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -99,6 +99,19 @@ int mptcp_sched_get_send(struct mptcp_sock *msk)
        struct mptcp_subflow_context *subflow;
        struct mptcp_sched_data data;
 
+       msk_owned_by_me(msk);
+
+       /* the following check is moved out of mptcp_subflow_get_send */
+       if (__mptcp_check_fallback(msk)) {
+               if (msk->first &&
+                   __tcp_can_send(msk->first) &&
+                   sk_stream_memory_free(msk->first)) {
+                       mptcp_subflow_set_scheduled(mptcp_subflow_ctx(msk->first), true);
+                       return 0;
+               }
+               return -EINVAL;
+       }
+
        mptcp_for_each_subflow(msk, subflow) {
                if (READ_ONCE(subflow->scheduled))
                        return 0;
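
With the msk_owned_by_me() and fallback checks centralized in this wrapper, a
scheduler only has to mark the subflows it wants used. As a hypothetical
example (a sketch, not part of this patch), a get_send implementation that
schedules every sendable subflow could do:

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

		/* mark every subflow that can currently accept data;
		 * __mptcp_push_pending() clears the bit as it sends on
		 * each scheduled subflow in turn
		 */
		if (__tcp_can_send(ssk) && sk_stream_memory_free(ssk))
			mptcp_subflow_set_scheduled(subflow, true);
	}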