mptcp: refactor push_pending logic
author Geliang Tang <geliang.tang@suse.com>
Mon, 21 Aug 2023 22:25:12 +0000 (15:25 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 23 Aug 2023 00:31:18 +0000 (17:31 -0700)
To support redundant packet schedulers more easily, this patch refactors
the __mptcp_push_pending() logic from:

  For each dfrag:
    While sends succeed:
      Call the scheduler (selects subflow and msk->snd_burst)
      Update subflow locks (push/release/acquire as needed)
      Send the dfrag data with mptcp_sendmsg_frag()
      Update already_sent, snd_nxt, snd_burst
    Update msk->first_pending
  Push/release on final subflow

->

  While first_pending isn't empty:
    Call the scheduler (selects subflow and msk->snd_burst)
    Update subflow locks (push/release/acquire as needed)
    For each pending dfrag:
      While sends succeed:
        Send the dfrag data with mptcp_sendmsg_frag()
        Update already_sent, snd_nxt, snd_burst
      Update msk->first_pending
      Break if required by msk->snd_burst / etc
  Push/release on final subflow

It also refactors the __mptcp_subflow_push_pending() logic from:

  For each dfrag:
    While sends succeed:
      Call the scheduler (selects subflow and msk->snd_burst)
      Send the dfrag data with mptcp_subflow_delegate(), break
      Send the dfrag data with mptcp_sendmsg_frag()
      Update dfrag->already_sent, msk->snd_nxt, msk->snd_burst
    Update msk->first_pending

->

  While first_pending isn't empty:
    Call the scheduler (selects subflow and msk->snd_burst)
    Send the dfrag data with mptcp_subflow_delegate(), break
    Send the dfrag data with mptcp_sendmsg_frag()
    For each pending dfrag:
      While sends succeed:
        Send the dfrag data with mptcp_sendmsg_frag()
        Update already_sent, snd_nxt, snd_burst
      Update msk->first_pending
      Break if required by msk->snd_burst / etc

Move the duplicate code from __mptcp_push_pending() and
__mptcp_subflow_push_pending() into a new helper function, named
__subflow_push_pending(). Simplify __mptcp_push_pending() and
__mptcp_subflow_push_pending() by invoking this helper.

Also move the burst check conditions out of the function
mptcp_subflow_get_send() and check them in __subflow_push_pending(),
in the inner "for each pending dfrag" loop.

Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Mat Martineau <martineau@kernel.org>
Link: https://lore.kernel.org/r/20230821-upstream-net-next-20230818-v1-1-0c860fb256a8@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/mptcp/protocol.c

index 6019a3cf1625f83c5ec9b78d58d3e8208a4b81cf..29c662ffcd05b91cf25c14c31ac59bf9f2582acc 100644 (file)
@@ -1386,14 +1386,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
                       sk_stream_memory_free(msk->first) ? msk->first : NULL;
        }
 
-       /* re-use last subflow, if the burst allow that */
-       if (msk->last_snd && msk->snd_burst > 0 &&
-           sk_stream_memory_free(msk->last_snd) &&
-           mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
-               mptcp_set_timeout(sk);
-               return msk->last_snd;
-       }
-
        /* pick the subflow with the lower wmem/wspace ratio */
        for (i = 0; i < SSK_MODE_MAX; ++i) {
                send_info[i].ssk = NULL;
@@ -1499,57 +1491,86 @@ void mptcp_check_and_set_pending(struct sock *sk)
                mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
 }
 
-void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
+                                 struct mptcp_sendmsg_info *info)
 {
-       struct sock *prev_ssk = NULL, *ssk = NULL;
        struct mptcp_sock *msk = mptcp_sk(sk);
-       struct mptcp_sendmsg_info info = {
-                               .flags = flags,
-       };
-       bool do_check_data_fin = false;
        struct mptcp_data_frag *dfrag;
-       int len;
+       int len, copied = 0, err = 0;
 
        while ((dfrag = mptcp_send_head(sk))) {
-               info.sent = dfrag->already_sent;
-               info.limit = dfrag->data_len;
+               info->sent = dfrag->already_sent;
+               info->limit = dfrag->data_len;
                len = dfrag->data_len - dfrag->already_sent;
                while (len > 0) {
                        int ret = 0;
 
-                       prev_ssk = ssk;
-                       ssk = mptcp_subflow_get_send(msk);
-
-                       /* First check. If the ssk has changed since
-                        * the last round, release prev_ssk
-                        */
-                       if (ssk != prev_ssk && prev_ssk)
-                               mptcp_push_release(prev_ssk, &info);
-                       if (!ssk)
-                               goto out;
-
-                       /* Need to lock the new subflow only if different
-                        * from the previous one, otherwise we are still
-                        * helding the relevant lock
-                        */
-                       if (ssk != prev_ssk)
-                               lock_sock(ssk);
-
-                       ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+                       ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
                        if (ret <= 0) {
-                               if (ret == -EAGAIN)
-                                       continue;
-                               mptcp_push_release(ssk, &info);
+                               err = copied ? : ret;
                                goto out;
                        }
 
-                       do_check_data_fin = true;
-                       info.sent += ret;
+                       info->sent += ret;
+                       copied += ret;
                        len -= ret;
 
                        mptcp_update_post_push(msk, dfrag, ret);
                }
                WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+
+               if (msk->snd_burst <= 0 ||
+                   !sk_stream_memory_free(ssk) ||
+                   !mptcp_subflow_active(mptcp_subflow_ctx(ssk))) {
+                       err = copied;
+                       goto out;
+               }
+               mptcp_set_timeout(sk);
+       }
+       err = copied;
+
+out:
+       return err;
+}
+
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+{
+       struct sock *prev_ssk = NULL, *ssk = NULL;
+       struct mptcp_sock *msk = mptcp_sk(sk);
+       struct mptcp_sendmsg_info info = {
+                               .flags = flags,
+       };
+       bool do_check_data_fin = false;
+
+       while (mptcp_send_head(sk)) {
+               int ret = 0;
+
+               prev_ssk = ssk;
+               ssk = mptcp_subflow_get_send(msk);
+
+               /* First check. If the ssk has changed since
+                * the last round, release prev_ssk
+                */
+               if (ssk != prev_ssk && prev_ssk)
+                       mptcp_push_release(prev_ssk, &info);
+               if (!ssk)
+                       goto out;
+
+               /* Need to lock the new subflow only if different
+                * from the previous one, otherwise we are still
+                * helding the relevant lock
+                */
+               if (ssk != prev_ssk)
+                       lock_sock(ssk);
+
+               ret = __subflow_push_pending(sk, ssk, &info);
+               if (ret <= 0) {
+                       if (ret == -EAGAIN)
+                               continue;
+                       mptcp_push_release(ssk, &info);
+                       goto out;
+               }
+               do_check_data_fin = true;
        }
 
        /* at this point we held the socket lock for the last subflow we used */
@@ -1570,42 +1591,30 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
        struct mptcp_sendmsg_info info = {
                .data_lock_held = true,
        };
-       struct mptcp_data_frag *dfrag;
        struct sock *xmit_ssk;
-       int len, copied = 0;
+       int copied = 0;
 
        info.flags = 0;
-       while ((dfrag = mptcp_send_head(sk))) {
-               info.sent = dfrag->already_sent;
-               info.limit = dfrag->data_len;
-               len = dfrag->data_len - dfrag->already_sent;
-               while (len > 0) {
-                       int ret = 0;
-
-                       /* check for a different subflow usage only after
-                        * spooling the first chunk of data
-                        */
-                       xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
-                       if (!xmit_ssk)
-                               goto out;
-                       if (xmit_ssk != ssk) {
-                               mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
-                                                      MPTCP_DELEGATE_SEND);
-                               goto out;
-                       }
-
-                       ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
-                       if (ret <= 0)
-                               goto out;
+       while (mptcp_send_head(sk)) {
+               int ret = 0;
 
-                       info.sent += ret;
-                       copied += ret;
-                       len -= ret;
-                       first = false;
-
-                       mptcp_update_post_push(msk, dfrag, ret);
+               /* check for a different subflow usage only after
+                * spooling the first chunk of data
+                */
+               xmit_ssk = first ? ssk : mptcp_subflow_get_send(msk);
+               if (!xmit_ssk)
+                       goto out;
+               if (xmit_ssk != ssk) {
+                       mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
+                                              MPTCP_DELEGATE_SEND);
+                       goto out;
                }
-               WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+
+               ret = __subflow_push_pending(sk, ssk, &info);
+               first = false;
+               if (ret <= 0)
+                       break;
+               copied += ret;
        }
 
 out: