mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
mptcp_stop_tout_timer(sk);
+ __mptcp_propagate_sndbuf(sk, ssk);
return true;
}
struct mptcp_sock *msk = mptcp_sk(sk);
bool first = true;
- sk_stream_moderate_sndbuf(sk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (first)
tcp_enter_memory_pressure(ssk);
sk_stream_moderate_sndbuf(ssk);
+
first = false;
}
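+ /* the subflow sndbufs have just been moderated: recompute the
+ * msk-level sndbuf accordingly
+ */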
+ __mptcp_sync_sndbuf(sk);
}
/* ensure we get enough memory for the frag hdr, beyond some minimal amount of
WRITE_ONCE(msk->first, NULL);
out:
+ __mptcp_sync_sndbuf(sk);
if (need_push)
__mptcp_push_pending(sk, 0);
* uses the correct data
*/
mptcp_copy_inaddrs(nsk, ssk);
- mptcp_propagate_sndbuf(nsk, ssk);
+ __mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(nsk);
__mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
__mptcp_error_report(sk);
+ if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags))
+ __mptcp_sync_sndbuf(sk);
}
__mptcp_update_rmem(sk);
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
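+ /* sync the msk-level sndbuf under the msk data lock; if the msk
+ * socket is owned by the user, defer the sync to the msk release
+ * callback via the MPTCP_SYNC_SNDBUF flag
+ */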
+ if (status & BIT(MPTCP_DELEGATE_SNDBUF)) {
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_sync_sndbuf(sk);
+ else
+ __set_bit(MPTCP_SYNC_SNDBUF, &mptcp_sk(sk)->cb_flags);
+ mptcp_data_unlock(sk);
+ }
if (status & BIT(MPTCP_DELEGATE_ACK))
schedule_3rdack_retransmission(ssk);
}
/* active subflow, already present inside the conn_list */
if (!list_empty(&subflow->node)) {
mptcp_subflow_joined(msk, ssk);
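+ /* only the subflow socket lock is held here: use the delegating
+ * helper, which defers the actual msk sndbuf sync
+ */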
+ mptcp_propagate_sndbuf(parent, ssk);
return true;
}
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
+#define MPTCP_SYNC_SNDBUF 7
struct mptcp_skb_cb {
u64 map_seq;
#define MPTCP_DELEGATE_SCHEDULED 0
#define MPTCP_DELEGATE_SEND 1
#define MPTCP_DELEGATE_ACK 2
+#define MPTCP_DELEGATE_SNDBUF 3
#define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED))
/* MPTCP subflow context */
u32 setsockopt_seq;
u32 stale_rcv_tstamp;
+ int cached_sndbuf; /* sndbuf size when last synced with the msk sndbuf,
+ * protected by the msk socket lock
+ */
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
-static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
- if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf))
- return false;
+ struct mptcp_subflow_context *subflow;
+ int ssk_sndbuf, new_sndbuf;
+
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ return;
+
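+ /* start from the minimum TCP sndbuf and add each subflow's current
+ * (auto-tuned) sndbuf, caching the per-subflow value so that callers
+ * can cheaply detect when a new sync is needed
+ */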
+ new_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[0];
+ mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+ ssk_sndbuf = READ_ONCE(mptcp_subflow_tcp_sock(subflow)->sk_sndbuf);
+
+ subflow->cached_sndbuf = ssk_sndbuf;
+ new_sndbuf += ssk_sndbuf;
+ }
+
+ /* each subflow sndbuf is capped at tcp_wmem[2], so the msk max wmem
+ * limit is <nr_subflows> * tcp_wmem[2]
+ */
+ WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+}
+
+/* The caller holds both the msk socket and the subflow socket locks,
+ * possibly under BH
+ */
+static inline void __mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ if (READ_ONCE(ssk->sk_sndbuf) != subflow->cached_sndbuf)
+ __mptcp_sync_sndbuf(sk);
+}
+
+/* The caller holds only the subflow socket lock, either in process or
+ * BH context. Additionally this can be called under the msk data lock,
+ * so we can't acquire that lock here: let the delegated action acquire
+ * the needed locks in a suitable order.
+ */
+static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ if (likely(READ_ONCE(ssk->sk_sndbuf) == subflow->cached_sndbuf))
+ return;
- WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf);
- return true;
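+ /* the subflow sndbuf changed since the last sync: delegate the
+ * update, so that the needed locks are acquired in the proper order;
+ * scheduling the delegated action requires BH to be disabled
+ */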
+ local_bh_disable();
+ mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_SNDBUF);
+ local_bh_enable();
}
static inline void mptcp_write_space(struct sock *sk)
case SO_SNDBUFFORCE:
ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
+ mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
break;
case SO_RCVBUF:
case SO_RCVBUFFORCE:
if (sk->sk_userlocks & tx_rx_locks) {
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
- if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) {
WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
+ mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf;
+ }
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
}
void __mptcp_set_connected(struct sock *sk)
{
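+ /* the first subflow just completed the handshake: propagate its
+ * sndbuf to the msk
+ */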
+ __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first);
if (sk->sk_state == TCP_SYN_SENT) {
inet_sk_state_store(sk, TCP_ESTABLISHED);
sk->sk_state_change(sk);
return;
msk = mptcp_sk(parent);
- mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
- mptcp_propagate_sndbuf(parent, sk);
mptcp_do_fallback(sk);
mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);