tcp: bpf: Add TCP_BPF_DELACK_MAX setsockopt
author Martin KaFai Lau <kafai@fb.com>
Thu, 20 Aug 2020 19:00:21 +0000 (12:00 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Mon, 24 Aug 2020 21:34:59 +0000 (14:34 -0700)
This change is mostly taken from an internal patch, adapted from a sysctl
config to the bpf_setsockopt setup.

A bpf_prog can set the max delayed ack by using
bpf_setsockopt(TCP_BPF_DELACK_MAX).  The value is given in usecs and,
once converted to jiffies, must lie between TCP_TIMEOUT_MIN and
TCP_DELACK_MAX; otherwise -EINVAL is returned.  This max delayed ack can
be communicated to the peer through a bpf header option.  The receiving
peer can then use this max delayed ack to set a potentially lower rto by
using bpf_setsockopt(TCP_BPF_RTO_MIN), which will be introduced in the
next patch.

A later selftest patch will also use it as described above to show
how to write and parse a bpf tcp header option.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200820190021.2884000-1-kafai@fb.com
include/net/inet_connection_sock.h
include/uapi/linux/bpf.h
net/core/filter.c
net/ipv4/tcp.c
net/ipv4/tcp_output.c
tools/include/uapi/linux/bpf.h

index aa8893c..da7264a 100644 (file)
@@ -86,6 +86,7 @@ struct inet_connection_sock {
        struct timer_list         icsk_retransmit_timer;
        struct timer_list         icsk_delack_timer;
        __u32                     icsk_rto;
+       __u32                     icsk_delack_max;
        __u32                     icsk_pmtu_cookie;
        const struct tcp_congestion_ops *icsk_ca_ops;
        const struct inet_connection_sock_af_ops *icsk_af_ops;
index a1bbaff..7b905cb 100644 (file)
@@ -4257,6 +4257,7 @@ enum {
 enum {
        TCP_BPF_IW              = 1001, /* Set TCP initial congestion window */
        TCP_BPF_SNDCWND_CLAMP   = 1002, /* Set sndcwnd_clamp */
+       TCP_BPF_DELACK_MAX      = 1003, /* Max delay ack in usecs */
 };
 
 struct bpf_perf_event_value {
index c847b12..80fe742 100644 (file)
@@ -4459,6 +4459,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                } else {
                        struct inet_connection_sock *icsk = inet_csk(sk);
                        struct tcp_sock *tp = tcp_sk(sk);
+                       unsigned long timeout;
 
                        if (optlen != sizeof(int))
                                return -EINVAL;
@@ -4480,6 +4481,13 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                                        tp->snd_ssthresh = val;
                                }
                                break;
+                       case TCP_BPF_DELACK_MAX:
+                               timeout = usecs_to_jiffies(val);
+                               if (timeout > TCP_DELACK_MAX ||
+                                   timeout < TCP_TIMEOUT_MIN)
+                                       return -EINVAL;
+                               inet_csk(sk)->icsk_delack_max = timeout;
+                               break;
                        case TCP_SAVE_SYN:
                                if (val < 0 || val > 1)
                                        ret = -EINVAL;
index 87d3036..44c353a 100644 (file)
@@ -418,6 +418,7 @@ void tcp_init_sock(struct sock *sk)
        INIT_LIST_HEAD(&tp->tsorted_sent_queue);
 
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
+       icsk->icsk_delack_max = TCP_DELACK_MAX;
        tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
        minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
 
@@ -2685,6 +2686,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        icsk->icsk_backoff = 0;
        icsk->icsk_probes_out = 0;
        icsk->icsk_rto = TCP_TIMEOUT_INIT;
+       icsk->icsk_delack_max = TCP_DELACK_MAX;
        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        tp->snd_cwnd = TCP_INIT_CWND;
        tp->snd_cwnd_cnt = 0;
index 85ff417..44ffa48 100644 (file)
@@ -3741,6 +3741,8 @@ void tcp_send_delayed_ack(struct sock *sk)
                ato = min(ato, max_ato);
        }
 
+       ato = min_t(u32, ato, inet_csk(sk)->icsk_delack_max);
+
        /* Stay within the limit we were given */
        timeout = jiffies + ato;
 
index a1bbaff..7b905cb 100644 (file)
@@ -4257,6 +4257,7 @@ enum {
 enum {
        TCP_BPF_IW              = 1001, /* Set TCP initial congestion window */
        TCP_BPF_SNDCWND_CLAMP   = 1002, /* Set sndcwnd_clamp */
+       TCP_BPF_DELACK_MAX      = 1003, /* Max delay ack in usecs */
 };
 
 struct bpf_perf_event_value {