bpf: Add SO_KEEPALIVE and related options to bpf_setsockopt
author Dmitry Yakunin <zeil@yandex-team.ru>
Sat, 20 Jun 2020 15:30:52 +0000 (18:30 +0300)
committer Alexei Starovoitov <ast@kernel.org>
Wed, 24 Jun 2020 18:21:03 +0000 (11:21 -0700)
This patch adds support for the SO_KEEPALIVE flag and the related TCP options
(TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_KEEPCNT, TCP_SYNCNT, TCP_USER_TIMEOUT) to the
bpf_setsockopt() routine. This is helpful if we want to enable or tune TCP
keepalive for applications that don't do it in their userspace code.

v3:
  - update kernel-doc in uapi (Nikita Vetoshkin <nekto0n@yandex-team.ru>)

v4:
  - update kernel-doc in tools too (Alexei Starovoitov)
  - add test to selftests (Alexei Starovoitov)

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200620153052.9439-3-zeil@yandex-team.ru
include/uapi/linux/bpf.h
net/core/filter.c
tools/include/uapi/linux/bpf.h
tools/testing/selftests/bpf/progs/connect4_prog.c

index 9d3923e..d9737d5 100644 (file)
@@ -1621,10 +1621,13 @@ union bpf_attr {
  *
  *             * **SOL_SOCKET**, which supports the following *optname*\ s:
  *               **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- *               **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ *               **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
+ *               **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
  *             * **IPPROTO_TCP**, which supports the following *optname*\ s:
  *               **TCP_CONGESTION**, **TCP_BPF_IW**,
- *               **TCP_BPF_SNDCWND_CLAMP**.
+ *               **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
+ *               **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
+ *               **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
  *             * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  *             * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  *     Return
index 7339538..c713b6b 100644 (file)
@@ -4289,10 +4289,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                           char *optval, int optlen, u32 flags)
 {
        char devname[IFNAMSIZ];
+       int val, valbool;
        struct net *net;
        int ifindex;
        int ret = 0;
-       int val;
 
        if (!sk_fullsock(sk))
                return -EINVAL;
@@ -4303,6 +4303,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
                        return -EINVAL;
                val = *((int *)optval);
+               valbool = val ? 1 : 0;
 
                /* Only some socketops are supported */
                switch (optname) {
@@ -4361,6 +4362,11 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                        }
                        ret = sock_bindtoindex(sk, ifindex, false);
                        break;
+               case SO_KEEPALIVE:
+                       if (sk->sk_prot->keepalive)
+                               sk->sk_prot->keepalive(sk, valbool);
+                       sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+                       break;
                default:
                        ret = -EINVAL;
                }
@@ -4421,6 +4427,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                        ret = tcp_set_congestion_control(sk, name, false,
                                                         reinit, true);
                } else {
+                       struct inet_connection_sock *icsk = inet_csk(sk);
                        struct tcp_sock *tp = tcp_sk(sk);
 
                        if (optlen != sizeof(int))
@@ -4449,6 +4456,33 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
                                else
                                        tp->save_syn = val;
                                break;
+                       case TCP_KEEPIDLE:
+                               ret = tcp_sock_set_keepidle_locked(sk, val);
+                               break;
+                       case TCP_KEEPINTVL:
+                               if (val < 1 || val > MAX_TCP_KEEPINTVL)
+                                       ret = -EINVAL;
+                               else
+                                       tp->keepalive_intvl = val * HZ;
+                               break;
+                       case TCP_KEEPCNT:
+                               if (val < 1 || val > MAX_TCP_KEEPCNT)
+                                       ret = -EINVAL;
+                               else
+                                       tp->keepalive_probes = val;
+                               break;
+                       case TCP_SYNCNT:
+                               if (val < 1 || val > MAX_TCP_SYNCNT)
+                                       ret = -EINVAL;
+                               else
+                                       icsk->icsk_syn_retries = val;
+                               break;
+                       case TCP_USER_TIMEOUT:
+                               if (val < 0)
+                                       ret = -EINVAL;
+                               else
+                                       icsk->icsk_user_timeout = val;
+                               break;
                        default:
                                ret = -EINVAL;
                        }
index 9d3923e..d9737d5 100644 (file)
@@ -1621,10 +1621,13 @@ union bpf_attr {
  *
  *             * **SOL_SOCKET**, which supports the following *optname*\ s:
  *               **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- *               **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ *               **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
+ *               **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
  *             * **IPPROTO_TCP**, which supports the following *optname*\ s:
  *               **TCP_CONGESTION**, **TCP_BPF_IW**,
- *               **TCP_BPF_SNDCWND_CLAMP**.
+ *               **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
+ *               **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
+ *               **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
  *             * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
  *             * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
  *     Return
index 1ab2c5e..b1b2773 100644 (file)
@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
        return 0;
 }
 
+static __inline int set_keepalive(struct bpf_sock_addr *ctx)
+{
+       int zero = 0, one = 1;
+
+       if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
+               return 1;
+       if (ctx->type == SOCK_STREAM) {
+               if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
+                       return 1;
+               if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
+                       return 1;
+               if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
+                       return 1;
+               if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
+                       return 1;
+               if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
+                       return 1;
+       }
+       if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
+               return 1;
+
+       return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
        if (bind_to_device(ctx))
                return 0;
 
+       if (set_keepalive(ctx))
+               return 0;
+
        if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
                return 0;
        else if (ctx->type == SOCK_STREAM)