bpf: Change bpf_setsockopt(SOL_TCP) to reuse do_tcp_setsockopt()
author Martin KaFai Lau <kafai@fb.com>
Wed, 17 Aug 2022 06:18:19 +0000 (23:18 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 19 Aug 2022 00:06:13 +0000 (17:06 -0700)
After the prep work in the previous patches,
this patch removes all the duplicated code from bpf_setsockopt(SOL_TCP)
and reuses do_tcp_setsockopt() instead.

The existing optname white-list is refactored into a new
function, sol_tcp_setsockopt().  sol_tcp_setsockopt()
also calls bpf_sol_tcp_setsockopt() to handle
the TCP_BPF_XXX specific optnames.

bpf_setsockopt(TCP_SAVE_SYN) now also accepts the value 2, which
additionally saves the Ethernet header; this comes for free from
do_tcp_setsockopt().

Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/r/20220817061819.4180146-1-kafai@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/net/tcp.h
net/core/filter.c
net/ipv4/tcp.c

index d10962b..c03a50c 100644 (file)
@@ -405,6 +405,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock,
 int tcp_getsockopt(struct sock *sk, int level, int optname,
                   char __user *optval, int __user *optlen);
 bool tcp_bpf_bypass_getsockopt(int level, int optname);
+int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+                     sockptr_t optval, unsigned int optlen);
 int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
                   unsigned int optlen);
 void tcp_set_keepalive(struct sock *sk, int val);
index bb135d4..6687760 100644 (file)
@@ -5086,6 +5086,34 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
        return 0;
 }
 
+static int sol_tcp_setsockopt(struct sock *sk, int optname,
+                             char *optval, int optlen)
+{
+       if (sk->sk_prot->setsockopt != tcp_setsockopt)
+               return -EINVAL;
+
+       switch (optname) {
+       case TCP_KEEPIDLE:
+       case TCP_KEEPINTVL:
+       case TCP_KEEPCNT:
+       case TCP_SYNCNT:
+       case TCP_WINDOW_CLAMP:
+       case TCP_USER_TIMEOUT:
+       case TCP_NOTSENT_LOWAT:
+       case TCP_SAVE_SYN:
+               if (optlen != sizeof(int))
+                       return -EINVAL;
+               break;
+       case TCP_CONGESTION:
+               break;
+       default:
+               return bpf_sol_tcp_setsockopt(sk, optname, optval, optlen);
+       }
+
+       return do_tcp_setsockopt(sk, SOL_TCP, optname,
+                                KERNEL_SOCKPTR(optval), optlen);
+}
+
 static int __bpf_setsockopt(struct sock *sk, int level, int optname,
                            char *optval, int optlen)
 {
@@ -5138,73 +5166,8 @@ static int __bpf_setsockopt(struct sock *sk, int level, int optname,
                default:
                        ret = -EINVAL;
                }
-       } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
-                  sk->sk_prot->setsockopt == tcp_setsockopt) {
-               if (optname >= TCP_BPF_IW)
-                       return bpf_sol_tcp_setsockopt(sk, optname,
-                                                     optval, optlen);
-
-               if (optname == TCP_CONGESTION) {
-                       char name[TCP_CA_NAME_MAX];
-
-                       strncpy(name, optval, min_t(long, optlen,
-                                                   TCP_CA_NAME_MAX-1));
-                       name[TCP_CA_NAME_MAX-1] = 0;
-                       ret = tcp_set_congestion_control(sk, name, false, true);
-               } else {
-                       struct inet_connection_sock *icsk = inet_csk(sk);
-                       struct tcp_sock *tp = tcp_sk(sk);
-
-                       if (optlen != sizeof(int))
-                               return -EINVAL;
-
-                       val = *((int *)optval);
-                       /* Only some options are supported */
-                       switch (optname) {
-                       case TCP_SAVE_SYN:
-                               if (val < 0 || val > 1)
-                                       ret = -EINVAL;
-                               else
-                                       tp->save_syn = val;
-                               break;
-                       case TCP_KEEPIDLE:
-                               ret = tcp_sock_set_keepidle_locked(sk, val);
-                               break;
-                       case TCP_KEEPINTVL:
-                               if (val < 1 || val > MAX_TCP_KEEPINTVL)
-                                       ret = -EINVAL;
-                               else
-                                       tp->keepalive_intvl = val * HZ;
-                               break;
-                       case TCP_KEEPCNT:
-                               if (val < 1 || val > MAX_TCP_KEEPCNT)
-                                       ret = -EINVAL;
-                               else
-                                       tp->keepalive_probes = val;
-                               break;
-                       case TCP_SYNCNT:
-                               if (val < 1 || val > MAX_TCP_SYNCNT)
-                                       ret = -EINVAL;
-                               else
-                                       icsk->icsk_syn_retries = val;
-                               break;
-                       case TCP_USER_TIMEOUT:
-                               if (val < 0)
-                                       ret = -EINVAL;
-                               else
-                                       icsk->icsk_user_timeout = val;
-                               break;
-                       case TCP_NOTSENT_LOWAT:
-                               tp->notsent_lowat = val;
-                               sk->sk_write_space(sk);
-                               break;
-                       case TCP_WINDOW_CLAMP:
-                               ret = tcp_set_window_clamp(sk, val);
-                               break;
-                       default:
-                               ret = -EINVAL;
-                       }
-               }
+       } else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP) {
+               return sol_tcp_setsockopt(sk, optname, optval, optlen);
        } else {
                ret = -EINVAL;
        }
index cfed84b..a6986f2 100644 (file)
@@ -3479,8 +3479,8 @@ int tcp_set_window_clamp(struct sock *sk, int val)
 /*
  *     Socket option code for TCP.
  */
-static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
-               sockptr_t optval, unsigned int optlen)
+int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+                     sockptr_t optval, unsigned int optlen)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);