inet: implement lockless IP_TTL
author: Eric Dumazet <edumazet@google.com>
Wed, 16 Aug 2023 08:15:46 +0000 (08:15 +0000)
committer: David S. Miller <davem@davemloft.net>
Wed, 16 Aug 2023 10:09:18 +0000 (11:09 +0100)
ip_select_ttl() is racy, because it reads inet->uc_ttl
without proper locking.

Add READ_ONCE()/WRITE_ONCE() annotations while
allowing IP_TTL socket option to be set/read without
holding the socket lock.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c

index 8f396ea..ce62578 100644 (file)
@@ -133,7 +133,7 @@ EXPORT_SYMBOL_GPL(ip_local_out);
 static inline int ip_select_ttl(const struct inet_sock *inet,
                                const struct dst_entry *dst)
 {
-       int ttl = inet->uc_ttl;
+       int ttl = READ_ONCE(inet->uc_ttl);
 
        if (ttl < 0)
                ttl = ip4_dst_hoplimit(dst);
index cfa65a0..dbb2d23 100644 (file)
@@ -1023,6 +1023,13 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
        case IP_BIND_ADDRESS_NO_PORT:
                inet_assign_bit(BIND_ADDRESS_NO_PORT, sk, val);
                return 0;
+       case IP_TTL:
+               if (optlen < 1)
+                       return -EINVAL;
+               if (val != -1 && (val < 1 || val > 255))
+                       return -EINVAL;
+               WRITE_ONCE(inet->uc_ttl, val);
+               return 0;
        }
 
        err = 0;
@@ -1080,13 +1087,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
        case IP_TOS:    /* This sets both TOS and Precedence */
                __ip_sock_set_tos(sk, val);
                break;
-       case IP_TTL:
-               if (optlen < 1)
-                       goto e_inval;
-               if (val != -1 && (val < 1 || val > 255))
-                       goto e_inval;
-               inet->uc_ttl = val;
-               break;
        case IP_MTU_DISCOVER:
                if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
                        goto e_inval;
@@ -1590,6 +1590,11 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
        case IP_BIND_ADDRESS_NO_PORT:
                val = inet_test_bit(BIND_ADDRESS_NO_PORT, sk);
                goto copyval;
+       case IP_TTL:
+               val = READ_ONCE(inet->uc_ttl);
+               if (val < 0)
+                       val = READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_default_ttl);
+               goto copyval;
        }
 
        if (needs_rtnl)
@@ -1629,14 +1634,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
        case IP_TOS:
                val = inet->tos;
                break;
-       case IP_TTL:
-       {
-               struct net *net = sock_net(sk);
-               val = (inet->uc_ttl == -1 ?
-                      READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
-                      inet->uc_ttl);
-               break;
-       }
        case IP_MTU_DISCOVER:
                val = inet->pmtudisc;
                break;