inet: move inet->hdrincl to inet->inet_flags
author: Eric Dumazet <edumazet@google.com>
Wed, 16 Aug 2023 08:15:38 +0000 (08:15 +0000)
committer: David S. Miller <davem@davemloft.net>
Wed, 16 Aug 2023 10:09:17 +0000 (11:09 +0100)
IP_HDRINCL socket option can now be set/read
without locking the socket.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/inet_sock.h
net/ipv4/af_inet.c
net/ipv4/inet_diag.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv6/af_inet6.c
net/ipv6/ip6_output.c
net/ipv6/raw.c

index d6ba963..ad1895e 100644 (file)
@@ -231,7 +231,6 @@ struct inet_sock {
        __u8                    mc_ttl;
        __u8                    pmtudisc;
        __u8                    is_icsk:1,
-                               hdrincl:1,
                                mc_loop:1,
                                transparent:1,
                                mc_all:1,
@@ -271,6 +270,7 @@ enum {
        INET_FLAGS_RECVERR      = 9,
        INET_FLAGS_RECVERR_RFC4884 = 10,
        INET_FLAGS_FREEBIND     = 11,
+       INET_FLAGS_HDRINCL      = 12,
 };
 
 /* cmsg flags for inet */
@@ -397,7 +397,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 {
        __u8 flags = 0;
 
-       if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl)
+       if (inet_sk(sk)->transparent || inet_test_bit(HDRINCL, sk))
                flags |= FLOWI_FLAG_ANYSRC;
        return flags;
 }
index c59da65..5785fe9 100644 (file)
@@ -338,7 +338,7 @@ lookup_protocol:
        if (SOCK_RAW == sock->type) {
                inet->inet_num = protocol;
                if (IPPROTO_RAW == protocol)
-                       inet->hdrincl = 1;
+                       inet_set_bit(HDRINCL, sk);
        }
 
        if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
index 5a96f4f..98f3eb0 100644 (file)
@@ -185,7 +185,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
        inet_sockopt.recverr    = inet_test_bit(RECVERR, sk);
        inet_sockopt.is_icsk    = inet->is_icsk;
        inet_sockopt.freebind   = inet_test_bit(FREEBIND, sk);
-       inet_sockopt.hdrincl    = inet->hdrincl;
+       inet_sockopt.hdrincl    = inet_test_bit(HDRINCL, sk);
        inet_sockopt.mc_loop    = inet->mc_loop;
        inet_sockopt.transparent = inet->transparent;
        inet_sockopt.mc_all     = inet->mc_all;
index f28c875..8f396ea 100644 (file)
@@ -1039,7 +1039,7 @@ static int __ip_append_data(struct sock *sk,
                        }
                }
        } else if ((flags & MSG_SPLICE_PAGES) && length) {
-               if (inet->hdrincl)
+               if (inet_test_bit(HDRINCL, sk))
                        return -EPERM;
                if (rt->dst.dev->features & NETIF_F_SG &&
                    getfrag == ip_generic_getfrag)
@@ -1467,7 +1467,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
                 * so icmphdr does not in skb linear region and can not get icmp_type
                 * by icmp_hdr(skb)->type.
                 */
-               if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+               if (sk->sk_type == SOCK_RAW &&
+                   !inet_test_bit(HDRINCL, sk))
                        icmp_type = fl4->fl4_icmp_type;
                else
                        icmp_type = icmp_hdr(skb)->type;
index 6af8431..763456f 100644 (file)
@@ -988,6 +988,11 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
                        return -EINVAL;
                inet_assign_bit(FREEBIND, sk, val);
                return 0;
+       case IP_HDRINCL:
+               if (sk->sk_type != SOCK_RAW)
+                       return -ENOPROTOOPT;
+               inet_assign_bit(HDRINCL, sk, val);
+               return 0;
        }
 
        err = 0;
@@ -1052,13 +1057,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
                        goto e_inval;
                inet->uc_ttl = val;
                break;
-       case IP_HDRINCL:
-               if (sk->sk_type != SOCK_RAW) {
-                       err = -ENOPROTOOPT;
-                       break;
-               }
-               inet->hdrincl = val ? 1 : 0;
-               break;
        case IP_NODEFRAG:
                if (sk->sk_type != SOCK_RAW) {
                        err = -ENOPROTOOPT;
@@ -1578,6 +1576,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
        case IP_FREEBIND:
                val = inet_test_bit(FREEBIND, sk);
                goto copyval;
+       case IP_HDRINCL:
+               val = inet_test_bit(HDRINCL, sk);
+               goto copyval;
        }
 
        if (needs_rtnl)
@@ -1625,9 +1626,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
                       inet->uc_ttl);
                break;
        }
-       case IP_HDRINCL:
-               val = inet->hdrincl;
-               break;
        case IP_NODEFRAG:
                val = inet->nodefrag;
                break;
index f4c27dc..4b5db5d 100644 (file)
@@ -251,7 +251,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
                const struct iphdr *iph = (const struct iphdr *)skb->data;
                u8 *payload = skb->data + (iph->ihl << 2);
 
-               if (inet->hdrincl)
+               if (inet_test_bit(HDRINCL, sk))
                        payload = skb->data;
                ip_icmp_error(sk, skb, err, 0, info, payload);
        }
@@ -491,12 +491,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        if (len > 0xFFFF)
                goto out;
 
-       /* hdrincl should be READ_ONCE(inet->hdrincl)
-        * but READ_ONCE() doesn't work with bit fields.
-        * Doing this indirectly yields the same result.
-        */
-       hdrincl = inet->hdrincl;
-       hdrincl = READ_ONCE(hdrincl);
+       hdrincl = inet_test_bit(HDRINCL, sk);
+
        /*
         *      Check the flags.
         */
index 92fede3..a4e153d 100644 (file)
@@ -515,13 +515,12 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
        __u8 scope = RT_SCOPE_UNIVERSE;
 
        if (sk) {
-               const struct inet_sock *inet = inet_sk(sk);
-
                oif = sk->sk_bound_dev_if;
                mark = READ_ONCE(sk->sk_mark);
                tos = ip_sock_rt_tos(sk);
                scope = ip_sock_rt_scope(sk);
-               prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
+               prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW :
+                                                   sk->sk_protocol;
        }
 
        flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope,
@@ -555,7 +554,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
        flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
                           ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
                           ip_sock_rt_scope(sk),
-                          inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+                          inet_test_bit(HDRINCL, sk) ?
+                               IPPROTO_RAW : sk->sk_protocol,
                           inet_sk_flowi_flags(sk),
                           daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
        rcu_read_unlock();
index 3ec0359..ed9d207 100644 (file)
@@ -205,7 +205,7 @@ lookup_protocol:
        if (SOCK_RAW == sock->type) {
                inet->inet_num = protocol;
                if (IPPROTO_RAW == protocol)
-                       inet->hdrincl = 1;
+                       inet_set_bit(HDRINCL, sk);
        }
 
        sk->sk_destruct         = inet6_sock_destruct;
index bc96559..f8a1f6b 100644 (file)
@@ -1591,7 +1591,7 @@ emsgsize:
                        }
                }
        } else if ((flags & MSG_SPLICE_PAGES) && length) {
-               if (inet_sk(sk)->hdrincl)
+               if (inet_test_bit(HDRINCL, sk))
                        return -EPERM;
                if (rt->dst.dev->features & NETIF_F_SG &&
                    getfrag == ip_generic_getfrag)
@@ -1995,7 +1995,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
                u8 icmp6_type;
 
-               if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+               if (sk->sk_socket->type == SOCK_RAW &&
+                  !inet_test_bit(HDRINCL, sk))
                        icmp6_type = fl6->fl6_icmp_type;
                else
                        icmp6_type = icmp6_hdr(skb)->icmp6_type;
index ea16734..0eae766 100644 (file)
@@ -291,7 +291,6 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
               struct inet6_skb_parm *opt,
               u8 type, u8 code, int offset, __be32 info)
 {
-       struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        int err;
        int harderr;
@@ -315,7 +314,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
        }
        if (np->recverr) {
                u8 *payload = skb->data;
-               if (!inet->hdrincl)
+               if (!inet_test_bit(HDRINCL, sk))
                        payload += offset;
                ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload);
        }
@@ -406,7 +405,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
                                                         skb->len,
                                                         inet->inet_num, 0));
 
-       if (inet->hdrincl) {
+       if (inet_test_bit(HDRINCL, sk)) {
                if (skb_checksum_complete(skb)) {
                        atomic_inc(&sk->sk_drops);
                        kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
@@ -762,12 +761,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        if (msg->msg_flags & MSG_OOB)
                return -EOPNOTSUPP;
 
-       /* hdrincl should be READ_ONCE(inet->hdrincl)
-        * but READ_ONCE() doesn't work with bit fields.
-        * Doing this indirectly yields the same result.
-        */
-       hdrincl = inet->hdrincl;
-       hdrincl = READ_ONCE(hdrincl);
+       hdrincl = inet_test_bit(HDRINCL, sk);
 
        /*
         *      Get and verify the address.
@@ -1000,7 +994,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
        case IPV6_HDRINCL:
                if (sk->sk_type != SOCK_RAW)
                        return -EINVAL;
-               inet_sk(sk)->hdrincl = !!val;
+               inet_assign_bit(HDRINCL, sk, val);
                return 0;
        case IPV6_CHECKSUM:
                if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 &&
@@ -1068,7 +1062,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname,
 
        switch (optname) {
        case IPV6_HDRINCL:
-               val = inet_sk(sk)->hdrincl;
+               val = inet_test_bit(HDRINCL, sk);
                break;
        case IPV6_CHECKSUM:
                /*