From cafbe182a467bf6799242fd7468438cf1ab833dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Aug 2023 08:15:38 +0000 Subject: [PATCH] inet: move inet->hdrincl to inet->inet_flags IP_HDRINCL socket option can now be set/read without locking the socket. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/inet_sock.h | 4 ++-- net/ipv4/af_inet.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/ip_output.c | 5 +++-- net/ipv4/ip_sockglue.c | 18 ++++++++---------- net/ipv4/raw.c | 10 +++------- net/ipv4/route.c | 8 ++++---- net/ipv6/af_inet6.c | 2 +- net/ipv6/ip6_output.c | 5 +++-- net/ipv6/raw.c | 16 +++++----------- 10 files changed, 31 insertions(+), 41 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index d6ba963..ad1895e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -231,7 +231,6 @@ struct inet_sock { __u8 mc_ttl; __u8 pmtudisc; __u8 is_icsk:1, - hdrincl:1, mc_loop:1, transparent:1, mc_all:1, @@ -271,6 +270,7 @@ enum { INET_FLAGS_RECVERR = 9, INET_FLAGS_RECVERR_RFC4884 = 10, INET_FLAGS_FREEBIND = 11, + INET_FLAGS_HDRINCL = 12, }; /* cmsg flags for inet */ @@ -397,7 +397,7 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) { __u8 flags = 0; - if (inet_sk(sk)->transparent || inet_sk(sk)->hdrincl) + if (inet_sk(sk)->transparent || inet_test_bit(HDRINCL, sk)) flags |= FLOWI_FLAG_ANYSRC; return flags; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c59da65..5785fe9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -338,7 +338,7 @@ lookup_protocol: if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) - inet->hdrincl = 1; + inet_set_bit(HDRINCL, sk); } if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5a96f4f..98f3eb0 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -185,7 +185,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, inet_sockopt.recverr = inet_test_bit(RECVERR, sk); inet_sockopt.is_icsk = inet->is_icsk; inet_sockopt.freebind = inet_test_bit(FREEBIND, sk); - inet_sockopt.hdrincl = inet->hdrincl; + inet_sockopt.hdrincl = inet_test_bit(HDRINCL, sk); inet_sockopt.mc_loop = inet->mc_loop; inet_sockopt.transparent = inet->transparent; inet_sockopt.mc_all = inet->mc_all; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f28c875..8f396ea 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1039,7 +1039,7 @@ static int __ip_append_data(struct sock *sk, } } } else if ((flags & MSG_SPLICE_PAGES) && length) { - if (inet->hdrincl) + if (inet_test_bit(HDRINCL, sk)) return -EPERM; if (rt->dst.dev->features & NETIF_F_SG && getfrag == ip_generic_getfrag) @@ -1467,7 +1467,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk, * so icmphdr does not in skb linear region and can not get icmp_type * by icmp_hdr(skb)->type. */ - if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl) + if (sk->sk_type == SOCK_RAW && + !inet_test_bit(HDRINCL, sk)) icmp_type = fl4->fl4_icmp_type; else icmp_type = icmp_hdr(skb)->type; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6af8431..763456f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -988,6 +988,11 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, return -EINVAL; inet_assign_bit(FREEBIND, sk, val); return 0; + case IP_HDRINCL: + if (sk->sk_type != SOCK_RAW) + return -ENOPROTOOPT; + inet_assign_bit(HDRINCL, sk, val); + return 0; } err = 0; @@ -1052,13 +1057,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, goto e_inval; inet->uc_ttl = val; break; - case IP_HDRINCL: - if (sk->sk_type != SOCK_RAW) { - err = -ENOPROTOOPT; - break; - } - inet->hdrincl = val ? 1 : 0; - break; case IP_NODEFRAG: if (sk->sk_type != SOCK_RAW) { err = -ENOPROTOOPT; @@ -1578,6 +1576,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_FREEBIND: val = inet_test_bit(FREEBIND, sk); goto copyval; + case IP_HDRINCL: + val = inet_test_bit(HDRINCL, sk); + goto copyval; } if (needs_rtnl) @@ -1625,9 +1626,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, inet->uc_ttl); break; } - case IP_HDRINCL: - val = inet->hdrincl; - break; case IP_NODEFRAG: val = inet->nodefrag; break; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f4c27dc..4b5db5d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -251,7 +251,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) const struct iphdr *iph = (const struct iphdr *)skb->data; u8 *payload = skb->data + (iph->ihl << 2); - if (inet->hdrincl) + if (inet_test_bit(HDRINCL, sk)) payload = skb->data; ip_icmp_error(sk, skb, err, 0, info, payload); } @@ -491,12 +491,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (len > 0xFFFF) goto out; - /* hdrincl should be READ_ONCE(inet->hdrincl) - * but READ_ONCE() doesn't work with bit fields. - * Doing this indirectly yields the same result. - */ - hdrincl = inet->hdrincl; - hdrincl = READ_ONCE(hdrincl); + hdrincl = inet_test_bit(HDRINCL, sk); + /* * Check the flags. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92fede3..a4e153d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -515,13 +515,12 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4, __u8 scope = RT_SCOPE_UNIVERSE; if (sk) { - const struct inet_sock *inet = inet_sk(sk); - oif = sk->sk_bound_dev_if; mark = READ_ONCE(sk->sk_mark); tos = ip_sock_rt_tos(sk); scope = ip_sock_rt_scope(sk); - prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol; + prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW : + sk->sk_protocol; } flowi4_init_output(fl4, oif, mark, tos & IPTOS_RT_MASK, scope, @@ -555,7 +554,8 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk) & IPTOS_RT_MASK, ip_sock_rt_scope(sk), - inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_test_bit(HDRINCL, sk) ? + IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3ec0359..ed9d207 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -205,7 +205,7 @@ lookup_protocol: if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) - inet->hdrincl = 1; + inet_set_bit(HDRINCL, sk); } sk->sk_destruct = inet6_sock_destruct; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bc96559..f8a1f6b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1591,7 +1591,7 @@ emsgsize: } } } else if ((flags & MSG_SPLICE_PAGES) && length) { - if (inet_sk(sk)->hdrincl) + if (inet_test_bit(HDRINCL, sk)) return -EPERM; if (rt->dst.dev->features & NETIF_F_SG && getfrag == ip_generic_getfrag) @@ -1995,7 +1995,8 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); u8 icmp6_type; - if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl) + if (sk->sk_socket->type == SOCK_RAW && + !inet_test_bit(HDRINCL, sk)) icmp6_type = fl6->fl6_icmp_type; else icmp6_type = icmp6_hdr(skb)->icmp6_type; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ea16734..0eae766 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,7 +291,6 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -315,7 +314,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, } if (np->recverr) { u8 *payload = skb->data; - if (!inet->hdrincl) + if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } @@ -406,7 +405,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) skb->len, inet->inet_num, 0)); - if (inet->hdrincl) { + if (inet_test_bit(HDRINCL, sk)) { if (skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM); @@ -762,12 +761,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; - /* hdrincl should be READ_ONCE(inet->hdrincl) - * but READ_ONCE() doesn't work with bit fields. - * Doing this indirectly yields the same result. - */ - hdrincl = inet->hdrincl; - hdrincl = READ_ONCE(hdrincl); + hdrincl = inet_test_bit(HDRINCL, sk); /* * Get and verify the address. @@ -1000,7 +994,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_HDRINCL: if (sk->sk_type != SOCK_RAW) return -EINVAL; - inet_sk(sk)->hdrincl = !!val; + inet_assign_bit(HDRINCL, sk, val); return 0; case IPV6_CHECKSUM: if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && @@ -1068,7 +1062,7 @@ static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_HDRINCL: - val = inet_sk(sk)->hdrincl; + val = inet_test_bit(HDRINCL, sk); break; case IPV6_CHECKSUM: /* -- 2.7.4