ipv4: fix data-races around inet->inet_id
author Eric Dumazet <edumazet@google.com>
Sat, 19 Aug 2023 03:17:07 +0000 (03:17 +0000)
committer David S. Miller <davem@davemloft.net>
Sun, 20 Aug 2023 10:40:49 +0000 (11:40 +0100)
UDP sendmsg() is lockless, so ip_select_ident_segs()
can very well be run concurrently from multiple CPUs [1].

Convert inet->inet_id to an atomic_t, but implement
a dedicated path for TCP, which holds the socket lock
at this point, avoiding the cost of a locked
instruction (atomic_add_return()).

Note that this patch will cause a trivial merge conflict
because we added inet->flags in net-next tree.

v2: added missing change in
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
(David Ahern)

[1]

BUG: KCSAN: data-race in __ip_make_skb / __ip_make_skb

read-write to 0xffff888145af952a of 2 bytes by task 7803 on cpu 1:
ip_select_ident_segs include/net/ip.h:542 [inline]
ip_select_ident include/net/ip.h:556 [inline]
__ip_make_skb+0x844/0xc70 net/ipv4/ip_output.c:1446
ip_make_skb+0x233/0x2c0 net/ipv4/ip_output.c:1560
udp_sendmsg+0x1199/0x1250 net/ipv4/udp.c:1260
inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:830
sock_sendmsg_nosec net/socket.c:725 [inline]
sock_sendmsg net/socket.c:748 [inline]
____sys_sendmsg+0x37c/0x4d0 net/socket.c:2494
___sys_sendmsg net/socket.c:2548 [inline]
__sys_sendmmsg+0x269/0x500 net/socket.c:2634
__do_sys_sendmmsg net/socket.c:2663 [inline]
__se_sys_sendmmsg net/socket.c:2660 [inline]
__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2660
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff888145af952a of 2 bytes by task 7804 on cpu 0:
ip_select_ident_segs include/net/ip.h:541 [inline]
ip_select_ident include/net/ip.h:556 [inline]
__ip_make_skb+0x817/0xc70 net/ipv4/ip_output.c:1446
ip_make_skb+0x233/0x2c0 net/ipv4/ip_output.c:1560
udp_sendmsg+0x1199/0x1250 net/ipv4/udp.c:1260
inet_sendmsg+0x63/0x80 net/ipv4/af_inet.c:830
sock_sendmsg_nosec net/socket.c:725 [inline]
sock_sendmsg net/socket.c:748 [inline]
____sys_sendmsg+0x37c/0x4d0 net/socket.c:2494
___sys_sendmsg net/socket.c:2548 [inline]
__sys_sendmmsg+0x269/0x500 net/socket.c:2634
__do_sys_sendmmsg net/socket.c:2663 [inline]
__se_sys_sendmmsg net/socket.c:2660 [inline]
__x64_sys_sendmmsg+0x57/0x60 net/socket.c:2660
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0x184d -> 0x184e

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 7804 Comm: syz-executor.1 Not tainted 6.5.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/26/2023
==================================================================

Fixes: 23f57406b82d ("ipv4: avoid using shared IP generator for connected sockets")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
include/net/inet_sock.h
include/net/ip.h
net/dccp/ipv4.c
net/ipv4/af_inet.c
net/ipv4/datagram.c
net/ipv4/tcp_ipv4.c
net/sctp/socket.c

index c2e7037..7750702 100644 (file)
@@ -1466,7 +1466,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
        tp->write_seq = snd_isn;
        tp->snd_nxt = snd_isn;
        tp->snd_una = snd_isn;
-       inet_sk(sk)->inet_id = get_random_u16();
+       atomic_set(&inet_sk(sk)->inet_id, get_random_u16());
        assign_rxopt(sk, opt);
 
        if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
index 0bb32bf..491ceb7 100644 (file)
@@ -222,8 +222,8 @@ struct inet_sock {
        __s16                   uc_ttl;
        __u16                   cmsg_flags;
        struct ip_options_rcu __rcu     *inet_opt;
+       atomic_t                inet_id;
        __be16                  inet_sport;
-       __u16                   inet_id;
 
        __u8                    tos;
        __u8                    min_ttl;
index 3325211..19adacd 100644 (file)
@@ -538,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
         * generator as much as we can.
         */
        if (sk && inet_sk(sk)->inet_daddr) {
-               iph->id = htons(inet_sk(sk)->inet_id);
-               inet_sk(sk)->inet_id += segs;
+               int val;
+
+               /* avoid atomic operations for TCP,
+                * as we hold socket lock at this point.
+                */
+               if (sk_is_tcp(sk)) {
+                       sock_owned_by_me(sk);
+                       val = atomic_read(&inet_sk(sk)->inet_id);
+                       atomic_set(&inet_sk(sk)->inet_id, val + segs);
+               } else {
+                       val = atomic_add_return(segs, &inet_sk(sk)->inet_id);
+               }
+               iph->id = htons(val);
                return;
        }
        if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
index fa80793..a545ad7 100644 (file)
@@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                                                    inet->inet_daddr,
                                                    inet->inet_sport,
                                                    inet->inet_dport);
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        err = dccp_connect(sk);
        rt = NULL;
@@ -432,7 +432,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
        RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
        newinet->mc_index  = inet_iif(skb);
        newinet->mc_ttl    = ip_hdr(skb)->ttl;
-       newinet->inet_id   = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
                goto put_and_exit;
index 9b2ca2f..02736b8 100644 (file)
@@ -340,7 +340,7 @@ lookup_protocol:
        else
                inet->pmtudisc = IP_PMTUDISC_WANT;
 
-       inet->inet_id = 0;
+       atomic_set(&inet->inet_id, 0);
 
        sock_init_data(sock, sk);
 
index 4d1af0c..cb5dbee 100644 (file)
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
        reuseport_has_conns_set(sk);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        sk_dst_set(sk, &rt->dst);
        err = 0;
index a59cc4b..2dbdc26 100644 (file)
@@ -312,7 +312,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                                             inet->inet_daddr));
        }
 
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
@@ -1596,7 +1596,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
-       newinet->inet_id = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
index 6da738f..76f1bce 100644 (file)
@@ -9479,7 +9479,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
        newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
        newinet->inet_dport = htons(asoc->peer.port);
        newinet->pmtudisc = inet->pmtudisc;
-       newinet->inet_id = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        newinet->uc_ttl = inet->uc_ttl;
        newinet->mc_loop = 1;