tcp: Fix data races around icsk->icsk_af_ops.
author: Kuniyuki Iwashima <kuniyu@amazon.com>
Thu, 6 Oct 2022 18:53:49 +0000 (11:53 -0700)
committer: Jakub Kicinski <kuba@kernel.org>
Thu, 13 Oct 2022 00:50:37 +0000 (17:50 -0700)
setsockopt(IPV6_ADDRFORM) and tcp_v6_connect() change icsk->icsk_af_ops
under lock_sock(), but tcp_(get|set)sockopt() read it locklessly.  To
avoid load/store tearing, we need to add READ_ONCE() and WRITE_ONCE()
for the reads and writes.

Thanks to Eric Dumazet for providing the syzbot report:

BUG: KCSAN: data-race in tcp_setsockopt / tcp_v6_connect

write to 0xffff88813c624518 of 8 bytes by task 23936 on cpu 0:
tcp_v6_connect+0x5b3/0xce0 net/ipv6/tcp_ipv6.c:240
__inet_stream_connect+0x159/0x6d0 net/ipv4/af_inet.c:660
inet_stream_connect+0x44/0x70 net/ipv4/af_inet.c:724
__sys_connect_file net/socket.c:1976 [inline]
__sys_connect+0x197/0x1b0 net/socket.c:1993
__do_sys_connect net/socket.c:2003 [inline]
__se_sys_connect net/socket.c:2000 [inline]
__x64_sys_connect+0x3d/0x50 net/socket.c:2000
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

read to 0xffff88813c624518 of 8 bytes by task 23937 on cpu 1:
tcp_setsockopt+0x147/0x1c80 net/ipv4/tcp.c:3789
sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3585
__sys_setsockopt+0x212/0x2b0 net/socket.c:2252
__do_sys_setsockopt net/socket.c:2263 [inline]
__se_sys_setsockopt net/socket.c:2260 [inline]
__x64_sys_setsockopt+0x62/0x70 net/socket.c:2260
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

value changed: 0xffffffff8539af68 -> 0xffffffff8539aff8

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 23937 Comm: syz-executor.5 Not tainted
6.0.0-rc4-syzkaller-00331-g4ed9c1e971b1-dirty #0

Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 08/26/2022

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/tcp.c
net/ipv6/ipv6_sockglue.c
net/ipv6/tcp_ipv6.c

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0c51abe..f823281 100644
@@ -3796,8 +3796,9 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
        const struct inet_connection_sock *icsk = inet_csk(sk);
 
        if (level != SOL_TCP)
-               return icsk->icsk_af_ops->setsockopt(sk, level, optname,
-                                                    optval, optlen);
+               /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
+               return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname,
+                                                               optval, optlen);
        return do_tcp_setsockopt(sk, level, optname, optval, optlen);
 }
 EXPORT_SYMBOL(tcp_setsockopt);
@@ -4396,8 +4397,9 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
        struct inet_connection_sock *icsk = inet_csk(sk);
 
        if (level != SOL_TCP)
-               return icsk->icsk_af_ops->getsockopt(sk, level, optname,
-                                                    optval, optlen);
+               /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */
+               return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname,
+                                                               optval, optlen);
        return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval),
                                 USER_SOCKPTR(optlen));
 }
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d7207a5..532f447 100644
@@ -479,7 +479,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
                                /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */
                                WRITE_ONCE(sk->sk_prot, &tcp_prot);
-                               icsk->icsk_af_ops = &ipv4_specific;
+                               /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+                               WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific);
                                sk->sk_socket->ops = &inet_stream_ops;
                                sk->sk_family = PF_INET;
                                tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a8adda6..2a3f929 100644
@@ -238,7 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 
-               icsk->icsk_af_ops = &ipv6_mapped;
+               /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+               WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
                if (sk_is_mptcp(sk))
                        mptcpv6_handle_mapped(sk, true);
                sk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -250,7 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
                if (err) {
                        icsk->icsk_ext_hdr_len = exthdrlen;
-                       icsk->icsk_af_ops = &ipv6_specific;
+                       /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+                       WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
                        if (sk_is_mptcp(sk))
                                mptcpv6_handle_mapped(sk, false);
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;