af_unix: Fix data races around sk->sk_shutdown.
authorKuniyuki Iwashima <kuniyu@amazon.com>
Wed, 10 May 2023 00:34:56 +0000 (17:34 -0700)
committerJakub Kicinski <kuba@kernel.org>
Thu, 11 May 2023 02:06:53 +0000 (19:06 -0700)
KCSAN found a data race around sk->sk_shutdown where unix_release_sock()
and unix_shutdown() update it under unix_state_lock(), OTOH unix_poll()
and unix_dgram_poll() read it locklessly.

We need to annotate the writes and reads with WRITE_ONCE() and READ_ONCE().

BUG: KCSAN: data-race in unix_poll / unix_release_sock

write to 0xffff88800d0f8aec of 1 bytes by task 264 on cpu 0:
 unix_release_sock+0x75c/0x910 net/unix/af_unix.c:631
 unix_release+0x59/0x80 net/unix/af_unix.c:1042
 __sock_release+0x7d/0x170 net/socket.c:653
 sock_close+0x19/0x30 net/socket.c:1397
 __fput+0x179/0x5e0 fs/file_table.c:321
 ____fput+0x15/0x20 fs/file_table.c:349
 task_work_run+0x116/0x1a0 kernel/task_work.c:179
 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
 exit_to_user_mode_prepare+0x174/0x180 kernel/entry/common.c:204
 __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline]
 syscall_exit_to_user_mode+0x1a/0x30 kernel/entry/common.c:297
 do_syscall_64+0x4b/0x90 arch/x86/entry/common.c:86
 entry_SYSCALL_64_after_hwframe+0x72/0xdc

read to 0xffff88800d0f8aec of 1 bytes by task 222 on cpu 1:
 unix_poll+0xa3/0x2a0 net/unix/af_unix.c:3170
 sock_poll+0xcf/0x2b0 net/socket.c:1385
 vfs_poll include/linux/poll.h:88 [inline]
 ep_item_poll.isra.0+0x78/0xc0 fs/eventpoll.c:855
 ep_send_events fs/eventpoll.c:1694 [inline]
 ep_poll fs/eventpoll.c:1823 [inline]
 do_epoll_wait+0x6c4/0xea0 fs/eventpoll.c:2258
 __do_sys_epoll_wait fs/eventpoll.c:2270 [inline]
 __se_sys_epoll_wait fs/eventpoll.c:2265 [inline]
 __x64_sys_epoll_wait+0xcc/0x190 fs/eventpoll.c:2265
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x72/0xdc

value changed: 0x00 -> 0x03

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 222 Comm: dbus-broker Not tainted 6.3.0-rc7-02330-gca6270c12e20 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014

Fixes: 3c73419c09a5 ("af_unix: fix 'poll for write'/ connected DGRAM sockets")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/unix/af_unix.c

index 08102e7..cc695c9 100644 (file)
@@ -603,7 +603,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
-       sk->sk_shutdown = SHUTDOWN_MASK;
+       WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
@@ -628,7 +628,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
-                       skpair->sk_shutdown = SHUTDOWN_MASK;
+                       WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                WRITE_ONCE(skpair->sk_err, ECONNRESET);
                        unix_state_unlock(skpair);
@@ -3008,7 +3008,7 @@ static int unix_shutdown(struct socket *sock, int mode)
        ++mode;
 
        unix_state_lock(sk);
-       sk->sk_shutdown |= mode;
+       WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
        other = unix_peer(sk);
        if (other)
                sock_hold(other);
@@ -3028,7 +3028,7 @@ static int unix_shutdown(struct socket *sock, int mode)
                if (mode&SEND_SHUTDOWN)
                        peer_mode |= RCV_SHUTDOWN;
                unix_state_lock(other);
-               other->sk_shutdown |= peer_mode;
+               WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
                unix_state_unlock(other);
                other->sk_state_change(other);
                if (peer_mode == SHUTDOWN_MASK)
@@ -3160,16 +3160,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
 {
        struct sock *sk = sock->sk;
        __poll_t mask;
+       u8 shutdown;
 
        sock_poll_wait(file, sock, wait);
        mask = 0;
+       shutdown = READ_ONCE(sk->sk_shutdown);
 
        /* exceptional events? */
        if (READ_ONCE(sk->sk_err))
                mask |= EPOLLERR;
-       if (sk->sk_shutdown == SHUTDOWN_MASK)
+       if (shutdown == SHUTDOWN_MASK)
                mask |= EPOLLHUP;
-       if (sk->sk_shutdown & RCV_SHUTDOWN)
+       if (shutdown & RCV_SHUTDOWN)
                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
 
        /* readable? */
@@ -3203,9 +3205,11 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
        struct sock *sk = sock->sk, *other;
        unsigned int writable;
        __poll_t mask;
+       u8 shutdown;
 
        sock_poll_wait(file, sock, wait);
        mask = 0;
+       shutdown = READ_ONCE(sk->sk_shutdown);
 
        /* exceptional events? */
        if (READ_ONCE(sk->sk_err) ||
@@ -3213,9 +3217,9 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
                mask |= EPOLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
 
-       if (sk->sk_shutdown & RCV_SHUTDOWN)
+       if (shutdown & RCV_SHUTDOWN)
                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
-       if (sk->sk_shutdown == SHUTDOWN_MASK)
+       if (shutdown == SHUTDOWN_MASK)
                mask |= EPOLLHUP;
 
        /* readable? */