af_unix: Fix some data-races around unix_sk(sk)->oob_skb.
author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Thu, 17 Mar 2022 03:08:08 +0000 (12:08 +0900)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 8 Apr 2022 12:23:41 +0000 (14:23 +0200)
[ Upstream commit e82025c623e2bf04d162bafceb66a59115814479 ]

Out-of-band data automatically places a "mark" showing where in the
sequence the out-of-band data would have been.  If the out-of-band data
implies cancelling everything sent so far, the "mark" is helpful to flush
them.  When the socket's read pointer reaches the "mark", the SIOCATMARK
ioctl(), e.g. ioctl(fd, SIOCATMARK, &atmark), sets a non-zero value to the
arg `atmark`.

The out-of-band data is queued in sk->sk_receive_queue as well as ordinary
data and also saved in unix_sk(sk)->oob_skb.  It can be used to test if the
head of the receive queue is the out-of-band data meaning the socket is at
the "mark".

While testing that, unix_ioctl() reads unix_sk(sk)->oob_skb locklessly.
Thus, all accesses to oob_skb need some basic protection to avoid
load/store tearing which KCSAN detects when these are called concurrently:

  - ioctl(fd_a, SIOCATMARK, &atmark, sizeof(atmark))
  - send(fd_b_connected_to_a, buf, sizeof(buf), MSG_OOB)

BUG: KCSAN: data-race in unix_ioctl / unix_stream_sendmsg

write to 0xffff888003d9cff0 of 8 bytes by task 175 on cpu 1:
 unix_stream_sendmsg (net/unix/af_unix.c:2087 net/unix/af_unix.c:2191)
 sock_sendmsg (net/socket.c:705 net/socket.c:725)
 __sys_sendto (net/socket.c:2040)
 __x64_sys_sendto (net/socket.c:2048)
 do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
 entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)

read to 0xffff888003d9cff0 of 8 bytes by task 176 on cpu 0:
 unix_ioctl (net/unix/af_unix.c:3101 (discriminator 1))
 sock_do_ioctl (net/socket.c:1128)
 sock_ioctl (net/socket.c:1242)
 __x64_sys_ioctl (fs/ioctl.c:52 fs/ioctl.c:874 fs/ioctl.c:860 fs/ioctl.c:860)
 do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
 entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)

value changed: 0xffff888003da0c00 -> 0xffff888003da0d00

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 176 Comm: unix_race_oob_i Not tainted 5.17.0-rc5-59529-g83dc4c2af682 #12
Hardware name: Red Hat KVM, BIOS 1.11.0-2.amzn2 04/01/2014

Fixes: 314001f0bf92 ("af_unix: Add OOB support")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/unix/af_unix.c

index b0bfc78..826ac39 100644 (file)
@@ -1996,7 +1996,7 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
        if (ousk->oob_skb)
                consume_skb(ousk->oob_skb);
 
-       ousk->oob_skb = skb;
+       WRITE_ONCE(ousk->oob_skb, skb);
 
        scm_stat_add(other, skb);
        skb_queue_tail(&other->sk_receive_queue, skb);
@@ -2514,9 +2514,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state)
 
        oob_skb = u->oob_skb;
 
-       if (!(state->flags & MSG_PEEK)) {
-               u->oob_skb = NULL;
-       }
+       if (!(state->flags & MSG_PEEK))
+               WRITE_ONCE(u->oob_skb, NULL);
 
        unix_state_unlock(sk);
 
@@ -2551,7 +2550,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
                                skb = NULL;
                        } else if (sock_flag(sk, SOCK_URGINLINE)) {
                                if (!(flags & MSG_PEEK)) {
-                                       u->oob_skb = NULL;
+                                       WRITE_ONCE(u->oob_skb, NULL);
                                        consume_skb(skb);
                                }
                        } else if (!(flags & MSG_PEEK)) {
@@ -3006,11 +3005,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        case SIOCATMARK:
                {
                        struct sk_buff *skb;
-                       struct unix_sock *u = unix_sk(sk);
                        int answ = 0;
 
                        skb = skb_peek(&sk->sk_receive_queue);
-                       if (skb && skb == u->oob_skb)
+                       if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
                                answ = 1;
                        err = put_user(answ, (int __user *)arg);
                }