[ Upstream commit
e1d09c2c2f5793474556b60f83900e088d0d366d ]
KCSAN found a data race around sk->sk_shutdown where unix_release_sock()
and unix_shutdown() update it under unix_state_lock(), OTOH unix_poll()
and unix_dgram_poll() read it locklessly.
We need to annotate the writes and reads with WRITE_ONCE() and READ_ONCE().
BUG: KCSAN: data-race in unix_poll / unix_release_sock
write to 0xffff88800d0f8aec of 1 bytes by task 264 on cpu 0:
unix_release_sock+0x75c/0x910 net/unix/af_unix.c:631
unix_release+0x59/0x80 net/unix/af_unix.c:1042
__sock_release+0x7d/0x170 net/socket.c:653
sock_close+0x19/0x30 net/socket.c:1397
__fput+0x179/0x5e0 fs/file_table.c:321
____fput+0x15/0x20 fs/file_table.c:349
task_work_run+0x116/0x1a0 kernel/task_work.c:179
resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
exit_to_user_mode_prepare+0x174/0x180 kernel/entry/common.c:204
__syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline]
syscall_exit_to_user_mode+0x1a/0x30 kernel/entry/common.c:297
do_syscall_64+0x4b/0x90 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x72/0xdc
read to 0xffff88800d0f8aec of 1 bytes by task 222 on cpu 1:
unix_poll+0xa3/0x2a0 net/unix/af_unix.c:3170
sock_poll+0xcf/0x2b0 net/socket.c:1385
vfs_poll include/linux/poll.h:88 [inline]
ep_item_poll.isra.0+0x78/0xc0 fs/eventpoll.c:855
ep_send_events fs/eventpoll.c:1694 [inline]
ep_poll fs/eventpoll.c:1823 [inline]
do_epoll_wait+0x6c4/0xea0 fs/eventpoll.c:2258
__do_sys_epoll_wait fs/eventpoll.c:2270 [inline]
__se_sys_epoll_wait fs/eventpoll.c:2265 [inline]
__x64_sys_epoll_wait+0xcc/0x190 fs/eventpoll.c:2265
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x3b/0x90 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x72/0xdc
value changed: 0x00 -> 0x03
Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 222 Comm: dbus-broker Not tainted
6.3.0-rc7-02330-gca6270c12e20 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Fixes: 2c1dcdb2f616 ("af_unix: fix 'poll for write'/ connected DGRAM sockets")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
/* Clear state */
unix_state_lock(sk);
sock_orphan(sk);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
path = u->path;
u->path.dentry = NULL;
u->path.mnt = NULL;
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
unix_state_lock(skpair);
/* No more writes */
- skpair->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
skpair->sk_err = ECONNRESET;
unix_state_unlock(skpair);
++mode;
unix_state_lock(sk);
- sk->sk_shutdown |= mode;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
other = unix_peer(sk);
if (other)
sock_hold(other);
if (mode&SEND_SHUTDOWN)
peer_mode |= RCV_SHUTDOWN;
unix_state_lock(other);
- other->sk_shutdown |= peer_mode;
+ WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
unix_state_unlock(other);
other->sk_state_change(other);
if (peer_mode == SHUTDOWN_MASK)
{
struct sock *sk = sock->sk;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (sk->sk_err)
mask |= EPOLLERR;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
struct sock *sk = sock->sk, *other;
unsigned int writable;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
/* readable? */