// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					BSD.
 *		Marty Leisner	:	Fixes to fd passing.
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector.
 *		Heiko Eißfeldt	:	Missing verify_area check.
 *		Alan Cox	:	Started POSIXisms.
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					vfs locking.
 *		Kirk Petersen	:	Made this a module.
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect
 *					algorithm. Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid the huge amount
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					receive queue.
 *	     Artur Skawina	:	Hash function optimizations.
 *	  Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	    Malcolm Beattie	:	Set peercred for socketpair.
 *	   Michal Ostrowski	:	Module initialization cleanup.
 *	    Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT;
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+).
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
 *		mark and a fake inode identifier (nor the BSD first socket
 *		fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername
 *		- BSD bug??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not
 *		  intersect with BSD names.
 */
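/* For example, binding into the abstract namespace from userspace (a
 * minimal sketch; error handling omitted, needs <stddef.h>, <string.h>,
 * <sys/socket.h> and <sys/un.h>):
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	memcpy(a.sun_path + 1, "myname", 6);	(sun_path[0] stays 0)
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 6);
 */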
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 EXPORT_SYMBOL_GPL(unix_socket_table);
121 DEFINE_SPINLOCK(unix_table_lock);
122 EXPORT_SYMBOL_GPL(unix_table_lock);
123 static atomic_long_t unix_nr_socks;
126 static struct hlist_head *unix_sockets_unbound(void *addr)
128 unsigned long hash = (unsigned long)addr;
132 hash %= UNIX_HASH_SIZE;
133 return &unix_socket_table[UNIX_HASH_SIZE + hash];
136 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
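/* Bucket layout: bound sockets live in the lower half of unix_socket_table
 * (abstract names at index addr->hash, which is always < UNIX_HASH_SIZE;
 * filesystem sockets at an index derived from the inode number, with
 * addr->hash set to the UNIX_HASH_SIZE sentinel that the macro above
 * tests).  Unbound sockets live in the upper half, hashed by sock pointer
 * via unix_sockets_unbound().
 */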
138 #ifdef CONFIG_SECURITY_NETWORK
139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
141 UNIXCB(skb).secid = scm->secid;
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
146 scm->secid = UNIXCB(skb).secid;
149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
151 return (scm->secid == UNIXCB(skb).secid);
154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
164 #endif /* CONFIG_SECURITY_NETWORK */
/*
 * SMP locking strategy:
 *    the hash table is protected with the spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */
172 static inline unsigned int unix_hash_fold(__wsum n)
174 unsigned int hash = (__force unsigned int)csum_fold(n);
	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
180 #define unix_peer(sk) (unix_sk(sk)->peer)
182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
184 return unix_peer(osk) == sk;
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
189 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
192 static inline int unix_recvq_full(const struct sock *sk)
194 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
197 static inline int unix_recvq_full_lockless(const struct sock *sk)
199 return skb_queue_len_lockless(&sk->sk_receive_queue) >
200 READ_ONCE(sk->sk_max_ack_backlog);
203 struct sock *unix_peer_get(struct sock *s)
211 unix_state_unlock(s);
214 EXPORT_SYMBOL_GPL(unix_peer_get);
216 static inline void unix_release_addr(struct unix_address *addr)
218 if (refcount_dec_and_test(&addr->refcnt))
/*
 *	Check unix socket name:
 *		- should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL-terminated (an FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */
229 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
233 if (len <= sizeof(short) || len > sizeof(*sunaddr))
235 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
237 if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off-by-one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
245 ((char *)sunaddr)[len] = 0;
246 len = strlen(sunaddr->sun_path)+1+sizeof(short);
250 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
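	/* Worked example: { AF_UNIX, "\0foo" } with len == 6 skips the
	 * branch above, hashes all six bytes and returns len unchanged;
	 * "/tmp/x" enters it, is NUL-terminated in place and has len
	 * recomputed to strlen("/tmp/x") + 1 + sizeof(short) == 9, leaving
	 * *hashp untouched (filesystem sockets are hashed by inode instead,
	 * see unix_bind()).
	 */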
254 static void __unix_remove_socket(struct sock *sk)
256 sk_del_node_init(sk);
259 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
261 WARN_ON(!sk_unhashed(sk));
262 sk_add_node(sk, list);
265 static inline void unix_remove_socket(struct sock *sk)
267 spin_lock(&unix_table_lock);
268 __unix_remove_socket(sk);
269 spin_unlock(&unix_table_lock);
272 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
274 spin_lock(&unix_table_lock);
275 __unix_insert_socket(list, sk);
276 spin_unlock(&unix_table_lock);
279 static struct sock *__unix_find_socket_byname(struct net *net,
280 struct sockaddr_un *sunname,
281 int len, int type, unsigned int hash)
285 sk_for_each(s, &unix_socket_table[hash ^ type]) {
286 struct unix_sock *u = unix_sk(s);
288 if (!net_eq(sock_net(s), net))
291 if (u->addr->len == len &&
292 !memcmp(u->addr->name, sunname, len))
298 static inline struct sock *unix_find_socket_byname(struct net *net,
299 struct sockaddr_un *sunname,
305 spin_lock(&unix_table_lock);
306 s = __unix_find_socket_byname(net, sunname, len, type, hash);
309 spin_unlock(&unix_table_lock);
313 static struct sock *unix_find_socket_byinode(struct inode *i)
317 spin_lock(&unix_table_lock);
319 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
320 struct dentry *dentry = unix_sk(s)->path.dentry;
322 if (dentry && d_backing_inode(dentry) == i) {
329 spin_unlock(&unix_table_lock);
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */
358 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
362 wait_queue_head_t *u_sleep;
364 u = container_of(q, struct unix_sock, peer_wake);
366 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
368 u->peer_wake.private = NULL;
370 /* relaying can only happen while the wq still exists */
371 u_sleep = sk_sleep(&u->sk);
373 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
378 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
380 struct unix_sock *u, *u_other;
384 u_other = unix_sk(other);
386 spin_lock(&u_other->peer_wait.lock);
388 if (!u->peer_wake.private) {
389 u->peer_wake.private = other;
390 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
395 spin_unlock(&u_other->peer_wait.lock);
399 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
402 struct unix_sock *u, *u_other;
405 u_other = unix_sk(other);
406 spin_lock(&u_other->peer_wait.lock);
408 if (u->peer_wake.private == other) {
409 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
410 u->peer_wake.private = NULL;
413 spin_unlock(&u_other->peer_wait.lock);
416 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
419 unix_dgram_peer_wake_disconnect(sk, other);
420 wake_up_interruptible_poll(sk_sleep(sk),
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
430 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
434 connected = unix_dgram_peer_wake_connect(sk, other);
	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
441 if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
445 unix_dgram_peer_wake_disconnect(sk, other);
450 static int unix_writable(const struct sock *sk)
452 return sk->sk_state != TCP_LISTEN &&
453 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
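/* I.e. a socket counts as writable while at most a quarter of its send
 * buffer is committed to queued skbs ((wmem_alloc << 2) <= sk_sndbuf),
 * and a listening socket is never writable.
 */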
456 static void unix_write_space(struct sock *sk)
458 struct socket_wq *wq;
461 if (unix_writable(sk)) {
462 wq = rcu_dereference(sk->sk_wq);
463 if (skwq_has_sleeper(wq))
464 wake_up_interruptible_sync_poll(&wq->wait,
465 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
466 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows us to
 * do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
475 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
477 if (!skb_queue_empty(&sk->sk_receive_queue)) {
478 skb_queue_purge(&sk->sk_receive_queue);
479 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
	/* If one link of a bidirectional dgram pipe is disconnected,
	 * we signal the error. Messages are lost. Do not do this
	 * when the peer was not connected to us.
	 */
485 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
486 other->sk_err = ECONNRESET;
487 other->sk_error_report(other);
492 static void unix_sock_destructor(struct sock *sk)
494 struct unix_sock *u = unix_sk(sk);
496 skb_queue_purge(&sk->sk_receive_queue);
498 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
499 WARN_ON(!sk_unhashed(sk));
500 WARN_ON(sk->sk_socket);
501 if (!sock_flag(sk, SOCK_DEAD)) {
502 pr_info("Attempt to release alive unix socket: %p\n", sk);
507 unix_release_addr(u->addr);
509 atomic_long_dec(&unix_nr_socks);
511 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
513 #ifdef UNIX_REFCNT_DEBUG
514 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
515 atomic_long_read(&unix_nr_socks));
519 static void unix_release_sock(struct sock *sk, int embrion)
521 struct unix_sock *u = unix_sk(sk);
527 unix_remove_socket(sk);
532 sk->sk_shutdown = SHUTDOWN_MASK;
534 u->path.dentry = NULL;
536 state = sk->sk_state;
537 sk->sk_state = TCP_CLOSE;
538 unix_state_unlock(sk);
540 wake_up_interruptible_all(&u->peer_wait);
542 skpair = unix_peer(sk);
544 if (skpair != NULL) {
545 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
546 unix_state_lock(skpair);
548 skpair->sk_shutdown = SHUTDOWN_MASK;
549 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
550 skpair->sk_err = ECONNRESET;
551 unix_state_unlock(skpair);
552 skpair->sk_state_change(skpair);
553 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
556 unix_dgram_peer_wake_disconnect(sk, skpair);
557 sock_put(skpair); /* It may now die */
558 unix_peer(sk) = NULL;
561 /* Try to flush out this socket. Throw out buffers at least */
563 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
564 if (state == TCP_LISTEN)
565 unix_release_sock(skb->sk, 1);
566 /* passed fds are erased in the kfree_skb hook */
567 UNIXCB(skb).consumed = skb->len;
576 /* ---- Socket is dead now and most probably destroyed ---- */
	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */
589 if (unix_tot_inflight)
590 unix_gc(); /* Garbage collect fds */
593 static void init_peercred(struct sock *sk)
595 put_pid(sk->sk_peer_pid);
596 if (sk->sk_peer_cred)
597 put_cred(sk->sk_peer_cred);
598 sk->sk_peer_pid = get_pid(task_tgid(current));
599 sk->sk_peer_cred = get_current_cred();
602 static void copy_peercred(struct sock *sk, struct sock *peersk)
604 put_pid(sk->sk_peer_pid);
605 if (sk->sk_peer_cred)
606 put_cred(sk->sk_peer_cred);
607 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
608 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
611 static int unix_listen(struct socket *sock, int backlog)
614 struct sock *sk = sock->sk;
615 struct unix_sock *u = unix_sk(sk);
616 struct pid *old_pid = NULL;
619 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
620 goto out; /* Only stream/seqpacket sockets accept */
623 goto out; /* No listens on an unbound socket */
625 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
627 if (backlog > sk->sk_max_ack_backlog)
628 wake_up_interruptible_all(&u->peer_wait);
629 sk->sk_max_ack_backlog = backlog;
630 sk->sk_state = TCP_LISTEN;
631 /* set credentials so connect can copy them */
636 unix_state_unlock(sk);
642 static int unix_release(struct socket *);
643 static int unix_bind(struct socket *, struct sockaddr *, int);
644 static int unix_stream_connect(struct socket *, struct sockaddr *,
645 int addr_len, int flags);
646 static int unix_socketpair(struct socket *, struct socket *);
647 static int unix_accept(struct socket *, struct socket *, int, bool);
648 static int unix_getname(struct socket *, struct sockaddr *, int);
649 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
650 static __poll_t unix_dgram_poll(struct file *, struct socket *,
652 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
654 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
656 static int unix_shutdown(struct socket *, int);
657 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
658 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
659 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
660 size_t size, int flags);
661 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
662 struct pipe_inode_info *, size_t size,
664 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
665 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
666 static int unix_dgram_connect(struct socket *, struct sockaddr *,
668 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
669 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
672 static int unix_set_peek_off(struct sock *sk, int val)
674 struct unix_sock *u = unix_sk(sk);
676 if (mutex_lock_interruptible(&u->iolock))
679 sk->sk_peek_off = val;
680 mutex_unlock(&u->iolock);
685 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
687 struct sock *sk = sock->sk;
691 u = unix_sk(sock->sk);
692 seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
696 static const struct proto_ops unix_stream_ops = {
698 .owner = THIS_MODULE,
699 .release = unix_release,
701 .connect = unix_stream_connect,
702 .socketpair = unix_socketpair,
703 .accept = unix_accept,
704 .getname = unix_getname,
708 .compat_ioctl = unix_compat_ioctl,
710 .listen = unix_listen,
711 .shutdown = unix_shutdown,
712 .setsockopt = sock_no_setsockopt,
713 .getsockopt = sock_no_getsockopt,
714 .sendmsg = unix_stream_sendmsg,
715 .recvmsg = unix_stream_recvmsg,
716 .mmap = sock_no_mmap,
717 .sendpage = unix_stream_sendpage,
718 .splice_read = unix_stream_splice_read,
719 .set_peek_off = unix_set_peek_off,
720 .show_fdinfo = unix_show_fdinfo,
723 static const struct proto_ops unix_dgram_ops = {
725 .owner = THIS_MODULE,
726 .release = unix_release,
728 .connect = unix_dgram_connect,
729 .socketpair = unix_socketpair,
730 .accept = sock_no_accept,
731 .getname = unix_getname,
732 .poll = unix_dgram_poll,
735 .compat_ioctl = unix_compat_ioctl,
737 .listen = sock_no_listen,
738 .shutdown = unix_shutdown,
739 .setsockopt = sock_no_setsockopt,
740 .getsockopt = sock_no_getsockopt,
741 .sendmsg = unix_dgram_sendmsg,
742 .recvmsg = unix_dgram_recvmsg,
743 .mmap = sock_no_mmap,
744 .sendpage = sock_no_sendpage,
745 .set_peek_off = unix_set_peek_off,
746 .show_fdinfo = unix_show_fdinfo,
749 static const struct proto_ops unix_seqpacket_ops = {
751 .owner = THIS_MODULE,
752 .release = unix_release,
754 .connect = unix_stream_connect,
755 .socketpair = unix_socketpair,
756 .accept = unix_accept,
757 .getname = unix_getname,
758 .poll = unix_dgram_poll,
761 .compat_ioctl = unix_compat_ioctl,
763 .listen = unix_listen,
764 .shutdown = unix_shutdown,
765 .setsockopt = sock_no_setsockopt,
766 .getsockopt = sock_no_getsockopt,
767 .sendmsg = unix_seqpacket_sendmsg,
768 .recvmsg = unix_seqpacket_recvmsg,
769 .mmap = sock_no_mmap,
770 .sendpage = sock_no_sendpage,
771 .set_peek_off = unix_set_peek_off,
772 .show_fdinfo = unix_show_fdinfo,
775 static struct proto unix_proto = {
777 .owner = THIS_MODULE,
778 .obj_size = sizeof(struct unix_sock),
781 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
783 struct sock *sk = NULL;
786 atomic_long_inc(&unix_nr_socks);
787 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
790 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
794 sock_init_data(sock, sk);
796 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
797 sk->sk_write_space = unix_write_space;
798 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
799 sk->sk_destruct = unix_sock_destructor;
801 u->path.dentry = NULL;
803 spin_lock_init(&u->lock);
804 atomic_long_set(&u->inflight, 0);
805 INIT_LIST_HEAD(&u->link);
806 mutex_init(&u->iolock); /* single task reading lock */
807 mutex_init(&u->bindlock); /* single task binding lock */
808 init_waitqueue_head(&u->peer_wait);
809 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
810 memset(&u->scm_stat, 0, sizeof(struct scm_stat));
811 unix_insert_socket(unix_sockets_unbound(sk), sk);
814 atomic_long_dec(&unix_nr_socks);
817 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
823 static int unix_create(struct net *net, struct socket *sock, int protocol,
826 if (protocol && protocol != PF_UNIX)
827 return -EPROTONOSUPPORT;
829 sock->state = SS_UNCONNECTED;
831 switch (sock->type) {
833 sock->ops = &unix_stream_ops;
		/* Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 * nothing uses it.
		 */
840 sock->type = SOCK_DGRAM;
843 sock->ops = &unix_dgram_ops;
846 sock->ops = &unix_seqpacket_ops;
849 return -ESOCKTNOSUPPORT;
852 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
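/* From userspace all three supported types map straight through, e.g.
 * (a minimal sketch):
 *
 *	int s = socket(AF_UNIX, SOCK_SEQPACKET, 0);
 *
 * SOCK_STREAM and SOCK_DGRAM work likewise, SOCK_RAW is silently treated
 * as SOCK_DGRAM above, and any protocol other than 0 (PF_UNIX) is
 * rejected with EPROTONOSUPPORT.
 */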
855 static int unix_release(struct socket *sock)
857 struct sock *sk = sock->sk;
862 unix_release_sock(sk, 0);
868 static int unix_autobind(struct socket *sock)
870 struct sock *sk = sock->sk;
871 struct net *net = sock_net(sk);
872 struct unix_sock *u = unix_sk(sk);
873 static u32 ordernum = 1;
874 struct unix_address *addr;
876 unsigned int retries = 0;
878 err = mutex_lock_interruptible(&u->bindlock);
887 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
891 addr->name->sun_family = AF_UNIX;
892 refcount_set(&addr->refcnt, 1);
895 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
896 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
898 spin_lock(&unix_table_lock);
899 ordernum = (ordernum+1)&0xFFFFF;
901 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
903 spin_unlock(&unix_table_lock);
		/* __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
910 if (retries++ == 0xFFFFF) {
917 addr->hash ^= sk->sk_type;
919 __unix_remove_socket(sk);
920 smp_store_release(&u->addr, addr);
921 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
922 spin_unlock(&unix_table_lock);
925 out: mutex_unlock(&u->bindlock);
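/* An autobound socket thus ends up with an abstract name consisting of a
 * leading zero byte plus five hex digits of ordernum (e.g. "\0" "00042",
 * addr->len == 8); /proc/net/unix renders the zero byte as '@', see
 * unix_seq_show().
 */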
929 static struct sock *unix_find_other(struct net *net,
930 struct sockaddr_un *sunname, int len,
931 int type, unsigned int hash, int *error)
937 if (sunname->sun_path[0]) {
939 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
942 inode = d_backing_inode(path.dentry);
943 err = inode_permission(inode, MAY_WRITE);
948 if (!S_ISSOCK(inode->i_mode))
950 u = unix_find_socket_byinode(inode);
954 if (u->sk_type == type)
960 if (u->sk_type != type) {
966 u = unix_find_socket_byname(net, sunname, len, type, hash);
968 struct dentry *dentry;
969 dentry = unix_sk(u)->path.dentry;
971 touch_atime(&unix_sk(u)->path);
984 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
986 struct dentry *dentry;
	/*
	 * Get the parent directory, calculate the hash for the last
	 * component.
	 */
993 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
994 err = PTR_ERR(dentry);
999 * All right, let's create it.
1001 err = security_path_mknod(&path, dentry, mode, 0);
1003 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1005 res->mnt = mntget(path.mnt);
1006 res->dentry = dget(dentry);
1009 done_path_create(&path, dentry);
1013 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1015 struct sock *sk = sock->sk;
1016 struct net *net = sock_net(sk);
1017 struct unix_sock *u = unix_sk(sk);
1018 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1019 char *sun_path = sunaddr->sun_path;
1022 struct unix_address *addr;
1023 struct hlist_head *list;
1024 struct path path = { };
1027 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1028 sunaddr->sun_family != AF_UNIX)
1031 if (addr_len == sizeof(short)) {
1032 err = unix_autobind(sock);
1036 err = unix_mkname(sunaddr, addr_len, &hash);
1042 umode_t mode = S_IFSOCK |
1043 (SOCK_INODE(sock)->i_mode & ~current_umask());
1044 err = unix_mknod(sun_path, mode, &path);
1052 err = mutex_lock_interruptible(&u->bindlock);
1061 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1065 memcpy(addr->name, sunaddr, addr_len);
1066 addr->len = addr_len;
1067 addr->hash = hash ^ sk->sk_type;
1068 refcount_set(&addr->refcnt, 1);
1071 addr->hash = UNIX_HASH_SIZE;
1072 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1073 spin_lock(&unix_table_lock);
1075 list = &unix_socket_table[hash];
1077 spin_lock(&unix_table_lock);
1079 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1080 sk->sk_type, hash)) {
1081 unix_release_addr(addr);
1085 list = &unix_socket_table[addr->hash];
1089 __unix_remove_socket(sk);
1090 smp_store_release(&u->addr, addr);
1091 __unix_insert_socket(list, sk);
1094 spin_unlock(&unix_table_lock);
1096 mutex_unlock(&u->bindlock);
1104 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1106 if (unlikely(sk1 == sk2) || !sk2) {
1107 unix_state_lock(sk1);
1111 unix_state_lock(sk1);
1112 unix_state_lock_nested(sk2);
1114 unix_state_lock(sk2);
1115 unix_state_lock_nested(sk1);
1119 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1121 if (unlikely(sk1 == sk2) || !sk2) {
1122 unix_state_unlock(sk1);
1125 unix_state_unlock(sk1);
1126 unix_state_unlock(sk2);
1129 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1130 int alen, int flags)
1132 struct sock *sk = sock->sk;
1133 struct net *net = sock_net(sk);
1134 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1140 if (alen < offsetofend(struct sockaddr, sa_family))
1143 if (addr->sa_family != AF_UNSPEC) {
1144 err = unix_mkname(sunaddr, alen, &hash);
1149 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1150 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1154 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1158 unix_state_double_lock(sk, other);
1160 /* Apparently VFS overslept socket death. Retry. */
1161 if (sock_flag(other, SOCK_DEAD)) {
1162 unix_state_double_unlock(sk, other);
1168 if (!unix_may_send(sk, other))
1171 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1177 * 1003.1g breaking connected state with AF_UNSPEC
1180 unix_state_double_lock(sk, other);
1184 * If it was connected, reconnect.
1186 if (unix_peer(sk)) {
1187 struct sock *old_peer = unix_peer(sk);
1188 unix_peer(sk) = other;
1189 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1191 unix_state_double_unlock(sk, other);
1193 if (other != old_peer)
1194 unix_dgram_disconnected(sk, old_peer);
1197 unix_peer(sk) = other;
1198 unix_state_double_unlock(sk, other);
1203 unix_state_double_unlock(sk, other);
1209 static long unix_wait_for_peer(struct sock *other, long timeo)
1211 struct unix_sock *u = unix_sk(other);
1215 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1217 sched = !sock_flag(other, SOCK_DEAD) &&
1218 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1219 unix_recvq_full(other);
1221 unix_state_unlock(other);
1224 timeo = schedule_timeout(timeo);
1226 finish_wait(&u->peer_wait, &wait);
1230 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1231 int addr_len, int flags)
1233 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1234 struct sock *sk = sock->sk;
1235 struct net *net = sock_net(sk);
1236 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1237 struct sock *newsk = NULL;
1238 struct sock *other = NULL;
1239 struct sk_buff *skb = NULL;
1245 err = unix_mkname(sunaddr, addr_len, &hash);
1250 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1251 (err = unix_autobind(sock)) != 0)
1254 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
	/* First of all allocate resources.
	 * If we make it after the state is locked,
	 * we will have to recheck all again in any case.
	 */
1263 /* create new sock for complete connection */
1264 newsk = unix_create1(sock_net(sk), NULL, 0);
1268 /* Allocate skb for sending to listening sock */
1269 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1274 /* Find listening sock. */
1275 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1279 /* Latch state of peer */
1280 unix_state_lock(other);
1282 /* Apparently VFS overslept socket death. Retry. */
1283 if (sock_flag(other, SOCK_DEAD)) {
1284 unix_state_unlock(other);
1289 err = -ECONNREFUSED;
1290 if (other->sk_state != TCP_LISTEN)
1292 if (other->sk_shutdown & RCV_SHUTDOWN)
1295 if (unix_recvq_full(other)) {
1300 timeo = unix_wait_for_peer(other, timeo);
1302 err = sock_intr_errno(timeo);
1303 if (signal_pending(current))
	/* It is a tricky place. We need to grab our state lock and cannot
	 * drop the lock on the peer. It is dangerous because deadlock is
	 * possible. Connecting to self and simultaneous
	 * attempts to connect are eliminated by checking socket
	 * state. other is TCP_LISTEN; if sk is TCP_LISTEN we
	 * check this before attempting to grab the lock.
	 *
	 * Well, and we have to recheck the state after the socket is locked.
	 */
1324 /* This is ok... continue with connect */
1326 case TCP_ESTABLISHED:
1327 /* Socket is already connected */
1335 unix_state_lock_nested(sk);
1337 if (sk->sk_state != st) {
1338 unix_state_unlock(sk);
1339 unix_state_unlock(other);
1344 err = security_unix_stream_connect(sk, other, newsk);
1346 unix_state_unlock(sk);
	/* The way is open! Quickly set all the necessary fields... */
1353 unix_peer(newsk) = sk;
1354 newsk->sk_state = TCP_ESTABLISHED;
1355 newsk->sk_type = sk->sk_type;
1356 init_peercred(newsk);
1357 newu = unix_sk(newsk);
1358 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1359 otheru = unix_sk(other);
	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path, visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
1378 if (otheru->path.dentry) {
1379 path_get(&otheru->path);
1380 newu->path = otheru->path;
1382 refcount_inc(&otheru->addr->refcnt);
1383 smp_store_release(&newu->addr, otheru->addr);
1385 /* Set credentials */
1386 copy_peercred(sk, other);
1388 sock->state = SS_CONNECTED;
1389 sk->sk_state = TCP_ESTABLISHED;
1392 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1393 unix_peer(sk) = newsk;
1395 unix_state_unlock(sk);
	/* take the skb and send info to the listening sock */
1398 spin_lock(&other->sk_receive_queue.lock);
1399 __skb_queue_tail(&other->sk_receive_queue, skb);
1400 spin_unlock(&other->sk_receive_queue.lock);
1401 unix_state_unlock(other);
1402 other->sk_data_ready(other);
1408 unix_state_unlock(other);
1413 unix_release_sock(newsk, 0);
1419 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1421 struct sock *ska = socka->sk, *skb = sockb->sk;
1423 /* Join our sockets back to back */
1426 unix_peer(ska) = skb;
1427 unix_peer(skb) = ska;
1431 if (ska->sk_type != SOCK_DGRAM) {
1432 ska->sk_state = TCP_ESTABLISHED;
1433 skb->sk_state = TCP_ESTABLISHED;
1434 socka->state = SS_CONNECTED;
1435 sockb->state = SS_CONNECTED;
1440 static void unix_sock_inherit_flags(const struct socket *old,
1443 if (test_bit(SOCK_PASSCRED, &old->flags))
1444 set_bit(SOCK_PASSCRED, &new->flags);
1445 if (test_bit(SOCK_PASSSEC, &old->flags))
1446 set_bit(SOCK_PASSSEC, &new->flags);
1449 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1452 struct sock *sk = sock->sk;
1454 struct sk_buff *skb;
1458 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1462 if (sk->sk_state != TCP_LISTEN)
1465 /* If socket state is TCP_LISTEN it cannot change (for now...),
1466 * so that no locks are necessary.
1469 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1471 /* This means receive shutdown. */
1478 skb_free_datagram(sk, skb);
1479 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1481 /* attach accepted sock to socket */
1482 unix_state_lock(tsk);
1483 newsock->state = SS_CONNECTED;
1484 unix_sock_inherit_flags(sock, newsock);
1485 sock_graft(tsk, newsock);
1486 unix_state_unlock(tsk);
1494 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1496 struct sock *sk = sock->sk;
1497 struct unix_address *addr;
1498 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1502 sk = unix_peer_get(sk);
1512 addr = smp_load_acquire(&unix_sk(sk)->addr);
1514 sunaddr->sun_family = AF_UNIX;
1515 sunaddr->sun_path[0] = 0;
1516 err = sizeof(short);
1519 memcpy(sunaddr, addr->name, addr->len);
1526 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1530 UNIXCB(skb).pid = get_pid(scm->pid);
1531 UNIXCB(skb).uid = scm->creds.uid;
1532 UNIXCB(skb).gid = scm->creds.gid;
1533 UNIXCB(skb).fp = NULL;
1534 unix_get_secdata(scm, skb);
1535 if (scm->fp && send_fds)
1536 err = unix_attach_fds(scm, skb);
1538 skb->destructor = unix_destruct_scm;
1542 static bool unix_passcred_enabled(const struct socket *sock,
1543 const struct sock *other)
1545 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1546 !other->sk_socket ||
1547 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1551 * Some apps rely on write() giving SCM_CREDENTIALS
1552 * We include credentials if source or destination socket
1553 * asserted SOCK_PASSCRED.
1555 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1556 const struct sock *other)
1558 if (UNIXCB(skb).pid)
1560 if (unix_passcred_enabled(sock, other)) {
1561 UNIXCB(skb).pid = get_pid(task_tgid(current));
1562 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1566 static int maybe_init_creds(struct scm_cookie *scm,
1567 struct socket *socket,
1568 const struct sock *other)
1571 struct msghdr msg = { .msg_controllen = 0 };
1573 err = scm_send(socket, &msg, scm, false);
1577 if (unix_passcred_enabled(socket, other)) {
1578 scm->pid = get_pid(task_tgid(current));
1579 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1584 static bool unix_skb_scm_eq(struct sk_buff *skb,
1585 struct scm_cookie *scm)
1587 const struct unix_skb_parms *u = &UNIXCB(skb);
1589 return u->pid == scm->pid &&
1590 uid_eq(u->uid, scm->creds.uid) &&
1591 gid_eq(u->gid, scm->creds.gid) &&
1592 unix_secdata_eq(scm, skb);
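/* The stream receive path uses unix_skb_scm_eq() to decide whether two
 * queued skbs may be glued into one read: only if the writers' pid, uid,
 * gid and security label all match (see unix_stream_read_generic()).
 */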
1595 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1597 struct scm_fp_list *fp = UNIXCB(skb).fp;
1598 struct unix_sock *u = unix_sk(sk);
1600 lockdep_assert_held(&sk->sk_receive_queue.lock);
1602 if (unlikely(fp && fp->count))
1603 u->scm_stat.nr_fds += fp->count;
1606 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1608 struct scm_fp_list *fp = UNIXCB(skb).fp;
1609 struct unix_sock *u = unix_sk(sk);
1611 lockdep_assert_held(&sk->sk_receive_queue.lock);
1613 if (unlikely(fp && fp->count))
1614 u->scm_stat.nr_fds -= fp->count;
1618 * Send AF_UNIX data.
1621 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1624 struct sock *sk = sock->sk;
1625 struct net *net = sock_net(sk);
1626 struct unix_sock *u = unix_sk(sk);
1627 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1628 struct sock *other = NULL;
1629 int namelen = 0; /* fake GCC */
1632 struct sk_buff *skb;
1634 struct scm_cookie scm;
1639 err = scm_send(sock, msg, &scm, false);
1644 if (msg->msg_flags&MSG_OOB)
1647 if (msg->msg_namelen) {
1648 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1655 other = unix_peer_get(sk);
1660 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1661 && (err = unix_autobind(sock)) != 0)
1665 if (len > sk->sk_sndbuf - 32)
1668 if (len > SKB_MAX_ALLOC) {
1669 data_len = min_t(size_t,
1670 len - SKB_MAX_ALLOC,
1671 MAX_SKB_FRAGS * PAGE_SIZE);
1672 data_len = PAGE_ALIGN(data_len);
1674 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
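		/* E.g. a 64 KiB datagram on a 4 KiB-page system keeps roughly
		 * the first 16 KiB (SKB_MAX_ALLOC) in the linear skb head and
		 * spreads the page-aligned remainder over page fragments,
		 * capped at MAX_SKB_FRAGS * PAGE_SIZE.
		 */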
1677 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1678 msg->msg_flags & MSG_DONTWAIT, &err,
1679 PAGE_ALLOC_COSTLY_ORDER);
1683 err = unix_scm_to_skb(&scm, skb, true);
1687 skb_put(skb, len - data_len);
1688 skb->data_len = data_len;
1690 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1694 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1699 if (sunaddr == NULL)
1702 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1708 if (sk_filter(other, skb) < 0) {
1709 /* Toss the packet but do not return any error to the sender */
1715 unix_state_lock(other);
1718 if (!unix_may_send(sk, other))
	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
1726 unix_state_unlock(other);
1730 unix_state_lock(sk);
1733 if (unix_peer(sk) == other) {
1734 unix_peer(sk) = NULL;
1735 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1737 unix_state_unlock(sk);
1739 unix_dgram_disconnected(sk, other);
1741 err = -ECONNREFUSED;
1743 unix_state_unlock(sk);
1753 if (other->sk_shutdown & RCV_SHUTDOWN)
1756 if (sk->sk_type != SOCK_SEQPACKET) {
1757 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1762 /* other == sk && unix_peer(other) != sk if
1763 * - unix_peer(sk) == NULL, destination address bound to sk
1764 * - unix_peer(sk) == sk by time of get but disconnected before lock
1767 unlikely(unix_peer(other) != sk &&
1768 unix_recvq_full_lockless(other))) {
1770 timeo = unix_wait_for_peer(other, timeo);
1772 err = sock_intr_errno(timeo);
1773 if (signal_pending(current))
1780 unix_state_unlock(other);
1781 unix_state_double_lock(sk, other);
1784 if (unix_peer(sk) != other ||
1785 unix_dgram_peer_wake_me(sk, other)) {
1793 goto restart_locked;
1797 if (unlikely(sk_locked))
1798 unix_state_unlock(sk);
1800 if (sock_flag(other, SOCK_RCVTSTAMP))
1801 __net_timestamp(skb);
1802 maybe_add_creds(skb, sock, other);
1803 spin_lock(&other->sk_receive_queue.lock);
1804 scm_stat_add(other, skb);
1805 __skb_queue_tail(&other->sk_receive_queue, skb);
1806 spin_unlock(&other->sk_receive_queue.lock);
1807 unix_state_unlock(other);
1808 other->sk_data_ready(other);
1815 unix_state_unlock(sk);
1816 unix_state_unlock(other);
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, with a minimum of a full page.
 */
1829 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
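/* With 4 KiB pages get_order(32768) is 3, so UNIX_SKB_FRAGS_SZ works out
 * to exactly 32768: each stream skb carries at most SKB_MAX_HEAD(0) linear
 * bytes plus up to 32 KiB of page fragments (see unix_stream_sendmsg()).
 */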
1831 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1834 struct sock *sk = sock->sk;
1835 struct sock *other = NULL;
1837 struct sk_buff *skb;
1839 struct scm_cookie scm;
1840 bool fds_sent = false;
1844 err = scm_send(sock, msg, &scm, false);
1849 if (msg->msg_flags&MSG_OOB)
1852 if (msg->msg_namelen) {
1853 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1857 other = unix_peer(sk);
1862 if (sk->sk_shutdown & SEND_SHUTDOWN)
1865 while (sent < len) {
1868 /* Keep two messages in the pipe so it schedules better */
1869 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1871 /* allow fallback to order-0 allocations */
1872 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1874 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1876 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1878 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1879 msg->msg_flags & MSG_DONTWAIT, &err,
1880 get_order(UNIX_SKB_FRAGS_SZ));
1884 /* Only send the fds in the first buffer */
1885 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1892 skb_put(skb, size - data_len);
1893 skb->data_len = data_len;
1895 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1901 unix_state_lock(other);
1903 if (sock_flag(other, SOCK_DEAD) ||
1904 (other->sk_shutdown & RCV_SHUTDOWN))
1907 maybe_add_creds(skb, sock, other);
1908 spin_lock(&other->sk_receive_queue.lock);
1909 scm_stat_add(other, skb);
1910 __skb_queue_tail(&other->sk_receive_queue, skb);
1911 spin_unlock(&other->sk_receive_queue.lock);
1912 unix_state_unlock(other);
1913 other->sk_data_ready(other);
1922 unix_state_unlock(other);
1925 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1926 send_sig(SIGPIPE, current, 0);
1930 return sent ? : err;
1933 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1934 int offset, size_t size, int flags)
1937 bool send_sigpipe = false;
1938 bool init_scm = true;
1939 struct scm_cookie scm;
1940 struct sock *other, *sk = socket->sk;
1941 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1943 if (flags & MSG_OOB)
1946 other = unix_peer(sk);
1947 if (!other || sk->sk_state != TCP_ESTABLISHED)
1952 unix_state_unlock(other);
1953 mutex_unlock(&unix_sk(other)->iolock);
1954 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1960 /* we must acquire iolock as we modify already present
1961 * skbs in the sk_receive_queue and mess with skb->len
1963 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1965 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1969 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1971 send_sigpipe = true;
1975 unix_state_lock(other);
1977 if (sock_flag(other, SOCK_DEAD) ||
1978 other->sk_shutdown & RCV_SHUTDOWN) {
1980 send_sigpipe = true;
1981 goto err_state_unlock;
1985 err = maybe_init_creds(&scm, socket, other);
1987 goto err_state_unlock;
1991 skb = skb_peek_tail(&other->sk_receive_queue);
1992 if (tail && tail == skb) {
1994 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2001 } else if (newskb) {
		/* this is the fast path; we don't necessarily need to
		 * call kfree_skb() even though with newskb == NULL
		 * this does no harm
		 */
2006 consume_skb(newskb);
2010 if (skb_append_pagefrags(skb, page, offset, size)) {
2016 skb->data_len += size;
2017 skb->truesize += size;
2018 refcount_add(size, &sk->sk_wmem_alloc);
2021 err = unix_scm_to_skb(&scm, skb, false);
2023 goto err_state_unlock;
2024 spin_lock(&other->sk_receive_queue.lock);
2025 __skb_queue_tail(&other->sk_receive_queue, newskb);
2026 spin_unlock(&other->sk_receive_queue.lock);
2029 unix_state_unlock(other);
2030 mutex_unlock(&unix_sk(other)->iolock);
2032 other->sk_data_ready(other);
2037 unix_state_unlock(other);
2039 mutex_unlock(&unix_sk(other)->iolock);
2042 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2043 send_sig(SIGPIPE, current, 0);
2049 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2053 struct sock *sk = sock->sk;
2055 err = sock_error(sk);
2059 if (sk->sk_state != TCP_ESTABLISHED)
2062 if (msg->msg_namelen)
2063 msg->msg_namelen = 0;
2065 return unix_dgram_sendmsg(sock, msg, len);
2068 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2069 size_t size, int flags)
2071 struct sock *sk = sock->sk;
2073 if (sk->sk_state != TCP_ESTABLISHED)
2076 return unix_dgram_recvmsg(sock, msg, size, flags);
2079 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2081 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2084 msg->msg_namelen = addr->len;
2085 memcpy(msg->msg_name, addr->name, addr->len);
2089 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2090 size_t size, int flags)
2092 struct scm_cookie scm;
2093 struct sock *sk = sock->sk;
2094 struct unix_sock *u = unix_sk(sk);
2095 struct sk_buff *skb, *last;
2104 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2107 mutex_lock(&u->iolock);
2109 skip = sk_peek_offset(sk, flags);
2110 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2111 scm_stat_del, &skip, &err, &last);
2115 mutex_unlock(&u->iolock);
2120 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2121 &err, &timeo, last));
2123 if (!skb) { /* implies iolock unlocked */
2124 unix_state_lock(sk);
2125 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2126 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2127 (sk->sk_shutdown & RCV_SHUTDOWN))
2129 unix_state_unlock(sk);
2133 if (wq_has_sleeper(&u->peer_wait))
2134 wake_up_interruptible_sync_poll(&u->peer_wait,
2135 EPOLLOUT | EPOLLWRNORM |
2139 unix_copy_addr(msg, skb->sk);
2141 if (size > skb->len - skip)
2142 size = skb->len - skip;
2143 else if (size < skb->len - skip)
2144 msg->msg_flags |= MSG_TRUNC;
2146 err = skb_copy_datagram_msg(skb, skip, msg, size);
2150 if (sock_flag(sk, SOCK_RCVTSTAMP))
2151 __sock_recv_timestamp(msg, sk, skb);
2153 memset(&scm, 0, sizeof(scm));
2155 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2156 unix_set_secdata(&scm, skb);
2158 if (!(flags & MSG_PEEK)) {
2160 unix_detach_fds(&scm, skb);
2162 sk_peek_offset_bwd(sk, skb->len);
	/* It is questionable: on PEEK we could:
	   - do not return fds - good, but too simple 8)
	   - return fds, and do not return them on read (old strategy,
	     apparently wrong)
	   - clone fds (I chose it for now, it is the most universal
	     solution)

	   POSIX 1003.1g does not actually define this clearly
	   at all. POSIX 1003.1g doesn't define a lot of things
	   clearly, however!
	*/
2177 sk_peek_offset_fwd(sk, size);
2180 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2182 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2184 scm_recv(sock, msg, &scm, flags);
2187 skb_free_datagram(sk, skb);
2188 mutex_unlock(&u->iolock);
2194 * Sleep until more data has arrived. But check for races..
2196 static long unix_stream_data_wait(struct sock *sk, long timeo,
2197 struct sk_buff *last, unsigned int last_len,
2200 struct sk_buff *tail;
2203 unix_state_lock(sk);
2206 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2208 tail = skb_peek_tail(&sk->sk_receive_queue);
2210 (tail && tail->len != last_len) ||
2212 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2213 signal_pending(current) ||
2217 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2218 unix_state_unlock(sk);
2220 timeo = freezable_schedule_timeout(timeo);
2222 timeo = schedule_timeout(timeo);
2223 unix_state_lock(sk);
2225 if (sock_flag(sk, SOCK_DEAD))
2228 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2231 finish_wait(sk_sleep(sk), &wait);
2232 unix_state_unlock(sk);
2236 static unsigned int unix_skb_len(const struct sk_buff *skb)
2238 return skb->len - UNIXCB(skb).consumed;
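/* UNIXCB(skb).consumed counts how much of this skb a stream reader has
 * already copied out; a partially read skb stays on the receive queue
 * until unix_skb_len() drops to zero, which is what lets a later
 * recvmsg() resume in the middle of an skb.
 */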
2241 struct unix_stream_read_state {
2242 int (*recv_actor)(struct sk_buff *, int, int,
2243 struct unix_stream_read_state *);
2244 struct socket *socket;
2246 struct pipe_inode_info *pipe;
2249 unsigned int splice_flags;
2252 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2255 struct scm_cookie scm;
2256 struct socket *sock = state->socket;
2257 struct sock *sk = sock->sk;
2258 struct unix_sock *u = unix_sk(sk);
2260 int flags = state->flags;
2261 int noblock = flags & MSG_DONTWAIT;
2262 bool check_creds = false;
2267 size_t size = state->size;
2268 unsigned int last_len;
2270 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2275 if (unlikely(flags & MSG_OOB)) {
2280 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2281 timeo = sock_rcvtimeo(sk, noblock);
2283 memset(&scm, 0, sizeof(scm));
	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg()
	 */
2288 mutex_lock(&u->iolock);
2290 skip = max(sk_peek_offset(sk, flags), 0);
2295 struct sk_buff *skb, *last;
2298 unix_state_lock(sk);
2299 if (sock_flag(sk, SOCK_DEAD)) {
2303 last = skb = skb_peek(&sk->sk_receive_queue);
2304 last_len = last ? last->len : 0;
2307 if (copied >= target)
2311 * POSIX 1003.1g mandates this order.
2314 err = sock_error(sk);
2317 if (sk->sk_shutdown & RCV_SHUTDOWN)
2320 unix_state_unlock(sk);
2326 mutex_unlock(&u->iolock);
2328 timeo = unix_stream_data_wait(sk, timeo, last,
2329 last_len, freezable);
2331 if (signal_pending(current)) {
2332 err = sock_intr_errno(timeo);
2337 mutex_lock(&u->iolock);
2340 unix_state_unlock(sk);
2344 while (skip >= unix_skb_len(skb)) {
2345 skip -= unix_skb_len(skb);
2347 last_len = skb->len;
2348 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2353 unix_state_unlock(sk);
2356 /* Never glue messages from different writers */
2357 if (!unix_skb_scm_eq(skb, &scm))
2359 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2360 /* Copy credentials */
2361 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2362 unix_set_secdata(&scm, skb);
2366 /* Copy address just once */
2367 if (state->msg && state->msg->msg_name) {
2368 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2369 state->msg->msg_name);
2370 unix_copy_addr(state->msg, skb->sk);
2374 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2376 chunk = state->recv_actor(skb, skip, chunk, state);
2377 drop_skb = !unix_skb_len(skb);
2378 /* skb is only safe to use if !drop_skb */
2389 /* the skb was touched by a concurrent reader;
2390 * we should not expect anything from this skb
2391 * anymore and assume it invalid - we can be
2392 * sure it was dropped from the socket queue
2394 * let's report a short read
2400 /* Mark read part of skb as used */
2401 if (!(flags & MSG_PEEK)) {
2402 UNIXCB(skb).consumed += chunk;
2404 sk_peek_offset_bwd(sk, chunk);
2406 if (UNIXCB(skb).fp) {
2407 spin_lock(&sk->sk_receive_queue.lock);
2408 scm_stat_del(sk, skb);
2409 spin_unlock(&sk->sk_receive_queue.lock);
2410 unix_detach_fds(&scm, skb);
2413 if (unix_skb_len(skb))
2416 skb_unlink(skb, &sk->sk_receive_queue);
2422 /* It is questionable, see note in unix_dgram_recvmsg.
2425 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2427 sk_peek_offset_fwd(sk, chunk);
2434 last_len = skb->len;
2435 unix_state_lock(sk);
2436 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2439 unix_state_unlock(sk);
2444 mutex_unlock(&u->iolock);
2446 scm_recv(sock, state->msg, &scm, flags);
2450 return copied ? : err;
2453 static int unix_stream_read_actor(struct sk_buff *skb,
2454 int skip, int chunk,
2455 struct unix_stream_read_state *state)
2459 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2461 return ret ?: chunk;
2464 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2465 size_t size, int flags)
2467 struct unix_stream_read_state state = {
2468 .recv_actor = unix_stream_read_actor,
2475 return unix_stream_read_generic(&state, true);
2478 static int unix_stream_splice_actor(struct sk_buff *skb,
2479 int skip, int chunk,
2480 struct unix_stream_read_state *state)
2482 return skb_splice_bits(skb, state->socket->sk,
2483 UNIXCB(skb).consumed + skip,
2484 state->pipe, chunk, state->splice_flags);
2487 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2488 struct pipe_inode_info *pipe,
2489 size_t size, unsigned int flags)
2491 struct unix_stream_read_state state = {
2492 .recv_actor = unix_stream_splice_actor,
2496 .splice_flags = flags,
2499 if (unlikely(*ppos))
2502 if (sock->file->f_flags & O_NONBLOCK ||
2503 flags & SPLICE_F_NONBLOCK)
2504 state.flags = MSG_DONTWAIT;
2506 return unix_stream_read_generic(&state, false);
2509 static int unix_shutdown(struct socket *sock, int mode)
2511 struct sock *sk = sock->sk;
2514 if (mode < SHUT_RD || mode > SHUT_RDWR)
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;
2523 unix_state_lock(sk);
2524 sk->sk_shutdown |= mode;
2525 other = unix_peer(sk);
2528 unix_state_unlock(sk);
2529 sk->sk_state_change(sk);
2532 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2536 if (mode&RCV_SHUTDOWN)
2537 peer_mode |= SEND_SHUTDOWN;
2538 if (mode&SEND_SHUTDOWN)
2539 peer_mode |= RCV_SHUTDOWN;
2540 unix_state_lock(other);
2541 other->sk_shutdown |= peer_mode;
2542 unix_state_unlock(other);
2543 other->sk_state_change(other);
2544 if (peer_mode == SHUTDOWN_MASK)
2545 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2546 else if (peer_mode & RCV_SHUTDOWN)
2547 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2555 long unix_inq_len(struct sock *sk)
2557 struct sk_buff *skb;
2560 if (sk->sk_state == TCP_LISTEN)
2563 spin_lock(&sk->sk_receive_queue.lock);
2564 if (sk->sk_type == SOCK_STREAM ||
2565 sk->sk_type == SOCK_SEQPACKET) {
2566 skb_queue_walk(&sk->sk_receive_queue, skb)
2567 amount += unix_skb_len(skb);
2569 skb = skb_peek(&sk->sk_receive_queue);
2573 spin_unlock(&sk->sk_receive_queue.lock);
2577 EXPORT_SYMBOL_GPL(unix_inq_len);
2579 long unix_outq_len(struct sock *sk)
2581 return sk_wmem_alloc_get(sk);
2583 EXPORT_SYMBOL_GPL(unix_outq_len);
2585 static int unix_open_file(struct sock *sk)
2591 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2594 if (!smp_load_acquire(&unix_sk(sk)->addr))
2597 path = unix_sk(sk)->path;
2603 fd = get_unused_fd_flags(O_CLOEXEC);
2607 f = dentry_open(&path, O_PATH, current_cred());
2621 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2623 struct sock *sk = sock->sk;
2629 amount = unix_outq_len(sk);
2630 err = put_user(amount, (int __user *)arg);
2633 amount = unix_inq_len(sk);
2637 err = put_user(amount, (int __user *)arg);
2640 err = unix_open_file(sk);
2649 #ifdef CONFIG_COMPAT
2650 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2652 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2656 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2658 struct sock *sk = sock->sk;
2661 sock_poll_wait(file, sock, wait);
2664 /* exceptional events? */
2667 if (sk->sk_shutdown == SHUTDOWN_MASK)
2669 if (sk->sk_shutdown & RCV_SHUTDOWN)
2670 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2673 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2674 mask |= EPOLLIN | EPOLLRDNORM;
	/* Connection-based sockets need to check for termination and startup */
2677 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2678 sk->sk_state == TCP_CLOSE)
	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
2685 if (unix_writable(sk))
2686 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2691 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2694 struct sock *sk = sock->sk, *other;
2695 unsigned int writable;
2698 sock_poll_wait(file, sock, wait);
2701 /* exceptional events? */
2702 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2704 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2706 if (sk->sk_shutdown & RCV_SHUTDOWN)
2707 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2708 if (sk->sk_shutdown == SHUTDOWN_MASK)
2712 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2713 mask |= EPOLLIN | EPOLLRDNORM;
	/* Connection-based sockets need to check for termination and startup */
2716 if (sk->sk_type == SOCK_SEQPACKET) {
2717 if (sk->sk_state == TCP_CLOSE)
2719 /* connection hasn't started yet? */
2720 if (sk->sk_state == TCP_SYN_SENT)
2724 /* No write status requested, avoid expensive OUT tests. */
2725 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2728 writable = unix_writable(sk);
2730 unix_state_lock(sk);
2732 other = unix_peer(sk);
2733 if (other && unix_peer(other) != sk &&
2734 unix_recvq_full(other) &&
2735 unix_dgram_peer_wake_me(sk, other))
2738 unix_state_unlock(sk);
2742 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2744 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2749 #ifdef CONFIG_PROC_FS
2751 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2753 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2754 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2755 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
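/* The seq_file position packs (bucket, 1-based offset within the bucket)
 * into a single loff_t: with 64-bit longs and UNIX_HASH_BITS == 8,
 * BUCKET_SPACE is 54 and set_bucket_offset(2, 5) == (2UL << 54) | 5.
 * Offset 0 is reserved for SEQ_START_TOKEN.
 */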
2757 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2759 unsigned long offset = get_offset(*pos);
2760 unsigned long bucket = get_bucket(*pos);
2762 unsigned long count = 0;
2764 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2765 if (sock_net(sk) != seq_file_net(seq))
2767 if (++count == offset)
2774 static struct sock *unix_next_socket(struct seq_file *seq,
2778 unsigned long bucket;
2780 while (sk > (struct sock *)SEQ_START_TOKEN) {
2784 if (sock_net(sk) == seq_file_net(seq))
2789 sk = unix_from_bucket(seq, pos);
2794 bucket = get_bucket(*pos) + 1;
2795 *pos = set_bucket_offset(bucket, 1);
2796 } while (bucket < ARRAY_SIZE(unix_socket_table));
2801 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2802 __acquires(unix_table_lock)
2804 spin_lock(&unix_table_lock);
2807 return SEQ_START_TOKEN;
2809 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2812 return unix_next_socket(seq, NULL, pos);
2815 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2818 return unix_next_socket(seq, v, pos);
2821 static void unix_seq_stop(struct seq_file *seq, void *v)
2822 __releases(unix_table_lock)
2824 spin_unlock(&unix_table_lock);
2827 static int unix_seq_show(struct seq_file *seq, void *v)
2830 if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			"Inode Path\n");
2835 struct unix_sock *u = unix_sk(s);
2838 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2840 refcount_read(&s->sk_refcnt),
2842 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2845 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2846 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
		if (u->addr) {	/* under unix_table_lock here */
2854 len = u->addr->len - sizeof(short);
2855 if (!UNIX_ABSTRACT(s))
2861 for ( ; i < len; i++)
2862 seq_putc(seq, u->addr->name->sun_path[i] ?:
2865 unix_state_unlock(s);
2866 seq_putc(seq, '\n');
2872 static const struct seq_operations unix_seq_ops = {
2873 .start = unix_seq_start,
2874 .next = unix_seq_next,
2875 .stop = unix_seq_stop,
2876 .show = unix_seq_show,
2880 static const struct net_proto_family unix_family_ops = {
2882 .create = unix_create,
2883 .owner = THIS_MODULE,
2887 static int __net_init unix_net_init(struct net *net)
2889 int error = -ENOMEM;
2891 net->unx.sysctl_max_dgram_qlen = 10;
2892 if (unix_sysctl_register(net))
2895 #ifdef CONFIG_PROC_FS
2896 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2897 sizeof(struct seq_net_private))) {
2898 unix_sysctl_unregister(net);
2907 static void __net_exit unix_net_exit(struct net *net)
2909 unix_sysctl_unregister(net);
2910 remove_proc_entry("unix", net->proc_net);
2913 static struct pernet_operations unix_net_ops = {
2914 .init = unix_net_init,
2915 .exit = unix_net_exit,
2918 static int __init af_unix_init(void)
2922 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2924 rc = proto_register(&unix_proto, 1);
2926 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2930 sock_register(&unix_family_ops);
2931 register_pernet_subsys(&unix_net_ops);
2936 static void __exit af_unix_exit(void)
2938 sock_unregister(PF_UNIX);
2939 proto_unregister(&unix_proto);
2940 unregister_pernet_subsys(&unix_net_ops);
2943 /* Earlier than device_initcall() so that other drivers invoking
2944 request_module() don't end up in a loop when modprobe tries
2945 to use a UNIX socket. But later than subsys_initcall() because
2946 we depend on stuff initialised there */
2947 fs_initcall(af_unix_init);
2948 module_exit(af_unix_exit);
2950 MODULE_LICENSE("GPL");
2951 MODULE_ALIAS_NETPROTO(PF_UNIX);