net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
  28  *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
  29  *                                      by above two patches.
  30  *           Andrea Arcangeli   :       If possible we block in connect(2)
  31  *                                      if the max backlog of the listen socket
  32  *                                      is been reached. This won't break
  33  *                                      old apps and it will avoid huge amount
  34  *                                      of socks hashed (this for unix_gc()
  35  *                                      performances reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116 #include <linux/btf_ids.h>
 117
 118 #include "scm.h"
 119
 120 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 121 EXPORT_SYMBOL_GPL(unix_socket_table);
 122 DEFINE_SPINLOCK(unix_table_lock);
 123 EXPORT_SYMBOL_GPL(unix_table_lock);
 124 static atomic_long_t unix_nr_socks;
 125
 126
 127 static struct hlist_head *unix_sockets_unbound(void *addr)
 128 {
 129         unsigned long hash = (unsigned long)addr;
 130
 131         hash ^= hash >> 16;
 132         hash ^= hash >> 8;
 133         hash %= UNIX_HASH_SIZE;
 134         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 135 }
 136
 137 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 138
 139 #ifdef CONFIG_SECURITY_NETWORK
 140 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141 {
 142         UNIXCB(skb).secid = scm->secid;
 143 }
 144
 145 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 146 {
 147         scm->secid = UNIXCB(skb).secid;
 148 }
 149
 150 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 151 {
 152         return (scm->secid == UNIXCB(skb).secid);
 153 }
 154 #else
 155 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 156 { }
 157
 158 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 159 { }
 160
 161 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 162 {
 163         return true;
 164 }
 165 #endif /* CONFIG_SECURITY_NETWORK */
 166
 167 /*
 168  *  SMP locking strategy:
 169  *    hash table is protected with spinlock unix_table_lock
 170  *    each socket state is protected by separate spin lock.
 171  */
 172
 173 static inline unsigned int unix_hash_fold(__wsum n)
 174 {
 175         unsigned int hash = (__force unsigned int)csum_fold(n);
 176
 177         hash ^= hash>>8;
 178         return hash&(UNIX_HASH_SIZE-1);
 179 }
 180
 181 #define unix_peer(sk) (unix_sk(sk)->peer)
 182
 183 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 184 {
 185         return unix_peer(osk) == sk;
 186 }
 187
 188 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 189 {
 190         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 191 }
 192
 193 static inline int unix_recvq_full(const struct sock *sk)
 194 {
 195         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 196 }
 197
 198 static inline int unix_recvq_full_lockless(const struct sock *sk)
 199 {
 200         return skb_queue_len_lockless(&sk->sk_receive_queue) >
 201                 READ_ONCE(sk->sk_max_ack_backlog);
 202 }
 203
 204 struct sock *unix_peer_get(struct sock *s)
 205 {
 206         struct sock *peer;
 207
 208         unix_state_lock(s);
 209         peer = unix_peer(s);
 210         if (peer)
 211                 sock_hold(peer);
 212         unix_state_unlock(s);
 213         return peer;
 214 }
 215 EXPORT_SYMBOL_GPL(unix_peer_get);
 216
 217 static inline void unix_release_addr(struct unix_address *addr)
 218 {
 219         if (refcount_dec_and_test(&addr->refcnt))
 220                 kfree(addr);
 221 }
 222
 223 /*
 224  *      Check unix socket name:
 225  *              - should be not zero length.
 226  *              - if started by not zero, should be NULL terminated (FS object)
 227  *              - if started by zero, it is abstract name.
 228  */
 229
 230 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 231 {
 232         *hashp = 0;
 233
 234         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 235                 return -EINVAL;
 236         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 237                 return -EINVAL;
 238         if (sunaddr->sun_path[0]) {
 239                 /*
 240                  * This may look like an off by one error but it is a bit more
 241                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 242                  * sun_path[108] doesn't as such exist.  However in kernel space
 243                  * we are guaranteed that it is a valid memory location in our
 244                  * kernel address buffer.
 245                  */
 246                 ((char *)sunaddr)[len] = 0;
 247                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 248                 return len;
 249         }
 250
 251         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 252         return len;
 253 }
 254
 255 static void __unix_remove_socket(struct sock *sk)
 256 {
 257         sk_del_node_init(sk);
 258 }
 259
 260 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 261 {
 262         WARN_ON(!sk_unhashed(sk));
 263         sk_add_node(sk, list);
 264 }
 265
 266 static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
 267                             unsigned hash)
 268 {
 269         __unix_remove_socket(sk);
 270         smp_store_release(&unix_sk(sk)->addr, addr);
 271         __unix_insert_socket(&unix_socket_table[hash], sk);
 272 }
 273
 274 static inline void unix_remove_socket(struct sock *sk)
 275 {
 276         spin_lock(&unix_table_lock);
 277         __unix_remove_socket(sk);
 278         spin_unlock(&unix_table_lock);
 279 }
 280
 281 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 282 {
 283         spin_lock(&unix_table_lock);
 284         __unix_insert_socket(list, sk);
 285         spin_unlock(&unix_table_lock);
 286 }
 287
 288 static struct sock *__unix_find_socket_byname(struct net *net,
 289                                               struct sockaddr_un *sunname,
 290                                               int len, unsigned int hash)
 291 {
 292         struct sock *s;
 293
 294         sk_for_each(s, &unix_socket_table[hash]) {
 295                 struct unix_sock *u = unix_sk(s);
 296
 297                 if (!net_eq(sock_net(s), net))
 298                         continue;
 299
 300                 if (u->addr->len == len &&
 301                     !memcmp(u->addr->name, sunname, len))
 302                         return s;
 303         }
 304         return NULL;
 305 }
 306
 307 static inline struct sock *unix_find_socket_byname(struct net *net,
 308                                                    struct sockaddr_un *sunname,
 309                                                    int len, unsigned int hash)
 310 {
 311         struct sock *s;
 312
 313         spin_lock(&unix_table_lock);
 314         s = __unix_find_socket_byname(net, sunname, len, hash);
 315         if (s)
 316                 sock_hold(s);
 317         spin_unlock(&unix_table_lock);
 318         return s;
 319 }
 320
 321 static struct sock *unix_find_socket_byinode(struct inode *i)
 322 {
 323         struct sock *s;
 324
 325         spin_lock(&unix_table_lock);
 326         sk_for_each(s,
 327                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 328                 struct dentry *dentry = unix_sk(s)->path.dentry;
 329
 330                 if (dentry && d_backing_inode(dentry) == i) {
 331                         sock_hold(s);
 332                         goto found;
 333                 }
 334         }
 335         s = NULL;
 336 found:
 337         spin_unlock(&unix_table_lock);
 338         return s;
 339 }
 340
 341 /* Support code for asymmetrically connected dgram sockets
 342  *
 343  * If a datagram socket is connected to a socket not itself connected
 344  * to the first socket (eg, /dev/log), clients may only enqueue more
 345  * messages if the present receive queue of the server socket is not
 346  * "too large". This means there's a second writeability condition
 347  * poll and sendmsg need to test. The dgram recv code will do a wake
 348  * up on the peer_wait wait queue of a socket upon reception of a
 349  * datagram which needs to be propagated to sleeping would-be writers
 350  * since these might not have sent anything so far. This can't be
 351  * accomplished via poll_wait because the lifetime of the server
 352  * socket might be less than that of its clients if these break their
 353  * association with it or if the server socket is closed while clients
 354  * are still connected to it and there's no way to inform "a polling
 355  * implementation" that it should let go of a certain wait queue
 356  *
 357  * In order to propagate a wake up, a wait_queue_entry_t of the client
 358  * socket is enqueued on the peer_wait queue of the server socket
 359  * whose wake function does a wake_up on the ordinary client socket
 360  * wait queue. This connection is established whenever a write (or
 361  * poll for write) hit the flow control condition and broken when the
 362  * association to the server socket is dissolved or after a wake up
 363  * was relayed.
 364  */
 365
 366 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 367                                       void *key)
 368 {
 369         struct unix_sock *u;
 370         wait_queue_head_t *u_sleep;
 371
 372         u = container_of(q, struct unix_sock, peer_wake);
 373
 374         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 375                             q);
 376         u->peer_wake.private = NULL;
 377
 378         /* relaying can only happen while the wq still exists */
 379         u_sleep = sk_sleep(&u->sk);
 380         if (u_sleep)
 381                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 382
 383         return 0;
 384 }
 385
 386 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 387 {
 388         struct unix_sock *u, *u_other;
 389         int rc;
 390
 391         u = unix_sk(sk);
 392         u_other = unix_sk(other);
 393         rc = 0;
 394         spin_lock(&u_other->peer_wait.lock);
 395
 396         if (!u->peer_wake.private) {
 397                 u->peer_wake.private = other;
 398                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 399
 400                 rc = 1;
 401         }
 402
 403         spin_unlock(&u_other->peer_wait.lock);
 404         return rc;
 405 }
 406
 407 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 408                                             struct sock *other)
 409 {
 410         struct unix_sock *u, *u_other;
 411
 412         u = unix_sk(sk);
 413         u_other = unix_sk(other);
 414         spin_lock(&u_other->peer_wait.lock);
 415
 416         if (u->peer_wake.private == other) {
 417                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 418                 u->peer_wake.private = NULL;
 419         }
 420
 421         spin_unlock(&u_other->peer_wait.lock);
 422 }
 423
 424 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 425                                                    struct sock *other)
 426 {
 427         unix_dgram_peer_wake_disconnect(sk, other);
 428         wake_up_interruptible_poll(sk_sleep(sk),
 429                                    EPOLLOUT |
 430                                    EPOLLWRNORM |
 431                                    EPOLLWRBAND);
 432 }
 433
 434 /* preconditions:
 435  *      - unix_peer(sk) == other
 436  *      - association is stable
 437  */
 438 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 439 {
 440         int connected;
 441
 442         connected = unix_dgram_peer_wake_connect(sk, other);
 443
 444         /* If other is SOCK_DEAD, we want to make sure we signal
 445          * POLLOUT, such that a subsequent write() can get a
 446          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 447          * to other and its full, we will hang waiting for POLLOUT.
 448          */
 449         if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
 450                 return 1;
 451
 452         if (connected)
 453                 unix_dgram_peer_wake_disconnect(sk, other);
 454
 455         return 0;
 456 }
 457
 458 static int unix_writable(const struct sock *sk)
 459 {
 460         return sk->sk_state != TCP_LISTEN &&
 461                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 462 }
 463
 464 static void unix_write_space(struct sock *sk)
 465 {
 466         struct socket_wq *wq;
 467
 468         rcu_read_lock();
 469         if (unix_writable(sk)) {
 470                 wq = rcu_dereference(sk->sk_wq);
 471                 if (skwq_has_sleeper(wq))
 472                         wake_up_interruptible_sync_poll(&wq->wait,
 473                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 474                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 475         }
 476         rcu_read_unlock();
 477 }
 478
 479 /* When dgram socket disconnects (or changes its peer), we clear its receive
 480  * queue of packets arrived from previous peer. First, it allows to do
 481  * flow control based only on wmem_alloc; second, sk connected to peer
 482  * may receive messages only from that peer. */
 483 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 484 {
 485         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 486                 skb_queue_purge(&sk->sk_receive_queue);
 487                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 488
 489                 /* If one link of bidirectional dgram pipe is disconnected,
 490                  * we signal error. Messages are lost. Do not make this,
 491                  * when peer was not connected to us.
 492                  */
 493                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 494                         other->sk_err = ECONNRESET;
 495                         sk_error_report(other);
 496                 }
 497         }
 498         other->sk_state = TCP_CLOSE;
 499 }
 500
 501 static void unix_sock_destructor(struct sock *sk)
 502 {
 503         struct unix_sock *u = unix_sk(sk);
 504
 505         skb_queue_purge(&sk->sk_receive_queue);
 506
 507         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 508         WARN_ON(!sk_unhashed(sk));
 509         WARN_ON(sk->sk_socket);
 510         if (!sock_flag(sk, SOCK_DEAD)) {
 511                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 512                 return;
 513         }
 514
 515         if (u->addr)
 516                 unix_release_addr(u->addr);
 517
 518         atomic_long_dec(&unix_nr_socks);
 519         local_bh_disable();
 520         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 521         local_bh_enable();
 522 #ifdef UNIX_REFCNT_DEBUG
 523         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 524                 atomic_long_read(&unix_nr_socks));
 525 #endif
 526 }
 527
 528 static void unix_release_sock(struct sock *sk, int embrion)
 529 {
 530         struct unix_sock *u = unix_sk(sk);
 531         struct path path;
 532         struct sock *skpair;
 533         struct sk_buff *skb;
 534         int state;
 535
 536         unix_remove_socket(sk);
 537
 538         /* Clear state */
 539         unix_state_lock(sk);
 540         sock_orphan(sk);
 541         sk->sk_shutdown = SHUTDOWN_MASK;
 542         path         = u->path;
 543         u->path.dentry = NULL;
 544         u->path.mnt = NULL;
 545         state = sk->sk_state;
 546         sk->sk_state = TCP_CLOSE;
 547
 548         skpair = unix_peer(sk);
 549         unix_peer(sk) = NULL;
 550
 551         unix_state_unlock(sk);
 552
 553 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
 554         if (u->oob_skb) {
 555                 kfree_skb(u->oob_skb);
 556                 u->oob_skb = NULL;
 557         }
 558 #endif
 559
 560         wake_up_interruptible_all(&u->peer_wait);
 561
 562         if (skpair != NULL) {
 563                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 564                         unix_state_lock(skpair);
 565                         /* No more writes */
 566                         skpair->sk_shutdown = SHUTDOWN_MASK;
 567                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 568                                 skpair->sk_err = ECONNRESET;
 569                         unix_state_unlock(skpair);
 570                         skpair->sk_state_change(skpair);
 571                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 572                 }
 573
 574                 unix_dgram_peer_wake_disconnect(sk, skpair);
 575                 sock_put(skpair); /* It may now die */
 576         }
 577
 578         /* Try to flush out this socket. Throw out buffers at least */
 579
 580         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 581                 if (state == TCP_LISTEN)
 582                         unix_release_sock(skb->sk, 1);
 583                 /* passed fds are erased in the kfree_skb hook        */
 584                 UNIXCB(skb).consumed = skb->len;
 585                 kfree_skb(skb);
 586         }
 587
 588         if (path.dentry)
 589                 path_put(&path);
 590
 591         sock_put(sk);
 592
 593         /* ---- Socket is dead now and most probably destroyed ---- */
 594
 595         /*
 596          * Fixme: BSD difference: In BSD all sockets connected to us get
 597          *        ECONNRESET and we die on the spot. In Linux we behave
 598          *        like files and pipes do and wait for the last
 599          *        dereference.
 600          *
 601          * Can't we simply set sock->err?
 602          *
 603          *        What the above comment does talk about? --ANK(980817)
 604          */
 605
 606         if (unix_tot_inflight)
 607                 unix_gc();              /* Garbage collect fds */
 608 }
 609
 610 static void init_peercred(struct sock *sk)
 611 {
 612         const struct cred *old_cred;
 613         struct pid *old_pid;
 614
 615         spin_lock(&sk->sk_peer_lock);
 616         old_pid = sk->sk_peer_pid;
 617         old_cred = sk->sk_peer_cred;
 618         sk->sk_peer_pid  = get_pid(task_tgid(current));
 619         sk->sk_peer_cred = get_current_cred();
 620         spin_unlock(&sk->sk_peer_lock);
 621
 622         put_pid(old_pid);
 623         put_cred(old_cred);
 624 }
 625
 626 static void copy_peercred(struct sock *sk, struct sock *peersk)
 627 {
 628         const struct cred *old_cred;
 629         struct pid *old_pid;
 630
 631         if (sk < peersk) {
 632                 spin_lock(&sk->sk_peer_lock);
 633                 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
 634         } else {
 635                 spin_lock(&peersk->sk_peer_lock);
 636                 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
 637         }
 638         old_pid = sk->sk_peer_pid;
 639         old_cred = sk->sk_peer_cred;
 640         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 641         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 642
 643         spin_unlock(&sk->sk_peer_lock);
 644         spin_unlock(&peersk->sk_peer_lock);
 645
 646         put_pid(old_pid);
 647         put_cred(old_cred);
 648 }
 649
 650 static int unix_listen(struct socket *sock, int backlog)
 651 {
 652         int err;
 653         struct sock *sk = sock->sk;
 654         struct unix_sock *u = unix_sk(sk);
 655
 656         err = -EOPNOTSUPP;
 657         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 658                 goto out;       /* Only stream/seqpacket sockets accept */
 659         err = -EINVAL;
 660         if (!u->addr)
 661                 goto out;       /* No listens on an unbound socket */
 662         unix_state_lock(sk);
 663         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 664                 goto out_unlock;
 665         if (backlog > sk->sk_max_ack_backlog)
 666                 wake_up_interruptible_all(&u->peer_wait);
 667         sk->sk_max_ack_backlog  = backlog;
 668         sk->sk_state            = TCP_LISTEN;
 669         /* set credentials so connect can copy them */
 670         init_peercred(sk);
 671         err = 0;
 672
 673 out_unlock:
 674         unix_state_unlock(sk);
 675 out:
 676         return err;
 677 }
 678
 679 static int unix_release(struct socket *);
 680 static int unix_bind(struct socket *, struct sockaddr *, int);
 681 static int unix_stream_connect(struct socket *, struct sockaddr *,
 682                                int addr_len, int flags);
 683 static int unix_socketpair(struct socket *, struct socket *);
 684 static int unix_accept(struct socket *, struct socket *, int, bool);
 685 static int unix_getname(struct socket *, struct sockaddr *, int);
 686 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 687 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 688                                     poll_table *);
 689 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 690 #ifdef CONFIG_COMPAT
 691 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 692 #endif
 693 static int unix_shutdown(struct socket *, int);
 694 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 695 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 696 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 697                                     size_t size, int flags);
 698 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 699                                        struct pipe_inode_info *, size_t size,
 700                                        unsigned int flags);
 701 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 702 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 703 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
 704                           sk_read_actor_t recv_actor);
 705 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
 706                                  sk_read_actor_t recv_actor);
 707 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 708                               int, int);
 709 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 710 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 711                                   int);
 712
 713 static int unix_set_peek_off(struct sock *sk, int val)
 714 {
 715         struct unix_sock *u = unix_sk(sk);
 716
 717         if (mutex_lock_interruptible(&u->iolock))
 718                 return -EINTR;
 719
 720         sk->sk_peek_off = val;
 721         mutex_unlock(&u->iolock);
 722
 723         return 0;
 724 }
 725
 726 #ifdef CONFIG_PROC_FS
 727 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
 728 {
 729         struct sock *sk = sock->sk;
 730         struct unix_sock *u;
 731
 732         if (sk) {
 733                 u = unix_sk(sock->sk);
 734                 seq_printf(m, "scm_fds: %u\n",
 735                            atomic_read(&u->scm_stat.nr_fds));
 736         }
 737 }
 738 #else
 739 #define unix_show_fdinfo NULL
 740 #endif
 741
 742 static const struct proto_ops unix_stream_ops = {
 743         .family =       PF_UNIX,
 744         .owner =        THIS_MODULE,
 745         .release =      unix_release,
 746         .bind =         unix_bind,
 747         .connect =      unix_stream_connect,
 748         .socketpair =   unix_socketpair,
 749         .accept =       unix_accept,
 750         .getname =      unix_getname,
 751         .poll =         unix_poll,
 752         .ioctl =        unix_ioctl,
 753 #ifdef CONFIG_COMPAT
 754         .compat_ioctl = unix_compat_ioctl,
 755 #endif
 756         .listen =       unix_listen,
 757         .shutdown =     unix_shutdown,
 758         .sendmsg =      unix_stream_sendmsg,
 759         .recvmsg =      unix_stream_recvmsg,
 760         .read_sock =    unix_stream_read_sock,
 761         .mmap =         sock_no_mmap,
 762         .sendpage =     unix_stream_sendpage,
 763         .splice_read =  unix_stream_splice_read,
 764         .set_peek_off = unix_set_peek_off,
 765         .show_fdinfo =  unix_show_fdinfo,
 766 };
 767
 768 static const struct proto_ops unix_dgram_ops = {
 769         .family =       PF_UNIX,
 770         .owner =        THIS_MODULE,
 771         .release =      unix_release,
 772         .bind =         unix_bind,
 773         .connect =      unix_dgram_connect,
 774         .socketpair =   unix_socketpair,
 775         .accept =       sock_no_accept,
 776         .getname =      unix_getname,
 777         .poll =         unix_dgram_poll,
 778         .ioctl =        unix_ioctl,
 779 #ifdef CONFIG_COMPAT
 780         .compat_ioctl = unix_compat_ioctl,
 781 #endif
 782         .listen =       sock_no_listen,
 783         .shutdown =     unix_shutdown,
 784         .sendmsg =      unix_dgram_sendmsg,
 785         .read_sock =    unix_read_sock,
 786         .recvmsg =      unix_dgram_recvmsg,
 787         .mmap =         sock_no_mmap,
 788         .sendpage =     sock_no_sendpage,
 789         .set_peek_off = unix_set_peek_off,
 790         .show_fdinfo =  unix_show_fdinfo,
 791 };
 792
 793 static const struct proto_ops unix_seqpacket_ops = {
 794         .family =       PF_UNIX,
 795         .owner =        THIS_MODULE,
 796         .release =      unix_release,
 797         .bind =         unix_bind,
 798         .connect =      unix_stream_connect,
 799         .socketpair =   unix_socketpair,
 800         .accept =       unix_accept,
 801         .getname =      unix_getname,
 802         .poll =         unix_dgram_poll,
 803         .ioctl =        unix_ioctl,
 804 #ifdef CONFIG_COMPAT
 805         .compat_ioctl = unix_compat_ioctl,
 806 #endif
 807         .listen =       unix_listen,
 808         .shutdown =     unix_shutdown,
 809         .sendmsg =      unix_seqpacket_sendmsg,
 810         .recvmsg =      unix_seqpacket_recvmsg,
 811         .mmap =         sock_no_mmap,
 812         .sendpage =     sock_no_sendpage,
 813         .set_peek_off = unix_set_peek_off,
 814         .show_fdinfo =  unix_show_fdinfo,
 815 };
 816
 817 static void unix_close(struct sock *sk, long timeout)
 818 {
 819         /* Nothing to do here, unix socket does not need a ->close().
 820          * This is merely for sockmap.
 821          */
 822 }
 823
 824 static void unix_unhash(struct sock *sk)
 825 {
 826         /* Nothing to do here, unix socket does not need a ->unhash().
 827          * This is merely for sockmap.
 828          */
 829 }
 830
 831 struct proto unix_dgram_proto = {
 832         .name                   = "UNIX",
 833         .owner                  = THIS_MODULE,
 834         .obj_size               = sizeof(struct unix_sock),
 835         .close                  = unix_close,
 836 #ifdef CONFIG_BPF_SYSCALL
 837         .psock_update_sk_prot   = unix_dgram_bpf_update_proto,
 838 #endif
 839 };
 840
 841 struct proto unix_stream_proto = {
 842         .name                   = "UNIX-STREAM",
 843         .owner                  = THIS_MODULE,
 844         .obj_size               = sizeof(struct unix_sock),
 845         .close                  = unix_close,
 846         .unhash                 = unix_unhash,
 847 #ifdef CONFIG_BPF_SYSCALL
 848         .psock_update_sk_prot   = unix_stream_bpf_update_proto,
 849 #endif
 850 };
 851
 852 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
 853 {
 854         struct unix_sock *u;
 855         struct sock *sk;
 856         int err;
 857
 858         atomic_long_inc(&unix_nr_socks);
 859         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
 860                 err = -ENFILE;
 861                 goto err;
 862         }
 863
 864         if (type == SOCK_STREAM)
 865                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
 866         else /*dgram and  seqpacket */
 867                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
 868
 869         if (!sk) {
 870                 err = -ENOMEM;
 871                 goto err;
 872         }
 873
 874         sock_init_data(sock, sk);
 875
 876         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 877         sk->sk_write_space      = unix_write_space;
 878         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 879         sk->sk_destruct         = unix_sock_destructor;
 880         u         = unix_sk(sk);
 881         u->path.dentry = NULL;
 882         u->path.mnt = NULL;
 883         spin_lock_init(&u->lock);
 884         atomic_long_set(&u->inflight, 0);
 885         INIT_LIST_HEAD(&u->link);
 886         mutex_init(&u->iolock); /* single task reading lock */
 887         mutex_init(&u->bindlock); /* single task binding lock */
 888         init_waitqueue_head(&u->peer_wait);
 889         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 890         memset(&u->scm_stat, 0, sizeof(struct scm_stat));
 891         unix_insert_socket(unix_sockets_unbound(sk), sk);
 892
 893         local_bh_disable();
 894         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 895         local_bh_enable();
 896
 897         return sk;
 898
 899 err:
 900         atomic_long_dec(&unix_nr_socks);
 901         return ERR_PTR(err);
 902 }
 903
 904 static int unix_create(struct net *net, struct socket *sock, int protocol,
 905                        int kern)
 906 {
 907         struct sock *sk;
 908
 909         if (protocol && protocol != PF_UNIX)
 910                 return -EPROTONOSUPPORT;
 911
 912         sock->state = SS_UNCONNECTED;
 913
 914         switch (sock->type) {
 915         case SOCK_STREAM:
 916                 sock->ops = &unix_stream_ops;
 917                 break;
 918                 /*
 919                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 920                  *      nothing uses it.
 921                  */
 922         case SOCK_RAW:
 923                 sock->type = SOCK_DGRAM;
 924                 fallthrough;
 925         case SOCK_DGRAM:
 926                 sock->ops = &unix_dgram_ops;
 927                 break;
 928         case SOCK_SEQPACKET:
 929                 sock->ops = &unix_seqpacket_ops;
 930                 break;
 931         default:
 932                 return -ESOCKTNOSUPPORT;
 933         }
 934
 935         sk = unix_create1(net, sock, kern, sock->type);
 936         if (IS_ERR(sk))
 937                 return PTR_ERR(sk);
 938
 939         return 0;
 940 }
 941
 942 static int unix_release(struct socket *sock)
 943 {
 944         struct sock *sk = sock->sk;
 945
 946         if (!sk)
 947                 return 0;
 948
 949         sk->sk_prot->close(sk, 0);
 950         unix_release_sock(sk, 0);
 951         sock->sk = NULL;
 952
 953         return 0;
 954 }
 955
 956 static int unix_autobind(struct socket *sock)
 957 {
 958         struct sock *sk = sock->sk;
 959         struct net *net = sock_net(sk);
 960         struct unix_sock *u = unix_sk(sk);
 961         static u32 ordernum = 1;
 962         struct unix_address *addr;
 963         int err;
 964         unsigned int retries = 0;
 965
 966         err = mutex_lock_interruptible(&u->bindlock);
 967         if (err)
 968                 return err;
 969
 970         if (u->addr)
 971                 goto out;
 972
 973         err = -ENOMEM;
 974         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 975         if (!addr)
 976                 goto out;
 977
 978         addr->name->sun_family = AF_UNIX;
 979         refcount_set(&addr->refcnt, 1);
 980
 981 retry:
 982         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 983         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 984         addr->hash ^= sk->sk_type;
 985
 986         spin_lock(&unix_table_lock);
 987         ordernum = (ordernum+1)&0xFFFFF;
 988
 989         if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
 990                 spin_unlock(&unix_table_lock);
 991                 /*
 992                  * __unix_find_socket_byname() may take long time if many names
 993                  * are already in use.
 994                  */
 995                 cond_resched();
 996                 /* Give up if all names seems to be in use. */
 997                 if (retries++ == 0xFFFFF) {
 998                         err = -ENOSPC;
 999                         kfree(addr);
1000                         goto out;
1001                 }
1002                 goto retry;
1003         }
1004
1005         __unix_set_addr(sk, addr, addr->hash);
1006         spin_unlock(&unix_table_lock);
1007         err = 0;
1008
1009 out:    mutex_unlock(&u->bindlock);
1010         return err;
1011 }
1012
1013 static struct sock *unix_find_other(struct net *net,
1014                                     struct sockaddr_un *sunname, int len,
1015                                     int type, unsigned int hash, int *error)
1016 {
1017         struct sock *u;
1018         struct path path;
1019         int err = 0;
1020
1021         if (sunname->sun_path[0]) {
1022                 struct inode *inode;
1023                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
1024                 if (err)
1025                         goto fail;
1026                 inode = d_backing_inode(path.dentry);
1027                 err = path_permission(&path, MAY_WRITE);
1028                 if (err)
1029                         goto put_fail;
1030
1031                 err = -ECONNREFUSED;
1032                 if (!S_ISSOCK(inode->i_mode))
1033                         goto put_fail;
1034                 u = unix_find_socket_byinode(inode);
1035                 if (!u)
1036                         goto put_fail;
1037
1038                 if (u->sk_type == type)
1039                         touch_atime(&path);
1040
1041                 path_put(&path);
1042
1043                 err = -EPROTOTYPE;
1044                 if (u->sk_type != type) {
1045                         sock_put(u);
1046                         goto fail;
1047                 }
1048         } else {
1049                 err = -ECONNREFUSED;
1050                 u = unix_find_socket_byname(net, sunname, len, type ^ hash);
1051                 if (u) {
1052                         struct dentry *dentry;
1053                         dentry = unix_sk(u)->path.dentry;
1054                         if (dentry)
1055                                 touch_atime(&unix_sk(u)->path);
1056                 } else
1057                         goto fail;
1058         }
1059         return u;
1060
1061 put_fail:
1062         path_put(&path);
1063 fail:
1064         *error = err;
1065         return NULL;
1066 }
1067
1068 static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
1069 {
1070         struct unix_sock *u = unix_sk(sk);
1071         umode_t mode = S_IFSOCK |
1072                (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1073         struct user_namespace *ns; // barf...
1074         struct path parent;
1075         struct dentry *dentry;
1076         unsigned int hash;
1077         int err;
1078
1079         /*
1080          * Get the parent directory, calculate the hash for last
1081          * component.
1082          */
1083         dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1084         if (IS_ERR(dentry))
1085                 return PTR_ERR(dentry);
1086         ns = mnt_user_ns(parent.mnt);
1087
1088         /*
1089          * All right, let's create it.
1090          */
1091         err = security_path_mknod(&parent, dentry, mode, 0);
1092         if (!err)
1093                 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
1094         if (err)
1095                 goto out;
1096         err = mutex_lock_interruptible(&u->bindlock);
1097         if (err)
1098                 goto out_unlink;
1099         if (u->addr)
1100                 goto out_unlock;
1101
1102         addr->hash = UNIX_HASH_SIZE;
1103         hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1104         spin_lock(&unix_table_lock);
1105         u->path.mnt = mntget(parent.mnt);
1106         u->path.dentry = dget(dentry);
1107         __unix_set_addr(sk, addr, hash);
1108         spin_unlock(&unix_table_lock);
1109         mutex_unlock(&u->bindlock);
1110         done_path_create(&parent, dentry);
1111         return 0;
1112
1113 out_unlock:
1114         mutex_unlock(&u->bindlock);
1115         err = -EINVAL;
1116 out_unlink:
1117         /* failed after successful mknod?  unlink what we'd created... */
1118         vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
1119 out:
1120         done_path_create(&parent, dentry);
1121         return err;
1122 }
1123
1124 static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
1125 {
1126         struct unix_sock *u = unix_sk(sk);
1127         int err;
1128
1129         err = mutex_lock_interruptible(&u->bindlock);
1130         if (err)
1131                 return err;
1132
1133         if (u->addr) {
1134                 mutex_unlock(&u->bindlock);
1135                 return -EINVAL;
1136         }
1137
1138         spin_lock(&unix_table_lock);
1139         if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1140                                       addr->hash)) {
1141                 spin_unlock(&unix_table_lock);
1142                 mutex_unlock(&u->bindlock);
1143                 return -EADDRINUSE;
1144         }
1145         __unix_set_addr(sk, addr, addr->hash);
1146         spin_unlock(&unix_table_lock);
1147         mutex_unlock(&u->bindlock);
1148         return 0;
1149 }
1150
1151 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1152 {
1153         struct sock *sk = sock->sk;
1154         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1155         char *sun_path = sunaddr->sun_path;
1156         int err;
1157         unsigned int hash;
1158         struct unix_address *addr;
1159
1160         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1161             sunaddr->sun_family != AF_UNIX)
1162                 return -EINVAL;
1163
1164         if (addr_len == sizeof(short))
1165                 return unix_autobind(sock);
1166
1167         err = unix_mkname(sunaddr, addr_len, &hash);
1168         if (err < 0)
1169                 return err;
1170         addr_len = err;
1171         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1172         if (!addr)
1173                 return -ENOMEM;
1174
1175         memcpy(addr->name, sunaddr, addr_len);
1176         addr->len = addr_len;
1177         addr->hash = hash ^ sk->sk_type;
1178         refcount_set(&addr->refcnt, 1);
1179
1180         if (sun_path[0])
1181                 err = unix_bind_bsd(sk, addr);
1182         else
1183                 err = unix_bind_abstract(sk, addr);
1184         if (err)
1185                 unix_release_addr(addr);
1186         return err == -EEXIST ? -EADDRINUSE : err;
1187 }
1188
1189 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1190 {
1191         if (unlikely(sk1 == sk2) || !sk2) {
1192                 unix_state_lock(sk1);
1193                 return;
1194         }
1195         if (sk1 < sk2) {
1196                 unix_state_lock(sk1);
1197                 unix_state_lock_nested(sk2);
1198         } else {
1199                 unix_state_lock(sk2);
1200                 unix_state_lock_nested(sk1);
1201         }
1202 }
1203
1204 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1205 {
1206         if (unlikely(sk1 == sk2) || !sk2) {
1207                 unix_state_unlock(sk1);
1208                 return;
1209         }
1210         unix_state_unlock(sk1);
1211         unix_state_unlock(sk2);
1212 }
1213
1214 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1215                               int alen, int flags)
1216 {
1217         struct sock *sk = sock->sk;
1218         struct net *net = sock_net(sk);
1219         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1220         struct sock *other;
1221         unsigned int hash;
1222         int err;
1223
1224         err = -EINVAL;
1225         if (alen < offsetofend(struct sockaddr, sa_family))
1226                 goto out;
1227
1228         if (addr->sa_family != AF_UNSPEC) {
1229                 err = unix_mkname(sunaddr, alen, &hash);
1230                 if (err < 0)
1231                         goto out;
1232                 alen = err;
1233
1234                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1235                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1236                         goto out;
1237
1238 restart:
1239                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1240                 if (!other)
1241                         goto out;
1242
1243                 unix_state_double_lock(sk, other);
1244
1245                 /* Apparently VFS overslept socket death. Retry. */
1246                 if (sock_flag(other, SOCK_DEAD)) {
1247                         unix_state_double_unlock(sk, other);
1248                         sock_put(other);
1249                         goto restart;
1250                 }
1251
1252                 err = -EPERM;
1253                 if (!unix_may_send(sk, other))
1254                         goto out_unlock;
1255
1256                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1257                 if (err)
1258                         goto out_unlock;
1259
1260                 sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1261         } else {
1262                 /*
1263                  *      1003.1g breaking connected state with AF_UNSPEC
1264                  */
1265                 other = NULL;
1266                 unix_state_double_lock(sk, other);
1267         }
1268
1269         /*
1270          * If it was connected, reconnect.
1271          */
1272         if (unix_peer(sk)) {
1273                 struct sock *old_peer = unix_peer(sk);
1274
1275                 unix_peer(sk) = other;
1276                 if (!other)
1277                         sk->sk_state = TCP_CLOSE;
1278                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1279
1280                 unix_state_double_unlock(sk, other);
1281
1282                 if (other != old_peer)
1283                         unix_dgram_disconnected(sk, old_peer);
1284                 sock_put(old_peer);
1285         } else {
1286                 unix_peer(sk) = other;
1287                 unix_state_double_unlock(sk, other);
1288         }
1289
1290         return 0;
1291
1292 out_unlock:
1293         unix_state_double_unlock(sk, other);
1294         sock_put(other);
1295 out:
1296         return err;
1297 }
1298
1299 static long unix_wait_for_peer(struct sock *other, long timeo)
1300         __releases(&unix_sk(other)->lock)
1301 {
1302         struct unix_sock *u = unix_sk(other);
1303         int sched;
1304         DEFINE_WAIT(wait);
1305
1306         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1307
1308         sched = !sock_flag(other, SOCK_DEAD) &&
1309                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1310                 unix_recvq_full(other);
1311
1312         unix_state_unlock(other);
1313
1314         if (sched)
1315                 timeo = schedule_timeout(timeo);
1316
1317         finish_wait(&u->peer_wait, &wait);
1318         return timeo;
1319 }
1320
1321 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1322                                int addr_len, int flags)
1323 {
1324         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1325         struct sock *sk = sock->sk;
1326         struct net *net = sock_net(sk);
1327         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1328         struct sock *newsk = NULL;
1329         struct sock *other = NULL;
1330         struct sk_buff *skb = NULL;
1331         unsigned int hash;
1332         int st;
1333         int err;
1334         long timeo;
1335
1336         err = unix_mkname(sunaddr, addr_len, &hash);
1337         if (err < 0)
1338                 goto out;
1339         addr_len = err;
1340
1341         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1342             (err = unix_autobind(sock)) != 0)
1343                 goto out;
1344
1345         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1346
1347         /* First of all allocate resources.
1348            If we will make it after state is locked,
1349            we will have to recheck all again in any case.
1350          */
1351
1352         /* create new sock for complete connection */
1353         newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
1354         if (IS_ERR(newsk)) {
1355                 err = PTR_ERR(newsk);
1356                 newsk = NULL;
1357                 goto out;
1358         }
1359
1360         err = -ENOMEM;
1361
1362         /* Allocate skb for sending to listening sock */
1363         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1364         if (skb == NULL)
1365                 goto out;
1366
1367 restart:
1368         /*  Find listening sock. */
1369         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1370         if (!other)
1371                 goto out;
1372
1373         /* Latch state of peer */
1374         unix_state_lock(other);
1375
1376         /* Apparently VFS overslept socket death. Retry. */
1377         if (sock_flag(other, SOCK_DEAD)) {
1378                 unix_state_unlock(other);
1379                 sock_put(other);
1380                 goto restart;
1381         }
1382
1383         err = -ECONNREFUSED;
1384         if (other->sk_state != TCP_LISTEN)
1385                 goto out_unlock;
1386         if (other->sk_shutdown & RCV_SHUTDOWN)
1387                 goto out_unlock;
1388
1389         if (unix_recvq_full(other)) {
1390                 err = -EAGAIN;
1391                 if (!timeo)
1392                         goto out_unlock;
1393
1394                 timeo = unix_wait_for_peer(other, timeo);
1395
1396                 err = sock_intr_errno(timeo);
1397                 if (signal_pending(current))
1398                         goto out;
1399                 sock_put(other);
1400                 goto restart;
1401         }
1402
1403         /* Latch our state.
1404
1405            It is tricky place. We need to grab our state lock and cannot
1406            drop lock on peer. It is dangerous because deadlock is
1407            possible. Connect to self case and simultaneous
1408            attempt to connect are eliminated by checking socket
1409            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1410            check this before attempt to grab lock.
1411
1412            Well, and we have to recheck the state after socket locked.
1413          */
1414         st = sk->sk_state;
1415
1416         switch (st) {
1417         case TCP_CLOSE:
1418                 /* This is ok... continue with connect */
1419                 break;
1420         case TCP_ESTABLISHED:
1421                 /* Socket is already connected */
1422                 err = -EISCONN;
1423                 goto out_unlock;
1424         default:
1425                 err = -EINVAL;
1426                 goto out_unlock;
1427         }
1428
1429         unix_state_lock_nested(sk);
1430
1431         if (sk->sk_state != st) {
1432                 unix_state_unlock(sk);
1433                 unix_state_unlock(other);
1434                 sock_put(other);
1435                 goto restart;
1436         }
1437
1438         err = security_unix_stream_connect(sk, other, newsk);
1439         if (err) {
1440                 unix_state_unlock(sk);
1441                 goto out_unlock;
1442         }
1443
1444         /* The way is open! Fastly set all the necessary fields... */
1445
1446         sock_hold(sk);
1447         unix_peer(newsk)        = sk;
1448         newsk->sk_state         = TCP_ESTABLISHED;
1449         newsk->sk_type          = sk->sk_type;
1450         init_peercred(newsk);
1451         newu = unix_sk(newsk);
1452         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1453         otheru = unix_sk(other);
1454
1455         /* copy address information from listening to new sock
1456          *
1457          * The contents of *(otheru->addr) and otheru->path
1458          * are seen fully set up here, since we have found
1459          * otheru in hash under unix_table_lock.  Insertion
1460          * into the hash chain we'd found it in had been done
1461          * in an earlier critical area protected by unix_table_lock,
1462          * the same one where we'd set *(otheru->addr) contents,
1463          * as well as otheru->path and otheru->addr itself.
1464          *
1465          * Using smp_store_release() here to set newu->addr
1466          * is enough to make those stores, as well as stores
1467          * to newu->path visible to anyone who gets newu->addr
1468          * by smp_load_acquire().  IOW, the same warranties
1469          * as for unix_sock instances bound in unix_bind() or
1470          * in unix_autobind().
1471          */
1472         if (otheru->path.dentry) {
1473                 path_get(&otheru->path);
1474                 newu->path = otheru->path;
1475         }
1476         refcount_inc(&otheru->addr->refcnt);
1477         smp_store_release(&newu->addr, otheru->addr);
1478
1479         /* Set credentials */
1480         copy_peercred(sk, other);
1481
1482         sock->state     = SS_CONNECTED;
1483         sk->sk_state    = TCP_ESTABLISHED;
1484         sock_hold(newsk);
1485
1486         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1487         unix_peer(sk)   = newsk;
1488
1489         unix_state_unlock(sk);
1490
1491         /* take ten and send info to listening sock */
1492         spin_lock(&other->sk_receive_queue.lock);
1493         __skb_queue_tail(&other->sk_receive_queue, skb);
1494         spin_unlock(&other->sk_receive_queue.lock);
1495         unix_state_unlock(other);
1496         other->sk_data_ready(other);
1497         sock_put(other);
1498         return 0;
1499
1500 out_unlock:
1501         if (other)
1502                 unix_state_unlock(other);
1503
1504 out:
1505         kfree_skb(skb);
1506         if (newsk)
1507                 unix_release_sock(newsk, 0);
1508         if (other)
1509                 sock_put(other);
1510         return err;
1511 }
1512
1513 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1514 {
1515         struct sock *ska = socka->sk, *skb = sockb->sk;
1516
1517         /* Join our sockets back to back */
1518         sock_hold(ska);
1519         sock_hold(skb);
1520         unix_peer(ska) = skb;
1521         unix_peer(skb) = ska;
1522         init_peercred(ska);
1523         init_peercred(skb);
1524
1525         ska->sk_state = TCP_ESTABLISHED;
1526         skb->sk_state = TCP_ESTABLISHED;
1527         socka->state  = SS_CONNECTED;
1528         sockb->state  = SS_CONNECTED;
1529         return 0;
1530 }
1531
1532 static void unix_sock_inherit_flags(const struct socket *old,
1533                                     struct socket *new)
1534 {
1535         if (test_bit(SOCK_PASSCRED, &old->flags))
1536                 set_bit(SOCK_PASSCRED, &new->flags);
1537         if (test_bit(SOCK_PASSSEC, &old->flags))
1538                 set_bit(SOCK_PASSSEC, &new->flags);
1539 }
1540
1541 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1542                        bool kern)
1543 {
1544         struct sock *sk = sock->sk;
1545         struct sock *tsk;
1546         struct sk_buff *skb;
1547         int err;
1548
1549         err = -EOPNOTSUPP;
1550         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1551                 goto out;
1552
1553         err = -EINVAL;
1554         if (sk->sk_state != TCP_LISTEN)
1555                 goto out;
1556
1557         /* If socket state is TCP_LISTEN it cannot change (for now...),
1558          * so that no locks are necessary.
1559          */
1560
1561         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1562         if (!skb) {
1563                 /* This means receive shutdown. */
1564                 if (err == 0)
1565                         err = -EINVAL;
1566                 goto out;
1567         }
1568
1569         tsk = skb->sk;
1570         skb_free_datagram(sk, skb);
1571         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1572
1573         /* attach accepted sock to socket */
1574         unix_state_lock(tsk);
1575         newsock->state = SS_CONNECTED;
1576         unix_sock_inherit_flags(sock, newsock);
1577         sock_graft(tsk, newsock);
1578         unix_state_unlock(tsk);
1579         return 0;
1580
1581 out:
1582         return err;
1583 }
1584
1585
1586 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1587 {
1588         struct sock *sk = sock->sk;
1589         struct unix_address *addr;
1590         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1591         int err = 0;
1592
1593         if (peer) {
1594                 sk = unix_peer_get(sk);
1595
1596                 err = -ENOTCONN;
1597                 if (!sk)
1598                         goto out;
1599                 err = 0;
1600         } else {
1601                 sock_hold(sk);
1602         }
1603
1604         addr = smp_load_acquire(&unix_sk(sk)->addr);
1605         if (!addr) {
1606                 sunaddr->sun_family = AF_UNIX;
1607                 sunaddr->sun_path[0] = 0;
1608                 err = sizeof(short);
1609         } else {
1610                 err = addr->len;
1611                 memcpy(sunaddr, addr->name, addr->len);
1612         }
1613         sock_put(sk);
1614 out:
1615         return err;
1616 }
1617
1618 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1619 {
1620         scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1621
1622         /*
1623          * Garbage collection of unix sockets starts by selecting a set of
1624          * candidate sockets which have reference only from being in flight
1625          * (total_refs == inflight_refs).  This condition is checked once during
1626          * the candidate collection phase, and candidates are marked as such, so
1627          * that non-candidates can later be ignored.  While inflight_refs is
1628          * protected by unix_gc_lock, total_refs (file count) is not, hence this
1629          * is an instantaneous decision.
1630          *
1631          * Once a candidate, however, the socket must not be reinstalled into a
1632          * file descriptor while the garbage collection is in progress.
1633          *
1634          * If the above conditions are met, then the directed graph of
1635          * candidates (*) does not change while unix_gc_lock is held.
1636          *
1637          * Any operations that changes the file count through file descriptors
1638          * (dup, close, sendmsg) does not change the graph since candidates are
1639          * not installed in fds.
1640          *
1641          * Dequeing a candidate via recvmsg would install it into an fd, but
1642          * that takes unix_gc_lock to decrement the inflight count, so it's
1643          * serialized with garbage collection.
1644          *
1645          * MSG_PEEK is special in that it does not change the inflight count,
1646          * yet does install the socket into an fd.  The following lock/unlock
1647          * pair is to ensure serialization with garbage collection.  It must be
1648          * done between incrementing the file count and installing the file into
1649          * an fd.
1650          *
1651          * If garbage collection starts after the barrier provided by the
1652          * lock/unlock, then it will see the elevated refcount and not mark this
1653          * as a candidate.  If a garbage collection is already in progress
1654          * before the file count was incremented, then the lock/unlock pair will
1655          * ensure that garbage collection is finished before progressing to
1656          * installing the fd.
1657          *
1658          * (*) A -> B where B is on the queue of A or B is on the queue of C
1659          * which is on the queue of listening socket A.
1660          */
1661         spin_lock(&unix_gc_lock);
1662         spin_unlock(&unix_gc_lock);
1663 }
1664
1665 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1666 {
1667         int err = 0;
1668
1669         UNIXCB(skb).pid  = get_pid(scm->pid);
1670         UNIXCB(skb).uid = scm->creds.uid;
1671         UNIXCB(skb).gid = scm->creds.gid;
1672         UNIXCB(skb).fp = NULL;
1673         unix_get_secdata(scm, skb);
1674         if (scm->fp && send_fds)
1675                 err = unix_attach_fds(scm, skb);
1676
1677         skb->destructor = unix_destruct_scm;
1678         return err;
1679 }
1680
1681 static bool unix_passcred_enabled(const struct socket *sock,
1682                                   const struct sock *other)
1683 {
1684         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1685                !other->sk_socket ||
1686                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1687 }
1688
1689 /*
1690  * Some apps rely on write() giving SCM_CREDENTIALS
1691  * We include credentials if source or destination socket
1692  * asserted SOCK_PASSCRED.
1693  */
1694 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1695                             const struct sock *other)
1696 {
1697         if (UNIXCB(skb).pid)
1698                 return;
1699         if (unix_passcred_enabled(sock, other)) {
1700                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1701                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1702         }
1703 }
1704
1705 static int maybe_init_creds(struct scm_cookie *scm,
1706                             struct socket *socket,
1707                             const struct sock *other)
1708 {
1709         int err;
1710         struct msghdr msg = { .msg_controllen = 0 };
1711
1712         err = scm_send(socket, &msg, scm, false);
1713         if (err)
1714                 return err;
1715
1716         if (unix_passcred_enabled(socket, other)) {
1717                 scm->pid = get_pid(task_tgid(current));
1718                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1719         }
1720         return err;
1721 }
1722
1723 static bool unix_skb_scm_eq(struct sk_buff *skb,
1724                             struct scm_cookie *scm)
1725 {
1726         const struct unix_skb_parms *u = &UNIXCB(skb);
1727
1728         return u->pid == scm->pid &&
1729                uid_eq(u->uid, scm->creds.uid) &&
1730                gid_eq(u->gid, scm->creds.gid) &&
1731                unix_secdata_eq(scm, skb);
1732 }
1733
1734 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1735 {
1736         struct scm_fp_list *fp = UNIXCB(skb).fp;
1737         struct unix_sock *u = unix_sk(sk);
1738
1739         if (unlikely(fp && fp->count))
1740                 atomic_add(fp->count, &u->scm_stat.nr_fds);
1741 }
1742
1743 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1744 {
1745         struct scm_fp_list *fp = UNIXCB(skb).fp;
1746         struct unix_sock *u = unix_sk(sk);
1747
1748         if (unlikely(fp && fp->count))
1749                 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1750 }
1751
1752 /*
1753  *      Send AF_UNIX data.
1754  */
1755
1756 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1757                               size_t len)
1758 {
1759         struct sock *sk = sock->sk;
1760         struct net *net = sock_net(sk);
1761         struct unix_sock *u = unix_sk(sk);
1762         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1763         struct sock *other = NULL;
1764         int namelen = 0; /* fake GCC */
1765         int err;
1766         unsigned int hash;
1767         struct sk_buff *skb;
1768         long timeo;
1769         struct scm_cookie scm;
1770         int data_len = 0;
1771         int sk_locked;
1772
1773         wait_for_unix_gc();
1774         err = scm_send(sock, msg, &scm, false);
1775         if (err < 0)
1776                 return err;
1777
1778         err = -EOPNOTSUPP;
1779         if (msg->msg_flags&MSG_OOB)
1780                 goto out;
1781
1782         if (msg->msg_namelen) {
1783                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1784                 if (err < 0)
1785                         goto out;
1786                 namelen = err;
1787         } else {
1788                 sunaddr = NULL;
1789                 err = -ENOTCONN;
1790                 other = unix_peer_get(sk);
1791                 if (!other)
1792                         goto out;
1793         }
1794
1795         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1796             && (err = unix_autobind(sock)) != 0)
1797                 goto out;
1798
1799         err = -EMSGSIZE;
1800         if (len > sk->sk_sndbuf - 32)
1801                 goto out;
1802
1803         if (len > SKB_MAX_ALLOC) {
1804                 data_len = min_t(size_t,
1805                                  len - SKB_MAX_ALLOC,
1806                                  MAX_SKB_FRAGS * PAGE_SIZE);
1807                 data_len = PAGE_ALIGN(data_len);
1808
1809                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1810         }
1811
1812         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1813                                    msg->msg_flags & MSG_DONTWAIT, &err,
1814                                    PAGE_ALLOC_COSTLY_ORDER);
1815         if (skb == NULL)
1816                 goto out;
1817
1818         err = unix_scm_to_skb(&scm, skb, true);
1819         if (err < 0)
1820                 goto out_free;
1821
1822         skb_put(skb, len - data_len);
1823         skb->data_len = data_len;
1824         skb->len = len;
1825         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1826         if (err)
1827                 goto out_free;
1828
1829         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1830
1831 restart:
1832         if (!other) {
1833                 err = -ECONNRESET;
1834                 if (sunaddr == NULL)
1835                         goto out_free;
1836
1837                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1838                                         hash, &err);
1839                 if (other == NULL)
1840                         goto out_free;
1841         }
1842
1843         if (sk_filter(other, skb) < 0) {
1844                 /* Toss the packet but do not return any error to the sender */
1845                 err = len;
1846                 goto out_free;
1847         }
1848
1849         sk_locked = 0;
1850         unix_state_lock(other);
1851 restart_locked:
1852         err = -EPERM;
1853         if (!unix_may_send(sk, other))
1854                 goto out_unlock;
1855
1856         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1857                 /*
1858                  *      Check with 1003.1g - what should
1859                  *      datagram error
1860                  */
1861                 unix_state_unlock(other);
1862                 sock_put(other);
1863
1864                 if (!sk_locked)
1865                         unix_state_lock(sk);
1866
1867                 err = 0;
1868                 if (sk->sk_type == SOCK_SEQPACKET) {
1869                         /* We are here only when racing with unix_release_sock()
1870                          * is clearing @other. Never change state to TCP_CLOSE
1871                          * unlike SOCK_DGRAM wants.
1872                          */
1873                         unix_state_unlock(sk);
1874                         err = -EPIPE;
1875                 } else if (unix_peer(sk) == other) {
1876                         unix_peer(sk) = NULL;
1877                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1878
1879                         sk->sk_state = TCP_CLOSE;
1880                         unix_state_unlock(sk);
1881
1882                         unix_dgram_disconnected(sk, other);
1883                         sock_put(other);
1884                         err = -ECONNREFUSED;
1885                 } else {
1886                         unix_state_unlock(sk);
1887                 }
1888
1889                 other = NULL;
1890                 if (err)
1891                         goto out_free;
1892                 goto restart;
1893         }
1894
1895         err = -EPIPE;
1896         if (other->sk_shutdown & RCV_SHUTDOWN)
1897                 goto out_unlock;
1898
1899         if (sk->sk_type != SOCK_SEQPACKET) {
1900                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1901                 if (err)
1902                         goto out_unlock;
1903         }
1904
1905         /* other == sk && unix_peer(other) != sk if
1906          * - unix_peer(sk) == NULL, destination address bound to sk
1907          * - unix_peer(sk) == sk by time of get but disconnected before lock
1908          */
1909         if (other != sk &&
1910             unlikely(unix_peer(other) != sk &&
1911             unix_recvq_full_lockless(other))) {
1912                 if (timeo) {
1913                         timeo = unix_wait_for_peer(other, timeo);
1914
1915                         err = sock_intr_errno(timeo);
1916                         if (signal_pending(current))
1917                                 goto out_free;
1918
1919                         goto restart;
1920                 }
1921
1922                 if (!sk_locked) {
1923                         unix_state_unlock(other);
1924                         unix_state_double_lock(sk, other);
1925                 }
1926
1927                 if (unix_peer(sk) != other ||
1928                     unix_dgram_peer_wake_me(sk, other)) {
1929                         err = -EAGAIN;
1930                         sk_locked = 1;
1931                         goto out_unlock;
1932                 }
1933
1934                 if (!sk_locked) {
1935                         sk_locked = 1;
1936                         goto restart_locked;
1937                 }
1938         }
1939
1940         if (unlikely(sk_locked))
1941                 unix_state_unlock(sk);
1942
1943         if (sock_flag(other, SOCK_RCVTSTAMP))
1944                 __net_timestamp(skb);
1945         maybe_add_creds(skb, sock, other);
1946         scm_stat_add(other, skb);
1947         skb_queue_tail(&other->sk_receive_queue, skb);
1948         unix_state_unlock(other);
1949         other->sk_data_ready(other);
1950         sock_put(other);
1951         scm_destroy(&scm);
1952         return len;
1953
1954 out_unlock:
1955         if (sk_locked)
1956                 unix_state_unlock(sk);
1957         unix_state_unlock(other);
1958 out_free:
1959         kfree_skb(skb);
1960 out:
1961         if (other)
1962                 sock_put(other);
1963         scm_destroy(&scm);
1964         return err;
1965 }
1966
1967 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1968  * bytes, and a minimum of a full page.
1969  */
1970 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1971
1972 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
1973 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
1974 {
1975         struct unix_sock *ousk = unix_sk(other);
1976         struct sk_buff *skb;
1977         int err = 0;
1978
1979         skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
1980
1981         if (!skb)
1982                 return err;
1983
1984         skb_put(skb, 1);
1985         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
1986
1987         if (err) {
1988                 kfree_skb(skb);
1989                 return err;
1990         }
1991
1992         unix_state_lock(other);
1993
1994         if (sock_flag(other, SOCK_DEAD) ||
1995             (other->sk_shutdown & RCV_SHUTDOWN)) {
1996                 unix_state_unlock(other);
1997                 kfree_skb(skb);
1998                 return -EPIPE;
1999         }
2000
2001         maybe_add_creds(skb, sock, other);
2002         skb_get(skb);
2003
2004         if (ousk->oob_skb)
2005                 consume_skb(ousk->oob_skb);
2006
2007         WRITE_ONCE(ousk->oob_skb, skb);
2008
2009         scm_stat_add(other, skb);
2010         skb_queue_tail(&other->sk_receive_queue, skb);
2011         sk_send_sigurg(other);
2012         unix_state_unlock(other);
2013         other->sk_data_ready(other);
2014
2015         return err;
2016 }
2017 #endif
2018
2019 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2020                                size_t len)
2021 {
2022         struct sock *sk = sock->sk;
2023         struct sock *other = NULL;
2024         int err, size;
2025         struct sk_buff *skb;
2026         int sent = 0;
2027         struct scm_cookie scm;
2028         bool fds_sent = false;
2029         int data_len;
2030
2031         wait_for_unix_gc();
2032         err = scm_send(sock, msg, &scm, false);
2033         if (err < 0)
2034                 return err;
2035
2036         err = -EOPNOTSUPP;
2037         if (msg->msg_flags & MSG_OOB) {
2038 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2039                 if (len)
2040                         len--;
2041                 else
2042 #endif
2043                         goto out_err;
2044         }
2045
2046         if (msg->msg_namelen) {
2047                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2048                 goto out_err;
2049         } else {
2050                 err = -ENOTCONN;
2051                 other = unix_peer(sk);
2052                 if (!other)
2053                         goto out_err;
2054         }
2055
2056         if (sk->sk_shutdown & SEND_SHUTDOWN)
2057                 goto pipe_err;
2058
2059         while (sent < len) {
2060                 size = len - sent;
2061
2062                 /* Keep two messages in the pipe so it schedules better */
2063                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2064
2065                 /* allow fallback to order-0 allocations */
2066                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2067
2068                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2069
2070                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2071
2072                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2073                                            msg->msg_flags & MSG_DONTWAIT, &err,
2074                                            get_order(UNIX_SKB_FRAGS_SZ));
2075                 if (!skb)
2076                         goto out_err;
2077
2078                 /* Only send the fds in the first buffer */
2079                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
2080                 if (err < 0) {
2081                         kfree_skb(skb);
2082                         goto out_err;
2083                 }
2084                 fds_sent = true;
2085
2086                 skb_put(skb, size - data_len);
2087                 skb->data_len = data_len;
2088                 skb->len = size;
2089                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2090                 if (err) {
2091                         kfree_skb(skb);
2092                         goto out_err;
2093                 }
2094
2095                 unix_state_lock(other);
2096
2097                 if (sock_flag(other, SOCK_DEAD) ||
2098                     (other->sk_shutdown & RCV_SHUTDOWN))
2099                         goto pipe_err_free;
2100
2101                 maybe_add_creds(skb, sock, other);
2102                 scm_stat_add(other, skb);
2103                 skb_queue_tail(&other->sk_receive_queue, skb);
2104                 unix_state_unlock(other);
2105                 other->sk_data_ready(other);
2106                 sent += size;
2107         }
2108
2109 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2110         if (msg->msg_flags & MSG_OOB) {
2111                 err = queue_oob(sock, msg, other);
2112                 if (err)
2113                         goto out_err;
2114                 sent++;
2115         }
2116 #endif
2117
2118         scm_destroy(&scm);
2119
2120         return sent;
2121
2122 pipe_err_free:
2123         unix_state_unlock(other);
2124         kfree_skb(skb);
2125 pipe_err:
2126         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2127                 send_sig(SIGPIPE, current, 0);
2128         err = -EPIPE;
2129 out_err:
2130         scm_destroy(&scm);
2131         return sent ? : err;
2132 }
2133
2134 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2135                                     int offset, size_t size, int flags)
2136 {
2137         int err;
2138         bool send_sigpipe = false;
2139         bool init_scm = true;
2140         struct scm_cookie scm;
2141         struct sock *other, *sk = socket->sk;
2142         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2143
2144         if (flags & MSG_OOB)
2145                 return -EOPNOTSUPP;
2146
2147         other = unix_peer(sk);
2148         if (!other || sk->sk_state != TCP_ESTABLISHED)
2149                 return -ENOTCONN;
2150
2151         if (false) {
2152 alloc_skb:
2153                 unix_state_unlock(other);
2154                 mutex_unlock(&unix_sk(other)->iolock);
2155                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2156                                               &err, 0);
2157                 if (!newskb)
2158                         goto err;
2159         }
2160
2161         /* we must acquire iolock as we modify already present
2162          * skbs in the sk_receive_queue and mess with skb->len
2163          */
2164         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
2165         if (err) {
2166                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
2167                 goto err;
2168         }
2169
2170         if (sk->sk_shutdown & SEND_SHUTDOWN) {
2171                 err = -EPIPE;
2172                 send_sigpipe = true;
2173                 goto err_unlock;
2174         }
2175
2176         unix_state_lock(other);
2177
2178         if (sock_flag(other, SOCK_DEAD) ||
2179             other->sk_shutdown & RCV_SHUTDOWN) {
2180                 err = -EPIPE;
2181                 send_sigpipe = true;
2182                 goto err_state_unlock;
2183         }
2184
2185         if (init_scm) {
2186                 err = maybe_init_creds(&scm, socket, other);
2187                 if (err)
2188                         goto err_state_unlock;
2189                 init_scm = false;
2190         }
2191
2192         skb = skb_peek_tail(&other->sk_receive_queue);
2193         if (tail && tail == skb) {
2194                 skb = newskb;
2195         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2196                 if (newskb) {
2197                         skb = newskb;
2198                 } else {
2199                         tail = skb;
2200                         goto alloc_skb;
2201                 }
2202         } else if (newskb) {
2203                 /* this is fast path, we don't necessarily need to
2204                  * call to kfree_skb even though with newskb == NULL
2205                  * this - does no harm
2206                  */
2207                 consume_skb(newskb);
2208                 newskb = NULL;
2209         }
2210
2211         if (skb_append_pagefrags(skb, page, offset, size)) {
2212                 tail = skb;
2213                 goto alloc_skb;
2214         }
2215
2216         skb->len += size;
2217         skb->data_len += size;
2218         skb->truesize += size;
2219         refcount_add(size, &sk->sk_wmem_alloc);
2220
2221         if (newskb) {
2222                 err = unix_scm_to_skb(&scm, skb, false);
2223                 if (err)
2224                         goto err_state_unlock;
2225                 spin_lock(&other->sk_receive_queue.lock);
2226                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2227                 spin_unlock(&other->sk_receive_queue.lock);
2228         }
2229
2230         unix_state_unlock(other);
2231         mutex_unlock(&unix_sk(other)->iolock);
2232
2233         other->sk_data_ready(other);
2234         scm_destroy(&scm);
2235         return size;
2236
2237 err_state_unlock:
2238         unix_state_unlock(other);
2239 err_unlock:
2240         mutex_unlock(&unix_sk(other)->iolock);
2241 err:
2242         kfree_skb(newskb);
2243         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2244                 send_sig(SIGPIPE, current, 0);
2245         if (!init_scm)
2246                 scm_destroy(&scm);
2247         return err;
2248 }
2249
2250 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2251                                   size_t len)
2252 {
2253         int err;
2254         struct sock *sk = sock->sk;
2255
2256         err = sock_error(sk);
2257         if (err)
2258                 return err;
2259
2260         if (sk->sk_state != TCP_ESTABLISHED)
2261                 return -ENOTCONN;
2262
2263         if (msg->msg_namelen)
2264                 msg->msg_namelen = 0;
2265
2266         return unix_dgram_sendmsg(sock, msg, len);
2267 }
2268
2269 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2270                                   size_t size, int flags)
2271 {
2272         struct sock *sk = sock->sk;
2273
2274         if (sk->sk_state != TCP_ESTABLISHED)
2275                 return -ENOTCONN;
2276
2277         return unix_dgram_recvmsg(sock, msg, size, flags);
2278 }
2279
2280 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2281 {
2282         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2283
2284         if (addr) {
2285                 msg->msg_namelen = addr->len;
2286                 memcpy(msg->msg_name, addr->name, addr->len);
2287         }
2288 }
2289
2290 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2291                          int flags)
2292 {
2293         struct scm_cookie scm;
2294         struct socket *sock = sk->sk_socket;
2295         struct unix_sock *u = unix_sk(sk);
2296         struct sk_buff *skb, *last;
2297         long timeo;
2298         int skip;
2299         int err;
2300
2301         err = -EOPNOTSUPP;
2302         if (flags&MSG_OOB)
2303                 goto out;
2304
2305         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2306
2307         do {
2308                 mutex_lock(&u->iolock);
2309
2310                 skip = sk_peek_offset(sk, flags);
2311                 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2312                                               &skip, &err, &last);
2313                 if (skb) {
2314                         if (!(flags & MSG_PEEK))
2315                                 scm_stat_del(sk, skb);
2316                         break;
2317                 }
2318
2319                 mutex_unlock(&u->iolock);
2320
2321                 if (err != -EAGAIN)
2322                         break;
2323         } while (timeo &&
2324                  !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2325                                               &err, &timeo, last));
2326
2327         if (!skb) { /* implies iolock unlocked */
2328                 unix_state_lock(sk);
2329                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2330                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2331                     (sk->sk_shutdown & RCV_SHUTDOWN))
2332                         err = 0;
2333                 unix_state_unlock(sk);
2334                 goto out;
2335         }
2336
2337         if (wq_has_sleeper(&u->peer_wait))
2338                 wake_up_interruptible_sync_poll(&u->peer_wait,
2339                                                 EPOLLOUT | EPOLLWRNORM |
2340                                                 EPOLLWRBAND);
2341
2342         if (msg->msg_name)
2343                 unix_copy_addr(msg, skb->sk);
2344
2345         if (size > skb->len - skip)
2346                 size = skb->len - skip;
2347         else if (size < skb->len - skip)
2348                 msg->msg_flags |= MSG_TRUNC;
2349
2350         err = skb_copy_datagram_msg(skb, skip, msg, size);
2351         if (err)
2352                 goto out_free;
2353
2354         if (sock_flag(sk, SOCK_RCVTSTAMP))
2355                 __sock_recv_timestamp(msg, sk, skb);
2356
2357         memset(&scm, 0, sizeof(scm));
2358
2359         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2360         unix_set_secdata(&scm, skb);
2361
2362         if (!(flags & MSG_PEEK)) {
2363                 if (UNIXCB(skb).fp)
2364                         unix_detach_fds(&scm, skb);
2365
2366                 sk_peek_offset_bwd(sk, skb->len);
2367         } else {
2368                 /* It is questionable: on PEEK we could:
2369                    - do not return fds - good, but too simple 8)
2370                    - return fds, and do not return them on read (old strategy,
2371                      apparently wrong)
2372                    - clone fds (I chose it for now, it is the most universal
2373                      solution)
2374
2375                    POSIX 1003.1g does not actually define this clearly
2376                    at all. POSIX 1003.1g doesn't define a lot of things
2377                    clearly however!
2378
2379                 */
2380
2381                 sk_peek_offset_fwd(sk, size);
2382
2383                 if (UNIXCB(skb).fp)
2384                         unix_peek_fds(&scm, skb);
2385         }
2386         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2387
2388         scm_recv(sock, msg, &scm, flags);
2389
2390 out_free:
2391         skb_free_datagram(sk, skb);
2392         mutex_unlock(&u->iolock);
2393 out:
2394         return err;
2395 }
2396
2397 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2398                               int flags)
2399 {
2400         struct sock *sk = sock->sk;
2401
2402 #ifdef CONFIG_BPF_SYSCALL
2403         const struct proto *prot = READ_ONCE(sk->sk_prot);
2404
2405         if (prot != &unix_dgram_proto)
2406                 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2407                                             flags & ~MSG_DONTWAIT, NULL);
2408 #endif
2409         return __unix_dgram_recvmsg(sk, msg, size, flags);
2410 }
2411
2412 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2413                           sk_read_actor_t recv_actor)
2414 {
2415         int copied = 0;
2416
2417         while (1) {
2418                 struct unix_sock *u = unix_sk(sk);
2419                 struct sk_buff *skb;
2420                 int used, err;
2421
2422                 mutex_lock(&u->iolock);
2423                 skb = skb_recv_datagram(sk, 0, 1, &err);
2424                 mutex_unlock(&u->iolock);
2425                 if (!skb)
2426                         return err;
2427
2428                 used = recv_actor(desc, skb, 0, skb->len);
2429                 if (used <= 0) {
2430                         if (!copied)
2431                                 copied = used;
2432                         kfree_skb(skb);
2433                         break;
2434                 } else if (used <= skb->len) {
2435                         copied += used;
2436                 }
2437
2438                 kfree_skb(skb);
2439                 if (!desc->count)
2440                         break;
2441         }
2442
2443         return copied;
2444 }
2445
2446 /*
2447  *      Sleep until more data has arrived. But check for races..
2448  */
2449 static long unix_stream_data_wait(struct sock *sk, long timeo,
2450                                   struct sk_buff *last, unsigned int last_len,
2451                                   bool freezable)
2452 {
2453         struct sk_buff *tail;
2454         DEFINE_WAIT(wait);
2455
2456         unix_state_lock(sk);
2457
2458         for (;;) {
2459                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2460
2461                 tail = skb_peek_tail(&sk->sk_receive_queue);
2462                 if (tail != last ||
2463                     (tail && tail->len != last_len) ||
2464                     sk->sk_err ||
2465                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2466                     signal_pending(current) ||
2467                     !timeo)
2468                         break;
2469
2470                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2471                 unix_state_unlock(sk);
2472                 if (freezable)
2473                         timeo = freezable_schedule_timeout(timeo);
2474                 else
2475                         timeo = schedule_timeout(timeo);
2476                 unix_state_lock(sk);
2477
2478                 if (sock_flag(sk, SOCK_DEAD))
2479                         break;
2480
2481                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2482         }
2483
2484         finish_wait(sk_sleep(sk), &wait);
2485         unix_state_unlock(sk);
2486         return timeo;
2487 }
2488
2489 static unsigned int unix_skb_len(const struct sk_buff *skb)
2490 {
2491         return skb->len - UNIXCB(skb).consumed;
2492 }
2493
2494 struct unix_stream_read_state {
2495         int (*recv_actor)(struct sk_buff *, int, int,
2496                           struct unix_stream_read_state *);
2497         struct socket *socket;
2498         struct msghdr *msg;
2499         struct pipe_inode_info *pipe;
2500         size_t size;
2501         int flags;
2502         unsigned int splice_flags;
2503 };
2504
2505 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2506 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2507 {
2508         struct socket *sock = state->socket;
2509         struct sock *sk = sock->sk;
2510         struct unix_sock *u = unix_sk(sk);
2511         int chunk = 1;
2512         struct sk_buff *oob_skb;
2513
2514         mutex_lock(&u->iolock);
2515         unix_state_lock(sk);
2516
2517         if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2518                 unix_state_unlock(sk);
2519                 mutex_unlock(&u->iolock);
2520                 return -EINVAL;
2521         }
2522
2523         oob_skb = u->oob_skb;
2524
2525         if (!(state->flags & MSG_PEEK))
2526                 WRITE_ONCE(u->oob_skb, NULL);
2527
2528         unix_state_unlock(sk);
2529
2530         chunk = state->recv_actor(oob_skb, 0, chunk, state);
2531
2532         if (!(state->flags & MSG_PEEK)) {
2533                 UNIXCB(oob_skb).consumed += 1;
2534                 kfree_skb(oob_skb);
2535         }
2536
2537         mutex_unlock(&u->iolock);
2538
2539         if (chunk < 0)
2540                 return -EFAULT;
2541
2542         state->msg->msg_flags |= MSG_OOB;
2543         return 1;
2544 }
2545
2546 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2547                                   int flags, int copied)
2548 {
2549         struct unix_sock *u = unix_sk(sk);
2550
2551         if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2552                 skb_unlink(skb, &sk->sk_receive_queue);
2553                 consume_skb(skb);
2554                 skb = NULL;
2555         } else {
2556                 if (skb == u->oob_skb) {
2557                         if (copied) {
2558                                 skb = NULL;
2559                         } else if (sock_flag(sk, SOCK_URGINLINE)) {
2560                                 if (!(flags & MSG_PEEK)) {
2561                                         WRITE_ONCE(u->oob_skb, NULL);
2562                                         consume_skb(skb);
2563                                 }
2564                         } else if (!(flags & MSG_PEEK)) {
2565                                 skb_unlink(skb, &sk->sk_receive_queue);
2566                                 consume_skb(skb);
2567                                 skb = skb_peek(&sk->sk_receive_queue);
2568                         }
2569                 }
2570         }
2571         return skb;
2572 }
2573 #endif
2574
2575 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2576                                  sk_read_actor_t recv_actor)
2577 {
2578         if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2579                 return -ENOTCONN;
2580
2581         return unix_read_sock(sk, desc, recv_actor);
2582 }
2583
2584 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2585                                     bool freezable)
2586 {
2587         struct scm_cookie scm;
2588         struct socket *sock = state->socket;
2589         struct sock *sk = sock->sk;
2590         struct unix_sock *u = unix_sk(sk);
2591         int copied = 0;
2592         int flags = state->flags;
2593         int noblock = flags & MSG_DONTWAIT;
2594         bool check_creds = false;
2595         int target;
2596         int err = 0;
2597         long timeo;
2598         int skip;
2599         size_t size = state->size;
2600         unsigned int last_len;
2601
2602         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2603                 err = -EINVAL;
2604                 goto out;
2605         }
2606
2607         if (unlikely(flags & MSG_OOB)) {
2608                 err = -EOPNOTSUPP;
2609 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2610                 err = unix_stream_recv_urg(state);
2611 #endif
2612                 goto out;
2613         }
2614
2615         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2616         timeo = sock_rcvtimeo(sk, noblock);
2617
2618         memset(&scm, 0, sizeof(scm));
2619
2620         /* Lock the socket to prevent queue disordering
2621          * while sleeps in memcpy_tomsg
2622          */
2623         mutex_lock(&u->iolock);
2624
2625         skip = max(sk_peek_offset(sk, flags), 0);
2626
2627         do {
2628                 int chunk;
2629                 bool drop_skb;
2630                 struct sk_buff *skb, *last;
2631
2632 redo:
2633                 unix_state_lock(sk);
2634                 if (sock_flag(sk, SOCK_DEAD)) {
2635                         err = -ECONNRESET;
2636                         goto unlock;
2637                 }
2638                 last = skb = skb_peek(&sk->sk_receive_queue);
2639                 last_len = last ? last->len : 0;
2640
2641 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2642                 if (skb) {
2643                         skb = manage_oob(skb, sk, flags, copied);
2644                         if (!skb) {
2645                                 unix_state_unlock(sk);
2646                                 if (copied)
2647                                         break;
2648                                 goto redo;
2649                         }
2650                 }
2651 #endif
2652 again:
2653                 if (skb == NULL) {
2654                         if (copied >= target)
2655                                 goto unlock;
2656
2657                         /*
2658                          *      POSIX 1003.1g mandates this order.
2659                          */
2660
2661                         err = sock_error(sk);
2662                         if (err)
2663                                 goto unlock;
2664                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2665                                 goto unlock;
2666
2667                         unix_state_unlock(sk);
2668                         if (!timeo) {
2669                                 err = -EAGAIN;
2670                                 break;
2671                         }
2672
2673                         mutex_unlock(&u->iolock);
2674
2675                         timeo = unix_stream_data_wait(sk, timeo, last,
2676                                                       last_len, freezable);
2677
2678                         if (signal_pending(current)) {
2679                                 err = sock_intr_errno(timeo);
2680                                 scm_destroy(&scm);
2681                                 goto out;
2682                         }
2683
2684                         mutex_lock(&u->iolock);
2685                         goto redo;
2686 unlock:
2687                         unix_state_unlock(sk);
2688                         break;
2689                 }
2690
2691                 while (skip >= unix_skb_len(skb)) {
2692                         skip -= unix_skb_len(skb);
2693                         last = skb;
2694                         last_len = skb->len;
2695                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2696                         if (!skb)
2697                                 goto again;
2698                 }
2699
2700                 unix_state_unlock(sk);
2701
2702                 if (check_creds) {
2703                         /* Never glue messages from different writers */
2704                         if (!unix_skb_scm_eq(skb, &scm))
2705                                 break;
2706                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2707                         /* Copy credentials */
2708                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2709                         unix_set_secdata(&scm, skb);
2710                         check_creds = true;
2711                 }
2712
2713                 /* Copy address just once */
2714                 if (state->msg && state->msg->msg_name) {
2715                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2716                                          state->msg->msg_name);
2717                         unix_copy_addr(state->msg, skb->sk);
2718                         sunaddr = NULL;
2719                 }
2720
2721                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2722                 skb_get(skb);
2723                 chunk = state->recv_actor(skb, skip, chunk, state);
2724                 drop_skb = !unix_skb_len(skb);
2725                 /* skb is only safe to use if !drop_skb */
2726                 consume_skb(skb);
2727                 if (chunk < 0) {
2728                         if (copied == 0)
2729                                 copied = -EFAULT;
2730                         break;
2731                 }
2732                 copied += chunk;
2733                 size -= chunk;
2734
2735                 if (drop_skb) {
2736                         /* the skb was touched by a concurrent reader;
2737                          * we should not expect anything from this skb
2738                          * anymore and assume it invalid - we can be
2739                          * sure it was dropped from the socket queue
2740                          *
2741                          * let's report a short read
2742                          */
2743                         err = 0;
2744                         break;
2745                 }
2746
2747                 /* Mark read part of skb as used */
2748                 if (!(flags & MSG_PEEK)) {
2749                         UNIXCB(skb).consumed += chunk;
2750
2751                         sk_peek_offset_bwd(sk, chunk);
2752
2753                         if (UNIXCB(skb).fp) {
2754                                 scm_stat_del(sk, skb);
2755                                 unix_detach_fds(&scm, skb);
2756                         }
2757
2758                         if (unix_skb_len(skb))
2759                                 break;
2760
2761                         skb_unlink(skb, &sk->sk_receive_queue);
2762                         consume_skb(skb);
2763
2764                         if (scm.fp)
2765                                 break;
2766                 } else {
2767                         /* It is questionable, see note in unix_dgram_recvmsg.
2768                          */
2769                         if (UNIXCB(skb).fp)
2770                                 unix_peek_fds(&scm, skb);
2771
2772                         sk_peek_offset_fwd(sk, chunk);
2773
2774                         if (UNIXCB(skb).fp)
2775                                 break;
2776
2777                         skip = 0;
2778                         last = skb;
2779                         last_len = skb->len;
2780                         unix_state_lock(sk);
2781                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2782                         if (skb)
2783                                 goto again;
2784                         unix_state_unlock(sk);
2785                         break;
2786                 }
2787         } while (size);
2788
2789         mutex_unlock(&u->iolock);
2790         if (state->msg)
2791                 scm_recv(sock, state->msg, &scm, flags);
2792         else
2793                 scm_destroy(&scm);
2794 out:
2795         return copied ? : err;
2796 }
2797
2798 static int unix_stream_read_actor(struct sk_buff *skb,
2799                                   int skip, int chunk,
2800                                   struct unix_stream_read_state *state)
2801 {
2802         int ret;
2803
2804         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2805                                     state->msg, chunk);
2806         return ret ?: chunk;
2807 }
2808
2809 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2810                           size_t size, int flags)
2811 {
2812         struct unix_stream_read_state state = {
2813                 .recv_actor = unix_stream_read_actor,
2814                 .socket = sk->sk_socket,
2815                 .msg = msg,
2816                 .size = size,
2817                 .flags = flags
2818         };
2819
2820         return unix_stream_read_generic(&state, true);
2821 }
2822
2823 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2824                                size_t size, int flags)
2825 {
2826         struct unix_stream_read_state state = {
2827                 .recv_actor = unix_stream_read_actor,
2828                 .socket = sock,
2829                 .msg = msg,
2830                 .size = size,
2831                 .flags = flags
2832         };
2833
2834 #ifdef CONFIG_BPF_SYSCALL
2835         struct sock *sk = sock->sk;
2836         const struct proto *prot = READ_ONCE(sk->sk_prot);
2837
2838         if (prot != &unix_stream_proto)
2839                 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2840                                             flags & ~MSG_DONTWAIT, NULL);
2841 #endif
2842         return unix_stream_read_generic(&state, true);
2843 }
2844
2845 static int unix_stream_splice_actor(struct sk_buff *skb,
2846                                     int skip, int chunk,
2847                                     struct unix_stream_read_state *state)
2848 {
2849         return skb_splice_bits(skb, state->socket->sk,
2850                                UNIXCB(skb).consumed + skip,
2851                                state->pipe, chunk, state->splice_flags);
2852 }
2853
2854 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2855                                        struct pipe_inode_info *pipe,
2856                                        size_t size, unsigned int flags)
2857 {
2858         struct unix_stream_read_state state = {
2859                 .recv_actor = unix_stream_splice_actor,
2860                 .socket = sock,
2861                 .pipe = pipe,
2862                 .size = size,
2863                 .splice_flags = flags,
2864         };
2865
2866         if (unlikely(*ppos))
2867                 return -ESPIPE;
2868
2869         if (sock->file->f_flags & O_NONBLOCK ||
2870             flags & SPLICE_F_NONBLOCK)
2871                 state.flags = MSG_DONTWAIT;
2872
2873         return unix_stream_read_generic(&state, false);
2874 }
2875
2876 static int unix_shutdown(struct socket *sock, int mode)
2877 {
2878         struct sock *sk = sock->sk;
2879         struct sock *other;
2880
2881         if (mode < SHUT_RD || mode > SHUT_RDWR)
2882                 return -EINVAL;
2883         /* This maps:
2884          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2885          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2886          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2887          */
2888         ++mode;
2889
2890         unix_state_lock(sk);
2891         sk->sk_shutdown |= mode;
2892         other = unix_peer(sk);
2893         if (other)
2894                 sock_hold(other);
2895         unix_state_unlock(sk);
2896         sk->sk_state_change(sk);
2897
2898         if (other &&
2899                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2900
2901                 int peer_mode = 0;
2902                 const struct proto *prot = READ_ONCE(other->sk_prot);
2903
2904                 if (prot->unhash)
2905                         prot->unhash(other);
2906                 if (mode&RCV_SHUTDOWN)
2907                         peer_mode |= SEND_SHUTDOWN;
2908                 if (mode&SEND_SHUTDOWN)
2909                         peer_mode |= RCV_SHUTDOWN;
2910                 unix_state_lock(other);
2911                 other->sk_shutdown |= peer_mode;
2912                 unix_state_unlock(other);
2913                 other->sk_state_change(other);
2914                 if (peer_mode == SHUTDOWN_MASK)
2915                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2916                 else if (peer_mode & RCV_SHUTDOWN)
2917                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2918         }
2919         if (other)
2920                 sock_put(other);
2921
2922         return 0;
2923 }
2924
2925 long unix_inq_len(struct sock *sk)
2926 {
2927         struct sk_buff *skb;
2928         long amount = 0;
2929
2930         if (sk->sk_state == TCP_LISTEN)
2931                 return -EINVAL;
2932
2933         spin_lock(&sk->sk_receive_queue.lock);
2934         if (sk->sk_type == SOCK_STREAM ||
2935             sk->sk_type == SOCK_SEQPACKET) {
2936                 skb_queue_walk(&sk->sk_receive_queue, skb)
2937                         amount += unix_skb_len(skb);
2938         } else {
2939                 skb = skb_peek(&sk->sk_receive_queue);
2940                 if (skb)
2941                         amount = skb->len;
2942         }
2943         spin_unlock(&sk->sk_receive_queue.lock);
2944
2945         return amount;
2946 }
2947 EXPORT_SYMBOL_GPL(unix_inq_len);
2948
2949 long unix_outq_len(struct sock *sk)
2950 {
2951         return sk_wmem_alloc_get(sk);
2952 }
2953 EXPORT_SYMBOL_GPL(unix_outq_len);
2954
2955 static int unix_open_file(struct sock *sk)
2956 {
2957         struct path path;
2958         struct file *f;
2959         int fd;
2960
2961         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2962                 return -EPERM;
2963
2964         if (!smp_load_acquire(&unix_sk(sk)->addr))
2965                 return -ENOENT;
2966
2967         path = unix_sk(sk)->path;
2968         if (!path.dentry)
2969                 return -ENOENT;
2970
2971         path_get(&path);
2972
2973         fd = get_unused_fd_flags(O_CLOEXEC);
2974         if (fd < 0)
2975                 goto out;
2976
2977         f = dentry_open(&path, O_PATH, current_cred());
2978         if (IS_ERR(f)) {
2979                 put_unused_fd(fd);
2980                 fd = PTR_ERR(f);
2981                 goto out;
2982         }
2983
2984         fd_install(fd, f);
2985 out:
2986         path_put(&path);
2987
2988         return fd;
2989 }
2990
2991 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2992 {
2993         struct sock *sk = sock->sk;
2994         long amount = 0;
2995         int err;
2996
2997         switch (cmd) {
2998         case SIOCOUTQ:
2999                 amount = unix_outq_len(sk);
3000                 err = put_user(amount, (int __user *)arg);
3001                 break;
3002         case SIOCINQ:
3003                 amount = unix_inq_len(sk);
3004                 if (amount < 0)
3005                         err = amount;
3006                 else
3007                         err = put_user(amount, (int __user *)arg);
3008                 break;
3009         case SIOCUNIXFILE:
3010                 err = unix_open_file(sk);
3011                 break;
3012 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3013         case SIOCATMARK:
3014                 {
3015                         struct sk_buff *skb;
3016                         int answ = 0;
3017
3018                         skb = skb_peek(&sk->sk_receive_queue);
3019                         if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3020                                 answ = 1;
3021                         err = put_user(answ, (int __user *)arg);
3022                 }
3023                 break;
3024 #endif
3025         default:
3026                 err = -ENOIOCTLCMD;
3027                 break;
3028         }
3029         return err;
3030 }
3031
3032 #ifdef CONFIG_COMPAT
3033 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3034 {
3035         return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3036 }
3037 #endif
3038
3039 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3040 {
3041         struct sock *sk = sock->sk;
3042         __poll_t mask;
3043
3044         sock_poll_wait(file, sock, wait);
3045         mask = 0;
3046
3047         /* exceptional events? */
3048         if (sk->sk_err)
3049                 mask |= EPOLLERR;
3050         if (sk->sk_shutdown == SHUTDOWN_MASK)
3051                 mask |= EPOLLHUP;
3052         if (sk->sk_shutdown & RCV_SHUTDOWN)
3053                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3054
3055         /* readable? */
3056         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3057                 mask |= EPOLLIN | EPOLLRDNORM;
3058         if (sk_is_readable(sk))
3059                 mask |= EPOLLIN | EPOLLRDNORM;
3060 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3061         if (READ_ONCE(unix_sk(sk)->oob_skb))
3062                 mask |= EPOLLPRI;
3063 #endif
3064
3065         /* Connection-based need to check for termination and startup */
3066         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3067             sk->sk_state == TCP_CLOSE)
3068                 mask |= EPOLLHUP;
3069
3070         /*
3071          * we set writable also when the other side has shut down the
3072          * connection. This prevents stuck sockets.
3073          */
3074         if (unix_writable(sk))
3075                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3076
3077         return mask;
3078 }
3079
3080 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3081                                     poll_table *wait)
3082 {
3083         struct sock *sk = sock->sk, *other;
3084         unsigned int writable;
3085         __poll_t mask;
3086
3087         sock_poll_wait(file, sock, wait);
3088         mask = 0;
3089
3090         /* exceptional events? */
3091         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
3092                 mask |= EPOLLERR |
3093                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3094
3095         if (sk->sk_shutdown & RCV_SHUTDOWN)
3096                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3097         if (sk->sk_shutdown == SHUTDOWN_MASK)
3098                 mask |= EPOLLHUP;
3099
3100         /* readable? */
3101         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3102                 mask |= EPOLLIN | EPOLLRDNORM;
3103         if (sk_is_readable(sk))
3104                 mask |= EPOLLIN | EPOLLRDNORM;
3105
3106         /* Connection-based need to check for termination and startup */
3107         if (sk->sk_type == SOCK_SEQPACKET) {
3108                 if (sk->sk_state == TCP_CLOSE)
3109                         mask |= EPOLLHUP;
3110                 /* connection hasn't started yet? */
3111                 if (sk->sk_state == TCP_SYN_SENT)
3112                         return mask;
3113         }
3114
3115         /* No write status requested, avoid expensive OUT tests. */
3116         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3117                 return mask;
3118
3119         writable = unix_writable(sk);
3120         if (writable) {
3121                 unix_state_lock(sk);
3122
3123                 other = unix_peer(sk);
3124                 if (other && unix_peer(other) != sk &&
3125                     unix_recvq_full_lockless(other) &&
3126                     unix_dgram_peer_wake_me(sk, other))
3127                         writable = 0;
3128
3129                 unix_state_unlock(sk);
3130         }
3131
3132         if (writable)
3133                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3134         else
3135                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3136
3137         return mask;
3138 }
3139
3140 #ifdef CONFIG_PROC_FS
3141
3142 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3143
3144 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3145 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
3146 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3147
3148 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3149 {
3150         unsigned long offset = get_offset(*pos);
3151         unsigned long bucket = get_bucket(*pos);
3152         struct sock *sk;
3153         unsigned long count = 0;
3154
3155         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3156                 if (sock_net(sk) != seq_file_net(seq))
3157                         continue;
3158                 if (++count == offset)
3159                         break;
3160         }
3161
3162         return sk;
3163 }
3164
3165 static struct sock *unix_next_socket(struct seq_file *seq,
3166                                      struct sock *sk,
3167                                      loff_t *pos)
3168 {
3169         unsigned long bucket;
3170
3171         while (sk > (struct sock *)SEQ_START_TOKEN) {
3172                 sk = sk_next(sk);
3173                 if (!sk)
3174                         goto next_bucket;
3175                 if (sock_net(sk) == seq_file_net(seq))
3176                         return sk;
3177         }
3178
3179         do {
3180                 sk = unix_from_bucket(seq, pos);
3181                 if (sk)
3182                         return sk;
3183
3184 next_bucket:
3185                 bucket = get_bucket(*pos) + 1;
3186                 *pos = set_bucket_offset(bucket, 1);
3187         } while (bucket < ARRAY_SIZE(unix_socket_table));
3188
3189         return NULL;
3190 }
3191
3192 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3193         __acquires(unix_table_lock)
3194 {
3195         spin_lock(&unix_table_lock);
3196
3197         if (!*pos)
3198                 return SEQ_START_TOKEN;
3199
3200         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
3201                 return NULL;
3202
3203         return unix_next_socket(seq, NULL, pos);
3204 }
3205
3206 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3207 {
3208         ++*pos;
3209         return unix_next_socket(seq, v, pos);
3210 }
3211
3212 static void unix_seq_stop(struct seq_file *seq, void *v)
3213         __releases(unix_table_lock)
3214 {
3215         spin_unlock(&unix_table_lock);
3216 }
3217
3218 static int unix_seq_show(struct seq_file *seq, void *v)
3219 {
3220
3221         if (v == SEQ_START_TOKEN)
3222                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3223                          "Inode Path\n");
3224         else {
3225                 struct sock *s = v;
3226                 struct unix_sock *u = unix_sk(s);
3227                 unix_state_lock(s);
3228
3229                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3230                         s,
3231                         refcount_read(&s->sk_refcnt),
3232                         0,
3233                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3234                         s->sk_type,
3235                         s->sk_socket ?
3236                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3237                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3238                         sock_i_ino(s));
3239
3240                 if (u->addr) {  // under unix_table_lock here
3241                         int i, len;
3242                         seq_putc(seq, ' ');
3243
3244                         i = 0;
3245                         len = u->addr->len - sizeof(short);
3246                         if (!UNIX_ABSTRACT(s))
3247                                 len--;
3248                         else {
3249                                 seq_putc(seq, '@');
3250                                 i++;
3251                         }
3252                         for ( ; i < len; i++)
3253                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
3254                                          '@');
3255                 }
3256                 unix_state_unlock(s);
3257                 seq_putc(seq, '\n');
3258         }
3259
3260         return 0;
3261 }
3262
3263 static const struct seq_operations unix_seq_ops = {
3264         .start  = unix_seq_start,
3265         .next   = unix_seq_next,
3266         .stop   = unix_seq_stop,
3267         .show   = unix_seq_show,
3268 };
3269
3270 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3271 struct bpf_iter__unix {
3272         __bpf_md_ptr(struct bpf_iter_meta *, meta);
3273         __bpf_md_ptr(struct unix_sock *, unix_sk);
3274         uid_t uid __aligned(8);
3275 };
3276
3277 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3278                               struct unix_sock *unix_sk, uid_t uid)
3279 {
3280         struct bpf_iter__unix ctx;
3281
3282         meta->seq_num--;  /* skip SEQ_START_TOKEN */
3283         ctx.meta = meta;
3284         ctx.unix_sk = unix_sk;
3285         ctx.uid = uid;
3286         return bpf_iter_run_prog(prog, &ctx);
3287 }
3288
3289 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3290 {
3291         struct bpf_iter_meta meta;
3292         struct bpf_prog *prog;
3293         struct sock *sk = v;
3294         uid_t uid;
3295
3296         if (v == SEQ_START_TOKEN)
3297                 return 0;
3298
3299         uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3300         meta.seq = seq;
3301         prog = bpf_iter_get_info(&meta, false);
3302         return unix_prog_seq_show(prog, &meta, v, uid);
3303 }
3304
3305 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3306 {
3307         struct bpf_iter_meta meta;
3308         struct bpf_prog *prog;
3309
3310         if (!v) {
3311                 meta.seq = seq;
3312                 prog = bpf_iter_get_info(&meta, true);
3313                 if (prog)
3314                         (void)unix_prog_seq_show(prog, &meta, v, 0);
3315         }
3316
3317         unix_seq_stop(seq, v);
3318 }
3319
3320 static const struct seq_operations bpf_iter_unix_seq_ops = {
3321         .start  = unix_seq_start,
3322         .next   = unix_seq_next,
3323         .stop   = bpf_iter_unix_seq_stop,
3324         .show   = bpf_iter_unix_seq_show,
3325 };
3326 #endif
3327 #endif
3328
3329 static const struct net_proto_family unix_family_ops = {
3330         .family = PF_UNIX,
3331         .create = unix_create,
3332         .owner  = THIS_MODULE,
3333 };
3334
3335
3336 static int __net_init unix_net_init(struct net *net)
3337 {
3338         int error = -ENOMEM;
3339
3340         net->unx.sysctl_max_dgram_qlen = 10;
3341         if (unix_sysctl_register(net))
3342                 goto out;
3343
3344 #ifdef CONFIG_PROC_FS
3345         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3346                         sizeof(struct seq_net_private))) {
3347                 unix_sysctl_unregister(net);
3348                 goto out;
3349         }
3350 #endif
3351         error = 0;
3352 out:
3353         return error;
3354 }
3355
3356 static void __net_exit unix_net_exit(struct net *net)
3357 {
3358         unix_sysctl_unregister(net);
3359         remove_proc_entry("unix", net->proc_net);
3360 }
3361
3362 static struct pernet_operations unix_net_ops = {
3363         .init = unix_net_init,
3364         .exit = unix_net_exit,
3365 };
3366
3367 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3368 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3369                      struct unix_sock *unix_sk, uid_t uid)
3370
3371 static const struct bpf_iter_seq_info unix_seq_info = {
3372         .seq_ops                = &bpf_iter_unix_seq_ops,
3373         .init_seq_private       = bpf_iter_init_seq_net,
3374         .fini_seq_private       = bpf_iter_fini_seq_net,
3375         .seq_priv_size          = sizeof(struct seq_net_private),
3376 };
3377
3378 static struct bpf_iter_reg unix_reg_info = {
3379         .target                 = "unix",
3380         .ctx_arg_info_size      = 1,
3381         .ctx_arg_info           = {
3382                 { offsetof(struct bpf_iter__unix, unix_sk),
3383                   PTR_TO_BTF_ID_OR_NULL },
3384         },
3385         .seq_info               = &unix_seq_info,
3386 };
3387
3388 static void __init bpf_iter_register(void)
3389 {
3390         unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3391         if (bpf_iter_reg_target(&unix_reg_info))
3392                 pr_warn("Warning: could not register bpf iterator unix\n");
3393 }
3394 #endif
3395
3396 static int __init af_unix_init(void)
3397 {
3398         int rc = -1;
3399
3400         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3401
3402         rc = proto_register(&unix_dgram_proto, 1);
3403         if (rc != 0) {
3404                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3405                 goto out;
3406         }
3407
3408         rc = proto_register(&unix_stream_proto, 1);
3409         if (rc != 0) {
3410                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3411                 proto_unregister(&unix_dgram_proto);
3412                 goto out;
3413         }
3414
3415         sock_register(&unix_family_ops);
3416         register_pernet_subsys(&unix_net_ops);
3417         unix_bpf_build_proto();
3418
3419 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3420         bpf_iter_register();
3421 #endif
3422
3423 out:
3424         return rc;
3425 }
3426
3427 static void __exit af_unix_exit(void)
3428 {
3429         sock_unregister(PF_UNIX);
3430         proto_unregister(&unix_dgram_proto);
3431         proto_unregister(&unix_stream_proto);
3432         unregister_pernet_subsys(&unix_net_ops);
3433 }
3434
3435 /* Earlier than device_initcall() so that other drivers invoking
3436    request_module() don't end up in a loop when modprobe tries
3437    to use a UNIX socket. But later than subsys_initcall() because
3438    we depend on stuff initialised there */
3439 fs_initcall(af_unix_init);
3440 module_exit(af_unix_exit);
3441
3442 MODULE_LICENSE("GPL");
3443 MODULE_ALIAS_NETPROTO(PF_UNIX);