net/socket.c

   1 /*
   2  * NET          An implementation of the SOCKET network access protocol.
   3  *
   4  * Version:     @(#)socket.c    1.1.93  18/02/95
   5  *
   6  * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7  *              Ross Biro
   8  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9  *
  10  * Fixes:
  11  *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12  *                                      shutdown()
  13  *              Alan Cox        :       verify_area() fixes
  14  *              Alan Cox        :       Removed DDI
  15  *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16  *              Alan Cox        :       Moved a load of checks to the very
  17  *                                      top level.
  18  *              Alan Cox        :       Move address structures to/from user
  19  *                                      mode above the protocol layers.
  20  *              Rob Janssen     :       Allow 0 length sends.
  21  *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22  *                                      tty drivers).
  23  *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24  *              Jeff Uphoff     :       Made max number of sockets command-line
  25  *                                      configurable.
  26  *              Matti Aarnio    :       Made the number of sockets dynamic,
  27  *                                      to be allocated when needed, and mr.
  28  *                                      Uphoff's max is used as max to be
  29  *                                      allowed to allocate.
  30  *              Linus           :       Argh. removed all the socket allocation
  31  *                                      altogether: it's in the inode now.
  32  *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33  *                                      for NetROM and future kernel nfsd type
  34  *                                      stuff.
  35  *              Alan Cox        :       sendmsg/recvmsg basics.
  36  *              Tom Dyas        :       Export net symbols.
  37  *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38  *              Alan Cox        :       Added thread locking to sys_* calls
  39  *                                      for sockets. May have errors at the
  40  *                                      moment.
  41  *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42  *              Andi Kleen      :       Some small cleanups, optimizations,
  43  *                                      and fixed a copy_from_user() bug.
  44  *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45  *              Tigran Aivazian :       Made listen(2) backlog sanity checks
  46  *                                      protocol-independent
  47  *
  48  *
  49  *              This program is free software; you can redistribute it and/or
  50  *              modify it under the terms of the GNU General Public License
  51  *              as published by the Free Software Foundation; either version
  52  *              2 of the License, or (at your option) any later version.
  53  *
  54  *
  55  *      This module is effectively the top level interface to the BSD socket
  56  *      paradigm.
  57  *
  58  *      Based upon Swansea University Computer Society NET3.039
  59  */
  60
  61 #include <linux/mm.h>
  62 #include <linux/socket.h>
  63 #include <linux/file.h>
  64 #include <linux/net.h>
  65 #include <linux/interrupt.h>
  66 #include <linux/thread_info.h>
  67 #include <linux/rcupdate.h>
  68 #include <linux/netdevice.h>
  69 #include <linux/proc_fs.h>
  70 #include <linux/seq_file.h>
  71 #include <linux/mutex.h>
  72 #include <linux/wanrouter.h>
  73 #include <linux/if_bridge.h>
  74 #include <linux/if_frad.h>
  75 #include <linux/if_vlan.h>
  76 #include <linux/init.h>
  77 #include <linux/poll.h>
  78 #include <linux/cache.h>
  79 #include <linux/module.h>
  80 #include <linux/highmem.h>
  81 #include <linux/mount.h>
  82 #include <linux/security.h>
  83 #include <linux/syscalls.h>
  84 #include <linux/compat.h>
  85 #include <linux/kmod.h>
  86 #include <linux/audit.h>
  87 #include <linux/wireless.h>
  88 #include <linux/nsproxy.h>
  89 #include <linux/magic.h>
  90 #include <linux/slab.h>
  91 #include <linux/xattr.h>
  92
  93 #include <asm/uaccess.h>
  94 #include <asm/unistd.h>
  95
  96 #include <net/compat.h>
  97 #include <net/wext.h>
  98 #include <net/cls_cgroup.h>
  99
 100 #include <net/sock.h>
 101 #include <linux/netfilter.h>
 102
 103 #include <linux/if_tun.h>
 104 #include <linux/ipv6_route.h>
 105 #include <linux/route.h>
 106 #include <linux/sockios.h>
 107 #include <linux/atalk.h>
 108
 109 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 110 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
 111                          unsigned long nr_segs, loff_t pos);
 112 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
 113                           unsigned long nr_segs, loff_t pos);
 114 static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 115
 116 static int sock_close(struct inode *inode, struct file *file);
 117 static unsigned int sock_poll(struct file *file,
 118                               struct poll_table_struct *wait);
 119 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 120 #ifdef CONFIG_COMPAT
 121 static long compat_sock_ioctl(struct file *file,
 122                               unsigned int cmd, unsigned long arg);
 123 #endif
 124 static int sock_fasync(int fd, struct file *filp, int on);
 125 static ssize_t sock_sendpage(struct file *file, struct page *page,
 126                              int offset, size_t size, loff_t *ppos, int more);
 127 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 128                                 struct pipe_inode_info *pipe, size_t len,
 129                                 unsigned int flags);
 130
 131 /*
 132  *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 133  *      in the operation structures but are done directly via the socketcall() multiplexor.
 134  */
 135
 136 static const struct file_operations socket_file_ops = {
 137         .owner =        THIS_MODULE,
 138         .llseek =       no_llseek,
 139         .aio_read =     sock_aio_read,
 140         .aio_write =    sock_aio_write,
 141         .poll =         sock_poll,
 142         .unlocked_ioctl = sock_ioctl,
 143 #ifdef CONFIG_COMPAT
 144         .compat_ioctl = compat_sock_ioctl,
 145 #endif
 146         .mmap =         sock_mmap,
 147         .open =         sock_no_open,   /* special open code to disallow open via /proc */
 148         .release =      sock_close,
 149         .fasync =       sock_fasync,
 150         .sendpage =     sock_sendpage,
 151         .splice_write = generic_splice_sendpage,
 152         .splice_read =  sock_splice_read,
 153 };
 154
 155 /*
 156  *      The protocol list. Each protocol is registered in here.
 157  */
 158
 159 static DEFINE_SPINLOCK(net_family_lock);
 160 static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
 161
 162 /*
 163  *      Statistics counters of the socket lists
 164  */
 165
 166 static DEFINE_PER_CPU(int, sockets_in_use);
 167
 168 /*
 169  * Support routines.
 170  * Move socket addresses back and forth across the kernel/user
 171  * divide and look after the messy bits.
 172  */
 173
 174 /**
 175  *      move_addr_to_kernel     -       copy a socket address into kernel space
 176  *      @uaddr: Address in user space
 177  *      @kaddr: Address in kernel space
 178  *      @ulen: Length in user space
 179  *
 180  *      The address is copied into kernel space. If the provided address is
 181  *      too long an error code of -EINVAL is returned. If the copy gives
 182  *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 183  */
 184
 185 int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
 186 {
 187         if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
 188                 return -EINVAL;
 189         if (ulen == 0)
 190                 return 0;
 191         if (copy_from_user(kaddr, uaddr, ulen))
 192                 return -EFAULT;
 193         return audit_sockaddr(ulen, kaddr);
 194 }
 195
 196 /**
 197  *      move_addr_to_user       -       copy an address to user space
 198  *      @kaddr: kernel space address
 199  *      @klen: length of address in kernel
 200  *      @uaddr: user space address
 201  *      @ulen: pointer to user length field
 202  *
 203  *      The value pointed to by ulen on entry is the buffer length available.
 204  *      This is overwritten with the buffer space used. -EINVAL is returned
 205  *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 206  *      is returned if either the buffer or the length field are not
 207  *      accessible.
 208  *      After copying the data up to the limit the user specifies, the true
 209  *      length of the data is written over the length limit the user
 210  *      specified. Zero is returned for a success.
 211  */
 212
 213 static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
 214                              void __user *uaddr, int __user *ulen)
 215 {
 216         int err;
 217         int len;
 218
 219         err = get_user(len, ulen);
 220         if (err)
 221                 return err;
 222         if (len > klen)
 223                 len = klen;
 224         if (len < 0 || len > sizeof(struct sockaddr_storage))
 225                 return -EINVAL;
 226         if (len) {
 227                 if (audit_sockaddr(klen, kaddr))
 228                         return -ENOMEM;
 229                 if (copy_to_user(uaddr, kaddr, len))
 230                         return -EFAULT;
 231         }
 232         /*
 233          *      "fromlen shall refer to the value before truncation.."
 234          *                      1003.1g
 235          */
 236         return __put_user(klen, ulen);
 237 }
 238
 239 static struct kmem_cache *sock_inode_cachep __read_mostly;
 240
 241 static struct inode *sock_alloc_inode(struct super_block *sb)
 242 {
 243         struct socket_alloc *ei;
 244         struct socket_wq *wq;
 245
 246         ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 247         if (!ei)
 248                 return NULL;
 249         wq = kmalloc(sizeof(*wq), GFP_KERNEL);
 250         if (!wq) {
 251                 kmem_cache_free(sock_inode_cachep, ei);
 252                 return NULL;
 253         }
 254         init_waitqueue_head(&wq->wait);
 255         wq->fasync_list = NULL;
 256         RCU_INIT_POINTER(ei->socket.wq, wq);
 257
 258         ei->socket.state = SS_UNCONNECTED;
 259         ei->socket.flags = 0;
 260         ei->socket.ops = NULL;
 261         ei->socket.sk = NULL;
 262         ei->socket.file = NULL;
 263
 264         return &ei->vfs_inode;
 265 }
 266
 267 static void sock_destroy_inode(struct inode *inode)
 268 {
 269         struct socket_alloc *ei;
 270         struct socket_wq *wq;
 271
 272         ei = container_of(inode, struct socket_alloc, vfs_inode);
 273         wq = rcu_dereference_protected(ei->socket.wq, 1);
 274         kfree_rcu(wq, rcu);
 275         kmem_cache_free(sock_inode_cachep, ei);
 276 }
 277
 278 static void init_once(void *foo)
 279 {
 280         struct socket_alloc *ei = (struct socket_alloc *)foo;
 281
 282         inode_init_once(&ei->vfs_inode);
 283 }
 284
 285 static int init_inodecache(void)
 286 {
 287         sock_inode_cachep = kmem_cache_create("sock_inode_cache",
 288                                               sizeof(struct socket_alloc),
 289                                               0,
 290                                               (SLAB_HWCACHE_ALIGN |
 291                                                SLAB_RECLAIM_ACCOUNT |
 292                                                SLAB_MEM_SPREAD),
 293                                               init_once);
 294         if (sock_inode_cachep == NULL)
 295                 return -ENOMEM;
 296         return 0;
 297 }
 298
 299 static const struct super_operations sockfs_ops = {
 300         .alloc_inode    = sock_alloc_inode,
 301         .destroy_inode  = sock_destroy_inode,
 302         .statfs         = simple_statfs,
 303 };
 304
 305 /*
 306  * sockfs_dname() is called from d_path().
 307  */
 308 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 309 {
 310         return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
 311                                 dentry->d_inode->i_ino);
 312 }
 313
 314 static const struct dentry_operations sockfs_dentry_operations = {
 315         .d_dname  = sockfs_dname,
 316 };
 317
 318 static struct dentry *sockfs_mount(struct file_system_type *fs_type,
 319                          int flags, const char *dev_name, void *data)
 320 {
 321         return mount_pseudo(fs_type, "socket:", &sockfs_ops,
 322                 &sockfs_dentry_operations, SOCKFS_MAGIC);
 323 }
 324
 325 static struct vfsmount *sock_mnt __read_mostly;
 326
 327 static struct file_system_type sock_fs_type = {
 328         .name =         "sockfs",
 329         .mount =        sockfs_mount,
 330         .kill_sb =      kill_anon_super,
 331 };
 332
 333 /*
 334  *      Obtains the first available file descriptor and sets it up for use.
 335  *
 336  *      These functions create file structures and maps them to fd space
 337  *      of the current process. On success it returns file descriptor
 338  *      and file struct implicitly stored in sock->file.
 339  *      Note that another thread may close file descriptor before we return
 340  *      from this function. We use the fact that now we do not refer
 341  *      to socket after mapping. If one day we will need it, this
 342  *      function will increment ref. count on file by 1.
 343  *
 344  *      In any case returned fd MAY BE not valid!
 345  *      This race condition is unavoidable
 346  *      with shared fd spaces, we cannot solve it inside kernel,
 347  *      but we take care of internal coherence yet.
 348  */
 349
 350 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 351 {
 352         struct qstr name = { .name = "" };
 353         struct path path;
 354         struct file *file;
 355
 356         if (dname) {
 357                 name.name = dname;
 358                 name.len = strlen(name.name);
 359         } else if (sock->sk) {
 360                 name.name = sock->sk->sk_prot_creator->name;
 361                 name.len = strlen(name.name);
 362         }
 363         path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
 364         if (unlikely(!path.dentry))
 365                 return ERR_PTR(-ENOMEM);
 366         path.mnt = mntget(sock_mnt);
 367
 368         d_instantiate(path.dentry, SOCK_INODE(sock));
 369         SOCK_INODE(sock)->i_fop = &socket_file_ops;
 370
 371         file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 372                   &socket_file_ops);
 373         if (unlikely(!file)) {
 374                 /* drop dentry, keep inode */
 375                 ihold(path.dentry->d_inode);
 376                 path_put(&path);
 377                 return ERR_PTR(-ENFILE);
 378         }
 379
 380         sock->file = file;
 381         file->f_flags = O_RDWR | (flags & O_NONBLOCK);
 382         file->f_pos = 0;
 383         file->private_data = sock;
 384         return file;
 385 }
 386 EXPORT_SYMBOL(sock_alloc_file);
 387
 388 static int sock_map_fd(struct socket *sock, int flags)
 389 {
 390         struct file *newfile;
 391         int fd = get_unused_fd_flags(flags);
 392         if (unlikely(fd < 0))
 393                 return fd;
 394
 395         newfile = sock_alloc_file(sock, flags, NULL);
 396         if (likely(!IS_ERR(newfile))) {
 397                 fd_install(fd, newfile);
 398                 return fd;
 399         }
 400
 401         put_unused_fd(fd);
 402         return PTR_ERR(newfile);
 403 }
 404
 405 struct socket *sock_from_file(struct file *file, int *err)
 406 {
 407         if (file->f_op == &socket_file_ops)
 408                 return file->private_data;      /* set in sock_map_fd */
 409
 410         *err = -ENOTSOCK;
 411         return NULL;
 412 }
 413 EXPORT_SYMBOL(sock_from_file);
 414
 415 /**
 416  *      sockfd_lookup - Go from a file number to its socket slot
 417  *      @fd: file handle
 418  *      @err: pointer to an error code return
 419  *
 420  *      The file handle passed in is locked and the socket it is bound
 421  *      too is returned. If an error occurs the err pointer is overwritten
 422  *      with a negative errno code and NULL is returned. The function checks
 423  *      for both invalid handles and passing a handle which is not a socket.
 424  *
 425  *      On a success the socket object pointer is returned.
 426  */
 427
 428 struct socket *sockfd_lookup(int fd, int *err)
 429 {
 430         struct file *file;
 431         struct socket *sock;
 432
 433         file = fget(fd);
 434         if (!file) {
 435                 *err = -EBADF;
 436                 return NULL;
 437         }
 438
 439         sock = sock_from_file(file, err);
 440         if (!sock)
 441                 fput(file);
 442         return sock;
 443 }
 444 EXPORT_SYMBOL(sockfd_lookup);
 445
 446 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 447 {
 448         struct file *file;
 449         struct socket *sock;
 450
 451         *err = -EBADF;
 452         file = fget_light(fd, fput_needed);
 453         if (file) {
 454                 sock = sock_from_file(file, err);
 455                 if (sock)
 456                         return sock;
 457                 fput_light(file, *fput_needed);
 458         }
 459         return NULL;
 460 }
 461
 462 #define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
 463 #define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
 464 #define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
 465 static ssize_t sockfs_getxattr(struct dentry *dentry,
 466                                const char *name, void *value, size_t size)
 467 {
 468         const char *proto_name;
 469         size_t proto_size;
 470         int error;
 471
 472         error = -ENODATA;
 473         if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
 474                 proto_name = dentry->d_name.name;
 475                 proto_size = strlen(proto_name);
 476
 477                 if (value) {
 478                         error = -ERANGE;
 479                         if (proto_size + 1 > size)
 480                                 goto out;
 481
 482                         strncpy(value, proto_name, proto_size + 1);
 483                 }
 484                 error = proto_size + 1;
 485         }
 486
 487 out:
 488         return error;
 489 }
 490
 491 static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
 492                                 size_t size)
 493 {
 494         ssize_t len;
 495         ssize_t used = 0;
 496
 497         len = security_inode_listsecurity(dentry->d_inode, buffer, size);
 498         if (len < 0)
 499                 return len;
 500         used += len;
 501         if (buffer) {
 502                 if (size < used)
 503                         return -ERANGE;
 504                 buffer += len;
 505         }
 506
 507         len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
 508         used += len;
 509         if (buffer) {
 510                 if (size < used)
 511                         return -ERANGE;
 512                 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
 513                 buffer += len;
 514         }
 515
 516         return used;
 517 }
 518
 519 static const struct inode_operations sockfs_inode_ops = {
 520         .getxattr = sockfs_getxattr,
 521         .listxattr = sockfs_listxattr,
 522 };
 523
 524 /**
 525  *      sock_alloc      -       allocate a socket
 526  *
 527  *      Allocate a new inode and socket object. The two are bound together
 528  *      and initialised. The socket is then returned. If we are out of inodes
 529  *      NULL is returned.
 530  */
 531
 532 static struct socket *sock_alloc(void)
 533 {
 534         struct inode *inode;
 535         struct socket *sock;
 536
 537         inode = new_inode_pseudo(sock_mnt->mnt_sb);
 538         if (!inode)
 539                 return NULL;
 540
 541         sock = SOCKET_I(inode);
 542
 543         kmemcheck_annotate_bitfield(sock, type);
 544         inode->i_ino = get_next_ino();
 545         inode->i_mode = S_IFSOCK | S_IRWXUGO;
 546         inode->i_uid = current_fsuid();
 547         inode->i_gid = current_fsgid();
 548         inode->i_op = &sockfs_inode_ops;
 549
 550         this_cpu_add(sockets_in_use, 1);
 551         return sock;
 552 }
 553
 554 /*
 555  *      In theory you can't get an open on this inode, but /proc provides
 556  *      a back door. Remember to keep it shut otherwise you'll let the
 557  *      creepy crawlies in.
 558  */
 559
 560 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 561 {
 562         return -ENXIO;
 563 }
 564
 565 const struct file_operations bad_sock_fops = {
 566         .owner = THIS_MODULE,
 567         .open = sock_no_open,
 568         .llseek = noop_llseek,
 569 };
 570
 571 /**
 572  *      sock_release    -       close a socket
 573  *      @sock: socket to close
 574  *
 575  *      The socket is released from the protocol stack if it has a release
 576  *      callback, and the inode is then released if the socket is bound to
 577  *      an inode not a file.
 578  */
 579
 580 void sock_release(struct socket *sock)
 581 {
 582         if (sock->ops) {
 583                 struct module *owner = sock->ops->owner;
 584
 585                 sock->ops->release(sock);
 586                 sock->ops = NULL;
 587                 module_put(owner);
 588         }
 589
 590         if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
 591                 printk(KERN_ERR "sock_release: fasync list not empty!\n");
 592
 593         if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
 594                 return;
 595
 596         this_cpu_sub(sockets_in_use, 1);
 597         if (!sock->file) {
 598                 iput(SOCK_INODE(sock));
 599                 return;
 600         }
 601         sock->file = NULL;
 602 }
 603 EXPORT_SYMBOL(sock_release);
 604
 605 int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 606 {
 607         *tx_flags = 0;
 608         if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
 609                 *tx_flags |= SKBTX_HW_TSTAMP;
 610         if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
 611                 *tx_flags |= SKBTX_SW_TSTAMP;
 612         if (sock_flag(sk, SOCK_WIFI_STATUS))
 613                 *tx_flags |= SKBTX_WIFI_STATUS;
 614         return 0;
 615 }
 616 EXPORT_SYMBOL(sock_tx_timestamp);
 617
 618 static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
 619                                        struct msghdr *msg, size_t size)
 620 {
 621         struct sock_iocb *si = kiocb_to_siocb(iocb);
 622
 623         sock_update_classid(sock->sk);
 624
 625         si->sock = sock;
 626         si->scm = NULL;
 627         si->msg = msg;
 628         si->size = size;
 629
 630         return sock->ops->sendmsg(iocb, sock, msg, size);
 631 }
 632
 633 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 634                                  struct msghdr *msg, size_t size)
 635 {
 636         int err = security_socket_sendmsg(sock, msg, size);
 637
 638         return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
 639 }
 640
 641 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 642 {
 643         struct kiocb iocb;
 644         struct sock_iocb siocb;
 645         int ret;
 646
 647         init_sync_kiocb(&iocb, NULL);
 648         iocb.private = &siocb;
 649         ret = __sock_sendmsg(&iocb, sock, msg, size);
 650         if (-EIOCBQUEUED == ret)
 651                 ret = wait_on_sync_kiocb(&iocb);
 652         return ret;
 653 }
 654 EXPORT_SYMBOL(sock_sendmsg);
 655
 656 static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
 657 {
 658         struct kiocb iocb;
 659         struct sock_iocb siocb;
 660         int ret;
 661
 662         init_sync_kiocb(&iocb, NULL);
 663         iocb.private = &siocb;
 664         ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
 665         if (-EIOCBQUEUED == ret)
 666                 ret = wait_on_sync_kiocb(&iocb);
 667         return ret;
 668 }
 669
 670 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 671                    struct kvec *vec, size_t num, size_t size)
 672 {
 673         mm_segment_t oldfs = get_fs();
 674         int result;
 675
 676         set_fs(KERNEL_DS);
 677         /*
 678          * the following is safe, since for compiler definitions of kvec and
 679          * iovec are identical, yielding the same in-core layout and alignment
 680          */
 681         msg->msg_iov = (struct iovec *)vec;
 682         msg->msg_iovlen = num;
 683         result = sock_sendmsg(sock, msg, size);
 684         set_fs(oldfs);
 685         return result;
 686 }
 687 EXPORT_SYMBOL(kernel_sendmsg);
 688
 689 static int ktime2ts(ktime_t kt, struct timespec *ts)
 690 {
 691         if (kt.tv64) {
 692                 *ts = ktime_to_timespec(kt);
 693                 return 1;
 694         } else {
 695                 return 0;
 696         }
 697 }
 698
 699 /*
 700  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
 701  */
 702 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 703         struct sk_buff *skb)
 704 {
 705         int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 706         struct timespec ts[3];
 707         int empty = 1;
 708         struct skb_shared_hwtstamps *shhwtstamps =
 709                 skb_hwtstamps(skb);
 710
 711         /* Race occurred between timestamp enabling and packet
 712            receiving.  Fill in the current time for now. */
 713         if (need_software_tstamp && skb->tstamp.tv64 == 0)
 714                 __net_timestamp(skb);
 715
 716         if (need_software_tstamp) {
 717                 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
 718                         struct timeval tv;
 719                         skb_get_timestamp(skb, &tv);
 720                         put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
 721                                  sizeof(tv), &tv);
 722                 } else {
 723                         skb_get_timestampns(skb, &ts[0]);
 724                         put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
 725                                  sizeof(ts[0]), &ts[0]);
 726                 }
 727         }
 728
 729
 730         memset(ts, 0, sizeof(ts));
 731         if (skb->tstamp.tv64 &&
 732             sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
 733                 skb_get_timestampns(skb, ts + 0);
 734                 empty = 0;
 735         }
 736         if (shhwtstamps) {
 737                 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
 738                     ktime2ts(shhwtstamps->syststamp, ts + 1))
 739                         empty = 0;
 740                 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
 741                     ktime2ts(shhwtstamps->hwtstamp, ts + 2))
 742                         empty = 0;
 743         }
 744         if (!empty)
 745                 put_cmsg(msg, SOL_SOCKET,
 746                          SCM_TIMESTAMPING, sizeof(ts), &ts);
 747 }
 748 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 749
 750 void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
 751         struct sk_buff *skb)
 752 {
 753         int ack;
 754
 755         if (!sock_flag(sk, SOCK_WIFI_STATUS))
 756                 return;
 757         if (!skb->wifi_acked_valid)
 758                 return;
 759
 760         ack = skb->wifi_acked;
 761
 762         put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
 763 }
 764 EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
 765
 766 static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 767                                    struct sk_buff *skb)
 768 {
 769         if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
 770                 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
 771                         sizeof(__u32), &skb->dropcount);
 772 }
 773
 774 void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 775         struct sk_buff *skb)
 776 {
 777         sock_recv_timestamp(msg, sk, skb);
 778         sock_recv_drops(msg, sk, skb);
 779 }
 780 EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
 781
 782 static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
 783                                        struct msghdr *msg, size_t size, int flags)
 784 {
 785         struct sock_iocb *si = kiocb_to_siocb(iocb);
 786
 787         sock_update_classid(sock->sk);
 788
 789         si->sock = sock;
 790         si->scm = NULL;
 791         si->msg = msg;
 792         si->size = size;
 793         si->flags = flags;
 794
 795         return sock->ops->recvmsg(iocb, sock, msg, size, flags);
 796 }
 797
 798 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 799                                  struct msghdr *msg, size_t size, int flags)
 800 {
 801         int err = security_socket_recvmsg(sock, msg, size, flags);
 802
 803         return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
 804 }
 805
 806 int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 807                  size_t size, int flags)
 808 {
 809         struct kiocb iocb;
 810         struct sock_iocb siocb;
 811         int ret;
 812
 813         init_sync_kiocb(&iocb, NULL);
 814         iocb.private = &siocb;
 815         ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 816         if (-EIOCBQUEUED == ret)
 817                 ret = wait_on_sync_kiocb(&iocb);
 818         return ret;
 819 }
 820 EXPORT_SYMBOL(sock_recvmsg);
 821
 822 static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 823                               size_t size, int flags)
 824 {
 825         struct kiocb iocb;
 826         struct sock_iocb siocb;
 827         int ret;
 828
 829         init_sync_kiocb(&iocb, NULL);
 830         iocb.private = &siocb;
 831         ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
 832         if (-EIOCBQUEUED == ret)
 833                 ret = wait_on_sync_kiocb(&iocb);
 834         return ret;
 835 }
 836
 837 /**
 838  * kernel_recvmsg - Receive a message from a socket (kernel space)
 839  * @sock:       The socket to receive the message from
 840  * @msg:        Received message
 841  * @vec:        Input s/g array for message data
 842  * @num:        Size of input s/g array
 843  * @size:       Number of bytes to read
 844  * @flags:      Message flags (MSG_DONTWAIT, etc...)
 845  *
 846  * On return the msg structure contains the scatter/gather array passed in the
 847  * vec argument. The array is modified so that it consists of the unfilled
 848  * portion of the original array.
 849  *
 850  * The returned value is the total number of bytes received, or an error.
 851  */
 852 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 853                    struct kvec *vec, size_t num, size_t size, int flags)
 854 {
 855         mm_segment_t oldfs = get_fs();
 856         int result;
 857
 858         set_fs(KERNEL_DS);
 859         /*
 860          * the following is safe, since for compiler definitions of kvec and
 861          * iovec are identical, yielding the same in-core layout and alignment
 862          */
 863         msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
 864         result = sock_recvmsg(sock, msg, size, flags);
 865         set_fs(oldfs);
 866         return result;
 867 }
 868 EXPORT_SYMBOL(kernel_recvmsg);
 869
 870 static void sock_aio_dtor(struct kiocb *iocb)
 871 {
 872         kfree(iocb->private);
 873 }
 874
 875 static ssize_t sock_sendpage(struct file *file, struct page *page,
 876                              int offset, size_t size, loff_t *ppos, int more)
 877 {
 878         struct socket *sock;
 879         int flags;
 880
 881         sock = file->private_data;
 882
 883         flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 884         /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
 885         flags |= more;
 886
 887         return kernel_sendpage(sock, page, offset, size, flags);
 888 }
 889
 890 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 891                                 struct pipe_inode_info *pipe, size_t len,
 892                                 unsigned int flags)
 893 {
 894         struct socket *sock = file->private_data;
 895
 896         if (unlikely(!sock->ops->splice_read))
 897                 return -EINVAL;
 898
 899         sock_update_classid(sock->sk);
 900
 901         return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 902 }
 903
 904 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
 905                                          struct sock_iocb *siocb)
 906 {
 907         if (!is_sync_kiocb(iocb)) {
 908                 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
 909                 if (!siocb)
 910                         return NULL;
 911                 iocb->ki_dtor = sock_aio_dtor;
 912         }
 913
 914         siocb->kiocb = iocb;
 915         iocb->private = siocb;
 916         return siocb;
 917 }
 918
 919 static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
 920                 struct file *file, const struct iovec *iov,
 921                 unsigned long nr_segs)
 922 {
 923         struct socket *sock = file->private_data;
 924         size_t size = 0;
 925         int i;
 926
 927         for (i = 0; i < nr_segs; i++)
 928                 size += iov[i].iov_len;
 929
 930         msg->msg_name = NULL;
 931         msg->msg_namelen = 0;
 932         msg->msg_control = NULL;
 933         msg->msg_controllen = 0;
 934         msg->msg_iov = (struct iovec *)iov;
 935         msg->msg_iovlen = nr_segs;
 936         msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 937
 938         return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
 939 }
 940
 941 static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
 942                                 unsigned long nr_segs, loff_t pos)
 943 {
 944         struct sock_iocb siocb, *x;
 945
 946         if (pos != 0)
 947                 return -ESPIPE;
 948
 949         if (iocb->ki_left == 0) /* Match SYS5 behaviour */
 950                 return 0;
 951
 952
 953         x = alloc_sock_iocb(iocb, &siocb);
 954         if (!x)
 955                 return -ENOMEM;
 956         return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 957 }
 958
 959 static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
 960                         struct file *file, const struct iovec *iov,
 961                         unsigned long nr_segs)
 962 {
 963         struct socket *sock = file->private_data;
 964         size_t size = 0;
 965         int i;
 966
 967         for (i = 0; i < nr_segs; i++)
 968                 size += iov[i].iov_len;
 969
 970         msg->msg_name = NULL;
 971         msg->msg_namelen = 0;
 972         msg->msg_control = NULL;
 973         msg->msg_controllen = 0;
 974         msg->msg_iov = (struct iovec *)iov;
 975         msg->msg_iovlen = nr_segs;
 976         msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 977         if (sock->type == SOCK_SEQPACKET)
 978                 msg->msg_flags |= MSG_EOR;
 979
 980         return __sock_sendmsg(iocb, sock, msg, size);
 981 }
 982
 983 static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
 984                           unsigned long nr_segs, loff_t pos)
 985 {
 986         struct sock_iocb siocb, *x;
 987
 988         if (pos != 0)
 989                 return -ESPIPE;
 990
 991         x = alloc_sock_iocb(iocb, &siocb);
 992         if (!x)
 993                 return -ENOMEM;
 994
 995         return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 996 }
 997
 998 /*
 999  * Atomic setting of ioctl hooks to avoid race
1000  * with module unload.
1001  */
1002
1003 static DEFINE_MUTEX(br_ioctl_mutex);
1004 static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1005
1006 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1007 {
1008         mutex_lock(&br_ioctl_mutex);
1009         br_ioctl_hook = hook;
1010         mutex_unlock(&br_ioctl_mutex);
1011 }
1012 EXPORT_SYMBOL(brioctl_set);
1013
1014 static DEFINE_MUTEX(vlan_ioctl_mutex);
1015 static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1016
1017 void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1018 {
1019         mutex_lock(&vlan_ioctl_mutex);
1020         vlan_ioctl_hook = hook;
1021         mutex_unlock(&vlan_ioctl_mutex);
1022 }
1023 EXPORT_SYMBOL(vlan_ioctl_set);
1024
1025 static DEFINE_MUTEX(dlci_ioctl_mutex);
1026 static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1027
1028 void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1029 {
1030         mutex_lock(&dlci_ioctl_mutex);
1031         dlci_ioctl_hook = hook;
1032         mutex_unlock(&dlci_ioctl_mutex);
1033 }
1034 EXPORT_SYMBOL(dlci_ioctl_set);
1035
1036 static long sock_do_ioctl(struct net *net, struct socket *sock,
1037                                  unsigned int cmd, unsigned long arg)
1038 {
1039         int err;
1040         void __user *argp = (void __user *)arg;
1041
1042         err = sock->ops->ioctl(sock, cmd, arg);
1043
1044         /*
1045          * If this ioctl is unknown try to hand it down
1046          * to the NIC driver.
1047          */
1048         if (err == -ENOIOCTLCMD)
1049                 err = dev_ioctl(net, cmd, argp);
1050
1051         return err;
1052 }
1053
1054 /*
1055  *      With an ioctl, arg may well be a user mode pointer, but we don't know
1056  *      what to do with it - that's up to the protocol still.
1057  */
1058
1059 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1060 {
1061         struct socket *sock;
1062         struct sock *sk;
1063         void __user *argp = (void __user *)arg;
1064         int pid, err;
1065         struct net *net;
1066
1067         sock = file->private_data;
1068         sk = sock->sk;
1069         net = sock_net(sk);
1070         if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
1071                 err = dev_ioctl(net, cmd, argp);
1072         } else
1073 #ifdef CONFIG_WEXT_CORE
1074         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
1075                 err = dev_ioctl(net, cmd, argp);
1076         } else
1077 #endif
1078                 switch (cmd) {
1079                 case FIOSETOWN:
1080                 case SIOCSPGRP:
1081                         err = -EFAULT;
1082                         if (get_user(pid, (int __user *)argp))
1083                                 break;
1084                         err = f_setown(sock->file, pid, 1);
1085                         break;
1086                 case FIOGETOWN:
1087                 case SIOCGPGRP:
1088                         err = put_user(f_getown(sock->file),
1089                                        (int __user *)argp);
1090                         break;
1091                 case SIOCGIFBR:
1092                 case SIOCSIFBR:
1093                 case SIOCBRADDBR:
1094                 case SIOCBRDELBR:
1095                         err = -ENOPKG;
1096                         if (!br_ioctl_hook)
1097                                 request_module("bridge");
1098
1099                         mutex_lock(&br_ioctl_mutex);
1100                         if (br_ioctl_hook)
1101                                 err = br_ioctl_hook(net, cmd, argp);
1102                         mutex_unlock(&br_ioctl_mutex);
1103                         break;
1104                 case SIOCGIFVLAN:
1105                 case SIOCSIFVLAN:
1106                         err = -ENOPKG;
1107                         if (!vlan_ioctl_hook)
1108                                 request_module("8021q");
1109
1110                         mutex_lock(&vlan_ioctl_mutex);
1111                         if (vlan_ioctl_hook)
1112                                 err = vlan_ioctl_hook(net, argp);
1113                         mutex_unlock(&vlan_ioctl_mutex);
1114                         break;
1115                 case SIOCADDDLCI:
1116                 case SIOCDELDLCI:
1117                         err = -ENOPKG;
1118                         if (!dlci_ioctl_hook)
1119                                 request_module("dlci");
1120
1121                         mutex_lock(&dlci_ioctl_mutex);
1122                         if (dlci_ioctl_hook)
1123                                 err = dlci_ioctl_hook(cmd, argp);
1124                         mutex_unlock(&dlci_ioctl_mutex);
1125                         break;
1126                 default:
1127                         err = sock_do_ioctl(net, sock, cmd, arg);
1128                         break;
1129                 }
1130         return err;
1131 }
1132
1133 int sock_create_lite(int family, int type, int protocol, struct socket **res)
1134 {
1135         int err;
1136         struct socket *sock = NULL;
1137
1138         err = security_socket_create(family, type, protocol, 1);
1139         if (err)
1140                 goto out;
1141
1142         sock = sock_alloc();
1143         if (!sock) {
1144                 err = -ENOMEM;
1145                 goto out;
1146         }
1147
1148         sock->type = type;
1149         err = security_socket_post_create(sock, family, type, protocol, 1);
1150         if (err)
1151                 goto out_release;
1152
1153 out:
1154         *res = sock;
1155         return err;
1156 out_release:
1157         sock_release(sock);
1158         sock = NULL;
1159         goto out;
1160 }
1161 EXPORT_SYMBOL(sock_create_lite);
1162
1163 /* No kernel lock held - perfect */
1164 static unsigned int sock_poll(struct file *file, poll_table *wait)
1165 {
1166         struct socket *sock;
1167
1168         /*
1169          *      We can't return errors to poll, so it's either yes or no.
1170          */
1171         sock = file->private_data;
1172         return sock->ops->poll(file, sock, wait);
1173 }
1174
1175 static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1176 {
1177         struct socket *sock = file->private_data;
1178
1179         return sock->ops->mmap(file, sock, vma);
1180 }
1181
1182 static int sock_close(struct inode *inode, struct file *filp)
1183 {
1184         /*
1185          *      It was possible the inode is NULL we were
1186          *      closing an unfinished socket.
1187          */
1188
1189         if (!inode) {
1190                 printk(KERN_DEBUG "sock_close: NULL inode\n");
1191                 return 0;
1192         }
1193         sock_release(SOCKET_I(inode));
1194         return 0;
1195 }
1196
1197 /*
1198  *      Update the socket async list
1199  *
1200  *      Fasync_list locking strategy.
1201  *
1202  *      1. fasync_list is modified only under process context socket lock
1203  *         i.e. under semaphore.
1204  *      2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1205  *         or under socket lock
1206  */
1207
1208 static int sock_fasync(int fd, struct file *filp, int on)
1209 {
1210         struct socket *sock = filp->private_data;
1211         struct sock *sk = sock->sk;
1212         struct socket_wq *wq;
1213
1214         if (sk == NULL)
1215                 return -EINVAL;
1216
1217         lock_sock(sk);
1218         wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1219         fasync_helper(fd, filp, on, &wq->fasync_list);
1220
1221         if (!wq->fasync_list)
1222                 sock_reset_flag(sk, SOCK_FASYNC);
1223         else
1224                 sock_set_flag(sk, SOCK_FASYNC);
1225
1226         release_sock(sk);
1227         return 0;
1228 }
1229
1230 /* This function may be called only under socket lock or callback_lock or rcu_lock */
1231
1232 int sock_wake_async(struct socket *sock, int how, int band)
1233 {
1234         struct socket_wq *wq;
1235
1236         if (!sock)
1237                 return -1;
1238         rcu_read_lock();
1239         wq = rcu_dereference(sock->wq);
1240         if (!wq || !wq->fasync_list) {
1241                 rcu_read_unlock();
1242                 return -1;
1243         }
1244         switch (how) {
1245         case SOCK_WAKE_WAITD:
1246                 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1247                         break;
1248                 goto call_kill;
1249         case SOCK_WAKE_SPACE:
1250                 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1251                         break;
1252                 /* fall through */
1253         case SOCK_WAKE_IO:
1254 call_kill:
1255                 kill_fasync(&wq->fasync_list, SIGIO, band);
1256                 break;
1257         case SOCK_WAKE_URG:
1258                 kill_fasync(&wq->fasync_list, SIGURG, band);
1259         }
1260         rcu_read_unlock();
1261         return 0;
1262 }
1263 EXPORT_SYMBOL(sock_wake_async);
1264
1265 int __sock_create(struct net *net, int family, int type, int protocol,
1266                          struct socket **res, int kern)
1267 {
1268         int err;
1269         struct socket *sock;
1270         const struct net_proto_family *pf;
1271
1272         /*
1273          *      Check protocol is in range
1274          */
1275         if (family < 0 || family >= NPROTO)
1276                 return -EAFNOSUPPORT;
1277         if (type < 0 || type >= SOCK_MAX)
1278                 return -EINVAL;
1279
1280         /* Compatibility.
1281
1282            This uglymoron is moved from INET layer to here to avoid
1283            deadlock in module load.
1284          */
1285         if (family == PF_INET && type == SOCK_PACKET) {
1286                 static int warned;
1287                 if (!warned) {
1288                         warned = 1;
1289                         printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1290                                current->comm);
1291                 }
1292                 family = PF_PACKET;
1293         }
1294
1295         err = security_socket_create(family, type, protocol, kern);
1296         if (err)
1297                 return err;
1298
1299         /*
1300          *      Allocate the socket and allow the family to set things up. if
1301          *      the protocol is 0, the family is instructed to select an appropriate
1302          *      default.
1303          */
1304         sock = sock_alloc();
1305         if (!sock) {
1306                 net_warn_ratelimited("socket: no more sockets\n");
1307                 return -ENFILE; /* Not exactly a match, but its the
1308                                    closest posix thing */
1309         }
1310
1311         sock->type = type;
1312
1313 #ifdef CONFIG_MODULES
1314         /* Attempt to load a protocol module if the find failed.
1315          *
1316          * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1317          * requested real, full-featured networking support upon configuration.
1318          * Otherwise module support will break!
1319          */
1320         if (rcu_access_pointer(net_families[family]) == NULL)
1321                 request_module("net-pf-%d", family);
1322 #endif
1323
1324         rcu_read_lock();
1325         pf = rcu_dereference(net_families[family]);
1326         err = -EAFNOSUPPORT;
1327         if (!pf)
1328                 goto out_release;
1329
1330         /*
1331          * We will call the ->create function, that possibly is in a loadable
1332          * module, so we have to bump that loadable module refcnt first.
1333          */
1334         if (!try_module_get(pf->owner))
1335                 goto out_release;
1336
1337         /* Now protected by module ref count */
1338         rcu_read_unlock();
1339
1340         err = pf->create(net, sock, protocol, kern);
1341         if (err < 0)
1342                 goto out_module_put;
1343
1344         /*
1345          * Now to bump the refcnt of the [loadable] module that owns this
1346          * socket at sock_release time we decrement its refcnt.
1347          */
1348         if (!try_module_get(sock->ops->owner))
1349                 goto out_module_busy;
1350
1351         /*
1352          * Now that we're done with the ->create function, the [loadable]
1353          * module can have its refcnt decremented
1354          */
1355         module_put(pf->owner);
1356         err = security_socket_post_create(sock, family, type, protocol, kern);
1357         if (err)
1358                 goto out_sock_release;
1359         *res = sock;
1360
1361         return 0;
1362
1363 out_module_busy:
1364         err = -EAFNOSUPPORT;
1365 out_module_put:
1366         sock->ops = NULL;
1367         module_put(pf->owner);
1368 out_sock_release:
1369         sock_release(sock);
1370         return err;
1371
1372 out_release:
1373         rcu_read_unlock();
1374         goto out_sock_release;
1375 }
1376 EXPORT_SYMBOL(__sock_create);
1377
1378 int sock_create(int family, int type, int protocol, struct socket **res)
1379 {
1380         return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1381 }
1382 EXPORT_SYMBOL(sock_create);
1383
1384 int sock_create_kern(int family, int type, int protocol, struct socket **res)
1385 {
1386         return __sock_create(&init_net, family, type, protocol, res, 1);
1387 }
1388 EXPORT_SYMBOL(sock_create_kern);
1389
1390 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1391 {
1392         int retval;
1393         struct socket *sock;
1394         int flags;
1395
1396         /* Check the SOCK_* constants for consistency.  */
1397         BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1398         BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1399         BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1400         BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1401
1402         flags = type & ~SOCK_TYPE_MASK;
1403         if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1404                 return -EINVAL;
1405         type &= SOCK_TYPE_MASK;
1406
1407         if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1408                 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1409
1410         retval = sock_create(family, type, protocol, &sock);
1411         if (retval < 0)
1412                 goto out;
1413
1414         retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1415         if (retval < 0)
1416                 goto out_release;
1417
1418 out:
1419         /* It may be already another descriptor 8) Not kernel problem. */
1420         return retval;
1421
1422 out_release:
1423         sock_release(sock);
1424         return retval;
1425 }
1426
1427 /*
1428  *      Create a pair of connected sockets.
1429  */
1430
1431 SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1432                 int __user *, usockvec)
1433 {
1434         struct socket *sock1, *sock2;
1435         int fd1, fd2, err;
1436         struct file *newfile1, *newfile2;
1437         int flags;
1438
1439         flags = type & ~SOCK_TYPE_MASK;
1440         if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1441                 return -EINVAL;
1442         type &= SOCK_TYPE_MASK;
1443
1444         if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1445                 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1446
1447         /*
1448          * Obtain the first socket and check if the underlying protocol
1449          * supports the socketpair call.
1450          */
1451
1452         err = sock_create(family, type, protocol, &sock1);
1453         if (err < 0)
1454                 goto out;
1455
1456         err = sock_create(family, type, protocol, &sock2);
1457         if (err < 0)
1458                 goto out_release_1;
1459
1460         err = sock1->ops->socketpair(sock1, sock2);
1461         if (err < 0)
1462                 goto out_release_both;
1463
1464         fd1 = get_unused_fd_flags(flags);
1465         if (unlikely(fd1 < 0)) {
1466                 err = fd1;
1467                 goto out_release_both;
1468         }
1469         fd2 = get_unused_fd_flags(flags);
1470         if (unlikely(fd2 < 0)) {
1471                 err = fd2;
1472                 put_unused_fd(fd1);
1473                 goto out_release_both;
1474         }
1475
1476         newfile1 = sock_alloc_file(sock1, flags, NULL);
1477         if (unlikely(IS_ERR(newfile1))) {
1478                 err = PTR_ERR(newfile1);
1479                 put_unused_fd(fd1);
1480                 put_unused_fd(fd2);
1481                 goto out_release_both;
1482         }
1483
1484         newfile2 = sock_alloc_file(sock2, flags, NULL);
1485         if (IS_ERR(newfile2)) {
1486                 err = PTR_ERR(newfile2);
1487                 fput(newfile1);
1488                 put_unused_fd(fd1);
1489                 put_unused_fd(fd2);
1490                 sock_release(sock2);
1491                 goto out;
1492         }
1493
1494         audit_fd_pair(fd1, fd2);
1495         fd_install(fd1, newfile1);
1496         fd_install(fd2, newfile2);
1497         /* fd1 and fd2 may be already another descriptors.
1498          * Not kernel problem.
1499          */
1500
1501         err = put_user(fd1, &usockvec[0]);
1502         if (!err)
1503                 err = put_user(fd2, &usockvec[1]);
1504         if (!err)
1505                 return 0;
1506
1507         sys_close(fd2);
1508         sys_close(fd1);
1509         return err;
1510
1511 out_release_both:
1512         sock_release(sock2);
1513 out_release_1:
1514         sock_release(sock1);
1515 out:
1516         return err;
1517 }
1518
1519 /*
1520  *      Bind a name to a socket. Nothing much to do here since it's
1521  *      the protocol's responsibility to handle the local address.
1522  *
1523  *      We move the socket address to kernel space before we call
1524  *      the protocol layer (having also checked the address is ok).
1525  */
1526
1527 SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1528 {
1529         struct socket *sock;
1530         struct sockaddr_storage address;
1531         int err, fput_needed;
1532
1533         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1534         if (sock) {
1535                 err = move_addr_to_kernel(umyaddr, addrlen, &address);
1536                 if (err >= 0) {
1537                         err = security_socket_bind(sock,
1538                                                    (struct sockaddr *)&address,
1539                                                    addrlen);
1540                         if (!err)
1541                                 err = sock->ops->bind(sock,
1542                                                       (struct sockaddr *)
1543                                                       &address, addrlen);
1544                 }
1545                 fput_light(sock->file, fput_needed);
1546         }
1547         return err;
1548 }
1549
1550 /*
1551  *      Perform a listen. Basically, we allow the protocol to do anything
1552  *      necessary for a listen, and if that works, we mark the socket as
1553  *      ready for listening.
1554  */
1555
1556 SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1557 {
1558         struct socket *sock;
1559         int err, fput_needed;
1560         int somaxconn;
1561
1562         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1563         if (sock) {
1564                 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
1565                 if ((unsigned int)backlog > somaxconn)
1566                         backlog = somaxconn;
1567
1568                 err = security_socket_listen(sock, backlog);
1569                 if (!err)
1570                         err = sock->ops->listen(sock, backlog);
1571
1572                 fput_light(sock->file, fput_needed);
1573         }
1574         return err;
1575 }
1576
1577 /*
1578  *      For accept, we attempt to create a new socket, set up the link
1579  *      with the client, wake up the client, then return the new
1580  *      connected fd. We collect the address of the connector in kernel
1581  *      space and move it to user at the very end. This is unclean because
1582  *      we open the socket then return an error.
1583  *
1584  *      1003.1g adds the ability to recvmsg() to query connection pending
1585  *      status to recvmsg. We need to add that support in a way thats
1586  *      clean when we restucture accept also.
1587  */
1588
1589 SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1590                 int __user *, upeer_addrlen, int, flags)
1591 {
1592         struct socket *sock, *newsock;
1593         struct file *newfile;
1594         int err, len, newfd, fput_needed;
1595         struct sockaddr_storage address;
1596
1597         if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1598                 return -EINVAL;
1599
1600         if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1601                 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1602
1603         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1604         if (!sock)
1605                 goto out;
1606
1607         err = -ENFILE;
1608         newsock = sock_alloc();
1609         if (!newsock)
1610                 goto out_put;
1611
1612         newsock->type = sock->type;
1613         newsock->ops = sock->ops;
1614
1615         /*
1616          * We don't need try_module_get here, as the listening socket (sock)
1617          * has the protocol module (sock->ops->owner) held.
1618          */
1619         __module_get(newsock->ops->owner);
1620
1621         newfd = get_unused_fd_flags(flags);
1622         if (unlikely(newfd < 0)) {
1623                 err = newfd;
1624                 sock_release(newsock);
1625                 goto out_put;
1626         }
1627         newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1628         if (unlikely(IS_ERR(newfile))) {
1629                 err = PTR_ERR(newfile);
1630                 put_unused_fd(newfd);
1631                 sock_release(newsock);
1632                 goto out_put;
1633         }
1634
1635         err = security_socket_accept(sock, newsock);
1636         if (err)
1637                 goto out_fd;
1638
1639         err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1640         if (err < 0)
1641                 goto out_fd;
1642
1643         if (upeer_sockaddr) {
1644                 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
1645                                           &len, 2) < 0) {
1646                         err = -ECONNABORTED;
1647                         goto out_fd;
1648                 }
1649                 err = move_addr_to_user(&address,
1650                                         len, upeer_sockaddr, upeer_addrlen);
1651                 if (err < 0)
1652                         goto out_fd;
1653         }
1654
1655         /* File flags are not inherited via accept() unlike another OSes. */
1656
1657         fd_install(newfd, newfile);
1658         err = newfd;
1659
1660 out_put:
1661         fput_light(sock->file, fput_needed);
1662 out:
1663         return err;
1664 out_fd:
1665         fput(newfile);
1666         put_unused_fd(newfd);
1667         goto out_put;
1668 }
1669
1670 SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1671                 int __user *, upeer_addrlen)
1672 {
1673         return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
1674 }
1675
1676 /*
1677  *      Attempt to connect to a socket with the server address.  The address
1678  *      is in user space so we verify it is OK and move it to kernel space.
1679  *
1680  *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1681  *      break bindings
1682  *
1683  *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1684  *      other SEQPACKET protocols that take time to connect() as it doesn't
1685  *      include the -EINPROGRESS status for such sockets.
1686  */
1687
1688 SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1689                 int, addrlen)
1690 {
1691         struct socket *sock;
1692         struct sockaddr_storage address;
1693         int err, fput_needed;
1694
1695         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1696         if (!sock)
1697                 goto out;
1698         err = move_addr_to_kernel(uservaddr, addrlen, &address);
1699         if (err < 0)
1700                 goto out_put;
1701
1702         err =
1703             security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1704         if (err)
1705                 goto out_put;
1706
1707         err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1708                                  sock->file->f_flags);
1709 out_put:
1710         fput_light(sock->file, fput_needed);
1711 out:
1712         return err;
1713 }
1714
1715 /*
1716  *      Get the local address ('name') of a socket object. Move the obtained
1717  *      name to user space.
1718  */
1719
1720 SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1721                 int __user *, usockaddr_len)
1722 {
1723         struct socket *sock;
1724         struct sockaddr_storage address;
1725         int len, err, fput_needed;
1726
1727         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1728         if (!sock)
1729                 goto out;
1730
1731         err = security_socket_getsockname(sock);
1732         if (err)
1733                 goto out_put;
1734
1735         err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1736         if (err)
1737                 goto out_put;
1738         err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1739
1740 out_put:
1741         fput_light(sock->file, fput_needed);
1742 out:
1743         return err;
1744 }
1745
1746 /*
1747  *      Get the remote address ('name') of a socket object. Move the obtained
1748  *      name to user space.
1749  */
1750
1751 SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1752                 int __user *, usockaddr_len)
1753 {
1754         struct socket *sock;
1755         struct sockaddr_storage address;
1756         int len, err, fput_needed;
1757
1758         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1759         if (sock != NULL) {
1760                 err = security_socket_getpeername(sock);
1761                 if (err) {
1762                         fput_light(sock->file, fput_needed);
1763                         return err;
1764                 }
1765
1766                 err =
1767                     sock->ops->getname(sock, (struct sockaddr *)&address, &len,
1768                                        1);
1769                 if (!err)
1770                         err = move_addr_to_user(&address, len, usockaddr,
1771                                                 usockaddr_len);
1772                 fput_light(sock->file, fput_needed);
1773         }
1774         return err;
1775 }
1776
1777 /*
1778  *      Send a datagram to a given address. We move the address into kernel
1779  *      space and check the user space data area is readable before invoking
1780  *      the protocol.
1781  */
1782
1783 SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1784                 unsigned int, flags, struct sockaddr __user *, addr,
1785                 int, addr_len)
1786 {
1787         struct socket *sock;
1788         struct sockaddr_storage address;
1789         int err;
1790         struct msghdr msg;
1791         struct iovec iov;
1792         int fput_needed;
1793
1794         if (len > INT_MAX)
1795                 len = INT_MAX;
1796         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1797         if (!sock)
1798                 goto out;
1799
1800         iov.iov_base = buff;
1801         iov.iov_len = len;
1802         msg.msg_name = NULL;
1803         msg.msg_iov = &iov;
1804         msg.msg_iovlen = 1;
1805         msg.msg_control = NULL;
1806         msg.msg_controllen = 0;
1807         msg.msg_namelen = 0;
1808         if (addr) {
1809                 err = move_addr_to_kernel(addr, addr_len, &address);
1810                 if (err < 0)
1811                         goto out_put;
1812                 msg.msg_name = (struct sockaddr *)&address;
1813                 msg.msg_namelen = addr_len;
1814         }
1815         if (sock->file->f_flags & O_NONBLOCK)
1816                 flags |= MSG_DONTWAIT;
1817         msg.msg_flags = flags;
1818         err = sock_sendmsg(sock, &msg, len);
1819
1820 out_put:
1821         fput_light(sock->file, fput_needed);
1822 out:
1823         return err;
1824 }
1825
1826 /*
1827  *      Send a datagram down a socket.
1828  */
1829
1830 SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1831                 unsigned int, flags)
1832 {
1833         return sys_sendto(fd, buff, len, flags, NULL, 0);
1834 }
1835
1836 /*
1837  *      Receive a frame from the socket and optionally record the address of the
1838  *      sender. We verify the buffers are writable and if needed move the
1839  *      sender address from kernel to user space.
1840  */
1841
1842 SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1843                 unsigned int, flags, struct sockaddr __user *, addr,
1844                 int __user *, addr_len)
1845 {
1846         struct socket *sock;
1847         struct iovec iov;
1848         struct msghdr msg;
1849         struct sockaddr_storage address;
1850         int err, err2;
1851         int fput_needed;
1852
1853         if (size > INT_MAX)
1854                 size = INT_MAX;
1855         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1856         if (!sock)
1857                 goto out;
1858
1859         msg.msg_control = NULL;
1860         msg.msg_controllen = 0;
1861         msg.msg_iovlen = 1;
1862         msg.msg_iov = &iov;
1863         iov.iov_len = size;
1864         iov.iov_base = ubuf;
1865         msg.msg_name = (struct sockaddr *)&address;
1866         msg.msg_namelen = sizeof(address);
1867         if (sock->file->f_flags & O_NONBLOCK)
1868                 flags |= MSG_DONTWAIT;
1869         err = sock_recvmsg(sock, &msg, size, flags);
1870
1871         if (err >= 0 && addr != NULL) {
1872                 err2 = move_addr_to_user(&address,
1873                                          msg.msg_namelen, addr, addr_len);
1874                 if (err2 < 0)
1875                         err = err2;
1876         }
1877
1878         fput_light(sock->file, fput_needed);
1879 out:
1880         return err;
1881 }
1882
1883 /*
1884  *      Receive a datagram from a socket.
1885  */
1886
1887 asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1888                          unsigned int flags)
1889 {
1890         return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1891 }
1892
1893 /*
1894  *      Set a socket option. Because we don't know the option lengths we have
1895  *      to pass the user mode parameter for the protocols to sort out.
1896  */
1897
1898 SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1899                 char __user *, optval, int, optlen)
1900 {
1901         int err, fput_needed;
1902         struct socket *sock;
1903
1904         if (optlen < 0)
1905                 return -EINVAL;
1906
1907         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1908         if (sock != NULL) {
1909                 err = security_socket_setsockopt(sock, level, optname);
1910                 if (err)
1911                         goto out_put;
1912
1913                 if (level == SOL_SOCKET)
1914                         err =
1915                             sock_setsockopt(sock, level, optname, optval,
1916                                             optlen);
1917                 else
1918                         err =
1919                             sock->ops->setsockopt(sock, level, optname, optval,
1920                                                   optlen);
1921 out_put:
1922                 fput_light(sock->file, fput_needed);
1923         }
1924         return err;
1925 }
1926
1927 /*
1928  *      Get a socket option. Because we don't know the option lengths we have
1929  *      to pass a user mode parameter for the protocols to sort out.
1930  */
1931
1932 SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1933                 char __user *, optval, int __user *, optlen)
1934 {
1935         int err, fput_needed;
1936         struct socket *sock;
1937
1938         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1939         if (sock != NULL) {
1940                 err = security_socket_getsockopt(sock, level, optname);
1941                 if (err)
1942                         goto out_put;
1943
1944                 if (level == SOL_SOCKET)
1945                         err =
1946                             sock_getsockopt(sock, level, optname, optval,
1947                                             optlen);
1948                 else
1949                         err =
1950                             sock->ops->getsockopt(sock, level, optname, optval,
1951                                                   optlen);
1952 out_put:
1953                 fput_light(sock->file, fput_needed);
1954         }
1955         return err;
1956 }
1957
1958 /*
1959  *      Shutdown a socket.
1960  */
1961
1962 SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1963 {
1964         int err, fput_needed;
1965         struct socket *sock;
1966
1967         sock = sockfd_lookup_light(fd, &err, &fput_needed);
1968         if (sock != NULL) {
1969                 err = security_socket_shutdown(sock, how);
1970                 if (!err)
1971                         err = sock->ops->shutdown(sock, how);
1972                 fput_light(sock->file, fput_needed);
1973         }
1974         return err;
1975 }
1976
/*
 * Helpers yielding the address of a user msghdr field in either the native
 * or the compat layout; the selected fields have the same type
 * (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they need a local named 'flags' and,
 * next to the pointer '<msg>', a compat pointer named '<msg>_compat'.
 */
#define COMPAT_MSG(msg, member)						\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member		\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1983
1984 struct used_address {
1985         struct sockaddr_storage name;
1986         unsigned int name_len;
1987 };
1988
1989 static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
1990                          struct msghdr *msg_sys, unsigned int flags,
1991                          struct used_address *used_address)
1992 {
1993         struct compat_msghdr __user *msg_compat =
1994             (struct compat_msghdr __user *)msg;
1995         struct sockaddr_storage address;
1996         struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1997         unsigned char ctl[sizeof(struct cmsghdr) + 20]
1998             __attribute__ ((aligned(sizeof(__kernel_size_t))));
1999         /* 20 is size of ipv6_pktinfo */
2000         unsigned char *ctl_buf = ctl;
2001         int err, ctl_len, total_len;
2002
2003         err = -EFAULT;
2004         if (MSG_CMSG_COMPAT & flags) {
2005                 if (get_compat_msghdr(msg_sys, msg_compat))
2006                         return -EFAULT;
2007         } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
2008                 return -EFAULT;
2009
2010         if (msg_sys->msg_iovlen > UIO_FASTIOV) {
2011                 err = -EMSGSIZE;
2012                 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2013                         goto out;
2014                 err = -ENOMEM;
2015                 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2016                               GFP_KERNEL);
2017                 if (!iov)
2018                         goto out;
2019         }
2020
2021         /* This will also move the address data into kernel space */
2022         if (MSG_CMSG_COMPAT & flags) {
2023                 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
2024         } else
2025                 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
2026         if (err < 0)
2027                 goto out_freeiov;
2028         total_len = err;
2029
2030         err = -ENOBUFS;
2031
2032         if (msg_sys->msg_controllen > INT_MAX)
2033                 goto out_freeiov;
2034         ctl_len = msg_sys->msg_controllen;
2035         if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
2036                 err =
2037                     cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
2038                                                      sizeof(ctl));
2039                 if (err)
2040                         goto out_freeiov;
2041                 ctl_buf = msg_sys->msg_control;
2042                 ctl_len = msg_sys->msg_controllen;
2043         } else if (ctl_len) {
2044                 if (ctl_len > sizeof(ctl)) {
2045                         ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
2046                         if (ctl_buf == NULL)
2047                                 goto out_freeiov;
2048                 }
2049                 err = -EFAULT;
2050                 /*
2051                  * Careful! Before this, msg_sys->msg_control contains a user pointer.
2052                  * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2053                  * checking falls down on this.
2054                  */
2055                 if (copy_from_user(ctl_buf,
2056                                    (void __user __force *)msg_sys->msg_control,
2057                                    ctl_len))
2058                         goto out_freectl;
2059                 msg_sys->msg_control = ctl_buf;
2060         }
2061         msg_sys->msg_flags = flags;
2062
2063         if (sock->file->f_flags & O_NONBLOCK)
2064                 msg_sys->msg_flags |= MSG_DONTWAIT;
2065         /*
2066          * If this is sendmmsg() and current destination address is same as
2067          * previously succeeded address, omit asking LSM's decision.
2068          * used_address->name_len is initialized to UINT_MAX so that the first
2069          * destination address never matches.
2070          */
2071         if (used_address && msg_sys->msg_name &&
2072             used_address->name_len == msg_sys->msg_namelen &&
2073             !memcmp(&used_address->name, msg_sys->msg_name,
2074                     used_address->name_len)) {
2075                 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2076                 goto out_freectl;
2077         }
2078         err = sock_sendmsg(sock, msg_sys, total_len);
2079         /*
2080          * If this is sendmmsg() and sending to current destination address was
2081          * successful, remember it.
2082          */
2083         if (used_address && err >= 0) {
2084                 used_address->name_len = msg_sys->msg_namelen;
2085                 if (msg_sys->msg_name)
2086                         memcpy(&used_address->name, msg_sys->msg_name,
2087                                used_address->name_len);
2088         }
2089
2090 out_freectl:
2091         if (ctl_buf != ctl)
2092                 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2093 out_freeiov:
2094         if (iov != iovstack)
2095                 kfree(iov);
2096 out:
2097         return err;
2098 }
2099
2100 /*
2101  *      BSD sendmsg interface
2102  */
2103
2104 SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
2105 {
2106         int fput_needed, err;
2107         struct msghdr msg_sys;
2108         struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2109
2110         if (!sock)
2111                 goto out;
2112
2113         err = __sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
2114
2115         fput_light(sock->file, fput_needed);
2116 out:
2117         return err;
2118 }
2119
2120 /*
2121  *      Linux sendmmsg interface
2122  */
2123
2124 int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2125                    unsigned int flags)
2126 {
2127         int fput_needed, err, datagrams;
2128         struct socket *sock;
2129         struct mmsghdr __user *entry;
2130         struct compat_mmsghdr __user *compat_entry;
2131         struct msghdr msg_sys;
2132         struct used_address used_address;
2133
2134         if (vlen > UIO_MAXIOV)
2135                 vlen = UIO_MAXIOV;
2136
2137         datagrams = 0;
2138
2139         sock = sockfd_lookup_light(fd, &err, &fput_needed);
2140         if (!sock)
2141                 return err;
2142
2143         used_address.name_len = UINT_MAX;
2144         entry = mmsg;
2145         compat_entry = (struct compat_mmsghdr __user *)mmsg;
2146         err = 0;
2147
2148         while (datagrams < vlen) {
2149                 if (MSG_CMSG_COMPAT & flags) {
2150                         err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2151                                             &msg_sys, flags, &used_address);
2152                         if (err < 0)
2153                                 break;
2154                         err = __put_user(err, &compat_entry->msg_len);
2155                         ++compat_entry;
2156                 } else {
2157                         err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
2158                                             &msg_sys, flags, &used_address);
2159                         if (err < 0)
2160                                 break;
2161                         err = put_user(err, &entry->msg_len);
2162                         ++entry;
2163                 }
2164
2165                 if (err)
2166                         break;
2167                 ++datagrams;
2168         }
2169
2170         fput_light(sock->file, fput_needed);
2171
2172         /* We only return an error if no datagrams were able to be sent */
2173         if (datagrams != 0)
2174                 return datagrams;
2175
2176         return err;
2177 }
2178
2179 SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2180                 unsigned int, vlen, unsigned int, flags)
2181 {
2182         return __sys_sendmmsg(fd, mmsg, vlen, flags);
2183 }
2184
2185 static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
2186                          struct msghdr *msg_sys, unsigned int flags, int nosec)
2187 {
2188         struct compat_msghdr __user *msg_compat =
2189             (struct compat_msghdr __user *)msg;
2190         struct iovec iovstack[UIO_FASTIOV];
2191         struct iovec *iov = iovstack;
2192         unsigned long cmsg_ptr;
2193         int err, total_len, len;
2194
2195         /* kernel mode address */
2196         struct sockaddr_storage addr;
2197
2198         /* user mode address pointers */
2199         struct sockaddr __user *uaddr;
2200         int __user *uaddr_len;
2201
2202         if (MSG_CMSG_COMPAT & flags) {
2203                 if (get_compat_msghdr(msg_sys, msg_compat))
2204                         return -EFAULT;
2205         } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
2206                 return -EFAULT;
2207
2208         if (msg_sys->msg_iovlen > UIO_FASTIOV) {
2209                 err = -EMSGSIZE;
2210                 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2211                         goto out;
2212                 err = -ENOMEM;
2213                 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2214                               GFP_KERNEL);
2215                 if (!iov)
2216                         goto out;
2217         }
2218
2219         /*
2220          *      Save the user-mode address (verify_iovec will change the
2221          *      kernel msghdr to use the kernel address space)
2222          */
2223
2224         uaddr = (__force void __user *)msg_sys->msg_name;
2225         uaddr_len = COMPAT_NAMELEN(msg);
2226         if (MSG_CMSG_COMPAT & flags) {
2227                 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
2228         } else
2229                 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
2230         if (err < 0)
2231                 goto out_freeiov;
2232         total_len = err;
2233
2234         cmsg_ptr = (unsigned long)msg_sys->msg_control;
2235         msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
2236
2237         if (sock->file->f_flags & O_NONBLOCK)
2238                 flags |= MSG_DONTWAIT;
2239         err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2240                                                           total_len, flags);
2241         if (err < 0)
2242                 goto out_freeiov;
2243         len = err;
2244
2245         if (uaddr != NULL) {
2246                 err = move_addr_to_user(&addr,
2247                                         msg_sys->msg_namelen, uaddr,
2248                                         uaddr_len);
2249                 if (err < 0)
2250                         goto out_freeiov;
2251         }
2252         err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
2253                          COMPAT_FLAGS(msg));
2254         if (err)
2255                 goto out_freeiov;
2256         if (MSG_CMSG_COMPAT & flags)
2257                 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2258                                  &msg_compat->msg_controllen);
2259         else
2260                 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
2261                                  &msg->msg_controllen);
2262         if (err)
2263                 goto out_freeiov;
2264         err = len;
2265
2266 out_freeiov:
2267         if (iov != iovstack)
2268                 kfree(iov);
2269 out:
2270         return err;
2271 }
2272
2273 /*
2274  *      BSD recvmsg interface
2275  */
2276
2277 SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2278                 unsigned int, flags)
2279 {
2280         int fput_needed, err;
2281         struct msghdr msg_sys;
2282         struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2283
2284         if (!sock)
2285                 goto out;
2286
2287         err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2288
2289         fput_light(sock->file, fput_needed);
2290 out:
2291         return err;
2292 }
2293
2294 /*
2295  *     Linux recvmmsg interface
2296  */
2297
2298 int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2299                    unsigned int flags, struct timespec *timeout)
2300 {
2301         int fput_needed, err, datagrams;
2302         struct socket *sock;
2303         struct mmsghdr __user *entry;
2304         struct compat_mmsghdr __user *compat_entry;
2305         struct msghdr msg_sys;
2306         struct timespec end_time;
2307
2308         if (timeout &&
2309             poll_select_set_timeout(&end_time, timeout->tv_sec,
2310                                     timeout->tv_nsec))
2311                 return -EINVAL;
2312
2313         datagrams = 0;
2314
2315         sock = sockfd_lookup_light(fd, &err, &fput_needed);
2316         if (!sock)
2317                 return err;
2318
2319         err = sock_error(sock->sk);
2320         if (err)
2321                 goto out_put;
2322
2323         entry = mmsg;
2324         compat_entry = (struct compat_mmsghdr __user *)mmsg;
2325
2326         while (datagrams < vlen) {
2327                 /*
2328                  * No need to ask LSM for more than the first datagram.
2329                  */
2330                 if (MSG_CMSG_COMPAT & flags) {
2331                         err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2332                                             &msg_sys, flags & ~MSG_WAITFORONE,
2333                                             datagrams);
2334                         if (err < 0)
2335                                 break;
2336                         err = __put_user(err, &compat_entry->msg_len);
2337                         ++compat_entry;
2338                 } else {
2339                         err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2340                                             &msg_sys, flags & ~MSG_WAITFORONE,
2341                                             datagrams);
2342                         if (err < 0)
2343                                 break;
2344                         err = put_user(err, &entry->msg_len);
2345                         ++entry;
2346                 }
2347
2348                 if (err)
2349                         break;
2350                 ++datagrams;
2351
2352                 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2353                 if (flags & MSG_WAITFORONE)
2354                         flags |= MSG_DONTWAIT;
2355
2356                 if (timeout) {
2357                         ktime_get_ts(timeout);
2358                         *timeout = timespec_sub(end_time, *timeout);
2359                         if (timeout->tv_sec < 0) {
2360                                 timeout->tv_sec = timeout->tv_nsec = 0;
2361                                 break;
2362                         }
2363
2364                         /* Timeout, return less than vlen datagrams */
2365                         if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2366                                 break;
2367                 }
2368
2369                 /* Out of band data, return right away */
2370                 if (msg_sys.msg_flags & MSG_OOB)
2371                         break;
2372         }
2373
2374 out_put:
2375         fput_light(sock->file, fput_needed);
2376
2377         if (err == 0)
2378                 return datagrams;
2379
2380         if (datagrams != 0) {
2381                 /*
2382                  * We may return less entries than requested (vlen) if the
2383                  * sock is non block and there aren't enough datagrams...
2384                  */
2385                 if (err != -EAGAIN) {
2386                         /*
2387                          * ... or  if recvmsg returns an error after we
2388                          * received some datagrams, where we record the
2389                          * error to return on the next call or if the
2390                          * app asks about it using getsockopt(SO_ERROR).
2391                          */
2392                         sock->sk->sk_err = -err;
2393                 }
2394
2395                 return datagrams;
2396         }
2397
2398         return err;
2399 }
2400
2401 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2402                 unsigned int, vlen, unsigned int, flags,
2403                 struct timespec __user *, timeout)
2404 {
2405         int datagrams;
2406         struct timespec timeout_sys;
2407
2408         if (!timeout)
2409                 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2410
2411         if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2412                 return -EFAULT;
2413
2414         datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2415
2416         if (datagrams > 0 &&
2417             copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2418                 datagrams = -EFAULT;
2419
2420         return datagrams;
2421 }
2422
2423 #ifdef __ARCH_WANT_SYS_SOCKETCALL
2424 /* Argument list sizes for sys_socketcall */
2425 #define AL(x) ((x) * sizeof(unsigned long))
2426 static const unsigned char nargs[21] = {
2427         AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2428         AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2429         AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
2430         AL(4), AL(5), AL(4)
2431 };
2432
2433 #undef AL
2434
2435 /*
2436  *      System call vectors.
2437  *
2438  *      Argument checking cleaned up. Saved 20% in size.
2439  *  This function doesn't need to set the kernel lock because
2440  *  it is set by the callees.
2441  */
2442
2443 SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
2444 {
2445         unsigned long a[6];
2446         unsigned long a0, a1;
2447         int err;
2448         unsigned int len;
2449
2450         if (call < 1 || call > SYS_SENDMMSG)
2451                 return -EINVAL;
2452
2453         len = nargs[call];
2454         if (len > sizeof(a))
2455                 return -EINVAL;
2456
2457         /* copy_from_user should be SMP safe. */
2458         if (copy_from_user(a, args, len))
2459                 return -EFAULT;
2460
2461         audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2462
2463         a0 = a[0];
2464         a1 = a[1];
2465
2466         switch (call) {
2467         case SYS_SOCKET:
2468                 err = sys_socket(a0, a1, a[2]);
2469                 break;
2470         case SYS_BIND:
2471                 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2472                 break;
2473         case SYS_CONNECT:
2474                 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2475                 break;
2476         case SYS_LISTEN:
2477                 err = sys_listen(a0, a1);
2478                 break;
2479         case SYS_ACCEPT:
2480                 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2481                                   (int __user *)a[2], 0);
2482                 break;
2483         case SYS_GETSOCKNAME:
2484                 err =
2485                     sys_getsockname(a0, (struct sockaddr __user *)a1,
2486                                     (int __user *)a[2]);
2487                 break;
2488         case SYS_GETPEERNAME:
2489                 err =
2490                     sys_getpeername(a0, (struct sockaddr __user *)a1,
2491                                     (int __user *)a[2]);
2492                 break;
2493         case SYS_SOCKETPAIR:
2494                 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2495                 break;
2496         case SYS_SEND:
2497                 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2498                 break;
2499         case SYS_SENDTO:
2500                 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2501                                  (struct sockaddr __user *)a[4], a[5]);
2502                 break;
2503         case SYS_RECV:
2504                 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2505                 break;
2506         case SYS_RECVFROM:
2507                 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2508                                    (struct sockaddr __user *)a[4],
2509                                    (int __user *)a[5]);
2510                 break;
2511         case SYS_SHUTDOWN:
2512                 err = sys_shutdown(a0, a1);
2513                 break;
2514         case SYS_SETSOCKOPT:
2515                 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2516                 break;
2517         case SYS_GETSOCKOPT:
2518                 err =
2519                     sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2520                                    (int __user *)a[4]);
2521                 break;
2522         case SYS_SENDMSG:
2523                 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2524                 break;
2525         case SYS_SENDMMSG:
2526                 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2527                 break;
2528         case SYS_RECVMSG:
2529                 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2530                 break;
2531         case SYS_RECVMMSG:
2532                 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2533                                    (struct timespec __user *)a[4]);
2534                 break;
2535         case SYS_ACCEPT4:
2536                 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2537                                   (int __user *)a[2], a[3]);
2538                 break;
2539         default:
2540                 err = -EINVAL;
2541                 break;
2542         }
2543         return err;
2544 }
2545
2546 #endif                          /* __ARCH_WANT_SYS_SOCKETCALL */
2547
2548 /**
2549  *      sock_register - add a socket protocol handler
2550  *      @ops: description of protocol
2551  *
2552  *      This function is called by a protocol handler that wants to
2553  *      advertise its address family, and have it linked into the
2554  *      socket interface. The value ops->family coresponds to the
2555  *      socket system call protocol family.
2556  */
2557 int sock_register(const struct net_proto_family *ops)
2558 {
2559         int err;
2560
2561         if (ops->family >= NPROTO) {
2562                 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2563                        NPROTO);
2564                 return -ENOBUFS;
2565         }
2566
2567         spin_lock(&net_family_lock);
2568         if (rcu_dereference_protected(net_families[ops->family],
2569                                       lockdep_is_held(&net_family_lock)))
2570                 err = -EEXIST;
2571         else {
2572                 rcu_assign_pointer(net_families[ops->family], ops);
2573                 err = 0;
2574         }
2575         spin_unlock(&net_family_lock);
2576
2577         printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2578         return err;
2579 }
2580 EXPORT_SYMBOL(sock_register);
2581
2582 /**
2583  *      sock_unregister - remove a protocol handler
2584  *      @family: protocol family to remove
2585  *
2586  *      This function is called by a protocol handler that wants to
2587  *      remove its address family, and have it unlinked from the
2588  *      new socket creation.
2589  *
2590  *      If protocol handler is a module, then it can use module reference
2591  *      counts to protect against new references. If protocol handler is not
2592  *      a module then it needs to provide its own protection in
2593  *      the ops->create routine.
2594  */
2595 void sock_unregister(int family)
2596 {
2597         BUG_ON(family < 0 || family >= NPROTO);
2598
2599         spin_lock(&net_family_lock);
2600         RCU_INIT_POINTER(net_families[family], NULL);
2601         spin_unlock(&net_family_lock);
2602
2603         synchronize_rcu();
2604
2605         printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
2606 }
2607 EXPORT_SYMBOL(sock_unregister);
2608
2609 static int __init sock_init(void)
2610 {
2611         int err;
2612         /*
2613          *      Initialize the network sysctl infrastructure.
2614          */
2615         err = net_sysctl_init();
2616         if (err)
2617                 goto out;
2618
2619         /*
2620          *      Initialize skbuff SLAB cache
2621          */
2622         skb_init();
2623
2624         /*
2625          *      Initialize the protocols module.
2626          */
2627
2628         init_inodecache();
2629
2630         err = register_filesystem(&sock_fs_type);
2631         if (err)
2632                 goto out_fs;
2633         sock_mnt = kern_mount(&sock_fs_type);
2634         if (IS_ERR(sock_mnt)) {
2635                 err = PTR_ERR(sock_mnt);
2636                 goto out_mount;
2637         }
2638
2639         /* The real protocol initialization is performed in later initcalls.
2640          */
2641
2642 #ifdef CONFIG_NETFILTER
2643         netfilter_init();
2644 #endif
2645
2646 #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2647         skb_timestamping_init();
2648 #endif
2649
2650 out:
2651         return err;
2652
2653 out_mount:
2654         unregister_filesystem(&sock_fs_type);
2655 out_fs:
2656         goto out;
2657 }
2658
2659 core_initcall(sock_init);       /* early initcall */
2660
#ifdef CONFIG_PROC_FS
/*
 * Print the number of sockets in use to @seq, summed over the per-cpu
 * sockets_in_use counters of every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		in_use += per_cpu(sockets_in_use, cpu);
	}

	/* The sum may come out negative; report it as zero in that case. */
	seq_printf(seq, "sockets: used %d\n", in_use < 0 ? 0 : in_use);
}
#endif				/* CONFIG_PROC_FS */
2677
2678 #ifdef CONFIG_COMPAT
2679 static int do_siocgstamp(struct net *net, struct socket *sock,
2680                          unsigned int cmd, void __user *up)
2681 {
2682         mm_segment_t old_fs = get_fs();
2683         struct timeval ktv;
2684         int err;
2685
2686         set_fs(KERNEL_DS);
2687         err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
2688         set_fs(old_fs);
2689         if (!err)
2690                 err = compat_put_timeval(&ktv, up);
2691
2692         return err;
2693 }
2694
2695 static int do_siocgstampns(struct net *net, struct socket *sock,
2696                            unsigned int cmd, void __user *up)
2697 {
2698         mm_segment_t old_fs = get_fs();
2699         struct timespec kts;
2700         int err;
2701
2702         set_fs(KERNEL_DS);
2703         err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
2704         set_fs(old_fs);
2705         if (!err)
2706                 err = compat_put_timespec(&kts, up);
2707
2708         return err;
2709 }
2710
2711 static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
2712 {
2713         struct ifreq __user *uifr;
2714         int err;
2715
2716         uifr = compat_alloc_user_space(sizeof(struct ifreq));
2717         if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2718                 return -EFAULT;
2719
2720         err = dev_ioctl(net, SIOCGIFNAME, uifr);
2721         if (err)
2722                 return err;
2723
2724         if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
2725                 return -EFAULT;
2726
2727         return 0;
2728 }
2729
2730 static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2731 {
2732         struct compat_ifconf ifc32;
2733         struct ifconf ifc;
2734         struct ifconf __user *uifc;
2735         struct compat_ifreq __user *ifr32;
2736         struct ifreq __user *ifr;
2737         unsigned int i, j;
2738         int err;
2739
2740         if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
2741                 return -EFAULT;
2742
2743         memset(&ifc, 0, sizeof(ifc));
2744         if (ifc32.ifcbuf == 0) {
2745                 ifc32.ifc_len = 0;
2746                 ifc.ifc_len = 0;
2747                 ifc.ifc_req = NULL;
2748                 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2749         } else {
2750                 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2751                         sizeof(struct ifreq);
2752                 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2753                 ifc.ifc_len = len;
2754                 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2755                 ifr32 = compat_ptr(ifc32.ifcbuf);
2756                 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
2757                         if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
2758                                 return -EFAULT;
2759                         ifr++;
2760                         ifr32++;
2761                 }
2762         }
2763         if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2764                 return -EFAULT;
2765
2766         err = dev_ioctl(net, SIOCGIFCONF, uifc);
2767         if (err)
2768                 return err;
2769
2770         if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2771                 return -EFAULT;
2772
2773         ifr = ifc.ifc_req;
2774         ifr32 = compat_ptr(ifc32.ifcbuf);
2775         for (i = 0, j = 0;
2776              i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2777              i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2778                 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
2779                         return -EFAULT;
2780                 ifr32++;
2781                 ifr++;
2782         }
2783
2784         if (ifc32.ifcbuf == 0) {
2785                 /* Translate from 64-bit structure multiple to
2786                  * a 32-bit one.
2787                  */
2788                 i = ifc.ifc_len;
2789                 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
2790                 ifc32.ifc_len = i;
2791         } else {
2792                 ifc32.ifc_len = i;
2793         }
2794         if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
2795                 return -EFAULT;
2796
2797         return 0;
2798 }
2799
2800 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
2801 {
2802         struct compat_ethtool_rxnfc __user *compat_rxnfc;
2803         bool convert_in = false, convert_out = false;
2804         size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2805         struct ethtool_rxnfc __user *rxnfc;
2806         struct ifreq __user *ifr;
2807         u32 rule_cnt = 0, actual_rule_cnt;
2808         u32 ethcmd;
2809         u32 data;
2810         int ret;
2811
2812         if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2813                 return -EFAULT;
2814
2815         compat_rxnfc = compat_ptr(data);
2816
2817         if (get_user(ethcmd, &compat_rxnfc->cmd))
2818                 return -EFAULT;
2819
2820         /* Most ethtool structures are defined without padding.
2821          * Unfortunately struct ethtool_rxnfc is an exception.
2822          */
2823         switch (ethcmd) {
2824         default:
2825                 break;
2826         case ETHTOOL_GRXCLSRLALL:
2827                 /* Buffer size is variable */
2828                 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2829                         return -EFAULT;
2830                 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2831                         return -ENOMEM;
2832                 buf_size += rule_cnt * sizeof(u32);
2833                 /* fall through */
2834         case ETHTOOL_GRXRINGS:
2835         case ETHTOOL_GRXCLSRLCNT:
2836         case ETHTOOL_GRXCLSRULE:
2837         case ETHTOOL_SRXCLSRLINS:
2838                 convert_out = true;
2839                 /* fall through */
2840         case ETHTOOL_SRXCLSRLDEL:
2841                 buf_size += sizeof(struct ethtool_rxnfc);
2842                 convert_in = true;
2843                 break;
2844         }
2845
2846         ifr = compat_alloc_user_space(buf_size);
2847         rxnfc = (void *)ifr + ALIGN(sizeof(struct ifreq), 8);
2848
2849         if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2850                 return -EFAULT;
2851
2852         if (put_user(convert_in ? rxnfc : compat_ptr(data),
2853                      &ifr->ifr_ifru.ifru_data))
2854                 return -EFAULT;
2855
2856         if (convert_in) {
2857                 /* We expect there to be holes between fs.m_ext and
2858                  * fs.ring_cookie and at the end of fs, but nowhere else.
2859                  */
2860                 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2861                              sizeof(compat_rxnfc->fs.m_ext) !=
2862                              offsetof(struct ethtool_rxnfc, fs.m_ext) +
2863                              sizeof(rxnfc->fs.m_ext));
2864                 BUILD_BUG_ON(
2865                         offsetof(struct compat_ethtool_rxnfc, fs.location) -
2866                         offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2867                         offsetof(struct ethtool_rxnfc, fs.location) -
2868                         offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2869
2870                 if (copy_in_user(rxnfc, compat_rxnfc,
2871                                  (void *)(&rxnfc->fs.m_ext + 1) -
2872                                  (void *)rxnfc) ||
2873                     copy_in_user(&rxnfc->fs.ring_cookie,
2874                                  &compat_rxnfc->fs.ring_cookie,
2875                                  (void *)(&rxnfc->fs.location + 1) -
2876                                  (void *)&rxnfc->fs.ring_cookie) ||
2877                     copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2878                                  sizeof(rxnfc->rule_cnt)))
2879                         return -EFAULT;
2880         }
2881
2882         ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2883         if (ret)
2884                 return ret;
2885
2886         if (convert_out) {
2887                 if (copy_in_user(compat_rxnfc, rxnfc,
2888                                  (const void *)(&rxnfc->fs.m_ext + 1) -
2889                                  (const void *)rxnfc) ||
2890                     copy_in_user(&compat_rxnfc->fs.ring_cookie,
2891                                  &rxnfc->fs.ring_cookie,
2892                                  (const void *)(&rxnfc->fs.location + 1) -
2893                                  (const void *)&rxnfc->fs.ring_cookie) ||
2894                     copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2895                                  sizeof(rxnfc->rule_cnt)))
2896                         return -EFAULT;
2897
2898                 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2899                         /* As an optimisation, we only copy the actual
2900                          * number of rules that the underlying
2901                          * function returned.  Since Mallory might
2902                          * change the rule count in user memory, we
2903                          * check that it is less than the rule count
2904                          * originally given (as the user buffer size),
2905                          * which has been range-checked.
2906                          */
2907                         if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2908                                 return -EFAULT;
2909                         if (actual_rule_cnt < rule_cnt)
2910                                 rule_cnt = actual_rule_cnt;
2911                         if (copy_in_user(&compat_rxnfc->rule_locs[0],
2912                                          &rxnfc->rule_locs[0],
2913                                          rule_cnt * sizeof(u32)))
2914                                 return -EFAULT;
2915                 }
2916         }
2917
2918         return 0;
2919 }
2920
2921 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2922 {
2923         void __user *uptr;
2924         compat_uptr_t uptr32;
2925         struct ifreq __user *uifr;
2926
2927         uifr = compat_alloc_user_space(sizeof(*uifr));
2928         if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2929                 return -EFAULT;
2930
2931         if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2932                 return -EFAULT;
2933
2934         uptr = compat_ptr(uptr32);
2935
2936         if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2937                 return -EFAULT;
2938
2939         return dev_ioctl(net, SIOCWANDEV, uifr);
2940 }
2941
2942 static int bond_ioctl(struct net *net, unsigned int cmd,
2943                          struct compat_ifreq __user *ifr32)
2944 {
2945         struct ifreq kifr;
2946         struct ifreq __user *uifr;
2947         mm_segment_t old_fs;
2948         int err;
2949         u32 data;
2950         void __user *datap;
2951
2952         switch (cmd) {
2953         case SIOCBONDENSLAVE:
2954         case SIOCBONDRELEASE:
2955         case SIOCBONDSETHWADDR:
2956         case SIOCBONDCHANGEACTIVE:
2957                 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
2958                         return -EFAULT;
2959
2960                 old_fs = get_fs();
2961                 set_fs(KERNEL_DS);
2962                 err = dev_ioctl(net, cmd,
2963                                 (struct ifreq __user __force *) &kifr);
2964                 set_fs(old_fs);
2965
2966                 return err;
2967         case SIOCBONDSLAVEINFOQUERY:
2968         case SIOCBONDINFOQUERY:
2969                 uifr = compat_alloc_user_space(sizeof(*uifr));
2970                 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2971                         return -EFAULT;
2972
2973                 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2974                         return -EFAULT;
2975
2976                 datap = compat_ptr(data);
2977                 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2978                         return -EFAULT;
2979
2980                 return dev_ioctl(net, cmd, uifr);
2981         default:
2982                 return -ENOIOCTLCMD;
2983         }
2984 }
2985
2986 static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2987                                  struct compat_ifreq __user *u_ifreq32)
2988 {
2989         struct ifreq __user *u_ifreq64;
2990         char tmp_buf[IFNAMSIZ];
2991         void __user *data64;
2992         u32 data32;
2993
2994         if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2995                            IFNAMSIZ))
2996                 return -EFAULT;
2997         if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2998                 return -EFAULT;
2999         data64 = compat_ptr(data32);
3000
3001         u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
3002
3003         /* Don't check these user accesses, just let that get trapped
3004          * in the ioctl handler instead.
3005          */
3006         if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3007                          IFNAMSIZ))
3008                 return -EFAULT;
3009         if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
3010                 return -EFAULT;
3011
3012         return dev_ioctl(net, cmd, u_ifreq64);
3013 }
3014
3015 static int dev_ifsioc(struct net *net, struct socket *sock,
3016                          unsigned int cmd, struct compat_ifreq __user *uifr32)
3017 {
3018         struct ifreq __user *uifr;
3019         int err;
3020
3021         uifr = compat_alloc_user_space(sizeof(*uifr));
3022         if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3023                 return -EFAULT;
3024
3025         err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3026
3027         if (!err) {
3028                 switch (cmd) {
3029                 case SIOCGIFFLAGS:
3030                 case SIOCGIFMETRIC:
3031                 case SIOCGIFMTU:
3032                 case SIOCGIFMEM:
3033                 case SIOCGIFHWADDR:
3034                 case SIOCGIFINDEX:
3035                 case SIOCGIFADDR:
3036                 case SIOCGIFBRDADDR:
3037                 case SIOCGIFDSTADDR:
3038                 case SIOCGIFNETMASK:
3039                 case SIOCGIFPFLAGS:
3040                 case SIOCGIFTXQLEN:
3041                 case SIOCGMIIPHY:
3042                 case SIOCGMIIREG:
3043                         if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3044                                 err = -EFAULT;
3045                         break;
3046                 }
3047         }
3048         return err;
3049 }
3050
3051 static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3052                         struct compat_ifreq __user *uifr32)
3053 {
3054         struct ifreq ifr;
3055         struct compat_ifmap __user *uifmap32;
3056         mm_segment_t old_fs;
3057         int err;
3058
3059         uifmap32 = &uifr32->ifr_ifru.ifru_map;
3060         err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3061         err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3062         err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3063         err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3064         err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
3065         err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
3066         err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
3067         if (err)
3068                 return -EFAULT;
3069
3070         old_fs = get_fs();
3071         set_fs(KERNEL_DS);
3072         err = dev_ioctl(net, cmd, (void  __user __force *)&ifr);
3073         set_fs(old_fs);
3074
3075         if (cmd == SIOCGIFMAP && !err) {
3076                 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3077                 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3078                 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3079                 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3080                 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
3081                 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
3082                 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
3083                 if (err)
3084                         err = -EFAULT;
3085         }
3086         return err;
3087 }
3088
3089 static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
3090 {
3091         void __user *uptr;
3092         compat_uptr_t uptr32;
3093         struct ifreq __user *uifr;
3094
3095         uifr = compat_alloc_user_space(sizeof(*uifr));
3096         if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
3097                 return -EFAULT;
3098
3099         if (get_user(uptr32, &uifr32->ifr_data))
3100                 return -EFAULT;
3101
3102         uptr = compat_ptr(uptr32);
3103
3104         if (put_user(uptr, &uifr->ifr_data))
3105                 return -EFAULT;
3106
3107         return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
3108 }
3109
3110 struct rtentry32 {
3111         u32             rt_pad1;
3112         struct sockaddr rt_dst;         /* target address               */
3113         struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
3114         struct sockaddr rt_genmask;     /* target network mask (IP)     */
3115         unsigned short  rt_flags;
3116         short           rt_pad2;
3117         u32             rt_pad3;
3118         unsigned char   rt_tos;
3119         unsigned char   rt_class;
3120         short           rt_pad4;
3121         short           rt_metric;      /* +1 for binary compatibility! */
3122         /* char * */ u32 rt_dev;        /* forcing the device at add    */
3123         u32             rt_mtu;         /* per route MTU/Window         */
3124         u32             rt_window;      /* Window clamping              */
3125         unsigned short  rt_irtt;        /* Initial RTT                  */
3126 };
3127
3128 struct in6_rtmsg32 {
3129         struct in6_addr         rtmsg_dst;
3130         struct in6_addr         rtmsg_src;
3131         struct in6_addr         rtmsg_gateway;
3132         u32                     rtmsg_type;
3133         u16                     rtmsg_dst_len;
3134         u16                     rtmsg_src_len;
3135         u32                     rtmsg_metric;
3136         u32                     rtmsg_info;
3137         u32                     rtmsg_flags;
3138         s32                     rtmsg_ifindex;
3139 };
3140
3141 static int routing_ioctl(struct net *net, struct socket *sock,
3142                          unsigned int cmd, void __user *argp)
3143 {
3144         int ret;
3145         void *r = NULL;
3146         struct in6_rtmsg r6;
3147         struct rtentry r4;
3148         char devname[16];
3149         u32 rtdev;
3150         mm_segment_t old_fs = get_fs();
3151
3152         if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3153                 struct in6_rtmsg32 __user *ur6 = argp;
3154                 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
3155                         3 * sizeof(struct in6_addr));
3156                 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3157                 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3158                 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3159                 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3160                 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3161                 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3162                 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
3163
3164                 r = (void *) &r6;
3165         } else { /* ipv4 */
3166                 struct rtentry32 __user *ur4 = argp;
3167                 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
3168                                         3 * sizeof(struct sockaddr));
3169                 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
3170                 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
3171                 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
3172                 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
3173                 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
3174                 ret |= __get_user(rtdev, &(ur4->rt_dev));
3175                 if (rtdev) {
3176                         ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
3177                         r4.rt_dev = (char __user __force *)devname;
3178                         devname[15] = 0;
3179                 } else
3180                         r4.rt_dev = NULL;
3181
3182                 r = (void *) &r4;
3183         }
3184
3185         if (ret) {
3186                 ret = -EFAULT;
3187                 goto out;
3188         }
3189
3190         set_fs(KERNEL_DS);
3191         ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
3192         set_fs(old_fs);
3193
3194 out:
3195         return ret;
3196 }
3197
3198 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3199  * for some operations; this forces use of the newer bridge-utils that
3200  * use compatible ioctls
3201  */
3202 static int old_bridge_ioctl(compat_ulong_t __user *argp)
3203 {
3204         compat_ulong_t tmp;
3205
3206         if (get_user(tmp, argp))
3207                 return -EFAULT;
3208         if (tmp == BRCTL_GET_VERSION)
3209                 return BRCTL_VERSION + 1;
3210         return -EINVAL;
3211 }
3212
3213 static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3214                          unsigned int cmd, unsigned long arg)
3215 {
3216         void __user *argp = compat_ptr(arg);
3217         struct sock *sk = sock->sk;
3218         struct net *net = sock_net(sk);
3219
3220         if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3221                 return siocdevprivate_ioctl(net, cmd, argp);
3222
3223         switch (cmd) {
3224         case SIOCSIFBR:
3225         case SIOCGIFBR:
3226                 return old_bridge_ioctl(argp);
3227         case SIOCGIFNAME:
3228                 return dev_ifname32(net, argp);
3229         case SIOCGIFCONF:
3230                 return dev_ifconf(net, argp);
3231         case SIOCETHTOOL:
3232                 return ethtool_ioctl(net, argp);
3233         case SIOCWANDEV:
3234                 return compat_siocwandev(net, argp);
3235         case SIOCGIFMAP:
3236         case SIOCSIFMAP:
3237                 return compat_sioc_ifmap(net, cmd, argp);
3238         case SIOCBONDENSLAVE:
3239         case SIOCBONDRELEASE:
3240         case SIOCBONDSETHWADDR:
3241         case SIOCBONDSLAVEINFOQUERY:
3242         case SIOCBONDINFOQUERY:
3243         case SIOCBONDCHANGEACTIVE:
3244                 return bond_ioctl(net, cmd, argp);
3245         case SIOCADDRT:
3246         case SIOCDELRT:
3247                 return routing_ioctl(net, sock, cmd, argp);
3248         case SIOCGSTAMP:
3249                 return do_siocgstamp(net, sock, cmd, argp);
3250         case SIOCGSTAMPNS:
3251                 return do_siocgstampns(net, sock, cmd, argp);
3252         case SIOCSHWTSTAMP:
3253                 return compat_siocshwtstamp(net, argp);
3254
3255         case FIOSETOWN:
3256         case SIOCSPGRP:
3257         case FIOGETOWN:
3258         case SIOCGPGRP:
3259         case SIOCBRADDBR:
3260         case SIOCBRDELBR:
3261         case SIOCGIFVLAN:
3262         case SIOCSIFVLAN:
3263         case SIOCADDDLCI:
3264         case SIOCDELDLCI:
3265                 return sock_ioctl(file, cmd, arg);
3266
3267         case SIOCGIFFLAGS:
3268         case SIOCSIFFLAGS:
3269         case SIOCGIFMETRIC:
3270         case SIOCSIFMETRIC:
3271         case SIOCGIFMTU:
3272         case SIOCSIFMTU:
3273         case SIOCGIFMEM:
3274         case SIOCSIFMEM:
3275         case SIOCGIFHWADDR:
3276         case SIOCSIFHWADDR:
3277         case SIOCADDMULTI:
3278         case SIOCDELMULTI:
3279         case SIOCGIFINDEX:
3280         case SIOCGIFADDR:
3281         case SIOCSIFADDR:
3282         case SIOCSIFHWBROADCAST:
3283         case SIOCDIFADDR:
3284         case SIOCGIFBRDADDR:
3285         case SIOCSIFBRDADDR:
3286         case SIOCGIFDSTADDR:
3287         case SIOCSIFDSTADDR:
3288         case SIOCGIFNETMASK:
3289         case SIOCSIFNETMASK:
3290         case SIOCSIFPFLAGS:
3291         case SIOCGIFPFLAGS:
3292         case SIOCGIFTXQLEN:
3293         case SIOCSIFTXQLEN:
3294         case SIOCBRADDIF:
3295         case SIOCBRDELIF:
3296         case SIOCSIFNAME:
3297         case SIOCGMIIPHY:
3298         case SIOCGMIIREG:
3299         case SIOCSMIIREG:
3300                 return dev_ifsioc(net, sock, cmd, argp);
3301
3302         case SIOCSARP:
3303         case SIOCGARP:
3304         case SIOCDARP:
3305         case SIOCATMARK:
3306                 return sock_do_ioctl(net, sock, cmd, arg);
3307         }
3308
3309         return -ENOIOCTLCMD;
3310 }
3311
3312 static long compat_sock_ioctl(struct file *file, unsigned int cmd,
3313                               unsigned long arg)
3314 {
3315         struct socket *sock = file->private_data;
3316         int ret = -ENOIOCTLCMD;
3317         struct sock *sk;
3318         struct net *net;
3319
3320         sk = sock->sk;
3321         net = sock_net(sk);
3322
3323         if (sock->ops->compat_ioctl)
3324                 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3325
3326         if (ret == -ENOIOCTLCMD &&
3327             (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3328                 ret = compat_wext_handle_ioctl(net, cmd, arg);
3329
3330         if (ret == -ENOIOCTLCMD)
3331                 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3332
3333         return ret;
3334 }
3335 #endif
3336
3337 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3338 {
3339         return sock->ops->bind(sock, addr, addrlen);
3340 }
3341 EXPORT_SYMBOL(kernel_bind);
3342
3343 int kernel_listen(struct socket *sock, int backlog)
3344 {
3345         return sock->ops->listen(sock, backlog);
3346 }
3347 EXPORT_SYMBOL(kernel_listen);
3348
3349 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3350 {
3351         struct sock *sk = sock->sk;
3352         int err;
3353
3354         err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3355                                newsock);
3356         if (err < 0)
3357                 goto done;
3358
3359         err = sock->ops->accept(sock, *newsock, flags);
3360         if (err < 0) {
3361                 sock_release(*newsock);
3362                 *newsock = NULL;
3363                 goto done;
3364         }
3365
3366         (*newsock)->ops = sock->ops;
3367         __module_get((*newsock)->ops->owner);
3368
3369 done:
3370         return err;
3371 }
3372 EXPORT_SYMBOL(kernel_accept);
3373
3374 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3375                    int flags)
3376 {
3377         return sock->ops->connect(sock, addr, addrlen, flags);
3378 }
3379 EXPORT_SYMBOL(kernel_connect);
3380
3381 int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3382                          int *addrlen)
3383 {
3384         return sock->ops->getname(sock, addr, addrlen, 0);
3385 }
3386 EXPORT_SYMBOL(kernel_getsockname);
3387
3388 int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3389                          int *addrlen)
3390 {
3391         return sock->ops->getname(sock, addr, addrlen, 1);
3392 }
3393 EXPORT_SYMBOL(kernel_getpeername);
3394
3395 int kernel_getsockopt(struct socket *sock, int level, int optname,
3396                         char *optval, int *optlen)
3397 {
3398         mm_segment_t oldfs = get_fs();
3399         char __user *uoptval;
3400         int __user *uoptlen;
3401         int err;
3402
3403         uoptval = (char __user __force *) optval;
3404         uoptlen = (int __user __force *) optlen;
3405
3406         set_fs(KERNEL_DS);
3407         if (level == SOL_SOCKET)
3408                 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3409         else
3410                 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3411                                             uoptlen);
3412         set_fs(oldfs);
3413         return err;
3414 }
3415 EXPORT_SYMBOL(kernel_getsockopt);
3416
3417 int kernel_setsockopt(struct socket *sock, int level, int optname,
3418                         char *optval, unsigned int optlen)
3419 {
3420         mm_segment_t oldfs = get_fs();
3421         char __user *uoptval;
3422         int err;
3423
3424         uoptval = (char __user __force *) optval;
3425
3426         set_fs(KERNEL_DS);
3427         if (level == SOL_SOCKET)
3428                 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3429         else
3430                 err = sock->ops->setsockopt(sock, level, optname, uoptval,
3431                                             optlen);
3432         set_fs(oldfs);
3433         return err;
3434 }
3435 EXPORT_SYMBOL(kernel_setsockopt);
3436
3437 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3438                     size_t size, int flags)
3439 {
3440         sock_update_classid(sock->sk);
3441
3442         if (sock->ops->sendpage)
3443                 return sock->ops->sendpage(sock, page, offset, size, flags);
3444
3445         return sock_no_sendpage(sock, page, offset, size, flags);
3446 }
3447 EXPORT_SYMBOL(kernel_sendpage);
3448
3449 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3450 {
3451         mm_segment_t oldfs = get_fs();
3452         int err;
3453
3454         set_fs(KERNEL_DS);
3455         err = sock->ops->ioctl(sock, cmd, arg);
3456         set_fs(oldfs);
3457
3458         return err;
3459 }
3460 EXPORT_SYMBOL(kernel_sock_ioctl);
3461
3462 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3463 {
3464         return sock->ops->shutdown(sock, how);
3465 }
3466 EXPORT_SYMBOL(kernel_sock_shutdown);