/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Version:	$Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
static const char *af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-29"          ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_MAX"
};
#endif

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
			       "tries to set negative timeout\n",
			       current->comm, current->pid);
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

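/*
 * Illustrative sketch (not part of the original file): this conversion is
 * exercised from userspace via SO_RCVTIMEO/SO_SNDTIMEO, where "fd" is a
 * hypothetical socket descriptor.  A 2.5s timeout becomes 2*HZ plus 0.5s
 * rounded up to whole jiffies:
 *
 *	struct timeval tv = { .tv_sec = 2, .tv_usec = 500000 };
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0)
 *		perror("setsockopt");
 */
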
static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm,  current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = 0;
	int skb_len;

	/* Cast sk->sk_rcvbuf to unsigned... It's pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_filter(sk, skb);
	if (err)
		goto out;

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
out:
	return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

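/*
 * Illustrative sketch (not from the original file): a typical protocol
 * receive path hands a fully built skb to the owning socket like this;
 * "my_proto_rcv" and "my_proto_lookup" are hypothetical.  On a non-zero
 * return (queue full or dropped by the socket filter) the caller still
 * owns the skb and must free it:
 *
 *	static int my_proto_rcv(struct sk_buff *skb)
 *	{
 *		struct sock *sk = my_proto_lookup(skb);	// hypothetical lookup
 *
 *		if (!sk) {
 *			kfree_skb(skb);
 *			return 0;
 *		}
 *		if (sock_queue_rcv_skb(sk, skb) < 0)
 *			kfree_skb(skb);
 *		sock_put(sk);
 *		return 0;
 *	}
 */
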
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk->sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk->sk_dst_cache;

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk->sk_dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	if (optname == SO_DONTLINGER) {
		lock_sock(sk);
		sock_reset_flag(sk, SOCK_LINGER);
		release_sock(sk);
		return 0;
	}
#endif

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else if (valbool)
			sock_set_flag(sk, SOCK_DBG);
		else
			sock_reset_flag(sk, SOCK_DBG);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		if (valbool)
			sock_set_flag(sk, SOCK_LOCALROUTE);
		else
			sock_reset_flag(sk, SOCK_LOCALROUTE);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this; BSD doesn't, and if you think
		   about it, this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this; BSD doesn't, and if you think
		   about it, this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool)  {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

#ifdef CONFIG_NETDEVICES
	case SO_BINDTODEVICE:
	{
		char devname[IFNAMSIZ];

		/* Sorry... */
		if (!capable(CAP_NET_RAW)) {
			ret = -EPERM;
			break;
		}

		/* Bind this socket to a particular device like "eth0",
		 * as specified in the passed interface name. If the
		 * name is "" or the option length is zero the socket
		 * is not bound.
		 */

		if (!valbool) {
			sk->sk_bound_dev_if = 0;
		} else {
			if (optlen > IFNAMSIZ - 1)
				optlen = IFNAMSIZ - 1;
			memset(devname, 0, sizeof(devname));
			if (copy_from_user(devname, optval, optlen)) {
				ret = -EFAULT;
				break;
			}

			/* Remove any cached route for this socket. */
			sk_dst_reset(sk);

			if (devname[0] == '\0') {
				sk->sk_bound_dev_if = 0;
			} else {
				struct net_device *dev = dev_get_by_name(devname);
				if (!dev) {
					ret = -ENODEV;
					break;
				}
				sk->sk_bound_dev_if = dev->ifindex;
				dev_put(dev);
			}
		}
		break;
	}
#endif


	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		rcu_read_lock_bh();
		filter = rcu_dereference(sk->sk_filter);
		if (filter) {
			rcu_assign_pointer(sk->sk_filter, NULL);
			sk_filter_release(sk, filter);
			rcu_read_unlock_bh();
			break;
		}
		rcu_read_unlock_bh();
		ret = -ENONET;
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}

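/*
 * Illustrative userspace sketch (not part of the original file): as the
 * SO_RCVBUF comment above explains, the kernel doubles the requested value
 * to cover struct sk_buff overhead, so reading the option back returns
 * twice what was asked for (subject to sysctl_rmem_max); "fd" is a
 * hypothetical socket descriptor:
 *
 *	int val = 65536, out;
 *	socklen_t len = sizeof(out);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *	// out is now 131072 on a default-configured system
 */
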

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
		if (len > sizeof(sk->sk_peercred))
			len = sizeof(sk->sk_peercred);
		if (copy_to_user(optval, &sk->sk_peercred, len))
			return -EFAULT;
		goto lenout;

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

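/*
 * Illustrative sketch (not part of the original file): SO_ERROR is the
 * canonical way for userspace to collect the result of a non-blocking
 * connect() once poll() reports the socket writable; as implemented
 * above, reading it also clears the pending error:
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *
 *	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
 *		fprintf(stderr, "connect: %s\n", strerror(err));
 */
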
/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 */
struct sock *sk_alloc(int family, gfp_t priority,
		      struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	struct kmem_cache *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free;
	}
	return sk;

out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		sk_filter_release(sk, filter);
		rcu_assign_pointer(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk);

	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		atomic_set(&newsk->sk_wmem_alloc, 0);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		rwlock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class(&newsk->sk_callback_lock,
				   af_callback_keys + newsk->sk_family);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc	= 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = newsk->sk_filter;
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still a raw copy of the parent, so
			 * invalidate the destructor and do a plain sk_free() */
			newsk->sk_destruct = NULL;
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		newsk->sk_socket = NULL;
		newsk->sk_sleep	 = NULL;

		if (newsk->sk_prot->sockets_allocated)
			atomic_inc(newsk->sk_prot->sockets_allocated);
	}
out:
	return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	if (sk_can_gso(sk)) {
		if (dst->header_len)
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		else
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

void __init sk_init(void)
{
	if (num_physpages <= 4096) {
		sysctl_wmem_max = 32767;
		sysctl_rmem_max = 32767;
		sysctl_wmem_default = 32767;
		sysctl_rmem_default = 32767;
	} else if (num_physpages >= 131072) {
		sysctl_wmem_max = 131071;
		sysctl_rmem_max = 131071;
	}
}

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	sock_put(sk);
}

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}


int sock_i_uid(struct sock *sk)
{
	int uid;

	read_lock(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
	read_unlock(&sk->sk_callback_lock);
	return uid;
}

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock(&sk->sk_callback_lock);
	return ino;
}

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}

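/*
 * Illustrative sketch (not from the original file): callers must pair
 * sock_kmalloc() with sock_kfree_s() using the same size so that the
 * sk_omem_alloc accounting balances; "struct my_opt" is hypothetical:
 *
 *	struct my_opt *opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
 *
 *	if (!opt)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, sizeof(*opt));
 */
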
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}

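/*
 * Illustrative sketch (not from the original file): a datagram sendmsg
 * implementation typically allocates its buffer through the helper above
 * so that blocking, shutdown and sndbuf accounting are handled
 * generically; "hlen" (reserved header space) is hypothetical here:
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		goto out;
 *	skb_reserve(skb, hlen);
 *	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 */
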
static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);

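/*
 * Illustrative sketch (not part of the original file): a blocking recvmsg
 * typically loops on sk_wait_data() with the socket locked, re-checking
 * the receive queue after each wakeup:
 *
 *	while (!(skb = skb_peek(&sk->sk_receive_queue))) {
 *		if (!timeo)
 *			return -EAGAIN;
 *		if (signal_pending(current))
 *			return sock_intr_errno(timeo);
 *		sk_wait_data(sk, &timeo);
 *	}
 */
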
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = { .msg_flags = flags };
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}

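/*
 * Illustrative sketch (not from the original file): a protocol with no
 * sensible listen/accept semantics simply wires the stubs above into its
 * proto_ops; "my_dgram_ops", "AF_MYPROTO" and the "my_" handlers are
 * hypothetical:
 *
 *	static const struct proto_ops my_dgram_ops = {
 *		.family		= AF_MYPROTO,
 *		.owner		= THIS_MODULE,
 *		.bind		= my_bind,
 *		.connect	= sock_no_connect,
 *		.socketpair	= sock_no_socketpair,
 *		.accept		= sock_no_accept,
 *		.listen		= sock_no_listen,
 *		.mmap		= sock_no_mmap,
 *		.sendpage	= sock_no_sendpage,
 *	};
 */
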
/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 0, POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk, 1, POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);

1503 void sock_init_data(struct socket *sock, struct sock *sk)
1504 {
1505         skb_queue_head_init(&sk->sk_receive_queue);
1506         skb_queue_head_init(&sk->sk_write_queue);
1507         skb_queue_head_init(&sk->sk_error_queue);
1508 #ifdef CONFIG_NET_DMA
1509         skb_queue_head_init(&sk->sk_async_wait_queue);
1510 #endif
1511
1512         sk->sk_send_head        =       NULL;
1513
1514         init_timer(&sk->sk_timer);
1515
1516         sk->sk_allocation       =       GFP_KERNEL;
1517         sk->sk_rcvbuf           =       sysctl_rmem_default;
1518         sk->sk_sndbuf           =       sysctl_wmem_default;
1519         sk->sk_state            =       TCP_CLOSE;
1520         sk->sk_socket           =       sock;
1521
1522         sock_set_flag(sk, SOCK_ZAPPED);
1523
1524         if (sock) {
1525                 sk->sk_type     =       sock->type;
1526                 sk->sk_sleep    =       &sock->wait;
1527                 sock->sk        =       sk;
1528         } else
1529                 sk->sk_sleep    =       NULL;
1530
1531         rwlock_init(&sk->sk_dst_lock);
1532         rwlock_init(&sk->sk_callback_lock);
1533         lockdep_set_class(&sk->sk_callback_lock,
1534                            af_callback_keys + sk->sk_family);
1535
1536         sk->sk_state_change     =       sock_def_wakeup;
1537         sk->sk_data_ready       =       sock_def_readable;
1538         sk->sk_write_space      =       sock_def_write_space;
1539         sk->sk_error_report     =       sock_def_error_report;
1540         sk->sk_destruct         =       sock_def_destruct;
1541
1542         sk->sk_sndmsg_page      =       NULL;
1543         sk->sk_sndmsg_off       =       0;
1544
1545         sk->sk_peercred.pid     =       0;
1546         sk->sk_peercred.uid     =       -1;
1547         sk->sk_peercred.gid     =       -1;
1548         sk->sk_write_pending    =       0;
1549         sk->sk_rcvlowat         =       1;
1550         sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
1551         sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
1552
1553         sk->sk_stamp = ktime_set(-1L, -1L);
1554
1555         atomic_set(&sk->sk_refcnt, 1);
1556 }
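
/*
 * Protocol code runs after sock_init_data() and may override the
 * sock_def_*() callbacks installed above; a sketch, where
 * my_data_ready() is hypothetical:
 *
 *	sock_init_data(sock, sk);
 *	sk->sk_data_ready = my_data_ready;
 */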
1557
1558 void fastcall lock_sock_nested(struct sock *sk, int subclass)
1559 {
1560         might_sleep();
1561         spin_lock_bh(&sk->sk_lock.slock);
1562         if (sk->sk_lock.owner)
1563                 __lock_sock(sk);
1564         sk->sk_lock.owner = (void *)1;
1565         spin_unlock(&sk->sk_lock.slock);
1566         /*
1567          * The sk_lock has mutex_lock() semantics here:
1568          */
1569         mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
1570         local_bh_enable();
1571 }
1572
1573 EXPORT_SYMBOL(lock_sock_nested);
1574
1575 void fastcall release_sock(struct sock *sk)
1576 {
1577         /*
1578          * The sk_lock has mutex_unlock() semantics:
1579          */
1580         mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1581
1582         spin_lock_bh(&sk->sk_lock.slock);
1583         if (sk->sk_backlog.tail)
1584                 __release_sock(sk);
1585         sk->sk_lock.owner = NULL;
1586         if (waitqueue_active(&sk->sk_lock.wq))
1587                 wake_up(&sk->sk_lock.wq);
1588         spin_unlock_bh(&sk->sk_lock.slock);
1589 }
1590 EXPORT_SYMBOL(release_sock);
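
/*
 * Process-context users bracket socket state changes with this pair,
 * e.g. (sketch):
 *
 *	lock_sock(sk);
 *	... modify socket state ...
 *	release_sock(sk);
 *
 * Packets that arrive in softirq context while the lock is owned are
 * queued on sk->sk_backlog and drained by __release_sock() on release.
 */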
1591
1592 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1593 {
1594         struct timeval tv;
1595         if (!sock_flag(sk, SOCK_TIMESTAMP))
1596                 sock_enable_timestamp(sk);
1597         tv = ktime_to_timeval(sk->sk_stamp);
1598         if (tv.tv_sec == -1)
1599                 return -ENOENT;
1600         if (tv.tv_sec == 0) {
1601                 sk->sk_stamp = ktime_get_real();
1602                 tv = ktime_to_timeval(sk->sk_stamp);
1603         }
1604         return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
1605 }
1606 EXPORT_SYMBOL(sock_get_timestamp);
1607
1608 int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1609 {
1610         struct timespec ts;
1611         if (!sock_flag(sk, SOCK_TIMESTAMP))
1612                 sock_enable_timestamp(sk);
1613         ts = ktime_to_timespec(sk->sk_stamp);
1614         if (ts.tv_sec == -1)
1615                 return -ENOENT;
1616         if (ts.tv_sec == 0) {
1617                 sk->sk_stamp = ktime_get_real();
1618                 ts = ktime_to_timespec(sk->sk_stamp);
1619         }
1620         return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1621 }
1622 EXPORT_SYMBOL(sock_get_timestampns);
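
/*
 * The two functions above back the SIOCGSTAMP and SIOCGSTAMPNS ioctls;
 * from user space (illustrative):
 *
 *	struct timeval tv;
 *	ioctl(fd, SIOCGSTAMP, &tv);
 */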
1623
1624 void sock_enable_timestamp(struct sock *sk)
1625 {
1626         if (!sock_flag(sk, SOCK_TIMESTAMP)) {
1627                 sock_set_flag(sk, SOCK_TIMESTAMP);
1628                 net_enable_timestamp();
1629         }
1630 }
1631 EXPORT_SYMBOL(sock_enable_timestamp);
1632
1633 /*
1634  *      Get a socket option on a socket.
1635  *
1636  *      FIX: POSIX 1003.1g is very ambiguous here. It states that
1637  *      asynchronous errors should be reported by getsockopt. We assume
1638  *      this means they are reported via SO_ERROR (otherwise what's the point of it).
1639  */
1640 int sock_common_getsockopt(struct socket *sock, int level, int optname,
1641                            char __user *optval, int __user *optlen)
1642 {
1643         struct sock *sk = sock->sk;
1644
1645         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1646 }
1647
1648 EXPORT_SYMBOL(sock_common_getsockopt);
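
/*
 * Illustrative user-space counterpart: after a non-blocking connect(),
 * the queued asynchronous error is fetched with
 *
 *	int err;
 *	socklen_t len = sizeof(err);
 *	getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 */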
1649
1650 #ifdef CONFIG_COMPAT
1651 int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1652                                   char __user *optval, int __user *optlen)
1653 {
1654         struct sock *sk = sock->sk;
1655
1656         if (sk->sk_prot->compat_getsockopt != NULL)
1657                 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1658                                                       optval, optlen);
1659         return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1660 }
1661 EXPORT_SYMBOL(compat_sock_common_getsockopt);
1662 #endif
1663
1664 int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1665                         struct msghdr *msg, size_t size, int flags)
1666 {
1667         struct sock *sk = sock->sk;
1668         int addr_len = 0;
1669         int err;
1670
1671         err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1672                                    flags & ~MSG_DONTWAIT, &addr_len);
1673         if (err >= 0)
1674                 msg->msg_namelen = addr_len;
1675         return err;
1676 }
1677
1678 EXPORT_SYMBOL(sock_common_recvmsg);
1679
1680 /*
1681  *      Set socket options on a socket.
1682  */
1683 int sock_common_setsockopt(struct socket *sock, int level, int optname,
1684                            char __user *optval, int optlen)
1685 {
1686         struct sock *sk = sock->sk;
1687
1688         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1689 }
1690
1691 EXPORT_SYMBOL(sock_common_setsockopt);
1692
1693 #ifdef CONFIG_COMPAT
1694 int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1695                                   char __user *optval, int optlen)
1696 {
1697         struct sock *sk = sock->sk;
1698
1699         if (sk->sk_prot->compat_setsockopt != NULL)
1700                 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1701                                                       optval, optlen);
1702         return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1703 }
1704 EXPORT_SYMBOL(compat_sock_common_setsockopt);
1705 #endif
1706
1707 void sk_common_release(struct sock *sk)
1708 {
1709         if (sk->sk_prot->destroy)
1710                 sk->sk_prot->destroy(sk);
1711
1712         /*
1713          * Observation: when sk_common_release() is called, processes no
1714          * longer have access to the socket, but the network stack still does.
1715          * Step one: detach the socket from the networking side:
1716          *
1717          * A. Remove from hash tables.
1718          */
1719
1720         sk->sk_prot->unhash(sk);
1721
1722         /*
1723          * At this point the socket cannot receive new packets, but some may
1724          * still be in flight: another CPU may have looked the socket up in
1725          * the hash tables before we unhashed it. Such packets will reach the
1726          * receive queue and be purged by the socket destructor.
1727          *
1728          * We may also still have packets pending on the receive queue, and
1729          * probably our own packets waiting in device queues. The destructor
1730          * drains the receive queue, while transmitted packets delay socket
1731          * destruction until the last reference is released.
1732          */
1733
1734         sock_orphan(sk);
1735
1736         xfrm_sk_free_policy(sk);
1737
1738         sk_refcnt_debug_release(sk);
1739         sock_put(sk);
1740 }
1741
1742 EXPORT_SYMBOL(sk_common_release);
1743
1744 static DEFINE_RWLOCK(proto_list_lock);
1745 static LIST_HEAD(proto_list);
1746
1747 int proto_register(struct proto *prot, int alloc_slab)
1748 {
1749         char *request_sock_slab_name = NULL;
1750         char *timewait_sock_slab_name;
1751         int rc = -ENOBUFS;
1752
1753         if (alloc_slab) {
1754                 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
1755                                                SLAB_HWCACHE_ALIGN, NULL, NULL);
1756
1757                 if (prot->slab == NULL) {
1758                         printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1759                                prot->name);
1760                         goto out;
1761                 }
1762
1763                 if (prot->rsk_prot != NULL) {
1764                         static const char mask[] = "request_sock_%s";
1765
1766                         request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1767                         if (request_sock_slab_name == NULL)
1768                                 goto out_free_sock_slab;
1769
1770                         sprintf(request_sock_slab_name, mask, prot->name);
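                        /* e.g. "request_sock_TCP" when prot->name is "TCP" */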
1771                         prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1772                                                                  prot->rsk_prot->obj_size, 0,
1773                                                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
1774
1775                         if (prot->rsk_prot->slab == NULL) {
1776                                 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1777                                        prot->name);
1778                                 goto out_free_request_sock_slab_name;
1779                         }
1780                 }
1781
1782                 if (prot->twsk_prot != NULL) {
1783                         static const char mask[] = "tw_sock_%s";
1784
1785                         timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1786
1787                         if (timewait_sock_slab_name == NULL)
1788                                 goto out_free_request_sock_slab;
1789
1790                         sprintf(timewait_sock_slab_name, mask, prot->name);
1791                         prot->twsk_prot->twsk_slab =
1792                                 kmem_cache_create(timewait_sock_slab_name,
1793                                                   prot->twsk_prot->twsk_obj_size,
1794                                                   0, SLAB_HWCACHE_ALIGN,
1795                                                   NULL, NULL);
1796                         if (prot->twsk_prot->twsk_slab == NULL)
1797                                 goto out_free_timewait_sock_slab_name;
1798                 }
1799         }
1800
1801         write_lock(&proto_list_lock);
1802         list_add(&prot->node, &proto_list);
1803         write_unlock(&proto_list_lock);
1804         rc = 0;
1805 out:
1806         return rc;
1807 out_free_timewait_sock_slab_name:
1808         kfree(timewait_sock_slab_name);
1809 out_free_request_sock_slab:
1810         if (prot->rsk_prot && prot->rsk_prot->slab) {
1811                 kmem_cache_destroy(prot->rsk_prot->slab);
1812                 prot->rsk_prot->slab = NULL;
1813         }
1814 out_free_request_sock_slab_name:
1815         kfree(request_sock_slab_name);
1816 out_free_sock_slab:
1817         kmem_cache_destroy(prot->slab);
1818         prot->slab = NULL;
1819         goto out;
1820 }
1821
1822 EXPORT_SYMBOL(proto_register);
1823
1824 void proto_unregister(struct proto *prot)
1825 {
1826         write_lock(&proto_list_lock);
1827         list_del(&prot->node);
1828         write_unlock(&proto_list_lock);
1829
1830         if (prot->slab != NULL) {
1831                 kmem_cache_destroy(prot->slab);
1832                 prot->slab = NULL;
1833         }
1834
1835         if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1836                 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1837
1838                 kmem_cache_destroy(prot->rsk_prot->slab);
1839                 kfree(name);
1840                 prot->rsk_prot->slab = NULL;
1841         }
1842
1843         if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1844                 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
1845
1846                 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
1847                 kfree(name);
1848                 prot->twsk_prot->twsk_slab = NULL;
1849         }
1850 }
1851
1852 EXPORT_SYMBOL(proto_unregister);
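
/*
 * Registration sketch for the pair above (protocol name, sock structure
 * and error handling are hypothetical):
 *
 *	static struct proto example_proto = {
 *		.name	  = "EXAMPLE",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct example_sock),
 *	};
 *
 *	err = proto_register(&example_proto, 1);  (creates the "EXAMPLE" slab)
 *	...
 *	proto_unregister(&example_proto);
 */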
1853
1854 #ifdef CONFIG_PROC_FS
1855 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1856 {
1857         read_lock(&proto_list_lock);
1858         return seq_list_start_head(&proto_list, *pos);
1859 }
1860
1861 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1862 {
1863         return seq_list_next(v, &proto_list, pos);
1864 }
1865
1866 static void proto_seq_stop(struct seq_file *seq, void *v)
1867 {
1868         read_unlock(&proto_list_lock);
1869 }
1870
1871 static char proto_method_implemented(const void *method)
1872 {
1873         return method == NULL ? 'n' : 'y';
1874 }
1875
1876 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
1877 {
1878         seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
1879                         "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
1880                    proto->name,
1881                    proto->obj_size,
1882                    proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
1883                    proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
1884                    proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
1885                    proto->max_header,
1886                    proto->slab == NULL ? "no" : "yes",
1887                    module_name(proto->owner),
1888                    proto_method_implemented(proto->close),
1889                    proto_method_implemented(proto->connect),
1890                    proto_method_implemented(proto->disconnect),
1891                    proto_method_implemented(proto->accept),
1892                    proto_method_implemented(proto->ioctl),
1893                    proto_method_implemented(proto->init),
1894                    proto_method_implemented(proto->destroy),
1895                    proto_method_implemented(proto->shutdown),
1896                    proto_method_implemented(proto->setsockopt),
1897                    proto_method_implemented(proto->getsockopt),
1898                    proto_method_implemented(proto->sendmsg),
1899                    proto_method_implemented(proto->recvmsg),
1900                    proto_method_implemented(proto->sendpage),
1901                    proto_method_implemented(proto->bind),
1902                    proto_method_implemented(proto->backlog_rcv),
1903                    proto_method_implemented(proto->hash),
1904                    proto_method_implemented(proto->unhash),
1905                    proto_method_implemented(proto->get_port),
1906                    proto_method_implemented(proto->enter_memory_pressure));
1907 }
1908
1909 static int proto_seq_show(struct seq_file *seq, void *v)
1910 {
1911         if (v == &proto_list)
1912                 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1913                            "protocol",
1914                            "size",
1915                            "sockets",
1916                            "memory",
1917                            "press",
1918                            "maxhdr",
1919                            "slab",
1920                            "module",
1921                            "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1922         else
1923                 proto_seq_printf(seq, list_entry(v, struct proto, node));
1924         return 0;
1925 }
1926
1927 static const struct seq_operations proto_seq_ops = {
1928         .start  = proto_seq_start,
1929         .next   = proto_seq_next,
1930         .stop   = proto_seq_stop,
1931         .show   = proto_seq_show,
1932 };
1933
1934 static int proto_seq_open(struct inode *inode, struct file *file)
1935 {
1936         return seq_open(file, &proto_seq_ops);
1937 }
1938
1939 static const struct file_operations proto_seq_fops = {
1940         .owner          = THIS_MODULE,
1941         .open           = proto_seq_open,
1942         .read           = seq_read,
1943         .llseek         = seq_lseek,
1944         .release        = seq_release,
1945 };
1946
1947 static int __init proto_init(void)
1948 {
1949         /* register /proc/net/protocols */
1950         return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1951 }
1952
1953 subsys_initcall(proto_init);
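
/*
 * The resulting table can be read with "cat /proc/net/protocols"; the
 * column key ("cl co di ..." for close, connect, disconnect, ...) is
 * the header line emitted by proto_seq_show() above.
 */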
1954
1955 #endif /* CONFIG_PROC_FS */
1956
1957 EXPORT_SYMBOL(sk_alloc);
1958 EXPORT_SYMBOL(sk_free);
1959 EXPORT_SYMBOL(sk_send_sigurg);
1960 EXPORT_SYMBOL(sock_alloc_send_skb);
1961 EXPORT_SYMBOL(sock_init_data);
1962 EXPORT_SYMBOL(sock_kfree_s);
1963 EXPORT_SYMBOL(sock_kmalloc);
1964 EXPORT_SYMBOL(sock_no_accept);
1965 EXPORT_SYMBOL(sock_no_bind);
1966 EXPORT_SYMBOL(sock_no_connect);
1967 EXPORT_SYMBOL(sock_no_getname);
1968 EXPORT_SYMBOL(sock_no_getsockopt);
1969 EXPORT_SYMBOL(sock_no_ioctl);
1970 EXPORT_SYMBOL(sock_no_listen);
1971 EXPORT_SYMBOL(sock_no_mmap);
1972 EXPORT_SYMBOL(sock_no_poll);
1973 EXPORT_SYMBOL(sock_no_recvmsg);
1974 EXPORT_SYMBOL(sock_no_sendmsg);
1975 EXPORT_SYMBOL(sock_no_sendpage);
1976 EXPORT_SYMBOL(sock_no_setsockopt);
1977 EXPORT_SYMBOL(sock_no_shutdown);
1978 EXPORT_SYMBOL(sock_no_socketpair);
1979 EXPORT_SYMBOL(sock_rfree);
1980 EXPORT_SYMBOL(sock_setsockopt);
1981 EXPORT_SYMBOL(sock_wfree);
1982 EXPORT_SYMBOL(sock_wmalloc);
1983 EXPORT_SYMBOL(sock_i_uid);
1984 EXPORT_SYMBOL(sock_i_ino);
1985 EXPORT_SYMBOL(sysctl_optmem_max);
1986 #ifdef CONFIG_SYSCTL
1987 EXPORT_SYMBOL(sysctl_rmem_max);
1988 EXPORT_SYMBOL(sysctl_wmem_max);
1989 #endif