Merge tag 'tegra-for-5.2-bus-fixes' of git://git.kernel.org/pub/scm/linux/kernel...
[platform/kernel/linux-rpi.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <linux/slab.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/inet_common.h>
28 #include <net/sock.h>
29 #include <net/xfrm.h>
30
31 #include <asm/ioctls.h>
32 #include <linux/spinlock.h>
33 #include <linux/timer.h>
34 #include <linux/delay.h>
35 #include <linux/poll.h>
36
37 #include "ccid.h"
38 #include "dccp.h"
39 #include "feat.h"
40
41 #define CREATE_TRACE_POINTS
42 #include "trace.h"
43
/* Per-CPU SNMP (MIB) statistics for the DCCP protocol */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of orphaned DCCP sockets (presumably sockets whose owning process
 * has closed them while teardown is still in progress — mirrors TCP's
 * orphan accounting; confirm against callers of this counter). */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Shared hash tables (bind/established) used by the DCCP socket lookup code */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Map a DCCP socket state to a printable name for debug output.
 * Out-of-range values yield "INVALID STATE!".
 */
static const char *dccp_state_name(const int state)
{
	static const char *const state_names[] = {
	[DCCP_OPEN]		= "OPEN",
	[DCCP_REQUESTING]	= "REQUESTING",
	[DCCP_PARTOPEN]		= "PARTOPEN",
	[DCCP_LISTEN]		= "LISTEN",
	[DCCP_RESPOND]		= "RESPOND",
	[DCCP_CLOSING]		= "CLOSING",
	[DCCP_ACTIVE_CLOSEREQ]	= "CLOSEREQ",
	[DCCP_PASSIVE_CLOSE]	= "PASSIVE_CLOSE",
	[DCCP_PASSIVE_CLOSEREQ]	= "PASSIVE_CLOSEREQ",
	[DCCP_TIME_WAIT]	= "TIME_WAIT",
	[DCCP_CLOSED]		= "CLOSED",
	};

	if (state < DCCP_MAX_STATES)
		return state_names[state];

	return "INVALID STATE!";
}
#endif
80
/**
 * dccp_set_state  -  Transition a socket to a new DCCP state
 * @sk:    socket being updated
 * @state: target state (DCCP_OPEN, DCCP_CLOSED, ...)
 *
 * Besides recording the state, this keeps the CURRESTAB/ESTABRESETS MIB
 * counters in sync and, when entering DCCP_CLOSED, unhashes the socket and
 * releases its local port (unless the user holds a bind-port lock).
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	/* Transitions to the current state indicate a caller bug */
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		/* Only count resets of connections that were established */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		/* Leaving OPEN for any other state ends an established conn */
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	inet_sk_set_state(sk, state);
}

EXPORT_SYMBOL_GPL(dccp_set_state);
120
/*
 * Complete the passive side of a close handshake: respond to a received
 * Close with a Reset (code "Closed"), or to a received CloseReq by sending
 * our own Close. States other than the two passive-close states are
 * deliberately left alone.
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}
138
139 void dccp_done(struct sock *sk)
140 {
141         dccp_set_state(sk, DCCP_CLOSED);
142         dccp_clear_xmit_timers(sk);
143
144         sk->sk_shutdown = SHUTDOWN_MASK;
145
146         if (!sock_flag(sk, SOCK_DEAD))
147                 sk->sk_state_change(sk);
148         else
149                 inet_csk_destroy_sock(sk);
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_done);
153
154 const char *dccp_packet_name(const int type)
155 {
156         static const char *const dccp_packet_names[] = {
157                 [DCCP_PKT_REQUEST]  = "REQUEST",
158                 [DCCP_PKT_RESPONSE] = "RESPONSE",
159                 [DCCP_PKT_DATA]     = "DATA",
160                 [DCCP_PKT_ACK]      = "ACK",
161                 [DCCP_PKT_DATAACK]  = "DATAACK",
162                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
163                 [DCCP_PKT_CLOSE]    = "CLOSE",
164                 [DCCP_PKT_RESET]    = "RESET",
165                 [DCCP_PKT_SYNC]     = "SYNC",
166                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
167         };
168
169         if (type >= DCCP_NR_PKT_TYPES)
170                 return "INVALID";
171         else
172                 return dccp_packet_names[type];
173 }
174
175 EXPORT_SYMBOL_GPL(dccp_packet_name);
176
/*
 * sk->sk_destruct callback: release the TX CCID before handing the rest of
 * the teardown to the generic inet destructor. (The RX CCID is freed in
 * dccp_destroy_sock().)
 */
static void dccp_sk_destruct(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_tx_ccid = NULL;
	inet_sock_destruct(sk);
}
185
/**
 * dccp_init_sock  -  Initialise a freshly created DCCP socket
 * @sk: socket to initialise
 * @ctl_sock_initialized: non-zero once the per-namespace control socket
 *	exists; the control socket itself skips feature negotiation setup
 *
 * Returns 0 on success or the error from dccp_feat_init().
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	sk->sk_destruct		= dccp_sk_destruct;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	/* Conservative initial MSS; updated once the path is known */
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
213
/*
 * Release all per-socket DCCP resources: pending transmit queue, bind
 * bucket, service list, ack vector, the RX CCID and any feature
 * negotiation state. The TX CCID is freed later, in dccp_sk_destruct().
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_hc_rx_ackvec != NULL) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
243
/*
 * Put the socket into the LISTEN role. Feature negotiation settings are
 * finalised first so that a misconfigured socket never starts listening.
 * Returns 0 on success, -EPROTO on feature setup failure, or the error
 * from inet_csk_listen_start().
 */
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/* do not start to listen if feature negotiation setup fails */
	if (dccp_feat_finalise_settings(dp))
		return -EPROTO;
	return inet_csk_listen_start(sk, backlog);
}
254
255 static inline int dccp_need_reset(int state)
256 {
257         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
258                state != DCCP_REQUESTING;
259 }
260
/**
 * dccp_disconnect  -  Abort the connection (RFC 793 ABORT equivalent)
 * @sk:    socket to disconnect
 * @flags: unused here, kept for the struct proto ->disconnect signature
 *
 * Sends a Reset with code "Aborted" when the connection state requires it,
 * tears down timers, CCID state and queued packets, and returns the socket
 * to a reusable CLOSED state. Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	/* Drop the RX CCID; it is re-negotiated on the next connect */
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	dp->dccps_hc_rx_ccid = NULL;

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->inet_dport = 0;

	/* Forget the source address unless the user explicitly bound it */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A bound socket must still own its bind bucket at this point */
	WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
313
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 *
 *	Returns the EPOLL* event mask currently applicable to the socket.
 */
__poll_t dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	__poll_t mask;
	struct sock *sk = sock->sk;

	sock_poll_wait(file, sock, wait);
	/* Listening sockets only ever signal incoming connections */
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = EPOLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;

	/* Connected? (any state except REQUESTING/RESPOND) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= EPOLLIN | EPOLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_is_writeable(sk)) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {  /* send SIGIO later */
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_is_writeable(sk))
					mask |= EPOLLOUT | EPOLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
370
/*
 * Socket ioctl handler. Only SIOCINQ is implemented: it reports the length
 * of the packet at the head of the receive queue (DCCP preserves datagram
 * boundaries, so only that one packet can be read next). Listening sockets
 * get -ENOTCONN; unknown commands get -ENOIOCTLCMD.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
406
/*
 * Set the socket's service code, optionally together with a list of
 * additional acceptable service codes (used by servers that accept
 * multiple services on one port).
 *
 * @service: first service code, already copied from userspace by the caller
 * @optval/@optlen: the full user buffer; any codes beyond the first are
 *	copied here into a freshly allocated dccp_service_list
 *
 * Note: an invalid code *inside* the list is reported as -EFAULT (same
 * path as a failed copy), not -EINVAL.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* Number of entries after the leading service code */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Replace any previously installed list */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
441
/*
 * Register a minimum checksum-coverage preference (0..15) for feature
 * negotiation, for either the RX (@rx true) or TX direction.
 *
 * Returns 0 on success, -EINVAL for out-of-range values, -ENOBUFS on
 * allocation failure, or the error from dccp_feat_register_sp().
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		/* Record the requested coverage locally as well; note that
		 * cscov was advanced by the loop above, so 'cscov' here is
		 * no longer the original argument — dccps_pc*len gets the
		 * post-loop value (original + len == 16). */
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
477
/*
 * Register a user-supplied CCID preference list for feature negotiation.
 * @type selects the direction: TX, RX, or (DCCP_SOCKOPT_CCID) both.
 * @optval holds 1..DCCP_FEAT_MAX_SP_VALS candidate CCID ids.
 */
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, unsigned int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = memdup_user(optval, optlen);
	if (IS_ERR(val))
		return PTR_ERR(val);

	lock_sock(sk);
	/* is_local=1 registers the TX-side preference, 0 the RX side */
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}
502
/*
 * Handle SOL_DCCP setsockopt options. Deprecated and CCID options are
 * dispatched before the generic "read one int" path; everything after the
 * get_user() expects at least sizeof(int) bytes of option data.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, unsigned int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE may carry extra list entries, parsed from optval directly */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		/* Queuing policy can only be chosen before connecting */
		if (sk->sk_state != DCCP_CLOSED)
			err = -EISCONN;
		else if (val < 0 || val >= DCCPQ_POLICY_MAX)
			err = -EINVAL;
		else
			dp->dccps_qpolicy = val;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		if (val < 0)
			err = -EINVAL;
		else
			dp->dccps_tx_qlen = val;
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
568
/*
 * setsockopt entry point: SOL_DCCP options are handled locally, anything
 * else is forwarded to the address-family layer (IPv4/IPv6).
 */
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);
580
#ifdef CONFIG_COMPAT
/* 32-bit-compat setsockopt: same split as dccp_setsockopt() */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
593
594 static int dccp_getsockopt_service(struct sock *sk, int len,
595                                    __be32 __user *optval,
596                                    int __user *optlen)
597 {
598         const struct dccp_sock *dp = dccp_sk(sk);
599         const struct dccp_service_list *sl;
600         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
601
602         lock_sock(sk);
603         if ((sl = dp->dccps_service_list) != NULL) {
604                 slen = sl->dccpsl_nr * sizeof(u32);
605                 total_len += slen;
606         }
607
608         err = -EINVAL;
609         if (total_len > len)
610                 goto out;
611
612         err = 0;
613         if (put_user(total_len, optlen) ||
614             put_user(dp->dccps_service, optval) ||
615             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
616                 err = -EFAULT;
617 out:
618         release_sock(sk);
619         return err;
620 }
621
/*
 * Handle SOL_DCCP getsockopt options. Most options return a single int;
 * SERVICE, AVAILABLE_CCIDS and the CCID-private ranges (128-191 RX,
 * 192-255 TX) have their own copy-out paths.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case DCCP_SOCKOPT_QPOLICY_ID:
		val = dp->dccps_qpolicy;
		break;
	case DCCP_SOCKOPT_QPOLICY_TXQLEN:
		val = dp->dccps_tx_qlen;
		break;
	case 128 ... 191:
		/* RX-CCID private option range */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		/* TX-CCID private option range */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	/* Common copy-out path for the single-int options above */
	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
689
/*
 * getsockopt entry point: SOL_DCCP options are handled locally, anything
 * else is forwarded to the address-family layer (IPv4/IPv6).
 */
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);
701
#ifdef CONFIG_COMPAT
/* 32-bit-compat getsockopt: same split as dccp_getsockopt() */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
714
/*
 * Parse SOL_DCCP control messages attached to a sendmsg() call and apply
 * them to the outgoing @skb. Currently only DCCP_SCM_PRIORITY is
 * supported. Returns 0 on success or -EINVAL on malformed/unsupported
 * cmsg data.
 */
static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
{
	struct cmsghdr *cmsg;

	/*
	 * Assign an (opaque) qpolicy priority value to skb->priority.
	 *
	 * We are overloading this skb field for use with the qpolicy subystem.
	 * The skb->priority is normally used for the SO_PRIORITY option, which
	 * is initialised from sk_priority. Since the assignment of sk_priority
	 * to skb->priority happens later (on layer 3), we overload this field
	 * for use with queueing priorities as long as the skb is on layer 4.
	 * The default priority value (if nothing is set) is 0.
	 */
	skb->priority = 0;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;

		/* Non-DCCP cmsgs are someone else's business */
		if (cmsg->cmsg_level != SOL_DCCP)
			continue;

		/* Reject qpolicy parameters the current policy doesn't take */
		if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
		    !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
			return -EINVAL;

		switch (cmsg->cmsg_type) {
		case DCCP_SCM_PRIORITY:
			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
				return -EINVAL;
			skb->priority = *(__u32 *)CMSG_DATA(cmsg);
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}
754
/**
 * dccp_sendmsg  -  Queue one DCCP datagram for transmission
 * @sk:  connected (or connecting) socket
 * @msg: user message; must fit into a single packet (<= current MSS)
 * @len: payload length
 *
 * Returns @len on success or a negative error (-EMSGSIZE if the payload
 * exceeds the MSS cache, -EAGAIN if the qpolicy queue is full, ...).
 */
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	trace_dccp_probe(sk, len);

	/* DCCP is datagram-based: no segmentation, one packet per call */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (dccp_qpolicy_full(sk)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* NOTE(review): the socket lock is dropped while allocating, so
	 * state may change concurrently; the DCCP_CLOSED re-check below
	 * guards the most important case. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	if (sk->sk_state == DCCP_CLOSED) {
		rc = -ENOTCONN;
		goto out_discard;
	}

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_from_msg(skb_put(skb, len), msg, len);
	if (rc != 0)
		goto out_discard;

	/* Apply SOL_DCCP cmsgs (qpolicy priority) to the skb */
	rc = dccp_msghdr_parse(msg, skb);
	if (rc != 0)
		goto out_discard;

	dccp_qpolicy_push(sk, skb);
	/*
	 * The xmit_timer is set if the TX CCID is rate-based and will expire
	 * when congestion control permits to release further packets into the
	 * network. Window-based CCIDs do not use this timer.
	 */
	if (!timer_pending(&dp->dccps_xmit_timer))
		dccp_write_xmit(sk);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
825
/**
 * dccp_recvmsg  -  Receive one DCCP datagram
 * @sk:       socket to read from
 * @msg:      destination message buffer
 * @len:      buffer capacity; shorter packets shrink it, longer ones are
 *	      truncated (MSG_TRUNC set in msg_flags)
 * @nonblock: non-zero for non-blocking operation
 * @flags:    MSG_PEEK / MSG_TRUNC etc.
 * @addr_len: unused here, kept for the ->recvmsg signature
 *
 * Returns the number of bytes copied, 0 at end-of-connection, or a
 * negative error.
 */
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
		 int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			/* Complete the passive close unless just peeking */
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packet at queue head: discard and retry */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb);
		}
verify_sock_status:
		/* Queue empty: decide whether to return, error out or wait */
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo, NULL);
		continue;
	found_ok_skb:
		/* Clamp to the packet length; flag truncation otherwise */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_msg(skb, 0, msg, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
		if (flags & MSG_TRUNC)
			len = skb->len;
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
933
934 int inet_dccp_listen(struct socket *sock, int backlog)
935 {
936         struct sock *sk = sock->sk;
937         unsigned char old_state;
938         int err;
939
940         lock_sock(sk);
941
942         err = -EINVAL;
943         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
944                 goto out;
945
946         old_state = sk->sk_state;
947         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
948                 goto out;
949
950         sk->sk_max_ack_backlog = backlog;
951         /* Really, if the socket is already in listen state
952          * we can only allow the backlog to be adjusted.
953          */
954         if (old_state != DCCP_LISTEN) {
955                 /*
956                  * FIXME: here it probably should be sk->sk_prot->listen_start
957                  * see tcp_listen_start
958                  */
959                 err = dccp_listen_start(sk, backlog);
960                 if (err)
961                         goto out;
962         }
963         err = 0;
964
965 out:
966         release_sock(sk);
967         return err;
968 }
969
970 EXPORT_SYMBOL_GPL(inet_dccp_listen);
971
972 static void dccp_terminate_connection(struct sock *sk)
973 {
974         u8 next_state = DCCP_CLOSED;
975
976         switch (sk->sk_state) {
977         case DCCP_PASSIVE_CLOSE:
978         case DCCP_PASSIVE_CLOSEREQ:
979                 dccp_finish_passive_close(sk);
980                 break;
981         case DCCP_PARTOPEN:
982                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
983                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
984                 /* fall through */
985         case DCCP_OPEN:
986                 dccp_send_close(sk, 1);
987
988                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
989                     !dccp_sk(sk)->dccps_server_timewait)
990                         next_state = DCCP_ACTIVE_CLOSEREQ;
991                 else
992                         next_state = DCCP_CLOSING;
993                 /* fall through */
994         default:
995                 dccp_set_state(sk, next_state);
996         }
997 }
998
/*
 * dccp_close  -  close a DCCP socket
 * @sk:      socket being closed
 * @timeout: linger time budget, consumed while flushing the TX queue and
 *           waiting for the close handshake
 *
 * Runs under lock_sock() for the user-context part, then orphans the
 * socket and finishes under the BH lock once the last user reference
 * to the lock is dropped.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: a listener has no connection to terminate,
		 * only pending child requests to drop.
		 */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	/* If socket has been already reset kill it. */
	if (sk->sk_state == DCCP_CLOSED)
		goto adjudge_to_death;

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/*
		 * Normal connection termination. May need to wait if there are
		 * still packets in the TX queue that are delayed by the CCID.
		 */
		dccp_flush_write_queue(sk, &timeout);
		dccp_terminate_connection(sk);
	}

	/*
	 * Flush write queue. This may be necessary in several cases:
	 * - we have been closed by the peer but still have application data;
	 * - abortive termination (unread data or zero linger time),
	 * - normal termination but queue could not be flushed within time limit
	 */
	__skb_queue_purge(&sk->sk_write_queue);

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/* Snapshot the state before orphaning: softirq processing after
	 * release_sock() below may change sk_state concurrently.
	 */
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	percpu_counter_inc(sk->sk_prot->orphan_count);

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);	/* drops the reference taken by sock_hold() above */
}

EXPORT_SYMBOL_GPL(dccp_close);
1097
/*
 * dccp_shutdown  -  shutdown() entry point for DCCP sockets
 * @sk:  socket being shut down
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR mask from the caller
 *
 * Currently only emits a debug message; no half-close handling is
 * performed here (the visible body has no other effect).
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1104
1105 static inline int __init dccp_mib_init(void)
1106 {
1107         dccp_statistics = alloc_percpu(struct dccp_mib);
1108         if (!dccp_statistics)
1109                 return -ENOMEM;
1110         return 0;
1111 }
1112
/* Release the per-CPU counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics);
}
1117
/* Optional module parameter overriding the established-hash sizing
 * heuristic in dccp_init() (0 = size from available memory).
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime-writable switch (0644) gating dccp_pr_debug() output. */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1129
1130 static int __init dccp_init(void)
1131 {
1132         unsigned long goal;
1133         unsigned long nr_pages = totalram_pages();
1134         int ehash_order, bhash_order, i;
1135         int rc;
1136
1137         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1138                      FIELD_SIZEOF(struct sk_buff, cb));
1139         rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
1140         if (rc)
1141                 goto out_fail;
1142         inet_hashinfo_init(&dccp_hashinfo);
1143         rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
1144         if (rc)
1145                 goto out_fail;
1146         rc = -ENOBUFS;
1147         dccp_hashinfo.bind_bucket_cachep =
1148                 kmem_cache_create("dccp_bind_bucket",
1149                                   sizeof(struct inet_bind_bucket), 0,
1150                                   SLAB_HWCACHE_ALIGN, NULL);
1151         if (!dccp_hashinfo.bind_bucket_cachep)
1152                 goto out_free_percpu;
1153
1154         /*
1155          * Size and allocate the main established and bind bucket
1156          * hash tables.
1157          *
1158          * The methodology is similar to that of the buffer cache.
1159          */
1160         if (nr_pages >= (128 * 1024))
1161                 goal = nr_pages >> (21 - PAGE_SHIFT);
1162         else
1163                 goal = nr_pages >> (23 - PAGE_SHIFT);
1164
1165         if (thash_entries)
1166                 goal = (thash_entries *
1167                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1168         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1169                 ;
1170         do {
1171                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1172                                         sizeof(struct inet_ehash_bucket);
1173
1174                 while (hash_size & (hash_size - 1))
1175                         hash_size--;
1176                 dccp_hashinfo.ehash_mask = hash_size - 1;
1177                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1178                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1179         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1180
1181         if (!dccp_hashinfo.ehash) {
1182                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1183                 goto out_free_bind_bucket_cachep;
1184         }
1185
1186         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1187                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1188
1189         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1190                         goto out_free_dccp_ehash;
1191
1192         bhash_order = ehash_order;
1193
1194         do {
1195                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1196                                         sizeof(struct inet_bind_hashbucket);
1197                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1198                     bhash_order > 0)
1199                         continue;
1200                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1201                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1202         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1203
1204         if (!dccp_hashinfo.bhash) {
1205                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1206                 goto out_free_dccp_locks;
1207         }
1208
1209         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1210                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1211                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1212         }
1213
1214         rc = dccp_mib_init();
1215         if (rc)
1216                 goto out_free_dccp_bhash;
1217
1218         rc = dccp_ackvec_init();
1219         if (rc)
1220                 goto out_free_dccp_mib;
1221
1222         rc = dccp_sysctl_init();
1223         if (rc)
1224                 goto out_ackvec_exit;
1225
1226         rc = ccid_initialize_builtins();
1227         if (rc)
1228                 goto out_sysctl_exit;
1229
1230         dccp_timestamping_init();
1231
1232         return 0;
1233
1234 out_sysctl_exit:
1235         dccp_sysctl_exit();
1236 out_ackvec_exit:
1237         dccp_ackvec_exit();
1238 out_free_dccp_mib:
1239         dccp_mib_exit();
1240 out_free_dccp_bhash:
1241         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1242 out_free_dccp_locks:
1243         inet_ehash_locks_free(&dccp_hashinfo);
1244 out_free_dccp_ehash:
1245         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1246 out_free_bind_bucket_cachep:
1247         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1248 out_free_percpu:
1249         percpu_counter_destroy(&dccp_orphan_count);
1250 out_fail:
1251         dccp_hashinfo.bhash = NULL;
1252         dccp_hashinfo.ehash = NULL;
1253         dccp_hashinfo.bind_bucket_cachep = NULL;
1254         return rc;
1255 }
1256
1257 static void __exit dccp_fini(void)
1258 {
1259         ccid_cleanup_builtins();
1260         dccp_mib_exit();
1261         free_pages((unsigned long)dccp_hashinfo.bhash,
1262                    get_order(dccp_hashinfo.bhash_size *
1263                              sizeof(struct inet_bind_hashbucket)));
1264         free_pages((unsigned long)dccp_hashinfo.ehash,
1265                    get_order((dccp_hashinfo.ehash_mask + 1) *
1266                              sizeof(struct inet_ehash_bucket)));
1267         inet_ehash_locks_free(&dccp_hashinfo);
1268         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1269         dccp_ackvec_exit();
1270         dccp_sysctl_exit();
1271         percpu_counter_destroy(&dccp_orphan_count);
1272 }
1273
/* Module entry/exit hooks and metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");