Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
[platform/kernel/linux-starfive.git] / net / dccp / proto.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  net/dccp/proto.c
4  *
5  *  An implementation of the DCCP protocol
6  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7  */
8
9 #include <linux/dccp.h>
10 #include <linux/module.h>
11 #include <linux/types.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/if_arp.h>
18 #include <linux/init.h>
19 #include <linux/random.h>
20 #include <linux/slab.h>
21 #include <net/checksum.h>
22
23 #include <net/inet_sock.h>
24 #include <net/inet_common.h>
25 #include <net/sock.h>
26 #include <net/xfrm.h>
27
28 #include <asm/ioctls.h>
29 #include <linux/spinlock.h>
30 #include <linux/timer.h>
31 #include <linux/delay.h>
32 #include <linux/poll.h>
33
34 #include "ccid.h"
35 #include "dccp.h"
36 #include "feat.h"
37
38 #define CREATE_TRACE_POINTS
39 #include "trace.h"
40
/*
 * MIB counter storage for DCCP, exported to other modules.
 * NOTE(review): presumably the object updated by the DCCP_INC_STATS() /
 * DCCP_DEC_STATS() calls in this file -- confirm against dccp.h.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * Exported per-CPU counter.
 * NOTE(review): by its name it counts orphaned DCCP sockets -- confirm.
 */
struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Exported inet hash-table state used for DCCP sockets. */
struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * the maximum queue length for tx in packets. 0 is no limit.
 * dccp_init_sock() copies this into each new socket's dccps_tx_qlen.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
53
54 #ifdef CONFIG_IP_DCCP_DEBUG
55 static const char *dccp_state_name(const int state)
56 {
57         static const char *const dccp_state_names[] = {
58         [DCCP_OPEN]             = "OPEN",
59         [DCCP_REQUESTING]       = "REQUESTING",
60         [DCCP_PARTOPEN]         = "PARTOPEN",
61         [DCCP_LISTEN]           = "LISTEN",
62         [DCCP_RESPOND]          = "RESPOND",
63         [DCCP_CLOSING]          = "CLOSING",
64         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
65         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
66         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67         [DCCP_TIME_WAIT]        = "TIME_WAIT",
68         [DCCP_CLOSED]           = "CLOSED",
69         };
70
71         if (state >= DCCP_MAX_STATES)
72                 return "INVALID STATE!";
73         else
74                 return dccp_state_names[state];
75 }
76 #endif
77
78 void dccp_set_state(struct sock *sk, const int state)
79 {
80         const int oldstate = sk->sk_state;
81
82         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
83                       dccp_state_name(oldstate), dccp_state_name(state));
84         WARN_ON(state == oldstate);
85
86         switch (state) {
87         case DCCP_OPEN:
88                 if (oldstate != DCCP_OPEN)
89                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
90                 /* Client retransmits all Confirm options until entering OPEN */
91                 if (oldstate == DCCP_PARTOPEN)
92                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
93                 break;
94
95         case DCCP_CLOSED:
96                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
97                     oldstate == DCCP_CLOSING)
98                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
99
100                 sk->sk_prot->unhash(sk);
101                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
102                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
103                         inet_put_port(sk);
104                 fallthrough;
105         default:
106                 if (oldstate == DCCP_OPEN)
107                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
108         }
109
110         /* Change state AFTER socket is unhashed to avoid closed
111          * socket sitting in hash tables.
112          */
113         inet_sk_set_state(sk, state);
114 }
115
116 EXPORT_SYMBOL_GPL(dccp_set_state);
117
118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120         switch (sk->sk_state) {
121         case DCCP_PASSIVE_CLOSE:
122                 /* Node (client or server) has received Close packet. */
123                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124                 dccp_set_state(sk, DCCP_CLOSED);
125                 break;
126         case DCCP_PASSIVE_CLOSEREQ:
127                 /*
128                  * Client received CloseReq. We set the `active' flag so that
129                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130                  */
131                 dccp_send_close(sk, 1);
132                 dccp_set_state(sk, DCCP_CLOSING);
133         }
134 }
135
136 void dccp_done(struct sock *sk)
137 {
138         dccp_set_state(sk, DCCP_CLOSED);
139         dccp_clear_xmit_timers(sk);
140
141         sk->sk_shutdown = SHUTDOWN_MASK;
142
143         if (!sock_flag(sk, SOCK_DEAD))
144                 sk->sk_state_change(sk);
145         else
146                 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
151 const char *dccp_packet_name(const int type)
152 {
153         static const char *const dccp_packet_names[] = {
154                 [DCCP_PKT_REQUEST]  = "REQUEST",
155                 [DCCP_PKT_RESPONSE] = "RESPONSE",
156                 [DCCP_PKT_DATA]     = "DATA",
157                 [DCCP_PKT_ACK]      = "ACK",
158                 [DCCP_PKT_DATAACK]  = "DATAACK",
159                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160                 [DCCP_PKT_CLOSE]    = "CLOSE",
161                 [DCCP_PKT_RESET]    = "RESET",
162                 [DCCP_PKT_SYNC]     = "SYNC",
163                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
164         };
165
166         if (type >= DCCP_NR_PKT_TYPES)
167                 return "INVALID";
168         else
169                 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
174 static void dccp_sk_destruct(struct sock *sk)
175 {
176         struct dccp_sock *dp = dccp_sk(sk);
177
178         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179         dp->dccps_hc_tx_ccid = NULL;
180         inet_sock_destruct(sk);
181 }
182
183 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
184 {
185         struct dccp_sock *dp = dccp_sk(sk);
186         struct inet_connection_sock *icsk = inet_csk(sk);
187
188         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
189         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
190         sk->sk_state            = DCCP_CLOSED;
191         sk->sk_write_space      = dccp_write_space;
192         sk->sk_destruct         = dccp_sk_destruct;
193         icsk->icsk_sync_mss     = dccp_sync_mss;
194         dp->dccps_mss_cache     = 536;
195         dp->dccps_rate_last     = jiffies;
196         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
197         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
198         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
199
200         dccp_init_xmit_timers(sk);
201
202         INIT_LIST_HEAD(&dp->dccps_featneg);
203         /* control socket doesn't need feat nego */
204         if (likely(ctl_sock_initialized))
205                 return dccp_feat_init(sk);
206         return 0;
207 }
208
209 EXPORT_SYMBOL_GPL(dccp_init_sock);
210
211 void dccp_destroy_sock(struct sock *sk)
212 {
213         struct dccp_sock *dp = dccp_sk(sk);
214
215         __skb_queue_purge(&sk->sk_write_queue);
216         if (sk->sk_send_head != NULL) {
217                 kfree_skb(sk->sk_send_head);
218                 sk->sk_send_head = NULL;
219         }
220
221         /* Clean up a referenced DCCP bind bucket. */
222         if (inet_csk(sk)->icsk_bind_hash != NULL)
223                 inet_put_port(sk);
224
225         kfree(dp->dccps_service_list);
226         dp->dccps_service_list = NULL;
227
228         if (dp->dccps_hc_rx_ackvec != NULL) {
229                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
230                 dp->dccps_hc_rx_ackvec = NULL;
231         }
232         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
233         dp->dccps_hc_rx_ccid = NULL;
234
235         /* clean up feature negotiation state */
236         dccp_feat_list_purge(&dp->dccps_featneg);
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
240
241 static inline int dccp_listen_start(struct sock *sk, int backlog)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244
245         dp->dccps_role = DCCP_ROLE_LISTEN;
246         /* do not start to listen if feature negotiation setup fails */
247         if (dccp_feat_finalise_settings(dp))
248                 return -EPROTO;
249         return inet_csk_listen_start(sk, backlog);
250 }
251
252 static inline int dccp_need_reset(int state)
253 {
254         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
255                state != DCCP_REQUESTING;
256 }
257
258 int dccp_disconnect(struct sock *sk, int flags)
259 {
260         struct inet_connection_sock *icsk = inet_csk(sk);
261         struct inet_sock *inet = inet_sk(sk);
262         struct dccp_sock *dp = dccp_sk(sk);
263         const int old_state = sk->sk_state;
264
265         if (old_state != DCCP_CLOSED)
266                 dccp_set_state(sk, DCCP_CLOSED);
267
268         /*
269          * This corresponds to the ABORT function of RFC793, sec. 3.8
270          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
271          */
272         if (old_state == DCCP_LISTEN) {
273                 inet_csk_listen_stop(sk);
274         } else if (dccp_need_reset(old_state)) {
275                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
276                 sk->sk_err = ECONNRESET;
277         } else if (old_state == DCCP_REQUESTING)
278                 sk->sk_err = ECONNRESET;
279
280         dccp_clear_xmit_timers(sk);
281         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
282         dp->dccps_hc_rx_ccid = NULL;
283
284         __skb_queue_purge(&sk->sk_receive_queue);
285         __skb_queue_purge(&sk->sk_write_queue);
286         if (sk->sk_send_head != NULL) {
287                 __kfree_skb(sk->sk_send_head);
288                 sk->sk_send_head = NULL;
289         }
290
291         inet->inet_dport = 0;
292
293         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
294                 inet_reset_saddr(sk);
295
296         sk->sk_shutdown = 0;
297         sock_reset_flag(sk, SOCK_DONE);
298
299         icsk->icsk_backoff = 0;
300         inet_csk_delack_init(sk);
301         __sk_dst_reset(sk);
302
303         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
304
305         sk_error_report(sk);
306         return 0;
307 }
308
309 EXPORT_SYMBOL_GPL(dccp_disconnect);
310
311 /*
312  *      Wait for a DCCP event.
313  *
314  *      Note that we don't need to lock the socket, as the upper poll layers
315  *      take care of normal races (between the test and the event) and we don't
316  *      go look at any of the socket buffers directly.
317  */
318 __poll_t dccp_poll(struct file *file, struct socket *sock,
319                        poll_table *wait)
320 {
321         __poll_t mask;
322         struct sock *sk = sock->sk;
323
324         sock_poll_wait(file, sock, wait);
325         if (sk->sk_state == DCCP_LISTEN)
326                 return inet_csk_listen_poll(sk);
327
328         /* Socket is not locked. We are protected from async events
329            by poll logic and correct handling of state changes
330            made by another threads is impossible in any case.
331          */
332
333         mask = 0;
334         if (sk->sk_err)
335                 mask = EPOLLERR;
336
337         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
338                 mask |= EPOLLHUP;
339         if (sk->sk_shutdown & RCV_SHUTDOWN)
340                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
341
342         /* Connected? */
343         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
344                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
345                         mask |= EPOLLIN | EPOLLRDNORM;
346
347                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
348                         if (sk_stream_is_writeable(sk)) {
349                                 mask |= EPOLLOUT | EPOLLWRNORM;
350                         } else {  /* send SIGIO later */
351                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
352                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
353
354                                 /* Race breaker. If space is freed after
355                                  * wspace test but before the flags are set,
356                                  * IO signal will be lost.
357                                  */
358                                 if (sk_stream_is_writeable(sk))
359                                         mask |= EPOLLOUT | EPOLLWRNORM;
360                         }
361                 }
362         }
363         return mask;
364 }
365
366 EXPORT_SYMBOL_GPL(dccp_poll);
367
368 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
369 {
370         int rc = -ENOTCONN;
371
372         lock_sock(sk);
373
374         if (sk->sk_state == DCCP_LISTEN)
375                 goto out;
376
377         switch (cmd) {
378         case SIOCOUTQ: {
379                 int amount = sk_wmem_alloc_get(sk);
380                 /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
381                  * always 0, comparably to UDP.
382                  */
383
384                 rc = put_user(amount, (int __user *)arg);
385         }
386                 break;
387         case SIOCINQ: {
388                 struct sk_buff *skb;
389                 unsigned long amount = 0;
390
391                 skb = skb_peek(&sk->sk_receive_queue);
392                 if (skb != NULL) {
393                         /*
394                          * We will only return the amount of this packet since
395                          * that is all that will be read.
396                          */
397                         amount = skb->len;
398                 }
399                 rc = put_user(amount, (int __user *)arg);
400         }
401                 break;
402         default:
403                 rc = -ENOIOCTLCMD;
404                 break;
405         }
406 out:
407         release_sock(sk);
408         return rc;
409 }
410
411 EXPORT_SYMBOL_GPL(dccp_ioctl);
412
413 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
414                                    sockptr_t optval, unsigned int optlen)
415 {
416         struct dccp_sock *dp = dccp_sk(sk);
417         struct dccp_service_list *sl = NULL;
418
419         if (service == DCCP_SERVICE_INVALID_VALUE ||
420             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
421                 return -EINVAL;
422
423         if (optlen > sizeof(service)) {
424                 sl = kmalloc(optlen, GFP_KERNEL);
425                 if (sl == NULL)
426                         return -ENOMEM;
427
428                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
429                 if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
430                                 sizeof(service), optlen - sizeof(service)) ||
431                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
432                         kfree(sl);
433                         return -EFAULT;
434                 }
435         }
436
437         lock_sock(sk);
438         dp->dccps_service = service;
439
440         kfree(dp->dccps_service_list);
441
442         dp->dccps_service_list = sl;
443         release_sock(sk);
444         return 0;
445 }
446
447 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
448 {
449         u8 *list, len;
450         int i, rc;
451
452         if (cscov < 0 || cscov > 15)
453                 return -EINVAL;
454         /*
455          * Populate a list of permissible values, in the range cscov...15. This
456          * is necessary since feature negotiation of single values only works if
457          * both sides incidentally choose the same value. Since the list starts
458          * lowest-value first, negotiation will pick the smallest shared value.
459          */
460         if (cscov == 0)
461                 return 0;
462         len = 16 - cscov;
463
464         list = kmalloc(len, GFP_KERNEL);
465         if (list == NULL)
466                 return -ENOBUFS;
467
468         for (i = 0; i < len; i++)
469                 list[i] = cscov++;
470
471         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
472
473         if (rc == 0) {
474                 if (rx)
475                         dccp_sk(sk)->dccps_pcrlen = cscov;
476                 else
477                         dccp_sk(sk)->dccps_pcslen = cscov;
478         }
479         kfree(list);
480         return rc;
481 }
482
483 static int dccp_setsockopt_ccid(struct sock *sk, int type,
484                                 sockptr_t optval, unsigned int optlen)
485 {
486         u8 *val;
487         int rc = 0;
488
489         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
490                 return -EINVAL;
491
492         val = memdup_sockptr(optval, optlen);
493         if (IS_ERR(val))
494                 return PTR_ERR(val);
495
496         lock_sock(sk);
497         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
498                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
499
500         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
501                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
502         release_sock(sk);
503
504         kfree(val);
505         return rc;
506 }
507
508 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
509                 sockptr_t optval, unsigned int optlen)
510 {
511         struct dccp_sock *dp = dccp_sk(sk);
512         int val, err = 0;
513
514         switch (optname) {
515         case DCCP_SOCKOPT_PACKET_SIZE:
516                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
517                 return 0;
518         case DCCP_SOCKOPT_CHANGE_L:
519         case DCCP_SOCKOPT_CHANGE_R:
520                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
521                 return 0;
522         case DCCP_SOCKOPT_CCID:
523         case DCCP_SOCKOPT_RX_CCID:
524         case DCCP_SOCKOPT_TX_CCID:
525                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
526         }
527
528         if (optlen < (int)sizeof(int))
529                 return -EINVAL;
530
531         if (copy_from_sockptr(&val, optval, sizeof(int)))
532                 return -EFAULT;
533
534         if (optname == DCCP_SOCKOPT_SERVICE)
535                 return dccp_setsockopt_service(sk, val, optval, optlen);
536
537         lock_sock(sk);
538         switch (optname) {
539         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
540                 if (dp->dccps_role != DCCP_ROLE_SERVER)
541                         err = -EOPNOTSUPP;
542                 else
543                         dp->dccps_server_timewait = (val != 0);
544                 break;
545         case DCCP_SOCKOPT_SEND_CSCOV:
546                 err = dccp_setsockopt_cscov(sk, val, false);
547                 break;
548         case DCCP_SOCKOPT_RECV_CSCOV:
549                 err = dccp_setsockopt_cscov(sk, val, true);
550                 break;
551         case DCCP_SOCKOPT_QPOLICY_ID:
552                 if (sk->sk_state != DCCP_CLOSED)
553                         err = -EISCONN;
554                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
555                         err = -EINVAL;
556                 else
557                         dp->dccps_qpolicy = val;
558                 break;
559         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
560                 if (val < 0)
561                         err = -EINVAL;
562                 else
563                         dp->dccps_tx_qlen = val;
564                 break;
565         default:
566                 err = -ENOPROTOOPT;
567                 break;
568         }
569         release_sock(sk);
570
571         return err;
572 }
573
574 int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
575                     unsigned int optlen)
576 {
577         if (level != SOL_DCCP)
578                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
579                                                              optname, optval,
580                                                              optlen);
581         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
582 }
583
584 EXPORT_SYMBOL_GPL(dccp_setsockopt);
585
586 static int dccp_getsockopt_service(struct sock *sk, int len,
587                                    __be32 __user *optval,
588                                    int __user *optlen)
589 {
590         const struct dccp_sock *dp = dccp_sk(sk);
591         const struct dccp_service_list *sl;
592         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
593
594         lock_sock(sk);
595         if ((sl = dp->dccps_service_list) != NULL) {
596                 slen = sl->dccpsl_nr * sizeof(u32);
597                 total_len += slen;
598         }
599
600         err = -EINVAL;
601         if (total_len > len)
602                 goto out;
603
604         err = 0;
605         if (put_user(total_len, optlen) ||
606             put_user(dp->dccps_service, optval) ||
607             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
608                 err = -EFAULT;
609 out:
610         release_sock(sk);
611         return err;
612 }
613
614 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
615                     char __user *optval, int __user *optlen)
616 {
617         struct dccp_sock *dp;
618         int val, len;
619
620         if (get_user(len, optlen))
621                 return -EFAULT;
622
623         if (len < (int)sizeof(int))
624                 return -EINVAL;
625
626         dp = dccp_sk(sk);
627
628         switch (optname) {
629         case DCCP_SOCKOPT_PACKET_SIZE:
630                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
631                 return 0;
632         case DCCP_SOCKOPT_SERVICE:
633                 return dccp_getsockopt_service(sk, len,
634                                                (__be32 __user *)optval, optlen);
635         case DCCP_SOCKOPT_GET_CUR_MPS:
636                 val = dp->dccps_mss_cache;
637                 break;
638         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
639                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
640         case DCCP_SOCKOPT_TX_CCID:
641                 val = ccid_get_current_tx_ccid(dp);
642                 if (val < 0)
643                         return -ENOPROTOOPT;
644                 break;
645         case DCCP_SOCKOPT_RX_CCID:
646                 val = ccid_get_current_rx_ccid(dp);
647                 if (val < 0)
648                         return -ENOPROTOOPT;
649                 break;
650         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
651                 val = dp->dccps_server_timewait;
652                 break;
653         case DCCP_SOCKOPT_SEND_CSCOV:
654                 val = dp->dccps_pcslen;
655                 break;
656         case DCCP_SOCKOPT_RECV_CSCOV:
657                 val = dp->dccps_pcrlen;
658                 break;
659         case DCCP_SOCKOPT_QPOLICY_ID:
660                 val = dp->dccps_qpolicy;
661                 break;
662         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
663                 val = dp->dccps_tx_qlen;
664                 break;
665         case 128 ... 191:
666                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
667                                              len, (u32 __user *)optval, optlen);
668         case 192 ... 255:
669                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
670                                              len, (u32 __user *)optval, optlen);
671         default:
672                 return -ENOPROTOOPT;
673         }
674
675         len = sizeof(val);
676         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
677                 return -EFAULT;
678
679         return 0;
680 }
681
682 int dccp_getsockopt(struct sock *sk, int level, int optname,
683                     char __user *optval, int __user *optlen)
684 {
685         if (level != SOL_DCCP)
686                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
687                                                              optname, optval,
688                                                              optlen);
689         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
690 }
691
692 EXPORT_SYMBOL_GPL(dccp_getsockopt);
693
694 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
695 {
696         struct cmsghdr *cmsg;
697
698         /*
699          * Assign an (opaque) qpolicy priority value to skb->priority.
700          *
701          * We are overloading this skb field for use with the qpolicy subystem.
702          * The skb->priority is normally used for the SO_PRIORITY option, which
703          * is initialised from sk_priority. Since the assignment of sk_priority
704          * to skb->priority happens later (on layer 3), we overload this field
705          * for use with queueing priorities as long as the skb is on layer 4.
706          * The default priority value (if nothing is set) is 0.
707          */
708         skb->priority = 0;
709
710         for_each_cmsghdr(cmsg, msg) {
711                 if (!CMSG_OK(msg, cmsg))
712                         return -EINVAL;
713
714                 if (cmsg->cmsg_level != SOL_DCCP)
715                         continue;
716
717                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
718                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
719                         return -EINVAL;
720
721                 switch (cmsg->cmsg_type) {
722                 case DCCP_SCM_PRIORITY:
723                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
724                                 return -EINVAL;
725                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
726                         break;
727                 default:
728                         return -EINVAL;
729                 }
730         }
731         return 0;
732 }
733
734 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
735 {
736         const struct dccp_sock *dp = dccp_sk(sk);
737         const int flags = msg->msg_flags;
738         const int noblock = flags & MSG_DONTWAIT;
739         struct sk_buff *skb;
740         int rc, size;
741         long timeo;
742
743         trace_dccp_probe(sk, len);
744
745         if (len > dp->dccps_mss_cache)
746                 return -EMSGSIZE;
747
748         lock_sock(sk);
749
750         if (dccp_qpolicy_full(sk)) {
751                 rc = -EAGAIN;
752                 goto out_release;
753         }
754
755         timeo = sock_sndtimeo(sk, noblock);
756
757         /*
758          * We have to use sk_stream_wait_connect here to set sk_write_pending,
759          * so that the trick in dccp_rcv_request_sent_state_process.
760          */
761         /* Wait for a connection to finish. */
762         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
763                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
764                         goto out_release;
765
766         size = sk->sk_prot->max_header + len;
767         release_sock(sk);
768         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
769         lock_sock(sk);
770         if (skb == NULL)
771                 goto out_release;
772
773         if (sk->sk_state == DCCP_CLOSED) {
774                 rc = -ENOTCONN;
775                 goto out_discard;
776         }
777
778         skb_reserve(skb, sk->sk_prot->max_header);
779         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
780         if (rc != 0)
781                 goto out_discard;
782
783         rc = dccp_msghdr_parse(msg, skb);
784         if (rc != 0)
785                 goto out_discard;
786
787         dccp_qpolicy_push(sk, skb);
788         /*
789          * The xmit_timer is set if the TX CCID is rate-based and will expire
790          * when congestion control permits to release further packets into the
791          * network. Window-based CCIDs do not use this timer.
792          */
793         if (!timer_pending(&dp->dccps_xmit_timer))
794                 dccp_write_xmit(sk);
795 out_release:
796         release_sock(sk);
797         return rc ? : len;
798 out_discard:
799         kfree_skb(skb);
800         goto out_release;
801 }
802
803 EXPORT_SYMBOL_GPL(dccp_sendmsg);
804
805 int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
806                  int flags, int *addr_len)
807 {
808         const struct dccp_hdr *dh;
809         long timeo;
810
811         lock_sock(sk);
812
813         if (sk->sk_state == DCCP_LISTEN) {
814                 len = -ENOTCONN;
815                 goto out;
816         }
817
818         timeo = sock_rcvtimeo(sk, nonblock);
819
820         do {
821                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
822
823                 if (skb == NULL)
824                         goto verify_sock_status;
825
826                 dh = dccp_hdr(skb);
827
828                 switch (dh->dccph_type) {
829                 case DCCP_PKT_DATA:
830                 case DCCP_PKT_DATAACK:
831                         goto found_ok_skb;
832
833                 case DCCP_PKT_CLOSE:
834                 case DCCP_PKT_CLOSEREQ:
835                         if (!(flags & MSG_PEEK))
836                                 dccp_finish_passive_close(sk);
837                         fallthrough;
838                 case DCCP_PKT_RESET:
839                         dccp_pr_debug("found fin (%s) ok!\n",
840                                       dccp_packet_name(dh->dccph_type));
841                         len = 0;
842                         goto found_fin_ok;
843                 default:
844                         dccp_pr_debug("packet_type=%s\n",
845                                       dccp_packet_name(dh->dccph_type));
846                         sk_eat_skb(sk, skb);
847                 }
848 verify_sock_status:
849                 if (sock_flag(sk, SOCK_DONE)) {
850                         len = 0;
851                         break;
852                 }
853
854                 if (sk->sk_err) {
855                         len = sock_error(sk);
856                         break;
857                 }
858
859                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
860                         len = 0;
861                         break;
862                 }
863
864                 if (sk->sk_state == DCCP_CLOSED) {
865                         if (!sock_flag(sk, SOCK_DONE)) {
866                                 /* This occurs when user tries to read
867                                  * from never connected socket.
868                                  */
869                                 len = -ENOTCONN;
870                                 break;
871                         }
872                         len = 0;
873                         break;
874                 }
875
876                 if (!timeo) {
877                         len = -EAGAIN;
878                         break;
879                 }
880
881                 if (signal_pending(current)) {
882                         len = sock_intr_errno(timeo);
883                         break;
884                 }
885
886                 sk_wait_data(sk, &timeo, NULL);
887                 continue;
888         found_ok_skb:
889                 if (len > skb->len)
890                         len = skb->len;
891                 else if (len < skb->len)
892                         msg->msg_flags |= MSG_TRUNC;
893
894                 if (skb_copy_datagram_msg(skb, 0, msg, len)) {
895                         /* Exception. Bailout! */
896                         len = -EFAULT;
897                         break;
898                 }
899                 if (flags & MSG_TRUNC)
900                         len = skb->len;
901         found_fin_ok:
902                 if (!(flags & MSG_PEEK))
903                         sk_eat_skb(sk, skb);
904                 break;
905         } while (1);
906 out:
907         release_sock(sk);
908         return len;
909 }
910
911 EXPORT_SYMBOL_GPL(dccp_recvmsg);
912
913 int inet_dccp_listen(struct socket *sock, int backlog)
914 {
915         struct sock *sk = sock->sk;
916         unsigned char old_state;
917         int err;
918
919         lock_sock(sk);
920
921         err = -EINVAL;
922         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
923                 goto out;
924
925         old_state = sk->sk_state;
926         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
927                 goto out;
928
929         WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
930         /* Really, if the socket is already in listen state
931          * we can only allow the backlog to be adjusted.
932          */
933         if (old_state != DCCP_LISTEN) {
934                 /*
935                  * FIXME: here it probably should be sk->sk_prot->listen_start
936                  * see tcp_listen_start
937                  */
938                 err = dccp_listen_start(sk, backlog);
939                 if (err)
940                         goto out;
941         }
942         err = 0;
943
944 out:
945         release_sock(sk);
946         return err;
947 }
948
949 EXPORT_SYMBOL_GPL(inet_dccp_listen);
950
951 static void dccp_terminate_connection(struct sock *sk)
952 {
953         u8 next_state = DCCP_CLOSED;
954
955         switch (sk->sk_state) {
956         case DCCP_PASSIVE_CLOSE:
957         case DCCP_PASSIVE_CLOSEREQ:
958                 dccp_finish_passive_close(sk);
959                 break;
960         case DCCP_PARTOPEN:
961                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
962                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
963                 fallthrough;
964         case DCCP_OPEN:
965                 dccp_send_close(sk, 1);
966
967                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
968                     !dccp_sk(sk)->dccps_server_timewait)
969                         next_state = DCCP_ACTIVE_CLOSEREQ;
970                 else
971                         next_state = DCCP_CLOSING;
972                 fallthrough;
973         default:
974                 dccp_set_state(sk, next_state);
975         }
976 }
977
978 void dccp_close(struct sock *sk, long timeout)
979 {
980         struct dccp_sock *dp = dccp_sk(sk);
981         struct sk_buff *skb;
982         u32 data_was_unread = 0;
983         int state;
984
985         lock_sock(sk);
986
987         sk->sk_shutdown = SHUTDOWN_MASK;
988
989         if (sk->sk_state == DCCP_LISTEN) {
990                 dccp_set_state(sk, DCCP_CLOSED);
991
992                 /* Special case. */
993                 inet_csk_listen_stop(sk);
994
995                 goto adjudge_to_death;
996         }
997
998         sk_stop_timer(sk, &dp->dccps_xmit_timer);
999
1000         /*
1001          * We need to flush the recv. buffs.  We do this only on the
1002          * descriptor close, not protocol-sourced closes, because the
1003           *reader process may not have drained the data yet!
1004          */
1005         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1006                 data_was_unread += skb->len;
1007                 __kfree_skb(skb);
1008         }
1009
1010         /* If socket has been already reset kill it. */
1011         if (sk->sk_state == DCCP_CLOSED)
1012                 goto adjudge_to_death;
1013
1014         if (data_was_unread) {
1015                 /* Unread data was tossed, send an appropriate Reset Code */
1016                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1017                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1018                 dccp_set_state(sk, DCCP_CLOSED);
1019         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1020                 /* Check zero linger _after_ checking for unread data. */
1021                 sk->sk_prot->disconnect(sk, 0);
1022         } else if (sk->sk_state != DCCP_CLOSED) {
1023                 /*
1024                  * Normal connection termination. May need to wait if there are
1025                  * still packets in the TX queue that are delayed by the CCID.
1026                  */
1027                 dccp_flush_write_queue(sk, &timeout);
1028                 dccp_terminate_connection(sk);
1029         }
1030
1031         /*
1032          * Flush write queue. This may be necessary in several cases:
1033          * - we have been closed by the peer but still have application data;
1034          * - abortive termination (unread data or zero linger time),
1035          * - normal termination but queue could not be flushed within time limit
1036          */
1037         __skb_queue_purge(&sk->sk_write_queue);
1038
1039         sk_stream_wait_close(sk, timeout);
1040
1041 adjudge_to_death:
1042         state = sk->sk_state;
1043         sock_hold(sk);
1044         sock_orphan(sk);
1045
1046         /*
1047          * It is the last release_sock in its life. It will remove backlog.
1048          */
1049         release_sock(sk);
1050         /*
1051          * Now socket is owned by kernel and we acquire BH lock
1052          * to finish close. No need to check for user refs.
1053          */
1054         local_bh_disable();
1055         bh_lock_sock(sk);
1056         WARN_ON(sock_owned_by_user(sk));
1057
1058         percpu_counter_inc(sk->sk_prot->orphan_count);
1059
1060         /* Have we already been destroyed by a softirq or backlog? */
1061         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1062                 goto out;
1063
1064         if (sk->sk_state == DCCP_CLOSED)
1065                 inet_csk_destroy_sock(sk);
1066
1067         /* Otherwise, socket is reprieved until protocol close. */
1068
1069 out:
1070         bh_unlock_sock(sk);
1071         local_bh_enable();
1072         sock_put(sk);
1073 }
1074
1075 EXPORT_SYMBOL_GPL(dccp_close);
1076
/*
 * dccp_shutdown - shutdown() hook for DCCP sockets
 * @sk: socket the request refers to
 * @how: shutdown mode requested by the caller
 *
 * Currently a stub: the request is only logged through dccp_pr_debug()
 * and the socket is left untouched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}
1081
1082 EXPORT_SYMBOL_GPL(dccp_shutdown);
1083
1084 static inline int __init dccp_mib_init(void)
1085 {
1086         dccp_statistics = alloc_percpu(struct dccp_mib);
1087         if (!dccp_statistics)
1088                 return -ENOMEM;
1089         return 0;
1090 }
1091
1092 static inline void dccp_mib_exit(void)
1093 {
1094         free_percpu(dccp_statistics);
1095 }
1096
/*
 * Module parameter (mode 0444).  When non-zero, dccp_init() derives the
 * size of the established hash table from this value instead of from the
 * amount of RAM.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug switch, exported and exposed as a module parameter (mode 0644).
 * NOTE(review): presumably consulted by dccp_pr_debug(), which is defined
 * outside this file -- confirm.
 */
bool dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1108
/*
 * dccp_init - module initialisation for the DCCP core
 *
 * Sets up, in order: the orphan counter, the hashinfo structures, the
 * bind-bucket slab cache, the established (ehash) and bind (bhash) hash
 * tables, the MIB counters, the ack-vector code, the sysctl entries, the
 * built-in CCIDs and finally timestamping.
 *
 * Returns 0 on success or a negative errno.  On failure everything set up
 * so far is torn down in reverse order through the label ladder at the
 * end, and the hashinfo pointers are reset to NULL.
 */
static int __init dccp_init(void)
{
        unsigned long goal;
        unsigned long nr_pages = totalram_pages();
        int ehash_order, bhash_order, i;
        int rc;

        /* struct dccp_skb_cb must fit into the skb control buffer. */
        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     sizeof_field(struct sk_buff, cb));
        rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
        if (rc)
                goto out_fail;
        inet_hashinfo_init(&dccp_hashinfo);
        rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
        if (rc)
                goto out_free_percpu;
        /*
         * Error code reported by the allocation failures below that do
         * not assign rc themselves (slab cache, ehash, ehash locks, bhash).
         */
        rc = -ENOBUFS;
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out_free_hashinfo2;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (nr_pages >= (128 * 1024))
                goal = nr_pages >> (21 - PAGE_SHIFT);
        else
                goal = nr_pages >> (23 - PAGE_SHIFT);

        /* An explicit thash_entries overrides the RAM-based estimate. */
        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        /* Smallest page order whose page count is at least 'goal'. */
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        /*
         * Try to allocate the ehash table, halving the request after each
         * failure.  The loop gives up once the decremented order hits 0.
         */
        do {
                unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);

                /* Round the bucket count down to a power of two. */
                while (hash_size & (hash_size - 1))
                        hash_size--;
                dccp_hashinfo.ehash_mask = hash_size - 1;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
                INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                        goto out_free_dccp_ehash;

        /* The bind hash starts from the order that worked for ehash. */
        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                /*
                 * Skip orders that would give more than 64K buckets;
                 * 'continue' jumps to the loop condition, which lowers
                 * bhash_order and retries.
                 */
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        rc = ccid_initialize_builtins();
        if (rc)
                goto out_sysctl_exit;

        dccp_timestamping_init();

        return 0;

        /* Error unwinding: each label undoes one successful step above. */
out_sysctl_exit:
        dccp_sysctl_exit();
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
        inet_hashinfo2_free_mod(&dccp_hashinfo);
out_free_percpu:
        percpu_counter_destroy(&dccp_orphan_count);
out_fail:
        /* Leave no stale pointers behind in the global hashinfo. */
        dccp_hashinfo.bhash = NULL;
        dccp_hashinfo.ehash = NULL;
        dccp_hashinfo.bind_bucket_cachep = NULL;
        return rc;
}
1237
1238 static void __exit dccp_fini(void)
1239 {
1240         ccid_cleanup_builtins();
1241         dccp_mib_exit();
1242         free_pages((unsigned long)dccp_hashinfo.bhash,
1243                    get_order(dccp_hashinfo.bhash_size *
1244                              sizeof(struct inet_bind_hashbucket)));
1245         free_pages((unsigned long)dccp_hashinfo.ehash,
1246                    get_order((dccp_hashinfo.ehash_mask + 1) *
1247                              sizeof(struct inet_ehash_bucket)));
1248         inet_ehash_locks_free(&dccp_hashinfo);
1249         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1250         dccp_ackvec_exit();
1251         dccp_sysctl_exit();
1252         inet_hashinfo2_free_mod(&dccp_hashinfo);
1253         percpu_counter_destroy(&dccp_orphan_count);
1254 }
1255
1256 module_init(dccp_init);
1257 module_exit(dccp_fini);
1258
1259 MODULE_LICENSE("GPL");
1260 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1261 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");