Merge branch 'remove-RTO_ONLINK-users'
[platform/kernel/linux-rpi.git] / net / dccp / proto.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  net/dccp/proto.c
4  *
5  *  An implementation of the DCCP protocol
6  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7  */
8
9 #include <linux/dccp.h>
10 #include <linux/module.h>
11 #include <linux/types.h>
12 #include <linux/sched.h>
13 #include <linux/kernel.h>
14 #include <linux/skbuff.h>
15 #include <linux/netdevice.h>
16 #include <linux/in.h>
17 #include <linux/if_arp.h>
18 #include <linux/init.h>
19 #include <linux/random.h>
20 #include <linux/slab.h>
21 #include <net/checksum.h>
22
23 #include <net/inet_sock.h>
24 #include <net/inet_common.h>
25 #include <net/sock.h>
26 #include <net/xfrm.h>
27
28 #include <asm/ioctls.h>
29 #include <linux/spinlock.h>
30 #include <linux/timer.h>
31 #include <linux/delay.h>
32 #include <linux/poll.h>
33
34 #include "ccid.h"
35 #include "dccp.h"
36 #include "feat.h"
37
38 #define CREATE_TRACE_POINTS
39 #include "trace.h"
40
41 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42
43 EXPORT_SYMBOL_GPL(dccp_statistics);
44
45 DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
46 EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);
47
48 struct inet_hashinfo dccp_hashinfo;
49 EXPORT_SYMBOL_GPL(dccp_hashinfo);
50
51 /* the maximum queue length for tx in packets. 0 is no limit */
52 int sysctl_dccp_tx_qlen __read_mostly = 5;
53
54 #ifdef CONFIG_IP_DCCP_DEBUG
55 static const char *dccp_state_name(const int state)
56 {
57         static const char *const dccp_state_names[] = {
58         [DCCP_OPEN]             = "OPEN",
59         [DCCP_REQUESTING]       = "REQUESTING",
60         [DCCP_PARTOPEN]         = "PARTOPEN",
61         [DCCP_LISTEN]           = "LISTEN",
62         [DCCP_RESPOND]          = "RESPOND",
63         [DCCP_CLOSING]          = "CLOSING",
64         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
65         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
66         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
67         [DCCP_TIME_WAIT]        = "TIME_WAIT",
68         [DCCP_CLOSED]           = "CLOSED",
69         };
70
71         if (state >= DCCP_MAX_STATES)
72                 return "INVALID STATE!";
73         else
74                 return dccp_state_names[state];
75 }
76 #endif
77
78 void dccp_set_state(struct sock *sk, const int state)
79 {
80         const int oldstate = sk->sk_state;
81
82         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
83                       dccp_state_name(oldstate), dccp_state_name(state));
84         WARN_ON(state == oldstate);
85
86         switch (state) {
87         case DCCP_OPEN:
88                 if (oldstate != DCCP_OPEN)
89                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
90                 /* Client retransmits all Confirm options until entering OPEN */
91                 if (oldstate == DCCP_PARTOPEN)
92                         dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
93                 break;
94
95         case DCCP_CLOSED:
96                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
97                     oldstate == DCCP_CLOSING)
98                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
99
100                 sk->sk_prot->unhash(sk);
101                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
102                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
103                         inet_put_port(sk);
104                 fallthrough;
105         default:
106                 if (oldstate == DCCP_OPEN)
107                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
108         }
109
110         /* Change state AFTER socket is unhashed to avoid closed
111          * socket sitting in hash tables.
112          */
113         inet_sk_set_state(sk, state);
114 }
115
116 EXPORT_SYMBOL_GPL(dccp_set_state);
117
118 static void dccp_finish_passive_close(struct sock *sk)
119 {
120         switch (sk->sk_state) {
121         case DCCP_PASSIVE_CLOSE:
122                 /* Node (client or server) has received Close packet. */
123                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
124                 dccp_set_state(sk, DCCP_CLOSED);
125                 break;
126         case DCCP_PASSIVE_CLOSEREQ:
127                 /*
128                  * Client received CloseReq. We set the `active' flag so that
129                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
130                  */
131                 dccp_send_close(sk, 1);
132                 dccp_set_state(sk, DCCP_CLOSING);
133         }
134 }
135
136 void dccp_done(struct sock *sk)
137 {
138         dccp_set_state(sk, DCCP_CLOSED);
139         dccp_clear_xmit_timers(sk);
140
141         sk->sk_shutdown = SHUTDOWN_MASK;
142
143         if (!sock_flag(sk, SOCK_DEAD))
144                 sk->sk_state_change(sk);
145         else
146                 inet_csk_destroy_sock(sk);
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_done);
150
151 const char *dccp_packet_name(const int type)
152 {
153         static const char *const dccp_packet_names[] = {
154                 [DCCP_PKT_REQUEST]  = "REQUEST",
155                 [DCCP_PKT_RESPONSE] = "RESPONSE",
156                 [DCCP_PKT_DATA]     = "DATA",
157                 [DCCP_PKT_ACK]      = "ACK",
158                 [DCCP_PKT_DATAACK]  = "DATAACK",
159                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
160                 [DCCP_PKT_CLOSE]    = "CLOSE",
161                 [DCCP_PKT_RESET]    = "RESET",
162                 [DCCP_PKT_SYNC]     = "SYNC",
163                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
164         };
165
166         if (type >= DCCP_NR_PKT_TYPES)
167                 return "INVALID";
168         else
169                 return dccp_packet_names[type];
170 }
171
172 EXPORT_SYMBOL_GPL(dccp_packet_name);
173
174 void dccp_destruct_common(struct sock *sk)
175 {
176         struct dccp_sock *dp = dccp_sk(sk);
177
178         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
179         dp->dccps_hc_tx_ccid = NULL;
180 }
181 EXPORT_SYMBOL_GPL(dccp_destruct_common);
182
/*
 * sk_destruct callback installed by dccp_init_sock(): run the DCCP-specific
 * cleanup first, then hand over to the generic inet destructor.
 */
static void dccp_sk_destruct(struct sock *sk)
{
	dccp_destruct_common(sk);
	inet_sock_destruct(sk);
}
188
189 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
190 {
191         struct dccp_sock *dp = dccp_sk(sk);
192         struct inet_connection_sock *icsk = inet_csk(sk);
193
194         pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, "
195                      "please contact the netdev mailing list\n");
196
197         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
198         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
199         sk->sk_state            = DCCP_CLOSED;
200         sk->sk_write_space      = dccp_write_space;
201         sk->sk_destruct         = dccp_sk_destruct;
202         icsk->icsk_sync_mss     = dccp_sync_mss;
203         dp->dccps_mss_cache     = 536;
204         dp->dccps_rate_last     = jiffies;
205         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
206         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
207         dp->dccps_tx_qlen       = sysctl_dccp_tx_qlen;
208
209         dccp_init_xmit_timers(sk);
210
211         INIT_LIST_HEAD(&dp->dccps_featneg);
212         /* control socket doesn't need feat nego */
213         if (likely(ctl_sock_initialized))
214                 return dccp_feat_init(sk);
215         return 0;
216 }
217
218 EXPORT_SYMBOL_GPL(dccp_init_sock);
219
220 void dccp_destroy_sock(struct sock *sk)
221 {
222         struct dccp_sock *dp = dccp_sk(sk);
223
224         __skb_queue_purge(&sk->sk_write_queue);
225         if (sk->sk_send_head != NULL) {
226                 kfree_skb(sk->sk_send_head);
227                 sk->sk_send_head = NULL;
228         }
229
230         /* Clean up a referenced DCCP bind bucket. */
231         if (inet_csk(sk)->icsk_bind_hash != NULL)
232                 inet_put_port(sk);
233
234         kfree(dp->dccps_service_list);
235         dp->dccps_service_list = NULL;
236
237         if (dp->dccps_hc_rx_ackvec != NULL) {
238                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
239                 dp->dccps_hc_rx_ackvec = NULL;
240         }
241         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
242         dp->dccps_hc_rx_ccid = NULL;
243
244         /* clean up feature negotiation state */
245         dccp_feat_list_purge(&dp->dccps_featneg);
246 }
247
248 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
249
250 static inline int dccp_need_reset(int state)
251 {
252         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
253                state != DCCP_REQUESTING;
254 }
255
256 int dccp_disconnect(struct sock *sk, int flags)
257 {
258         struct inet_connection_sock *icsk = inet_csk(sk);
259         struct inet_sock *inet = inet_sk(sk);
260         struct dccp_sock *dp = dccp_sk(sk);
261         const int old_state = sk->sk_state;
262
263         if (old_state != DCCP_CLOSED)
264                 dccp_set_state(sk, DCCP_CLOSED);
265
266         /*
267          * This corresponds to the ABORT function of RFC793, sec. 3.8
268          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
269          */
270         if (old_state == DCCP_LISTEN) {
271                 inet_csk_listen_stop(sk);
272         } else if (dccp_need_reset(old_state)) {
273                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
274                 sk->sk_err = ECONNRESET;
275         } else if (old_state == DCCP_REQUESTING)
276                 sk->sk_err = ECONNRESET;
277
278         dccp_clear_xmit_timers(sk);
279         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
280         dp->dccps_hc_rx_ccid = NULL;
281
282         __skb_queue_purge(&sk->sk_receive_queue);
283         __skb_queue_purge(&sk->sk_write_queue);
284         if (sk->sk_send_head != NULL) {
285                 __kfree_skb(sk->sk_send_head);
286                 sk->sk_send_head = NULL;
287         }
288
289         inet->inet_dport = 0;
290
291         inet_bhash2_reset_saddr(sk);
292
293         sk->sk_shutdown = 0;
294         sock_reset_flag(sk, SOCK_DONE);
295
296         icsk->icsk_backoff = 0;
297         inet_csk_delack_init(sk);
298         __sk_dst_reset(sk);
299
300         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
301
302         sk_error_report(sk);
303         return 0;
304 }
305
306 EXPORT_SYMBOL_GPL(dccp_disconnect);
307
308 /*
309  *      Wait for a DCCP event.
310  *
311  *      Note that we don't need to lock the socket, as the upper poll layers
312  *      take care of normal races (between the test and the event) and we don't
313  *      go look at any of the socket buffers directly.
314  */
315 __poll_t dccp_poll(struct file *file, struct socket *sock,
316                        poll_table *wait)
317 {
318         __poll_t mask;
319         struct sock *sk = sock->sk;
320
321         sock_poll_wait(file, sock, wait);
322         if (sk->sk_state == DCCP_LISTEN)
323                 return inet_csk_listen_poll(sk);
324
325         /* Socket is not locked. We are protected from async events
326            by poll logic and correct handling of state changes
327            made by another threads is impossible in any case.
328          */
329
330         mask = 0;
331         if (sk->sk_err)
332                 mask = EPOLLERR;
333
334         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
335                 mask |= EPOLLHUP;
336         if (sk->sk_shutdown & RCV_SHUTDOWN)
337                 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
338
339         /* Connected? */
340         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
341                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
342                         mask |= EPOLLIN | EPOLLRDNORM;
343
344                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
345                         if (sk_stream_is_writeable(sk)) {
346                                 mask |= EPOLLOUT | EPOLLWRNORM;
347                         } else {  /* send SIGIO later */
348                                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
349                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
350
351                                 /* Race breaker. If space is freed after
352                                  * wspace test but before the flags are set,
353                                  * IO signal will be lost.
354                                  */
355                                 if (sk_stream_is_writeable(sk))
356                                         mask |= EPOLLOUT | EPOLLWRNORM;
357                         }
358                 }
359         }
360         return mask;
361 }
362
363 EXPORT_SYMBOL_GPL(dccp_poll);
364
365 int dccp_ioctl(struct sock *sk, int cmd, int *karg)
366 {
367         int rc = -ENOTCONN;
368
369         lock_sock(sk);
370
371         if (sk->sk_state == DCCP_LISTEN)
372                 goto out;
373
374         switch (cmd) {
375         case SIOCOUTQ: {
376                 *karg = sk_wmem_alloc_get(sk);
377                 /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
378                  * always 0, comparably to UDP.
379                  */
380
381                 rc = 0;
382         }
383                 break;
384         case SIOCINQ: {
385                 struct sk_buff *skb;
386                 *karg = 0;
387
388                 skb = skb_peek(&sk->sk_receive_queue);
389                 if (skb != NULL) {
390                         /*
391                          * We will only return the amount of this packet since
392                          * that is all that will be read.
393                          */
394                         *karg = skb->len;
395                 }
396                 rc = 0;
397         }
398                 break;
399         default:
400                 rc = -ENOIOCTLCMD;
401                 break;
402         }
403 out:
404         release_sock(sk);
405         return rc;
406 }
407
408 EXPORT_SYMBOL_GPL(dccp_ioctl);
409
410 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
411                                    sockptr_t optval, unsigned int optlen)
412 {
413         struct dccp_sock *dp = dccp_sk(sk);
414         struct dccp_service_list *sl = NULL;
415
416         if (service == DCCP_SERVICE_INVALID_VALUE ||
417             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
418                 return -EINVAL;
419
420         if (optlen > sizeof(service)) {
421                 sl = kmalloc(optlen, GFP_KERNEL);
422                 if (sl == NULL)
423                         return -ENOMEM;
424
425                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
426                 if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
427                                 sizeof(service), optlen - sizeof(service)) ||
428                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
429                         kfree(sl);
430                         return -EFAULT;
431                 }
432         }
433
434         lock_sock(sk);
435         dp->dccps_service = service;
436
437         kfree(dp->dccps_service_list);
438
439         dp->dccps_service_list = sl;
440         release_sock(sk);
441         return 0;
442 }
443
444 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
445 {
446         u8 *list, len;
447         int i, rc;
448
449         if (cscov < 0 || cscov > 15)
450                 return -EINVAL;
451         /*
452          * Populate a list of permissible values, in the range cscov...15. This
453          * is necessary since feature negotiation of single values only works if
454          * both sides incidentally choose the same value. Since the list starts
455          * lowest-value first, negotiation will pick the smallest shared value.
456          */
457         if (cscov == 0)
458                 return 0;
459         len = 16 - cscov;
460
461         list = kmalloc(len, GFP_KERNEL);
462         if (list == NULL)
463                 return -ENOBUFS;
464
465         for (i = 0; i < len; i++)
466                 list[i] = cscov++;
467
468         rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
469
470         if (rc == 0) {
471                 if (rx)
472                         dccp_sk(sk)->dccps_pcrlen = cscov;
473                 else
474                         dccp_sk(sk)->dccps_pcslen = cscov;
475         }
476         kfree(list);
477         return rc;
478 }
479
480 static int dccp_setsockopt_ccid(struct sock *sk, int type,
481                                 sockptr_t optval, unsigned int optlen)
482 {
483         u8 *val;
484         int rc = 0;
485
486         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
487                 return -EINVAL;
488
489         val = memdup_sockptr(optval, optlen);
490         if (IS_ERR(val))
491                 return PTR_ERR(val);
492
493         lock_sock(sk);
494         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
495                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
496
497         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
498                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
499         release_sock(sk);
500
501         kfree(val);
502         return rc;
503 }
504
505 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
506                 sockptr_t optval, unsigned int optlen)
507 {
508         struct dccp_sock *dp = dccp_sk(sk);
509         int val, err = 0;
510
511         switch (optname) {
512         case DCCP_SOCKOPT_PACKET_SIZE:
513                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
514                 return 0;
515         case DCCP_SOCKOPT_CHANGE_L:
516         case DCCP_SOCKOPT_CHANGE_R:
517                 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
518                 return 0;
519         case DCCP_SOCKOPT_CCID:
520         case DCCP_SOCKOPT_RX_CCID:
521         case DCCP_SOCKOPT_TX_CCID:
522                 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
523         }
524
525         if (optlen < (int)sizeof(int))
526                 return -EINVAL;
527
528         if (copy_from_sockptr(&val, optval, sizeof(int)))
529                 return -EFAULT;
530
531         if (optname == DCCP_SOCKOPT_SERVICE)
532                 return dccp_setsockopt_service(sk, val, optval, optlen);
533
534         lock_sock(sk);
535         switch (optname) {
536         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
537                 if (dp->dccps_role != DCCP_ROLE_SERVER)
538                         err = -EOPNOTSUPP;
539                 else
540                         dp->dccps_server_timewait = (val != 0);
541                 break;
542         case DCCP_SOCKOPT_SEND_CSCOV:
543                 err = dccp_setsockopt_cscov(sk, val, false);
544                 break;
545         case DCCP_SOCKOPT_RECV_CSCOV:
546                 err = dccp_setsockopt_cscov(sk, val, true);
547                 break;
548         case DCCP_SOCKOPT_QPOLICY_ID:
549                 if (sk->sk_state != DCCP_CLOSED)
550                         err = -EISCONN;
551                 else if (val < 0 || val >= DCCPQ_POLICY_MAX)
552                         err = -EINVAL;
553                 else
554                         dp->dccps_qpolicy = val;
555                 break;
556         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
557                 if (val < 0)
558                         err = -EINVAL;
559                 else
560                         dp->dccps_tx_qlen = val;
561                 break;
562         default:
563                 err = -ENOPROTOOPT;
564                 break;
565         }
566         release_sock(sk);
567
568         return err;
569 }
570
571 int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
572                     unsigned int optlen)
573 {
574         if (level != SOL_DCCP)
575                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
576                                                              optname, optval,
577                                                              optlen);
578         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
579 }
580
581 EXPORT_SYMBOL_GPL(dccp_setsockopt);
582
583 static int dccp_getsockopt_service(struct sock *sk, int len,
584                                    __be32 __user *optval,
585                                    int __user *optlen)
586 {
587         const struct dccp_sock *dp = dccp_sk(sk);
588         const struct dccp_service_list *sl;
589         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
590
591         lock_sock(sk);
592         if ((sl = dp->dccps_service_list) != NULL) {
593                 slen = sl->dccpsl_nr * sizeof(u32);
594                 total_len += slen;
595         }
596
597         err = -EINVAL;
598         if (total_len > len)
599                 goto out;
600
601         err = 0;
602         if (put_user(total_len, optlen) ||
603             put_user(dp->dccps_service, optval) ||
604             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
605                 err = -EFAULT;
606 out:
607         release_sock(sk);
608         return err;
609 }
610
611 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
612                     char __user *optval, int __user *optlen)
613 {
614         struct dccp_sock *dp;
615         int val, len;
616
617         if (get_user(len, optlen))
618                 return -EFAULT;
619
620         if (len < (int)sizeof(int))
621                 return -EINVAL;
622
623         dp = dccp_sk(sk);
624
625         switch (optname) {
626         case DCCP_SOCKOPT_PACKET_SIZE:
627                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
628                 return 0;
629         case DCCP_SOCKOPT_SERVICE:
630                 return dccp_getsockopt_service(sk, len,
631                                                (__be32 __user *)optval, optlen);
632         case DCCP_SOCKOPT_GET_CUR_MPS:
633                 val = dp->dccps_mss_cache;
634                 break;
635         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
636                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
637         case DCCP_SOCKOPT_TX_CCID:
638                 val = ccid_get_current_tx_ccid(dp);
639                 if (val < 0)
640                         return -ENOPROTOOPT;
641                 break;
642         case DCCP_SOCKOPT_RX_CCID:
643                 val = ccid_get_current_rx_ccid(dp);
644                 if (val < 0)
645                         return -ENOPROTOOPT;
646                 break;
647         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
648                 val = dp->dccps_server_timewait;
649                 break;
650         case DCCP_SOCKOPT_SEND_CSCOV:
651                 val = dp->dccps_pcslen;
652                 break;
653         case DCCP_SOCKOPT_RECV_CSCOV:
654                 val = dp->dccps_pcrlen;
655                 break;
656         case DCCP_SOCKOPT_QPOLICY_ID:
657                 val = dp->dccps_qpolicy;
658                 break;
659         case DCCP_SOCKOPT_QPOLICY_TXQLEN:
660                 val = dp->dccps_tx_qlen;
661                 break;
662         case 128 ... 191:
663                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
664                                              len, (u32 __user *)optval, optlen);
665         case 192 ... 255:
666                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
667                                              len, (u32 __user *)optval, optlen);
668         default:
669                 return -ENOPROTOOPT;
670         }
671
672         len = sizeof(val);
673         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
674                 return -EFAULT;
675
676         return 0;
677 }
678
679 int dccp_getsockopt(struct sock *sk, int level, int optname,
680                     char __user *optval, int __user *optlen)
681 {
682         if (level != SOL_DCCP)
683                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
684                                                              optname, optval,
685                                                              optlen);
686         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
687 }
688
689 EXPORT_SYMBOL_GPL(dccp_getsockopt);
690
691 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
692 {
693         struct cmsghdr *cmsg;
694
695         /*
696          * Assign an (opaque) qpolicy priority value to skb->priority.
697          *
698          * We are overloading this skb field for use with the qpolicy subystem.
699          * The skb->priority is normally used for the SO_PRIORITY option, which
700          * is initialised from sk_priority. Since the assignment of sk_priority
701          * to skb->priority happens later (on layer 3), we overload this field
702          * for use with queueing priorities as long as the skb is on layer 4.
703          * The default priority value (if nothing is set) is 0.
704          */
705         skb->priority = 0;
706
707         for_each_cmsghdr(cmsg, msg) {
708                 if (!CMSG_OK(msg, cmsg))
709                         return -EINVAL;
710
711                 if (cmsg->cmsg_level != SOL_DCCP)
712                         continue;
713
714                 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
715                     !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
716                         return -EINVAL;
717
718                 switch (cmsg->cmsg_type) {
719                 case DCCP_SCM_PRIORITY:
720                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
721                                 return -EINVAL;
722                         skb->priority = *(__u32 *)CMSG_DATA(cmsg);
723                         break;
724                 default:
725                         return -EINVAL;
726                 }
727         }
728         return 0;
729 }
730
731 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
732 {
733         const struct dccp_sock *dp = dccp_sk(sk);
734         const int flags = msg->msg_flags;
735         const int noblock = flags & MSG_DONTWAIT;
736         struct sk_buff *skb;
737         int rc, size;
738         long timeo;
739
740         trace_dccp_probe(sk, len);
741
742         if (len > dp->dccps_mss_cache)
743                 return -EMSGSIZE;
744
745         lock_sock(sk);
746
747         timeo = sock_sndtimeo(sk, noblock);
748
749         /*
750          * We have to use sk_stream_wait_connect here to set sk_write_pending,
751          * so that the trick in dccp_rcv_request_sent_state_process.
752          */
753         /* Wait for a connection to finish. */
754         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
755                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
756                         goto out_release;
757
758         size = sk->sk_prot->max_header + len;
759         release_sock(sk);
760         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
761         lock_sock(sk);
762         if (skb == NULL)
763                 goto out_release;
764
765         if (dccp_qpolicy_full(sk)) {
766                 rc = -EAGAIN;
767                 goto out_discard;
768         }
769
770         if (sk->sk_state == DCCP_CLOSED) {
771                 rc = -ENOTCONN;
772                 goto out_discard;
773         }
774
775         skb_reserve(skb, sk->sk_prot->max_header);
776         rc = memcpy_from_msg(skb_put(skb, len), msg, len);
777         if (rc != 0)
778                 goto out_discard;
779
780         rc = dccp_msghdr_parse(msg, skb);
781         if (rc != 0)
782                 goto out_discard;
783
784         dccp_qpolicy_push(sk, skb);
785         /*
786          * The xmit_timer is set if the TX CCID is rate-based and will expire
787          * when congestion control permits to release further packets into the
788          * network. Window-based CCIDs do not use this timer.
789          */
790         if (!timer_pending(&dp->dccps_xmit_timer))
791                 dccp_write_xmit(sk);
792 out_release:
793         release_sock(sk);
794         return rc ? : len;
795 out_discard:
796         kfree_skb(skb);
797         goto out_release;
798 }
799
800 EXPORT_SYMBOL_GPL(dccp_sendmsg);
801
802 int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
803                  int *addr_len)
804 {
805         const struct dccp_hdr *dh;
806         long timeo;
807
808         lock_sock(sk);
809
810         if (sk->sk_state == DCCP_LISTEN) {
811                 len = -ENOTCONN;
812                 goto out;
813         }
814
815         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
816
817         do {
818                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
819
820                 if (skb == NULL)
821                         goto verify_sock_status;
822
823                 dh = dccp_hdr(skb);
824
825                 switch (dh->dccph_type) {
826                 case DCCP_PKT_DATA:
827                 case DCCP_PKT_DATAACK:
828                         goto found_ok_skb;
829
830                 case DCCP_PKT_CLOSE:
831                 case DCCP_PKT_CLOSEREQ:
832                         if (!(flags & MSG_PEEK))
833                                 dccp_finish_passive_close(sk);
834                         fallthrough;
835                 case DCCP_PKT_RESET:
836                         dccp_pr_debug("found fin (%s) ok!\n",
837                                       dccp_packet_name(dh->dccph_type));
838                         len = 0;
839                         goto found_fin_ok;
840                 default:
841                         dccp_pr_debug("packet_type=%s\n",
842                                       dccp_packet_name(dh->dccph_type));
843                         sk_eat_skb(sk, skb);
844                 }
845 verify_sock_status:
846                 if (sock_flag(sk, SOCK_DONE)) {
847                         len = 0;
848                         break;
849                 }
850
851                 if (sk->sk_err) {
852                         len = sock_error(sk);
853                         break;
854                 }
855
856                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
857                         len = 0;
858                         break;
859                 }
860
861                 if (sk->sk_state == DCCP_CLOSED) {
862                         if (!sock_flag(sk, SOCK_DONE)) {
863                                 /* This occurs when user tries to read
864                                  * from never connected socket.
865                                  */
866                                 len = -ENOTCONN;
867                                 break;
868                         }
869                         len = 0;
870                         break;
871                 }
872
873                 if (!timeo) {
874                         len = -EAGAIN;
875                         break;
876                 }
877
878                 if (signal_pending(current)) {
879                         len = sock_intr_errno(timeo);
880                         break;
881                 }
882
883                 sk_wait_data(sk, &timeo, NULL);
884                 continue;
885         found_ok_skb:
886                 if (len > skb->len)
887                         len = skb->len;
888                 else if (len < skb->len)
889                         msg->msg_flags |= MSG_TRUNC;
890
891                 if (skb_copy_datagram_msg(skb, 0, msg, len)) {
892                         /* Exception. Bailout! */
893                         len = -EFAULT;
894                         break;
895                 }
896                 if (flags & MSG_TRUNC)
897                         len = skb->len;
898         found_fin_ok:
899                 if (!(flags & MSG_PEEK))
900                         sk_eat_skb(sk, skb);
901                 break;
902         } while (1);
903 out:
904         release_sock(sk);
905         return len;
906 }
907
908 EXPORT_SYMBOL_GPL(dccp_recvmsg);
909
910 int inet_dccp_listen(struct socket *sock, int backlog)
911 {
912         struct sock *sk = sock->sk;
913         unsigned char old_state;
914         int err;
915
916         lock_sock(sk);
917
918         err = -EINVAL;
919         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
920                 goto out;
921
922         old_state = sk->sk_state;
923         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
924                 goto out;
925
926         WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
927         /* Really, if the socket is already in listen state
928          * we can only allow the backlog to be adjusted.
929          */
930         if (old_state != DCCP_LISTEN) {
931                 struct dccp_sock *dp = dccp_sk(sk);
932
933                 dp->dccps_role = DCCP_ROLE_LISTEN;
934
935                 /* do not start to listen if feature negotiation setup fails */
936                 if (dccp_feat_finalise_settings(dp)) {
937                         err = -EPROTO;
938                         goto out;
939                 }
940
941                 err = inet_csk_listen_start(sk);
942                 if (err)
943                         goto out;
944         }
945         err = 0;
946
947 out:
948         release_sock(sk);
949         return err;
950 }
951
952 EXPORT_SYMBOL_GPL(inet_dccp_listen);
953
954 static void dccp_terminate_connection(struct sock *sk)
955 {
956         u8 next_state = DCCP_CLOSED;
957
958         switch (sk->sk_state) {
959         case DCCP_PASSIVE_CLOSE:
960         case DCCP_PASSIVE_CLOSEREQ:
961                 dccp_finish_passive_close(sk);
962                 break;
963         case DCCP_PARTOPEN:
964                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
965                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
966                 fallthrough;
967         case DCCP_OPEN:
968                 dccp_send_close(sk, 1);
969
970                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
971                     !dccp_sk(sk)->dccps_server_timewait)
972                         next_state = DCCP_ACTIVE_CLOSEREQ;
973                 else
974                         next_state = DCCP_CLOSING;
975                 fallthrough;
976         default:
977                 dccp_set_state(sk, next_state);
978         }
979 }
980
981 void dccp_close(struct sock *sk, long timeout)
982 {
983         struct dccp_sock *dp = dccp_sk(sk);
984         struct sk_buff *skb;
985         u32 data_was_unread = 0;
986         int state;
987
988         lock_sock(sk);
989
990         sk->sk_shutdown = SHUTDOWN_MASK;
991
992         if (sk->sk_state == DCCP_LISTEN) {
993                 dccp_set_state(sk, DCCP_CLOSED);
994
995                 /* Special case. */
996                 inet_csk_listen_stop(sk);
997
998                 goto adjudge_to_death;
999         }
1000
1001         sk_stop_timer(sk, &dp->dccps_xmit_timer);
1002
1003         /*
1004          * We need to flush the recv. buffs.  We do this only on the
1005          * descriptor close, not protocol-sourced closes, because the
1006           *reader process may not have drained the data yet!
1007          */
1008         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
1009                 data_was_unread += skb->len;
1010                 __kfree_skb(skb);
1011         }
1012
1013         /* If socket has been already reset kill it. */
1014         if (sk->sk_state == DCCP_CLOSED)
1015                 goto adjudge_to_death;
1016
1017         if (data_was_unread) {
1018                 /* Unread data was tossed, send an appropriate Reset Code */
1019                 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
1020                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
1021                 dccp_set_state(sk, DCCP_CLOSED);
1022         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1023                 /* Check zero linger _after_ checking for unread data. */
1024                 sk->sk_prot->disconnect(sk, 0);
1025         } else if (sk->sk_state != DCCP_CLOSED) {
1026                 /*
1027                  * Normal connection termination. May need to wait if there are
1028                  * still packets in the TX queue that are delayed by the CCID.
1029                  */
1030                 dccp_flush_write_queue(sk, &timeout);
1031                 dccp_terminate_connection(sk);
1032         }
1033
1034         /*
1035          * Flush write queue. This may be necessary in several cases:
1036          * - we have been closed by the peer but still have application data;
1037          * - abortive termination (unread data or zero linger time),
1038          * - normal termination but queue could not be flushed within time limit
1039          */
1040         __skb_queue_purge(&sk->sk_write_queue);
1041
1042         sk_stream_wait_close(sk, timeout);
1043
1044 adjudge_to_death:
1045         state = sk->sk_state;
1046         sock_hold(sk);
1047         sock_orphan(sk);
1048
1049         /*
1050          * It is the last release_sock in its life. It will remove backlog.
1051          */
1052         release_sock(sk);
1053         /*
1054          * Now socket is owned by kernel and we acquire BH lock
1055          * to finish close. No need to check for user refs.
1056          */
1057         local_bh_disable();
1058         bh_lock_sock(sk);
1059         WARN_ON(sock_owned_by_user(sk));
1060
1061         this_cpu_inc(dccp_orphan_count);
1062
1063         /* Have we already been destroyed by a softirq or backlog? */
1064         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1065                 goto out;
1066
1067         if (sk->sk_state == DCCP_CLOSED)
1068                 inet_csk_destroy_sock(sk);
1069
1070         /* Otherwise, socket is reprieved until protocol close. */
1071
1072 out:
1073         bh_unlock_sock(sk);
1074         local_bh_enable();
1075         sock_put(sk);
1076 }
1077
1078 EXPORT_SYMBOL_GPL(dccp_close);
1079
/*
 * shutdown() handler.  It only logs the requested @how value through
 * dccp_pr_debug(); the socket @sk is not modified.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1084
1085 EXPORT_SYMBOL_GPL(dccp_shutdown);
1086
1087 static inline int __init dccp_mib_init(void)
1088 {
1089         dccp_statistics = alloc_percpu(struct dccp_mib);
1090         if (!dccp_statistics)
1091                 return -ENOMEM;
1092         return 0;
1093 }
1094
1095 static inline void dccp_mib_exit(void)
1096 {
1097         free_percpu(dccp_statistics);
1098 }
1099
1100 static int thash_entries;
1101 module_param(thash_entries, int, 0444);
1102 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1103
1104 #ifdef CONFIG_IP_DCCP_DEBUG
1105 bool dccp_debug;
1106 module_param(dccp_debug, bool, 0644);
1107 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1108
1109 EXPORT_SYMBOL_GPL(dccp_debug);
1110 #endif
1111
1112 static int __init dccp_init(void)
1113 {
1114         unsigned long goal;
1115         unsigned long nr_pages = totalram_pages();
1116         int ehash_order, bhash_order, i;
1117         int rc;
1118
1119         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1120                      sizeof_field(struct sk_buff, cb));
1121         rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
1122         if (rc)
1123                 goto out_fail;
1124         rc = -ENOBUFS;
1125         dccp_hashinfo.bind_bucket_cachep =
1126                 kmem_cache_create("dccp_bind_bucket",
1127                                   sizeof(struct inet_bind_bucket), 0,
1128                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1129         if (!dccp_hashinfo.bind_bucket_cachep)
1130                 goto out_free_hashinfo2;
1131         dccp_hashinfo.bind2_bucket_cachep =
1132                 kmem_cache_create("dccp_bind2_bucket",
1133                                   sizeof(struct inet_bind2_bucket), 0,
1134                                   SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
1135         if (!dccp_hashinfo.bind2_bucket_cachep)
1136                 goto out_free_bind_bucket_cachep;
1137
1138         /*
1139          * Size and allocate the main established and bind bucket
1140          * hash tables.
1141          *
1142          * The methodology is similar to that of the buffer cache.
1143          */
1144         if (nr_pages >= (128 * 1024))
1145                 goal = nr_pages >> (21 - PAGE_SHIFT);
1146         else
1147                 goal = nr_pages >> (23 - PAGE_SHIFT);
1148
1149         if (thash_entries)
1150                 goal = (thash_entries *
1151                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1152         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1153                 ;
1154         do {
1155                 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
1156                                         sizeof(struct inet_ehash_bucket);
1157
1158                 while (hash_size & (hash_size - 1))
1159                         hash_size--;
1160                 dccp_hashinfo.ehash_mask = hash_size - 1;
1161                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1162                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
1163         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1164
1165         if (!dccp_hashinfo.ehash) {
1166                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1167                 goto out_free_bind2_bucket_cachep;
1168         }
1169
1170         for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
1171                 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1172
1173         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1174                         goto out_free_dccp_ehash;
1175
1176         bhash_order = ehash_order;
1177
1178         do {
1179                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1180                                         sizeof(struct inet_bind_hashbucket);
1181                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1182                     bhash_order > 0)
1183                         continue;
1184                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1185                         __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
1186         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1187
1188         if (!dccp_hashinfo.bhash) {
1189                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1190                 goto out_free_dccp_locks;
1191         }
1192
1193         dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
1194                 __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
1195
1196         if (!dccp_hashinfo.bhash2) {
1197                 DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
1198                 goto out_free_dccp_bhash;
1199         }
1200
1201         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1202                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1203                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1204                 spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
1205                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
1206         }
1207
1208         dccp_hashinfo.pernet = false;
1209
1210         rc = dccp_mib_init();
1211         if (rc)
1212                 goto out_free_dccp_bhash2;
1213
1214         rc = dccp_ackvec_init();
1215         if (rc)
1216                 goto out_free_dccp_mib;
1217
1218         rc = dccp_sysctl_init();
1219         if (rc)
1220                 goto out_ackvec_exit;
1221
1222         rc = ccid_initialize_builtins();
1223         if (rc)
1224                 goto out_sysctl_exit;
1225
1226         dccp_timestamping_init();
1227
1228         return 0;
1229
1230 out_sysctl_exit:
1231         dccp_sysctl_exit();
1232 out_ackvec_exit:
1233         dccp_ackvec_exit();
1234 out_free_dccp_mib:
1235         dccp_mib_exit();
1236 out_free_dccp_bhash2:
1237         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1238 out_free_dccp_bhash:
1239         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1240 out_free_dccp_locks:
1241         inet_ehash_locks_free(&dccp_hashinfo);
1242 out_free_dccp_ehash:
1243         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1244 out_free_bind2_bucket_cachep:
1245         kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
1246 out_free_bind_bucket_cachep:
1247         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1248 out_free_hashinfo2:
1249         inet_hashinfo2_free_mod(&dccp_hashinfo);
1250 out_fail:
1251         dccp_hashinfo.bhash = NULL;
1252         dccp_hashinfo.bhash2 = NULL;
1253         dccp_hashinfo.ehash = NULL;
1254         dccp_hashinfo.bind_bucket_cachep = NULL;
1255         dccp_hashinfo.bind2_bucket_cachep = NULL;
1256         return rc;
1257 }
1258
1259 static void __exit dccp_fini(void)
1260 {
1261         int bhash_order = get_order(dccp_hashinfo.bhash_size *
1262                                     sizeof(struct inet_bind_hashbucket));
1263
1264         ccid_cleanup_builtins();
1265         dccp_mib_exit();
1266         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1267         free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
1268         free_pages((unsigned long)dccp_hashinfo.ehash,
1269                    get_order((dccp_hashinfo.ehash_mask + 1) *
1270                              sizeof(struct inet_ehash_bucket)));
1271         inet_ehash_locks_free(&dccp_hashinfo);
1272         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1273         dccp_ackvec_exit();
1274         dccp_sysctl_exit();
1275         inet_hashinfo2_free_mod(&dccp_hashinfo);
1276 }
1277
1278 module_init(dccp_init);
1279 module_exit(dccp_fini);
1280
1281 MODULE_LICENSE("GPL");
1282 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1283 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");